From 3c5154f010a34f9845b40e49a7870771e7f5372e Mon Sep 17 00:00:00 2001 From: "J. Eric Ivancich" Date: Fri, 29 Jan 2021 12:03:50 -0500 Subject: [PATCH] rgw: add rgw-gap-list-comparator tool The rgw-gap-list tool can produce a number of false positives when the cluster is being used during its run. One technique to minimize the number of false positives is to run the tool twice and look for the objects that appear in both lists. The rgw-gap-list-comparator tool is designed to do this comparison. Signed-off-by: Michael Kidd --- ceph.spec.in | 1 + debian/radosgw.install | 1 + src/rgw/CMakeLists.txt | 1 + src/rgw/rgw-gap-list-comparator | 119 ++++++++++++++++++++++++++++++++ 4 files changed, 122 insertions(+) create mode 100755 src/rgw/rgw-gap-list-comparator diff --git a/ceph.spec.in b/ceph.spec.in index 2d621ca3962..55d26fea09b 100644 --- a/ceph.spec.in +++ b/ceph.spec.in @@ -1961,6 +1961,7 @@ fi %{_bindir}/radosgw-es %{_bindir}/radosgw-object-expirer %{_bindir}/rgw-gap-list +%{_bindir}/rgw-gap-list-comparator %{_bindir}/rgw-orphan-list %{_libdir}/libradosgw.so* %{_mandir}/man8/radosgw.8* diff --git a/debian/radosgw.install b/debian/radosgw.install index 7b44d7072ca..c36df637c9c 100644 --- a/debian/radosgw.install +++ b/debian/radosgw.install @@ -5,6 +5,7 @@ usr/bin/radosgw-es usr/bin/radosgw-object-expirer usr/bin/radosgw-token usr/bin/rgw-gap-list +usr/bin/rgw-gap-list-comparator usr/bin/rgw-orphan-list usr/lib/libradosgw.so* usr/share/man/man8/ceph-diff-sorted.8 diff --git a/src/rgw/CMakeLists.txt b/src/rgw/CMakeLists.txt index 5588dfa2b26..313cad3beb5 100644 --- a/src/rgw/CMakeLists.txt +++ b/src/rgw/CMakeLists.txt @@ -443,5 +443,6 @@ endif(WITH_TESTS) install(PROGRAMS rgw-gap-list + rgw-gap-list-comparator rgw-orphan-list DESTINATION bin) diff --git a/src/rgw/rgw-gap-list-comparator b/src/rgw/rgw-gap-list-comparator new file mode 100755 index 00000000000..c377fdaf812 --- /dev/null +++ b/src/rgw/rgw-gap-list-comparator @@ -0,0 +1,119 @@ +#!/usr/bin/awk -f + +# +# 
# Version 1
#
# This awk script takes two, similarly sorted lists and outputs
# only the lines which exist in both lists. The script takes
# three inputs:
#
#   ./rgw-gap-list-comparator \
#       -v filetwo=gap-list-B.txt \
#       -v matchout=matched_lines.txt \
#       gap-list-A.txt
#
# The positional file ("file one") is read by awk's normal record loop;
# "file two" is read on demand with getline.  Both inputs must be sorted
# in byte order (LC_ALL=C sort), because the script walks them in
# lock-step like a merge join.  Matching lines are APPENDED to `matchout`.
#
# Globals shared between the functions and rules below:
#   f2line        current (most recently read) line of file two
#   f1_count      records consumed from file one
#   f2_count      lines successfully read from file two
#   lineoutcount  lines written to `matchout`
#   f2_eof        1 once file two is exhausted (or unreadable)
#   statusevery   emit a progress line every this many file-one records

# Write one line of text to standard error.
function emit_err(text) {
    print text >> "/dev/stderr"
}

# Print the how-to-use text on standard error and terminate with status 1.
# (awk still runs the END rule after `exit`; END stays silent while
# f1_count is 0.)
function usage() {
    emit_err("")
    emit_err("")
    emit_err("The idea behind the script is to eliminate false positive hits")
    emit_err("from the rgw-gap-list tool which are due to upload timing of new")
    emit_err("objects during the tool's execution. To use the tool properly,")
    emit_err("the following process should be followed:")
    emit_err("")
    emit_err("")
    emit_err(" 1: Run the 'rgw-gap-list' tool twice")
    emit_err("")
    emit_err(" 2: Sort the resulting map files:")
    emit_err(" $ export LC_ALL=C")
    emit_err(" $ sort gap-list-A.gap > gap-list-A.sorted.gap")
    emit_err(" $ sort gap-list-B.gap > gap-list-B.sorted.gap")
    emit_err(" -- Where the A / B in the gap-list file names are the date/time associated with each of the respective 'rgw-gap-list' outputs")
    emit_err("")
    emit_err(" 3: Run the 'rgw-gap-list-comparator' script over the two files:")
    emit_err(" $ rm matched_lines.txt")
    emit_err(" $ ./rgw-gap-list-comparator -v filetwo=gap-list-B.sorted.gap -v matchout=matched_lines.txt gap-list-A.sorted.gap")
    emit_err(" -- Where the A / B in the gap-list file names are the date/time associated with each of the respective 'rgw-gap-list' outputs")
    emit_err("")
    emit_err(" The resulting 'matched_lines.txt' will be a high confidence list of impacted objects with little to no false positives.")
    emit_err("")
    emit_err("")
    exit 1
}

# Read the next line of file two into the global `f2line`.
# On success f2_count is incremented.  On end of file f2_eof is set and
# f2line keeps its previous value (getline leaves the variable untouched
# when it does not read a record).
#
# NOTE(review): the body of this function was damaged in the copy of the
# patch this was recovered from; it is reconstructed from the surviving
# "getline f2line" fragment and from how f2_count / f2_eof are used
# elsewhere -- confirm against the upstream script.  The explicit error
# for an unreadable file (getline returning a negative value) is an
# addition: without it a mistyped `filetwo` looks exactly like an empty
# list and silently yields "no impacted objects".
#
# `rc` is a local (awk's extra-parameter idiom); callers pass no arguments.
function advance_f2(    rc) {
    rc = (getline f2line < filetwo)
    if (rc > 0) {
        f2_count++
        return
    }
    f2_eof = 1
    if (rc < 0) {
        emit_err("Unable to read file two: " filetwo)
        exit 1
    }
}

# Compare the current file-one record ($0) with the current file-two line.
# Returns:
#   0  lines are identical; the line is appended to `matchout`,
#      lineoutcount is bumped and file two is advanced
#   2  $0 sorts after f2line  (file two needs to catch up)
#   1  $0 sorts before f2line (file one needs to catch up)
#
# Both operands are forced to strings by concatenating "".  $0 and a
# getline variable are otherwise compared NUMERICALLY whenever both look
# like numbers, which would treat "1.0" and "1" as equal and would order
# "10" after "9" -- the opposite of the byte-order sort the inputs use.
#
# NOTE(review): the first lines of this function (the equality test and
# the `print` to matchout) were damaged in the recovered patch and are
# reconstructed from the surviving ">matchout" fragment -- confirm.
function test_lines(    lhs, rhs) {
    lhs = $0 ""
    rhs = f2line ""
    if (lhs == rhs) {
        print $0 >> matchout
        lineoutcount++
        advance_f2()
        return 0
    }
    return (lhs > rhs) ? 2 : 1
}

# Emit one progress line (timestamp and the three counters) on stderr.
function status_out() {
    printf("%s % 17d\t% 17d\t% 12d\n", get_date_time(), f1_count, f2_count, lineoutcount) >> "/dev/stderr"
}

# Return the current date/time as printed by date(1) with "%F %T".
# The command is closed after each use so that the next call re-runs it
# instead of hitting end-of-pipe.  `cmd` and `stamp` are locals.
function get_date_time(    cmd, stamp) {
    cmd = "date +%F\\ %T"
    cmd | getline stamp
    close(cmd)
    return stamp
}

BEGIN {
    # Both -v variables are mandatory.
    if (filetwo == "" || matchout == "") {
        emit_err("")
        emit_err("")
        emit_err("Missing parameter.")
        emit_err("")
        emit_err("")
        usage()
    }

    f1_count = 0
    f2_count = 0
    lineoutcount = 0
    f2_eof = 0
    statusevery = 100000

    # Prime f2line with the first line of file two before any file-one
    # record is processed.
    advance_f2()

    printf("%s File 1 Line Count\tFile 2 Line Count\tPotentially Impacted Objects\n", get_date_time()) >> "/dev/stderr"
    status_out()
}

# Main rule: runs once per record of file one.
{
    f1_count++

    # Nothing left in file two means no further matches are possible.
    if (f2_eof != 0) {
        exit 0
    }

    if (test_lines() == 2) {
        # File two is behind: skip forward until it reaches or passes $0
        # (string comparison, see test_lines), then test once more.
        while (f2_eof == 0 && ($0 "") > (f2line "")) {
            advance_f2()
        }
        test_lines()
    }

    if ((f1_count % statusevery) == 0) {
        status_out()
    }
}

END {
    # Final totals; suppressed when no file-one record was ever read
    # (for example after usage() exits from BEGIN).
    if (f1_count > 0) {
        status_out()
    }
}

# Trailer of the git patch this script was recovered from:
# -- 2.39.5