git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
rgw: add rgw-gap-list-comparator tool 39785/head
author: J. Eric Ivancich <ivancich@redhat.com>
Fri, 29 Jan 2021 17:03:50 +0000 (12:03 -0500)
committer: J. Eric Ivancich <ivancich@redhat.com>
Tue, 2 Mar 2021 15:34:50 +0000 (10:34 -0500)
The rgw-gap-list tool can produce a number of false positives when the
cluster is being used during its run. One technique to minimize the
number of false positives is to run the tool twice and look for the
objects that appear in both lists. The rgw-gap-list-comparator tool is
designed to do this comparison.

Signed-off-by: Michael Kidd <linuxkidd@gmail.com>
(cherry picked from commit 3c5154f010a34f9845b40e49a7870771e7f5372e)

ceph.spec.in
debian/radosgw.install
src/rgw/CMakeLists.txt
src/rgw/rgw-gap-list-comparator [new file with mode: 0755]

index e6cd00ad5054fcd8d8d53b8080ccd2c271addfdd..b6759d0a3109e6bc41b36c7f42416fe7ac9e6ab0 100644 (file)
@@ -1947,6 +1947,7 @@ fi
 %{_bindir}/radosgw-es
 %{_bindir}/radosgw-object-expirer
 %{_bindir}/rgw-gap-list
+%{_bindir}/rgw-gap-list-comparator
 %{_bindir}/rgw-orphan-list
 %{_libdir}/libradosgw.so*
 %{_mandir}/man8/radosgw.8*
index 7b44d7072ca8dacf71aa6861c7bfbb2224b018d7..c36df637c9c95dfad54cf87c0017c3d4488f6f1e 100644 (file)
@@ -5,6 +5,7 @@ usr/bin/radosgw-es
 usr/bin/radosgw-object-expirer
 usr/bin/radosgw-token
 usr/bin/rgw-gap-list
+usr/bin/rgw-gap-list-comparator
 usr/bin/rgw-orphan-list
 usr/lib/libradosgw.so*
 usr/share/man/man8/ceph-diff-sorted.8
index 2d1b7eaad5fe230685a79cb05914a3d1b5320e41..c631c7668ad0721db0c6a04acf07f0be03ab18d2 100644 (file)
@@ -419,5 +419,6 @@ endif(WITH_TESTS)
 
 install(PROGRAMS
   rgw-gap-list
+  rgw-gap-list-comparator
   rgw-orphan-list
   DESTINATION bin)
diff --git a/src/rgw/rgw-gap-list-comparator b/src/rgw/rgw-gap-list-comparator
new file mode 100755 (executable)
index 0000000..c377fda
--- /dev/null
@@ -0,0 +1,119 @@
+#!/usr/bin/awk -f
+
+#
+# Version 1
+#
+# This awk script takes two, similarly sorted lists and outputs
+# only the lines which exist in both lists.  The script takes
+# three inputs:
+#
+# ./rgw-gap-list-comparator \
+#     -v filetwo=gap-list-B.txt \
+#     -v matchout=matched_lines.txt \
+#     gap-list-A.txt
+#
+
+# usage() -- print the recommended workflow for this tool on stderr and
+# terminate the script with exit status 1.
+#
+# Takes no arguments and does not return (it ends with "exit 1").
+function usage() {
+  print "">>"/dev/stderr"
+  print "">>"/dev/stderr"
+  print "The idea behind the script is to eliminate false positive hits">>"/dev/stderr"
+  print "from the rgw-gap-list tool which are due to upload timing of new">>"/dev/stderr"
+  print "objects during the tool's execution.  To use the tool properly,">>"/dev/stderr"
+  print "the following process should be followed:">>"/dev/stderr"
+  print "">>"/dev/stderr"
+  print "">>"/dev/stderr"
+  print " 1: Run the 'rgw-gap-list' tool twice">>"/dev/stderr"
+  print "">>"/dev/stderr"
+  print " 2: Sort the resulting map files:">>"/dev/stderr"
+  print "   $ export LC_ALL=C">>"/dev/stderr"
+  print "   $ sort gap-list-A.gap > gap-list-A.sorted.gap">>"/dev/stderr"
+  # The output name must match the file passed as 'filetwo' in step 3.
+  print "   $ sort gap-list-B.gap > gap-list-B.sorted.gap">>"/dev/stderr"
+  print "   -- Where the A / B in the gap-list file names are the date/time associated with each of the respective 'rgw-gap-list' outputs">>"/dev/stderr"
+  print "">>"/dev/stderr"
+  print " 3: Run the 'rgw-gap-list-comparator' script over the two files:">>"/dev/stderr"
+  # Matches are appended (>>) to 'matchout', so a stale file is removed first.
+  print "   $ rm matched_lines.txt">>"/dev/stderr"
+  print "   $ ./rgw-gap-list-comparator -v filetwo=gap-list-B.sorted.gap -v matchout=matched_lines.txt gap-list-A.sorted.gap">>"/dev/stderr"
+  print "   -- Where the A / B in the gap-list file names are the date/time associated with each of the respective 'rgw-gap-list' outputs">>"/dev/stderr"
+  print "">>"/dev/stderr"
+  print " The resulting 'matched_lines.txt' will be a high confidence list of impacted objects with little to no false positives.">>"/dev/stderr"
+  print "">>"/dev/stderr"
+  print "">>"/dev/stderr"
+  exit 1
+}
+
+# advance_f2() -- read the next line of file two (the path in 'filetwo')
+# into the global 'f2line'.
+#
+# On success f2_count is incremented.  At end of file f2_eof is set to 1
+# and f2line keeps its previous value.  If the file cannot be read at all
+# (getline returns a negative value) an error is written to stderr and the
+# script exits with status 1, instead of treating the failure as an empty
+# file and silently reporting zero matches.
+#
+# 'rc' is a function-local scratch variable; callers pass no arguments.
+function advance_f2(    rc) {
+  rc = (getline f2line<filetwo)
+  if (rc > 0) {
+    f2_count++
+  } else {
+    f2_eof=1
+    if (rc < 0) {
+      print ("Error: unable to read file '" filetwo "'.")>>"/dev/stderr"
+      exit 1
+    }
+  }
+}
+
+# test_lines() -- compare the current file-one record ($0) with the current
+# file-two line (f2line).
+#
+# Returns:
+#   0  the lines are identical; the line was appended to 'matchout',
+#      lineoutcount was incremented and file two was advanced one line
+#   1  $0 sorts before f2line
+#   2  $0 sorts after f2line
+#
+# Both operands are concatenated with "" to force a string comparison.
+# Input lines that look like numbers would otherwise be compared
+# numerically, which can report a match for different text ("1.0" vs "1")
+# and does not agree with the ordering produced by sort.
+function test_lines() {
+  if (($0 "")==(f2line "")) {
+    print $0>>matchout
+    lineoutcount++
+    advance_f2()
+    return 0
+  } else if (($0 "")>(f2line "")) {
+    return 2
+  } else {
+    return 1
+  }
+}
+
+# status_out() -- write one progress row to stderr: the current date/time
+# followed by the file-one line count, the file-two line count and the
+# number of matched lines written so far.
+function status_out() {
+  printf "%s % 17d\t% 17d\t% 12d\n",
+         get_date_time(),
+         f1_count,
+         f2_count,
+         lineoutcount >> "/dev/stderr"
+}
+
+# get_date_time() -- return the first line printed by the external
+# "date +%F\ %T" command.
+#
+# The command is closed after every call so that the next call runs it
+# again rather than reading from the finished pipe.
+#
+# 'cmd' and 'stamp' are function-local scratch variables (callers pass no
+# arguments).  Keeping them local means a failed read yields an empty
+# string rather than the value left behind by an earlier call.
+function get_date_time(    cmd, stamp) {
+  cmd="date +%F\\ %T"
+  cmd | getline stamp
+  close(cmd)
+  return stamp
+}
+
+# BEGIN: validate the required -v variables, initialise the counters,
+# read the first line of file two and print the progress-table header
+# plus an initial (all zero) row on stderr.
+BEGIN {
+  # Both 'filetwo' (second sorted list) and 'matchout' (output file) are
+  # required; without them print the diagnostic and the usage text, all on
+  # stderr, and let usage() terminate the script.
+  if(filetwo==""||matchout=="") {
+     print "">>"/dev/stderr"
+     print "">>"/dev/stderr"
+     print "Missing parameter.">>"/dev/stderr"
+     print "">>"/dev/stderr"
+     print "">>"/dev/stderr"
+     usage()
+  }
+
+  f1_count=0
+  f2_count=0
+  lineoutcount=0
+  f2_eof=0
+  # Number of file-one lines between two progress rows.
+  statusevery=100000
+  # Prime f2line so the first file-one record has something to compare to.
+  advance_f2()
+  printf("%s File 1 Line Count\tFile 2 Line Count\tPotentially Impacted Objects\n",get_date_time())>>"/dev/stderr"
+  status_out()
+}
+
+
+# Main rule: runs once for every line of file one (the file named on the
+# command line) and merges it against the current position in file two.
+{
+  f1_count++
+  if(f2_eof==0) {
+    if(test_lines()==2) {
+      # The file-one line sorts after the file-two line: skip file-two
+      # lines until one is not smaller (or file two ends), then compare
+      # once more so that a match at the new position is recorded.
+      # Concatenating "" forces a string comparison even when both lines
+      # look like numbers, keeping the ordering consistent with sort.
+      while(($0 "")>(f2line "") && f2_eof==0) {
+        advance_f2()
+      }
+      test_lines()
+    }
+  } else {
+    # File two is exhausted, so no further matches are possible.
+    exit 0
+  }
+  if ((f1_count % statusevery)==0) {
+    status_out()
+  }
+}
+
+# END: print a final progress row, but only when at least one line of
+# file one was counted.
+END {
+  if (f1_count > 0)
+    status_out()
+}
+