rgw: rgw-orphan-list should use "plain" formatted `rados ls` output

author J. Eric Ivancich <ivancich@redhat.com>

Fri, 9 Oct 2020 20:06:55 +0000 (16:06 -0400)

committer Nathan Cutler <ncutler@suse.com>

Mon, 26 Oct 2020 17:49:47 +0000 (18:49 +0100)
author J. Eric Ivancich <ivancich@redhat.com>
Fri, 9 Oct 2020 20:06:55 +0000 (16:06 -0400)
committer Nathan Cutler <ncutler@suse.com>
Mon, 26 Oct 2020 17:49:47 +0000 (18:49 +0100)
diff --git a/src/rgw/rgw-orphan-list b/src/rgw/rgw-orphan-list

index 1ebd60c719d131b4c4983a276a693c7624802900..7f60c651a338e48d857a167bff307d2834bfa9b7 100755 (executable)
--- a/src/rgw/rgw-orphan-list
+++ b/src/rgw/rgw-orphan-list
@@ -1,6 +1,6 @@
  #!/usr/bin/env bash
  
-# version 1
+# version 2020-10-20
  
  # IMPORTANT: affects order produced by 'sort' and 'ceph-diff-sorted'
  # relies on this ordering
@@ -8,9 +8,10 @@ export LANG=C
  
  out_dir="."
  temp_file=/tmp/temp.$$
-timestamp=$(date -u +%Y%m%d%H%M)
+timestamp=$(date -u +%Y%m%d%H%M%S)
  lspools_err="${out_dir}/lspools-${timestamp}.error"
  rados_out="${out_dir}/rados-${timestamp}.intermediate"
+rados_odd="${out_dir}/rados-${timestamp}.issues"
  rados_err="${out_dir}/rados-${timestamp}.error"
  rgwadmin_out="${out_dir}/radosgw-admin-${timestamp}.intermediate"
  rgwadmin_err="${out_dir}/radosgw-admin-${timestamp}.error"
@@ -57,19 +58,50 @@ echo "Pool is \"$pool\"."
  echo "Note: output files produced will be tagged with the current timestamp -- ${timestamp}."
  
  echo "running 'rados ls' at $(date)"
-rados ls --pool="$pool" --format=json-pretty --all >"$rados_out" 2>"$rados_err"
+# since --format is not specified, plain should be used
+rados ls --pool="$pool" --all >"$rados_out" 2>"$rados_err"
  if [ "$?" -ne 0 ] ;then
      error_out "rados ls" "$rados_err"
  fi
  
-# check for namespaces and error out if any found
-grep '^[[:blank:]]*"namespace":' "$rados_out" | grep --silent '[^:]*: "[^"]'
-if [ "${PIPESTATUS[1]}" -eq 0 ] ;then
-    error_out "rados ls" "$rados_out" "Found one or more RADOS objects existing in a namespace."
+# NOTE: Each entry (line of output) of `rados ls --all` should be in
+# one of four formats depending on whether or not an entry has a
+# namespace and/or locator:
+#
+#   <TAB>oid
+#   <TAB>oid<TAB>locator
+#   namespace<TAB>oid
+#   namespace<TAB>oid<TAB>locator
+#
+# Any occurrences of the 2nd, 3rd, or 4th (i.e., existence of
+# namespace and/or locator) should cause the creation of the "odd" file
+# and an explanation in the output, and those entries will not be
+# retained, and therefore they will not be called out as orphans. They
+# will need special handling by the end-user as we do not expect
+# namespaces or locators.
+
+# check for namespaces -- any line that does not begin with a tab
+# indicates a namespace; add those to "odd" file and set flag; note:
+# this also picks up entries with namespace and locator
+grep $'^[^\t]' "$rados_out" >"$rados_odd"
+if [ "${PIPESTATUS[0]}" -eq 0 ] ;then
+    namespace_found=1
  fi
  
-# move from json to list of oids
-grep '^[[:blank:]]*"name":' "$rados_out" | sed 's/[^:]*: "\(.*\)"$/\1/' >"$temp_file"
+# check for locators (w/o namespace); we identify them by skipping
+# past the empty namespace (i.e., one TAB), skipping past the oid,
+# then looking for a TAB; note we use egrep to get the '+' character
+# and the $ in front of the ' allows the \t to be interpreted as a TAB
+egrep $'^\t[[:graph:]]+\t' "$rados_out" >>"$rados_odd"
+if [ "${PIPESTATUS[0]}" -eq 0 ] ;then
+    locator_found=1
+fi
+
+# extract the entries that are just oids (i.e., no namespace or
+# locator) for further processing; only look at lines that begin with
+# a TAB and do not contain a second TAB, and then grab everything
+# after the initial TAB
+grep $'^\t' "$rados_out" | grep -v $'^\t.*\t' | sed -E 's/^\t//' >"$temp_file"
  mv -f "$temp_file" "$rados_out"
  
  sort -u "$rados_out" >"$temp_file"
@@ -99,8 +131,12 @@ if [ $possible -ne 0 ] ;then
  fi
  
  echo "$found potential orphans found out of a possible $possible (${percentage}%)."
-echo "The results can be found in ${delta_out}."
-echo "    Intermediate files: ${rados_out} and ${rgwadmin_out}"
+echo "The results can be found in '${delta_out}'."
+echo "    Intermediate files are '${rados_out}' and '${rgwadmin_out}'."
+if [ -n "$namespace_found" -o -n "$locator_found" ] ;then
+    echo "    Note: 'rados ls' found entries that might be in a namespace or might"
+    echo "          have a locator; see '${rados_odd}' for those entries."
+fi
  echo "***"
  echo "*** WARNING: This is EXPERIMENTAL code and the results should be used"
  echo "***          only with CAUTION!"
author	J. Eric Ivancich <ivancich@redhat.com>
	Fri, 9 Oct 2020 20:06:55 +0000 (16:06 -0400)
committer	Nathan Cutler <ncutler@suse.com>
	Mon, 26 Oct 2020 17:49:47 +0000 (18:49 +0100)