rgw: add documentation to bucket index code

author J. Eric Ivancich <ivancich@redhat.com>

Tue, 3 Sep 2019 17:00:30 +0000 (13:00 -0400)

committer J. Eric Ivancich <ivancich@redhat.com>

Fri, 24 Jan 2020 18:44:09 +0000 (13:44 -0500)
author J. Eric Ivancich <ivancich@redhat.com>
Tue, 3 Sep 2019 17:00:30 +0000 (13:00 -0400)
committer J. Eric Ivancich <ivancich@redhat.com>
Fri, 24 Jan 2020 18:44:09 +0000 (13:44 -0500)
diff --git a/src/cls/rgw/cls_rgw.cc b/src/cls/rgw/cls_rgw.cc

index 7d468f2595a5c7bd8051265ba7b242e9a2ef8b90..abf566322ffbc4d401945db9869cd90b21b4513d 100644 (file)
--- a/src/cls/rgw/cls_rgw.cc
+++ b/src/cls/rgw/cls_rgw.cc
@@ -19,6 +19,10 @@ CLS_VER(1,0)
  CLS_NAME(rgw)
  
  
+// No UTF-8 character can begin with 0x80, so this is a safe indicator
+// of a special bucket-index entry for the first byte. Note: although
+// it has no impact, the 2nd, 3rd, or 4th byte of a UTF-8 character
+// may be 0x80.
  #define BI_PREFIX_CHAR 0x80
  
  #define BI_BUCKET_OBJS_INDEX          0
@@ -143,7 +147,10 @@ static int log_index_operation(cls_method_context_t hctx, cls_rgw_obj_key& obj_k
  }
  
  /*
- * read list of objects, skips objects in the ugly namespace
+ * Read list of objects, skipping objects in the "ugly namespace". The
+ * "ugly namespace" entries begin with BI_PREFIX_CHAR (0x80). Valid
+ * UTF-8 object names can *both* precede and follow the "ugly
+ * namespace".
   */
  static int get_obj_vals(cls_method_context_t hctx, const string& start, const string& filter_prefix,
                          int num_entries, map<string, bufferlist> *pkeys, bool *pmore)
@@ -157,17 +164,31 @@ static int get_obj_vals(cls_method_context_t hctx, const string& start, const st
  
    auto last_element = pkeys->rbegin();
    if ((unsigned char)last_element->first[0] < BI_PREFIX_CHAR) {
-    /* nothing to see here, move along */
+    /* if the first character of the last entry is less than the
+     * prefix then all entries must precede the "ugly namespace" and
+     * we're done
+     */
      return 0;
    }
  
    auto first_element = pkeys->begin();
    if ((unsigned char)first_element->first[0] > BI_PREFIX_CHAR) {
+    /* the first character of the last entry is in or after the "ugly
+     * namespace", so if the first character of the first entry
+     * follows the "ugly namespace" then all entries do and we're done
+     */
      return 0;
    }
  
-  /* let's rebuild the list, only keep entries we're interested in */
-  auto comp = [](const pair<string, bufferlist>& l, const string &r) { return l.first < r; };
+  /* at this point we know we have entries that could precede the
+   * "ugly namespace", be in the "ugly namespace", and follow the
+   * "ugly namespace", so let's rebuild the list, only keeping entries
+   * outside the "ugly namespace"
+   */
+
+  auto comp = [](const pair<string, bufferlist>& l, const string &r) {
+               return l.first < r;
+             };
    string new_start = {static_cast<char>(BI_PREFIX_CHAR + 1)};
  
    auto lower = pkeys->lower_bound(string{static_cast<char>(BI_PREFIX_CHAR)});
diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc

index 38ce49f5341d14eadf5bb715b2a2c0cea6eebb69..235bde47f889ef1356b6e7c99db6e1dc43a8047c 100644 (file)
--- a/src/rgw/rgw_rados.cc
+++ b/src/rgw/rgw_rados.cc
@@ -1710,7 +1710,7 @@ int RGWRados::Bucket::update_bucket_id(const string& new_bucket_id)
  /**
   * Get ordered listing of the objects in a bucket.
   *
- * max: maximum number of results to return
+ * max_p: maximum number of results to return
   * bucket: bucket to list contents of
   * prefix: only return results that match this prefix
   * delim: do not include results that match this string.
author	J. Eric Ivancich <ivancich@redhat.com>
	Tue, 3 Sep 2019 17:00:30 +0000 (13:00 -0400)
committer	J. Eric Ivancich <ivancich@redhat.com>
	Fri, 24 Jan 2020 18:44:09 +0000 (13:44 -0500)
src/cls/rgw/cls_rgw.cc		patch \| blob \| history
src/rgw/rgw_rados.cc		patch \| blob \| history