]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
rgw: let newer RGWs work with older OSDs re: cls-side filtering 30272/head
authorJ. Eric Ivancich <ivancich@redhat.com>
Fri, 27 Sep 2019 16:05:17 +0000 (12:05 -0400)
committerJ. Eric Ivancich <ivancich@redhat.com>
Fri, 24 Jan 2020 19:32:39 +0000 (14:32 -0500)
Previous commits moved bucket list filtering when a delimiter was
specified to the osd/cls layer. However, since rgw's are often
upgraded before osd's are, until we reach ceph version 16, an
rgw cannot assume that the osd/cls did the filtering. This is
addressed in the following ways....

First rgw_cls_list_ret now indicates whether filtering was done on the
osd/cls side.

And second, the old filtering code in the rgw is maintained in
RGWRados::Bucket::List::list_objects_ordered, so it can still be
triggered when not all osd's are doing the filtering.

Once we reach ceph version 16, and there is no chance that the rgw is
working with an osd running "young" version 14 code, we can remove the
backward compatibility code in
RGWRados::Bucket::List::list_objects_ordered.

Signed-off-by: J. Eric Ivancich <ivancich@redhat.com>
src/cls/rgw/cls_rgw_ops.h
src/rgw/rgw_admin.cc
src/rgw/rgw_bucket.cc
src/rgw/rgw_rados.cc
src/rgw/rgw_rados.h

index 47388f4a97b9c98f651bef95b8f4747f5ee1b2ac..d752118b2fb19d0cd7b2f130298356d1d3bf8082 100644 (file)
@@ -385,8 +385,8 @@ struct rgw_cls_list_op
   cls_rgw_obj_key start_obj;
   uint32_t num_entries;
   string filter_prefix;
-  string delimiter;
   bool list_versions;
+  string delimiter;
 
   rgw_cls_list_op() : num_entries(0), list_versions(false) {}
 
@@ -428,18 +428,27 @@ struct rgw_cls_list_ret {
   rgw_bucket_dir dir;
   bool is_truncated;
 
-  rgw_cls_list_ret() : is_truncated(false) {}
+  // cls_filtered is not transmitted; it is assumed true for versions
+  // on/after 3 and false for prior versions; this allows the rgw
+  // layer to know when an older osd (cls) does not do the filtering
+  bool cls_filtered;
+
+  rgw_cls_list_ret() :
+    is_truncated(false),
+    cls_filtered(true)
+  {}
 
   void encode(bufferlist &bl) const {
-    ENCODE_START(2, 2, bl);
+    ENCODE_START(3, 2, bl);
     encode(dir, bl);
     encode(is_truncated, bl);
     ENCODE_FINISH(bl);
   }
   void decode(bufferlist::const_iterator &bl) {
-    DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
+    DECODE_START_LEGACY_COMPAT_LEN(3, 2, 2, bl);
     decode(dir, bl);
     decode(is_truncated, bl);
+    cls_filtered = struct_v >= 3;
     DECODE_FINISH(bl);
   }
   void dump(Formatter *f) const;
index 7035326c64e59830fee2ed949a41fe707553f093..ff5b4ab57afb9ce57bf3957f7ce670d9a3e23430 100644 (file)
@@ -6245,6 +6245,7 @@ next:
     }
 
     bool is_truncated = true;
+    bool cls_filtered = true;
 
     rgw_obj_index_key marker;
     string empty_prefix;
@@ -6259,7 +6260,7 @@ next:
        bucket_info, RGW_NO_SHARD,
        marker, empty_prefix, empty_delimiter,
        1000, true,
-       result, &is_truncated, &marker,
+       result, &is_truncated, &cls_filtered, &marker,
        null_yield,
        rgw_bucket_object_check_filter);
       if (r < 0 && r != -ENOENT) {
index 800efadcd4c96d0dbea6fdff8043483cedfa8395..77cf9de5740a1b8b2c27d9ac5ee78cbe8a01382c 100644 (file)
@@ -1007,6 +1007,7 @@ int RGWBucket::check_object_index(RGWBucketAdminOpState& op_state,
   string empty_delimiter;
   rgw_obj_index_key marker;
   bool is_truncated = true;
+  bool cls_filtered = true;
 
   Formatter *formatter = flusher.get_formatter();
   formatter->open_object_section("objects");
@@ -1016,8 +1017,8 @@ int RGWBucket::check_object_index(RGWBucketAdminOpState& op_state,
 
     int r = store->getRados()->cls_bucket_list_ordered(
       bucket_info, RGW_NO_SHARD, marker, prefix, empty_delimiter,
-      listing_max_entries, true, result, &is_truncated, &marker, y,
-      rgw_bucket_object_check_filter);
+      listing_max_entries, true, result, &is_truncated, &cls_filtered,
+      &marker, y, rgw_bucket_object_check_filter);
     if (r == -ENOENT) {
       break;
     } else if (r < 0 && r != -ENOENT) {
index 5c09d3bdbaa0a72c35fca264263821b7014b6c15..0d6b0d0fced88ef386d7f888d8a4bef7d25046f7 100644 (file)
@@ -1737,6 +1737,7 @@ int RGWRados::Bucket::List::list_objects_ordered(
 
   int count = 0;
   bool truncated = true;
+  bool cls_filtered = false;
   const int64_t max = // protect against memory issues and negative vals
     std::min(bucket_list_objects_absolute_max, std::max(int64_t(0), max_p));
   int read_ahead = std::max(cct->_conf->rgw_list_bucket_min_readahead, max);
@@ -1789,6 +1790,7 @@ int RGWRados::Bucket::List::list_objects_ordered(
                                           params.list_versions,
                                           ent_map,
                                           &truncated,
+                                          &cls_filtered,
                                           &cur_marker,
                                            y);
     if (r < 0) {
@@ -1851,26 +1853,61 @@ int RGWRados::Bucket::List::list_objects_ordered(
       }
 
       if (!params.delim.empty()) {
-        int delim_pos = obj.name.find(params.delim, params.prefix.size());
-
-        if (delim_pos >= 0) {
-         // should only find one delimiter at the end if it finds any
-         // after the prefix
-         ceph_assert(delim_pos ==
-                     int(obj.name.length() - params.delim.length()));
-          if (common_prefixes) {
-            if (count >= max) {
-              truncated = true;
-              goto done;
-            }
-
-            (*common_prefixes)[obj.name] = true;
-            count++;
-          }
-
-          continue;
-        } // if found delimiter after prefix
-      } // if there is a delimiter
+       const int delim_pos = obj.name.find(params.delim, params.prefix.size());
+       if (delim_pos >= 0) {
+         // run either the code where delimiter filtering is done a)
+         // in the OSD/CLS or b) here.
+         if (cls_filtered) {
+           // NOTE: this condition is for the newer versions of the
+           // OSD that does filtering on the CLS side
+
+           // should only find one delimiter at the end if it finds any
+           // after the prefix
+           if (delim_pos !=
+               int(obj.name.length() - params.delim.length())) {
+             ldout(cct, 0) <<
+               "WARNING: found delimiter in place other than the end of "
+               "the prefix; obj.name=" << obj.name <<
+               ", prefix=" << params.prefix << dendl;
+           }
+           if (common_prefixes) {
+             if (count >= max) {
+               truncated = true;
+               goto done;
+             }
+
+             (*common_prefixes)[obj.name] = true;
+             count++;
+           }
+
+           continue;
+         } else {
+           // NOTE: this condition is for older versions of the OSD
+           // that do not filter on the CLS side, so the following code
+           // must do the filtering; once we reach version 16 of ceph,
+           // this code can be removed along with the conditional that
+           // can lead this way
+
+           /* extract key -with trailing delimiter- for CommonPrefix */
+           string prefix_key =
+             obj.name.substr(0, delim_pos + params.delim.length());
+
+           if (common_prefixes &&
+               common_prefixes->find(prefix_key) == common_prefixes->end()) {
+             if (count >= max) {
+               truncated = true;
+               goto done;
+             }
+             next_marker = prefix_key;
+             (*common_prefixes)[prefix_key] = true;
+
+             count++;
+           }
+
+           continue;
+         } // if we're running an older OSD version
+       } // if a delimiter was found after prefix
+      } // if a delimiter was passed in
 
       if (count >= max) {
         truncated = true;
@@ -1881,6 +1918,30 @@ int RGWRados::Bucket::List::list_objects_ordered(
       count++;
     } // eiter for loop
 
+    // NOTE: the following conditional is needed by older versions of
+    // the OSD that don't do delimiter filtering on the CLS side; once
+    // we reach version 16 of ceph, the following conditional and the
+    // code within can be removed
+    if (!cls_filtered && !params.delim.empty()) {
+      int marker_delim_pos =
+       cur_marker.name.find(params.delim, cur_prefix.size());
+      if (marker_delim_pos >= 0) {
+       std::string skip_after_delim =
+         cur_marker.name.substr(0, marker_delim_pos);
+        skip_after_delim.append(after_delim_s);
+
+        ldout(cct, 20) << "skip_after_delim=" << skip_after_delim << dendl;
+
+        if (skip_after_delim > cur_marker.name) {
+          cur_marker = skip_after_delim;
+          ldout(cct, 20) << "setting cur_marker="
+                         << cur_marker.name
+                         << "[" << cur_marker.instance << "]"
+                         << dendl;
+        }
+      }
+    } // if older osd didn't do delimiter filtering
+
     // if we finished listing, or if we're returning at least half the
     // requested entries, that's enough; S3 and swift protocols allow
     // returning fewer than max entries
@@ -8013,7 +8074,8 @@ int RGWRados::cls_bucket_list_ordered(RGWBucketInfo& bucket_info,
                                      uint32_t num_entries,
                                      bool list_versions,
                                      ent_map_t& m,
-                                     bool *is_truncated,
+                                     bool* is_truncated,
+                                     bool* cls_filtered,
                                      rgw_obj_index_key *last_entry,
                                       optional_yield y,
                                      check_filter_t force_check_filter)
@@ -8059,10 +8121,20 @@ int RGWRados::cls_bucket_list_ordered(RGWBucketInfo& bucket_info,
   vcurrents.reserve(list_results.size());
   vends.reserve(list_results.size());
   vnames.reserve(list_results.size());
-  for (auto& iter : list_results) {
-    vcurrents.push_back(iter.second.dir.m.begin());
-    vends.push_back(iter.second.dir.m.end());
-    vnames.push_back(oids[iter.first]);
+  *is_truncated = false;
+  *cls_filtered = true;
+  for (auto& r : list_results) {
+    vcurrents.push_back(r.second.dir.m.begin());
+    vends.push_back(r.second.dir.m.end());
+    vnames.push_back(oids[r.first]);
+
+    // if any *one* shard's result is truncated, the entire result is
+    // truncated
+    *is_truncated = *is_truncated || r.second.is_truncated;
+
+    // unless *all* shards are cls_filtered, the entire result is
+    // not filtered
+    *cls_filtered = *cls_filtered && r.second.cls_filtered;
   }
 
   // create a map to track the next candidate entry from each shard,
index 2508e46becec787d26fb4996c2f270fbc1c5ebe7..8213f20cacc8a8f1a3ec4784e20fbc715e056f4b 100644 (file)
@@ -1356,7 +1356,8 @@ public:
                              uint32_t num_entries,
                              bool list_versions,
                              ent_map_t& m,
-                             bool *is_truncated,
+                             bool* is_truncated,
+                             bool* cls_filtered,
                              rgw_obj_index_key *last_entry,
                               optional_yield y,
                              check_filter_t force_check_filter = nullptr);