rgw: let newer RGWs work with older OSDs re: cls-side filtering

author J. Eric Ivancich <ivancich@redhat.com>

Fri, 27 Sep 2019 16:05:17 +0000 (12:05 -0400)

committer J. Eric Ivancich <ivancich@redhat.com>

Fri, 24 Jan 2020 19:32:39 +0000 (14:32 -0500)
author J. Eric Ivancich <ivancich@redhat.com>
Fri, 27 Sep 2019 16:05:17 +0000 (12:05 -0400)
committer J. Eric Ivancich <ivancich@redhat.com>
Fri, 24 Jan 2020 19:32:39 +0000 (14:32 -0500)
diff --git a/src/cls/rgw/cls_rgw_ops.h b/src/cls/rgw/cls_rgw_ops.h

index 47388f4a97b9c98f651bef95b8f4747f5ee1b2ac..d752118b2fb19d0cd7b2f130298356d1d3bf8082 100644 (file)
--- a/src/cls/rgw/cls_rgw_ops.h
+++ b/src/cls/rgw/cls_rgw_ops.h
@@ -385,8 +385,8 @@ struct rgw_cls_list_op
    cls_rgw_obj_key start_obj;
    uint32_t num_entries;
    string filter_prefix;
-  string delimiter;
    bool list_versions;
+  string delimiter;
  
    rgw_cls_list_op() : num_entries(0), list_versions(false) {}
  
@@ -428,18 +428,27 @@ struct rgw_cls_list_ret {
    rgw_bucket_dir dir;
    bool is_truncated;
  
-  rgw_cls_list_ret() : is_truncated(false) {}
+  // cls_filtered is not transmitted; it is assumed true for versions
+  // on/after 3 and false for prior versions; this allows the rgw
+  // layer to know when an older osd (cls) does not do the filtering
+  bool cls_filtered;
+
+  rgw_cls_list_ret() :
+    is_truncated(false),
+    cls_filtered(true)
+  {}
  
    void encode(bufferlist &bl) const {
-    ENCODE_START(2, 2, bl);
+    ENCODE_START(3, 2, bl);
      encode(dir, bl);
      encode(is_truncated, bl);
      ENCODE_FINISH(bl);
    }
    void decode(bufferlist::const_iterator &bl) {
-    DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
+    DECODE_START_LEGACY_COMPAT_LEN(3, 2, 2, bl);
      decode(dir, bl);
      decode(is_truncated, bl);
+    cls_filtered = struct_v >= 3;
      DECODE_FINISH(bl);
    }
    void dump(Formatter *f) const;
diff --git a/src/rgw/rgw_admin.cc b/src/rgw/rgw_admin.cc

index 7035326c64e59830fee2ed949a41fe707553f093..ff5b4ab57afb9ce57bf3957f7ce670d9a3e23430 100644 (file)
--- a/src/rgw/rgw_admin.cc
+++ b/src/rgw/rgw_admin.cc
@@ -6245,6 +6245,7 @@ next:
      }
  
      bool is_truncated = true;
+    bool cls_filtered = true;
  
      rgw_obj_index_key marker;
      string empty_prefix;
@@ -6259,7 +6260,7 @@ next:
         bucket_info, RGW_NO_SHARD,
         marker, empty_prefix, empty_delimiter,
         1000, true,
-       result, &is_truncated, &marker,
+       result, &is_truncated, &cls_filtered, &marker,
         null_yield,
         rgw_bucket_object_check_filter);
        if (r < 0 && r != -ENOENT) {
diff --git a/src/rgw/rgw_bucket.cc b/src/rgw/rgw_bucket.cc

index 800efadcd4c96d0dbea6fdff8043483cedfa8395..77cf9de5740a1b8b2c27d9ac5ee78cbe8a01382c 100644 (file)
--- a/src/rgw/rgw_bucket.cc
+++ b/src/rgw/rgw_bucket.cc
@@ -1007,6 +1007,7 @@ int RGWBucket::check_object_index(RGWBucketAdminOpState& op_state,
    string empty_delimiter;
    rgw_obj_index_key marker;
    bool is_truncated = true;
+  bool cls_filtered = true;
  
    Formatter *formatter = flusher.get_formatter();
    formatter->open_object_section("objects");
@@ -1016,8 +1017,8 @@ int RGWBucket::check_object_index(RGWBucketAdminOpState& op_state,
  
      int r = store->getRados()->cls_bucket_list_ordered(
        bucket_info, RGW_NO_SHARD, marker, prefix, empty_delimiter,
-      listing_max_entries, true, result, &is_truncated, &marker, y,
-      rgw_bucket_object_check_filter);
+      listing_max_entries, true, result, &is_truncated, &cls_filtered,
+      &marker, y, rgw_bucket_object_check_filter);
      if (r == -ENOENT) {
        break;
      } else if (r < 0 && r != -ENOENT) {
diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc

index 5c09d3bdbaa0a72c35fca264263821b7014b6c15..0d6b0d0fced88ef386d7f888d8a4bef7d25046f7 100644 (file)
--- a/src/rgw/rgw_rados.cc
+++ b/src/rgw/rgw_rados.cc
@@ -1737,6 +1737,7 @@ int RGWRados::Bucket::List::list_objects_ordered(
  
    int count = 0;
    bool truncated = true;
+  bool cls_filtered = false;
    const int64_t max = // protect against memory issues and negative vals
      std::min(bucket_list_objects_absolute_max, std::max(int64_t(0), max_p));
    int read_ahead = std::max(cct->_conf->rgw_list_bucket_min_readahead, max);
@@ -1789,6 +1790,7 @@ int RGWRados::Bucket::List::list_objects_ordered(
                                            params.list_versions,
                                            ent_map,
                                            &truncated,
+                                          &cls_filtered,
                                            &cur_marker,
                                             y);
      if (r < 0) {
@@ -1851,26 +1853,61 @@ int RGWRados::Bucket::List::list_objects_ordered(
        }
  
        if (!params.delim.empty()) {
-        int delim_pos = obj.name.find(params.delim, params.prefix.size());
-
-        if (delim_pos >= 0) {
-         // should only find one delimiter at the end if it finds any
-         // after the prefix
-         ceph_assert(delim_pos ==
-                     int(obj.name.length() - params.delim.length()));
-          if (common_prefixes) {
-            if (count >= max) {
-              truncated = true;
-              goto done;
-            }
-
-            (*common_prefixes)[obj.name] = true;
-            count++;
-          }
-
-          continue;
-        } // if found delimiter after prefix
-      } // if there is a delimiter
+       const int delim_pos = obj.name.find(params.delim, params.prefix.size());
+       if (delim_pos >= 0) {
+         // run either the code where delimiter filtering is done a)
+         // in the OSD/CLS or b) here.
+         if (cls_filtered) {
+           // NOTE: this condition is for the newer versions of the
+           // OSD that does filtering on the CLS side
+
+           // should only find one delimiter at the end if it finds any
+           // after the prefix
+           if (delim_pos !=
+               int(obj.name.length() - params.delim.length())) {
+             ldout(cct, 0) <<
+               "WARNING: found delimiter in place other than the end of "
+               "the prefix; obj.name=" << obj.name <<
+               ", prefix=" << params.prefix << dendl;
+           }
+           if (common_prefixes) {
+             if (count >= max) {
+               truncated = true;
+               goto done;
+             }
+
+             (*common_prefixes)[obj.name] = true;
+             count++;
+           }
+
+           continue;
+         } else {
+           // NOTE: this condition is for older versions of the OSD
+           // that do not filter on the CLS side, so the following code
+           // must do the filtering; once we reach version 16 of ceph,
+           // this code can be removed along with the conditional that
+           // can lead this way
+
+           /* extract key -with trailing delimiter- for CommonPrefix */
+           string prefix_key =
+             obj.name.substr(0, delim_pos + params.delim.length());
+
+           if (common_prefixes &&
+               common_prefixes->find(prefix_key) == common_prefixes->end()) {
+             if (count >= max) {
+               truncated = true;
+               goto done;
+             }
+             next_marker = prefix_key;
+             (*common_prefixes)[prefix_key] = true;
+
+             count++;
+           }
+
+           continue;
+         } // if we're running an older OSD version
+       } // if a delimiter was found after prefix
+      } // if a delimiter was passed in
  
        if (count >= max) {
          truncated = true;
@@ -1881,6 +1918,30 @@ int RGWRados::Bucket::List::list_objects_ordered(
        count++;
      } // eiter for loop
  
+    // NOTE: the following conditional is needed by older versions of
+    // the OSD that don't do delimiter filtering on the CLS side; once
+    // we reach version 16 of ceph, the following conditional and the
+    // code within can be removed
+    if (!cls_filtered && !params.delim.empty()) {
+      int marker_delim_pos =
+       cur_marker.name.find(params.delim, cur_prefix.size());
+      if (marker_delim_pos >= 0) {
+       std::string skip_after_delim =
+         cur_marker.name.substr(0, marker_delim_pos);
+        skip_after_delim.append(after_delim_s);
+
+        ldout(cct, 20) << "skip_after_delim=" << skip_after_delim << dendl;
+
+        if (skip_after_delim > cur_marker.name) {
+          cur_marker = skip_after_delim;
+          ldout(cct, 20) << "setting cur_marker="
+                         << cur_marker.name
+                         << "[" << cur_marker.instance << "]"
+                         << dendl;
+        }
+      }
+    } // if older osd didn't do delimiter filtering
+
      // if we finished listing, or if we're returning at least half the
      // requested entries, that's enough; S3 and swift protocols allow
      // returning fewer than max entries
@@ -8013,7 +8074,8 @@ int RGWRados::cls_bucket_list_ordered(RGWBucketInfo& bucket_info,
                                       uint32_t num_entries,
                                       bool list_versions,
                                       ent_map_t& m,
-                                     bool *is_truncated,
+                                     bool* is_truncated,
+                                     bool* cls_filtered,
                                       rgw_obj_index_key *last_entry,
                                        optional_yield y,
                                       check_filter_t force_check_filter)
@@ -8059,10 +8121,20 @@ int RGWRados::cls_bucket_list_ordered(RGWBucketInfo& bucket_info,
    vcurrents.reserve(list_results.size());
    vends.reserve(list_results.size());
    vnames.reserve(list_results.size());
-  for (auto& iter : list_results) {
-    vcurrents.push_back(iter.second.dir.m.begin());
-    vends.push_back(iter.second.dir.m.end());
-    vnames.push_back(oids[iter.first]);
+  *is_truncated = false;
+  *cls_filtered = true;
+  for (auto& r : list_results) {
+    vcurrents.push_back(r.second.dir.m.begin());
+    vends.push_back(r.second.dir.m.end());
+    vnames.push_back(oids[r.first]);
+
+    // if any *one* shard's result is truncated, the entire result is
+    // truncated
+    *is_truncated = *is_truncated || r.second.is_truncated;
+
+    // unless *all* shards are cls_filtered, the entire result is
+    // not filtered
+    *cls_filtered = *cls_filtered && r.second.cls_filtered;
    }
  
    // create a map to track the next candidate entry from each shard,
diff --git a/src/rgw/rgw_rados.h b/src/rgw/rgw_rados.h

index 2508e46becec787d26fb4996c2f270fbc1c5ebe7..8213f20cacc8a8f1a3ec4784e20fbc715e056f4b 100644 (file)
--- a/src/rgw/rgw_rados.h
+++ b/src/rgw/rgw_rados.h
@@ -1356,7 +1356,8 @@ public:
                               uint32_t num_entries,
                               bool list_versions,
                               ent_map_t& m,
-                             bool *is_truncated,
+                             bool* is_truncated,
+                             bool* cls_filtered,
                               rgw_obj_index_key *last_entry,
                                optional_yield y,
                               check_filter_t force_check_filter = nullptr);
author	J. Eric Ivancich <ivancich@redhat.com>
	Fri, 27 Sep 2019 16:05:17 +0000 (12:05 -0400)
committer	J. Eric Ivancich <ivancich@redhat.com>
	Fri, 24 Jan 2020 19:32:39 +0000 (14:32 -0500)
src/cls/rgw/cls_rgw_ops.h		patch \| blob \| history
src/rgw/rgw_admin.cc		patch \| blob \| history
src/rgw/rgw_bucket.cc		patch \| blob \| history
src/rgw/rgw_rados.cc		patch \| blob \| history
src/rgw/rgw_rados.h		patch \| blob \| history