]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
rgw_file: return common_prefixes in lexical order 38369/head
authorMatt Benjamin <mbenjamin@redhat.com>
Tue, 1 Dec 2020 12:53:56 +0000 (07:53 -0500)
committerMatt Benjamin <mbenjamin@redhat.com>
Tue, 1 Dec 2020 13:30:04 +0000 (08:30 -0500)
Since inception RGWReaddirRequest has sent all leaf objects first
(in lexical order), then common_prefixes (in lexical order). In
hindsight, an overall listing could trivially be returned out of
lexical order, which can cause continued listing of large, mixed
directories to fail.

RCA by Dan Gryniewicz.

Fixes: https://tracker.ceph.com/issues/48410
Signed-off-by: Matt Benjamin <mbenjamin@redhat.com>
src/rgw/rgw_file.h

index 3dde51a9bc7b72b570d05cde75ff47a0d0b224c9..50a8c4fec1f133865e5bd09104bb09078564b150 100644 (file)
@@ -1553,94 +1553,191 @@ public:
 
   void send_response() override {
     struct req_state* state = get_state();
-    for (const auto& iter : objs) {
+    auto cnow = real_clock::now();
 
-      std::string_view sref {iter.key.name};
+    /* enumerate objs and common_prefixes in parallel,
+     * avoiding increment on and end iterator, which is
+     * undefined */
 
-      lsubdout(cct, rgw, 15) << "readdir objects prefix: " << prefix
-                            << " obj: " << sref << dendl;
+    class DirIterator
+    {
+      vector<rgw_bucket_dir_entry>& objs;
+      vector<rgw_bucket_dir_entry>::iterator obj_iter;
 
-      size_t last_del = sref.find_last_of('/');
-      if (last_del != string::npos)
-       sref.remove_prefix(last_del+1);
+      map<string, bool>& common_prefixes;
+      map<string, bool>::iterator cp_iter;
 
-      /* leaf directory? */
-      if (sref.empty())
-       continue;
+      boost::optional<std::string_view> obj_sref;
+      boost::optional<std::string_view> cp_sref;
+      bool _skip_cp;
 
-      lsubdout(cct, rgw, 15) << "RGWReaddirRequest "
-                            << __func__ << " "
-                            << "list uri=" << state->relative_uri << " "
-                            << " prefix=" << prefix << " "
-                            << " obj path=" << iter.key.name
-                            << " (" << sref << ")" << ""
-                            << " mtime="
-                            << real_clock::to_time_t(iter.meta.mtime)
-                            << " size=" << iter.meta.accounted_size
-                            << dendl;
+    public:
 
-      if (! this->operator()(sref, next_marker, iter.meta.mtime,
-                            iter.meta.accounted_size, RGW_FS_TYPE_FILE)) {
-       /* caller cannot accept more */
-       lsubdout(cct, rgw, 5) << "readdir rcb failed"
-                             << " dirent=" << sref.data()
-                             << " call count=" << ix
-                             << dendl;
-       rcb_eof = true;
-       return;
+      DirIterator(vector<rgw_bucket_dir_entry>& objs,
+                 map<string, bool>& common_prefixes)
+       : objs(objs), common_prefixes(common_prefixes), _skip_cp(false)
+       {
+         obj_iter = objs.begin();
+         parse_obj();
+         cp_iter = common_prefixes.begin();
+         parse_cp();
+       }
+
+      bool is_obj() {
+       return (obj_iter != objs.end());
       }
-      ++ix;
-    }
 
-    auto cnow = real_clock::now();
-    for (auto& iter : common_prefixes) {
+      bool is_cp(){
+       return (cp_iter != common_prefixes.end());
+      }
 
-      lsubdout(cct, rgw, 15) << "readdir common prefixes prefix: " << prefix
-                            << " iter first: " << iter.first
-                            << " iter second: " << iter.second
-                            << dendl;
+      bool eof() {
+       return ((!is_obj()) && (!is_cp()));
+      }
 
-      /* XXX aieee--I have seen this case! */
-      if (iter.first == "/")
-       continue;
+      void parse_obj() {
+       if (is_obj()) {
+         std::string_view sref{obj_iter->key.name};
+         size_t last_del = sref.find_last_of('/');
+         if (last_del != string::npos)
+           sref.remove_prefix(last_del+1);
+         obj_sref = sref;
+       }
+      } /* parse_obj */
 
-      /* it's safest to modify the element in place--a suffix-modifying
-       * string_ref operation is problematic since ULP rgw_file callers
-       * will ultimately need a c-string */
-      if (iter.first.back() == '/')
-       const_cast<std::string&>(iter.first).pop_back();
+      void next_obj() {
+       ++obj_iter;
+       parse_obj();
+      }
 
-      std::string_view sref{iter.first};
+      void parse_cp() {
+       if (is_cp()) {
+         /* leading-/ skip case */
+         if (cp_iter->first == "/") {
+           _skip_cp = true;
+           return;
+         } else
+           _skip_cp = false;
+
+         /* it's safest to modify the element in place--a suffix-modifying
+          * string_ref operation is problematic since ULP rgw_file callers
+          * will ultimately need a c-string */
+         if (cp_iter->first.back() == '/')
+           const_cast<std::string&>(cp_iter->first).pop_back();
+
+         std::string_view sref{cp_iter->first};
+         size_t last_del = sref.find_last_of('/');
+         if (last_del != string::npos)
+           sref.remove_prefix(last_del+1);
+         cp_sref = sref;
+       } /* is_cp */
+      } /* parse_cp */
+
+      void next_cp() {
+       ++cp_iter;
+       parse_cp();
+      }
 
-      size_t last_del = sref.find_last_of('/');
-      if (last_del != string::npos)
-       sref.remove_prefix(last_del+1);
+      bool skip_cp() {
+       return _skip_cp;
+      }
 
-      lsubdout(cct, rgw, 15) << "RGWReaddirRequest "
-                            << __func__ << " "
-                            << "list uri=" << state->relative_uri << " "
-                            << " prefix=" << prefix << " "
-                            << " cpref=" << sref
-                            << dendl;
+      bool entry_is_obj() {
+       return (is_obj() &&
+               ((! is_cp()) ||
+                (obj_sref.get() < cp_sref.get())));
+      }
 
-      if (sref.empty()) {
-       /* null path segment--could be created in S3 but has no NFS
-        * interpretation */
-       return;
+      std::string_view get_obj_sref() {
+       return obj_sref.get();
       }
 
-      if (! this->operator()(sref, next_marker, cnow, 0,
-                            RGW_FS_TYPE_DIRECTORY)) {
-       /* caller cannot accept more */
-       lsubdout(cct, rgw, 5) << "readdir rcb failed"
-                             << " dirent=" << sref.data()
-                             << " call count=" << ix
-                             << dendl;
-       rcb_eof = true;
-       return;
+      std::string_view get_cp_sref() {
+       return cp_sref.get();
       }
-      ++ix;
-    }
+
+      vector<rgw_bucket_dir_entry>::iterator& get_obj_iter() {
+       return obj_iter;
+      }
+
+      map<string, bool>::iterator& get_cp_iter() {
+       return cp_iter;
+      }
+
+    }; /* DirIterator */
+
+    DirIterator di{objs, common_prefixes};
+
+    for (;;) {
+
+      if (di.eof()) {
+       break; // done
+      }
+
+      /* assert: one of is_obj() || is_cp() holds */
+      if (di.entry_is_obj()) {
+       auto sref = di.get_obj_sref();
+       if (sref.empty()) {
+         /* recursive list of a leaf dir (iirc), do nothing */
+       } else {
+         /* send a file entry */
+         auto obj_entry = *(di.get_obj_iter());
+
+         lsubdout(cct, rgw, 15) << "RGWReaddirRequest "
+                                << __func__ << " "
+                                << "list uri=" << state->relative_uri << " "
+                                << " prefix=" << prefix << " "
+                                << " obj path=" << obj_entry.key.name
+                                << " (" << sref << ")" << ""
+                                << " mtime="
+                                << real_clock::to_time_t(obj_entry.meta.mtime)
+                                << " size=" << obj_entry.meta.accounted_size
+                                << dendl;
+
+         if (! this->operator()(sref, next_marker, obj_entry.meta.mtime,
+                                obj_entry.meta.accounted_size,
+                                RGW_FS_TYPE_FILE)) {
+           /* caller cannot accept more */
+           lsubdout(cct, rgw, 5) << "readdir rcb caller signalled stop"
+                                 << " dirent=" << sref.data()
+                                 << " call count=" << ix
+                                 << dendl;
+           rcb_eof = true;
+           return;
+         }
+       }
+       di.next_obj(); // and advance object
+      } else {
+       /* send a dir entry */
+       if (! di.skip_cp()) {
+         auto sref = di.get_cp_sref();
+
+         lsubdout(cct, rgw, 15) << "RGWReaddirRequest "
+                                << __func__ << " "
+                                << "list uri=" << state->relative_uri << " "
+                                << " prefix=" << prefix << " "
+                                << " cpref=" << sref
+                                << dendl;
+
+         if (sref.empty()) {
+           /* null path segment--could be created in S3 but has no NFS
+            * interpretation */
+         } else {
+           if (! this->operator()(sref, next_marker, cnow, 0,
+                                  RGW_FS_TYPE_DIRECTORY)) {
+             /* caller cannot accept more */
+             lsubdout(cct, rgw, 5) << "readdir rcb caller signalled stop"
+                                   << " dirent=" << sref.data()
+                                   << " call count=" << ix
+                                   << dendl;
+             rcb_eof = true;
+             return;
+           }
+         }
+       }
+       di.next_cp(); // and advance common_prefixes
+      } /* ! di.entry_is_obj() */
+    } /* for (;;) */
   }
 
   virtual void send_versioned_response() {