]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
rgw_file: return common_prefixes in lexical order 38829/head
authorMatt Benjamin <mbenjamin@redhat.com>
Fri, 8 Jan 2021 18:23:56 +0000 (13:23 -0500)
committerNathan Cutler <ncutler@suse.com>
Fri, 8 Jan 2021 18:31:28 +0000 (19:31 +0100)
Since inception RGWReaddirRequest has sent all leaf objects first
(in lexical order), then common_prefixes (in lexical order). In
hindsight, an overall listing could trivially be returned out of
lexical order, which can cause continued listing of large, mixed
directories to fail.

RCA by Dan Gryniewicz.

Fixes: https://tracker.ceph.com/issues/48410
Signed-off-by: Matt Benjamin <mbenjamin@redhat.com>
(cherry picked from commit e561e98e5cca2678854e01c990f95e474022b7ed)

Conflicts:
        src/rgw/rgw_file.h

src/rgw/rgw_file.h

index 8d9fff4609b772406a4296cfdd20c32480f7078b..32449721ea1f6af8e1a12714040fd14b12a27651 100644 (file)
@@ -1560,94 +1560,191 @@ public:
 
   void send_response() override {
     struct req_state* s = get_state();
-    for (const auto& iter : objs) {
+    auto cnow = real_clock::now();
 
-      boost::string_ref sref {iter.key.name};
+    /* enumerate objs and common_prefixes in parallel,
+     * avoiding increment on and end iterator, which is
+     * undefined */
 
-      lsubdout(cct, rgw, 15) << "readdir objects prefix: " << prefix
-                            << " obj: " << sref << dendl;
+    class DirIterator
+    {
+      vector<rgw_bucket_dir_entry>& objs;
+      vector<rgw_bucket_dir_entry>::iterator obj_iter;
 
-      size_t last_del = sref.find_last_of('/');
-      if (last_del != string::npos)
-       sref.remove_prefix(last_del+1);
+      map<string, bool>& common_prefixes;
+      map<string, bool>::iterator cp_iter;
 
-      /* leaf directory? */
-      if (sref.empty())
-       continue;
+      boost::optional<boost::string_ref> obj_sref;
+      boost::optional<boost::string_ref> cp_sref;
+      bool _skip_cp;
 
-      lsubdout(cct, rgw, 15) << "RGWReaddirRequest "
-                            << __func__ << " "
-                            << "list uri=" << s->relative_uri << " "
-                            << " prefix=" << prefix << " "
-                            << " obj path=" << iter.key.name
-                            << " (" << sref << ")" << ""
-                            << " mtime="
-                            << real_clock::to_time_t(iter.meta.mtime)
-                            << " size=" << iter.meta.accounted_size
-                            << dendl;
+    public:
 
-      if (! this->operator()(sref, next_marker, iter.meta.mtime,
-                            iter.meta.accounted_size, RGW_FS_TYPE_FILE)) {
-       /* caller cannot accept more */
-       lsubdout(cct, rgw, 5) << "readdir rcb failed"
-                             << " dirent=" << sref.data()
-                             << " call count=" << ix
-                             << dendl;
-       rcb_eof = true;
-       return;
+      DirIterator(vector<rgw_bucket_dir_entry>& objs,
+                 map<string, bool>& common_prefixes)
+       : objs(objs), common_prefixes(common_prefixes), _skip_cp(false)
+       {
+         obj_iter = objs.begin();
+         parse_obj();
+         cp_iter = common_prefixes.begin();
+         parse_cp();
+       }
+
+      bool is_obj() {
+       return (obj_iter != objs.end());
       }
-      ++ix;
-    }
 
-    auto cnow = real_clock::now();
-    for (auto& iter : common_prefixes) {
+      bool is_cp(){
+       return (cp_iter != common_prefixes.end());
+      }
 
-      lsubdout(cct, rgw, 15) << "readdir common prefixes prefix: " << prefix
-                            << " iter first: " << iter.first
-                            << " iter second: " << iter.second
-                            << dendl;
+      bool eof() {
+       return ((!is_obj()) && (!is_cp()));
+      }
 
-      /* XXX aieee--I have seen this case! */
-      if (iter.first == "/")
-       continue;
+      void parse_obj() {
+       if (is_obj()) {
+         boost::string_ref sref{obj_iter->key.name};
+         size_t last_del = sref.find_last_of('/');
+         if (last_del != string::npos)
+           sref.remove_prefix(last_del+1);
+         obj_sref = sref;
+       }
+      } /* parse_obj */
 
-      /* it's safest to modify the element in place--a suffix-modifying
-       * string_ref operation is problematic since ULP rgw_file callers
-       * will ultimately need a c-string */
-      if (iter.first.back() == '/')
-       const_cast<std::string&>(iter.first).pop_back();
+      void next_obj() {
+       ++obj_iter;
+       parse_obj();
+      }
 
-      boost::string_ref sref{iter.first};
+      void parse_cp() {
+       if (is_cp()) {
+         /* leading-/ skip case */
+         if (cp_iter->first == "/") {
+           _skip_cp = true;
+           return;
+         } else
+           _skip_cp = false;
+
+         /* it's safest to modify the element in place--a suffix-modifying
+          * string_ref operation is problematic since ULP rgw_file callers
+          * will ultimately need a c-string */
+         if (cp_iter->first.back() == '/')
+           const_cast<std::string&>(cp_iter->first).pop_back();
+
+         boost::string_ref sref{cp_iter->first};
+         size_t last_del = sref.find_last_of('/');
+         if (last_del != string::npos)
+           sref.remove_prefix(last_del+1);
+         cp_sref = sref;
+       } /* is_cp */
+      } /* parse_cp */
+
+      void next_cp() {
+       ++cp_iter;
+       parse_cp();
+      }
 
-      size_t last_del = sref.find_last_of('/');
-      if (last_del != string::npos)
-       sref.remove_prefix(last_del+1);
+      bool skip_cp() {
+       return _skip_cp;
+      }
 
-      lsubdout(cct, rgw, 15) << "RGWReaddirRequest "
-                            << __func__ << " "
-                            << "list uri=" << s->relative_uri << " "
-                            << " prefix=" << prefix << " "
-                            << " cpref=" << sref
-                            << dendl;
+      bool entry_is_obj() {
+       return (is_obj() &&
+               ((! is_cp()) ||
+                (obj_sref.get() < cp_sref.get())));
+      }
 
-      if (sref.empty()) {
-       /* null path segment--could be created in S3 but has no NFS
-        * interpretation */
-       return;
+      boost::string_ref get_obj_sref() {
+       return obj_sref.get();
       }
 
-      if (! this->operator()(sref, next_marker, cnow, 0,
-                            RGW_FS_TYPE_DIRECTORY)) {
-       /* caller cannot accept more */
-       lsubdout(cct, rgw, 5) << "readdir rcb failed"
-                             << " dirent=" << sref.data()
-                             << " call count=" << ix
-                             << dendl;
-       rcb_eof = true;
-       return;
+      boost::string_ref get_cp_sref() {
+       return cp_sref.get();
       }
-      ++ix;
-    }
+
+      vector<rgw_bucket_dir_entry>::iterator& get_obj_iter() {
+       return obj_iter;
+      }
+
+      map<string, bool>::iterator& get_cp_iter() {
+       return cp_iter;
+      }
+
+    }; /* DirIterator */
+
+    DirIterator di{objs, common_prefixes};
+
+    for (;;) {
+
+      if (di.eof()) {
+       break; // done
+      }
+
+      /* assert: one of is_obj() || is_cp() holds */
+      if (di.entry_is_obj()) {
+       auto sref = di.get_obj_sref();
+       if (sref.empty()) {
+         /* recursive list of a leaf dir (iirc), do nothing */
+       } else {
+         /* send a file entry */
+         auto obj_entry = *(di.get_obj_iter());
+
+         lsubdout(cct, rgw, 15) << "RGWReaddirRequest "
+                                << __func__ << " "
+                                << "list uri=" << s->relative_uri << " "
+                                << " prefix=" << prefix << " "
+                                << " obj path=" << obj_entry.key.name
+                                << " (" << sref << ")" << ""
+                                << " mtime="
+                                << real_clock::to_time_t(obj_entry.meta.mtime)
+                                << " size=" << obj_entry.meta.accounted_size
+                                << dendl;
+
+         if (! this->operator()(sref, next_marker, obj_entry.meta.mtime,
+                                obj_entry.meta.accounted_size,
+                                RGW_FS_TYPE_FILE)) {
+           /* caller cannot accept more */
+           lsubdout(cct, rgw, 5) << "readdir rcb caller signalled stop"
+                                 << " dirent=" << sref.data()
+                                 << " call count=" << ix
+                                 << dendl;
+           rcb_eof = true;
+           return;
+         }
+       }
+       di.next_obj(); // and advance object
+      } else {
+       /* send a dir entry */
+       if (! di.skip_cp()) {
+         auto sref = di.get_cp_sref();
+
+         lsubdout(cct, rgw, 15) << "RGWReaddirRequest "
+                                << __func__ << " "
+                                << "list uri=" << s->relative_uri << " "
+                                << " prefix=" << prefix << " "
+                                << " cpref=" << sref
+                                << dendl;
+
+         if (sref.empty()) {
+           /* null path segment--could be created in S3 but has no NFS
+            * interpretation */
+         } else {
+           if (! this->operator()(sref, next_marker, cnow, 0,
+                                  RGW_FS_TYPE_DIRECTORY)) {
+             /* caller cannot accept more */
+             lsubdout(cct, rgw, 5) << "readdir rcb caller signalled stop"
+                                   << " dirent=" << sref.data()
+                                   << " call count=" << ix
+                                   << dendl;
+             rcb_eof = true;
+             return;
+           }
+         }
+       }
+       di.next_cp(); // and advance common_prefixes
+      } /* ! di.entry_is_obj() */
+    } /* for (;;) */
   }
 
   virtual void send_versioned_response() {