]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mds: change scrub traverse from post-order to breadth-first search
authorSimon Gao <simon29rock@gmail.com>
Fri, 15 May 2020 09:19:14 +0000 (17:19 +0800)
committerYan, Zheng <zyan@redhat.com>
Mon, 16 Nov 2020 01:02:17 +0000 (09:02 +0800)
After switching to breadth-first search, scrubbing a dir inode does not need
to wait until all of its descendant dirfrags/inodes are scrubbed. This
simplifies the scrub code a lot. The downside is that a scrubbed dir inode
no longer implies that the corresponding subtree has been fully scrubbed. This
makes later scrubs (without the force option) less efficient.

Signed-off-by: Simon Gao <simon29rock@gmail.com>
Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
src/mds/CDir.cc
src/mds/CDir.h
src/mds/CInode.cc
src/mds/CInode.h
src/mds/ScrubStack.cc
src/mds/ScrubStack.h

index cab73b2352ec3e4210bbcf5884d557978c556dfb..5073c6b4ea373057a0ab481b7d4e2587d57fc346 100644 (file)
@@ -2049,11 +2049,6 @@ void CDir::_omap_fetched(bufferlist& hdrbl, map<string, bufferlist>& omap,
     wanted_items.clear();
     mark_complete();
     state_clear(STATE_FETCHING);
-
-    if (scrub_infop && scrub_infop->need_scrub_local) {
-      scrub_infop->need_scrub_local = false;
-      scrub_local();
-    }
   }
 
   // open & force frags
@@ -3469,10 +3464,8 @@ void CDir::scrub_info_create() const
 
   std::unique_ptr<scrub_info_t> si(new scrub_info_t());
 
-  si->last_recursive.version = si->recursive_start.version =
-      pf->recursive_scrub_version;
-  si->last_recursive.time = si->recursive_start.time =
-      pf->recursive_scrub_stamp;
+  si->last_recursive.version = pf->recursive_scrub_version;
+  si->last_recursive.time = pf->recursive_scrub_stamp;
 
   si->last_local.version = pf->localized_scrub_version;
   si->last_local.time = pf->localized_scrub_stamp;
@@ -3488,227 +3481,55 @@ void CDir::scrub_initialize(const ScrubHeaderRef& header, MDSContext* f)
   scrub_info();
   scrub_infop->header = header;
   scrub_infop->on_finish = f;
-}
-
-void CDir::scrub_initialize_data()
-{
-  dout(20) << __func__ << dendl;
-  ceph_assert(is_complete());
-
-  // FIXME: weird implicit construction, is someone else meant
-  // to be calling scrub_info_create first?
-  scrub_info();
-  ceph_assert(scrub_infop && !scrub_infop->directory_scrubbing);
-
-  scrub_infop->recursive_start.version = get_projected_version();
-  scrub_infop->recursive_start.time = ceph_clock_now();
-
-  scrub_infop->directories_to_scrub.clear();
-  scrub_infop->directories_scrubbing.clear();
-  scrub_infop->directories_scrubbed.clear();
-  scrub_infop->others_to_scrub.clear();
-  scrub_infop->others_scrubbing.clear();
-  scrub_infop->others_scrubbed.clear();
-
-  for (auto i = items.begin();
-      i != items.end();
-      ++i) {
-    // TODO: handle snapshot scrubbing
-    if (i->first.snapid != CEPH_NOSNAP)
-      continue;
-
-    CDentry::linkage_t *dnl = i->second->get_projected_linkage();
-    if (dnl->is_primary()) {
-      if (dnl->get_inode()->is_dir())
-       scrub_infop->directories_to_scrub.insert(i->first);
-      else
-       scrub_infop->others_to_scrub.insert(i->first);
-    } else if (dnl->is_remote()) {
-      // TODO: check remote linkage
-    }
-  }
   scrub_infop->directory_scrubbing = true;
 }
 
 void CDir::scrub_aborted(MDSContext **c) {
   dout(20) << __func__ << dendl;
-  ceph_assert(scrub_infop && scrub_infop->directory_scrubbing);
+  ceph_assert(scrub_is_in_progress());
 
   *c = scrub_infop->on_finish;
   scrub_infop->on_finish = nullptr;
 
   scrub_infop->directory_scrubbing = false;
-  scrub_infop->need_scrub_local = false;
   scrub_infop->last_scrub_dirty = false;
-  scrub_infop->pending_scrub_error = false;
-  scrub_infop->dirty_scrub_stamps.clear();
   scrub_infop.reset();
 }
 
 void CDir::scrub_finished(MDSContext **c)
 {
   dout(20) << __func__ << dendl;
-  ceph_assert(scrub_infop && scrub_infop->directory_scrubbing);
-
-  ceph_assert(scrub_infop->directories_to_scrub.empty());
-  ceph_assert(scrub_infop->directories_scrubbing.empty());
-  scrub_infop->directories_scrubbed.clear();
-  ceph_assert(scrub_infop->others_to_scrub.empty());
-  ceph_assert(scrub_infop->others_scrubbing.empty());
-  scrub_infop->others_scrubbed.clear();
+  ceph_assert(scrub_is_in_progress());
   scrub_infop->directory_scrubbing = false;
 
-  scrub_infop->last_recursive = scrub_infop->recursive_start;
+  scrub_infop->last_local.time = ceph_clock_now();
+  scrub_infop->last_local.version = get_version();
+  if (scrub_infop->header->get_recursive())
+    scrub_infop->last_recursive = scrub_infop->last_local;
+
   scrub_infop->last_scrub_dirty = true;
 
   *c = scrub_infop->on_finish;
   scrub_infop->on_finish = nullptr;
 }
 
-int CDir::_next_dentry_on_set(dentry_key_set &dns, bool missing_okay,
-                              MDSContext *cb, CDentry **dnout)
-{
-  dentry_key_t dnkey;
-  CDentry *dn;
-
-  while (!dns.empty()) {
-    set<dentry_key_t>::iterator front = dns.begin();
-    dnkey = *front;
-    dn = lookup(dnkey.name);
-    if (!dn) {
-      if (!is_complete() &&
-          (!has_bloom() || is_in_bloom(dnkey.name))) {
-        // need to re-read this dirfrag
-        fetch(cb);
-        return EAGAIN;
-      }
-      // okay, we lost it
-      if (missing_okay) {
-       dout(15) << " we no longer have directory dentry "
-                << dnkey.name << ", assuming it got renamed" << dendl;
-       dns.erase(dnkey);
-       continue;
-      } else {
-       dout(5) << " we lost dentry " << dnkey.name
-               << ", bailing out because that's impossible!" << dendl;
-       ceph_abort();
-      }
-    }
-    // okay, we got a  dentry
-    dns.erase(dnkey);
-
-    if (dn->get_projected_version() < scrub_infop->last_recursive.version &&
-       !(scrub_infop->header->get_force())) {
-      dout(15) << " skip dentry " << dnkey.name
-              << ", no change since last scrub" << dendl;
-      continue;
-    }
-
-    if (!dn->get_linkage()->is_primary()) {
-      dout(15) << " skip dentry " << dnkey.name
-              << ", no longer primary" << dendl;
-      continue;
-    }
-
-    *dnout = dn;
-    return 0;
-  }
-  *dnout = NULL;
-  return ENOENT;
-}
-
-int CDir::scrub_dentry_next(MDSContext *cb, CDentry **dnout)
-{
-  dout(20) << __func__ << dendl;
-  ceph_assert(scrub_infop && scrub_infop->directory_scrubbing);
-
-  dout(20) << "trying to scrub directories underneath us" << dendl;
-  int rval = _next_dentry_on_set(scrub_infop->directories_to_scrub, true,
-                                 cb, dnout);
-  if (rval == 0) {
-    dout(20) << __func__ << " inserted to directories scrubbing: "
-      << *dnout << dendl;
-    scrub_infop->directories_scrubbing.insert((*dnout)->key());
-  } else if (rval == EAGAIN) {
-    // we don't need to do anything else
-  } else { // we emptied out the directory scrub set
-    ceph_assert(rval == ENOENT);
-    dout(20) << "no directories left, moving on to other kinds of dentries"
-             << dendl;
-    
-    rval = _next_dentry_on_set(scrub_infop->others_to_scrub, false, cb, dnout);
-    if (rval == 0) {
-      dout(20) << __func__ << " inserted to others scrubbing: "
-        << *dnout << dendl;
-      scrub_infop->others_scrubbing.insert((*dnout)->key());
-    }
-  }
-  dout(20) << " returning " << rval << " with dn=" << *dnout << dendl;
-  return rval;
-}
-
-std::vector<CDentry*> CDir::scrub_dentries_scrubbing()
-{
-  dout(20) << __func__ << dendl;
-  ceph_assert(scrub_infop && scrub_infop->directory_scrubbing);
-
-  std::vector<CDentry*> result;
-  for (auto& scrub_info : scrub_infop->directories_scrubbing) {
-    CDentry *d = lookup(scrub_info.name, scrub_info.snapid);
-    ceph_assert(d);
-    result.push_back(d);
-  }
-  for (auto& scrub_info : scrub_infop->others_scrubbing) {
-    CDentry *d = lookup(scrub_info.name, scrub_info.snapid);
-    ceph_assert(d);
-    result.push_back(d);
-  }
-  return result;
-}
-
-void CDir::scrub_dentry_finished(CDentry *dn)
-{
-  dout(20) << __func__ << " on dn " << *dn << dendl;
-  ceph_assert(scrub_infop && scrub_infop->directory_scrubbing);
-  dentry_key_t dn_key = dn->key();
-  if (scrub_infop->directories_scrubbing.erase(dn_key)) {
-    scrub_infop->directories_scrubbed.insert(dn_key);
-  } else {
-    ceph_assert(scrub_infop->others_scrubbing.count(dn_key));
-    scrub_infop->others_scrubbing.erase(dn_key);
-    scrub_infop->others_scrubbed.insert(dn_key);
-  }
-}
-
 void CDir::scrub_maybe_delete_info()
 {
   if (scrub_infop &&
       !scrub_infop->directory_scrubbing &&
-      !scrub_infop->need_scrub_local &&
-      !scrub_infop->last_scrub_dirty &&
-      !scrub_infop->pending_scrub_error &&
-      scrub_infop->dirty_scrub_stamps.empty()) {
+      !scrub_infop->last_scrub_dirty)
     scrub_infop.reset();
-  }
 }
 
 bool CDir::scrub_local()
 {
   ceph_assert(is_complete());
-  bool rval = check_rstats(true);
-
-  scrub_info();
-  if (rval) {
-    scrub_infop->last_local.time = ceph_clock_now();
-    scrub_infop->last_local.version = get_projected_version();
-    scrub_infop->pending_scrub_error = false;
-    scrub_infop->last_scrub_dirty = true;
-  } else {
-    scrub_infop->pending_scrub_error = true;
-    if (scrub_infop->header->get_repair())
-      mdcache->repair_dirfrag_stats(this);
+  bool good = check_rstats(true);
+  if (!good && scrub_infop->header->get_repair()) {
+    mdcache->repair_dirfrag_stats(this);
+    scrub_infop->header->set_repaired();
   }
-  return rval;
+  return good;
 }
 
 std::string CDir::get_path() const
index 1dc6e63d2d915af655e1c5afc585f149a00fee79..db9dee02801aec185f29ec3d306318aaf6363534 100644 (file)
@@ -90,41 +90,19 @@ public:
   public:
     MEMPOOL_CLASS_HELPERS();
     struct scrub_stamps {
-      version_t version;
+      version_t version = 0;
       utime_t time;
-      scrub_stamps() : version(0) {}
-      void operator=(const scrub_stamps &o) {
-        version = o.version;
-        time = o.time;
-      }
     };
 
-    scrub_info_t() :
-      directory_scrubbing(false),
-      need_scrub_local(false),
-      last_scrub_dirty(false),
-      pending_scrub_error(false) {}
-
-    /// inodes we contain with dirty scrub stamps
-    dentry_key_map dirty_scrub_stamps; // TODO: make use of this!
+    scrub_info_t() {}
 
     MDSContext *on_finish = nullptr;
-    scrub_stamps recursive_start; // when we last started a recursive scrub
+
     scrub_stamps last_recursive; // when we last finished a recursive scrub
     scrub_stamps last_local; // when we last did a local scrub
 
-    bool directory_scrubbing; /// safety check
-    bool need_scrub_local;
-    bool last_scrub_dirty; /// is scrub info dirty or is it flushed to fnode?
-    bool pending_scrub_error;
-
-    /// these are lists of children in each stage of scrubbing
-    dentry_key_set directories_to_scrub;
-    dentry_key_set directories_scrubbing;
-    dentry_key_set directories_scrubbed;
-    dentry_key_set others_to_scrub;
-    dentry_key_set others_scrubbing;
-    dentry_key_set others_scrubbed;
+    bool directory_scrubbing = false; /// safety check
+    bool last_scrub_dirty = false; /// is scrub info dirty or is it flushed to fnode?
 
     ScrubHeaderRef header;
   };
@@ -321,41 +299,15 @@ public:
    * @pre The CDir is marked complete.
    * @post It has set up its internal scrubbing state.
    */
-  void scrub_initialize(const ScrubHeaderRef& header,
-                       MDSContext* f);
-  void scrub_initialize_data();
+  void scrub_initialize(const ScrubHeaderRef& header, MDSContext* f);
   ScrubHeaderRef get_scrub_header() {
     return scrub_infop ? scrub_infop->header : nullptr;
   }
 
-  /**
-   * Get the next dentry to scrub. Gives you a CDentry* and its meaning. This
-   * function will give you all directory-representing dentries before any
-   * others.
-   * 0: success, you should scrub this CDentry right now
-   * EAGAIN: is currently fetching the next CDentry into memory for you.
-   *   It will activate your callback when done; try again when it does!
-   * ENOENT: there are no remaining dentries to scrub
-   * <0: There was an unexpected error
-   *
-   * @param cb An MDSContext which will be activated only if
-   *   we return EAGAIN via rcode, or else ignored
-   * @param dnout CDentry * which you should next scrub, or NULL
-   * @returns a value as described above
-   */
-  int scrub_dentry_next(MDSContext *cb, CDentry **dnout);
-  /**
-   * Get the currently scrubbing dentries. When returned, the passed-in
-   * list will be filled with all CDentry * which have been returned
-   * from scrub_dentry_next() but not sent back via scrub_dentry_finished().
-   */
-  std::vector<CDentry*> scrub_dentries_scrubbing();
-  /**
-   * Report to the CDir that a CDentry has been scrubbed. Call this
-   * for every CDentry returned from scrub_dentry_next().
-   * @param dn The CDentry which has been scrubbed.
-   */
-  void scrub_dentry_finished(CDentry *dn);
+  bool scrub_is_in_progress() const {
+    return (scrub_infop && scrub_infop->directory_scrubbing);
+  }
+
   /**
    * Call this once all CDentries have been scrubbed, according to
    * scrub_dentry_next's listing. It finalizes the scrub statistics.
@@ -371,9 +323,8 @@ public:
   bool scrub_local();
 
   const scrub_info_t *scrub_info() const {
-    if (!scrub_infop) {
+    if (!scrub_infop)
       scrub_info_create();
-    }
     return scrub_infop.get();
   }
 
@@ -806,12 +757,6 @@ private:
    * Delete the scrub_infop if it's not got any useful data.
    */
   void scrub_maybe_delete_info();
-  /**
-   * Check the given set (presumably one of those in scrub_info_t) for the
-   * next key to scrub and look it up (or fail!).
-   */
-  int _next_dentry_on_set(dentry_key_set &dns, bool missing_okay,
-                          MDSContext *cb, CDentry **dnout);
 
   void link_inode_work( CDentry *dn, CInode *in );
   void unlink_inode_work( CDentry *dn );
index 94790889e88cfa9726bd3011968af3116b2ec85c..63f6548b0ae9e941aac403a8b4538cd6c9490926 100644 (file)
@@ -306,17 +306,9 @@ ostream& operator<<(ostream& out, const CInode& in)
   return out;
 }
 
-ostream& operator<<(ostream& out, const CInode::scrub_stamp_info_t& si)
-{
-  out << "{scrub_start_version: " << si.scrub_start_version
-      << ", scrub_start_stamp: " << si.scrub_start_stamp
-      << ", last_scrub_version: " << si.last_scrub_version
-      << ", last_scrub_stamp: " << si.last_scrub_stamp;
-  return out;
-}
-
 CInode::CInode(MDCache *c, bool auth, snapid_t f, snapid_t l) :
-    mdcache(c), first(f), last(l), item_dirty(this),
+    mdcache(c), first(f), last(l),
+    item_dirty(this),
     item_caps(this),
     item_open_file(this),
     item_dirty_parent(this),
@@ -4819,32 +4811,10 @@ next:
     }
 
     bool check_dirfrag_rstats() {
-      MDSGatherBuilder gather(g_ceph_context);
-      frag_vec_t leaves;
-      in->dirfragtree.get_leaves(leaves);
-      for (const auto& leaf : leaves) {
-        CDir *dir = in->get_or_open_dirfrag(in->mdcache, leaf);
-       dir->scrub_info();
-       if (!dir->scrub_infop->header)
-         dir->scrub_infop->header = in->scrub_infop->header;
-        if (dir->is_complete()) {
-         dir->scrub_local();
-       } else {
-         dir->scrub_infop->need_scrub_local = true;
-         dir->fetch(gather.new_sub(), false);
-       }
-      }
-      if (gather.has_subs()) {
-        gather.set_finisher(get_internal_callback(DIRFRAGS));
-        gather.activate();
-        return false;
-      } else {
-        return immediate(DIRFRAGS, 0);
-      }
+      return immediate(DIRFRAGS, 0);
     }
 
     bool _dirfrags(int rval) {
-      int frags_errors = 0;
       // basic reporting setup
       results->raw_stats.checked = true;
       results->raw_stats.ondisk_read_retval = rval;
@@ -4865,18 +4835,6 @@ next:
        ceph_assert(dir->get_version() > 0);
        nest_info.add(dir->get_fnode()->accounted_rstat);
        dir_info.add(dir->get_fnode()->accounted_fragstat);
-       if (dir->scrub_infop->pending_scrub_error) {
-         dir->scrub_infop->pending_scrub_error = false;
-         if (dir->scrub_infop->header->get_repair()) {
-            results->raw_stats.repaired = true;
-           results->raw_stats.error_str
-             << "dirfrag(" << p.first << ") has bad stats (will be fixed); ";
-         } else {
-           results->raw_stats.error_str
-             << "dirfrag(" << p.first << ") has bad stats; ";
-         }
-         frags_errors++;
-       }
       }
       nest_info.rsubdirs++; // it gets one to account for self
       if (const sr_t *srnode = in->get_projected_srnode(); srnode)
@@ -4896,8 +4854,6 @@ next:
        }
        goto next;
       }
-      if (frags_errors > 0)
-       goto next;
 
       results->raw_stats.passed = true;
 next:
@@ -5127,11 +5083,11 @@ void CInode::scrub_info_create() const
   CInode *me = const_cast<CInode*>(this);
   const auto& pi = me->get_projected_inode();
 
-  scrub_info_t *si = new scrub_info_t();
-  si->scrub_start_stamp = si->last_scrub_stamp = pi->last_scrub_stamp;
-  si->scrub_start_version = si->last_scrub_version = pi->last_scrub_version;
+  std::unique_ptr<scrub_info_t> si(new scrub_info_t());
+  si->last_scrub_stamp = pi->last_scrub_stamp;
+  si->last_scrub_version = pi->last_scrub_version;
 
-  me->scrub_infop = si;
+  me->scrub_infop.swap(si);
 }
 
 void CInode::scrub_maybe_delete_info()
@@ -5139,118 +5095,22 @@ void CInode::scrub_maybe_delete_info()
   if (scrub_infop &&
       !scrub_infop->scrub_in_progress &&
       !scrub_infop->last_scrub_dirty) {
-    delete scrub_infop;
-    scrub_infop = NULL;
+    scrub_infop.reset();
   }
 }
 
-void CInode::scrub_initialize(CDentry *scrub_parent,
-                             ScrubHeaderRef& header,
+void CInode::scrub_initialize(ScrubHeaderRef& header,
                              MDSContext *f)
 {
   dout(20) << __func__ << " with scrub_version " << get_version() << dendl;
-  if (scrub_is_in_progress()) {
-    dout(20) << __func__ << " inode moved during scrub, reinitializing "
-            << dendl;
-    ceph_assert(scrub_infop->scrub_parent);
-    CDentry *dn = scrub_infop->scrub_parent;
-    CDir *dir = dn->dir;
-    dn->put(CDentry::PIN_SCRUBPARENT);
-    ceph_assert(dir->scrub_infop && dir->scrub_infop->directory_scrubbing);
-    dir->scrub_infop->directories_scrubbing.erase(dn->key());
-    dir->scrub_infop->others_scrubbing.erase(dn->key());
-  }
-  scrub_info();
-  if (!scrub_infop)
-    scrub_infop = new scrub_info_t();
 
-  if (get_projected_inode()->is_dir()) {
-    // fill in dirfrag_stamps with initial state
-    frag_vec_t leaves;
-    dirfragtree.get_leaves(leaves);
-    for (const auto& leaf : leaves) {
-      if (header->get_force())
-       scrub_infop->dirfrag_stamps[leaf].reset();
-      else
-       scrub_infop->dirfrag_stamps[leaf];
-    }
-  }
-
-  if (scrub_parent)
-    scrub_parent->get(CDentry::PIN_SCRUBPARENT);
-  scrub_infop->scrub_parent = scrub_parent;
+  scrub_info();
   scrub_infop->on_finish = f;
   scrub_infop->scrub_in_progress = true;
-  scrub_infop->children_scrubbed = false;
   scrub_infop->header = header;
-
-  scrub_infop->scrub_start_version = get_version();
-  scrub_infop->scrub_start_stamp = ceph_clock_now();
   // right now we don't handle remote inodes
 }
 
-int CInode::scrub_dirfrag_next(frag_t* out_dirfrag)
-{
-  dout(20) << __func__ << dendl;
-  ceph_assert(scrub_is_in_progress());
-
-  if (!is_dir()) {
-    return -ENOTDIR;
-  }
-
-  std::map<frag_t, scrub_stamp_info_t>::iterator i =
-      scrub_infop->dirfrag_stamps.begin();
-
-  while (i != scrub_infop->dirfrag_stamps.end()) {
-    if (i->second.scrub_start_version < scrub_infop->scrub_start_version) {
-      i->second.scrub_start_version = get_projected_version();
-      i->second.scrub_start_stamp = ceph_clock_now();
-      *out_dirfrag = i->first;
-      dout(20) << " return frag " << *out_dirfrag << dendl;
-      return 0;
-    }
-    ++i;
-  }
-
-  dout(20) << " no frags left, ENOENT " << dendl;
-  return ENOENT;
-}
-
-void CInode::scrub_dirfrags_scrubbing(frag_vec_t* out_dirfrags)
-{
-  ceph_assert(out_dirfrags != NULL);
-  ceph_assert(scrub_infop != NULL);
-
-  out_dirfrags->clear();
-  std::map<frag_t, scrub_stamp_info_t>::iterator i =
-      scrub_infop->dirfrag_stamps.begin();
-
-  while (i != scrub_infop->dirfrag_stamps.end()) {
-    if (i->second.scrub_start_version >= scrub_infop->scrub_start_version) {
-      if (i->second.last_scrub_version < scrub_infop->scrub_start_version)
-        out_dirfrags->push_back(i->first);
-    } else {
-      return;
-    }
-
-    ++i;
-  }
-}
-
-void CInode::scrub_dirfrag_finished(frag_t dirfrag)
-{
-  dout(20) << __func__ << " on frag " << dirfrag << dendl;
-  ceph_assert(scrub_is_in_progress());
-
-  std::map<frag_t, scrub_stamp_info_t>::iterator i =
-      scrub_infop->dirfrag_stamps.find(dirfrag);
-  ceph_assert(i != scrub_infop->dirfrag_stamps.end());
-
-  scrub_stamp_info_t &si = i->second;
-  si.last_scrub_stamp = si.scrub_start_stamp;
-  si.last_scrub_version = si.scrub_start_version;
-}
-
 void CInode::scrub_aborted(MDSContext **c) {
   dout(20) << __func__ << dendl;
   ceph_assert(scrub_is_in_progress());
@@ -5258,43 +5118,19 @@ void CInode::scrub_aborted(MDSContext **c) {
   *c = nullptr;
   std::swap(*c, scrub_infop->on_finish);
 
-  if (scrub_infop->scrub_parent) {
-    CDentry *dn = scrub_infop->scrub_parent;
-    scrub_infop->scrub_parent = NULL;
-    dn->dir->scrub_dentry_finished(dn);
-    dn->put(CDentry::PIN_SCRUBPARENT);
-  }
-
-  delete scrub_infop;
-  scrub_infop = nullptr;
+  scrub_infop->scrub_in_progress = false;
+  scrub_maybe_delete_info();
 }
 
 void CInode::scrub_finished(MDSContext **c) {
   dout(20) << __func__ << dendl;
   ceph_assert(scrub_is_in_progress());
-  for (std::map<frag_t, scrub_stamp_info_t>::iterator i =
-      scrub_infop->dirfrag_stamps.begin();
-      i != scrub_infop->dirfrag_stamps.end();
-      ++i) {
-    if(i->second.last_scrub_version != i->second.scrub_start_version) {
-      derr << i->second.last_scrub_version << " != "
-        << i->second.scrub_start_version << dendl;
-    }
-    ceph_assert(i->second.last_scrub_version == i->second.scrub_start_version);
-  }
 
-  scrub_infop->last_scrub_version = scrub_infop->scrub_start_version;
-  scrub_infop->last_scrub_stamp = scrub_infop->scrub_start_stamp;
+  scrub_infop->last_scrub_version = get_version();
+  scrub_infop->last_scrub_stamp = ceph_clock_now();
   scrub_infop->last_scrub_dirty = true;
   scrub_infop->scrub_in_progress = false;
 
-  if (scrub_infop->scrub_parent) {
-    CDentry *dn = scrub_infop->scrub_parent;
-    scrub_infop->scrub_parent = NULL;
-    dn->dir->scrub_dentry_finished(dn);
-    dn->put(CDentry::PIN_SCRUBPARENT);
-  }
-
   *c = scrub_infop->on_finish;
   scrub_infop->on_finish = NULL;
 
index 2168284819b376875f4f9a7270cf205fb39aa505..e38286dbc6eaca18b5405ab80d97493c45e61d63 100644 (file)
@@ -294,36 +294,17 @@ class CInode : public MDSCacheObject, public InodeStoreBase, public Counter<CIno
   friend class CDir;
   friend std::ostream& operator<<(std::ostream&, const CInode&);
 
-  class scrub_stamp_info_t {
-  public:
-    scrub_stamp_info_t() {}
-    void reset() {
-      scrub_start_version = last_scrub_version = 0;
-      scrub_start_stamp = last_scrub_stamp = utime_t();
-    }
-    /// version we started our latest scrub (whether in-progress or finished)
-    version_t scrub_start_version = 0;
-    /// time we started our latest scrub (whether in-progress or finished)
-    utime_t scrub_start_stamp;
-    /// version we started our most recent finished scrub
-    version_t last_scrub_version = 0;
-    /// time we started our most recent finished scrub
-    utime_t last_scrub_stamp;
-  };
-
-  class scrub_info_t : public scrub_stamp_info_t {
+  class scrub_info_t {
   public:
     scrub_info_t() {}
 
-    CDentry *scrub_parent = nullptr;
     MDSContext *on_finish = nullptr;
 
+    version_t last_scrub_version = 0;
+    utime_t last_scrub_stamp;
+
     bool last_scrub_dirty = false; /// are our stamps dirty with respect to disk state?
     bool scrub_in_progress = false; /// are we currently scrubbing?
-    bool children_scrubbed = false;
-
-    /// my own (temporary) stamps and versions for each dirfrag we have
-    std::map<frag_t, scrub_stamp_info_t> dirfrag_stamps; // XXX not part of mempool
 
     ScrubHeaderRef header;
   };
@@ -444,15 +425,11 @@ class CInode : public MDSCacheObject, public InodeStoreBase, public Counter<CIno
   const scrub_info_t *scrub_info() const{
     if (!scrub_infop)
       scrub_info_create();
-    return scrub_infop;
+    return scrub_infop.get();
   }
 
   ScrubHeaderRef get_scrub_header() {
-    if (scrub_infop == nullptr) {
-      return nullptr;
-    } else {
-      return scrub_infop->header;
-    }
+    return scrub_infop ? scrub_infop->header : nullptr;
   }
 
   bool scrub_is_in_progress() const {
@@ -466,32 +443,7 @@ class CInode : public MDSCacheObject, public InodeStoreBase, public Counter<CIno
    * @param scrub_version What version are we scrubbing at (usually, parent
    * directory's get_projected_version())
    */
-  void scrub_initialize(CDentry *scrub_parent,
-                       ScrubHeaderRef& header,
-                       MDSContext *f);
-  /**
-   * Get the next dirfrag to scrub. Gives you a frag_t in output param which
-   * you must convert to a CDir (and possibly load off disk).
-   * @param dir A pointer to frag_t, will be filled in with the next dirfrag to
-   * scrub if there is one.
-   * @returns 0 on success, you should scrub the passed-out frag_t right now;
-   * ENOENT: There are no remaining dirfrags to scrub
-   * <0 There was some other error (It will return -ENOTDIR if not a directory)
-   */
-  int scrub_dirfrag_next(frag_t* out_dirfrag);
-  /**
-   * Get the currently scrubbing dirfrags. When returned, the
-   * passed-in list will be filled in with all frag_ts which have
-   * been returned from scrub_dirfrag_next but not sent back
-   * via scrub_dirfrag_finished.
-   */
-  void scrub_dirfrags_scrubbing(frag_vec_t *out_dirfrags);
-  /**
-   * Report to the CInode that a dirfrag it owns has been scrubbed. Call
-   * this for every frag_t returned from scrub_dirfrag_next().
-   * @param dirfrag The frag_t that was scrubbed
-   */
-  void scrub_dirfrag_finished(frag_t dirfrag);
+  void scrub_initialize(ScrubHeaderRef& header, MDSContext *f);
   /**
    * Call this once the scrub has been completed, whether it's a full
    * recursive scrub on a directory or simply the data on a file (or
@@ -503,12 +455,6 @@ class CInode : public MDSCacheObject, public InodeStoreBase, public Counter<CIno
 
   void scrub_aborted(MDSContext **c);
 
-  /**
-   * Report to the CInode that alldirfrags it owns have been scrubbed.
-   */
-  void scrub_children_finished() {
-    scrub_infop->children_scrubbed = true;
-  }
   void scrub_set_finisher(MDSContext *c) {
     ceph_assert(!scrub_infop->on_finish);
     scrub_infop->on_finish = c;
@@ -1290,12 +1236,11 @@ private:
   int num_exporting_dirs = 0;
 
   int stickydir_ref = 0;
-  scrub_info_t *scrub_infop = nullptr;
+  std::unique_ptr<scrub_info_t> scrub_infop;
   /** @} Scrubbing and fsck */
 };
 
 std::ostream& operator<<(std::ostream& out, const CInode& in);
-std::ostream& operator<<(std::ostream& out, const CInode::scrub_stamp_info_t& si);
 
 extern cinode_lock_info_t cinode_lock_info[];
 extern int num_cinode_locks;
index c7da5a1c03fefcea73d9bc6e660d0c1674a2b721..3e66f7603dd9f280ea88b5d4b396676543198a3f 100644 (file)
@@ -58,15 +58,14 @@ void ScrubStack::dequeue(MDSCacheObject *obj)
   stack_size--;
 }
 
-void ScrubStack::_enqueue(MDSCacheObject *obj, CDentry *parent,
-                         ScrubHeaderRef& header,
+void ScrubStack::_enqueue(MDSCacheObject *obj, ScrubHeaderRef& header,
                          MDSContext *on_finish, bool top)
 {
   ceph_assert(ceph_mutex_is_locked_by_me(mdcache->mds->mds_lock));
   if (CInode *in = dynamic_cast<CInode*>(obj)) {
     dout(10) << __func__ << " with {" << *in << "}"
             << ", on_finish=" << on_finish << ", top=" << top << dendl;
-    in->scrub_initialize(parent, header, on_finish);
+    in->scrub_initialize(header, on_finish);
   } else if (CDir *dir = dynamic_cast<CDir*>(obj)) {
     dout(10) << __func__ << " with {" << *dir << "}"
             << ", on_finish=" << on_finish << ", top=" << top << dendl;
@@ -99,7 +98,7 @@ void ScrubStack::enqueue(CInode *in, ScrubHeaderRef& header,
   scrub_origins.emplace(in);
   clog_scrub_summary(in);
 
-  _enqueue(in, nullptr, header, on_finish, top);
+  _enqueue(in, header, on_finish, top);
   kick_off_scrubs();
 }
 
@@ -131,10 +130,8 @@ void ScrubStack::kick_off_scrubs()
 
   dout(20) << __func__ << " entering with " << scrubs_in_progress << " in "
               "progress and " << stack_size << " in the stack" << dendl;
-  bool can_continue = true;
   elist<MDSCacheObject*>::iterator it = scrub_stack.begin();
-  while (g_conf()->mds_max_scrub_ops_in_progress > scrubs_in_progress &&
-         can_continue) {
+  while (g_conf()->mds_max_scrub_ops_in_progress > scrubs_in_progress) {
     if (it.end()) {
       if (scrubs_in_progress == 0) {
         set_state(STATE_IDLE);
@@ -147,9 +144,8 @@ void ScrubStack::kick_off_scrubs()
     set_state(STATE_RUNNING);
 
     if (CInode *in = dynamic_cast<CInode*>(*it)) {
-      ++it; // we have our reference, push iterator forward
       dout(20) << __func__ << " examining " << *in << dendl;
-
+      ++it;
       if (!in->is_dir()) {
        // it's a regular file, symlink, or hard link
        dequeue(in); // we only touch it this once, so remove from stack
@@ -159,37 +155,26 @@ void ScrubStack::kick_off_scrubs()
          in->scrub_set_finisher(&scrub_kick);
        }
        scrub_file_inode(in);
-       can_continue = true;
       } else {
-       bool done; // it's done, so pop it off the stack
-       bool added_children; // it added new dentries to the top of the stack
+       bool added_children = false;
+       bool done = false; // it's done, so pop it off the stack
        scrub_dir_inode(in, &added_children, &done);
        if (done) {
          dout(20) << __func__ << " dir inode, done" << dendl;
          dequeue(in);
-       } else if (added_children) {
-         dout(20) << __func__ << " dir inode, added_children" << dendl;
-         // we added new stuff to top of stack, so reset ourselves there
+       }
+       if (added_children) {
+         // dirfrags were queued at top of stack
          it = scrub_stack.begin();
-       } else {
-         dout(20) << __func__ << " dir inode, no progress" << dendl;
-         can_continue = false;
        }
       }
     } else if (CDir *dir = dynamic_cast<CDir*>(*it)) {
-      bool done; // it's done, so pop it off the stack
-      bool added_children; // it added new dentries to the top of the stack
-      scrub_dirfrag(dir, &added_children, &done);
+      bool done = false; // it's done, so pop it off the stack
+      scrub_dirfrag(dir, &done);
+      ++it;
       if (done) {
        dout(20) << __func__ << " dirfrag, done" << dendl;
        dequeue(dir);
-      } else if (added_children) {
-       dout(20) << __func__ << " dirfrag, added_children" << dendl;
-       // we added new stuff to top of stack, so reset ourselves there
-       it = scrub_stack.begin();
-      } else {
-       dout(20) << __func__ << " dirfrag, no progress" << dendl;
-       can_continue = false;
       }
     } else {
       ceph_assert(0 == "dentry in scrub stack");
@@ -197,118 +182,47 @@ void ScrubStack::kick_off_scrubs()
   }
 }
 
-void ScrubStack::scrub_dir_inode(CInode *in,
-                                 bool *added_children,
-                                 bool *done)
+void ScrubStack::scrub_dir_inode(CInode *in, bool *added_children, bool *done)
 {
   dout(10) << __func__ << " " << *in << dendl;
 
-  *added_children = false;
-  bool all_frags_done = true;
-
   ScrubHeaderRef header = in->get_scrub_header();
-  ceph_assert(header != nullptr);
+  ceph_assert(header);
 
-  if (header->get_recursive()) {
-    frag_vec_t scrubbing_frags;
-    std::queue<CDir*> scrubbing_cdirs;
-    in->scrub_dirfrags_scrubbing(&scrubbing_frags);
-    dout(20) << __func__ << " iterating over " << scrubbing_frags.size()
-      << " scrubbing frags" << dendl;
-    for (const auto& fg : scrubbing_frags) {
-      // turn frags into CDir *
-      CDir *dir = in->get_dirfrag(fg);
-      if (dir) {
-       scrubbing_cdirs.push(dir);
-       dout(25) << __func__ << " got CDir " << *dir << " presently scrubbing" << dendl;
-      } else {
-       in->scrub_dirfrag_finished(fg);
-       dout(25) << __func__ << " missing dirfrag " << fg << " skip scrubbing" << dendl;
-      }
-    }
+  MDSGatherBuilder gather(g_ceph_context);
 
-    dout(20) << __func__ << " consuming from " << scrubbing_cdirs.size()
-            << " scrubbing cdirs" << dendl;
+  frag_vec_t frags;
+  in->dirfragtree.get_leaves(frags);
+  dout(20) << __func__ << "recursive mode, frags " << frags << dendl;
 
-    while (g_conf()->mds_max_scrub_ops_in_progress > scrubs_in_progress) {
-      // select next CDir
-      CDir *cur_dir = NULL;
-      if (!scrubbing_cdirs.empty()) {
-       cur_dir = scrubbing_cdirs.front();
-        scrubbing_cdirs.pop();
-       dout(20) << __func__ << " got cur_dir = " << *cur_dir << dendl;
-      } else {
-       bool ready = get_next_cdir(in, &cur_dir);
-       dout(20) << __func__ << " get_next_cdir ready=" << ready << dendl;
-
-       if (ready && cur_dir) {
-         cur_dir->scrub_initialize(header, nullptr);
-         scrubbing_cdirs.push(cur_dir);
-       } else if (!ready) {
-         // We are waiting for load of a frag
-         all_frags_done = false;
-         break;
-       } else {
-         // Finished with all frags
-         break;
-       }
-      }
-      // scrub that CDir
-      bool frag_added_children = false;
-      bool frag_done = false;
-      scrub_dirfrag(cur_dir,
-                   &frag_added_children, &frag_done);
-      if (frag_done) {
-       cur_dir->inode->scrub_dirfrag_finished(cur_dir->frag);
-      }
-      *added_children |= frag_added_children;
-      all_frags_done = all_frags_done && frag_done;
-    }
-
-    dout(20) << "finished looping, all_frags_done=" << all_frags_done << dendl;
-  } else {
-    dout(20) << "!scrub_recursive" << dendl;
+  for (auto &fg : frags) {
+    CDir *dir = in->get_or_open_dirfrag(mdcache, fg);
+    if (dir->get_version() == 0)
+      dir->fetch(gather.new_sub());
   }
-
-  if (all_frags_done) {
-    assert (!*added_children); // can't do this if children are still pending
-
-    // OK, so now I can... fire off a validate on the dir inode, and
-    // when it completes, come through here again, noticing that we've
-    // set a flag to indicate the validate happened, and 
-    scrub_dir_inode_final(in);
+  if (gather.has_subs()) {
+    scrubs_in_progress++;
+    gather.set_finisher(&scrub_kick);
+    gather.activate();
+    dout(10) << __func__ << " barebones dirfrags, fetching" << dendl;
+    return;
   }
 
-  *done = all_frags_done;
-  dout(10) << __func__ << " is exiting " << *done << dendl;
-  return;
-}
-
-bool ScrubStack::get_next_cdir(CInode *in, CDir **new_dir)
-{
-  dout(20) << __func__ << " on " << *in << dendl;
-  frag_t next_frag;
-  int r = in->scrub_dirfrag_next(&next_frag);
-  assert (r >= 0);
-
-  if (r == 0) {
-    // we got a frag to scrub, otherwise it would be ENOENT
-    dout(25) << "looking up new frag " << next_frag << dendl;
-    CDir *next_dir = in->get_or_open_dirfrag(mdcache, next_frag);
-    if (!next_dir->is_complete()) {
-      scrubs_in_progress++;
-      next_dir->fetch(&scrub_kick);
-      dout(25) << "fetching frag from RADOS" << dendl;
-      return false;
+  std::vector<CDir*> dfs;
+  in->get_dirfrags(dfs);
+  for (auto &dir : dfs) {
+    if (dir->is_auth()){
+      _enqueue(dir, header, nullptr, true);
+      *added_children = true;
+    } else {
+      // FIXME: ask auth mds to scrub
     }
-    *new_dir = next_dir;
-    dout(25) << "returning dir " << *new_dir << dendl;
-    return true;
   }
-  ceph_assert(r == ENOENT);
-  // there are no dirfrags left
-  *new_dir = NULL;
-  return true;
+
+  scrub_dir_inode_final(in);
+
+  *done = true;
+  dout(10) << __func__ << " done" << dendl;
 }
 
 class C_InodeValidated : public MDSInternalContext
@@ -333,90 +247,62 @@ void ScrubStack::scrub_dir_inode_final(CInode *in)
 {
   dout(20) << __func__ << " " << *in << dendl;
 
-  // Two passes through this function.  First one triggers inode validation,
-  // second one sets finally_done
-  // FIXME: kind of overloading scrub_in_progress here, using it while
-  // dentry is still on stack to indicate that we have finished
-  // doing our validate_disk_state on the inode
-  // FIXME: the magic-constructing scrub_info() is going to leave
-  // an unneeded scrub_infop lying around here
-  if (!in->scrub_info()->children_scrubbed) {
-    if (!in->scrub_info()->on_finish) {
-      scrubs_in_progress++;
-      in->scrub_set_finisher(&scrub_kick);
-    }
-
-    in->scrub_children_finished();
-    C_InodeValidated *fin = new C_InodeValidated(mdcache->mds, this, in);
-    in->validate_disk_state(&fin->result, fin);
+  if (!in->scrub_info()->on_finish) {
+    scrubs_in_progress++;
+    in->scrub_set_finisher(&scrub_kick);
   }
 
+  C_InodeValidated *fin = new C_InodeValidated(mdcache->mds, this, in);
+  in->validate_disk_state(&fin->result, fin);
+
   return;
 }
 
-void ScrubStack::scrub_dirfrag(CDir *dir, bool *added_children, bool *done)
+void ScrubStack::scrub_dirfrag(CDir *dir, bool *done)
 {
   ceph_assert(dir != NULL);
 
-  dout(20) << __func__ << " on " << *dir << dendl;
-  *added_children = false;
-  *done = false;
-
-  ScrubHeaderRef header = dir->get_scrub_header();
-
-  if (!dir->scrub_info()->directory_scrubbing) {
-    // Get the frag complete before calling
-    // scrub initialize, so that it can populate its lists
-    // of dentries.
-    if (!dir->is_complete()) {
-      scrubs_in_progress++;
-      dir->fetch(&scrub_kick);
-      return;
-    }
-
-    dir->scrub_initialize_data();
-  }
+  dout(10) << __func__ << " " << *dir << dendl;
 
-  int r = 0;
-  while(r == 0) {
-    CDentry *dn = NULL;
+  if (!dir->is_complete()) {
     scrubs_in_progress++;
-    r = dir->scrub_dentry_next(&scrub_kick, &dn);
-    if (r != EAGAIN) {
-      scrubs_in_progress--;
-    }
-
-    if (r == EAGAIN) {
-      // Drop out, CDir fetcher will call back our kicker context
-      dout(20) << __func__ << " waiting for fetch on " << *dir << dendl;
-      return;
-    }
+    dir->fetch(&scrub_kick, true); // already auth pinned
+    dout(10) << __func__ << " incomplete, fetching" << dendl;
+    return;
+  }
 
-    if (r == ENOENT) {
-      // Nothing left to scrub, are we done?
-      auto&& scrubbing = dir->scrub_dentries_scrubbing();
-      if (scrubbing.empty()) {
-       dout(20) << __func__ << " dirfrag done: " << *dir << dendl;
-       MDSContext *c = nullptr;
-       dir->scrub_finished(&c);
-       if (c)
-         finisher->queue(new MDSIOContextWrapper(mdcache->mds, c), 0);
-       *done = true;
-      } else {
-        dout(20) << __func__ << " " << scrubbing.size() << " dentries still "
-                   "scrubbing in " << *dir << dendl;
+  ScrubHeaderRef header = dir->get_scrub_header();
+  version_t last_scrub = dir->scrub_info()->last_recursive.version;
+  if (header->get_recursive()) {
+    for (auto it = dir->begin(); it != dir->end(); ++it) {
+      if (it->first.snapid != CEPH_NOSNAP)
+       continue;
+      CDentry *dn = it->second;
+      CDentry::linkage_t *dnl = dn->get_linkage();
+      if (dn->get_version() <= last_scrub &&
+         dnl->get_remote_d_type() != DT_DIR &&
+         !header->get_force()) {
+       dout(15) << __func__ << " skip dentry " << it->first
+                << ", no change since last scrub" << dendl;
+       continue;
+      }
+      if (dnl->is_primary()) {
+       _enqueue(dnl->get_inode(), header, nullptr, false);
+      } else if (dnl->is_remote()) {
+       // TODO: check remote linkage
       }
-      return;
     }
+  }
 
-    // scrub_dentry_next defined to only give EAGAIN, ENOENT, 0 -- we should
-    // never get random IO errors here.
-    ceph_assert(r == 0);
+  dir->scrub_local();
 
-    _enqueue(dn->get_projected_inode(), dn, header, nullptr, true);
+  MDSContext *c = nullptr;
+  dir->scrub_finished(&c);
+  if (c)
+    finisher->queue(new MDSIOContextWrapper(mdcache->mds, c), 0);
 
-    *added_children = true;
-  }
+  *done = true;
+  dout(10) << __func__ << " done" << dendl;
 }
 
 void ScrubStack::scrub_file_inode(CInode *in)
index 768d466b8461c673379ef43eac07eeed0405736d..02c17299be1984a0368b804118be9268ed239b67 100644 (file)
@@ -141,7 +141,7 @@ private:
 
   friend class C_InodeValidated;
 
-  void _enqueue(MDSCacheObject *obj, CDentry *parent, ScrubHeaderRef& header,
+  void _enqueue(MDSCacheObject *obj, ScrubHeaderRef& header,
                MDSContext *on_finish, bool top);
   /**
    * Remove the inode/dirfrag from the stack.
@@ -169,59 +169,28 @@ private:
                            const CInode::validated_data &result);
 
   /**
-   * Make progress on scrubbing a directory-representing dirfrag and
-   * its children..
+   * Scrub a directory inode. It queues child dirfrags, then does
+   * final scrub of the inode.
    *
-   * 1) Select the next dirfrag which hasn't been scrubbed, and make progress
-   * on it if possible.
-   *
-   * 2) If not, move on to the next dirfrag and start it up, if any.
-   *
-   * 3) If waiting for results from dirfrag scrubs, do nothing.
-   *
-   * 4) If all dirfrags have been scrubbed, scrub my inode.
-   *
-   * @param in The CInode to scrub as a directory
+   * @param in The directory inode to scrub
    * @param added_children set to true if we pushed some of our children
-   * onto the ScrubStack
-   * remaining to start scrubbing.
-   * @param done set to true if we and all our children have finished scrubbing
+   * @param done set to true if we started to do final scrub
    */
   void scrub_dir_inode(CInode *in, bool *added_children, bool *done);
   /**
-   * Make progress on scrubbing a dirfrag. It may return after each of the
-   * following steps, but will report making progress on each one.
-   *
-   * 1) enqueues the next unscrubbed child directory dentry at the
-   * top of the stack.
-   *
-   * 2) Initiates a scrub on the next unscrubbed file dentry
-   *
-   * If there are scrubs currently in progress on child dentries, no more child
-   * dentries to scrub, and this function is invoked, it will report no
-   * progress. Try again later.
+   * Scrub a dirfrag. It queues child dentries, then does final
+   * scrub of the dirfrag.
    *
+   * @param dir The dirfrag to scrub
+   * @param done set to true if we started to do final scrub
    */
-  void scrub_dirfrag(CDir *dir, bool *added_children, bool *done);
+  void scrub_dirfrag(CDir *dir, bool *done);
   /**
    * Scrub a directory-representing dentry.
    *
    * @param in The directory inode we're doing final scrub on.
    */
   void scrub_dir_inode_final(CInode *in);
-
-  /**
-   * Get a CDir into memory, and return it if it's already complete.
-   * Otherwise, fetch it and kick off scrubbing when done.
-   *
-   * @param in The Inode to get the next directory from
-   * @param new_dir The CDir we're returning to you. NULL if
-   * not ready yet or there aren't any.
-   * @returns false if you have to wait, true if there's no work
-   * left to do (we returned it, or there are none left in this inode).
-   */
-  bool get_next_cdir(CInode *in, CDir **new_dir);
-
   /**
    * Set scrub state
    * @param next_state State to move the scrub to.