From b7101f1bc7cddb780eca9ba5af192b5dd21ea631 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 4 Aug 2020 10:48:59 +0800 Subject: [PATCH] mds: remove snap past parents compat code snap format change was introduced in Mimic release. It's long enough to remove compat code. Signed-off-by: "Yan, Zheng" --- PendingReleaseNotes | 8 ++ src/include/ceph_fs.h | 1 - src/mds/CDir.cc | 11 +- src/mds/CInode.cc | 78 +---------- src/mds/MDCache.cc | 277 ++++++++------------------------------ src/mds/MDCache.h | 3 - src/mds/MDSRank.cc | 6 - src/mds/MDSRank.h | 2 - src/mds/Server.cc | 11 +- src/mds/SnapRealm.cc | 289 +--------------------------------------- src/mds/SnapRealm.h | 24 +--- src/mds/SnapServer.h | 14 +- src/mds/StrayManager.cc | 19 +-- 13 files changed, 85 insertions(+), 658 deletions(-) diff --git a/PendingReleaseNotes b/PendingReleaseNotes index f9adfc19ceb1c..aea25856da77f 100644 --- a/PendingReleaseNotes +++ b/PendingReleaseNotes @@ -149,6 +149,14 @@ - ``mds session blacklist on timeout`` is now ``mds session blocklist on timeout`` - ``mds session blacklist on evict`` is now ``mds session blocklist on evict`` +* CephFS: Compatibility code for the old on-disk snapshot format has been removed. + The current on-disk snapshot format was introduced in the Mimic release. If there + are any snapshots created by a Ceph release older than Mimic, then before upgrading + either delete them all or scrub the whole filesystem: + + ceph daemon <mds of rank 0> scrub_path / force recursive repair + ceph daemon <mds of rank 0> scrub_path '~mdsdir' force recursive repair + * The following librados API calls have changed: - ``rados_blacklist_add`` is now ``rados_blocklist_add``; the former will issue a deprecation warning and be removed in a future release. 
diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h index d8ae8271e2ff5..fe98eef892428 100644 --- a/src/include/ceph_fs.h +++ b/src/include/ceph_fs.h @@ -415,7 +415,6 @@ enum { CEPH_MDS_OP_ENQUEUE_SCRUB = 0x01503, CEPH_MDS_OP_REPAIR_FRAGSTATS = 0x01504, CEPH_MDS_OP_REPAIR_INODESTATS = 0x01505, - CEPH_MDS_OP_UPGRADE_SNAPREALM = 0x01506 }; extern const char *ceph_mds_op_name(int op); diff --git a/src/mds/CDir.cc b/src/mds/CDir.cc index 78fcdb5b6038d..8c8c5b4a0979f 100755 --- a/src/mds/CDir.cc +++ b/src/mds/CDir.cc @@ -1982,13 +1982,10 @@ void CDir::_omap_fetched(bufferlist& hdrbl, map& omap, list undef_inodes; // purge stale snaps? - // only if we have past_parents open! bool force_dirty = false; const set *snaps = NULL; SnapRealm *realm = inode->find_snaprealm(); - if (!realm->have_past_parents_open()) { - dout(10) << " no snap purge, one or more past parents NOT open" << dendl; - } else if (fnode->snap_purged_thru < realm->get_last_destroyed()) { + if (fnode->snap_purged_thru < realm->get_last_destroyed()) { snaps = &realm->get_snaps(); dout(10) << " snap_purged_thru " << fnode->snap_purged_thru << " < " << realm->get_last_destroyed() @@ -2181,9 +2178,7 @@ void CDir::_omap_commit(int op_prio) // snap purge? 
const set *snaps = NULL; SnapRealm *realm = inode->find_snaprealm(); - if (!realm->have_past_parents_open()) { - dout(10) << " no snap purge, one or more past parents NOT open" << dendl; - } else if (fnode->snap_purged_thru < realm->get_last_destroyed()) { + if (fnode->snap_purged_thru < realm->get_last_destroyed()) { snaps = &realm->get_snaps(); dout(10) << " snap_purged_thru " << fnode->snap_purged_thru << " < " << realm->get_last_destroyed() @@ -2338,7 +2333,7 @@ void CDir::_encode_dentry(CDentry *dn, bufferlist& bl, if (!in->snaprealm) { if (snaps) in->purge_stale_snap_data(*snaps); - } else if (in->snaprealm->have_past_parents_open()) { + } else { in->purge_stale_snap_data(in->snaprealm->get_snaps()); } } diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index f150005d87a9c..509662f9243db 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -499,21 +499,6 @@ sr_t *CInode::prepare_new_srnode(snapid_t snapid) if (cur_srnode) { new_srnode = new sr_t(*cur_srnode); - if (!new_srnode->past_parents.empty()) { - // convert past_parents to past_parent_snaps - ceph_assert(snaprealm); - auto& snaps = snaprealm->get_snaps(); - for (auto p : snaps) { - if (p >= new_srnode->current_parent_since) - break; - if (!new_srnode->snaps.count(p)) - new_srnode->past_parent_snaps.insert(p); - } - new_srnode->seq = snaprealm->get_newest_seq(); - new_srnode->past_parents.clear(); - } - if (snaprealm) - snaprealm->past_parents_dirty = false; } else { if (snapid == 0) snapid = mdcache->get_global_snaprealm()->get_newest_seq(); @@ -642,33 +627,17 @@ void CInode::pop_projected_snaprealm(sr_t *next_snaprealm, bool early) if (next_snaprealm) { dout(10) << __func__ << (early ? 
" (early) " : " ") << next_snaprealm << " seq " << next_snaprealm->seq << dendl; - bool invalidate_cached_snaps = false; - if (!snaprealm) { + if (!snaprealm) open_snaprealm(); - } else if (!snaprealm->srnode.past_parents.empty()) { - invalidate_cached_snaps = true; - // re-open past parents - snaprealm->close_parents(); - dout(10) << " realm " << *snaprealm << " past_parents " << snaprealm->srnode.past_parents - << " -> " << next_snaprealm->past_parents << dendl; - } auto old_flags = snaprealm->srnode.flags; snaprealm->srnode = *next_snaprealm; delete next_snaprealm; if ((snaprealm->srnode.flags ^ old_flags) & sr_t::PARENT_GLOBAL) { - snaprealm->close_parents(); snaprealm->adjust_parent(); } - // we should be able to open these up (or have them already be open). - bool ok = snaprealm->_open_parents(NULL); - ceph_assert(ok); - - if (invalidate_cached_snaps) - snaprealm->invalidate_cached_snaps(); - if (snaprealm->parent) dout(10) << " realm " << *snaprealm << " parent " << *snaprealm->parent << dendl; } else { @@ -3118,7 +3087,6 @@ void CInode::close_snaprealm(bool nojoin) { if (snaprealm) { dout(15) << __func__ << " " << *snaprealm << dendl; - snaprealm->close_parents(); if (snaprealm->parent) { snaprealm->parent->open_children.erase(snaprealm); //if (!nojoin) @@ -3157,12 +3125,8 @@ void CInode::decode_snap_blob(const bufferlist& snapbl) auto old_flags = snaprealm->srnode.flags; auto p = snapbl.cbegin(); decode(snaprealm->srnode, p); - if (is_base()) { - bool ok = snaprealm->_open_parents(NULL); - ceph_assert(ok); - } else { + if (!is_base()) { if ((snaprealm->srnode.flags ^ old_flags) & sr_t::PARENT_GLOBAL) { - snaprealm->close_parents(); snaprealm->adjust_parent(); } } @@ -4606,7 +4570,6 @@ void CInode::validate_disk_state(CInode::validated_data *results, set_callback(BACKTRACE, static_cast(&ValidationContinuation::_backtrace)); set_callback(INODE, static_cast(&ValidationContinuation::_inode_disk)); set_callback(DIRFRAGS, 
static_cast(&ValidationContinuation::_dirfrags)); - set_callback(SNAPREALM, static_cast(&ValidationContinuation::_snaprealm)); } ~ValidationContinuation() override { @@ -4655,10 +4618,6 @@ void CInode::validate_disk_state(CInode::validated_data *results, return true; } - // prefetch snaprealm's past parents - if (in->snaprealm && !in->snaprealm->have_past_parents_open()) - in->snaprealm->open_parents(nullptr); - C_OnFinisher *conf = new C_OnFinisher(get_io_callback(BACKTRACE), in->mdcache->mds->finisher); @@ -4779,7 +4738,7 @@ next: return validate_directory_data(); } else { // TODO: validate on-disk inode for normal files - return check_inode_snaprealm(); + return true; } } @@ -4911,37 +4870,6 @@ next: results->raw_stats.passed = true; next: - // snaprealm - return check_inode_snaprealm(); - } - - bool check_inode_snaprealm() { - if (!in->snaprealm) - return true; - - if (!in->snaprealm->have_past_parents_open()) { - in->snaprealm->open_parents(get_internal_callback(SNAPREALM)); - return false; - } else { - return immediate(SNAPREALM, 0); - } - } - - bool _snaprealm(int rval) { - - if (in->snaprealm->past_parents_dirty || - !in->get_projected_srnode()->past_parents.empty()) { - // temporarily store error in field of on-disk inode validation temporarily - results->inode.checked = true; - results->inode.passed = false; - if (in->scrub_infop->header->get_repair()) { - results->inode.error_str << "Inode has old format snaprealm (will upgrade)"; - results->inode.repaired = true; - in->mdcache->upgrade_inode_snaprealm(in); - } else { - results->inode.error_str << "Inode has old format snaprealm"; - } - } return true; } diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 860c39f4a5cd6..6d57672f03b51 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -5704,9 +5704,7 @@ void MDCache::prepare_realm_merge(SnapRealm *realm, SnapRealm *parent_realm, vector split_inos; vector split_realms; - for (elist::iterator p = 
realm->inodes_with_caps.begin(member_offset(CInode, item_caps)); - !p.end(); - ++p) + for (auto p = realm->inodes_with_caps.begin(); !p.end(); ++p) split_inos.push_back((*p)->ino()); for (set::iterator p = realm->open_children.begin(); p != realm->open_children.end(); @@ -5907,24 +5905,20 @@ void MDCache::do_cap_import(Session *session, CInode *in, Capability *cap, int peer, int p_flags) { SnapRealm *realm = in->find_snaprealm(); - if (realm->have_past_parents_open()) { - dout(10) << "do_cap_import " << session->info.inst.name << " mseq " << cap->get_mseq() << " on " << *in << dendl; - if (cap->get_last_seq() == 0) // reconnected cap - cap->inc_last_seq(); - cap->set_last_issue(); - cap->set_last_issue_stamp(ceph_clock_now()); - cap->clear_new(); - auto reap = make_message( - CEPH_CAP_OP_IMPORT, in->ino(), realm->inode->ino(), cap->get_cap_id(), - cap->get_last_seq(), cap->pending(), cap->wanted(), 0, cap->get_mseq(), - mds->get_osd_epoch_barrier()); - in->encode_cap_message(reap, cap); - reap->snapbl = realm->get_snap_trace(); - reap->set_cap_peer(p_cap_id, p_seq, p_mseq, peer, p_flags); - mds->send_message_client_counted(reap, session); - } else { - ceph_abort(); - } + dout(10) << "do_cap_import " << session->info.inst.name << " mseq " << cap->get_mseq() << " on " << *in << dendl; + if (cap->get_last_seq() == 0) // reconnected cap + cap->inc_last_seq(); + cap->set_last_issue(); + cap->set_last_issue_stamp(ceph_clock_now()); + cap->clear_new(); + auto reap = make_message(CEPH_CAP_OP_IMPORT, + in->ino(), realm->inode->ino(), cap->get_cap_id(), + cap->get_last_seq(), cap->pending(), cap->wanted(), + 0, cap->get_mseq(), mds->get_osd_epoch_barrier()); + in->encode_cap_message(reap, cap); + reap->snapbl = realm->get_snap_trace(); + reap->set_cap_peer(p_cap_id, p_seq, p_mseq, peer, p_flags); + mds->send_message_client_counted(reap, session); } void MDCache::do_delayed_cap_imports() @@ -5945,77 +5939,50 @@ void MDCache::open_snaprealms() { dout(10) << "open_snaprealms" 
<< dendl; - MDSGatherBuilder gather(g_ceph_context); - auto it = rejoin_pending_snaprealms.begin(); while (it != rejoin_pending_snaprealms.end()) { CInode *in = *it; SnapRealm *realm = in->snaprealm; ceph_assert(realm); - if (realm->have_past_parents_open() || - realm->open_parents(gather.new_sub())) { - dout(10) << " past parents now open on " << *in << dendl; - - map> splits; - // finish off client snaprealm reconnects? - map >::iterator q = reconnected_snaprealms.find(in->ino()); - if (q != reconnected_snaprealms.end()) { - for (const auto& r : q->second) - finish_snaprealm_reconnect(r.first, realm, r.second, splits); - reconnected_snaprealms.erase(q); - } - - for (elist::iterator p = realm->inodes_with_caps.begin(member_offset(CInode, item_caps)); - !p.end(); ++p) { - CInode *child = *p; - auto q = reconnected_caps.find(child->ino()); - ceph_assert(q != reconnected_caps.end()); - for (auto r = q->second.begin(); r != q->second.end(); ++r) { - Capability *cap = child->get_client_cap(r->first); - if (!cap) - continue; - if (r->second.snap_follows > 0) { - if (r->second.snap_follows < child->first - 1) { - rebuild_need_snapflush(child, realm, r->first, r->second.snap_follows); - } else if (r->second.snapflush) { - // When processing a cap flush message that is re-sent, it's possble - // that the sender has already released all WR caps. So we should - // force MDCache::cow_inode() to setup CInode::client_need_snapflush. - cap->mark_needsnapflush(); - } - } - // make sure client's cap is in the correct snaprealm. - if (r->second.realm_ino != in->ino()) { - prepare_realm_split(realm, r->first, child->ino(), splits); + + map> splits; + // finish off client snaprealm reconnects? 
+ auto q = reconnected_snaprealms.find(in->ino()); + if (q != reconnected_snaprealms.end()) { + for (const auto& r : q->second) + finish_snaprealm_reconnect(r.first, realm, r.second, splits); + reconnected_snaprealms.erase(q); + } + + for (auto p = realm->inodes_with_caps.begin(); !p.end(); ++p) { + CInode *child = *p; + auto q = reconnected_caps.find(child->ino()); + ceph_assert(q != reconnected_caps.end()); + for (auto r = q->second.begin(); r != q->second.end(); ++r) { + Capability *cap = child->get_client_cap(r->first); + if (!cap) + continue; + if (r->second.snap_follows > 0) { + if (r->second.snap_follows < child->first - 1) { + rebuild_need_snapflush(child, realm, r->first, r->second.snap_follows); + } else if (r->second.snapflush) { + // When processing a cap flush message that is re-sent, it's possble + // that the sender has already released all WR caps. So we should + // force MDCache::cow_inode() to setup CInode::client_need_snapflush. + cap->mark_needsnapflush(); } } + // make sure client's cap is in the correct snaprealm. 
+ if (r->second.realm_ino != in->ino()) { + prepare_realm_split(realm, r->first, child->ino(), splits); + } } - - rejoin_pending_snaprealms.erase(it++); - in->put(CInode::PIN_OPENINGSNAPPARENTS); - - send_snaps(splits); - } else { - dout(10) << " opening past parents on " << *in << dendl; - ++it; } - } - if (gather.has_subs()) { - if (gather.num_subs_remaining() == 0) { - // cleanup gather - gather.set_finisher(new C_MDSInternalNoop); - gather.activate(); - } else { - // for multimds, must succeed the first time - ceph_assert(recovery_set.empty()); + rejoin_pending_snaprealms.erase(it++); + in->put(CInode::PIN_OPENINGSNAPPARENTS); - dout(10) << "open_snaprealms - waiting for " - << gather.num_subs_remaining() << dendl; - gather.set_finisher(new C_MDC_OpenSnapRealms(this)); - gather.activate(); - return; - } + send_snaps(splits); } notify_global_snaprealm_update(CEPH_SNAP_OP_UPDATE); @@ -8318,12 +8285,6 @@ int MDCache::path_traverse(MDRequestRef& mdr, MDSContextFactory& cf, if (cur->state_test(CInode::STATE_PURGING)) return -ESTALE; - // make sure snaprealm are open... - if (mdr && cur->snaprealm && !cur->snaprealm->have_past_parents_open() && - !cur->snaprealm->open_parents(cf.build())) { - return 1; - } - if (flags & MDS_TRAVERSE_CHECK_LOCKCACHE) mds->locker->find_and_attach_lock_cache(mdr, cur); @@ -8525,11 +8486,6 @@ int MDCache::path_traverse(MDRequestRef& mdr, MDSContextFactory& cf, } cur = in; - // make sure snaprealm are open... 
- if (mdr && cur->snaprealm && !cur->snaprealm->have_past_parents_open() && - !cur->snaprealm->open_parents(cf.build())) { - return 1; - } if (rdlock_snap && !(want_dentry && depth == path.depth() - 1)) { lov.clear(); @@ -9726,9 +9682,6 @@ void MDCache::dispatch_request(MDRequestRef& mdr) case CEPH_MDS_OP_REPAIR_INODESTATS: repair_inode_stats_work(mdr); break; - case CEPH_MDS_OP_UPGRADE_SNAPREALM: - upgrade_inode_snaprealm_work(mdr); - break; default: ceph_abort(); } @@ -9891,21 +9844,16 @@ void MDCache::do_realm_invalidate_and_update_notify(CInode *in, int snapop, bool vector split_realms; if (notify_clients) { - ceph_assert(in->snaprealm->have_past_parents_open()); if (snapop == CEPH_SNAP_OP_SPLIT) { // notify clients of update|split - for (elist::iterator p = in->snaprealm->inodes_with_caps.begin(member_offset(CInode, item_caps)); - !p.end(); ++p) + for (auto p = in->snaprealm->inodes_with_caps.begin(); !p.end(); ++p) split_inos.push_back((*p)->ino()); - for (set::iterator p = in->snaprealm->open_children.begin(); - p != in->snaprealm->open_children.end(); - ++p) - split_realms.push_back((*p)->inode->ino()); + for (auto& r : in->snaprealm->open_children) + split_realms.push_back(r->inode->ino()); } } - set past_children; map> updates; list q; q.push_back(in->snaprealm); @@ -9934,60 +9882,14 @@ void MDCache::do_realm_invalidate_and_update_notify(CInode *in, int snapop, bool } } - if (snapop == CEPH_SNAP_OP_UPDATE || snapop == CEPH_SNAP_OP_DESTROY) { - for (set::iterator p = realm->open_past_children.begin(); - p != realm->open_past_children.end(); - ++p) - past_children.insert(*p); - } - // notify for active children, too. 
dout(10) << " " << realm << " open_children are " << realm->open_children << dendl; - for (set::iterator p = realm->open_children.begin(); - p != realm->open_children.end(); - ++p) - q.push_back(*p); + for (auto& r : realm->open_children) + q.push_back(r); } if (notify_clients) send_snaps(updates); - - // notify past children and their descendants if we update/delete old snapshots - for (set::iterator p = past_children.begin(); - p != past_children.end(); - ++p) - q.push_back(*p); - - while (!q.empty()) { - SnapRealm *realm = q.front(); - q.pop_front(); - - realm->invalidate_cached_snaps(); - - for (set::iterator p = realm->open_children.begin(); - p != realm->open_children.end(); - ++p) { - if (past_children.count(*p) == 0) - q.push_back(*p); - } - - for (set::iterator p = realm->open_past_children.begin(); - p != realm->open_past_children.end(); - ++p) { - if (past_children.count(*p) == 0) { - q.push_back(*p); - past_children.insert(*p); - } - } - } - - if (snapop == CEPH_SNAP_OP_DESTROY) { - // eval stray inodes if we delete snapshot from their past ancestor snaprealm - for (set::iterator p = past_children.begin(); - p != past_children.end(); - ++p) - maybe_eval_stray((*p)->inode, true); - } } void MDCache::send_snap_update(CInode *in, version_t stid, int snap_op) @@ -11251,7 +11153,6 @@ void MDCache::handle_dentry_unlink(const cref_t &m) bool hadrealm = (in->snaprealm ? 
true : false); in->decode_snap_blob(m->snapbl); ceph_assert(in->snaprealm); - ceph_assert(in->snaprealm->have_past_parents_open()); if (!hadrealm) do_realm_invalidate_and_update_notify(in, CEPH_SNAP_OP_SPLIT, false); } @@ -12929,18 +12830,9 @@ void MDCache::enqueue_scrub_work(MDRequestRef& mdr) header->set_origin(in); - Context *fin; - if (header->get_recursive()) { - header->get_origin()->get(CInode::PIN_SCRUBQUEUE); - fin = new MDSInternalContextWrapper(mds, - new LambdaContext([this, header](int r) { - recursive_scrub_finish(header); - header->get_origin()->put(CInode::PIN_SCRUBQUEUE); - }) - ); - } else { + Context *fin = nullptr; + if (!header->get_recursive()) fin = cs->take_finisher(); - } // If the scrub did some repair, then flush the journal at the end of // the scrub. Otherwise in the case of e.g. rewriting a backtrace @@ -12985,22 +12877,6 @@ void MDCache::enqueue_scrub_work(MDRequestRef& mdr) return; } -void MDCache::recursive_scrub_finish(const ScrubHeaderRef& header) -{ - if (header->get_origin()->is_base() && - header->get_force() && header->get_repair()) { - // notify snapserver that base directory is recursively scrubbed. - // After both root and mdsdir are recursively scrubbed, snapserver - // knows that all old format snaprealms are converted to the new - // format. 
- if (mds->mdsmap->get_num_in_mds() == 1 && - mds->mdsmap->get_num_failed_mds() == 0 && - mds->mdsmap->get_tableserver() == mds->get_nodeid()) { - mds->mark_base_recursively_scrubbed(header->get_origin()->ino()); - } - } -} - struct C_MDC_RespondInternalRequest : public MDCacheLogContext { MDRequestRef mdr; C_MDC_RespondInternalRequest(MDCache *c, MDRequestRef& m) : @@ -13216,49 +13092,6 @@ do_rdlocks: mds->server->respond_to_request(mdr, 0); } -void MDCache::upgrade_inode_snaprealm(CInode *in) -{ - MDRequestRef mdr = request_start_internal(CEPH_MDS_OP_UPGRADE_SNAPREALM); - mdr->pin(in); - mdr->internal_op_private = in; - mdr->internal_op_finish = new C_MDSInternalNoop; - upgrade_inode_snaprealm_work(mdr); -} - -void MDCache::upgrade_inode_snaprealm_work(MDRequestRef& mdr) -{ - CInode *in = static_cast(mdr->internal_op_private); - dout(10) << __func__ << " " << *in << dendl; - - if (!in->is_auth()) { - mds->server->respond_to_request(mdr, -ESTALE); - return; - } - - MutationImpl::LockOpVec lov; - lov.add_xlock(&in->snaplock); - if (!mds->locker->acquire_locks(mdr, lov)) - return; - - // project_snaprealm() upgrades snaprealm format - auto pi = in->project_inode(mdr, false, true); - pi.inode->version = in->pre_dirty(); - - mdr->ls = mds->mdlog->get_current_segment(); - EUpdate *le = new EUpdate(mds->mdlog, "upgrade_snaprealm"); - mds->mdlog->start_entry(le); - - if (in->is_base()) { - le->metablob.add_root(true, in); - } else { - CDentry *pdn = in->get_projected_parent_dn(); - le->metablob.add_dir_context(pdn->get_dir()); - le->metablob.add_primary_dentry(pdn, in, true); - } - - mds->mdlog->submit_entry(le, new C_MDC_RespondInternalRequest(this, mdr)); -} - void MDCache::flush_dentry(std::string_view path, Context *fin) { if (is_readonly()) { diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h index c614e2778c43b..3dab7fba81cdc 100644 --- a/src/mds/MDCache.h +++ b/src/mds/MDCache.h @@ -937,7 +937,6 @@ class MDCache { Formatter *f, Context *fin); void 
repair_inode_stats(CInode *diri); void repair_dirfrag_stats(CDir *dir); - void upgrade_inode_snaprealm(CInode *in); // my leader MDSRank *mds; @@ -1122,10 +1121,8 @@ class MDCache { * long time) */ void enqueue_scrub_work(MDRequestRef& mdr); - void recursive_scrub_finish(const ScrubHeaderRef& header); void repair_inode_stats_work(MDRequestRef& mdr); void repair_dirfrag_stats_work(MDRequestRef& mdr); - void upgrade_inode_snaprealm_work(MDRequestRef& mdr); ceph::unordered_map inode_map; // map of head inodes by ino map snap_inode_map; // map of snap inodes by ino diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc index 80e1efe215bca..745965b1e84de 100644 --- a/src/mds/MDSRank.cc +++ b/src/mds/MDSRank.cc @@ -702,12 +702,6 @@ void MDSRank::set_mdsmap_multimds_snaps_allowed() already_sent = true; } -void MDSRank::mark_base_recursively_scrubbed(inodeno_t ino) -{ - if (mdsmap->get_tableserver() == whoami) - snapserver->mark_base_recursively_scrubbed(ino); -} - void MDSRankDispatcher::tick() { heartbeat_reset(); diff --git a/src/mds/MDSRank.h b/src/mds/MDSRank.h index ceaaf1148a3ef..6373c419be01e 100644 --- a/src/mds/MDSRank.h +++ b/src/mds/MDSRank.h @@ -354,8 +354,6 @@ class MDSRank { const std::string& option, const std::string& value, std::ostream& ss); - void mark_base_recursively_scrubbed(inodeno_t ino); - // Reference to global MDS::mds_lock, so that users of MDSRank don't // carry around references to the outer MDS, and we can substitute // a separate lock here in future potentially. diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 0aebf011d765c..7b9320dac4a1e 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -3899,11 +3899,6 @@ void Server::handle_client_lookup_ino(MDRequestRef& mdr, return; } - if (mdr && in->snaprealm && !in->snaprealm->have_past_parents_open() && - !in->snaprealm->open_parents(new C_MDS_RetryRequest(mdcache, mdr))) { - return; - } - // check for nothing (not read or write); this still applies the // path check. 
if (!check_access(mdr, in, 0)) @@ -7425,7 +7420,6 @@ void Server::_logged_peer_rmdir(MDRequestRef& mdr, CDentry *dn, CDentry *straydn new_realm = !in->snaprealm; in->decode_snap_blob(mdr->peer_request->desti_snapbl); ceph_assert(in->snaprealm); - ceph_assert(in->snaprealm->have_past_parents_open()); } else { new_realm = false; } @@ -8668,7 +8662,6 @@ void Server::_rename_apply(MDRequestRef& mdr, CDentry *srcdn, CDentry *destdn, C new_oldin_snaprealm = !oldin->snaprealm; oldin->decode_snap_blob(mdr->peer_request->desti_snapbl); ceph_assert(oldin->snaprealm); - ceph_assert(oldin->snaprealm->have_past_parents_open()); } } @@ -8717,7 +8710,6 @@ void Server::_rename_apply(MDRequestRef& mdr, CDentry *srcdn, CDentry *destdn, C new_in_snaprealm = !in->snaprealm; in->decode_snap_blob(mdr->peer_request->srci_snapbl); ceph_assert(in->snaprealm); - ceph_assert(in->snaprealm->have_past_parents_open()); } } } @@ -10226,8 +10218,7 @@ void Server::_rmsnap_finish(MDRequestRef& mdr, CInode *diri, snapid_t snapid) respond_to_request(mdr, 0); // purge snapshot data - if (diri->snaprealm->have_past_parents_open()) - diri->purge_stale_snap_data(diri->snaprealm->get_snaps()); + diri->purge_stale_snap_data(diri->snaprealm->get_snaps()); } struct C_MDS_renamesnap_finish : public ServerLogContext { diff --git a/src/mds/SnapRealm.cc b/src/mds/SnapRealm.cc index 7c89ba019d587..a8d683c26b1bd 100644 --- a/src/mds/SnapRealm.cc +++ b/src/mds/SnapRealm.cc @@ -46,18 +46,6 @@ ostream& operator<<(ostream& out, const SnapRealm& realm) out << " snaps=" << realm.srnode.snaps; out << " past_parent_snaps=" << realm.srnode.past_parent_snaps; - if (realm.srnode.past_parents.size()) { - out << " past_parents=("; - for (map::const_iterator p = realm.srnode.past_parents.begin(); - p != realm.srnode.past_parents.end(); - ++p) { - if (p != realm.srnode.past_parents.begin()) out << ","; - out << p->second.first << "-" << p->first - << "=" << p->second.ino; - } - out << ")"; - } - if 
(realm.srnode.is_parent_global()) out << " global "; out << " " << &realm << ")"; @@ -65,194 +53,11 @@ ostream& operator<<(ostream& out, const SnapRealm& realm) } SnapRealm::SnapRealm(MDCache *c, CInode *in) : - mdcache(c), inode(in) + mdcache(c), inode(in), inodes_with_caps(member_offset(CInode, item_caps)) { global = (inode->ino() == MDS_INO_GLOBAL_SNAPREALM); } -void SnapRealm::add_open_past_parent(SnapRealm *parent, snapid_t last) -{ - auto p = open_past_parents.find(parent->inode->ino()); - if (p != open_past_parents.end()) { - ceph_assert(p->second.second.count(last) == 0); - p->second.second.insert(last); - } else { - open_past_parents[parent->inode->ino()].first = parent; - open_past_parents[parent->inode->ino()].second.insert(last); - parent->open_past_children.insert(this); - parent->inode->get(CInode::PIN_PASTSNAPPARENT); - } - ++num_open_past_parents; -} - -void SnapRealm::remove_open_past_parent(inodeno_t ino, snapid_t last) -{ - auto p = open_past_parents.find(ino); - ceph_assert(p != open_past_parents.end()); - auto q = p->second.second.find(last); - ceph_assert(q != p->second.second.end()); - p->second.second.erase(q); - --num_open_past_parents; - if (p->second.second.empty()) { - SnapRealm *parent = p->second.first; - open_past_parents.erase(p); - parent->open_past_children.erase(this); - parent->inode->put(CInode::PIN_PASTSNAPPARENT); - } -} - -struct C_SR_RetryOpenParents : public MDSContext { - SnapRealm *sr; - snapid_t first, last, parent_last; - inodeno_t parent; - MDSContext* fin; - C_SR_RetryOpenParents(SnapRealm *s, snapid_t f, snapid_t l, snapid_t pl, - inodeno_t p, MDSContext *c) : - sr(s), first(f), last(l), parent_last(pl), parent(p), fin(c) { - sr->inode->get(CInode::PIN_OPENINGSNAPPARENTS); - } - MDSRank *get_mds() override { return sr->mdcache->mds; } - void finish(int r) override { - if (r < 0) - sr->_remove_missing_parent(parent_last, parent, r); - if (sr->_open_parents(fin, first, last)) { - if (fin) - fin->complete(0); - } - 
sr->inode->put(CInode::PIN_OPENINGSNAPPARENTS); - } -}; - -void SnapRealm::_remove_missing_parent(snapid_t snapid, inodeno_t parent, int err) -{ - map::iterator p = srnode.past_parents.find(snapid); - if (p != srnode.past_parents.end()) { - dout(10) << __func__ << " " << parent << " [" << p->second.first << "," - << p->first << "] errno " << err << dendl; - srnode.past_parents.erase(p); - past_parents_dirty = true; - } else { - dout(10) << __func__ << " " << parent << " not found" << dendl; - } -} - -bool SnapRealm::_open_parents(MDSContext *finish, snapid_t first, snapid_t last) -{ - dout(10) << "open_parents [" << first << "," << last << "]" << dendl; - if (open) - return true; - - // make sure my current parents' parents are open... - if (parent) { - dout(10) << " current parent [" << srnode.current_parent_since << ",head] is " << *parent - << " on " << *parent->inode << dendl; - if (last >= srnode.current_parent_since && - !parent->_open_parents(finish, std::max(first, srnode.current_parent_since), last)) - return false; - } - - if (!srnode.past_parent_snaps.empty()) - ceph_assert(mdcache->mds->snapclient->get_cached_version() > 0); - - if (!srnode.past_parents.empty() && - mdcache->mds->allows_multimds_snaps()) { - dout(10) << " skip non-empty past_parents since multimds_snaps is allowed" << dendl; - open = true; - return true; - } - - // and my past parents too! 
- ceph_assert(srnode.past_parents.size() >= num_open_past_parents); - if (srnode.past_parents.size() > num_open_past_parents) { - for (map::iterator p = srnode.past_parents.begin(); - p != srnode.past_parents.end(); ) { - dout(10) << " past_parent [" << p->second.first << "," << p->first << "] is " - << p->second.ino << dendl; - CInode *parent = mdcache->get_inode(p->second.ino); - if (!parent) { - C_SR_RetryOpenParents *fin = new C_SR_RetryOpenParents(this, first, last, p->first, - p->second.ino, finish); - mdcache->open_ino(p->second.ino, mdcache->mds->mdsmap->get_metadata_pool(), fin); - return false; - } - if (parent->state_test(CInode::STATE_PURGING)) { - dout(10) << " skip purging past_parent " << *parent << dendl; - srnode.past_parents.erase(p++); - past_parents_dirty = true; - continue; - } - ceph_assert(parent->snaprealm); // hmm! - if (!parent->snaprealm->_open_parents(finish, p->second.first, p->first)) - return false; - auto q = open_past_parents.find(p->second.ino); - if (q == open_past_parents.end() || - q->second.second.count(p->first) == 0) { - add_open_past_parent(parent->snaprealm, p->first); - } - ++p; - } - } - - open = true; - return true; -} - -bool SnapRealm::open_parents(MDSContext *retryorfinish) { - if (!_open_parents(retryorfinish)) - return false; - delete retryorfinish; - return true; -} - -bool SnapRealm::have_past_parents_open(snapid_t first, snapid_t last) const -{ - dout(10) << "have_past_parents_open [" << first << "," << last << "]" << dendl; - if (open) - return true; - - if (!srnode.past_parent_snaps.empty()) - ceph_assert(mdcache->mds->snapclient->get_cached_version() > 0); - - if (!srnode.past_parents.empty() && - mdcache->mds->allows_multimds_snaps()) { - dout(10) << " skip non-empty past_parents since multimds_snaps is allowed" << dendl; - open = true; - return true; - } - - for (auto p = srnode.past_parents.lower_bound(first); - p != srnode.past_parents.end(); - ++p) { - if (p->second.first > last) - break; - dout(10) << " 
past parent [" << p->second.first << "," << p->first << "] was " - << p->second.ino << dendl; - auto q = open_past_parents.find(p->second.ino); - if (q == open_past_parents.end()) { - dout(10) << " past parent " << p->second.ino << " is not open" << dendl; - return false; - } - SnapRealm *parent_realm = q->second.first; - if (!parent_realm->have_past_parents_open(std::max(first, p->second.first), - std::min(last, p->first))) - return false; - } - - open = true; - return true; -} - -void SnapRealm::close_parents() -{ - for (auto p = open_past_parents.begin(); p != open_past_parents.end(); ++p) { - num_open_past_parents -= p->second.second.size(); - p->second.first->inode->put(CInode::PIN_PASTSNAPPARENT); - p->second.first->open_past_children.erase(this); - } - open_past_parents.clear(); -} - - /* * get list of snaps for this realm. we must include parents' snaps * for the intervals during which they were our parent. @@ -280,24 +85,6 @@ void SnapRealm::build_snap_set() const cached_last_created = std::max(cached_last_created, last); } cached_snaps.insert(snaps.begin(), snaps.end()); - } else { - // include snaps for parents - for (const auto& p : srnode.past_parents) { - const CInode *oldparent = mdcache->get_inode(p.second.ino); - ceph_assert(oldparent); // call open_parents first! - ceph_assert(oldparent->snaprealm); - - const set& snaps = oldparent->snaprealm->get_snaps(); - snapid_t last = 0; - for (auto q = snaps.lower_bound(p.second.first); - q != snaps.end() && *q <= p.first; - q++) { - cached_snaps.insert(*q); - last = *q; - } - cached_seq = std::max(cached_seq, last); - cached_last_created = std::max(cached_last_created, last); - } } snapid_t parent_seq = parent ? 
parent->get_newest_seq() : snapid_t(0); @@ -312,7 +99,6 @@ void SnapRealm::build_snap_set() const void SnapRealm::check_cache() const { - ceph_assert(have_past_parents_open()); snapid_t seq; snapid_t last_created; snapid_t last_destroyed = mdcache->mds->snapclient->get_last_destroyed(); @@ -402,18 +188,6 @@ void SnapRealm::get_snap_info(map& infomap, snapid_t map _infomap; mdcache->mds->snapclient->get_snap_infos(_infomap, snaps); infomap.insert(_infomap.begin(), _infomap.end()); - } else { - // include snaps for parents during intervals that intersect [first,last] - for (map::iterator p = srnode.past_parents.lower_bound(first); - p != srnode.past_parents.end() && p->first >= first && p->second.first <= last; - ++p) { - CInode *oldparent = mdcache->get_inode(p->second.ino); - ceph_assert(oldparent); // call open_parents first! - ceph_assert(oldparent->snaprealm); - oldparent->snaprealm->get_snap_info(infomap, - std::max(first, p->second.first), - std::min(last, p->first)); - } } if (srnode.current_parent_since <= last && parent) @@ -440,14 +214,6 @@ std::string_view SnapRealm::get_snapname(snapid_t snapid, inodeno_t atino) return sinfo->get_long_name(); } } - } else { - map::iterator p = srnode.past_parents.lower_bound(snapid); - if (p != srnode.past_parents.end() && p->second.first <= snapid) { - CInode *oldparent = mdcache->get_inode(p->second.ino); - ceph_assert(oldparent); // call open_parents first! 
- ceph_assert(oldparent->snaprealm); - return oldparent->snaprealm->get_snapname(snapid, atino); - } } ceph_assert(srnode.current_parent_since <= snapid); @@ -502,20 +268,6 @@ snapid_t SnapRealm::resolve_snapname(std::string_view n, inodeno_t atino, snapid if (!actual && it.second->name == pname && it.second->ino == pino) return it.first; } - } else { - // include snaps for parents during intervals that intersect [first,last] - for (map::iterator p = srnode.past_parents.lower_bound(first); - p != srnode.past_parents.end() && p->first >= first && p->second.first <= last; - ++p) { - CInode *oldparent = mdcache->get_inode(p->second.ino); - ceph_assert(oldparent); // call open_parents first! - ceph_assert(oldparent->snaprealm); - snapid_t r = oldparent->snaprealm->resolve_snapname(n, atino, - std::max(first, p->second.first), - std::min(last, p->first)); - if (r) - return r; - } } if (parent && srnode.current_parent_since <= last) @@ -584,8 +336,7 @@ void SnapRealm::split_at(SnapRealm *child) } // split inodes_with_caps - for (elist::iterator p = inodes_with_caps.begin(member_offset(CInode, item_caps)); - !p.end(); ) { + for (auto p = inodes_with_caps.begin(); !p.end(); ) { CInode *in = *p; ++p; // does inode fall within the child realm? 
@@ -604,8 +355,6 @@ void SnapRealm::merge_to(SnapRealm *newparent) newparent = parent; dout(10) << "merge to " << *newparent << " on " << *newparent->inode << dendl; - ceph_assert(open_past_children.empty()); - dout(10) << " open_children are " << open_children << dendl; for (auto realm : open_children) { dout(20) << " child realm " << *realm << " on " << *realm->inode << dendl; @@ -614,8 +363,7 @@ void SnapRealm::merge_to(SnapRealm *newparent) } open_children.clear(); - elist::iterator p = inodes_with_caps.begin(member_offset(CInode, item_caps)); - while (!p.end()) { + for (auto p = inodes_with_caps.begin(); !p.end(); ) { CInode *in = *p; ++p; in->move_to_realm(newparent); @@ -658,17 +406,6 @@ void SnapRealm::build_snap_trace() const auto p = past.lower_bound(srnode.current_parent_since); past.erase(p, past.end()); } - } else if (!srnode.past_parents.empty()) { - const set& snaps = get_snaps(); - for (const auto& p : srnode.past_parents) { - for (auto q = snaps.lower_bound(p.second.first); - q != snaps.end() && *q <= p.first; - q++) { - if (srnode.snaps.count(*q)) - continue; - past.insert(*q); - } - } } if (!past.empty()) { @@ -693,31 +430,19 @@ void SnapRealm::build_snap_trace() const cached_snap_trace.append(parent->get_snap_trace()); } -void SnapRealm::prune_past_parents() +void SnapRealm::prune_past_parent_snaps() { - dout(10) << "prune_past_parents" << dendl; + dout(10) << __func__ << dendl; check_cache(); - // convert past_parents to past_parent_snaps - if (!srnode.past_parents.empty()) { - for (auto p = cached_snaps.begin(); - p != cached_snaps.end() && *p < srnode.current_parent_since; - ++p) { - if (!srnode.snaps.count(*p)) - srnode.past_parent_snaps.insert(*p); - } - srnode.past_parents.clear(); - past_parents_dirty = true; - } - for (auto p = srnode.past_parent_snaps.begin(); p != srnode.past_parent_snaps.end(); ) { auto q = cached_snaps.find(*p); if (q == cached_snaps.end()) { - dout(10) << "prune_past_parents pruning " << *p << dendl; + dout(10) << 
__func__ << " pruning " << *p << dendl; srnode.past_parent_snaps.erase(p++); } else { - dout(10) << "prune_past_parents keeping " << *p << dendl; + dout(10) << __func__ << " keeping " << *p << dendl; ++p; } } diff --git a/src/mds/SnapRealm.h b/src/mds/SnapRealm.h index 8c7a48ef0fbf5..7ddffe0ba9a85 100644 --- a/src/mds/SnapRealm.h +++ b/src/mds/SnapRealm.h @@ -36,18 +36,9 @@ public: return false; } - bool _open_parents(MDSContext *retryorfinish, snapid_t first=1, snapid_t last=CEPH_NOSNAP); - bool open_parents(MDSContext *retryorfinish); - void _remove_missing_parent(snapid_t snapid, inodeno_t parent, int err); - bool have_past_parents_open(snapid_t first=1, snapid_t last=CEPH_NOSNAP) const; - void add_open_past_parent(SnapRealm *parent, snapid_t last); - void remove_open_past_parent(inodeno_t ino, snapid_t last); - void close_parents(); - - void prune_past_parents(); - bool has_past_parents() const { - return !srnode.past_parent_snaps.empty() || - !srnode.past_parents.empty(); + void prune_past_parent_snaps(); + bool has_past_parent_snaps() const { + return !srnode.past_parent_snaps.empty(); } void build_snap_set() const; @@ -133,25 +124,18 @@ public: MDCache *mdcache; CInode *inode; - bool past_parents_dirty = false; - SnapRealm *parent = nullptr; std::set open_children; // active children that are currently open - std::set open_past_children; // past children who has pinned me - elist inodes_with_caps = 0; // for efficient realm splits + elist inodes_with_caps; // for efficient realm splits std::map* > client_caps; // to identify clients who need snap notifications protected: void check_cache() const; private: - mutable bool open = false; // set to true once all past_parents are opened bool global; - std::map>> open_past_parents; // these are explicitly pinned. - unsigned num_open_past_parents = 0; - // cache mutable snapid_t cached_seq; // max seq over self and all past+present parents. 
mutable snapid_t cached_last_created; // max last_created over all past+present parents diff --git a/src/mds/SnapServer.h b/src/mds/SnapServer.h index 5b38f55587fc5..a1019c3c864f6 100644 --- a/src/mds/SnapServer.h +++ b/src/mds/SnapServer.h @@ -53,17 +53,8 @@ public: void check_osd_map(bool force); - void mark_base_recursively_scrubbed(inodeno_t ino) { - if (ino == MDS_INO_ROOT) - root_scrubbed = true; - else if (ino == MDS_INO_MDSDIR(rank)) - mdsdir_scrubbed = true; - else - ceph_abort(); - } bool can_allow_multimds_snaps() const { - return (root_scrubbed && mdsdir_scrubbed) || - snaps.empty() || snaps.begin()->first >= snaprealm_v2_since; + return snaps.empty() || snaps.begin()->first >= snaprealm_v2_since; } void encode(bufferlist& bl) const { @@ -144,9 +135,6 @@ protected: set pending_noop; version_t last_checked_osdmap = 0; - - bool root_scrubbed = false; // all snaprealms under root are converted? - bool mdsdir_scrubbed = false; // all snaprealms under ~mds0 are converted? }; WRITE_CLASS_ENCODER(SnapServer) diff --git a/src/mds/StrayManager.cc b/src/mds/StrayManager.cc index 2ff639e18d22d..d85ab139dcea4 100644 --- a/src/mds/StrayManager.cc +++ b/src/mds/StrayManager.cc @@ -347,16 +347,7 @@ void StrayManager::_enqueue(CDentry *dn, bool trunc) return; } - CInode *in = dn->get_linkage()->get_inode(); - if (in->snaprealm && - !in->snaprealm->have_past_parents_open() && - !in->snaprealm->open_parents(new C_RetryEnqueue(this, dn, trunc))) { - // this can happen if the dentry had been trimmed from cache. - return; - } - dn->get_dir()->auth_pin(this); - if (trunc) { truncate(dn); } else { @@ -474,15 +465,11 @@ bool StrayManager::_eval_stray(CDentry *dn) // only important for directories. normal file data snaps are handled // by the object store. 
if (in->snaprealm) { - if (!in->snaprealm->have_past_parents_open() && - !in->snaprealm->open_parents(new C_MDC_EvalStray(this, dn))) { - return false; - } - in->snaprealm->prune_past_parents(); + in->snaprealm->prune_past_parent_snaps(); in->purge_stale_snap_data(in->snaprealm->get_snaps()); } if (in->is_dir()) { - if (in->snaprealm && in->snaprealm->has_past_parents()) { + if (in->snaprealm && in->snaprealm->has_past_parent_snaps()) { dout(20) << " directory has past parents " << in->snaprealm << dendl; if (in->state_test(CInode::STATE_MISSINGOBJS)) { @@ -527,7 +514,7 @@ bool StrayManager::_eval_stray(CDentry *dn) return false; } // don't purge multiversion inode with snap data - if (in->snaprealm && in->snaprealm->has_past_parents() && + if (in->snaprealm && in->snaprealm->has_past_parent_snaps() && in->is_any_old_inodes()) { // A file with snapshots: we will truncate the HEAD revision // but leave the metadata intact. -- 2.39.5