From: Yan, Zheng Date: Mon, 16 Apr 2018 09:27:22 +0000 (+0800) Subject: mds: upgrade snaprealm format during scrub X-Git-Tag: v13.1.0~2^2~9 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=c73c247cd251b3cc9f6864a6f0f05342cfee2ed7;p=ceph-ci.git mds: upgrade snaprealm format during scrub Signed-off-by: "Yan, Zheng" --- diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h index d0f3b9abdaf..7e3fe651fa9 100644 --- a/src/include/ceph_fs.h +++ b/src/include/ceph_fs.h @@ -366,7 +366,8 @@ enum { CEPH_MDS_OP_FLUSH = 0x01502, CEPH_MDS_OP_ENQUEUE_SCRUB = 0x01503, CEPH_MDS_OP_REPAIR_FRAGSTATS = 0x01504, - CEPH_MDS_OP_REPAIR_INODESTATS = 0x01505 + CEPH_MDS_OP_REPAIR_INODESTATS = 0x01505, + CEPH_MDS_OP_UPGRADE_SNAPREALM = 0x01506 }; extern const char *ceph_mds_op_name(int op); diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index 628209068c9..db7e0915842 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -435,6 +435,8 @@ sr_t *CInode::prepare_new_srnode(snapid_t snapid) new_srnode->seq = snaprealm->get_newest_seq(); new_srnode->past_parents.clear(); } + if (snaprealm) + snaprealm->past_parents_dirty = false; } else { if (snapid == 0) snapid = mdcache->get_global_snaprealm()->get_newest_seq(); @@ -3994,7 +3996,8 @@ void CInode::validate_disk_state(CInode::validated_data *results, START = 0, BACKTRACE, INODE, - DIRFRAGS + DIRFRAGS, + SNAPREALM, }; ValidationContinuation(CInode *i, @@ -4009,6 +4012,7 @@ void CInode::validate_disk_state(CInode::validated_data *results, set_callback(BACKTRACE, static_cast(&ValidationContinuation::_backtrace)); set_callback(INODE, static_cast(&ValidationContinuation::_inode_disk)); set_callback(DIRFRAGS, static_cast(&ValidationContinuation::_dirfrags)); + set_callback(SNAPREALM, static_cast(&ValidationContinuation::_snaprealm)); } ~ValidationContinuation() override { @@ -4056,6 +4060,10 @@ void CInode::validate_disk_state(CInode::validated_data *results, return true; } + // prefetch snaprealm's past parents + if (in->snaprealm && !in->snaprealm->have_past_parents_open()) + in->snaprealm->open_parents(nullptr); + C_OnFinisher *conf = new C_OnFinisher(get_io_callback(BACKTRACE), in->mdcache->mds->finisher); @@ -4157,13 +4165,13 @@ next: } } - // quit if we're a file, or kick off directory checks otherwise - // TODO: validate on-disk inode for non-base directories - if (!in->is_dir()) { - return true; - } - return validate_directory_data(); + if (in->is_dir()) { + return validate_directory_data(); + } else { + // TODO: validate on-disk inode for normal files + return check_inode_snaprealm(); + } } bool validate_directory_data() { @@ -4178,8 +4186,9 @@ next: shadow_in->fetch(get_internal_callback(INODE)); return false; } else { + // TODO: validate on-disk inode for non-base directories results->inode.passed = true; - return check_dirfrag_rstats(); + return check_dirfrag_rstats(); } } @@ -4193,7 +4202,7 @@ next: mempool_inode& i = in->inode; if (si.version > i.version) { // uh, what? - results->inode.error_str << "On-disk inode is newer than in-memory one!"; + results->inode.error_str << "On-disk inode is newer than in-memory one; "; goto next; } else { bool divergent = false; @@ -4201,7 +4210,7 @@ next: results->inode.passed = !divergent && r >= 0; if (!results->inode.passed) { results->inode.error_str << - "On-disk inode is divergent or newer than in-memory one!"; + "On-disk inode is divergent or newer than in-memory one; "; goto next; } } @@ -4291,6 +4300,37 @@ next: results->raw_stats.passed = true; next: + // snaprealm + return check_inode_snaprealm(); + } + + bool check_inode_snaprealm() { + if (!in->snaprealm) + return true; + + if (!in->snaprealm->have_past_parents_open()) { + in->snaprealm->open_parents(get_internal_callback(SNAPREALM)); + return false; + } else { + return immediate(SNAPREALM, 0); + } + } + + bool _snaprealm(int rval) { + + if (in->snaprealm->past_parents_dirty || + !in->get_projected_srnode()->past_parents.empty()) { + // temporarily store error in field of on-disk inode validation temporarily + results->inode.checked = true; + results->inode.passed = false; + if (in->scrub_infop->header->get_repair()) { + results->inode.error_str << "Inode has old format snaprealm (will upgrade)"; + results->inode.repaired = true; + in->mdcache->upgrade_inode_snaprealm(in); + } else { + results->inode.error_str << "Inode has old format snaprealm"; + } + } return true; } diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 4ef6e37c411..e93db51b5c8 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -9319,6 +9319,9 @@ void MDCache::dispatch_request(MDRequestRef& mdr) case CEPH_MDS_OP_REPAIR_INODESTATS: repair_inode_stats_work(mdr); break; + case CEPH_MDS_OP_UPGRADE_SNAPREALM: + upgrade_inode_snaprealm_work(mdr); + break; default: ceph_abort(); } @@ -12349,9 +12352,9 @@ void MDCache::enqueue_scrub_work(MDRequestRef& mdr) return; } -struct C_MDC_RepairDirfragStats : public MDCacheLogContext { +struct C_MDC_RespondInternalRequest : public MDCacheLogContext { MDRequestRef mdr; - C_MDC_RepairDirfragStats(MDCache *c, MDRequestRef& m) : + C_MDC_RespondInternalRequest(MDCache *c, MDRequestRef& m) : MDCacheLogContext(c), mdr(m) {} void finish(int r) override { mdr->apply(); @@ -12461,7 +12464,7 @@ void MDCache::repair_dirfrag_stats_work(MDRequestRef& mdr) le->metablob.add_dir_context(dir); le->metablob.add_dir(dir, true); - mds->mdlog->submit_entry(le, new C_MDC_RepairDirfragStats(this, mdr)); + mds->mdlog->submit_entry(le, new C_MDC_RespondInternalRequest(this, mdr)); } void MDCache::repair_inode_stats(CInode *diri) @@ -12557,6 +12560,53 @@ do_rdlocks: mds->server->respond_to_request(mdr, 0); } +void MDCache::upgrade_inode_snaprealm(CInode *in) +{ + MDRequestRef mdr = request_start_internal(CEPH_MDS_OP_UPGRADE_SNAPREALM); + mdr->pin(in); + mdr->internal_op_private = in; + mdr->internal_op_finish = new C_MDSInternalNoop; + upgrade_inode_snaprealm_work(mdr); +} + +void MDCache::upgrade_inode_snaprealm_work(MDRequestRef& mdr) +{ + CInode *in = static_cast(mdr->internal_op_private); + dout(10) << __func__ << " " << *in << dendl; + + if (!in->is_auth()) { + mds->server->respond_to_request(mdr, -ESTALE); + return; + } + + set rdlocks, wrlocks, xlocks; + mds->locker->include_snap_rdlocks(rdlocks, in); + rdlocks.erase(&in->snaplock); + xlocks.insert(&in->snaplock); + + if (!mds->locker->acquire_locks(mdr, rdlocks, wrlocks, xlocks)) + return; + + // project_snaprealm() upgrades snaprealm format + auto &pi = in->project_inode(false, true); + mdr->add_projected_inode(in); + pi.inode.version = in->pre_dirty(); + + mdr->ls = mds->mdlog->get_current_segment(); + EUpdate *le = new EUpdate(mds->mdlog, "upgrade_snaprealm"); + mds->mdlog->start_entry(le); + + if (in->is_base()) { + le->metablob.add_root(true, in, in->get_projected_inode()); + } else { + CDentry *pdn = in->get_projected_parent_dn(); + le->metablob.add_dir_context(pdn->get_dir()); + le->metablob.add_primary_dentry(pdn, in, true); + } + + mds->mdlog->submit_entry(le, new C_MDC_RespondInternalRequest(this, mdr)); +} + void MDCache::flush_dentry(std::string_view path, Context *fin) { if (is_readonly()) { diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h index d8c9cd401de..2963e66c9a9 100644 --- a/src/mds/MDCache.h +++ b/src/mds/MDCache.h @@ -1229,7 +1229,8 @@ protected: void enqueue_scrub_work(MDRequestRef& mdr); void repair_inode_stats_work(MDRequestRef& mdr); void repair_dirfrag_stats_work(MDRequestRef& mdr); - friend class C_MDC_RepairDirfragStats; + void upgrade_inode_snaprealm_work(MDRequestRef& mdr); + friend class C_MDC_RespondInternalRequest; public: void flush_dentry(std::string_view path, Context *fin); /** @@ -1240,6 +1241,7 @@ public: Formatter *f, Context *fin); void repair_inode_stats(CInode *diri); void repair_dirfrag_stats(CDir *dir); + void upgrade_inode_snaprealm(CInode *in); public: /* Because exports may fail, this set lets us keep track of inodes that need exporting. */ diff --git a/src/mds/SnapRealm.cc b/src/mds/SnapRealm.cc index 4a64415ac6f..ff3a3096811 100644 --- a/src/mds/SnapRealm.cc +++ b/src/mds/SnapRealm.cc @@ -67,7 +67,7 @@ ostream& operator<<(ostream& out, const SnapRealm& realm) } SnapRealm::SnapRealm(MDCache *c, CInode *in) : - mdcache(c), inode(in), open(false), parent(0), + mdcache(c), inode(in), parent(nullptr), num_open_past_parents(0), inodes_with_caps(0) { global = (inode->ino() == MDS_INO_GLOBAL_SNAPREALM); @@ -118,8 +118,10 @@ struct C_SR_RetryOpenParents : public MDSInternalContextBase { void finish(int r) override { if (r < 0) sr->_remove_missing_parent(parent_last, parent, r); - if (sr->_open_parents(fin, first, last)) - fin->complete(0); + if (sr->_open_parents(fin, first, last)) { + if (fin) + fin->complete(0); + } sr->inode->put(CInode::PIN_OPENINGSNAPPARENTS); } }; @@ -131,6 +133,7 @@ void SnapRealm::_remove_missing_parent(snapid_t snapid, inodeno_t parent, int er dout(10) << __func__ << " " << parent << " [" << p->second.first << "," << p->first << "] errno " << err << dendl; srnode.past_parents.erase(p); + past_parents_dirty = true; } else { dout(10) << __func__ << " " << parent << " not found" << dendl; } @@ -178,6 +181,7 @@ bool SnapRealm::_open_parents(MDSInternalContextBase *finish, snapid_t first, sn if (parent->state_test(CInode::STATE_PURGING)) { dout(10) << " skip purging past_parent " << *parent << dendl; srnode.past_parents.erase(p++); + past_parents_dirty = true; continue; } assert(parent->snaprealm); // hmm! @@ -702,6 +706,7 @@ void SnapRealm::prune_past_parents() srnode.past_parent_snaps.insert(*p); } srnode.past_parents.clear(); + past_parents_dirty = true; } for (auto p = srnode.past_parent_snaps.begin(); diff --git a/src/mds/SnapRealm.h b/src/mds/SnapRealm.h index cc5e9df7925..90ee2ef5843 100644 --- a/src/mds/SnapRealm.h +++ b/src/mds/SnapRealm.h @@ -45,7 +45,8 @@ public: MDCache *mdcache; CInode *inode; - mutable bool open; // set to true once all past_parents are opened + mutable bool open = false; // set to true once all past_parents are opened + bool past_parents_dirty = false; bool global; SnapRealm *parent;