From b593e5a881fbc8e3087f5e2e463dfa71d485730e Mon Sep 17 00:00:00 2001
From: "Yan, Zheng"
Date: Fri, 12 Oct 2018 16:01:34 +0800
Subject: [PATCH] tools/cephfs: make 'cephfs-data-scan scan_links' update snaptable

While walking dentries, scan_links now also gathers what is needed to
rebuild the MDS snaptable:

- last_snap: the largest snapid implied by any dentry's 'first', by the
  largest key in an inode's old_inodes, by any snapshot collected from a
  snaprealm, and by the snap_seq of the filesystem's data pools.
- snaprealm_v2_since: one past the largest past_parents key seen.
- snaps: the snapshots recorded in each inode's snaprealm, keeping only
  entries whose ino matches the inode and whose snapid matches the map key.
  For duplicated primary dentries only the newest link contributes.

The existing snaptable is loaded (or reset when loading fails), brought up
to date with the new SnapServer::force_update() and saved only if it was
modified.

To support this, MDSTableServer gains reset_state(), which
SnapServer::reset_state() now calls after also clearing pending_update,
pending_destroy and pending_noop, and SnapInfo gets operator==.

[review: ignore dentries whose 'first' is 0 so that 'dnfirst - 1' cannot
 wrap around and end up as last_snap; take the snaps map in force_update()
 by const reference; fix grammar in the release note and test docstring]

Signed-off-by: "Yan, Zheng"
---
 PendingReleaseNotes               |  3 +-
 qa/tasks/cephfs/test_data_scan.py | 35 +++++++++++++++
 src/mds/MDSTableServer.h          |  5 +++
 src/mds/SnapServer.cc             | 39 +++++++++++++++-
 src/mds/SnapServer.h              |  3 ++
 src/mds/snap.h                    |  6 +++
 src/tools/cephfs/DataScan.cc      | 74 ++++++++++++++++++++++++++++++-
 src/tools/cephfs/DataScan.h       |  6 ++-
 8 files changed, 166 insertions(+), 5 deletions(-)

diff --git a/PendingReleaseNotes b/PendingReleaseNotes
index 130b66e423d..7800b9d219f 100644
--- a/PendingReleaseNotes
+++ b/PendingReleaseNotes
@@ -157,7 +157,8 @@
 * the callback function passed to LibRGWFS.readdir() now accepts a ``flags``
   parameter. it will be the last parameter passed to ``readdir()` method.
 
-* The 'cephfs-data-scan scan_links' now automatically repair inotables.
+* The 'cephfs-data-scan scan_links' now automatically repairs inotables and
+  snaptable.
 
 >=13.1.0
 --------
diff --git a/qa/tasks/cephfs/test_data_scan.py b/qa/tasks/cephfs/test_data_scan.py
index 0cb972e51b4..6e1e23063bc 100644
--- a/qa/tasks/cephfs/test_data_scan.py
+++ b/qa/tasks/cephfs/test_data_scan.py
@@ -642,3 +642,38 @@ class TestDataScan(CephFSTestCase):
         mds1_inotable = json.loads(self.fs.table_tool([self.fs.name + ":1", "show", "inode"]))
         self.assertGreaterEqual(
             mds1_inotable['1']['data']['inotable']['free'][0]['start'], file_ino)
+
+    def test_rebuild_snaptable(self):
+        """
+        The scan_links command repairs the snaptable
+        """
+        self.fs.set_allow_new_snaps(True)
+
+        self.mount_a.run_shell(["mkdir", "dir1"])
+        self.mount_a.run_shell(["mkdir", "dir1/.snap/s1"])
+        self.mount_a.run_shell(["mkdir", "dir1/.snap/s2"])
+        self.mount_a.run_shell(["rmdir", "dir1/.snap/s2"])
+
+        self.mount_a.umount_wait()
+
+        mds0_id = self.fs.get_active_names()[0]
+        self.fs.mds_asok(["flush", "journal"], mds0_id)
+
+        # wait for mds to update removed snaps
+        time.sleep(10)
+
+        old_snaptable = json.loads(self.fs.table_tool([self.fs.name + ":0", "show", "snap"]))
+        # stamps may have minor difference
+        for item in old_snaptable['snapserver']['snaps']:
+            del item['stamp']
+
+        self.fs.rados(["rm", "mds_snaptable"])
+        self.fs.data_scan(["scan_links", "--filesystem", self.fs.name])
+
+        new_snaptable = json.loads(self.fs.table_tool([self.fs.name + ":0", "show", "snap"]))
+        for item in new_snaptable['snapserver']['snaps']:
+            del item['stamp']
+        self.assertGreaterEqual(
+            new_snaptable['snapserver']['last_snap'], old_snaptable['snapserver']['last_snap'])
+        self.assertEqual(
+            new_snaptable['snapserver']['snaps'], old_snaptable['snapserver']['snaps'])
diff --git a/src/mds/MDSTableServer.h b/src/mds/MDSTableServer.h
index 065aa474eb5..74c1ca382e4 100644
--- a/src/mds/MDSTableServer.h
+++ b/src/mds/MDSTableServer.h
@@ -94,6 +94,11 @@ public:
     MDSTable(m, get_mdstable_name(tab), false), table(tab), recovered(false) {}
   ~MDSTableServer() override {}
 
+  void reset_state() override {
+    pending_for_mds.clear();
+    ++version;
+  }
+
   void handle_request(const MMDSTableRequest::const_ref &m);
   void do_server_update(bufferlist& bl);
 
diff --git a/src/mds/SnapServer.cc b/src/mds/SnapServer.cc
index 27611f4af14..d9690d400fc 100644
--- a/src/mds/SnapServer.cc
+++ b/src/mds/SnapServer.cc
@@ -38,6 +38,9 @@ void SnapServer::reset_state()
   last_snap = 1; /* snapid 1 reserved for initial root snaprealm */
   snaps.clear();
   need_to_purge.clear();
+  pending_update.clear();
+  pending_destroy.clear();
+  pending_noop.clear();
 
   // find any removed snapshot in data pools
   if (mds) { // only if I'm running in a live MDS
@@ -61,7 +64,8 @@ void SnapServer::reset_state()
   last_created = last_snap;
   last_destroyed = last_snap;
   snaprealm_v2_since = last_snap + 1;
-  version++;
+
+  MDSTableServer::reset_state();
 }
 
 
@@ -437,3 +441,36 @@ void SnapServer::generate_test_instances(list<SnapServer*>& ls)
 
   ls.push_back(populated);
 }
+
+bool SnapServer::force_update(snapid_t last, snapid_t v2_since,
+                              const map<snapid_t, SnapInfo>& _snaps)
+{
+  bool modified = false;
+  if (last > last_snap) {
+    derr << " updating last_snap " << last_snap << " -> " << last << dendl;
+    last_snap = last;
+    last_created = last;
+    last_destroyed = last;
+    modified = true;
+  }
+  if (v2_since > snaprealm_v2_since) {
+    derr << " updating snaprealm_v2_since " << snaprealm_v2_since
+         << " -> " << v2_since << dendl;
+    snaprealm_v2_since = v2_since;
+    modified = true;
+  }
+  if (snaps != _snaps) {
+    derr << " updating snaps {" << snaps << "} -> {" << _snaps << "}" << dendl;
+    snaps = _snaps;
+    modified = true;
+  }
+
+  if (modified) {
+    need_to_purge.clear();
+    pending_update.clear();
+    pending_destroy.clear();
+    pending_noop.clear();
+    MDSTableServer::reset_state();
+  }
+  return modified;
+}
diff --git a/src/mds/SnapServer.h b/src/mds/SnapServer.h
index 7c4262fc51e..50925e9553e 100644
--- a/src/mds/SnapServer.h
+++ b/src/mds/SnapServer.h
@@ -139,6 +139,9 @@ public:
 
   void dump(Formatter *f) const;
   static void generate_test_instances(list<SnapServer*>& ls);
+
+  bool force_update(snapid_t last, snapid_t v2_since,
+                    const map<snapid_t, SnapInfo>& _snaps);
 };
 WRITE_CLASS_ENCODER(SnapServer)
 
diff --git a/src/mds/snap.h b/src/mds/snap.h
index d3a76b0ee9b..b3c00ddf78a 100644
--- a/src/mds/snap.h
+++ b/src/mds/snap.h
@@ -40,6 +40,12 @@ struct SnapInfo {
 };
 WRITE_CLASS_ENCODER(SnapInfo)
 
+inline bool operator==(const SnapInfo &l, const SnapInfo &r)
+{
+  return l.snapid == r.snapid && l.ino == r.ino &&
+         l.stamp == r.stamp && l.name == r.name;
+}
+
 ostream& operator<<(ostream& out, const SnapInfo &sn);
 
 
diff --git a/src/tools/cephfs/DataScan.cc b/src/tools/cephfs/DataScan.cc
index 5463af9d8d8..4dfbe48ccd9 100644
--- a/src/tools/cephfs/DataScan.cc
+++ b/src/tools/cephfs/DataScan.cc
@@ -20,6 +20,7 @@
 
 #include "mds/CInode.h"
 #include "mds/InoTable.h"
+#include "mds/SnapServer.h"
 #include "cls/cephfs/cls_cephfs_client.h"
 
 #include "PgFiles.h"
@@ -298,6 +299,8 @@ int DataScan::main(const std::vector<const char*> &args)
     if (r != 0) {
       return r;
     }
+
+    data_pools = fs->mds_map.get_data_pools();
   }
 
   // Finally, dispatch command
@@ -906,6 +909,9 @@ int DataScan::scan_links()
 
   interval_set<uint64_t> used_inos;
   map<inodeno_t, int> remote_links;
+  map<snapid_t, SnapInfo> snaps;
+  snapid_t last_snap = 1;
+  snapid_t snaprealm_v2_since = 2;
 
   struct link_info_t {
     inodeno_t dirino;
@@ -914,6 +920,7 @@ int DataScan::scan_links()
     version_t version;
     int nlink;
     bool is_dir;
+    map<snapid_t, SnapInfo> snaps;
     link_info_t() : version(0), nlink(0), is_dir(false) {}
     link_info_t(inodeno_t di, frag_t df, const string& n, const CInode::mempool_inode& i) :
       dirino(di), frag(df), name(n),
@@ -972,6 +979,10 @@ int DataScan::scan_links()
           try {
             snapid_t dnfirst;
             decode(dnfirst, q);
+            if (dnfirst > 0 && dnfirst <= CEPH_MAXSNAP) { // 0 would make dnfirst - 1 wrap
+              if (dnfirst - 1 > last_snap)
+                last_snap = dnfirst - 1;
+            }
             char dentry_type;
             decode(dentry_type, q);
             if (dentry_type == 'I') {
@@ -986,9 +997,33 @@ int DataScan::scan_links()
                   used_inos.insert(ino);
                 }
               } else if (step == CHECK_LINK) {
+                sr_t srnode;
+                if (inode.snap_blob.length()) {
+                  auto p = inode.snap_blob.cbegin();
+                  decode(srnode, p);
+                  for (auto it = srnode.snaps.begin();
+                       it != srnode.snaps.end(); ) {
+                    if (it->second.ino != ino ||
+                        it->second.snapid != it->first) {
+                      srnode.snaps.erase(it++);
+                    } else {
+                      ++it;
+                    }
+                  }
+                  if (!srnode.past_parents.empty()) {
+                    snapid_t last = srnode.past_parents.rbegin()->first;
+                    if (last + 1 > snaprealm_v2_since)
+                      snaprealm_v2_since = last + 1;
+                  }
+                }
+                if (!inode.old_inodes.empty()) {
+                  if (inode.old_inodes.rbegin()->first > last_snap)
+                    last_snap = inode.old_inodes.rbegin()->first;
+                }
                 auto q = dup_primaries.find(ino);
                 if (q != dup_primaries.end()) {
                   q->second.push_back(link_info_t(dir_ino, frag_id, dname, inode.inode));
+                  q->second.back().snaps.swap(srnode.snaps);
                 } else {
                   int nlink = 0;
                   auto r = remote_links.find(ino);
@@ -1002,6 +1037,8 @@ int DataScan::scan_links()
                     bad_nlink_inos[ino] = link_info_t(dir_ino, frag_id, dname, inode.inode);
                     bad_nlink_inos[ino].nlink = nlink;
                   }
+                  snaps.insert(make_move_iterator(begin(srnode.snaps)),
+                               make_move_iterator(end(srnode.snaps)));
                 }
               }
             } else if (dentry_type == 'L') {
@@ -1074,8 +1111,11 @@ int DataScan::scan_links()
 
     for (auto& q : p.second) {
       // in the middle of dir fragmentation?
-      if (newest.dirino == q.dirino && newest.name == q.name)
+      if (newest.dirino == q.dirino && newest.name == q.name) {
+        snaps.insert(make_move_iterator(begin(q.snaps)),
+                     make_move_iterator(end(q.snaps)));
         continue;
+      }
 
       std::string key;
       dentry_key_t dn_key(CEPH_NOSNAP, q.name.c_str());
@@ -1150,6 +1190,38 @@ int DataScan::scan_links()
     }
   }
 
+  {
+    objecter->with_osdmap([&](const OSDMap& o) {
+      for (auto p : data_pools) {
+        const pg_pool_t *pi = o.get_pg_pool(p);
+        if (!pi)
+          continue;
+        if (pi->snap_seq > last_snap)
+          last_snap = pi->snap_seq;
+      }
+    });
+
+    if (!snaps.empty()) {
+      if (snaps.rbegin()->first > last_snap)
+        last_snap = snaps.rbegin()->first;
+    }
+
+    SnapServer snaptable;
+    snaptable.set_rank(0);
+    bool dirty = false;
+    int r = metadata_driver->load_table(&snaptable);
+    if (r < 0) {
+      snaptable.reset_state();
+      dirty = true;
+    }
+    if (snaptable.force_update(last_snap, snaprealm_v2_since, snaps))
+      dirty = true;
+    if (dirty) {
+      r = metadata_driver->save_table(&snaptable);
+      if (r < 0)
+        return r;
+    }
+  }
   return 0;
 }
 
diff --git a/src/tools/cephfs/DataScan.h b/src/tools/cephfs/DataScan.h
index 007fe824c74..dd064492d01 100644
--- a/src/tools/cephfs/DataScan.h
+++ b/src/tools/cephfs/DataScan.h
@@ -244,11 +244,13 @@ class DataScan : public MDSUtility, public MetadataTool
     RecoveryDriver *driver;
     fs_cluster_id_t fscid;
 
+    string metadata_pool_name;
+    std::vector<int64_t> data_pools;
+
     // IoCtx for data pool (where we scrape file backtraces from)
     librados::IoCtx data_io;
     // Remember the data pool ID for use in layouts
     int64_t data_pool_id;
-    string metadata_pool_name;
 
     uint32_t n;
     uint32_t m;
@@ -325,7 +327,7 @@ class DataScan : public MDSUtility, public MetadataTool
 
     DataScan()
       : driver(NULL), fscid(FS_CLUSTER_ID_NONE),
-        data_pool_id(-1), metadata_pool_name(""), n(0), m(1),
+        data_pool_id(-1), n(0), m(1),
         force_pool(false), force_corrupt(false),
         force_init(false)
     {
-- 
2.39.5