From b77ebe8cd418552c177b8aa3a95af19d83b94fc6 Mon Sep 17 00:00:00 2001 From: Radoslaw Zarzynski Date: Wed, 23 Aug 2023 20:05:25 +0200 Subject: [PATCH] =?utf8?q?Revert=20"osd/SnapMapper:=20Maintain=20the=20pre?= =?utf8?q?fix=5Fitr=20between=20calls=20to=20avoid=20search=E2=80=A6"?= MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Radosław Zarzyński --- .../singleton-nomsgr/all/ceph-snapmapper.yaml | 22 -- qa/tasks/ceph.conf.template | 3 +- src/common/options/osd.yaml.in | 5 - src/osd/SnapMapper.cc | 70 +--- src/osd/SnapMapper.h | 25 +- src/test/CMakeLists.txt | 4 - src/test/test_snap_mapper.cc | 323 ++---------------- 7 files changed, 49 insertions(+), 403 deletions(-) delete mode 100644 qa/suites/rados/singleton-nomsgr/all/ceph-snapmapper.yaml diff --git a/qa/suites/rados/singleton-nomsgr/all/ceph-snapmapper.yaml b/qa/suites/rados/singleton-nomsgr/all/ceph-snapmapper.yaml deleted file mode 100644 index cbfd08620e768..0000000000000 --- a/qa/suites/rados/singleton-nomsgr/all/ceph-snapmapper.yaml +++ /dev/null @@ -1,22 +0,0 @@ -openstack: - - volumes: # attached to each instance - count: 3 - size: 10 # GB -roles: -- [mon.a, mgr.x, osd.0, osd.1, osd.2, client.0] - -overrides: - ceph: - pre-mgr-commands: - - sudo ceph config set mgr mgr_pool false --force - log-ignorelist: - - but it is still running - - overall HEALTH_ - - \(POOL_APP_NOT_ENABLED\) - -tasks: -- install: -- ceph: -- exec: - client.0: - - ceph_test_snap_mapper diff --git a/qa/tasks/ceph.conf.template b/qa/tasks/ceph.conf.template index 7ae2b83c29515..a9cce29539aa1 100644 --- a/qa/tasks/ceph.conf.template +++ b/qa/tasks/ceph.conf.template @@ -55,9 +55,8 @@ osd debug shutdown = true osd debug op order = true osd debug verify stray on activate = true - osd debug trim objects = true - osd open classes on start = true + osd open classes on start = true osd debug pg log writeout = true osd deep scrub update digest min age = 30 diff --git a/src/common/options/osd.yaml.in b/src/common/options/osd.yaml.in index f4c60db26b774..5d8d40cf12d1f 100644 --- a/src/common/options/osd.yaml.in +++ b/src/common/options/osd.yaml.in @@ -194,11 +194,6 @@ options: load on busy clusters. default: false with_legacy: true -- name: osd_debug_trim_objects - type: bool - level: advanced - desc: Asserts that no clone-objects were added to a snap after we start trimming it - default: false - name: osd_repair_during_recovery type: bool level: advanced diff --git a/src/osd/SnapMapper.cc b/src/osd/SnapMapper.cc index b12e063548a9c..7893bc08fdcb6 100644 --- a/src/osd/SnapMapper.cc +++ b/src/osd/SnapMapper.cc @@ -540,28 +540,36 @@ void SnapMapper::add_oid( backend.set_keys(to_add, t); } -void SnapMapper::get_objects_by_prefixes( +int SnapMapper::get_next_objects_to_trim( snapid_t snap, unsigned max, vector *out) { - /// maintain the prefix_itr between calls to avoid searching depleted prefixes - for ( ; prefix_itr != prefixes.end(); prefix_itr++) { - string prefix(get_prefix(pool, snap) + *prefix_itr); + ceph_assert(out); + ceph_assert(out->empty()); + + // if max would be 0, we return ENOENT and the caller would mistakenly + // trim the snaptrim queue + ceph_assert(max > 0); + int r = 0; + + /// \todo cache the prefixes-set in update_bits() + for (set::iterator i = prefixes.begin(); + i != prefixes.end() && out->size() < max && r == 0; + ++i) { + string prefix(get_prefix(pool, snap) + *i); string pos = prefix; while (out->size() < max) { pair next; - // access RocksDB (an expensive operation!) - int r = backend.get_next(pos, &next); + r = backend.get_next(pos, &next); dout(20) << __func__ << " get_next(" << pos << ") returns " << r << " " << next << dendl; if (r != 0) { - return; // Done + break; // Done } if (next.first.substr(0, prefix.size()) != prefix) { - // TBD: we access the DB twice for the first object of each iterator... break; // Done with this prefix } @@ -575,52 +583,7 @@ void SnapMapper::get_objects_by_prefixes( out->push_back(next_decoded.second); pos = next.first; } - - if (out->size() >= max) { - return; - } } -} - -int SnapMapper::get_next_objects_to_trim( - snapid_t snap, - unsigned max, - vector *out) -{ - ceph_assert(out); - ceph_assert(out->empty()); - - // if max would be 0, we return ENOENT and the caller would mistakenly - // trim the snaptrim queue - ceph_assert(max > 0); - - // The prefix_itr is bound to a prefix_itr_snap so if we trim another snap - // we must reset the prefix_itr (should not happen normally) - if (prefix_itr_snap != snap) { - dout(10) << __func__ << "::Reset prefix_itr(unexpcted) snap was changed from <" - << prefix_itr_snap << "> to <" << snap << ">" << dendl; - reset_prefix_itr(snap); - } - - // when reaching the end of the DB reset the prefix_ptr and verify - // we didn't miss objects which were added after we started trimming - // This should never happen in reality because the snap was logically deleted - // before trimming starts (and so no new clone-objects could be added) - // For more info see PG::filter_snapc() - // - // We still like to be extra careful and run one extra loop over all prefeixes - get_objects_by_prefixes(snap, max, out); - if (out->size() == 0) { - dout(10) << __func__ << "::Reset prefix_itr after a complete trim!" << dendl; - reset_prefix_itr(snap); - get_objects_by_prefixes(snap, max, out); - - bool osd_debug_trim_objects = cct->_conf.get_val("osd_debug_trim_objects"); - if (osd_debug_trim_objects) { - ceph_assert(out->size() == 0); - } - } - if (out->size() == 0) { return -ENOENT; } else { @@ -628,6 +591,7 @@ int SnapMapper::get_next_objects_to_trim( } } + int SnapMapper::remove_oid( const hobject_t &oid, MapCacher::Transaction *t) diff --git a/src/osd/SnapMapper.h b/src/osd/SnapMapper.h index d2dcf69324bf7..eb43a23c2b0e0 100644 --- a/src/osd/SnapMapper.h +++ b/src/osd/SnapMapper.h @@ -281,24 +281,6 @@ private: tl::expected get_snaps_common( const hobject_t &hoid) const; - /// file @out vector with the first objects with @snap as a snap - void get_objects_by_prefixes( - snapid_t snap, - unsigned max, - std::vector *out); - - std::set prefixes; - // maintain a current active prefix - std::set::iterator prefix_itr; - // associate the active prefix with a snap - snapid_t prefix_itr_snap; - - // reset the prefix iterator to the first prefix hash - void reset_prefix_itr(snapid_t snap) { - prefix_itr_snap = snap; - prefix_itr = prefixes.begin(); - } - public: static std::string make_shard_prefix(shard_id_t shard) { if (shard == shard_id_t::NO_SHARD) @@ -327,6 +309,7 @@ private: update_bits(mask_bits); } + std::set prefixes; /// Update bits in case of pg split or merge void update_bits( uint32_t new_bits ///< [in] new split bits @@ -340,12 +323,6 @@ private: for (auto i = _prefixes.begin(); i != _prefixes.end(); ++i) { prefixes.insert(shard_prefix + *i); } - - reset_prefix_itr(CEPH_NOSNAP); - } - - const std::set::iterator get_prefix_itr() { - return prefix_itr; } /// Update snaps for oid, empty new_snaps removes the mapping diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt index 5bfa5397a31f0..09281ab2dbf54 100644 --- a/src/test/CMakeLists.txt +++ b/src/test/CMakeLists.txt @@ -466,10 +466,6 @@ add_executable(ceph_test_snap_mapper $ ) target_link_libraries(ceph_test_snap_mapper osd global ${BLKID_LIBRARIES} ${UNITTEST_LIBS}) - -install(TARGETS - ceph_test_snap_mapper - DESTINATION ${CMAKE_INSTALL_BINDIR}) endif(NOT WIN32) add_executable(ceph_test_stress_watch diff --git a/src/test/test_snap_mapper.cc b/src/test/test_snap_mapper.cc index 6e7b4c566043c..e502892cc42f1 100644 --- a/src/test/test_snap_mapper.cc +++ b/src/test/test_snap_mapper.cc @@ -461,19 +461,7 @@ public: uint32_t bits) : driver(driver), mapper(new SnapMapper(g_ceph_context, driver, mask, bits, 0, shard_id_t(1))), - mask(mask), bits(bits) {} - - hobject_t create_hobject( - unsigned idx, - snapid_t snapid, - int64_t pool, - const std::string& nspace) { - const object_t oid("OID" + std::to_string(idx)); - const std::string key("KEY" + std::to_string(idx)); - const uint32_t hash = (idx & ((~0)< &snaps) { - hobject_to_snap[obj] = snaps; - for (auto snap : snaps) { - map >::iterator j = snap_to_hobject.find(snap); - ceph_assert(j != snap_to_hobject.end()); - j->second.insert(obj); - } - { - PausyAsyncMap::Transaction t; - mapper->add_oid(obj, snaps, &t); - driver->submit(&t); - } } void create_object() { @@ -525,9 +494,20 @@ public: obj = random_hobject(); } while (hobject_to_snap.count(obj)); - set snaps; + set &snaps = hobject_to_snap[obj]; choose_random_snaps(1 + (rand() % 20), &snaps); - add_object_to_snaps(obj, snaps); + for (set::iterator i = snaps.begin(); + i != snaps.end(); + ++i) { + map >::iterator j = snap_to_hobject.find(*i); + ceph_assert(j != snap_to_hobject.end()); + j->second.insert(obj); + } + { + PausyAsyncMap::Transaction t; + mapper->add_oid(obj, snaps, &t); + driver->submit(&t); + } } std::pair to_raw( @@ -555,23 +535,27 @@ public: return mapper->make_purged_snap_key(std::forward(args)...); } - // must be called with lock held to protect access to - // snap_to_hobject and hobject_to_snap - int trim_snap(snapid_t snapid, unsigned max_count, vector & out) { - set& hobjects = snap_to_hobject[snapid]; + void trim_snap() { + std::lock_guard l{lock}; + if (snap_to_hobject.empty()) + return; + map >::iterator snap = + rand_choose(snap_to_hobject); + set hobjects = snap->second; + vector hoids; - int ret = mapper->get_next_objects_to_trim(snapid, max_count, &hoids); - if (ret == 0) { - out.insert(out.end(), hoids.begin(), hoids.end()); + while (mapper->get_next_objects_to_trim( + snap->first, rand() % 5 + 1, &hoids) == 0) { for (auto &&hoid: hoids) { ceph_assert(!hoid.is_max()); ceph_assert(hobjects.count(hoid)); hobjects.erase(hoid); - map>::iterator j = hobject_to_snap.find(hoid); - ceph_assert(j->second.count(snapid)); + map>::iterator j = + hobject_to_snap.find(hoid); + ceph_assert(j->second.count(snap->first)); set old_snaps(j->second); - j->second.erase(snapid); + j->second.erase(snap->first); { PausyAsyncMap::Transaction t; @@ -589,49 +573,6 @@ public: } hoids.clear(); } - return ret; - } - - // must be called with lock held to protect access to - // snap_to_hobject and hobject_to_snap in trim_snap - // will keep trimming until reaching max_count or failing a call to trim_snap() - void trim_snap_force(snapid_t snapid, - unsigned max_count, - vector& out) { - - int guard = 1000; - vector tmp; - unsigned prev_size = 0; - while (tmp.size() < max_count) { - unsigned req_size = max_count - tmp.size(); - // each call adds more objects into the tmp vector - trim_snap(snapid, req_size, tmp); - if (prev_size < tmp.size()) { - prev_size = tmp.size(); - } - else{ - // the tmp vector size was not increased in the last call - // which means we were unable to find anything to trim - break; - } - ceph_assert(--guard > 0); - } - out.insert(out.end(), tmp.begin(), tmp.end()); - } - - void trim_snap() { - std::lock_guard l{lock}; - if (snap_to_hobject.empty()) { - return; - } - int ret = 0; - map >::iterator snap = rand_choose(snap_to_hobject); - do { - int max_count = rand() % 5 + 1; - vector out; - ret = trim_snap(snap->first, max_count, out); - } while(ret == 0); - set hobjects = snap->second; ceph_assert(hobjects.empty()); snap_to_hobject.erase(snap); } @@ -671,189 +612,6 @@ public: ceph_assert(r == 0); ASSERT_EQ(snaps, obj->second); } - - void test_prefix_itr() { - // protects access to snap_to_hobject and hobject_to_snap - std::lock_guard l{lock}; - snapid_t snapid = create_snap(); - // we initialize 32 PGS - ceph_assert(bits == 5); - - const int64_t pool(0); - const std::string nspace("GBH"); - set snaps = { snapid }; - set& hobjects = snap_to_hobject[snapid]; - vector trimmed_objs; - vector stored_objs; - - // add objects 0, 32, 64, 96, 128, 160, 192, 224 - // which should hit all the prefixes - for (unsigned idx = 0; idx < 8; idx++) { - hobject_t hobj = create_hobject(idx * 32, snapid, pool, nspace); - add_object_to_snaps(hobj, snaps); - stored_objs.push_back(hobj); - } - ceph_assert(hobjects.size() == 8); - - // trim 0, 32, 64, 96 - trim_snap(snapid, 4, trimmed_objs); - ceph_assert(hobjects.size() == 4); - - // add objects (3, 35, 67) before the prefix_itr position - // to force an iteartor reset later - for (unsigned idx = 0; idx < 3; idx++) { - hobject_t hobj = create_hobject(idx * 32 + 3, snapid, pool, nspace); - add_object_to_snaps(hobj, snaps); - stored_objs.push_back(hobj); - } - ceph_assert(hobjects.size() == 7); - - // will now trim 128, 160, 192, 224 - trim_snap(snapid, 4, trimmed_objs); - ceph_assert(hobjects.size() == 3); - - // finally, trim 3, 35, 67 - // This will force a reset to the prefix_itr (which is what we test here) - trim_snap(snapid, 3, trimmed_objs); - ceph_assert(hobjects.size() == 0); - - ceph_assert(trimmed_objs.size() == 11); - // trimmed objs must be in the same order they were inserted - // this will prove that the second call to add_object_to_snaps inserted - // them before the current prefix_itr - ceph_assert(trimmed_objs.size() == stored_objs.size()); - ceph_assert(std::equal(trimmed_objs.begin(), trimmed_objs.end(), - stored_objs.begin())); - snap_to_hobject.erase(snapid); - } - - // insert 256 objects which should populate multiple prefixes - // trim until we change prefix and then insert an old object - // which we know for certain belongs to a prefix before prefix_itr - void test_prefix_itr2() { - // protects access to snap_to_hobject and hobject_to_snap - std::lock_guard l{lock}; - snapid_t snapid = create_snap(); - // we initialize 32 PGS - ceph_assert(bits == 5); - - const int64_t pool(0); - const std::string nspace("GBH"); - set snaps = { snapid }; - vector trimmed_objs; - vector stored_objs; - - constexpr unsigned MAX_IDX = 256; - for (unsigned idx = 0; idx < MAX_IDX; idx++) { - hobject_t hobj = create_hobject(idx, snapid, pool, nspace); - add_object_to_snaps(hobj, snaps); - stored_objs.push_back(hobj); - } - - hobject_t dup_hobj; - bool found = false; - trim_snap(snapid, 1, trimmed_objs); - const std::set::iterator itr = mapper->get_prefix_itr(); - for (unsigned idx = 1; idx < MAX_IDX + 1; idx++) { - trim_snap(snapid, 1, trimmed_objs); - if (!found && mapper->get_prefix_itr() != itr) { - // we changed prefix -> insert an OBJ belonging to perv prefix - dup_hobj = create_hobject(idx - 1, snapid, pool, nspace); - add_object_to_snaps(dup_hobj, snaps); - stored_objs.push_back(dup_hobj); - found = true; - } - } - ceph_assert(found); - - sort(trimmed_objs.begin(), trimmed_objs.end()); - sort(stored_objs.begin(), stored_objs.end()); - ceph_assert(trimmed_objs.size() == MAX_IDX+1); - ceph_assert(trimmed_objs.size() == stored_objs.size()); - ceph_assert(std::equal(trimmed_objs.begin(), trimmed_objs.end(), - stored_objs.begin())); - snap_to_hobject.erase(snapid); - } - - void add_rand_hobjects(unsigned count, - snapid_t snapid, - int64_t pool, - const std::string& nspace, - vector& stored_objs) { - constexpr unsigned MAX_VAL = 1000; - set snaps = { snapid }; - for (unsigned i = 0; i < count; i++) { - hobject_t hobj; - do { - unsigned val = rand() % MAX_VAL; - hobj = create_hobject(val, snapid, pool, nspace); - }while (hobject_to_snap.count(hobj)); - add_object_to_snaps(hobj, snaps); - stored_objs.push_back(hobj); - } - } - - // Start with a set of random objects then run a partial trim - // followed by another random insert - // This should cause *some* objects to be added before the prefix_itr - // and will verify that we still remove them - void test_prefix_itr_rand() { - // protects access to snap_to_hobject and hobject_to_snap - std::lock_guard l{lock}; - snapid_t snapid = create_snap(); - // we initialize 32 PGS - ceph_assert(bits == 5); - - const int64_t pool(0); - const std::string nspace("GBH"); - vector trimmed_objs; - vector stored_objs; - set& hobjects = snap_to_hobject[snapid]; - ceph_assert(hobjects.size() == 0); - - // add 100 random objects - add_rand_hobjects(100, snapid, pool, nspace, stored_objs); - ceph_assert(hobjects.size() == 100); - - // trim the first 75 objects leaving 25 objects - trim_snap(snapid, 75, trimmed_objs); - ceph_assert(hobjects.size() == 25); - - // add another 25 random objects (now we got 50 objects) - add_rand_hobjects(25, snapid, pool, nspace, stored_objs); - ceph_assert(hobjects.size() == 50); - - // trim 49 objects leaving a single object - // we must use a wrapper function to keep trimming while until -ENOENT - trim_snap_force(snapid, 49, trimmed_objs); - ceph_assert(hobjects.size() == 1); - - // add another 9 random objects (now we got 10 objects) - add_rand_hobjects(9, snapid, pool, nspace, stored_objs); - ceph_assert(hobjects.size() == 10); - - // trim 10 objects leaving no object in store - trim_snap_force(snapid, 10, trimmed_objs); - ceph_assert(hobjects.size() == 0); - - // add 10 random objects (now we got 10 objects) - add_rand_hobjects(10, snapid, pool, nspace, stored_objs); - ceph_assert(hobjects.size() == 10); - - // trim 10 objects leaving no object in store - trim_snap_force(snapid, 10, trimmed_objs); - ceph_assert(hobjects.size() == 0); - - sort(trimmed_objs.begin(), trimmed_objs.end()); - sort(stored_objs.begin(), stored_objs.end()); - ceph_assert(trimmed_objs.size() == 144); - ceph_assert(trimmed_objs.size() == stored_objs.size()); - - bool are_equal = std::equal(trimmed_objs.begin(), trimmed_objs.end(), - stored_objs.begin()); - ceph_assert(are_equal); - snap_to_hobject.erase(snapid); - } }; class SnapMapperTest : public ::testing::Test { @@ -917,27 +675,6 @@ protected: } }; -// This test creates scenarios which are impossible to get with normal code. -// The normal code deletes the snap before calling TRIM and so no new clones -// can be added to that snap. -// Our test calls get_next_objects_to_trim() *without* deleting the snap first. -// This allows us to add objects to the (non-deleted) snap after trimming began. -// We test that SnapTrim will find them even when added into positions before the prefix_itr. -// Since those tests are doing illegal inserts we must disable osd_debug_trim_objects -// during those tests as otherwise the code will assert. -TEST_F(SnapMapperTest, prefix_itr) { - bool orig_val = g_ceph_context->_conf.get_val("osd_debug_trim_objects"); - std::cout << "osd_debug_trim_objects = " << orig_val << std::endl; - g_ceph_context->_conf.set_val("osd_debug_trim_objects", std::to_string(false)); - init(32); - get_tester().test_prefix_itr(); - get_tester().test_prefix_itr2(); - get_tester().test_prefix_itr_rand(); - g_ceph_context->_conf.set_val("osd_debug_trim_objects", std::to_string(orig_val)); - bool curr_val = g_ceph_context->_conf.get_val("osd_debug_trim_objects"); - ceph_assert(curr_val == orig_val); -} - TEST_F(SnapMapperTest, Simple) { init(1); get_tester().create_snap(); -- 2.39.5