From: Radoslaw Zarzynski <rzarzyns@redhat.com> Date: Thu, 9 Jun 2022 20:10:31 +0000 (+0000) Subject: Revert "tools/ceph_objectstore_took: Add duplicate entry trimming" X-Git-Tag: v17.2.1~4^2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F46605%2Fhead;p=ceph.git Revert "tools/ceph_objectstore_took: Add duplicate entry trimming" This reverts commit 5245fb33dd02e53cf0ef5d2f7be5904b6fbe63ce. Although the chunking in off-line `dups` trimming (via COT) seems fine, the `ceph-objectstore-tool` is a client of `trim()` of `PGLog::IndexedLog` which means that a partial revert is not possible without extensive changes. The backport ticket is: https://tracker.ceph.com/issues/55981 Signed-off-by: Radoslaw Zarzynski <rzarzyns@redhat.com> --- diff --git a/src/osd/PGLog.cc b/src/osd/PGLog.cc index 0f184497ada98..d7ab12c01dfa2 100644 --- a/src/osd/PGLog.cc +++ b/src/osd/PGLog.cc @@ -56,7 +56,7 @@ void PGLog::IndexedLog::split_out_child( void PGLog::IndexedLog::trim( CephContext* cct, eversion_t s, - set<string> *trimmed, + set<eversion_t> *trimmed, set<string>* trimmed_dups, eversion_t *write_from_dups) { @@ -76,7 +76,7 @@ void PGLog::IndexedLog::trim( break; lgeneric_subdout(cct, osd, 20) << "trim " << e << dendl; if (trimmed) - trimmed->insert(e.get_key_name()); + trimmed->emplace(e.version); unindex(e); // remove from index, @@ -681,7 +681,7 @@ void PGLog::write_log_and_missing( eversion_t::max(), eversion_t(), eversion_t(), - set<string>(), + set<eversion_t>(), set<string>(), missing, true, require_rollback, false, @@ -815,7 +815,7 @@ void PGLog::_write_log_and_missing( eversion_t dirty_to, eversion_t dirty_from, eversion_t writeout_from, - set<string> &&trimmed, + set<eversion_t> &&trimmed, set<string> &&trimmed_dups, const pg_missing_tracker_t &missing, bool touch_log, @@ -829,7 +829,8 @@ void PGLog::_write_log_and_missing( ) { set<string> to_remove; to_remove.swap(trimmed_dups); - for (auto& key : trimmed) { + for (auto& t : trimmed) { + string key = t.get_key_name(); if (log_keys_debug) { auto it = log_keys_debug->find(key); ceph_assert(it != log_keys_debug->end()); diff 
--git a/src/osd/PGLog.h b/src/osd/PGLog.h index 06d0024743a98..25c88217c51cc 100644 --- a/src/osd/PGLog.h +++ b/src/osd/PGLog.h @@ -633,7 +633,7 @@ public: void trim( CephContext* cct, eversion_t s, - std::set<std::string> *trimmed, + std::set<eversion_t> *trimmed, std::set<std::string>* trimmed_dups, eversion_t *write_from_dups); @@ -650,7 +650,7 @@ protected: eversion_t dirty_to; ///< must clear/writeout all keys <= dirty_to eversion_t dirty_from; ///< must clear/writeout all keys >= dirty_from eversion_t writeout_from; ///< must writout keys >= writeout_from - std::set<std::string> trimmed; ///< must clear keys in trimmed + std::set<eversion_t> trimmed; ///< must clear keys in trimmed eversion_t dirty_to_dups; ///< must clear/writeout all dups <= dirty_to_dups eversion_t dirty_from_dups; ///< must clear/writeout all dups >= dirty_from_dups eversion_t write_from_dups; ///< must write keys >= write_from_dups @@ -1372,7 +1372,7 @@ public: eversion_t dirty_to, eversion_t dirty_from, eversion_t writeout_from, - std::set<std::string> &&trimmed, + std::set<eversion_t> &&trimmed, std::set<std::string> &&trimmed_dups, const pg_missing_tracker_t &missing, bool touch_log, diff --git a/src/test/osd/TestPGLog.cc b/src/test/osd/TestPGLog.cc index b08b9f0ccc288..3aab689a6d281 100644 --- a/src/test/osd/TestPGLog.cc +++ b/src/test/osd/TestPGLog.cc @@ -2717,7 +2717,7 @@ TEST_F(PGLogTrimTest, TestPartialTrim) log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 165), mk_evt(26, 160))); log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 167), mk_evt(31, 166))); - std::set<std::string> trimmed; + std::set<eversion_t> trimmed; std::set<std::string> trimmed_dups; eversion_t write_from_dups = eversion_t::max(); @@ -2731,7 +2731,7 @@ TEST_F(PGLogTrimTest, TestPartialTrim) SetUp(15); - std::set<std::string> trimmed2; + std::set<eversion_t> trimmed2; std::set<std::string> trimmed_dups2; eversion_t write_from_dups2 = eversion_t::max(); @@ -2784,7 +2784,7 @@ TEST_F(PGLogTrimTest, TestTrimNoDups) log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 165), mk_evt(26, 160))); log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 167), mk_evt(31, 166))); - std::set<std::string> trimmed; + std::set<eversion_t> trimmed; std::set<std::string> 
trimmed_dups; eversion_t write_from_dups = eversion_t::max(); @@ -2812,7 +2812,7 @@ TEST_F(PGLogTrimTest, TestNoTrim) log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 165), mk_evt(26, 160))); log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 167), mk_evt(31, 166))); - std::set<std::string> trimmed; + std::set<eversion_t> trimmed; std::set<std::string> trimmed_dups; eversion_t write_from_dups = eversion_t::max(); @@ -2841,7 +2841,7 @@ TEST_F(PGLogTrimTest, TestTrimAll) log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 165), mk_evt(26, 160))); log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 167), mk_evt(31, 166))); - std::set<std::string> trimmed; + std::set<eversion_t> trimmed; std::set<std::string> trimmed_dups; eversion_t write_from_dups = eversion_t::max(); diff --git a/src/tools/ceph_objectstore_tool.cc b/src/tools/ceph_objectstore_tool.cc index 2d8637c4ed86f..8690bd96a1887 100644 --- a/src/tools/ceph_objectstore_tool.cc +++ b/src/tools/ceph_objectstore_tool.cc @@ -632,20 +632,6 @@ int write_pg(ObjectStore::Transaction &t, epoch_t epoch, pg_info_t &info, return 0; } -bool need_trim_dups(ObjectStore *store, ObjectStore::CollectionHandle ch, - const ghobject_t &oid) { - size_t count_dups = 0; - ObjectMap::ObjectMapIterator p = store->get_omap_iterator(ch, oid); - if (!p) - return false; - for (p->seek_to_first(); p->valid(); p->next()) { - if (p->key().substr(0, 4) == string("dup_")) - if (++count_dups > g_ceph_context->_conf->osd_pg_log_dups_tracked) - return true; - } - return false; -} - int do_trim_pg_log(ObjectStore *store, const coll_t &coll, pg_info_t &info, const spg_t &pgid, epoch_t map_epoch, @@ -660,34 +646,23 @@ int do_trim_pg_log(ObjectStore *store, const coll_t &coll, cerr << "Log bounds are: " << "(" << info.log_tail << "," << info.last_update << "]" << std::endl; - uint64_t max_entries = std::min(g_ceph_context->_conf->osd_max_pg_log_entries, - (info.last_update.version) ? 
(info.last_update.version - 1) : g_ceph_context->_conf->osd_max_pg_log_entries ); - if (info.last_update.version - info.log_tail.version <= max_entries && - !need_trim_dups(store, ch, oid)) { - cerr << "Log not larger than osd_max_pg_log_entries " << max_entries << " and no duplicate entries to trim" << std::endl; + + uint64_t max_entries = g_ceph_context->_conf->osd_max_pg_log_entries; + if (info.last_update.version - info.log_tail.version <= max_entries) { + cerr << "Log not larger than osd_max_pg_log_entries " << max_entries << std::endl; return 0; } - PGLog::IndexedLog pg_log; - pg_log.head = info.last_update; - pg_log.skip_can_rollback_to_to_head(); - ceph_assert(info.last_update.version > max_entries); - eversion_t trim_to(info.last_update.epoch, info.last_update.version - max_entries); - + version_t trim_to = info.last_update.version - max_entries; size_t trim_at_once = g_ceph_context->_conf->osd_pg_log_trim_max; - size_t tracked_dups = g_ceph_context->_conf->osd_pg_log_dups_tracked; - std::deque<pg_log_dup_t> latest_dups; - g_ceph_context->_conf->osd_pg_log_dups_tracked = 0; // Fake dup trimming all - eversion_t new_tail; bool done = false; while (!done) { // gather keys so we can delete them in a batch without // affecting the iterator + set<string> keys_to_trim; { ObjectMap::ObjectMapIterator p = store->get_omap_iterator(ch, oid); if (!p) @@ -705,80 +680,47 @@ int do_trim_pg_log(ObjectStore *store, const coll_t &coll, continue; if (p->key().substr(0, 7) == string("missing")) continue; + if (p->key().substr(0, 4) == string("dup_")) + continue; + bufferlist bl = p->value(); auto bp = bl.cbegin(); - - if (p->key().substr(0, 4) == string("dup_")) { - pg_log_dup_t dup; - try { - dup.decode(bp); - latest_dups.push_back(dup); - if (latest_dups.size() > tracked_dups) { - pg_log.dups.push_back(latest_dups.front()); - latest_dups.pop_front(); - } - if (debug) { - cerr << "read entry " << dup << std::endl; - } - } catch (buffer::error& e) { - cerr << "error reading log dup: " << 
p->key() << std::endl; - return -EFAULT; - } - } else { - pg_log_entry_t e; - try { - e.decode_with_checksum(bp); - pg_log.log.push_back(e); - if (debug) { - cerr << "read entry " << e << std::endl; - } - } catch (const buffer::error &e) { - cerr << "Error reading pg log entry: " << e.what() << std::endl; - } + pg_log_entry_t e; + try { + e.decode_with_checksum(bp); + } catch (const buffer::error &e) { + cerr << "Error reading pg log entry: " << e.what() << std::endl; } - // We have enough pg logs entries to trim - if ((pg_log.log.size() + pg_log.dups.size()) >= trim_at_once) { - break; + if (debug) { + cerr << "read entry " << e << std::endl; + } + if (e.version.version > trim_to) { + done = true; + break; } + keys_to_trim.insert(p->key()); + new_tail = e.version; + if (keys_to_trim.size() >= trim_at_once) + break; } + if (!p->valid()) done = true; - - pg_log.index(); - } // deconstruct ObjectMapIterator - if (!dry_run && ((pg_log.log.size() > 0) || (pg_log.dups.size() > 0))) { - std::set<std::string> trimmed; - std::set<std::string> trimmed_dups; - eversion_t write_from_dups = eversion_t::max(); - - pg_log.trim(g_ceph_context, trim_to, &trimmed, &trimmed_dups, &write_from_dups); - // Is there any trimming to do? 
- if (!trimmed.empty() || !trimmed_dups.empty()) { - new_tail = pg_log.tail; - - ObjectStore::Transaction t; - if (!trimmed.empty()) { - cerr << "Removing keys " << *trimmed.begin() << " - " << *trimmed.rbegin() << std::endl; - t.omap_rmkeys(coll, oid, trimmed); - } - if (!trimmed_dups.empty()) { - cerr << "Removing Dups " << *trimmed_dups.begin() << " - " << *trimmed_dups.rbegin() << std::endl; - t.omap_rmkeys(coll, oid, trimmed_dups); - } - store->queue_transaction(ch, std::move(t)); - ch->flush(); - pg_log.log.clear(); - pg_log.dups.clear(); - } + // delete the keys + if (!dry_run && !keys_to_trim.empty()) { + cout << "Removing keys " << *keys_to_trim.begin() << " - " << *keys_to_trim.rbegin() << std::endl; + ObjectStore::Transaction t; + t.omap_rmkeys(coll, oid, keys_to_trim); + store->queue_transaction(ch, std::move(t)); + ch->flush(); } } // update pg info with new tail if (!dry_run && new_tail != eversion_t()) { info.log_tail = new_tail; - ObjectStore::Transaction t; int ret = write_info(t, map_epoch, info, past_intervals); if (ret) @@ -787,7 +729,6 @@ int do_trim_pg_log(ObjectStore *store, const coll_t &coll, ch->flush(); } - g_ceph_context->_conf->osd_pg_log_dups_tracked = tracked_dups; //Restore original // compact the db since we just removed a bunch of data cerr << "Finished trimming, now compacting..." << std::endl; if (!dry_run)