From 9fb7ec61ba10896ce01d5970375f1ce1dd993874 Mon Sep 17 00:00:00 2001 From: NitzanMordhai Date: Wed, 23 Mar 2022 18:52:45 +0000 Subject: [PATCH] tools/ceph_objectstore_tool: Add duplicate entry trimming Adding duplicate entry trimming to the trim-pg-log operation. We will use the existing PGLog trim function to find out the set of entries/dup entries that we are supposed to trim. To use it we need to build the PGLog from disk. Fixes: https://tracker.ceph.com/issues/53729 Signed-off-by: Nitzan Mordechai --- src/osd/PGLog.cc | 11 ++- src/osd/PGLog.h | 6 +- src/test/osd/TestPGLog.cc | 10 +-- src/tools/ceph_objectstore_tool.cc | 123 +++++++++++++++++++++-------- 4 files changed, 104 insertions(+), 46 deletions(-) diff --git a/src/osd/PGLog.cc b/src/osd/PGLog.cc index d7ab12c01dfa..0f184497ada9 100644 --- a/src/osd/PGLog.cc +++ b/src/osd/PGLog.cc @@ -56,7 +56,7 @@ void PGLog::IndexedLog::split_out_child( void PGLog::IndexedLog::trim( CephContext* cct, eversion_t s, - set *trimmed, + set *trimmed, set* trimmed_dups, eversion_t *write_from_dups) { @@ -76,7 +76,7 @@ void PGLog::IndexedLog::trim( break; lgeneric_subdout(cct, osd, 20) << "trim " << e << dendl; if (trimmed) - trimmed->emplace(e.version); + trimmed->insert(e.get_key_name()); unindex(e); // remove from index, @@ -681,7 +681,7 @@ void PGLog::write_log_and_missing( eversion_t::max(), eversion_t(), eversion_t(), - set(), + set(), set(), missing, true, require_rollback, false, @@ -815,7 +815,7 @@ void PGLog::_write_log_and_missing( eversion_t dirty_to, eversion_t dirty_from, eversion_t writeout_from, - set &&trimmed, + set &&trimmed, set &&trimmed_dups, const pg_missing_tracker_t &missing, bool touch_log, @@ -829,8 +829,7 @@ void PGLog::_write_log_and_missing( ) { set to_remove; to_remove.swap(trimmed_dups); - for (auto& t : trimmed) { - string key = t.get_key_name(); + for (auto& key : trimmed) { if (log_keys_debug) { auto it = log_keys_debug->find(key); ceph_assert(it != log_keys_debug->end()); diff --git a/src/osd/PGLog.h
b/src/osd/PGLog.h index 25c88217c51c..06d0024743a9 100644 --- a/src/osd/PGLog.h +++ b/src/osd/PGLog.h @@ -633,7 +633,7 @@ public: void trim( CephContext* cct, eversion_t s, - std::set *trimmed, + std::set *trimmed, std::set* trimmed_dups, eversion_t *write_from_dups); @@ -650,7 +650,7 @@ protected: eversion_t dirty_to; ///< must clear/writeout all keys <= dirty_to eversion_t dirty_from; ///< must clear/writeout all keys >= dirty_from eversion_t writeout_from; ///< must writout keys >= writeout_from - std::set trimmed; ///< must clear keys in trimmed + std::set trimmed; ///< must clear keys in trimmed eversion_t dirty_to_dups; ///< must clear/writeout all dups <= dirty_to_dups eversion_t dirty_from_dups; ///< must clear/writeout all dups >= dirty_from_dups eversion_t write_from_dups; ///< must write keys >= write_from_dups @@ -1372,7 +1372,7 @@ public: eversion_t dirty_to, eversion_t dirty_from, eversion_t writeout_from, - std::set &&trimmed, + std::set &&trimmed, std::set &&trimmed_dups, const pg_missing_tracker_t &missing, bool touch_log, diff --git a/src/test/osd/TestPGLog.cc b/src/test/osd/TestPGLog.cc index 3aab689a6d28..b08b9f0ccc28 100644 --- a/src/test/osd/TestPGLog.cc +++ b/src/test/osd/TestPGLog.cc @@ -2717,7 +2717,7 @@ TEST_F(PGLogTrimTest, TestPartialTrim) log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 165), mk_evt(26, 160))); log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 167), mk_evt(31, 166))); - std::set trimmed; + std::set trimmed; std::set trimmed_dups; eversion_t write_from_dups = eversion_t::max(); @@ -2731,7 +2731,7 @@ TEST_F(PGLogTrimTest, TestPartialTrim) SetUp(15); - std::set trimmed2; + std::set trimmed2; std::set trimmed_dups2; eversion_t write_from_dups2 = eversion_t::max(); @@ -2784,7 +2784,7 @@ TEST_F(PGLogTrimTest, TestTrimNoDups) log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 165), mk_evt(26, 160))); log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 167), mk_evt(31, 166))); - std::set trimmed; + std::set trimmed; std::set trimmed_dups; eversion_t 
write_from_dups = eversion_t::max(); @@ -2812,7 +2812,7 @@ TEST_F(PGLogTrimTest, TestNoTrim) log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 165), mk_evt(26, 160))); log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 167), mk_evt(31, 166))); - std::set trimmed; + std::set trimmed; std::set trimmed_dups; eversion_t write_from_dups = eversion_t::max(); @@ -2841,7 +2841,7 @@ TEST_F(PGLogTrimTest, TestTrimAll) log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 165), mk_evt(26, 160))); log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 167), mk_evt(31, 166))); - std::set trimmed; + std::set trimmed; std::set trimmed_dups; eversion_t write_from_dups = eversion_t::max(); diff --git a/src/tools/ceph_objectstore_tool.cc b/src/tools/ceph_objectstore_tool.cc index 8690bd96a188..2d8637c4ed86 100644 --- a/src/tools/ceph_objectstore_tool.cc +++ b/src/tools/ceph_objectstore_tool.cc @@ -632,6 +632,20 @@ int write_pg(ObjectStore::Transaction &t, epoch_t epoch, pg_info_t &info, return 0; } +bool need_trim_dups(ObjectStore *store, ObjectStore::CollectionHandle ch, + const ghobject_t &oid) { + size_t count_dups = 0; + ObjectMap::ObjectMapIterator p = store->get_omap_iterator(ch, oid); + if (!p) + return false; + for (p->seek_to_first(); p->valid(); p->next()) { + if (p->key().substr(0, 4) == string("dup_")) + if (++count_dups > g_ceph_context->_conf->osd_pg_log_dups_tracked) + return true; + } + return false; +} + int do_trim_pg_log(ObjectStore *store, const coll_t &coll, pg_info_t &info, const spg_t &pgid, epoch_t map_epoch, @@ -646,23 +660,34 @@ int do_trim_pg_log(ObjectStore *store, const coll_t &coll, cerr << "Log bounds are: " << "(" << info.log_tail << "," << info.last_update << "]" << std::endl; - - uint64_t max_entries = g_ceph_context->_conf->osd_max_pg_log_entries; - if (info.last_update.version - info.log_tail.version <= max_entries) { - cerr << "Log not larger than osd_max_pg_log_entries " << max_entries << std::endl; + uint64_t max_entries = std::min(g_ceph_context->_conf->osd_max_pg_log_entries, + 
(info.last_update.version) ? (info.last_update.version - 1) + : g_ceph_context->_conf->osd_max_pg_log_entries + ); + if (info.last_update.version - info.log_tail.version <= max_entries && + !need_trim_dups(store, ch, oid)) { + cerr << "Log not larger than osd_max_pg_log_entries " << max_entries << " and no duplicate entries to trim" << std::endl; return 0; } + PGLog::IndexedLog pg_log; + pg_log.head = info.last_update; + pg_log.skip_can_rollback_to_to_head(); + ceph_assert(info.last_update.version > max_entries); - version_t trim_to = info.last_update.version - max_entries; + eversion_t trim_to(info.last_update.epoch, info.last_update.version - max_entries); + size_t trim_at_once = g_ceph_context->_conf->osd_pg_log_trim_max; + size_t tracked_dups = g_ceph_context->_conf->osd_pg_log_dups_tracked; + std::deque latest_dups; + g_ceph_context->_conf->osd_pg_log_dups_tracked = 0; // Fake dup trimming all + eversion_t new_tail; bool done = false; while (!done) { // gather keys so we can delete them in a batch without // affecting the iterator - set keys_to_trim; { ObjectMap::ObjectMapIterator p = store->get_omap_iterator(ch, oid); if (!p) @@ -680,47 +705,80 @@ int do_trim_pg_log(ObjectStore *store, const coll_t &coll, continue; if (p->key().substr(0, 7) == string("missing")) continue; - if (p->key().substr(0, 4) == string("dup_")) - continue; - bufferlist bl = p->value(); auto bp = bl.cbegin(); - pg_log_entry_t e; - try { - e.decode_with_checksum(bp); - } catch (const buffer::error &e) { - cerr << "Error reading pg log entry: " << e.what() << std::endl; - } - if (debug) { - cerr << "read entry " << e << std::endl; + + if (p->key().substr(0, 4) == string("dup_")) { + pg_log_dup_t dup; + try { + dup.decode(bp); + latest_dups.push_back(dup); + if (latest_dups.size() > tracked_dups) { + pg_log.dups.push_back(latest_dups.front()); + latest_dups.pop_front(); + } + if (debug) { + cerr << "read entry " << dup << std::endl; + } + } catch (buffer::error& e) { + cerr << "error 
reading log dup: " << p->key() << std::endl; + return -EFAULT; + } + } else { + pg_log_entry_t e; + try { + e.decode_with_checksum(bp); + pg_log.log.push_back(e); + if (debug) { + cerr << "read entry " << e << std::endl; + } + } catch (const buffer::error &e) { + cerr << "Error reading pg log entry: " << e.what() << std::endl; + } } - if (e.version.version > trim_to) { - done = true; - break; + // We have enough pg logs entries to trim + if ((pg_log.log.size() + pg_log.dups.size()) >= trim_at_once) { + break; } - keys_to_trim.insert(p->key()); - new_tail = e.version; - if (keys_to_trim.size() >= trim_at_once) - break; } - if (!p->valid()) done = true; + + pg_log.index(); + } // deconstruct ObjectMapIterator - // delete the keys - if (!dry_run && !keys_to_trim.empty()) { - cout << "Removing keys " << *keys_to_trim.begin() << " - " << *keys_to_trim.rbegin() << std::endl; - ObjectStore::Transaction t; - t.omap_rmkeys(coll, oid, keys_to_trim); - store->queue_transaction(ch, std::move(t)); - ch->flush(); + if (!dry_run && ((pg_log.log.size() > 0) || (pg_log.dups.size() > 0))) { + std::set trimmed; + std::set trimmed_dups; + eversion_t write_from_dups = eversion_t::max(); + + pg_log.trim(g_ceph_context, trim_to, &trimmed, &trimmed_dups, &write_from_dups); + // Is there any trimming to do? 
+ if (!trimmed.empty() || !trimmed_dups.empty()) { + new_tail = pg_log.tail; + + ObjectStore::Transaction t; + if (!trimmed.empty()) { + cerr << "Removing keys " << *trimmed.begin() << " - " << *trimmed.rbegin() << std::endl; + t.omap_rmkeys(coll, oid, trimmed); + } + if (!trimmed_dups.empty()) { + cerr << "Removing Dups " << *trimmed_dups.begin() << " - " << *trimmed_dups.rbegin() << std::endl; + t.omap_rmkeys(coll, oid, trimmed_dups); + } + store->queue_transaction(ch, std::move(t)); + ch->flush(); + pg_log.log.clear(); + pg_log.dups.clear(); + } } } // update pg info with new tail if (!dry_run && new_tail != eversion_t()) { info.log_tail = new_tail; + ObjectStore::Transaction t; int ret = write_info(t, map_epoch, info, past_intervals); if (ret) @@ -729,6 +787,7 @@ int do_trim_pg_log(ObjectStore *store, const coll_t &coll, ch->flush(); } + g_ceph_context->_conf->osd_pg_log_dups_tracked = tracked_dups; //Restore original // compact the db since we just removed a bunch of data cerr << "Finished trimming, now compacting..." << std::endl; if (!dry_run) -- 2.47.3