]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
Revert "tools/ceph_objectstore_took: Add duplicate entry trimming"
authorRadoslaw Zarzynski <rzarzyns@redhat.com>
Thu, 9 Jun 2022 21:38:09 +0000 (21:38 +0000)
committerRadoslaw Zarzynski <rzarzyns@redhat.com>
Thu, 9 Jun 2022 21:38:09 +0000 (21:38 +0000)
This reverts commit ef04b8c06b2a0c5655233174f317994b1e70741c.

Although the chunking in off-line `dups` trimming (via COT) seems
fine, the `ceph-objectstore-tool` is a client of `trim()` of
`PGLog::IndexedLog` which means that a partial revert is not
possible without extensive changes.

The backport ticket is: https://tracker.ceph.com/issues/55990

Signed-off-by: Radoslaw Zarzynski <rzarzyns@redhat.com>
src/osd/PGLog.cc
src/osd/PGLog.h
src/test/osd/TestPGLog.cc
src/tools/ceph_objectstore_tool.cc

index 055256d830ecbdcae03926c1f50ad0e6d63b6823..7ed51488083652e6f0c3639daf7d10905a820eb0 100644 (file)
@@ -46,7 +46,7 @@ void PGLog::IndexedLog::split_out_child(
 void PGLog::IndexedLog::trim(
   CephContext* cct,
   eversion_t s,
-  set<string> *trimmed,
+  set<eversion_t> *trimmed,
   set<string>* trimmed_dups,
   eversion_t *write_from_dups)
 {
@@ -66,7 +66,7 @@ void PGLog::IndexedLog::trim(
       break;
     lgeneric_subdout(cct, osd, 20) << "trim " << e << dendl;
     if (trimmed)
-      trimmed->insert(e.get_key_name());
+      trimmed->emplace(e.version);
 
     unindex(e);         // remove from index,
 
@@ -685,7 +685,7 @@ void PGLog::write_log_and_missing(
     eversion_t::max(),
     eversion_t(),
     eversion_t(),
-    set<string>(),
+    set<eversion_t>(),
     set<string>(),
     missing,
     true, require_rollback, false,
@@ -819,7 +819,7 @@ void PGLog::_write_log_and_missing(
   eversion_t dirty_to,
   eversion_t dirty_from,
   eversion_t writeout_from,
-  set<string> &&trimmed,
+  set<eversion_t> &&trimmed,
   set<string> &&trimmed_dups,
   const pg_missing_tracker_t &missing,
   bool touch_log,
@@ -833,7 +833,8 @@ void PGLog::_write_log_and_missing(
   ) {
   set<string> to_remove;
   to_remove.swap(trimmed_dups);
-  for (auto& key : trimmed) {
+  for (auto& t : trimmed) {
+    string key = t.get_key_name();
     if (log_keys_debug) {
       auto it = log_keys_debug->find(key);
       ceph_assert(it != log_keys_debug->end());
index 1a4e9ee63343c1e8cf7c094fc432981cf35a405a..297e89d9a40ad0e3c822e9fa6455498ba45f66bb 100644 (file)
@@ -565,7 +565,7 @@ public:
     void trim(
       CephContext* cct,
       eversion_t s,
-      set<string> *trimmed,
+      set<eversion_t> *trimmed,
       set<string>* trimmed_dups,
       eversion_t *write_from_dups);
 
@@ -582,7 +582,7 @@ protected:
   eversion_t dirty_to;         ///< must clear/writeout all keys <= dirty_to
   eversion_t dirty_from;       ///< must clear/writeout all keys >= dirty_from
   eversion_t writeout_from;    ///< must writout keys >= writeout_from
-  set<string> trimmed;         ///< must clear keys in trimmed
+  set<eversion_t> trimmed;     ///< must clear keys in trimmed
   eversion_t dirty_to_dups;    ///< must clear/writeout all dups <= dirty_to_dups
   eversion_t dirty_from_dups;  ///< must clear/writeout all dups >= dirty_from_dups
   eversion_t write_from_dups;  ///< must write keys >= write_from_dups
@@ -1313,7 +1313,7 @@ public:
     eversion_t dirty_to,
     eversion_t dirty_from,
     eversion_t writeout_from,
-    set<string> &&trimmed,
+    set<eversion_t> &&trimmed,
     set<string> &&trimmed_dups,
     const pg_missing_tracker_t &missing,
     bool touch_log,
index 49b477b6b99293db5c0893c7b5804028bd933e90..99e22f82df9688cd11c7469969e6490611a5028e 100644 (file)
@@ -2716,7 +2716,7 @@ TEST_F(PGLogTrimTest, TestPartialTrim)
   log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 165), mk_evt(26, 160)));
   log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 167), mk_evt(31, 166)));
 
-  std::set<std::string> trimmed;
+  std::set<eversion_t> trimmed;
   std::set<std::string> trimmed_dups;
   eversion_t write_from_dups = eversion_t::max();
 
@@ -2730,7 +2730,7 @@ TEST_F(PGLogTrimTest, TestPartialTrim)
 
   SetUp(15);
 
-  std::set<std::string> trimmed2;
+  std::set<eversion_t> trimmed2;
   std::set<std::string> trimmed_dups2;
   eversion_t write_from_dups2 = eversion_t::max();
 
@@ -2783,7 +2783,7 @@ TEST_F(PGLogTrimTest, TestTrimNoDups)
   log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 165), mk_evt(26, 160)));
   log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 167), mk_evt(31, 166)));
 
-  std::set<std::string> trimmed;
+  std::set<eversion_t> trimmed;
   std::set<std::string> trimmed_dups;
   eversion_t write_from_dups = eversion_t::max();
 
@@ -2811,7 +2811,7 @@ TEST_F(PGLogTrimTest, TestNoTrim)
   log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 165), mk_evt(26, 160)));
   log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 167), mk_evt(31, 166)));
 
-  std::set<std::string> trimmed;
+  std::set<eversion_t> trimmed;
   std::set<std::string> trimmed_dups;
   eversion_t write_from_dups = eversion_t::max();
 
@@ -2840,7 +2840,7 @@ TEST_F(PGLogTrimTest, TestTrimAll)
   log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 165), mk_evt(26, 160)));
   log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 167), mk_evt(31, 166)));
 
-  std::set<std::string> trimmed;
+  std::set<eversion_t> trimmed;
   std::set<std::string> trimmed_dups;
   eversion_t write_from_dups = eversion_t::max();
 
index e98ef8f831a6563a236419e8fbb16c686e98447a..2f92be8fad7df38de69f9917b10294e6f9f79d23 100644 (file)
@@ -624,20 +624,6 @@ int write_pg(ObjectStore::Transaction &t, epoch_t epoch, pg_info_t &info,
   return 0;
 }
 
-bool need_trim_dups(ObjectStore *store, ObjectStore::CollectionHandle ch,
-                      const ghobject_t &oid) {
-  size_t count_dups = 0;
-  ObjectMap::ObjectMapIterator p = store->get_omap_iterator(ch, oid);
-  if (!p)
-    return false;
-  for (p->seek_to_first(); p->valid(); p->next()) {
-    if (p->key().substr(0, 4) == string("dup_"))
-       if (++count_dups > g_ceph_context->_conf->osd_pg_log_dups_tracked)
-        return true;
-   }
-   return false;
-}
-
 int do_trim_pg_log(ObjectStore *store, const coll_t &coll,
                   pg_info_t &info, const spg_t &pgid,
                   epoch_t map_epoch,
@@ -652,34 +638,23 @@ int do_trim_pg_log(ObjectStore *store, const coll_t &coll,
 
   cerr << "Log bounds are: " << "(" << info.log_tail << ","
        << info.last_update << "]" << std::endl;
-  uint64_t max_entries = std::min<uint64_t>(g_ceph_context->_conf->osd_max_pg_log_entries,
-                                           (info.last_update.version) ? (info.last_update.version - 1)
-                                               : g_ceph_context->_conf->osd_max_pg_log_entries
-                                           );
-  if (info.last_update.version - info.log_tail.version <= max_entries &&
-      !need_trim_dups(store, ch, oid)) {
-    cerr << "Log not larger than osd_max_pg_log_entries " << max_entries << " and no duplicate entries to trim" << std::endl;
+
+  uint64_t max_entries = g_ceph_context->_conf->osd_max_pg_log_entries;
+  if (info.last_update.version - info.log_tail.version <= max_entries) {
+    cerr << "Log not larger than osd_max_pg_log_entries " << max_entries << std::endl;
     return 0;
   }
 
-  PGLog::IndexedLog pg_log;
-  pg_log.head = info.last_update;
-  pg_log.skip_can_rollback_to_to_head();
-
   ceph_assert(info.last_update.version > max_entries);
-  eversion_t trim_to(info.last_update.epoch, info.last_update.version - max_entries);
-
+  version_t trim_to = info.last_update.version - max_entries;
   size_t trim_at_once = g_ceph_context->_conf->osd_pg_log_trim_max;
-  size_t tracked_dups = g_ceph_context->_conf->osd_pg_log_dups_tracked;
-  std::deque<pg_log_dup_t> latest_dups;
-  g_ceph_context->_conf->osd_pg_log_dups_tracked = 0; // Fake dup trimming all
-
   eversion_t new_tail;
   bool done = false;
 
   while (!done) {
     // gather keys so we can delete them in a batch without
     // affecting the iterator
+    set<string> keys_to_trim;
     {
     ObjectMap::ObjectMapIterator p = store->get_omap_iterator(ch, oid);
     if (!p)
@@ -697,80 +672,47 @@ int do_trim_pg_log(ObjectStore *store, const coll_t &coll,
        continue;
       if (p->key().substr(0, 7) == string("missing"))
        continue;
+      if (p->key().substr(0, 4) == string("dup_"))
+       continue;
+
       bufferlist bl = p->value();
       auto bp = bl.cbegin();
-
-      if (p->key().substr(0, 4) == string("dup_")) {
-        pg_log_dup_t dup;
-       try {
-         dup.decode(bp);
-         latest_dups.push_back(dup);
-         if (latest_dups.size() > tracked_dups) {
-           pg_log.dups.push_back(latest_dups.front());
-           latest_dups.pop_front();
-         }
-         if (debug) {
-            cerr << "read entry " << dup << std::endl;
-          }
-       } catch (buffer::error& e) {
-         cerr << "error reading log dup: " << p->key() << std::endl;
-         return -EFAULT;
-       }
-      } else {
-        pg_log_entry_t e;
-        try {
-         e.decode_with_checksum(bp);
-         pg_log.log.push_back(e);
-         if (debug) {
-            cerr << "read entry " << e << std::endl;
-          }
-       } catch (const buffer::error &e) {
-          cerr << "Error reading pg log entry: " << e << std::endl;
-        }
+      pg_log_entry_t e;
+      try {
+       e.decode_with_checksum(bp);
+      } catch (const buffer::error &e) {
+       cerr << "Error reading pg log entry: " << e << std::endl;
       }
-      // We have enough pg logs entries to trim
-      if ((pg_log.log.size() + pg_log.dups.size()) >= trim_at_once) {
-        break;
+      if (debug) {
+       cerr << "read entry " << e << std::endl;
+      }
+      if (e.version.version > trim_to) {
+       done = true;
+       break;
       }
+      keys_to_trim.insert(p->key());
+      new_tail = e.version;
+      if (keys_to_trim.size() >= trim_at_once)
+       break;
     }
+
     if (!p->valid())
       done = true;
-
-    pg_log.index();
-
     } // deconstruct ObjectMapIterator
 
-    if (!dry_run && ((pg_log.log.size() > 0) || (pg_log.dups.size() > 0))) {
-      std::set<std::string> trimmed;
-      std::set<std::string> trimmed_dups;
-      eversion_t write_from_dups = eversion_t::max();
-
-      pg_log.trim(g_ceph_context, trim_to, &trimmed, &trimmed_dups, &write_from_dups);
-      // Is there any trimming to do?
-      if (!trimmed.empty() || !trimmed_dups.empty()) {
-        new_tail = pg_log.tail;
-
-        ObjectStore::Transaction t;
-        if (!trimmed.empty()) {
-         cerr << "Removing keys " << *trimmed.begin() << " - " << *trimmed.rbegin() << std::endl;
-          t.omap_rmkeys(coll, oid, trimmed);
-       }
-       if (!trimmed_dups.empty()) {
-          cerr << "Removing Dups " << *trimmed_dups.begin() << " - " << *trimmed_dups.rbegin() << std::endl;
-          t.omap_rmkeys(coll, oid, trimmed_dups);
-       }
-        store->queue_transaction(ch, std::move(t));
-        ch->flush();
-       pg_log.log.clear();
-       pg_log.dups.clear();
-      }
+    // delete the keys
+    if (!dry_run && !keys_to_trim.empty()) {
+      cout << "Removing keys " << *keys_to_trim.begin() << " - " << *keys_to_trim.rbegin() << std::endl;
+      ObjectStore::Transaction t;
+      t.omap_rmkeys(coll, oid, keys_to_trim);
+      store->queue_transaction(ch, std::move(t));
+      ch->flush();
     }
   }
 
   // update pg info with new tail
   if (!dry_run && new_tail !=  eversion_t()) {
     info.log_tail = new_tail;
-
     ObjectStore::Transaction t;
     int ret = write_info(t, map_epoch, info, past_intervals);
     if (ret)
@@ -779,7 +721,6 @@ int do_trim_pg_log(ObjectStore *store, const coll_t &coll,
     ch->flush();
   }
 
-  g_ceph_context->_conf->osd_pg_log_dups_tracked = tracked_dups; //Restore original
   // compact the db since we just removed a bunch of data
   cerr << "Finished trimming, now compacting..." << std::endl;
   if (!dry_run)