]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
Revert "tools/ceph_objectstore_took: Add duplicate entry trimming" 46605/head
authorRadoslaw Zarzynski <rzarzyns@redhat.com>
Thu, 9 Jun 2022 20:10:31 +0000 (20:10 +0000)
committerRadoslaw Zarzynski <rzarzyns@redhat.com>
Thu, 9 Jun 2022 20:17:24 +0000 (20:17 +0000)
This reverts commit 5245fb33dd02e53cf0ef5d2f7be5904b6fbe63ce.

Although the chunking in off-line `dups` trimming (via COT) seems
fine, the `ceph-objectstore-tool` is a client of `trim()` of
`PGLog::IndexedLog` which means that a partial revert is not
possible without extensive changes.

The backport ticket is: https://tracker.ceph.com/issues/55981

Signed-off-by: Radoslaw Zarzynski <rzarzyns@redhat.com>
src/osd/PGLog.cc
src/osd/PGLog.h
src/test/osd/TestPGLog.cc
src/tools/ceph_objectstore_tool.cc

index 0f184497ada981c0657b0d5c2257f4a4713b03a2..d7ab12c01dfa269ab18f7110876625f40749c693 100644 (file)
@@ -56,7 +56,7 @@ void PGLog::IndexedLog::split_out_child(
 void PGLog::IndexedLog::trim(
   CephContext* cct,
   eversion_t s,
-  set<string> *trimmed,
+  set<eversion_t> *trimmed,
   set<string>* trimmed_dups,
   eversion_t *write_from_dups)
 {
@@ -76,7 +76,7 @@ void PGLog::IndexedLog::trim(
       break;
     lgeneric_subdout(cct, osd, 20) << "trim " << e << dendl;
     if (trimmed)
-      trimmed->insert(e.get_key_name());
+      trimmed->emplace(e.version);
 
     unindex(e);         // remove from index,
 
@@ -681,7 +681,7 @@ void PGLog::write_log_and_missing(
     eversion_t::max(),
     eversion_t(),
     eversion_t(),
-    set<string>(),
+    set<eversion_t>(),
     set<string>(),
     missing,
     true, require_rollback, false,
@@ -815,7 +815,7 @@ void PGLog::_write_log_and_missing(
   eversion_t dirty_to,
   eversion_t dirty_from,
   eversion_t writeout_from,
-  set<string> &&trimmed,
+  set<eversion_t> &&trimmed,
   set<string> &&trimmed_dups,
   const pg_missing_tracker_t &missing,
   bool touch_log,
@@ -829,7 +829,8 @@ void PGLog::_write_log_and_missing(
   ) {
   set<string> to_remove;
   to_remove.swap(trimmed_dups);
-  for (auto& key : trimmed) {
+  for (auto& t : trimmed) {
+    string key = t.get_key_name();
     if (log_keys_debug) {
       auto it = log_keys_debug->find(key);
       ceph_assert(it != log_keys_debug->end());
index 06d0024743a988d8750eaf0280f7e4d912ac13ff..25c88217c51cce1ddd35c7da9e58eeea2495b396 100644 (file)
@@ -633,7 +633,7 @@ public:
     void trim(
       CephContext* cct,
       eversion_t s,
-      std::set<std::string> *trimmed,
+      std::set<eversion_t> *trimmed,
       std::set<std::string>* trimmed_dups,
       eversion_t *write_from_dups);
 
@@ -650,7 +650,7 @@ protected:
   eversion_t dirty_to;         ///< must clear/writeout all keys <= dirty_to
   eversion_t dirty_from;       ///< must clear/writeout all keys >= dirty_from
   eversion_t writeout_from;    ///< must writout keys >= writeout_from
-  std::set<std::string> trimmed;     ///< must clear keys in trimmed
+  std::set<eversion_t> trimmed;     ///< must clear keys in trimmed
   eversion_t dirty_to_dups;    ///< must clear/writeout all dups <= dirty_to_dups
   eversion_t dirty_from_dups;  ///< must clear/writeout all dups >= dirty_from_dups
   eversion_t write_from_dups;  ///< must write keys >= write_from_dups
@@ -1372,7 +1372,7 @@ public:
     eversion_t dirty_to,
     eversion_t dirty_from,
     eversion_t writeout_from,
-    std::set<std::string> &&trimmed,
+    std::set<eversion_t> &&trimmed,
     std::set<std::string> &&trimmed_dups,
     const pg_missing_tracker_t &missing,
     bool touch_log,
index b08b9f0ccc288ed1d9324b5e2b88c57d072cdda0..3aab689a6d281d357223261db3a5e053da28e265 100644 (file)
@@ -2717,7 +2717,7 @@ TEST_F(PGLogTrimTest, TestPartialTrim)
   log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 165), mk_evt(26, 160)));
   log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 167), mk_evt(31, 166)));
 
-  std::set<std::string> trimmed;
+  std::set<eversion_t> trimmed;
   std::set<std::string> trimmed_dups;
   eversion_t write_from_dups = eversion_t::max();
 
@@ -2731,7 +2731,7 @@ TEST_F(PGLogTrimTest, TestPartialTrim)
 
   SetUp(15);
 
-  std::set<std::string> trimmed2;
+  std::set<eversion_t> trimmed2;
   std::set<std::string> trimmed_dups2;
   eversion_t write_from_dups2 = eversion_t::max();
 
@@ -2784,7 +2784,7 @@ TEST_F(PGLogTrimTest, TestTrimNoDups)
   log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 165), mk_evt(26, 160)));
   log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 167), mk_evt(31, 166)));
 
-  std::set<std::string> trimmed;
+  std::set<eversion_t> trimmed;
   std::set<std::string> trimmed_dups;
   eversion_t write_from_dups = eversion_t::max();
 
@@ -2812,7 +2812,7 @@ TEST_F(PGLogTrimTest, TestNoTrim)
   log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 165), mk_evt(26, 160)));
   log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 167), mk_evt(31, 166)));
 
-  std::set<std::string> trimmed;
+  std::set<eversion_t> trimmed;
   std::set<std::string> trimmed_dups;
   eversion_t write_from_dups = eversion_t::max();
 
@@ -2841,7 +2841,7 @@ TEST_F(PGLogTrimTest, TestTrimAll)
   log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 165), mk_evt(26, 160)));
   log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 167), mk_evt(31, 166)));
 
-  std::set<std::string> trimmed;
+  std::set<eversion_t> trimmed;
   std::set<std::string> trimmed_dups;
   eversion_t write_from_dups = eversion_t::max();
 
index 2d8637c4ed86f15fdea16da694e60c9fd757f527..8690bd96a1887027f9fd02e6ee3ec86cd3bbb67f 100644 (file)
@@ -632,20 +632,6 @@ int write_pg(ObjectStore::Transaction &t, epoch_t epoch, pg_info_t &info,
   return 0;
 }
 
-bool need_trim_dups(ObjectStore *store, ObjectStore::CollectionHandle ch,
-                      const ghobject_t &oid) {
-  size_t count_dups = 0;
-  ObjectMap::ObjectMapIterator p = store->get_omap_iterator(ch, oid);
-  if (!p)
-    return false;
-  for (p->seek_to_first(); p->valid(); p->next()) {
-    if (p->key().substr(0, 4) == string("dup_"))
-       if (++count_dups > g_ceph_context->_conf->osd_pg_log_dups_tracked)
-        return true;
-   }
-   return false;
-}
-
 int do_trim_pg_log(ObjectStore *store, const coll_t &coll,
                   pg_info_t &info, const spg_t &pgid,
                   epoch_t map_epoch,
@@ -660,34 +646,23 @@ int do_trim_pg_log(ObjectStore *store, const coll_t &coll,
 
   cerr << "Log bounds are: " << "(" << info.log_tail << ","
        << info.last_update << "]" << std::endl;
-  uint64_t max_entries = std::min<uint64_t>(g_ceph_context->_conf->osd_max_pg_log_entries,
-                                           (info.last_update.version) ? (info.last_update.version - 1)
-                                               : g_ceph_context->_conf->osd_max_pg_log_entries
-                                           );
-  if (info.last_update.version - info.log_tail.version <= max_entries &&
-      !need_trim_dups(store, ch, oid)) {
-    cerr << "Log not larger than osd_max_pg_log_entries " << max_entries << " and no duplicate entries to trim" << std::endl;
+
+  uint64_t max_entries = g_ceph_context->_conf->osd_max_pg_log_entries;
+  if (info.last_update.version - info.log_tail.version <= max_entries) {
+    cerr << "Log not larger than osd_max_pg_log_entries " << max_entries << std::endl;
     return 0;
   }
 
-  PGLog::IndexedLog pg_log;
-  pg_log.head = info.last_update;
-  pg_log.skip_can_rollback_to_to_head();
-
   ceph_assert(info.last_update.version > max_entries);
-  eversion_t trim_to(info.last_update.epoch, info.last_update.version - max_entries);
-
+  version_t trim_to = info.last_update.version - max_entries;
   size_t trim_at_once = g_ceph_context->_conf->osd_pg_log_trim_max;
-  size_t tracked_dups = g_ceph_context->_conf->osd_pg_log_dups_tracked;
-  std::deque<pg_log_dup_t> latest_dups;
-  g_ceph_context->_conf->osd_pg_log_dups_tracked = 0; // Fake dup trimming all
-
   eversion_t new_tail;
   bool done = false;
 
   while (!done) {
     // gather keys so we can delete them in a batch without
     // affecting the iterator
+    set<string> keys_to_trim;
     {
     ObjectMap::ObjectMapIterator p = store->get_omap_iterator(ch, oid);
     if (!p)
@@ -705,80 +680,47 @@ int do_trim_pg_log(ObjectStore *store, const coll_t &coll,
        continue;
       if (p->key().substr(0, 7) == string("missing"))
        continue;
+      if (p->key().substr(0, 4) == string("dup_"))
+       continue;
+
       bufferlist bl = p->value();
       auto bp = bl.cbegin();
-
-      if (p->key().substr(0, 4) == string("dup_")) {
-        pg_log_dup_t dup;
-       try {
-         dup.decode(bp);
-         latest_dups.push_back(dup);
-         if (latest_dups.size() > tracked_dups) {
-           pg_log.dups.push_back(latest_dups.front());
-           latest_dups.pop_front();
-         }
-         if (debug) {
-            cerr << "read entry " << dup << std::endl;
-          }
-       } catch (buffer::error& e) {
-         cerr << "error reading log dup: " << p->key() << std::endl;
-         return -EFAULT;
-       }
-      } else {
-        pg_log_entry_t e;
-        try {
-         e.decode_with_checksum(bp);
-         pg_log.log.push_back(e);
-         if (debug) {
-            cerr << "read entry " << e << std::endl;
-          }
-       } catch (const buffer::error &e) {
-          cerr << "Error reading pg log entry: " << e.what() << std::endl;
-        }
+      pg_log_entry_t e;
+      try {
+       e.decode_with_checksum(bp);
+      } catch (const buffer::error &e) {
+       cerr << "Error reading pg log entry: " << e.what() << std::endl;
       }
-      // We have enough pg logs entries to trim
-      if ((pg_log.log.size() + pg_log.dups.size()) >= trim_at_once) {
-        break;
+      if (debug) {
+       cerr << "read entry " << e << std::endl;
+      }
+      if (e.version.version > trim_to) {
+       done = true;
+       break;
       }
+      keys_to_trim.insert(p->key());
+      new_tail = e.version;
+      if (keys_to_trim.size() >= trim_at_once)
+       break;
     }
+
     if (!p->valid())
       done = true;
-
-    pg_log.index();
-
     } // deconstruct ObjectMapIterator
 
-    if (!dry_run && ((pg_log.log.size() > 0) || (pg_log.dups.size() > 0))) {
-      std::set<std::string> trimmed;
-      std::set<std::string> trimmed_dups;
-      eversion_t write_from_dups = eversion_t::max();
-
-      pg_log.trim(g_ceph_context, trim_to, &trimmed, &trimmed_dups, &write_from_dups);
-      // Is there any trimming to do?
-      if (!trimmed.empty() || !trimmed_dups.empty()) {
-        new_tail = pg_log.tail;
-
-        ObjectStore::Transaction t;
-        if (!trimmed.empty()) {
-         cerr << "Removing keys " << *trimmed.begin() << " - " << *trimmed.rbegin() << std::endl;
-          t.omap_rmkeys(coll, oid, trimmed);
-       }
-       if (!trimmed_dups.empty()) {
-          cerr << "Removing Dups " << *trimmed_dups.begin() << " - " << *trimmed_dups.rbegin() << std::endl;
-          t.omap_rmkeys(coll, oid, trimmed_dups);
-       }
-        store->queue_transaction(ch, std::move(t));
-        ch->flush();
-       pg_log.log.clear();
-       pg_log.dups.clear();
-      }
+    // delete the keys
+    if (!dry_run && !keys_to_trim.empty()) {
+      cout << "Removing keys " << *keys_to_trim.begin() << " - " << *keys_to_trim.rbegin() << std::endl;
+      ObjectStore::Transaction t;
+      t.omap_rmkeys(coll, oid, keys_to_trim);
+      store->queue_transaction(ch, std::move(t));
+      ch->flush();
     }
   }
 
   // update pg info with new tail
   if (!dry_run && new_tail !=  eversion_t()) {
     info.log_tail = new_tail;
-
     ObjectStore::Transaction t;
     int ret = write_info(t, map_epoch, info, past_intervals);
     if (ret)
@@ -787,7 +729,6 @@ int do_trim_pg_log(ObjectStore *store, const coll_t &coll,
     ch->flush();
   }
 
-  g_ceph_context->_conf->osd_pg_log_dups_tracked = tracked_dups; //Restore original
   // compact the db since we just removed a bunch of data
   cerr << "Finished trimming, now compacting..." << std::endl;
   if (!dry_run)