]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
tools/ceph_objectstore_took: Add duplicate entry trimming
authorNitzanMordhai <nmordech@redhat.com>
Wed, 23 Mar 2022 18:52:45 +0000 (18:52 +0000)
committerRadosław Zarzyński <rzarzyns@redhat.com>
Thu, 12 May 2022 22:39:53 +0000 (00:39 +0200)
Adding duplicate entries trimming to trim-pg-log opertion, we will use the exist
PGLog trim function to find out the set of entries\dup entries that we suppose to
trim. to use it we need to build the PGLog from disk.

Fixed: https://tracker.ceph.com/issues/53729
Signed-off-by: Nitzan Mordechai <nmordec@redhat.com>
(cherry picked from commit 9fb7ec61ba10896ce01d5970375f1ce1dd993874)

src/osd/PGLog.cc
src/osd/PGLog.h
src/test/osd/TestPGLog.cc
src/tools/ceph_objectstore_tool.cc

index 2c322b3d8f38c2938bb76070907688c52e055f6a..91f75ddae4ed07e25a5d9509ee6e16a3f1f47983 100644 (file)
@@ -56,7 +56,7 @@ void PGLog::IndexedLog::split_out_child(
 void PGLog::IndexedLog::trim(
   CephContext* cct,
   eversion_t s,
-  set<eversion_t> *trimmed,
+  set<string> *trimmed,
   set<string>* trimmed_dups,
   eversion_t *write_from_dups)
 {
@@ -76,7 +76,7 @@ void PGLog::IndexedLog::trim(
       break;
     lgeneric_subdout(cct, osd, 20) << "trim " << e << dendl;
     if (trimmed)
-      trimmed->emplace(e.version);
+      trimmed->insert(e.get_key_name());
 
     unindex(e);         // remove from index,
 
@@ -681,7 +681,7 @@ void PGLog::write_log_and_missing(
     eversion_t::max(),
     eversion_t(),
     eversion_t(),
-    set<eversion_t>(),
+    set<string>(),
     set<string>(),
     missing,
     true, require_rollback, false,
@@ -815,7 +815,7 @@ void PGLog::_write_log_and_missing(
   eversion_t dirty_to,
   eversion_t dirty_from,
   eversion_t writeout_from,
-  set<eversion_t> &&trimmed,
+  set<string> &&trimmed,
   set<string> &&trimmed_dups,
   const pg_missing_tracker_t &missing,
   bool touch_log,
@@ -829,8 +829,7 @@ void PGLog::_write_log_and_missing(
   ) {
   set<string> to_remove;
   to_remove.swap(trimmed_dups);
-  for (auto& t : trimmed) {
-    string key = t.get_key_name();
+  for (auto& key : trimmed) {
     if (log_keys_debug) {
       auto it = log_keys_debug->find(key);
       ceph_assert(it != log_keys_debug->end());
index 920b40dabc1a225df354e5168449602b9e5b6fd1..0adcaf8a1121f0d7c856670c03b3dfaa82e45c76 100644 (file)
@@ -633,7 +633,7 @@ public:
     void trim(
       CephContext* cct,
       eversion_t s,
-      std::set<eversion_t> *trimmed,
+      std::set<std::string> *trimmed,
       std::set<std::string>* trimmed_dups,
       eversion_t *write_from_dups);
 
@@ -650,7 +650,7 @@ protected:
   eversion_t dirty_to;         ///< must clear/writeout all keys <= dirty_to
   eversion_t dirty_from;       ///< must clear/writeout all keys >= dirty_from
   eversion_t writeout_from;    ///< must writout keys >= writeout_from
-  std::set<eversion_t> trimmed;     ///< must clear keys in trimmed
+  std::set<std::string> trimmed;     ///< must clear keys in trimmed
   eversion_t dirty_to_dups;    ///< must clear/writeout all dups <= dirty_to_dups
   eversion_t dirty_from_dups;  ///< must clear/writeout all dups >= dirty_from_dups
   eversion_t write_from_dups;  ///< must write keys >= write_from_dups
@@ -1372,7 +1372,7 @@ public:
     eversion_t dirty_to,
     eversion_t dirty_from,
     eversion_t writeout_from,
-    std::set<eversion_t> &&trimmed,
+    std::set<std::string> &&trimmed,
     std::set<std::string> &&trimmed_dups,
     const pg_missing_tracker_t &missing,
     bool touch_log,
index 3c45d1212e459a6fcb111230dd15f7746eb70d79..f1bfb56b355a64061fed30bd2fb21d61b04e6bf4 100644 (file)
@@ -2716,7 +2716,7 @@ TEST_F(PGLogTrimTest, TestPartialTrim)
   log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 165), mk_evt(26, 160)));
   log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 167), mk_evt(31, 166)));
 
-  std::set<eversion_t> trimmed;
+  std::set<std::string> trimmed;
   std::set<std::string> trimmed_dups;
   eversion_t write_from_dups = eversion_t::max();
 
@@ -2730,7 +2730,7 @@ TEST_F(PGLogTrimTest, TestPartialTrim)
 
   SetUp(15);
 
-  std::set<eversion_t> trimmed2;
+  std::set<std::string> trimmed2;
   std::set<std::string> trimmed_dups2;
   eversion_t write_from_dups2 = eversion_t::max();
 
@@ -2783,7 +2783,7 @@ TEST_F(PGLogTrimTest, TestTrimNoDups)
   log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 165), mk_evt(26, 160)));
   log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 167), mk_evt(31, 166)));
 
-  std::set<eversion_t> trimmed;
+  std::set<std::string> trimmed;
   std::set<std::string> trimmed_dups;
   eversion_t write_from_dups = eversion_t::max();
 
@@ -2811,7 +2811,7 @@ TEST_F(PGLogTrimTest, TestNoTrim)
   log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 165), mk_evt(26, 160)));
   log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 167), mk_evt(31, 166)));
 
-  std::set<eversion_t> trimmed;
+  std::set<std::string> trimmed;
   std::set<std::string> trimmed_dups;
   eversion_t write_from_dups = eversion_t::max();
 
@@ -2840,7 +2840,7 @@ TEST_F(PGLogTrimTest, TestTrimAll)
   log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 165), mk_evt(26, 160)));
   log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 167), mk_evt(31, 166)));
 
-  std::set<eversion_t> trimmed;
+  std::set<std::string> trimmed;
   std::set<std::string> trimmed_dups;
   eversion_t write_from_dups = eversion_t::max();
 
index d9abe03af6edf452d689ced3ef636a5cc5fa38bc..55e3993414793f91e70faccbac6b3aec17f2c01e 100644 (file)
@@ -624,6 +624,20 @@ int write_pg(ObjectStore::Transaction &t, epoch_t epoch, pg_info_t &info,
   return 0;
 }
 
+bool need_trim_dups(ObjectStore *store, ObjectStore::CollectionHandle ch,
+                      const ghobject_t &oid) {
+  size_t count_dups = 0;
+  ObjectMap::ObjectMapIterator p = store->get_omap_iterator(ch, oid);
+  if (!p)
+    return false;
+  for (p->seek_to_first(); p->valid(); p->next()) {
+    if (p->key().substr(0, 4) == string("dup_"))
+       if (++count_dups > g_ceph_context->_conf->osd_pg_log_dups_tracked)
+        return true;
+   }
+   return false;
+}
+
 int do_trim_pg_log(ObjectStore *store, const coll_t &coll,
                   pg_info_t &info, const spg_t &pgid,
                   epoch_t map_epoch,
@@ -638,23 +652,34 @@ int do_trim_pg_log(ObjectStore *store, const coll_t &coll,
 
   cerr << "Log bounds are: " << "(" << info.log_tail << ","
        << info.last_update << "]" << std::endl;
-
-  uint64_t max_entries = g_ceph_context->_conf->osd_max_pg_log_entries;
-  if (info.last_update.version - info.log_tail.version <= max_entries) {
-    cerr << "Log not larger than osd_max_pg_log_entries " << max_entries << std::endl;
+  uint64_t max_entries = std::min<uint64_t>(g_ceph_context->_conf->osd_max_pg_log_entries,
+                                           (info.last_update.version) ? (info.last_update.version - 1)
+                                               : g_ceph_context->_conf->osd_max_pg_log_entries
+                                           );
+  if (info.last_update.version - info.log_tail.version <= max_entries &&
+      !need_trim_dups(store, ch, oid)) {
+    cerr << "Log not larger than osd_max_pg_log_entries " << max_entries << " and no duplicate entries to trim" << std::endl;
     return 0;
   }
 
+  PGLog::IndexedLog pg_log;
+  pg_log.head = info.last_update;
+  pg_log.skip_can_rollback_to_to_head();
+
   ceph_assert(info.last_update.version > max_entries);
-  version_t trim_to = info.last_update.version - max_entries;
+  eversion_t trim_to(info.last_update.epoch, info.last_update.version - max_entries);
+
   size_t trim_at_once = g_ceph_context->_conf->osd_pg_log_trim_max;
+  size_t tracked_dups = g_ceph_context->_conf->osd_pg_log_dups_tracked;
+  std::deque<pg_log_dup_t> latest_dups;
+  g_ceph_context->_conf->osd_pg_log_dups_tracked = 0; // Fake dup trimming all
+
   eversion_t new_tail;
   bool done = false;
 
   while (!done) {
     // gather keys so we can delete them in a batch without
     // affecting the iterator
-    set<string> keys_to_trim;
     {
     ObjectMap::ObjectMapIterator p = store->get_omap_iterator(ch, oid);
     if (!p)
@@ -672,47 +697,80 @@ int do_trim_pg_log(ObjectStore *store, const coll_t &coll,
        continue;
       if (p->key().substr(0, 7) == string("missing"))
        continue;
-      if (p->key().substr(0, 4) == string("dup_"))
-       continue;
-
       bufferlist bl = p->value();
       auto bp = bl.cbegin();
-      pg_log_entry_t e;
-      try {
-       e.decode_with_checksum(bp);
-      } catch (const buffer::error &e) {
-       cerr << "Error reading pg log entry: " << e.what() << std::endl;
-      }
-      if (debug) {
-       cerr << "read entry " << e << std::endl;
+
+      if (p->key().substr(0, 4) == string("dup_")) {
+        pg_log_dup_t dup;
+       try {
+         dup.decode(bp);
+         latest_dups.push_back(dup);
+         if (latest_dups.size() > tracked_dups) {
+           pg_log.dups.push_back(latest_dups.front());
+           latest_dups.pop_front();
+         }
+         if (debug) {
+            cerr << "read entry " << dup << std::endl;
+          }
+       } catch (buffer::error& e) {
+         cerr << "error reading log dup: " << p->key() << std::endl;
+         return -EFAULT;
+       }
+      } else {
+        pg_log_entry_t e;
+        try {
+         e.decode_with_checksum(bp);
+         pg_log.log.push_back(e);
+         if (debug) {
+            cerr << "read entry " << e << std::endl;
+          }
+       } catch (const buffer::error &e) {
+          cerr << "Error reading pg log entry: " << e.what() << std::endl;
+        }
       }
-      if (e.version.version > trim_to) {
-       done = true;
-       break;
+      // We have enough pg logs entries to trim
+      if ((pg_log.log.size() + pg_log.dups.size()) >= trim_at_once) {
+        break;
       }
-      keys_to_trim.insert(p->key());
-      new_tail = e.version;
-      if (keys_to_trim.size() >= trim_at_once)
-       break;
     }
-
     if (!p->valid())
       done = true;
+
+    pg_log.index();
+
     } // deconstruct ObjectMapIterator
 
-    // delete the keys
-    if (!dry_run && !keys_to_trim.empty()) {
-      cout << "Removing keys " << *keys_to_trim.begin() << " - " << *keys_to_trim.rbegin() << std::endl;
-      ObjectStore::Transaction t;
-      t.omap_rmkeys(coll, oid, keys_to_trim);
-      store->queue_transaction(ch, std::move(t));
-      ch->flush();
+    if (!dry_run && ((pg_log.log.size() > 0) || (pg_log.dups.size() > 0))) {
+      std::set<std::string> trimmed;
+      std::set<std::string> trimmed_dups;
+      eversion_t write_from_dups = eversion_t::max();
+
+      pg_log.trim(g_ceph_context, trim_to, &trimmed, &trimmed_dups, &write_from_dups);
+      // Is there any trimming to do?
+      if (!trimmed.empty() || !trimmed_dups.empty()) {
+        new_tail = pg_log.tail;
+
+        ObjectStore::Transaction t;
+        if (!trimmed.empty()) {
+         cerr << "Removing keys " << *trimmed.begin() << " - " << *trimmed.rbegin() << std::endl;
+          t.omap_rmkeys(coll, oid, trimmed);
+       }
+       if (!trimmed_dups.empty()) {
+          cerr << "Removing Dups " << *trimmed_dups.begin() << " - " << *trimmed_dups.rbegin() << std::endl;
+          t.omap_rmkeys(coll, oid, trimmed_dups);
+       }
+        store->queue_transaction(ch, std::move(t));
+        ch->flush();
+       pg_log.log.clear();
+       pg_log.dups.clear();
+      }
     }
   }
 
   // update pg info with new tail
   if (!dry_run && new_tail !=  eversion_t()) {
     info.log_tail = new_tail;
+
     ObjectStore::Transaction t;
     int ret = write_info(t, map_epoch, info, past_intervals);
     if (ret)
@@ -721,6 +779,7 @@ int do_trim_pg_log(ObjectStore *store, const coll_t &coll,
     ch->flush();
   }
 
+  g_ceph_context->_conf->osd_pg_log_dups_tracked = tracked_dups; //Restore original
   // compact the db since we just removed a bunch of data
   cerr << "Finished trimming, now compacting..." << std::endl;
   if (!dry_run)