From: Radoslaw Zarzynski Date: Thu, 9 Jun 2022 18:22:45 +0000 (+0000) Subject: Revert "osd/PGLog.cc: Trim duplicates by number of entries" X-Git-Tag: v17.2.1~4^2~1 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=25bc8c1c4a9c5acdadc3df893d233fef8eac4018;p=ceph.git Revert "osd/PGLog.cc: Trim duplicates by number of entries" This reverts commit 3ff0df6a28a1d9e197bdba40be7126fed8a14ae9 which is the in-OSD part of the fix for accumulation of `dup` entries in a PG Log. Brainstorming it has brought questions on the OSD's behaviour during an upgrade if there are tons of dups in the log. What must be double-checked before bringing it back is ensuring we chunk the deletions properly to not impose OOMs / stalls in, to exemplify, RocksDB. The backport ticket is: https://tracker.ceph.com/issues/55981 Signed-off-by: Radoslaw Zarzynski --- diff --git a/src/osd/PGLog.cc b/src/osd/PGLog.cc index 3fa711a6f1dc9..0f184497ada98 100644 --- a/src/osd/PGLog.cc +++ b/src/osd/PGLog.cc @@ -131,8 +131,10 @@ void PGLog::IndexedLog::trim( } } - while (dups.size() > cct->_conf->osd_pg_log_dups_tracked) { + while (!dups.empty()) { const auto& e = *dups.begin(); + if (e.version.version >= earliest_dup_version) + break; lgeneric_subdout(cct, osd, 20) << "trim dup " << e << dendl; if (trimmed_dups) trimmed_dups->insert(e.get_key_name()); diff --git a/src/osd/PGLog.h b/src/osd/PGLog.h index 34ed71a72b366..06d0024743a98 100644 --- a/src/osd/PGLog.h +++ b/src/osd/PGLog.h @@ -1395,7 +1395,7 @@ public: bool debug_verify_stored_missing = false ) { return read_log_and_missing( - cct, store, ch, pgmeta_oid, info, + store, ch, pgmeta_oid, info, log, missing, oss, tolerate_divergent_missing_log, &clear_divergent_priors, @@ -1406,7 +1406,6 @@ public: template static void read_log_and_missing( - CephContext *cct, ObjectStore *store, ObjectStore::CollectionHandle &ch, ghobject_t pgmeta_oid, @@ -1476,9 +1475,6 @@ public: ceph_assert(dups.back().version < dup.version); } dups.push_back(dup); - if (dups.size() >= cct->_conf->osd_pg_log_dups_tracked) { - dups.pop_front(); - } } else { pg_log_entry_t e; e.decode_with_checksum(bp); diff --git a/src/test/osd/TestPGLog.cc b/src/test/osd/TestPGLog.cc index 004e3cd1c5891..b08b9f0ccc288 100644 --- a/src/test/osd/TestPGLog.cc +++ b/src/test/osd/TestPGLog.cc @@ -2740,8 +2740,8 @@ TEST_F(PGLogTrimTest, TestPartialTrim) EXPECT_EQ(eversion_t(19, 160), write_from_dups2); EXPECT_EQ(2u, log.log.size()); EXPECT_EQ(1u, trimmed2.size()); - EXPECT_EQ(3u, log.dups.size()); - EXPECT_EQ(0u, trimmed_dups2.size()); + EXPECT_EQ(2u, log.dups.size()); + EXPECT_EQ(1u, trimmed_dups2.size()); } @@ -3024,7 +3024,7 @@ TEST_F(PGLogTrimTest, TestTrimDups) { EXPECT_EQ(eversion_t(20, 103), write_from_dups) << log; EXPECT_EQ(2u, log.log.size()) << log; - EXPECT_EQ(4u, log.dups.size()) << log; + EXPECT_EQ(3u, log.dups.size()) << log; } // This tests trim() to make copies of @@ -3068,50 +3068,7 @@ TEST_F(PGLogTrimTest, TestTrimDups2) { EXPECT_EQ(eversion_t(10, 100), write_from_dups) << log; EXPECT_EQ(4u, log.log.size()) << log; - EXPECT_EQ(6u, log.dups.size()) << log; -} - -// This tests trim() to make copies of -// 5 log entries (107, 106, 105, 104, 103) and trim all dups -TEST_F(PGLogTrimTest, TestTrimAllDups) { - SetUp(0); - PGLog::IndexedLog log; - log.head = mk_evt(21, 107); - log.skip_can_rollback_to_to_head(); - log.tail = mk_evt(9, 99); - log.head = mk_evt(9, 99); - - entity_name_t client = entity_name_t::CLIENT(777); - - log.dups.push_back(pg_log_dup_t(mk_ple_mod(mk_obj(1), - mk_evt(9, 98), mk_evt(8, 97), osd_reqid_t(client, 8, 1)))); - log.dups.push_back(pg_log_dup_t(mk_ple_mod(mk_obj(1), - mk_evt(9, 99), mk_evt(8, 98), osd_reqid_t(client, 8, 1)))); - - log.add(mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(9, 99), - osd_reqid_t(client, 8, 1))); - log.add(mk_ple_dt(mk_obj(2), mk_evt(15, 101), mk_evt(10, 100), - osd_reqid_t(client, 8, 2))); - log.add(mk_ple_mod_rb(mk_obj(3), mk_evt(15, 102), mk_evt(15, 101), - osd_reqid_t(client, 8, 3))); - log.add(mk_ple_mod(mk_obj(1), mk_evt(20, 103), mk_evt(15, 102), - osd_reqid_t(client, 8, 4))); - log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 104), mk_evt(20, 103), - osd_reqid_t(client, 8, 5))); - log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 105), mk_evt(21, 104), - osd_reqid_t(client, 8, 6))); - log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 106), mk_evt(21, 105), - osd_reqid_t(client, 8, 6))); - log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 107), mk_evt(21, 106), - osd_reqid_t(client, 8, 6))); - - eversion_t write_from_dups = eversion_t::max(); - - log.trim(cct, mk_evt(20, 102), nullptr, nullptr, &write_from_dups); - - EXPECT_EQ(eversion_t::max(), write_from_dups) << log; - EXPECT_EQ(5u, log.log.size()) << log; - EXPECT_EQ(0u, log.dups.size()) << log; + EXPECT_EQ(5u, log.dups.size()) << log; } // This tests copy_up_to() to make copies of diff --git a/src/tools/ceph_objectstore_tool.cc b/src/tools/ceph_objectstore_tool.cc index b51c7c1f5dc24..2d8637c4ed86f 100644 --- a/src/tools/ceph_objectstore_tool.cc +++ b/src/tools/ceph_objectstore_tool.cc @@ -447,7 +447,7 @@ int get_log(ObjectStore *fs, __u8 struct_ver, ostringstream oss; ceph_assert(struct_ver > 0); PGLog::read_log_and_missing( - g_ceph_context, fs, ch, + fs, ch, pgid.make_pgmeta_oid(), info, log, missing, oss,