git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: Avoid pwlc spanning intervals 67244/head
author: Bill Scales <bill_scales@uk.ibm.com>
Fri, 6 Feb 2026 17:22:29 +0000 (17:22 +0000)
committer: Bill Scales <bill_scales@uk.ibm.com>
Mon, 9 Feb 2026 10:21:23 +0000 (10:21 +0000)
Prevent the first write to FastEC in each interval from being
a partial write, so that the span of partial writes tracked by
pwlc never crosses an interval boundary. This stops bugs such as
73891, where a divergent write was not removed from the log because
pwlc recorded that the shard had not participated in writes
before and after the divergent write.

Fixes: https://tracker.ceph.com/issues/73891
Signed-off-by: Bill Scales <bill_scales@uk.ibm.com>
src/osd/ECBackend.cc
src/osd/ECCommon.cc
src/osd/ECCommon.h
src/osd/ECTransaction.cc
src/osd/ECTransaction.h

index 8e462f1a0617b0b089ef8b2c45b19464b8c8765b..36b16f47b9dcc4e7ae6a1df6bb4de70515eb28d6 100644 (file)
@@ -898,7 +898,8 @@ struct ECClassicalOp : ECCommon::RMWPipeline::Op {
     map<hobject_t, ECUtil::shard_extent_map_t> *written,
     shard_id_map<ObjectStore::Transaction> *transactions,
     DoutPrefixProvider *dpp,
-    const OSDMapRef &osdmap) final {
+    const OSDMapRef &osdmap,
+    bool& first_write_in_interval) final {
     ceph_assert(t);
     ECTransaction::generate_transactions(
       t.get(),
@@ -913,7 +914,8 @@ struct ECClassicalOp : ECCommon::RMWPipeline::Op {
       &temp_added,
       &temp_cleared,
       dpp,
-      osdmap);
+      osdmap,
+      first_write_in_interval);
   }
 
   bool skip_transaction(
index 36c69fa31ce1c753d57375fecd5e553b9edab398..62b0615ee6aabfb613cb8fd8ec53e40c81fe5787 100644 (file)
@@ -841,7 +841,8 @@ void ECCommon::RMWPipeline::cache_ready(Op &op) {
     &written,
     &trans,
     get_parent()->get_dpp(),
-    get_osdmap());
+    get_osdmap(),
+    first_write_in_interval);
 
   dout(20) << __func__ << ": written: " << written << ", op: " << op << dendl;
 
@@ -972,7 +973,8 @@ struct ECDummyOp final : ECCommon::RMWPipeline::Op {
       map<hobject_t, ECUtil::shard_extent_map_t> *written,
       shard_id_map<ObjectStore::Transaction> *transactions,
       DoutPrefixProvider *dpp,
-      const OSDMapRef &osdmap
+      const OSDMapRef &osdmap,
+      bool &first_write_in_interval
     ) override {
     // NOP, as -- in contrast to ECClassicalOp -- there is no
     // transaction involved
@@ -1054,6 +1056,7 @@ void ECCommon::RMWPipeline::on_change() {
   oid_to_version.clear();
   waiting_commit.clear();
   next_write_all_shards = false;
+  first_write_in_interval = true;
 }
 
 void ECCommon::RMWPipeline::on_change2() {
index d98ac2cb49995c9eb4a29ab412c978e02328a0a6..895ffffb7072755d35229e8d71ce9ec41077b555 100644 (file)
@@ -549,7 +549,8 @@ struct ECCommon {
           std::map<hobject_t, ECUtil::shard_extent_map_t> *written,
           shard_id_map<ceph::os::Transaction> *transactions,
           DoutPrefixProvider *dpp,
-          const OSDMapRef &osdmap) = 0;
+          const OSDMapRef &osdmap,
+          bool &first_write_in_interval) = 0;
 
       virtual bool skip_transaction(
           std::set<shard_id_t> &pending_roll_forward,
@@ -663,6 +664,11 @@ struct ECCommon {
     uint64_t ec_pdw_write_mode;
     bool next_write_all_shards = false;
 
+    // True until the first write of an interval is generated (initially,
+    // and again after on_change); forces that write to all shards so that
+    // PWLC cannot span intervals. https://tracker.ceph.com/issues/73891
+    bool first_write_in_interval = true;
+
     RMWPipeline(CephContext *cct,
                 ceph::ErasureCodeInterfaceRef ec_impl,
                 const ECUtil::stripe_info_t &sinfo,
index 14f568af7af88a0a53ea85809484624920782204..6ce0f4e5ff2076b6298179223c7c554a21c803a2 100644 (file)
@@ -469,7 +469,8 @@ ECTransaction::Generate::Generate(PGTransaction &t,
     PGTransaction::ObjectOperation &op,
     WritePlanObj &plan,
     DoutPrefixProvider *dpp,
-    pg_log_entry_t *entry)
+    pg_log_entry_t *entry,
+    bool &first_write_in_interval)
   : t(t),
     ec_impl(ec_impl),
     pgid(pgid),
@@ -585,8 +586,9 @@ ECTransaction::Generate::Generate(PGTransaction &t,
     }
   }
 
-  if (size_change || clear_whiteout) {
+  if (size_change || clear_whiteout || first_write_in_interval) {
     all_shards_written();
+    first_write_in_interval = false;
   } else {
     // All primary shards must always be written, regardless of the write plan.
     shards_written(sinfo.get_parity_shards());
@@ -1008,7 +1010,8 @@ void ECTransaction::generate_transactions(
     set<hobject_t> *temp_added,
     set<hobject_t> *temp_removed,
     DoutPrefixProvider *dpp,
-    const OSDMapRef &osdmap) {
+    const OSDMapRef &osdmap,
+    bool &first_write_in_interval) {
   ceph_assert(written_map);
   ceph_assert(transactions);
   ceph_assert(temp_added);
@@ -1041,7 +1044,7 @@ void ECTransaction::generate_transactions(
       ceph_assert(plan.hoid == oid);
 
       Generate generate(t, ec_impl, pgid, sinfo, partial_extents, written_map,
-        *transactions, osdmap, oid, op, plan, dpp, entry);
+        *transactions, osdmap, oid, op, plan, dpp, entry, first_write_in_interval);
 
       plans.plans.pop_front();
   });
index 8b62d1b4394ed0deb66107b28b0a1f36c71c33c7..78526dbecba8af0187f00b73726efcd7dd382f4f 100644 (file)
@@ -121,7 +121,8 @@ class Generate {
     const hobject_t &oid, PGTransaction::ObjectOperation &op,
     WritePlanObj &plan,
     DoutPrefixProvider *dpp,
-    pg_log_entry_t *entry);
+    pg_log_entry_t *entry,
+    bool &first_write_in_interval);
 };
 
 void generate_transactions(
@@ -137,6 +138,7 @@ void generate_transactions(
     std::set<hobject_t> *temp_added,
     std::set<hobject_t> *temp_removed,
     DoutPrefixProvider *dpp,
-    const OSDMapRef &osdmap
+    const OSDMapRef &osdmap,
+    bool &first_write_in_interval
   );
 }