git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: Avoid pwlc spanning intervals 68708/head
author Bill Scales <bill_scales@uk.ibm.com>
Fri, 6 Feb 2026 17:22:29 +0000 (17:22 +0000)
committer Bill Scales <bill_scales@uk.ibm.com>
Fri, 1 May 2026 07:40:50 +0000 (08:40 +0100)
Prevent the first write to FastEC in each interval from being
a partial write, so that the span of partial writes tracked by
pwlc never spans intervals. This prevents bugs such as 73891,
where a divergent write was not removed from the log because
pwlc recorded that the shard had not participated in writes
before and after the divergent write.

Fixes: https://tracker.ceph.com/issues/73891
Signed-off-by: Bill Scales <bill_scales@uk.ibm.com>
(cherry picked from commit dc0a195b937eb175cf663d48a856f486435e987f)

Conflicts:
  src/osd/ECBackend.cc
- change assert to ceph_assert

src/osd/ECBackend.cc
src/osd/ECCommon.cc
src/osd/ECCommon.h
src/osd/ECTransaction.cc
src/osd/ECTransaction.h

index e1477b737d65339a458c1ceb93442d10a01e1cb0..b654eebfd2245e1ecf8dab01199788a87549bde6 100644 (file)
@@ -896,8 +896,9 @@ struct ECClassicalOp : ECCommon::RMWPipeline::Op {
     map<hobject_t, ECUtil::shard_extent_map_t> *written,
     shard_id_map<ObjectStore::Transaction> *transactions,
     DoutPrefixProvider *dpp,
-    const OSDMapRef &osdmap) final {
-    assert(t);
+    const OSDMapRef &osdmap,
+    bool& first_write_in_interval) final {
+    ceph_assert(t);
     ECTransaction::generate_transactions(
       t.get(),
       plan,
@@ -911,7 +912,8 @@ struct ECClassicalOp : ECCommon::RMWPipeline::Op {
       &temp_added,
       &temp_cleared,
       dpp,
-      osdmap);
+      osdmap,
+      first_write_in_interval);
   }
 
   bool skip_transaction(
index 13bf7ff3d0a0149abb1dbdf26cbafa55915ab6be..9bb2de9edb98a7c3d4debd2840f6203406563cbd 100644 (file)
@@ -787,7 +787,8 @@ void ECCommon::RMWPipeline::cache_ready(Op &op) {
     &written,
     &trans,
     get_parent()->get_dpp(),
-    get_osdmap());
+    get_osdmap(),
+    first_write_in_interval);
 
   dout(20) << __func__ << ": written: " << written << ", op: " << op << dendl;
 
@@ -918,7 +919,8 @@ struct ECDummyOp final : ECCommon::RMWPipeline::Op {
       map<hobject_t, ECUtil::shard_extent_map_t> *written,
       shard_id_map<ObjectStore::Transaction> *transactions,
       DoutPrefixProvider *dpp,
-      const OSDMapRef &osdmap
+      const OSDMapRef &osdmap,
+      bool &first_write_in_interval
     ) override {
     // NOP, as -- in contrast to ECClassicalOp -- there is no
     // transaction involved
@@ -1000,6 +1002,7 @@ void ECCommon::RMWPipeline::on_change() {
   oid_to_version.clear();
   waiting_commit.clear();
   next_write_all_shards = false;
+  first_write_in_interval = true;
 }
 
 void ECCommon::RMWPipeline::on_change2() {
index 6e9d3a6a7b8876fdd6e2b4459a7199d3f4e62f31..8a0186d548a0e5c2d61d39ba1a30197f16b4153e 100644 (file)
@@ -535,7 +535,8 @@ struct ECCommon {
           std::map<hobject_t, ECUtil::shard_extent_map_t> *written,
           shard_id_map<ceph::os::Transaction> *transactions,
           DoutPrefixProvider *dpp,
-          const OSDMapRef &osdmap) = 0;
+          const OSDMapRef &osdmap,
+          bool &first_write_in_interval) = 0;
 
       virtual bool skip_transaction(
           std::set<shard_id_t> &pending_roll_forward,
@@ -649,6 +650,11 @@ struct ECCommon {
     uint64_t ec_pdw_write_mode;
     bool next_write_all_shards = false;
 
+    // Set by on_change, forces first write in each interval to be
+    // a full write to avoid PWLC spanning intervals. Fixes
+    // https://tracker.ceph.com/issues/73891
+    bool first_write_in_interval;
+
     RMWPipeline(CephContext *cct,
                 ceph::ErasureCodeInterfaceRef ec_impl,
                 const ECUtil::stripe_info_t &sinfo,
index eacce3c2d1a33c45a2aa63c93cd81a850c1f3e0a..efbc57a08dd52485db18399fbc17529c031432e9 100644 (file)
@@ -468,7 +468,8 @@ ECTransaction::Generate::Generate(PGTransaction &t,
     PGTransaction::ObjectOperation &op,
     WritePlanObj &plan,
     DoutPrefixProvider *dpp,
-    pg_log_entry_t *entry)
+    pg_log_entry_t *entry,
+    bool &first_write_in_interval)
   : t(t),
     ec_impl(ec_impl),
     pgid(pgid),
@@ -584,8 +585,9 @@ ECTransaction::Generate::Generate(PGTransaction &t,
     }
   }
 
-  if (size_change || clear_whiteout) {
+  if (size_change || clear_whiteout || first_write_in_interval) {
     all_shards_written();
+    first_write_in_interval = false;
   } else {
     // All primary shards must always be written, regardless of the write plan.
     shards_written(sinfo.get_parity_shards());
@@ -1007,7 +1009,8 @@ void ECTransaction::generate_transactions(
     set<hobject_t> *temp_added,
     set<hobject_t> *temp_removed,
     DoutPrefixProvider *dpp,
-    const OSDMapRef &osdmap) {
+    const OSDMapRef &osdmap,
+    bool &first_write_in_interval) {
   ceph_assert(written_map);
   ceph_assert(transactions);
   ceph_assert(temp_added);
@@ -1040,7 +1043,7 @@ void ECTransaction::generate_transactions(
       ceph_assert(plan.hoid == oid);
 
       Generate generate(t, ec_impl, pgid, sinfo, partial_extents, written_map,
-        *transactions, osdmap, oid, op, plan, dpp, entry);
+        *transactions, osdmap, oid, op, plan, dpp, entry, first_write_in_interval);
 
       plans.plans.pop_front();
   });
index ed946ae888a59b568f1a17ec0acadfcd620d1b67..41aa30c499f2cd39a32cbc6cee06853a840bc7d4 100644 (file)
@@ -120,7 +120,8 @@ class Generate {
     const hobject_t &oid, PGTransaction::ObjectOperation &op,
     WritePlanObj &plan,
     DoutPrefixProvider *dpp,
-    pg_log_entry_t *entry);
+    pg_log_entry_t *entry,
+    bool &first_write_in_interval);
 };
 
 void generate_transactions(
@@ -136,6 +137,7 @@ void generate_transactions(
     std::set<hobject_t> *temp_added,
     std::set<hobject_t> *temp_removed,
     DoutPrefixProvider *dpp,
-    const OSDMapRef &osdmap
+    const OSDMapRef &osdmap,
+    bool &first_write_in_interval
   );
 }