From: Bill Scales Date: Fri, 6 Feb 2026 17:22:29 +0000 (+0000) Subject: osd: Avoid pwlc spanning intervals X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=bf1d95120febc72d3e814d99d862a4be60273d86;p=ceph.git osd: Avoid pwlc spanning intervals Prevent the first write to FastEC in each interval from being a partial write, so that the span of partial writes tracked by pwlc never crosses an interval boundary. This stops bugs such as 73891, where a divergent write was not removed from the log because pwlc recorded that the shard had not participated in writes before and after the divergent write. Fixes: https://tracker.ceph.com/issues/73891 Signed-off-by: Bill Scales (cherry picked from commit dc0a195b937eb175cf663d48a856f486435e987f) Conflicts: src/osd/ECBackend.cc - change assert to ceph_assert --- diff --git a/src/osd/ECBackend.cc b/src/osd/ECBackend.cc index e1477b737d65..b654eebfd224 100644 --- a/src/osd/ECBackend.cc +++ b/src/osd/ECBackend.cc @@ -896,8 +896,9 @@ struct ECClassicalOp : ECCommon::RMWPipeline::Op { map *written, shard_id_map *transactions, DoutPrefixProvider *dpp, - const OSDMapRef &osdmap) final { - assert(t); + const OSDMapRef &osdmap, + bool& first_write_in_interval) final { + ceph_assert(t); ECTransaction::generate_transactions( t.get(), plan, @@ -911,7 +912,8 @@ struct ECClassicalOp : ECCommon::RMWPipeline::Op { &temp_added, &temp_cleared, dpp, - osdmap); + osdmap, + first_write_in_interval); } bool skip_transaction( diff --git a/src/osd/ECCommon.cc b/src/osd/ECCommon.cc index 13bf7ff3d0a0..9bb2de9edb98 100644 --- a/src/osd/ECCommon.cc +++ b/src/osd/ECCommon.cc @@ -787,7 +787,8 @@ void ECCommon::RMWPipeline::cache_ready(Op &op) { &written, &trans, get_parent()->get_dpp(), - get_osdmap()); + get_osdmap(), + first_write_in_interval); dout(20) << __func__ << ": written: " << written << ", op: " << op << dendl; @@ -918,7 +919,8 @@ struct ECDummyOp final : ECCommon::RMWPipeline::Op { map *written, shard_id_map *transactions, 
DoutPrefixProvider *dpp, - const OSDMapRef &osdmap + const OSDMapRef &osdmap, + bool &first_write_in_interval ) override { // NOP, as -- in contrast to ECClassicalOp -- there is no // transaction involved @@ -1000,6 +1002,7 @@ void ECCommon::RMWPipeline::on_change() { oid_to_version.clear(); waiting_commit.clear(); next_write_all_shards = false; + first_write_in_interval = true; } void ECCommon::RMWPipeline::on_change2() { diff --git a/src/osd/ECCommon.h b/src/osd/ECCommon.h index 6e9d3a6a7b88..8a0186d548a0 100644 --- a/src/osd/ECCommon.h +++ b/src/osd/ECCommon.h @@ -535,7 +535,8 @@ struct ECCommon { std::map *written, shard_id_map *transactions, DoutPrefixProvider *dpp, - const OSDMapRef &osdmap) = 0; + const OSDMapRef &osdmap, + bool &first_write_in_interval) = 0; virtual bool skip_transaction( std::set &pending_roll_forward, @@ -649,6 +650,11 @@ struct ECCommon { uint64_t ec_pdw_write_mode; bool next_write_all_shards = false; + // Set by on_change, forces first write in each interval to be + // a full write to avoid PWLC spanning intervals. 
Fixes + // https://tracker.ceph.com/issues/73891 + bool first_write_in_interval; + RMWPipeline(CephContext *cct, ceph::ErasureCodeInterfaceRef ec_impl, const ECUtil::stripe_info_t &sinfo, diff --git a/src/osd/ECTransaction.cc b/src/osd/ECTransaction.cc index eacce3c2d1a3..efbc57a08dd5 100644 --- a/src/osd/ECTransaction.cc +++ b/src/osd/ECTransaction.cc @@ -468,7 +468,8 @@ ECTransaction::Generate::Generate(PGTransaction &t, PGTransaction::ObjectOperation &op, WritePlanObj &plan, DoutPrefixProvider *dpp, - pg_log_entry_t *entry) + pg_log_entry_t *entry, + bool &first_write_in_interval) : t(t), ec_impl(ec_impl), pgid(pgid), @@ -584,8 +585,9 @@ ECTransaction::Generate::Generate(PGTransaction &t, } } - if (size_change || clear_whiteout) { + if (size_change || clear_whiteout || first_write_in_interval) { all_shards_written(); + first_write_in_interval = false; } else { // All primary shards must always be written, regardless of the write plan. shards_written(sinfo.get_parity_shards()); @@ -1007,7 +1009,8 @@ void ECTransaction::generate_transactions( set *temp_added, set *temp_removed, DoutPrefixProvider *dpp, - const OSDMapRef &osdmap) { + const OSDMapRef &osdmap, + bool &first_write_in_interval) { ceph_assert(written_map); ceph_assert(transactions); ceph_assert(temp_added); @@ -1040,7 +1043,7 @@ void ECTransaction::generate_transactions( ceph_assert(plan.hoid == oid); Generate generate(t, ec_impl, pgid, sinfo, partial_extents, written_map, - *transactions, osdmap, oid, op, plan, dpp, entry); + *transactions, osdmap, oid, op, plan, dpp, entry, first_write_in_interval); plans.plans.pop_front(); }); diff --git a/src/osd/ECTransaction.h b/src/osd/ECTransaction.h index ed946ae888a5..41aa30c499f2 100644 --- a/src/osd/ECTransaction.h +++ b/src/osd/ECTransaction.h @@ -120,7 +120,8 @@ class Generate { const hobject_t &oid, PGTransaction::ObjectOperation &op, WritePlanObj &plan, DoutPrefixProvider *dpp, - pg_log_entry_t *entry); + pg_log_entry_t *entry, + bool 
&first_write_in_interval); }; void generate_transactions( @@ -136,6 +137,7 @@ void generate_transactions( std::set *temp_added, std::set *temp_removed, DoutPrefixProvider *dpp, - const OSDMapRef &osdmap + const OSDMapRef &osdmap, + bool &first_write_in_interval ); }