From: Samuel Just Date: Thu, 28 Mar 2024 02:05:16 +0000 (-0700) Subject: osd,crimson/osd: use pg_committed_to rather than mlcod for submit_transaction X-Git-Tag: v20.0.0~707^2~17 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=c58a40f1dd560b74e825ef2f2d9792d1560c2683;p=ceph.git osd,crimson/osd: use pg_committed_to rather than mlcod for submit_transaction This commit actually changes the bound we're propagating. This solves two bugs: - Using min_last_complete_ondisk caused replicas to be two update rounds behind rather than one - Replicas don't actually have enough information to set min_last_complete_ondisk on activation, so we couldn't serve replica reads until the first write. pg_committed_to, on the other hand, is fine as the activation last_update cannot become divergent. Moreover, last_complete won't advance past missing objects causing min_last_complete_ondisk to be blocked by any replica missing object. Note that the replica read pathway seperately checks whether the target is missing locally, so that property was not needed. Fixes: https://tracker.ceph.com/issues/65086 Fixes: https://tracker.ceph.com/issues/65085 Signed-off-by: Samuel Just --- diff --git a/src/crimson/osd/ops_executer.cc b/src/crimson/osd/ops_executer.cc index 7585c96897cc..4e735c3b4cb9 100644 --- a/src/crimson/osd/ops_executer.cc +++ b/src/crimson/osd/ops_executer.cc @@ -828,7 +828,7 @@ void OpsExecuter::fill_op_params(OpsExecuter::modified_by m) osd_op_params->mtime = msg->get_mtime(); osd_op_params->at_version = pg->get_next_version(); osd_op_params->pg_trim_to = pg->get_pg_trim_to(); - osd_op_params->pg_committed_to = pg->get_min_last_complete_ondisk(); + osd_op_params->pg_committed_to = pg->get_pg_committed_to(); osd_op_params->last_complete = pg->get_info().last_complete; osd_op_params->user_modify = (m == modified_by::user); } diff --git a/src/crimson/osd/pg.h b/src/crimson/osd/pg.h index f37775bc2863..4c429d6043ab 100644 --- a/src/crimson/osd/pg.h +++ b/src/crimson/osd/pg.h @@ -130,7 +130,11 @@ public: } eversion_t get_min_last_complete_ondisk() const { - return peering_state.get_min_last_complete_ondisk(); + return peering_state.get_pg_committed_to(); + } + + eversion_t get_pg_committed_to() const { + return peering_state.get_pg_committed_to(); } const pg_info_t& get_info() const final { diff --git a/src/osd/PeeringState.cc b/src/osd/PeeringState.cc index 664852a5a5ef..481581d6affc 100644 --- a/src/osd/PeeringState.cc +++ b/src/osd/PeeringState.cc @@ -2674,6 +2674,10 @@ void PeeringState::activate( if (info.last_epoch_started < activation_epoch) { info.last_epoch_started = activation_epoch; info.last_interval_started = info.history.same_interval_since; + + // updating last_epoch_started ensures that last_update will not + // become divergent after activation completes. + pg_committed_to = info.last_update; } } diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc index 4ee81ccab144..be2eddd54790 100644 --- a/src/osd/PrimaryLogPG.cc +++ b/src/osd/PrimaryLogPG.cc @@ -11491,7 +11491,7 @@ void PrimaryLogPG::issue_repop(RepGather *repop, OpContext *ctx) ctx->at_version, std::move(ctx->op_t), recovery_state.get_pg_trim_to(), - recovery_state.get_min_last_complete_ondisk(), + recovery_state.get_pg_committed_to(), std::move(ctx->log), ctx->updated_hset_history, on_all_commit,