osd_op_params->mtime = msg->get_mtime();
osd_op_params->at_version = pg->get_next_version();
osd_op_params->pg_trim_to = pg->get_pg_trim_to();
- osd_op_params->min_last_complete_ondisk = pg->get_min_last_complete_ondisk();
+ osd_op_params->pg_committed_to = pg->get_min_last_complete_ondisk();
osd_op_params->last_complete = pg->get_info().last_complete;
osd_op_params->user_modify = (m == modified_by::user);
}
utime_t mtime;
eversion_t at_version;
eversion_t pg_trim_to;
- eversion_t min_last_complete_ondisk;
+ eversion_t pg_committed_to;
eversion_t last_complete;
bool user_modify = false;
ObjectCleanRegions clean_regions;
log_operation(std::move(log_entries),
req->pg_trim_to,
req->version,
- req->min_last_complete_ondisk,
+ req->pg_committed_to,
!txn.empty(),
txn,
false);
std::vector<pg_log_entry_t>&& logv,
const eversion_t &trim_to,
const eversion_t &roll_forward_to,
- const eversion_t &min_last_complete_ondisk,
+ const eversion_t &pg_committed_to,
bool transaction_applied,
ObjectStore::Transaction &txn,
bool async) {
peering_state.append_log(std::move(logv),
trim_to,
roll_forward_to,
- min_last_complete_ondisk,
+ pg_committed_to,
txn,
!txn.empty(),
false);
std::vector<pg_log_entry_t>&& logv,
const eversion_t &trim_to,
const eversion_t &roll_forward_to,
- const eversion_t &min_last_complete_ondisk,
+ const eversion_t &pg_committed_to,
bool transaction_applied,
ObjectStore::Transaction &txn,
bool async = false);
pending_txn->second.acked_peers.push_back({pg_shard, eversion_t{}});
encode(log_entries, m->logbl);
m->pg_trim_to = osd_op_p.pg_trim_to;
- m->min_last_complete_ondisk = osd_op_p.min_last_complete_ondisk;
+ m->pg_committed_to = osd_op_p.pg_committed_to;
m->pg_stats = pg.get_info().stats;
// TODO: set more stuff. e.g., pg_states
sends->emplace_back(
std::move(log_entries),
osd_op_p.pg_trim_to,
osd_op_p.at_version,
- osd_op_p.min_last_complete_ondisk,
+ osd_op_p.pg_committed_to,
true,
txn,
false);
// piggybacked osd/pg state
eversion_t pg_trim_to; // primary->replica: trim to here
- eversion_t min_last_complete_ondisk; // lower bound on committed version
+
+ /**
+ * pg_committed_to
+ *
+ * Used by the primary to propagate pg_committed_to to replicas for use in
+ * serving replica reads.
+ *
+ * Because updates <= pg_committed_to cannot become divergent, replicas
+ * may safely serve reads on objects which do not have more recent updates.
+ *
+ * See PeeringState::pg_committed_to, PeeringState::can_serve_replica_read
+ *
+ * Historical note: Prior to early 2024, this field was named
+ * min_last_complete_ondisk (mlcod). The replica, however, only actually
+ * relied on a single property of this field -- that any objects not
+ * modified since mlcod couldn't have uncommitted state. Weakening the field
+ * to the condition above is therefore safe -- mlcod is always <=
+ * pg_committed_to, and sending pg_committed_to to a replica expecting mlcod
+ * will work correctly because the replica only actually uses mlcod to check
+ * replica reads. The primary difference between mlcod and pg_committed_to is
+ * simply that mlcod doesn't advance past objects missing on replicas, but we
+ * check for that anyway. This note may be removed in main after U is
+ * released.
+ */
+ eversion_t pg_committed_to;
hobject_t new_temp_oid; ///< new temp object that we must now start tracking
hobject_t discard_temp_oid; ///< previously used temp object that we can now stop tracking
decode(updated_hit_set_history, p);
ceph_assert(header.version >= 3);
- decode(min_last_complete_ondisk, p);
+ decode(pg_committed_to, p);
final_decode_needed = false;
}
encode(discard_temp_oid, payload);
encode(from, payload);
encode(updated_hit_set_history, payload);
- encode(min_last_complete_ondisk, payload);
+ encode(pg_committed_to, payload);
}
MOSDRepOp()
out << " " << poid << " v " << version;
if (updated_hit_set_history)
out << ", has_updated_hit_set_history";
- out << ", mlcod=" << min_last_complete_ondisk;
+ out << ", pct=" << pg_committed_to;
}
out << ")";
}
const std::optional<pg_hit_set_history_t> &hset_history,
const eversion_t &trim_to,
const eversion_t &roll_forward_to,
- const eversion_t &min_last_complete_ondisk,
+ const eversion_t &pg_committed_to,
bool transaction_applied,
ceph::os::Transaction &t,
bool async = false) = 0;
const std::optional<pg_hit_set_history_t> &hset_history,
const eversion_t &trim_to,
const eversion_t &roll_forward_to,
- const eversion_t &min_last_complete_ondisk,
+ const eversion_t &pg_committed_to,
bool transaction_applied,
ObjectStore::Transaction &t,
bool async = false) = 0;
const eversion_t &at_version, ///< [in] version
PGTransactionUPtr &&t, ///< [in] trans to execute (move)
const eversion_t &trim_to, ///< [in] trim log to here
- const eversion_t &min_last_complete_ondisk, ///< [in] lower bound on
- /// committed version
+ const eversion_t &pg_committed_to, ///< [in] lower bound on
+ /// committed version
std::vector<pg_log_entry_t>&& log_entries, ///< [in] log entries for t
/// [in] hitset history (if updated with this transaction)
std::optional<pg_hit_set_history_t> &hset_history,
bool PeeringState::can_serve_replica_read(const hobject_t &hoid)
{
ceph_assert(!is_primary());
- eversion_t min_last_complete_ondisk = get_min_last_complete_ondisk();
if (!pg_log.get_log().has_write_since(
- hoid, min_last_complete_ondisk)) {
+ hoid, pg_committed_to)) {
psdout(20) << "can be safely read on this replica" << dendl;
return true;
} else {
vector<pg_log_entry_t>&& logv,
eversion_t trim_to,
eversion_t roll_forward_to,
- eversion_t mlcod,
+ eversion_t pct,
ObjectStore::Transaction &t,
bool transaction_applied,
bool async)
write_if_dirty(t);
if (!is_primary())
- min_last_complete_ondisk = mlcod;
+ pg_committed_to = pct;
}
void PeeringState::recover_got(
if (ps.last_complete_ondisk != ps.info.last_complete)
out << " lcod " << ps.last_complete_ondisk;
- out << " mlcod " << ps.min_last_complete_ondisk;
+ if (ps.is_primary())
+ out << " mlcod " << ps.min_last_complete_ondisk;
out << " " << pg_state_string(ps.get_state());
if (ps.should_send_notify())
std::vector<pg_log_entry_t>&& logv,
eversion_t trim_to,
eversion_t roll_forward_to,
- eversion_t min_last_complete_ondisk,
+ eversion_t pg_committed_to,
ObjectStore::Transaction &t,
bool transaction_applied,
bool async);
const std::optional<pg_hit_set_history_t> &hset_history,
const eversion_t &trim_to,
const eversion_t &roll_forward_to,
- const eversion_t &min_last_complete_ondisk,
+ const eversion_t &pg_committed_to,
bool transaction_applied,
ObjectStore::Transaction &t,
bool async = false) override {
replica_clear_repop_obc(logv, t);
}
recovery_state.append_log(
- std::move(logv), trim_to, roll_forward_to, min_last_complete_ondisk,
+ std::move(logv), trim_to, roll_forward_to, pg_committed_to,
t, transaction_applied, async);
}
const eversion_t &at_version,
PGTransactionUPtr &&_t,
const eversion_t &trim_to,
- const eversion_t &min_last_complete_ondisk,
+ const eversion_t &pg_committed_to,
vector<pg_log_entry_t>&& _log_entries,
std::optional<pg_hit_set_history_t> &hset_history,
Context *on_all_commit,
tid,
reqid,
trim_to,
- min_last_complete_ondisk,
+ pg_committed_to,
added.size() ? *(added.begin()) : hobject_t(),
removed.size() ? *(removed.begin()) : hobject_t(),
log_entries,
hset_history,
trim_to,
at_version,
- min_last_complete_ondisk,
+ pg_committed_to,
true,
op_t);
ceph_tid_t tid,
osd_reqid_t reqid,
eversion_t pg_trim_to,
- eversion_t min_last_complete_ondisk,
+ eversion_t pg_committed_to,
hobject_t new_temp_oid,
hobject_t discard_temp_oid,
const bufferlist &log_entries,
// this feature is from 2019 (6f12bf27cb91), assume present
ceph_assert(HAVE_FEATURE(parent->min_peer_features(), OSD_REPOP_MLCOD));
- wr->min_last_complete_ondisk = min_last_complete_ondisk;
+ wr->pg_committed_to = pg_committed_to;
wr->new_temp_oid = new_temp_oid;
wr->discard_temp_oid = discard_temp_oid;
ceph_tid_t tid,
osd_reqid_t reqid,
eversion_t pg_trim_to,
- eversion_t min_last_complete_ondisk,
+ eversion_t pg_committed_to,
hobject_t new_temp_oid,
hobject_t discard_temp_oid,
const vector<pg_log_entry_t> &log_entries,
tid,
reqid,
pg_trim_to,
- min_last_complete_ondisk,
+ pg_committed_to,
new_temp_oid,
discard_temp_oid,
logs,
m->updated_hit_set_history,
m->pg_trim_to,
m->version, /* Replicated PGs don't have rollback info */
- m->min_last_complete_ondisk,
+ m->pg_committed_to,
update_snaps,
rm->localt,
async);
const eversion_t &at_version,
PGTransactionUPtr &&t,
const eversion_t &trim_to,
- const eversion_t &min_last_complete_ondisk,
+ const eversion_t &pg_committed_to,
std::vector<pg_log_entry_t>&& log_entries,
std::optional<pg_hit_set_history_t> &hset_history,
Context *on_all_commit,
ceph_tid_t tid,
osd_reqid_t reqid,
eversion_t pg_trim_to,
- eversion_t min_last_complete_ondisk,
+ eversion_t pg_committed_to,
hobject_t new_temp_oid,
hobject_t discard_temp_oid,
const ceph::buffer::list &log_entries,
ceph_tid_t tid,
osd_reqid_t reqid,
eversion_t pg_trim_to,
- eversion_t min_last_complete_ondisk,
+ eversion_t pg_committed_to,
hobject_t new_temp_oid,
hobject_t discard_temp_oid,
const std::vector<pg_log_entry_t> &log_entries,