From 044fa54515531ac1b7a9bf5d65560e9e30d5b100 Mon Sep 17 00:00:00 2001
From: Samuel Just
Date: Mon, 9 Dec 2019 18:27:32 -0800
Subject: [PATCH] osd/: bounce writes on objects written since mlcod back to primary

If there are any writes since mlcod on an object with the same head, our
ondisk state may be unstable or may reflect uncommitted state. Bounce back
to primary.

Signed-off-by: Samuel Just
---
 src/osd/PGLog.h         | 17 +++++++++++++++++
 src/osd/PeeringState.h  |  9 +++++++++
 src/osd/PrimaryLogPG.cc | 13 +++++++++++++
 3 files changed, 39 insertions(+)

diff --git a/src/osd/PGLog.h b/src/osd/PGLog.h
index f39dc979c40..2990366f5df 100644
--- a/src/osd/PGLog.h
+++ b/src/osd/PGLog.h
@@ -329,6 +329,23 @@ public:
       return false;
     }
 
+    /**
+     * Returns whether any log entry visited before reaching one with
+     * version <= bound refers to an object with the same head as oid.
+     * The log is scanned in reverse; this presumably relies on entries
+     * being ordered by version.
+     */
+    bool has_write_since(const hobject_t &oid, const eversion_t &bound) const {
+      const auto head = oid.get_head();  // loop-invariant, compute once
+      for (auto i = log.rbegin(); i != log.rend(); ++i) {
+        if (i->version <= bound)
+          return false;  // stop scanning at the first entry not past bound
+        if (i->soid.get_head() == head)
+          return true;
+      }
+      return false;
+    }
+
     /// get a (bounded) list of recent reqids for the given object
     void get_object_reqids(const hobject_t& oid, unsigned max,
                            mempool::osd_pglog::vector > *pls,
diff --git a/src/osd/PeeringState.h b/src/osd/PeeringState.h
index c67a76881fd..fc2719b02ae 100644
--- a/src/osd/PeeringState.h
+++ b/src/osd/PeeringState.h
@@ -2199,6 +2199,15 @@ public:
   bool needs_recovery() const;
   bool needs_backfill() const;
 
+  /**
+   * Returns whether a particular object can be safely read on this replica
+   */
+  bool can_serve_replica_read(const hobject_t &hoid) {
+    ceph_assert(!is_primary());
+    return !pg_log.get_log().has_write_since(
+      hoid, get_min_last_complete_ondisk());
+  }
+
   /**
    * Returns whether all peers which might have unfound objects have been
    * queried or marked lost.
diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc
index 579b023914b..6e77d927851 100644
--- a/src/osd/PrimaryLogPG.cc
+++ b/src/osd/PrimaryLogPG.cc
@@ -2112,6 +2112,19 @@ void PrimaryLogPG::do_op(OpRequestRef& op)
     return;
   }
 
+  if (!is_primary()) {  // op is being handled on a non-primary
+    if (!recovery_state.can_serve_replica_read(oid)) {
+      dout(20) << __func__ << ": oid " << oid
+               << " unstable write on replica, bouncing to primary. "
+               << *m << dendl;
+      osd->reply_op_error(op, -EAGAIN);  // refuse rather than read possibly unstable state
+      return;
+    } else {
+      dout(20) << __func__ << ": serving replica read on oid " << oid
+               << dendl;
+    }
+  }
+
   int r = find_object_context(
     oid, &obc, can_create,
     m->has_flag(CEPH_OSD_FLAG_MAP_SNAP_CLONE),
-- 
2.39.5