From: Samuel Just
Date: Tue, 10 Dec 2019 02:27:32 +0000 (-0800)
Subject: osd/: bounce writes on objects written since mlcod back to primary
X-Git-Tag: v15.1.0~411^2~3
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=044fa54515531ac1b7a9bf5d65560e9e30d5b100;p=ceph.git

osd/: bounce writes on objects written since mlcod back to primary

If there are any writes since mlcod on an object with the same head, our
ondisk state may be unstable or may reflect uncommitted state. Bounce
back to primary.

Signed-off-by: Samuel Just
---

diff --git a/src/osd/PGLog.h b/src/osd/PGLog.h
index f39dc979c401..2990366f5df8 100644
--- a/src/osd/PGLog.h
+++ b/src/osd/PGLog.h
@@ -329,6 +329,16 @@ public:
       return false;
     }
 
+    bool has_write_since(const hobject_t &oid, const eversion_t &bound) const {
+      for (auto i = log.rbegin(); i != log.rend(); ++i) {
+        if (i->version <= bound)
+          return false;
+        if (i->soid.get_head() == oid.get_head())
+          return true;
+      }
+      return false;
+    }
+
     /// get a (bounded) list of recent reqids for the given object
     void get_object_reqids(const hobject_t& oid, unsigned max,
                            mempool::osd_pglog::vector > *pls,
diff --git a/src/osd/PeeringState.h b/src/osd/PeeringState.h
index c67a76881fdc..fc2719b02aee 100644
--- a/src/osd/PeeringState.h
+++ b/src/osd/PeeringState.h
@@ -2199,6 +2199,15 @@ public:
   bool needs_recovery() const;
   bool needs_backfill() const;
 
+  /**
+   * Returns whether a particular object can be safely read on this replica
+   */
+  bool can_serve_replica_read(const hobject_t &hoid) {
+    ceph_assert(!is_primary());
+    return !pg_log.get_log().has_write_since(
+      hoid, get_min_last_complete_ondisk());
+  }
+
   /**
    * Returns whether all peers which might have unfound objects have been
    * queried or marked lost.
diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc
index 579b023914b2..6e77d927851c 100644
--- a/src/osd/PrimaryLogPG.cc
+++ b/src/osd/PrimaryLogPG.cc
@@ -2112,6 +2112,19 @@ void PrimaryLogPG::do_op(OpRequestRef& op)
     return;
   }
 
+  if (!is_primary()) {
+    if (!recovery_state.can_serve_replica_read(oid)) {
+      dout(20) << __func__ << ": oid " << oid
+               << " unstable write on replica, bouncing to primary."
+               << *m << dendl;
+      osd->reply_op_error(op, -EAGAIN);
+      return;
+    } else {
+      dout(20) << __func__ << ": serving replica read on oid" << oid
+               << dendl;
+    }
+  }
+
   int r = find_object_context(
     oid, &obc, can_create,
     m->has_flag(CEPH_OSD_FLAG_MAP_SNAP_CLONE),