git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/: bounce writes on objects written since mlcod back to primary
author: Samuel Just <sjust@redhat.com>
Tue, 10 Dec 2019 02:27:32 +0000 (18:27 -0800)
committer: Samuel Just <sjust@redhat.com>
Fri, 20 Dec 2019 01:35:36 +0000 (17:35 -0800)
If there are any writes since mlcod (min_last_complete_ondisk) on an object
with the same head, our on-disk state may be unstable or may reflect
uncommitted state. Bounce the op back to the primary.

Signed-off-by: Samuel Just <sjust@redhat.com>
src/osd/PGLog.h
src/osd/PeeringState.h
src/osd/PrimaryLogPG.cc

index f39dc979c4017335c5a640e53472c89e681f7fa1..2990366f5df88fd2b488347fd9248ec4c13ce36f 100644 (file)
@@ -329,6 +329,16 @@ public:
       return false;
     }
 
+    bool has_write_since(const hobject_t &oid, const eversion_t &bound) const {  // true iff some log entry with version > bound has the same head as oid
+      for (auto i = log.rbegin(); i != log.rend(); ++i) {  // walk the log from its back towards its front
+       if (i->version <= bound)  // reached an entry at or below bound: stop scanning here
+         return false;  // NOTE(review): presumably entries are in ascending version order, so nothing further on can exceed bound -- confirm
+       if (i->soid.get_head() == oid.get_head())  // match on get_head() of both sides, not on the exact object id
+         return true;  // found an entry above bound for this head
+      }
+      return false;  // every entry was above bound, but none matched oid's head
+    }
+
     /// get a (bounded) list of recent reqids for the given object
     void get_object_reqids(const hobject_t& oid, unsigned max,
                           mempool::osd_pglog::vector<pair<osd_reqid_t, version_t> > *pls,
index c67a76881fdc416dd06d7052f2bb225920c62141..fc2719b02aee8652e3365e537dde28130d020495 100644 (file)
@@ -2199,6 +2199,15 @@ public:
   bool needs_recovery() const;
   bool needs_backfill() const;
 
+  /**
+   * Returns whether a particular object can be safely read on this replica
+   */
+  bool can_serve_replica_read(const hobject_t &hoid) {
+    ceph_assert(!is_primary());  // replica-side query only: asserts that it is never called on the primary
+    return !pg_log.get_log().has_write_since(  // readable iff the log holds no write to hoid's head ...
+      hoid, get_min_last_complete_ondisk());  // ... with a version above min_last_complete_ondisk
+  }
+
   /**
    * Returns whether all peers which might have unfound objects have been
    * queried or marked lost.
index 579b023914b24ed1a86cec4c35e9355f715bc59f..6e77d927851c51edc1a55e3fd9c89d24e5629aa5 100644 (file)
@@ -2112,6 +2112,19 @@ void PrimaryLogPG::do_op(OpRequestRef& op)
     return;
   }
 
+  if (!is_primary()) {
+    if (!recovery_state.can_serve_replica_read(oid)) {
+      dout(20) << __func__ << ": oid " << oid
+              << " unstable write on replica, bouncing to primary."
+              << *m << dendl;
+      osd->reply_op_error(op, -EAGAIN);
+      return;
+    } else {
+      dout(20) << __func__ << ": serving replica read on oid" << oid
+              << dendl;
+    }
+  }
+
   int r = find_object_context(
     oid, &obc, can_create,
     m->has_flag(CEPH_OSD_FLAG_MAP_SNAP_CLONE),