git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/PrimaryLogPG: don't populate watchers if replica 35478/head
author Ilya Dryomov <idryomov@gmail.com>
Mon, 8 Jun 2020 10:57:18 +0000 (10:57 +0000)
committer Ilya Dryomov <idryomov@gmail.com>
Mon, 8 Jun 2020 10:57:18 +0000 (10:57 +0000)
If an object has an established watch, serving a read from a replica
eventually leads to an assertion failure in
is_degraded_or_backfilling_object(), called from handle_watch_timeout().

The issue is that after the can_serve_replica_read() check is satisfied,
we look up the object context.  If it is not found, we fetch the object
info and then call populate_obc_watchers(), which sees the recorded watch
and treats it as unconnected, arming HandleWatchTimeout on the replica.

Fixes: https://tracker.ceph.com/issues/45795
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
src/osd/PrimaryLogPG.cc

index 23197dd8eed88a20b7439c371304bc8d4f187a4a..a15941d381661c9956f2ab249f8623c4a27e91c5 100644 (file)
@@ -10878,7 +10878,7 @@ void PrimaryLogPG::check_blacklisted_obc_watchers(ObjectContextRef obc)
 
 void PrimaryLogPG::populate_obc_watchers(ObjectContextRef obc)
 {
-  ceph_assert(is_active());
+  ceph_assert(is_primary() && is_active());
   auto it_objects = recovery_state.get_pg_log().get_log().objects.find(obc->obs.oi.soid);
   ceph_assert((recovering.count(obc->obs.oi.soid) ||
          !is_missing_object(obc->obs.oi.soid)) ||
@@ -11072,7 +11072,7 @@ ObjectContextRef PrimaryLogPG::get_object_context(
       soid, true,
       soid.has_snapset() ? attrs : 0);
 
-    if (is_active())
+    if (is_primary() && is_active())
       populate_obc_watchers(obc);
 
     if (pool.info.is_erasure()) {