git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/PrimaryLogPG: don't populate watchers if replica 36029/head
author Ilya Dryomov <idryomov@gmail.com>
Mon, 8 Jun 2020 10:57:18 +0000 (10:57 +0000)
committer Nathan Cutler <ncutler@suse.com>
Fri, 10 Jul 2020 21:23:03 +0000 (23:23 +0200)
If an object has an established watch, serving a read from a replica
eventually leads to an assertion failure in
is_degraded_or_backfilling_object(), called from handle_watch_timeout().

The issue is that after the can_serve_replica_read() check is satisfied,
we look up the object context.  If it is not found, we fetch the object
info and then call populate_obc_watchers(), which sees the recorded watch
and treats it as unconnected, arming HandleWatchTimeout on the replica.

Fixes: https://tracker.ceph.com/issues/45795
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
(cherry picked from commit 1652b321da73b45cd33d7a03a2fef47e5708e37e)

src/osd/PrimaryLogPG.cc

index a16ba09eb22d18d63f79b67856d52937c116c071..f02e12636e4088f2af64dc2df18367bf320630a2 100644 (file)
@@ -10868,7 +10868,7 @@ void PrimaryLogPG::check_blacklisted_obc_watchers(ObjectContextRef obc)
 
 void PrimaryLogPG::populate_obc_watchers(ObjectContextRef obc)
 {
-  ceph_assert(is_active());
+  ceph_assert(is_primary() && is_active());
   auto it_objects = recovery_state.get_pg_log().get_log().objects.find(obc->obs.oi.soid);
   ceph_assert((recovering.count(obc->obs.oi.soid) ||
          !is_missing_object(obc->obs.oi.soid)) ||
@@ -11062,7 +11062,7 @@ ObjectContextRef PrimaryLogPG::get_object_context(
       soid, true,
       soid.has_snapset() ? attrs : 0);
 
-    if (is_active())
+    if (is_primary() && is_active())
       populate_obc_watchers(obc);
 
     if (pool.info.is_erasure()) {