]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: fix waiting_for_peered vs flushing 17759/head
authorSage Weil <sage@redhat.com>
Fri, 15 Sep 2017 20:03:38 +0000 (16:03 -0400)
committerSage Weil <sage@redhat.com>
Mon, 9 Oct 2017 20:27:51 +0000 (15:27 -0500)
on_flush() requeues waiting_for_peered, but we flush twice on the
primary during peering, and we don't want to requeue the first one
(when we have the master pg log merged).

Fix by moving waiting_for_peered to waiting_for_flush if we aren't
already flush on _activate_committed.  If we get an op and are
peered but not flushed, queue ops there.  (We can simplify this
check a bit since pgbackend inactive message handling doesn't care
about flushed or not flushed.)  When flushed, we requeue
waiting_for_flush.

The waiting_for_flush, waiting_for_peered, and waiting_for_active
lists are all mutually exclusive, so this mostly serves to
clarify what we are waiting for (not to keep items separate). And
it means that on_flushed() will only requeue things that were
waiting for it specifically.

Fixes: http://tracker.ceph.com/issues/21407
Signed-off-by: Sage Weil <sage@redhat.com>
src/osd/PG.cc
src/osd/PG.h
src/osd/PrimaryLogPG.cc

index 8e259b5802acfbb3e1eb0a15c44b28afab7542dc..231990e9aa7a3592a792e2be0fb6b4d2b739aa32 100644 (file)
@@ -1919,6 +1919,12 @@ void PG::_activate_committed(epoch_t epoch, epoch_t activation_epoch)
     // waiters
     if (flushes_in_progress == 0) {
       requeue_ops(waiting_for_peered);
+    } else if (!waiting_for_peered.empty()) {
+      dout(10) << __func__ << " flushes in progress, moving "
+              << waiting_for_peered.size() << " items to waiting_for_flush"
+              << dendl;
+      assert(waiting_for_flush.empty());
+      waiting_for_flush.swap(waiting_for_peered);
     }
   }
 
@@ -7174,6 +7180,13 @@ boost::statechart::result PG::RecoveryState::Active::react(const AllReplicasActi
   // waiters
   if (pg->flushes_in_progress == 0) {
     pg->requeue_ops(pg->waiting_for_peered);
+  } else if (!pg->waiting_for_peered.empty()) {
+    ldout(pg->cct, 10) << __func__ << " flushes in progress, moving "
+                      << pg->waiting_for_peered.size()
+                      << " items to waiting_for_flush"
+                      << dendl;
+    assert(pg->waiting_for_flush.empty());
+    pg->waiting_for_flush.swap(pg->waiting_for_peered);
   }
 
   pg->on_activate();
index 26be311f9872822e4fa4cd9cb2d04e803d18538a..6757304e4fed526829edb68207472a0ae9a408a8 100644 (file)
@@ -870,6 +870,9 @@ protected:
    *  - waiting_for_active
    *    - !is_active()
    *    - only starts blocking on interval change; never restarts
+   *  - waiting_for_flush
+   *    - is_active() and flushes_in_progress
+   *    - waiting for final flush during activate
    *  - waiting_for_scrub
    *    - starts and stops blocking for varying intervals during scrub
    *  - waiting_for_unreadable_object
@@ -912,6 +915,7 @@ protected:
 
   // ops waiting on active (require peered as well)
   list<OpRequestRef>            waiting_for_active;
+  list<OpRequestRef>            waiting_for_flush;
   list<OpRequestRef>            waiting_for_scrub;
 
   list<OpRequestRef>            waiting_for_cache_not_full;
index 0fbdf7da080b37071a8ad7f6443ed4e8bad79dc9..15af0e33e440d2c3a85743ec02a1821a52c723e3 100644 (file)
@@ -1678,15 +1678,6 @@ void PrimaryLogPG::do_request(
     }
   }
 
-  if (flushes_in_progress > 0) {
-    dout(20) << flushes_in_progress
-            << " flushes_in_progress pending "
-            << "waiting for active on " << op << dendl;
-    waiting_for_peered.push_back(op);
-    op->mark_delayed("waiting for peered");
-    return;
-  }
-
   if (!is_peered()) {
     // Delay unless PGBackend says it's ok
     if (pgbackend->can_handle_while_inactive(op)) {
@@ -1700,6 +1691,15 @@ void PrimaryLogPG::do_request(
     }
   }
 
+  if (flushes_in_progress > 0) {
+    dout(20) << flushes_in_progress
+            << " flushes_in_progress pending "
+            << "waiting for flush on " << op << dendl;
+    waiting_for_flush.push_back(op);
+    op->mark_delayed("waiting for flush");
+    return;
+  }
+
   assert(is_peered() && flushes_in_progress == 0);
   if (pgbackend->handle_message(op))
     return;
@@ -10652,7 +10652,7 @@ void PrimaryLogPG::on_flushed()
   assert(flushes_in_progress > 0);
   flushes_in_progress--;
   if (flushes_in_progress == 0) {
-    requeue_ops(waiting_for_peered);
+    requeue_ops(waiting_for_flush);
   }
   if (!is_peered() || !is_primary()) {
     pair<hobject_t, ObjectContextRef> i;
@@ -10817,6 +10817,7 @@ void PrimaryLogPG::on_change(ObjectStore::Transaction *t)
   // requeue everything in the reverse order they should be
   // reexamined.
   requeue_ops(waiting_for_peered);
+  requeue_ops(waiting_for_flush);
   requeue_ops(waiting_for_active);
 
   clear_scrub_reserved();
@@ -13154,6 +13155,7 @@ bool PrimaryLogPG::agent_choose_mode(bool restart, OpRequestRef op)
        is_active()) {
       if (op)
        requeue_op(op);
+      requeue_ops(waiting_for_flush);
       requeue_ops(waiting_for_active);
       requeue_ops(waiting_for_scrub);
       requeue_ops(waiting_for_cache_not_full);