From 8f7dc8b0d8135b20322c09a5909ad43d4c83b0ea Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 15 Sep 2017 16:03:38 -0400 Subject: [PATCH] osd: fix waiting_for_peered vs flushing on_flush() requeues waiting_for_peered, but we flush twice on the primary during peering, and we don't want to requeue the first one (when we have the master pg log merged). Fix by moving waiting_for_peered to waiting_for_flush if we aren't already flush on _activate_committed. If we get an op and are peered but not flushed, queue ops there. (We can simplify this check a bit since pgbackend inactive message handling doesn't care about flushed or not flushed.) When flushed, we requeue waiting_for_flush. The waiting_for_flush, waiting_for_peered, and waiting_for_active lists are all mutually exclusive, so this mostly serves to clarify what we are waiting for (not to keep items separate). And it means that on_flushed() will only requeue things that were waiting for it specifically. Fixes: http://tracker.ceph.com/issues/21407 Signed-off-by: Sage Weil --- src/osd/PG.cc | 13 +++++++++++++ src/osd/PG.h | 4 ++++ src/osd/PrimaryLogPG.cc | 22 ++++++++++++---------- 3 files changed, 29 insertions(+), 10 deletions(-) diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 8e259b5802acf..231990e9aa7a3 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -1919,6 +1919,12 @@ void PG::_activate_committed(epoch_t epoch, epoch_t activation_epoch) // waiters if (flushes_in_progress == 0) { requeue_ops(waiting_for_peered); + } else if (!waiting_for_peered.empty()) { + dout(10) << __func__ << " flushes in progress, moving " + << waiting_for_peered.size() << " items to waiting_for_flush" + << dendl; + assert(waiting_for_flush.empty()); + waiting_for_flush.swap(waiting_for_peered); } } @@ -7174,6 +7180,13 @@ boost::statechart::result PG::RecoveryState::Active::react(const AllReplicasActi // waiters if (pg->flushes_in_progress == 0) { pg->requeue_ops(pg->waiting_for_peered); + } else if (!pg->waiting_for_peered.empty()) { + ldout(pg->cct, 10) << __func__ << " flushes in progress, moving " + << pg->waiting_for_peered.size() + << " items to waiting_for_flush" + << dendl; + assert(pg->waiting_for_flush.empty()); + pg->waiting_for_flush.swap(pg->waiting_for_peered); } pg->on_activate(); diff --git a/src/osd/PG.h b/src/osd/PG.h index 26be311f98728..6757304e4fed5 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -870,6 +870,9 @@ protected: * - waiting_for_active * - !is_active() * - only starts blocking on interval change; never restarts + * - waiting_for_flush + * - is_active() and flushes_in_progress + * - waiting for final flush during activate * - waiting_for_scrub * - starts and stops blocking for varying intervals during scrub * - waiting_for_unreadable_object @@ -912,6 +915,7 @@ protected: // ops waiting on active (require peered as well) list waiting_for_active; + list waiting_for_flush; list waiting_for_scrub; list waiting_for_cache_not_full; diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc index 0fbdf7da080b3..15af0e33e440d 100644 --- a/src/osd/PrimaryLogPG.cc +++ b/src/osd/PrimaryLogPG.cc @@ -1678,15 +1678,6 @@ void PrimaryLogPG::do_request( } } - if (flushes_in_progress > 0) { - dout(20) << flushes_in_progress - << " flushes_in_progress pending " - << "waiting for active on " << op << dendl; - waiting_for_peered.push_back(op); - op->mark_delayed("waiting for peered"); - return; - } - if (!is_peered()) { // Delay unless PGBackend says it's ok if (pgbackend->can_handle_while_inactive(op)) { @@ -1700,6 +1691,15 @@ void PrimaryLogPG::do_request( } } + if (flushes_in_progress > 0) { + dout(20) << flushes_in_progress + << " flushes_in_progress pending " + << "waiting for flush on " << op << dendl; + waiting_for_flush.push_back(op); + op->mark_delayed("waiting for flush"); + return; + } + assert(is_peered() && flushes_in_progress == 0); if (pgbackend->handle_message(op)) return; @@ -10652,7 +10652,7 @@ void PrimaryLogPG::on_flushed() assert(flushes_in_progress > 0); flushes_in_progress--; if (flushes_in_progress == 0) { - requeue_ops(waiting_for_peered); + requeue_ops(waiting_for_flush); } if (!is_peered() || !is_primary()) { pair i; @@ -10817,6 +10817,7 @@ void PrimaryLogPG::on_change(ObjectStore::Transaction *t) // requeue everything in the reverse order they should be // reexamined. requeue_ops(waiting_for_peered); + requeue_ops(waiting_for_flush); requeue_ops(waiting_for_active); clear_scrub_reserved(); @@ -13154,6 +13155,7 @@ bool PrimaryLogPG::agent_choose_mode(bool restart, OpRequestRef op) is_active()) { if (op) requeue_op(op); + requeue_ops(waiting_for_flush); requeue_ops(waiting_for_active); requeue_ops(waiting_for_scrub); requeue_ops(waiting_for_cache_not_full); -- 2.39.5