From: Sage Weil Date: Fri, 15 Sep 2017 20:03:38 +0000 (-0400) Subject: osd: fix waiting_for_peered vs flushing X-Git-Tag: v12.2.3~188^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=acccae56c4b67bbb0a788e8ca7b248e9b8826285;p=ceph.git osd: fix waiting_for_peered vs flushing on_flush() requeues waiting_for_peered, but we flush twice on the primary during peering, and we don't want to requeue the first one (when we have the master pg log merged). Fix by moving waiting_for_peered to waiting_for_flush if we aren't already flush on _activate_committed. If we get an op and are peered but not flushed, queue ops there. (We can simplify this check a bit since pgbackend inactive message handling doesn't care about flushed or not flushed.) When flushed, we requeue waiting_for_flush. The waiting_for_flush, waiting_for_peered, and waiting_for_active lists are all mutually exclusive, so this mostly serves to clarify what we are waiting for (not to keep items separate). And it means that on_flushed() will only requeue things that were waiting for it specifically. Fixes: http://tracker.ceph.com/issues/21407 Signed-off-by: Sage Weil (cherry picked from commit 8f7dc8b0d8135b20322c09a5909ad43d4c83b0ea) --- diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 139a9ac6d179..e7e098f583d5 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -1932,6 +1932,12 @@ void PG::_activate_committed(epoch_t epoch, epoch_t activation_epoch) // waiters if (flushes_in_progress == 0) { requeue_ops(waiting_for_peered); + } else if (!waiting_for_peered.empty()) { + dout(10) << __func__ << " flushes in progress, moving " + << waiting_for_peered.size() << " items to waiting_for_flush" + << dendl; + assert(waiting_for_flush.empty()); + waiting_for_flush.swap(waiting_for_peered); } } @@ -7396,6 +7402,13 @@ boost::statechart::result PG::RecoveryState::Active::react(const AllReplicasActi // waiters if (pg->flushes_in_progress == 0) { pg->requeue_ops(pg->waiting_for_peered); + } else if (!pg->waiting_for_peered.empty()) { + ldout(pg->cct, 10) << __func__ << " flushes in progress, moving " + << pg->waiting_for_peered.size() + << " items to waiting_for_flush" + << dendl; + assert(pg->waiting_for_flush.empty()); + pg->waiting_for_flush.swap(pg->waiting_for_peered); } pg->on_activate(); diff --git a/src/osd/PG.h b/src/osd/PG.h index fd5f36aec066..bc3135504ba1 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -871,6 +871,9 @@ protected: * - waiting_for_active * - !is_active() * - only starts blocking on interval change; never restarts + * - waiting_for_flush + * - is_active() and flushes_in_progress + * - waiting for final flush during activate * - waiting_for_scrub * - starts and stops blocking for varying intervals during scrub * - waiting_for_unreadable_object @@ -913,6 +916,7 @@ protected: // ops waiting on active (require peered as well) list waiting_for_active; + list waiting_for_flush; list waiting_for_scrub; list waiting_for_cache_not_full; diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc index 3dd4eff09893..bee814ddea08 100644 --- a/src/osd/PrimaryLogPG.cc +++ b/src/osd/PrimaryLogPG.cc @@ -1701,15 +1701,6 @@ void PrimaryLogPG::do_request( } } - if (flushes_in_progress > 0) { - dout(20) << flushes_in_progress - << " flushes_in_progress pending " - << "waiting for active on " << op << dendl; - waiting_for_peered.push_back(op); - op->mark_delayed("waiting for peered"); - return; - } - if (!is_peered()) { // Delay unless PGBackend says it's ok if (pgbackend->can_handle_while_inactive(op)) { @@ -1723,6 +1714,15 @@ void PrimaryLogPG::do_request( } } + if (flushes_in_progress > 0) { + dout(20) << flushes_in_progress + << " flushes_in_progress pending " + << "waiting for flush on " << op << dendl; + waiting_for_flush.push_back(op); + op->mark_delayed("waiting for flush"); + return; + } + assert(is_peered() && flushes_in_progress == 0); if (pgbackend->handle_message(op)) return; @@ -10904,7 +10904,7 @@ void PrimaryLogPG::on_flushed() assert(flushes_in_progress > 0); flushes_in_progress--; if (flushes_in_progress == 0) { - requeue_ops(waiting_for_peered); + requeue_ops(waiting_for_flush); } if (!is_peered() || !is_primary()) { pair i; @@ -11070,6 +11070,7 @@ void PrimaryLogPG::on_change(ObjectStore::Transaction *t) // requeue everything in the reverse order they should be // reexamined. requeue_ops(waiting_for_peered); + requeue_ops(waiting_for_flush); requeue_ops(waiting_for_active); clear_scrub_reserved(); @@ -13470,6 +13471,7 @@ bool PrimaryLogPG::agent_choose_mode(bool restart, OpRequestRef op) is_active()) { if (op) requeue_op(op); + requeue_ops(waiting_for_flush); requeue_ops(waiting_for_active); requeue_ops(waiting_for_scrub); requeue_ops(waiting_for_cache_not_full);