From 9c4d780c5d3d856393b3225a38d105663a2c4af6 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 9 Jul 2019 10:36:49 -0500 Subject: [PATCH] osd/PrimaryLogPG: put ops on waitlist when PG is laggy Move PG to laggy state if we aren't readable Signed-off-by: Sage Weil --- src/osd/PG.h | 7 ++++++ src/osd/PrimaryLogPG.cc | 49 +++++++++++++++++++++++++++++++++++++++++ src/osd/PrimaryLogPG.h | 12 ++++++++++ 3 files changed, 68 insertions(+) diff --git a/src/osd/PG.h b/src/osd/PG.h index 43a3a750237..a35ce49215d 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -911,6 +911,9 @@ protected: * - waiting_for_active * - !is_active() * - only starts blocking on interval change; never restarts + * - waiting_for_readable + * - now > readable_until + * - unblocks when we get fresh(er) osd_pings * - waiting_for_scrub * - starts and stops blocking for varying intervals during scrub * - waiting_for_unreadable_object @@ -948,6 +951,9 @@ protected: // ops waiting on peered list waiting_for_peered; + /// ops waiting on readable + list waiting_for_readable; + // ops waiting on active (require peered as well) list waiting_for_active; list waiting_for_flush; @@ -1403,6 +1409,7 @@ protected: bool is_recovering() const { return recovery_state.is_recovering(); } bool is_premerge() const { return recovery_state.is_premerge(); } bool is_repair() const { return recovery_state.is_repair(); } + bool is_laggy() const { return state_test(PG_STATE_LAGGY); } bool is_empty() const { return recovery_state.is_empty(); } diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc index 3f4222277c7..a048b43588a 100644 --- a/src/osd/PrimaryLogPG.cc +++ b/src/osd/PrimaryLogPG.cc @@ -752,6 +752,40 @@ void PrimaryLogPG::maybe_force_recovery() maybe_kick_recovery(soid); } +bool PrimaryLogPG::check_laggy(OpRequestRef& op) +{ + if (!state_test(PG_STATE_LAGGY)) { + auto mnow = osd->get_mnow(); + auto ru = recovery_state.get_readable_until(); + if (mnow <= ru) { + // not laggy + return true; + } + dout(10) << 
__func__ + << " mnow " << mnow + << " > readable_until " << ru << dendl; + + // go to laggy state + state_set(PG_STATE_LAGGY); + publish_stats_to_osd(); + } + dout(10) << __func__ << " not readable" << dendl; + waiting_for_readable.push_back(op); + op->mark_delayed("waiting for readable"); + return false; +} + +bool PrimaryLogPG::check_laggy_requeue(OpRequestRef& op) +{ + if (!state_test(PG_STATE_LAGGY)) { + return true; // not laggy + } + dout(10) << __func__ << " not readable" << dendl; + waiting_for_readable.push_front(op); + op->mark_delayed("waiting for readable"); + return false; +} + bool PrimaryLogPG::pgls_filter(const PGLSFilter& filter, const hobject_t& sobj) { bufferlist bl; @@ -1719,6 +1753,10 @@ void PrimaryLogPG::do_op(OpRequestRef& op) } } + if (!check_laggy(op)) { + return; + } + if (!op_has_sufficient_caps(op)) { osd->reply_op_error(op, -EPERM); return; @@ -1868,6 +1906,9 @@ void PrimaryLogPG::do_op(OpRequestRef& op) op->mark_delayed("waiting for scrub"); return; } + if (!check_laggy_requeue(op)) { + return; + } // blocked on snap? 
if (auto blocked_iter = objects_blocked_on_degraded_snap.find(head); @@ -2224,6 +2265,9 @@ PrimaryLogPG::cache_result_t PrimaryLogPG::maybe_handle_manifest_detail( op->mark_delayed("waiting for scrub"); return cache_result_t::BLOCKED_RECOVERY; } + if (!check_laggy_requeue(op)) { + return cache_result_t::BLOCKED_RECOVERY; + } for (auto& p : obc->obs.oi.manifest.chunk_map) { if (p.second.is_missing()) { @@ -3577,6 +3621,9 @@ void PrimaryLogPG::promote_object(ObjectContextRef obc, } return; } + if (op && !check_laggy_requeue(op)) { + return; + } if (!obc) { // we need to create an ObjectContext ceph_assert(missing_oid != hobject_t()); obc = get_object_context(missing_oid, true); @@ -11975,6 +12022,7 @@ void PrimaryLogPG::on_change(ObjectStore::Transaction &t) requeue_ops(waiting_for_peered); requeue_ops(waiting_for_flush); requeue_ops(waiting_for_active); + requeue_ops(waiting_for_readable); clear_scrub_reserved(); @@ -14298,6 +14346,7 @@ bool PrimaryLogPG::agent_choose_mode(bool restart, OpRequestRef op) requeue_op(op); requeue_ops(waiting_for_flush); requeue_ops(waiting_for_active); + requeue_ops(waiting_for_readable); requeue_ops(waiting_for_scrub); requeue_ops(waiting_for_cache_not_full); objects_blocked_on_cache_full.clear(); diff --git a/src/osd/PrimaryLogPG.h b/src/osd/PrimaryLogPG.h index c5d05615092..9b68c112352 100644 --- a/src/osd/PrimaryLogPG.h +++ b/src/osd/PrimaryLogPG.h @@ -910,6 +910,15 @@ protected: p.second, p.second.begin(), p.second.end()); + } else if (is_laggy()) { + for (auto& op : p.second) { + op->mark_delayed("waiting for readable"); + } + waiting_for_readable.splice( + waiting_for_readable.begin(), + p.second, + p.second.begin(), + p.second.end()); } else { requeue_ops(p.second); } @@ -1864,6 +1873,9 @@ public: void wait_for_unreadable_object(const hobject_t& oid, OpRequestRef op); void wait_for_all_missing(OpRequestRef op); + bool check_laggy(OpRequestRef& op); + bool check_laggy_requeue(OpRequestRef& op); + bool 
is_backfill_target(pg_shard_t osd) const { return recovery_state.is_backfill_target(osd); } -- 2.39.5