From: Greg Farnum Date: Fri, 7 Apr 2017 22:41:18 +0000 (-0700) Subject: osd/PrimaryLogPG: limit the number of concurrently trimming pgs X-Git-Tag: v11.2.1~21^2~10 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=995ef4a6b01635ef09532487f7a4ea5fc90e721d;p=ceph.git osd/PrimaryLogPG: limit the number of concurrently trimming pgs This patch introduces an AsyncReserver for snap trimming to limit the number of pgs on any single OSD which can be trimming, as with backfill. Unlike backfill, we don't take remote reservations on the assumption that the set of pgs with trimming work to do is already well distributed, so it doesn't seem worth the implementation overhead to get reservations from the peers as well. Signed-off-by: Samuel Just (cherry picked from commit 21cc515adfb225ba70f1d80b1b76f0345c214c22) Conflicts: src/osd/PrimaryLogPG.cc src/osd/PrimaryLogPG.h Signed-off-by: Greg Farnum --- diff --git a/src/common/config_opts.h b/src/common/config_opts.h index 673da99a8acf0..1a851c5f0f489 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -761,6 +761,8 @@ OPTION(osd_heartbeat_use_min_delay_socket, OPT_BOOL, false) // prio the heartbea // max number of parallel snap trims/pg OPTION(osd_pg_max_concurrent_snap_trims, OPT_U64, 2) +// max number of trimming pgs +OPTION(osd_max_trimming_pgs, OPT_U64, 2) // minimum number of peers that must be reachable to mark ourselves // back up after being wrongly marked down. diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 134b141f54200..d11099943a4b9 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -257,6 +257,8 @@ OSDService::OSDService(OSD *osd) : remote_reserver(&reserver_finisher, cct->_conf->osd_max_backfills, cct->_conf->osd_min_recovery_priority), pg_temp_lock("OSDService::pg_temp_lock"), + snap_reserver(&reserver_finisher, + cct->_conf->osd_max_trimming_pgs), recovery_lock("OSDService::recovery_lock"), recovery_ops_active(0), recovery_ops_reserved(0), @@ -9200,6 +9202,9 @@ void OSD::handle_conf_change(const struct md_config_t *conf, service.local_reserver.set_min_priority(cct->_conf->osd_min_recovery_priority); service.remote_reserver.set_min_priority(cct->_conf->osd_min_recovery_priority); } + if (changed.count("osd_max_trimming_pgs")) { + service.snap_reserver.set_max(cct->_conf->osd_max_trimming_pgs); + } if (changed.count("osd_op_complaint_time") || changed.count("osd_op_log_threshold")) { op_tracker.set_complaint_and_threshold(cct->_conf->osd_op_complaint_time, diff --git a/src/osd/OSD.h b/src/osd/OSD.h index d5c19ff3b4574..08b4edd8b1b0a 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -889,7 +889,10 @@ public: void send_pg_temp(); void queue_for_peering(PG *pg); + + AsyncReserver snap_reserver; void queue_for_snap_trim(PG *pg); + void queue_for_scrub(PG *pg) { op_wq.queue( make_pair( diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc index 57d09420743d9..b9ae9e6493023 100644 --- a/src/osd/PrimaryLogPG.cc +++ b/src/osd/PrimaryLogPG.cc @@ -13228,11 +13228,6 @@ void PrimaryLogPG::_scrub_finish() #undef dout_prefix #define dout_prefix *_dout << pg->gen_prefix() -PrimaryLogPG::SnapTrimmer::~SnapTrimmer() -{ - in_flight.clear(); -} - void PrimaryLogPG::SnapTrimmer::log_enter(const char *state_name) { dout(20) << "enter " << state_name << dendl; @@ -13266,24 +13261,40 @@ boost::statechart::result PrimaryLogPG::NotTrimming::react(const KickTrim&) PrimaryLogPG *pg = context< SnapTrimmer >().pg; dout(10) << "NotTrimming react KickTrim" << dendl; - assert(pg->is_primary() && pg->is_active()); + if (!(pg->is_primary() && pg->is_active())) { + ldout(pg->cct, 10) << "NotTrimming not primary or active" << dendl; + return discard_event(); + } if (!pg->is_clean() || pg->snap_trimq.empty()) { dout(10) << "NotTrimming not clean or nothing to trim" << dendl; return discard_event(); } - if (pg->scrubber.active) { dout(10) << " scrubbing, will requeue snap_trimmer after" << dendl; pg->scrubber.queue_snap_trim = true; return transit< WaitScrub >(); } else { - context().snap_to_trim = pg->snap_trimq.range_start(); - dout(10) << "NotTrimming: trimming " - << pg->snap_trimq.range_start() - << dendl; - return transit< AwaitAsyncWork >(); + return transit< Trimming >(); + } +} + +boost::statechart::result PrimaryLogPG::WaitReservation::react(const SnapTrimReserved&) +{ + PrimaryLogPG *pg = context< SnapTrimmer >().pg; + ldout(pg->cct, 10) << "WaitReservation react SnapTrimReserved" << dendl; + + pending = nullptr; + if (!context< SnapTrimmer >().can_trim()) { + post_event(KickTrim()); + return transit< NotTrimming >(); } + + context().snap_to_trim = pg->snap_trimq.range_start(); + ldout(pg->cct, 10) << "NotTrimming: trimming " + << pg->snap_trimq.range_start() + << dendl; + return transit< AwaitAsyncWork >(); } /* AwaitAsyncWork */ @@ -13296,23 +13307,18 @@ PrimaryLogPG::AwaitAsyncWork::AwaitAsyncWork(my_context ctx) context< SnapTrimmer >().pg); } -void PrimaryLogPG::AwaitAsyncWork::exit() -{ - context< SnapTrimmer >().log_exit(state_name, enter_time); -} - boost::statechart::result PrimaryLogPG::AwaitAsyncWork::react(const DoSnapWork&) { dout(10) << "AwaitAsyncWork react" << dendl; PrimaryLogPGRef pg = context< SnapTrimmer >().pg; - snapid_t snap_to_trim = context().snap_to_trim; - auto &in_flight = context().in_flight; + + snapid_t snap_to_trim = context().snap_to_trim; + auto &in_flight = context().in_flight; assert(in_flight.empty()); assert(pg->is_primary() && pg->is_active()); - if (!pg->is_clean() || - pg->scrubber.active) { - dout(10) << "something changed, reverting to NotTrimming" << dendl; + if (!context< SnapTrimmer >().can_trim()) { + ldout(pg->cct, 10) << "something changed, reverting to NotTrimming" << dendl; post_event(KickTrim()); return transit< NotTrimming >(); } diff --git a/src/osd/PrimaryLogPG.h b/src/osd/PrimaryLogPG.h index 019e871e12d7e..fad46e3d6718f 100644 --- a/src/osd/PrimaryLogPG.h +++ b/src/osd/PrimaryLogPG.h @@ -1421,7 +1421,7 @@ public: PG::_init(*t, child, pool); } private: - struct NotTrimming; + struct DoSnapWork : boost::statechart::event< DoSnapWork > { DoSnapWork() : boost::statechart::event < DoSnapWork >() {} }; @@ -1440,106 +1440,175 @@ private: struct Reset : boost::statechart::event< Reset > { Reset() : boost::statechart::event< Reset >() {} }; + struct SnapTrimReserved : boost::statechart::event< SnapTrimReserved > { + SnapTrimReserved() : boost::statechart::event< SnapTrimReserved >() {} + }; + + struct NotTrimming; struct SnapTrimmer : public boost::statechart::state_machine< SnapTrimmer, NotTrimming > { PrimaryLogPG *pg; - set in_flight; - snapid_t snap_to_trim; explicit SnapTrimmer(PrimaryLogPG *pg) : pg(pg) {} - ~SnapTrimmer(); void log_enter(const char *state_name); void log_exit(const char *state_name, utime_t duration); + bool can_trim() { + return pg->is_clean() && !pg->scrubber.active && !pg->snap_trimq.empty(); + } } snap_trimmer_machine; - /* SnapTrimmerStates */ - struct AwaitAsyncWork : boost::statechart::state< AwaitAsyncWork, SnapTrimmer >, NamedState { + struct WaitReservation; + struct Trimming : boost::statechart::state< Trimming, SnapTrimmer, WaitReservation >, NamedState { typedef boost::mpl::list < - boost::statechart::custom_reaction< DoSnapWork >, boost::statechart::custom_reaction< KickTrim >, boost::statechart::transition< Reset, NotTrimming > > reactions; - explicit AwaitAsyncWork(my_context ctx); - void exit(); - boost::statechart::result react(const DoSnapWork&); + + set in_flight; + snapid_t snap_to_trim; + + explicit Trimming(my_context ctx) + : my_base(ctx), + NamedState(context< SnapTrimmer >().pg->cct, "Trimming") { + context< SnapTrimmer >().log_enter(state_name); + assert(context< SnapTrimmer >().can_trim()); + assert(in_flight.empty()); + } + void exit() { + context< SnapTrimmer >().log_exit(state_name, enter_time); + auto *pg = context< SnapTrimmer >().pg; + pg->osd->snap_reserver.cancel_reservation(pg->get_pgid()); + } boost::statechart::result react(const KickTrim&) { return discard_event(); } }; - struct WaitRWLock : boost::statechart::state< WaitRWLock, SnapTrimmer >, NamedState { + /* SnapTrimmerStates */ + struct WaitRWLock : boost::statechart::state< WaitRWLock, Trimming >, NamedState { typedef boost::mpl::list < - boost::statechart::custom_reaction< TrimWriteUnblocked >, - boost::statechart::custom_reaction< KickTrim >, - boost::statechart::transition< Reset, NotTrimming > + boost::statechart::custom_reaction< TrimWriteUnblocked > > reactions; explicit WaitRWLock(my_context ctx) : my_base(ctx), NamedState(context< SnapTrimmer >().pg->cct, "Trimming/WaitRWLock") { context< SnapTrimmer >().log_enter(state_name); - assert(context().in_flight.empty()); + assert(context().in_flight.empty()); } void exit() { context< SnapTrimmer >().log_exit(state_name, enter_time); } boost::statechart::result react(const TrimWriteUnblocked&) { - post_event(KickTrim()); - return discard_event(); - } - boost::statechart::result react(const KickTrim&) { - return discard_event(); + if (!context< SnapTrimmer >().can_trim()) { + post_event(KickTrim()); + return transit< NotTrimming >(); + } else { + return transit< AwaitAsyncWork >(); + } } }; - struct WaitScrub : boost::statechart::state< WaitScrub, SnapTrimmer >, NamedState { + struct WaitRepops : boost::statechart::state< WaitRepops, Trimming >, NamedState { typedef boost::mpl::list < - boost::statechart::custom_reaction< ScrubComplete >, - boost::statechart::custom_reaction< KickTrim >, - boost::statechart::transition< Reset, NotTrimming > + boost::statechart::custom_reaction< RepopsComplete > > reactions; - explicit WaitScrub(my_context ctx) + explicit WaitRepops(my_context ctx) : my_base(ctx), - NamedState(context< SnapTrimmer >().pg->cct, "Trimming/WaitScrub") { + NamedState(context< SnapTrimmer >().pg->cct, "Trimming/WaitRepops") { context< SnapTrimmer >().log_enter(state_name); - assert(context().in_flight.empty()); + assert(!context().in_flight.empty()); } void exit() { context< SnapTrimmer >().log_exit(state_name, enter_time); } - boost::statechart::result react(const ScrubComplete&) { - post_event(KickTrim()); - return transit< NotTrimming >(); + boost::statechart::result react(const RepopsComplete&) { + if (!context< SnapTrimmer >().can_trim()) { + post_event(KickTrim()); + return transit< NotTrimming >(); + } else { + return transit< AwaitAsyncWork >(); + } + } + }; + + struct AwaitAsyncWork : boost::statechart::state< AwaitAsyncWork, Trimming >, NamedState { + typedef boost::mpl::list < + boost::statechart::custom_reaction< DoSnapWork > + > reactions; + explicit AwaitAsyncWork(my_context ctx); + void exit() { + context< SnapTrimmer >().log_exit(state_name, enter_time); + } + boost::statechart::result react(const DoSnapWork&); + }; + + struct WaitReservation : boost::statechart::state< WaitReservation, Trimming >, NamedState { + /* WaitReservation is a sub-state of trimming simply so that exiting Trimming + * always cancels the reservation */ + typedef boost::mpl::list < + boost::statechart::custom_reaction< SnapTrimReserved > + > reactions; + struct ReservationCB : public Context { + PrimaryLogPGRef pg; + bool canceled; + ReservationCB(PrimaryLogPG *pg) : pg(pg), canceled(false) {} + void finish(int) override { + pg->lock(); + if (!canceled) + pg->snap_trimmer_machine.process_event(SnapTrimReserved()); + pg->unlock(); + } + void cancel() { + assert(pg->is_locked()); + assert(!canceled); + canceled = true; + } + }; + ReservationCB *pending = nullptr; + + explicit WaitReservation(my_context ctx) + : my_base(ctx), + NamedState(context< SnapTrimmer >().pg->cct, "Trimming/WaitReservation") { + context< SnapTrimmer >().log_enter(state_name); + assert(context().in_flight.empty()); + auto *pg = context< SnapTrimmer >().pg; + pending = new ReservationCB(pg); + pg->osd->snap_reserver.request_reservation( + pg->get_pgid(), + pending, + 0); + } + boost::statechart::result react(const SnapTrimReserved&); + void exit() { + context< SnapTrimmer >().log_exit(state_name, enter_time); + if (pending) + pending->cancel(); + pending = nullptr; } boost::statechart::result react(const KickTrim&) { return discard_event(); } }; - struct WaitRepops : boost::statechart::state< WaitRepops, SnapTrimmer >, NamedState { + struct WaitScrub : boost::statechart::state< WaitScrub, SnapTrimmer >, NamedState { typedef boost::mpl::list < - boost::statechart::custom_reaction< RepopsComplete >, + boost::statechart::custom_reaction< ScrubComplete >, boost::statechart::custom_reaction< KickTrim >, - boost::statechart::custom_reaction< Reset > + boost::statechart::transition< Reset, NotTrimming > > reactions; - explicit WaitRepops(my_context ctx) + explicit WaitScrub(my_context ctx) : my_base(ctx), - NamedState(context< SnapTrimmer >().pg->cct, "Trimming/WaitRepops") { + NamedState(context< SnapTrimmer >().pg->cct, "Trimming/WaitScrub") { context< SnapTrimmer >().log_enter(state_name); - assert(!context().in_flight.empty()); } void exit() { context< SnapTrimmer >().log_exit(state_name, enter_time); - assert(context().in_flight.empty()); } - boost::statechart::result react(const RepopsComplete&) { + boost::statechart::result react(const ScrubComplete&) { post_event(KickTrim()); return transit< NotTrimming >(); } boost::statechart::result react(const KickTrim&) { return discard_event(); } - boost::statechart::result react(const Reset&) { - context().in_flight.clear(); - return transit< NotTrimming>(); - } }; struct NotTrimming : boost::statechart::state< NotTrimming, SnapTrimmer >, NamedState {