From: Samuel Just Date: Wed, 1 Feb 2017 20:07:09 +0000 (-0800) Subject: osd/PrimaryLogPG: limit the number of concurrently trimming pgs X-Git-Tag: v12.0.1~273^2~3 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=21cc515adfb225ba70f1d80b1b76f0345c214c22;p=ceph.git osd/PrimaryLogPG: limit the number of concurrently trimming pgs This patch introduces an AsyncReserver for snap trimming to limit the number of pgs on any single OSD which can be trimming, as with backfill. Unlike backfill, we don't take remote reservations on the assumption that the set of pgs with trimming work to do is already well distributed, so it doesn't seem worth the implementation overhead to get reservations from the peers as well. Signed-off-by: Samuel Just --- diff --git a/src/common/config_opts.h b/src/common/config_opts.h index 76c3f0ddd160..16e9804bcf2e 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -774,6 +774,8 @@ OPTION(osd_heartbeat_use_min_delay_socket, OPT_BOOL, false) // prio the heartbea // max number of parallel snap trims/pg OPTION(osd_pg_max_concurrent_snap_trims, OPT_U64, 2) +// max number of trimming pgs +OPTION(osd_max_trimming_pgs, OPT_U64, 2) // minimum number of peers that must be reachable to mark ourselves // back up after being wrongly marked down. 
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index c101a2b8d1ca..1d77f5aa6d40 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -261,6 +261,8 @@ OSDService::OSDService(OSD *osd) : remote_reserver(&reserver_finisher, cct->_conf->osd_max_backfills, cct->_conf->osd_min_recovery_priority), pg_temp_lock("OSDService::pg_temp_lock"), + snap_reserver(&reserver_finisher, + cct->_conf->osd_max_trimming_pgs), recovery_lock("OSDService::recovery_lock"), recovery_ops_active(0), recovery_ops_reserved(0), @@ -9296,6 +9298,9 @@ void OSD::handle_conf_change(const struct md_config_t *conf, service.local_reserver.set_min_priority(cct->_conf->osd_min_recovery_priority); service.remote_reserver.set_min_priority(cct->_conf->osd_min_recovery_priority); } + if (changed.count("osd_max_trimming_pgs")) { + service.snap_reserver.set_max(cct->_conf->osd_max_trimming_pgs); + } if (changed.count("osd_op_complaint_time") || changed.count("osd_op_log_threshold")) { op_tracker.set_complaint_and_threshold(cct->_conf->osd_op_complaint_time, diff --git a/src/osd/OSD.h b/src/osd/OSD.h index e2d35f01d32b..c43be7a3662e 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -911,7 +911,10 @@ public: void send_pg_temp(); void queue_for_peering(PG *pg); + + AsyncReserver snap_reserver; void queue_for_snap_trim(PG *pg); + void queue_for_scrub(PG *pg) { op_wq.queue( make_pair( diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc index 977a0dbca8d3..c76d2a0669c4 100644 --- a/src/osd/PrimaryLogPG.cc +++ b/src/osd/PrimaryLogPG.cc @@ -13062,11 +13062,6 @@ void PrimaryLogPG::_scrub_finish() #undef dout_prefix #define dout_prefix *_dout << pg->gen_prefix() -PrimaryLogPG::SnapTrimmer::~SnapTrimmer() -{ - in_flight.clear(); -} - void PrimaryLogPG::SnapTrimmer::log_enter(const char *state_name) { ldout(pg->cct, 20) << "enter " << state_name << dendl; @@ -13100,24 +13095,40 @@ boost::statechart::result PrimaryLogPG::NotTrimming::react(const KickTrim&) PrimaryLogPG *pg = context< SnapTrimmer >().pg; 
ldout(pg->cct, 10) << "NotTrimming react KickTrim" << dendl; - assert(pg->is_primary() && pg->is_active()); + if (!(pg->is_primary() && pg->is_active())) { + ldout(pg->cct, 10) << "NotTrimming not primary or active" << dendl; + return discard_event(); + } if (!pg->is_clean() || pg->snap_trimq.empty()) { ldout(pg->cct, 10) << "NotTrimming not clean or nothing to trim" << dendl; return discard_event(); } - if (pg->scrubber.active) { ldout(pg->cct, 10) << " scrubbing, will requeue snap_trimmer after" << dendl; pg->scrubber.queue_snap_trim = true; return transit< WaitScrub >(); } else { - context<SnapTrimmer>().snap_to_trim = pg->snap_trimq.range_start(); - ldout(pg->cct, 10) << "NotTrimming: trimming " - << pg->snap_trimq.range_start() - << dendl; - return transit< AwaitAsyncWork >(); + return transit< Trimming >(); + } +} + +boost::statechart::result PrimaryLogPG::WaitReservation::react(const SnapTrimReserved&) +{ + PrimaryLogPG *pg = context< SnapTrimmer >().pg; + ldout(pg->cct, 10) << "WaitReservation react SnapTrimReserved" << dendl; + + pending = nullptr; + if (!context< SnapTrimmer >().can_trim()) { + post_event(KickTrim()); + return transit< NotTrimming >(); } + + context<Trimming>().snap_to_trim = pg->snap_trimq.range_start(); + ldout(pg->cct, 10) << "NotTrimming: trimming " + << pg->snap_trimq.range_start() + << dendl; + return transit< AwaitAsyncWork >(); } /* AwaitAsyncWork */ @@ -13130,22 +13141,15 @@ PrimaryLogPG::AwaitAsyncWork::AwaitAsyncWork(my_context ctx) context< SnapTrimmer >().pg); } -void PrimaryLogPG::AwaitAsyncWork::exit() -{ - context< SnapTrimmer >().log_exit(state_name, enter_time); -} - boost::statechart::result PrimaryLogPG::AwaitAsyncWork::react(const DoSnapWork&) { PrimaryLogPGRef pg = context< SnapTrimmer >().pg; - ldout(pg->cct, 10) << "AwaitAsyncWork react" << dendl; - snapid_t snap_to_trim = context<SnapTrimmer>().snap_to_trim; - auto &in_flight = context<SnapTrimmer>().in_flight; + snapid_t snap_to_trim = context<Trimming>().snap_to_trim; + auto &in_flight = context<Trimming>().in_flight; 
assert(in_flight.empty()); assert(pg->is_primary() && pg->is_active()); - if (!pg->is_clean() || - pg->scrubber.active) { + if (!context< SnapTrimmer >().can_trim()) { ldout(pg->cct, 10) << "something changed, reverting to NotTrimming" << dendl; post_event(KickTrim()); return transit< NotTrimming >(); @@ -13202,8 +13206,7 @@ boost::statechart::result PrimaryLogPG::AwaitAsyncWork::react(const DoSnapWork&) return transit< WaitRWLock >(); } else { - ldout(pg->cct, 10) << "letting the ones we already started finish" - << dendl; + ldout(pg->cct, 10) << "letting the ones we already started finish" << dendl; return transit< WaitRepops >(); } } diff --git a/src/osd/PrimaryLogPG.h b/src/osd/PrimaryLogPG.h index 69aa55d1d5f1..75f40c9f3921 100644 --- a/src/osd/PrimaryLogPG.h +++ b/src/osd/PrimaryLogPG.h @@ -1429,7 +1429,7 @@ public: PG::_init(*t, child, pool); } private: - struct NotTrimming; + struct DoSnapWork : boost::statechart::event< DoSnapWork > { DoSnapWork() : boost::statechart::event < DoSnapWork >() {} }; @@ -1448,106 +1448,175 @@ private: struct Reset : boost::statechart::event< Reset > { Reset() : boost::statechart::event< Reset >() {} }; + struct SnapTrimReserved : boost::statechart::event< SnapTrimReserved > { + SnapTrimReserved() : boost::statechart::event< SnapTrimReserved >() {} + }; + + struct NotTrimming; struct SnapTrimmer : public boost::statechart::state_machine< SnapTrimmer, NotTrimming > { PrimaryLogPG *pg; - set in_flight; - snapid_t snap_to_trim; explicit SnapTrimmer(PrimaryLogPG *pg) : pg(pg) {} - ~SnapTrimmer(); void log_enter(const char *state_name); void log_exit(const char *state_name, utime_t duration); + bool can_trim() { + return pg->is_clean() && !pg->scrubber.active && !pg->snap_trimq.empty(); + } } snap_trimmer_machine; - /* SnapTrimmerStates */ - struct AwaitAsyncWork : boost::statechart::state< AwaitAsyncWork, SnapTrimmer >, NamedState { + struct WaitReservation; + struct Trimming : boost::statechart::state< Trimming, SnapTrimmer, 
WaitReservation >, NamedState { typedef boost::mpl::list < - boost::statechart::custom_reaction< DoSnapWork >, boost::statechart::custom_reaction< KickTrim >, boost::statechart::transition< Reset, NotTrimming > > reactions; - explicit AwaitAsyncWork(my_context ctx); - void exit(); - boost::statechart::result react(const DoSnapWork&); + + set in_flight; + snapid_t snap_to_trim; + + explicit Trimming(my_context ctx) + : my_base(ctx), + NamedState(context< SnapTrimmer >().pg->cct, "Trimming") { + context< SnapTrimmer >().log_enter(state_name); + assert(context< SnapTrimmer >().can_trim()); + assert(in_flight.empty()); + } + void exit() { + context< SnapTrimmer >().log_exit(state_name, enter_time); + auto *pg = context< SnapTrimmer >().pg; + pg->osd->snap_reserver.cancel_reservation(pg->get_pgid()); + } boost::statechart::result react(const KickTrim&) { return discard_event(); } }; - struct WaitRWLock : boost::statechart::state< WaitRWLock, SnapTrimmer >, NamedState { + /* SnapTrimmerStates */ + struct WaitRWLock : boost::statechart::state< WaitRWLock, Trimming >, NamedState { typedef boost::mpl::list < - boost::statechart::custom_reaction< TrimWriteUnblocked >, - boost::statechart::custom_reaction< KickTrim >, - boost::statechart::transition< Reset, NotTrimming > + boost::statechart::custom_reaction< TrimWriteUnblocked > > reactions; explicit WaitRWLock(my_context ctx) : my_base(ctx), NamedState(context< SnapTrimmer >().pg->cct, "Trimming/WaitRWLock") { context< SnapTrimmer >().log_enter(state_name); - assert(context<SnapTrimmer>().in_flight.empty()); + assert(context<Trimming>().in_flight.empty()); } void exit() { context< SnapTrimmer >().log_exit(state_name, enter_time); } boost::statechart::result react(const TrimWriteUnblocked&) { - post_event(KickTrim()); - return discard_event(); - } - boost::statechart::result react(const KickTrim&) { - return discard_event(); + if (!context< SnapTrimmer >().can_trim()) { + post_event(KickTrim()); + return transit< NotTrimming >(); + } else { + 
return transit< AwaitAsyncWork >(); + } } }; - struct WaitScrub : boost::statechart::state< WaitScrub, SnapTrimmer >, NamedState { + struct WaitRepops : boost::statechart::state< WaitRepops, Trimming >, NamedState { typedef boost::mpl::list < - boost::statechart::custom_reaction< ScrubComplete >, - boost::statechart::custom_reaction< KickTrim >, - boost::statechart::transition< Reset, NotTrimming > + boost::statechart::custom_reaction< RepopsComplete > > reactions; - explicit WaitScrub(my_context ctx) + explicit WaitRepops(my_context ctx) : my_base(ctx), - NamedState(context< SnapTrimmer >().pg->cct, "Trimming/WaitScrub") { + NamedState(context< SnapTrimmer >().pg->cct, "Trimming/WaitRepops") { context< SnapTrimmer >().log_enter(state_name); - assert(context<SnapTrimmer>().in_flight.empty()); + assert(!context<Trimming>().in_flight.empty()); } void exit() { context< SnapTrimmer >().log_exit(state_name, enter_time); } - boost::statechart::result react(const ScrubComplete&) { - post_event(KickTrim()); - return transit< NotTrimming >(); + boost::statechart::result react(const RepopsComplete&) { + if (!context< SnapTrimmer >().can_trim()) { + post_event(KickTrim()); + return transit< NotTrimming >(); + } else { + return transit< AwaitAsyncWork >(); + } + } + }; + + struct AwaitAsyncWork : boost::statechart::state< AwaitAsyncWork, Trimming >, NamedState { + typedef boost::mpl::list < + boost::statechart::custom_reaction< DoSnapWork > + > reactions; + explicit AwaitAsyncWork(my_context ctx); + void exit() { + context< SnapTrimmer >().log_exit(state_name, enter_time); + } + boost::statechart::result react(const DoSnapWork&); + }; + + struct WaitReservation : boost::statechart::state< WaitReservation, Trimming >, NamedState { + /* WaitReservation is a sub-state of trimming simply so that exiting Trimming + * always cancels the reservation */ + typedef boost::mpl::list < + boost::statechart::custom_reaction< SnapTrimReserved > + > reactions; + struct ReservationCB : public Context { + 
PrimaryLogPGRef pg; + bool canceled; + ReservationCB(PrimaryLogPG *pg) : pg(pg), canceled(false) {} + void finish(int) override { + pg->lock(); + if (!canceled) + pg->snap_trimmer_machine.process_event(SnapTrimReserved()); + pg->unlock(); + } + void cancel() { + assert(pg->is_locked()); + assert(!canceled); + canceled = true; + } + }; + ReservationCB *pending = nullptr; + + explicit WaitReservation(my_context ctx) + : my_base(ctx), + NamedState(context< SnapTrimmer >().pg->cct, "Trimming/WaitReservation") { + context< SnapTrimmer >().log_enter(state_name); + assert(context<Trimming>().in_flight.empty()); + auto *pg = context< SnapTrimmer >().pg; + pending = new ReservationCB(pg); + pg->osd->snap_reserver.request_reservation( + pg->get_pgid(), + pending, + 0); + } + boost::statechart::result react(const SnapTrimReserved&); + void exit() { + context< SnapTrimmer >().log_exit(state_name, enter_time); + if (pending) + pending->cancel(); + pending = nullptr; } boost::statechart::result react(const KickTrim&) { return discard_event(); } }; - struct WaitRepops : boost::statechart::state< WaitRepops, SnapTrimmer >, NamedState { + struct WaitScrub : boost::statechart::state< WaitScrub, SnapTrimmer >, NamedState { typedef boost::mpl::list < - boost::statechart::custom_reaction< RepopsComplete >, + boost::statechart::custom_reaction< ScrubComplete >, boost::statechart::custom_reaction< KickTrim >, - boost::statechart::custom_reaction< Reset > + boost::statechart::transition< Reset, NotTrimming > > reactions; - explicit WaitRepops(my_context ctx) + explicit WaitScrub(my_context ctx) : my_base(ctx), - NamedState(context< SnapTrimmer >().pg->cct, "Trimming/WaitRepops") { + NamedState(context< SnapTrimmer >().pg->cct, "Trimming/WaitScrub") { context< SnapTrimmer >().log_enter(state_name); - assert(!context<SnapTrimmer>().in_flight.empty()); } void exit() { context< SnapTrimmer >().log_exit(state_name, enter_time); - assert(context<SnapTrimmer>().in_flight.empty()); } - boost::statechart::result react(const 
RepopsComplete&) { + boost::statechart::result react(const ScrubComplete&) { post_event(KickTrim()); return transit< NotTrimming >(); } boost::statechart::result react(const KickTrim&) { return discard_event(); } - boost::statechart::result react(const Reset&) { - context<SnapTrimmer>().in_flight.clear(); - return transit< NotTrimming>(); - } }; struct NotTrimming : boost::statechart::state< NotTrimming, SnapTrimmer >, NamedState {