From 407eaaca324fd5f57ff621d999c507cc5ddccdfc Mon Sep 17 00:00:00 2001 From: Samuel Just Date: Fri, 21 Oct 2016 14:29:09 -0700 Subject: [PATCH] osd/: cleanup the snap trimmer and deal with delayed repops With the PGBackend changes, it's not necessarily the case that calling simple_opc_submit syncronously updates the SnapMapper. Thus, we can't rely on being able to just ask the snap mapper for the next object immediately (we could well loop on the same one if ECBackend is flushing the pipeline). Instead, update SnapMapper and the SnapTrimmer to grab N at a time. Additionally, we need to make sure we don't try this again until all of the previously submitted repops are flushed (a good idea anyway). To that end, this patch also refactors the SnapTrimmer machine to be fully explicit about why it's blocked so we can be sure that we don't queue an async work item unless we really want to. Signed-off-by: Samuel Just --- src/osd/OSD.cc | 13 ++ src/osd/OSD.h | 12 +- src/osd/PG.cc | 40 ++---- src/osd/PG.h | 4 +- src/osd/ReplicatedPG.cc | 236 +++++++++++++++++------------------ src/osd/ReplicatedPG.h | 121 +++++++++++++++--- src/osd/SnapMapper.cc | 57 +++++---- src/osd/SnapMapper.h | 5 +- src/osd/osd_types.h | 10 +- src/test/test_snap_mapper.cc | 53 ++++---- 10 files changed, 319 insertions(+), 232 deletions(-) diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 91766776eb1..f294aad6007 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -1454,6 +1454,19 @@ void OSDService::queue_for_peering(PG *pg) peering_wq.queue(pg); } +void OSDService::queue_for_snap_trim(PG *pg) { + dout(10) << "queueing " << *pg << " for snaptrim" << dendl; + op_wq.queue( + make_pair( + pg, + PGQueueable( + PGSnapTrim(pg->get_osdmap()->get_epoch()), + cct->_conf->osd_snap_trim_cost, + cct->_conf->osd_snap_trim_priority, + ceph_clock_now(cct), + entity_inst_t()))); +} + // ==================================================================== // OSD diff --git a/src/osd/OSD.h b/src/osd/OSD.h index 6c67ba629dc..ba68ae88fbb 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -892,17 +892,7 @@ public: void send_pg_temp(); void queue_for_peering(PG *pg); - void queue_for_snap_trim(PG *pg) { - op_wq.queue( - make_pair( - pg, - PGQueueable( - PGSnapTrim(pg->get_osdmap()->get_epoch()), - cct->_conf->osd_snap_trim_cost, - cct->_conf->osd_snap_trim_priority, - ceph_clock_now(cct), - entity_inst_t()))); - } + void queue_for_snap_trim(PG *pg); void queue_for_scrub(PG *pg) { op_wq.queue( make_pair( diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 7315c28b5b4..4b43a141956 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -219,7 +219,6 @@ PG::PG(OSDService *o, OSDMapRef curmap, pgmeta_oid(p.make_pgmeta_oid()), missing_loc(this), stat_queue_item(this), - snap_trim_queued(false), scrub_queued(false), recovery_queued(false), recovery_ops_active(0), @@ -1618,9 +1617,6 @@ void PG::activate(ObjectStore::Transaction& t, << pool.cached_removed_snaps << ")" << dendl; snap_trimq.subtract(intersection); } - dout(10) << "activate - snap_trimq " << snap_trimq << dendl; - if (!snap_trimq.empty() && is_clean()) - queue_snap_trim(); } // init complete pointer @@ -2034,17 +2030,6 @@ void PG::all_activated_and_committed() AllReplicasActivated()))); } -void PG::queue_snap_trim() -{ - if (snap_trim_queued) { - dout(10) << "queue_snap_trim -- already queued" << dendl; - } else { - dout(10) << "queue_snap_trim -- queuing" << dendl; - snap_trim_queued = true; - osd->queue_for_snap_trim(this); - } -} - bool PG::requeue_scrub() { assert(is_locked()); @@ -2123,8 +2108,11 @@ void PG::mark_clean() trim_past_intervals(); - if (is_clean() && !snap_trimq.empty()) - queue_snap_trim(); + if (is_active()) { + /* The check is needed because if we are below min_size we're not + * actually active */ + kick_snap_trim(); + } dirty_info = true; } @@ -2266,7 +2254,6 @@ void PG::split_ops(PG *child, unsigned split_bits) { assert(waiting_for_active.empty()); split_replay_queue(&replay_queue, &(child->replay_queue), match, split_bits); - snap_trim_queued = false; osd->dequeue_pg(this, &waiting_for_peered); OSD::split_list( @@ -4352,7 +4339,7 @@ void PG::scrub_clear_state() if (scrubber.queue_snap_trim) { dout(10) << "scrub finished, requeuing snap_trimmer" << dendl; - queue_snap_trim(); + snap_trimmer_scrub_complete(); } scrubber.reset(); @@ -5102,7 +5089,6 @@ void PG::start_peering_interval( peer_purged.clear(); actingbackfill.clear(); - snap_trim_queued = false; scrub_queued = false; // reset primary state? @@ -5230,16 +5216,17 @@ void PG::proc_primary_info(ObjectStore::Transaction &t, const pg_info_t &oinfo) for (snapid_t snap = i.get_start(); snap != i.get_len() + i.get_start(); ++snap) { - hobject_t hoid; - int r = snap_mapper.get_next_object_to_trim(snap, &hoid); + vector hoids; + int r = snap_mapper.get_next_objects_to_trim(snap, 1, &hoids); if (r != 0 && r != -ENOENT) { derr << __func__ << ": snap_mapper get_next_object_to_trim returned " << cpp_strerror(r) << dendl; assert(0); } else if (r != -ENOENT) { + assert(!hoids.empty()); derr << __func__ << ": snap_mapper get_next_object_to_trim returned " << cpp_strerror(r) << " for object " - << hoid << " on snap " << snap + << hoids[0] << " on snap " << snap << " which should have been fully trimmed " << dendl; assert(0); } @@ -6813,10 +6800,9 @@ boost::statechart::result PG::RecoveryState::Active::react(const ActMap&) pg->osd->clog->error() << pg->info.pgid.pgid << " has " << unfound << " objects unfound and apparently lost\n"; } - if (!pg->snap_trimq.empty() && - pg->is_clean()) { - dout(10) << "Active: queuing snap trim" << dendl; - pg->queue_snap_trim(); + if (pg->is_active()) { + dout(10) << "Active: kicking snap trim" << dendl; + pg->kick_snap_trim(); } if (pg->is_peered() && diff --git a/src/osd/PG.h b/src/osd/PG.h index e98e5a61bab..22ab92f38ab 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -499,7 +499,6 @@ public: /* You should not use these items without taking their respective queue locks * (if they have one) */ xlist::item stat_queue_item; - bool snap_trim_queued; bool scrub_queued; bool recovery_queued; @@ -2230,7 +2229,8 @@ public: void log_weirdness(); - void queue_snap_trim(); + virtual void kick_snap_trim() = 0; + virtual void snap_trimmer_scrub_complete() = 0; bool requeue_scrub(); void queue_recovery(bool front = false); bool queue_scrub(); diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 3e3a87e370d..b515a41f0a4 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -3553,7 +3553,7 @@ void ReplicatedPG::do_backfill(OpRequestRef op) } } -ReplicatedPG::OpContextUPtr ReplicatedPG::trim_object(const hobject_t &coid) +ReplicatedPG::OpContextUPtr ReplicatedPG::trim_object(bool first, const hobject_t &coid) { // load clone info bufferlist bl; @@ -3610,7 +3610,8 @@ ReplicatedPG::OpContextUPtr ReplicatedPG::trim_object(const hobject_t &coid) if (!ctx->lock_manager.get_snaptrimmer_write( coid, - obc)) { + obc, + first)) { close_op_ctx(ctx.release()); dout(10) << __func__ << ": Unable to get a wlock on " << coid << dendl; return NULL; @@ -3618,7 +3619,8 @@ ReplicatedPG::OpContextUPtr ReplicatedPG::trim_object(const hobject_t &coid) if (!ctx->lock_manager.get_snaptrimmer_write( snapoid, - snapset_obc)) { + snapset_obc, + first)) { close_op_ctx(ctx.release()); dout(10) << __func__ << ": Unable to get a wlock on " << snapoid << dendl; return NULL; @@ -3776,8 +3778,29 @@ ReplicatedPG::OpContextUPtr ReplicatedPG::trim_object(const hobject_t &coid) return ctx; } +void ReplicatedPG::kick_snap_trim() +{ + assert(is_active()); + assert(is_primary()); + if (is_clean() && !snap_trimq.empty()) { + dout(10) << __func__ << ": clean and snaps to trim, kicking" << dendl; + snap_trimmer_machine.process_event(KickTrim()); + } +} + +void ReplicatedPG::snap_trimmer_scrub_complete() +{ + if (is_primary() && is_active() && is_clean()) { + assert(!snap_trimq.empty()); + snap_trimmer_machine.process_event(ScrubComplete()); + } +} + void ReplicatedPG::snap_trimmer(epoch_t queued) { + if (deleting || pg_has_reset_since(queued)) { + return; + } if (g_conf->osd_snap_trim_sleep > 0) { unlock(); utime_t t; @@ -3786,31 +3809,12 @@ void ReplicatedPG::snap_trimmer(epoch_t queued) lock(); dout(20) << __func__ << " slept for " << t << dendl; } - if (deleting || pg_has_reset_since(queued)) { - return; - } - snap_trim_queued = false; - dout(10) << "snap_trimmer entry" << dendl; - if (is_primary()) { - if (scrubber.active) { - dout(10) << " scrubbing, will requeue snap_trimmer after" << dendl; - scrubber.queue_snap_trim = true; - return; - } - dout(10) << "snap_trimmer posting" << dendl; - snap_trimmer_machine.process_event(SnapTrim()); + assert(is_primary()); - if (snap_trimmer_machine.need_share_pg_info) { - dout(10) << "snap_trimmer share_pg_info" << dendl; - snap_trimmer_machine.need_share_pg_info = false; - share_pg_info(); - } - } else if (is_active() && - last_complete_ondisk.epoch > info.history.last_epoch_started) { - // replica collection trimming - snap_trimmer_machine.process_event(SnapTrim()); - } + dout(10) << "snap_trimmer posting" << dendl; + snap_trimmer_machine.process_event(DoSnapWork()); + dout(10) << "snap_trimmer complete" << dendl; return; } @@ -13062,137 +13066,133 @@ void ReplicatedPG::NotTrimming::exit() context< SnapTrimmer >().log_exit(state_name, enter_time); } -boost::statechart::result ReplicatedPG::NotTrimming::react(const SnapTrim&) +boost::statechart::result ReplicatedPG::NotTrimming::react(const KickTrim&) { ReplicatedPG *pg = context< SnapTrimmer >().pg; - dout(10) << "NotTrimming react" << dendl; + dout(10) << "NotTrimming react KickTrim" << dendl; - if (!pg->is_primary() || !pg->is_active() || !pg->is_clean()) { - dout(10) << "NotTrimming not primary, active, clean" << dendl; - return discard_event(); - } else if (pg->scrubber.active) { - dout(10) << "NotTrimming finalizing scrub" << dendl; - pg->queue_snap_trim(); + assert(pg->is_primary() && pg->is_active()); + if (!pg->is_clean() || + pg->snap_trimq.empty()) { + dout(10) << "NotTrimming not clean or nothing to trim" << dendl; return discard_event(); } - // Primary trimming - if (pg->snap_trimq.empty()) { - return discard_event(); + if (pg->scrubber.active) { + dout(10) << " scrubbing, will requeue snap_trimmer after" << dendl; + pg->scrubber.queue_snap_trim = true; + return transit< WaitScrub >(); } else { context().snap_to_trim = pg->snap_trimq.range_start(); dout(10) << "NotTrimming: trimming " << pg->snap_trimq.range_start() << dendl; - post_event(SnapTrim()); - return transit(); + return transit< AwaitAsyncWork >(); } } -/* TrimmingObjects */ -ReplicatedPG::TrimmingObjects::TrimmingObjects(my_context ctx) +/* AwaitAsyncWork */ +ReplicatedPG::AwaitAsyncWork::AwaitAsyncWork(my_context ctx) : my_base(ctx), - NamedState(context< SnapTrimmer >().pg->cct, "Trimming/TrimmingObjects") + NamedState(context< SnapTrimmer >().pg->cct, "Trimming/AwaitAsyncWork") { context< SnapTrimmer >().log_enter(state_name); + context< SnapTrimmer >().pg->osd->queue_for_snap_trim( + context< SnapTrimmer >().pg); } -void ReplicatedPG::TrimmingObjects::exit() +void ReplicatedPG::AwaitAsyncWork::exit() { context< SnapTrimmer >().log_exit(state_name, enter_time); - context().in_flight.clear(); } -boost::statechart::result ReplicatedPG::TrimmingObjects::react(const SnapTrim&) +boost::statechart::result ReplicatedPG::AwaitAsyncWork::react(const DoSnapWork&) { - dout(10) << "TrimmingObjects react" << dendl; + dout(10) << "AwaitAsyncWork react" << dendl; ReplicatedPGRef pg = context< SnapTrimmer >().pg; snapid_t snap_to_trim = context().snap_to_trim; auto &in_flight = context().in_flight; + assert(in_flight.empty()); + + assert(pg->is_primary() && pg->is_active()); + if (!pg->is_clean() || + pg->scrubber.active) { + dout(10) << "something changed, reverting to NotTrimming" << dendl; + post_event(KickTrim()); + return transit< NotTrimming >(); + } + + dout(10) << "AwaitAsyncWork: trimming snap " << snap_to_trim << dendl; + + vector to_trim; + unsigned max = g_conf->osd_pg_max_concurrent_snap_trims; + to_trim.reserve(max); + int r = pg->snap_mapper.get_next_objects_to_trim( + snap_to_trim, + max, + &to_trim); + if (r != 0 && r != -ENOENT) { + derr << "get_next_objects_to_trim returned " + << cpp_strerror(r) << dendl; + assert(0 == "get_next_objects_to_trim returned an invalid code"); + } else if (r == -ENOENT) { + // Done! + dout(10) << "got ENOENT" << dendl; + + dout(10) << "adding snap " << snap_to_trim + << " to purged_snaps" + << dendl; + pg->info.purged_snaps.insert(snap_to_trim); + pg->snap_trimq.erase(snap_to_trim); + dout(10) << "purged_snaps now " + << pg->info.purged_snaps << ", snap_trimq now " + << pg->snap_trimq << dendl; - dout(10) << "TrimmingObjects: trimming snap " << snap_to_trim << dendl; + ObjectStore::Transaction t; + pg->dirty_big_info = true; + pg->write_if_dirty(t); + int tr = pg->osd->store->queue_transaction(pg->osr.get(), std::move(t), NULL); + assert(tr == 0); - while (in_flight.size() < g_conf->osd_pg_max_concurrent_snap_trims) { - // Get next - hobject_t old_pos = pos; - int r = pg->snap_mapper.get_next_object_to_trim(snap_to_trim, &pos); - if (r != 0 && r != -ENOENT) { - derr << __func__ << ": get_next returned " << cpp_strerror(r) << dendl; - assert(0); - } else if (r == -ENOENT) { - // Done! - dout(10) << "TrimmingObjects: got ENOENT" << dendl; - post_event(SnapTrim()); - return transit< WaitingOnReplicas >(); - } + pg->share_pg_info(); + post_event(KickTrim()); + return transit< NotTrimming >(); + } + assert(!to_trim.empty()); - dout(10) << "TrimmingObjects react trimming " << pos << dendl; - OpContextUPtr ctx = pg->trim_object(pos); + for (auto &&object: to_trim) { + // Get next + dout(10) << "AwaitAsyncWork react trimming " << object << dendl; + OpContextUPtr ctx = pg->trim_object(in_flight.empty(), object); if (!ctx) { - dout(10) << __func__ << " could not get write lock on obj " - << pos << dendl; - pos = old_pos; - return discard_event(); + dout(10) << "could not get write lock on obj " + << object << dendl; + if (in_flight.empty()) { + dout(10) << "waiting for it to clear" + << dendl; + return transit< WaitRWLock >(); + + } else { + dout(10) << "letting the ones we already started finish" + << dendl; + return transit< WaitRepops >(); + } } - assert(ctx); - hobject_t to_remove = pos; + + in_flight.insert(object); ctx->register_on_success( - [pg, to_remove, &in_flight]() { - in_flight.erase(to_remove); - pg->queue_snap_trim(); + [pg, object, &in_flight]() { + assert(in_flight.find(object) != in_flight.end()); + in_flight.erase(object); + if (in_flight.empty()) + pg->snap_trimmer_machine.process_event(RepopsComplete()); }); pg->apply_ctx_scrub_stats(ctx.get()); - - in_flight.insert(pos); pg->simple_opc_submit(std::move(ctx)); } - return discard_event(); -} - -/* WaitingOnReplicasObjects */ -ReplicatedPG::WaitingOnReplicas::WaitingOnReplicas(my_context ctx) - : my_base(ctx), - NamedState(context< SnapTrimmer >().pg->cct, "Trimming/WaitingOnReplicas") -{ - context< SnapTrimmer >().log_enter(state_name); -} - -void ReplicatedPG::WaitingOnReplicas::exit() -{ - context< SnapTrimmer >().log_exit(state_name, enter_time); - context().in_flight.clear(); -} - -boost::statechart::result ReplicatedPG::WaitingOnReplicas::react(const SnapTrim&) -{ - // Have all the trims finished? - dout(10) << "Waiting on Replicas react" << dendl; - ReplicatedPG *pg = context< SnapTrimmer >().pg; - if (!context().in_flight.empty()) { - return discard_event(); - } - - snapid_t &sn = context().snap_to_trim; - dout(10) << "WaitingOnReplicas: adding snap " << sn << " to purged_snaps" - << dendl; - - pg->info.purged_snaps.insert(sn); - pg->snap_trimq.erase(sn); - dout(10) << "purged_snaps now " << pg->info.purged_snaps << ", snap_trimq now " - << pg->snap_trimq << dendl; - - ObjectStore::Transaction t; - pg->dirty_big_info = true; - pg->write_if_dirty(t); - int tr = pg->osd->store->queue_transaction(pg->osr.get(), std::move(t), NULL); - assert(tr == 0); - - context().need_share_pg_info = true; - // Back to the start - pg->queue_snap_trim(); - return transit< NotTrimming >(); + return transit< WaitRepops >(); } void ReplicatedPG::setattr_maybe_cache( diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h index 88cf36ba2c8..bb9fbf3fdca 100644 --- a/src/osd/ReplicatedPG.h +++ b/src/osd/ReplicatedPG.h @@ -794,7 +794,7 @@ protected: if (requeue_recovery) queue_recovery(); if (requeue_snaptrim) - queue_snap_trim(); + snap_trimmer_machine.process_event(TrimWriteUnblocked()); if (!to_req.empty()) { // requeue at front of scrub blocking queue if we are blocked by scrub @@ -1375,8 +1375,10 @@ public: ThreadPool::TPHandle &handle) override; void do_backfill(OpRequestRef op) override; - OpContextUPtr trim_object(const hobject_t &coid); + OpContextUPtr trim_object(bool first, const hobject_t &coid); void snap_trimmer(epoch_t e) override; + void kick_snap_trim() override; + void snap_trimmer_scrub_complete() override; int do_osd_ops(OpContext *ctx, vector& ops); int _get_tmap(OpContext *ctx, bufferlist *header, bufferlist *vals); @@ -1438,8 +1440,20 @@ public: } private: struct NotTrimming; - struct SnapTrim : boost::statechart::event< SnapTrim > { - SnapTrim() : boost::statechart::event < SnapTrim >() {} + struct DoSnapWork : boost::statechart::event< DoSnapWork > { + DoSnapWork() : boost::statechart::event < DoSnapWork >() {} + }; + struct KickTrim : boost::statechart::event< KickTrim > { + KickTrim() : boost::statechart::event < KickTrim >() {} + }; + struct RepopsComplete : boost::statechart::event< RepopsComplete > { + RepopsComplete() : boost::statechart::event < RepopsComplete >() {} + }; + struct ScrubComplete : boost::statechart::event< ScrubComplete > { + ScrubComplete() : boost::statechart::event < ScrubComplete >() {} + }; + struct TrimWriteUnblocked : boost::statechart::event< TrimWriteUnblocked > { + TrimWriteUnblocked() : boost::statechart::event < TrimWriteUnblocked >() {} }; struct Reset : boost::statechart::event< Reset > { Reset() : boost::statechart::event< Reset >() {} @@ -1448,43 +1462,112 @@ private: ReplicatedPG *pg; set in_flight; snapid_t snap_to_trim; - bool need_share_pg_info; - explicit SnapTrimmer(ReplicatedPG *pg) : pg(pg), need_share_pg_info(false) {} + explicit SnapTrimmer(ReplicatedPG *pg) : pg(pg) {} ~SnapTrimmer(); void log_enter(const char *state_name); void log_exit(const char *state_name, utime_t duration); } snap_trimmer_machine; /* SnapTrimmerStates */ - struct TrimmingObjects : boost::statechart::state< TrimmingObjects, SnapTrimmer >, NamedState { + struct AwaitAsyncWork : boost::statechart::state< AwaitAsyncWork, SnapTrimmer >, NamedState { typedef boost::mpl::list < - boost::statechart::custom_reaction< SnapTrim >, + boost::statechart::custom_reaction< DoSnapWork >, + boost::statechart::custom_reaction< KickTrim >, boost::statechart::transition< Reset, NotTrimming > > reactions; - hobject_t pos; - explicit TrimmingObjects(my_context ctx); + explicit AwaitAsyncWork(my_context ctx); void exit(); - boost::statechart::result react(const SnapTrim&); + boost::statechart::result react(const DoSnapWork&); + boost::statechart::result react(const KickTrim&) { + return discard_event(); + } }; - struct WaitingOnReplicas : boost::statechart::state< WaitingOnReplicas, SnapTrimmer >, NamedState { + struct WaitRWLock : boost::statechart::state< WaitRWLock, SnapTrimmer >, NamedState { typedef boost::mpl::list < - boost::statechart::custom_reaction< SnapTrim >, + boost::statechart::custom_reaction< TrimWriteUnblocked >, + boost::statechart::custom_reaction< KickTrim >, boost::statechart::transition< Reset, NotTrimming > > reactions; - explicit WaitingOnReplicas(my_context ctx); - void exit(); - boost::statechart::result react(const SnapTrim&); + explicit WaitRWLock(my_context ctx) + : my_base(ctx), + NamedState(context< SnapTrimmer >().pg->cct, "Trimming/WaitRWLock") { + context< SnapTrimmer >().log_enter(state_name); + assert(context().in_flight.empty()); + } + void exit() { + context< SnapTrimmer >().log_exit(state_name, enter_time); + } + boost::statechart::result react(const TrimWriteUnblocked&) { + post_event(KickTrim()); + return discard_event(); + } + boost::statechart::result react(const KickTrim&) { + return discard_event(); + } }; - + + struct WaitScrub : boost::statechart::state< WaitScrub, SnapTrimmer >, NamedState { + typedef boost::mpl::list < + boost::statechart::custom_reaction< ScrubComplete >, + boost::statechart::custom_reaction< KickTrim >, + boost::statechart::transition< Reset, NotTrimming > + > reactions; + explicit WaitScrub(my_context ctx) + : my_base(ctx), + NamedState(context< SnapTrimmer >().pg->cct, "Trimming/WaitScrub") { + context< SnapTrimmer >().log_enter(state_name); + assert(context().in_flight.empty()); + } + void exit() { + context< SnapTrimmer >().log_exit(state_name, enter_time); + } + boost::statechart::result react(const ScrubComplete&) { + post_event(KickTrim()); + return transit< NotTrimming >(); + } + boost::statechart::result react(const KickTrim&) { + return discard_event(); + } + }; + + struct WaitRepops : boost::statechart::state< WaitRepops, SnapTrimmer >, NamedState { + typedef boost::mpl::list < + boost::statechart::custom_reaction< RepopsComplete >, + boost::statechart::custom_reaction< KickTrim >, + boost::statechart::custom_reaction< Reset > + > reactions; + explicit WaitRepops(my_context ctx) + : my_base(ctx), + NamedState(context< SnapTrimmer >().pg->cct, "Trimming/WaitRepops") { + context< SnapTrimmer >().log_enter(state_name); + assert(!context().in_flight.empty()); + } + void exit() { + context< SnapTrimmer >().log_exit(state_name, enter_time); + assert(context().in_flight.empty()); + } + boost::statechart::result react(const RepopsComplete&) { + post_event(KickTrim()); + return transit< NotTrimming >(); + } + boost::statechart::result react(const KickTrim&) { + return discard_event(); + } + boost::statechart::result react(const Reset&) { + context().in_flight.clear(); + return transit< NotTrimming>(); + } + }; + struct NotTrimming : boost::statechart::state< NotTrimming, SnapTrimmer >, NamedState { typedef boost::mpl::list < - boost::statechart::custom_reaction< SnapTrim >, + boost::statechart::custom_reaction< KickTrim >, boost::statechart::transition< Reset, NotTrimming > > reactions; explicit NotTrimming(my_context ctx); void exit(); - boost::statechart::result react(const SnapTrim&); + boost::statechart::result react(const KickTrim&); }; int _verify_no_head_clones(const hobject_t& soid, diff --git a/src/osd/SnapMapper.cc b/src/osd/SnapMapper.cc index 6be5018d9a9..d464ac79566 100644 --- a/src/osd/SnapMapper.cc +++ b/src/osd/SnapMapper.cc @@ -240,37 +240,46 @@ void SnapMapper::add_oid( backend.set_keys(to_add, t); } -int SnapMapper::get_next_object_to_trim( +int SnapMapper::get_next_objects_to_trim( snapid_t snap, - hobject_t *hoid) + unsigned max, + vector *out) { + assert(out); + assert(out->empty()); + int r = 0; for (set::iterator i = prefixes.begin(); - i != prefixes.end(); + i != prefixes.end() && out->size() < max && r == 0; ++i) { - string list_after(get_prefix(snap) + *i); - - pair next; - int r = backend.get_next(list_after, &next); - if (r < 0) { - break; // Done - } - - if (next.first.substr(0, list_after.size()) != - list_after) { - continue; // Done with this prefix + string prefix(get_prefix(snap) + *i); + string pos = prefix; + while (out->size() < max) { + pair next; + r = backend.get_next(pos, &next); + if (r != 0) { + break; // Done + } + + if (next.first.substr(0, prefix.size()) != + prefix) { + break; // Done with this prefix + } + + assert(is_mapping(next.first)); + + pair next_decoded(from_raw(next)); + assert(next_decoded.first == snap); + assert(check(next_decoded.second)); + + out->push_back(next_decoded.second); + pos = next.first; } - - assert(is_mapping(next.first)); - - pair next_decoded(from_raw(next)); - assert(next_decoded.first == snap); - assert(check(next_decoded.second)); - - if (hoid) - *hoid = next_decoded.second; + } + if (out->size() == 0) { + return -ENOENT; + } else { return 0; } - return -ENOENT; } diff --git a/src/osd/SnapMapper.h b/src/osd/SnapMapper.h index 7b95abe3a71..16cc763f7f1 100644 --- a/src/osd/SnapMapper.h +++ b/src/osd/SnapMapper.h @@ -211,9 +211,10 @@ public: ); /// Returns first object with snap as a snap - int get_next_object_to_trim( + int get_next_objects_to_trim( snapid_t snap, ///< [in] snap to check - hobject_t *hoid ///< [out] next hoid to trim + unsigned max, ///< [in] max to get + vector *out ///< [out] next objects to trim (must be empty) ); ///< @return error, -ENOENT if no more objects /// Remove mapping for oid diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index a831df932e1..bc90b40f936 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -4322,11 +4322,12 @@ public: bool get_write_greedy(OpRequestRef op) { return rwstate.get_write(op, true); } - bool get_snaptrimmer_write() { + bool get_snaptrimmer_write(bool mark_if_unsuccessful) { if (rwstate.get_write_lock()) { return true; } else { - rwstate.snaptrimmer_write_marker = true; + if (mark_if_unsuccessful) + rwstate.snaptrimmer_write_marker = true; return false; } } @@ -4529,9 +4530,10 @@ public: /// Get write lock for snap trim bool get_snaptrimmer_write( const hobject_t &hoid, - ObjectContextRef obc) { + ObjectContextRef obc, + bool mark_if_unsuccessful) { assert(locks.find(hoid) == locks.end()); - if (obc->get_snaptrimmer_write()) { + if (obc->get_snaptrimmer_write(mark_if_unsuccessful)) { locks.insert( make_pair( hoid, ObjectLockState(obc, ObjectContext::RWState::RWWRITE))); diff --git a/src/test/test_snap_mapper.cc b/src/test/test_snap_mapper.cc index 93c87abe41c..53f1d292897 100644 --- a/src/test/test_snap_mapper.cc +++ b/src/test/test_snap_mapper.cc @@ -512,34 +512,37 @@ public: rand_choose(snap_to_hobject); set hobjects = snap->second; - hobject_t hoid; - while (mapper->get_next_object_to_trim(snap->first, &hoid) == 0) { - assert(!hoid.is_max()); - assert(hobjects.count(hoid)); - hobjects.erase(hoid); - - map, hobject_t::BitwiseComparator>::iterator j = - hobject_to_snap.find(hoid); - assert(j->second.count(snap->first)); - set old_snaps(j->second); - j->second.erase(snap->first); - - { - PausyAsyncMap::Transaction t; - mapper->update_snaps( - hoid, - j->second, - &old_snaps, - &t); - driver->submit(&t); - } - if (j->second.empty()) { - hobject_to_snap.erase(j); + vector hoids; + while (mapper->get_next_objects_to_trim( + snap->first, rand() % 5 + 1, &hoids) == 0) { + for (auto &&hoid: hoids) { + assert(!hoid.is_max()); + assert(hobjects.count(hoid)); + hobjects.erase(hoid); + + map, hobject_t::BitwiseComparator>::iterator j = + hobject_to_snap.find(hoid); + assert(j->second.count(snap->first)); + set old_snaps(j->second); + j->second.erase(snap->first); + + { + PausyAsyncMap::Transaction t; + mapper->update_snaps( + hoid, + j->second, + &old_snaps, + &t); + driver->submit(&t); + } + if (j->second.empty()) { + hobject_to_snap.erase(j); + } + hoid = hobject_t::get_max(); } - hoid = hobject_t::get_max(); + hoids.clear(); } assert(hobjects.empty()); - snap_to_hobject.erase(snap); } -- 2.39.5