From: Samuel Just Date: Wed, 10 Dec 2014 23:08:21 +0000 (-0800) Subject: osd/: convert snap trimming to use OpWQ X-Git-Tag: v9.0.2~48^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=3e4b852728bf147913e7a6e45b2d3c20402c3184;p=ceph.git osd/: convert snap trimming to use OpWQ Signed-off-by: Samuel Just --- diff --git a/src/common/config_opts.h b/src/common/config_opts.h index 88c09f004dcc..98f15ff550d6 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -587,8 +587,6 @@ OPTION(osd_op_thread_suicide_timeout, OPT_INT, 150) OPTION(osd_recovery_thread_timeout, OPT_INT, 30) OPTION(osd_recovery_thread_suicide_timeout, OPT_INT, 300) OPTION(osd_recovery_sleep, OPT_FLOAT, 0) // seconds to sleep between recovery ops -OPTION(osd_snap_trim_thread_timeout, OPT_INT, 60*60*1) -OPTION(osd_snap_trim_thread_suicide_timeout, OPT_INT, 60*60*10) OPTION(osd_snap_trim_sleep, OPT_FLOAT, 0) OPTION(osd_scrub_thread_timeout, OPT_INT, 60) OPTION(osd_scrub_thread_suicide_timeout, OPT_INT, 60) @@ -742,18 +740,23 @@ OPTION(rocksdb_disableWAL, OPT_BOOL, false) // if true, writes will not first g /** - * osd_client_op_priority and osd_recovery_op_priority adjust the relative - * priority of client io vs recovery io. + * osd_*_priority adjust the relative priority of client io, recovery io, + * snaptrim io, etc * - * osd_client_op_priority/osd_recovery_op_priority determines the ratio of - * available io between client and recovery. Each option may be set between + * osd_*_priority determines the ratio of available io between client and + * recovery. Each option may be set between * 1..63. 
- * - * osd_recovery_op_warn_multiple scales the normal warning threshhold, - * osd_op_complaint_time, so that slow recovery ops won't cause noise */ OPTION(osd_client_op_priority, OPT_U32, 63) OPTION(osd_recovery_op_priority, OPT_U32, 10) + +OPTION(osd_snap_trim_priority, OPT_U32, 5) +OPTION(osd_snap_trim_cost, OPT_U32, 1<<20) // set default cost equal to 1MB io + +/** + * osd_recovery_op_warn_multiple scales the normal warning threshold, + * osd_op_complaint_time, so that slow recovery ops won't cause noise + */ OPTION(osd_recovery_op_warn_multiple, OPT_U32, 16) // Max time to wait between notifying mon of shutdown and shutting down diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 246c4f71ae16..8c810f0fe4f3 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -156,6 +156,10 @@ void PGQueueable::RunVis::operator()(OpRequestRef &op) { return osd->dequeue_op(pg, op, handle); } +void PGQueueable::RunVis::operator()(PGSnapTrim &op) { + return pg->snap_trimmer(op.epoch_queued); +} + //Initial features in new superblock. 
//Features here are also automatically upgraded CompatSet OSD::get_osd_initial_compat_set() { @@ -200,7 +204,6 @@ OSDService::OSDService(OSD *osd) : op_wq(osd->op_shardedwq), peering_wq(osd->peering_wq), recovery_wq(osd->recovery_wq), - snap_trim_wq(osd->snap_trim_wq), scrub_wq(osd->scrub_wq), recovery_gen_wq("recovery_gen_wq", cct->_conf->osd_recovery_thread_timeout, &osd->recovery_tp), @@ -1541,11 +1544,6 @@ OSD::OSD(CephContext *cct_, ObjectStore *store_, cct->_conf->osd_recovery_thread_suicide_timeout, &recovery_tp), replay_queue_lock("OSD::replay_queue_lock"), - snap_trim_wq( - this, - cct->_conf->osd_snap_trim_thread_timeout, - cct->_conf->osd_snap_trim_thread_suicide_timeout, - &disk_tp), scrub_wq( this, cct->_conf->osd_scrub_thread_timeout, diff --git a/src/osd/OSD.h b/src/osd/OSD.h index 78d33959d849..d01d8f3ea936 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -316,9 +316,19 @@ public: typedef ceph::shared_ptr DeletingStateRef; class OSD; + +struct PGSnapTrim { + epoch_t epoch_queued; + PGSnapTrim(epoch_t e) : epoch_queued(e) {} + ostream &operator<<(ostream &rhs) { + return rhs << "SnapTrim"; + } +}; + class PGQueueable { typedef boost::variant< - OpRequestRef + OpRequestRef, + PGSnapTrim > QVariant; QVariant qvariant; int cost; @@ -332,6 +342,7 @@ class PGQueueable { RunVis(OSD *osd, PGRef &pg, ThreadPool::TPHandle &handle) : osd(osd), pg(pg), handle(handle) {} void operator()(OpRequestRef &op); + void operator()(PGSnapTrim &op); }; public: PGQueueable(OpRequestRef op) @@ -340,6 +351,11 @@ public: start_time(op->get_req()->get_recv_stamp()), owner(op->get_req()->get_source_inst()) {} + PGQueueable( + const PGSnapTrim &op, int cost, unsigned priority, utime_t start_time, + const entity_inst_t &owner) + : qvariant(op), cost(cost), priority(priority), start_time(start_time), + owner(owner) {} boost::optional maybe_get_op() { OpRequestRef *op = boost::get(&qvariant); return op ? 
*op : boost::optional(); @@ -375,7 +391,6 @@ public: ShardedThreadPool::ShardedWQ < pair > &op_wq; ThreadPool::BatchWorkQueue &peering_wq; ThreadPool::WorkQueue &recovery_wq; - ThreadPool::WorkQueue &snap_trim_wq; ThreadPool::WorkQueue &scrub_wq; GenContextWQ recovery_gen_wq; GenContextWQ op_gen_wq; @@ -743,8 +758,16 @@ public: void queue_for_peering(PG *pg); bool queue_for_recovery(PG *pg); - bool queue_for_snap_trim(PG *pg) { - return snap_trim_wq.queue(pg); + void queue_for_snap_trim(PG *pg) { + op_wq.queue( + make_pair( + pg, + PGQueueable( + PGSnapTrim(pg->get_osdmap()->get_epoch()), + cct->_conf->osd_snap_trim_cost, + cct->_conf->osd_snap_trim_priority, + ceph_clock_now(cct), + entity_inst_t()))); } bool queue_for_scrub(PG *pg) { return scrub_wq.queue(pg); @@ -2127,48 +2150,6 @@ protected: void check_replay_queue(); - - // -- snap trimming -- - xlist snap_trim_queue; - - struct SnapTrimWQ : public ThreadPool::WorkQueue { - OSD *osd; - SnapTrimWQ(OSD *o, time_t ti, time_t si, ThreadPool *tp) - : ThreadPool::WorkQueue("OSD::SnapTrimWQ", ti, si, tp), osd(o) {} - - bool _empty() { - return osd->snap_trim_queue.empty(); - } - bool _enqueue(PG *pg) { - if (pg->snap_trim_item.is_on_list()) - return false; - pg->get("SnapTrimWQ"); - osd->snap_trim_queue.push_back(&pg->snap_trim_item); - return true; - } - void _dequeue(PG *pg) { - if (pg->snap_trim_item.remove_myself()) - pg->put("SnapTrimWQ"); - } - PG *_dequeue() { - if (osd->snap_trim_queue.empty()) - return NULL; - PG *pg = osd->snap_trim_queue.front(); - osd->snap_trim_queue.pop_front(); - return pg; - } - void _process(PG *pg) { - pg->snap_trimmer(); - pg->put("SnapTrimWQ"); - } - void _clear() { - while (PG *pg = _dequeue()) { - pg->put("SnapTrimWQ"); - } - } - } snap_trim_wq; - - // -- scrubbing -- void sched_scrub(); bool scrub_random_backoff(); diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 7bbf7626ccbf..22df0afc4828 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -195,7 +195,8 @@ PG::PG(OSDService *o, 
OSDMapRef curmap, coll(p), pg_log(cct), pgmeta_oid(p.make_pgmeta_oid()), missing_loc(this), - recovery_item(this), scrub_item(this), snap_trim_item(this), stat_queue_item(this), + recovery_item(this), scrub_item(this), stat_queue_item(this), + snap_trim_queued(false), recovery_ops_active(0), role(0), state(0), @@ -884,7 +885,6 @@ void PG::clear_primary_state() scrub_after_recovery = false; osd->recovery_wq.dequeue(this); - osd->snap_trim_wq.dequeue(this); agent_clear(); } @@ -1928,10 +1928,13 @@ void PG::all_activated_and_committed() void PG::queue_snap_trim() { - if (osd->queue_for_snap_trim(this)) + if (snap_trim_queued) { + dout(10) << "queue_snap_trim -- already queued" << dendl; + } else { dout(10) << "queue_snap_trim -- queuing" << dendl; - else - dout(10) << "queue_snap_trim -- already trimming" << dendl; + snap_trim_queued = true; + osd->queue_for_snap_trim(this); + } } bool PG::queue_scrub() @@ -2110,7 +2113,9 @@ void PG::split_ops(PG *child, unsigned split_bits) { assert(waiting_for_active.empty()); split_replay_queue(&replay_queue, &(child->replay_queue), match, split_bits); + snap_trim_queued = false; osd->dequeue_pg(this, &waiting_for_peered); + OSD::split_list( &waiting_for_peered, &(child->waiting_for_peered), match, split_bits); { @@ -4810,6 +4815,7 @@ void PG::start_peering_interval( peer_missing.clear(); peer_purged.clear(); actingbackfill.clear(); + snap_trim_queued = false; // reset primary state? if (was_old_primary || is_primary()) { diff --git a/src/osd/PG.h b/src/osd/PG.h index 2f4dc35b110d..ae87319b40f7 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -220,11 +220,13 @@ protected: return osdmap_ref; } +public: OSDMapRef get_osdmap() const { assert(is_locked()); assert(osdmap_ref); return osdmap_ref; } +protected: /** locking and reference counting. * I destroy myself when the reference count hits zero. 
@@ -432,7 +434,9 @@ public: /* You should not use these items without taking their respective queue locks * (if they have one) */ - xlist::item recovery_item, scrub_item, snap_trim_item, stat_queue_item; + xlist::item recovery_item, scrub_item, stat_queue_item; + bool snap_trim_queued; + int recovery_ops_active; set waiting_on_backfill; #ifdef DEBUG_RECOVERY_OIDS @@ -2247,7 +2251,7 @@ public: ThreadPool::TPHandle &handle ) = 0; virtual void do_backfill(OpRequestRef op) = 0; - virtual void snap_trimmer() = 0; + virtual void snap_trimmer(epoch_t epoch_queued) = 0; virtual int do_command(cmdmap_t cmdmap, ostream& ss, bufferlist& idata, bufferlist& odata) = 0; diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index f2225b7eef22..3d776a5696c4 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -2906,13 +2906,8 @@ ReplicatedPG::RepGather *ReplicatedPG::trim_object(const hobject_t &coid) return repop; } -void ReplicatedPG::snap_trimmer() +void ReplicatedPG::snap_trimmer(epoch_t queued) { - lock(); - if (deleting) { - unlock(); - return; - } if (g_conf->osd_snap_trim_sleep > 0) { unlock(); utime_t t; @@ -2921,13 +2916,16 @@ void ReplicatedPG::snap_trimmer() lock(); dout(20) << __func__ << " slept for " << t << dendl; } + if (deleting || pg_has_reset_since(queued)) { + return; + } + snap_trim_queued = false; dout(10) << "snap_trimmer entry" << dendl; if (is_primary()) { entity_inst_t nobody; if (scrubber.active) { dout(10) << " scrubbing, will requeue snap_trimmer after" << dendl; scrubber.queue_snap_trim = true; - unlock(); return; } @@ -2944,7 +2942,6 @@ void ReplicatedPG::snap_trimmer() // replica collection trimming snap_trimmer_machine.process_event(SnapTrim()); } - unlock(); return; } @@ -8750,7 +8747,7 @@ void ReplicatedPG::on_shutdown() // remove from queues osd->recovery_wq.dequeue(this); - osd->snap_trim_wq.dequeue(this); + osd->scrub_wq.dequeue(this); osd->pg_stat_queue_dequeue(this); osd->dequeue_pg(this, 0); 
osd->peering_wq.dequeue(this); diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h index f2dfca9faf16..90391786ed68 100644 --- a/src/osd/ReplicatedPG.h +++ b/src/osd/ReplicatedPG.h @@ -1399,7 +1399,7 @@ public: void do_backfill(OpRequestRef op); RepGather *trim_object(const hobject_t &coid); - void snap_trimmer(); + void snap_trimmer(epoch_t e); int do_osd_ops(OpContext *ctx, vector& ops); int _get_tmap(OpContext *ctx, bufferlist *header, bufferlist *vals);