OPTION(osd_recovery_thread_timeout, OPT_INT, 30)
OPTION(osd_recovery_thread_suicide_timeout, OPT_INT, 300)
OPTION(osd_recovery_sleep, OPT_FLOAT, 0) // seconds to sleep between recovery ops
-OPTION(osd_snap_trim_thread_timeout, OPT_INT, 60*60*1)
-OPTION(osd_snap_trim_thread_suicide_timeout, OPT_INT, 60*60*10)
OPTION(osd_snap_trim_sleep, OPT_FLOAT, 0)
OPTION(osd_scrub_thread_timeout, OPT_INT, 60)
OPTION(osd_scrub_thread_suicide_timeout, OPT_INT, 60)
/**
- * osd_client_op_priority and osd_recovery_op_priority adjust the relative
- * priority of client io vs recovery io.
+ * The osd_*_priority options adjust the relative priority of client io,
+ * recovery io, snaptrim io, etc.
*
- * osd_client_op_priority/osd_recovery_op_priority determines the ratio of
- * available io between client and recovery. Each option may be set between
+ * The osd_*_priority options determine the ratio of available io among
+ * client, recovery and snaptrim work. Each option may be set between
* 1..63.
- *
- * osd_recovery_op_warn_multiple scales the normal warning threshhold,
- * osd_op_complaint_time, so that slow recovery ops won't cause noise
*/
OPTION(osd_client_op_priority, OPT_U32, 63)
OPTION(osd_recovery_op_priority, OPT_U32, 10)
+
+OPTION(osd_snap_trim_priority, OPT_U32, 5)
+OPTION(osd_snap_trim_cost, OPT_U32, 1<<20) // set default cost equal to 1MB io
+
+/**
+ * osd_recovery_op_warn_multiple scales the normal warning threshold,
+ * osd_op_complaint_time, so that slow recovery ops won't cause noise
+ */
OPTION(osd_recovery_op_warn_multiple, OPT_U32, 16)
// Max time to wait between notifying mon of shutdown and shutting down
return osd->dequeue_op(pg, op, handle);
}
+// Visitor arm for a queued PGSnapTrim item: runs the PG's snap trimmer,
+// passing along the epoch that was recorded when the item was queued.
+void PGQueueable::RunVis::operator()(PGSnapTrim &op) {
+  pg->snap_trimmer(op.epoch_queued);
+}
+
//Initial features in new superblock.
//Features here are also automatically upgraded
CompatSet OSD::get_osd_initial_compat_set() {
op_wq(osd->op_shardedwq),
peering_wq(osd->peering_wq),
recovery_wq(osd->recovery_wq),
- snap_trim_wq(osd->snap_trim_wq),
scrub_wq(osd->scrub_wq),
recovery_gen_wq("recovery_gen_wq", cct->_conf->osd_recovery_thread_timeout,
&osd->recovery_tp),
cct->_conf->osd_recovery_thread_suicide_timeout,
&recovery_tp),
replay_queue_lock("OSD::replay_queue_lock"),
- snap_trim_wq(
- this,
- cct->_conf->osd_snap_trim_thread_timeout,
- cct->_conf->osd_snap_trim_thread_suicide_timeout,
- &disk_tp),
scrub_wq(
this,
cct->_conf->osd_scrub_thread_timeout,
typedef ceph::shared_ptr<DeletingState> DeletingStateRef;
class OSD;
+
+/**
+ * Work item asking a PG to run its snap trimmer; it is one of the
+ * alternatives a PGQueueable can hold.
+ *
+ * epoch_queued is the osdmap epoch at which the item was queued. It is
+ * handed to PG::snap_trimmer(), which uses it to drop stale requests.
+ */
+struct PGSnapTrim {
+  epoch_t epoch_queued;
+  PGSnapTrim(epoch_t e) : epoch_queued(e) {}
+  // Legacy member form: only usable as `item << out`. Kept so any existing
+  // caller still compiles; const-qualified so it also works on const items.
+  ostream &operator<<(ostream &rhs) const {
+    return rhs << "SnapTrim";
+  }
+  // Conventional stream insertion, so `out << item` works as expected.
+  friend ostream &operator<<(ostream &out, const PGSnapTrim &) {
+    return out << "SnapTrim";
+  }
+};
+
class PGQueueable {
typedef boost::variant<
- OpRequestRef
+ OpRequestRef,
+ PGSnapTrim
> QVariant;
QVariant qvariant;
int cost;
RunVis(OSD *osd, PGRef &pg, ThreadPool::TPHandle &handle)
: osd(osd), pg(pg), handle(handle) {}
void operator()(OpRequestRef &op);
+ void operator()(PGSnapTrim &op);
};
public:
PGQueueable(OpRequestRef op)
start_time(op->get_req()->get_recv_stamp()),
owner(op->get_req()->get_source_inst())
{}
+ PGQueueable(
+ const PGSnapTrim &op, int cost, unsigned priority, utime_t start_time,
+ const entity_inst_t &owner)
+ : qvariant(op), cost(cost), priority(priority), start_time(start_time),
+ owner(owner) {}
boost::optional<OpRequestRef> maybe_get_op() {
OpRequestRef *op = boost::get<OpRequestRef>(&qvariant);
return op ? *op : boost::optional<OpRequestRef>();
ShardedThreadPool::ShardedWQ < pair <PGRef, PGQueueable> > &op_wq;
ThreadPool::BatchWorkQueue<PG> &peering_wq;
ThreadPool::WorkQueue<PG> &recovery_wq;
- ThreadPool::WorkQueue<PG> &snap_trim_wq;
ThreadPool::WorkQueue<PG> &scrub_wq;
GenContextWQ recovery_gen_wq;
GenContextWQ op_gen_wq;
void queue_for_peering(PG *pg);
bool queue_for_recovery(PG *pg);
- bool queue_for_snap_trim(PG *pg) {
- return snap_trim_wq.queue(pg);
+ // Queue a snap-trim work item for pg on op_wq. The item is stamped with
+ // the pg's current osdmap epoch, takes its cost and priority from
+ // osd_snap_trim_cost / osd_snap_trim_priority, uses the current time as
+ // its start time and a default-constructed entity_inst_t as its owner.
+ // NOTE(review): pg->get_osdmap() asserts is_locked(), so the caller
+ // presumably must hold the pg lock -- confirm.
+ void queue_for_snap_trim(PG *pg) {
+ op_wq.queue(
+ make_pair(
+ pg,
+ PGQueueable(
+ PGSnapTrim(pg->get_osdmap()->get_epoch()),
+ cct->_conf->osd_snap_trim_cost,
+ cct->_conf->osd_snap_trim_priority,
+ ceph_clock_now(cct),
+ entity_inst_t())));
}
bool queue_for_scrub(PG *pg) {
return scrub_wq.queue(pg);
void check_replay_queue();
-
- // -- snap trimming --
- xlist<PG*> snap_trim_queue;
-
- struct SnapTrimWQ : public ThreadPool::WorkQueue<PG> {
- OSD *osd;
- SnapTrimWQ(OSD *o, time_t ti, time_t si, ThreadPool *tp)
- : ThreadPool::WorkQueue<PG>("OSD::SnapTrimWQ", ti, si, tp), osd(o) {}
-
- bool _empty() {
- return osd->snap_trim_queue.empty();
- }
- bool _enqueue(PG *pg) {
- if (pg->snap_trim_item.is_on_list())
- return false;
- pg->get("SnapTrimWQ");
- osd->snap_trim_queue.push_back(&pg->snap_trim_item);
- return true;
- }
- void _dequeue(PG *pg) {
- if (pg->snap_trim_item.remove_myself())
- pg->put("SnapTrimWQ");
- }
- PG *_dequeue() {
- if (osd->snap_trim_queue.empty())
- return NULL;
- PG *pg = osd->snap_trim_queue.front();
- osd->snap_trim_queue.pop_front();
- return pg;
- }
- void _process(PG *pg) {
- pg->snap_trimmer();
- pg->put("SnapTrimWQ");
- }
- void _clear() {
- while (PG *pg = _dequeue()) {
- pg->put("SnapTrimWQ");
- }
- }
- } snap_trim_wq;
-
-
// -- scrubbing --
void sched_scrub();
bool scrub_random_backoff();
coll(p), pg_log(cct),
pgmeta_oid(p.make_pgmeta_oid()),
missing_loc(this),
- recovery_item(this), scrub_item(this), snap_trim_item(this), stat_queue_item(this),
+ recovery_item(this), scrub_item(this), stat_queue_item(this),
+ snap_trim_queued(false),
recovery_ops_active(0),
role(0),
state(0),
scrub_after_recovery = false;
osd->recovery_wq.dequeue(this);
- osd->snap_trim_wq.dequeue(this);
agent_clear();
}
+// Ask the OSD to queue this PG for snap trimming, unless a request is
+// already outstanding. snap_trim_queued is set here before queuing, so
+// repeated calls only log "already queued" until the flag is cleared.
void PG::queue_snap_trim()
{
- if (osd->queue_for_snap_trim(this))
+ if (snap_trim_queued) {
+ dout(10) << "queue_snap_trim -- already queued" << dendl;
+ } else {
dout(10) << "queue_snap_trim -- queuing" << dendl;
- else
- dout(10) << "queue_snap_trim -- already trimming" << dendl;
+ snap_trim_queued = true;
+ osd->queue_for_snap_trim(this);
+ }
}
bool PG::queue_scrub()
assert(waiting_for_active.empty());
split_replay_queue(&replay_queue, &(child->replay_queue), match, split_bits);
+ snap_trim_queued = false;
osd->dequeue_pg(this, &waiting_for_peered);
+
OSD::split_list(
&waiting_for_peered, &(child->waiting_for_peered), match, split_bits);
{
peer_missing.clear();
peer_purged.clear();
actingbackfill.clear();
+ snap_trim_queued = false;
// reset primary state?
if (was_old_primary || is_primary()) {
return osdmap_ref;
}
+public:
+// Returns osdmap_ref. Asserts that is_locked() holds and that osdmap_ref
+// is set, so call it only with the lock held and after the map is assigned.
OSDMapRef get_osdmap() const {
assert(is_locked());
assert(osdmap_ref);
return osdmap_ref;
}
+protected:
/** locking and reference counting.
* I destroy myself when the reference count hits zero.
/* You should not use these items without taking their respective queue locks
* (if they have one) */
- xlist<PG*>::item recovery_item, scrub_item, snap_trim_item, stat_queue_item;
+ xlist<PG*>::item recovery_item, scrub_item, stat_queue_item;
+ bool snap_trim_queued;
+
int recovery_ops_active;
set<pg_shard_t> waiting_on_backfill;
#ifdef DEBUG_RECOVERY_OIDS
ThreadPool::TPHandle &handle
) = 0;
virtual void do_backfill(OpRequestRef op) = 0;
- virtual void snap_trimmer() = 0;
+ virtual void snap_trimmer(epoch_t epoch_queued) = 0;
virtual int do_command(cmdmap_t cmdmap, ostream& ss,
bufferlist& idata, bufferlist& odata) = 0;
return repop;
}
-void ReplicatedPG::snap_trimmer()
+void ReplicatedPG::snap_trimmer(epoch_t queued)
{
- lock();
- if (deleting) {
- unlock();
- return;
- }
if (g_conf->osd_snap_trim_sleep > 0) {
unlock();
utime_t t;
lock();
dout(20) << __func__ << " slept for " << t << dendl;
}
+ if (deleting || pg_has_reset_since(queued)) {
+ return;
+ }
+ snap_trim_queued = false;
dout(10) << "snap_trimmer entry" << dendl;
if (is_primary()) {
entity_inst_t nobody;
if (scrubber.active) {
dout(10) << " scrubbing, will requeue snap_trimmer after" << dendl;
scrubber.queue_snap_trim = true;
- unlock();
return;
}
// replica collection trimming
snap_trimmer_machine.process_event(SnapTrim());
}
- unlock();
return;
}
// remove from queues
osd->recovery_wq.dequeue(this);
- osd->snap_trim_wq.dequeue(this);
+ osd->scrub_wq.dequeue(this);
osd->pg_stat_queue_dequeue(this);
osd->dequeue_pg(this, 0);
osd->peering_wq.dequeue(this);
void do_backfill(OpRequestRef op);
RepGather *trim_object(const hobject_t &coid);
- void snap_trimmer();
+ void snap_trimmer(epoch_t e);
int do_osd_ops(OpContext *ctx, vector<OSDOp>& ops);
int _get_tmap(OpContext *ctx, bufferlist *header, bufferlist *vals);