#define OP_QUEUE_H
#include "include/msgr.h"
+#include "osd/osd_types.h"
#include <list>
#include <functional>
// Human readable brief description of queue and relevant parameters
virtual void print(std::ostream &f) const = 0;
+ // Get the type of OpQueue implementation
+ virtual op_queue_type_t get_type() const = 0;
+
// Don't leak resources on destruction
virtual ~OpQueue() {};
};
}
void print(std::ostream &ostream) const final {
- ostream << "PrioritizedQueue";
+ ostream << get_op_queue_type_name(get_type());
+ }
+
+ op_queue_type_t get_type() const final {
+ return op_queue_type_t::PrioritizedQueue;
}
};
}
void print(std::ostream &ostream) const final {
- ostream << "WeightedPriorityQueue";
+ ostream << get_op_queue_type_name(get_type());
+ }
+
+ op_queue_type_t get_type() const final {
+ return op_queue_type_t::WeightedPriorityQueue;
}
};
desc: Do not store full-object checksums if the backend (bluestore) does its own
checksums. Only usable with all BlueStore OSDs.
default: false
-# PrioritzedQueue (prio), Weighted Priority Queue (wpq ; default),
-# mclock_opclass, mclock_client, or debug_random. "mclock_opclass"
-# and "mclock_client" are based on the mClock/dmClock algorithm
-# (Gulati, et al. 2010). "mclock_opclass" prioritizes based on the
-# class the operation belongs to. "mclock_client" does the same but
-# also works to ienforce fairness between clients. "debug_random"
-# chooses among all four with equal probability.
+# Weighted Priority Queue (wpq), mClock Scheduler (mclock_scheduler: default)
+# or debug_random. "mclock_scheduler" is based on the mClock/dmClock
+# algorithm (Gulati, et al. 2010). "mclock_scheduler" prioritizes based on
+# the class the operation belongs to. "wpq" dequeues ops based on their
+# priorities. "debug_random" chooses among the two with equal probability.
+# Note: PrioritzedQueue (prio) implementation is not used for scheduling ops
+# within OSDs and is therefore not listed.
- name: osd_op_queue
type: str
level: advanced
trace_endpoint.copy_name(ss.str());
#endif
+ // Determine scheduler type for this OSD
+ auto get_op_queue_type = [this, &conf = cct->_conf]() {
+ op_queue_type_t queue_type;
+ if (auto type = conf.get_val<std::string>("osd_op_queue");
+ type != "debug_random") {
+ if (auto qt = get_op_queue_type_by_name(type); qt.has_value()) {
+ queue_type = *qt;
+ } else {
+ // This should never happen
+ dout(0) << "Invalid value passed for 'osd_op_queue': " << type << dendl;
+ ceph_assert(0 == "Unsupported op queue type");
+ }
+ } else {
+ static const std::vector<op_queue_type_t> index_lookup = {
+ op_queue_type_t::mClockScheduler,
+ op_queue_type_t::WeightedPriorityQueue
+ };
+ std::mt19937 random_gen(std::random_device{}());
+ auto which = random_gen() % index_lookup.size();
+ queue_type = index_lookup[which];
+ }
+ return queue_type;
+ };
+ op_queue_type_t op_queue = get_op_queue_type();
+
// initialize shards
num_shards = get_num_op_shards();
for (uint32_t i = 0; i < num_shards; i++) {
OSDShard *one_shard = new OSDShard(
i,
cct,
- this);
+ this,
+ op_queue);
shards.push_back(one_shard);
}
}
store->get_type()) != unsupported_objstores.end();
}
+op_queue_type_t OSD::osd_op_queue_type() const
+{
+ /**
+ * All OSD shards employ the same scheduler type. Therefore, return
+ * the scheduler type set on the OSD shard with lowest id(0).
+ */
+ ceph_assert(shards.size());
+ return shards[0]->get_op_queue_type();
+}
+
void OSD::update_log_config()
{
auto parsed_options = clog->parse_client_options(cct);
scheduler->update_configuration();
}
-std::string OSDShard::get_scheduler_type()
+op_queue_type_t OSDShard::get_op_queue_type() const
{
- std::ostringstream scheduler_type;
- scheduler_type << *scheduler;
- return scheduler_type.str();
+ return scheduler->get_type();
}
OSDShard::OSDShard(
int id,
CephContext *cct,
- OSD *osd)
+ OSD *osd,
+ op_queue_type_t osd_op_queue)
: shard_id(id),
cct(cct),
osd(osd),
shard_lock{make_mutex(shard_lock_name)},
scheduler(ceph::osd::scheduler::make_scheduler(
cct, osd->whoami, osd->num_shards, id, osd->store->is_rotational(),
- osd->store->get_type(), osd->monc)),
+ osd->store->get_type(), osd_op_queue, osd->monc)),
context_queue(sdata_wait_lock, sdata_cond)
{
dout(0) << "using op scheduler " << *scheduler << dendl;
void register_and_wake_split_child(PG *pg);
void unprime_split_children(spg_t parent, unsigned old_pg_num);
void update_scheduler_config();
- std::string get_scheduler_type();
+ op_queue_type_t get_op_queue_type() const;
OSDShard(
int id,
CephContext *cct,
- OSD *osd);
+ OSD *osd,
+ op_queue_type_t osd_op_queue);
};
class OSD : public Dispatcher,
OSDService service;
friend class OSDService;
+ /// op queue type set for the OSD
+ op_queue_type_t osd_op_queue_type() const;
+
private:
void set_perf_queries(const ConfigPayload &config_payload);
MetricPayload get_perf_reports();
{
return xattr_data.contents_equal(val.c_str(), val.size());
}
+
+std::string_view get_op_queue_type_name(const op_queue_type_t &q)
+{
+ switch (q) {
+ case op_queue_type_t::WeightedPriorityQueue:
+ return "wpq";
+ case op_queue_type_t::mClockScheduler:
+ return "mclock_scheduler";
+ case op_queue_type_t::PrioritizedQueue:
+ return "PrioritizedQueue";
+ default:
+ return "unknown";
+ }
+}
+
+std::optional<op_queue_type_t> get_op_queue_type_by_name(
+ const std::string_view &s)
+{
+ if (s == "wpq") {
+ return op_queue_type_t::WeightedPriorityQueue;
+ } else if (s == "mclock_scheduler") {
+ return op_queue_type_t::mClockScheduler;
+ } else if (s == "PrioritizedQueue") {
+ return op_queue_type_t::PrioritizedQueue;
+ } else {
+ return std::nullopt;
+ }
+}
std::pair<std::optional<uint32_t>,
std::optional<uint32_t>>>;
+/**
+ * op_queue_type_t
+ *
+ * Supported op queue types
+ */
+enum class op_queue_type_t : uint8_t {
+ WeightedPriorityQueue = 0,
+ mClockScheduler,
+ PrioritizedQueue
+};
+std::string_view get_op_queue_type_name(const op_queue_type_t &q);
+std::optional<op_queue_type_t> get_op_queue_type_by_name(
+ const std::string_view &s);
+
#endif
OpSchedulerRef make_scheduler(
CephContext *cct, int whoami, uint32_t num_shards, int shard_id,
- bool is_rotational, std::string_view osd_objectstore, MonClient *monc)
+ bool is_rotational, std::string_view osd_objectstore,
+ op_queue_type_t osd_scheduler, MonClient *monc)
{
- const std::string *type = &cct->_conf->osd_op_queue;
- if (*type == "debug_random") {
- static const std::string index_lookup[] = { "mclock_scheduler",
- "wpq" };
- srand(time(NULL));
- unsigned which = rand() % (sizeof(index_lookup) / sizeof(index_lookup[0]));
- type = &index_lookup[which];
- }
-
// Force the use of 'wpq' scheduler for filestore OSDs.
// The 'mclock_scheduler' is not supported for filestore OSDs.
- if (*type == "wpq" || osd_objectstore == "filestore") {
+ if (op_queue_type_t::WeightedPriorityQueue == osd_scheduler ||
+ osd_objectstore == "filestore") {
return std::make_unique<
ClassedOpQueueScheduler<WeightedPriorityQueue<OpSchedulerItem, client>>>(
cct,
cct->_conf->osd_op_pq_max_tokens_per_priority,
cct->_conf->osd_op_pq_min_cost
);
- } else if (*type == "mclock_scheduler") {
+ } else if (op_queue_type_t::mClockScheduler == osd_scheduler) {
// default is 'mclock_scheduler'
return std::make_unique<
mClockScheduler>(cct, whoami, num_shards, shard_id, is_rotational, monc);
#include <variant>
#include "common/ceph_context.h"
+#include "common/OpQueue.h"
#include "mon/MonClient.h"
#include "osd/scheduler/OpSchedulerItem.h"
// Apply config changes to the scheduler (if any)
virtual void update_configuration() = 0;
+ // Get the scheduler type set for the queue
+ virtual op_queue_type_t get_type() const = 0;
+
// Destructor
virtual ~OpScheduler() {};
};
OpSchedulerRef make_scheduler(
CephContext *cct, int whoami, uint32_t num_shards, int shard_id,
- bool is_rotational, std::string_view osd_objectstore, MonClient *monc);
+ bool is_rotational, std::string_view osd_objectstore,
+ op_queue_type_t osd_scheduler, MonClient *monc);
/**
* Implements OpScheduler in terms of OpQueue
// no-op
}
+ op_queue_type_t get_type() const final {
+ return queue.get_type();
+ }
+
~ClassedOpQueueScheduler() final {};
};
void dump(ceph::Formatter &f) const final;
void print(std::ostream &ostream) const final {
- ostream << "mClockScheduler";
+ ostream << get_op_queue_type_name(get_type());
}
// Update data associated with the modified mclock config key(s)
void update_configuration() final;
+ // Return the scheduler type
+ op_queue_type_t get_type() const final {
+ return op_queue_type_t::mClockScheduler;
+ }
+
const char** get_tracked_conf_keys() const final;
void handle_conf_change(const ConfigProxy& conf,
const std::set<std::string> &changed) final;