si.last_scrub_version = si.scrub_start_version;
}
/**
 * Tear down this inode's scrub state because the scrub is being aborted.
 *
 * A scrub must be in progress on the inode (asserted). The pending
 * completion context is handed back through *c and is NOT completed here.
 *
 * @param c [out] receives scrub_infop->on_finish (possibly nullptr); the
 *          stored on_finish is reset to nullptr by the swap
 */
+void CInode::scrub_aborted(MDSInternalContextBase **c) {
+ dout(20) << __func__ << dendl;
+ ceph_assert(scrub_is_in_progress());
+
// move the completion context out to the caller
+ *c = nullptr;
+ std::swap(*c, scrub_infop->on_finish);
+
// detach from the parent dentry: tell its dir the dentry is done and
// drop the PIN_SCRUBPARENT reference
+ if (scrub_infop->scrub_parent) {
+ CDentry *dn = scrub_infop->scrub_parent;
+ scrub_infop->scrub_parent = NULL;
+ dn->dir->scrub_dentry_finished(dn);
+ dn->put(CDentry::PIN_SCRUBPARENT);
+ }
+
// free the scrub bookkeeping for this inode
+ delete scrub_infop;
+ scrub_infop = nullptr;
+}
+
void CInode::scrub_finished(MDSInternalContextBase **c) {
dout(20) << __func__ << dendl;
ceph_assert(scrub_is_in_progress());
* be complete()ed.
*/
void scrub_finished(MDSInternalContextBase **c);
+
/**
 * Tear down the scrub state of this inode when its scrub is aborted.
 * A scrub must be in progress on the inode.
 * @param c [out] receives the pending on_finish context (may be nullptr);
 *          it is handed over to the caller, not completed.
 */
+ void scrub_aborted(MDSInternalContextBase **c);
+
/**
* Report to the CInode that alldirfrags it owns have been scrubbed.
*/
MDSCommand("cache drop name=timeout,type=CephInt,range=0,req=false", "trim cache and optionally request client to release all caps and flush the journal"),
MDSCommand("scrub start name=path,type=CephString name=scrubops,type=CephChoices,strings=force|recursive|repair,n=N,req=false name=tag,type=CephString,req=false",
"scrub an inode and output results"),
+ MDSCommand("scrub abort", "Abort in progress scrub operation(s)"),
+ MDSCommand("scrub pause", "Pause in progress scrub operation(s)"),
+ MDSCommand("scrub resume", "Resume paused scrub operation(s)"),
+ MDSCommand("scrub status", "Status of scrub operation"),
};
return commands;
};
std::vector<std::string> scrubop;
};
/**
 * Exec-and-reply context for the "scrub abort" and "scrub pause" commands.
 *
 * exec() passes `this` to the MDSRank helper as the completion context, so
 * finish() is invoked with the result of the control operation.
 */
+class C_ScrubControlExecAndReply : public C_ExecAndReply {
+public:
// @param command control operation to run; exec() understands "abort"
//                and "pause"
+ C_ScrubControlExecAndReply(MDSRank *mds, const MCommand::const_ref &m,
+ const std::string &command)
+ : C_ExecAndReply(mds, m), command(command) {
+ }
+
// Dispatch on the stored command name; any other name aborts the process.
+ void exec() override {
+ if (command == "abort") {
+ mds->command_scrub_abort(&f, this);
+ } else if (command == "pause") {
+ mds->command_scrub_pause(&f, this);
+ } else {
+ ceph_abort();
+ }
+ }
+
// Dump {"result": {"return_code": r}} into the formatter and then chain to
// the base class (presumably that is where the reply is sent -- confirm in
// C_ExecAndReply).
+ void finish(int r) override {
+ f.open_object_section("result");
+ f.dump_int("return_code", r);
+ f.close_section();
+ C_ExecAndReply::finish(r);
+ }
+
+private:
+ std::string command; // control operation name, compared in exec()
+};
+
/**
* This function drops the mds_lock, so don't do anything with
* MDSRank after calling it (we could have gone into shutdown): just
scond.wait();
}
/**
 * Handle "scrub abort": forward the request to the scrub stack.
 *
 * Takes mds_lock for the duration of the call; ScrubStack::scrub_abort()
 * asserts that the lock is held by the calling thread.
 *
 * @param f unused in this function
 * @param on_finish handed to ScrubStack::scrub_abort(), which completes it
 *                  immediately or queues it until the abort is carried out
 */
+void MDSRank::command_scrub_abort(Formatter *f, Context *on_finish) {
+ std::lock_guard l(mds_lock);
+ scrubstack->scrub_abort(on_finish);
+}
+
/**
 * Handle "scrub pause": forward the request to the scrub stack.
 *
 * Takes mds_lock for the duration of the call; ScrubStack::scrub_pause()
 * asserts that the lock is held by the calling thread.
 *
 * @param f unused in this function
 * @param on_finish handed to ScrubStack::scrub_pause(), which completes it
 *                  immediately or queues it until the pause takes effect
 */
+void MDSRank::command_scrub_pause(Formatter *f, Context *on_finish) {
+ std::lock_guard l(mds_lock);
+ scrubstack->scrub_pause(on_finish);
+}
+
/**
 * Handle "scrub resume": resume scrubbing and report the outcome.
 *
 * Writes {"result": {"return_code": r}} to f, where r is the value
 * returned by ScrubStack::scrub_resume().
 *
 * NOTE(review): unlike command_scrub_abort()/command_scrub_pause() this
 * function does not take mds_lock, while scrub_resume() asserts the lock is
 * held by the calling thread -- presumably the command handler already
 * holds it; confirm against the caller.
 *
 * @param f formatter receiving the result section
 */
+void MDSRank::command_scrub_resume(Formatter *f) {
+ int r = scrubstack->scrub_resume();
+
+ f->open_object_section("result");
+ f->dump_int("return_code", r);
+ f->close_section();
+}
+
/**
 * Handle "scrub status": let the scrub stack dump its status into f.
 *
 * NOTE(review): no lock is taken here although ScrubStack::scrub_status()
 * asserts mds_lock is held by the calling thread -- presumably the caller
 * holds it; confirm.
 *
 * @param f formatter receiving the status
 */
+void MDSRank::command_scrub_status(Formatter *f) {
+ scrubstack->scrub_status(f);
+}
+
void MDSRank::command_flush_path(Formatter *f, std::string_view path)
{
C_SaferCond scond;
*run_later = create_async_exec_context(new C_ScrubExecAndReply
(this, m, path, tag, scrubop_vec));
return true;
+ } else if (prefix == "scrub abort") {
+ *need_reply = false;
+ *run_later = create_async_exec_context(new C_ScrubControlExecAndReply
+ (this, m, "abort"));
+ return true;
+ } else if (prefix == "scrub pause") {
+ *need_reply = false;
+ *run_later = create_async_exec_context(new C_ScrubControlExecAndReply
+ (this, m, "pause"));
+ return true;
+ } else if (prefix == "scrub resume") {
+ JSONFormatter f(true);
+ command_scrub_resume(&f);
+ f.flush(*ds);
+ return true;
+ } else if (prefix == "scrub status") {
+ JSONFormatter f(true);
+ command_scrub_status(&f);
+ f.flush(*ds);
+ return true;
} else {
return false;
}
friend class C_CacheDropExecAndReply;
friend class C_ScrubExecAndReply;
+ friend class C_ScrubControlExecAndReply;
mds_rank_t get_nodeid() const { return whoami; }
int64_t get_metadata_pool();
const vector<string>& scrubop_vec, Context *on_finish);
void command_tag_path(Formatter *f, std::string_view path,
std::string_view tag);
+ // scrub control commands
// abort/pause: on_finish is passed on to the scrub stack, which completes
// it once the operation has been carried out (possibly immediately)
+ void command_scrub_abort(Formatter *f, Context *on_finish);
+ void command_scrub_pause(Formatter *f, Context *on_finish);
// resume/status: synchronous, the outcome is dumped into f
+ void command_scrub_resume(Formatter *f);
+ void command_scrub_status(Formatter *f);
+
void command_flush_path(Formatter *f, std::string_view path);
void command_flush_journal(Formatter *f);
void command_get_subtrees(Formatter *f);
return *_dout << "mds." << mds->get_nodeid() << ".scrubstack ";
}
/**
 * Stream a ScrubStack::State as its upper-case name: "RUNNING", "IDLE",
 * "PAUSING" or "PAUSED". Any other value aborts the process.
 *
 * @param os output stream
 * @param state state to print
 * @returns os
 */
+std::ostream &operator<<(std::ostream &os, const ScrubStack::State &state) {
+ switch(state) {
+ case ScrubStack::STATE_RUNNING:
+ os << "RUNNING";
+ break;
+ case ScrubStack::STATE_IDLE:
+ os << "IDLE";
+ break;
+ case ScrubStack::STATE_PAUSING:
+ os << "PAUSING";
+ break;
+ case ScrubStack::STATE_PAUSED:
+ os << "PAUSED";
+ break;
+ default:
// not one of the enumerators handled above
+ ceph_abort();
+ }
+
+ return os;
+}
+
void ScrubStack::push_inode(CInode *in)
{
dout(20) << "pushing " << *in << " on top of ScrubStack" << dendl;
/**
 * Queue an inode for scrubbing and try to start scrubbing right away.
 *
 * While an abort is pending (clear_inode_stack set) nothing is queued:
 * on_finish is completed with -EAGAIN and the function returns.
 *
 * @param in inode to scrub
 * @param header scrub header passed on to _enqueue_inode()
 * @param on_finish completion context; dereferenced unconditionally on the
 *                  abort path, so it must not be null there
 * @param top passed on to _enqueue_inode()
 */
void ScrubStack::enqueue_inode(CInode *in, ScrubHeaderRef& header,
MDSInternalContextBase *on_finish, bool top)
{
+ // abort in progress: refuse new work until the stack has been cleared
+ if (clear_inode_stack) {
+ on_finish->complete(-EAGAIN);
+ return;
+ }
+
_enqueue_inode(in, NULL, header, on_finish, top);
kick_off_scrubs();
}
void ScrubStack::kick_off_scrubs()
{
+ ceph_assert(mdcache->mds->mds_lock.is_locked());
+ dout(20) << __func__ << ": state=" << state << dendl;
+
+ if (clear_inode_stack || state == STATE_PAUSING || state == STATE_PAUSED) {
+ if (scrubs_in_progress == 0) {
+ dout(10) << __func__ << ": in progress scrub operations finished, "
+ << stack_size << " in the stack" << dendl;
+
+ State final_state = state;
+ if (clear_inode_stack) {
+ abort_pending_scrubs();
+ final_state = STATE_IDLE;
+ }
+ if (state == STATE_PAUSING) {
+ final_state = STATE_PAUSED;
+ }
+
+ set_state(final_state);
+ complete_control_contexts(0);
+ }
+
+ return;
+ }
+
dout(20) << __func__ << " entering with " << scrubs_in_progress << " in "
"progress and " << stack_size << " in the stack" << dendl;
bool can_continue = true;
elist<CInode*>::iterator i = inode_stack.begin();
while (g_conf()->mds_max_scrub_ops_in_progress > scrubs_in_progress &&
- can_continue && !i.end()) {
+ can_continue) {
+ if (i.end()) {
+ if (scrubs_in_progress == 0) {
+ set_state(STATE_IDLE);
+ }
+
+ return;
+ }
+
+ assert(state == STATE_RUNNING || state == STATE_IDLE);
+ set_state(STATE_RUNNING);
+
CInode *curi = *i;
++i; // we have our reference, push iterator forward
MDSInternalContextBase *c = NULL;
in->scrub_finished(&c);
- if (!header->get_recursive() && in == header->get_origin()) {
- if (r >= 0) { // we got into the scrubbing dump it
- result.dump(&(header->get_formatter()));
- } else { // we failed the lookup or something; dump ourselves
- header->get_formatter().open_object_section("results");
- header->get_formatter().dump_int("return_code", r);
- header->get_formatter().close_section(); // results
+ if (in == header->get_origin()) {
+ scrub_origins.erase(in);
+ if (!header->get_recursive()) {
+ if (r >= 0) { // we got into the scrubbing dump it
+ result.dump(&(header->get_formatter()));
+ } else { // we failed the lookup or something; dump ourselves
+ header->get_formatter().open_object_section("results");
+ header->get_formatter().dump_int("return_code", r);
+ header->get_formatter().close_section(); // results
+ }
}
}
if (c) {
// Construct the context: mdcache->mds is passed to the MDSInternalContext
// base and the scrub stack pointer is stored in `stack`.
ScrubStack::C_KickOffScrubs::C_KickOffScrubs(MDCache *mdcache, ScrubStack *s)
: MDSInternalContext(mdcache->mds), stack(s) { }
+
/**
 * Complete every queued scrub control context (pending abort/pause
 * requests) with the given result and empty the queue.
 * mds_lock must be held by the calling thread (asserted).
 *
 * @param r result passed to each context's complete()
 */
+void ScrubStack::complete_control_contexts(int r) {
+ ceph_assert(mdcache->mds->mds_lock.is_locked_by_me());
+
+ for (auto &ctx : control_ctxs) {
+ ctx->complete(r);
+ }
+ control_ctxs.clear();
+}
+
/**
 * Move the scrub stack to next_state. The transition is logged and applied
 * only when next_state differs from the current state.
 *
 * @param next_state state to move to
 */
+void ScrubStack::set_state(State next_state) {
+ if (state != next_state) {
+ dout(20) << __func__ << ", from state=" << state << ", to state="
+ << next_state << dendl;
+ state = next_state;
+ }
+}
+
/**
 * Tell whether a scrub control operation has to be delayed.
 * mds_lock must be held by the calling thread (asserted).
 *
 * @returns true if the state is STATE_RUNNING or STATE_PAUSING,
 *          false otherwise
 */
+bool ScrubStack::scrub_in_transition_state() {
+ ceph_assert(mdcache->mds->mds_lock.is_locked_by_me());
+ dout(20) << __func__ << ": state=" << state << dendl;
+
+ // STATE_RUNNING is considered as a transition state so as to
+ // "delay" the scrub control operation.
+ if (state == STATE_RUNNING || state == STATE_PAUSING) {
+ return true;
+ }
+
+ return false;
+}
+
/**
 * Dump the scrub status into f. mds_lock must be held by the calling
 * thread (asserted).
 *
 * Layout written to the formatter:
 *   result: {
 *     status: <text>,
 *     scrubs: { <tag>: { path: <path>, options: <opts> }, ... }
 *   }
 * where <opts> is a comma separated subset of "recursive", "repair",
 * "force", and there is one <tag> section per entry of scrub_origins.
 *
 * NOTE(review): the header returned by inode->get_scrub_header() is
 * dereferenced without a null check -- confirm every inode kept in
 * scrub_origins still carries scrub state.
 *
 * @param f formatter receiving the status
 */
+void ScrubStack::scrub_status(Formatter *f) {
+ ceph_assert(mdcache->mds->mds_lock.is_locked_by_me());
+
+ f->open_object_section("result");
+
+ std::stringstream ss;
+ bool have_more = false;
+
// build the one-line status text
+ if (state == STATE_IDLE) {
+ ss << "no active scrubs running";
+ } else if (state == STATE_RUNNING) {
+ if (clear_inode_stack) {
+ ss << "ABORTING";
+ } else {
+ ss << "scrub active";
+ }
+ ss << " (" << stack_size << " inodes in the stack)";
+ } else {
// remaining states: PAUSING / PAUSED, optionally combined with a
// pending abort as "<state>+ABORTING"
+ if (state == STATE_PAUSING || state == STATE_PAUSED) {
+ have_more = true;
+ ss << state;
+ }
+ if (clear_inode_stack) {
+ if (have_more) {
+ ss << "+";
+ }
+ ss << "ABORTING";
+ }
+
+ ss << " (" << stack_size << " inodes in the stack)";
+ }
+ f->dump_string("status", ss.str());
+
// one section per scrub origin, named after the scrub tag
+ f->open_object_section("scrubs");
+ for (auto &inode : scrub_origins) {
+ have_more = false;
+ ScrubHeaderRefConst header = inode->get_scrub_header();
+
+ std::string tag(header->get_tag());
+ f->open_object_section(tag.c_str()); // scrub id
+
+ std::string path;
+ inode->make_path_string(path, true);
// an empty path string is reported as "/"
+ f->dump_string("path", path.empty() ? "/" : path.c_str());
+
// have_more tracks whether a separating comma is needed
+ std::stringstream optss;
+ if (header->get_recursive()) {
+ optss << "recursive";
+ have_more = true;
+ }
+ if (header->get_repair()) {
+ if (have_more) {
+ optss << ",";
+ }
+ optss << "repair";
+ have_more = true;
+ }
+ if (header->get_force()) {
+ if (have_more) {
+ optss << ",";
+ }
+ optss << "force";
+ }
+
+ f->dump_string("options", optss.str());
+ f->close_section(); // scrub id
+ }
+ f->close_section(); // scrubs
+ f->close_section(); // result
+}
+
/**
 * Abort the scrubs of all inodes still waiting in inode_stack.
 *
 * Requires mds_lock held by the calling thread and a pending abort
 * (clear_inode_stack set); both are asserted. For each queued inode the
 * scrub state is torn down via CInode::scrub_aborted() and its completion
 * context, if any, is completed with -ECANCELED. Afterwards the stack is
 * emptied and the pending-abort flag is cleared.
 */
+void ScrubStack::abort_pending_scrubs() {
+ ceph_assert(mdcache->mds->mds_lock.is_locked_by_me());
+ ceph_assert(clear_inode_stack);
+
+ for (auto inode = inode_stack.begin(); !inode.end(); ++inode) {
+ CInode *in = *inode;
// read the header before scrub_aborted() below frees the inode's scrub
// info; origins are dropped from the status bookkeeping
+ if (in == in->scrub_info()->header->get_origin()) {
+ scrub_origins.erase(in);
+ }
+
+ MDSInternalContextBase *ctx = nullptr;
+ in->scrub_aborted(&ctx);
+ if (ctx != nullptr) {
+ ctx->complete(-ECANCELED);
+ }
+ }
+
// the abort is done: empty the stack and clear the pending-abort flag
+ stack_size = 0;
+ inode_stack.clear();
+ clear_inode_stack = false;
+}
+
/**
 * Abort scrubbing. mds_lock must be held by the calling thread and
 * on_finish must not be null (both asserted).
 *
 * The pending-abort flag is always set. If the stack is in a transition
 * state (running or pausing) on_finish is queued in control_ctxs and the
 * abort is carried out later; otherwise the queued inodes are aborted right
 * away and on_finish is completed with 0. A paused stack stays paused, any
 * other state becomes idle.
 *
 * @param on_finish context completed once the abort has been carried out
 */
+void ScrubStack::scrub_abort(Context *on_finish) {
+ ceph_assert(mdcache->mds->mds_lock.is_locked_by_me());
+ ceph_assert(on_finish != nullptr);
+
+ dout(10) << __func__ << ": aborting with " << scrubs_in_progress
+ << " scrubs in progress and " << stack_size << " in the"
+ << " stack" << dendl;
+
+ clear_inode_stack = true;
// delayed abort: wait for the transition state to end
+ if (scrub_in_transition_state()) {
+ control_ctxs.push_back(on_finish);
+ return;
+ }
+
// immediate abort
+ abort_pending_scrubs();
+ if (state != STATE_PAUSED) {
+ set_state(STATE_IDLE);
+ }
+ on_finish->complete(0);
+}
+
/**
 * Pause scrubbing. mds_lock must be held by the calling thread and
 * on_finish must not be null (both asserted).
 *
 * - abort pending: on_finish is completed with -EINVAL, nothing changes.
 * - transition state (running or pausing): the state becomes STATE_PAUSING
 *   and on_finish is queued in control_ctxs.
 * - otherwise: the state becomes STATE_PAUSED and on_finish is completed
 *   with 0.
 *
 * @param on_finish context completed once the pause has taken effect
 */
+void ScrubStack::scrub_pause(Context *on_finish) {
+ ceph_assert(mdcache->mds->mds_lock.is_locked_by_me());
+ ceph_assert(on_finish != nullptr);
+
+ dout(10) << __func__ << ": pausing with " << scrubs_in_progress
+ << " scrubs in progress and " << stack_size << " in the"
+ << " stack" << dendl;
+
+ // abort is in progress
+ if (clear_inode_stack) {
+ on_finish->complete(-EINVAL);
+ return;
+ }
+
// NOTE: despite its name, `done` is true when the pause has to be
// delayed, i.e. it is NOT done yet
+ bool done = scrub_in_transition_state();
+ if (done) {
+ set_state(STATE_PAUSING);
+ control_ctxs.push_back(on_finish);
+ return;
+ }
+
+ set_state(STATE_PAUSED);
+ on_finish->complete(0);
+}
+
+bool ScrubStack::scrub_resume() {
+ ceph_assert(mdcache->mds->mds_lock.is_locked_by_me());
+ dout(20) << __func__ << ": state=" << state << dendl;
+
+ int r = 0;
+
+ if (clear_inode_stack) {
+ r = -EINVAL;
+ } else if (state == STATE_PAUSING) {
+ set_state(STATE_RUNNING);
+ complete_control_contexts(-ECANCELED);
+ } else if (state == STATE_PAUSED) {
+ set_state(STATE_RUNNING);
+ kick_off_scrubs();
+ }
+
+ return r;
+}
C_KickOffScrubs(MDCache *mdcache, ScrubStack *s);
void finish(int r) override { }
void complete(int r) override {
+ if (r == -ECANCELED) {
+ return;
+ }
+
stack->scrubs_in_progress--;
stack->kick_off_scrubs();
// don't delete self
void enqueue_inode_top(CInode *in, ScrubHeaderRef& header,
MDSInternalContextBase *on_finish) {
enqueue_inode(in, header, on_finish, true);
+ scrub_origins.emplace(in);
}
/** Like enqueue_inode_top, but we wait for all pending scrubs before
* starting this one.
void enqueue_inode_bottom(CInode *in, ScrubHeaderRef& header,
MDSInternalContextBase *on_finish) {
enqueue_inode(in, header, on_finish, false);
+ scrub_origins.emplace(in);
}
+ /**
+ * Abort an ongoing scrub operation. The abort operation could be
+ * delayed if there are in-progress scrub operations on going. The
+ * caller should provide a context which is completed after all
+ * in-progress scrub operations are completed and pending inodes
+ * are removed from the scrub stack (with the context callbacks for
+ * inodes completed with -ECANCELED).
+ * @param on_finish Context callback to invoke after abort
+ */
+ void scrub_abort(Context *on_finish);
+
+ /**
+ * Pause scrub operations. Similar to abort, pause is delayed if
+ * there are in-progress scrub operations on going. The caller
+ * should provide a context which is completed after all in-progress
+ * scrub operations are completed. Subsequent scrub operations are
+ * queued until scrub is resumed.
+ * @param on_finish Context callback to invoke after pause
+ */
+ void scrub_pause(Context *on_finish);
+
+ /**
+ * Resume a paused scrub. Unlike abort or pause, this is instantaneous.
+ * Pending pause operations are cancelled (context callbacks are
+ * invoked with -ECANCELED).
+ * @returns 0 (success) if resumed, -EINVAL if an abort is in-progress.
+ */
+ bool scrub_resume();
+
+ /**
+ * Dump the current scrub status into the given formatter: a human
+ * readable status string (including the number of inodes left in the
+ * stack) and, for each scrub origin, its path and scrub options.
+ * @param f Formatter the status is written to
+ */
+ void scrub_status(Formatter *f);
+
private:
+ // scrub abort is _not_ a state, rather it's an operation that's
+ // performed after in-progress scrubs are finished.
+ enum State {
+ STATE_RUNNING = 0, // inodes from the stack are being scrubbed
+ STATE_IDLE, // stack drained and no scrub in progress (initial state)
+ STATE_PAUSING, // pause requested, waiting for in-progress scrubs
+ STATE_PAUSED, // paused; resumed via scrub_resume()
+ };
+ friend std::ostream &operator<<(std::ostream &os, const State &state);
+
+ State state = STATE_IDLE;
+ bool clear_inode_stack = false; // true while an abort is pending
+
+ // list of pending context completions for asynchronous scrub
+ // control operations.
+ std::list<Context *> control_ctxs;
+
+ // set of origin inodes for which scrub operations are running -- used
+ // to display them in `scrub status`.
+ std::set<CInode *> scrub_origins;
+
/**
* Put the inode at either the top or bottom of the stack, with
* the given scrub params, and then try and kick off more scrubbing.
*/
bool get_next_cdir(CInode *in, CDir **new_dir);
+ /**
+ * Set scrub state. Nothing happens if the stack is already in
+ * that state.
+ * @param next_state State to move the scrub to.
+ */
+ void set_state(State next_state);
+
+ /**
+ * Is scrub in one of the transition states (running, pausing)?
+ * Control operations requested in such a state are delayed.
+ * @returns true if the state is running or pausing.
+ */
+ bool scrub_in_transition_state();
+
+ /**
+ * Complete all queued up control contexts and empty the queue.
+ * @param r return value to complete contexts with.
+ */
+ void complete_control_contexts(int r);
+
+ /**
+ * Abort pending scrubs for inodes waiting in the inode stack.
+ * Their completion contexts are completed with -ECANCELED.
+ */
+ void abort_pending_scrubs();
};
#endif /* SCRUBSTACK_H_ */