From b9153d14ee9108263b38c9c6dbd0a5f7bcc3f3cd Mon Sep 17 00:00:00 2001 From: Venky Shankar Date: Mon, 5 Nov 2018 01:24:39 -0500 Subject: [PATCH] mds: scrub abort/pause/resume/status control commands Signed-off-by: Venky Shankar --- src/mds/CInode.cc | 18 +++ src/mds/CInode.h | 3 + src/mds/MDSDaemon.cc | 4 + src/mds/MDSRank.cc | 70 +++++++++++ src/mds/MDSRank.h | 7 ++ src/mds/ScrubStack.cc | 269 ++++++++++++++++++++++++++++++++++++++++-- src/mds/ScrubStack.h | 84 +++++++++++++ 7 files changed, 447 insertions(+), 8 deletions(-) diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index 636e64ed796..28fc20a72c0 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -4770,6 +4770,24 @@ void CInode::scrub_dirfrag_finished(frag_t dirfrag) si.last_scrub_version = si.scrub_start_version; } +void CInode::scrub_aborted(MDSInternalContextBase **c) { + dout(20) << __func__ << dendl; + ceph_assert(scrub_is_in_progress()); + + *c = nullptr; + std::swap(*c, scrub_infop->on_finish); + + if (scrub_infop->scrub_parent) { + CDentry *dn = scrub_infop->scrub_parent; + scrub_infop->scrub_parent = NULL; + dn->dir->scrub_dentry_finished(dn); + dn->put(CDentry::PIN_SCRUBPARENT); + } + + delete scrub_infop; + scrub_infop = nullptr; +} + void CInode::scrub_finished(MDSInternalContextBase **c) { dout(20) << __func__ << dendl; ceph_assert(scrub_is_in_progress()); diff --git a/src/mds/CInode.h b/src/mds/CInode.h index e4309c89fbc..bbdf01c0a08 100644 --- a/src/mds/CInode.h +++ b/src/mds/CInode.h @@ -358,6 +358,9 @@ class CInode : public MDSCacheObject, public InodeStoreBase, public Counter& MDSDaemon::get_commands() MDSCommand("cache drop name=timeout,type=CephInt,range=0,req=false", "trim cache and optionally request client to release all caps and flush the journal"), MDSCommand("scrub start name=path,type=CephString name=scrubops,type=CephChoices,strings=force|recursive|repair,n=N,req=false name=tag,type=CephString,req=false", "scrub an inode and output results"), + MDSCommand("scrub abort", 
"Abort in progress scrub operation(s)"), + MDSCommand("scrub pause", "Pause in progress scrub operation(s)"), + MDSCommand("scrub resume", "Resume paused scrub operation(s)"), + MDSCommand("scrub status", "Status of scrub operation"), }; return commands; }; diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc index 9b3a8643746..954a5984a0a 100644 --- a/src/mds/MDSRank.cc +++ b/src/mds/MDSRank.cc @@ -2638,6 +2638,34 @@ private: std::vector scrubop; }; +class C_ScrubControlExecAndReply : public C_ExecAndReply { +public: + C_ScrubControlExecAndReply(MDSRank *mds, const MCommand::const_ref &m, + const std::string &command) + : C_ExecAndReply(mds, m), command(command) { + } + + void exec() override { + if (command == "abort") { + mds->command_scrub_abort(&f, this); + } else if (command == "pause") { + mds->command_scrub_pause(&f, this); + } else { + ceph_abort(); + } + } + + void finish(int r) override { + f.open_object_section("result"); + f.dump_int("return_code", r); + f.close_section(); + C_ExecAndReply::finish(r); + } + +private: + std::string command; +}; + /** * This function drops the mds_lock, so don't do anything with * MDSRank after calling it (we could have gone into shutdown): just @@ -2755,6 +2783,28 @@ void MDSRank::command_tag_path(Formatter *f, scond.wait(); } +void MDSRank::command_scrub_abort(Formatter *f, Context *on_finish) { + std::lock_guard l(mds_lock); + scrubstack->scrub_abort(on_finish); +} + +void MDSRank::command_scrub_pause(Formatter *f, Context *on_finish) { + std::lock_guard l(mds_lock); + scrubstack->scrub_pause(on_finish); +} + +void MDSRank::command_scrub_resume(Formatter *f) { + int r = scrubstack->scrub_resume(); + + f->open_object_section("result"); + f->dump_int("return_code", r); + f->close_section(); +} + +void MDSRank::command_scrub_status(Formatter *f) { + scrubstack->scrub_status(f); +} + void MDSRank::command_flush_path(Formatter *f, std::string_view path) { C_SaferCond scond; @@ -3462,6 +3512,26 @@ bool 
MDSRankDispatcher::handle_command( *run_later = create_async_exec_context(new C_ScrubExecAndReply (this, m, path, tag, scrubop_vec)); return true; + } else if (prefix == "scrub abort") { + *need_reply = false; + *run_later = create_async_exec_context(new C_ScrubControlExecAndReply + (this, m, "abort")); + return true; + } else if (prefix == "scrub pause") { + *need_reply = false; + *run_later = create_async_exec_context(new C_ScrubControlExecAndReply + (this, m, "pause")); + return true; + } else if (prefix == "scrub resume") { + JSONFormatter f(true); + command_scrub_resume(&f); + f.flush(*ds); + return true; + } else if (prefix == "scrub status") { + JSONFormatter f(true); + command_scrub_status(&f); + f.flush(*ds); + return true; } else { return false; } diff --git a/src/mds/MDSRank.h b/src/mds/MDSRank.h index 38984d640f6..b2f2179ae0f 100644 --- a/src/mds/MDSRank.h +++ b/src/mds/MDSRank.h @@ -141,6 +141,7 @@ class MDSRank { friend class C_CacheDropExecAndReply; friend class C_ScrubExecAndReply; + friend class C_ScrubControlExecAndReply; mds_rank_t get_nodeid() const { return whoami; } int64_t get_metadata_pool(); @@ -465,6 +466,12 @@ class MDSRank { const vector& scrubop_vec, Context *on_finish); void command_tag_path(Formatter *f, std::string_view path, std::string_view tag); + // scrub control commands + void command_scrub_abort(Formatter *f, Context *on_finish); + void command_scrub_pause(Formatter *f, Context *on_finish); + void command_scrub_resume(Formatter *f); + void command_scrub_status(Formatter *f); + void command_flush_path(Formatter *f, std::string_view path); void command_flush_journal(Formatter *f); void command_get_subtrees(Formatter *f); diff --git a/src/mds/ScrubStack.cc b/src/mds/ScrubStack.cc index 2b864e5e536..0d72dba8100 100644 --- a/src/mds/ScrubStack.cc +++ b/src/mds/ScrubStack.cc @@ -28,6 +28,27 @@ static ostream& _prefix(std::ostream *_dout, MDSRank *mds) { return *_dout << "mds." 
<< mds->get_nodeid() << ".scrubstack "; } +std::ostream &operator<<(std::ostream &os, const ScrubStack::State &state) { + switch(state) { + case ScrubStack::STATE_RUNNING: + os << "RUNNING"; + break; + case ScrubStack::STATE_IDLE: + os << "IDLE"; + break; + case ScrubStack::STATE_PAUSING: + os << "PAUSING"; + break; + case ScrubStack::STATE_PAUSED: + os << "PAUSED"; + break; + default: + ceph_abort(); + } + + return os; +} + void ScrubStack::push_inode(CInode *in) { dout(20) << "pushing " << *in << " on top of ScrubStack" << dendl; @@ -75,18 +96,59 @@ void ScrubStack::_enqueue_inode(CInode *in, CDentry *parent, void ScrubStack::enqueue_inode(CInode *in, ScrubHeaderRef& header, MDSInternalContextBase *on_finish, bool top) { + // abort in progress + if (clear_inode_stack) { + on_finish->complete(-EAGAIN); + return; + } + _enqueue_inode(in, NULL, header, on_finish, top); kick_off_scrubs(); } void ScrubStack::kick_off_scrubs() { + ceph_assert(mdcache->mds->mds_lock.is_locked()); + dout(20) << __func__ << ": state=" << state << dendl; + + if (clear_inode_stack || state == STATE_PAUSING || state == STATE_PAUSED) { + if (scrubs_in_progress == 0) { + dout(10) << __func__ << ": in progress scrub operations finished, " + << stack_size << " in the stack" << dendl; + + State final_state = state; + if (clear_inode_stack) { + abort_pending_scrubs(); + final_state = STATE_IDLE; + } + if (state == STATE_PAUSING) { + final_state = STATE_PAUSED; + } + + set_state(final_state); + complete_control_contexts(0); + } + + return; + } + dout(20) << __func__ << " entering with " << scrubs_in_progress << " in " "progress and " << stack_size << " in the stack" << dendl; bool can_continue = true; elist<CInode*>::iterator i = inode_stack.begin(); while (g_conf()->mds_max_scrub_ops_in_progress > scrubs_in_progress && - can_continue && !i.end()) { + can_continue) { + if (i.end()) { + if (scrubs_in_progress == 0) { + set_state(STATE_IDLE); + } + + return; + } + + ceph_assert(state == STATE_RUNNING || state == 
STATE_IDLE); + set_state(STATE_RUNNING); + CInode *curi = *i; ++i; // we have our reference, push iterator forward @@ -418,13 +480,16 @@ void ScrubStack::_validate_inode_done(CInode *in, int r, MDSInternalContextBase *c = NULL; in->scrub_finished(&c); - if (!header->get_recursive() && in == header->get_origin()) { - if (r >= 0) { // we got into the scrubbing dump it - result.dump(&(header->get_formatter())); - } else { // we failed the lookup or something; dump ourselves - header->get_formatter().open_object_section("results"); - header->get_formatter().dump_int("return_code", r); - header->get_formatter().close_section(); // results + if (in == header->get_origin()) { + scrub_origins.erase(in); + if (!header->get_recursive()) { + if (r >= 0) { // we got into the scrubbing dump it + result.dump(&(header->get_formatter())); + } else { // we failed the lookup or something; dump ourselves + header->get_formatter().open_object_section("results"); + header->get_formatter().dump_int("return_code", r); + header->get_formatter().close_section(); // results + } } } if (c) { @@ -434,3 +499,191 @@ void ScrubStack::_validate_inode_done(CInode *in, int r, ScrubStack::C_KickOffScrubs::C_KickOffScrubs(MDCache *mdcache, ScrubStack *s) : MDSInternalContext(mdcache->mds), stack(s) { } + +void ScrubStack::complete_control_contexts(int r) { + ceph_assert(mdcache->mds->mds_lock.is_locked_by_me()); + + for (auto &ctx : control_ctxs) { + ctx->complete(r); + } + control_ctxs.clear(); +} + +void ScrubStack::set_state(State next_state) { + if (state != next_state) { + dout(20) << __func__ << ", from state=" << state << ", to state=" + << next_state << dendl; + state = next_state; + } +} + +bool ScrubStack::scrub_in_transition_state() { + ceph_assert(mdcache->mds->mds_lock.is_locked_by_me()); + dout(20) << __func__ << ": state=" << state << dendl; + + // STATE_RUNNING is considered as a transition state so as to + // "delay" the scrub control operation. 
+ if (state == STATE_RUNNING || state == STATE_PAUSING) { + return true; + } + + return false; +} + +void ScrubStack::scrub_status(Formatter *f) { + ceph_assert(mdcache->mds->mds_lock.is_locked_by_me()); + + f->open_object_section("result"); + + std::stringstream ss; + bool have_more = false; + + if (state == STATE_IDLE) { + ss << "no active scrubs running"; + } else if (state == STATE_RUNNING) { + if (clear_inode_stack) { + ss << "ABORTING"; + } else { + ss << "scrub active"; + } + ss << " (" << stack_size << " inodes in the stack)"; + } else { + if (state == STATE_PAUSING || state == STATE_PAUSED) { + have_more = true; + ss << state; + } + if (clear_inode_stack) { + if (have_more) { + ss << "+"; + } + ss << "ABORTING"; + } + + ss << " (" << stack_size << " inodes in the stack)"; + } + f->dump_string("status", ss.str()); + + f->open_object_section("scrubs"); + for (auto &inode : scrub_origins) { + have_more = false; + ScrubHeaderRefConst header = inode->get_scrub_header(); + + std::string tag(header->get_tag()); + f->open_object_section(tag.c_str()); // scrub id + + std::string path; + inode->make_path_string(path, true); + f->dump_string("path", path.empty() ? 
"/" : path.c_str()); + + std::stringstream optss; + if (header->get_recursive()) { + optss << "recursive"; + have_more = true; + } + if (header->get_repair()) { + if (have_more) { + optss << ","; + } + optss << "repair"; + have_more = true; + } + if (header->get_force()) { + if (have_more) { + optss << ","; + } + optss << "force"; + } + + f->dump_string("options", optss.str()); + f->close_section(); // scrub id + } + f->close_section(); // scrubs + f->close_section(); // result +} + +void ScrubStack::abort_pending_scrubs() { + ceph_assert(mdcache->mds->mds_lock.is_locked_by_me()); + ceph_assert(clear_inode_stack); + + for (auto inode = inode_stack.begin(); !inode.end(); ++inode) { + CInode *in = *inode; + if (in == in->scrub_info()->header->get_origin()) { + scrub_origins.erase(in); + } + + MDSInternalContextBase *ctx = nullptr; + in->scrub_aborted(&ctx); + if (ctx != nullptr) { + ctx->complete(-ECANCELED); + } + } + + stack_size = 0; + inode_stack.clear(); + clear_inode_stack = false; +} + +void ScrubStack::scrub_abort(Context *on_finish) { + ceph_assert(mdcache->mds->mds_lock.is_locked_by_me()); + ceph_assert(on_finish != nullptr); + + dout(10) << __func__ << ": aborting with " << scrubs_in_progress + << " scrubs in progress and " << stack_size << " in the" + << " stack" << dendl; + + clear_inode_stack = true; + if (scrub_in_transition_state()) { + control_ctxs.push_back(on_finish); + return; + } + + abort_pending_scrubs(); + if (state != STATE_PAUSED) { + set_state(STATE_IDLE); + } + on_finish->complete(0); +} + +void ScrubStack::scrub_pause(Context *on_finish) { + ceph_assert(mdcache->mds->mds_lock.is_locked_by_me()); + ceph_assert(on_finish != nullptr); + + dout(10) << __func__ << ": pausing with " << scrubs_in_progress + << " scrubs in progress and " << stack_size << " in the" + << " stack" << dendl; + + // abort is in progress + if (clear_inode_stack) { + on_finish->complete(-EINVAL); + return; + } + + bool done = scrub_in_transition_state(); + if (done) { + 
set_state(STATE_PAUSING); + control_ctxs.push_back(on_finish); + return; + } + + set_state(STATE_PAUSED); + on_finish->complete(0); +} + +int ScrubStack::scrub_resume() { + ceph_assert(mdcache->mds->mds_lock.is_locked_by_me()); + dout(20) << __func__ << ": state=" << state << dendl; + + int r = 0; + + if (clear_inode_stack) { + r = -EINVAL; + } else if (state == STATE_PAUSING) { + set_state(STATE_RUNNING); + complete_control_contexts(-ECANCELED); + } else if (state == STATE_PAUSED) { + set_state(STATE_RUNNING); + kick_off_scrubs(); + } + + return r; +} diff --git a/src/mds/ScrubStack.h b/src/mds/ScrubStack.h index 86e0b28e7d4..e01cebfa484 100644 --- a/src/mds/ScrubStack.h +++ b/src/mds/ScrubStack.h @@ -44,6 +44,10 @@ protected: C_KickOffScrubs(MDCache *mdcache, ScrubStack *s); void finish(int r) override { } void complete(int r) override { + if (r == -ECANCELED) { + return; + } + stack->scrubs_in_progress--; stack->kick_off_scrubs(); // don't delete self @@ -76,6 +80,7 @@ public: void enqueue_inode_top(CInode *in, ScrubHeaderRef& header, MDSInternalContextBase *on_finish) { + if (!clear_inode_stack) scrub_origins.emplace(in); // enqueue_inode() rejects with -EAGAIN during abort enqueue_inode(in, header, on_finish, true); } /** Like enqueue_inode_top, but we wait for all pending scrubs before * starting this one. @@ -83,9 +88,66 @@ public: void enqueue_inode_bottom(CInode *in, ScrubHeaderRef& header, MDSInternalContextBase *on_finish) { + if (!clear_inode_stack) scrub_origins.emplace(in); // enqueue_inode() rejects with -EAGAIN during abort enqueue_inode(in, header, on_finish, false); } + /** + * Abort an ongoing scrub operation. The abort operation could be + * delayed if there are in-progress scrub operations on going. The + * caller should provide a context which is completed after all + * in-progress scrub operations are completed and pending inodes + * are removed from the scrub stack (with the context callbacks for + * inodes completed with -ECANCELED). + * @param on_finish Context callback to invoke after abort + */ + void scrub_abort(Context *on_finish); + + /** + * Pause scrub operations. 
Similar to abort, pause is delayed if + * there are in-progress scrub operations on going. The caller + * should provide a context which is completed after all in-progress + * scrub operations are completed. Subsequent scrub operations are + * queued until scrub is resumed. + * @param on_finish Context callback to invoke after pause + */ + void scrub_pause(Context *on_finish); + + /** + * Resume a paused scrub. Unlike abort or pause, this is instantaneous. + * Pending pause operations are cancelled (context callbacks are + * invoked with -ECANCELED). + * @returns 0 (success) if resumed, -EINVAL if an abort is in-progress. + */ + int scrub_resume(); + + /** + * Get the current scrub status as human readable string. Some basic + * information is returned such as number of inodes pending abort/pause. + */ + void scrub_status(Formatter *f); + private: + // scrub abort is _not_ a state, rather it's an operation that's + // performed after in-progress scrubs are finished. + enum State { + STATE_RUNNING = 0, + STATE_IDLE, + STATE_PAUSING, + STATE_PAUSED, + }; + friend std::ostream &operator<<(std::ostream &os, const State &state); + + State state = STATE_IDLE; + bool clear_inode_stack = false; + + // list of pending context completions for asynchronous scrub + // control operations. + std::list<Context *> control_ctxs; + + // list of inodes for which scrub operations are running -- used + // to display in `scrub status`. + std::set<CInode *> scrub_origins; + /** * Put the inode at either the top or bottom of the stack, with * the given scrub params, and then try and kick off more scrubbing. @@ -185,6 +247,28 @@ private: */ bool get_next_cdir(CInode *in, CDir **new_dir); + /** + * Set scrub state + * @param next_state State to move the scrub to. + */ + void set_state(State next_state); + + /** + * Is scrub in one of transition states (running, pausing) + */ + bool scrub_in_transition_state(); + + /** + * complete queued up contexts + * @param r return value to complete contexts. 
+ */ + void complete_control_contexts(int r); + + /** + * Abort pending scrubs for inodes waiting in the inode stack. + * Completion context is complete with -ECANCELED. + */ + void abort_pending_scrubs(); }; #endif /* SCRUBSTACK_H_ */ -- 2.39.5