}));
return;
} else if (command == "scrub abort") {
+ if (whoami != 0) {
+ *css << "Not rank 0";
+ r = -EXDEV;
+ goto out;
+ }
+
finisher->queue(
new LambdaContext(
[this, on_finish, f](int r) {
}));
return;
} else if (command == "scrub pause") {
+ if (whoami != 0) {
+ *css << "Not rank 0";
+ r = -EXDEV;
+ goto out;
+ }
+
finisher->queue(
new LambdaContext(
[this, on_finish, f](int r) {
}));
return;
} else if (command == "scrub resume") {
+ if (whoami != 0) {
+ *css << "Not rank 0";
+ r = -EXDEV;
+ goto out;
+ }
command_scrub_resume(f);
} else if (command == "scrub status") {
command_scrub_status(f);
clear_stack = false;
}
+void ScrubStack::send_state_message(int op) {
+ MDSRank *mds = mdcache->mds;
+ set<mds_rank_t> up_mds;
+ mds->get_mds_map()->get_up_mds_set(up_mds);
+ for (auto& r : up_mds) {
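+ // rank 0 is the coordinator issuing this message; notify only peer ranks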
+ if (r == 0)
+ continue;
+ auto m = make_message<MMDSScrub>(op);
+ mds->send_message_mds(m, r);
+ }
+}
+
void ScrubStack::scrub_abort(Context *on_finish) {
ceph_assert(ceph_mutex_is_locked_by_me(mdcache->mds->mds_lock));
- ceph_assert(on_finish != nullptr);
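+ // on_finish may be nullptr when the abort is driven by an MMDSScrub
+ // control message from rank 0 rather than by a local command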
dout(10) << __func__ << ": aborting with " << scrubs_in_progress
<< " scrubs in progress and " << stack_size << " in the"
<< " stack" << dendl;
+ if (mdcache->mds->get_nodeid() == 0) {
+ scrub_epoch_last_abort = scrub_epoch;
+ scrub_any_peer_aborting = true;
+ send_state_message(MMDSScrub::OP_ABORT);
+ }
+
clear_stack = true;
if (scrub_in_transition_state()) {
- control_ctxs.push_back(on_finish);
+ if (on_finish)
+ control_ctxs.push_back(on_finish);
return;
}
abort_pending_scrubs();
- if (state != STATE_PAUSED) {
+ if (state != STATE_PAUSED)
set_state(STATE_IDLE);
- }
- on_finish->complete(0);
+
+ if (on_finish)
+ on_finish->complete(0);
}
void ScrubStack::scrub_pause(Context *on_finish) {
ceph_assert(ceph_mutex_is_locked_by_me(mdcache->mds->mds_lock));
- ceph_assert(on_finish != nullptr);
dout(10) << __func__ << ": pausing with " << scrubs_in_progress
<< " scrubs in progress and " << stack_size << " in the"
<< " stack" << dendl;
+ if (mdcache->mds->get_nodeid() == 0)
+ send_state_message(MMDSScrub::OP_PAUSE);
+
// abort is in progress
if (clear_stack) {
- on_finish->complete(-EINVAL);
+ if (on_finish)
+ on_finish->complete(-EINVAL);
return;
}
bool done = scrub_in_transition_state();
if (done) {
set_state(STATE_PAUSING);
- control_ctxs.push_back(on_finish);
+ if (on_finish)
+ control_ctxs.push_back(on_finish);
return;
}
set_state(STATE_PAUSED);
- on_finish->complete(0);
+ if (on_finish)
+ on_finish->complete(0);
}
bool ScrubStack::scrub_resume() {
ceph_assert(ceph_mutex_is_locked_by_me(mdcache->mds->mds_lock));
dout(20) << __func__ << ": state=" << state << dendl;
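+ // rank 0 fans the resume out to peer ranks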
+ if (mdcache->mds->get_nodeid() == 0)
+ send_state_message(MMDSScrub::OP_RESUME);
+
int r = 0;
if (clear_stack) {
}
}
break;
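+ // scrub control requests broadcast by rank 0; applied locally with no
+ // completion context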
+ case MMDSScrub::OP_ABORT:
+ scrub_abort(nullptr);
+ break;
+ case MMDSScrub::OP_PAUSE:
+ scrub_pause(nullptr);
+ break;
+ case MMDSScrub::OP_RESUME:
+ scrub_resume();
+ break;
default:
derr << " scrub stack unknown scrub operation " << m->get_op() << dendl_impl;
ceph_abort_msg("scrub stack unknown scrub operation");
scrub_epoch = m->get_epoch();
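+ // report whether this rank is still aborting (clear_stack) so rank 0
+ // can track abort progress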
- auto ack = make_message<MMDSScrubStats>(scrub_epoch, std::move(scrubbing_tags));
+ auto ack = make_message<MMDSScrubStats>(scrub_epoch,
+ std::move(scrubbing_tags), clear_stack);
mdcache->mds->send_message_mds(ack, 0);
if (any_finished)
auto& stat = mds_scrub_stats[from];
stat.epoch_acked = m->get_epoch();
stat.scrubbing_tags = m->get_scrubbing_tags();
+ stat.aborting = m->is_aborting();
}
}
}
void ScrubStack::advance_scrub_status()
{
- if (scrubbing_map.empty())
+ if (!scrub_any_peer_aborting && scrubbing_map.empty())
return;
MDSRank *mds = mdcache->mds;
if (up_max == 0) {
update_scrubbing = true;
+ scrub_any_peer_aborting = false;
} else if (mds_scrub_stats.size() > (size_t)(up_max)) {
+ bool any_aborting = false;
bool fully_acked = true;
for (const auto& stat : mds_scrub_stats) {
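+ // a rank is still aborting if it said so, or if it has not yet acked an
+ // epoch newer than the last abort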
+ if (stat.aborting || stat.epoch_acked <= scrub_epoch_last_abort)
+ any_aborting = true;
if (stat.epoch_acked != scrub_epoch) {
fully_acked = false;
- break;
+ continue;
}
scrubbing_tags.insert(stat.scrubbing_tags.begin(),
stat.scrubbing_tags.end());
}
+ if (!any_aborting)
+ scrub_any_peer_aborting = false;
if (fully_acked) {
// handle_scrub_stats() reports scrub is still in-progress if it has
// forwarded any object to other mds since previous epoch. Let's assume,
void ScrubStack::handle_mds_failure(mds_rank_t mds)
{
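+ // rank 0 coordinates distributed scrubs; if it fails, abort local scrub work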
+ if (mds == 0) {
+ scrub_abort(nullptr);
+ return;
+ }
+
bool kick = false;
for (auto it = remote_scrubs.begin(); it != remote_scrubs.end(); ) {
if (it->second.gather_set.erase(mds) &&
unsigned scrub_epoch = 2;
unsigned scrub_epoch_fully_acked = 0;
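+ // scrub epoch at which the last abort was issued (tracked on rank 0)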
+ unsigned scrub_epoch_last_abort = 2;
+ // whether any mds may still be aborting a scrub after mds.0 starts
+ bool scrub_any_peer_aborting = true;
struct scrub_stat_t {
unsigned epoch_acked = 0;
std::set<std::string> scrubbing_tags;
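+ // whether this rank reported an abort still in progress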
+ bool aborting = false;
};
std::vector<scrub_stat_t> mds_scrub_stats;
*/
void complete_control_contexts(int r);
+ /**
+ * Ask peer mds (rank > 0) to abort/pause/resume scrubs.
+ */
+ void send_state_message(int op);
+
/**
* Abort pending scrubs for inodes waiting in the inode stack.
* Completion contexts are completed with -ECANCELED.
static constexpr int OP_QUEUEDIR_ACK = -1;
static constexpr int OP_QUEUEINO = 2;
static constexpr int OP_QUEUEINO_ACK = -2;
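+ // scrub control operations broadcast from rank 0 to peer ranks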
+ static constexpr int OP_ABORT = 3;
+ static constexpr int OP_PAUSE = 4;
+ static constexpr int OP_RESUME = 5;
static const char *get_opname(int o) {
switch (o) {
case OP_QUEUEDIR_ACK: return "queue_dir_ack";
case OP_QUEUEINO: return "queue_ino";
case OP_QUEUEINO_ACK: return "queue_ino_ack";
+ case OP_ABORT: return "abort";
+ case OP_PAUSE: return "pause";
+ case OP_RESUME: return "resume";
default: ceph_abort(); return nullptr;
}
}
static constexpr int COMPAT_VERSION = 1;
MMDSScrub() : MMDSOp(MSG_MDS_SCRUB, HEAD_VERSION, COMPAT_VERSION) {}
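+ // op-only constructor, used for the abort/pause/resume control messages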
+ MMDSScrub(int o)
+ : MMDSOp(MSG_MDS_SCRUB, HEAD_VERSION, COMPAT_VERSION), op(o) {}
MMDSScrub(int o, inodeno_t i, fragset_t&& _frags, std::string_view _tag,
inodeno_t _origin=inodeno_t(), bool internal_tag=false,
bool force=false, bool recursive=false, bool repair=false)
void print(ostream& o) const override {
o << "mds_scrub_stats(e" << epoch;
if (update_scrubbing)
- o << " [" << scrubbing_tags << "])";
- else
- o << ")";
+ o << " [" << scrubbing_tags << "]";
+ if (aborting)
+ o << " aborting";
+ o << ")";
}
unsigned get_epoch() const { return epoch; }
const auto& get_scrubbing_tags() const { return scrubbing_tags; }
+ bool is_aborting() const { return aborting; }
bool is_finished(const std::string& tag) const {
return update_scrubbing && !scrubbing_tags.count(tag);
}
encode(epoch, payload);
encode(scrubbing_tags, payload);
encode(update_scrubbing, payload);
+ encode(aborting, payload);
}
void decode_payload() override {
using ceph::decode;
decode(epoch, p);
decode(scrubbing_tags, p);
decode(update_scrubbing, p);
+ decode(aborting, p);
}
protected:
MMDSScrubStats(unsigned e=0) :
MMDSOp(MSG_MDS_SCRUB_STATS, HEAD_VERSION, COMPAT_VERSION),
epoch(e) {}
- MMDSScrubStats(unsigned e, std::set<std::string>&& tags) :
+ MMDSScrubStats(unsigned e, std::set<std::string>&& tags, bool abrt=false) :
MMDSOp(MSG_MDS_SCRUB_STATS, HEAD_VERSION, COMPAT_VERSION),
- epoch(e), scrubbing_tags(std::move(tags)), update_scrubbing(true) {}
- MMDSScrubStats(unsigned e, const std::set<std::string>& tags) :
+ epoch(e), scrubbing_tags(std::move(tags)), update_scrubbing(true), aborting(abrt) {}
+ MMDSScrubStats(unsigned e, const std::set<std::string>& tags, bool abrt=false) :
MMDSOp(MSG_MDS_SCRUB_STATS, HEAD_VERSION, COMPAT_VERSION),
- epoch(e), scrubbing_tags(tags), update_scrubbing(true) {}
+ epoch(e), scrubbing_tags(tags), update_scrubbing(true), aborting(abrt) {}
~MMDSScrubStats() override {}
private:
unsigned epoch;
std::set<std::string> scrubbing_tags;
bool update_scrubbing = false;
+ bool aborting = false;
template<class T, typename... Args>
friend boost::intrusive_ptr<T> ceph::make_message(Args&&... args);