case EXPORT_LOCKING:
dout(10) << "export state=locking : dropping locks and removing auth_pin" << dendl;
num_locking_exports--;
- it->second.state = EXPORT_CANCELLED;
+ it->second.set_state(EXPORT_CANCELLED);
dir->auth_unpin(this);
break;
case EXPORT_DISCOVERING:
dout(10) << "export state=discovering : canceling freeze and removing auth_pin" << dendl;
- it->second.state = EXPORT_CANCELLED;
+ it->second.set_state(EXPORT_CANCELLED);
dir->unfreeze_tree(); // cancel the freeze
dir->auth_unpin(this);
if (notify_peer &&
case EXPORT_FREEZING:
dout(10) << "export state=freezing : canceling freeze" << dendl;
- it->second.state = EXPORT_CANCELLED;
+ it->second.set_state(EXPORT_CANCELLED);
dir->unfreeze_tree(); // cancel the freeze
if (dir->is_subtree_root())
mdcache->try_subtree_merge(dir);
// NOTE: state order reversal, warning comes after prepping
case EXPORT_WARNING:
dout(10) << "export state=warning : unpinning bounds, unfreezing, notifying" << dendl;
- it->second.state = EXPORT_CANCELLING;
+ it->second.set_state(EXPORT_CANCELLING);
// fall-thru
case EXPORT_PREPPING:
if (state != EXPORT_WARNING) {
dout(10) << "export state=prepping : unpinning bounds, unfreezing" << dendl;
- it->second.state = EXPORT_CANCELLED;
+ it->second.set_state(EXPORT_CANCELLED);
}
{
case EXPORT_EXPORTING:
dout(10) << "export state=exporting : reversing, and unfreezing" << dendl;
- it->second.state = EXPORT_CANCELLING;
+ it->second.set_state(EXPORT_CANCELLING);
export_reverse(dir, it->second);
break;
ceph_assert(export_state.count(dir) == 0);
export_state_t& stat = export_state[dir];
num_locking_exports++;
- stat.state = EXPORT_LOCKING;
+ stat.set_state(EXPORT_LOCKING);
stat.peer = dest;
stat.tid = mdr->reqid.tid;
stat.mut = mdr;
if (results.size() == 1 && results.front().first == dir) {
num_locking_exports--;
- it->second.state = EXPORT_DISCOVERING;
+ it->second.set_state(EXPORT_DISCOVERING);
// send ExportDirDiscover (ask target)
filepath path;
dir->inode->make_path(path);
ceph_assert(export_state.count(sub) == 0);
auto& stat = export_state[sub];
num_locking_exports++;
- stat.state = EXPORT_LOCKING;
+ stat.set_state(EXPORT_LOCKING);
stat.peer = dest;
stat.tid = _mdr->reqid.tid;
stat.mut = _mdr;
ceph_assert(it->second.state == EXPORT_DISCOVERING);
if (m->is_success()) {
+ // move to freezing the subtree
+ it->second.set_state(EXPORT_FREEZING);
// release locks to avoid deadlock
MDRequestRef mdr = static_cast<MDRequestImpl*>(it->second.mut.get());
ceph_assert(mdr);
}
// send.
- it->second.state = EXPORT_PREPPING;
+ it->second.set_state(EXPORT_PREPPING);
mds->send_message_mds(prep, it->second.peer);
ceph_assert(g_conf()->mds_kill_export_at != 4);
// make sure any new instantiations of caps are flushed out
ceph_assert(it->second.warning_ack_waiting.empty());
- set<client_t> export_client_set;
- get_export_client_set(dir, export_client_set);
+ ceph_assert(it->second.export_client_set.empty());
+ get_export_client_set(dir, it->second.export_client_set);
MDSGatherBuilder gather(g_ceph_context);
- mds->server->flush_client_sessions(export_client_set, gather);
+ mds->server->flush_client_sessions(it->second.export_client_set, gather);
if (gather.has_subs()) {
it->second.warning_ack_waiting.insert(MDS_RANK_NONE);
gather.set_finisher(new C_M_ExportSessionsFlushed(this, dir, it->second.tid));
}
- it->second.state = EXPORT_WARNING;
+ it->second.set_state(EXPORT_WARNING);
ceph_assert(g_conf()->mds_kill_export_at != 6);
// nobody to warn?
dout(7) << *dir << " to " << dest << dendl;
mdcache->show_subtrees();
-
- it->second.state = EXPORT_EXPORTING;
+
+ it->second.set_state(EXPORT_EXPORTING);
ceph_assert(g_conf()->mds_kill_export_at != 7);
ceph_assert(dir->is_frozen_tree_root());
auto bp = m->imported_caps.cbegin();
decode(it->second.peer_imported, bp);
- it->second.state = EXPORT_LOGGINGFINISH;
+ it->second.set_state(EXPORT_LOGGINGFINISH);
ceph_assert(g_conf()->mds_kill_export_at != 9);
set<CDir*> bounds;
mdcache->get_subtree_bounds(dir, bounds);
ceph_assert(stat.state == EXPORT_CANCELLING);
if (stat.notify_ack_waiting.empty()) {
- stat.state = EXPORT_CANCELLED;
+ stat.set_state(EXPORT_CANCELLED);
return;
}
}
// wait for notifyacks
- stat.state = EXPORT_NOTIFYING;
+ stat.set_state(EXPORT_NOTIFYING);
ceph_assert(g_conf()->mds_kill_export_at != 11);
// no notifies to wait for?
}
}
+// Dump every entry of export_state into the formatter as an array named
+// "states".  Each element carries the tid, the directory (path and
+// dirfrag), the current state name, the per-state timing history, the
+// peer rank, and a set of extra fields that depends on the current state.
+void Migrator::dump_export_states(Formatter *f)
+{
+  // emit a collection of ranks as an array section with the given name
+  auto dump_ranks = [f](const char *section, const auto& ranks) {
+    f->open_array_section(section);
+    for (const auto& rank : ranks)
+      f->dump_int("rank", rank);
+    f->close_section();
+  };
+
+  f->open_array_section("states");
+  for (const auto& [dir, stat] : export_state) {
+    const int cur = stat.state;
+
+    f->open_object_section("state");
+    f->dump_unsigned("tid", stat.tid);
+    dir->dump(f, CDir::DUMP_PATH | CDir::DUMP_DIRFRAG);
+    f->dump_string("state", get_export_statename(cur));
+
+    // one sub-object per state recorded in the history, keyed by state name
+    f->open_object_section("state_history");
+    for (const auto& entry : stat.state_history) {
+      const int past = entry.first;
+      f->open_object_section(get_export_statename(past));
+      f->dump_stream("start_at") << stat.get_start_time(past);
+      f->dump_float("time_spent", stat.get_time_spent(past));
+      f->close_section();
+    }
+    f->close_section();
+
+    f->dump_int("peer", stat.peer);
+
+    // which groups of state-specific fields apply to the current state
+    const bool in_freeze_phase =
+      cur == EXPORT_DISCOVERING || cur == EXPORT_FREEZING;
+    const bool in_flush_phase =
+      cur == EXPORT_PREPPING || cur == EXPORT_WARNING;
+    // EXPORT_WARNING reports both the flush fields and the notify acks
+    const bool in_notify_phase =
+      cur == EXPORT_WARNING || cur == EXPORT_EXPORTING ||
+      cur == EXPORT_LOGGINGFINISH || cur == EXPORT_NOTIFYING;
+
+    if (in_freeze_phase) {
+      f->dump_stream("last_cum_auth_pins_change") << stat.last_cum_auth_pins_change;
+      f->dump_int("last_cum_auth_pins", stat.last_cum_auth_pins);
+      f->dump_int("num_remote_waiters", stat.num_remote_waiters);
+    }
+
+    if (in_flush_phase) {
+      f->open_array_section("flushed_clients");
+      for (const auto& client : stat.export_client_set)
+        f->dump_int("client", client.v);
+      f->close_section();
+
+      dump_ranks("warning_ack_waiting", stat.warning_ack_waiting);
+    }
+
+    if (in_notify_phase)
+      dump_ranks("notify_ack_waiting", stat.notify_ack_waiting);
+
+    // get_freeze_tree_time() asserts state >= EXPORT_DISCOVERING, so it is
+    // only queried for those states
+    if (cur >= EXPORT_DISCOVERING) {
+      f->dump_unsigned("approx_size", stat.approx_size);
+      f->dump_unsigned("unfreeze_tree_waiters", dir->count_unfreeze_tree_waiters());
+      f->dump_float("freeze_tree_time", stat.get_freeze_tree_time());
+    }
+
+    f->close_section();
+  }
+  f->close_section();
+}
+
void Migrator::decode_import_inode(CDentry *dn, bufferlist::const_iterator& blp,
mds_rank_t oldauth, LogSegment *ls,
map<CInode*, map<client_t,Capability::Export> >& peer_exports,
void import_finish(CDir *dir, bool notify, bool last=true);
+ void dump_export_states(Formatter *f);
+
protected:
struct export_base_t {
export_base_t(dirfrag_t df, mds_rank_t d, unsigned c, uint64_t g) :
struct export_state_t {
export_state_t() {}
- int state = 0;
+    // Move to state `s` and keep state_history up to date.
+    //
+    // The new state must differ from the current one (asserted).  Unless
+    // the state being left is EXPORT_CANCELLED, the duration of the state
+    // being left is stored in its history entry; that entry must already
+    // exist (std::map::at throws otherwise).  The new state then gets a
+    // history entry stamped with the current time and a zero duration.
+    void set_state(int s) {
+      ceph_assert(s != state);
+      if (state != EXPORT_CANCELLED) {
+        // close out the entry of the state we are leaving
+        auto& leaving = state_history.at(state);
+        const double now = double(ceph_clock_now());
+        leaving.second = now - double(leaving.first);
+      }
+      state = s;
+      // open (or overwrite) the entry of the state we are entering
+      const std::pair<utime_t, double> entered(ceph_clock_now(), 0.0);
+      state_history[state] = entered;
+    }
+    // Return the time at which state `s` was entered, as recorded in
+    // state_history.  Asserts that `s` has a history entry.
+    utime_t get_start_time(int s) const {
+      const auto rec = state_history.find(s);
+      ceph_assert(rec != state_history.end());
+      return rec->second.first;
+    }
+    // Return the time spent in state `s`.  Asserts that `s` has a history
+    // entry.  For a state other than the current one this is the stored
+    // duration; for the current state it is the time elapsed since the
+    // state was entered, measured against the clock at call time.
+    double get_time_spent(int s) const {
+      const auto rec = state_history.find(s);
+      ceph_assert(rec != state_history.end());
+      if (s != state)
+        return rec->second.second;
+      return double(ceph_clock_now()) - double(rec->second.first);
+    }
+    // Return the time elapsed between entering EXPORT_DISCOVERING and now.
+    // Asserts that the current state is at or past EXPORT_DISCOVERING and
+    // that EXPORT_DISCOVERING has a history entry.
+    double get_freeze_tree_time() const {
+      ceph_assert(state >= EXPORT_DISCOVERING);
+      // single lookup instead of count() followed by at()
+      const auto discovering =
+        state_history.find(static_cast<int>(EXPORT_DISCOVERING));
+      ceph_assert(discovering != state_history.end());
+      return double(ceph_clock_now()) - double(discovering->second.first);
+    }
+
+ int state = EXPORT_CANCELLED;
mds_rank_t peer = MDS_RANK_NONE;
uint64_t tid = 0;
std::set<mds_rank_t> warning_ack_waiting;
std::map<inodeno_t,std::map<client_t,Capability::Import> > peer_imported;
MutationRef mut;
size_t approx_size = 0;
+ // record the start time and time spent of each export state
+ std::map<int, std::pair<utime_t, double> > state_history;
+ // record the clients whose sessions need to be flushed
+ std::set<client_t> export_client_set;
// for freeze tree deadlock detection
utime_t last_cum_auth_pins_change;
int last_cum_auth_pins = 0;