The current code sends resolve messages whenever the set of resolving
MDSs changes. There is no need to send resolve messages when some MDSs
leave the resolve stage, and sending them while some MDSs are still
replaying is not very useful either.
Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
int from = m->get_source().num();
if (mds->get_state() < MDSMap::STATE_RESOLVE) {
+ if (mds->get_want_state() == CEPH_MDS_STATE_RESOLVE) {
+ mds->wait_for_resolve(new C_MDS_RetryMessage(mds, m));
+ return;
+ }
// wait until we reach the resolve stage!
m->put();
return;
// RESOLVE
// is someone else newly resolving?
if (is_resolve() || is_rejoin() || is_clientreplay() || is_active() || is_stopping()) {
- set<int> oldresolve, resolve;
- oldmap->get_mds_set(oldresolve, MDSMap::STATE_RESOLVE);
- mdsmap->get_mds_set(resolve, MDSMap::STATE_RESOLVE);
- if (oldresolve != resolve) {
- dout(10) << " resolve set is " << resolve << ", was " << oldresolve << dendl;
+ if (!oldmap->is_resolving() && mdsmap->is_resolving()) {  // act only on the transition into the resolving state
+ set<int> resolve;  // ranks currently in STATE_RESOLVE; only needed for the log line below
+ mdsmap->get_mds_set(resolve, MDSMap::STATE_RESOLVE);
+ dout(10) << " resolve set is " << resolve << dendl;
calc_recovery_set();
- if (!mdsmap->is_any_failed())
- mdcache->send_resolves();
+ mdcache->send_resolves();  // no separate failed check: is_resolving() already requires failed.empty()
}
}
reopen_log();
mdcache->resolve_start();
+ finish_contexts(g_ceph_context, waiting_for_resolve);
}
void MDS::resolve_done()
{
int state; // my confirmed state
int want_state; // the state i want
- list<Context*> waiting_for_active, waiting_for_replay, waiting_for_reconnect;
+ list<Context*> waiting_for_active, waiting_for_replay, waiting_for_reconnect, waiting_for_resolve;
list<Context*> replay_queue;
map<int, list<Context*> > waiting_for_active_peer;
list<Message*> waiting_for_nolaggy;
void wait_for_reconnect(Context *c) {
waiting_for_reconnect.push_back(c);
}
+ void wait_for_resolve(Context *c) {  // queue c on waiting_for_resolve; that list is handed to finish_contexts() right after mdcache->resolve_start()
+ waiting_for_resolve.push_back(c);
+ }
void wait_for_mdsmap(epoch_t e, Context *c) {
waiting_for_mdsmap[e].push_back(c);
}
bool is_any_failed() {
return failed.size();
}
+ bool is_resolving() {  // true only when all three conditions below hold
+ return
+ get_num_mds(STATE_RESOLVE) > 0 &&  // at least one MDS is in STATE_RESOLVE
+ get_num_mds(STATE_REPLAY) == 0 &&  // no MDS is in STATE_REPLAY
+ failed.empty();  // the failed set is empty
+ }
bool is_rejoining() {
// nodes are rejoining cache state
return