From: Yan, Zheng
Date: Wed, 13 Mar 2013 08:23:30 +0000 (+0800)
Subject: mds: compose and send resolve messages in batch
X-Git-Tag: v0.62~120^2~29
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=30dbb1d4e5e740b11c41d3c33af9198370b2b248;p=ceph.git

mds: compose and send resolve messages in batch

Resolve messages for all MDS are the same, so we can compose and send
them in batch.

Signed-off-by: Yan, Zheng
Reviewed-by: Greg Farnum
---
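Note on the approach (the sketch below is illustrative only, not part of the
patch): the subtree resolve sent to each recovering peer carries identical
information, so rather than re-composing it once per target, the new
send_subtree_resolves() allocates one MMDSResolve per peer up front, fills
them all during a single walk of the subtree map, and sends them in one pass.
The sketch uses hypothetical stand-ins -- ResolveMsg for MMDSResolve and
send_to() for MDS::send_message_mds():

    // Sketch only.  ResolveMsg and send_to() are assumed stand-ins,
    // not the real Ceph types or API.
    #include <map>
    #include <set>

    struct ResolveMsg { /* subtree / ambiguous-import lists would live here */ };

    static void send_to(int peer, ResolveMsg *m) { delete m; /* placeholder transport */ }

    void send_resolves_in_batch(const std::set<int> &recovery_set, int whoami)
    {
      std::map<int, ResolveMsg*> resolves;

      // compose: one message per recovering peer (skip ourselves)
      for (std::set<int>::const_iterator p = recovery_set.begin();
           p != recovery_set.end(); ++p)
        if (*p != whoami)
          resolves[*p] = new ResolveMsg;

      // ...populate every message with the same subtree information...

      // send: one pass over the composed messages
      for (std::map<int, ResolveMsg*>::iterator p = resolves.begin();
           p != resolves.end(); ++p)
        send_to(p->first, p->second);
    }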
diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc
index f42c018352f4..d3a15981a124 100644
--- a/src/mds/MDCache.cc
+++ b/src/mds/MDCache.cc
@@ -2436,10 +2436,6 @@ void MDCache::resolve_start()
     if (rootdir)
       adjust_subtree_auth(rootdir, CDIR_AUTH_UNKNOWN);
   }
-
-  for (map<int, map<metareqid_t, MDSlaveUpdate*> >::iterator p = uncommitted_slave_updates.begin();
-       p != uncommitted_slave_updates.end(); ++p)
-    need_resolve_ack.insert(p->first);
 }
 
 void MDCache::send_resolves()
@@ -2448,9 +2444,10 @@ void MDCache::send_resolves()
   got_resolve.clear();
   other_ambiguous_imports.clear();
 
-  if (!need_resolve_ack.empty()) {
-    for (set<int>::iterator p = need_resolve_ack.begin(); p != need_resolve_ack.end(); ++p)
-      send_slave_resolve(*p);
+  send_slave_resolves();
+  if (!need_resolve_ack.empty()) {
+    dout(10) << "send_resolves still waiting for resolve ack from ("
+             << need_resolve_ack << ")" << dendl;
     return;
   }
   if (!need_resolve_rollback.empty()) {
@@ -2458,95 +2455,74 @@ void MDCache::send_resolves()
              << need_resolve_rollback << ")" << dendl;
     return;
   }
 
-  assert(uncommitted_slave_updates.empty());
-  for (set<int>::iterator p = recovery_set.begin(); p != recovery_set.end(); ++p) {
-    int who = *p;
-    if (who == mds->whoami)
-      continue;
-    if (migrator->is_importing() ||
-        migrator->is_exporting())
-      send_resolve_later(who);
-    else
-      send_resolve_now(who);
-  }
-}
-
-void MDCache::send_resolve_later(int who)
-{
-  dout(10) << "send_resolve_later to mds." << who << dendl;
-  wants_resolve.insert(who);
+  send_subtree_resolves();
 }
 
-void MDCache::maybe_send_pending_resolves()
+void MDCache::send_slave_resolves()
 {
-  if (wants_resolve.empty())
-    return;  // nothing to send.
-
-  // only if it's appropriate!
-  if (migrator->is_exporting() ||
-      migrator->is_importing()) {
-    dout(7) << "maybe_send_pending_resolves waiting, imports/exports still in progress" << dendl;
-    migrator->show_importing();
-    migrator->show_exporting();
-    return;  // not now
-  }
-
-  // ok, send them.
-  for (set<int>::iterator p = wants_resolve.begin();
-       p != wants_resolve.end();
-       ++p)
-    send_resolve_now(*p);
-  wants_resolve.clear();
-}
+  dout(10) << "send_slave_resolves" << dendl;
+  map<int, MMDSResolve*> resolves;
 
-class C_MDC_SendResolve : public Context {
-  MDCache *mdc;
-  int who;
-public:
-  C_MDC_SendResolve(MDCache *c, int w) : mdc(c), who(w) { }
-  void finish(int r) {
-    mdc->send_resolve_now(who);
-  }
-};
-
-void MDCache::send_slave_resolve(int who)
-{
-  dout(10) << "send_slave_resolve to mds." << who << dendl;
-  MMDSResolve *m = new MMDSResolve;
-
-  // list prepare requests lacking a commit
-  // [active survivor]
-  for (hash_map<metareqid_t, MDRequest*>::iterator p = active_requests.begin();
-       p != active_requests.end();
-       ++p) {
-    if (p->second->is_slave() && p->second->slave_to_mds == who) {
-      dout(10) << " including uncommitted " << *p->second << dendl;
-      m->add_slave_request(p->first);
+  if (mds->is_resolve()) {
+    for (map<int, map<metareqid_t, MDSlaveUpdate*> >::iterator p = uncommitted_slave_updates.begin();
+         p != uncommitted_slave_updates.end();
+         ++p) {
+      resolves[p->first] = new MMDSResolve;
+      for (map<metareqid_t, MDSlaveUpdate*>::iterator q = p->second.begin();
+           q != p->second.end();
+           ++q) {
+        dout(10) << " including uncommitted " << q->first << dendl;
+        resolves[p->first]->add_slave_request(q->first);
+      }
     }
-  }
-  // [resolving]
-  if (uncommitted_slave_updates.count(who) &&
-      !uncommitted_slave_updates[who].empty()) {
-    for (map<metareqid_t, MDSlaveUpdate*>::iterator p = uncommitted_slave_updates[who].begin();
-         p != uncommitted_slave_updates[who].end();
-         ++p) {
-      dout(10) << " including uncommitted " << p->first << dendl;
-      m->add_slave_request(p->first);
+  } else {
+    set<int> resolve_set;
+    mds->mdsmap->get_mds_set(resolve_set, MDSMap::STATE_RESOLVE);
+    for (hash_map<metareqid_t, MDRequest*>::iterator p = active_requests.begin();
+         p != active_requests.end();
+         ++p) {
+      if (!p->second->is_slave() || !p->second->slave_did_prepare())
+        continue;
+      int master = p->second->slave_to_mds;
+      if (resolve_set.count(master)) {
+        dout(10) << " including uncommitted " << *p->second << dendl;
+        if (!resolves.count(master))
+          resolves[master] = new MMDSResolve;
+        resolves[master]->add_slave_request(p->first);
+      }
     }
   }
 
-  assert(!m->slave_requests.empty());
-  dout(10) << " will need resolve ack from mds." << who << dendl;
-  mds->send_message_mds(m, who);
+  for (map<int, MMDSResolve*>::iterator p = resolves.begin();
+       p != resolves.end();
+       ++p) {
+    dout(10) << "sending slave resolve to mds." << p->first << dendl;
+    mds->send_message_mds(p->second, p->first);
+    need_resolve_ack.insert(p->first);
+  }
 }
 
-void MDCache::send_resolve_now(int who)
+void MDCache::send_subtree_resolves()
 {
-  dout(10) << "send_resolve_now to mds." << who << dendl;
-  MMDSResolve *m = new MMDSResolve;
+  dout(10) << "send_subtree_resolves" << dendl;
 
-  show_subtrees();
+  if (migrator->is_exporting() || migrator->is_importing()) {
+    dout(7) << "send_subtree_resolves waiting, imports/exports still in progress" << dendl;
+    migrator->show_importing();
+    migrator->show_exporting();
+    resolves_pending = true;
+    return;  // not now
+  }
+
+  map<int, MMDSResolve*> resolves;
+  for (set<int>::iterator p = recovery_set.begin();
+       p != recovery_set.end();
+       ++p) {
+    if (*p == mds->whoami)
+      continue;
+    resolves[*p] = new MMDSResolve;
+  }
 
   // known
   for (map<CDir*, set<CDir*> >::iterator p = subtrees.begin();
@@ -2566,22 +2542,30 @@ void MDCache::send_resolve_now(int who)
       set<CDir*> bounds;
       get_subtree_bounds(dir, bounds);
       vector<dirfrag_t> dfls;
-      for (set<CDir*>::iterator p = bounds.begin(); p != bounds.end(); ++p)
-        dfls.push_back((*p)->dirfrag());
-      m->add_ambiguous_import(dir->dirfrag(), dfls);
+      for (set<CDir*>::iterator q = bounds.begin(); q != bounds.end(); ++q)
+        dfls.push_back((*q)->dirfrag());
+      for (map<int, MMDSResolve*>::iterator q = resolves.begin();
+           q != resolves.end();
+           ++q)
+        resolves[q->first]->add_ambiguous_import(dir->dirfrag(), dfls);
       dout(10) << " ambig " << dir->dirfrag() << " " << dfls << dendl;
     } else {
       // not ambiguous.
-      m->add_subtree(dir->dirfrag());
-
+      for (map<int, MMDSResolve*>::iterator q = resolves.begin();
+           q != resolves.end();
+           ++q)
+        resolves[q->first]->add_subtree(dir->dirfrag());
       // bounds too
       vector<dirfrag_t> dfls;
       for (set<CDir*>::iterator q = subtrees[dir].begin();
            q != subtrees[dir].end();
            ++q) {
         CDir *bound = *q;
-        m->add_subtree_bound(dir->dirfrag(), bound->dirfrag());
         dfls.push_back(bound->dirfrag());
+        for (map<int, MMDSResolve*>::iterator r = resolves.begin();
+             r != resolves.end();
+             ++r)
+          resolves[r->first]->add_subtree_bound(dir->dirfrag(), bound->dirfrag());
       }
       dout(10) << " claim " << dir->dirfrag() << " " << dfls << dendl;
     }
@@ -2591,15 +2575,23 @@ void MDCache::send_resolve_now(int who)
   for (map<dirfrag_t, vector<dirfrag_t> >::iterator p = my_ambiguous_imports.begin();
        p != my_ambiguous_imports.end();
        ++p) {
-    m->add_ambiguous_import(p->first, p->second);
+    for (map<int, MMDSResolve*>::iterator q = resolves.begin();
+         q != resolves.end();
+         ++q)
+      resolves[q->first]->add_ambiguous_import(p->first, p->second);
     dout(10) << " ambig " << p->first << " " << p->second << dendl;
   }
 
   // send
-  mds->send_message_mds(m, who);
+  for (map<int, MMDSResolve*>::iterator p = resolves.begin();
+       p != resolves.end();
+       ++p) {
+    dout(10) << "sending subtee resolve to mds." << p->first << dendl;
+    mds->send_message_mds(p->second, p->first);
+  }
+  resolves_pending = false;
 }
-
 
 void MDCache::handle_mds_failure(int who)
 {
   dout(7) << "handle_mds_failure mds." << who << dendl;
@@ -2635,7 +2627,6 @@ void MDCache::handle_mds_failure(int who)
     // slave to the failed node?
     if (p->second->slave_to_mds == who) {
       if (p->second->slave_did_prepare()) {
-        need_resolve_ack.insert(who);
         dout(10) << " slave request " << *p->second << " uncommitted, will resolve shortly" << dendl;
       } else {
         dout(10) << " slave request " << *p->second << " has no prepare, finishing up" << dendl;
@@ -3015,7 +3006,7 @@ void MDCache::handle_resolve_ack(MMDSResolveAck *ack)
   need_resolve_ack.erase(from);
 
   if (need_resolve_ack.empty() && need_resolve_rollback.empty()) {
-    send_resolves();
+    send_subtree_resolves();
     process_delayed_resolve();
   }
 
@@ -3082,7 +3073,7 @@ void MDCache::finish_rollback(metareqid_t reqid) {
     finish_uncommitted_slave_update(reqid, need_resolve_rollback[reqid]);
   need_resolve_rollback.erase(reqid);
   if (need_resolve_ack.empty() && need_resolve_rollback.empty()) {
-    send_resolves();
+    send_subtree_resolves();
     process_delayed_resolve();
   }
 }
diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h
index 1666a3dff8d4..f646e91866de 100644
--- a/src/mds/MDCache.h
+++ b/src/mds/MDCache.h
@@ -328,6 +328,7 @@ protected:
   friend class ESlaveUpdate;
   friend class ECommitted;
 
+  bool resolves_pending;
   set<int> wants_resolve;      // nodes i need to send my resolve to
   set<int> got_resolve;        // nodes i got resolves from
   set<int> need_resolve_ack;   // nodes i need a resolve_ack from
@@ -367,10 +368,12 @@ public:
   void finish_ambiguous_import(dirfrag_t dirino);
   void resolve_start();
   void send_resolves();
-  void send_slave_resolve(int who);
-  void send_resolve_now(int who);
-  void send_resolve_later(int who);
-  void maybe_send_pending_resolves();
+  void send_slave_resolves();
+  void send_subtree_resolves();
+  void maybe_send_pending_resolves() {
+    if (resolves_pending)
+      send_subtree_resolves();
+  }
 
   void _move_subtree_map_bound(dirfrag_t df, dirfrag_t oldparent, dirfrag_t newparent,
                                map<dirfrag_t, vector<dirfrag_t> >& subtrees);