From: Sage Weil Date: Thu, 5 Jun 2008 14:14:45 +0000 (-0700) Subject: mds: slight cleanup of client reconnect failures X-Git-Tag: v0.3~166 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=6c2f711cc2238d2ece5871d1ef27021672c994dd;p=ceph.git mds: slight cleanup of client reconnect failures --- diff --git a/src/TODO b/src/TODO index 3f5ce04a0d3..bfd05ba6e79 100644 --- a/src/TODO +++ b/src/TODO @@ -82,13 +82,12 @@ mds - lease length heuristics - mds lock last_change stamp? -- can we get rid of the dirlock remote auth_pin weirdness on subtree roots? +- handle slow client reconnect (i.e. after mds has gone active) - fix reconnect/rejoin open file weirdness - get rid of C*Discover objects for replicate_to .. encode to bufferlists directly? -- failure during reconnect vs clientmap. - +- can we get rid of the dirlock remote auth_pin weirdness on subtree roots? - anchor_destroy needs to xlock linklock.. which means it needs a Mutation wrapper? - ... when it gets a caller.. someday.. diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index fbb23bd580d..9a88a8f527e 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -2923,10 +2923,12 @@ void MDCache::queue_file_recover(CInode *in) */ void MDCache::identify_files_to_recover() { + /* no. we may have failed a reconnect, then crashed before recovering all sizes.. if (!mds->server->failed_reconnects) { dout(10) << "identify_files_to_recover -- all clients reconnected, nothing to do" << dendl; return; } + */ dout(10) << "identify_files_to_recover" << dendl; for (hash_map::iterator p = inode_map.begin(); diff --git a/src/mds/MDS.cc b/src/mds/MDS.cc index 8a6d9762956..5e53bec9cd1 100644 --- a/src/mds/MDS.cc +++ b/src/mds/MDS.cc @@ -1237,9 +1237,6 @@ void MDS::ms_handle_failure(Message *m, const entity_inst_t& inst) mds_lock.Lock(); dout(0) << "ms_handle_failure to " << inst << " on " << *m << dendl; - if (m->get_type() == CEPH_MSG_MDS_MAP && m->get_dest().is_client()) - server->client_reconnect_failure(m->get_dest().num()); - delete m; mds_lock.Unlock(); } diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 8dbaa739e51..c1820c41cbb 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -461,18 +461,6 @@ void Server::process_reconnected_caps() } -void Server::client_reconnect_failure(int from) -{ - dout(5) << "client_reconnect_failure on client" << from << dendl; - if (mds->is_reconnect() && - client_reconnect_gather.count(from)) { - failed_reconnects++; - client_reconnect_gather.erase(from); - if (client_reconnect_gather.empty()) - reconnect_gather_finish(); - } -} - void Server::reconnect_gather_finish() { dout(7) << "reconnect_gather_finish. failed on " << failed_reconnects << " clients" << dendl; @@ -489,10 +477,18 @@ void Server::reconnect_tick() for (set::iterator p = client_reconnect_gather.begin(); p != client_reconnect_gather.end(); p++) { + Session *session = mds->sessionmap.get_session(entity_name_t::CLIENT(*p)); + dout(1) << "reconnect gave up on " << session->inst << dendl; + + /* no, we need to respect g_conf.mds_session_autoclose + // since we are reconnecting, cheat a bit and don't project anything. + mds->sessionmap.projected++; + mds->sessionmap.version++; + mdlog->submit_entry(new ESession(session->inst, false, mds->sessionmap.version)); + mds->messenger->mark_down(session->inst.addr); + */ + failed_reconnects++; - dout(1) << "reconnect gave up on " - << mds->sessionmap.get_inst(entity_name_t::CLIENT(*p)) - << dendl; } client_reconnect_gather.clear(); reconnect_gather_finish(); @@ -506,7 +502,7 @@ void Server::reconnect_tick() /* - * send generic response (just and error code) + * send generic response (just an error code) */ void Server::reply_request(MDRequest *mdr, int r, CInode *tracei, CDentry *tracedn) { diff --git a/src/mds/Server.h b/src/mds/Server.h index 1d213438a2f..90d768929b5 100644 --- a/src/mds/Server.h +++ b/src/mds/Server.h @@ -75,7 +75,6 @@ public: reconnected_caps.insert(in); } void process_reconnected_caps(); - void client_reconnect_failure(int from); void reconnect_gather_finish(); void reconnect_tick();