From: Sage Weil Date: Sat, 2 Aug 2008 17:46:10 +0000 (-0700) Subject: mds: reconnect compiles... X-Git-Tag: v0.4~347 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=2b76325fa857796de6466cf9d522fb37a5fc2e83;p=ceph.git mds: reconnect compiles... --- diff --git a/src/TODO b/src/TODO index f888e000f2da..8c3ce9506607 100644 --- a/src/TODO +++ b/src/TODO @@ -23,11 +23,6 @@ big items snaps on mds - cap release probably needs ack by mds. or, mds needs to possibly initiate recovery on import? no, release should pbly just be acked by mds... like it was way back when... bah! -/- scatterlock recovery -- test rejoin -- client reconnect - - esp cap claim - - client snap caps - NO CAP STATE FOR SNAPPED INODES. - mds grants open access (yes/no), but there is no state, since there is no concurrency. @@ -36,6 +31,15 @@ snaps on mds - reconnect should map caps into snaprealms, and include snaprealm state, such that those can be brought in sync w/ the mds. - reconnect does _not_ need any per-cap snap-related info. +- mds open within snapshot +- client snap read + + +- test rejoin +- client reconnect + - esp cap claim + + /- call open_parents() where needed. - what about during recovery? e.g. client reconnected caps... 
diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 8d4cff86b5cb..1ccb4298a767 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -3439,14 +3439,13 @@ void MDCache::rejoin_gather_finish() ++p) { CInode *in = get_inode(p->first); assert(in); - reconnected_caps.insert(in); for (map<int,map<int,ceph_mds_cap_reconnect> >::iterator q = p->second.begin(); q != p->second.end(); ++q) for (map<int,ceph_mds_cap_reconnect>::iterator r = q->second.begin(); r != q->second.end(); ++r) { - add_reconnected_cap(in, q->first, inodeno_t(q->second.snaprealm)); + add_reconnected_cap(in, q->first, inodeno_t(r->second.snaprealm)); if (r->first >= 0) rejoin_import_cap(in, q->first, r->second, r->first); } @@ -3474,15 +3473,13 @@ void MDCache::process_reconnected_caps() { dout(10) << "process_reconnected_caps" << dendl; - set<CInode*> have_open_snap_parents; - map<int,MClientSnap*> splits; // adjust filelock state appropriately - for (map<CInode*,map<int,inodeno_t> >::iterator p = reconnected_caps.begin(); - p != reconnected_caps.end(); - ++p) { + map<CInode*,map<int,inodeno_t> >::iterator p = reconnected_caps.begin(); + while (p != reconnected_caps.end()) { CInode *in = p->first; + p++; int issued = in->get_caps_issued(); if (in->is_auth()) { // wr? @@ -3506,20 +3503,24 @@ void MDCache::process_reconnected_caps() << " on " << *in << dendl; SnapRealm *realm = in->find_snaprealm(); - if (!missing_snap_parents.count(realm->inode) && - !have_open_snap_parents.count(realm->inode)) { - if (realm->have_past_parents_open()) { - dout(10) << " have past snap parents for realm " << *realm << " on " << *realm->inode << dendl; - have_open_snap_parents.insert(realm->inode); - } else { - dout(10) << " MISSING past snap parents for realm " << *realm << " on " << *realm->inode << dendl; - missing_snap_parents.insert(realm->inode); + + // is this realm's parents fully open?
+ if (realm->have_past_parents_open()) { + dout(10) << " have past snap parents for realm " << *realm + << " on " << *realm->inode << dendl; + } else { + if (!missing_snap_parents.count(realm->inode)) { + dout(10) << " MISSING past snap parents for realm " << *realm + << " on " << *realm->inode << dendl; realm->inode->get(CInode::PIN_OPENINGSNAPPARENTS); + missing_snap_parents[realm->inode].size(); // just to get it into the map! + } else { + dout(10) << " (already) MISSING past snap parents for realm " << *realm + << " on " << *realm->inode << dendl; } } // also, make sure client's cap is in the correct snaprealm. - SnapRealm *realm = in->find_snaprealm(); for (map<int,inodeno_t>::iterator q = p->second.begin(); q != p->second.end(); q++) { @@ -3528,19 +3529,55 @@ void MDCache::process_reconnected_caps() } else { dout(15) << " client" << q->first << " has wrong realm " << q->second << " != " << realm->inode->ino() << dendl; - MClientSnap *snap; - if (splits.count(q->first) == 0) { - splits[q->first] = snap = new MClientSnap(CEPH_SNAP_OP_SPLIT); - snap->split = realm->inode->ino(); - realm->build_snap_trace_maybe(snap->bl); - } else - snap = splits[q->first]; - snap->split_inos.push_back(in->ino()); + if (realm->have_past_parents_open()) { + // ok, include in a split message _now_. + prepare_realm_split(realm, q->first, in->ino(), splits); + } else { + // send the split later.
+ missing_snap_parents[realm->inode][q->first].insert(in->ino()); + } } - } + } + } + reconnected_caps.clear(); + + send_realm_splits(splits); +} + +void MDCache::prepare_realm_split(SnapRealm *realm, int client, inodeno_t ino, + map<int,MClientSnap*>& splits) +{ + MClientSnap *snap; + if (splits.count(client) == 0) { + splits[client] = snap = new MClientSnap(CEPH_SNAP_OP_SPLIT); + snap->split = realm->inode->ino(); + realm->build_snap_trace(snap->bl); + } else + snap = splits[client]; + snap->split_inos.push_back(ino); +} + +void MDCache::send_realm_splits(map<int,MClientSnap*>& splits) +{ + dout(10) << "send_realm_splits" << dendl; + + for (map<int,MClientSnap*>::iterator p = splits.begin(); + p != splits.end(); + p++) { + Session *session = mds->sessionmap.get_session(entity_name_t::CLIENT(p->first)); + if (session) { + dout(10) << " client" << p->first << " split " << p->second->split + << " inos " << p->second->split_inos << dendl; + mds->send_message_client(p->second, session->inst); + } else { + dout(10) << " no session for client" << p->first << dendl; + delete p->second; + } } + splits.clear(); } + void MDCache::rejoin_import_cap(CInode *in, int client, ceph_mds_cap_reconnect& icr, int frommds) { dout(10) << "rejoin_import_cap for client" << client << " from mds" << frommds @@ -3581,7 +3618,7 @@ void MDCache::do_cap_import(Session *session, CInode *in, Capability *cap) in->auth_pin(this); cap->inc_suppress(); delayed_imported_caps[client].insert(in); - missing_snap_parents.insert(in); + missing_snap_parents[in].size(); } } @@ -3624,14 +3661,25 @@ void MDCache::open_snap_parents() { dout(10) << "open_snap_parents" << dendl; + map<int,MClientSnap*> splits; C_Gather *gather = new C_Gather; - set<CInode*>::iterator p = missing_snap_parents.begin(); + map<CInode*,map<int,set<inodeno_t> > >::iterator p = missing_snap_parents.begin(); while (p != missing_snap_parents.end()) { - CInode *in = *p; + CInode *in = p->first; assert(in->snaprealm); if (in->snaprealm->open_parents(gather->new_sub())) { dout(10) << " past parents now open on " << *in << dendl; + + // include in a
(now safe) snap split? + for (map<int,set<inodeno_t> >::iterator q = p->second.begin(); + q != p->second.end(); + q++) + for (set<inodeno_t>::iterator r = q->second.begin(); + r != q->second.end(); + r++) + prepare_realm_split(in->snaprealm, q->first, *r, splits); + missing_snap_parents.erase(p++); in->put(CInode::PIN_OPENINGSNAPPARENTS); @@ -3651,6 +3699,8 @@ void MDCache::open_snap_parents() } } + send_realm_splits(splits); + if (gather->get_num()) { dout(10) << "open_snap_parents - waiting for " << gather->get_num() << dendl; gather->set_finisher(new C_MDC_OpenSnapParents(this)); diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h index ca7c981ce4ae..da852d47d4dd 100644 --- a/src/mds/MDCache.h +++ b/src/mds/MDCache.h @@ -49,6 +49,7 @@ class MLock; class Message; class MClientRequest; class MMDSSlaveRequest; +class MClientSnap; class MMDSFragmentNotify; @@ -662,17 +663,22 @@ public: reconnected_snaprealms[ino][client] = seq; } void process_reconnected_caps(); + void prepare_realm_split(SnapRealm *realm, int client, inodeno_t ino, + map<int,MClientSnap*>& splits); + void send_realm_splits(map<int,MClientSnap*>& splits); void rejoin_import_cap(CInode *in, int client, ceph_mds_cap_reconnect& icr, int frommds); void finish_snaprealm_reconnect(int client, SnapRealm *realm, snapid_t seq); // cap imports. delayed snap parent opens.
- set<CInode*> missing_snap_parents; + // realm inode -> client -> cap inodes needing to split to this realm + map<CInode*,map<int,set<inodeno_t> > > missing_snap_parents; map<int,set<CInode*> > delayed_imported_caps; void do_cap_import(Session *session, CInode *in, Capability *cap); void do_delayed_cap_imports(); void open_snap_parents(); + friend class Locker; diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 66485f0a57b6..bd4f6a6e06a2 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -401,7 +401,7 @@ void Server::handle_client_reconnect(MClientReconnect *m) dout(15) << "open caps on " << *in << dendl; Capability *cap = in->reconnect_cap(from, p->second.capinfo); session->touch_cap(cap); - mds->mdcache->add_reconnected_cap(in); + mds->mdcache->add_reconnected_cap(in, from, inodeno_t(p->second.capinfo.snaprealm)); continue; } diff --git a/src/messages/MClientFileCaps.h b/src/messages/MClientFileCaps.h index 043722c65f42..8340b3ac4b17 100644 --- a/src/messages/MClientFileCaps.h +++ b/src/messages/MClientFileCaps.h @@ -41,9 +41,6 @@ class MClientFileCaps : public Message { int get_migrate_seq() { return h.migrate_seq; } int get_op() { return h.op; } - //inodeno_t get_snap_realm() { return inodeno_t(h.snap_realm); } - //snapid_t get_snap_created() { return snapid_t(h.snap_created); } - //snapid_t get_snap_highwater() { return snapid_t(h.snap_highwater); } snapid_t get_snap_follows() { return snapid_t(h.snap_follows); } void set_snap_follows(snapid_t s) { h.snap_follows = s; }