From b212b34d4c06112db8029b763a2f37bfcab5fa5e Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 1 Aug 2008 16:37:09 -0700 Subject: [PATCH] mds: lots of reconnect fun --- src/client/Client.cc | 2 +- src/include/Context.h | 12 +- src/include/ceph_fs.h | 2 + src/mds/CInode.h | 2 + src/mds/Capability.h | 9 +- src/mds/Locker.cc | 7 +- src/mds/Locker.h | 2 +- src/mds/MDCache.cc | 257 +++++++++++++++++++++++++++++++++++------- src/mds/MDCache.h | 26 ++++- src/mds/Migrator.cc | 12 +- src/mds/Server.cc | 64 ++++------- src/mds/Server.h | 5 - src/mds/snap.cc | 19 +++- src/mds/snap.h | 2 +- 14 files changed, 304 insertions(+), 117 deletions(-) diff --git a/src/client/Client.cc b/src/client/Client.cc index de01689b104c6..ec2e7d08e1190 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -1752,7 +1752,7 @@ void Client::handle_snap(MClientSnap *m) } } - update_snap_trace(m->bl, m->op == CEPH_SNAP_OP_CREATE); + update_snap_trace(m->bl, m->op != CEPH_SNAP_OP_DESTROY); if (realm) { for (list::iterator p = to_move.begin(); p != to_move.end(); p++) { diff --git a/src/include/Context.h b/src/include/Context.h index e5c74de6cb6e5..d68fea99ed2d8 100644 --- a/src/include/Context.h +++ b/src/include/Context.h @@ -112,8 +112,14 @@ public: public: C_GatherSub(C_Gather *g, int n) : gather(g), num(n) {} void finish(int r) { - if (gather->sub_finish(num)) + if (gather->sub_finish(num)) { delete gather; // last one! + gather = 0; + } + } + ~C_GatherSub() { + if (gather) + gather->rm_sub(num); } }; @@ -140,6 +146,10 @@ public: waitfor.insert(num); return new C_GatherSub(this, num); } + void rm_sub(int n) { + num--; + waitfor.erase(n); + } bool empty() { return num == 0; } int get_num() { return num; } diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h index 4ba2c3ef5714e..c66ee8069a38e 100644 --- a/src/include/ceph_fs.h +++ b/src/include/ceph_fs.h @@ -849,6 +849,7 @@ struct ceph_mds_snaprealm_reconnect { * snaps */ enum { + CEPH_SNAP_OP_UPDATE, /* CREATE or DESTROY */ CEPH_SNAP_OP_CREATE, CEPH_SNAP_OP_DESTROY, CEPH_SNAP_OP_SPLIT, @@ -856,6 +857,7 @@ enum { static inline const char *ceph_snap_op_name(int o) { switch (o) { + case CEPH_SNAP_OP_UPDATE: return "update"; case CEPH_SNAP_OP_CREATE: return "create"; case CEPH_SNAP_OP_DESTROY: return "destroy"; case CEPH_SNAP_OP_SPLIT: return "split"; diff --git a/src/mds/CInode.h b/src/mds/CInode.h index 89b51bc58caac..0ec80acb7fcd4 100644 --- a/src/mds/CInode.h +++ b/src/mds/CInode.h @@ -70,6 +70,7 @@ class CInode : public MDSCacheObject { static const int PIN_FROZEN = 14; static const int PIN_IMPORTINGCAPS = 15; static const int PIN_PASTSNAPPARENT = -16; + static const int PIN_OPENINGSNAPPARENTS = 17; const char *pin_name(int p) { switch (p) { @@ -88,6 +89,7 @@ class CInode : public MDSCacheObject { case PIN_FROZEN: return "frozen"; case PIN_IMPORTINGCAPS: return "importingcaps"; case PIN_PASTSNAPPARENT: return "pastsnapparent"; + case PIN_OPENINGSNAPPARENTS: return "openingsnapparents"; default: return generic_pin_name(p); } } diff --git a/src/mds/Capability.h b/src/mds/Capability.h index a5a1138b1e4ae..5d9be6f06ae3e 100644 --- a/src/mds/Capability.h +++ b/src/mds/Capability.h @@ -63,7 +63,7 @@ private: capseq_t last_open; capseq_t mseq; - bool suppress; + int suppress; bool stale; public: snapid_t client_follows; @@ -79,7 +79,7 @@ public: last_recv(s), last_open(0), mseq(0), - suppress(false), stale(false), + suppress(0), stale(false), client_follows(0), session_caps_item(this), snaprealm_caps_item(this) { } @@ -88,8 +88,9 @@ public: capseq_t get_last_open() { return last_open; } void set_last_open() { last_open = last_sent; } - bool is_suppress() { return suppress; } - void set_suppress(bool b) { suppress = b; } + bool is_suppress() { return suppress > 0; } + void inc_suppress() { suppress++; } + void dec_suppress() { suppress--; } bool is_stale() { return stale; } void set_stale(bool b) { stale = b; } diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index 15e0b9546661d..869be4d47731b 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -498,7 +498,8 @@ void Locker::file_update_finish(CInode *in, Mutation *mut, bool share) Capability* Locker::issue_new_caps(CInode *in, int mode, - Session *session) + Session *session, + bool& is_new) { dout(7) << "issue_new_caps for mode " << mode << " on " << *in << dendl; @@ -514,8 +515,10 @@ Capability* Locker::issue_new_caps(CInode *in, cap = in->add_client_cap(my_client); session->touch_cap(cap); cap->set_wanted(my_want); - cap->set_suppress(true); // suppress file cap messages for new cap (we'll bundle with the open() reply) + cap->inc_suppress(); // suppress file cap messages for new cap (we'll bundle with the open() reply) + is_new = true; } else { + is_new = false; // make sure it wants sufficient caps if (my_want & ~cap->wanted()) { // augment wanted caps for this client diff --git a/src/mds/Locker.h b/src/mds/Locker.h index 52f17177e244b..6b13800a168a6 100644 --- a/src/mds/Locker.h +++ b/src/mds/Locker.h @@ -195,7 +195,7 @@ protected: // -- file i/o -- public: version_t issue_file_data_version(CInode *in); - Capability* issue_new_caps(CInode *in, int mode, Session *session); + Capability* issue_new_caps(CInode *in, int mode, Session *session, bool& is_new); bool issue_caps(CInode *in); void issue_truncate(CInode *in); void revoke_stale_caps(Session *session); diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 384862b4d71e6..b3d382043f6d0 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -3439,7 +3439,7 @@ void MDCache::rejoin_gather_finish() ++p) { CInode *in = get_inode(p->first); assert(in); - mds->server->add_reconnected_cap_inode(in); + reconnected_caps.insert(in); for (map >::iterator q = p->second.begin(); q != p->second.end(); ++q) @@ -3450,15 +3450,76 @@ void MDCache::rejoin_gather_finish() rejoin_import_cap(in, q->first, r->second, r->first); } - mds->server->process_reconnected_caps(); + process_reconnected_caps(); identify_files_to_recover(); rejoin_send_acks(); // did we already get our acks too? // this happens when the rejoin_gather has to wait on a MISSING/FULL exchange. - if (rejoin_ack_gather.empty()) + if (rejoin_ack_gather.empty()) { mds->rejoin_done(); + + // finally, kickstart past snap parent opens + open_snap_parents(); + } +} + +/* + * choose lock states based on reconnected caps + */ +void MDCache::process_reconnected_caps() +{ + dout(10) << "process_reconnected_caps" << dendl; + + set have_open_snap_parents; + + map splits; + + // adjust filelock state appropriately + for (set::iterator p = reconnected_caps.begin(); + p != reconnected_caps.end(); + ++p) { + CInode *in = *p; + int issued = in->get_caps_issued(); + if (in->is_auth()) { + // wr? + if (issued & (CEPH_CAP_WR|CEPH_CAP_WRBUFFER)) { + if (issued & (CEPH_CAP_RDCACHE|CEPH_CAP_WRBUFFER)) { + in->filelock.set_state(LOCK_LONER); + } else { + in->filelock.set_state(LOCK_MIXED); + } + } + } else { + // note that client should perform stale/reap cleanup during reconnect. + assert((issued & (CEPH_CAP_WR|CEPH_CAP_WRBUFFER)) == 0); // ???? + if (in->filelock.is_xlocked()) + in->filelock.set_state(LOCK_LOCK); + else + in->filelock.set_state(LOCK_SYNC); // might have been lock, previously + } + dout(15) << " issued " << cap_string(issued) + << " chose " << in->filelock + << " on " << *in << dendl; + + SnapRealm *realm = in->find_snaprealm(); + if (!missing_snap_parents.count(realm->inode) && + !have_open_snap_parents.count(realm->inode)) { + if (realm->have_past_parents_open()) { + dout(10) << " have past snap parents for realm " << *realm << " on " << *realm->inode << dendl; + have_open_snap_parents.insert(realm->inode); + } else { + dout(10) << " MISSING past snap parents for realm " << *realm << " on " << *realm->inode << dendl; + missing_snap_parents.insert(realm->inode); + realm->inode->get(CInode::PIN_OPENINGSNAPPARENTS); + } + } + + // also, make sure each cap is in the correct snaprealm. + //SnapRealm *r = + + } } void MDCache::rejoin_import_cap(CInode *in, int client, ceph_mds_cap_reconnect& icr, int frommds) @@ -3472,20 +3533,139 @@ void MDCache::rejoin_import_cap(CInode *in, int client, ceph_mds_cap_reconnect& // add cap Capability *cap = in->reconnect_cap(client, icr); session->touch_cap(cap); - - // send IMPORT + + do_cap_import(session, in, cap); +} + + +// ------- +// cap imports and delayed snap parent opens + +void MDCache::do_cap_import(Session *session, CInode *in, Capability *cap) +{ + int client = session->inst.name.num(); SnapRealm *realm = in->find_snaprealm(); - MClientFileCaps *reap = new MClientFileCaps(CEPH_CAP_OP_IMPORT, - in->inode, - realm->inode->ino(), - cap->get_last_seq(), - cap->pending(), - cap->wanted(), - cap->get_mseq()); - realm->build_snap_trace(reap->snapbl); - mds->messenger->send_message(reap, session->inst); + if (realm->have_past_parents_open()) { + dout(10) << "do_cap_import " << session->inst.name << " mseq " << cap->get_mseq() << " on " << *in << dendl; + MClientFileCaps *reap = new MClientFileCaps(CEPH_CAP_OP_IMPORT, + in->inode, + realm->inode->ino(), + cap->get_last_seq(), + cap->pending(), + cap->wanted(), + cap->get_mseq()); + realm->build_snap_trace(reap->snapbl); + mds->messenger->send_message(reap, session->inst); + } else { + dout(10) << "do_cap_import missing past snap parents, delaying " << session->inst.name << " mseq " + << cap->get_mseq() << " on " << *in << dendl; + in->auth_pin(this); + cap->inc_suppress(); + delayed_imported_caps[client].insert(in); + missing_snap_parents.insert(in); + } } +void MDCache::do_delayed_cap_imports() +{ + dout(10) << "do_delayed_cap_imports" << dendl; + + map > d; + d.swap(delayed_imported_caps); + + for (map >::iterator p = d.begin(); + p != d.end(); + p++) { + for (set::iterator q = p->second.begin(); + q != p->second.end(); + q++) { + CInode *in = *q; + Session *session = mds->sessionmap.get_session(entity_name_t::CLIENT(p->first)); + if (session) { + Capability *cap = in->get_client_cap(p->first); + if (cap) { + do_cap_import(session, in, cap); // note: this may fail and requeue! + cap->dec_suppress(); + } + } + in->auth_unpin(this); + } + } +} + +struct C_MDC_OpenSnapParents : public Context { + MDCache *mdcache; + C_MDC_OpenSnapParents(MDCache *c) : mdcache(c) {} + void finish(int r) { + mdcache->open_snap_parents(); + } +}; + +void MDCache::open_snap_parents() +{ + dout(10) << "open_snap_parents" << dendl; + + C_Gather *gather = new C_Gather; + + set::iterator p = missing_snap_parents.begin(); + while (p != missing_snap_parents.end()) { + CInode *in = *p; + assert(in->snaprealm); + if (in->snaprealm->open_parents(gather->new_sub())) { + dout(10) << " past parents now open on " << *in << dendl; + missing_snap_parents.erase(p++); + + in->put(CInode::PIN_OPENINGSNAPPARENTS); + + // finish off client snaprealm reconnects? + map >::iterator q = reconnected_snaprealms.find(in->ino()); + if (q != reconnected_snaprealms.end()) { + for (map::iterator r = q->second.begin(); + r != q->second.end(); + r++) + finish_snaprealm_reconnect(r->first, in->snaprealm, r->second); + reconnected_snaprealms.erase(q); + } + } else { + dout(10) << " opening past parents on " << *in << dendl; + p++; + } + } + + if (gather->get_num()) { + dout(10) << "open_snap_parents - waiting for " << gather->get_num() << dendl; + gather->set_finisher(new C_MDC_OpenSnapParents(this)); + } else { + assert(missing_snap_parents.empty()); + assert(reconnected_snaprealms.empty()); + dout(10) << "open_snap_parents - all open" << dendl; + do_delayed_cap_imports(); + } +} + +void MDCache::finish_snaprealm_reconnect(int client, SnapRealm *realm, snapid_t seq) +{ + if (seq < realm->get_newest_seq()) { + dout(10) << "finish_snaprealm_reconnect client" << client << " has old seq " << seq << " < " + << realm->get_newest_seq() + << " on " << *realm << dendl; + // send an update + Session *session = mds->sessionmap.get_session(entity_name_t::CLIENT(client)); + if (session) { + MClientSnap *snap = new MClientSnap(CEPH_SNAP_OP_UPDATE); + realm->build_snap_trace(snap->bl); + mds->send_message_client(snap, session->inst); + } else { + dout(10) << " ...or not, no session for this client!" << dendl; + } + } else { + dout(10) << "finish_snaprealm_reconnect client" << client << " up to date" + << " on " << *realm << dendl; + } +} + + + void MDCache::rejoin_send_acks() { dout(7) << "rejoin_send_acks" << dendl; @@ -4897,7 +5077,7 @@ int MDCache::path_traverse(MDRequest *mdr, Message *req, // who // make sure snaprealm parents are open... if (cur->snaprealm && !cur->snaprealm->open && mdr && - !cur->snaprealm->open_parents(mdr)) + !cur->snaprealm->open_parents(new C_MDS_RetryRequest(this, mdr))) return 1; @@ -4931,7 +5111,7 @@ int MDCache::path_traverse(MDRequest *mdr, Message *req, // who } else { dout(7) << "remote link to " << dn->get_remote_ino() << ", which i don't have" << dendl; assert(mdr); // we shouldn't hit non-primary dentries doing a non-mdr traversal! - open_remote_ino(dn->get_remote_ino(), mdr, _get_waiter(mdr, req)); + open_remote_ino(dn->get_remote_ino(), _get_waiter(mdr, req)); if (mds->logger) mds->logger->inc("trino"); return 1; } @@ -5258,7 +5438,7 @@ CInode *MDCache::get_dentry_inode(CDentry *dn, MDRequest *mdr) return in; } else { dout(10) << "get_dentry_inode on remote dn, opening inode for " << *dn << dendl; - open_remote_ino(dn->get_remote_ino(), mdr, new C_MDS_RetryRequest(this, mdr)); + open_remote_ino(dn->get_remote_ino(), new C_MDS_RetryRequest(this, mdr)); return 0; } } @@ -5266,13 +5446,12 @@ CInode *MDCache::get_dentry_inode(CDentry *dn, MDRequest *mdr) class C_MDC_RetryOpenRemoteIno : public Context { MDCache *mdcache; inodeno_t ino; - MDRequest *mdr; Context *onfinish; public: - C_MDC_RetryOpenRemoteIno(MDCache *mdc, inodeno_t i, MDRequest *r, Context *c) : - mdcache(mdc), ino(i), mdr(r), onfinish(c) {} + C_MDC_RetryOpenRemoteIno(MDCache *mdc, inodeno_t i, Context *c) : + mdcache(mdc), ino(i), onfinish(c) {} void finish(int r) { - mdcache->open_remote_ino(ino, mdr, onfinish); + mdcache->open_remote_ino(ino, onfinish); } }; @@ -5280,21 +5459,20 @@ public: class C_MDC_OpenRemoteIno : public Context { MDCache *mdcache; inodeno_t ino; - MDRequest *mdr; Context *onfinish; public: vector anchortrace; - C_MDC_OpenRemoteIno(MDCache *mdc, inodeno_t i, MDRequest *r, Context *c) : - mdcache(mdc), ino(i), mdr(r), onfinish(c) {} + C_MDC_OpenRemoteIno(MDCache *mdc, inodeno_t i, Context *c) : + mdcache(mdc), ino(i), onfinish(c) {} C_MDC_OpenRemoteIno(MDCache *mdc, inodeno_t i, vector& at, - MDRequest *r, Context *c) : - mdcache(mdc), ino(i), mdr(r), onfinish(c), anchortrace(at) {} + Context *c) : + mdcache(mdc), ino(i), onfinish(c), anchortrace(at) {} void finish(int r) { assert(r == 0); if (r == 0) - mdcache->open_remote_ino_2(ino, mdr, anchortrace, onfinish); + mdcache->open_remote_ino_2(ino, anchortrace, onfinish); else { onfinish->finish(r); delete onfinish; @@ -5302,18 +5480,15 @@ public: } }; -void MDCache::open_remote_ino(inodeno_t ino, - MDRequest *mdr, - Context *onfinish) +void MDCache::open_remote_ino(inodeno_t ino, Context *onfinish) { dout(7) << "open_remote_ino on " << ino << dendl; - C_MDC_OpenRemoteIno *c = new C_MDC_OpenRemoteIno(this, ino, mdr, onfinish); + C_MDC_OpenRemoteIno *c = new C_MDC_OpenRemoteIno(this, ino, onfinish); mds->anchorclient->lookup(ino, c->anchortrace, c); } void MDCache::open_remote_ino_2(inodeno_t ino, - MDRequest *mdr, vector& anchortrace, Context *onfinish) { @@ -5362,7 +5537,7 @@ void MDCache::open_remote_ino_2(inodeno_t ino, if (!in->dirfragtree.contains(frag)) { dout(10) << "frag " << frag << " not valid, requerying anchortable" << dendl; - open_remote_ino(ino, mdr, onfinish); + open_remote_ino(ino, onfinish); return; } @@ -5372,14 +5547,14 @@ void MDCache::open_remote_ino_2(inodeno_t ino, dout(10) << "opening remote dirfrag " << frag << " under " << *in << dendl; /* we re-query the anchortable just to avoid a fragtree update race */ open_remote_dirfrag(in, frag, - new C_MDC_RetryOpenRemoteIno(this, ino, mdr, onfinish)); + new C_MDC_RetryOpenRemoteIno(this, ino, onfinish)); return; } if (!dir && in->is_auth()) { if (dir->is_frozen_dir()) { dout(7) << "traverse: " << *dir << " is frozen_dir, waiting" << dendl; - dir->add_waiter(CDir::WAIT_UNFREEZE, _get_waiter(mdr, 0)); + dir->add_waiter(CDir::WAIT_UNFREEZE, onfinish); return; } dir = in->get_or_open_dirfrag(this, frag); @@ -5393,19 +5568,19 @@ void MDCache::open_remote_ino_2(inodeno_t ino, << " in complete dir " << *dir << ", requerying anchortable" << dendl; - open_remote_ino(ino, mdr, onfinish); + open_remote_ino(ino, onfinish); } else { dout(10) << "need ino " << anchortrace[i].ino << ", fetching incomplete dir " << *dir << dendl; - dir->fetch(new C_MDC_OpenRemoteIno(this, ino, anchortrace, mdr, onfinish)); + dir->fetch(new C_MDC_OpenRemoteIno(this, ino, anchortrace, onfinish)); } } else { // hmm, discover. dout(10) << "have remote dirfrag " << *dir << ", discovering " << anchortrace[i].ino << dendl; discover_ino(dir, anchortrace[i].ino, - new C_MDC_OpenRemoteIno(this, ino, anchortrace, mdr, onfinish)); + new C_MDC_OpenRemoteIno(this, ino, anchortrace, onfinish)); } } @@ -6265,13 +6440,11 @@ void MDCache::handle_discover(MDiscover *dis) assert(dis->get_asker() != whoami); - /* if (mds->get_state() < MDSMap::STATE_ACTIVE) { - dout(-7) << "discover_reply NOT ACTIVE YET" << dendl; - delete dis; + dout(-7) << "discover_reply not yet active, delaying" << dendl; + mds->wait_for_active(new C_MDS_RetryMessage(mds, dis)); return; } - */ CInode *cur = 0; diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h index 8b933d2c5d97b..c43461725457d 100644 --- a/src/mds/MDCache.h +++ b/src/mds/MDCache.h @@ -650,8 +650,30 @@ public: cap_imports[ino][client][frommds] = icr.capinfo; cap_import_paths[ino] = icr.path; } + + // [reconnect/rejoin caps] + set reconnected_caps; + map > reconnected_snaprealms; + + void add_reconnected_cap(CInode *in) { + reconnected_caps.insert(in); + } + void add_reconnected_snaprealm(int client, inodeno_t ino, snapid_t seq) { + reconnected_snaprealms[ino][client] = seq; + } + void process_reconnected_caps(); void rejoin_import_cap(CInode *in, int client, ceph_mds_cap_reconnect& icr, int frommds); + void finish_snaprealm_reconnect(int client, SnapRealm *realm, snapid_t seq); + // cap imports. delayed snap parent opens. + set missing_snap_parents; + map > delayed_imported_caps; + + void do_cap_import(Session *session, CInode *in, Capability *cap); + void do_delayed_cap_imports(); + void open_snap_parents(); + + friend class Locker; friend class Migrator; @@ -817,8 +839,8 @@ public: void open_remote_dirfrag(CInode *diri, frag_t fg, Context *fin); CInode *get_dentry_inode(CDentry *dn, MDRequest *mdr); - void open_remote_ino(inodeno_t ino, MDRequest *mdr, Context *fin); - void open_remote_ino_2(inodeno_t ino, MDRequest *mdr, + void open_remote_ino(inodeno_t ino, Context *fin); + void open_remote_ino_2(inodeno_t ino, vector& anchortrace, Context *onfinish); diff --git a/src/mds/Migrator.cc b/src/mds/Migrator.cc index 2b48ebda5067d..a9e6d19b4e35c 100644 --- a/src/mds/Migrator.cc +++ b/src/mds/Migrator.cc @@ -2043,17 +2043,7 @@ void Migrator::finish_import_inode_caps(CInode *in, int from, } cap->merge(it->second); - SnapRealm *realm = in->find_snaprealm(); - MClientFileCaps *caps = new MClientFileCaps(CEPH_CAP_OP_IMPORT, - in->inode, - realm->inode->ino(), - cap->get_last_seq(), - cap->pending(), - cap->wanted(), - cap->get_mseq()); - realm->build_snap_trace(caps->snapbl); - - mds->send_message_client(caps, session->inst); + mds->mdcache->do_cap_import(session, in, cap); } in->put(CInode::PIN_IMPORTINGCAPS); diff --git a/src/mds/Server.cc b/src/mds/Server.cc index d96fe02665727..66485f0a57b6e 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -375,6 +375,22 @@ void Server::handle_client_reconnect(MClientReconnect *m) new C_MDS_session_finish(mds, session, false, pv)); } else { + // snaprealms + for (map::iterator p = m->realms.begin(); + p != m->realms.end(); + p++) { + CInode *in = mdcache->get_inode(p->first); + if (in) { + assert(in->snaprealm); + if (in->snaprealm->have_past_parents_open()) + mdcache->finish_snaprealm_reconnect(from, in->snaprealm, snapid_t(p->second.seq)); + else + mdcache->add_reconnected_snaprealm(from, p->first, snapid_t(p->second.seq)); + } else { + mdcache->add_reconnected_snaprealm(from, p->first, snapid_t(p->second.seq)); + } + } + // caps for (map::iterator p = m->caps.begin(); p != m->caps.end(); @@ -385,7 +401,7 @@ void Server::handle_client_reconnect(MClientReconnect *m) dout(15) << "open caps on " << *in << dendl; Capability *cap = in->reconnect_cap(from, p->second.capinfo); session->touch_cap(cap); - reconnected_caps.insert(in); + mds->mdcache->add_reconnected_cap(in); continue; } @@ -423,47 +439,12 @@ void Server::handle_client_reconnect(MClientReconnect *m) // remove from gather set client_reconnect_gather.erase(from); - if (client_reconnect_gather.empty()) reconnect_gather_finish(); + if (client_reconnect_gather.empty()) + reconnect_gather_finish(); delete m; } -/* - * called by mdcache, late in rejoin (right before acks are sent) - */ -void Server::process_reconnected_caps() -{ - dout(10) << "process_reconnected_caps" << dendl; - - // adjust filelock state appropriately - for (set::iterator p = reconnected_caps.begin(); - p != reconnected_caps.end(); - ++p) { - CInode *in = *p; - int issued = in->get_caps_issued(); - if (in->is_auth()) { - // wr? - if (issued & (CEPH_CAP_WR|CEPH_CAP_WRBUFFER)) { - if (issued & (CEPH_CAP_RDCACHE|CEPH_CAP_WRBUFFER)) { - in->filelock.set_state(LOCK_LONER); - } else { - in->filelock.set_state(LOCK_MIXED); - } - } - } else { - // note that client should perform stale/reap cleanup during reconnect. - assert((issued & (CEPH_CAP_WR|CEPH_CAP_WRBUFFER)) == 0); // ???? - if (in->filelock.is_xlocked()) - in->filelock.set_state(LOCK_LOCK); - else - in->filelock.set_state(LOCK_SYNC); // might have been lock, previously - } - dout(15) << " issued " << cap_string(issued) - << " chose " << in->filelock - << " on " << *in << dendl; - } - reconnected_caps.clear(); // clean up -} void Server::reconnect_gather_finish() @@ -2010,7 +1991,6 @@ void Server::handle_client_readdir(MDRequest *mdr) dn->link_remote(in); } else { mdcache->open_remote_ino(dn->get_remote_ino(), - mdr, new C_MDS_RetryRequest(mdcache, mdr)); // touch everything i _do_ have @@ -4617,12 +4597,14 @@ void Server::_do_open(MDRequest *mdr, CInode *cur) if (cur->inode.is_dir()) cmode = CEPH_FILE_MODE_PIN; // register new cap - Capability *cap = mds->locker->issue_new_caps(cur, cmode, mdr->session); + bool is_new = false; + Capability *cap = mds->locker->issue_new_caps(cur, cmode, mdr->session, is_new); // drop our locks (they may interfere with us issuing new caps) mdcache->request_drop_locks(mdr); - cap->set_suppress(false); // stop suppressing messages on this cap + if (is_new) + cap->dec_suppress(); // stop suppressing messages on new cap dout(12) << "_do_open issued caps " << cap_string(cap->pending()) << " for " << req->get_source() diff --git a/src/mds/Server.h b/src/mds/Server.h index 1de2d5b751914..8fa30a54ceef2 100644 --- a/src/mds/Server.h +++ b/src/mds/Server.h @@ -58,7 +58,6 @@ public: // -- sessions and recovery -- utime_t reconnect_start; set client_reconnect_gather; // clients i need a reconnect msg from. - set reconnected_caps; void handle_client_session(class MClientSession *m); void _session_logged(Session *session, bool open, version_t pv); @@ -69,10 +68,6 @@ public: void reconnect_clients(); void handle_client_reconnect(class MClientReconnect *m); void process_reconnect_cap(CInode *in, int from, ceph_mds_cap_reconnect& capinfo); - void add_reconnected_cap_inode(CInode *in) { - reconnected_caps.insert(in); - } - void process_reconnected_caps(); void reconnect_gather_finish(); void reconnect_tick(); diff --git a/src/mds/snap.cc b/src/mds/snap.cc index cdf32ddf86e76..f1bb1c20fd5c9 100644 --- a/src/mds/snap.cc +++ b/src/mds/snap.cc @@ -27,18 +27,20 @@ << ".cache.snaprealm(" << inode->ino() \ << " seq " << seq << " " << this << ") " -bool SnapRealm::open_parents(MDRequest *mdr, snapid_t first, snapid_t last) +bool SnapRealm::open_parents(Context *finish, snapid_t first, snapid_t last) { dout(10) << "open_parents [" << first << "," << last << "]" << dendl; - if (open) + if (open) { + delete finish; return true; + } // make sure my current parents' parents are open... if (parent) { dout(10) << " current parent [" << current_parent_since << ",head] is " << *parent << " on " << *parent->inode << dendl; if (last >= current_parent_since && - !parent->open_parents(mdr, MAX(first, current_parent_since), last)) + !parent->open_parents(finish, MAX(first, current_parent_since), last)) return false; } @@ -52,8 +54,7 @@ bool SnapRealm::open_parents(MDRequest *mdr, snapid_t first, snapid_t last) << p->second.ino << dendl; CInode *parent = mdcache->get_inode(p->second.ino); if (!parent) { - mdcache->open_remote_ino(p->second.ino, mdr, - new C_MDS_RetryRequest(mdcache, mdr)); + mdcache->open_remote_ino(p->second.ino, finish); return false; } assert(parent->snaprealm); // hmm! @@ -61,18 +62,22 @@ bool SnapRealm::open_parents(MDRequest *mdr, snapid_t first, snapid_t last) open_past_parents[p->second.ino] = parent->snaprealm; parent->get(CInode::PIN_PASTSNAPPARENT); } - if (!parent->snaprealm->open_parents(mdr, p->second.first, p->first)) + if (!parent->snaprealm->open_parents(finish, p->second.first, p->first)) return false; } } open = true; + delete finish; return true; } bool SnapRealm::have_past_parents_open(snapid_t first, snapid_t last) { dout(10) << "have_past_parents_open [" << first << "," << last << "]" << dendl; + if (open) + return true; + for (map::iterator p = past_parents.lower_bound(first); p != past_parents.end(); p++) { @@ -88,6 +93,8 @@ bool SnapRealm::have_past_parents_open(snapid_t first, snapid_t last) MIN(last, p->first))) return false; } + + open = true; return true; } diff --git a/src/mds/snap.h b/src/mds/snap.h index b8468609eed05..f2c3df0ebf65d 100644 --- a/src/mds/snap.h +++ b/src/mds/snap.h @@ -144,7 +144,7 @@ struct SnapRealm { return false; } - bool open_parents(MDRequest *mdr, snapid_t first=1, snapid_t last=CEPH_NOSNAP); + bool open_parents(Context *retryorfinish, snapid_t first=1, snapid_t last=CEPH_NOSNAP); bool have_past_parents_open(snapid_t first=1, snapid_t last=CEPH_NOSNAP); void close_parents(); -- 2.39.5