From: Sage Weil Date: Thu, 31 Jul 2008 22:44:47 +0000 (-0700) Subject: mds: first pass at snap-aware client reconnect X-Git-Tag: v0.4~351 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=7d4e45f514f383a6acf9d63b0c89930695c4b3df;p=ceph.git mds: first pass at snap-aware client reconnect --- diff --git a/src/client/Client.cc b/src/client/Client.cc index 6e4d9f9127f2..c7b8ac804490 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -1213,15 +1213,15 @@ void Client::send_reconnect(int mds) << " " << cap_string(in->caps[mds]->issued) << " wants " << cap_string(in->caps_wanted()) << dendl; - in->caps[mds]->seq = 0; // reset seq. - m->add_inode_caps(p->first.ino, // ino - in->caps_wanted(), // wanted - in->caps[mds]->issued, // issued - in->inode.size, in->inode.mtime, in->inode.atime); filepath path; in->make_path(path); dout(10) << " path on " << p->first << " is " << path << dendl; - m->add_inode_path(p->first.ino, path.get_path()); + + in->caps[mds]->seq = 0; // reset seq. + m->add_cap(p->first, path.get_path(), // ino + in->caps_wanted(), // wanted + in->caps[mds]->issued, // issued + in->inode.size, in->inode.mtime, in->inode.atime); } if (in->exporting_mds == mds) { dout(10) << " clearing exporting_caps on " << p->first << dendl; @@ -1441,7 +1441,7 @@ void Client::check_caps(Inode *in, bool flush_snap) op = CEPH_CAP_OP_RELEASE; dout(10) << " op = " << op << dendl; MClientFileCaps *m = new MClientFileCaps(op, - in->inode, + in->inode, in->snapid, 0, cap->seq, cap->issued, diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h index 27b5032ed52d..b779835bb292 100644 --- a/src/include/ceph_fs.h +++ b/src/include/ceph_fs.h @@ -807,9 +807,10 @@ inline static const char* ceph_cap_op_name(int op) { struct ceph_mds_file_caps { __le32 op; + __le64 ino; + __le64 snapid; __le32 seq; __le32 caps, wanted; - __le64 ino; __le64 size, max_size; __le32 migrate_seq; struct ceph_timespec mtime, atime, ctime; diff --git a/src/include/types.h b/src/include/types.h index 70b8542559ad..59a7aee966d8 100644 --- a/src/include/types.h +++ b/src/include/types.h @@ -117,6 +117,7 @@ WRITE_RAW_ENCODER(ceph_mds_file_caps) WRITE_RAW_ENCODER(ceph_mds_lease) WRITE_RAW_ENCODER(ceph_mds_reply_head) WRITE_RAW_ENCODER(ceph_mds_reply_inode) +WRITE_RAW_ENCODER(ceph_mds_cap_reconnect) WRITE_RAW_ENCODER(ceph_frag_tree_split) WRITE_RAW_ENCODER(ceph_inopath_item) diff --git a/src/include/utime.h b/src/include/utime.h index 18243328bedb..ebc34d53da94 100644 --- a/src/include/utime.h +++ b/src/include/utime.h @@ -101,6 +101,12 @@ public: operator double() { return (double)sec() + ((double)usec() / 1000000.0L); } + operator ceph_timespec() { + ceph_timespec ts; + ts.tv_sec = sec(); + ts.tv_nsec = nsec(); + return ts; + } }; WRITE_CLASS_ENCODER(utime_t) diff --git a/src/mds/CInode.h b/src/mds/CInode.h index a396b1081aef..89b51bc58caa 100644 --- a/src/mds/CInode.h +++ b/src/mds/CInode.h @@ -545,7 +545,7 @@ public: containing_realm = realm; } - Capability *reconnect_cap(int client, inode_caps_reconnect_t& icr) { + Capability *reconnect_cap(int client, ceph_mds_cap_reconnect& icr) { Capability *cap = get_client_cap(client); if (cap) { cap->merge(icr.wanted, icr.issued); @@ -555,8 +555,8 @@ public: cap->issue(icr.issued); } inode.size = MAX(inode.size, icr.size); - inode.mtime = MAX(inode.mtime, icr.mtime); - inode.atime = MAX(inode.atime, icr.atime); + inode.mtime = MAX(inode.mtime, utime_t(icr.mtime)); + inode.atime = MAX(inode.atime, utime_t(icr.atime)); return cap; } void clear_client_caps() { diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index 15e0b9546661..2c50eb5bc42c 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -619,7 +619,7 @@ bool Locker::issue_caps(CInode *in) << " new pending " << cap_string(cap->pending()) << " was " << cap_string(before) << dendl; mds->send_message_client(new MClientFileCaps(CEPH_CAP_OP_GRANT, - in->inode, + in->inode, in->last, in->find_snaprealm()->inode->ino(), cap->get_last_seq(), cap->pending(), @@ -642,7 +642,7 @@ void Locker::issue_truncate(CInode *in) it++) { Capability *cap = it->second; mds->send_message_client(new MClientFileCaps(CEPH_CAP_OP_TRUNC, - in->inode, + in->inode, in->last, in->find_snaprealm()->inode->ino(), cap->get_last_seq(), cap->pending(), @@ -902,7 +902,7 @@ void Locker::share_inode_max_size(CInode *in) if (cap->pending() & CEPH_CAP_WR) { dout(10) << "share_inode_max_size with client" << client << dendl; mds->send_message_client(new MClientFileCaps(CEPH_CAP_OP_GRANT, - in->inode, + in->inode, in->last, in->find_snaprealm()->inode->ino(), cap->get_last_seq(), cap->pending(), diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index e47645d4f9bc..95e728caf333 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -2717,12 +2717,12 @@ void MDCache::handle_cache_rejoin_weak(MMDSCacheRejoin *weak) ack = new MMDSCacheRejoin(MMDSCacheRejoin::OP_ACK); // check cap exports - for (map >::iterator p = weak->cap_exports.begin(); + for (map >::iterator p = weak->cap_exports.begin(); p != weak->cap_exports.end(); ++p) { CInode *in = get_inode(p->first); if (!in || !in->is_auth()) continue; - for (map::iterator q = p->second.begin(); + for (map::iterator q = p->second.begin(); q != p->second.end(); ++q) { dout(10) << " claiming cap import " << p->first << " client" << q->first << " on " << *in << dendl; @@ -2733,20 +2733,21 @@ void MDCache::handle_cache_rejoin_weak(MMDSCacheRejoin *weak) assert(mds->is_rejoin()); // check cap exports. - for (map >::iterator p = weak->cap_exports.begin(); + for (map >::iterator p = weak->cap_exports.begin(); p != weak->cap_exports.end(); ++p) { CInode *in = get_inode(p->first); if (in && !in->is_auth()) continue; + string& path = weak->cap_export_paths[p->first]; if (!in) { - if (!path_is_mine(weak->cap_export_paths[p->first])) + if (!path_is_mine(path)) continue; - cap_import_paths[p->first] = weak->cap_export_paths[p->first]; - dout(10) << " noting cap import " << p->first << " path " << weak->cap_export_paths[p->first] << dendl; + cap_import_paths[p->first] = path; + dout(10) << " noting cap import " << p->first << " path " << path << dendl; } // note - for (map::iterator q = p->second.begin(); + for (map::iterator q = p->second.begin(); q != p->second.end(); ++q) { dout(10) << " claiming cap import " << p->first << " client" << q->first << dendl; @@ -2868,13 +2869,13 @@ void MDCache::handle_cache_rejoin_weak(MMDSCacheRejoin *weak) * returns a C_Gather* is there is work to do. caller is responsible for setting * the C_Gather completer. */ -C_Gather *MDCache::parallel_fetch(map& pathmap) +C_Gather *MDCache::parallel_fetch(map& pathmap) { dout(10) << "parallel_fetch on " << pathmap.size() << " paths" << dendl; // scan list set fetch_queue; - map::iterator p = pathmap.begin(); + map::iterator p = pathmap.begin(); while (p != pathmap.end()) { CInode *in = get_inode(p->first); if (in) { @@ -3433,16 +3434,16 @@ void MDCache::rejoin_gather_finish() // process cap imports // ino -> client -> frommds -> capex - for (map > >::iterator p = cap_imports.begin(); + for (map > >::iterator p = cap_imports.begin(); p != cap_imports.end(); ++p) { CInode *in = get_inode(p->first); assert(in); mds->server->add_reconnected_cap_inode(in); - for (map >::iterator q = p->second.begin(); + for (map >::iterator q = p->second.begin(); q != p->second.end(); ++q) - for (map::iterator r = q->second.begin(); + for (map::iterator r = q->second.begin(); r != q->second.end(); ++r) if (r->first >= 0) @@ -3460,7 +3461,7 @@ void MDCache::rejoin_gather_finish() mds->rejoin_done(); } -void MDCache::rejoin_import_cap(CInode *in, int client, inode_caps_reconnect_t& icr, int frommds) +void MDCache::rejoin_import_cap(CInode *in, int client, ceph_mds_cap_reconnect& icr, int frommds) { dout(10) << "rejoin_import_cap for client" << client << " from mds" << frommds << " on " << *in << dendl; @@ -3475,7 +3476,7 @@ void MDCache::rejoin_import_cap(CInode *in, int client, inode_caps_reconnect_t& // send IMPORT SnapRealm *realm = in->find_snaprealm(); MClientFileCaps *reap = new MClientFileCaps(CEPH_CAP_OP_IMPORT, - in->inode, + in->inode, in->last, realm->inode->ino(), cap->get_last_seq(), cap->pending(), diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h index 1b1f25f33d3b..fe8c5a884274 100644 --- a/src/mds/MDCache.h +++ b/src/mds/MDCache.h @@ -617,11 +617,11 @@ protected: set rejoin_sent; // nodes i sent a rejoin to set rejoin_ack_gather; // nodes from whom i need a rejoin ack - map > cap_exports; // ino -> client -> capex - map cap_export_paths; + map > cap_exports; // ino -> client -> capex + map cap_export_paths; - map > > cap_imports; // ino -> client -> frommds -> capex - map cap_import_paths; + map > > cap_imports; // ino -> client -> frommds -> capex + map cap_import_paths; set rejoin_undef_inodes; set rejoin_potential_updated_scatterlocks; @@ -641,16 +641,16 @@ protected: public: void rejoin_gather_finish(); void rejoin_send_rejoins(); - void rejoin_export_caps(inodeno_t ino, string& path, int client, inode_caps_reconnect_t& icr) { - cap_exports[ino][client] = icr; - cap_export_paths[ino] = path; + void rejoin_export_caps(vinodeno_t vino, int client, cap_reconnect_t& icr) { + cap_exports[vino][client] = icr.capinfo; + cap_export_paths[vino] = icr.path; } - void rejoin_recovered_caps(inodeno_t ino, string& path, int client, inode_caps_reconnect_t& icr, + void rejoin_recovered_caps(vinodeno_t vino, int client, cap_reconnect_t& icr, int frommds=-1) { - cap_imports[ino][client][frommds] = icr; - cap_import_paths[ino] = path; + cap_imports[vino][client][frommds] = icr.capinfo; + cap_import_paths[vino] = icr.path; } - void rejoin_import_cap(CInode *in, int client, inode_caps_reconnect_t& icr, int frommds); + void rejoin_import_cap(CInode *in, int client, ceph_mds_cap_reconnect& icr, int frommds); friend class Locker; @@ -822,7 +822,7 @@ public: vector& anchortrace, Context *onfinish); - C_Gather *parallel_fetch(map& pathmap); + C_Gather *parallel_fetch(map& pathmap); void make_trace(vector& trace, CInode *in); diff --git a/src/mds/Migrator.cc b/src/mds/Migrator.cc index 417cce9b5ff7..f1fc8792dc1d 100644 --- a/src/mds/Migrator.cc +++ b/src/mds/Migrator.cc @@ -896,7 +896,7 @@ void Migrator::finish_export_inode_caps(CInode *in) dout(7) << "finish_export_inode telling client" << it->first << " exported caps on " << *in << dendl; MClientFileCaps *m = new MClientFileCaps(CEPH_CAP_OP_EXPORT, - in->inode, + in->inode, in->last, in->find_snaprealm()->inode->ino(), cap->get_last_seq(), cap->pending(), @@ -2045,7 +2045,7 @@ void Migrator::finish_import_inode_caps(CInode *in, int from, SnapRealm *realm = in->find_snaprealm(); MClientFileCaps *caps = new MClientFileCaps(CEPH_CAP_OP_IMPORT, - in->inode, + in->inode, in->last, realm->inode->ino(), cap->get_last_seq(), cap->pending(), diff --git a/src/mds/Server.cc b/src/mds/Server.cc index e6a9d11b23c4..13e2e6f53507 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -376,46 +376,46 @@ void Server::handle_client_reconnect(MClientReconnect *m) } else { // caps - for (map::iterator p = m->inode_caps.begin(); - p != m->inode_caps.end(); + for (map::iterator p = m->caps.begin(); + p != m->caps.end(); ++p) { CInode *in = mdcache->get_inode(p->first); if (in && in->is_auth()) { // we recovered it, and it's ours. take note. dout(15) << "open caps on " << *in << dendl; - Capability *cap = in->reconnect_cap(from, p->second); + Capability *cap = in->reconnect_cap(from, p->second.capinfo); session->touch_cap(cap); reconnected_caps.insert(in); continue; } - filepath path = m->inode_path[p->first]; + filepath path = p->second.path; if ((in && !in->is_auth()) || !mds->mdcache->path_is_mine(path)) { // not mine. - dout(0) << "non-auth " << p->first << " " << m->inode_path[p->first] + dout(0) << "non-auth " << p->first << " " << path << ", will pass off to authority" << dendl; // mark client caps stale. inode_t fake_inode; memset(&fake_inode, 0, sizeof(fake_inode)); - fake_inode.ino = p->first; + fake_inode.ino = p->first.ino; MClientFileCaps *stale = new MClientFileCaps(CEPH_CAP_OP_EXPORT, - fake_inode, + fake_inode, p->first.snapid, 0, 0, 0, // doesn't matter. - p->second.wanted, // doesn't matter. + p->second.capinfo.wanted, // doesn't matter. 0); // FIXME get proper mseq here? hmm. mds->send_message_client(stale, m->get_source_inst()); // add to cap export list. - mdcache->rejoin_export_caps(p->first, m->inode_path[p->first], from, p->second); + mdcache->rejoin_export_caps(p->first, from, p->second); } else { // mine. fetch later. - dout(0) << "missing " << p->first << " " << m->inode_path[p->first] + dout(0) << "missing " << p->first << " " << path << " (mine), will load later" << dendl; - mdcache->rejoin_recovered_caps(p->first, m->inode_path[p->first], from, p->second, + mdcache->rejoin_recovered_caps(p->first, from, p->second, -1); // "from" me. } } diff --git a/src/mds/mdstypes.h b/src/mds/mdstypes.h index f1091d3b6631..24708e1667e3 100644 --- a/src/mds/mdstypes.h +++ b/src/mds/mdstypes.h @@ -535,36 +535,31 @@ namespace __gnu_cxx { } -// inode caps info for client reconnect -struct inode_caps_reconnect_t { - int32_t wanted; - int32_t issued; - uint64_t size; - utime_t mtime, atime; - - inode_caps_reconnect_t() {} - inode_caps_reconnect_t(int w, int i) : - wanted(w), issued(i), size(0) {} - inode_caps_reconnect_t(int w, int i, uint64_t sz, utime_t mt, utime_t at) : - wanted(w), issued(i), size(sz), mtime(mt), atime(at) {} -}; +// cap info for client reconnect +struct cap_reconnect_t { + string path; + ceph_mds_cap_reconnect capinfo; + + cap_reconnect_t() {} + cap_reconnect_t(const string& p, int w, int i, uint64_t sz, utime_t mt, utime_t at) : + path(p) { + capinfo.wanted = w; + capinfo.issued = i; + capinfo.size = sz; + capinfo.mtime = mt; + capinfo.atime = at; + } -static inline void encode(const inode_caps_reconnect_t &ic, bufferlist &bl) -{ - ::encode(ic.wanted, bl); - ::encode(ic.issued, bl); - ::encode(ic.size, bl); - ::encode(ic.mtime, bl); - ::encode(ic.atime, bl); -} -static inline void decode(inode_caps_reconnect_t &ic, bufferlist::iterator &p) -{ - ::decode(ic.wanted, p); - ::decode(ic.issued, p); - ::decode(ic.size, p); - ::decode(ic.mtime, p); - ::decode(ic.atime, p); -} + void encode(bufferlist& bl) const { + ::encode(path, bl); + ::encode(capinfo, bl); + } + void decode(bufferlist::iterator& bl) { + ::decode(path, bl); + ::decode(capinfo, bl); + } +}; +WRITE_CLASS_ENCODER(cap_reconnect_t) // ================================================================ diff --git a/src/messages/MClientFileCaps.h b/src/messages/MClientFileCaps.h index ff7c5f298cfa..25440c6cf28f 100644 --- a/src/messages/MClientFileCaps.h +++ b/src/messages/MClientFileCaps.h @@ -30,6 +30,8 @@ class MClientFileCaps : public Message { capseq_t get_mseq() { return h.migrate_seq; } inodeno_t get_ino() { return inodeno_t(h.ino); } + snapid_t get_snapid() { return snapid_t(h.snapid); } + __u64 get_size() { return h.size; } __u64 get_max_size() { return h.max_size; } utime_t get_ctime() { return utime_t(h.ctime); } @@ -61,6 +63,7 @@ class MClientFileCaps : public Message { MClientFileCaps() {} MClientFileCaps(int op, inode_t& inode, + snapid_t snapid, inodeno_t realm, long seq, int caps, @@ -68,10 +71,11 @@ class MClientFileCaps : public Message { int mseq) : Message(CEPH_MSG_CLIENT_FILECAPS) { h.op = op; + h.ino = inode.ino; + h.snapid = snapid; h.seq = seq; h.caps = caps; h.wanted = wanted; - h.ino = inode.ino; h.size = inode.size; h.max_size = inode.max_size; h.migrate_seq = mseq; diff --git a/src/messages/MClientReconnect.h b/src/messages/MClientReconnect.h index 26b08fce21ab..165dec0007b1 100644 --- a/src/messages/MClientReconnect.h +++ b/src/messages/MClientReconnect.h @@ -20,8 +20,7 @@ class MClientReconnect : public Message { public: - map inode_caps; - map inode_path; + map caps; __u8 closed; // true if this session was closed by the client. MClientReconnect() : Message(CEPH_MSG_CLIENT_RECONNECT), @@ -29,41 +28,25 @@ public: const char *get_type_name() { return "client_reconnect"; } void print(ostream& out) { - out << "client_reconnect(" << inode_caps.size() << " caps)"; + out << "client_reconnect(" + << (closed ? "closed":"") + << caps.size() << " caps)"; } - void add_inode_caps(inodeno_t ino, - int wanted, int issued, - loff_t sz, utime_t mt, utime_t at) { - inode_caps[ino] = inode_caps_reconnect_t(wanted, issued, sz, mt, at); - } - void add_inode_path(inodeno_t ino, const string& path) { - inode_path[ino] = path; + void add_cap(vinodeno_t ino, const string& path, + int wanted, int issued, + loff_t sz, utime_t mt, utime_t at) { + caps[ino] = cap_reconnect_t(path, wanted, issued, sz, mt, at); } void encode_payload() { - __u32 n = inode_caps.size(); ::encode(closed, payload); - ::encode(n, payload); - for (map::iterator p = inode_caps.begin(); - p != inode_caps.end(); - p++) { - ::encode(p->first, payload); - ::encode(p->second, payload); - ::encode(inode_path[p->first], payload); - } + ::encode(caps, payload); } void decode_payload() { bufferlist::iterator p = payload.begin(); ::decode(closed, p); - __u32 n; - ::decode(n, p); - while (n--) { - inodeno_t ino; - ::decode(ino, p); - ::decode(inode_caps[ino], p); - ::decode(inode_path[ino], p); - } + ::decode(caps, p); } }; diff --git a/src/messages/MMDSCacheRejoin.h b/src/messages/MMDSCacheRejoin.h index f84bd665abdb..e937bc13c320 100644 --- a/src/messages/MMDSCacheRejoin.h +++ b/src/messages/MMDSCacheRejoin.h @@ -142,8 +142,8 @@ class MMDSCacheRejoin : public Message { // open bufferlist cap_export_bl; - map > cap_exports; - map cap_export_paths; + map > cap_exports; + map cap_export_paths; // full bufferlist inode_base;