From: Sage Weil Date: Thu, 17 Jul 2008 18:04:35 +0000 (-0700) Subject: mds: fix up snap cap flushing X-Git-Tag: v0.4~445 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=6ec022bacd68176774d55a2bee912d16f15b3dde;p=ceph.git mds: fix up snap cap flushing --- diff --git a/src/TODO b/src/TODO index 5b5d0750288b..fa7147da35bb 100644 --- a/src/TODO +++ b/src/TODO @@ -79,6 +79,8 @@ mon - osdmon needs to lower-bound old osdmap versions it keeps around? mds +- fix shared write vs max_size + - proper handling of cache expire messages during rejoin phase? -> i think cache expires are fine; the rejoin_ack handler just has to behave if rejoining items go missing - try_remove_unlinked_dn thing diff --git a/src/client/Client.cc b/src/client/Client.cc index 84dd1beb4f0d..2684f985f484 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -1374,7 +1374,7 @@ void Client::put_cap_ref(Inode *in, int cap) } } -void Client::check_caps(Inode *in, bool force_dirty) +void Client::check_caps(Inode *in, bool flush_snap) { int wanted = in->caps_wanted(); int used = in->caps_used(); @@ -1415,7 +1415,7 @@ void Client::check_caps(Inode *in, bool force_dirty) goto ack; } - if (force_dirty && + if (flush_snap && (cap->issued & (CEPH_CAP_WR|CEPH_CAP_WRBUFFER))) goto ack; @@ -1431,7 +1431,13 @@ void Client::check_caps(Inode *in, bool force_dirty) */ ack: - MClientFileCaps *m = new MClientFileCaps(CEPH_CAP_OP_ACK, + int op = CEPH_CAP_OP_ACK; + if (flush_snap) + op = CEPH_CAP_OP_FLUSHSNAP; + else if (wanted == 0) + op = CEPH_CAP_OP_RELEASE; + dout(10) << " op = " << op << dendl; + MClientFileCaps *m = new MClientFileCaps(op, in->inode, 0, cap->seq, @@ -1443,11 +1449,11 @@ void Client::check_caps(Inode *in, bool force_dirty) in->requested_max_size = in->wanted_max_size; m->get_snaps() = in->snaprealm->snaps; messenger->send_message(m, mdsmap->get_inst(it->first)); - if (wanted == 0) + if (wanted == 0 && !flush_snap) mds_sessions[it->first].num_caps--; } - if (wanted == 0) { + if (wanted == 0 && !flush_snap) { remove_all_caps(in); } } @@ -1797,13 +1803,10 @@ void Client::handle_file_caps(MClientFileCaps *m) << " seq " << m->get_seq() << " " << cap_string(m->get_caps()) << ", which we don't want caps for, releasing." << dendl; - m->set_op(CEPH_CAP_OP_ACK); + m->set_op(CEPH_CAP_OP_RELEASE); m->set_caps(0); m->set_wanted(0); messenger->send_message(m, m->get_source_inst()); - - // FIXME... - return; } diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h index ac13a88a3f17..67ae033258ba 100644 --- a/src/include/ceph_fs.h +++ b/src/include/ceph_fs.h @@ -781,14 +781,30 @@ static inline int ceph_caps_for_mode(int mode) } enum { - CEPH_CAP_OP_GRANT, /* mds->client grant */ - CEPH_CAP_OP_ACK, /* client->mds ack (if prior grant was a recall) */ - CEPH_CAP_OP_REQUEST, /* client->mds request (update wanted bits) */ - CEPH_CAP_OP_TRUNC, /* mds->client trunc notify (invalidate size+mtime) */ - CEPH_CAP_OP_EXPORT, /* mds has exported the cap */ - CEPH_CAP_OP_IMPORT /* mds has imported the cap from specified mds */ + CEPH_CAP_OP_GRANT, /* mds->client grant */ + CEPH_CAP_OP_TRUNC, /* mds->client trunc notify (invalidate size+mtime) */ + CEPH_CAP_OP_EXPORT, /* mds has exported the cap */ + CEPH_CAP_OP_IMPORT, /* mds has imported the cap from specified mds */ + CEPH_CAP_OP_ACK, /* client->mds ack (if prior grant was a recall) */ + CEPH_CAP_OP_REQUEST, /* client->mds request (update wanted bits) */ + CEPH_CAP_OP_FLUSHSNAP, /* client->mds flush snapped metadata */ + CEPH_CAP_OP_RELEASE, /* client->mds released cap entirely */ }; +inline static const char* ceph_cap_op_name(int op) { + switch (op) { + case CEPH_CAP_OP_GRANT: return "grant"; + case CEPH_CAP_OP_TRUNC: return "trunc"; + case CEPH_CAP_OP_EXPORT: return "export"; + case CEPH_CAP_OP_IMPORT: return "import"; + case CEPH_CAP_OP_ACK: return "ack"; + case CEPH_CAP_OP_REQUEST: return "request"; + case CEPH_CAP_OP_FLUSHSNAP: return "flushsnap"; + case CEPH_CAP_OP_RELEASE: return "release"; + default: return 0; + } +} + struct ceph_mds_file_caps { __le32 op; __le32 seq; diff --git a/src/mds/CInode.h b/src/mds/CInode.h index faf1234ae869..6e9743045e18 100644 --- a/src/mds/CInode.h +++ b/src/mds/CInode.h @@ -479,6 +479,7 @@ public: put(PIN_CAPS); xlist_caps.remove_myself(); containing_realm = NULL; + xlist_open_file.remove_myself(); // unpin logsegment } } void move_to_containing_realm(SnapRealm *realm) { diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index a03dd2200661..5e9735e74d8f 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -936,8 +936,9 @@ void Locker::handle_client_file_caps(MClientFileCaps *m) snapid_t follows = 0; if (m->get_snaps().size()) follows = m->get_snaps()[0]; - - dout(7) << "handle_client_file_caps on " << m->get_ino() << " follows " << follows << dendl; + dout(7) << "handle_client_file_caps on " << m->get_ino() + << " follows " << follows + << " op " << m->get_op() << dendl; CInode *head_in = mdcache->get_inode(m->get_ino()); if (!head_in) { @@ -961,53 +962,79 @@ void Locker::handle_client_file_caps(MClientFileCaps *m) dout(7) << "handle_client_file_caps no cap for client" << client << " on " << *in << dendl; delete m; return; - } - + } assert(cap); - // filter wanted based on what we could ever give out (given auth/replica status) - int wanted = m->get_wanted() & in->filelock.caps_allowed_ever(); - - dout(7) << "handle_client_file_caps seq " << m->get_seq() - << " confirms caps " << cap_string(m->get_caps()) - << " wants " << cap_string(wanted) - << " from client" << client - << " on " << *in - << dendl; - - // confirm caps - int had = cap->confirm_receipt(m->get_seq(), m->get_caps()); - int has = cap->confirmed(); - dout(10) << "client had " << cap_string(had) << ", has " << cap_string(has) << dendl; - - cap->client_follows = follows; - - // update wanted - if (cap->wanted() != wanted) { - if (m->get_seq() < cap->get_last_open()) { - /* this is awkward. - client may be trying to release caps (i.e. inode closed, etc.) by setting reducing wanted - set. - but it may also be opening the same filename, not sure that it'll map to the same inode. - so, we don't want wanted reductions to clobber mds's notion of wanted unless we're - sure the client has seen all the latest caps. - */ - dout(10) << "handle_client_file_caps ignoring wanted " << cap_string(m->get_wanted()) - << " bc seq " << m->get_seq() << " < last open " << cap->get_last_open() << dendl; - } else if (wanted == 0) { - // outright release? - dout(7) << " cap for client" << client << " is now null, removing from " << *in << dendl; - in->remove_client_cap(client); - if (!in->is_any_caps()) - in->xlist_open_file.remove_myself(); // unpin logsegment - if (!in->is_auth()) - request_inode_file_caps(in); + // flushsnap? + if (m->get_op() == CEPH_CAP_OP_FLUSHSNAP) { + dout(7) << " flushsnap follows " << follows + << " client" << client << " on " << *in << dendl; + int had = cap->confirm_receipt(m->get_seq(), m->get_caps()); + int has = cap->confirmed(); + if (in->last != 0 && in->last < CEPH_NOSNAP) { + dout(10) << " flushsnap releasing cloned cap" << dendl; + in->remove_client_cap(client); } else { - cap->set_wanted(wanted); + dout(10) << " flushsnap NOT releasing live cap" << dendl; + } + _do_cap_update(in, has|had, 0, follows, m); + } else { + + // for this and all subsequent versions of this inode, + while (1) { + // filter wanted based on what we could ever give out (given auth/replica status) + int wanted = m->get_wanted() & head_in->filelock.caps_allowed_ever(); + int had = cap->confirm_receipt(m->get_seq(), m->get_caps()); + int has = cap->confirmed(); + cap->client_follows = follows; + dout(10) << " follows " << follows + << ", had " << cap_string(had) + << ", has " << cap_string(has) + << " on " << *in << dendl; + + _do_cap_update(in, had, wanted, follows, m); + + if (m->get_seq() < cap->get_last_open()) { + /* client may be trying to release caps (i.e. inode closed, etc.) + * by setting reducing wanted set. but it may also be opening the + * same filename, not sure that it'll map to the same inode. so, + * we don't want RELEASE or wanted updates to clobber mds's notion + * of wanted unless we're sure the client has seen all the latest + * caps. + */ + dout(10) << " ignoring release|wanted " << cap_string(m->get_wanted()) + << " bc seq " << m->get_seq() << " < last open " << cap->get_last_open() << dendl; + } else if (m->get_op() == CEPH_CAP_OP_RELEASE) { + dout(7) << " release client" << client << " on " << *in << dendl; + in->remove_client_cap(client); + if (!in->is_auth()) + request_inode_file_caps(in); + } else if (wanted != cap->wanted()) { + dout(10) << " wanted " << cap_string(cap->wanted()) + << " -> " << cap_string(wanted) << dendl; + cap->set_wanted(wanted); + } + + // done? + if (in->last == CEPH_NOSNAP || in->last == 0) + break; + + // next! + in = mdcache->pick_inode_snap(in, in->last); + cap = in->get_client_cap(client); + assert(cap); } } + delete m; +} + + +void Locker::_do_cap_update(CInode *in, int had, int wanted, snapid_t follows, MClientFileCaps *m) +{ + dout(10) << "_do_cap_update had " << cap_string(had) << " on " << *in << dendl; + inode_t *latest = in->get_projected_inode(); utime_t atime = m->get_atime(); @@ -1016,8 +1043,8 @@ void Locker::handle_client_file_caps(MClientFileCaps *m) uint64_t size = m->get_size(); // atime|mtime|size? - bool had_or_has_wr = (had|has) & CEPH_CAP_WR; - bool excl = (had|has) & CEPH_CAP_EXCL; + bool had_or_has_wr = had & CEPH_CAP_WR; + bool excl = had & CEPH_CAP_EXCL; bool dirty_atime = false; bool dirty_mtime = false; bool dirty_ctime = false; @@ -1118,8 +1145,6 @@ void Locker::handle_client_file_caps(MClientFileCaps *m) file_eval_gather(&in->filelock); else if (in->is_auth()) file_eval(&in->filelock); - - delete m; } diff --git a/src/mds/Locker.h b/src/mds/Locker.h index 9c0be82858f8..602be38a6264 100644 --- a/src/mds/Locker.h +++ b/src/mds/Locker.h @@ -212,6 +212,8 @@ protected: protected: void handle_client_file_caps(class MClientFileCaps *m); + void _do_cap_update(CInode *in, int had, int wanted, snapid_t follows, MClientFileCaps *m); + void request_inode_file_caps(CInode *in); void handle_inode_file_caps(class MInodeFileCaps *m); diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index cbd1d1e0dc2a..ec4804ec1d03 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -978,21 +978,25 @@ CInode *MDCache::cow_inode(CInode *in, snapid_t last) dout(10) << " oldin " << *oldin << dendl; add_inode(oldin); - for(map::iterator p = in->client_caps.begin(); + // clone caps? + for (map::iterator p = in->client_caps.begin(); p != in->client_caps.end(); p++) { Capability *cap = p->second; - if (cap->client_follows <= last) { - // move to oldin + if ((cap->issued() & (CEPH_CAP_WR|CEPH_CAP_WRBUFFER)) && + cap->client_follows <= last) { + // clone to oldin int client = p->first; Capability *newcap = oldin->add_client_cap(client, in->containing_realm); newcap->issue(cap->issued()); newcap->client_follows = cap->client_follows; - dout(10) << " cloning client" << client << " cap " << cap + dout(10) << " cloning client" << client << " wr cap " << cap << " to " << newcap << " on cloned inode" << dendl; cap->client_follows = last; } } + if (oldin->is_any_caps()) + oldin->filelock.set_state(LOCK_LOCK); return oldin; } diff --git a/src/messages/MClientFileCaps.h b/src/messages/MClientFileCaps.h index 02022beb54fa..314db323a725 100644 --- a/src/messages/MClientFileCaps.h +++ b/src/messages/MClientFileCaps.h @@ -19,19 +19,6 @@ class MClientFileCaps : public Message { - public: - static const char* get_opname(int op) { - switch (op) { - case CEPH_CAP_OP_GRANT: return "grant"; - case CEPH_CAP_OP_ACK: return "ack"; - case CEPH_CAP_OP_REQUEST: return "request"; - case CEPH_CAP_OP_TRUNC: return "trunc"; - case CEPH_CAP_OP_EXPORT: return "export"; - case CEPH_CAP_OP_IMPORT: return "import"; - default: assert(0); return 0; - } - } - private: struct ceph_mds_file_caps h; vector snaps; @@ -107,7 +94,7 @@ class MClientFileCaps : public Message { const char *get_type_name() { return "Cfcap";} void print(ostream& out) { - out << "client_file_caps(" << get_opname(h.op) + out << "client_file_caps(" << ceph_cap_op_name(h.op) << " ino " << inodeno_t(h.ino) << " seq " << h.seq << " caps " << cap_string(h.caps)