From 1a8f45445e3a0af97f395eee42143b2f911d2f73 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 23 Jul 2008 15:52:28 -0700 Subject: [PATCH] allow leases on snapped metadata --- src/TODO | 1 - src/client/Client.cc | 4 +- src/include/ceph_fs.h | 1 + src/mds/Locker.cc | 8 ++-- src/mds/MDCache.cc | 77 ++++++++++++++++++++++++++----------- src/messages/MClientLease.h | 12 +++++- 6 files changed, 71 insertions(+), 32 deletions(-) diff --git a/src/TODO b/src/TODO index 9df7010e36e37..acf83955aab34 100644 --- a/src/TODO +++ b/src/TODO @@ -249,7 +249,6 @@ todo - for simplicity, don't replicate any snapshot data. -- need rsnaprealms in fraginfo_t - rename() needs to create a new realm if src/dst realms differ and (rrealms, or open_children, or not subtree leaf) (similar logic to the anchor update) - will snapshots and CAS play nice? diff --git a/src/client/Client.cc b/src/client/Client.cc index fe86aa642a069..a2211c81da532 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -1294,7 +1294,7 @@ void Client::handle_lease(MClientLease *m) revoke: messenger->send_message(new MClientLease(CEPH_MDS_LEASE_RELEASE, - m->get_mask(), m->get_ino(), m->dname), + m->get_mask(), m->get_ino(), m->get_first(), m->get_last(), m->dname), m->get_source_inst()); delete m; } @@ -1323,7 +1323,7 @@ void Client::release_lease(Inode *in, Dentry *dn, int mask) if (mds >= 0 && mdsmap->is_up(mds)) { dout(10) << "release_lease mds" << mds << " mask " << mask << " on " << in->ino() << " " << dname << dendl; - messenger->send_message(new MClientLease(CEPH_MDS_LEASE_RELEASE, mask, in->ino(), dname), + messenger->send_message(new MClientLease(CEPH_MDS_LEASE_RELEASE, mask, in->ino(), in->snapid, in->snapid, dname), mdsmap->get_inst(mds)); } } diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h index f89645f73570a..3249fe4aaa753 100644 --- a/src/include/ceph_fs.h +++ b/src/include/ceph_fs.h @@ -826,6 +826,7 @@ struct ceph_mds_lease { __u8 action; __le16 mask; __le64 ino; + __le64 first, 
last; } __attribute__ ((packed)); /* followed by a __le32+string for dname */ diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index 34a17fd76aa3b..67903d58b4aab 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -1163,9 +1163,9 @@ void Locker::handle_client_lease(MClientLease *m) assert(m->get_source().is_client()); int client = m->get_source().num(); - CInode *in = mdcache->get_inode(m->get_ino()); + CInode *in = mdcache->get_inode(m->get_ino(), m->get_last()); if (!in) { - dout(7) << "handle_client_lease don't have ino " << m->get_ino() << dendl; + dout(7) << "handle_client_lease don't have ino " << m->get_ino() << "." << m->get_last() << dendl; delete m; return; } @@ -1298,13 +1298,13 @@ void Locker::revoke_client_leases(SimpleLock *lock) mds->send_message_client(new MClientLease(CEPH_MDS_LEASE_REVOKE, mask, dn->get_dir()->ino(), + dn->get_dir()->get_inode()->first, CEPH_NOSNAP, dn->get_name()), l->client); } else { CInode *in = (CInode*)lock->get_parent(); mds->send_message_client(new MClientLease(CEPH_MDS_LEASE_REVOKE, - lock->get_type(), - in->ino()), + lock->get_type(), in->ino(), in->first, in->last), l->client); } } diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 1871cb5151e4f..d5f91f804e4d8 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -1008,6 +1008,21 @@ CInode *MDCache::cow_inode(CInode *in, snapid_t last) else oldin->inode.max_size = 0; + // clone leases? + if (in->last != CEPH_NOSNAP) { + // only if we are not the head, and our lease may cover an + // instance in either the old or new inodes valid interval. 
+ for (hash_map::iterator p = in->client_lease_map.begin(); + p != in->client_lease_map.end(); + p++) { + dout(10) << " cloning client" << p->first << " lease on " << p->second->mask << " to cloned inode" << dendl; + ClientLease *l = oldin->add_client_lease(p->first, p->second->mask); + l->ttl = p->second->ttl; + p->second->session_lease_item.get_xlist()->push_back(&l->session_lease_item); + p->second->lease_item.get_xlist()->push_back(&l->lease_item); + } + } + return oldin; } @@ -1295,6 +1310,7 @@ void MDCache::predirty_journal_parents(Mutation *mut, EMetaBlob *blob, // build list of inodes to wrlock, dirty, and update list lsi; CInode *cur = in; + CDentry *parentdn = cur->get_projected_parent_dn(); while (parent) { //assert(cur->is_auth() || !primary_dn); // this breaks the rename auth twiddle hack assert(parent->is_auth()); @@ -1337,26 +1353,38 @@ void MDCache::predirty_journal_parents(Mutation *mut, EMetaBlob *blob, // rstat - SnapRealm *prealm = parent->inode->find_snaprealm(); - snapid_t latest = prealm->get_latest_snap(); - - snapid_t follows = cfollows; - if (follows == CEPH_NOSNAP || follows == 0) - follows = latest; - - snapid_t first = follows+1; - if (cur->first > first) - first = cur->first; if (primary_dn) { + SnapRealm *prealm = parent->inode->find_snaprealm(); + snapid_t latest = prealm->get_latest_snap(); + + snapid_t follows = cfollows; + if (follows == CEPH_NOSNAP || follows == 0) + follows = latest; + + snapid_t first = follows+1; + if (cur->first > first) + first = cur->first; + dout(10) << " frag head is [" << parent->first << ",head] " << dendl; dout(10) << " inode update is [" << first << "," << cur->last << "]" << dendl; - project_rstat_inode_to_frag(*curi, first, cur->last, parent, linkunlink); + + /* + * FIXME. this incompletely propagates rstats to _old_ parents + * (i.e. shortly after a directory rename). but we need full + * blown hard link backpointers to make this work properly... 
+ */ + snapid_t floor = parentdn->first; + dout(10) << " floor of " << floor << " from parent dn " << *parentdn << dendl; + + if (cur->last >= floor) + project_rstat_inode_to_frag(*curi, MAX(first, floor), cur->last, parent, linkunlink); for (set::iterator p = cur->dirty_old_rstats.begin(); p != cur->dirty_old_rstats.end(); p++) { old_inode_t& old = cur->old_inodes[*p]; - project_rstat_inode_to_frag(old.inode, old.first, *p, parent); + if (*p >= floor) + project_rstat_inode_to_frag(old.inode, MAX(old.first, floor), *p, parent); } cur->dirty_old_rstats.clear(); } @@ -1398,7 +1426,7 @@ void MDCache::predirty_journal_parents(Mutation *mut, EMetaBlob *blob, mut->add_projected_inode(pin); lsi.push_front(pin); - pin->pre_cow_old_inode(); // avoid cow mayhem + pin->pre_cow_old_inode(); // avoid cow mayhem! inode_t *pi = pin->project_inode(); pi->version = pin->pre_dirty(); @@ -1415,24 +1443,32 @@ void MDCache::predirty_journal_parents(Mutation *mut, EMetaBlob *blob, dout(15) << "predirty_journal_parents gives " << pi->dirstat << " on " << *pin << dendl; } - // parent dn [first,last]? - CDentry *parentdn = cur->get_projected_parent_dn(); - dout(10) << "predirty_journal_parents parentdn is " << *parentdn << dendl; + /* + * the rule here is to follow the _oldest_ parent with dirty rstat + * data. if we don't propagate all data, we add ourselves to the + * nudge list. that way all rstat data will (eventually) get + * pushed up the tree. + * + * actually, no. for now, silently drop rstats for old parents. we need + * hard link backpointers to do the above properly. 
+ */ + parentdn = pin->get_projected_parent_dn(); // rstat if (primary_dn) { - project_rstat_frag_to_inode(*pf, first, CEPH_NOSNAP, pin, false); for (map::iterator p = parent->dirty_old_fnodes.begin(); p != parent->dirty_old_fnodes.end(); p++) project_rstat_frag_to_inode(p->second.fnode, p->second.first, p->first, pin, false); parent->dirty_old_fnodes.clear(); + + project_rstat_frag_to_inode(*pf, pin->first, CEPH_NOSNAP, pin, false); } // next parent! cur = pin; curi = pi; - parent = cur->get_projected_parent_dn()->get_dir(); + parent = parentdn->get_dir(); linkunlink = 0; do_parent_mtime = false; primary_dn = true; @@ -1442,15 +1478,10 @@ void MDCache::predirty_journal_parents(Mutation *mut, EMetaBlob *blob, assert(parent->is_auth()); blob->add_dir_context(parent); blob->add_dir(parent, true); - SnapRealm *realm = 0; for (list::iterator p = lsi.begin(); p != lsi.end(); p++) { CInode *cur = *p; - if (!realm) - realm = cur->find_snaprealm(); - else if (cur->snaprealm) - realm = cur->snaprealm; journal_dirty_inode(blob, cur); } diff --git a/src/messages/MClientLease.h b/src/messages/MClientLease.h index 0e12803dd7512..2beef88efb099 100644 --- a/src/messages/MClientLease.h +++ b/src/messages/MClientLease.h @@ -36,20 +36,26 @@ struct MClientLease : public Message { int get_action() { return h.action; } int get_mask() { return h.mask; } inodeno_t get_ino() { return inodeno_t(h.ino); } + snapid_t get_first() { return snapid_t(h.first); } + snapid_t get_last() { return snapid_t(h.last); } MClientLease() : Message(CEPH_MSG_CLIENT_LEASE) {} - MClientLease(int ac, int m, __u64 i) : + MClientLease(int ac, int m, __u64 i, __u64 sf, __u64 sl) : Message(CEPH_MSG_CLIENT_LEASE) { h.action = ac; h.mask = m; h.ino = i; + h.first = sf; + h.last = sl; } - MClientLease(int ac, int m, __u64 i, const nstring& d) : + MClientLease(int ac, int m, __u64 i, __u64 sf, __u64 sl, const nstring& d) : Message(CEPH_MSG_CLIENT_LEASE), dname(d) { h.action = ac; h.mask = m; h.ino = i; + h.first = sf; 
+ h.last = sl; } const char *get_type_name() { return "client_lease"; } @@ -57,6 +63,8 @@ struct MClientLease : public Message { out << "client_lease(a=" << get_lease_action_name(get_action()) << " mask " << get_mask(); out << " " << get_ino(); + if (h.last != CEPH_NOSNAP) + out << " [" << snapid_t(h.first) << "," << snapid_t(h.last) << "]"; if (dname.length()) out << "/" << dname; out << ")"; -- 2.39.5