From 517f2b3967f09e4ddff611c8fe6867687cc665cf Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 16 Jul 2008 16:46:49 -0700 Subject: [PATCH] mds: fixed up caps vs cow_inode --- src/TODO | 5 ---- src/mds/CInode.h | 23 +++++++++++++++ src/mds/Capability.h | 5 +++- src/mds/Locker.cc | 52 +++++++++++++++++++++------------- src/mds/MDCache.cc | 31 ++++++++++++++++---- src/mds/MDCache.h | 2 +- src/messages/MClientFileCaps.h | 3 +- 7 files changed, 87 insertions(+), 34 deletions(-) diff --git a/src/TODO b/src/TODO index d0cae5a092de6..5b5d0750288b7 100644 --- a/src/TODO +++ b/src/TODO @@ -226,9 +226,6 @@ todo - garbage collection - realms - make better sense of snap_highwater...? -- snap creation - - async SnapClient for the possibly remote SnapTable - - hmm, can we generalize any of AnchorClient? /- cdir fetch/store versioned dentries - emetablob.. journaling a versioned update.. @@ -242,8 +239,6 @@ todo remote link -> multiversion inode - for simplicity, don't replicate any snapshot data. -- or issue any leases or capabilities... the content is static! -- - need rrealms in fraginfo_t - rename() needs to create a new realm if src/dst realms differ and (rrealms, or open_children, or not subtree leaf) (similar logic to the anchor update) diff --git a/src/mds/CInode.h b/src/mds/CInode.h index 38704db763c85..d3541c6feb42c 100644 --- a/src/mds/CInode.h +++ b/src/mds/CInode.h @@ -478,6 +478,29 @@ public: containing_realm = NULL; } } + void steal_client_cap(int client, Capability *cap) { + // remove + CInode *from = cap->get_inode(); + assert(from->ino() == ino()); + containing_realm = from->containing_realm; // hackish! + from->containing_realm->remove_cap(client, cap); + from->client_caps.erase(client); + if (from->client_caps.empty()) { + from->put(PIN_CAPS); + from->xlist_caps.remove_myself(); + from->containing_realm = NULL; + } + + // add + if (client_caps.empty()) { + get(PIN_CAPS); + //containing_realm = find_snaprealm(); // see hacky above + containing_realm->inodes_with_caps.push_back(&xlist_caps); + } + assert(client_caps.count(client) == 0); + cap->set_inode(this); + containing_realm->add_cap(client, cap); + } void move_to_containing_realm(SnapRealm *realm) { for (map::iterator q = client_caps.begin(); q != client_caps.end(); diff --git a/src/mds/Capability.h b/src/mds/Capability.h index 53776dfbd20cb..a5a1138b1e4ae 100644 --- a/src/mds/Capability.h +++ b/src/mds/Capability.h @@ -62,10 +62,12 @@ private: capseq_t last_sent, last_recv; capseq_t last_open; capseq_t mseq; - + bool suppress; bool stale; public: + snapid_t client_follows; + xlist::item session_caps_item; xlist::item snaprealm_caps_item; @@ -78,6 +80,7 @@ public: last_open(0), mseq(0), suppress(false), stale(false), + client_follows(0), session_caps_item(this), snaprealm_caps_item(this) { } capseq_t get_mseq() { return mseq; } diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index be7684a80dc2f..d10364d2595ff 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -932,17 +932,33 @@ void Locker::share_inode_max_size(CInode *in) void Locker::handle_client_file_caps(MClientFileCaps *m) { int client = m->get_source().num(); - CInode *in = mdcache->get_inode(m->get_ino()); + + snapid_t follows = 0; + if (m->get_snaps().size()) + follows = m->get_snaps()[0]; + + dout(7) << "handle_client_file_caps on " << m->get_ino() << " follows " << follows << dendl; + + CInode *head_in = mdcache->get_inode(m->get_ino()); + if (!head_in) { + dout(7) << "handle_client_file_caps on unknown ino " << m->get_ino() << ", dropping" << dendl; + delete m; + return; + } + + CInode *in = 0; + if (head_in) { + in = mdcache->pick_inode_snap(head_in, follows); + if (in != head_in) + dout(10) << " head inode " << *head_in << dendl; + dout(10) << " cap inode " << *in << dendl; + } + Capability *cap = 0; if (in) cap = in->get_client_cap(client); - - if (!in || !cap) { - if (!in) { - dout(7) << "handle_client_file_caps on unknown ino " << m->get_ino() << ", dropping" << dendl; - } else { - dout(7) << "handle_client_file_caps no cap for client" << client << " on " << *in << dendl; - } + if (!cap) { + dout(7) << "handle_client_file_caps no cap for client" << client << " on " << *in << dendl; delete m; return; } @@ -964,6 +980,8 @@ void Locker::handle_client_file_caps(MClientFileCaps *m) int has = cap->confirmed(); dout(10) << "client had " << cap_string(had) << ", has " << cap_string(has) << dendl; + cap->client_follows = follows; + // update wanted if (cap->wanted() != wanted) { if (m->get_seq() < cap->get_last_open()) { @@ -1051,13 +1069,8 @@ void Locker::handle_client_file_caps(MClientFileCaps *m) !in->is_base()) { // FIXME.. what about root inode mtime/atime? EUpdate *le = new EUpdate(mds->mdlog, "size|max_size|mtime|ctime|atime update"); - snapid_t follows = 0; - if (m->get_snaps().size()) - follows = m->get_snaps()[0]; - CInode *upi = mdcache->pick_inode_snap(in, follows); - - inode_t *pi = upi->project_inode(); - pi->version = upi->pre_dirty(); + inode_t *pi = in->project_inode(); + pi->version = in->pre_dirty(); if (change_max) { dout(7) << " max_size " << pi->max_size << " -> " << new_max << dendl; pi->max_size = new_max; @@ -1090,12 +1103,11 @@ void Locker::handle_client_file_caps(MClientFileCaps *m) } Mutation *mut = new Mutation; mut->ls = mds->mdlog->get_current_segment(); - if (upi == in) - file_wrlock_force(&in->filelock, mut); // wrlock for duration of journal - mut->auth_pin(upi); - predirty_nested(mut, &le->metablob, upi, 0, PREDIRTY_PRIMARY, false); + file_wrlock_force(&in->filelock, mut); // wrlock for duration of journal + mut->auth_pin(in); + predirty_nested(mut, &le->metablob, in, 0, PREDIRTY_PRIMARY, false); - mdcache->journal_dirty_inode(&le->metablob, upi, follows); + mdcache->journal_dirty_inode(&le->metablob, in, follows); //le->metablob.add_primary_dentry(in->parent, true, 0, pi); mds->mdlog->submit_entry(le, new C_Locker_FileUpdate_finish(this, in, mut, change_max)); diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 0a3558f283b66..d60f2cc2bfba1 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -962,21 +962,35 @@ CInode *MDCache::pick_inode_snap(CInode *in, snapid_t follows) return in; } -CInode *MDCache::cow_inode(CInode *in, snapid_t first, snapid_t last) +CInode *MDCache::cow_inode(CInode *in, snapid_t last) { + assert(last >= in->first); + CInode *oldin = new CInode(this); oldin->inode = *in->get_previous_projected_inode(); oldin->symlink = in->symlink; oldin->xattrs = in->xattrs; - oldin->first = first; + oldin->first = in->first; oldin->last = last; + in->first = last+1; dout(10) << " oldin " << *oldin << dendl; add_inode(oldin); - //if (in->get_caps_issued() & (CEPH_CAP_WR|CEPH_CAP_WRBUFFER)) - //oldin->get(CInode::PIN_SNAPCAPS); + map::iterator p = in->client_caps.begin(); + while (p != in->client_caps.end()) { + Capability *cap = p->second; + assert(cap->client_follows >= oldin->first); + if (cap->client_follows <= last) { + // move to oldin + int client = p->first; + dout(10) << " moving client" << client << " cap " << cap << " to cloned inode" << dendl; + p++; + oldin->steal_client_cap(client, cap); + } else + p++; + } return oldin; } @@ -989,11 +1003,15 @@ void MDCache::journal_cow_dentry(EMetaBlob *metablob, CDentry *dn, snapid_t foll // nothing to cow on a null dentry assert(!dn->is_null()); + // within current snap? + if (follows < dn->first) + return; + if (dn->is_primary() && dn->inode->is_multiversion()) { // multiversion inode. CInode *in = dn->inode; - if (follows == CEPH_NOSNAP) + if (follows == CEPH_NOSNAP || follows == 0) follows = in->find_snaprealm()->get_latest_snap(); old_inode_t &old = in->old_inodes[follows]; @@ -1016,7 +1034,8 @@ void MDCache::journal_cow_dentry(EMetaBlob *metablob, CDentry *dn, snapid_t foll dout(10) << " dn " << *dn << dendl; if (dn->is_primary()) { - CInode *oldin = cow_inode(dn->inode, dn->inode->first, follows); + assert(oldfirst == dn->inode->first); + CInode *oldin = cow_inode(dn->inode, follows); CDentry *olddn = dn->dir->add_primary_dentry(dn->name, oldin, oldfirst, follows); dout(10) << " olddn " << *olddn << dendl; metablob->add_primary_dentry(olddn, true); diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h index defe2a8c30994..12076931f4828 100644 --- a/src/mds/MDCache.h +++ b/src/mds/MDCache.h @@ -472,7 +472,7 @@ public: // journal helpers CInode *pick_inode_snap(CInode *in, snapid_t follows); - CInode *cow_inode(CInode *in, snapid_t first, snapid_t last); + CInode *cow_inode(CInode *in, snapid_t last); void journal_cow_dentry(EMetaBlob *metablob, CDentry *dn, snapid_t follows=CEPH_NOSNAP); void journal_cow_inode(EMetaBlob *metablob, CInode *in, snapid_t follows=CEPH_NOSNAP); void journal_dirty_inode(EMetaBlob *metablob, CInode *in, snapid_t follows=CEPH_NOSNAP); diff --git a/src/messages/MClientFileCaps.h b/src/messages/MClientFileCaps.h index 5d9b6774356d2..02022beb54fa3 100644 --- a/src/messages/MClientFileCaps.h +++ b/src/messages/MClientFileCaps.h @@ -114,7 +114,8 @@ class MClientFileCaps : public Message { << " wanted" << cap_string(h.wanted) << " size " << h.size << "/" << h.max_size << " mtime " << utime_t(h.mtime) - << " tws " << h.time_warp_seq; + << " tws " << h.time_warp_seq + << " snaps " << snaps; if (h.migrate_seq) out << " mseq " << h.migrate_seq; out << ")"; -- 2.39.5