From d6b3d66dddee5f7a10ec5ba198e5a3a6c09d3477 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 10 Jul 2008 15:50:34 -0700 Subject: [PATCH] mds: cap flush on snap updates; mds may update snapshotted inodes --- src/client/Client.cc | 37 ++++++++++++++++++++-- src/client/Client.h | 7 +++-- src/mds/CDentry.h | 11 ++++--- src/mds/CDir.cc | 14 ++++----- src/mds/CDir.h | 6 ++-- src/mds/CInode.cc | 8 +++-- src/mds/Locker.cc | 22 +++++++++---- src/mds/MDCache.cc | 74 ++++++++++++++++++++++++++++++++++++++++++++ src/mds/MDCache.h | 5 +++ src/mds/Server.cc | 2 +- src/mds/snap.cc | 6 ++-- src/mds/snap.h | 7 +++++ 12 files changed, 167 insertions(+), 32 deletions(-) diff --git a/src/client/Client.cc b/src/client/Client.cc index d72eccc2aa118..6d90da3c06fe4 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -1358,7 +1358,7 @@ void Client::put_cap_ref(Inode *in, int cap) } } -void Client::check_caps(Inode *in) +void Client::check_caps(Inode *in, bool force_dirty) { int wanted = in->caps_wanted(); int used = in->caps_used(); @@ -1399,6 +1399,10 @@ void Client::check_caps(Inode *in) goto ack; } + if (force_dirty && + (cap->issued & (CEPH_CAP_WR|CEPH_CAP_WRBUFFER))) + goto ack; + if ((cap->issued & ~wanted) == 0) continue; /* nothing extra, all good */ @@ -1421,6 +1425,7 @@ void Client::check_caps(Inode *in) in->reported_size = in->inode.size; m->set_max_size(in->wanted_max_size); in->requested_max_size = in->wanted_max_size; + m->get_snaps() = in->snaprealm->snaps; messenger->send_message(m, mdsmap->get_inst(it->first)); if (wanted == 0) mds_sessions[it->first].num_caps--; @@ -1522,6 +1527,7 @@ void Client::add_update_cap(Inode *in, int mds, if (in->caps.empty()) { assert(in->snaprealm == 0); in->snaprealm = get_snap_realm(realm); + in->snaprealm->inodes_with_caps.push_back(&in->snaprealm_item); in->get(); dout(15) << "add_update_cap first one, opened snaprealm " << in->snaprealm << dendl; } @@ -1554,6 +1560,7 @@ void Client::remove_cap(Inode *in, int mds) put_inode(in); put_snap_realm(in->snaprealm); in->snaprealm = 0; + in->snaprealm_item.remove_myself(); } } @@ -1566,14 +1573,28 @@ void Client::remove_all_caps(Inode *in) put_inode(in); put_snap_realm(in->snaprealm); in->snaprealm = 0; + in->snaprealm_item.remove_myself(); } } void Client::maybe_update_snaprealm(SnapRealm *realm, snapid_t snap_created, snapid_t snap_highwater, vector& snaps) { - if (realm->maybe_update(snap_created, snap_highwater, snaps)) + if (snap_created) + realm->created = snap_created; + + if (realm->highwater == 0 || snap_highwater > realm->highwater) { dout(10) << *realm << " now " << snaps << " highwater " << snap_highwater << dendl; + + // writeback any dirty caps _before_ updating snap list (i.e. with old snap info) + for (xlist::iterator p = realm->inodes_with_caps.begin(); !p.end(); ++p) { + Inode *in = *p; + check_caps(in, true); // force writeback of write caps + } + + realm->snaps = snaps; // ok. + realm->highwater = snap_highwater; + } } void Client::handle_snap(MClientSnap *m) @@ -1584,6 +1605,13 @@ void Client::handle_snap(MClientSnap *m) if (m->split) { SnapRealm *realm = get_snap_realm(m->split); realm->created = m->snap_created; + if (realm->snaps.empty()) { + // new split.. pretend we have one less snap than we do now! + vector& newsnaps = m->realms[m->split]; + realm->snaps.resize(newsnaps.size() - 1); + for (unsigned i=0; isnaps.size(); i++) + realm->snaps[i] = newsnaps[i+1]; + } dout(10) << " splitting off " << *realm << dendl; for (list::iterator p = m->split_inos.begin(); p != m->split_inos.end(); @@ -1599,7 +1627,10 @@ void Client::handle_snap(MClientSnap *m) } dout(10) << " moving " << *in << " from " << *in->snaprealm << dendl; put_snap_realm(in->snaprealm); + in->snaprealm_item.remove_myself(); + in->snaprealm = realm; + realm->inodes_with_caps.push_back(&in->snaprealm_item); realm->nref++; } } @@ -1609,6 +1640,7 @@ void Client::handle_snap(MClientSnap *m) for (map >::iterator p = m->realms.begin(); p != m->realms.end(); p++) { + dout(10) << "realm " << p->first << " snaps " << p->second << dendl; SnapRealm *realm = get_snap_realm(p->first); maybe_update_snaprealm(realm, 0, m->snap_highwater, p->second); put_snap_realm(realm); @@ -1803,6 +1835,7 @@ void Client::handle_file_caps(MClientFileCaps *m) m->set_mtime(in->inode.mtime); m->set_atime(in->inode.atime); m->set_wanted(wanted); + m->get_snaps() = in->snaprealm->snaps; // just in case it's newer (via another mds) } } else if (old_caps == new_caps) { dout(10) << " caps unchanged at " << cap_string(old_caps) << dendl; diff --git a/src/client/Client.h b/src/client/Client.h index 76bffcf9a2310..cfc94f7f489f4 100644 --- a/src/client/Client.h +++ b/src/client/Client.h @@ -134,6 +134,8 @@ struct SnapRealm { snapid_t highwater; vector snaps; + xlist inodes_with_caps; + SnapRealm(inodeno_t i) : dirino(i), nref(0), created(0), highwater(0) { } @@ -183,6 +185,7 @@ class Inode { capseq_t exporting_mseq; SnapRealm *snaprealm; + xlist::item snaprealm_item; //int open_by_mode[CEPH_FILE_MODE_NUM]; map open_by_mode; @@ -237,7 +240,7 @@ class Inode { lease_mask(0), lease_mds(-1), dir_auth(-1), dir_hashed(false), dir_replicated(false), exporting_issued(0), exporting_mds(-1), exporting_mseq(0), - snaprealm(0), + snaprealm(0), snaprealm_item(this), reported_size(0), wanted_max_size(0), requested_max_size(0), ref(0), ll_ref(0), dir(0), dn(0), symlink(0), @@ -807,7 +810,7 @@ protected: void handle_snap(class MClientSnap *m); void handle_file_caps(class MClientFileCaps *m); - void check_caps(Inode *in); + void check_caps(Inode *in, bool force_dirty=false); void put_cap_ref(Inode *in, int cap); void _release(Inode *in, bool checkafter=true); diff --git a/src/mds/CDentry.h b/src/mds/CDentry.h index 049945d18589c..93d9e00c773d7 100644 --- a/src/mds/CDentry.h +++ b/src/mds/CDentry.h @@ -121,22 +121,23 @@ public: public: // cons - CDentry(const nstring& n, CInode *in, snapid_t f=0, snapid_t l=CEPH_NOSNAP) : + CDentry(const nstring& n, + snapid_t f, snapid_t l) : name(n), first(f), last(l), remote_ino(0), remote_d_type(0), - inode(in), dir(0), + inode(0), dir(0), version(0), projected_version(0), xlist_dirty(this), dir_offset(0), auth_pins(0), nested_auth_pins(0), nested_anchors(0), lock(this, CEPH_LOCK_DN, WAIT_LOCK_OFFSET) { } - CDentry(const nstring& n, inodeno_t ino, unsigned char dt, CInode *in=0, - snapid_t f=0, snapid_t l=CEPH_NOSNAP) : + CDentry(const nstring& n, inodeno_t ino, unsigned char dt, + snapid_t f, snapid_t l) : name(n), first(f), last(l), remote_ino(ino), remote_d_type(dt), - inode(in), dir(0), + inode(0), dir(0), version(0), projected_version(0), xlist_dirty(this), dir_offset(0), diff --git a/src/mds/CDir.cc b/src/mds/CDir.cc index 1dc2b872a6e58..f6c6a61b4c7a8 100644 --- a/src/mds/CDir.cc +++ b/src/mds/CDir.cc @@ -193,10 +193,10 @@ CDentry* CDir::add_null_dentry(const nstring& dname, snapid_t first, snapid_t last) { // foreign - assert(lookup(dname) == 0); - + assert(lookup(dname, last) == 0); + // create dentry - CDentry* dn = new CDentry(dname, NULL, first, last); + CDentry* dn = new CDentry(dname, first, last); if (is_auth()) dn->state_set(CDentry::STATE_AUTH); cache->lru.lru_insert_mid(dn); @@ -226,10 +226,10 @@ CDentry* CDir::add_primary_dentry(const nstring& dname, CInode *in, snapid_t first, snapid_t last) { // primary - assert(lookup(dname) == 0); + assert(lookup(dname, last) == 0); // create dentry - CDentry* dn = new CDentry(dname, NULL, first, last); + CDentry* dn = new CDentry(dname, first, last); if (is_auth()) dn->state_set(CDentry::STATE_AUTH); cache->lru.lru_insert_mid(dn); @@ -258,10 +258,10 @@ CDentry* CDir::add_remote_dentry(const nstring& dname, inodeno_t ino, unsigned c snapid_t first, snapid_t last) { // foreign - assert(lookup(dname) == 0); + assert(lookup(dname, last) == 0); // create dentry - CDentry* dn = new CDentry(dname, ino, d_type, NULL, first, last); + CDentry* dn = new CDentry(dname, ino, d_type, first, last); if (is_auth()) dn->state_set(CDentry::STATE_AUTH); cache->lru.lru_insert_mid(dn); diff --git a/src/mds/CDir.h b/src/mds/CDir.h index c34c0827e184f..c96caf159cd7d 100644 --- a/src/mds/CDir.h +++ b/src/mds/CDir.h @@ -301,11 +301,11 @@ protected: } CDentry* add_null_dentry(const nstring& dname, - snapid_t first=0, snapid_t last=CEPH_NOSNAP); + snapid_t first=1, snapid_t last=CEPH_NOSNAP); CDentry* add_primary_dentry(const nstring& dname, CInode *in, - snapid_t first=0, snapid_t last=CEPH_NOSNAP); + snapid_t first=1, snapid_t last=CEPH_NOSNAP); CDentry* add_remote_dentry(const nstring& dname, inodeno_t ino, unsigned char d_type, - snapid_t first=0, snapid_t last=CEPH_NOSNAP); + snapid_t first=1, snapid_t last=CEPH_NOSNAP); void remove_dentry( CDentry *dn ); // delete dentry void link_remote_inode( CDentry *dn, inodeno_t ino, unsigned char d_type); void link_remote_inode( CDentry *dn, CInode *in ); diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index 823bfac553f28..296a4c9c694c5 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -50,14 +50,16 @@ ostream& operator<<(ostream& out, CInode& in) { filepath path; in.make_path(path); - out << "[inode " << in.inode.ino << " " << path << (in.is_dir() ? "/ ":" "); + out << "[inode " << in.inode.ino << " " << path << (in.is_dir() ? "/":""); + if (in.snapid) + out << " SNAP=" << in.snapid; if (in.is_auth()) { - out << "auth"; + out << " auth"; if (in.is_replicated()) out << in.get_replicas(); } else { pair a = in.authority(); - out << "rep@" << a.first; + out << " rep@" << a.first; if (a.second != CDIR_AUTH_UNKNOWN) out << "," << a.second; out << "." << in.get_replica_nonce(); diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index ee3a8fc208eef..b49fe010537f3 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -1050,8 +1050,14 @@ void Locker::handle_client_file_caps(MClientFileCaps *m) if ((dirty || change_max) && !in->is_base()) { // FIXME.. what about root inode mtime/atime? EUpdate *le = new EUpdate(mds->mdlog, "size|max_size|mtime|ctime|atime update"); - inode_t *pi = in->project_inode(); - pi->version = in->pre_dirty(); + + snapid_t follows = 0; + if (m->get_snaps().size()) + follows = m->get_snaps()[0]; + CInode *upi = mdcache->pick_inode_snap(in, follows); + + inode_t *pi = upi->project_inode(); + pi->version = upi->pre_dirty(); if (change_max) { dout(7) << " max_size " << pi->max_size << " -> " << new_max << dendl; pi->max_size = new_max; @@ -1084,10 +1090,14 @@ void Locker::handle_client_file_caps(MClientFileCaps *m) } Mutation *mut = new Mutation; mut->ls = mds->mdlog->get_current_segment(); - file_wrlock_force(&in->filelock, mut); // wrlock for duration of journal - mut->auth_pin(in); - predirty_nested(mut, &le->metablob, in, 0, PREDIRTY_PRIMARY, false); - le->metablob.add_primary_dentry(in->parent, true, 0, pi); + if (upi == in) + file_wrlock_force(&in->filelock, mut); // wrlock for duration of journal + mut->auth_pin(upi); + predirty_nested(mut, &le->metablob, upi, 0, PREDIRTY_PRIMARY, false); + + mdcache->journal_dirty_inode(&le->metablob, upi, follows); + //le->metablob.add_primary_dentry(in->parent, true, 0, pi); + mds->mdlog->submit_entry(le, new C_Locker_FileUpdate_finish(this, in, mut, change_max)); } diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index f865390c6c839..e169b1f665212 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -929,6 +929,80 @@ int MDCache::num_subtrees_fullnonauth() +// =================================== +// journal helpers + +CInode *MDCache::pick_inode_snap(CInode *in, snapid_t follows) +{ + if (follows == 0) + return in; + + dout(10) << "pick_inode_snap follows " << follows << " on " << *in << dendl; + + SnapRealm *realm = in->find_snaprealm(); + vector& snaps = *realm->get_snap_vector(); + dout(10) << " realm " << *realm << " " << *realm->inode << dendl; + dout(10) << " snaps " << snaps << dendl; + + unsigned i=0; + while (i+1 < snaps.size() && snaps[i+1] > follows) i++; + CInode *t = 0; + do { + t = get_inode(in->ino(), snaps[i]); + if (t) + break; + } while (i-- > 0); + if (t) { + in = t; + dout(10) << "pick_inode_snap " << snaps[i] << " found " << *in << dendl; + } + return in; +} + +CInode *MDCache::cow_inode(CInode *in, snapid_t tosnap) +{ + CInode *oldin = new CInode(this); + oldin->inode = *in->get_previous_projected_inode(); + oldin->snapid = tosnap; + dout(10) << " oldin " << *oldin << dendl; + add_inode(oldin); + + //if (in->get_caps_issued() & (CEPH_CAP_WR|CEPH_CAP_WRBUFFER)) + //oldin->get(CInode::PIN_SNAPCAPS); + + return oldin; +} + +void MDCache::journal_dirty_inode(EMetaBlob *metablob, CInode *in, snapid_t follows) +{ + dout(10) << "journal_dirty_inode follows " << follows << " on " << *in << dendl; + CDentry *dn = in->parent; + dout(10) << " orig dn " << *dn << dendl; + + if (in->snaprealm || in->inode.is_dir()) { + // multiversion inode. + assert(0); + } else { + // is dn within current snap? + if (follows < dn->first) { + metablob->add_primary_dentry(dn, true, 0, in->get_projected_inode()); + } else { + snapid_t oldfirst = dn->first; + dn->first = follows+1; + + dout(10) << " dn " << *dn << dendl; + CInode *oldin = cow_inode(in, follows); + CDentry *olddn = dn->dir->add_primary_dentry(dn->name, oldin, oldfirst, follows); + dout(10) << " olddn " << *olddn << dendl; + + metablob->add_primary_dentry(olddn, true); + metablob->add_primary_dentry(dn, true, 0, in->get_projected_inode()); + } + } +} + + + // =================================== // slave requests diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h index 82c4b30acf037..59a0f5b686550 100644 --- a/src/mds/MDCache.h +++ b/src/mds/MDCache.h @@ -464,6 +464,11 @@ public: void request_drop_locks(MDRequest *r); void request_cleanup(MDRequest *r); + // journal helpers + CInode *pick_inode_snap(CInode *in, snapid_t follows); + CInode *cow_inode(CInode *in, snapid_t tosnap); + void journal_dirty_inode(EMetaBlob *metablob, CInode *in, snapid_t follows); + // slaves void add_uncommitted_master(metareqid_t reqid, LogSegment *ls, set &slaves) { uncommitted_masters[reqid].ls = ls; diff --git a/src/mds/Server.cc b/src/mds/Server.cc index cc75ec9b5b35c..374d244eef7aa 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -4754,7 +4754,7 @@ void Server::handle_client_mksnap(MDRequest *mdr) update->snap_highwater = snapid; if (split_parent) { update->snap_created = diri->snaprealm->created; - update->split = snapid; + update->split = diri->ino(); update->split_inos = split_inos; split_parent = 0; } diff --git a/src/mds/snap.cc b/src/mds/snap.cc index 3bf27915f6757..fe5dd26ea2afe 100644 --- a/src/mds/snap.cc +++ b/src/mds/snap.cc @@ -110,9 +110,9 @@ vector *SnapRealm::get_snap_vector() vector *SnapRealm::update_snap_vector(snapid_t creating) { - if (cached_snaps.empty() && !snap_highwater) { - snap_highwater = creating; - return get_snap_vector(); + if (!snap_highwater) { + assert(cached_snaps.empty()); + get_snap_vector(); } snap_highwater = creating; cached_snaps.insert(cached_snaps.begin(), creating); // FIXME.. we should store this in reverse! diff --git a/src/mds/snap.h b/src/mds/snap.h index 3e3299e6b283f..2a08408f90bbc 100644 --- a/src/mds/snap.h +++ b/src/mds/snap.h @@ -123,6 +123,13 @@ struct SnapRealm { void get_snap_set(set& s, snapid_t first=0, snapid_t last=CEPH_NOSNAP); vector *get_snap_vector(); vector *update_snap_vector(snapid_t adding=0); + snapid_t get_latest_snap() { + vector *snaps = get_snap_vector(); + if (snaps->empty()) + return 0; + else + return (*snaps)[0]; + } void split_at(SnapRealm *child); -- 2.39.5