From ee3fc3469c5e6add020225a820322f8ea9c9df0a Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 7 Jul 2008 20:52:04 -0700 Subject: [PATCH] mds: no link for current parent; rename some fields --- src/TODO | 63 ++++++--------------------------------------- src/mds/CInode.cc | 25 ++++++++---------- src/mds/CInode.h | 4 +-- src/mds/Locker.cc | 6 ++--- src/mds/MDCache.cc | 4 +-- src/mds/Migrator.cc | 4 +-- src/mds/Server.cc | 17 +++++------- src/mds/snap.cc | 45 +++++++++++++++++++++----------- src/mds/snap.h | 30 ++++++++++----------- 9 files changed, 77 insertions(+), 121 deletions(-) diff --git a/src/TODO b/src/TODO index a649fc6c2c3f1..1fe4ef8a60310 100644 --- a/src/TODO +++ b/src/TODO @@ -218,71 +218,24 @@ remaining hard problems snapshot notes -- todo -/- basic types (snapid_t, etc.) -/- snap lineage in MOSDOp - rados bits to do clone+write -/ - cloning - fix cloning on unlinked file (where snaps=[], but head may have follows_snap attr) - - make sense of snap_highwater... - - figure out how to fix up rados logging - snap collections - garbage collection -- mds types -- client capgroups -- mds snapid allocation +- realms + - make better sense of snap_highwater...? - snap creation - - async SnapClient for the (possibly remote) SnapTable + - enforce name uniqueness? + - async SnapClient for the possibly remote SnapTable - hmm, can we generalize any of AnchorClient? -- mds metadata versioning -- mds server ops - -- base types - -typedef __u64 snapid_t; -#define MAXSNAP (snapid_t)(0xffffffffffffffull) /* 56 bits.. see ceph_pg */ -#define NOSNAP (snapid_t)(-1) - -- let's go with [first, last] throughout, instead of non-inclusive drev... - - - -mds -- break mds hierarchy into snaprealms - - keep per-realm inode xlists, so that breaking a realm is O(size(realm)) -struct Snap { - snapid_t snapid; - string name; - utime_t ctime; -}; - -struct snaplink_t { - snaprealm *realm; - snapid_t first; -}; -struct SnapRealm { - inodeno_t dirino; - map snaps; - - int nlink; - multimap parents; // key is "last" (or NOSNAP) - multimap children; - - xlist inodes_with_caps; // used for efficient realm splits -}; -- realm's parent can vary over time; we need to track the full history, so that we know which parents' snaps to include in the snap lineage. +- mds metadata versioning + - (dir) inode versions.. -- link client caps to realm, so that snapshot creation is O(num_child_realms*num_clients) - - keep per-realm, per-client record with cap refcount, to avoid traversinng realm inode lists looking for caps +- will snapshots and CAS play nice? -struct CapabilityGroup { - int client; - xlist caps; - SnapRealm *realm; -}; -in SnapRealm, - map client_cap_groups; // used to identify clients who need snap notifications +- mds server ops - when we create a snapshot, - xlock snaplock diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index e429b635d8323..823bfac553f28 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -981,14 +981,14 @@ CInodeDiscover* CInode::replicate_to( int rep ) void CInode::open_snaprealm() { if (!snaprealm) { + SnapRealm *parent = find_snaprealm(); snaprealm = new SnapRealm(mdcache, this); - - snaprealm->open_parent = find_containing_snaprealm(); - if (snaprealm->open_parent) { - snaprealm->open_parent->open_children.insert(snaprealm); + if (parent) { + snaprealm->parent = parent; + parent->open_children.insert(snaprealm); dout(10) << " opened snaprealm " << snaprealm - << " parent is " << snaprealm->open_parent - << " siblings are " << snaprealm->open_parent->open_children + << " parent is " << parent + << " siblings are " << parent->open_children << dendl; } } @@ -996,8 +996,8 @@ void CInode::open_snaprealm() void CInode::close_snaprealm() { if (snaprealm) { - if (snaprealm->open_parent) - snaprealm->open_parent->open_children.erase(snaprealm); + if (snaprealm->parent) + snaprealm->parent->open_children.erase(snaprealm); delete snaprealm; snaprealm = 0; } @@ -1007,15 +1007,12 @@ void CInode::close_snaprealm() * note: this is _not_ inclusive of *this->snaprealm, as that is for * nested directory content. */ -SnapRealm *CInode::find_containing_snaprealm() +SnapRealm *CInode::find_snaprealm() { CInode *cur = this; - while (cur->get_parent_dn()) { + while (cur->get_parent_dn() && !cur->snaprealm) cur = cur->get_parent_dn()->get_dir()->get_inode(); - if (cur->snaprealm) - return cur->snaprealm; - } - return 0; + return cur->snaprealm; } void CInode::encode_snap(bufferlist &bl) diff --git a/src/mds/CInode.h b/src/mds/CInode.h index cf88a229026f1..498ff9fc8ba51 100644 --- a/src/mds/CInode.h +++ b/src/mds/CInode.h @@ -444,7 +444,7 @@ public: // -- snap -- void open_snaprealm(); void close_snaprealm(); - SnapRealm *find_containing_snaprealm(); + SnapRealm *find_snaprealm(); void encode_snap(bufferlist &bl); void decode_snap(bufferlist::iterator& p) { bufferlist snapbl; @@ -490,7 +490,7 @@ public: Capability *add_client_cap(int client, CInode *in) { if (client_caps.empty()) { get(PIN_CAPS); - containing_realm = find_containing_snaprealm(); + containing_realm = find_snaprealm(); containing_realm->inodes_with_caps.push_back(&xlist_caps); } diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index 1a10d1c435da8..376b2e297ce2b 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -623,7 +623,7 @@ bool Locker::issue_caps(CInode *in) << dendl; mds->send_message_client(new MClientFileCaps(CEPH_CAP_OP_GRANT, in->inode, - in->find_containing_snaprealm()->inode->ino(), + in->find_snaprealm()->inode->ino(), cap->get_last_seq(), cap->pending(), cap->wanted(), @@ -646,7 +646,7 @@ void Locker::issue_truncate(CInode *in) Capability *cap = it->second; mds->send_message_client(new MClientFileCaps(CEPH_CAP_OP_TRUNC, in->inode, - in->find_containing_snaprealm()->inode->ino(), + in->find_snaprealm()->inode->ino(), cap->get_last_seq(), cap->pending(), cap->wanted(), @@ -906,7 +906,7 @@ void Locker::share_inode_max_size(CInode *in) dout(10) << "share_inode_max_size with client" << client << dendl; mds->send_message_client(new MClientFileCaps(CEPH_CAP_OP_GRANT, in->inode, - in->find_containing_snaprealm()->inode->ino(), + in->find_snaprealm()->inode->ino(), cap->get_last_seq(), cap->pending(), cap->wanted(), diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 506b9e0bdd890..b099c932489a9 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -2789,7 +2789,7 @@ void MDCache::rejoin_import_cap(CInode *in, int client, inode_caps_reconnect_t& session->touch_cap(cap); // send IMPORT - SnapRealm *realm = in->find_containing_snaprealm(); + SnapRealm *realm = in->find_snaprealm(); MClientFileCaps *reap = new MClientFileCaps(CEPH_CAP_OP_IMPORT, in->inode, realm->inode->ino(), @@ -2970,7 +2970,7 @@ void MDCache::do_file_recover() CInode *in = *file_recover_queue.begin(); file_recover_queue.erase(in); - vector *snaps = in->find_containing_snaprealm()->get_snap_vector(); + vector *snaps = in->find_snaprealm()->get_snap_vector(); if (in->inode.max_size > in->inode.size) { dout(10) << "do_file_recover starting " << in->inode.size << "/" << in->inode.max_size diff --git a/src/mds/Migrator.cc b/src/mds/Migrator.cc index 1e5d159c991da..a2150a8399799 100644 --- a/src/mds/Migrator.cc +++ b/src/mds/Migrator.cc @@ -898,7 +898,7 @@ void Migrator::finish_export_inode_caps(CInode *in) << " exported caps on " << *in << dendl; MClientFileCaps *m = new MClientFileCaps(CEPH_CAP_OP_EXPORT, in->inode, - in->find_containing_snaprealm()->inode->ino(), + in->find_snaprealm()->inode->ino(), cap->get_last_seq(), cap->pending(), cap->wanted(), @@ -2053,7 +2053,7 @@ void Migrator::finish_import_inode_caps(CInode *in, int from, } cap->merge(it->second); - SnapRealm *realm = in->find_containing_snaprealm(); + SnapRealm *realm = in->find_snaprealm(); MClientFileCaps *caps = new MClientFileCaps(CEPH_CAP_OP_IMPORT, in->inode, realm->inode->ino(), diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 23b91844f51f7..81af42a867cbb 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -4433,7 +4433,7 @@ void Server::_do_open(MDRequest *mdr, CInode *cur) reply->set_file_caps_seq(cap->get_last_seq()); reply->set_file_caps_mseq(cap->get_mseq()); - SnapRealm *realm = cur->find_containing_snaprealm(); + SnapRealm *realm = cur->find_snaprealm(); reply->get_snaps() = *realm->get_snap_vector(); reply->set_snap_info(realm->inode->ino(), realm->created, realm->snap_highwater); dout(10) << " snaprealm is " << *realm << " snaps=" << reply->get_snaps() << " on " << *realm->inode << dendl; @@ -4694,6 +4694,9 @@ void Server::handle_client_mksnap(MDRequest *mdr) snapid_t snapid = mds->snaptable->create(diri->ino(), req->get_path2(), mdr->now); dout(10) << " snapid is " << snapid << dendl; + + // GO. + // create realm? inodeno_t split_parent = 0; if (!diri->snaprealm) { @@ -4701,18 +4704,10 @@ void Server::handle_client_mksnap(MDRequest *mdr) diri->open_snaprealm(); diri->snaprealm->created = snapid; - // link them up - // HACK! parent may be on another mds... - - SnapRealm *parent = diri->snaprealm->open_parent; + // split existing caps + SnapRealm *parent = diri->snaprealm->parent; assert(parent); assert(parent->open_children.count(diri->snaprealm)); - snaplink_t link; - link.first = 0; - link.dirino = parent->inode->ino(); - diri->snaprealm->parents.insert(pair(CEPH_NOSNAP, link)); - - // split existing caps parent->split_at(diri->snaprealm); split_parent = parent->inode->ino(); } diff --git a/src/mds/snap.cc b/src/mds/snap.cc index ffc8fcf71bca1..1e6b9be647cd0 100644 --- a/src/mds/snap.cc +++ b/src/mds/snap.cc @@ -30,9 +30,19 @@ bool SnapRealm::open_parents(MDRequest *mdr) { dout(10) << "open_parents" << dendl; - for (multimap::iterator p = parents.begin(); - p != parents.end(); - p++) { + + // make sure my current parents' parents are open... + if (parent) { + dout(10) << " parent is " << *parent + << " on " << *parent->inode << dendl; + if (!parent->open_parents(mdr)) + return false; + } + + // and my past parents too! + for (map::iterator p = past_parents.begin(); + p != past_parents.end(); + p++) { CInode *parent = mdcache->get_inode(p->second.dirino); if (parent) continue; @@ -58,17 +68,22 @@ void SnapRealm::get_snap_set(set &s, snapid_t first, snapid_t last) s.insert(p->first); // include snaps for parents during intervals that intersect [first,last] - for (multimap::iterator p = parents.lower_bound(first); - p != parents.end() && p->first >= first && p->second.first <= last; + snapid_t thru = first; + for (map::iterator p = past_parents.lower_bound(first); + p != past_parents.end() && p->first >= first && p->second.first <= last; p++) { - CInode *parent = mdcache->get_inode(p->second.dirino); - assert(parent); // call open_parents first! - assert(parent->snaprealm); - - parent->snaprealm->get_snap_set(s, - MAX(first, p->second.first), - MIN(last, p->first)); + CInode *oldparent = mdcache->get_inode(p->second.dirino); + assert(oldparent); // call open_parents first! + assert(oldparent->snaprealm); + + thru = MIN(last, p->first); + oldparent->snaprealm->get_snap_set(s, + MAX(first, p->second.first), + thru); + thru++; } + if (thru <= last && parent) + parent->get_snap_set(s, thru, last); } /* @@ -100,7 +115,7 @@ vector *SnapRealm::update_snap_vector(snapid_t creating) return get_snap_vector(); } snap_highwater = creating; - cached_snaps.push_back(creating); + cached_snaps.insert(cached_snaps.begin(), creating); // FIXME.. we should store this in reverse! return &cached_snaps; } @@ -110,7 +125,7 @@ void SnapRealm::split_at(SnapRealm *child) dout(10) << "split_at " << *child << " on " << *child->inode << dendl; - // split children + // split open_children dout(10) << " my children are " << open_children << dendl; for (set::iterator p = open_children.begin(); p != open_children.end(); ) { @@ -118,7 +133,7 @@ void SnapRealm::split_at(SnapRealm *child) if (realm != child && child->inode->is_ancestor_of(realm->inode)) { dout(20) << " child gets child realm " << *realm << " on " << *realm->inode << dendl; - realm->open_parent = child; + realm->parent = child; child->open_children.insert(realm); open_children.erase(p++); } else { diff --git a/src/mds/snap.h b/src/mds/snap.h index 3f06c7ecf0d43..3e3299e6b283f 100644 --- a/src/mds/snap.h +++ b/src/mds/snap.h @@ -85,27 +85,27 @@ struct SnapRealm { // realm state snapid_t created; map snaps; - multimap parents; // key is "last" (or NOSNAP) + map past_parents; // key is "last" (or NOSNAP) void encode(bufferlist& bl) const { ::encode(created, bl); ::encode(snaps, bl); - ::encode(parents, bl); + ::encode(past_parents, bl); } void decode(bufferlist::iterator& p) { ::decode(created, p); ::decode(snaps, p); - ::decode(parents, p); + ::decode(past_parents, p); } // in-memory state MDCache *mdcache; CInode *inode; - // caches? - SnapRealm *open_parent; + SnapRealm *parent; set open_children; // active children that are currently open + // caches? vector cached_snaps; snapid_t snap_highwater; @@ -115,7 +115,7 @@ struct SnapRealm { SnapRealm(MDCache *c, CInode *in) : created(0), mdcache(c), inode(in), - open_parent(0), + parent(0), snap_highwater(0) { } @@ -139,18 +139,14 @@ WRITE_CLASS_ENCODER(SnapRealm) inline ostream& operator<<(ostream& out, const SnapRealm &realm) { out << "snaprealm(" << realm.snaps; - if (realm.parents.size()) { - out << " parents=("; - for (multimap::const_iterator p = realm.parents.begin(); - p != realm.parents.end(); + if (realm.past_parents.size()) { + out << " past_parents=("; + for (map::const_iterator p = realm.past_parents.begin(); + p != realm.past_parents.end(); p++) { - if (p != realm.parents.begin()) out << ","; - out << p->second.first << "-"; - if (p->first == CEPH_NOSNAP) - out << "head"; - else - out << p->first; - out << "=" << p->second.dirino; + if (p != realm.past_parents.begin()) out << ","; + out << p->second.first << "-" << p->first + << "=" << p->second.dirino; } out << ")"; } -- 2.39.5