git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mds: no link for current parent; rename some fields
author Sage Weil <sage@newdream.net>
Tue, 8 Jul 2008 03:52:04 +0000 (20:52 -0700)
committer Sage Weil <sage@newdream.net>
Tue, 8 Jul 2008 03:52:04 +0000 (20:52 -0700)
src/TODO
src/mds/CInode.cc
src/mds/CInode.h
src/mds/Locker.cc
src/mds/MDCache.cc
src/mds/Migrator.cc
src/mds/Server.cc
src/mds/snap.cc
src/mds/snap.h

index a649fc6c2c3f10229be766551fbfe56f99f94385..1fe4ef8a60310f60c4285ba4d9166dca3fec3210 100644 (file)
--- a/src/TODO
+++ b/src/TODO
@@ -218,71 +218,24 @@ remaining hard problems
 snapshot notes --
 
 todo
-/- basic types (snapid_t, etc.)
-/- snap lineage in MOSDOp
 - rados bits to do clone+write
-/  - cloning
   - fix cloning on unlinked file (where snaps=[], but head may have follows_snap attr)
-  - make sense of snap_highwater...
-
   - figure out how to fix up rados logging
   - snap collections
   - garbage collection
-- mds types
-- client capgroups
-- mds snapid allocation
+- realms
+  - make better sense of snap_highwater...?
 - snap creation
-  - async SnapClient for the (possibly remote) SnapTable
+  - enforce name uniqueness?
+  - async SnapClient for the possibly remote SnapTable
     - hmm, can we generalize any of AnchorClient?
-- mds metadata versioning
-- mds server ops
-
-- base types
-
-typedef __u64 snapid_t;
-#define MAXSNAP (snapid_t)(0xffffffffffffffull)  /* 56 bits.. see ceph_pg */
-#define NOSNAP  (snapid_t)(-1)
-
-- let's go with [first, last] throughout, instead of non-inclusive drev...
-
-
-
-mds
-- break mds hierarchy into snaprealms
-  - keep per-realm inode xlists, so that breaking a realm is O(size(realm))
 
-struct Snap {
-  snapid_t snapid;
-  string name;
-  utime_t ctime;
-};
-
-struct snaplink_t {
-       snaprealm *realm;
-       snapid_t first;
-};
-struct SnapRealm {
-       inodeno_t dirino;
-       map<snapid_t, Snap> snaps;
-
-       int nlink;  
-       multimap<snapid_t, snaplink_t> parents;   // key is "last" (or NOSNAP)
-       multimap<snapid_t, snaplink_t> children;
-
-       xlist<CInode*> inodes_with_caps;   // used for efficient realm splits
-};
-- realm's parent can vary over time; we need to track the full history, so that we know which parents' snaps to include in the snap lineage.
+- mds metadata versioning
+  - (dir) inode versions..
 
-- link client caps to realm, so that snapshot creation is O(num_child_realms*num_clients)
-  - keep per-realm, per-client record with cap refcount, to avoid traversinng realm inode lists looking for caps
+- will snapshots and CAS play nice?
 
-struct CapabilityGroup {
-   int client;
-   xlist<Capability*> caps;
-   SnapRealm *realm;
-};
-in SnapRealm, 
-   map<int, CapabilityGroup*> client_cap_groups;  // used to identify clients who need snap notifications
+- mds server ops
 
 - when we create a snapshot,
   - xlock snaplock
index e429b635d8323a09436d964aa745940a9a25824e..823bfac553f2806eff3c3fb8498640b7365ca28a 100644 (file)
@@ -981,14 +981,14 @@ CInodeDiscover* CInode::replicate_to( int rep )
 void CInode::open_snaprealm()
 {
   if (!snaprealm) {
+    SnapRealm *parent = find_snaprealm();
     snaprealm = new SnapRealm(mdcache, this);
-
-    snaprealm->open_parent = find_containing_snaprealm();
-    if (snaprealm->open_parent) {
-      snaprealm->open_parent->open_children.insert(snaprealm);
+    if (parent) {
+      snaprealm->parent = parent;
+      parent->open_children.insert(snaprealm);
       dout(10) << " opened snaprealm " << snaprealm
-              << " parent is " << snaprealm->open_parent
-              << " siblings are " << snaprealm->open_parent->open_children
+              << " parent is " << parent
+              << " siblings are " << parent->open_children
               << dendl;
     }
   }
@@ -996,8 +996,8 @@ void CInode::open_snaprealm()
 void CInode::close_snaprealm()
 {
   if (snaprealm) {
-    if (snaprealm->open_parent)
-      snaprealm->open_parent->open_children.erase(snaprealm);
+    if (snaprealm->parent)
+      snaprealm->parent->open_children.erase(snaprealm);
     delete snaprealm;
     snaprealm = 0;
   }
@@ -1007,15 +1007,12 @@ void CInode::close_snaprealm()
  * note: the walk starts at *this, so the result _is_ inclusive of
  * *this->snaprealm when it is set; otherwise the nearest ancestor's realm.
  */ 
-SnapRealm *CInode::find_containing_snaprealm()
+SnapRealm *CInode::find_snaprealm()
 {
   CInode *cur = this;
-  while (cur->get_parent_dn()) {
+  while (cur->get_parent_dn() && !cur->snaprealm)
     cur = cur->get_parent_dn()->get_dir()->get_inode();
-    if (cur->snaprealm)
-      return cur->snaprealm;
-  }
-  return 0;
+  return cur->snaprealm;
 }
 
 void CInode::encode_snap(bufferlist &bl)
index cf88a229026f1b4f02f374cb36ebec2bbfc49ebb..498ff9fc8ba517a40b456338c4717a87042ffc3d 100644 (file)
@@ -444,7 +444,7 @@ public:
   // -- snap --
   void open_snaprealm();
   void close_snaprealm();
-  SnapRealm *find_containing_snaprealm();
+  SnapRealm *find_snaprealm();
   void encode_snap(bufferlist &bl);
   void decode_snap(bufferlist::iterator& p) {
     bufferlist snapbl;
@@ -490,7 +490,7 @@ public:
   Capability *add_client_cap(int client, CInode *in) {
     if (client_caps.empty()) {
       get(PIN_CAPS);
-      containing_realm = find_containing_snaprealm();
+      containing_realm = find_snaprealm();
       containing_realm->inodes_with_caps.push_back(&xlist_caps);
     }
 
index 1a10d1c435da8d6041a270454e4e1e396b31c00b..376b2e297ce2bb3d37dedd020cd89f4ad9ceb6d3 100644 (file)
@@ -623,7 +623,7 @@ bool Locker::issue_caps(CInode *in)
                << dendl;
         mds->send_message_client(new MClientFileCaps(CEPH_CAP_OP_GRANT,
                                                     in->inode,
-                                                    in->find_containing_snaprealm()->inode->ino(),
+                                                    in->find_snaprealm()->inode->ino(),
                                                     cap->get_last_seq(),
                                                     cap->pending(),
                                                     cap->wanted(),
@@ -646,7 +646,7 @@ void Locker::issue_truncate(CInode *in)
     Capability *cap = it->second;
     mds->send_message_client(new MClientFileCaps(CEPH_CAP_OP_TRUNC,
                                                 in->inode,
-                                                in->find_containing_snaprealm()->inode->ino(),
+                                                in->find_snaprealm()->inode->ino(),
                                                 cap->get_last_seq(),
                                                 cap->pending(),
                                                 cap->wanted(),
@@ -906,7 +906,7 @@ void Locker::share_inode_max_size(CInode *in)
       dout(10) << "share_inode_max_size with client" << client << dendl;
       mds->send_message_client(new MClientFileCaps(CEPH_CAP_OP_GRANT,
                                                   in->inode,
-                                                  in->find_containing_snaprealm()->inode->ino(),
+                                                  in->find_snaprealm()->inode->ino(),
                                                   cap->get_last_seq(),
                                                   cap->pending(),
                                                   cap->wanted(),
index 506b9e0bdd890c57ef3129cbc555a84d7e6c76f3..b099c932489a9e642168252153f5cd973b635673 100644 (file)
@@ -2789,7 +2789,7 @@ void MDCache::rejoin_import_cap(CInode *in, int client, inode_caps_reconnect_t&
   session->touch_cap(cap);
   
   // send IMPORT
-  SnapRealm *realm = in->find_containing_snaprealm();
+  SnapRealm *realm = in->find_snaprealm();
   MClientFileCaps *reap = new MClientFileCaps(CEPH_CAP_OP_IMPORT,
                                              in->inode,
                                              realm->inode->ino(),
@@ -2970,7 +2970,7 @@ void MDCache::do_file_recover()
     CInode *in = *file_recover_queue.begin();
     file_recover_queue.erase(in);
 
-    vector<snapid_t> *snaps = in->find_containing_snaprealm()->get_snap_vector();
+    vector<snapid_t> *snaps = in->find_snaprealm()->get_snap_vector();
 
     if (in->inode.max_size > in->inode.size) {
       dout(10) << "do_file_recover starting " << in->inode.size << "/" << in->inode.max_size 
index 1e5d159c991da178b89bf89e516eaccbd5769f8b..a2150a8399799c6e467c4d683a8c4e4b7e963925 100644 (file)
@@ -898,7 +898,7 @@ void Migrator::finish_export_inode_caps(CInode *in)
            << " exported caps on " << *in << dendl;
     MClientFileCaps *m = new MClientFileCaps(CEPH_CAP_OP_EXPORT,
                                             in->inode, 
-                                            in->find_containing_snaprealm()->inode->ino(),
+                                            in->find_snaprealm()->inode->ino(),
                                              cap->get_last_seq(), 
                                              cap->pending(),
                                              cap->wanted(),
@@ -2053,7 +2053,7 @@ void Migrator::finish_import_inode_caps(CInode *in, int from,
     }
     cap->merge(it->second);
 
-    SnapRealm *realm = in->find_containing_snaprealm();
+    SnapRealm *realm = in->find_snaprealm();
     MClientFileCaps *caps = new MClientFileCaps(CEPH_CAP_OP_IMPORT,
                                                in->inode,
                                                realm->inode->ino(),
index 23b91844f51f7e1bf20efdad1cf0423fae8c0657..81af42a867cbbfe2ead3950fd64eeb70942aa1cb 100644 (file)
@@ -4433,7 +4433,7 @@ void Server::_do_open(MDRequest *mdr, CInode *cur)
   reply->set_file_caps_seq(cap->get_last_seq());
   reply->set_file_caps_mseq(cap->get_mseq());
 
-  SnapRealm *realm = cur->find_containing_snaprealm();
+  SnapRealm *realm = cur->find_snaprealm();
   reply->get_snaps() = *realm->get_snap_vector();
   reply->set_snap_info(realm->inode->ino(), realm->created, realm->snap_highwater);
   dout(10) << " snaprealm is " << *realm << " snaps=" << reply->get_snaps() << " on " << *realm->inode << dendl;
@@ -4694,6 +4694,9 @@ void Server::handle_client_mksnap(MDRequest *mdr)
   snapid_t snapid = mds->snaptable->create(diri->ino(), req->get_path2(), mdr->now);
   dout(10) << " snapid is " << snapid << dendl;
 
+
+  // GO.
+
   // create realm?
   inodeno_t split_parent = 0;
   if (!diri->snaprealm) {
@@ -4701,18 +4704,10 @@ void Server::handle_client_mksnap(MDRequest *mdr)
     diri->open_snaprealm();
     diri->snaprealm->created = snapid;
 
-    // link them up
-    // HACK!  parent may be on another mds...
-
-    SnapRealm *parent = diri->snaprealm->open_parent;
+    // split existing caps
+    SnapRealm *parent = diri->snaprealm->parent;
     assert(parent);
     assert(parent->open_children.count(diri->snaprealm));
-    snaplink_t link;
-    link.first = 0;
-    link.dirino = parent->inode->ino();
-    diri->snaprealm->parents.insert(pair<snapid_t,snaplink_t>(CEPH_NOSNAP, link));
-
-    // split existing caps
     parent->split_at(diri->snaprealm);
     split_parent = parent->inode->ino();
   }
index ffc8fcf71bca1c1de7037435ca4e6fb0e6ba8aa5..1e6b9be647cd024a1f5568aac857ed142ed5b1e1 100644 (file)
 bool SnapRealm::open_parents(MDRequest *mdr)
 {
   dout(10) << "open_parents" << dendl;
-  for (multimap<snapid_t, snaplink_t>::iterator p = parents.begin();
-       p != parents.end();
-       p++) {
+
+  // make sure my current parent's parents are open...
+  if (parent) {
+    dout(10) << " parent is " << *parent
+            << " on " << *parent->inode << dendl;
+    if (!parent->open_parents(mdr))
+      return false;
+  }
+
+  // and my past parents too!
+  for (map<snapid_t, snaplink_t>::iterator p = past_parents.begin();
+       p != past_parents.end();
+       p++) {    
     CInode *parent = mdcache->get_inode(p->second.dirino);
     if (parent)
       continue;
@@ -58,17 +68,22 @@ void SnapRealm::get_snap_set(set<snapid_t> &s, snapid_t first, snapid_t last)
     s.insert(p->first);
 
   // include snaps for parents during intervals that intersect [first,last]
-  for (multimap<snapid_t, snaplink_t>::iterator p = parents.lower_bound(first);
-       p != parents.end() && p->first >= first && p->second.first <= last;
+  snapid_t thru = first;
+  for (map<snapid_t, snaplink_t>::iterator p = past_parents.lower_bound(first);
+       p != past_parents.end() && p->first >= first && p->second.first <= last;
        p++) {
-    CInode *parent = mdcache->get_inode(p->second.dirino);
-    assert(parent);  // call open_parents first!
-    assert(parent->snaprealm);
-
-    parent->snaprealm->get_snap_set(s, 
-                                   MAX(first, p->second.first),
-                                   MIN(last, p->first));                                   
+    CInode *oldparent = mdcache->get_inode(p->second.dirino);
+    assert(oldparent);  // call open_parents first!
+    assert(oldparent->snaprealm);
+    
+    thru = MIN(last, p->first);
+    oldparent->snaprealm->get_snap_set(s, 
+                                      MAX(first, p->second.first),
+                                      thru);
+    thru++;
   }
+  if (thru <= last && parent)
+    parent->get_snap_set(s, thru, last);
 }
 
 /*
@@ -100,7 +115,7 @@ vector<snapid_t> *SnapRealm::update_snap_vector(snapid_t creating)
     return get_snap_vector();
   }
   snap_highwater = creating;
-  cached_snaps.push_back(creating);
+  cached_snaps.insert(cached_snaps.begin(), creating); // FIXME.. we should store this in reverse!
   return &cached_snaps;
 }
 
@@ -110,7 +125,7 @@ void SnapRealm::split_at(SnapRealm *child)
   dout(10) << "split_at " << *child 
           << " on " << *child->inode << dendl;
 
-  // split children
+  // split open_children
   dout(10) << " my children are " << open_children << dendl;
   for (set<SnapRealm*>::iterator p = open_children.begin();
        p != open_children.end(); ) {
@@ -118,7 +133,7 @@ void SnapRealm::split_at(SnapRealm *child)
     if (realm != child &&
        child->inode->is_ancestor_of(realm->inode)) {
       dout(20) << " child gets child realm " << *realm << " on " << *realm->inode << dendl;
-      realm->open_parent = child;
+      realm->parent = child;
       child->open_children.insert(realm);
       open_children.erase(p++);
     } else {
index 3f06c7ecf0d43ab351b76862cf90bb101e3651b1..3e3299e6b283fccb50805176d826c06fd9864564 100644 (file)
@@ -85,27 +85,27 @@ struct SnapRealm {
   // realm state
   snapid_t created;
   map<snapid_t, SnapInfo> snaps;
-  multimap<snapid_t, snaplink_t> parents;  // key is "last" (or NOSNAP)
+  map<snapid_t, snaplink_t> past_parents;  // key is "last" (or NOSNAP)
 
   void encode(bufferlist& bl) const {
     ::encode(created, bl);
     ::encode(snaps, bl);
-    ::encode(parents, bl);
+    ::encode(past_parents, bl);
   }
   void decode(bufferlist::iterator& p) {
     ::decode(created, p);
     ::decode(snaps, p);
-    ::decode(parents, p);
+    ::decode(past_parents, p);
   }
 
   // in-memory state
   MDCache *mdcache;
   CInode *inode;
 
-  // caches?
-  SnapRealm *open_parent;
+  SnapRealm *parent;
   set<SnapRealm*> open_children;    // active children that are currently open
 
+  // caches?
   vector<snapid_t> cached_snaps;
   snapid_t snap_highwater;
 
@@ -115,7 +115,7 @@ struct SnapRealm {
   SnapRealm(MDCache *c, CInode *in) : 
     created(0),
     mdcache(c), inode(in),
-    open_parent(0),
+    parent(0),
     snap_highwater(0) 
   { }
 
@@ -139,18 +139,14 @@ WRITE_CLASS_ENCODER(SnapRealm)
 
 inline ostream& operator<<(ostream& out, const SnapRealm &realm) {
   out << "snaprealm(" << realm.snaps;
-  if (realm.parents.size()) {
-    out << " parents=(";
-    for (multimap<snapid_t, snaplink_t>::const_iterator p = realm.parents.begin(); 
-        p != realm.parents.end(); 
+  if (realm.past_parents.size()) {
+    out << " past_parents=(";
+    for (map<snapid_t, snaplink_t>::const_iterator p = realm.past_parents.begin(); 
+        p != realm.past_parents.end(); 
         p++) {
-      if (p != realm.parents.begin()) out << ",";
-      out << p->second.first << "-";
-      if (p->first == CEPH_NOSNAP)
-       out << "head";
-      else
-       out << p->first;
-      out << "=" << p->second.dirino;
+      if (p != realm.past_parents.begin()) out << ",";
+      out << p->second.first << "-" << p->first
+         << "=" << p->second.dirino;
     }
     out << ")";
   }