]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
*** empty log message ***
authorsage <sage@29311d96-e01e-0410-9327-a35deaab8ce9>
Thu, 5 May 2005 20:48:44 +0000 (20:48 +0000)
committersage <sage@29311d96-e01e-0410-9327-a35deaab8ce9>
Thu, 5 May 2005 20:48:44 +0000 (20:48 +0000)
git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@208 29311d96-e01e-0410-9327-a35deaab8ce9

ceph/config.cc
ceph/config.h
ceph/mds/MDCache.cc
ceph/mds/MDS.cc
ceph/mds/MDS.h
ceph/messages/MClientReply.h

index af39051857fd7a8795bdcc800a99d8d4f2c75da6..31c93108b8c955a7a056892d69b7a55657af7f5f 100644 (file)
@@ -42,6 +42,7 @@ md_config_t g_conf = {
 
   mds_heartbeat_op_interval: 200,
   mds_verify_export_dirauth: true,
+  mds_log_before_reply: true,
 
   // fakeclient
   num_fakeclient: 10,
index d6b0f0f60eff62db6b59ed5b828350f9a11ef62e..c1e3f001b0d4c6f1fbbba99dae34ec7f724220c1 100644 (file)
@@ -32,6 +32,7 @@ struct md_config_t {
 
   int   mds_heartbeat_op_interval;
   bool  mds_verify_export_dirauth;     // debug flag
+  bool  mds_log_before_reply;
 
   // fake client
   int num_fakeclient;
index 3ecf4fb54ccdacd56e68aa1ad32f578cb9357a79..31549fc8864d675f5d2324356b476adc6fe57d3c 100644 (file)
@@ -918,7 +918,7 @@ int MDCache::path_traverse(filepath& origpath,
        // dentry
        CDentry *dn = cur->dir->lookup(path[depth]);
 
-       // xlocked and null?
+       // xlocked and null?  ** all wrong, FIXME
        if (onfail == MDS_TRAVERSE_DISCOVERXLOCK &&
                dn && !dn->inode && dn->is_xlockedbyme(req) &&
                depth == path.depth()-1) {
index 9c7bd8a5e00d8b32c6220fb2c2a5417d3bfccbc5..8b6166d2ea450fc312397d13f1280303aa79a904 100644 (file)
@@ -371,18 +371,112 @@ void MDS::handle_ping(MPing *m)
 }
 
 
-void MDS::reply_request(MClientRequest *req, int r)
-{      
-  // send error
-  messenger->send_message(new MClientReply(req, r),
+/*******
+ * some generic stuff for finishing off requests
+ */
+
+/** C_MDS_CommitRequest -- Context whose finish() completes a client request once an earlier step is done.
+ * r == 0: hand the stored event to MDS::commit_request (log, then reply); r != 0: reply with r as the result. */
+
+class C_MDS_CommitRequest : public Context {
+  MDS *mds;                 // MDS whose commit_request()/reply_request() finish() calls
+  MClientRequest *req;      // client request being answered
+  MClientReply *reply;      // reply forwarded to the MDS (its result is overwritten on failure)
+  CInode *tracei;    // inode to include a trace for
+  LogEvent *event;          // optional log event still to be committed before replying (0 = none)
+public:
+  C_MDS_CommitRequest(MDS *mds, 
+                                        MClientRequest *req, MClientReply *reply, CInode *tracei,
+                                        LogEvent *event = 0) {   // event may be omitted when nothing is left to log
+       this->mds = mds;
+       this->req = req;
+       this->tracei = tracei;
+       this->reply = reply;
+       this->event = event;
+  }
+  void finish(int r) {      // r: result code of the step this context was waiting on
+       if (r == 0) {
+         // success.  log and reply.
+         mds->commit_request(req, reply, tracei, event);
+       } else {
+         // failure.  set failure code and reply.  NOTE(review): 'event' is neither logged nor freed here -- confirm LogEvent ownership.
+         reply->set_result(r);
+         mds->reply_request(req, reply, tracei);
+       }
+  }
+};
+
+/*
+ * send generic response (just an error code): builds a new MClientReply(req, r)
+ * and forwards it, with tracei, to the reply_request() overload that takes a reply. */
+void MDS::reply_request(MClientRequest *req, int r, CInode *tracei)   // r: result code; tracei: passed through unchanged
+{
+  reply_request(req, new MClientReply(req, r), tracei);
+}
+
+/*
+ * send given reply to the client that issued req, then finish the request in mdcache.
+ * include a trace to tracei (skipped when tracei is null).
+ */
+void MDS::reply_request(MClientRequest *req, MClientReply *reply, CInode *tracei) {
+  // include trace (only if the caller supplied an inode)
+  if (tracei)
+       reply->set_trace_dist( tracei, whoami );
+  
+  // send reply to the requesting client
+  messenger->send_message(reply,
                                                  MSG_ADDR_CLIENT(req->get_client()), 0,
                                                  MDS_PORT_SERVER);
-  
+
   // discard request
   mdcache->request_finish(req);
 }
 
+/* 
+ * commit event(s) to the metadata log, then reply.
+ * or, be sloppy and do it concurrently (see g_conf)
+ */
+void MDS::commit_request(MClientRequest *req,
+                                                MClientReply *reply,
+                                                CInode *tracei,
+                                                LogEvent *event,
+                                                LogEvent *event2) 
+{
+  if (g_conf.mds_log_before_reply) {
+       // SAFE mode!
+       
+       if (event) {
+         // log, then reply
+         // pass event2 as event1 (so we chain together!)
+         /*
+               WARNING: by chaining back to CommitRequest we may get
+               something not quite right if the log commit fails.  what 
+               happens (to the whole system!) then?   ** FIXME **
+         */
+         mdlog->submit_entry(event, 
+                                                 new C_MDS_CommitRequest(this, req, reply, tracei, event2));
+       }
+       else {
+         // just reply, no log entry (anymore).
+         reply_request(req, reply, tracei);
+       }
+  } else {
+       // SLOPPY mode!
+
+       // log
+       if (event) mdlog->submit_entry(event, NULL);
+       if (event2) mdlog->submit_entry(event2, NULL);
+
+       // reply
+       reply_request(req, reply, tracei);
+  }
+}
+
+
 
+/***
+ * process a client request
+ */
 
 void MDS::handle_client_request(MClientRequest *req)
 {
@@ -605,9 +699,8 @@ void MDS::handle_client_stat(MClientRequest *req,
 
   dout(10) << "reply to " << *req << " stat " << ref->inode.mtime << " pop " << ref->get_popularity() << endl;
   MClientReply *reply = new MClientReply(req);
-  reply->set_trace_dist( ref, whoami );
 
-  // FIXME: put inode info in reply...
+  // inode info is in the trace
 
   mdcache->inode_soft_read_finish(ref);
 
@@ -616,59 +709,13 @@ void MDS::handle_client_stat(MClientRequest *req,
   stat_req.hit();
   stat_ops++;
 
-  messenger->send_message(reply,
-                                                 MSG_ADDR_CLIENT(req->get_client()), 0,
-                                                 MDS_PORT_SERVER);
-
-  mdcache->request_finish(req);
-}
-
-
-
-// INODE UPDATES
-
-// SOFT
-
-class C_MDS_InodeSoftUpdateFinish : public Context {
-public:
-  CInode *in;
-  MClientRequest *req;
-  MDS *mds;
-  MClientReply *reply;
-  C_MDS_InodeSoftUpdateFinish(MDS *mds, MClientRequest *req, CInode *cur, MClientReply *reply) {
-       this->mds = mds;
-       this->in = cur;
-       this->req = req;
-       this->reply = reply;
-  }
-  virtual void finish(int result) {
-       mds->handle_client_inode_soft_update_2(req, reply, in);
-  }
-};
-
-void MDS::handle_client_inode_soft_update_2(MClientRequest *req,
-                                                                                       MClientReply *reply,
-                                                                                       CInode *cur)
-{
   // reply
-  dout(10) << "reply to " << *req << " inode soft update " << *cur << endl;
-  
-  messenger->send_message(reply,
-                                                 MSG_ADDR_CLIENT(req->get_client()), 0,
-                                                 MDS_PORT_SERVER);
-  
-  logger->inc("otouch");
-  stat_write.hit();
-  stat_req.hit();
-  stat_ops++;
-  
-  mdcache->inode_soft_write_finish(cur);
-
-  mdcache->request_finish(req);
+  reply_request(req, reply, ref);
 }
 
 
 
+// INODE UPDATES
 
 // utime
 
@@ -686,63 +733,22 @@ void MDS::handle_client_utime(MClientRequest *req,
   cur->inode.atime = mtime;
   if (cur->is_auth())
        cur->mark_dirty();
+
+  mdcache->inode_soft_write_finish(cur);
   
   // init reply
-  MClientReply *reply = new MClientReply(req);
-  reply->set_trace_dist( cur, whoami );
+  MClientReply *reply = new MClientReply(req, 0);
   reply->set_result(0);
 
-  // wait for log to finish
-  dout(10) << "log for " << *req << " utime " << cur->inode.mtime << endl;
-  mdlog->submit_entry(new EInodeUpdate(cur),
-                                         new C_MDS_InodeSoftUpdateFinish(this, req, cur, reply));
-  return;
+  // commit
+  commit_request(req, reply, cur,
+                                new EInodeUpdate(cur));
 }
 
                                                   
 
 // HARD
 
-class C_MDS_InodeHardUpdateFinish : public Context {
-public:
-  CInode *in;
-  MClientRequest *req;
-  MDS *mds;
-  MClientReply *reply;
-  C_MDS_InodeHardUpdateFinish(MDS *mds, MClientRequest *req, CInode *cur, MClientReply *reply) {
-       this->mds = mds;
-       this->in = cur;
-       this->req = req;
-       this->reply = reply;
-  }
-  virtual void finish(int result) {
-       mds->handle_client_inode_hard_update_2(req, reply, in);
-  }
-};
-
-void MDS::handle_client_inode_hard_update_2(MClientRequest *req,
-                                                                                       MClientReply *reply,
-                                                                                       CInode *cur)
-{
-  // reply
-  dout(10) << "reply to " << *req << " inode hard update " << *cur << endl;
-  
-  messenger->send_message(reply,
-                                                 MSG_ADDR_CLIENT(req->get_client()), 0,
-                                                 MDS_PORT_SERVER);
-  
-  logger->inc("otouch");
-  stat_write.hit();
-  stat_req.hit();
-  stat_ops++;
-  
-  // done
-  mdcache->inode_hard_write_finish(cur);
-
-  mdcache->request_finish(req);
-}
-
-
 // chmod
 
 void MDS::handle_client_chmod(MClientRequest *req,
@@ -761,16 +767,14 @@ void MDS::handle_client_chmod(MClientRequest *req,
   cur->inode.mode |= (mode & 04777);
   cur->mark_dirty();
 
+  mdcache->inode_hard_write_finish(cur);
+
   // start reply
-  MClientReply *reply = new MClientReply(req);
-  reply->set_trace_dist( cur, whoami );
-  reply->set_result(0);
+  MClientReply *reply = new MClientReply(req, 0);
 
-  // wait for log to finish
-  dout(10) << "log for " << *req << " chmod" << endl;
-  mdlog->submit_entry(new EInodeUpdate(cur),
-                                         new C_MDS_InodeHardUpdateFinish(this, req, cur, reply));
-  return;
+  // commit
+  commit_request(req, reply, cur,
+                                new EInodeUpdate(cur));
 }
 
 // chown
@@ -791,16 +795,14 @@ void MDS::handle_client_chown(MClientRequest *req,
   cur->inode.gid = gid;
   cur->mark_dirty();
 
+  mdcache->inode_hard_write_finish(cur);
+
   // start reply
-  MClientReply *reply = new MClientReply(req);
-  reply->set_trace_dist( cur, whoami );
-  reply->set_result(0);
+  MClientReply *reply = new MClientReply(req, 0);
 
-  // wait for log to finish
-  dout(10) << "log for " << *req << " chown" << endl;
-  mdlog->submit_entry(new EInodeUpdate(cur),
-                                         new C_MDS_InodeHardUpdateFinish(this, req, cur, reply));
-  return;
+  // commit
+  commit_request(req, reply, cur,
+                                new EInodeUpdate(cur));
 }
 
 
@@ -853,49 +855,49 @@ void MDS::handle_client_readdir(MClientRequest *req,
        return;
   mdcache->inode_hard_read_finish(cur);
 
+
+  if (!cur->dir->is_complete()) {
+       // fetch
+       dout(10) << " incomplete dir contents for readdir on " << *cur->dir << ", fetching" << endl;
+       mdstore->fetch_dir(cur->dir, new C_MDS_RetryRequest(this, req, cur));
+       return;
+  }
   
-  if (cur->dir->is_complete()) {
-       // yay, reply
-       MClientReply *reply = new MClientReply(req);
-       
-       // FIXME: need to sync all inodes in this dir.  blech!
+  // yay, reply
+  MClientReply *reply = new MClientReply(req);
+  
+  // build dir contents
+  CDir_map_t::iterator it;
+  int numfiles = 0;
+  for (it = cur->dir->begin(); it != cur->dir->end(); it++) {
+       CDentry *dn = it->second;
        
-       // build dir contents
-       CDir_map_t::iterator it;
-       int numfiles = 0;
-       for (it = cur->dir->begin(); it != cur->dir->end(); it++) {
-         //string name = it->first;
-         CDentry *dn = it->second;
-         CInode *in = dn->inode;
-         if (!in) continue;  // null
-         c_inode_info *i = new c_inode_info;
-         i->inode = in->inode;
-         in->get_dist_spec(i->dist, whoami);
-         i->ref_dn = it->first;
-         reply->add_dir_item(i);
-         numfiles++;
+       // is dentry readable?
+       if (dn->is_xlocked()) {
+         // ***** FIXME *****
+         dout(10) << "warning, returning xlocked dentry, we are technically WRONG" << endl;
        }
        
-       dout(10) << "reply to " << *req << " readdir " << numfiles << " files" << endl;
-       reply->set_trace_dist( cur, whoami );
-       reply->set_result(0);
-       
-       logger->inc("ordir");
-       stat_read.hit();
-       stat_req.hit();
-       stat_ops++;
-       
-       //mdcache->path_unpin(trace, req);
+       CInode *in = dn->inode;
+       if (!in) continue;  // null dentry?
        
-       messenger->send_message(reply,
-                                                       MSG_ADDR_CLIENT(req->get_client()), 0,
-                                                       MDS_PORT_SERVER);
-       mdcache->request_finish(req);
-  } else {
-       // fetch
-       dout(10) << " incomplete dir contents for readdir on " << *cur->dir << ", fetching" << endl;
-       mdstore->fetch_dir(cur->dir, new C_MDS_RetryRequest(this, req, cur));
+       // add this item
+       // note: c_inode_info makes note of whether inode data is readable.
+       c_inode_info *i = new c_inode_info(in, whoami, it->first);
+       reply->add_dir_item(i);
+       numfiles++;
   }
+  
+  dout(10) << "reply to " << *req << " readdir " << numfiles << " files" << endl;
+  reply->set_result(0);
+  
+  logger->inc("ordir");
+  stat_read.hit();
+  stat_req.hit();
+  stat_ops++;
+
+  // reply
+  reply_request(req, reply, cur);
 }
 
 
@@ -907,14 +909,9 @@ void MDS::handle_client_mknod(MClientRequest *req, CInode *ref)
   CInode *newi = mknod(req, ref);
   if (!newi) return;
   
-  // log it
-  dout(10) << "log for " << *req << " mknod " << newi->ino() << endl;
-  mdlog->submit_entry(new EInodeUpdate(newi),                    // FIXME should be differnet log entry
-                                         NULL);
-  
-  // reply
-  reply_request(req, 0);
-  return;
+  // commit
+  commit_request(req, new MClientReply(req, 0), ref,
+                                new EInodeUpdate(newi));  // FIXME this is the wrong message
 }
 
 // mknod(): used by handle_client_mkdir, handle_client_mknod, which are mostly identical.
@@ -1017,27 +1014,6 @@ CInode *MDS::mknod(MClientRequest *req, CInode *diri, bool okexist)
 
 // UNLINK
 
-class C_MDS_Unlink : public Context {
-public:
-  MDS *mds;
-  CDentry *dn;
-  MClientRequest *req;
-  C_MDS_Unlink(MDS *mds, CDentry *dn, MClientRequest *req) {
-       this->mds = mds;
-       this->dn = dn;
-       this->req = req;
-  }
-  virtual void finish(int r) {
-       // reply
-       MClientReply *reply = new MClientReply(req);
-       mds->messenger->send_message(reply,
-                                                                MSG_ADDR_CLIENT(req->get_client()), 0, MDS_PORT_SERVER);
-       
-       // done.
-       mds->mdcache->request_finish(req);
-  }
-};
-
 void MDS::handle_client_unlink(MClientRequest *req, 
                                                           CInode *diri)
 {
@@ -1191,15 +1167,20 @@ void MDS::handle_client_unlink(MClientRequest *req,
        return;
        
   // it's locked, unlink!
+  MClientReply *reply = new MClientReply(req,0);
   mdcache->dentry_unlink(dn,
-                                                new C_MDS_Unlink(this,dn,req));
+                                                new C_MDS_CommitRequest(this, req, reply, diri,
+                                                                                                new EInodeUpdate(diri))); // FIXME WRONG EVENT
   return;
 }
 
 
-// RENAME
 
 
+
+
+// RENAME
+
 class C_MDS_RenameTraverseDst : public Context {
   MDS *mds;
   MClientRequest *req;
@@ -1662,9 +1643,12 @@ void MDS::handle_client_rename_local(MClientRequest *req,
        dosrc = !dosrc;
   }
 
-  // we're golden (everything is xlocked by us, we rule, etc.)
+  // we're golden.
+  // everything is xlocked by us, we rule, etc.
+  MClientReply *reply = new MClientReply(req, 0);
   mdcache->file_rename( srcdn, destdn,
-                                               new C_MDS_RenameFinish(this, req, srcdn->inode),
+                                               new C_MDS_CommitRequest(this, req, reply, srcdn->inode,
+                                                                                               new EInodeUpdate(srcdn->inode)),  // FIXME WRONG EVENT
                                                everybody );
 }
 
@@ -1682,6 +1666,9 @@ void MDS::handle_client_mkdir(MClientRequest *req, CInode *diri)
   CInode *newi = mknod(req, diri);
   if (!newi) return;
 
+  // set the dir mode
+  newi->inode.mode = req->get_iarg();
+  
   // make my new inode a dir.
   newi->inode.mode |= INODE_MODE_DIR;
   
@@ -1690,18 +1677,14 @@ void MDS::handle_client_mkdir(MClientRequest *req, CInode *diri)
   newdir->mark_complete();
   newdir->mark_dirty();
   
-  // log it
-  dout(10) << "log for " << *req << " mkdir " << newi->ino() << endl;
-  mdlog->submit_entry(new EInodeUpdate(newi),                    // FIXME should be differnet log entry
-                                         NULL);
+  // commit
+  commit_request(req, new MClientReply(req, 0), diri,
+                                new EInodeUpdate(newi));                  // FIXME should be differnet log entry
   
   // schedule a commit for good measure 
   // NOTE: not strictly necessary.. it's in the log!
-  // but, if test crashes we'll be less likely to corrupt osddata/* (in leiu of a real recovery mechanism)
+  // but, if fakemds crashes we'll be less likely to corrupt osddata/* (in leiu of a real recovery mechanism)
   mdstore->commit_dir(newdir, NULL);
-
-  // reply
-  reply_request(req, 0);
   return;
 }
 
@@ -1723,14 +1706,9 @@ void MDS::handle_client_symlink(MClientRequest *req, CInode *diri)
   // set target
   newi->symlink = req->get_sarg();
   
-  // log it
-  dout(10) << "log for " << *req << " symlink " << newi->ino() << endl;
-  mdlog->submit_entry(new EInodeUpdate(newi),                    // FIXME should be differnet log entry
-                                         NULL);
-  
-  // reply
-  reply_request(req, 0);
-  return;
+  // commit
+  commit_request(req, new MClientReply(req, 0), diri,
+                                new EInodeUpdate(newi));                   // FIXME should be differnet log entry
 }
 
 
@@ -1795,14 +1773,7 @@ void MDS::handle_client_open(MClientRequest *req,
 
   // reply
   MClientReply *reply = new MClientReply(req, f->fh);   // fh # is return code
-  reply->set_trace_dist( cur, whoami );
-
-  messenger->send_message(reply,
-                                                 MSG_ADDR_CLIENT(req->get_client()), 0,
-                                                 MDS_PORT_SERVER);
-  
-  // discard request
-  mdcache->request_finish(req);
+  reply_request(req, reply, cur);
 }
 
 
@@ -1843,14 +1814,9 @@ void MDS::handle_client_close(MClientRequest *req, CInode *cur)
   // update size, mtime
   // XXX
 
-  /*
+  /* FIXME ****
   // mark dirty
-  cur->mark_dirty();
-
-  // log it
-  dout(10) << "log for " << *req << " touch " << cur->inode.mtime << endl;
-  mdlog->submit_entry(new EInodeUpdate(cur),
-                                         new C_MDS_TouchFinish(this, req, cur, reply));
+  cur->mark_dirty();        
   */
 
   // close it.
@@ -1860,7 +1826,7 @@ void MDS::handle_client_close(MClientRequest *req, CInode *cur)
   idalloc->reclaim_id(ID_FH, f->fh);
 
   // ok we're done
-  if(f->mode != CFILE_MODE_R) {
+  if (f->mode != CFILE_MODE_R) {
        if (!cur->is_auth() &&
                !cur->is_open_write()) {
          // we were a replica writer!
@@ -1881,16 +1847,9 @@ void MDS::handle_client_close(MClientRequest *req, CInode *cur)
 
   // XXX what about atime?
 
-
-  // reply
-  MClientReply *reply = new MClientReply(req);
-  reply->set_trace_dist( cur, whoami );
-  //reply->set_iarg( req->get_iarg() );
-  messenger->send_message(reply,
-                                                 req->get_source(), 0, MDS_PORT_SERVER);
-
-  // done
-  mdcache->request_finish(req);
+  // commit
+  commit_request(req, new MClientReply(req, 0), cur,
+                                new EInodeUpdate(cur));               // FIXME wrong message?
 }
 
 
index 5333cf4ce6ade569cac9b61f7a456c28e614b557..24f2dcc03b1c66ac4418cea417b0cf5d36ba9291 100644 (file)
@@ -53,6 +53,7 @@ class Message;
 class MClientRequest;
 class MClientReply;
 class MDBalancer;
+class LogEvent;
 
 // types
 
@@ -142,9 +143,16 @@ class MDS : public Dispatcher {
   bool open_root(Context *c);
   bool open_root_2(int result, Context *c);
 
-  void reply_request(MClientRequest *req, int r = 0);
-
-
+  // generic request helpers
+  void reply_request(MClientRequest *req, int r = 0, CInode *tracei = 0);
+  void reply_request(MClientRequest *req, MClientReply *reply, CInode *tracei);
+  void commit_request(MClientRequest *req,
+                                         MClientReply *reply,
+                                         CInode *tracei,
+                                         LogEvent *event,
+                                         LogEvent *event2 = 0);
+  
+  
   void handle_ping(class MPing *m);
   void handle_client_done(Message *m);
   void handle_shutdown_start(Message *m);
index 863a8abc98c919891cac7ba1395b1844308ba88a..840d42402dc67d12acb83e2022fea9a824cb0120 100644 (file)
@@ -30,13 +30,82 @@ class CInode;
  *
  */
 
-typedef struct {
+class c_inode_info {
+ public:
   inode_t inode;
-  set<int> dist;
   string ref_dn;    // referring dentry (blank if root)
   string symlink;   // symlink content (if symlink)
-  bool is_sync;     
-} c_inode_info;
+
+  bool inode_soft_valid;  // true if inode info is valid (ie was readable on mds at the time)
+  bool inode_hard_valid;  // true if inode info is valid (ie was readable on mds at the time)
+
+  set<int> dist;    // where am i replicated?
+
+ public:
+  c_inode_info() {}
+  c_inode_info(CInode *in, int whoami, string ref_dn) {
+       // inode
+       this->inode = in->inode;
+       this->inode_soft_valid = in->softlock.can_read(in->is_auth());
+       this->inode_hard_valid = in->hardlock.can_read(in->is_auth());
+       
+       // symlink content?
+       if (in->is_symlink()) this->symlink = in->symlink;
+         
+       // referring dentry?
+       this->ref_dn = ref_dn;
+       
+       // replicated where?
+       in->get_dist_spec(this->dist, whoami);
+  }
+  
+  void _rope(crope &s) {
+       s.append((char*)&inode, sizeof(inode));
+       s.append((char*)&inode_soft_valid, sizeof(inode_soft_valid));
+       s.append((char*)&inode_hard_valid, sizeof(inode_hard_valid));
+
+       s.append(ref_dn.c_str());
+       s.append((char)0);
+       s.append(symlink.c_str());
+       s.append((char)0);
+
+       // distn
+       int n = dist.size();
+       s.append((char*)&n, sizeof(int));
+       for (set<int>::iterator it = dist.begin();
+                it != dist.end();
+                it++) {
+         int j = *it;
+         s.append((char*)&j,sizeof(int));
+       }
+  }
+  
+  void _unrope(crope &s, int& off) {
+       s.copy(off, sizeof(inode), (char*)&inode);
+       off += sizeof(inode);
+       s.copy(off, sizeof(inode_soft_valid), (char*)&inode_soft_valid);
+       off += sizeof(inode_soft_valid);
+       s.copy(off, sizeof(inode_hard_valid), (char*)&inode_hard_valid);
+       off += sizeof(inode_hard_valid);
+
+       ref_dn = s.c_str() + off;
+       off += ref_dn.length() + 1;
+
+       symlink = s.c_str() + off;
+       off += symlink.length() + 1;
+
+       int l;
+       s.copy(off, sizeof(int), (char*)&l);
+       off += sizeof(int);
+       for (int i=0; i<l; i++) {
+         int j;
+         s.copy(off, sizeof(int), (char*)&j);
+         off += sizeof(int);
+         dist.insert(j);
+       }
+  }
+} ;
+
 
 typedef struct {
   long pcid;
@@ -92,69 +161,26 @@ class MClientReply : public Message {
   }
   virtual char *get_type_name() { return "creply"; }
 
-  
-  crope rope_info(c_inode_info *ci) {
-       crope s;
-       s.append((char*)&ci->inode, sizeof(inode_t));
-       s.append((char*)&ci->is_sync, sizeof(bool));
-
-       int n = ci->dist.size();
-       s.append((char*)&n, sizeof(int));
-       for (set<int>::iterator it = ci->dist.begin();
-                it != ci->dist.end();
-                it++) {
-         int j = *it;
-         s.append((char*)&j,sizeof(int));
-       }
-
-       s.append(ci->ref_dn.c_str());
-       s.append((char)0);
-       s.append(ci->symlink.c_str());
-       s.append((char)0);
-       return s;
-  }
-  int unrope_info(c_inode_info *ci, crope s) {
-       s.copy(0, sizeof(inode_t), (char*)(&ci->inode));
-       int off = sizeof(inode_t);
-       s.copy(off, sizeof(bool), (char*)(&ci->is_sync));
-       off += sizeof(bool);
-
-       int l;
-       s.copy(off, sizeof(int), (char*)&l);
-       off += sizeof(int);
-       for (int i=0; i<l; i++) {
-         int j;
-         s.copy(off, sizeof(int), (char*)&j);
-         off += sizeof(int);
-         ci->dist.insert(j);
-       }
-
-       ci->ref_dn = s.c_str() + off;
-       off += ci->ref_dn.length() + 1;
-
-       ci->symlink = s.c_str() + off;
-       off += ci->symlink.length() + 1;
-
-       return off;
-  }
 
   // serialization
   virtual void decode_payload(crope& s) {
-       crope::iterator sp = s.mutable_begin();
-       s.copy(0, sizeof(st), (char*)&st);
-       path = s.c_str() + sizeof(st);
-       sp += sizeof(st) + path.length() + 1;
+       int off = 0;
+       s.copy(off, sizeof(st), (char*)&st);
+       off += sizeof(st);
+
+       path = s.c_str();
+       off += path.length() + 1;
        
        for (int i=0; i<st.trace_depth; i++) {
          c_inode_info *ci = new c_inode_info;
-         sp += unrope_info(ci, s.substr(sp, s.end()));
+         ci->_unrope(s, off);
          trace.push_back(ci);
        }
 
        if (st.dir_size) {
          for (int i=0; i<st.dir_size; i++) {
                c_inode_info *ci = new c_inode_info;
-               sp += unrope_info(ci, s.substr(sp, s.end()));
+               ci->_unrope(s, off);
                dir_contents.push_back(ci);
          }
        }
@@ -164,15 +190,15 @@ class MClientReply : public Message {
        st.trace_depth = trace.size();
        
        r.append((char*)&st, sizeof(st));
-       if (path.length()) r.append(path.c_str());
+       r.append(path.c_str());
        r.append((char)0);
        
        vector<c_inode_info*>::iterator it;
        for (it = trace.begin(); it != trace.end(); it++) 
-         r.append(rope_info(*it));
+         (*it)->_rope(r);
 
        for (it = dir_contents.begin(); it != dir_contents.end(); it++) 
-         r.append(rope_info(*it));
+         (*it)->_rope(r);
   }
 
   // builders
@@ -182,22 +208,13 @@ class MClientReply : public Message {
 
   void set_trace_dist(CInode *in, int whoami) {
        while (in) {
-         c_inode_info *info = new c_inode_info;
-         info->inode = in->inode;
-
-         // symlink content?
-         if (in->is_symlink()) info->symlink = in->symlink;
-         
-         // referring dentry?
+         // add this inode to trace, along with referring dentry name
+         string ref_dn;
          CDentry *dn = in->get_parent_dn();
-         if (dn) info->ref_dn = dn->get_name();
-         
-         //info->is_sync = in->is_sync() || in->is_presync();
+         if (dn) ref_dn = dn->get_name();
 
-         // replicated where?
-         in->get_dist_spec(info->dist, whoami);
+         c_inode_info *info = new c_inode_info(in, whoami, ref_dn);
 
-         // next!
          trace.insert(trace.begin(), info);
          in = in->get_parent_inode();
        }