]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mds: avoid sending traceless reply for request that created new inode
authorYan, Zheng <zyan@redhat.com>
Tue, 3 Feb 2015 13:58:25 +0000 (21:58 +0800)
committerYan, Zheng <zyan@redhat.com>
Wed, 4 Feb 2015 06:53:42 +0000 (14:53 +0800)
When kernel client receives traceless reply for request that created
new inode, it re-send a lookup request to MDS get information of the
newly created inode. (VFS expects FS' callback return an inode in this
case). This breaks one request into two requests. Other client may
modify or move to the new inode in the middle.

The fix is avoid sending traceless reply for request that created new
inode. Instead, we convert the origin request into 'lookup' request.

Signed-off-by: Yan, Zheng <zyan@redhat.com>
src/mds/Mutation.h
src/mds/Server.cc

index 9b6569536410951e9b5df97d39d924fe26cf8fa5..bf63744bf72f4a28400c92bb8fd8b2936a0a9e3f 100644 (file)
@@ -198,9 +198,10 @@ struct MDRequestImpl : public MutationImpl, public TrackedOp {
   interval_set<inodeno_t> prealloc_inos;
 
   int snap_caps;
+  int getattr_caps;       ///< caps requested by getattr
   bool did_early_reply;
   bool o_trunc;           ///< request is an O_TRUNC mutation
-  int getattr_caps;       ///< caps requested by getattr
+  bool has_completed;     ///< request has already completed
 
   bufferlist reply_extra_bl;
 
@@ -300,8 +301,9 @@ struct MDRequestImpl : public MutationImpl, public TrackedOp {
     TrackedOp(tracker, params.initiated),
     session(NULL), item_session_request(this),
     client_request(params.client_req), straydn(NULL), snapid(CEPH_NOSNAP),
-    tracei(NULL), tracedn(NULL), alloc_ino(0), used_prealloc_ino(0), snap_caps(0),
-    did_early_reply(false), o_trunc(false), getattr_caps(0),
+    tracei(NULL), tracedn(NULL), alloc_ino(0), used_prealloc_ino(0),
+    snap_caps(0), getattr_caps(0),
+    did_early_reply(false), o_trunc(false), has_completed(false),
     slave_request(NULL), internal_op(params.internal_op), internal_op_finish(NULL),
     internal_op_private(NULL),
     retry(0),
index 2dfe172770d4d336ab294624ef8fb705ca109480..ce56c7e716175ce221a85eb62a4ce06d054c2064 100644 (file)
@@ -1111,7 +1111,10 @@ void Server::reply_client_request(MDRequestRef& mdr, MClientReply *reply)
     reply->set_mdsmap_epoch(mds->mdsmap->get_epoch());
     client_con->send_message(reply);
   }
-  
+
+  if (mdr->has_completed && mds->is_clientreplay())
+    mds->queue_one_replay();
+
   // clean up request
   mdcache->request_finish(mdr);
 
@@ -1271,27 +1274,40 @@ void Server::handle_client_request(MClientRequest *req)
   }
 
   // completed request?
-  if (req->is_replay() ||
-      (req->get_retry_attempt() &&
-       req->get_op() != CEPH_MDS_OP_OPEN && 
-       req->get_op() != CEPH_MDS_OP_CREATE)) {
+  bool has_completed = false;
+  if (req->is_replay() || req->get_retry_attempt()) {
     assert(session);
     inodeno_t created;
     if (session->have_completed_request(req->get_reqid().tid, &created)) {
-      dout(5) << "already completed " << req->get_reqid() << dendl;
-      MClientReply *reply = new MClientReply(req, 0);
-      if (created != inodeno_t()) {
-       bufferlist extra;
-       ::encode(created, extra);
-       reply->set_extra_bl(extra);
-      }
-      req->get_connection()->send_message(reply);
+      has_completed = true;
+      // Don't send traceless reply if the completed request has created
+      // new inode. Treat the request as lookup request instead.
+      if (req->is_replay() ||
+         ((created == inodeno_t() || !mds->is_clientreplay()) &&
+          req->get_op() != CEPH_MDS_OP_OPEN &&
+          req->get_op() != CEPH_MDS_OP_CREATE)) {
+       dout(5) << "already completed " << req->get_reqid() << dendl;
+       MClientReply *reply = new MClientReply(req, 0);
+       if (created != inodeno_t()) {
+         bufferlist extra;
+         ::encode(created, extra);
+         reply->set_extra_bl(extra);
+       }
+       req->get_connection()->send_message(reply);
 
-      if (req->is_replay())
-       mds->queue_one_replay();
+       if (req->is_replay())
+         mds->queue_one_replay();
 
-      req->put();
-      return;
+       req->put();
+       return;
+      }
+      if (req->get_op() != CEPH_MDS_OP_OPEN &&
+         req->get_op() != CEPH_MDS_OP_CREATE) {
+       dout(10) << " completed request which created new inode " << created
+                << ", convert it to lookup request" << dendl;
+       req->head.op = req->get_dentry_wanted() ? CEPH_MDS_OP_LOOKUP : CEPH_MDS_OP_GETATTR;
+       req->head.args.getattr.mask = CEPH_STAT_CAP_INODE_ALL;
+      }
     }
   }
 
@@ -1309,6 +1325,8 @@ void Server::handle_client_request(MClientRequest *req)
       mdr->session = session;
       session->requests.push_back(&mdr->item_session_request);
     }
+    if (has_completed)
+      mdr->has_completed = true;
   }
 
   // process embedded cap releases?
@@ -1349,7 +1367,7 @@ void Server::dispatch_client_request(MDRequestRef& mdr)
   // we shouldn't be waiting on anyone.
   assert(mdr->more()->waiting_on_slave.empty());
 
-  if (req->get_op() & CEPH_MDS_OP_WRITE) {
+  if (req->may_write()) {
     if (mdcache->is_readonly()) {
       dout(10) << " read-only FS" << dendl;
       respond_to_request(mdr, -EROFS);
@@ -1436,8 +1454,7 @@ void Server::dispatch_client_request(MDRequestRef& mdr)
 
     // funky.
   case CEPH_MDS_OP_CREATE:
-    if (req->get_retry_attempt() &&
-       mdr->session->have_completed_request(req->get_reqid().tid, NULL))
+    if (mdr->has_completed)
       handle_client_open(mdr);  // already created.. just open
     else
       handle_client_openc(mdr);
@@ -2745,9 +2762,7 @@ void Server::handle_client_open(MDRequestRef& mdr)
   }
 
   // O_TRUNC
-  if ((flags & O_TRUNC) &&
-      !(req->get_retry_attempt() &&
-       mdr->session->have_completed_request(req->get_reqid().tid, NULL))) {
+  if ((flags & O_TRUNC) && !mdr->has_completed) {
     assert(cur->is_auth());
 
     xlocks.insert(&cur->filelock);