]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mds: fix mds gets stuck in clientreplay state
authorYan, Zheng <zyan@redhat.com>
Fri, 24 Feb 2017 09:24:47 +0000 (17:24 +0800)
committerYan, Zheng <zyan@redhat.com>
Thu, 30 Mar 2017 01:46:05 +0000 (09:46 +0800)
When client request in clientreplay queue finishes, we should call
MDSRank::queue_one_replay(). Otherwise mds gets stuck in clientreplay
state. There are several cases that client request in clientreplay
queue finishes, but MDSRank::queue_one_replay() does not get called

To make the code clear, add a flag to MClientRequest to indicate if
it's in clientreplay queue.

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
src/mds/Server.cc
src/messages/MClientRequest.h

index 470b36f949ca1f4a9c425cae91b14b485ccfdb2a..02acd6ebee143e16b5f5efc430714a04f38d4aa6 100644 (file)
@@ -173,6 +173,7 @@ void Server::dispatch(Message *m)
        req->releases.clear();
       }
       if (queue_replay) {
+       req->mark_queued_for_replay();
        mds->enqueue_replay(new C_MDS_RetryMessage(mds, m));
        return;
       }
@@ -190,12 +191,8 @@ void Server::dispatch(Message *m)
        wait_for_active = false;
       } else if (m->get_type() == CEPH_MSG_CLIENT_REQUEST) {
        MClientRequest *req = static_cast<MClientRequest*>(m);
-       if (req->is_replay()) {
+       if (req->is_queued_for_replay()) {
          wait_for_active = false;
-       } else {
-         Session *session = get_session(req);
-         if (session && session->have_completed_request(req->get_reqid().tid, NULL))
-           wait_for_active = false;
        }
       }
     }
@@ -1003,7 +1000,7 @@ void Server::journal_and_reply(MDRequestRef& mdr, CInode *in, CDentry *dn, LogEv
   mdr->committing = true;
   submit_mdlog_entry(le, fin, mdr, __func__);
   
-  if (mdr->client_request && mdr->client_request->is_replay()) {
+  if (mdr->client_request && mdr->client_request->is_queued_for_replay()) {
     if (mds->queue_one_replay()) {
       dout(10) << " queued next replay op" << dendl;
     } else {
@@ -1195,7 +1192,16 @@ void Server::reply_client_request(MDRequestRef& mdr, MClientReply *reply)
     req->get_connection()->send_message(reply);
   }
 
-  const bool completed = mdr->has_completed;
+  if (req->is_queued_for_replay() &&
+      (mdr->has_completed || reply->get_result() < 0)) {
+    if (reply->get_result() < 0) {
+      int r = reply->get_result();
+      derr << "reply_client_request: failed to replay " << *req
+          << " error " << r << " (" << cpp_strerror(r)  << ")" << dendl;
+      mds->clog->warn() << "failed to replay " << req->get_reqid() << " error " << r;
+    }
+    mds->queue_one_replay();
+  }
 
   // clean up request
   mdcache->request_finish(mdr);
@@ -1206,11 +1212,6 @@ void Server::reply_client_request(MDRequestRef& mdr, MClientReply *reply)
       tracedn->get_projected_linkage()->is_remote()) {
     mdcache->eval_remote(tracedn);
   }
-
-  // Advance clientreplay process if we're in it
-  if (completed && mds->is_clientreplay()) {
-    mds->queue_one_replay();
-  }
 }
 
 
@@ -1345,13 +1346,15 @@ void Server::handle_client_request(MClientRequest *req)
     session = get_session(req);
     if (!session) {
       dout(5) << "no session for " << req->get_source() << ", dropping" << dendl;
-      req->put();
-      return;
-    }
-    if (session->is_closed() ||
-       session->is_closing() ||
-       session->is_killing()) {
+    } else if (session->is_closed() ||
+              session->is_closing() ||
+              session->is_killing()) {
       dout(5) << "session closed|closing|killing, dropping" << dendl;
+      session = NULL;
+    }
+    if (!session) {
+      if (req->is_queued_for_replay())
+       mds->queue_one_replay();
       req->put();
       return;
     }
@@ -1385,7 +1388,7 @@ void Server::handle_client_request(MClientRequest *req)
        }
        req->get_connection()->send_message(reply);
 
-       if (mds->is_clientreplay())
+       if (req->is_queued_for_replay())
          mds->queue_one_replay();
 
        req->put();
index c744eb7faabb151b6754cdb9fe91135776784de6..d78f4171b26b1485fae298ebca89600760d188cf 100644 (file)
@@ -78,7 +78,7 @@ public:
   filepath path, path2;
   vector<uint64_t> gid_list;
 
-
+  bool queued_for_replay = false;
 
  public:
   // cons
@@ -167,6 +167,9 @@ public:
 
   int get_dentry_wanted() { return get_flags() & CEPH_MDS_FLAG_WANT_DENTRY; }
 
+  void mark_queued_for_replay() { queued_for_replay = true; }
+  bool is_queued_for_replay() { return queued_for_replay; }
+
   void decode_payload() override {
     bufferlist::iterator p = payload.begin();
 
@@ -260,6 +263,8 @@ public:
       out << " RETRY=" << (int)head.num_retry;
     if (get_flags() & CEPH_MDS_FLAG_REPLAY)
       out << " REPLAY";
+    if (queued_for_replay)
+      out << " QUEUED_FOR_REPLAY";
     out << " caller_uid=" << head.caller_uid
        << ", caller_gid=" << head.caller_gid
        << '{';