]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mds: log master commit after all slave commits get journaled
author Yan, Zheng <zyan@redhat.com>
Mon, 6 Feb 2017 08:47:43 +0000 (16:47 +0800)
committer Yan, Zheng <zyan@redhat.com>
Mon, 20 Feb 2017 08:12:36 +0000 (16:12 +0800)
When a survivor mds sends a resolve message to a recovering mds, it also
records committing slave requests in the message, so the recovering
mds knows the slave commit is still being journaled. It journals the
master commit after receiving the corresponding OP_COMMITTED message.

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
src/mds/MDCache.cc
src/mds/MDCache.h
src/mds/Server.cc
src/messages/MMDSResolve.h

index f635135548e375e0e9257c982fe8c5670b9bb0de..3884415a7d39077e6e8e7581329a145ad5db5ace 100644 (file)
@@ -2720,7 +2720,7 @@ void MDCache::send_slave_resolves()
           q != p->second.end();
           ++q) {
        dout(10) << " including uncommitted " << q->first << dendl;
-       resolves[p->first]->add_slave_request(q->first);
+       resolves[p->first]->add_slave_request(q->first, false);
       }
     }
   } else {
@@ -2730,14 +2730,18 @@ void MDCache::send_slave_resolves()
         p != active_requests.end();
         ++p) {
       MDRequestRef& mdr = p->second;
-      if (!mdr->is_slave() || !mdr->slave_did_prepare())
+      if (!mdr->is_slave())
        continue;
+      if (!mdr->slave_did_prepare() && !mdr->committing) {
+       continue;
+      }
       mds_rank_t master = mdr->slave_to_mds;
       if (resolve_set.count(master) || is_ambiguous_slave_update(p->first, master)) {
        dout(10) << " including uncommitted " << *mdr << dendl;
        if (!resolves.count(master))
          resolves[master] = new MMDSResolve;
-       if (mdr->has_more() && mdr->more()->is_inode_exporter) {
+       if (!mdr->committing &&
+           mdr->has_more() && mdr->more()->is_inode_exporter) {
          // re-send cap exports
          CInode *in = mdr->more()->rename_inode;
          map<client_t, Capability::Export> cap_map;
@@ -2747,7 +2751,7 @@ void MDCache::send_slave_resolves()
          ::encode(cap_map, bl);
          resolves[master]->add_slave_request(p->first, bl);
        } else {
-         resolves[master]->add_slave_request(p->first);
+         resolves[master]->add_slave_request(p->first, mdr->committing);
        }
       }
     }
@@ -3105,36 +3109,41 @@ void MDCache::handle_resolve(MMDSResolve *m)
 
   // ambiguous slave requests?
   if (!m->slave_requests.empty()) {
-    for (map<metareqid_t, bufferlist>::iterator p = m->slave_requests.begin();
-        p != m->slave_requests.end();
-        ++p) {
-      if (uncommitted_masters.count(p->first) && !uncommitted_masters[p->first].safe)
-       pending_masters.insert(p->first);
-    }
+    if (mds->is_clientreplay() || mds->is_active() || mds->is_stopping()) {
+      for (auto p = m->slave_requests.begin(); p != m->slave_requests.end(); ++p) {
+       if (uncommitted_masters.count(p->first) && !uncommitted_masters[p->first].safe) {
+         assert(!p->second.committing);
+         pending_masters.insert(p->first);
+       }
+      }
 
-    if (!pending_masters.empty()) {
-      dout(10) << " still have pending updates, delay processing slave resolve" << dendl;
-      delayed_resolve[from] = m;
-      return;
+      if (!pending_masters.empty()) {
+       dout(10) << " still have pending updates, delay processing slave resolve" << dendl;
+       delayed_resolve[from] = m;
+       return;
+      }
     }
 
     MMDSResolveAck *ack = new MMDSResolveAck;
-    for (map<metareqid_t, bufferlist>::iterator p = m->slave_requests.begin();
-        p != m->slave_requests.end();
-        ++p) {
+    for (auto p = m->slave_requests.begin(); p != m->slave_requests.end(); ++p) {
       if (uncommitted_masters.count(p->first)) {  //mds->sessionmap.have_completed_request(p->first)) {
        // COMMIT
-       dout(10) << " ambiguous slave request " << *p << " will COMMIT" << dendl;
-       ack->add_commit(p->first);
+       if (p->second.committing) {
+         // already committing, waiting for the OP_COMMITTED slave reply
+         dout(10) << " already committing slave request " << *p << " noop "<< dendl;
+       } else {
+         dout(10) << " ambiguous slave request " << *p << " will COMMIT" << dendl;
+         ack->add_commit(p->first);
+       }
        uncommitted_masters[p->first].slaves.insert(from);   // wait for slave OP_COMMITTED before we log ECommitted
 
-       if (p->second.length() > 0) {
+       if (p->second.inode_caps.length() > 0) {
          // slave wants to export caps (rename)
          assert(mds->is_resolve());
 
          inodeno_t ino;
          map<client_t,Capability::Export> cap_exports;
-         bufferlist::iterator q = p->second.begin();
+         bufferlist::iterator q = p->second.inode_caps.begin();
          ::decode(ino, q);
          ::decode(cap_exports, q);
 
@@ -3159,6 +3168,7 @@ void MDCache::handle_resolve(MMDSResolve *m)
       } else {
        // ABORT
        dout(10) << " ambiguous slave request " << *p << " will ABORT" << dendl;
+       assert(!p->second.committing);
        ack->add_abort(p->first);
       }
     }
index fdd51c7175826ed57b02402477dc75db61282d5b..ab131c79b921ec557314ad3c2a82ba56c5e42a02 100644 (file)
@@ -381,8 +381,9 @@ public:
   void wait_for_uncommitted_master(metareqid_t reqid, MDSInternalContextBase *c) {
     uncommitted_masters[reqid].waiters.push_back(c);
   }
-  bool have_uncommitted_master(metareqid_t reqid) {
-    return uncommitted_masters.count(reqid);
+  bool have_uncommitted_master(metareqid_t reqid, mds_rank_t from) {
+    auto p = uncommitted_masters.find(reqid);
+    return p != uncommitted_masters.end() && p->second.slaves.count(from) > 0;
   }
   void log_master_commit(metareqid_t reqid);
   void logged_master_update(metareqid_t reqid);
index 2acadc3886fc9784d50ff261bb63c9bf2ede48e4..6eceb4cdaaff1ab7ee1f2d1e0bb646316ac4a81d 100644 (file)
@@ -1738,8 +1738,9 @@ void Server::handle_slave_request_reply(MMDSSlaveRequest *m)
   
   if (!mds->is_clientreplay() && !mds->is_active() && !mds->is_stopping()) {
     metareqid_t r = m->get_reqid();
-    if (!mdcache->have_uncommitted_master(r)) {
-      dout(10) << "handle_slave_request_reply ignoring reply from unknown reqid " << r << dendl;
+    if (!mdcache->have_uncommitted_master(r, from)) {
+      dout(10) << "handle_slave_request_reply ignoring slave reply from mds."
+              << from << " reqid " << r << dendl;
       m->put();
       return;
     }
index 263bd54750d4f6b8b3e34f38839da75c12a99ab2..4f57c0c44c99db17d027fbc989d1278a5ab4cfc6 100644 (file)
 #include "include/types.h"
 
 class MMDSResolve : public Message {
- public:
+public:
   map<dirfrag_t, vector<dirfrag_t> > subtrees;
   map<dirfrag_t, vector<dirfrag_t> > ambiguous_imports;
-  map<metareqid_t, bufferlist> slave_requests;
+
+  struct slave_request {
+    bufferlist inode_caps;
+    bool committing;
+    slave_request() : committing(false) {}
+    void encode(bufferlist &bl) const {
+      ::encode(inode_caps, bl);
+      ::encode(committing, bl);
+    }
+    void decode(bufferlist::iterator &bl) {
+      ::decode(inode_caps, bl);
+      ::decode(committing, bl);
+    }
+  };
+  WRITE_CLASS_ENCODER(slave_request)
+
+  map<metareqid_t, slave_request> slave_requests;
 
   MMDSResolve() : Message(MSG_MDS_RESOLVE) {}
 private:
@@ -49,12 +65,12 @@ public:
     ambiguous_imports[im] = m;
   }
 
-  void add_slave_request(metareqid_t reqid) {
-    slave_requests[reqid].clear();
+  void add_slave_request(metareqid_t reqid, bool committing) {
+    slave_requests[reqid].committing = committing;
   }
 
   void add_slave_request(metareqid_t reqid, bufferlist& bl) {
-    slave_requests[reqid].claim(bl);
+    slave_requests[reqid].inode_caps.claim(bl);
   }
 
   void encode_payload(uint64_t features) {
@@ -70,4 +86,9 @@ public:
   }
 };
 
+inline ostream& operator<<(ostream& out, const MMDSResolve::slave_request) {
+    return out;
+}
+
+WRITE_CLASS_ENCODER(MMDSResolve::slave_request)
 #endif