git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mds: finish lock waiters in the same order that they were added. 9560/head
author: Yan, Zheng <zyan@redhat.com>
Fri, 6 May 2016 11:07:07 +0000 (19:07 +0800)
committer: Abhishek Varshney <abhishek.varshney@flipkart.com>
Tue, 7 Jun 2016 14:40:07 +0000 (20:10 +0530)
The current code processes lock waiters that have smaller wait masks first.
Lock waiters that have large wait masks can starve if a client keeps
sending requests that add waiters with small masks.

Signed-off-by: Yan, Zheng <zyan@redhat.com>
(cherry picked from commit d463107473382170c07d9250bb7ace0e5a2a7de2)

src/mds/MDSRank.cc
src/mds/SimpleLock.h
src/mds/mdstypes.cc
src/mds/mdstypes.h

index 188750a195dc4b961d39d44716d84ec8a0086004..34b1fb1512eb79403206789e163af66be05781b4 100644 (file)
@@ -251,13 +251,13 @@ void MDSRankDispatcher::shutdown()
 /**
  * Helper for simple callbacks that call a void fn with no args.
  */
-class C_VoidFn : public MDSInternalContext
+class C_MDS_VoidFn : public MDSInternalContext
 {
   typedef void (MDSRank::*fn_ptr)();
   protected:
    fn_ptr fn;
   public:
-  C_VoidFn(MDSRank *mds_, fn_ptr fn_)
+  C_MDS_VoidFn(MDSRank *mds_, fn_ptr fn_)
     : MDSInternalContext(mds_), fn(fn_)
   {
     assert(mds_);
@@ -1160,7 +1160,7 @@ void MDSRank::resolve_start()
 
   reopen_log();
 
-  mdcache->resolve_start(new C_VoidFn(this, &MDSRank::resolve_done));
+  mdcache->resolve_start(new C_MDS_VoidFn(this, &MDSRank::resolve_done));
   finish_contexts(g_ceph_context, waiting_for_resolve);
 }
 void MDSRank::resolve_done()
@@ -1177,7 +1177,7 @@ void MDSRank::reconnect_start()
     reopen_log();
   }
 
-  server->reconnect_clients(new C_VoidFn(this, &MDSRank::reconnect_done));
+  server->reconnect_clients(new C_MDS_VoidFn(this, &MDSRank::reconnect_done));
   finish_contexts(g_ceph_context, waiting_for_reconnect);
 }
 void MDSRank::reconnect_done()
@@ -1194,7 +1194,7 @@ void MDSRank::rejoin_joint_start()
 void MDSRank::rejoin_start()
 {
   dout(1) << "rejoin_start" << dendl;
-  mdcache->rejoin_start(new C_VoidFn(this, &MDSRank::rejoin_done));
+  mdcache->rejoin_start(new C_MDS_VoidFn(this, &MDSRank::rejoin_done));
 }
 void MDSRank::rejoin_done()
 {
@@ -1299,7 +1299,7 @@ void MDSRank::boot_create()
 {
   dout(3) << "boot_create" << dendl;
 
-  MDSGatherBuilder fin(g_ceph_context, new C_VoidFn(this, &MDSRank::creating_done));
+  MDSGatherBuilder fin(g_ceph_context, new C_MDS_VoidFn(this, &MDSRank::creating_done));
 
   mdcache->init_layouts();
 
index faec26ad5a473c6d0197b1d02c08e0772b13cf22..6d1d7fab9d55d7d417fbb39bb75ebc852b96ef2f 100644 (file)
@@ -306,7 +306,7 @@ public:
     parent->take_waiting(mask << get_wait_shift(), ls);
   }
   void add_waiter(uint64_t mask, MDSInternalContextBase *c) {
-    parent->add_waiter(mask << get_wait_shift(), c);
+    parent->add_waiter((mask << get_wait_shift()) | MDSCacheObject::WAIT_ORDERED, c);
   }
   bool is_waiter_for(uint64_t mask) const {
     return parent->is_waiter_for(mask << get_wait_shift());
index 066b26b3536fa218f17bc3b4b16b4ee0accc5dfe..2fefbb7159d53e69074376b6238b91c6db56bb34 100644 (file)
@@ -1038,6 +1038,8 @@ void cap_reconnect_t::generate_test_instances(list<cap_reconnect_t*>& ls)
   ls.back()->capinfo.cap_id = 1;
 }
 
+uint64_t MDSCacheObject::last_wait_seq = 0;
+
 void MDSCacheObject::dump(Formatter *f) const
 {
   f->dump_bool("is_auth", is_auth());
index e789856b36d7cb70e1ddb401383fdaf0021f3109..88f184de04290bd39d3edad15b9038cce28ccb02 100644 (file)
@@ -1336,6 +1336,7 @@ class MDSCacheObject {
 
 
   // -- wait --
+  const static uint64_t WAIT_ORDERED    = (1ull<<61);
   const static uint64_t WAIT_SINGLEAUTH  = (1ull<<60);
   const static uint64_t WAIT_UNFREEZE    = (1ull<<59); // pka AUTHPINNABLE
 
@@ -1544,7 +1545,8 @@ protected:
   // ---------------------------------------------
   // waiting
  protected:
-  compact_multimap<uint64_t, MDSInternalContextBase*>  waiting;
+  compact_multimap<uint64_t, pair<uint64_t, MDSInternalContextBase*> > waiting;
+  static uint64_t last_wait_seq;
 
  public:
   bool is_waiter_for(uint64_t mask, uint64_t min=0) {
@@ -1553,7 +1555,7 @@ protected:
       while (min & (min-1))  // if more than one bit is set
        min &= min-1;        //  clear LSB
     }
-    for (compact_multimap<uint64_t,MDSInternalContextBase*>::iterator p = waiting.lower_bound(min);
+    for (auto p = waiting.lower_bound(min);
         p != waiting.end();
         ++p) {
       if (p->first & mask) return true;
@@ -1564,7 +1566,15 @@ protected:
   virtual void add_waiter(uint64_t mask, MDSInternalContextBase *c) {
     if (waiting.empty())
       get(PIN_WAITER);
-    waiting.insert(pair<uint64_t,MDSInternalContextBase*>(mask, c));
+
+    uint64_t seq = 0;
+    if (mask & WAIT_ORDERED) {
+      seq = ++last_wait_seq;
+      mask &= ~WAIT_ORDERED;
+    }
+    waiting.insert(pair<uint64_t, pair<uint64_t, MDSInternalContextBase*> >(
+                           mask,
+                           pair<uint64_t, MDSInternalContextBase*>(seq, c)));
 //    pdout(10,g_conf->debug_mds) << (mdsco_db_line_prefix(this)) 
 //                            << "add_waiter " << hex << mask << dec << " " << c
 //                            << " on " << *this
@@ -1573,10 +1583,18 @@ protected:
   }
   virtual void take_waiting(uint64_t mask, list<MDSInternalContextBase*>& ls) {
     if (waiting.empty()) return;
-    compact_multimap<uint64_t,MDSInternalContextBase*>::iterator it = waiting.begin();
-    while (it != waiting.end()) {
+
+    // process ordered waiters in the same order that they were added.
+    std::map<uint64_t, MDSInternalContextBase*> ordered_waiters;
+
+    for (auto it = waiting.begin();
+        it != waiting.end(); ) {
       if (it->first & mask) {
-       ls.push_back(it->second);
+
+       if (it->second.first > 0)
+         ordered_waiters.insert(it->second);
+       else
+         ls.push_back(it->second.second);
 //     pdout(10,g_conf->debug_mds) << (mdsco_db_line_prefix(this))
 //                                << "take_waiting mask " << hex << mask << dec << " took " << it->second
 //                                << " tag " << hex << it->first << dec
@@ -1591,6 +1609,11 @@ protected:
        ++it;
       }
     }
+    for (auto it = ordered_waiters.begin();
+        it != ordered_waiters.end();
+        ++it) {
+      ls.push_back(it->second);
+    }
     if (waiting.empty())
       put(PIN_WAITER);
   }