git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mds: finish lock waiters in the same order that they were added. 8965/head
author Yan, Zheng <zyan@redhat.com>
Fri, 6 May 2016 11:07:07 +0000 (19:07 +0800)
committer Yan, Zheng <zyan@redhat.com>
Wed, 1 Jun 2016 13:00:15 +0000 (21:00 +0800)
The current code processes lock waiters that have smaller wait masks first.
Lock waiters that have larger wait masks can starve if a client keeps
sending requests that add waiters with small masks.

Signed-off-by: Yan, Zheng <zyan@redhat.com>
src/mds/MDSRank.cc
src/mds/SimpleLock.h
src/mds/mdstypes.cc
src/mds/mdstypes.h

index 188750a195dc4b961d39d44716d84ec8a0086004..34b1fb1512eb79403206789e163af66be05781b4 100644 (file)
@@ -251,13 +251,13 @@ void MDSRankDispatcher::shutdown()
 /**
  * Helper for simple callbacks that call a void fn with no args.
  */
-class C_VoidFn : public MDSInternalContext
+class C_MDS_VoidFn : public MDSInternalContext
 {
   typedef void (MDSRank::*fn_ptr)();
   protected:
    fn_ptr fn;
   public:
-  C_VoidFn(MDSRank *mds_, fn_ptr fn_)
+  C_MDS_VoidFn(MDSRank *mds_, fn_ptr fn_)
     : MDSInternalContext(mds_), fn(fn_)
   {
     assert(mds_);
@@ -1160,7 +1160,7 @@ void MDSRank::resolve_start()
 
   reopen_log();
 
-  mdcache->resolve_start(new C_VoidFn(this, &MDSRank::resolve_done));
+  mdcache->resolve_start(new C_MDS_VoidFn(this, &MDSRank::resolve_done));
   finish_contexts(g_ceph_context, waiting_for_resolve);
 }
 void MDSRank::resolve_done()
@@ -1177,7 +1177,7 @@ void MDSRank::reconnect_start()
     reopen_log();
   }
 
-  server->reconnect_clients(new C_VoidFn(this, &MDSRank::reconnect_done));
+  server->reconnect_clients(new C_MDS_VoidFn(this, &MDSRank::reconnect_done));
   finish_contexts(g_ceph_context, waiting_for_reconnect);
 }
 void MDSRank::reconnect_done()
@@ -1194,7 +1194,7 @@ void MDSRank::rejoin_joint_start()
 void MDSRank::rejoin_start()
 {
   dout(1) << "rejoin_start" << dendl;
-  mdcache->rejoin_start(new C_VoidFn(this, &MDSRank::rejoin_done));
+  mdcache->rejoin_start(new C_MDS_VoidFn(this, &MDSRank::rejoin_done));
 }
 void MDSRank::rejoin_done()
 {
@@ -1299,7 +1299,7 @@ void MDSRank::boot_create()
 {
   dout(3) << "boot_create" << dendl;
 
-  MDSGatherBuilder fin(g_ceph_context, new C_VoidFn(this, &MDSRank::creating_done));
+  MDSGatherBuilder fin(g_ceph_context, new C_MDS_VoidFn(this, &MDSRank::creating_done));
 
   mdcache->init_layouts();
 
index faec26ad5a473c6d0197b1d02c08e0772b13cf22..6d1d7fab9d55d7d417fbb39bb75ebc852b96ef2f 100644 (file)
@@ -306,7 +306,7 @@ public:
     parent->take_waiting(mask << get_wait_shift(), ls);
   }
   void add_waiter(uint64_t mask, MDSInternalContextBase *c) {
-    parent->add_waiter(mask << get_wait_shift(), c);
+    parent->add_waiter((mask << get_wait_shift()) | MDSCacheObject::WAIT_ORDERED, c);
   }
   bool is_waiter_for(uint64_t mask) const {
     return parent->is_waiter_for(mask << get_wait_shift());
index 066b26b3536fa218f17bc3b4b16b4ee0accc5dfe..2fefbb7159d53e69074376b6238b91c6db56bb34 100644 (file)
@@ -1038,6 +1038,8 @@ void cap_reconnect_t::generate_test_instances(list<cap_reconnect_t*>& ls)
   ls.back()->capinfo.cap_id = 1;
 }
 
+uint64_t MDSCacheObject::last_wait_seq = 0;
+
 void MDSCacheObject::dump(Formatter *f) const
 {
   f->dump_bool("is_auth", is_auth());
index 5eadd7ddc7f0e6e28bb892c576dfeada9a88155d..4598df43792ad65b5e7244e59260d51e745af08a 100644 (file)
@@ -1336,6 +1336,7 @@ class MDSCacheObject {
 
 
   // -- wait --
+  const static uint64_t WAIT_ORDERED    = (1ull<<61);
   const static uint64_t WAIT_SINGLEAUTH  = (1ull<<60);
   const static uint64_t WAIT_UNFREEZE    = (1ull<<59); // pka AUTHPINNABLE
 
@@ -1544,7 +1545,8 @@ protected:
   // ---------------------------------------------
   // waiting
  protected:
-  compact_multimap<uint64_t, MDSInternalContextBase*>  waiting;
+  compact_multimap<uint64_t, pair<uint64_t, MDSInternalContextBase*> > waiting;
+  static uint64_t last_wait_seq;
 
  public:
   bool is_waiter_for(uint64_t mask, uint64_t min=0) {
@@ -1553,7 +1555,7 @@ protected:
       while (min & (min-1))  // if more than one bit is set
        min &= min-1;        //  clear LSB
     }
-    for (compact_multimap<uint64_t,MDSInternalContextBase*>::iterator p = waiting.lower_bound(min);
+    for (auto p = waiting.lower_bound(min);
         p != waiting.end();
         ++p) {
       if (p->first & mask) return true;
@@ -1564,7 +1566,15 @@ protected:
   virtual void add_waiter(uint64_t mask, MDSInternalContextBase *c) {
     if (waiting.empty())
       get(PIN_WAITER);
-    waiting.insert(pair<uint64_t,MDSInternalContextBase*>(mask, c));
+
+    uint64_t seq = 0;
+    if (mask & WAIT_ORDERED) {
+      seq = ++last_wait_seq;
+      mask &= ~WAIT_ORDERED;
+    }
+    waiting.insert(pair<uint64_t, pair<uint64_t, MDSInternalContextBase*> >(
+                           mask,
+                           pair<uint64_t, MDSInternalContextBase*>(seq, c)));
 //    pdout(10,g_conf->debug_mds) << (mdsco_db_line_prefix(this)) 
 //                            << "add_waiter " << hex << mask << dec << " " << c
 //                            << " on " << *this
@@ -1573,10 +1583,18 @@ protected:
   }
   virtual void take_waiting(uint64_t mask, list<MDSInternalContextBase*>& ls) {
     if (waiting.empty()) return;
-    compact_multimap<uint64_t,MDSInternalContextBase*>::iterator it = waiting.begin();
-    while (it != waiting.end()) {
+
+    // process ordered waiters in the same order that they were added.
+    std::map<uint64_t, MDSInternalContextBase*> ordered_waiters;
+
+    for (auto it = waiting.begin();
+        it != waiting.end(); ) {
       if (it->first & mask) {
-       ls.push_back(it->second);
+
+       if (it->second.first > 0)
+         ordered_waiters.insert(it->second);
+       else
+         ls.push_back(it->second.second);
 //     pdout(10,g_conf->debug_mds) << (mdsco_db_line_prefix(this))
 //                                << "take_waiting mask " << hex << mask << dec << " took " << it->second
 //                                << " tag " << hex << it->first << dec
@@ -1591,6 +1609,11 @@ protected:
        ++it;
       }
     }
+    for (auto it = ordered_waiters.begin();
+        it != ordered_waiters.end();
+        ++it) {
+      ls.push_back(it->second);
+    }
     if (waiting.empty())
       put(PIN_WAITER);
   }