mds: implement remote_wrlock
author Sage Weil <sage.weil@dreamhost.com>
Fri, 8 Jul 2011 16:30:29 +0000 (09:30 -0700)
committer Sage Weil <sage.weil@dreamhost.com>
Fri, 8 Jul 2011 16:48:00 +0000 (09:48 -0700)
For the rename code to behave correctly, we need to hold a wrlock on the
slave node to ensure that any racing gather (mix->lock) is not sent before
_rename_prepare() runs; otherwise we violate the locking rules and corrupt
rstats.

Implement a remote_wrlock that will be used by rename.  The wrlock is held
on a remote node instead of the local node, and is set up similarly to
remote_xlocks.

Signed-off-by: Sage Weil <sage.weil@dreamhost.com>
src/mds/Locker.cc
src/mds/Locker.h
src/mds/MDCache.cc
src/mds/MDCache.h
src/mds/Server.cc
src/messages/MMDSSlaveRequest.h
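
In short: the master sends OP_WRLOCK for a lock whose parent is auth on another
MDS, the slave takes an ordinary local wrlock (through acquire_locks, so it also
auth_pins) and replies OP_WRLOCKACK, the master records the lock and target rank
in mdr->remote_wrlocks, and OP_UNWRLOCK later tells the slave to drop it. Below
is a minimal, self-contained sketch of that flow; all types in it are simplified
stand-ins rather than the real Ceph classes.

    // Minimal sketch of the message exchange this commit adds.  FakeLock,
    // FakeSlave and FakeMaster are simplified stand-ins, not the real Ceph
    // classes; only the ordering of the exchange mirrors the diff below.
    #include <cassert>
    #include <iostream>
    #include <map>
    #include <string>

    enum SlaveOp { OP_WRLOCK, OP_WRLOCKACK, OP_UNWRLOCK };  // real values are 8, -8, 9

    struct FakeLock {            // stand-in for SimpleLock
      int local_wrlocks = 0;     // wrlocks held on the auth copy
    };

    struct FakeSlave {           // stand-in for the MDS that is auth for the lock's parent
      int rank;
      std::map<std::string, FakeLock> locks;

      // Like Server::dispatch_slave_request: OP_WRLOCK takes a normal local
      // wrlock (via acquire_locks in the real code) and acks; OP_UNWRLOCK drops it.
      SlaveOp handle(SlaveOp op, const std::string &lockname) {
        FakeLock &l = locks[lockname];
        if (op == OP_WRLOCK) {
          l.local_wrlocks++;
          return OP_WRLOCKACK;
        }
        assert(op == OP_UNWRLOCK);
        l.local_wrlocks--;
        return OP_UNWRLOCK;      // no ack is sent for unwrlock in the real protocol
      }
    };

    struct FakeMaster {          // stand-in for the MDS driving the request
      std::map<std::string, int> remote_wrlocks;   // lock -> rank holding it for us

      // remote_wrlock_start: send OP_WRLOCK and, on ack, record who holds it,
      // as Server::handle_slave_request does for OP_WRLOCKACK.
      void remote_wrlock_start(FakeSlave &target, const std::string &lockname) {
        SlaveOp ack = target.handle(OP_WRLOCK, lockname);  // real code sends an MMDSSlaveRequest and waits
        assert(ack == OP_WRLOCKACK);
        remote_wrlocks[lockname] = target.rank;
      }

      // remote_wrlock_finish: forget the lock and tell the slave to drop its wrlock.
      void remote_wrlock_finish(FakeSlave &target, const std::string &lockname) {
        remote_wrlocks.erase(lockname);
        target.handle(OP_UNWRLOCK, lockname);
      }
    };

    int main() {
      FakeSlave slave{1, {}};
      FakeMaster master;

      master.remote_wrlock_start(slave, "dn->lock");
      std::cout << "slave wrlocks held: " << slave.locks["dn->lock"].local_wrlocks << "\n";  // 1

      master.remote_wrlock_finish(slave, "dn->lock");
      std::cout << "slave wrlocks held: " << slave.locks["dn->lock"].local_wrlocks << "\n";  // 0
      return 0;
    }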

diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc
index 1a793484f81fe96cea24e1d95bd088819627226d..e6fff443c02941fb5c1508642f489fbc4d275678 100644
@@ -170,7 +170,8 @@ void Locker::include_snap_rdlocks_wlayout(set<SimpleLock*>& rdlocks, CInode *in,
 bool Locker::acquire_locks(MDRequest *mdr,
                           set<SimpleLock*> &rdlocks,
                           set<SimpleLock*> &wrlocks,
-                          set<SimpleLock*> &xlocks)
+                          set<SimpleLock*> &xlocks,
+                          map<SimpleLock*,int> *remote_wrlocks)
 {
   if (mdr->done_locking &&
       !mdr->is_slave()) {  // not on slaves!  master requests locks piecemeal.
@@ -228,13 +229,24 @@ bool Locker::acquire_locks(MDRequest *mdr,
     if ((*p)->get_parent()->is_auth())
       mustpin.insert(*p);
     else if (!(*p)->get_parent()->is_auth() &&
-            !(*p)->can_wrlock(client)) {       // we might have to request a scatter
+            !(*p)->can_wrlock(client) &&  // we might have to request a scatter
+            !mdr->is_slave()) {           // if we are slave (remote_wrlock), the master already authpinned
       dout(15) << " will also auth_pin " << *(*p)->get_parent()
               << " in case we need to request a scatter" << dendl;
       mustpin.insert(*p);
     }
   }
 
+  // remote_wrlocks
+  if (remote_wrlocks) {
+    for (map<SimpleLock*,int>::iterator p = remote_wrlocks->begin(); p != remote_wrlocks->end(); ++p) {
+      dout(20) << " must remote_wrlock on mds" << p->second << " "
+              << *p->first << " " << *(p->first)->get_parent() << dendl;
+      sorted.insert(p->first);
+      mustpin.insert(p->first);
+    }
+  }
+
   // rdlocks
   for (set<SimpleLock*>::iterator p = rdlocks.begin();
         p != rdlocks.end();
@@ -343,15 +355,30 @@ bool Locker::acquire_locks(MDRequest *mdr,
       // right kind?
       SimpleLock *have = *existing;
       existing++;
-      if (xlocks.count(*p) && mdr->xlocks.count(*p))
+      if (xlocks.count(have) && mdr->xlocks.count(have)) {
        dout(10) << " already xlocked " << *have << " " << *have->get_parent() << dendl;
-      else if (wrlocks.count(*p) && mdr->wrlocks.count(*p))
+       continue;
+      }
+      if (wrlocks.count(have) && mdr->wrlocks.count(have)) {
        dout(10) << " already wrlocked " << *have << " " << *have->get_parent() << dendl;
-      else if (rdlocks.count(*p) && mdr->rdlocks.count(*p))
+       continue;
+      }
+      if (remote_wrlocks && remote_wrlocks->count(have) &&
+         mdr->remote_wrlocks.count(have)) {
+       if (mdr->remote_wrlocks[have] == (*remote_wrlocks)[have]) {
+         dout(10) << " already remote_wrlocked " << *have << " " << *have->get_parent() << dendl;
+         continue;
+       }
+       dout(10) << " unlocking remote_wrlock on wrong mds" << mdr->remote_wrlocks[have]
+                << " (want mds" << (*remote_wrlocks)[have] << ") " 
+                << *have << " " << *have->get_parent() << dendl;
+       remote_wrlock_finish(have, mdr->remote_wrlocks[have], mdr);
+       // continue...
+      }
+      if (rdlocks.count(have) && mdr->rdlocks.count(have)) {
        dout(10) << " already rdlocked " << *have << " " << *have->get_parent() << dendl;
-      else
-       assert(0);
-      continue;
+       continue;
+      }
     }
     
     // hose any stray locks
@@ -364,6 +391,8 @@ bool Locker::acquire_locks(MDRequest *mdr,
        xlock_finish(stray, mdr, &need_issue);
       else if (mdr->wrlocks.count(stray))
        wrlock_finish(stray, mdr, &need_issue);
+      else if (mdr->remote_wrlocks.count(stray))
+       remote_wrlock_finish(stray, mdr->remote_wrlocks[stray], mdr);
       else
        rdlock_finish(stray, mdr, &need_issue);
       if (need_issue)
@@ -379,6 +408,9 @@ bool Locker::acquire_locks(MDRequest *mdr,
       if (!wrlock_start(*p, mdr)) 
        goto out;
       dout(10) << " got wrlock on " << **p << " " << *(*p)->get_parent() << dendl;
+    } else if (remote_wrlocks && remote_wrlocks->count(*p)) {
+      remote_wrlock_start(*p, (*remote_wrlocks)[*p], mdr);
+      goto out;
     } else {
       if (!rdlock_start(*p, mdr)) 
        goto out;
@@ -396,6 +428,8 @@ bool Locker::acquire_locks(MDRequest *mdr,
       xlock_finish(stray, mdr, &need_issue);
     else if (mdr->wrlocks.count(stray))
       wrlock_finish(stray, mdr, &need_issue);
+    else if (mdr->remote_wrlocks.count(stray))
+      remote_wrlock_finish(stray, mdr->remote_wrlocks[stray], mdr);
     else
       rdlock_finish(stray, mdr, &need_issue);
     if (need_issue)
@@ -441,7 +475,10 @@ void Locker::drop_locks(Mutation *mut, set<CInode*> *pneed_issue)
     rdlock_finish(*mut->rdlocks.begin(), mut, &ni);
     if (ni)
       pneed_issue->insert((CInode*)p);
-  }     
+  }
+  while (!mut->remote_wrlocks.empty()) {
+    remote_wrlock_finish(mut->remote_wrlocks.begin()->first, mut->remote_wrlocks.begin()->second, mut);
+  }
   while (!mut->wrlocks.empty()) {
     bool ni = false;
     MDSCacheObject *p = (*mut->wrlocks.begin())->get_parent();
@@ -468,6 +505,9 @@ void Locker::drop_non_rdlocks(Mutation *mut, set<CInode*> *pneed_issue)
     if (ni)
       pneed_issue->insert((CInode*)p);
   }
+  while (!mut->remote_wrlocks.empty()) {
+    remote_wrlock_finish(mut->remote_wrlocks.begin()->first, mut->remote_wrlocks.begin()->second, mut);
+  }
   while (!mut->wrlocks.empty()) {
     bool ni = false;
     MDSCacheObject *p = (*mut->wrlocks.begin())->get_parent();
@@ -1138,6 +1178,45 @@ void Locker::wrlock_finish(SimpleLock *lock, Mutation *mut, bool *pneed_issue)
 }
 
 
+// remote wrlock
+
+void Locker::remote_wrlock_start(SimpleLock *lock, int target, MDRequest *mut)
+{
+  dout(7) << "remote_wrlock_start mds" << target << " on " << *lock << " on " << *lock->get_parent() << dendl;
+
+  // wait for single auth
+  if (lock->get_parent()->is_ambiguous_auth()) {
+    lock->get_parent()->add_waiter(MDSCacheObject::WAIT_SINGLEAUTH, 
+                                  new C_MDS_RetryRequest(mdcache, mut));
+    return;
+  }
+    
+  // send lock request
+  mut->more()->slaves.insert(target);
+  MMDSSlaveRequest *r = new MMDSSlaveRequest(mut->reqid, MMDSSlaveRequest::OP_WRLOCK);
+  r->set_lock_type(lock->get_type());
+  lock->get_parent()->set_object_info(r->get_object_info());
+  mds->send_message_mds(r, target);
+  
+  // wait
+  lock->add_waiter(SimpleLock::WAIT_REMOTEXLOCK, new C_MDS_RetryRequest(mdcache, mut));
+}
+
+void Locker::remote_wrlock_finish(SimpleLock *lock, int target, Mutation *mut)
+{
+  // drop ref
+  mut->remote_wrlocks.erase(lock);
+  mut->locks.erase(lock);
+  
+  dout(7) << "remote_wrlock_finish releasing remote wrlock on mds" << target
+         << " " << *lock->get_parent()  << dendl;
+  if (mds->mdsmap->get_state(target) >= MDSMap::STATE_REJOIN) {
+    MMDSSlaveRequest *slavereq = new MMDSSlaveRequest(mut->reqid, MMDSSlaveRequest::OP_UNWRLOCK);
+    slavereq->set_lock_type(lock->get_type());
+    lock->get_parent()->set_object_info(slavereq->get_object_info());
+    mds->send_message_mds(slavereq, target);
+  }
+}
 
 
 // ------------------
diff --git a/src/mds/Locker.h b/src/mds/Locker.h
index 68344984acff82758aafb905e5e06853abb17f2b..39f01f80bf460f33e908f33222badfbc18c27fdb 100644
@@ -85,7 +85,8 @@ public:
   bool acquire_locks(MDRequest *mdr,
                     set<SimpleLock*> &rdlocks,
                     set<SimpleLock*> &wrlocks,
-                    set<SimpleLock*> &xlocks);
+                    set<SimpleLock*> &xlocks,
+                    map<SimpleLock*,int> *remote_wrlocks=NULL);
 
   void drop_locks(Mutation *mut, set<CInode*> *pneed_issue=0);
   void set_xlocks_done(Mutation *mut);
@@ -133,6 +134,9 @@ public:
   bool wrlock_start(SimpleLock *lock, MDRequest *mut, bool nowait=false);
   void wrlock_finish(SimpleLock *lock, Mutation *mut, bool *pneed_issue);
 
+  void remote_wrlock_start(SimpleLock *lock, int target, MDRequest *mut);
+  void remote_wrlock_finish(SimpleLock *lock, int target, Mutation *mut);
+
   bool xlock_start(SimpleLock *lock, MDRequest *mut);
   void xlock_finish(SimpleLock *lock, Mutation *mut, bool *pneed_issue);
 
diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc
index 79d9ba68e0f9b5a7dcfc65220707ccb716bc1153..705d9490feb131b086f8b2f31b88c5d730aa9ef1 100644
@@ -7351,6 +7351,15 @@ void MDCache::request_drop_foreign_locks(MDRequest *mdr)
     }
   }
 
+  map<SimpleLock*, int>::iterator q = mdr->remote_wrlocks.begin();
+  while (q != mdr->remote_wrlocks.end()) {
+    dout(10) << "request_drop_foreign_locks forgetting remote_wrlock " << *q->first
+            << " on mds" << q->second
+            << " on " << *(q->first)->get_parent() << dendl;
+    mdr->locks.erase(q->first);
+    mdr->remote_wrlocks.erase(q++);
+  }
+
   mdr->more()->slaves.clear(); /* we no longer have requests out to them, and
                                 * leaving them in can cause double-notifies as
                                 * this function can get called more than once */
diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h
index 3c8191453a86bc6549bf53a952260705a5c1d8a9..0e9f023c333e195cd3eb70d82b0fb13e5bb83edb 100644
@@ -86,6 +86,7 @@ struct Mutation {
   // held locks
   set< SimpleLock* > rdlocks;  // always local.
   set< SimpleLock* > wrlocks;  // always local.
+  map< SimpleLock*, int > remote_wrlocks;
   set< SimpleLock* > xlocks;   // local or remote.
   set< SimpleLock*, SimpleLock::ptr_lt > locks;  // full ordering
 
@@ -118,6 +119,7 @@ struct Mutation {
     assert(xlocks.empty());
     assert(rdlocks.empty());
     assert(wrlocks.empty());
+    assert(remote_wrlocks.empty());
   }
 
   bool is_master() { return slave_to_mds < 0; }
diff --git a/src/mds/Server.cc b/src/mds/Server.cc
index 3a86e535791c19695b3ebf645a14678b7e9b9f66..21fddbabcb129b9f525496f77c87c4f32a6c43dc 100644
@@ -1253,6 +1253,20 @@ void Server::handle_slave_request(MMDSSlaveRequest *m)
       }
       break;
 
+    case MMDSSlaveRequest::OP_WRLOCKACK:
+      {
+       // identify lock, master request
+       SimpleLock *lock = mds->locker->get_lock(m->get_lock_type(),
+                                                m->get_object_info());
+       MDRequest *mdr = mdcache->request_get(m->get_reqid());
+       mdr->more()->slaves.insert(from);
+       dout(10) << "got remote wrlock on " << *lock << " on " << *lock->get_parent() << dendl;
+       mdr->remote_wrlocks[lock] = from;
+       mdr->locks.insert(lock);
+       lock->finish_waiters(SimpleLock::WAIT_REMOTEXLOCK);
+      }
+      break;
+
     case MMDSSlaveRequest::OP_AUTHPINACK:
       {
        MDRequest *mdr = mdcache->request_get(m->get_reqid());
@@ -1331,37 +1345,48 @@ void Server::dispatch_slave_request(MDRequest *mdr)
 
   if (logger) logger->inc(l_mdss_dsreq);
 
-  switch (mdr->slave_request->get_op()) {
+  int op = mdr->slave_request->get_op();
+  switch (op) {
   case MMDSSlaveRequest::OP_XLOCK:
+  case MMDSSlaveRequest::OP_WRLOCK:
     {
       // identify object
       SimpleLock *lock = mds->locker->get_lock(mdr->slave_request->get_lock_type(),
                                               mdr->slave_request->get_object_info());
 
-      if (lock && lock->get_parent()->is_auth()) {
-       // xlock.
+      if (!lock) {
+       dout(10) << "don't have object, dropping" << dendl;
+       assert(0); // can this happen, if we auth pinned properly.
+      }
+      if (op == MMDSSlaveRequest::OP_XLOCK && !lock->get_parent()->is_auth()) {
+       dout(10) << "not auth for remote xlock attempt, dropping on " 
+                << *lock << " on " << *lock->get_parent() << dendl;
+      } else {
        // use acquire_locks so that we get auth_pinning.
        set<SimpleLock*> rdlocks;
-       set<SimpleLock*> wrlocks;
+       set<SimpleLock*> wrlocks = mdr->wrlocks;
        set<SimpleLock*> xlocks = mdr->xlocks;
-       xlocks.insert(lock);
+
+       int replycode;
+       switch (op) {
+       case MMDSSlaveRequest::OP_XLOCK:
+         xlocks.insert(lock);
+         replycode = MMDSSlaveRequest::OP_XLOCKACK;
+         break;
+       case MMDSSlaveRequest::OP_WRLOCK:
+         wrlocks.insert(lock);
+         replycode = MMDSSlaveRequest::OP_WRLOCKACK;
+         break;
+       }
        
        if (!mds->locker->acquire_locks(mdr, rdlocks, wrlocks, xlocks))
          return;
        
        // ack
-       MMDSSlaveRequest *r = new MMDSSlaveRequest(mdr->reqid, MMDSSlaveRequest::OP_XLOCKACK);
+       MMDSSlaveRequest *r = new MMDSSlaveRequest(mdr->reqid, replycode);
        r->set_lock_type(lock->get_type());
        lock->get_parent()->set_object_info(r->get_object_info());
        mds->send_message(r, mdr->slave_request->get_connection());
-      } else {
-       if (lock) {
-         dout(10) << "not auth for remote xlock attempt, dropping on " 
-                  << *lock << " on " << *lock->get_parent() << dendl;
-       } else {
-         dout(10) << "don't have object, dropping" << dendl;
-         assert(0); // can this happen, if we auth pinned properly.
-       }
       }
 
       // done.
@@ -1371,12 +1396,20 @@ void Server::dispatch_slave_request(MDRequest *mdr)
     break;
 
   case MMDSSlaveRequest::OP_UNXLOCK:
+  case MMDSSlaveRequest::OP_UNWRLOCK:
     {  
       SimpleLock *lock = mds->locker->get_lock(mdr->slave_request->get_lock_type(),
                                               mdr->slave_request->get_object_info());
       assert(lock);
       bool need_issue = false;
-      mds->locker->xlock_finish(lock, mdr, &need_issue);
+      switch (op) {
+      case MMDSSlaveRequest::OP_UNXLOCK:
+       mds->locker->xlock_finish(lock, mdr, &need_issue);
+       break;
+      case MMDSSlaveRequest::OP_UNWRLOCK:
+       mds->locker->wrlock_finish(lock, mdr, &need_issue);
+       break;
+      }
       if (need_issue)
        mds->locker->issue_caps((CInode*)lock->get_parent());
 
diff --git a/src/messages/MMDSSlaveRequest.h b/src/messages/MMDSSlaveRequest.h
index bb21d9ebae2e2d7e49b538e8e632d2561c751ae9..855088755b2fd79acd582d26cdc29ab4d574d67f 100644
@@ -34,6 +34,10 @@ class MMDSSlaveRequest : public Message {
   static const int OP_RENAMEPREP =     7;
   static const int OP_RENAMEPREPACK = -7;
 
+  static const int OP_WRLOCK = 8;
+  static const int OP_WRLOCKACK = -8;
+  static const int OP_UNWRLOCK = 9;
+
   static const int OP_FINISH = 17;  
   static const int OP_COMMITTED = -18;  
 
@@ -59,6 +63,10 @@ class MMDSSlaveRequest : public Message {
     case OP_FINISH: return "finish"; // commit
     case OP_COMMITTED: return "committed";
 
+    case OP_WRLOCK: return "wrlock";
+    case OP_WRLOCKACK: return "wrlock_ack";
+    case OP_UNWRLOCK: return "unwrlock";
+
     case OP_ABORT: return "abort";
       //case OP_COMMIT: return "commit";