osd: move next_osdmap under separate lock

author Sage Weil <sage@inktank.com>

Thu, 29 Nov 2012 22:16:16 +0000 (14:16 -0800)

committer Sage Weil <sage@inktank.com>

Thu, 29 Nov 2012 23:15:31 +0000 (15:15 -0800)
author Sage Weil <sage@inktank.com>
Thu, 29 Nov 2012 22:16:16 +0000 (14:16 -0800)
committer Sage Weil <sage@inktank.com>
Thu, 29 Nov 2012 23:15:31 +0000 (15:15 -0800)
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc

index 2f4eb0f56ec7a191ec0ca5f858a54f4296e18e51..a252265fe9f85827c77444904611ff40684270f7 100644 (file)
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -158,6 +158,7 @@ OSDService::OSDService(OSD *osd) :
    rep_scrub_wq(osd->rep_scrub_wq),
    class_handler(osd->class_handler),
    publish_lock("OSDService::publish_lock"),
+  pre_publish_lock("OSDService::pre_publish_lock"),
    sched_scrub_lock("OSDService::sched_scrub_lock"), scrubs_pending(0),
    scrubs_active(0),
    watch_lock("OSD::watch_lock"),
@@ -2498,7 +2499,7 @@ void OSD::send_alive()
  
  void OSDService::send_message_osd_cluster(int peer, Message *m, epoch_t from_epoch)
  {
-  Mutex::Locker l(publish_lock);
+  Mutex::Locker l(pre_publish_lock);
  
    // service map is always newer/newest
    assert(from_epoch <= next_osdmap->get_epoch());
@@ -2513,7 +2514,7 @@ void OSDService::send_message_osd_cluster(int peer, Message *m, epoch_t from_epo
  
  Connection *OSDService::get_con_osd_cluster(int peer, epoch_t from_epoch)
  {
-  Mutex::Locker l(publish_lock);
+  Mutex::Locker l(pre_publish_lock);
  
    // service map is always newer/newest
    assert(from_epoch <= next_osdmap->get_epoch());
@@ -2527,7 +2528,7 @@ Connection *OSDService::get_con_osd_cluster(int peer, epoch_t from_epoch)
  
  Connection *OSDService::get_con_osd_hb(int peer, epoch_t from_epoch)
  {
-  Mutex::Locker l(publish_lock);
+  Mutex::Locker l(pre_publish_lock);
  
    // service map is always newer/newest
    assert(from_epoch <= next_osdmap->get_epoch());
@@ -3995,6 +3996,7 @@ void OSD::activate_map()
    }
    to_remove.clear();
  
+  service.pre_publish_map(osdmap);
    service.publish_map(osdmap);
  
    // scan pg's
diff --git a/src/osd/OSD.h b/src/osd/OSD.h

index e5258b0c99f023180254a16c660b227c5cb9c5af..2b623efa339ec157d9979cc9c671710be7b46aae 100644 (file)
--- a/src/osd/OSD.h
+++ b/src/osd/OSD.h
@@ -183,7 +183,7 @@ public:
    ClassHandler  *&class_handler;
  
    // -- superblock --
-  Mutex publish_lock;
+  Mutex publish_lock, pre_publish_lock;
    OSDSuperblock superblock;
    OSDSuperblock get_superblock() {
      Mutex::Locker l(publish_lock);
@@ -193,24 +193,38 @@ public:
      Mutex::Locker l(publish_lock);
      superblock = block;
    }
-  OSDMapRef osdmap, next_osdmap;
+
+  int get_nodeid() const { return whoami; }
+
+  OSDMapRef osdmap;
    OSDMapRef get_osdmap() {
      Mutex::Locker l(publish_lock);
      return osdmap;
    }
-  void pre_publish_map(OSDMapRef map) {
-    Mutex::Locker l(publish_lock);
-    next_osdmap = map;
-  }
    void publish_map(OSDMapRef map) {
      Mutex::Locker l(publish_lock);
      osdmap = map;
-    next_osdmap = map;
    }
  
-  int get_nodeid() const { return whoami; }
-
-  // -- message helpers --
+  /*
+   * osdmap - current published map
+   * next_osdmap - pre_published map that is about to be published.
+   *
+   * We use the next_osdmap to send messages and initiate connections,
+   * but only if the target is the same instance as the one in the map
+   * epoch the current user is working from (i.e., the result is
+   * equivalent to what is in next_osdmap).
+   *
+   * This allows the helpers to start ignoring osds that are about to
+   * go down, and let OSD::handle_osd_map()/note_down_osd() mark them
+   * down, without worrying about reopening connections from threads
+   * working from old maps.
+   */
+  OSDMapRef next_osdmap;
+  void pre_publish_map(OSDMapRef map) {
+    Mutex::Locker l(pre_publish_lock);
+    next_osdmap = map;
+  }
    Connection *get_con_osd_cluster(int peer, epoch_t from_epoch);
    Connection *get_con_osd_hb(int peer, epoch_t from_epoch);
    void send_message_osd_cluster(int peer, Message *m, epoch_t from_epoch);
author	Sage Weil <sage@inktank.com>
	Thu, 29 Nov 2012 22:16:16 +0000 (14:16 -0800)
committer	Sage Weil <sage@inktank.com>
	Thu, 29 Nov 2012 23:15:31 +0000 (15:15 -0800)
src/osd/OSD.cc		patch \| blob \| history
src/osd/OSD.h		patch \| blob \| history