mon/MgrMonitor: blacklist previous instance

author Patrick Donnelly <pdonnell@redhat.com>

Thu, 21 Nov 2019 20:31:16 +0000 (12:31 -0800)

committer Patrick Donnelly <pdonnell@redhat.com>

Wed, 4 Dec 2019 01:53:13 +0000 (17:53 -0800)
author Patrick Donnelly <pdonnell@redhat.com>
Thu, 21 Nov 2019 20:31:16 +0000 (12:31 -0800)
committer Patrick Donnelly <pdonnell@redhat.com>
Wed, 4 Dec 2019 01:53:13 +0000 (17:53 -0800)
diff --git a/src/common/options.cc b/src/common/options.cc

index 4a250a7740998fce157012fa81f44f8448cfd911..705bca131bc3c552bda3a733aa78e5b085dafa9a 100644 (file)
--- a/src/common/options.cc
+++ b/src/common/options.cc
@@ -2089,6 +2089,14 @@ std::vector<Option> get_global_options() {
                       "daemons remain in the OSD map")
      .set_flag(Option::FLAG_RUNTIME),
  
+    Option("mon_mgr_blacklist_interval", Option::TYPE_FLOAT, Option::LEVEL_DEV)
+    .set_default(1_day)
+    .set_min(1_hr)
+    .add_service("mon")
+    .set_description("Duration in seconds that blacklist entries for mgr "
+                     "daemons remain in the OSD map")
+    .set_flag(Option::FLAG_RUNTIME),
+
      Option("mon_osd_crush_smoke_test", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
      .set_default(true)
      .add_service("mon")
diff --git a/src/mgr/Mgr.cc b/src/mgr/Mgr.cc

index 8a8b7e9f28a5a25a3667b56360568ee0c98a0aaa..c216d604a33fcc76fcd4650a2b8d31f8e8f3c1f6 100644 (file)
--- a/src/mgr/Mgr.cc
+++ b/src/mgr/Mgr.cc
@@ -248,22 +248,6 @@ void Mgr::init()
    register_async_signal_handler_oneshot(SIGINT, handle_mgr_signal);
    register_async_signal_handler_oneshot(SIGTERM, handle_mgr_signal);
  
-  // Start communicating with daemons to learn statistics etc
-  int r = server.init(monc->get_global_id(), client_messenger->get_myaddrs());
-  if (r < 0) {
-    derr << "Initialize server fail: " << cpp_strerror(r) << dendl;
-    // This is typically due to a bind() failure, so let's let
-    // systemd restart us.
-    exit(1);
-  }
-  dout(4) << "Initialized server at " << server.get_myaddrs() << dendl;
-
-  // Preload all daemon metadata (will subsequently keep this
-  // up to date by watching maps, so do the initial load before
-  // we subscribe to any maps)
-  dout(4) << "Loading daemon metadata..." << dendl;
-  load_all_metadata();
-
    // subscribe to all the maps
    monc->sub_want("log-info", 0, 0);
    monc->sub_want("mgrdigest", 0, 0);
@@ -281,9 +265,32 @@ void Mgr::init()
  
    // Start Objecter and wait for OSD map
    lock.unlock();  // Drop lock because OSDMap dispatch calls into my ms_dispatch
-  objecter->wait_for_osd_map();
+  epoch_t e;
+  cluster_state.with_mgrmap([&e](const MgrMap& m) {
+    e = m.last_failure_osd_epoch;
+  });
+  /* wait for any blacklists to be applied to previous mgr instance */
+  dout(4) << "Waiting for new OSDMap (e=" << e
+          << ") that may blacklist prior active." << dendl;
+  objecter->wait_for_osd_map(e);
    lock.lock();
  
+  // Start communicating with daemons to learn statistics etc
+  int r = server.init(monc->get_global_id(), client_messenger->get_myaddrs());
+  if (r < 0) {
+    derr << "Initialize server fail: " << cpp_strerror(r) << dendl;
+    // This is typically due to a bind() failure, so let's let
+    // systemd restart us.
+    exit(1);
+  }
+  dout(4) << "Initialized server at " << server.get_myaddrs() << dendl;
+
+  // Preload all daemon metadata (will subsequently keep this
+  // up to date by watching maps). NOTE: this now runs after the
+  // map subscriptions above, no longer before them.
+  dout(4) << "Loading daemon metadata..." << dendl;
+  load_all_metadata();
+
    // Populate PGs in ClusterState
    cluster_state.with_osdmap_and_pgmap([this](const OSDMap &osd_map,
                                              const PGMap& pg_map) {
diff --git a/src/mon/MgrMap.h b/src/mon/MgrMap.h

index ab82b2d63e4eaec5cd6f407173729d9bf3a73bf2..410ebd2f6110eff6eb3aa3e04f2b47d842d527d7 100644 (file)
--- a/src/mon/MgrMap.h
+++ b/src/mon/MgrMap.h
@@ -223,6 +223,7 @@ public:
    };
  
    epoch_t epoch = 0;
+  epoch_t last_failure_osd_epoch = 0;
  
    /// global_id of the ceph-mgr instance selected as a leader
    uint64_t active_gid = 0;
@@ -255,6 +256,7 @@ public:
    std::map<std::string, std::string> services;
  
    epoch_t get_epoch() const { return epoch; }
+  epoch_t get_last_failure_osd_epoch() const { return last_failure_osd_epoch; }
    entity_addrvec_t get_active_addrs() const { return active_addrs; }
    uint64_t get_active_gid() const { return active_gid; }
    bool get_available() const { return available; }
@@ -379,7 +381,7 @@ public:
        ENCODE_FINISH(bl);
        return;
      }
-    ENCODE_START(9, 6, bl);
+    ENCODE_START(10, 6, bl);
      encode(epoch, bl);
      encode(active_addrs, bl, features);
      encode(active_gid, bl);
@@ -392,6 +394,7 @@ public:
      encode(active_change, bl);
      encode(always_on_modules, bl);
      encode(active_mgr_features, bl);
+    encode(last_failure_osd_epoch, bl);
      ENCODE_FINISH(bl);
      return;
    }
@@ -440,6 +443,9 @@ public:
      if (struct_v >= 9) {
        decode(active_mgr_features, p);
      }
+    if (struct_v >= 10) {
+      decode(last_failure_osd_epoch, p);
+    }
      DECODE_FINISH(p);
    }
  
@@ -491,6 +497,7 @@ public:
        }
        f->close_section();
      }
+    f->dump_int("last_failure_osd_epoch", last_failure_osd_epoch);
      f->close_section();
    }
  
diff --git a/src/mon/MgrMonitor.cc b/src/mon/MgrMonitor.cc

index c9cf10fc2d819d2661f16361f397bbe1c8f22844..a361978fcdcae42cf3962e3816c9358a5cafc9bd 100644 (file)
--- a/src/mon/MgrMonitor.cc
+++ b/src/mon/MgrMonitor.cc
@@ -454,6 +454,12 @@ bool MgrMonitor::prepare_beacon(MonOpRequestRef op)
      dout(4) << "Active daemon restart (mgr." << m->get_name() << ")" << dendl;
      mon->clog->info() << "Active manager daemon " << m->get_name()
                        << " restarted";
+    if (!mon->osdmon()->is_writeable()) {
+      dout(1) << __func__ << ":  waiting for osdmon writeable to"
+                 " blacklist old instance." << dendl;
+      mon->osdmon()->wait_for_writeable(op, new C_RetryMessage(this, op));
+      return false;
+    }
      drop_active();
    }
  
@@ -744,7 +750,8 @@ void MgrMonitor::tick()
    }
  
    if (pending_map.active_gid != 0
-      && last_beacon.at(pending_map.active_gid) < cutoff) {
+      && last_beacon.at(pending_map.active_gid) < cutoff
+      && mon->osdmon()->is_writeable()) {
      const std::string old_active_name = pending_map.active_name;
      drop_active();
      propose = true;
@@ -814,10 +821,21 @@ bool MgrMonitor::promote_standby()
  
  void MgrMonitor::drop_active()
  {
+  ceph_assert(mon->osdmon()->is_writeable());
+
    if (last_beacon.count(pending_map.active_gid) > 0) {
      last_beacon.erase(pending_map.active_gid);
    }
  
+  ceph_assert(pending_map.active_gid > 0);
+  auto until = ceph_clock_now();
+  until += g_conf().get_val<double>("mon_mgr_blacklist_interval");
+  dout(5) << "blacklisting previous mgr." << pending_map.active_name << "."
+          << pending_map.active_gid << " ("
+          << pending_map.active_addrs << ")" << dendl;
+  auto blacklist_epoch = mon->osdmon()->blacklist(pending_map.active_addrs, until);
+  request_proposal(mon->osdmon());
+
    pending_metadata_rm.insert(pending_map.active_name);
    pending_metadata.erase(pending_map.active_name);
    pending_map.active_name = "";
@@ -827,6 +845,7 @@ void MgrMonitor::drop_active()
    pending_map.available = false;
    pending_map.active_addrs = entity_addrvec_t();
    pending_map.services.clear();
+  pending_map.last_failure_osd_epoch = blacklist_epoch;
  
    // So that when new active mgr subscribes to mgrdigest, it will
    // get an immediate response instead of waiting for next timer
@@ -1023,6 +1042,10 @@ bool MgrMonitor::prepare_command(MonOpRequestRef op)
      if (!err.empty()) {
        // Does not parse as a gid, treat it as a name
        if (pending_map.active_name == who) {
+        if (!mon->osdmon()->is_writeable()) {
+          mon->osdmon()->wait_for_writeable(op, new C_RetryMessage(this, op));
+          return false;
+        }
          drop_active();
          changed = true;
        } else {
@@ -1042,6 +1065,10 @@ bool MgrMonitor::prepare_command(MonOpRequestRef op)
        }
      } else {
        if (pending_map.active_gid == gid) {
+        if (!mon->osdmon()->is_writeable()) {
+          mon->osdmon()->wait_for_writeable(op, new C_RetryMessage(this, op));
+          return false;
+        }
          drop_active();
          changed = true;
        } else if (pending_map.standbys.count(gid) > 0) {
diff --git a/src/osdc/Objecter.cc b/src/osdc/Objecter.cc

index 37dc01d0144d5639e69d809065798c011d2d674a..6665957ddb3e12cef97f25cfaf9b49e21291c355 100644 (file)
--- a/src/osdc/Objecter.cc
+++ b/src/osdc/Objecter.cc
@@ -1911,10 +1911,10 @@ void Objecter::close_session(OSDSession *s)
    logger->set(l_osdc_osd_sessions, osd_sessions.size());
  }
  
-void Objecter::wait_for_osd_map()
+void Objecter::wait_for_osd_map(epoch_t e)
  {
    unique_lock l(rwlock);
-  if (osdmap->get_epoch()) {
+  if (osdmap->get_epoch() >= e) {
      l.unlock();
      return;
    }
@@ -1925,7 +1925,7 @@ void Objecter::wait_for_osd_map()
    bool done;
    std::unique_lock mlock{lock};
    C_SafeCond *context = new C_SafeCond(lock, cond, &done, NULL);
-  waiting_for_map[0].push_back(pair<Context*, int>(context, 0));
+  waiting_for_map[e].push_back(pair<Context*, int>(context, 0));
    l.unlock();
    cond.wait(mlock, [&done] { return done; });
  }
diff --git a/src/osdc/Objecter.h b/src/osdc/Objecter.h

index f669ce31c5fbf6c23c899bca45f0751af0088ecf..c9413e1f354d82fb1e6a69399fbf23bcea4eda76 100644 (file)
--- a/src/osdc/Objecter.h
+++ b/src/osdc/Objecter.h
@@ -2185,7 +2185,7 @@ private:
    void handle_osd_backoff(class MOSDBackoff *m);
    void handle_watch_notify(class MWatchNotify *m);
    void handle_osd_map(class MOSDMap *m);
-  void wait_for_osd_map();
+  void wait_for_osd_map(epoch_t e=0);
  
    /**
     * Get std::list of entities blacklisted since this was last called,
author	Patrick Donnelly <pdonnell@redhat.com>
	Thu, 21 Nov 2019 20:31:16 +0000 (12:31 -0800)
committer	Patrick Donnelly <pdonnell@redhat.com>
	Wed, 4 Dec 2019 01:53:13 +0000 (17:53 -0800)
src/common/options.cc		patch \| blob \| history
src/mgr/Mgr.cc		patch \| blob \| history
src/mon/MgrMap.h		patch \| blob \| history
src/mon/MgrMonitor.cc		patch \| blob \| history
src/osdc/Objecter.cc		patch \| blob \| history
src/osdc/Objecter.h		patch \| blob \| history