git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
mon: clean up cluster logging on mon events
author: John Spray <john.spray@redhat.com>
Wed, 8 Nov 2017 16:54:44 +0000 (11:54 -0500)
committer: John Spray <john.spray@redhat.com>
Fri, 10 Nov 2017 09:45:00 +0000 (09:45 +0000)
These changes come from observing the output
when killing a mon and watching the survivors
form a new quorum.

Fixes: http://tracker.ceph.com/issues/22082
Signed-off-by: John Spray <john.spray@redhat.com>
src/mon/Monitor.cc
src/mon/Monitor.h

index 41b19bb6a78702513315d6f4970f1f0700a81cf8..ac8339a77d5e310744ec9e5a4c7c40997f39e5ea 100644 (file)
@@ -1850,7 +1850,7 @@ void Monitor::start_election()
   logger->inc(l_mon_num_elections);
   logger->inc(l_mon_election_call);
 
-  clog->info() << "mon." << name << " calling new monitor election";
+  clog->info() << "mon." << name << " calling monitor election";
   elector.call_election();
 }
 
@@ -1918,8 +1918,8 @@ void Monitor::win_election(epoch_t epoch, set<int>& active, uint64_t features,
   pending_metadata = metadata;
   outside_quorum.clear();
 
-  clog->info() << "mon." << name << "@" << rank
-               << " won leader election with quorum " << quorum;
+  clog->info() << "mon." << name << " is new leader, mons " << get_quorum_names()
+      << " in quorum (ranks " << quorum << ")";
 
   set_leader_commands(get_local_commands(mon_features));
 
@@ -1960,7 +1960,25 @@ void Monitor::win_election(epoch_t epoch, set<int>& active, uint64_t features,
       monmap->get_epoch() > 0) {
     timecheck_start();
     health_tick_start();
-    do_health_to_clog_interval();
+
+    // Freshen the health status before doing health_to_clog in case
+    // our just-completed election changed the health
+    healthmon()->wait_for_active_ctx(new FunctionContext([this](int r){
+      dout(20) << "healthmon now active" << dendl;
+      healthmon()->tick();
+      if (healthmon()->is_proposing()) {
+        dout(20) << __func__ << " healthmon proposing, waiting" << dendl;
+        healthmon()->wait_for_finished_proposal(nullptr, new C_MonContext(this,
+              [this](int r){
+                assert(lock.is_locked_by_me());
+                do_health_to_clog_interval();
+              }));
+
+      } else {
+        do_health_to_clog_interval();
+      }
+    }));
+
     scrub_event_start();
   }
 }
@@ -2287,7 +2305,6 @@ void Monitor::health_tick_start()
     new C_MonContext(this, [this](int r) {
        if (r < 0)
          return;
-       do_health_to_clog();
        health_tick_start();
       }));
 }
@@ -4611,7 +4628,9 @@ void Monitor::handle_timecheck_leader(MonOpRequestRef op)
 
   ostringstream ss;
   health_status_t status = timecheck_status(ss, skew_bound, latency);
-  clog->health(status) << other << " " << ss.str();
+  if (status != HEALTH_OK) {
+    clog->health(status) << other << " " << ss.str();
+  }
 
   dout(10) << __func__ << " from " << other << " ts " << m->timestamp
           << " delta " << delta << " skew_bound " << skew_bound
index 6b3307f0d3b613b8d44a68707933893bc9f977ba..4d0ea79637edde5ee7b17c71f14639b0d16b8a6d 100644 (file)
@@ -635,8 +635,8 @@ public:
     return (class MgrStatMonitor*) paxos_service[PAXOS_MGRSTAT];
   }
 
-  class MgrStatMonitor *healthmon() {
-    return (class MgrStatMonitor*) paxos_service[PAXOS_MGRSTAT];
+  class HealthMonitor *healthmon() {
+    return (class HealthMonitor*) paxos_service[PAXOS_HEALTH];
   }
 
   friend class Paxos;