]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mds: emit warning for estinated replay time
authorVenky Shankar <vshankar@redhat.com>
Tue, 4 Jun 2024 07:02:41 +0000 (03:02 -0400)
committerVenky Shankar <vshankar@redhat.com>
Fri, 29 Nov 2024 10:51:21 +0000 (16:21 +0530)
If replay runs more than 30 seconds, emit a warning with
estimated replay completion time.

Fixes: https://tracker.ceph.com/issues/61863
Signed-off-by: Manish M Yathnalli <myathnal@redhat.com>
Signed-off-by: Venky Shankar <vshankar@redhat.com>
src/common/options/mds.yaml.in
src/mds/Beacon.cc
src/mds/MDLog.cc
src/mds/MDLog.h
src/mds/mdstypes.cc
src/mds/mdstypes.h
src/messages/MMDSBeacon.h

index 94824faef6bcf2689a028f927f0582e5bcb11310..4cbc079a21573fc7977979118c61f5456c4d4ab0 100644 (file)
@@ -1713,6 +1713,12 @@ options:
   default: 1000
   services:
   - mds
+- name: mds_delay_journal_replay_for_testing
+  type: millisecs
+  level: dev
+  desc: Delay the journal replay to verify the replay time estimate
+  long_desc: Jorunal replay warning is activated if the mds has been in replay state for more than 30 seconds. This config delays replay for validating the replay warning in tests.
+  default: 0
   flags:
   - runtime
 - name: mds_server_dispatch_killpoint_random
index 6fbfc79d416a9607c42090ae685486b3f76ce69e..1c1eeb4ecf8a50ca82ff97ff163320fa5f5a7401 100644 (file)
@@ -26,6 +26,7 @@
 #include "mds/MDSRank.h"
 #include "mds/MDSMap.h"
 #include "mds/Locker.h"
+#include "mds/mdstypes.h"
 
 #include "Beacon.h"
 
@@ -550,6 +551,19 @@ void Beacon::notify_health(MDSRank const *mds)
       }
     }
   }
+  if (mds->is_replay()) {
+    CachedStackStringStream css;
+    auto estimate = mds->mdlog->get_estimated_replay_finish_time();
+    // this probably should be configurable, however, its fine to report
+    // if replay is running for more than 30 seconds.
+    if (estimate.elapsed_time > std::chrono::seconds(30)) {
+      *css << "replay: " << estimate.percent_complete << "% complete - elapsed time: "
+          << estimate.elapsed_time << ", estimated time remaining: "
+          << estimate.estimated_time;
+      MDSHealthMetric m(MDS_HEALTH_ESTIMATED_REPLAY_TIME, HEALTH_WARN, css->strv());
+      health.metrics.push_back(m);
+    }
+  }
 }
 
 MDSMap::DaemonState Beacon::get_want_state() const
index 4bbf2a1a14154491796ecc5190d431d1db77d3a5..d041e3b2fc8ae5495a11572f95c202e4668e4cc1 100644 (file)
@@ -221,7 +221,46 @@ uint64_t MDLog::get_safe_pos() const
   return journaler->get_write_safe_pos(); 
 }
 
+// estimate the replay completion time based on mdlog journal pointers
+EstimatedReplayTime MDLog::get_estimated_replay_finish_time() {
+  ceph_assert(mds->is_replay());
 
+  EstimatedReplayTime estimated_time{0, std::chrono::seconds::zero(), std::chrono::seconds::zero()};
+  if (!journaler) {
+    return estimated_time;
+  }
+
+  auto read_pos = journaler->get_read_pos();
+  auto write_pos = journaler->get_write_pos();
+  auto trimmed_pos = journaler->get_trimmed_pos();
+
+  dout(20) << __func__ << ": read_pos=" << read_pos << ", write_pos="
+          << write_pos << ", trimmed_pos=" << trimmed_pos << dendl;
+
+  if (read_pos == trimmed_pos || write_pos == trimmed_pos) {
+    return estimated_time;
+  }
+
+  auto total_bytes = write_pos - trimmed_pos;
+  double percent_complete = ((double)(read_pos - trimmed_pos)) / (double)total_bytes;
+  auto elapsed_time = std::chrono::duration_cast<std::chrono::seconds>
+    (ceph::coarse_mono_clock::now() - replay_start_time);
+  auto time = ((1 - percent_complete) / percent_complete) * elapsed_time;
+
+  dout(20) << __func__ << "percent_complete=" << percent_complete
+          << ", elapsed_time=" << elapsed_time
+          << ", estimated_time=" << std::chrono::round<std::chrono::seconds>(time)
+          << dendl;
+
+  estimated_time.percent_complete = percent_complete * 100;
+  estimated_time.elapsed_time = elapsed_time;
+  estimated_time.estimated_time = std::chrono::round<std::chrono::seconds>(time);
+  dout(20) << __func__ << "estimated_time.percent_complete=" << estimated_time.percent_complete
+          << ", estimated_time.elapsed_time=" << estimated_time.elapsed_time
+          << ", estimated_time.estimated_time=" << estimated_time.estimated_time
+          << dendl;
+  return estimated_time;
+}
 
 void MDLog::create(MDSContext *c)
 {
@@ -1137,6 +1176,7 @@ void MDLog::_recovery_thread(MDSContext *completion)
   {
     std::lock_guard l(mds->mds_lock);
     journaler = front_journal;
+    replay_start_time = ceph::coarse_mono_clock::now();
   }
 
   C_SaferCond recover_wait;
@@ -1374,11 +1414,17 @@ void MDLog::_reformat_journal(JournalPointer const &jp_in, Journaler *old_journa
 // i am a separate thread
 void MDLog::_replay_thread()
 {
-  dout(10) << "_replay_thread start" << dendl;
+  dout(10) << __func__ << ": start time: " << replay_start_time << ", now: "
+           << ceph::coarse_mono_clock::now() << dendl;
 
   // loop
   int r = 0;
   while (1) {
+    auto sleep_time = g_conf().get_val<std::chrono::milliseconds>("mds_delay_journal_replay_for_testing");
+    if (unlikely(sleep_time > 0ms)) {
+      dout(10) << __func__ << ": sleeping for " << sleep_time << "ms" << dendl;
+      std::this_thread::sleep_for(sleep_time);
+    }
     // wait for read?
     journaler->check_isreadable(); 
     if (journaler->get_error()) {
index a858b40fa036d2b33588a1aa565aa3ac8ae74e15..180a34c9d824501bec285ce145ad1e0496e37d8d 100644 (file)
@@ -53,6 +53,7 @@ enum {
 #include "LogSegment.h"
 #include "MDSMap.h"
 #include "SegmentBoundary.h"
+#include "mdstypes.h"
 
 #include <list>
 #include <map>
@@ -162,6 +163,7 @@ public:
   void reopen(MDSContext *onopen);
   void append();
   void replay(MDSContext *onfinish);
+  EstimatedReplayTime get_estimated_replay_finish_time();
 
   void standby_trim_segments();
 
@@ -328,5 +330,7 @@ private:
   std::atomic<bool> upkeep_log_trim_shutdown{false};
 
   std::map<uint64_t, std::vector<Context*>> waiting_for_expire; // protected by mds_lock
+
+  ceph::coarse_mono_time replay_start_time = ceph::coarse_mono_clock::zero();
 };
 #endif
index 680218e62e30e8ef8fac5cdd789e531d5a1d1bf6..f9424eed6dc283af61f0abfb934135ddb11d5757 100644 (file)
@@ -1042,3 +1042,8 @@ void snaprealm_reconnect_t::generate_test_instances(std::list<snaprealm_reconnec
   ls.back()->realm.seq = 2;
   ls.back()->realm.parent = 1;
 }
+
+void EstimatedReplayTime::print(std::ostream& out) {
+  out << "replay: " << percent_complete << "% complete - elapsed time: "
+      << elapsed_time << ", estimated time remaining: " << estimated_time;
+}
index 3b8269006cb783be67d7d3a72db6c8b6eab2220a..742d7b234325eb7f875ecd5901cde342697d383d 100644 (file)
@@ -1044,4 +1044,12 @@ inline bool operator==(const MDSCacheObjectInfo& l, const MDSCacheObjectInfo& r)
 }
 WRITE_CLASS_ENCODER(MDSCacheObjectInfo)
 
+struct EstimatedReplayTime {
+  double percent_complete;
+  std::chrono::seconds estimated_time;
+  std::chrono::seconds elapsed_time;
+
+  void print(std::ostream& out);
+};
+
 #endif
index c157c33e758b5ab194605dd6c0072a68f058c977..526285aae8c93b81977c18e95150bbf5abb44bc2 100644 (file)
@@ -48,6 +48,7 @@ enum mds_metric_t {
   MDS_HEALTH_CLIENTS_LAGGY,
   MDS_HEALTH_CLIENTS_LAGGY_MANY,
   MDS_HEALTH_CLIENTS_BROKEN_ROOTSQUASH,
+  MDS_HEALTH_ESTIMATED_REPLAY_TIME,
   MDS_HEALTH_DUMMY, // not a real health warning, for testing
 };
 
@@ -69,6 +70,7 @@ inline const char *mds_metric_name(mds_metric_t m)
   case MDS_HEALTH_CLIENTS_LAGGY: return "MDS_CLIENTS_LAGGY";
   case MDS_HEALTH_CLIENTS_LAGGY_MANY: return "MDS_CLIENTS_LAGGY_MANY";
   case MDS_HEALTH_CLIENTS_BROKEN_ROOTSQUASH: return "MDS_CLIENTS_BROKEN_ROOTSQUASH";
+  case MDS_HEALTH_ESTIMATED_REPLAY_TIME: return "MDS_ESTIMATED_REPLAY_TIME";
   case MDS_HEALTH_DUMMY: return "MDS_DUMMY";
   default:
     return "???";
@@ -107,6 +109,8 @@ inline const char *mds_metric_summary(mds_metric_t m)
     return "%num% client(s) laggy due to laggy OSDs";  
   case MDS_HEALTH_CLIENTS_BROKEN_ROOTSQUASH:
     return "%num% MDS report clients with broken root_squash implementation";
+  case MDS_HEALTH_ESTIMATED_REPLAY_TIME:
+    return "%num% estimated journal replay time";
   default:
     return "???";
   }