default: 1000
services:
- mds
+- name: mds_delay_journal_replay_for_testing
+ type: millisecs
+ level: dev
+ desc: Delay the journal replay to verify the replay time estimate
+ long_desc: Jorunal replay warning is activated if the mds has been in replay state for more than 30 seconds. This config delays replay for validating the replay warning in tests.
+ default: 0
flags:
- runtime
- name: mds_server_dispatch_killpoint_random
#include "mds/MDSRank.h"
#include "mds/MDSMap.h"
#include "mds/Locker.h"
+#include "mds/mdstypes.h"
#include "Beacon.h"
}
}
}
+ if (mds->is_replay()) {
+ CachedStackStringStream css;
+ auto estimate = mds->mdlog->get_estimated_replay_finish_time();
+ // this probably should be configurable, however, its fine to report
+ // if replay is running for more than 30 seconds.
+ if (estimate.elapsed_time > std::chrono::seconds(30)) {
+ *css << "replay: " << estimate.percent_complete << "% complete - elapsed time: "
+ << estimate.elapsed_time << ", estimated time remaining: "
+ << estimate.estimated_time;
+ MDSHealthMetric m(MDS_HEALTH_ESTIMATED_REPLAY_TIME, HEALTH_WARN, css->strv());
+ health.metrics.push_back(m);
+ }
+ }
}
MDSMap::DaemonState Beacon::get_want_state() const
return journaler->get_write_safe_pos();
}
+// estimate the replay completion time based on mdlog journal pointers
+EstimatedReplayTime MDLog::get_estimated_replay_finish_time() {
+ ceph_assert(mds->is_replay());
+ EstimatedReplayTime estimated_time{0, std::chrono::seconds::zero(), std::chrono::seconds::zero()};
+ if (!journaler) {
+ return estimated_time;
+ }
+
+ auto read_pos = journaler->get_read_pos();
+ auto write_pos = journaler->get_write_pos();
+ auto trimmed_pos = journaler->get_trimmed_pos();
+
+ dout(20) << __func__ << ": read_pos=" << read_pos << ", write_pos="
+ << write_pos << ", trimmed_pos=" << trimmed_pos << dendl;
+
+ if (read_pos == trimmed_pos || write_pos == trimmed_pos) {
+ return estimated_time;
+ }
+
+ auto total_bytes = write_pos - trimmed_pos;
+ double percent_complete = ((double)(read_pos - trimmed_pos)) / (double)total_bytes;
+ auto elapsed_time = std::chrono::duration_cast<std::chrono::seconds>
+ (ceph::coarse_mono_clock::now() - replay_start_time);
+ auto time = ((1 - percent_complete) / percent_complete) * elapsed_time;
+
+ dout(20) << __func__ << "percent_complete=" << percent_complete
+ << ", elapsed_time=" << elapsed_time
+ << ", estimated_time=" << std::chrono::round<std::chrono::seconds>(time)
+ << dendl;
+
+ estimated_time.percent_complete = percent_complete * 100;
+ estimated_time.elapsed_time = elapsed_time;
+ estimated_time.estimated_time = std::chrono::round<std::chrono::seconds>(time);
+ dout(20) << __func__ << "estimated_time.percent_complete=" << estimated_time.percent_complete
+ << ", estimated_time.elapsed_time=" << estimated_time.elapsed_time
+ << ", estimated_time.estimated_time=" << estimated_time.estimated_time
+ << dendl;
+ return estimated_time;
+}
void MDLog::create(MDSContext *c)
{
{
std::lock_guard l(mds->mds_lock);
journaler = front_journal;
+ replay_start_time = ceph::coarse_mono_clock::now();
}
C_SaferCond recover_wait;
// i am a separate thread
void MDLog::_replay_thread()
{
- dout(10) << "_replay_thread start" << dendl;
+ dout(10) << __func__ << ": start time: " << replay_start_time << ", now: "
+ << ceph::coarse_mono_clock::now() << dendl;
// loop
int r = 0;
while (1) {
+ auto sleep_time = g_conf().get_val<std::chrono::milliseconds>("mds_delay_journal_replay_for_testing");
+ if (unlikely(sleep_time > 0ms)) {
+ dout(10) << __func__ << ": sleeping for " << sleep_time << "ms" << dendl;
+ std::this_thread::sleep_for(sleep_time);
+ }
// wait for read?
journaler->check_isreadable();
if (journaler->get_error()) {
#include "LogSegment.h"
#include "MDSMap.h"
#include "SegmentBoundary.h"
+#include "mdstypes.h"
#include <list>
#include <map>
void reopen(MDSContext *onopen);
void append();
void replay(MDSContext *onfinish);
+ EstimatedReplayTime get_estimated_replay_finish_time();
void standby_trim_segments();
std::atomic<bool> upkeep_log_trim_shutdown{false};
std::map<uint64_t, std::vector<Context*>> waiting_for_expire; // protected by mds_lock
+
+ ceph::coarse_mono_time replay_start_time = ceph::coarse_mono_clock::zero();
};
#endif
ls.back()->realm.seq = 2;
ls.back()->realm.parent = 1;
}
+
+void EstimatedReplayTime::print(std::ostream& out) {
+ out << "replay: " << percent_complete << "% complete - elapsed time: "
+ << elapsed_time << ", estimated time remaining: " << estimated_time;
+}
}
WRITE_CLASS_ENCODER(MDSCacheObjectInfo)
+struct EstimatedReplayTime {
+ double percent_complete;
+ std::chrono::seconds estimated_time;
+ std::chrono::seconds elapsed_time;
+
+ void print(std::ostream& out);
+};
+
#endif
MDS_HEALTH_CLIENTS_LAGGY,
MDS_HEALTH_CLIENTS_LAGGY_MANY,
MDS_HEALTH_CLIENTS_BROKEN_ROOTSQUASH,
+ MDS_HEALTH_ESTIMATED_REPLAY_TIME,
MDS_HEALTH_DUMMY, // not a real health warning, for testing
};
case MDS_HEALTH_CLIENTS_LAGGY: return "MDS_CLIENTS_LAGGY";
case MDS_HEALTH_CLIENTS_LAGGY_MANY: return "MDS_CLIENTS_LAGGY_MANY";
case MDS_HEALTH_CLIENTS_BROKEN_ROOTSQUASH: return "MDS_CLIENTS_BROKEN_ROOTSQUASH";
+ case MDS_HEALTH_ESTIMATED_REPLAY_TIME: return "MDS_ESTIMATED_REPLAY_TIME";
case MDS_HEALTH_DUMMY: return "MDS_DUMMY";
default:
return "???";
return "%num% client(s) laggy due to laggy OSDs";
case MDS_HEALTH_CLIENTS_BROKEN_ROOTSQUASH:
return "%num% MDS report clients with broken root_squash implementation";
+ case MDS_HEALTH_ESTIMATED_REPLAY_TIME:
+ return "%num% estimated journal replay time";
default:
return "???";
}