OPTION(mds_snap_rstat, OPT_BOOL, false) // enable/disable nested stat for snapshot
OPTION(mds_verify_backtrace, OPT_U32, 1)
// detect clients which aren't trimming completed requests or flushes
+OPTION(mds_max_completed_flushes, OPT_U32, 100000)
OPTION(mds_max_completed_requests, OPT_U32, 100000)
OPTION(mds_action_on_write_error, OPT_U32, 1) // 0: ignore; 1: force readonly; 2: crash
dout(20) << " within timeout " << session->recalled_at << " vs. " << cutoff << dendl;
}
}
- if (session->get_num_trim_requests_warnings() > 0 &&
- session->get_num_completed_requests() >= g_conf->mds_max_completed_requests) {
+ if ((session->get_num_trim_requests_warnings() > 0 &&
+ session->get_num_completed_requests() >= g_conf->mds_max_completed_requests) ||
+ (session->get_num_trim_flushes_warnings() > 0 &&
+ session->get_num_completed_flushes() >= g_conf->mds_max_completed_flushes)) {
std::ostringstream oss;
- oss << "Client " << session->get_human_name() << " failing to advance its oldest_client_tid";
+ oss << "Client " << session->get_human_name() << " failing to advance its oldest client/flush tid";
MDSHealthMetric m(MDS_HEALTH_CLIENT_OLDEST_TID, HEALTH_WARN, oss.str());
m.metadata["client_id"] = session->info.inst.name.num();
large_completed_requests_metrics.push_back(m);
} else {
std::ostringstream oss;
oss << "Many clients (" << large_completed_requests_metrics.size()
- << ") failing to advance their oldest_client_tid";
+ << ") failing to advance their oldest client/flush tid";
MDSHealthMetric m(MDS_HEALTH_CLIENT_OLDEST_TID_MANY, HEALTH_WARN, oss.str());
m.metadata["client_count"] = large_completed_requests_metrics.size();
health.metrics.push_back(m);
if (m->get_oldest_flush_tid() > 0) { // an oldest flush tid of zero skips all of the handling below
if (session->trim_completed_flushes(m->get_oldest_flush_tid())) { // true: presumably some entries were dropped -- confirm against trim_completed_flushes()
mds->mdlog->get_current_segment()->touched_sessions.insert(session->info.inst.name);
+
+ if (session->get_num_trim_flushes_warnings() > 0 && // warnings were issued earlier ...
+ session->get_num_completed_flushes() * 2 < g_conf->mds_max_completed_flushes) // ... and the count is now under half the configured limit
+ session->reset_num_trim_flushes_warnings();
+ } else {
+ if (session->get_num_completed_flushes() >= // the limit doubles for every warning already issued (left shift by the warning count)
+ (g_conf->mds_max_completed_flushes << session->get_num_trim_flushes_warnings())) { // NOTE(review): the shift count is not capped here -- confirm it cannot reach the operand width
+ session->inc_num_trim_flushes_warnings();
+ stringstream ss;
+ ss << "client." << session->get_client() << " does not advance its oldest_flush_tid ("
+ << m->get_oldest_flush_tid() << "), "
+ << session->get_num_completed_flushes()
+ << " completed flushes recorded in session\n"; // NOTE(review): the trailing newline is also passed to clog and to dout ahead of dendl -- confirm intended
+ mds->clog->warn() << ss.str();
+ dout(20) << __func__ << " " << ss.str() << dendl;
+ }
}
}
// wrote this session out?
bool completed_requests_dirty;
+ unsigned num_trim_flushes_warnings;
unsigned num_trim_requests_warnings;
public:
void add_completed_request(ceph_tid_t t, inodeno_t created) {
return info.completed_flushes.count(tid);
}
+ unsigned get_num_completed_flushes() const { return info.completed_flushes.size(); } // current entry count of info.completed_flushes
+ unsigned get_num_trim_flushes_warnings() { return num_trim_flushes_warnings; } // NOTE(review): read-only but not declared const, unlike the size getter
+ void inc_num_trim_flushes_warnings() { ++num_trim_flushes_warnings; } // bumps the flush-warning counter by one
+ void reset_num_trim_flushes_warnings() { num_trim_flushes_warnings = 0; } // sets the flush-warning counter back to zero
+
unsigned get_num_completed_requests() const { return info.completed_requests.size(); } // current entry count of info.completed_requests
unsigned get_num_trim_requests_warnings() { return num_trim_requests_warnings; } // NOTE(review): read-only but not declared const, unlike the size getter
void inc_num_trim_requests_warnings() { ++num_trim_requests_warnings; } // bumps the request-warning counter by one
cap_push_seq(0),
lease_seq(0),
completed_requests_dirty(false),
+ num_trim_flushes_warnings(0),
num_trim_requests_warnings(0) { }
~Session() {
assert(!item_session_list.is_on_list());