- mds
fmt_desc: Determines whether the MDS should try to skip corrupt journal
events during journal replay.
- with_legacy: true
+ flags:
+ - runtime
+- name: mds_log_skip_unbounded_events
+ type: bool
+ level: dev
+ default: false
+ services:
+ - mds
+ fmt_desc: Determines whether the MDS should try to skip journal
+ events during journal replay that wrongly exist before
+ a major segment boundary.
+ flags:
+ - runtime
- name: mds_log_max_events
type: int
level: advanced
#include "events/ENoOp.h"
#include "events/ESegment.h"
+#include "events/ELid.h"
#define dout_context g_ceph_context
case EVENT_TABLESERVER: return "TABLESERVER";
case EVENT_NOOP: return "NOOP";
case EVENT_SEGMENT: return "SEGMENT";
+ case EVENT_LID: return "LID";
default:
generic_dout(0) << "get_type_str: unknown type " << _type << dendl;
{"TABLECLIENT", EVENT_TABLECLIENT},
{"TABLESERVER", EVENT_TABLESERVER},
{"NOOP", EVENT_NOOP},
- {"SEGMENT", EVENT_SEGMENT}
+ {"SEGMENT", EVENT_SEGMENT},
+ {"LID", EVENT_LID}
};
/*
case EVENT_SEGMENT:
le = std::make_unique<ESegment>();
break;
+ case EVENT_LID:
+ le = std::make_unique<ELid>();
+ break;
default:
generic_dout(0) << "uh oh, unknown log event type " << type << " length " << length << dendl;
return nullptr;
#define EVENT_NOOP 51
#define EVENT_SEGMENT 100
+#define EVENT_LID 101
#include "include/buffer_fwd.h"
#include "osdc/Filer.h"
#include "events/ESubtreeMap.h"
+#include "events/ELid.h"
#include "events/EUpdate.h"
#include "events/EPeerUpdate.h"
#include "events/EImportFinish.h"
// Fully trim the log so that all objects in cache are clean and may be
// trimmed by a future MDCache::trim. Note that MDSRank::tick does not
// trim the log such that the cache eventually becomes clean.
- if (mds->mdlog->get_num_segments() > 0) {
+ if (mds->mdlog->get_num_segments() > 0 && !mds->mdlog->is_capped()) {
auto ls = mds->mdlog->get_current_segment();
if (ls->num_events > 1 || !ls->dirty_dirfrags.empty()) {
// Current segment contains events other than subtreemap or
// (only do this once!)
if (!mds->mdlog->is_capped()) {
dout(7) << "capping the mdlog" << dendl;
+ mds->mdlog->submit_entry(new ELid());
+ mds->mdlog->flush();
mds->mdlog->cap();
- }
- ceph_assert(kill_shutdown_at != KILL_SHUTDOWN_AT::SHUTDOWN_LOGCAP);
-
- if (!mds->mdlog->empty())
- mds->mdlog->trim(0);
-
- if (!mds->mdlog->empty()) {
- dout(7) << "waiting for log to flush.. " << mds->mdlog->get_num_events()
- << " in " << mds->mdlog->get_num_segments() << " segments" << dendl;
- return false;
- }
-
- if (!did_shutdown_log_cap) {
- // flush journal header
- dout(7) << "writing header for (now-empty) journal" << dendl;
- ceph_assert(mds->mdlog->empty());
- mds->mdlog->write_head(0);
- // NOTE: filer active checker below will block us until this completes.
- did_shutdown_log_cap = true;
+ ceph_assert(kill_shutdown_at != KILL_SHUTDOWN_AT::SHUTDOWN_LOGCAP);
return false;
}
#include "events/ESubtreeMap.h"
#include "events/ESegment.h"
+#include "events/ELid.h"
#include "common/config.h"
#include "common/errno.h"
major_segment_event_ratio = g_conf().get_val<uint64_t>("mds_log_major_segment_event_ratio");
max_segments = g_conf().get_val<uint64_t>("mds_log_max_segments");
max_events = g_conf().get_val<int64_t>("mds_log_max_events");
+ skip_corrupt_events = g_conf().get_val<bool>("mds_log_skip_corrupt_events");
+ skip_unbounded_events = g_conf().get_val<bool>("mds_log_skip_unbounded_events");
}
MDLog::~MDLog()
mds->clog->error() << "corrupt journal event at " << pos << "~"
<< bl.length() << " / "
<< journaler->get_write_pos();
- if (g_conf()->mds_log_skip_corrupt_events) {
+ if (skip_corrupt_events) {
continue;
} else {
mds->damaged_unlocked();
ceph_abort(); // Should be unreachable because damaged() calls
// respawn()
}
- }
- le->set_start_off(pos);
-
- // have we seen an import map yet?
- if (segments.empty() && !dynamic_cast<ESubtreeMap*>(le.get())) {
- dout(1) << "_replay " << pos << "~" << bl.length() << " / " << journaler->get_write_pos()
- << " " << le->get_stamp() << " -- waiting for ESubtreeMap. (skipping " << *le << ")" << dendl;
+ } else if (!segments.empty() && dynamic_cast<ELid*>(le.get())) {
+ /* This can reasonably happen when a up:stopping MDS restarts after
+ * writing ELid. We will merge with the previous segment.
+ * We are enforcing the constraint that ESubtreeMap should begin
+ * the journal.
+ */
+ dout(20) << "found ELid not at the start of the journal" << dendl;
continue;
}
+ le->set_start_off(pos);
events_since_last_major_segment++;
if (auto sb = dynamic_cast<SegmentBoundary*>(le.get()); sb) {
event_seq++;
}
+ if (major_segments.empty()) {
+ dout(0) << __func__ << " " << pos << "~" << bl.length() << " / "
+ << journaler->get_write_pos() << " " << le->get_stamp()
+ << " -- waiting for major segment."
+ << dendl;
+ dout(0) << " Log event is " << *le << dendl;
+ if (skip_unbounded_events) {
+ dout(5) << __func__ << " skipping!" << dendl;
+ continue;
+ } else {
+ mds->damaged_unlocked();
+ ceph_abort(); // Should be unreachable because damaged() calls
+ // respawn()
+ }
+ }
+
dout(10) << "_replay " << pos << "~" << bl.length() << " / " << journaler->get_write_pos()
<< " " << le->get_stamp() << ": " << *le << dendl;
le->_segment = get_current_segment(); // replay may need this
kick_submitter();
}
}
+ if (changed.count("mds_log_skip_corrupt_events")) {
+ skip_corrupt_events = g_conf().get_val<bool>("mds_log_skip_corrupt_events");
+ }
+ if (changed.count("mds_log_skip_unbounded_events")) {
+ skip_unbounded_events = g_conf().get_val<bool>("mds_log_skip_unbounded_events");
+ }
}
int64_t max_events;
uint64_t max_segments;
bool pause;
+ bool skip_corrupt_events;
+ bool skip_unbounded_events;
std::set<uint64_t> major_segments;
std::set<LogSegment*> expired_segments;
#include "common/HeartbeatMap.h"
#include "ScrubStack.h"
#include "events/ESubtreeMap.h"
-#include "events/ESegment.h"
+#include "events/ELid.h"
#include "MDSRank.h"
mdlog->create(fin.new_sub());
// open new journal segment, but do not journal subtree map (yet)
- // N.B. this singular event will be skipped during replay
- auto le = new ESegment();
+ auto le = new ELid();
mdlog->submit_entry(le);
if (whoami == mdsmap->get_root()) {
"mds_log_max_events",
"mds_log_max_segments",
"mds_log_pause",
+ "mds_log_skip_corrupt_events",
+ "mds_log_skip_unbounded_events",
"mds_max_caps_per_client",
"mds_max_export_size",
"mds_max_purge_files",
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2022 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_MDS_ELID_H
+#define CEPH_MDS_ELID_H
+
+#include <string_view>
+
+#include "../LogEvent.h"
+#include "../SegmentBoundary.h"
+
+/**
+ * Journal event that always reports itself as a major segment boundary.
+ *
+ * The only payload handled by this class is the sequence number `seq`
+ * (presumably the member inherited from SegmentBoundary -- it is not
+ * declared here). Encoding, decoding, dumping and replay are defined out of
+ * line.
+ */
+class ELid : public LogEvent, public SegmentBoundary {
+public:
+  // Boundary with a default-constructed SegmentBoundary.
+  ELid() : LogEvent(EVENT_LID) {}
+  // Boundary carrying an explicit sequence number. The event type must be
+  // EVENT_LID, exactly as in the default constructor: the type recorded for
+  // the event is what selects the class used to decode it, so tagging this
+  // event as EVENT_SEGMENT would make it come back as an ESegment.
+  ELid(LogSegment::seq_t _seq) : LogEvent(EVENT_LID), SegmentBoundary(_seq) {}
+
+  // An ELid is unconditionally a major segment boundary.
+  bool is_major_segment_boundary() const override {
+    return true;
+  }
+
+  // Human-readable form: "ELid(<seq>)".
+  void print(std::ostream& out) const override {
+    out << "ELid(" << seq << ")";
+  }
+
+  void encode(bufferlist& bl, uint64_t features) const override;
+  void decode(bufferlist::const_iterator& bl) override;
+  void dump(Formatter *f) const override;
+  void replay(MDSRank *mds) override;
+  static void generate_test_instances(std::list<ELid*>& ls);
+};
+WRITE_CLASS_ENCODER_FEATURES(ELid)
+
+#endif
#include "events/ETableClient.h"
#include "events/ETableServer.h"
#include "events/ESegment.h"
+#include "events/ELid.h"
#include "include/stringify.h"
ls.push_back(new ESegment);
}
+void ELid::encode(bufferlist &bl, uint64_t features) const  // Serialize this event into bl; `seq` is the only field written and `features` is not referenced in the body.
+{
+  ENCODE_START(1, 1, bl);  // opens the framed encoding; the two 1s are presumably struct version and compat version -- confirm against the macro definition
+  encode(seq, bl);  // sole payload field
+  ENCODE_FINISH(bl);  // closes the frame opened by ENCODE_START
+}
+
+void ELid::decode(bufferlist::const_iterator &bl)  // Populate this event from the iterator; reads back the single field written by ELid::encode.
+{
+  DECODE_START(1, bl);  // counterpart of ENCODE_START(1, 1, bl) in ELid::encode
+  decode(seq, bl);  // sole payload field, same order as in encode
+  DECODE_FINISH(bl);  // closes the frame opened by DECODE_START
+}
+
+void ELid::replay(MDSRank *mds)  // Replay hook for ELid; the body's only statement is the debug log line below.
+{
+  dout(4) << "ELid::replay, seq " << seq << dendl;  // logged at debug level 4 with the event's sequence number
+}
+
+void ELid::dump(Formatter *f) const  // Emit this event's single field to the formatter f.
+{
+  f->dump_int("seq", seq);  // written as an integer under the key "seq"
+}
+
+void ELid::generate_test_instances(std::list<ELid*>& ls)  // Append sample instances to ls; exactly one default-constructed ELid is added.
+{
+  ls.push_back(new ELid);  // raw owning pointer handed to the list; presumably the test harness frees it -- confirm against the caller
+}
void ENoOp::encode(bufferlist &bl, uint64_t features) const
{