Context *respawn_hook_,
Context *suicide_hook_,
boost::asio::io_context& ioctx) :
- whoami(whoami_), incarnation(0),
- mds_lock(mds_lock_), cct(msgr->cct), clog(clog_), timer(timer_),
- mdsmap(mdsmap_),
+ cct(msgr->cct), mds_lock(mds_lock_), clog(clog_),
+ timer(timer_), mdsmap(mdsmap_),
objecter(new Objecter(g_ceph_context, msgr, monc_, ctxpool, 0, 0)),
- server(NULL), mdcache(NULL), locker(NULL), mdlog(NULL),
- balancer(NULL), scrubstack(NULL),
- damage_table(whoami_),
- inotable(NULL), snapserver(NULL), snapclient(NULL),
- sessionmap(this), logger(NULL), mlogger(NULL),
+ damage_table(whoami_), sessionmap(this),
op_tracker(g_ceph_context, g_conf()->mds_enable_op_tracker,
g_conf()->osd_num_op_tracker_shard),
- last_state(MDSMap::STATE_BOOT),
- state(MDSMap::STATE_BOOT),
- cluster_degraded(false), stopping(false),
+ progress_thread(this), whoami(whoami_),
purge_queue(g_ceph_context, whoami_,
mdsmap_->get_metadata_pool(), objecter,
new LambdaContext([this](int r) {
}
)
),
- progress_thread(this), dispatch_depth(0),
- hb(NULL), last_tid(0), osd_epoch_barrier(0), beacon(beacon_),
- mds_slow_req_count(0),
- last_client_mdsmap_bcast(0),
+ beacon(beacon_),
messenger(msgr), monc(monc_), mgrc(mgrc),
respawn_hook(respawn_hook_),
suicide_hook(suicide_hook_),
- standby_replaying(false),
starttime(mono_clock::now())
{
hb = g_ceph_context->get_heartbeat_map()->add_worker("MDSRank", pthread_self());
* to the other subsystems, and message-sending calls.
*/
class MDSRank {
- protected:
- const mds_rank_t whoami;
-
- // Incarnation as seen in MDSMap at the point where a rank is
- // assigned.
- int incarnation;
-
public:
-
friend class C_Flush_Journal;
friend class C_Drop_Cache;
-
friend class C_CacheDropExecAndReply;
friend class C_ScrubExecAndReply;
friend class C_ScrubControlExecAndReply;
+ class CephContext *cct;
+
+ MDSRank(
+ mds_rank_t whoami_,
+ ceph::mutex &mds_lock_,
+ LogChannelRef &clog_,
+ SafeTimer &timer_,
+ Beacon &beacon_,
+ std::unique_ptr<MDSMap> & mdsmap_,
+ Messenger *msgr,
+ MonClient *monc_,
+ MgrClient *mgrc,
+ Context *respawn_hook_,
+ Context *suicide_hook_,
+ boost::asio::io_context& ictx);
+
// Rank id this object was constructed with (the const `whoami` member).
mds_rank_t get_nodeid() const
{
  return whoami;
}
int64_t get_metadata_pool();
- // Reference to global MDS::mds_lock, so that users of MDSRank don't
- // carry around references to the outer MDS, and we can substitute
- // a separate lock here in future potentially.
- ceph::mutex &mds_lock;
-
// Timestamp stored in `starttime`; the constructor sets it from mono_clock::now().
mono_time get_starttime() const { return starttime; }
return chrono::duration<double>(now-starttime);
}
- class CephContext *cct;
-
bool is_daemon_stopping() const;
- // Reference to global cluster log client, just to avoid initialising
- // a separate one here.
- LogChannelRef &clog;
-
- // Reference to global timer utility, because MDSRank and MDSDaemon
- // currently both use the same mds_lock, so it makes sense for them
- // to share a timer.
- SafeTimer &timer;
-
- std::unique_ptr<MDSMap> &mdsmap; /* MDSDaemon::mdsmap */
-
- ceph::async::io_context_pool ctxpool;
- Objecter *objecter;
-
- // sub systems
- Server *server;
- MDCache *mdcache;
- Locker *locker;
- MDLog *mdlog;
- MDBalancer *balancer;
- ScrubStack *scrubstack;
- DamageTable damage_table;
-
-
- InoTable *inotable;
-
- SnapServer *snapserver;
- SnapClient *snapclient;
-
MDSTableClient *get_table_client(int t);
MDSTableServer *get_table_server(int t);
- SessionMap sessionmap;
// Look up the session for a client id by converting it to its
// entity_name_t::CLIENT name and asking the session map.
Session *get_session(client_t client)
{
  const auto client_name = entity_name_t::CLIENT(client.v);
  return sessionmap.get_session(client_name);
}
Session *get_session(const cref_t<Message> &m);
- PerfCounters *logger, *mlogger;
- OpTracker op_tracker;
-
- // The last different state I held before current
- MDSMap::DaemonState last_state;
- // The state assigned to me by the MDSMap
- MDSMap::DaemonState state;
-
- bool cluster_degraded;
-
// Current value of the `state` member.
MDSMap::DaemonState get_state() const
{
  return state;
}
// Forwarded from the beacon: whatever beacon.get_want_state() reports.
MDSMap::DaemonState get_want_state() const
{
  return beacon.get_want_state();
}
void handle_write_error(int err);
void update_mlogger();
- protected:
- // Flag to indicate we entered shutdown: anyone seeing this to be true
- // after taking mds_lock must drop out.
- bool stopping;
-
- // PurgeQueue is only used by StrayManager, but it is owned by MDSRank
- // because its init/shutdown happens at the top level.
- PurgeQueue purge_queue;
-
- class ProgressThread : public Thread {
- MDSRank *mds;
- ceph::condition_variable cond;
- public:
- explicit ProgressThread(MDSRank *mds_) : mds(mds_) {}
- void * entry() override;
- void shutdown();
- void signal() {cond.notify_all();}
- } progress_thread;
-
- list<cref_t<Message>> waiting_for_nolaggy;
- MDSContext::que finished_queue;
- // Dispatch, retry, queues
- int dispatch_depth;
- void inc_dispatch_depth() { ++dispatch_depth; }
- void dec_dispatch_depth() { --dispatch_depth; }
- void retry_dispatch(const cref_t<Message> &m);
- bool handle_deferrable_message(const cref_t<Message> &m);
- void _advance_queues();
- bool _dispatch(const cref_t<Message> &m, bool new_msg);
- ceph::heartbeat_handle_d *hb; // Heartbeat for threads using mds_lock
-
- bool is_stale_message(const cref_t<Message> &m) const;
-
- map<mds_rank_t, version_t> peer_mdsmap_epoch;
-
- ceph_tid_t last_tid; // for mds-initiated requests (e.g. stray rename)
-
- MDSContext::vec waiting_for_active, waiting_for_replay, waiting_for_rejoin,
- waiting_for_reconnect, waiting_for_resolve;
- MDSContext::vec waiting_for_any_client_connection;
- MDSContext::que replay_queue;
- bool replaying_requests_done = false;
-
- map<mds_rank_t, MDSContext::vec > waiting_for_active_peer;
- map<epoch_t, MDSContext::vec > waiting_for_mdsmap;
-
- epoch_t osd_epoch_barrier;
-
- // Const reference to the beacon so that we can behave differently
- // when it's laggy.
- Beacon &beacon;
-
- /**
- * Emit clog warnings for any ops reported as warnings by optracker
- */
- void check_ops_in_flight();
-
- int mds_slow_req_count;
-
- /**
- * Share MDSMap with clients
- */
- void bcast_mds_map(); // to mounted clients
- epoch_t last_client_mdsmap_bcast;
-
- map<mds_rank_t,DecayCounter> export_targets; /* targets this MDS is exporting to or wants/tries to */
-
- void create_logger();
- public:
void queue_waiter(MDSContext *c) {
finished_queue.push_back(c);
progress_thread.signal();
progress_thread.signal();
}
- MDSRank(
- mds_rank_t whoami_,
- ceph::mutex &mds_lock_,
- LogChannelRef &clog_,
- SafeTimer &timer_,
- Beacon &beacon_,
- std::unique_ptr<MDSMap> & mdsmap_,
- Messenger *msgr,
- MonClient *monc_,
- MgrClient *mgrc,
- Context *respawn_hook_,
- Context *suicide_hook_,
- boost::asio::io_context& ictx);
-
- protected:
- ~MDSRank();
-
- public:
-
// Daemon lifetime functions: these guys break the abstraction
// and call up into the parent MDSDaemon instance. It's kind
// of unavoidable: if we want any depth into our calls
// Hand out the next transaction id: bump `last_tid` and return the new value.
ceph_tid_t issue_tid()
{
  ++last_tid;
  return last_tid;
}
- Finisher *finisher;
-
// Raw pointer to the MDSMap held by the referenced `mdsmap` unique_ptr.
MDSMap *get_mds_map()
{
  return mdsmap.get();
}
// Reads the l_mds_request counter from `logger`.
// NOTE(review): `logger` is dereferenced unchecked; presumably create_logger()
// has run before any caller gets here — confirm.
uint64_t get_num_requests() const
{
  return logger->get(l_mds_request);
}
void mark_base_recursively_scrubbed(inodeno_t ino);
+ // Reference to global MDS::mds_lock, so that users of MDSRank don't
+ // carry around references to the outer MDS, and we can substitute
+ // a separate lock here in future potentially.
+ ceph::mutex &mds_lock;
+
+ // Reference to global cluster log client, just to avoid initialising
+ // a separate one here.
+ LogChannelRef &clog;
+
+ // Reference to global timer utility, because MDSRank and MDSDaemon
+ // currently both use the same mds_lock, so it makes sense for them
+ // to share a timer.
+ SafeTimer &timer;
+
+ std::unique_ptr<MDSMap> &mdsmap; /* MDSDaemon::mdsmap */
+
+ ceph::async::io_context_pool ctxpool;
+ Objecter *objecter;
+
+ // sub systems
+ Server *server = nullptr;
+ MDCache *mdcache = nullptr;
+ Locker *locker = nullptr;
+ MDLog *mdlog = nullptr;
+ MDBalancer *balancer = nullptr;
+ ScrubStack *scrubstack = nullptr;
+ DamageTable damage_table;
+
+ InoTable *inotable = nullptr;
+
+ SnapServer *snapserver = nullptr;
+ SnapClient *snapclient = nullptr;
+
+ SessionMap sessionmap;
+
+ PerfCounters *logger = nullptr, *mlogger = nullptr;
+ OpTracker op_tracker;
+
+ // The last different state I held before current
+ MDSMap::DaemonState last_state = MDSMap::STATE_BOOT;
+ // The state assigned to me by the MDSMap
+ MDSMap::DaemonState state = MDSMap::STATE_BOOT;
+
+ bool cluster_degraded = false;
+
+ Finisher *finisher = nullptr; // not in the constructor's initializer list; default to null like the sibling subsystem pointers
protected:
+ typedef enum { // boot phases, listed in ascending numeric order starting at 0
+ // The MDSMap is available, configure default layouts and structures
+ MDS_BOOT_INITIAL = 0,
+ // We are ready to open some inodes
+ MDS_BOOT_OPEN_ROOT,
+ // We are ready to do a replay if needed
+ MDS_BOOT_PREPARE_LOG,
+ // Replay is complete
+ MDS_BOOT_REPLAY_DONE
+ } BootStep; // step argument type of boot_start(), which defaults to MDS_BOOT_INITIAL
+
+ class ProgressThread : public Thread { // Thread subclass tied to one MDSRank; instantiated below as progress_thread
+ public:
+ explicit ProgressThread(MDSRank *mds_) : mds(mds_) {} // only records the rank pointer; empty body
+ void * entry() override; // overrides the base-class entry point; defined out of line
+ void shutdown(); // defined out of line
+ void signal() {cond.notify_all();} // wakes every waiter blocked on cond
+ private:
+ MDSRank *mds; // rank pointer handed to the constructor
+ ceph::condition_variable cond; // notified by signal()
+ } progress_thread;
+
+ class C_MDS_StandbyReplayRestart;
+ class C_MDS_StandbyReplayRestartFinish;
+ // Friended to access retry_dispatch
+ friend class C_MDS_RetryMessage;
+ friend class C_MDS_BootStart;
+ friend class C_MDS_InternalBootStart;
+ friend class C_MDS_MonCommand;
+
+ const mds_rank_t whoami;
+
+ ~MDSRank();
+
+ void inc_dispatch_depth() { ++dispatch_depth; }
+ void dec_dispatch_depth() { --dispatch_depth; }
+ void retry_dispatch(const cref_t<Message> &m);
+ bool handle_deferrable_message(const cref_t<Message> &m);
+ void _advance_queues();
+ bool _dispatch(const cref_t<Message> &m, bool new_msg);
+ bool is_stale_message(const cref_t<Message> &m) const;
+
+ /**
+ * Emit clog warnings for any ops reported as warnings by optracker
+ */
+ void check_ops_in_flight();
+
+ /**
+ * Share MDSMap with clients
+ */
+ void bcast_mds_map(); // to mounted clients
+ void create_logger();
+
void dump_clientreplay_status(Formatter *f) const;
void command_scrub_start(Formatter *f,
std::string_view path, std::string_view tag,
void command_dump_inode(Formatter *f, const cmdmap_t &cmdmap, std::ostream &ss);
void command_cache_drop(uint64_t timeout, Formatter *f, Context *on_finish);
- protected:
- Messenger *messenger;
- MonClient *monc;
- MgrClient *mgrc;
-
- Context *respawn_hook;
- Context *suicide_hook;
-
- // Friended to access retry_dispatch
- friend class C_MDS_RetryMessage;
-
// FIXME the state machine logic should be separable from the dispatch
// logic that calls it.
// >>>
void calc_recovery_set();
void request_state(MDSMap::DaemonState s);
- bool standby_replaying; // true if current replay pass is in standby-replay mode
-
- typedef enum {
- // The MDSMap is available, configure default layouts and structures
- MDS_BOOT_INITIAL = 0,
- // We are ready to open some inodes
- MDS_BOOT_OPEN_ROOT,
- // We are ready to do a replay if needed
- MDS_BOOT_PREPARE_LOG,
- // Replay is complete
- MDS_BOOT_REPLAY_DONE
- } BootStep;
- friend class C_MDS_BootStart;
- friend class C_MDS_InternalBootStart;
void boot_create(); // i am new mds.
void boot_start(BootStep step=MDS_BOOT_INITIAL, int r=0); // starting|replay
void replay_done();
void standby_replay_restart();
void _standby_replay_restart_finish(int r, uint64_t old_read_pos);
- class C_MDS_StandbyReplayRestart;
- class C_MDS_StandbyReplayRestartFinish;
void reopen_log();
void stopping_done();
void validate_sessions();
- // <<<
-
- // >>>
+
void handle_mds_recovery(mds_rank_t who);
void handle_mds_failure(mds_rank_t who);
- // <<<
/* Update MDSMap export_targets for this rank. Called on ::tick(). */
void update_targets();
- friend class C_MDS_MonCommand;
void _mon_command_finish(int r, std::string_view cmd, std::string_view outs);
void set_mdsmap_multimds_snaps_allowed();
-private:
- mono_time starttime = mono_clock::zero();
+ Context *create_async_exec_context(C_ExecAndReply *ctx);
+
+ // Incarnation as seen in MDSMap at the point where a rank is
+ // assigned.
+ int incarnation = 0;
+
+ // Flag to indicate we entered shutdown: anyone seeing this to be true
+ // after taking mds_lock must drop out.
+ bool stopping = false;
+
+ // PurgeQueue is only used by StrayManager, but it is owned by MDSRank
+ // because its init/shutdown happens at the top level.
+ PurgeQueue purge_queue;
+
+ list<cref_t<Message>> waiting_for_nolaggy;
+ MDSContext::que finished_queue;
+ // Dispatch, retry, queues
+ int dispatch_depth = 0;
+
+ ceph::heartbeat_handle_d *hb = nullptr; // Heartbeat for threads using mds_lock
+
+ map<mds_rank_t, version_t> peer_mdsmap_epoch;
+
+ ceph_tid_t last_tid = 0; // for mds-initiated requests (e.g. stray rename)
+
+ MDSContext::vec waiting_for_active, waiting_for_replay, waiting_for_rejoin,
+ waiting_for_reconnect, waiting_for_resolve;
+ MDSContext::vec waiting_for_any_client_connection;
+ MDSContext::que replay_queue;
+ bool replaying_requests_done = false;
+
+ map<mds_rank_t, MDSContext::vec > waiting_for_active_peer;
+ map<epoch_t, MDSContext::vec > waiting_for_mdsmap;
+
+ epoch_t osd_epoch_barrier = 0;
+
+ // Reference to the beacon so that we can behave differently
+ // when it's laggy.
+ Beacon &beacon;
+
+ int mds_slow_req_count = 0;
+
+ epoch_t last_client_mdsmap_bcast = 0;
+
+ map<mds_rank_t,DecayCounter> export_targets; /* targets this MDS is exporting to or wants/tries to */
+
+ Messenger *messenger;
+ MonClient *monc;
+ MgrClient *mgrc;
+
+ Context *respawn_hook;
+ Context *suicide_hook;
+
+ bool standby_replaying = false; // true if current replay pass is in standby-replay mode
+private:
// "task" string that gets displayed in ceph status
inline static const std::string SCRUB_STATUS_KEY = "scrub status";
void schedule_update_timer_task();
void send_task_status();
-protected:
- Context *create_async_exec_context(C_ExecAndReply *ctx);
+ mono_time starttime = mono_clock::zero();
};
/* This expects to be given a reference which it is responsible for.
// Allocate a C_MDS_RetryMessage for the stored rank and message and
// return it through the MDSContext base pointer.
MDSContext *build()
{
  MDSContext *retry_ctx = new C_MDS_RetryMessage(mds, m);
  return retry_ctx;
}
-
private:
MDSRank *mds;
cref_t<Message> m;
class MDSRankDispatcher : public MDSRank, public md_config_obs_t
{
public:
+ MDSRankDispatcher(
+ mds_rank_t whoami_,
+ ceph::mutex &mds_lock_,
+ LogChannelRef &clog_,
+ SafeTimer &timer_,
+ Beacon &beacon_,
+ std::unique_ptr<MDSMap> &mdsmap_,
+ Messenger *msgr,
+ MonClient *monc_,
+ MgrClient *mgrc,
+ Context *respawn_hook_,
+ Context *suicide_hook_,
+ boost::asio::io_context& ictx);
+
void init();
void tick();
void shutdown();
// Call into me from MDS::ms_dispatch
bool ms_dispatch(const cref_t<Message> &m);
-
- MDSRankDispatcher(
- mds_rank_t whoami_,
- ceph::mutex &mds_lock_,
- LogChannelRef &clog_,
- SafeTimer &timer_,
- Beacon &beacon_,
- std::unique_ptr<MDSMap> &mdsmap_,
- Messenger *msgr,
- MonClient *monc_,
- MgrClient *mgrc,
- Context *respawn_hook_,
- Context *suicide_hook_,
- boost::asio::io_context& ictx);
};
// This utility is for MDS and MDSRank dispatchers.