From: John Spray Date: Mon, 16 Mar 2015 00:04:36 +0000 (+0000) Subject: mds: add damaged() method X-Git-Tag: v9.0.0~113^2~8 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=c9ff8b4439fcf45b4692a81b70b6e488cd2905c6;p=ceph.git mds: add damaged() method A hook to fire off one last DAMAGED beacon to mon before respawning. Signed-off-by: John Spray --- diff --git a/src/common/config_opts.h b/src/common/config_opts.h index 9441a9ec946f..43a3f20b0a16 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -449,6 +449,7 @@ OPTION(mds_snap_rstat, OPT_BOOL, false) // enable/disbale nested stat for snapsh OPTION(mds_verify_backtrace, OPT_U32, 1) OPTION(mds_action_on_write_error, OPT_U32, 1) // 0: ignore; 1: force readonly; 2: crash +OPTION(mds_mon_shutdown_timeout, OPT_DOUBLE, 5) // If true, compact leveldb store on mount OPTION(osd_compact_leveldb_on_mount, OPT_BOOL, false) diff --git a/src/mds/MDS.cc b/src/mds/MDS.cc index a56d6ba177a1..cf042a951c72 100644 --- a/src/mds/MDS.cc +++ b/src/mds/MDS.cc @@ -2361,6 +2361,19 @@ void MDS::handle_signal(int signum) mds_lock.Unlock(); } +void MDS::damaged() +{ + set_want_state(MDSMap::STATE_DAMAGED); + beacon.notify_health(this); // Include latest status in our swan song + beacon.send_and_wait(g_conf->mds_mon_shutdown_timeout); + + // It's okay if we timed out and the mon didn't get our beacon, because + // another daemon (or ourselves after respawn) will eventually take the + // rank and report DAMAGED again when it hits the same problem we did. + + respawn(); // Respawn into standby in case mon has other work for us +} + void MDS::suicide() { assert(mds_lock.is_locked()); diff --git a/src/mds/MDS.h b/src/mds/MDS.h index dca10c702494..62adf193a292 100644 --- a/src/mds/MDS.h +++ b/src/mds/MDS.h @@ -443,6 +443,14 @@ private: void handle_mds_recovery(mds_rank_t who); void handle_mds_failure(mds_rank_t who); + /** + * Report state DAMAGED to the mon, and then pass on to respawn(). 
Call + * this when an unrecoverable error is encountered while attempting + * to load an MDS rank's data structures. This is *not* for use with + * errors affecting normal dirfrag/inode objects -- they should be handled + * through cleaner scrub/repair mechanisms. + */ + void damaged(); void suicide(); void respawn(); void handle_write_error(int err);