mds: add damaged() method

author John Spray <john.spray@redhat.com>

Mon, 16 Mar 2015 00:04:36 +0000 (00:04 +0000)

committer John Spray <john.spray@redhat.com>

Mon, 23 Mar 2015 10:55:08 +0000 (10:55 +0000)
author John Spray <john.spray@redhat.com>
Mon, 16 Mar 2015 00:04:36 +0000 (00:04 +0000)
committer John Spray <john.spray@redhat.com>
Mon, 23 Mar 2015 10:55:08 +0000 (10:55 +0000)
diff --git a/src/common/config_opts.h b/src/common/config_opts.h

index 9441a9ec946f22539e47dfdf1fc81c790f171ba2..43a3f20b0a16760e6bf0423fca4b2f425921d03f 100644 (file)
--- a/src/common/config_opts.h
+++ b/src/common/config_opts.h
@@ -449,6 +449,7 @@ OPTION(mds_snap_rstat, OPT_BOOL, false) // enable/disbale nested stat for snapsh
  OPTION(mds_verify_backtrace, OPT_U32, 1)
  
  OPTION(mds_action_on_write_error, OPT_U32, 1) // 0: ignore; 1: force readonly; 2: crash
+OPTION(mds_mon_shutdown_timeout, OPT_DOUBLE, 5) // max wait for the mon to get our final beacon in MDS::damaged(); presumably seconds -- TODO confirm units
  
  // If true, compact leveldb store on mount
  OPTION(osd_compact_leveldb_on_mount, OPT_BOOL, false)
diff --git a/src/mds/MDS.cc b/src/mds/MDS.cc

index a56d6ba177a1c78c64358c53059846415629ec2a..cf042a951c727b56972366b6e0097d36a5647935 100644 (file)
--- a/src/mds/MDS.cc
+++ b/src/mds/MDS.cc
@@ -2361,6 +2361,19 @@ void MDS::handle_signal(int signum)
    mds_lock.Unlock();
  }
  
+void MDS::damaged()  // Report state DAMAGED to the mon, then respawn
+{
+  set_want_state(MDSMap::STATE_DAMAGED);
+  beacon.notify_health(this);  // Include latest status in our swan song
+  beacon.send_and_wait(g_conf->mds_mon_shutdown_timeout);  // Wait is bounded by the configured timeout
+
+  // It's okay if we timed out and the mon didn't get our beacon, because
+  // another daemon (or ourselves after respawn) will eventually take the
+  // rank and report DAMAGED again when it hits the same problem we did.
+  // NOTE(review): suicide() asserts mds_lock.is_locked(); confirm whether callers must hold mds_lock here too.
+  respawn();  // Respawn into standby in case mon has other work for us
+}
+
  void MDS::suicide()
  {
    assert(mds_lock.is_locked());
diff --git a/src/mds/MDS.h b/src/mds/MDS.h

index dca10c70249418785b4abd679d9350b0e554aaed..62adf193a292a25aad4c25f05acc0306ba293541 100644 (file)
--- a/src/mds/MDS.h
+++ b/src/mds/MDS.h
@@ -443,6 +443,14 @@ private:
    void handle_mds_recovery(mds_rank_t who);
    void handle_mds_failure(mds_rank_t who);
  
+  /**
+   * Report state DAMAGED to the mon (waiting up to mds_mon_shutdown_timeout),
+   * then pass on to respawn().  Call this when an unrecoverable error is hit
+   * while loading an MDS rank's data structures.  This is *not* for errors
+   * affecting normal dirfrag/inode objects -- they should be handled
+   * through cleaner scrub/repair mechanisms.
+   */
+  void damaged();
    void suicide();
    void respawn();
    void handle_write_error(int err);
author	John Spray <john.spray@redhat.com>
	Mon, 16 Mar 2015 00:04:36 +0000 (00:04 +0000)
committer	John Spray <john.spray@redhat.com>
	Mon, 23 Mar 2015 10:55:08 +0000 (10:55 +0000)
src/common/config_opts.h		patch \| blob \| history
src/mds/MDS.cc		patch \| blob \| history
src/mds/MDS.h		patch \| blob \| history