git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mds: respawn when blacklisted
author Sage Weil <sage.weil@dreamhost.com>
Sat, 3 Mar 2012 22:28:21 +0000 (14:28 -0800)
committer Sage Weil <sage@newdream.net>
Tue, 6 Mar 2012 19:02:17 +0000 (11:02 -0800)
If we are blacklisted by the OSD cluster, it's because we were too slow
and were replaced by another ceph-mds.  Respawn and re-register as a
standby.

If we get some other write error, shut down.

Fixes: #1796
Signed-off-by: Sage Weil <sage.weil@dreamhost.com>
src/mds/MDLog.cc
src/mds/MDLog.h

index de1619e1ac45c4162899f76d0ee76fd2e0fff9a3..a5c1b096578d278902989c6fe13a86316daa9a84 100644 (file)
@@ -84,6 +84,18 @@ void MDLog::init_journaler()
                            logger, l_mdl_jlat,
                            &mds->timer);
   assert(journaler->is_readonly());
+  journaler->set_write_error_handler(new C_MDL_WriteError(this));
+}
+
+void MDLog::handle_journaler_write_error(int r)  // reacts to a journaler write error; r is the error code forwarded by C_MDL_WriteError::finish
+{
+  if (r == -EBLACKLISTED) {  // fenced by the OSD cluster: we were too slow and another ceph-mds replaced us
+    derr << "we have been blacklisted (fenced), respawning..." << dendl;
+    mds->respawn();  // respawn so the daemon can re-register as a standby
+  } else {  // any other write error is not handled here
+    derr << "unhandled error " << cpp_strerror(r) << ", shutting down..." << dendl;
+    mds->suicide();  // shut down rather than continue after an unhandled write error
+  }
 }
 
 void MDLog::write_head(Context *c) 
index 71062411b2bcec97be0e6d1cf9bbc1b9adfb2189..2e049e8aed407956dc220f048b449156cd03db50 100644 (file)
@@ -127,7 +127,16 @@ public:
 
 private:
   void init_journaler();
-  
+
+  struct C_MDL_WriteError : public Context {  // Context installed as the journaler's write-error handler (see init_journaler)
+    MDLog *mdlog;  // the MDLog whose handler is invoked from finish()
+    C_MDL_WriteError(MDLog *m) : mdlog(m) {}
+    void finish(int r) {  // forwards the error code r unchanged to the owning MDLog
+      mdlog->handle_journaler_write_error(r);
+    }
+  };
+  void handle_journaler_write_error(int r);
 public:
   void create_logger();