From: Sage Weil Date: Fri, 3 May 2013 18:29:24 +0000 (-0700) Subject: mon: fork early to avoid leveldb static env state X-Git-Tag: v0.61~10 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=2e0dd5ae6c8751e33d456b2b06c1204b63db959a;p=ceph.git mon: fork early to avoid leveldb static env state leveldb has static state that prevents it from recreating its worker thread after our fork(), even when we close and reopen the database (tsk tsk!). Avoid this by forking early, before we touch leveldb. Hide the details in a Preforker class. This is modeled after what ceph-fuse already does; we should convert it later. Signed-off-by: Sage Weil Reviewed-by: Samuel Just Reviewed-by: Greg Farnum --- diff --git a/src/Makefile.am b/src/Makefile.am index 13ea671fbfae..cb8dbb810c2d 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1586,6 +1586,7 @@ noinst_HEADERS = \ common/HeartbeatMap.h\ common/LogClient.h\ common/LogEntry.h\ + common/Preforker.h\ common/WorkQueue.h\ common/PrioritizedQueue.h\ common/ceph_argparse.h\ diff --git a/src/ceph_mon.cc b/src/ceph_mon.cc index 69bcf6d3282e..0189a319b5df 100644 --- a/src/ceph_mon.cc +++ b/src/ceph_mon.cc @@ -36,6 +36,7 @@ using namespace std; #include "common/pick_address.h" #include "common/Timer.h" #include "common/errno.h" +#include "common/Preforker.h" #include "global/global_init.h" #include "global/signal_handler.h" @@ -272,6 +273,20 @@ int main(int argc, const char **argv) return 0; } + // we fork early to prevent leveldb's environment static state from + // screwing us over + Preforker prefork; + if (g_conf->daemonize) { + global_init_prefork(g_ceph_context, 0); + prefork.prefork(); + if (prefork.is_parent()) { + return prefork.parent_wait(); + } + global_init_postfork(g_ceph_context, 0); + common_init_finish(g_ceph_context); + global_init_chdir(g_ceph_context); + } + { Monitor::StoreConverter converter(g_conf->mon_data); int ret = converter.needs_conversion(); @@ -280,7 +295,7 @@ int main(int 
argc, const char **argv) } else if (ret < 0) { derr << "found errors while attempting to convert the monitor store: " << cpp_strerror(ret) << dendl; - exit(1); + prefork.exit(1); } } @@ -289,7 +304,7 @@ int main(int argc, const char **argv) if (err < 0) { cerr << argv[0] << ": error opening mon data store at '" << g_conf->mon_data << "': " << cpp_strerror(err) << std::endl; - exit(1); + prefork.exit(1); } assert(err == 0); @@ -297,18 +312,18 @@ int main(int argc, const char **argv) err = store.get(Monitor::MONITOR_NAME, "magic", magicbl); if (!magicbl.length()) { cerr << "unable to read magic from mon data.. did you run mkcephfs?" << std::endl; - exit(1); + prefork.exit(1); } string magic(magicbl.c_str(), magicbl.length()-1); // ignore trailing \n if (strcmp(magic.c_str(), CEPH_MON_ONDISK_MAGIC)) { cerr << "mon fs magic '" << magic << "' != current '" << CEPH_MON_ONDISK_MAGIC << "'" << std::endl; - exit(1); + prefork.exit(1); } err = Monitor::check_features(&store); if (err < 0) { cerr << "error checking features: " << cpp_strerror(err) << std::endl; - exit(1); + prefork.exit(1); } // inject new monmap? @@ -319,7 +334,7 @@ int main(int argc, const char **argv) if (r) { cerr << "unable to read monmap from " << inject_monmap << ": " << error << std::endl; - exit(1); + prefork.exit(1); } // get next version @@ -348,7 +363,7 @@ int main(int argc, const char **argv) store.apply_transaction(t); cout << "done." << std::endl; - exit(0); + prefork.exit(0); } // monmap? 
@@ -403,14 +418,14 @@ int main(int argc, const char **argv) if (err < 0) { cerr << argv[0] << ": error generating initial monmap: " << cpp_strerror(err) << std::endl; usage(); - exit(1); + prefork.exit(1); } if (tmpmap.contains(g_conf->name.get_id())) { ipaddr = tmpmap.get_addr(g_conf->name.get_id()); } else { derr << "no public_addr or public_network specified, and " << g_conf->name << " not present in monmap or ceph.conf" << dendl; - exit(1); + prefork.exit(1); } } } @@ -467,7 +482,7 @@ int main(int argc, const char **argv) err = messenger->bind(ipaddr); if (err < 0) - return 1; + prefork.exit(1); // start monitor mon = new Monitor(g_ceph_context, g_conf->name.get_id(), &store, @@ -475,7 +490,7 @@ int main(int argc, const char **argv) err = mon->preinit(); if (err < 0) - return 1; + prefork.exit(1); if (compact || g_conf->mon_compact_on_start) { derr << "compacting monitor store ..." << dendl; @@ -483,9 +498,8 @@ int main(int argc, const char **argv) derr << "done compacting" << dendl; } - global_init_daemonize(g_ceph_context, 0); - common_init_finish(g_ceph_context); - global_init_chdir(g_ceph_context); + if (g_conf->daemonize) + prefork.daemonize(); // set up signal handlers, now that we've daemonized/forked. 
init_async_signal_handler(); @@ -518,6 +532,6 @@ int main(int argc, const char **argv) dout(0) << "ceph-mon: gmon.out should be in " << s << dendl; } - return 0; + prefork.exit(0); } diff --git a/src/common/Preforker.h b/src/common/Preforker.h new file mode 100644 index 000000000000..07f21b45e460 --- /dev/null +++ b/src/common/Preforker.h @@ -0,0 +1,94 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +#ifndef CEPH_COMMON_PREFORKER_H +#define CEPH_COMMON_PREFORKER_H + +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/wait.h> +#include <unistd.h> +#include "common/safe_io.h" + +/** + * pre-fork fork/daemonize helper class + * + * Hide the details of letting a process fork early, do a bunch of + * initialization work that may spam stdout or exit with an error, and + * then daemonize. The exit() method will either exit directly (if we + * haven't forked) or pass a message to the parent with the error if + * we have. + */ +class Preforker { + pid_t childpid; + bool forked; + int fd[2]; // parent's, child's + +public: + Preforker() + : childpid(0), + forked(false) + {} + + void prefork() { + assert(!forked); + int r = socketpair(AF_UNIX, SOCK_STREAM, 0, fd); + if (r < 0) { + cerr << "[" << getpid() << "]: unable to create socketpair: " << cpp_strerror(errno) << std::endl; + exit(errno); + } + + forked = true; + + childpid = fork(); + if (childpid == 0) { + ::close(fd[0]); + } else { + ::close(fd[1]); + } + } + + bool is_child() { + return childpid == 0; + } + + bool is_parent() { + return childpid != 0; + } + + int parent_wait() { + assert(forked); + + int r = -1; + int err = safe_read_exact(fd[0], &r, sizeof(r)); + if (err == 0 && r == -1) { + // daemonize + ::close(0); + ::close(1); + ::close(2); + r = 0; + } else if (err) { + cerr << "[" << getpid() << "]: " << cpp_strerror(-err) << std::endl; + } else { + // wait for child to exit + waitpid(childpid, NULL, 0); + } + return r; + } + + void exit(int r) { + if (forked) { + // tell parent + ::write(fd[1],
&r, sizeof(r)); + } + ::exit(r); + } + + void daemonize() { + assert(forked); + static int r = -1; + ::write(fd[1], &r, sizeof(r)); + } + +}; + +#endif diff --git a/src/global/global_init.cc b/src/global/global_init.cc index 7cd9f27b64f0..91733f5968b0 100644 --- a/src/global/global_init.cc +++ b/src/global/global_init.cc @@ -143,17 +143,24 @@ static void pidfile_remove_void(void) pidfile_remove(); } -void global_init_daemonize(CephContext *cct, int flags) +int global_init_prefork(CephContext *cct, int flags) { if (g_code_env != CODE_ENVIRONMENT_DAEMON) - return; + return -1; const md_config_t *conf = cct->_conf; if (!conf->daemonize) - return; + return -1; // stop log thread g_ceph_context->_log->flush(); g_ceph_context->_log->stop(); + return 0; +} + +void global_init_daemonize(CephContext *cct, int flags) +{ + if (global_init_prefork(cct, flags) < 0) + return; int ret = daemon(1, 1); if (ret) { @@ -163,6 +170,13 @@ void global_init_daemonize(CephContext *cct, int flags) exit(1); } + global_init_postfork(cct, flags); +} + +void global_init_postfork(CephContext *cct, int flags) +{ + int ret; + // restart log thread g_ceph_context->_log->start(); diff --git a/src/global/global_init.h b/src/global/global_init.h index cd8432bb7919..d2ba6ef9dedb 100644 --- a/src/global/global_init.h +++ b/src/global/global_init.h @@ -33,10 +33,25 @@ class CephContext; void global_init(std::vector < const char * > *alt_def_args, std::vector < const char* >& args, uint32_t module_type, code_environment_t code_env, int flags); +/* + * perform all of the steps that global_init_daemonize performs just prior + * to actually forking (via daemon(3)). return 0 if we are going to proceed + * with the fork, or -1 otherwise. + */ +int global_init_prefork(CephContext *cct, int flags); + +/* + * perform all of the steps that global_init_daemonize performs just after + * the fork. + */ +void global_init_postfork(CephContext *cct, int flags); + /* * global_init_daemonize handles daemonizing a process. 
* - * If this is called, it *must* be called before common_init_finish + * If this is called, it *must* be called before common_init_finish. + * Note that this is equivalent to calling _prefork(), daemon(), and + * _postfork. */ void global_init_daemonize(CephContext *cct, int flags);