From: Sharif Olorin Date: Wed, 12 Mar 2014 08:01:00 +0000 (+1100) Subject: Work around race condition in libnss X-Git-Tag: v0.79~157^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=44aaaaffd5224412d30435c58b399e21ccff19bd;p=ceph.git Work around race condition in libnss This change prevents a segfault in ceph::crypto::init when using NSS and calling rados_connect from multiple threads simultaneously on different rados_t objects (and updates the documentation for rados_connect to reflect the fix). It's pretty simple, just one static mutex wrapping the NSS definition of ceph::crypto::init. More details regarding the race condition are in this[0] commit (and pull request #1424). To reproduce the race condition in the existing codebase, the below[1] C program will work (depending on number of cores and probably other things, the number of threads needed to reliably reproduce varies, but the more the better - in my environment five is sufficient, with four cores. [0]: 377c919088423bafcae92759f8d578438717705c [1]: ```c void *init(void *p) { int err; rados_t cluster; err = rados_create(&cluster, NULL); if (err < 0) { fprintf(stderr, "cannot create cluster handle: %s\n", strerror(-err)); return NULL; } err = rados_conf_read_file(cluster, "./ceph.conf"); if (err < 0) { fprintf(stderr, "error reading config file: %s\n", strerror(-err)); return NULL; } rados_connect(cluster); return NULL; } int main() { pthread_t ts[NTHREAD]; int i; for (i = 0; i < NTHREAD; i++) { pthread_create(&ts[i], NULL, init, NULL); } for (i = 0; i < NTHREAD; i++) { int k; void *p = (void*)&k; pthread_join(ts[i], p); } return 0; } ``` Signed-off-by: Sharif Olorin --- diff --git a/src/common/ceph_crypto.cc b/src/common/ceph_crypto.cc index 96fa157c9f5a..b81ffdfe3235 100644 --- a/src/common/ceph_crypto.cc +++ b/src/common/ceph_crypto.cc @@ -37,14 +37,20 @@ ceph::crypto::HMACSHA1::~HMACSHA1() #elif USE_NSS +// Initialization of NSS requires a mutex due to a race condition in +// NSS_NoDB_Init. +static pthread_mutex_t crypto_init_mutex = PTHREAD_MUTEX_INITIALIZER; + void ceph::crypto::init(CephContext *cct) { SECStatus s; + pthread_mutex_lock(&crypto_init_mutex); if (cct->_conf->nss_db_path.empty()) { s = NSS_NoDB_Init(NULL); } else { s = NSS_Init(cct->_conf->nss_db_path.c_str()); } + pthread_mutex_unlock(&crypto_init_mutex); assert(s == SECSuccess); } diff --git a/src/include/rados/librados.h b/src/include/rados/librados.h index 7254e3ad0ba4..f0d45af34b86 100644 --- a/src/include/rados/librados.h +++ b/src/include/rados/librados.h @@ -355,9 +355,6 @@ int rados_ping_monitor(rados_t cluster, const char *mon_id, * * @post If this succeeds, any function in librados may be used * - * @warning This function is not thread-safe if using libnss (built with - * --with-nss); callers must implement their own locking mechanisms. - * * @param cluster The cluster to connect to. * @returns 0 on sucess, negative error code on failure */