From 44aaaaffd5224412d30435c58b399e21ccff19bd Mon Sep 17 00:00:00 2001 From: Sharif Olorin Date: Wed, 12 Mar 2014 19:01:00 +1100 Subject: [PATCH] Work around race condition in libnss This change prevents a segfault in ceph::crypto::init when using NSS and calling rados_connect from multiple threads simultaneously on different rados_t objects (and updates the documentation for rados_connect to reflect the fix). It's pretty simple, just one static mutex wrapping the NSS definition of ceph::crypto::init. More details regarding the race condition are in this[0] commit (and pull request #1424). To reproduce the race condition in the existing codebase, the below[1] C program will work (depending on number of cores and probably other things, the number of threads needed to reliably reproduce varies, but the more the better - in my environment five is sufficient, with four cores. [0]: 377c919088423bafcae92759f8d578438717705c [1]: ```c void *init(void *p) { int err; rados_t cluster; err = rados_create(&cluster, NULL); if (err < 0) { fprintf(stderr, "cannot create cluster handle: %s\n", strerror(-err)); return NULL; } err = rados_conf_read_file(cluster, "./ceph.conf"); if (err < 0) { fprintf(stderr, "error reading config file: %s\n", strerror(-err)); return NULL; } rados_connect(cluster); return NULL; } int main() { pthread_t ts[NTHREAD]; int i; for (i = 0; i < NTHREAD; i++) { pthread_create(&ts[i], NULL, init, NULL); } for (i = 0; i < NTHREAD; i++) { int k; void *p = (void*)&k; pthread_join(ts[i], p); } return 0; } ``` Signed-off-by: Sharif Olorin --- src/common/ceph_crypto.cc | 6 ++++++ src/include/rados/librados.h | 3 --- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/common/ceph_crypto.cc b/src/common/ceph_crypto.cc index 96fa157c9f5a7..b81ffdfe32359 100644 --- a/src/common/ceph_crypto.cc +++ b/src/common/ceph_crypto.cc @@ -37,14 +37,20 @@ ceph::crypto::HMACSHA1::~HMACSHA1() #elif USE_NSS +// Initialization of NSS requires a mutex due to a race condition in +// NSS_NoDB_Init. +static pthread_mutex_t crypto_init_mutex = PTHREAD_MUTEX_INITIALIZER; + void ceph::crypto::init(CephContext *cct) { SECStatus s; + pthread_mutex_lock(&crypto_init_mutex); if (cct->_conf->nss_db_path.empty()) { s = NSS_NoDB_Init(NULL); } else { s = NSS_Init(cct->_conf->nss_db_path.c_str()); } + pthread_mutex_unlock(&crypto_init_mutex); assert(s == SECSuccess); } diff --git a/src/include/rados/librados.h b/src/include/rados/librados.h index 7254e3ad0ba4e..f0d45af34b86a 100644 --- a/src/include/rados/librados.h +++ b/src/include/rados/librados.h @@ -355,9 +355,6 @@ int rados_ping_monitor(rados_t cluster, const char *mon_id, * * @post If this succeeds, any function in librados may be used * - * @warning This function is not thread-safe if using libnss (built with - * --with-nss); callers must implement their own locking mechanisms. - * * @param cluster The cluster to connect to. * @returns 0 on sucess, negative error code on failure */ -- 2.39.5