From: Sage Weil Date: Wed, 16 May 2018 21:56:33 +0000 (-0500) Subject: ceph_test_rados_api_misc: fix LibRadosMiscPool.PoolCreationRace X-Git-Tag: v14.0.0~77^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F22042%2Fhead;p=ceph.git ceph_test_rados_api_misc: fix LibRadosMiscPool.PoolCreationRace This test is trying to reproduce a case where we don't have a session for the given OSD, drop our big lock to open one, and when we retake it, the osdmap has changed. Previously that was an easy situation to reproduce by creating an ioctx for a pool that didn't exist (yet) and then creating it. The missing pool would send us into the path that rechecks for an updated osdmap and trigger the race condition. However, as of 7037cf8f77588aba1e3f251e04f696da0eccf462 we no longer create ioctxs for pools that don't exist. And there isn't an easy way that I can think of to trigger the missing OSD session that races with an osdmap update. So, use the debug option that injects the delay to drop us into the same path. This is a bit artificial in that we do actually have an osdmap session, but the block handles that case (since the old no pool case worked that way), so we cover the same code. This fix ignores the fact that we still have all this "pool doesn't exist" handling code in Objecter that deals with the fact that you could create an ioctx for a missing pool...that is no longer possible. Cleanup for another day! 
Fixes: http://tracker.ceph.com/issues/24150 Signed-off-by: Sage Weil --- diff --git a/src/osdc/Objecter.cc b/src/osdc/Objecter.cc index 6118197a281..7c4381f5cdc 100644 --- a/src/osdc/Objecter.cc +++ b/src/osdc/Objecter.cc @@ -2393,7 +2393,8 @@ void Objecter::_op_submit(Op *op, shunique_lock& sul, ceph_tid_t *ptid) // Try to get a session, including a retry if we need to take write lock int r = _get_session(op->target.osd, &s, sul); if (r == -EAGAIN || - (check_for_latest_map && sul.owns_lock_shared())) { + (check_for_latest_map && sul.owns_lock_shared()) || + cct->_conf->objecter_debug_inject_relock_delay) { epoch_t orig_epoch = osdmap->get_epoch(); sul.unlock(); if (cct->_conf->objecter_debug_inject_relock_delay) { diff --git a/src/test/librados/misc.cc b/src/test/librados/misc.cc index a07c344bf49..afa7a299b2f 100644 --- a/src/test/librados/misc.cc +++ b/src/test/librados/misc.cc @@ -99,27 +99,25 @@ TEST(LibRadosMiscPool, PoolCreationRace) { ASSERT_EQ(0, rados_conf_read_file(cluster_a, NULL)); // kludge: i want to --log-file foo and only get cluster b //ASSERT_EQ(0, rados_conf_parse_env(cluster_a, NULL)); + ASSERT_EQ(0, rados_conf_set(cluster_a, + "objecter_debug_inject_relock_delay", "true")); ASSERT_EQ(0, rados_connect(cluster_a)); ASSERT_EQ(0, rados_create(&cluster_b, NULL)); ASSERT_EQ(0, rados_conf_read_file(cluster_b, NULL)); ASSERT_EQ(0, rados_conf_parse_env(cluster_b, NULL)); - ASSERT_EQ(0, rados_conf_set(cluster_b, - "objecter_debug_inject_relock_delay", "true")); ASSERT_EQ(0, rados_connect(cluster_b)); char poolname[80]; snprintf(poolname, sizeof(poolname), "poolrace.%d", rand()); rados_pool_create(cluster_a, poolname); - rados_ioctx_t a, b; + rados_ioctx_t a; rados_ioctx_create(cluster_a, poolname, &a); int64_t poolid = rados_ioctx_get_id(a); - rados_ioctx_create2(cluster_b, poolid+1, &b); - char pool2name[80]; snprintf(pool2name, sizeof(pool2name), "poolrace2.%d", rand()); - rados_pool_create(cluster_a, pool2name); + rados_pool_create(cluster_b, 
pool2name); list cls; // this should normally trigger pretty easily, but we need to bound @@ -131,7 +129,7 @@ TEST(LibRadosMiscPool, PoolCreationRace) { rados_completion_t c; rados_aio_create_completion(0, 0, 0, &c); cls.push_back(c); - rados_aio_read(b, "PoolCreationRaceObj", c, buf, 100, 0); + rados_aio_read(a, "PoolCreationRaceObj", c, buf, 100, 0); cout << "started " << (void*)c << std::endl; if (rados_aio_is_complete(cls.front())) { break; @@ -151,7 +149,6 @@ TEST(LibRadosMiscPool, PoolCreationRace) { cout << "done." << std::endl; rados_ioctx_destroy(a); - rados_ioctx_destroy(b); rados_pool_delete(cluster_a, poolname); rados_pool_delete(cluster_a, pool2name); rados_shutdown(cluster_b);