]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osdc/Objecter: fix relock race 12234/head
authorSage Weil <sage@redhat.com>
Wed, 30 Nov 2016 03:55:11 +0000 (22:55 -0500)
committerSage Weil <sage@redhat.com>
Wed, 30 Nov 2016 03:55:11 +0000 (22:55 -0500)
In commit a863ae1c0fab636eabced0979889cbb3be74bf74 we tried
to fix a race but failed because we didn't update the
session pointer.  Add a proper test for this case with
a delay injection and fix the bug.

Fixes: http://tracker.ceph.com/issues/17942
Signed-off-by: Sage Weil <sage@redhat.com>
src/common/config_opts.h
src/osdc/Objecter.cc
src/test/librados/misc.cc

index 108ffd2ed502443ba5b707330057802111772cb5..52b85e6d4b546ce38c936f69e96596e6b5c2eec4 100644 (file)
@@ -461,6 +461,7 @@ OPTION(objecter_inflight_ops, OPT_U64, 1024)               // max in-flight ios
 OPTION(objecter_completion_locks_per_session, OPT_U64, 32) // num of completion locks per each session, for serializing same object responses
 OPTION(objecter_inject_no_watch_ping, OPT_BOOL, false)   // suppress watch pings
 OPTION(objecter_retry_writes_after_first_reply, OPT_BOOL, false)   // ignore the first reply for each write, and resend the osd op instead
+OPTION(objecter_debug_inject_relock_delay, OPT_BOOL, false)
 
 // Max number of deletes at once in a single Filer::purge call
 OPTION(filer_max_purge_ops, OPT_U32, 10)
index b8d43fefcce32e2bb5078a3a83270aff126070ac..410c6646995af03054039449faa7e27696b178bf 100644 (file)
@@ -2305,11 +2305,21 @@ void Objecter::_op_submit(Op *op, shunique_lock& sul, ceph_tid_t *ptid)
       (check_for_latest_map && sul.owns_lock_shared())) {
     epoch_t orig_epoch = osdmap->get_epoch();
     sul.unlock();
+    if (cct->_conf->objecter_debug_inject_relock_delay) {
+      sleep(1);
+    }
     sul.lock();
     if (orig_epoch != osdmap->get_epoch()) {
       // map changed; recalculate mapping
+      ldout(cct, 10) << __func__ << " relock raced with osdmap, recalc target"
+                    << dendl;
       check_for_latest_map = _calc_target(&op->target, &op->last_force_resend)
        == RECALC_OP_TARGET_POOL_DNE;
+      if (s) {
+       put_session(s);
+       s = NULL;
+       r = -EAGAIN;
+      }
     }
   }
   if (r == -EAGAIN) {
index d3ae2719a7b7687018bc0c8a2ae49a32d1957a08..7086e8b2899597ca824804a07efec1e8b5d9d5d5 100644 (file)
@@ -73,6 +73,66 @@ TEST(LibRadosMiscConnectFailure, ConnectFailure) {
   rados_shutdown(cluster);
 }
 
+TEST(LibRadosMiscPool, PoolCreationRace) {
+  rados_t cluster_a, cluster_b;
+
+  char *id = getenv("CEPH_CLIENT_ID");
+  if (id)
+    std::cerr << "Client id is: " << id << std::endl;
+
+  ASSERT_EQ(0, rados_create(&cluster_a, NULL));
+  ASSERT_EQ(0, rados_conf_read_file(cluster_a, NULL));
+  // kludge: i want to --log-file foo and only get cluster b
+  //ASSERT_EQ(0, rados_conf_parse_env(cluster_a, NULL));
+  ASSERT_EQ(0, rados_connect(cluster_a));
+
+  ASSERT_EQ(0, rados_create(&cluster_b, NULL));
+  ASSERT_EQ(0, rados_conf_read_file(cluster_b, NULL));
+  ASSERT_EQ(0, rados_conf_parse_env(cluster_b, NULL));
+  ASSERT_EQ(0, rados_conf_set(cluster_b,
+                             "objecter_debug_inject_relock_delay", "true"));
+  ASSERT_EQ(0, rados_connect(cluster_b));
+
+  char poolname[80];
+  snprintf(poolname, sizeof(poolname), "poolrace.%d", rand());
+  rados_pool_create(cluster_a, poolname);
+  rados_ioctx_t a, b;
+  rados_ioctx_create(cluster_a, poolname, &a);
+  int64_t poolid = rados_ioctx_get_id(a);
+
+  rados_ioctx_create2(cluster_b, poolid+1, &b);
+
+  char pool2name[80];
+  snprintf(pool2name, sizeof(pool2name), "poolrace2.%d", rand());
+  rados_pool_create(cluster_a, pool2name);
+
+  list<rados_completion_t> cls;
+  while (true) {
+    rados_completion_t c;
+    rados_aio_create_completion(0, 0, 0, &c);
+    cls.push_back(c);
+    rados_aio_write(b, "foo", c, "data", 4, 0);
+    cout << "started " << (void*)c << std::endl;
+    if (rados_aio_is_complete(cls.front())) {
+      break;
+    }
+  }
+  cout << " started " << cls.size() << " aios" << std::endl;
+  for (auto c : cls) {
+    cout << "waiting " << (void*)c << std::endl;
+    rados_aio_wait_for_complete_and_cb(c);
+    rados_aio_release(c);
+  }
+  cout << "done." << std::endl;
+
+  rados_ioctx_destroy(a);
+  rados_ioctx_destroy(b);
+  rados_pool_delete(cluster_a, poolname);
+  rados_pool_delete(cluster_a, pool2name);
+  rados_shutdown(cluster_b);
+  rados_shutdown(cluster_a);
+}
+
 TEST_F(LibRadosMisc, ClusterFSID) {
   char fsid[37];
   ASSERT_EQ(-ERANGE, rados_cluster_fsid(cluster, fsid, sizeof(fsid) - 1));