From: Piotr Dałek Date: Mon, 6 Jul 2015 07:56:11 +0000 (+0200) Subject: tools: fix race condition in seq/rand bench X-Git-Tag: v0.94.6~11^2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=c2c6d02591519dfd15ddcb397ac440322a964deb;p=ceph.git tools: fix race condition in seq/rand bench Under certain conditions (like bench seq/rand -b 1024 -t 128) it is possible that aio_read reads data into destination buffers before or during memcmp execution, resulting in "[..] is not correct!" errors even if actual objects are perfectly fine. Also, moved latencty calculation around, so it is no longer affeted by memcmp. Signed-off-by: Piotr Dałek Conflicts: src/common/obj_bencher.cc --- diff --git a/src/common/obj_bencher.cc b/src/common/obj_bencher.cc index 2ed0c52ff8e78..db4fd8f8c73a8 100644 --- a/src/common/obj_bencher.cc +++ b/src/common/obj_bencher.cc @@ -576,31 +576,43 @@ int ObjBencher::seq_read_bench(int seconds_to_run, int num_objects, int concurre } lc.cond.Wait(lock); } - lock.Unlock(); - newName = generate_object_name(data.started, pid); + + // calculate latency here, so memcmp doesn't inflate it + data.cur_latency = ceph_clock_now(cct) - start_times[slot]; + + cur_contents = contents[slot]; int current_index = index[slot]; + + // invalidate internal crc cache + cur_contents->invalidate_crc(); + + if (!no_verify) { + snprintf(data.object_contents, data.object_size, "I'm the %16dth object!", current_index); + if (memcmp(data.object_contents, cur_contents->c_str(), data.object_size) != 0) { + cerr << name[slot] << " is not correct!" << std::endl; + ++errors; + } + } + + newName = generate_object_name(data.started, pid); index[slot] = data.started; + lock.Unlock(); completion_wait(slot); - lock.Lock(); r = completion_ret(slot); if (r < 0) { cerr << "read got " << r << std::endl; lock.Unlock(); goto ERR; } - data.cur_latency = ceph_clock_now(cct) - start_times[slot]; + lock.Lock(); total_latency += data.cur_latency; - if( data.cur_latency > data.max_latency) data.max_latency = data.cur_latency; + if (data.cur_latency > data.max_latency) data.max_latency = data.cur_latency; if (data.cur_latency < data.min_latency) data.min_latency = data.cur_latency; ++data.finished; data.avg_latency = total_latency / data.finished; --data.in_flight; lock.Unlock(); release_completion(slot); - cur_contents = contents[slot]; - - // invalidate internal crc cache - cur_contents->invalidate_crc(); //start new read and check data if requested start_times[slot] = ceph_clock_now(cct); @@ -612,13 +624,10 @@ int ObjBencher::seq_read_bench(int seconds_to_run, int num_objects, int concurre lock.Lock(); ++data.started; ++data.in_flight; - if (!no_verify) { - snprintf(data.object_contents, data.object_size, "I'm the %16dth object!", current_index); lock.Unlock(); - if (memcmp(data.object_contents, cur_contents->c_str(), data.object_size) != 0) { - cerr << name[slot] << " is not correct!" << std::endl; - ++errors; - } + if (memcmp(data.object_contents, cur_contents->c_str(), data.object_size) != 0) { + cerr << name[slot] << " is not correct!" << std::endl; + ++errors; } else { lock.Unlock(); } @@ -776,11 +785,14 @@ int ObjBencher::rand_read_bench(int seconds_to_run, int num_objects, int concurr } lc.cond.Wait(lock); } + + // calculate latency here, so memcmp doesn't inflate it + data.cur_latency = ceph_clock_now(cct) - start_times[slot]; + lock.Unlock(); - rand_id = rand() % num_objects; - newName = generate_object_name(rand_id, pid); + int current_index = index[slot]; - index[slot] = rand_id; + cur_contents = contents[slot]; completion_wait(slot); lock.Lock(); r = completion_ret(slot); @@ -789,16 +801,27 @@ int ObjBencher::rand_read_bench(int seconds_to_run, int num_objects, int concurr lock.Unlock(); goto ERR; } - data.cur_latency = ceph_clock_now(g_ceph_context) - start_times[slot]; + total_latency += data.cur_latency; - if( data.cur_latency > data.max_latency) data.max_latency = data.cur_latency; + if (data.cur_latency > data.max_latency) data.max_latency = data.cur_latency; if (data.cur_latency < data.min_latency) data.min_latency = data.cur_latency; ++data.finished; data.avg_latency = total_latency / data.finished; --data.in_flight; lock.Unlock(); + + if (!no_verify) { + snprintf(data.object_contents, data.object_size, "I'm the %16dth object!", current_index); + if (memcmp(data.object_contents, cur_contents->c_str(), data.object_size) != 0) { + cerr << name[slot] << " is not correct!" << std::endl; + ++errors; + } + } + + rand_id = rand() % num_objects; + newName = generate_object_name(rand_id, pid); + index[slot] = rand_id; release_completion(slot); - cur_contents = contents[slot]; // invalidate internal crc cache cur_contents->invalidate_crc(); @@ -813,19 +836,11 @@ int ObjBencher::rand_read_bench(int seconds_to_run, int num_objects, int concurr lock.Lock(); ++data.started; ++data.in_flight; - if (!no_verify) { - snprintf(data.object_contents, data.object_size, "I'm the %16dth object!", current_index); - lock.Unlock(); - if (memcmp(data.object_contents, cur_contents->c_str(), data.object_size) != 0) { - cerr << name[slot] << " is not correct!" << std::endl; - ++errors; - } - } else { - lock.Unlock(); - } + lock.Unlock(); name[slot] = newName; } + //wait for final reads to complete while (data.finished < data.started) { slot = data.finished % concurrentios;