]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
tools: fix race condition in seq/rand bench 6791/head
authorPiotr Dałek <piotr.dalek@ts.fujitsu.com>
Mon, 6 Jul 2015 07:56:11 +0000 (09:56 +0200)
committerPiotr Dałek <piotr.dalek@ts.fujitsu.com>
Thu, 3 Dec 2015 13:18:18 +0000 (14:18 +0100)
Under certain conditions (like bench seq/rand -b 1024 -t 128) it is
possible that aio_read reads data into destination buffers before or
during memcmp execution, resulting in "[..] is not correct!" errors
even if actual objects are perfectly fine.
Also, moved latencty calculation around, so it is no longer affeted
by memcmp.

Signed-off-by: Piotr Dałek <piotr.dalek@ts.fujitsu.com>
Conflicts:
src/common/obj_bencher.cc

src/common/obj_bencher.cc

index 2ed0c52ff8e787f08455b1061b60786521d9095b..db4fd8f8c73a86ea23817e3faa770d76a7c23805 100644 (file)
@@ -576,31 +576,43 @@ int ObjBencher::seq_read_bench(int seconds_to_run, int num_objects, int concurre
       }
       lc.cond.Wait(lock);
     }
-    lock.Unlock();
-    newName = generate_object_name(data.started, pid);
+
+    // calculate latency here, so memcmp doesn't inflate it
+    data.cur_latency = ceph_clock_now(cct) - start_times[slot];
+
+    cur_contents = contents[slot];
     int current_index = index[slot];
+    
+    // invalidate internal crc cache
+    cur_contents->invalidate_crc();
+  
+    if (!no_verify) {
+      snprintf(data.object_contents, data.object_size, "I'm the %16dth object!", current_index);
+      if (memcmp(data.object_contents, cur_contents->c_str(), data.object_size) != 0) {
+        cerr << name[slot] << " is not correct!" << std::endl;
+        ++errors;
+      }
+    }
+
+    newName = generate_object_name(data.started, pid);
     index[slot] = data.started;
+    lock.Unlock();
     completion_wait(slot);
-    lock.Lock();
     r = completion_ret(slot);
     if (r < 0) {
       cerr << "read got " << r << std::endl;
       lock.Unlock();
       goto ERR;
     }
-    data.cur_latency = ceph_clock_now(cct) - start_times[slot];
+    lock.Lock();
     total_latency += data.cur_latency;
-    ifdata.cur_latency > data.max_latency) data.max_latency = data.cur_latency;
+    if (data.cur_latency > data.max_latency) data.max_latency = data.cur_latency;
     if (data.cur_latency < data.min_latency) data.min_latency = data.cur_latency;
     ++data.finished;
     data.avg_latency = total_latency / data.finished;
     --data.in_flight;
     lock.Unlock();
     release_completion(slot);
-    cur_contents = contents[slot];
-
-    // invalidate internal crc cache
-    cur_contents->invalidate_crc();
 
     //start new read and check data if requested
     start_times[slot] = ceph_clock_now(cct);
@@ -612,13 +624,10 @@ int ObjBencher::seq_read_bench(int seconds_to_run, int num_objects, int concurre
     lock.Lock();
     ++data.started;
     ++data.in_flight;
-    if (!no_verify) {
-      snprintf(data.object_contents, data.object_size, "I'm the %16dth object!", current_index);
       lock.Unlock();
-      if (memcmp(data.object_contents, cur_contents->c_str(), data.object_size) != 0) {
-        cerr << name[slot] << " is not correct!" << std::endl;
-        ++errors;
-      }
+    if (memcmp(data.object_contents, cur_contents->c_str(), data.object_size) != 0) {
+      cerr << name[slot] << " is not correct!" << std::endl;
+      ++errors;
     } else {
       lock.Unlock();
     }
@@ -776,11 +785,14 @@ int ObjBencher::rand_read_bench(int seconds_to_run, int num_objects, int concurr
       }
       lc.cond.Wait(lock);
     }
+
+    // calculate latency here, so memcmp doesn't inflate it
+    data.cur_latency = ceph_clock_now(cct) - start_times[slot];
+
     lock.Unlock();
-    rand_id = rand() % num_objects;
-    newName = generate_object_name(rand_id, pid);
+
     int current_index = index[slot];
-    index[slot] = rand_id;
+    cur_contents = contents[slot];
     completion_wait(slot);
     lock.Lock();
     r = completion_ret(slot);
@@ -789,16 +801,27 @@ int ObjBencher::rand_read_bench(int seconds_to_run, int num_objects, int concurr
       lock.Unlock();
       goto ERR;
     }
-    data.cur_latency = ceph_clock_now(g_ceph_context) - start_times[slot];
+
     total_latency += data.cur_latency;
-    ifdata.cur_latency > data.max_latency) data.max_latency = data.cur_latency;
+    if (data.cur_latency > data.max_latency) data.max_latency = data.cur_latency;
     if (data.cur_latency < data.min_latency) data.min_latency = data.cur_latency;
     ++data.finished;
     data.avg_latency = total_latency / data.finished;
     --data.in_flight;
     lock.Unlock();
+    
+    if (!no_verify) {
+      snprintf(data.object_contents, data.object_size, "I'm the %16dth object!", current_index);
+      if (memcmp(data.object_contents, cur_contents->c_str(), data.object_size) != 0) {
+        cerr << name[slot] << " is not correct!" << std::endl;
+        ++errors;
+      }
+    } 
+
+    rand_id = rand() % num_objects;
+    newName = generate_object_name(rand_id, pid);
+    index[slot] = rand_id;
     release_completion(slot);
-    cur_contents = contents[slot];
 
     // invalidate internal crc cache
     cur_contents->invalidate_crc();
@@ -813,19 +836,11 @@ int ObjBencher::rand_read_bench(int seconds_to_run, int num_objects, int concurr
     lock.Lock();
     ++data.started;
     ++data.in_flight;
-    if (!no_verify) {
-      snprintf(data.object_contents, data.object_size, "I'm the %16dth object!", current_index);
-      lock.Unlock();
-      if (memcmp(data.object_contents, cur_contents->c_str(), data.object_size) != 0) {
-        cerr << name[slot] << " is not correct!" << std::endl;
-        ++errors;
-      }
-    } else {
-        lock.Unlock();
-    }
+    lock.Unlock();
     name[slot] = newName;
   }
 
+
   //wait for final reads to complete
   while (data.finished < data.started) {
     slot = data.finished % concurrentios;