git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/bluestore: Actually wait until completion in write_sync 26909/head
author Vitaliy Filippov <vitalif@yourcmc.ru>
Wed, 6 Mar 2019 23:01:18 +0000 (02:01 +0300)
committer Vitaliy Filippov <vitalif@yourcmc.ru>
Tue, 12 Mar 2019 12:02:27 +0000 (15:02 +0300)
This function is only used by RocksDB WAL writing so it must sync data.

This fixes #18338 and thus makes it possible to actually set `bluefs_preextend_wal_files`
to true, gaining +100% single-thread write IOPS in disk-bound (HDD or bad SSD) setups.
To my knowledge it doesn't hurt performance in other cases.
Test it yourself on any HDD with `fio -ioengine=rbd -direct=1 -bs=4k -iodepth=1`.

Issue #18338 is easily reproduced without this patch by issuing a `kill -9` to the OSD
while doing `fio -ioengine=rbd -direct=1 -bs=4M -iodepth=16`.

Fixes: https://tracker.ceph.com/issues/18338 https://tracker.ceph.com/issues/38559
Signed-off-by: Vitaliy Filippov <vitalif@yourcmc.ru>
src/os/bluestore/KernelDevice.cc

index 5812d84b1774121143adb22eb61a5cfb174f9a93..fb807abffc31ed597a7827a69d71faa2ccbfba76 100644 (file)
@@ -762,8 +762,8 @@ int KernelDevice::_sync_write(uint64_t off, bufferlist &bl, bool buffered, int w
   }
 #ifdef HAVE_SYNC_FILE_RANGE
   if (buffered) {
-    // initiate IO (but do not wait)
-    r = ::sync_file_range(fd_buffereds[WRITE_LIFE_NOT_SET], off, len, SYNC_FILE_RANGE_WRITE);
+    // initiate IO and wait till it completes
+    r = ::sync_file_range(fd_buffereds[WRITE_LIFE_NOT_SET], off, len, SYNC_FILE_RANGE_WRITE|SYNC_FILE_RANGE_WAIT_AFTER|SYNC_FILE_RANGE_WAIT_BEFORE);
     if (r < 0) {
       r = -errno;
       derr << __func__ << " sync_file_range error: " << cpp_strerror(r) << dendl;