git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
os/bluestore: do read io via aio in parallel
author Sage Weil <sage@redhat.com>
Mon, 23 Jan 2017 15:16:22 +0000 (10:16 -0500)
committer Sage Weil <sage@redhat.com>
Fri, 27 Jan 2017 15:27:03 +0000 (10:27 -0500)
Dispatch all blob reads in parallel via aio.

Signed-off-by: Sage Weil <sage@redhat.com>
src/os/bluestore/BlueStore.cc

index 2339948fc4ed2f5a1c3ab1b0cbde60fd17f2d239..956a3813edd6a77063e8a9892d002b1012b90092 100644 (file)
@@ -5171,6 +5171,11 @@ struct region_t {
   uint64_t logical_offset;
   uint64_t blob_xoffset;   //region offset within the blob
   uint64_t length;
+  bufferlist bl;
+
+  // used later in read process
+  uint64_t front = 0;
+  uint64_t r_off = 0;
 
   region_t(uint64_t offset, uint64_t b_offs, uint64_t len)
     : logical_offset(offset),
@@ -5258,7 +5263,8 @@ int BlueStore::_do_read(
 
     ready_regions_t cache_res;
     interval_set<uint32_t> cache_interval;
-    bptr->shared_blob->bc.read(bptr->shared_blob->get_cache(), b_off, b_len, cache_res, cache_interval);
+    bptr->shared_blob->bc.read(
+      bptr->shared_blob->get_cache(), b_off, b_len, cache_res, cache_interval);
     dout(20) << __func__ << "  blob " << *bptr << std::hex
             << " need 0x" << b_off << "~" << b_len
             << " cache has 0x" << cache_interval
@@ -5292,52 +5298,40 @@ int BlueStore::_do_read(
     ++lp;
   }
 
-  // enumerate and read/decompress desired blobs
-  blobs2read_t::iterator b2r_it = blobs2read.begin();
-  while (b2r_it != blobs2read.end()) {
-    BlobRef bptr = b2r_it->first;
+  // read raw blob data
+  vector<bufferlist> compressed_blob_bls;
+  IOContext ioc(cct, NULL);
+  for (auto& p : blobs2read) {
+    BlobRef bptr = p.first;
     dout(20) << __func__ << "  blob " << *bptr << std::hex
-            << " need 0x" << b2r_it->second << std::dec << dendl;
-    if (bptr->get_blob().has_flag(bluestore_blob_t::FLAG_COMPRESSED)) {
-      bufferlist compressed_bl, raw_bl;
-      IOContext ioc(cct, NULL);   // FIXME?
+            << " need 0x" << p.second << std::dec << dendl;
+    if (bptr->get_blob().is_compressed()) {
+      // read the whole thing
+      if (compressed_blob_bls.empty()) {
+       // ensure we avoid any reallocation on subsequent blobs
+       compressed_blob_bls.reserve(blobs2read.size());
+      }
+      compressed_blob_bls.push_back(bufferlist());
+      bufferlist& bl = compressed_blob_bls.back();
       r = bptr->get_blob().map(
        0, bptr->get_blob().get_ondisk_length(),
        [&](uint64_t offset, uint64_t length) {
-         bufferlist t;
-         int r = bdev->read(offset, length, &t, &ioc, false);
+         int r = bdev->aio_read(offset, length, &bl, &ioc);
          if (r < 0)
             return r;
-         compressed_bl.claim_append(t);
           return 0;
        });
-      if (r < 0)
-        return r;
-
-      if (_verify_csum(o, &bptr->get_blob(), 0, compressed_bl,
-                      b2r_it->second.front().logical_offset) < 0) {
-       return -EIO;
-      }
-      r = _decompress(compressed_bl, &raw_bl);
-      if (r < 0)
-       return r;
-      if (buffered) {
-       bptr->shared_blob->bc.did_read(bptr->shared_blob->get_cache(), 0, raw_bl);
-      }
-      for (auto& i : b2r_it->second) {
-       ready_regions[i.logical_offset].substr_of(
-         raw_bl, i.blob_xoffset, i.length);
-      }
     } else {
-      for (auto reg : b2r_it->second) {
+      // read the pieces
+      for (auto& reg : p.second) {
        // determine how much of the blob to read
        uint64_t chunk_size = bptr->get_blob().get_chunk_size(block_size);
-       uint64_t r_off = reg.blob_xoffset;
+       reg.r_off = reg.blob_xoffset;
        uint64_t r_len = reg.length;
-       unsigned front = r_off % chunk_size;
-       if (front) {
-         r_off -= front;
-         r_len += front;
+       reg.front = reg.r_off % chunk_size;
+       if (reg.front) {
+         reg.r_off -= reg.front;
+         r_len += reg.front;
        }
        unsigned tail = r_len % chunk_size;
        if (tail) {
@@ -5346,34 +5340,67 @@ int BlueStore::_do_read(
        dout(20) << __func__ << "    region 0x" << std::hex
                 << reg.logical_offset
                 << ": 0x" << reg.blob_xoffset << "~" << reg.length
-                << " reading 0x" << r_off << "~" << r_len << std::dec
+                << " reading 0x" << reg.r_off << "~" << r_len << std::dec
                 << dendl;
 
        // read it
-       IOContext ioc(cct, NULL);  // FIXME?
-       bufferlist bl;
-       r = bptr->get_blob().map(r_off, r_len,
-                            [&](uint64_t offset, uint64_t length) {
-           bufferlist t;
-           int r = bdev->read(offset, length, &t, &ioc, false);
+       r = bptr->get_blob().map(
+         reg.r_off, r_len,
+         [&](uint64_t offset, uint64_t length) {
+           int r = bdev->aio_read(offset, length, &reg.bl, &ioc);
            if (r < 0)
               return r;
-           bl.claim_append(t);
             return 0;
          });
         if (r < 0)
           return r;
+      }
+    }
+  }
+  bdev->aio_submit(&ioc);
+  ioc.aio_wait();
 
-       r = _verify_csum(o, &bptr->get_blob(), r_off, bl, reg.logical_offset);
+  // enumerate and decompress desired blobs
+  auto p = compressed_blob_bls.begin();
+  blobs2read_t::iterator b2r_it = blobs2read.begin();
+  while (b2r_it != blobs2read.end()) {
+    BlobRef bptr = b2r_it->first;
+    dout(20) << __func__ << "  blob " << *bptr << std::hex
+            << " need 0x" << b2r_it->second << std::dec << dendl;
+    if (bptr->get_blob().is_compressed()) {
+      assert(p != compressed_blob_bls.end());
+      bufferlist& compressed_bl = *p++;
+      if (_verify_csum(o, &bptr->get_blob(), 0, compressed_bl,
+                      b2r_it->second.front().logical_offset) < 0) {
+       return -EIO;
+      }
+      bufferlist raw_bl;
+      r = _decompress(compressed_bl, &raw_bl);
+      if (r < 0)
+       return r;
+      if (buffered) {
+       bptr->shared_blob->bc.did_read(bptr->shared_blob->get_cache(), 0,
+                                      raw_bl);
+      }
+      for (auto& i : b2r_it->second) {
+       ready_regions[i.logical_offset].substr_of(
+         raw_bl, i.blob_xoffset, i.length);
+      }
+    } else {
+      for (auto& reg : b2r_it->second) {
+       r = _verify_csum(o, &bptr->get_blob(), reg.r_off, reg.bl,
+                        reg.logical_offset);
        if (r < 0) {
          return -EIO;
        }
        if (buffered) {
-         bptr->shared_blob->bc.did_read(bptr->shared_blob->get_cache(), r_off, bl);
+         bptr->shared_blob->bc.did_read(bptr->shared_blob->get_cache(),
+                                        reg.r_off, reg.bl);
        }
 
        // prune and keep result
-       ready_regions[reg.logical_offset].substr_of(bl, front, reg.length);
+       ready_regions[reg.logical_offset].substr_of(
+         reg.bl, reg.front, reg.length);
       }
     }
     ++b2r_it;