]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
os/bluestore: support RocksDB prefetching at BlueFS
authorIgor Fedotov <ifedotov@suse.com>
Thu, 25 Apr 2019 11:39:14 +0000 (14:39 +0300)
committerIgor Fedotov <ifedotov@suse.com>
Thu, 9 May 2019 11:13:42 +0000 (14:13 +0300)
Signed-off-by: Igor Fedotov <ifedotov@suse.com>
src/os/bluestore/BlueFS.cc
src/os/bluestore/BlueFS.h
src/os/bluestore/BlueRocksEnv.cc

index de0cc1c9ed747645cca42b00e1ddf8c9c9b88880..35d6c3bafca00089c9d95d2148d534741d0af78f 100644 (file)
@@ -124,6 +124,35 @@ void BlueFS::_init_logger()
                    "Maximum bytes allocated from DB");
   b.add_u64_counter(l_bluefs_max_bytes_slow, "max_bytes_slow",
                    "Maximum bytes allocated from SLOW");
+
+  b.add_u64_counter(l_bluefs_read_random_count, "read_random_count",
+                   "random read requests processed");
+  b.add_u64_counter(l_bluefs_read_random_bytes, "read_random_bytes",
+                   "Bytes requested in random read mode", NULL,
+                   PerfCountersBuilder::PRIO_USEFUL, unit_t(UNIT_BYTES));
+  b.add_u64_counter(l_bluefs_read_random_disk_count, "read_random_disk_count",
+                   "random reads requests going to disk");
+  b.add_u64_counter(l_bluefs_read_random_disk_bytes, "read_random_disk_bytes",
+                   "Bytes read from disk in random read mode", NULL,
+                   PerfCountersBuilder::PRIO_USEFUL, unit_t(UNIT_BYTES));
+  b.add_u64_counter(l_bluefs_read_random_buffer_count, "read_random_buffer_count",
+                   "random read requests processed using prefetch buffer");
+  b.add_u64_counter(l_bluefs_read_random_buffer_bytes, "read_random_buffer_bytes",
+                   "Bytes read from prefetch buffer in random read mode", NULL,
+                   PerfCountersBuilder::PRIO_USEFUL, unit_t(UNIT_BYTES));
+
+  b.add_u64_counter(l_bluefs_read_count, "read_count",
+                   "buffered read requests processed");
+  b.add_u64_counter(l_bluefs_read_bytes, "read_bytes",
+                   "Bytes requested in buffered read mode", NULL,
+                   PerfCountersBuilder::PRIO_USEFUL, unit_t(UNIT_BYTES));
+
+  b.add_u64_counter(l_bluefs_read_prefetch_count, "read_prefetch_count",
+                   "prefetch read requests processed");
+  b.add_u64_counter(l_bluefs_read_prefetch_bytes, "read_prefetch_bytes",
+                   "Bytes requested in prefetch read mode", NULL,
+                   PerfCountersBuilder::PRIO_USEFUL, unit_t(UNIT_BYTES));
+
   logger = b.create_perf_counters();
   cct->get_perfcounters_collection()->add(logger);
 }
@@ -1328,6 +1357,9 @@ int BlueFS::_read_random(
   size_t len,            ///< [in] this many bytes
   char *out)             ///< [out] optional: or copy it here
 {
+  auto* buf = &h->buf;
+
+  int ret = 0;
   dout(10) << __func__ << " h " << h
            << " 0x" << std::hex << off << "~" << len << std::dec
           << " from " << h->file->fnode << dendl;
@@ -1343,24 +1375,57 @@ int BlueFS::_read_random(
     dout(20) << __func__ << " reaching (or past) eof, len clipped to 0x"
             << std::hex << len << std::dec << dendl;
   }
+  logger->inc(l_bluefs_read_random_count, 1);
+  logger->inc(l_bluefs_read_random_bytes, len);
+
 
-  int ret = 0;
   while (len > 0) {
-    uint64_t x_off = 0;
-    auto p = h->file->fnode.seek(off, &x_off);
-    uint64_t l = std::min(p->length - x_off, static_cast<uint64_t>(len));
-    dout(20) << __func__ << " read buffered 0x"
-             << std::hex << x_off << "~" << l << std::dec
-             << " of " << *p << dendl;
-    int r = bdev[p->bdev]->read_random(p->offset + x_off, l, out,
-                                      cct->_conf->bluefs_buffered_io);
-    ceph_assert(r == 0);
-    off += l;
-    len -= l;
-    ret += l;
-    out += l;
-  }
+    if (off < buf->bl_off || off >= buf->get_buf_end()) {
 
+      uint64_t x_off = 0;
+      auto p = h->file->fnode.seek(off, &x_off);
+      uint64_t l = std::min(p->length - x_off, static_cast<uint64_t>(len));
+      dout(20) << __func__ << " read random 0x"
+              << std::hex << x_off << "~" << l << std::dec
+              << " of " << *p << dendl;
+      int r = bdev[p->bdev]->read_random(p->offset + x_off, l, out,
+                                        cct->_conf->bluefs_buffered_io);
+      ceph_assert(r == 0);
+      off += l;
+      len -= l;
+      ret += l;
+      out += l;
+
+      logger->inc(l_bluefs_read_random_disk_count, 1);
+      logger->inc(l_bluefs_read_random_disk_bytes, l);
+    } else {
+      auto left = buf->get_buf_remaining(off);
+      int r = std::min(len, left);
+      logger->inc(l_bluefs_read_random_buffer_count, 1);
+      logger->inc(l_bluefs_read_random_buffer_bytes, r);
+      dout(20) << __func__ << " left 0x" << std::hex << left
+             << " 0x" << off << "~" << len << std::dec
+             << dendl;
+
+      if (out) {
+       // NOTE: h->bl is normally a contiguous buffer so c_str() is free.
+       memcpy(out, buf->bl.c_str() + off - buf->bl_off, r);
+       out += r;
+      }
+
+      dout(30) << __func__ << " result chunk (0x"
+              << std::hex << r << std::dec << " bytes):\n";
+      bufferlist t;
+      t.substr_of(buf->bl, off - buf->bl_off, r);
+      t.hexdump(*_dout);
+      *_dout << dendl;
+
+      off += r;
+      len -= r;
+      ret += r;
+      buf->pos += r;
+    }
+  }
   dout(20) << __func__ << " got " << ret << dendl;
   --h->file->num_reading;
   return ret;
@@ -1374,9 +1439,12 @@ int BlueFS::_read(
   bufferlist *outbl,     ///< [out] optional: reference the result here
   char *out)             ///< [out] optional: or copy it here
 {
+  bool prefetch = !outbl && !out;
   dout(10) << __func__ << " h " << h
            << " 0x" << std::hex << off << "~" << len << std::dec
-          << " from " << h->file->fnode << dendl;
+          << " from " << h->file->fnode
+          << (prefetch ? " prefetch" : "")
+          << dendl;
 
   ++h->file->num_reading;
 
@@ -1389,6 +1457,13 @@ int BlueFS::_read(
     dout(20) << __func__ << " reaching (or past) eof, len clipped to 0x"
             << std::hex << len << std::dec << dendl;
   }
+  logger->inc(l_bluefs_read_count, 1);
+  logger->inc(l_bluefs_read_bytes, len);
+  if (prefetch) {
+    logger->inc(l_bluefs_read_prefetch_count, 1);
+    logger->inc(l_bluefs_read_prefetch_bytes, len);
+  }
+
   if (outbl)
     outbl->clear();
 
index 2eae38fc15fe563d5319f6894ce827b5b8a4ac08..12c25ca238737481176f562f8f25386193d41283 100644 (file)
@@ -39,6 +39,17 @@ enum {
   l_bluefs_max_bytes_wal,
   l_bluefs_max_bytes_db,
   l_bluefs_max_bytes_slow,
+  l_bluefs_read_random_count,
+  l_bluefs_read_random_bytes,
+  l_bluefs_read_random_disk_count,
+  l_bluefs_read_random_disk_bytes,
+  l_bluefs_read_random_buffer_count,
+  l_bluefs_read_random_buffer_bytes,
+  l_bluefs_read_count,
+  l_bluefs_read_bytes,
+  l_bluefs_read_prefetch_count,
+  l_bluefs_read_prefetch_bytes,
+
   l_bluefs_last,
 };
 
index 6536f6ea38d29b6ea99c26b17798009105757318..7f2f8991b9e12e141c5c4d5c62f6f66aae99ec56 100644 (file)
@@ -121,6 +121,12 @@ class BlueRocksRandomAccessFile : public rocksdb::RandomAccessFile {
                    (unsigned long long)h->file->fnode.ino);
   };
 
+  // Readahead the file starting from offset by n bytes for caching.
+  rocksdb::Status Prefetch(uint64_t offset, size_t n) override {
+    fs->read(h, &h->buf, offset, n, nullptr, nullptr);
+    return rocksdb::Status::OK();
+  }
+
   //enum AccessPattern { NORMAL, RANDOM, SEQUENTIAL, WILLNEED, DONTNEED };
 
   void Hint(AccessPattern pattern) override {