]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson: Add support for bench osd command 57952/head
authorNitzan Mordechai <nmordech@redhat.com>
Mon, 10 Jun 2024 10:51:03 +0000 (10:51 +0000)
committerNitzan Mordechai <nmordech@redhat.com>
Mon, 26 Aug 2024 05:43:17 +0000 (05:43 +0000)
This commit adds support for the 'bench' admin command in the OSD,
allowing administrators to perform benchmark tests on the OSD. The
'bench' command accepts 4 optional parameters with the following
default values:

1. count - Total number of bytes to write (default: 1GB).
2. size - Block size for each write operation (default: 4MB).
3. object_size - Size of each object to write (default: 0, i.e. disabled).
4. object_num - Number of objects to write (default: 0, i.e. disabled).

The results of the benchmark are returned in a JSON formatted output,
which includes the following fields:

1. bytes_written - Total number of bytes written during the benchmark.
2. blocksize - Block size used for each write operation.
3. elapsed_sec - Total time taken to complete the benchmark in seconds.
4. bytes_per_sec - Write throughput in bytes per second.
5. iops - Number of input/output operations per second.

Example JSON output:

```json
{
  "osd_bench_results": {
    "bytes_written": 1073741824,
    "blocksize": 4194304,
    "elapsed_sec": 0.5,
    "bytes_per_sec": 2147483648,
    "iops": 512
  }
}
```

Fixes: https://tracker.ceph.com/issues/66380
Signed-off-by: Nitzan Mordechai <nmordech@redhat.com>
qa/suites/crimson-rados/basic/tasks/rados_python.yaml
src/crimson/admin/osd_admin.cc
src/crimson/admin/osd_admin.h
src/crimson/osd/osd.cc
src/crimson/osd/osd.h
src/include/types.h

index 06d475e2165ed218ed378b39514b6b481d31ca47..a6af29571194649b02d58cca7775d720b69ab340 100644 (file)
@@ -17,4 +17,4 @@ tasks:
     timeout: 1h
     clients:
       client.0:
-        - rados/test_python.sh -m 'not (tier or ec or bench)'
+        - rados/test_python.sh -m 'not (tier or ec)'
index 0436e5184df8c20624949c10898e9e49aca4bb25..de9626a2f2d4555f8c25cd191b02582e20310a63 100644 (file)
@@ -19,6 +19,7 @@
 #include "crimson/osd/pg.h"
 #include "crimson/osd/shard_services.h"
 
+SET_SUBSYS(osd);
 namespace {
 seastar::logger& logger()
 {
@@ -93,6 +94,105 @@ private:
 template std::unique_ptr<AdminSocketHook>
 make_asok_hook<SendBeaconHook>(crimson::osd::OSD& osd);
 
+/**
+ * An OSD admin hook: run bench
+ * Usage parameters:
+ *   count=Count of bytes to write
+ *   bsize=block size
+ *   osize=Object size
+ *   onum=Number of objects
+ */
+class RunOSDBenchHook : public AdminSocketHook {
+public:
+  explicit RunOSDBenchHook(crimson::osd::OSD& osd) :
+    AdminSocketHook{"bench",
+      "name=count,type=CephInt,req=false "
+      "name=size,type=CephInt,req=false "
+      "name=object_size,type=CephInt,req=false "
+      "name=object_num,type=CephInt,req=false",
+      "run OSD bench"},
+    osd(osd)
+  {}
+  seastar::future<tell_result_t> call(const cmdmap_t& cmdmap,
+              std::string_view format,
+              ceph::bufferlist&& input) const final
+  {
+    LOG_PREFIX(RunOSDBenchHook::call);
+    int64_t count = cmd_getval_or<int64_t>(cmdmap, "count", 1LL << 30); // total bytes to write (default 1 GiB)
+    int64_t bsize = cmd_getval_or<int64_t>(cmdmap, "size", 4LL << 20); // per-write block size (default 4 MiB)
+    int64_t osize = cmd_getval_or<int64_t>(cmdmap, "object_size", 0); // 0 disables fixed-size object mode
+    int64_t onum = cmd_getval_or<int64_t>(cmdmap, "object_num", 0); // 0 disables fixed object count
+    auto duration = local_conf()->osd_bench_duration;
+    auto max_block_size = local_conf()->osd_bench_max_block_size;
+    if (bsize > static_cast<int64_t>(max_block_size)) { // reject block sizes above the configured cap
+      // let us limit the block size because the next checks rely on it
+      // having a sane value.  If we allow any block size to be set things
+      // can still go sideways.
+      INFO("block 'size' values are capped at {}. If you wish to use"
+        " a higher value, please adjust 'osd_bench_max_block_size'",
+        byte_u_t(max_block_size));
+      return seastar::make_ready_future<tell_result_t>(-EINVAL, "block size too large");
+    } else if (bsize < (1LL << 20)) { // small-block regime: cap by IOPS budget
+      // entering the realm of small block sizes.
+      // limit the count to a sane value, assuming a configurable amount of
+      // IOPS and duration, so that the OSD doesn't get hung up on this,
+      // preventing timeouts from going off
+      int64_t max_count = bsize * duration * local_conf()->osd_bench_small_size_max_iops; // bytes we can plausibly write in 'duration'
+      if (count > max_count) {
+        INFO("bench count {} > osd_bench_small_size_max_iops {}",
+          count, max_count);
+        return seastar::make_ready_future<tell_result_t>(-EINVAL, "count too large");
+      }
+    } else {
+      // 1MB block sizes are big enough so that we get more stuff done.
+      // However, to avoid the osd from getting hung on this and having
+      // timers being triggered, we are going to limit the count assuming
+      // a configurable throughput and duration.
+      // NOTE: max_count is the total amount of bytes that we believe we
+      //       will be able to write during 'duration' for the given
+      //       throughput.  The block size hardly impacts this unless it's
+      //       way too big.  Given we already check how big the block size
+      //       is, it's safe to assume everything will check out.
+      int64_t max_count = local_conf()->osd_bench_large_size_max_throughput * duration;
+      if (count > max_count) {
+        INFO("'count' values greater than {} for a block size of {},"
+          " assuming {} IOPS, for {} seconds, can cause ill effects"
+          " on osd. Please adjust 'osd_bench_small_size_max_iops'"
+          " with a higher value if you wish to use a higher 'count'.",
+          max_count, byte_u_t(bsize), local_conf()->osd_bench_small_size_max_iops,
+          duration); // NOTE(review): message names 'osd_bench_small_size_max_iops' but this branch is capped by osd_bench_large_size_max_throughput — confirm wording
+        return seastar::make_ready_future<tell_result_t>(-EINVAL, "count too large");
+      }
+    }
+    if (osize && bsize > osize) { // a write can never exceed the object size
+      bsize = osize;
+    }
+
+    return osd.run_bench(count, bsize, osize, onum).then(
+      [format, bsize, count](double elapsed) {
+      if (elapsed < 0) { // caller contract: negative elapsed signals failure
+        return seastar::make_ready_future<tell_result_t>
+          (elapsed, "bench failed with error"); // NOTE(review): double narrowed into the int error-code slot — confirm intended
+      }
+
+      unique_ptr<Formatter> f{Formatter::create(format, "json-pretty", "json-pretty")}; // fall back to json-pretty when format is empty/unknown
+      f->open_object_section("osd_bench_results");
+      f->dump_int("bytes_written", count);
+      f->dump_int("blocksize", bsize);
+      f->dump_float("elapsed_sec", elapsed);
+      f->dump_float("bytes_per_sec", (elapsed > 0) ? count / elapsed : 0);
+      f->dump_float("iops", (elapsed > 0) ? (count / elapsed) / bsize : 0); // ops/sec = throughput divided by block size
+      f->close_section();
+      
+      return seastar::make_ready_future<tell_result_t>(std::move(f));
+    });
+  }
+private:
+  crimson::osd::OSD& osd;
+};
+template std::unique_ptr<AdminSocketHook>
+make_asok_hook<RunOSDBenchHook>(crimson::osd::OSD& osd);
+
 /**
  * send the latest pg stats to mgr
  */
index a3ddd66b9a6aefd6a652703116a9578db2b1e316..1aafc5bee20adcd004a15e2d5202dd4d890115bf 100644 (file)
@@ -17,6 +17,7 @@ class InjectDataErrorHook;
 class InjectMDataErrorHook;
 class OsdStatusHook;
 class SendBeaconHook;
+class RunOSDBenchHook;
 class DumpInFlightOpsHook;
 class DumpHistoricOpsHook;
 class DumpSlowestHistoricOpsHook;
index 49291204d21bb37800832fd8a990196d7505b89c..321fefe45234c15ad97a6c149ee84851b4eb682c 100644 (file)
@@ -677,6 +677,7 @@ seastar::future<> OSD::start_asok_admin()
     asok->register_admin_commands();
     asok->register_command(make_asok_hook<OsdStatusHook>(std::as_const(*this)));
     asok->register_command(make_asok_hook<SendBeaconHook>(*this));
+    asok->register_command(make_asok_hook<RunOSDBenchHook>(*this));
     asok->register_command(make_asok_hook<FlushPgStatsHook>(*this));
     asok->register_command(
       make_asok_hook<DumpPGStateHistory>(std::as_const(pg_shard_manager)));
@@ -1418,6 +1419,82 @@ seastar::future<> OSD::send_beacon()
   return monc->send_message(std::move(beacon));
 }
 
+seastar::future<double> OSD::run_bench(int64_t count, int64_t bsize, int64_t osize, int64_t onum) { // NOTE(review): never returns a negative value, yet the admin hook treats elapsed < 0 as an error; failures surface as exceptions from when_all_succeed instead — confirm intended
+    LOG_PREFIX(OSD::run_bench);
+    DEBUG();
+    std::vector<seastar::future<>> futures; // prepopulation writes (fixed-object mode only)
+    std::vector<seastar::future<>> cleanup_futures; // object removals, awaited after timing ends
+    
+    auto collection_future = store.get_sharded_store().open_collection(
+      coll_t::meta()); // bench objects live in the meta collection
+    auto collection_ref = co_await std::move(collection_future);
+    ceph::os::Transaction cleanup_t;
+
+    if (osize && onum) { // fixed-object mode: pre-create 'onum' objects of 'osize' bytes
+      std::string data(osize, 'a');
+      ceph::buffer::list bl;
+      bl.append(data);
+
+      for (int i = 0; i < onum; ++i) {
+        ceph::os::Transaction t;
+        std::string oid_str = fmt::format("disk_bw_test_{}", i);
+        ghobject_t oid(hobject_t(sobject_t(object_t(oid_str), 0)),
+                        ghobject_t::NO_GEN,
+                        shard_id_t::NO_SHARD);
+        t.write(coll_t::meta(), oid, 0, data.size(), bl);
+        futures.push_back(store.get_sharded_store().do_transaction(
+          collection_ref, std::move(t)));
+        cleanup_t.remove(coll_t::meta(), oid); // NOTE(review): cleanup_t is moved from on the line below yet reused on the next iteration (and again later) — likely use-after-move; verify Transaction's moved-from state
+        cleanup_futures.push_back(store.get_sharded_store().do_transaction(
+          collection_ref, std::move(cleanup_t))); // NOTE(review): submits removal before the timed writes run — confirm ordering is safe
+      }
+    }
+
+    co_await seastar::when_all_succeed(futures.begin(), futures.end()); // prepopulation must finish before timing starts
+    std::random_device rd;
+    std::mt19937 gen(rd());
+    std::uniform_int_distribution<> dis(0, 255); // random payload bytes; also reused to pick object/offset below
+    std::vector<seastar::future<>> futures_bench;
+    auto start = std::chrono::steady_clock::now(); // timed region begins
+
+    for (int i = 0; i < count / bsize; ++i) { // one transaction per 'bsize' chunk of 'count'
+      ceph::os::Transaction t;
+      ceph::buffer::ptr bp(bsize);
+      std::generate_n(bp.c_str(), bp.length(), [&dis, &gen]() {
+          return static_cast<char>(dis(gen));
+      });
+      ceph::buffer::list bl(bsize);
+      bl.push_back(std::move(bp));
+      bl.rebuild_page_aligned(); // page-aligned buffer for the backing store
+
+      std::string oid_str;
+      uint64_t offset = 0;
+      if (onum && osize) { // fixed-object mode: random object, random aligned offset within it
+        oid_str = fmt::format("disk_bw_test_{}", dis(gen) % onum);
+        offset = (dis(gen) % (osize / bsize)) * bsize;
+      } else { // otherwise each write goes to its own fresh object at offset 0
+        oid_str = fmt::format("disk_bw_test_{}", i * bsize);
+      }
+      ghobject_t oid(hobject_t(sobject_t(object_t(oid_str), 0)));
+
+      t.write(coll_t::meta(), oid, offset, bsize, bl);
+
+      futures_bench.push_back(store.get_sharded_store().do_transaction(
+        collection_ref, std::move(t)));
+
+      if (!onum || !osize) { // per-write objects were not pre-created, so schedule their removal here
+        cleanup_t.remove(coll_t::meta(), oid);
+        cleanup_futures.push_back(store.get_sharded_store().do_transaction(
+          collection_ref, std::move(cleanup_t))); // NOTE(review): same cleanup_t move-and-reuse pattern as above — verify
+      }
+    }
+    co_await seastar::when_all_succeed(futures_bench.begin(), futures_bench.end());
+    auto end = std::chrono::steady_clock::now(); // timed region ends before cleanup is awaited
+    double elapsed = std::chrono::duration<double>(end - start).count();
+    co_await seastar::when_all_succeed(cleanup_futures.begin(), cleanup_futures.end());
+    co_return co_await seastar::make_ready_future<double>(elapsed); // NOTE(review): 'co_return elapsed;' would suffice — the ready-future round trip is redundant
+}
+
 seastar::future<> OSD::update_heartbeat_peers()
 {
   if (!pg_shard_manager.is_active()) {
index 7b0a08fc3b9a22a122e50f225cf0e7e59db2e3fc..8df23c53f7a106c703baeb82cf387a1a1d9b1b6c 100644 (file)
@@ -247,6 +247,10 @@ private:
 
 public:
   seastar::future<> send_beacon();
+  seastar::future<double> run_bench(int64_t count,
+    int64_t bsize,
+    int64_t osize,
+    int64_t onum);
 
 private:
   LogClient log_client;
index c3969f88a34c3a3a6fbfde82777f00b6e5f73fd4..bc2cdb52e717f8a2c139452a98fd9d6586064a94 100644 (file)
@@ -482,6 +482,10 @@ struct byte_u_t {
   explicit byte_u_t(uint64_t _v) : v(_v) {};
 };
 
+#if FMT_VERSION >= 90000
+template <> struct fmt::formatter<byte_u_t> : fmt::ostream_formatter {}; // fmt >= 9 no longer formats via operator<< implicitly; opt byte_u_t in explicitly
+#endif
+
 inline std::ostream& operator<<(std::ostream& out, const byte_u_t& b)
 {
   uint64_t n = b.v;