From: Nitzan Mordechai
Date: Mon, 10 Jun 2024 10:51:03 +0000 (+0000)
Subject: crimson: Add support for bench osd command
X-Git-Tag: v20.0.0~1199^2
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F57952%2Fhead;p=ceph.git

crimson: Add support for bench osd command

This commit adds support for the 'bench' admin command in the OSD, allowing
administrators to perform benchmark tests on the OSD.

The 'bench' command accepts 4 optional parameters with the following default values:
1. count - Total number of bytes to write (default: 1GB).
2. size - Block size for each write operation (default: 4MB).
3. object_size - Size of each object to write (default: 0).
4. object_num - Number of objects to write (default: 0).

The results of the benchmark are returned as JSON-formatted output, which
includes the following fields:
1. bytes_written - Total number of bytes written during the benchmark.
2. blocksize - Block size used for each write operation.
3. elapsed_sec - Total time taken to complete the benchmark in seconds.
4. bytes_per_sec - Write throughput in bytes per second.
5. iops - Number of input/output operations per second.

Example JSON output:
```json
{
  "osd_bench_results": {
    "bytes_written": 1073741824,
    "blocksize": 4194304,
    "elapsed_sec": 0.5,
    "bytes_per_sec": 2147483648,
    "iops": 512
  }
}
```

Fixes: https://tracker.ceph.com/issues/66380
Signed-off-by: Nitzan Mordechai
---

diff --git a/qa/suites/crimson-rados/basic/tasks/rados_python.yaml b/qa/suites/crimson-rados/basic/tasks/rados_python.yaml
index 06d475e2165e..a6af29571194 100644
--- a/qa/suites/crimson-rados/basic/tasks/rados_python.yaml
+++ b/qa/suites/crimson-rados/basic/tasks/rados_python.yaml
@@ -17,4 +17,4 @@ tasks:
         timeout: 1h
         clients:
           client.0:
-            - rados/test_python.sh -m 'not (tier or ec or bench)'
+            - rados/test_python.sh -m 'not (tier or ec)'
diff --git a/src/crimson/admin/osd_admin.cc b/src/crimson/admin/osd_admin.cc
index 0436e5184df8..de9626a2f2d4 100644
--- a/src/crimson/admin/osd_admin.cc
+++ b/src/crimson/admin/osd_admin.cc
@@ -19,6 +19,7 @@
 #include "crimson/osd/pg.h"
 #include "crimson/osd/shard_services.h"
 
+SET_SUBSYS(osd);
 namespace {
 seastar::logger& logger()
 {
@@ -93,6 +94,115 @@ private:
 template std::unique_ptr<AdminSocketHook>
 make_asok_hook<SendBeaconHook>(crimson::osd::OSD& osd);
 
+/**
+ * An OSD admin hook: run bench
+ * Usage parameters:
+ *   count=Total number of bytes to write
+ *   size=Block size of each write
+ *   object_size=Object size
+ *   object_num=Number of objects
+ * Replies with an "osd_bench_results" object (bytes_written, blocksize,
+ * elapsed_sec, bytes_per_sec, iops), or -EINVAL if a parameter is rejected.
+ */
+class RunOSDBenchHook : public AdminSocketHook {
+public:
+  explicit RunOSDBenchHook(crimson::osd::OSD& osd) :
+    AdminSocketHook{"bench",
+      "name=count,type=CephInt,req=false "
+      "name=size,type=CephInt,req=false "
+      "name=object_size,type=CephInt,req=false "
+      "name=object_num,type=CephInt,req=false",
+      "run OSD bench"},
+    osd(osd)
+  {}
+  seastar::future<tell_result_t> call(const cmdmap_t& cmdmap,
+                                      std::string_view format,
+                                      ceph::bufferlist&& input) const final
+  {
+    LOG_PREFIX(RunOSDBenchHook::call);
+    int64_t count = cmd_getval_or<int64_t>(cmdmap, "count", 1LL << 30);
+    int64_t bsize = cmd_getval_or<int64_t>(cmdmap, "size", 4LL << 20);
+    int64_t osize = cmd_getval_or<int64_t>(cmdmap, "object_size", 0);
+    int64_t onum = cmd_getval_or<int64_t>(cmdmap, "object_num", 0);
+    if (count < 0 || bsize <= 0 || osize < 0 || onum < 0) {
+      // OSD::run_bench() divides by the block size and sizes its buffers
+      // from these values, so refuse zero/negative input up front.
+      INFO("invalid bench parameters: count {} size {} object_size {}"
+           " object_num {}", count, bsize, osize, onum);
+      return seastar::make_ready_future<tell_result_t>(
+        -EINVAL, "'size' must be positive and other values non-negative");
+    }
+    auto duration = local_conf()->osd_bench_duration;
+    auto max_block_size = local_conf()->osd_bench_max_block_size;
+    if (bsize > static_cast<int64_t>(max_block_size)) {
+      // let us limit the block size because the next checks rely on it
+      // having a sane value.  If we allow any block size to be set things
+      // can still go sideways.
+      INFO("block 'size' values are capped at {}. If you wish to use"
+           " a higher value, please adjust 'osd_bench_max_block_size'",
+           byte_u_t(max_block_size));
+      return seastar::make_ready_future<tell_result_t>(-EINVAL, "block size too large");
+    } else if (bsize < (1LL << 20)) {
+      // entering the realm of small block sizes.
+      // limit the count to a sane value, assuming a configurable amount of
+      // IOPS and duration, so that the OSD doesn't get hung up on this,
+      // preventing timeouts from going off
+      int64_t max_count = bsize * duration * local_conf()->osd_bench_small_size_max_iops;
+      if (count > max_count) {
+        INFO("bench count {} > osd_bench_small_size_max_iops {}",
+             count, max_count);
+        return seastar::make_ready_future<tell_result_t>(-EINVAL, "count too large");
+      }
+    } else {
+      // 1MB block sizes are big enough so that we get more stuff done.
+      // However, to avoid the osd from getting hung on this and having
+      // timers being triggered, we are going to limit the count assuming
+      // a configurable throughput and duration.
+      // NOTE: max_count is the total amount of bytes that we believe we
+      //       will be able to write during 'duration' for the given
+      //       throughput.  The block size hardly impacts this unless it's
+      //       way too big.  Given we already check how big the block size
+      //       is, it's safe to assume everything will check out.
+      int64_t max_count = local_conf()->osd_bench_large_size_max_throughput * duration;
+      if (count > max_count) {
+        INFO("'count' values greater than {} for a block size of {},"
+             " assuming {}/s, for {} seconds, can cause ill effects"
+             " on osd. Please adjust 'osd_bench_large_size_max_throughput'"
+             " with a higher value if you wish to use a higher 'count'.",
+             max_count, byte_u_t(bsize),
+             byte_u_t(local_conf()->osd_bench_large_size_max_throughput), duration);
+        return seastar::make_ready_future<tell_result_t>(-EINVAL, "count too large");
+      }
+    }
+    if (osize && bsize > osize) {
+      bsize = osize;
+    }
+
+    return osd.run_bench(count, bsize, osize, onum).then(
+      [format, bsize, count](double elapsed) {
+      if (elapsed < 0) {
+        return seastar::make_ready_future<tell_result_t>
+          (elapsed, "bench failed with error");
+      }
+
+      unique_ptr<Formatter> f{Formatter::create(format, "json-pretty", "json-pretty")};
+      f->open_object_section("osd_bench_results");
+      f->dump_int("bytes_written", count);
+      f->dump_int("blocksize", bsize);
+      f->dump_float("elapsed_sec", elapsed);
+      f->dump_float("bytes_per_sec", (elapsed > 0) ? count / elapsed : 0);
+      f->dump_float("iops", (elapsed > 0) ? (count / elapsed) / bsize : 0);
+      f->close_section();
+
+      return seastar::make_ready_future<tell_result_t>(std::move(f));
+    });
+  }
+private:
+  crimson::osd::OSD& osd;
+};
+template std::unique_ptr<AdminSocketHook>
+make_asok_hook<RunOSDBenchHook>(crimson::osd::OSD& osd);
+
 /**
  * send the latest pg stats to mgr
  */
diff --git a/src/crimson/admin/osd_admin.h b/src/crimson/admin/osd_admin.h
index a3ddd66b9a6a..1aafc5bee20a 100644
--- a/src/crimson/admin/osd_admin.h
+++ b/src/crimson/admin/osd_admin.h
@@ -17,6 +17,7 @@ class InjectDataErrorHook;
 class InjectMDataErrorHook;
 class OsdStatusHook;
 class SendBeaconHook;
+class RunOSDBenchHook;
 class DumpInFlightOpsHook;
 class DumpHistoricOpsHook;
 class DumpSlowestHistoricOpsHook;
diff --git a/src/crimson/osd/osd.cc b/src/crimson/osd/osd.cc
index 49291204d21b..321fefe45234 100644
--- a/src/crimson/osd/osd.cc
+++ b/src/crimson/osd/osd.cc
@@ -677,6 +677,7 @@ seastar::future<> OSD::start_asok_admin()
     asok->register_admin_commands();
     asok->register_command(make_asok_hook<OsdStatusHook>(std::as_const(*this)));
     asok->register_command(make_asok_hook<SendBeaconHook>(*this));
+    asok->register_command(make_asok_hook<RunOSDBenchHook>(*this));
asok->register_command(make_asok_hook(*this)); asok->register_command( make_asok_hook(std::as_const(pg_shard_manager))); @@ -1418,6 +1419,82 @@ seastar::future<> OSD::send_beacon() return monc->send_message(std::move(beacon)); } +seastar::future OSD::run_bench(int64_t count, int64_t bsize, int64_t osize, int64_t onum) { + LOG_PREFIX(OSD::run_bench); + DEBUG(); + std::vector> futures; + std::vector> cleanup_futures; + + auto collection_future = store.get_sharded_store().open_collection( + coll_t::meta()); + auto collection_ref = co_await std::move(collection_future); + ceph::os::Transaction cleanup_t; + + if (osize && onum) { + std::string data(osize, 'a'); + ceph::buffer::list bl; + bl.append(data); + + for (int i = 0; i < onum; ++i) { + ceph::os::Transaction t; + std::string oid_str = fmt::format("disk_bw_test_{}", i); + ghobject_t oid(hobject_t(sobject_t(object_t(oid_str), 0)), + ghobject_t::NO_GEN, + shard_id_t::NO_SHARD); + t.write(coll_t::meta(), oid, 0, data.size(), bl); + futures.push_back(store.get_sharded_store().do_transaction( + collection_ref, std::move(t))); + cleanup_t.remove(coll_t::meta(), oid); + cleanup_futures.push_back(store.get_sharded_store().do_transaction( + collection_ref, std::move(cleanup_t))); + } + } + + co_await seastar::when_all_succeed(futures.begin(), futures.end()); + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<> dis(0, 255); + std::vector> futures_bench; + auto start = std::chrono::steady_clock::now(); + + for (int i = 0; i < count / bsize; ++i) { + ceph::os::Transaction t; + ceph::buffer::ptr bp(bsize); + std::generate_n(bp.c_str(), bp.length(), [&dis, &gen]() { + return static_cast(dis(gen)); + }); + ceph::buffer::list bl(bsize); + bl.push_back(std::move(bp)); + bl.rebuild_page_aligned(); + + std::string oid_str; + uint64_t offset = 0; + if (onum && osize) { + oid_str = fmt::format("disk_bw_test_{}", dis(gen) % onum); + offset = (dis(gen) % (osize / bsize)) * bsize; + } else { + oid_str = 
fmt::format("disk_bw_test_{}", i * bsize); + } + ghobject_t oid(hobject_t(sobject_t(object_t(oid_str), 0))); + + t.write(coll_t::meta(), oid, offset, bsize, bl); + + futures_bench.push_back(store.get_sharded_store().do_transaction( + collection_ref, std::move(t))); + + if (!onum || !osize) { + cleanup_t.remove(coll_t::meta(), oid); + cleanup_futures.push_back(store.get_sharded_store().do_transaction( + collection_ref, std::move(cleanup_t))); + } + } + co_await seastar::when_all_succeed(futures_bench.begin(), futures_bench.end()); + auto end = std::chrono::steady_clock::now(); + double elapsed = std::chrono::duration(end - start).count(); + co_await seastar::when_all_succeed(cleanup_futures.begin(), cleanup_futures.end()); + co_return co_await seastar::make_ready_future(elapsed); +} + seastar::future<> OSD::update_heartbeat_peers() { if (!pg_shard_manager.is_active()) { diff --git a/src/crimson/osd/osd.h b/src/crimson/osd/osd.h index 7b0a08fc3b9a..8df23c53f7a1 100644 --- a/src/crimson/osd/osd.h +++ b/src/crimson/osd/osd.h @@ -247,6 +247,10 @@ private: public: seastar::future<> send_beacon(); + seastar::future run_bench(int64_t count, + int64_t bsize, + int64_t osize, + int64_t onum); private: LogClient log_client; diff --git a/src/include/types.h b/src/include/types.h index c3969f88a34c..bc2cdb52e717 100644 --- a/src/include/types.h +++ b/src/include/types.h @@ -482,6 +482,10 @@ struct byte_u_t { explicit byte_u_t(uint64_t _v) : v(_v) {}; }; +#if FMT_VERSION >= 90000 +template <> struct fmt::formatter : fmt::ostream_formatter {}; +#endif + inline std::ostream& operator<<(std::ostream& out, const byte_u_t& b) { uint64_t n = b.v;