From: Adam Kupczyk Date: Mon, 18 May 2026 16:33:45 +0000 (+0000) Subject: objectstore/test_kv: Unittest for util_divide_key_range X-Git-Tag: v21.0.1~9^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=b099187a6b802e043916fbf5f7a579c9eac2ef4d;p=ceph.git objectstore/test_kv: Unittest for util_divide_key_range Extensive tests for quality of KeyValueDB::util_divide_key_range. Tests speed and correctness of split. Has 2 control modes: 1) on Jenkins (detected by JENKINS_HOME) run with reduced scope 2) passing env VERBOSE=1 gives more details Signed-off-by: Adam Kupczyk --- diff --git a/src/test/objectstore/test_kv.cc b/src/test/objectstore/test_kv.cc index 92114399ee4..f5336645594 100644 --- a/src/test/objectstore/test_kv.cc +++ b/src/test/objectstore/test_kv.cc @@ -13,9 +13,12 @@ * */ +#include #include #include #include +#include +#include #include #include #include "kv/KeyValueDB.h" @@ -28,6 +31,7 @@ #include "common/errno.h" #include "include/stringify.h" #include +#include using namespace std; @@ -1286,6 +1290,425 @@ TEST_F(RocksDBResharding, change_reshard) { } } +typedef std::mt19937 gen_type; + +class RocksDBSplitRange : public ::testing::Test { +public: + boost::scoped_ptr db; + gen_type rng = gen_type(0); + RocksDBSplitRange() : db(0) {} + + string _bl_to_str(bufferlist val) { + string str(val.c_str(), val.length()); + return str; + } + + void rm_r(string path) { + string cmd = string("rm -r ") + path; + if (verbose) + cout << "==> " << cmd << std::endl; + int r = ::system(cmd.c_str()); + if (r) { + cerr << "failed with exit code " << r + << ", continuing anyway" << std::endl; + } + } + + void SetUp() override { + verbose = getenv("VERBOSE") && strcmp(getenv("VERBOSE"), "1") == 0; + + int r = ::mkdir("kv_test_temp_dir", 0777); + if (r < 0 && errno != EEXIST) { + r = -errno; + cerr << __func__ << ": unable to create kv_test_temp_dir: " + << cpp_strerror(r) << std::endl; + return; + } + + KeyValueDB* db_kv = KeyValueDB::create(g_ceph_context, "rocksdb", "kv_test_temp_dir"); + RocksDBStore* db_rocks = dynamic_cast(db_kv); + ceph_assert(db_rocks); + db.reset(db_rocks); + ASSERT_EQ(0, db->init(g_conf()->bluestore_rocksdb_options)); + } + + void TearDown() override { + db.reset(nullptr); + rm_r("kv_test_temp_dir"); + } + + bool is_jenkins() { + return (getenv("JENKINS_HOME") != nullptr); + } + + bool verbose; + std::vector random_words = { + "found", "brain", "fully", "pen", "worth", "race", + "stand", "nodded", "whenever", "surrounded", "industrial", "skin", + "this", "direction", "family", "beginning", "whenever", "held", + "metal", "year", "like", "valuable", "softly", "whistle", + "perfectly", "broken", "idea", "also", "coffee", "branch", + "tongue", "immediately", "bent", "partly", "burn", "include", + "certain", "burst", "final", "smoke", "positive", "perfectly" + }; + uniform_int_distribution<> uniform_random_words = + uniform_int_distribution<>(0, random_words.size() - 1); + uniform_int_distribution<> random_byte = + uniform_int_distribution<>('!', '~'); + std::map data; + + std::string random_string(uint32_t msg_size, uint32_t compress_target) + { + std::string result; + uint32_t s = 0; // accumulated message + uint32_t c = 0; // estimated compressed size + // pick words + while (s < msg_size && (msg_size - s) > (compress_target - c)) { + uint32_t idx = uniform_random_words(rng); + result += random_words[idx]; + s += random_words[idx].length(); + c += 2; //~1 = random byte, ~2 = ref message + } + // fillup with full random bytes + while (s < msg_size) { + result.push_back(random_byte(rng)); + s++; + } + return result; + } + + struct DataToDB { + KeyValueDB* db; + std::string prefix; + KeyValueDB::Transaction t; + uint32_t cnt = 0; + DataToDB(KeyValueDB* db, const std::string& prefix) + : db(db), prefix(prefix) + { + t = db->get_transaction(); + } + void add(const std::string& key, const std::string& value) + { + bufferlist v; + v.append(value); + t->set(prefix, key, v); + cnt++; + if ((cnt % 1000) == 0) { + ASSERT_EQ(db->submit_transaction_sync(t), 0); + t.reset(); + t = db->get_transaction(); + } + } + void rm(const std::string& key) + { + t->rmkey(prefix, key); + cnt++; + if ((cnt % 1000) == 0) { + ASSERT_EQ(db->submit_transaction_sync(t), 0); + t.reset(); + t = db->get_transaction(); + } + } + void flush() + { + ASSERT_EQ(db->submit_transaction_sync(t), 0); + t.reset(); + t = db->get_transaction(); + } + }; + + struct TreeGen { + uint32_t left_w; + uint32_t right_w; + std::string left_name; + std::string right_name; + bool sub_tree_branch(int& n) const + { + uint32_t count_left = (n * left_w) / (left_w + right_w); + uint32_t rem = (n * left_w) % (left_w + right_w); + uint32_t count_right = n - count_left; + if (rem >= right_w) { + n = count_left; + return false; //left + } else { + n = count_right - 1; //right + return true; + } + } + std::string get_branch_name(int n) const + { + std::string result; + while (n > 0) { + bool do_right = sub_tree_branch(n); + if (do_right) + result.append(right_name); + else + result.append(left_name); + } + return result; + } + TreeGen( + uint32_t left_weight, + uint32_t right_weight, + const std::string& left_name, + const std::string& right_name) + : left_w(left_weight) + , right_w(right_weight) + , left_name(left_name) + , right_name(right_name) + { + if (left_weight < right_weight) { + std::swap(this->left_name, this->right_name); + std::swap(left_weight, right_weight); + } + } + }; + + void grow_tree( + const TreeGen& t, DataToDB& data_to_db, + uint32_t key_from, uint32_t key_to, + const std::string& key_tail, + uint32_t value_size, uint32_t value_comp_size) + { + for(uint32_t i = key_from; i < key_to; i++) { + std::string key = t.get_branch_name(i) + key_tail; + std::string value = random_string(value_size, value_comp_size); + data_to_db.add(key, value); + } + data_to_db.flush(); + } + + void prune_tree( + const TreeGen& t, DataToDB& data_to_db, + uint32_t key_from, uint32_t key_to, + const std::string& key_tail) + { + for(uint32_t i = key_from; i < key_to; i++) { + std::string key = t.get_branch_name(i) + key_tail; + data_to_db.rm(key); + } + data_to_db.flush(); + } + + std::pair get_split_quality( + const std::string& prefix, + const std::vector& chunks) + { + uint64_t all_keys = 0; + uint64_t max_keys = 0; + KeyValueDB::Iterator it; + it = db->get_iterator(prefix); + if (verbose) std::cout << "keys_in_shards: "; + for (const auto& c : chunks) { + uint64_t keys = 0; + it->lower_bound(c.first_key); + while(it->valid() && it->key() < c.upper_bound) { + keys++; + all_keys++; + it->next(); + } + if (verbose) std::cout << keys << " " << std::flush; + if (max_keys < keys) { + max_keys = keys; + } + } + if (verbose) std::cout << std::endl; + return make_pair(all_keys, max_keys); + } + + void scan_split_quality( + const std::string& prefix) + { + if (!verbose) return; + std::vector chunks; + auto c_set = {1, 3, 5, 10, 20, 30}; + auto p_set = {0.2, 0.1, 0.05, 0.02, 0.01}; + std::cout << fmt::format("{:6}",""); + for (uint32_t c: c_set) { + std::cout << fmt::format("{:10}",c); + } + std::cout << std::endl; + for (double prec: p_set) { + std::cout << fmt::format("{:5.2f} ",prec); + for (uint32_t c: c_set) { + utime_t start = ceph_clock_now(); + db->util_divide_key_range( + prefix, "", std::string(20, '\377'), + c, 1000000, prec, chunks); + utime_t end = ceph_clock_now(); + std::cout << fmt::format("{:7.3f}/{:2}", double(end - start) * 1000, chunks.size()); + } + std::cout << std::endl; + } + } +}; + +TEST_F(RocksDBSplitRange, grow_tree) { + ASSERT_EQ(0, db->create_and_open(cout, "O")); + + TreeGen t(70, 30, "l", "r"); + DataToDB to_db(db.get(), "O"); + vector keys_to_test = {0, 1000, 10000, 100000, 1'000'000, 2'000'000, 5'000'000, 10'000'000}; + if (is_jenkins()) keys_to_test = {1'000'000}; + uint64_t keys = 0; + for (auto keys_end : keys_to_test) { + if (verbose) std::cout << "grow " << keys << "->" << keys_end << std::endl; + grow_tree(t, to_db, keys, keys_end, "a", 1000, 300); + keys = keys_end; + keys_end = keys_end * 2; + + std::vector chunks; + utime_t start = ceph_clock_now(); + db->util_divide_key_range( + "O", "", "", + 10, 10000000, 0.1, chunks); + utime_t end = ceph_clock_now(); + if (verbose) std::cout << "time=" << end - start << std::endl; + auto [all_keys, max_keys] = get_split_quality("O", chunks); + double speedup = double(all_keys) / max_keys; + if (verbose) std::cout << "quality=" << double(all_keys) / max_keys / chunks.size() + << " speedup=" << speedup << std::endl; + EXPECT_EQ(all_keys, keys); + if (all_keys > 1000000) { + EXPECT_GT(speedup, 8); + } + scan_split_quality("O"); + } + db->close(); +} + +TEST_F(RocksDBSplitRange, grow_tree_high_quality) { + ASSERT_EQ(0, db->create_and_open(cout, "O")); + + TreeGen t(70, 30, "l", "r"); + DataToDB to_db(db.get(), "O"); + vector keys_to_test = {0, 1000, 10000, 100000, 1'000'000, 2'000'000, 5'000'000, 10'000'000}; + if (is_jenkins()) keys_to_test = {1'000'000}; + uint64_t keys = 0; + for (auto keys_end : keys_to_test) { + if (verbose) std::cout << "grow " << keys << "->" << keys_end << std::endl; + grow_tree(t, to_db, keys, keys_end, "a", 1000, 300); + keys = keys_end; + keys_end = keys_end * 2; + + std::vector chunks; + db->util_divide_key_range( + "O", "", "", + 10, 10000000, 0.02, chunks); + auto [all_keys, max_keys] = get_split_quality("O", chunks); + double speedup = double(all_keys) / max_keys; + if (verbose) std::cout << "quality=" << double(all_keys) / max_keys / chunks.size() + << " speedup=" << speedup << std::endl; + EXPECT_EQ(all_keys, keys); + if (all_keys > 1000000) { + EXPECT_GT(speedup, 8); + } + scan_split_quality("O"); + } + db->close(); +} + +TEST_F(RocksDBSplitRange, grow_two_trees) { + ASSERT_EQ(0, db->create_and_open(cout, "O")); + + TreeGen t1(70, 30, "left", "right"); + TreeGen t2(60, 40, "left", "right"); + DataToDB to_db(db.get(), "O"); + vector keys_to_test = {0, 1000, 10000, 100000, 1'000'000, 2'000'000, 5'000'000, 10'000'000}; + if (is_jenkins()) keys_to_test = {1'000'000}; + uint64_t keys = 0; + for (auto keys_end : keys_to_test) { + if (verbose) std::cout << "grow " << keys << "->" << keys_end << std::endl; + grow_tree(t1, to_db, keys, keys_end, "a", 1000, 300); + grow_tree(t2, to_db, keys, keys_end, "b", 500, 150); + keys = keys_end; + keys_end = keys_end * 2; + + std::vector chunks; + db->util_divide_key_range( + "O", "", "", + 10, 10000000, 0.1, chunks); + auto [all_keys, max_keys] = get_split_quality("O", chunks); + double speedup = double(all_keys) / max_keys; + if (verbose) std::cout << "quality=" << double(all_keys) / max_keys / chunks.size() + << " speedup=" << speedup << std::endl; + EXPECT_EQ(all_keys, keys * 2); + if (all_keys > 1000000) { + EXPECT_GT(speedup, 5); + } + scan_split_quality("O"); + } + db->close(); +} + +TEST_F(RocksDBSplitRange, grow_prune_tree) { + ASSERT_EQ(0, db->create_and_open(cout, "O")); + + TreeGen t(70, 30, "l", "r"); + DataToDB to_db(db.get(), "O"); + vector keys_to_test = {0, 1000, 10000, 100000, 1'000'000, 2'000'000, 5'000'000, 10'000'000}; + if (is_jenkins()) keys_to_test = {1'000'000}; + uint64_t keys = 0; + uint64_t pruned_keys = 0; + for (auto keys_end : keys_to_test) { + if (verbose) std::cout << "grow " << keys << "->" << keys_end << std::endl; + grow_tree(t, to_db, keys, keys_end, "a", 1000, 300); + if (verbose) std::cout << "prune " << pruned_keys << "->" << keys / 2 << std::endl; + prune_tree(t, to_db, pruned_keys, keys / 2, "a"); + pruned_keys = keys / 2; + keys = keys_end; + keys_end = keys_end * 2; + + std::vector chunks; + db->util_divide_key_range( + "O", "", "", + 10, 10000000, 0.1, chunks); + auto [all_keys, max_keys] = get_split_quality("O", chunks); + double speedup = double(all_keys) / max_keys; + if (verbose) std::cout << "quality=" << double(all_keys) / max_keys / chunks.size() + << " speedup=" << speedup << std::endl; + EXPECT_EQ(all_keys, keys - pruned_keys); + if (all_keys > 1000000) { + EXPECT_GT(speedup, 5); + } + scan_split_quality("O"); + } + db->close(); +} + +TEST_F(RocksDBSplitRange, grow_tree_small_leaves) { + ASSERT_EQ(0, db->create_and_open(cout, "O")); + + TreeGen t(7, 3, "0", "1"); + DataToDB to_db(db.get(), "O"); + vector keys_to_test = { + 0, 1000, 10000, 100000, 1'000'000, 2'000'000, 5'000'000, 10'000'000, 20'000'000, 50'000'000}; + if (is_jenkins()) keys_to_test = {2'500'000}; + + uint64_t keys = 0; + for (auto keys_end : keys_to_test) { + if (verbose) std::cout << "grow " << keys << "->" << keys_end << std::endl; + grow_tree(t, to_db, keys, keys_end, "aaaa", 100, 30); + keys = keys_end; + keys_end = keys_end * 2; + + std::vector chunks; + db->util_divide_key_range( + "O", "", "", + 10, 10000000, 0.05, chunks); + auto [all_keys, max_keys] = get_split_quality("O", chunks); + double speedup = double(all_keys) / max_keys; + if (verbose) std::cout << "quality=" << double(all_keys) / max_keys / chunks.size() + << " speedup=" << speedup << std::endl; + EXPECT_EQ(all_keys, keys); + if (all_keys > 2000000) { + EXPECT_GT(speedup, 6); + } + scan_split_quality("O"); + } + db->close(); +} + INSTANTIATE_TEST_SUITE_P( KeyValueDB,