From: Ronen Friedman Date: Mon, 29 Sep 2025 07:16:13 +0000 (-0500) Subject: common: ModeCollector: locating the value of the mode X-Git-Tag: testing/wip-vshankar-testing-20260219.125903~6^2~12^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=3efcdbf92fb29932d17e7ddca49bf6a9105bb3aa;p=ceph-ci.git common: ModeCollector: locating the value of the mode The ModeCollector class is used to collect values of some type 'key', each associated with some object identified by an 'ID'. The collector reports the 'mode' value - the value associated with the largest number of distinct IDs. The results structure returned by the collector specifies one of three possible mode_status_t values: - no_mode_value - No clear victory for any value - mode_value - we have a winner, but it has less than half of the samples - authorative_value - more than half of the samples are of the same value Signed-off-by: Ronen Friedman --- diff --git a/src/common/mode_collector.h b/src/common/mode_collector.h new file mode 100644 index 00000000000..1d60d263265 --- /dev/null +++ b/src/common/mode_collector.h @@ -0,0 +1,174 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +#pragma once +/** + * \file A (small) container for fast mode lookups + * ('mode' here is the statistical mode of a set of values, i.e. the + * value that appears most frequently in the set). + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +/** + * ModeCollector is designed to collect a set of values (e.g. - the data digest + * reported by each replica), associating each value with an object ID (in our + * example - the replica ID), and efficiently finding the mode (the value that + * appears most frequently) of the collected values. + * + * The template parameters are: + * - OBJ_ID: The type of the object ID (e.g., replica ID). + * - K: The type of the value being collected. + * - HSH: The hash function for K, to be used with the unordered_map. + * Note: if HSH is std::identity, then K must fit in size_t. + * - MAX_ELEM is used to calculate the estimated memory footprint of the + * unordered_map. + * + * ModeCollector uses a monotonic buffer resource to manage memory + * efficiently, avoiding frequent allocations and deallocations. + * My tests (see link for details and caveats) show that using the PMR + * allocator speeds up the mode-finding process by 20% to 40%. + */ + +struct ModeFinder { + + /// a 'non-templated' version of mode_status_t, to simplify usage. + enum class mode_status_t { + no_mode_value, ///< No clear victory for any value + mode_value, ///< we have a winner, but it appears in less than half + ///< of the samples + authorative_value ///< more than half of the samples are of the same value + }; +}; + +// note the use of std::identity: it's a pretty fast hash function, +// but we are restricted to size_t sized keys (per stdlib implementation +// of the unrdered map). + +template < + typename OBJ_ID, ///< how to identify the object that reported a value + typename K, ///< the type of the value being collected + typename HSH = std::identity, ///< the hash function for K + int MAX_ELEM = 12> + requires( + std::invocable && + sizeof(std::invoke_result_t) <= sizeof(size_t)) +class ModeCollector : public ModeFinder { + private: + struct node_type_t { + size_t m_count{0}; + OBJ_ID m_id; ///< Stores the object ID associated with this value + }; + + // estimated (upper limit) memory footprint of the unordered_map + // vvvvvvvvvvvvvvvvvvvvvvvvvvvv + // Bucket array: typically 2x num_elements for good load factor + static const size_t bucket_array_size = (MAX_ELEM * 2) * sizeof(void*); + // Node storage: each elem needs hash + next-ptr + static constexpr size_t node_overhead = sizeof(void*) + sizeof(size_t); + static constexpr size_t node_storage = + MAX_ELEM * (sizeof(K) + sizeof(node_type_t) + node_overhead); + // PMR allocator overhead (alignment, bookkeeping) + static constexpr size_t pmr_overhead_per_alloc = 16; // typical + // bucket array + nodes + static constexpr size_t total_overhead = pmr_overhead_per_alloc * 2; + static constexpr size_t m_estimated_memory_footprint = + bucket_array_size + node_storage + total_overhead; + // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + std::array m_buffer; + std::pmr::monotonic_buffer_resource m_mbr{m_buffer.data(), m_buffer.size()}; + + /// Map to store the occurrence count of each value + std::pmr::unordered_map< + K, + node_type_t, + HSH, + std::equal_to > + m_frequency_map; + + /// Actual count of elements added + size_t m_actual_count{0}; + + public: + using mode_status_t = ModeFinder::mode_status_t; + + struct results_t { + /// do we have a mode value? + mode_status_t tag; + /// the mode value (if any) + K key; + /// an object ID, "arbitrary" selected from the set of objects that + /// reported the mode value + OBJ_ID id; + /// the number of times the mode value was reported + size_t count; + auto operator<=>(const results_t& rhs) const = default; + }; + + explicit ModeCollector() : m_frequency_map(&m_mbr) + { + m_frequency_map.reserve(MAX_ELEM); + } + + /// Add a value to the collector + void insert(const OBJ_ID& obj, const K& value) noexcept + { + auto& node = m_frequency_map[value]; + node.m_count++; + // Store the object ID associated with this value + // (note: it's OK to overwrite the ID here) + node.m_id = obj; + m_actual_count++; + } + + + /** + * Find the mode of the collected values + * + * Note: we are losing ~4% performance due to find_mode() not being noexcept. + */ + results_t find_mode() + { + assert(!m_frequency_map.empty()); + + auto max_elem = std::ranges::max_element( + m_frequency_map, {}, + [](const auto& pair) { return pair.second.m_count; }); + + // Check for clear victory + if (max_elem->second.m_count > m_actual_count / 2) { + return { + mode_status_t::authorative_value, max_elem->first, + max_elem->second.m_id, max_elem->second.m_count}; + } + + // Check for possible ties + const auto max_elem_cnt = max_elem->second.m_count; + + max_elem->second.m_count = 0; // Reset the count of the max element + const auto second_best_elem = std::ranges::max_element( + m_frequency_map, {}, + [](const auto& pair) { return pair.second.m_count; }); + max_elem->second.m_count = max_elem_cnt; // Restore the count + + if (second_best_elem->second.m_count == max_elem_cnt) { + return { + mode_status_t::no_mode_value, max_elem->first, max_elem->second.m_id, + max_elem_cnt}; + } + + return { + mode_status_t::mode_value, max_elem->first, max_elem->second.m_id, + max_elem_cnt}; + } +}; + diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt index be950b70b51..444d1e29675 100644 --- a/src/test/CMakeLists.txt +++ b/src/test/CMakeLists.txt @@ -1013,6 +1013,12 @@ add_executable(unittest_not_before_queue add_ceph_unittest(unittest_not_before_queue) target_link_libraries(unittest_not_before_queue ceph-common) +# unittest_mode_collector +add_executable(unittest_mode_collector + test_mode_collector.cc) +add_ceph_unittest(unittest_mode_collector) +target_link_libraries(unittest_mode_collector ceph-common) + if(NOT WIN32) # unittest_on_exit add_executable(unittest_on_exit diff --git a/src/test/test_mode_collector.cc b/src/test/test_mode_collector.cc new file mode 100644 index 00000000000..6c9a1e467e7 --- /dev/null +++ b/src/test/test_mode_collector.cc @@ -0,0 +1,462 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include +#include +#include + +#include "common/mode_collector.h" +#include "gtest/gtest.h" + +using mode_status_t = ModeFinder::mode_status_t; + +// --- some Ceph structures look-alike --- + +struct shard_id_t { + int8_t id; + + shard_id_t() : id(0) {} + explicit constexpr shard_id_t(int8_t _id) : id(_id) {} + + explicit constexpr operator int8_t() const { return id; } + explicit constexpr operator int64_t() const { return id; } + explicit constexpr operator int() const { return id; } + explicit constexpr operator unsigned() const { return id; } + + const static shard_id_t NO_SHARD; + + static void generate_test_instances(std::list& ls) + { + ls.push_back(new shard_id_t(1)); + ls.push_back(new shard_id_t(2)); + } + + shard_id_t& operator++() + { + ++id; + return *this; + } + friend constexpr std::strong_ordering operator<=>( + const shard_id_t& lhs, + const shard_id_t& rhs) + { + return lhs.id <=> rhs.id; + } + + friend constexpr std::strong_ordering operator<=>( + int lhs, + const shard_id_t& rhs) + { + return lhs <=> rhs.id; + } + friend constexpr std::strong_ordering operator<=>( + const shard_id_t& lhs, + int rhs) + { + return lhs.id <=> rhs; + } + + shard_id_t& operator=(int other) + { + id = other; + return *this; + } + bool operator==(const shard_id_t& other) const { return id == other.id; } + + shard_id_t operator+(int other) const { return shard_id_t(id + other); } + shard_id_t operator-(int other) const { return shard_id_t(id - other); } +}; + + +struct pg_shard_t { + static const int32_t NO_OSD = 0x7fffffff; + int32_t osd; + shard_id_t shard; + pg_shard_t() : osd(-1), shard(shard_id_t::NO_SHARD) {} + explicit pg_shard_t(int osd) : osd(osd), shard(shard_id_t::NO_SHARD) {} + pg_shard_t(int osd, shard_id_t shard) : osd(osd), shard(shard) {} + bool is_undefined() const { return osd == -1; } + std::string get_osd() const + { + return (osd == NO_OSD ? "NONE" : std::to_string(osd)); + } + static void generate_test_instances(std::list& o) + { + o.push_back(new pg_shard_t); + o.push_back(new pg_shard_t(1)); + o.push_back(new pg_shard_t(1, shard_id_t(2))); + } + auto operator<=>(const pg_shard_t&) const = default; +}; + +struct ceph_eversion { + uint32_t epoch; // note: originally 'le'32_t + uint64_t version; +} __attribute__((packed)); + +typedef uint64_t version_t; +typedef uint32_t + epoch_t; // map epoch (32bits -> 13 epochs/second for 10 years) + + +class eversion_t { + public: + version_t version; + epoch_t epoch; + __u32 __pad; + eversion_t() : version(0), epoch(0), __pad(0) {} + eversion_t(epoch_t e, version_t v) : version(v), epoch(e), __pad(0) {} + + // cppcheck-suppress noExplicitConstructor + eversion_t(const ceph_eversion& ce) + : version(ce.version) + , epoch(ce.epoch) + , __pad(0) + {} + + + static const eversion_t& max() + { + static const eversion_t max(-1, -1); + return max; + } + + operator ceph_eversion() + { + ceph_eversion c; + c.epoch = epoch; + c.version = version; + return c; + } + + static std::list generate_test_instances() + { + std::list o; + o.emplace_back(); + o.push_back(eversion_t(1, 2)); + return o; + } +}; + +inline bool operator==(const eversion_t& l, const eversion_t& r) +{ + return (l.epoch == r.epoch) && (l.version == r.version); +} +inline bool operator!=(const eversion_t& l, const eversion_t& r) +{ + return (l.epoch != r.epoch) || (l.version != r.version); +} +inline bool operator<(const eversion_t& l, const eversion_t& r) +{ + return (l.epoch == r.epoch) ? (l.version < r.version) : (l.epoch < r.epoch); +} +inline bool operator<=(const eversion_t& l, const eversion_t& r) +{ + return (l.epoch == r.epoch) ? (l.version <= r.version) : (l.epoch <= r.epoch); +} +inline bool operator>(const eversion_t& l, const eversion_t& r) +{ + return (l.epoch == r.epoch) ? (l.version > r.version) : (l.epoch > r.epoch); +} +inline bool operator>=(const eversion_t& l, const eversion_t& r) +{ + return (l.epoch == r.epoch) ? (l.version >= r.version) : (l.epoch >= r.epoch); +} +inline std::ostream& operator<<(std::ostream& out, const eversion_t& e) +{ + return out << e.epoch << "'" << e.version; +} + +namespace std { +template <> +struct hash { + size_t operator()(const eversion_t& ev) const noexcept + { + // Combine epoch and version with a simple shift-based mix + // This is fast and works well when differences are small + return (size_t)ev.epoch ^ ((size_t)ev.version << 8 | + (size_t)ev.version >> (sizeof(size_t) * 8 - 8)); + } +}; +} // namespace std + +// --------------------------------------------------------------------------- +// using uint64_t as key and object ID +using MP_u64_u64_ident = ModeCollector; + +// using pg_shard_t as object ID, and eversion_t (w/ std::hash) as key +using MP_pg_shard_t_eversion_t = + ModeCollector>; + + +template +struct ModeCollectorTestB : public ::testing::Test { + T mc; + + void collect(const DT& test_case) + { + for (const auto& [key, value] : test_case.data) { + mc.insert(key, value); + } + } + + void verify(const DT& test_case) + { + const auto r = mc.find_mode(); + EXPECT_EQ(r.tag, test_case.expected.tag); + if (r.tag == mode_status_t::authorative_value) { + EXPECT_EQ(r.key, test_case.expected.key); + EXPECT_EQ(r.count, test_case.expected.count); + // object ID is "arbitrary" - just check it is one of the expected ones + bool found = false; + for (const auto& [k, v] : test_case.data) { + if (v == r.key && k == test_case.expected.id) { + found = true; + break; + } + } + EXPECT_TRUE(found); + } + } + + void expect_wrong_ID(const DT& test_case) + { + const auto r = mc.find_mode(); + EXPECT_EQ(r.tag, test_case.expected.tag); + if (r.tag == mode_status_t::authorative_value) { + EXPECT_EQ(r.key, test_case.expected.key); + EXPECT_EQ(r.count, test_case.expected.count); + // object ID is "arbitrary" - just check it is one of the expected ones + bool found = false; + for (const auto& [k, v] : test_case.data) { + if (v == r.key && k == test_case.expected.id) { + found = true; + break; + } + } + EXPECT_FALSE(found); + } + } +}; + + +// --------------------------------------------------------------------------- +// basic tests - simple keys and objects + +struct data_n_res_t { + std::vector> data; + MP_u64_u64_ident::results_t expected; +}; + + +using ModeCollectorTest = ModeCollectorTestB; + + +TEST_F(ModeCollectorTest, basic_ints_1) +{ + static const std::vector> test_data{ + std::pair(1000, 101), std::pair(1001, 2), std::pair(1002, 101), + std::pair(1003, 3), std::pair(1004, 101), + }; + + static const data_n_res_t test_case{ + test_data, MP_u64_u64_ident::results_t{ + mode_status_t::authorative_value, 101, 1004, 3}}; + + collect(test_case); + verify(test_case); +} + +TEST_F(ModeCollectorTest, basic_ints_2) +{ + static const std::vector> test_data{ + std::pair(1000, 101), std::pair(1001, 2), std::pair(1002, 101), + std::pair(1003, 2), std::pair(1004, 101), std::pair(1005, 2), + }; + + static const data_n_res_t test_case{ + test_data, MP_u64_u64_ident::results_t{ + MP_u64_u64_ident::mode_status_t::no_mode_value, 0, 0, 0}}; + + collect(test_case); + verify(test_case); +} + +TEST_F(ModeCollectorTest, basic_ints_3) +{ + static const std::vector> test_data{ + std::pair(1000, 101), std::pair(1001, 2), std::pair(1002, 101), + std::pair(1003, 3), std::pair(1004, 4), std::pair(1005, 5), + }; + + static const data_n_res_t test_case{ + test_data, + MP_u64_u64_ident::results_t{ + MP_u64_u64_ident::mode_status_t::mode_value, 101, 1002, 2}}; + + collect(test_case); + verify(test_case); +} + +TEST_F(ModeCollectorTest, basic_ints_4) +{ + static const std::vector> test_data{ + std::pair(1000, 10), std::pair(1001, 11), std::pair(1002, 12), + std::pair(1003, 13), std::pair(1004, 14), std::pair(1005, 15), + }; + + static const data_n_res_t test_case{ + test_data, MP_u64_u64_ident::results_t{ + MP_u64_u64_ident::mode_status_t::no_mode_value, 0, 0, 0}}; + + collect(test_case); + verify(test_case); +} + +// edge cases + +TEST_F(ModeCollectorTest, edge_ints_smallest) +{ + static const std::vector> test_data{ + std::pair(1000, 99), + }; + + static const data_n_res_t test_case{ + test_data, MP_u64_u64_ident::results_t{ + mode_status_t::authorative_value, 99, 1000, 1}}; + + collect(test_case); + verify(test_case); +} + +TEST_F(ModeCollectorTest, edge_ints_pair) +{ + static const std::vector> test_data{ + std::pair(1000, 99), + std::pair(1001, 55), + }; + + static const data_n_res_t test_case{ + test_data, + MP_u64_u64_ident::results_t{mode_status_t::no_mode_value, 0, 0, 0}}; + + collect(test_case); + verify(test_case); +} + + +// --------------------------------------------------------------------------- +// shards & versions + +struct data_n_res_t_2 { + std::vector> data; + MP_pg_shard_t_eversion_t::results_t expected; +}; + +using ModeCollectorTest_2 = + ModeCollectorTestB; + + +TEST_F(ModeCollectorTest_2, basic_ev_1) +{ + static const std::vector> test_data{ + std::pair(pg_shard_t{1}, eversion_t{1001, 1001}), + std::pair(pg_shard_t{2}, eversion_t{1002, 1002}), + std::pair(pg_shard_t{3}, eversion_t{1001, 1001}), + std::pair(pg_shard_t{4}, eversion_t{1001, 1001}), + std::pair(pg_shard_t{5}, eversion_t{1005, 1005}), + }; + + static const data_n_res_t_2 test_case{ + test_data, MP_pg_shard_t_eversion_t::results_t{ + mode_status_t::authorative_value, eversion_t{1001, 1001}, + pg_shard_t{1}, 3}}; + + collect(test_case); + verify(test_case); +} + +TEST_F(ModeCollectorTest_2, large_ev_1) +{ + static const std::vector> test_data{ + std::pair(pg_shard_t{1}, eversion_t{1001, 1001}), + std::pair(pg_shard_t{2}, eversion_t{1002, 1002}), + std::pair(pg_shard_t{3}, eversion_t{1001, 1001}), + std::pair(pg_shard_t{4}, eversion_t{1001, 1001}), + std::pair(pg_shard_t{5}, eversion_t{1005, 1005}), + std::pair(pg_shard_t{6}, eversion_t{1001, 1001}), + std::pair(pg_shard_t{7}, eversion_t{1002, 1002}), + std::pair(pg_shard_t{8}, eversion_t{1001, 1001}), + std::pair(pg_shard_t{9}, eversion_t{1001, 1001}), + std::pair(pg_shard_t{10}, eversion_t{1005, 1005}), + std::pair(pg_shard_t{11}, eversion_t{1005, 1005}), + std::pair(pg_shard_t{12}, eversion_t{1012, 1012}), + std::pair(pg_shard_t{13}, eversion_t{1013, 1013}), + std::pair(pg_shard_t{14}, eversion_t{1014, 1014}), + }; + + static const data_n_res_t_2 test_case{ + test_data, + MP_pg_shard_t_eversion_t::results_t{ + mode_status_t::mode_value, eversion_t{1001, 1001}, pg_shard_t{1}, 3}}; + + collect(test_case); + verify(test_case); +} + + +TEST_F(ModeCollectorTest_2, large_ev_2) +{ + static const std::vector> test_data{ + std::pair(pg_shard_t{1}, eversion_t{1001, 1001}), + std::pair(pg_shard_t{2}, eversion_t{1002, 1002}), + std::pair(pg_shard_t{3}, eversion_t{1001, 1001}), + std::pair(pg_shard_t{4}, eversion_t{1001, 1001}), + std::pair(pg_shard_t{5}, eversion_t{1005, 1005}), + std::pair(pg_shard_t{6}, eversion_t{1001, 1001}), + std::pair(pg_shard_t{7}, eversion_t{1002, 1002}), + std::pair(pg_shard_t{8}, eversion_t{1001, 1001}), + std::pair(pg_shard_t{9}, eversion_t{1001, 1001}), + std::pair(pg_shard_t{10}, eversion_t{1005, 1005}), + std::pair(pg_shard_t{11}, eversion_t{1005, 1005}), + std::pair(pg_shard_t{12}, eversion_t{1001, 1001}), + std::pair(pg_shard_t{13}, eversion_t{1001, 1001}), + std::pair(pg_shard_t{14}, eversion_t{1014, 1014}), + }; + + static const data_n_res_t_2 test_case{ + test_data, MP_pg_shard_t_eversion_t::results_t{ + mode_status_t::authorative_value, eversion_t{1001, 1001}, + pg_shard_t{1}, 8}}; + + collect(test_case); + verify(test_case); +} + +TEST_F(ModeCollectorTest_2, failedID_ev) +{ + static const std::vector> test_data{ + std::pair(pg_shard_t{1}, eversion_t{1001, 1001}), + std::pair(pg_shard_t{2}, eversion_t{1002, 1002}), + std::pair(pg_shard_t{3}, eversion_t{1001, 1001}), + std::pair(pg_shard_t{4}, eversion_t{1001, 1001}), + std::pair(pg_shard_t{5}, eversion_t{1005, 1005}), + std::pair(pg_shard_t{6}, eversion_t{1001, 1001}), + std::pair(pg_shard_t{7}, eversion_t{1002, 1002}), + std::pair(pg_shard_t{8}, eversion_t{1001, 1001}), + std::pair(pg_shard_t{9}, eversion_t{1001, 1001}), + std::pair(pg_shard_t{10}, eversion_t{1005, 1005}), + std::pair(pg_shard_t{11}, eversion_t{1005, 1005}), + std::pair(pg_shard_t{12}, eversion_t{1001, 1001}), + std::pair(pg_shard_t{13}, eversion_t{1001, 1001}), + std::pair(pg_shard_t{14}, eversion_t{1014, 1014}), + }; + + static const data_n_res_t_2 test_case{ + test_data, MP_pg_shard_t_eversion_t::results_t{ + mode_status_t::authorative_value, eversion_t{1001, 1001}, + pg_shard_t{14}, 8}}; + + collect(test_case); + expect_wrong_ID(test_case); +}