]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
test/osd: Add extensive new PGBackend test harness.
author: Alex Ainscow <aainscow@uk.ibm.com>
Sun, 1 Mar 2026 22:33:52 +0000 (22:33 +0000)
committer: Alex Ainscow <aainscow@uk.ibm.com>
Tue, 24 Mar 2026 17:33:55 +0000 (17:33 +0000)
The existing unittest harness for EC was limited to functionality
that did not require bluestore, messaging or other infrastructure.
This significantly limited the testing possible in this framework.

This test harness provides this missing infrastructure:

- Multiple OSDs
- Messaging
- Single threaded event loop for consistency.
- Event loop can be single stepped or intercepted.

New test infrastructure files:
- ECPeeringTestFixture.cc/h: Test fixture for EC peering tests
- PGBackendTestFixture.cc/h: Test fixture for PGBackend tests
- EventLoop.h: Simple event loop for test message dispatch
- OSDMapTestHelpers.h: Helper functions for OSDMap test setup

New test files:
- TestBackendBasics.cc: Basic PGBackend unit tests
- TestECFailoverWithPeering.cc: EC failover tests with full peering

Signed-off-by: Alex Ainscow <aainscow@uk.ibm.com>
19 files changed:
src/test/osd/CMakeLists.txt
src/test/osd/ECPeeringTestFixture.cc [new file with mode: 0644]
src/test/osd/ECPeeringTestFixture.h [new file with mode: 0644]
src/test/osd/EventLoop.h [new file with mode: 0644]
src/test/osd/MockConnection.h
src/test/osd/MockECReadPred.h
src/test/osd/MockECRecPred.h
src/test/osd/MockLog.h [deleted file]
src/test/osd/MockPGBackend.h
src/test/osd/MockPGBackendListener.h
src/test/osd/MockPGLogEntryHandler.h
src/test/osd/MockPeeringListener.h
src/test/osd/OSDMapTestHelpers.h [new file with mode: 0644]
src/test/osd/PGBackendTestFixture.cc [new file with mode: 0644]
src/test/osd/PGBackendTestFixture.h [new file with mode: 0644]
src/test/osd/TestBackendBasics.cc [new file with mode: 0644]
src/test/osd/TestCommon.h [new file with mode: 0644]
src/test/osd/TestECFailoverWithPeering.cc [new file with mode: 0644]
src/test/osd/TestPeeringState.cc

index f4b59c8c7b5bf73f09ddf6b6fc4d9f22ade12b6b..0b7f7c7dba0aadfcaa8c898e213191c8dc14eeb4 100644 (file)
@@ -81,6 +81,7 @@ target_link_libraries(unittest_ecbackend osd global)
 add_executable(unittest_ecutil
         TestECUtil.cc
         $<TARGET_OBJECTS:unit-main>
+        $<TARGET_OBJECTS:erasure_code_objs>
 )
 add_ceph_unittest(unittest_ecutil)
 target_link_libraries(unittest_ecutil osd global)
@@ -116,6 +117,38 @@ add_executable(unittest_peeringstate
   )
 add_ceph_unittest(unittest_peeringstate)
 target_link_libraries(unittest_peeringstate osd os global ${CMAKE_DL_LIBS} ${BLKID_LIBRARIES})
+# pg_backend_test_fixture: object library for PGBackendTestFixture implementation
+add_library(pg_backend_test_fixture OBJECT
+  PGBackendTestFixture.cc
+)
+target_link_libraries(pg_backend_test_fixture osd os global)
+
+# ec_peering_test_fixture: object library for ECPeeringTestFixture implementation
+add_library(ec_peering_test_fixture OBJECT
+  ECPeeringTestFixture.cc
+)
+target_link_libraries(ec_peering_test_fixture osd os global)
+
+# unittest_backend_basics (replaces unittest_ecbasics + unittest_replicatedbasics)
+add_executable(unittest_backend_basics
+  TestBackendBasics.cc
+  $<TARGET_OBJECTS:unit-main>
+  $<TARGET_OBJECTS:pg_backend_test_fixture>
+  )
+add_ceph_unittest(unittest_backend_basics)
+target_link_libraries(unittest_backend_basics osd os global ${CMAKE_DL_LIBS} ${BLKID_LIBRARIES})
+# Build-order-only deps: ec_isa/ec_jerasure are EC plugins, presumably loaded
+# at test runtime rather than linked — confirm.
+add_dependencies(unittest_backend_basics ec_isa ec_jerasure)
+# unittest_ecfailover_with_peering
+# Links store_test_fixture in addition to the PG/EC fixtures above.
+add_executable(unittest_ecfailover_with_peering
+  TestECFailoverWithPeering.cc
+  $<TARGET_OBJECTS:unit-main>
+  $<TARGET_OBJECTS:store_test_fixture>
+  $<TARGET_OBJECTS:pg_backend_test_fixture>
+  $<TARGET_OBJECTS:ec_peering_test_fixture>
+  )
+add_ceph_unittest(unittest_ecfailover_with_peering)
+target_link_libraries(unittest_ecfailover_with_peering osd os global ${CMAKE_DL_LIBS} ${BLKID_LIBRARIES})
+# NOTE(review): only ec_isa here while unittest_backend_basics also depends on
+# ec_jerasure — confirm the jerasure plugin is not needed by these tests.
+add_dependencies(unittest_ecfailover_with_peering ec_isa)
 # unittest_hitset
 add_executable(unittest_hitset
   hitset.cc
@@ -178,3 +211,46 @@ add_ceph_unittest(unittest_mclock_scheduler)
 target_link_libraries(unittest_mclock_scheduler
   global osd dmclock os
 )
+
+# osd_unittests: custom target that builds and runs all OSD unit tests
+# Not including unittest_osdmap, as it is slow. It is tested elsewhere.
+set(OSD_UNITTESTS
+  unittest_backend_basics
+  unittest_ec_transaction
+  unittest_ec_transaction_l
+  unittest_ecbackend
+  unittest_ecbackend_l
+  unittest_ecfailover_with_peering
+  unittest_ecutil
+  unittest_extent_cache
+  unittest_extent_cache_l
+  unittest_hitset
+  unittest_mclock_scheduler
+  unittest_osd_osdcap
+  unittest_osd_types
+  unittest_osdscrub
+  unittest_peeringstate
+  unittest_pg_transaction
+  unittest_pglog
+  unittest_scrubber_be
+)
+
+# osd_unittests: build all OSD unit tests and run them via ctest.
+# This is for development convenience only, it is not used as part of make
+# check.
+# Using ctest ensures:
+#   - All tests run even if one fails (--no-tests-on-failure continues)
+#   - Output is shown for failing tests (--output-on-failure)
+#   - Adding a new test only requires adding it to OSD_UNITTESTS above
+#   - Excludes unittest_osdmap because it is relatively slow.
+# The anchored regex (^...$) prevents accidental matches against tests whose
+# names merely contain one of the listed names as a substring.
+string(JOIN "|" OSD_UNITTEST_REGEX ${OSD_UNITTESTS})
+add_custom_target(osd_unittests
+  DEPENDS ${OSD_UNITTESTS}
+  COMMAND ${CMAKE_CTEST_COMMAND}
+    --test-dir ${CMAKE_BINARY_DIR}
+    -R "^(${OSD_UNITTEST_REGEX})$"
+    --output-on-failure
+  WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
+  COMMENT "Building and running all OSD unit tests via ctest"
+  VERBATIM
+)
diff --git a/src/test/osd/ECPeeringTestFixture.cc b/src/test/osd/ECPeeringTestFixture.cc
new file mode 100644 (file)
index 0000000..25f031d
--- /dev/null
@@ -0,0 +1,308 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+// vim: ts=8 sw=2 sts=2 expandtab
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2026 IBM
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#include "test/osd/ECPeeringTestFixture.h"
+
+// Build the complete per-shard peering stack (dout prefix provider, peering
+// listener, backend, PeeringState) and wire the pieces together.  Returns a
+// borrowed pointer; ownership stays in shard_peering_states[shard].
+PeeringState* ECPeeringTestFixture::create_peering_state(int shard)
+{
+  const pg_pool_t& pi = get_pool();
+  pg_shard_t pg_whoami(shard, shard_id_t(shard));
+  PGPool pool(osdmap, pool_id, pi, "test_pool");
+
+  shard_dpps[shard] = std::make_unique<ShardDpp>(g_ceph_context, this, shard);
+
+  shard_peering_listeners[shard] = std::make_unique<MockPeeringListener>(
+    osdmap, pool_id, shard_dpps[shard].get(), pg_whoami);
+  shard_peering_listeners[shard]->current_epoch = osdmap->get_epoch();
+
+  // Route the listener's transactions into this fixture's ObjectStore.
+  shard_peering_listeners[shard]->queue_transaction_callback =
+    [this, shard](ObjectStore::Transaction&& t) -> int {
+      return queue_transaction_helper(shard, std::move(t));
+    };
+
+  // Transfer ownership of the backend listener from the base class listeners[]
+  // map into the peering listener.  The factory (set in our constructor) already
+  // recorded a raw pointer in backend_listeners[] so we know which entry to move.
+  // After the move, listeners[shard] holds a null unique_ptr; TearDown() already
+  // guards against that with "if (list)".
+  shard_peering_listeners[shard]->backend_listener = std::move(listeners[shard]);
+  shard_peering_listeners[shard]->coll = colls[shard];
+  shard_peering_listeners[shard]->ch = chs[shard];
+
+  // Recreate backend with the correct backend_listener pointer.
+  // The MockPeeringListener constructor created backend with the temporary
+  // backend_listener it allocated internally, but we just replaced backend_listener
+  // with the one from the base class listeners[] map.  We must recreate backend
+  // so its parent pointer points to the new backend_listener, not the destroyed one.
+  shard_peering_listeners[shard]->backend = std::make_unique<MockPGBackend>(
+    g_ceph_context,
+    shard_peering_listeners[shard]->backend_listener.get(),
+    nullptr,
+    colls[shard],
+    chs[shard]);
+
+  spg_t spgid(pgid, shard_id_t(shard));
+  auto ps = std::make_unique<PeeringState>(
+    g_ceph_context,
+    pg_whoami,
+    spgid,
+    pool,
+    osdmap,
+    PG_FEATURE_CLASSIC_ALL,
+    shard_dpps[shard].get(),
+    shard_peering_listeners[shard].get());
+
+  shard_peering_listeners[shard]->ps = ps.get();
+  
+  // EC-aware readable/recoverable predicates (see get_is_*_predicate()).
+  ps->set_backend_predicates(
+    get_is_readable_predicate(),
+    get_is_recoverable_predicate());
+
+  shard_peering_states[shard] = std::move(ps);
+  shard_peering_listeners[shard]->backend_listener->set_peering_state(shard_peering_states[shard].get());
+  shard_peering_ctxs[shard] = std::make_unique<PeeringCtx>();
+
+  return shard_peering_states[shard].get();
+}
+
+// Call PeeringState::init() on every acting shard with a freshly built
+// history.  'dne' presumably stands for "does not exist": when true,
+// history.epoch_created is left at its default value — confirm intent.
+void ECPeeringTestFixture::init_peering(bool dne)
+{
+  pg_history_t history;
+  history.same_interval_since = osdmap->get_epoch();
+  history.epoch_pool_created = osdmap->get_epoch();
+  history.last_epoch_clean = osdmap->get_epoch();
+  if (!dne) {
+    history.epoch_created = osdmap->get_epoch();
+  }
+  PastIntervals past_intervals;
+
+  // Get primary from OSDMap using base class pgid member
+  std::vector<int> up_osds, acting_osds;
+  int up_primary = -1, acting_primary = -1;
+  osdmap->pg_to_up_acting_osds(this->pgid, &up_osds, &up_primary, &acting_osds, &acting_primary);
+
+  for (int shard : acting_osds) {
+    ObjectStore::Transaction t;
+    // Role is 0 for the primary, 1 otherwise.
+    get_peering_state(shard)->init(
+      (shard == acting_primary) ? 0 : 1,  // role
+      up_osds,
+      up_primary,
+      acting_osds,
+      acting_primary,
+      history,
+      past_intervals,
+      t);
+
+    queue_transaction_helper(shard, std::move(t));
+  }
+}
+
+// Install a new OSDMap, drive advance_map/activate_map on every shard that
+// has a PeeringState, dispatch resulting traffic, then publish up to three
+// extra epochs to satisfy any up_thru / pg_temp requests from peering.
+void ECPeeringTestFixture::update_osdmap_with_peering(
+  std::shared_ptr<OSDMap> new_osdmap,
+  std::optional<pg_shard_t> new_primary)
+{
+  OSDMapRef old_osdmap = osdmap;
+
+  update_osdmap(new_osdmap, new_primary);
+
+  // Update peering listeners for ALL shards (even failed ones need epoch updates)
+  for (auto& [shard, listener] : shard_peering_listeners) {
+    listener->current_epoch = new_osdmap->get_epoch();
+  }
+
+  // Get primary from OSDMap for advance_map calls using base class pgid member
+  std::vector<int> up_osds, acting_osds;
+  int up_primary = -1, acting_primary = -1;
+  osdmap->pg_to_up_acting_osds(this->pgid, &up_osds, &up_primary, &acting_osds, &acting_primary);
+
+  // Call advance_map on ALL shards that have peering states, including failed ones
+  // This ensures that failed OSDs are notified of map changes (e.g., primary failover)
+  // Use the newly computed up_osds and acting_osds from the new OSDMap
+  for (auto& [shard, ps] : shard_peering_states) {
+    ps->advance_map(
+      osdmap, old_osdmap, up_osds, up_primary, acting_osds, acting_primary,
+      *get_peering_ctx(shard));
+  }
+
+  // Call activate_map on ALL shards that have peering states
+  // This ensures failed OSDs properly transition state and notify their backends
+  for (auto& [shard, ps] : shard_peering_states) {
+    ps->activate_map(*get_peering_ctx(shard));
+  }
+
+  dispatch_all();
+
+  // Handle up_thru requirements - keep creating new epochs until peering completes.
+  // Note: For primary failover scenarios, full peering may not complete immediately.
+  // NOTE(review): unlike run_peering_cycle(), this loop does not call
+  // dispatch_all() between event_advance_map()/event_activate_map() — confirm
+  // this is intentional.
+  int max_iterations = 3;
+  do {
+    event_advance_map();
+    event_activate_map();
+  } while (new_epoch(true) && --max_iterations);
+}
+
+// Emulate the monitor publishing a new OSDMap epoch: fold pending up_thru
+// updates and pg_temp requests into an incremental and apply it.  With
+// if_required=true, returns false (and publishes nothing) when no shard
+// needed anything; otherwise always publishes and returns true.
+bool ECPeeringTestFixture::new_epoch(bool if_required)
+{
+  bool did_work = false;
+  epoch_t e = osdmap->get_epoch();
+  OSDMap::Incremental pending_inc(e + 1);
+  pending_inc.fsid = osdmap->get_fsid();
+
+  // Get acting set from OSDMap
+  std::vector<int> acting_osds;
+  int acting_primary = -1;
+  osdmap->pg_to_acting_osds(this->pgid, &acting_osds, &acting_primary);
+
+  for (int shard : acting_osds) {
+    // Skip failed OSDs (marked as CRUSH_ITEM_NONE)
+    if (shard == CRUSH_ITEM_NONE) {
+      continue;
+    }
+    if (get_peering_state(shard)->get_need_up_thru()) {
+      pending_inc.new_up_thru[shard] = e;
+      did_work = true;
+    }
+  }
+
+  // Only the primary's listener can request pg_temp.
+  if (acting_primary >= 0) {
+    auto& listener = shard_peering_listeners[acting_primary];
+    if (listener->pg_temp_wanted) {
+      // Get up set from OSDMap
+      std::vector<int> up_osds;
+      int up_primary = -1;
+      osdmap->pg_to_up_acting_osds(this->pgid, &up_osds, &up_primary, nullptr, nullptr);
+      
+      // Fall back to the up set when peering did not specify an acting set.
+      std::vector<int> acting_temp = listener->next_acting;
+      if (acting_temp.empty()) {
+        acting_temp = up_osds;
+      }
+      
+      // Apply the pg_temp change that peering requested.
+      // For EC pools with optimizations, transform to primaryfirst order
+      // (this simulates what the monitor does in production).
+      const pg_pool_t* pool = osdmap->get_pg_pool(this->pgid.pool());
+      std::vector<int> pg_temp_acting = acting_temp;
+      if (pool && pool->allows_ecoptimizations()) {
+        pg_temp_acting = osdmap->pgtemp_primaryfirst(*pool, acting_temp);
+      }
+      
+      pending_inc.new_pg_temp[this->pgid] =
+        mempool::osdmap::vector<int>(pg_temp_acting.begin(), pg_temp_acting.end());
+      
+      listener->pg_temp_wanted = false;
+      did_work = true;
+    }
+  }
+
+  if (!did_work && if_required) {
+    return false;
+  }
+
+  osdmap->apply_incremental(pending_inc);
+
+  // Keep every listener's notion of the current epoch in sync.
+  for (auto& [shard, listener] : shard_peering_listeners) {
+    listener->current_epoch = osdmap->get_epoch();
+  }
+
+  return true;
+}
+
+// Drive a complete peering cycle: init, Initialize event, map advance and
+// activation, then up to 10 extra epochs until all shards report active.
+void ECPeeringTestFixture::run_peering_cycle()
+{
+  init_peering();
+  event_initialize();
+  dispatch_all();
+  event_advance_map();
+  dispatch_all();
+  event_activate_map();
+  dispatch_all();
+
+  // Handle up_thru requirements - keep creating new epochs until peering completes.
+  int max_iterations = 10;
+  for (int i = 0; i < max_iterations && !all_shards_active(); i++) {
+    if (new_epoch(true)) {
+      event_advance_map();
+      dispatch_all();
+      event_activate_map();
+      dispatch_all();
+    }
+  }
+}
+
+// Queue a transaction on the shard's ObjectStore collection handle.
+// Empty transactions are a no-op and report success.
+int ECPeeringTestFixture::queue_transaction_helper(int shard, ObjectStore::Transaction&& t)
+{
+  if (t.empty()) {
+    return 0;
+  }
+
+  // Note: Contexts are stolen by MockPGBackendListener::queue_transaction,
+  // so we don't need to call execute_finishers here
+  int result = store->queue_transaction(chs[shard], std::move(t));
+
+  return result;
+}
+
+// Mark a single OSD down in a new OSDMap epoch and re-run peering.
+void ECPeeringTestFixture::mark_osd_down(int osd_id)
+{
+  // Create new OSDMap with the OSD marked as down
+  // This emulates what the real monitor does: just mark the OSD down,
+  // do NOT set pg_temp. Peering will detect the change and request pg_temp.
+  auto new_osdmap = std::make_shared<OSDMap>();
+  new_osdmap->deepish_copy_from(*osdmap);
+  OSDMapTestHelpers::mark_osd_down(new_osdmap, osd_id);
+  
+  update_osdmap_with_peering(new_osdmap);
+  dispatch_all();
+  
+  // Process any pg_temp requests from peering (emulates monitor processing MOSDPGTemp)
+  // This will apply the primaryfirst transformation if needed
+  if (new_epoch(false)) {
+    event_advance_map();
+    dispatch_all();
+  }
+}
+
+// Mark a single OSD up in a new OSDMap epoch and re-run peering.
+void ECPeeringTestFixture::mark_osd_up(int osd_id)
+{
+  // Create new OSDMap with the OSD marked as up using OSDMapTestHelpers
+  auto new_osdmap = std::make_shared<OSDMap>();
+  new_osdmap->deepish_copy_from(*osdmap);
+  OSDMapTestHelpers::mark_osd_up(new_osdmap, osd_id);
+  
+  update_osdmap_with_peering(new_osdmap);
+  dispatch_all();
+}
+
+// Mark several OSDs down in one new OSDMap epoch and re-run peering.
+void ECPeeringTestFixture::mark_osds_down(const std::vector<int>& osd_ids)
+{
+  // Create new OSDMap with all OSDs marked as down using OSDMapTestHelpers
+  auto new_osdmap = std::make_shared<OSDMap>();
+  new_osdmap->deepish_copy_from(*osdmap);
+  OSDMapTestHelpers::mark_osds_down(new_osdmap, osd_ids);
+  
+  update_osdmap_with_peering(new_osdmap);
+  dispatch_all();
+}
+
+// Publish a new epoch with no OSD state changes, then re-run peering.
+void ECPeeringTestFixture::advance_epoch()
+{
+  auto new_osdmap = std::make_shared<OSDMap>();
+  new_osdmap->deepish_copy_from(*osdmap);
+  OSDMapTestHelpers::advance_epoch(new_osdmap);
+  
+  update_osdmap_with_peering(new_osdmap);
+  dispatch_all();
+}
+
diff --git a/src/test/osd/ECPeeringTestFixture.h b/src/test/osd/ECPeeringTestFixture.h
new file mode 100644 (file)
index 0000000..5c33603
--- /dev/null
@@ -0,0 +1,499 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+// vim: ts=8 sw=2 sts=2 expandtab
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2026 IBM
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#pragma once
+
+#include <memory>
+#include <map>
+#include <vector>
+#include "test/osd/PGBackendTestFixture.h"
+#include "test/osd/MockPeeringListener.h"
+#include "test/osd/MockConnection.h"
+#include "test/osd/MockECRecPred.h"
+#include "test/osd/MockECReadPred.h"
+#include "test/osd/OSDMapTestHelpers.h"
+#include "osd/PeeringState.h"
+#include "messages/MOSDPeeringOp.h"
+
+/**
+ * ECPeeringTestFixture - EC test fixture with full peering infrastructure
+ *
+ * This fixture extends PGBackendTestFixture to add full PeeringState support
+ * for each shard, enabling comprehensive testing of EC peering, recovery,
+ * and failover scenarios. It combines the principles from TestPeeringState
+ * with the EC backend infrastructure from PGBackendTestFixture.
+ */
+class ECPeeringTestFixture : public PGBackendTestFixture {
+protected:
+  // Per-shard peering machinery, keyed by shard/OSD id.
+  std::map<int, std::unique_ptr<PeeringState>> shard_peering_states;
+  std::map<int, std::unique_ptr<PeeringCtx>> shard_peering_ctxs;
+  std::map<int, std::unique_ptr<MockPeeringListener>> shard_peering_listeners;
+  
+  std::map<int, std::list<MessageRef>> shard_messages;
+  std::map<int, std::list<PGPeeringEventRef>> shard_events;
+
+  // Raw-pointer map giving this fixture direct access to the backend listeners
+  // created by the listener_factory.  The pointers are valid for the lifetime
+  // of the test because ownership is transferred to
+  // shard_peering_listeners[i]->backend_listener in create_peering_state().
+  std::map<int, MockPGBackendListener*> backend_listeners;
+  
+  // Per-shard dout prefix provider: prefixes log lines with the shard id and,
+  // once available, the shard's current PeeringState.
+  class ShardDpp : public NoDoutPrefix {
+  public:
+    ECPeeringTestFixture *fixture;
+    int shard;
+    
+    ShardDpp(CephContext *cct, ECPeeringTestFixture *f, int s)
+      : NoDoutPrefix(cct, ceph_subsys_osd), fixture(f), shard(s) {}
+    
+    std::ostream& gen_prefix(std::ostream& out) const override {
+      out << "shard " << shard << ": ";
+      if (fixture->shard_peering_states.contains(shard)) {
+        PeeringState *ps = fixture->shard_peering_states[shard].get();
+        out << *ps << " ";
+      }
+      return out;
+    }
+  };
+  std::map<int, std::unique_ptr<ShardDpp>> shard_dpps;
+  
+  // Heap-allocated EC predicates; ownership is handed to
+  // PeeringState::set_backend_predicates() in create_peering_state() —
+  // presumably PeeringState deletes them; confirm.
+  IsPGRecoverablePredicate *get_is_recoverable_predicate() {
+    return new MockECRecPred(k, m);
+  }
+  
+  IsPGReadablePredicate *get_is_readable_predicate() {
+    return new MockECReadPred(k, m);
+  }
+
+public:
+  ECPeeringTestFixture() : PGBackendTestFixture(PGBackendTestFixture::EC) {
+    // Install a listener_factory so that setup_ec_pool() creates listeners
+    // that we can access directly (via backend_listeners[]) without needing
+    // to steal ownership via release_listener().
+    //
+    // The factory records a raw pointer in backend_listeners[instance] and
+    // returns the unique_ptr to the base class, which stores it in listeners[].
+    // In create_peering_state() we then move that unique_ptr from listeners[]
+    // into shard_peering_listeners[]->backend_listener, at which point the
+    // raw pointer in backend_listeners[] remains valid (owned by the peering
+    // listener for the rest of the test).
+    listener_factory = [this](
+      int instance,
+      std::shared_ptr<OSDMap> om,
+      int64_t pool_id,
+      DoutPrefixProvider* dpp_arg,
+      pg_shard_t whoami) -> std::unique_ptr<MockPGBackendListener>
+    {
+      auto bl = std::make_unique<MockPGBackendListener>(
+        om, pool_id, dpp_arg, whoami);
+      // Record raw pointer so tests can access the listener directly
+      backend_listeners[instance] = bl.get();
+      return bl;
+    };
+  }
+  
+  // Creates one PeeringState per shard (k data + m parity) on top of the
+  // base fixture's setup.
+  void SetUp() override {
+    PGBackendTestFixture::SetUp();
+    for (int i = 0; i < k + m; i++) {
+      create_peering_state(i);
+    }
+  }
+  
+  // Tear down peering objects before the base fixture they depend on.
+  void TearDown() override {
+    shard_peering_states.clear();
+    shard_peering_ctxs.clear();
+    shard_peering_listeners.clear();
+    shard_dpps.clear();
+    shard_messages.clear();
+    shard_events.clear();
+    PGBackendTestFixture::TearDown();
+  }
+  
+  PeeringState* create_peering_state(int shard);
+  
+  // Asserting accessors: abort if the shard index is out of range or the
+  // requested per-shard object has not been created.
+  PeeringState* get_peering_state(int shard) {
+    ceph_assert(shard >= 0 && shard < k + m);
+    auto it = shard_peering_states.find(shard);
+    ceph_assert(it != shard_peering_states.end());
+    ceph_assert(it->second != nullptr);
+    return it->second.get();
+  }
+  
+  PeeringCtx* get_peering_ctx(int shard) {
+    ceph_assert(shard >= 0 && shard < k + m);
+    auto it = shard_peering_ctxs.find(shard);
+    ceph_assert(it != shard_peering_ctxs.end());
+    ceph_assert(it->second != nullptr);
+    return it->second.get();
+  }
+  
+  MockPeeringListener* get_peering_listener(int shard) {
+    ceph_assert(shard >= 0 && shard < k + m);
+    auto it = shard_peering_listeners.find(shard);
+    ceph_assert(it != shard_peering_listeners.end());
+    ceph_assert(it->second != nullptr);
+    return it->second.get();
+  }
+  
+  /**
+   * Query the OSDMap to determine which shard is the primary.
+   * This is the authoritative source of truth for primary determination.
+   *
+   * @return The shard ID of the primary, or -1 if no primary exists
+   */
+  int get_primary_shard_from_osdmap() const {
+    std::vector<int> acting_osds;
+    int acting_primary = -1;
+    osdmap->pg_to_acting_osds(this->pgid, &acting_osds, &acting_primary);
+    return acting_primary;
+  }
+  
+  // Override base class methods to work with peering fixture's structure
+  // Returns nullptr when there is no primary or it has no backend listener.
+  MockPGBackendListener* get_primary_listener() override {
+    int primary_shard = get_primary_shard_from_osdmap();
+    if (primary_shard < 0) {
+      return nullptr;
+    }
+    
+    auto it = shard_peering_listeners.find(primary_shard);
+    if (it != shard_peering_listeners.end() && it->second &&
+        it->second->backend_listener) {
+      // Assert that the backend listener agrees it's primary
+      ceph_assert(it->second->backend_listener->pgb_is_primary());
+      return it->second->backend_listener.get();
+    }
+    return nullptr;
+  }
+  
+  // Returns nullptr when there is no primary or no matching backend.
+  PGBackend* get_primary_backend() override {
+    int primary_shard = get_primary_shard_from_osdmap();
+    if (primary_shard < 0) {
+      return nullptr;
+    }
+    
+    auto listener_it = shard_peering_listeners.find(primary_shard);
+    if (listener_it != shard_peering_listeners.end() && listener_it->second &&
+        listener_it->second->backend_listener) {
+      // Assert that the backend listener agrees it's primary
+      ceph_assert(listener_it->second->backend_listener->pgb_is_primary());
+      
+      // Return the backend from the base class's backends map, not from
+      // the peering listener, because the base class backend is connected
+      // to the event loop and message routers
+      auto backend_it = backends.find(primary_shard);
+      return (backend_it != backends.end()) ? backend_it->second.get() : nullptr;
+    }
+    return nullptr;
+  }
+  
+  void init_peering(bool dne = false);
+  
+  // Deliver the Initialize boot event to every live acting shard.
+  void event_initialize() {
+    // Get acting set from OSDMap
+    std::vector<int> acting_osds;
+    int acting_primary = -1;
+    osdmap->pg_to_acting_osds(this->pgid, &acting_osds, &acting_primary);
+    
+    for (int shard : acting_osds) {
+      // Skip failed OSDs (marked as CRUSH_ITEM_NONE)
+      if (shard == CRUSH_ITEM_NONE) {
+        continue;
+      }
+      auto evt = std::make_shared<PGPeeringEvent>(
+        osdmap->get_epoch(),
+        osdmap->get_epoch(),
+        PeeringState::Initialize());
+      
+      get_peering_state(shard)->handle_event(evt, get_peering_ctx(shard));
+    }
+  }
+  
+  // Call advance_map on every live acting shard (same-map advance: the
+  // current osdmap is passed as both old and new).
+  void event_advance_map() {
+    // Get primary from OSDMap - query once before the loop
+    std::vector<int> up_osds, acting_osds;
+    int up_primary = -1, acting_primary = -1;
+    osdmap->pg_to_up_acting_osds(this->pgid, &up_osds, &up_primary, &acting_osds, &acting_primary);
+    
+    for (int shard : acting_osds) {
+      // Skip failed OSDs (marked as CRUSH_ITEM_NONE)
+      if (shard == CRUSH_ITEM_NONE) {
+        continue;
+      }
+      get_peering_state(shard)->advance_map(
+        osdmap, osdmap, up_osds, up_primary, acting_osds, acting_primary,
+        *get_peering_ctx(shard));
+    }
+  }
+  
+  // Call activate_map on every live acting shard.
+  void event_activate_map() {
+    // Get acting set from OSDMap - must use same set as advance_map
+    std::vector<int> up_osds, acting_osds;
+    int up_primary = -1, acting_primary = -1;
+    osdmap->pg_to_up_acting_osds(this->pgid, &up_osds, &up_primary, &acting_osds, &acting_primary);
+    
+    for (int shard : acting_osds) {
+      // Skip failed OSDs (marked as CRUSH_ITEM_NONE)
+      if (shard == CRUSH_ITEM_NONE) {
+        continue;
+      }
+      get_peering_state(shard)->activate_map(*get_peering_ctx(shard));
+    }
+  }
+  
+private:
+  // Dispatch all messages from a map<int, Container<MessageRef>>.
+  // Templated to work with both std::vector (PeeringCtx::message_map) and
+  // std::list (MockPeeringListener::messages).
+  // Returns true if at least one message was delivered.
+  template <typename Container>
+  bool dispatch_messages_from_map(int from_shard,
+                                  std::map<int, Container>& msg_map) {
+    bool did_work = false;
+
+    // Get acting set from OSDMap
+    std::vector<int> acting_osds;
+    int acting_primary = -1;
+    osdmap->pg_to_acting_osds(this->pgid, &acting_osds, &acting_primary);
+
+    for (auto& [to_shard, msg_list] : msg_map) {
+      // Drop messages addressed to shards outside the acting set.
+      if (std::find(acting_osds.begin(), acting_osds.end(), to_shard) == acting_osds.end()) {
+        continue;
+      }
+
+      while (!msg_list.empty()) {
+        MessageRef m = msg_list.front();
+        msg_list.erase(msg_list.begin());
+
+        // Cast to MOSDPeeringOp - all peering messages inherit from this.
+        // Use dynamic_cast with assertion to catch unexpected message types.
+        // Use m.get() (not m.detach()) to avoid leaking the raw pointer.
+        MOSDPeeringOp *op = dynamic_cast<MOSDPeeringOp*>(m.get());
+        ceph_assert(op != nullptr) /* message must be a MOSDPeeringOp */;
+
+        // Set connection peer to the SENDER, not the destination
+        ceph_msg_header h = op->get_header();
+        h.src.num = from_shard;
+        op->set_header(h);
+
+        ConnectionRef conn = new MockConnection(from_shard);
+        op->set_connection(conn);
+
+        // get_event() returns a newly allocated PGPeeringEvent,
+        // so we take ownership directly into a shared_ptr (matching OSD.cc pattern)
+        PGPeeringEventRef evt_ref(op->get_event());
+
+        get_peering_state(to_shard)->handle_event(
+          evt_ref,
+          get_peering_ctx(to_shard));
+
+        did_work = true;
+      }
+    }
+
+    return did_work;
+  }
+
+public:
+  // Deliver messages queued in from_shard's PeeringCtx.
+  bool dispatch_peering_messages(int from_shard) {
+    auto* ctx = get_peering_ctx(from_shard);
+    return dispatch_messages_from_map(from_shard, ctx->message_map);
+  }
+
+  // Deliver cluster messages queued on from_shard's peering listener.
+  bool dispatch_cluster_messages(int from_shard) {
+    auto& listener = shard_peering_listeners[from_shard];
+    return dispatch_messages_from_map(from_shard, listener->messages);
+  }
+  
+  // Repeatedly drain peering messages from every live acting shard until a
+  // full pass produces no work.  Returns true if anything was dispatched.
+  bool dispatch_all_peering_messages() {
+    bool did_work = false;
+    bool work_this_round;
+    
+    // Get acting set from OSDMap
+    std::vector<int> acting_osds;
+    int acting_primary = -1;
+    osdmap->pg_to_acting_osds(this->pgid, &acting_osds, &acting_primary);
+    
+    do {
+      work_this_round = false;
+      for (int shard : acting_osds) {
+        // Skip failed OSDs (marked as CRUSH_ITEM_NONE)
+        if (shard == CRUSH_ITEM_NONE) {
+          continue;
+        }
+        work_this_round |= dispatch_peering_messages(shard);
+      }
+      did_work |= work_this_round;
+    } while (work_this_round);
+    
+    return did_work;
+  }
+  
+  // Drain one shard's queued peering events; 'stalled' selects the listener's
+  // stalled_events queue instead of the normal one.
+  bool dispatch_events(int shard, bool stalled = false) {
+    auto& listener = shard_peering_listeners[shard];
+    std::list<PGPeeringEventRef>& event_queue = 
+      stalled ? listener->stalled_events : listener->events;
+    
+    if (event_queue.empty()) {
+      return false;
+    }
+    
+    bool did_work = false;
+    while (!event_queue.empty()) {
+      PGPeeringEventRef evt = event_queue.front();
+      event_queue.pop_front();
+      
+      get_peering_state(shard)->handle_event(evt, get_peering_ctx(shard));
+      did_work = true;
+    }
+    
+    return did_work;
+  }
+  
+  // Drain queued events on every live acting shard until quiescent.
+  bool dispatch_all_events(bool stalled = false) {
+    bool did_work = false;
+    bool work_this_round;
+    
+    // Get acting set from OSDMap
+    std::vector<int> acting_osds;
+    int acting_primary = -1;
+    osdmap->pg_to_acting_osds(this->pgid, &acting_osds, &acting_primary);
+    
+    do {
+      work_this_round = false;
+      for (int shard : acting_osds) {
+        // Skip failed OSDs (marked as CRUSH_ITEM_NONE)
+        if (shard == CRUSH_ITEM_NONE) {
+          continue;
+        }
+        work_this_round |= dispatch_events(shard, stalled);
+      }
+      did_work |= work_this_round;
+    } while (work_this_round);
+    
+    return did_work;
+  }
+  
+  // Drain cluster messages on every live acting shard until quiescent.
+  bool dispatch_all_cluster_messages() {
+    bool did_work = false;
+    bool work_this_round;
+    
+    // Get acting set from OSDMap
+    std::vector<int> acting_osds;
+    int acting_primary = -1;
+    osdmap->pg_to_acting_osds(this->pgid, &acting_osds, &acting_primary);
+    
+    do {
+      work_this_round = false;
+      for (int shard : acting_osds) {
+        // Skip failed OSDs (marked as CRUSH_ITEM_NONE)
+        if (shard == CRUSH_ITEM_NONE) {
+          continue;
+        }
+        work_this_round |= dispatch_cluster_messages(shard);
+      }
+      did_work |= work_this_round;
+    } while (work_this_round);
+    
+    return did_work;
+  }
+  
+  // Drain peering messages, cluster messages and events together until one
+  // full round produces no work at all.
+  bool dispatch_all() {
+    bool did_work = false;
+    bool work_this_round;
+    
+    do {
+      work_this_round = false;
+      work_this_round |= dispatch_all_peering_messages();
+      work_this_round |= dispatch_all_cluster_messages();
+      work_this_round |= dispatch_all_events();
+      did_work |= work_this_round;
+    } while (work_this_round);
+    
+    return did_work;
+  }
+  
+  // IMPORTANT: For EC pools, shard positions in acting array must be preserved.
+  // Failed OSDs should be replaced with CRUSH_ITEM_NONE, not removed.
+  void update_osdmap_with_peering(
+    std::shared_ptr<OSDMap> new_osdmap,
+    std::optional<pg_shard_t> new_primary = std::nullopt);
+
+  bool new_epoch(bool if_required = false);
+
+  int queue_transaction_helper(int shard, ObjectStore::Transaction&& t);
+
+  void run_peering_cycle();
+  
+  // OSDMap manipulation helpers - these create a new epoch and trigger peering
+  
+  /**
+   * Mark an OSD as down (exists but not UP).
+   * Creates a new OSDMap epoch and triggers peering.
+   */
+  void mark_osd_down(int osd_id);
+  
+  /**
+   * Mark an OSD as up.
+   * Creates a new OSDMap epoch and triggers peering.
+   */
+  void mark_osd_up(int osd_id);
+  
+  /**
+   * Mark multiple OSDs as down.
+   * Creates a new OSDMap epoch and triggers peering.
+   */
+  void mark_osds_down(const std::vector<int>& osd_ids);
+  
+  /**
+   * Advance to a new epoch without changing OSD states.
+   * Useful for testing re-peering scenarios.
+   */
+  void advance_epoch();
+  
+  // True when every live acting shard reports is_active().
+  bool all_shards_active() {
+    // Get acting set from OSDMap
+    std::vector<int> acting_osds;
+    int acting_primary = -1;
+    osdmap->pg_to_acting_osds(this->pgid, &acting_osds, &acting_primary);
+    
+    for (int shard : acting_osds) {
+      // Skip failed OSDs (marked as CRUSH_ITEM_NONE)
+      if (shard == CRUSH_ITEM_NONE) {
+        continue;
+      }
+      if (!get_peering_state(shard)->is_active()) {
+        return false;
+      }
+    }
+    return true;
+  }
+  
+  // In EC pools, only the primary tracks PG_STATE_CLEAN.
+  // False when there is currently no valid primary.
+  bool all_shards_clean() {
+    // Get primary from OSDMap
+    std::vector<int> acting_osds;
+    int acting_primary = -1;
+    osdmap->pg_to_acting_osds(this->pgid, &acting_osds, &acting_primary);
+    
+    if (acting_primary >= 0 && acting_primary != CRUSH_ITEM_NONE) {
+      return get_peering_state(acting_primary)->is_clean();
+    }
+    return false;
+  }
+  
+  // Human-readable name of the shard's current peering state machine state.
+  std::string get_state_name(int shard) {
+    return get_peering_state(shard)->get_current_state();
+  }
+};
+
diff --git a/src/test/osd/EventLoop.h b/src/test/osd/EventLoop.h
new file mode 100644 (file)
index 0000000..1167b82
--- /dev/null
@@ -0,0 +1,265 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+// vim: ts=8 sw=2 sts=2 expandtab
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2026 IBM
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#pragma once
+
+#include <iostream>
+#include <functional>
+#include <queue>
+#include <map>
+#include <list>
+#include <vector>
+#include "include/types.h"
+#include "messages/MOSDOp.h"
+#include "osd/OpRequest.h"
+#include "osd/PeeringState.h"
+#include "os/ObjectStore.h"
+
+/**
+ * EventLoop - Unified single-threaded event loop for OSD tests.
+ *
+ * Combines EC backend messages, ObjectStore transactions, peering messages,
+ * and peering events into a single deterministic queue.  This allows tests
+ * to properly interleave peering state changes with EC backend operations.
+ */
/**
 * EventLoop - Unified single-threaded event loop for OSD tests.
 *
 * Combines EC backend messages, ObjectStore transactions, peering messages,
 * and peering events into a single deterministic FIFO queue.  This allows
 * tests to properly interleave peering state changes with EC backend
 * operations, and to single-step or intercept the event stream.
 */
class EventLoop {
public:
  using GenericEvent = std::function<void()>;

  /// Event categories.  Used only for verbose logging and per-type
  /// statistics; every event is executed identically, in strict FIFO order.
  enum class EventType {
    GENERIC,
    OSD_MESSAGE,
    TRANSACTION,
    PEERING_MESSAGE,
    CLUSTER_MESSAGE,
    PEERING_EVENT
  };

private:
  /// One queued unit of work: the callback plus metadata used for
  /// verbose logging and statistics.
  struct Event {
    EventType type;
    int osd;  // -1 for generic events
    GenericEvent callback;

    Event(EventType t, int o, GenericEvent cb)
      : type(t), osd(o), callback(std::move(cb)) {}
  };

  std::queue<Event> event_queue;
  bool verbose = false;
  int events_executed = 0;                  // total events run since reset_stats()
  std::map<EventType, int> events_by_type;  // per-type execution counts

  static constexpr const char* event_type_name(EventType type) {
    switch (type) {
      case EventType::GENERIC: return "GENERIC";
      case EventType::OSD_MESSAGE: return "OSD_MESSAGE";
      case EventType::TRANSACTION: return "TRANSACTION";
      case EventType::PEERING_MESSAGE: return "PEERING_MESSAGE";
      case EventType::CLUSTER_MESSAGE: return "CLUSTER_MESSAGE";
      case EventType::PEERING_EVENT: return "PEERING_EVENT";
      default: return "UNKNOWN";
    }
  }

public:
  /// explicit: a bare bool must not silently convert to an EventLoop.
  explicit EventLoop(bool verbose = false) : verbose(verbose) {}

  // --- Scheduling -------------------------------------------------------

  void schedule_generic(GenericEvent event) {
    event_queue.emplace(EventType::GENERIC, -1, std::move(event));
  }

  void schedule_osd_message(int osd, GenericEvent callback) {
    event_queue.emplace(EventType::OSD_MESSAGE, osd, std::move(callback));
  }

  void schedule_transaction(int osd, GenericEvent callback) {
    event_queue.emplace(EventType::TRANSACTION, osd, std::move(callback));
  }

  void schedule_peering_message(int to_osd, GenericEvent callback) {
    event_queue.emplace(EventType::PEERING_MESSAGE, to_osd, std::move(callback));
  }

  void schedule_cluster_message(int to_osd, GenericEvent callback) {
    event_queue.emplace(EventType::CLUSTER_MESSAGE, to_osd, std::move(callback));
  }

  void schedule_peering_event(int osd, GenericEvent callback) {
    event_queue.emplace(EventType::PEERING_EVENT, osd, std::move(callback));
  }

  // --- Introspection ----------------------------------------------------

  bool has_events() const {
    return !event_queue.empty();
  }

  size_t queued_event_count() const {
    return event_queue.size();
  }

  int get_events_executed() const {
    return events_executed;
  }

  const std::map<EventType, int>& get_stats_by_type() const {
    return events_by_type;
  }

  void reset_stats() {
    events_executed = 0;
    events_by_type.clear();
  }

  // --- Execution --------------------------------------------------------

  /**
   * Execute the next queued event.
   * @return false if the queue was empty, true if an event was executed.
   */
  bool run_one() {
    if (event_queue.empty()) {
      return false;
    }

    Event event = std::move(event_queue.front());
    event_queue.pop();

    if (verbose) {
      std::cout << "  [Event " << (events_executed + 1) << "] "
                << event_type_name(event.type);
      if (event.osd >= 0) {
        std::cout << " (OSD " << event.osd << ")";
      }
      std::cout << " Executing..." << std::endl;
    }

    event.callback();
    events_executed++;
    events_by_type[event.type]++;

    return true;
  }

  /**
   * Execute up to @p count events (fewer if the queue empties first).
   * @return the number of events actually executed.
   */
  int run_many(int count) {
    if (verbose) {
      std::cout << "\n=== Running " << count << " events ===" << std::endl;
    }

    int executed = 0;
    for (int i = 0; i < count && run_one(); i++) {
      executed++;
    }

    if (verbose) {
      std::cout << "=== Executed " << executed << " events, "
                << event_queue.size() << " remaining ===" << std::endl;
    }

    return executed;
  }

  /**
   * Run until the queue is empty or max_events is reached.
   * @param max_events  0 means unlimited.
   * Returns -1 if max_events was reached before the queue emptied,
   * otherwise the number of events executed.
   */
  int run_until_idle(int max_events = 0) {
    if (verbose) {
      std::cout << "\n=== Running until idle";
      if (max_events > 0) {
        std::cout << " (max " << max_events << " events)";
      }
      std::cout << " ===" << std::endl;
    }

    int executed = 0;
    while (has_events()) {
      if (max_events > 0 && executed >= max_events) {
        if (verbose) {
          std::cout << "=== Max events (" << max_events << ") reached, "
                    << event_queue.size() << " events remaining ===" << std::endl;
        }
        return -1;  // Timeout
      }

      run_one();
      executed++;
    }

    if (verbose) {
      std::cout << "=== Idle: Executed " << executed << " events ===" << std::endl;
      print_stats();
    }

    return executed;
  }

  /**
   * Run until a condition is met, the queue is idle, or max_events is
   * reached.  The condition is checked after each event execution (events
   * already queued when the condition first holds are not rolled back).
   * @param max_events  0 means unlimited.
   * @param condition   checked after every event; passed by const reference
   *                    to avoid copying the std::function on each call.
   * Returns -1 if max_events was reached, otherwise the number of events
   * executed (whether or not the condition was met — callers that need to
   * distinguish should re-test the condition).
   */
  int run_until(int max_events, const std::function<bool()>& condition) {
    if (verbose) {
      std::cout << "\n=== Running until condition";
      if (max_events > 0) {
        std::cout << " (max " << max_events << " events)";
      }
      std::cout << " ===" << std::endl;
    }

    int executed = 0;
    while (has_events()) {
      if (max_events > 0 && executed >= max_events) {
        if (verbose) {
          std::cout << "=== Max events (" << max_events << ") reached ===" << std::endl;
        }
        return -1;  // Timeout
      }

      run_one();
      executed++;

      if (condition()) {
        if (verbose) {
          std::cout << "=== Condition met after " << executed << " events ===" << std::endl;
        }
        return executed;
      }
    }

    if (verbose) {
      std::cout << "=== Idle: Executed " << executed << " events, condition not met ===" << std::endl;
    }

    return executed;
  }

  /// Drop all queued events without executing them (stats are retained).
  void clear() {
    event_queue = std::queue<Event>();
  }

  void set_verbose(bool v) {
    verbose = v;
  }

  /// Print per-type execution counts; no-op if nothing has run.
  void print_stats() const {
    if (events_by_type.empty()) {
      return;
    }

    std::cout << "=== Event Statistics ===" << std::endl;
    for (const auto& [type, count] : events_by_type) {
      std::cout << "  " << event_type_name(type) << ": " << count << std::endl;
    }
    std::cout << "  TOTAL: " << events_executed << std::endl;
  }
};
+
index a0785c217ac139a0e8f3fd0b3b21118d522ffd5d..293bf589a3e56f99414dd8b7b13df7647e7de372 100644 (file)
 //MockConnection - simple stub. Required because PeeringState needs
 //to know the features of the peer OSD which sent a peering message
 class MockConnection : public Connection {
+ private:
+  int peer_osd;
+  
  public:
-  MockConnection() : Connection(g_ceph_context, nullptr) {
+  MockConnection(int peer = -1) : Connection(g_ceph_context, nullptr), peer_osd(peer) {
     set_features(CEPH_FEATURES_ALL);
   }
 
+  int get_peer_osd() const {
+    return peer_osd;
+  }
+
   bool is_connected() override {
     return true;
   }
@@ -48,4 +55,3 @@ class MockConnection : public Connection {
     return entity_addr_t();
   }
 };
-
index cff7b353f16653acab5a1455b6df4ea1ed181875..2e7843d66ebe3482f35b7be4654b771541d033af 100644 (file)
 #include <set>
 #include "osd/PGBackend.h"
 
-// MockECReadPred - simple stub for IsPGReadablePredicate
-// Warning - this always returns true. This means we cannot test scenarios
-// where there are too many OSDs down and the PG should be incomplete
/**
 * MockECReadPred - configurable stub for IsPGReadablePredicate.
 *
 * When constructed with default arguments (k=0, m=0), always returns true
 * (original behaviour, suitable for basic tests that don't need quorum
 * checking).
 *
 * When constructed with real k and m values, implements proper quorum
 * checking: the PG is readable if at least k shards are available.  In a
 * k-of-(k+m) EC scheme any k shards (data or coding) suffice to
 * reconstruct the object, so the predicate only counts shards and does
 * not distinguish data shards from coding shards.
 *
 * This enables negative testing of scenarios where too many OSDs are down
 * and the PG should be unreadable.
 */
class MockECReadPred : public IsPGReadablePredicate {
 public:
  /**
   * @param k  Number of data chunks (0 = always-true mode)
   * @param m  Number of coding chunks (unused in the read predicate, kept
   *           for symmetry with MockECRecPred)
   */
  explicit MockECReadPred(int k = 0, int m = 0) : k(k), m(m) {}

  bool operator()(const std::set<pg_shard_t> &have) const override {
    // When k==0 fall back to always-true (backward-compatible default)
    if (k == 0) {
      return true;
    }
    // Readable when at least k shards (of any kind) are available
    return static_cast<int>(have.size()) >= k;
  }

 private:
  int k;
  int m;
};
 
index 1b603350190df3b553131173678efd538e9734ab..64b1d48403dcc7516fad8b84c30b0099382ebfbd 100644 (file)
 #include <set>
 #include "osd/PGBackend.h"
 
-// MockECRecPred - simple stub for IsPGRecoverablePredicate
-// Warning - this always returns true. This means we cannot test scenarios
-// where there are too many OSDs down and the PG should be incomplete
+/**
+ * MockECRecPred - configurable stub for IsPGRecoverablePredicate.
+ *
+ * When constructed with default arguments (k=0, m=0), always returns true
+ * (original behaviour, suitable for basic tests that don't need quorum
+ * checking).
+ *
+ * When constructed with real k and m values, implements proper quorum
+ * checking: the PG is recoverable if at least k shards are available (i.e.
+ * we have enough shards to reconstruct all data, since any k-of-(k+m) EC
+ * scheme can recover from up to m failures).
+ *
+ * This enables negative testing of scenarios where too many OSDs are down
+ * and the PG should be marked Incomplete.
+ */
 class MockECRecPred : public IsPGRecoverablePredicate {
  public:
-  MockECRecPred() {}
+  /**
+   * @param k  Number of data chunks (0 = always-true mode)
+   * @param m  Number of coding chunks (0 = always-true mode)
+   */
+  explicit MockECRecPred(int k = 0, int m = 0) : k(k), m(m) {}
 
-  bool operator()(const std::set<pg_shard_t> &_have) const override {
-    return true;
+  bool operator()(const std::set<pg_shard_t> &have) const override {
+    // When k==0 fall back to always-true (backward-compatible default)
+    if (k == 0) {
+      return true;
+    }
+    // Recoverable when we have at least k shards (can tolerate up to m failures)
+    return static_cast<int>(have.size()) >= k;
   }
+
+ private:
+  int k;
+  int m;
 };
 
diff --git a/src/test/osd/MockLog.h b/src/test/osd/MockLog.h
deleted file mode 100644 (file)
index e3ee9cf..0000000
+++ /dev/null
@@ -1,107 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
-// vim: ts=8 sw=2 sts=2 expandtab
-/*
- * Ceph - scalable distributed file system
- *
- * Copyright (C) 2026 IBM
- *
- * This is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License version 2.1, as published by the Free Software
- * Foundation.  See file COPYING.
- *
- */
-
-#pragma once
-
-#include <iostream>
-#include <sstream>
-#include <string>
-#include "common/ostream_temp.h"
-
-//MockLog - simple stub
-class MockLog : public LoggerSinkSet {
- public:
-  void debug(std::stringstream& s) final
-  {
-    std::cout << "\n<<debug>> " << s.str() << std::endl;
-  }
-
-  void info(std::stringstream& s) final
-  {
-    std::cout << "\n<<info>> " << s.str() << std::endl;
-  }
-
-  void sec(std::stringstream& s) final
-  {
-    std::cout << "\n<<sec>> " << s.str() << std::endl;
-  }
-
-  void warn(std::stringstream& s) final
-  {
-    std::cout << "\n<<warn>> " << s.str() << std::endl;
-  }
-
-  void error(std::stringstream& s) final
-  {
-    err_count++;
-    std::cout << "\n<<error>> " << s.str() << std::endl;
-  }
-
-  OstreamTemp info() final { return OstreamTemp(CLOG_INFO, this); }
-  OstreamTemp warn() final { return OstreamTemp(CLOG_WARN, this); }
-  OstreamTemp error() final { return OstreamTemp(CLOG_ERROR, this); }
-  OstreamTemp sec() final { return OstreamTemp(CLOG_ERROR, this); }
-  OstreamTemp debug() final { return OstreamTemp(CLOG_DEBUG, this); }
-
-  void do_log(clog_type prio, std::stringstream& ss) final
-  {
-    switch (prio) {
-      case CLOG_DEBUG:
-        debug(ss);
-        break;
-      case CLOG_INFO:
-        info(ss);
-        break;
-      case CLOG_SEC:
-        sec(ss);
-        break;
-      case CLOG_WARN:
-        warn(ss);
-        break;
-      case CLOG_ERROR:
-      default:
-        error(ss);
-        break;
-    }
-  }
-
-  void do_log(clog_type prio, const std::string& ss) final
-  {
-    switch (prio) {
-      case CLOG_DEBUG:
-        debug() << ss;
-        break;
-      case CLOG_INFO:
-        info() << ss;
-        break;
-      case CLOG_SEC:
-        sec() << ss;
-        break;
-      case CLOG_WARN:
-        warn() << ss;
-        break;
-      case CLOG_ERROR:
-      default:
-        error() << ss;
-        break;
-    }
-  }
-
-  virtual ~MockLog() {}
-
-  int err_count{0};
-  int expected_err_count{0};
-  void set_expected_err_count(int c) { expected_err_count = c; }
-};
-
index 5be8a218664a498131794a305f365bc2988282ef..a5834bd766d92c5e2b572c235b2669653ad6f8a6 100644 (file)
 
 #pragma once
 
-#include <functional>
-#include <list>
-#include <optional>
-#include <vector>
 #include "osd/PGBackend.h"
-#include "osd/ECUtil.h"
 #include "os/ObjectStore.h"
 
 // MockPGBackend - simple stub for PGBackend
index 219e5298b61639d9e8553686b4dbd87a613992d1..35049f4c0832183bef5b52d8aef720c5f86f830a 100644 (file)
 
 #pragma once
 
+#include <functional>
+#include <vector>
 #include <map>
-#include <set>
-#include <optional>
 #include "osd/PGBackend.h"
+#include "osd/ECBackend.h"
+#include "osd/PGLog.h"
 #include "osd/OSDMap.h"
 #include "osd/osd_types.h"
-#include "osd/PGLog.h"
-#include "common/intrusive_timer.h"
-#include "common/ostream_temp.h"
-#include "global/global_context.h"
+#include "osd/osd_perf_counters.h"
+#include "osd/PeeringState.h"
+#include "common/ceph_context.h"
+#include "common/TrackedOp.h"
+#include "common/perf_counters.h"
+#include "messages/MOSDPGPush.h"
 #include "os/ObjectStore.h"
+#include "global/global_context.h"
+#include "test/osd/MockConnection.h"
+#include "test/osd/EventLoop.h"
+#include "osd/OpRequest.h"
 
-// MockPGBackendListener - simple stub for PGBackend::Listener
-class MockPGBackendListener : public PGBackend::Listener {
+// MockPGBackendListener - mock PGBackend::Listener and ECListener for multi-instance testing.
+class MockPGBackendListener : public PGBackend::Listener, public ECListener {
 public:
   pg_info_t info;
   OSDMapRef osdmap;
-  const pg_pool_t pool;
+  int64_t pool_id;
   PGLog log;
   DoutPrefixProvider *dpp;
   pg_shard_t pg_whoami;
   std::set<pg_shard_t> shardset;
+  
+  // Pointer to PeeringState for tests that use full peering
+  PeeringState *peering_state = nullptr;
+  
+  shard_id_set acting_recovery_backfill_shard_id_set;
   std::map<pg_shard_t, pg_info_t> shard_info;
   std::map<pg_shard_t, pg_missing_t> shard_missing;
   std::map<hobject_t, std::set<pg_shard_t>> missing_loc_shards;
   pg_missing_tracker_t local_missing;
-
-  MockPGBackendListener(OSDMapRef osdmap, const pg_pool_t pi, DoutPrefixProvider *dpp, pg_shard_t pg_whoami) :
-    osdmap(osdmap), pool(pi), log(g_ceph_context), dpp(dpp), pg_whoami(pg_whoami) {}
+  
+  std::vector<MessageRef> sent_messages;
+  std::vector<std::pair<int, MessageRef>> sent_messages_with_dest;
+  
+  ObjectStore *store = nullptr;
+  ObjectStore::CollectionHandle ch;
+  EventLoop *event_loop = nullptr;
+  std::function<bool(OpRequestRef)> handle_message_callback;
+  std::map<int, std::function<bool(OpRequestRef)>> *message_router = nullptr;
+  OpTracker *op_tracker = nullptr;
+  PerfCounters *perf_logger = nullptr;
+
+  MockPGBackendListener(OSDMapRef osdmap, int64_t pool_id, DoutPrefixProvider *dpp, pg_shard_t pg_whoami, PeeringState *ps = nullptr) :
+    osdmap(osdmap), pool_id(pool_id), log(g_ceph_context), dpp(dpp), pg_whoami(pg_whoami), peering_state(ps) {
+    // Create a full OSD PerfCounters using the standard build_osd_logger function.
+    // This prevents null pointer dereferences when ReplicatedBackend calls get_logger()->inc().
+    perf_logger = build_osd_logger(g_ceph_context);
+  }
+  
+  ~MockPGBackendListener() {
+    if (perf_logger) {
+      delete perf_logger;
+      perf_logger = nullptr;
+    }
+  }
+  
+  void set_store(ObjectStore *s, ObjectStore::CollectionHandle c) {
+    store = s;
+    ch = c;
+  }
+  
+  void set_event_loop(EventLoop *loop) {
+    event_loop = loop;
+  }
+  
+  void set_op_tracker(OpTracker *tracker) {
+    op_tracker = tracker;
+  }
+  
+  void set_peering_state(PeeringState *ps) {
+    peering_state = ps;
+  }
+  
+  void set_handle_message_callback(std::function<bool(OpRequestRef)> cb) {
+    handle_message_callback = cb;
+  }
+  
+  void set_message_router(std::map<int, std::function<bool(OpRequestRef)>> *router) {
+    message_router = router;
+  }
 
   // Debugging
   DoutPrefixProvider *get_dpp() override {
@@ -68,11 +128,17 @@ public:
     pg_shard_t peer,
     const hobject_t &oid,
     const ObjectRecoveryInfo &recovery_info) override {
+    if (peering_state) {
+      peering_state->on_peer_recover(peer, oid, recovery_info.version);
+    }
   }
 
   void begin_peer_recover(
     pg_shard_t peer,
     const hobject_t oid) override {
+    if (peering_state) {
+      peering_state->begin_peer_recover(peer, oid);
+    }
   }
 
   void apply_stats(
@@ -116,25 +182,93 @@ public:
     return c;
   }
 
-  // Messaging
+  // Routes messages through EventLoop for asynchronous EC message processing.
   void send_message(int to_osd, Message *m) override {
+    MessageRef mref(m);
+    sent_messages.push_back(mref);
+    sent_messages_with_dest.push_back({to_osd, mref});
+    
+    if (event_loop && op_tracker && message_router) {
+      // Capture the sender's OSD ID
+      int from_osd = pg_whoami.osd;
+      
+      // IMPORTANT: Encode the message payload to simulate network transmission
+      // This ensures that txn_payload is moved to the middle section for MOSDRepOp messages
+      // Without this, Transaction::decode will fail because the message structure is incomplete
+      mref->encode_payload(CEPH_FEATURES_ALL);
+      
+      event_loop->schedule_osd_message(to_osd, [this, mref, to_osd, from_osd]() {
+        if (!mref->get_connection()) {
+          // Set connection peer to the SENDER, not the destination
+          ConnectionRef conn = new MockConnection(from_osd);
+          mref->set_connection(conn);
+        }
+        OpRequestRef op = op_tracker->create_request<OpRequest>(mref.get());
+        
+        // Route to the correct shard's backend using the message router
+        auto it = message_router->find(to_osd);
+        if (it != message_router->end()) {
+          it->second(op);
+        }
+      });
+    }
   }
 
   void queue_transaction(
     ObjectStore::Transaction&& t,
     OpRequestRef op = OpRequestRef()) override {
+    std::vector<ObjectStore::Transaction> tls;
+    tls.push_back(std::move(t));
+    queue_transactions(tls, op);
   }
 
   void queue_transactions(
     std::vector<ObjectStore::Transaction>& tls,
     OpRequestRef op = OpRequestRef()) override {
+    if (event_loop && store && ch) {
+      // Steal the Context callbacks from the transactions before calling MemStore.
+      // This allows the test harness to manage the context callbacks itself instead of using
+      // a Finisher thread. This keeps the test harness single threaded and gives more
+      // control for ordering async replies.
+      Context *on_apply = nullptr;
+      Context *on_apply_sync = nullptr;
+      Context *on_commit = nullptr;
+      ObjectStore::Transaction::collect_contexts(tls, &on_apply, &on_commit, &on_apply_sync);
+
+      // Execute transactions through the store (without contexts - we stole them)
+      store->queue_transactions(ch, tls, TrackedOpRef(), nullptr);
+
+      // Apply the on_apply_sync synchronously. This is what queue_transactions
+      // would do anyway.
+      // NOTE: Memstore will panic rather than fail
+      if (on_apply_sync) {
+        on_apply_sync->complete(0);
+      }
+
+      if (on_apply) {
+        event_loop->schedule_transaction(pg_whoami.osd, [on_apply]() mutable {
+          on_apply->complete(0);
+        });
+      }
+      if (on_commit) {
+        event_loop->schedule_transaction(pg_whoami.osd, [on_commit]() mutable {
+          on_commit->complete(0);
+        });
+      }
+    }
   }
 
   epoch_t get_interval_start_epoch() const override {
+    if (peering_state) {
+      return peering_state->get_info().history.same_interval_since;
+    }
     return 1;
   }
 
   epoch_t get_last_peering_reset_epoch() const override {
+    if (peering_state) {
+      return peering_state->get_last_peering_reset();
+    }
     return 1;
   }
 
@@ -143,11 +277,21 @@ public:
     return shardset;
   }
 
+  const shard_id_set &get_acting_recovery_backfill_shard_id_set() const {
+    return acting_recovery_backfill_shard_id_set;
+  }
+
   const std::set<pg_shard_t> &get_acting_shards() const override {
+    if (peering_state) {
+      return peering_state->get_actingset();
+    }
     return shardset;
   }
 
   const std::set<pg_shard_t> &get_backfill_shards() const override {
+    if (peering_state) {
+      return peering_state->get_backfill_targets();
+    }
     return shardset;
   }
 
@@ -156,34 +300,68 @@ public:
   }
 
   const std::map<hobject_t, std::set<pg_shard_t>> &get_missing_loc_shards() const override {
+    if (peering_state) {
+      return peering_state->get_missing_loc().get_missing_locs();
+    }
     return missing_loc_shards;
   }
 
   const pg_missing_tracker_t &get_local_missing() const override {
+    if (peering_state) {
+      return peering_state->get_pg_log().get_missing();
+    }
     return local_missing;
   }
 
   void add_local_next_event(const pg_log_entry_t& e) override {
+    if (peering_state) {
+      peering_state->add_local_next_event(e);
+    }
   }
 
   const std::map<pg_shard_t, pg_missing_t> &get_shard_missing() const override {
+    if (peering_state) {
+      return peering_state->get_peer_missing();
+    }
     return shard_missing;
   }
 
   const pg_missing_const_i &get_shard_missing(pg_shard_t peer) const override {
+    if (peering_state) {
+      auto m = maybe_get_shard_missing(peer);
+      ceph_assert(m);
+      return *m;
+    }
     return local_missing;
   }
 
   const std::map<pg_shard_t, pg_info_t> &get_shard_info() const override {
+    if (peering_state) {
+      return peering_state->get_peer_info();
+    }
     return shard_info;
   }
 
   const PGLog &get_log() const override {
+    if (peering_state) {
+      return peering_state->get_pg_log();
+    }
     return log;
   }
 
   bool pgb_is_primary() const override {
-    return true;
+    // For peering tests, use the PeeringState's view of primary
+    if (peering_state) {
+      return peering_state->is_primary();
+    }
+    
+    // For basic tests without peering, query the OSDMap to determine primary
+    // This uses pg_temp if set, otherwise uses the CRUSH mapping
+    std::vector<int> acting;
+    int acting_primary = -1;
+    osdmap->pg_to_acting_osds(info.pgid.pgid, &acting, &acting_primary);
+    
+    return pg_whoami.osd == acting_primary;
   }
 
   const OSDMapRef& pgb_get_osdmap() const override {
@@ -195,14 +373,23 @@ public:
   }
 
   const pg_info_t &get_info() const override {
+    // When PeeringState is available, use its pg_info_t as the single source of truth
+    if (peering_state) {
+      return peering_state->get_info();
+    }
     return info;
   }
 
   const pg_pool_t &get_pool() const override {
-    return pool;
+    const pg_pool_t *p = osdmap->get_pg_pool(pool_id);
+    ceph_assert(p != nullptr);
+    return *p;
   }
 
   eversion_t get_pg_committed_to() const override {
+    if (peering_state) {
+      return peering_state->get_pg_committed_to();
+    }
     return eversion_t();
   }
 
@@ -257,6 +444,18 @@ public:
     bool transaction_applied,
     ObjectStore::Transaction &t,
     bool async = false) override {
+    // If we have a PeeringState, append the log entries to it
+    // This creates proper integration between backend operations and peering state
+    if (peering_state && !logv.empty()) {
+      peering_state->append_log(
+        std::move(logv),
+        trim_to,
+        roll_forward_to,
+        pg_committed_to,
+        t,
+        transaction_applied,
+        async);
+    }
   }
 
   void pgb_set_object_snap_mapping(
@@ -273,15 +472,31 @@ public:
   void update_peer_last_complete_ondisk(
     pg_shard_t fromosd,
     eversion_t lcod) override {
+    if (peering_state) {
+      peering_state->update_peer_last_complete_ondisk(fromosd, lcod);
+    }
   }
 
   void update_last_complete_ondisk(eversion_t lcod) override {
+    if (peering_state) {
+      peering_state->update_last_complete_ondisk(lcod);
+    }
   }
 
   void update_pct(eversion_t pct) override {
+    if (peering_state) {
+      peering_state->update_pct(pct);
+    }
   }
 
   void update_stats(const pg_stat_t &stat) override {
+    if (peering_state) {
+      peering_state->update_stats(
+        [&stat](auto &history, auto &stats) {
+          stats = stat;
+          return false;
+        });
+    }
   }
 
   void schedule_recovery_work(
@@ -302,18 +517,52 @@ public:
   }
 
   pg_shard_t primary_shard() const override {
-    return pg_shard_t();
+    if (peering_state) {
+      return peering_state->get_primary();
+    }
+    
+    // Query the OSDMap to get the current primary
+    pg_t pgid = info.pgid.pgid;
+    std::vector<int> acting;
+    int acting_primary = -1;
+    osdmap->pg_to_acting_osds(pgid, &acting, &acting_primary);
+    
+    // For EC pools, the primary shard ID matches the OSD ID in the acting set
+    // For replicated pools, use NO_SHARD
+    if (pg_whoami.shard != shard_id_t::NO_SHARD) {
+      // EC pool: find the shard ID of the acting primary in the acting set
+      shard_id_t primary_shard_id = shard_id_t::NO_SHARD;
+      for (size_t i = 0; i < acting.size(); i++) {
+        if (acting[i] == acting_primary) {
+          primary_shard_id = shard_id_t(i);
+          break;
+        }
+      }
+      return pg_shard_t(acting_primary, primary_shard_id);
+    } else {
+      // Replicated pool: use NO_SHARD
+      return pg_shard_t(acting_primary, shard_id_t::NO_SHARD);
+    }
   }
 
   uint64_t min_peer_features() const override {
+    if (peering_state) {
+      return peering_state->get_min_peer_features();
+    }
     return CEPH_FEATURES_ALL;
   }
 
   uint64_t min_upacting_features() const override {
+    if (peering_state) {
+      return peering_state->get_min_upacting_features();
+    }
     return CEPH_FEATURES_ALL;
   }
 
   pg_feature_vec_t get_pg_acting_features() const override {
+    if (peering_state) {
+      return peering_state->get_pg_acting_features();
+    }
     return pg_feature_vec_t();
   }
 
@@ -325,16 +574,24 @@ public:
 
   void send_message_osd_cluster(
     int peer, Message *m, epoch_t from_epoch) override {
+    send_message(peer, m);
   }
 
   void send_message_osd_cluster(
     std::vector<std::pair<int, Message*>>& messages, epoch_t from_epoch) override {
+    for (auto& [osd, m] : messages) {
+      send_message(osd, m);
+    }
   }
 
-  void send_message_osd_cluster(MessageRef, Connection *con) override {
+  void send_message_osd_cluster(MessageRef m, Connection *con) override {
+    MockConnection* mock_con = dynamic_cast<MockConnection*>(con);
+    send_message(mock_con->get_peer_osd(), m.get());
   }
 
   void send_message_osd_cluster(Message *m, const ConnectionRef& con) override {
+    MockConnection* mock_con = dynamic_cast<MockConnection*>(con.get());
+    send_message(mock_con->get_peer_osd(), m);
   }
 
   void start_mon_command(
@@ -352,7 +609,7 @@ public:
   }
 
   PerfCounters *get_logger() override {
-    return nullptr;
+    return perf_logger;
   }
 
   ceph_tid_t get_tid() override {
@@ -393,9 +650,57 @@ public:
   bool maybe_preempt_replica_scrub(const hobject_t& oid) override {
     return false;
   }
+  void add_temp_obj(const hobject_t &oid) override {
+  }
+
+  void clear_temp_obj(const hobject_t &oid) override {
+  }
+
+  const pg_missing_const_i * maybe_get_shard_missing(
+    pg_shard_t peer) const override {
+    if (peering_state) {
+      if (peer == peering_state->get_primary()) {
+        return &peering_state->get_pg_log().get_missing();
+      } else {
+        auto i = peering_state->get_peer_missing().find(peer);
+        if (i == peering_state->get_peer_missing().end()) {
+          return nullptr;
+        } else {
+          return &(i->second);
+        }
+      }
+    }
+    return &local_missing;
+  }
+
+  const pg_info_t &get_shard_info(pg_shard_t peer) const override {
+    if (peering_state) {
+      if (peer == peering_state->get_primary()) {
+        return peering_state->get_info();
+      } else {
+        auto i = peering_state->get_peer_info().find(peer);
+        ceph_assert(i != peering_state->get_peer_info().end());
+        return i->second;
+      }
+    }
+    
+    auto it = shard_info.find(peer);
+    if (it != shard_info.end()) {
+      return it->second;
+    }
+    return info;
+  }
+
+  bool is_missing_object(const hobject_t& oid) const override {
+    return false;
+  }
+  void send_message_osd_cluster(
+    int osd, MOSDPGPush* msg, epoch_t from_epoch) override {
+    send_message(osd, msg);
+  }
 
   struct ECListener *get_eclistener() override {
-    return nullptr;
+    return static_cast<ECListener *>(this);
   }
 };
 
index 629d335de61b546668ada70ef327f4c899c1f17b..79651d9870cffadc2bb313da47ef910ed5698853 100644 (file)
 
 #include "osd/PGLog.h"
 #include "os/ObjectStore.h"
-#include "test/osd/MockPGBackend.h"
-
-// dout using global context and OSD subsystem
-#define dout_context g_ceph_context
-#define dout_subsys ceph_subsys_osd
+#include "MockPGBackend.h"
 
 // MockPGLogEntryHandler
 //
@@ -38,34 +34,31 @@ class MockPGLogEntryHandler : public PGLog::LogEntryHandler {
 
   // LogEntryHandler
   void remove(const hobject_t &hoid) override {
-    dout(0) << "MockPGLogEntryHandler::remove " << hoid << dendl;
+    lgeneric_dout(g_ceph_context, 0) << "MockPGLogEntryHandler::remove " << hoid << dendl;
     backend->remove(hoid, t);
   }
   void try_stash(const hobject_t &hoid, version_t v) override {
-    dout(0) << "MockPGLogEntryHandler::try_stash " << hoid << " " << v << dendl;
+    lgeneric_dout(g_ceph_context, 0) << "MockPGLogEntryHandler::try_stash " << hoid << " " << v << dendl;
     backend->try_stash(hoid, v, t);
   }
   void rollback(const pg_log_entry_t &entry) override {
-    dout(0) << "MockPGLogEntryHandler::rollback " << entry << dendl;
+    lgeneric_dout(g_ceph_context, 0) << "MockPGLogEntryHandler::rollback " << entry << dendl;
     ceph_assert(entry.can_rollback());
     backend->rollback(entry, t);
   }
   void rollforward(const pg_log_entry_t &entry) override {
-    dout(0) << "MockPGLogEntryHandler::rollforward " << entry << dendl;
+    lgeneric_dout(g_ceph_context, 0) << "MockPGLogEntryHandler::rollforward " << entry << dendl;
     backend->rollforward(entry, t);
   }
   void trim(const pg_log_entry_t &entry) override {
-    dout(0) << "MockPGLogEntryHandler::trim " << entry << dendl;
+    lgeneric_dout(g_ceph_context, 0) << "MockPGLogEntryHandler::trim " << entry << dendl;
     backend->trim(entry, t);
   }
   void partial_write(pg_info_t *info, eversion_t previous_version,
                       const pg_log_entry_t &entry
     ) override {
-    dout(0) << "MockPGLogEntryHandler::partial_write " << entry << dendl;
+    lgeneric_dout(g_ceph_context, 0) << "MockPGLogEntryHandler::partial_write " << entry << dendl;
     backend->partial_write(info, previous_version, entry);
   }
 };
 
-#undef dout_context
-#undef dout_subsys
-
index d2e9140c23994bd2b4a029d7d54be0ac7a0de4b8..12c05151146a76405ef12971f9522379d8f5341b 100644 (file)
 
 #pragma once
 
-#include <list>
-#include <map>
 #include <memory>
-#include <set>
-#include <string>
 #include <vector>
+#include <list>
+#include <map>
 #include "osd/PeeringState.h"
 #include "osd/osd_perf_counters.h"
-#include "common/perf_counters_collection.h"
-#include "global/global_context.h"
+#include "common/HeartbeatMap.h"
 #include "os/ObjectStore.h"
-#include "test/osd/MockLog.h"
-#include "test/osd/MockPGBackend.h"
-#include "test/osd/MockPGBackendListener.h"
-#include "test/osd/MockPGLogEntryHandler.h"
+#include "MockPGBackendListener.h"
+#include "MockPGBackend.h"
+#include "MockPGLogEntryHandler.h"
+#include "global/global_context.h"
 
-// dout using global context and OSD subsystem
 #define dout_context g_ceph_context
 #define dout_subsys ceph_subsys_osd
 
-using namespace std;
-
-// Mock PeeringListener - stub of PeeringState::PeeringListener
-// to help with testing of PeeringState. Keep track of calls
-// from PeeringState and emulate some of PrimaryLogPG/PG
-// functionality for testing purposes.
-//
-// There are some inject_* variables that can be used to help
-// tests create race hazards or test failure paths
+// Mock implementation of PeeringState::PeeringListener for testing.
+// inject_* variables can be used to create race hazards or test failure paths.
 class MockPeeringListener : public PeeringState::PeeringListener {
  public:
   pg_shard_t pg_whoami;
-  MockLog logger;
   PeeringState *ps;
-  unique_ptr<MockPGBackendListener> backend_listener;
+  std::unique_ptr<MockPGBackendListener> backend_listener;
   coll_t coll;
   ObjectStore::CollectionHandle ch;
-  unique_ptr<MockPGBackend> backend;
+  std::unique_ptr<MockPGBackend> backend;
   PerfCounters* recoverystate_perf;
   PerfCounters* logger_perf;
   std::vector<int> next_acting;
@@ -84,19 +72,33 @@ class MockPeeringListener : public PeeringState::PeeringListener {
   // migration requests with too full
   bool inject_fail_reserve_recovery_space = false;
 
+  std::function<int(ObjectStore::Transaction&&)> queue_transaction_callback;
+
   MockPeeringListener(OSDMapRef osdmap,
-                      const pg_pool_t pi,
+                      int64_t pool_id,
                       DoutPrefixProvider *dpp,
                       pg_shard_t pg_whoami) : pg_whoami(pg_whoami) {
-    backend_listener = make_unique<MockPGBackendListener>(osdmap, pi, dpp, pg_whoami);
-    backend = make_unique<MockPGBackend>(g_ceph_context, backend_listener.get(), nullptr, coll, ch);
+    backend_listener = std::make_unique<MockPGBackendListener>(osdmap, pool_id, dpp, pg_whoami);
+    backend = std::make_unique<MockPGBackend>(g_ceph_context, backend_listener.get(), nullptr, coll, ch);
     recoverystate_perf = build_recoverystate_perf(g_ceph_context);
     g_ceph_context->get_perfcounters_collection()->add(recoverystate_perf);
     logger_perf = build_osd_logger(g_ceph_context);
     g_ceph_context->get_perfcounters_collection()->add(logger_perf);
   }
 
-  // EpochSource interface
+  ~MockPeeringListener() {
+    if (recoverystate_perf) {
+      g_ceph_context->get_perfcounters_collection()->remove(recoverystate_perf);
+      delete recoverystate_perf;
+      recoverystate_perf = nullptr;
+    }
+    if (logger_perf) {
+      g_ceph_context->get_perfcounters_collection()->remove(logger_perf);
+      delete logger_perf;
+      logger_perf = nullptr;
+    }
+  }
+
   epoch_t get_osdmap_epoch() const override {
     return current_epoch;
   }
@@ -112,6 +114,13 @@ class MockPeeringListener : public PeeringState::PeeringListener {
     bool need_write_epoch,
     ObjectStore::Transaction &t) override {
     prepare_write_called = true;
+    
+    // If a callback is set, queue the transaction
+    if (queue_transaction_callback && !t.empty()) {
+      ObjectStore::Transaction copy;
+      copy.append(t);
+      queue_transaction_callback(std::move(copy));
+    }
   }
 
   void scrub_requested(scrub_level_t scrub_level, scrub_type_t scrub_type) override {
@@ -408,14 +417,14 @@ class MockPeeringListener : public PeeringState::PeeringListener {
   }
 
   void log_state_enter(const char *state) override {
-    last_state_entered = string(state);
+    last_state_entered = std::string(state);
     state_entered = true;
   }
 
   void log_state_exit(
     const char *state_name, utime_t enter_time,
     uint64_t events, utime_t event_dur) override {
-    last_state_exited = string(state_name);
+    last_state_exited = std::string(state_name);
     state_exited = true;
   }
 
@@ -424,15 +433,15 @@ class MockPeeringListener : public PeeringState::PeeringListener {
   }
 
   OstreamTemp get_clog_info() override {
-    return logger.info();
+    return OstreamTemp(CLOG_INFO, nullptr);
   }
 
   OstreamTemp get_clog_error() override {
-    return logger.error();
+    return OstreamTemp(CLOG_ERROR, nullptr);
   }
 
   OstreamTemp get_clog_debug() override {
-    return logger.debug();
+    return OstreamTemp(CLOG_DEBUG, nullptr);
   }
 
   void on_activate_complete() override {
@@ -498,7 +507,6 @@ class MockPeeringListener : public PeeringState::PeeringListener {
     removal_called = true;
   }
 
-  // Test state tracking
   unsigned target_pg_log_entries = 100;
   bool renew_lease_scheduled = false;
   bool check_readable_queued = false;
@@ -527,9 +535,9 @@ class MockPeeringListener : public PeeringState::PeeringListener {
   bool recovery_space_reserved = false;
   bool recovery_space_unreserved = false;
   bool missing_set_rebuilt = false;
-  string last_state_entered;
+  std::string last_state_entered;
   bool state_entered = false;
-  string last_state_exited;
+  std::string last_state_exited;
   bool state_exited = false;
   mutable bool recovery_info_dumped = false;
   epoch_t current_epoch = 1;
@@ -567,6 +575,3 @@ class MockPeeringListener : public PeeringState::PeeringListener {
   bool first_write_in_interval = false;
 };
 
-#undef dout_context
-#undef dout_subsys
-
diff --git a/src/test/osd/OSDMapTestHelpers.h b/src/test/osd/OSDMapTestHelpers.h
new file mode 100644 (file)
index 0000000..916f7cc
--- /dev/null
@@ -0,0 +1,355 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+// vim: ts=8 sw=2 sts=2 expandtab
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2026 IBM
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <vector>
+#include "osd/OSDMap.h"
+#include "osd/osd_types.h"
+
+// Utility functions for managing OSDMap state in tests.
+// Every mutator applies an OSDMap::Incremental, so each call advances the epoch.
+class OSDMapTestHelpers {
+public:
+  // Add or update a pool in the OSDMap. Pass pool_id=-1 to auto-assign.
+  // Returns the pool id actually used; advances the map epoch by one.
+  static int64_t add_pool(
+    OSDMap& osdmap,
+    int64_t pool_id,
+    const pg_pool_t& pool,
+    const std::string& pool_name = "")
+  {
+    if (pool_id < 0) {
+      pool_id = osdmap.get_pool_max() + 1;
+    }
+    
+    // Default pool name is derived from the id when none was supplied.
+    std::string name = pool_name.empty() ?
+      ("pool_" + std::to_string(pool_id)) : pool_name;
+    
+    // Use OSDMap::Incremental to properly add pool and pool name
+    // This ensures both pools map and pool_name map are updated correctly
+    OSDMap::Incremental inc(osdmap.get_epoch() + 1);
+    inc.fsid = osdmap.get_fsid();
+    inc.new_pools[pool_id] = pool;
+    inc.new_pool_names[pool_id] = name;
+    
+    osdmap.apply_incremental(inc);
+    
+    return pool_id;
+  }
+  
+  // shared_ptr convenience overload; forwards to the reference version.
+  static int64_t add_pool(
+    std::shared_ptr<OSDMap> osdmap,
+    int64_t pool_id,
+    const pg_pool_t& pool,
+    const std::string& pool_name = "")
+  {
+    return add_pool(*osdmap, pool_id, pool, pool_name);
+  }
+  
+  // Look up a pool by id; returns nullptr if the pool does not exist.
+  static const pg_pool_t* get_pool(
+    const OSDMap& osdmap,
+    int64_t pool_id)
+  {
+    return osdmap.get_pg_pool(pool_id);
+  }
+  
+  // shared_ptr convenience overload.
+  static const pg_pool_t* get_pool(
+    const std::shared_ptr<OSDMap>& osdmap,
+    int64_t pool_id)
+  {
+    return get_pool(*osdmap, pool_id);
+  }
+  
+  // Set acting set for a PG using pg_temp (standard Ceph mechanism for overriding CRUSH).
+  // For EC pools with nonprimary_shards optimization, pg_temp must be stored in
+  // "primaryfirst" order (primary-capable shards first). This simulates what the
+  // monitor does in production when initially setting up pg_temp.
+  // An empty `acting` removes any existing pg_temp entry. Advances the epoch.
+  static void set_pg_acting(
+    OSDMap& osdmap,
+    pg_t pgid,
+    const std::vector<int>& acting)
+  {
+    OSDMap::Incremental inc(osdmap.get_epoch() + 1);
+    inc.fsid = osdmap.get_fsid();
+    
+    if (acting.empty()) {
+      // Empty acting set means remove pg_temp
+      inc.new_pg_temp[pgid] = mempool::osdmap::vector<int32_t>();
+    } else {
+      // For EC pools with optimizations, transform to primaryfirst order.
+      // This is used for initial setup. For dynamic changes during peering,
+      // the test should let peering detect invalid primaries and request
+      // corrections via queue_want_pg_temp().
+      std::vector<int> transformed_acting = acting;
+      const pg_pool_t* pool = osdmap.get_pg_pool(pgid.pool());
+      if (pool && pool->allows_ecoptimizations()) {
+        transformed_acting = osdmap.pgtemp_primaryfirst(*pool, acting);
+      }
+      
+      mempool::osdmap::vector<int32_t> temp_acting;
+      for (int osd : transformed_acting) {
+        temp_acting.push_back(osd);
+      }
+      inc.new_pg_temp[pgid] = temp_acting;
+    }
+    
+    osdmap.apply_incremental(inc);
+  }
+  
+  // shared_ptr convenience overload.
+  static void set_pg_acting(
+    std::shared_ptr<OSDMap> osdmap,
+    pg_t pgid,
+    const std::vector<int>& acting)
+  {
+    set_pg_acting(*osdmap, pgid, acting);
+  }
+  
+  // Fill `acting` with the PG's current acting set.
+  // Returns true iff the acting set is non-empty.
+  static bool get_pg_acting(
+    const OSDMap& osdmap,
+    pg_t pgid,
+    std::vector<int>& acting)
+  {
+    acting.clear();
+    int primary;
+    osdmap.pg_to_acting_osds(pgid, &acting, &primary);
+    return !acting.empty();
+  }
+  
+  // shared_ptr convenience overload.
+  static bool get_pg_acting(
+    const std::shared_ptr<OSDMap>& osdmap,
+    pg_t pgid,
+    std::vector<int>& acting)
+  {
+    return get_pg_acting(*osdmap, pgid, acting);
+  }
+  
+  // Force the acting primary for a PG via primary_temp. Advances the epoch.
+  static void set_pg_acting_primary(
+    OSDMap& osdmap,
+    pg_t pgid,
+    int primary)
+  {
+    OSDMap::Incremental inc(osdmap.get_epoch() + 1);
+    inc.fsid = osdmap.get_fsid();
+    inc.new_primary_temp[pgid] = primary;
+    osdmap.apply_incremental(inc);
+  }
+  
+  // shared_ptr convenience overload.
+  static void set_pg_acting_primary(
+    std::shared_ptr<OSDMap> osdmap,
+    pg_t pgid,
+    int primary)
+  {
+    set_pg_acting_primary(*osdmap, pgid, primary);
+  }
+  
+  // Retrieve the PG's acting primary; returns true iff a primary exists
+  // (pg_to_acting_osds reports -1 when there is none).
+  static bool get_pg_acting_primary(
+    const OSDMap& osdmap,
+    pg_t pgid,
+    int& primary)
+  {
+    std::vector<int> acting;
+    osdmap.pg_to_acting_osds(pgid, &acting, &primary);
+    return primary >= 0;
+  }
+  
+  // shared_ptr convenience overload.
+  static bool get_pg_acting_primary(
+    const std::shared_ptr<OSDMap>& osdmap,
+    pg_t pgid,
+    int& primary)
+  {
+    return get_pg_acting_primary(*osdmap, pgid, primary);
+  }
+  
+  // Build a pg_pool_t describing a k+m erasure-coded pool.
+  // NOTE(review): the pool_id parameter is currently unused — the id is
+  // assigned later by add_pool(); confirm whether it can be dropped.
+  static pg_pool_t create_ec_pool(
+    int k,
+    int m,
+    uint64_t stripe_width,
+    uint64_t flags,
+    int64_t pool_id = 0)
+  {
+    pg_pool_t pool;
+    pool.type = pg_pool_t::TYPE_ERASURE;
+    pool.size = k + m;
+    pool.min_size = k;
+    pool.crush_rule = 0;
+    pool.erasure_code_profile = "default";
+    pool.stripe_width = stripe_width;
+    
+    // Set flags as specified by caller
+    pool.flags = flags;
+    
+    // Only set nonprimary_shards if OPTIMIZATIONS flag is set
+    if (flags & pg_pool_t::FLAG_EC_OPTIMIZATIONS) {
+      // Mark shards 1 to k-1 (inclusive) as nonprimary
+      // Shard 0 can be primary, shards k to k+m-1 (coding shards) can be primary
+      for (int i = 1; i < k; i++) {
+        pool.nonprimary_shards.insert(shard_id_t(i));
+      }
+    }
+    
+    return pool;
+  }
+  
+  // Build a pg_pool_t describing a replicated pool of the given size.
+  // NOTE(review): pool_id is unused here as well (see create_ec_pool).
+  static pg_pool_t create_replicated_pool(
+    int size,
+    int min_size,
+    int64_t pool_id = 0)
+  {
+    pg_pool_t pool;
+    pool.type = pg_pool_t::TYPE_REPLICATED;
+    pool.size = size;
+    pool.min_size = min_size;
+    pool.crush_rule = 0;
+    
+    return pool;
+  }
+  
+  // Place shards 0..k+m-1 of an EC PG on OSDs 0..k+m-1 via pg_temp.
+  // The primary_shard parameter is intentionally unused (see comment below).
+  static void setup_ec_pg(
+    OSDMap& osdmap,
+    pg_t pgid,
+    int k,
+    int m,
+    int primary_shard = 0)
+  {
+    std::vector<int> acting;
+    for (int i = 0; i < k + m; i++) {
+      acting.push_back(i);
+    }
+    set_pg_acting(osdmap, pgid, acting);
+    // Don't set primary_temp for EC pools - let OSDMap determine the primary
+    // based on the pool's nonprimary_shards configuration
+    // set_pg_acting_primary(osdmap, pgid, primary_shard);
+  }
+  
+  // shared_ptr convenience overload.
+  static void setup_ec_pg(
+    std::shared_ptr<OSDMap> osdmap,
+    pg_t pgid,
+    int k,
+    int m,
+    int primary_shard = 0)
+  {
+    setup_ec_pg(*osdmap, pgid, k, m, primary_shard);
+  }
+
+  // Copy the pool, unset the flag, then apply via incremental.
+  static void clear_pool_flag(
+    OSDMap& osdmap,
+    int64_t pool_id,
+    uint64_t flag)
+  {
+    const pg_pool_t* existing = osdmap.get_pg_pool(pool_id);
+    ceph_assert(existing != nullptr);
+
+    pg_pool_t updated = *existing;
+    updated.unset_flag(flag);
+
+    OSDMap::Incremental inc(osdmap.get_epoch() + 1);
+    inc.fsid = osdmap.get_fsid();
+    inc.new_pools[pool_id] = updated;
+    osdmap.apply_incremental(inc);
+  }
+
+  static void clear_pool_flag(
+    std::shared_ptr<OSDMap> osdmap,
+    int64_t pool_id,
+    uint64_t flag)
+  {
+    clear_pool_flag(*osdmap, pool_id, flag);
+  }
+
+  // OSD state manipulation methods
+  
+  /**
+   * Mark an OSD as down (exists but not UP) in the OSDMap.
+   * Creates a new epoch.
+   *
+   * OSDMap::Incremental::new_state is an XOR mask applied to the OSD's
+   * current state bits (this is how the rest of this harness brings OSDs
+   * up: it toggles CEPH_OSD_UP on). To take an UP OSD down we therefore
+   * XOR out the UP bit; assigning CEPH_OSD_EXISTS here would instead
+   * toggle the EXISTS bit and leave UP set.
+   *
+   * @param osdmap The OSDMap to modify
+   * @param osd_id The OSD to mark as down
+   */
+  static void mark_osd_down(OSDMap& osdmap, int osd_id)
+  {
+    if (!osdmap.is_up(osd_id)) {
+      return;  // Already down; avoid toggling UP back on.
+    }
+    OSDMap::Incremental inc(osdmap.get_epoch() + 1);
+    inc.fsid = osdmap.get_fsid();
+    inc.new_state[osd_id] = CEPH_OSD_UP;  // XOR mask: clears the UP bit
+    osdmap.apply_incremental(inc);
+  }
+  
+  // shared_ptr convenience overload.
+  static void mark_osd_down(std::shared_ptr<OSDMap> osdmap, int osd_id)
+  {
+    mark_osd_down(*osdmap, osd_id);
+  }
+  
+  /**
+   * Mark an OSD as up in the OSDMap.
+   * Creates a new epoch.
+   *
+   * new_state entries are XOR masks, so we toggle only the UP bit on a
+   * currently-down OSD. The previous value (CEPH_OSD_EXISTS | CEPH_OSD_UP)
+   * would have toggled EXISTS off for an existing-but-down OSD, producing
+   * an UP OSD that no longer exists.
+   * NOTE(review): this only flips the state bit; it does not install
+   * client/cluster addrs (new_up_client etc.) — sufficient for these
+   * tests, confirm if address-dependent paths are exercised.
+   *
+   * @param osdmap The OSDMap to modify
+   * @param osd_id The OSD to mark as up
+   */
+  static void mark_osd_up(OSDMap& osdmap, int osd_id)
+  {
+    if (osdmap.is_up(osd_id)) {
+      return;  // Already up; avoid toggling UP off.
+    }
+    OSDMap::Incremental inc(osdmap.get_epoch() + 1);
+    inc.fsid = osdmap.get_fsid();
+    inc.new_state[osd_id] = CEPH_OSD_UP;  // XOR mask: sets the UP bit
+    osdmap.apply_incremental(inc);
+  }
+  
+  // shared_ptr convenience overload.
+  static void mark_osd_up(std::shared_ptr<OSDMap> osdmap, int osd_id)
+  {
+    mark_osd_up(*osdmap, osd_id);
+  }
+  
+  /**
+   * Mark multiple OSDs as down in the OSDMap.
+   * Creates a new epoch.
+   *
+   * @param osdmap The OSDMap to modify
+   * @param osd_ids The OSDs to mark as down
+   */
+  static void mark_osds_down(OSDMap& osdmap, const std::vector<int>& osd_ids)
+  {
+    OSDMap::Incremental inc(osdmap.get_epoch() + 1);
+    inc.fsid = osdmap.get_fsid();
+    for (int osd_id : osd_ids) {
+      inc.new_state[osd_id] = CEPH_OSD_EXISTS;  // Mark as down (exists but not UP)
+    }
+    osdmap.apply_incremental(inc);
+  }
+  
+  static void mark_osds_down(std::shared_ptr<OSDMap> osdmap, const std::vector<int>& osd_ids)
+  {
+    mark_osds_down(*osdmap, osd_ids);
+  }
+  
+  /**
+   * Advance to a new epoch without changing OSD states.
+   * Useful for testing re-peering scenarios.
+   *
+   * Applies an otherwise-empty Incremental, so only the epoch changes.
+   *
+   * @param osdmap The OSDMap to modify
+   */
+  static void advance_epoch(OSDMap& osdmap)
+  {
+    OSDMap::Incremental inc(osdmap.get_epoch() + 1);
+    inc.fsid = osdmap.get_fsid();
+    osdmap.apply_incremental(inc);
+  }
+  
+  // shared_ptr convenience overload.
+  static void advance_epoch(std::shared_ptr<OSDMap> osdmap)
+  {
+    advance_epoch(*osdmap);
+  }
+};
diff --git a/src/test/osd/PGBackendTestFixture.cc b/src/test/osd/PGBackendTestFixture.cc
new file mode 100644 (file)
index 0000000..c54bfc5
--- /dev/null
@@ -0,0 +1,553 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+// vim: ts=8 sw=2 sts=2 expandtab
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2026 IBM
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#include "test/osd/PGBackendTestFixture.h"
+#include "common/errno.h"
+
+// Build the full EC test topology: an OSDMap with k+m OSDs up/in, an EC
+// pool, one collection/listener/ECSwitch backend per shard, and a message
+// router so shards can exchange backend messages through the event loop.
+// Must run before any I/O helper (create_and_write/write/read_object).
+void PGBackendTestFixture::setup_ec_pool()
+{
+  CephContext *cct = g_ceph_context;
+
+  osdmap = std::make_shared<OSDMap>();
+  osdmap->set_max_osd(k + m);
+
+  // Seed every OSD as existing-but-down/out; they are brought up/in below.
+  for (int i = 0; i < k + m; i++) {
+    osdmap->set_state(i, CEPH_OSD_EXISTS);
+    osdmap->set_weight(i, CEPH_OSD_OUT);
+    osdmap->crush->set_item_name(i, "osd." + std::to_string(i));
+  }
+
+  // Use incremental to set OSDs as up and with proper features
+  OSDMap::Incremental inc(osdmap->get_epoch() + 1);
+  inc.fsid = osdmap->get_fsid();
+
+  for (int i = 0; i < k + m; i++) {
+    // new_state is an XOR mask: this toggles the UP bit on.
+    inc.new_state[i] = CEPH_OSD_UP;
+    inc.new_weight[i] = CEPH_OSD_IN;
+
+    // Set up_thru to a high value to avoid WaitUpThru state during initial peering
+    // The OSDMap will go through several increments (adding pools, etc.) so we need
+    // up_thru to be higher than the final epoch
+    inc.new_up_thru[i] = 100;
+
+    // Set OSD features to include NAUTILUS, OCTOPUS and QUINCY server features (required for peering)
+    osd_xinfo_t xinfo;
+    xinfo.features = CEPH_FEATUREMASK_SERVER_NAUTILUS | CEPH_FEATUREMASK_SERVER_OCTOPUS | CEPH_FEATUREMASK_SERVER_QUINCY;
+    inc.new_xinfo[i] = xinfo;
+  }
+
+  // Apply the incremental to set state, weight, and features
+  // This will properly calculate up_osd_features
+  osdmap->apply_incremental(inc);
+
+  pg_pool_t pool = OSDMapTestHelpers::create_ec_pool(k, m, stripe_unit * k, pool_flags, pool_id);
+  OSDMapTestHelpers::add_pool(osdmap, pool_id, pool);
+
+  pgid = pg_t(0, pool_id);
+  spgid = spg_t(pgid, shard_id_t(0));
+
+  OSDMapTestHelpers::setup_ec_pg(osdmap, pgid, k, m, 0);
+
+  // Finalize the CRUSH map to calculate working_size
+  // This is required for crush_init_workspace() to work correctly
+  osdmap->crush->finalize();
+
+  // Either a mock erasure code or a real plugin loaded from the build tree.
+  if (ec_plugin == "mock") {
+    ec_impl = std::make_shared<MockErasureCode>(k, k + m);
+  } else {
+    ErasureCodeProfile profile;
+    profile["k"] = std::to_string(k);
+    profile["m"] = std::to_string(m);
+    profile["plugin"] = ec_plugin;
+
+    if (!ec_technique.empty()) {
+      profile["technique"] = ec_technique;
+    }
+
+    profile["stripe_unit"] = std::to_string(stripe_unit);
+
+    std::stringstream ss;
+    // Tests are run from the build directory, so "./lib" points to the
+    // erasure code plugins in the build tree rather than /usr/local/lib64/ceph/erasure-code/
+    int ret = ceph::ErasureCodePluginRegistry::instance().factory(
+      ec_plugin,
+      "./lib",
+      profile,
+      &ec_impl,
+      &ss);
+
+    if (ret != 0) {
+      FAIL() << "Failed to create EC plugin '" << ec_plugin << "': " << ss.str();
+      return;
+    }
+  }
+
+  // One collection per shard; shard 0's handle doubles as the fixture's
+  // default ch/coll.
+  ObjectStore::Transaction t;
+  for (int i = 0; i < k + m; i++) {
+    spg_t shard_spgid(pgid, shard_id_t(i));
+    coll_t shard_coll(shard_spgid);
+    auto shard_ch = store->create_new_collection(shard_coll);
+    t.create_collection(shard_coll, 0);
+
+    colls[i] = shard_coll;
+    chs[i] = shard_ch;
+
+    if (i == 0) {
+      ch = shard_ch;
+      coll = shard_coll;
+    }
+  }
+
+  ASSERT_EQ(store->queue_transaction(ch, std::move(t)), 0);
+
+  const pg_pool_t* pool_ptr = OSDMapTestHelpers::get_pool(osdmap, pool_id);
+  ceph_assert(pool_ptr != nullptr);
+
+  // Per-shard listener + ECSwitch backend. A test may supply its own
+  // listener_factory to substitute a specialized listener.
+  for (int i = 0; i < k + m; i++) {
+    std::unique_ptr<MockPGBackendListener> shard_listener;
+    if (listener_factory) {
+      shard_listener = listener_factory(
+        i,
+        osdmap,
+        pool_id,
+        dpp.get(),
+        pg_shard_t(i, shard_id_t(i)));
+    } else {
+      shard_listener = std::make_unique<MockPGBackendListener>(
+        osdmap,
+        pool_id,
+        dpp.get(),
+        pg_shard_t(i, shard_id_t(i))
+      );
+    }
+
+    // Initialize the listener's own info.pgid so OSDMap queries work
+    shard_listener->info.pgid = spg_t(pgid, shard_id_t(i));
+
+    for (int j = 0; j < k + m; j++) {
+      shard_listener->shardset.insert(pg_shard_t(j, shard_id_t(j)));
+      shard_listener->acting_recovery_backfill_shard_id_set.insert(shard_id_t(j));
+
+      // Initialize shard_info for each shard - required by EC backend
+      pg_info_t shard_pg_info;
+      shard_pg_info.pgid = spg_t(pgid, shard_id_t(j));
+      shard_listener->shard_info[pg_shard_t(j, shard_id_t(j))] = shard_pg_info;
+
+      // Initialize shard_missing for each shard - required by EC backend
+      pg_missing_t shard_missing;
+      shard_listener->shard_missing[pg_shard_t(j, shard_id_t(j))] = shard_missing;
+    }
+
+    shard_listener->set_store(store.get(), chs[i]);
+    shard_listener->set_event_loop(event_loop.get());
+    shard_listener->set_op_tracker(op_tracker.get());
+
+    auto shard_lru = std::make_unique<ECExtentCache::LRU>(1024 * 1024 * 100);
+    auto shard_ec_switch = std::make_unique<ECSwitch>(
+      shard_listener.get(), colls[i], chs[i], store.get(),
+      cct, ec_impl, stripe_unit * k, *shard_lru);
+
+    listeners[i] = std::move(shard_listener);
+    lrus[i] = std::move(shard_lru);
+    backends[i] = std::move(shard_ec_switch);
+  }
+
+  // Route messages addressed to OSD i into that OSD's backend.
+  for (int i = 0; i < k + m; i++) {
+    message_router[i] = [this, i](OpRequestRef op) -> bool {
+      return backends[i]->_handle_message(op);
+    };
+  }
+
+  for (int i = 0; i < k + m; i++) {
+    listeners[i]->set_message_router(&message_router);
+    listeners[i]->set_handle_message_callback(
+      [this, i](OpRequestRef op) -> bool {
+        return backends[i]->_handle_message(op);
+      });
+  }
+}
+
+// Replicated-pool counterpart of setup_ec_pool(): one shared collection,
+// NO_SHARD shard ids, and a ReplicatedBackend per replica.
+// NOTE(review): only osd.0 is ever marked EXISTS|UP here — replicas
+// 1..num_replicas-1 are never brought up in the OSDMap. Confirm whether
+// pg_temp filtering of down OSDs matters for these tests.
+void PGBackendTestFixture::setup_replicated_pool()
+{
+  CephContext *cct = g_ceph_context;
+
+  osdmap = std::make_shared<OSDMap>();
+  osdmap->set_max_osd(num_replicas);
+  osdmap->set_state(0, CEPH_OSD_EXISTS | CEPH_OSD_UP);
+
+  pg_pool_t pool;
+  pool.type = pg_pool_t::TYPE_REPLICATED;
+  pool.size = num_replicas;
+  pool.min_size = min_size;
+  pool.crush_rule = 0;
+
+  osdmap->inc_epoch();
+
+  OSDMapTestHelpers::add_pool(osdmap, pool_id, pool);
+
+  // Finalize the CRUSH map to calculate working_size
+  // This is required for crush_init_workspace() to work correctly
+  osdmap->crush->finalize();
+
+  pgid = pg_t(0, pool_id);
+  spgid = spg_t(pgid, shard_id_t::NO_SHARD);
+  
+  // Set up pg_temp to define the acting set with OSD 0 as primary
+  std::vector<int> acting;
+  for (int i = 0; i < num_replicas; i++) {
+    acting.push_back(i);
+  }
+  OSDMapTestHelpers::set_pg_acting(osdmap, pgid, acting);
+  OSDMapTestHelpers::set_pg_acting_primary(osdmap, pgid, 0);
+
+  ObjectStore::Transaction t;
+  spg_t replica_spgid(pgid, shard_id_t::NO_SHARD);
+  coll_t replica_coll(replica_spgid);
+  auto replica_ch = store->create_new_collection(replica_coll);
+  t.create_collection(replica_coll, 0);
+
+  ASSERT_EQ(store->queue_transaction(replica_ch, std::move(t)), 0);
+
+  // All replicas share the same collection
+  for (int i = 0; i < num_replicas; i++) {
+    colls[i] = replica_coll;
+    chs[i] = replica_ch;
+  }
+
+  ch = replica_ch;
+  coll = replica_coll;
+
+  const pg_pool_t* pool_ptr = OSDMapTestHelpers::get_pool(osdmap, pool_id);
+  ceph_assert(pool_ptr != nullptr);
+
+  // Per-replica listener + ReplicatedBackend, mirroring setup_ec_pool().
+  for (int i = 0; i < num_replicas; i++) {
+    std::unique_ptr<MockPGBackendListener> replica_listener;
+    if (listener_factory) {
+      replica_listener = listener_factory(
+        i,
+        osdmap,
+        pool_id,
+        dpp.get(),
+        pg_shard_t(i, shard_id_t::NO_SHARD));
+    } else {
+      replica_listener = std::make_unique<MockPGBackendListener>(
+        osdmap,
+        pool_id,
+        dpp.get(),
+        pg_shard_t(i, shard_id_t::NO_SHARD)
+      );
+    }
+
+    // Initialize the listener's own info.pgid so OSDMap queries work
+    replica_listener->info.pgid = spg_t(pgid, shard_id_t::NO_SHARD);
+
+    // For replicated pools, use NO_SHARD for all replicas
+    for (int j = 0; j < num_replicas; j++) {
+      replica_listener->shardset.insert(pg_shard_t(j, shard_id_t::NO_SHARD));
+
+      // Initialize shard_info for each replica - required by backend
+      pg_info_t replica_pg_info;
+      replica_pg_info.pgid = spg_t(pgid, shard_id_t::NO_SHARD);
+      replica_listener->shard_info[pg_shard_t(j, shard_id_t::NO_SHARD)] = replica_pg_info;
+
+      // Initialize shard_missing for each replica - required by backend
+      pg_missing_t replica_missing;
+      replica_listener->shard_missing[pg_shard_t(j, shard_id_t::NO_SHARD)] = replica_missing;
+    }
+
+    replica_listener->set_store(store.get(), chs[i]);
+    replica_listener->set_event_loop(event_loop.get());
+    replica_listener->set_op_tracker(op_tracker.get());
+
+    auto replica_backend = std::make_unique<ReplicatedBackend>(
+      replica_listener.get(), colls[i], chs[i], store.get(), cct);
+
+    listeners[i] = std::move(replica_listener);
+    backends[i] = std::move(replica_backend);
+  }
+
+  // Route messages addressed to replica i into that replica's backend.
+  for (int i = 0; i < num_replicas; i++) {
+    message_router[i] = [this, i](OpRequestRef op) -> bool {
+      return backends[i]->_handle_message(op);
+    };
+  }
+
+  for (int i = 0; i < num_replicas; i++) {
+    listeners[i]->set_message_router(&message_router);
+    listeners[i]->set_handle_message_callback(
+      [this, i](OpRequestRef op) -> bool {
+        return backends[i]->_handle_message(op);
+      });
+  }
+}
+
+// Submit a PGTransaction through the primary backend and drive the event
+// loop until the commit callback fires. Returns the callback's result code.
+// Throws std::runtime_error if the transaction does not complete within
+// the event-loop budget.
+// NOTE(review): `on_complete` captures `completed`/`completion_result` by
+// reference; if the timeout path throws while the Context is still queued,
+// a late invocation would touch destroyed stack variables — confirm the
+// event loop cannot fire it after run_until_idle() returns.
+int PGBackendTestFixture::do_transaction_and_complete(
+  const hobject_t& hoid,
+  PGTransactionUPtr pg_t,
+  const object_stat_sum_t& delta_stats,
+  const eversion_t& at_version,
+  std::vector<pg_log_entry_t> log_entries)
+{
+  eversion_t trim_to(0, 0);
+  eversion_t pg_committed_to(0, 0);
+  std::optional<pg_hit_set_history_t> hset_history;
+
+  bool completed = false;
+  int completion_result = -1;
+  Context *on_complete = new LambdaContext([&completed, &completion_result](int r) {
+    completed = true;
+    completion_result = r;
+  });
+
+  ceph_tid_t tid = 1;
+  osd_reqid_t reqid(entity_name_t::OSD(0), 0, tid);
+
+  PGBackend* primary_backend = get_primary_backend();
+  ceph_assert(primary_backend != nullptr);
+  primary_backend->submit_transaction(
+    hoid,
+    delta_stats,
+    at_version,
+    std::move(pg_t),
+    trim_to,
+    pg_committed_to,
+    std::move(log_entries),
+    hset_history,
+    on_complete,
+    tid,
+    reqid,
+    OpRequestRef()
+  );
+
+  // Single-threaded event loop: pump until all replica round-trips drain.
+  event_loop->run_until_idle(10000);
+
+  if (!completed) {
+    throw std::runtime_error("Transaction did not complete within timeout");
+  }
+
+  return completion_result;
+}
+
+// Create a new object and write `data` at offset 0 in a single transaction.
+// On success (return 0) the freshly-built ObjectContext is updated to
+// reflect the new size/version. Returns the backend completion code.
+int PGBackendTestFixture::create_and_write(
+  const std::string& obj_name,
+  const std::string& data,
+  const eversion_t& at_version)
+{
+  hobject_t hoid = make_test_object(obj_name);
+  PGTransactionUPtr pg_t = std::make_unique<PGTransaction>();
+  pg_t->create(hoid);
+
+  // Fresh (non-existent) object context; registered so the backend can find it.
+  ObjectContextRef obc = make_object_context(hoid, false, 0);
+  pg_t->obc_map[hoid] = obc;
+
+  bufferlist bl;
+  bl.append(data);
+  pg_t->write(hoid, 0, bl.length(), bl);
+
+  object_stat_sum_t delta_stats;
+  delta_stats.num_objects = 1;
+  delta_stats.num_bytes = bl.length();
+
+  // Single MODIFY log entry; unrollbackable because this is an initial create.
+  std::vector<pg_log_entry_t> log_entries;
+  pg_log_entry_t entry;
+  entry.mark_unrollbackable();
+  entry.op = pg_log_entry_t::MODIFY;
+  entry.soid = hoid;
+  entry.version = at_version;
+  entry.prior_version = eversion_t(0, 0);
+  log_entries.push_back(entry);
+
+  int result = do_transaction_and_complete(
+    hoid, std::move(pg_t), delta_stats, at_version, std::move(log_entries));
+
+  if (result == 0) {
+    obc->obs.exists = true;
+    obc->obs.oi.size = bl.length();
+    obc->obs.oi.version = at_version;
+  }
+
+  return result;
+}
+
+// Overwrite/append `data` at `offset` on an existing object of size
+// `object_size`. delta_stats only accounts for growth past the old size.
+// On success the object context's size/version are advanced.
+int PGBackendTestFixture::write(
+  const std::string& obj_name,
+  uint64_t offset,
+  const std::string& data,
+  const eversion_t& prior_version,
+  const eversion_t& at_version,
+  uint64_t object_size)
+{
+  hobject_t hoid = make_test_object(obj_name);
+  PGTransactionUPtr pg_t = std::make_unique<PGTransaction>();
+
+  // Existing object context seeded with the caller-supplied size/version.
+  ObjectContextRef obc = make_object_context(hoid, true, object_size);
+  obc->obs.oi.version = prior_version;
+  pg_t->obc_map[hoid] = obc;
+
+  bufferlist bl;
+  bl.append(data);
+  pg_t->write(hoid, offset, bl.length(), bl);
+
+  object_stat_sum_t delta_stats;
+  uint64_t new_size = std::max(object_size, offset + bl.length());
+  if (new_size > object_size) {
+    delta_stats.num_bytes = new_size - object_size;
+  } else {
+    delta_stats.num_bytes = 0;
+  }
+
+  std::vector<pg_log_entry_t> log_entries;
+  pg_log_entry_t entry;
+  // Don't mark as unrollbackable - partial writes need rollback support
+  entry.op = pg_log_entry_t::MODIFY;
+  entry.soid = hoid;
+  entry.version = at_version;
+  entry.prior_version = prior_version;
+  log_entries.push_back(entry);
+
+  int result = do_transaction_and_complete(
+    hoid, std::move(pg_t), delta_stats, at_version, std::move(log_entries));
+
+  if (result == 0) {
+    obc->obs.oi.size = new_size;
+    obc->obs.oi.version = at_version;
+  }
+
+  return result;
+}
+
+// Read [offset, offset+length) of an object into out_data.
+// EC pools use the async read path driven by the event loop; replicated
+// pools read synchronously. Returns the backend's completion code.
+// Throws std::runtime_error if the EC read does not complete in time.
+// NOTE(review): like do_transaction_and_complete(), the EC completion
+// Context captures stack locals by reference — same late-fire caveat.
+int PGBackendTestFixture::read_object(
+  const std::string& obj_name,
+  uint64_t offset,
+  uint64_t length,
+  bufferlist& out_data,
+  uint64_t object_size)
+{
+  hobject_t hoid = make_test_object(obj_name);
+
+  if (pool_type == EC) {
+    bool completed = false;
+    int completion_result = -1;
+
+    std::list<std::pair<ec_align_t, std::pair<bufferlist*, Context*>>> to_read;
+
+    ec_align_t align(offset, length, 0);
+
+    Context *read_complete = new LambdaContext([&completed, &completion_result](int r) {
+      completed = true;
+      completion_result = r;
+    });
+
+    to_read.push_back(std::make_pair(align, std::make_pair(&out_data, read_complete)));
+
+    // Whole-request completion is unused; the per-extent context above
+    // carries the result.
+    Context *on_complete = new LambdaContext([](int r) {
+    });
+
+    PGBackend* primary_backend = get_primary_backend();
+    ceph_assert(primary_backend != nullptr);
+    ECSwitch* ec_switch = dynamic_cast<ECSwitch*>(primary_backend);
+    ceph_assert(ec_switch != nullptr);
+
+    ec_switch->objects_read_async(
+      hoid,
+      object_size,
+      to_read,
+      on_complete,
+      false
+    );
+
+    event_loop->run_until_idle(10000);
+
+    if (!completed) {
+      throw std::runtime_error("Read operation did not complete within timeout");
+    }
+
+    return completion_result;
+  } else {
+    PGBackend* primary_backend = get_primary_backend();
+    ceph_assert(primary_backend != nullptr);
+    ReplicatedBackend* rep_backend = dynamic_cast<ReplicatedBackend*>(primary_backend);
+    ceph_assert(rep_backend != nullptr);
+
+    int result = rep_backend->objects_read_sync(
+      hoid,
+      offset,
+      length,
+      0,
+      &out_data
+    );
+
+    return result;
+  }
+}
+
+// ---------------------------------------------------------------------------
+// NOTE: update_osdmap() intentionally does NOT reconcile listener acting sets
+//
+// This method updates only:
+//   - The fixture's osdmap pointer
+//   - The osdmap reference in all listeners
+//
+// It does NOT update the following fields on any MockPGBackendListener:
+//   - shardset
+//   - acting_recovery_backfill_shard_id_set
+//   - shard_info
+//   - shard_missing
+//
+// This is intentional: those fields describe the acting set as seen by each
+// individual OSD, and their correct values depend on the specific failure
+// scenario being simulated.  Updating them blindly here would hide bugs and
+// make it impossible to test partial-failure cases.
+//
+// Callers that need to simulate an OSD failure MUST update those fields
+// themselves before (or after) calling update_osdmap().
+//
+// See TestECFailover::simulate_osd_failure() for a worked example that
+// removes the failed shard from shardset and
+// acting_recovery_backfill_shard_id_set on every listener before delegating
+// to update_osdmap().
+// ---------------------------------------------------------------------------
+// Swap in a new OSDMap: quiesce every backend via on_change(), then point
+// the fixture and each listener at the new map. Listener acting-set fields
+// are deliberately NOT reconciled here (see the note above); callers
+// simulating failures must adjust those themselves.
+// (new_primary is currently accepted but not consumed.)
+void PGBackendTestFixture::update_osdmap(
+  std::shared_ptr<OSDMap> new_osdmap,
+  std::optional<pg_shard_t> new_primary)
+{
+  // Clear in-flight operations on every live backend before the map flips.
+  for (auto& [osd_idx, backend_ptr] : backends) {
+    if (!backend_ptr) {
+      continue;
+    }
+    backend_ptr->on_change();
+  }
+
+  // Publish the new map to the fixture and to each listener.
+  osdmap = new_osdmap;
+  for (auto& [osd_idx, listener_ptr] : listeners) {
+    if (!listener_ptr) {
+      continue;
+    }
+    listener_ptr->osdmap = new_osdmap;
+  }
+}
+
+// Best-effort removal of the fixture's on-disk data directory.
+// Safe to call repeatedly: an empty path or already-removed directory is
+// simply skipped, and removal failures are swallowed.
+void PGBackendTestFixture::cleanup_data_dir()
+{
+  if (data_dir.empty()) {
+    return;  // nothing was ever created
+  }
+  if (std::filesystem::exists(data_dir)) {
+    // error_code overload: cleanup must never throw from a destructor path.
+    std::error_code ignored;
+    std::filesystem::remove_all(data_dir, ignored);
+  }
+}
+
diff --git a/src/test/osd/PGBackendTestFixture.h b/src/test/osd/PGBackendTestFixture.h
new file mode 100644 (file)
index 0000000..49c703f
--- /dev/null
@@ -0,0 +1,340 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+// vim: ts=8 sw=2 sts=2 expandtab
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2026 IBM
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#pragma once
+
+#include <filesystem>
+#include <memory>
+#include <random>
+#include <sstream>
+#include <iomanip>
+#include <gtest/gtest.h>
+#include "common/errno.h"
+#include "test/osd/MockErasureCode.h"
+#include "test/osd/MockPGBackendListener.h"
+#include "test/osd/EventLoop.h"
+#include "common/TrackedOp.h"
+#include "os/memstore/MemStore.h"
+#include "osd/ECSwitch.h"
+#include "osd/ECExtentCache.h"
+#include "osd/ReplicatedBackend.h"
+#include "osd/PGBackend.h"
+#include "osd/OSDMap.h"
+#include "osd/osd_types.h"
+#include "osd/PGTransaction.h"
+#include "common/ceph_context.h"
+#include "os/ObjectStore.h"
+#include "erasure-code/ErasureCodePlugin.h"
+#include "test/osd/OSDMapTestHelpers.h"
+
+// Unified test fixture for EC and Replicated backend tests with ObjectStore.
+// Uses PoolType to branch between EC (ECSwitch) and Replicated (ReplicatedBackend).
+class PGBackendTestFixture : public ::testing::Test {
+public:
+  // Which backend flavour the fixture instantiates in SetUp().
+  enum PoolType {
+    EC,
+    REPLICATED
+  };
+
+protected:
+  PoolType pool_type;
+
+  // Pool flags to set on the EC pool (e.g., FLAG_EC_OVERWRITES, FLAG_EC_OPTIMIZATIONS).
+  // Derived classes can set this before SetUp() to configure the pool flags.
+  // setup_ec_pool() uses this value when creating the pool.
+  // Default includes both OVERWRITES and OPTIMIZATIONS flags.
+  uint64_t pool_flags = pg_pool_t::FLAG_EC_OVERWRITES | pg_pool_t::FLAG_EC_OPTIMIZATIONS;
+  
+  // Backing object store and the on-disk directory it lives in (randomly
+  // named per fixture instance, see the constructor).
+  std::unique_ptr<MemStore> store;
+  std::string data_dir;
+  ObjectStore::CollectionHandle ch;
+  coll_t coll;
+  
+  std::shared_ptr<OSDMap> osdmap;
+  std::unique_ptr<OpTracker> op_tracker;
+  std::unique_ptr<EventLoop> event_loop;
+  // Per-instance message dispatch hooks, keyed by OSD instance id.
+  // NOTE(review): presumably the handler returns whether the message was
+  // consumed — confirm against EventLoop usage.
+  std::map<int, std::function<bool(OpRequestRef)>> message_router;
+  
+  // Per-OSD-instance state, all keyed by instance id [0, get_instance_count()).
+  std::map<int, std::unique_ptr<MockPGBackendListener>> listeners;
+  std::map<int, std::unique_ptr<PGBackend>> backends;
+  std::map<int, coll_t> colls;
+  std::map<int, ObjectStore::CollectionHandle> chs;
+  
+  /**
+   * Optional listener factory callback.
+   *
+   * If set, setup_ec_pool() and setup_replicated_pool() will call this
+   * factory instead of constructing MockPGBackendListener directly.
+   * The factory receives the instance index and the parameters needed to
+   * construct the listener, and must return a unique_ptr to the new
+   * MockPGBackendListener.  The returned object is stored in listeners[i]
+   * as usual, so ownership stays with the base class.
+   *
+   * Derived classes (e.g. ECPeeringTestFixture) can set this in their
+   * constructor to gain direct access to the created listeners without
+   * needing to steal ownership via release_listener().
+   */
+  std::function<std::unique_ptr<MockPGBackendListener>(
+    int instance,
+    std::shared_ptr<OSDMap> osdmap,
+    int64_t pool_id,
+    DoutPrefixProvider* dpp,
+    pg_shard_t whoami)> listener_factory;
+
+  // EC geometry and plugin selection; only meaningful when pool_type == EC.
+  ceph::ErasureCodeInterfaceRef ec_impl;
+  std::map<int, std::unique_ptr<ECExtentCache::LRU>> lrus;
+  int k = 4;  // data chunks
+  int m = 2;  // coding chunks
+  uint64_t stripe_unit = 4096;  // aka chunk_size
+  std::string ec_plugin = "isa";
+  std::string ec_technique = "reed_sol_van";
+  
+  // Replicated-pool geometry; only meaningful when pool_type == REPLICATED.
+  int num_replicas = 3;
+  int min_size = 2;
+  
+  int64_t pool_id = 0;
+  pg_t pgid;
+  spg_t spgid;
+  
+  // Minimal log-prefix provider so fixture output is tagged "PGBackendTest: ".
+  // NOTE(review): the single-argument constructor could be marked explicit.
+  class TestDpp : public NoDoutPrefix {
+  public:
+    TestDpp(CephContext *cct) : NoDoutPrefix(cct, ceph_subsys_osd) {}
+    
+    std::ostream& gen_prefix(std::ostream& out) const override {
+      out << "PGBackendTest: ";
+      return out;
+    }
+  };
+  std::unique_ptr<TestDpp> dpp;
+
+public:
+  // Picks a unique random data directory name; store creation itself is
+  // deferred to SetUp().
+  explicit PGBackendTestFixture(PoolType type = EC) : pool_type(type)
+  {
+    std::random_device rd;
+    std::mt19937_64 gen(rd());
+    std::uniform_int_distribution<uint64_t> dis;
+    uint64_t random_num = dis(gen);
+    
+    std::ostringstream oss;
+    oss << "memstore_test_" << std::hex << std::setfill('0') << std::setw(16) << random_num;
+    data_dir = oss.str();
+    
+    // The EC paths assume 4 KiB-aligned, non-zero chunk sizes.
+    ceph_assert(stripe_unit % 4096 == 0);
+    ceph_assert(stripe_unit != 0);
+  }
+  
+  ~PGBackendTestFixture() {
+    // Ensure cleanup happens even if TearDown() wasn't called or failed
+    cleanup_data_dir();
+  }
+  
+  // Creates the data directory and MemStore, then builds either the EC or
+  // replicated pool according to pool_type.
+  void SetUp() override {
+    int r = ::mkdir(data_dir.c_str(), 0777);
+    if (r < 0) {
+      r = -errno;
+      std::cerr << __func__ << ": unable to create " << data_dir << ": " << cpp_strerror(r) << std::endl;
+    }
+    ASSERT_EQ(0, r);
+    
+    // Create MemStore - contexts are stolen by MockPGBackendListener, so we don't need manual_finisher
+    store.reset(new MemStore(g_ceph_context, data_dir));
+    ASSERT_TRUE(store);
+    ASSERT_EQ(0, store->mkfs());
+    ASSERT_EQ(0, store->mount());
+    
+    g_conf().set_safe_to_start_threads();
+    
+    CephContext *cct = g_ceph_context;
+    dpp = std::make_unique<TestDpp>(cct);
+    event_loop = std::make_unique<EventLoop>(false);
+    op_tracker = std::make_unique<OpTracker>(cct, false, 1);
+    
+    if (pool_type == EC) {
+      setup_ec_pool();
+    } else {
+      setup_replicated_pool();
+    }
+  }
+  
+  // Tears down in dependency order: events, backends, EC state, listeners,
+  // tracker, collections, store, then the data directory.
+  void TearDown() override {
+    // 0. Process any remaining events in the EventLoop.
+    // If the test passed, orphaned events indicate a bug, so report a failure
+    // before draining.  If the test already failed, drain silently to avoid
+    // cascading errors.  In both cases the queue is drained so the rest of
+    // TearDown runs against a quiesced event loop.
+    if (event_loop) {
+      if (event_loop->has_events()) {
+        if (!HasFailure()) {
+          ADD_FAILURE() << "TearDown: " << event_loop->queued_event_count()
+                        << " orphaned events remain after a passing test";
+        }
+        event_loop->run_until_idle(1000);
+      }
+    }
+    
+    // 1. Clean up all backend instances (polymorphic cleanup)
+    //    Note: We skip calling on_change() during teardown as it may access
+    //    invalid state. The backends will be destroyed anyway.
+    backends.clear();
+    
+    // 2. Clean up EC-specific resources
+    if (pool_type == EC) {
+      lrus.clear();
+      ec_impl.reset();
+    }
+    
+    // 3. Clean up listeners
+    listeners.clear();
+    
+    // 4. Reset op tracker (call on_shutdown first)
+    if (op_tracker) {
+      op_tracker->on_shutdown();
+      op_tracker.reset();
+    }
+    
+    // 5. Reset all collection handles
+    chs.clear();
+    colls.clear();
+    
+    if (ch) {
+      ch.reset();
+    }
+    
+    // 6. Unmount and destroy the store
+    if (store) {
+      store->umount();
+      store.reset();
+    }
+    
+    // 7. Clean up the test directory
+    cleanup_data_dir();
+  }
+  
+private:
+  void setup_ec_pool();
+  void setup_replicated_pool();
+  void cleanup_data_dir();
+
+public:
+  // Looks up this fixture's pool in the current osdmap; asserts it exists.
+  const pg_pool_t& get_pool() const {
+    const pg_pool_t* pool = OSDMapTestHelpers::get_pool(osdmap, pool_id);
+    ceph_assert(pool != nullptr);
+    return *pool;
+  }
+  
+  // Number of simulated OSD instances: k+m shards for EC, num_replicas otherwise.
+  int get_instance_count() const {
+    return pool_type == EC ? (k + m) : num_replicas;
+  }
+  
+  int get_data_chunk_count() const {
+    return k;
+  }
+  
+  int get_coding_chunk_count() const {
+    return m;
+  }
+  
+  uint64_t get_stripe_width() const {
+    return stripe_unit * k;
+  }
+  
+  int get_min_size() const {
+    return min_size;
+  }
+  
+  // Get the primary listener and backend by checking which listener reports itself as primary
+  virtual MockPGBackendListener* get_primary_listener() {
+    for (auto& [instance, listener] : listeners) {
+      if (listener && listener->pgb_is_primary()) {
+        return listener.get();
+      }
+    }
+    return nullptr;
+  }
+  
+  virtual PGBackend* get_primary_backend() {
+    for (auto& [instance, listener] : listeners) {
+      if (listener && listener->pgb_is_primary()) {
+        auto it = backends.find(instance);
+        return (it != backends.end()) ? it->second.get() : nullptr;
+      }
+    }
+    return nullptr;
+  }
+  
+  // Build an hobject_t in this fixture's pool with default snap/hash/namespace.
+  hobject_t make_test_object(const std::string& name) const {
+    return hobject_t(object_t(name), "", CEPH_NOSNAP, 0, pool_id, "");
+  }
+  
+  // Build a minimal ObjectContext for hoid (no snapset context).
+  ObjectContextRef make_object_context(
+    const hobject_t& hoid,
+    bool exists = false,
+    uint64_t size = 0) const
+  {
+    ObjectContextRef obc = std::make_shared<ObjectContext>();
+    obc->obs.oi = object_info_t(hoid);
+    obc->obs.oi.size = size;
+    obc->obs.exists = exists;
+    obc->ssc = nullptr;
+    return obc;
+  }
+  
+  // Submit a PG transaction on the primary and drive it to completion.
+  // NOTE(review): the parameter name pg_t shadows the type ::pg_t.
+  int do_transaction_and_complete(
+    const hobject_t& hoid,
+    PGTransactionUPtr pg_t,
+    const object_stat_sum_t& delta_stats,
+    const eversion_t& at_version,
+    std::vector<pg_log_entry_t> log_entries);
+  
+  virtual int create_and_write(
+    const std::string& obj_name,
+    const std::string& data,
+    const eversion_t& at_version = eversion_t(1, 1));
+
+public:
+  
+  int write(
+    const std::string& obj_name,
+    uint64_t offset,
+    const std::string& data,
+    const eversion_t& prior_version,
+    const eversion_t& at_version,
+    uint64_t object_size);
+
+  int read_object(
+    const std::string& obj_name,
+    uint64_t offset,
+    uint64_t length,
+    bufferlist& out_data,
+    uint64_t object_size);
+
+  /**
+   * Update the OSDMap and trigger backend cleanup.
+   *
+   * Calls on_change() on all backends, then updates the osdmap reference in
+   * the fixture and all listeners.  The new_primary hint is not used by the
+   * base implementation (the primary is derived from the OSDMap); virtual
+   * overrides in derived fixtures may honour it.
+   *
+   * Does NOT update acting-set fields (shardset,
+   * acting_recovery_backfill_shard_id_set, shard_info, shard_missing) on any
+   * listener — those depend on the specific failure scenario being simulated
+   * and must be updated by the caller.  See TestECFailover::simulate_osd_failure()
+   * for a worked example.
+   */
+  virtual void update_osdmap(
+    std::shared_ptr<OSDMap> new_osdmap,
+    std::optional<pg_shard_t> new_primary = std::nullopt);
+
+};
+
diff --git a/src/test/osd/TestBackendBasics.cc b/src/test/osd/TestBackendBasics.cc
new file mode 100644 (file)
index 0000000..8e81f75
--- /dev/null
@@ -0,0 +1,594 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+// vim: ts=8 sw=2 sts=2 expandtab
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2026 IBM
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+/*
+ * TestBackendBasics - Unified parameterized test harness for EC and Replicated
+ * backend operations.
+ *
+ * Two fixture classes are defined, each parameterized over the full set of
+ * backend configurations:
+ *
+ * TestBackendBasics
+ *   Parameterized over BackendWriteReadParam (BackendConfig × WriteReadParam).
+ *   13 backends × 8 data sizes = 104 instances per test body.
+ *
+ *   WriteThenRead  – write data, verify protocol messages, read back, verify
+ *                    data integrity.
+ *   PartialWrite   – create an object, perform a partial write at a non-zero
+ *                    offset, read back and verify all three regions.
+ *
+ * TestECFailover
+ *   Parameterized over BackendConfig (EC configs only, 12 instances).
+ *   Failover is an EC-specific concept (shard-based primary election).
+ *
+ *   BasicOSDMapUpdate – write, update OSDMap epoch, verify read still works.
+ *   PrimaryFailover   – write, fail OSD 0, verify new primary and degraded
+ *                       read with EC reconstruction.
+ */
+
+#include <gtest/gtest.h>
+#include "test/osd/PGBackendTestFixture.h"
+#include "test/osd/TestCommon.h"
+#include "messages/MOSDECSubOpWrite.h"
+
+using namespace std;
+
+// ---------------------------------------------------------------------------
+// TestBackendBasics fixture
+// ---------------------------------------------------------------------------
+
+/**
+ * TestBackendBasics - single fixture parameterized over BackendWriteReadParam.
+ *
+ * The constructor reads the BackendConfig portion of the parameter and
+ * configures the base fixture fields (pool_type, k, m, stripe_unit, ec_plugin,
+ * ec_technique, pool_flags, num_replicas, min_size) before SetUp() is
+ * called by GTest.
+ */
+class TestBackendBasics : public PGBackendTestFixture,
+                          public ::testing::WithParamInterface<BackendWriteReadParam> {
+public:
+  // Configure the base fixture from the BackendConfig half of the parameter.
+  // GTest binds the parameter before constructing the fixture, so GetParam()
+  // is valid here; the base SetUp() then builds the pool from these fields.
+  TestBackendBasics() : PGBackendTestFixture() {
+    const auto& config = GetParam().backend;
+    pool_type = config.pool_type;
+    if (pool_type == EC) {
+      k = config.k;
+      m = config.m;
+      stripe_unit = config.stripe_unit;
+      ec_plugin = config.ec_plugin;
+      ec_technique = config.ec_technique;
+      pool_flags = config.pool_flags;
+    } else {
+      num_replicas = 3;
+      min_size = 2;
+    }
+  }
+
+  // No SetUp() override: the base class SetUp() is sufficient and GTest
+  // invokes it directly.  (The previous pure pass-through override added
+  // nothing.)
+};
+
+// ---------------------------------------------------------------------------
+// TestBackendBasics: WriteThenRead
+// ---------------------------------------------------------------------------
+
+/**
+ * WriteThenRead - write data of the parameterized size, verify protocol
+ * messages were sent, read back, and verify data integrity.
+ *
+ * For EC backends: asserts that MSG_OSD_EC_WRITE messages were sent and that
+ * read messages are sent to shards.
+ * For Replicated backends: asserts that at least one message was sent.
+ */
+TEST_P(TestBackendBasics, WriteThenRead) {
+  const auto& param = GetParam().write_read;
+  const auto& backend_config = GetParam().backend;
+
+  std::string test_data(param.size, param.fill);
+  std::string obj_name = "test_backend_" + backend_config.label + "_" + param.label;
+
+  // Execute create+write operation
+  int result = create_and_write(obj_name, test_data);
+  EXPECT_EQ(result, 0) << param.label << " write should complete successfully";
+
+  // Verify messages were sent to replicas/shards
+  auto* primary_listener = get_primary_listener();
+  ASSERT_TRUE(primary_listener != nullptr) << "Primary listener should exist";
+  ASSERT_GT(primary_listener->sent_messages.size(), 0u)
+    << "Should send messages to replicas/shards";
+
+  // For EC backends: verify EC write messages were sent.
+  if (backend_config.pool_type == EC) {
+    int write_messages_sent = 0;
+    // Iterate by const reference: sent_messages holds message handles and a
+    // by-value loop would copy one per iteration.
+    for (const auto& msg : primary_listener->sent_messages) {
+      if (msg->get_type() == MSG_OSD_EC_WRITE) {
+        write_messages_sent++;
+      }
+    }
+    ASSERT_GT(write_messages_sent, 0) << "Should send EC write messages";
+  }
+
+  // Clear sent messages before read to distinguish read messages
+  primary_listener->sent_messages.clear();
+  primary_listener->sent_messages_with_dest.clear();
+
+  // Perform the read operation
+  bufferlist read_data;
+  int read_result = read_object(
+    obj_name,
+    0,                  // offset
+    test_data.length(), // length
+    read_data,
+    test_data.length()  // object_size
+  );
+
+  EXPECT_GE(read_result, 0) << param.label << " read should complete successfully";
+
+  // Verify data length
+  ASSERT_EQ(read_data.length(), test_data.length())
+    << param.label << " read data length should match written data length";
+
+  // Verify data content
+  std::string read_string(read_data.c_str(), read_data.length());
+  EXPECT_EQ(read_string, test_data)
+    << param.label << " read data should match written data";
+
+  // For EC backends: verify read messages were sent to shards
+  if (backend_config.pool_type == EC) {
+    primary_listener = get_primary_listener();
+    ASSERT_TRUE(primary_listener != nullptr) << "Primary listener should exist";
+    ASSERT_GT(primary_listener->sent_messages.size(), 0u)
+      << "Should send read messages to EC shards";
+  }
+
+  // All events should be processed by now
+  ASSERT_FALSE(event_loop->has_events()) << "Event loop should be idle after read";
+
+  // Leave a clean slate for any code that inspects sent_messages later.
+  primary_listener = get_primary_listener();
+  if (primary_listener) {
+    primary_listener->sent_messages.clear();
+  }
+}
+
+// ---------------------------------------------------------------------------
+// TestBackendBasics: PartialWrite
+// ---------------------------------------------------------------------------
+
+/**
+ * PartialWrite - create an object of the parameterized size (rounded up to a
+ * multiple of the stripe width for EC, or used directly for replicated), write
+ * a partial region at a non-zero offset, read back and verify that:
+ *   - the region before the partial write is unchanged,
+ *   - the partial-write region contains the new data,
+ *   - the region after the partial write is unchanged.
+ */
+TEST_P(TestBackendBasics, PartialWrite) {
+  const auto& param = GetParam().write_read;
+  const auto& backend_config = GetParam().backend;
+
+  std::string obj_name = "test_partial_" + backend_config.label + "_" + param.label;
+
+  // Use the parameterized size as the initial object size, but ensure it is
+  // large enough to accommodate a non-trivial partial write.  We need at least
+  // 3 regions: prefix, modified, suffix.  Use max(param.size, 3 * 4096) so
+  // that even the smallest size parameters produce a meaningful test.
+  const size_t initial_size = std::max(param.size, size_t(3 * 4096));
+
+  // Partial write covers the middle third of the object (aligned to 4 KB).
+  // If initial_size/3 rounds down to zero, fall back to a single 4 KB region.
+  const size_t region = (initial_size / 3) & ~size_t(4095);  // round down to 4 KB
+  const size_t partial_offset = region ? region : 4096;
+  const size_t partial_size   = region ? region : 4096;
+
+  // Create initial data filled with the parameterized fill character
+  std::string initial_data(initial_size, param.fill);
+
+  int result = create_and_write(obj_name, initial_data, eversion_t(1, 1));
+  EXPECT_EQ(result, 0) << param.label << " initial write should complete successfully";
+
+  // Partial write data uses the next fill character (wraps around 'z' -> 'a').
+  // With the current kSizeParams fills ('A'..'H') the wrap branch is
+  // defensive only; it never triggers.
+  char partial_fill = (param.fill == 'z') ? 'a' : (param.fill + 1);
+  std::string partial_data(partial_size, partial_fill);
+
+  // Overwrite the middle region at version (1,2) on top of (1,1).
+  result = write(
+    obj_name,
+    partial_offset,
+    partial_data,
+    eversion_t(1, 1),  // prior_version
+    eversion_t(1, 2),  // at_version
+    initial_size       // object_size
+  );
+  EXPECT_EQ(result, 0) << param.label << " partial write should complete successfully";
+
+  // Read back the entire object
+  bufferlist read_data;
+  int read_result = read_object(obj_name, 0, initial_size, read_data, initial_size);
+  EXPECT_GE(read_result, 0)
+    << param.label << " read after partial write should complete successfully";
+
+  ASSERT_EQ(read_data.length(), initial_size)
+    << param.label << " read data length should match object size";
+
+  const char* buf = read_data.c_str();
+
+  // Region before the partial write should be unchanged
+  for (size_t i = 0; i < partial_offset; i++) {
+    ASSERT_EQ(buf[i], param.fill)
+      << param.label << " data before partial write offset should be unchanged at position " << i;
+  }
+
+  // Partial-write region should contain the new fill character
+  for (size_t i = partial_offset; i < partial_offset + partial_size; i++) {
+    ASSERT_EQ(buf[i], partial_fill)
+      << param.label << " data at partial write region should be '" << partial_fill
+      << "' at position " << i;
+  }
+
+  // Region after the partial write should be unchanged
+  for (size_t i = partial_offset + partial_size; i < initial_size; i++) {
+    ASSERT_EQ(buf[i], param.fill)
+      << param.label << " data after partial write region should be unchanged at position " << i;
+  }
+}
+
+// ---------------------------------------------------------------------------
+// TestBackendBasics: DirectRead
+// ---------------------------------------------------------------------------
+
+/**
+ * DirectRead - test EC direct reads to individual shards.
+ *
+ * This test:
+ * 1. Skips non-optimized EC (we don't support sync reads there)
+ * 2. Writes patterned data covering an entire stripe
+ * 3. Performs sync reads to each data shard with EC_DIRECT_READ flag
+ * 4. Verifies data integrity for each shard
+ */
+TEST_P(TestBackendBasics, DirectRead) {
+  const auto& param = GetParam().write_read;
+  const auto& backend_config = GetParam().backend;
+
+  // Skip test for non-EC backends
+  if (backend_config.pool_type != EC) {
+    GTEST_SKIP() << "DirectRead test only applies to EC backends";
+  }
+
+  // Skip test for non-optimized EC - we don't support sync reads
+  if (!(backend_config.pool_flags & pg_pool_t::FLAG_EC_OPTIMIZATIONS)) {
+    GTEST_SKIP() << "DirectRead test requires optimized EC";
+  }
+
+  std::string obj_name = "test_direct_read_" + backend_config.label + "_" + param.label;
+
+  // Get stripe width from the pool
+  uint64_t stripe_width = get_stripe_width();
+
+  // Create patterned data where each stripe_unit has a distinct pattern
+  // This allows us to verify we're reading the correct shard
+  std::string test_data;
+  test_data.reserve(stripe_width);
+  
+  for (size_t i = 0; i < stripe_width; i++) {
+    // Pattern: each stripe_unit gets a different character based on its shard position
+    size_t shard_index = i / stripe_unit;
+    char fill_char = 'A' + (shard_index % 26);
+    test_data.push_back(fill_char);
+  }
+
+  // Write the data (one full stripe)
+  int result = create_and_write(obj_name, test_data);
+  EXPECT_EQ(result, 0) << param.label << " write should complete successfully";
+
+  hobject_t hoid = make_test_object(obj_name);
+
+  // Perform direct reads to each data shard (skip coding shards).
+  // NOTE(review): this assumes data shards occupy instance ids [0, k) —
+  // holds for the fixture's initial layout; confirm if the layout changes.
+  for (auto& [shard_id, backend] : backends) {
+    // Skip coding shards - only test data shards
+    if (shard_id >= k) {
+      continue;
+    }
+
+    ASSERT_TRUE(backend != nullptr) << "Backend for shard " << shard_id << " should not be null";
+    
+    ECSwitch* ec_switch = dynamic_cast<ECSwitch*>(backend.get());
+    ASSERT_TRUE(ec_switch != nullptr) << "Backend should be ECSwitch for EC pools";
+
+    bufferlist shard_data;
+    
+    // Perform sync read with EC_DIRECT_READ flag
+    // Read the entire stripe - we expect only this shard's data back
+    int read_result = ec_switch->objects_read_sync(
+      hoid,
+      0,                                    // offset
+      stripe_width,                         // length (full stripe)
+      CEPH_OSD_RMW_FLAG_EC_DIRECT_READ,    // op_flags with direct read flag
+      &shard_data
+    );
+
+    EXPECT_GE(read_result, 0)
+      << param.label << " direct read to shard " << shard_id << " should complete successfully";
+
+    // For direct reads, we expect to get back only the data for this shard
+    // which is one stripe_unit
+    ASSERT_EQ(shard_data.length(), stripe_unit)
+      << param.label << " shard " << shard_id << " should return " << stripe_unit << " bytes";
+
+    // Verify data integrity: this shard should contain the expected pattern
+    // (shard i holds bytes [i*stripe_unit, (i+1)*stripe_unit) of the stripe,
+    // which were all written as 'A' + i above).
+    const char* buf = shard_data.c_str();
+    char expected_char = 'A' + (shard_id % 26);
+    
+    for (size_t i = 0; i < stripe_unit; i++) {
+      ASSERT_EQ(buf[i], expected_char)
+        << param.label << " shard " << shard_id << " byte " << i
+        << " should be '" << expected_char << "'";
+    }
+  }
+
+  // Clean up
+  auto* primary_listener = get_primary_listener();
+  if (primary_listener) {
+    primary_listener->sent_messages.clear();
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Backend configurations and size parameters
+// ---------------------------------------------------------------------------
+
+namespace {
+
+// The full matrix of backend configurations under test: one replicated
+// baseline plus EC variants over {isa, jerasure} × {optimized, non-optimized}
+// × several (k, m, stripe_unit) shapes.
+// NOTE: the initializer order must match the BackendConfig field declaration
+// order: {pool_type, ec_plugin, ec_technique, pool_flags, stripe_unit, k, m,
+// label}.  Do not reorder BackendConfig fields without updating this table.
+const std::vector<BackendConfig> kBackendConfigs = {
+  {PGBackendTestFixture::REPLICATED, "", "", 0, 4096, 4, 2, "Replicated"},
+  {PGBackendTestFixture::EC, "isa", "reed_sol_van", pg_pool_t::FLAG_EC_OVERWRITES | pg_pool_t::FLAG_EC_OPTIMIZATIONS,  4096,  4, 2, "EC_ISA_Opt_k4m2_su4k"},
+  {PGBackendTestFixture::EC, "isa", "reed_sol_van", pg_pool_t::FLAG_EC_OVERWRITES | pg_pool_t::FLAG_EC_OPTIMIZATIONS,  8192,  4, 2, "EC_ISA_Opt_k4m2_su8k"},
+  {PGBackendTestFixture::EC, "isa", "reed_sol_van", pg_pool_t::FLAG_EC_OVERWRITES | pg_pool_t::FLAG_EC_OPTIMIZATIONS,  16384, 4, 2, "EC_ISA_Opt_k4m2_su16k"},
+  {PGBackendTestFixture::EC, "isa", "reed_sol_van", pg_pool_t::FLAG_EC_OVERWRITES | pg_pool_t::FLAG_EC_OPTIMIZATIONS,  4096,  2, 1, "EC_ISA_Opt_k2m1_su4k"},
+  {PGBackendTestFixture::EC, "isa", "reed_sol_van", pg_pool_t::FLAG_EC_OVERWRITES | pg_pool_t::FLAG_EC_OPTIMIZATIONS,  4096,  8, 3, "EC_ISA_Opt_k8m3_su4k"},
+  {PGBackendTestFixture::EC, "isa", "reed_sol_van", pg_pool_t::FLAG_EC_OVERWRITES, 4096,  4, 2, "EC_ISA_NonOpt_k4m2_su4k"},
+  {PGBackendTestFixture::EC, "jerasure", "reed_sol_van", pg_pool_t::FLAG_EC_OVERWRITES | pg_pool_t::FLAG_EC_OPTIMIZATIONS,  4096,  4, 2, "EC_Jerasure_Opt_k4m2_su4k"},
+  {PGBackendTestFixture::EC, "jerasure", "reed_sol_van", pg_pool_t::FLAG_EC_OVERWRITES | pg_pool_t::FLAG_EC_OPTIMIZATIONS,  8192,  4, 2, "EC_Jerasure_Opt_k4m2_su8k"},
+  {PGBackendTestFixture::EC, "jerasure", "reed_sol_van", pg_pool_t::FLAG_EC_OVERWRITES | pg_pool_t::FLAG_EC_OPTIMIZATIONS,  16384, 4, 2, "EC_Jerasure_Opt_k4m2_su16k"},
+  {PGBackendTestFixture::EC, "jerasure", "reed_sol_van", pg_pool_t::FLAG_EC_OVERWRITES | pg_pool_t::FLAG_EC_OPTIMIZATIONS,  4096,  2, 1, "EC_Jerasure_Opt_k2m1_su4k"},
+  {PGBackendTestFixture::EC, "jerasure", "reed_sol_van", pg_pool_t::FLAG_EC_OVERWRITES | pg_pool_t::FLAG_EC_OPTIMIZATIONS,  4096,  8, 3, "EC_Jerasure_Opt_k8m3_su4k"},
+  {PGBackendTestFixture::EC, "jerasure", "reed_sol_van", pg_pool_t::FLAG_EC_OVERWRITES, 4096,  4, 2, "EC_Jerasure_NonOpt_k4m2_su4k"},
+};
+
+// Data sizes covering chunk-aligned, stripe-aligned, and unaligned cases;
+// each gets a distinct fill character so cross-test leakage is detectable.
+const std::vector<WriteReadParam> kSizeParams = {
+  {4  * 1024,       'A', "4k"},
+  {8  * 1024,       'B', "8k"},
+  {12 * 1024,       'C', "12k"},
+  {12 * 1024 + 512, 'D', "12_5k"},
+  {16 * 1024,       'E', "16k"},
+  {31 * 1024 + 512, 'F', "31_5k"},
+  {32 * 1024,       'G', "32k"},
+  {32 * 1024 + 512, 'H', "32_5k"},
+};
+
+/**
+ * Build the cross-product of kBackendConfigs × kSizeParams.
+ */
+std::vector<BackendWriteReadParam> make_cross_product() {
+  std::vector<BackendWriteReadParam> result;
+  result.reserve(kBackendConfigs.size() * kSizeParams.size());
+  for (const auto& backend : kBackendConfigs) {
+    for (const auto& size : kSizeParams) {
+      result.push_back({backend, size});
+    }
+  }
+  return result;
+}
+
+}  // namespace
+
+// ---------------------------------------------------------------------------
+// Instantiate TestBackendBasics with the full cross-product
+// ---------------------------------------------------------------------------
+
+// The name generator concatenates the backend and size labels; both contain
+// only [A-Za-z0-9_], as GTest requires for generated test names.
+INSTANTIATE_TEST_SUITE_P(
+  BackendSizes,
+  TestBackendBasics,
+  ::testing::ValuesIn(make_cross_product()),
+  [](const ::testing::TestParamInfo<BackendWriteReadParam>& info) {
+    return info.param.backend.label + "_" + info.param.write_read.label;
+  }
+);
+
+// ---------------------------------------------------------------------------
+// TestECFailover fixture and tests
+// ---------------------------------------------------------------------------
+
+/**
+ * TestECFailover - tests OSDMap updates and primary failover, parameterized
+ * over all EC backend configurations.
+ *
+ * Failover is an EC-specific concept (shard-based primary election), so only
+ * EC configs are included.  The fixture reads k/m/stripe_unit/plugin/technique
+ * from the BackendConfig parameter so that every EC variant is exercised.
+ */
+class TestECFailover : public PGBackendTestFixture,
+                       public ::testing::WithParamInterface<BackendConfig> {
+public:
+  // Configure the base fixture from the EC BackendConfig parameter; the base
+  // SetUp() (invoked directly by GTest, no override needed) then builds the
+  // pool from these fields.
+  TestECFailover() : PGBackendTestFixture(PGBackendTestFixture::EC) {
+    const auto& config = GetParam();
+    k = config.k;
+    m = config.m;
+    stripe_unit = config.stripe_unit;
+    ec_plugin = config.ec_plugin;
+    ec_technique = config.ec_technique;
+    pool_flags = config.pool_flags;
+  }
+
+  /**
+   * Mark @p failed_osd down, rebuild pg_temp without it, and propagate the
+   * new map via update_osdmap().
+   *
+   * @param failed_osd  instance/OSD id to fail.  Also used as its shard id:
+   *                    the fixture's initial acting set maps instance i to
+   *                    shard i.
+   * @param new_primary_instance  the instance the caller expects to become
+   *                    primary.  Unused: the primary is derived from the
+   *                    OSDMap by update_osdmap(); kept for call-site
+   *                    readability.
+   */
+  void simulate_osd_failure(int failed_osd,
+                            [[maybe_unused]] int new_primary_instance)
+  {
+    auto new_osdmap = std::make_shared<OSDMap>();
+    new_osdmap->deepish_copy_from(*osdmap);
+
+    // Build new acting set with the failed OSD replaced by CRUSH_ITEM_NONE
+    std::vector<int> new_acting;
+    for (int i = 0; i < k+m; i++) {
+      new_acting.push_back((i == failed_osd) ? CRUSH_ITEM_NONE : i);
+    }
+    
+    // Get the pool to use pgtemp_primaryfirst transformation
+    const pg_pool_t* pool = new_osdmap->get_pg_pool(pgid.pool());
+    ceph_assert(pool != nullptr);
+    
+    // For EC pools with optimizations, pgtemp_primaryfirst reorders the acting set
+    // to put primary-eligible shards first. We need to apply this transformation
+    // before setting pg_temp so that the OSDMap will correctly identify the primary.
+    std::vector<int> transformed_acting = new_osdmap->pgtemp_primaryfirst(*pool, new_acting);
+    
+    // Use OSDMap::Incremental to set pg_temp with the transformed acting set
+    OSDMap::Incremental inc(new_osdmap->get_epoch() + 1);
+    inc.fsid = new_osdmap->get_fsid();
+    inc.new_state[failed_osd] = CEPH_OSD_EXISTS;  // Mark as down (exists but not UP)
+    
+    // Convert to mempool vector for pg_temp
+    mempool::osdmap::vector<int> pg_temp_vec(transformed_acting.begin(), transformed_acting.end());
+    inc.new_pg_temp[pgid] = pg_temp_vec;
+
+    new_osdmap->apply_incremental(inc);
+    
+    // Finalize the CRUSH map to ensure working_size is calculated
+    new_osdmap->crush->finalize();
+
+    // Remove the failed shard from every listener's view of the acting set;
+    // update_osdmap() deliberately leaves these fields to the caller.
+    pg_shard_t failed_shard(failed_osd, shard_id_t(failed_osd));
+    for (auto& [instance_id, list] : listeners) {
+      list->shardset.erase(failed_shard);
+      list->acting_recovery_backfill_shard_id_set.erase(shard_id_t(failed_osd));
+    }
+
+    // update_osdmap will query the OSDMap to determine the primary
+    update_osdmap(new_osdmap);
+  }
+};
+
+TEST_P(TestECFailover, BasicOSDMapUpdate) {
+  const std::string obj_name = "test_failover_object";
+  const std::string test_data = "Initial data before OSDMap change";
+
+  // Write and read back once to establish a known-good baseline.
+  int result = create_and_write(obj_name, test_data);
+  EXPECT_EQ(result, 0) << "Initial write should complete successfully";
+
+  bufferlist read_data;
+  int read_result = read_object(obj_name, 0, test_data.length(), read_data, test_data.length());
+  EXPECT_GE(read_result, 0) << "Read should complete successfully";
+  ASSERT_EQ(read_data.length(), test_data.length());
+
+  // Bump the epoch without changing membership: a pure map-churn event.
+  auto new_osdmap = std::make_shared<OSDMap>();
+  new_osdmap->deepish_copy_from(*osdmap);
+  new_osdmap->inc_epoch();
+
+  update_osdmap(new_osdmap);
+
+  // The fixture and all listeners should now reference the new map.
+  EXPECT_EQ(osdmap, new_osdmap) << "OSDMap should be updated";
+  auto* primary_listener = get_primary_listener();
+  ASSERT_TRUE(primary_listener != nullptr) << "Primary listener should exist";
+  EXPECT_EQ(primary_listener->osdmap, new_osdmap) << "Listener OSDMap should be updated";
+
+  // Reads must still succeed and return identical data after the map change.
+  bufferlist read_data2;
+  read_result = read_object(obj_name, 0, test_data.length(), read_data2, test_data.length());
+  EXPECT_GE(read_result, 0) << "Read after OSDMap update should complete successfully";
+  ASSERT_EQ(read_data2.length(), test_data.length());
+
+  std::string read_string(read_data2.c_str(), read_data2.length());
+  EXPECT_EQ(read_string, test_data) << "Data should match after OSDMap update";
+}
+
+TEST_P(TestECFailover, PrimaryFailover) {
+  const std::string obj_name = "test_primary_failover";
+  const std::string test_data = "Data written before primary failover";
+
+  // Write and read back once to establish a known-good baseline.
+  int result = create_and_write(obj_name, test_data);
+  EXPECT_EQ(result, 0) << "Initial write should complete successfully";
+
+  bufferlist read_data;
+  int read_result = read_object(obj_name, 0, test_data.length(), read_data, test_data.length());
+  EXPECT_GE(read_result, 0) << "Read should complete successfully";
+  ASSERT_EQ(read_data.length(), test_data.length());
+
+  std::string read_string(read_data.c_str(), read_data.length());
+  EXPECT_EQ(read_string, test_data) << "Data should match before failover";
+
+  EXPECT_TRUE(listeners[0]->pgb_is_primary())
+    << "Instance 0 should be primary before failover";
+  EXPECT_FALSE(listeners[k]->pgb_is_primary())
+    << "Instance " << k << " should not be primary before failover";
+
+  // Determine expected new primary based on pool optimization
+  // For optimized EC: shards 1 to k-1 are nonprimary, so new primary will be shard k
+  // For non-optimized EC: any shard can be primary, so new primary will be shard 1
+  const pg_pool_t& pool = get_pool();
+  bool is_optimized = pool.has_flag(pg_pool_t::FLAG_EC_OPTIMIZATIONS);
+  int expected_new_primary = is_optimized ? k : 1;
+  
+  simulate_osd_failure(0, expected_new_primary);
+
+  EXPECT_FALSE(listeners[0]->pgb_is_primary())
+    << "Instance 0 should not be primary after failover";
+  EXPECT_TRUE(listeners[expected_new_primary]->pgb_is_primary())
+    << "Instance " << expected_new_primary << " should be primary after failover";
+
+  // Verify the query functions return the correct primary.  Assert the
+  // listener is non-null *before* any use: the previous non-fatal check
+  // came after the dereference and would not have stopped a null deref.
+  auto* new_primary_listener = get_primary_listener();
+  auto* new_primary_backend = get_primary_backend();
+  ASSERT_TRUE(new_primary_listener != nullptr)
+    << "Primary listener should exist after failover";
+  EXPECT_EQ(new_primary_listener, listeners[expected_new_primary].get())
+    << "get_primary_listener() should return the new primary";
+  EXPECT_EQ(new_primary_backend, backends[expected_new_primary].get())
+    << "get_primary_backend() should return the new primary";
+
+  // A degraded read must succeed via EC reconstruction of the lost shard.
+  bufferlist read_data_after;
+  int read_result_after = read_object(obj_name, 0, test_data.length(), read_data_after, test_data.length());
+  EXPECT_GE(read_result_after, 0) << "Degraded read should complete successfully after failover";
+  ASSERT_EQ(read_data_after.length(), test_data.length());
+
+  std::string read_string_after(read_data_after.c_str(), read_data_after.length());
+  EXPECT_EQ(read_string_after, test_data) << "Data should match after failover with EC reconstruction";
+
+  EXPECT_GT(new_primary_listener->osdmap->get_epoch(), 1)
+    << "OSDMap epoch should have incremented after failover";
+}
+
+// ---------------------------------------------------------------------------
+// Instantiate TestECFailover with EC-only backend configurations
+// ---------------------------------------------------------------------------
+
+namespace {
+
+/**
+ * Filter kBackendConfigs down to the EC entries only; failover is an
+ * EC-specific concept, so replicated configs are excluded.
+ */
+std::vector<BackendConfig> make_ec_configs() {
+  std::vector<BackendConfig> out;
+  out.reserve(kBackendConfigs.size());
+  for (const auto& config : kBackendConfigs) {
+    if (config.pool_type != PGBackendTestFixture::EC) {
+      continue;
+    }
+    out.push_back(config);
+  }
+  return out;
+}
+
+}  // namespace
+
+// Labels contain only [A-Za-z0-9_], as GTest requires for generated names.
+INSTANTIATE_TEST_SUITE_P(
+  ECBackends,
+  TestECFailover,
+  ::testing::ValuesIn(make_ec_configs()),
+  [](const ::testing::TestParamInfo<BackendConfig>& info) {
+    return info.param.label;
+  }
+);
diff --git a/src/test/osd/TestCommon.h b/src/test/osd/TestCommon.h
new file mode 100644 (file)
index 0000000..4a68844
--- /dev/null
@@ -0,0 +1,63 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+// vim: ts=8 sw=2 sts=2 expandtab
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2026 IBM
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#pragma once
+
+#include <cstddef>
+#include <string>
+#include "test/osd/PGBackendTestFixture.h"
+
+/**
+ * WriteReadParam - parameter structure for write-then-read parameterized tests.
+ *
+ * Shared between test files to avoid ODR violations if both translation units
+ * are ever linked together, and to eliminate code duplication.
+ */
+struct WriteReadParam {
+  size_t size;        // payload size in bytes for the write-then-read cycle
+  char fill;          // byte value used to fill the payload
+  std::string label;  // suffix used to name the generated test instance (must be a valid gtest name)
+};
+
+/**
+ * BackendConfig - parameterizes the backend type for unified tests.
+ *
+ * Each configuration defines a pool type (EC or REPLICATED) plus
+ * EC-specific settings.  The test fixture uses this to configure
+ * PGBackendTestFixture before SetUp().
+ */
+struct BackendConfig {
+  PGBackendTestFixture::PoolType pool_type;
+  // EC-specific (ignored for REPLICATED)
+  std::string ec_plugin;     // e.g. "isa", "jerasure", "mock"
+  std::string ec_technique;  // e.g. "reed_sol_van"
+  uint64_t pool_flags = 0;   // Pool flags (e.g., FLAG_EC_OVERWRITES | FLAG_EC_OPTIMIZATIONS); default to none so a default-constructed config is well-defined
+  uint64_t stripe_unit = 4096;  // aka chunk_size; stripe_width = stripe_unit * k
+  int k = 4;  // data chunks (EC only)
+  int m = 2;  // coding chunks (EC only)
+  // Label for test naming
+  std::string label;
+};
+
+/**
+ * BackendWriteReadParam - combined parameter for backend + write/read size tests.
+ *
+ * Used for two-level parameterization: backend configuration × data sizes.
+ */
+struct BackendWriteReadParam {
+  BackendConfig backend;      // which backend (pool type / EC settings) to run against
+  WriteReadParam write_read;  // payload size/fill/label for the write-then-read cycle
+};
+
diff --git a/src/test/osd/TestECFailoverWithPeering.cc b/src/test/osd/TestECFailoverWithPeering.cc
new file mode 100644 (file)
index 0000000..37fffcb
--- /dev/null
@@ -0,0 +1,463 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+// vim: ts=8 sw=2 sts=2 expandtab
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2026 IBM
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#include <gtest/gtest.h>
+#include "test/osd/ECPeeringTestFixture.h"
+
+using namespace std;
+
+class TestECFailoverWithPeering : public ECPeeringTestFixture {
+public:
+  TestECFailoverWithPeering() : ECPeeringTestFixture() {  // configure a 4+2 EC pool before SetUp() builds the cluster
+    k = 4;               // data chunks
+    m = 2;               // coding chunks
+    stripe_unit = 4096;  // chunk size in bytes
+    ec_plugin = "isa";
+    ec_technique = "reed_sol_van";
+  }
+};
+
+TEST_F(TestECFailoverWithPeering, BasicPeeringCycle) {
+  run_peering_cycle();
+
+  EXPECT_TRUE(all_shards_active()) << "All shards should be active after peering";
+
+  // Note: In EC pools, only the primary tracks PG_STATE_CLEAN.
+  // Replicas are in ReplicaActive state and don't set the CLEAN flag.
+  // Look up the acting primary from the OSDMap rather than assuming it.
+  pg_t pgid = get_peering_state(0)->get_info().pgid.pgid;
+  std::vector<int> acting_osds;
+  int acting_primary = -1;
+  osdmap->pg_to_acting_osds(pgid, &acting_osds, &acting_primary);
+
+  EXPECT_TRUE(get_peering_state(acting_primary)->is_clean())
+    << "Primary should be clean after peering";
+
+  // With a healthy map the fixture's default mapping places the primary on shard 0.
+  EXPECT_TRUE(get_peering_listener(0)->backend_listener->pgb_is_primary())
+    << "Shard 0 should be primary";
+
+  for (int i = 1; i < k + m; i++) {
+    EXPECT_FALSE(get_peering_listener(i)->backend_listener->pgb_is_primary())
+      << "Shard " << i << " should not be primary";
+  }
+}
+
+TEST_F(TestECFailoverWithPeering, WriteWithPeering) {
+  run_peering_cycle();
+  ASSERT_TRUE(all_shards_active()) << "Peering must complete before write";
+
+  const std::string obj_name = "test_write_with_peering";
+  const std::string test_data = "Data written with full peering support";
+
+  int result = create_and_write(obj_name, test_data);
+  EXPECT_EQ(result, 0) << "Write should complete successfully";
+
+  bufferlist read_data;
+  int read_result = read_object(obj_name, 0, test_data.length(), read_data, test_data.length());
+  EXPECT_GE(read_result, 0) << "Read should complete successfully";
+  ASSERT_EQ(read_data.length(), test_data.length());
+
+  std::string read_string(read_data.c_str(), read_data.length());
+  EXPECT_EQ(read_string, test_data) << "Data should match";
+
+  auto* primary_ps = get_peering_state(0);  // the write must also be recorded in the primary's PG log
+  EXPECT_GT(primary_ps->get_pg_log().get_log().log.size(), 0)
+    << "Primary should have log entries after write";
+}
+
+TEST_F(TestECFailoverWithPeering, OSDFailureWithPeering) {
+  run_peering_cycle();
+  ASSERT_TRUE(all_shards_active()) << "Initial peering must complete";
+
+  const std::string obj_name = "test_osd_failure";
+  // Write 16KB but read only 8KB to force reconstruction when shard 1 is down
+  const std::string test_data(16384, 'X');  // 16KB write
+  const size_t read_length = 8192;  // 8KB read
+
+  int result = create_and_write(obj_name, test_data);
+  EXPECT_EQ(result, 0) << "Initial write should complete";
+
+  // Pre-failover read: measure baseline message count with all OSDs up
+  // Clear message counters first
+  for (auto& [shard, listener] : backend_listeners) {
+    listener->sent_messages.clear();
+  }
+
+  bufferlist pre_failover_read;
+  int pre_read_result = read_object(obj_name, 0, read_length,
+                                     pre_failover_read, test_data.length());
+  EXPECT_GE(pre_read_result, 0) << "Pre-failover read should complete";
+
+  // Count messages sent during pre-failover read
+  size_t pre_failover_msg_count = 0;
+  for (auto& [shard, listener] : backend_listeners) {
+    pre_failover_msg_count += listener->sent_messages.size();
+  }
+
+  int failed_osd = 1;  // Fail shard 1 which contains part of the data
+
+  // Use fixture helper to mark OSD as down
+  mark_osd_down(failed_osd);
+
+  // Primary (OSD 0) should remain active after non-primary OSD failure
+  auto* primary_ps = get_peering_state(0);
+  std::string primary_state = get_state_name(0);
+  EXPECT_TRUE(primary_state.find("Peering") != std::string::npos ||
+              primary_state.find("Active") != std::string::npos)
+    << "Primary should be peering or active after OSD failure, got: " << primary_state;
+
+  EXPECT_TRUE(primary_ps->get_acting_recovery_backfill().count(pg_shard_t(failed_osd, shard_id_t(failed_osd))) == 0)
+    << "Failed OSD should not be in acting set";
+
+  // Clear message counters before post-failover read
+  for (auto& [shard, listener] : backend_listeners) {
+    listener->sent_messages.clear();
+  }
+
+  // Post-failover read: verify EC reconstruction works with one OSD down
+  bufferlist post_failover_read;
+  int post_read_result = read_object(obj_name, 0, read_length,
+                                      post_failover_read, test_data.length());
+  EXPECT_GE(post_read_result, 0) << "Read should complete successfully after OSD failure";
+  ASSERT_EQ(post_failover_read.length(), read_length)
+    << "Read length should match after OSD failure";
+
+  std::string read_string(post_failover_read.c_str(), post_failover_read.length());
+  std::string expected_data(read_length, 'X');
+  EXPECT_EQ(read_string, expected_data)
+    << "Data should be correctly reconstructed via EC after OSD failure";
+
+  // Count messages sent during post-failover read
+  size_t post_failover_msg_count = 0;
+  for (auto& [shard, listener] : backend_listeners) {
+    post_failover_msg_count += listener->sent_messages.size();
+  }
+
+  // This is an 8k read of a 16k object in a 4+2 array.  This means that if shard 1
+  // is missing, then this should result in 4 reads, rather than 2 to recover.
+  EXPECT_GT(post_failover_msg_count, pre_failover_msg_count)
+    << "Post-failover read should require more shard messages due to EC reconstruction "
+    << "(pre: " << pre_failover_msg_count << ", post: " << post_failover_msg_count << ")";
+}
+
+TEST_F(TestECFailoverWithPeering, PrimaryFailoverWithPeering) {
+  run_peering_cycle();
+  ASSERT_TRUE(all_shards_active()) << "Initial peering must complete";
+
+  const std::string obj_name = "test_primary_failover";
+  const std::string test_data = "Data before primary failover";
+
+  int result = create_and_write(obj_name, test_data);
+  EXPECT_EQ(result, 0) << "Initial write should complete";
+
+  EXPECT_TRUE(get_peering_listener(0)->backend_listener->pgb_is_primary())
+    << "Shard 0 should be primary initially";
+
+  // Mark OSD 0 (the initial primary) as down
+  // PeeringState will automatically determine the new primary
+  mark_osd_down(0);
+
+  // Determine the actual new primary from the OSDMap
+  int new_primary_shard = get_primary_shard_from_osdmap();
+  ASSERT_GE(new_primary_shard, 0) << "Should have a valid new primary after failover";
+
+  // For an optimized EC pool (k=4, m=2), the new primary should be a coding shard (>= k)
+  // For a non-optimized pool, it would be shard 1
+  const pg_pool_t& pool = get_pool();
+  if (pool.allows_ecoptimizations()) {
+    EXPECT_GE(new_primary_shard, k)
+      << "New primary should be a coding shard (>= k) for optimized pool";
+  } else {
+    EXPECT_EQ(new_primary_shard, 1)
+      << "New primary should be shard 1 for non-optimized pool";
+  }
+
+  EXPECT_TRUE(get_peering_listener(new_primary_shard)->backend_listener->pgb_is_primary())
+    << "Shard " << new_primary_shard << " should be new primary";
+
+  EXPECT_FALSE(get_peering_listener(0)->backend_listener->pgb_is_primary())
+    << "Failed shard should not be primary";
+
+  std::string state = get_state_name(new_primary_shard);
+  EXPECT_TRUE(state.find("Active") != std::string::npos)
+    << "New primary should be Active after failover, got: " << state;
+
+  // Verify the PG reached Active state
+  EXPECT_TRUE(get_peering_state(new_primary_shard)->is_active())
+    << "New primary should be in Active state";
+
+  // Verify reads work after primary failover (with EC reconstruction)
+  bufferlist read_data;
+  int read_result = read_object(obj_name, 0, test_data.length(),
+                                read_data, test_data.length());
+  EXPECT_GE(read_result, 0) << "Read should complete successfully after primary failover";
+  ASSERT_EQ(read_data.length(), test_data.length())
+    << "Read length should match after primary failover";
+
+  std::string read_string(read_data.c_str(), read_data.length());
+  EXPECT_EQ(read_string, test_data)
+    << "Data should be correctly reconstructed via EC after primary failover";
+}
+
+TEST_F(TestECFailoverWithPeering, MultipleOSDFailuresWithPeering) {
+  run_peering_cycle();
+  ASSERT_TRUE(all_shards_active()) << "Initial peering must complete";
+
+  const std::string obj_name = "test_multiple_failures";
+  const std::string test_data = "Data before multiple failures";
+
+  int result = create_and_write(obj_name, test_data);
+  EXPECT_EQ(result, 0) << "Initial write should complete";
+
+  std::vector<int> failed_osds = {1, 2};  // Fail 2 data shards
+  ASSERT_EQ(failed_osds.size(), static_cast<size_t>(m))
+    << "Should fail exactly m OSDs";
+
+  // Use fixture helper to mark multiple OSDs as down
+  mark_osds_down(failed_osds);
+
+  auto* primary_ps = get_peering_state(0);
+  for (int failed_osd : failed_osds) {
+    EXPECT_TRUE(primary_ps->get_acting_recovery_backfill().count(
+      pg_shard_t(failed_osd, shard_id_t(failed_osd))) == 0)
+      << "Failed OSD " << failed_osd << " should not be in acting set";
+  }
+
+  std::string primary_state = get_state_name(0);
+  EXPECT_TRUE(primary_state.find("Peering") != std::string::npos ||
+              primary_state.find("Active") != std::string::npos ||
+              primary_state.find("Recovery") != std::string::npos)
+    << "Primary should be operational, got: " << primary_state;
+}
+
+TEST_F(TestECFailoverWithPeering, PeeringWithLogDivergence) {
+  run_peering_cycle();
+  ASSERT_TRUE(all_shards_active()) << "Initial peering must complete";
+
+  const std::string pre_div_obj = "test_pre_divergence";
+  const std::string pre_div_data = "Data written before divergence";
+
+  int result = create_and_write(pre_div_obj, pre_div_data, eversion_t(1, 1));
+  EXPECT_EQ(result, 0) << "Pre-divergence write should complete";
+
+  auto* primary_ps = get_peering_state(0);
+  size_t initial_log_size = primary_ps->get_pg_log().get_log().log.size();
+  EXPECT_GT(initial_log_size, 0) << "Primary should have log entries after pre-divergence write";
+
+  // Note: get_pg_log().get_log().head reflects the log entries added via append_log
+  eversion_t pre_div_log_head = primary_ps->get_pg_log().get_log().head;
+  EXPECT_GT(pre_div_log_head.version, 0u) << "PG log head should be non-zero after write";
+
+  const std::string post_div_obj = "test_post_divergence";
+  const std::string post_div_data = "Data written after divergence point";
+
+  result = create_and_write(post_div_obj, post_div_data, eversion_t(1, 2));
+  EXPECT_EQ(result, 0) << "Post-divergence write should complete";
+
+  eversion_t post_div_log_head = primary_ps->get_pg_log().get_log().head;
+  EXPECT_GT(post_div_log_head.version, pre_div_log_head.version)
+    << "PG log head should advance after post-divergence write";
+
+  size_t log_size_after_writes = primary_ps->get_pg_log().get_log().log.size();
+  EXPECT_GE(log_size_after_writes, initial_log_size)
+    << "Primary log should have at least as many entries after second write";
+
+  // Trigger a new peering cycle by advancing the map to simulate re-peering
+  // after a shard had a divergent log.
+  advance_epoch();
+
+  std::string primary_state = get_state_name(0);
+  ASSERT_TRUE(all_shards_active() ||
+              primary_state.find("Recovery") != std::string::npos ||
+              primary_state.find("Peering") != std::string::npos)
+    << "Shards should be active, recovering, or peering after map advance, got: "
+    << primary_state;
+
+  // --- Verify pre-divergence data is readable and correct ---
+  bufferlist pre_div_read;
+  int read_result = read_object(pre_div_obj, 0, pre_div_data.length(),
+                                pre_div_read, pre_div_data.length());
+  EXPECT_GE(read_result, 0) << "Pre-divergence object should be readable after reconciliation";
+  ASSERT_EQ(pre_div_read.length(), pre_div_data.length())
+    << "Pre-divergence read length should match";
+  {
+    std::string read_str(pre_div_read.c_str(), pre_div_read.length());
+    EXPECT_EQ(read_str, pre_div_data)
+      << "Pre-divergence data should match after log reconciliation";
+  }
+
+  // --- Verify post-divergence data is readable and correct ---
+  bufferlist post_div_read;
+  read_result = read_object(post_div_obj, 0, post_div_data.length(),
+                            post_div_read, post_div_data.length());
+  EXPECT_GE(read_result, 0) << "Post-divergence object should be readable after reconciliation";
+  ASSERT_EQ(post_div_read.length(), post_div_data.length())
+    << "Post-divergence read length should match";
+  {
+    std::string read_str(post_div_read.c_str(), post_div_read.length());
+    EXPECT_EQ(read_str, post_div_data)
+      << "Post-divergence data should match after log reconciliation";
+  }
+
+  // After peering, the primary's PG log head should reflect all writes.
+  eversion_t primary_log_head = primary_ps->get_pg_log().get_log().head;
+  EXPECT_EQ(primary_log_head, post_div_log_head)
+    << "Primary PG log head should reflect all writes after reconciliation";
+
+  pg_t pgid = get_peering_state(0)->get_info().pgid.pgid;
+  std::vector<int> acting_osds;
+  int acting_primary = -1;
+  osdmap->pg_to_acting_osds(pgid, &acting_osds, &acting_primary);
+
+  for (int shard : acting_osds) {
+    if (shard == CRUSH_ITEM_NONE) {
+      continue;
+    }
+    auto* shard_ps = get_peering_state(shard);
+    if (shard_ps->is_active()) {
+      eversion_t shard_info_last_update = shard_ps->get_info().last_update;
+      if (shard == acting_primary) {
+        EXPECT_EQ(shard_info_last_update, post_div_log_head)
+          << "Primary shard info.last_update should match post-divergence log head";
+      } else {
+        EXPECT_LE(shard_info_last_update, post_div_log_head)
+          << "Shard " << shard << " info.last_update should not exceed primary's log head";
+      }
+    }
+  }
+
+  // Verify the formerly-failed shard's PG log is accessible and consistent.
+  // We use the last data shard (k-1) as the "formerly-failed" shard to check.
+  int reconciled_shard = k - 1;
+  if (reconciled_shard >= 0 && reconciled_shard < k + m) {
+    auto* reconciled_ps = get_peering_state(reconciled_shard);
+    size_t reconciled_log_size = reconciled_ps->get_pg_log().get_log().log.size();
+    auto* primary_ps_check = get_peering_state(acting_primary);
+    size_t primary_log_size = primary_ps_check->get_pg_log().get_log().log.size();
+    EXPECT_LE(reconciled_log_size, primary_log_size)
+      << "Reconciled shard " << reconciled_shard
+      << " log size should not exceed primary's log size";
+
+    if (reconciled_ps->is_active()) {
+      eversion_t reconciled_info_lu = reconciled_ps->get_info().last_update;
+      EXPECT_LE(reconciled_info_lu, post_div_log_head)
+        << "Reconciled shard " << reconciled_shard
+        << " info.last_update should not exceed primary's log head after log reconciliation";
+    }
+  }
+}
+
+TEST_F(TestECFailoverWithPeering, RecoveryWithPeering) {
+  run_peering_cycle();
+  ASSERT_TRUE(all_shards_active()) << "Initial peering must complete";
+
+  const std::string obj1_name = "test_recovery_obj1";
+  const std::string obj1_data = "First object data for recovery test";
+
+  const std::string obj2_name = "test_recovery_obj2";
+  const std::string obj2_data = "Second object data for recovery test";
+
+  int result = create_and_write(obj1_name, obj1_data, eversion_t(1, 1));
+  EXPECT_EQ(result, 0) << "First pre-failure write should complete";
+
+  result = create_and_write(obj2_name, obj2_data, eversion_t(1, 2));
+  EXPECT_EQ(result, 0) << "Second pre-failure write should complete";
+
+  EXPECT_TRUE(all_shards_clean()) << "All shards should be clean before recovery test";
+
+  auto* primary_ps = get_peering_state(0);
+  eversion_t pre_failure_log_head = primary_ps->get_pg_log().get_log().head;
+  EXPECT_GT(pre_failure_log_head.version, 0u)
+    << "Primary should have log entries before failure";
+
+  int failed_osd = k - 1;  // Last data shard
+
+  // Use fixture helper to mark OSD as down
+  mark_osd_down(failed_osd);
+
+  std::string state_after_failure = get_state_name(0);
+  ASSERT_TRUE(all_shards_active() ||
+              state_after_failure.find("Recovery") != std::string::npos ||
+              state_after_failure.find("Peering") != std::string::npos)
+    << "PG should be active, recovering, or peering after OSD failure, got: "
+    << state_after_failure;
+
+  // EC can reconstruct data from remaining k shards even with one shard missing
+  bufferlist obj1_read;
+  int read_result = read_object(obj1_name, 0, obj1_data.length(),
+                                obj1_read, obj1_data.length());
+  EXPECT_GE(read_result, 0) << "First object should be readable after OSD failure";
+  ASSERT_EQ(obj1_read.length(), obj1_data.length())
+    << "First object read length should match after failure";
+  {
+    std::string read_str(obj1_read.c_str(), obj1_read.length());
+    EXPECT_EQ(read_str, obj1_data)
+      << "First object data should be correct after OSD failure (EC reconstruction)";
+  }
+
+  bufferlist obj2_read;
+  read_result = read_object(obj2_name, 0, obj2_data.length(),
+                            obj2_read, obj2_data.length());
+  EXPECT_GE(read_result, 0) << "Second object should be readable after OSD failure";
+  ASSERT_EQ(obj2_read.length(), obj2_data.length())
+    << "Second object read length should match after failure";
+  {
+    std::string read_str(obj2_read.c_str(), obj2_read.length());
+    EXPECT_EQ(read_str, obj2_data)
+      << "Second object data should be correct after OSD failure (EC reconstruction)";
+  }
+
+  const std::string post_recovery_obj = "test_post_recovery";
+  const std::string post_recovery_data = "Data written after OSD failure and recovery";
+
+  result = create_and_write(post_recovery_obj, post_recovery_data, eversion_t(1, 3));
+  EXPECT_EQ(result, 0) << "Write after OSD failure should complete successfully";
+
+  bufferlist post_recovery_read;
+  read_result = read_object(post_recovery_obj, 0, post_recovery_data.length(),
+                            post_recovery_read, post_recovery_data.length());
+  EXPECT_GE(read_result, 0) << "Post-recovery object should be readable";
+  ASSERT_EQ(post_recovery_read.length(), post_recovery_data.length())
+    << "Post-recovery read length should match";
+  {
+    std::string read_str(post_recovery_read.c_str(), post_recovery_read.length());
+    EXPECT_EQ(read_str, post_recovery_data)
+      << "Post-recovery data should match what was written";
+  }
+
+  eversion_t post_recovery_log_head = primary_ps->get_pg_log().get_log().head;
+  EXPECT_GT(post_recovery_log_head.version, pre_failure_log_head.version)
+    << "Primary PG log head should advance after post-recovery write";
+
+  // Even though the OSD is "down", its PeeringState still holds the log
+  // from before it went down.
+  auto* failed_ps = get_peering_state(failed_osd);
+  EXPECT_TRUE(failed_ps != nullptr) << "Failed OSD's PeeringState should still exist";
+
+  size_t primary_log_size = primary_ps->get_pg_log().get_log().log.size();
+  size_t failed_log_size = failed_ps->get_pg_log().get_log().log.size();
+  EXPECT_LE(failed_log_size, primary_log_size)
+    << "Failed OSD's PG log size should not exceed primary's log size";
+  // The primary wrote 3 objects (obj1, obj2, post_recovery_obj), so its log must be non-empty.
+  EXPECT_GT(primary_log_size, 0u)
+    << "Primary PG log should have entries after 3 writes";
+
+  auto* listener_ptr = get_peering_listener(0);
+  EXPECT_TRUE(listener_ptr != nullptr) << "Peering listener should exist";
+  EXPECT_TRUE(listener_ptr->activate_complete_called)
+    << "on_activate_complete should have been called during peering";
+}
+
index 1013cc30e3b6c73f89a188d7d98ad1f69342ed96..85c373ffca8545426321f46e05f408e77f727eb8 100644 (file)
@@ -47,7 +47,6 @@
 
 using namespace std;
 
-
 IsPGRecoverablePredicate *get_is_recoverable_predicate() {
   return new MockECRecPred();
 }
@@ -56,7 +55,6 @@ IsPGReadablePredicate *get_is_readable_predicate() {
   return new MockECReadPred();
 }
 
-
 // Test fixture for PeeringState tests
 class PeeringStateTest : public ::testing::Test {
 protected:
@@ -446,8 +444,15 @@ protected:
       for (auto it = ls.begin(); it != ls.end();) {
         MessageRef m = *it;
         it = ls.erase(it);
-        // TODO : Should handle messages other than MOSDPeeringOp events, however
-        // for now this seems to be sufficient
+        // NOTE: This dispatcher only handles MOSDPeeringOp-derived messages (MOSDPGLog,
+        // MOSDPGNotify2, MOSDPGInfo2, MOSDPGLease, MOSDPGLeaseAck, MOSDPGQuery2, MOSDPGTrim).
+        // Non-peering messages like MOSDPGRemove and MRecoveryReserve are sent via
+        // send_cluster_message() but are not dispatched through this function - they are
+        // handled by other test mechanisms or are not relevant to peering state transitions.
+        // This is sufficient for testing PeeringState behavior as all peering-related
+        // messages derive from MOSDPeeringOp and provide get_event() for state machine events.
+        // Future enhancement: If testing non-peering cluster messages becomes necessary,
+        // add type checking and appropriate handling for Message-derived (non-MOSDPeeringOp) types.
         dout(0) << __func__ << " message type = " << m->get_type() << dendl;
         MOSDPeeringOp *pm = static_cast<MOSDPeeringOp*>(m.get());
         dout(0) << __func__ << " sending from osd." << fromosd << " to osd." << osd << " " << *pm << dendl;
@@ -552,7 +557,7 @@ protected:
     PGPool pool(osdmap, pool_id, pi, osdmap->get_pool_name(pool_id));
     dpp[osd] = make_unique<DppHelper>(g_ceph_context, dout_subsys, this, osd, shard);
     spg_t spgid = spg_t(pg_t(0, pool_id), pg_whoami.shard);
-    listeners[osd] = make_unique<MockPeeringListener>(osdmap, pi, get_dpp(osd), pg_whoami);
+    listeners[osd] = make_unique<MockPeeringListener>(osdmap, pool_id, get_dpp(osd), pg_whoami);
     get_listener(osd)->current_epoch = osdmap->get_epoch();
     unique_ptr<PeeringState> ps = make_unique<PeeringState>(
       g_ceph_context,