From 3a8f133923721b6e0405c8511383bf08b9d14360 Mon Sep 17 00:00:00 2001 From: Xuehan Xu Date: Tue, 20 Oct 2020 18:21:04 +0800 Subject: [PATCH] crimson/osd: make I/O reqs wait for object recovery if it's degraded or backfilling Signed-off-by: Xuehan Xu --- src/crimson/osd/backfill_state.h | 3 ++ .../osd/osd_operations/client_request.cc | 15 +++++++--- src/crimson/osd/pg.cc | 29 +++++++++++++++++++ src/crimson/osd/pg.h | 4 +++ src/crimson/osd/pg_recovery.h | 1 + src/crimson/osd/recovery_backend.cc | 2 +- 6 files changed, 49 insertions(+), 5 deletions(-) diff --git a/src/crimson/osd/backfill_state.h b/src/crimson/osd/backfill_state.h index 8094490db46..549daa28242 100644 --- a/src/crimson/osd/backfill_state.h +++ b/src/crimson/osd/backfill_state.h @@ -261,6 +261,9 @@ public: backfill_machine.process_event(*std::move(evt)); } + hobject_t get_last_backfill_started() const { + return last_backfill_started; + } private: hobject_t last_backfill_started; BackfillInterval backfill_info; diff --git a/src/crimson/osd/osd_operations/client_request.cc b/src/crimson/osd/osd_operations/client_request.cc index a51cc7256d6..f2fa0e20507 100644 --- a/src/crimson/osd/osd_operations/client_request.cc +++ b/src/crimson/osd/osd_operations/client_request.cc @@ -125,10 +125,17 @@ seastar::future<> ClientRequest::process_op( ).then([this, &pg, pgref] { eversion_t ver; const hobject_t& soid = m->get_hobj(); - if (pg.is_unreadable_object(soid, &ver)) { - auto [op, fut] = osd.get_shard_services().start_operation( - soid, ver, pgref, osd.get_shard_services(), m->get_min_epoch()); - return std::move(fut); + logger().debug("{} check for recovery, {}", *this, soid); + if (pg.is_unreadable_object(soid, &ver) || + pg.is_degraded_or_backfilling_object(soid)) { + logger().debug("{} need to wait for recovery, {}", *this, soid); + if (pg.get_recovery_backend()->is_recovering(soid)) { + return pg.get_recovery_backend()->get_recovering(soid).wait_for_recovered(); + } else { + auto [op, fut] = 
osd.get_shard_services().start_operation( + soid, ver, pgref, osd.get_shard_services(), pg.get_osdmap_epoch()); + return std::move(fut); + } } return seastar::now(); }).then([this, &pg] { diff --git a/src/crimson/osd/pg.cc b/src/crimson/osd/pg.cc index 541de86f1eb..441b7efbb43 100644 --- a/src/crimson/osd/pg.cc +++ b/src/crimson/osd/pg.cc @@ -1008,4 +1008,33 @@ bool PG::can_discard_op(const MOSDOp& m) const { < peering_state.get_info().history.same_primary_since, false); } +bool PG::is_degraded_or_backfilling_object(const hobject_t& soid) const { + /* The conditions below may clear (on_local_recover, before we queue + * the transaction) before we actually requeue the degraded waiters + * in on_global_recover after the transaction completes. + */ + if (peering_state.get_pg_log().get_missing().get_items().count(soid)) + return true; + ceph_assert(!get_acting_recovery_backfill().empty()); + for (auto& peer : get_acting_recovery_backfill()) { + if (peer == get_primary()) continue; + auto peer_missing_entry = peering_state.get_peer_missing().find(peer); + // If the object is missing on this (non-primary) peer it is degraded, so return true + // and let the caller make the op wait until the object has been recovered. 
+ if (peer_missing_entry != peering_state.get_peer_missing().end() && + peer_missing_entry->second.get_items().count(soid)) { + return true; + } + // Object is degraded if after last_backfill AND + // we are backfilling it + if (is_backfill_target(peer) && + peering_state.get_peer_info(peer).last_backfill <= soid && + recovery_handler->backfill_state->get_last_backfill_started() >= soid && + recovery_backend->is_recovering(soid)) { + return true; + } + } + return false; +} + } diff --git a/src/crimson/osd/pg.h b/src/crimson/osd/pg.h index 2e9f6023cdc..d4d3c82d350 100644 --- a/src/crimson/osd/pg.h +++ b/src/crimson/osd/pg.h @@ -602,6 +602,9 @@ public: const set &get_acting_recovery_backfill() const { return peering_state.get_acting_recovery_backfill(); } + bool is_backfill_target(pg_shard_t osd) const { + return peering_state.is_backfill_target(osd); + } void begin_peer_recover(pg_shard_t peer, const hobject_t oid) { peering_state.begin_peer_recover(peer, oid); } @@ -684,6 +687,7 @@ private: !peering_state.get_missing_loc().readable_with_acting( oid, get_actingset(), v); } + bool is_degraded_or_backfilling_object(const hobject_t& soid) const; const set &get_actingset() const { return peering_state.get_actingset(); } diff --git a/src/crimson/osd/pg_recovery.h b/src/crimson/osd/pg_recovery.h index 651becfed0f..e55547c95b5 100644 --- a/src/crimson/osd/pg_recovery.h +++ b/src/crimson/osd/pg_recovery.h @@ -105,5 +105,6 @@ private: bool budget_available() const final; void backfilled() final; friend crimson::osd::BackfillState::PGFacade; + friend crimson::osd::PG; // backfill end }; diff --git a/src/crimson/osd/recovery_backend.cc b/src/crimson/osd/recovery_backend.cc index c3c4ed9a5c8..9444379be17 100644 --- a/src/crimson/osd/recovery_backend.cc +++ b/src/crimson/osd/recovery_backend.cc @@ -233,7 +233,7 @@ seastar::future<> RecoveryBackend::handle_scan_digest( { logger().debug("{}", __func__); // Check that from is in backfill_targets vector - 
ceph_assert(pg.get_peering_state().is_backfill_target(m.from)); + ceph_assert(pg.is_backfill_target(m.from)); BackfillInterval bi; bi.begin = m.begin; -- 2.39.5