From: Xuehan Xu Date: Tue, 20 Oct 2020 10:21:04 +0000 (+0800) Subject: crimson/osd: make I/O reqs wait for object recovery if it's degraded or backfilling X-Git-Tag: v16.1.0~715^2~5 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=3a8f133923721b6e0405c8511383bf08b9d14360;p=ceph.git crimson/osd: make I/O reqs wait for object recovery if it's degraded or backfilling Signed-off-by: Xuehan Xu --- diff --git a/src/crimson/osd/backfill_state.h b/src/crimson/osd/backfill_state.h index 8094490db461..549daa282423 100644 --- a/src/crimson/osd/backfill_state.h +++ b/src/crimson/osd/backfill_state.h @@ -261,6 +261,9 @@ public: backfill_machine.process_event(*std::move(evt)); } + hobject_t get_last_backfill_started() const { + return last_backfill_started; + } private: hobject_t last_backfill_started; BackfillInterval backfill_info; diff --git a/src/crimson/osd/osd_operations/client_request.cc b/src/crimson/osd/osd_operations/client_request.cc index a51cc7256d64..f2fa0e205075 100644 --- a/src/crimson/osd/osd_operations/client_request.cc +++ b/src/crimson/osd/osd_operations/client_request.cc @@ -125,10 +125,17 @@ seastar::future<> ClientRequest::process_op( ).then([this, &pg, pgref] { eversion_t ver; const hobject_t& soid = m->get_hobj(); - if (pg.is_unreadable_object(soid, &ver)) { - auto [op, fut] = osd.get_shard_services().start_operation( - soid, ver, pgref, osd.get_shard_services(), m->get_min_epoch()); - return std::move(fut); + logger().debug("{} check for recovery, {}", *this, soid); + if (pg.is_unreadable_object(soid, &ver) || + pg.is_degraded_or_backfilling_object(soid)) { + logger().debug("{} need to wait for recovery, {}", *this, soid); + if (pg.get_recovery_backend()->is_recovering(soid)) { + return pg.get_recovery_backend()->get_recovering(soid).wait_for_recovered(); + } else { + auto [op, fut] = osd.get_shard_services().start_operation( + soid, ver, pgref, osd.get_shard_services(), pg.get_osdmap_epoch()); + return std::move(fut); + 
} } return seastar::now(); }).then([this, &pg] { diff --git a/src/crimson/osd/pg.cc b/src/crimson/osd/pg.cc index 541de86f1eb2..441b7efbb439 100644 --- a/src/crimson/osd/pg.cc +++ b/src/crimson/osd/pg.cc @@ -1008,4 +1008,33 @@ bool PG::can_discard_op(const MOSDOp& m) const { < peering_state.get_info().history.same_primary_since, false); } +bool PG::is_degraded_or_backfilling_object(const hobject_t& soid) const { + /* The conditions below may clear (on_local_recover, before we queue + * the transaction) before we actually requeue the degraded waiters + * in on_global_recover after the transaction completes. + */ + if (peering_state.get_pg_log().get_missing().get_items().count(soid)) + return true; + ceph_assert(!get_acting_recovery_backfill().empty()); + for (auto& peer : get_acting_recovery_backfill()) { + if (peer == get_primary()) continue; + auto peer_missing_entry = peering_state.get_peer_missing().find(peer); + // An object missing on any non-primary peer counts as degraded (return true). + // NOTE(review): async_recovery_targets are not exempted here -- confirm that is intended. 
+ if (peer_missing_entry != peering_state.get_peer_missing().end() && + peer_missing_entry->second.get_items().count(soid)) { + return true; + } + // Object is degraded if after last_backfill AND + // we are backfilling it + if (is_backfill_target(peer) && + peering_state.get_peer_info(peer).last_backfill <= soid && + recovery_handler->backfill_state->get_last_backfill_started() >= soid && + recovery_backend->is_recovering(soid)) { + return true; + } + } + return false; +} + } diff --git a/src/crimson/osd/pg.h b/src/crimson/osd/pg.h index 2e9f6023cdc1..d4d3c82d3506 100644 --- a/src/crimson/osd/pg.h +++ b/src/crimson/osd/pg.h @@ -602,6 +602,9 @@ public: const set<pg_shard_t> &get_acting_recovery_backfill() const { return peering_state.get_acting_recovery_backfill(); } + bool is_backfill_target(pg_shard_t osd) const { + return peering_state.is_backfill_target(osd); + } void begin_peer_recover(pg_shard_t peer, const hobject_t oid) { peering_state.begin_peer_recover(peer, oid); } @@ -684,6 +687,7 @@ private: !peering_state.get_missing_loc().readable_with_acting( oid, get_actingset(), v); } + bool is_degraded_or_backfilling_object(const hobject_t& soid) const; const set<pg_shard_t> &get_actingset() const { return peering_state.get_actingset(); } diff --git a/src/crimson/osd/pg_recovery.h b/src/crimson/osd/pg_recovery.h index 651becfed0f8..e55547c95b59 100644 --- a/src/crimson/osd/pg_recovery.h +++ b/src/crimson/osd/pg_recovery.h @@ -105,5 +105,6 @@ private: bool budget_available() const final; void backfilled() final; friend crimson::osd::BackfillState::PGFacade; + friend crimson::osd::PG; // backfill end }; diff --git a/src/crimson/osd/recovery_backend.cc b/src/crimson/osd/recovery_backend.cc index c3c4ed9a5c88..9444379be171 100644 --- a/src/crimson/osd/recovery_backend.cc +++ b/src/crimson/osd/recovery_backend.cc @@ -233,7 +233,7 @@ seastar::future<> RecoveryBackend::handle_scan_digest( { logger().debug("{}", __func__); // Check that from is in backfill_targets vector - 
ceph_assert(pg.get_peering_state().is_backfill_target(m.from)); + ceph_assert(pg.is_backfill_target(m.from)); BackfillInterval bi; bi.begin = m.begin;