git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
crimson/osd: make I/O reqs wait for object recovery if it's degraded or backfilling
author Xuehan Xu <xxhdx1985126@gmail.com>
Tue, 20 Oct 2020 10:21:04 +0000 (18:21 +0800)
committer Xuehan Xu <xxhdx1985126@gmail.com>
Sun, 1 Nov 2020 02:25:22 +0000 (10:25 +0800)
Signed-off-by: Xuehan Xu <xxhdx1985126@gmail.com>
src/crimson/osd/backfill_state.h
src/crimson/osd/osd_operations/client_request.cc
src/crimson/osd/pg.cc
src/crimson/osd/pg.h
src/crimson/osd/pg_recovery.h
src/crimson/osd/recovery_backend.cc

index 8094490db461a873c439db6e9d1e0a0c7a2f6623..549daa2824233421f6752e2b1e51bfab2a6be306 100644 (file)
@@ -261,6 +261,9 @@ public:
     backfill_machine.process_event(*std::move(evt));
   }
 
+  hobject_t get_last_backfill_started() const {  // read-only accessor; hands back a copy of last_backfill_started
+    return last_backfill_started;
+  }
 private:
   hobject_t last_backfill_started;
   BackfillInterval backfill_info;
index a51cc7256d640fc780e665c4be664a49d977c2bf..f2fa0e2050759c28798342f5081254444ad9539e 100644 (file)
@@ -125,10 +125,17 @@ seastar::future<> ClientRequest::process_op(
   ).then([this, &pg, pgref] {
     eversion_t ver;
     const hobject_t& soid = m->get_hobj();
-    if (pg.is_unreadable_object(soid, &ver)) {
-      auto [op, fut] = osd.get_shard_services().start_operation<UrgentRecovery>(
-                         soid, ver, pgref, osd.get_shard_services(), m->get_min_epoch());
-      return std::move(fut);
+    logger().debug("{} check for recovery, {}", *this, soid);
+    if (pg.is_unreadable_object(soid, &ver) ||
+       pg.is_degraded_or_backfilling_object(soid)) {
+      logger().debug("{} need to wait for recovery, {}", *this, soid);
+      if (pg.get_recovery_backend()->is_recovering(soid)) {
+       return pg.get_recovery_backend()->get_recovering(soid).wait_for_recovered();
+      } else {
+       auto [op, fut] = osd.get_shard_services().start_operation<UrgentRecovery>(
+                           soid, ver, pgref, osd.get_shard_services(), pg.get_osdmap_epoch());
+       return std::move(fut);
+      }
     }
     return seastar::now();
   }).then([this, &pg] {
index 541de86f1eb26684b356991ea29ef93b8905060a..441b7efbb43936bc33c5058654abfea5dc940774 100644 (file)
@@ -1008,4 +1008,33 @@ bool PG::can_discard_op(const MOSDOp& m) const {
       < peering_state.get_info().history.same_primary_since, false);
 }
 
+bool PG::is_degraded_or_backfilling_object(const hobject_t& soid) const {
+  /* True if soid is in the local missing set, a peer's missing set, or is being
+   * recovered on a backfill target. These conditions may clear (on_local_recover,
+   * before we queue the transaction) before we actually requeue the degraded
+   * waiters in on_global_recover after the transaction completes. */
+  if (peering_state.get_pg_log().get_missing().get_items().count(soid))
+    return true;
+  ceph_assert(!get_acting_recovery_backfill().empty());
+  for (auto& peer : get_acting_recovery_backfill()) {
+    if (peer == get_primary()) continue;
+    auto peer_missing_entry = peering_state.get_peer_missing().find(peer);
+    // An object missing on any non-primary peer counts as degraded.
+    // NOTE(review): async_recovery_targets get no exemption here (true is returned for them too) - confirm intended.
+    if (peer_missing_entry != peering_state.get_peer_missing().end() &&
+       peer_missing_entry->second.get_items().count(soid)) {
+       return true;
+    }
+    // Object is degraded if after last_backfill AND
+    // we are backfilling it
+    if (is_backfill_target(peer) &&
+        peering_state.get_peer_info(peer).last_backfill <= soid &&
+       recovery_handler->backfill_state->get_last_backfill_started() >= soid &&
+       recovery_backend->is_recovering(soid)) {
+      return true;
+    }
+  }
+  return false;
+}
+
 }
index 2e9f6023cdc191e5dfc3fdf9220c937c73e86e7d..d4d3c82d3506399a0ef5970bbe926afa71d9a2d7 100644 (file)
@@ -602,6 +602,9 @@ public:
   const set<pg_shard_t> &get_acting_recovery_backfill() const {  // forwards to peering_state.get_acting_recovery_backfill()
     return peering_state.get_acting_recovery_backfill();
   }
+  bool is_backfill_target(pg_shard_t osd) const {  // thin wrapper over peering_state.is_backfill_target()
+    return peering_state.is_backfill_target(osd);
+  }
   void begin_peer_recover(pg_shard_t peer, const hobject_t oid) {  // delegates to peering_state; oid is taken by value
     peering_state.begin_peer_recover(peer, oid);
   }
@@ -684,6 +687,7 @@ private:
       !peering_state.get_missing_loc().readable_with_acting(
        oid, get_actingset(), v);
   }
+  bool is_degraded_or_backfilling_object(const hobject_t& soid) const;
   const set<pg_shard_t> &get_actingset() const {  // forwards to peering_state.get_actingset()
     return peering_state.get_actingset();
   }
index 651becfed0f8976ea1c090588648aa6132123f41..e55547c95b59a90c59b3dc7ef163848b10816547 100644 (file)
@@ -105,5 +105,6 @@ private:
   bool budget_available() const final;
   void backfilled() final;
   friend crimson::osd::BackfillState::PGFacade;
+  friend crimson::osd::PG;
   // backfill end
 };
index c3c4ed9a5c88bb333c5a55832a3e5531bf0e2ab5..9444379be17133d8bdba6c57384a002986d5c94f 100644 (file)
@@ -233,7 +233,7 @@ seastar::future<> RecoveryBackend::handle_scan_digest(
 {
   logger().debug("{}", __func__);
   // Check that from is in backfill_targets vector
-  ceph_assert(pg.get_peering_state().is_backfill_target(m.from));
+  ceph_assert(pg.is_backfill_target(m.from));
 
   BackfillInterval bi;
   bi.begin = m.begin;