git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/osd/pg_shard_manager: discard outdated operations when the corresponding pgs are already removed [60041/head]
author: Xuehan Xu <xuxuehan@qianxin.com>
Sun, 29 Sep 2024 09:26:04 +0000 (17:26 +0800)
committer: Xuehan Xu <xuxuehan@qianxin.com>
Thu, 19 Dec 2024 02:08:33 +0000 (10:08 +0800)

Fixes: https://tracker.ceph.com/issues/68286
Signed-off-by: Xuehan Xu <xuxuehan@qianxin.com>
12 files changed:
src/crimson/osd/osd_operation.h
src/crimson/osd/osd_operations/client_request.h
src/crimson/osd/osd_operations/logmissing_request.h
src/crimson/osd/osd_operations/logmissing_request_reply.h
src/crimson/osd/osd_operations/peering_event.h
src/crimson/osd/osd_operations/pg_advance_map.h
src/crimson/osd/osd_operations/recovery_subrequest.h
src/crimson/osd/osd_operations/replicated_request.h
src/crimson/osd/osd_operations/scrub_events.h
src/crimson/osd/pg_shard_manager.h
src/crimson/osd/shard_services.cc
src/crimson/osd/shard_services.h

index 2897a7e16237a1b28d1bff89eeda774636738fe8..8f525c6a8a42370d63f5e26cf472797d6562909b 100644 (file)
@@ -211,6 +211,9 @@ protected:
 
 public:
   static constexpr bool is_trackable = true;
+  virtual bool requires_pg() const {
+    return true;
+  }
 };
 
 template <class T>
index 98443bdfc0f73a213862a52abe8254970f61da55..91a6728fd4bcf1a9ab8cb070bd32409a12bfee0e 100644 (file)
@@ -42,6 +42,10 @@ class ClientRequest final : public PhasedOperationT<ClientRequest>,
   unsigned instance_id = 0;
 
 public:
+  epoch_t get_epoch_sent_at() const {
+    return m->get_map_epoch();
+  }
+
   /**
    * instance_handle_t
    *
index e12243ce430fd811bd3cdbd1a90db70588ad5060..fe4761c4ab482645ada4d64c1031ae2828a1bbdb 100644 (file)
@@ -36,6 +36,9 @@ public:
   }
   PipelineHandle &get_handle() { return handle; }
   epoch_t get_epoch() const { return req->get_min_epoch(); }
+  epoch_t get_epoch_sent_at() const {
+    return req->get_map_epoch();
+  }
 
   ConnectionPipeline &get_connection_pipeline();
 
index 71651d16789bc8e0bf1076d357569b3442756e82..bdb6c2ac6acdd19289d58002fa4e628d4ba47585 100644 (file)
@@ -36,6 +36,9 @@ public:
   }
   PipelineHandle &get_handle() { return handle; }
   epoch_t get_epoch() const { return req->get_min_epoch(); }
+  epoch_t get_epoch_sent_at() const {
+    return req->get_map_epoch();
+  }
 
   ConnectionPipeline &get_connection_pipeline();
 
index 85de5c711d67ce31615a41f02e9b70761f2ca70e..aa6b8a95a94ae2ac32d15d2034b3640763834e59 100644 (file)
@@ -44,6 +44,10 @@ protected:
   float delay = 0;
   PGPeeringEvent evt;
 
+  epoch_t get_epoch_sent_at() const {
+    return evt.get_epoch_sent();
+  }
+
   const pg_shard_t get_from() const {
     return from;
   }
@@ -84,6 +88,10 @@ public:
     evt(std::forward<Args>(args)...)
   {}
 
+  bool requires_pg() const final {
+    return evt.requires_pg;
+  }
+
   void print(std::ostream &) const final;
   void dump_detail(ceph::Formatter* f) const final;
   seastar::future<> with_pg(
index 43be7319545b1a693a65e8431e23bdd061b7ac71..21702f6ff4f766ae619667914805af70167fa2f5 100644 (file)
@@ -50,6 +50,10 @@ public:
     PGPeeringPipeline::Process::BlockingEvent
   > tracking_events;
 
+  epoch_t get_epoch_sent_at() const {
+    return to;
+  }
+
 private:
   PGPeeringPipeline &peering_pp(PG &pg);
 };
index 17c2faf97ea98af3b2c785d8615c10f6739178bc..2fe8ff372b3f95021d7058ce3461022321a16a12 100644 (file)
@@ -39,6 +39,9 @@ public:
   }
   PipelineHandle &get_handle() { return handle; }
   epoch_t get_epoch() const { return m->get_min_epoch(); }
+  epoch_t get_epoch_sent_at() const {
+    return m->get_map_epoch();
+  }
 
   ConnectionPipeline &get_connection_pipeline();
 
index 1e84fd108e23e1e83dd88093a50959dbdd8a362e..05724943cf040bf196000235530e51eba9574c32 100644 (file)
@@ -36,6 +36,9 @@ public:
   }
   PipelineHandle &get_handle() { return handle; }
   epoch_t get_epoch() const { return req->get_min_epoch(); }
+  epoch_t get_epoch_sent_at() const {
+    return req->get_map_epoch();
+  }
 
   ConnectionPipeline &get_connection_pipeline();
 
index 02a5d852bb7c2e79784b48dfb2d3aa485a44c002..8bed90e4c14fb56c6df7666e8d5c3b670019c36d 100644 (file)
@@ -27,11 +27,11 @@ class RemoteScrubEventBaseT : public PhasedOperationT<T> {
   crimson::net::ConnectionRef l_conn;
   crimson::net::ConnectionXcoreRef r_conn;
 
-  epoch_t epoch;
   spg_t pgid;
 
 protected:
   using interruptor = InterruptibleOperation::interruptor;
+  epoch_t epoch;
 
   template <typename U=void>
   using ifut = InterruptibleOperation::interruptible_future<U>;
@@ -40,7 +40,7 @@ protected:
 public:
   RemoteScrubEventBaseT(
     crimson::net::ConnectionRef conn, epoch_t epoch, spg_t pgid)
-    : l_conn(std::move(conn)), epoch(epoch), pgid(pgid) {}
+    : l_conn(std::move(conn)), pgid(pgid), epoch(epoch) {}
 
   PGPeeringPipeline &get_peering_pipeline(PG &pg);
 
@@ -117,6 +117,10 @@ public:
     : RemoteScrubEventBaseT<ScrubRequested>(std::forward<Args>(base_args)...),
       deep(deep) {}
 
+  epoch_t get_epoch_sent_at() const {
+    return epoch;
+  }
+
   void print(std::ostream &out) const final {
     out << "(deep=" << deep << ")";
   }
@@ -141,6 +145,10 @@ public:
     ceph_assert(scrub::PGScrubber::is_scrub_message(*m));
   }
 
+  epoch_t get_epoch_sent_at() const {
+    return epoch;
+  }
+
   void print(std::ostream &out) const final {
     out << "(m=" << *m << ")";
   }
index b9879c8c9ddde89fec1093720e25c97d2acfe894..f7bd7a6c08e30f6cb4c861ebcd70e9acb83da515 100644 (file)
@@ -256,18 +256,40 @@ public:
     auto &opref = *op;
     return opref.template with_blocking_event<
       PGMap::PGCreationBlockingEvent
-    >([&target_shard_services, &opref](auto &&trigger) {
-      return target_shard_services.wait_for_pg(
-        std::move(trigger), opref.get_pgid());
-    }).safe_then([&logger, &target_shard_services, &opref](Ref<PG> pgref) {
-      logger.debug("{}: have_pg", opref);
-      return opref.with_pg(target_shard_services, pgref);
-    }).handle_error(
-      crimson::ct_error::ecanceled::handle([&logger, &opref](auto) {
-        logger.debug("{}: pg creation canceled, dropping", opref);
-        return seastar::now();
-      })
-    ).then([op=std::move(op)] {});
+    >([&target_shard_services, &opref, &logger](auto &&trigger) mutable {
+      auto pg = target_shard_services.get_pg(opref.get_pgid());
+      auto fut = ShardServices::wait_for_pg_ertr::make_ready_future<Ref<PG>>(pg);
+      if (!pg) {
+       if (opref.requires_pg()) {
+         auto osdmap = target_shard_services.get_map();
+         if (!osdmap->is_up_acting_osd_shard(
+               opref.get_pgid(), target_shard_services.local_state.whoami)) {
+           logger.debug(
+             "pg {} for {} is no longer here, discarding",
+             opref.get_pgid(), opref);
+           opref.get_handle().exit();
+           auto _fut = seastar::now();
+           if (osdmap->get_epoch() > opref.get_epoch_sent_at()) {
+             _fut = target_shard_services.send_incremental_map(
+               std::ref(opref.get_foreign_connection()),
+               opref.get_epoch_sent_at() + 1);
+           }
+           return _fut;
+         }
+       }
+       fut = target_shard_services.wait_for_pg(
+         std::move(trigger), opref.get_pgid());
+      }
+      return fut.safe_then([&logger, &target_shard_services, &opref](Ref<PG> pgref) {
+       logger.debug("{}: have_pg", opref);
+       return opref.with_pg(target_shard_services, pgref);
+      }).handle_error(
+       crimson::ct_error::ecanceled::handle([&logger, &opref](auto) {
+         logger.debug("{}: pg creation canceled, dropping", opref);
+         return seastar::now();
+       })
+      );
+    }).then([op=std::move(op)] {});
   }
 
   seastar::future<> load_pgs(crimson::os::FuturizedStore& store);
index c23408989293dd4d7510972a2d7d1ef26142e8ef..e1acb34636f2dcd304d3f9440b9575bdcf65a498 100644 (file)
@@ -783,6 +783,11 @@ seastar::future<> ShardServices::dispatch_context_transaction(
   co_return;
 }
 
+Ref<PG> ShardServices::get_pg(spg_t pgid)
+{
+  return local_state.get_pg(pgid);
+}
+
 seastar::future<> ShardServices::dispatch_context_messages(
   BufferedRecoveryMessages &&ctx)
 {
index 56ac4963fff7187b589cf2d9a256640fc42275b1..f4d4b4c2eb4f5f18bf89f46c560cab781527cfd2 100644 (file)
@@ -483,6 +483,8 @@ public:
     return pg_to_shard_mapping.remove_pg_mapping(pgid);
   }
 
+  Ref<PG> get_pg(spg_t pgid);
+
   crimson::common::CephContext *get_cct() {
     return &(local_state.cct);
   }