crimson: conditionally reload ObjectState if execution fails.

author Radoslaw Zarzynski <rzarzyns@redhat.com>

Fri, 20 Nov 2020 15:42:36 +0000 (16:42 +0100)

committer Radoslaw Zarzynski <rzarzyns@redhat.com>

Tue, 1 Dec 2020 14:08:53 +0000 (15:08 +0100)
author Radoslaw Zarzynski <rzarzyns@redhat.com>
Fri, 20 Nov 2020 15:42:36 +0000 (16:42 +0100)
committer Radoslaw Zarzynski <rzarzyns@redhat.com>
Tue, 1 Dec 2020 14:08:53 +0000 (15:08 +0100)
diff --git a/src/crimson/osd/ops_executer.h b/src/crimson/osd/ops_executer.h

index d7e6c6980154cab9d414bf1f9bb96e6fc56a94b3..42fcf61b8003d67a219dc83b91240d1845e9bb7d 100644 (file)
--- a/src/crimson/osd/ops_executer.h
+++ b/src/crimson/osd/ops_executer.h
@@ -194,6 +194,10 @@ public:
    uint32_t get_pool_stripe_width() const {
      return pool_info.get_stripe_width();
    }
+
+  bool has_seen_write() const {
+    return num_write > 0;
+  }
  };
  
  template <class Context, class MainFunc, class EffectFunc>
diff --git a/src/crimson/osd/pg.cc b/src/crimson/osd/pg.cc

index 8893b6f43eb487ec1fc705058dd652331274e6fc..625f4322f5b3d7499bc5da433adedd1789fa40e9 100644 (file)
--- a/src/crimson/osd/pg.cc
+++ b/src/crimson/osd/pg.cc
@@ -615,21 +615,48 @@ osd_op_params_t&& PG::fill_op_params_bump_pg_version(
  seastar::future<Ref<MOSDOpReply>> PG::handle_failed_op(
    const std::error_code& e,
    ObjectContextRef obc,
+  const OpsExecuter& ox,
    const MOSDOp& m) const
  {
+  // Oops, an operation had failed. do_osd_ops() altogether with
+  // OpsExecuter already dropped the ObjectStore::Transaction if
+  // there was any. However, this is not enough to completely
+  // rollback as we gave OpsExecuter the very single copy of `obc`
+  // we maintain and we did it for both reading and writing.
+  // Now all modifications must be reverted.
+  //
+  // Let's just reload from the store. Evicting from the shared
+  // LRU would be tricky as next MOSDOp (the one at `get_obc`
+  // phase) could actually already finished the lookup. Fortunately,
+  // this is supposed to live on cold  paths, so performance is not
+  // a concern -- simplicity wins.
+  //
+  // The conditional's purpose is to efficiently handle hot errors
+  // which may appear as a result of e.g. CEPH_OSD_OP_CMPXATTR or
+  // CEPH_OSD_OP_OMAP_CMP. These are read-like ops and clients
+  // typically append them before any write. If OpsExecuter hasn't
+  // seen any modifying operation, `obc` is supposed to be kept
+  // unchanged.
    assert(e.value() > 0);
+  const bool need_reload_obc = ox.has_seen_write();
    logger().debug(
-    "{}: {} - object {} got error code {}, {}",
+    "{}: {} - object {} got error code {}, {}; need_reload_obc {}",
      __func__,
      m,
      obc->obs.oi.soid,
      e.value(),
-    e.message());
-  auto reply = make_message<MOSDOpReply>(
-    &m, -e.value(), get_osdmap_epoch(), 0, false);
-  reply->set_enoent_reply_versions(peering_state.get_info().last_update,
-                                   peering_state.get_info().last_user_version);
-  return seastar::make_ready_future<Ref<MOSDOpReply>>(std::move(reply));
+    e.message(),
+    need_reload_obc);
+  return (need_reload_obc ? reload_obc(*obc)
+                          : load_obc_ertr::now()
+  ).safe_then([&e, &m, obc = std::move(obc), this] {
+    auto reply = make_message<MOSDOpReply>(
+      &m, -e.value(), get_osdmap_epoch(), 0, false);
+    reply->set_enoent_reply_versions(
+      peering_state.get_info().last_update,
+      peering_state.get_info().last_user_version);
+    return seastar::make_ready_future<Ref<MOSDOpReply>>(std::move(reply));
+  }, load_obc_ertr::assert_all{ "can't live with object state messed up" });
  }
  
  seastar::future<Ref<MOSDOpReply>> PG::do_osd_ops(
@@ -689,7 +716,6 @@ seastar::future<Ref<MOSDOpReply>> PG::do_osd_ops(
    }).safe_then([this,
                  m,
                  obc,
-                ox_deleter = std::move(ox),
                  rvec = op_info.allows_returnvec()] {
      // TODO: should stop at the first op which returns a negative retval,
      //       cmpext uses it for returning the index of first unmatched byte
@@ -708,12 +734,18 @@ seastar::future<Ref<MOSDOpReply>> PG::do_osd_ops(
        *m,
        obc->obs.oi.soid);
      return seastar::make_ready_future<Ref<MOSDOpReply>>(std::move(reply));
-  }, OpsExecuter::osd_op_errorator::all_same_way([=] (const std::error_code& e) {
-    return handle_failed_op(e, obc, *m);
-  })).handle_exception_type([=](const crimson::osd::error& e) {
+  }, osd_op_errorator::all_same_way([ox = ox.get(),
+                                     m,
+                                     obc,
+                                     this] (const std::error_code& e) {
+    return handle_failed_op(e, std::move(obc), *ox, *m);
+  })).handle_exception_type([ox_deleter = std::move(ox),
+                             m,
+                             obc,
+                             this] (const crimson::osd::error& e) {
      // we need this handler because throwing path which aren't errorated yet.
      logger().debug("encountered the legacy error handling path!");
-    return handle_failed_op(e.code(), obc, *m);
+    return handle_failed_op(e.code(), std::move(obc), *ox_deleter, *m);
    });
  }
  
@@ -883,6 +915,29 @@ PG::load_head_obc(ObjectContextRef obc)
    });
  }
  
+PG::load_obc_ertr::future<>
+PG::reload_obc(crimson::osd::ObjectContext& obc) const
+{
+  assert(obc.is_head());
+  return backend->load_metadata(obc.get_oid()).safe_then([&obc](auto md)
+    -> load_obc_ertr::future<> {
+    logger().debug(
+      "{}: reloaded obs {} for {}",
+      __func__,
+      md->os.oi,
+      obc.get_oid());
+    if (!md->ss) {
+      logger().error(
+        "{}: oid {} missing snapset",
+        __func__,
+        obc.get_oid());
+      return crimson::ct_error::object_corrupted::make();
+    }
+    obc.set_head_state(std::move(md->os), std::move(*(md->ss)));
+    return load_obc_ertr::now();
+  });
+}
+
  PG::load_obc_ertr::future<>
  PG::with_locked_obc(Ref<MOSDOp> &m, const OpInfo &op_info,
                     Operation *op, PG::with_obc_func_t &&f)
diff --git a/src/crimson/osd/pg.h b/src/crimson/osd/pg.h

index 12244dcd6531ab511833f2c04c405cf0a54b9df2..2e061c4e1da0f61a7c8a1ab2c9adf1478891bd1b 100644 (file)
--- a/src/crimson/osd/pg.h
+++ b/src/crimson/osd/pg.h
@@ -34,8 +34,8 @@
  #include "crimson/osd/pg_recovery_listener.h"
  #include "crimson/osd/recovery_backend.h"
  
-class OSDMap;
  class MQuery;
+class OSDMap;
  class PGBackend;
  class PGPeeringEvent;
  class osd_op_params_t;
@@ -54,6 +54,7 @@ namespace crimson::os {
  
  namespace crimson::osd {
  class ClientRequest;
+class OpsExecuter;
  
  class PG : public boost::intrusive_ref_counter<
    PG,
@@ -500,6 +501,9 @@ public:
    load_obc_ertr::future<crimson::osd::ObjectContextRef>
    load_head_obc(ObjectContextRef obc);
  
+  load_obc_ertr::future<>
+  reload_obc(crimson::osd::ObjectContext& obc) const;
+
  public:
    using with_obc_func_t = std::function<seastar::future<> (ObjectContextRef)>;
  
@@ -538,6 +542,7 @@ private:
    seastar::future<Ref<MOSDOpReply>> handle_failed_op(
      const std::error_code& e,
      ObjectContextRef obc,
+    const OpsExecuter& ox,
      const MOSDOp& m) const;
    seastar::future<Ref<MOSDOpReply>> do_osd_ops(
      Ref<MOSDOp> m,
author	Radoslaw Zarzynski <rzarzyns@redhat.com>
	Fri, 20 Nov 2020 15:42:36 +0000 (16:42 +0100)
committer	Radoslaw Zarzynski <rzarzyns@redhat.com>
	Tue, 1 Dec 2020 14:08:53 +0000 (15:08 +0100)
src/crimson/osd/ops_executer.h		patch \| blob \| history
src/crimson/osd/pg.cc		patch \| blob \| history
src/crimson/osd/pg.h		patch \| blob \| history