seastar::future<Ref<MOSDOpReply>> PG::handle_failed_op(
const std::error_code& e,
ObjectContextRef obc,
+ const OpsExecuter& ox,
const MOSDOp& m) const
{
+ // Oops, an operation had failed. do_osd_ops() altogether with
+ // OpsExecuter already dropped the ObjectStore::Transaction if
+ // there was any. However, this is not enough to completely
+ // rollback as we gave OpsExecuter the very single copy of `obc`
+ // we maintain and we did it for both reading and writing.
+ // Now all modifications must be reverted.
+ //
+ // Let's just reload from the store. Evicting from the shared
+ // LRU would be tricky as next MOSDOp (the one at `get_obc`
+ // phase) could actually already finished the lookup. Fortunately,
+ // this is supposed to live on cold paths, so performance is not
+ // a concern -- simplicity wins.
+ //
+ // The conditional's purpose is to efficiently handle hot errors
+ // which may appear as a result of e.g. CEPH_OSD_OP_CMPXATTR or
+ // CEPH_OSD_OP_OMAP_CMP. These are read-like ops and clients
+ // typically append them before any write. If OpsExecuter hasn't
+ // seen any modifying operation, `obc` is supposed to be kept
+ // unchanged.
assert(e.value() > 0);
+ const bool need_reload_obc = ox.has_seen_write();
logger().debug(
- "{}: {} - object {} got error code {}, {}",
+ "{}: {} - object {} got error code {}, {}; need_reload_obc {}",
__func__,
m,
obc->obs.oi.soid,
e.value(),
- e.message());
- auto reply = make_message<MOSDOpReply>(
- &m, -e.value(), get_osdmap_epoch(), 0, false);
- reply->set_enoent_reply_versions(peering_state.get_info().last_update,
- peering_state.get_info().last_user_version);
- return seastar::make_ready_future<Ref<MOSDOpReply>>(std::move(reply));
+ e.message(),
+ need_reload_obc);
+ return (need_reload_obc ? reload_obc(*obc)
+ : load_obc_ertr::now()
+ ).safe_then([&e, &m, obc = std::move(obc), this] {
+ auto reply = make_message<MOSDOpReply>(
+ &m, -e.value(), get_osdmap_epoch(), 0, false);
+ reply->set_enoent_reply_versions(
+ peering_state.get_info().last_update,
+ peering_state.get_info().last_user_version);
+ return seastar::make_ready_future<Ref<MOSDOpReply>>(std::move(reply));
+ }, load_obc_ertr::assert_all{ "can't live with object state messed up" });
}
seastar::future<Ref<MOSDOpReply>> PG::do_osd_ops(
}).safe_then([this,
m,
obc,
- ox_deleter = std::move(ox),
rvec = op_info.allows_returnvec()] {
// TODO: should stop at the first op which returns a negative retval,
// cmpext uses it for returning the index of first unmatched byte
*m,
obc->obs.oi.soid);
return seastar::make_ready_future<Ref<MOSDOpReply>>(std::move(reply));
- }, OpsExecuter::osd_op_errorator::all_same_way([=] (const std::error_code& e) {
- return handle_failed_op(e, obc, *m);
- })).handle_exception_type([=](const crimson::osd::error& e) {
+ }, osd_op_errorator::all_same_way([ox = ox.get(),
+ m,
+ obc,
+ this] (const std::error_code& e) {
+ return handle_failed_op(e, std::move(obc), *ox, *m);
+ })).handle_exception_type([ox_deleter = std::move(ox),
+ m,
+ obc,
+ this] (const crimson::osd::error& e) {
// we need this handler because throwing path which aren't errorated yet.
logger().debug("encountered the legacy error handling path!");
- return handle_failed_op(e.code(), obc, *m);
+ return handle_failed_op(e.code(), std::move(obc), *ox_deleter, *m);
});
}
});
}
+PG::load_obc_ertr::future<>
+PG::reload_obc(crimson::osd::ObjectContext& obc) const
+{
+ assert(obc.is_head());
+ return backend->load_metadata(obc.get_oid()).safe_then([&obc](auto md)
+ -> load_obc_ertr::future<> {
+ logger().debug(
+ "{}: reloaded obs {} for {}",
+ __func__,
+ md->os.oi,
+ obc.get_oid());
+ if (!md->ss) {
+ logger().error(
+ "{}: oid {} missing snapset",
+ __func__,
+ obc.get_oid());
+ return crimson::ct_error::object_corrupted::make();
+ }
+ obc.set_head_state(std::move(md->os), std::move(*(md->ss)));
+ return load_obc_ertr::now();
+ });
+}
+
PG::load_obc_ertr::future<>
PG::with_locked_obc(Ref<MOSDOp> &m, const OpInfo &op_info,
Operation *op, PG::with_obc_func_t &&f)