From: Casey Bodley Date: Fri, 13 Mar 2020 20:09:50 +0000 (-0400) Subject: cls/cmpomap: add cls module for CMPXATTR-like functionality in omap X-Git-Tag: v16.1.0~2620^2~2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=3d5c8e982e1441fa48159c49fd4c847de95cf6fa;p=ceph.git cls/cmpomap: add cls module for CMPXATTR-like functionality in omap provides a cmp_vals() op similar to librados' omap_cmp(), but with full support for the comparison modes and operations from cmpxattr(). if any of the requested key/value comparisons are unsuccessful, the op fails with -ECANCELED cmp_vals() can be composed with other ops like set_omap_vals() to make them conditional on the successful comparison of all existing values also provides cmp_set_vals() and cmp_rm_keys(), which apply a mutation to only the keys that compare successfully. these enable batch operations where, unlike cmp_vals() + set_omap_vals(), you don't want one failed comparison to prevent operations on unrelated keys Signed-off-by: Casey Bodley --- diff --git a/src/cls/CMakeLists.txt b/src/cls/CMakeLists.txt index 72c3fd08bbe1..05db7f290823 100644 --- a/src/cls/CMakeLists.txt +++ b/src/cls/CMakeLists.txt @@ -318,3 +318,4 @@ set(cls_2pc_queue_client_srcs 2pc_queue/cls_2pc_queue_client.cc) add_library(cls_2pc_queue_client STATIC ${cls_2pc_queue_client_srcs}) +add_subdirectory(cmpomap) diff --git a/src/cls/cmpomap/CMakeLists.txt b/src/cls/cmpomap/CMakeLists.txt new file mode 100644 index 000000000000..de8ca278cd69 --- /dev/null +++ b/src/cls/cmpomap/CMakeLists.txt @@ -0,0 +1,9 @@ +add_library(cls_cmpomap SHARED server.cc) +set_target_properties(cls_cmpomap PROPERTIES + VERSION "1.0.0" + SOVERSION "1" + INSTALL_RPATH "" + CXX_VISIBILITY_PRESET hidden) +install(TARGETS cls_cmpomap DESTINATION ${cls_dir}) + +add_library(cls_cmpomap_client STATIC client.cc) diff --git a/src/cls/cmpomap/client.cc b/src/cls/cmpomap/client.cc new file mode 100644 index 000000000000..e0fbbff18d47 --- /dev/null 
+++ b/src/cls/cmpomap/client.cc @@ -0,0 +1,76 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2020 Red Hat, Inc + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + */ + +#include "include/rados/librados.hpp" +#include "client.h" +#include "ops.h" + +namespace cls::cmpomap { + +int cmp_vals(librados::ObjectReadOperation& op, + Mode mode, Op comparison, ComparisonMap values, + std::optional<ceph::bufferlist> default_value) +{ + if (values.size() > max_keys) { + return -E2BIG; + } + cmp_vals_op call; + call.mode = mode; + call.comparison = comparison; + call.values = std::move(values); + call.default_value = std::move(default_value); + + bufferlist in; + encode(call, in); + op.exec("cmpomap", "cmp_vals", in); + return 0; +} + +int cmp_set_vals(librados::ObjectWriteOperation& op, + Mode mode, Op comparison, ComparisonMap values, + std::optional<ceph::bufferlist> default_value) +{ + if (values.size() > max_keys) { + return -E2BIG; + } + cmp_set_vals_op call; + call.mode = mode; + call.comparison = comparison; + call.values = std::move(values); + call.default_value = std::move(default_value); + + bufferlist in; + encode(call, in); + op.exec("cmpomap", "cmp_set_vals", in); + return 0; +} + +int cmp_rm_keys(librados::ObjectWriteOperation& op, + Mode mode, Op comparison, ComparisonMap values) +{ + if (values.size() > max_keys) { + return -E2BIG; + } + cmp_rm_keys_op call; + call.mode = mode; + call.comparison = comparison; + call.values = std::move(values); + + bufferlist in; + encode(call, in); + op.exec("cmpomap", "cmp_rm_keys", in); + return 0; +} + +} // namespace cls::cmpomap diff --git a/src/cls/cmpomap/client.h b/src/cls/cmpomap/client.h new file mode 100644 index 000000000000..c55fbc4105c3 --- /dev/null 
+++ b/src/cls/cmpomap/client.h @@ -0,0 +1,66 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2020 Red Hat, Inc + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + */ + +#pragma once + +#include <optional> +#include "include/rados/librados_fwd.hpp" +#include "types.h" + +namespace cls::cmpomap { + +/// requests with too many key comparisons will be rejected with -E2BIG +static constexpr uint32_t max_keys = 1000; + +/// process each of the omap value comparisons according to the same rules as +/// cmpxattr(), and return -ECANCELED if a comparison is unsuccessful. for +/// comparisons with Mode::U64, failure to decode an input value is reported +/// as -EINVAL, while failure to decode a stored value is reported as -EIO +[[nodiscard]] int cmp_vals(librados::ObjectReadOperation& op, + Mode mode, Op comparison, ComparisonMap values, + std::optional<ceph::bufferlist> default_value); + +/// process each of the omap value comparisons according to the same rules as +/// cmpxattr(). any key/value pairs that compare successfully are overwritten +/// with the corresponding input value. for comparisons with Mode::U64, failure +/// to decode an input value is reported as -EINVAL. decode failure of a stored +/// value is treated as an unsuccessful comparison and is not reported as an +/// error +[[nodiscard]] int cmp_set_vals(librados::ObjectWriteOperation& writeop, + Mode mode, Op comparison, ComparisonMap values, + std::optional<ceph::bufferlist> default_value); + +/// process each of the omap value comparisons according to the same rules as +/// cmpxattr(). any key/value pairs that compare successfully are removed. for +/// comparisons with Mode::U64, failure to decode an input value is reported as +/// -EINVAL. 
decode failure of a stored value is treated as an unsuccessful +/// comparison and is not reported as an error +[[nodiscard]] int cmp_rm_keys(librados::ObjectWriteOperation& writeop, + Mode mode, Op comparison, ComparisonMap values); + + +// bufferlist factories for comparison values +inline ceph::bufferlist string_buffer(std::string_view value) { + ceph::bufferlist bl; + bl.append(value); + return bl; +} +inline ceph::bufferlist u64_buffer(uint64_t value) { + ceph::bufferlist bl; + using ceph::encode; + encode(value, bl); + return bl; +} + +} // namespace cls::cmpomap diff --git a/src/cls/cmpomap/ops.h b/src/cls/cmpomap/ops.h new file mode 100644 index 000000000000..39b1049e8f98 --- /dev/null +++ b/src/cls/cmpomap/ops.h @@ -0,0 +1,100 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2020 Red Hat, Inc + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ */ + +#pragma once + +#include "types.h" +#include "include/encoding.h" + +namespace cls::cmpomap { + +struct cmp_vals_op { + Mode mode; + Op comparison; + ComparisonMap values; + std::optional<ceph::bufferlist> default_value; +}; + +inline void encode(const cmp_vals_op& o, ceph::bufferlist& bl, uint64_t f=0) +{ + ENCODE_START(1, 1, bl); + encode(o.mode, bl); + encode(o.comparison, bl); + encode(o.values, bl); + encode(o.default_value, bl); + ENCODE_FINISH(bl); +} + +inline void decode(cmp_vals_op& o, ceph::bufferlist::const_iterator& bl) +{ + DECODE_START(1, bl); + decode(o.mode, bl); + decode(o.comparison, bl); + decode(o.values, bl); + decode(o.default_value, bl); + DECODE_FINISH(bl); +} + +struct cmp_set_vals_op { + Mode mode; + Op comparison; + ComparisonMap values; + std::optional<ceph::bufferlist> default_value; +}; + +inline void encode(const cmp_set_vals_op& o, ceph::bufferlist& bl, uint64_t f=0) +{ + ENCODE_START(1, 1, bl); + encode(o.mode, bl); + encode(o.comparison, bl); + encode(o.values, bl); + encode(o.default_value, bl); + ENCODE_FINISH(bl); +} + +inline void decode(cmp_set_vals_op& o, ceph::bufferlist::const_iterator& bl) +{ + DECODE_START(1, bl); + decode(o.mode, bl); + decode(o.comparison, bl); + decode(o.values, bl); + decode(o.default_value, bl); + DECODE_FINISH(bl); +} + +struct cmp_rm_keys_op { + Mode mode; + Op comparison; + ComparisonMap values; +}; + +inline void encode(const cmp_rm_keys_op& o, ceph::bufferlist& bl, uint64_t f=0) +{ + ENCODE_START(1, 1, bl); + encode(o.mode, bl); + encode(o.comparison, bl); + encode(o.values, bl); + ENCODE_FINISH(bl); +} + +inline void decode(cmp_rm_keys_op& o, ceph::bufferlist::const_iterator& bl) +{ + DECODE_START(1, bl); + decode(o.mode, bl); + decode(o.comparison, bl); + decode(o.values, bl); + DECODE_FINISH(bl); +} + +} // namespace cls::cmpomap diff --git a/src/cls/cmpomap/server.cc b/src/cls/cmpomap/server.cc new file mode 100644 index 000000000000..691832bfe636 --- /dev/null +++ b/src/cls/cmpomap/server.cc @@ -0,0 +1,299 @@ +// -*- 
mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2020 Red Hat, Inc + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + */ + +#include "objclass/objclass.h" +#include "ops.h" + +CLS_VER(1,0) +CLS_NAME(cmpomap) + +using namespace cls::cmpomap; + +// returns negative error codes or 0/1 for failed/successful comparisons +template <typename T> +static int compare_values(Op op, const T& lhs, const T& rhs) +{ + switch (op) { + case Op::EQ: return (lhs == rhs); + case Op::NE: return (lhs != rhs); + case Op::GT: return (lhs > rhs); + case Op::GTE: return (lhs >= rhs); + case Op::LT: return (lhs < rhs); + case Op::LTE: return (lhs <= rhs); + default: return -EINVAL; + } +} + +static int compare_values_u64(Op op, uint64_t lhs, const bufferlist& value) +{ + try { + // decode existing value as rhs + uint64_t rhs; + auto p = value.cbegin(); + using ceph::decode; + decode(rhs, p); + return compare_values(op, lhs, rhs); + } catch (const buffer::error&) { + // failures to decode existing values are reported as EIO + return -EIO; + } +} + +static int compare_value(Mode mode, Op op, const bufferlist& input, + const bufferlist& value) +{ + switch (mode) { + case Mode::String: + return compare_values(op, input, value); + case Mode::U64: + try { + // decode input value as lhs + uint64_t lhs; + auto p = input.cbegin(); + using ceph::decode; + decode(lhs, p); + return compare_values_u64(op, lhs, value); + } catch (const buffer::error&) { + // failures to decode input values are reported as EINVAL + return -EINVAL; + } + default: + return -EINVAL; + } +} + +static int cmp_vals(cls_method_context_t hctx, bufferlist *in, bufferlist *out) +{ + cmp_vals_op op; + try { + auto p = in->cbegin(); + decode(op, p); + } catch 
(const buffer::error&) { + CLS_LOG(1, "ERROR: cmp_vals(): failed to decode input"); + return -EINVAL; + } + + // collect the keys we need to read + std::set<std::string> keys; + for (const auto& kv : op.values) { + keys.insert(kv.first); + } + + // read the values for each key to compare + std::map<std::string, bufferlist> values; + int r = cls_cxx_map_get_vals_by_keys(hctx, keys, &values); + if (r < 0) { + CLS_LOG(4, "ERROR: cmp_vals() failed to read values r=%d", r); + return r; + } + + auto v = values.cbegin(); + for (const auto& [key, input] : op.values) { + bufferlist value; + if (v != values.end() && v->first == key) { + value = std::move(v->second); + ++v; + CLS_LOG(20, "cmp_vals() comparing key=%s mode=%d op=%d", + key.c_str(), (int)op.mode, (int)op.comparison); + } else if (!op.default_value) { + CLS_LOG(20, "cmp_vals() missing key=%s", key.c_str()); + return -ECANCELED; + } else { + // use optional default for missing keys + value = *op.default_value; + CLS_LOG(20, "cmp_vals() comparing missing key=%s mode=%d op=%d", + key.c_str(), (int)op.mode, (int)op.comparison); + } + + r = compare_value(op.mode, op.comparison, input, value); + if (r < 0) { + CLS_LOG(10, "cmp_vals() failed to compare key=%s r=%d", key.c_str(), r); + return r; + } + if (r == 0) { + CLS_LOG(10, "cmp_vals() comparison at key=%s returned false", key.c_str()); + return -ECANCELED; + } + CLS_LOG(20, "cmp_vals() comparison at key=%s returned true", key.c_str()); + } + + return 0; +} + +static int cmp_set_vals(cls_method_context_t hctx, bufferlist *in, bufferlist *out) +{ + cmp_set_vals_op op; + try { + auto p = in->cbegin(); + decode(op, p); + } catch (const buffer::error&) { + CLS_LOG(1, "ERROR: cmp_set_vals(): failed to decode input"); + return -EINVAL; + } + + // collect the keys we need to read + std::set<std::string> keys; + for (const auto& kv : op.values) { + keys.insert(kv.first); + } + + // read the values for each key to compare + std::map<std::string, bufferlist> values; + int r = cls_cxx_map_get_vals_by_keys(hctx, keys, &values); + if (r < 0) { + 
CLS_LOG(4, "ERROR: cmp_set_vals() failed to read values r=%d", r); + return r; + } + + auto v = values.begin(); + for (const auto& [key, input] : op.values) { + auto k = values.end(); + bufferlist value; + if (v != values.end() && v->first == key) { + value = std::move(v->second); + k = v++; + CLS_LOG(20, "cmp_set_vals() comparing key=%s mode=%d op=%d", + key.c_str(), (int)op.mode, (int)op.comparison); + } else if (!op.default_value) { + CLS_LOG(20, "cmp_set_vals() missing key=%s", key.c_str()); + continue; + } else { + // use optional default for missing keys + value = *op.default_value; + CLS_LOG(20, "cmp_set_vals() comparing missing key=%s mode=%d op=%d", + key.c_str(), (int)op.mode, (int)op.comparison); + } + + r = compare_value(op.mode, op.comparison, input, value); + if (r == -EIO) { + r = 0; // treat EIO as a failed comparison + } + if (r < 0) { + CLS_LOG(10, "cmp_set_vals() failed to compare key=%s r=%d", + key.c_str(), r); + return r; + } + if (r == 0) { + // unsuccessful comparison + if (k != values.end()) { + values.erase(k); // remove this key from the values to overwrite + CLS_LOG(20, "cmp_set_vals() not overwriting key=%s", key.c_str()); + } else { + CLS_LOG(20, "cmp_set_vals() not writing missing key=%s", key.c_str()); + } + } else { + // successful comparison + if (k != values.end()) { + // overwrite the value + k->second = std::move(input); + CLS_LOG(20, "cmp_set_vals() overwriting key=%s", key.c_str()); + } else { + // insert the value + values.emplace(key, std::move(input)); + CLS_LOG(20, "cmp_set_vals() overwriting missing key=%s", key.c_str()); + } + } + } + + if (values.empty()) { + CLS_LOG(20, "cmp_set_vals() has no values to overwrite"); + return 0; + } + + CLS_LOG(20, "cmp_set_vals() overwriting count=%d", (int)values.size()); + return cls_cxx_map_set_vals(hctx, &values); +} + +static int cmp_rm_keys(cls_method_context_t hctx, bufferlist *in, bufferlist *out) +{ + cmp_rm_keys_op op; + try { + auto p = in->cbegin(); + decode(op, p); + } 
catch (const buffer::error&) { + CLS_LOG(1, "ERROR: cmp_rm_keys(): failed to decode input"); + return -EINVAL; + } + + // collect the keys we need to read + std::set<std::string> keys; + for (const auto& kv : op.values) { + keys.insert(kv.first); + } + + // read the values for each key to compare + std::map<std::string, bufferlist> values; + int r = cls_cxx_map_get_vals_by_keys(hctx, keys, &values); + if (r < 0) { + CLS_LOG(4, "ERROR: cmp_rm_keys() failed to read values r=%d", r); + return r; + } + + auto v = values.cbegin(); + for (const auto& [key, input] : op.values) { + if (v == values.end() || v->first != key) { + CLS_LOG(20, "cmp_rm_keys() missing key=%s", key.c_str()); + continue; + } + CLS_LOG(20, "cmp_rm_keys() comparing key=%s mode=%d op=%d", + key.c_str(), (int)op.mode, (int)op.comparison); + + const bufferlist& value = v->second; + ++v; + + r = compare_value(op.mode, op.comparison, input, value); + if (r == -EIO) { + r = 0; // treat EIO as a failed comparison + } + if (r < 0) { + CLS_LOG(10, "cmp_rm_keys() failed to compare key=%s r=%d", + key.c_str(), r); + return r; + } + if (r == 0) { + // unsuccessful comparison + CLS_LOG(20, "cmp_rm_keys() preserving key=%s", key.c_str()); + } else { + // successful comparison + CLS_LOG(20, "cmp_rm_keys() removing key=%s", key.c_str()); + r = cls_cxx_map_remove_key(hctx, key); + if (r < 0) { + CLS_LOG(1, "ERROR: cmp_rm_keys() failed to remove key=%s r=%d", + key.c_str(), r); + return r; + } + } + } + + return 0; +} + +CLS_INIT(cmpomap) +{ + CLS_LOG(1, "Loaded cmpomap class!"); + + cls_handle_t h_class; + cls_method_handle_t h_cmp_vals; + cls_method_handle_t h_cmp_set_vals; + cls_method_handle_t h_cmp_rm_keys; + + cls_register("cmpomap", &h_class); + + cls_register_cxx_method(h_class, "cmp_vals", CLS_METHOD_RD, + cmp_vals, &h_cmp_vals); + cls_register_cxx_method(h_class, "cmp_set_vals", CLS_METHOD_RD | CLS_METHOD_WR, + cmp_set_vals, &h_cmp_set_vals); + cls_register_cxx_method(h_class, "cmp_rm_keys", CLS_METHOD_RD | CLS_METHOD_WR, + cmp_rm_keys, 
&h_cmp_rm_keys); +} diff --git a/src/cls/cmpomap/types.h b/src/cls/cmpomap/types.h new file mode 100644 index 000000000000..11e39575fea7 --- /dev/null +++ b/src/cls/cmpomap/types.h @@ -0,0 +1,44 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2020 Red Hat, Inc + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + */ + +#pragma once + +#include <string> +#include <boost/container/flat_map.hpp> +#include "include/rados.h" // CEPH_OSD_CMPXATTR_* +#include "include/encoding.h" + +namespace cls::cmpomap { + +/// comparison operand type +enum class Mode : uint8_t { + String = CEPH_OSD_CMPXATTR_MODE_STRING, + U64 = CEPH_OSD_CMPXATTR_MODE_U64, +}; + +/// comparison operation, where the left-hand operand is the input value and +/// the right-hand operand is the stored value (or the optional default) +enum class Op : uint8_t { + EQ = CEPH_OSD_CMPXATTR_OP_EQ, + NE = CEPH_OSD_CMPXATTR_OP_NE, + GT = CEPH_OSD_CMPXATTR_OP_GT, + GTE = CEPH_OSD_CMPXATTR_OP_GTE, + LT = CEPH_OSD_CMPXATTR_OP_LT, + LTE = CEPH_OSD_CMPXATTR_OP_LTE, +}; + +/// mapping of omap keys to value comparisons +using ComparisonMap = boost::container::flat_map<std::string, ceph::bufferlist>; + +} // namespace cls::cmpomap diff --git a/src/osd/CMakeLists.txt b/src/osd/CMakeLists.txt index 56844af94993..32ac7f8e6384 100644 --- a/src/osd/CMakeLists.txt +++ b/src/osd/CMakeLists.txt @@ -59,7 +59,7 @@ endif() # libcls_* are runtime dependencies add_dependencies(osd cls_journal cls_hello cls_lock cls_log cls_numops - cls_refcount cls_timeindex cls_user cls_version cls_cas) + cls_refcount cls_timeindex cls_user cls_version cls_cas cls_cmpomap) if(WITH_CEPHFS) add_dependencies(osd cls_cephfs) endif()