From 8ff3417193906bb276a8f90249cb2c19759d6507 Mon Sep 17 00:00:00 2001 From: Matt Benjamin Date: Tue, 24 Sep 2019 17:12:02 -0400 Subject: [PATCH] rgw: add checksum and digest machinery Adds new Blake3 digest format (native), a concrete type to represent digests, and static_visitor machinery to unify varying checksum computations. This framework, together with new trailing checksum header support, is used to implement S3 additional checksum verification. Parts of the AWS content checksum API work build on a prior contribution from imtzw. Thank you! Fixes: https://tracker.ceph.com/issues/42080 Fixes: https://tracker.ceph.com/issues/63951 squashed commits: * rgw_cksum: add trivial test vectors for sha-format digests Computed digests match those produced by sha1sum, sha256sum, and sha512sum utilities. * rgw_cksum: add test vectors for blake3 Tests the same input strings with digests validated by b3sum (https://crates.io/crates/b3sum). * rgw_cksum: switch to accel crc32c The internal Ceph convention appears to be to omit a final xor where ceph_crc32c is used, but it's required for compatibility with AWS implementations. * rgw_cksum: add XXH3 digest * rgw_cksum: write class encoder for rgw::digest::Cksum * rgw_cksum: also reverse crc32c (REBASEME) Mark noticed that the crc32c output was being tested against a byteswapped value (crc32c also needs byteswap on LE). * rgw_cksum: add digest::Cksum serde tests * rgw_cksum: fix main(...) 
linkage (so we run our main unit and not the one in gmock) * rgw_cksum: convenience extensions for integration with RGW/S3 * introduce rgw_cksum unique_ptr factory * rgw_cksum: mark string transform accessors const * rgw_cksum: fixup unittest_rgw_chksum compilation--all existing tests pass * rgw_cksum: hook up put-object checksum workflow * tweaks to report on content checksum mismatch * rgw_cksum: match SDK as well as general checksum header * make it more efficient * initialize RGWPutObj_Cksum::digest * rgw_cksum: write parse_cksum_type w/const char* arg * initialize _type correctly; doing armored wrong * fix expected checksum header name, clean up verify * fix output on checksum verify fail, cleanup * introduce Cksum::to_armor(); all AWS cases pass * oops, extra 0-byte at end of to_armor() result * use to_armor() with decoded checksums (i.e., for all S3 presentation) * remove unnecessary finalize() in RGWPutObj_Cksum dtor * RGWPutObj_Cksum::Factory fixes * fixes test_object_checksum_sha256 * choose preferred checksum algorithm header if both are present * log verified checksums in RGWPutObj::execute at 16 * checksum not needed in policy condition * fix checksum trailing header format * move Blake3 to rgw_blake3_digest.h Signed-off-by: Matt Benjamin --- src/CMakeLists.txt | 4 + src/common/ceph_crypto.h | 1 - src/rgw/CMakeLists.txt | 29 +++ src/rgw/rgw_auth_s3.cc | 2 +- src/rgw/rgw_basic_types.h | 10 +- src/rgw/rgw_blake3_digest.h | 47 +++++ src/rgw/rgw_cksum.h | 297 +++++++++++++++++++++++++++ src/rgw/rgw_cksum_digest.h | 133 ++++++++++++ src/rgw/rgw_cksum_pipe.cc | 91 +++++++++ src/rgw/rgw_cksum_pipe.h | 93 +++++++++ src/rgw/rgw_common.h | 65 +----- src/rgw/rgw_crc_digest.h | 93 +++++++++ src/rgw/rgw_file_int.h | 1 + src/rgw/rgw_hex.h | 78 +++++++ src/rgw/rgw_op.cc | 53 ++++- src/rgw/rgw_op.h | 6 +- src/rgw/rgw_rest_s3.cc | 26 +++ src/rgw/rgw_rest_s3.h | 1 + src/rgw/rgw_xxh_digest.h | 53 +++++ src/test/rgw/CMakeLists.txt | 8 + src/test/rgw/test_rgw_cksum.cc | 350 
++++++++++++++++++++++++++++++++ src/test/test_rgw_admin_log.cc | 3 +- src/test/test_rgw_admin_meta.cc | 3 +- 23 files changed, 1380 insertions(+), 67 deletions(-) create mode 100644 src/rgw/rgw_blake3_digest.h create mode 100644 src/rgw/rgw_cksum.h create mode 100644 src/rgw/rgw_cksum_digest.h create mode 100644 src/rgw/rgw_cksum_pipe.cc create mode 100644 src/rgw/rgw_cksum_pipe.h create mode 100644 src/rgw/rgw_crc_digest.h create mode 100644 src/rgw/rgw_hex.h create mode 100644 src/rgw/rgw_xxh_digest.h create mode 100644 src/test/rgw/test_rgw_cksum.cc diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 5b21a2db04f59..d28a949bf4afe 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -203,6 +203,10 @@ if(HAVE_INTEL) else() set(object_format "elf64") endif() + + set(SAVE_CMAKE_ASM_FLAGS ${CMAKE_ASM_FLAGS}) + set(SAVE_CMAKE_ASM_COMPILER ${CMAKE_ASM_COMPILER}) + set(CMAKE_ASM_FLAGS "-f ${object_format}") set(CMAKE_ASM_COMPILER ${PROJECT_SOURCE_DIR}/src/nasm-wrapper) if(NOT WIN32) diff --git a/src/common/ceph_crypto.h b/src/common/ceph_crypto.h index ed93d09e6e273..6b2fa50dc2aa6 100644 --- a/src/common/ceph_crypto.h +++ b/src/common/ceph_crypto.h @@ -91,7 +91,6 @@ namespace TOPNSPC::crypto { SHA512 () : OpenSSLDigest(EVP_sha512()) { } }; - # if OPENSSL_VERSION_NUMBER < 0x10100000L class HMAC { private: diff --git a/src/rgw/CMakeLists.txt b/src/rgw/CMakeLists.txt index f91fbaed4308b..cb2f93d34dbf7 100644 --- a/src/rgw/CMakeLists.txt +++ b/src/rgw/CMakeLists.txt @@ -30,6 +30,33 @@ endfunction() find_package(ICU 52.0 COMPONENTS uc REQUIRED) +set(blake3_srcs + ${CMAKE_SOURCE_DIR}/src/BLAKE3/c/blake3.c + ${CMAKE_SOURCE_DIR}/src/BLAKE3/c/blake3_dispatch.c + ${CMAKE_SOURCE_DIR}/src/BLAKE3/c/blake3_portable.c) + +if (HAVE_INTEL) + # restore compiler detected assembler, following use gas syntax + set(CMAKE_ASM_FLAGS ${SAVE_CMAKE_ASM_FLAGS}) + set(CMAKE_ASM_COMPILER ${SAVE_CMAKE_ASM_COMPILER}) + if(WIN32) + list(APPEND blake3_srcs + 
${CMAKE_SOURCE_DIR}/src/BLAKE3/c/blake3_sse2_x86-64_windows_gnu.S + ${CMAKE_SOURCE_DIR}/src/BLAKE3/c/blake3_sse41_x86-64_windows_gnu.S + ${CMAKE_SOURCE_DIR}/src/BLAKE3/c/blake3_avx2_x86-64_windows_gnu.S + ${CMAKE_SOURCE_DIR}/src/BLAKE3/c/blake3_avx512_x86-64_windows_gnu.S) + else() + list(APPEND blake3_srcs + ${CMAKE_SOURCE_DIR}/src/BLAKE3/c/blake3.c + ${CMAKE_SOURCE_DIR}/src/BLAKE3/c/blake3_dispatch.c + ${CMAKE_SOURCE_DIR}/src/BLAKE3/c/blake3_portable.c + ${CMAKE_SOURCE_DIR}/src/BLAKE3/c/blake3_sse2_x86-64_unix.S + ${CMAKE_SOURCE_DIR}/src/BLAKE3/c/blake3_sse41_x86-64_unix.S + ${CMAKE_SOURCE_DIR}/src/BLAKE3/c/blake3_avx2_x86-64_unix.S + ${CMAKE_SOURCE_DIR}/src/BLAKE3/c/blake3_avx512_x86-64_unix.S) + endif() +endif() + set(librgw_common_srcs services/svc_finisher.cc services/svc_bi_rados.cc @@ -57,6 +84,7 @@ set(librgw_common_srcs services/svc_user_rados.cc services/svc_zone.cc services/svc_zone_utils.cc + ${blake3_srcs} rgw_account.cc rgw_acl.cc rgw_acl_s3.cc @@ -70,6 +98,7 @@ set(librgw_common_srcs rgw_bucket.cc rgw_bucket_layout.cc rgw_cache.cc + rgw_cksum_pipe.cc rgw_common.cc rgw_compression.cc rgw_cors.cc diff --git a/src/rgw/rgw_auth_s3.cc b/src/rgw/rgw_auth_s3.cc index 61c2118b672fd..d521f60f6b125 100644 --- a/src/rgw/rgw_auth_s3.cc +++ b/src/rgw/rgw_auth_s3.cc @@ -1469,7 +1469,7 @@ inline void AWSv4ComplMulti::extract_trailing_headers( /* populate trailer map with expected headers and their values, if sent */ trailer_map.insert(trailer_map_t::value_type(k, v)); /* populate to req_info.env as well */ - put_prop(ys_header_mangle(k), v); + put_prop(ys_header_mangle(fmt::format("HTTP-{}", k)), v); }); consumed += get<2>(ex_header); } /* one trailer */ diff --git a/src/rgw/rgw_basic_types.h b/src/rgw/rgw_basic_types.h index cd56db1081b9d..771466ffae5f6 100644 --- a/src/rgw/rgw_basic_types.h +++ b/src/rgw/rgw_basic_types.h @@ -31,6 +31,7 @@ #include "rgw_user_types.h" #include "rgw_bucket_types.h" #include "rgw_obj_types.h" +#include "rgw_cksum.h" #include 
"driver/rados/rgw_obj_manifest.h" // FIXME: subclass dependency @@ -255,6 +256,7 @@ struct RGWUploadPartInfo { uint64_t size; uint64_t accounted_size{0}; std::string etag; + rgw::cksum::Cksum cksum; ceph::real_time modified; RGWObjManifest manifest; RGWCompressionInfo cs_info; @@ -265,7 +267,7 @@ struct RGWUploadPartInfo { RGWUploadPartInfo() : num(0), size(0) {} void encode(bufferlist& bl) const { - ENCODE_START(5, 2, bl); + ENCODE_START(6, 2, bl); encode(num, bl); encode(size, bl); encode(etag, bl); @@ -274,10 +276,11 @@ struct RGWUploadPartInfo { encode(cs_info, bl); encode(accounted_size, bl); encode(past_prefixes, bl); + encode(cksum, bl); ENCODE_FINISH(bl); } void decode(bufferlist::const_iterator& bl) { - DECODE_START_LEGACY_COMPAT_LEN(5, 2, 2, bl); + DECODE_START_LEGACY_COMPAT_LEN(6, 2, 2, bl); decode(num, bl); decode(size, bl); decode(etag, bl); @@ -293,6 +296,9 @@ struct RGWUploadPartInfo { if (struct_v >= 5) { decode(past_prefixes, bl); } + if (struct_v >= 6) { + decode(cksum, bl); + } DECODE_FINISH(bl); } void dump(Formatter *f) const; diff --git a/src/rgw/rgw_blake3_digest.h b/src/rgw/rgw_blake3_digest.h new file mode 100644 index 0000000000000..f8a9fd6ade0bb --- /dev/null +++ b/src/rgw/rgw_blake3_digest.h @@ -0,0 +1,47 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2023 Red Hat, Inc + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ * + */ + +#pragma once + +#include +#include +#include "BLAKE3/c/blake3.h" + +#define XXH_INLINE_ALL 1 /* required for streaming variants */ +#include "xxhash.h" + +namespace rgw { namespace digest { + +class Blake3 { + private: + blake3_hasher h; + + public: + static constexpr uint16_t digest_size = BLAKE3_OUT_LEN /* 32 bytes */; + + Blake3() { Restart(); } + + void Restart() { blake3_hasher_init(&h); } + + void Update(const unsigned char *data, uint64_t len) { + blake3_hasher_update(&h, data, len); + } + + void Final(unsigned char* digest) { + blake3_hasher_finalize(&h, digest, digest_size); + } +}; /* Blake3 */ + +}} /* namespace */ diff --git a/src/rgw/rgw_cksum.h b/src/rgw/rgw_cksum.h new file mode 100644 index 0000000000000..06ccacf05e0cd --- /dev/null +++ b/src/rgw/rgw_cksum.h @@ -0,0 +1,297 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2019 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ * + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "fmt/format.h" +#include "common/ceph_crypto.h" +#include "common/armor.h" +#include "rgw_blake3_digest.h" +#include "rgw_crc_digest.h" +#include "rgw_xxh_digest.h" +#include +#include "rgw_hex.h" +#include "rgw_b64.h" + +#include "include/buffer.h" +#include "include/encoding.h" + +namespace rgw { namespace cksum { + + enum class Type : uint16_t + { + none = 0, + crc32, /* !cryptographic, but AWS supports */ + crc32c, /* !cryptographic, but AWS supports */ + xxh3, /* !cryptographic, but strong and very fast */ + sha1, /* unsafe, but AWS supports */ + sha256, + sha512, + blake3, + }; + + static constexpr uint16_t FLAG_NONE = 0x0000; + static constexpr uint16_t FLAG_AWS_CKSUM = 0x0001; + + class Desc + { + public: + const Type type; + const char* name; + const uint16_t digest_size; + const uint16_t armored_size; + const uint16_t flags; + + constexpr uint16_t to_armored_size(uint16_t sz) { + return sz / 3 * 4 + 4; + } + + constexpr Desc(Type _type, const char* _name, uint16_t _size, + uint16_t _flags) + : type(_type), name(_name), + digest_size(_size), + armored_size(to_armored_size(digest_size)), + flags(_flags) + {} + + constexpr bool aws() const { + return (flags & FLAG_AWS_CKSUM); + } + }; /* Desc */ + + namespace ba = boost::algorithm; + + class Cksum { + public: + static constexpr std::array checksums = + { + Desc(Type::none, "none", 0, FLAG_NONE), + Desc(Type::crc32, "crc32", 4, FLAG_AWS_CKSUM), + Desc(Type::crc32c, "crc32c", 4, FLAG_AWS_CKSUM), + Desc(Type::xxh3, "xxh3", 8, FLAG_NONE), + Desc(Type::sha1, "sha1", 20, FLAG_AWS_CKSUM), + Desc(Type::sha256, "sha256", 32, FLAG_AWS_CKSUM), + Desc(Type::sha512, "sha512", 64, FLAG_NONE), + Desc(Type::blake3, "blake3", 32, FLAG_NONE), + }; + + static constexpr uint16_t max_digest_size = 64; + using value_type = std::array; + + Type type; + value_type digest; + + Cksum(Type _type = 
Type::none) : type(_type) {} + + static const char* type_string(const Type type) { + return (Cksum::checksums[uint16_t(type)]).name; + } + + std::string aws_name() { + return fmt::format("x-amz-checksum-{}", type_string(type)); + } + + std::string rgw_name() { + return fmt::format("x-rgw-checksum-{}", type_string(type)); + } + + std::string header_name() { + return ((Cksum::checksums[uint16_t(type)]).aws()) ? + aws_name() : + rgw_name(); + } + + std::string to_armor() const { + std::string hs; + const auto& ckd = checksums[uint16_t(type)]; + hs.resize(ckd.armored_size); + memset(hs.data(), 0, hs.length()); + ceph_armor((char*) hs.data(), (char*) hs.data() + ckd.armored_size, + (char*) digest.begin(), (char*) digest.begin() + + ckd.digest_size); + return hs; + } + + std::string hex() const { + std::string hs; + const auto& ckd = checksums[uint16_t(type)]; + hs.reserve(ckd.digest_size * 2 + 1); + ba::hex_lower(digest.begin(), digest.begin() + ckd.digest_size, + std::back_inserter(hs)); + return hs; + } + + std::string to_base64() const { + return rgw::to_base64(hex()); + } + + std::string to_string() const { + std::string hs; + const auto& ckd = checksums[uint16_t(type)]; + return fmt::format("{{{}}}{}", ckd.name, to_base64()); + } + + void encode(buffer::list& bl) const { + const auto& ckd = checksums[uint16_t(type)]; + ENCODE_START(1, 1, bl); + encode(uint16_t(type), bl); + encode(ckd.digest_size, bl); + bl.append((char*)digest.data(), ckd.digest_size); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& p) { + DECODE_START(1, p); + uint16_t tt; + decode(tt, p); + type = cksum::Type(tt); + decode(tt, p); /* <= max_digest_size */ + p.copy(tt, (char*)digest.data()); + DECODE_FINISH(p); + } + }; /* Cksum */ + WRITE_CLASS_ENCODER(Cksum); + + static inline Type parse_cksum_type(const char* name) + { + for (const auto& ck : Cksum::checksums) { + if (boost::iequals(ck.name, name)) + return ck.type; + } + return Type::none; + } + + class Digest { + public: 
+ virtual void Restart() = 0; + virtual void Update (const unsigned char *input, size_t length) = 0; + virtual void Update(const ceph::buffer::list& bl) = 0; + virtual void Final (unsigned char *digest) = 0; + virtual ~Digest() {} + }; + + template + class TDigest : public Digest + { + T d; + public: + TDigest() {} + TDigest(TDigest&& rhs) noexcept + : d(std::move(rhs.d)) + {} + void Restart() override { d.Restart(); } + void Update(const unsigned char* data, uint64_t len) override { + d.Update(data, len); + } + void Update(const ceph::buffer::list& bl) { + for (auto& p : bl.buffers()) { + d.Update((const unsigned char *)p.c_str(), p.length()); + } + } + void Final(unsigned char* digest) override { + d.Final(digest); + } + }; + + typedef TDigest Blake3; + typedef TDigest Crc32; + typedef TDigest Crc32c; + typedef TDigest XXH3; + typedef TDigest SHA1; + typedef TDigest SHA256; + typedef TDigest SHA512; + + typedef boost::variant DigestVariant; + + struct get_digest_ptr : public boost::static_visitor + { + get_digest_ptr() {}; + Digest* operator()(const boost::blank& b) const { return nullptr; } + Digest* operator()(Blake3& digest) const { return &digest; } + Digest* operator()(Crc32& digest) const { return &digest; } + Digest* operator()(Crc32c& digest) const { return &digest; } + Digest* operator()(XXH3& digest) const { return &digest; } + Digest* operator()(SHA1& digest) const { return &digest; } + Digest* operator()(SHA256& digest) const { return &digest; } + Digest* operator()(SHA512& digest) const { return &digest; } + }; + + static inline Digest* get_digest(DigestVariant& ev) + { + return boost::apply_visitor(get_digest_ptr{}, ev); + } + + static inline DigestVariant digest_factory(const Type cksum_type) + { + switch (cksum_type) { + case Type::blake3: + return Blake3(); + break; + case Type::sha256: + return SHA256(); + break; + case Type::crc32: + return Crc32(); + break; + case Type::crc32c: + return Crc32c(); + break; + case Type::xxh3: + return XXH3(); + 
break; + case Type::sha512: + return SHA512(); + break; + case Type::sha1: + return SHA1(); + break; + case Type::none: + break; + }; + return boost::blank(); + } /* digest_factory */ + + static inline Cksum finalize_digest(Digest* digest, Type type) + { + Cksum cksum(type); + if (digest) { + auto data = cksum.digest.data(); + digest->Final(data); + } + return cksum; + } + + static inline std::string to_string(const Type type) { + std::string hs; + const auto& ckd = Cksum::checksums[uint16_t(type)]; + return ckd.name; + } + +}} /* namespace */ diff --git a/src/rgw/rgw_cksum_digest.h b/src/rgw/rgw_cksum_digest.h new file mode 100644 index 0000000000000..a70c892cdbcbe --- /dev/null +++ b/src/rgw/rgw_cksum_digest.h @@ -0,0 +1,133 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2019 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ * + */ + +#pragma once + +#include +#include +#include "rgw_blake3_digest.h" +#include "rgw_crc_digest.h" +#include "rgw_xxh_digest.h" + +#include "rgw_cksum.h" + +namespace rgw { namespace cksum { + + class Digest { + public: + virtual void Restart() = 0; + virtual void Update (const unsigned char *input, size_t length) = 0; + virtual void Update(const ceph::buffer::list& bl) = 0; + virtual void Final (unsigned char *digest) = 0; + virtual ~Digest() {} + }; + + template + class TDigest : public Digest + { + T d; + public: + TDigest() {} + TDigest(TDigest&& rhs) noexcept + : d(std::move(rhs.d)) + {} + void Restart() override { d.Restart(); } + void Update(const unsigned char* data, uint64_t len) override { + d.Update(data, len); + } + void Update(const ceph::buffer::list& bl) { + for (auto& p : bl.buffers()) { + d.Update((const unsigned char *)p.c_str(), p.length()); + } + } + void Final(unsigned char* digest) override { + d.Final(digest); + } + }; + + typedef TDigest Blake3; + typedef TDigest Crc32; + typedef TDigest Crc32c; + typedef TDigest XXH3; + typedef TDigest SHA1; + typedef TDigest SHA256; + typedef TDigest SHA512; + + typedef boost::variant DigestVariant; + + struct get_digest_ptr : public boost::static_visitor + { + get_digest_ptr() {}; + Digest* operator()(const boost::blank& b) const { return nullptr; } + Digest* operator()(Blake3& digest) const { return &digest; } + Digest* operator()(Crc32& digest) const { return &digest; } + Digest* operator()(Crc32c& digest) const { return &digest; } + Digest* operator()(XXH3& digest) const { return &digest; } + Digest* operator()(SHA1& digest) const { return &digest; } + Digest* operator()(SHA256& digest) const { return &digest; } + Digest* operator()(SHA512& digest) const { return &digest; } + }; + + static inline Digest* get_digest(DigestVariant& ev) + { + return boost::apply_visitor(get_digest_ptr{}, ev); + } + + static inline DigestVariant digest_factory(const Type cksum_type) + { + switch (cksum_type) { + 
case Type::blake3: + return Blake3(); + break; + case Type::sha256: + return SHA256(); + break; + case Type::crc32: + return Crc32(); + break; + case Type::crc32c: + return Crc32c(); + break; + case Type::xxh3: + return XXH3(); + break; + case Type::sha512: + return SHA512(); + break; + case Type::sha1: + return SHA1(); + break; + case Type::none: + break; + }; + return boost::blank(); + } /* digest_factory */ + + static inline Cksum finalize_digest(Digest* digest, Type type) + { + Cksum cksum(type); + if (digest) { + auto data = cksum.digest.data(); + digest->Final(data); + } + return cksum; + } + +}} /* namespace */ diff --git a/src/rgw/rgw_cksum_pipe.cc b/src/rgw/rgw_cksum_pipe.cc new file mode 100644 index 0000000000000..ea0389486bb0e --- /dev/null +++ b/src/rgw/rgw_cksum_pipe.cc @@ -0,0 +1,91 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright contributors to the Ceph project + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ * + */ + +#include "rgw_cksum_pipe.h" +#include +#include +#include +#include +#include "rgw_common.h" +#include "common/dout.h" +#include "rgw_client_io.h" + +namespace rgw::putobj { + + RGWPutObj_Cksum::RGWPutObj_Cksum(rgw::sal::DataProcessor* next, + rgw::cksum::Type _typ, + cksum_hdr_t&& _hdr) + : Pipe(next), + _type(_typ), + dv(rgw::cksum::digest_factory(_type)), + _digest(cksum::get_digest(dv)), cksum_hdr(_hdr), + _state(State::DIGEST) + { + cksum::Digest* digest = cksum::get_digest(dv); + /* XXXX remove this */ + std::cout << "ctor had digest " << _digest + << " and got digest: " << digest + << std::endl; + } + + std::unique_ptr RGWPutObj_Cksum::Factory( + rgw::sal::DataProcessor* next, const RGWEnv& env) + { + /* look for matching headers */ + auto match = [&env] () -> const cksum_hdr_t { + /* If the individual checksum value you provide through + x-amz-checksum-algorithm doesn't match the checksum algorithm + you set through x-amz-sdk-checksum-algorithm, Amazon S3 ignores + any provided ChecksumAlgorithm parameter and uses the checksum + algorithm that matches the provided value in + x-amz-checksum-algorithm. 
+ https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObject.html + */ + for (const auto hk : {"HTTP_X_AMZ_CHECKSUM_ALGORITHM", + "HTTP_X_AMZ_SDK_CHECKSUM_ALGORITHM"}) { + auto hv = env.get(hk); + if (hv) { + return cksum_hdr_t(hk, hv); + } + } + return cksum_hdr_t(nullptr, nullptr); + }; + + auto algo_header = match(); + if (algo_header.first) { + if (algo_header.second) { + auto cksum_type = cksum::parse_cksum_type(algo_header.second); + return std::make_unique( + next, + cksum_type, + std::move(algo_header)); + } + /* malformed checksum algorithm header(s) */ + throw rgw::io::Exception(EINVAL, std::system_category()); + } + /* no checksum header */ + return std::unique_ptr(); + } + + int RGWPutObj_Cksum::process(ceph::buffer::list &&data, uint64_t logical_offset) + { + for (const auto& ptr : data.buffers()) { + _digest->Update(reinterpret_cast(ptr.c_str()), + ptr.length()); + } + return 0; + } + +} // namespace rgw::putobj diff --git a/src/rgw/rgw_cksum_pipe.h b/src/rgw/rgw_cksum_pipe.h new file mode 100644 index 0000000000000..047fbcb4ddd6b --- /dev/null +++ b/src/rgw/rgw_cksum_pipe.h @@ -0,0 +1,93 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright contributors to the Ceph project + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ * + */ + +#pragma once + +#include +#include +#include +#include "rgw_cksum.h" +#include "rgw_putobj.h" + +namespace rgw::putobj { + + namespace cksum = rgw::cksum; + using cksum_hdr_t = std::pair; + + // PutObj filter for streaming checksums + class RGWPutObj_Cksum : public rgw::putobj::Pipe { + + enum class State : uint16_t { + START, + DIGEST, + FINAL + }; + + cksum::Type _type; + cksum::DigestVariant dv; + cksum::Digest* _digest; + cksum::Cksum _cksum; + cksum_hdr_t cksum_hdr; + State _state; + + public: + + using VerifyResult = std::tuple; + + static std::unique_ptr Factory( + rgw::sal::DataProcessor* next, const RGWEnv&); + + RGWPutObj_Cksum(rgw::sal::DataProcessor* next, rgw::cksum::Type _type, + cksum_hdr_t&& _hdr); + RGWPutObj_Cksum(RGWPutObj_Cksum& rhs) = delete; + ~RGWPutObj_Cksum() {} + + cksum::Type type() { return _type; } + cksum::Digest* digest() const { return _digest; } + const cksum::Cksum& cksum() { return _cksum; }; + State state() const { return _state; } + + const cksum_hdr_t& header() const { + return cksum_hdr; + } + + const cksum::Cksum& finalize() { + _cksum = finalize_digest(_digest, _type); + _state = State::FINAL; + return _cksum; + } + + const char* expected(const RGWEnv& env) { + auto hk = fmt::format("HTTP_X_AMZ_CHECKSUM_{}", cksum_hdr.second); + auto hv = env.get(hk.c_str()); + return hv; + } + + VerifyResult verify(const RGWEnv& env) { + if (_state == State::DIGEST) [[likely]] { + (void) finalize(); + } + auto hv = expected(env); + auto cv = _cksum.to_armor(); + return VerifyResult(cksum_hdr.first && + hv && !std::strcmp(hv, cv.c_str()), + _cksum); + } + + int process(bufferlist &&data, uint64_t logical_offset) override; + + }; /* RGWPutObj_Cksum */ + +} // namespace rgw::putobj diff --git a/src/rgw/rgw_common.h b/src/rgw/rgw_common.h index 2002ae51ec9b9..929ebc60b9208 100644 --- a/src/rgw/rgw_common.h +++ b/src/rgw/rgw_common.h @@ -48,6 +48,7 @@ #include "include/rados/librados.hpp" #include "rgw_public_access.h" #include 
"rgw_sal_fwd.h" +#include "rgw_hex.h" namespace ceph { class Formatter; @@ -82,6 +83,7 @@ using ceph::crypto::MD5; #define RGW_ATTR_LC RGW_ATTR_PREFIX "lc" #define RGW_ATTR_CORS RGW_ATTR_PREFIX "cors" #define RGW_ATTR_ETAG RGW_ATTR_PREFIX "etag" +#define RGW_ATTR_CKSUM RGW_ATTR_PREFIX "cksum" #define RGW_ATTR_BUCKETS RGW_ATTR_PREFIX "buckets" #define RGW_ATTR_META_PREFIX RGW_ATTR_PREFIX RGW_AMZ_META_PREFIX #define RGW_ATTR_CONTENT_TYPE RGW_ATTR_PREFIX "content_type" @@ -160,6 +162,9 @@ using ceph::crypto::MD5; #define RGW_ATTR_UNIX_KEY1 RGW_ATTR_PREFIX "unix-key1" #define RGW_ATTR_UNIX1 RGW_ATTR_PREFIX "unix1" +/* Content Checksums */ +#define RGW_ATTR_AMZ_CKSUM RGW_ATTR_PREFIX "x-amz-content-checksum" + #define RGW_ATTR_CRYPT_PREFIX RGW_ATTR_PREFIX "crypt." #define RGW_ATTR_CRYPT_MODE RGW_ATTR_CRYPT_PREFIX "mode" #define RGW_ATTR_CRYPT_KEYMD5 RGW_ATTR_CRYPT_PREFIX "keymd5" @@ -1027,6 +1032,8 @@ enum RGWBucketFlags { class RGWSI_Zone; +#include "rgw_cksum.h" + struct RGWBucketInfo { rgw_bucket bucket; rgw_owner owner; @@ -1054,6 +1061,8 @@ struct RGWBucketInfo { std::map mdsearch_config; + rgw::cksum::Type cksum_type = rgw::cksum::Type::none; + // resharding cls_rgw_reshard_status reshard_status{cls_rgw_reshard_status::NOT_RESHARDING}; std::string new_bucket_instance_id; @@ -1064,7 +1073,6 @@ struct RGWBucketInfo { void encode(bufferlist& bl) const; void decode(bufferlist::const_iterator& bl); - void dump(Formatter *f) const; static void generate_test_instances(std::list& o); @@ -1550,61 +1558,6 @@ struct multipart_upload_info }; WRITE_CLASS_ENCODER(multipart_upload_info) -static inline void buf_to_hex(const unsigned char* const buf, - const size_t len, - char* const str) -{ - str[0] = '\0'; - for (size_t i = 0; i < len; i++) { - ::sprintf(&str[i*2], "%02x", static_cast(buf[i])); - } -} - -template static inline std::array -buf_to_hex(const std::array& buf) -{ - static_assert(N > 0, "The input array must be at least one element long"); - - std::array hex_dest; - 
buf_to_hex(buf.data(), N, hex_dest.data()); - return hex_dest; -} - -static inline int hexdigit(char c) -{ - if (c >= '0' && c <= '9') - return (c - '0'); - c = toupper(c); - if (c >= 'A' && c <= 'F') - return c - 'A' + 0xa; - return -EINVAL; -} - -static inline int hex_to_buf(const char *hex, char *buf, int len) -{ - int i = 0; - const char *p = hex; - while (*p) { - if (i >= len) - return -EINVAL; - buf[i] = 0; - int d = hexdigit(*p); - if (d < 0) - return d; - buf[i] = d << 4; - p++; - if (!*p) - return -EINVAL; - d = hexdigit(*p); - if (d < 0) - return d; - buf[i] += d; - i++; - p++; - } - return i; -} - static inline int rgw_str_to_bool(const char *s, int def_val) { if (!s) diff --git a/src/rgw/rgw_crc_digest.h b/src/rgw/rgw_crc_digest.h new file mode 100644 index 0000000000000..8e97df56b485a --- /dev/null +++ b/src/rgw/rgw_crc_digest.h @@ -0,0 +1,93 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2023 Red Hat, Inc + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#pragma once + +#include +#include +#include +#include +#include +#include "include/crc32c.h" +#include + +namespace rgw { namespace digest { + + /* crib impl of c++23 std::byteswap from + * https://en.cppreference.com/w/cpp/numeric/byteswap */ + template constexpr T byteswap(T value) noexcept { + static_assert(std::has_unique_object_representations_v, + "T may not have padding bits"); + auto value_representation = + std::bit_cast>(value); + std::ranges::reverse(value_representation); + return std::bit_cast(value_representation); + } /* byteswap */ + + /* impl. 
using boost::crc, as configured by imtzw */ + class Crc32 { + private: + using crc32_type = boost::crc_optimal< + 32, 0x04C11DB7, 0xFFFFFFFF, 0xFFFFFFFF, true, true>; + crc32_type crc; + + public: + static constexpr uint16_t digest_size = 4; + + Crc32() { Restart(); } + + void Restart() { crc.reset(); } + + void Update(const unsigned char *data, uint64_t len) { + crc.process_bytes(data, len); + } + + void Final(unsigned char* digest) { + /* XXX crc32 and cksfb utilities both treat the byteswapped result + * as canonical--possibly this needs to be omitted when BigEndian? */ + uint32_t final = crc(); + if constexpr (std::endian::native != std::endian::big) { + final = rgw::digest::byteswap(final); + } + memcpy((char*) digest, &final, sizeof(final)); + } + }; /* Crc32 */ + + /* use Ceph hw-specialized crc32c (0x1EDC6F41) */ + class Crc32c { + private: + uint32_t crc; + + public: + static constexpr uint16_t digest_size = 4; + static constexpr uint32_t initial_value = 0xffffffff; + + Crc32c() { Restart(); } + + void Restart() { crc = initial_value; } + + void Update(const unsigned char *data, uint64_t len) { + crc = ceph_crc32c(crc, data, len); + } + + void Final(unsigned char* digest) { + crc = crc ^ 0xffffffff; + if constexpr (std::endian::native != std::endian::big) { + crc = rgw::digest::byteswap(crc); + } + memcpy((char*) digest, &crc, sizeof(crc)); + } + }; /* Crc32c */ +}} /* namespace */ diff --git a/src/rgw/rgw_file_int.h b/src/rgw/rgw_file_int.h index d61ef33c8a880..0a1db64520722 100644 --- a/src/rgw/rgw_file_int.h +++ b/src/rgw/rgw_file_int.h @@ -37,6 +37,7 @@ #include "rgw_aio_throttle.h" #include "rgw_compression.h" #include "rgw_perf_counters.h" +#include "rgw_cksum.h" /* XXX diff --git a/src/rgw/rgw_hex.h b/src/rgw/rgw_hex.h new file mode 100644 index 0000000000000..ceb29ff47f67b --- /dev/null +++ b/src/rgw/rgw_hex.h @@ -0,0 +1,78 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - 
scalable distributed file system + * + * Copyright (C) 2023 Red Hat, Inc + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +static inline void buf_to_hex(const unsigned char* const buf, + const size_t len, + char* const str) +{ + str[0] = '\0'; + for (size_t i = 0; i < len; i++) { + ::sprintf(&str[i*2], "%02x", static_cast(buf[i])); + } +} + +template static inline std::array +buf_to_hex(const std::array& buf) +{ + static_assert(N > 0, "The input array must be at least one element long"); + + std::array hex_dest; + buf_to_hex(buf.data(), N, hex_dest.data()); + return hex_dest; +} + +static inline int hexdigit(char c) +{ + if (c >= '0' && c <= '9') + return (c - '0'); + c = toupper(c); + if (c >= 'A' && c <= 'F') + return c - 'A' + 0xa; + return -EINVAL; +} + +static inline int hex_to_buf(const char *hex, char *buf, int len) +{ + int i = 0; + const char *p = hex; + while (*p) { + if (i >= len) + return -EINVAL; + buf[i] = 0; + int d = hexdigit(*p); + if (d < 0) + return d; + buf[i] = d << 4; + p++; + if (!*p) + return -EINVAL; + d = hexdigit(*p); + if (d < 0) + return d; + buf[i] += d; + i++; + p++; + } + return i; +} /* hex_to_buf */ diff --git a/src/rgw/rgw_op.cc b/src/rgw/rgw_op.cc index 71936702e93f5..2f8e2abf916eb 100644 --- a/src/rgw/rgw_op.cc +++ b/src/rgw/rgw_op.cc @@ -24,6 +24,7 @@ #include "common/ceph_json.h" #include "common/static_ptr.h" #include "common/perf_counters_key.h" +#include "rgw_common.h" #include "rgw_tracer.h" #include "rgw_rados.h" @@ -56,6 +57,7 @@ #include "rgw_sal.h" #include "rgw_sal_rados.h" #include "rgw_torrent.h" +#include "rgw_cksum_pipe.h" #include "rgw_lua_data_filter.h" #include "rgw_lua.h" #include "rgw_iam_managed_policy.h" @@ -4322,9 +4324,13 @@ void 
RGWPutObj::execute(optional_yield y) std::optional compressor; std::optional torrent; + /* XXX Cksum::DigestVariant was designed to avoid allocation, but going with + * factory method to avoid issues with move assignment when wrapped */ + std::unique_ptr cksum_filter; std::unique_ptr encrypt; std::unique_ptr run_lua; + /* data processor filters--last filter runs first */ if (!append) { // compression and encryption only apply to full object uploads op_ret = get_encrypt_filter(&encrypt, filter); if (op_ret < 0) { @@ -4352,7 +4358,8 @@ void RGWPutObj::execute(optional_yield y) if (torrent = get_torrent_filter(filter); torrent) { filter = &*torrent; } - // run lua script before data is compressed and encrypted - last filter runs first + /* checksum and lua filters must run before compression and encryption + * filters, checksum first (probably?) */ op_ret = get_lua_filter(&run_lua, filter); if (op_ret < 0) { return; @@ -4360,7 +4367,18 @@ void RGWPutObj::execute(optional_yield y) if (run_lua) { filter = &*run_lua; } - } + /* optional streaming checksum */ + try { + cksum_filter = + rgw::putobj::RGWPutObj_Cksum::Factory(filter, *s->info.env); + } catch (const rgw::io::Exception& e) { + op_ret = e.code().value(); + return; + } + if (cksum_filter) { + filter = &*cksum_filter; + } + } /* !append */ tracepoint(rgw_op, before_data_transfer, s->req_id.c_str()); do { bufferlist data; @@ -4500,6 +4518,37 @@ void RGWPutObj::execute(optional_yield y) bl.append(etag.c_str(), etag.size()); emplace_attr(RGW_ATTR_ETAG, std::move(bl)); + if (cksum_filter) { + const auto& hdr = cksum_filter->header(); + auto cksum_verify = + cksum_filter->verify(*s->info.env); // valid or no supplied cksum + cksum = get<1>(cksum_verify); + if (std::get<0>(cksum_verify)) { + buffer::list cksum_bl; + ldpp_dout(this, 16) + << fmt::format("{} checksum verified ", hdr.second) + << fmt::format("\n\tcomputed={} == \n\texpected= {}", + cksum->to_armor(), + cksum_filter->expected(*s->info.env)) + << dendl; +
cksum->encode(cksum_bl); + emplace_attr(RGW_ATTR_CKSUM, std::move(cksum_bl)); + } else { + /* content checksum mismatch */ + auto computed_ck = cksum->to_armor(); + auto expected_ck = cksum_filter->expected(*s->info.env); + + ldpp_dout(this, 4) + << fmt::format("{} content checksum mismatch", hdr.second) + << fmt::format("\n\tcalculated={} != \n\texpected={}", + computed_ck, + (!!expected_ck) ? expected_ck : "(checksum unavailable)") + << dendl; + op_ret = -ERR_INVALID_REQUEST; + return; + } + } + populate_with_generic_attrs(s, attrs); op_ret = rgw_get_request_metadata(this, s->cct, s->info, attrs); if (op_ret < 0) { diff --git a/src/rgw/rgw_op.h b/src/rgw/rgw_op.h index afd5f8a685b71..111fef519ace9 100644 --- a/src/rgw/rgw_op.h +++ b/src/rgw/rgw_op.h @@ -34,6 +34,7 @@ #include "common/ceph_json.h" #include "common/ceph_time.h" +#include "rgw_cksum.h" #include "rgw_common.h" #include "rgw_dmclock.h" #include "rgw_sal.h" @@ -1253,6 +1254,9 @@ protected: RGWObjectRetention *obj_retention; RGWObjectLegalHold *obj_legal_hold; + // optional cksum + boost::optional cksum; + public: RGWPutObj() : ofs(0), supplied_md5_b64(NULL), @@ -2152,7 +2156,7 @@ inline int rgw_get_request_metadata(const DoutPrefixProvider *dpp, "x-amz-server-side-encryption-customer-algorithm", "x-amz-server-side-encryption-customer-key", "x-amz-server-side-encryption-customer-key-md5", - "x-amz-storage-class" + "x-amz-storage-class", }; size_t valid_meta_count = 0; diff --git a/src/rgw/rgw_rest_s3.cc b/src/rgw/rgw_rest_s3.cc index e7316485a972c..3dcedb1a28bc8 100644 --- a/src/rgw/rgw_rest_s3.cc +++ b/src/rgw/rgw_rest_s3.cc @@ -306,6 +306,12 @@ int RGWGetObj_ObjStore_S3::get_params(optional_yield y) dst_zone_trace = s->info.args.get(RGW_SYS_PARAM_PREFIX "if-not-replicated-to"); get_torrent = s->info.args.exists("torrent"); + auto checksum_mode_hdr = + s->info.env->get_optional("HTTP_X_AMZ_CHECKSUM_MODE"); + checksum_mode = + (checksum_mode_hdr && + boost::algorithm::iequals(*checksum_mode_hdr, 
"enabled")); + // optional part number auto optstr = s->info.args.get_optional("partNumber"); if (optstr) { @@ -491,6 +497,20 @@ int RGWGetObj_ObjStore_S3::send_response_data(bufferlist& bl, off_t bl_ofs, } } + if (checksum_mode) { + if (auto i = attrs.find(RGW_ATTR_CKSUM); i != attrs.end()) { + try { + rgw::cksum::Cksum cksum; + decode(cksum, i->second); + dump_header(s, cksum.header_name(), cksum.to_armor()); + } catch (buffer::error& err) { + ldpp_dout(this, 0) << "ERROR: failed to decode rgw::cksum::Cksum" + << dendl; + /* XXX return error here? the user might prefer data we have */ + } + } + } /* checksum_mode */ + for (struct response_attr_param *p = resp_attr_params; p->param; p++) { bool exists; string val = s->info.args.get(p->param, &exists); @@ -2713,12 +2733,18 @@ void RGWPutObj_ObjStore_S3::send_response() dump_content_length(s, 0); dump_header_if_nonempty(s, "x-amz-version-id", version_id); dump_header_if_nonempty(s, "x-amz-expiration", expires); + if (cksum) { + dump_header(s, cksum->header_name(), cksum->to_armor()); + } for (auto &it : crypt_http_responses) dump_header(s, it.first, it.second); } else { dump_errno(s); dump_header_if_nonempty(s, "x-amz-version-id", version_id); dump_header_if_nonempty(s, "x-amz-expiration", expires); + if (cksum) { + dump_header(s, cksum->header_name(), cksum->to_armor()); + } end_header(s, this, to_mime_type(s->format)); dump_start(s); struct tm tmp; diff --git a/src/rgw/rgw_rest_s3.h b/src/rgw/rgw_rest_s3.h index dba3247174505..7f7b91df37db0 100644 --- a/src/rgw/rgw_rest_s3.h +++ b/src/rgw/rgw_rest_s3.h @@ -42,6 +42,7 @@ protected: // Serving a custom error page from an object is really a 200 response with // just the status line altered. 
int custom_http_ret = 0; + bool checksum_mode{false}; std::map crypt_http_responses; int override_range_hdr(const rgw::auth::StrategyRegistry& auth_registry, optional_yield y); public: diff --git a/src/rgw/rgw_xxh_digest.h b/src/rgw/rgw_xxh_digest.h new file mode 100644 index 0000000000000..fe78636fc51b5 --- /dev/null +++ b/src/rgw/rgw_xxh_digest.h @@ -0,0 +1,53 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2023 Red Hat, Inc + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#pragma once + +#include +#include +#include "rgw_crc_digest.h" + +#define XXH_INLINE_ALL 1 /* required for streaming variants */ +#include "xxhash.h" + +namespace rgw { namespace digest { + + class XXH3 { + private: + XXH3_state_t s; + + public: + static constexpr uint16_t digest_size = 8; + + XXH3() { + XXH3_INITSTATE(&s); + Restart(); + } + + void Restart() { XXH3_64bits_reset(&s); } + + void Update(const unsigned char *data, uint64_t len) { + XXH3_64bits_update(&s, data, len); + } + + void Final(unsigned char* digest) { + XXH64_hash_t final = XXH3_64bits_digest(&s); + if constexpr (std::endian::native != std::endian::big) { + final = rgw::digest::byteswap(final); + } + memcpy((char*) digest, &final, sizeof(final)); + } + }; /* XXH3 */ +}} /* namespace */ diff --git a/src/test/rgw/CMakeLists.txt b/src/test/rgw/CMakeLists.txt index c96e9012790f6..1b3c136349823 100644 --- a/src/test/rgw/CMakeLists.txt +++ b/src/test/rgw/CMakeLists.txt @@ -268,6 +268,14 @@ target_include_directories(unittest_rgw_lc target_link_libraries(unittest_rgw_lc rgw_common ${rgw_libs} ${EXPAT_LIBRARIES}) +# unittest_rgw_cksum +add_executable(unittest_rgw_cksum test_rgw_cksum.cc) 
+add_ceph_unittest(unittest_rgw_cksum) +target_include_directories(unittest_rgw_cksum + SYSTEM PRIVATE "${CMAKE_SOURCE_DIR}/src/rgw") +target_link_libraries(unittest_rgw_cksum + rgw_common ${rgw_libs}) + # unittest_rgw_arn add_executable(unittest_rgw_arn test_rgw_arn.cc) add_ceph_unittest(unittest_rgw_arn) diff --git a/src/test/rgw/test_rgw_cksum.cc b/src/test/rgw/test_rgw_cksum.cc new file mode 100644 index 0000000000000..c4655ba3a2447 --- /dev/null +++ b/src/test/rgw/test_rgw_cksum.cc @@ -0,0 +1,350 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2016 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include +#include +#include +#include + +#include "gtest/gtest.h" + +#include "common/config.h" +#include "common/ceph_argparse.h" +#include "common/debug.h" +#include "rgw/rgw_cksum.h" +#include +#include "rgw/rgw_hex.h" + +#define dout_subsys ceph_subsys_rgw + +namespace { + + using namespace rgw; + using namespace rgw::cksum; + + bool verbose = false; + + cksum::Type t1 = cksum::Type::blake3; + cksum::Type t2 = cksum::Type::sha1; + cksum::Type t3 = cksum::Type::sha256; + cksum::Type t4 = cksum::Type::sha512; + cksum::Type t5 = cksum::Type::crc32; + cksum::Type t6 = cksum::Type::crc32c; + cksum::Type t7 = cksum::Type::xxh3; + + std::string lorem = + "Lorem ipsum dolor sit amet"; + + std::string dolor = + R"(Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. 
Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.)"; + +TEST(RGWCksum, Output) +{ + auto o_mode = std::ios::out|std::ios::trunc; + std::ofstream of; + of.open("/tmp/lorem", o_mode); + of << lorem; + of.close(); + + of.open("/tmp/dolor", o_mode); + of << dolor; + of.close(); +} + +TEST(RGWCksum, DigestCRC32) +{ + auto t = cksum::Type::crc32; + DigestVariant dv = rgw::cksum::digest_factory(t); + Digest* digest = get_digest(dv); + + ASSERT_NE(digest, nullptr); + + digest->Update((const unsigned char *)dolor.c_str(), dolor.length()); + + auto cksum = rgw::cksum::finalize_digest(digest, t); + + /* compare w/known value https://crccalc.com/ */ + ASSERT_EQ(cksum.hex(), "98b2c5bd"); + /* compare w/known value https://www.base64encode.org/ */ + ASSERT_EQ(cksum.to_base64(), "OThiMmM1YmQ="); + /* compare with aws-sdk-cpp encoded value */ + ASSERT_EQ(cksum.to_armor(), "mLLFvQ=="); +} + +TEST(RGWCksum, DigestCRC32c) +{ + auto t = cksum::Type::crc32c; + DigestVariant dv = rgw::cksum::digest_factory(t); + Digest* digest = get_digest(dv); + + ASSERT_NE(digest, nullptr); + + digest->Update((const unsigned char *)dolor.c_str(), dolor.length()); + + auto cksum = rgw::cksum::finalize_digest(digest, t); + /* compare w/known value https://crccalc.com/ */ + ASSERT_EQ(cksum.hex(), "95dc2e4b"); + /* compare w/known value https://www.base64encode.org/ */ + ASSERT_EQ(cksum.to_base64(), "OTVkYzJlNGI="); + /* compare with aws-sdk-cpp encoded value */ + ASSERT_EQ(cksum.to_armor(), "ldwuSw=="); +} + +TEST(RGWCksum, DigestXXH3) +{ + auto t = cksum::Type::xxh3; + DigestVariant dv = rgw::cksum::digest_factory(t); + Digest* digest = get_digest(dv); + + ASSERT_NE(digest, nullptr); + + digest->Update((const unsigned char *)dolor.c_str(), dolor.length()); + + auto cksum = rgw::cksum::finalize_digest(digest, t); + /* compare w/known value xxhsum -H3 */ + ASSERT_EQ(cksum.hex(), "5a164e0145351d01"); + /* compare w/known value 
https://www.base64encode.org/ */ + ASSERT_EQ(cksum.to_base64(), "NWExNjRlMDE0NTM1MWQwMQ=="); +} + +TEST(RGWCksum, DigestSha1) +{ + auto t = cksum::Type::sha1; + for (const auto input_str : {&lorem, &dolor}) { + DigestVariant dv = rgw::cksum::digest_factory(t); + Digest *digest = get_digest(dv); + + ASSERT_NE(digest, nullptr); + + digest->Update((const unsigned char *)input_str->c_str(), + input_str->length()); + + /* try by hand */ + unsigned char sha1_hash[SHA_DIGEST_LENGTH]; // == 20 + ::SHA1((unsigned char *)input_str->c_str(), input_str->length(), sha1_hash); + // do some stuff with the hash + + char buf[20 * 2 + 1]; + memset(buf, 0, sizeof(buf)); + buf_to_hex(sha1_hash, SHA_DIGEST_LENGTH, buf); + if (verbose) { + std::cout << "byhand sha1 " << buf << std::endl; + } + + auto cksum = rgw::cksum::finalize_digest(digest, t); + if (verbose) { + std::cout << "computed sha1: " << cksum.hex() << std::endl; + } + + /* check match with direct OpenSSL mech */ + ASSERT_TRUE(memcmp(buf, cksum.hex().c_str(), + cksum.hex().length()) == 0); + + if (input_str == &lorem) { + /* compare w/known value, openssl sha1 */ + ASSERT_EQ(cksum.hex(), "38f00f8738e241daea6f37f6f55ae8414d7b0219"); + /* compare w/known value https://www.base64encode.org/ */ + ASSERT_EQ(cksum.to_base64(), + "MzhmMDBmODczOGUyNDFkYWVhNmYzN2Y2ZjU1YWU4NDE0ZDdiMDIxOQ=="); + } else { // &dolor + /* compare w/known value, openssl sha1 */ + ASSERT_EQ(cksum.hex(), "cd36b370758a259b34845084a6cc38473cb95e27"); + /* compare w/known value https://www.base64encode.org/ */ + ASSERT_EQ(cksum.to_base64(), + "Y2QzNmIzNzA3NThhMjU5YjM0ODQ1MDg0YTZjYzM4NDczY2I5NWUyNw=="); + /* compare with aws-sdk-cpp encoded value */ + ASSERT_EQ(cksum.to_armor(), "zTazcHWKJZs0hFCEpsw4Rzy5Xic="); + } + } +} + +TEST(RGWCksum, DigestSha256) +{ + auto t = cksum::Type::sha256; + for (const auto input_str : {&lorem, &dolor}) { + DigestVariant dv = rgw::cksum::digest_factory(t); + Digest *digest = get_digest(dv); + + ASSERT_NE(digest, nullptr); + + 
digest->Update((const unsigned char *)input_str->c_str(), + input_str->length()); + + auto cksum = rgw::cksum::finalize_digest(digest, t); + if (verbose) { + std::cout << "computed sha256: " << cksum.hex() << std::endl; + } + + if (input_str == &lorem) { + /* compare w/known value, openssl sha256 */ + ASSERT_EQ(cksum.hex(), "16aba5393ad72c0041f5600ad3c2c52ec437a2f0c7fc08fadfc3c0fe9641d7a3"); + /* compare w/known value https://www.base64encode.org/ */ + ASSERT_EQ(cksum.to_base64(), + "MTZhYmE1MzkzYWQ3MmMwMDQxZjU2MDBhZDNjMmM1MmVjNDM3YTJmMGM3ZmMwOGZhZGZjM2MwZmU5NjQxZDdhMw=="); + } else { // &dolor + /* compare w/known value, openssl sha256 */ + ASSERT_EQ(cksum.hex(), "2d8c2f6d978ca21712b5f6de36c9d31fa8e96a4fa5d8ff8b0188dfb9e7c171bb"); + /* compare w/known value https://www.base64encode.org/ */ + ASSERT_EQ(cksum.to_base64(), + "MmQ4YzJmNmQ5NzhjYTIxNzEyYjVmNmRlMzZjOWQzMWZhOGU5NmE0ZmE1ZDhmZjhiMDE4OGRmYjllN2MxNzFiYg=="); + /* compare with aws-sdk-cpp encoded value */ + ASSERT_EQ(cksum.to_armor(), "LYwvbZeMohcStfbeNsnTH6jpak+l2P+LAYjfuefBcbs="); + } + } +} + +TEST(RGWCksum, DigestSha512) +{ + auto t = cksum::Type::sha512; + for (const auto input_str : {&lorem, &dolor}) { + DigestVariant dv = rgw::cksum::digest_factory(t); + Digest *digest = get_digest(dv); + + ASSERT_NE(digest, nullptr); + + digest->Update((const unsigned char *)input_str->c_str(), + input_str->length()); + + auto cksum = rgw::cksum::finalize_digest(digest, t); + + if (input_str == &lorem) { + /* compare w/known value, openssl sha512 */ + ASSERT_EQ(cksum.hex(), "b1f4aaa6b51c19ffbe4b1b6fa107be09c8acafd7c768106a3faf475b1e27a940d3c075fda671eadf46c68f93d7eabcf604bcbf7055da0dc4eae6743607a2fc3f"); + /* compare w/known value https://www.base64encode.org/ */ + ASSERT_EQ(cksum.to_base64(), + "YjFmNGFhYTZiNTFjMTlmZmJlNGIxYjZmYTEwN2JlMDljOGFjYWZkN2M3NjgxMDZhM2ZhZjQ3NWIxZTI3YTk0MGQzYzA3NWZkYTY3MWVhZGY0NmM2OGY5M2Q3ZWFiY2Y2MDRiY2JmNzA1NWRhMGRjNGVhZTY3NDM2MDdhMmZjM2Y="); + } else { // &dolor + /* compare w/known value, openssl
sha512 */ + ASSERT_EQ(cksum.hex(), "8ba760cac29cb2b2ce66858ead169174057aa1298ccd581514e6db6dee3285280ee6e3a54c9319071dc8165ff061d77783100d449c937ff1fb4cd1bb516a69b9"); + /* compare w/known value https://www.base64encode.org/ */ + ASSERT_EQ(cksum.to_base64(), + "OGJhNzYwY2FjMjljYjJiMmNlNjY4NThlYWQxNjkxNzQwNTdhYTEyOThjY2Q1ODE1MTRlNmRiNmRlZTMyODUyODBlZTZlM2E1NGM5MzE5MDcxZGM4MTY1ZmYwNjFkNzc3ODMxMDBkNDQ5YzkzN2ZmMWZiNGNkMWJiNTE2YTY5Yjk="); + } + } +} + +TEST(RGWCksum, DigestBlake3) +{ + auto t = cksum::Type::blake3; + for (const auto input_str : {&lorem, &dolor}) { + DigestVariant dv = rgw::cksum::digest_factory(t); + Digest *digest = get_digest(dv); + + ASSERT_NE(digest, nullptr); + + digest->Update((const unsigned char *)input_str->c_str(), + input_str->length()); + + auto cksum = rgw::cksum::finalize_digest(digest, t); + + if (input_str == &lorem) { + /* compare w/known value, b3sum */ + ASSERT_EQ(cksum.hex(), "f1da5f4e2bd5669307bcdb2e223dad05af7425207cbee59e73526235f50f76ad"); + /* compare w/known value https://www.base64encode.org/ */ + ASSERT_EQ(cksum.to_base64(), + "ZjFkYTVmNGUyYmQ1NjY5MzA3YmNkYjJlMjIzZGFkMDVhZjc0MjUyMDdjYmVlNTllNzM1MjYyMzVmNTBmNzZhZA=="); + } else { // &dolor + /* compare w/known value, b3sum */ + ASSERT_EQ(cksum.hex(), "71fe44583a6268b56139599c293aeb854e5c5a9908eca00105d81ad5e22b7bb6"); + /* compare w/known value https://www.base64encode.org/ */ + ASSERT_EQ(cksum.to_base64(), + "NzFmZTQ0NTgzYTYyNjhiNTYxMzk1OTljMjkzYWViODU0ZTVjNWE5OTA4ZWNhMDAxMDVkODFhZDVlMjJiN2JiNg=="); + } + } +} /* blake3 */ + +TEST(RGWCksum, DigestSTR) +{ + for (auto t : {t1, t2, t3, t4, t5, t6, t7}) { + DigestVariant dv = rgw::cksum::digest_factory(t); + Digest* digest = get_digest(dv); + + ASSERT_NE(digest, nullptr); + + digest->Update((const unsigned char *)dolor.c_str(), dolor.length()); + auto cksum = rgw::cksum::finalize_digest(digest, t); + if (verbose) { + std::cout << "type: " << to_string(t) + << " digest: " << cksum.to_string() + << std::endl; + } + } +} +
+TEST(RGWCksum, DigestBL) +{ + std::string lacrimae = dolor + dolor; + + ceph::buffer::list dolor_bl; + for ([[maybe_unused]] const auto& ix : {1, 2}) { + dolor_bl.push_back( + buffer::create_static(dolor.length(), + const_cast(dolor.data()))); + } + + for (auto t : {t1, t2, t3, t4, t5, t6, t7}) { + DigestVariant dv1 = rgw::cksum::digest_factory(t); + Digest* digest1 = get_digest(dv1); + ASSERT_NE(digest1, nullptr); + + DigestVariant dv2 = rgw::cksum::digest_factory(t); + Digest* digest2 = get_digest(dv2); + ASSERT_NE(digest2, nullptr); + + digest1->Update((const unsigned char *)lacrimae.c_str(), + lacrimae.length()); + digest2->Update(dolor_bl); + + auto cksum1 = rgw::cksum::finalize_digest(digest1, t); + auto cksum2 = rgw::cksum::finalize_digest(digest2, t); + + ASSERT_EQ(cksum1.to_string(), cksum2.to_string()); + + /* serialization */ + buffer::list bl_out; + encode(cksum1, bl_out); + + /* unserialization */ + buffer::list bl_in; + bl_in.append(bl_out.c_str(), bl_out.length()); + + rgw::cksum::Cksum cksum3; + auto iter = bl_in.cbegin(); + decode(cksum3, iter); + + /* all that way for a Strohs */ + ASSERT_EQ(cksum1.to_string(), cksum3.to_string()); + } /* for t1, ... 
*/ +} +} /* namespace */ + +int main(int argc, char *argv[]) +{ + auto args = argv_to_vec(argc, argv); + env_to_vec(args); + + std::string val; + for (auto arg_iter = args.begin(); arg_iter != args.end();) { + if (ceph_argparse_flag(args, arg_iter, "--verbose", + (char*) nullptr)) { + verbose = true; + } else { + ++arg_iter; + } + } + + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/src/test/test_rgw_admin_log.cc b/src/test/test_rgw_admin_log.cc index 7dd7604db1a1f..0759b0c80ce25 100644 --- a/src/test/test_rgw_admin_log.cc +++ b/src/test/test_rgw_admin_log.cc @@ -30,6 +30,7 @@ extern "C"{ #include "common/ceph_json.h" #include "common/code_environment.h" #include "common/ceph_argparse.h" +#include "common/armor.h" #include "common/Finisher.h" #include "global/global_init.h" #include "rgw_common.h" @@ -55,8 +56,6 @@ using namespace std; static string uid = "ceph"; static string display_name = "CEPH"; -extern "C" int ceph_armor(char *dst, const char *dst_end, - const char *src, const char *end); static void print_usage(char *exec){ cout << "Usage: " << exec << " \n"; cout << "Options:\n" diff --git a/src/test/test_rgw_admin_meta.cc b/src/test/test_rgw_admin_meta.cc index 00c43d10b5495..962c08f369c1a 100644 --- a/src/test/test_rgw_admin_meta.cc +++ b/src/test/test_rgw_admin_meta.cc @@ -29,6 +29,7 @@ extern "C"{ #include "common/ceph_json.h" #include "common/code_environment.h" #include "common/ceph_argparse.h" +#include "common/armor.h" #include "common/Finisher.h" #include "global/global_init.h" #include "rgw_common.h" @@ -47,8 +48,6 @@ static string uid = CEPH_UID; static string display_name = "CEPH"; static string meta_caps = "metadata"; -extern "C" int ceph_armor(char *dst, const char *dst_end, - const char *src, const char *end); static void print_usage(char *exec){ cout << "Usage: " << exec << " \n"; cout << "Options:\n" -- 2.39.5