From: Matt Benjamin Date: Fri, 21 Feb 2025 19:43:53 +0000 (-0500) Subject: rgw:cksum: implement crc64nvme and combined 32- and 64-bit CRCs X-Git-Tag: testing/wip-vshankar-testing-20250407.170244-debug~72^2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=ee03b5054147afcf6f174efe71e976394ede7715;p=ceph-ci.git rgw:cksum: implement crc64nvme and combined 32- and 64-bit CRCs * internally compensate for at-rest byteswapped crc64 representation (e.g., before combine step) * generalize Cksum crc api for 32bit and 64bit crcs, other cleanup * prototype abstract cksum::Combiner workflow * add support for forward and backward handling of composite vs full object checksums by marking the composites and the update flag day * clean up checksum formatting and checksum type reporting * add unit tests for Combiner interface * validate and track requested checksum type (i.e., composite or full), plus unit test fixture for combinations of full matrix of cksum types * doh. GET/HEAD checksums are in headers * add crcany license to COPYING * return ChecksumType as header in GET/HEAD Found by Casey in review * avoid fmt of char* null when no checksum header supplied A cksum_hdr_t(nullptr, nullptr) results in this case, and is intended, but its components obviously can't be presented to std::format unless cast to a safe pointer type. * fail checksum mismatch with BadDigest When uploading an S3 object with an invalid checksum, the return code should be BadDigest to mirror more closely the AWS S3 implementation. See: https://docs.aws.amazon.com/AmazonS3/latest/userguide/checking-object-integrity.html Fixes: https://tracker.ceph.com/issues/70614 Reported by Alex Wojno * fix comparison and display of composite checksums A string comparison bounds error and a bitmask comparison error are fixed. * fix build on centos9 On gcc(?13?) we can't declare rgw::cksum::FLAG_NONE and also rgw::cksum::Cksum::FLAG_NONE. SAD!! 
* include invariantly * fix checksum type return from complete-multipart This one is in the XML response * aieee, don't leak Combiners Use unique_ptr to polymorphic type...correctly. Signed-off-by: Matt Benjamin --- diff --git a/COPYING b/COPYING index 8bc6b59b1c2..3b758d58155 100644 --- a/COPYING +++ b/COPYING @@ -224,3 +224,26 @@ License: GNU Affero General Public License, Version 3 Files: src/common/*s390x* Copyright: 2024 IBM License: Apache License, version 2.0 + +Files: src/rgw/madler/* +License: + Copyright (C) 2014-2025 Mark Adler + + This software is provided 'as-is', without any express or implied warranty. + In no event will the authors be held liable for any damages arising from the + use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not claim + that you wrote the original software. If you use this software in a + product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. 
+ + Mark Adler + madler@alumni.caltech.edu diff --git a/src/rgw/CMakeLists.txt b/src/rgw/CMakeLists.txt index a4f154b6190..0c95fd7fbb9 100644 --- a/src/rgw/CMakeLists.txt +++ b/src/rgw/CMakeLists.txt @@ -57,6 +57,9 @@ set(librgw_common_srcs services/svc_zone.cc services/svc_zone_utils.cc spdk/crc64.c + madler/crc64nvme.c + madler/crc32iso_hdlc.c + madler/crc32iscsi.c rgw_account.cc rgw_acl.cc rgw_acl_s3.cc @@ -71,6 +74,7 @@ set(librgw_common_srcs rgw_bucket.cc rgw_bucket_layout.cc rgw_cache.cc + rgw_cksum.cc rgw_cksum_pipe.cc rgw_common.cc rgw_compression.cc diff --git a/src/rgw/driver/rados/rgw_sal_rados.cc b/src/rgw/driver/rados/rgw_sal_rados.cc index 0578d2e5958..cd459360337 100644 --- a/src/rgw/driver/rados/rgw_sal_rados.cc +++ b/src/rgw/driver/rados/rgw_sal_rados.cc @@ -3841,6 +3841,7 @@ int RadosMultipartUpload::init(const DoutPrefixProvider *dpp, optional_yield y, multipart_upload_info upload_info; upload_info.dest_placement = dest_placement; upload_info.cksum_type = cksum_type; + upload_info.cksum_flags = cksum_flags; if (obj_legal_hold) { upload_info.obj_legal_hold_exist = true; @@ -4256,6 +4257,7 @@ int RadosMultipartUpload::get_info(const DoutPrefixProvider *dpp, optional_yield return -EIO; } cksum_type = upload_info.cksum_type; + cksum_flags = upload_info.cksum_flags; placement = upload_info.dest_placement; upload_information = upload_info; *rule = &placement; diff --git a/src/rgw/rgw_cksum.cc b/src/rgw/rgw_cksum.cc new file mode 100644 index 00000000000..e6e4c5b5635 --- /dev/null +++ b/src/rgw/rgw_cksum.cc @@ -0,0 +1,157 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright contributors to the Ceph project + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ * + */ + +#include "rgw_cksum.h" +#include "rgw_cksum_digest.h" +#include +#include +#include "rgw_crc_digest.h" + +extern "C" { +#include "madler/crc64nvme.h" +#include "madler/crc32iso_hdlc.h" +#include "madler/crc32iscsi.h" +#include "spdk/crc64.h" +} // extern "C" + +namespace rgw::cksum { + + std::optional + combine_crc_cksum(const Cksum& ck1, const Cksum& ck2, uintmax_t len1) + { + std::optional ck3; + if ((ck1.type != ck2.type) || + !ck1.crc()) { + goto out; + } + + switch(ck1.type) { + case cksum::Type::crc64nvme: + { + /* due to AWS (and other) convention, the at-rest + * digest is byteswapped (on LE?); restore the + * defined byte order before combining */ + auto cck1 = + rgw::digest::byteswap(std::get(*ck1.get_crc())); + auto cck2 = + rgw::digest::byteswap(std::get(*ck2.get_crc())); + /* madler crcany */ + auto cck3 = crc64nvme_comb(cck1, cck2, len1); + /* and byteswap */ + cck3 = rgw::digest::byteswap(cck3); + /* convert to a Cksum, no ascii armor */ + ck3 = Cksum(ck1.type, (char*) &cck3, Cksum::CtorStyle::raw); + } + break; + case cksum::Type::crc32: + case cksum::Type::crc32c: + { + uint32_t cck3; + auto cck1 = + rgw::digest::byteswap(std::get(*ck1.get_crc())); + auto cck2 = + rgw::digest::byteswap(std::get(*ck2.get_crc())); + /* madler crcany */ + switch (ck1.type) { + case cksum::Type::crc32: + cck3 = crc32iso_hdlc_comb(cck1, cck2, len1); + break; + case cksum::Type::crc32c: + cck3 = crc32iscsi_comb(cck1, cck2, len1); + break; + default: + break; + } + /* and byteswap */ + cck3 = rgw::digest::byteswap(cck3); + /* convert to a Cksum, no ascii armor */ + ck3 = Cksum(ck1.type, (char*) &cck3, Cksum::CtorStyle::raw); + } + break; + default: + break; + }; + + out: + return ck3; + } + + /* the checksum of the final object is a checksum (of the same type, + * presumably) of the concatenated checksum bytes of the parts, plus + * "-. 
See + * https://docs.aws.amazon.com/AmazonS3/latest/userguide/checking-object-integrity.html#large-object-checksums + */ + class DigestCombiner : public Combiner + { + DigestVariant dv; + Digest* digest{nullptr}; + + public: + DigestCombiner(cksum::Type t) + : Combiner(t), + dv(digest_factory(t)), + digest(get_digest(dv)) + {} + virtual void append(const Cksum& cksum, uint64_t ignored) { + auto ckr = cksum.raw(); + digest->Update((unsigned char *)ckr.data(), ckr.length()); + } + virtual Cksum final() { + auto cksum = finalize_digest(digest, get_type()); + cksum.flags |= Cksum::COMPOSITE_MASK; + return cksum; + } + }; /* Digest */ + + /* the checksum of the final object is an "ordinary" (full object) checksum, + * synthesized from the CRCs of the component checksums + */ + class CRCCombiner : public Combiner + { + Cksum cksum; + bool once{false}; + public: + CRCCombiner(cksum::Type t) + : Combiner(t) + {} + virtual void append(const Cksum& rhs, uint64_t part_size) { + if (! once) { + // save the first part checksum + cksum = rhs; + once = true; + } else { + // combine the next partial checksum into cksum + cksum = *combine_crc_cksum(cksum, rhs, part_size); + } + } + virtual Cksum final() { + cksum.flags |= Cksum::FULL_OBJECT_MASK; + return cksum; + } + }; /* CRCCombine */ + + std::unique_ptr CombinerFactory(cksum::Type t, uint16_t flags) + { + switch(t) { + case cksum::Type::crc32: + case cksum::Type::crc32c: + case cksum::Type::crc64nvme: + return std::unique_ptr(new CRCCombiner(t)); + default: + return std::unique_ptr(new DigestCombiner(t)); + }; + } + +} // namespace rgw::cksum diff --git a/src/rgw/rgw_cksum.h b/src/rgw/rgw_cksum.h index 900a1046d19..70836e7c6ba 100644 --- a/src/rgw/rgw_cksum.h +++ b/src/rgw/rgw_cksum.h @@ -17,12 +17,15 @@ #include #include #include +#include #include #include #include #include #include #include +#include +#include #include #include "fmt/format.h" #include "common/armor.h" @@ -48,8 +51,9 @@ namespace rgw { namespace cksum { 
crc64nvme, }; - static constexpr uint16_t FLAG_NONE = 0x0000; - static constexpr uint16_t FLAG_AWS_CKSUM = 0x0001; + static constexpr uint16_t FLAG_NONE = 0x0000; + static constexpr uint16_t FLAG_AWS_CKSUM = 0x0001; + static constexpr uint16_t FLAG_CRC = 0x0002; class Desc { @@ -84,30 +88,60 @@ namespace rgw { namespace cksum { static constexpr std::array checksums = { Desc(Type::none, "none", 0, FLAG_NONE), - Desc(Type::crc32, "crc32", 4, FLAG_AWS_CKSUM), - Desc(Type::crc32c, "crc32c", 4, FLAG_AWS_CKSUM), + Desc(Type::crc32, "crc32", 4, FLAG_AWS_CKSUM|FLAG_CRC), + Desc(Type::crc32c, "crc32c", 4, FLAG_AWS_CKSUM|FLAG_CRC), Desc(Type::xxh3, "xxh3", 8, FLAG_NONE), Desc(Type::sha1, "sha1", 20, FLAG_AWS_CKSUM), Desc(Type::sha256, "sha256", 32, FLAG_AWS_CKSUM), Desc(Type::sha512, "sha512", 64, FLAG_NONE), Desc(Type::blake3, "blake3", 32, FLAG_NONE), - Desc(Type::crc64nvme, "crc64nvme", 8, FLAG_AWS_CKSUM), + Desc(Type::crc64nvme, "crc64nvme", 8, FLAG_AWS_CKSUM|FLAG_CRC), }; static constexpr uint16_t max_digest_size = 64; using value_type = std::array; + static constexpr uint16_t FLAG_CKSUM_NONE = 0x0000; + static constexpr uint16_t FLAG_V2 = 0x0001; // struct_v >= 2 + static constexpr uint16_t FLAG_COMPOSITE = 0x0002; + static constexpr uint16_t FLAG_FULL_OBJECT = 0x0004; + static constexpr uint16_t FLAG_COMBINED = 0x0008; + + static constexpr uint16_t COMPOSITE_MASK = + (FLAG_COMBINED|FLAG_COMPOSITE); + static constexpr uint16_t FULL_OBJECT_MASK = + (FLAG_COMBINED|FLAG_FULL_OBJECT); + Type type; value_type digest; + uint16_t flags; + + enum class CtorStyle : uint8_t + { + raw = 0, + from_armored, + }; - Cksum(Type _type = Type::none) : type(_type) {} - Cksum(Type _type, const char* _armored_text) - : type(_type) { + Cksum(Type _type = Type::none) + : type(_type), flags(FLAG_V2) + {} + + Cksum(Type _type, const char* _data, CtorStyle style) + : type(_type), flags(FLAG_V2) + { const auto& ckd = checksums[uint16_t(type)]; + switch (style) { + case CtorStyle::from_armored: 
(void) ceph_unarmor((char*) digest.begin(), (char*) digest.begin() + ckd.digest_size, - _armored_text, - _armored_text + std::strlen(_armored_text)); + _data, + _data + std::strlen(_data)); + break; + case CtorStyle::raw: + default: + memcpy((char*) digest.data(), (char*) _data, ckd.digest_size); + break; + }; } const char* type_string() const { @@ -118,6 +152,28 @@ namespace rgw { namespace cksum { return (Cksum::checksums[uint16_t(type)]).aws(); } + const bool crc() const { + return (Cksum::checksums[uint16_t(type)]).flags & FLAG_CRC; + } + + const bool v2() const { + return flags & FLAG_V2; + } + + const bool composite() const { + /* treating COMPOSITE and FULL_OBJECT as flags has issues, + * as does not doing it; note that we cannot rely on crc() + * to mean !COMPOSITE/FULL_OBJECT, as CRC32 and CRC32C + * were deployed as digest checksums in 2023 (so we must be + * prepared to find them on disk) and per AWS can still be + * constructed as digests. + * + * invariant: FLAG_COMPOSITE is a property of /combined checksums/; + * it propogates through encode/decode, but we expect only + * logical combination/Combiner to set it */ + return ((flags & COMPOSITE_MASK) == COMPOSITE_MASK); + } + std::string aws_name() const { return fmt::format("x-amz-checksum-{}", type_string()); } @@ -164,27 +220,63 @@ namespace rgw { namespace cksum { } std::string to_string() const { - std::string hs; const auto& ckd = checksums[uint16_t(type)]; return fmt::format("{{{}}}{}", ckd.name, to_base64()); } + + using crc_type = std::variant; + + std::optional get_crc() const { + std::optional res; + const auto& ckd = checksums[uint16_t(type)]; + if (!crc()) { + goto out; + } + switch(ckd.digest_size) { + case 4: + { + uint32_t crc; + memcpy(&crc, (char*) digest.data(), sizeof(crc)); + res = crc; + } + break; + case 8: + { + uint64_t crc; + memcpy(&crc, (char*) digest.data(), sizeof(crc)); + res = crc; + } + break; + default: + break; + } + out: + return res; + } + void encode(buffer::list& 
bl) const { const auto& ckd = checksums[uint16_t(type)]; - ENCODE_START(1, 1, bl); + ENCODE_START(2, 1, bl); encode(uint16_t(type), bl); encode(ckd.digest_size, bl); bl.append((char*)digest.data(), ckd.digest_size); + encode(flags, bl); ENCODE_FINISH(bl); } void decode(bufferlist::const_iterator& p) { - DECODE_START(1, p); + DECODE_START(2, p); uint16_t tt; decode(tt, p); type = cksum::Type(tt); decode(tt, p); /* <= max_digest_size */ p.copy(tt, (char*)digest.data()); + if (struct_v < 2) { + flags = 0; + } else { + decode(flags, p); + } DECODE_FINISH(p); } }; /* Cksum */ @@ -192,8 +284,8 @@ namespace rgw { namespace cksum { static inline const std::optional no_cksum{std::nullopt}; + /* XXX would like std::string view */ static inline std::string to_string(const Type type) { - std::string hs; const auto& ckd = Cksum::checksums[uint16_t(type)]; return ckd.name; } @@ -207,6 +299,22 @@ namespace rgw { namespace cksum { return Type::none; } /* parse_cksum_type */ + static inline uint16_t cksum_flags_of(Type t) { + switch(t) { + case cksum::Type::none: + return Cksum::FLAG_CKSUM_NONE; + break; + case cksum::Type::crc64nvme: + case cksum::Type::crc32: + case cksum::Type::crc32c: + return Cksum::FLAG_FULL_OBJECT; + break; + default: + break; + }; + return Cksum::FLAG_COMPOSITE; + } /* cksum_flags_of */ + static inline Type parse_cksum_type_hdr(const std::string_view hdr_name) { auto pos = hdr_name.find("x-amz-checksum-", 0); if (pos == std::string::npos) { @@ -225,4 +333,69 @@ namespace rgw { namespace cksum { parse_cksum_type_hdr(hdr_name) != Type::none; } /* is_cksum_hdr */ + + using PermittedCksumResult + = std::tuple; + + static inline PermittedCksumResult + permitted_cksum_algo_and_type(Type type, uint16_t cksum_flags) { + if (type == Type::none) { + return PermittedCksumResult(true, "", ""); + } + if (cksum_flags & Cksum::FLAG_COMPOSITE) { + if (type == Type::crc64nvme) { + return PermittedCksumResult(false, to_string(type), "COMPOSITE"); + } + return 
PermittedCksumResult(true, to_string(type), "COMPOSITE"); + } + /* FULL_OBJECT */ + const auto& ckd = Cksum::checksums[uint16_t(type)]; + return + PermittedCksumResult( + (ckd.flags & cksum::FLAG_CRC), to_string(type), "FULL_OBJECT"); + } + + std::optional + combine_crc_cksum(const Cksum& ck1, const Cksum& ck2, uintmax_t len2); + + /* wrap combine/digest checksums */ + class Combiner + { + cksum::Type type; + public: + Combiner(cksum::Type t) + : type(t) {} + + cksum::Type get_type() const { return type; } + + virtual void append(const Cksum& cksum, uint64_t part_size) = 0; + virtual Cksum final() = 0; + virtual ~Combiner() {} + }; /* abstract Combiner */ + + /* choose type-correct Combiner */ + std::unique_ptr + CombinerFactory(cksum::Type t, uint16_t flags); + + using ChecksumTypeResult = std::tuple; + + static inline ChecksumTypeResult + get_checksum_type(const Cksum& cksum, bool is_multipart) { + /* non-multipart checksum */ + if (! is_multipart) { + return ChecksumTypeResult(Cksum::FLAG_CKSUM_NONE, "FULL_OBJECT"); + } + /* multipart cksum */ + if (cksum.v2()) [[likely]] { + if (! 
cksum.composite()) { + return ChecksumTypeResult(Cksum::FLAG_CKSUM_NONE, "FULL_OBJECT"); + } + /* composite */ + /* fall through */ + } + /* cksum predates the 2025 update that introduced CRC combining, + * so it's a "composite" checksum regardless of the algorithm */ + return ChecksumTypeResult(Cksum::FLAG_COMPOSITE, "COMPOSITE"); + } /* get_checksum_type */ + }} /* namespace */ diff --git a/src/rgw/rgw_cksum_pipe.cc b/src/rgw/rgw_cksum_pipe.cc index da60c2071f9..837dfac7f5a 100644 --- a/src/rgw/rgw_cksum_pipe.cc +++ b/src/rgw/rgw_cksum_pipe.cc @@ -14,6 +14,7 @@ */ #include "rgw_cksum_pipe.h" +#include #include #include #include @@ -27,16 +28,19 @@ namespace rgw::putobj { RGWPutObj_Cksum::RGWPutObj_Cksum(rgw::sal::DataProcessor* next, rgw::cksum::Type _typ, + uint16_t _flags, cksum_hdr_t&& _hdr) : Pipe(next), _type(_typ), + flags(_flags), dv(rgw::cksum::digest_factory(_type)), _digest(cksum::get_digest(dv)), cksum_hdr(_hdr) {} std::unique_ptr RGWPutObj_Cksum::Factory( rgw::sal::DataProcessor* next, const RGWEnv& env, - rgw::cksum::Type override_type) + rgw::cksum::Type override_type, + uint16_t cksum_flags) { /* look for matching headers */ auto algo_header = cksum_algorithm_hdr(env); @@ -47,7 +51,7 @@ namespace rgw::putobj { if (cksum_type != rgw::cksum::Type::none) { return std::make_unique( - next, cksum_type, std::move(algo_header)); + next, cksum_type, cksum_flags, std::move(algo_header)); } else { /* unknown checksum type requested */ throw rgw::io::Exception(EINVAL, std::system_category()); @@ -62,7 +66,7 @@ namespace rgw::putobj { auto algo_header = cksum_algorithm_hdr(override_type); return std::make_unique( - next, override_type, std::move(algo_header)); + next, override_type, cksum_flags, std::move(algo_header)); } /* no checksum requested */ return std::unique_ptr(); diff --git a/src/rgw/rgw_cksum_pipe.h b/src/rgw/rgw_cksum_pipe.h index c459d156335..da79d17cdac 100644 --- a/src/rgw/rgw_cksum_pipe.h +++ b/src/rgw/rgw_cksum_pipe.h @@ -81,6 +81,18 @@ 
namespace rgw::putobj { return cksum_hdr_t(nullptr, nullptr); } /* cksum_algorithm_hdr */ + static inline cksum::Type + multipart_cksum_algo(const RGWEnv& env) { + /* AWS has shifted to "strong" integrity checking by default, + * we may define policy in future */ + cksum::Type cksum_algo{cksum::Type::none}; + auto algo_hdr = rgw::putobj::cksum_algorithm_hdr(env); + if (algo_hdr.second) { + cksum_algo = rgw::cksum::parse_cksum_type(algo_hdr.second); + } + return cksum_algo; + } + using GetHeaderCksumResult = std::pair; static inline GetHeaderCksumResult get_hdr_cksum(const RGWEnv& env) { @@ -93,7 +105,8 @@ namespace rgw::putobj { auto hv = env.get(hk.c_str()); if (hv) { return - GetHeaderCksumResult(cksum::Cksum(cksum_type, hv), + GetHeaderCksumResult(cksum::Cksum(cksum_type, hv, + cksum::Cksum::CtorStyle::from_armored), std::string_view(hv, std::strlen(hv))); } } @@ -114,17 +127,37 @@ namespace rgw::putobj { auto hv = env.get(hk.c_str()); if (hv) { return - GetHeaderCksumResult(cksum::Cksum(cksum_type, hv), + GetHeaderCksumResult(cksum::Cksum(cksum_type, hv, + cksum::Cksum::CtorStyle::from_armored), std::string_view(hv, std::strlen(hv))); } } return GetHeaderCksumResult(cksum::Cksum(cksum_type), ""); } /* find_hdr_cksum */ + static inline uint16_t + parse_cksum_flags(cksum::Type t, boost::optional type_hdr) { + uint16_t cksum_flags{0}; + if (type_hdr) { + if (boost::algorithm::iequals(*type_hdr, "full_object")) { + cksum_flags |= cksum::Cksum::FLAG_FULL_OBJECT; + } + if (boost::algorithm::iequals(*type_hdr, "composite")) { + cksum_flags |= cksum::Cksum::FLAG_COMPOSITE; + } + } else { + /* if the client sent a checksum algorithm header but not a "type" header, + * we can select the matching type */ + cksum_flags |= cksum_flags_of(t); + } + return cksum_flags; + } /* parse_cksum_flags */ + // PutObj filter for streaming checksums class RGWPutObj_Cksum : public rgw::putobj::Pipe { cksum::Type _type; + uint16_t flags; cksum::DigestVariant dv; cksum::Digest* _digest; 
cksum::Cksum _cksum; @@ -136,10 +169,10 @@ namespace rgw::putobj { static std::unique_ptr Factory( rgw::sal::DataProcessor* next, const RGWEnv&, - rgw::cksum::Type override_type); + rgw::cksum::Type override_type, uint16_t cksum_flags); RGWPutObj_Cksum(rgw::sal::DataProcessor* next, rgw::cksum::Type _type, - cksum_hdr_t&& _hdr); + uint16_t _flags, cksum_hdr_t&& _hdr); RGWPutObj_Cksum(RGWPutObj_Cksum& rhs) = delete; ~RGWPutObj_Cksum() {} @@ -153,13 +186,17 @@ namespace rgw::putobj { const cksum::Cksum& finalize() { _cksum = finalize_digest(_digest, _type); + _cksum.flags = flags; // n.b., this may clear the COMPOSITE flag return _cksum; } const char* expected(const RGWEnv& env) { - auto hk = fmt::format("HTTP_X_AMZ_CHECKSUM_{}", cksum_hdr.second); - auto hv = env.get(hk.c_str()); - return hv; + if (cksum_hdr.second) { + auto hk = fmt::format("HTTP_X_AMZ_CHECKSUM_{}", cksum_hdr.second); + auto hv = env.get(hk.c_str()); + return hv; + } + return nullptr; } VerifyResult verify(const RGWEnv& env) { diff --git a/src/rgw/rgw_common.h b/src/rgw/rgw_common.h index bfb3690515e..7c03e49b52a 100644 --- a/src/rgw/rgw_common.h +++ b/src/rgw/rgw_common.h @@ -1539,9 +1539,10 @@ struct multipart_upload_info RGWObjectRetention obj_retention; RGWObjectLegalHold obj_legal_hold; rgw::cksum::Type cksum_type {rgw::cksum::Type::none}; + uint16_t cksum_flags{rgw::cksum::Cksum::FLAG_CKSUM_NONE}; void encode(bufferlist& bl) const { - ENCODE_START(3, 1, bl); + ENCODE_START(4, 1, bl); encode(dest_placement, bl); encode(obj_retention_exist, bl); encode(obj_legal_hold_exist, bl); @@ -1549,11 +1550,12 @@ struct multipart_upload_info encode(obj_legal_hold, bl); uint16_t ct{uint16_t(cksum_type)}; encode(ct, bl); + encode(cksum_flags, bl); ENCODE_FINISH(bl); } void decode(bufferlist::const_iterator& bl) { - DECODE_START_LEGACY_COMPAT_LEN(3, 1, 1, bl); + DECODE_START_LEGACY_COMPAT_LEN(4, 1, 1, bl); decode(dest_placement, bl); if (struct_v >= 2) { decode(obj_retention_exist, bl); @@ -1564,6 +1566,9 @@ 
struct multipart_upload_info uint16_t ct; decode(ct, bl); cksum_type = rgw::cksum::Type(ct); + if (struct_v >= 4) { + decode(cksum_flags, bl); + } } } else { obj_retention_exist = false; diff --git a/src/rgw/rgw_crc_digest.h b/src/rgw/rgw_crc_digest.h index 15e488152cd..5ca77f0792e 100644 --- a/src/rgw/rgw_crc_digest.h +++ b/src/rgw/rgw_crc_digest.h @@ -98,7 +98,7 @@ namespace rgw { namespace digest { public: static constexpr uint16_t digest_size = 8; - static constexpr uint64_t initial_value = 0x0; + static constexpr uint64_t initial_value = 0ULL; Crc64Nvme() { Restart(); } @@ -109,6 +109,10 @@ namespace rgw { namespace digest { } void Final(unsigned char* digest) { + /* due to AWS (and other) convention, the at-rest + * digest is byteswapped (on LE?); */ + /* XXX is this really endian specific? don't want + * break ARM */ if constexpr (std::endian::native != std::endian::big) { crc = rgw::digest::byteswap(crc); } diff --git a/src/rgw/rgw_op.cc b/src/rgw/rgw_op.cc index 750345c45bf..54ae09c4b38 100644 --- a/src/rgw/rgw_op.cc +++ b/src/rgw/rgw_op.cc @@ -15,6 +15,7 @@ #include #include +#include "common/dout.h" #include "include/scope_guard.h" #include "common/Clock.h" #include "common/armor.h" @@ -4359,6 +4360,7 @@ void RGWPutObj::execute(optional_yield y) } multipart_cksum_type = upload->cksum_type; + multipart_cksum_flags = upload->cksum_flags; /* upload will go out of scope, so copy the dest placement for later use */ s->dest_placement = *pdest_placement; @@ -4490,7 +4492,10 @@ void RGWPutObj::execute(optional_yield y) /* optional streaming checksum */ try { cksum_filter = - rgw::putobj::RGWPutObj_Cksum::Factory(filter, *s->info.env, multipart_cksum_type); + rgw::putobj::RGWPutObj_Cksum::Factory( + filter, *s->info.env, + multipart_cksum_type, + multipart_cksum_flags); } catch (const rgw::io::Exception& e) { op_ret = -e.code().value(); return; @@ -4642,6 +4647,7 @@ void RGWPutObj::execute(optional_yield y) if (cksum_filter) { const auto& hdr = 
cksum_filter->header(); + auto expected_ck = cksum_filter->expected(*s->info.env); auto cksum_verify = cksum_filter->verify(*s->info.env); // valid or no supplied cksum @@ -4653,7 +4659,7 @@ void RGWPutObj::execute(optional_yield y) ldpp_dout_fmt(this, 16, "{} checksum verified " "\n\tcomputed={} == \n\texpected={}", - hdr.second, + (hdr.second) ? hdr.second : "(no supplied checksum header)", cksum->to_armor(), (!!expected_ck) ? expected_ck : "(checksum unavailable)"); @@ -4670,7 +4676,7 @@ void RGWPutObj::execute(optional_yield y) computed_ck, (!!expected_ck) ? expected_ck : "(checksum unavailable)"); - op_ret = -ERR_INVALID_REQUEST; + op_ret = -ERR_BAD_DIGEST; return; } } @@ -4865,7 +4871,8 @@ void RGWPostObj::execute(optional_yield y) try { cksum_filter = rgw::putobj::RGWPutObj_Cksum::Factory( - filter, *s->info.env, rgw::cksum::Type::none /* no override */); + filter, *s->info.env, rgw::cksum::Type::none /* no override */, + rgw::cksum::Cksum::FLAG_CKSUM_NONE); } catch (const rgw::io::Exception& e) { op_ret = -e.code().value(); return; @@ -4979,7 +4986,7 @@ void RGWPostObj::execute(optional_yield y) cksum->to_armor(), cksum_filter->expected(*s->info.env)); - op_ret = -ERR_INVALID_REQUEST; + op_ret = -ERR_BAD_DIGEST; return; } } @@ -6672,18 +6679,24 @@ void RGWInitMultipart::execute(optional_yield y) std::unique_ptr upload; upload = s->bucket->get_multipart_upload(s->object->get_name(), upload_id); + + /* apparently, we are assured of upload */ upload->obj_legal_hold = obj_legal_hold; upload->obj_retention = obj_retention; upload->cksum_type = cksum_algo; - op_ret = upload->init(this, s->yield, s->owner, s->dest_placement, attrs); + /* cksum_flags have been validated against algorithm, so, e.g., if + * FLAG_COMPOSITE is set, the algorithm can be used with a composite/ + * digest checksum (e.g., it's not CRC64NVME) */ + upload->cksum_flags = cksum_flags; + op_ret = upload->init(this, s->yield, s->owner, s->dest_placement, attrs); if (op_ret == 0) { upload_id = 
upload->get_upload_id(); } s->trace->SetAttribute(tracing::rgw::UPLOAD_ID, upload_id); multipart_trace->UpdateName(tracing::rgw::MULTIPART + upload_id); -} +} /* RGWInitMultipart::execute() */ int RGWCompleteMultipart::verify_permission(optional_yield y) { @@ -6713,6 +6726,7 @@ try_sum_part_cksums(const DoutPrefixProvider *dpp, rgw::sal::MultipartUpload* upload, RGWMultiCompleteUpload* parts, std::optional& out_cksum, + std::optional& armored_cksum, optional_yield y) { /* 1. need checksum-algorithm header (if invalid, fail) @@ -6735,6 +6749,7 @@ try_sum_part_cksums(const DoutPrefixProvider *dpp, auto num_parts = int(parts->parts.size()); rgw::cksum::Type& cksum_type = upload->cksum_type; + uint16_t cksum_flags = upload->cksum_flags; int again_count{0}; again: @@ -6761,8 +6776,7 @@ try_sum_part_cksums(const DoutPrefixProvider *dpp, return 0; } - rgw::cksum::DigestVariant dv = rgw::cksum::digest_factory(cksum_type); - rgw::cksum::Digest* digest = rgw::cksum::get_digest(dv); + auto cksum_combiner = rgw::cksum::CombinerFactory(cksum_type, cksum_flags); /* returns the parts (currently?) in cache */ auto parts_ix{0}; @@ -6787,12 +6801,12 @@ try_sum_part_cksums(const DoutPrefixProvider *dpp, if ((part_cksum->type != cksum_type)) { /* if parts have inconsistent checksum, fail now */ - ldpp_dout_fmt(dpp, 14, - "ERROR: multipart part checksum type mismatch\n\tcomplete " - "multipart header={} part={}", - to_string(part_cksum->type), to_string(cksum_type)); + ldpp_dout_fmt(dpp, 14, + "ERROR: multipart part checksum type mismatch\n\tcomplete " + "multipart header={} part={}", + to_string(part_cksum->type), to_string(cksum_type)); - op_ret = -ERR_INVALID_REQUEST; + op_ret = -ERR_INVALID_REQUEST; return op_ret; } @@ -6801,18 +6815,27 @@ try_sum_part_cksums(const DoutPrefixProvider *dpp, * "-. 
See * https://docs.aws.amazon.com/AmazonS3/latest/userguide/checking-object-integrity.html#large-object-checksums */ - auto ckr = part_cksum->raw(); - digest->Update((unsigned char *)ckr.data(), ckr.length()); + cksum_combiner->append(*part_cksum, part.second->get_size()); + } /* all-parts */ /* we cannot verify this checksum, only compute it */ - out_cksum = rgw::cksum::finalize_digest(digest, cksum_type); + out_cksum = cksum_combiner->final(); + + armored_cksum = [&]() -> std::string { + std::string armor = out_cksum->to_armor(); + if (out_cksum->composite()) { + armor += fmt::format("-{}", num_parts); + } + return armor; + }(); ldpp_dout_fmt(dpp, 16, - "INFO: {} combined checksum {} {}-{}", + "INFO: {} combined checksum {} {}", __func__, out_cksum->type_string(), - out_cksum->to_armor(), num_parts); + out_cksum->to_armor(), + *armored_cksum); return op_ret; } /* try_sum_part_chksums */ @@ -6929,7 +6952,8 @@ void RGWCompleteMultipart::execute(optional_yield y) /* checksum computation */ if (upload->cksum_type != rgw::cksum::Type::none) { - op_ret = try_sum_part_cksums(this, s->cct, upload.get(), parts, cksum, y); + op_ret = try_sum_part_cksums(this, s->cct, upload.get(), parts, cksum, + armored_cksum, y); if (op_ret < 0) { ldpp_dout(this, 16) << "ERROR: try_sum_part_cksums failed, obj=" << meta_obj << " ret=" << op_ret << dendl; @@ -6949,9 +6973,6 @@ void RGWCompleteMultipart::execute(optional_yield y) auto& target_attrs = meta_obj->get_attrs(); if (cksum) { - armored_cksum = - fmt::format("{}-{}", cksum->to_armor(), parts->parts.size()); - /* validate computed checksum against supplied checksum, if present */ auto [hdr_cksum, supplied_cksum] = rgw::putobj::find_hdr_cksum(*(s->info.env)); @@ -6960,19 +6981,37 @@ void RGWCompleteMultipart::execute(optional_yield y) "INFO: client supplied checksum {}: {} ", hdr_cksum.header_name(), supplied_cksum); - if (! 
(supplied_cksum.empty()) && - (supplied_cksum != armored_cksum)) { - /* some minio SDK clients assert a checksum that is cryptographically - * valid but omits the part count */ - auto parts_suffix = fmt::format("-{}", parts->parts.size()); - auto suffix_len = armored_cksum->size() - parts_suffix.size(); - if (armored_cksum->compare(0, suffix_len, supplied_cksum) != 0) { - ldpp_dout_fmt(this, 4, - "{} content checksum mismatch" - "\n\tcalculated={} != \n\texpected={}", - hdr_cksum.header_name(), armored_cksum, supplied_cksum); - op_ret = -ERR_INVALID_REQUEST; - return; + /* in late 2024, we observed some minio SDK clients assert a checksum that + * was a cryptographically valid *digest*, but omitted the part count; + * + * in addition, from 2025, AWS specifies that multipart uploads using CRC + * checksums may (CRC32, CRC32c) or must (CRC64NVME) be computed as full + * object checksums [1], so perhaps should not suffix a part count. + * + * for the present, it appears safe to accept both forms regardless of the + * checksum algorithm. + * + * [1] https://docs.aws.amazon.com/AmazonS3/latest/userguide/checking-object-integrity.html + */ + + if (! 
supplied_cksum.empty()) { + const std::string_view acm = *armored_cksum; + const std::string_view scm = supplied_cksum; + if (scm != acm) { + auto c_len = acm.length(); + if (cksum->composite()) { + auto parts_suffix = fmt::format("-{}", parts->parts.size()); + c_len -= parts_suffix.length(); + } + auto c_res = scm.compare(0, c_len, acm, 0, c_len); + if (c_res != 0) { + ldpp_dout_fmt(this, 4, + "{} content checksum mismatch" + "\n\tcalculated={} != \n\texpected={}", + hdr_cksum.header_name(), armored_cksum, supplied_cksum); + op_ret = -ERR_BAD_DIGEST; + return; + } } } diff --git a/src/rgw/rgw_op.h b/src/rgw/rgw_op.h index cbe441c140b..d0c3505cb69 100644 --- a/src/rgw/rgw_op.h +++ b/src/rgw/rgw_op.h @@ -1240,6 +1240,7 @@ protected: std::string multipart_part_str; int multipart_part_num = 0; rgw::cksum::Type multipart_cksum_type{rgw::cksum::Type::none}; + uint16_t multipart_cksum_flags{rgw::cksum::Cksum::FLAG_CKSUM_NONE}; jspan_ptr multipart_trace; boost::optional delete_at; @@ -1914,6 +1915,7 @@ protected: std::optional obj_legal_hold = std::nullopt; rgw::sal::Attrs attrs; rgw::cksum::Type cksum_algo{rgw::cksum::Type::none}; + uint16_t cksum_flags{rgw::cksum::Cksum::FLAG_CKSUM_NONE}; public: RGWInitMultipart() {} diff --git a/src/rgw/rgw_rest_s3.cc b/src/rgw/rgw_rest_s3.cc index 14185c4de1d..e92bcb1009a 100644 --- a/src/rgw/rgw_rest_s3.cc +++ b/src/rgw/rgw_rest_s3.cc @@ -509,12 +509,29 @@ int RGWGetObj_ObjStore_S3::send_response_data(bufferlist& bl, off_t bl_ofs, try { rgw::cksum::Cksum cksum; decode(cksum, i->second); - if (multipart_parts_count && multipart_parts_count > 0) { + auto cksum_type = + rgw::cksum::get_checksum_type(cksum, + (multipart_parts_count && multipart_parts_count > 0) /* is_multipart */); + if (std::get<0>(cksum_type) == rgw::cksum::Cksum::FLAG_COMPOSITE) { + /* cksum was computed with a digest algorithm, or predates the 2025 + update that introduced CRC combining */ + ldpp_dout_fmt(this, 16, + "INFO: {} ChecksumMode==ENABLED element-name {} 
value {}-{}", + __func__, cksum.element_name(), cksum.to_armor(), + *multipart_parts_count); dump_header(s, cksum.header_name(), fmt::format("{}-{}", cksum.to_armor(), *multipart_parts_count)); } else { - dump_header(s, cksum.header_name(), cksum.to_armor()); + /* a full object checksum, if multipart, because the checksum is CRC family */ + auto elt_name = cksum.element_name(); + auto armored_cksum = cksum.to_armor(); + + ldpp_dout_fmt(this, 16, + "INFO: {} ChecksumMode==ENABLED element-name {} value {}", + __func__, cksum.element_name(), cksum.to_armor()); + dump_header(s, cksum.header_name(), armored_cksum); } + dump_header(s, "x-amz-checksum-type", std::get<1>(cksum_type)); } catch (buffer::error& err) { ldpp_dout(this, 0) << "ERROR: failed to decode rgw::cksum::Cksum" << dendl; @@ -2877,6 +2894,7 @@ void RGWPutObj_ObjStore_S3::send_response() dump_header_if_nonempty(s, "x-amz-version-id", version_id); dump_header_if_nonempty(s, "x-amz-expiration", expires); if (cksum && cksum->aws()) { + dump_header(s, "x-amz-checksum-type", "FULL_OBJECT"); dump_header(s, cksum->header_name(), cksum->to_armor()); } for (auto &it : crypt_http_responses) @@ -2885,7 +2903,8 @@ void RGWPutObj_ObjStore_S3::send_response() dump_errno(s); dump_header_if_nonempty(s, "x-amz-version-id", version_id); dump_header_if_nonempty(s, "x-amz-expiration", expires); - if (cksum) { + if (cksum && cksum->aws()) { + dump_header(s, "x-amz-checksum-type", "FULL_OBJECT"); dump_header(s, cksum->header_name(), cksum->to_armor()); } end_header(s, this, to_mime_type(s->format)); @@ -4006,12 +4025,20 @@ void RGWGetObjAttrs_ObjStore_S3::send_response() rgw::cksum::Cksum cksum; auto bliter = iter->second.cbegin(); cksum.decode(bliter); - if (multipart_parts_count && multipart_parts_count > 0) { + auto cksum_type = + rgw::cksum::get_checksum_type(cksum, + (multipart_parts_count && multipart_parts_count > 0) /* is_multipart */); + if (std::get<0>(cksum_type) == rgw::cksum::Cksum::FLAG_COMPOSITE) { + /* cksum was 
computed with a digest algorithm, or predates the 2025 + update that introduced CRC combining */ s->formatter->dump_string(cksum.element_name(), - fmt::format("{}-{}", cksum.to_armor(), *multipart_parts_count)); + fmt::format("{}-{}", cksum.to_armor(), + *multipart_parts_count)); } else { + /* a full object checksum, if multipart, because the checksum is CRC family */ s->formatter->dump_string(cksum.element_name(), cksum.to_armor()); } + s->formatter->dump_string("ChecksumType", std::get<1>(cksum_type)); } catch (buffer::error& err) { ldpp_dout(this, 0) << "ERROR: could not decode stored cksum, caught buffer::error" << dendl; @@ -4053,6 +4080,7 @@ void RGWGetObjAttrs_ObjStore_S3::send_response() s->formatter->dump_int("PartNumber", part.part_number); s->formatter->dump_unsigned("Size", part.part_size); if (part.cksum.type != rgw::cksum::Type::none) { + /* parts always have a non-digest checksum */ s->formatter->dump_string(part.cksum.element_name(), part.cksum.to_armor()); } s->formatter->close_section(); /* Part */ @@ -4467,13 +4495,24 @@ int RGWInitMultipart_ObjStore_S3::get_params(optional_yield y) return -ERR_INVALID_REQUEST; } - auto algo_hdr = rgw::putobj::cksum_algorithm_hdr(*(s->info.env)); - if (algo_hdr.second) { - cksum_algo = rgw::cksum::parse_cksum_type(algo_hdr.second); + /* checksums */ + auto checksum_type_hdr = + s->info.env->get_optional("HTTP_X_AMZ_CHECKSUM_TYPE"); + + /* composite or "full object" */ + cksum_algo = putobj::multipart_cksum_algo(*(s->info.env)); + cksum_flags = putobj::parse_cksum_flags(cksum_algo, checksum_type_hdr); + + auto aok = cksum::permitted_cksum_algo_and_type(cksum_algo, cksum_flags); + if (! 
std::get<0>(aok)) { + ldpp_dout_fmt(this, 5, + "ERROR: {} checksum type {} not compatible with checksum algorithm {}", + __func__, std::get<1>(aok), std::get<2>(aok)); + return -ERR_INVALID_REQUEST; } return 0; -} +} /* RGWInitMultipart_ObjStore_S3::get_params() */ void RGWInitMultipart_ObjStore_S3::send_response() { @@ -4555,8 +4594,11 @@ void RGWCompleteMultipart_ObjStore_S3::send_response() s->formatter->dump_string("Bucket", s->bucket_name); s->formatter->dump_string("Key", s->object->get_name()); s->formatter->dump_string("ETag", etag); - if (armored_cksum) { + if (armored_cksum) [[likely]] { + auto cksum_type + = rgw::cksum::get_checksum_type(*cksum, true /* is_multipart */); s->formatter->dump_string(cksum->element_name(), *armored_cksum); + s->formatter->dump_string("ChecksumType", std::get<1>(cksum_type)); } s->formatter->close_section(); rgw_flush_formatter_and_reset(s, s->formatter); diff --git a/src/rgw/rgw_sal.h b/src/rgw/rgw_sal.h index 45749c228f7..18ab2473288 100644 --- a/src/rgw/rgw_sal.h +++ b/src/rgw/rgw_sal.h @@ -1458,8 +1458,12 @@ public: //object lock std::optional obj_retention = std::nullopt; std::optional obj_legal_hold = std::nullopt; + rgw::cksum::Type cksum_type = rgw::cksum::Type::none; + // only a few (currently CRC) checksums are not composite + uint16_t cksum_flags{rgw::cksum::Cksum::FLAG_COMPOSITE}; + MultipartUpload() = default; virtual ~MultipartUpload() = default; diff --git a/src/rgw/spdk/crc64.c b/src/rgw/spdk/crc64.c index 305d8392508..72691f89298 100644 --- a/src/rgw/spdk/crc64.c +++ b/src/rgw/spdk/crc64.c @@ -166,4 +166,5 @@ spdk_crc64_nvme(const void *buf, size_t len, uint64_t crc) { return crc64_rocksoft_refl_base(crc, (const uint8_t *)buf, len); } + #endif diff --git a/src/test/rgw/test_rgw_cksum.cc b/src/test/rgw/test_rgw_cksum.cc index 4167d8779e9..53ae9818e00 100644 --- a/src/test/rgw/test_rgw_cksum.cc +++ b/src/test/rgw/test_rgw_cksum.cc @@ -13,10 +13,12 @@ * */ +#include #include #include #include #include +#include 
#include "gtest/gtest.h" @@ -28,6 +30,13 @@ #include #include "rgw/rgw_hex.h" +extern "C" { +#include "madler/crc64nvme.h" +#include "madler/crc32iso_hdlc.h" +#include "madler/crc32iscsi.h" +#include "spdk/crc64.h" +} // extern "C" + #define dout_subsys ceph_subsys_rgw namespace { @@ -52,6 +61,9 @@ namespace { std::string dolor = R"(Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.)"; +std::string lacrimae = dolor + dolor; +std::string dolorem = dolor + lorem; + TEST(RGWCksum, Output) { if (gen_test_data) { @@ -67,6 +79,16 @@ TEST(RGWCksum, Output) of.open("/tmp/dolor", o_mode); of << dolor; of.close(); + + std::cout << "writing lacrimae text to /tmp/lacrimae " << std::endl; + of.open("/tmp/lacrimae", o_mode); + of << lacrimae; + of.close(); + + std::cout << "writing dolorem text to /tmp/dolorem " << std::endl; + of.open("/tmp/dolorem", o_mode); + of << dolorem; + of.close(); } } @@ -303,8 +325,6 @@ TEST(RGWCksum, DigestSTR) TEST(RGWCksum, DigestBL) { - std::string lacrimae = dolor + dolor; - ceph::buffer::list dolor_bl; for ([[maybe_unused]] const auto& ix : {1, 2}) { dolor_bl.push_back( @@ -397,21 +417,319 @@ TEST(RGWCksum, CRC64NVME1) ASSERT_TRUE(crc == 0x9A2DF64B8E9E517E); } +TEST(RGWCksum, CRC64NVME_UNDIGEST) +{ + auto t = cksum::Type::crc64nvme; + + /* digest 1 */ + DigestVariant dv1 = rgw::cksum::digest_factory(t); + Digest *digest1 = get_digest(dv1); + ASSERT_NE(digest1, nullptr); + + digest1->Update((const unsigned char *)lacrimae.c_str(), lacrimae.length()); + + auto cksum1 = rgw::cksum::finalize_digest(digest1, t); + + uint64_t crc1 = 
rgw::digest::byteswap(std::get(*cksum1.get_crc())); + + uint64_t crc2 = spdk_crc64_nvme((const unsigned char *)lacrimae.c_str(), + lacrimae.length(), 0ULL); + ASSERT_EQ(crc1, crc2); +} + TEST(RGWCksum, CRC64NVME2) { auto t = cksum::Type::crc64nvme; - DigestVariant dv = rgw::cksum::digest_factory(t); - Digest *digest = get_digest(dv); - ASSERT_NE(digest, nullptr); - digest->Update((const unsigned char *)dolor.c_str(), dolor.length()); + /* digest 1 */ + DigestVariant dv1 = rgw::cksum::digest_factory(t); + Digest *digest1 = get_digest(dv1); + ASSERT_NE(digest1, nullptr); - auto cksum = rgw::cksum::finalize_digest(digest, t); + digest1->Update((const unsigned char *)dolor.c_str(), dolor.length()); + + auto cksum1 = rgw::cksum::finalize_digest(digest1, t); + + /* the armored value produced by awscliv2 2.24.5 */ + ASSERT_EQ(cksum1.to_armor(), "wiBA+PSv41M="); + + /* digest 2 */ + DigestVariant dv2 = rgw::cksum::digest_factory(t); + Digest* digest2 = get_digest(dv2); + ASSERT_NE(digest2, nullptr); + + digest2->Update((const unsigned char *)lacrimae.c_str(), lacrimae.length()); + + auto cksum2 = rgw::cksum::finalize_digest(digest2, t); /* the armored value produced by awscliv2 2.24.5 */ - ASSERT_EQ(cksum.to_armor(), "wiBA+PSv41M="); + ASSERT_EQ(cksum2.to_armor(), "oa2U66pdPLk="); +} + +TEST(RGWCksum, CRC64NVME_COMBINE1) +{ + /* do crc64nvme and combining by hand */ + + uint64_t crc1 = spdk_crc64_nvme((const unsigned char *)dolor.c_str(), + dolor.length(), 0ULL); + + uint64_t crc2 = spdk_crc64_nvme((const unsigned char *)lacrimae.c_str(), + lacrimae.length(), 0ULL); + + uint64_t crc4 = crc64nvme_comb(crc1, crc1, dolor.length()); + + ASSERT_EQ(crc2, crc4); +} + +TEST(RGWCksum, CRC64NVME_COMBINE2) +{ + /* do crc64nvme and combining by hand, non-uniform strings */ + + uint64_t crc1 = spdk_crc64_nvme((const unsigned char *)dolor.c_str(), + dolor.length(), 0ULL); + + uint64_t crc2 = spdk_crc64_nvme((const unsigned char *)lorem.c_str(), + lorem.length(), 0ULL); + + uint64_t crc3 
= spdk_crc64_nvme((const unsigned char *)dolorem.c_str(), + dolorem.length(), 0ULL); + + uint64_t crc4 = crc64nvme_comb(crc1, crc2, lorem.length()); + + if (verbose) { + std::cout << "\ncrc1/dolor: " << crc1 + << "\ncrc2/lorem: " << crc2 + << "\ncrc3/dolorem: " << crc3 + << "\ncrc4/crc1+crc2: " << crc4 + << std::endl; + } + + ASSERT_EQ(crc3, crc4); } +TEST(RGWCksum, CRC64NVME_COMBINE3) +{ + auto t = cksum::Type::crc64nvme; + + DigestVariant dv1 = rgw::cksum::digest_factory(t); + Digest* digest1 = get_digest(dv1); + ASSERT_NE(digest1, nullptr); + + DigestVariant dv2 = rgw::cksum::digest_factory(t); + Digest* digest2 = get_digest(dv2); + ASSERT_NE(digest2, nullptr); + + DigestVariant dv3 = rgw::cksum::digest_factory(t); + Digest* digest3 = get_digest(dv3); + ASSERT_NE(digest3, nullptr); + + /* dolor */ + digest1->Update((const unsigned char *)dolor.c_str(), dolor.length()); + auto cksum1 = rgw::cksum::finalize_digest(digest1, t); + + uint64_t spdk_crc1 = spdk_crc64_nvme((const unsigned char *)dolor.c_str(), + dolor.length(), 0ULL); + auto cksum_crc1 = + rgw::digest::byteswap(std::get(*cksum1.get_crc())); + + ASSERT_EQ(cksum_crc1, spdk_crc1); + + /* lorem */ + digest2->Update((const unsigned char *)lorem.c_str(), lorem.length()); + auto cksum2 = rgw::cksum::finalize_digest(digest2, t); + + uint64_t spdk_crc2 = spdk_crc64_nvme((const unsigned char *)lorem.c_str(), + lorem.length(), 0ULL); + auto cksum_crc2 = + rgw::digest::byteswap(std::get(*cksum2.get_crc())); + + ASSERT_EQ(cksum_crc2, spdk_crc2); + + /* dolorem */ + digest3->Update((const unsigned char *)dolorem.c_str(), dolorem.length()); + auto cksum3 = rgw::cksum::finalize_digest(digest3, t); + + uint64_t spdk_crc3 = spdk_crc64_nvme((const unsigned char *)dolorem.c_str(), + dolorem.length(), 0ULL); + auto cksum_crc3 = + rgw::digest::byteswap(std::get(*cksum3.get_crc())); + + ASSERT_EQ(cksum_crc3, spdk_crc3); + + /* API combine check */ + auto cksum4 = rgw::cksum::combine_crc_cksum(cksum1, cksum2, lorem.length()); 
+ ASSERT_TRUE(cksum4); + + auto cksum_crc4 = + rgw::digest::byteswap(std::get(*cksum4->get_crc())); + + auto armor3 = cksum3.to_armor(); + auto armor4 = cksum4->to_armor(); + + if (verbose) { + std::cout << "\ncrc1/dolor spdk: " << spdk_crc1 + << " cksum_crc1: " << cksum_crc1 + << "\ncrc2/lorem spdk: " << spdk_crc2 + << " cksum_crc2: " << cksum_crc2 + << "\ncrc3/dolorem spdk: " << spdk_crc3 + << " cksum_crc3: " << cksum_crc3 + << " cksum_crc3 armored: " << armor3 + << "\ncrc4/crc1+crc2: " << cksum_crc4 + << " crc4 armored: " << armor4 + << std::endl; + } + + /* the CRC of dolor+lorem == gf combination of cksum1 and cksum2 */ + ASSERT_EQ(cksum3.to_armor(), cksum4->to_armor()); +} /* crc64nvme */ + +TEST(RGWCksum, CRC32_COMBINE3) +{ + auto t = cksum::Type::crc32; + + DigestVariant dv1 = rgw::cksum::digest_factory(t); + Digest* digest1 = get_digest(dv1); + ASSERT_NE(digest1, nullptr); + + DigestVariant dv2 = rgw::cksum::digest_factory(t); + Digest* digest2 = get_digest(dv2); + ASSERT_NE(digest2, nullptr); + + DigestVariant dv3 = rgw::cksum::digest_factory(t); + Digest* digest3 = get_digest(dv3); + ASSERT_NE(digest3, nullptr); + + /* dolor */ + digest1->Update((const unsigned char *)dolor.c_str(), dolor.length()); + auto cksum1 = rgw::cksum::finalize_digest(digest1, t); + + uint32_t madler_crc1 = + crc32iso_hdlc_word(0U, (const unsigned char *)dolor.c_str(), + dolor.length()); + + auto cksum_crc1 = + rgw::digest::byteswap(std::get(*cksum1.get_crc())); + + ASSERT_EQ(cksum_crc1, madler_crc1); + + /* lorem */ + digest2->Update((const unsigned char *)lorem.c_str(), lorem.length()); + auto cksum2 = rgw::cksum::finalize_digest(digest2, t); + + uint32_t madler_crc2 = + crc32iso_hdlc_word(0U, (const unsigned char *)lorem.c_str(), + lorem.length()); + auto cksum_crc2 = + rgw::digest::byteswap(std::get(*cksum2.get_crc())); + + ASSERT_EQ(cksum_crc2, madler_crc2); + + /* dolorem */ + digest3->Update((const unsigned char *)dolorem.c_str(), dolorem.length()); + auto cksum3 = 
rgw::cksum::finalize_digest(digest3, t); + + uint32_t madler_crc3 = + crc32iso_hdlc_word(0U, (const unsigned char *)dolorem.c_str(), + dolorem.length()); + auto cksum_crc3 = + rgw::digest::byteswap(std::get(*cksum3.get_crc())); + + ASSERT_EQ(cksum_crc3, madler_crc3); + + /* API combine check */ + auto cksum4 = rgw::cksum::combine_crc_cksum(cksum1, cksum2, lorem.length()); + ASSERT_TRUE(cksum4); + + auto cksum_crc4 = + rgw::digest::byteswap(std::get(*cksum4->get_crc())); + + if (verbose) { + std::cout << "\ncrc1/dolor spdk: " << madler_crc1 + << " cksum_crc1: " << cksum_crc1 + << "\ncrc2/lorem spdk: " << madler_crc2 + << " cksum_crc2: " << cksum_crc2 + << "\ncrc3/dolorem spdk: " << madler_crc3 + << " cksum_crc3: " << cksum_crc3 + << "\ncrc4/crc1+crc2: " << cksum_crc4 + << std::endl; + } + + /* the CRC of dolor+lorem == gf combination of cksum1 and cksum2 */ + ASSERT_EQ(cksum3.to_armor(), cksum4->to_armor()); +} /* crc32 */ + +TEST(RGWCksum, CRC32C_COMBINE3) +{ + auto t = cksum::Type::crc32c; + + DigestVariant dv1 = rgw::cksum::digest_factory(t); + Digest* digest1 = get_digest(dv1); + ASSERT_NE(digest1, nullptr); + + DigestVariant dv2 = rgw::cksum::digest_factory(t); + Digest* digest2 = get_digest(dv2); + ASSERT_NE(digest2, nullptr); + + DigestVariant dv3 = rgw::cksum::digest_factory(t); + Digest* digest3 = get_digest(dv3); + ASSERT_NE(digest3, nullptr); + + /* dolor */ + digest1->Update((const unsigned char *)dolor.c_str(), dolor.length()); + auto cksum1 = rgw::cksum::finalize_digest(digest1, t); + + uint32_t madler_crc1 = crc32iscsi_word(0U, (const unsigned char *)dolor.c_str(), + dolor.length()); + + auto cksum_crc1 = + rgw::digest::byteswap(std::get(*cksum1.get_crc())); + + ASSERT_EQ(cksum_crc1, madler_crc1); + + /* lorem */ + digest2->Update((const unsigned char *)lorem.c_str(), lorem.length()); + auto cksum2 = rgw::cksum::finalize_digest(digest2, t); + + uint32_t madler_crc2 = crc32iscsi_word(0U, (const unsigned char *)lorem.c_str(), + lorem.length()); + auto 
cksum_crc2 = + rgw::digest::byteswap(std::get(*cksum2.get_crc())); + + ASSERT_EQ(cksum_crc2, madler_crc2); + + /* dolorem */ + digest3->Update((const unsigned char *)dolorem.c_str(), dolorem.length()); + auto cksum3 = rgw::cksum::finalize_digest(digest3, t); + + uint32_t madler_crc3 = crc32iscsi_word(0U, (const unsigned char *)dolorem.c_str(), + dolorem.length()); + auto cksum_crc3 = + rgw::digest::byteswap(std::get(*cksum3.get_crc())); + + ASSERT_EQ(cksum_crc3, madler_crc3); + + /* API combine check */ + auto cksum4 = rgw::cksum::combine_crc_cksum(cksum1, cksum2, lorem.length()); + ASSERT_TRUE(cksum4); + + auto cksum_crc4 = + rgw::digest::byteswap(std::get(*cksum4->get_crc())); + + if (verbose) { + std::cout << "\ncrc1/dolor spdk: " << madler_crc1 + << " cksum_crc1: " << cksum_crc1 + << "\ncrc2/lorem spdk: " << madler_crc2 + << " cksum_crc2: " << cksum_crc2 + << "\ncrc3/dolorem spdk: " << madler_crc3 + << " cksum_crc3: " << cksum_crc3 + << "\ncrc4/crc1+crc2: " << cksum_crc4 + << std::endl; + } + + /* the CRC of dolor+lorem == gf combination of cksum1 and cksum2 */ + ASSERT_EQ(cksum3.to_armor(), cksum4->to_armor()); +} /* crc32c */ + TEST(RGWCksum, CtorUnarmor) { auto t = cksum::Type::sha256; @@ -425,13 +743,233 @@ TEST(RGWCksum, CtorUnarmor) auto cksum1 = rgw::cksum::finalize_digest(digest, t); auto armored_text1 = cksum1.to_armor(); - auto cksum2 = rgw::cksum::Cksum(cksum1.type, armored_text1.c_str()); + auto cksum2 = rgw::cksum::Cksum(cksum1.type, armored_text1.c_str(), + rgw::cksum::Cksum::CtorStyle::from_armored); ASSERT_EQ(armored_text1, cksum2.to_armor()); } } /* namespace */ +using cksum_3tuple + = std::tuple; + +cksum_3tuple +mpu_checksum_helper(cksum::Type t, uint16_t flags) +{ + DigestVariant dv1 = rgw::cksum::digest_factory(t); + Digest* digest1 = get_digest(dv1); + + DigestVariant dv2 = rgw::cksum::digest_factory(t); + Digest* digest2 = get_digest(dv2); + + DigestVariant dv3 = rgw::cksum::digest_factory(t); + Digest* digest3 = get_digest(dv3); + + /* 
dolor */ + digest1->Update((const unsigned char *)dolor.c_str(), dolor.length()); + auto cksum1 = rgw::cksum::finalize_digest(digest1, t); + + /* lorem */ + digest2->Update((const unsigned char *)lorem.c_str(), lorem.length()); + auto cksum2 = rgw::cksum::finalize_digest(digest2, t); + + /* dolorem */ + digest3->Update((const unsigned char *)dolorem.c_str(), dolorem.length()); + auto cksum3 = rgw::cksum::finalize_digest(digest3, t); + + return cksum_3tuple(cksum1, cksum2, cksum3); +} + +TEST(RGWCksum, Combiner1) +{ + using std::get; + + auto t = cksum::Type::crc64nvme; + uint16_t flags = rgw::cksum::Cksum::FLAG_CKSUM_NONE; + + auto cksums = mpu_checksum_helper(t, flags); + auto& [cksum1, cksum2, cksum3] = cksums; + + auto armor1 = cksum1.to_armor(); + auto armor2 = cksum2.to_armor(); + auto armor3 = cksum3.to_armor(); + + if (verbose) { + std::cout << "\ncksums: cksum type: " << to_string(t) + << "\narmor1: " << armor1 + << "\narmor2: " << armor2 + << "\narmor3: " << armor3 + << std::endl; + } + + auto cmbnr = rgw::cksum::CombinerFactory(t, flags); + + ASSERT_TRUE(cmbnr); + ASSERT_EQ(t, cmbnr->get_type()); + + cmbnr->append(get<0>(cksums), dolor.length()); + cmbnr->append(get<1>(cksums), lorem.length()); + + /* depending on the checksum type and flags, cksum4 is + * either equivalent to cksum3, or, a composite digest + * of cksum1 and cksum2 */ + auto cksum4 = cmbnr->final(); + + /* if cksums(0) is !composite, then cksums(2) is + * == cksums(0) + cksums(1) and also == cksum4 */ + bool crc = get<0>(cksums).crc(); + if (crc) { + ASSERT_EQ(get<2>(cksums).to_armor(), cksum4.to_armor()); + } else { + /* all we can do is assert match against an external + * reference */ + } + if (verbose) { + /* pretty-print armored cksum */ + std::string cksum_flags = + (! get<0>(cksums).crc()) ? 
"COMPOSITE" : "FULL_OBJECT"; + std::cout << "\ncomposite cksum (delorem) " + << "\n\tcksum-type " << to_string(t) + << "\n\tflags " + << cksum_flags + << "\n\tarmored cksum " << cksum4.to_armor() + << std::endl; + } +} /* Combiner1 */ + +using cksum_4tuple + = std::tuple; + +class CksumCombinerFixture : public testing::Test +{ +public: + + static std::string long_a; // 5M string "AAAA...." + static std::string long_b; + static std::string long_c; + + static std::vector cksum_types; + + static void SetUpTestSuite() { + + using std::get; + using ST = std::tuple; + + + /* generate unique strings that match ones we use in + * a checksum test matrix in s3-tests */ + std::string a{"A"}; + std::string b{"B"}; + std::string c{"C"}; + + for (const auto& elt : {ST(a, long_a), + ST(b, long_b), + ST(c, long_c)}) { + + for (int ix = 0; ix < (5 * 1024 * 1024); ++ix) { + get<1>(elt) += get<0>(elt); + } + } + + /* generate check types */ + for (uint16_t ix = 1; ix <= uint16_t(cksum::Type::crc64nvme); ++ix) { + cksum_types.push_back(cksum::Type(ix)); + } + } /* SetUpTestSuite */ + + static cksum_4tuple mpu_checksums(cksum::Type t) { + + DigestVariant dva = rgw::cksum::digest_factory(t); + Digest *digesta = get_digest(dva); + digesta->Update((const unsigned char *)long_a.c_str(), long_a.length()); + auto cksum1 = rgw::cksum::finalize_digest(digesta, t); + + DigestVariant dvb = rgw::cksum::digest_factory(t); + Digest *digestb = get_digest(dvb); + digestb->Update((const unsigned char *)long_b.c_str(), long_b.length()); + auto cksum2 = rgw::cksum::finalize_digest(digestb, t); + + DigestVariant dvc = rgw::cksum::digest_factory(t); + Digest *digestc = get_digest(dvc); + digestc->Update((const unsigned char *)long_c.c_str(), long_c.length()); + auto cksum3 = rgw::cksum::finalize_digest(digestc, t); + + std::string long_d = long_a + long_b + long_c; + DigestVariant dvd = rgw::cksum::digest_factory(t); + Digest *digestd = get_digest(dvd); + digestd->Update((const unsigned char 
*)long_d.c_str(), long_d.length()); + auto cksum4 = rgw::cksum::finalize_digest(digestd, t); + + return cksum_4tuple(cksum1, cksum2, cksum3, cksum4); + } + + static void TearDownTestSuite() { + } +}; /* CksumCombinerFixture */ + +std::string CksumCombinerFixture::long_a; +std::string CksumCombinerFixture::long_b; +std::string CksumCombinerFixture::long_c; +std::vector CksumCombinerFixture::cksum_types; + +/* TODO: multipart test matrix using fixture */ +TEST_F(CksumCombinerFixture, Test1) { + + for (const auto t : CksumCombinerFixture::cksum_types) { + using std::get; + + auto cksums = CksumCombinerFixture::mpu_checksums(t); + + auto cksum1 = get<0>(cksums); + auto flags = cksum1.flags; + auto cmbnr = rgw::cksum::CombinerFactory(t, flags); + + ASSERT_TRUE(cmbnr); + ASSERT_EQ(t, cmbnr->get_type()); + + auto lena = CksumCombinerFixture::long_a.length(); + auto lenb = CksumCombinerFixture::long_b.length(); + auto lenc = CksumCombinerFixture::long_c.length(); + + ASSERT_EQ(lena, lenb); + ASSERT_EQ(lenb, lenc); + ASSERT_EQ(lenc, (5*1024*1024)); + + cmbnr->append(get<0>(cksums), lena); + cmbnr->append(get<1>(cksums), lena); + cmbnr->append(get<2>(cksums), lena); + + /* depending on the checksum type and flags, cksum4 is + * either equivalent to cksums(3), or a composite digest + * of cksums(0)..cksums(2) */ + auto cksum4 = cmbnr->final(); + + /* if cksums(0) is !composite, then cksums(3) is + * == SUM(cksums(0)..cksums(2)) and also == cksum4 */ + if (get<0>(cksums).crc()) { + ASSERT_EQ(get<3>(cksums).to_armor(), cksum4.to_armor()); + } else { + /* all we can do is assert match against an external + * reference */ + } + /* pretty-print armored cksum */ + std::string cksum_flags = + (! get<0>(cksums).crc()) ? 
"COMPOSITE" : "FULL_OBJECT"; + std::cout << "\ncomposite cksum (long_a+long_b+long_c) " + << "\n\tcksum-type " << to_string(t) + << "\n\tarmored cksum1 " << get<0>(cksums).to_armor() + << "\n\tarmored cksum2 " << get<1>(cksums).to_armor() + << "\n\tarmored cksum3 " << get<2>(cksums).to_armor() + << "\n\tflags " + << cksum_flags + << "\n\tarmored cksum4 (composite) " << cksum4.to_armor() + << std::endl; + } +} /* CksumCombinerFixture, Test1 */ + + int main(int argc, char *argv[]) { auto args = argv_to_vec(argc, argv);