]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
rgw:cksum: implement crc64nvme and combined 32- and 64-bit CRCs
authorMatt Benjamin <mbenjamin@redhat.com>
Fri, 21 Feb 2025 19:43:53 +0000 (14:43 -0500)
committerMatt Benjamin <mbenjamin@redhat.com>
Mon, 31 Mar 2025 14:59:17 +0000 (10:59 -0400)
* internally compensate for at-rest byteswapped crc64 representation (e.g., before combine step)
* generalize Cksum crc api for 32bit and 64bit crcs, other cleanup
* prototype abstract cksum::Combiner workflow
* add support for forward and backward handling of composite vs full object checksums
  by marking the composites and the update flag day
* clean up checksum formatting and checksum type reporting
* add unit tests for Combiner interface
* validate and track requested checksum type (i.e., composite or full), plus
  unit test fixture for combinations of full matrix of cksum types
* doh.  GET/HEAD checksums are in headers
* add crcany license to COPYING
* return ChecksumType as header in GET/HEAD

    Found by Casey in review

* avoid fmt of char* null when no checksum header supplied

    A cksum_hdr_t(nullptr, nullptr) results in this case, and is intended,
    but its components obviously can't be presented to std::format unless
    cast to a safe pointer type.

* fail checksum mismatch with BadDigest

    When uploading an S3 object with an invalid checksum, the return code
    should be BadDigest to mirror more closely the AWS S3 implementation.
    See: https://docs.aws.amazon.com/AmazonS3/latest/userguide/checking-object-integrity.html

Fixes: https://tracker.ceph.com/issues/70614
    Reported by Alex Wojno <awojno@bloomberg.net>

* fix comparison and display of composite checksums

A string comparison bounds error and a bitmask comparison
    error are fixed.

* fix build on centos9

On gcc(?13?) we can't declare rgw::cksum::FLAG_NONE
    and also rgw::cksum::Cksum::FLAG_NONE.

    SAD!!

* include <variant> invariantly

* fix checksum type return from complete-multipart

    This one is in the XML response

* aieee, don't leak Combiners

   Use unique_ptr to polymorphic type...correctly.

Signed-off-by: Matt Benjamin <mbenjamin@redhat.com>
15 files changed:
COPYING
src/rgw/CMakeLists.txt
src/rgw/driver/rados/rgw_sal_rados.cc
src/rgw/rgw_cksum.cc [new file with mode: 0644]
src/rgw/rgw_cksum.h
src/rgw/rgw_cksum_pipe.cc
src/rgw/rgw_cksum_pipe.h
src/rgw/rgw_common.h
src/rgw/rgw_crc_digest.h
src/rgw/rgw_op.cc
src/rgw/rgw_op.h
src/rgw/rgw_rest_s3.cc
src/rgw/rgw_sal.h
src/rgw/spdk/crc64.c
src/test/rgw/test_rgw_cksum.cc

diff --git a/COPYING b/COPYING
index 8bc6b59b1c2f102397c649b5e4d235c26824cd86..3b758d5815511e9ee349098df8b53bed13cd50d5 100644 (file)
--- a/COPYING
+++ b/COPYING
@@ -224,3 +224,26 @@ License: GNU Affero General Public License, Version 3
 Files: src/common/*s390x*
 Copyright: 2024 IBM <contact@ibm.com>
 License: Apache License, version 2.0
+
+Files: src/rgw/madler/*
+License:
+  Copyright (C) 2014-2025 Mark Adler
+
+  This software is provided 'as-is', without any express or implied warranty.
+  In no event will the authors be held liable for any damages arising from the
+  use of this software.
+
+  Permission is granted to anyone to use this software for any purpose,
+  including commercial applications, and to alter it and redistribute it
+  freely, subject to the following restrictions:
+
+  1. The origin of this software must not be misrepresented; you must not claim
+     that you wrote the original software. If you use this software in a
+     product, an acknowledgment in the product documentation would be
+     appreciated but is not required.
+  2. Altered source versions must be plainly marked as such, and must not be
+     misrepresented as being the original software.
+  3. This notice may not be removed or altered from any source distribution.
+
+  Mark Adler
+  madler@alumni.caltech.edu
index a4f154b619046c90ef708f774178b8d284f779b8..0c95fd7fbb9136eb6730749e8a8f4c5708b0ee86 100644 (file)
@@ -57,6 +57,9 @@ set(librgw_common_srcs
   services/svc_zone.cc
   services/svc_zone_utils.cc
   spdk/crc64.c
+  madler/crc64nvme.c
+  madler/crc32iso_hdlc.c
+  madler/crc32iscsi.c
   rgw_account.cc
   rgw_acl.cc
   rgw_acl_s3.cc
@@ -71,6 +74,7 @@ set(librgw_common_srcs
   rgw_bucket.cc
   rgw_bucket_layout.cc
   rgw_cache.cc
+  rgw_cksum.cc
   rgw_cksum_pipe.cc
   rgw_common.cc
   rgw_compression.cc
index 0578d2e59581f31cb1ae212a6735270a8acd28a4..cd459360337a4fe2a816d38e094384f87b9595df 100644 (file)
@@ -3841,6 +3841,7 @@ int RadosMultipartUpload::init(const DoutPrefixProvider *dpp, optional_yield y,
     multipart_upload_info upload_info;
     upload_info.dest_placement = dest_placement;
     upload_info.cksum_type = cksum_type;
+    upload_info.cksum_flags = cksum_flags;
 
     if (obj_legal_hold) {
       upload_info.obj_legal_hold_exist = true;
@@ -4256,6 +4257,7 @@ int RadosMultipartUpload::get_info(const DoutPrefixProvider *dpp, optional_yield
     return -EIO;
   }
   cksum_type = upload_info.cksum_type;
+  cksum_flags = upload_info.cksum_flags;
   placement = upload_info.dest_placement;
   upload_information = upload_info;
   *rule = &placement;
diff --git a/src/rgw/rgw_cksum.cc b/src/rgw/rgw_cksum.cc
new file mode 100644 (file)
index 0000000..e6e4c5b
--- /dev/null
@@ -0,0 +1,157 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright contributors to the Ceph project
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#include "rgw_cksum.h"
+#include "rgw_cksum_digest.h"
+#include <cstdint>
+#include <memory>
+#include "rgw_crc_digest.h"
+
+extern "C" {
+#include "madler/crc64nvme.h"
+#include "madler/crc32iso_hdlc.h"
+#include "madler/crc32iscsi.h"
+#include "spdk/crc64.h"
+} // extern "C"
+
+namespace rgw::cksum {
+
+  /* Combine the CRCs of two adjacent byte ranges into the CRC of their
+   * concatenation, using the madler/crcany *_comb() routines.
+   *
+   * ck1:  checksum of the leading range
+   * ck2:  checksum of the range that follows it
+   * len2: byte length handed to the *_comb() routine;  named to match the
+   *       declaration in rgw_cksum.h.  CRCCombiner::append() passes the
+   *       size of the part that produced ck2.
+   *
+   * Returns std::nullopt if the two checksums differ in type, or the type
+   * is not flagged as a CRC;  otherwise a raw-constructed Cksum of that
+   * type holding the combined value.
+   */
+  std::optional<Cksum>
+  combine_crc_cksum(const Cksum& ck1, const Cksum& ck2, uintmax_t len2)
+  {
+    if ((ck1.type != ck2.type) || !ck1.crc()) {
+      return std::nullopt;
+    }
+
+    /* due to AWS (and other) convention, the at-rest digest is
+     * byteswapped (on LE?);  each case restores the defined byte order
+     * before combining and swaps the result back afterwards.
+     * NOTE(review): these swaps are unconditional, whereas
+     * Crc64Nvme::Final() swaps only on non-big-endian hosts -- confirm
+     * the intended behavior on big-endian builds */
+    switch (ck1.type) {
+    case cksum::Type::crc64nvme:
+      {
+        const auto crc1 =
+          rgw::digest::byteswap(std::get<uint64_t>(*ck1.get_crc()));
+        const auto crc2 =
+          rgw::digest::byteswap(std::get<uint64_t>(*ck2.get_crc()));
+        /* madler crcany */
+        auto combined = crc64nvme_comb(crc1, crc2, len2);
+        /* and byteswap */
+        combined = rgw::digest::byteswap(combined);
+        /* convert to a Cksum, no ascii armor */
+        return Cksum(ck1.type, reinterpret_cast<const char*>(&combined),
+                     Cksum::CtorStyle::raw);
+      }
+    case cksum::Type::crc32:
+    case cksum::Type::crc32c:
+      {
+        const auto crc1 =
+          rgw::digest::byteswap(std::get<uint32_t>(*ck1.get_crc()));
+        const auto crc2 =
+          rgw::digest::byteswap(std::get<uint32_t>(*ck2.get_crc()));
+        /* madler crcany;  only crc32 and crc32c reach this case, so the
+         * result is always initialized */
+        uint32_t combined = (ck1.type == cksum::Type::crc32)
+          ? crc32iso_hdlc_comb(crc1, crc2, len2)
+          : crc32iscsi_comb(crc1, crc2, len2);
+        /* and byteswap */
+        combined = rgw::digest::byteswap(combined);
+        /* convert to a Cksum, no ascii armor */
+        return Cksum(ck1.type, reinterpret_cast<const char*>(&combined),
+                     Cksum::CtorStyle::raw);
+      }
+    default:
+      break;
+    }
+
+    /* a CRC-flagged type without a combine routine */
+    return std::nullopt;
+  }
+
+  /* the checksum of the final object is a checksum (of the same type,
+   * presumably) of the concatenated checksum bytes of the parts, plus
+   * "-<num-parts>".  See
+   * https://docs.aws.amazon.com/AmazonS3/latest/userguide/checking-object-integrity.html#large-object-checksums
+   */
+  class DigestCombiner : public Combiner
+  {
+    DigestVariant dv;
+    /* obtained from get_digest(dv);  presumably refers into dv, which is
+     * why copying is disabled below -- TODO confirm */
+    Digest* digest{nullptr};
+
+  public:
+    /* set up a fresh digest of checksum type t */
+    explicit DigestCombiner(cksum::Type t)
+      : Combiner(t),
+        dv(digest_factory(t)),
+        digest(get_digest(dv))
+    {}
+
+    DigestCombiner(const DigestCombiner&) = delete;
+    DigestCombiner& operator=(const DigestCombiner&) = delete;
+
+    /* feed the raw bytes of one part checksum into the running digest;
+     * the part size plays no role in a composite checksum */
+    void append(const Cksum& cksum, uint64_t /* part_size */) override {
+      auto ckr = cksum.raw();
+      digest->Update((unsigned char *)ckr.data(), ckr.length());
+    }
+
+    /* finish the digest and mark the result with COMPOSITE_MASK */
+    Cksum final() override {
+      auto cksum = finalize_digest(digest, get_type());
+      cksum.flags |= Cksum::COMPOSITE_MASK;
+      return cksum;
+    }
+  }; /* Digest */
+
+  /* the checksum of the final object is an "ordinary" (full object) checksum,
+   * synthesized from the CRCs of the component checksums
+   */
+  class CRCCombiner : public Combiner
+  {
+    Cksum cksum;      // running combined checksum
+    bool once{false}; // true once the first part has been stored
+  public:
+    explicit CRCCombiner(cksum::Type t)
+      : Combiner(t)
+    {}
+
+    /* fold the checksum of the next part (rhs, covering part_size bytes)
+     * into the running checksum.
+     *
+     * Throws std::bad_optional_access if combine_crc_cksum() rejects the
+     * pair (type mismatch or non-CRC type). */
+    void append(const Cksum& rhs, uint64_t part_size) override {
+      if (! once) {
+        // save the first part checksum
+        cksum = rhs;
+        once = true;
+        return;
+      }
+      // combine the next partial checksum into cksum;  value() is used
+      // instead of operator* so that an empty result is reported rather
+      // than read through, which would be undefined behavior
+      cksum = combine_crc_cksum(cksum, rhs, part_size).value();
+    }
+
+    /* mark the running checksum with FULL_OBJECT_MASK and return a copy */
+    Cksum final() override {
+      cksum.flags |= Cksum::FULL_OBJECT_MASK;
+      return cksum;
+    }
+  }; /* CRCCombine */
+
+  /* Choose the Combiner for a multipart upload's final checksum.
+   *
+   * t:     checksum algorithm of the upload
+   * flags: checksum type bits recorded for the upload
+   *        (Cksum::FLAG_COMPOSITE / Cksum::FLAG_FULL_OBJECT)
+   *
+   * crc64nvme always gets a CRCCombiner.  crc32 and crc32c get one unless
+   * FLAG_COMPOSITE is set:  permitted_cksum_algo_and_type() accepts
+   * COMPOSITE for those two algorithms, so that request is honored here
+   * with a DigestCombiner instead of silently yielding a full object
+   * checksum.  Every other algorithm gets a DigestCombiner.
+   */
+  std::unique_ptr<Combiner> CombinerFactory(cksum::Type t, uint16_t flags)
+  {
+    const bool want_composite = (flags & Cksum::FLAG_COMPOSITE) != 0;
+
+    switch (t) {
+    case cksum::Type::crc32:
+    case cksum::Type::crc32c:
+      if (want_composite) {
+        break;
+      }
+      [[fallthrough]];
+    case cksum::Type::crc64nvme:
+      return std::make_unique<CRCCombiner>(t);
+    default:
+      break;
+    }
+    return std::make_unique<DigestCombiner>(t);
+  }
+
+} // namespace rgw::cksum
index 900a1046d19af0063f0dd4bc73ddf8dfe73e7b25..70836e7c6bacd5876592810d4765f0ea8f180984 100644 (file)
 #include <boost/algorithm/string/predicate.hpp>
 #include <cstdint>
 #include <cstring>
+#include <memory>
 #include <optional>
 #include <stdint.h>
 #include <string>
 #include <string_view>
 #include <array>
 #include <iterator>
+#include <tuple>
+#include <variant>
 #include <boost/algorithm/string.hpp>
 #include "fmt/format.h"
 #include "common/armor.h"
@@ -48,8 +51,9 @@ namespace rgw { namespace cksum {
       crc64nvme,
   };
 
-  static constexpr uint16_t FLAG_NONE =      0x0000;
-  static constexpr uint16_t FLAG_AWS_CKSUM = 0x0001;
+  static constexpr uint16_t FLAG_NONE =       0x0000;
+  static constexpr uint16_t FLAG_AWS_CKSUM =  0x0001;
+  static constexpr uint16_t FLAG_CRC =        0x0002;
 
   class Desc
   {
@@ -84,30 +88,60 @@ namespace rgw { namespace cksum {
     static constexpr std::array<Desc, 9> checksums =
     {
       Desc(Type::none, "none", 0, FLAG_NONE),
-      Desc(Type::crc32, "crc32", 4, FLAG_AWS_CKSUM),
-      Desc(Type::crc32c, "crc32c", 4, FLAG_AWS_CKSUM),
+      Desc(Type::crc32, "crc32", 4, FLAG_AWS_CKSUM|FLAG_CRC),
+      Desc(Type::crc32c, "crc32c", 4, FLAG_AWS_CKSUM|FLAG_CRC),
       Desc(Type::xxh3, "xxh3", 8, FLAG_NONE),
       Desc(Type::sha1, "sha1", 20, FLAG_AWS_CKSUM),
       Desc(Type::sha256, "sha256", 32, FLAG_AWS_CKSUM),
       Desc(Type::sha512, "sha512", 64, FLAG_NONE),
       Desc(Type::blake3, "blake3", 32, FLAG_NONE),
-      Desc(Type::crc64nvme, "crc64nvme", 8, FLAG_AWS_CKSUM),
+      Desc(Type::crc64nvme, "crc64nvme", 8, FLAG_AWS_CKSUM|FLAG_CRC),
     };
 
     static constexpr uint16_t max_digest_size = 64;
     using value_type = std::array<unsigned char, max_digest_size>;
 
+    static constexpr uint16_t FLAG_CKSUM_NONE =           0x0000;
+    static constexpr uint16_t FLAG_V2 =             0x0001; // struct_v >= 2
+    static constexpr uint16_t FLAG_COMPOSITE =      0x0002;
+    static constexpr uint16_t FLAG_FULL_OBJECT =    0x0004;
+    static constexpr uint16_t FLAG_COMBINED =       0x0008;
+
+    static constexpr uint16_t COMPOSITE_MASK =
+      (FLAG_COMBINED|FLAG_COMPOSITE);
+    static constexpr uint16_t FULL_OBJECT_MASK =
+      (FLAG_COMBINED|FLAG_FULL_OBJECT);
+
     Type type;
     value_type digest;
+    uint16_t flags;
+
+    enum class CtorStyle : uint8_t
+    {
+      raw = 0,
+      from_armored,
+    };
 
-    Cksum(Type _type = Type::none) : type(_type) {}
-    Cksum(Type _type, const char* _armored_text)
-      : type(_type) {
+    Cksum(Type _type = Type::none)
+      : type(_type), flags(FLAG_V2)
+    {}
+
+    Cksum(Type _type, const char* _data, CtorStyle style)
+      : type(_type), flags(FLAG_V2)
+    {
       const auto& ckd = checksums[uint16_t(type)];
+      switch (style) {
+      case CtorStyle::from_armored:
       (void) ceph_unarmor((char*) digest.begin(),
                          (char*) digest.begin() + ckd.digest_size,
-                         _armored_text,
-                         _armored_text + std::strlen(_armored_text));
+                         _data,
+                         _data + std::strlen(_data));
+      break;
+      case CtorStyle::raw:
+      default:
+       memcpy((char*) digest.data(), (char*) _data, ckd.digest_size);
+       break;
+      };
     }
 
     const char* type_string() const {
@@ -118,6 +152,28 @@ namespace rgw { namespace cksum {
       return (Cksum::checksums[uint16_t(type)]).aws();
     }
 
+    /* true when this checksum's algorithm carries FLAG_CRC in the
+     * checksums descriptor table */
+    bool crc() const {
+      const auto& ckd = Cksum::checksums[uint16_t(type)];
+      return (ckd.flags & FLAG_CRC) != 0;
+    }
+
+    /* true when FLAG_V2 is set;  decode() clears all flags for
+     * encodings older than struct_v 2 */
+    bool v2() const {
+      return (flags & FLAG_V2) != 0;
+    }
+
+    /* true only when both bits of COMPOSITE_MASK (FLAG_COMBINED and
+     * FLAG_COMPOSITE) are set */
+    bool composite() const {
+      /* treating COMPOSITE and FULL_OBJECT as flags has issues,
+       * as does not doing it;  note that we cannot rely on crc()
+       * to mean !COMPOSITE/FULL_OBJECT, as CRC32 and CRC32C
+       * were deployed as digest checksums in 2023 (so we must be
+       * prepared to find them on disk) and per AWS can still be
+       * constructed as digests.
+       *
+       * invariant: FLAG_COMPOSITE is a property of /combined checksums/;
+       * it propagates through encode/decode, but we expect only
+       * logical combination/Combiner to set it */
+      return ((flags & COMPOSITE_MASK) == COMPOSITE_MASK);
+    }
+
     std::string aws_name() const {
       return fmt::format("x-amz-checksum-{}", type_string());
     }
@@ -164,27 +220,63 @@ namespace rgw { namespace cksum {
     }
 
     std::string to_string() const  {
-      std::string hs;
       const auto& ckd = checksums[uint16_t(type)];
       return fmt::format("{{{}}}{}", ckd.name, to_base64());
     }
 
+
+    using crc_type = std::variant<uint32_t, uint64_t>;
+
+    /* Return the stored digest bytes copied into a native integer:
+     * uint32_t when the descriptor's digest size is 4, uint64_t when it
+     * is 8.  Empty when the type is not flagged as a CRC or the digest
+     * size is neither of those. */
+    std::optional<crc_type> get_crc() const {
+      if (!crc()) {
+        return std::nullopt;
+      }
+      const auto width = checksums[uint16_t(type)].digest_size;
+      if (width == 4) {
+        uint32_t val32;
+        memcpy(&val32, (char*) digest.data(), sizeof(val32));
+        return crc_type{val32};
+      }
+      if (width == 8) {
+        uint64_t val64;
+        memcpy(&val64, (char*) digest.data(), sizeof(val64));
+        return crc_type{val64};
+      }
+      return std::nullopt;
+    }
+
     void encode(buffer::list& bl) const {
       const auto& ckd = checksums[uint16_t(type)];
-      ENCODE_START(1, 1, bl);
+      ENCODE_START(2, 1, bl);
       encode(uint16_t(type), bl);
       encode(ckd.digest_size, bl);
       bl.append((char*)digest.data(), ckd.digest_size);
+      encode(flags, bl);
       ENCODE_FINISH(bl);
     }
 
     void decode(bufferlist::const_iterator& p) {
-      DECODE_START(1, p);
+      DECODE_START(2, p);
       uint16_t tt;
       decode(tt, p);
       type = cksum::Type(tt);
       decode(tt, p); /* <= max_digest_size */
       p.copy(tt, (char*)digest.data());
+      if (struct_v < 2) {
+       flags = 0;
+      } else {
+       decode(flags, p);
+      }
       DECODE_FINISH(p);
     }
   }; /* Cksum */
@@ -192,8 +284,8 @@ namespace rgw { namespace cksum {
 
   static inline const std::optional<rgw::cksum::Cksum> no_cksum{std::nullopt};
 
+  /* XXX would like std::string_view */
   static inline std::string to_string(const Type type) {
-    std::string hs;
     const auto& ckd = Cksum::checksums[uint16_t(type)];
     return ckd.name;
   }
@@ -207,6 +299,22 @@ namespace rgw { namespace cksum {
     return Type::none;
   } /* parse_cksum_type */
 
+  /* Checksum type flag implied by an algorithm alone (used by
+   * parse_cksum_flags() when the client sent no "type" header):
+   * none -> FLAG_CKSUM_NONE, the three CRC algorithms -> FLAG_FULL_OBJECT,
+   * anything else -> FLAG_COMPOSITE */
+  static inline uint16_t cksum_flags_of(Type t) {
+    if (t == cksum::Type::none) {
+      return Cksum::FLAG_CKSUM_NONE;
+    }
+    const bool is_crc_algo =
+      (t == cksum::Type::crc64nvme) ||
+      (t == cksum::Type::crc32) ||
+      (t == cksum::Type::crc32c);
+    if (is_crc_algo) {
+      return Cksum::FLAG_FULL_OBJECT;
+    }
+    return Cksum::FLAG_COMPOSITE;
+  } /* cksum_flags_of */
+
   static inline Type parse_cksum_type_hdr(const std::string_view hdr_name) {
     auto pos = hdr_name.find("x-amz-checksum-", 0);
     if (pos == std::string::npos) {
@@ -225,4 +333,69 @@ namespace rgw { namespace cksum {
       parse_cksum_type_hdr(hdr_name) != Type::none;
   } /* is_cksum_hdr */
 
+
+  /* (permitted?, algorithm name, checksum type name) */
+  using PermittedCksumResult
+  = std::tuple<bool, const std::string, const char*>;
+
+  /* Report whether an algorithm may be used with the requested checksum
+   * type.  Type::none is always permitted and yields empty names.  With
+   * FLAG_COMPOSITE set, every algorithm except crc64nvme is permitted.
+   * Otherwise the request is treated as FULL_OBJECT, which is permitted
+   * only for algorithms flagged FLAG_CRC. */
+  static inline PermittedCksumResult
+  permitted_cksum_algo_and_type(Type type, uint16_t cksum_flags) {
+    if (type == Type::none) {
+      return PermittedCksumResult(true, "", "");
+    }
+
+    const bool want_composite = (cksum_flags & Cksum::FLAG_COMPOSITE) != 0;
+    if (want_composite) {
+      const bool allowed = (type != Type::crc64nvme);
+      return PermittedCksumResult(allowed, to_string(type), "COMPOSITE");
+    }
+
+    /* FULL_OBJECT */
+    const bool is_crc_algo =
+      (Cksum::checksums[uint16_t(type)].flags & cksum::FLAG_CRC) != 0;
+    return PermittedCksumResult(is_crc_algo, to_string(type), "FULL_OBJECT");
+  }
+
+  std::optional<Cksum>
+  combine_crc_cksum(const Cksum& ck1, const Cksum& ck2, uintmax_t len2);
+
+  /* Abstract accumulator over per-part checksums:  callers append() each
+   * part's checksum (with the part's size) and then call final() for the
+   * resulting checksum.  Wraps the combine and digest strategies. */
+  class Combiner
+  {
+  public:
+    explicit Combiner(cksum::Type t)
+      : type(t) {}
+
+    /* checksum algorithm this combiner was created for */
+    cksum::Type get_type() const { return type; }
+
+    virtual void append(const Cksum& cksum, uint64_t part_size) = 0;
+    virtual Cksum final() = 0;
+    virtual ~Combiner() = default;
+
+  private:
+    cksum::Type type;
+  }; /* abstract Combiner */
+
+  /* choose type-correct Combiner */
+  std::unique_ptr<Combiner>
+  CombinerFactory(cksum::Type t, uint16_t flags);
+
+  /* (checksum type flag, checksum type name) */
+  using ChecksumTypeResult = std::tuple<uint16_t, const char*>;
+
+  /* Classify a stored checksum for reporting.
+   *
+   * A non-multipart checksum, or a v2 multipart checksum that is not
+   * marked composite, is reported as "FULL_OBJECT" (with
+   * FLAG_CKSUM_NONE).  Everything else is "COMPOSITE":  either it is
+   * marked composite, or it predates the 2025 update that introduced CRC
+   * combining and is therefore composite regardless of the algorithm. */
+  static inline ChecksumTypeResult
+  get_checksum_type(const Cksum& cksum, bool is_multipart) {
+    const bool full_object =
+      !is_multipart || (cksum.v2() && !cksum.composite());
+    if (full_object) {
+      return ChecksumTypeResult(Cksum::FLAG_CKSUM_NONE, "FULL_OBJECT");
+    }
+    return ChecksumTypeResult(Cksum::FLAG_COMPOSITE, "COMPOSITE");
+  } /* get_checksum_type */
+
 }} /* namespace */
index da60c2071f992afd8dbe2dbbb02354bb8ee20b09..837dfac7f5aafb9b662231785dd1179715546acd 100644 (file)
@@ -14,6 +14,7 @@
  */
 
 #include "rgw_cksum_pipe.h"
+#include <cstdint>
 #include <memory>
 #include <string>
 #include <fmt/format.h>
@@ -27,16 +28,19 @@ namespace rgw::putobj {
 
   RGWPutObj_Cksum::RGWPutObj_Cksum(rgw::sal::DataProcessor* next,
                                   rgw::cksum::Type _typ,
+                                  uint16_t _flags,
                                   cksum_hdr_t&& _hdr)
     : Pipe(next),
       _type(_typ),
+      flags(_flags),
       dv(rgw::cksum::digest_factory(_type)),
       _digest(cksum::get_digest(dv)), cksum_hdr(_hdr)
   {}
 
   std::unique_ptr<RGWPutObj_Cksum> RGWPutObj_Cksum::Factory(
     rgw::sal::DataProcessor* next, const RGWEnv& env,
-    rgw::cksum::Type override_type)
+    rgw::cksum::Type override_type,
+    uint16_t cksum_flags)
   {
     /* look for matching headers */
     auto algo_header = cksum_algorithm_hdr(env);
@@ -47,7 +51,7 @@ namespace rgw::putobj {
        if (cksum_type != rgw::cksum::Type::none) {
          return
            std::make_unique<RGWPutObj_Cksum>(
-                              next, cksum_type, std::move(algo_header));
+                 next, cksum_type, cksum_flags, std::move(algo_header));
        } else {
         /* unknown checksum type requested */
         throw rgw::io::Exception(EINVAL, std::system_category());
@@ -62,7 +66,7 @@ namespace rgw::putobj {
       auto algo_header = cksum_algorithm_hdr(override_type);
       return
        std::make_unique<RGWPutObj_Cksum>(
-                          next, override_type, std::move(algo_header));
+               next, override_type, cksum_flags, std::move(algo_header));
     }
     /* no checksum requested */
     return std::unique_ptr<RGWPutObj_Cksum>();
index c459d156335422585c9339142d5aa8a23e2ba407..da79d17cdac531e672125dac524b1779a6b7c722 100644 (file)
@@ -81,6 +81,18 @@ namespace rgw::putobj {
     return cksum_hdr_t(nullptr, nullptr);
   } /* cksum_algorithm_hdr */
 
+  /* Checksum algorithm named by the request's checksum-algorithm header,
+   * or Type::none when cksum_algorithm_hdr() finds no value */
+  static inline cksum::Type
+  multipart_cksum_algo(const RGWEnv& env) {
+    /* AWS has shifted to "strong" integrity checking by default,
+     * we may define policy in future */
+    const auto hdr = rgw::putobj::cksum_algorithm_hdr(env);
+    if (!hdr.second) {
+      return cksum::Type::none;
+    }
+    return rgw::cksum::parse_cksum_type(hdr.second);
+  }
+
   using GetHeaderCksumResult = std::pair<cksum::Cksum, std::string_view>;
 
   static inline GetHeaderCksumResult get_hdr_cksum(const RGWEnv& env) {
@@ -93,7 +105,8 @@ namespace rgw::putobj {
        auto hv = env.get(hk.c_str());
        if (hv) {
          return
-           GetHeaderCksumResult(cksum::Cksum(cksum_type, hv),
+           GetHeaderCksumResult(cksum::Cksum(cksum_type, hv,
+                                  cksum::Cksum::CtorStyle::from_armored),
                                 std::string_view(hv, std::strlen(hv)));
        }
       }
@@ -114,17 +127,37 @@ namespace rgw::putobj {
       auto hv = env.get(hk.c_str());
       if (hv) {
        return
-         GetHeaderCksumResult(cksum::Cksum(cksum_type, hv),
+         GetHeaderCksumResult(cksum::Cksum(cksum_type, hv,
+                                cksum::Cksum::CtorStyle::from_armored),
                               std::string_view(hv, std::strlen(hv)));
       }
     }
     return GetHeaderCksumResult(cksum::Cksum(cksum_type), "");
   } /* find_hdr_cksum */
 
+  /* Translate the optional checksum "type" header into Cksum flag bits.
+   *
+   * With a header:  "full_object" -> FLAG_FULL_OBJECT, "composite" ->
+   * FLAG_COMPOSITE (both compared case-insensitively), any other value
+   * -> 0.  Without a header, the type matching algorithm t is selected
+   * via cksum_flags_of(). */
+  static inline uint16_t
+  parse_cksum_flags(cksum::Type t, boost::optional<const std::string &> type_hdr)  {
+    if (!type_hdr) {
+      /* if the client sent a checksum algorithm header but not a "type"
+       * header, we can select the matching type */
+      return cksum_flags_of(t);
+    }
+    if (boost::algorithm::iequals(*type_hdr, "full_object")) {
+      return cksum::Cksum::FLAG_FULL_OBJECT;
+    }
+    if (boost::algorithm::iequals(*type_hdr, "composite")) {
+      return cksum::Cksum::FLAG_COMPOSITE;
+    }
+    return 0;
+  } /* parse_cksum_flags */
+
   // PutObj filter for streaming checksums
   class RGWPutObj_Cksum : public rgw::putobj::Pipe {
 
     cksum::Type _type;
+    uint16_t flags;
     cksum::DigestVariant dv;
     cksum::Digest* _digest;
     cksum::Cksum _cksum;
@@ -136,10 +169,10 @@ namespace rgw::putobj {
 
     static std::unique_ptr<RGWPutObj_Cksum> Factory(
       rgw::sal::DataProcessor* next, const RGWEnv&,
-      rgw::cksum::Type override_type);
+      rgw::cksum::Type override_type, uint16_t cksum_flags);
 
     RGWPutObj_Cksum(rgw::sal::DataProcessor* next, rgw::cksum::Type _type,
-                   cksum_hdr_t&& _hdr);
+                   uint16_t _flags, cksum_hdr_t&& _hdr);
     RGWPutObj_Cksum(RGWPutObj_Cksum& rhs) = delete;
     ~RGWPutObj_Cksum() {}
 
@@ -153,13 +186,17 @@ namespace rgw::putobj {
 
     const cksum::Cksum& finalize() {
       _cksum = finalize_digest(_digest, _type);
+      _cksum.flags = flags; // n.b., this may clear the COMPOSITE flag
       return _cksum;
     }
 
     const char* expected(const RGWEnv& env) {
-      auto hk = fmt::format("HTTP_X_AMZ_CHECKSUM_{}", cksum_hdr.second);
-      auto hv = env.get(hk.c_str());
-      return hv;
+      if (cksum_hdr.second) {
+       auto hk = fmt::format("HTTP_X_AMZ_CHECKSUM_{}", cksum_hdr.second);
+       auto hv = env.get(hk.c_str());
+       return hv;
+      }
+      return nullptr;
     }
 
     VerifyResult verify(const RGWEnv& env) {
index bfb3690515e623b471e077a9b0aeb0c548654b4f..7c03e49b52a05801fec5b0fe0c31911b6bde5add 100644 (file)
@@ -1539,9 +1539,10 @@ struct multipart_upload_info
   RGWObjectRetention obj_retention;
   RGWObjectLegalHold obj_legal_hold;
   rgw::cksum::Type cksum_type {rgw::cksum::Type::none};
+  uint16_t cksum_flags{rgw::cksum::Cksum::FLAG_CKSUM_NONE};
 
   void encode(bufferlist& bl) const {
-    ENCODE_START(3, 1, bl);
+    ENCODE_START(4, 1, bl);
     encode(dest_placement, bl);
     encode(obj_retention_exist, bl);
     encode(obj_legal_hold_exist, bl);
@@ -1549,11 +1550,12 @@ struct multipart_upload_info
     encode(obj_legal_hold, bl);
     uint16_t ct{uint16_t(cksum_type)};
     encode(ct, bl);
+    encode(cksum_flags, bl);
     ENCODE_FINISH(bl);
   }
 
   void decode(bufferlist::const_iterator& bl) {
-    DECODE_START_LEGACY_COMPAT_LEN(3, 1, 1, bl);
+    DECODE_START_LEGACY_COMPAT_LEN(4, 1, 1, bl);
     decode(dest_placement, bl);
     if (struct_v >= 2) {
       decode(obj_retention_exist, bl);
@@ -1564,6 +1566,9 @@ struct multipart_upload_info
        uint16_t ct;
        decode(ct, bl);
        cksum_type = rgw::cksum::Type(ct);
+       if (struct_v >= 4) {
+         decode(cksum_flags, bl);
+       }
       }
     } else {
       obj_retention_exist = false;
index 15e488152cd0278b4833ead29de3a9e5fcb1c40b..5ca77f0792ec2d8fc40d39b1a7aebc340a850ca9 100644 (file)
@@ -98,7 +98,7 @@ namespace rgw { namespace digest {
 
   public:
     static constexpr uint16_t digest_size = 8;
-    static constexpr uint64_t initial_value = 0x0;
+    static constexpr uint64_t initial_value = 0ULL;
 
     Crc64Nvme() { Restart(); }
 
@@ -109,6 +109,10 @@ namespace rgw { namespace digest {
     }
 
     void Final(unsigned char* digest) {
+      /* due to AWS (and other) convention, the at-rest
+       * digest is byteswapped (on LE?); */
+      /* XXX is this really endian specific? don't want to
+       * break ARM */
       if constexpr (std::endian::native != std::endian::big) {
        crc = rgw::digest::byteswap(crc);
       }
index 750345c45bf8082de65cb06c3aa7ea0ab933b8bc..54ae09c4b38b1975ee5b1a44ade3d7b3d46416ac 100644 (file)
@@ -15,6 +15,7 @@
 #include <boost/utility/in_place_factory.hpp>
 #include <fmt/format.h>
 
+#include "common/dout.h"
 #include "include/scope_guard.h"
 #include "common/Clock.h"
 #include "common/armor.h"
@@ -4359,6 +4360,7 @@ void RGWPutObj::execute(optional_yield y)
     }
 
     multipart_cksum_type = upload->cksum_type;
+    multipart_cksum_flags = upload->cksum_flags;
 
     /* upload will go out of scope, so copy the dest placement for later use */
     s->dest_placement = *pdest_placement;
@@ -4490,7 +4492,10 @@ void RGWPutObj::execute(optional_yield y)
     /* optional streaming checksum */
     try {
       cksum_filter =
-       rgw::putobj::RGWPutObj_Cksum::Factory(filter, *s->info.env, multipart_cksum_type);
+       rgw::putobj::RGWPutObj_Cksum::Factory(
+               filter, *s->info.env,
+              multipart_cksum_type,
+              multipart_cksum_flags);
     } catch (const rgw::io::Exception& e) {
       op_ret = -e.code().value();
       return;
@@ -4642,6 +4647,7 @@ void RGWPutObj::execute(optional_yield y)
 
   if (cksum_filter) {
     const auto& hdr = cksum_filter->header();
+
     auto expected_ck = cksum_filter->expected(*s->info.env);
     auto cksum_verify =
       cksum_filter->verify(*s->info.env); // valid or no supplied cksum
@@ -4653,7 +4659,7 @@ void RGWPutObj::execute(optional_yield y)
       ldpp_dout_fmt(this, 16,
                    "{} checksum verified "
                    "\n\tcomputed={} == \n\texpected={}",
-                   hdr.second,
+                   (hdr.second) ? hdr.second : "(no supplied checksum header)",
                    cksum->to_armor(),
                    (!!expected_ck) ? expected_ck : "(checksum unavailable)");
 
@@ -4670,7 +4676,7 @@ void RGWPutObj::execute(optional_yield y)
                    computed_ck,
                    (!!expected_ck) ? expected_ck : "(checksum unavailable)");
 
-      op_ret = -ERR_INVALID_REQUEST;
+      op_ret = -ERR_BAD_DIGEST;
       return;
     }
   }
@@ -4865,7 +4871,8 @@ void RGWPostObj::execute(optional_yield y)
     try {
       cksum_filter =
        rgw::putobj::RGWPutObj_Cksum::Factory(
-         filter, *s->info.env, rgw::cksum::Type::none /* no override */);
+               filter, *s->info.env, rgw::cksum::Type::none /* no override */,
+              rgw::cksum::Cksum::FLAG_CKSUM_NONE);
     } catch (const rgw::io::Exception& e) {
       op_ret = -e.code().value();
       return;
@@ -4979,7 +4986,7 @@ void RGWPostObj::execute(optional_yield y)
                      cksum->to_armor(),
                      cksum_filter->expected(*s->info.env));
 
-        op_ret = -ERR_INVALID_REQUEST;
+        op_ret = -ERR_BAD_DIGEST;
         return;
       }
     }
@@ -6672,18 +6679,24 @@ void RGWInitMultipart::execute(optional_yield y)
   std::unique_ptr<rgw::sal::MultipartUpload> upload;
   upload = s->bucket->get_multipart_upload(s->object->get_name(),
                                       upload_id);
+
+  /* apparently, we are assured of upload */
   upload->obj_legal_hold = obj_legal_hold;
   upload->obj_retention = obj_retention;
   upload->cksum_type = cksum_algo;
-  op_ret = upload->init(this, s->yield, s->owner, s->dest_placement, attrs);
+  /* cksum_flags have been validated against algorithm, so, e.g., if
+   * FLAG_COMPOSITE is set, the algorithm can be used with a composite/
+   * digest checksum (e.g., it's not CRC64NVME) */
+  upload->cksum_flags = cksum_flags;
 
+  op_ret = upload->init(this, s->yield, s->owner, s->dest_placement, attrs);
   if (op_ret == 0) {
     upload_id = upload->get_upload_id();
   }
   s->trace->SetAttribute(tracing::rgw::UPLOAD_ID, upload_id);
   multipart_trace->UpdateName(tracing::rgw::MULTIPART + upload_id);
 
-}
+} /* RGWInitMultipart::execute() */
 
 int RGWCompleteMultipart::verify_permission(optional_yield y)
 {
@@ -6713,6 +6726,7 @@ try_sum_part_cksums(const DoutPrefixProvider *dpp,
                    rgw::sal::MultipartUpload* upload,
                    RGWMultiCompleteUpload* parts,
                    std::optional<rgw::cksum::Cksum>& out_cksum,
+                   std::optional<std::string>& armored_cksum,
                    optional_yield y)
 {
   /* 1. need checksum-algorithm header (if invalid, fail)
@@ -6735,6 +6749,7 @@ try_sum_part_cksums(const DoutPrefixProvider *dpp,
   auto num_parts = int(parts->parts.size());
 
   rgw::cksum::Type& cksum_type = upload->cksum_type;
+  uint16_t cksum_flags = upload->cksum_flags;
 
   int again_count{0};
  again:
@@ -6761,8 +6776,7 @@ try_sum_part_cksums(const DoutPrefixProvider *dpp,
     return 0;
   }
 
-  rgw::cksum::DigestVariant dv = rgw::cksum::digest_factory(cksum_type);
-  rgw::cksum::Digest* digest = rgw::cksum::get_digest(dv);
+  auto cksum_combiner = rgw::cksum::CombinerFactory(cksum_type, cksum_flags);
 
   /* returns the parts (currently?) in cache */
   auto parts_ix{0};
@@ -6787,12 +6801,12 @@ try_sum_part_cksums(const DoutPrefixProvider *dpp,
     if ((part_cksum->type != cksum_type)) {
       /* if parts have inconsistent checksum, fail now */
 
-    ldpp_dout_fmt(dpp, 14,
-                 "ERROR: multipart part checksum type mismatch\n\tcomplete "
-                 "multipart header={} part={}",
-                 to_string(part_cksum->type), to_string(cksum_type));
+      ldpp_dout_fmt(dpp, 14,
+                   "ERROR: multipart part checksum type mismatch\n\tcomplete "
+                   "multipart header={} part={}",
+                   to_string(part_cksum->type), to_string(cksum_type));
 
-    op_ret = -ERR_INVALID_REQUEST;
+      op_ret = -ERR_INVALID_REQUEST;
       return op_ret;
     }
 
@@ -6801,18 +6815,27 @@ try_sum_part_cksums(const DoutPrefixProvider *dpp,
      * "-<num-parts>.  See
      * https://docs.aws.amazon.com/AmazonS3/latest/userguide/checking-object-integrity.html#large-object-checksums
      */
-    auto ckr = part_cksum->raw();
-    digest->Update((unsigned char *)ckr.data(), ckr.length());
+    cksum_combiner->append(*part_cksum, part.second->get_size());
+
   } /* all-parts */
 
   /* we cannot verify this checksum, only compute it */
-  out_cksum = rgw::cksum::finalize_digest(digest, cksum_type);
+  out_cksum = cksum_combiner->final();
+
+  armored_cksum = [&]() -> std::string {
+    std::string armor = out_cksum->to_armor();
+    if (out_cksum->composite()) {
+      armor += fmt::format("-{}", num_parts);
+    }
+    return armor;
+  }();
 
   ldpp_dout_fmt(dpp, 16,
-               "INFO: {} combined checksum {} {}-{}",
+               "INFO: {} combined checksum {} {}",
                __func__,
                out_cksum->type_string(),
-               out_cksum->to_armor(), num_parts);
+               out_cksum->to_armor(),
+               *armored_cksum);
 
   return op_ret;
 } /* try_sum_part_chksums */
@@ -6929,7 +6952,8 @@ void RGWCompleteMultipart::execute(optional_yield y)
 
   /* checksum computation */
   if (upload->cksum_type != rgw::cksum::Type::none) {
-    op_ret = try_sum_part_cksums(this, s->cct, upload.get(), parts, cksum, y);
+    op_ret = try_sum_part_cksums(this, s->cct, upload.get(), parts, cksum,
+                                armored_cksum, y);
     if (op_ret < 0) {
       ldpp_dout(this, 16) << "ERROR: try_sum_part_cksums failed, obj="
                          << meta_obj << " ret=" << op_ret << dendl;
@@ -6949,9 +6973,6 @@ void RGWCompleteMultipart::execute(optional_yield y)
   auto& target_attrs = meta_obj->get_attrs();
 
   if (cksum) {
-    armored_cksum =
-      fmt::format("{}-{}", cksum->to_armor(), parts->parts.size());
-
     /* validate computed checksum against supplied checksum, if present */
     auto [hdr_cksum, supplied_cksum] =
       rgw::putobj::find_hdr_cksum(*(s->info.env));
@@ -6960,19 +6981,37 @@ void RGWCompleteMultipart::execute(optional_yield y)
                    "INFO: client supplied checksum {}: {} ",
                    hdr_cksum.header_name(), supplied_cksum);
 
-    if (! (supplied_cksum.empty()) &&
-       (supplied_cksum != armored_cksum)) {
-      /* some minio SDK clients assert a checksum that is cryptographically
-       * valid but omits the part count */
-      auto parts_suffix = fmt::format("-{}", parts->parts.size());
-      auto suffix_len = armored_cksum->size() - parts_suffix.size();
-      if (armored_cksum->compare(0, suffix_len, supplied_cksum) != 0) {
-       ldpp_dout_fmt(this, 4,
-                     "{} content checksum mismatch"
-                     "\n\tcalculated={} != \n\texpected={}",
-                     hdr_cksum.header_name(), armored_cksum, supplied_cksum);
-       op_ret = -ERR_INVALID_REQUEST;
-       return;
+    /* in late 2024, we observed some minio SDK clients assert a checksum that
+     * was a cryptographically valid *digest*, but omitted the part count;
+     *
+     * in addition, from 2025, AWS specifies that multipart uploads using CRC
+     * checksums may (CRC32, CRC32c) or must (CRC64NVME) be computed as full
+     * object checksums [1], so perhaps should not suffix a part count.
+     *
+     * for the present, it appears safe to accept both forms regardless of the
+     * checksum algorithm.
+     *
+     * [1] https://docs.aws.amazon.com/AmazonS3/latest/userguide/checking-object-integrity.html
+     */
+
+    if (! supplied_cksum.empty()) {
+      const std::string_view acm = *armored_cksum;
+      const std::string_view scm = supplied_cksum;
+      if (scm != acm) {
+        auto c_len = acm.length();
+        if (cksum->composite()) {
+          auto parts_suffix = fmt::format("-{}", parts->parts.size());
+          c_len -= parts_suffix.length();
+        }
+        auto c_res = scm.compare(0, c_len, acm, 0, c_len);
+        if (c_res != 0) {
+          ldpp_dout_fmt(this, 4,
+                        "{} content checksum mismatch"
+                        "\n\tcalculated={} != \n\texpected={}",
+                        hdr_cksum.header_name(), armored_cksum, supplied_cksum);
+          op_ret = -ERR_BAD_DIGEST;
+          return;
+        }
       }
     }
 
index cbe441c140bfdf5883812ad6964108a7e367026f..d0c3505cb69dc689b7e13d3d6e9e18d1fde62f3f 100644 (file)
@@ -1240,6 +1240,7 @@ protected:
   std::string multipart_part_str;
   int multipart_part_num = 0;
   rgw::cksum::Type multipart_cksum_type{rgw::cksum::Type::none};
+  uint16_t multipart_cksum_flags{rgw::cksum::Cksum::FLAG_CKSUM_NONE};
   jspan_ptr multipart_trace;
 
   boost::optional<ceph::real_time> delete_at;
@@ -1914,6 +1915,7 @@ protected:
   std::optional<RGWObjectLegalHold> obj_legal_hold = std::nullopt;
   rgw::sal::Attrs attrs;
   rgw::cksum::Type cksum_algo{rgw::cksum::Type::none};
+  uint16_t cksum_flags{rgw::cksum::Cksum::FLAG_CKSUM_NONE};
 
 public:
   RGWInitMultipart() {}
index 14185c4de1df34f75f51107ebba17ce494da0ca3..e92bcb1009a5dc6baf73fbfc9a2d490699bf6b88 100644 (file)
@@ -509,12 +509,29 @@ int RGWGetObj_ObjStore_S3::send_response_data(bufferlist& bl, off_t bl_ofs,
        try {
          rgw::cksum::Cksum cksum;
          decode(cksum, i->second);
-         if (multipart_parts_count && multipart_parts_count > 0) {
+         auto cksum_type =
+           rgw::cksum::get_checksum_type(cksum,
+             (multipart_parts_count && multipart_parts_count > 0) /* is_multipart */);
+         if (std::get<0>(cksum_type) == rgw::cksum::Cksum::FLAG_COMPOSITE) {
+           /* cksum was computed with a digest algorithm, or predates the 2025
+              update that introduced CRC combining */
+           ldpp_dout_fmt(this, 16,
+                         "INFO: {} ChecksumMode==ENABLED element-name {} value {}-{}",
+                         __func__, cksum.element_name(), cksum.to_armor(),
+                         *multipart_parts_count);
            dump_header(s, cksum.header_name(),
                        fmt::format("{}-{}", cksum.to_armor(), *multipart_parts_count));
          } else {
-           dump_header(s, cksum.header_name(), cksum.to_armor());
+           /* a full object checksum, if multipart, because the checksum is CRC family */
+           auto elt_name = cksum.element_name();
+           auto armored_cksum = cksum.to_armor();
+
+           ldpp_dout_fmt(this, 16,
+                         "INFO: {} ChecksumMode==ENABLED element-name {} value {}",
+                         __func__, cksum.element_name(), cksum.to_armor());
+           dump_header(s, cksum.header_name(), armored_cksum);
          }
+         dump_header(s, "x-amz-checksum-type", std::get<1>(cksum_type));
        }  catch (buffer::error& err) {
          ldpp_dout(this, 0) << "ERROR: failed to decode rgw::cksum::Cksum"
                             << dendl;
@@ -2877,6 +2894,7 @@ void RGWPutObj_ObjStore_S3::send_response()
       dump_header_if_nonempty(s, "x-amz-version-id", version_id);
       dump_header_if_nonempty(s, "x-amz-expiration", expires);
       if (cksum && cksum->aws()) {
+       dump_header(s, "x-amz-checksum-type", "FULL_OBJECT");
        dump_header(s, cksum->header_name(), cksum->to_armor());
       }
       for (auto &it : crypt_http_responses)
@@ -2885,7 +2903,8 @@ void RGWPutObj_ObjStore_S3::send_response()
       dump_errno(s);
       dump_header_if_nonempty(s, "x-amz-version-id", version_id);
       dump_header_if_nonempty(s, "x-amz-expiration", expires);
-      if (cksum) {
+      if (cksum && cksum->aws()) {
+       dump_header(s, "x-amz-checksum-type", "FULL_OBJECT");
        dump_header(s, cksum->header_name(), cksum->to_armor());
       }
       end_header(s, this, to_mime_type(s->format));
@@ -4006,12 +4025,20 @@ void RGWGetObjAttrs_ObjStore_S3::send_response()
          rgw::cksum::Cksum cksum;
          auto bliter = iter->second.cbegin();
          cksum.decode(bliter);
-          if (multipart_parts_count && multipart_parts_count > 0) {
+         auto cksum_type =
+           rgw::cksum::get_checksum_type(cksum,
+             (multipart_parts_count && multipart_parts_count > 0) /* is_multipart */);
+         if (std::get<0>(cksum_type) == rgw::cksum::Cksum::FLAG_COMPOSITE) {
+           /* cksum was computed with a digest algorithm, or predates the 2025
+              update that introduced CRC combining */
            s->formatter->dump_string(cksum.element_name(),
-               fmt::format("{}-{}", cksum.to_armor(), *multipart_parts_count));
+                                     fmt::format("{}-{}", cksum.to_armor(),
+                                                 *multipart_parts_count));
          } else {
+           /* a full object checksum, if multipart, because the checksum is CRC family */
            s->formatter->dump_string(cksum.element_name(), cksum.to_armor());
          }
+         s->formatter->dump_string("ChecksumType", std::get<1>(cksum_type));
        } catch (buffer::error& err) {
          ldpp_dout(this, 0)
            << "ERROR: could not decode stored cksum, caught buffer::error" << dendl;
@@ -4053,6 +4080,7 @@ void RGWGetObjAttrs_ObjStore_S3::send_response()
              s->formatter->dump_int("PartNumber", part.part_number);
              s->formatter->dump_unsigned("Size", part.part_size);
              if (part.cksum.type != rgw::cksum::Type::none) {
+               /* a part's checksum is never composite, so it is dumped without a part-count suffix */
                s->formatter->dump_string(part.cksum.element_name(), part.cksum.to_armor());
              }
              s->formatter->close_section(); /* Part */
@@ -4467,13 +4495,24 @@ int RGWInitMultipart_ObjStore_S3::get_params(optional_yield y)
     return -ERR_INVALID_REQUEST;
   }
 
-  auto algo_hdr = rgw::putobj::cksum_algorithm_hdr(*(s->info.env));
-  if (algo_hdr.second) {
-    cksum_algo = rgw::cksum::parse_cksum_type(algo_hdr.second);
+  /* checksums */
+  auto checksum_type_hdr =
+    s->info.env->get_optional("HTTP_X_AMZ_CHECKSUM_TYPE");
+
+  /* composite or "full object" */
+  cksum_algo = putobj::multipart_cksum_algo(*(s->info.env));
+  cksum_flags = putobj::parse_cksum_flags(cksum_algo, checksum_type_hdr);
+
+  auto aok = cksum::permitted_cksum_algo_and_type(cksum_algo, cksum_flags);
+  if (! std::get<0>(aok)) {
+    ldpp_dout_fmt(this, 5,
+                 "ERROR: {} checksum type {} not compatible with checksum algorithm {}",
+                 __func__, std::get<1>(aok), std::get<2>(aok));
+    return -ERR_INVALID_REQUEST;
   }
 
   return 0;
-}
+} /* RGWInitMultipart_ObjStore_S3::get_params() */
 
 void RGWInitMultipart_ObjStore_S3::send_response()
 {
@@ -4555,8 +4594,11 @@ void RGWCompleteMultipart_ObjStore_S3::send_response()
     s->formatter->dump_string("Bucket", s->bucket_name);
     s->formatter->dump_string("Key", s->object->get_name());
     s->formatter->dump_string("ETag", etag);
-    if (armored_cksum) {
+    if (armored_cksum) [[likely]] {
+      auto cksum_type
+       = rgw::cksum::get_checksum_type(*cksum, true /* is_multipart */);
       s->formatter->dump_string(cksum->element_name(), *armored_cksum);
+      s->formatter->dump_string("ChecksumType", std::get<1>(cksum_type));
     }
     s->formatter->close_section();
     rgw_flush_formatter_and_reset(s, s->formatter);
index 45749c228f749577612bae927f7130253098adc6..18ab24732885a62fe3bf0bffed8b842749feab73 100644 (file)
@@ -1458,8 +1458,12 @@ public:
   //object lock
   std::optional<RGWObjectRetention> obj_retention = std::nullopt;
   std::optional<RGWObjectLegalHold> obj_legal_hold = std::nullopt;
+
   rgw::cksum::Type cksum_type = rgw::cksum::Type::none;
 
+  // only a few (currently CRC) checksums are not composite
+  uint16_t cksum_flags{rgw::cksum::Cksum::FLAG_COMPOSITE};
+
   MultipartUpload() = default;
   virtual ~MultipartUpload() = default;
 
index 305d8392508bbb3a2e1612162772239af3295879..72691f892980bbdf0e80e529a306335d5a74a124 100644 (file)
@@ -166,4 +166,5 @@ spdk_crc64_nvme(const void *buf, size_t len, uint64_t crc)
 {
        return crc64_rocksoft_refl_base(crc, (const uint8_t *)buf, len);
 }
+
 #endif
index 4167d8779e9737ccd1bc261e5353673e59636916..53ae9818e003cda40a35300529a024026868198e 100644 (file)
  *
  */
 
+#include <cstdint>
 #include <errno.h>
 #include <iostream>
 #include <fstream>
 #include <string>
+#include <utility>
 
 #include "gtest/gtest.h"
 
 #include <openssl/sha.h>
 #include "rgw/rgw_hex.h"
 
+extern "C" {
+#include "madler/crc64nvme.h"
+#include "madler/crc32iso_hdlc.h"
+#include "madler/crc32iscsi.h"
+#include "spdk/crc64.h"
+} // extern "C"
+
 #define dout_subsys ceph_subsys_rgw
 
 namespace {
@@ -52,6 +61,9 @@ namespace {
   std::string dolor =
     R"(Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.)";
 
+std::string lacrimae = dolor + dolor;
+std::string dolorem = dolor + lorem;
+
 TEST(RGWCksum, Output)
 {
   if (gen_test_data) {
@@ -67,6 +79,16 @@ TEST(RGWCksum, Output)
     of.open("/tmp/dolor", o_mode);
     of << dolor;
     of.close();
+
+    std::cout << "writing lacrimae text to /tmp/lacrimae " << std::endl;
+    of.open("/tmp/lacrimae", o_mode);
+    of << lacrimae;
+    of.close();
+
+    std::cout << "writing dolorem text to /tmp/dolorem " << std::endl;
+    of.open("/tmp/dolorem", o_mode);
+    of << dolorem;
+    of.close();
   }
 }
  
@@ -303,8 +325,6 @@ TEST(RGWCksum, DigestSTR)
 
 TEST(RGWCksum, DigestBL)
 {
-  std::string lacrimae = dolor + dolor;
-
   ceph::buffer::list dolor_bl;
   for ([[maybe_unused]] const auto& ix : {1, 2}) {
     dolor_bl.push_back(
@@ -397,21 +417,319 @@ TEST(RGWCksum, CRC64NVME1)
   ASSERT_TRUE(crc == 0x9A2DF64B8E9E517E);
 }
 
+TEST(RGWCksum, CRC64NVME_UNDIGEST)
+{
+  auto t = cksum::Type::crc64nvme;
+
+  /* digest 1 */
+  DigestVariant dv1 = rgw::cksum::digest_factory(t);
+  Digest *digest1 = get_digest(dv1);
+  ASSERT_NE(digest1, nullptr);
+
+  digest1->Update((const unsigned char *)lacrimae.c_str(), lacrimae.length());
+
+  auto cksum1 = rgw::cksum::finalize_digest(digest1, t);
+
+  uint64_t crc1 = rgw::digest::byteswap(std::get<uint64_t>(*cksum1.get_crc()));
+
+  uint64_t crc2 = spdk_crc64_nvme((const unsigned char *)lacrimae.c_str(),
+                                 lacrimae.length(), 0ULL);
+  ASSERT_EQ(crc1, crc2);
+}
+
 TEST(RGWCksum, CRC64NVME2)
 {
   auto t = cksum::Type::crc64nvme;
-  DigestVariant dv = rgw::cksum::digest_factory(t);
-  Digest *digest = get_digest(dv);
-  ASSERT_NE(digest, nullptr);
 
-  digest->Update((const unsigned char *)dolor.c_str(), dolor.length());
+  /* digest 1: crc64nvme over the single dolor text */
+  DigestVariant dv1 = rgw::cksum::digest_factory(t);
+  Digest *digest1 = get_digest(dv1);
+  ASSERT_NE(digest1, nullptr);
 
-  auto cksum = rgw::cksum::finalize_digest(digest, t);
+  digest1->Update((const unsigned char *)dolor.c_str(), dolor.length());
+
+  auto cksum1 = rgw::cksum::finalize_digest(digest1, t);
+
+  /* the armored value produced by awscliv2 2.24.5 for the dolor text */
+  ASSERT_EQ(cksum1.to_armor(), "wiBA+PSv41M=");
+
+  /* digest 2: crc64nvme over lacrimae, i.e. dolor concatenated with itself */
+  DigestVariant dv2 = rgw::cksum::digest_factory(t);
+  Digest* digest2 = get_digest(dv2);
+  ASSERT_NE(digest2, nullptr);
+
+  digest2->Update((const unsigned char *)lacrimae.c_str(), lacrimae.length());
+
+  auto cksum2 = rgw::cksum::finalize_digest(digest2, t);
 
   /* the armored value produced by awscliv2 2.24.5 */
-  ASSERT_EQ(cksum.to_armor(), "wiBA+PSv41M=");
+  ASSERT_EQ(cksum2.to_armor(), "oa2U66pdPLk=");
+}
+
+TEST(RGWCksum, CRC64NVME_COMBINE1)
+{
+  /* do crc64nvme and combining by hand */
+
+  uint64_t crc1 = spdk_crc64_nvme((const unsigned char *)dolor.c_str(),
+                                 dolor.length(), 0ULL);
+
+  uint64_t crc2 = spdk_crc64_nvme((const unsigned char *)lacrimae.c_str(),
+                                 lacrimae.length(), 0ULL);
+
+  uint64_t crc4 =  crc64nvme_comb(crc1, crc1, dolor.length());
+
+  ASSERT_EQ(crc2, crc4);
+}
+
+TEST(RGWCksum, CRC64NVME_COMBINE2)
+{
+  /* do crc64nvme and combining by hand, non-uniform strings */
+
+  uint64_t crc1 = spdk_crc64_nvme((const unsigned char *)dolor.c_str(),
+                                 dolor.length(), 0ULL);
+  
+  uint64_t crc2 = spdk_crc64_nvme((const unsigned char *)lorem.c_str(),
+                                 lorem.length(), 0ULL);
+
+  uint64_t crc3 = spdk_crc64_nvme((const unsigned char *)dolorem.c_str(),
+                                 dolorem.length(), 0ULL);
+  
+  uint64_t crc4 = crc64nvme_comb(crc1, crc2, lorem.length());
+
+  if (verbose) {
+    std::cout << "\ncrc1/dolor: " << crc1
+             << "\ncrc2/lorem: " << crc2
+             << "\ncrc3/dolorem: " << crc3
+             << "\ncrc4/crc1+crc2: " << crc4
+             << std::endl;
+  }
+
+  ASSERT_EQ(crc3, crc4);
 }
 
+TEST(RGWCksum, CRC64NVME_COMBINE3)
+{
+  auto t = cksum::Type::crc64nvme;
+
+  DigestVariant dv1 = rgw::cksum::digest_factory(t);
+  Digest* digest1 = get_digest(dv1);
+  ASSERT_NE(digest1, nullptr);
+
+  DigestVariant dv2 = rgw::cksum::digest_factory(t);
+  Digest* digest2 = get_digest(dv2);
+  ASSERT_NE(digest2, nullptr);
+
+  DigestVariant dv3 = rgw::cksum::digest_factory(t);
+  Digest* digest3 = get_digest(dv3);
+  ASSERT_NE(digest3, nullptr);
+
+  /* dolor */
+  digest1->Update((const unsigned char *)dolor.c_str(), dolor.length());
+  auto cksum1 = rgw::cksum::finalize_digest(digest1, t);
+
+  uint64_t spdk_crc1 = spdk_crc64_nvme((const unsigned char *)dolor.c_str(),
+                                      dolor.length(), 0ULL);
+  auto cksum_crc1 =
+    rgw::digest::byteswap(std::get<uint64_t>(*cksum1.get_crc()));
+
+  ASSERT_EQ(cksum_crc1, spdk_crc1);
+
+  /* lorem */
+  digest2->Update((const unsigned char *)lorem.c_str(), lorem.length());
+  auto cksum2 = rgw::cksum::finalize_digest(digest2, t);
+
+  uint64_t spdk_crc2 = spdk_crc64_nvme((const unsigned char *)lorem.c_str(),
+                                      lorem.length(), 0ULL);
+  auto cksum_crc2 =
+    rgw::digest::byteswap(std::get<uint64_t>(*cksum2.get_crc()));
+
+  ASSERT_EQ(cksum_crc2, spdk_crc2);
+
+  /* dolorem */
+  digest3->Update((const unsigned char *)dolorem.c_str(), dolorem.length());
+  auto cksum3 = rgw::cksum::finalize_digest(digest3, t);
+
+  uint64_t spdk_crc3 = spdk_crc64_nvme((const unsigned char *)dolorem.c_str(),
+                                      dolorem.length(), 0ULL);
+  auto cksum_crc3 =
+    rgw::digest::byteswap(std::get<uint64_t>(*cksum3.get_crc()));
+
+  ASSERT_EQ(cksum_crc3, spdk_crc3);
+
+  /* API combine check */
+  auto cksum4 = rgw::cksum::combine_crc_cksum(cksum1, cksum2, lorem.length());
+  ASSERT_TRUE(cksum4);
+
+  auto cksum_crc4 =
+    rgw::digest::byteswap(std::get<uint64_t>(*cksum4->get_crc()));
+
+  auto armor3 = cksum3.to_armor();
+  auto armor4 = cksum4->to_armor();
+
+  if (verbose) {
+    std::cout << "\ncrc1/dolor spdk: " << spdk_crc1
+             << " cksum_crc1: " << cksum_crc1
+             << "\ncrc2/lorem spdk: " << spdk_crc2
+             << " cksum_crc2: " << cksum_crc2
+             << "\ncrc3/dolorem spdk: " << spdk_crc3
+             << " cksum_crc3: " << cksum_crc3
+             << " cksum_crc3 armored: " << armor3
+             << "\ncrc4/crc1+crc2: " << cksum_crc4
+             << " crc4 armored: " << armor4
+             << std::endl;
+  }
+
+  /* the CRC of dolor+lorem == gf combination of cksum1 and cksum2 */
+  ASSERT_EQ(cksum3.to_armor(), cksum4->to_armor());
+} /* crc64nvme */
+
+TEST(RGWCksum, CRC32_COMBINE3)
+{
+  auto t = cksum::Type::crc32;
+
+  DigestVariant dv1 = rgw::cksum::digest_factory(t);
+  Digest* digest1 = get_digest(dv1);
+  ASSERT_NE(digest1, nullptr);
+
+  DigestVariant dv2 = rgw::cksum::digest_factory(t);
+  Digest* digest2 = get_digest(dv2);
+  ASSERT_NE(digest2, nullptr);
+
+  DigestVariant dv3 = rgw::cksum::digest_factory(t);
+  Digest* digest3 = get_digest(dv3);
+  ASSERT_NE(digest3, nullptr);
+
+  /* dolor */
+  digest1->Update((const unsigned char *)dolor.c_str(), dolor.length());
+  auto cksum1 = rgw::cksum::finalize_digest(digest1, t);
+
+  uint32_t madler_crc1 =
+    crc32iso_hdlc_word(0U, (const unsigned char *)dolor.c_str(),
+                      dolor.length());
+
+  auto cksum_crc1 =
+    rgw::digest::byteswap(std::get<uint32_t>(*cksum1.get_crc()));
+
+  ASSERT_EQ(cksum_crc1, madler_crc1);
+
+  /* lorem */
+  digest2->Update((const unsigned char *)lorem.c_str(), lorem.length());
+  auto cksum2 = rgw::cksum::finalize_digest(digest2, t);
+
+  uint32_t madler_crc2 =
+    crc32iso_hdlc_word(0U, (const unsigned char *)lorem.c_str(),
+                      lorem.length());
+  auto cksum_crc2 =
+    rgw::digest::byteswap(std::get<uint32_t>(*cksum2.get_crc()));
+
+  ASSERT_EQ(cksum_crc2, madler_crc2);
+
+  /* dolorem */
+  digest3->Update((const unsigned char *)dolorem.c_str(), dolorem.length());
+  auto cksum3 = rgw::cksum::finalize_digest(digest3, t);
+
+  uint32_t madler_crc3 =
+    crc32iso_hdlc_word(0U, (const unsigned char *)dolorem.c_str(),
+                      dolorem.length());
+  auto cksum_crc3 =
+    rgw::digest::byteswap(std::get<uint32_t>(*cksum3.get_crc()));
+
+  ASSERT_EQ(cksum_crc3, madler_crc3);
+
+  /* API combine check */
+  auto cksum4 = rgw::cksum::combine_crc_cksum(cksum1, cksum2, lorem.length());
+  ASSERT_TRUE(cksum4);
+
+  auto cksum_crc4 =
+    rgw::digest::byteswap(std::get<uint32_t>(*cksum4->get_crc()));
+
+  if (verbose) {
+    std::cout << "\ncrc1/dolor spdk: " << madler_crc1
+             << " cksum_crc1: " << cksum_crc1
+             << "\ncrc2/lorem spdk: " << madler_crc2
+             << " cksum_crc2: " << cksum_crc2
+             << "\ncrc3/dolorem spdk: " << madler_crc3
+             << " cksum_crc3: " << cksum_crc3
+             << "\ncrc4/crc1+crc2: " << cksum_crc4
+             << std::endl;
+  }
+
+  /* the CRC of dolor+lorem == gf combination of cksum1 and cksum2 */
+  ASSERT_EQ(cksum3.to_armor(), cksum4->to_armor());
+} /* crc32 */
+
+TEST(RGWCksum, CRC32C_COMBINE3)
+{
+  auto t = cksum::Type::crc32c;
+
+  DigestVariant dv1 = rgw::cksum::digest_factory(t);
+  Digest* digest1 = get_digest(dv1);
+  ASSERT_NE(digest1, nullptr);
+
+  DigestVariant dv2 = rgw::cksum::digest_factory(t);
+  Digest* digest2 = get_digest(dv2);
+  ASSERT_NE(digest2, nullptr);
+
+  DigestVariant dv3 = rgw::cksum::digest_factory(t);
+  Digest* digest3 = get_digest(dv3);
+  ASSERT_NE(digest3, nullptr);
+
+  /* dolor */
+  digest1->Update((const unsigned char *)dolor.c_str(), dolor.length());
+  auto cksum1 = rgw::cksum::finalize_digest(digest1, t);
+
+  uint32_t madler_crc1 = crc32iscsi_word(0U, (const unsigned char *)dolor.c_str(),
+                                      dolor.length());
+
+  auto cksum_crc1 =
+    rgw::digest::byteswap(std::get<uint32_t>(*cksum1.get_crc()));
+
+  ASSERT_EQ(cksum_crc1, madler_crc1);
+
+  /* lorem */
+  digest2->Update((const unsigned char *)lorem.c_str(), lorem.length());
+  auto cksum2 = rgw::cksum::finalize_digest(digest2, t);
+
+  uint32_t madler_crc2 = crc32iscsi_word(0U, (const unsigned char *)lorem.c_str(),
+                                        lorem.length());
+  auto cksum_crc2 =
+    rgw::digest::byteswap(std::get<uint32_t>(*cksum2.get_crc()));
+
+  ASSERT_EQ(cksum_crc2, madler_crc2);
+
+  /* dolorem */
+  digest3->Update((const unsigned char *)dolorem.c_str(), dolorem.length());
+  auto cksum3 = rgw::cksum::finalize_digest(digest3, t);
+
+  uint32_t madler_crc3 = crc32iscsi_word(0U, (const unsigned char *)dolorem.c_str(),
+                                        dolorem.length());
+  auto cksum_crc3 =
+    rgw::digest::byteswap(std::get<uint32_t>(*cksum3.get_crc()));
+
+  ASSERT_EQ(cksum_crc3, madler_crc3);
+
+  /* API combine check */
+  auto cksum4 = rgw::cksum::combine_crc_cksum(cksum1, cksum2, lorem.length());
+  ASSERT_TRUE(cksum4);
+
+  auto cksum_crc4 =
+    rgw::digest::byteswap(std::get<uint32_t>(*cksum4->get_crc()));
+
+  if (verbose) {
+    std::cout << "\ncrc1/dolor spdk: " << madler_crc1
+             << " cksum_crc1: " << cksum_crc1
+             << "\ncrc2/lorem spdk: " << madler_crc2
+             << " cksum_crc2: " << cksum_crc2
+             << "\ncrc3/dolorem spdk: " << madler_crc3
+             << " cksum_crc3: " << cksum_crc3
+             << "\ncrc4/crc1+crc2: " << cksum_crc4
+             << std::endl;
+  }
+
+  /* the CRC of dolor+lorem == gf combination of cksum1 and cksum2 */
+  ASSERT_EQ(cksum3.to_armor(), cksum4->to_armor());
+} /* crc32c */
+
 TEST(RGWCksum, CtorUnarmor)
 {
   auto t = cksum::Type::sha256;
@@ -425,13 +743,233 @@ TEST(RGWCksum, CtorUnarmor)
 
   auto cksum1 = rgw::cksum::finalize_digest(digest, t);
   auto armored_text1 = cksum1.to_armor();
-  auto cksum2 = rgw::cksum::Cksum(cksum1.type, armored_text1.c_str());
+  auto cksum2 = rgw::cksum::Cksum(cksum1.type, armored_text1.c_str(),
+                                 rgw::cksum::Cksum::CtorStyle::from_armored);
 
   ASSERT_EQ(armored_text1, cksum2.to_armor());
 }
 
 } /* namespace */
 
+using cksum_3tuple
+    = std::tuple<rgw::cksum::Cksum, rgw::cksum::Cksum, rgw::cksum::Cksum>;
+
+cksum_3tuple
+mpu_checksum_helper(cksum::Type t, uint16_t flags)
+{
+  DigestVariant dv1 = rgw::cksum::digest_factory(t);
+  Digest* digest1 = get_digest(dv1);
+
+  DigestVariant dv2 = rgw::cksum::digest_factory(t);
+  Digest* digest2 = get_digest(dv2);
+
+  DigestVariant dv3 = rgw::cksum::digest_factory(t);
+  Digest* digest3 = get_digest(dv3);
+
+  /* dolor */
+  digest1->Update((const unsigned char *)dolor.c_str(), dolor.length());
+  auto cksum1 = rgw::cksum::finalize_digest(digest1, t);
+
+  /* lorem */
+  digest2->Update((const unsigned char *)lorem.c_str(), lorem.length());
+  auto cksum2 = rgw::cksum::finalize_digest(digest2, t);
+
+  /* dolorem */
+  digest3->Update((const unsigned char *)dolorem.c_str(), dolorem.length());
+  auto cksum3 = rgw::cksum::finalize_digest(digest3, t);
+
+  return cksum_3tuple(cksum1, cksum2, cksum3);
+}
+
+TEST(RGWCksum, Combiner1)
+{
+  using std::get;
+
+  auto t = cksum::Type::crc64nvme;
+  uint16_t flags = rgw::cksum::Cksum::FLAG_CKSUM_NONE;
+
+  auto cksums = mpu_checksum_helper(t, flags);
+  auto& [cksum1, cksum2, cksum3] = cksums;
+
+  auto armor1 = cksum1.to_armor();
+  auto armor2 = cksum2.to_armor();
+  auto armor3 = cksum3.to_armor();
+
+  if (verbose) {
+    std::cout << "\ncksums: cksum type: " << to_string(t)
+             << "\narmor1: " << armor1
+             << "\narmor2: " << armor2
+             << "\narmor3: " << armor3
+             << std::endl;
+  }
+
+  auto cmbnr = rgw::cksum::CombinerFactory(t, flags);
+
+  ASSERT_TRUE(cmbnr);
+  ASSERT_EQ(t, cmbnr->get_type());
+
+  cmbnr->append(get<0>(cksums), dolor.length());
+  cmbnr->append(get<1>(cksums), lorem.length());
+
+  /* depending on the checksum type and flags, cksum4 is
+   * either equivalent to cksum3, or, a composite digest
+   * of cksum1 and cksum2 */
+  auto cksum4 = cmbnr->final();
+
+  /* if cksums(0) is !composite, then cksums(2) is
+   * == cksums(0) + cksums(1) and also == cksum4 */
+  bool crc = get<0>(cksums).crc();
+  if (crc) {
+    ASSERT_EQ(get<2>(cksums).to_armor(), cksum4.to_armor());
+  } else {
+    /* all we can do is assert match against an external
+     * reference */
+  }
+  if (verbose) {
+  /* pretty-print armored cksum */
+  std::string cksum_flags =
+    (! get<0>(cksums).crc()) ? "COMPOSITE" : "FULL_OBJECT";
+  std::cout << "\ncomposite cksum (delorem) "
+           << "\n\tcksum-type " << to_string(t)
+           << "\n\tflags "
+           << cksum_flags
+           << "\n\tarmored cksum " << cksum4.to_armor()
+           << std::endl;
+  }
+} /* Combiner1 */
+
+using cksum_4tuple
+    = std::tuple<rgw::cksum::Cksum, rgw::cksum::Cksum, rgw::cksum::Cksum,
+                rgw::cksum::Cksum>;
+
+class CksumCombinerFixture : public testing::Test
+{
+public:
+
+  static std::string long_a; // 5M string "AAAA...."
+  static std::string long_b;
+  static std::string long_c;
+
+  static std::vector<cksum::Type> cksum_types;
+
+  static void SetUpTestSuite() {
+
+    using std::get;
+    using ST = std::tuple<std::string&, std::string&>;
+
+
+    /* generate unique strings that match ones we use in
+     * a checksum test matrix in s3-tests */
+    std::string a{"A"};
+    std::string b{"B"};
+    std::string c{"C"};
+
+    for (const auto& elt : {ST(a, long_a),
+                           ST(b, long_b),
+                           ST(c, long_c)}) {
+
+      for (int ix = 0; ix < (5 * 1024 * 1024); ++ix) {
+       get<1>(elt) += get<0>(elt);
+      }
+    }
+
+    /* generate check types */
+    for (uint16_t ix = 1; ix <= uint16_t(cksum::Type::crc64nvme); ++ix) {
+      cksum_types.push_back(cksum::Type(ix));
+    }
+  } /* SetUpTestSuite */
+
+  static cksum_4tuple mpu_checksums(cksum::Type t) {
+
+    DigestVariant dva = rgw::cksum::digest_factory(t);
+    Digest *digesta = get_digest(dva);
+    digesta->Update((const unsigned char *)long_a.c_str(), long_a.length());
+    auto cksum1 = rgw::cksum::finalize_digest(digesta, t);
+
+    DigestVariant dvb = rgw::cksum::digest_factory(t);
+    Digest *digestb = get_digest(dvb);
+    digestb->Update((const unsigned char *)long_b.c_str(), long_b.length());
+    auto cksum2 = rgw::cksum::finalize_digest(digestb, t);
+
+    DigestVariant dvc = rgw::cksum::digest_factory(t);
+    Digest *digestc = get_digest(dvc);
+    digestc->Update((const unsigned char *)long_c.c_str(), long_c.length());
+    auto cksum3 = rgw::cksum::finalize_digest(digestc, t);
+
+    std::string long_d = long_a + long_b + long_c;
+    DigestVariant dvd = rgw::cksum::digest_factory(t);
+    Digest *digestd = get_digest(dvd);
+    digestd->Update((const unsigned char *)long_d.c_str(), long_d.length());
+    auto cksum4 = rgw::cksum::finalize_digest(digestd, t);
+
+    return cksum_4tuple(cksum1, cksum2, cksum3, cksum4);
+  }
+
+  static void TearDownTestSuite() {
+  }
+}; /* CksumCombinerfixture */
+
+std::string CksumCombinerFixture::long_a;
+std::string CksumCombinerFixture::long_b;
+std::string CksumCombinerFixture::long_c;
+std::vector<cksum::Type> CksumCombinerFixture::cksum_types;
+
+/* TODO: multipart test matrix using fixture */
+TEST_F(CksumCombinerFixture, Test1) {
+
+  for (const auto t : CksumCombinerFixture::cksum_types) {
+    using std::get;
+
+    auto cksums = CksumCombinerFixture::mpu_checksums(t);
+
+    auto cksum1 = get<0>(cksums);
+    auto flags = cksum1.flags;
+    auto cmbnr = rgw::cksum::CombinerFactory(t, flags);
+
+    ASSERT_TRUE(cmbnr);
+    ASSERT_EQ(t, cmbnr->get_type());
+
+    auto lena = CksumCombinerFixture::long_a.length();
+    auto lenb = CksumCombinerFixture::long_b.length();
+    auto lenc = CksumCombinerFixture::long_c.length();
+
+    ASSERT_EQ(lena, lenb);
+    ASSERT_EQ(lenb, lenc);
+    ASSERT_EQ(lenc, (5*1024*1024));
+
+    cmbnr->append(get<0>(cksums), lena);
+    cmbnr->append(get<1>(cksums), lena);
+    cmbnr->append(get<2>(cksums), lena);
+
+    /* depending on the checksum type and flags, cksum4 is
+     * either equivalent to cksum3, or, a composite digest
+     * of cksum1 and cksum2 */
+    auto cksum4 = cmbnr->final();
+
+    /* if cksums(0) is !composite, then cksums(3) is
+     * == SUM(cksums(0)..cksums(2)) and also == cksum4 */
+    if (get<0>(cksums).crc()) {
+      ASSERT_EQ(get<3>(cksums).to_armor(), cksum4.to_armor());
+  } else {
+    /* all we can do is assert match against an external
+     * reference */
+  }
+  /* pretty-print armored cksum */
+  std::string cksum_flags =
+    (! get<0>(cksums).crc()) ? "COMPOSITE" : "FULL_OBJECT";
+  std::cout << "\ncomposite cksum (long_a+long_b+long_c) "
+           << "\n\tcksum-type " << to_string(t)
+           << "\n\tarmored cksum1 " << get<0>(cksums).to_armor()
+           << "\n\tarmored cksum2 " << get<1>(cksums).to_armor()
+           << "\n\tarmored cksum3 " << get<2>(cksums).to_armor()
+           << "\n\tflags "
+           << cksum_flags
+           << "\n\tarmored cksum4 (composite) " << cksum4.to_armor()
+           << std::endl;
+  }
+} /* CksumCombinerFixture, Test1 */
+
+
 int main(int argc, char *argv[])
 {
   auto args = argv_to_vec(argc, argv);