]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
rgw: add checksum and digest machinery
author Matt Benjamin <mbenjamin@redhat.com>
Tue, 24 Sep 2019 21:12:02 +0000 (17:12 -0400)
committer Matt Benjamin <mbenjamin@redhat.com>
Wed, 3 Jul 2024 18:21:46 +0000 (14:21 -0400)
Adds new Blake3 digest format (native), a concrete type to
represent digests, and static_visitor machinery to unify varying
checksum computations.

This framework, together with new trailing checksum header support,
is used to implement S3 additional checksum verification.  Parts of
the AWS content checksum API work build on a prior contribution from
imtzw <tongzhiwei_yewu@cmss.chinamobile.com>.
Thank you!

Fixes: https://tracker.ceph.com/issues/42080
Fixes: https://tracker.ceph.com/issues/63951
squashed commits:
* rgw_cksum: add trivial test vectors for sha-format digests
      Computed digests match those produced by sha1sum, sha256sum,
      and sha512sum utilities.
* rgw_cksum: add test vectors for blake3
      Tests the same input strings with digests validated by
      b3sum (https://crates.io/crates/b3sum).
* rgw_cksum: switch to accel crc32c
      The internal Ceph convention appears to be to omit a final
      xor where ceph_crc32c is used, but it's required for compatibility
      with AWS implementations.
* rgw_cksum: add XXH3 digest
* rgw_cksum: write class encoder for rgw::digest::Cksum
* rgw_cksum: also reverse crc32c (REBASEME)
      Mark noticed that the crc32c output was being tested against a
      byteswapped value (crc32c also needs byteswap on LE).
* rgw_cksum: add digest::Cksum serde tests
* rgw_cksum: fix main(...) linkage
      (so we run our main unit and not the one in gmock)
* rgw_cksum: convenience extensions for integration with RGW/S3
* introduce rgw_cksum unique_ptr factory
* rgw_cksum: mark string transform accessors const
* rgw_cksum: fixup unittest_rgw_chksum compilation--all existing tests pass
* rgw_cksum: hook up put-object checksum workflow
* tweaks to report on content checksum mismatch
* rgw_cksum: match SDK as well as general checksum header
* make it more efficient
* initialize RGWPutObj_Cksum::digest
* rgw_cksum:  write parse_cksum_type w/const char* arg
* initialize _type correctly; doing armored wrong
* fix expected checksum header name, clean up verify
* fix output on checksum verify fail, cleanup
* introduce Cksum::to_armor();  all AWS cases pass
* oops, extra 0-byte at end of to_armor() result
* use to_armor() with decoded checksums (i.e., for all S3 presentation)
* remove unnecessary finalize() in RGWPutObj_Cksum dtor
* RGWPutObj_Cksum::Factory fixes
* fixes test_object_checksum_sha256
* choose preferred checksum algorithm header if both are present
* log verified checksums in RGWPutObj::execute at 16
* checksum not needed in policy condition
* fix checksum trailing header format
* move Blake3 to rgw_digest_blake3.h

Signed-off-by: Matt Benjamin <mbenjamin@redhat.com>
23 files changed:
src/CMakeLists.txt
src/common/ceph_crypto.h
src/rgw/CMakeLists.txt
src/rgw/rgw_auth_s3.cc
src/rgw/rgw_basic_types.h
src/rgw/rgw_blake3_digest.h [new file with mode: 0644]
src/rgw/rgw_cksum.h [new file with mode: 0644]
src/rgw/rgw_cksum_digest.h [new file with mode: 0644]
src/rgw/rgw_cksum_pipe.cc [new file with mode: 0644]
src/rgw/rgw_cksum_pipe.h [new file with mode: 0644]
src/rgw/rgw_common.h
src/rgw/rgw_crc_digest.h [new file with mode: 0644]
src/rgw/rgw_file_int.h
src/rgw/rgw_hex.h [new file with mode: 0644]
src/rgw/rgw_op.cc
src/rgw/rgw_op.h
src/rgw/rgw_rest_s3.cc
src/rgw/rgw_rest_s3.h
src/rgw/rgw_xxh_digest.h [new file with mode: 0644]
src/test/rgw/CMakeLists.txt
src/test/rgw/test_rgw_cksum.cc [new file with mode: 0644]
src/test/test_rgw_admin_log.cc
src/test/test_rgw_admin_meta.cc

index 5b21a2db04f594147ff107dfa9c82989d1e05b18..d28a949bf4afe67f41ccd772dac3a19a61b4b5ee 100644 (file)
@@ -203,6 +203,10 @@ if(HAVE_INTEL)
   else()
     set(object_format "elf64")
   endif()
+
+  set(SAVE_CMAKE_ASM_FLAGS ${CMAKE_ASM_FLAGS})
+  set(SAVE_CMAKE_ASM_COMPILER ${CMAKE_ASM_COMPILER})
+
   set(CMAKE_ASM_FLAGS "-f ${object_format}")
   set(CMAKE_ASM_COMPILER ${PROJECT_SOURCE_DIR}/src/nasm-wrapper)
   if(NOT WIN32)
index ed93d09e6e2735174da2f8fb543311deb7133b89..6b2fa50dc2aa679a18390011d7697a6f66028075 100644 (file)
@@ -91,7 +91,6 @@ namespace TOPNSPC::crypto {
         SHA512 () : OpenSSLDigest(EVP_sha512()) { }
     };
 
-
 # if OPENSSL_VERSION_NUMBER < 0x10100000L
   class HMAC {
   private:
index f91fbaed4308b72a8ad132ee65c381b437319a1d..cb2f93d34dbf794d0ce2e15888cc5118f103be09 100644 (file)
@@ -30,6 +30,33 @@ endfunction()
 
 find_package(ICU 52.0 COMPONENTS uc REQUIRED)
 
+# Portable BLAKE3 sources, built on every platform.
+set(blake3_srcs
+  ${CMAKE_SOURCE_DIR}/src/BLAKE3/c/blake3.c
+  ${CMAKE_SOURCE_DIR}/src/BLAKE3/c/blake3_dispatch.c
+  ${CMAKE_SOURCE_DIR}/src/BLAKE3/c/blake3_portable.c)
+
+if (HAVE_INTEL)
+  # Restore the compiler-detected assembler settings that were saved
+  # before they were pointed at the nasm wrapper: the BLAKE3 .S files
+  # appended below use gas syntax.
+  set(CMAKE_ASM_FLAGS ${SAVE_CMAKE_ASM_FLAGS})
+  set(CMAKE_ASM_COMPILER ${SAVE_CMAKE_ASM_COMPILER})
+  # Only the SIMD assembly is appended here; the portable .c files are
+  # already in blake3_srcs and must not be listed a second time.
+  if(WIN32)
+    list(APPEND blake3_srcs
+      ${CMAKE_SOURCE_DIR}/src/BLAKE3/c/blake3_sse2_x86-64_windows_gnu.S
+      ${CMAKE_SOURCE_DIR}/src/BLAKE3/c/blake3_sse41_x86-64_windows_gnu.S
+      ${CMAKE_SOURCE_DIR}/src/BLAKE3/c/blake3_avx2_x86-64_windows_gnu.S
+      ${CMAKE_SOURCE_DIR}/src/BLAKE3/c/blake3_avx512_x86-64_windows_gnu.S)
+  else()
+    list(APPEND blake3_srcs
+      ${CMAKE_SOURCE_DIR}/src/BLAKE3/c/blake3_sse2_x86-64_unix.S
+      ${CMAKE_SOURCE_DIR}/src/BLAKE3/c/blake3_sse41_x86-64_unix.S
+      ${CMAKE_SOURCE_DIR}/src/BLAKE3/c/blake3_avx2_x86-64_unix.S
+      ${CMAKE_SOURCE_DIR}/src/BLAKE3/c/blake3_avx512_x86-64_unix.S)
+  endif()
+endif()
+
 set(librgw_common_srcs
   services/svc_finisher.cc
   services/svc_bi_rados.cc
@@ -57,6 +84,7 @@ set(librgw_common_srcs
   services/svc_user_rados.cc
   services/svc_zone.cc
   services/svc_zone_utils.cc
+  ${blake3_srcs}
   rgw_account.cc
   rgw_acl.cc
   rgw_acl_s3.cc
@@ -70,6 +98,7 @@ set(librgw_common_srcs
   rgw_bucket.cc
   rgw_bucket_layout.cc
   rgw_cache.cc
+  rgw_cksum_pipe.cc
   rgw_common.cc
   rgw_compression.cc
   rgw_cors.cc
index 61c2118b672fdeee5258646917f3e0350baf9571..d521f60f6b125be15f47b4fbae15578a9dd401b3 100644 (file)
@@ -1469,7 +1469,7 @@ inline void AWSv4ComplMulti::extract_trailing_headers(
        /* populate trailer map with expected headers and their values, if sent */
        trailer_map.insert(trailer_map_t::value_type(k, v));
        /* populate to req_info.env as well */
-       put_prop(ys_header_mangle(k), v);
+       put_prop(ys_header_mangle(fmt::format("HTTP-{}", k)), v);
       });
       consumed += get<2>(ex_header);
     } /* one trailer */
index cd56db1081b9db29344e68cfe63d45c128f981df..771466ffae5f63f60776a528a6ad0c705c4ebe45 100644 (file)
@@ -31,6 +31,7 @@
 #include "rgw_user_types.h"
 #include "rgw_bucket_types.h"
 #include "rgw_obj_types.h"
+#include "rgw_cksum.h"
 
 #include "driver/rados/rgw_obj_manifest.h" // FIXME: subclass dependency
 
@@ -255,6 +256,7 @@ struct RGWUploadPartInfo {
   uint64_t size;
   uint64_t accounted_size{0};
   std::string etag;
+  rgw::cksum::Cksum cksum;
   ceph::real_time modified;
   RGWObjManifest manifest;
   RGWCompressionInfo cs_info;
@@ -265,7 +267,7 @@ struct RGWUploadPartInfo {
   RGWUploadPartInfo() : num(0), size(0) {}
 
   void encode(bufferlist& bl) const {
-    ENCODE_START(5, 2, bl);
+    ENCODE_START(6, 2, bl);
     encode(num, bl);
     encode(size, bl);
     encode(etag, bl);
@@ -274,10 +276,11 @@ struct RGWUploadPartInfo {
     encode(cs_info, bl);
     encode(accounted_size, bl);
     encode(past_prefixes, bl);
+    encode(cksum, bl);
     ENCODE_FINISH(bl);
   }
   void decode(bufferlist::const_iterator& bl) {
-    DECODE_START_LEGACY_COMPAT_LEN(5, 2, 2, bl);
+    DECODE_START_LEGACY_COMPAT_LEN(6, 2, 2, bl);
     decode(num, bl);
     decode(size, bl);
     decode(etag, bl);
@@ -293,6 +296,9 @@ struct RGWUploadPartInfo {
     if (struct_v >= 5) {
       decode(past_prefixes, bl);
     }
+    if (struct_v >= 6) {
+      decode(cksum, bl);
+    }
     DECODE_FINISH(bl);
   }
   void dump(Formatter *f) const;
diff --git a/src/rgw/rgw_blake3_digest.h b/src/rgw/rgw_blake3_digest.h
new file mode 100644 (file)
index 0000000..f8a9fd6
--- /dev/null
@@ -0,0 +1,47 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2023 Red Hat, Inc
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#pragma once
+
+#include <stdint.h>
+#include <stdio.h>
+#include "BLAKE3/c/blake3.h"
+
+#define XXH_INLINE_ALL 1 /* required for streaming variants */
+#include "xxhash.h"
+
+namespace rgw { namespace digest {
+
+/* Thin adapter that drives the BLAKE3 C hasher through the
+ * Restart/Update/Final call sequence. */
+class Blake3 {
+  public:
+    static constexpr uint16_t digest_size = BLAKE3_OUT_LEN /* 32 bytes */;
+
+    /* begin with a freshly initialized hasher */
+    Blake3() { Restart(); }
+
+    /* drop any accumulated state and start a new digest */
+    void Restart() { blake3_hasher_init(&h); }
+
+    /* absorb len bytes starting at data */
+    void Update(const unsigned char *data, uint64_t len) {
+       blake3_hasher_update(&h, data, len);
+    }
+
+    /* write digest_size bytes of output into digest */
+    void Final(unsigned char* digest) {
+       blake3_hasher_finalize(&h, digest, digest_size);
+    }
+
+  private:
+    blake3_hasher h;
+}; /* Blake3 */
+
+}} /* namespace */
diff --git a/src/rgw/rgw_cksum.h b/src/rgw/rgw_cksum.h
new file mode 100644 (file)
index 0000000..06ccacf
--- /dev/null
@@ -0,0 +1,297 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2019 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#pragma once
+
+#include <boost/algorithm/string/predicate.hpp>
+#include <cstdint>
+#include <stdint.h>
+#include <string>
+#include <string_view>
+#include <array>
+#include <iterator>
+#include <boost/variant.hpp>
+#include <boost/blank.hpp>
+#include <boost/algorithm/string.hpp>
+#include "fmt/format.h"
+#include "common/ceph_crypto.h"
+#include "common/armor.h"
+#include "rgw_blake3_digest.h"
+#include "rgw_crc_digest.h"
+#include "rgw_xxh_digest.h"
+#include <boost/algorithm/hex.hpp>
+#include "rgw_hex.h"
+#include "rgw_b64.h"
+
+#include "include/buffer.h"
+#include "include/encoding.h"
+
+namespace rgw { namespace cksum {
+
+  /* Supported checksum/digest algorithms.  The numeric value is used
+   * directly as an index into Cksum::checksums and is the value that
+   * Cksum::encode() writes, so the order of the enumerators and of
+   * that table must stay in step. */
+  enum class Type : uint16_t
+  {
+      none = 0,
+      crc32,  /* !cryptographic, but AWS supports */
+      crc32c, /* !cryptographic, but AWS supports */
+      xxh3,   /* !cryptographic, but strong and very fast */
+      sha1,   /* unsafe, but AWS supports */
+      sha256,
+      sha512,
+      blake3,
+  };
+
+  /* bit values for Desc::flags */
+  static constexpr uint16_t FLAG_NONE =      0x0000;
+  /* set for algorithms whose header name uses the x-amz-checksum-
+   * prefix rather than x-rgw-checksum- (see Cksum::header_name()) */
+  static constexpr uint16_t FLAG_AWS_CKSUM = 0x0001;
+
+  /* Compile-time descriptor of one checksum algorithm: its Type, its
+   * name, the raw digest length in bytes, the buffer length reserved
+   * for the armored form (see Cksum::to_armor()), and FLAG_* bits. */
+  class Desc
+  {
+  public:
+    const Type type;
+    const char* name;
+    const uint16_t digest_size;
+    const uint16_t armored_size;
+    const uint16_t flags;
+
+    /* Buffer length reserved for the armored form of an sz-byte
+     * digest.  Declared static: it reads no member state, and as a
+     * non-const member it could not be called on the constexpr
+     * descriptors in Cksum::checksums. */
+    static constexpr uint16_t to_armored_size(uint16_t sz) {
+      return sz / 3 * 4 + 4;
+    }
+
+    constexpr Desc(Type _type, const char* _name, uint16_t _size,
+                  uint16_t _flags)
+      : type(_type), name(_name),
+       digest_size(_size),
+       armored_size(to_armored_size(digest_size)),
+       flags(_flags)
+      {}
+
+    /* true when FLAG_AWS_CKSUM is set for this algorithm */
+    constexpr bool aws() const {
+      return (flags & FLAG_AWS_CKSUM) != 0;
+    }
+  }; /* Desc */
+
+  namespace  ba = boost::algorithm;
+
+  /* A concrete checksum value: the algorithm Type plus up to
+   * max_digest_size raw digest bytes, with helpers producing the
+   * header names and textual forms, and Ceph encode/decode. */
+  class Cksum {
+  public:
+    /* descriptor table, indexed by uint16_t(Type) */
+    static constexpr std::array<Desc, 8> checksums =
+    {
+      Desc(Type::none, "none", 0, FLAG_NONE),
+      Desc(Type::crc32, "crc32", 4, FLAG_AWS_CKSUM),
+      Desc(Type::crc32c, "crc32c", 4, FLAG_AWS_CKSUM),
+      Desc(Type::xxh3, "xxh3", 8, FLAG_NONE),
+      Desc(Type::sha1, "sha1", 20, FLAG_AWS_CKSUM),
+      Desc(Type::sha256, "sha256", 32, FLAG_AWS_CKSUM),
+      Desc(Type::sha512, "sha512", 64, FLAG_NONE),
+      Desc(Type::blake3, "blake3", 32, FLAG_NONE),
+    };
+
+    static constexpr uint16_t max_digest_size = 64;
+    using value_type = std::array<unsigned char, max_digest_size>;
+
+    Type type;
+    /* zero-filled on construction; only the first digest_size bytes
+     * of the selected algorithm are meaningful */
+    value_type digest{};
+
+    Cksum(Type _type = Type::none) : type(_type) {}
+
+    /* name of the given type, taken from the descriptor table */
+    static const char* type_string(const Type type) {
+      return (Cksum::checksums[uint16_t(type)]).name;
+    }
+
+    std::string aws_name() const {
+      return fmt::format("x-amz-checksum-{}", type_string(type));
+    }
+
+    std::string rgw_name() const {
+      return fmt::format("x-rgw-checksum-{}", type_string(type));
+    }
+
+    /* aws_name() for algorithms flagged FLAG_AWS_CKSUM, rgw_name()
+     * for the rest */
+    std::string header_name() const {
+      return ((Cksum::checksums[uint16_t(type)]).aws()) ?
+       aws_name() :
+       rgw_name();
+    }
+
+    /* Armored form of the digest.  The returned string always has
+     * length armored_size; bytes past the armored text are NUL, so
+     * compare via c_str() rather than by std::string equality. */
+    std::string to_armor() const {
+      std::string hs;
+      const auto& ckd = checksums[uint16_t(type)];
+      /* resize() already zero-fills the new characters */
+      hs.resize(ckd.armored_size);
+      const char* src = reinterpret_cast<const char*>(digest.data());
+      ceph_armor(hs.data(), hs.data() + ckd.armored_size,
+                src, src + ckd.digest_size);
+      return hs;
+    }
+
+    /* lower-case hex of the first digest_size bytes */
+    std::string hex() const {
+      std::string hs;
+      const auto& ckd = checksums[uint16_t(type)];
+      hs.reserve(ckd.digest_size * 2 + 1);
+      ba::hex_lower(digest.begin(), digest.begin() + ckd.digest_size,
+                   std::back_inserter(hs));
+      return hs;
+    }
+
+    /* NOTE(review): this encodes the hex text, not the raw digest
+     * bytes -- use to_armor() for the S3 presentation form */
+    std::string to_base64() const {
+      return rgw::to_base64(hex());
+    }
+
+    /* "{<name>}<to_base64()>" */
+    std::string to_string() const  {
+      const auto& ckd = checksums[uint16_t(type)];
+      return fmt::format("{{{}}}{}", ckd.name, to_base64());
+    }
+
+    /* wire format: type (u16), digest length (u16), digest bytes */
+    void encode(buffer::list& bl) const {
+      const auto& ckd = checksums[uint16_t(type)];
+      ENCODE_START(1, 1, bl);
+      encode(uint16_t(type), bl);
+      encode(ckd.digest_size, bl);
+      bl.append((char*)digest.data(), ckd.digest_size);
+      ENCODE_FINISH(bl);
+    }
+
+    /* Inverse of encode().  Both decoded fields are validated before
+     * use: every accessor indexes checksums[] with the type, and the
+     * length bounds a copy into the fixed-size digest array.
+     * Throws ceph::buffer::malformed_input on out-of-range values. */
+    void decode(bufferlist::const_iterator& p) {
+      DECODE_START(1, p);
+      uint16_t tt;
+      decode(tt, p);
+      if (tt >= checksums.size()) {
+       throw ceph::buffer::malformed_input(
+         "rgw::cksum::Cksum: unknown checksum type");
+      }
+      type = cksum::Type(tt);
+      uint16_t len;
+      decode(len, p);
+      if (len > max_digest_size) {
+       throw ceph::buffer::malformed_input(
+         "rgw::cksum::Cksum: digest length exceeds max_digest_size");
+      }
+      p.copy(len, (char*)digest.data());
+      DECODE_FINISH(p);
+    }
+  }; /* Cksum */
+  WRITE_CLASS_ENCODER(Cksum);
+
+  /* Map an algorithm name to its Type by case-insensitive comparison
+   * against the names in Cksum::checksums.  Returns Type::none for a
+   * null pointer or an unrecognized name. */
+  static inline Type parse_cksum_type(const char* name)
+  {
+    if (!name) {
+      /* boost::iequals would walk a null C string */
+      return Type::none;
+    }
+    for (const auto& ck : Cksum::checksums) {
+      if (boost::iequals(ck.name, name))
+       return ck.type;
+    }
+    return Type::none;
+  }
+
+  /* Abstract streaming-digest interface.  TDigest<T> adapts each
+   * concrete algorithm to it, so callers can work through a plain
+   * Digest*. */
+  class Digest {
+  public:
+    /* return to the initial (empty) state */
+    virtual void Restart() = 0;
+    /* absorb length bytes starting at input */
+    virtual void Update(const unsigned char *input, size_t length) = 0;
+    /* absorb the contents of a buffer list */
+    virtual void Update(const ceph::buffer::list& bl) = 0;
+    /* write the finished digest to the caller-provided buffer */
+    virtual void Final(unsigned char *digest) = 0;
+    virtual ~Digest() = default;
+  };
+
+  /* Adapts a concrete digest implementation T (anything providing
+   * Restart/Update/Final) to the virtual Digest interface. */
+  template<class T>
+  class TDigest : public Digest
+  {
+    T d;
+  public:
+    TDigest() {}
+    TDigest(TDigest&& rhs) noexcept
+      : d(std::move(rhs.d))
+    {}
+    void Restart() override { d.Restart(); }
+    /* length is size_t to match Digest::Update exactly; declared as
+     * uint64_t the override only compiles where the two types happen
+     * to be identical */
+    void Update(const unsigned char* data, size_t len) override {
+      d.Update(data, len);
+    }
+    /* feed each buffer segment of bl, in order */
+    void Update(const ceph::buffer::list& bl) override {
+      for (const auto& p : bl.buffers()) {
+       d.Update((const unsigned char *)p.c_str(), p.length());
+      }
+    }
+    void Final(unsigned char* digest) override {
+      d.Final(digest);
+    }
+  };
+
+  /* Digest-interface wrappers, one per supported algorithm */
+  using Blake3 = TDigest<rgw::digest::Blake3>;
+  using Crc32 = TDigest<rgw::digest::Crc32>;
+  using Crc32c = TDigest<rgw::digest::Crc32c>;
+  using XXH3 = TDigest<rgw::digest::XXH3>;
+  using SHA1 = TDigest<ceph::crypto::SHA1>;
+  using SHA256 = TDigest<ceph::crypto::SHA256>;
+  using SHA512 = TDigest<ceph::crypto::SHA512>;
+
+  /* Holds exactly one digest object by value; boost::blank stands
+   * for "no digest" (Type::none). */
+  using DigestVariant = boost::variant<boost::blank,
+                                      Blake3,
+                                      Crc32,
+                                      Crc32c,
+                                      XXH3,
+                                      SHA1,
+                                      SHA256,
+                                      SHA512>;
+
+  /* Visitor yielding the Digest* view of whichever alternative a
+   * DigestVariant currently holds; nullptr for boost::blank. */
+  struct get_digest_ptr : public boost::static_visitor<Digest*>
+  {
+    get_digest_ptr() {}
+
+    /* empty variant: there is no digest to point at */
+    Digest* operator()(const boost::blank&) const { return nullptr; }
+
+    /* one overload per alternative, each upcasting to the interface */
+    Digest* operator()(Blake3& held) const { return &held; }
+    Digest* operator()(Crc32& held) const { return &held; }
+    Digest* operator()(Crc32c& held) const { return &held; }
+    Digest* operator()(XXH3& held) const { return &held; }
+    Digest* operator()(SHA1& held) const { return &held; }
+    Digest* operator()(SHA256& held) const { return &held; }
+    Digest* operator()(SHA512& held) const { return &held; }
+  };
+
+  /* Pointer to the digest stored inside ev, or nullptr when ev holds
+   * boost::blank.  The pointer refers into ev itself. */
+  static inline Digest* get_digest(DigestVariant& ev)
+  {
+    const get_digest_ptr visitor{};
+    return boost::apply_visitor(visitor, ev);
+  }
+
+  /* Construct the digest object matching cksum_type; Type::none (or
+   * any value without a case below) yields boost::blank. */
+  static inline DigestVariant digest_factory(const Type cksum_type)
+  {
+    switch (cksum_type) {
+    case Type::crc32:
+      return Crc32();
+    case Type::crc32c:
+      return Crc32c();
+    case Type::xxh3:
+      return XXH3();
+    case Type::sha1:
+      return SHA1();
+    case Type::sha256:
+      return SHA256();
+    case Type::sha512:
+      return SHA512();
+    case Type::blake3:
+      return Blake3();
+    case Type::none:
+      break;
+    }
+    return boost::blank();
+  } /* digest_factory */
+
+  /* Produce a Cksum of the given type; when digest is non-null its
+   * final value is written into the Cksum's digest bytes. */
+  static inline Cksum finalize_digest(Digest* digest, Type type)
+  {
+    Cksum result(type);
+    if (digest != nullptr) {
+      digest->Final(result.digest.data());
+    }
+    return result;
+  }
+
+  /* Name of the given checksum type, from the descriptor table. */
+  static inline std::string to_string(const Type type) {
+    const auto& ckd = Cksum::checksums[uint16_t(type)];
+    return ckd.name;
+  }
+
+}} /* namespace */
diff --git a/src/rgw/rgw_cksum_digest.h b/src/rgw/rgw_cksum_digest.h
new file mode 100644 (file)
index 0000000..a70c892
--- /dev/null
@@ -0,0 +1,133 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2019 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#pragma once
+
+#include <boost/variant.hpp>
+#include <boost/blank.hpp>
+#include "rgw_blake3_digest.h"
+#include "rgw_crc_digest.h"
+#include "rgw_xxh_digest.h"
+
+#include "rgw_cksum.h"
+
+namespace rgw { namespace cksum {
+
+  /* Abstract streaming-digest interface.  TDigest<T> adapts each
+   * concrete algorithm to it, so callers can work through a plain
+   * Digest*.
+   * NOTE(review): this header includes rgw_cksum.h, which in this
+   * revision also defines Digest, TDigest and the helpers below --
+   * confirm which copy is meant to remain, since both cannot be
+   * visible in one translation unit. */
+  class Digest {
+  public:
+    /* return to the initial (empty) state */
+    virtual void Restart() = 0;
+    /* absorb length bytes starting at input */
+    virtual void Update(const unsigned char *input, size_t length) = 0;
+    /* absorb the contents of a buffer list */
+    virtual void Update(const ceph::buffer::list& bl) = 0;
+    /* write the finished digest to the caller-provided buffer */
+    virtual void Final(unsigned char *digest) = 0;
+    virtual ~Digest() = default;
+  };
+
+  /* Adapts a concrete digest implementation T (anything providing
+   * Restart/Update/Final) to the virtual Digest interface. */
+  template<class T>
+  class TDigest : public Digest
+  {
+    T d;
+  public:
+    TDigest() {}
+    TDigest(TDigest&& rhs) noexcept
+      : d(std::move(rhs.d))
+    {}
+    void Restart() override { d.Restart(); }
+    /* length is size_t to match Digest::Update exactly; declared as
+     * uint64_t the override only compiles where the two types happen
+     * to be identical */
+    void Update(const unsigned char* data, size_t len) override {
+      d.Update(data, len);
+    }
+    /* feed each buffer segment of bl, in order */
+    void Update(const ceph::buffer::list& bl) override {
+      for (const auto& p : bl.buffers()) {
+       d.Update((const unsigned char *)p.c_str(), p.length());
+      }
+    }
+    void Final(unsigned char* digest) override {
+      d.Final(digest);
+    }
+  };
+
+  /* Digest-interface wrappers, one per supported algorithm */
+  using Blake3 = TDigest<rgw::digest::Blake3>;
+  using Crc32 = TDigest<rgw::digest::Crc32>;
+  using Crc32c = TDigest<rgw::digest::Crc32c>;
+  using XXH3 = TDigest<rgw::digest::XXH3>;
+  using SHA1 = TDigest<ceph::crypto::SHA1>;
+  using SHA256 = TDigest<ceph::crypto::SHA256>;
+  using SHA512 = TDigest<ceph::crypto::SHA512>;
+
+  /* Holds exactly one digest object by value; boost::blank stands
+   * for "no digest" (Type::none). */
+  using DigestVariant = boost::variant<boost::blank,
+                                      Blake3,
+                                      Crc32,
+                                      Crc32c,
+                                      XXH3,
+                                      SHA1,
+                                      SHA256,
+                                      SHA512>;
+
+  /* Visitor yielding the Digest* view of whichever alternative a
+   * DigestVariant currently holds; nullptr for boost::blank. */
+  struct get_digest_ptr : public boost::static_visitor<Digest*>
+  {
+    get_digest_ptr() {}
+
+    /* empty variant: there is no digest to point at */
+    Digest* operator()(const boost::blank&) const { return nullptr; }
+
+    /* one overload per alternative, each upcasting to the interface */
+    Digest* operator()(Blake3& held) const { return &held; }
+    Digest* operator()(Crc32& held) const { return &held; }
+    Digest* operator()(Crc32c& held) const { return &held; }
+    Digest* operator()(XXH3& held) const { return &held; }
+    Digest* operator()(SHA1& held) const { return &held; }
+    Digest* operator()(SHA256& held) const { return &held; }
+    Digest* operator()(SHA512& held) const { return &held; }
+  };
+
+  /* Pointer to the digest stored inside ev, or nullptr when ev holds
+   * boost::blank.  The pointer refers into ev itself. */
+  static inline Digest* get_digest(DigestVariant& ev)
+  {
+    const get_digest_ptr visitor{};
+    return boost::apply_visitor(visitor, ev);
+  }
+
+  /* Construct the digest object matching cksum_type; Type::none (or
+   * any value without a case below) yields boost::blank. */
+  static inline DigestVariant digest_factory(const Type cksum_type)
+  {
+    switch (cksum_type) {
+    case Type::crc32:
+      return Crc32();
+    case Type::crc32c:
+      return Crc32c();
+    case Type::xxh3:
+      return XXH3();
+    case Type::sha1:
+      return SHA1();
+    case Type::sha256:
+      return SHA256();
+    case Type::sha512:
+      return SHA512();
+    case Type::blake3:
+      return Blake3();
+    case Type::none:
+      break;
+    }
+    return boost::blank();
+  } /* digest_factory */
+
+  /* Produce a Cksum of the given type; when digest is non-null its
+   * final value is written into the Cksum's digest bytes. */
+  static inline Cksum finalize_digest(Digest* digest, Type type)
+  {
+    Cksum result(type);
+    if (digest != nullptr) {
+      digest->Final(result.digest.data());
+    }
+    return result;
+  }
+
+}} /* namespace */
diff --git a/src/rgw/rgw_cksum_pipe.cc b/src/rgw/rgw_cksum_pipe.cc
new file mode 100644 (file)
index 0000000..ea03894
--- /dev/null
@@ -0,0 +1,91 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright contributors to the Ceph project
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#include "rgw_cksum_pipe.h"
+#include <memory>
+#include <string>
+#include <fmt/format.h>
+#include <boost/algorithm/string.hpp>
+#include "rgw_common.h"
+#include "common/dout.h"
+#include "rgw_client_io.h"
+
+namespace rgw::putobj {
+
+  /* Build a checksum filter in front of next.  dv owns the digest
+   * object chosen by _typ and _digest points into it, so dv has to be
+   * initialized before _digest (the init list follows the member
+   * declaration order).  _digest is nullptr when _typ selects no
+   * digest (Type::none). */
+  RGWPutObj_Cksum::RGWPutObj_Cksum(rgw::sal::DataProcessor* next,
+                                  rgw::cksum::Type _typ,
+                                  cksum_hdr_t&& _hdr)
+    : Pipe(next),
+      _type(_typ),
+      dv(rgw::cksum::digest_factory(_type)),
+      _digest(cksum::get_digest(dv)), cksum_hdr(_hdr),
+      _state(State::DIGEST)
+  {}
+
+  /* Create a checksum filter when the request names a checksum
+   * algorithm; returns an empty unique_ptr when neither algorithm
+   * header is present in env. */
+  std::unique_ptr<RGWPutObj_Cksum> RGWPutObj_Cksum::Factory(
+    rgw::sal::DataProcessor* next, const RGWEnv& env)
+  {
+    /* Look for the algorithm headers, x-amz-checksum-algorithm first.
+     *
+     * From the AWS PutObject documentation: if the individual checksum
+     * value you provide through x-amz-checksum-algorithm doesn't match
+     * the checksum algorithm you set through
+     * x-amz-sdk-checksum-algorithm, Amazon S3 ignores any provided
+     * ChecksumAlgorithm parameter and uses the checksum algorithm that
+     * matches the provided value in x-amz-checksum-algorithm.
+     * https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObject.html
+     */
+    cksum_hdr_t algo_header(nullptr, nullptr);
+    for (const auto key : {"HTTP_X_AMZ_CHECKSUM_ALGORITHM",
+                          "HTTP_X_AMZ_SDK_CHECKSUM_ALGORITHM"}) {
+      auto value = env.get(key);
+      if (value) {
+       algo_header = cksum_hdr_t(key, value);
+       break;
+      }
+    }
+
+    if (!algo_header.first) {
+      /* no checksum header */
+      return std::unique_ptr<RGWPutObj_Cksum>();
+    }
+
+    if (!algo_header.second) {
+      /* malformed checksum algorithm header(s) */
+      throw rgw::io::Exception(EINVAL, std::system_category());
+    }
+
+    auto cksum_type = cksum::parse_cksum_type(algo_header.second);
+    return std::make_unique<RGWPutObj_Cksum>(next,
+                                            cksum_type,
+                                            std::move(algo_header));
+  }
+
+  /* Fold data into the running digest, then hand it on unchanged to
+   * the next processor in the pipe.  Returns the downstream result. */
+  int RGWPutObj_Cksum::process(ceph::buffer::list &&data, uint64_t logical_offset)
+  {
+    /* _digest is null when no digest was selected (Type::none) */
+    if (_digest) {
+      for (const auto& ptr : data.buffers()) {
+       _digest->Update(reinterpret_cast<const unsigned char*>(ptr.c_str()),
+                       ptr.length());
+      }
+    }
+    /* this filter only observes the stream: without forwarding, the
+     * object data would never reach the processors behind it */
+    return Pipe::process(std::move(data), logical_offset);
+  }
+
+} // namespace rgw::putobj
diff --git a/src/rgw/rgw_cksum_pipe.h b/src/rgw/rgw_cksum_pipe.h
new file mode 100644 (file)
index 0000000..047fbcb
--- /dev/null
@@ -0,0 +1,93 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright contributors to the Ceph project
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#pragma once
+
+#include <utility>
+#include <tuple>
+#include <cstring>
+#include "rgw_cksum.h"
+#include "rgw_putobj.h"
+
+namespace rgw::putobj {
+
+  namespace cksum = rgw::cksum;
+  using cksum_hdr_t = std::pair<const char*, const char*>;
+  
+  // PutObj filter for streaming checksums: digests the data passed
+  // through process() and compares the result with the checksum value
+  // supplied in the request environment.
+  class RGWPutObj_Cksum : public rgw::putobj::Pipe {
+
+    enum class State : uint16_t {
+      START,
+      DIGEST,
+      FINAL
+    };
+
+    cksum::Type _type;
+    cksum::DigestVariant dv;   // owns the digest object
+    cksum::Digest* _digest;    // points into dv; null for Type::none
+    cksum::Cksum _cksum;       // filled in by finalize()
+    cksum_hdr_t cksum_hdr;     // (algorithm header key, header value)
+    State _state;
+
+  public:
+
+    // (checksum matched?, computed checksum)
+    using VerifyResult = std::tuple<bool, const cksum::Cksum&>;
+
+    static std::unique_ptr<RGWPutObj_Cksum> Factory(
+      rgw::sal::DataProcessor* next, const RGWEnv&);
+
+    RGWPutObj_Cksum(rgw::sal::DataProcessor* next, rgw::cksum::Type _type,
+                   cksum_hdr_t&& _hdr);
+    // non-copyable: _digest points into this object's own dv
+    RGWPutObj_Cksum(const RGWPutObj_Cksum& rhs) = delete;
+    ~RGWPutObj_Cksum() {}
+
+    cksum::Type type() const { return _type; }
+    cksum::Digest* digest() const { return _digest; }
+    const cksum::Cksum& cksum() const { return _cksum; }
+    State state() const { return _state; }
+
+    const cksum_hdr_t& header() const {
+      return cksum_hdr;
+    }
+
+    // Finish the digest, store it in _cksum and move to State::FINAL.
+    const cksum::Cksum& finalize() {
+      _cksum = finalize_digest(_digest, _type);
+      _state = State::FINAL;
+      return _cksum;
+    }
+
+    // Checksum value the client supplied for the selected algorithm,
+    // or nullptr when there is none.  The algorithm name is parsed
+    // case-insensitively elsewhere, while the env lookup is an exact
+    // match on an upper-case key, so the name is upper-cased here.
+    const char* expected(const RGWEnv& env) {
+      if (!cksum_hdr.second) {
+       return nullptr;
+      }
+      auto hk = fmt::format(
+       "HTTP_X_AMZ_CHECKSUM_{}",
+       boost::to_upper_copy(std::string(cksum_hdr.second)));
+      auto hv = env.get(hk.c_str());
+      return hv;
+    }
+
+    // Finalize if still digesting, then compare the armored digest
+    // with the expected value.  strcmp is deliberate: to_armor()
+    // returns a NUL-padded string.
+    VerifyResult verify(const RGWEnv& env) {
+      if (_state == State::DIGEST) [[likely]] {
+       (void) finalize();
+      }
+      auto hv = expected(env);
+      auto cv = _cksum.to_armor();
+      return VerifyResult(cksum_hdr.first &&
+                         hv && !std::strcmp(hv, cv.c_str()),
+                         _cksum);
+    }
+
+    int process(bufferlist &&data, uint64_t logical_offset) override;
+
+  }; /* RGWPutObj_Cksum */
+
+} // namespace rgw::putobj
index 2002ae51ec9b9ab12bde7f3a5c2452730e528ed5..929ebc60b9208b2402a6d14984dbee0713997c85 100644 (file)
@@ -48,6 +48,7 @@
 #include "include/rados/librados.hpp"
 #include "rgw_public_access.h"
 #include "rgw_sal_fwd.h"
+#include "rgw_hex.h"
 
 namespace ceph {
   class Formatter;
@@ -82,6 +83,7 @@ using ceph::crypto::MD5;
 #define RGW_ATTR_LC            RGW_ATTR_PREFIX "lc"
 #define RGW_ATTR_CORS          RGW_ATTR_PREFIX "cors"
 #define RGW_ATTR_ETAG          RGW_ATTR_PREFIX "etag"
+#define RGW_ATTR_CKSUM         RGW_ATTR_PREFIX "cksum"
 #define RGW_ATTR_BUCKETS       RGW_ATTR_PREFIX "buckets"
 #define RGW_ATTR_META_PREFIX   RGW_ATTR_PREFIX RGW_AMZ_META_PREFIX
 #define RGW_ATTR_CONTENT_TYPE  RGW_ATTR_PREFIX "content_type"
@@ -160,6 +162,9 @@ using ceph::crypto::MD5;
 #define RGW_ATTR_UNIX_KEY1      RGW_ATTR_PREFIX "unix-key1"
 #define RGW_ATTR_UNIX1          RGW_ATTR_PREFIX "unix1"
 
+/* Content Checksums */
+#define RGW_ATTR_AMZ_CKSUM      RGW_ATTR_PREFIX "x-amz-content-checksum"
+
 #define RGW_ATTR_CRYPT_PREFIX   RGW_ATTR_PREFIX "crypt."
 #define RGW_ATTR_CRYPT_MODE     RGW_ATTR_CRYPT_PREFIX "mode"
 #define RGW_ATTR_CRYPT_KEYMD5   RGW_ATTR_CRYPT_PREFIX "keymd5"
@@ -1027,6 +1032,8 @@ enum RGWBucketFlags {
 
 class RGWSI_Zone;
 
+#include "rgw_cksum.h"
+
 struct RGWBucketInfo {
   rgw_bucket bucket;
   rgw_owner owner;
@@ -1054,6 +1061,8 @@ struct RGWBucketInfo {
 
   std::map<std::string, uint32_t> mdsearch_config;
 
+  rgw::cksum::Type cksum_type = rgw::cksum::Type::none;
+
   // resharding
   cls_rgw_reshard_status reshard_status{cls_rgw_reshard_status::NOT_RESHARDING};
   std::string new_bucket_instance_id;
@@ -1064,7 +1073,6 @@ struct RGWBucketInfo {
 
   void encode(bufferlist& bl) const;
   void decode(bufferlist::const_iterator& bl);
-
   void dump(Formatter *f) const;
   static void generate_test_instances(std::list<RGWBucketInfo*>& o);
 
@@ -1550,61 +1558,6 @@ struct multipart_upload_info
 };
 WRITE_CLASS_ENCODER(multipart_upload_info)
 
-static inline void buf_to_hex(const unsigned char* const buf,
-                              const size_t len,
-                              char* const str)
-{
-  str[0] = '\0';
-  for (size_t i = 0; i < len; i++) {
-    ::sprintf(&str[i*2], "%02x", static_cast<int>(buf[i]));
-  }
-}
-
-template<size_t N> static inline std::array<char, N * 2 + 1>
-buf_to_hex(const std::array<unsigned char, N>& buf)
-{
-  static_assert(N > 0, "The input array must be at least one element long");
-
-  std::array<char, N * 2 + 1> hex_dest;
-  buf_to_hex(buf.data(), N, hex_dest.data());
-  return hex_dest;
-}
-
-static inline int hexdigit(char c)
-{
-  if (c >= '0' && c <= '9')
-    return (c - '0');
-  c = toupper(c);
-  if (c >= 'A' && c <= 'F')
-    return c - 'A' + 0xa;
-  return -EINVAL;
-}
-
-static inline int hex_to_buf(const char *hex, char *buf, int len)
-{
-  int i = 0;
-  const char *p = hex;
-  while (*p) {
-    if (i >= len)
-      return -EINVAL;
-    buf[i] = 0;
-    int d = hexdigit(*p);
-    if (d < 0)
-      return d;
-    buf[i] = d << 4;
-    p++;
-    if (!*p)
-      return -EINVAL;
-    d = hexdigit(*p);
-    if (d < 0)
-      return d;
-    buf[i] += d;
-    i++;
-    p++;
-  }
-  return i;
-}
-
 static inline int rgw_str_to_bool(const char *s, int def_val)
 {
   if (!s)
diff --git a/src/rgw/rgw_crc_digest.h b/src/rgw/rgw_crc_digest.h
new file mode 100644 (file)
index 0000000..8e97df5
--- /dev/null
@@ -0,0 +1,93 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2023 Red Hat, Inc
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#pragma once
+
+#include <bit>
+#include <array>
+#include <concepts>
+#include <algorithm>
+#include <stdio.h>
+#include "include/crc32c.h"
+#include <boost/crc.hpp>
+
+namespace rgw { namespace digest {
+
+  /* stand-in for C++23 std::byteswap (reverses the bytes of an integral value),
+   * after https://en.cppreference.com/w/cpp/numeric/byteswap */
+  template <std::integral T> constexpr T byteswap(T value) noexcept {
+    static_assert(std::has_unique_object_representations_v<T>,
+                 "T may not have padding bits");
+    using bytes_t = std::array<std::byte, sizeof(T)>;
+    auto bytes = std::bit_cast<bytes_t>(value);
+    std::ranges::reverse(bytes);
+    return std::bit_cast<T>(bytes);
+  } /* byteswap */
+
+  /* CRC-32 (polynomial 0x04C11DB7) built on boost::crc, as configured by imtzw */
+  class Crc32 {
+  private:
+    using engine_t = boost::crc_optimal<
+    32, 0x04C11DB7, 0xFFFFFFFF, 0xFFFFFFFF, true, true>;
+    engine_t engine;
+
+  public:
+    static constexpr uint16_t digest_size = 4;
+
+    Crc32() { Restart(); }
+
+    void Restart() { engine.reset(); }
+
+    void Update(const unsigned char *data, uint64_t len) {
+      engine.process_bytes(data, len);
+    }
+
+    void Final(unsigned char* digest) {
+      /* emit the checksum most-significant byte first (the order the crc32 and
+       * cksfv utilities treat as canonical); big-endian hosts need no swap */
+      uint32_t sum = engine.checksum();
+      if constexpr (std::endian::native != std::endian::big) {
+       sum = rgw::digest::byteswap(sum);
+      }
+      memcpy(digest, &sum, sizeof(sum));
+    }
+  }; /* Crc32 */
+
+  /* Ceph hw-specialized crc32c (0x1EDC6F41); Final() emits the xor'd sum, MSB first */
+  class Crc32c {
+  private:
+    uint32_t crc; /* running accumulator, not yet final-xor'd */
+
+  public:
+    static constexpr uint16_t digest_size = 4;
+    static constexpr uint32_t initial_value = 0xffffffff;
+
+    Crc32c() { Restart(); }
+
+    void Restart() { crc = initial_value; }
+
+    void Update(const unsigned char *data, uint64_t len) {
+      crc = ceph_crc32c(crc, data, len);
+    }
+
+    void Final(unsigned char* digest) {
+      uint32_t final = crc ^ 0xffffffff; /* local copy: accumulator stays intact */
+      if constexpr (std::endian::native != std::endian::big) {
+       final = rgw::digest::byteswap(final);
+      }
+      memcpy((char*) digest, &final, sizeof(final));
+    }
+  }; /* Crc32c */
+}} /* namespace */
index d61ef33c8a880134d9a9724553d20789a01acf1d..0a1db64520722f015455668fcdb879697266c56b 100644 (file)
@@ -37,6 +37,7 @@
 #include "rgw_aio_throttle.h"
 #include "rgw_compression.h"
 #include "rgw_perf_counters.h"
+#include "rgw_cksum.h"
 
 
 /* XXX
diff --git a/src/rgw/rgw_hex.h b/src/rgw/rgw_hex.h
new file mode 100644 (file)
index 0000000..ceb29ff
--- /dev/null
@@ -0,0 +1,78 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2023 Red Hat, Inc
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#pragma once
+
+#include <array>
+#include <stdint.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <errno.h>
+
+static inline void buf_to_hex(const unsigned char* const buf,
+                              const size_t len,
+                              char* const str)
+{
+  char* out = str;
+  *out = '\0'; /* empty input still yields a terminated (empty) string */
+  for (const unsigned char* p = buf; p != buf + len; ++p, out += 2)
+    ::sprintf(out, "%02x", static_cast<int>(*p)); /* 2 digits + NUL each time */
+}
+
+template<size_t N> static inline std::array<char, N * 2 + 1>
+buf_to_hex(const std::array<unsigned char, N>& buf)
+{
+  static_assert(N > 0, "The input array must be at least one element long");
+  /* two hex digits per input byte plus the terminating NUL */
+  std::array<char, N * 2 + 1> text;
+  buf_to_hex(buf.data(), buf.size(), text.data());
+  return text;
+}
+
+static inline int hexdigit(char c)
+{
+  /* 0..15 for a hex digit, else -EINVAL; no toupper(): UB for negative char */
+  if (c >= '0' && c <= '9')
+    return (c - '0');
+  if (c >= 'a' && c <= 'f')
+    return c - 'a' + 0xa;
+  return (c >= 'A' && c <= 'F') ? c - 'A' + 0xa : -EINVAL;
+}
+
+static inline int hex_to_buf(const char *hex, char *buf, int len)
+{
+  /* Decode the NUL-terminated hex string `hex` into at most `len` bytes of
+   * `buf`.  Returns the number of bytes written, or a negative errno
+   * (-EINVAL) on a non-hex character, an odd number of digits, or input
+   * longer than `len` bytes; `buf` may be partially written on error. */
+  int count = 0;
+  for (const char *cur = hex; *cur; cur += 2, ++count) {
+    if (count >= len)
+      return -EINVAL;
+    buf[count] = 0;
+    const int hi = hexdigit(cur[0]);
+    if (hi < 0)
+      return hi;
+    buf[count] = hi << 4;
+    if (!cur[1])
+      return -EINVAL; /* dangling high nibble */
+    const int lo = hexdigit(cur[1]);
+    if (lo < 0)
+      return lo;
+    buf[count] += lo;
+  }
+  return count;
+} /* hex_to_buf */
index 71936702e93f518ff4bc040f2312617ed10644f3..2f8e2abf916ebc81a7ef2f0ed32fc17eece4f843 100644 (file)
@@ -24,6 +24,7 @@
 #include "common/ceph_json.h"
 #include "common/static_ptr.h"
 #include "common/perf_counters_key.h"
+#include "rgw_common.h"
 #include "rgw_tracer.h"
 
 #include "rgw_rados.h"
@@ -56,6 +57,7 @@
 #include "rgw_sal.h"
 #include "rgw_sal_rados.h"
 #include "rgw_torrent.h"
+#include "rgw_cksum_pipe.h"
 #include "rgw_lua_data_filter.h"
 #include "rgw_lua.h"
 #include "rgw_iam_managed_policy.h"
@@ -4322,9 +4324,13 @@ void RGWPutObj::execute(optional_yield y)
   std::optional<RGWPutObj_Compress> compressor;
   std::optional<RGWPutObj_Torrent> torrent;
 
+  /* XXX Cksum::DigestVariant was designed to avoid allocation, but going with
+   * factory method to avoid issues with move assignment when wrapped */
+  std::unique_ptr<rgw::putobj::RGWPutObj_Cksum> cksum_filter;
   std::unique_ptr<rgw::sal::DataProcessor> encrypt;
   std::unique_ptr<rgw::sal::DataProcessor> run_lua;
 
+  /* data processor filters--last filter runs first */
   if (!append) { // compression and encryption only apply to full object uploads
     op_ret = get_encrypt_filter(&encrypt, filter);
     if (op_ret < 0) {
@@ -4352,7 +4358,8 @@ void RGWPutObj::execute(optional_yield y)
     if (torrent = get_torrent_filter(filter); torrent) {
       filter = &*torrent;
     }
-    // run lua script before data is compressed and encrypted - last filter runs first
+    /* checksum and lua filters must run before compression and encryption
+     * filters, checksum first (probably?) */
     op_ret = get_lua_filter(&run_lua, filter);
     if (op_ret < 0) {
       return;
@@ -4360,7 +4367,18 @@ void RGWPutObj::execute(optional_yield y)
     if (run_lua) {
       filter = &*run_lua;
     }
-  }
+    /* optional streaming checksum */
+    try {
+      cksum_filter =
+       rgw::putobj::RGWPutObj_Cksum::Factory(filter, *s->info.env);
+    } catch (const rgw::io::Exception& e) {
+      op_ret = e.code().value();
+      return;
+    }
+    if (cksum_filter) {
+      filter = &*cksum_filter;
+    }
+  } /* !append */
   tracepoint(rgw_op, before_data_transfer, s->req_id.c_str());
   do {
     bufferlist data;
@@ -4500,6 +4518,37 @@ void RGWPutObj::execute(optional_yield y)
   bl.append(etag.c_str(), etag.size());
   emplace_attr(RGW_ATTR_ETAG, std::move(bl));
 
+  if (cksum_filter) {
+    const auto& hdr = cksum_filter->header();
+    auto cksum_verify =
+      cksum_filter->verify(*s->info.env); // valid or no supplied cksum
+    cksum = get<1>(cksum_verify);
+    if (std::get<0>(cksum_verify)) {
+      buffer::list cksum_bl;
+      ldpp_dout(this, 16)
+       << fmt::format("{} checksum verified ", hdr.second)
+       << fmt::format("\n\tcomputed={} == \n\texpected=  {}",
+                      cksum->to_armor(),
+                      cksum_filter->expected(*s->info.env))
+       << dendl;
+      cksum->encode(cksum_bl);
+      emplace_attr(RGW_ATTR_CKSUM, std::move(cksum_bl));
+    } else {
+      /* content checksum mismatch */
+      auto computed_ck = cksum->to_armor();
+      auto expected_ck = cksum_filter->expected(*s->info.env);
+
+      ldpp_dout(this, 4)
+       << fmt::format("{} content checksum mismatch", hdr.second)
+       << fmt::format("\n\tcalculated={} != \n\texpected={}",
+                      computed_ck,
+                      (!!expected_ck) ? expected_ck : "(checksum unavailable)")
+       << dendl;
+      op_ret = -ERR_INVALID_REQUEST;
+      return;
+    }
+  }
+
   populate_with_generic_attrs(s, attrs);
   op_ret = rgw_get_request_metadata(this, s->cct, s->info, attrs);
   if (op_ret < 0) {
index afd5f8a685b715cc6edaf5eb497f2c3e06681c61..111fef519ace9ab146248ea9b23c3f77d3ee41ce 100644 (file)
@@ -34,6 +34,7 @@
 #include "common/ceph_json.h"
 #include "common/ceph_time.h"
 
+#include "rgw_cksum.h"
 #include "rgw_common.h"
 #include "rgw_dmclock.h"
 #include "rgw_sal.h"
@@ -1253,6 +1254,9 @@ protected:
   RGWObjectRetention *obj_retention;
   RGWObjectLegalHold *obj_legal_hold;
 
+  // optional cksum
+  boost::optional<rgw::cksum::Cksum> cksum;
+
 public:
   RGWPutObj() : ofs(0),
                 supplied_md5_b64(NULL),
@@ -2152,7 +2156,7 @@ inline int rgw_get_request_metadata(const DoutPrefixProvider *dpp,
       "x-amz-server-side-encryption-customer-algorithm",
       "x-amz-server-side-encryption-customer-key",
       "x-amz-server-side-encryption-customer-key-md5",
-      "x-amz-storage-class"
+      "x-amz-storage-class",
   };
 
   size_t valid_meta_count = 0;
index e7316485a972c9695a8b858674b8c97525a90bad..3dcedb1a28bc88d6bfe779d2e359009704b087f3 100644 (file)
@@ -306,6 +306,12 @@ int RGWGetObj_ObjStore_S3::get_params(optional_yield y)
   dst_zone_trace = s->info.args.get(RGW_SYS_PARAM_PREFIX "if-not-replicated-to");
   get_torrent = s->info.args.exists("torrent");
 
+  auto checksum_mode_hdr =
+    s->info.env->get_optional("HTTP_X_AMZ_CHECKSUM_MODE");
+  checksum_mode =
+    (checksum_mode_hdr &&
+     boost::algorithm::iequals(*checksum_mode_hdr, "enabled"));
+
   // optional part number
   auto optstr = s->info.args.get_optional("partNumber");
   if (optstr) {
@@ -491,6 +497,20 @@ int RGWGetObj_ObjStore_S3::send_response_data(bufferlist& bl, off_t bl_ofs,
       }
     }
 
+    if (checksum_mode) {
+      if (auto i = attrs.find(RGW_ATTR_CKSUM); i != attrs.end()) {
+       try {
+         rgw::cksum::Cksum cksum;
+         decode(cksum, i->second);
+         dump_header(s, cksum.header_name(), cksum.to_armor());
+       }  catch (buffer::error& err) {
+         ldpp_dout(this, 0) << "ERROR: failed to decode rgw::cksum::Cksum"
+                            << dendl;
+         /* XXX return error here?  the user might prefer data we have */
+       }
+      }
+    } /* checksum_mode */
+
     for (struct response_attr_param *p = resp_attr_params; p->param; p++) {
       bool exists;
       string val = s->info.args.get(p->param, &exists);
@@ -2713,12 +2733,18 @@ void RGWPutObj_ObjStore_S3::send_response()
       dump_content_length(s, 0);
       dump_header_if_nonempty(s, "x-amz-version-id", version_id);
       dump_header_if_nonempty(s, "x-amz-expiration", expires);
+      if (cksum) {
+       dump_header(s, cksum->header_name(), cksum->to_armor());
+      }
       for (auto &it : crypt_http_responses)
         dump_header(s, it.first, it.second);
     } else {
       dump_errno(s);
       dump_header_if_nonempty(s, "x-amz-version-id", version_id);
       dump_header_if_nonempty(s, "x-amz-expiration", expires);
+      if (cksum) {
+       dump_header(s, cksum->header_name(), cksum->to_armor());
+      }
       end_header(s, this, to_mime_type(s->format));
       dump_start(s);
       struct tm tmp;
index dba32471745054c49740cbcb450f0e02bfe2b3d5..7f7b91df37db054d1543511ae364aa079403d915 100644 (file)
@@ -42,6 +42,7 @@ protected:
   // Serving a custom error page from an object is really a 200 response with
   // just the status line altered.
   int custom_http_ret = 0;
+  bool checksum_mode{false};
   std::map<std::string, std::string> crypt_http_responses;
   int override_range_hdr(const rgw::auth::StrategyRegistry& auth_registry, optional_yield y);
 public:
diff --git a/src/rgw/rgw_xxh_digest.h b/src/rgw/rgw_xxh_digest.h
new file mode 100644 (file)
index 0000000..fe78636
--- /dev/null
@@ -0,0 +1,53 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2023 Red Hat, Inc
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#pragma once
+
+#include <stdint.h>
+#include <stdio.h>
+#include "rgw_crc_digest.h"
+
+#define XXH_INLINE_ALL 1 /* required for streaming variants */
+#include "xxhash.h"
+
+namespace rgw { namespace digest {
+
+  class XXH3 { /* streaming 64-bit XXH3 digest on top of xxhash */
+  private:
+    XXH3_state_t state;
+
+  public:
+    static constexpr uint16_t digest_size = 8;
+
+    XXH3() {
+      XXH3_INITSTATE(&state);
+      Restart();
+    }
+
+    void Restart() { XXH3_64bits_reset(&state); }
+
+    void Update(const unsigned char *data, uint64_t len) {
+      XXH3_64bits_update(&state, data, len);
+    }
+
+    void Final(unsigned char* digest) {
+      /* write the 64-bit hash most-significant byte first */
+      XXH64_hash_t hash = XXH3_64bits_digest(&state);
+      if constexpr (std::endian::native != std::endian::big)
+       hash = rgw::digest::byteswap(hash);
+      memcpy(digest, &hash, sizeof(hash));
+    }
+  }; /* XXH3 */
+}} /* namespace */
index c96e9012790f6cf3d1191daee4ea69b20bfd4497..1b3c136349823fda0f69741f68068d91bfa975e2 100644 (file)
@@ -268,6 +268,14 @@ target_include_directories(unittest_rgw_lc
 target_link_libraries(unittest_rgw_lc
   rgw_common ${rgw_libs} ${EXPAT_LIBRARIES})
 
+# unittest_rgw_cksum
+add_executable(unittest_rgw_cksum test_rgw_cksum.cc)
+add_ceph_unittest(unittest_rgw_cksum)
+target_include_directories(unittest_rgw_cksum
+  SYSTEM PRIVATE "${CMAKE_SOURCE_DIR}/src/rgw")
+target_link_libraries(unittest_rgw_cksum
+  rgw_common ${rgw_libs})
+
 # unittest_rgw_arn
 add_executable(unittest_rgw_arn test_rgw_arn.cc)
 add_ceph_unittest(unittest_rgw_arn)
diff --git a/src/test/rgw/test_rgw_cksum.cc b/src/test/rgw/test_rgw_cksum.cc
new file mode 100644 (file)
index 0000000..c4655ba
--- /dev/null
@@ -0,0 +1,350 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2016 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#include <errno.h>
+#include <iostream>
+#include <fstream>
+#include <string>
+
+#include "gtest/gtest.h"
+
+#include "common/config.h"
+#include "common/ceph_argparse.h"
+#include "common/debug.h"
+#include "rgw/rgw_cksum.h"
+#include <openssl/sha.h>
+#include "rgw/rgw_hex.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+namespace {
+
+  using namespace rgw;
+  using namespace rgw::cksum;
+
+  bool verbose = false; /* switched on by --verbose, see main() */
+
+  cksum::Type t1 = cksum::Type::blake3; /* t1..t7: one entry per digest type, */
+  cksum::Type t2 = cksum::Type::sha1;   /* iterated by DigestSTR and DigestBL */
+  cksum::Type t3 = cksum::Type::sha256;
+  cksum::Type t4 = cksum::Type::sha512;
+  cksum::Type t5 = cksum::Type::crc32;
+  cksum::Type t6 = cksum::Type::crc32c;
+  cksum::Type t7 = cksum::Type::xxh3;
+
+  std::string lorem = /* short test input */
+    "Lorem ipsum dolor sit amet";
+
+  std::string dolor = /* long test input */
+    R"(Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.)";
+
+TEST(RGWCksum, Output)
+{ /* no assertions: only writes the two test inputs to files under /tmp */
+  auto o_mode = std::ios::out|std::ios::trunc;
+  std::ofstream of;
+  of.open("/tmp/lorem", o_mode);
+  of << lorem;
+  of.close();
+
+  of.open("/tmp/dolor", o_mode);
+  of << dolor;
+  of.close();
+}
+
+TEST(RGWCksum, DigestCRC32)
+{
+  auto t = cksum::Type::crc32;
+  DigestVariant dv = rgw::cksum::digest_factory(t);
+  Digest* digest = get_digest(dv);
+
+  ASSERT_NE(digest, nullptr);
+
+  digest->Update((const unsigned char *)dolor.c_str(), dolor.length());
+
+  auto cksum = rgw::cksum::finalize_digest(digest, t);
+  
+  /* hex of the digest; known value from https://crccalc.com/ */
+  ASSERT_EQ(cksum.hex(), "98b2c5bd");
+  /* base64 of the hex string above, via https://www.base64encode.org/ */
+  ASSERT_EQ(cksum.to_base64(), "OThiMmM1YmQ=");
+  /* base64 of the raw digest bytes; matches the aws-sdk-cpp encoded value */
+  ASSERT_EQ(cksum.to_armor(), "mLLFvQ==");
+}
+
+TEST(RGWCksum, DigestCRC32c)
+{
+  auto t = cksum::Type::crc32c;
+  DigestVariant dv = rgw::cksum::digest_factory(t);
+  Digest* digest = get_digest(dv);
+
+  ASSERT_NE(digest, nullptr);
+
+  digest->Update((const unsigned char *)dolor.c_str(), dolor.length());
+
+  auto cksum = rgw::cksum::finalize_digest(digest, t);
+  /* hex of the digest; known value from https://crccalc.com/ */
+  ASSERT_EQ(cksum.hex(), "95dc2e4b");
+  /* base64 of the hex string above, via https://www.base64encode.org/ */
+  ASSERT_EQ(cksum.to_base64(), "OTVkYzJlNGI=");
+  /* base64 of the raw digest bytes; matches the aws-sdk-cpp encoded value */
+  ASSERT_EQ(cksum.to_armor(), "ldwuSw==");
+}
+
+TEST(RGWCksum, DigestXXH3)
+{
+  auto t = cksum::Type::xxh3;
+  DigestVariant dv = rgw::cksum::digest_factory(t);
+  Digest* digest = get_digest(dv);
+
+  ASSERT_NE(digest, nullptr);
+
+  digest->Update((const unsigned char *)dolor.c_str(), dolor.length());
+
+  auto cksum = rgw::cksum::finalize_digest(digest, t);
+  /* hex of the digest; known value from xxhsum -H3 */
+  ASSERT_EQ(cksum.hex(), "5a164e0145351d01");
+  /* base64 of the hex string above, via https://www.base64encode.org/ */
+  ASSERT_EQ(cksum.to_base64(), "NWExNjRlMDE0NTM1MWQwMQ==");
+}
+
+TEST(RGWCksum, DigestSha1)
+{
+  auto t = cksum::Type::sha1;
+  for (const auto input_str : {&lorem, &dolor}) {
+    DigestVariant dv = rgw::cksum::digest_factory(t);
+    Digest *digest = get_digest(dv);
+
+    ASSERT_NE(digest, nullptr);
+
+    digest->Update((const unsigned char *)input_str->c_str(),
+                  input_str->length());
+
+    /* reference digest computed directly with OpenSSL's one-shot SHA1() */
+    unsigned char sha1_hash[SHA_DIGEST_LENGTH]; // == 20
+    ::SHA1((unsigned char *)input_str->c_str(), input_str->length(), sha1_hash);
+    // hex-encode the reference hash so it can be compared with cksum.hex()
+
+    char buf[20 * 2 + 1];
+    memset(buf, 0, sizeof(buf));
+    buf_to_hex(sha1_hash, SHA_DIGEST_LENGTH, buf);
+    if (verbose) {
+      std::cout << "byhand sha1 " << buf << std::endl;
+    }
+
+    auto cksum = rgw::cksum::finalize_digest(digest, t);
+    if (verbose) {
+      std::cout << "computed sha1: " << cksum.hex() << std::endl;
+    }
+
+    /* digest under test must match the direct OpenSSL result */
+    ASSERT_TRUE(memcmp(buf, cksum.hex().c_str(),
+                      cksum.hex().length()) == 0);
+
+    if (input_str == &lorem) {
+      /* compare w/known value, openssl sha1 */
+      ASSERT_EQ(cksum.hex(), "38f00f8738e241daea6f37f6f55ae8414d7b0219");
+      /* base64 of the hex string above, via https://www.base64encode.org/ */
+      ASSERT_EQ(cksum.to_base64(),
+               "MzhmMDBmODczOGUyNDFkYWVhNmYzN2Y2ZjU1YWU4NDE0ZDdiMDIxOQ==");
+    } else { // &dolor
+      /* compare w/known value, openssl sha1 */
+      ASSERT_EQ(cksum.hex(), "cd36b370758a259b34845084a6cc38473cb95e27");
+      /* base64 of the hex string above, via https://www.base64encode.org/ */
+      ASSERT_EQ(cksum.to_base64(),
+               "Y2QzNmIzNzA3NThhMjU5YjM0ODQ1MDg0YTZjYzM4NDczY2I5NWUyNw==");
+      /* base64 of the raw digest bytes; matches the aws-sdk-cpp encoded value */
+      ASSERT_EQ(cksum.to_armor(), "zTazcHWKJZs0hFCEpsw4Rzy5Xic=");
+    }
+  }
+}
+
+TEST(RGWCksum, DigestSha256)
+{
+  auto t = cksum::Type::sha256;
+  for (const auto input_str : {&lorem, &dolor}) {
+    DigestVariant dv = rgw::cksum::digest_factory(t);
+    Digest *digest = get_digest(dv);
+
+    ASSERT_NE(digest, nullptr);
+
+    digest->Update((const unsigned char *)input_str->c_str(),
+                  input_str->length());
+
+    auto cksum = rgw::cksum::finalize_digest(digest, t);
+    if (verbose) {
+      std::cout << "computed sha256: " << cksum.hex() << std::endl;
+    }
+
+    if (input_str == &lorem) {
+      /* compare w/known value, openssl sha256 */
+      ASSERT_EQ(cksum.hex(), "16aba5393ad72c0041f5600ad3c2c52ec437a2f0c7fc08fadfc3c0fe9641d7a3");
+      /* base64 of the hex string above, via https://www.base64encode.org/ */
+      ASSERT_EQ(cksum.to_base64(),
+               "MTZhYmE1MzkzYWQ3MmMwMDQxZjU2MDBhZDNjMmM1MmVjNDM3YTJmMGM3ZmMwOGZhZGZjM2MwZmU5NjQxZDdhMw==");
+    } else { // &dolor
+      /* compare w/known value, openssl sha256 */
+      ASSERT_EQ(cksum.hex(), "2d8c2f6d978ca21712b5f6de36c9d31fa8e96a4fa5d8ff8b0188dfb9e7c171bb");
+      /* base64 of the hex string above, via https://www.base64encode.org/ */
+      ASSERT_EQ(cksum.to_base64(),
+               "MmQ4YzJmNmQ5NzhjYTIxNzEyYjVmNmRlMzZjOWQzMWZhOGU5NmE0ZmE1ZDhmZjhiMDE4OGRmYjllN2MxNzFiYg==");
+      /* base64 of the raw digest bytes; matches the aws-sdk-cpp encoded value */
+      ASSERT_EQ(cksum.to_armor(), "LYwvbZeMohcStfbeNsnTH6jpak+l2P+LAYjfuefBcbs=");
+    }
+  }
+}
+
+TEST(RGWCksum, DigestSha512)
+{
+  auto t = cksum::Type::sha512;
+  for (const auto input_str : {&lorem, &dolor}) {
+    DigestVariant dv = rgw::cksum::digest_factory(t);
+    Digest *digest = get_digest(dv);
+
+    ASSERT_NE(digest, nullptr);
+
+    digest->Update((const unsigned char *)input_str->c_str(),
+                  input_str->length());
+
+    auto cksum = rgw::cksum::finalize_digest(digest, t);
+
+    if (input_str == &lorem) {
+      /* compare w/known value, openssl sha512 */
+      ASSERT_EQ(cksum.hex(), "b1f4aaa6b51c19ffbe4b1b6fa107be09c8acafd7c768106a3faf475b1e27a940d3c075fda671eadf46c68f93d7eabcf604bcbf7055da0dc4eae6743607a2fc3f");
+      /* base64 of the hex string above, via https://www.base64encode.org/ */
+      ASSERT_EQ(cksum.to_base64(),
+               "YjFmNGFhYTZiNTFjMTlmZmJlNGIxYjZmYTEwN2JlMDljOGFjYWZkN2M3NjgxMDZhM2ZhZjQ3NWIxZTI3YTk0MGQzYzA3NWZkYTY3MWVhZGY0NmM2OGY5M2Q3ZWFiY2Y2MDRiY2JmNzA1NWRhMGRjNGVhZTY3NDM2MDdhMmZjM2Y=");
+    } else { // &dolor
+      /* compare w/known value, openssl sha512 */
+      ASSERT_EQ(cksum.hex(), "8ba760cac29cb2b2ce66858ead169174057aa1298ccd581514e6db6dee3285280ee6e3a54c9319071dc8165ff061d77783100d449c937ff1fb4cd1bb516a69b9");
+      /* base64 of the hex string above, via https://www.base64encode.org/ */
+      ASSERT_EQ(cksum.to_base64(),
+               "OGJhNzYwY2FjMjljYjJiMmNlNjY4NThlYWQxNjkxNzQwNTdhYTEyOThjY2Q1ODE1MTRlNmRiNmRlZTMyODUyODBlZTZlM2E1NGM5MzE5MDcxZGM4MTY1ZmYwNjFkNzc3ODMxMDBkNDQ5YzkzN2ZmMWZiNGNkMWJiNTE2YTY5Yjk=");
+    }
+  }
+}
+
+TEST(RGWCksum, DigestBlake3)
+{
+  auto t = cksum::Type::blake3;
+  for (const auto input_str : {&lorem, &dolor}) {
+    DigestVariant dv = rgw::cksum::digest_factory(t);
+    Digest *digest = get_digest(dv);
+
+    ASSERT_NE(digest, nullptr);
+
+    digest->Update((const unsigned char *)input_str->c_str(),
+                  input_str->length());
+
+    auto cksum = rgw::cksum::finalize_digest(digest, t);
+
+    if (input_str == &lorem) {
+      /* compare w/known value, b3sum */
+      ASSERT_EQ(cksum.hex(), "f1da5f4e2bd5669307bcdb2e223dad05af7425207cbee59e73526235f50f76ad");
+      /* base64 of the hex string above, via https://www.base64encode.org/ */
+      ASSERT_EQ(cksum.to_base64(),
+               "ZjFkYTVmNGUyYmQ1NjY5MzA3YmNkYjJlMjIzZGFkMDVhZjc0MjUyMDdjYmVlNTllNzM1MjYyMzVmNTBmNzZhZA==");
+    } else { // &dolor
+      /* compare w/known value, b3sum */
+      ASSERT_EQ(cksum.hex(), "71fe44583a6268b56139599c293aeb854e5c5a9908eca00105d81ad5e22b7bb6");
+      /* base64 of the hex string above, via https://www.base64encode.org/ */
+      ASSERT_EQ(cksum.to_base64(),
+               "NzFmZTQ0NTgzYTYyNjhiNTYxMzk1OTljMjkzYWViODU0ZTVjNWE5OTA4ZWNhMDAxMDVkODFhZDVlMjJiN2JiNg==");
+    }
+  }
+} /* blake3 */
+
+TEST(RGWCksum, DigestSTR)
+{ /* smoke test: build, update and finalize a digest of every type */
+  for (auto t : {t1, t2, t3, t4, t5, t6, t7}) {
+    DigestVariant dv = rgw::cksum::digest_factory(t);
+    Digest* digest = get_digest(dv);
+
+    ASSERT_NE(digest, nullptr);
+
+    digest->Update((const unsigned char *)dolor.c_str(), dolor.length());
+    auto cksum = rgw::cksum::finalize_digest(digest, t);
+    if (verbose) { /* string forms are only exercised with --verbose */
+      std::cout << "type: " << to_string(t)
+               << " digest: " << cksum.to_string()
+               << std::endl;
+    }
+  }
+}
+
+TEST(RGWCksum, DigestBL)
+{
+  std::string lacrimae = dolor + dolor; /* contiguous copy of the input */
+
+  ceph::buffer::list dolor_bl; /* same bytes as two segments over dolor */
+  for ([[maybe_unused]] const auto& ix : {1, 2}) {
+    dolor_bl.push_back(
+      buffer::create_static(dolor.length(),
+                           const_cast<char*>(dolor.data())));
+  }
+
+  for (auto t : {t1, t2, t3, t4, t5, t6, t7}) {
+    DigestVariant dv1 = rgw::cksum::digest_factory(t);
+    Digest* digest1 = get_digest(dv1);
+    ASSERT_NE(digest1, nullptr);
+
+    DigestVariant dv2 = rgw::cksum::digest_factory(t);
+    Digest* digest2 = get_digest(dv2);
+    ASSERT_NE(digest2, nullptr);
+
+    digest1->Update((const unsigned char *)lacrimae.c_str(),
+                   lacrimae.length());
+    digest2->Update(dolor_bl); /* buffer::list overload of Update */
+
+    auto cksum1 = rgw::cksum::finalize_digest(digest1, t);
+    auto cksum2 = rgw::cksum::finalize_digest(digest2, t);
+
+    ASSERT_EQ(cksum1.to_string(), cksum2.to_string());
+
+    /* serialization: encode cksum1 into a bufferlist */
+    buffer::list bl_out;
+    encode(cksum1, bl_out);
+
+    /* unserialization: decode from a fresh copy of the encoded bytes */
+    buffer::list bl_in;
+    bl_in.append(bl_out.c_str(), bl_out.length());
+
+    rgw::cksum::Cksum cksum3;
+    auto iter = bl_in.cbegin();
+    decode(cksum3, iter);
+
+    /* the decoded checksum must round-trip to the original */
+    ASSERT_EQ(cksum1.to_string(), cksum3.to_string());
+  } /* for t1, ... */
+}
+} /* namespace */
+
+int main(int argc, char *argv[])
+{
+  auto args = argv_to_vec(argc, argv);
+  env_to_vec(args);
+
+  /* --verbose turns on digest printing; other arguments are skipped here */
+  for (auto arg_iter = args.begin(); arg_iter != args.end();) {
+     if (ceph_argparse_flag(args, arg_iter, "--verbose",
+                           (char*) nullptr)) {
+       verbose = true;
+     } else {
+       ++arg_iter;
+     }
+  }
+
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
index 7dd7604db1a1f677e1f7845d47c449e4d8314998..0759b0c80ce25f2948130a3312e98b8de93ad49a 100644 (file)
@@ -30,6 +30,7 @@ extern "C"{
 #include "common/ceph_json.h"
 #include "common/code_environment.h"
 #include "common/ceph_argparse.h"
+#include "common/armor.h"
 #include "common/Finisher.h"
 #include "global/global_init.h"
 #include "rgw_common.h"
@@ -55,8 +56,6 @@ using namespace std;
 static string uid = "ceph";
 static string display_name = "CEPH";
 
-extern "C" int ceph_armor(char *dst, const char *dst_end, 
-                          const char *src, const char *end);
 static void print_usage(char *exec){
   cout << "Usage: " << exec << " <Options>\n";
   cout << "Options:\n"
index 00c43d10b5495717948830b13d3041d483418b53..962c08f369c1a50109afe77f589d5b82f4471c40 100644 (file)
@@ -29,6 +29,7 @@ extern "C"{
 #include "common/ceph_json.h"
 #include "common/code_environment.h"
 #include "common/ceph_argparse.h"
+#include "common/armor.h"
 #include "common/Finisher.h"
 #include "global/global_init.h"
 #include "rgw_common.h"
@@ -47,8 +48,6 @@ static string uid = CEPH_UID;
 static string display_name = "CEPH";
 static string meta_caps = "metadata";
 
-extern "C" int ceph_armor(char *dst, const char *dst_end, 
-                          const char *src, const char *end);
 static void print_usage(char *exec){
   cout << "Usage: " << exec << " <Options>\n";
   cout << "Options:\n"