From: myoungwon oh
Date: Mon, 27 May 2019 07:41:15 +0000 (+0900)
Subject: common,osd: add hash algorithms for dedup (sha256, sha512)
X-Git-Tag: v15.1.0~2501^2~1
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=f745a7d35fc82c3de4509ed6a5d643a5e41f9b62;p=ceph.git

common,osd: add hash algorithms for dedup (sha256, sha512)

Signed-off-by: Myoungwon Oh
---
Review note: the TYPE_FINGERPRINT_SHA256 and TYPE_FINGERPRINT_SHA512 cases in
PrimaryLogPG::do_manifest_flush() now end with `break;`. Without it, SHA256
falls through into SHA512 and both fall through into `default:`, which hits
assert(0 == "unrecognized fingerprint type"). The PrimaryLogPG.cc hunk
therefore has 54 new-side lines instead of 52, and the second buffer.cc hunk
has 30 instead of 28 (one comment line per new function). The blob ids on the
`index` lines still describe the original commit.

diff --git a/src/common/buffer.cc b/src/common/buffer.cc
index 0e9957a56f77..8782a7b6f6cb 100644
--- a/src/common/buffer.cc
+++ b/src/common/buffer.cc
@@ -2043,6 +2043,8 @@ void buffer::list::invalidate_crc()
 #include "common/ceph_crypto.h"
 using ceph::crypto::SHA1;
+using ceph::crypto::SHA256;
+using ceph::crypto::SHA512;
 sha1_digest_t buffer::list::sha1()
 {
@@ -2055,6 +2057,30 @@ sha1_digest_t buffer::list::sha1()
   return sha1_digest_t(fingerprint);
 }
+// SHA-256 digest of the list contents: every entry of _buffers is fed to the hash in order.
+sha256_digest_t buffer::list::sha256()
+{
+  unsigned char fingerprint[CEPH_CRYPTO_SHA256_DIGESTSIZE];
+  SHA256 sha256_gen;
+  for (auto& p : _buffers) {
+    sha256_gen.Update((const unsigned char *)p.c_str(), p.length());
+  }
+  sha256_gen.Final(fingerprint);
+  return sha256_digest_t(fingerprint);
+}
+
+// SHA-512 digest of the list contents: every entry of _buffers is fed to the hash in order.
+sha512_digest_t buffer::list::sha512()
+{
+  unsigned char fingerprint[CEPH_CRYPTO_SHA512_DIGESTSIZE];
+  SHA512 sha512_gen;
+  for (auto& p : _buffers) {
+    sha512_gen.Update((const unsigned char *)p.c_str(), p.length());
+  }
+  sha512_gen.Final(fingerprint);
+  return sha512_digest_t(fingerprint);
+}
+
 /**
  * Binary write all contents to a C++ stream
  */
diff --git a/src/common/ceph_crypto.h b/src/common/ceph_crypto.h
index 30206c65702a..810af086381d 100644
--- a/src/common/ceph_crypto.h
+++ b/src/common/ceph_crypto.h
@@ -10,6 +10,7 @@
 #define CEPH_CRYPTO_SHA1_DIGESTSIZE 20
 #define CEPH_CRYPTO_HMACSHA256_DIGESTSIZE 32
 #define CEPH_CRYPTO_SHA256_DIGESTSIZE 32
+#define CEPH_CRYPTO_SHA512_DIGESTSIZE 64
 #ifdef USE_NSS
 // you *must* use CRYPTO_CXXFLAGS in CMakeLists.txt for including this include
@@ -33,6 +34,7 @@ extern "C" {
   const EVP_MD *EVP_md5(void);
   const EVP_MD *EVP_sha1(void);
   const EVP_MD *EVP_sha256(void);
+  const EVP_MD *EVP_sha512(void);
 }
 #endif /*USE_OPENSSL*/
@@ -115,6 +117,11 @@ namespace ceph {
       public:
         SHA256 () : NSSDigest(SEC_OID_SHA256, CEPH_CRYPTO_SHA256_DIGESTSIZE) { }
       };
+
+      class SHA512 : public NSSDigest {
+      public:
+        SHA512 () : NSSDigest(SEC_OID_SHA512, CEPH_CRYPTO_SHA512_DIGESTSIZE) { }
+      };
     }
   }
 }
@@ -150,6 +157,11 @@ namespace ceph {
       public:
         SHA256 () : OpenSSLDigest(EVP_sha256()) { }
       };
+
+      class SHA512 : public OpenSSLDigest {
+      public:
+        SHA512 () : OpenSSLDigest(EVP_sha512()) { }
+      };
     }
   }
 }
@@ -337,6 +349,7 @@ namespace ceph {
     using ceph::crypto::ssl::SHA256;
     using ceph::crypto::ssl::MD5;
     using ceph::crypto::ssl::SHA1;
+    using ceph::crypto::ssl::SHA512;
     using ceph::crypto::ssl::HMACSHA256;
     using ceph::crypto::ssl::HMACSHA1;
@@ -348,6 +361,7 @@ namespace ceph {
     using ceph::crypto::nss::SHA256;
     using ceph::crypto::nss::MD5;
     using ceph::crypto::nss::SHA1;
+    using ceph::crypto::nss::SHA512;
     using ceph::crypto::nss::HMACSHA256;
     using ceph::crypto::nss::HMACSHA1;
diff --git a/src/include/buffer.h b/src/include/buffer.h
index 8bbede17666b..0f6c91e4c097 100644
--- a/src/include/buffer.h
+++ b/src/include/buffer.h
@@ -76,6 +76,8 @@ class deleter;
 template struct sha_digest_t;
 using sha1_digest_t = sha_digest_t<20>;
+using sha256_digest_t = sha_digest_t<32>;
+using sha512_digest_t = sha_digest_t<64>;
 template class DencDumper;
@@ -1233,6 +1235,8 @@ inline namespace v14_2_0 {
     uint32_t crc32c(uint32_t crc) const;
     void invalidate_crc();
     sha1_digest_t sha1();
+    sha256_digest_t sha256();
+    sha512_digest_t sha512();
     // These functions return a bufferlist with a pointer to a single
     // static buffer. They /must/ not outlive the memory they
diff --git a/src/include/types.h b/src/include/types.h
index 44777d218c8a..461cb9819311 100644
--- a/src/include/types.h
+++ b/src/include/types.h
@@ -623,5 +623,8 @@ WRITE_CLASS_ENCODER(sha1_digest_t)
 using sha256_digest_t = sha_digest_t<32>;
 WRITE_CLASS_ENCODER(sha256_digest_t)
+using md5_digest_t = sha_digest_t<16>;
+WRITE_CLASS_ENCODER(md5_digest_t)
+
 #endif
diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc
index af44294c0b86..e3cfbccda940 100644
--- a/src/osd/PrimaryLogPG.cc
+++ b/src/osd/PrimaryLogPG.cc
@@ -2472,41 +2472,54 @@ int PrimaryLogPG::do_manifest_flush(OpRequestRef op, ObjectContextRef obc, Flush
       pg_pool_t::fingerprint_t fp_algo_t = pool.info.get_fingerprint_type();
       if (iter->second.has_reference() &&
           fp_algo_t != pg_pool_t::TYPE_FINGERPRINT_NONE) {
+        object_t fp_oid;
+        bufferlist in;
         switch (fp_algo_t) {
           case pg_pool_t::TYPE_FINGERPRINT_SHA1:
             {
               sha1_digest_t sha1r = chunk_data.sha1();
-              object_t fp_oid = sha1r.to_str();
-              bufferlist in;
-              if (fp_oid != tgt_soid.oid) {
-                // decrement old chunk's reference count
-                ObjectOperation dec_op;
-                cls_chunk_refcount_put_op put_call;
-                ::encode(put_call, in);
-                dec_op.call("refcount", "chunk_put", in);
-                // we don't care dec_op's completion. scrub for dedup will fix this.
-                tid = osd->objecter->mutate(
-                  tgt_soid.oid, oloc, dec_op, snapc,
-                  ceph::real_clock::from_ceph_timespec(obc->obs.oi.mtime),
-                  flags, NULL);
-                in.clear();
-              }
-              tgt_soid.oid = fp_oid;
-              iter->second.oid = tgt_soid;
-              // add data op
-              ceph_osd_op osd_op;
-              osd_op.extent.offset = 0;
-              osd_op.extent.length = chunk_data.length();
-              encode(osd_op, in);
-              encode(soid, in);
-              in.append(chunk_data);
-              obj_op.call("cas", "cas_write_or_get", in);
+              fp_oid = sha1r.to_str();
               break;
             }
+          case pg_pool_t::TYPE_FINGERPRINT_SHA256:
+            {
+              sha256_digest_t sha256r = chunk_data.sha256();
+              fp_oid = sha256r.to_str();
+              break;
+            }
+          case pg_pool_t::TYPE_FINGERPRINT_SHA512:
+            {
+              sha512_digest_t sha512r = chunk_data.sha512();
+              fp_oid = sha512r.to_str();
+              break;
+            }
           default:
             assert(0 == "unrecognized fingerprint type");
             break;
         }
+        if (fp_oid != tgt_soid.oid) {
+          // decrement old chunk's reference count
+          ObjectOperation dec_op;
+          cls_chunk_refcount_put_op put_call;
+          ::encode(put_call, in);
+          dec_op.call("refcount", "chunk_put", in);
+          // we don't care dec_op's completion. scrub for dedup will fix this.
+          tid = osd->objecter->mutate(
+            tgt_soid.oid, oloc, dec_op, snapc,
+            ceph::real_clock::from_ceph_timespec(obc->obs.oi.mtime),
+            flags, NULL);
+          in.clear();
+        }
+        tgt_soid.oid = fp_oid;
+        iter->second.oid = tgt_soid;
+        // add data op
+        ceph_osd_op osd_op;
+        osd_op.extent.offset = 0;
+        osd_op.extent.length = chunk_data.length();
+        encode(osd_op, in);
+        encode(soid, in);
+        in.append(chunk_data);
+        obj_op.call("cas", "cas_write_or_get", in);
       } else {
         obj_op.add_data(CEPH_OSD_OP_WRITE, tgt_offset, tgt_length, chunk_data);
       }
diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h
index fc81d58c2288..047b33029403 100644
--- a/src/osd/osd_types.h
+++ b/src/osd/osd_types.h
@@ -1455,12 +1455,18 @@ public:
   typedef enum {
     TYPE_FINGERPRINT_NONE = 0,
     TYPE_FINGERPRINT_SHA1 = 1,
+    TYPE_FINGERPRINT_SHA256 = 2,
+    TYPE_FINGERPRINT_SHA512 = 3,
   } fingerprint_t;
   static fingerprint_t get_fingerprint_from_str(const std::string& s) {
     if (s == "none")
       return TYPE_FINGERPRINT_NONE;
     if (s == "sha1")
       return TYPE_FINGERPRINT_SHA1;
+    if (s == "sha256")
+      return TYPE_FINGERPRINT_SHA256;
+    if (s == "sha512")
+      return TYPE_FINGERPRINT_SHA512;
     return (fingerprint_t)-1;
   }
   const fingerprint_t get_fingerprint_type() const {
@@ -1479,6 +1485,8 @@ public:
     switch (m) {
     case TYPE_FINGERPRINT_NONE: return "none";
     case TYPE_FINGERPRINT_SHA1: return "sha1";
+    case TYPE_FINGERPRINT_SHA256: return "sha256";
+    case TYPE_FINGERPRINT_SHA512: return "sha512";
     default: return "unknown";
     }
   }
diff --git a/src/tools/ceph_dedup_tool.cc b/src/tools/ceph_dedup_tool.cc
index a9489483802f..a2c310c0203c 100644
--- a/src/tools/ceph_dedup_tool.cc
+++ b/src/tools/ceph_dedup_tool.cc
@@ -59,7 +59,7 @@ void usage()
   cout << " --object " << std::endl;
   cout << " --chunk-size chunk-size (byte) " << std::endl;
   cout << " --chunk-algorithm " << std::endl;
-  cout << " --fingerprint-algorithm " << std::endl;
+  cout << " --fingerprint-algorithm " << std::endl;
   cout << " --chunk-pool " << std::endl;
   cout << " --max-thread " << std::endl;
   cout << " --report-perioid " << std::endl;
@@ -371,6 +371,12 @@ void EstimateDedupRatio::add_chunk_fp_to_stat(bufferlist &chunk)
   if (fp_algo == "sha1") {
     sha1_digest_t sha1_val = chunk.sha1();
     fp = sha1_val.to_str();
+  } else if (fp_algo == "sha256") {
+    sha256_digest_t sha256_val = chunk.sha256();
+    fp = sha256_val.to_str();
+  } else if (fp_algo == "sha512") {
+    sha512_digest_t sha512_val = chunk.sha512();
+    fp = sha512_val.to_str();
   } else if (chunk_algo == "rabin") {
     uint64_t hash = rabin.gen_rabin_hash(chunk.c_str(), 0, chunk.length());
     fp = to_string(hash);
@@ -569,7 +575,8 @@ int estimate_dedup_ratio(const std::map < std::string, std::string > &opts,
   i = opts.find("fingerprint-algorithm");
   if (i != opts.end()) {
     fp_algo = i->second.c_str();
-    if (fp_algo != "sha1" && fp_algo != "rabin") {
+    if (fp_algo != "sha1" && fp_algo != "rabin"
+        && fp_algo != "sha256" && fp_algo != "sha512") {
       usage_exit();
     }
   } else {