From: Sage Weil Date: Thu, 21 May 2020 20:07:00 +0000 (-0500) Subject: cls/cas/cls_cas_internal: dynamically drop precision for refs_by_hash X-Git-Tag: wip-pdonnell-testing-20200918.022351~1040^2~4 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=15449829390ce472c10f913b1816eb6881fbcae5;p=ceph-ci.git cls/cas/cls_cas_internal: dynamically drop precision for refs_by_hash Enable reduced precision via shrink(). Update test to - verify encode/decode are consistent - adjust pool id at the right times so that we get coverage of all encoding types. Signed-off-by: Sage Weil --- diff --git a/src/cls/cas/cls_cas_internal.cc b/src/cls/cas/cls_cas_internal.cc index cbc9e70d410..7f17063f82b 100644 --- a/src/cls/cas/cls_cas_internal.cc +++ b/src/cls/cas/cls_cas_internal.cc @@ -54,6 +54,7 @@ WRITE_CLASS_ENCODER(refs_by_object) struct refs_by_hash : public chunk_obj_refcount::refs_t { uint64_t total = 0; + uint32_t hash_bits = 32; ///< how many bits of mask to encode std::map,uint64_t> by_hash; refs_by_hash() {} @@ -64,6 +65,31 @@ struct refs_by_hash : public chunk_obj_refcount::refs_t { } } + std::string describe_encoding() const { + return "by_hash("s + stringify(hash_bits) + " bits)"s; + } + + uint32_t mask() { + // with the hobject_t reverse-bitwise sort, the least significant + // hash values are actually the most significant, so preserve them + // as we lose resolution. + return 0xffffffff >> (32 - hash_bits); + } + + bool shrink() { + if (hash_bits <= 1) { + return false; + } + hash_bits--; + std::map,uint64_t> old; + old.swap(by_hash); + auto m = mask(); + for (auto& i : old) { + by_hash[make_pair(i.first.first, i.first.second & m)] = i.second; + } + return true; + } + uint8_t get_type() const { return chunk_obj_refcount::TYPE_BY_HASH; } @@ -74,12 +100,12 @@ struct refs_by_hash : public chunk_obj_refcount::refs_t { return total; } bool get(const hobject_t& o) override { - by_hash[make_pair(o.pool, o.get_hash())]++; + by_hash[make_pair(o.pool, o.get_hash() & mask())]++; ++total; return true; } bool put(const hobject_t& o) override { - auto p = by_hash.find(make_pair(o.pool, o.get_hash())); + auto p = by_hash.find(make_pair(o.pool, o.get_hash() & mask())); if (p == by_hash.end()) { return false; } @@ -96,10 +122,13 @@ struct refs_by_hash : public chunk_obj_refcount::refs_t { void encode(::ceph::buffer::list::contiguous_appender& p) const { DENC_START(1, 1, p); denc_varint(total, p); + denc_varint(hash_bits, p); denc_varint(by_hash.size(), p); + int hash_bytes = (hash_bits + 7) / 8; for (auto& i : by_hash) { denc_signed_varint(i.first.first, p); - denc(i.first.second, p); + // this may write some bytes past where we move cursor too; harmless! + *(__le32*)p.get_pos_add(hash_bytes) = i.first.second; denc_varint(i.second, p); } DENC_FINISH(p); @@ -107,22 +136,25 @@ struct refs_by_hash : public chunk_obj_refcount::refs_t { void decode(::ceph::buffer::ptr::const_iterator& p) { DENC_START(1, 1, p); denc_varint(total, p); + denc_varint(hash_bits, p); uint64_t n; denc_varint(n, p); + int hash_bytes = (hash_bits + 7) / 8; while (n--) { int64_t poolid; - uint32_t hash; + __le32 hash; uint64_t count; denc_signed_varint(poolid, p); - denc(hash, p); + memcpy(&hash, p.get_pos_add(hash_bytes), hash_bytes); denc_varint(count, p); - by_hash[make_pair(poolid, hash)] = count; + by_hash[make_pair(poolid, (uint32_t)hash)] = count; } DENC_FINISH(p); } void dump(Formatter *f) const override { f->dump_string("type", "by_hash"); f->dump_unsigned("count", total); + f->dump_unsigned("hash_bits", hash_bits); f->open_array_section("refs"); for (auto& i : by_hash) { f->open_object_section("hash"); @@ -313,16 +345,17 @@ void chunk_obj_refcount::dynamic_encode(ceph::buffer::list& bl, size_t max) std::unique_ptr n; switch (r->get_type()) { case TYPE_BY_OBJECT: - n.reset(new refs_by_hash(static_cast(r.get()))); + r.reset(new refs_by_hash(static_cast(r.get()))); break; case TYPE_BY_HASH: - n.reset(new refs_by_pool(static_cast(r.get()))); + if (!static_cast(r.get())->shrink()) { + r.reset(new refs_by_pool(static_cast(r.get()))); + } break; case TYPE_BY_POOL: - n.reset(new refs_count(r.get())); + r.reset(new refs_count(r.get())); break; } - r.swap(n); t.clear(); } _encode_final(bl, t); diff --git a/src/cls/cas/cls_cas_internal.h b/src/cls/cas/cls_cas_internal.h index 02cbcff560c..f98721cf8cd 100644 --- a/src/cls/cas/cls_cas_internal.h +++ b/src/cls/cas/cls_cas_internal.h @@ -3,6 +3,8 @@ #pragma once +#include + #include "boost/variant.hpp" #include "include/stringify.h" @@ -19,6 +21,15 @@ struct chunk_obj_refcount { TYPE_BY_POOL = 4, TYPE_COUNT = 5, }; + static const char *type_name(int t) { + switch (t) { + case TYPE_BY_OBJECT: return "by_object"; + case TYPE_BY_HASH: return "by_hash"; + case TYPE_BY_POOL: return "by_pool"; + case TYPE_COUNT: return "count"; + default: return "???"; + } + } struct refs_t { virtual ~refs_t() {} @@ -28,6 +39,9 @@ struct chunk_obj_refcount { virtual bool get(const hobject_t& o) = 0; virtual bool put(const hobject_t& o) = 0; virtual void dump(Formatter *f) const = 0; + virtual std::string describe_encoding() const { + return type_name(get_type()); + } }; std::unique_ptr r; @@ -41,6 +55,9 @@ struct chunk_obj_refcount { int get_type() const { return r->get_type(); } + std::string describe_encoding() const { + return r->describe_encoding(); + } bool empty() const { return r->empty(); diff --git a/src/test/cls_cas/test_cls_cas.cc b/src/test/cls_cas/test_cls_cas.cc index 3339768bcde..28d1b4178c5 100644 --- a/src/test/cls_cas/test_cls_cas.cc +++ b/src/test/cls_cas/test_cls_cas.cc @@ -299,25 +299,54 @@ static int count_bits(unsigned long n) TEST(chunk_obj_refcount, size) { chunk_obj_refcount r; - size_t poolmask = 0xf0f0f0; size_t max = 1048576; - for (size_t i = 0; i < max; ++i) { + + // mix in pool changes as i gets bigger + size_t pool_mask = 0xfff5110; + + // eventually add in a zillion different pools to force us to a raw count + size_t pool_cutoff = max/2; + + for (size_t i = 1; i <= max; ++i) { hobject_t h(sobject_t(object_t("foo"s + stringify(i)), i)); - h.pool = i & poolmask; + h.pool = i > pool_cutoff ? i : (i & pool_mask); bool ret = r.get(h); ASSERT_TRUE(ret); if (count_bits(i) <= 2) { bufferlist bl; r.dynamic_encode(bl, 1024); if (count_bits(i) == 1) { - cout << i << "\t" << bl.length() << "\t" << r.get_type() << std::endl; + cout << i << "\t" << bl.length() + << "\t" << r.describe_encoding() + << std::endl; + } + + // verify reencoding is correct + chunk_obj_refcount a; + auto t = bl.cbegin(); + decode(a, t); + bufferlist bl2; + encode(a, bl2); + if (!bl.contents_equal(bl2)) { + Formatter *f = Formatter::create("json-pretty"); + cout << "original:\n"; + f->dump_object("refs", r); + f->flush(cout); + cout << "decoded:\n"; + f->dump_object("refs", a); + f->flush(cout); + cout << "original encoding:\n"; + bl.hexdump(cout); + cout << "decoded re-encoding:\n"; + bl2.hexdump(cout); + ASSERT_TRUE(bl.contents_equal(bl2)); } } } ASSERT_EQ(max, r.count()); - for (size_t i = 0; i < max; ++i) { + for (size_t i = 1; i <= max; ++i) { hobject_t h(sobject_t(object_t("foo"s + stringify(i)), 1)); - h.pool = i & poolmask; + h.pool = i > pool_cutoff ? i : (i & pool_mask); bool ret = r.put(h); ASSERT_TRUE(ret); }