From c263117c516925048c262fa5d1cb3dd9de022e1f Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 20 May 2020 14:26:50 -0500 Subject: [PATCH] cls_cas: dynamically adjust resolution of chunk_obj_refcount Most objects will have few refs, but some will have many. Retain as much information about references as we can for both cases, without exploding the size of encoded refs. Signed-off-by: Sage Weil --- src/cls/cas/cls_cas.cc | 21 +- src/cls/cas/cls_cas_internal.h | 346 +++++++++++++++++++++++++++++-- src/test/cls_cas/test_cls_cas.cc | 35 ++++ src/test/librados/tier_cxx.cc | 10 +- 4 files changed, 380 insertions(+), 32 deletions(-) diff --git a/src/cls/cas/cls_cas.cc b/src/cls/cas/cls_cas.cc index 3f06a387dba..451c5c14722 100644 --- a/src/cls/cas/cls_cas.cc +++ b/src/cls/cas/cls_cas.cc @@ -26,7 +26,7 @@ static int chunk_read_refcount( chunk_obj_refcount *objr) { bufferlist bl; - objr->refs.clear(); + objr->clear(); int ret = cls_cxx_getxattr(hctx, CHUNK_REFCOUNT_ATTR, &bl); if (ret == -ENODATA) { return 0; @@ -88,7 +88,7 @@ static int chunk_create_or_get_ref(cls_method_context_t hctx, if (ret < 0) { return ret; } - objr.refs.insert(op.source); + objr.get(op.source); ret = chunk_set_refcount(hctx, objr); if (ret < 0) { return ret; @@ -107,10 +107,9 @@ static int chunk_create_or_get_ref(cls_method_context_t hctx, CLS_LOG(10, "inc ref oid=%s\n", op.source.oid.name.c_str()); - if (objr.refs.count(op.source)) { + if (!objr.get(op.source)) { return -EEXIST; } - objr.refs.insert(op.source); ret = chunk_set_refcount(hctx, objr); if (ret < 0) { @@ -143,10 +142,9 @@ static int chunk_get_ref(cls_method_context_t hctx, // existing chunk; inc ref CLS_LOG(10, "oid=%s\n", op.source.oid.name.c_str()); - if (objr.refs.count(op.source)) { + if (!objr.get(op.source)) { return -EEXIST; } - objr.refs.insert(op.source); ret = chunk_set_refcount(hctx, objr); if (ret < 0) { @@ -173,19 +171,12 @@ static int chunk_put_ref(cls_method_context_t hctx, if (ret < 0) return ret; - if (objr.refs.empty()) {// shouldn't happen! - CLS_LOG(0, "ERROR was called without any references!\n"); - return -ENOLINK; - } - - auto p = objr.refs.find(op.source); - if (p == objr.refs.end()) { + if (!objr.put(op.source)) { CLS_LOG(10, "oid=%s (no ref)\n", op.source.oid.name.c_str()); return -ENOLINK; } - objr.refs.erase(p); - if (objr.refs.empty()) { + if (objr.empty()) { CLS_LOG(10, "oid=%s (last ref)\n", op.source.oid.name.c_str()); return cls_cxx_remove(hctx); } diff --git a/src/cls/cas/cls_cas_internal.h b/src/cls/cas/cls_cas_internal.h index 3bcb856f99a..f0267f86055 100644 --- a/src/cls/cas/cls_cas_internal.h +++ b/src/cls/cas/cls_cas_internal.h @@ -3,33 +3,355 @@ #pragma once +#include "boost/variant.hpp" + +#include "include/stringify.h" #include "common/Formatter.h" #define CHUNK_REFCOUNT_ATTR "chunk_refcount" struct chunk_obj_refcount { - std::set refs; + enum { + TYPE_BY_OBJECT = 1, + TYPE_BY_HASH = 2, + TYPE_BY_PARTIAL = 3, + TYPE_BY_POOL = 4, + TYPE_COUNT = 5, + }; + + struct refs_t { + virtual ~refs_t() {} + virtual uint8_t get_type() const = 0; + virtual bool empty() const = 0; + virtual uint64_t count() const = 0; + virtual bool get(const hobject_t& o) = 0; + virtual bool put(const hobject_t& o) = 0; + virtual void encode(bufferlist& bl) const = 0; + virtual void decode(bufferlist::const_iterator& p) = 0; + virtual void dump(Formatter *f) const = 0; + }; + + struct refs_by_object : public refs_t { + std::set by_object; + + uint8_t get_type() const { + return TYPE_BY_OBJECT; + } + bool empty() const override { + return by_object.empty(); + } + uint64_t count() const override { + return by_object.size(); + } + bool get(const hobject_t& o) override { + if (by_object.count(o)) { + return false; + } + by_object.insert(o); + return true; + } + bool put(const hobject_t& o) override { + auto p = by_object.find(o); + if (p == by_object.end()) { + return false; + } + by_object.erase(p); + return true; + } + void encode(bufferlist& bl) const override { + ENCODE_START(1, 1, bl); + encode(by_object, bl); + ENCODE_FINISH(bl); + } + void decode(bufferlist::const_iterator& p) override { + DECODE_START(1, p); + decode(by_object, p); + DECODE_FINISH(p); + } + void dump(Formatter *f) const override { + f->dump_string("type", "by_object"); + f->dump_unsigned("count", by_object.size()); + f->open_array_section("refs"); + for (auto& i : by_object) { + f->dump_object("ref", i); + } + f->close_section(); + } + }; - chunk_obj_refcount() {} + struct refs_by_hash : public refs_t { + uint64_t total = 0; + std::map,uint64_t> by_hash; + + refs_by_hash() {} + refs_by_hash(const refs_by_object *o) { + total = o->count(); + for (auto& i : o->by_object) { + by_hash[make_pair(i.pool, i.get_hash())]++; + } + } + + uint8_t get_type() const { + return TYPE_BY_HASH; + } + bool empty() const override { + return by_hash.empty(); + } + uint64_t count() const override { + return total; + } + bool get(const hobject_t& o) override { + by_hash[make_pair(o.pool, o.get_hash())]++; + ++total; + return true; + } + bool put(const hobject_t& o) override { + auto p = by_hash.find(make_pair(o.pool, o.get_hash())); + if (p == by_hash.end()) { + return false; + } + if (--p->second == 0) { + by_hash.erase(p); + } + --total; + return true; + } + void encode(bufferlist& bl) const override { + ENCODE_START(1, 1, bl); + encode(total, bl); + encode(by_hash, bl); + ENCODE_FINISH(bl); + } + void decode(bufferlist::const_iterator& p) override { + DECODE_START(1, p); + decode(total, p); + decode(by_hash, p); + DECODE_FINISH(p); + } + void dump(Formatter *f) const override { + f->dump_string("type", "by_hash"); + f->dump_unsigned("count", total); + f->open_array_section("refs"); + for (auto& i : by_hash) { + f->open_object_section("hash"); + f->dump_int("pool", i.first.first); + f->dump_unsigned("hash", i.first.second); + f->dump_unsigned("count", i.second); + f->close_section(); + } + f->close_section(); + } + }; + + struct refs_by_pool : public refs_t { + uint64_t total = 0; + map by_pool; + + refs_by_pool() {} + refs_by_pool(const refs_by_hash *o) { + total = o->count(); + for (auto& i : o->by_hash) { + by_pool[i.first.first] += i.second; + } + } + + uint8_t get_type() const { + return TYPE_BY_POOL; + } + bool empty() const override { + return by_pool.empty(); + } + uint64_t count() const override { + return total; + } + bool get(const hobject_t& o) override { + ++by_pool[o.pool]; + ++total; + return true; + } + bool put(const hobject_t& o) override { + auto p = by_pool.find(o.pool); + if (p == by_pool.end()) { + return false; + } + --p->second; + if (p->second == 0) { + by_pool.erase(p); + } + --total; + return true; + } + void encode(bufferlist& bl) const override { + ENCODE_START(1, 1, bl); + encode(total, bl); + encode(by_pool, bl); + ENCODE_FINISH(bl); + } + void decode(bufferlist::const_iterator& p) override { + DECODE_START(1, p); + decode(total, p); + decode(by_pool, p); + DECODE_FINISH(p); + } + void dump(Formatter *f) const override { + f->dump_string("type", "by_pool"); + f->dump_unsigned("count", total); + f->open_array_section("pools"); + for (auto& i : by_pool) { + f->open_object_section("pool"); + f->dump_unsigned("pool_id", i.first); + f->dump_unsigned("count", i.second); + f->close_section(); + } + f->close_section(); + } + }; + + struct refs_count : public refs_t { + uint64_t total = 0; + + refs_count() {} + refs_count(const refs_t *old) { + total = old->count(); + } + + uint8_t get_type() const { + return TYPE_COUNT; + } + bool empty() const override { + return total == 0; + } + uint64_t count() const override { + return total; + } + bool get(const hobject_t& o) override { + ++total; + return true; + } + bool put(const hobject_t& o) override { + if (!total) { + return false; + } + --total; + return true; + } + void encode(bufferlist& bl) const override { + ENCODE_START(1, 1, bl); + encode(total, bl); + ENCODE_FINISH(bl); + } + void decode(bufferlist::const_iterator& p) override { + DECODE_START(1, p); + decode(total, p); + DECODE_FINISH(p); + } + void dump(Formatter *f) const override { + f->dump_string("type", "count"); + f->dump_unsigned("count", total); + } + }; + + + std::unique_ptr r; + + chunk_obj_refcount() { + clear(); + } + + void clear() { + // default to most precise impl + r.reset(new refs_by_object); + } + + int get_type() const { + return r->get_type(); + } + + bool empty() const { + return r->empty(); + } + uint64_t count() const { + return r->count(); + } + + bool get(const hobject_t& o) { + return r->get(o); + } + bool put(const hobject_t& o) { + bool ret = r->put(o); + if (r->get_type() != TYPE_BY_OBJECT && + r->count() == 0) { + clear(); // reset to full resolution, yay + } + return ret; + } void encode(ceph::buffer::list& bl) const { + bufferlist t; + r->encode(t); + _encode(bl, t); + } + + void dynamic_encode(ceph::buffer::list& bl, size_t max = 1024) { + bufferlist t; + while (true) { + r->encode(t); + if (t.length() <= max) { + break; + } + // downgrade resolution + std::unique_ptr n; + switch (r->get_type()) { + case TYPE_BY_OBJECT: + n.reset(new refs_by_hash(static_cast(r.get()))); + break; + case TYPE_BY_HASH: + n.reset(new refs_by_pool(static_cast(r.get()))); + break; + case TYPE_BY_POOL: + n.reset(new refs_count(r.get())); + break; + } + r.swap(n); + t.clear(); + } + _encode(bl, t); + } + + void _encode(bufferlist& bl, bufferlist& t) const { ENCODE_START(1, 1, bl); - encode(refs, bl); + encode(r->get_type(), bl); + bl.claim_append(t); ENCODE_FINISH(bl); } - void decode(ceph::buffer::list::const_iterator& bl) { - DECODE_START(1, bl); - decode(refs, bl); - DECODE_FINISH(bl); + void decode(ceph::buffer::list::const_iterator& p) { + DECODE_START(1, p); + uint8_t t; + decode(t, p); + switch (t) { + case TYPE_BY_OBJECT: + r.reset(new refs_by_object()); + break; + case TYPE_BY_HASH: + r.reset(new refs_by_hash()); + break; + case TYPE_BY_POOL: + r.reset(new refs_by_pool()); + break; + case TYPE_COUNT: + r.reset(new refs_count()); + break; + default: + throw ceph::buffer::malformed_input( + "unrecognized chunk ref encoding type "s + stringify((int)t)); + } + r->decode(p); + DECODE_FINISH(p); } void dump(Formatter *f) const { - f->open_array_section("refs"); - for (auto& i : refs) { - f->dump_object("ref", i); - } - f->close_section(); + r->dump(f); } static void generate_test_instances(std::list& ls) { ls.push_back(new chunk_obj_refcount()); diff --git a/src/test/cls_cas/test_cls_cas.cc b/src/test/cls_cas/test_cls_cas.cc index b9a709ecaea..5f558908d13 100644 --- a/src/test/cls_cas/test_cls_cas.cc +++ b/src/test/cls_cas/test_cls_cas.cc @@ -2,6 +2,7 @@ // vim: ts=8 sw=2 smarttab #include "include/types.h" +#include "include/stringify.h" #include "cls/cas/cls_cas_client.h" #include "cls/cas/cls_cas_internal.h" @@ -285,3 +286,37 @@ TEST_F(cls_cas, get_wrong_data) } ASSERT_EQ(-ENOENT, ioctx.read(oid, t, 0, 0)); } + +static int count_bits(unsigned long n) +{ + // base case + if (n == 0) + return 0; + else + return 1 + count_bits(n & (n - 1)); +} + +TEST(chunk_obj_refcount, size) +{ + chunk_obj_refcount r; + size_t max = 1048576; + for (size_t i = 0; i < max; ++i) { + hobject_t h(sobject_t(object_t("foo"s + stringify(i)), 1)); + bool ret = r.get(h); + ASSERT_TRUE(ret); + if (count_bits(i) <= 2) { + bufferlist bl; + r.dynamic_encode(bl, 1024); + if (count_bits(i) == 1) { + cout << i << "\t" << bl.length() << "\t" << r.get_type() << std::endl; + } + } + } + ASSERT_EQ(max, r.count()); + for (size_t i = 0; i < max; ++i) { + hobject_t h(sobject_t(object_t("foo"s + stringify(i)), 1)); + bool ret = r.put(h); + ASSERT_TRUE(ret); + } + ASSERT_EQ(0, r.count()); +} diff --git a/src/test/librados/tier_cxx.cc b/src/test/librados/tier_cxx.cc index 56131fb12d4..e38c02f4ad3 100644 --- a/src/test/librados/tier_cxx.cc +++ b/src/test/librados/tier_cxx.cc @@ -3223,7 +3223,7 @@ TEST_F(LibRadosTwoPoolsPP, ManifestRefRead) { } catch (buffer::error& err) { ASSERT_TRUE(0); } - ASSERT_EQ(1U, refs.refs.size()); + ASSERT_EQ(1U, refs.count()); } // chunk's refcount { @@ -3236,7 +3236,7 @@ TEST_F(LibRadosTwoPoolsPP, ManifestRefRead) { } catch (buffer::error& err) { ASSERT_TRUE(0); } - ASSERT_EQ(1u, refs.refs.size()); + ASSERT_EQ(1u, refs.count()); } // wait for maps to settle before next test @@ -3313,7 +3313,7 @@ TEST_F(LibRadosTwoPoolsPP, ManifestUnset) { } catch (buffer::error& err) { ASSERT_TRUE(0); } - ASSERT_EQ(1u, refs.refs.size()); + ASSERT_EQ(1u, refs.count()); } // chunk's refcount { @@ -3326,7 +3326,7 @@ TEST_F(LibRadosTwoPoolsPP, ManifestUnset) { } catch (buffer::error& err) { ASSERT_TRUE(0); } - ASSERT_EQ(1u, refs.refs.size()); + ASSERT_EQ(1u, refs.count()); } // unset-manifest for set-redirect @@ -3506,7 +3506,7 @@ TEST_F(LibRadosTwoPoolsPP, ManifestDedupRefRead) { } catch (buffer::error& err) { ASSERT_TRUE(0); } - ASSERT_EQ(2u, refs.refs.size()); + ASSERT_EQ(2u, refs.count()); } // wait for maps to settle before next test -- 2.39.5