struct refs_by_hash : public chunk_obj_refcount::refs_t {
uint64_t total = 0;
+ uint32_t hash_bits = 32; ///< how many bits of mask to encode
std::map<std::pair<int64_t,uint32_t>,uint64_t> by_hash;
refs_by_hash() {}
}
}
+ /// Human-readable description of this encoding, including the number of
+ /// hash bits currently retained, e.g. "by_hash(32 bits)".
+ std::string describe_encoding() const override {
+   return "by_hash("s + stringify(hash_bits) + " bits)"s;
+ }
+
+ /// Bit mask selecting the hash_bits low-order bits of an object hash.
+ ///
+ /// Identical to 0xffffffff >> (32 - hash_bits) for hash_bits in 1..32, but
+ /// also well-defined outside that range (0 bits -> 0, more than 32 -> all
+ /// ones), where the plain shift would be undefined behavior.
+ uint32_t mask() const {
+   // with the hobject_t reverse-bitwise sort, the least significant
+   // hash values are actually the most significant, so preserve them
+   // as we lose resolution.
+   if (hash_bits >= 32) {
+     return 0xffffffffu;
+   }
+   // build the mask from the low end so the shift count is always < 32
+   return (uint32_t{1} << hash_bits) - 1;
+ }
+
+ /// Drop one bit of hash resolution and re-bucket every entry under the
+ /// narrower mask.
+ ///
+ /// @return false, leaving the state untouched, when hash_bits is already
+ ///         1 or less; true once the table has been rebuilt.
+ bool shrink() {
+   if (hash_bits <= 1) {
+     return false;
+   }
+   hash_bits--;
+   std::map<std::pair<int64_t,uint32_t>,uint64_t> old;
+   old.swap(by_hash);
+   const auto m = mask();
+   for (const auto& i : old) {
+     // Several old keys can collapse onto the same masked key, so the
+     // counts must be accumulated; plain assignment would keep only the
+     // last one and the buckets would no longer add up to total.
+     by_hash[make_pair(i.first.first, i.first.second & m)] += i.second;
+   }
+   return true;
+ }
+
// Type tag for this representation: always chunk_obj_refcount::TYPE_BY_HASH.
uint8_t get_type() const {
return chunk_obj_refcount::TYPE_BY_HASH;
}
return total;
}
bool get(const hobject_t& o) override {
- by_hash[make_pair(o.pool, o.get_hash())]++;
+ by_hash[make_pair(o.pool, o.get_hash() & mask())]++;
++total;
return true;
}
bool put(const hobject_t& o) override {
- auto p = by_hash.find(make_pair(o.pool, o.get_hash()));
+ auto p = by_hash.find(make_pair(o.pool, o.get_hash() & mask()));
if (p == by_hash.end()) {
return false;
}
void encode(::ceph::buffer::list::contiguous_appender& p) const {
DENC_START(1, 1, p);
denc_varint(total, p);
+ denc_varint(hash_bits, p);
denc_varint(by_hash.size(), p);
+ int hash_bytes = (hash_bits + 7) / 8;
for (auto& i : by_hash) {
denc_signed_varint(i.first.first, p);
- denc(i.first.second, p);
+ // this may write some bytes past where we move cursor too; harmless!
+ *(__le32*)p.get_pos_add(hash_bytes) = i.first.second;
denc_varint(i.second, p);
}
DENC_FINISH(p);
/// Decode the by-hash table: total, hash_bits and the entry count as
/// varints, then for each entry a signed-varint pool id, the hash stored in
/// (hash_bits + 7) / 8 bytes, and a varint reference count.
///
/// Throws ceph::buffer::malformed_input when hash_bits is outside 1..32.
void decode(::ceph::buffer::ptr::const_iterator& p) {
DENC_START(1, 1, p);
denc_varint(total, p);
+ denc_varint(hash_bits, p);
+ // hash_bits is taken from the encoded input: anything above 32 would make
+ // the memcpy below overrun the 4-byte hash, and 0 is never produced by
+ // shrink(), so reject both instead of decoding garbage.
+ if (hash_bits < 1 || hash_bits > 32) {
+ throw ::ceph::buffer::malformed_input("refs_by_hash: hash_bits out of range");
+ }
uint64_t n;
denc_varint(n, p);
+ int hash_bytes = (hash_bits + 7) / 8;
while (n--) {
int64_t poolid;
- uint32_t hash;
+ // Zero-initialize: only hash_bytes (1..4) bytes are copied in below, so
+ // the remaining bytes would otherwise be indeterminate and end up in the
+ // map key.
+ __le32 hash = 0;
uint64_t count;
denc_signed_varint(poolid, p);
- denc(hash, p);
+ memcpy(&hash, p.get_pos_add(hash_bytes), hash_bytes);
denc_varint(count, p);
- by_hash[make_pair(poolid, hash)] = count;
+ by_hash[make_pair(poolid, (uint32_t)hash)] = count;
}
DENC_FINISH(p);
}
void dump(Formatter *f) const override {
f->dump_string("type", "by_hash");
f->dump_unsigned("count", total);
+ f->dump_unsigned("hash_bits", hash_bits);
f->open_array_section("refs");
for (auto& i : by_hash) {
f->open_object_section("hash");
std::unique_ptr<refs_t> n;
switch (r->get_type()) {
case TYPE_BY_OBJECT:
- n.reset(new refs_by_hash(static_cast<refs_by_object*>(r.get())));
+ r.reset(new refs_by_hash(static_cast<refs_by_object*>(r.get())));
break;
case TYPE_BY_HASH:
- n.reset(new refs_by_pool(static_cast<refs_by_hash*>(r.get())));
+ if (!static_cast<refs_by_hash*>(r.get())->shrink()) {
+ r.reset(new refs_by_pool(static_cast<refs_by_hash*>(r.get())));
+ }
break;
case TYPE_BY_POOL:
- n.reset(new refs_count(r.get()));
+ r.reset(new refs_count(r.get()));
break;
}
- r.swap(n);
t.clear();
}
_encode_final(bl, t);
#pragma once
+#include <string>
+
#include "boost/variant.hpp"
#include "include/stringify.h"
TYPE_BY_POOL = 4,
TYPE_COUNT = 5,
};
+ /// Printable name for an encoding type code; "???" for any value that is
+ /// not one of the cases handled below.
+ static const char *type_name(int t) {
+   const char *name = "???";
+   switch (t) {
+   case TYPE_BY_OBJECT:
+     name = "by_object";
+     break;
+   case TYPE_BY_HASH:
+     name = "by_hash";
+     break;
+   case TYPE_BY_POOL:
+     name = "by_pool";
+     break;
+   case TYPE_COUNT:
+     name = "count";
+     break;
+   default:
+     break;
+   }
+   return name;
+ }
struct refs_t {
virtual ~refs_t() {}
virtual bool get(const hobject_t& o) = 0;
virtual bool put(const hobject_t& o) = 0;
virtual void dump(Formatter *f) const = 0;
+ /// Short description of how the references are encoded. The default is
+ /// the type_name() of get_type(); being virtual, a subclass can replace it.
+ virtual std::string describe_encoding() const {
+   const char *name = type_name(get_type());
+   return std::string(name);
+ }
};
std::unique_ptr<refs_t> r;
int get_type() const {
return r->get_type();
}
+ /// Encoding description as reported by the representation held in r.
+ std::string describe_encoding() const {
+   const refs_t& impl = *r;
+   return impl.describe_encoding();
+ }
bool empty() const {
return r->empty();
TEST(chunk_obj_refcount, size)
{
chunk_obj_refcount r;
- size_t poolmask = 0xf0f0f0;
size_t max = 1048576;
- for (size_t i = 0; i < max; ++i) {
+
+ // mix in pool changes as i gets bigger
+ size_t pool_mask = 0xfff5110;
+
+ // eventually add in a zillion different pools to force us to a raw count
+ size_t pool_cutoff = max/2;
+
+ for (size_t i = 1; i <= max; ++i) {
hobject_t h(sobject_t(object_t("foo"s + stringify(i)), i));
- h.pool = i & poolmask;
+ h.pool = i > pool_cutoff ? i : (i & pool_mask);
bool ret = r.get(h);
ASSERT_TRUE(ret);
if (count_bits(i) <= 2) {
bufferlist bl;
r.dynamic_encode(bl, 1024);
if (count_bits(i) == 1) {
- cout << i << "\t" << bl.length() << "\t" << r.get_type() << std::endl;
+ cout << i << "\t" << bl.length()
+ << "\t" << r.describe_encoding()
+ << std::endl;
+ }
+
+ // verify reencoding is correct
+ chunk_obj_refcount a;
+ auto t = bl.cbegin();
+ decode(a, t);
+ bufferlist bl2;
+ encode(a, bl2);
+ if (!bl.contents_equal(bl2)) {
+ Formatter *f = Formatter::create("json-pretty");
+ cout << "original:\n";
+ f->dump_object("refs", r);
+ f->flush(cout);
+ cout << "decoded:\n";
+ f->dump_object("refs", a);
+ f->flush(cout);
+ cout << "original encoding:\n";
+ bl.hexdump(cout);
+ cout << "decoded re-encoding:\n";
+ bl2.hexdump(cout);
+ ASSERT_TRUE(bl.contents_equal(bl2));
}
}
}
ASSERT_EQ(max, r.count());
- for (size_t i = 0; i < max; ++i) {
+ for (size_t i = 1; i <= max; ++i) {
hobject_t h(sobject_t(object_t("foo"s + stringify(i)), 1));
- h.pool = i & poolmask;
+ h.pool = i > pool_cutoff ? i : (i & pool_mask);
bool ret = r.put(h);
ASSERT_TRUE(ret);
}