From: Sage Weil Date: Thu, 3 Oct 2013 05:41:54 +0000 (-0700) Subject: osd_types: add generic HitSet type with bloom and explicit implementations X-Git-Tag: v0.75~98^2~41 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=c01b183da0e78a6fb70838df723ef91e28e9c442;p=ceph.git osd_types: add generic HitSet type with bloom and explicit implementations Track a set of hash values, either explicitly or using a bloom_filter. Hide the implementation and allow us to transparently encode and decode. Signed-off-by: Sage Weil Signed-off-by: Greg Farnum --- diff --git a/src/common/Makefile.am b/src/common/Makefile.am index 080e276d39af..63703b838990 100644 --- a/src/common/Makefile.am +++ b/src/common/Makefile.am @@ -79,6 +79,7 @@ libcommon_la_SOURCES += \ mon/MonMap.cc \ osd/OSDMap.cc \ osd/osd_types.cc \ + osd/HitSet.cc \ mds/MDSMap.cc \ mds/inode_backtrace.cc \ mds/mdstypes.cc diff --git a/src/common/bloom_filter.hpp b/src/common/bloom_filter.hpp index 93787a89a609..5f2a22b7d3c7 100644 --- a/src/common/bloom_filter.hpp +++ b/src/common/bloom_filter.hpp @@ -307,6 +307,11 @@ public: return insert_count_; } + inline bool is_full() const + { + return insert_count_ >= target_element_count_; + } + /* * density of bits set. inconvenient units, but: * .3 = ~50% target insertions diff --git a/src/osd/HitSet.cc b/src/osd/HitSet.cc new file mode 100644 index 000000000000..7cd736b59357 --- /dev/null +++ b/src/osd/HitSet.cc @@ -0,0 +1,213 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2013 Inktank + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include "HitSet.h" + +// -- HitSet -- + +HitSet::HitSet(HitSet::Params *params) +{ + switch (params->get_type()) { + case TYPE_BLOOM: + impl.reset(new BloomHitSet(static_cast(params->impl.get()))); + break; + + case TYPE_EXPLICIT_HASH: + impl.reset(new ExplicitHashHitSet(static_cast(params->impl.get()))); + break; + + case TYPE_EXPLICIT_OBJECT: + impl.reset(new ExplicitObjectHitSet(static_cast(params->impl.get()))); + break; + + case TYPE_NONE: + break; + + default: + assert (0 == "unknown HitSet type"); + } +} + +void HitSet::encode(bufferlist &bl) const +{ + ENCODE_START(1, 1, bl); + ::encode(sealed, bl); + if (impl) { + ::encode((__u8)impl->get_type(), bl); + impl->encode(bl); + } else { + ::encode((__u8)TYPE_NONE, bl); + } + ENCODE_FINISH(bl); +} + +void HitSet::decode(bufferlist::iterator &bl) +{ + DECODE_START(1, bl); + ::decode(sealed, bl); + __u8 type; + ::decode(type, bl); + switch ((impl_type_t)type) { + case TYPE_EXPLICIT_HASH: + impl.reset(new ExplicitHashHitSet); + break; + case TYPE_EXPLICIT_OBJECT: + impl.reset(new ExplicitObjectHitSet); + break; + case TYPE_BLOOM: + impl.reset(new BloomHitSet); + break; + case TYPE_NONE: + impl.reset(NULL); + break; + default: + throw buffer::malformed_input("unrecognized HitMap type"); + } + if (impl) + impl->decode(bl); + DECODE_FINISH(bl); +} + +void HitSet::dump(Formatter *f) const +{ + f->dump_string("type", get_type_name()); + f->dump_string("sealed", sealed ? "yes" : "no"); + if (impl) + impl->dump(f); +} + +void HitSet::generate_test_instances(list& o) +{ + o.push_back(new HitSet); + o.push_back(new HitSet(new BloomHitSet(10, .1, 1))); + o.back()->insert(hobject_t()); + o.back()->insert(hobject_t("asdf", "", CEPH_NOSNAP, 123, 1, "")); + o.back()->insert(hobject_t("qwer", "", CEPH_NOSNAP, 456, 1, "")); + o.push_back(new HitSet(new ExplicitHashHitSet)); + o.back()->insert(hobject_t()); + o.back()->insert(hobject_t("asdf", "", CEPH_NOSNAP, 123, 1, "")); + o.back()->insert(hobject_t("qwer", "", CEPH_NOSNAP, 456, 1, "")); + o.push_back(new HitSet(new ExplicitObjectHitSet)); + o.back()->insert(hobject_t()); + o.back()->insert(hobject_t("asdf", "", CEPH_NOSNAP, 123, 1, "")); + o.back()->insert(hobject_t("qwer", "", CEPH_NOSNAP, 456, 1, "")); +} + +HitSet::Params::Params(const Params& o) +{ + if (o.get_type() != TYPE_NONE) { + create_impl(o.get_type()); + // it's annoying to write virtual operator= methods; use encode/decode + // instead. + bufferlist bl; + o.impl->encode(bl); + bufferlist::iterator p = bl.begin(); + impl->decode(p); + } // else we don't need to do anything +} + +const HitSet::Params& HitSet::Params::operator=(const Params& o) +{ + create_impl(o.get_type()); + if (o.impl) { + // it's annoying to write virtual operator= methods; use encode/decode + // instead. + bufferlist bl; + o.impl->encode(bl); + bufferlist::iterator p = bl.begin(); + impl->decode(p); + } + return *this; +} + +void HitSet::Params::encode(bufferlist &bl) const +{ + ENCODE_START(1, 1, bl); + if (impl) { + ::encode((__u8)impl->get_type(), bl); + impl->encode(bl); + } else { + ::encode((__u8)TYPE_NONE, bl); + } + ENCODE_FINISH(bl); +} + +bool HitSet::Params::create_impl(impl_type_t type) +{ + switch ((impl_type_t)type) { + case TYPE_EXPLICIT_HASH: + impl.reset(new ExplicitHashHitSet::Params); + break; + case TYPE_EXPLICIT_OBJECT: + impl.reset(new ExplicitObjectHitSet::Params); + break; + case TYPE_BLOOM: + impl.reset(new BloomHitSet::Params); + break; + case TYPE_NONE: + impl.reset(NULL); + break; + default: + return false; + } + return true; +} + +void HitSet::Params::decode(bufferlist::iterator &bl) +{ + DECODE_START(1, bl); + __u8 type; + ::decode(type, bl); + if (!create_impl((impl_type_t)type)) + throw buffer::malformed_input("unrecognized HitMap type"); + if (impl) + impl->decode(bl); + DECODE_FINISH(bl); +} + +void HitSet::Params::dump(Formatter *f) const +{ + f->dump_string("type", HitSet::get_type_name(get_type())); + if (impl) + impl->dump(f); +} + +void HitSet::Params::generate_test_instances(list& o) +{ +#define loop_hitset_params(kind) \ +{ \ + list params; \ + kind::Params::generate_test_instances(params); \ + for (list::iterator i = params.begin(); \ + i != params.end(); ++i) \ + o.push_back(new Params(*i)); \ +} + o.push_back(new Params); + o.push_back(new Params(new BloomHitSet::Params)); + loop_hitset_params(BloomHitSet); + o.push_back(new Params(new ExplicitHashHitSet::Params)); + loop_hitset_params(ExplicitHashHitSet); + o.push_back(new Params(new ExplicitObjectHitSet::Params)); + loop_hitset_params(ExplicitObjectHitSet); +} + +ostream& operator<<(ostream& out, const HitSet::Params& p) { + out << "params{" << HitSet::get_type_name(p.get_type()); + if (p.impl) { + out << " "; + p.impl->dump_stream(out); + } + out << "}"; + return out; +} diff --git a/src/osd/HitSet.h b/src/osd/HitSet.h new file mode 100644 index 000000000000..8b13fc8e2b61 --- /dev/null +++ b/src/osd/HitSet.h @@ -0,0 +1,462 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2013 Inktank + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#ifndef CEPH_OSD_HITSET_H +#define CEPH_OSD_HITSET_H + +#include + +#include "include/encoding.h" +#include "common/bloom_filter.hpp" +#include "common/hobject.h" +#include "common/Formatter.h" + +/** + * generic container for a HitSet + * + * Encapsulate a HitSetImpl of any type. Expose a generic interface + * to users and wrap the encoded object with a type so that it can be + * safely decoded later. + */ + +class HitSet { +public: + typedef enum { + TYPE_NONE = 0, + TYPE_EXPLICIT_HASH = 1, + TYPE_EXPLICIT_OBJECT = 2, + TYPE_BLOOM = 3 + } impl_type_t; + + static const char *get_type_name(impl_type_t t) { + switch (t) { + case TYPE_NONE: return "none"; + case TYPE_EXPLICIT_HASH: return "explicit_hash"; + case TYPE_EXPLICIT_OBJECT: return "explicit_object"; + case TYPE_BLOOM: return "bloom"; + default: return "???"; + } + } + const char *get_type_name() const { + if (impl) + return get_type_name(impl->get_type()); + return get_type_name(TYPE_NONE); + } + + /// abstract interface for a HitSet implementation + class Impl { + public: + virtual impl_type_t get_type() const = 0; + virtual bool is_full() const = 0; + virtual void insert(const hobject_t& o) = 0; + virtual bool contains(const hobject_t& o) const = 0; + virtual unsigned insert_count() const = 0; + virtual unsigned approx_unique_insert_count() const = 0; + virtual void encode(bufferlist &bl) const = 0; + virtual void decode(bufferlist::iterator& p) = 0; + virtual void dump(Formatter *f) const = 0; + virtual Impl* clone() const = 0; + virtual void seal() {} + virtual ~Impl() {} + }; + + boost::scoped_ptr impl; + bool sealed; + + class Params { + /// create an Impl* of the given type + bool create_impl(impl_type_t t); + + public: + class Impl { + public: + virtual impl_type_t get_type() const = 0; + virtual HitSet::Impl *get_new_impl() const = 0; + virtual void encode(bufferlist &bl) const {} + virtual void decode(bufferlist::iterator& p) {} + virtual void dump(Formatter *f) const {} + virtual void dump_stream(ostream& o) const {} + virtual ~Impl() {} + }; + + Params() {} + Params(Impl *i) : impl(i) {} + virtual ~Params() {} + + boost::scoped_ptr impl; + + impl_type_t get_type() const { + if (impl) + return impl->get_type(); + return TYPE_NONE; + } + + Params(const Params& o); + const Params& operator=(const Params& o); + + void encode(bufferlist &bl) const; + void decode(bufferlist::iterator &bl); + void dump(Formatter *f) const; + static void generate_test_instances(list& o); + + friend ostream& operator<<(ostream& out, const HitSet::Params& p); + }; + + HitSet() : impl(NULL), sealed(false) {} + HitSet(Impl *i) : impl(i), sealed(false) {} + HitSet(HitSet::Params *params); + + HitSet(const HitSet& o) { + sealed = o.sealed; + impl.reset(o.impl->clone()); + } + const HitSet& operator=(const HitSet& o) { + sealed = o.sealed; + if (o.impl) + impl.reset(o.impl->clone()); + else + impl.reset(NULL); + return *this; + } + + /// insert a hash into the set + void insert(const hobject_t& o) { + impl->insert(o); + } + + /// query whether a hash is in the set + bool contains(const hobject_t& o) const { + return impl->contains(o); + } + + unsigned insert_count() const { + return impl->insert_count(); + } + unsigned approx_unique_insert_count() const { + return impl->approx_unique_insert_count(); + } + void seal() { + assert(!sealed); + sealed = true; + impl->seal(); + } + + void encode(bufferlist &bl) const; + void decode(bufferlist::iterator &bl); + void dump(Formatter *f) const; + static void generate_test_instances(list& o); + +private: + void reset_to_type(impl_type_t type); +}; +WRITE_CLASS_ENCODER(HitSet); +WRITE_CLASS_ENCODER(HitSet::Params); + +ostream& operator<<(ostream& out, const HitSet::Params& p); + +/** + * explicitly enumerate hash hits in the set + */ +class ExplicitHashHitSet : public HitSet::Impl { + uint64_t count; + hash_set hits; +public: + class Params : public HitSet::Params::Impl { + public: + virtual HitSet::impl_type_t get_type() const { + return HitSet::TYPE_EXPLICIT_HASH; + } + virtual HitSet::Impl *get_new_impl() const { + return new ExplicitHashHitSet; + } + static void generate_test_instances(list& o) { + o.push_back(new Params); + } + }; + + ExplicitHashHitSet() : count(0) {} + ExplicitHashHitSet(const ExplicitHashHitSet::Params *p) : count(0) {} + ExplicitHashHitSet(const ExplicitHashHitSet &o) : count(o.count), + hits(o.hits) {} + + HitSet::Impl *clone() const { + return new ExplicitHashHitSet(*this); + } + + HitSet::impl_type_t get_type() const { + return HitSet::TYPE_EXPLICIT_HASH; + } + bool is_full() const { + return false; + } + void insert(const hobject_t& o) { + hits.insert(o.hash); + ++count; + } + bool contains(const hobject_t& o) const { + return hits.count(o.hash); + } + unsigned insert_count() const { + return count; + } + unsigned approx_unique_insert_count() const { + return hits.size(); + } + void encode(bufferlist &bl) const { + ENCODE_START(1, 1, bl); + ::encode(count, bl); + ::encode(hits, bl); + ENCODE_FINISH(bl); + } + void decode(bufferlist::iterator &bl) { + DECODE_START(1, bl); + ::decode(count, bl); + ::decode(hits, bl); + DECODE_FINISH(bl); + } + void dump(Formatter *f) const { + f->dump_unsigned("insert_count", count); + f->open_array_section("hash_set"); + for (hash_set::const_iterator p = hits.begin(); p != hits.end(); ++p) + f->dump_unsigned("hash", *p); + f->close_section(); + } + static void generate_test_instances(list& o) { + o.push_back(new ExplicitHashHitSet); + o.push_back(new ExplicitHashHitSet); + o.back()->insert(hobject_t()); + o.back()->insert(hobject_t("asdf", "", CEPH_NOSNAP, 123, 1, "")); + o.back()->insert(hobject_t("qwer", "", CEPH_NOSNAP, 456, 1, "")); + } +}; +WRITE_CLASS_ENCODER(ExplicitHashHitSet) + +/** + * explicitly enumerate objects in the set + */ +class ExplicitObjectHitSet : public HitSet::Impl { + uint64_t count; + hash_set hits; +public: + class Params : public HitSet::Params::Impl { + public: + virtual HitSet::impl_type_t get_type() const { + return HitSet::TYPE_EXPLICIT_OBJECT; + } + virtual HitSet::Impl *get_new_impl() const { + return new ExplicitObjectHitSet; + } + static void generate_test_instances(list& o) { + o.push_back(new Params); + } + }; + + ExplicitObjectHitSet() : count(0) {} + ExplicitObjectHitSet(const ExplicitObjectHitSet::Params *p) : count(0) {} + ExplicitObjectHitSet(const ExplicitObjectHitSet &o) : count(o.count), + hits(o.hits) {} + + HitSet::Impl *clone() const { + return new ExplicitObjectHitSet(*this); + } + + HitSet::impl_type_t get_type() const { + return HitSet::TYPE_EXPLICIT_OBJECT; + } + bool is_full() const { + return false; + } + void insert(const hobject_t& o) { + hits.insert(o); + ++count; + } + bool contains(const hobject_t& o) const { + return hits.count(o); + } + unsigned insert_count() const { + return count; + } + unsigned approx_unique_insert_count() const { + return hits.size(); + } + void encode(bufferlist &bl) const { + ENCODE_START(1, 1, bl); + ::encode(count, bl); + ::encode(hits, bl); + ENCODE_FINISH(bl); + } + void decode(bufferlist::iterator &bl) { + DECODE_START(1, bl); + ::decode(count, bl); + ::decode(hits, bl); + DECODE_FINISH(bl); + } + void dump(Formatter *f) const { + f->dump_unsigned("insert_count", count); + f->open_array_section("set"); + for (hash_set::const_iterator p = hits.begin(); p != hits.end(); ++p) { + f->open_object_section("object"); + p->dump(f); + f->close_section(); + } + f->close_section(); + } + static void generate_test_instances(list& o) { + o.push_back(new ExplicitObjectHitSet); + o.push_back(new ExplicitObjectHitSet); + o.back()->insert(hobject_t()); + o.back()->insert(hobject_t("asdf", "", CEPH_NOSNAP, 123, 1, "")); + o.back()->insert(hobject_t("qwer", "", CEPH_NOSNAP, 456, 1, "")); + } +}; +WRITE_CLASS_ENCODER(ExplicitObjectHitSet) + +/** + * use a bloom_filter to track hits to the set + */ +class BloomHitSet : public HitSet::Impl { + compressible_bloom_filter bloom; + +public: + HitSet::impl_type_t get_type() const { + return HitSet::TYPE_BLOOM; + } + + class Params : public HitSet::Params::Impl { + public: + virtual HitSet::impl_type_t get_type() const { + return HitSet::TYPE_BLOOM; + } + virtual HitSet::Impl *get_new_impl() const { + return new BloomHitSet; + } + + double false_positive; ///< false positive probability + uint64_t target_size; ///< number of unique insertions we expect to this HitSet + uint64_t seed; ///< seed to use when initializing the bloom filter + + Params() : false_positive(0), target_size(0), seed(0) {} + Params(double fpp, uint64_t t, uint64_t s) + : false_positive(fpp), target_size(t), seed(s) {} + Params(const Params &o) + : false_positive(o.false_positive), + target_size(o.target_size), seed(o.seed) {} + ~Params() {} + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + uint16_t fpp_micro = static_cast(false_positive * 1000000.0); + ::encode(fpp_micro, bl); + ::encode(target_size, bl); + ::encode(seed, bl); + ENCODE_FINISH(bl); + } + void decode(bufferlist::iterator& bl) { + DECODE_START(1, bl); + uint16_t fpp_micro; + ::decode(fpp_micro, bl); + false_positive = fpp_micro * 1000000.0; + ::decode(target_size, bl); + ::decode(seed, bl); + DECODE_FINISH(bl); + } + void dump(Formatter *f) const { + f->dump_int("false_positive_probability", false_positive); + f->dump_int("target_size", target_size); + f->dump_int("seed", seed); + } + void dump_stream(ostream& o) const { + o << "false_positive_probability: " + << false_positive << ", target size: " << target_size + << ", seed: " << seed; + } + static void generate_test_instances(list& o) { + o.push_back(new Params); + o.push_back(new Params); + (*o.rbegin())->false_positive = .11; + (*o.rbegin())->target_size = 300; + (*o.rbegin())->seed = 99; + } + }; + + BloomHitSet() {} + BloomHitSet(unsigned inserts, double fpp, int seed) + : bloom(inserts, fpp, seed) + {} + BloomHitSet(const BloomHitSet::Params *p) : bloom(p->target_size, + p->false_positive, + p->seed) + {} + + BloomHitSet(const BloomHitSet &o) { + // oh god + bufferlist bl; + o.encode(bl); + bufferlist::iterator bli = bl.begin(); + this->decode(bli); + } + + HitSet::Impl *clone() const { + return new BloomHitSet(*this); + } + + bool is_full() const { + return bloom.is_full(); + } + + void insert(const hobject_t& o) { + bloom.insert(o.hash); + } + bool contains(const hobject_t& o) const { + return bloom.contains(o.hash); + } + unsigned insert_count() const { + return bloom.element_count(); + } + unsigned approx_unique_insert_count() const { + return bloom.approx_unique_element_count(); + } + void seal() { + // aim for a density of .5 (50% of bit set) + double pc = (double)bloom.density() * 2.0; + if (pc < 1.0) + bloom.compress(pc); + } + + void encode(bufferlist &bl) const { + ENCODE_START(1, 1, bl); + ::encode(bloom, bl); + ENCODE_FINISH(bl); + } + void decode(bufferlist::iterator &bl) { + DECODE_START(1, bl); + ::decode(bloom, bl); + DECODE_FINISH(bl); + } + void dump(Formatter *f) const { + f->open_object_section("bloom_filter"); + bloom.dump(f); + f->close_section(); + } + static void generate_test_instances(list& o) { + o.push_back(new BloomHitSet); + o.push_back(new BloomHitSet(10, .1, 1)); + o.back()->insert(hobject_t()); + o.back()->insert(hobject_t("asdf", "", CEPH_NOSNAP, 123, 1, "")); + o.back()->insert(hobject_t("qwer", "", CEPH_NOSNAP, 456, 1, "")); + } +}; +WRITE_CLASS_ENCODER(BloomHitSet) + +#endif diff --git a/src/osd/Makefile.am b/src/osd/Makefile.am index cae02015fced..cb9db20d7345 100644 --- a/src/osd/Makefile.am +++ b/src/osd/Makefile.am @@ -11,6 +11,7 @@ libosd_la_SOURCES = \ osd/ReplicatedPG.cc \ osd/ReplicatedBackend.cc \ osd/Ager.cc \ + osd/HitSet.cc \ osd/OSD.cc \ osd/OSDCap.cc \ osd/Watch.cc \ @@ -28,6 +29,7 @@ noinst_HEADERS += \ osd/ClassHandler.h \ osd/ErasureCodeInterface.h \ osd/ErasureCodePlugin.h \ + osd/HitSet.h \ osd/OSD.h \ osd/OSDCap.h \ osd/OSDMap.h \ diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index d549278f5756..cb51d8133491 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -18,6 +18,7 @@ #include #include #include +#include #include "msg/msg_types.h" #include "include/types.h" @@ -25,9 +26,10 @@ #include "include/CompatSet.h" #include "include/histogram.h" #include "include/interval_set.h" -#include "common/snap_types.h" #include "common/Formatter.h" +#include "common/bloom_filter.hpp" #include "common/hobject.h" +#include "common/snap_types.h" #include "Watch.h" #include "OpRequest.h" diff --git a/src/test/encoding/types.h b/src/test/encoding/types.h index e446e7d73756..b250cbdbc27d 100644 --- a/src/test/encoding/types.h +++ b/src/test/encoding/types.h @@ -80,6 +80,13 @@ TYPE(PullOp) TYPE(PushOp) TYPE(PushReplyOp) +#include "osd/HitSet.h" +TYPE(ExplicitHashHitSet) +TYPE(ExplicitObjectHitSet) +TYPE(BloomHitSet) +TYPE(HitSet) +TYPE(HitSet::Params) + #include "os/ObjectStore.h" TYPE(ObjectStore::Transaction)