Summary: Added a prefix_seek flag in ReadOptions to indicate that Seek is prefix aware(might not return data with different prefix), and also not bound to a specific prefix. Multiple Seeks and range scans can be invoked on the same iterator. If a specific prefix is specified, this flag will be ignored. Just a quick prototype that works for PrefixHashRep, the new lockless memtable could be easily extended with this support too.
Test Plan: test it on Leaf
Reviewers: dhruba, kailiu, sdong, igor
Reviewed By: igor
CC: leveldb
Differential Revision: https://reviews.facebook.net/D13929
// Collect together all needed child iterators for mem
std::vector<Iterator*> list;
mem_->Ref();
- list.push_back(mem_->NewIterator(options.prefix));
+ list.push_back(mem_->NewIterator(options));
cleanup->mem.push_back(mem_);
for (unsigned int i = 0; i < immutables.size(); i++) {
MemTable* m = immutables[i];
m->Ref();
- list.push_back(m->NewIterator(options.prefix));
+ list.push_back(m->NewIterator(options));
cleanup->mem.push_back(m);
}
class MemTableIterator: public Iterator {
public:
- explicit MemTableIterator(MemTableRep* table)
- : iter_(table->GetIterator()) { }
-
- MemTableIterator(MemTableRep* table, const Slice* prefix)
- : iter_(table->GetPrefixIterator(*prefix)) { }
+ MemTableIterator(MemTableRep* table, const ReadOptions& options)
+ : iter_() {
+ if (options.prefix) {
+ iter_ = table->GetPrefixIterator(*options.prefix);
+ } else if (options.prefix_seek) {
+ iter_ = table->GetDynamicPrefixIterator();
+ } else {
+ iter_ = table->GetIterator();
+ }
+ }
virtual bool Valid() const { return iter_->Valid(); }
virtual void Seek(const Slice& k) { iter_->Seek(EncodeKey(&tmp_, k)); }
void operator=(const MemTableIterator&);
};
-Iterator* MemTable::NewIterator(const Slice* prefix) {
- if (prefix) {
- return new MemTableIterator(table_.get(), prefix);
- } else {
- return new MemTableIterator(table_.get());
- }
+Iterator* MemTable::NewIterator(const ReadOptions& options) {
+ return new MemTableIterator(table_.get(), options);
}
port::RWMutex* MemTable::GetLock(const Slice& key) {
// iterator are internal keys encoded by AppendInternalKey in the
// db/dbformat.{h,cc} module.
//
- // If a prefix is supplied, it is passed to the underlying MemTableRep as a
- // hint that the iterator only need to support access to keys with that
- // prefix.
- Iterator* NewIterator(const Slice* prefix = nullptr);
+ // If options.prefix is supplied, it is passed to the underlying MemTableRep
+ // as a hint that the iterator only need to support access to keys with that
+ // specific prefix.
+ // If options.prefix is not supplied and options.prefix_seek is set, the
+ // iterator is not bound to a specific prefix. However, the semantics of
+ // Seek is changed - the result might only include keys with the same prefix
+ // as the seek-key.
+ Iterator* NewIterator(const ReadOptions& options = ReadOptions());
// Add an entry into memtable that maps key to value at the
// specified sequence number and with the specified type.
virtual void FindShortSuccessor(std::string* key) const {}
- private:
-
};
class PrefixTest {
Options options;
};
+TEST(PrefixTest, DynamicPrefixIterator) {
+
+ DestroyDB(kDbName, Options());
+ auto db = OpenDb();
+ WriteOptions write_options;
+ ReadOptions read_options;
+
+ std::vector<uint64_t> prefixes;
+ for (uint64_t i = 0; i < FLAGS_total_prefixes; ++i) {
+ prefixes.push_back(i);
+ }
+
+ if (FLAGS_random_prefix) {
+ std::random_shuffle(prefixes.begin(), prefixes.end());
+ }
+
+ // insert x random prefix, each with y continuous element.
+ for (auto prefix : prefixes) {
+ for (uint64_t sorted = 0; sorted < FLAGS_items_per_prefix; sorted++) {
+ TestKey test_key(prefix, sorted);
+
+ Slice key = TestKeyToSlice(test_key);
+ std::string value = "v" + std::to_string(sorted);
+
+ ASSERT_OK(db->Put(write_options, key, value));
+ }
+ }
+
+ // test seek existing keys
+ HistogramImpl hist_seek_time;
+ HistogramImpl hist_seek_comparison;
+
+ if (FLAGS_use_prefix_hash_memtable) {
+ read_options.prefix_seek = true;
+ }
+ std::unique_ptr<Iterator> iter(db->NewIterator(read_options));
+
+ for (auto prefix : prefixes) {
+ TestKey test_key(prefix, FLAGS_items_per_prefix / 2);
+ Slice key = TestKeyToSlice(test_key);
+ std::string value = "v" + std::to_string(0);
+
+ perf_context.Reset();
+ StopWatchNano timer(Env::Default(), true);
+ uint64_t total_keys = 0;
+ for (iter->Seek(key); iter->Valid(); iter->Next()) {
+ if (FLAGS_trigger_deadlock) {
+ std::cout << "Behold the deadlock!\n";
+ db->Delete(write_options, iter->key());
+ }
+ auto test_key = SliceToTestKey(iter->key());
+ if (test_key->prefix != prefix) break;
+ total_keys++;
+ }
+ hist_seek_time.Add(timer.ElapsedNanos());
+ hist_seek_comparison.Add(perf_context.user_key_comparison_count);
+ ASSERT_EQ(total_keys, FLAGS_items_per_prefix - FLAGS_items_per_prefix/2);
+ }
+
+ std::cout << "Seek key comparison: \n"
+ << hist_seek_comparison.ToString()
+ << "Seek time: \n"
+ << hist_seek_time.ToString();
+
+ // test non-existing keys
+ HistogramImpl hist_no_seek_time;
+ HistogramImpl hist_no_seek_comparison;
+
+ for (auto prefix = FLAGS_total_prefixes;
+ prefix < FLAGS_total_prefixes + 100;
+ prefix++) {
+ TestKey test_key(prefix, 0);
+ Slice key = TestKeyToSlice(test_key);
+
+ perf_context.Reset();
+ StopWatchNano timer(Env::Default(), true);
+ iter->Seek(key);
+ hist_no_seek_time.Add(timer.ElapsedNanos());
+ hist_no_seek_comparison.Add(perf_context.user_key_comparison_count);
+ ASSERT_TRUE(!iter->Valid());
+ }
+
+ std::cout << "non-existing Seek key comparison: \n"
+ << hist_no_seek_comparison.ToString()
+ << "non-existing Seek time: \n"
+ << hist_no_seek_time.ToString();
+}
TEST(PrefixTest, PrefixHash) {
return GetIterator();
}
+ // Return an iterator that has a special Seek semantics. The result of
+ // a Seek might only include keys with the same prefix as the target key.
+ virtual std::shared_ptr<Iterator> GetDynamicPrefixIterator() {
+ return GetIterator();
+ }
+
protected:
// When *key is an internal key concatenated with the value, returns the
// user key.
// Default: true
bool fill_cache;
+ // If this option is set and memtable implementation allows, Seek
+ // might only return keys with the same prefix as the seek-key
+ bool prefix_seek;
+
// If "snapshot" is non-nullptr, read as of the supplied snapshot
// (which must belong to the DB that is being read and which must
// not have been released). If "snapshot" is nullptr, use an impliicit
ReadOptions()
: verify_checksums(false),
fill_cache(true),
+ prefix_seek(false),
snapshot(nullptr),
prefix(nullptr),
read_tier(kReadAllTier) {
}
ReadOptions(bool cksum, bool cache) :
verify_checksums(cksum), fill_cache(cache),
- snapshot(nullptr), prefix(nullptr),
+ prefix_seek(false), snapshot(nullptr), prefix(nullptr),
read_tier(kReadAllTier) {
}
};
virtual std::shared_ptr<MemTableRep::Iterator> GetIterator(
const Slice& slice) override;
+ virtual std::shared_ptr<MemTableRep::Iterator> GetDynamicPrefixIterator()
+ override {
+ return std::make_shared<DynamicPrefixIterator>(*this);
+ }
+
std::shared_ptr<MemTableRep::Iterator> GetTransformIterator(
const Slice& transformed);
private:
+ friend class DynamicPrefixIterator;
typedef std::set<const char*, Compare> Bucket;
typedef std::unordered_map<Slice, std::shared_ptr<Bucket>> BucketMap;
private:
const ReadLock l_;
};
+
+
+ class DynamicPrefixIterator : public MemTableRep::Iterator {
+ private:
+ // the underlying memtable rep
+ const TransformRep& memtable_rep_;
+ // the result of a prefix seek
+ std::unique_ptr<MemTableRep::Iterator> bucket_iterator_;
+
+ public:
+ explicit DynamicPrefixIterator(const TransformRep& memtable_rep)
+ : memtable_rep_(memtable_rep) {}
+
+ virtual ~DynamicPrefixIterator() { };
+
+ // Returns true iff the iterator is positioned at a valid node.
+ virtual bool Valid() const {
+ return bucket_iterator_ && bucket_iterator_->Valid();
+ }
+
+ // Returns the key at the current position.
+ // REQUIRES: Valid()
+ virtual const char* key() const {
+ assert(Valid());
+ return bucket_iterator_->key();
+ }
+
+ // Advances to the next position.
+ // REQUIRES: Valid()
+ virtual void Next() {
+ assert(Valid());
+ bucket_iterator_->Next();
+ }
+
+ // Advances to the previous position.
+ // REQUIRES: Valid()
+ virtual void Prev() {
+ assert(Valid());
+ bucket_iterator_->Prev();
+ }
+
+ // Advance to the first entry with a key >= target within the
+ // same bucket as target
+ virtual void Seek(const char* target) {
+ Slice prefix = memtable_rep_.transform_->Transform(
+ memtable_rep_.UserKey(target));
+
+ ReadLock l(&memtable_rep_.rwlock_);
+ auto bucket = memtable_rep_.buckets_.find(prefix);
+ if (bucket == memtable_rep_.buckets_.end()) {
+ bucket_iterator_.reset(nullptr);
+ } else {
+ bucket_iterator_.reset(
+ new TransformIterator(bucket->second, memtable_rep_.GetLock(prefix)));
+ bucket_iterator_->Seek(target);
+ }
+ }
+
+ // Position at the first entry in collection.
+ // Final state of iterator is Valid() iff collection is not empty.
+ virtual void SeekToFirst() {
+ // Prefix iterator does not support total order.
+ // We simply set the iterator to invalid state
+ bucket_iterator_.reset(nullptr);
+ }
+
+ // Position at the last entry in collection.
+ // Final state of iterator is Valid() iff collection is not empty.
+ virtual void SeekToLast() {
+ // Prefix iterator does not support total order.
+ // We simply set the iterator to invalid state
+ bucket_iterator_.reset(nullptr);
+ }
+ };
};
class PrefixHashRep : public TransformRep {