From: Igor Fedotov Date: Mon, 5 Jun 2017 12:16:11 +0000 (-0700) Subject: some staff X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=b0c931305a97a3820fcfaef3563dbe6e6e0d97ae;p=rocksdb.git some staff --- diff --git a/db/dbformat.cc b/db/dbformat.cc index 7298f1df..a6d9b727 100644 --- a/db/dbformat.cc +++ b/db/dbformat.cc @@ -121,6 +121,24 @@ int InternalKeyComparator::Compare(const ParsedInternalKey& a, return r; } +int InternalKeyComparator::Compare(const Slice& a, + const ParsedInternalKey& b) const { + // Order by: + // increasing user key (according to user-supplied comparator) + // decreasing sequence number + int r = user_comparator_->Compare(ExtractUserKey(a), b.user_key); + PERF_COUNTER_ADD(user_key_comparison_count, 1); + if (r == 0) { + const uint64_t anum = DecodeFixed64(a.data() + a.size() - 8); + if (anum > b.sequence) { + r = -1; + } else if (anum < b.sequence) { + r = +1; + } + } + return r; +} + void InternalKeyComparator::FindShortestSeparator( std::string* start, const Slice& limit) const { diff --git a/db/dbformat.h b/db/dbformat.h index b4dbeb2a..a472efd5 100644 --- a/db/dbformat.h +++ b/db/dbformat.h @@ -149,6 +149,7 @@ class InternalKeyComparator : public Comparator { int Compare(const InternalKey& a, const InternalKey& b) const; int Compare(const ParsedInternalKey& a, const ParsedInternalKey& b) const; + int Compare(const Slice& a, const ParsedInternalKey& b) const; }; // Modules in this directory should keep internal keys wrapped inside diff --git a/db/memtable.cc b/db/memtable.cc index c4455886..11d1a7f7 100644 --- a/db/memtable.cc +++ b/db/memtable.cc @@ -200,17 +200,93 @@ void MemTable::UpdateFlushState() { int MemTable::KeyComparator::operator()(const char* prefix_len_key1, const char* prefix_len_key2) const { // Internal keys are encoded as length-prefixed strings. + PERF_COUNTER_ADD(user_key_comparison_count, 1); + PERF_TIMER_GUARD(user_key_comparison_time); + PERF_TIMER_START(user_key_comparison_time); Slice k1 = GetLengthPrefixedSlice(prefix_len_key1); Slice k2 = GetLengthPrefixedSlice(prefix_len_key2); - return comparator.Compare(k1, k2); + int r = comparator.Compare(k1, k2); + +/* char l1 = *prefix_len_key1 - 8; + char l2 = *prefix_len_key2 - 8; + size_t min_len = (l1 < l2) ? l1 : l2; + int r = memcmp(prefix_len_key1+1, prefix_len_key2+1, min_len); + + if (r == 0) { + if (l1 < l2) r = -1; + else if (l1 > l2) r = +1; + else { + uint64_t anum = *(uint64_t*)prefix_len_key1 + l1 + 1; + uint64_t bnum = *(uint64_t*)prefix_len_key2 + l1 + 1; + if (anum > bnum) { + r = -1; + } else if (anum < bnum) { + r = +1; + } +*/ +/* const char* end = prefix_len_key1 + l1; + const char* pos2 = prefix_len_key2 + l2 + 8; + for(const char* pos = end + 8; pos != end; --pos, --pos2 ){ + if (*pos < *pos2) { + r = -1; + break; + } else if (*pos > *pos2) { + r = +1; + break; + } + }*/ +/* } + }*/ + PERF_TIMER_STOP(user_key_comparison_time); + return r; } int MemTable::KeyComparator::operator()(const char* prefix_len_key, const Slice& key) const { + PERF_COUNTER_ADD(user_key_comparison_count, 1); + PERF_TIMER_GUARD(user_key_comparison_time); + PERF_TIMER_START(user_key_comparison_time); // Internal keys are encoded as length-prefixed strings. Slice a = GetLengthPrefixedSlice(prefix_len_key); - return comparator.Compare(a, key); + int r = comparator.Compare(a, key); +/* char l1 = *prefix_len_key - 8; + char l2 = key.size() - 8; + size_t min_len = (l1 < l2) ? l1 : l2; + int r = memcmp(prefix_len_key+1, key.data(), min_len); + + if (r == 0) { + if (l1 < l2) r = -1; + else if (l1 > l2) r = +1; + else { + const char* end = prefix_len_key + l1; + const char* pos2 = key.data() + key.size() + 8; + for(const char* pos = end + 8; pos != end; --pos, --pos2 ){ + if (*pos < *pos2) { + r = -1; + break; + } else if (*pos > *pos2) { + r = +1; + break; + } + } + } + }*/ + + PERF_TIMER_STOP(user_key_comparison_time); + return r; +} + +int MemTable::KeyComparator::operator()(const char* prefix_len_key, + const ParsedInternalKey& key) + const { + PERF_TIMER_GUARD(user_key_comparison_time); + PERF_TIMER_START(user_key_comparison_time); + // Internal keys are encoded as length-prefixed strings. + Slice k1 = GetLengthPrefixedSlice(prefix_len_key); + int r = comparator.Compare(k1, key); + PERF_TIMER_STOP(user_key_comparison_time); + return r; } Slice MemTableRep::UserKey(const char* key) const { diff --git a/db/memtable.h b/db/memtable.h index 92cbce97..1911b38e 100644 --- a/db/memtable.h +++ b/db/memtable.h @@ -86,6 +86,8 @@ class MemTable { const char* prefix_len_key2) const override; virtual int operator()(const char* prefix_len_key, const Slice& key) const override; + virtual int operator()(const char* prefix_len_key, + const ParsedInternalKey& key) const override; }; // MemTables are reference counted. The initial reference count diff --git a/include/rocksdb/memtablerep.h b/include/rocksdb/memtablerep.h index 3ae89a98..efdd661a 100644 --- a/include/rocksdb/memtablerep.h +++ b/include/rocksdb/memtablerep.h @@ -46,6 +46,7 @@ class Arena; class MemTableAllocator; class LookupKey; class Slice; +class ParsedInternalKey; class SliceTransform; class Logger; @@ -65,6 +66,9 @@ class MemTableRep { virtual int operator()(const char* prefix_len_key, const Slice& key) const = 0; + virtual int operator()(const char* prefix_len_key, + const ParsedInternalKey& key) const = 0; + virtual ~KeyComparator() { } }; diff --git a/include/rocksdb/perf_context.h b/include/rocksdb/perf_context.h index 76a5cac6..7be00f81 100644 --- a/include/rocksdb/perf_context.h +++ b/include/rocksdb/perf_context.h @@ -24,6 +24,7 @@ struct PerfContext { std::string ToString(bool exclude_zero_counters = false) const; uint64_t user_key_comparison_count; // total number of user key comparisons + uint64_t user_key_comparison_time; uint64_t block_cache_hit_count; // total number of block cache hits uint64_t block_read_count; // total number of block reads (with IO) uint64_t block_read_byte; // total number of bytes from block reads diff --git a/memtable/inlineskiplist.h b/memtable/inlineskiplist.h index 43bb09ac..674999e3 100644 --- a/memtable/inlineskiplist.h +++ b/memtable/inlineskiplist.h @@ -46,9 +46,13 @@ #include #include #include +#include "rocksdb/slice.h" +#include "util/coding.h" #include "port/port.h" #include "util/allocator.h" #include "util/random.h" +#include "db/dbformat.h" +//#include "port/port.h" namespace rocksdb { @@ -212,6 +216,7 @@ class InlineSkipList { // Return true if key is greater than the data stored in "n". Null n // is considered infinite. n should not be head_. bool KeyIsAfterNode(const char* key, Node* n) const; + bool KeyIsAfterNode(const ParsedInternalKey& key, Node* n) const; // Returns the earliest node with a key >= key. // Return nullptr if there is no such node. @@ -242,11 +247,15 @@ class InlineSkipList { // node isn't conveniently available. void FindSpliceForLevel(const char* key, Node* before, Node* after, int level, Node** out_prev, Node** out_next); + void FindSpliceForLevel(const ParsedInternalKey& key, Node* before, Node* after, int level, + Node** out_prev, Node** out_next); // Recomputes Splice levels from highest_level (inclusive) down to // lowest_level (inclusive). void RecomputeSpliceLevels(const char* key, Splice* splice, int recompute_level); + void RecomputeSpliceLevels(const ParsedInternalKey& key, Splice* splice, + int recompute_level); // No copying allowed InlineSkipList(const InlineSkipList&); @@ -434,6 +443,14 @@ bool InlineSkipList::KeyIsAfterNode(const char* key, return (n != nullptr) && (compare_(n->Key(), key) < 0); } +template +bool InlineSkipList::KeyIsAfterNode(const ParsedInternalKey& key, + Node* n) const { + // nullptr n is considered infinite + assert(n != head_); + return (n != nullptr) && (compare_(n->Key(), key) < 0); +} + template typename InlineSkipList::Node* InlineSkipList::FindGreaterOrEqual(const char* key) const { @@ -660,6 +677,26 @@ void InlineSkipList::FindSpliceForLevel(const char* key, } } +template +void InlineSkipList::FindSpliceForLevel(const ParsedInternalKey& key, + Node* before, Node* after, + int level, Node** out_prev, + Node** out_next) { + while (true) { + Node* next = before->Next(level); + assert(before == head_ || next == nullptr || + KeyIsAfterNode(next->Key(), before)); + assert(before == head_ || KeyIsAfterNode(key, before)); + if (next == after || !KeyIsAfterNode(key, next)) { + // found it + *out_prev = before; + *out_next = next; + return; + } + before = next; + } +} + template void InlineSkipList::RecomputeSpliceLevels(const char* key, Splice* splice, @@ -672,6 +709,18 @@ void InlineSkipList::RecomputeSpliceLevels(const char* key, } } +template +void InlineSkipList::RecomputeSpliceLevels(const ParsedInternalKey& key, + Splice* splice, + int recompute_level) { + assert(recompute_level > 0); + assert(recompute_level <= splice->height_); + for (int i = recompute_level - 1; i >= 0; --i) { + FindSpliceForLevel(key, splice->prev_[i + 1], splice->next_[i + 1], i, + &splice->prev_[i], &splice->next_[i]); + } +} + template template void InlineSkipList::Insert(const char* key, Splice* splice, @@ -692,6 +741,10 @@ void InlineSkipList::Insert(const char* key, Splice* splice, } assert(max_height <= kMaxPossibleHeight); +/* ParsedInternalKey key_int; + ParseInternalKey(GetLengthPrefixedSlice(key), &key_int);*/ + const char* key_int = key; + int recompute_height = 0; if (splice->height_ < max_height) { // Either splice has never been used or max_height has grown since @@ -744,7 +797,7 @@ void InlineSkipList::Insert(const char* key, Splice* splice, // our chances of success. ++recompute_height; } else if (splice->prev_[recompute_height] != head_ && - !KeyIsAfterNode(key, splice->prev_[recompute_height])) { + !KeyIsAfterNode(key_int, splice->prev_[recompute_height])) { // key is from before splice if (allow_partial_splice_fix) { // skip all levels with the same node without more comparisons @@ -756,7 +809,7 @@ void InlineSkipList::Insert(const char* key, Splice* splice, // we're pessimistic, recompute everything recompute_height = max_height; } - } else if (KeyIsAfterNode(key, splice->next_[recompute_height])) { + } else if (KeyIsAfterNode(key_int, splice->next_[recompute_height])) { // key is from after splice if (allow_partial_splice_fix) { Node* bad = splice->next_[recompute_height]; @@ -774,7 +827,7 @@ void InlineSkipList::Insert(const char* key, Splice* splice, } assert(recompute_height <= max_height); if (recompute_height > 0) { - RecomputeSpliceLevels(key, splice, recompute_height); + RecomputeSpliceLevels(key_int, splice, recompute_height); } bool splice_is_valid = true; @@ -795,7 +848,7 @@ void InlineSkipList::Insert(const char* key, Splice* splice, // search, because it should be unlikely that lots of nodes have // been inserted between prev[i] and next[i]. No point in using // next[i] as the after hint, because we know it is stale. - FindSpliceForLevel(key, splice->prev_[i], nullptr, i, &splice->prev_[i], + FindSpliceForLevel(key_int, splice->prev_[i], nullptr, i, &splice->prev_[i], &splice->next_[i]); // Since we've narrowed the bracket for level i, we might have @@ -810,7 +863,7 @@ void InlineSkipList::Insert(const char* key, Splice* splice, for (int i = 0; i < height; ++i) { if (i >= recompute_height && splice->prev_[i]->Next(i) != splice->next_[i]) { - FindSpliceForLevel(key, splice->prev_[i], nullptr, i, &splice->prev_[i], + FindSpliceForLevel(key_int, splice->prev_[i], nullptr, i, &splice->prev_[i], &splice->next_[i]); } assert(splice->next_[i] == nullptr || diff --git a/monitoring/perf_context.cc b/monitoring/perf_context.cc index 2f876f7f..79520c5c 100644 --- a/monitoring/perf_context.cc +++ b/monitoring/perf_context.cc @@ -24,6 +24,7 @@ namespace rocksdb { void PerfContext::Reset() { #if !defined(NPERF_CONTEXT) && !defined(IOS_CROSS_COMPILE) user_key_comparison_count = 0; + user_key_comparison_time = 0; block_cache_hit_count = 0; block_read_count = 0; block_read_byte = 0; @@ -101,6 +102,7 @@ std::string PerfContext::ToString(bool exclude_zero_counters) const { #else std::ostringstream ss; PERF_CONTEXT_OUTPUT(user_key_comparison_count); + PERF_CONTEXT_OUTPUT(user_key_comparison_time); PERF_CONTEXT_OUTPUT(block_cache_hit_count); PERF_CONTEXT_OUTPUT(block_read_count); PERF_CONTEXT_OUTPUT(block_read_byte);