## Unreleased
### Public API Change
* For users of `Statistics` objects created via `CreateDBStatistics()`, the format of the string returned by its `ToString()` method has changed.
+* With LRUCache, when high_pri_pool_ratio > 0, the midpoint insertion strategy is enabled: low-pri items are put at the tail of the low-pri list (the midpoint) when they are first inserted into the cache. This makes cache entries that never get hit age out faster, improving cache efficiency when a large background scan is present.
## 5.14.0 (5/16/2018)
### Public API Change
void LRUCacheShard::LRU_Insert(LRUHandle* e) {
assert(e->next == nullptr);
assert(e->prev == nullptr);
- if (high_pri_pool_ratio_ > 0 && e->IsHighPri()) {
+ if (high_pri_pool_ratio_ > 0 && (e->IsHighPri() || e->HasHit())) {
// Insert "e" at the head of the LRU list.
e->next = &lru_;
e->prev = lru_.prev;
}
}
-void* LRUCacheShard::operator new(size_t size) {
- return port::cacheline_aligned_alloc(size);
-}
-
-void* LRUCacheShard::operator new(size_t /*size*/, void* ptr) { return ptr; }
-
-void LRUCacheShard::operator delete(void *memblock) {
- port::cacheline_aligned_free(memblock);
-}
-
-void LRUCacheShard::operator delete(void* /*memblock*/, void* /*ptr*/) {}
-
void LRUCacheShard::SetCapacity(size_t capacity) {
autovector<LRUHandle*> last_reference_list;
{
LRU_Remove(e);
}
e->refs++;
+ e->SetHit();
}
return reinterpret_cast<Cache::Handle*>(e);
}
bool InCache() { return flags & 1; }
bool IsHighPri() { return flags & 2; }
bool InHighPriPool() { return flags & 4; }
+ bool HasHit() { return flags & 8; }
void SetInCache(bool in_cache) {
if (in_cache) {
}
}
+ void SetHit() { flags |= 8; }
+
void Free() {
assert((refs == 1 && InCache()) || (refs == 0 && !InCache()));
if (deleter) {
// Retrieves high pri pool ratio
double GetHighPriPoolRatio();
- // Overloading to aligned it to cache line size
- // They are used by tests.
- void* operator new(size_t);
-
- // placement new
- void* operator new(size_t, void*);
-
- void operator delete(void *);
-
- // placement delete, does nothing.
- void operator delete(void*, void*);
-
private:
void LRU_Remove(LRUHandle* e);
void LRU_Insert(LRUHandle* e);
class LRUCacheTest : public testing::Test {
public:
LRUCacheTest() {}
- ~LRUCacheTest() {}
+ ~LRUCacheTest() { DeleteCache(); }
+
+ // Tears down the shard currently held in cache_, if any: runs its destructor
+ // explicitly, hands the storage back through port::cacheline_aligned_free,
+ // and resets cache_ to nullptr. Does nothing when cache_ is already null.
+ void DeleteCache() {
+   if (cache_ == nullptr) {
+     return;
+   }
+   cache_->~LRUCacheShard();
+   port::cacheline_aligned_free(cache_);
+   cache_ = nullptr;
+ }
void NewCache(size_t capacity, double high_pri_pool_ratio = 0.0) {
- cache_.reset(new LRUCacheShard(capacity, false /*strict_capcity_limit*/,
- high_pri_pool_ratio));
+ DeleteCache();
+ cache_ = reinterpret_cast<LRUCacheShard*>(
+ port::cacheline_aligned_alloc(sizeof(LRUCacheShard)));
+ new (cache_) LRUCacheShard(capacity, false /*strict_capcity_limit*/,
+ high_pri_pool_ratio);
}
void Insert(const std::string& key,
}
private:
- std::unique_ptr<LRUCacheShard> cache_;
+ LRUCacheShard* cache_ = nullptr;
};
TEST_F(LRUCacheTest, BasicLRU) {
ValidateLRUList({"e", "z", "d", "u", "v"});
}
+TEST_F(LRUCacheTest, MidpointInsertion) {
+ // Capacity 5 with high_pri_pool_ratio 0.45: 2 cache entries go to the high-pri pool.
+ NewCache(5, 0.45);
+
+ Insert("a", Cache::Priority::LOW);
+ Insert("b", Cache::Priority::LOW);
+ Insert("c", Cache::Priority::LOW);
+ Insert("x", Cache::Priority::HIGH);
+ Insert("y", Cache::Priority::HIGH);
+ ValidateLRUList({"a", "b", "c", "x", "y"}, 2);
+
+ // A new low-pri entry is inserted at the tail of the low-pri list (the midpoint); "a" is evicted.
+ // After a lookup, the entry moves to the tail of the full list.
+ Insert("d", Cache::Priority::LOW);
+ ValidateLRUList({"b", "c", "d", "x", "y"}, 2);
+ ASSERT_TRUE(Lookup("d"));
+ ValidateLRUList({"b", "c", "x", "y", "d"}, 2);
+
+ // A new high-pri entry is inserted at the tail of the full list.
+ Insert("z", Cache::Priority::HIGH);
+ ValidateLRUList({"c", "x", "y", "d", "z"}, 2);
+}
+
TEST_F(LRUCacheTest, EntriesWithPriority) {
// Allocate 2 cache entries to high-pri pool.
NewCache(5, 0.45);
Insert("a", Cache::Priority::LOW);
ValidateLRUList({"v", "X", "a", "Y", "Z"}, 2);
- // Low-pri entries will be inserted to head of low-pri pool after lookup.
+ // Low-pri entries will be inserted to head of high-pri pool after lookup.
ASSERT_TRUE(Lookup("v"));
- ValidateLRUList({"X", "a", "v", "Y", "Z"}, 2);
+ ValidateLRUList({"X", "a", "Y", "Z", "v"}, 2);
// High-pri entries will be inserted to the head of the list after lookup.
ASSERT_TRUE(Lookup("X"));
- ValidateLRUList({"a", "v", "Y", "Z", "X"}, 2);
+ ValidateLRUList({"a", "Y", "Z", "v", "X"}, 2);
ASSERT_TRUE(Lookup("Z"));
- ValidateLRUList({"a", "v", "Y", "X", "Z"}, 2);
+ ValidateLRUList({"a", "Y", "v", "X", "Z"}, 2);
Erase("Y");
ValidateLRUList({"a", "v", "X", "Z"}, 2);
Insert("g", Cache::Priority::LOW);
ValidateLRUList({"d", "e", "f", "g", "Z"}, 1);
ASSERT_TRUE(Lookup("d"));
- ValidateLRUList({"e", "f", "g", "d", "Z"}, 1);
+ ValidateLRUList({"e", "f", "g", "Z", "d"}, 2);
}
} // namespace rocksdb
static uint32_t high_pri_insert_count;
static uint32_t low_pri_insert_count;
- MockCache() : LRUCache(1 << 25, 0, false, 0.0) {}
+ // Builds the underlying LRUCache with a 32 MiB (1 << 25 bytes) capacity,
+ // zero shard bits, no strict capacity limit and no high-pri pool.
+ MockCache()
+     : LRUCache(static_cast<size_t>(1) << 25 /*capacity*/,
+                0 /*num_shard_bits*/,
+                false /*strict_capacity_limit*/,
+                0.0 /*high_pri_pool_ratio*/) {}
virtual Status Insert(const Slice& key, void* value, size_t charge,
void (*deleter)(const Slice& key, void* value),
bool strict_capacity_limit = false;
// Percentage of cache reserved for high priority entries.
+ // If greater than zero, the LRU list will be split into a high-pri
+ // list and a low-pri list. High-pri entries will be inserted at the
+ // tail of the high-pri list, while low-pri entries will first be inserted
+ // into the low-pri list (the midpoint). This is referred to as the
+ // midpoint insertion strategy; it makes entries that never get hit in
+ // the cache age out faster.
+ //
+ // See also
+ // BlockBasedTableOptions::cache_index_and_filter_blocks_with_high_priority.
double high_pri_pool_ratio = 0.0;
LRUCacheOptions() {}
"readreverse,"
"compact,"
"compactall,"
- "readrandom,"
"multireadrandom,"
"readseq,"
"readtocache,"
"readreverse,"
"readwhilewriting,"
"readwhilemerging,"
+ "readwhilescanning,"
"readrandomwriterandom,"
"updaterandom,"
"xorupdaterandom,"
"reads\n"
"\treadwhilemerging -- 1 merger, N threads doing random "
"reads\n"
+ "\treadwhilescanning -- 1 thread doing full table scan, "
+ "N threads doing random reads\n"
"\treadrandomwriterandom -- N threads doing random-read, "
"random-write\n"
"\tupdaterandom -- N threads doing read-modify-write for random "
} else if (name == "readwhilemerging") {
num_threads++; // Add extra thread for writing
method = &Benchmark::ReadWhileMerging;
+ } else if (name == "readwhilescanning") {
+ num_threads++; // Add extra thread for scanning
+ method = &Benchmark::ReadWhileScanning;
} else if (name == "readrandomwriterandom") {
method = &Benchmark::ReadRandomWriteRandom;
} else if (name == "readrandommergerandom") {
thread->stats.AddBytes(bytes);
}
+ // Benchmark body for "readwhilescanning". Threads with a positive tid run
+ // ReadRandom; the remaining thread runs the scan loop in BGScan.
+ void ReadWhileScanning(ThreadState* thread) {
+   const bool is_reader = thread->tid > 0;
+   if (!is_reader) {
+     BGScan(thread);
+     return;
+   }
+   ReadRandom(thread);
+ }
+
+ // Scan loop used by ReadWhileScanning. Walks the single configured DB with
+ // one iterator: whenever the iterator is not positioned on an entry it is
+ // moved back to the first key, otherwise it is advanced by one. The loop runs
+ // until duration.Done(1) reports completion, and every step is recorded as
+ // one kSeek operation in the thread's stats.
+ //
+ // Aborts the process if multiple DBs are configured (FLAGS_num_multi_db > 0)
+ // or if the iterator reports a non-ok status.
+ void BGScan(ThreadState* thread) {
+   if (FLAGS_num_multi_db > 0) {
+     fprintf(stderr, "Not supporting multiple DBs.\n");
+     abort();
+   }
+   assert(db_.db != nullptr);
+   ReadOptions read_options;
+   Iterator* iter = db_.db->NewIterator(read_options);
+
+   // Cast explicitly so the argument always matches the format specifier,
+   // whatever integer type reads_ has on this platform.
+   fprintf(stderr, "num reads to do %llu\n",
+           static_cast<unsigned long long>(reads_));
+   Duration duration(FLAGS_duration, reads_);
+   uint64_t num_seek_to_first = 0;
+   uint64_t num_next = 0;
+   while (!duration.Done(1)) {
+     // Check the status before looking at Valid(): an iterator that hit an
+     // error is also not valid, so testing Valid() first would silently
+     // restart the scan and never surface the failure.
+     if (!iter->status().ok()) {
+       fprintf(stderr, "Iterator error: %s\n",
+               iter->status().ToString().c_str());
+       abort();
+     } else if (!iter->Valid()) {
+       // Not positioned (fresh iterator or ran past the last key): restart.
+       iter->SeekToFirst();
+       num_seek_to_first++;
+     } else {
+       iter->Next();
+       num_next++;
+     }
+
+     thread->stats.FinishedOps(&db_, db_.db, 1, kSeek);
+   }
+   delete iter;
+ }
+
// Given a key K and value V, this puts (K+"0", V), (K+"1", V), (K+"2", V)
// in DB atomically i.e in a single batch. Also refer GetMany.
Status PutMany(DB* db, const WriteOptions& writeoptions, const Slice& key,