}
std::shared_ptr<rocksdb::Cache> RocksDBStore::create_block_cache(
+ const std::string& name,
const std::string& cache_type, size_t cache_size, double cache_prio_high) {
std::shared_ptr<rocksdb::Cache> cache;
auto shard_bits = cct->_conf->rocksdb_cache_shard_bits;
if (cache_type == "binned_lru") {
- cache = rocksdb_cache::NewBinnedLRUCache(cct, cache_size, shard_bits, false, cache_prio_high);
+ cache = rocksdb_cache::NewBinnedLRUCache(cct, name, cache_size, shard_bits, false, cache_prio_high);
} else if (cache_type == "lru") {
cache = rocksdb::NewLRUCache(cache_size, shard_bits);
} else if (cache_type == "clock") {
uint64_t row_cache_size = cache_size * cct->_conf->rocksdb_cache_row_ratio;
uint64_t block_cache_size = cache_size - row_cache_size;
- bbt_opts.block_cache = create_block_cache(cct->_conf->rocksdb_cache_type, block_cache_size);
+ bbt_opts.block_cache = create_block_cache(rocksdb::kDefaultColumnFamilyName, cct->_conf->rocksdb_cache_type, block_cache_size);
if (!bbt_opts.block_cache) {
return -EINVAL;
}
column_bbt_opts.no_block_cache = true;
} else {
if (require_new_block_cache) {
- block_cache = create_block_cache(cache_type, cache_size, high_pri_pool_ratio);
+ block_cache = create_block_cache(column_name, cache_type, cache_size, high_pri_pool_ratio);
if (!block_cache) {
dout(5) << __func__ << " failed to create block cache for params: " << block_cache_opt << dendl;
return -EINVAL;
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
+#include "common/admin_socket.h"
+#include "common/pretty_binary.h"
+#include <fmt/format.h>
#endif
#include "BinnedLRUCache.h"
#include <stdio.h>
#include <stdlib.h>
#include <string>
+#include "common/debug.h"
+#include "common/perf_counters_collection.h"
#define dout_context cct
#define dout_subsys ceph_subsys_rocksdb
void BinnedLRUCacheShard::EvictFromLRU(size_t charge,
BinnedLRUHandle*& deleted) {
+
while (usage_ + charge > capacity_ && lru_.next != &lru_) {
BinnedLRUHandle* old = lru_.next;
ceph_assert(old->InCache());
ceph_assert(old->refs == 1); // LRU list contains elements which may be evicted
+ stats[l_elems]--;
LRU_Remove(old);
table_.Remove(old->key(), old->hash);
old->SetInCache(false);
}
}
+// Walks the singly linked chain of handles starting at `deleted` (linked
+// through `next`), calls Free() on each one, and returns how many handles
+// were processed. A null `deleted` yields 0.
+int BinnedLRUCacheShard::FreeDeleted(BinnedLRUHandle* deleted) {
+  int freed = 0;
+  for (BinnedLRUHandle* cur = deleted; cur != nullptr; ++freed) {
+    // Fetch the successor before calling Free() on the current handle.
+    BinnedLRUHandle* following = cur->next;
+    cur->Free();
+    cur = following;
+  }
+  return freed;
+}
+
void BinnedLRUCacheShard::SetCapacity(size_t capacity) {
BinnedLRUHandle* deleted = nullptr;
{
FreeDeleted(deleted);
}
+// Refreshes the derived entries of this shard's statistics (capacity, usage,
+// pinned, misses) while holding mutex_, then returns a copy of the whole set.
+ShardStats BinnedLRUCacheShard::GetStats() {
+  std::lock_guard<std::mutex> guard(mutex_);
+  // misses are not counted directly: they are the lookups that did not hit
+  stats[l_misses] = stats[l_lookups] - stats[l_hits];
+  // pinned is the part of usage_ that lru_usage_ does not account for
+  stats[l_pinned] = usage_ - lru_usage_;
+  stats[l_usage] = usage_;
+  stats[l_capacity] = capacity_;
+  return stats;
+}
+
+// Zeroes the statistics from l_inserts through l_misses while holding mutex_.
+// Entries that precede l_inserts (capacity, usage, pinned, elems) are left
+// untouched.
+void BinnedLRUCacheShard::ClearStats() {
+  std::lock_guard<std::mutex> guard(mutex_);
+  int idx = l_inserts;
+  while (idx <= l_misses) {
+    stats[idx] = 0;
+    ++idx;
+  }
+}
+
+// Writes the value of every element of age_bins to `out`, each followed by a
+// single space, and then ends the line.
+// NOTE(review): unlike GetStats()/ClearStats() this does not take mutex_ -
+// confirm that callers tolerate concurrent changes to age_bins.
+void BinnedLRUCacheShard::print_bins(std::stringstream& out) const
+{
+  for (const auto& bin : age_bins) {
+    out << *bin;
+    out << " ";
+  }
+  out << std::endl;
+}
+
void BinnedLRUCacheShard::SetStrictCapacityLimit(bool strict_capacity_limit) {
std::lock_guard<std::mutex> l(mutex_);
strict_capacity_limit_ = strict_capacity_limit;
rocksdb::Cache::Handle* BinnedLRUCacheShard::Lookup(const rocksdb::Slice& key, uint32_t hash) {
std::lock_guard<std::mutex> l(mutex_);
+ stats[l_lookups]++;
BinnedLRUHandle* e = table_.Lookup(key, hash);
if (e != nullptr) {
ceph_assert(e->InCache());
}
e->refs++;
e->SetHit();
+ stats[l_hits]++;
}
return reinterpret_cast<rocksdb::Cache::Handle*>(e);
}
last_reference = Unref(e);
if (last_reference) {
usage_ -= e->charge;
+ stats[l_elems]--;
}
if (e->refs == 1 && e->InCache()) {
// The item is still in cache, and nobody else holds a reference to it
Unref(e);
usage_ -= e->charge;
last_reference = true;
+ stats[l_elems]--;
} else {
// put the item on the list to be potentially freed
LRU_Insert(e);
{
std::lock_guard<std::mutex> l(mutex_);
+ stats[l_elems]++;
+ stats[l_inserts]++;
// Free the space following strict LRU policy until enough space
// is freed or the lru list is empty
EvictFromLRU(charge, deleted);
bool last_reference = false;
{
std::lock_guard<std::mutex> l(mutex_);
+ stats[l_elems]--;
e = table_.Remove(key, hash);
if (e != nullptr) {
last_reference = Unref(e);
return handle->deleter;
}
-BinnedLRUCache::BinnedLRUCache(CephContext *c,
- size_t capacity,
- int num_shard_bits,
- bool strict_capacity_limit,
- double high_pri_pool_ratio)
- : ShardedCache(capacity, num_shard_bits, strict_capacity_limit), cct(c) {
+#undef dout_context
+#define dout_context cache.cct
+
+// Admin-socket hook exposing the statistics of one BinnedLRUCache.
+// Two commands are registered, both suffixed with the cache name:
+//   "rocksdb show cache <name>" with an optional integer `shard_no` argument:
+//       without shard_no - a table with one row of statistics per shard
+//       with shard_no    - the age bins of that single shard
+//   "rocksdb reset cache <name>" - clears the counters of every shard
+// The commands are unregistered again when the hook is destroyed.
+class BinnedLRUCache::SocketHook : public AdminSocketHook {
+  BinnedLRUCache& cache;
+
+public:
+  explicit SocketHook(BinnedLRUCache& _cache)
+    : cache(_cache)
+  {
+    AdminSocket *admin_socket = cache.cct->get_admin_socket();
+    if (admin_socket) {
+      int r = admin_socket->register_command(
+        std::string("rocksdb show cache ") + cache.name + std::string(" name=shard_no,type=CephInt,req=false"),
+        this, "show details of cache " + cache.name);
+      if (r != 0) {
+        // Failure to register the first command is logged and tolerated;
+        // the second command is then not registered either.
+        dout(1) << __func__ << " cannot register SocketHook" << dendl;
+        return;
+      }
+      r = admin_socket->register_command(
+        std::string("rocksdb reset cache ") + cache.name,
+        this, "clear stats of cache " + cache.name);
+      ceph_assert(r == 0);
+    }
+  }
+  ~SocketHook() {
+    AdminSocket *admin_socket = cache.cct->get_admin_socket();
+    if (admin_socket) {
+      admin_socket->unregister_commands(this);
+    }
+  }
+  // Handles one admin-socket command.
+  // Returns 0 on success, -EINVAL when `shard_no` is outside
+  // [0, num_shards_), and -ENOSYS for a command this hook does not know.
+  // Error text goes to `ss`; regular output is appended to `out`.
+  int call(std::string_view command,
+           const cmdmap_t& cmdmap,
+           const bufferlist& inbl,
+           Formatter *f,
+           std::ostream& ss,
+           bufferlist& out) override
+  {
+    int r = 0;
+    if (command == std::string("rocksdb show cache ") + cache.name) {
+      int64_t shard_no = -1;
+      std::stringstream outstr;
+      if (!ceph::common::cmd_getval(cmdmap, "shard_no", shard_no)) {
+        // No shard requested: print a header row, then one row per shard.
+        outstr << fmt::format("{:>5}", "shard");
+        for (int j = 0; j < stat_cnt; j++) {
+          outstr << fmt::format("{:>10}", ShardStats::stat_name[j]);
+        }
+        outstr << std::endl;
+        for (int i = 0; i < cache.num_shards_; i++) {
+          outstr << fmt::format("{:>5}", i);
+          ShardStats s = cache.shards_[i].GetStats();
+          for (int j = 0; j < stat_cnt; j++) {
+            outstr << fmt::format("{:>10}", s[j]);
+          }
+          outstr << std::endl;
+        }
+      } else if (shard_no < 0 || shard_no >= cache.num_shards_) {
+        // Validate on the 64-bit value: narrowing to int first could turn an
+        // out-of-range request into a valid-looking (or negative) index.
+        ss << "Invalid shard_no " << shard_no << std::endl;
+        r = -EINVAL;
+      } else {
+        cache.printshard(static_cast<int>(shard_no), outstr);
+      }
+      out.append(outstr.str());
+    } else if (command == std::string("rocksdb reset cache ") + cache.name) {
+      for (int i = 0; i < cache.num_shards_; i++) {
+        cache.shards_[i].ClearStats();
+      }
+    } else {
+      ss << "Invalid command" << std::endl;
+      r = -ENOSYS;
+    }
+    return r;
+  }
+};
+
+#undef dout_context
+#define dout_context cct
+
+BinnedLRUCache::BinnedLRUCache(
+ CephContext *c,
+ const std::string& name,
+ size_t capacity,
+ int num_shard_bits,
+ bool strict_capacity_limit,
+ double high_pri_pool_ratio)
+ : ShardedCache(capacity, num_shard_bits, strict_capacity_limit)
+ , cct(c)
+ , name(name)
+{
num_shards_ = 1 << num_shard_bits;
// TODO: Switch over to use mempool
int rc = posix_memalign((void**) &shards_,
new (&shards_[i])
BinnedLRUCacheShard(c, per_shard, strict_capacity_limit, high_pri_pool_ratio);
}
+ SetupPerfCounters();
+ asok_hook = new SocketHook(*this);
+}
+
+// Builds the "rocksdb-cache-<name>" perf counter set with one u64 counter per
+// ShardStats entry, stores it in perfstats and adds it to the context's perf
+// counters collection.
+void BinnedLRUCache::SetupPerfCounters()
+{
+  // Counter ids are the stat index shifted by one, so they fall strictly
+  // between the two bounds handed to the builder (0 and stat_cnt + 1).
+  const int lower_bound = 0;
+  const int upper_bound = lower_bound + 1 + stat_cnt;
+  PerfCountersBuilder builder(cct, std::string("rocksdb-cache-") + name, lower_bound, upper_bound);
+  uint32_t j = l_capacity;
+  while (j <= l_misses) {
+    builder.add_u64(1 + j, ShardStats::stat_name[j], ShardStats::stat_descr[j]);
+    ++j;
+  }
+  perfstats = builder.create_perf_counters();
+  cct->get_perfcounters_collection()->add(perfstats);
}
BinnedLRUCache::~BinnedLRUCache() {
shards_[i].~BinnedLRUCacheShard();
}
aligned_free(shards_);
+ cct->get_perfcounters_collection()->remove(perfstats);
+ delete perfstats;
+ perfstats = nullptr;
+ delete asok_hook;
+ asok_hook = nullptr;
}
CacheShard* BinnedLRUCache::GetShard(int shard) {
}
ldout(cct, 5) << __func__ << " High Pri Pool Ratio set to " << ratio << dendl;
SetHighPriPoolRatio(ratio);
+
+ // not related to cache size, but called periodically
+ UpdatePerfCounters();
return new_bytes;
}
+// Sums the statistics of all shards and publishes them through perfstats.
+// Entries l_capacity..l_elems are published as absolute values; entries
+// l_inserts..l_misses are published as the increase since the previous call,
+// so the perf counters can be reset independently of the shard counters.
+void BinnedLRUCache::UpdatePerfCounters() {
+  ShardStats total;
+  for (int i = 0; i < num_shards_; i++) {
+    total.add(shards_[i].GetStats());
+  }
+  // absolute values
+  for (int j = l_capacity; j <= l_elems; j++) {
+    perfstats->set(1 + j, total[j]);
+  }
+  // deltas
+  for (int j = l_inserts; j <= l_misses; j++) {
+    const uint64_t now = total[j];
+    const uint64_t before = prev_stats[j];
+    // ClearStats() zeroes the shard counters, which makes the total drop
+    // below the remembered value. The plain unsigned difference would then
+    // wrap around to a huge number, so treat a counter that moved backwards
+    // as restarted from zero and report its current value as the increase.
+    const uint64_t delta = (now >= before) ? (now - before) : now;
+    perfstats->inc(1 + j, delta);
+  }
+  prev_stats = total;
+}
+
+// Appends the age bins of shard `shard_no` to `out`.
+// An index outside [0, num_shards_) is ignored and leaves `out` untouched;
+// the lower bound matters because a negative index would address memory
+// in front of the shards_ array.
+void BinnedLRUCache::printshard(int shard_no, std::stringstream& out) {
+  if (shard_no < 0 || shard_no >= num_shards_) {
+    return;
+  }
+  shards_[shard_no].print_bins(out);
+}
+
void BinnedLRUCache::shift_bins() {
for (int s = 0; s < num_shards_; s++) {
shards_[s].shift_bins();
}
std::shared_ptr<rocksdb::Cache> NewBinnedLRUCache(
- CephContext *c,
+ CephContext *c,
+ const std::string& name,
size_t capacity,
int num_shard_bits,
bool strict_capacity_limit,
num_shard_bits = GetDefaultCacheShardBits(capacity);
}
return std::make_shared<BinnedLRUCache>(
- c, capacity, num_shard_bits, strict_capacity_limit, high_pri_pool_ratio);
+ c, name, capacity, num_shard_bits, strict_capacity_limit, high_pri_pool_ratio);
}
} // namespace rocksdb_cache
#include "common/dout.h"
#include "include/ceph_assert.h"
#include "common/ceph_context.h"
+#include "common/admin_socket.h"
namespace rocksdb_cache {
std::shared_ptr<rocksdb::Cache> NewBinnedLRUCache(
CephContext *c,
+ const std::string& name,
size_t capacity,
int num_shard_bits = -1,
bool strict_capacity_limit = false,
uint32_t elems_;
};
+enum stat_e : int {
+ l_capacity = 0, // capacity assigned to the shard (reported from capacity_)
+ l_usage, // current usage of the shard (reported from usage_)
+ l_pinned, // part of the usage not covered by lru_usage_ (usage_ - lru_usage_)
+ l_elems, // count of separate items in the shard
+ l_inserts, // incremented when an element is inserted into the shard
+ l_lookups, // incremented on every attempt to find an element in the shard
+ l_hits, // incremented when a lookup is successful
+ l_misses, // derived value: lookups - hits
+ stat_cnt // number of statistics above; used as array size, not a statistic itself
+};
+
+// Fixed-size set of per-shard statistics, indexed by stat_e values.
+// stat_name / stat_descr hold the short name and the description of each
+// statistic, in the same order as the indices.
+struct ShardStats {
+  uint64_t val[stat_cnt] = {0};
+  // Mutable access to the statistic at `idx`; the index is not bounds-checked.
+  uint64_t& operator[](int idx) {
+    return val[idx];
+  }
+  // Read-only access, so statistics held through a const reference can be
+  // indexed as well; the index is not bounds-checked.
+  uint64_t operator[](int idx) const {
+    return val[idx];
+  }
+
+  static constexpr char const* stat_name[stat_cnt] = {
+    "capacity",
+    "usage",
+    "pinned",
+    "elems",
+    "inserts",
+    "lookups",
+    "hits",
+    "misses",
+  };
+  static constexpr char const* stat_descr[stat_cnt] = {
+    "capacity assigned",
+    "current usage",
+    "currently pinned size (in use)",
+    "number of elems in shard",
+    "inserts into shard",
+    "lookups for an element",
+    "lookup successful",
+    "lookup failure",
+  };
+  // Element-wise accumulation of `other` into this set.
+  void add(const ShardStats& other) {
+    for (int j = 0; j < stat_cnt; j++) {
+      val[j] += other.val[j];
+    }
+  }
+  // Element-wise subtraction of `other` from this set. The values are
+  // unsigned, so an element that is smaller than its counterpart wraps around.
+  void sub(const ShardStats& other) {
+    for (int j = 0; j < stat_cnt; j++) {
+      val[j] -= other.val[j];
+    }
+  }
+};
+
// A single shard of sharded cache.
class alignas(CACHE_LINE_SIZE) BinnedLRUCacheShard : public CacheShard {
public:
// Get the byte counts for a range of age bins
uint64_t sum_bins(uint32_t start, uint32_t end) const;
+ ShardStats GetStats();
+ void ClearStats();
+ void print_bins(std::stringstream& out) const;
+
private:
CephContext *cct;
void LRU_Remove(BinnedLRUHandle* e);
// holding the mutex_
void EvictFromLRU(size_t charge, BinnedLRUHandle*& deleted);
- void FreeDeleted(BinnedLRUHandle* deleted) {
- while (deleted) {
- auto* entry = deleted;
- deleted = deleted->next;
- entry->Free();
- }
- }
+ int FreeDeleted(BinnedLRUHandle* deleted);
// Initialized before use.
size_t capacity_;
// Pointer to head of low-pri pool in LRU list.
BinnedLRUHandle* lru_low_pri_;
+ // Info about the shard
+ ShardStats stats;
// ------------^^^^^^^^^^^^^-----------
// Not frequently modified data members
// ------------------------------------
class BinnedLRUCache : public ShardedCache {
public:
- BinnedLRUCache(CephContext *c, size_t capacity, int num_shard_bits,
+ BinnedLRUCache(CephContext *c, const std::string& name, size_t capacity, int num_shard_bits,
bool strict_capacity_limit, double high_pri_pool_ratio);
virtual ~BinnedLRUCache();
virtual const char* Name() const override { return "BinnedLRUCache"; }
return "RocksDB Binned LRU Cache";
}
+ private:
+ void SetupPerfCounters();
+ void UpdatePerfCounters();
+ void printshard(int shard_no, std::stringstream& out);
private:
CephContext *cct;
+ std::string name;
BinnedLRUCacheShard* shards_;
int num_shards_ = 0;
+ PerfCounters* perfstats = nullptr;
+ ShardStats prev_stats;
+ class SocketHook;
+ friend class SocketHook;
+ AdminSocketHook* asok_hook = nullptr;
};
} // namespace rocksdb_cache