void _add(BlueStore::Onode* o, int level) override
{
- if (o->put_cache()) {
+ o->set_cached();
+ if (o->pin_nref == 1) {
(level > 0) ? lru.push_front(*o) : lru.push_back(*o);
o->cache_age_bin = age_bins.front();
*(o->cache_age_bin) += 1;
- } else {
- ++num_pinned;
}
++num; // we count both pinned and unpinned entries
dout(20) << __func__ << " " << this << " " << o->oid << " added, num="
}
void _rm(BlueStore::Onode* o) override
{
- if (o->pop_cache()) {
+ o->clear_cached();
+ if (o->lru_item.is_linked()) {
*(o->cache_age_bin) -= 1;
lru.erase(lru.iterator_to(*o));
- } else {
- ceph_assert(num_pinned);
- --num_pinned;
}
ceph_assert(num);
--num;
dout(20) << __func__ << " " << this << " " << " " << o->oid << " removed, num=" << num << dendl;
}
- void _pin(BlueStore::Onode* o) override
- {
- *(o->cache_age_bin) -= 1;
- lru.erase(lru.iterator_to(*o));
- ++num_pinned;
- dout(20) << __func__ << " " << this << " " << " " << " " << o->oid << " pinned" << dendl;
- }
- void _unpin(BlueStore::Onode* o) override
- {
- lru.push_front(*o);
- o->cache_age_bin = age_bins.front();
- *(o->cache_age_bin) += 1;
- ceph_assert(num_pinned);
- --num_pinned;
- dout(20) << __func__ << " " << this << " " << " " << " " << o->oid << " unpinned" << dendl;
- }
- void _unpin_and_rm(BlueStore::Onode* o) override
+
+ void maybe_unpin(BlueStore::Onode* o) override
{
- o->pop_cache();
- ceph_assert(num_pinned);
- --num_pinned;
- ceph_assert(num);
- --num;
+ OnodeCacheShard* ocs = this;
+ ocs->lock.lock();
+ // It is possible that, while we waited for the lock, split_cache moved us to a different OnodeCacheShard.
+ while (ocs != o->c->get_onode_cache()) {
+ ocs->lock.unlock();
+ ocs = o->c->get_onode_cache();
+ ocs->lock.lock();
+ }
+ if (o->is_cached() && o->pin_nref == 1) {
+ if(!o->lru_item.is_linked()) {
+ if (o->exists) {
+ lru.push_front(*o);
+ o->cache_age_bin = age_bins.front();
+ *(o->cache_age_bin) += 1;
+ dout(20) << __func__ << " " << this << " " << o->oid << " unpinned"
+ << dendl;
+ } else {
+ ceph_assert(num);
+ --num;
+ o->clear_cached();
+ // onode_space._remove() will also decrement nref
+ o->c->onode_space._remove(o->oid);
+ }
+ } else if (o->exists) {
+ // move onode within LRU
+ _rm(o);
+ _add(o, 1);
+ }
+ }
+ ocs->lock.unlock();
}
+
void _trim_to(uint64_t new_size) override
{
if (new_size >= lru.size()) {
ceph_assert(p != lru.begin());
--p;
ceph_assert(num >= n);
- num -= n;
while (n-- > 0) {
BlueStore::Onode *o = &*p;
dout(20) << __func__ << " rm " << o->oid << " "
- << o->nref << " " << o->cached << " " << o->pinned << dendl;
+ << o->nref << " " << o->cached << dendl;
+
if (p != lru.begin()) {
lru.erase(p--);
} else {
lru.erase(p);
}
*(o->cache_age_bin) -= 1;
- auto pinned = !o->pop_cache();
- ceph_assert(!pinned);
- o->c->onode_space._remove(o->oid);
+ if (o->pin_nref > 1) {
+ dout(20) << __func__ << " " << this << " " << " " << " " << o->oid << dendl;
+ } else {
+ ceph_assert(num);
+ --num;
+ o->clear_cached();
+ o->c->onode_space._remove(o->oid);
+ }
}
}
- void move_pinned(OnodeCacheShard *to, BlueStore::Onode *o) override
+ void _move_pinned(OnodeCacheShard *to, BlueStore::Onode *o) override
{
if (to == this) {
return;
}
- ceph_assert(o->cached);
- ceph_assert(o->pinned);
- ceph_assert(num);
- ceph_assert(num_pinned);
- --num_pinned;
- --num;
- ++to->num_pinned;
- ++to->num;
+ _rm(o);
+ ceph_assert(o->nref > 1);
+ to->_add(o, 0);
}
void add_stats(uint64_t *onodes, uint64_t *pinned_onodes) override
{
+ std::lock_guard l(lock);
*onodes += num;
- *pinned_onodes += num_pinned;
+ *pinned_onodes += num - lru.size();
}
};
ldout(cache->cct, 30) << __func__ << " " << oid << " hit " << p->second
<< " " << p->second->nref
<< " " << p->second->cached
- << " " << p->second->pinned
<< dendl;
// This will pin onode and implicitly touch the cache when Onode
// eventually will become unpinned
o = p->second;
- ceph_assert(!o->cached || o->pinned);
cache->logger->inc(l_bluestore_onode_hits);
}
// This will pin 'o' and implicitly touch cache
// when it will eventually become unpinned
onode_map.insert(make_pair(new_oid, o));
- ceph_assert(o->pinned);
o->oid = new_oid;
o->key = new_okey;
ldout(cct, LogLevelV) << i.first << " : " << i.second
<< " " << i.second->nref
<< " " << i.second->cached
- << " " << i.second->pinned
<< dendl;
}
}
out->push_back('~');
}
-void BlueStore::Onode::get() {
- if (++nref >= 2 && !pinned) {
- OnodeCacheShard* ocs = c->get_onode_cache();
- ocs->lock.lock();
- // It is possible that during waiting split_cache moved us to different OnodeCacheShard.
- while (ocs != c->get_onode_cache()) {
- ocs->lock.unlock();
- ocs = c->get_onode_cache();
- ocs->lock.lock();
- }
- bool was_pinned = pinned;
- pinned = nref >= 2;
- bool r = !was_pinned && pinned;
- if (cached && r) {
- ocs->_pin(this);
- }
- ocs->lock.unlock();
- }
+void BlueStore::Onode::get()
+{
+ ++nref;
+ ++pin_nref;
}
-void BlueStore::Onode::put() {
- ++put_nref;
- int n = --nref;
- if (n == 1) {
- OnodeCacheShard* ocs = c->get_onode_cache();
- ocs->lock.lock();
- // It is possible that during waiting split_cache moved us to different OnodeCacheShard.
- while (ocs != c->get_onode_cache()) {
- ocs->lock.unlock();
- ocs = c->get_onode_cache();
- ocs->lock.lock();
- }
- bool need_unpin = pinned;
- pinned = pinned && nref >= 2;
- need_unpin = need_unpin && !pinned;
- if (cached && need_unpin) {
- if (exists) {
- ocs->_unpin(this);
- } else {
- ocs->_unpin_and_rm(this);
- // remove will also decrement nref
- c->onode_space._remove(oid);
- }
- }
- ocs->lock.unlock();
+void BlueStore::Onode::put()
+{
+ if (--pin_nref == 1) {
+ c->get_onode_cache()->maybe_unpin(this);
}
- auto pn = --put_nref;
- if (nref == 0 && pn == 0) {
+ if (--nref == 0) {
delete this;
}
}
ldout(store->cct, 20) << __func__ << " moving " << o << " " << o->oid
<< dendl;
- // ensuring that nref is always >= 2 and hence onode is pinned and
- // physically out of cache during the transition
+ // ensuring that nref is always >= 2 and hence onode is pinned
OnodeRef o_pin = o;
- ceph_assert(o->pinned);
p = onode_space.onode_map.erase(p);
dest->onode_space.onode_map[o->oid] = o;
if (o->cached) {
- get_onode_cache()->move_pinned(dest->get_onode_cache(), o.get());
+ get_onode_cache()->_move_pinned(dest->get_onode_cache(), o.get());
}
o->c = dest;
};
struct OnodeSpace;
+ struct OnodeCacheShard;
/// an in-memory object
struct Onode {
MEMPOOL_CLASS_HELPERS();
- std::atomic_int nref; ///< reference count
- std::atomic_int put_nref = {0};
+ std::atomic_int nref = 0; ///< reference count
+ std::atomic_int pin_nref = 0; ///< reference count replica to track pinning
Collection *c;
ghobject_t oid;
bool cached; ///< Onode is logically in the cache
/// (it can be pinned and hence physically out
/// of it at the moment though)
- std::atomic_bool pinned; ///< Onode is pinned
- /// (or should be pinned when cached)
ExtentMap extent_map;
// track txc's that have not been committed to kv store (and whose
Onode(Collection *c, const ghobject_t& o,
const mempool::bluestore_cache_meta::string& k)
- : nref(0),
- c(c),
+ : c(c),
oid(o),
key(k),
exists(false),
cached(false),
- pinned(false),
extent_map(this,
c->store->cct->_conf->
bluestore_extent_map_inline_shard_prealloc_size) {
}
Onode(Collection* c, const ghobject_t& o,
const std::string& k)
- : nref(0),
- c(c),
+ : c(c),
oid(o),
key(k),
exists(false),
cached(false),
- pinned(false),
extent_map(this,
c->store->cct->_conf->
bluestore_extent_map_inline_shard_prealloc_size) {
}
Onode(Collection* c, const ghobject_t& o,
const char* k)
- : nref(0),
- c(c),
+ : c(c),
oid(o),
key(k),
exists(false),
cached(false),
- pinned(false),
extent_map(this,
c->store->cct->_conf->
bluestore_extent_map_inline_shard_prealloc_size) {
}
Onode(CephContext* cct)
- : nref(0),
- c(nullptr),
+ : c(nullptr),
exists(false),
cached(false),
- pinned(false),
extent_map(this,
cct->_conf->
bluestore_extent_map_inline_shard_prealloc_size) {
void get();
void put();
- inline bool put_cache() {
+ inline bool is_cached() const {
+ return cached;
+ }
+ inline void set_cached() {
ceph_assert(!cached);
cached = true;
- return !pinned;
}
- inline bool pop_cache() {
+ inline void clear_cached() {
ceph_assert(cached);
cached = false;
- return !pinned;
}
static const std::string& calc_omap_prefix(uint8_t flags);
/// A Generic onode Cache Shard
struct OnodeCacheShard : public CacheShard {
- std::atomic<uint64_t> num_pinned = {0};
std::array<std::pair<ghobject_t, ceph::mono_clock::time_point>, 64> dumped_onodes;
- virtual void _pin(Onode* o) = 0;
- virtual void _unpin(Onode* o) = 0;
-
public:
OnodeCacheShard(CephContext* cct) : CacheShard(cct) {}
static OnodeCacheShard *create(CephContext* cct, std::string type,
PerfCounters *logger);
+
+ // The following methods are prefixed with '_' and must be called
+ // under the shard's lock
virtual void _add(Onode* o, int level) = 0;
virtual void _rm(Onode* o) = 0;
- virtual void _unpin_and_rm(Onode* o) = 0;
+ virtual void _move_pinned(OnodeCacheShard *to, Onode *o) = 0;
- virtual void move_pinned(OnodeCacheShard *to, Onode *o) = 0;
+ virtual void maybe_unpin(Onode* o) = 0;
virtual void add_stats(uint64_t *onodes, uint64_t *pinned_onodes) = 0;
bool empty() {
return _get_num() == 0;