BlueStore::Onode,
boost::intrusive::list_member_hook<>,
&BlueStore::Onode::lru_item> > list_t;
- typedef boost::intrusive::list<
- BlueStore::Onode,
- boost::intrusive::member_hook<
- BlueStore::Onode,
- boost::intrusive::list_member_hook<>,
- &BlueStore::Onode::pin_item> > pin_list_t;
list_t lru;
- pin_list_t pin_list;
explicit LruOnodeCacheShard(CephContext *cct) : BlueStore::OnodeCacheShard(cct) {}
- void _add(BlueStore::OnodeRef& o, int level) override
+ void _add(BlueStore::Onode* o, int level) override
{
- ceph_assert(o->s == nullptr);
- o->s = this;
- if (o->nref > 1) {
- pin_list.push_front(*o);
- o->pinned = true;
- num_pinned = pin_list.size();
- } else {
+ if (o->put_cache()) {
(level > 0) ? lru.push_front(*o) : lru.push_back(*o);
+ } else {
+ ++num_pinned;
}
- num = lru.size();
+ ++num; // we count both pinned and unpinned entries
}
- void _rm(BlueStore::OnodeRef& o) override
+ void _rm(BlueStore::Onode* o) override
{
- o->s = nullptr;
- if (o->pinned) {
- o->pinned = false;
- pin_list.erase(pin_list.iterator_to(*o));
- } else {
+ if (o->pop_cache()) {
lru.erase(lru.iterator_to(*o));
+ } else {
+ --num_pinned;
}
- num = lru.size();
- num_pinned = pin_list.size();
+ --num;
}
- void _touch(BlueStore::OnodeRef& o) override
+ void _touch(BlueStore::Onode* o) override
{
- if (o->pinned) {
- return;
+ ceph_assert(o->cached);
+ if (!o->pinned) {
+ lru.erase(lru.iterator_to(*o));
+ lru.push_front(*o);
}
- lru.erase(lru.iterator_to(*o));
- lru.push_front(*o);
- num = lru.size();
}
- void _pin(BlueStore::Onode& o) override
+ void _pin(BlueStore::Onode* o) override
{
- if (o.pinned == true) {
- return;
- }
- lru.erase(lru.iterator_to(o));
- pin_list.push_front(o);
- o.pinned = true;
- num = lru.size();
- num_pinned = pin_list.size();
- dout(30) << __func__ << " " << o.oid << " pinned" << dendl;
-
- }
- void _unpin(BlueStore::Onode& o) override
+ lru.erase(lru.iterator_to(*o));
+ ++num_pinned;
+ dout(30) << __func__ << " " << o->oid << " pinned" << dendl;
+ }
+ void _unpin(BlueStore::Onode* o) override
{
- if (o.pinned == false) {
- return;
- }
- pin_list.erase(pin_list.iterator_to(o));
- lru.push_front(o);
- o.pinned = false;
- num = lru.size();
- num_pinned = pin_list.size();
- dout(30) << __func__ << " " << o.oid << " unpinned" << dendl;
+ lru.push_front(*o);
+ --num_pinned;
+ dout(30) << __func__ << " " << o->oid << " unpinned" << dendl;
}
+
void _trim_to(uint64_t new_size) override
{
if (new_size >= lru.size()) {
lru.erase(p);
ceph_assert(n == 1);
}
- o->s = nullptr;
- o->get(); // paranoia
+ if (!o->pop_cache()) {
+ --num_pinned;
+ }
o->c->onode_map.remove(o->oid);
- o->put();
--n;
}
num = lru.size();
}
void add_stats(uint64_t *onodes, uint64_t *pinned_onodes) override
{
- *onodes += num + num_pinned;
+ *onodes += num;
*pinned_onodes += num_pinned;
}
};
#undef dout_prefix
#define dout_prefix *_dout << "bluestore.OnodeSpace(" << this << " in " << cache << ") "
-BlueStore::OnodeRef BlueStore::OnodeSpace::add(const ghobject_t& oid, OnodeRef o)
+BlueStore::OnodeRef BlueStore::OnodeSpace::add(const ghobject_t& oid,
+ OnodeRef& o)
{
std::lock_guard l(cache->lock);
auto p = onode_map.find(oid);
}
ldout(cache->cct, 30) << __func__ << " " << oid << " " << o << dendl;
onode_map[oid] = o;
- cache->_add(o, 1);
+ cache->_add(o.get(), 1);
cache->_trim();
return o;
}
} else {
ldout(cache->cct, 30) << __func__ << " " << oid << " hit " << p->second
<< dendl;
- cache->_touch(p->second);
- hit = true;
+ // assign before _touch to pin object if needed and avoid
+ // cache touch
o = p->second;
+ cache->_touch(p->second.get());
+ hit = true;
}
}
std::lock_guard l(cache->lock);
ldout(cache->cct, 10) << __func__ << dendl;
for (auto &p : onode_map) {
- cache->_rm(p.second);
+ cache->_rm(p.second.get());
}
onode_map.clear();
}
if (pn != onode_map.end()) {
ldout(cache->cct, 30) << __func__ << " removing target " << pn->second
<< dendl;
- cache->_rm(pn->second);
+ cache->_rm(pn->second.get());
onode_map.erase(pn);
}
OnodeRef o = po->second;
// install a non-existent onode at old location
oldo.reset(new Onode(o->c, old_oid, o->key));
po->second = oldo;
- cache->_add(po->second, 1);
+ cache->_add(oldo.get(), 1);
// add at new position and fix oid, key
onode_map.insert(make_pair(new_oid, o));
- cache->_touch(o);
+ cache->_touch(o.get());
o->oid = new_oid;
o->key = new_okey;
cache->_trim();
#undef dout_prefix
#define dout_prefix *_dout << "bluestore.onode(" << this << ")." << __func__ << " "
+void BlueStore::Onode::get() {
+ if (++nref == 2) {
+ c->get_onode_cache()->pin(this, [&]() {
+ bool r = cached && !pinned;
+ pinned = true;
+ return r;
+ });
+ }
+}
+void BlueStore::Onode::put() {
+ int n = --nref;
+ if (n == 1) {
+ c->get_onode_cache()->unpin(this, [&]() {
+ bool r = cached && pinned;
+ pinned = false;
+ return r;
+ });
+ } else if (n == 0) {
+ delete this;
+ }
+}
+
BlueStore::Onode* BlueStore::Onode::decode(
CollectionRef c,
const ghobject_t& oid,
ldout(store->cct, 20) << __func__ << " moving " << o << " " << o->oid
<< dendl;
- // move the onode to the new map before futzing with the cache
- // shard, ensuring that nref is always >= 2, and no racing
- // thread can trigger a pin or unpin (which does *not* behave
- // well when we are clearing and resetting the 's' shard
- // pointer!).
+ // ensuring that nref is always >= 2 and hence onode is pinned and
+ // physically out of cache during the transition
+ OnodeRef o_pin = o;
+
p = onode_map.onode_map.erase(p);
dest->onode_map.onode_map[o->oid] = o;
-
- if (onode_map.cache != dest->onode_map.cache) {
- // move onode to a different cache shard
- onode_map.cache->_rm(o);
- o->c = dest;
- dest->onode_map.cache->_add(o, 1);
- } else {
- // the onode is in the same cache shard, making our move simpler.
- o->c = dest;
- }
+ o->c = dest;
// move over shared blobs and buffers. cover shared blobs from
// both extent map and spanning blob map (the full extent map
};
struct OnodeSpace;
- struct OnodeCacheShard;
/// an in-memory object
struct Onode {
MEMPOOL_CLASS_HELPERS();
- // Not persisted and updated on cache insertion/removal
- OnodeCacheShard *s;
- bool pinned = false; // Only to be used by the onode cache shard
std::atomic_int nref; ///< reference count
Collection *c;
/// key under PREFIX_OBJ where we are stored
mempool::bluestore_cache_other::string key;
- boost::intrusive::list_member_hook<> lru_item, pin_item;
+ boost::intrusive::list_member_hook<> lru_item;
bluestore_onode_t onode; ///< metadata stored as value in kv store
bool exists; ///< true if object logically exists
-
+ bool cached; ///< Onode is logically in the cache
+ /// (it can be pinned and hence physically out
+ /// of it at the moment though)
+ bool pinned; ///< Onode is pinned
+ /// (or should be pinned when cached)
ExtentMap extent_map;
// track txc's that have not been committed to kv store (and whose
Onode(Collection *c, const ghobject_t& o,
const mempool::bluestore_cache_other::string& k)
- : s(nullptr),
- nref(0),
+ : nref(0),
c(c),
oid(o),
key(k),
exists(false),
+ cached(false),
+ pinned(false),
extent_map(this) {
}
Onode(Collection* c, const ghobject_t& o,
- const string& k)
- : s(nullptr),
- nref(0),
+ const std::string& k)
+ : nref(0),
c(c),
oid(o),
key(k),
exists(false),
+ cached(false),
+ pinned(false),
extent_map(this) {
}
Onode(Collection* c, const ghobject_t& o,
const char* k)
- : s(nullptr),
- nref(0),
+ : nref(0),
c(c),
oid(o),
key(k),
exists(false),
+ cached(false),
+ pinned(false),
extent_map(this) {
}
void dump(Formatter* f) const;
void flush();
- void get() {
- if (++nref == 2 && s != nullptr) {
- s->pin(*this);
- }
+ void get();
+ void put();
+
+ inline bool put_cache() {
+ ceph_assert(!cached);
+ cached = true;
+ return !pinned;
}
- void put() {
- int n = --nref;
- if (n == 1 && s != nullptr) {
- s->unpin(*this);
- }
- if (n == 0) {
- delete this;
- }
+ inline bool pop_cache() {
+ ceph_assert(cached);
+ cached = false;
+ return !pinned;
}
const string& get_omap_prefix();
std::atomic<uint64_t> num_pinned = {0};
std::array<std::pair<ghobject_t, mono_clock::time_point>, 64> dumped_onodes;
+
+ virtual void _pin(Onode* o) = 0;
+ virtual void _unpin(Onode* o) = 0;
+
public:
OnodeCacheShard(CephContext* cct) : CacheShard(cct) {}
static OnodeCacheShard *create(CephContext* cct, string type,
PerfCounters *logger);
- virtual void _add(OnodeRef& o, int level) = 0;
- virtual void _rm(OnodeRef& o) = 0;
- virtual void _touch(OnodeRef& o) = 0;
- virtual void _pin(Onode& o) = 0;
- virtual void _unpin(Onode& o) = 0;
+ virtual void _add(Onode* o, int level) = 0;
+ virtual void _rm(Onode* o) = 0;
+ virtual void _touch(Onode* o) = 0;
- void pin(Onode& o) {
+ void pin(Onode* o, std::function<bool ()> validator) {
std::lock_guard l(lock);
- _pin(o);
+ if (validator()) {
+ _pin(o);
+ }
}
- void unpin(Onode& o) {
+ void unpin(Onode* o, std::function<bool()> validator) {
std::lock_guard l(lock);
- _unpin(o);
+ if (validator()) {
+ _unpin(o);
+ }
}
virtual void add_stats(uint64_t *onodes, uint64_t *pinned_onodes) = 0;
clear();
}
- OnodeRef add(const ghobject_t& oid, OnodeRef o);
+ OnodeRef add(const ghobject_t& oid, OnodeRef& o);
OnodeRef lookup(const ghobject_t& o);
void remove(const ghobject_t& oid) {
onode_map.erase(oid);
pool_opts_t pool_opts;
ContextQueue *commit_queue;
+ OnodeCacheShard* get_onode_cache() const {
+ return onode_map.cache;
+ }
OnodeRef get_onode(const ghobject_t& oid, bool create, bool is_createop=false);
// the terminology is confusing here, sorry!