OPTION(keyvaluestore_op_threads, OPT_INT, 2)
OPTION(keyvaluestore_op_thread_timeout, OPT_INT, 60)
OPTION(keyvaluestore_op_thread_suicide_timeout, OPT_INT, 180)
+OPTION(keyvaluestore_default_strip_size, OPT_INT, 4096) // Only affects newly created objects
+OPTION(keyvaluestore_max_expected_write_size, OPT_U64, 1ULL << 24) // bytes
+OPTION(keyvaluestore_header_cache_size, OPT_INT, 4096) // Header cache size
// max bytes to search ahead in journal searching for corruption
OPTION(journal_max_corrupt_search, OPT_U64, 10<<20)
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2014 UnitedStack <haomai@unitedstack.com>
+ *
+ * Author: Haomai Wang <haomaiwang@gmail.com>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_RANDOMCACHE_H
+#define CEPH_RANDOMCACHE_H
+
+#include <map>
+#include <utility>
+
+#include "common/Mutex.h"
+#include "include/compat.h"
+#include "include/unordered_map.h"
+
+
+// Although this is a random cache implementation, it still tries to make the
+// trim process reasonable. Each item tracks its lookup frequency; when the
+// cache is full, several items are sampled and their frequencies compared.
+// The least frequently looked-up of the sampled items are evicted.
+//
+// Every public method takes `lock` for its whole duration.
+template <class K, class V>
+class RandomCache {
+  // key -> (lookup frequency, value). The frequency is used to pick victims.
+  typedef ceph::unordered_map<K, std::pair<uint64_t, V> > Contents;
+  // frequency -> key, ordered so the least frequently used come first. A
+  // multimap is required: many entries share the same frequency (every new
+  // entry starts at 1) and all of them must remain eviction candidates.
+  typedef std::multimap<uint64_t, K> Candidates;
+
+  Contents contents;
+  Mutex lock;
+  // `count` mirrors contents.size(); it is only changed together with a
+  // successful insert or erase.
+  uint64_t max_size, count;
+  // Key at which the next trim scan resumes, so that successive trims sample
+  // different parts of the table.
+  K last_trim_key;
+
+  // When the cache is full, evict this many items in one go.
+  static const uint64_t EVICT_COUNT = 5;
+  // Number of items sampled per requested eviction; kept small to bound the
+  // cost of comparing frequencies.
+  static const uint64_t COMPARE_COUNT = 3;
+
+  // Evict up to `evict_count` entries (caller must hold `lock`).
+  //
+  // Samples up to evict_count * COMPARE_COUNT entries starting at
+  // last_trim_key (wrapping around at the end of the table) and erases the
+  // ones with the lowest lookup frequency. Evicting several entries per call
+  // amortises the cost of the scan.
+  void trim_cache(uint64_t evict_count) {
+    // Nothing to do, and nothing safe to dereference, on an empty table.
+    if (evict_count == 0 || contents.empty())
+      return;
+
+    const uint64_t size = contents.size();
+    if (evict_count > size)
+      evict_count = size;
+
+    // total_compare = min(evict_count * COMPARE_COUNT, size), computed without
+    // overflow. Never sampling more than `size` entries guarantees that the
+    // wrap-around scan visits each entry at most once.
+    uint64_t total_compare = size;
+    if (evict_count <= size / COMPARE_COUNT)
+      total_compare = evict_count * COMPARE_COUNT;
+
+    Candidates candidates;
+    typename Contents::iterator it = contents.find(last_trim_key);
+    while (total_compare--) {
+      if (it == contents.end())
+        it = contents.begin();
+      candidates.insert(std::make_pair(it->second.first, it->first));
+      ++it;
+    }
+
+    // Remember where to resume. Only the key is stored (not the iterator), so
+    // it stays meaningful even if that entry is erased below: find() will
+    // then miss and the next scan simply starts from begin().
+    if (it != contents.end())
+      last_trim_key = it->first;
+    else
+      last_trim_key = contents.begin()->first;
+
+    for (typename Candidates::iterator j = candidates.begin();
+         j != candidates.end() && evict_count > 0; ++j, --evict_count) {
+      count -= contents.erase(j->second);
+    }
+  }
+
+ public:
+  // max_size: number of entries the cache may hold before add() starts
+  // evicting.
+  RandomCache(size_t max_size=20) : lock("RandomCache::lock"),
+    max_size(max_size), count(0) {}
+  ~RandomCache() {
+    contents.clear();
+    count = 0;
+  }
+
+  // Remove `key` from the cache; a missing key is a no-op.
+  void clear(K key) {
+    Mutex::Locker l(lock);
+    // erase() returns the number of entries removed (0 or 1), which keeps
+    // `count` from underflowing when the key was not cached.
+    count -= contents.erase(key);
+  }
+
+  // Change the capacity, evicting entries if the cache is now over it.
+  void set_size(size_t new_size) {
+    Mutex::Locker l(lock);
+    max_size = new_size;
+    if (max_size < count) {
+      trim_cache(count - max_size);
+    }
+  }
+
+  // Look up `key`. On a hit, bump the entry's frequency, copy its value to
+  // *out and return true; on a miss return false and leave *out untouched.
+  // `out` is dereferenced unconditionally on a hit, so it must not be null.
+  bool lookup(K key, V *out) {
+    Mutex::Locker l(lock);
+    typename Contents::iterator it = contents.find(key);
+    if (it != contents.end()) {
+      it->second.first++;
+      *out = it->second.second;
+      return true;
+    }
+    return false;
+  }
+
+  // Insert or replace the value for `key`; its frequency (re)starts at 1.
+  void add(K key, V value) {
+    Mutex::Locker l(lock);
+    typename Contents::iterator it = contents.find(key);
+    if (it != contents.end()) {
+      // Replacing an existing entry neither grows the cache nor needs room.
+      it->second = std::pair<uint64_t, V>(1, value);
+      return;
+    }
+    if (max_size <= count) {
+      trim_cache(EVICT_COUNT);
+    }
+    contents[key] = std::pair<uint64_t, V>(1, value);
+    count++;
+  }
+};
+
+#endif
if (!header)
return -EINVAL;
- _StripObjectHeader *tmp = new _StripObjectHeader();
+ StripObjectHeaderRef tmp = StripObjectHeaderRef(new StripObjectHeader());
tmp->oid = oid;
tmp->cid = cid;
tmp->header = header;
+ if (strip_header)
+ *strip_header = tmp;
- {
- Mutex::Locker l(lock);
- *strip_header = caches.add(oid, tmp);
- }
return 0;
}
{
if (cid != coll_t()) {
Mutex::Locker l(lock);
- *strip_header = caches.lookup(oid);
- if (*strip_header) {
- return 0;
+ pair<coll_t, StripObjectHeaderRef> p;
+ if (caches.lookup(oid, &p)) {
+ if (p.first == cid) {
+ *strip_header = p.second;
+ return 0;
+ }
}
}
Header header = lookup_header(cid, oid);
}
- _StripObjectHeader *tmp = new _StripObjectHeader();
+ StripObjectHeaderRef tmp = StripObjectHeaderRef(new StripObjectHeader());
if (header->data.length()) {
bufferlist::iterator bliter = header->data.begin();
::decode(*tmp, bliter);
{
Mutex::Locker l(lock);
- *strip_header = caches.add(oid, tmp);
+ caches.add(oid, make_pair(cid, tmp));
}
+ *strip_header = tmp;
dout(10) << "lookup_strip_header done " << " cid " << cid << " oid "
<< oid << dendl;
return 0;
StripObjectHeaderRef *target_header)
{
Header new_origin_header;
- StripObjectHeaderRef tmp = StripObjectHeaderRef(new _StripObjectHeader());
+ StripObjectHeaderRef tmp = StripObjectHeaderRef(new StripObjectHeader());
clone(old_header->header, cid, oid, t, &new_origin_header,
&tmp->header);
tmp->oid = oid;
tmp->cid = cid;
+ tmp->strip_size = old_header->strip_size;
+ tmp->max_size = old_header->max_size;
+ tmp->bits = old_header->bits;
old_header->header = new_origin_header;
if (target_header)
*target_header = tmp;
}
-void StripObjectMap::rename_wrap(const coll_t &cid, const ghobject_t &oid,
+void StripObjectMap::rename_wrap(StripObjectHeaderRef old_header, const coll_t &cid, const ghobject_t &oid, // old_header: strip header being renamed; cid/oid: the new collection/object ids
KeyValueDB::Transaction t,
- StripObjectHeaderRef header)
+ StripObjectHeaderRef *new_header) // optional out-param: receives the strip header built for the new name when non-null
{
- assert(header);
- rename(header->header, cid, oid, t);
+ rename(old_header->header, cid, oid, t); // NOTE(review): old_header is dereferenced without the null assert the old code had -- confirm callers never pass an empty ref
- if (header) {
- header->oid = oid;
- header->cid = cid;
- }
+ StripObjectHeaderRef tmp = StripObjectHeaderRef(new StripObjectHeader()); // fresh strip header for the new (cid, oid) instead of mutating old_header in place
+ tmp->strip_size = old_header->strip_size; // carry the strip layout fields over unchanged
+ tmp->max_size = old_header->max_size;
+ tmp->bits = old_header->bits;
+ tmp->header = old_header->header; // the new strip header takes over the underlying header that was just passed to rename()
+ tmp->oid = oid;
+ tmp->cid = cid;
+
+ if (new_header)
+ *new_header = tmp;
+
+ old_header->header = Header(); // detach the old strip header from the underlying header ...
+ old_header->deleted = true; // ... and flag it as deleted
}
int StripObjectMap::get_values_with_header(const StripObjectHeaderRef header,
{
strip_header->deleted = true;
- InvalidCacheContext *c = new InvalidCacheContext(store, strip_header->oid);
+ InvalidateCacheContext *c = new InvalidateCacheContext(store, strip_header->cid, strip_header->oid);
finishes.push_back(c);
-
return store->backend->clear(strip_header->header, t);
}
{
// FIXME: Lacking of lock for origin header, it will cause other operation
// can get the origin header while submitting transactions
- store->backend->rename_wrap(cid, oid, t, old_header);
+ StripObjectMap::StripObjectHeaderRef new_header;
+ store->backend->rename_wrap(old_header, cid, oid, t, &new_header);
- InvalidCacheContext *c = new InvalidCacheContext(store, oid);
+ InvalidateCacheContext *c = new InvalidateCacheContext(store, old_header->cid, old_header->oid);
finishes.push_back(c);
-
- strip_headers.erase(make_pair(old_header->cid, old_header->oid));
- strip_headers[make_pair(cid, oid)] = old_header;
+ strip_headers[make_pair(cid, oid)] = new_header;
}
int KeyValueStore::BufferTransaction::submit_transaction()
}
}
-out:
- for (list<Context*>::iterator it = finishes.begin();
- it != finishes.end(); ++it) {
+ r = store->backend->submit_transaction(t);
+ for (list<Context*>::iterator it = finishes.begin(); it != finishes.end(); ++it) {
(*it)->complete(r);
}
+out:
dout(5) << __func__ << " r = " << r << dendl;
- return store->backend->submit_transaction(t);
+ return r;
}
// =========== KeyValueStore Intern Helper Implementation ==============
map<uint64_t, uint64_t> m;
for (vector<StripObjectMap::StripExtent>::iterator iter = extents.begin();
iter != extents.end(); ++iter) {
- uint64_t off = iter->no * header.strip_size + iter->offset;
+ uint64_t off = iter->no * header->strip_size + iter->offset;
m[off] = iter->len;
}
::encode(m, bl);
#include "common/Mutex.h"
#include "GenericObjectMap.h"
#include "KeyValueDB.h"
-#include "common/shared_cache.hpp"
+#include "common/random_cache.hpp"
#include "include/uuid.h"
};
+static uint64_t default_strip_size = 1024; // Namespace-scope replacement for the former StripObjectMap::default_strip_size class constant. NOTE(review): as a non-const static, every translation unit that includes this file gets its own mutable copy -- confirm that is intended.
+
class StripObjectMap: public GenericObjectMap {
public:
const coll_t &cid, const ghobject_t &oid,
KeyValueDB::Transaction t,
StripObjectHeaderRef *target_header);
- void rename_wrap(const coll_t &cid, const ghobject_t &oid,
+ void rename_wrap(StripObjectHeaderRef old_header, const coll_t &cid, const ghobject_t &oid,
KeyValueDB::Transaction t,
- StripObjectHeaderRef header);
+ StripObjectHeaderRef *new_header);
// Already hold header to avoid lock header seq again
int get_with_header(
const StripObjectHeaderRef header,
);
Mutex lock;
- void invalidate_cache(const ghobject_t &oid) {
+ void invalidate_cache(const coll_t &c, const ghobject_t &oid) { // Drops the cached strip header for oid while holding `lock`.
Mutex::Locker l(lock);
caches.clear(oid); // NOTE(review): `c` is not used here; the cache is keyed by oid alone, so the entry is dropped whichever collection it was cached for -- confirm that is intended
}
- SharedLRU<ghobject_t, _StripObjectHeader> caches;
- StripObjectMap(KeyValueDB *db): GenericObjectMap(db), lock("StripObjectMap::lock"),
- caches(4096){}
-
- static const uint64_t default_strip_size = 1024;
+ RandomCache<ghobject_t, pair<coll_t, StripObjectHeaderRef> > caches; // strip header cache keyed by object id; each entry also records the collection id it was cached for
+ StripObjectMap(KeyValueDB *db): GenericObjectMap(db), // db is forwarded to the GenericObjectMap base
+ lock("StripObjectMap::lock"),
+ caches(g_conf->keyvaluestore_header_cache_size) // cache capacity comes from the keyvaluestore_header_cache_size option
+ {}
};
BufferTransaction(KeyValueStore *store): store(store) {
t = store->backend->get_transaction();
}
- };
- struct InvalidCacheContext : public Context {
- KeyValueStore *store;
- const ghobject_t object;
- InvalidCacheContext(KeyValueStore *s,
- const ghobject_t &oid) : store(s), object(oid) {}
- void finish(int r) {
- if (r >= 0)
- store->backend->invalidate_cache(object);
- }
+ struct InvalidateCacheContext : public Context { // Completion callback that invalidates the backend's cached strip header for (cid, oid).
+ KeyValueStore *store; // store whose backend cache is invalidated
+ const coll_t cid; // copied at construction, so the callback does not depend on the caller's objects staying alive
+ const ghobject_t oid;
+ InvalidateCacheContext(KeyValueStore *s, const coll_t &c, const ghobject_t &oid): store(s), cid(c), oid(oid) {}
+ void finish(int r) {
+ if (r == 0) // invalidate only when the completion code is exactly 0 (the replaced InvalidCacheContext accepted any r >= 0)
+ store->backend->invalidate_cache(cid, oid);
+ }
+ };
};
// -- op workqueue --
static const uint32_t COLLECTION_VERSION = 1;
};
-WRITE_CLASS_ENCODER(StripObjectMap::_StripObjectHeader)
+WRITE_CLASS_ENCODER(StripObjectMap::StripObjectHeader)
#endif