command: |
echo "export PKG_CONFIG_PATH=/usr/local/OFF/:~/libprotobuf-mutator/build/external.protobuf/lib/pkgconfig/" >> $BASH_ENV
echo "export PROTOC_BIN=~/libprotobuf-mutator/build/external.protobuf/bin/protoc" >> $BASH_ENV
+ # Reusable command: installs the system packages folly needs, then fetches
+ # the folly sources via the Makefile's checkout_folly target.
+ setup-folly:
+ steps:
+ - run:
+ name: Install folly dependencies
+ command: |
+ # -y: the CI job cannot answer apt's confirmation prompt
+ sudo apt-get install -y libgoogle-glog-dev
+ - run:
+ name: Checkout folly sources
+ command: |
+ make checkout_folly
build-for-benchmarks:
steps:
- pre-steps
- install-gflags
- upgrade-cmake
- - run: make checkout_folly
+ - setup-folly
- run: (mkdir build && cd build && cmake -DUSE_FOLLY=1 -DWITH_GFLAGS=1 .. && make V=1 -j20 && ctest -j20)
- post-steps
steps:
- pre-steps
- run: sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test && sudo apt-get update -y && sudo apt-get install gcc-7 g++-7 libgflags-dev
- - run: make checkout_folly
+ - setup-folly
- run: USE_FOLLY=1 CC=gcc-7 CXX=g++-7 V=1 make -j32 check
- post-steps
- pre-steps
- install-clang-13
- install-gflags
- - run: make checkout_folly
+ - setup-folly
- run: CC=clang-13 CXX=clang++-13 USE_CLANG=1 USE_FOLLY=1 COMPILE_WITH_UBSAN=1 COMPILE_WITH_ASAN=1 make -j32 check
- post-steps
if(USE_FOLLY)
include_directories(${PROJECT_SOURCE_DIR}/third-party/folly)
add_definitions(-DUSE_FOLLY -DFOLLY_NO_CONFIG)
+ list(APPEND THIRDPARTY_LIBS glog)  # folly build links glog (CI installs libgoogle-glog-dev for it)
endif()
find_package(Threads REQUIRED)
if(USE_FOLLY)
list(APPEND SOURCES
third-party/folly/folly/container/detail/F14Table.cpp
+ third-party/folly/folly/detail/Futex.cpp
third-party/folly/folly/lang/SafeAssert.cpp
third-party/folly/folly/lang/ToAscii.cpp
- third-party/folly/folly/ScopeGuard.cpp)
+ third-party/folly/folly/ScopeGuard.cpp
+ third-party/folly/folly/synchronization/AtomicNotification.cpp
+ third-party/folly/folly/synchronization/DistributedMutex.cpp
+ third-party/folly/folly/synchronization/ParkingLot.cpp)
endif()
set(ROCKSDB_STATIC_LIB rocksdb${ARTIFACT_SUFFIX})
* Removed support for reading Bloom filters using obsolete block-based filter format. (Support for writing such filters was dropped in 7.0.) For good read performance on old DBs using these filters, a full compaction is required.
* Per KV checksum in write batch is verified before a write batch is written to WAL to detect any corruption to the write batch (#10114).
+### Performance Improvements
+* When compiled with folly (Meta-internal integration; experimental in open source build), improved the locking performance (CPU efficiency) of LRUCache by using folly DistributedMutex in place of a standard mutex.
+
## 7.3.0 (05/20/2022)
### Bug Fixes
* Fixed a bug where manual flush would block forever even though flush options had wait=false.
endif
PLATFORM_CCFLAGS += -DUSE_FOLLY -DFOLLY_NO_CONFIG
PLATFORM_CXXFLAGS += -DUSE_FOLLY -DFOLLY_NO_CONFIG
+# TODO: fix linking with fbcode compiler config
+ PLATFORM_LDFLAGS += -lglog
endif
ifdef TEST_CACHE_LINE_SIZE
fi
@# Pin to a particular version for public CI, so that PR authors don't
@# need to worry about folly breaking our integration. Update periodically
- cd third-party/folly && git reset --hard 98b9b2c1124e99f50f9085ddee74ce32afffc665
+ cd third-party/folly && git reset --hard beacd86d63cd71c904632262e6c36f60874d78ba
@# A hack to remove boost dependency.
@# NOTE: this hack is not needed if using FBCODE compiler config
perl -pi -e 's/^(#include <boost)/\/\/$$1/' third-party/folly/folly/functional/Invoke.h
+ @# NOTE: this hack (explicit int cast of the syscall result) is required for clang in some cases
+ perl -pi -e 's/int rv = syscall/int rv = (int)syscall/' third-party/folly/folly/detail/Futex.cpp
+ @# NOTE: this hack (also require __cpp_rtti before using experimental/memory_resource) is required for gcc in some cases
+ perl -pi -e 's/(__has_include.<experimental.memory_resource>.)/__cpp_rtti && $$1/' third-party/folly/folly/memory/MemoryResource.h
# ---------------------------------------------------------------------------
# Build size testing
"//folly/experimental/coro:collect",
"//folly/experimental/coro:coroutine",
"//folly/experimental/coro:task",
+ "//folly/synchronization:distributed_mutex",
], headers=None, link_whole=False, extra_test_libs=False)
cpp_library_wrapper(name="rocksdb_whole_archive_lib", srcs=[
"//folly/experimental/coro:collect",
"//folly/experimental/coro:coroutine",
"//folly/experimental/coro:task",
+ "//folly/synchronization:distributed_mutex",
], headers=None, link_whole=True, extra_test_libs=False)
cpp_library_wrapper(name="rocksdb_test_lib", srcs=[
"//folly/experimental/coro:blocking_wait",
"//folly/experimental/coro:collect",
"//folly/experimental/coro:coroutine",
- "//folly/experimental/coro:task"])
+ "//folly/experimental/coro:task",
+ "//folly/synchronization:distributed_mutex"])
# rocksdb_whole_archive_lib
TARGETS.add_library(
"rocksdb_whole_archive_lib",
"//folly/experimental/coro:blocking_wait",
"//folly/experimental/coro:collect",
"//folly/experimental/coro:coroutine",
- "//folly/experimental/coro:task"],
+ "//folly/experimental/coro:task",
+ "//folly/synchronization:distributed_mutex"],
headers=None,
extra_external_deps="",
link_whole=True)
#include "table/block_based/block_based_table_reader.h"
#include "table/block_based/cachable_entry.h"
#include "util/coding.h"
+#include "util/distributed_mutex.h"
#include "util/gflags_compat.h"
#include "util/hash.h"
#include "util/mutexlock.h"
}
void PrintEnv() const {
+#if defined(__GNUC__) && !defined(__OPTIMIZE__)
+ printf(
+ "WARNING: Optimization is disabled: benchmarks unnecessarily slow\n");
+#endif
+#ifndef NDEBUG
+ printf("WARNING: Assertions are enabled; benchmarks unnecessarily slow\n");
+#endif
printf("RocksDB version : %d.%d\n", kMajorVersion, kMinorVersion);
+ printf("DMutex impl name : %s\n", DMutex::kName());
printf("Number of threads : %u\n", FLAGS_threads);
printf("Ops per thread : %" PRIu64 "\n", FLAGS_ops_per_thread);
printf("Cache size : %s\n",
#include "port/port.h"
#include "tbb/concurrent_hash_map.h"
#include "util/autovector.h"
-#include "util/mutexlock.h"
+#include "util/distributed_mutex.h"
namespace ROCKSDB_NAMESPACE {
// Guards list_, head_, and recycle_. In addition, updating table_ also has
// to hold the mutex, to avoid the cache being in inconsistent state.
- mutable port::Mutex mutex_;
+ mutable DMutex mutex_;
// The circular list of cache handles. Initially the list is empty. Once a
// handle is needed by insertion, and no more handles are available in
DeleterFn deleter)>& callback,
uint32_t average_entries_per_lock, uint32_t* state) {
assert(average_entries_per_lock > 0);
- MutexLock lock(&mutex_);
+ DMutexLock l(mutex_);
// Figure out the range to iterate, update `state`
size_t list_size = list_.size();
pinned_usage_.fetch_sub(total_charge, std::memory_order_relaxed);
// Cleanup if it is the last reference.
if (!InCache(flags)) {
- MutexLock l(&mutex_);
+ DMutexLock l(mutex_);
RecycleHandle(handle, context);
}
}
void ClockCacheShard::SetCapacity(size_t capacity) {
CleanupContext context;
{
- MutexLock l(&mutex_);
+ DMutexLock l(mutex_);
capacity_.store(capacity, std::memory_order_relaxed);
EvictFromCache(0, &context);
}
uint32_t meta_charge =
CacheHandle::CalcMetadataCharge(key, metadata_charge_policy_);
size_t total_charge = charge + meta_charge;
- MutexLock l(&mutex_);
+ DMutexLock l(mutex_);
bool success = EvictFromCache(total_charge, context);
bool strict = strict_capacity_limit_.load(std::memory_order_relaxed);
if (!success && (strict || !hold_reference)) {
bool ClockCacheShard::EraseAndConfirm(const Slice& key, uint32_t hash,
CleanupContext* context) {
- MutexLock l(&mutex_);
+ DMutexLock l(mutex_);
HashTable::accessor accessor;
bool erased = false;
if (table_.find(accessor, ClockCacheKey(key, hash))) {
void ClockCacheShard::EraseUnRefEntries() {
CleanupContext context;
{
- MutexLock l(&mutex_);
+ DMutexLock l(mutex_);
table_.clear();
for (auto& handle : list_) {
UnsetInCache(&handle, &context);
#include "monitoring/perf_context_imp.h"
#include "monitoring/statistics.h"
#include "port/lang.h"
-#include "util/mutexlock.h"
+#include "util/distributed_mutex.h"
#define KEY_LENGTH \
16 // TODO(guido) Make use of this symbol in other parts of the source code
void LRUCacheShard::EraseUnRefEntries() {
autovector<LRUHandle*> last_reference_list;
{
- MutexLock l(&mutex_);
+ DMutexLock l(mutex_);
while (lru_.next != &lru_) {
LRUHandle* old = lru_.next;
// LRU list contains only elements which can be evicted.
// The state is essentially going to be the starting hash, which works
// nicely even if we resize between calls because we use upper-most
// hash bits for table indexes.
- MutexLock l(&mutex_);
+ DMutexLock l(mutex_);
uint32_t length_bits = table_.GetLengthBits();
uint32_t length = uint32_t{1} << length_bits;
void LRUCacheShard::SetCapacity(size_t capacity) {
autovector<LRUHandle*> last_reference_list;
{
- MutexLock l(&mutex_);
+ DMutexLock l(mutex_);
capacity_ = capacity;
EvictFromLRU(0, &last_reference_list);
}
}
void LRUCacheShard::SetStrictCapacityLimit(bool strict_capacity_limit) {
- MutexLock l(&mutex_);
+ DMutexLock l(mutex_);
strict_capacity_limit_ = strict_capacity_limit;
}
Status s = Status::OK();
autovector<LRUHandle*> last_reference_list;
{
- MutexLock l(&mutex_);
+ DMutexLock l(mutex_);
// Free the space following strict LRU policy until enough space
// is freed or the lru list is empty.
Cache::Handle* LRUCacheShard::Lookup(const Slice& key, uint32_t hash) {
LRUHandle* e = nullptr;
{
- MutexLock l(&mutex_);
+ DMutexLock l(mutex_);
e = table_.Lookup(key, hash);
if (e != nullptr) {
assert(e->InCache());
bool LRUCacheShard::Ref(Cache::Handle* h) {
LRUHandle* e = reinterpret_cast<LRUHandle*>(h);
- MutexLock l(&mutex_);
+ DMutexLock l(mutex_);
// To create another reference - entry must be already externally referenced.
assert(e->HasRefs());
e->Ref();
LRUHandle* e = reinterpret_cast<LRUHandle*>(handle);
bool last_reference = false;
{
- MutexLock l(&mutex_);
+ DMutexLock l(mutex_);
last_reference = e->Unref();
if (last_reference && e->InCache()) {
// The item is still in cache, and nobody else holds a reference to it.
LRUHandle* e;
bool last_reference = false;
{
- MutexLock l(&mutex_);
+ DMutexLock l(mutex_);
e = table_.Remove(key, hash);
if (e != nullptr) {
assert(e->InCache());
}
size_t LRUCacheShard::GetUsage() const {
- MutexLock l(&mutex_);
+ DMutexLock l(mutex_);
return usage_;
}
size_t LRUCacheShard::GetPinnedUsage() const {
- MutexLock l(&mutex_);
+ DMutexLock l(mutex_);
assert(usage_ >= lru_usage_);
return usage_ - lru_usage_;
}
#include "port/port.h"
#include "rocksdb/secondary_cache.h"
#include "util/autovector.h"
+#include "util/distributed_mutex.h"
namespace ROCKSDB_NAMESPACE {
namespace fast_lru_cache {
// mutex_ protects the following state.
// We don't count mutex_ as the cache's internal state so semantically we
// don't mind mutex_ invoking the non-const actions.
- mutable port::Mutex mutex_;
+ mutable DMutex mutex_;
};
class LRUCache
#include "monitoring/perf_context_imp.h"
#include "monitoring/statistics.h"
#include "port/lang.h"
-#include "util/mutexlock.h"
+#include "util/distributed_mutex.h"
namespace ROCKSDB_NAMESPACE {
namespace lru_cache {
void LRUCacheShard::EraseUnRefEntries() {
autovector<LRUHandle*> last_reference_list;
{
- MutexLock l(&mutex_);
+ DMutexLock l(mutex_);
while (lru_.next != &lru_) {
LRUHandle* old = lru_.next;
// LRU list contains only elements which can be evicted.
// The state is essentially going to be the starting hash, which works
// nicely even if we resize between calls because we use upper-most
// hash bits for table indexes.
- MutexLock l(&mutex_);
+ DMutexLock l(mutex_);
uint32_t length_bits = table_.GetLengthBits();
uint32_t length = uint32_t{1} << length_bits;
}
void LRUCacheShard::TEST_GetLRUList(LRUHandle** lru, LRUHandle** lru_low_pri) {
- MutexLock l(&mutex_);
+ DMutexLock l(mutex_);
*lru = &lru_;
*lru_low_pri = lru_low_pri_;
}
size_t LRUCacheShard::TEST_GetLRUSize() {
- MutexLock l(&mutex_);
+ DMutexLock l(mutex_);
LRUHandle* lru_handle = lru_.next;
size_t lru_size = 0;
while (lru_handle != &lru_) {
}
double LRUCacheShard::GetHighPriPoolRatio() {
- MutexLock l(&mutex_);
+ DMutexLock l(mutex_);
return high_pri_pool_ratio_;
}
void LRUCacheShard::SetCapacity(size_t capacity) {
autovector<LRUHandle*> last_reference_list;
{
- MutexLock l(&mutex_);
+ DMutexLock l(mutex_);
capacity_ = capacity;
high_pri_pool_capacity_ = capacity_ * high_pri_pool_ratio_;
EvictFromLRU(0, &last_reference_list);
}
void LRUCacheShard::SetStrictCapacityLimit(bool strict_capacity_limit) {
- MutexLock l(&mutex_);
+ DMutexLock l(mutex_);
strict_capacity_limit_ = strict_capacity_limit;
}
autovector<LRUHandle*> last_reference_list;
{
- MutexLock l(&mutex_);
+ DMutexLock l(mutex_);
// Free the space following strict LRU policy until enough space
// is freed or the lru list is empty.
} else {
// Since the secondary cache lookup failed, mark the item as not in cache
// Don't charge the cache as its only metadata that'll shortly be released
- MutexLock l(&mutex_);
+ DMutexLock l(mutex_);
// TODO
e->CalcTotalCharge(0, metadata_charge_policy_);
e->SetInCache(false);
bool wait, Statistics* stats) {
LRUHandle* e = nullptr;
{
- MutexLock l(&mutex_);
+ DMutexLock l(mutex_);
e = table_.Lookup(key, hash);
if (e != nullptr) {
assert(e->InCache());
bool LRUCacheShard::Ref(Cache::Handle* h) {
LRUHandle* e = reinterpret_cast<LRUHandle*>(h);
- MutexLock l(&mutex_);
+ DMutexLock l(mutex_);
// To create another reference - entry must be already externally referenced.
assert(e->HasRefs());
e->Ref();
}
void LRUCacheShard::SetHighPriorityPoolRatio(double high_pri_pool_ratio) {
- MutexLock l(&mutex_);
+ DMutexLock l(mutex_);
high_pri_pool_ratio_ = high_pri_pool_ratio;
high_pri_pool_capacity_ = capacity_ * high_pri_pool_ratio_;
MaintainPoolSize();
LRUHandle* e = reinterpret_cast<LRUHandle*>(handle);
bool last_reference = false;
{
- MutexLock l(&mutex_);
+ DMutexLock l(mutex_);
last_reference = e->Unref();
if (last_reference && e->InCache()) {
// The item is still in cache, and nobody else holds a reference to it.
LRUHandle* e;
bool last_reference = false;
{
- MutexLock l(&mutex_);
+ DMutexLock l(mutex_);
e = table_.Remove(key, hash);
if (e != nullptr) {
assert(e->InCache());
bool LRUCacheShard::IsReady(Cache::Handle* handle) {
LRUHandle* e = reinterpret_cast<LRUHandle*>(handle);
- MutexLock l(&mutex_);
+ DMutexLock l(mutex_);
bool ready = true;
if (e->IsPending()) {
assert(secondary_cache_);
}
size_t LRUCacheShard::GetUsage() const {
- MutexLock l(&mutex_);
+ DMutexLock l(mutex_);
return usage_;
}
size_t LRUCacheShard::GetPinnedUsage() const {
- MutexLock l(&mutex_);
+ DMutexLock l(mutex_);
assert(usage_ >= lru_usage_);
return usage_ - lru_usage_;
}
const int kBufferSize = 200;
char buffer[kBufferSize];
{
- MutexLock l(&mutex_);
+ DMutexLock l(mutex_);
snprintf(buffer, kBufferSize, " high_pri_pool_ratio: %.3lf\n",
high_pri_pool_ratio_);
}
#include "port/port.h"
#include "rocksdb/secondary_cache.h"
#include "util/autovector.h"
+#include "util/distributed_mutex.h"
namespace ROCKSDB_NAMESPACE {
namespace lru_cache {
// mutex_ protects the following state.
// We don't count mutex_ as the cache's internal state so semantically we
// don't mind mutex_ invoking the non-const actions.
- mutable port::Mutex mutex_;
+ mutable DMutex mutex_;
std::shared_ptr<SecondaryCache> secondary_cache_;
};
#include "util/compression.h"
#include "util/crc32c.h"
#include "util/defer.h"
+#include "util/distributed_mutex.h"
#include "util/hash_containers.h"
#include "util/mutexlock.h"
#include "util/stop_watch.h"
}
ROCKS_LOG_HEADER(logger, "Fast CRC32 supported: %s",
crc32c::IsFastCrc32Supported().c_str());
+
+ ROCKS_LOG_HEADER(logger, "DMutex implementation: %s", DMutex::kName());
}
} // namespace
class Mutex {
public:
+ static const char* kName() { return "pthread_mutex_t"; }
+
explicit Mutex(bool adaptive = kDefaultToAdaptiveMutex);
// No copying
Mutex(const Mutex&) = delete;
// it does NOT verify that mutex is held by a calling thread
void AssertHeld();
+ // Also implement std Lockable
+ inline void lock() { Lock(); }
+ inline void unlock() { Unlock(); }
+ inline bool try_lock() { return TryLock(); }
+
private:
friend class CondVar;
pthread_mutex_t mu_;
class Mutex {
public:
+ static const char* kName() { return "std::mutex"; }
- /* implicit */ Mutex(bool adaptive = kDefaultToAdaptiveMutex)
+ explicit Mutex(bool IGNORED_adaptive = kDefaultToAdaptiveMutex)
#ifndef NDEBUG
- : locked_(false)
+ : locked_(false)
#endif
- { }
+ {
+ (void)IGNORED_adaptive;
+ }
~Mutex();
#endif
}
+ // Also implement std Lockable
+ inline void lock() { Lock(); }
+ inline void unlock() { Unlock(); }
+ inline bool try_lock() { return TryLock(); }
+
// Mutex is move only with lock ownership transfer
Mutex(const Mutex&) = delete;
void operator=(const Mutex&) = delete;
FOLLY_SOURCES = \
$(FOLLY_DIR)/folly/container/detail/F14Table.cpp \
+ $(FOLLY_DIR)/folly/detail/Futex.cpp \
$(FOLLY_DIR)/folly/lang/SafeAssert.cpp \
$(FOLLY_DIR)/folly/lang/ToAscii.cpp \
$(FOLLY_DIR)/folly/ScopeGuard.cpp \
+ $(FOLLY_DIR)/folly/synchronization/AtomicNotification.cpp \
+ $(FOLLY_DIR)/folly/synchronization/DistributedMutex.cpp \
+ $(FOLLY_DIR)/folly/synchronization/ParkingLot.cpp \
TOOLS_MAIN_SOURCES = \
db_stress_tool/db_stress.cc \
--- /dev/null
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include "rocksdb/rocksdb_namespace.h"
+
+// This file declares a wrapper around the efficient folly DistributedMutex
+// that falls back on a standard mutex when not available. See
+// https://github.com/facebook/folly/blob/main/folly/synchronization/DistributedMutex.h
+// for benefits and limitations.
+
+// At the moment, only scoped locking is supported using DMutexLock
+// RAII wrapper, because lock/unlock APIs will vary.
+
+#ifdef USE_FOLLY
+
+#include <mutex>
+
+#include <folly/synchronization/DistributedMutex.h>
+
+namespace ROCKSDB_NAMESPACE {
+
+// Thin subclass of folly::DistributedMutex that adds the parts of the
+// port::Mutex surface used alongside DMutex (kName, a bool-taking
+// constructor, AssertHeld), so callers compile unchanged with or without
+// folly.
+class DMutex : public folly::DistributedMutex {
+ public:
+  // Name of the underlying mutex implementation, for logs and diagnostics.
+  static const char* kName() { return "folly::DistributedMutex"; }
+
+  // The argument exists only for signature compatibility with port::Mutex
+  // and is ignored.
+  explicit DMutex(bool IGNORED_adaptive = false) { (void)IGNORED_adaptive; }
+
+  // currently no-op
+  void AssertHeld() {}
+};
+// Scoped lock. It is declared on the folly base class; a DMutex binds to it
+// through the derived-to-base reference conversion.
+using DMutexLock = std::lock_guard<folly::DistributedMutex>;
+
+}  // namespace ROCKSDB_NAMESPACE
+
+#else
+
+// std::lock_guard lives in <mutex>; include it directly rather than relying
+// on port/port.h to pull it in transitively.
+#include <mutex>
+
+#include "port/port.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// Without folly, DMutex is the platform mutex, which provides the
+// lock()/unlock() members that std::lock_guard requires.
+using DMutex = port::Mutex;
+using DMutexLock = std::lock_guard<DMutex>;
+
+}  // namespace ROCKSDB_NAMESPACE
+
+#endif