git-server-git.apps.pok.os.sepia.ceph.com Git - rocksdb.git/commitdiff
Do not schedule memtable trimming if there is no history (#6177)
author: Levi Tamasi <ltamasi@fb.com>
Sat, 14 Dec 2019 03:09:53 +0000 (19:09 -0800)
committer: Levi Tamasi <ltamasi@fb.com>
Fri, 10 Jan 2020 17:53:31 +0000 (09:53 -0800)
Summary:
We have observed an increase in CPU load caused by frequent calls to
`ColumnFamilyData::InstallSuperVersion` from `DBImpl::TrimMemtableHistory`
when using `max_write_buffer_size_to_maintain` to limit the amount of
memtable history maintained for transaction conflict checking. Part of the issue
is that trimming can potentially be scheduled even if there is no memtable
history. The patch adds a check that fixes this.

See also https://github.com/facebook/rocksdb/pull/6169.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6177

Test Plan:
Compared `perf` output for

```
./db_bench -benchmarks=randomtransaction -optimistic_transaction_db=1 -statistics -stats_interval_seconds=1 -duration=90 -num=500000 --max_write_buffer_size_to_maintain=16000000 --transaction_set_snapshot=1 --threads=32
```

before and after the change. There is a significant reduction for the call chain
`rocksdb::DBImpl::TrimMemtableHistory` -> `rocksdb::ColumnFamilyData::InstallSuperVersion` ->
`rocksdb::ThreadLocalPtr::StaticMeta::Scrape` even without https://github.com/facebook/rocksdb/pull/6169.

Differential Revision: D19057445

Pulled By: ltamasi

fbshipit-source-id: dff81882d7b280e17eda7d9b072a2d4882c50f79

db/memtable.h
db/memtable_list.cc
db/memtable_list.h
db/write_batch.cc

index f316ab8e29aee440db2fb07c78d0ef3b940c635b..4b1fcf8f3b3dd4f8e7a6c5311db47427f52256b8 100644 (file)
@@ -135,7 +135,7 @@ class MemTable {
 
   // As a cheap version of `ApproximateMemoryUsage()`, this function doens't
   // require external synchronization. The value may be less accurate though
-  size_t ApproximateMemoryUsageFast() {
+  size_t ApproximateMemoryUsageFast() const {
     return approximate_memory_usage_.load(std::memory_order_relaxed);
   }
 
index e3f0732de1555e6658c0673d773936dfd6b55ff8..8f053b6ef649663c548763291937921f1184e6e2 100644 (file)
@@ -266,7 +266,7 @@ void MemTableListVersion::Remove(MemTable* m,
 }
 
 // return the total memory usage assuming the oldest flushed memtable is dropped
-size_t MemTableListVersion::ApproximateMemoryUsageExcludingLast() {
+size_t MemTableListVersion::ApproximateMemoryUsageExcludingLast() const {
   size_t total_memtable_size = 0;
   for (auto& memtable : memlist_) {
     total_memtable_size += memtable->ApproximateMemoryUsage();
@@ -543,7 +543,7 @@ size_t MemTableList::ApproximateUnflushedMemTablesMemoryUsage() {
 
 size_t MemTableList::ApproximateMemoryUsage() { return current_memory_usage_; }
 
-size_t MemTableList::ApproximateMemoryUsageExcludingLast() {
+size_t MemTableList::ApproximateMemoryUsageExcludingLast() const {
   size_t usage =
       current_memory_usage_excluding_last_.load(std::memory_order_relaxed);
   return usage;
index 75cc1a524b28e7b40525c70f05aa9882cc73e7f5..44031ca4058babc2fb3adcd35c8684d791cb1895 100644 (file)
@@ -157,7 +157,7 @@ class MemTableListVersion {
   // excluding the last MemTable in memlist_history_. The reason for excluding
   // the last MemTable is to see if dropping the last MemTable will keep total
   // memory usage above or equal to max_write_buffer_size_to_maintain_
-  size_t ApproximateMemoryUsageExcludingLast();
+  size_t ApproximateMemoryUsageExcludingLast() const;
 
   bool MemtableLimitExceeded(size_t usage);
 
@@ -261,7 +261,7 @@ class MemTableList {
   size_t ApproximateMemoryUsage();
 
   // Returns the cached current_memory_usage_excluding_last_ value
-  size_t ApproximateMemoryUsageExcludingLast();
+  size_t ApproximateMemoryUsageExcludingLast() const;
 
   // Update current_memory_usage_excluding_last_ from MemtableListVersion
   void UpdateMemoryUsageExcludingLast();
index 1b878f3b093e04ae48b8a93218d8c1eb4d5a513f..c8a7551c507c1672c86b70d6fe03771204768d8e 100644 (file)
@@ -1786,14 +1786,28 @@ class MemTableInserter : public WriteBatch::Handler {
     // check if memtable_list size exceeds max_write_buffer_size_to_maintain
     if (trim_history_scheduler_ != nullptr) {
       auto* cfd = cf_mems_->current();
-      assert(cfd != nullptr);
-      if (cfd->ioptions()->max_write_buffer_size_to_maintain > 0 &&
-          cfd->mem()->ApproximateMemoryUsageFast() +
-                  cfd->imm()->ApproximateMemoryUsageExcludingLast() >=
-              static_cast<size_t>(
-                  cfd->ioptions()->max_write_buffer_size_to_maintain) &&
-          cfd->imm()->MarkTrimHistoryNeeded()) {
-        trim_history_scheduler_->ScheduleWork(cfd);
+
+      assert(cfd);
+      assert(cfd->ioptions());
+
+      const size_t size_to_maintain = static_cast<size_t>(
+          cfd->ioptions()->max_write_buffer_size_to_maintain);
+
+      if (size_to_maintain > 0) {
+        MemTableList* const imm = cfd->imm();
+        assert(imm);
+
+        if (imm->NumFlushed() > 0) {
+          const MemTable* const mem = cfd->mem();
+          assert(mem);
+
+          if (mem->ApproximateMemoryUsageFast() +
+                      imm->ApproximateMemoryUsageExcludingLast() >=
+                  size_to_maintain &&
+              imm->MarkTrimHistoryNeeded()) {
+            trim_history_scheduler_->ScheduleWork(cfd);
+          }
+        }
       }
     }
   }