]> git-server-git.apps.pok.os.sepia.ceph.com Git - rocksdb.git/commitdiff
Fix a data race related to DB properties (#8206)
authorLevi Tamasi <ltamasi@fb.com>
Mon, 19 Apr 2021 23:37:16 +0000 (16:37 -0700)
committerLevi Tamasi <ltamasi@fb.com>
Tue, 20 Apr 2021 17:17:49 +0000 (10:17 -0700)
Summary:
Historically, the DB properties `rocksdb.cur-size-active-mem-table`,
`rocksdb.cur-size-all-mem-tables`, and `rocksdb.size-all-mem-tables` called
the method `MemTable::ApproximateMemoryUsage` for mutable memtables,
which is not safe without synchronization. This resulted in data races with
memtable inserts. The patch changes the code handling these properties
to use `MemTable::ApproximateMemoryUsageFast` instead, which returns a
cached value backed by an atomic variable. Two test cases had to be updated
for this change. `MemoryTest.MemTableAndTableReadersTotal` was fixed by
increasing the value size used so each value ends up in its own memtable,
which was the original intention (note: the test has been broken in the sense
that the test code didn't consider that memtable sizes below 64 KB get
increased to 64 KB by `SanitizeOptions`, and has been passing only by
accident). `DBTest.MemoryUsageWithMaxWriteBufferSizeToMaintain` relies on
completely up-to-date values and thus was changed to use `ApproximateMemoryUsage`
directly instead of going through the DB properties. Note: this should be safe in this case
since there's only a single thread involved.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/8206

Test Plan: `make check`

Reviewed By: riversand963

Differential Revision: D27866811

Pulled By: ltamasi

fbshipit-source-id: 7bd754d0565e0a65f1f7f0e78ffc093beef79394

db/db_test.cc
db/internal_stats.cc
utilities/memory/memory_test.cc

index 4e1b660f4ddbd9d6c02a99d95cd320fb506434ff..89f844689100f9720a7d7d2611f4cee68d26cf58 100644 (file)
@@ -6701,20 +6701,19 @@ TEST_F(DBTest, MemoryUsageWithMaxWriteBufferSizeToMaintain) {
   Reopen(options);
   Random rnd(301);
   bool memory_limit_exceeded = false;
-  uint64_t size_all_mem_table = 0;
-  uint64_t cur_active_mem = 0;
+
+  ColumnFamilyData* cfd =
+      static_cast<ColumnFamilyHandleImpl*>(db_->DefaultColumnFamily())->cfd();
+
   for (int i = 0; i < 1000; i++) {
     std::string value = rnd.RandomString(1000);
     ASSERT_OK(Put("keykey_" + std::to_string(i), value));
 
     dbfull()->TEST_WaitForFlushMemTable();
 
-    ASSERT_TRUE(db_->GetIntProperty(db_->DefaultColumnFamily(),
-                                    DB::Properties::kSizeAllMemTables,
-                                    &size_all_mem_table));
-    ASSERT_TRUE(db_->GetIntProperty(db_->DefaultColumnFamily(),
-                                    DB::Properties::kCurSizeActiveMemTable,
-                                    &cur_active_mem));
+    const uint64_t cur_active_mem = cfd->mem()->ApproximateMemoryUsage();
+    const uint64_t size_all_mem_table =
+        cur_active_mem + cfd->imm()->ApproximateMemoryUsage();
 
     // Errors out if memory usage keeps on increasing beyond the limit.
     // Once memory limit exceeds,  memory_limit_exceeded  is set and if
index 030d1fab661e81d247ceec61872bc1d04eddaaac..a5e2b09df1e2e741f58e2abae2c8dcddfcef50c2 100644 (file)
@@ -751,21 +751,24 @@ bool InternalStats::HandleBackgroundErrors(uint64_t* value, DBImpl* /*db*/,
 bool InternalStats::HandleCurSizeActiveMemTable(uint64_t* value, DBImpl* /*db*/,
                                                 Version* /*version*/) {
   // Current size of the active memtable
-  *value = cfd_->mem()->ApproximateMemoryUsage();
+  // Using ApproximateMemoryUsageFast to avoid the need for synchronization
+  *value = cfd_->mem()->ApproximateMemoryUsageFast();
   return true;
 }
 
 bool InternalStats::HandleCurSizeAllMemTables(uint64_t* value, DBImpl* /*db*/,
                                               Version* /*version*/) {
   // Current size of the active memtable + immutable memtables
-  *value = cfd_->mem()->ApproximateMemoryUsage() +
+  // Using ApproximateMemoryUsageFast to avoid the need for synchronization
+  *value = cfd_->mem()->ApproximateMemoryUsageFast() +
            cfd_->imm()->ApproximateUnflushedMemTablesMemoryUsage();
   return true;
 }
 
 bool InternalStats::HandleSizeAllMemTables(uint64_t* value, DBImpl* /*db*/,
                                            Version* /*version*/) {
-  *value = cfd_->mem()->ApproximateMemoryUsage() +
+  // Using ApproximateMemoryUsageFast to avoid the need for synchronization
+  *value = cfd_->mem()->ApproximateMemoryUsageFast() +
            cfd_->imm()->ApproximateMemoryUsage();
   return true;
 }
index d90b9899f265785e82d3bc8e8ac18ac6cbee32df..07dab4fb84e789e94786add2e576b47252044239 100644 (file)
@@ -145,8 +145,10 @@ TEST_F(MemoryTest, MemTableAndTableReadersTotal) {
   std::vector<uint64_t> usage_by_type;
   std::vector<std::vector<ColumnFamilyHandle*>> vec_handles;
   const int kNumDBs = 10;
+  // These key/value sizes ensure each KV has its own memtable. Note that the
+  // minimum write_buffer_size allowed is 64 KB.
   const int kKeySize = 100;
-  const int kValueSize = 500;
+  const int kValueSize = 1 << 16;
   Options opt;
   opt.create_if_missing = true;
   opt.create_missing_column_families = true;