]> git-server-git.apps.pok.os.sepia.ceph.com Git - rocksdb.git/commitdiff
Fix data race of GetCreationTimeOfOldestFile() (#6473)
authorsdong <siying.d@fb.com>
Tue, 3 Mar 2020 00:34:36 +0000 (16:34 -0800)
committersdong <siying.d@fb.com>
Tue, 3 Mar 2020 19:23:01 +0000 (11:23 -0800)
Summary:
When DBImpl::GetCreationTimeOfOldestFile() calls Version::GetCreationTimeOfOldestFile(), the version is not directly or indirectly referenced, so an event like compaction can race with the operation and cause DBImpl::GetCreationTimeOfOldestFile() to access delocated data. This was caught by an ASAN run:

==268==ERROR: AddressSanitizer: heap-use-after-free on address 0x612000b7d198 at pc 0x000018332913 bp 0x7f391510d310 sp 0x7f391510d308
READ of size 8 at 0x612000b7d198 thread T845 (store_load-33)
SCARINESS: 51 (8-byte-read-heap-use-after-free)
    #0 0x18332912 in rocksdb::Version::GetCreationTimeOfOldestFile(unsigned long*) rocksdb/src/db/version_set.cc:1488
    https://github.com/facebook/rocksdb/issues/1 0x1803ddaa in rocksdb::DBImpl::GetCreationTimeOfOldestFile(unsigned long*) rocksdb/src/db/db_impl/db_impl.cc:4499
    https://github.com/facebook/rocksdb/issues/2 0xe24ca09 in rocksdb::StackableDB::GetCreationTimeOfOldestFile(unsigned long*) rocksdb/utilities/stackable_db.h:392
    ......
0x612000b7d198 is located 216 bytes inside of 296-byte region [0x612000b7d0c0,0x612000b7d1e8)
freed by thread T28 here:
    ......
    https://github.com/facebook/rocksdb/issues/5 0x1832c73f in std::vector<rocksdb::FileMetaData*, std::allocator<rocksdb::FileMetaData*> >::~vector() third-party-buck/platform007/build/libgcc/include/c++/trunk/bits/stl_vector.h:435
    https://github.com/facebook/rocksdb/issues/6 0x1832c73f in rocksdb::VersionStorageInfo::~VersionStorageInfo() rocksdb/src/db/version_set.cc:734
    https://github.com/facebook/rocksdb/issues/7 0x1832cf42 in rocksdb::Version::~Version() rocksdb/src/db/version_set.cc:758
    https://github.com/facebook/rocksdb/issues/8 0x9d1bb5 in rocksdb::Version::Unref() rocksdb/src/db/version_set.cc:2869
    https://github.com/facebook/rocksdb/issues/9 0x183e7631 in rocksdb::Compaction::~Compaction() rocksdb/src/db/compaction/compaction.cc:275
    https://github.com/facebook/rocksdb/issues/10 0x9e6de6 in std::default_delete<rocksdb::Compaction>::operator()(rocksdb::Compaction*) const third-party-buck/platform007/build/libgcc/include/c++/trunk/bits/unique_ptr.h:78
    https://github.com/facebook/rocksdb/issues/11 0x9e6de6 in std::unique_ptr<rocksdb::Compaction, std::default_delete<rocksdb::Compaction> >::reset(rocksdb::Compaction*) third-party-buck/platform007/build/libgcc/include/c++/trunk/bits/unique_ptr.h:376
    https://github.com/facebook/rocksdb/issues/12 0x9e6de6 in rocksdb::DBImpl::BackgroundCompaction(bool*, rocksdb::JobContext*, rocksdb::LogBuffer*, rocksdb::DBImpl::PrepickedCompaction*, rocksdb::Env::Priority) rocksdb/src/db/db_impl/db_impl_compaction_flush.cc:2826
    https://github.com/facebook/rocksdb/issues/13 0x9ac3b8 in rocksdb::DBImpl::BackgroundCallCompaction(rocksdb::DBImpl::PrepickedCompaction*, rocksdb::Env::Priority) rocksdb/src/db/db_impl/db_impl_compaction_flush.cc:2320
    https://github.com/facebook/rocksdb/issues/14 0x9abff7 in rocksdb::DBImpl::BGWorkCompaction(void*) rocksdb/src/db/db_impl/db_impl_compaction_flush.cc:2096
    ......

Fix the issue by reference the super version and use the referenced version from it.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6473

Test Plan: Run ASAN for all existing tests.

Differential Revision: D20196416

fbshipit-source-id: 5f4a7918110fc7b8dd7841932d376bc9d1e59d6f

HISTORY.md
db/db_impl/db_impl.cc

index d210e90faf7cbf1ca2be8a68f856b7fb9507d4a0..fe5393f76a86e8508f53bb93dc519b8d3d05d7e9 100644 (file)
@@ -1,8 +1,13 @@
 # Rocksdb Change Log
+## Unreleased
+### Bug Fixes
+* Fix a data race that might cause crash when calling DB::GetCreationTimeOfOldestFile() by a small chance. The bug was introduced in 6.6 Release.
+
 ## 6.7.2 (02/24/2020)
 ### Bug Fixes
 * Fixed a bug of IO Uring partial result handling introduced in 6.7.0.
 
+
 ## 6.7.1 (02/13/2020)
 ### Bug Fixes
 * Fixed issue #6316 that can cause a corruption of the MANIFEST file in the middle when writing to it fails due to no disk space.
index 9366d49d48124a3419750f31af5c07767feda601..bc0496f7fdcdb2f42e9194896d8137234d1286c9 100644 (file)
@@ -4495,13 +4495,21 @@ Status DBImpl::GetCreationTimeOfOldestFile(uint64_t* creation_time) {
   if (mutable_db_options_.max_open_files == -1) {
     uint64_t oldest_time = port::kMaxUint64;
     for (auto cfd : *versions_->GetColumnFamilySet()) {
-      uint64_t ctime;
-      cfd->current()->GetCreationTimeOfOldestFile(&ctime);
-      if (ctime < oldest_time) {
-        oldest_time = ctime;
-      }
-      if (oldest_time == 0) {
-        break;
+      if (!cfd->IsDropped()) {
+        uint64_t ctime;
+        {
+          SuperVersion* sv = GetAndRefSuperVersion(cfd);
+          Version* version = sv->current;
+          version->GetCreationTimeOfOldestFile(&ctime);
+          ReturnAndCleanupSuperVersion(cfd, sv);
+        }
+
+        if (ctime < oldest_time) {
+          oldest_time = ctime;
+        }
+        if (oldest_time == 0) {
+          break;
+        }
       }
     }
     *creation_time = oldest_time;