]> git-server-git.apps.pok.os.sepia.ceph.com Git - rocksdb.git/commitdiff
Retire superfluous functions introduced in earlier mempurge PRs. (#8558)
authorBaptiste Lemaire <blemaire@fb.com>
Fri, 23 Jul 2021 01:26:47 +0000 (18:26 -0700)
committerFacebook GitHub Bot <facebook-github-bot@users.noreply.github.com>
Fri, 23 Jul 2021 01:29:13 +0000 (18:29 -0700)
Summary:
The main challenge to make the memtable garbage collection prototype (nicknamed `mempurge`) was to not get rid of WAL files that contain unflushed (but mempurged) data. That was successfully guaranteed by not writing the VersionEdit to the MANIFEST file after a successful mempurge.
By not writing VersionEdits to the `MANIFEST` file after a succesful mempurge operation, we do not change the earliest log file number that contains unflushed data: `cfd->GetLogNumber()` (`cfd->SetLogNumber()` is only called in `VersionSet::ProcessManifestWrites`). As a result, a number of functions introduced earlier just for the mempurge operation are not obscolete/redundant. (e.g.: `FlushJob::ExtractEarliestLogFileNumber`), and this PR aims at cleaning up all these now-unnecessary functions. In particular, we no longer need to store the earliest log file number in the `MemTable` struct itself. This PR therefore also reverts the `MemTable` struct to its original form.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/8558

Test Plan: Already included in `db_flush_test.cc`.

Reviewed By: anand1976

Differential Revision: D29764351

Pulled By: bjlemaire

fbshipit-source-id: 0f43b260fa270251862512f397d3f24ee62e8437

15 files changed:
db/column_family.cc
db/column_family.h
db/db_impl/db_impl_open.cc
db/db_impl/db_impl_secondary.cc
db/db_impl/db_impl_write.cc
db/flush_job.cc
db/flush_job.h
db/memtable.cc
db/memtable.h
db/memtable_list.cc
db/memtable_list.h
db/repair.cc
db/version_edit_handler.cc
db/version_set.cc
db/version_set.h

index 47389a670ad823ba0a03c200fea358c87299125b..de90f6b6f5d9b664119ffe2f4fae1654ce18a748 100644 (file)
@@ -1059,20 +1059,17 @@ uint64_t ColumnFamilyData::GetLiveSstFilesSize() const {
 }
 
 MemTable* ColumnFamilyData::ConstructNewMemtable(
-    const MutableCFOptions& mutable_cf_options, SequenceNumber earliest_seq,
-    uint64_t log_number) {
+    const MutableCFOptions& mutable_cf_options, SequenceNumber earliest_seq) {
   return new MemTable(internal_comparator_, ioptions_, mutable_cf_options,
-                      write_buffer_manager_, earliest_seq, id_, log_number);
+                      write_buffer_manager_, earliest_seq, id_);
 }
 
 void ColumnFamilyData::CreateNewMemtable(
-    const MutableCFOptions& mutable_cf_options, SequenceNumber earliest_seq,
-    uint64_t log_number) {
+    const MutableCFOptions& mutable_cf_options, SequenceNumber earliest_seq) {
   if (mem_ != nullptr) {
     delete mem_->Unref();
   }
-  SetMemtable(
-      ConstructNewMemtable(mutable_cf_options, earliest_seq, log_number));
+  SetMemtable(ConstructNewMemtable(mutable_cf_options, earliest_seq));
   mem_->Ref();
 }
 
index d73c3dda99843661278df74552ad1cbb6d2549ee..48fb9abc19576249d7d7e81a961c845d65e9b954 100644 (file)
@@ -371,10 +371,9 @@ class ColumnFamilyData {
 
   // See Memtable constructor for explanation of earliest_seq param.
   MemTable* ConstructNewMemtable(const MutableCFOptions& mutable_cf_options,
-                                 SequenceNumber earliest_seq,
-                                 uint64_t log_number = 0);
+                                 SequenceNumber earliest_seq);
   void CreateNewMemtable(const MutableCFOptions& mutable_cf_options,
-                         SequenceNumber earliest_seq, uint64_t log_number = 0);
+                         SequenceNumber earliest_seq);
 
   TableCache* table_cache() const { return table_cache_.get(); }
   BlobFileCache* blob_file_cache() const { return blob_file_cache_.get(); }
index b9c2b74dfb28764bd346db6edde135bd5823a57d..e5def7003790e4ef05a771546352db90da8cd45b 100644 (file)
@@ -630,7 +630,7 @@ Status DBImpl::Recover(
         // Clear memtables if recovery failed
         for (auto cfd : *versions_->GetColumnFamilySet()) {
           cfd->CreateNewMemtable(*cfd->GetLatestMutableCFOptions(),
-                                 kMaxSequenceNumber, cfd->GetLogNumber());
+                                 kMaxSequenceNumber);
         }
       }
     }
@@ -1066,7 +1066,7 @@ Status DBImpl::RecoverLogFiles(const std::vector<uint64_t>& wal_numbers,
           flushed = true;
 
           cfd->CreateNewMemtable(*cfd->GetLatestMutableCFOptions(),
-                                 *next_sequence, cfd->GetLogNumber());
+                                 *next_sequence);
         }
       }
     }
@@ -1204,8 +1204,7 @@ Status DBImpl::RecoverLogFiles(const std::vector<uint64_t>& wal_numbers,
           flushed = true;
 
           cfd->CreateNewMemtable(*cfd->GetLatestMutableCFOptions(),
-                                 versions_->LastSequence(),
-                                 cfd->GetLogNumber());
+                                 versions_->LastSequence());
         }
         data_seen = true;
       }
index 4742110ca6ebbd6f13d286dc08ddf58666da8b83..ea773ddf7bff994c1043c0e1c5b43ccda534f0c3 100644 (file)
@@ -256,8 +256,8 @@ Status DBImplSecondary::RecoverLogFiles(
                                          curr_log_num != log_number)) {
             const MutableCFOptions mutable_cf_options =
                 *cfd->GetLatestMutableCFOptions();
-            MemTable* new_mem = cfd->ConstructNewMemtable(
-                mutable_cf_options, seq_of_batch, log_number);
+            MemTable* new_mem =
+                cfd->ConstructNewMemtable(mutable_cf_options, seq_of_batch);
             cfd->mem()->SetNextLogNumber(log_number);
             cfd->imm()->Add(cfd->mem(), &job_context->memtables_to_free);
             new_mem->Ref();
index 66ade6b21f08e9ed24d99d64f52cc284274a8ce1..2655d30a9e9d8c36395699f5a1557399537a2b15 100644 (file)
@@ -1800,8 +1800,7 @@ Status DBImpl::SwitchMemtable(ColumnFamilyData* cfd, WriteContext* context) {
   }
   if (s.ok()) {
     SequenceNumber seq = versions_->LastSequence();
-    new_mem =
-        cfd->ConstructNewMemtable(mutable_cf_options, seq, new_log_number);
+    new_mem = cfd->ConstructNewMemtable(mutable_cf_options, seq);
     context->superversion_context.NewSuperVersion();
   }
   ROCKS_LOG_INFO(immutable_db_options_.info_log,
index 9b9385d07964c637303f1de31b3115d6bad3b3a7..09d348cc8c8ef1ec6404e8368eae530a7064a037 100644 (file)
@@ -333,17 +333,6 @@ void FlushJob::Cancel() {
   base_->Unref();
 }
 
-uint64_t FlushJob::ExtractEarliestLogFileNumber() {
-  uint64_t earliest_logno = 0;
-  for (MemTable* m : mems_) {
-    uint64_t logno = m->GetEarliestLogFileNumber();
-    if (logno > 0 && (earliest_logno == 0 || logno < earliest_logno)) {
-      earliest_logno = logno;
-    }
-  }
-  return earliest_logno;
-}
-
 Status FlushJob::MemPurge() {
   Status s;
   db_mutex_->AssertHeld();
@@ -387,8 +376,6 @@ Status FlushJob::MemPurge() {
       NewMergingIterator(&(cfd_->internal_comparator()), memtables.data(),
                          static_cast<int>(memtables.size()), &arena));
 
-  uint64_t earliest_logno = ExtractEarliestLogFileNumber();
-
   auto* ioptions = cfd_->ioptions();
 
   // Place iterator at the First (meaning most recent) key node.
@@ -429,7 +416,7 @@ Status FlushJob::MemPurge() {
 
     new_mem = new MemTable((cfd_->internal_comparator()), *(cfd_->ioptions()),
                            mutable_cf_options_, cfd_->write_buffer_mgr(),
-                           earliest_seqno, cfd_->GetID(), earliest_logno);
+                           earliest_seqno, cfd_->GetID());
     assert(new_mem != nullptr);
 
     Env* env = db_options_.env;
index 488ef2c7cb2a850c6e32d1cddc1fb029f32dcd65..b0a6bf2de806d15ffe0d4553414b101c14ee7943 100644 (file)
@@ -123,7 +123,6 @@ class FlushJob {
   // recommend all users not to set this flag as true given that the MemPurge
   // process has not matured yet.
   Status MemPurge();
-  uint64_t ExtractEarliestLogFileNumber();
 #ifndef ROCKSDB_LITE
   std::unique_ptr<FlushJobInfo> GetFlushJobInfo() const;
 #endif  // !ROCKSDB_LITE
index 5c872ec5fed7219b707cdb1a5fc9887479b24ab0..2b2598658b11e86577a94fd3a835b6046551da71 100644 (file)
@@ -67,8 +67,7 @@ MemTable::MemTable(const InternalKeyComparator& cmp,
                    const ImmutableOptions& ioptions,
                    const MutableCFOptions& mutable_cf_options,
                    WriteBufferManager* write_buffer_manager,
-                   SequenceNumber latest_seq, uint32_t column_family_id,
-                   uint64_t current_logfile_number)
+                   SequenceNumber latest_seq, uint32_t column_family_id)
     : comparator_(cmp),
       moptions_(ioptions, mutable_cf_options),
       refs_(0),
@@ -99,7 +98,6 @@ MemTable::MemTable(const InternalKeyComparator& cmp,
       earliest_seqno_(latest_seq),
       creation_seq_(latest_seq),
       mem_next_logfile_number_(0),
-      mem_min_logfile_number_(current_logfile_number),
       min_prep_log_referenced_(0),
       locks_(moptions_.inplace_update_support
                  ? moptions_.inplace_update_num_locks
index 2ee08fc977064d403b5cedd9cb3264dab496bc01..6f908ae5b597ab2972617b45927c4f80e60c9428 100644 (file)
@@ -106,8 +106,7 @@ class MemTable {
                     const ImmutableOptions& ioptions,
                     const MutableCFOptions& mutable_cf_options,
                     WriteBufferManager* write_buffer_manager,
-                    SequenceNumber earliest_seq, uint32_t column_family_id,
-                    uint64_t current_logfile_number = 0);
+                    SequenceNumber earliest_seq, uint32_t column_family_id);
   // No copying allowed
   MemTable(const MemTable&) = delete;
   MemTable& operator=(const MemTable&) = delete;
@@ -388,16 +387,6 @@ class MemTable {
   // operations on the same MemTable.
   void SetNextLogNumber(uint64_t num) { mem_next_logfile_number_ = num; }
 
-  // Set the earliest log file number that (possibly)
-  // contains entries from this memtable.
-  void SetEarliestLogFileNumber(uint64_t logno) {
-    mem_min_logfile_number_ = logno;
-  }
-
-  // Return the earliest log file number that (possibly)
-  // contains entries from this memtable.
-  uint64_t GetEarliestLogFileNumber() { return mem_min_logfile_number_; }
-
   // if this memtable contains data from a committed
   // two phase transaction we must take note of the
   // log which contains that data so we can know
@@ -528,10 +517,6 @@ class MemTable {
   // The log files earlier than this number can be deleted.
   uint64_t mem_next_logfile_number_;
 
-  // The earliest log containing entries inserted into
-  // this memtable.
-  uint64_t mem_min_logfile_number_;
-
   // the earliest log containing a prepared section
   // which has been inserted into this memtable.
   std::atomic<uint64_t> min_prep_log_referenced_;
index eb56f205b2a5e6e8a7902b88fa6728ff99751214..36bdd98f5d3a574b293725e2d1848764a9d82b89 100644 (file)
@@ -521,9 +521,14 @@ Status MemTableList::TryInstallMemtableFlushResults(
         // and don't commit anything to the manifest file.
         RemoveMemTablesOrRestoreFlags(s, cfd, batch_count, log_buffer,
                                       to_delete, mu);
+        // Note: cfd->SetLogNumber is only called when a VersionEdit
+        // is written to MANIFEST. When mempurge is succesful, we skip
+        // this step, therefore cfd->GetLogNumber is always is
+        // earliest log with data unflushed.
         // Notify new head of manifest write queue.
         // wake up all the waiting writers
-        // TODO(bjlemaire): explain full reason needed or investigate more.
+        // TODO(bjlemaire): explain full reason WakeUpWaitingManifestWriters
+        // needed or investigate more.
         vset->WakeUpWaitingManifestWriters();
         *io_s = IOStatus::OK();
       }
@@ -694,21 +699,6 @@ void MemTableList::RemoveMemTablesOrRestoreFlags(
   }
 }
 
-// Returns the earliest log that possibly contain entries
-// from one of the memtables of this memtable_list.
-uint64_t MemTableList::EarliestLogContainingData() {
-  uint64_t min_log = 0;
-
-  for (auto& m : current_->memlist_) {
-    uint64_t log = m->GetEarliestLogFileNumber();
-    if (log > 0 && (min_log == 0 || log < min_log)) {
-      min_log = log;
-    }
-  }
-
-  return min_log;
-}
-
 uint64_t MemTableList::PrecomputeMinLogContainingPrepSection(
     const std::unordered_set<MemTable*>* memtables_to_flush) {
   uint64_t min_log = 0;
index 88d9ad0178bfadef01cc674fca88fe5d5f18009a..22967c8c1a6675eda1d914c98e3f71e31bab060d 100644 (file)
@@ -347,10 +347,6 @@ class MemTableList {
 
   size_t* current_memory_usage() { return &current_memory_usage_; }
 
-  // Returns the earliest log that possibly contain entries
-  // from one of the memtables of this memtable_list.
-  uint64_t EarliestLogContainingData();
-
   // Returns the min log containing the prep section after memtables listsed in
   // `memtables_to_flush` are flushed and their status is persisted in manifest.
   uint64_t PrecomputeMinLogContainingPrepSection(
index 4a710db965c796306a502d66f7347f349b9ff1d4..1ebd47402bd4919827443c1533508634330acf5c 100644 (file)
@@ -387,7 +387,7 @@ class Repairer {
     // Initialize per-column family memtables
     for (auto* cfd : *vset_.GetColumnFamilySet()) {
       cfd->CreateNewMemtable(*cfd->GetLatestMutableCFOptions(),
-                             kMaxSequenceNumber, cfd->GetLogNumber());
+                             kMaxSequenceNumber);
     }
     auto cf_mems = new ColumnFamilyMemTablesImpl(vset_.GetColumnFamilySet());
 
index 4206a19e5f3e0f466eed1386c552b231ba63e204..7a2996a59e2bdaf6d2adeaa8be42d24a785b1d68 100644 (file)
@@ -548,11 +548,6 @@ Status VersionEditHandler::ExtractInfoFromVersionEdit(ColumnFamilyData* cfd,
             "records NOT monotonically increasing");
       } else {
         cfd->SetLogNumber(edit.log_number_);
-        if (version_set_->db_options()->experimental_allow_mempurge &&
-            edit.log_number_ > 0 &&
-            (cfd->mem()->GetEarliestLogFileNumber() == 0)) {
-          cfd->mem()->SetEarliestLogFileNumber(edit.log_number_);
-        }
         version_edit_params_.SetLogNumber(edit.log_number_);
       }
     }
index df0fff956e09b17734e55a6dd8a1bab604e8091b..a2bfdac4822845a7af65b2c38834fd10811c2e94 100644 (file)
@@ -5646,7 +5646,7 @@ ColumnFamilyData* VersionSet::CreateColumnFamily(
   // GetLatestMutableCFOptions() is safe here without mutex since the
   // cfd is not available to client
   new_cfd->CreateNewMemtable(*new_cfd->GetLatestMutableCFOptions(),
-                             LastSequence(), edit->log_number_);
+                             LastSequence());
   new_cfd->SetLogNumber(edit->log_number_);
   return new_cfd;
 }
index d4fc17baaea3a75e0528665966eaa6fca999074a..1b8b3cda18747790cff1ebc9081f49a91b37c4d5 100644 (file)
@@ -1161,15 +1161,6 @@ class VersionSet {
       if (min_log_num > num && !cfd->IsDropped()) {
         min_log_num = num;
       }
-      // If mempurge is activated, there may be an immutable memtable
-      // that has data not flushed to any SST file.
-      if (db_options_->experimental_allow_mempurge && !(cfd->IsEmpty()) &&
-          !(cfd->IsDropped())) {
-        num = cfd->imm()->EarliestLogContainingData();
-        if ((num > 0) && (min_log_num > num)) {
-          min_log_num = num;
-        }
-      }
     }
     return min_log_num;
   }
@@ -1187,15 +1178,6 @@ class VersionSet {
       if (min_log_num > cfd->GetLogNumber() && !cfd->IsDropped()) {
         min_log_num = cfd->GetLogNumber();
       }
-      // If mempurge is activated, there may be an immutable memtable
-      // that has data not flushed to any SST file.
-      if (db_options_->experimental_allow_mempurge && !(cfd->IsEmpty()) &&
-          !(cfd->IsDropped())) {
-        uint64_t num = cfd->imm()->EarliestLogContainingData();
-        if ((num > 0) && (min_log_num > num)) {
-          min_log_num = num;
-        }
-      }
     }
     return min_log_num;
   }