]> git-server-git.apps.pok.os.sepia.ceph.com Git - rocksdb.git/commitdiff
Use VersionBuilder for CF import ordering/validation (#11028)
authorAndrew Kryczka <andrewkr@fb.com>
Sat, 10 Dec 2022 23:07:42 +0000 (15:07 -0800)
committerFacebook GitHub Bot <facebook-github-bot@users.noreply.github.com>
Sat, 10 Dec 2022 23:07:42 +0000 (15:07 -0800)
Summary:
Besides the existing ordering and validation, more is coming to VersionBuilder/VersionStorageInfo, like migration of epoch_numbers from older RocksDB versions. We should start using those common classes for importing CFs, instead of duplicating their ordering, validation, and migration logic.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/11028

Test Plan: rely on existing tests

Reviewed By: hx235

Differential Revision: D41865427

Pulled By: ajkr

fbshipit-source-id: 873f5cd87b8902a2380c3b71373ce0b0db3a0c50

db/import_column_family_job.cc
db/import_column_family_test.cc

index 34985666ae364596160ba031f913d35fff1c82ab..ae342cc0bf9af9ae52c3427bcc1a7afba9c36202 100644 (file)
@@ -45,40 +45,6 @@ Status ImportColumnFamilyJob::Prepare(uint64_t next_file_number,
   auto num_files = files_to_import_.size();
   if (num_files == 0) {
     return Status::InvalidArgument("The list of files is empty");
-  } else if (num_files > 1) {
-    // Verify that passed files don't have overlapping ranges in any particular
-    // level.
-    int min_level = 1;  // Check for overlaps in Level 1 and above.
-    int max_level = -1;
-    for (const auto& file_metadata : metadata_) {
-      if (file_metadata.level > max_level) {
-        max_level = file_metadata.level;
-      }
-    }
-    for (int level = min_level; level <= max_level; ++level) {
-      autovector<const IngestedFileInfo*> sorted_files;
-      for (size_t i = 0; i < num_files; i++) {
-        if (metadata_[i].level == level) {
-          sorted_files.push_back(&files_to_import_[i]);
-        }
-      }
-
-      std::sort(
-          sorted_files.begin(), sorted_files.end(),
-          [this](const IngestedFileInfo* info1, const IngestedFileInfo* info2) {
-            return cfd_->internal_comparator().Compare(
-                       info1->smallest_internal_key,
-                       info2->smallest_internal_key) < 0;
-          });
-
-      for (size_t i = 0; i + 1 < sorted_files.size(); i++) {
-        if (cfd_->internal_comparator().Compare(
-                sorted_files[i]->largest_internal_key,
-                sorted_files[i + 1]->smallest_internal_key) >= 0) {
-          return Status::InvalidArgument("Files have overlapping ranges");
-        }
-      }
-    }
   }
 
   for (const auto& f : files_to_import_) {
@@ -143,9 +109,6 @@ Status ImportColumnFamilyJob::Prepare(uint64_t next_file_number,
 // REQUIRES: we have become the only writer by entering both write_thread_ and
 // nonmem_write_thread_
 Status ImportColumnFamilyJob::Run() {
-  Status status;
-  edit_.SetColumnFamily(cfd_->GetID());
-
   // We use the import time as the ancester time. This is the time the data
   // is written to the database.
   int64_t temp_current_time = 0;
@@ -156,27 +119,57 @@ Status ImportColumnFamilyJob::Run() {
         static_cast<uint64_t>(temp_current_time);
   }
 
-  for (size_t i = 0; i < files_to_import_.size(); ++i) {
+  VersionBuilder version_builder(
+      cfd_->current()->version_set()->file_options(), cfd_->ioptions(),
+      cfd_->table_cache(), cfd_->current()->storage_info(),
+      cfd_->current()->version_set(),
+      cfd_->GetFileMetadataCacheReservationManager());
+  VersionStorageInfo vstorage(
+      &cfd_->internal_comparator(), cfd_->user_comparator(),
+      cfd_->NumberLevels(), cfd_->ioptions()->compaction_style,
+      nullptr /* src_vstorage */, cfd_->ioptions()->force_consistency_checks);
+  Status s;
+  for (size_t i = 0; s.ok() && i < files_to_import_.size(); ++i) {
     const auto& f = files_to_import_[i];
     const auto& file_metadata = metadata_[i];
 
-    edit_.AddFile(file_metadata.level, f.fd.GetNumber(), f.fd.GetPathId(),
-                  f.fd.GetFileSize(), f.smallest_internal_key,
-                  f.largest_internal_key, file_metadata.smallest_seqno,
-                  file_metadata.largest_seqno, false, file_metadata.temperature,
-                  kInvalidBlobFileNumber, oldest_ancester_time, current_time,
-                  kUnknownFileChecksum, kUnknownFileChecksumFuncName,
-                  f.unique_id);
-
-    // If incoming sequence number is higher, update local sequence number.
-    if (file_metadata.largest_seqno > versions_->LastSequence()) {
-      versions_->SetLastAllocatedSequence(file_metadata.largest_seqno);
-      versions_->SetLastPublishedSequence(file_metadata.largest_seqno);
-      versions_->SetLastSequence(file_metadata.largest_seqno);
+    VersionEdit version_edit;
+    version_edit.AddFile(
+        file_metadata.level, f.fd.GetNumber(), f.fd.GetPathId(),
+        f.fd.GetFileSize(), f.smallest_internal_key, f.largest_internal_key,
+        file_metadata.smallest_seqno, file_metadata.largest_seqno, false,
+        file_metadata.temperature, kInvalidBlobFileNumber, oldest_ancester_time,
+        current_time, kUnknownFileChecksum, kUnknownFileChecksumFuncName,
+        f.unique_id);
+    s = version_builder.Apply(&version_edit);
+  }
+  if (s.ok()) {
+    s = version_builder.SaveTo(&vstorage);
+  }
+  if (s.ok()) {
+    edit_.SetColumnFamily(cfd_->GetID());
+
+    for (int level = 0; level < vstorage.num_levels(); level++) {
+      for (FileMetaData* file_meta : vstorage.LevelFiles(level)) {
+        edit_.AddFile(level, *file_meta);
+        // If incoming sequence number is higher, update local sequence number.
+        if (file_meta->fd.largest_seqno > versions_->LastSequence()) {
+          versions_->SetLastAllocatedSequence(file_meta->fd.largest_seqno);
+          versions_->SetLastPublishedSequence(file_meta->fd.largest_seqno);
+          versions_->SetLastSequence(file_meta->fd.largest_seqno);
+        }
+      }
     }
   }
-
-  return status;
+  for (int level = 0; level < vstorage.num_levels(); level++) {
+    for (FileMetaData* file_meta : vstorage.LevelFiles(level)) {
+      file_meta->refs--;
+      if (file_meta->refs <= 0) {
+        delete file_meta;
+      }
+    }
+  }
+  return s;
 }
 
 void ImportColumnFamilyJob::Cleanup(const Status& status) {
index 2847ea8da49e31dc3cf8680a67bddc73173419b5..0c07ee2a8bf29ba8fb39fcdfa0d0220097868d82 100644 (file)
@@ -556,10 +556,9 @@ TEST_F(ImportColumnFamilyTest, ImportColumnFamilyNegativeTest) {
         LiveFileMetaDataInit(file2_sst_name, sst_files_dir_, 1, 10, 19));
     metadata.db_comparator_name = options.comparator->Name();
 
-    ASSERT_EQ(db_->CreateColumnFamilyWithImport(ColumnFamilyOptions(), "yoyo",
-                                                ImportColumnFamilyOptions(),
-                                                metadata, &import_cfh_),
-              Status::InvalidArgument("Files have overlapping ranges"));
+    ASSERT_NOK(db_->CreateColumnFamilyWithImport(ColumnFamilyOptions(), "yoyo",
+                                                 ImportColumnFamilyOptions(),
+                                                 metadata, &import_cfh_));
     ASSERT_EQ(import_cfh_, nullptr);
   }