]> git-server-git.apps.pok.os.sepia.ceph.com Git - rocksdb.git/commitdiff
Ignore missing column families
authorIgor Canadi <icanadi@fb.com>
Tue, 2 Sep 2014 20:29:05 +0000 (13:29 -0700)
committerIgor Canadi <icanadi@fb.com>
Tue, 2 Sep 2014 20:29:05 +0000 (13:29 -0700)
Summary:
Before this diff, whenever we Write to non-existing column family, Write() would fail.

This diff adds an option to not fail a Write() when WriteBatch points to non-existing column family. MongoDB said this would be useful for them, since they might have a transaction updating an index that was dropped by another thread. This way, they don't have to worry about checking if all indexes are alive on every write. They don't care if they lose writes to dropped index.

Test Plan: added a small unit test

Reviewers: sdong, yhchiang, ljin

Reviewed By: ljin

Subscribers: leveldb

Differential Revision: https://reviews.facebook.net/D22143

db/column_family_test.cc
db/db_impl.cc
db/write_batch.cc
db/write_batch_internal.h
include/rocksdb/options.h

index ac3435593d063b9d328eb1988b446e75a768c2d0..b96e66829a5b91538e9d99dedb298b514089ceaa 100644 (file)
@@ -408,9 +408,15 @@ TEST(ColumnFamilyTest, WriteBatchFailure) {
   Open();
   CreateColumnFamiliesAndReopen({"one", "two"});
   WriteBatch batch;
+  batch.Put(handles_[0], Slice("existing"), Slice("column-family"));
   batch.Put(handles_[1], Slice("non-existing"), Slice("column-family"));
   ASSERT_OK(db_->Write(WriteOptions(), &batch));
   DropColumnFamilies({1});
+  WriteOptions woptions_ignore_missing_cf;
+  woptions_ignore_missing_cf.ignore_missing_column_families = true;
+  batch.Put(handles_[0], Slice("still here"), Slice("column-family"));
+  ASSERT_OK(db_->Write(woptions_ignore_missing_cf, &batch));
+  ASSERT_EQ("column-family", Get(0, "still here"));
   Status s = db_->Write(WriteOptions(), &batch);
   ASSERT_TRUE(s.IsInvalidArgument());
   Close();
index 36c0da12f15a37a88020648e790ee191e13df120..c22aa5809bc1937209634a5ba9f27359578e506d 100644 (file)
@@ -1301,14 +1301,20 @@ Status DBImpl::RecoverLogFile(uint64_t log_number, SequenceNumber* max_sequence,
   WriteBatch batch;
   while (reader.ReadRecord(&record, &scratch)) {
     if (record.size() < 12) {
-      reporter.Corruption(
-          record.size(), Status::Corruption("log record too small"));
+      reporter.Corruption(record.size(),
+                          Status::Corruption("log record too small"));
       continue;
     }
     WriteBatchInternal::SetContents(&batch, record);
 
+    // If column family was not found, it might mean that the WAL write
+    // batch references to the column family that was dropped after the
+    // insert. We don't want to fail the whole write batch in that case -- we
+    // just ignore the update. That's why we set ignore missing column families
+    // to true
     status = WriteBatchInternal::InsertInto(
-        &batch, column_family_memtables_.get(), true, log_number);
+        &batch, column_family_memtables_.get(),
+        true /* ignore missing column families */, log_number);
 
     MaybeIgnoreError(&status);
     if (!status.ok()) {
@@ -4066,7 +4072,8 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) {
         PERF_TIMER_GUARD(write_memtable_time);
 
         status = WriteBatchInternal::InsertInto(
-            updates, column_family_memtables_.get(), false, 0, this, false);
+            updates, column_family_memtables_.get(),
+            options.ignore_missing_column_families, 0, this, false);
         // A non-OK status here indicates iteration failure (either in-memory
         // writebatch corruption (very bad), or the client specified invalid
         // column family).  This will later on trigger bg_error_.
index fdc0e2c6e166b4b677aaf13959efa169480ffb89..bfa5e3f6f2776a5193e41ba810abcc8c6d885886 100644 (file)
@@ -299,17 +299,17 @@ class MemTableInserter : public WriteBatch::Handler {
  public:
   SequenceNumber sequence_;
   ColumnFamilyMemTables* cf_mems_;
-  bool recovery_;
+  bool ignore_missing_column_families_;
   uint64_t log_number_;
   DBImpl* db_;
   const bool dont_filter_deletes_;
 
   MemTableInserter(SequenceNumber sequence, ColumnFamilyMemTables* cf_mems,
-                   bool recovery, uint64_t log_number, DB* db,
-                   const bool dont_filter_deletes)
+                   bool ignore_missing_column_families, uint64_t log_number,
+                   DB* db, const bool dont_filter_deletes)
       : sequence_(sequence),
         cf_mems_(cf_mems),
-        recovery_(recovery),
+        ignore_missing_column_families_(ignore_missing_column_families),
         log_number_(log_number),
         db_(reinterpret_cast<DBImpl*>(db)),
         dont_filter_deletes_(dont_filter_deletes) {
@@ -321,12 +321,18 @@ class MemTableInserter : public WriteBatch::Handler {
 
   bool SeekToColumnFamily(uint32_t column_family_id, Status* s) {
     bool found = cf_mems_->Seek(column_family_id);
-    if (recovery_ && (!found || log_number_ < cf_mems_->GetLogNumber())) {
-      // if in recovery envoronment:
-      // * If column family was not found, it might mean that the WAL write
-      // batch references to the column family that was dropped after the
-      // insert. We don't want to fail the whole write batch in that case -- we
-      // just ignore the update.
+    if (!found) {
+      if (ignore_missing_column_families_) {
+        *s = Status::OK();
+      } else {
+        *s = Status::InvalidArgument(
+            "Invalid column family specified in write batch");
+      }
+      return false;
+    }
+    if (log_number_ != 0 && log_number_ < cf_mems_->GetLogNumber()) {
+      // This is true only in recovery environment (log_number_ is always 0 in
+      // non-recovery, regular write code-path)
       // * If log_number_ < cf_mems_->GetLogNumber(), this means that column
       // family already contains updates from this log. We can't apply updates
       // twice because of update-in-place or merge workloads -- ignore the
@@ -334,18 +340,8 @@ class MemTableInserter : public WriteBatch::Handler {
       *s = Status::OK();
       return false;
     }
-    if (!found) {
-      assert(!recovery_);
-      // If the column family was not found in non-recovery enviornment
-      // (client's write code-path), we have to fail the write and return
-      // the failure status to the client.
-      *s = Status::InvalidArgument(
-          "Invalid column family specified in write batch");
-      return false;
-    }
     return true;
   }
-
   virtual Status PutCF(uint32_t column_family_id, const Slice& key,
                        const Slice& value) {
     Status seek_status;
@@ -503,10 +499,12 @@ class MemTableInserter : public WriteBatch::Handler {
 
 Status WriteBatchInternal::InsertInto(const WriteBatch* b,
                                       ColumnFamilyMemTables* memtables,
-                                      bool recovery, uint64_t log_number,
-                                      DB* db, const bool dont_filter_deletes) {
+                                      bool ignore_missing_column_families,
+                                      uint64_t log_number, DB* db,
+                                      const bool dont_filter_deletes) {
   MemTableInserter inserter(WriteBatchInternal::Sequence(b), memtables,
-                            recovery, log_number, db, dont_filter_deletes);
+                            ignore_missing_column_families, log_number, db,
+                            dont_filter_deletes);
   return b->Iterate(&inserter);
 }
 
index 9a191f4cb3ab812e044a7e5308decbaa014cb4e0..615a47f5ebdd53fad3d45b17a673da9f444288ff 100644 (file)
@@ -106,18 +106,18 @@ class WriteBatchInternal {
   // Inserts batch entries into memtable
   // If dont_filter_deletes is false AND options.filter_deletes is true,
   // then --> Drops deletes in batch if db->KeyMayExist returns false
-  // If recovery == true, this means InsertInto is executed on a recovery
-  // code-path. WriteBatch referencing a dropped column family can be
-  // found on a recovery code-path and should be ignored (recovery should not
-  // fail). Additionally, the memtable will be updated only if
+  // If ignore_missing_column_families == true. WriteBatch referencing
+  // non-existing column family should be ignored.
+  // However, if ignore_missing_column_families == false, any WriteBatch
+  // referencing non-existing column family will return a InvalidArgument()
+  // failure.
+  //
+  // If log_number is non-zero, the memtable will be updated only if
   // memtables->GetLogNumber() >= log_number
-  // However, if recovery == false, any WriteBatch referencing
-  // non-existing column family will return a failure. Also, log_number is
-  // ignored in that case
   static Status InsertInto(const WriteBatch* batch,
                            ColumnFamilyMemTables* memtables,
-                           bool recovery = false, uint64_t log_number = 0,
-                           DB* db = nullptr,
+                           bool ignore_missing_column_families = false,
+                           uint64_t log_number = 0, DB* db = nullptr,
                            const bool dont_filter_deletes = true);
 
   static void Append(WriteBatch* dst, const WriteBatch* src);
index 3569409c419beca4a58d8203e827dcff827e6c8f..0ca30334410b939fedf7f0fc51704ef67a29dd3c 100644 (file)
@@ -959,7 +959,17 @@ struct WriteOptions {
   // Default: 0
   uint64_t timeout_hint_us;
 
-  WriteOptions() : sync(false), disableWAL(false), timeout_hint_us(0) {}
+  // If true and if user is trying to write to column families that don't exist
+  // (they were dropped),  ignore the write (don't return an error). If there
+  // are multiple writes in a WriteBatch, other writes will succeed.
+  // Default: false
+  bool ignore_missing_column_families;
+
+  WriteOptions()
+      : sync(false),
+        disableWAL(false),
+        timeout_hint_us(0),
+        ignore_missing_column_families(false) {}
 };
 
 // Options that control flush operations