]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
test/objectstore: Add test for deferred writes
authorAdam Kupczyk <akupczyk@redhat.com>
Mon, 27 Jun 2022 12:50:10 +0000 (12:50 +0000)
committerAdam Kupczyk <akupczyk@redhat.com>
Wed, 27 Jul 2022 10:43:27 +0000 (10:43 +0000)
Add a test that recreates the situation in which BlueStore deferred writes
can corrupt RocksDB files.

Signed-off-by: Adam Kupczyk <akupczyk@redhat.com>
(cherry picked from commit 42de47ae8468d000d5bca62bd537d7a1028bae42)

src/test/objectstore/CMakeLists.txt
src/test/objectstore/run_test_deferred.sh [new file with mode: 0755]
src/test/objectstore/test_deferred.cc [new file with mode: 0644]

index 340855657c4fd78164c0a9327c4dc4b78b1d8f25..3294616b9b26fa7f191b1b4e3ab76a5f61cdd046 100644 (file)
@@ -147,6 +147,13 @@ if(WITH_BLUESTORE)
   add_ceph_unittest(unittest_bdev)
   target_link_libraries(unittest_bdev os global)
 
+  # unittest_deferred: built from test_deferred.cc, linked against os and global
+  add_executable(unittest_deferred
+    test_deferred.cc
+    )
+  add_ceph_unittest(unittest_deferred)
+  target_link_libraries(unittest_deferred os global)
+
 endif(WITH_BLUESTORE)
 
 # unittest_transaction
diff --git a/src/test/objectstore/run_test_deferred.sh b/src/test/objectstore/run_test_deferred.sh
new file mode 100755 (executable)
index 0000000..1be4d91
--- /dev/null
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+
+if [[ ! (-x ./bin/unittest_deferred) || ! (-x ./bin/ceph-kvstore-tool) || ! (-x ./bin/ceph-bluestore-tool)]]
+then
+    echo Test must be run from ceph build directory
+    echo with unittest_deferred, ceph-kvstore-tool and ceph-bluestore-tool compiled
+    exit 1
+fi
+
+# Create BlueStore, only main block device, 4K AU, forced deferred 4K, 64K AU for BlueFS
+
+# Create file zapchajdziura, that is 0xe000 in size.
+# This adds to 0x0000 - 0x1000 of BlueStore superblock and 0x1000 - 0x2000 of BlueFS superblock,
+# making 0x00000 - 0x10000 filled, nicely aligning for 64K BlueFS requirements
+
+# Prefill 10 objects Object-0 .. Object-9, each 64K. Sync to disk.
+# Do transactions like:
+# - fill Object-x+1 16 times at offsets 0x0000, 0x1000, ... 0xf000 with 8bytes, trigerring deferred writes
+# - fill Object-x with 64K data
+# Repeat for Object-0 to Object-8.
+
+# Right after getting notification on_complete for all 9 transactions, immediately exit(1).
+./bin/unittest_deferred --log-to-stderr=false
+
+# Now we should have a considerable amount of pending deferred writes.
+# They do refer disk regions that do not belong to any object.
+
+# Perform compaction on RocksDB
+# This initializes BlueFS, but does not replay deferred writes.
+# It jiggles RocksDB files around. CURRENT and MANIFEST are recreated, with some .sst files too.
+# The hope here is that newly created RocksDB files will occupy space that is free,
+# but targetted by pending deferred writes.
+./bin/ceph-kvstore-tool bluestore-kv bluestore.test_temp_dir/ compact --log-to-stderr=false
+
+# It this step we (hopefully) get RocksDB files overwritten
+# We initialize BlueFS and RocksDB, there should be no problem here.
+# Then we apply deferred writes. Now some of RocksDB files might get corrupted.
+# It is very likely that this will not cause any problems, since CURRENT and MANIFEST are only read at bootup.
+./bin/ceph-bluestore-tool --path bluestore.test_temp_dir/ --command fsck --deep 1 --debug-bluestore=30/30 --debug-bdev=30/30 --log-file=log-bs-corrupts.txt --log-to-file --log-to-stderr=false
+
+# If we were lucky, this command now fails
+./bin/ceph-bluestore-tool --path bluestore.test_temp_dir/ --command fsck --deep 1 --debug-bluestore=30/30 --debug-bdev=30/30 --log-file=log-bs-crash.txt --log-to-file --log-to-stderr=false
+if [[ $? != 0 ]]
+then
+    echo "Deferred writes corruption successfully created !"
+else
+    echo "No deferred write problems detected."
+fi
+
+#cleanup
+rm -rf bluestore.test_temp_dir/
diff --git a/src/test/objectstore/test_deferred.cc b/src/test/objectstore/test_deferred.cc
new file mode 100644 (file)
index 0000000..1b56081
--- /dev/null
@@ -0,0 +1,146 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <semaphore.h>
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+
+#include <functional>
+#include <iostream>
+#include <memory>
+#include <utility>
+
+#include "os/ObjectStore.h"
+#include "os/bluestore/BlueStore.h"
+#include "include/Context.h"
+#include "common/ceph_argparse.h"
+#include "global/global_init.h"
+#include "common/ceph_mutex.h"
+#include "common/Cond.h"
+#include "common/errno.h"
+#include "common/options.h" // for the size literals
+
+
+
+// Context that invokes a stored callable when it is finished.
+// The result code handed to finish() is ignored.
+class C_do_action : public Context {
+public:
+  std::function<void()> action;
+
+  // explicit, so a callable is never silently converted into a Context;
+  // the by-value argument is moved into the member instead of copied again.
+  explicit C_do_action(std::function<void()> action)
+    : action(std::move(action)) {}
+
+  void finish(int /*r*/) override {
+    action();
+  }
+};
+
+void create_deferred_and_terminate() {
+  std::unique_ptr<ObjectStore> store;
+
+  g_ceph_context->_conf._clear_safe_to_start_threads();
+  g_ceph_context->_conf.set_val_or_die("bluestore_prefer_deferred_size", "4096");
+  g_ceph_context->_conf.set_val_or_die("bluestore_allocator", "bitmap");
+  g_ceph_context->_conf.set_val_or_die("bluestore_block_size", "10240000000");
+  g_ceph_context->_conf.apply_changes(nullptr);
+
+  int64_t poolid;
+  coll_t cid;
+  ghobject_t hoid;
+  ObjectStore::CollectionHandle ch;
+  ceph_assert(::mkdir("bluestore.test_temp_dir", 0777) == 0);
+  store = ObjectStore::create(g_ceph_context,
+                              "bluestore",
+                              "bluestore.test_temp_dir",
+                              "store_test_temp_journal");
+  ceph_assert(store->mkfs() == 0);
+  ceph_assert(store->mount() == 0);
+
+  poolid = 11;
+  cid = coll_t(spg_t(pg_t(1, poolid), shard_id_t::NO_SHARD));
+  ch = store->create_new_collection(cid);
+  int r;
+  {
+    ObjectStore::Transaction t;
+    t.create_collection(cid, 0);
+    r = store->queue_transaction(ch, std::move(t));
+    ceph_assert(r == 0);
+  }
+
+  {
+    ObjectStore::Transaction t;
+    std::string oid = "zapchajdziura";
+    ghobject_t hoid(hobject_t(oid, "", CEPH_NOSNAP, 1, poolid, ""));
+    bufferlist bl;
+    bl.append(std::string(0xe000, '-'));
+    t.write(cid, hoid, 0, 0xe000, bl);
+    r = store->queue_transaction(ch, std::move(t));
+    ceph_assert(r == 0);
+  }
+
+  size_t object_count = 10;
+
+  // initial fill
+  bufferlist bl_64K;
+  bl_64K.append(std::string(64 * 1024, '-'));
+
+  std::atomic<size_t> prefill_counter{0};
+  sem_t prefill_mutex;
+  sem_init(&prefill_mutex, 0, 0);
+
+  for (size_t o = 0; o < object_count; o++) {
+    ObjectStore::Transaction t;
+    std::string oid = "object-" + std::to_string(o);
+    ghobject_t hoid(hobject_t(oid, "", CEPH_NOSNAP, 1, poolid, ""));
+
+    t.write(cid, hoid, 0, bl_64K.length(), bl_64K);
+    t.register_on_commit(new C_do_action([&] {
+      if (++prefill_counter == object_count) {
+       sem_post(&prefill_mutex);
+      }
+    }));
+
+    r = store->queue_transaction(ch, std::move(t));
+    ceph_assert(r == 0);
+  }
+  sem_wait(&prefill_mutex);
+
+  // small deferred writes over object
+  // and complete overwrite of previous one
+  bufferlist bl_8_bytes;
+  bl_8_bytes.append("abcdefgh");
+  std::atomic<size_t> deferred_counter{0};
+  for (size_t o = 0; o < object_count - 1; o++) {
+    ObjectStore::Transaction t;
+
+    // sprinkle deferred writes
+    std::string oid_d = "object-" + std::to_string(o + 1);
+    ghobject_t hoid_d(hobject_t(oid_d, "", CEPH_NOSNAP, 1, poolid, ""));
+
+    for(int i = 0; i < 16; i++) {
+      t.write(cid, hoid_d, 4096 * i, bl_8_bytes.length(), bl_8_bytes);
+    }
+
+    // overwrite previous object
+    std::string oid_m = "object-" + std::to_string(o);
+    ghobject_t hoid_m(hobject_t(oid_m, "", CEPH_NOSNAP, 1, poolid, ""));
+    t.write(cid, hoid_m, 0, bl_64K.length(), bl_64K);
+
+    t.register_on_commit(new C_do_action([&] {
+      if (++deferred_counter == object_count - 1) {
+        exit(0);
+      }
+    }));
+    r = store->queue_transaction(ch, std::move(t));
+    ceph_assert(r == 0);
+  }
+  sleep(10);
+  ceph_assert(0 && "should not reach here");
+}
+
+// Entry point: performs global Ceph initialization from the command line
+// arguments (no default config file) and runs the scenario.
+// create_deferred_and_terminate() ends the process itself, so the final
+// return is not expected to be reached.
+int main(int argc, char **argv) {
+  auto args = argv_to_vec(argc, argv);
+  // NOTE(review): cct is not referenced again; presumably it only keeps the
+  // object returned by global_init() alive for the duration of the run.
+  auto cct = global_init(nullptr, args, CEPH_ENTITY_TYPE_CLIENT,
+                        CODE_ENVIRONMENT_UTILITY,
+                        CINIT_FLAG_NO_DEFAULT_CONFIG_FILE);
+  common_init_finish(g_ceph_context);
+
+  create_deferred_and_terminate();
+  return 0;
+}