git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
os/bluestore: give conventional region of SMR to bluefs
author: Sage Weil <sage@newdream.net>
Thu, 16 Sep 2021 17:09:54 +0000 (12:09 -0500)
committer: Sage Weil <sage@newdream.net>
Fri, 29 Oct 2021 13:56:42 +0000 (09:56 -0400)
Let bluefs use the conventional region of an SMR HDD.  It won't be able to
use the sequential region if it runs out of space, but it does mean that
bluefs can function on an SMR drive without a dedicated db/wal device.

Signed-off-by: Sage Weil <sage@newdream.net>
src/os/bluestore/BlueStore.cc
src/test/objectstore/run_smr_bluestore_test.sh

index e13af070afa50763918f68e859bbadfe72eec455..a3859480fb423f3a29b20cf7f2b924d62ba923f8 100644 (file)
@@ -5595,8 +5595,27 @@ int BlueStore::_create_alloc()
     return -EINVAL;
   }
 
-  // BlueFS will share the same allocator
-  shared_alloc.set(alloc);
+#ifdef HAVE_LIBZBD
+  if (freelist_type == "zoned") {
+    Allocator *a = Allocator::create(
+      cct, cct->_conf->bluestore_allocator,
+      bdev->get_conventional_region_size(),
+      alloc_size,
+      0, 0,
+      "zoned_block");
+    if (!a) {
+      lderr(cct) << __func__ << " failed to create " << cct->_conf->bluestore_allocator
+                << " allocator" << dendl;
+      delete alloc;
+      return -EINVAL;
+    }
+    shared_alloc.set(a);
+  } else
+#endif
+  {
+    // BlueFS will share the same allocator
+    shared_alloc.set(alloc);
+  }
 
   return 0;
 }
@@ -5644,6 +5663,15 @@ int BlueStore::_init_alloc(std::map<uint64_t, uint64_t> *zone_adjustments)
       }
     }
 
+    // start with conventional zone "free" (bluefs may adjust this when it starts up)
+    auto reserved = _get_ondisk_reserved();
+    // for now we require a conventional zone
+    ceph_assert(bdev->get_conventional_region_size());
+    ceph_assert(shared_alloc.a != alloc);  // zoned allocator doesn't use conventional region
+    shared_alloc.a->init_add_free(reserved,
+                                 bdev->get_conventional_region_size() - reserved);
+
+    // init sequential zone based on the device's write pointers
     a->init_from_zone_pointers(zones);
     dout(1) << __func__
            << " loaded zone pointers: "
@@ -5738,11 +5766,16 @@ void BlueStore::_close_alloc()
   ceph_assert(bdev);
   bdev->discard_drain();
 
-  ceph_assert(shared_alloc.a);
   ceph_assert(alloc);
-  ceph_assert(alloc == shared_alloc.a);
-  shared_alloc.a->shutdown();
-  delete shared_alloc.a;
+  alloc->shutdown();
+  delete alloc;
+
+  ceph_assert(shared_alloc.a);
+  if (alloc != shared_alloc.a) {
+    shared_alloc.a->shutdown();
+    delete shared_alloc.a;
+  }
+
   shared_alloc.reset();
   alloc = nullptr;
 }
@@ -6852,6 +6885,13 @@ int BlueStore::mkfs()
   reserved = _get_ondisk_reserved();
   alloc->init_add_free(reserved,
     p2align(bdev->get_size(), min_alloc_size) - reserved);
+#ifdef HAVE_LIBZBD
+  if (bdev->is_smr() && alloc != shared_alloc.a) {
+    shared_alloc.a->init_add_free(reserved,
+                                 p2align(bdev->get_conventional_region_size(),
+                                         min_alloc_size) - reserved);
+  }
+#endif
 
   r = _open_db(true);
   if (r < 0)
index 22017b0c5cb604af1b3c4d2b0cc724fe65051081..5fa8544e0a1fcef8df11c7e5b8a831aa9e2e3c17 100644 (file)
@@ -22,6 +22,4 @@ DEV=`lsscsi | grep zbc | awk '{print $7}'`
 sudo ceph_test_objectstore \
     --bluestore-block-path $DEV \
     --gtest_filter=*/2 \
-    --bluestore-block-db-create \
-    --bluestore-block-db-size 1048576000 \
     $*