From: Sage Weil Date: Tue, 31 Jan 2017 17:01:39 +0000 (-0500) Subject: os/bluestore: fix min_alloc_size at mkfs time X-Git-Tag: v12.0.0~31^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=9c9a558aa8c1cf9e2f619e72979969bb6e151cc0;p=ceph.git os/bluestore: fix min_alloc_size at mkfs time It is an ongoing challenge to allow min_alloc_size to be varied on an existing bluestore instance, and the code paths are not well tested. Avoid the complexity entirely by fixing min_alloc_size at mkfs time. Signed-off-by: Sage Weil --- diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 9dc83746c8b6..036e43933a3b 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -3115,19 +3115,6 @@ int BlueStore::_check_or_set_bdev_label( void BlueStore::_set_alloc_sizes(void) { - /* - * Set device block size according to its media - */ - if (cct->_conf->bluestore_min_alloc_size) { - min_alloc_size = cct->_conf->bluestore_min_alloc_size; - } else { - assert(bdev); - if (bdev->is_rotational()) { - min_alloc_size = cct->_conf->bluestore_min_alloc_size_hdd; - } else { - min_alloc_size = cct->_conf->bluestore_min_alloc_size_ssd; - } - } min_alloc_size_order = ctz(min_alloc_size); assert(min_alloc_size == 1u << min_alloc_size_order); @@ -3160,8 +3147,6 @@ int BlueStore::_open_bdev(bool create) block_mask = ~(block_size - 1); block_size_order = ctz(block_size); assert(block_size == 1u << block_size_order); - - _set_alloc_sizes(); return 0; fail_close: @@ -3285,7 +3270,7 @@ int BlueStore::_open_alloc() assert(bdev->get_size()); alloc = Allocator::create(cct, cct->_conf->bluestore_allocator, bdev->get_size(), - min_min_alloc_size); + min_alloc_size); uint64_t num = 0, bytes = 0; // initialize from freelist @@ -4116,11 +4101,29 @@ int BlueStore::mkfs() t->set(PREFIX_SUPER, "nid_max", bl); t->set(PREFIX_SUPER, "blobid_max", bl); } + + // choose min_alloc_size + if (cct->_conf->bluestore_min_alloc_size) { + min_alloc_size = cct->_conf->bluestore_min_alloc_size; + } else { + assert(bdev); + if (bdev->is_rotational()) { + min_alloc_size = cct->_conf->bluestore_min_alloc_size_hdd; + } else { + min_alloc_size = cct->_conf->bluestore_min_alloc_size_ssd; + } + } + _set_alloc_sizes(); + { + bufferlist bl; + ::encode(min_alloc_size, bl); + t->set(PREFIX_SUPER, "min_alloc_size", bl); + } + ondisk_format = latest_ondisk_format; _prepare_ondisk_format_super(t); db->submit_transaction_sync(t); } - _save_min_min_alloc_size(min_alloc_size); r = _open_alloc(); if (r < 0) @@ -6335,37 +6338,6 @@ void BlueStore::_prepare_ondisk_format_super(KeyValueDB::Transaction& t) } } -void BlueStore::_save_min_min_alloc_size(uint64_t new_val) -{ - assert(new_val > 0); - if (new_val == min_min_alloc_size && min_min_alloc_size > 0) { - return; - } - - if (new_val > min_min_alloc_size && min_min_alloc_size > 0) { - derr << "warning: bluestore_min_alloc_size " - << new_val << " > min_min_alloc_size " << min_min_alloc_size << "," - << " may impact performance." << dendl; - return; - } - - if (new_val < min_min_alloc_size && min_min_alloc_size > 0) { - derr << "warning: bluestore_min_alloc_size value decreased from " - << min_min_alloc_size << " to " << new_val << "." - << " Decreased value could have performance impact." << dendl; - } - - - KeyValueDB::Transaction t = db->get_transaction(); - { - bufferlist bl; - encode(new_val, bl); - t->set(PREFIX_SUPER, "min_min_alloc_size", bl); - db->submit_transaction_sync(t); - } - min_min_alloc_size = new_val; -} - int BlueStore::_open_super_meta() { // nid @@ -6418,21 +6390,6 @@ int BlueStore::_open_super_meta() } } - // Min min_alloc_size - { - bufferlist bl; - min_min_alloc_size = 0; - db->get(PREFIX_SUPER, "min_min_alloc_size", &bl); - bufferlist::iterator p = bl.begin(); - try { - ::decode(min_min_alloc_size, p); - } catch (buffer::error& e) { - } - - _save_min_min_alloc_size(min_alloc_size); - dout(10) << __func__ << " min_min_alloc_size " << min_min_alloc_size << dendl; - } - // bluefs alloc { bluefs_extents.clear(); @@ -6499,7 +6456,21 @@ int BlueStore::_open_super_meta() } } + { + bufferlist bl; + db->get(PREFIX_SUPER, "min_alloc_size", &bl); + auto p = bl.begin(); + try { + ::decode(min_alloc_size, p); + } catch (buffer::error& e) { + derr << __func__ << " unable to read min_alloc_size" << dendl; + return -EIO; + } + dout(10) << __func__ << " min_alloc_size 0x" << std::hex << min_alloc_size + << std::dec << dendl; + } _set_alloc_sizes(); + return 0; } @@ -6515,7 +6486,22 @@ int BlueStore::_upgrade_super() // - super: added ondisk_format // - super: added min_readable_ondisk_format // - super: added min_compat_ondisk_format + // - super: added min_alloc_size + // - super: removed min_min_alloc_size KeyValueDB::Transaction t = db->get_transaction(); + { + bufferlist bl; + db->get(PREFIX_SUPER, "min_min_alloc_size", &bl); + auto p = bl.begin(); + try { + ::decode(min_alloc_size, p); + } catch (buffer::error& e) { + derr << __func__ << " failed to read min_min_alloc_size" << dendl; + return -EIO; + } + t->set(PREFIX_SUPER, "min_alloc_size", bl); + t->rmkey(PREFIX_SUPER, "min_min_alloc_size"); + } ondisk_format = 2; _prepare_ondisk_format_super(t); int r = db->submit_transaction_sync(t); diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index ca890f593b00..a3d8192bf775 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -1602,7 +1602,6 @@ private: size_t block_size_order = 0; ///< bits to shift to get block size uint64_t min_alloc_size = 0; ///< minimum allocation unit (power of 2) - uint64_t min_min_alloc_size = 0; ///< minimum seen min_alloc_size size_t min_alloc_size_order = 0; ///< bits for min_alloc_size uint64_t max_alloc_size = 0; ///< maximum allocation unit (power of 2) @@ -1687,7 +1686,6 @@ private: int _check_or_set_bdev_label(string path, uint64_t size, string desc, bool create); - void _save_min_min_alloc_size(uint64_t new_val); int _open_super_meta(); int _reconcile_bluefs_freespace();