From: Sage Weil Date: Mon, 25 Apr 2016 04:04:09 +0000 (-0400) Subject: os/bluestore: separate fm and alloc lifecycles X-Git-Tag: v11.0.0~643^2~7 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=cfa9055310c872f47728fa8e1fd07920d6e1e141;p=ceph.git os/bluestore: separate fm and alloc lifecycles Set up the freelist before the allocator, so that we can initialize the freespace before feeding that info to the allocator. Signed-off-by: Sage Weil --- diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 14a777b81cd..094a4dd3a8a 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -967,18 +967,83 @@ void BlueStore::_close_bdev() bdev = NULL; } -int BlueStore::_open_alloc() +int BlueStore::_open_fm(bool create) { assert(fm == NULL); - assert(alloc == NULL); - fm = FreelistManager::create(freelist_type); - int r = fm->init(db, PREFIX_ALLOC); + fm = FreelistManager::create(freelist_type, db, PREFIX_ALLOC); + + if (create) { + // initialize freespace + dout(20) << __func__ << " initializing freespace" << dendl; + KeyValueDB::Transaction t = db->get_transaction(); + { + bufferlist bl; + bl.append(freelist_type); + t->set(PREFIX_SUPER, "freelist_type", bl); + } + fm->create(bdev->get_size(), t); + + uint64_t reserved = 0; + if (g_conf->bluestore_bluefs) { + assert(bluefs_extents.num_intervals() == 1); + interval_set::iterator p = bluefs_extents.begin(); + reserved = p.get_start() + p.get_len(); + dout(20) << __func__ << " reserved " << reserved << " for bluefs" << dendl; + bufferlist bl; + ::encode(bluefs_extents, bl); + t->set(PREFIX_SUPER, "bluefs_extents", bl); + dout(20) << __func__ << " bluefs_extents " << bluefs_extents << dendl; + } else { + reserved = BLUEFS_START; + } + fm->allocate(0, reserved, t); + + if (g_conf->bluestore_debug_prefill > 0) { + uint64_t end = bdev->get_size() - reserved; + dout(1) << __func__ << " pre-fragmenting freespace, using " + << g_conf->bluestore_debug_prefill << " with max free extent " + << g_conf->bluestore_debug_prefragment_max << dendl; + uint64_t min_alloc_size = g_conf->bluestore_min_alloc_size; + uint64_t start = ROUND_UP_TO(reserved, min_alloc_size); + uint64_t max_b = g_conf->bluestore_debug_prefragment_max / min_alloc_size; + float r = g_conf->bluestore_debug_prefill; + while (start < end) { + uint64_t l = (rand() % max_b + 1) * min_alloc_size; + if (start + l > end) + l = end - start; + l = ROUND_UP_TO(l, min_alloc_size); + uint64_t u = 1 + (uint64_t)(r * (double)l / (1.0 - r)); + u = ROUND_UP_TO(u, min_alloc_size); + dout(20) << " free " << start << "~" << l << " use " << u << dendl; + fm->allocate(start + l, u, t); + start += l + u; + } + } + db->submit_transaction_sync(t); + } + + int r = fm->init(); if (r < 0) { + derr << __func__ << " freelist init failed: " << cpp_strerror(r) << dendl; delete fm; fm = NULL; return r; } + return 0; +} + +void BlueStore::_close_fm() +{ + dout(10) << __func__ << dendl; + assert(fm); + fm->shutdown(); + delete fm; + fm = NULL; +} +int BlueStore::_open_alloc() +{ + assert(alloc == NULL); alloc = Allocator::create("stupid"); uint64_t num = 0, bytes = 0; fm->enumerate_reset(); @@ -991,19 +1056,15 @@ int BlueStore::_open_alloc() dout(10) << __func__ << " loaded " << pretty_si_t(bytes) << " in " << num << " extents" << dendl; - return r; + return 0; } void BlueStore::_close_alloc() { - assert(fm); assert(alloc); alloc->shutdown(); delete alloc; alloc = NULL; - fm->shutdown(); - delete fm; - fm = NULL; } int BlueStore::_open_fsid(bool create) @@ -1744,59 +1805,13 @@ int BlueStore::mkfs() if (r < 0) goto out_close_bdev; - r = _open_alloc(); + r = _open_fm(true); if (r < 0) goto out_close_db; - // initialize freespace - { - dout(20) << __func__ << " initializing freespace" << dendl; - KeyValueDB::Transaction t = db->get_transaction(); - { - bufferlist bl; - bl.append(freelist_type); - t->set(PREFIX_SUPER, "freelist_type", bl); - } - fm->create(bdev->get_size(), t); - - uint64_t reserved = 0; - if (g_conf->bluestore_bluefs) { - assert(bluefs_extents.num_intervals() == 1); - interval_set::iterator p = bluefs_extents.begin(); - reserved = p.get_start() + p.get_len(); - dout(20) << __func__ << " reserved " << reserved << " for bluefs" << dendl; - bufferlist bl; - ::encode(bluefs_extents, bl); - t->set(PREFIX_SUPER, "bluefs_extents", bl); - dout(20) << __func__ << " bluefs_extents " << bluefs_extents << dendl; - } else { - reserved = BLUEFS_START; - } - fm->allocate(0, reserved, t); - - if (g_conf->bluestore_debug_prefill > 0) { - uint64_t end = bdev->get_size() - reserved; - dout(1) << __func__ << " pre-fragmenting freespace, using " - << g_conf->bluestore_debug_prefill << " with max free extent " - << g_conf->bluestore_debug_prefragment_max << dendl; - uint64_t min_alloc_size = g_conf->bluestore_min_alloc_size; - uint64_t start = ROUND_UP_TO(reserved, min_alloc_size); - uint64_t max_b = g_conf->bluestore_debug_prefragment_max / min_alloc_size; - float r = g_conf->bluestore_debug_prefill; - while (start < end) { - uint64_t l = (rand() % max_b + 1) * min_alloc_size; - if (start + l > end) - l = end - start; - l = ROUND_UP_TO(l, min_alloc_size); - uint64_t u = 1 + (uint64_t)(r * (double)l / (1.0 - r)); - u = ROUND_UP_TO(u, min_alloc_size); - dout(20) << " free " << start << "~" << l << " use " << u << dendl; - fm->allocate(start + l, u, t); - start += l + u; - } - } - assert(0 == db->submit_transaction_sync(t)); - } + r = _open_alloc(); + if (r < 0) + goto out_close_fm; r = write_meta("kv_backend", g_conf->bluestore_kvbackend); if (r < 0) @@ -1821,6 +1836,8 @@ int BlueStore::mkfs() out_close_alloc: _close_alloc(); + out_close_fm: + _close_fm(); out_close_db: _close_db(); out_close_bdev: @@ -1887,10 +1904,14 @@ int BlueStore::mount() if (r < 0) goto out_db; - r = _open_alloc(); + r = _open_fm(false); if (r < 0) goto out_db; + r = _open_alloc(); + if (r < 0) + goto out_fm; + r = _open_collections(); if (r < 0) goto out_alloc; @@ -1922,6 +1943,8 @@ int BlueStore::mount() coll_map.clear(); out_alloc: _close_alloc(); + out_fm: + _close_fm(); out_db: _close_db(); out_bdev: @@ -1956,6 +1979,7 @@ int BlueStore::umount() mounted = false; _close_alloc(); + _close_fm(); _close_db(); _close_bdev(); _close_fsid(); @@ -2047,13 +2071,17 @@ int BlueStore::fsck() if (r < 0) goto out_bdev; - r = _open_alloc(); + r = _open_super_meta(); if (r < 0) goto out_db; - r = _open_super_meta(); + r = _open_fm(false); if (r < 0) - goto out_alloc; + goto out_db; + + r = _open_alloc(); + if (r < 0) + goto out_fm; r = _open_collections(&errors); if (r < 0) @@ -2392,6 +2420,8 @@ int BlueStore::fsck() coll_map.clear(); out_alloc: _close_alloc(); + out_fm: + _close_fm(); out_db: it.reset(); // before db is closed _close_db(); diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index bc7b9bdd0eb..6f45c97923e 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -577,6 +577,8 @@ private: void _close_bdev(); int _open_db(bool create); void _close_db(); + int _open_fm(bool create); + void _close_fm(); int _open_alloc(); void _close_alloc(); int _open_collections(int *errors=0); diff --git a/src/os/bluestore/ExtentFreelistManager.cc b/src/os/bluestore/ExtentFreelistManager.cc index 201d0886e32..97de391b289 100644 --- a/src/os/bluestore/ExtentFreelistManager.cc +++ b/src/os/bluestore/ExtentFreelistManager.cc @@ -11,17 +11,15 @@ #undef dout_prefix #define dout_prefix *_dout << "freelist " -int ExtentFreelistManager::init(KeyValueDB *db, string p) +int ExtentFreelistManager::init() { - dout(1) << __func__ << " prefix " << p << dendl; + dout(1) << __func__ << dendl; // load state from kvstore - prefix = p; - - KeyValueDB::Transaction txn = db->get_transaction(); + KeyValueDB::Transaction txn = kvdb->get_transaction(); int fixed = 0; - KeyValueDB::Iterator it = db->get_iterator(prefix); + KeyValueDB::Iterator it = kvdb->get_iterator(prefix); it->lower_bound(string()); uint64_t last_offset = 0; uint64_t last_length = 0; @@ -70,7 +68,7 @@ int ExtentFreelistManager::init(KeyValueDB *db, string p) } if (fixed) { - db->submit_transaction_sync(txn); + kvdb->submit_transaction_sync(txn); derr << " fixed " << fixed << " extents" << dendl; } diff --git a/src/os/bluestore/ExtentFreelistManager.h b/src/os/bluestore/ExtentFreelistManager.h index cbd296990a5..71a2ccc3d2a 100644 --- a/src/os/bluestore/ExtentFreelistManager.h +++ b/src/os/bluestore/ExtentFreelistManager.h @@ -13,6 +13,7 @@ #include "include/cpp-btree/btree_map.h" class ExtentFreelistManager : public FreelistManager { + KeyValueDB *kvdb; std::string prefix; std::mutex lock; uint64_t total_free; @@ -28,11 +29,13 @@ class ExtentFreelistManager : public FreelistManager { void _dump(); public: - ExtentFreelistManager() : + ExtentFreelistManager(KeyValueDB *kvdb, std::string prefix) : + kvdb(kvdb), + prefix(prefix), total_free(0) { } - int init(KeyValueDB *kvdb, std::string prefix) override; + int init() override; void shutdown() override; void dump() override; diff --git a/src/os/bluestore/FreelistManager.cc b/src/os/bluestore/FreelistManager.cc index 1b6167d9fd0..3dc5aa5002e 100644 --- a/src/os/bluestore/FreelistManager.cc +++ b/src/os/bluestore/FreelistManager.cc @@ -4,9 +4,12 @@ #include "FreelistManager.h" #include "ExtentFreelistManager.h" -FreelistManager *FreelistManager::create(string type) +FreelistManager *FreelistManager::create( + string type, + KeyValueDB *kvdb, + string prefix) { if (type == "extent") - return new ExtentFreelistManager; + return new ExtentFreelistManager(kvdb, prefix); return NULL; } diff --git a/src/os/bluestore/FreelistManager.h b/src/os/bluestore/FreelistManager.h index caaa5a5a542..aa58cc40731 100644 --- a/src/os/bluestore/FreelistManager.h +++ b/src/os/bluestore/FreelistManager.h @@ -15,14 +15,17 @@ public: FreelistManager() {} virtual ~FreelistManager() {} - static FreelistManager *create(string type); + static FreelistManager *create( + string type, + KeyValueDB *db, + string prefix); virtual int create(uint64_t size, KeyValueDB::Transaction txn) { release(0, size, txn); return 0; } - virtual int init(KeyValueDB *kvdb, std::string prefix) = 0; + virtual int init() = 0; virtual void shutdown() = 0; virtual void dump() = 0;