From db9e6ce17ad0ae17318acecf838befb222486597 Mon Sep 17 00:00:00 2001 From: Igor Fedotov Date: Mon, 20 Jan 2020 18:53:31 +0300 Subject: [PATCH] os/bluestore: do per-pool omap repair in shallow fsck mode Signed-off-by: Igor Fedotov --- src/os/bluestore/BlueStore.cc | 251 +++++++++++++++++----------------- src/os/bluestore/BlueStore.h | 14 +- 2 files changed, 134 insertions(+), 131 deletions(-) diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index fc81dcabe6cdd..a9c72ba8f8c98 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -6091,6 +6091,20 @@ void BlueStore::_fsck_collections(int64_t* errors) } } +void BlueStore::_set_per_pool_omap() +{ + per_pool_omap = false; + bufferlist bl; + db->get(PREFIX_SUPER, "per_pool_omap", &bl); + if (bl.length()) { + per_pool_omap = true; + dout(10) << __func__ << " per_pool_omap=1" << dendl; + } else { + dout(10) << __func__ << " per_pool_omap not present" << dendl; + } + _check_no_per_pool_omap_alert(); +} + void BlueStore::_open_statfs() { osd_pools.clear(); @@ -6954,16 +6968,23 @@ int BlueStore::_mount(bool kv_only, bool open_db) mempool_thread.init(); - if (!per_pool_stat_collection && + if ((!per_pool_stat_collection || !per_pool_omap) && cct->_conf->bluestore_fsck_quick_fix_on_mount == true) { + + bool was_per_pool_omap = per_pool_omap; + dout(1) << __func__ << " quick-fix on mount" << dendl; _fsck_on_open(FSCK_SHALLOW, true); //reread statfs //FIXME minor: replace with actual open/close? _open_statfs(); - _check_legacy_statfs_alert(); + + //set again as hopefully it has been fixed + if (!was_per_pool_omap) { + _set_per_pool_omap(); + } } mounted = true; @@ -7418,6 +7439,11 @@ BlueStore::OnodeRef BlueStore::fsck_check_objects_shallow( *res_statfs); } } // for (auto& i : ref_map) + + if (o->onode.has_omap()) { + _fsck_check_object_omap(depth, o, ctx); + } + return o; } @@ -7541,9 +7567,7 @@ public: batch->num_sharded_objects, batch->num_spanning_blobs, nullptr, // used_blocks - nullptr, // used_omap_head; - nullptr, // used_per_pool_omap_head; - nullptr, // used_pgmeta_omap_head; + nullptr, //used_omap_head sb_info_lock, *sb_info, batch->expected_store_statfs, @@ -7654,6 +7678,7 @@ public: ctx.num_blobs += batch.num_blobs; ctx.num_sharded_objects += batch.num_sharded_objects; ctx.num_spanning_blobs += batch.num_spanning_blobs; + ctx.expected_store_statfs.add(batch.expected_store_statfs); for (auto it = batch.expected_pool_statfs.begin(); @@ -7666,20 +7691,87 @@ public: }; }; -void BlueStore::_fsck_check_objects(FSCKDepth depth, - BlueStore::FSCK_ObjectCtx& ctx) +void BlueStore::_fsck_check_object_omap(FSCKDepth depth, + OnodeRef& o, + const BlueStore::FSCK_ObjectCtx& ctx) { - //no need for the below lock when in non-shallow mode as - // there is no multithreading in this case - if (depth != FSCK_SHALLOW) { - ctx.sb_info_lock = nullptr; + auto& errors = ctx.errors; + auto& warnings = ctx.warnings; + auto repairer = ctx.repairer; + + ceph_assert(o->onode.has_omap()); + if (!o->onode.is_perpool_omap() && !o->onode.is_pgmeta_omap()) { + if (per_pool_omap) { + derr << "fsck error: " << o->oid + << " has omap that is not per-pool or pgmeta" << dendl; + ++errors; + } else { + const char* w; + if (cct->_conf->bluestore_fsck_error_on_no_per_pool_omap) { + ++errors; + w = "error"; + } else { + ++warnings; + w = "warning"; + } + //FIXME + dout(10) << "fsck " << w << ": " << o->oid + << " has omap that is not per-pool or pgmeta" << dendl; + } + } + if (repairer && + o->onode.has_omap() && + !o->onode.is_perpool_omap() && + !o->oid.is_pgmeta()) { + dout(10) << "fsck converting " << o->oid << " omap to per-pool" << dendl; + bufferlist h; + map kv; + int r = _omap_get(o->c, o->oid, &h, &kv); + if (r < 0) { + derr << " got " << r << " " << cpp_strerror(r) << dendl; + } else { + KeyValueDB::Transaction txn = db->get_transaction(); + // remove old keys + const string& old_omap_prefix = o->get_omap_prefix(); + string old_head, old_tail; + o->get_omap_header(&old_head); + o->get_omap_tail(&old_tail); + txn->rm_range_keys(old_omap_prefix, old_head, old_tail); + txn->rmkey(old_omap_prefix, old_tail); + // set flag + o->onode.set_flag(bluestore_onode_t::FLAG_PERPOOL_OMAP); + _record_onode(o, txn); + const string& new_omap_prefix = o->get_omap_prefix(); + // head + if (h.length()) { + string new_head; + o->get_omap_header(&new_head); + txn->set(new_omap_prefix, new_head, h); + } + // tail + string new_tail; + o->get_omap_tail(&new_tail); + bufferlist empty; + txn->set(new_omap_prefix, new_tail, empty); + // values + string final_key; + o->get_omap_key(string(), &final_key); + size_t base_key_len = final_key.size(); + for (auto& i : kv) { + final_key.resize(base_key_len); + final_key += i.first; + txn->set(new_omap_prefix, final_key, i.second); + } + db->submit_transaction_sync(txn); + repairer->inc_repaired(); + } } +} +void BlueStore::_fsck_check_objects(FSCKDepth depth, + BlueStore::FSCK_ObjectCtx& ctx) +{ auto& errors = ctx.errors; - auto& warnings = ctx.warnings; - auto used_omap_head = ctx.used_omap_head; - auto used_per_pool_omap_head = ctx.used_per_pool_omap_head; - auto used_pgmeta_omap_head = ctx.used_pgmeta_omap_head; auto sb_info_lock = ctx.sb_info_lock; auto& sb_info = ctx.sb_info; auto repairer = ctx.repairer; @@ -7887,91 +7979,15 @@ void BlueStore::_fsck_check_objects(FSCKDepth depth, } // omap if (o->onode.has_omap()) { - ceph_assert(used_omap_head); - ceph_assert(used_per_pool_omap_head); - ceph_assert(used_pgmeta_omap_head); - auto m = - o->onode.is_pgmeta_omap() ? used_pgmeta_omap_head : - (o->onode.is_perpool_omap() ? used_per_pool_omap_head : used_omap_head); - if (m->count(o->onode.nid)) { - derr << "fsck error: " << oid << " omap_head " << o->onode.nid - << " already in use" << dendl; + ceph_assert(ctx.used_omap_head); + if (ctx.used_omap_head->count(o->onode.nid)) { + derr << "fsck error: " << o->oid << " omap_head " << o->onode.nid + << " already in use" << dendl; ++errors; + } else { + ctx.used_omap_head->insert(o->onode.nid); } - else { - m->insert(o->onode.nid); - } - if (!o->onode.is_perpool_omap() && !o->onode.is_pgmeta_omap()) { - if (per_pool_omap) { - derr << "fsck error: " << oid - << " has omap that is not per-pool or pgmeta" << dendl; - ++errors; - } - else { - const char* w; - if (cct->_conf->bluestore_fsck_error_on_no_per_pool_omap) { - ++errors; - w = "error"; - } - else { - ++warnings; - w = "warning"; - } - derr << "fsck " << w << ": " << oid - << " has omap that is not per-pool or pgmeta" << dendl; - } - } - if (repairer && - o->onode.has_omap() && - !o->onode.is_perpool_omap() && - !o->oid.is_pgmeta()) { - derr << "fsck converting " << oid << " omap to per-pool" << dendl; - used_omap_head->erase(o->onode.nid); - used_per_pool_omap_head->insert(o->onode.nid); - bufferlist h; - map kv; - int r = _omap_get(c.get(), oid, &h, &kv); - if (r < 0) { - derr << " got " << r << " " << cpp_strerror(r) << dendl; - } - else { - KeyValueDB::Transaction txn = db->get_transaction(); - // remove old keys - const string& old_omap_prefix = o->get_omap_prefix(); - string old_head, old_tail; - o->get_omap_header(&old_head); - o->get_omap_tail(&old_tail); - txn->rm_range_keys(old_omap_prefix, old_head, old_tail); - txn->rmkey(old_omap_prefix, old_tail); - // set flag - o->onode.set_flag(bluestore_onode_t::FLAG_PERPOOL_OMAP); - _record_onode(o, txn); - const string& new_omap_prefix = o->get_omap_prefix(); - // head - if (h.length()) { - string new_head; - o->get_omap_header(&new_head); - txn->set(new_omap_prefix, new_head, h); - } - // tail - string new_tail; - o->get_omap_tail(&new_tail); - bufferlist empty; - txn->set(new_omap_prefix, new_tail, empty); - // values - string final_key; - o->get_omap_key(string(), &final_key); - size_t base_key_len = final_key.size(); - for (auto& i : kv) { - final_key.resize(base_key_len); - final_key += i.first; - txn->set(new_omap_prefix, final_key, i.second); - } - db->submit_transaction_sync(txn); - repairer->inc_repaired(); - } - } - } // if (depth != FSCK_SHALLOW && o->onode.has_omap()) + } // if (o->onode.has_omap()) if (depth == FSCK_DEEP) { bufferlist bl; uint64_t max_read_block = cct->_conf->bluestore_fsck_read_bytes_cap; @@ -8130,8 +8146,6 @@ int BlueStore::_fsck_on_open(BlueStore::FSCKDepth depth, bool repair) unsigned repaired = 0; uint64_t_btree_t used_omap_head; - uint64_t_btree_t used_per_pool_omap_head; - uint64_t_btree_t used_pgmeta_omap_head; uint64_t_btree_t used_sbids; mempool_dynamic_bitset used_blocks; @@ -8260,15 +8274,15 @@ int BlueStore::_fsck_on_open(BlueStore::FSCKDepth depth, bool repair) num_spanning_blobs, &used_blocks, &used_omap_head, - &used_per_pool_omap_head, - &used_pgmeta_omap_head, - &sb_info_lock, + //no need for the below lock when in non-shallow mode as + // there is no multithreading in this case + depth == FSCK_SHALLOW ? &sb_info_lock : nullptr, sb_info, expected_store_statfs, expected_pool_statfs, repair ? &repairer : nullptr); - _fsck_check_objects(depth, - ctx); + + _fsck_check_objects(depth, ctx); } dout(1) << __func__ << " checking shared_blobs" << dendl; @@ -8592,7 +8606,7 @@ int BlueStore::_fsck_on_open(BlueStore::FSCKDepth depth, bool repair) errors, warnings, repair ? &repairer : nullptr); if (depth != FSCK_SHALLOW) { - dout(1) << __func__ << " checking for stray omap data" << dendl; + dout(1) << __func__ << " checking for stray omap data " << used_omap_head.size() << dendl; it = db->get_iterator(PREFIX_OMAP); if (it) { uint64_t last_omap_head = 0; @@ -8601,7 +8615,7 @@ int BlueStore::_fsck_on_open(BlueStore::FSCKDepth depth, bool repair) _key_decode_u64(it->key().c_str(), &omap_head); if (used_omap_head.count(omap_head) == 0 && omap_head != last_omap_head) { - derr << "fsck error: found stray omap data on omap_head " + dout(10) << "fsck error: found stray omap data on omap_head " << omap_head << dendl; ++errors; last_omap_head = omap_head; @@ -8614,9 +8628,9 @@ int BlueStore::_fsck_on_open(BlueStore::FSCKDepth depth, bool repair) for (it->lower_bound(string()); it->valid(); it->next()) { uint64_t omap_head; _key_decode_u64(it->key().c_str(), &omap_head); - if (used_pgmeta_omap_head.count(omap_head) == 0 && + if (used_omap_head.count(omap_head) == 0 && omap_head != last_omap_head) { - derr << "fsck error: found stray (pgmeta) omap data on omap_head " + dout(10) << "fsck error: found stray (pgmeta) omap data on omap_head " << omap_head << dendl; last_omap_head = omap_head; ++errors; @@ -8633,9 +8647,9 @@ int BlueStore::_fsck_on_open(BlueStore::FSCKDepth depth, bool repair) const char *c = k.c_str(); c = _key_decode_u64(c, &pool); c = _key_decode_u64(c, &omap_head); - if (used_per_pool_omap_head.count(omap_head) == 0 && + if (used_omap_head.count(omap_head) == 0 && omap_head != last_omap_head) { - derr << "fsck error: found stray (per-pool) omap data on omap_head " + dout(10) << "fsck error: found stray (per-pool) omap data on omap_head " << omap_head << dendl; ++errors; last_omap_head = omap_head; @@ -8759,8 +8773,7 @@ int BlueStore::_fsck_on_open(BlueStore::FSCKDepth depth, bool repair) } } if (repair) { - if (!per_pool_omap && - depth != FSCK_SHALLOW) { + if (!per_pool_omap) { dout(5) << __func__ << " marking per_pool_omap=1" << dendl; repairer.fix_per_pool_omap(db); } @@ -10830,17 +10843,7 @@ int BlueStore::_open_super_meta() << std::dec << dendl; } - { - bufferlist bl; - db->get(PREFIX_SUPER, "per_pool_omap", &bl); - if (bl.length()) { - per_pool_omap = true; - dout(10) << __func__ << " per_pool_omap=1" << dendl; - } else { - dout(10) << __func__ << " per_pool_omap not present" << dendl; - } - _check_no_per_pool_omap_alert(); - } + _set_per_pool_omap(); _open_statfs(); _set_alloc_sizes(); diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index 0aef90edeb4ac..6cba03d32e649 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -2229,6 +2229,7 @@ private: void _set_alloc_sizes(); void _set_blob_size(); void _set_finisher_num(); + void _set_per_pool_omap(); void _update_osd_memory_options(); int _open_bdev(bool create); @@ -3237,8 +3238,6 @@ public: mempool_dynamic_bitset* used_blocks; uint64_t_btree_t* used_omap_head; - uint64_t_btree_t* used_per_pool_omap_head; - uint64_t_btree_t* used_pgmeta_omap_head; ceph::mutex* sb_info_lock; sb_info_map_t& sb_info; @@ -3256,8 +3255,6 @@ public: uint64_t& _num_spanning_blobs, mempool_dynamic_bitset* _ub, uint64_t_btree_t* _used_omap_head, - uint64_t_btree_t* _used_per_pool_omap_head, - uint64_t_btree_t* _used_pgmeta_omap_head, ceph::mutex* _sb_info_lock, sb_info_map_t& _sb_info, store_statfs_t& _store_statfs, @@ -3272,8 +3269,6 @@ public: num_spanning_blobs(_num_spanning_blobs), used_blocks(_ub), used_omap_head(_used_omap_head), - used_per_pool_omap_head(_used_per_pool_omap_head), - used_pgmeta_omap_head(_used_pgmeta_omap_head), sb_info_lock(_sb_info_lock), sb_info(_sb_info), expected_store_statfs(_store_statfs), @@ -3294,6 +3289,10 @@ public: const BlueStore::FSCK_ObjectCtx& ctx); private: + void _fsck_check_object_omap(FSCKDepth depth, + OnodeRef& o, + const BlueStore::FSCK_ObjectCtx& ctx); + void _fsck_check_objects(FSCKDepth depth, FSCK_ObjectCtx& ctx); }; @@ -3483,6 +3482,7 @@ public: ++to_repair_cnt; } } + // In fact this is the only repairer's method which is thread-safe!! void inc_repaired() { ++to_repair_cnt; } @@ -3498,7 +3498,7 @@ public: } private: - unsigned to_repair_cnt = 0; + std::atomic to_repair_cnt = { 0 }; KeyValueDB::Transaction fix_per_pool_omap_txn; KeyValueDB::Transaction fix_fm_leaked_txn; KeyValueDB::Transaction fix_fm_false_free_txn; -- 2.39.5