From 8471cb42029339b2838f8bbe907110e9cc2253a0 Mon Sep 17 00:00:00 2001
From: Igor Fedotov
Date: Thu, 21 Dec 2017 16:46:01 +0300
Subject: [PATCH] os/bluestore: implement BlueStore repair

Signed-off-by: Igor Fedotov
---
 src/os/bluestore/BlueStore.cc                | 797 ++++++++++++++++---
 src/os/bluestore/BlueStore.h                 | 208 +++++
 src/os/bluestore/bluestore_types.h           |   6 +
 src/test/objectstore/store_test.cc           | 123 +++
 src/test/objectstore/test_bluestore_types.cc |  90 ++-
 5 files changed, 1129 insertions(+), 95 deletions(-)

diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc
index b2625476147be..618ad3e45838a 100644
--- a/src/os/bluestore/BlueStore.cc
+++ b/src/os/bluestore/BlueStore.cc
@@ -5689,11 +5689,13 @@ static void apply(uint64_t off,
 }
 
 int BlueStore::_fsck_check_extents(
+  const coll_t& cid,
   const ghobject_t& oid,
   const PExtentVector& extents,
   bool compressed,
   mempool_dynamic_bitset &used_blocks,
   uint64_t granularity,
+  BlueStoreRepairer* repairer,
   store_statfs_t& expected_statfs)
 {
   dout(30) << __func__ << " oid " << oid << " extents " << extents << dendl;
@@ -5710,18 +5712,27 @@ int BlueStore::_fsck_check_extents(
       e.offset, e.length, granularity, used_blocks,
       [&](uint64_t pos, mempool_dynamic_bitset &bs) {
        assert(pos < bs.size());
-       if (bs.test(pos))
-         already = true;
+       if (bs.test(pos)) {
+         if (repairer) {
+           repairer->note_misreference(
+             pos * min_alloc_size, min_alloc_size, !already);
+         }
+         if (!already) {
+           derr << "fsck error: " << oid << " extent " << e
+                << " or a subset is already allocated (misreferenced)" << dendl;
+           ++errors;
+           already = true;
+         }
+       }
        else
          bs.set(pos);
       });
-    if (already) {
-      derr << " " << oid << " extent " << e
-          << " or a subset is already allocated" << dendl;
-      ++errors;
-    }
+    if (repairer) {
+      repairer->get_space_usage_tracker().set_used(
+        e.offset, e.length, cid, oid);
+    }
+
     if (e.end() > bdev->get_size()) {
-      derr << " " << oid << " extent " << e
+      derr << "fsck error: " << oid << " extent " << e
           << " past end of block device" << dendl;
       ++errors;
     }
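Aside: the misreference detection above hinges on one shared bitset. Every
allocation unit referenced by any blob sets its bit; a second hit on an
already-set bit means two owners claim the same space. Below is a standalone
sketch of just that idea, with simplified stand-in names (a plain
std::vector<bool> instead of the mempool-backed dynamic bitset, and no
repairer hook); it is illustrative only, not part of the patch:

    #include <cassert>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    struct Extent { uint64_t offset, length; };

    int check_extents(const std::vector<Extent>& extents,
                      std::vector<bool>& used,   // one bit per alloc unit
                      uint64_t au)               // allocation unit size
    {
      int errors = 0;
      for (auto& e : extents) {
        bool already = false;
        for (uint64_t pos = e.offset / au;
             pos <= (e.offset + e.length - 1) / au; ++pos) {
          assert(pos < used.size());
          if (used[pos]) {
            if (!already) {      // report each extent at most once
              std::cerr << "extent 0x" << std::hex << e.offset << "~"
                        << e.length << std::dec
                        << " is already allocated (misreferenced)\n";
              ++errors;
              already = true;
            }
          } else {
            used[pos] = true;    // first owner claims the unit
          }
        }
      }
      return errors;
    }

    int main() {
      std::vector<bool> used(16, false);
      const uint64_t au = 0x1000;
      std::vector<Extent> a{{0x0000, 0x2000}};  // units 0..1
      std::vector<Extent> b{{0x1000, 0x1000}};  // unit 1 again -> misreference
      int errors = check_extents(a, used, au) + check_extents(b, used, au);
      return errors == 1 ? 0 : 1;  // exactly one misreference expected
    }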
@@ -5729,13 +5740,50 @@ int BlueStore::_fsck_check_extents(
   return errors;
 }
 
+/**
+An overview of the currently implemented repair logic,
+performed by fsck in two stages: detection (+ preparation) and commit.
+Detection stage (in processing order):
+  (Issue -> Repair action to schedule)
+  - Detect undecodable keys for Shared Blobs -> Remove
+  - Detect undecodable records for Shared Blobs -> Remove
+    (might trigger missed Shared Blob detection below)
+  - Detect stray records for Shared Blobs -> Remove
+  - Detect misreferenced pextents -> Fix
+    Prepare Bloom-like filter to track cid/oid -> pextent
+    Prepare list of extents that are improperly referenced
+    Enumerate Onode records that might use 'misreferenced' pextents
+    (Bloom-like filter applied to reduce computation)
+    For each questionable Onode enumerate all blobs and identify broken ones
+    (i.e. blobs having 'misreferences')
+    Rewrite each broken blob's data by allocating new extents and
+    copying the data there
+    If the blob is shared - unshare it and mark the corresponding Shared Blob
+    for removal
+    Release previously allocated space
+    Update Extent Map
+  - Detect missed Shared Blobs -> Recreate
+  - Detect undecodable deferred transaction -> Remove
+  - Detect Freelist Manager's 'false free' entries -> Mark as used
+  - Detect Freelist Manager's leaked entries -> Mark as free
+  - Detect statfs inconsistency -> Update
+Commit stage (separate DB commit per step):
+  - Apply leaked FM entries fix
+  - Apply 'false free' FM entries fix
+  - Apply 'Remove' actions
+  - Apply fix for misreferenced pextents
+  - Apply Shared Blob recreate
+    (can be merged with the step above if misreferences were detected)
+  - Apply statfs update
+*/
 int BlueStore::_fsck(bool deep, bool repair)
 {
   dout(1) << __func__
-         << (repair ? " fsck" : " repair")
+         << " <<<START>>>"
+         << (repair ? " repair" : " check")
          << (deep ? " (deep)" : " (shallow)")
          << " start" << dendl;
   int errors = 0;
-  int repaired = 0;
+  unsigned repaired = 0;
 
   typedef btree::btree_set<
     uint64_t, std::less<uint64_t>,
    mempool::bluestore_fsck::pool_allocator<uint64_t>> uint64_t_btree_t;
@@ -5749,10 +5797,13 @@ int BlueStore::_fsck(bool deep, bool repair)
   KeyValueDB::Iterator it;
   store_statfs_t expected_statfs, actual_statfs;
   struct sb_info_t {
+    coll_t cid;
     list<ghobject_t> oids;
     SharedBlobRef sb;
     bluestore_extent_ref_map_t ref_map;
-    bool compressed;
+    bool compressed = false;
+    bool passed = false;
+    bool updated = false;
   };
   mempool::bluestore_fsck::map<uint64_t, sb_info_t> sb_info;
 
@@ -5763,6 +5814,7 @@ int BlueStore::_fsck(bool deep, bool repair)
   uint64_t num_shared_blobs = 0;
   uint64_t num_sharded_objects = 0;
   uint64_t num_object_shards = 0;
+  BlueStoreRepairer repairer;
 
   utime_t start = ceph_clock_now();
 
@@ -5822,6 +5874,11 @@ int BlueStore::_fsck(bool deep, bool repair)
        bs.set(pos);
       }
     );
+  if (repair) {
+    repairer.get_space_usage_tracker().init(
+      bdev->get_size(),
+      min_alloc_size);
+  }
 
   if (bluefs) {
     for (auto e = bluefs_extents.begin(); e != bluefs_extents.end(); ++e) {
@@ -5914,12 +5971,9 @@ int BlueStore::_fsck(bool deep, bool repair)
          oid.hobj.pool != (int64_t)pgid.pool() ||
          !c->contains(oid)) {
        c = nullptr;
-       for (ceph::unordered_map<coll_t, CollectionRef>::iterator p =
-              coll_map.begin();
-            p != coll_map.end();
-            ++p) {
-         if (p->second->contains(oid)) {
-           c = p->second;
+       for (auto& p : coll_map) {
+         if (p.second->contains(oid)) {
+           c = p.second;
            break;
          }
        }
@@ -6103,6 +6157,8 @@ int BlueStore::_fsck(bool deep, bool repair)
              ++errors;
            }
            sb_info_t& sbi = sb_info[i.first->shared_blob->get_sbid()];
+           assert(sbi.cid == coll_t() || sbi.cid == c->cid);
+           sbi.cid = c->cid;
            sbi.sb = i.first->shared_blob;
            sbi.oids.push_back(oid);
            sbi.compressed = blob.is_compressed();
@@ -6112,10 +6168,11 @@ int BlueStore::_fsck(bool deep, bool repair)
            }
          }
        } else {
-         errors += _fsck_check_extents(oid, blob.get_extents(),
+         errors += _fsck_check_extents(c->cid, oid, blob.get_extents(),
                                        blob.is_compressed(),
                                        used_blocks,
                                        fm->get_alloc_size(),
+                                       repair ? &repairer : nullptr,
                                        expected_statfs);
        }
       }
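Aside: the overview comment describes a detect-then-commit design: scanning
never mutates the store directly, it only schedules fixes, each into a lazily
created per-category transaction, and the commit stage applies them in a fixed
order. A minimal self-contained sketch of that pattern follows; KVDB, Txn and
the method names are illustrative stand-ins, not BlueStore's actual
KeyValueDB interface:

    #include <memory>
    #include <string>
    #include <vector>

    struct Txn { std::vector<std::string> ops; };
    struct KVDB {
      std::shared_ptr<Txn> get_transaction() { return std::make_shared<Txn>(); }
      void submit_sync(std::shared_ptr<Txn>&) { /* commit ops atomically */ }
    };

    class Repairer {
      std::shared_ptr<Txn> fix_leaked_txn, remove_key_txn;
      unsigned to_repair = 0;
    public:
      void fix_leaked(KVDB& db, uint64_t off, uint64_t len) {
        if (!fix_leaked_txn) fix_leaked_txn = db.get_transaction();
        fix_leaked_txn->ops.push_back("release " + std::to_string(off) +
                                      "~" + std::to_string(len));
        ++to_repair;
      }
      void remove_key(KVDB& db, const std::string& key) {
        if (!remove_key_txn) remove_key_txn = db.get_transaction();
        remove_key_txn->ops.push_back("rmkey " + key);
        ++to_repair;
      }
      unsigned apply(KVDB& db) {  // commit stage: fixed order, separate commits
        for (auto* t : {&fix_leaked_txn, &remove_key_txn}) {
          if (*t) { db.submit_sync(*t); t->reset(); }
        }
        unsigned n = to_repair;
        to_repair = 0;
        return n;
      }
    };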
@@ -6142,6 +6199,7 @@ int BlueStore::_fsck(bool deep, bool repair)
       }
     }
   }
+  dout(1) << __func__ << " checking shared_blobs" << dendl;
   it = db->get_iterator(PREFIX_SHARED_BLOB);
   if (it) {
@@ -6151,6 +6209,9 @@ int BlueStore::_fsck(bool deep, bool repair)
       if (get_key_shared_blob(key, &sbid)) {
        derr << "fsck error: bad key '" << key
             << "' in shared blob namespace" << dendl;
+       if (repair) {
+         repairer.remove_key(db, PREFIX_SHARED_BLOB, key);
+       }
        ++errors;
        continue;
       }
@@ -6158,6 +6219,9 @@ int BlueStore::_fsck(bool deep, bool repair)
       if (p == sb_info.end()) {
        derr << "fsck error: found stray shared blob data for sbid 0x"
             << std::hex << sbid << std::dec << dendl;
+       if (repair) {
+         repairer.remove_key(db, PREFIX_SHARED_BLOB, key);
+       }
        ++errors;
       } else {
        ++num_shared_blobs;
@@ -6165,36 +6229,266 @@ int BlueStore::_fsck(bool deep, bool repair)
        bluestore_shared_blob_t shared_blob(sbid);
        bufferlist bl = it->value();
        bufferlist::iterator blp = bl.begin();
-       decode(shared_blob, blp);
+       try {
+         decode(shared_blob, blp);
+       } catch (buffer::error& e) {
+         ++errors;
+         // Force update and don't report as missing
+         sbi.updated = sbi.passed = true;
+
+         derr << "fsck error: failed to decode Shared Blob "
+              << pretty_binary_string(it->key()) << dendl;
+         if (repair) {
+           dout(20) << __func__ << " undecodable Shared Blob, key:'"
+                    << pretty_binary_string(it->key())
+                    << "', removing" << dendl;
+           repairer.remove_key(db, PREFIX_SHARED_BLOB, it->key());
+         }
+         continue;
+       }
        dout(20) << __func__ << " " << *sbi.sb << " " << shared_blob << dendl;
        if (shared_blob.ref_map != sbi.ref_map) {
          derr << "fsck error: shared blob 0x" << std::hex << sbid
-              << std::dec << " ref_map " << shared_blob.ref_map
-              << " != expected " << sbi.ref_map << dendl;
+              << std::dec << " ref_map " << shared_blob.ref_map
+              << " != expected " << sbi.ref_map << dendl;
+         sbi.updated = true; // will update later in repair mode only!
          ++errors;
        }
        PExtentVector extents;
        for (auto &r : shared_blob.ref_map.ref_map) {
          extents.emplace_back(bluestore_pextent_t(r.first, r.second.length));
        }
-       errors += _fsck_check_extents(p->second.oids.front(),
+       errors += _fsck_check_extents(sbi.cid,
+                                     p->second.oids.front(),
                                      extents,
                                      p->second.compressed,
                                      used_blocks,
                                      fm->get_alloc_size(),
+                                     repair ? &repairer : nullptr,
                                      expected_statfs);
-       sb_info.erase(p);
+       sbi.passed = true;
       }
     }
   } // if (it)
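Aside: the try/catch above is the "tolerate and schedule" decode idiom: a
corrupt record no longer aborts the whole fsck; it is counted as an error and
its key is queued for removal so the commit stage can drop it, while scanning
continues. A self-contained sketch of the idiom, using illustrative stand-in
types rather than Ceph's bufferlist/decode machinery:

    #include <map>
    #include <stdexcept>
    #include <string>
    #include <vector>

    struct Record { /* decoded payload */ };

    void decode_record(const std::string& raw, Record*) {
      if (raw.empty()) throw std::runtime_error("corrupt record");
    }

    int scan_namespace(const std::map<std::string, std::string>& kv,
                       bool repair,
                       std::vector<std::string>* keys_to_remove)
    {
      int errors = 0;
      for (auto& [key, value] : kv) {
        Record rec;
        try {
          decode_record(value, &rec);
        } catch (const std::exception&) {
          ++errors;                          // undecodable record
          if (repair)
            keys_to_remove->push_back(key);  // schedule; don't mutate mid-scan
          continue;                          // keep scanning the rest
        }
        // ... validate rec against in-memory expectations ...
      }
      return errors;
    }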
+
+  if (repair && repairer.preprocess_misreference(db)) {
+    dout(1) << __func__ << " sorting out misreferenced extents" << dendl;
+    auto& space_tracker = repairer.get_space_usage_tracker();
+    auto& misref_extents = repairer.get_misreferences();
+    interval_set<uint64_t> to_release;
+    it = db->get_iterator(PREFIX_OBJ);
+    if (it) {
+      CollectionRef c;
+      spg_t pgid;
+      KeyValueDB::Transaction txn = repairer.get_fix_misreferences_txn();
+      bool bypass_rest = false;
+      for (it->lower_bound(string()); it->valid() && !bypass_rest;
+           it->next()) {
+        dout(30) << __func__ << " key "
+                 << pretty_binary_string(it->key()) << dendl;
+        if (is_extent_shard_key(it->key())) {
+          continue;
+        }
+
+        ghobject_t oid;
+        int r = get_key_object(it->key(), &oid);
+        if (r < 0 || !space_tracker.is_used(oid)) {
+          continue;
+        }
+
+        if (!c ||
+            oid.shard_id != pgid.shard ||
+            oid.hobj.pool != (int64_t)pgid.pool() ||
+            !c->contains(oid)) {
+          c = nullptr;
+          for (auto& p : coll_map) {
+            if (p.second->contains(oid)) {
+              c = p.second;
+              break;
+            }
+          }
+          if (!c) {
+            continue;
+          }
+          c->cid.is_pg(&pgid);
+        }
+        if (!space_tracker.is_used(c->cid)) {
+          continue;
+        }
+        dout(20) << __func__ << " check misreference for col:" << c->cid
+                 << " obj:" << oid << dendl;
+
+        RWLock::RLocker l(c->lock);
+        OnodeRef o = c->get_onode(oid, false);
+        o->extent_map.fault_range(db, 0, OBJECT_MAX_SIZE);
+        mempool::bluestore_fsck::set<BlobRef> blobs;
+
+        for (auto& e : o->extent_map.extent_map) {
+          blobs.insert(e.blob);
+        }
+        bool need_onode_update = false;
+        bool first_dump = true;
+        for (auto b : blobs) {
+          bool broken_blob = false;
+          auto& pextents = b->dirty_blob().dirty_extents();
+          for (auto& e : pextents) {
+            if (!e.is_valid()) {
+              continue;
+            }
+            // for the sake of simplicity and proper shared blob handling
+            // always rewrite the whole blob even when it's partially
+            // misreferenced.
+            if (misref_extents.intersects(e.offset, e.length)) {
+              if (first_dump) {
+                first_dump = false;
+                _dump_onode(o, 10);
+              }
+              broken_blob = true;
+              break;
+            }
+          }
+          if (!broken_blob)
+            continue;
+          bool compressed = b->get_blob().is_compressed();
+          need_onode_update = true;
+          dout(10) << __func__
+                   << " fix misreferences in oid:" << oid
+                   << " " << *b << dendl;
+          uint64_t b_off = 0;
+          PExtentVector pext_to_release;
+          pext_to_release.reserve(pextents.size());
+          // rewriting all valid pextents
+          for (auto e = pextents.begin(); e != pextents.end();
+               b_off += e->length, e++) {
+            if (!e->is_valid()) {
+              continue;
+            }
+            int r = alloc->reserve(e->length);
+            if (r != 0) {
+              derr << __func__ << " failed to reserve 0x" << std::hex << e->length
+                   << " bytes, misreferences repair is incomplete" << std::dec << dendl;
+              bypass_rest = true;
+              break;
+            }
+            PExtentVector exts;
+            int64_t alloc_len = alloc->allocate(e->length, min_alloc_size,
+                                                0, 0, &exts);
+            if (alloc_len < (int64_t)e->length) {
+              derr << __func__ << " allocate failed on 0x" << std::hex << e->length
+                   << " min_alloc_size 0x" << min_alloc_size << std::dec << dendl;
+              assert(0 == "allocate failed, wtf");
+            }
+            expected_statfs.allocated += e->length;
+            if (compressed) {
+              expected_statfs.compressed_allocated += e->length;
+            }
+            bufferlist bl;
+            IOContext ioc(cct, NULL, true); // allow EIO
+            r = bdev->read(e->offset, e->length, &bl, &ioc, false);
+            if (r < 0) {
+              derr << __func__ << " failed to read from 0x" << std::hex << e->offset
+                   << "~" << e->length << std::dec << dendl;
+              assert(0 == "read failed, wtf");
+            }
+            pext_to_release.push_back(*e);
+            e = pextents.erase(e);
+            e = pextents.insert(e, exts.begin(), exts.end());
+            b->get_blob().map_bl(
+              b_off, bl,
+              [&](uint64_t offset, bufferlist& t) {
+                int r = bdev->write(offset, t, false);
+                assert(r == 0);
+              });
+            e += exts.size() - 1;
+            for (auto& p : exts) {
+              fm->allocate(p.offset, p.length, txn);
+            }
+          } // for (auto e = pextents.begin(); e != pextents.end(); ...)
+
+          if (b->get_blob().is_shared()) {
+            b->dirty_blob().clear_flag(bluestore_blob_t::FLAG_SHARED);
+
+            auto sb_it = sb_info.find(b->shared_blob->get_sbid());
+            assert(sb_it != sb_info.end());
+            sb_info_t& sbi = sb_it->second;
+
+            for (auto& r : sbi.ref_map.ref_map) {
+              expected_statfs.allocated -= r.second.length;
+              if (sbi.compressed) {
+                // NB: it's crucial to use the compressed flag from sb_info_t
+                // as we originally used that value while accumulating
+                // expected_statfs
+                expected_statfs.compressed_allocated -= r.second.length;
+              }
+            }
+            sbi.updated = sbi.passed = true;
+            sbi.ref_map.clear();
+
+            // relying on blob's pextents to decide what to release.
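Aside: the loop above is an allocate-copy-swap rewrite: claim fresh space,
copy the payload there, point the blob at the new location, and only then
queue the old range for release. A condensed standalone sketch of that
ordering; Allocator and BlockDevice are simplified stand-ins for the real
interfaces, and error handling is omitted:

    #include <cstdint>
    #include <vector>

    struct Pextent { uint64_t offset, length; };

    struct Allocator {
      uint64_t next = 0x100000;
      Pextent allocate(uint64_t len) { Pextent p{next, len}; next += len; return p; }
    };
    struct BlockDevice {
      void read(uint64_t, uint64_t len, std::vector<char>* bl) { bl->resize(len); }
      void write(uint64_t, const std::vector<char>&) {}
    };

    // Returns the extents whose release must be deferred until the new data
    // and the updated onode are durable (the ordering is what makes it safe).
    std::vector<Pextent> rewrite_blob(std::vector<Pextent>& pextents,
                                      Allocator& alloc, BlockDevice& bdev)
    {
      std::vector<Pextent> to_release;
      for (auto& e : pextents) {
        Pextent fresh = alloc.allocate(e.length);  // new home for the data
        std::vector<char> bl;
        bdev.read(e.offset, e.length, &bl);        // copy old contents...
        bdev.write(fresh.offset, bl);              // ...to the new location
        to_release.push_back(e);                   // old extent freed later
        e = fresh;                                 // blob references fresh space
      }
      return to_release;
    }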
+            for (auto& p : pext_to_release) {
+              to_release.insert(p.offset, p.length);
+            }
+          } else {
+            for (auto& p : pext_to_release) {
+              expected_statfs.allocated -= p.length;
+              if (compressed) {
+                expected_statfs.compressed_allocated -= p.length;
+              }
+              to_release.insert(p.offset, p.length);
+            }
+          }
+          if (bypass_rest) {
+            break;
+          }
+        } // for (auto b : blobs)
+        if (need_onode_update) {
+          o->extent_map.dirty_range(0, OBJECT_MAX_SIZE);
+          _record_onode(o, txn);
+        }
+      } // for (it->lower_bound(string()); it->valid(); it->next())
+
+      for (auto it = to_release.begin(); it != to_release.end(); ++it) {
+        dout(10) << __func__ << " release 0x" << std::hex << it.get_start()
+                 << "~" << it.get_len() << std::dec << dendl;
+        fm->release(it.get_start(), it.get_len(), txn);
       }
-    }
-  }
+      alloc->release(to_release);
+      to_release.clear();
+    } // if (it)
+  } // if (repair && repairer.preprocess_misreference(db))
+
   for (auto &p : sb_info) {
-    derr << "fsck error: shared_blob 0x" << p.first
-        << " key is missing (" << *p.second.sb << ")" << dendl;
-    ++errors;
+    sb_info_t& sbi = p.second;
+    if (!sbi.passed) {
+      derr << "fsck error: missing " << *sbi.sb << dendl;
+      ++errors;
+    }
+    if (repair && (!sbi.passed || sbi.updated)) {
+      auto sbid = p.first;
+      if (sbi.ref_map.empty()) {
+        assert(sbi.passed);
+        dout(20) << __func__ << " " << *sbi.sb
+                 << " is empty, removing" << dendl;
+        repairer.fix_shared_blob(db, sbid, nullptr);
+      } else {
+        bufferlist bl;
+        bluestore_shared_blob_t persistent(sbid, std::move(sbi.ref_map));
+        encode(persistent, bl);
+        dout(20) << __func__ << " " << *sbi.sb
+                 << " is " << bl.length() << " bytes, updating" << dendl;
+
+        repairer.fix_shared_blob(db, sbid, &bl);
+      }
+    }
   }
+  sb_info.clear();
+
   if (!(actual_statfs == expected_statfs)) {
     derr << "fsck error: actual " << actual_statfs
         << " != expected " << expected_statfs << dendl;
+    if (repair) {
+      repairer.fix_statfs(db, expected_statfs);
+    }
     ++errors;
   }
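Aside: the reconciliation loop above applies a simple rule per shared-blob
entry: if the on-disk record was never matched ("passed" is false) or
disagreed with the fsck-computed state ("updated"), it is rewritten from the
computed ref_map; an empty ref_map means the record should not exist at all.
A self-contained sketch of just that decision, with simplified stand-in types
and a string in place of the real encode():

    #include <cstdint>
    #include <map>
    #include <string>

    struct SbInfo {
      std::map<uint64_t, uint64_t> ref_map;  // offset -> length (simplified)
      bool passed = false;   // matching on-disk record seen and checked
      bool updated = false;  // on-disk record disagreed with computed state
    };

    enum class Action { None, Remove, Rewrite };

    // Decide what the commit stage should do with one shared-blob record.
    Action reconcile(const SbInfo& sbi, std::string* encoded)
    {
      if (sbi.passed && !sbi.updated)
        return Action::None;     // record exists and already matches
      if (sbi.ref_map.empty())
        return Action::Remove;   // no references left -> drop the record
      for (auto& [off, len] : sbi.ref_map)  // stand-in for encode(...)
        *encoded += std::to_string(off) + "~" + std::to_string(len) + ";";
      return Action::Rewrite;    // recreate record from fsck-computed refs
    }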
+ dout(10) << __func__ << " ignoring free extent between SUPER_RESERVED" + << " and min_alloc_size, 0x" << std::hex << offset << "~" + << length << dendl; + } else { + intersects = true; + if (repair) { + repairer.fix_false_free(db, fm, + pos * min_alloc_size, + min_alloc_size); + } + } } else { bs.set(pos); } } ); if (intersects) { - if (offset == SUPER_RESERVED && - length == min_alloc_size - SUPER_RESERVED) { - // this is due to the change just after luminous to min_alloc_size - // granularity allocations, and our baked in assumption at the top - // of _fsck that 0~round_up_to(SUPER_RESERVED,min_alloc_size) is used - // (vs luminous's round_up_to(SUPER_RESERVED,block_size)). harmless, - // since we will never allocate this region below min_alloc_size. - dout(10) << __func__ << " ignoring free extent between SUPER_RESERVED" - << " and min_alloc_size, 0x" << std::hex << offset << "~" - << length << dendl; - } else { - derr << "fsck error: free extent 0x" << std::hex << offset - << "~" << length << std::dec - << " intersects allocated blocks" << dendl; - ++errors; - } + derr << "fsck error: free extent 0x" << std::hex << offset + << "~" << length << std::dec + << " intersects allocated blocks" << dendl; + ++errors; } } fm->enumerate_reset(); size_t count = used_blocks.count(); if (used_blocks.size() != count) { assert(used_blocks.size() > count); - ++errors; used_blocks.flip(); size_t start = used_blocks.find_first(); while (start != decltype(used_blocks)::npos) { @@ -6313,10 +6616,17 @@ int BlueStore::_fsck(bool deep, bool repair) while (true) { size_t next = used_blocks.find_next(cur); if (next != cur + 1) { + ++errors; derr << "fsck error: leaked extent 0x" << std::hex << ((uint64_t)start * fm->get_alloc_size()) << "~" << ((cur + 1 - start) * fm->get_alloc_size()) << std::dec << dendl; + if (repair) { + repairer.fix_leaked(db, + fm, + start * min_alloc_size, + (cur + 1 - start) * min_alloc_size); + } start = next; break; } @@ -6326,7 +6636,11 @@ int BlueStore::_fsck(bool deep, bool repair) used_blocks.flip(); } } - + if (repair) { + dout(5) << __func__ << " applying repair results" << dendl; + repaired = repairer.apply(db); + dout(5) << __func__ << " repair applied" << dendl; + } out_scan: mempool_thread.shutdown(); _flush_cache(); @@ -6358,10 +6672,135 @@ int BlueStore::_fsck(bool deep, bool repair) << dendl; utime_t duration = ceph_clock_now() - start; - dout(1) << __func__ << " finish with " << errors << " errors, " << repaired - << " repaired, " << (errors - repaired) << " remaining in " + dout(1) << __func__ << " <<>> with " << errors << " errors, " << repaired + << " repaired, " << (errors - (int)repaired) << " remaining in " << duration << " seconds" << dendl; - return errors - repaired; + return errors - (int)repaired; +} + +/// methods to inject various errors fsck can repair +void BlueStore::inject_broken_shared_blob_key(const string& key, + const bufferlist& bl) +{ + KeyValueDB::Transaction txn; + txn = db->get_transaction(); + txn->set(PREFIX_SHARED_BLOB, key, bl); + db->submit_transaction_sync(txn); +}; + +void BlueStore::inject_leaked(uint64_t len) +{ + KeyValueDB::Transaction txn; + txn = db->get_transaction(); + + PExtentVector exts; + int r = alloc->reserve(len); + assert(r == 0); + int64_t alloc_len = alloc->allocate(len, min_alloc_size, + min_alloc_size * 256, 0, &exts); + assert(alloc_len >= (int64_t)len); + for (auto& p : exts) { + fm->allocate(p.offset, p.length, txn); + } + db->submit_transaction_sync(txn); +} + +void BlueStore::inject_false_free(coll_t cid, 
+
+/// methods to inject various errors fsck can repair
+void BlueStore::inject_broken_shared_blob_key(const string& key,
+                                              const bufferlist& bl)
+{
+  KeyValueDB::Transaction txn;
+  txn = db->get_transaction();
+  txn->set(PREFIX_SHARED_BLOB, key, bl);
+  db->submit_transaction_sync(txn);
+}
+
+void BlueStore::inject_leaked(uint64_t len)
+{
+  KeyValueDB::Transaction txn;
+  txn = db->get_transaction();
+
+  PExtentVector exts;
+  int r = alloc->reserve(len);
+  assert(r == 0);
+  int64_t alloc_len = alloc->allocate(len, min_alloc_size,
+                                      min_alloc_size * 256, 0, &exts);
+  assert(alloc_len >= (int64_t)len);
+  for (auto& p : exts) {
+    fm->allocate(p.offset, p.length, txn);
+  }
+  db->submit_transaction_sync(txn);
+}
+
+void BlueStore::inject_false_free(coll_t cid, ghobject_t oid)
+{
+  KeyValueDB::Transaction txn;
+  OnodeRef o;
+  CollectionRef c = _get_collection(cid);
+  assert(c);
+  {
+    RWLock::WLocker l(c->lock); // just to avoid internal asserts
+    o = c->get_onode(oid, false);
+    assert(o);
+    o->extent_map.fault_range(db, 0, OBJECT_MAX_SIZE);
+  }
+
+  bool injected = false;
+  txn = db->get_transaction();
+  auto& em = o->extent_map.extent_map;
+  std::vector<const PExtentVector*> v;
+  if (em.size()) {
+    v.push_back(&em.begin()->blob->get_blob().get_extents());
+  }
+  if (em.size() > 1) {
+    auto it = em.end();
+    --it;
+    v.push_back(&(it->blob->get_blob().get_extents()));
+  }
+  for (auto pext : v) {
+    if (pext->size()) {
+      auto p = pext->begin();
+      while (p != pext->end()) {
+        if (p->is_valid()) {
+          dout(20) << __func__ << " release 0x" << std::hex << p->offset
+                   << "~" << p->length << std::dec << dendl;
+          fm->release(p->offset, p->length, txn);
+          injected = true;
+          break;
+        }
+        ++p;
+      }
+    }
+  }
+  assert(injected);
+  db->submit_transaction_sync(txn);
+}
+
+void BlueStore::inject_statfs(const store_statfs_t& new_statfs)
+{
+  BlueStoreRepairer repairer;
+  repairer.fix_statfs(db, new_statfs);
+  repairer.apply(db);
+}
+
+void BlueStore::inject_misreference(coll_t cid1, ghobject_t oid1,
+                                    coll_t cid2, ghobject_t oid2,
+                                    uint64_t offset)
+{
+  OnodeRef o1;
+  CollectionRef c1 = _get_collection(cid1);
+  assert(c1);
+  {
+    RWLock::WLocker l(c1->lock); // just to avoid internal asserts
+    o1 = c1->get_onode(oid1, false);
+    assert(o1);
+    o1->extent_map.fault_range(db, offset, OBJECT_MAX_SIZE);
+  }
+  OnodeRef o2;
+  CollectionRef c2 = _get_collection(cid2);
+  assert(c2);
+  {
+    RWLock::WLocker l(c2->lock); // just to avoid internal asserts
+    o2 = c2->get_onode(oid2, false);
+    assert(o2);
+    o2->extent_map.fault_range(db, offset, OBJECT_MAX_SIZE);
+  }
+  Extent& e1 = *(o1->extent_map.seek_lextent(offset));
+  Extent& e2 = *(o2->extent_map.seek_lextent(offset));
+
+  // require onode/extent layout to be the same (and simple)
+  // to make things easier
+  assert(o1->onode.extent_map_shards.empty());
+  assert(o2->onode.extent_map_shards.empty());
+  assert(o1->extent_map.spanning_blob_map.size() == 0);
+  assert(o2->extent_map.spanning_blob_map.size() == 0);
+  assert(e1.logical_offset == e2.logical_offset);
+  assert(e1.length == e2.length);
+  assert(e1.blob_offset == e2.blob_offset);
+
+  KeyValueDB::Transaction txn;
+  txn = db->get_transaction();
+
+  // along with the misreference error this will create space leak errors
+  e2.blob->dirty_blob() = e1.blob->get_blob();
+  o2->extent_map.dirty_range(offset, e2.length);
+  o2->extent_map.update(txn, false);
+
+  _record_onode(o2, txn);
+  db->submit_transaction_sync(txn);
 }
 
 void BlueStore::collect_metadata(map<string,string> *pm)
@@ -8042,48 +8481,7 @@ void BlueStore::_txc_write_nodes(TransContext *txc, KeyValueDB::Transaction t)
 
   // finalize onodes
   for (auto o : txc->onodes) {
-    // finalize extent_map shards
-    o->extent_map.update(t, false);
-    if (o->extent_map.needs_reshard()) {
-      o->extent_map.reshard(db, t);
-      o->extent_map.update(t, true);
-      if (o->extent_map.needs_reshard()) {
-       dout(20) << __func__ << " warning: still wants reshard, check options?"
-                << dendl;
-       o->extent_map.clear_needs_reshard();
-      }
-      logger->inc(l_bluestore_onode_reshard);
-    }
-
-    // bound encode
-    size_t bound = 0;
-    denc(o->onode, bound);
-    o->extent_map.bound_encode_spanning_blobs(bound);
-    if (o->onode.extent_map_shards.empty()) {
-      denc(o->extent_map.inline_bl, bound);
-    }
-
-    // encode
-    bufferlist bl;
-    unsigned onode_part, blob_part, extent_part;
-    {
-      auto p = bl.get_contiguous_appender(bound, true);
-      denc(o->onode, p);
-      onode_part = p.get_logical_offset();
-      o->extent_map.encode_spanning_blobs(p);
-      blob_part = p.get_logical_offset() - onode_part;
-      if (o->onode.extent_map_shards.empty()) {
-       denc(o->extent_map.inline_bl, p);
-      }
-      extent_part = p.get_logical_offset() - onode_part - blob_part;
-    }
-
-    dout(20) << __func__ << " onode " << o->oid << " is " << bl.length()
-            << " (" << onode_part << " bytes onode + "
-            << blob_part << " bytes spanning blobs + "
-            << extent_part << " bytes inline extents)"
-            << dendl;
-    t->set(PREFIX_OBJ, o->key.c_str(), o->key.size(), bl);
+    _record_onode(o, t);
     o->flushing_count++;
   }
 
@@ -11696,4 +12094,215 @@ void BlueStore::_apply_padding(uint64_t head_pad,
   }
 }
 
+void BlueStore::_record_onode(OnodeRef &o, KeyValueDB::Transaction &txn)
+{
+  // finalize extent_map shards
+  o->extent_map.update(txn, false);
+  if (o->extent_map.needs_reshard()) {
+    o->extent_map.reshard(db, txn);
+    o->extent_map.update(txn, true);
+    if (o->extent_map.needs_reshard()) {
+      dout(20) << __func__ << " warning: still wants reshard, check options?"
+               << dendl;
+      o->extent_map.clear_needs_reshard();
+    }
+    logger->inc(l_bluestore_onode_reshard);
+  }
+
+  // bound encode
+  size_t bound = 0;
+  denc(o->onode, bound);
+  o->extent_map.bound_encode_spanning_blobs(bound);
+  if (o->onode.extent_map_shards.empty()) {
+    denc(o->extent_map.inline_bl, bound);
+  }
+
+  // encode
+  bufferlist bl;
+  unsigned onode_part, blob_part, extent_part;
+  {
+    auto p = bl.get_contiguous_appender(bound, true);
+    denc(o->onode, p);
+    onode_part = p.get_logical_offset();
+    o->extent_map.encode_spanning_blobs(p);
+    blob_part = p.get_logical_offset() - onode_part;
+    if (o->onode.extent_map_shards.empty()) {
+      denc(o->extent_map.inline_bl, p);
+    }
+    extent_part = p.get_logical_offset() - onode_part - blob_part;
+  }
+
+  dout(20) << __func__ << " onode " << o->oid << " is " << bl.length()
+          << " (" << onode_part << " bytes onode + "
+          << blob_part << " bytes spanning blobs + "
+          << extent_part << " bytes inline extents)"
+          << dendl;
+
+  txn->set(PREFIX_OBJ, o->key.c_str(), o->key.size(), bl);
+}
+
 // ===========================================
+// BlueStoreRepairer
+
+size_t BlueStoreRepairer::StoreSpaceTracker::filter_out(
+  const interval_set<uint64_t>& extents)
+{
+  assert(granularity); // initialized
+  // can't be called a second time
+  assert(!was_filtered_out);
+  assert(collections_bfs.size() == objects_bfs.size());
+
+  size_t prev_pos = 0;
+  size_t npos = collections_bfs.size();
+
+  bloom_vector collections_reduced;
+  bloom_vector objects_reduced;
+
+  for (auto e : extents) {
+    if (e.second == 0) {
+      continue;
+    }
+    size_t pos = max(e.first / granularity, prev_pos);
+    size_t end_pos = 1 + (e.first + e.second - 1) / granularity;
+    while (pos != npos && pos < end_pos) {
+      assert(collections_bfs[pos].element_count() ==
+             objects_bfs[pos].element_count());
+      if (collections_bfs[pos].element_count()) {
+        collections_reduced.push_back(std::move(collections_bfs[pos]));
+        objects_reduced.push_back(std::move(objects_bfs[pos]));
+      }
+      ++pos;
+    }
+    prev_pos = end_pos;
+  }
+  collections_reduced.swap(collections_bfs);
+  objects_reduced.swap(objects_bfs);
+  was_filtered_out = true;
+  return collections_bfs.size();
+}
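Aside: filter_out() compacts the two filter lists so that only the
per-granularity slots overlapping a suspicious extent survive; later
is_used(oid) probes then scan a short vector instead of one slot per
granularity unit of the whole device. A standalone sketch of the compaction
with plain ints standing in for the Bloom filters (the real code also drops
slots whose filters are empty):

    #include <algorithm>
    #include <cstdint>
    #include <utility>
    #include <vector>

    using Extent = std::pair<uint64_t, uint64_t>;  // offset, length

    std::vector<int> filter_out(const std::vector<int>& slots,
                                const std::vector<Extent>& extents,
                                uint64_t granularity)
    {
      std::vector<int> reduced;
      uint64_t prev_pos = 0;
      for (auto& e : extents) {            // assumed sorted and non-empty
        uint64_t pos = std::max(e.first / granularity, prev_pos);
        uint64_t end_pos = 1 + (e.first + e.second - 1) / granularity;
        for (; pos < slots.size() && pos < end_pos; ++pos)
          reduced.push_back(slots[pos]);   // surviving slots, in disk order
        prev_pos = end_pos;                // avoid revisiting earlier slots
      }
      return reduced;
    }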
+
+bool BlueStoreRepairer::remove_key(KeyValueDB *db,
+                                   const string& prefix,
+                                   const string& key)
+{
+  if (!remove_key_txn) {
+    remove_key_txn = db->get_transaction();
+  }
+  ++to_repair_cnt;
+  remove_key_txn->rmkey(prefix, key);
+
+  return true;
+}
+
+bool BlueStoreRepairer::fix_shared_blob(
+  KeyValueDB *db,
+  uint64_t sbid,
+  const bufferlist* bl)
+{
+  KeyValueDB::Transaction txn;
+  if (fix_misreferences_txn) { // reuse this txn
+    txn = fix_misreferences_txn;
+  } else {
+    if (!fix_shared_blob_txn) {
+      fix_shared_blob_txn = db->get_transaction();
+    }
+    txn = fix_shared_blob_txn;
+  }
+  string key;
+  get_shared_blob_key(sbid, &key);
+
+  ++to_repair_cnt;
+  if (bl) {
+    txn->set(PREFIX_SHARED_BLOB, key, *bl);
+  } else {
+    txn->rmkey(PREFIX_SHARED_BLOB, key);
+  }
+  return true;
+}
+
+bool BlueStoreRepairer::fix_statfs(KeyValueDB *db,
+                                   const store_statfs_t& new_statfs)
+{
+  if (!fix_statfs_txn) {
+    fix_statfs_txn = db->get_transaction();
+  }
+  BlueStore::volatile_statfs vstatfs;
+  vstatfs = new_statfs;
+  bufferlist bl;
+  vstatfs.encode(bl);
+  ++to_repair_cnt;
+  fix_statfs_txn->set(PREFIX_STAT, "bluestore_statfs", bl);
+  return true;
+}
+
+bool BlueStoreRepairer::fix_leaked(KeyValueDB *db,
+                                   FreelistManager* fm,
+                                   uint64_t offset, uint64_t len)
+{
+  if (!fix_fm_leaked_txn) {
+    fix_fm_leaked_txn = db->get_transaction();
+  }
+  ++to_repair_cnt;
+  fm->release(offset, len, fix_fm_leaked_txn);
+  return true;
+}
+
+bool BlueStoreRepairer::fix_false_free(KeyValueDB *db,
+                                       FreelistManager* fm,
+                                       uint64_t offset, uint64_t len)
+{
+  if (!fix_fm_false_free_txn) {
+    fix_fm_false_free_txn = db->get_transaction();
+  }
+  ++to_repair_cnt;
+  fm->allocate(offset, len, fix_fm_false_free_txn);
+  return true;
+}
+
+bool BlueStoreRepairer::preprocess_misreference(KeyValueDB *db)
+{
+  if (misreferenced_extents.size()) {
+    size_t n = space_usage_tracker.filter_out(misreferenced_extents);
+    assert(n > 0);
+    if (!fix_misreferences_txn) {
+      fix_misreferences_txn = db->get_transaction();
+    }
+    return true;
+  }
+  return false;
+}
+
+unsigned BlueStoreRepairer::apply(KeyValueDB* db)
+{
+  if (fix_fm_leaked_txn) {
+    db->submit_transaction_sync(fix_fm_leaked_txn);
+    fix_fm_leaked_txn = nullptr;
+  }
+  if (fix_fm_false_free_txn) {
+    db->submit_transaction_sync(fix_fm_false_free_txn);
+    fix_fm_false_free_txn = nullptr;
+  }
+  if (remove_key_txn) {
+    db->submit_transaction_sync(remove_key_txn);
+    remove_key_txn = nullptr;
+  }
+  if (fix_misreferences_txn) {
+    db->submit_transaction_sync(fix_misreferences_txn);
+    fix_misreferences_txn = nullptr;
+  }
+  if (fix_shared_blob_txn) {
+    db->submit_transaction_sync(fix_shared_blob_txn);
+    fix_shared_blob_txn = nullptr;
+  }
+
+  if (fix_statfs_txn) {
+    db->submit_transaction_sync(fix_statfs_txn);
+    fix_statfs_txn = nullptr;
+  }
+  unsigned repaired = to_repair_cnt;
+  to_repair_cnt = 0;
+  return repaired;
+}
+
+// =======================================================
+
diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h
index 746bdc97a8d16..e0da63ffc2d5f 100644
--- a/src/os/bluestore/BlueStore.h
+++ b/src/os/bluestore/BlueStore.h
@@ -33,6 +33,7 @@
 #include "include/unordered_map.h"
 #include "include/memory.h"
 #include "include/mempool.h"
+#include "common/bloom_filter.hpp"
 #include "common/Finisher.h"
 #include "common/perf_counters.h"
 #include "compressor/Compressor.h"
@@ -45,6 +46,7 @@
 class Allocator;
 class FreelistManager;
 class BlueFS;
+class BlueStoreRepairer;
 
 //#define DEBUG_CACHE
 //#define DEBUG_DEFERRED
@@ -1457,6 +1459,14 @@ public:
     int64_t& compressed_allocated() {
       return values[STATFS_COMPRESSED_ALLOCATED];
     }
+    volatile_statfs& operator=(const store_statfs_t& st) {
+      values[STATFS_ALLOCATED] = st.allocated;
+      values[STATFS_STORED] = st.stored;
+      values[STATFS_COMPRESSED_ORIGINAL] = st.compressed_original;
+      values[STATFS_COMPRESSED] = st.compressed;
+      values[STATFS_COMPRESSED_ALLOCATED] = st.compressed_allocated;
+      return *this;
+    }
     bool is_empty() {
       return values[STATFS_ALLOCATED] == 0 &&
        values[STATFS_STORED] == 0 &&
@@ -2044,11 +2054,13 @@ public:
 private:
   int _fsck_check_extents(
+    const coll_t& cid,
     const ghobject_t& oid,
     const PExtentVector& extents,
     bool compressed,
     mempool_dynamic_bitset &used_blocks,
     uint64_t granularity,
+    BlueStoreRepairer* repairer,
     store_statfs_t& expected_statfs);
 
   void _buffer_cache_write(
@@ -2080,6 +2092,8 @@ private:
     uint64_t tail_pad,
     bufferlist& padded);
 
+  void _record_onode(OnodeRef &o, KeyValueDB::Transaction &txn);
+
   // -- ondisk version ---
 public:
   const int32_t latest_ondisk_format = 2;  ///< our version
@@ -2330,6 +2344,17 @@ public:
     RWLock::WLocker l(debug_read_error_lock);
     debug_mdata_error_objects.insert(o);
   }
+
+  /// methods to inject various errors fsck can repair
+  void inject_broken_shared_blob_key(const string& key,
+                                     const bufferlist& bl);
+  void inject_leaked(uint64_t len);
+  void inject_false_free(coll_t cid, ghobject_t oid);
+  void inject_statfs(const store_statfs_t& new_statfs);
+  void inject_misreference(coll_t cid1, ghobject_t oid1,
+                           coll_t cid2, ghobject_t oid2,
+                           uint64_t offset);
+
   void compact() override {
     assert(db);
     db->compact();
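Aside: the injection hooks declared above exist purely so tests can plant a
known inconsistency and then drive the fsck/repair/fsck cycle (the real flow
appears in BluestoreRepairTest further below). A self-contained mock of that
round-trip, with Store standing in for BlueStore:

    #include <cassert>

    struct Store {
      int injected = 0;
      void mount() {}
      void umount() {}
      void inject_leaked() { ++injected; }          // mark space with no owner
      int fsck(bool /*deep*/) { return injected; }  // returns errors - repaired
      int repair(bool /*deep*/) { injected = 0; return 0; }
    };

    int main() {
      Store s;
      s.mount();
      s.inject_leaked();
      s.umount();
      assert(s.fsck(false) == 1);   // detection: one fsck error
      assert(s.repair(false) == 0); // repair: nothing left unfixed
      assert(s.fsck(false) == 0);   // subsequent fsck is clean
      return 0;
    }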
@@ -2633,4 +2658,187 @@ static inline void intrusive_ptr_release(BlueStore::OpSequencer *o) {
   o->put();
 }
 
+class BlueStoreRepairer
+{
+public:
+  // to simplify future potential migration to mempools
+  using fsck_interval = interval_set<uint64_t>;
+
+  // Structure to track which pextents are used by a specific cid/oid.
+  // As with a Bloom filter, only positive and false-positive matches
+  // are possible.
+  // Maintains two lists of bloom filters, one for cids and one for oids,
+  // where each list entry is a BF for a specific disk pextent range.
+  // The extent length per filter is determined on init.
+  // Allows filtering out 'uninteresting' pextents to speed up subsequent
+  // 'is_used' checks.
+  struct StoreSpaceTracker {
+    const uint64_t BLOOM_FILTER_SALT_COUNT = 2;
+    const uint64_t BLOOM_FILTER_TABLE_SIZE = 32; // bytes per single filter
+    const uint64_t BLOOM_FILTER_EXPECTED_COUNT = 16; // arbitrarily selected
+    static const uint64_t DEF_MEM_CAP = 128 * 1024 * 1024;
+
+    typedef mempool::bluestore_fsck::vector<bloom_filter> bloom_vector;
+    bloom_vector collections_bfs;
+    bloom_vector objects_bfs;
+
+    bool was_filtered_out = false;
+    uint64_t granularity = 0; // extent length for a single filter
+
+    StoreSpaceTracker() {
+    }
+    StoreSpaceTracker(const StoreSpaceTracker& from) :
+      collections_bfs(from.collections_bfs),
+      objects_bfs(from.objects_bfs),
+      granularity(from.granularity) {
+    }
+
+    void init(uint64_t total,
+             uint64_t min_alloc_size,
+             uint64_t mem_cap = DEF_MEM_CAP) {
+      assert(!granularity); // not initialized yet
+      assert(min_alloc_size && isp2(min_alloc_size));
+      assert(mem_cap);
+
+      total = ROUND_UP_TO(total, min_alloc_size);
+      granularity = total * BLOOM_FILTER_TABLE_SIZE * 2 / mem_cap;
+
+      if (!granularity) {
+       granularity = min_alloc_size;
+      } else {
+       granularity = ROUND_UP_TO(granularity, min_alloc_size);
+      }
+
+      uint64_t entries = P2ROUNDUP(total, granularity) / granularity;
+      collections_bfs.resize(entries,
+                            bloom_filter(BLOOM_FILTER_SALT_COUNT,
+                                         BLOOM_FILTER_TABLE_SIZE,
+                                         0,
+                                         BLOOM_FILTER_EXPECTED_COUNT));
+      objects_bfs.resize(entries,
+                        bloom_filter(BLOOM_FILTER_SALT_COUNT,
+                                     BLOOM_FILTER_TABLE_SIZE,
+                                     0,
+                                     BLOOM_FILTER_EXPECTED_COUNT));
+    }
+    inline uint32_t get_hash(const coll_t& cid) const {
+      return cid.hash_to_shard(1);
+    }
+    inline void set_used(uint64_t offset, uint64_t len,
+                        const coll_t& cid, const ghobject_t& oid) {
+      assert(granularity); // initialized
+
+      // can't call this func after filter_out has been applied
+      assert(!was_filtered_out);
+      if (!len) {
+       return;
+      }
+      auto pos = offset / granularity;
+      auto end_pos = (offset + len - 1) / granularity;
+      while (pos <= end_pos) {
+       collections_bfs[pos].insert(get_hash(cid));
+       objects_bfs[pos].insert(oid.hobj.get_hash());
+       ++pos;
+      }
+    }
+    // filter out entries unrelated to the specified (broken) extents;
+    // only 'is_used' calls are permitted afterwards
+    size_t filter_out(const fsck_interval& extents);
+
+    // determines if the collection is present after filtering out
+    inline bool is_used(const coll_t& cid) const {
+      assert(was_filtered_out);
+      for (auto& bf : collections_bfs) {
+       if (bf.contains(get_hash(cid))) {
+         return true;
+       }
+      }
+      return false;
+    }
+    // determines if the object is present after filtering out
+    inline bool is_used(const ghobject_t& oid) const {
+      assert(was_filtered_out);
+      for (auto& bf : objects_bfs) {
+       if (bf.contains(oid.hobj.get_hash())) {
+         return true;
+       }
+      }
+      return false;
+    }
+    // determines if the collection is present before filtering out
+    inline bool is_used(const coll_t& cid, uint64_t offs) const {
+      assert(granularity); // initialized
+      assert(!was_filtered_out);
+      auto &bf = collections_bfs[offs / granularity];
+      if (bf.contains(get_hash(cid))) {
+       return true;
+      }
+      return false;
+    }
+    // determines if the object is present before filtering out
+    inline bool is_used(const ghobject_t& oid, uint64_t offs) const {
+      assert(granularity); // initialized
+      assert(!was_filtered_out);
+      auto &bf = objects_bfs[offs / granularity];
+      if (bf.contains(oid.hobj.get_hash())) {
+       return true;
+      }
+      return false;
+    }
+  };
+public:
+
+  bool remove_key(KeyValueDB *db, const string& prefix, const string& key);
+  bool fix_shared_blob(KeyValueDB *db,
+                      uint64_t sbid,
+                      const bufferlist* bl);
+  bool fix_statfs(KeyValueDB *db, const store_statfs_t& new_statfs);
+
+  bool fix_leaked(KeyValueDB *db,
+                 FreelistManager* fm,
+                 uint64_t offset, uint64_t len);
+  bool fix_false_free(KeyValueDB *db,
+                     FreelistManager* fm,
+                     uint64_t offset, uint64_t len);
+
+  void init(uint64_t total_space, uint64_t lres_tracking_unit_size);
+
+  bool preprocess_misreference(KeyValueDB *db);
+
+  unsigned apply(KeyValueDB* db);
+
+  void note_misreference(uint64_t offs, uint64_t len, bool inc_error) {
+    misreferenced_extents.insert(offs, len);
+    if (inc_error) {
+      ++to_repair_cnt;
+    }
+  }
+
+  StoreSpaceTracker& get_space_usage_tracker() {
+    return space_usage_tracker;
+  }
+  const fsck_interval& get_misreferences() const {
+    return misreferenced_extents;
+  }
+  KeyValueDB::Transaction get_fix_misreferences_txn() {
+    return fix_misreferences_txn;
+  }
+
+private:
+  unsigned to_repair_cnt = 0;
+  KeyValueDB::Transaction fix_fm_leaked_txn;
+  KeyValueDB::Transaction fix_fm_false_free_txn;
+  KeyValueDB::Transaction remove_key_txn;
+  KeyValueDB::Transaction fix_statfs_txn;
+  KeyValueDB::Transaction fix_shared_blob_txn;
+
+  KeyValueDB::Transaction fix_misreferences_txn;
+
+  StoreSpaceTracker space_usage_tracker;
+
+  // non-shared extents with multiple references
+  fsck_interval misreferenced_extents;
+
+};
+
 #endif
diff --git a/src/os/bluestore/bluestore_types.h b/src/os/bluestore/bluestore_types.h
index 66d68ab2fcc58..805f10e3aa299 100644
--- a/src/os/bluestore/bluestore_types.h
+++ b/src/os/bluestore/bluestore_types.h
@@ -444,6 +444,9 @@ public:
   const PExtentVector& get_extents() const {
     return extents;
   }
+  PExtentVector& dirty_extents() {
+    return extents;
+  }
 
   DENC_HELPERS;
   void bound_encode(size_t& p, uint64_t struct_v) const {
@@ -852,6 +855,9 @@ struct bluestore_shared_blob_t {
   bluestore_extent_ref_map_t ref_map;  ///< shared blob extents
 
   bluestore_shared_blob_t(uint64_t _sbid) : sbid(_sbid) {}
+  bluestore_shared_blob_t(uint64_t _sbid,
+                         bluestore_extent_ref_map_t&& _ref_map)
+    : sbid(_sbid), ref_map(std::move(_ref_map)) {}
 
   DENC(bluestore_shared_blob_t, v, p) {
     DENC_START(1, 1, p);
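Aside: the StoreSpaceTracker::init() sizing in BlueStore.h above is easiest
to see with concrete numbers; the arithmetic below reproduces the figures
asserted by the StoreSpaceTracker unit test further down (4 TiB device,
default 128 MiB memory cap, two 32-byte filters per slot):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint64_t table_size = 32;  // BLOOM_FILTER_TABLE_SIZE, bytes/filter
      const uint64_t total = (uint64_t)4096 * 1024 * 1024 * 1024;  // 4 TiB
      const uint64_t mem_cap = 128 * 1024 * 1024;                  // DEF_MEM_CAP

      // two filter lists (cids + oids), each table_size bytes per slot
      uint64_t granularity = total * table_size * 2 / mem_cap;
      assert(granularity == 2 * 1024 * 1024);      // 2 MiB per filter slot
      // (the real code additionally rounds granularity up to min_alloc_size)

      uint64_t entries = (total + granularity - 1) / granularity;
      assert(entries == 2048 * 1024);              // filters per list
      return 0;
    }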
diff --git a/src/test/objectstore/store_test.cc b/src/test/objectstore/store_test.cc
index 0f43c6d72e55c..13ef8173bd7be 100644
--- a/src/test/objectstore/store_test.cc
+++ b/src/test/objectstore/store_test.cc
@@ -6907,6 +6907,129 @@ TEST_P(StoreTestSpecificAUSize, fsckOnUnalignedDevice2) {
   g_conf->apply_changes(NULL);
 }
 
+TEST_P(StoreTest, BluestoreRepairTest) {
+  if (string(GetParam()) != "bluestore")
+    return;
+  const size_t offs_base = 65536 / 2;
+
+  g_ceph_context->_conf->set_val("bluestore_fsck_on_mount", "false");
+  g_ceph_context->_conf->set_val("bluestore_fsck_on_umount", "false");
+  g_ceph_context->_conf->set_val("bluestore_max_blob_size",
+                                 stringify(2 * offs_base));
+  g_ceph_context->_conf->set_val("bluestore_extent_map_shard_max_size", "12000");
+  g_ceph_context->_conf->apply_changes(NULL);
+
+  BlueStore* bstore = dynamic_cast<BlueStore*> (store.get());
+
+  // fill the store with some data
+  coll_t cid(spg_t(pg_t(0,555), shard_id_t::NO_SHARD));
+  auto ch = store->create_new_collection(cid);
+
+  ghobject_t hoid(hobject_t(sobject_t("Object 1", CEPH_NOSNAP)));
+  ghobject_t hoid_dup(hobject_t(sobject_t("Object 1(dup)", CEPH_NOSNAP)));
+  ghobject_t hoid2(hobject_t(sobject_t("Object 2", CEPH_NOSNAP)));
+  ghobject_t hoid_cloned = hoid2;
+  hoid_cloned.hobj.snap = 1;
+  ghobject_t hoid3(hobject_t(sobject_t("Object 3", CEPH_NOSNAP)));
+  ghobject_t hoid3_cloned = hoid3;
+  hoid3_cloned.hobj.snap = 1;
+  bufferlist bl;
+  bl.append("1234512345");
+  int r;
+  const size_t repeats = 16;
+  {
+    cerr << "create collection + write" << std::endl;
+    ObjectStore::Transaction t;
+    t.create_collection(cid, 0);
+    for (auto i = 0ul; i < repeats; ++i) {
+      t.write(cid, hoid, i * offs_base, bl.length(), bl);
+      t.write(cid, hoid_dup, i * offs_base, bl.length(), bl);
+    }
+    for (auto i = 0ul; i < repeats; ++i) {
+      t.write(cid, hoid2, i * offs_base, bl.length(), bl);
+    }
+    t.clone(cid, hoid2, hoid_cloned);
+
+    r = queue_transaction(store, ch, std::move(t));
+    ASSERT_EQ(r, 0);
+  }
+
+  bstore->umount();
+  //////////// leaked pextent fix ////////////
+  cerr << "fix leaked pextents" << std::endl;
+  ASSERT_EQ(bstore->fsck(false), 0);
+  ASSERT_EQ(bstore->repair(false), 0);
+  bstore->mount();
+  bstore->inject_leaked(0x30000);
+  bstore->umount();
+  ASSERT_EQ(bstore->fsck(false), 1);
+  ASSERT_EQ(bstore->repair(false), 0);
+  ASSERT_EQ(bstore->fsck(false), 0);
+
+  //////////// false free fix ////////////
+  cerr << "fix false free pextents" << std::endl;
+  bstore->mount();
+  bstore->inject_false_free(cid, hoid);
+  bstore->umount();
+  ASSERT_EQ(bstore->fsck(false), 2);
+  ASSERT_EQ(bstore->repair(false), 0);
+  ASSERT_EQ(bstore->fsck(false), 0);
+
+  //////////// verify invalid statfs ///////////
+  cerr << "fix invalid statfs" << std::endl;
+  store_statfs_t statfs0, statfs;
+  bstore->mount();
+  ASSERT_EQ(bstore->statfs(&statfs0), 0);
+  statfs = statfs0;
+  statfs.allocated += 0x10000;
+  statfs.stored += 0x10000;
+  ASSERT_FALSE(statfs0 == statfs);
+  bstore->inject_statfs(statfs);
+  bstore->umount();
+
+  ASSERT_EQ(bstore->fsck(false), 1);
+  ASSERT_EQ(bstore->repair(false), 0);
+  ASSERT_EQ(bstore->fsck(false), 0);
+  ASSERT_EQ(bstore->mount(), 0);
+  ASSERT_EQ(bstore->statfs(&statfs), 0);
+  // adjust free space so the comparison succeeds
+  statfs0.available = statfs.available;
+  ASSERT_EQ(statfs0, statfs);
+
+  ///////// undecodable shared blob key / stray shared blob records ///////
+  cerr << "undecodable shared blob key" << std::endl;
+  bstore->inject_broken_shared_blob_key("undec1",
+                                        bufferlist());
+  bstore->inject_broken_shared_blob_key("undecodable key 2",
+                                        bufferlist());
+  bstore->inject_broken_shared_blob_key("undecodable key 3",
+                                        bufferlist());
+  bstore->umount();
+  ASSERT_EQ(bstore->fsck(false), 3);
+  ASSERT_EQ(bstore->repair(false), 0);
+  ASSERT_EQ(bstore->fsck(false), 0);
+
+  cerr << "misreferencing" << std::endl;
+  bstore->mount();
+  bstore->inject_misreference(cid, hoid, cid, hoid_dup, 0);
+  bstore->inject_misreference(cid, hoid, cid, hoid_dup, (offs_base * repeats) / 2);
+  bstore->inject_misreference(cid, hoid, cid, hoid_dup, offs_base * (repeats - 1));
+
+  bstore->umount();
+  ASSERT_EQ(bstore->fsck(false), 6);
+  ASSERT_EQ(bstore->repair(false), 0);
+
+  ASSERT_EQ(bstore->fsck(true), 0);
+
+  cerr << "Completing" << std::endl;
+  bstore->mount();
+  g_ceph_context->_conf->set_val("bluestore_fsck_on_mount", "true");
+  g_ceph_context->_conf->set_val("bluestore_fsck_on_umount", "true");
+  g_ceph_context->_conf->set_val("bluestore_max_alloc_size", "0");
+  g_ceph_context->_conf->set_val("bluestore_extent_map_shard_max_size", "1200");
+
+  g_ceph_context->_conf->apply_changes(NULL);
+}
+
 int main(int argc, char **argv) {
   vector<const char*> args;
   argv_to_vec(argc, (const char **)argv, args);
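Aside: the unit test in the next file exercises StoreSpaceTracker's two-phase
contract: positional is_used(x, offs) probes are only valid before
filter_out(), whole-store is_used(x) probes only after. A self-contained
stand-in using exact sets per slot instead of Bloom filters (so no false
positives), illustrative names only:

    #include <cassert>
    #include <cstdint>
    #include <set>
    #include <vector>

    struct Tracker {
      uint64_t granularity;
      std::vector<std::set<uint32_t>> slots;  // stand-in for per-slot BFs
      bool filtered = false;

      Tracker(uint64_t total, uint64_t g) : granularity(g), slots(total / g) {}

      void set_used(uint64_t off, uint32_t hash) {
        assert(!filtered);
        slots[off / granularity].insert(hash);
      }
      bool is_used(uint32_t hash, uint64_t off) const {  // pre-filter form
        assert(!filtered);
        return slots[off / granularity].count(hash) != 0;
      }
      void filter_out(const std::vector<uint64_t>& suspects) {
        std::vector<std::set<uint32_t>> reduced;
        for (auto off : suspects)                 // keep overlapping slots only
          reduced.push_back(slots[off / granularity]);
        slots.swap(reduced);
        filtered = true;
      }
      bool is_used(uint32_t hash) const {               // post-filter form
        assert(filtered);
        for (auto& s : slots)
          if (s.count(hash)) return true;
        return false;
      }
    };

    int main() {
      Tracker t(0x10000, 0x1000);
      t.set_used(0x2000, 42);
      assert(t.is_used(42, 0x2000) && !t.is_used(42, 0x3000));
      t.filter_out({0x2000});
      assert(t.is_used(42));  // object still relevant after reduction
      return 0;
    }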
diff --git a/src/test/objectstore/test_bluestore_types.cc b/src/test/objectstore/test_bluestore_types.cc
index 3aad5f6840849..ab497550da0b8 100644
--- a/src/test/objectstore/test_bluestore_types.cc
+++ b/src/test/objectstore/test_bluestore_types.cc
@@ -1441,7 +1441,95 @@ TEST(GarbageCollector, BasicTest)
     em.clear();
     old_extents.clear();
   }
- }
+}
+
+TEST(BlueStoreRepairer, StoreSpaceTracker)
+{
+  BlueStoreRepairer::StoreSpaceTracker bmap0;
+  bmap0.init((uint64_t)4096 * 1024 * 1024 * 1024, 0x1000);
+  ASSERT_EQ(bmap0.granularity, 2 * 1024 * 1024);
+  ASSERT_EQ(bmap0.collections_bfs.size(), 2048 * 1024);
+  ASSERT_EQ(bmap0.objects_bfs.size(), 2048 * 1024);
+
+  BlueStoreRepairer::StoreSpaceTracker bmap;
+  bmap.init(0x2000 * 0x1000 - 1, 0x1000, 512 * 1024);
+  ASSERT_EQ(bmap.granularity, 0x1000);
+  ASSERT_EQ(bmap.collections_bfs.size(), 0x2000);
+  ASSERT_EQ(bmap.objects_bfs.size(), 0x2000);
+
+  coll_t cid;
+  ghobject_t hoid;
+
+  ASSERT_FALSE(bmap.is_used(cid, 0));
+  ASSERT_FALSE(bmap.is_used(hoid, 0));
+  bmap.set_used(0, 1, cid, hoid);
+  ASSERT_TRUE(bmap.is_used(cid, 0));
+  ASSERT_TRUE(bmap.is_used(hoid, 0));
+
+  ASSERT_FALSE(bmap.is_used(cid, 0x1023));
+  ASSERT_FALSE(bmap.is_used(hoid, 0x1023));
+  ASSERT_FALSE(bmap.is_used(cid, 0x2023));
+  ASSERT_FALSE(bmap.is_used(hoid, 0x2023));
+  ASSERT_FALSE(bmap.is_used(cid, 0x3023));
+  ASSERT_FALSE(bmap.is_used(hoid, 0x3023));
+  bmap.set_used(0x1023, 0x3000, cid, hoid);
+  ASSERT_TRUE(bmap.is_used(cid, 0x1023));
+  ASSERT_TRUE(bmap.is_used(hoid, 0x1023));
+  ASSERT_TRUE(bmap.is_used(cid, 0x2023));
+  ASSERT_TRUE(bmap.is_used(hoid, 0x2023));
+  ASSERT_TRUE(bmap.is_used(cid, 0x3023));
+  ASSERT_TRUE(bmap.is_used(hoid, 0x3023));
+
+  ASSERT_FALSE(bmap.is_used(cid, 0x9001));
+  ASSERT_FALSE(bmap.is_used(hoid, 0x9001));
+  ASSERT_FALSE(bmap.is_used(cid, 0xa001));
+  ASSERT_FALSE(bmap.is_used(hoid, 0xa001));
+  ASSERT_FALSE(bmap.is_used(cid, 0xb000));
+  ASSERT_FALSE(bmap.is_used(hoid, 0xb000));
+  ASSERT_FALSE(bmap.is_used(cid, 0xc000));
+  ASSERT_FALSE(bmap.is_used(hoid, 0xc000));
+  bmap.set_used(0x9001, 0x2fff, cid, hoid);
+  ASSERT_TRUE(bmap.is_used(cid, 0x9001));
+  ASSERT_TRUE(bmap.is_used(hoid, 0x9001));
+  ASSERT_TRUE(bmap.is_used(cid, 0xa001));
+  ASSERT_TRUE(bmap.is_used(hoid, 0xa001));
+  ASSERT_TRUE(bmap.is_used(cid, 0xb001));
+  ASSERT_TRUE(bmap.is_used(hoid, 0xb001));
+  ASSERT_FALSE(bmap.is_used(cid, 0xc000));
+  ASSERT_FALSE(bmap.is_used(hoid, 0xc000));
+
+  bmap.set_used(0xa001, 0x2, cid, hoid);
+  ASSERT_TRUE(bmap.is_used(cid, 0x9001));
+  ASSERT_TRUE(bmap.is_used(hoid, 0x9001));
+  ASSERT_TRUE(bmap.is_used(cid, 0xa001));
+  ASSERT_TRUE(bmap.is_used(hoid, 0xa001));
+  ASSERT_TRUE(bmap.is_used(cid, 0xb001));
+  ASSERT_TRUE(bmap.is_used(hoid, 0xb001));
+  ASSERT_FALSE(bmap.is_used(cid, 0xc000));
+  ASSERT_FALSE(bmap.is_used(hoid, 0xc000));
+
+  ASSERT_FALSE(bmap.is_used(cid, 0xc0000));
+  ASSERT_FALSE(bmap.is_used(hoid, 0xc0000));
+  ASSERT_FALSE(bmap.is_used(cid, 0xc1000));
+  ASSERT_FALSE(bmap.is_used(hoid, 0xc1000));
+
+  bmap.set_used(0xc0000, 0x2000, cid, hoid);
+  ASSERT_TRUE(bmap.is_used(cid, 0xc0000));
+  ASSERT_TRUE(bmap.is_used(hoid, 0xc0000));
+  ASSERT_TRUE(bmap.is_used(cid, 0xc1000));
+  ASSERT_TRUE(bmap.is_used(hoid, 0xc1000));
+
+  interval_set<uint64_t> extents;
+  extents.insert(0, 0x500);
+  extents.insert(0x800, 0x100);
+  extents.insert(0x1000, 0x1000);
+  extents.insert(0xa001, 1);
+  extents.insert(0xa0000, 0xff8);
+
+  ASSERT_EQ(bmap.filter_out(extents), 3);
+  ASSERT_TRUE(bmap.is_used(cid));
+  ASSERT_TRUE(bmap.is_used(hoid));
+}
 
 int main(int argc, char **argv) {
   vector<const char*> args;
-- 
2.39.5