From 3e56a4d05618deb0443be49b7e97de0328c8ed57 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 10 Oct 2017 14:49:29 -0500 Subject: [PATCH] os/bluestore: put pglog omap in a separate CF Signed-off-by: Sage Weil --- src/os/bluestore/BlueStore.cc | 194 +++++++++++++++++++---------- src/os/bluestore/BlueStore.h | 2 +- src/os/bluestore/bluestore_types.h | 6 +- 3 files changed, 135 insertions(+), 67 deletions(-) diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index d2b852f41c7d4..d5df329a97a6e 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -63,6 +63,7 @@ const string PREFIX_STAT = "T"; // field -> value(int64 array) const string PREFIX_COLL = "C"; // collection name -> cnode_t const string PREFIX_OBJ = "O"; // object name -> onode_t const string PREFIX_OMAP = "M"; // u64 + keyname -> value +const string PREFIX_PGMETA_OMAP = "P"; // u64 + keyname -> value (for meta coll) const string PREFIX_DEFERRED = "L"; // id -> deferred_transaction_t const string PREFIX_ALLOC = "B"; // u64 offset -> u64 length (freelist) const string PREFIX_ALLOC_BITMAP = "b"; // (see BitmapFreelistManager) @@ -70,6 +71,7 @@ const string PREFIX_SHARED_BLOB = "X"; // u64 offset -> shared_blob_t const std::vector<KeyValueDB::ColumnFamily> cfs = { KeyValueDB::ColumnFamily(PREFIX_OMAP, ""), + KeyValueDB::ColumnFamily(PREFIX_PGMETA_OMAP, ""), KeyValueDB::ColumnFamily(PREFIX_DEFERRED, ""), }; @@ -5548,6 +5550,7 @@ int BlueStore::_fsck(bool deep, bool repair) mempool::bluestore_fsck::pool_allocator<uint64_t>> uint64_t_btree_t; uint64_t_btree_t used_nids; uint64_t_btree_t used_omap_head; + uint64_t_btree_t used_pgmeta_omap_head; uint64_t_btree_t used_sbids; mempool_dynamic_bitset used_blocks; @@ -5932,12 +5935,14 @@ int BlueStore::_fsck(bool deep, bool repair) } // omap if (o->onode.has_omap()) { - if (used_omap_head.count(o->onode.nid)) { + auto& m = + o->onode.is_pgmeta_omap() ? 
used_pgmeta_omap_head : used_omap_head; + if (m.count(o->onode.nid)) { derr << "fsck error: " << oid << " omap_head " << o->onode.nid << " already in use" << dendl; ++errors; } else { - used_omap_head.insert(o->onode.nid); + m.insert(o->onode.nid); } } } @@ -6009,6 +6014,18 @@ int BlueStore::_fsck(bool deep, bool repair) } } } + it = db->get_iterator(PREFIX_PGMETA_OMAP); + if (it) { + for (it->lower_bound(string()); it->valid(); it->next()) { + uint64_t omap_head; + _key_decode_u64(it->key().c_str(), &omap_head); + if (used_pgmeta_omap_head.count(omap_head) == 0) { + derr << "fsck error: found stray omap data on omap_head " + << omap_head << dendl; + ++errors; + } + } + } dout(1) << __func__ << " checking deferred events" << dendl; it = db->get_iterator(PREFIX_DEFERRED); @@ -7252,7 +7269,9 @@ int BlueStore::omap_get( goto out; o->flush(); { - KeyValueDB::Iterator it = db->get_iterator(PREFIX_OMAP); + const string& prefix = + o->onode.is_pgmeta_omap() ? PREFIX_PGMETA_OMAP : PREFIX_OMAP; + KeyValueDB::Iterator it = db->get_iterator(prefix); string head, tail; get_omap_header(o->onode.nid, &head); get_omap_tail(o->onode.nid, &tail); @@ -7317,7 +7336,8 @@ int BlueStore::omap_get_header( { string head; get_omap_header(o->onode.nid, &head); - if (db->get(PREFIX_OMAP, head, header) >= 0) { + if (db->get(o->onode.is_pgmeta_omap() ? PREFIX_PGMETA_OMAP : PREFIX_OMAP, + head, header) >= 0) { dout(30) << __func__ << " got header" << dendl; } else { dout(30) << __func__ << " no header" << dendl; @@ -7362,7 +7382,9 @@ int BlueStore::omap_get_keys( goto out; o->flush(); { - KeyValueDB::Iterator it = db->get_iterator(PREFIX_OMAP); + const string& prefix = + o->onode.is_pgmeta_omap() ? 
PREFIX_PGMETA_OMAP : PREFIX_OMAP; + KeyValueDB::Iterator it = db->get_iterator(prefix); string head, tail; get_omap_key(o->onode.nid, string(), &head); get_omap_tail(o->onode.nid, &tail); @@ -7420,17 +7442,21 @@ int BlueStore::omap_get_values( } if (!o->onode.has_omap()) goto out; - o->flush(); - _key_encode_u64(o->onode.nid, &final_key); - final_key.push_back('.'); - for (set<string>::const_iterator p = keys.begin(); p != keys.end(); ++p) { - final_key.resize(9); // keep prefix - final_key += *p; - bufferlist val; - if (db->get(PREFIX_OMAP, final_key, &val) >= 0) { - dout(30) << __func__ << " got " << pretty_binary_string(final_key) - << " -> " << *p << dendl; - out->insert(make_pair(*p, val)); + { + const string& prefix = + o->onode.is_pgmeta_omap() ? PREFIX_PGMETA_OMAP : PREFIX_OMAP; + o->flush(); + _key_encode_u64(o->onode.nid, &final_key); + final_key.push_back('.'); + for (set<string>::const_iterator p = keys.begin(); p != keys.end(); ++p) { + final_key.resize(9); // keep prefix + final_key += *p; + bufferlist val; + if (db->get(prefix, final_key, &val) >= 0) { + dout(30) << __func__ << " got " << pretty_binary_string(final_key) + << " -> " << *p << dendl; + out->insert(make_pair(*p, val)); + } } } out: @@ -7473,20 +7499,24 @@ int BlueStore::omap_check_keys( } if (!o->onode.has_omap()) goto out; - o->flush(); - _key_encode_u64(o->onode.nid, &final_key); - final_key.push_back('.'); - for (set<string>::const_iterator p = keys.begin(); p != keys.end(); ++p) { - final_key.resize(9); // keep prefix - final_key += *p; - bufferlist val; - if (db->get(PREFIX_OMAP, final_key, &val) >= 0) { - dout(30) << __func__ << " have " << pretty_binary_string(final_key) - << " -> " << *p << dendl; - out->insert(*p); - } else { - dout(30) << __func__ << " miss " << pretty_binary_string(final_key) - << " -> " << *p << dendl; + { + const string& prefix = + o->onode.is_pgmeta_omap() ? 
PREFIX_PGMETA_OMAP : PREFIX_OMAP; + o->flush(); + _key_encode_u64(o->onode.nid, &final_key); + final_key.push_back('.'); + for (set<string>::const_iterator p = keys.begin(); p != keys.end(); ++p) { + final_key.resize(9); // keep prefix + final_key += *p; + bufferlist val; + if (db->get(prefix, final_key, &val) >= 0) { + dout(30) << __func__ << " have " << pretty_binary_string(final_key) + << " -> " << *p << dendl; + out->insert(*p); + } else { + dout(30) << __func__ << " miss " << pretty_binary_string(final_key) + << " -> " << *p << dendl; + } } } out: @@ -7526,7 +7556,8 @@ ObjectMap::ObjectMapIterator BlueStore::get_omap_iterator( } o->flush(); dout(10) << __func__ << " has_omap = " << (int)o->onode.has_omap() <<dendl; - KeyValueDB::Iterator it = db->get_iterator(PREFIX_OMAP); + KeyValueDB::Iterator it = db->get_iterator( + o->onode.is_pgmeta_omap() ? PREFIX_PGMETA_OMAP : PREFIX_OMAP); return ObjectMap::ObjectMapIterator(new OmapIteratorImpl(c, o, it)); } @@ -10641,7 +10672,9 @@ int BlueStore::_do_remove( _do_truncate(txc, c, o, 0, is_gen ? &maybe_unshared_blobs : nullptr); if (o->onode.has_omap()) { o->flush(); - _do_omap_clear(txc, o->onode.nid); + _do_omap_clear(txc, + o->onode.is_pgmeta_omap() ? 
PREFIX_PGMETA_OMAP : PREFIX_OMAP, + o->onode.nid); } o->exists = false; string key; @@ -10830,9 +10863,10 @@ int BlueStore::_rmattrs(TransContext *txc, return r; } -void BlueStore::_do_omap_clear(TransContext *txc, uint64_t id) +void BlueStore::_do_omap_clear(TransContext *txc, const string& omap_prefix, + uint64_t id) { - KeyValueDB::Iterator it = db->get_iterator(PREFIX_OMAP); + KeyValueDB::Iterator it = db->get_iterator(omap_prefix); string prefix, tail; get_omap_header(id, &prefix); get_omap_tail(id, &tail); @@ -10843,7 +10877,7 @@ void BlueStore::_do_omap_clear(TransContext *txc, uint64_t id) << dendl; break; } - txc->t->rmkey(PREFIX_OMAP, it->key()); + txc->t->rmkey(omap_prefix, it->key()); dout(30) << __func__ << " rm " << pretty_binary_string(it->key()) << dendl; it->next(); } @@ -10857,7 +10891,9 @@ int BlueStore::_omap_clear(TransContext *txc, int r = 0; if (o->onode.has_omap()) { o->flush(); - _do_omap_clear(txc, o->onode.nid); + _do_omap_clear(txc, + o->onode.is_pgmeta_omap() ? PREFIX_PGMETA_OMAP : PREFIX_OMAP, + o->onode.nid); o->onode.clear_omap_flag(); txc->write_onode(o); } @@ -10876,10 +10912,15 @@ int BlueStore::_omap_setkeys(TransContext *txc, __u32 num; if (!o->onode.has_omap()) { o->onode.set_omap_flag(); + if (o->oid.is_pgmeta()) { + o->onode.flags |= bluestore_onode_t::FLAG_PGMETA_OMAP; + } txc->write_onode(o); } else { txc->note_modified_object(o); } + const string& prefix = + o->onode.is_pgmeta_omap() ? 
PREFIX_PGMETA_OMAP : PREFIX_OMAP; string final_key; _key_encode_u64(o->onode.nid, &final_key); final_key.push_back('.'); @@ -10893,7 +10934,7 @@ int BlueStore::_omap_setkeys(TransContext *txc, final_key += key; dout(30) << __func__ << " " << pretty_binary_string(final_key) << " <- " << key << dendl; - txc->t->set(PREFIX_OMAP, final_key, value); + txc->t->set(prefix, final_key, value); } r = 0; dout(10) << __func__ << " " << c->cid << " " << o->oid << " = " << r << dendl; @@ -10910,12 +10951,17 @@ int BlueStore::_omap_setheader(TransContext *txc, string key; if (!o->onode.has_omap()) { o->onode.set_omap_flag(); + if (o->oid.is_pgmeta()) { + o->onode.flags |= bluestore_onode_t::FLAG_PGMETA_OMAP; + } txc->write_onode(o); } else { txc->note_modified_object(o); } + const string& prefix = + o->onode.is_pgmeta_omap() ? PREFIX_PGMETA_OMAP : PREFIX_OMAP; get_omap_header(o->onode.nid, &key); - txc->t->set(PREFIX_OMAP, key, bl); + txc->t->set(prefix, key, bl); r = 0; dout(10) << __func__ << " " << c->cid << " " << o->oid << " = " << r << dendl; return r; @@ -10935,17 +10981,21 @@ int BlueStore::_omap_rmkeys(TransContext *txc, if (!o->onode.has_omap()) { goto out; } - _key_encode_u64(o->onode.nid, &final_key); - final_key.push_back('.'); - ::decode(num, p); - while (num--) { - string key; - ::decode(key, p); - final_key.resize(9); // keep prefix - final_key += key; - dout(30) << __func__ << " rm " << pretty_binary_string(final_key) - << " <- " << key << dendl; - txc->t->rmkey(PREFIX_OMAP, final_key); + { + const string& prefix = + o->onode.is_pgmeta_omap() ? 
PREFIX_PGMETA_OMAP : PREFIX_OMAP; + _key_encode_u64(o->onode.nid, &final_key); + final_key.push_back('.'); + ::decode(num, p); + while (num--) { + string key; + ::decode(key, p); + final_key.resize(9); // keep prefix + final_key += key; + dout(30) << __func__ << " rm " << pretty_binary_string(final_key) + << " <- " << key << dendl; + txc->t->rmkey(prefix, final_key); + } } txc->note_modified_object(o); @@ -10966,20 +11016,25 @@ int BlueStore::_omap_rmkey_range(TransContext *txc, if (!o->onode.has_omap()) { goto out; } - o->flush(); - it = db->get_iterator(PREFIX_OMAP); - get_omap_key(o->onode.nid, first, &key_first); - get_omap_key(o->onode.nid, last, &key_last); - it->lower_bound(key_first); - while (it->valid()) { - if (it->key() >= key_last) { - dout(30) << __func__ << " stop at " << pretty_binary_string(key_last) + { + const string& prefix = + o->onode.is_pgmeta_omap() ? PREFIX_PGMETA_OMAP : PREFIX_OMAP; + o->flush(); + it = db->get_iterator(prefix); + get_omap_key(o->onode.nid, first, &key_first); + get_omap_key(o->onode.nid, last, &key_last); + it->lower_bound(key_first); + while (it->valid()) { + if (it->key() >= key_last) { + dout(30) << __func__ << " stop at " << pretty_binary_string(key_last) + << dendl; + break; + } + txc->t->rmkey(prefix, it->key()); + dout(30) << __func__ << " rm " << pretty_binary_string(it->key()) << dendl; - break; + it->next(); } - txc->t->rmkey(PREFIX_OMAP, it->key()); - dout(30) << __func__ << " rm " << pretty_binary_string(it->key()) << dendl; - it->next(); } txc->note_modified_object(o); @@ -11052,14 +11107,22 @@ int BlueStore::_clone(TransContext *txc, if (newo->onode.has_omap()) { dout(20) << __func__ << " clearing old omap data" << dendl; newo->flush(); - _do_omap_clear(txc, newo->onode.nid); + _do_omap_clear(txc, + newo->onode.is_pgmeta_omap() ? 
PREFIX_PGMETA_OMAP + : PREFIX_OMAP, + newo->onode.nid); } if (oldo->onode.has_omap()) { dout(20) << __func__ << " copying omap data" << dendl; if (!newo->onode.has_omap()) { newo->onode.set_omap_flag(); + if (newo->oid.is_pgmeta()) { + newo->onode.flags |= bluestore_onode_t::FLAG_PGMETA_OMAP; + } } - KeyValueDB::Iterator it = db->get_iterator(PREFIX_OMAP); + const string& prefix = + newo->onode.is_pgmeta_omap() ? PREFIX_PGMETA_OMAP : PREFIX_OMAP; + KeyValueDB::Iterator it = db->get_iterator(prefix); string head, tail; get_omap_header(oldo->onode.nid, &head); get_omap_tail(oldo->onode.nid, &tail); @@ -11073,7 +11136,7 @@ int BlueStore::_clone(TransContext *txc, << pretty_binary_string(it->key()) << dendl; string key; rewrite_omap_key(newo->onode.nid, it->key(), &key); - txc->t->set(PREFIX_OMAP, key, it->value()); + txc->t->set(prefix, key, it->value()); } it->next(); } @@ -11584,7 +11647,8 @@ void BlueStore::generate_db_histogram(Formatter *f) hist.update_hist_entry(hist.key_hist, prefix_onode_shard, key_size, value_size); num_shards++; } - } else if (key.first == PREFIX_OMAP) { + } else if (key.first == PREFIX_OMAP || + key.first == PREFIX_PGMETA_OMAP) { hist.update_hist_entry(hist.key_hist, PREFIX_OMAP, key_size, value_size); num_omap++; } else if (key.first == PREFIX_DEFERRED) { diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index 939db90272224..0328949fd9805 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -2635,7 +2635,7 @@ private: int _rmattrs(TransContext *txc, CollectionRef& c, OnodeRef& o); - void _do_omap_clear(TransContext *txc, uint64_t id); + void _do_omap_clear(TransContext *txc, const string& prefix, uint64_t id); int _omap_clear(TransContext *txc, CollectionRef& c, OnodeRef& o); diff --git a/src/os/bluestore/bluestore_types.h b/src/os/bluestore/bluestore_types.h index 267664d44a7a1..48c428e99f58d 100644 --- a/src/os/bluestore/bluestore_types.h +++ b/src/os/bluestore/bluestore_types.h @@ -952,7 
+952,8 @@ struct bluestore_onode_t { uint8_t flags = 0; enum { - FLAG_OMAP = 1, + FLAG_OMAP = 1, ///< object may have omap data + FLAG_PGMETA_OMAP = 2, ///< omap data is in meta omap prefix }; string get_flags_string() const { @@ -978,6 +979,9 @@ struct bluestore_onode_t { bool has_omap() const { return has_flag(FLAG_OMAP); } + bool is_pgmeta_omap() const { + return has_flag(FLAG_PGMETA_OMAP); + } void set_omap_flag() { set_flag(FLAG_OMAP); -- 2.39.5