From: Igor Fedotov Date: Wed, 6 Dec 2017 15:09:06 +0000 (+0300) Subject: os/bluestore: add per-pool statistics collection X-Git-Tag: v14.1.0~692^2~21 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=9d433ee648083c01817735c5590f635524242912;p=ceph.git os/bluestore: add per-pool statistics collection Signed-off-by: Igor Fedotov --- diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 47d9ff94c67b..960b668c5265 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -62,15 +62,15 @@ MEMPOOL_DEFINE_OBJECT_FACTORY(BlueStore::TransContext, bluestore_transcontext, // kv store prefixes -const string PREFIX_SUPER = "S"; // field -> value -const string PREFIX_STAT = "T"; // field -> value(int64 array) -const string PREFIX_COLL = "C"; // collection name -> cnode_t -const string PREFIX_OBJ = "O"; // object name -> onode_t -const string PREFIX_OMAP = "M"; // u64 + keyname -> value -const string PREFIX_PGMETA_OMAP = "P"; // u64 + keyname -> value (for meta coll) -const string PREFIX_DEFERRED = "L"; // id -> deferred_transaction_t -const string PREFIX_ALLOC = "B"; // u64 offset -> u64 length (freelist) -const string PREFIX_ALLOC_BITMAP = "b"; // (see BitmapFreelistManager) +const string PREFIX_SUPER = "S"; // field -> value +const string PREFIX_STAT = "T"; // field -> value(int64 array) +const string PREFIX_COLL = "C"; // collection name -> cnode_t +const string PREFIX_OBJ = "O"; // object name -> onode_t +const string PREFIX_OMAP = "M"; // u64 + keyname -> value +const string PREFIX_PGMETA_OMAP = "P"; // u64 + keyname -> value(for meta coll) +const string PREFIX_DEFERRED = "L"; // id -> deferred_transaction_t +const string PREFIX_ALLOC = "B"; // u64 offset -> u64 length (freelist) +const string PREFIX_ALLOC_BITMAP = "b";// (see BitmapFreelistManager) const string PREFIX_SHARED_BLOB = "X"; // u64 offset -> shared_blob_t // write a label in the first block. always use this size. note that @@ -537,6 +537,20 @@ static void get_deferred_key(uint64_t seq, string *out) _key_encode_u64(seq, out); } +static void get_pool_stat_key(int64_t pool_id, string *key) +{ + key->clear(); + _key_encode_u64(pool_id, key); +} + +static int get_key_pool_stat(const string& key, uint64_t* pool_id) +{ + const char *p = key.c_str(); + if (key.length() < sizeof(uint64_t)) + return -1; + _key_decode_u64(p, pool_id); + return 0; +} // merge operators @@ -5523,6 +5537,7 @@ int BlueStore::_open_collections(int *errors) << " " << c->cnode << dendl; _osr_attach(c.get()); coll_map[cid] = c; + } else { derr << __func__ << " unrecognized collection " << it->key() << dendl; if (errors) @@ -5534,19 +5549,49 @@ int BlueStore::_open_collections(int *errors) void BlueStore::_open_statfs() { + // for sure + per_pool_stat_collection = true; + osd_pools.clear(); + vstatfs.reset(); + bufferlist bl; int r = db->get(PREFIX_STAT, "bluestore_statfs", &bl); if (r >= 0) { if (size_t(bl.length()) >= sizeof(vstatfs.values)) { auto it = bl.cbegin(); vstatfs.decode(it); + per_pool_stat_collection = false; } else { dout(10) << __func__ << " store_statfs is corrupt, using empty" << dendl; } + } else { + KeyValueDB::Iterator it = db->get_iterator(PREFIX_STAT); + for (it->upper_bound(string()); + it->valid(); + it->next()) { + + uint64_t pool_id; + int r = get_key_pool_stat(it->key(), &pool_id); + ceph_assert(r == 0); + + bufferlist bl; + bl = it->value(); + auto p = bl.cbegin(); + auto& st = osd_pools[pool_id]; + try { + st.decode(p); + vstatfs += st; + + dout(30) << __func__ << " pool " << pool_id + << " statfs " << st << dendl; + } catch (buffer::error& e) { + derr << __func__ << " failed to decode pool stats, key:" + << pretty_binary_string(it->key()) << dendl; + } + } } - else { - dout(10) << __func__ << " store_statfs missed, using empty" << dendl; - } + dout(30) << __func__ << " statfs " << vstatfs << dendl; + } int BlueStore::_setup_block_symlink_or_file( @@ -9183,15 +9228,25 @@ void BlueStore::_txc_update_store_statfs(TransContext *txc) logger->inc(l_bluestore_compressed_allocated, txc->statfs_delta.compressed_allocated()); logger->inc(l_bluestore_compressed_original, txc->statfs_delta.compressed_original()); - { - std::lock_guard l(vstatfs_lock); - vstatfs += txc->statfs_delta; - } - bufferlist bl; txc->statfs_delta.encode(bl); + if (per_pool_stat_collection) { + string key; + get_pool_stat_key(txc->osd_pool_id, &key); + txc->t->merge(PREFIX_STAT, key, bl); + + std::lock_guard l(vstatfs_lock); + auto& stats = osd_pools[txc->osd_pool_id]; + stats += txc->statfs_delta; + + vstatfs += txc->statfs_delta; //non-persistent in this mode + + } else { + txc->t->merge(PREFIX_STAT, "bluestore_statfs", bl); - txc->t->merge(PREFIX_STAT, "bluestore_statfs", bl); + std::lock_guard l(vstatfs_lock); + vstatfs += txc->statfs_delta; + } txc->statfs_delta.reset(); } @@ -10417,7 +10472,9 @@ void BlueStore::_txc_add_transaction(TransContext *txc, Transaction *t) for (vector::iterator p = i.colls.begin(); p != i.colls.end(); ++p, ++j) { cvec[j] = _get_collection(*p); + } + vector ovec(i.objects.size()); for (int pos = 0; i.have_op(); ++pos) { @@ -10428,8 +10485,17 @@ void BlueStore::_txc_add_transaction(TransContext *txc, Transaction *t) if (op->op == Transaction::OP_NOP) continue; + // collection operations CollectionRef &c = cvec[op->cid]; + // initialize osd_pool_id and do a smoke test that all collections belong + // to the same pool + spg_t pgid; + if (!!c ? c->cid.is_pg(&pgid) : false) { + ceph_assert(txc->osd_pool_id == -1 || + txc->osd_pool_id == (int64_t)pgid.pool()); + txc->osd_pool_id = (int64_t)pgid.pool(); + } switch (op->op) { case Transaction::OP_RMCOLL: { diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index ee4b2327b577..511da1390a33 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -1595,8 +1595,9 @@ public: bluestore_deferred_transaction_t *deferred_txn = nullptr; ///< if any interval_set allocated, released; - volatile_statfs statfs_delta; - + volatile_statfs statfs_delta; ///< overall store statistics delta + int64_t osd_pool_id = -1; ///< osd pool id we're operating on + IOContext ioc; bool had_ios = false; ///< true if we submitted IOs before our kv txn @@ -1971,8 +1972,14 @@ private: double osd_memory_expected_fragmentation = 0; ///< expected memory fragmentation uint64_t osd_memory_cache_min = 0; ///< Min memory to assign when autotuning cache double osd_memory_cache_resize_interval = 0; ///< Time to wait between cache resizing + + typedef map osd_pools_map; + ceph::mutex vstatfs_lock = ceph::make_mutex("BlueStore::vstatfs_lock"); volatile_statfs vstatfs; + osd_pools_map osd_pools; // protected by vstatfs_lock as well + + bool per_pool_stat_collection = true; struct MempoolThread : public Thread { public: @@ -2862,6 +2869,20 @@ private: unsigned bits); }; +inline ostream& operator<<(ostream& out, const BlueStore::volatile_statfs& s) { + return out + << " allocated:" + << s.values[BlueStore::volatile_statfs::STATFS_ALLOCATED] + << " stored:" + << s.values[BlueStore::volatile_statfs::STATFS_STORED] + << " compressed:" + << s.values[BlueStore::volatile_statfs::STATFS_COMPRESSED] + << " compressed_orig:" + << s.values[BlueStore::volatile_statfs::STATFS_COMPRESSED_ORIGINAL] + << " compressed_alloc:" + << s.values[BlueStore::volatile_statfs::STATFS_COMPRESSED_ALLOCATED]; +} + static inline void intrusive_ptr_add_ref(BlueStore::Onode *o) { o->get(); }