From: Haomai Wang Date: Mon, 18 Jan 2016 14:11:39 +0000 (+0800) Subject: BlueStore: add logger to record state transition latency X-Git-Tag: v10.0.4~81^2~4 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=557206408c527cfe71ac21ca6507cfcb37d43f6c;p=ceph.git BlueStore: add logger to record state transition latency Signed-off-by: Haomai Wang --- diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 739f9cd47e7..a80fcc56d0e 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -72,6 +72,7 @@ const string PREFIX_ALLOC = "B"; // u64 offset -> u64 length (freelist) // for bluefs, label (4k) + bluefs super (4k), means we start at 8k. #define BLUEFS_START 8192 + /* * object name key structure * @@ -789,12 +790,29 @@ BlueStore::~BlueStore() void BlueStore::_init_logger() { - // XXX + PerfCountersBuilder b(g_ceph_context, "BlueStore", + l_bluestore_first, l_bluestore_last); + b.add_time_avg(l_bluestore_state_prepare_lat, "state_prepare_lat", "Average prepare state latency"); + b.add_time_avg(l_bluestore_state_aio_wait_lat, "state_aio_wait_lat", "Average aio_wait state latency"); + b.add_time_avg(l_bluestore_state_io_done_lat, "state_io_done_lat", "Average io_done state latency"); + b.add_time_avg(l_bluestore_state_kv_queued_lat, "state_kv_queued_lat", "Average kv_queued state latency"); + b.add_time_avg(l_bluestore_state_kv_committing_lat, "state_kv_commiting_lat", "Average kv_commiting state latency"); + b.add_time_avg(l_bluestore_state_kv_done_lat, "state_kv_done_lat", "Average kv_done state latency"); + b.add_time_avg(l_bluestore_state_wal_queued_lat, "state_wal_queued_lat", "Average wal_queued state latency"); + b.add_time_avg(l_bluestore_state_wal_applying_lat, "state_wal_applying_lat", "Average wal_applying state latency"); + b.add_time_avg(l_bluestore_state_wal_aio_wait_lat, "state_wal_aio_wait_lat", "Average aio_wait state latency"); + b.add_time_avg(l_bluestore_state_wal_cleanup_lat, "state_wal_cleanup_lat", "Average cleanup state latency"); + b.add_time_avg(l_bluestore_state_wal_done_lat, "state_wal_done_lat", "Average wal_done state latency"); + b.add_time_avg(l_bluestore_state_finishing_lat, "state_finishing_lat", "Average finishing state latency"); + b.add_time_avg(l_bluestore_state_done_lat, "state_done_lat", "Average done state latency"); + logger = b.create_perf_counters(); + g_ceph_context->get_perfcounters_collection()->add(logger); } void BlueStore::_shutdown_logger() { - // XXX + g_ceph_context->get_perfcounters_collection()->remove(logger); + delete logger; } int BlueStore::get_block_device_fsid(const string& path, uuid_d *fsid) @@ -3460,6 +3478,7 @@ void BlueStore::_txc_state_proc(TransContext *txc) << " " << txc->get_state_name() << dendl; switch (txc->state) { case TransContext::STATE_PREPARE: + txc->log_state_latency(logger, l_bluestore_state_prepare_lat); if (txc->ioc.has_aios()) { txc->state = TransContext::STATE_AIO_WAIT; _txc_aio_submit(txc); @@ -3468,11 +3487,13 @@ void BlueStore::_txc_state_proc(TransContext *txc) // ** fall-thru ** case TransContext::STATE_AIO_WAIT: + txc->log_state_latency(logger, l_bluestore_state_aio_wait_lat); _txc_finish_io(txc); // may trigger blocked txc's too return; case TransContext::STATE_IO_DONE: //assert(txc->osr->qlock.is_locked()); // see _txc_finish_io + txc->log_state_latency(logger, l_bluestore_state_io_done_lat); txc->state = TransContext::STATE_KV_QUEUED; if (!g_conf->bluestore_sync_transaction) { std::lock_guard l(kv_lock); @@ -3487,11 +3508,13 @@ void BlueStore::_txc_state_proc(TransContext *txc) break; case TransContext::STATE_KV_QUEUED: + txc->log_state_latency(logger, l_bluestore_state_kv_queued_lat); txc->state = TransContext::STATE_KV_DONE; _txc_finish_kv(txc); // ** fall-thru ** case TransContext::STATE_KV_DONE: + txc->log_state_latency(logger, l_bluestore_state_kv_done_lat); if (txc->wal_txn) { txc->state = TransContext::STATE_WAL_QUEUED; if (g_conf->bluestore_sync_wal_apply) { @@ -3505,6 +3528,7 @@ void BlueStore::_txc_state_proc(TransContext *txc) break; case TransContext::STATE_WAL_APPLYING: + txc->log_state_latency(logger, l_bluestore_state_wal_applying_lat); if (txc->ioc.has_aios()) { txc->state = TransContext::STATE_WAL_AIO_WAIT; _txc_aio_submit(txc); @@ -3513,14 +3537,17 @@ void BlueStore::_txc_state_proc(TransContext *txc) // ** fall-thru ** case TransContext::STATE_WAL_AIO_WAIT: + txc->log_state_latency(logger, l_bluestore_state_wal_aio_wait_lat); _wal_finish(txc); return; case TransContext::STATE_WAL_CLEANUP: + txc->log_state_latency(logger, l_bluestore_state_wal_cleanup_lat); txc->state = TransContext::STATE_FINISHING; // ** fall-thru ** case TransContext::TransContext::STATE_FINISHING: + txc->log_state_latency(logger, l_bluestore_state_finishing_lat); _txc_finish(txc); return; @@ -3693,6 +3720,7 @@ void BlueStore::_osr_reap_done(OpSequencer *osr) } osr->q.pop_front(); + txc->log_state_latency(logger, l_bluestore_state_done_lat); delete txc; osr->qcond.notify_all(); if (osr->q.empty()) @@ -3889,6 +3917,7 @@ int BlueStore::_wal_apply(TransContext *txc) { bluestore_wal_transaction_t& wt = *txc->wal_txn; dout(20) << __func__ << " txc " << txc << " seq " << wt.seq << dendl; + txc->log_state_latency(logger, l_bluestore_state_wal_queued_lat); txc->state = TransContext::STATE_WAL_APPLYING; assert(txc->ioc.pending_aios.empty()); diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index 382a896d419..728201f51f1 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -33,6 +33,7 @@ #include "common/Finisher.h" #include "common/RWLock.h" #include "common/WorkQueue.h" +#include "common/perf_counters.h" #include "os/ObjectStore.h" #include "os/fs/FS.h" #include "kv/KeyValueDB.h" @@ -44,6 +45,24 @@ class Allocator; class FreelistManager; class BlueFS; +enum { + l_bluestore_first = 732430, + l_bluestore_state_prepare_lat, + l_bluestore_state_aio_wait_lat, + l_bluestore_state_io_done_lat, + l_bluestore_state_kv_queued_lat, + l_bluestore_state_kv_committing_lat, + l_bluestore_state_kv_done_lat, + l_bluestore_state_wal_queued_lat, + l_bluestore_state_wal_applying_lat, + l_bluestore_state_wal_aio_wait_lat, + l_bluestore_state_wal_cleanup_lat, + l_bluestore_state_wal_done_lat, + l_bluestore_state_finishing_lat, + l_bluestore_state_done_lat, + l_bluestore_last +}; + class BlueStore : public ObjectStore { // ----------------------------------------------------- // types @@ -272,6 +291,13 @@ public: return "???"; } + void log_state_latency(PerfCounters *logger, int state) { + utime_t lat, now = ceph_clock_now(g_ceph_context); + lat = now - start; + logger->tinc(state, lat); + start = now; + } + OpSequencerRef osr; boost::intrusive::list_member_hook<> sequencer_item; @@ -296,6 +322,8 @@ public: CollectionRef first_collection; ///< first referenced collection + utime_t start; + TransContext(OpSequencer *o) : state(STATE_PREPARE), osr(o), @@ -305,7 +333,8 @@ public: onreadable(NULL), onreadable_sync(NULL), wal_txn(NULL), - ioc(this) { + ioc(this), + start(ceph_clock_now(g_ceph_context)) { //cout << "txc new " << this << std::endl; } ~TransContext() { @@ -507,7 +536,7 @@ private: deque kv_queue, kv_committing; deque wal_cleanup_queue, wal_cleaning; - Logger *logger; + PerfCounters *logger; std::mutex reap_lock; list removed_collections; diff --git a/src/os/bluestore/NVMEDevice.cc b/src/os/bluestore/NVMEDevice.cc index 2a9994c4b72..3de8f3b595c 100644 --- a/src/os/bluestore/NVMEDevice.cc +++ b/src/os/bluestore/NVMEDevice.cc @@ -643,26 +643,17 @@ int NVMEDevice::aio_write( t->device = this; t->return_code = 0; - if (0 && buffered) { - Mutex::Locker l(queue_lock); - task_queue.push(t); - if (queue_empty.read()) { - queue_empty.dec(); - queue_cond.Signal(); - } - ioc->num_running.inc(); - t->next = nullptr; - } else { - Task *first = static_cast(ioc->nvme_task_first); - Task *last = static_cast(ioc->nvme_task_last); - if (last) - last->next = t; - t->next = nullptr; - if (!first) - ioc->nvme_task_first = t; - ioc->nvme_task_last = t; - ioc->num_pending.inc(); + if (buffered) { } + Task *first = static_cast(ioc->nvme_task_first); + Task *last = static_cast(ioc->nvme_task_last); + if (last) + last->next = t; + t->next = nullptr; + if (!first) + ioc->nvme_task_first = t; + ioc->nvme_task_last = t; + ioc->num_pending.inc(); dout(5) << __func__ << " " << off << "~" << len << dendl;