]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
BlueStore: add logger to record state transition latency
authorHaomai Wang <haomai@xsky.com>
Mon, 18 Jan 2016 14:11:39 +0000 (22:11 +0800)
committerHaomai Wang <haomai@xsky.com>
Mon, 1 Feb 2016 14:02:18 +0000 (22:02 +0800)
Signed-off-by: Haomai Wang <haomai@xsky.com>
src/os/bluestore/BlueStore.cc
src/os/bluestore/BlueStore.h
src/os/bluestore/NVMEDevice.cc

index 739f9cd47e73ef0b1dd0c1af527901bc2c25d89f..a80fcc56d0ee71ce11cba0466e59bec21e1f86aa 100644 (file)
@@ -72,6 +72,7 @@ const string PREFIX_ALLOC = "B";   // u64 offset -> u64 length (freelist)
 // for bluefs, label (4k) + bluefs super (4k), means we start at 8k.
 #define BLUEFS_START  8192
 
+
 /*
  * object name key structure
  *
@@ -789,12 +790,29 @@ BlueStore::~BlueStore()
 
 void BlueStore::_init_logger()
 {
-  // XXX
+  PerfCountersBuilder b(g_ceph_context, "BlueStore",
+                        l_bluestore_first, l_bluestore_last);
+  b.add_time_avg(l_bluestore_state_prepare_lat, "state_prepare_lat", "Average prepare state latency");
+  b.add_time_avg(l_bluestore_state_aio_wait_lat, "state_aio_wait_lat", "Average aio_wait state latency");
+  b.add_time_avg(l_bluestore_state_io_done_lat, "state_io_done_lat", "Average io_done state latency");
+  b.add_time_avg(l_bluestore_state_kv_queued_lat, "state_kv_queued_lat", "Average kv_queued state latency");
+  b.add_time_avg(l_bluestore_state_kv_committing_lat, "state_kv_commiting_lat", "Average kv_commiting state latency");
+  b.add_time_avg(l_bluestore_state_kv_done_lat, "state_kv_done_lat", "Average kv_done state latency");
+  b.add_time_avg(l_bluestore_state_wal_queued_lat, "state_wal_queued_lat", "Average wal_queued state latency");
+  b.add_time_avg(l_bluestore_state_wal_applying_lat, "state_wal_applying_lat", "Average wal_applying state latency");
+  b.add_time_avg(l_bluestore_state_wal_aio_wait_lat, "state_wal_aio_wait_lat", "Average aio_wait state latency");
+  b.add_time_avg(l_bluestore_state_wal_cleanup_lat, "state_wal_cleanup_lat", "Average cleanup state latency");
+  b.add_time_avg(l_bluestore_state_wal_done_lat, "state_wal_done_lat", "Average wal_done state latency");
+  b.add_time_avg(l_bluestore_state_finishing_lat, "state_finishing_lat", "Average finishing state latency");
+  b.add_time_avg(l_bluestore_state_done_lat, "state_done_lat", "Average done state latency");
+  logger = b.create_perf_counters();
+  g_ceph_context->get_perfcounters_collection()->add(logger);
 }
 
 void BlueStore::_shutdown_logger()
 {
-  // XXX
+  g_ceph_context->get_perfcounters_collection()->remove(logger);
+  delete logger;
 }
 
 int BlueStore::get_block_device_fsid(const string& path, uuid_d *fsid)
@@ -3460,6 +3478,7 @@ void BlueStore::_txc_state_proc(TransContext *txc)
             << " " << txc->get_state_name() << dendl;
     switch (txc->state) {
     case TransContext::STATE_PREPARE:
+      txc->log_state_latency(logger, l_bluestore_state_prepare_lat);
       if (txc->ioc.has_aios()) {
        txc->state = TransContext::STATE_AIO_WAIT;
        _txc_aio_submit(txc);
@@ -3468,11 +3487,13 @@ void BlueStore::_txc_state_proc(TransContext *txc)
       // ** fall-thru **
 
     case TransContext::STATE_AIO_WAIT:
+      txc->log_state_latency(logger, l_bluestore_state_aio_wait_lat);
       _txc_finish_io(txc);  // may trigger blocked txc's too
       return;
 
     case TransContext::STATE_IO_DONE:
       //assert(txc->osr->qlock.is_locked());  // see _txc_finish_io
+      txc->log_state_latency(logger, l_bluestore_state_io_done_lat);
       txc->state = TransContext::STATE_KV_QUEUED;
       if (!g_conf->bluestore_sync_transaction) {
        std::lock_guard<std::mutex> l(kv_lock);
@@ -3487,11 +3508,13 @@ void BlueStore::_txc_state_proc(TransContext *txc)
       break;
 
     case TransContext::STATE_KV_QUEUED:
+      txc->log_state_latency(logger, l_bluestore_state_kv_queued_lat);
       txc->state = TransContext::STATE_KV_DONE;
       _txc_finish_kv(txc);
       // ** fall-thru **
 
     case TransContext::STATE_KV_DONE:
+      txc->log_state_latency(logger, l_bluestore_state_kv_done_lat);
       if (txc->wal_txn) {
        txc->state = TransContext::STATE_WAL_QUEUED;
        if (g_conf->bluestore_sync_wal_apply) {
@@ -3505,6 +3528,7 @@ void BlueStore::_txc_state_proc(TransContext *txc)
       break;
 
     case TransContext::STATE_WAL_APPLYING:
+      txc->log_state_latency(logger, l_bluestore_state_wal_applying_lat);
       if (txc->ioc.has_aios()) {
        txc->state = TransContext::STATE_WAL_AIO_WAIT;
        _txc_aio_submit(txc);
@@ -3513,14 +3537,17 @@ void BlueStore::_txc_state_proc(TransContext *txc)
       // ** fall-thru **
 
     case TransContext::STATE_WAL_AIO_WAIT:
+      txc->log_state_latency(logger, l_bluestore_state_wal_aio_wait_lat);
       _wal_finish(txc);
       return;
 
     case TransContext::STATE_WAL_CLEANUP:
+      txc->log_state_latency(logger, l_bluestore_state_wal_cleanup_lat);
       txc->state = TransContext::STATE_FINISHING;
       // ** fall-thru **
 
     case TransContext::TransContext::STATE_FINISHING:
+      txc->log_state_latency(logger, l_bluestore_state_finishing_lat);
       _txc_finish(txc);
       return;
 
@@ -3693,6 +3720,7 @@ void BlueStore::_osr_reap_done(OpSequencer *osr)
     }
 
     osr->q.pop_front();
+    txc->log_state_latency(logger, l_bluestore_state_done_lat);
     delete txc;
     osr->qcond.notify_all();
     if (osr->q.empty())
@@ -3889,6 +3917,7 @@ int BlueStore::_wal_apply(TransContext *txc)
 {
   bluestore_wal_transaction_t& wt = *txc->wal_txn;
   dout(20) << __func__ << " txc " << txc << " seq " << wt.seq << dendl;
+  txc->log_state_latency(logger, l_bluestore_state_wal_queued_lat);
   txc->state = TransContext::STATE_WAL_APPLYING;
 
   assert(txc->ioc.pending_aios.empty());
index 382a896d419bbaf0b675632c7e4eebbb1cbd8786..728201f51f1dad6baac189fd9b8d5205ac1c81ef 100644 (file)
@@ -33,6 +33,7 @@
 #include "common/Finisher.h"
 #include "common/RWLock.h"
 #include "common/WorkQueue.h"
+#include "common/perf_counters.h"
 #include "os/ObjectStore.h"
 #include "os/fs/FS.h"
 #include "kv/KeyValueDB.h"
@@ -44,6 +45,24 @@ class Allocator;
 class FreelistManager;
 class BlueFS;
 
+enum {
+  l_bluestore_first = 732430,
+  l_bluestore_state_prepare_lat,
+  l_bluestore_state_aio_wait_lat,
+  l_bluestore_state_io_done_lat,
+  l_bluestore_state_kv_queued_lat,
+  l_bluestore_state_kv_committing_lat,
+  l_bluestore_state_kv_done_lat,
+  l_bluestore_state_wal_queued_lat,
+  l_bluestore_state_wal_applying_lat,
+  l_bluestore_state_wal_aio_wait_lat,
+  l_bluestore_state_wal_cleanup_lat,
+  l_bluestore_state_wal_done_lat,
+  l_bluestore_state_finishing_lat,
+  l_bluestore_state_done_lat,
+  l_bluestore_last
+};
+
 class BlueStore : public ObjectStore {
   // -----------------------------------------------------
   // types
@@ -272,6 +291,13 @@ public:
       return "???";
     }
 
+    void log_state_latency(PerfCounters *logger, int state) {
+      utime_t lat, now = ceph_clock_now(g_ceph_context);
+      lat = now - start;
+      logger->tinc(state, lat);
+      start = now;
+    }
+
     OpSequencerRef osr;
     boost::intrusive::list_member_hook<> sequencer_item;
 
@@ -296,6 +322,8 @@ public:
 
     CollectionRef first_collection;  ///< first referenced collection
 
+    utime_t start;
+
     TransContext(OpSequencer *o)
       : state(STATE_PREPARE),
        osr(o),
@@ -305,7 +333,8 @@ public:
        onreadable(NULL),
        onreadable_sync(NULL),
        wal_txn(NULL),
-       ioc(this) {
+       ioc(this),
+       start(ceph_clock_now(g_ceph_context)) {
       //cout << "txc new " << this << std::endl;
     }
     ~TransContext() {
@@ -507,7 +536,7 @@ private:
   deque<TransContext*> kv_queue, kv_committing;
   deque<TransContext*> wal_cleanup_queue, wal_cleaning;
 
-  Logger *logger;
+  PerfCounters *logger;
 
   std::mutex reap_lock;
   list<CollectionRef> removed_collections;
index 2a9994c4b72cac9ca52c3b8e12d700e89cce572f..3de8f3b595cf2b008cc875d3668517deb2835120 100644 (file)
@@ -643,26 +643,17 @@ int NVMEDevice::aio_write(
   t->device = this;
   t->return_code = 0;
 
-  if (0 && buffered) {
-    Mutex::Locker l(queue_lock);
-    task_queue.push(t);
-    if (queue_empty.read()) {
-      queue_empty.dec();
-      queue_cond.Signal();
-    }
-    ioc->num_running.inc();
-    t->next = nullptr;
-  } else {
-    Task *first = static_cast<Task*>(ioc->nvme_task_first);
-    Task *last = static_cast<Task*>(ioc->nvme_task_last);
-    if (last)
-      last->next = t;
-    t->next = nullptr;
-    if (!first)
-      ioc->nvme_task_first = t;
-    ioc->nvme_task_last = t;
-    ioc->num_pending.inc();
+  if (buffered) {
   }
+  Task *first = static_cast<Task*>(ioc->nvme_task_first);
+  Task *last = static_cast<Task*>(ioc->nvme_task_last);
+  if (last)
+    last->next = t;
+  t->next = nullptr;
+  if (!first)
+    ioc->nvme_task_first = t;
+  ioc->nvme_task_last = t;
+  ioc->num_pending.inc();
 
   dout(5) << __func__ << " " << off << "~" << len << dendl;