]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: instrument readable latency too
authorSage Weil <sage.weil@dreamhost.com>
Thu, 23 Jun 2011 21:26:34 +0000 (14:26 -0700)
committerSage Weil <sage.weil@dreamhost.com>
Thu, 23 Jun 2011 21:57:26 +0000 (14:57 -0700)
Time before a write is readable (not necessarily on disk).  Note that if we
get the commit first (e.g. writeahead journal) this value isn't calculated
or logged.

Signed-off-by: Sage Weil <sage.weil@dreamhost.com>
src/osd/OSD.cc
src/osd/OSD.h
src/osd/ReplicatedPG.cc
src/osd/ReplicatedPG.h

index bb343b65d7acbe1c8f2c4ae196a8fd03bc4512fc..655017ff696b2d7cd18467b78d4f1c7586fee988 100644 (file)
@@ -655,10 +655,12 @@ void OSD::open_logger()
     osd_logtype.add_inc(l_osd_op_r_lat,  "op_r_lat");    // client read latency
     osd_logtype.add_inc(l_osd_op_w,      "op_w");        // client writes
     osd_logtype.add_inc(l_osd_op_w_inb,  "op_w_inb");    // client write in bytes
+    osd_logtype.add_inc(l_osd_op_w_rlat, "op_w_rlat");   // client write readable/applied latency
     osd_logtype.add_inc(l_osd_op_w_lat,  "op_w_lat");    // client write latency
     osd_logtype.add_inc(l_osd_op_rw,     "op_rw");       // client rmw
     osd_logtype.add_inc(l_osd_op_rw_inb, "op_rw_inb");   // client rmw in bytes
     osd_logtype.add_inc(l_osd_op_rw_outb,"op_rw_outb");  // client rmw out bytes
+    osd_logtype.add_inc(l_osd_op_rw_rlat,"op_rw_rlat");  // client rmw readable/applied latency
     osd_logtype.add_inc(l_osd_op_rw_lat, "op_rw_lat");   // client rmw latency
 
     osd_logtype.add_inc(l_osd_sop,       "sop");
index fac14cf500415e21c6a5824b28f008f6634db3d1..1c4fa10aa7fa57736c8641f6095c5f816a3adba2 100644 (file)
@@ -59,10 +59,12 @@ enum {
   l_osd_op_r_lat,
   l_osd_op_w,
   l_osd_op_w_inb,
+  l_osd_op_w_rlat,
   l_osd_op_w_lat,
   l_osd_op_rw,
   l_osd_op_rw_inb,
   l_osd_op_rw_outb,
+  l_osd_op_rw_rlat,
   l_osd_op_rw_lat,
 
   l_osd_sop,
index 7234b7b692044a795ed2c6971baa7e7977db39a8..b16956970c0ec4ab05bf3047d95e41073929da1f 100644 (file)
@@ -646,10 +646,18 @@ void ReplicatedPG::do_op(MOSDOp *op)
 
 void ReplicatedPG::log_op_stats(OpContext *ctx)
 {
+  MOSDOp *op = (MOSDOp*)ctx->op;
+
   utime_t now = ceph_clock_now(g_ceph_context);
   utime_t latency = now;
   latency -= ctx->op->get_recv_stamp();
 
+  utime_t rlatency;
+  if (ctx->readable_stamp != utime_t()) {
+    rlatency = ctx->readable_stamp;
+    rlatency -= ctx->op->get_recv_stamp();
+  }
+
   uint64_t inb = ctx->bytes_written;
   uint64_t outb = ctx->bytes_read;
 
@@ -659,12 +667,11 @@ void ReplicatedPG::log_op_stats(OpContext *ctx)
   osd->logger->inc(l_osd_op_inb, inb);
   osd->logger->favg(l_osd_op_lat, latency);
 
-  MOSDOp *op = (MOSDOp*)ctx->op;
-
   if (op->may_read() && op->may_write()) {
     osd->logger->inc(l_osd_op_rw);
     osd->logger->inc(l_osd_op_rw_inb, inb);
     osd->logger->inc(l_osd_op_rw_outb, outb);
+    osd->logger->favg(l_osd_op_rw_rlat, rlatency);
     osd->logger->favg(l_osd_op_rw_lat, latency);
   } else if (op->may_read()) {
     osd->logger->inc(l_osd_op_r);
@@ -678,11 +685,16 @@ void ReplicatedPG::log_op_stats(OpContext *ctx)
   } else if (op->may_write()) {
     osd->logger->inc(l_osd_op_w);
     osd->logger->inc(l_osd_op_w_inb, inb);
+    osd->logger->favg(l_osd_op_w_rlat, rlatency);
     osd->logger->favg(l_osd_op_w_lat, latency);
   } else
     assert(0);
 
-  dout(15) << "log_op_stats " << *op << " inb " << inb << " outb " << outb << " latency " << latency << dendl;
+  dout(15) << "log_op_stats " << *op
+          << " inb " << inb
+          << " outb " << outb
+          << " rlat " << rlatency
+          << " lat " << latency << dendl;
 }
 
 void ReplicatedPG::log_subop_stats(MOSDSubOp *op, int tag_inb, int tag_lat)
@@ -2696,6 +2708,13 @@ void ReplicatedPG::eval_repop(RepGather *repop)
        osd->cluster_messenger->send_message(reply, op->get_connection());
        repop->sent_ack = true;
       }
+
+      // note the write is now readable (for rlatency calc).  note
+      // that this will only be defined if the write is readable
+      // _prior_ to being committed; it will not get set with
+      // writeahead journaling, for instance.
+      if (repop->ctx->readable_stamp == utime_t())
+       repop->ctx->readable_stamp = ceph_clock_now(g_ceph_context);
     }
   }
 
index baed83ca110abd0bb84edd0a4707be5427bb7b83..c2aeb937eda1548ba7b00784b5cc04887c4a3f35 100644 (file)
@@ -368,6 +368,7 @@ public:
 
     MOSDOpReply *reply;
 
+    utime_t readable_stamp;  // when applied on all replicas
     ReplicatedPG *pg;
 
     OpContext(const OpContext& other);