From 7d4af0531dc4260f830b10595d10ba823f60f9a3 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 15 Feb 2017 18:17:33 -0500 Subject: [PATCH] blkin: add traces to ECBackend Signed-off-by: Casey Bodley --- src/osd/ECBackend.cc | 87 +++++++++++++++++++++++++++++++++----------- src/osd/ECBackend.h | 22 ++++++++--- 2 files changed, 83 insertions(+), 26 deletions(-) diff --git a/src/osd/ECBackend.cc b/src/osd/ECBackend.cc index 84e0838f863..fe68a6305c4 100644 --- a/src/osd/ECBackend.cc +++ b/src/osd/ECBackend.cc @@ -737,13 +737,13 @@ bool ECBackend::handle_message( // not conflict with ECSubWrite's operator<<. MOSDECSubOpWrite *op = static_cast( _op->get_nonconst_req()); - handle_sub_write(op->op.from, _op, op->op); + handle_sub_write(op->op.from, _op, op->op, _op->pg_trace); return true; } case MSG_OSD_EC_WRITE_REPLY: { const MOSDECSubOpWriteReply *op = static_cast( _op->get_req()); - handle_sub_write_reply(op->op.from, op->op); + handle_sub_write_reply(op->op.from, op->op, _op->pg_trace); return true; } case MSG_OSD_EC_READ: { @@ -752,7 +752,8 @@ bool ECBackend::handle_message( reply->pgid = get_parent()->primary_spg_t(); reply->map_epoch = get_parent()->get_epoch(); reply->min_epoch = get_parent()->get_interval_start_epoch(); - handle_sub_read(op->op.from, op->op, &(reply->op)); + handle_sub_read(op->op.from, op->op, &(reply->op), _op->pg_trace); + reply->trace = _op->pg_trace; get_parent()->send_message_osd_cluster( op->op.from.osd, reply, get_parent()->get_epoch()); return true; @@ -763,7 +764,7 @@ bool ECBackend::handle_message( MOSDECSubOpReadReply *op = static_cast( _op->get_nonconst_req()); RecoveryMessages rm; - handle_sub_read_reply(op->op.from, op->op, &rm); + handle_sub_read_reply(op->op.from, op->op, &rm, _op->pg_trace); dispatch_recovery_messages(rm, priority); return true; } @@ -802,22 +803,25 @@ struct SubWriteCommitted : public Context { ceph_tid_t tid; eversion_t version; eversion_t last_complete; + const ZTracer::Trace trace; SubWriteCommitted( ECBackend *pg, OpRequestRef msg, ceph_tid_t tid, eversion_t version, - eversion_t last_complete) + eversion_t last_complete, + const ZTracer::Trace &trace) : pg(pg), msg(msg), tid(tid), - version(version), last_complete(last_complete) {} + version(version), last_complete(last_complete), trace(trace) {} void finish(int) override { if (msg) msg->mark_event("sub_op_committed"); - pg->sub_write_committed(tid, version, last_complete); + pg->sub_write_committed(tid, version, last_complete, trace); } }; void ECBackend::sub_write_committed( - ceph_tid_t tid, eversion_t version, eversion_t last_complete) { + ceph_tid_t tid, eversion_t version, eversion_t last_complete, + const ZTracer::Trace &trace) { if (get_parent()->pgb_is_primary()) { ECSubWriteReply reply; reply.tid = tid; @@ -826,7 +830,7 @@ void ECBackend::sub_write_committed( reply.from = get_parent()->whoami_shard(); handle_sub_write_reply( get_parent()->whoami_shard(), - reply); + reply, trace); } else { get_parent()->update_last_complete_ondisk(last_complete); MOSDECSubOpWriteReply *r = new MOSDECSubOpWriteReply; @@ -838,6 +842,8 @@ void ECBackend::sub_write_committed( r->op.committed = true; r->op.from = get_parent()->whoami_shard(); r->set_priority(CEPH_MSG_PRIO_HIGH); + r->trace = trace; + r->trace.event("sending sub op commit"); get_parent()->send_message_osd_cluster( get_parent()->primary_shard().osd, r, get_parent()->get_epoch()); } @@ -848,20 +854,23 @@ struct SubWriteApplied : public Context { OpRequestRef msg; ceph_tid_t tid; eversion_t version; + const ZTracer::Trace trace; SubWriteApplied( ECBackend *pg, OpRequestRef msg, ceph_tid_t tid, - eversion_t version) - : pg(pg), msg(msg), tid(tid), version(version) {} + eversion_t version, + const ZTracer::Trace &trace) + : pg(pg), msg(msg), tid(tid), version(version), trace(trace) {} void finish(int) override { if (msg) msg->mark_event("sub_op_applied"); - pg->sub_write_applied(tid, version); + pg->sub_write_applied(tid, version, trace); } }; void ECBackend::sub_write_applied( - ceph_tid_t tid, eversion_t version) { + ceph_tid_t tid, eversion_t version, + const ZTracer::Trace &trace) { parent->op_applied(version); if (get_parent()->pgb_is_primary()) { ECSubWriteReply reply; @@ -870,7 +879,7 @@ void ECBackend::sub_write_applied( reply.applied = true; handle_sub_write_reply( get_parent()->whoami_shard(), - reply); + reply, trace); } else { MOSDECSubOpWriteReply *r = new MOSDECSubOpWriteReply; r->pgid = get_parent()->primary_spg_t(); @@ -880,6 +889,8 @@ void ECBackend::sub_write_applied( r->op.tid = tid; r->op.applied = true; r->set_priority(CEPH_MSG_PRIO_HIGH); + r->trace = trace; + r->trace.event("sending sub op apply"); get_parent()->send_message_osd_cluster( get_parent()->primary_shard().osd, r, get_parent()->get_epoch()); } @@ -889,10 +900,12 @@ void ECBackend::handle_sub_write( pg_shard_t from, OpRequestRef msg, ECSubWrite &op, + const ZTracer::Trace &trace, Context *on_local_applied_sync) { if (msg) msg->mark_started(); + trace.event("handle_sub_write"); assert(!get_parent()->get_log().get_missing().is_missing(op.soid)); if (!get_parent()->pgb_is_primary()) get_parent()->update_stats(op.stats); @@ -937,10 +950,10 @@ void ECBackend::handle_sub_write( new SubWriteCommitted( this, msg, op.tid, op.at_version, - get_parent()->get_info().last_complete))); + get_parent()->get_info().last_complete, trace))); localt.register_on_applied( get_parent()->bless_context( - new SubWriteApplied(this, msg, op.tid, op.at_version))); + new SubWriteApplied(this, msg, op.tid, op.at_version, trace))); vector tls; tls.reserve(2); tls.push_back(std::move(op.t)); @@ -951,8 +964,10 @@ void ECBackend::handle_sub_write( void ECBackend::handle_sub_read( pg_shard_t from, const ECSubRead &op, - ECSubReadReply *reply) + ECSubReadReply *reply, + const ZTracer::Trace &trace) { + trace.event("handle sub read"); shard_id_t shard = get_parent()->whoami_shard().shard; for(auto i = op.to_read.begin(); i != op.to_read.end(); @@ -1046,11 +1061,13 @@ error: void ECBackend::handle_sub_write_reply( pg_shard_t from, - const ECSubWriteReply &op) + const ECSubWriteReply &op, + const ZTracer::Trace &trace) { map::iterator i = tid_to_op_map.find(op.tid); assert(i != tid_to_op_map.end()); if (op.committed) { + trace.event("sub write committed"); assert(i->second.pending_commit.count(from)); i->second.pending_commit.erase(from); if (from != get_parent()->whoami_shard()) { @@ -1058,6 +1075,7 @@ void ECBackend::handle_sub_write_reply( } } if (op.applied) { + trace.event("sub write applied"); assert(i->second.pending_apply.count(from)); i->second.pending_apply.erase(from); } @@ -1066,11 +1084,13 @@ void ECBackend::handle_sub_write_reply( dout(10) << __func__ << " Calling on_all_applied on " << i->second << dendl; i->second.on_all_applied->complete(0); i->second.on_all_applied = 0; + i->second.trace.event("ec write all applied"); } if (i->second.pending_commit.empty() && i->second.on_all_commit) { dout(10) << __func__ << " Calling on_all_commit on " << i->second << dendl; i->second.on_all_commit->complete(0); i->second.on_all_commit = 0; + i->second.trace.event("ec write all committed"); } check_ops(); } @@ -1078,8 +1098,10 @@ void ECBackend::handle_sub_write_reply( void ECBackend::handle_sub_read_reply( pg_shard_t from, ECSubReadReply &op, - RecoveryMessages *m) + RecoveryMessages *m, + const ZTracer::Trace &trace) { + trace.event("ec sub read reply"); dout(10) << __func__ << ": reply " << op << dendl; map::iterator iter = tid_to_read_map.find(op.tid); if (iter == tid_to_read_map.end()) { @@ -1209,6 +1231,7 @@ void ECBackend::handle_sub_read_reply( } if (rop.in_progress.empty() || is_complete == rop.complete.size()) { dout(20) << __func__ << " Complete: " << rop << dendl; + rop.trace.event("ec read complete"); complete_read_op(rop, m); } else { dout(10) << __func__ << " readop not complete: " << rop << dendl; @@ -1428,6 +1451,8 @@ void ECBackend::submit_transaction( op->tid = tid; op->reqid = reqid; op->client_op = client_op; + if (client_op) + op->trace = client_op->pg_trace; dout(10) << __func__ << ": op " << *op << " starting" << dendl; start_rmw(op, std::move(t)); @@ -1584,8 +1609,11 @@ void ECBackend::start_read_op( _op, std::move(to_read))).first->second; dout(10) << __func__ << ": starting " << op << dendl; - do_read_op( - op); + if (_op) { + op.trace = _op->pg_trace; + op.trace.event("start ec read"); + } + do_read_op(op); } void ECBackend::do_read_op(ReadOp &op) @@ -1645,6 +1673,11 @@ void ECBackend::do_read_op(ReadOp &op) msg->op = i->second; msg->op.from = get_parent()->whoami_shard(); msg->op.tid = tid; + if (op.trace) { + // initialize a child span for this shard + msg->trace.init("ec sub read", nullptr, &op.trace); + msg->trace.keyval("shard", i->first.shard.id); + } get_parent()->send_message_osd_cluster( i->first.osd, msg, @@ -1844,6 +1877,8 @@ bool ECBackend::try_reads_to_commit() trans[i->shard]; } + op->trace.event("start ec write"); + map written; if (op->plan.t) { ECTransaction::generate_transactions( @@ -1925,6 +1960,14 @@ bool ECBackend::try_reads_to_commit() op->temp_added, op->temp_cleared, !should_send); + + ZTracer::Trace trace; + if (op->trace) { + // initialize a child span for this shard + trace.init("ec sub write", nullptr, &op->trace); + trace.keyval("shard", i->shard.id); + } + if (*i == get_parent()->whoami_shard()) { should_write_local = true; local_write_op.claim(sop); @@ -1933,6 +1976,7 @@ bool ECBackend::try_reads_to_commit() r->pgid = spg_t(get_parent()->primary_spg_t().pgid, i->shard); r->map_epoch = get_parent()->get_epoch(); r->min_epoch = get_parent()->get_interval_start_epoch(); + r->trace = trace; get_parent()->send_message_osd_cluster( i->osd, r, get_parent()->get_epoch()); } @@ -1942,6 +1986,7 @@ bool ECBackend::try_reads_to_commit() get_parent()->whoami_shard(), op->client_op, local_write_op, + op->trace, op->on_local_applied_sync); op->on_local_applied_sync = 0; } diff --git a/src/osd/ECBackend.h b/src/osd/ECBackend.h index 96913644fe5..f659675d9ce 100644 --- a/src/osd/ECBackend.h +++ b/src/osd/ECBackend.h @@ -58,28 +58,37 @@ public: friend struct SubWriteApplied; friend struct SubWriteCommitted; void sub_write_applied( - ceph_tid_t tid, eversion_t version); + ceph_tid_t tid, + eversion_t version, + const ZTracer::Trace &trace); void sub_write_committed( - ceph_tid_t tid, eversion_t version, eversion_t last_complete); + ceph_tid_t tid, + eversion_t version, + eversion_t last_complete, + const ZTracer::Trace &trace); void handle_sub_write( pg_shard_t from, OpRequestRef msg, ECSubWrite &op, + const ZTracer::Trace &trace, Context *on_local_applied_sync = 0 ); void handle_sub_read( pg_shard_t from, const ECSubRead &op, - ECSubReadReply *reply + ECSubReadReply *reply, + const ZTracer::Trace &trace ); void handle_sub_write_reply( pg_shard_t from, - const ECSubWriteReply &op + const ECSubWriteReply &op, + const ZTracer::Trace &trace ); void handle_sub_read_reply( pg_shard_t from, ECSubReadReply &op, - RecoveryMessages *m + RecoveryMessages *m, + const ZTracer::Trace &trace ); /// @see ReadOp below @@ -370,6 +379,8 @@ public: // of the available shards. bool for_recovery; + ZTracer::Trace trace; + map to_read; map complete; @@ -447,6 +458,7 @@ public: vector log_entries; ceph_tid_t tid; osd_reqid_t reqid; + ZTracer::Trace trace; eversion_t roll_forward_to; /// Soon to be generated internally -- 2.39.5