From: Casey Bodley Date: Fri, 28 Aug 2015 14:32:40 +0000 (-0400) Subject: blkin: add traces to FileStore/Journal X-Git-Tag: v12.0.3~73^2~18 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=e0a5b51fad27d15f77dd9694f59ec39bfe4a2833;p=ceph.git blkin: add traces to FileStore/Journal Signed-off-by: Casey Bodley --- diff --git a/src/common/TrackedOp.h b/src/common/TrackedOp.h index da573b1b9d0..899e0b6fec9 100644 --- a/src/common/TrackedOp.h +++ b/src/common/TrackedOp.h @@ -225,6 +225,8 @@ protected: public: ZTracer::Trace osd_trace; ZTracer::Trace pg_trace; + ZTracer::Trace store_trace; + ZTracer::Trace journal_trace; virtual ~TrackedOp() {} diff --git a/src/os/filestore/FileJournal.cc b/src/os/filestore/FileJournal.cc index 03992e64792..bf13c44cadd 100644 --- a/src/os/filestore/FileJournal.cc +++ b/src/os/filestore/FileJournal.cc @@ -931,8 +931,11 @@ void FileJournal::queue_completions_thru(uint64_t seq) } if (next.finish) finisher->queue(next.finish); - if (next.tracked_op) + if (next.tracked_op) { next.tracked_op->mark_event("journaled_completion_queued"); + next.tracked_op->journal_trace.event("queued completion"); + next.tracked_op->journal_trace.keyval("completed through", seq); + } items.erase(it++); } batch_unpop_completions(items); @@ -977,8 +980,10 @@ int FileJournal::prepare_single_write(write_item &next_write, bufferlist& bl, of footerptr.copy_in(post_offset + magic2_offset, sizeof(uint64_t), (char *)&magic2); bl.claim_append(ebl); - if (next_write.tracked_op) + if (next_write.tracked_op) { next_write.tracked_op->mark_event("write_thread_in_journal_buffer"); + next_write.tracked_op->journal_trace.event("prepare_single_write"); + } journalq.push_back(pair(seq, queue_pos)); writing_seq = seq; @@ -1616,6 +1621,12 @@ void FileJournal::submit_entry(uint64_t seq, bufferlist& e, uint32_t orig_len, logger->inc(l_filestore_journal_bytes, e.length()); } + if (osd_op) { + osd_op->mark_event("commit_queued_for_journal_write"); + osd_op->journal_trace.init("journal", &trace_endpoint, &osd_op->store_trace); + osd_op->journal_trace.event("submit_entry"); + osd_op->journal_trace.keyval("seq", seq); + } { Mutex::Locker l1(writeq_lock); #ifdef HAVE_LIBAIO @@ -1635,6 +1646,8 @@ void FileJournal::submit_entry(uint64_t seq, bufferlist& e, uint32_t orig_len, if (writeq.empty()) writeq_cond.Signal(); writeq.push_back(write_item(seq, e, orig_len, osd_op)); + if (osd_op) + osd_op->journal_trace.keyval("queue depth", writeq.size()); } } diff --git a/src/os/filestore/FileJournal.h b/src/os/filestore/FileJournal.h index 1cde1876272..ff65cdb33b7 100644 --- a/src/os/filestore/FileJournal.h +++ b/src/os/filestore/FileJournal.h @@ -25,7 +25,7 @@ using std::deque; #include "common/Thread.h" #include "common/Throttle.h" #include "JournalThrottle.h" - +#include "common/zipkin_trace.h" #ifdef HAVE_LIBAIO # include @@ -46,8 +46,7 @@ public: Context *finish; utime_t start; TrackedOpRef tracked_op; - completion_item(uint64_t o, Context *c, utime_t s, - TrackedOpRef opref) + completion_item(uint64_t o, Context *c, utime_t s, TrackedOpRef opref) : seq(o), finish(c), start(s), tracked_op(opref) {} completion_item() : seq(0), finish(0), start(0) {} }; @@ -56,6 +55,7 @@ public: bufferlist bl; uint32_t orig_len; TrackedOpRef tracked_op; + ZTracer::Trace trace; write_item(uint64_t s, bufferlist& b, int ol, TrackedOpRef opref) : seq(s), orig_len(ol), tracked_op(opref) { bl.claim(b, buffer::list::CLAIM_ALLOW_NONSHAREABLE); // potential zero-copy @@ -386,6 +386,8 @@ private: return ROUND_UP_TO(sizeof(header), block_size); } + ZTracer::Endpoint trace_endpoint; + public: FileJournal(CephContext* cct, uuid_d fsid, Finisher *fin, Cond *sync_cond, const char *f, bool dio=false, bool ai=true, bool faio=false) : @@ -420,7 +422,8 @@ private: write_stop(true), aio_stop(true), write_thread(this), - write_finish_thread(this) { + write_finish_thread(this), + trace_endpoint("0.0.0.0", 0, "FileJournal") { if (aio && !directio) { lderr(cct) << "FileJournal::_open_any: aio not supported without directio; disabling aio" << dendl; diff --git a/src/os/filestore/FileStore.cc b/src/os/filestore/FileStore.cc index 6201f50d118..d262725f65b 100644 --- a/src/os/filestore/FileStore.cc +++ b/src/os/filestore/FileStore.cc @@ -547,6 +547,7 @@ FileStore::FileStore(CephContext* cct, const std::string &base, op_wq(this, cct->_conf->filestore_op_thread_timeout, cct->_conf->filestore_op_thread_suicide_timeout, &op_tp), logger(NULL), + trace_endpoint("0.0.0.0", 0, "FileStore"), read_error_lock("FileStore::read_error_lock"), m_filestore_commit_timeout(cct->_conf->filestore_commit_timeout), m_filestore_journal_parallel(cct->_conf->filestore_journal_parallel ), @@ -1963,6 +1964,7 @@ void FileStore::queue_op(OpSequencer *osr, Op *o) // sequencer, the op order will be preserved. osr->queue(o); + o->trace.event("queued"); logger->inc(l_filestore_ops); logger->inc(l_filestore_bytes, o->bytes); @@ -2008,9 +2010,12 @@ void FileStore::_do_op(OpSequencer *osr, ThreadPool::TPHandle &handle) osr->apply_lock.Lock(); Op *o = osr->peek_queue(); + o->trace.event("op_apply_start"); apply_manager.op_apply_start(o->op); dout(5) << "_do_op " << o << " seq " << o->op << " " << *osr << "/" << osr->parent << " start" << dendl; + o->trace.event("_do_transactions start"); int r = _do_transactions(o->tls, o->op, &handle); + o->trace.event("op_apply_finish"); apply_manager.op_apply_finish(o->op); dout(10) << "_do_op " << o << " seq " << o->op << " r = " << r << ", finisher " << o->onreadable << " " << o->onreadable_sync << dendl; @@ -2029,6 +2034,7 @@ void FileStore::_finish_op(OpSequencer *osr) dout(10) << "_finish_op " << o << " seq " << o->op << " " << *osr << "/" << osr->parent << " lat " << lat << dendl; osr->apply_lock.Unlock(); // locked in _do_op + o->trace.event("_finish_op"); // called with tp lock held op_queue_release_throttle(o); @@ -2100,6 +2106,12 @@ int FileStore::queue_transactions(Sequencer *posr, vector& tls, (*i).set_osr(osr); } + ZTracer::Trace trace; + if (osd_op && osd_op->pg_trace) { + osd_op->store_trace.init("filestore op", &trace_endpoint, &osd_op->pg_trace); + trace = osd_op->store_trace; + } + if (journal && journal->is_writeable() && !m_filestore_journal_trailing) { Op *o = build_op(tls, onreadable, onreadable_sync, osd_op); @@ -2118,6 +2130,7 @@ int FileStore::queue_transactions(Sequencer *posr, vector& tls, uint64_t op_num = submit_manager.op_submit_start(); o->op = op_num; + trace.keyval("opnum", op_num); if (m_filestore_do_dump) dump_transactions(o->tls, o->op, osr); @@ -2125,15 +2138,20 @@ int FileStore::queue_transactions(Sequencer *posr, vector& tls, if (m_filestore_journal_parallel) { dout(5) << "queue_transactions (parallel) " << o->op << " " << o->tls << dendl; + trace.keyval("journal mode", "parallel"); + trace.event("journal started"); _op_journal_transactions(tbl, orig_len, o->op, ondisk, osd_op); // queue inside submit_manager op submission lock queue_op(osr, o); + trace.event("op queued"); } else if (m_filestore_journal_writeahead) { dout(5) << "queue_transactions (writeahead) " << o->op << " " << o->tls << dendl; osr->queue_journal(o->op); + trace.keyval("journal mode", "writeahead"); + trace.event("journal started"); _op_journal_transactions(tbl, orig_len, o->op, new C_JournaledAhead(this, osr, o, ondisk), osd_op); @@ -2165,6 +2183,9 @@ int FileStore::queue_transactions(Sequencer *posr, vector& tls, dump_transactions(o->tls, o->op, osr); queue_op(osr, o); + trace.keyval("opnum", op_num); + trace.keyval("journal mode", "none"); + trace.event("op queued"); if (ondisk) apply_manager.add_waiter(op_num, ondisk); @@ -2187,10 +2208,15 @@ int FileStore::queue_transactions(Sequencer *posr, vector& tls, if (m_filestore_do_dump) dump_transactions(tls, op, osr); + trace.event("op_apply_start"); + trace.keyval("opnum", op); + trace.keyval("journal mode", "trailing"); apply_manager.op_apply_start(op); + trace.event("do_transactions"); int r = do_transactions(tls, op); if (r >= 0) { + trace.event("journal started"); _op_journal_transactions(tbl, orig_len, op, ondisk, osd_op); } else { delete ondisk; @@ -2204,6 +2230,7 @@ int FileStore::queue_transactions(Sequencer *posr, vector& tls, apply_finishers[osr->id % m_apply_finisher_num]->queue(onreadable, r); submit_manager.op_submit_finish(op); + trace.event("op_apply_finish"); apply_manager.op_apply_finish(op); utime_t end = ceph_clock_now(); @@ -2215,6 +2242,8 @@ void FileStore::_journaled_ahead(OpSequencer *osr, Op *o, Context *ondisk) { dout(5) << "_journaled_ahead " << o << " seq " << o->op << " " << *osr << " " << o->tls << dendl; + o->trace.event("writeahead journal finished"); + // this should queue in order because the journal does it's completions in order. queue_op(osr, o); diff --git a/src/os/filestore/FileStore.h b/src/os/filestore/FileStore.h index bbae4bc3349..f80b807bd78 100644 --- a/src/os/filestore/FileStore.h +++ b/src/os/filestore/FileStore.h @@ -34,6 +34,7 @@ using namespace std; #include "common/Timer.h" #include "common/WorkQueue.h" #include "common/perf_counters.h" +#include "common/zipkin_trace.h" #include "common/Mutex.h" #include "HashIndex.h" @@ -216,6 +217,7 @@ private: Context *onreadable, *onreadable_sync; uint64_t ops, bytes; TrackedOpRef osd_op; + ZTracer::Trace trace; }; class OpSequencer : public Sequencer_impl { Mutex qlock; // to protect q, for benefit of flush (peek/dequeue also protected by lock) @@ -290,6 +292,7 @@ private: void queue(Op *o) { Mutex::Locker l(qlock); q.push_back(o); + o->trace.keyval("queue depth", q.size()); } Op *peek_queue() { Mutex::Locker l(qlock); @@ -419,6 +422,8 @@ private: PerfCounters *logger; + ZTracer::Endpoint trace_endpoint; + public: int lfn_find(const ghobject_t& oid, const Index& index, IndexedPath *path = NULL); diff --git a/src/os/filestore/Journal.h b/src/os/filestore/Journal.h index 7bc09ebfb1a..9c1206cd257 100644 --- a/src/os/filestore/Journal.h +++ b/src/os/filestore/Journal.h @@ -23,6 +23,7 @@ #include "common/Finisher.h" #include "common/TrackedOp.h" #include "os/ObjectStore.h" +#include "common/zipkin_trace.h" class PerfCounters;