From ee4d2170f404b8032e1cb0b7270c25a27b107157 Mon Sep 17 00:00:00 2001 From: Adam Crume Date: Thu, 31 Jul 2014 16:22:44 -0700 Subject: [PATCH] rbd-replay: Support replaying partial traces Tracing may start after the application is started, and image open calls may missed. To support replaying these traces, additional information is traced, allowing missing open calls to be generated. Signed-off-by: Adam Crume --- src/librbd/internal.cc | 22 +++--- src/rbd_replay/prep-for-replay.py | 121 ++++++++++++++++++------------ src/tracing/librbd.tp | 72 ++++++++++++------ 3 files changed, 133 insertions(+), 82 deletions(-) diff --git a/src/librbd/internal.cc b/src/librbd/internal.cc index fe50ddf72d963..abbb9979d18d1 100644 --- a/src/librbd/internal.cc +++ b/src/librbd/internal.cc @@ -1527,7 +1527,7 @@ reprotect_and_return_err: int resize(ImageCtx *ictx, uint64_t size, ProgressContext& prog_ctx) { - tracepoint(librbd, resize_enter, ictx, ictx->name.c_str(), ictx->id.c_str(), size); + tracepoint(librbd, resize_enter, ictx, ictx->name.c_str(), ictx->snap_name.c_str(), ictx->read_only, size); CephContext *cct = ictx->cct; ldout(cct, 20) << "resize " << ictx << " " << ictx->size << " -> " << size << dendl; @@ -2031,7 +2031,7 @@ reprotect_and_return_err: int copy(ImageCtx *src, ImageCtx *dest, ProgressContext &prog_ctx) { - tracepoint(librbd, copy_enter, src, src->name.c_str(), src->id.c_str(), dest, dest->name.c_str(), dest->id.c_str()); + tracepoint(librbd, copy_enter, src, src->name.c_str(), src->snap_name.c_str(), src->read_only, dest, dest->name.c_str(), dest->snap_name.c_str(), dest->read_only); src->md_lock.get_read(); src->snap_lock.get_read(); uint64_t src_size = src->get_image_size(src->snap_id); @@ -2674,7 +2674,7 @@ reprotect_and_return_err: ssize_t read(ImageCtx *ictx, const vector >& image_extents, char *buf, bufferlist *pbl) { - tracepoint(librbd, read_enter, ictx, ictx->name.c_str(), ictx->id.c_str()); + tracepoint(librbd, read_enter, ictx, ictx->name.c_str(), ictx->snap_name.c_str(), ictx->read_only); for (vector >::const_iterator r = image_extents.begin(); r != image_extents.end(); ++r) { @@ -2706,7 +2706,7 @@ reprotect_and_return_err: ssize_t write(ImageCtx *ictx, uint64_t off, size_t len, const char *buf) { - tracepoint(librbd, write_enter, ictx, ictx->name.c_str(), ictx->id.c_str(), off, len, buf); + tracepoint(librbd, write_enter, ictx, ictx->name.c_str(), ictx->snap_name.c_str(), ictx->read_only, off, len, buf); utime_t start_time, elapsed; ldout(ictx->cct, 20) << "write " << ictx << " off = " << off << " len = " << len << dendl; @@ -2754,7 +2754,7 @@ reprotect_and_return_err: int discard(ImageCtx *ictx, uint64_t off, uint64_t len) { - tracepoint(librbd, discard_enter, ictx, ictx->name.c_str(), ictx->id.c_str(), off, len); + tracepoint(librbd, discard_enter, ictx, ictx->name.c_str(), ictx->snap_name.c_str(), ictx->read_only, off, len); utime_t start_time, elapsed; ldout(ictx->cct, 20) << "discard " << ictx << " off = " << off << " len = " << len << dendl; @@ -2896,7 +2896,7 @@ reprotect_and_return_err: int aio_flush(ImageCtx *ictx, AioCompletion *c) { - tracepoint(librbd, aio_flush_enter, ictx, ictx->name.c_str(), ictx->id.c_str(), c); + tracepoint(librbd, aio_flush_enter, ictx, ictx->name.c_str(), ictx->snap_name.c_str(), ictx->read_only, c); CephContext *cct = ictx->cct; ldout(cct, 20) << "aio_flush " << ictx << " completion " << c << dendl; @@ -2930,7 +2930,7 @@ reprotect_and_return_err: int flush(ImageCtx *ictx) { - tracepoint(librbd, flush_enter, ictx, ictx->name.c_str(), ictx->id.c_str()); + tracepoint(librbd, flush_enter, ictx, ictx->name.c_str(), ictx->snap_name.c_str(), ictx->read_only); CephContext *cct = ictx->cct; ldout(cct, 20) << "flush " << ictx << dendl; @@ -2966,7 +2966,7 @@ reprotect_and_return_err: int invalidate_cache(ImageCtx *ictx) { - tracepoint(librbd, invalidate_cache_enter, ictx, ictx->name.c_str(), ictx->id.c_str()); + tracepoint(librbd, invalidate_cache_enter, ictx, ictx->name.c_str(), ictx->snap_name.c_str(), ictx->read_only); CephContext *cct = ictx->cct; ldout(cct, 20) << "invalidate_cache " << ictx << dendl; @@ -2985,7 +2985,7 @@ reprotect_and_return_err: int aio_write(ImageCtx *ictx, uint64_t off, size_t len, const char *buf, AioCompletion *c) { - tracepoint(librbd, aio_write_enter, ictx, ictx->name.c_str(), ictx->id.c_str(), off, len, buf, c); + tracepoint(librbd, aio_write_enter, ictx, ictx->name.c_str(), ictx->snap_name.c_str(), ictx->read_only, off, len, buf, c); CephContext *cct = ictx->cct; ldout(cct, 20) << "aio_write " << ictx << " off = " << off << " len = " << len << " buf = " << (void*)buf << dendl; @@ -3074,7 +3074,7 @@ reprotect_and_return_err: int aio_discard(ImageCtx *ictx, uint64_t off, uint64_t len, AioCompletion *c) { - tracepoint(librbd, aio_discard_enter, ictx, ictx->name.c_str(), ictx->id.c_str(), off, len, c); + tracepoint(librbd, aio_discard_enter, ictx, ictx->name.c_str(), ictx->snap_name.c_str(), ictx->read_only, off, len, c); CephContext *cct = ictx->cct; ldout(cct, 20) << "aio_discard " << ictx << " off = " << off << " len = " << len << dendl; @@ -3185,7 +3185,7 @@ reprotect_and_return_err: int aio_read(ImageCtx *ictx, const vector >& image_extents, char *buf, bufferlist *pbl, AioCompletion *c) { - tracepoint(librbd, aio_read_enter, ictx, ictx->name.c_str(), ictx->id.c_str(), buf, c); + tracepoint(librbd, aio_read_enter, ictx, ictx->name.c_str(), ictx->snap_name.c_str(), ictx->read_only, buf, c); for (vector >::const_iterator r = image_extents.begin(); r != image_extents.end(); ++r) { diff --git a/src/rbd_replay/prep-for-replay.py b/src/rbd_replay/prep-for-replay.py index e9a06500e76fb..a6300750ffda2 100755 --- a/src/rbd_replay/prep-for-replay.py +++ b/src/rbd_replay/prep-for-replay.py @@ -282,6 +282,9 @@ class Processor(object): self.threads = {} self.ioCount = 0 self.recentCompletions = [] + self.openImages = {} + self.threads = {} + self.ios = [] def nextID(self): val = self.ioCount self.ioCount = self.ioCount + 2 @@ -289,6 +292,22 @@ class Processor(object): def completed(self, io): self.recentCompletions.append(io) self.recentCompletions[:] = [x for x in self.recentCompletions if x.start_time > io.start_time - self.window] + def requireImage(self, ts, thread, imagectx, name, snap_name, readonly): + if imagectx in self.openImages: + return + ionum = self.nextID() + thread.pendingIO = OpenImageIO(ionum, ts, thread, thread.pendingIO, imagectx, name, snap_name, readonly) + thread.pendingIO.addThreadCompletionDependencies(self.threads, self.recentCompletions) + thread.issuedIO(thread.pendingIO) + self.ios.append(thread.pendingIO) + thread.pendingIO.end_time = ts + completionIO = CompletionIO(ts, thread, thread.pendingIO) + thread.completedIO(completionIO) + self.ios.append(completionIO) + self.completed(completionIO) + self.openImages[thread.pendingIO.imagectx] = thread.pendingIO.imagectx + if self.printOnRead: + print str(thread.pendingIO) def run(self, raw_args): parser = argparse.ArgumentParser(description='convert librbd trace output to an rbd-replay input file.') parser.add_argument('--print-on-read', action="store_true", help='print events as they are read in (for debugging)') @@ -300,12 +319,10 @@ class Processor(object): self.window = 1e9 * args.window inputFileName = args.input outputFileName = args.output - ios = [] pendingIOs = {} limit = 100000000000 - printOnRead = args.print_on_read + self.printOnRead = args.print_on_read printOnWrite = args.print_on_write - threads = {} traces = TraceCollection() traces.add_trace(inputFileName, "ctf") @@ -321,26 +338,28 @@ class Processor(object): trace_start = ts ts = ts - trace_start threadID = event["pthread_id"] - if threadID in threads: - thread = threads[threadID] + if threadID in self.threads: + thread = self.threads[threadID] else: - thread = Thread(threadID, threads, self.window) - threads[threadID] = thread + thread = Thread(threadID, self.threads, self.window) + self.threads[threadID] = thread ionum = self.nextID() io = StartThreadIO(ionum, ts, thread) - ios.append(io) - if printOnRead: + self.ios.append(io) + if self.printOnRead: print str(io) thread.insertTS(ts) if event.name == "librbd:read_enter": name = event["name"] - readid = event["id"] + snap_name = event["snap_name"] + readonly = event["read_only"] imagectx = event["imagectx"] + self.requireImage(ts, thread, imagectx, name, snap_name, readonly) ionum = self.nextID() thread.pendingIO = ReadIO(ionum, ts, thread, thread.pendingIO, imagectx, []) - thread.pendingIO.addThreadCompletionDependencies(threads, self.recentCompletions) + thread.pendingIO.addThreadCompletionDependencies(self.threads, self.recentCompletions) thread.issuedIO(thread.pendingIO) - ios.append(thread.pendingIO) + self.ios.append(thread.pendingIO) elif event.name == "librbd:open_image_enter": imagectx = event["imagectx"] name = event["name"] @@ -349,31 +368,33 @@ class Processor(object): readonly = event["read_only"] ionum = self.nextID() thread.pendingIO = OpenImageIO(ionum, ts, thread, thread.pendingIO, imagectx, name, snap_name, readonly) - thread.pendingIO.addThreadCompletionDependencies(threads, self.recentCompletions) + thread.pendingIO.addThreadCompletionDependencies(self.threads, self.recentCompletions) thread.issuedIO(thread.pendingIO) - ios.append(thread.pendingIO) + self.ios.append(thread.pendingIO) elif event.name == "librbd:open_image_exit": thread.pendingIO.end_time = ts completionIO = CompletionIO(ts, thread, thread.pendingIO) thread.completedIO(completionIO) - ios.append(completionIO) + self.ios.append(completionIO) self.completed(completionIO) - if printOnRead: + self.openImages[thread.pendingIO.imagectx] = thread.pendingIO.imagectx + if self.printOnRead: print str(thread.pendingIO) elif event.name == "librbd:close_image_enter": imagectx = event["imagectx"] ionum = self.nextID() thread.pendingIO = CloseImageIO(ionum, ts, thread, thread.pendingIO, imagectx) - thread.pendingIO.addThreadCompletionDependencies(threads, self.recentCompletions) + thread.pendingIO.addThreadCompletionDependencies(self.threads, self.recentCompletions) thread.issuedIO(thread.pendingIO) - ios.append(thread.pendingIO) + self.ios.append(thread.pendingIO) elif event.name == "librbd:close_image_exit": thread.pendingIO.end_time = ts completionIO = CompletionIO(ts, thread, thread.pendingIO) thread.completedIO(completionIO) - ios.append(completionIO) + self.ios.append(completionIO) self.completed(completionIO) - if printOnRead: + del self.openImages[thread.pendingIO.imagectx] + if self.printOnRead: print str(thread.pendingIO) elif event.name == "librbd:read_extent": offset = event["offset"] @@ -383,38 +404,42 @@ class Processor(object): thread.pendingIO.end_time = ts completionIO = CompletionIO(ts, thread, thread.pendingIO) thread.completedIO(completionIO) - ios.append(completionIO) + self.ios.append(completionIO) completed(completionIO) - if printOnRead: + if self.printOnRead: print str(thread.pendingIO) elif event.name == "librbd:write_enter": name = event["name"] - readid = event["id"] + snap_name = event["snap_name"] + readonly = event["read_only"] offset = event["off"] length = event["buf_len"] imagectx = event["imagectx"] + self.requireImage(ts, thread, imagectx, name, snap_name, readonly) ionum = self.nextID() thread.pendingIO = WriteIO(ionum, ts, thread, thread.pendingIO, imagectx, [Extent(offset, length)]) - thread.pendingIO.addThreadCompletionDependencies(threads, self.recentCompletions) + thread.pendingIO.addThreadCompletionDependencies(self.threads, self.recentCompletions) thread.issuedIO(thread.pendingIO) - ios.append(thread.pendingIO) + self.ios.append(thread.pendingIO) elif event.name == "librbd:write_exit": thread.pendingIO.end_time = ts completionIO = CompletionIO(ts, thread, thread.pendingIO) thread.completedIO(completionIO) - ios.append(completionIO) + self.ios.append(completionIO) completed(completionIO) - if printOnRead: + if self.printOnRead: print str(thread.pendingIO) elif event.name == "librbd:aio_read_enter": name = event["name"] - readid = event["id"] + snap_name = event["snap_name"] + readonly = event["read_only"] completion = event["completion"] imagectx = event["imagectx"] + self.requireImage(ts, thread, imagectx, name, snap_name, readonly) ionum = self.nextID() thread.pendingIO = AioReadIO(ionum, ts, thread, thread.pendingIO, imagectx, []) - thread.pendingIO.addThreadCompletionDependencies(threads, self.recentCompletions) - ios.append(thread.pendingIO) + thread.pendingIO.addThreadCompletionDependencies(self.threads, self.recentCompletions) + self.ios.append(thread.pendingIO) thread.issuedIO(thread.pendingIO) pendingIOs[completion] = thread.pendingIO elif event.name == "librbd:aio_read_extent": @@ -422,22 +447,24 @@ class Processor(object): length = event["length"] thread.pendingIO.extents.append(Extent(offset, length)) elif event.name == "librbd:aio_read_exit": - if printOnRead: + if self.printOnRead: print str(thread.pendingIO) elif event.name == "librbd:aio_write_enter": name = event["name"] - writeid = event["id"] + snap_name = event["snap_name"] + readonly = event["read_only"] offset = event["off"] length = event["len"] completion = event["completion"] imagectx = event["imagectx"] + self.requireImage(ts, thread, imagectx, name, snap_name, readonly) ionum = self.nextID() thread.pendingIO = AioWriteIO(ionum, ts, thread, thread.pendingIO, imagectx, [Extent(offset, length)]) - thread.pendingIO.addThreadCompletionDependencies(threads, self.recentCompletions) + thread.pendingIO.addThreadCompletionDependencies(self.threads, self.recentCompletions) thread.issuedIO(thread.pendingIO) - ios.append(thread.pendingIO) + self.ios.append(thread.pendingIO) pendingIOs[completion] = thread.pendingIO - if printOnRead: + if self.printOnRead: print str(thread.pendingIO) elif event.name == "librbd:aio_complete_enter": completion = event["completion"] @@ -448,18 +475,18 @@ class Processor(object): completedIO.end_time = ts completionIO = CompletionIO(ts, thread, completedIO) completedIO.thread.completedIO(completionIO) - ios.append(completionIO) + self.ios.append(completionIO) self.completed(completionIO) - if printOnRead: + if self.printOnRead: print str(completionIO) # Insert-thread-stop phase - ios = sorted(ios, key = lambda io: io.start_time) - for threadID in threads: - thread = threads[threadID] + self.ios = sorted(self.ios, key = lambda io: io.start_time) + for threadID in self.threads: + thread = self.threads[threadID] ionum = None maxIONum = 0 # only valid if ionum is None - for io in ios: + for io in self.ios: if io.ionum > maxIONum: maxIONum = io.ionum if io.start_time > thread.lastTS: @@ -471,14 +498,14 @@ class Processor(object): if maxIONum % 2 == 1: maxIONum = maxIONum - 1 ionum = maxIONum + 2 - for io in ios: + for io in self.ios: if io.ionum >= ionum: io.ionum = io.ionum + 2 # TODO: Insert in the right place, don't append and re-sort - ios.append(StopThreadIO(ionum, thread.lastTS, thread)) - ios = sorted(ios, key = lambda io: io.start_time) + self.ios.append(StopThreadIO(ionum, thread.lastTS, thread)) + self.ios = sorted(self.ios, key = lambda io: io.start_time) - for io in ios: + for io in self.ios: if io.prev and io.prev in io.dependencies: io.dependencies.remove(io.prev) if io.isCompletion: @@ -486,11 +513,11 @@ class Processor(object): for dep in io.dependencies: dep.numSuccessors = dep.numSuccessors + 1 - if printOnRead and printOnWrite: + if self.printOnRead and printOnWrite: print with open(outputFileName, "wb") as f: - for io in ios: + for io in self.ios: if printOnWrite and not io.isCompletion: print str(io) io.writeTo(f) diff --git a/src/tracing/librbd.tp b/src/tracing/librbd.tp index 373f3eedd246c..46d89bc6a4f86 100644 --- a/src/tracing/librbd.tp +++ b/src/tracing/librbd.tp @@ -5,11 +5,13 @@ TRACEPOINT_EVENT(librbd, read_enter, TP_ARGS( void*, imagectx, const char*, name, - const char*, id), + const char*, snap_name, + char, read_only), TP_FIELDS( ctf_integer_hex(void*, imagectx, imagectx) ctf_string(name, name) - ctf_string(id, id) + ctf_string(snap_name, snap_name) + ctf_integer(char, read_only, read_only) ) ) @@ -35,14 +37,16 @@ TRACEPOINT_EVENT(librbd, write_enter, TP_ARGS( void*, imagectx, const char*, name, - const char*, id, + const char*, snap_name, + char, read_only, uint64_t, off, size_t, len, const char*, buf), TP_FIELDS( ctf_integer_hex(void*, imagectx, imagectx) ctf_string(name, name) - ctf_string(id, id) + ctf_string(snap_name, snap_name) + ctf_integer(char, read_only, read_only) ctf_integer(uint64_t, off, off) ceph_ctf_sequence(unsigned char, buf, buf, size_t, len) ) @@ -179,7 +183,8 @@ TRACEPOINT_EVENT(librbd, aio_write_enter, TP_ARGS( void*, imagectx, const char*, name, - const char*, id, + const char*, snap_name, + char, read_only, uint64_t, off, size_t, len, const char*, buf, @@ -187,7 +192,8 @@ TRACEPOINT_EVENT(librbd, aio_write_enter, TP_FIELDS( ctf_integer_hex(void*, imagectx, imagectx) ctf_string(name, name) - ctf_string(id, id) + ctf_string(snap_name, snap_name) + ctf_integer(char, read_only, read_only) ctf_integer(uint64_t, off, off) ctf_integer(size_t, len, len) ceph_ctf_sequence(unsigned char, buf, buf, size_t, len) @@ -207,13 +213,15 @@ TRACEPOINT_EVENT(librbd, aio_read_enter, TP_ARGS( void*, imagectx, const char*, name, - const char*, id, + const char*, snap_name, + char, read_only, const char*, buf, const void*, completion), TP_FIELDS( ctf_integer_hex(void*, imagectx, imagectx) ctf_string(name, name) - ctf_string(id, id) + ctf_string(snap_name, snap_name) + ctf_integer(char, read_only, read_only) ctf_integer_hex(const void*, completion, completion) ) ) @@ -240,14 +248,16 @@ TRACEPOINT_EVENT(librbd, aio_discard_enter, TP_ARGS( void*, imagectx, const char*, name, - const char*, id, + const char*, snap_name, + char, read_only, uint64_t, off, uint64_t, len, void*, completion), TP_FIELDS( ctf_integer_hex(void*, imagectx, imagectx) ctf_string(name, name) - ctf_string(id, id) + ctf_string(snap_name, snap_name) + ctf_integer(char, read_only, read_only) ctf_integer(uint64_t, off, off) ctf_integer(uint64_t, len, len) ctf_integer_hex(void*, completion, completion) @@ -266,11 +276,13 @@ TRACEPOINT_EVENT(librbd, invalidate_cache_enter, TP_ARGS( void*, imagectx, const char*, name, - const char*, id), + const char*, snap_name, + char, read_only), TP_FIELDS( ctf_integer_hex(void*, imagectx, imagectx) ctf_string(name, name) - ctf_string(id, id) + ctf_string(snap_name, snap_name) + ctf_integer(char, read_only, read_only) ) ) @@ -286,11 +298,13 @@ TRACEPOINT_EVENT(librbd, flush_enter, TP_ARGS( void*, imagectx, const char*, name, - const char*, id), + const char*, snap_name, + char, read_only), TP_FIELDS( ctf_integer_hex(void*, imagectx, imagectx) ctf_string(name, name) - ctf_string(id, id) + ctf_string(snap_name, snap_name) + ctf_integer(char, read_only, read_only) ) ) @@ -306,12 +320,14 @@ TRACEPOINT_EVENT(librbd, aio_flush_enter, TP_ARGS( void*, imagectx, const char*, name, - const char*, id, + const char*, snap_name, + char, read_only, const void*, completion), TP_FIELDS( ctf_integer_hex(void*, imagectx, imagectx) ctf_string(name, name) - ctf_string(id, id) + ctf_string(snap_name, snap_name) + ctf_integer(char, read_only, read_only) ctf_integer_hex(const void*, completion, completion) ) ) @@ -328,17 +344,21 @@ TRACEPOINT_EVENT(librbd, copy_enter, TP_ARGS( void*, src_imagectx, const char*, src_name, - const char*, src_id, + const char*, src_snap_name, + char, src_read_only, void*, dst_imagectx, const char*, dst_name, - const char*, dst_id), + const char*, dst_snap_name, + char, dst_read_only), TP_FIELDS( ctf_integer_hex(void*, src_imagectx, src_imagectx) ctf_string(src_name, src_name) - ctf_string(src_id, src_id) + ctf_string(src_snap_name, src_snap_name) + ctf_integer(char, src_read_only, src_read_only) ctf_integer_hex(void*, dst_imagectx, dst_imagectx) ctf_string(dst_name, dst_name) - ctf_string(dst_id, dst_id) + ctf_string(dst_snap_name, dst_snap_name) + ctf_integer(char, dst_read_only, dst_read_only) ) ) @@ -354,12 +374,14 @@ TRACEPOINT_EVENT(librbd, resize_enter, TP_ARGS( void*, imagectx, const char*, name, - const char*, id, + const char*, snap_name, + char, read_only, uint64_t, size), TP_FIELDS( ctf_integer_hex(void*, imagectx, imagectx) ctf_string(name, name) - ctf_string(id, id) + ctf_string(snap_name, snap_name) + ctf_integer(char, read_only, read_only) ctf_integer(uint64_t, size, size) ) ) @@ -398,13 +420,15 @@ TRACEPOINT_EVENT(librbd, discard_enter, TP_ARGS( void*, imagectx, const char*, name, - const char*, id, + const char*, snap_name, + char, read_only, uint64_t, off, uint64_t, len), TP_FIELDS( ctf_integer_hex(void*, imagectx, imagectx) ctf_string(name, name) - ctf_string(id, id) + ctf_string(snap_name, snap_name) + ctf_integer(char, read_only, read_only) ctf_integer(uint64_t, off, off) ctf_integer(uint64_t, len, len) ) -- 2.39.5