From cfe8fa221b6b0c7aaa457673b2ebf720720d2a1d Mon Sep 17 00:00:00 2001 From: Jason Dillaman Date: Thu, 3 Sep 2015 00:17:17 -0400 Subject: [PATCH] rbd: export diff needs should write chunks in-order Use new OrderedThrottle class to ensure multiple AIO reads are written to the export diff in-order. Fixes: #12911 Signed-off-by: Jason Dillaman --- src/rbd.cc | 243 +++++++++++++++++++++++++---------------------------- 1 file changed, 116 insertions(+), 127 deletions(-) diff --git a/src/rbd.cc b/src/rbd.cc index 062034ccf09c..9257c1f44251 100755 --- a/src/rbd.cc +++ b/src/rbd.cc @@ -77,6 +77,19 @@ map map_options; // -o / --options map #define dout_subsys ceph_subsys_rbd +namespace { + +void aio_context_callback(librbd::completion_t completion, void *arg) +{ + librbd::RBD::AioCompletion *aio_completion = + reinterpret_cast(completion); + Context *context = reinterpret_cast(arg); + context->complete(aio_completion->get_return_value()); + aio_completion->release(); +} + +} // anonymous namespace + static std::map feature_mapping = boost::assign::map_list_of( RBD_FEATURE_LAYERING, "layering")( @@ -1104,53 +1117,33 @@ static int do_bench_write(librbd::Image& image, uint64_t io_size, return 0; } -struct ExportContext { - librbd::Image *image; - int fd; - uint64_t totalsize; - MyProgressContext pc; - - SimpleThrottle throttle; - Mutex lock; - - ExportContext(librbd::Image *i, int f, uint64_t t, int max_ops) : - image(i), - fd(f), - totalsize(t), - pc("Exporting image"), - throttle(max_ops, true), - lock("ExportContext::lock") - {} -}; - -class AioExportContext : public Context +class C_Export : public Context { public: - AioExportContext(SimpleThrottle &simple_throttle, librbd::Image &image, + C_Export(SimpleThrottle &simple_throttle, librbd::Image &image, uint64_t offset, uint64_t length, int fd) : m_aio_completion( - new librbd::RBD::AioCompletion(this, &AioExportContext::aio_callback)), - m_throttle(simple_throttle), - m_offset(offset), - m_fd(fd) + new librbd::RBD::AioCompletion(this, &aio_context_callback)), + m_throttle(simple_throttle), m_image(image), m_offset(offset), + m_length(length), m_fd(fd) + { + } + + void send() { m_throttle.start_op(); int op_flags = LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL | LIBRADOS_OP_FLAG_FADVISE_NOCACHE; - int r = image.aio_read2(offset, length, m_bufferlist, m_aio_completion, - op_flags); + int r = m_image.aio_read2(m_offset, m_length, m_bufferlist, + m_aio_completion, op_flags); if (r < 0) { cerr << "rbd: error requesting read from source image" << std::endl; + m_aio_completion->release(); m_throttle.end_op(r); } } - virtual ~AioExportContext() - { - m_aio_completion->release(); - } - virtual void finish(int r) { BOOST_SCOPE_EXIT((&m_throttle) (&r)) @@ -1186,19 +1179,13 @@ public: } } - static void aio_callback(librbd::completion_t completion, void *arg) - { - librbd::RBD::AioCompletion *aio_completion = - reinterpret_cast(completion); - AioExportContext *export_context = reinterpret_cast(arg); - export_context->complete(aio_completion->get_return_value()); - } - private: librbd::RBD::AioCompletion *m_aio_completion; SimpleThrottle &m_throttle; + librbd::Image &m_image; bufferlist m_bufferlist; uint64_t m_offset; + uint64_t m_length; int m_fd; }; @@ -1229,8 +1216,14 @@ static int do_export(librbd::Image& image, const char *path) SimpleThrottle throttle(max_concurrent_ops, false); uint64_t period = image.get_stripe_count() * (1ull << info.order); for (uint64_t offset = 0; offset < info.size; offset += period) { + if (throttle.pending_error()) { + break; + } + uint64_t length = min(period, info.size - offset); - new AioExportContext(throttle, image, offset, length, fd); + C_Export *ctx = new C_Export(throttle, image, offset, length, fd); + ctx->send(); + pc.update_progress(offset, info.size); } @@ -1250,92 +1243,90 @@ static int do_export(librbd::Image& image, const char *path) return r; } -class C_ExportDiff { +struct ExportDiffContext { + librbd::Image *image; + int fd; + uint64_t totalsize; + MyProgressContext pc; + OrderedThrottle throttle; + + ExportDiffContext(librbd::Image *i, int f, uint64_t t, int max_ops) : + image(i), fd(f), totalsize(t), pc("Exporting image"), + throttle(max_ops, true) { + } +}; + +class C_ExportDiff : public Context { public: - C_ExportDiff(ExportContext *ec, uint64_t offset, uint64_t length) - : m_export_context(ec), m_offset(offset), m_length(length) - { + C_ExportDiff(ExportDiffContext *edc, uint64_t offset, uint64_t length, + bool exists) + : m_export_diff_context(edc), m_offset(offset), m_length(length), + m_exists(exists) { } int send() { - if (m_export_context->throttle.pending_error()) { - return m_export_context->throttle.wait_for_ret(); + if (m_export_diff_context->throttle.pending_error()) { + return m_export_diff_context->throttle.wait_for_ret(); } - m_export_context->throttle.start_op(); - librbd::RBD::AioCompletion *aio_completion = - new librbd::RBD::AioCompletion(this, &C_ExportDiff::aio_callback); - int op_flags = LIBRADOS_OP_FLAG_FADVISE_NOCACHE; - int r = m_export_context->image->aio_read2(m_offset, m_length, m_read_data, - aio_completion, op_flags); - if (r < 0) { - aio_completion->release(); - complete(r); + C_OrderedThrottle *ctx = m_export_diff_context->throttle.start_op(this); + if (m_exists) { + librbd::RBD::AioCompletion *aio_completion = + new librbd::RBD::AioCompletion(ctx, &aio_context_callback); + + int op_flags = LIBRADOS_OP_FLAG_FADVISE_NOCACHE; + int r = m_export_diff_context->image->aio_read2( + m_offset, m_length, m_read_data, aio_completion, op_flags); + if (r < 0) { + aio_completion->release(); + ctx->complete(r); + } + } else { + ctx->complete(0); } - return r; + return 0; } static int export_diff_cb(uint64_t offset, size_t length, int exists, void *arg) { - ExportContext *ec = reinterpret_cast(arg); + ExportDiffContext *edc = reinterpret_cast(arg); - int r; - { - if (exists) { - C_ExportDiff *context = new C_ExportDiff(ec, offset, length); - r = context->send(); - } else { - Mutex::Locker lock(ec->lock); - r = write_extent(ec, offset, length, false); + C_ExportDiff *context = new C_ExportDiff(edc, offset, length, exists); + return context->send(); + } + +protected: + virtual void finish(int r) { + if (r >= 0) { + if (m_exists) { + m_exists = !m_read_data.is_zero(); + } + r = write_extent(m_export_diff_context, m_offset, m_length, m_exists); + if (r == 0 && m_exists) { + r = m_read_data.write_fd(m_export_diff_context->fd); } } - ec->pc.update_progress(offset, ec->totalsize); - return r; + m_export_diff_context->throttle.end_op(r); } private: - ExportContext *m_export_context; + ExportDiffContext *m_export_diff_context; uint64_t m_offset; uint64_t m_length; + bool m_exists; bufferlist m_read_data; - void complete(int r) { - { - Mutex::Locker locker(m_export_context->lock); - if (r >= 0) { - r = write_extent(m_export_context, m_offset, m_length, - !m_read_data.is_zero()); - if (r == 0) { - // block - r = m_read_data.write_fd(m_export_context->fd); - } - } - } - m_export_context->throttle.end_op(r); - delete this; - } - - static void aio_callback(librbd::completion_t completion, void *arg) - { - librbd::RBD::AioCompletion *aio_completion = - reinterpret_cast(completion); - C_ExportDiff *context = reinterpret_cast(arg); - - context->complete(aio_completion->get_return_value()); - aio_completion->release(); - } - - static int write_extent(ExportContext *ec, uint64_t offset, uint64_t length, - bool exists) { - assert(ec->lock.is_locked()); - + static int write_extent(ExportDiffContext *edc, uint64_t offset, + uint64_t length, bool exists) { // extent bufferlist bl; __u8 tag = exists ? 'w' : 'z'; ::encode(tag, bl); ::encode(offset, bl); ::encode(length, bl); - int r = bl.write_fd(ec->fd); + int r = bl.write_fd(edc->fd); + + edc->pc.update_progress(offset, edc->totalsize); return r; } }; @@ -1397,15 +1388,15 @@ static int do_export_diff(librbd::Image& image, const char *fromsnapname, } } - ExportContext ec(&image, fd, info.size, - g_conf->rbd_concurrent_management_ops); + ExportDiffContext edc(&image, fd, info.size, + g_conf->rbd_concurrent_management_ops); r = image.diff_iterate2(fromsnapname, 0, info.size, true, whole_object, - &C_ExportDiff::export_diff_cb, (void *)&ec); + &C_ExportDiff::export_diff_cb, (void *)&edc); if (r < 0) { goto out; } - r = ec.throttle.wait_for_ret(); + r = edc.throttle.wait_for_ret(); if (r < 0) { goto out; } @@ -1419,9 +1410,9 @@ static int do_export_diff(librbd::Image& image, const char *fromsnapname, out: if (r < 0) - ec.pc.fail(); + edc.pc.fail(); else - ec.pc.finish(); + edc.pc.finish(); return r; } @@ -1539,33 +1530,33 @@ done_img: update_snap_name(*new_img, snap); } -class AioImportContext : public Context +class C_Import : public Context { public: - AioImportContext(SimpleThrottle &simple_throttle, librbd::Image &image, - bufferlist &bl, uint64_t offset) - : m_throttle(simple_throttle), + C_Import(SimpleThrottle &simple_throttle, librbd::Image &image, + bufferlist &bl, uint64_t offset) + : m_throttle(simple_throttle), m_image(image), m_aio_completion( - new librbd::RBD::AioCompletion(this, &AioImportContext::aio_callback)), + new librbd::RBD::AioCompletion(this, &aio_context_callback)), m_bufferlist(bl), m_offset(offset) + { + } + + void send() { m_throttle.start_op(); int op_flags = LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL | LIBRADOS_OP_FLAG_FADVISE_NOCACHE; - int r = image.aio_write2(m_offset, m_bufferlist.length(), m_bufferlist, - m_aio_completion, op_flags); + int r = m_image.aio_write2(m_offset, m_bufferlist.length(), m_bufferlist, + m_aio_completion, op_flags); if (r < 0) { cerr << "rbd: error requesting write to destination image" << std::endl; + m_aio_completion->release(); m_throttle.end_op(r); } } - virtual ~AioImportContext() - { - m_aio_completion->release(); - } - virtual void finish(int r) { if (r < 0) { @@ -1575,16 +1566,9 @@ public: m_throttle.end_op(r); } - static void aio_callback(librbd::completion_t completion, void *arg) - { - librbd::RBD::AioCompletion *aio_completion = - reinterpret_cast(completion); - AioImportContext *import_context = reinterpret_cast(arg); - import_context->complete(aio_completion->get_return_value()); - } - private: SimpleThrottle &m_throttle; + librbd::Image &m_image; librbd::RBD::AioCompletion *m_aio_completion; bufferlist m_bufferlist; uint64_t m_offset; @@ -1669,6 +1653,10 @@ static int do_import(librbd::RBD &rbd, librados::IoCtx& io_ctx, // loop body handles 0 return, as we may have a block to flush while ((readlen = ::read(fd, p + blklen, reqlen)) >= 0) { + if (throttle->pending_error()) { + break; + } + blklen += readlen; // if read was short, try again to fill the block before writing if (readlen && ((size_t)readlen < reqlen)) { @@ -1693,7 +1681,8 @@ static int do_import(librbd::RBD &rbd, librados::IoCtx& io_ctx, // write as much as we got; perhaps less than imgblklen // but skip writing zeros to create sparse images if (!bl.is_zero()) { - new AioImportContext(*throttle, image, bl, image_pos); + C_Import *ctx = new C_Import(*throttle, image, bl, image_pos); + ctx->send(); } // done with whole block, whether written or not -- 2.47.3