From: Min Chen Date: Fri, 25 Jul 2014 09:29:52 +0000 (+0800) Subject: librbd: copy-on-read for clones, write entire object into child asychronously X-Git-Tag: v0.93~197^2~7 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=f408c8e4a48bbd76f6a344e70c301292e09af11f;p=ceph.git librbd: copy-on-read for clones, write entire object into child asychronously Signed-off-by: Min Chen Signed-off-by: Li Wang Signed-off-by: Yunchuan Wen --- diff --git a/src/include/xlist.h b/src/include/xlist.h index 53aa3ab06fc5..7fdbe3559592 100644 --- a/src/include/xlist.h +++ b/src/include/xlist.h @@ -160,6 +160,7 @@ public: return *this; } bool end() const { return cur == 0; } + item *get_cur() const { return cur; } }; iterator begin() { return iterator(_front); } diff --git a/src/librbd/AioRequest.cc b/src/librbd/AioRequest.cc index 63902ffec619..9591eb34d083 100644 --- a/src/librbd/AioRequest.cc +++ b/src/librbd/AioRequest.cc @@ -71,6 +71,38 @@ namespace librbd { /** read **/ + //copy-on-read: after read entire object, just write it into child + ssize_t AioRead::write_cor() + { + ldout(m_ictx->cct, 20) << "write_cor" << dendl; + int ret = 0; + + m_ictx->snap_lock.get_read(); + ::SnapContext snapc = m_ictx->snapc; + m_ictx->snap_lock.put_read(); + + librados::ObjectWriteOperation copyup_cor; + copyup_cor.exec("rbd", "copyup", m_entire_object); + + std::vector m_snaps; + for (std::vector::const_iterator it = snapc.snaps.begin(); + it != snapc.snaps.end(); ++it) { + m_snaps.push_back(it->val); + } + + librados::AioCompletion *cor_completion = + librados::Rados::aio_create_completion(m_ictx, librbd::cor_completion_callback, NULL); + + xlist::item *comp = + new xlist::item(cor_completion); + + m_ictx->add_cor_completion(comp);//add cor_completion to xlist + //asynchronously write object + ret = m_ictx->md_ctx.aio_operate(m_oid, cor_completion, &copyup_cor, snapc.seq.val, m_snaps); + + return ret; + } + bool AioRead::should_complete(int r) { ldout(m_ictx->cct, 20) 
<< "should_complete " << this << " " << m_oid << " " << m_object_off << "~" << m_object_len @@ -128,6 +160,7 @@ namespace librbd { m_ictx->prune_parent_extents(image_extents, image_overlap); // copy the read range to m_read_data m_read_data.substr_of(m_entire_object, m_object_off, m_object_len); + write_cor(); } } diff --git a/src/librbd/AioRequest.h b/src/librbd/AioRequest.h index 25aa2bd9eb81..8fbae736e543 100644 --- a/src/librbd/AioRequest.h +++ b/src/librbd/AioRequest.h @@ -75,6 +75,7 @@ namespace librbd { m_sparse(sparse), m_op_flags(op_flags) { } virtual ~AioRead() {} + ssize_t write_cor(); virtual bool should_complete(int r); virtual int send(); diff --git a/src/librbd/ImageCtx.cc b/src/librbd/ImageCtx.cc index da3e22ec4a70..622e8c420ba3 100644 --- a/src/librbd/ImageCtx.cc +++ b/src/librbd/ImageCtx.cc @@ -47,6 +47,7 @@ namespace librbd { parent_lock("librbd::ImageCtx::parent_lock"), refresh_lock("librbd::ImageCtx::refresh_lock"), aio_lock("librbd::ImageCtx::aio_lock"), + cor_lock("librbd::ImageCtx::cor_lock"), extra_read_flags(0), old_format(true), order(0), size(0), features(0), @@ -101,6 +102,7 @@ namespace librbd { object_set->return_enoent = true; object_cacher->start(); } + cor_completions = new xlist(); } ImageCtx::~ImageCtx() { @@ -117,6 +119,10 @@ namespace librbd { delete object_set; object_set = NULL; } + if (cor_completions) { + delete cor_completions; + cor_completions = NULL; + } delete[] format_string; } @@ -663,4 +669,67 @@ namespace librbd { pending_aio_cond.Wait(aio_lock); } } + } + + void ImageCtx::add_cor_completion(xlist::item *comp) + { + if(!comp) + return; + + cor_lock.Lock(); + cor_completions->push_back(comp); + cor_lock.Unlock(); + + ldout(cct, 10) << "add_cor_completion:: size = "<< cor_completions->size() << dendl; + } + + void ImageCtx::wait_last_completions() + { + ldout(cct, 10) << "wait_last_completions:: cor_completions = " << cor_completions << " size = " << cor_completions->size() << dendl; + xlist::iterator itr; + 
xlist::item *ptr; + + while (!cor_completions->empty()){ + cor_lock.Lock(); + librados::AioCompletion *comp = cor_completions->front(); + comp->wait_for_complete(); + itr = cor_completions->begin(); + ptr = itr.get_cur(); + cor_completions->pop_front(); + delete ptr; + ptr = NULL; + cor_lock.Unlock(); + } + ldout(cct, 10) << "wait_last_completions:: after clear cor_completions = " << cor_completions << " size = " << cor_completions->size() << dendl; + } + + void cor_completion_callback(librados::completion_t aio_completion_impl, void *arg) + { + librbd::ImageCtx * ictx = (librbd::ImageCtx *)arg; + + ictx->cor_lock.Lock(); + xlist *completions = ictx->cor_completions; + ictx->cor_lock.Unlock(); + + ldout(ictx->cct, 10) << "cor_completion_callback:: cor_completions = " << completions << " size = "<< completions->size() << dendl; + if (!completions) + return; + + //find current AioCompletion item in xlist, and remove it + for (xlist::iterator itr = completions->begin(); !(itr.end()); ++itr) { + if (aio_completion_impl == (*itr)->pc){ + xlist::item *ptr = itr.get_cur(); + + ictx->cor_lock.Lock(); + completions->remove(ptr); + ictx->cor_lock.Unlock(); + + delete ptr;//delete xlist::item * + ptr = NULL; + break; + } + } + ldout(ictx->cct, 10) << "cor_completion_callback:: after remove item, size = " << completions->size() << dendl; + } + } diff --git a/src/librbd/ImageCtx.h b/src/librbd/ImageCtx.h index 8412223154d4..33ae12d2d9de 100644 --- a/src/librbd/ImageCtx.h +++ b/src/librbd/ImageCtx.h @@ -74,6 +74,7 @@ namespace librbd { RWLock parent_lock; // protects parent_md and parent Mutex refresh_lock; // protects refresh_seq and last_refresh Mutex aio_lock; // protects pending_aio and pending_aio_cond + Mutex cor_lock; //protects cor_completions for copy-on-read unsigned extra_read_flags; @@ -100,6 +101,7 @@ namespace librbd { Cond pending_aio_cond; uint64_t pending_aio; + xlist *cor_completions; //copy-on-read AioCompletions /** * Either image_name or image_id must be 
set. @@ -165,7 +167,11 @@ namespace librbd { uint64_t prune_parent_extents(vector >& objectx, uint64_t overlap); void wait_for_pending_aio(); + + void add_cor_completion(xlist::item *comp); + void wait_last_completions();//wait for uncompleted asynchronous write which is still in xlist }; + void cor_completion_callback(librados::completion_t aio_completion_impl, void *arg); } #endif diff --git a/src/librbd/internal.cc b/src/librbd/internal.cc index 119ab4e2613a..7a755b5aa23f 100644 --- a/src/librbd/internal.cc +++ b/src/librbd/internal.cc @@ -2278,7 +2278,10 @@ reprotect_and_return_err: if (ictx->image_watcher != NULL) { ictx->image_watcher->flush_aio_operations(); } - if (ictx->object_cacher) { + if (ictx->cor_completions) + ictx->wait_last_completions();//copy-on-read: wait for unfinished AioCompletion requests + + if (ictx->object_cacher) ictx->shutdown_cache(); // implicitly flushes } else { flush(ictx);