git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
librbd: copy-on-read for clones, write entire object into child asynchronously
author Min Chen <minchen@ubuntukylin.com>
Fri, 25 Jul 2014 09:29:52 +0000 (17:29 +0800)
committer Jason Dillaman <dillaman@redhat.com>
Tue, 20 Jan 2015 15:11:39 +0000 (10:11 -0500)
Signed-off-by: Min Chen <minchen@ubuntukylin.com>
Signed-off-by: Li Wang <liwang@ubuntukylin.com>
Signed-off-by: Yunchuan Wen <yunchuanwen@ubuntukylin.com>
src/include/xlist.h
src/librbd/AioRequest.cc
src/librbd/AioRequest.h
src/librbd/ImageCtx.cc
src/librbd/ImageCtx.h
src/librbd/internal.cc

index 53aa3ab06fc5195dbd76c0d5636619f5f81b6ef1..7fdbe355959253e652f696f72a9d552c7a64fc9d 100644 (file)
@@ -160,6 +160,7 @@ public:
       return *this;
     }
     bool end() const { return cur == 0; }
+    // Expose the list node the iterator currently refers to (0 once end()
+    // is true), so a caller can remove or delete that node directly.
+    item *get_cur() const { return cur; }
   };
 
   iterator begin() { return iterator(_front); }
index 63902ffec61924da96a66b9d9994dcadc0d276c4..9591eb34d083d38710ba8821c72552f007b5a1cf 100644 (file)
@@ -71,6 +71,38 @@ namespace librbd {
 
   /** read **/
 
+  /**
+   * Copy-on-read: after the entire parent object has been read into
+   * m_entire_object, write it into the child object asynchronously by
+   * invoking the "copyup" method of the "rbd" object class.
+   *
+   * The completion is registered with the ImageCtx before the write is
+   * submitted. If submission fails it is unregistered and released again:
+   * the completion callback is not expected to run for a request that
+   * was never queued, so nothing else would ever clean the entry up and
+   * ImageCtx::wait_last_completions() would wait on it indefinitely.
+   *
+   * @returns the result of aio_operate(); a negative value means the
+   *          write could not be submitted
+   */
+  ssize_t AioRead::write_cor()
+  {
+    ldout(m_ictx->cct, 20) << "write_cor" << dendl;
+
+    // take a private copy of the snap context; it is used unlocked below
+    m_ictx->snap_lock.get_read();
+    ::SnapContext snapc = m_ictx->snapc;
+    m_ictx->snap_lock.put_read();
+
+    librados::ObjectWriteOperation copyup_cor;
+    copyup_cor.exec("rbd", "copyup", m_entire_object);
+
+    // aio_operate() takes plain snap ids, so unwrap each snapid_t
+    std::vector<librados::snap_t> snaps;
+    snaps.reserve(snapc.snaps.size());
+    for (std::vector<snapid_t>::const_iterator it = snapc.snaps.begin();
+         it != snapc.snaps.end(); ++it) {
+      snaps.push_back(it->val);
+    }
+
+    librados::AioCompletion *cor_completion =
+        librados::Rados::aio_create_completion(m_ictx, librbd::cor_completion_callback, NULL);
+
+    xlist<librados::AioCompletion *>::item *comp =
+        new xlist<librados::AioCompletion *>::item(cor_completion);
+
+    // track the completion so that it can be waited for before shutdown
+    m_ictx->add_cor_completion(comp);
+
+    // asynchronously write the object
+    int ret = m_ictx->md_ctx.aio_operate(m_oid, cor_completion, &copyup_cor,
+                                         snapc.seq.val, snaps);
+    if (ret < 0) {
+      ldout(m_ictx->cct, 5) << "write_cor: aio_operate failed on " << m_oid
+                            << " r = " << ret << dendl;
+
+      // Not submitted: we still own the list entry, so take it off the
+      // list under cor_lock and free both the entry and the completion.
+      m_ictx->cor_lock.Lock();
+      m_ictx->cor_completions->remove(comp);
+      m_ictx->cor_lock.Unlock();
+
+      delete comp;
+      cor_completion->release();
+    }
+
+    return ret;
+  }
+
   bool AioRead::should_complete(int r)
   {
     ldout(m_ictx->cct, 20) << "should_complete " << this << " " << m_oid << " " << m_object_off << "~" << m_object_len
@@ -128,6 +160,7 @@ namespace librbd {
        m_ictx->prune_parent_extents(image_extents, image_overlap);
        // copy the read range to m_read_data
        m_read_data.substr_of(m_entire_object, m_object_off, m_object_len);
+       write_cor();
       }
     }
 
index 25aa2bd9eb8195e3e4a0706a72c8f0f55a6fabb1..8fbae736e5435665c2dd2f070e854c3b54e1cff8 100644 (file)
@@ -75,6 +75,7 @@ namespace librbd {
        m_sparse(sparse), m_op_flags(op_flags) {
     }
     virtual ~AioRead() {}
+    ssize_t write_cor();
     virtual bool should_complete(int r);
     virtual int send();
 
index da3e22ec4a70012683503fbcd8c0ee39014a9690..622e8c420ba3e14d77d23b854179c001a1780b35 100644 (file)
@@ -47,6 +47,7 @@ namespace librbd {
       parent_lock("librbd::ImageCtx::parent_lock"),
       refresh_lock("librbd::ImageCtx::refresh_lock"),
       aio_lock("librbd::ImageCtx::aio_lock"),
+      cor_lock("librbd::ImageCtx::cor_lock"),
       extra_read_flags(0),
       old_format(true),
       order(0), size(0), features(0),
@@ -101,6 +102,7 @@ namespace librbd {
       object_set->return_enoent = true;
       object_cacher->start();
     }
+    cor_completions = new xlist<librados::AioCompletion*>();
   }
 
   ImageCtx::~ImageCtx() {
@@ -117,6 +119,10 @@ namespace librbd {
       delete object_set;
       object_set = NULL;
     }
+    if (cor_completions) {
+      delete cor_completions;
+      cor_completions = NULL;
+    }
     delete[] format_string;
   }
 
@@ -663,4 +669,67 @@ namespace librbd {
       pending_aio_cond.Wait(aio_lock);
     }
   }
+ }
+
+  /**
+   * Append a copy-on-read completion entry to cor_completions.
+   *
+   * @param comp list item wrapping the AioCompletion of an in-flight
+   *             copy-on-read write; a NULL pointer is ignored
+   */
+  void ImageCtx::add_cor_completion(xlist<librados::AioCompletion*>::item *comp)
+  {
+    if (!comp)
+      return;
+
+    cor_lock.Lock();
+    cor_completions->push_back(comp);
+    // Report the size while cor_lock is still held: cor_completion_callback()
+    // removes entries from the same list, so an unlocked read would race.
+    ldout(cct, 10) << "add_cor_completion:: size = "<< cor_completions->size() << dendl;
+    cor_lock.Unlock();
+  }
+
+  /**
+   * Drain cor_completions: block until every copy-on-read write that is
+   * still on the list has completed, freeing each list entry and
+   * releasing its AioCompletion along the way.
+   *
+   * Each entry is detached from the list while cor_lock is held, but the
+   * blocking wait itself happens with the lock dropped, so that
+   * cor_completion_callback() and add_cor_completion() are not stalled
+   * behind a slow write. An entry detached here can no longer be found by
+   * the callback, which makes this function its sole owner.
+   *
+   * NOTE(review): wait_for_complete() presumably does not guarantee that
+   * the completion callback has finished running; confirm that the
+   * ImageCtx outlives any callback still in flight.
+   */
+  void ImageCtx::wait_last_completions()
+  {
+    cor_lock.Lock();
+    ldout(cct, 10) << "wait_last_completions:: cor_completions = " << cor_completions  << " size = " << cor_completions->size()  << dendl;
+
+    // the emptiness test is always evaluated with cor_lock held
+    while (!cor_completions->empty()) {
+      xlist<librados::AioCompletion*>::item *ptr = cor_completions->begin().get_cur();
+      librados::AioCompletion *comp = cor_completions->front();
+      cor_completions->pop_front();
+      cor_lock.Unlock();
+
+      comp->wait_for_complete();
+      delete ptr;
+      comp->release();
+
+      cor_lock.Lock();
+    }
+
+    ldout(cct, 10) << "wait_last_completions:: after clear cor_completions = " << cor_completions  << " size = " << cor_completions->size() << dendl;
+    cor_lock.Unlock();
+  }
+
+  /**
+   * Completion callback for the asynchronous copy-on-read write issued by
+   * AioRead::write_cor(): finds the finished completion in the image's
+   * cor_completions list, unlinks it and frees it.
+   *
+   * The lookup and the removal are done in one critical section under
+   * cor_lock, so the entry cannot be popped or deleted by another thread
+   * between being found and being removed. If the entry is no longer on
+   * the list (already drained elsewhere) nothing is freed here.
+   *
+   * @param aio_completion_impl handle of the finished completion; matched
+   *                            against the pc member of each tracked
+   *                            AioCompletion
+   * @param arg                 the ImageCtx that was registered as the
+   *                            callback argument
+   */
+  void cor_completion_callback(librados::completion_t aio_completion_impl, void *arg)
+  {
+    librbd::ImageCtx *ictx = static_cast<librbd::ImageCtx *>(arg);
+
+    xlist<librados::AioCompletion*>::item *found = NULL;
+    librados::AioCompletion *finished = NULL;
+
+    ictx->cor_lock.Lock();
+    xlist<librados::AioCompletion*> *completions = ictx->cor_completions;
+    if (!completions) {
+      // check before the first dereference (the size() in the log below)
+      ictx->cor_lock.Unlock();
+      return;
+    }
+
+    ldout(ictx->cct, 10) << "cor_completion_callback:: cor_completions = " << completions << " size = "<< completions->size() << dendl;
+
+    //find current AioCompletion item in xlist, and remove it
+    for (xlist<librados::AioCompletion*>::iterator itr = completions->begin(); !(itr.end()); ++itr) {
+      if (aio_completion_impl == (*itr)->pc) {
+        found = itr.get_cur();
+        finished = *itr;
+        completions->remove(found);
+        break; // itr refers to the unlinked node now; do not advance it
+      }
+    }
+
+    ldout(ictx->cct, 10) << "cor_completion_callback:: after remove item, size = " << completions->size() << dendl;
+    ictx->cor_lock.Unlock();
+
+    if (found) {
+      // the entry left the list under the lock, so this thread owns it
+      delete found;//delete xlist<librados::AioCompletion*>::item *
+      finished->release();
+    }
+  }
+
 }
index 8412223154d40221b2219809b509a0533ea1b0d0..33ae12d2d9de209159f1393fc71b645addd15b65 100644 (file)
@@ -74,6 +74,7 @@ namespace librbd {
     RWLock parent_lock; // protects parent_md and parent
     Mutex refresh_lock; // protects refresh_seq and last_refresh
     Mutex aio_lock; // protects pending_aio and pending_aio_cond
+    Mutex cor_lock; //protects cor_completions for copy-on-read
 
     unsigned extra_read_flags;
 
@@ -100,6 +101,7 @@ namespace librbd {
 
     Cond pending_aio_cond;
     uint64_t pending_aio;
+    xlist<librados::AioCompletion*> *cor_completions; //copy-on-read AioCompletions
 
     /**
      * Either image_name or image_id must be set.
@@ -165,7 +167,11 @@ namespace librbd {
     uint64_t prune_parent_extents(vector<pair<uint64_t,uint64_t> >& objectx,
                                  uint64_t overlap);
     void wait_for_pending_aio();
+
+    void add_cor_completion(xlist<librados::AioCompletion*>::item *comp);
+    void wait_last_completions();//wait for uncompleted asynchronous write which is still in xlist
   };
+  void cor_completion_callback(librados::completion_t aio_completion_impl, void *arg);
 }
 
 #endif
index 119ab4e2613a67066ac73c465c232f6703b5b964..7a755b5aa23f2cc9c5678267ed46e5df56bb389d 100644 (file)
@@ -2278,7 +2278,10 @@ reprotect_and_return_err:
     if (ictx->image_watcher != NULL) {
       ictx->image_watcher->flush_aio_operations();
     }
-    if (ictx->object_cacher) {
+    if (ictx->cor_completions)
+      ictx->wait_last_completions();//copy-on-read: wait for unfinished AioCompletion requests
+
+    if (ictx->object_cacher)
       ictx->shutdown_cache(); // implicitly flushes
     } else {
       flush(ictx);