git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
librbd: handle parent change while async I/Os are in flight
author: Dan Mick <dan.mick@inktank.com>
Sat, 1 Dec 2012 02:11:09 +0000 (18:11 -0800)
committer: Dan Mick <dan.mick@inktank.com>
Thu, 6 Dec 2012 01:05:18 +0000 (17:05 -0800)
During a test_librbd_fsx run including flatten, ImageCtx->parent
was being dereferenced while null.  Between the time the parent
overlap is calculated and the time the guard+write completes
with ENOENT and submits the copyup+write, the parent image
could have changed (by resize) or been made irrelevant (by
child flatten) such that the parent overlap is now incorrect.

Handle "no parent" by just sending the copyup+write; the copyup
part will be a no-op.  Move to WRITE_FLAT state in this case
because the image is no longer a child: it has no parent left to
deal with.

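Handle "overlap changed" by recalculating overlap before
reading parent data; if none is left, don't read, but rather
just clear m_object_image_extents, in which case the copyup
will again be a no-op because it will be of zero length.
We still have a parent in this case.  Note that this description
originally said to stay in WRITE_COPYUP state and come back
through as usual, but the diff below instead moves to WRITE_FLAT
state and calls send_copyup() directly when the recalculated
overlap is zero.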

Signed-off-by: Dan Mick <dan.mick@inktank.com>
Fixes: #3524
src/librbd/AioRequest.cc

index 63424e397fc8892507e17ae01ba08414df522a04..b9a76e48e0a9a47cdd73430bbf25eb755831fec7 100644 (file)
@@ -157,15 +157,47 @@ namespace librbd {
       ldout(m_ictx->cct, 20) << "WRITE_CHECK_GUARD" << dendl;
 
       if (r == -ENOENT) {
+
        Mutex::Locker l(m_ictx->snap_lock);
        Mutex::Locker l2(m_ictx->parent_lock);
 
-       // copyup the entire object up to the overlap point
-       ldout(m_ictx->cct, 20) << "reading from parent " << m_object_image_extents << dendl;
-       assert(m_object_image_extents.size());
-
-       m_state = LIBRBD_AIO_WRITE_COPYUP;
-       read_from_parent(m_object_image_extents);
+       /*
+        * Parent may have disappeared; if so, recover by using
+        * send_copyup() to send the original write req (the copyup
+        * operation itself will be a no-op, since someone must have
+        * populated the child object while we weren't looking).
+        * Move to WRITE_FLAT state as we'll be done with the
+        * operation once the null copyup completes.
+        */
+
+       if (m_ictx->parent == NULL) {
+         ldout(m_ictx->cct, 20) << "parent is gone; do null copyup " << dendl;
+         m_state = LIBRBD_AIO_WRITE_FLAT;
+         send_copyup();
+         finished = false;
+         break;
+       }
+
+       // If parent still exists, overlap might also have changed.
+       uint64_t newlen = m_ictx->prune_parent_extents(
+         m_object_image_extents, m_ictx->parent_md.overlap);
+
+       // copyup the entire object up to the overlap point, if any
+       if (newlen != 0) {
+         ldout(m_ictx->cct, 20) << "should_complete(" << this << ") overlap "
+                                << m_ictx->parent_md.overlap << " newlen "
+                                << newlen << " image_extents"
+                                << m_object_image_extents << dendl;
+
+         m_state = LIBRBD_AIO_WRITE_COPYUP;
+         read_from_parent(m_object_image_extents);
+       } else {
+         ldout(m_ictx->cct, 20) << "should_complete(" << this
+                                << "): parent overlap now 0" << dendl;
+         m_object_image_extents.clear();
+         m_state = LIBRBD_AIO_WRITE_FLAT;
+         send_copyup();
+       }
        finished = false;
        break;
       }