From ac8d61a41934dd6d8200d0a827be270a3f3ad69a Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Sun, 25 Aug 2024 13:10:58 +0200 Subject: [PATCH] librbd/migration/NBDStream: be more thorough around nbd_block_status() nbd_block_status() is specified to be really loose: it can return more status (go beyond the requested range), less status (cover only some part of the requested range) or nothing at all. The former would likely violate some assumptions in ObjectCopyRequest, while the latter would lead to straight data corruption -- parent blocks that weren't covered would never get copied up into the destination image. On top of that, the current implementation is very suboptimal. Because only "nr_entries == 2" responses are effectively processed, any extent which has a small amount of data and is otherwise zeroed (e.g. 8K of data in a 4M object) gets listed as DATA. A copyup for such an object would write 4M worth of data instead of 8K, consuming space in the destination image unnecessarily. Also, it's unclear whether zero-sized entries can be returned. Currently, if it happens we would hit an assert in interval_map. This fixes all of these issues. Signed-off-by: Ilya Dryomov (cherry picked from commit d52bd80f1f9daed51e1cdac142b07fcb9dec8615) --- src/librbd/migration/NBDStream.cc | 43 ++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/src/librbd/migration/NBDStream.cc b/src/librbd/migration/NBDStream.cc index 8a675a9ac31ee..a1e8bcf57d24f 100644 --- a/src/librbd/migration/NBDStream.cc +++ b/src/librbd/migration/NBDStream.cc @@ -21,17 +21,20 @@ int extent_cb(void* data, const char* metacontext, uint64_t offset, uint32_t* entries, size_t nr_entries, int* error) { auto sparse_extents = reinterpret_cast(data); - uint64_t length = 0; - for (size_t i=0; i 0 && state & (LIBNBD_STATE_HOLE | LIBNBD_STATE_ZERO)) { + sparse_extents->insert(offset, length, + {io::SPARSE_EXTENT_STATE_ZEROED, length}); + } + offset += length; } } - sparse_extents->insert(offset, length, {state, length}); + return 1; } @@ -139,17 +142,33 @@ struct NBDStream::ListSparseExtentsRequest { ldout(cct, 20) << "byte_offset=" << byte_offset << " byte_length=" << byte_length << dendl; + // nbd_block_status() is specified to be really loose: + // "The count parameter is a hint: the server may choose to + // return less status, or the final block may extend beyond the + // requested range. [...] It is possible for the extent function + // to be called more times than you expect [...] It is also + // possible that the extent function is not called at all, even + // for metadata contexts that you requested." + io::SparseExtents tmp_sparse_extents; + tmp_sparse_extents.insert(byte_offset, byte_length, + {io::SPARSE_EXTENT_STATE_DATA, byte_length}); + int rc = nbd_block_status(nbd_stream->m_nbd, byte_length, byte_offset, - {extent_cb, sparse_extents}, 0); + {extent_cb, &tmp_sparse_extents}, 0); if (rc == -1) { rc = nbd_get_errno(); lderr(cct) << "block_status " << byte_offset << "~" << byte_length << ": " << nbd_get_error() << " (errno = " << rc << ")" << dendl; - finish(rc); - return; + // don't propagate errors -- we are set up to list any missing + // parts of the range as DATA if nbd_block_status() returns less + // status or none at all } + // trim the result in case more status was returned + sparse_extents->insert(tmp_sparse_extents.intersect(byte_offset, + byte_length)); + boost::asio::post(nbd_stream->m_strand, [this] { list_sparse_extents(); }); } -- 2.39.5