From: Ilya Dryomov Date: Sun, 25 Aug 2024 11:10:58 +0000 (+0200) Subject: librbd/migration/NBDStream: be more thorough around nbd_block_status() X-Git-Tag: testing/wip-pdonnell-testing-20240916.200549-debug~69^2~6 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=d52bd80f1f9daed51e1cdac142b07fcb9dec8615;p=ceph-ci.git librbd/migration/NBDStream: be more thorough around nbd_block_status() nbd_block_status() is specified to be really loose: it can return more status (go beyond the requested range), less status (cover only some part of the requested range) or nothing at all. The former would likely violate some assumptions in ObjectCopyRequest, while the latter would lead to straight data corruption -- parent blocks that weren't covered would never get copied up into the destination image. On top of that, the current implementation is very suboptimal. Because only "nr_entries == 2" responses are effectively processed, any extent which has a small amount of data and is otherwise zeroed (e.g. 8K of data in a 4M object) gets listed as DATA. A copyup for such an object would write 4M worth of data instead of 8K, consuming space in the destination image unnecessarily. Also, it's unclear whether zero-sized entries can be returned. Currently, if it happens we would hit an assert in interval_map. This fixes all of these issues. Signed-off-by: Ilya Dryomov --- diff --git a/src/librbd/migration/NBDStream.cc b/src/librbd/migration/NBDStream.cc index 8a675a9ac31..a1e8bcf57d2 100644 --- a/src/librbd/migration/NBDStream.cc +++ b/src/librbd/migration/NBDStream.cc @@ -21,17 +21,20 @@ int extent_cb(void* data, const char* metacontext, uint64_t offset, uint32_t* entries, size_t nr_entries, int* error) { auto sparse_extents = reinterpret_cast(data); - uint64_t length = 0; - for (size_t i=0; i 0 && state & (LIBNBD_STATE_HOLE | LIBNBD_STATE_ZERO)) { + sparse_extents->insert(offset, length, + {io::SPARSE_EXTENT_STATE_ZEROED, length}); + } + offset += length; } } - sparse_extents->insert(offset, length, {state, length}); + return 1; } @@ -139,17 +142,33 @@ struct NBDStream::ListSparseExtentsRequest { ldout(cct, 20) << "byte_offset=" << byte_offset << " byte_length=" << byte_length << dendl; + // nbd_block_status() is specified to be really loose: + // "The count parameter is a hint: the server may choose to + // return less status, or the final block may extend beyond the + // requested range. [...] It is possible for the extent function + // to be called more times than you expect [...] It is also + // possible that the extent function is not called at all, even + // for metadata contexts that you requested." + io::SparseExtents tmp_sparse_extents; + tmp_sparse_extents.insert(byte_offset, byte_length, + {io::SPARSE_EXTENT_STATE_DATA, byte_length}); + int rc = nbd_block_status(nbd_stream->m_nbd, byte_length, byte_offset, - {extent_cb, sparse_extents}, 0); + {extent_cb, &tmp_sparse_extents}, 0); if (rc == -1) { rc = nbd_get_errno(); lderr(cct) << "block_status " << byte_offset << "~" << byte_length << ": " << nbd_get_error() << " (errno = " << rc << ")" << dendl; - finish(rc); - return; + // don't propagate errors -- we are set up to list any missing + // parts of the range as DATA if nbd_block_status() returns less + // status or none at all } + // trim the result in case more status was returned + sparse_extents->insert(tmp_sparse_extents.intersect(byte_offset, + byte_length)); + boost::asio::post(nbd_stream->m_strand, [this] { list_sparse_extents(); }); }