--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/* Based on QEMU block/qcow.cc and block/qcow2.h, which has this license: */
+
+/*
+ * Block driver for the QCOW version 2 format
+ *
+ * Copyright (c) 2004-2006 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef CEPH_LIBRBD_MIGRATION_QCOW2_H
+#define CEPH_LIBRBD_MIGRATION_QCOW2_H
+
+#include "include/ceph_assert.h"
+#include "include/int_types.h"
+#include "librbd/migration/QCOW.h"
+#include <endian.h>
+
/* "QFI\xfb" big-endian magic shared by qcow v1/v2/v3 images */
#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)

/* Values of the header crypt_method field */
#define QCOW_CRYPT_NONE 0
#define QCOW_CRYPT_AES 1
#define QCOW_CRYPT_LUKS 2

#define QCOW_MAX_CRYPT_CLUSTERS 32
#define QCOW_MAX_SNAPSHOTS 65536

/* Field widths in qcow2 mean normal cluster offsets cannot reach
 * 64PB; depending on cluster size, compressed clusters can have a
 * smaller limit (64PB for up to 16k clusters, then ramps down to
 * 512TB for 2M clusters). */
#define QCOW_MAX_CLUSTER_OFFSET ((1ULL << 56) - 1)

/* 8 MB refcount table is enough for 2 PB images at 64k cluster size
 * (128 GB for 512 byte clusters, 2 EB for 2 MB clusters) */
#define QCOW_MAX_REFTABLE_SIZE (1ULL << 23)

/* 32 MB L1 table is enough for 2 PB images at 64k cluster size
 * (128 GB for 512 byte clusters, 2 EB for 2 MB clusters) */
#define QCOW_MAX_L1_SIZE (1ULL << 25)

/* Allow for an average of 1k per snapshot table entry, should be plenty of
 * space for snapshot names and IDs */
#define QCOW_MAX_SNAPSHOTS_SIZE (1024 * QCOW_MAX_SNAPSHOTS)

/* Maximum amount of extra data per snapshot table entry to accept */
#define QCOW_MAX_SNAPSHOT_EXTRA_DATA 1024

/* Bitmap header extension constraints */
#define QCOW2_MAX_BITMAPS 65535
#define QCOW2_MAX_BITMAP_DIRECTORY_SIZE (1024 * QCOW2_MAX_BITMAPS)

/* Maximum of parallel sub-request per guest request */
#define QCOW2_MAX_WORKERS 8

/* Flag bits stored in the top of an L2 entry / cluster descriptor: */
/* indicate that the refcount of the referenced cluster is exactly one. */
#define QCOW_OFLAG_COPIED (1ULL << 63)
/* indicate that the cluster is compressed (they never have the copied flag) */
#define QCOW_OFLAG_COMPRESSED (1ULL << 62)
/* The cluster reads as all zeros */
#define QCOW_OFLAG_ZERO (1ULL << 0)

#define QCOW_EXTL2_SUBCLUSTERS_PER_CLUSTER 32

/* Extended-L2 subcluster bitmap helpers (low 32 bits = allocation map,
 * high 32 bits = "reads as zeroes" map): */
/* The subcluster X [0..31] is allocated */
#define QCOW_OFLAG_SUB_ALLOC(X) (1ULL << (X))
/* The subcluster X [0..31] reads as zeroes */
#define QCOW_OFLAG_SUB_ZERO(X) (QCOW_OFLAG_SUB_ALLOC(X) << 32)
/* Subclusters [X, Y) (0 <= X <= Y <= 32) are allocated */
#define QCOW_OFLAG_SUB_ALLOC_RANGE(X, Y) \
 (QCOW_OFLAG_SUB_ALLOC(Y) - QCOW_OFLAG_SUB_ALLOC(X))
/* Subclusters [X, Y) (0 <= X <= Y <= 32) read as zeroes */
#define QCOW_OFLAG_SUB_ZERO_RANGE(X, Y) \
 (QCOW_OFLAG_SUB_ALLOC_RANGE(X, Y) << 32)
/* L2 entry bitmap with all allocation bits set */
#define QCOW_L2_BITMAP_ALL_ALLOC (QCOW_OFLAG_SUB_ALLOC_RANGE(0, 32))
/* L2 entry bitmap with all "read as zeroes" bits set */
#define QCOW_L2_BITMAP_ALL_ZEROES (QCOW_OFLAG_SUB_ZERO_RANGE(0, 32))

/* Size of normal and extended L2 entries */
#define QCOW_L2E_SIZE_NORMAL (sizeof(uint64_t))
#define QCOW_L2E_SIZE_EXTENDED (sizeof(uint64_t) * 2)

/* Size of L1 table entries */
#define QCOW_L1E_SIZE (sizeof(uint64_t))

/* Size of reftable entries */
#define QCOW_REFTABLE_ENTRY_SIZE (sizeof(uint64_t))

/* cluster_bits must fall within [9, 21], i.e. 512 B .. 2 MB clusters */
#define QCOW_MIN_CLUSTER_BITS 9
#define QCOW_MAX_CLUSTER_BITS 21

/* Defined in the qcow2 spec (compressed cluster descriptor) */
#define QCOW2_COMPRESSED_SECTOR_SIZE 512U
#define QCOW2_COMPRESSED_SECTOR_MASK (~(QCOW2_COMPRESSED_SECTOR_SIZE - 1ULL))

/* Number of cached L2 tables (see L2TableCache below) */
#define QCOW_L2_CACHE_SIZE 16

/* Must be at least 2 to cover COW */
#define QCOW_MIN_L2_CACHE_SIZE 2 /* cache entries */

/* Must be at least 4 to cover all cases of refcount table growth */
#define QCOW_MIN_REFCOUNT_CACHE_SIZE 4 /* clusters */

#define QCOW_DEFAULT_L2_CACHE_MAX_SIZE (1ULL << 25)
#define QCOW_DEFAULT_CACHE_CLEAN_INTERVAL 600 /* seconds */

#define QCOW_DEFAULT_CLUSTER_SIZE 65536

/* QEMU-style runtime option names */
#define QCOW2_OPT_DATA_FILE "data-file"
#define QCOW2_OPT_LAZY_REFCOUNTS "lazy-refcounts"
#define QCOW2_OPT_DISCARD_REQUEST "pass-discard-request"
#define QCOW2_OPT_DISCARD_SNAPSHOT "pass-discard-snapshot"
#define QCOW2_OPT_DISCARD_OTHER "pass-discard-other"
#define QCOW2_OPT_OVERLAP "overlap-check"
#define QCOW2_OPT_OVERLAP_TEMPLATE "overlap-check.template"
#define QCOW2_OPT_OVERLAP_MAIN_HEADER "overlap-check.main-header"
#define QCOW2_OPT_OVERLAP_ACTIVE_L1 "overlap-check.active-l1"
#define QCOW2_OPT_OVERLAP_ACTIVE_L2 "overlap-check.active-l2"
#define QCOW2_OPT_OVERLAP_REFCOUNT_TABLE "overlap-check.refcount-table"
#define QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK "overlap-check.refcount-block"
#define QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE "overlap-check.snapshot-table"
#define QCOW2_OPT_OVERLAP_INACTIVE_L1 "overlap-check.inactive-l1"
#define QCOW2_OPT_OVERLAP_INACTIVE_L2 "overlap-check.inactive-l2"
#define QCOW2_OPT_OVERLAP_BITMAP_DIRECTORY "overlap-check.bitmap-directory"
#define QCOW2_OPT_CACHE_SIZE "cache-size"
#define QCOW2_OPT_L2_CACHE_SIZE "l2-cache-size"
#define QCOW2_OPT_L2_CACHE_ENTRY_SIZE "l2-cache-entry-size"
#define QCOW2_OPT_REFCOUNT_CACHE_SIZE "refcount-cache-size"
#define QCOW2_OPT_CACHE_CLEAN_INTERVAL "cache-clean-interval"
+
/* Minimal header prefix read first to detect the image version before
 * the full (version-specific) header is parsed.  Multi-byte fields are
 * big-endian on disk per the qcow spec. */
typedef struct QCowHeaderProbe {
  uint32_t magic;    // expected to be QCOW_MAGIC
  uint32_t version;  // 1, 2 or 3
} __attribute__((__packed__)) QCowHeaderProbe;
+
/* On-disk header of a qcow version 1 image.  Packed to match the file
 * layout exactly; multi-byte fields are big-endian per the qcow spec. */
typedef struct QCowHeaderV1
{
  uint32_t magic;
  uint32_t version;
  uint64_t backing_file_offset;
  uint32_t backing_file_size;
  uint32_t mtime;
  uint64_t size; /* in bytes */
  uint8_t cluster_bits;
  uint8_t l2_bits;
  uint16_t padding;
  uint32_t crypt_method;   // QCOW_CRYPT_*
  uint64_t l1_table_offset;
} __attribute__((__packed__)) QCowHeaderV1;
+
/* On-disk header of a qcow version 2/3 image.  Packed to match the file
 * layout exactly; multi-byte fields are big-endian per the qcow2 spec. */
typedef struct QCowHeader {
  uint32_t magic;
  uint32_t version;
  uint64_t backing_file_offset;
  uint32_t backing_file_size;
  uint32_t cluster_bits;
  uint64_t size; /* in bytes */
  uint32_t crypt_method;   // QCOW_CRYPT_*
  uint32_t l1_size; /* XXX: save number of clusters instead ? */
  uint64_t l1_table_offset;
  uint64_t refcount_table_offset;
  uint32_t refcount_table_clusters;
  uint32_t nb_snapshots;
  uint64_t snapshots_offset;

  /* The following fields are only valid for version >= 3 */
  uint64_t incompatible_features;  // QCOW2_INCOMPAT_* bits
  uint64_t compatible_features;    // QCOW2_COMPAT_* bits
  uint64_t autoclear_features;     // QCOW2_AUTOCLEAR_* bits

  uint32_t refcount_order;
  uint32_t header_length;

  /* Additional fields */
  uint8_t compression_type;

  /* header must be a multiple of 8 */
  uint8_t padding[7];
} __attribute__((__packed__)) QCowHeader;
+
/* On-disk snapshot table entry; followed on disk by the variable-length
 * extra data, id string and name noted below. */
typedef struct QCowSnapshotHeader {
  /* header is 8 byte aligned */
  uint64_t l1_table_offset;

  uint32_t l1_size;
  uint16_t id_str_size;
  uint16_t name_size;

  uint32_t date_sec;
  uint32_t date_nsec;

  uint64_t vm_clock_nsec;

  uint32_t vm_state_size;
  uint32_t extra_data_size; /* for extension */
  /* extra data follows */
  /* id_str follows */
  /* name follows */
} __attribute__((__packed__)) QCowSnapshotHeader;
+
/* Optional fixed-layout prefix of a snapshot entry's extra data
 * (present when extra_data_size is large enough to cover it). */
typedef struct QCowSnapshotExtraData {
  uint64_t vm_state_size_large;
  uint64_t disk_size;
  uint64_t icount;
} __attribute__((__packed__)) QCowSnapshotExtraData;
+
+
/* In-memory (host-endian) representation of one snapshot table entry.
 * NOTE(review): id_str/name/unknown_extra_data look heap-owned by this
 * struct -- confirm the allocating/freeing code before relying on it. */
typedef struct QCowSnapshot {
  uint64_t l1_table_offset;
  uint32_t l1_size;
  char *id_str;
  char *name;
  uint64_t disk_size;
  uint64_t vm_state_size;
  uint32_t date_sec;
  uint32_t date_nsec;
  uint64_t vm_clock_nsec;
  /* icount value for the moment when snapshot was taken */
  uint64_t icount;
  /* Size of all extra data, including QCowSnapshotExtraData if available */
  uint32_t extra_data_size;
  /* Data beyond QCowSnapshotExtraData, if any */
  void *unknown_extra_data;
} QCowSnapshot;
+
/* Header extension payload locating the (LUKS) crypto header in the file */
typedef struct Qcow2CryptoHeaderExtension {
  uint64_t offset;
  uint64_t length;
} __attribute__((__packed__)) Qcow2CryptoHeaderExtension;

/* Generic wrapper for header extensions this reader does not understand */
typedef struct Qcow2UnknownHeaderExtension {
  uint32_t magic;
  uint32_t len;       // length of data[], in bytes
  uint8_t data[];
} Qcow2UnknownHeaderExtension;
+
/* Feature-name table entry types (Qcow2Feature::type) */
enum {
  QCOW2_FEAT_TYPE_INCOMPATIBLE = 0,
  QCOW2_FEAT_TYPE_COMPATIBLE = 1,
  QCOW2_FEAT_TYPE_AUTOCLEAR = 2,
};

/* Incompatible feature bits */
enum {
  QCOW2_INCOMPAT_DIRTY_BITNR = 0,
  QCOW2_INCOMPAT_CORRUPT_BITNR = 1,
  QCOW2_INCOMPAT_DATA_FILE_BITNR = 2,
  QCOW2_INCOMPAT_COMPRESSION_BITNR = 3,
  QCOW2_INCOMPAT_EXTL2_BITNR = 4,
  QCOW2_INCOMPAT_DIRTY = 1 << QCOW2_INCOMPAT_DIRTY_BITNR,
  QCOW2_INCOMPAT_CORRUPT = 1 << QCOW2_INCOMPAT_CORRUPT_BITNR,
  QCOW2_INCOMPAT_DATA_FILE = 1 << QCOW2_INCOMPAT_DATA_FILE_BITNR,
  QCOW2_INCOMPAT_COMPRESSION = 1 << QCOW2_INCOMPAT_COMPRESSION_BITNR,
  QCOW2_INCOMPAT_EXTL2 = 1 << QCOW2_INCOMPAT_EXTL2_BITNR,

  /* all incompatible bits this implementation knows about */
  QCOW2_INCOMPAT_MASK = QCOW2_INCOMPAT_DIRTY
 | QCOW2_INCOMPAT_CORRUPT
 | QCOW2_INCOMPAT_DATA_FILE
 | QCOW2_INCOMPAT_COMPRESSION
 | QCOW2_INCOMPAT_EXTL2,
};

/* Compatible feature bits */
enum {
  QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR = 0,
  QCOW2_COMPAT_LAZY_REFCOUNTS = 1 << QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR,

  QCOW2_COMPAT_FEAT_MASK = QCOW2_COMPAT_LAZY_REFCOUNTS,
};

/* Autoclear feature bits */
enum {
  QCOW2_AUTOCLEAR_BITMAPS_BITNR = 0,
  QCOW2_AUTOCLEAR_DATA_FILE_RAW_BITNR = 1,
  QCOW2_AUTOCLEAR_BITMAPS = 1 << QCOW2_AUTOCLEAR_BITMAPS_BITNR,
  QCOW2_AUTOCLEAR_DATA_FILE_RAW = 1 << QCOW2_AUTOCLEAR_DATA_FILE_RAW_BITNR,

  QCOW2_AUTOCLEAR_MASK = QCOW2_AUTOCLEAR_BITMAPS
 | QCOW2_AUTOCLEAR_DATA_FILE_RAW,
};

/* Policy for when a discard may be passed through to the backing file */
enum qcow2_discard_type {
  QCOW2_DISCARD_NEVER = 0,
  QCOW2_DISCARD_ALWAYS,
  QCOW2_DISCARD_REQUEST,
  QCOW2_DISCARD_SNAPSHOT,
  QCOW2_DISCARD_OTHER,
  QCOW2_DISCARD_MAX
};
+
/* On-disk feature-name table entry (fixed 48-byte record) */
typedef struct Qcow2Feature {
  uint8_t type;      // QCOW2_FEAT_TYPE_*
  uint8_t bit;       // bit number within the corresponding feature field
  char name[46];     // not necessarily NUL-terminated
} __attribute__((__packed__)) Qcow2Feature;

/* A byte range queued for discard */
typedef struct Qcow2DiscardRegion {
  uint64_t offset;
  uint64_t bytes;
} Qcow2DiscardRegion;

/* Accessors for a refcount array of arbitrary refcount_order */
typedef uint64_t Qcow2GetRefcountFunc(const void *refcount_array,
                                      uint64_t index);
typedef void Qcow2SetRefcountFunc(void *refcount_array,
                                  uint64_t index, uint64_t value);

/* "bitmaps" header extension payload */
typedef struct Qcow2BitmapHeaderExt {
    uint32_t nb_bitmaps;
    uint32_t reserved32;
    uint64_t bitmap_directory_size;
    uint64_t bitmap_directory_offset;
} __attribute__((__packed__)) Qcow2BitmapHeaderExt;
+
/* Refcount block cache uses the same entry count as the L2 table cache.
 * NOTE: no trailing semicolon -- the previous definition expanded to
 * "16;", which is a syntax error in any expression context the macro is
 * used in (e.g. as an initializer or function argument). */
#define QCOW_RC_CACHE_SIZE QCOW_L2_CACHE_SIZE
+
/* One of the two copy-on-write regions surrounding a guest write into a
 * newly allocated cluster (see QCowL2Meta::cow_start / cow_end). */
typedef struct Qcow2COWRegion {
  /**
   * Offset of the COW region in bytes from the start of the first cluster
   * touched by the request.
   */
  unsigned offset;

  /** Number of bytes to copy */
  unsigned nb_bytes;
} Qcow2COWRegion;
+
/**
 * Describes an in-flight (part of a) write request that writes to clusters
 * that are not referenced in their L2 table yet.
 */
typedef struct QCowL2Meta
{
  /** Guest offset of the first newly allocated cluster */
  uint64_t offset;

  /** Host offset of the first newly allocated cluster */
  uint64_t alloc_offset;

  /** Number of newly allocated clusters */
  int nb_clusters;

  /** Do not free the old clusters */
  bool keep_old_clusters;

  /**
   * The COW Region between the start of the first allocated cluster and the
   * area the guest actually writes to.
   */
  Qcow2COWRegion cow_start;

  /**
   * The COW Region between the area the guest actually writes to and the
   * end of the last allocated cluster.
   */
  Qcow2COWRegion cow_end;

  /*
   * Indicates that COW regions are already handled and do not require
   * any more processing.
   */
  bool skip_cow;

  /**
   * Indicates that this is not a normal write request but a preallocation.
   * If the image has extended L2 entries this means that no new individual
   * subclusters will be marked as allocated in the L2 bitmap (but any
   * existing contents of that bitmap will be kept).
   */
  bool prealloc;

  /** Pointer to next L2Meta of the same write request (singly linked) */
  struct QCowL2Meta *next;
} QCowL2Meta;
+
/* Decoded state of a cluster, derived from its L2 entry flag bits */
typedef enum QCow2ClusterType {
  QCOW2_CLUSTER_UNALLOCATED,
  QCOW2_CLUSTER_ZERO_PLAIN,
  QCOW2_CLUSTER_ZERO_ALLOC,
  QCOW2_CLUSTER_NORMAL,
  QCOW2_CLUSTER_COMPRESSED,
} QCow2ClusterType;

/* Bit flags identifying which metadata structure a write would overlap */
typedef enum QCow2MetadataOverlap {
  QCOW2_OL_MAIN_HEADER_BITNR = 0,
  QCOW2_OL_ACTIVE_L1_BITNR = 1,
  QCOW2_OL_ACTIVE_L2_BITNR = 2,
  QCOW2_OL_REFCOUNT_TABLE_BITNR = 3,
  QCOW2_OL_REFCOUNT_BLOCK_BITNR = 4,
  QCOW2_OL_SNAPSHOT_TABLE_BITNR = 5,
  QCOW2_OL_INACTIVE_L1_BITNR = 6,
  QCOW2_OL_INACTIVE_L2_BITNR = 7,
  QCOW2_OL_BITMAP_DIRECTORY_BITNR = 8,

  QCOW2_OL_MAX_BITNR = 9,

  QCOW2_OL_NONE = 0,
  QCOW2_OL_MAIN_HEADER = (1 << QCOW2_OL_MAIN_HEADER_BITNR),
  QCOW2_OL_ACTIVE_L1 = (1 << QCOW2_OL_ACTIVE_L1_BITNR),
  QCOW2_OL_ACTIVE_L2 = (1 << QCOW2_OL_ACTIVE_L2_BITNR),
  QCOW2_OL_REFCOUNT_TABLE = (1 << QCOW2_OL_REFCOUNT_TABLE_BITNR),
  QCOW2_OL_REFCOUNT_BLOCK = (1 << QCOW2_OL_REFCOUNT_BLOCK_BITNR),
  QCOW2_OL_SNAPSHOT_TABLE = (1 << QCOW2_OL_SNAPSHOT_TABLE_BITNR),
  QCOW2_OL_INACTIVE_L1 = (1 << QCOW2_OL_INACTIVE_L1_BITNR),
  /* NOTE: Checking overlaps with inactive L2 tables will result in bdrv
   * reads. */
  QCOW2_OL_INACTIVE_L2 = (1 << QCOW2_OL_INACTIVE_L2_BITNR),
  QCOW2_OL_BITMAP_DIRECTORY = (1 << QCOW2_OL_BITMAP_DIRECTORY_BITNR),
} QCow2MetadataOverlap;
+
/* Perform all overlap checks which can be done in constant time */
#define QCOW2_OL_CONSTANT \
 (QCOW2_OL_MAIN_HEADER | QCOW2_OL_ACTIVE_L1 | QCOW2_OL_REFCOUNT_TABLE | \
 QCOW2_OL_SNAPSHOT_TABLE | QCOW2_OL_BITMAP_DIRECTORY)

/* Perform all overlap checks which don't require disk access */
#define QCOW2_OL_CACHED \
 (QCOW2_OL_CONSTANT | QCOW2_OL_ACTIVE_L2 | QCOW2_OL_REFCOUNT_BLOCK | \
 QCOW2_OL_INACTIVE_L1)

/* Perform all overlap checks */
#define QCOW2_OL_ALL \
 (QCOW2_OL_CACHED | QCOW2_OL_INACTIVE_L2)

/* Masks extracting the table/cluster byte offset from L1 and L2 entries
 * (strips the flag bits in the top byte and the low alignment bits) */
#define QCOW_L1E_OFFSET_MASK 0x00fffffffffffe00ULL
#define QCOW_L2E_OFFSET_MASK 0x00fffffffffffe00ULL
#define QCOW_L2E_COMPRESSED_OFFSET_SIZE_MASK 0x3fffffffffffffffULL

/* Mask extracting the refcount block offset from a reftable entry */
#define REFT_OFFSET_MASK 0xfffffffffffffe00ULL

/* Sentinel for "no offset" */
#define INV_OFFSET (-1ULL)
+
+static inline uint64_t l2meta_cow_start(QCowL2Meta *m)
+{
+ return m->offset + m->cow_start.offset;
+}
+
+static inline uint64_t l2meta_cow_end(QCowL2Meta *m)
+{
+ return m->offset + m->cow_end.offset + m->cow_end.nb_bytes;
+}
+
/**
 * Absolute difference of two refcounts.  The comparison picks the
 * subtraction order so the unsigned result never wraps.
 */
static inline uint64_t refcount_diff(uint64_t r1, uint64_t r2)
{
  if (r1 > r2) {
    return r1 - r2;
  }
  return r2 - r1;
}
+
+#endif // CEPH_LIBRBD_MIGRATION_QCOW2_H
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/migration/QCOWFormat.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "include/intarith.h"
+#include "librbd/AsioEngine.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Utils.h"
+#include "librbd/io/AioCompletion.h"
+#include "librbd/io/ReadResult.h"
+#include "librbd/migration/SnapshotInterface.h"
+#include "librbd/migration/SourceSpecBuilder.h"
+#include "librbd/migration/StreamInterface.h"
+#include <boost/asio/dispatch.hpp>
+#include <boost/asio/post.hpp>
+#include <deque>
+#include <endian.h>
+#include <tuple>
+#include <unordered_map>
+#include <vector>
+
+#define dout_subsys ceph_subsys_rbd
+
+namespace librbd {
+namespace migration {
+
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::migration::QCOWFormat: " \
+ << __func__ << ": "
+
+namespace {
+
// Maps one chunk of a guest read onto a single qcow cluster; each
// ClusterExtent touches at most one cluster.
struct ClusterExtent {
  uint64_t cluster_offset;        // raw L2 entry (0 == unallocated); filled in after lookup
  uint64_t cluster_length;        // bytes of this chunk
  uint64_t intra_cluster_offset;  // offset of the chunk within its cluster
  uint64_t image_offset;          // guest (image) offset of the chunk
  uint64_t buffer_offset;         // offset into the caller's destination buffer

  ClusterExtent(uint64_t cluster_offset, uint64_t cluster_length,
                uint64_t intra_cluster_offset, uint64_t image_offset,
                uint64_t buffer_offset)
    : cluster_offset(cluster_offset), cluster_length(cluster_length),
      intra_cluster_offset(intra_cluster_offset), image_offset(image_offset),
      buffer_offset(buffer_offset) {
  }
};

typedef std::vector<ClusterExtent> ClusterExtents;
+
+void populate_cluster_extents(CephContext* cct, uint64_t cluster_size,
+ const io::Extents& image_extents,
+ ClusterExtents* cluster_extents) {
+ uint64_t buffer_offset = 0;
+ for (auto [image_offset, image_length] : image_extents) {
+ while (image_length > 0) {
+ auto intra_cluster_offset = image_offset & (cluster_size - 1);
+ auto intra_cluster_length = cluster_size - intra_cluster_offset;
+ auto cluster_length = std::min(image_length, intra_cluster_length);
+
+ ldout(cct, 20) << "image_offset=" << image_offset << ", "
+ << "image_length=" << image_length << ", "
+ << "cluster_length=" << cluster_length << dendl;
+
+
+ cluster_extents->emplace_back(0, cluster_length, intra_cluster_offset,
+ image_offset, buffer_offset);
+
+ image_offset += cluster_length;
+ image_length -= cluster_length;
+ buffer_offset += cluster_length;
+ }
+ }
+}
+
+} // anonymous namespace
+
// A single cluster read from the source stream.  cluster_offset keeps
// the raw L2 entry value (it is later tested for QCOW_OFLAG_COMPRESSED).
template <typename I>
struct QCOWFormat<I>::Cluster {
  const uint64_t cluster_offset;
  bufferlist cluster_data_bl;  // cluster payload once the read completes

  Cluster(uint64_t cluster_offset) : cluster_offset(cluster_offset) {
  }
};
+
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::migration::QCOWFormat::ClusterCache: " \
+ << this << " " << __func__ << ": "
+
// Single-entry cluster cache with request coalescing: concurrent reads
// for the same cluster share one stream read.  All cache state is only
// touched from m_strand, so no explicit locking is needed.
template <typename I>
class QCOWFormat<I>::ClusterCache {
public:
  ClusterCache(QCOWFormat* qcow_format)
    : qcow_format(qcow_format),
      m_strand(*qcow_format->m_image_ctx->asio_engine) {
  }

  // Copy [intra_cluster_offset, +cluster_length) of the given cluster
  // into *bl and complete on_finish (with 0 on success or a negative
  // error code from the stream read).
  void get_cluster(uint64_t cluster_offset, uint64_t cluster_length,
                   uint64_t intra_cluster_offset, bufferlist* bl,
                   Context* on_finish) {
    auto cct = qcow_format->m_image_ctx->cct;
    ldout(cct, 20) << "cluster_offset=" << cluster_offset << dendl;

    // cache state machine runs in a single strand thread
    boost::asio::dispatch(
      m_strand,
      [this, cluster_offset, cluster_length, intra_cluster_offset, bl,
       on_finish]() {
        execute_get_cluster(cluster_offset, cluster_length,
                            intra_cluster_offset, bl, on_finish);
      });
  }

private:
  // (intra_cluster_offset, cluster_length, dest buffer, completion)
  typedef std::tuple<uint64_t, uint64_t, bufferlist*, Context*> Completion;
  typedef std::list<Completion> Completions;

  QCOWFormat* qcow_format;
  boost::asio::io_context::strand m_strand;

  // most recently completed cluster (the actual one-entry "cache")
  std::shared_ptr<Cluster> cluster;
  // clusters with reads currently in flight, keyed by cluster offset
  std::unordered_map<uint64_t, std::shared_ptr<Cluster>> clusters;
  // callers waiting on each in-flight cluster
  std::unordered_map<uint64_t, Completions> cluster_completions;

  // strand context only
  void execute_get_cluster(uint64_t cluster_offset, uint64_t cluster_length,
                           uint64_t intra_cluster_offset, bufferlist* bl,
                           Context* on_finish) {
    auto cct = qcow_format->m_image_ctx->cct;
    ldout(cct, 20) << "cluster_offset=" << cluster_offset << dendl;

    if (cluster && cluster->cluster_offset == cluster_offset) {
      // most-recent cluster matches
      bl->substr_of(cluster->cluster_data_bl, intra_cluster_offset,
                    cluster_length);
      boost::asio::post(*qcow_format->m_image_ctx->asio_engine,
                        [on_finish]() { on_finish->complete(0); });
      return;
    }

    // record callback for cluster
    cluster_completions[cluster_offset].emplace_back(
      intra_cluster_offset, cluster_length, bl, on_finish);
    if (clusters.count(cluster_offset) == 0) {
      // start the new read request
      auto cluster = std::make_shared<Cluster>(cluster_offset);
      clusters[cluster_offset] = cluster;

      read_cluster(cluster);
    }
  }

  // issue the stream read for a cluster (strand context only)
  void read_cluster(std::shared_ptr<Cluster> cluster) {
    auto cct = qcow_format->m_image_ctx->cct;

    uint64_t stream_offset = cluster->cluster_offset;
    uint64_t stream_length = qcow_format->m_cluster_size;
    if ((cluster->cluster_offset & QCOW_OFLAG_COMPRESSED) != 0) {
      // compressed clusters encode the compressed length in the lower bits
      stream_offset = cluster->cluster_offset &
                      qcow_format->m_cluster_offset_mask;
      stream_length = (cluster->cluster_offset >>
                       (63 - qcow_format->m_cluster_bits)) &
                      (qcow_format->m_cluster_size - 1);
    }

    ldout(cct, 20) << "cluster_offset=" << cluster->cluster_offset << ", "
                   << "stream_offset=" << stream_offset << ", "
                   << "stream_length=" << stream_length << dendl;

    // read the cluster into the cache entry
    auto ctx = new LambdaContext([this, cluster](int r) {
      boost::asio::post(m_strand, [this, cluster, r]() {
        handle_read_cluster(r, cluster); }); });
    qcow_format->m_stream->read({{stream_offset, stream_length}},
                                &cluster->cluster_data_bl, ctx);
  }

  // finish an in-flight read and fan results out to all waiters
  // (strand context only)
  void handle_read_cluster(int r, std::shared_ptr<Cluster> cluster) {
    auto cct = qcow_format->m_image_ctx->cct;
    ldout(cct, 20) << "r=" << r << ", "
                   << "cluster_offset=" << cluster->cluster_offset << dendl;

    auto completions = std::move(cluster_completions[cluster->cluster_offset]);
    cluster_completions.erase(cluster->cluster_offset);
    clusters.erase(cluster->cluster_offset);

    if (r < 0) {
      lderr(cct) << "failed to read cluster offset " << cluster->cluster_offset
                 << ": " << cpp_strerror(r) << dendl;
    } else {
      if ((cluster->cluster_offset & QCOW_OFLAG_COMPRESSED) != 0) {
        // NOTE(review): compressed_bl is captured but currently unused;
        // it is presumably staged for a future decompression step
        bufferlist compressed_bl{std::move(cluster->cluster_data_bl)};
        cluster->cluster_data_bl.clear();

        // TODO
        lderr(cct) << "support for compressed clusters is not available"
                   << dendl;
        r = -EINVAL;
      }
    }

    // complete the IO back to caller
    boost::asio::post(*qcow_format->m_image_ctx->asio_engine,
                      [r, cluster, completions=std::move(completions)]() {
                        for (auto completion : completions) {
                          if (r >= 0) {
                            std::get<2>(completion)->substr_of(
                              cluster->cluster_data_bl,
                              std::get<0>(completion),
                              std::get<1>(completion));
                          }
                          std::get<3>(completion)->complete(r);
                        }
                      });
  }
};
+
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::migration::QCOWFormat::L2TableCache: " \
+ << this << " " << __func__ << ": "
+
+template <typename I>
+class QCOWFormat<I>::L2TableCache {
+public:
+ L2TableCache(QCOWFormat* qcow_format, uint32_t l2_bits)
+ : qcow_format(qcow_format), l2_bits(l2_bits), l2_size(1UL << l2_bits),
+ m_strand(*qcow_format->m_image_ctx->asio_engine),
+ l2_cache_entries(QCOW_L2_CACHE_SIZE) {
+ }
+
+ void get_cluster_offset(uint64_t image_offset, uint64_t* cluster_offset,
+ Context* on_finish) {
+ auto cct = qcow_format->m_image_ctx->cct;
+ ldout(cct, 20) << "image_offset=" << image_offset << dendl;
+
+ // cache state machine runs in a single strand thread
+ boost::asio::dispatch(
+ m_strand, [this, image_offset, cluster_offset, on_finish]() {
+ requests.emplace_back(image_offset, cluster_offset, on_finish);
+ });
+ dispatch_get_cluster_offset();
+ }
+
+private:
+ QCOWFormat* qcow_format;
+ uint32_t l2_bits;
+ uint32_t l2_size;
+
+ boost::asio::io_context::strand m_strand;
+
+ struct Request {
+ uint64_t image_offset;
+ uint64_t* cluster_offset;
+ Context* on_finish;
+
+ Request(uint64_t image_offset, uint64_t* cluster_offset, Context* on_finish)
+ : image_offset(image_offset), cluster_offset(cluster_offset),
+ on_finish(on_finish) {
+ }
+ };
+ typedef std::deque<Request> Requests;
+
+ struct L2Cache {
+ uint64_t l2_offset = 0;
+ uint64_t* l2_table = nullptr;
+ bufferlist l2_table_bl;
+
+ uint32_t count = 0;
+ bool in_flight = false;
+
+ int ret_val = 0;
+ };
+ std::vector<L2Cache> l2_cache_entries;
+
+ Requests requests;
+
+ void dispatch_get_cluster_offset() {
+ boost::asio::dispatch(m_strand, [this]() { execute_get_cluster_offset(); });
+ }
+
+ void execute_get_cluster_offset() {
+ auto cct = qcow_format->m_image_ctx->cct;
+ if (requests.empty()) {
+ return;
+ }
+
+ auto request = requests.front();
+ auto l1_index = request.image_offset >>
+ (l2_bits + qcow_format->m_cluster_bits);
+ auto l2_offset = qcow_format->m_l1_table[l1_index] &
+ qcow_format->m_cluster_mask;
+ auto l2_index = (request.image_offset >> qcow_format->m_cluster_bits) &
+ (l2_size - 1);
+ ldout(cct, 20) << "image_offset=" << request.image_offset << ", "
+ << "l1_index=" << l1_index << ", "
+ << "l2_offset=" << l2_offset << ", "
+ << "l2_index=" << l2_index << dendl;
+
+ int r = 0;
+ if (l2_offset == 0) {
+ // L2 table has not been allocated for specified offset
+ ldout(cct, 20) << "image_offset=" << request.image_offset << ", "
+ << "cluster_offset=DNE" << dendl;
+ *request.cluster_offset = 0;
+ r = -ENOENT;
+ } else {
+ const uint64_t* l2_table = nullptr;
+ r = l2_table_lookup(l2_offset, &l2_table);
+ if (r < 0) {
+ lderr(cct) << "failed to load L2 table: l2_offset=" << l2_offset << ": "
+ << cpp_strerror(r) << dendl;
+ } else if (l2_table == nullptr) {
+ // table not in cache -- will restart once its loaded
+ return;
+ } else {
+ *request.cluster_offset = be64toh(l2_table[l2_index]);
+ ldout(cct, 20) << "image_offset=" << request.image_offset << ", "
+ << "cluster_offset=" << *request.cluster_offset << dendl;
+ }
+ }
+
+ // complete the L2 cache request
+ boost::asio::post(*qcow_format->m_image_ctx->asio_engine,
+ [r, ctx=request.on_finish]() { ctx->complete(r); });
+ requests.pop_front();
+
+ // process next request (if any)
+ dispatch_get_cluster_offset();
+ }
+
+ int l2_table_lookup(uint64_t l2_offset, const uint64_t** cluster_offset) {
+ auto cct = qcow_format->m_image_ctx->cct;
+
+ *cluster_offset = nullptr;
+
+ // find a match in the existing cache
+ for (auto idx = 0U; idx < l2_cache_entries.size(); ++idx) {
+ auto& l2_cache = l2_cache_entries[idx];
+ if (l2_cache.l2_offset == l2_offset) {
+ if (l2_cache.in_flight) {
+ ldout(cct, 20) << "l2_offset=" << l2_offset << ", "
+ << "index=" << idx << " (in-flight)" << dendl;
+ return 0;
+ }
+
+ if (l2_cache.ret_val < 0) {
+ ldout(cct, 20) << "l2_offset=" << l2_offset << ", "
+ << "index=" << idx << " (error)" << dendl;
+ l2_cache = L2Cache{};
+ return l2_cache.ret_val;
+ }
+
+ ++l2_cache.count;
+ if (l2_cache.count == std::numeric_limits<uint32_t>::max()) {
+ for (auto& entry : l2_cache_entries) {
+ entry.count >>= 1;
+ }
+ }
+
+ *cluster_offset = l2_cache.l2_table;
+ return 0;
+ }
+ }
+
+ // find the least used entry
+ int32_t min_idx = -1;
+ uint32_t min_count = std::numeric_limits<uint32_t>::max();
+ for (uint32_t idx = 0U; idx < l2_cache_entries.size(); ++idx) {
+ auto& l2_cache = l2_cache_entries[idx];
+ if (l2_cache.in_flight) {
+ continue;
+ }
+
+ if (l2_cache.count > 0) {
+ --l2_cache.count;
+ }
+ if (l2_cache.count < min_count) {
+ min_count = l2_cache.count;
+ min_idx = idx;
+ }
+ }
+
+ if (min_idx == -1) {
+ // no space in the cache due to in-flight requests
+ ldout(cct, 20) << "l2_offset=" << l2_offset << ", "
+ << "index=DNE (cache busy)" << dendl;
+ return 0;
+ }
+
+ ldout(cct, 20) << "l2_offset=" << l2_offset << ", "
+ << "index=" << min_idx << " (loading)" << dendl;
+ auto& l2_cache = l2_cache_entries[min_idx];
+ l2_cache.l2_offset = l2_offset;
+ l2_cache.count = 1;
+ l2_cache.in_flight = true;
+
+ // read the L2 table into the L2 cache entry
+ auto ctx = new LambdaContext([this, index=min_idx, l2_offset](int r) {
+ boost::asio::post(m_strand, [this, index, l2_offset, r]() {
+ handle_l2_table_lookup(r, index, l2_offset); }); });
+ l2_cache.l2_table_bl.clear();
+ qcow_format->m_stream->read(
+ {{l2_offset, l2_size * sizeof(uint64_t)}}, &l2_cache.l2_table_bl, ctx);
+
+ return 0;
+ }
+
+ void handle_l2_table_lookup(int r, uint32_t index, uint64_t l2_offset) {
+ auto cct = qcow_format->m_image_ctx->cct;
+ ldout(cct, 20) << "r=" << r << ", "
+ << "l2_offset=" << l2_offset << ", "
+ << "index=" << index << dendl;
+
+ auto& l2_cache = l2_cache_entries[index];
+ ceph_assert(l2_cache.in_flight);
+ l2_cache.in_flight = false;
+
+ if (r < 0) {
+ lderr(cct) << "failed to load L2 table: "
+ << "l2_offset=" << l2_cache.l2_offset << ": "
+ << cpp_strerror(r) << dendl;
+ l2_cache.ret_val = r;
+ } else {
+ l2_cache.l2_table = reinterpret_cast<uint64_t*>(
+ l2_cache.l2_table_bl.c_str());
+ }
+
+ // restart the state machine
+ dispatch_get_cluster_offset();
+ }
+
+};
+
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::migration::QCOWFormat::ReadRequest: " \
+ << this << " " << __func__ << ": "
+
+template <typename I>
+class QCOWFormat<I>::ReadRequest {
+public:
+ ReadRequest(QCOWFormat* qcow_format, io::AioCompletion* aio_comp,
+ io::Extents&& image_extents)
+ : qcow_format(qcow_format), aio_comp(aio_comp),
+ image_extents(std::move(image_extents)) {
+ }
+
+ void send() {
+ get_cluster_offsets();
+ }
+
+private:
+ QCOWFormat* qcow_format;
+ io::AioCompletion* aio_comp;
+
+ io::Extents image_extents;
+ size_t image_extents_idx = 0;
+ uint32_t image_extent_offset = 0;
+
+ ClusterExtents cluster_extents;
+
+ void get_cluster_offsets() {
+ auto cct = qcow_format->m_image_ctx->cct;
+ populate_cluster_extents(cct, qcow_format->m_cluster_size, image_extents,
+ &cluster_extents);
+
+ ldout(cct, 20) << dendl;
+ auto ctx = new LambdaContext([this](int r) {
+ handle_get_cluster_offsets(r); });
+ auto gather_ctx = new C_Gather(cct, ctx);
+
+ for (auto& cluster_extent : cluster_extents) {
+ auto sub_ctx = new LambdaContext(
+ [this, &cluster_extent, on_finish=gather_ctx->new_sub()](int r) {
+ handle_get_cluster_offset(r, cluster_extent, on_finish); });
+ qcow_format->m_l2_table_cache->get_cluster_offset(
+ cluster_extent.image_offset, &cluster_extent.cluster_offset, sub_ctx);
+ }
+
+ gather_ctx->activate();
+ }
+
+ void handle_get_cluster_offset(int r, const ClusterExtent& cluster_extent,
+ Context* on_finish) {
+ auto cct = qcow_format->m_image_ctx->cct;
+ ldout(cct, 20) << "r=" << r << ", "
+ << "image_offset=" << cluster_extent.image_offset << ", "
+ << "cluster_offset=" << cluster_extent.cluster_offset
+ << dendl;
+
+ if (r == -ENOENT) {
+ ldout(cct, 20) << "image offset DNE in QCOW image" << dendl;
+ r = 0;
+ } else if (r < 0) {
+ lderr(cct) << "failed to map image offset " << cluster_extent.image_offset
+ << ": " << cpp_strerror(r) << dendl;
+ }
+
+ on_finish->complete(r);
+ }
+
+ void handle_get_cluster_offsets(int r) {
+ auto cct = qcow_format->m_image_ctx->cct;
+ ldout(cct, 20) << "r=" << r << dendl;
+
+ if (r < 0) {
+ lderr(cct) << "failed to retrieve cluster extents: " << cpp_strerror(r)
+ << dendl;
+ aio_comp->fail(r);
+ delete this;
+ return;
+ }
+
+ read_clusters();
+ }
+
+ void read_clusters() {
+ auto cct = qcow_format->m_image_ctx->cct;
+ ldout(cct, 20) << dendl;
+
+ aio_comp->set_request_count(cluster_extents.size());
+ for (auto& cluster_extent : cluster_extents) {
+ auto read_ctx = new io::ReadResult::C_ImageReadRequest(
+ aio_comp, cluster_extent.buffer_offset,
+ {{cluster_extent.image_offset, cluster_extent.cluster_length}});
+ read_ctx->ignore_enoent = true;
+
+ auto log_ctx = new LambdaContext(
+ [this, cct=qcow_format->m_image_ctx->cct,
+ image_offset=cluster_extent.image_offset,
+ image_length=cluster_extent.cluster_length, ctx=read_ctx](int r) {
+ handle_read_cluster(cct, r, image_offset, image_length, ctx);
+ });
+
+ if (cluster_extent.cluster_offset == 0) {
+ // QCOW header is at offset 0, implies cluster DNE
+ log_ctx->complete(-ENOENT);
+ } else {
+ // request the (sub)cluster from the cluster cache
+ qcow_format->m_cluster_cache->get_cluster(
+ cluster_extent.cluster_offset, cluster_extent.cluster_length,
+ cluster_extent.intra_cluster_offset, &read_ctx->bl, log_ctx);
+ }
+ }
+
+ delete this;
+ }
+
+  // Log the outcome of a single cluster read and forward the result to
+  // the per-cluster read context. -ENOENT denotes a sparse (unallocated)
+  // cluster and is deliberately not logged as an error.
+  void handle_read_cluster(CephContext* cct, int r, uint64_t image_offset,
+                           uint64_t image_length, Context* on_finish) {
+    ldout(cct, 20) << "r=" << r << ", "
+                   << "image_offset=" << image_offset << ", "
+                   << "image_length=" << image_length << dendl;
+
+    if (r < 0 && r != -ENOENT) {
+      lderr(cct) << "failed to read image extent " << image_offset << "~"
+                 << image_length << ": " << cpp_strerror(r) << dendl;
+    }
+
+    on_finish->complete(r);
+  }
+};
+
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::migration::QCOWFormat::" \
+ << "ListSnapsRequest: " << this << " " \
+ << __func__ << ": "
+
+template <typename I>
+class QCOWFormat<I>::ListSnapsRequest {
+public:
+  // Computes which of the provided image extents are backed by allocated
+  // clusters and records those as DATA sparse extents. The request is
+  // self-deleting: it destroys itself after completing on_finish.
+  ListSnapsRequest(QCOWFormat* qcow_format, io::Extents&& image_extents,
+                   io::SparseExtents* sparse_extents, Context* on_finish)
+    : qcow_format(qcow_format), image_extents(std::move(image_extents)),
+      sparse_extents(sparse_extents), on_finish(on_finish) {
+  }
+
+  void send() {
+    list_snaps();
+  }
+
+private:
+  QCOWFormat* qcow_format;
+  io::Extents image_extents;
+  io::SparseExtents* sparse_extents;  // output map; owned by the caller
+  Context* on_finish;
+
+  ClusterExtents cluster_extents;
+
+  // Issue an L2 lookup for every cluster-aligned extent; the C_Gather
+  // fires handle_list_snaps() once all lookups have been processed.
+  void list_snaps() {
+    auto cct = qcow_format->m_image_ctx->cct;
+    ldout(cct, 20) << dendl;
+
+    populate_cluster_extents(cct, qcow_format->m_cluster_size, image_extents,
+                             &cluster_extents);
+
+    auto ctx = new LambdaContext([this](int r) {
+      handle_list_snaps(r); });
+    auto gather_ctx = new C_Gather(cct, ctx);
+    for (auto& cluster_extent : cluster_extents) {
+      auto ctx = new LambdaContext(
+        [this, cluster_extent=&cluster_extent,
+         ctx=gather_ctx->new_sub()](int r) {
+          // post onto the strand so updates to sparse_extents are
+          // serialized across concurrent lookup callbacks
+          boost::asio::post(
+            qcow_format->m_strand, [this, cluster_extent, ctx, r]() {
+              handle_get_cluster_offset( r, *cluster_extent, ctx);
+            });
+        });
+      qcow_format->m_l2_table_cache->get_cluster_offset(
+        cluster_extent.image_offset, &cluster_extent.cluster_offset, ctx);
+    }
+
+    gather_ctx->activate();
+  }
+
+  // Runs on the strand. An allocated cluster (offset != 0) is recorded
+  // as a DATA extent; -ENOENT (no L2 mapping) is treated as success and
+  // simply leaves the extent sparse.
+  void handle_get_cluster_offset(
+      int r, const ClusterExtent& cluster_extent, Context* on_finish) {
+    auto cct = qcow_format->m_image_ctx->cct;
+    ldout(cct, 20) << "r=" << r << ", "
+                   << "image_offset=" << cluster_extent.image_offset << ", "
+                   << "image_length=" << cluster_extent.cluster_length << ", "
+                   << "cluster_offset=" << cluster_extent.cluster_offset
+                   << dendl;
+
+    if (r == -ENOENT) {
+      r = 0;
+    } else if (r >= 0 && cluster_extent.cluster_offset != 0) {
+      sparse_extents->insert(
+        cluster_extent.image_offset, cluster_extent.cluster_length,
+        {io::SPARSE_EXTENT_STATE_DATA, cluster_extent.cluster_length});
+    }
+
+    on_finish->complete(r);
+  }
+
+  // Final gather callback: propagate the result and self-destruct.
+  void handle_list_snaps(int r) {
+    auto cct = qcow_format->m_image_ctx->cct;
+    ldout(cct, 20) << "r=" << r << dendl;
+
+    on_finish->complete(r);
+    delete this;
+  }
+};
+
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::migration::QCOWFormat: " << this \
+ << " " << __func__ << ": "
+
+// Construct a QCOW format handler for the given image. The strand is
+// used to serialize access to mutable state shared by the asynchronous
+// request objects.
+template <typename I>
+QCOWFormat<I>::QCOWFormat(
+  I* image_ctx, const json_spirit::mObject& json_object,
+  const SourceSpecBuilder<I>* source_spec_builder)
+  : m_image_ctx(image_ctx), m_json_object(json_object),
+    m_source_spec_builder(source_spec_builder),
+    m_strand(*image_ctx->asio_engine) {
+}
+
+// Build the migration stream described by the source-spec JSON and open
+// it asynchronously; on success, continues on to probing the QCOW
+// header. on_finish receives any builder or open error.
+template <typename I>
+void QCOWFormat<I>::open(Context* on_finish) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 10) << dendl;
+
+  int r = m_source_spec_builder->build_stream(m_json_object, &m_stream);
+  if (r < 0) {
+    // ": " separator keeps the errno text from fusing into the message
+    lderr(cct) << "failed to build migration stream handler: "
+               << cpp_strerror(r) << dendl;
+    on_finish->complete(r);
+    return;
+  }
+
+  auto ctx = new LambdaContext([this, on_finish](int r) {
+    handle_open(r, on_finish); });
+  m_stream->open(ctx);
+}
+
+// Completion handler for the stream open: on success proceed to probe
+// the header, otherwise report the failure to on_finish.
+template <typename I>
+void QCOWFormat<I>::handle_open(int r, Context* on_finish) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 10) << "r=" << r << dendl;
+
+  if (r >= 0) {
+    probe(on_finish);
+    return;
+  }
+
+  lderr(cct) << "failed to open QCOW image: " << cpp_strerror(r)
+             << dendl;
+  on_finish->complete(r);
+}
+
+// Read the first 8 bytes of the stream (magic + version) so the header
+// format can be identified before committing to a full header parse.
+template <typename I>
+void QCOWFormat<I>::probe(Context* on_finish) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 10) << dendl;
+
+  m_bl.clear();
+  auto read_ctx = new LambdaContext([this, on_finish](int r) {
+    handle_probe(r, on_finish); });
+  m_stream->read({{0, 8}}, &m_bl, read_ctx);
+}
+
+// Validate the probed magic and dispatch to the appropriate header
+// parser based on the on-disk version number.
+template <typename I>
+void QCOWFormat<I>::handle_probe(int r, Context* on_finish) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 10) << "r=" << r << dendl;
+
+  if (r < 0) {
+    lderr(cct) << "failed to probe QCOW image: " << cpp_strerror(r)
+               << dendl;
+    on_finish->complete(r);
+    return;
+  }
+
+  // decode the big-endian magic/version prefix
+  auto header_probe = *reinterpret_cast<QCowHeaderProbe*>(
+    m_bl.c_str());
+  header_probe.magic = be32toh(header_probe.magic);
+  header_probe.version = be32toh(header_probe.version);
+
+  if (header_probe.magic != QCOW_MAGIC) {
+    lderr(cct) << "invalid QCOW header magic" << dendl;
+    on_finish->complete(-EINVAL);
+    return;
+  }
+
+  m_bl.clear();
+  switch (header_probe.version) {
+  case 1:
+    read_v1_header(on_finish);
+    break;
+  case 2:
+    read_v2_header(on_finish);
+    break;
+  default:
+    lderr(cct) << "invalid QCOW header version " << header_probe.version
+               << dendl;
+    on_finish->complete(-EINVAL);
+    break;
+  }
+}
+
+// Fetch the full QCOW (v1) header from the start of the stream.
+template <typename I>
+void QCOWFormat<I>::read_v1_header(Context* on_finish) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 10) << dendl;
+
+  m_bl.clear();
+  auto read_ctx = new LambdaContext([this, on_finish](int r) {
+    handle_read_v1_header(r, on_finish); });
+  m_stream->read({{0, sizeof(QCowHeaderV1)}}, &m_bl, read_ctx);
+}
+
+// Parse and validate the QCOW (v1) header, populate the image geometry
+// members (size, cluster/L1/L2 layout), allocate the lookup caches, and
+// continue on to loading the L1 table. Validation order matters: bounds
+// are checked before any derived value is computed from them.
+template <typename I>
+void QCOWFormat<I>::handle_read_v1_header(int r, Context* on_finish) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 10) << "r=" << r << dendl;
+
+  if (r < 0) {
+    lderr(cct) << "failed to read QCOW header: " << cpp_strerror(r) << dendl;
+    on_finish->complete(r);
+    return;
+  }
+
+  auto header = *reinterpret_cast<QCowHeaderV1*>(m_bl.c_str());
+
+  // byte-swap important fields (on-disk format is big-endian)
+  header.magic = be32toh(header.magic);
+  header.version = be32toh(header.version);
+  header.backing_file_offset = be64toh(header.backing_file_offset);
+  header.backing_file_size = be32toh(header.backing_file_size);
+  header.size = be64toh(header.size);
+  header.crypt_method = be32toh(header.crypt_method);
+  header.l1_table_offset = be64toh(header.l1_table_offset);
+
+  if (header.magic != QCOW_MAGIC || header.version != 1) {
+    // honestly shouldn't happen since we've already validated it
+    lderr(cct) << "header is not QCOW" << dendl;
+    on_finish->complete(-EINVAL);
+    return;
+  }
+
+  // cluster size must fall within the supported range
+  if (header.cluster_bits < QCOW_MIN_CLUSTER_BITS ||
+      header.cluster_bits > QCOW_MAX_CLUSTER_BITS) {
+    lderr(cct) << "invalid cluster bits: " << header.cluster_bits << dendl;
+    on_finish->complete(-EINVAL);
+    return;
+  }
+
+  // L2 tables are one cluster of 8-byte entries, hence the -3
+  if (header.l2_bits < (QCOW_MIN_CLUSTER_BITS - 3) ||
+      header.l2_bits > (QCOW_MAX_CLUSTER_BITS - 3)) {
+    lderr(cct) << "invalid L2 bits: " << header.l2_bits << dendl;
+    on_finish->complete(-EINVAL);
+    return;
+  }
+
+  // encrypted images (AES/LUKS) are not supported by the migration path
+  if (header.crypt_method != QCOW_CRYPT_NONE) {
+    lderr(cct) << "invalid or unsupported encryption method" << dendl;
+    on_finish->complete(-EINVAL);
+    return;
+  }
+
+  // image size must be a whole number of 512-byte sectors
+  m_size = header.size;
+  if (p2roundup(m_size, static_cast<uint64_t>(512)) != m_size) {
+    lderr(cct) << "image size is not a multiple of block size" << dendl;
+    on_finish->complete(-EINVAL);
+    return;
+  }
+
+  m_backing_file_offset = header.backing_file_offset;
+  m_backing_file_size = header.backing_file_size;
+
+  // masks for extracting cluster offsets from L2 table entries; bit 63
+  // flags a compressed cluster (see QEMU qcow format)
+  m_cluster_bits = header.cluster_bits;
+  m_cluster_size = 1UL << header.cluster_bits;
+  m_cluster_offset_mask = (1ULL << (63 - header.cluster_bits)) - 1;
+  m_cluster_mask = ~QCOW_OFLAG_COMPRESSED;
+
+  // one L1 entry covers (cluster_size * l2_entries) bytes of the image
+  uint32_t l2_bits = header.l2_bits;
+  uint32_t shift = m_cluster_bits + l2_bits;
+  m_l1_size = (m_size + (1LL << shift) - 1) >> shift;
+  m_l1_table_offset = header.l1_table_offset;
+  // reject sizes that would overflow the L1 computations above or make
+  // the L1 table allocation unreasonably large
+  if (m_size > (std::numeric_limits<uint64_t>::max() - (1ULL << shift)) ||
+      m_l1_size > (std::numeric_limits<int32_t>::max() / sizeof(uint64_t))) {
+    lderr(cct) << "image size too big: " << m_size << dendl;
+    on_finish->complete(-EINVAL);
+    return;
+  }
+
+  ldout(cct, 15) << "size=" << m_size << ", "
+                 << "cluster_bits=" << m_cluster_bits << ", "
+                 << "l2_bits=" << l2_bits << dendl;
+
+  // allocate memory for L1 table and L2 + cluster caches
+  m_l2_table_cache = std::make_unique<L2TableCache>(this, l2_bits);
+  m_cluster_cache = std::make_unique<ClusterCache>(this);
+
+  read_l1_table(on_finish);
+}
+
+// Fetch the full QCOW2 (v2+) header from the start of the stream.
+template <typename I>
+void QCOWFormat<I>::read_v2_header(Context* on_finish) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 10) << dendl;
+
+  m_bl.clear();
+  auto read_ctx = new LambdaContext([this, on_finish](int r) {
+    handle_read_v2_header(r, on_finish); });
+  m_stream->read({{0, sizeof(QCowHeader)}}, &m_bl, read_ctx);
+}
+
+// Completion handler for the QCOW2 header read. A failed read is
+// propagated as-is (instead of being masked by -ENOTSUP) so callers see
+// the underlying I/O error; a successful read still reports -ENOTSUP
+// until QCOW2 parsing is implemented.
+template <typename I>
+void QCOWFormat<I>::handle_read_v2_header(int r, Context* on_finish) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 10) << "r=" << r << dendl;
+
+  if (r < 0) {
+    lderr(cct) << "failed to read QCOW2 header: " << cpp_strerror(r)
+               << dendl;
+    on_finish->complete(r);
+    return;
+  }
+
+  // TODO add support for QCOW2
+  on_finish->complete(-ENOTSUP);
+}
+
+// Fetch the on-disk L1 table (m_l1_size 8-byte entries) into
+// m_l1_table_bl.
+template <typename I>
+void QCOWFormat<I>::read_l1_table(Context* on_finish) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 10) << dendl;
+
+  // match the other read paths: start from an empty buffer so a repeat
+  // invocation cannot append to stale data
+  m_l1_table_bl.clear();
+  auto ctx = new LambdaContext([this, on_finish](int r) {
+    handle_read_l1_table(r, on_finish); });
+  m_stream->read({{m_l1_table_offset,
+                   m_l1_size * sizeof(uint64_t)}}, &m_l1_table_bl, ctx);
+}
+
+// Convert the freshly-read L1 table from big-endian to host order and
+// continue on to the backing-file check.
+template <typename I>
+void QCOWFormat<I>::handle_read_l1_table(int r, Context* on_finish) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 10) << "r=" << r << dendl;
+
+  if (r < 0) {
+    lderr(cct) << "failed to read L1 table: " << cpp_strerror(r) << dendl;
+    on_finish->complete(r);
+    return;
+  }
+
+  // translate the L1 table (big-endian -> CPU endianess)
+  m_l1_table = reinterpret_cast<uint64_t*>(m_l1_table_bl.c_str());
+  uint64_t* const table_end = m_l1_table + m_l1_size;
+  for (uint64_t* entry = m_l1_table; entry != table_end; ++entry) {
+    *entry = be64toh(*entry);
+  }
+
+  read_backing_file(on_finish);
+}
+
+// Handle any backing file referenced by the header. Images without a
+// backing file complete immediately; backing files are unsupported.
+template <typename I>
+void QCOWFormat<I>::read_backing_file(Context* on_finish) {
+  bool has_backing_file = (m_backing_file_offset != 0 &&
+                           m_backing_file_size != 0);
+  if (!has_backing_file) {
+    // all data is within the specified file
+    on_finish->complete(0);
+    return;
+  }
+
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 10) << dendl;
+
+  // TODO add support for backing files
+  on_finish->complete(-ENOTSUP);
+}
+
+// Close the underlying migration stream; on_finish receives the
+// stream's close result.
+template <typename I>
+void QCOWFormat<I>::close(Context* on_finish) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 10) << dendl;
+
+  m_stream->close(on_finish);
+}
+
+// Retrieve the snapshots contained in the source image. QCOW (v1) has
+// no snapshot support, so the result is always an empty set.
+template <typename I>
+void QCOWFormat<I>::get_snapshots(SnapInfos* snap_infos, Context* on_finish) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 10) << dendl;
+
+  snap_infos->clear();
+
+  // TODO QCOW2 supports snapshots, not QCOW
+  on_finish->complete(0);
+}
+
+// Report the virtual image size. snap_id is ignored because QCOW (v1)
+// images cannot contain snapshots (see get_snapshots()).
+template <typename I>
+void QCOWFormat<I>::get_image_size(uint64_t snap_id, uint64_t* size,
+                                   Context* on_finish) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 10) << dendl;
+
+  *size = m_size;
+  on_finish->complete(0);
+}
+
+// Service an image read by dispatching a self-deleting ReadRequest.
+// Returns true to indicate the request was (or will be) handled.
+template <typename I>
+bool QCOWFormat<I>::read(
+    io::AioCompletion* aio_comp, uint64_t snap_id, io::Extents&& image_extents,
+    io::ReadResult&& read_result, int op_flags, int read_flags,
+    const ZTracer::Trace &parent_trace) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 20) << "snap_id=" << snap_id << ", "
+                 << "image_extents=" << image_extents << dendl;
+
+  if (snap_id != CEPH_NOSNAP) {
+    // TODO add QCOW2 snapshot support
+    lderr(cct) << "snapshots are not supported" << dendl;
+    aio_comp->fail(-EINVAL);
+    return true;
+  }
+
+  // the destination for the read must be wired up before dispatch
+  aio_comp->read_result = std::move(read_result);
+  aio_comp->read_result.set_image_extents(image_extents);
+
+  // ReadRequest manages its own lifetime
+  (new ReadRequest(this, aio_comp, std::move(image_extents)))->send();
+  return true;
+}
+
+// Produce a snapshot delta for the requested extents. Since QCOW (v1)
+// has no snapshots, the delta contains a single (HEAD) entry built from
+// cluster allocation state.
+template <typename I>
+void QCOWFormat<I>::list_snaps(io::Extents&& image_extents,
+                               io::SnapIds&& snap_ids, int list_snaps_flags,
+                               io::SnapshotDelta* snapshot_delta,
+                               const ZTracer::Trace &parent_trace,
+                               Context* on_finish) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 20) << "image_extents=" << image_extents << dendl;
+
+  // TODO add QCOW2 snapshot support
+
+  // QCOW does not support snapshots so just use cluster existence for
+  // the delta
+  auto snapshot = &(*snapshot_delta)[{CEPH_NOSNAP, CEPH_NOSNAP}];
+  auto list_snaps_request = new ListSnapsRequest(
+    this, io::Extents{image_extents}, snapshot, on_finish);
+  list_snaps_request->send();
+}
+
+} // namespace migration
+} // namespace librbd
+
+template class librbd::migration::QCOWFormat<librbd::ImageCtx>;