From 72b6c7f3e9c06ab37af767fc7d45100b02d29f04 Mon Sep 17 00:00:00 2001
From: Venky Shankar
Date: Thu, 20 Feb 2025 12:55:34 +0000
Subject: [PATCH] libcephfs / client: wire up file blockdiff

Fixes: http://tracker.ceph.com/issues/69791
Signed-off-by: Venky Shankar
---
 src/client/Client.cc                  | 166 ++++++++++++++++++++++++++
 src/client/Client.h                   |  12 +++
 src/common/options/mds-client.yaml.in |   9 ++
 src/include/cephfs/libcephfs.h        |  69 ++++++++++++
 src/libcephfs.cc                      | 103 ++++++++++++++++++
 5 files changed, 359 insertions(+)

diff --git a/src/client/Client.cc b/src/client/Client.cc
index 5090e718268..7a7c8f0858a 100644
--- a/src/client/Client.cc
+++ b/src/client/Client.cc
@@ -9872,6 +9872,172 @@ int Client::readdirplus_r(dir_result_t *d, struct dirent *de,
   return 0;
 }
 
+/*
+ * Release a scan state: close whichever fds were opened and free the
+ * state itself. Callers take client_lock before calling this with an
+ * open fd, mirroring file_blockdiff_finish(), which closes its fds
+ * under the same lock.
+ */
+static void cleanup_state(Client *client, struct scan_state_t *sst)
+{
+  if (sst->fd1 != -1) {
+    client->_close(sst->fd1);
+  }
+  if (sst->fd2 != -1) {
+    client->_close(sst->fd2);
+  }
+  delete sst;
+}
+
+/*
+ * Open the two paths read-only and build the scan state consumed by
+ * file_blockdiff(). On success *state holds both fds and must be
+ * released with file_blockdiff_finish().
+ *
+ * Returns 0 on success, the negative error from open() or
+ * get_fd_inode() on failure, or -EINVAL when the two paths resolve
+ * to different inode numbers.
+ */
+int Client::file_blockdiff_init_state(const char* path1, const char* path2,
+                                      const UserPerm &perms, struct scan_state_t **state)
+{
+  ldout(cct, 20) << __func__ << dendl;
+
+  InodeRef inode1, inode2;
+  scan_state_t *sst = new scan_state_t();
+  sst->fd1 = sst->fd2 = -1;
+
+  /*
+   * lets have a constraint that both snapshot paths should be
+   * present - otherwise the caller should do a full copy or a
+   * delete on the path.
+   */
+  int r = open(path1, O_RDONLY, perms, 0);
+  if (r < 0) {
+    // no fd is open yet, but the state itself must not leak
+    cleanup_state(this, sst);
+    return r;
+  }
+  sst->fd1 = r;
+
+  r = open(path2, O_RDONLY, perms, 0);
+  if (r < 0) {
+    // fd1 is open: close it under client_lock like every other path
+    std::unique_lock lock(client_lock);
+    cleanup_state(this, sst);
+    return r;
+  }
+  sst->fd2 = r;
+
+  std::unique_lock lock(client_lock);
+  r = get_fd_inode(sst->fd1, &inode1);
+  if (r < 0) {
+    cleanup_state(this, sst);
+    return r;
+  }
+  r = get_fd_inode(sst->fd2, &inode2);
+  if (r < 0) {
+    cleanup_state(this, sst);
+    return r;
+  }
+
+  ldout(cct, 20) << __func__ << ": (snapid1, ino1, size)=(" << inode1->snapid
+                 << "," << std::hex << inode1->ino << std::dec << ","
+                 << inode1->size <<")" << " (snapid2, ino2, size)=("
+                 << inode2->snapid << "," << std::hex << inode2->ino << std::dec
+                 << "," << inode2->size << ")" << dendl;
+  if (inode1->ino != inode2->ino) {
+    cleanup_state(this, sst);
+    return -EINVAL;
+  }
+
+  sst->index = 0;
+  *state = sst;
+  return 0;
+}
+
+/*
+ * Close both fds held by the scan state and free it. Always returns 0.
+ */
+int Client::file_blockdiff_finish(struct scan_state_t *state)
+{
+  std::unique_lock lock(client_lock);
+
+  _close(state->fd1);
+  _close(state->fd2);
+  delete state;
+  return 0;
+}
+
+/*
+ * Request the next batch of changed blocks from the MDS, resuming at
+ * state->index. Every pair decoded from the reply is appended to
+ * *blocks and state->index is set to the scan index in the reply.
+ *
+ * Returns -ENOTCONN if the mount state check fails, the negative
+ * error from get_fd_inode() or make_request(), and otherwise the
+ * rval carried in the decoded reply.
+ */
+int Client::file_blockdiff(struct scan_state_t *state, const UserPerm &perms,
+                           std::vector<std::pair<uint64_t,uint64_t>> *blocks)
+{
+  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
+  if (!mref_reader.is_state_satisfied()) {
+    return -ENOTCONN;
+  }
+
+  ldout(cct, 20) << __func__ << dendl;
+
+  InodeRef inode1;
+  InodeRef inode2;
+
+  std::unique_lock lock(client_lock);
+
+  int r = get_fd_inode(state->fd1, &inode1);
+  if (r < 0) {
+    return r;
+  }
+  r = get_fd_inode(state->fd2, &inode2);
+  if (r < 0) {
+    return r;
+  }
+
+  ceph_assert(inode1->ino == inode2->ino);
+
+  MetaRequest *req = new MetaRequest(CEPH_MDS_OP_FILE_BLOCKDIFF);
+
+  filepath path1, path2;
+  inode1->make_nosnap_relative_path(path1);
+  req->set_filepath(path1);
+
+  inode2->make_nosnap_relative_path(path2);
+  req->set_filepath2(path2);
+  req->set_inode(inode2.get());
+
+  req->head.args.blockdiff.scan_idx = state->index;
+  req->head.args.blockdiff.max_objects =
+    cct->_conf.get_val<uint64_t>("client_file_blockdiff_max_concurrent_object_scans");
+
+  bufferlist bl;
+  r = make_request(req, perms, nullptr, nullptr, -1, &bl);
+  ldout(cct, 10) << __func__ << ": result=" << r << dendl;
+
+  if (r < 0) {
+    return r;
+  }
+
+  BlockDiff block_diff;
+  auto p = bl.cbegin();
+  decode(block_diff, p);
+
+  ldout(cct, 10) << __func__ << ": block_diff=" << block_diff << dendl;
+  for (const auto &block : block_diff.blocks) {
+    blocks->emplace_back(block.first, block.second);
+  }
+
+  state->index = block_diff.scan_idx;
+  return block_diff.rval;
+}
+
 int Client::readdir_snapdiff(dir_result_t* d1, snapid_t snap2,
                              struct dirent* out_de,
                              snapid_t* out_snap)
diff --git a/src/client/Client.h b/src/client/Client.h
index 23376052f9e..99ea786112c 100644
--- a/src/client/Client.h
+++ b/src/client/Client.h
@@ -147,6 +147,12 @@ class ceph_lock_state_t;
 // ========================================================
 // client interface
 
+struct scan_state_t {
+  int fd1;         // fd opened on the first path given to file_blockdiff_init_state()
+  int fd2;         // fd opened on the second path given to file_blockdiff_init_state()
+  uint64_t index;  // scan index sent with the next file_blockdiff() request
+};
+
 struct dir_result_t {
   static const int SHIFT = 28;
   static const int64_t MASK = (1 << SHIFT) - 1;
@@ -369,6 +375,12 @@ public:
   int readdir_r(dir_result_t *dirp, struct dirent *de);
   int readdirplus_r(dir_result_t *dirp, struct dirent *de, struct ceph_statx *stx,
                     unsigned want, unsigned flags, Inode **out);
+  int file_blockdiff_init_state(const char *path1, const char *path2,
+                                const UserPerm &perms, struct scan_state_t **state);
+  int file_blockdiff(struct scan_state_t *state, const UserPerm &perms,
+                     std::vector<std::pair<uint64_t,uint64_t>> *blocks);
+  int file_blockdiff_finish(struct scan_state_t *state);
+
   /*
    * Get the next snapshot delta entry.
    *
diff --git a/src/common/options/mds-client.yaml.in b/src/common/options/mds-client.yaml.in
index ea7ad398533..a35c3c76489 100644
--- a/src/common/options/mds-client.yaml.in
+++ b/src/common/options/mds-client.yaml.in
@@ -589,3 +589,12 @@ options:
   - mds_client
   flags:
   - runtime
+- name: client_file_blockdiff_max_concurrent_object_scans
+  type: uint
+  level: advanced
+  desc: maximum number of concurrent object scans
+  long_desc: Maximum number of concurrent listsnaps operations sent to RADOS.
+  default: 16
+  services:
+  - mds_client
+  min: 1
\ No newline at end of file
diff --git a/src/include/cephfs/libcephfs.h b/src/include/cephfs/libcephfs.h
index 4fc975801fa..0a08e89566c 100644
--- a/src/include/cephfs/libcephfs.h
+++ b/src/include/cephfs/libcephfs.h
@@ -648,6 +648,75 @@ struct ceph_snapdiff_info
                                    // doesn't exist in the second snapshot
 };
 
+struct ceph_file_blockdiff_result;
+
+// blockdiff stream handle
+struct ceph_file_blockdiff_info
+{
+  struct ceph_mount_info* cmount;             // mount the stream was initialized on
+  struct ceph_file_blockdiff_result* blockp;  // opaque per-stream scan state
+};
+
+// a single changed block (offset, length)
+struct cblock
+{
+  uint64_t offset;
+  uint64_t len;
+};
+
+// set of changed blocks filled in by ceph_file_blockdiff()
+struct ceph_file_blockdiff_changedblocks
+{
+  uint64_t num_blocks;  // number of entries in b
+  struct cblock *b;     // array of num_blocks entries, NULL when num_blocks is 0
+};
+
+/**
+ * Initialize blockdiff stream to get file block deltas.
+ *
+ * @param cmount the ceph mount handle to use for blockdiff retrieval.
+ * @param root_path root path for snapshots-in-question
+ * @param rel_path subpath under the root to build delta for
+ * @param snap1 the first snapshot name
+ * @param snap2 the second snapshot name
+ * @param out_info resulting blockdiff stream handle to be used for blockdiff results
+ *                 retrieval via ceph_file_blockdiff().
+ * @returns 0 on success and negative error code otherwise
+ */
+int ceph_file_blockdiff_init(struct ceph_mount_info* cmount,
+                             const char* root_path,
+                             const char* rel_path,
+                             const char* snap1,
+                             const char* snap2,
+                             struct ceph_file_blockdiff_info* out_info);
+
+/**
+ * Get the next set of changed file blocks.
+ *
+ * @param info blockdiff stream handle
+ * @param blocks next set of changed blocks (offset, length); on success the
+ *               buffer must be released with ceph_free_file_blockdiff_buffer()
+ * @returns 0 or 1 on success and negative error code otherwise
+ */
+int ceph_file_blockdiff(struct ceph_file_blockdiff_info* info,
+                        struct ceph_file_blockdiff_changedblocks* blocks);
+
+/**
+ * Free blockdiff buffer
+ *
+ * @param blocks changed blocks filled in by ceph_file_blockdiff()
+ * @returns None
+ */
+void ceph_free_file_blockdiff_buffer(struct ceph_file_blockdiff_changedblocks* blocks);
+
+/**
+ * Close blockdiff stream and release its state.
+ *
+ * @param info blockdiff stream handle
+ * @returns 0 on success and negative error code otherwise
+ */
+int ceph_file_blockdiff_finish(struct ceph_file_blockdiff_info* info);
+
 /**
  * Opens snapdiff stream to get snapshots delta (aka snapdiff).
  *
diff --git a/src/libcephfs.cc b/src/libcephfs.cc
index ce06ff7eef0..9b95ec48e19 100644
--- a/src/libcephfs.cc
+++ b/src/libcephfs.cc
@@ -739,6 +739,109 @@ extern "C" int ceph_readdirplus_r(struct ceph_mount_info *cmount, struct ceph_di
   return cmount->get_client()->readdirplus_r(reinterpret_cast<dir_result_t*>(dirp), de, stx, want, flags, out);
 }
 
+extern "C" int ceph_file_blockdiff_init(struct ceph_mount_info* cmount,
+                                        const char* root_path,
+                                        const char* rel_path,
+                                        const char* snap1,
+                                        const char* snap2,
+                                        struct ceph_file_blockdiff_info* out_info)
+{
+  if (!cmount->is_mounted()) {
+    return -ENOTCONN;
+  }
+  if (!out_info || !root_path || !rel_path ||
+      !snap1 || !*snap1 || !snap2 || !*snap2) {
+    return -EINVAL;
+  }
+
+  char snapdir[PATH_MAX];
+  cmount->conf_get("client_snapdir", snapdir, sizeof(snapdir) - 1);
+
+  char path1[PATH_MAX];
+  char path2[PATH_MAX];
+  // construct snapshot paths for the files; snprintf() returns the length
+  // the full string needs, so any value >= PATH_MAX means it was truncated
+  int n = snprintf(path1, PATH_MAX, "%s/%s/%s/%s",
+                   root_path, snapdir, snap1, rel_path);
+  if (n < 0 || n >= PATH_MAX) {
+    errno = ENAMETOOLONG;
+    return -errno;
+  }
+  n = snprintf(path2, PATH_MAX, "%s/%s/%s/%s",
+               root_path, snapdir, snap2, rel_path);
+  if (n < 0 || n >= PATH_MAX) {
+    return -ENAMETOOLONG;
+  }
+
+  int r = cmount->get_client()->file_blockdiff_init_state(path1, path2,
+                                                          cmount->default_perms,
+                                                          (struct scan_state_t **)&(out_info->blockp));
+  if (r < 0) {
+    return r;
+  }
+
+  out_info->cmount = cmount;
+  return 0;
+}
+
+extern "C" int ceph_file_blockdiff(struct ceph_file_blockdiff_info* info,
+                                   struct ceph_file_blockdiff_changedblocks* blocks)
+{
+  if (!info->cmount->is_mounted()) {
+    return -ENOTCONN;
+  }
+
+  std::vector<std::pair<uint64_t,uint64_t>> _blocks;
+  struct scan_state_t *state = reinterpret_cast<struct scan_state_t *>(info->blockp);
+
+  int r = info->cmount->get_client()->file_blockdiff(state, info->cmount->default_perms, &_blocks);
+  if (r < 0) {
+    return r;
+  }
+
+  // publish the count only together with the buffer, so a failed
+  // allocation never leaves num_blocks != 0 next to a NULL array
+  blocks->b = NULL;
+  blocks->num_blocks = 0;
+  if (!_blocks.empty()) {
+    struct cblock *b = (struct cblock *)calloc(_blocks.size(), sizeof(struct cblock));
+    if (!b) {
+      return -ENOMEM;
+    }
+
+    struct cblock *_b = b;
+    for (auto &_block : _blocks) {
+      _b->offset = _block.first;
+      _b->len = _block.second;
+      ++_b;
+    }
+
+    blocks->b = b;
+    blocks->num_blocks = _blocks.size();
+  }
+
+  return r;
+}
+
+extern "C" void ceph_free_file_blockdiff_buffer(struct ceph_file_blockdiff_changedblocks* blocks)
+{
+  if (blocks->b) {
+    free(blocks->b);
+  }
+  blocks->num_blocks = 0;
+  blocks->b = NULL;
+}
+
+extern "C" int ceph_file_blockdiff_finish(struct ceph_file_blockdiff_info* info)
+{
+  if (!info->cmount->is_mounted()) {
+    return -ENOTCONN;
+  }
+
+  struct scan_state_t *state = reinterpret_cast<struct scan_state_t *>(info->blockp);
+  return info->cmount->get_client()->file_blockdiff_finish(state);
+}
+
 extern "C" int ceph_open_snapdiff(struct ceph_mount_info* cmount,
                                   const char* root_path,
                                   const char* rel_path,
-- 
2.39.5