From 305ff6b3a03c230d3c07b61457e961406d979693 Mon Sep 17 00:00:00 2001 From: ethanwu Date: Thu, 25 Sep 2025 18:42:06 +0800 Subject: [PATCH] ceph: supply snapshot context in ceph_uninline_data() The ceph_uninline_data function was missing proper snapshot context handling for its OSD write operations. Both CEPH_OSD_OP_CREATE and CEPH_OSD_OP_WRITE requests were passing NULL instead of the appropriate snapshot context, which could lead to unnecessary object clone. Reproducer: ../src/vstart.sh --new -x --localhost --bluestore // turn on cephfs inline data ./bin/ceph fs set a inline_data true --yes-i-really-really-mean-it // allow fs_a client to take snapshot ./bin/ceph auth caps client.fs_a mds 'allow rwps fsname=a' mon 'allow r fsname=a' osd 'allow rw tag cephfs data=a' // mount cephfs with fuse, since kernel cephfs doesn't support inline write ceph-fuse --id fs_a -m 127.0.0.1:40318 --conf ceph.conf -d /mnt/mycephfs/ // bump snapshot seq mkdir /mnt/mycephfs/.snap/snap1 echo "foo" > /mnt/mycephfs/test // umount and mount it again using kernel cephfs client umount /mnt/mycephfs mount -t ceph fs_a@.a=/ /mnt/mycephfs/ -o conf=./ceph.conf echo "bar" >> /mnt/mycephfs/test ./bin/rados listsnaps -p cephfs.a.data $(printf "%x\n" $(stat -c %i /mnt/mycephfs/test)).00000000 will see this object does unnecessary clone 1000000000a.00000000 (seq:2): cloneid snaps size overlap 2 2 4 [] head - 8 but it's expected to see 10000000000.00000000 (seq:2): cloneid snaps size overlap head - 8 since there's no snapshot between these 2 writes clone happened because the first osd request CEPH_OSD_OP_CREATE doesn't pass snap context so object is created with snap seq 0, but later data writeback is equipped with snapshot context. snap.seq(1) > object snap seq(0), so osd does object clone. This fix properly acquiring the snapshot context before performing write operations. Signed-off-by: ethanwu Reviewed-by: Viacheslav Dubeyko Tested-by: Viacheslav Dubeyko Signed-off-by: Ilya Dryomov --- fs/ceph/addr.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 63b75d214210..faecd9025ee9 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -2199,6 +2199,7 @@ int ceph_uninline_data(struct file *file) struct ceph_osd_request *req = NULL; struct ceph_cap_flush *prealloc_cf = NULL; struct folio *folio = NULL; + struct ceph_snap_context *snapc = NULL; u64 inline_version = CEPH_INLINE_NONE; struct page *pages[1]; int err = 0; @@ -2226,6 +2227,24 @@ int ceph_uninline_data(struct file *file) if (inline_version == 1) /* initial version, no data */ goto out_uninline; + down_read(&fsc->mdsc->snap_rwsem); + spin_lock(&ci->i_ceph_lock); + if (__ceph_have_pending_cap_snap(ci)) { + struct ceph_cap_snap *capsnap = + list_last_entry(&ci->i_cap_snaps, + struct ceph_cap_snap, + ci_item); + snapc = ceph_get_snap_context(capsnap->context); + } else { + if (!ci->i_head_snapc) { + ci->i_head_snapc = ceph_get_snap_context( + ci->i_snap_realm->cached_context); + } + snapc = ceph_get_snap_context(ci->i_head_snapc); + } + spin_unlock(&ci->i_ceph_lock); + up_read(&fsc->mdsc->snap_rwsem); + folio = read_mapping_folio(inode->i_mapping, 0, file); if (IS_ERR(folio)) { err = PTR_ERR(folio); @@ -2241,7 +2260,7 @@ int ceph_uninline_data(struct file *file) req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, ceph_vino(inode), 0, &len, 0, 1, CEPH_OSD_OP_CREATE, CEPH_OSD_FLAG_WRITE, - NULL, 0, 0, false); + snapc, 0, 0, false); if (IS_ERR(req)) { err = PTR_ERR(req); goto out_unlock; @@ -2257,7 +2276,7 @@ int ceph_uninline_data(struct file *file) req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, ceph_vino(inode), 0, &len, 1, 3, CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE, - NULL, ci->i_truncate_seq, + snapc, ci->i_truncate_seq, ci->i_truncate_size, false); if (IS_ERR(req)) { err = PTR_ERR(req); @@ -2320,6 +2339,7 @@ out_unlock: folio_put(folio); } out: + ceph_put_snap_context(snapc); ceph_free_cap_flush(prealloc_cf); doutc(cl, "%llx.%llx inline_version %llu = %d\n", ceph_vinop(inode), inline_version, err); -- 2.47.3