ci->i_max_bytes = 0;
ci->i_max_files = 0;
+ ci->i_subvolume_id = CEPH_SUBVOLUME_ID_NONE;
memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout));
memset(&ci->i_cached_layout, 0, sizeof(ci->i_cached_layout));
percpu_counter_dec(&mdsc->metric.total_inodes);
+ ci->i_subvolume_id = CEPH_SUBVOLUME_ID_NONE;
+
netfs_wait_for_outstanding_io(inode);
truncate_inode_pages_final(&inode->i_data);
if (inode_state_read_once(inode) & I_PINNING_NETFS_WB)
return queue_trunc;
}
+/*
+ * Record which CephFS subvolume an inode belongs to.
+ *
+ * @inode: inode to tag; a NULL inode is ignored.
+ * @subvolume_id: ID reported for the inode.  CEPH_SUBVOLUME_ID_NONE (0)
+ *	means unknown/unset and is ignored, so it never overwrites an ID
+ *	that has already been recorded (the MDS only sends non-zero IDs
+ *	for inodes within subvolumes).
+ *
+ * Subvolume membership is immutable: an inode stays in the subvolume it
+ * was created in.  The first non-zero ID is therefore stored and kept.
+ * A later call with the same ID is a no-op; a later call with a different
+ * non-zero ID indicates a bug, so it triggers a one-time warning and the
+ * stored ID is left unchanged.
+ */
+void ceph_inode_set_subvolume(struct inode *inode, u64 subvolume_id)
+{
+	struct ceph_inode_info *ci;
+	u64 cur;
+
+	if (!inode)
+		return;
+	if (subvolume_id == CEPH_SUBVOLUME_ID_NONE)
+		return;
+
+	ci = ceph_inode(inode);
+	cur = READ_ONCE(ci->i_subvolume_id);
+
+	if (cur == CEPH_SUBVOLUME_ID_NONE) {
+		/* first valid ID seen for this inode: remember it */
+		WRITE_ONCE(ci->i_subvolume_id, subvolume_id);
+		return;
+	}
+
+	/* already set: the ID must never change afterwards */
+	WARN_ON_ONCE(cur != subvolume_id);
+}
+
void ceph_fill_file_time(struct inode *inode, int issued,
u64 time_warp_seq, struct timespec64 *ctime,
struct timespec64 *mtime, struct timespec64 *atime)
new_issued = ~issued & info_caps;
__ceph_update_quota(ci, iinfo->max_bytes, iinfo->max_files);
+ ceph_inode_set_subvolume(inode, iinfo->subvolume_id);
#ifdef CONFIG_FS_ENCRYPTION
if (iinfo->fscrypt_auth_len &&
goto done;
}
if (parent_dir) {
+ ceph_inode_set_subvolume(parent_dir,
+ rinfo->diri.subvolume_id);
err = ceph_fill_inode(parent_dir, NULL, &rinfo->diri,
rinfo->dirfrag, session, -1,
&req->r_caps_reservation);
BUG_ON(!req->r_target_inode);
in = req->r_target_inode;
+ ceph_inode_set_subvolume(in, rinfo->targeti.subvolume_id);
err = ceph_fill_inode(in, req->r_locked_page, &rinfo->targeti,
NULL, session,
(!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags) &&
return -EIO;
}
-/*
- * parse individual inode info
- */
static int parse_reply_info_in(void **p, void *end,
struct ceph_mds_reply_info_in *info,
- u64 features)
+ u64 features,
+ struct ceph_mds_client *mdsc)
{
int err = 0;
u8 struct_v = 0;
+ u8 struct_compat = 0;
+ u32 struct_len = 0;
+
+ info->subvolume_id = CEPH_SUBVOLUME_ID_NONE;
if (features == (u64)-1) {
- u32 struct_len;
- u8 struct_compat;
ceph_decode_8_safe(p, end, struct_v, bad);
ceph_decode_8_safe(p, end, struct_compat, bad);
/* struct_v is expected to be >= 1. we only understand
ceph_decode_skip_n(p, end, v8_struct_len, bad);
}
+ /* struct_v 9 added subvolume_id */
+ if (struct_v >= 9)
+ ceph_decode_64_safe(p, end, info->subvolume_id, bad);
+
*p = end;
} else {
/* legacy (unversioned) struct */
*/
static int parse_reply_info_trace(void **p, void *end,
struct ceph_mds_reply_info_parsed *info,
- u64 features)
+ u64 features,
+ struct ceph_mds_client *mdsc)
{
int err;
if (info->head->is_dentry) {
- err = parse_reply_info_in(p, end, &info->diri, features);
+ err = parse_reply_info_in(p, end, &info->diri, features, mdsc);
if (err < 0)
goto out_bad;
}
if (info->head->is_target) {
- err = parse_reply_info_in(p, end, &info->targeti, features);
+ err = parse_reply_info_in(p, end, &info->targeti, features,
+ mdsc);
if (err < 0)
goto out_bad;
}
*/
static int parse_reply_info_readdir(void **p, void *end,
struct ceph_mds_request *req,
- u64 features)
+ u64 features,
+ struct ceph_mds_client *mdsc)
{
struct ceph_mds_reply_info_parsed *info = &req->r_reply_info;
struct ceph_client *cl = req->r_mdsc->fsc->client;
rde->name_len = oname.len;
/* inode */
- err = parse_reply_info_in(p, end, &rde->inode, features);
+ err = parse_reply_info_in(p, end, &rde->inode, features, mdsc);
if (err < 0)
goto out_bad;
/* ceph_readdir_prepopulate() will update it */
if (op == CEPH_MDS_OP_GETFILELOCK)
return parse_reply_info_filelock(p, end, info, features);
else if (op == CEPH_MDS_OP_READDIR || op == CEPH_MDS_OP_LSSNAP)
- return parse_reply_info_readdir(p, end, req, features);
+ return parse_reply_info_readdir(p, end, req, features,
+ req->r_mdsc);
else if (op == CEPH_MDS_OP_CREATE)
return parse_reply_info_create(p, end, info, features, s);
else if (op == CEPH_MDS_OP_GETVXATTR)
ceph_decode_32_safe(&p, end, len, bad);
if (len > 0) {
ceph_decode_need(&p, end, len, bad);
- err = parse_reply_info_trace(&p, p+len, info, features);
+ err = parse_reply_info_trace(&p, p + len, info, features,
+ s->s_mdsc);
if (err < 0)
goto out_bad;
}
ceph_decode_32_safe(&p, end, len, bad);
if (len > 0) {
ceph_decode_need(&p, end, len, bad);
- err = parse_reply_info_extra(&p, p+len, req, features, s);
+ err = parse_reply_info_extra(&p, p + len, req, features, s);
if (err < 0)
goto out_bad;
}
goto out_err;
}
req->r_target_inode = in;
+ ceph_inode_set_subvolume(in, rinfo->targeti.subvolume_id);
}
mutex_lock(&session->s_mutex);
/* quotas */
u64 i_max_bytes, i_max_files;
+ /*
+ * Subvolume ID this inode belongs to. CEPH_SUBVOLUME_ID_NONE (0)
+ * means unknown/unset, matching the FUSE client convention.
+ * Once set to a valid (non-zero) value, it should not change
+ * during the inode's lifetime.
+ */
+#define CEPH_SUBVOLUME_ID_NONE 0
+ u64 i_subvolume_id;
+
s32 i_dir_pin;
struct rb_root i_fragtree;
extern int ceph_fill_file_size(struct inode *inode, int issued,
u32 truncate_seq, u64 truncate_size,
u64 size, int newcaps);
+extern void ceph_inode_set_subvolume(struct inode *inode, u64 subvolume_id);
extern void ceph_fill_file_time(struct inode *inode, int issued,
u64 time_warp_seq, struct timespec64 *ctime,
struct timespec64 *mtime,