From: Viacheslav Dubeyko Date: Tue, 12 Aug 2025 19:53:26 +0000 (-0700) Subject: ceph: cleanup of processing ci->i_ceph_flags bits X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=ea7c567dd3c6d72fc7f388d649cb852b3b36d886;p=ceph-client.git ceph: cleanup of processing ci->i_ceph_flags bits The Coverity Scan service has detected potential race condition in ceph_check_delayed_caps() [1]. The CID 1590633 contains explanation: "Accessing ci->i_ceph_flags without holding lock ceph_inode_info.i_ceph_lock. The value of the shared data will be determined by the interleaving of thread execution. Thread shared data is accessed without holding an appropriate lock, possibly causing a race condition (CWE-366)". The patch reworks the logic of accessing ci->i_ceph_flags. At first, it removes ci item from a mdsc->cap_delay_list. Then it unlocks mdsc->cap_delay_lock and it locks ci->i_ceph_lock. Then, it calls smp_mb__before_atomic() to be sure that ci->i_ceph_flags has consistent state of the bits. The is_metadata_under_flush variable stores the state of CEPH_I_FLUSH_BIT. Finally, it unlocks the ci->i_ceph_lock and it locks the mdsc->cap_delay_lock. The is_metadata_under_flush is used to check the condition that ci needs to be removed from mdsc->cap_delay_list. If it is not the case, then ci will be added into the head of mdsc->cap_delay_list. This patch reworks the logic of checking the CEPH_I_FLUSH_BIT, CEPH_I_FLUSH_SNAPS_BIT, CEPH_I_KICK_FLUSH_BIT, CEPH_ASYNC_CREATE_BIT, CEPH_I_ERROR_FILELOCK_BIT by test_bit() method and calling smp_mb__before_atomic() to ensure that bit state is consistent. It switches on calling the set_bit(), clear_bit() for these bits, and calling smp_mb__after_atomic() after these methods to ensure that modified bit is visible. Additionally, __must_hold() has been added for __cap_delay_requeue(), __cap_delay_requeue_front(), and __prep_cap() to help the sparse with lock checking and it was commented that caller of __cap_delay_requeue_front() and __prep_cap() must lock the ci->i_ceph_lock. v.2 Alex Markuze suggested to rework all Ceph inode's flags. Now, every declaration has CEPH_I_<*> and CEPH_I_<*>_BIT pair. v.3 The logic of operating by ci->i_ceph_flags bits on using test_bit(), clear_bit(), set_bit() and smp_mb__before_atomic(), smp_mb__after_atomic() has been reworked in addr.c, inode.c, locks.c, mds_client.c, snap.c, super.h, xattr.c additionally to caps.c. [1] https://scan5.scan.coverity.com/#/project-view/64304/10063?selectedIssue=1590633 Signed-off-by: Viacheslav Dubeyko cc: Alex Markuze cc: Ilya Dryomov cc: Ceph Development --- diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index ff7a99c6f9d8..07bafb263db7 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -2572,6 +2572,8 @@ int ceph_pool_perm_check(struct inode *inode, int need) return 0; spin_lock(&ci->i_ceph_lock); + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); flags = ci->i_ceph_flags; pool = ci->i_layout.pool_id; spin_unlock(&ci->i_ceph_lock); @@ -2604,8 +2606,12 @@ check: if (pool == ci->i_layout.pool_id && pool_ns == rcu_dereference_raw(ci->i_layout.pool_ns)) { ci->i_ceph_flags |= flags; - } else { + /* ensure modified bit is visible */ + smp_mb__after_atomic(); + } else { pool = ci->i_layout.pool_id; + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); flags = ci->i_ceph_flags; } spin_unlock(&ci->i_ceph_lock); diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 21fedc8bb163..e30c97615a99 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -517,6 +517,7 @@ static void __cap_set_timeouts(struct ceph_mds_client *mdsc, */ static void __cap_delay_requeue(struct ceph_mds_client *mdsc, struct ceph_inode_info *ci) + __must_hold(ci->i_ceph_lock) { struct inode *inode = &ci->netfs.inode; @@ -526,7 +527,9 @@ static void __cap_delay_requeue(struct ceph_mds_client *mdsc, if (!mdsc->stopping) { spin_lock(&mdsc->cap_delay_lock); if (!list_empty(&ci->i_cap_delay_list)) { - if (ci->i_ceph_flags & CEPH_I_FLUSH) + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); + if (test_bit(CEPH_I_FLUSH_BIT, &ci->i_ceph_flags)) goto no_change; list_del_init(&ci->i_cap_delay_list); } @@ -541,15 +544,20 @@ no_change: * Queue an inode for immediate writeback. Mark inode with I_FLUSH, * indicating we should send a cap message to flush dirty metadata * asap, and move to the front of the delayed cap list. + * + * Caller must hold i_ceph_lock. */ static void __cap_delay_requeue_front(struct ceph_mds_client *mdsc, struct ceph_inode_info *ci) + __must_hold(ci->i_ceph_lock) { struct inode *inode = &ci->netfs.inode; doutc(mdsc->fsc->client, "%p %llx.%llx\n", inode, ceph_vinop(inode)); spin_lock(&mdsc->cap_delay_lock); - ci->i_ceph_flags |= CEPH_I_FLUSH; + set_bit(CEPH_I_FLUSH_BIT, &ci->i_ceph_flags); + /* ensure modified bit is visible */ + smp_mb__after_atomic(); if (!list_empty(&ci->i_cap_delay_list)) list_del_init(&ci->i_cap_delay_list); list_add(&ci->i_cap_delay_list, &mdsc->cap_delay_list); @@ -1400,10 +1408,13 @@ void __ceph_remove_caps(struct ceph_inode_info *ci) * * Make note of max_size reported/requested from mds, revoked caps * that have now been implemented. + * + * Caller must hold i_ceph_lock. */ static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap, int op, int flags, int used, int want, int retain, int flushing, u64 flush_tid, u64 oldest_flush_tid) + __must_hold(ci->i_ceph_lock) { struct ceph_inode_info *ci = cap->ci; struct inode *inode = &ci->netfs.inode; @@ -1422,7 +1433,9 @@ static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap, ceph_cap_string(revoking)); BUG_ON((retain & CEPH_CAP_PIN) == 0); - ci->i_ceph_flags &= ~CEPH_I_FLUSH; + clear_bit(CEPH_I_FLUSH_BIT, &ci->i_ceph_flags); + /* ensure modified bit is visible */ + smp_mb__after_atomic(); cap->issued &= retain; /* drop bits we don't want */ /* @@ -1731,7 +1744,9 @@ static void __ceph_flush_snaps(struct ceph_inode_info *ci, * to miss setting the CEPH_CLIENT_CAPS_PENDING_CAPSNAP flag and finally * the mds will drop the capsnap request to floor. */ - ci->i_ceph_flags &= ~CEPH_I_FLUSH_SNAPS; + clear_bit(CEPH_I_FLUSH_SNAPS_BIT, &ci->i_ceph_flags); + /* ensure modified bit is visible */ + smp_mb__after_atomic(); } void ceph_flush_snaps(struct ceph_inode_info *ci, @@ -1749,7 +1764,9 @@ void ceph_flush_snaps(struct ceph_inode_info *ci, session = *psession; retry: spin_lock(&ci->i_ceph_lock); - if (!(ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS)) { + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); + if (!test_bit(CEPH_I_FLUSH_SNAPS_BIT, &ci->i_ceph_flags)) { doutc(cl, " no capsnap needs flush, doing nothing\n"); goto out; } @@ -1773,7 +1790,9 @@ retry: } // make sure flushsnap messages are sent in proper order. - if (ci->i_ceph_flags & CEPH_I_KICK_FLUSH) + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); + if (test_bit(CEPH_I_KICK_FLUSH_BIT, &ci->i_ceph_flags)) __kick_flushing_caps(mdsc, session, ci, 0); __ceph_flush_snaps(ci, session); @@ -2046,15 +2065,21 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags) struct ceph_mds_session *session = NULL; spin_lock(&ci->i_ceph_lock); - if (ci->i_ceph_flags & CEPH_I_ASYNC_CREATE) { - ci->i_ceph_flags |= CEPH_I_ASYNC_CHECK_CAPS; + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); + if (test_bit(CEPH_ASYNC_CREATE_BIT, &ci->i_ceph_flags)) { + set_bit(CEPH_I_ASYNC_CHECK_CAPS_BIT, &ci->i_ceph_flags); + /* ensure modified bit is visible */ + smp_mb__after_atomic(); /* Don't send messages until we get async create reply */ spin_unlock(&ci->i_ceph_lock); return; } - if (ci->i_ceph_flags & CEPH_I_FLUSH) + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); + if (test_bit(CEPH_I_FLUSH_BIT, &ci->i_ceph_flags)) flags |= CHECK_CAPS_FLUSH; retry: /* Caps wanted by virtue of active open files. */ @@ -2218,7 +2243,10 @@ retry: doutc(cl, "flushing dirty caps\n"); goto ack; } - if (ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS) { + + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); + if (test_bit(CEPH_I_FLUSH_SNAPS_BIT, &ci->i_ceph_flags)) { doutc(cl, "flushing snap caps\n"); goto ack; } @@ -2242,12 +2270,14 @@ ack: /* kick flushing and flush snaps before sending normal * cap message */ + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); if (cap == ci->i_auth_cap && (ci->i_ceph_flags & (CEPH_I_KICK_FLUSH | CEPH_I_FLUSH_SNAPS))) { - if (ci->i_ceph_flags & CEPH_I_KICK_FLUSH) + if (test_bit(CEPH_I_KICK_FLUSH_BIT, &ci->i_ceph_flags)) __kick_flushing_caps(mdsc, session, ci, 0); - if (ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS) + if (test_bit(CEPH_I_FLUSH_SNAPS_BIT, &ci->i_ceph_flags)) __ceph_flush_snaps(ci, session); goto retry; @@ -2319,11 +2349,17 @@ retry_locked: goto out; } + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); if (ci->i_ceph_flags & (CEPH_I_KICK_FLUSH | CEPH_I_FLUSH_SNAPS)) { - if (ci->i_ceph_flags & CEPH_I_KICK_FLUSH) + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); + if (test_bit(CEPH_I_KICK_FLUSH_BIT, &ci->i_ceph_flags)) __kick_flushing_caps(mdsc, session, ci, 0); - if (ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS) + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); + if (test_bit(CEPH_I_FLUSH_SNAPS_BIT, &ci->i_ceph_flags)) __ceph_flush_snaps(ci, session); goto retry_locked; } @@ -2595,10 +2631,14 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc, u64 last_snap_flush = 0; /* Don't do anything until create reply comes in */ - if (ci->i_ceph_flags & CEPH_I_ASYNC_CREATE) + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); + if (test_bit(CEPH_ASYNC_CREATE_BIT, &ci->i_ceph_flags)) return; - ci->i_ceph_flags &= ~CEPH_I_KICK_FLUSH; + clear_bit(CEPH_I_KICK_FLUSH_BIT, &ci->i_ceph_flags); + /* ensure modified bit is visible */ + smp_mb__after_atomic(); list_for_each_entry_reverse(cf, &ci->i_cap_flush_list, i_list) { if (cf->is_capsnap) { @@ -2707,7 +2747,9 @@ void ceph_early_kick_flushing_caps(struct ceph_mds_client *mdsc, __kick_flushing_caps(mdsc, session, ci, oldest_flush_tid); } else { - ci->i_ceph_flags |= CEPH_I_KICK_FLUSH; + set_bit(CEPH_I_KICK_FLUSH_BIT, &ci->i_ceph_flags); + /* ensure modified bit is visible */ + smp_mb__after_atomic(); } spin_unlock(&ci->i_ceph_lock); @@ -2742,7 +2784,10 @@ void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc, spin_unlock(&ci->i_ceph_lock); continue; } - if (ci->i_ceph_flags & CEPH_I_KICK_FLUSH) { + + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); + if (test_bit(CEPH_I_KICK_FLUSH_BIT, &ci->i_ceph_flags)) { __kick_flushing_caps(mdsc, session, ci, oldest_flush_tid); } @@ -2849,8 +2894,10 @@ static int try_get_cap_refs(struct inode *inode, int need, int want, again: spin_lock(&ci->i_ceph_lock); + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); if ((flags & CHECK_FILELOCK) && - (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK)) { + test_bit(CEPH_I_ERROR_FILELOCK_BIT, &ci->i_ceph_flags)) { doutc(cl, "%p %llx.%llx error filelock\n", inode, ceph_vinop(inode)); ret = -EIO; @@ -3227,8 +3274,11 @@ static int ceph_try_drop_cap_snap(struct ceph_inode_info *ci, doutc(cl, "%p follows %llu\n", capsnap, capsnap->follows); BUG_ON(capsnap->cap_flush.tid > 0); ceph_put_snap_context(capsnap->context); - if (!list_is_last(&capsnap->ci_item, &ci->i_cap_snaps)) - ci->i_ceph_flags |= CEPH_I_FLUSH_SNAPS; + if (!list_is_last(&capsnap->ci_item, &ci->i_cap_snaps)) { + set_bit(CEPH_I_FLUSH_SNAPS_BIT, &ci->i_ceph_flags); + /* ensure modified bit is visible */ + smp_mb__after_atomic(); + } list_del(&capsnap->ci_item); ceph_put_cap_snap(capsnap); @@ -3417,7 +3467,10 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, if (ceph_try_drop_cap_snap(ci, capsnap)) { put++; } else { - ci->i_ceph_flags |= CEPH_I_FLUSH_SNAPS; + set_bit(CEPH_I_FLUSH_SNAPS_BIT, + &ci->i_ceph_flags); + /* ensure modified bit is visible */ + smp_mb__after_atomic(); flush_snaps = true; } } @@ -3668,8 +3721,11 @@ static void handle_cap_grant(struct inode *inode, rcu_assign_pointer(ci->i_layout.pool_ns, extra_info->pool_ns); if (ci->i_layout.pool_id != old_pool || - extra_info->pool_ns != old_ns) - ci->i_ceph_flags &= ~CEPH_I_POOL_PERM; + extra_info->pool_ns != old_ns) { + clear_bit(CEPH_I_POOL_PERM_BIT, &ci->i_ceph_flags); + /* ensure modified bit is visible */ + smp_mb__after_atomic(); + } extra_info->pool_ns = old_ns; @@ -4658,6 +4714,7 @@ unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc) unsigned long delay_max = opt->caps_wanted_delay_max * HZ; unsigned long loop_start = jiffies; unsigned long delay = 0; + bool is_metadata_under_flush; doutc(cl, "begin\n"); spin_lock(&mdsc->cap_delay_lock); @@ -4670,11 +4727,24 @@ unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc) delay = ci->i_hold_caps_max; break; } - if ((ci->i_ceph_flags & CEPH_I_FLUSH) == 0 && - time_before(jiffies, ci->i_hold_caps_max)) - break; + list_del_init(&ci->i_cap_delay_list); + spin_unlock(&mdsc->cap_delay_lock); + spin_lock(&ci->i_ceph_lock); + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); + is_metadata_under_flush = + test_bit(CEPH_I_FLUSH_BIT, &ci->i_ceph_flags); + spin_unlock(&ci->i_ceph_lock); + spin_lock(&mdsc->cap_delay_lock); + + if (!is_metadata_under_flush && + time_before(jiffies, ci->i_hold_caps_max)) { + list_add(&ci->i_cap_delay_list, &mdsc->cap_delay_list); + break; + } + inode = igrab(&ci->netfs.inode); if (inode) { spin_unlock(&mdsc->cap_delay_lock); @@ -4856,7 +4926,9 @@ int ceph_drop_caps_for_unlink(struct inode *inode) doutc(mdsc->fsc->client, "%p %llx.%llx\n", inode, ceph_vinop(inode)); spin_lock(&mdsc->cap_delay_lock); - ci->i_ceph_flags |= CEPH_I_FLUSH; + set_bit(CEPH_I_FLUSH_BIT, &ci->i_ceph_flags); + /* ensure modified bit is visible */ + smp_mb__after_atomic(); if (!list_empty(&ci->i_cap_delay_list)) list_del_init(&ci->i_cap_delay_list); list_add_tail(&ci->i_cap_delay_list, @@ -5121,7 +5193,9 @@ int ceph_purge_inode_cap(struct inode *inode, struct ceph_cap *cap, bool *invali if (atomic_read(&ci->i_filelock_ref) > 0) { /* make further file lock syscall return -EIO */ - ci->i_ceph_flags |= CEPH_I_ERROR_FILELOCK; + set_bit(CEPH_I_ERROR_FILELOCK_BIT, &ci->i_ceph_flags); + /* ensure modified bit is visible */ + smp_mb__after_atomic(); pr_warn_ratelimited_client(cl, " dropping file locks for %p %llx.%llx\n", inode, ceph_vinop(inode)); diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index ac78d90a0bac..e67ab9740311 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1153,8 +1153,11 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page, lockdep_is_held(&ci->i_ceph_lock)); rcu_assign_pointer(ci->i_layout.pool_ns, pool_ns); - if (ci->i_layout.pool_id != old_pool || pool_ns != old_ns) - ci->i_ceph_flags &= ~CEPH_I_POOL_PERM; + if (ci->i_layout.pool_id != old_pool || pool_ns != old_ns) { + clear_bit(CEPH_I_POOL_PERM_BIT, &ci->i_ceph_flags); + /* ensure modified bit is visible */ + smp_mb__after_atomic(); + } pool_ns = old_ns; @@ -3213,7 +3216,9 @@ void ceph_inode_shutdown(struct inode *inode) bool invalidate = false; spin_lock(&ci->i_ceph_lock); - ci->i_ceph_flags |= CEPH_I_SHUTDOWN; + set_bit(CEPH_I_SHUTDOWN_BIT, &ci->i_ceph_flags); + /* ensure modified bit is visible */ + smp_mb__after_atomic(); p = rb_first(&ci->i_caps); while (p) { struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node); diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index dd764f9c64b9..4356614b1e88 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -58,7 +58,9 @@ static void ceph_fl_release_lock(struct file_lock *fl) if (atomic_dec_and_test(&ci->i_filelock_ref)) { /* clear error when all locks are released */ spin_lock(&ci->i_ceph_lock); - ci->i_ceph_flags &= ~CEPH_I_ERROR_FILELOCK; + clear_bit(CEPH_I_ERROR_FILELOCK_BIT, &ci->i_ceph_flags); + /* ensure modified bit is visible */ + smp_mb__after_atomic(); spin_unlock(&ci->i_ceph_lock); } fl->fl_u.ceph.inode = NULL; @@ -272,9 +274,10 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) wait = 1; spin_lock(&ci->i_ceph_lock); - if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) { + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); + if (test_bit(CEPH_I_ERROR_FILELOCK_BIT, &ci->i_ceph_flags)) err = -EIO; - } spin_unlock(&ci->i_ceph_lock); if (err < 0) { if (op == CEPH_MDS_OP_SETFILELOCK && lock_is_unlock(fl)) @@ -332,9 +335,10 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) doutc(cl, "fl_file: %p\n", fl->c.flc_file); spin_lock(&ci->i_ceph_lock); - if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) { + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); + if (test_bit(CEPH_I_ERROR_FILELOCK_BIT, &ci->i_ceph_flags)) err = -EIO; - } spin_unlock(&ci->i_ceph_lock); if (err < 0) { if (lock_is_unlock(fl)) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 8b4d72757307..770ff0a0f7ad 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -3638,7 +3638,10 @@ static void __do_request(struct ceph_mds_client *mdsc, spin_lock(&ci->i_ceph_lock); cap = ci->i_auth_cap; - if (ci->i_ceph_flags & CEPH_I_ASYNC_CREATE && mds != cap->mds) { + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); + if (test_bit(CEPH_ASYNC_CREATE_BIT, &ci->i_ceph_flags) && + mds != cap->mds) { doutc(cl, "session changed for auth cap %d -> %d\n", cap->session->s_mds, session->s_mds); @@ -4718,8 +4721,11 @@ static int reconnect_caps_cb(struct inode *inode, int mds, void *arg) rec.v2.issued = cpu_to_le32(cap->issued); rec.v2.snaprealm = cpu_to_le64(ci->i_snap_realm->ino); rec.v2.pathbase = cpu_to_le64(path_info.vino.ino); + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); rec.v2.flock_len = (__force __le32) - ((ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) ? 0 : 1); + (test_bit(CEPH_I_ERROR_FILELOCK_BIT, + &ci->i_ceph_flags) ? 0 : 1); } else { struct timespec64 ts; diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 88cafcf11a97..2f95fb705c1e 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -662,6 +662,7 @@ update_snapc: */ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, struct ceph_cap_snap *capsnap) + __must_hold(ci->i_ceph_lock) { struct inode *inode = &ci->netfs.inode; struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); @@ -701,7 +702,10 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, return 0; } - ci->i_ceph_flags |= CEPH_I_FLUSH_SNAPS; + set_bit(CEPH_I_FLUSH_SNAPS_BIT, &ci->i_ceph_flags); + /* ensure modified bit is visible */ + smp_mb__after_atomic(); + doutc(cl, "%p %llx.%llx cap_snap %p snapc %p %llu %s s=%llu\n", inode, ceph_vinop(inode), capsnap, capsnap->context, capsnap->context->seq, ceph_cap_string(capsnap->dirty), diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 18f8e5d35143..2ade9feca410 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -657,23 +657,35 @@ static inline struct inode *ceph_find_inode(struct super_block *sb, /* * Ceph inode. */ -#define CEPH_I_DIR_ORDERED (1 << 0) /* dentries in dir are ordered */ -#define CEPH_I_FLUSH (1 << 2) /* do not delay flush of dirty metadata */ -#define CEPH_I_POOL_PERM (1 << 3) /* pool rd/wr bits are valid */ -#define CEPH_I_POOL_RD (1 << 4) /* can read from pool */ -#define CEPH_I_POOL_WR (1 << 5) /* can write to pool */ -#define CEPH_I_SEC_INITED (1 << 6) /* security initialized */ -#define CEPH_I_KICK_FLUSH (1 << 7) /* kick flushing caps */ -#define CEPH_I_FLUSH_SNAPS (1 << 8) /* need flush snapss */ -#define CEPH_I_ERROR_WRITE (1 << 9) /* have seen write errors */ -#define CEPH_I_ERROR_FILELOCK (1 << 10) /* have seen file lock errors */ -#define CEPH_I_ODIRECT_BIT (11) /* inode in direct I/O mode */ -#define CEPH_I_ODIRECT (1 << CEPH_I_ODIRECT_BIT) -#define CEPH_ASYNC_CREATE_BIT (12) /* async create in flight for this */ -#define CEPH_I_ASYNC_CREATE (1 << CEPH_ASYNC_CREATE_BIT) -#define CEPH_I_SHUTDOWN (1 << 13) /* inode is no longer usable */ -#define CEPH_I_ASYNC_CHECK_CAPS (1 << 14) /* check caps immediately after async - creating finishes */ +#define CEPH_I_DIR_ORDERED_BIT (0) /* dentries in dir are ordered */ +#define CEPH_I_FLUSH_BIT (2) /* do not delay flush of dirty metadata */ +#define CEPH_I_POOL_PERM_BIT (3) /* pool rd/wr bits are valid */ +#define CEPH_I_POOL_RD_BIT (4) /* can read from pool */ +#define CEPH_I_POOL_WR_BIT (5) /* can write to pool */ +#define CEPH_I_SEC_INITED_BIT (6) /* security initialized */ +#define CEPH_I_KICK_FLUSH_BIT (7) /* kick flushing caps */ +#define CEPH_I_FLUSH_SNAPS_BIT (8) /* need flush snapss */ +#define CEPH_I_ERROR_WRITE_BIT (9) /* have seen write errors */ +#define CEPH_I_ERROR_FILELOCK_BIT (10) /* have seen file lock errors */ +#define CEPH_I_ODIRECT_BIT (11) /* inode in direct I/O mode */ +#define CEPH_ASYNC_CREATE_BIT (12) /* async create in flight for this */ +#define CEPH_I_SHUTDOWN_BIT (13) /* inode is no longer usable */ +#define CEPH_I_ASYNC_CHECK_CAPS_BIT (14) /* check caps after async creating finishes */ + +#define CEPH_I_DIR_ORDERED (1 << CEPH_I_DIR_ORDERED_BIT) +#define CEPH_I_FLUSH (1 << CEPH_I_FLUSH_BIT) +#define CEPH_I_POOL_PERM (1 << CEPH_I_POOL_PERM_BIT) +#define CEPH_I_POOL_RD (1 << CEPH_I_POOL_RD_BIT) +#define CEPH_I_POOL_WR (1 << CEPH_I_POOL_WR_BIT) +#define CEPH_I_SEC_INITED (1 << CEPH_I_SEC_INITED_BIT) +#define CEPH_I_KICK_FLUSH (1 << CEPH_I_KICK_FLUSH_BIT) +#define CEPH_I_FLUSH_SNAPS (1 << CEPH_I_FLUSH_SNAPS_BIT) +#define CEPH_I_ERROR_WRITE (1 << CEPH_I_ERROR_WRITE_BIT) +#define CEPH_I_ERROR_FILELOCK (1 << CEPH_I_ERROR_FILELOCK_BIT) +#define CEPH_I_ODIRECT (1 << CEPH_I_ODIRECT_BIT) +#define CEPH_I_ASYNC_CREATE (1 << CEPH_ASYNC_CREATE_BIT) +#define CEPH_I_SHUTDOWN (1 << CEPH_I_SHUTDOWN_BIT) +#define CEPH_I_ASYNC_CHECK_CAPS (1 << CEPH_I_ASYNC_CHECK_CAPS_BIT) /* * Masks of ceph inode work. @@ -693,20 +705,28 @@ static inline struct inode *ceph_find_inode(struct super_block *sb, */ static inline void ceph_set_error_write(struct ceph_inode_info *ci) { - if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ERROR_WRITE)) { - spin_lock(&ci->i_ceph_lock); - ci->i_ceph_flags |= CEPH_I_ERROR_WRITE; - spin_unlock(&ci->i_ceph_lock); + spin_lock(&ci->i_ceph_lock); + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); + if (!test_bit(CEPH_I_ERROR_WRITE_BIT, &ci->i_ceph_flags)) { + set_bit(CEPH_I_ERROR_WRITE_BIT, &ci->i_ceph_flags); + /* ensure modified bit is visible */ + smp_mb__after_atomic(); } + spin_unlock(&ci->i_ceph_lock); } static inline void ceph_clear_error_write(struct ceph_inode_info *ci) { - if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ERROR_WRITE) { - spin_lock(&ci->i_ceph_lock); - ci->i_ceph_flags &= ~CEPH_I_ERROR_WRITE; - spin_unlock(&ci->i_ceph_lock); + spin_lock(&ci->i_ceph_lock); + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); + if (!test_bit(CEPH_I_ERROR_WRITE_BIT, &ci->i_ceph_flags)) { + clear_bit(CEPH_I_ERROR_WRITE_BIT, &ci->i_ceph_flags); + /* ensure modified bit is visible */ + smp_mb__after_atomic(); } + spin_unlock(&ci->i_ceph_lock); } static inline void __ceph_dir_set_complete(struct ceph_inode_info *ci, @@ -1141,10 +1161,14 @@ void ceph_inode_shutdown(struct inode *inode); static inline bool ceph_inode_is_shutdown(struct inode *inode) { - unsigned long flags = READ_ONCE(ceph_inode(inode)->i_ceph_flags); + unsigned long flags; struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); int state = READ_ONCE(fsc->mount_state); + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); + flags = READ_ONCE(ceph_inode(inode)->i_ceph_flags); + return (flags & CEPH_I_SHUTDOWN) || state >= CEPH_MOUNT_SHUTDOWN; } diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 3a62900f8af0..b6f3a86e10f6 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -1052,8 +1052,11 @@ handle_non_vxattrs: if (current->journal_info && !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) && - security_ismaclabel(name + XATTR_SECURITY_PREFIX_LEN)) - ci->i_ceph_flags |= CEPH_I_SEC_INITED; + security_ismaclabel(name + XATTR_SECURITY_PREFIX_LEN)) { + set_bit(CEPH_I_SEC_INITED_BIT, &ci->i_ceph_flags); + /* ensure modified bit is visible */ + smp_mb__after_atomic(); + } out: spin_unlock(&ci->i_ceph_lock); return err; @@ -1363,7 +1366,9 @@ bool ceph_security_xattr_deadlock(struct inode *in) return false; ci = ceph_inode(in); spin_lock(&ci->i_ceph_lock); - ret = !(ci->i_ceph_flags & CEPH_I_SEC_INITED) && + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); + ret = !test_bit(CEPH_I_SEC_INITED_BIT, &ci->i_ceph_flags) && !(ci->i_xattrs.version > 0 && __ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 0)); spin_unlock(&ci->i_ceph_lock);