]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-client.git/commitdiff
ceph: cleanup of processing ci->i_ceph_flags bits
authorViacheslav Dubeyko <Slava.Dubeyko@ibm.com>
Tue, 12 Aug 2025 19:53:26 +0000 (12:53 -0700)
committerIlya Dryomov <idryomov@gmail.com>
Wed, 11 Feb 2026 18:19:17 +0000 (19:19 +0100)
The Coverity Scan service has detected potential
race condition in ceph_check_delayed_caps() [1].

The CID 1590633 contains explanation: "Accessing
ci->i_ceph_flags without holding lock
ceph_inode_info.i_ceph_lock. The value of the shared data
will be determined by the interleaving of thread execution.
Thread shared data is accessed without holding an appropriate
lock, possibly causing a race condition (CWE-366)".

The patch reworks the logic of accessing ci->i_ceph_flags.
At first, it removes ci item from a mdsc->cap_delay_list.
Then it unlocks mdsc->cap_delay_lock and it locks
ci->i_ceph_lock. Then, it calls smp_mb__before_atomic()
to be sure that ci->i_ceph_flags has consistent state of
the bits. The is_metadata_under_flush variable stores
the state of CEPH_I_FLUSH_BIT. Finally, it unlocks
the ci->i_ceph_lock and it locks the mdsc->cap_delay_lock.
The is_metadata_under_flush is used to check the condition
that ci needs to be removed from mdsc->cap_delay_list.
If it is not the case, then ci will be added into the head of
mdsc->cap_delay_list.

This patch reworks the logic of checking the CEPH_I_FLUSH_BIT,
CEPH_I_FLUSH_SNAPS_BIT, CEPH_I_KICK_FLUSH_BIT,
CEPH_ASYNC_CREATE_BIT, CEPH_I_ERROR_FILELOCK_BIT by test_bit()
method and calling smp_mb__before_atomic() to ensure that
bit state is consistent. It switches on calling the set_bit(),
clear_bit() for these bits, and calling smp_mb__after_atomic()
after these methods to ensure that modified bit is visible.

Additionally, __must_hold() has been added for
__cap_delay_requeue(), __cap_delay_requeue_front(), and
__prep_cap() to help the sparse with lock checking and
it was commented that caller of __cap_delay_requeue_front()
and __prep_cap() must lock the ci->i_ceph_lock.

v.2
Alex Markuze suggested to rework all Ceph inode's flags.
Now, every declaration has CEPH_I_<*> and CEPH_I_<*>_BIT pair.

v.3
The logic of operating by ci->i_ceph_flags bits on using
test_bit(), clear_bit(), set_bit() and smp_mb__before_atomic(),
smp_mb__after_atomic() has been reworked in addr.c, inode.c,
locks.c, mds_client.c, snap.c, super.h, xattr.c additionally
to caps.c.

[1] https://scan5.scan.coverity.com/#/project-view/64304/10063?selectedIssue=1590633

Signed-off-by: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
cc: Alex Markuze <amarkuze@redhat.com>
cc: Ilya Dryomov <idryomov@gmail.com>
cc: Ceph Development <ceph-devel@vger.kernel.org>

fs/ceph/addr.c
fs/ceph/caps.c
fs/ceph/inode.c
fs/ceph/locks.c
fs/ceph/mds_client.c
fs/ceph/snap.c
fs/ceph/super.h
fs/ceph/xattr.c

index ff7a99c6f9d806078497c3c67b8883ca442d6a43..07bafb263db7640743bba5599a56998477e3e804 100644 (file)
@@ -2572,6 +2572,8 @@ int ceph_pool_perm_check(struct inode *inode, int need)
                return 0;
 
        spin_lock(&ci->i_ceph_lock);
+       /* ensure that bit state is consistent */
+       smp_mb__before_atomic();
        flags = ci->i_ceph_flags;
        pool = ci->i_layout.pool_id;
        spin_unlock(&ci->i_ceph_lock);
@@ -2604,8 +2606,12 @@ check:
        if (pool == ci->i_layout.pool_id &&
            pool_ns == rcu_dereference_raw(ci->i_layout.pool_ns)) {
                ci->i_ceph_flags |= flags;
-        } else {
+               /* ensure modified bit is visible */
+               smp_mb__after_atomic();
+       } else {
                pool = ci->i_layout.pool_id;
+               /* ensure that bit state is consistent */
+               smp_mb__before_atomic();
                flags = ci->i_ceph_flags;
        }
        spin_unlock(&ci->i_ceph_lock);
index 21fedc8bb163752c91aa6264af7f081d0cf8195c..e30c97615a99b19460d140c82b1f7db560744672 100644 (file)
@@ -517,6 +517,7 @@ static void __cap_set_timeouts(struct ceph_mds_client *mdsc,
  */
 static void __cap_delay_requeue(struct ceph_mds_client *mdsc,
                                struct ceph_inode_info *ci)
+               __must_hold(ci->i_ceph_lock)
 {
        struct inode *inode = &ci->netfs.inode;
 
@@ -526,7 +527,9 @@ static void __cap_delay_requeue(struct ceph_mds_client *mdsc,
        if (!mdsc->stopping) {
                spin_lock(&mdsc->cap_delay_lock);
                if (!list_empty(&ci->i_cap_delay_list)) {
-                       if (ci->i_ceph_flags & CEPH_I_FLUSH)
+                       /* ensure that bit state is consistent */
+                       smp_mb__before_atomic();
+                       if (test_bit(CEPH_I_FLUSH_BIT, &ci->i_ceph_flags))
                                goto no_change;
                        list_del_init(&ci->i_cap_delay_list);
                }
@@ -541,15 +544,20 @@ no_change:
  * Queue an inode for immediate writeback.  Mark inode with I_FLUSH,
  * indicating we should send a cap message to flush dirty metadata
  * asap, and move to the front of the delayed cap list.
+ *
+ * Caller must hold i_ceph_lock.
  */
 static void __cap_delay_requeue_front(struct ceph_mds_client *mdsc,
                                      struct ceph_inode_info *ci)
+               __must_hold(ci->i_ceph_lock)
 {
        struct inode *inode = &ci->netfs.inode;
 
        doutc(mdsc->fsc->client, "%p %llx.%llx\n", inode, ceph_vinop(inode));
        spin_lock(&mdsc->cap_delay_lock);
-       ci->i_ceph_flags |= CEPH_I_FLUSH;
+       set_bit(CEPH_I_FLUSH_BIT, &ci->i_ceph_flags);
+       /* ensure modified bit is visible */
+       smp_mb__after_atomic();
        if (!list_empty(&ci->i_cap_delay_list))
                list_del_init(&ci->i_cap_delay_list);
        list_add(&ci->i_cap_delay_list, &mdsc->cap_delay_list);
@@ -1400,10 +1408,13 @@ void __ceph_remove_caps(struct ceph_inode_info *ci)
  *
  * Make note of max_size reported/requested from mds, revoked caps
  * that have now been implemented.
+ *
+ * Caller must hold i_ceph_lock.
  */
 static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap,
                       int op, int flags, int used, int want, int retain,
                       int flushing, u64 flush_tid, u64 oldest_flush_tid)
+               __must_hold(ci->i_ceph_lock)
 {
        struct ceph_inode_info *ci = cap->ci;
        struct inode *inode = &ci->netfs.inode;
@@ -1422,7 +1433,9 @@ static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap,
              ceph_cap_string(revoking));
        BUG_ON((retain & CEPH_CAP_PIN) == 0);
 
-       ci->i_ceph_flags &= ~CEPH_I_FLUSH;
+       clear_bit(CEPH_I_FLUSH_BIT, &ci->i_ceph_flags);
+       /* ensure modified bit is visible */
+       smp_mb__after_atomic();
 
        cap->issued &= retain;  /* drop bits we don't want */
        /*
@@ -1731,7 +1744,9 @@ static void __ceph_flush_snaps(struct ceph_inode_info *ci,
         * to miss setting the CEPH_CLIENT_CAPS_PENDING_CAPSNAP flag and finally
         * the mds will drop the capsnap request to floor.
         */
-       ci->i_ceph_flags &= ~CEPH_I_FLUSH_SNAPS;
+       clear_bit(CEPH_I_FLUSH_SNAPS_BIT, &ci->i_ceph_flags);
+       /* ensure modified bit is visible */
+       smp_mb__after_atomic();
 }
 
 void ceph_flush_snaps(struct ceph_inode_info *ci,
@@ -1749,7 +1764,9 @@ void ceph_flush_snaps(struct ceph_inode_info *ci,
                session = *psession;
 retry:
        spin_lock(&ci->i_ceph_lock);
-       if (!(ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS)) {
+       /* ensure that bit state is consistent */
+       smp_mb__before_atomic();
+       if (!test_bit(CEPH_I_FLUSH_SNAPS_BIT, &ci->i_ceph_flags)) {
                doutc(cl, " no capsnap needs flush, doing nothing\n");
                goto out;
        }
@@ -1773,7 +1790,9 @@ retry:
        }
 
        // make sure flushsnap messages are sent in proper order.
-       if (ci->i_ceph_flags & CEPH_I_KICK_FLUSH)
+       /* ensure that bit state is consistent */
+       smp_mb__before_atomic();
+       if (test_bit(CEPH_I_KICK_FLUSH_BIT, &ci->i_ceph_flags))
                __kick_flushing_caps(mdsc, session, ci, 0);
 
        __ceph_flush_snaps(ci, session);
@@ -2046,15 +2065,21 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags)
        struct ceph_mds_session *session = NULL;
 
        spin_lock(&ci->i_ceph_lock);
-       if (ci->i_ceph_flags & CEPH_I_ASYNC_CREATE) {
-               ci->i_ceph_flags |= CEPH_I_ASYNC_CHECK_CAPS;
+       /* ensure that bit state is consistent */
+       smp_mb__before_atomic();
+       if (test_bit(CEPH_ASYNC_CREATE_BIT, &ci->i_ceph_flags)) {
+               set_bit(CEPH_I_ASYNC_CHECK_CAPS_BIT, &ci->i_ceph_flags);
+               /* ensure modified bit is visible */
+               smp_mb__after_atomic();
 
                /* Don't send messages until we get async create reply */
                spin_unlock(&ci->i_ceph_lock);
                return;
        }
 
-       if (ci->i_ceph_flags & CEPH_I_FLUSH)
+       /* ensure that bit state is consistent */
+       smp_mb__before_atomic();
+       if (test_bit(CEPH_I_FLUSH_BIT, &ci->i_ceph_flags))
                flags |= CHECK_CAPS_FLUSH;
 retry:
        /* Caps wanted by virtue of active open files. */
@@ -2218,7 +2243,10 @@ retry:
                                doutc(cl, "flushing dirty caps\n");
                                goto ack;
                        }
-                       if (ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS) {
+
+                       /* ensure that bit state is consistent */
+                       smp_mb__before_atomic();
+                       if (test_bit(CEPH_I_FLUSH_SNAPS_BIT, &ci->i_ceph_flags)) {
                                doutc(cl, "flushing snap caps\n");
                                goto ack;
                        }
@@ -2242,12 +2270,14 @@ ack:
 
                /* kick flushing and flush snaps before sending normal
                 * cap message */
+               /* ensure that bit state is consistent */
+               smp_mb__before_atomic();
                if (cap == ci->i_auth_cap &&
                    (ci->i_ceph_flags &
                     (CEPH_I_KICK_FLUSH | CEPH_I_FLUSH_SNAPS))) {
-                       if (ci->i_ceph_flags & CEPH_I_KICK_FLUSH)
+                       if (test_bit(CEPH_I_KICK_FLUSH_BIT, &ci->i_ceph_flags))
                                __kick_flushing_caps(mdsc, session, ci, 0);
-                       if (ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS)
+                       if (test_bit(CEPH_I_FLUSH_SNAPS_BIT, &ci->i_ceph_flags))
                                __ceph_flush_snaps(ci, session);
 
                        goto retry;
@@ -2319,11 +2349,17 @@ retry_locked:
                        goto out;
                }
 
+               /* ensure that bit state is consistent */
+               smp_mb__before_atomic();
                if (ci->i_ceph_flags &
                    (CEPH_I_KICK_FLUSH | CEPH_I_FLUSH_SNAPS)) {
-                       if (ci->i_ceph_flags & CEPH_I_KICK_FLUSH)
+                       /* ensure that bit state is consistent */
+                       smp_mb__before_atomic();
+                       if (test_bit(CEPH_I_KICK_FLUSH_BIT, &ci->i_ceph_flags))
                                __kick_flushing_caps(mdsc, session, ci, 0);
-                       if (ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS)
+                       /* ensure that bit state is consistent */
+                       smp_mb__before_atomic();
+                       if (test_bit(CEPH_I_FLUSH_SNAPS_BIT, &ci->i_ceph_flags))
                                __ceph_flush_snaps(ci, session);
                        goto retry_locked;
                }
@@ -2595,10 +2631,14 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc,
        u64 last_snap_flush = 0;
 
        /* Don't do anything until create reply comes in */
-       if (ci->i_ceph_flags & CEPH_I_ASYNC_CREATE)
+       /* ensure that bit state is consistent */
+       smp_mb__before_atomic();
+       if (test_bit(CEPH_ASYNC_CREATE_BIT, &ci->i_ceph_flags))
                return;
 
-       ci->i_ceph_flags &= ~CEPH_I_KICK_FLUSH;
+       clear_bit(CEPH_I_KICK_FLUSH_BIT, &ci->i_ceph_flags);
+       /* ensure modified bit is visible */
+       smp_mb__after_atomic();
 
        list_for_each_entry_reverse(cf, &ci->i_cap_flush_list, i_list) {
                if (cf->is_capsnap) {
@@ -2707,7 +2747,9 @@ void ceph_early_kick_flushing_caps(struct ceph_mds_client *mdsc,
                        __kick_flushing_caps(mdsc, session, ci,
                                             oldest_flush_tid);
                } else {
-                       ci->i_ceph_flags |= CEPH_I_KICK_FLUSH;
+                       set_bit(CEPH_I_KICK_FLUSH_BIT, &ci->i_ceph_flags);
+                       /* ensure modified bit is visible */
+                       smp_mb__after_atomic();
                }
 
                spin_unlock(&ci->i_ceph_lock);
@@ -2742,7 +2784,10 @@ void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
                        spin_unlock(&ci->i_ceph_lock);
                        continue;
                }
-               if (ci->i_ceph_flags & CEPH_I_KICK_FLUSH) {
+
+               /* ensure that bit state is consistent */
+               smp_mb__before_atomic();
+               if (test_bit(CEPH_I_KICK_FLUSH_BIT, &ci->i_ceph_flags)) {
                        __kick_flushing_caps(mdsc, session, ci,
                                             oldest_flush_tid);
                }
@@ -2849,8 +2894,10 @@ static int try_get_cap_refs(struct inode *inode, int need, int want,
 again:
        spin_lock(&ci->i_ceph_lock);
 
+       /* ensure that bit state is consistent */
+       smp_mb__before_atomic();
        if ((flags & CHECK_FILELOCK) &&
-           (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK)) {
+           test_bit(CEPH_I_ERROR_FILELOCK_BIT, &ci->i_ceph_flags)) {
                doutc(cl, "%p %llx.%llx error filelock\n", inode,
                      ceph_vinop(inode));
                ret = -EIO;
@@ -3227,8 +3274,11 @@ static int ceph_try_drop_cap_snap(struct ceph_inode_info *ci,
                doutc(cl, "%p follows %llu\n", capsnap, capsnap->follows);
                BUG_ON(capsnap->cap_flush.tid > 0);
                ceph_put_snap_context(capsnap->context);
-               if (!list_is_last(&capsnap->ci_item, &ci->i_cap_snaps))
-                       ci->i_ceph_flags |= CEPH_I_FLUSH_SNAPS;
+               if (!list_is_last(&capsnap->ci_item, &ci->i_cap_snaps)) {
+                       set_bit(CEPH_I_FLUSH_SNAPS_BIT, &ci->i_ceph_flags);
+                       /* ensure modified bit is visible */
+                       smp_mb__after_atomic();
+               }
 
                list_del(&capsnap->ci_item);
                ceph_put_cap_snap(capsnap);
@@ -3417,7 +3467,10 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
                                if (ceph_try_drop_cap_snap(ci, capsnap)) {
                                        put++;
                                } else {
-                                       ci->i_ceph_flags |= CEPH_I_FLUSH_SNAPS;
+                                       set_bit(CEPH_I_FLUSH_SNAPS_BIT,
+                                               &ci->i_ceph_flags);
+                                       /* ensure modified bit is visible */
+                                       smp_mb__after_atomic();
                                        flush_snaps = true;
                                }
                        }
@@ -3668,8 +3721,11 @@ static void handle_cap_grant(struct inode *inode,
                rcu_assign_pointer(ci->i_layout.pool_ns, extra_info->pool_ns);
 
                if (ci->i_layout.pool_id != old_pool ||
-                   extra_info->pool_ns != old_ns)
-                       ci->i_ceph_flags &= ~CEPH_I_POOL_PERM;
+                   extra_info->pool_ns != old_ns) {
+                       clear_bit(CEPH_I_POOL_PERM_BIT, &ci->i_ceph_flags);
+                       /* ensure modified bit is visible */
+                       smp_mb__after_atomic();
+               }
 
                extra_info->pool_ns = old_ns;
 
@@ -4658,6 +4714,7 @@ unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
        unsigned long delay_max = opt->caps_wanted_delay_max * HZ;
        unsigned long loop_start = jiffies;
        unsigned long delay = 0;
+       bool is_metadata_under_flush;
 
        doutc(cl, "begin\n");
        spin_lock(&mdsc->cap_delay_lock);
@@ -4670,11 +4727,24 @@ unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
                        delay = ci->i_hold_caps_max;
                        break;
                }
-               if ((ci->i_ceph_flags & CEPH_I_FLUSH) == 0 &&
-                   time_before(jiffies, ci->i_hold_caps_max))
-                       break;
+
                list_del_init(&ci->i_cap_delay_list);
 
+               spin_unlock(&mdsc->cap_delay_lock);
+               spin_lock(&ci->i_ceph_lock);
+               /* ensure that bit state is consistent */
+               smp_mb__before_atomic();
+               is_metadata_under_flush =
+                       test_bit(CEPH_I_FLUSH_BIT, &ci->i_ceph_flags);
+               spin_unlock(&ci->i_ceph_lock);
+               spin_lock(&mdsc->cap_delay_lock);
+
+               if (!is_metadata_under_flush &&
+                   time_before(jiffies, ci->i_hold_caps_max)) {
+                       list_add(&ci->i_cap_delay_list, &mdsc->cap_delay_list);
+                       break;
+               }
+
                inode = igrab(&ci->netfs.inode);
                if (inode) {
                        spin_unlock(&mdsc->cap_delay_lock);
@@ -4856,7 +4926,9 @@ int ceph_drop_caps_for_unlink(struct inode *inode)
                        doutc(mdsc->fsc->client, "%p %llx.%llx\n", inode,
                              ceph_vinop(inode));
                        spin_lock(&mdsc->cap_delay_lock);
-                       ci->i_ceph_flags |= CEPH_I_FLUSH;
+                       set_bit(CEPH_I_FLUSH_BIT, &ci->i_ceph_flags);
+                       /* ensure modified bit is visible */
+                       smp_mb__after_atomic();
                        if (!list_empty(&ci->i_cap_delay_list))
                                list_del_init(&ci->i_cap_delay_list);
                        list_add_tail(&ci->i_cap_delay_list,
@@ -5121,7 +5193,9 @@ int ceph_purge_inode_cap(struct inode *inode, struct ceph_cap *cap, bool *invali
 
                if (atomic_read(&ci->i_filelock_ref) > 0) {
                        /* make further file lock syscall return -EIO */
-                       ci->i_ceph_flags |= CEPH_I_ERROR_FILELOCK;
+                       set_bit(CEPH_I_ERROR_FILELOCK_BIT, &ci->i_ceph_flags);
+                       /* ensure modified bit is visible */
+                       smp_mb__after_atomic();
                        pr_warn_ratelimited_client(cl,
                                " dropping file locks for %p %llx.%llx\n",
                                inode, ceph_vinop(inode));
index ac78d90a0bac92996d1534093f6ac54bd734c602..e67ab9740311a2389d0a2eae1262cff47fb72480 100644 (file)
@@ -1153,8 +1153,11 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page,
                                        lockdep_is_held(&ci->i_ceph_lock));
                rcu_assign_pointer(ci->i_layout.pool_ns, pool_ns);
 
-               if (ci->i_layout.pool_id != old_pool || pool_ns != old_ns)
-                       ci->i_ceph_flags &= ~CEPH_I_POOL_PERM;
+               if (ci->i_layout.pool_id != old_pool || pool_ns != old_ns) {
+                       clear_bit(CEPH_I_POOL_PERM_BIT, &ci->i_ceph_flags);
+                       /* ensure modified bit is visible */
+                       smp_mb__after_atomic();
+               }
 
                pool_ns = old_ns;
 
@@ -3213,7 +3216,9 @@ void ceph_inode_shutdown(struct inode *inode)
        bool invalidate = false;
 
        spin_lock(&ci->i_ceph_lock);
-       ci->i_ceph_flags |= CEPH_I_SHUTDOWN;
+       set_bit(CEPH_I_SHUTDOWN_BIT, &ci->i_ceph_flags);
+       /* ensure modified bit is visible */
+       smp_mb__after_atomic();
        p = rb_first(&ci->i_caps);
        while (p) {
                struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node);
index dd764f9c64b9f9528480bb7fbb98ccd0736329d0..4356614b1e880f6b34fe40c36dee003562ac5e39 100644 (file)
@@ -58,7 +58,9 @@ static void ceph_fl_release_lock(struct file_lock *fl)
        if (atomic_dec_and_test(&ci->i_filelock_ref)) {
                /* clear error when all locks are released */
                spin_lock(&ci->i_ceph_lock);
-               ci->i_ceph_flags &= ~CEPH_I_ERROR_FILELOCK;
+               clear_bit(CEPH_I_ERROR_FILELOCK_BIT, &ci->i_ceph_flags);
+               /* ensure modified bit is visible */
+               smp_mb__after_atomic();
                spin_unlock(&ci->i_ceph_lock);
        }
        fl->fl_u.ceph.inode = NULL;
@@ -272,9 +274,10 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
                wait = 1;
 
        spin_lock(&ci->i_ceph_lock);
-       if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) {
+       /* ensure that bit state is consistent */
+       smp_mb__before_atomic();
+       if (test_bit(CEPH_I_ERROR_FILELOCK_BIT, &ci->i_ceph_flags))
                err = -EIO;
-       }
        spin_unlock(&ci->i_ceph_lock);
        if (err < 0) {
                if (op == CEPH_MDS_OP_SETFILELOCK && lock_is_unlock(fl))
@@ -332,9 +335,10 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
        doutc(cl, "fl_file: %p\n", fl->c.flc_file);
 
        spin_lock(&ci->i_ceph_lock);
-       if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) {
+       /* ensure that bit state is consistent */
+       smp_mb__before_atomic();
+       if (test_bit(CEPH_I_ERROR_FILELOCK_BIT, &ci->i_ceph_flags))
                err = -EIO;
-       }
        spin_unlock(&ci->i_ceph_lock);
        if (err < 0) {
                if (lock_is_unlock(fl))
index 8b4d727573070a003676cc566e6e9c45bfa5060e..770ff0a0f7ad1155e8fd0097c0440fbe27f58095 100644 (file)
@@ -3638,7 +3638,10 @@ static void __do_request(struct ceph_mds_client *mdsc,
 
                spin_lock(&ci->i_ceph_lock);
                cap = ci->i_auth_cap;
-               if (ci->i_ceph_flags & CEPH_I_ASYNC_CREATE && mds != cap->mds) {
+               /* ensure that bit state is consistent */
+               smp_mb__before_atomic();
+               if (test_bit(CEPH_ASYNC_CREATE_BIT, &ci->i_ceph_flags) &&
+                   mds != cap->mds) {
                        doutc(cl, "session changed for auth cap %d -> %d\n",
                              cap->session->s_mds, session->s_mds);
 
@@ -4718,8 +4721,11 @@ static int reconnect_caps_cb(struct inode *inode, int mds, void *arg)
                rec.v2.issued = cpu_to_le32(cap->issued);
                rec.v2.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
                rec.v2.pathbase = cpu_to_le64(path_info.vino.ino);
+               /* ensure that bit state is consistent */
+               smp_mb__before_atomic();
                rec.v2.flock_len = (__force __le32)
-                       ((ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) ? 0 : 1);
+                       (test_bit(CEPH_I_ERROR_FILELOCK_BIT,
+                                               &ci->i_ceph_flags) ? 0 : 1);
        } else {
                struct timespec64 ts;
 
index 88cafcf11a97ac434719a508e6d1cb05acdc3298..2f95fb705c1edd6116f5b92d6ab1e8b0a493dab0 100644 (file)
@@ -662,6 +662,7 @@ update_snapc:
  */
 int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
                            struct ceph_cap_snap *capsnap)
+               __must_hold(ci->i_ceph_lock)
 {
        struct inode *inode = &ci->netfs.inode;
        struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
@@ -701,7 +702,10 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
                return 0;
        }
 
-       ci->i_ceph_flags |= CEPH_I_FLUSH_SNAPS;
+       set_bit(CEPH_I_FLUSH_SNAPS_BIT, &ci->i_ceph_flags);
+       /* ensure modified bit is visible */
+       smp_mb__after_atomic();
+
        doutc(cl, "%p %llx.%llx cap_snap %p snapc %p %llu %s s=%llu\n",
              inode, ceph_vinop(inode), capsnap, capsnap->context,
              capsnap->context->seq, ceph_cap_string(capsnap->dirty),
index 18f8e5d35143fa177cfc0e1ced923d478c32463b..2ade9feca410e6930af07729b4c43c14908a0533 100644 (file)
@@ -657,23 +657,35 @@ static inline struct inode *ceph_find_inode(struct super_block *sb,
 /*
  * Ceph inode.
  */
-#define CEPH_I_DIR_ORDERED     (1 << 0)  /* dentries in dir are ordered */
-#define CEPH_I_FLUSH           (1 << 2)  /* do not delay flush of dirty metadata */
-#define CEPH_I_POOL_PERM       (1 << 3)  /* pool rd/wr bits are valid */
-#define CEPH_I_POOL_RD         (1 << 4)  /* can read from pool */
-#define CEPH_I_POOL_WR         (1 << 5)  /* can write to pool */
-#define CEPH_I_SEC_INITED      (1 << 6)  /* security initialized */
-#define CEPH_I_KICK_FLUSH      (1 << 7)  /* kick flushing caps */
-#define CEPH_I_FLUSH_SNAPS     (1 << 8)  /* need flush snapss */
-#define CEPH_I_ERROR_WRITE     (1 << 9) /* have seen write errors */
-#define CEPH_I_ERROR_FILELOCK  (1 << 10) /* have seen file lock errors */
-#define CEPH_I_ODIRECT_BIT     (11) /* inode in direct I/O mode */
-#define CEPH_I_ODIRECT         (1 << CEPH_I_ODIRECT_BIT)
-#define CEPH_ASYNC_CREATE_BIT  (12)      /* async create in flight for this */
-#define CEPH_I_ASYNC_CREATE    (1 << CEPH_ASYNC_CREATE_BIT)
-#define CEPH_I_SHUTDOWN                (1 << 13) /* inode is no longer usable */
-#define CEPH_I_ASYNC_CHECK_CAPS        (1 << 14) /* check caps immediately after async
-                                            creating finishes */
+#define CEPH_I_DIR_ORDERED_BIT         (0)  /* dentries in dir are ordered */
+#define CEPH_I_FLUSH_BIT               (2)  /* do not delay flush of dirty metadata */
+#define CEPH_I_POOL_PERM_BIT           (3)  /* pool rd/wr bits are valid */
+#define CEPH_I_POOL_RD_BIT             (4)  /* can read from pool */
+#define CEPH_I_POOL_WR_BIT             (5)  /* can write to pool */
+#define CEPH_I_SEC_INITED_BIT          (6)  /* security initialized */
+#define CEPH_I_KICK_FLUSH_BIT          (7)  /* kick flushing caps */
+#define CEPH_I_FLUSH_SNAPS_BIT         (8)  /* need flush snapss */
+#define CEPH_I_ERROR_WRITE_BIT         (9)  /* have seen write errors */
+#define CEPH_I_ERROR_FILELOCK_BIT      (10) /* have seen file lock errors */
+#define CEPH_I_ODIRECT_BIT             (11) /* inode in direct I/O mode */
+#define CEPH_ASYNC_CREATE_BIT          (12) /* async create in flight for this */
+#define CEPH_I_SHUTDOWN_BIT            (13) /* inode is no longer usable */
+#define CEPH_I_ASYNC_CHECK_CAPS_BIT    (14) /* check caps after async creating finishes */
+
+#define CEPH_I_DIR_ORDERED             (1 << CEPH_I_DIR_ORDERED_BIT)
+#define CEPH_I_FLUSH                   (1 << CEPH_I_FLUSH_BIT)
+#define CEPH_I_POOL_PERM               (1 << CEPH_I_POOL_PERM_BIT)
+#define CEPH_I_POOL_RD                 (1 << CEPH_I_POOL_RD_BIT)
+#define CEPH_I_POOL_WR                 (1 << CEPH_I_POOL_WR_BIT)
+#define CEPH_I_SEC_INITED              (1 << CEPH_I_SEC_INITED_BIT)
+#define CEPH_I_KICK_FLUSH              (1 << CEPH_I_KICK_FLUSH_BIT)
+#define CEPH_I_FLUSH_SNAPS             (1 << CEPH_I_FLUSH_SNAPS_BIT)
+#define CEPH_I_ERROR_WRITE             (1 << CEPH_I_ERROR_WRITE_BIT)
+#define CEPH_I_ERROR_FILELOCK          (1 << CEPH_I_ERROR_FILELOCK_BIT)
+#define CEPH_I_ODIRECT                 (1 << CEPH_I_ODIRECT_BIT)
+#define CEPH_I_ASYNC_CREATE            (1 << CEPH_ASYNC_CREATE_BIT)
+#define CEPH_I_SHUTDOWN                        (1 << CEPH_I_SHUTDOWN_BIT)
+#define CEPH_I_ASYNC_CHECK_CAPS                (1 << CEPH_I_ASYNC_CHECK_CAPS_BIT)
 
 /*
  * Masks of ceph inode work.
@@ -693,20 +705,28 @@ static inline struct inode *ceph_find_inode(struct super_block *sb,
  */
 static inline void ceph_set_error_write(struct ceph_inode_info *ci)
 {
-       if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ERROR_WRITE)) {
-               spin_lock(&ci->i_ceph_lock);
-               ci->i_ceph_flags |= CEPH_I_ERROR_WRITE;
-               spin_unlock(&ci->i_ceph_lock);
+       spin_lock(&ci->i_ceph_lock);
+       /* ensure that bit state is consistent */
+       smp_mb__before_atomic();
+       if (!test_bit(CEPH_I_ERROR_WRITE_BIT, &ci->i_ceph_flags)) {
+               set_bit(CEPH_I_ERROR_WRITE_BIT, &ci->i_ceph_flags);
+               /* ensure modified bit is visible */
+               smp_mb__after_atomic();
        }
+       spin_unlock(&ci->i_ceph_lock);
 }
 
 static inline void ceph_clear_error_write(struct ceph_inode_info *ci)
 {
-       if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ERROR_WRITE) {
-               spin_lock(&ci->i_ceph_lock);
-               ci->i_ceph_flags &= ~CEPH_I_ERROR_WRITE;
-               spin_unlock(&ci->i_ceph_lock);
+       spin_lock(&ci->i_ceph_lock);
+       /* ensure that bit state is consistent */
+       smp_mb__before_atomic();
+       if (!test_bit(CEPH_I_ERROR_WRITE_BIT, &ci->i_ceph_flags)) {
+               clear_bit(CEPH_I_ERROR_WRITE_BIT, &ci->i_ceph_flags);
+               /* ensure modified bit is visible */
+               smp_mb__after_atomic();
        }
+       spin_unlock(&ci->i_ceph_lock);
 }
 
 static inline void __ceph_dir_set_complete(struct ceph_inode_info *ci,
@@ -1141,10 +1161,14 @@ void ceph_inode_shutdown(struct inode *inode);
 
 static inline bool ceph_inode_is_shutdown(struct inode *inode)
 {
-       unsigned long flags = READ_ONCE(ceph_inode(inode)->i_ceph_flags);
+       unsigned long flags;
        struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode);
        int state = READ_ONCE(fsc->mount_state);
 
+       /* ensure that bit state is consistent */
+       smp_mb__before_atomic();
+       flags = READ_ONCE(ceph_inode(inode)->i_ceph_flags);
+
        return (flags & CEPH_I_SHUTDOWN) || state >= CEPH_MOUNT_SHUTDOWN;
 }
 
index 3a62900f8af0642e5e4b5d368a730de6328c1eae..b6f3a86e10f68d5256af9b9f2d847c9668a68886 100644 (file)
@@ -1052,8 +1052,11 @@ handle_non_vxattrs:
 
        if (current->journal_info &&
            !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) &&
-           security_ismaclabel(name + XATTR_SECURITY_PREFIX_LEN))
-               ci->i_ceph_flags |= CEPH_I_SEC_INITED;
+           security_ismaclabel(name + XATTR_SECURITY_PREFIX_LEN)) {
+               set_bit(CEPH_I_SEC_INITED_BIT, &ci->i_ceph_flags);
+               /* ensure modified bit is visible */
+               smp_mb__after_atomic();
+       }
 out:
        spin_unlock(&ci->i_ceph_lock);
        return err;
@@ -1363,7 +1366,9 @@ bool ceph_security_xattr_deadlock(struct inode *in)
                return false;
        ci = ceph_inode(in);
        spin_lock(&ci->i_ceph_lock);
-       ret = !(ci->i_ceph_flags & CEPH_I_SEC_INITED) &&
+       /* ensure that bit state is consistent */
+       smp_mb__before_atomic();
+       ret = !test_bit(CEPH_I_SEC_INITED_BIT, &ci->i_ceph_flags) &&
              !(ci->i_xattrs.version > 0 &&
                __ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 0));
        spin_unlock(&ci->i_ceph_lock);