*/
static void __cap_delay_requeue(struct ceph_mds_client *mdsc,
struct ceph_inode_info *ci)
+ __must_hold(ci->i_ceph_lock)
{
struct inode *inode = &ci->netfs.inode;
if (!mdsc->stopping) {
spin_lock(&mdsc->cap_delay_lock);
if (!list_empty(&ci->i_cap_delay_list)) {
- if (ci->i_ceph_flags & CEPH_I_FLUSH)
+ /* ensure that bit state is consistent */
+ smp_mb__before_atomic();
+ if (test_bit(CEPH_I_FLUSH_BIT, &ci->i_ceph_flags))
goto no_change;
list_del_init(&ci->i_cap_delay_list);
}
* Queue an inode for immediate writeback. Mark inode with I_FLUSH,
* indicating we should send a cap message to flush dirty metadata
* asap, and move to the front of the delayed cap list.
+ *
+ * Caller must hold i_ceph_lock.
*/
static void __cap_delay_requeue_front(struct ceph_mds_client *mdsc,
struct ceph_inode_info *ci)
+ __must_hold(ci->i_ceph_lock)
{
struct inode *inode = &ci->netfs.inode;
doutc(mdsc->fsc->client, "%p %llx.%llx\n", inode, ceph_vinop(inode));
spin_lock(&mdsc->cap_delay_lock);
- ci->i_ceph_flags |= CEPH_I_FLUSH;
+ set_bit(CEPH_I_FLUSH_BIT, &ci->i_ceph_flags);
+ /* ensure modified bit is visible */
+ smp_mb__after_atomic();
if (!list_empty(&ci->i_cap_delay_list))
list_del_init(&ci->i_cap_delay_list);
list_add(&ci->i_cap_delay_list, &mdsc->cap_delay_list);
*
* Make note of max_size reported/requested from mds, revoked caps
* that have now been implemented.
+ *
+ * Caller must hold i_ceph_lock.
*/
static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap,
int op, int flags, int used, int want, int retain,
int flushing, u64 flush_tid, u64 oldest_flush_tid)
+ __must_hold(ci->i_ceph_lock)
{
struct ceph_inode_info *ci = cap->ci;
struct inode *inode = &ci->netfs.inode;
ceph_cap_string(revoking));
BUG_ON((retain & CEPH_CAP_PIN) == 0);
- ci->i_ceph_flags &= ~CEPH_I_FLUSH;
+ clear_bit(CEPH_I_FLUSH_BIT, &ci->i_ceph_flags);
+ /* ensure modified bit is visible */
+ smp_mb__after_atomic();
cap->issued &= retain; /* drop bits we don't want */
/*
* to miss setting the CEPH_CLIENT_CAPS_PENDING_CAPSNAP flag and finally
* the mds will drop the capsnap request to floor.
*/
- ci->i_ceph_flags &= ~CEPH_I_FLUSH_SNAPS;
+ clear_bit(CEPH_I_FLUSH_SNAPS_BIT, &ci->i_ceph_flags);
+ /* ensure modified bit is visible */
+ smp_mb__after_atomic();
}
void ceph_flush_snaps(struct ceph_inode_info *ci,
session = *psession;
retry:
spin_lock(&ci->i_ceph_lock);
- if (!(ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS)) {
+ /* ensure that bit state is consistent */
+ smp_mb__before_atomic();
+ if (!test_bit(CEPH_I_FLUSH_SNAPS_BIT, &ci->i_ceph_flags)) {
doutc(cl, " no capsnap needs flush, doing nothing\n");
goto out;
}
}
// make sure flushsnap messages are sent in proper order.
- if (ci->i_ceph_flags & CEPH_I_KICK_FLUSH)
+ /* ensure that bit state is consistent */
+ smp_mb__before_atomic();
+ if (test_bit(CEPH_I_KICK_FLUSH_BIT, &ci->i_ceph_flags))
__kick_flushing_caps(mdsc, session, ci, 0);
__ceph_flush_snaps(ci, session);
struct ceph_mds_session *session = NULL;
spin_lock(&ci->i_ceph_lock);
- if (ci->i_ceph_flags & CEPH_I_ASYNC_CREATE) {
- ci->i_ceph_flags |= CEPH_I_ASYNC_CHECK_CAPS;
+ /* ensure that bit state is consistent */
+ smp_mb__before_atomic();
+ if (test_bit(CEPH_ASYNC_CREATE_BIT, &ci->i_ceph_flags)) {
+ set_bit(CEPH_I_ASYNC_CHECK_CAPS_BIT, &ci->i_ceph_flags);
+ /* ensure modified bit is visible */
+ smp_mb__after_atomic();
/* Don't send messages until we get async create reply */
spin_unlock(&ci->i_ceph_lock);
return;
}
- if (ci->i_ceph_flags & CEPH_I_FLUSH)
+ /* ensure that bit state is consistent */
+ smp_mb__before_atomic();
+ if (test_bit(CEPH_I_FLUSH_BIT, &ci->i_ceph_flags))
flags |= CHECK_CAPS_FLUSH;
retry:
/* Caps wanted by virtue of active open files. */
doutc(cl, "flushing dirty caps\n");
goto ack;
}
- if (ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS) {
+
+ /* ensure that bit state is consistent */
+ smp_mb__before_atomic();
+ if (test_bit(CEPH_I_FLUSH_SNAPS_BIT, &ci->i_ceph_flags)) {
doutc(cl, "flushing snap caps\n");
goto ack;
}
/* kick flushing and flush snaps before sending normal
* cap message */
+ /* ensure that bit state is consistent */
+ smp_mb__before_atomic();
if (cap == ci->i_auth_cap &&
(ci->i_ceph_flags &
(CEPH_I_KICK_FLUSH | CEPH_I_FLUSH_SNAPS))) {
- if (ci->i_ceph_flags & CEPH_I_KICK_FLUSH)
+ if (test_bit(CEPH_I_KICK_FLUSH_BIT, &ci->i_ceph_flags))
__kick_flushing_caps(mdsc, session, ci, 0);
- if (ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS)
+ if (test_bit(CEPH_I_FLUSH_SNAPS_BIT, &ci->i_ceph_flags))
__ceph_flush_snaps(ci, session);
goto retry;
goto out;
}
+ /* ensure that bit state is consistent */
+ smp_mb__before_atomic();
if (ci->i_ceph_flags &
(CEPH_I_KICK_FLUSH | CEPH_I_FLUSH_SNAPS)) {
- if (ci->i_ceph_flags & CEPH_I_KICK_FLUSH)
+ /* ensure that bit state is consistent */
+ smp_mb__before_atomic();
+ if (test_bit(CEPH_I_KICK_FLUSH_BIT, &ci->i_ceph_flags))
__kick_flushing_caps(mdsc, session, ci, 0);
- if (ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS)
+ /* ensure that bit state is consistent */
+ smp_mb__before_atomic();
+ if (test_bit(CEPH_I_FLUSH_SNAPS_BIT, &ci->i_ceph_flags))
__ceph_flush_snaps(ci, session);
goto retry_locked;
}
u64 last_snap_flush = 0;
/* Don't do anything until create reply comes in */
- if (ci->i_ceph_flags & CEPH_I_ASYNC_CREATE)
+ /* ensure that bit state is consistent */
+ smp_mb__before_atomic();
+ if (test_bit(CEPH_ASYNC_CREATE_BIT, &ci->i_ceph_flags))
return;
- ci->i_ceph_flags &= ~CEPH_I_KICK_FLUSH;
+ clear_bit(CEPH_I_KICK_FLUSH_BIT, &ci->i_ceph_flags);
+ /* ensure modified bit is visible */
+ smp_mb__after_atomic();
list_for_each_entry_reverse(cf, &ci->i_cap_flush_list, i_list) {
if (cf->is_capsnap) {
__kick_flushing_caps(mdsc, session, ci,
oldest_flush_tid);
} else {
- ci->i_ceph_flags |= CEPH_I_KICK_FLUSH;
+ set_bit(CEPH_I_KICK_FLUSH_BIT, &ci->i_ceph_flags);
+ /* ensure modified bit is visible */
+ smp_mb__after_atomic();
}
spin_unlock(&ci->i_ceph_lock);
spin_unlock(&ci->i_ceph_lock);
continue;
}
- if (ci->i_ceph_flags & CEPH_I_KICK_FLUSH) {
+
+ /* ensure that bit state is consistent */
+ smp_mb__before_atomic();
+ if (test_bit(CEPH_I_KICK_FLUSH_BIT, &ci->i_ceph_flags)) {
__kick_flushing_caps(mdsc, session, ci,
oldest_flush_tid);
}
again:
spin_lock(&ci->i_ceph_lock);
+ /* ensure that bit state is consistent */
+ smp_mb__before_atomic();
if ((flags & CHECK_FILELOCK) &&
- (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK)) {
+ test_bit(CEPH_I_ERROR_FILELOCK_BIT, &ci->i_ceph_flags)) {
doutc(cl, "%p %llx.%llx error filelock\n", inode,
ceph_vinop(inode));
ret = -EIO;
doutc(cl, "%p follows %llu\n", capsnap, capsnap->follows);
BUG_ON(capsnap->cap_flush.tid > 0);
ceph_put_snap_context(capsnap->context);
- if (!list_is_last(&capsnap->ci_item, &ci->i_cap_snaps))
- ci->i_ceph_flags |= CEPH_I_FLUSH_SNAPS;
+ if (!list_is_last(&capsnap->ci_item, &ci->i_cap_snaps)) {
+ set_bit(CEPH_I_FLUSH_SNAPS_BIT, &ci->i_ceph_flags);
+ /* ensure modified bit is visible */
+ smp_mb__after_atomic();
+ }
list_del(&capsnap->ci_item);
ceph_put_cap_snap(capsnap);
if (ceph_try_drop_cap_snap(ci, capsnap)) {
put++;
} else {
- ci->i_ceph_flags |= CEPH_I_FLUSH_SNAPS;
+ set_bit(CEPH_I_FLUSH_SNAPS_BIT,
+ &ci->i_ceph_flags);
+ /* ensure modified bit is visible */
+ smp_mb__after_atomic();
flush_snaps = true;
}
}
rcu_assign_pointer(ci->i_layout.pool_ns, extra_info->pool_ns);
if (ci->i_layout.pool_id != old_pool ||
- extra_info->pool_ns != old_ns)
- ci->i_ceph_flags &= ~CEPH_I_POOL_PERM;
+ extra_info->pool_ns != old_ns) {
+ clear_bit(CEPH_I_POOL_PERM_BIT, &ci->i_ceph_flags);
+ /* ensure modified bit is visible */
+ smp_mb__after_atomic();
+ }
extra_info->pool_ns = old_ns;
unsigned long delay_max = opt->caps_wanted_delay_max * HZ;
unsigned long loop_start = jiffies;
unsigned long delay = 0;
+ bool is_metadata_under_flush;
doutc(cl, "begin\n");
spin_lock(&mdsc->cap_delay_lock);
delay = ci->i_hold_caps_max;
break;
}
- if ((ci->i_ceph_flags & CEPH_I_FLUSH) == 0 &&
- time_before(jiffies, ci->i_hold_caps_max))
- break;
+
list_del_init(&ci->i_cap_delay_list);
+ spin_unlock(&mdsc->cap_delay_lock);
+ spin_lock(&ci->i_ceph_lock);
+ /* ensure that bit state is consistent */
+ smp_mb__before_atomic();
+ is_metadata_under_flush =
+ test_bit(CEPH_I_FLUSH_BIT, &ci->i_ceph_flags);
+ spin_unlock(&ci->i_ceph_lock);
+ spin_lock(&mdsc->cap_delay_lock);
+
+ if (!is_metadata_under_flush &&
+ time_before(jiffies, ci->i_hold_caps_max)) {
+ list_add(&ci->i_cap_delay_list, &mdsc->cap_delay_list);
+ break;
+ }
+
inode = igrab(&ci->netfs.inode);
if (inode) {
spin_unlock(&mdsc->cap_delay_lock);
doutc(mdsc->fsc->client, "%p %llx.%llx\n", inode,
ceph_vinop(inode));
spin_lock(&mdsc->cap_delay_lock);
- ci->i_ceph_flags |= CEPH_I_FLUSH;
+ set_bit(CEPH_I_FLUSH_BIT, &ci->i_ceph_flags);
+ /* ensure modified bit is visible */
+ smp_mb__after_atomic();
if (!list_empty(&ci->i_cap_delay_list))
list_del_init(&ci->i_cap_delay_list);
list_add_tail(&ci->i_cap_delay_list,
if (atomic_read(&ci->i_filelock_ref) > 0) {
/* make further file lock syscall return -EIO */
- ci->i_ceph_flags |= CEPH_I_ERROR_FILELOCK;
+ set_bit(CEPH_I_ERROR_FILELOCK_BIT, &ci->i_ceph_flags);
+ /* ensure modified bit is visible */
+ smp_mb__after_atomic();
pr_warn_ratelimited_client(cl,
" dropping file locks for %p %llx.%llx\n",
inode, ceph_vinop(inode));
/*
* Ceph inode.
*/
-#define CEPH_I_DIR_ORDERED (1 << 0) /* dentries in dir are ordered */
-#define CEPH_I_FLUSH (1 << 2) /* do not delay flush of dirty metadata */
-#define CEPH_I_POOL_PERM (1 << 3) /* pool rd/wr bits are valid */
-#define CEPH_I_POOL_RD (1 << 4) /* can read from pool */
-#define CEPH_I_POOL_WR (1 << 5) /* can write to pool */
-#define CEPH_I_SEC_INITED (1 << 6) /* security initialized */
-#define CEPH_I_KICK_FLUSH (1 << 7) /* kick flushing caps */
-#define CEPH_I_FLUSH_SNAPS (1 << 8) /* need flush snapss */
-#define CEPH_I_ERROR_WRITE (1 << 9) /* have seen write errors */
-#define CEPH_I_ERROR_FILELOCK (1 << 10) /* have seen file lock errors */
-#define CEPH_I_ODIRECT_BIT (11) /* inode in direct I/O mode */
-#define CEPH_I_ODIRECT (1 << CEPH_I_ODIRECT_BIT)
-#define CEPH_ASYNC_CREATE_BIT (12) /* async create in flight for this */
-#define CEPH_I_ASYNC_CREATE (1 << CEPH_ASYNC_CREATE_BIT)
-#define CEPH_I_SHUTDOWN (1 << 13) /* inode is no longer usable */
-#define CEPH_I_ASYNC_CHECK_CAPS (1 << 14) /* check caps immediately after async
- creating finishes */
+/*
+ * Bit numbers for ci->i_ceph_flags, for use with test_bit()/set_bit()/
+ * clear_bit(); i_ceph_flags must therefore be an unsigned long.
+ * Bit 1 is deliberately left unused so the numbering matches the old
+ * mask constants ((1 << 0), (1 << 2), ...) exactly.
+ */
+#define CEPH_I_DIR_ORDERED_BIT (0) /* dentries in dir are ordered */
+#define CEPH_I_FLUSH_BIT (2) /* do not delay flush of dirty metadata */
+#define CEPH_I_POOL_PERM_BIT (3) /* pool rd/wr bits are valid */
+#define CEPH_I_POOL_RD_BIT (4) /* can read from pool */
+#define CEPH_I_POOL_WR_BIT (5) /* can write to pool */
+#define CEPH_I_SEC_INITED_BIT (6) /* security initialized */
+#define CEPH_I_KICK_FLUSH_BIT (7) /* kick flushing caps */
+#define CEPH_I_FLUSH_SNAPS_BIT (8) /* need flush snapss */
+#define CEPH_I_ERROR_WRITE_BIT (9) /* have seen write errors */
+#define CEPH_I_ERROR_FILELOCK_BIT (10) /* have seen file lock errors */
+#define CEPH_I_ODIRECT_BIT (11) /* inode in direct I/O mode */
+#define CEPH_ASYNC_CREATE_BIT (12) /* async create in flight for this */
+#define CEPH_I_SHUTDOWN_BIT (13) /* inode is no longer usable */
+#define CEPH_I_ASYNC_CHECK_CAPS_BIT (14) /* check caps after async creating finishes */
+
+/* Mask forms of the bits above, kept for code that still tests several
+ * flags at once with a bitwise AND. */
+#define CEPH_I_DIR_ORDERED (1 << CEPH_I_DIR_ORDERED_BIT)
+#define CEPH_I_FLUSH (1 << CEPH_I_FLUSH_BIT)
+#define CEPH_I_POOL_PERM (1 << CEPH_I_POOL_PERM_BIT)
+#define CEPH_I_POOL_RD (1 << CEPH_I_POOL_RD_BIT)
+#define CEPH_I_POOL_WR (1 << CEPH_I_POOL_WR_BIT)
+#define CEPH_I_SEC_INITED (1 << CEPH_I_SEC_INITED_BIT)
+#define CEPH_I_KICK_FLUSH (1 << CEPH_I_KICK_FLUSH_BIT)
+#define CEPH_I_FLUSH_SNAPS (1 << CEPH_I_FLUSH_SNAPS_BIT)
+#define CEPH_I_ERROR_WRITE (1 << CEPH_I_ERROR_WRITE_BIT)
+#define CEPH_I_ERROR_FILELOCK (1 << CEPH_I_ERROR_FILELOCK_BIT)
+#define CEPH_I_ODIRECT (1 << CEPH_I_ODIRECT_BIT)
+#define CEPH_I_ASYNC_CREATE (1 << CEPH_ASYNC_CREATE_BIT)
+#define CEPH_I_SHUTDOWN (1 << CEPH_I_SHUTDOWN_BIT)
+#define CEPH_I_ASYNC_CHECK_CAPS (1 << CEPH_I_ASYNC_CHECK_CAPS_BIT)
/*
* Masks of ceph inode work.
*/
static inline void ceph_set_error_write(struct ceph_inode_info *ci)
{
-	if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ERROR_WRITE)) {
-		spin_lock(&ci->i_ceph_lock);
-		ci->i_ceph_flags |= CEPH_I_ERROR_WRITE;
-		spin_unlock(&ci->i_ceph_lock);
-	}
+	/*
+	 * Record that a write error was seen.  set_bit() is an atomic,
+	 * idempotent RMW, so the test-before-set is unnecessary; the
+	 * spinlock keeps this serialized with the remaining non-atomic
+	 * updates of i_ceph_flags elsewhere in the file.
+	 *
+	 * Note: smp_mb__before_atomic() may only precede value-less
+	 * atomic RMW ops (set_bit/clear_bit), never a plain test_bit()
+	 * read, so no barrier belongs on the read side here.
+	 */
+	spin_lock(&ci->i_ceph_lock);
+	set_bit(CEPH_I_ERROR_WRITE_BIT, &ci->i_ceph_flags);
+	/* ensure modified bit is visible before the lock is dropped */
+	smp_mb__after_atomic();
+	spin_unlock(&ci->i_ceph_lock);
}
static inline void ceph_clear_error_write(struct ceph_inode_info *ci)
{
-	if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ERROR_WRITE) {
-		spin_lock(&ci->i_ceph_lock);
-		ci->i_ceph_flags &= ~CEPH_I_ERROR_WRITE;
-		spin_unlock(&ci->i_ceph_lock);
-	}
+	/*
+	 * Clear a previously recorded write error.
+	 *
+	 * BUGFIX: the converted code tested !test_bit() before clearing,
+	 * inverting the original condition — once the bit was set it
+	 * could never be cleared again.  clear_bit() is atomic and a
+	 * no-op when the bit is already clear, so no test is needed.
+	 */
+	spin_lock(&ci->i_ceph_lock);
+	clear_bit(CEPH_I_ERROR_WRITE_BIT, &ci->i_ceph_flags);
+	/* ensure modified bit is visible before the lock is dropped */
+	smp_mb__after_atomic();
+	spin_unlock(&ci->i_ceph_lock);
}
static inline void __ceph_dir_set_complete(struct ceph_inode_info *ci,
static inline bool ceph_inode_is_shutdown(struct inode *inode)
{
-	unsigned long flags = READ_ONCE(ceph_inode(inode)->i_ceph_flags);
	struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode);
	int state = READ_ONCE(fsc->mount_state);

-	return (flags & CEPH_I_SHUTDOWN) || state >= CEPH_MOUNT_SHUTDOWN;
+	/*
+	 * smp_mb__before_atomic() is only defined for value-less atomic
+	 * RMW ops; placing it before a plain load is invalid barrier
+	 * usage and buys nothing.  Read the bit directly with test_bit()
+	 * for consistency with the rest of the i_ceph_flags conversion.
+	 */
+	return test_bit(CEPH_I_SHUTDOWN_BIT, &ceph_inode(inode)->i_ceph_flags) ||
+	       state >= CEPH_MOUNT_SHUTDOWN;
}