/- flush dirty caps to auth mds only. resend on cap import.
- reflush caps on mds recovery
- cap (release) reservations
-- size limit on readdir result, partial dirfrag readdir
+/- size limit on readdir result, partial dirfrag readdir
- revisit unmount
/- make request paths relative to a non-snapshotted inode.
up_read(&mdsc->snap_rwsem);
}
+/*
+ * Mark caps dirty. If inode is newly dirty, add to the global dirty
+ * list.
+ *
+ * @ci:   inode whose i_dirty_caps mask is updated
+ * @mask: cap bits OR'ed into ci->i_dirty_caps
+ *
+ * Returns the value of __ceph_caps_dirty(ci) sampled before @mask is
+ * applied. A zero return means this call linked ci->i_dirty_item onto
+ * mdsc->cap_dirty (under cap_dirty_lock).
+ *
+ * The callers visible in this patch invoke this with inode->i_lock held
+ * and, on a zero return, follow up with __mark_inode_dirty() and igrab().
+ *
+ * NOTE(review): the list insertion is keyed on the previous dirty state
+ * only, so a call with mask == 0 on a clean inode would still link the
+ * inode onto cap_dirty. Visible callers never pass 0 -- confirm for any
+ * new caller.
+ */
+int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
+{
+ struct ceph_mds_client *mdsc = &ceph_client(ci->vfs_inode.i_sb)->mdsc;
+ int was = __ceph_caps_dirty(ci); /* dirty state before this call */
+
+ ci->i_dirty_caps |= mask;
+ if (!was) { /* previously clean: link onto the global dirty list */
+ dout(20, " inode %p now dirty\n", &ci->vfs_inode);
+ spin_lock(&mdsc->cap_dirty_lock);
+ list_add(&ci->i_dirty_item, &mdsc->cap_dirty);
+ spin_unlock(&mdsc->cap_dirty_lock);
+ }
+ return was;
+}
+/*
+ * Link the inode onto mdsc->cap_sync, the list that ceph_mdsc_sync()
+ * waits to see empty, unless ci->i_sync_item is already linked.
+ *
+ * The inode must already be linked through ci->i_dirty_item; an unlinked
+ * i_dirty_item triggers BUG_ON. cap_sync is protected by the same
+ * cap_dirty_lock as cap_dirty.
+ *
+ * NOTE(review): the i_dirty_item check runs before cap_dirty_lock is
+ * taken; presumably the caller's inode->i_lock keeps it stable -- confirm.
+ */
+static void __mark_caps_sync(struct inode *inode)
+{
+ struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc;
+ struct ceph_inode_info *ci = ceph_inode(inode);
+
+ BUG_ON(list_empty(&ci->i_dirty_item)); /* must be on a dirty list */
+ spin_lock(&mdsc->cap_dirty_lock);
+ if (list_empty(&ci->i_sync_item)) { /* not yet on cap_sync */
+ dout(20, " inode %p now sync\n", &ci->vfs_inode);
+ list_add(&ci->i_sync_item, &mdsc->cap_sync);
+ }
+ spin_unlock(&mdsc->cap_dirty_lock);
+}
+
/*
* Try to flush dirty caps back to the auth mds.
*/
if (cap->session->s_state < CEPH_MDS_SESSION_OPEN)
goto out;
+ __mark_caps_sync(inode);
+
cap->flushing |= dirty & cap->implemented;
if (cap->flushing) {
ci->i_dirty_caps &= ~cap->flushing;
__releases(inode->i_lock)
{
struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc;
unsigned seq = le32_to_cpu(m->seq);
int cleaned = le32_to_cpu(m->dirty);
int old_dirty, new_dirty;
old_dirty = __ceph_caps_dirty(ci);
cap->flushing &= ~cleaned;
new_dirty = __ceph_caps_dirty(ci);
- spin_unlock(&inode->i_lock);
- if (old_dirty && !new_dirty) {
+ if (old_dirty) {
+ spin_lock(&mdsc->cap_dirty_lock);
+ list_del_init(&ci->i_sync_item);
+ if (list_empty(&mdsc->cap_sync))
+ wake_up(&mdsc->cap_sync_wq);
+ dout(20, " inode %p now !sync\n", inode);
+ if (!new_dirty) {
+ dout(20, " inode %p now clean\n", inode);
+ list_del_init(&ci->i_dirty_item);
+ }
+ spin_unlock(&mdsc->cap_dirty_lock);
wake_up(&ci->i_cap_wq);
- iput(inode);
}
+
+ spin_unlock(&inode->i_lock);
+ if (old_dirty && !new_dirty)
+ iput(inode);
}
/*
dout(20, " op %s ino %llx inode %p\n", ceph_cap_op_name(op), vino.ino,
inode);
if (!inode) {
- dout(10, " i don't have ino %llx, sending release\n", vino.ino);
+ dout(10, " i don't have ino %llx\n", vino.ino);
goto done;
}
case CEPH_CAP_OP_EXPORT:
handle_cap_export(inode, h, session);
- if (list_empty(&session->s_caps))
- ceph_mdsc_flushed_all_caps(mdsc, session);
goto done;
case CEPH_CAP_OP_IMPORT:
case CEPH_CAP_OP_FLUSH_ACK:
handle_cap_flush_ack(inode, h, session, cap);
- if (list_empty(&session->s_caps))
- ceph_mdsc_flushed_all_caps(mdsc, session);
break;
case CEPH_CAP_OP_TRUNC:
spin_lock(&inode->i_lock);
was_dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
spin_unlock(&inode->i_lock);
- if (!was_dirty)
- igrab(inode);
+ if (!was_dirty) {
+ __mark_inode_dirty(inode,I_DIRTY_SYNC|I_DIRTY_DATASYNC);
+ igrab(inode);
+ }
}
out:
ci->i_caps = RB_ROOT;
ci->i_auth_cap = NULL;
ci->i_dirty_caps = 0;
+ INIT_LIST_HEAD(&ci->i_dirty_item);
+ INIT_LIST_HEAD(&ci->i_sync_item);
init_waitqueue_head(&ci->i_cap_wq);
ci->i_hold_caps_until = 0;
INIT_LIST_HEAD(&ci->i_cap_delay_list);
if (dirtied) {
int was_dirty = __ceph_mark_dirty_caps(ci, dirtied);
- if (!was_dirty)
+ if (!was_dirty) {
+ __mark_inode_dirty(inode,I_DIRTY_SYNC);
igrab(inode);
+ }
inode->i_ctime = CURRENT_TIME;
}
spin_unlock(&inode->i_lock);
case CEPH_MDS_SESSION_NEW: return "new";
case CEPH_MDS_SESSION_OPENING: return "opening";
case CEPH_MDS_SESSION_OPEN: return "open";
- case CEPH_MDS_SESSION_FLUSHING: return "flushing";
case CEPH_MDS_SESSION_CLOSING: return "closing";
case CEPH_MDS_SESSION_RECONNECTING: return "reconnecting";
default: return "???";
return err;
}
-/*
- * check all caps on a session, without allowing release to
- * be delayed.
- */
-static void check_all_caps(struct ceph_mds_client *mdsc,
- struct ceph_mds_session *session)
-{
- struct list_head *p, *n;
-
- list_for_each_safe(p, n, &session->s_caps) {
- struct ceph_cap *cap =
- list_entry(p, struct ceph_cap, session_caps);
- struct inode *inode = &cap->ci->vfs_inode;
-
- igrab(inode);
- mutex_unlock(&session->s_mutex);
- ceph_check_caps(ceph_inode(inode), 1, 0, NULL);
- mutex_lock(&session->s_mutex);
- iput(inode);
- }
-}
-
/*
* Called with s_mutex held.
*/
static int __close_session(struct ceph_mds_client *mdsc,
struct ceph_mds_session *session)
{
- int mds = session->s_mds;
- int err = 0;
-
- dout(10, "close_session mds%d state=%s\n", mds,
+ dout(10, "close_session mds%d state=%s\n", session->s_mds,
session_state_name(session->s_state));
if (session->s_state >= CEPH_MDS_SESSION_CLOSING)
return 0;
-
- check_all_caps(mdsc, session);
-
- if (list_empty(&session->s_caps)) {
- session->s_state = CEPH_MDS_SESSION_CLOSING;
- err = request_close_session(mdsc, session);
- } else {
- session->s_state = CEPH_MDS_SESSION_FLUSHING;
- }
- return err;
-}
-
-/*
- * Called when the last cap for a session has been flushed or
- * exported.
- */
-void ceph_mdsc_flushed_all_caps(struct ceph_mds_client *mdsc,
- struct ceph_mds_session *session)
-{
- dout(10, "flushed_all_caps for mds%d state %s\n", session->s_mds,
- session_state_name(session->s_state));
- if (session->s_state == CEPH_MDS_SESSION_FLUSHING) {
- session->s_state = CEPH_MDS_SESSION_CLOSING;
- request_close_session(mdsc, session);
- }
+ session->s_state = CEPH_MDS_SESSION_CLOSING;
+ return request_close_session(mdsc, session);
}
/*
spin_lock_init(&mdsc->cap_delay_lock);
INIT_LIST_HEAD(&mdsc->snap_flush_list);
spin_lock_init(&mdsc->snap_flush_lock);
+ INIT_LIST_HEAD(&mdsc->cap_dirty);
+ INIT_LIST_HEAD(&mdsc->cap_sync);
+ spin_lock_init(&mdsc->cap_dirty_lock);
+ init_waitqueue_head(&mdsc->cap_sync_wq);
}
/*
wait_requests(mdsc);
}
+/*
+ * sync - flush all dirty inode data to disk
+ *
+ * are_no_sync_caps() is the wait condition used by ceph_mdsc_sync().
+ * It returns nonzero when mdsc->cap_sync is empty and zero otherwise;
+ * the list is sampled while holding cap_dirty_lock.
+ *
+ * NOTE(review): this helper only observes the list. Nothing in the code
+ * shown here starts a flush from the sync path, so "flush" above relies
+ * on flushes initiated elsewhere -- confirm.
+ */
+static int are_no_sync_caps(struct ceph_mds_client *mdsc)
+{
+ int empty;
+ spin_lock(&mdsc->cap_dirty_lock);
+ empty = list_empty(&mdsc->cap_sync);
+ spin_unlock(&mdsc->cap_dirty_lock);
+ dout(20, "are_no_sync_caps = %d\n", empty); /* logged after unlock */
+ return empty;
+}
+/*
+ * Sleep on mdsc->cap_sync_wq until are_no_sync_caps() reports that
+ * mdsc->cap_sync is empty. The flush-ack handling in this patch unlinks
+ * ci->i_sync_item and wakes cap_sync_wq once the list has drained.
+ *
+ * This function only waits; it does not itself queue or send any cap
+ * flush.
+ *
+ * NOTE(review): wait_event() has no timeout here, so this blocks for as
+ * long as any inode stays on cap_sync -- confirm that is acceptable when
+ * an MDS never acks.
+ */
+void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
+{
+ dout(10, "sync\n");
+ wait_event(mdsc->cap_sync_wq, are_no_sync_caps(mdsc));
+}
+
+
/*
* called after sb is ro.
*/
mutex_lock(&mdsc->mutex);
- /* close sessions, caps.
- *
- * WARNING the session close timeout (and forced unmount in
- * general) is somewhat broken.. we'll leaved inodes pinned
- * and other nastyness.
- */
+ /* close sessions */
started = jiffies;
while (time_before(jiffies, started + timeout)) {
dout(10, "closing sessions\n");
}
}
-
WARN_ON(!list_empty(&mdsc->cap_delay_list));
mutex_unlock(&mdsc->mutex);
CEPH_MDS_SESSION_NEW = 1,
CEPH_MDS_SESSION_OPENING = 2,
CEPH_MDS_SESSION_OPEN = 3,
- CEPH_MDS_SESSION_FLUSHING = 4,
CEPH_MDS_SESSION_CLOSING = 5,
CEPH_MDS_SESSION_RECONNECTING = 6
};
spinlock_t cap_delay_lock; /* protects cap_delay_list */
struct list_head snap_flush_list; /* cap_snaps ready to flush */
spinlock_t snap_flush_lock;
+ struct list_head cap_dirty, cap_sync; /* inodes with dirty cap data */
+ spinlock_t cap_dirty_lock;
+ wait_queue_head_t cap_sync_wq;
struct dentry *debugfs_file;
};
extern void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc);
extern void ceph_mdsc_stop(struct ceph_mds_client *mdsc);
+extern void ceph_mdsc_sync(struct ceph_mds_client *mdsc);
+
extern void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc,
struct ceph_msg *msg);
extern void ceph_mdsc_handle_session(struct ceph_mds_client *mdsc,
extern void ceph_mdsc_handle_reset(struct ceph_mds_client *mdsc, int mds);
-extern void ceph_mdsc_flushed_all_caps(struct ceph_mds_client *mdsc,
- struct ceph_mds_session *session);
extern struct ceph_mds_request *ceph_mdsc_get_listener_req(struct inode *inode,
u64 tid);
extern char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base,
{
dout(10, "sync_fs %d\n", wait);
ceph_osdc_sync(&ceph_client(sb)->osdc);
+ ceph_mdsc_sync(&ceph_client(sb)->mdsc);
return 0;
}
struct rb_root i_caps; /* cap list */
struct ceph_cap *i_auth_cap; /* authoritative cap, if any */
unsigned i_dirty_caps; /* mask of dirtied fields */
+ struct list_head i_dirty_item, i_sync_item;
wait_queue_head_t i_cap_wq; /* threads waiting on a capability */
unsigned long i_hold_caps_until; /* jiffies */
struct list_head i_cap_delay_list; /* for delayed cap release to mds */
}
extern int __ceph_caps_dirty(struct ceph_inode_info *ci);
-static inline int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
-{
- int was = __ceph_caps_dirty(ci);
- ci->i_dirty_caps |= mask;
- return was;
-}
+extern int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask);
+
extern int ceph_caps_revoking(struct ceph_inode_info *ci, int mask);
static inline int __ceph_caps_used(struct ceph_inode_info *ci)