#include "osd_client.h"
/*
+ * Ceph address space ops.
+ *
* There are a few funny things going on here.
*
* The page->private field is used to reference a struct
* On writeback, we must submit writes to the osd IN SNAP ORDER. So,
* we look for the first capsnap in i_cap_snaps and write out pages in
* that snap context _only_. Then we move on to the next capsnap,
- * eventually reachings the "live" or "head" context (i.e., pages that
+ * eventually reaching the "live" or "head" context (i.e., pages that
* are not yet snapped) and are writing the most recently dirtied
* pages.
*
#endif
if (undo)
+ /* whoops, we failed to dirty the page */
ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
{
struct pagevec pvec;
int i;
+
pagevec_init(&pvec, 0);
for (i = 0; i < num; i++) {
if (pagevec_add(&pvec, pages[i]) == 0)
}
/*
- * parse a full metadata trace from the mds: inode, dirinfo, dentry, inode...
- * sequence.
+ * parse a normal reply, which may contain a (dir+)dentry and/or a
+ * target inode.
*/
static int parse_reply_info_trace(void **p, void *end,
struct ceph_mds_reply_info_parsed *info)
}
/*
+ * Helper to safely iterate over all caps associated with a session.
+ *
* caller must hold session s_mutex
*/
static int iterate_session_caps(struct ceph_mds_session *session,
{
dout("remove_session_caps on %p\n", session);
iterate_session_caps(session, remove_session_caps_cb, NULL);
-
BUG_ON(session->s_nr_caps > 0);
-
cleanup_cap_releases(session);
}
spin_lock(&inode->i_lock);
if (cap->gen != session->s_cap_gen) {
- pr_err("ceph failed reconnect %p cap %p (gen %d < sess %d)\n",
- inode, cap, cap->gen, session->s_cap_gen);
+ pr_err("ceph failed reconnect %p %llx.%llx cap %p "
+ "(gen %d < session %d)\n", inode, ceph_vinop(inode),
+ cap, cap->gen, session->s_cap_gen);
__ceph_remove_cap(cap, NULL);
}
wake_up(&ceph_inode(inode)->i_cap_wq);
* Note new cap ttl, and any transition from stale -> not stale (fresh?).
*/
static void renewed_caps(struct ceph_mds_client *mdsc,
- struct ceph_mds_session *session, int is_renew)
+ struct ceph_mds_session *session, int is_renew)
{
int was_stale;
int wake = 0;
/*
* Trim old(er) caps.
+ *
+ * Because we can't cache an inode without one or more caps, we do
+ * this indirectly: if a cap is unused, we prune its aliases, at which
+ * point the inode will hopefully get dropped too.
+ *
+ * Yes, this is a bit sloppy. Our only real goal here is to respond to
+ * memory pressure from the MDS, though, so it needn't be perfect.
*/
static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
{
return 0;
}
+/*
+ * Trim session cap count down to some max number.
+ */
static int trim_caps(struct ceph_mds_client *mdsc,
struct ceph_mds_session *session,
int max_caps)
return 0;
}
+/*
+ * request arguments may be specified via an inode *, a dentry *, or
+ * an explicit ino+path.
+ */
static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
const char *rpath, u64 rino,
const char **ppath, int *pathlen,
goto out;
}
+/*
+ * called under mdsc->mutex
+ */
static void __wake_requests(struct ceph_mds_client *mdsc,
struct list_head *head)
{
ceph_get_cap_refs(ceph_inode(req->r_old_dentry->d_parent->d_inode),
CEPH_CAP_PIN);
+ /* issue */
mutex_lock(&mdsc->mutex);
__register_request(mdsc, req, listener);
__do_request(mdsc, req);
+ /* wait */
if (!req->r_reply) {
mutex_unlock(&mdsc->mutex);
if (req->r_timeout) {
mutex_unlock(&mdsc->mutex);
}
+/*
+ * Encode information about a cap for a reconnect with the MDS.
+ */
struct encode_caps_data {
void **pp;
void *end;
};
static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
- void *arg)
+ void *arg)
{
struct ceph_mds_cap_reconnect *rec;
struct ceph_inode_info *ci;
* mdsc->snap_flush_lock
* mdsc->cap_delay_lock
*
- *
*/
struct ceph_client;
int snapblob_len;
};
+
+/*
+ * cap releases are batched and sent to the MDS en masse.
+ */
+#define CEPH_CAPS_PER_RELEASE ((PAGE_CACHE_SIZE - \
+ sizeof(struct ceph_mds_cap_release)) / \
+ sizeof(struct ceph_mds_cap_item))
+
+
/*
* state associated with each MDS<->client session
*/
CEPH_MDS_SESSION_RECONNECTING = 6
};
-#define CEPH_CAPS_PER_RELEASE ((PAGE_CACHE_SIZE - \
- sizeof(struct ceph_mds_cap_release)) / \
- sizeof(struct ceph_mds_cap_item))
-
struct ceph_mds_session {
int s_mds;
int s_state;
struct ceph_mds_request;
struct ceph_mds_client;
+/*
+ * request completion callback
+ */
typedef void (*ceph_mds_request_callback_t) (struct ceph_mds_client *mdsc,
struct ceph_mds_request *req);
-struct ceph_mds_request_attr {
- struct attribute attr;
- ssize_t (*show)(struct ceph_mds_request *,
- struct ceph_mds_request_attr *,
- char *);
- ssize_t (*store)(struct ceph_mds_request *,
- struct ceph_mds_request_attr *,
- const char *, size_t);
-};
-
/*
* an in-flight mds request
*/
u64 r_tid; /* transaction id */
int r_op;
- struct inode *r_inode;
- struct dentry *r_dentry;
- struct dentry *r_old_dentry; /* rename from or link from */
+
+ /* operation on what? */
+ struct inode *r_inode; /* arg1 */
+ struct dentry *r_dentry; /* arg1 */
+ struct dentry *r_old_dentry; /* arg2: rename from or link from */
const char *r_path1, *r_path2;
struct ceph_vino r_ino1, r_ino2;
union ceph_mds_request_args r_args;
+
+ /* data payload is used for xattr ops */
struct page **r_pages;
int r_num_pages;
int r_data_len;
+ /* what caps shall we drop? */
int r_inode_drop, r_inode_unless;
int r_dentry_drop, r_dentry_unless;
int r_old_dentry_drop, r_old_dentry_unless;
struct inode *r_old_inode;
int r_old_inode_drop, r_old_inode_unless;
- struct inode *r_target_inode;
+ struct inode *r_target_inode; /* resulting inode */
struct ceph_msg *r_request; /* original request */
struct ceph_msg *r_reply;
u32 r_direct_hash; /* choose dir frag based on this dentry hash */
bool r_direct_is_hash; /* true if r_direct_hash is valid */
+ /* link unsafe requests to parent directory, for fsync */
struct inode *r_unsafe_dir;
struct list_head r_unsafe_dir_item;
}
/*
- * Ignore any fields we don't care about in the MDS map (there are quite
- * a few of them).
+ * Decode an MDS map
+ *
+ * Ignore any fields we don't care about (there are quite a few of
+ * them).
*/
struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
{
#include "types.h"
/*
- * mds map
+ * mds map - describe servers in the mds cluster
*
* fields limited to those the client cares about
*/