From fc67f91c912697856b7c566e9a17ec834f1fecde Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 16 Jul 2009 16:02:38 -0700 Subject: [PATCH] kclient: cleanup addr, mdsc --- src/kernel/addr.c | 6 +++++- src/kernel/mds_client.c | 39 +++++++++++++++++++++++++++++++-------- src/kernel/mds_client.h | 41 ++++++++++++++++++++++------------------- src/kernel/mdsmap.c | 6 ++++-- src/kernel/mdsmap.h | 2 +- 5 files changed, 63 insertions(+), 31 deletions(-) diff --git a/src/kernel/addr.c b/src/kernel/addr.c index 928c592e0f744..99b2a07c69c50 100644 --- a/src/kernel/addr.c +++ b/src/kernel/addr.c @@ -13,6 +13,8 @@ #include "osd_client.h" /* + * Ceph address space ops. + * * There are a few funny things going on here. * * The page->private field is used to reference a struct @@ -38,7 +40,7 @@ * On writeback, we must submit writes to the osd IN SNAP ORDER. So, * we look for the first capsnap in i_cap_snaps and write out pages in * that snap context _only_. Then we move on to the next capsnap, - * eventually reachings the "live" or "head" context (i.e., pages that + * eventually reaching the "live" or "head" context (i.e., pages that * are not yet snapped) and are writing the most recently dirtied * pages. * @@ -136,6 +138,7 @@ static int ceph_set_page_dirty(struct page *page) #endif if (undo) + /* whoops, we failed to dirty the page */ ceph_put_wrbuffer_cap_refs(ci, 1, snapc); __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); @@ -469,6 +472,7 @@ static void ceph_release_pages(struct page **pages, int num) { struct pagevec pvec; int i; + pagevec_init(&pvec, 0); for (i = 0; i < num; i++) { if (pagevec_add(&pvec, pages[i]) == 0) diff --git a/src/kernel/mds_client.c b/src/kernel/mds_client.c index 11f43c8e42a2d..62371340eb6ac 100644 --- a/src/kernel/mds_client.c +++ b/src/kernel/mds_client.c @@ -57,8 +57,8 @@ bad: } /* - * parse a full metadata trace from the mds: inode, dirinfo, dentry, inode... - * sequence. 
+ * parse a normal reply, which may contain a (dir+)dentry and/or a + * target inode. */ static int parse_reply_info_trace(void **p, void *end, struct ceph_mds_reply_info_parsed *info) @@ -622,6 +622,8 @@ static void cleanup_cap_releases(struct ceph_mds_session *session) } /* + * Helper to safely iterate over all caps associated with a session. + * * caller must hold session s_mutex */ static int iterate_session_caps(struct ceph_mds_session *session, @@ -667,9 +669,7 @@ static void remove_session_caps(struct ceph_mds_session *session) { dout("remove_session_caps on %p\n", session); iterate_session_caps(session, remove_session_caps_cb, NULL); - BUG_ON(session->s_nr_caps > 0); - cleanup_cap_releases(session); } @@ -686,8 +686,9 @@ static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap, spin_lock(&inode->i_lock); if (cap->gen != session->s_cap_gen) { - pr_err("ceph failed reconnect %p cap %p (gen %d < sess %d)\n", - inode, cap, cap->gen, session->s_cap_gen); + pr_err("ceph failed reconnect %p %llx.%llx cap %p " + "(gen %d < session %d)\n", inode, ceph_vinop(inode), + cap, cap->gen, session->s_cap_gen); __ceph_remove_cap(cap, NULL); } wake_up(&ceph_inode(inode)->i_cap_wq); @@ -740,7 +741,7 @@ static int send_renew_caps(struct ceph_mds_client *mdsc, * Note new cap ttl, and any transition from stale -> not stale (fresh?). */ static void renewed_caps(struct ceph_mds_client *mdsc, - struct ceph_mds_session *session, int is_renew) + struct ceph_mds_session *session, int is_renew) { int was_stale; int wake = 0; @@ -802,6 +803,13 @@ static int __close_session(struct ceph_mds_client *mdsc, /* * Trim old(er) caps. + * + * Because we can't cache an inode without one or more caps, we do + * this indirectly: if a cap is unused, we prune its aliases, at which + * point the inode will hopefully get dropped too. + * + * Yes, this is a bit sloppy. Our only real goal here is to respond to + * memory pressure from the MDS, though, so it needn't be perfect. 
*/ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg) { @@ -843,6 +851,9 @@ out: return 0; } +/* + * Trim session cap count down to some max number. + */ static int trim_caps(struct ceph_mds_client *mdsc, struct ceph_mds_session *session, int max_caps) @@ -1122,6 +1133,10 @@ static int build_inode_path(struct inode *inode, return 0; } +/* + * request arguments may be specified via an inode *, a dentry *, or + * an explicit ino+path. + */ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry, const char *rpath, u64 rino, const char **ppath, int *pathlen, @@ -1383,6 +1398,9 @@ finish: goto out; } +/* + * called under mdsc->mutex + */ static void __wake_requests(struct ceph_mds_client *mdsc, struct list_head *head) { @@ -1458,10 +1476,12 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, ceph_get_cap_refs(ceph_inode(req->r_old_dentry->d_parent->d_inode), CEPH_CAP_PIN); + /* issue */ mutex_lock(&mdsc->mutex); __register_request(mdsc, req, listener); __do_request(mdsc, req); + /* wait */ if (!req->r_reply) { mutex_unlock(&mdsc->mutex); if (req->r_timeout) { @@ -1841,6 +1861,9 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc, mutex_unlock(&mdsc->mutex); } +/* + * Encode information about a cap for a reconnect with the MDS. + */ struct encode_caps_data { void **pp; void *end; @@ -1848,7 +1871,7 @@ struct encode_caps_data { }; static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, - void *arg) + void *arg) { struct ceph_mds_cap_reconnect *rec; struct ceph_inode_info *ci; diff --git a/src/kernel/mds_client.h b/src/kernel/mds_client.h index 5a4db3332d12d..a2be61dd736d9 100644 --- a/src/kernel/mds_client.h +++ b/src/kernel/mds_client.h @@ -48,7 +48,6 @@ * mdsc->snap_flush_lock * mdsc->cap_delay_lock * - * */ struct ceph_client; @@ -94,6 +93,15 @@ struct ceph_mds_reply_info_parsed { int snapblob_len; }; + +/* + * cap releases are batched and sent to the MDS en masse. 
+ */ +#define CEPH_CAPS_PER_RELEASE ((PAGE_CACHE_SIZE - \ + sizeof(struct ceph_mds_cap_release)) / \ + sizeof(struct ceph_mds_cap_item)) + + /* * state associated with each MDS<->client session */ @@ -105,10 +113,6 @@ enum { CEPH_MDS_SESSION_RECONNECTING = 6 }; -#define CEPH_CAPS_PER_RELEASE ((PAGE_CACHE_SIZE - \ - sizeof(struct ceph_mds_cap_release)) / \ - sizeof(struct ceph_mds_cap_item)) - struct ceph_mds_session { int s_mds; int s_state; @@ -145,19 +149,12 @@ enum { struct ceph_mds_request; struct ceph_mds_client; +/* + * request completion callback + */ typedef void (*ceph_mds_request_callback_t) (struct ceph_mds_client *mdsc, struct ceph_mds_request *req); -struct ceph_mds_request_attr { - struct attribute attr; - ssize_t (*show)(struct ceph_mds_request *, - struct ceph_mds_request_attr *, - char *); - ssize_t (*store)(struct ceph_mds_request *, - struct ceph_mds_request_attr *, - const char *, size_t); -}; - /* * an in-flight mds request */ @@ -165,24 +162,29 @@ struct ceph_mds_request { u64 r_tid; /* transaction id */ int r_op; - struct inode *r_inode; - struct dentry *r_dentry; - struct dentry *r_old_dentry; /* rename from or link from */ + + /* operation on what? */ + struct inode *r_inode; /* arg1 */ + struct dentry *r_dentry; /* arg1 */ + struct dentry *r_old_dentry; /* arg2: rename from or link from */ const char *r_path1, *r_path2; struct ceph_vino r_ino1, r_ino2; union ceph_mds_request_args r_args; + + /* data payload is used for xattr ops */ struct page **r_pages; int r_num_pages; int r_data_len; + /* what caps shall we drop? 
*/ int r_inode_drop, r_inode_unless; int r_dentry_drop, r_dentry_unless; int r_old_dentry_drop, r_old_dentry_unless; struct inode *r_old_inode; int r_old_inode_drop, r_old_inode_unless; - struct inode *r_target_inode; + struct inode *r_target_inode; /* resulting inode */ struct ceph_msg *r_request; /* original request */ struct ceph_msg *r_reply; @@ -199,6 +201,7 @@ struct ceph_mds_request { u32 r_direct_hash; /* choose dir frag based on this dentry hash */ bool r_direct_is_hash; /* true if r_direct_hash is valid */ + /* link unsafe requests to parent directory, for fsync */ struct inode *r_unsafe_dir; struct list_head r_unsafe_dir_item; diff --git a/src/kernel/mdsmap.c b/src/kernel/mdsmap.c index 584529a7775fc..b545850c74e9c 100644 --- a/src/kernel/mdsmap.c +++ b/src/kernel/mdsmap.c @@ -40,8 +40,10 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m) } /* - * Ignore any fields we don't care about in the MDS map (there are quite - * a few of them). + * Decode an MDS map + * + * Ignore any fields we don't care about (there are quite a few of + * them). */ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) { diff --git a/src/kernel/mdsmap.h b/src/kernel/mdsmap.h index 5560be5f972e4..a9bb3d6ba0d49 100644 --- a/src/kernel/mdsmap.h +++ b/src/kernel/mdsmap.h @@ -4,7 +4,7 @@ #include "types.h" /* - * mds map + * mds map - describe servers in the mds cluster * * fields limited to those the client cares about */ -- 2.39.5