- handle file caps, ack back to mds, etc.
- actually flush dirty data, too
- integrate mds reply trace into cache
+? - reconnect to recovering mds
- osd client
- readpages (readahead)
- async (caching) mode
#define CEPH_MSG_OSD_OPREPLY 43
+/* mds states: values <= 0 and FAILED are "down"; the rest are "up" (see per-state notes) */
+#define CEPH_MDS_STATE_DNE 0 /* down, never existed. */
+#define CEPH_MDS_STATE_STOPPED -1 /* down, once existed, but no subtrees. empty log. */
+#define CEPH_MDS_STATE_DESTROYING -2 /* down, existing, semi-destroyed. NOTE(review): the MDSMap constants this replaces had DESTROYING = -1 and STOPPED = -2; the two values are swapped here -- confirm this is intended. */
+#define CEPH_MDS_STATE_FAILED 3 /* down, active subtrees needs to be recovered. */
+
+#define CEPH_MDS_STATE_BOOT -4 /* up, boot announcement. destiny unknown. */
+#define CEPH_MDS_STATE_STANDBY -5 /* up, idle. waiting for assignment by monitor. */
+#define CEPH_MDS_STATE_CREATING -6 /* up, creating MDS instance (new journal, idalloc..). */
+#define CEPH_MDS_STATE_STARTING -7 /* up, starting prior stopped MDS instance. */
+
+#define CEPH_MDS_STATE_REPLAY 8 /* up, starting prior failed instance. scanning journal. */
+#define CEPH_MDS_STATE_RESOLVE 9 /* up, disambiguating distributed operations (import, rename, etc.) */
+#define CEPH_MDS_STATE_RECONNECT 10 /* up, reconnect to clients */
+#define CEPH_MDS_STATE_REJOIN 11 /* up, replayed journal, rejoining distributed cache */
+#define CEPH_MDS_STATE_ACTIVE 12 /* up, active */
+#define CEPH_MDS_STATE_STOPPING 13 /* up, exporting metadata (-> standby or out) */
+
+
/* client_session message op values */
enum {
CEPH_SESSION_REQUEST_OPEN,
ceph_ino_t mds_wants_replica_in_dirino;
__u32 op;
__u32 caller_uid, caller_gid;
- ceph_ino_t cwd_ino;
// fixed size arguments. in a union.
union {
if ((err = ceph_mdsc_do_request(mdsc, req, &rinfo, -1)) < 0)
return ERR_PTR(err);
err = le32_to_cpu(rinfo.head->result);
- dout(20, "dir_readdir result=%d\n", err);
+ dout(20, "dir_lookup result=%d\n", err);
/* if there was a previous inode associated with this dentry, now there isn't one */
if (err == -ENOENT) {
return msg;
}
/*
 * Start opening a session with mds number 'mds'.
 *
 * Returns 0 after the open request has been handed to send_msg_mds()
 * (session->s_state is then CEPH_MDS_SESSION_OPENING), -EINPROGRESS if
 * the mds state in the current mdsmap is below CEPH_MDS_STATE_ACTIVE
 * (a newer mdsmap is requested instead and nothing is sent), or the
 * PTR_ERR() value of a failed create_session_msg().
 */
-static void open_session(struct ceph_mds_client *mdsc, struct ceph_mds_session *session, int mds)
+static int open_session(struct ceph_mds_client *mdsc, struct ceph_mds_session *session, int mds)
{
struct ceph_msg *msg;
+ int mstate;
/*
 * connect: only possible when the mds is at least ACTIVE.  This is a
 * plain numeric comparison, so it relies on the ordering of the
 * CEPH_MDS_STATE_* values.
 */
- if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) < CEPH_MDS_STATE_ACTIVE) {
+ mstate = ceph_mdsmap_get_state(mdsc->mdsmap, mds);
+ dout(10, "open_session to mds%d, state %d\n", mds, mstate);
+ if (mstate < CEPH_MDS_STATE_ACTIVE) {
ceph_monc_request_mdsmap(&mdsc->client->monc, mdsc->mdsmap->m_epoch);
- return;
+ return -EINPROGRESS;
}
/* send connect message */
msg = create_session_msg(CEPH_SESSION_REQUEST_OPEN, session->s_cap_seq);
if (IS_ERR(msg))
- return; /* fixme */
+ return PTR_ERR(msg); /* fixme */
/* state is switched to OPENING before the message is passed to send_msg_mds() */
session->s_state = CEPH_MDS_SESSION_OPENING;
send_msg_mds(mdsc, msg, mds);
+ return 0;
}
void ceph_mdsc_handle_session(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
0, 0, 0);
if (IS_ERR(req))
return req;
- memset(req->front.iov_base, 0, req->front.iov_len);
head = req->front.iov_base;
p = req->front.iov_base + sizeof(*head);
end = req->front.iov_base + req->front.iov_len;
/* encode head */
- head->op = cpu_to_le32(op);
ceph_encode_inst(&head->client_inst, &mdsc->client->msgr->inst);
+ /* tid, oldest_client_tid set by do_request */
+ head->mdsmap_epoch = cpu_to_le64(mdsc->mdsmap->m_epoch);
+ head->num_fwd = 0;
+ /* head->retry_attempt = 0; set by do_request */
+ head->mds_wants_replica_in_dirino = 0;
+ head->op = cpu_to_le32(op);
+ head->caller_uid = cpu_to_le32(current->uid);
+ head->caller_gid = cpu_to_le32(current->gid);
/* encode paths */
ceph_encode_filepath(&p, end, ino1, path1);
/* get session */
session = get_session(mdsc, mds);
- dout(30, "do_request session %p\n", session);
+ dout(30, "do_request session %p state %d\n", session, session->s_state);
/* open? */
if (session->s_state == CEPH_MDS_SESSION_NEW ||
- session->s_state == CEPH_MDS_SESSION_CLOSING)
- open_session(mdsc, session, mds);
+ session->s_state == CEPH_MDS_SESSION_CLOSING) {
+ err = open_session(mdsc, session, mds);
+ if (err == -EINPROGRESS) {
+ /* waiting for new mdsmap. bleh, this is a little messy. */
+ spin_unlock(&mdsc->lock);
+ wait_for_completion(&mdsc->map_waiters);
+ spin_lock(&mdsc->lock);
+ goto retry;
+ }
+ BUG_ON(err); /* implement me */
+ }
if (session->s_state == CEPH_MDS_SESSION_OPENING) {
/* wait for session to open (or fail, or close) */
spin_unlock(&mdsc->lock);
if ((err = ceph_mdsc_parse_reply_info(reply, rinfo)) < 0)
return err;
- dout(30, "do_request done on %p\n", msg);
+ dout(30, "do_request done on %p result %d tracelen %d\n", msg,
+ rinfo->head->result, rinfo->trace_nr);
return 0;
}
* state associated with each MDS<->client session
*/
enum {
- CEPH_MDS_SESSION_NEW,
- CEPH_MDS_SESSION_OPENING,
- CEPH_MDS_SESSION_OPEN,
- CEPH_MDS_SESSION_CLOSING
+ CEPH_MDS_SESSION_NEW = 1, /* do_request calls open_session() for a session in this state */
+ CEPH_MDS_SESSION_OPENING = 2, /* set by open_session() when it sends the open request; do_request waits on it */
+ CEPH_MDS_SESSION_OPEN = 3,
+ CEPH_MDS_SESSION_CLOSING = 4 /* do_request calls open_session() for a session in this state too */
}; /* NOTE(review): numbering now starts at 1, presumably so that 0 is not a valid state -- confirm */
struct ceph_mds_session {
int s_state;
#include <linux/ceph_fs.h>
-/* see mds/MDSMap.h */
-#define CEPH_MDS_STATE_DNE 0 /* down, never existed. */
-#define CEPH_MDS_STATE_STOPPED -1 /* down, once existed, but no subtrees. empty log. */
-#define CEPH_MDS_STATE_FAILED 2 /* down, active subtrees needs to be recovered. */
-
-#define CEPH_MDS_STATE_BOOT -3 /* up, boot announcement. destiny unknown. */
-#define CEPH_MDS_STATE_STANDBY -4 /* up, idle. waiting for assignment by monitor. */
-#define CEPH_MDS_STATE_CREATING -5 /* up, creating MDS instance (new journal, idalloc..). */
-#define CEPH_MDS_STATE_STARTING -6 /* up, starting prior stopped MDS instance. */
-
-#define CEPH_MDS_STATE_REPLAY 7 /* up, starting prior failed instance. scanning journal. */
-#define CEPH_MDS_STATE_RESOLVE 8 /* up, disambiguating distributed operations (import, rename, etc.) */
-#define CEPH_MDS_STATE_RECONNECT 9 /* up, reconnect to clients */
-#define CEPH_MDS_STATE_REJOIN 10 /* up, replayed journal, rejoining distributed cache */
-#define CEPH_MDS_STATE_ACTIVE 11 /* up, active */
-#define CEPH_MDS_STATE_STOPPING 12 /* up, exporting metadata (-> standby or out) */
-
/*
* mds map
*
/*
 * Stub: only emits a debug message.  Neither 'monc' nor 'have' is used
 * and no request is sent to a monitor.
 *
 * NOTE(review): judging by the name and by open_session(), which passes
 * the current mdsmap epoch as 'have', this is presumably meant to ask the
 * monitor for a newer mdsmap -- confirm when implementing.
 */
void ceph_monc_request_mdsmap(struct ceph_mon_client *monc, __u64 have)
{
- dout(5, "ceph_monc_request_mdsmap\n");
+ dout(5, "ceph_monc_request_mdsmap -- IMPLEMENT ME\n");
+
}
struct inode *inode;
struct dentry *root;
struct ceph_msg *req = 0;
+ struct ceph_mds_request_head *reqhead;
struct ceph_mds_reply_info rinfo;
int frommds;
int err;
req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_OPEN, 1, args->path, 0, 0);
if (IS_ERR(req))
return PTR_ERR(req);
+ reqhead = req->front.iov_base;
+ reqhead->args.open.flags = 0;
+ reqhead->args.open.mode = 0;
if ((err = ceph_mdsc_do_request(mdsc, req, &rinfo, -1)) < 0)
return err;
class MDSMap {
public:
// mds states
- static const int STATE_DNE = 0; // down, never existed.
- static const int STATE_DESTROYING = -1; // down, existing, semi-destroyed.
- static const int STATE_STOPPED = -2; // down, once existed, but no subtrees. empty log.
- static const int STATE_FAILED = 3; // down, active subtrees; needs to be recovered.
-
- static const int STATE_BOOT = -4; // up, boot announcement. destiny unknown.
- static const int STATE_STANDBY = -5; // up, idle. waiting for assignment by monitor.
-
- static const int STATE_CREATING = -6; // up, creating MDS instance (new journal, idalloc..).
- static const int STATE_STARTING = -7; // up, starting prior stopped MDS instance.
-
- static const int STATE_REPLAY = 8; // up, starting prior failed instance. scanning journal.
- static const int STATE_RESOLVE = 9; // up, disambiguating distributed operations (import, rename, etc.)
- static const int STATE_RECONNECT = 10; // up, reconnect to clients
- static const int STATE_REJOIN = 11; // up, replayed journal, rejoining distributed cache
- static const int STATE_ACTIVE = 12; // up, active
- static const int STATE_STOPPING = 13; // up, exporting metadata (-> standby or out)
+ static const int STATE_DNE = CEPH_MDS_STATE_DNE; // down, never existed.
+ static const int STATE_DESTROYING = CEPH_MDS_STATE_DESTROYING; // down, existing, semi-destroyed.
+ static const int STATE_STOPPED = CEPH_MDS_STATE_STOPPED; // down, once existed, but no subtrees. empty log.
+ static const int STATE_FAILED = CEPH_MDS_STATE_FAILED; // down, active subtrees; needs to be recovered.
+
+ static const int STATE_BOOT = CEPH_MDS_STATE_BOOT; // up, boot announcement. destiny unknown.
+ static const int STATE_STANDBY = CEPH_MDS_STATE_STANDBY; // up, idle. waiting for assignment by monitor.
+
+ static const int STATE_CREATING = CEPH_MDS_STATE_CREATING; // up, creating MDS instance (new journal, idalloc..).
+ static const int STATE_STARTING = CEPH_MDS_STATE_STARTING; // up, starting prior stopped MDS instance.
+
+ static const int STATE_REPLAY = CEPH_MDS_STATE_REPLAY; // up, starting prior failed instance. scanning journal.
+ static const int STATE_RESOLVE = CEPH_MDS_STATE_RESOLVE; // up, disambiguating distributed operations (import, rename, etc.)
+ static const int STATE_RECONNECT = CEPH_MDS_STATE_RECONNECT; // up, reconnect to clients
+ static const int STATE_REJOIN = CEPH_MDS_STATE_REJOIN; // up, replayed journal, rejoining distributed cache
+ static const int STATE_ACTIVE = CEPH_MDS_STATE_ACTIVE; // up, active
+ static const int STATE_STOPPING = CEPH_MDS_STATE_STOPPING; // up, exporting metadata (-> standby or out)
static const char *get_state_name(int s) {
switch (s) {
// Accessor: returns head.mds_wants_replica_in_dirino as stored in the request head.
inodeno_t get_mds_wants_replica_in_dirino() {
return head.mds_wants_replica_in_dirino; }
- inodeno_t get_cwd_ino() { return head.cwd_ino ? head.cwd_ino:MDS_INO_ROOT; }
-
void decode_payload() {
int off = 0;
::_decode(head, payload, off);