From b8228b35d7381c08b8686dee5175126b7b35e3d3 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 20 May 2009 12:10:12 -0700 Subject: [PATCH] osd: describe osd ops as READ, WRITE, or READ|WRITE An op is either a pure read (read, stat), a pure (blind) write (write extent, delete), or read+write (e.g., read/modify/write). Writes with read portions that are captured by ProjectedObjectInfo (size, truncate attr) can pretend to be purely write (as they are now). --- src/include/ceph_fs.h | 2 +- src/include/rados.h | 5 ++++- src/kernel/addr.c | 2 +- src/kernel/file.c | 2 +- src/kernel/osd_client.c | 18 ++++++++++-------- src/messages/MOSDOp.h | 14 +++++++++++--- src/messages/MOSDOpReply.h | 5 +++-- src/osd/OSD.cc | 6 +++--- src/osd/ReplicatedPG.cc | 8 ++++---- src/osdc/Objecter.cc | 6 +++--- 10 files changed, 41 insertions(+), 27 deletions(-) diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h index 5de2c309cacbb..eec7d00e47829 100644 --- a/src/include/ceph_fs.h +++ b/src/include/ceph_fs.h @@ -26,7 +26,7 @@ #define CEPH_OSD_PROTOCOL 5 /* cluster internal */ #define CEPH_MDS_PROTOCOL 9 /* cluster internal */ #define CEPH_MON_PROTOCOL 4 /* cluster internal */ -#define CEPH_OSDC_PROTOCOL 11 /* public/client */ +#define CEPH_OSDC_PROTOCOL 12 /* public/client */ #define CEPH_MDSC_PROTOCOL 20 /* public/client */ #define CEPH_MONC_PROTOCOL 12 /* public/client */ diff --git a/src/include/rados.h b/src/include/rados.h index ef75e48b0570b..311482f566fd9 100644 --- a/src/include/rados.h +++ b/src/include/rados.h @@ -307,13 +307,16 @@ static inline const char *ceph_osd_op_name(int op) /* * osd op flags + * + * An op may be READ, WRITE, or READ|WRITE. 
*/ enum { CEPH_OSD_FLAG_ACK = 1, /* want (or is) "ack" ack */ CEPH_OSD_FLAG_ONNVRAM = 2, /* want (or is) "onnvram" ack */ CEPH_OSD_FLAG_ONDISK = 4, /* want (or is) "ondisk" ack */ CEPH_OSD_FLAG_RETRY = 8, /* resend attempt */ - CEPH_OSD_FLAG_MODIFY = 32, /* op is/was a mutation */ + CEPH_OSD_FLAG_READ = 16, /* op may read */ + CEPH_OSD_FLAG_WRITE = 32, /* op may write */ CEPH_OSD_FLAG_ORDERSNAP = 64, /* EOLDSNAP if snapc is out of order */ CEPH_OSD_FLAG_PEERSTAT = 128, /* msg includes osd_peer_stat */ CEPH_OSD_FLAG_BALANCE_READS = 256, diff --git a/src/kernel/addr.c b/src/kernel/addr.c index e8299b298f367..326125f994651 100644 --- a/src/kernel/addr.c +++ b/src/kernel/addr.c @@ -734,7 +734,7 @@ get_more_pages: ceph_vino(inode), offset, &len, CEPH_OSD_OP_WRITE, - CEPH_OSD_FLAG_MODIFY | + CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, snapc, do_sync, ci->i_truncate_seq, diff --git a/src/kernel/file.c b/src/kernel/file.c index 98a92c2e270b1..8d21e3a103190 100644 --- a/src/kernel/file.c +++ b/src/kernel/file.c @@ -542,7 +542,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, flags = CEPH_OSD_FLAG_ORDERSNAP | CEPH_OSD_FLAG_ONDISK | - CEPH_OSD_FLAG_MODIFY; + CEPH_OSD_FLAG_WRITE; if ((file->f_flags & (O_SYNC|O_DIRECT)) == 0) flags |= CEPH_OSD_FLAG_ACK; else diff --git a/src/kernel/osd_client.c b/src/kernel/osd_client.c index b1ec08e4d674e..a15dfb864f7fa 100644 --- a/src/kernel/osd_client.c +++ b/src/kernel/osd_client.c @@ -108,9 +108,11 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, init_completion(&req->r_completion); init_completion(&req->r_safe_completion); INIT_LIST_HEAD(&req->r_unsafe_item); - req->r_flags = flags & CEPH_OSD_FLAG_MODIFY; + req->r_flags = flags; req->r_last_osd = -1; + WARN_ON((flags & (CEPH_OSD_FLAG_READ|CEPH_OSD_FLAG_WRITE)) == 0); + /* create message */ if (snapc) msg_size += sizeof(u64) * snapc->num_snaps; @@ -126,7 +128,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client
*osdc, head->client_inc = cpu_to_le32(1); /* always, for now. */ head->flags = cpu_to_le32(flags); - if (flags & CEPH_OSD_FLAG_MODIFY) + if (flags & CEPH_OSD_FLAG_WRITE) ceph_encode_timespec(&head->mtime, mtime); head->num_ops = cpu_to_le16(num_op); op->op = cpu_to_le16(opcode); @@ -138,7 +140,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, calc_layout(osdc, vino, layout, off, plen, req); req->r_file_layout = *layout; /* keep a copy */ - if (flags & CEPH_OSD_FLAG_MODIFY) { + if (flags & CEPH_OSD_FLAG_WRITE) { req->r_request->hdr.data_off = cpu_to_le16(off); req->r_request->hdr.data_len = cpu_to_le32(*plen); } @@ -463,7 +465,7 @@ void ceph_osdc_handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg) /* either this is a read, or we got the safe response */ if ((flags & CEPH_OSD_FLAG_ONDISK) || - ((flags & CEPH_OSD_FLAG_MODIFY) == 0)) + ((flags & CEPH_OSD_FLAG_WRITE) == 0)) __unregister_request(osdc, req); mutex_unlock(&osdc->request_mutex); @@ -809,7 +811,7 @@ void ceph_osdc_sync(struct ceph_osd_client *osdc) break; next_tid = req->r_tid + 1; - if ((req->r_flags & CEPH_OSD_FLAG_MODIFY) == 0) + if ((req->r_flags & CEPH_OSD_FLAG_WRITE) == 0) continue; ceph_osdc_get_request(req); @@ -870,8 +872,8 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc, dout(10, "readpages on ino %llx.%llx on %llu~%llu\n", vino.ino, vino.snap, off, len); req = ceph_osdc_new_request(osdc, layout, vino, off, &len, - CEPH_OSD_OP_READ, 0, NULL, 0, - truncate_seq, truncate_size, NULL); + CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, + NULL, 0, truncate_seq, truncate_size, NULL); if (IS_ERR(req)) return PTR_ERR(req); @@ -946,7 +948,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, req = ceph_osdc_new_request(osdc, layout, vino, off, &len, CEPH_OSD_OP_WRITE, flags | CEPH_OSD_FLAG_ONDISK | - CEPH_OSD_FLAG_MODIFY, + CEPH_OSD_FLAG_WRITE, snapc, do_sync, truncate_seq, truncate_size, mtime); if (IS_ERR(req)) diff --git 
a/src/messages/MOSDOp.h b/src/messages/MOSDOp.h index 186bdcbf52161..7736419a5ebae 100644 --- a/src/messages/MOSDOp.h +++ b/src/messages/MOSDOp.h @@ -60,7 +60,8 @@ public: utime_t get_mtime() { return head.mtime; } - bool is_modify() { return head.flags & CEPH_OSD_FLAG_MODIFY; } + bool may_read() { return head.flags & CEPH_OSD_FLAG_READ; } + bool may_write() { return head.flags & CEPH_OSD_FLAG_WRITE; } void set_peer_stat(const osd_peer_stat_t& stat) { peer_stat = stat; @@ -173,8 +174,15 @@ public: void print(ostream& out) { out << "osd_op(" << get_reqid(); out << " " << head.oid; - if (!is_modify()) - out << " @" << snapid_t((__u64)head.snapid); + + out << " "; + if (may_read()) + out << "r"; + if (may_write()) + out << "w"; + else + out << "@" << snapid_t((__u64)head.snapid); + out << " " << ops; out << " " << pg_t(head.layout.ol_pgid); if (is_retry_attempt()) out << " RETRY"; diff --git a/src/messages/MOSDOpReply.h b/src/messages/MOSDOpReply.h index f13d51c04f0a7..28644feb3578b 100644 --- a/src/messages/MOSDOpReply.h +++ b/src/messages/MOSDOpReply.h @@ -45,7 +45,8 @@ class MOSDOpReply : public Message { __s32 get_result() { return head.result; } eversion_t get_version() { return head.reassert_version; } - bool is_modify() { return head.flags & CEPH_OSD_FLAG_MODIFY; } + bool may_read() { return head.flags & CEPH_OSD_FLAG_READ; } + bool may_write() { return head.flags & CEPH_OSD_FLAG_WRITE; } void set_result(int r) { head.result = r; } void set_version(eversion_t v) { head.reassert_version = v; } @@ -92,7 +93,7 @@ public: void print(ostream& out) { out << "osd_op_reply(" << get_reqid() << " " << head.oid << " " << ops; - if (is_modify()) { + if (may_write()) { if (is_ondisk()) out << " ondisk"; else if (is_onnvram()) diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 863f74010cfc2..35fe0f59965d3 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -3421,7 +3421,7 @@ void OSD::handle_op(MOSDOp *op) stat_oprate.hit(now); stat_ops++; stat_qlen += pending_ops; - if 
(!op->is_modify()) { + if (!op->may_write()) { stat_rd_ops++; if (op->get_source().is_osd()) { //derr(-10) << "shed in " << stat_rd_ops_shed_in << " / " << stat_rd_ops << dendl; @@ -3442,7 +3442,7 @@ void OSD::handle_op(MOSDOp *op) } // pg must be same-ish... - if (!op->is_modify()) { + if (!op->may_write()) { // read if (!pg->same_for_read_since(op->get_map_epoch())) { dout(7) << "handle_rep_op pg changed " << pg->info.history @@ -3530,7 +3530,7 @@ void OSD::handle_op(MOSDOp *op) return; } - if (!op->is_modify()) { + if (!op->may_write()) { Mutex::Locker lock(peer_stat_lock); stat_rd_ops_in_queue++; } diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index a1327f60f9f64..4fb988c9c38a1 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -109,10 +109,10 @@ void ReplicatedPG::wait_for_missing_object(sobject_t soid, Message *m) bool ReplicatedPG::preprocess_op(MOSDOp *op, utime_t now) { // we only care about reads here on out.. - if (op->is_modify() || - op->ops.size() < 1 || - op->is_modify()) + if (op->may_write() || + op->ops.size() < 1) return false; + ceph_osd_op& readop = op->ops[0]; object_t oid = op->get_oid(); @@ -375,7 +375,7 @@ void ReplicatedPG::do_op(MOSDOp *op) osd->logger->inc(l_osd_op); - if (op->is_modify()) + if (op->may_write()) op_modify(op); else op_read(op); diff --git a/src/osdc/Objecter.cc b/src/osdc/Objecter.cc index 7a4dcb48431da..25f8cc60821fb 100644 --- a/src/osdc/Objecter.cc +++ b/src/osdc/Objecter.cc @@ -348,7 +348,7 @@ void Objecter::tick() void Objecter::handle_osd_op_reply(MOSDOpReply *m) { - if (m->is_modify()) + if (m->may_write()) handle_osd_modify_reply(m); else handle_osd_read_reply(m); @@ -390,7 +390,7 @@ tid_t Objecter::read_submit(ReadOp *rd) flags |= CEPH_OSD_FLAG_ACK; MOSDOp *m = new MOSDOp(client_inc, last_tid, rd->oid, rd->layout, osdmap->get_epoch(), - flags); + flags | CEPH_OSD_FLAG_READ); m->set_snapid(rd->snap); m->ops = rd->ops; m->set_data(rd->bl); @@ -506,7 +506,7 @@ tid_t 
Objecter::modify_submit(ModifyOp *wr) } else if (pg.primary() >= 0) { MOSDOp *m = new MOSDOp(client_inc, wr->tid, wr->oid, wr->layout, osdmap->get_epoch(), - flags | CEPH_OSD_FLAG_MODIFY); + flags | CEPH_OSD_FLAG_WRITE); m->ops = wr->ops; m->set_mtime(wr->mtime); m->set_snap_seq(wr->snapc.seq); -- 2.39.5