From c9869fd2e0eb16e436152093721a4dfa28534cf5 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 2 Jun 2009 10:52:38 -0700 Subject: [PATCH] rados: add pg (non-object) ops Some ops run against the whole pg, not individual objects. Setting the PGOP flag selects a different write path that skips missing object checks (ignores oid). --- src/TODO | 2 +- src/include/rados.h | 5 +++++ src/osd/OSD.cc | 12 +++++++----- src/osd/ReplicatedPG.cc | 39 +++++++++++++++++++++++++++++++++++++++ src/osd/ReplicatedPG.h | 1 + 5 files changed, 53 insertions(+), 6 deletions(-) diff --git a/src/TODO b/src/TODO index 04425a1dacfa2..b58407c00c56b 100644 --- a/src/TODO +++ b/src/TODO @@ -13,6 +13,7 @@ v0.9 /- make mds exhert memory pressure on client caps, leases /- librados - async io + - list_objects /- object classes - optionally separate osd interfaces (ips) for clients and osds (replication, peering, etc.) @@ -37,7 +38,6 @@ rados - autosize pg_pools? - security - c library glue to c3 -- pyexec? repair - are we concerned about diff --git a/src/include/rados.h b/src/include/rados.h index 1b2c18a11cff4..df2d0ce3535f5 100644 --- a/src/include/rados.h +++ b/src/include/rados.h @@ -173,6 +173,7 @@ struct ceph_eversion { #define CEPH_OSD_OP_TYPE_DATA 0x0200 #define CEPH_OSD_OP_TYPE_ATTR 0x0300 #define CEPH_OSD_OP_TYPE_EXEC 0x0400 +#define CEPH_OSD_OP_TYPE_PG 0x0500 enum { /** data **/ @@ -225,6 +226,9 @@ enum { /** exec **/ CEPH_OSD_OP_RDCALL = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_EXEC | 1, CEPH_OSD_OP_WRCALL = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_EXEC | 1, + + /** pg **/ + CEPH_OSD_OP_PGLS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_PG | 1, }; static inline int ceph_osd_op_type_lock(int op) @@ -320,6 +324,7 @@ enum { CEPH_OSD_FLAG_PEERSTAT = 128, /* msg includes osd_peer_stat */ CEPH_OSD_FLAG_BALANCE_READS = 256, CEPH_OSD_FLAG_PARALLELEXEC = 512, /* execute op in parallel */ + CEPH_OSD_FLAG_PGOP = 1024, /* pg op, no object */ }; #define EOLDSNAPC ERESTART /* ORDERSNAP flag set; writer has old snapc*/ diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 3cb7495cab871..acb970a9422dc 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -3520,11 +3520,13 @@ void OSD::handle_op(MOSDOp *op) } // missing object? - sobject_t head(op->get_oid(), CEPH_NOSNAP); - if (pg->is_missing_object(head)) { - pg->wait_for_missing_object(head, op); - pg->unlock(); - return; + if ((op->get_flags() & CEPH_OSD_FLAG_PGOP) == 0) { + sobject_t head(op->get_oid(), CEPH_NOSNAP); + if (pg->is_missing_object(head)) { + pg->wait_for_missing_object(head, op); + pg->unlock(); + return; + } } diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 0d5f34f88be93..14cdbdc7b07aa 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -366,6 +366,42 @@ bool ReplicatedPG::preprocess_op(MOSDOp *op, utime_t now) return false; } +void ReplicatedPG::do_pg_op(MOSDOp *op) +{ + dout(15) << "do_pg_op " << *op << dendl; + + //bufferlist& indata = op->get_data(); + bufferlist outdata; + int result = 0; + + for (vector::iterator p = op->ops.begin(); p != op->ops.end(); p++) { + switch (p->op) { + + case CEPH_OSD_OP_PGLS: + { + vector pobjects; + // ??? + vector objects; + // ??? + ::encode(objects, outdata); + } + break; + + default: + result = -EINVAL; + break; + } + } + + // reply + MOSDOpReply *reply = new MOSDOpReply(op, 0, osd->osdmap->get_epoch(), + CEPH_OSD_FLAG_ACK | CEPH_OSD_FLAG_ONDISK); + reply->set_data(outdata); + reply->set_result(result); + osd->messenger->send_message(reply, op->get_orig_source_inst()); + delete op; +} + /** do_op - do an op * pg lock will be held (if multithreaded) @@ -375,6 +411,9 @@ void ReplicatedPG::do_op(MOSDOp *op) { osd->logger->inc(l_osd_op); + if (op->get_flags() & CEPH_OSD_FLAG_PGOP) + return do_pg_op(op); + dout(15) << "do_op " << *op << dendl; entity_inst_t client = op->get_source_inst(); diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h index e512520b337e3..c648d833ad624 100644 --- a/src/osd/ReplicatedPG.h +++ b/src/osd/ReplicatedPG.h @@ -442,6 +442,7 @@ public: bool preprocess_op(MOSDOp *op, utime_t now); void do_op(MOSDOp *op); + void do_pg_op(MOSDOp *op); void do_sub_op(MOSDSubOp *op); void do_sub_op_reply(MOSDSubOpReply *op); bool snap_trimmer(); -- 2.39.5