int64_t ol = t.get_int();
object_t oid(oh, ol);
lock.Lock();
- ObjectLayout layout = client->osdmap->make_object_layout(oid, pg_t::TYPE_REP, 2);
+ ceph_object_layout_t layout = client->osdmap->make_object_layout(oid, pg_t::TYPE_REP, 2);
off_t size;
client->objecter->stat(oid, &size, layout, new C_SafeCond(&lock, &cond, &ack));
while (!ack) cond.Wait(lock);
int64_t len = t.get_int();
object_t oid(oh, ol);
lock.Lock();
- ObjectLayout layout = client->osdmap->make_object_layout(oid, pg_t::TYPE_REP, 2);
+ ceph_object_layout_t layout = client->osdmap->make_object_layout(oid, pg_t::TYPE_REP, 2);
bufferlist bl;
client->objecter->read(oid, off, len, layout, &bl, new C_SafeCond(&lock, &cond, &ack));
while (!ack) cond.Wait(lock);
int64_t len = t.get_int();
object_t oid(oh, ol);
lock.Lock();
- ObjectLayout layout = client->osdmap->make_object_layout(oid, pg_t::TYPE_REP, 2);
+ ceph_object_layout_t layout = client->osdmap->make_object_layout(oid, pg_t::TYPE_REP, 2);
bufferptr bp(len);
bufferlist bl;
bl.push_back(bp);
int64_t len = t.get_int();
object_t oid(oh, ol);
lock.Lock();
- ObjectLayout layout = client->osdmap->make_object_layout(oid, pg_t::TYPE_REP, 2);
+ ceph_object_layout_t layout = client->osdmap->make_object_layout(oid, pg_t::TYPE_REP, 2);
client->objecter->zero(oid, off, len, layout,
new C_SafeCond(&lock, &cond, &ack),
safeg->new_sub());
if (time_to_stop()) break;
object_t oid(0x1000, i);
- ObjectLayout layout = client->osdmap->make_object_layout(oid, pg_t::TYPE_REP, g_OSD_FileLayout.fl_pg_size);
+ ceph_object_layout_t layout = client->osdmap->make_object_layout(oid, pg_t::TYPE_REP, g_OSD_FileLayout.fl_pg_size);
if (i % inflight == 0) {
dout(6) << "create_objects " << i << "/" << (nobj+1) << dendl;
}
object_t oid(0x1000, o);
- ObjectLayout layout = client->osdmap->make_object_layout(oid, pg_t::TYPE_REP, g_OSD_FileLayout.fl_pg_size);
+ ceph_object_layout_t layout = client->osdmap->make_object_layout(oid, pg_t::TYPE_REP, g_OSD_FileLayout.fl_pg_size);
client->client_lock.Lock();
utime_t start = g_clock.now();
* object layout - how a given object should be stored.
*/
struct ceph_object_layout {
- ceph_pg_t ol_pgid;
- __u32 ol_stripe_unit;
+ ceph_pg_t pgid;       /* which pg the object belongs to */
+ __u32 stripe_unit;    /* for object raid in raid pgs */
};
+/* short name used by the C++ code in place of the removed ObjectLayout class */
+typedef struct ceph_object_layout ceph_object_layout_t;
/*
};
enum {
- CEPH_OSD_OP_WANT_ACK,
- CEPH_OSD_OP_WANT_SAFE,
- CEPH_OSD_OP_IS_RETRY
+ /*
+  * Bit flags; OR-ed together into ceph_osd_request_head.flags, so each
+  * value must be a distinct power of two.
+  */
+ CEPH_OSD_OP_WANT_ACK = 1,
+ CEPH_OSD_OP_WANT_SAFE = 2,
+ CEPH_OSD_OP_IS_RETRY = 4
};
+/*
+ * Packed, fixed-layout replacement for the former C++ osd_peer_stat_t:
+ * the timestamp is stored as a ceph_timeval and the values that used to
+ * be doubles are stored as floats.  Embedded in ceph_osd_request_head
+ * as peer_stat.
+ */
+struct ceph_osd_peer_stat {
+ ceph_timeval stamp;
+ float oprate;
+ float qlen;
+ float recent_qlen;
+ float read_latency;
+ float read_latency_mine;
+ float frac_rd_ops_shed_in;
+ float frac_rd_ops_shed_out;
+} __attribute__ ((packed));
+typedef struct ceph_osd_peer_stat ceph_osd_peer_stat_t;
+
struct ceph_osd_request_head {
- struct ceph_entity_inst client;
- ceph_osd_reqid_t reqid;
+ struct ceph_entity_addr client_addr;
+ ceph_osd_reqid_t reqid; /* fixme: this dups client.name */
__u32 op;
__u64 offset, length;
ceph_object_t oid;
- struct ceph_object_layout layout;
+ ceph_object_layout_t layout;
ceph_epoch_t osdmap_epoch;
__u32 flags;
- /* hack, fix me */
- ceph_tid_t rep_tid;
- ceph_eversion_t pg_trim_to;
- __u32 shed_count;
- //osd_peer_stat_t peer_stat;
+ ceph_eversion_t reassert_version;
+
+ /* semi-hack, fix me */
+ __u32 shed_count;
+ ceph_osd_peer_stat_t peer_stat;
} __attribute__ ((packed));
#endif
struct object_t {
static const uint32_t MAXREV = 0xffffffffU;
+ // IMPORTANT: make this match struct ceph_object ****
+
uint64_t ino; // "file" identifier
uint32_t bno; // "block" in that "file"
objectrev_t rev; // revision. normally ctime (as epoch).
object_t() : ino(0), bno(0), rev(0) {}
object_t(uint64_t i, uint32_t b) : ino(i), bno(b), rev(0) {}
object_t(uint64_t i, uint32_t b, uint32_t r) : ino(i), bno(b), rev(r) {}
+
+ // yuck.
+  // Build an object_t from the raw ceph_object struct, field by field.
+  object_t(const ceph_object& co)
+    : ino(co.ino), bno(co.bno), rev(co.rev) {}
+  // Convert back to the raw ceph_object_t by copying ino/bno/rev.
+  // Declared const so that const object_t values and const references
+  // can be converted as well; the conversion never modifies *this.
+  operator ceph_object_t() const {
+    ceph_object_t oid;
+    oid.ino = ino;
+    oid.bno = bno;
+    oid.rev = rev;
+    return oid;
+  }
} __attribute__ ((packed));
inline bool operator==(const object_t l, const object_t r) {
}
private:
- struct st_ {
- // who's asking?
- entity_inst_t client;
- osdreqid_t reqid; // minor weirdness: entity_name_t is in reqid_t too.
-
- // for replication
- tid_t rep_tid;
-
- object_t oid;
- objectrev_t rev;
- ObjectLayout layout;
-
- epoch_t map_epoch;
-
- eversion_t pg_trim_to; // primary->replica: trim to here
-
- int32_t op;
- off_t offset, length;
-
- eversion_t version;
- eversion_t old_version;
-
- bool want_ack;
- bool want_commit;
- bool retry_attempt;
-
- int shed_count;
- osd_peer_stat_t peer_stat;
- } st;
-
+ ceph_osd_request_head head;
map<string,bufferptr> attrset;
friend class MOSDOpReply;
public:
- const osdreqid_t& get_reqid() { return st.reqid; }
- const tid_t get_client_tid() { return st.reqid.tid; }
- int get_client_inc() { return st.reqid.inc; }
-
- const entity_name_t& get_client() { return st.client.name; }
- const entity_inst_t& get_client_inst() { return st.client; }
- void set_client_inst(const entity_inst_t& i) { st.client = i; }
+ const ceph_osd_reqid_t& get_reqid() { return head.reqid; }
+ const tid_t get_client_tid() { return head.reqid.tid; }
+ int get_client_inc() { return head.reqid.inc; }
- bool wants_reply() {
- if (st.op < 100) return true;
- return false; // no reply needed for primary-lock, -unlock.
+ entity_name_t get_client() { return head.reqid.name; }
+ entity_inst_t get_client_inst() {
+ return entity_inst_t(head.reqid.name, head.client_addr);
}
+ void set_client_addr(const entity_addr_t& a) { head.client_addr = a.v; }
- const tid_t get_rep_tid() { return st.rep_tid; }
- void set_rep_tid(tid_t t) { st.rep_tid = t; }
+ object_t get_oid() { return object_t(head.oid); }
+ pg_t get_pg() { return head.layout.pgid; }
+ ceph_object_layout_t get_layout() { return head.layout; }
+ epoch_t get_map_epoch() { return head.osdmap_epoch; }
- bool get_retry_attempt() const { return st.retry_attempt; }
- void set_retry_attempt(bool a) { st.retry_attempt = a; }
-
- const object_t get_oid() { return st.oid; }
- const pg_t get_pg() { return st.layout.pgid; }
- const ObjectLayout& get_layout() { return st.layout; }
- const epoch_t get_map_epoch() { return st.map_epoch; }
-
- const eversion_t get_version() { return st.version; }
- //const eversion_t get_old_version() { return st.old_version; }
+ eversion_t get_version() { return head.reassert_version; }
- void set_rev(objectrev_t r) { st.rev = r; }
- objectrev_t get_rev() { return st.rev; }
-
- const eversion_t get_pg_trim_to() { return st.pg_trim_to; }
- void set_pg_trim_to(eversion_t v) { st.pg_trim_to = v; }
-
- const int get_op() { return st.op; }
- void set_op(int o) { st.op = o; }
+ const int get_op() { return head.op; }
+ void set_op(int o) { head.op = o; }
bool is_read() {
- return st.op < 10;
+ return head.op < 10;
}
- const off_t get_length() { return st.length; }
- const off_t get_offset() { return st.offset; }
+ const off_t get_length() { return head.length; }
+ const off_t get_offset() { return head.offset; }
map<string,bufferptr>& get_attrset() { return attrset; }
void set_attrset(map<string,bufferptr> &as) { attrset.swap(as); }
- const bool wants_ack() { return st.want_ack; }
- const bool wants_commit() { return st.want_commit; }
+ void set_peer_stat(const osd_peer_stat_t& stat) { head.peer_stat = stat; }
+ const ceph_osd_peer_stat_t& get_peer_stat() { return head.peer_stat; }
- void set_peer_stat(const osd_peer_stat_t& stat) { st.peer_stat = stat; }
- const osd_peer_stat_t& get_peer_stat() { return st.peer_stat; }
- void inc_shed_count() { st.shed_count++; }
- int get_shed_count() { return st.shed_count; }
+ void inc_shed_count() { head.shed_count++; }
+ int get_shed_count() { return head.shed_count; }
MOSDOp(entity_inst_t asker, int inc, long tid,
- object_t oid, ObjectLayout ol, epoch_t mapepoch, int op) :
+ object_t oid, ceph_object_layout_t ol, epoch_t mapepoch, int op) :
Message(CEPH_MSG_OSD_OP) {
- memset(&st, 0, sizeof(st));
- this->st.client = asker;
- this->st.reqid.name = asker.name;
- this->st.reqid.inc = inc;
- this->st.reqid.tid = tid;
-
- this->st.oid = oid;
- this->st.layout = ol;
- this->st.map_epoch = mapepoch;
- this->st.op = op;
-
- this->st.rep_tid = 0;
-
- this->st.want_ack = true;
- this->st.want_commit = true;
+ memset(&head, 0, sizeof(head));
+ head.client_addr = asker.addr.v;
+ head.reqid.name = asker.name.v;
+ head.reqid.inc = inc;
+ head.reqid.tid = tid;
+
+ head.oid = oid;
+ head.layout = ol;
+ head.osdmap_epoch = mapepoch;
+ head.op = op;
+
+ head.flags = CEPH_OSD_OP_WANT_ACK | CEPH_OSD_OP_WANT_SAFE;
}
MOSDOp() {}
- void set_layout(const ObjectLayout& l) { st.layout = l; }
+ void set_layout(const ceph_object_layout_t& l) { head.layout = l; }
- void set_length(off_t l) { st.length = l; }
- void set_offset(off_t o) { st.offset = o; }
- void set_version(eversion_t v) { st.version = v; }
- void set_old_version(eversion_t ov) { st.old_version = ov; }
+ void set_length(off_t l) { head.length = l; }
+ void set_offset(off_t o) { head.offset = o; }
+ void set_version(eversion_t v) { head.reassert_version = v; }
- void set_want_ack(bool b) { st.want_ack = b; }
- void set_want_commit(bool b) { st.want_commit = b; }
+  // True when the WANT_ACK bit is set in the request flags.
+  bool wants_ack() { return (head.flags & CEPH_OSD_OP_WANT_ACK) != 0; }
+  // True when the WANT_SAFE bit is set in the request flags.
+  bool wants_commit() { return (head.flags & CEPH_OSD_OP_WANT_SAFE) != 0; }
+  // True when the IS_RETRY bit is set in the request flags.
+  bool is_retry_attempt() const { return (head.flags & CEPH_OSD_OP_IS_RETRY) != 0; }
+
+  // Set (b == true) or clear (b == false) the WANT_ACK flag.
+  // The argument must be honoured: unconditionally OR-ing the bit in would
+  // make set_want_ack(false) turn the flag on instead of off.
+  void set_want_ack(bool b) {
+    if (b)
+      head.flags |= CEPH_OSD_OP_WANT_ACK;
+    else
+      head.flags &= ~CEPH_OSD_OP_WANT_ACK;
+  }
+  // Set (b == true) or clear (b == false) the WANT_SAFE flag.
+  // The argument must be honoured: unconditionally OR-ing the bit in would
+  // make set_want_commit(false) turn the flag on instead of off.
+  void set_want_commit(bool b) {
+    if (b)
+      head.flags |= CEPH_OSD_OP_WANT_SAFE;
+    else
+      head.flags &= ~CEPH_OSD_OP_WANT_SAFE;
+  }
+  // Set (a == true) or clear (a == false) the IS_RETRY flag.
+  // The argument must be honoured: unconditionally OR-ing the bit in would
+  // make set_retry_attempt(false) mark the op as a retry.
+  void set_retry_attempt(bool a) {
+    if (a)
+      head.flags |= CEPH_OSD_OP_IS_RETRY;
+    else
+      head.flags &= ~CEPH_OSD_OP_IS_RETRY;
+  }
// marshalling
virtual void decode_payload() {
int off = 0;
- ::_decode(st, payload, off);
+ ::_decode(head, payload, off);
::_decode(attrset, payload, off);
}
virtual void encode_payload() {
- ::_encode(st, payload);
+ ::_encode(head, payload);
::_encode(attrset, payload);
- env.data_off = st.offset;
+ env.data_off = head.offset;
}
virtual char *get_type_name() { return "osd_op"; }
void print(ostream& out) {
- out << "osd_op(" << st.reqid
- << " " << get_opname(st.op)
- << " " << st.oid;
- if (st.length) out << " " << st.offset << "~" << st.length;
- if (st.retry_attempt) out << " RETRY";
+ out << "osd_op(" << head.reqid
+ << " " << get_opname(head.op)
+ << " " << head.oid;
+ if (head.length) out << " " << head.offset << "~" << head.length;
+ if (is_retry_attempt()) out << " RETRY";
out << ")";
}
};
class MOSDOpReply : public Message {
struct st_t {
// req
- osdreqid_t reqid;
+ ceph_osd_reqid_t reqid;
- tid_t rep_tid;
-
object_t oid;
- ObjectLayout layout; // pgid, etc.
+ ceph_object_layout_t layout; // pgid, etc.
int32_t op;
map<string,bufferptr> attrset;
public:
- const osdreqid_t& get_reqid() { return st.reqid; }
+ const ceph_osd_reqid_t& get_reqid() { return st.reqid; }
long get_tid() { return st.reqid.tid; }
- long get_rep_tid() { return st.rep_tid; }
object_t get_oid() { return st.oid; }
pg_t get_pg() { return st.layout.pgid; }
int get_op() { return st.op; }
void set_attrset(map<string,bufferptr> &as) { attrset = as; }
void set_op(int op) { st.op = op; }
- void set_rep_tid(tid_t t) { st.rep_tid = t; }
// osdmap
epoch_t get_map_epoch() { return st.map_epoch; }
MOSDOpReply(MOSDOp *req, int result, epoch_t e, bool commit) :
Message(CEPH_MSG_OSD_OPREPLY) {
memset(&st, 0, sizeof(st));
- this->st.reqid = req->st.reqid;
- this->st.op = req->st.op;
- this->st.rep_tid = req->st.rep_tid;
+ this->st.reqid = req->head.reqid;
+ this->st.op = req->head.op;
- this->st.oid = req->st.oid;
- this->st.layout = req->st.layout;
+ this->st.oid = req->head.oid;
+ this->st.layout = req->head.layout;
this->st.result = result;
this->st.commit = commit;
- this->st.length = req->st.length; // speculative... OSD should ensure these are correct
- this->st.offset = req->st.offset;
- this->st.version = req->st.version;
+ this->st.length = req->head.length; // speculative... OSD should ensure these are correct
+ this->st.offset = req->head.offset;
+ this->st.version = req->head.reassert_version;
this->st.map_epoch = e;
}
epoch_t map_epoch;
// metadata from original request
- osdreqid_t reqid;
+ ceph_osd_reqid_t reqid;
// subop
pg_t pgid;
public:
const epoch_t get_map_epoch() { return st.map_epoch; }
- const osdreqid_t& get_reqid() { return st.reqid; }
+ const ceph_osd_reqid_t& get_reqid() { return st.reqid; }
bool wants_reply() {
if (st.op < 100) return true;
void set_peer_stat(const osd_peer_stat_t& stat) { st.peer_stat = stat; }
const osd_peer_stat_t& get_peer_stat() { return st.peer_stat; }
- MOSDSubOp(osdreqid_t r, pg_t p, pobject_t po, int o, off_t of, off_t le,
+ MOSDSubOp(ceph_osd_reqid_t r, pg_t p, pobject_t po, int o, off_t of, off_t le,
epoch_t mape, tid_t rtid, eversion_t v) :
Message(MSG_OSD_SUBOP) {
memset(&st, 0, sizeof(st));
epoch_t map_epoch;
// subop metadata
- osdreqid_t reqid;
+ ceph_osd_reqid_t reqid;
pg_t pgid;
tid_t rep_tid;
int32_t op;
name.v = i.name;
addr.v = i.addr;
}
+ // Construct from the raw ceph_entity_name / ceph_entity_addr structs by
+ // copying them into the .v members of name and addr.
+ entity_inst_t(const ceph_entity_name& n, const ceph_entity_addr &a) {
+ name.v = n;
+ addr.v = a;
+ }
};
if (now - my_stat.stamp > g_conf.osd_stat_refresh_interval ||
pending_ops > 2*my_stat.qlen) {
- my_stat.stamp = now;
+ my_stat.stamp = now.tv_ref();
my_stat.oprate = stat_oprate.get(now);
//read_latency_calc.set_size( 20 ); // hrm.
/**** mapping facilities ****/
// oid -> pg
- ObjectLayout file_to_object_layout(object_t oid, FileLayout& layout) {
+ ceph_object_layout_t file_to_object_layout(object_t oid, FileLayout& layout) {
return make_object_layout(oid, layout.fl_pg_type, layout.fl_pg_size, layout.fl_pg_preferred, layout.fl_object_stripe_unit);
}
- ObjectLayout make_object_layout(object_t oid, int pg_type, int pg_size, int preferred=-1, int object_stripe_unit = 0) {
+ ceph_object_layout_t make_object_layout(object_t oid, int pg_type, int pg_size, int preferred=-1, int object_stripe_unit = 0) {
int num = preferred >= 0 ? localized_pg_num:pg_num;
int num_mask = preferred >= 0 ? localized_pg_num_mask:pg_num_mask;
//cout << "preferred " << preferred << " num " << num << " mask " << num_mask << " ps " << ps << endl;
// construct object layout
- return ObjectLayout(pg_t(pg_type, pg_size, ps, preferred),
- object_stripe_unit);
+ pg_t pgid = pg_t(pg_type, pg_size, ps, preferred);
+ ceph_object_layout_t layout;
+ layout.pgid = pgid.u;
+ layout.stripe_unit = object_stripe_unit;
+ return layout;
}
object_t oid;
eversion_t version;
- osdreqid_t reqid; // caller+tid to uniquely identify request
+ ceph_osd_reqid_t reqid; // caller+tid to uniquely identify request
Entry() : op(0) {}
Entry(int _op, object_t _oid, const eversion_t& v,
- const osdreqid_t& rid) :
+ const ceph_osd_reqid_t& rid) :
op(_op), oid(_oid), version(v), reqid(rid) {}
bool is_delete() const { return op == DELETE; }
class IndexedLog : public Log {
public:
hash_map<object_t,Entry*> objects; // ptrs into log. be careful!
- hash_set<osdreqid_t> caller_ops;
+ hash_set<ceph_osd_reqid_t> caller_ops;
// recovery pointers
list<Entry>::iterator requested_to; // not inclusive of referenced item
bool logged_object(object_t oid) {
return objects.count(oid);
}
- bool logged_req(const osdreqid_t &r) {
+ bool logged_req(const ceph_osd_reqid_t &r) {
return caller_ops.count(r);
}
void trim_ondisklog_to(ObjectStore::Transaction& t, eversion_t v);
- bool is_dup(osdreqid_t rid) {
+ bool is_dup(ceph_osd_reqid_t rid) {
return log.logged_req(rid);
}
// a write will do something like
object_t oid = op->get_oid(); // logical object
pg_t pg = op->get_pg();
- ObjectLayout layout = op->get_layout();
+ ceph_object_layout_t layout = op->get_layout();
bufferlist data = op->get_data();
off_t off = op->get_offset();
off_t left = op->get_length();
balancing_reads.count(oid) == 0) {
dout(-10) << "preprocess_op balance-reads on " << oid << dendl;
balancing_reads.insert(oid);
+ ceph_object_layout_t layout;
+ layout.pgid = info.pgid.u;
+ layout.stripe_unit = 0;
MOSDOp *pop = new MOSDOp(osd->messenger->get_myinst(), 0, osd->get_tid(),
oid,
- ObjectLayout(info.pgid),
+ layout,
osd->osdmap->get_epoch(),
CEPH_OSD_OP_BALANCEREADS);
do_op(pop);
!unbalancing_reads.count(oid) == 0) {
dout(-10) << "preprocess_op unbalance-reads on " << oid << dendl;
unbalancing_reads.insert(oid);
+ ceph_object_layout_t layout;
+ layout.pgid = info.pgid.u;
+ layout.stripe_unit = 0;
MOSDOp *pop = new MOSDOp(osd->messenger->get_myinst(), 0, osd->get_tid(),
oid,
- ObjectLayout(info.pgid),
+ layout,
osd->osdmap->get_epoch(),
CEPH_OSD_OP_UNBALANCEREADS);
do_op(pop);
// MODIFY
void ReplicatedPG::prepare_log_transaction(ObjectStore::Transaction& t,
- osdreqid_t reqid, pobject_t poid, int op, eversion_t version,
+ ceph_osd_reqid_t reqid, pobject_t poid, int op, eversion_t version,
objectrev_t crev, objectrev_t rev,
eversion_t trim_to)
{
/** prepare_op_transaction
* apply an op to the store wrapped in a transaction.
*/
-void ReplicatedPG::prepare_op_transaction(ObjectStore::Transaction& t, const osdreqid_t& reqid,
+void ReplicatedPG::prepare_op_transaction(ObjectStore::Transaction& t, const ceph_osd_reqid_t& reqid,
pg_t pgid, int op, pobject_t poid,
off_t offset, off_t length, bufferlist& bl,
eversion_t& version, objectrev_t crev, objectrev_t rev)
if (repop->can_send_commit() &&
repop->op->wants_commit()) {
// send commit.
- if (repop->op->wants_reply()) {
- MOSDOpReply *reply = new MOSDOpReply(repop->op, 0, osd->osdmap->get_epoch(), true);
- dout(10) << "put_repop sending commit on " << *repop << " " << reply << dendl;
- osd->messenger->send_message(reply, repop->op->get_client_inst());
- }
+ MOSDOpReply *reply = new MOSDOpReply(repop->op, 0, osd->osdmap->get_epoch(), true);
+ dout(10) << "put_repop sending commit on " << *repop << " " << reply << dendl;
+ osd->messenger->send_message(reply, repop->op->get_client_inst());
repop->sent_commit = true;
}
apply_repop(repop);
// send ack
- if (repop->op->wants_reply()) {
- MOSDOpReply *reply = new MOSDOpReply(repop->op, 0, osd->osdmap->get_epoch(), false);
- dout(10) << "put_repop sending ack on " << *repop << " " << reply << dendl;
- osd->messenger->send_message(reply, repop->op->get_client_inst());
- } else {
- dout(10) << "put_repop NOT sending ack on " << *repop << dendl;
- }
+ MOSDOpReply *reply = new MOSDOpReply(repop->op, 0, osd->osdmap->get_epoch(), false);
+ dout(10) << "put_repop sending ack on " << *repop << " " << reply << dendl;
+ osd->messenger->send_message(reply, repop->op->get_client_inst());
repop->sent_ack = true;
utime_t now = g_clock.now();
ReplicatedPG::RepGather *ReplicatedPG::new_rep_gather(MOSDOp *op, tid_t rep_tid, eversion_t nv)
{
- dout(10) << "new_rep_gather rep_tid " << op->get_rep_tid() << " on " << *op << dendl;
+ dout(10) << "new_rep_gather rep_tid " << rep_tid << " on " << *op << dendl;
RepGather *repop = new RepGather(op, rep_tid, nv, info.last_complete);
// osds. commits all come to me.
assert(nv > log.top);
// will clone?
- if (crev && op->get_rev() && op->get_rev() > crev) {
+ if (crev && op->get_oid().rev && op->get_oid().rev > crev) {
clone_version = nv;
nv.version++;
}
nv.version = op->get_version().version;
// clone?
- if (crev && op->get_rev() && op->get_rev() > crev) {
+ if (crev && op->get_oid().rev && op->get_oid().rev > crev) {
// backstep clone
clone_version = nv;
clone_version.version--;
dout(-10) << "preprocess_op unbalancing-reads on " << op->get_oid() << dendl;
unbalancing_reads.insert(op->get_oid());
+ ceph_object_layout_t layout;
+ layout.pgid = info.pgid.u;
+ layout.stripe_unit = 0;
MOSDOp *pop = new MOSDOp(osd->messenger->get_myinst(), 0, osd->get_tid(),
op->get_oid(),
- ObjectLayout(info.pgid),
+ layout,
osd->osdmap->get_epoch(),
CEPH_OSD_OP_UNBALANCEREADS);
do_op(pop);
<< " " << oid
<< " v " << nv
//<< " crev " << crev
- << " rev " << op->get_rev()
+ << " rev " << op->get_oid().rev
<< " " << op->get_offset() << "~" << op->get_length()
<< dendl;
// log and update later.
pobject_t poid = oid;
prepare_log_transaction(repop->t, op->get_reqid(), poid, op->get_op(), nv,
- crev, op->get_rev(), peers_complete_thru);
+ crev, op->get_oid().rev, peers_complete_thru);
prepare_op_transaction(repop->t, op->get_reqid(),
info.pgid, op->get_op(), poid,
op->get_offset(), op->get_length(), op->get_data(),
- nv, crev, op->get_rev());
+ nv, crev, op->get_oid().rev);
}
// (logical) local ack.
<< dendl;
// send op
- osdreqid_t rid;
+ ceph_osd_reqid_t rid;
tid_t tid = osd->get_tid();
MOSDSubOp *subop = new MOSDSubOp(rid, info.pgid, poid, CEPH_OSD_OP_PULL,
0, 0,
osd->logger->inc("r_pushb", bl.length());
// send
- osdreqid_t rid; // useless?
+ ceph_osd_reqid_t rid; // useless?
MOSDSubOp *subop = new MOSDSubOp(rid, info.pgid, poid, CEPH_OSD_OP_PUSH, 0, bl.length(),
osd->osdmap->get_epoch(), osd->get_tid(), v);
subop->set_data(bl); // note: claims bl, set length above here!
void sub_op_modify_commit(MOSDSubOp *op, int ackerosd, eversion_t last_complete);
void prepare_log_transaction(ObjectStore::Transaction& t,
- osdreqid_t reqid, pobject_t poid, int op, eversion_t version,
+ ceph_osd_reqid_t reqid, pobject_t poid, int op, eversion_t version,
objectrev_t crev, objectrev_t rev,
eversion_t trim_to);
- void prepare_op_transaction(ObjectStore::Transaction& t, const osdreqid_t& reqid,
+ void prepare_op_transaction(ObjectStore::Transaction& t, const ceph_osd_reqid_t& reqid,
pg_t pgid, int op, pobject_t poid,
off_t offset, off_t length, bufferlist& bl,
eversion_t& version, objectrev_t crev, objectrev_t rev);
-/* osdreqid_t - caller name + incarnation# + tid to unique identify this request
+/* ceph_osd_reqid_t - caller name + incarnation# + tid to uniquely identify this request
* use for metadata and osd ops.
*/
-class osdreqid_t {
-public:
- entity_name_t name; // who
- int32_t inc; // incarnation
- tid_t tid;
- osdreqid_t() : inc(0), tid(0) {}
- osdreqid_t(const entity_name_t& a, int i, tid_t t) : name(a), inc(i), tid(t) {}
-};
-inline ostream& operator<<(ostream& out, const osdreqid_t& r) {
+inline ostream& operator<<(ostream& out, const ceph_osd_reqid_t& r) {
return out << r.name << "." << r.inc << ":" << r.tid;
}
-inline bool operator==(const osdreqid_t& l, const osdreqid_t& r) {
+inline bool operator==(const ceph_osd_reqid_t& l, const ceph_osd_reqid_t& r) {
return (l.name == r.name) && (l.inc == r.inc) && (l.tid == r.tid);
}
-inline bool operator!=(const osdreqid_t& l, const osdreqid_t& r) {
+inline bool operator!=(const ceph_osd_reqid_t& l, const ceph_osd_reqid_t& r) {
return (l.name != r.name) || (l.inc != r.inc) || (l.tid != r.tid);
}
-inline bool operator<(const osdreqid_t& l, const osdreqid_t& r) {
-  return (l.name < r.name) || (l.inc < r.inc) ||
-    (l.name == r.name && l.inc == r.inc && l.tid < r.tid);
+// Lexicographic order on (name, inc, tid).
+// The previous expression returned true whenever l.inc < r.inc, even if
+// l.name > r.name, so both a < b and b < a could hold at once.  That is not
+// a strict weak ordering and breaks ordered containers keyed on the reqid.
+inline bool operator<(const ceph_osd_reqid_t& l, const ceph_osd_reqid_t& r) {
+  if (!(l.name == r.name)) return l.name < r.name;  // names differ: name decides
+  if (l.inc != r.inc) return l.inc < r.inc;         // same name: incarnation decides
+  return l.tid < r.tid;                             // same name and inc: tid decides
}
-inline bool operator<=(const osdreqid_t& l, const osdreqid_t& r) {
-  return (l.name < r.name) || (l.inc < r.inc) ||
-    (l.name == r.name && l.inc == r.inc && l.tid <= r.tid);
+// Lexicographic "less than or equal" on (name, inc, tid); kept consistent with
+// operator< because operator> and operator>= are derived from these two.
+inline bool operator<=(const ceph_osd_reqid_t& l, const ceph_osd_reqid_t& r) {
+  if (!(l.name == r.name)) return l.name < r.name;
+  if (l.inc != r.inc) return l.inc < r.inc;
+  return l.tid <= r.tid;
}
-inline bool operator>(const osdreqid_t& l, const osdreqid_t& r) { return !(l <= r); }
-inline bool operator>=(const osdreqid_t& l, const osdreqid_t& r) { return !(l < r); }
+inline bool operator>(const ceph_osd_reqid_t& l, const ceph_osd_reqid_t& r) { return !(l <= r); }
+inline bool operator>=(const ceph_osd_reqid_t& l, const ceph_osd_reqid_t& r) { return !(l < r); }
namespace __gnu_cxx {
- template<> struct hash<osdreqid_t> {
- size_t operator()(const osdreqid_t &r) const {
+ template<> struct hash<ceph_osd_reqid_t> {
+ size_t operator()(const ceph_osd_reqid_t &r) const {
static blobhash H;
return H((const char*)&r, sizeof(r));
}
static const int TYPE_REP = CEPH_PG_TYPE_REP;
static const int TYPE_RAID4 = CEPH_PG_TYPE_RAID4;
-private:
+ //private:
union ceph_pg u;
public:
assert(sizeof(u.pg) == sizeof(u.pg64));
}
pg_t(uint64_t v) { u.pg64 = v; }
+  // Wrap a raw ceph_pg_t by copying it into the underlying union.
+  pg_t(const ceph_pg_t& cpg) : u(cpg) {}
int type() { return u.pg.type; }
bool is_rep() { return type() == TYPE_REP; }
-/** ObjectLayout
- *
- * describes an object's placement and layout in the storage cluster.
- * most importatly, which pg it belongs to.
- * if that pg is raided, it also specifies the object's stripe_unit.
- */
-struct ObjectLayout {
- pg_t pgid; // what pg do i belong to
- int32_t stripe_unit; // for object raid in raid pgs
-
- ObjectLayout() : pgid(0), stripe_unit(0) { }
- ObjectLayout(pg_t p, int su=0) : pgid(p), stripe_unit(su) { }
-};
-
-inline ostream& operator<<(ostream& out, const ObjectLayout &ol)
+inline ostream& operator<<(ostream& out, const ceph_object_layout_t &ol)
{
out << "pg" << ol.pgid;
if (ol.stripe_unit)
version_t version;
eversion_t() : epoch(0), version(0) {}
eversion_t(epoch_t e, version_t v) : epoch(e), version(v) {}
+
+ eversion_t(const ceph_eversion& ce) : epoch(ce.epoch), version(ce.version) {}
+  // Convert to the raw ceph_eversion_t by copying epoch and version.
+  // Declared const so that const eversion_t values and const references
+  // can be converted as well; the conversion never modifies *this.
+  operator ceph_eversion_t() const {
+    ceph_eversion_t c;
+    c.epoch = epoch;
+    c.version = version;
+    return c;
+  }
};
inline bool operator==(const eversion_t& l, const eversion_t& r) {
pg_stat_t() : state(0), size(0), num_blocks(0), num_objects(0) {}
};
-
-
-struct osd_peer_stat_t {
- utime_t stamp;
- double oprate;
- double qlen;
- double recent_qlen;
- double read_latency;
- double read_latency_mine;
- double frac_rd_ops_shed_in;
- double frac_rd_ops_shed_out;
- osd_peer_stat_t() : oprate(0), qlen(0), recent_qlen(0),
- read_latency(0), read_latency_mine(0),
- frac_rd_ops_shed_in(0), frac_rd_ops_shed_out(0) {}
-};
+// osd_peer_stat_t is now an alias for the packed struct ceph_osd_peer_stat.
+// NOTE(review): the removed class had a constructor that zeroed every rate
+// and latency field; the plain struct has none, so a default-constructed
+// osd_peer_stat_t is no longer guaranteed to be zeroed -- confirm that every
+// user initializes it explicitly.
+typedef struct ceph_osd_peer_stat osd_peer_stat_t;
inline ostream& operator<<(ostream& out, const osd_peer_stat_t &stat) {
return out << "stat(" << stat.stamp
off_t start; // in object
size_t length; // in object
- ObjectLayout layout; // object layout (pgid, etc.)
+ ceph_object_layout_t layout; // object layout (pgid, etc.)
map<size_t, size_t> buffer_extents; // off -> len. extents in buffer being mapped (may be fragmented bc of striping!)
object_t oid; // this _always_ is oid.rev=0
inodeno_t ino;
objectrev_t rev; // last rev we're written
- ObjectLayout layout;
+ ceph_object_layout_t layout;
public:
map<off_t, BufferHead*> data;
int rdlock_ref; // how many ppl want or are using a READ lock
public:
- Object(ObjectCacher *_oc, object_t o, inodeno_t i, ObjectLayout& l) :
+ Object(ObjectCacher *_oc, object_t o, inodeno_t i, ceph_object_layout_t& l) :
oc(_oc),
oid(o), ino(i), layout(l),
last_write_tid(0), last_ack_tid(0), last_commit_tid(0),
object_t get_oid() { return oid; }
inodeno_t get_ino() { return ino; }
- ObjectLayout& get_layout() { return layout; }
- void set_layout(ObjectLayout& l) { layout = l; }
+ ceph_object_layout_t& get_layout() { return layout; }
+ void set_layout(ceph_object_layout_t& l) { layout = l; }
bool can_close() {
return data.empty() && lock_state == LOCK_NONE &&
// objects
- Object *get_object(object_t oid, inodeno_t ino, ObjectLayout &l) {
+ Object *get_object(object_t oid, inodeno_t ino, ceph_object_layout_t &l) {
// have it?
if (objects.count(oid))
return objects[oid];
// stat -----------------------------------
-tid_t Objecter::stat(object_t oid, off_t *size, ObjectLayout ol, Context *onfinish)
+tid_t Objecter::stat(object_t oid, off_t *size, ceph_object_layout_t ol, Context *onfinish)
{
OSDStat *st = new OSDStat(size);
st->extents.push_back(ObjectExtent(oid, 0, 0));
// read -----------------------------------
-tid_t Objecter::read(object_t oid, off_t off, size_t len, ObjectLayout ol, bufferlist *bl,
+tid_t Objecter::read(object_t oid, off_t off, size_t len, ceph_object_layout_t ol, bufferlist *bl,
Context *onfinish)
{
OSDRead *rd = new OSDRead(bl);
// write ------------------------------------
-tid_t Objecter::write(object_t oid, off_t off, size_t len, ObjectLayout ol, bufferlist &bl,
+tid_t Objecter::write(object_t oid, off_t off, size_t len, ceph_object_layout_t ol, bufferlist &bl,
Context *onack, Context *oncommit)
{
OSDWrite *wr = new OSDWrite(bl);
// zero
-tid_t Objecter::zero(object_t oid, off_t off, size_t len, ObjectLayout ol,
+tid_t Objecter::zero(object_t oid, off_t off, size_t len, ceph_object_layout_t ol,
Context *onack, Context *oncommit)
{
OSDModify *z = new OSDModify(CEPH_OSD_OP_ZERO);
// lock ops
-tid_t Objecter::lock(int op, object_t oid, ObjectLayout ol,
+tid_t Objecter::lock(int op, object_t oid, ceph_object_layout_t ol,
Context *onack, Context *oncommit)
{
OSDModify *l = new OSDModify(op);
//tid_t lockx(OSDLock *l, Context *onack, Context *oncommit);
// even lazier
- tid_t read(object_t oid, off_t off, size_t len, ObjectLayout ol, bufferlist *bl,
+ tid_t read(object_t oid, off_t off, size_t len, ceph_object_layout_t ol, bufferlist *bl,
Context *onfinish);
- tid_t write(object_t oid, off_t off, size_t len, ObjectLayout ol, bufferlist &bl,
+ tid_t write(object_t oid, off_t off, size_t len, ceph_object_layout_t ol, bufferlist &bl,
Context *onack, Context *oncommit);
- tid_t zero(object_t oid, off_t off, size_t len, ObjectLayout ol,
+ tid_t zero(object_t oid, off_t off, size_t len, ceph_object_layout_t ol,
Context *onack, Context *oncommit);
- tid_t stat(object_t oid, off_t *size, ObjectLayout ol, Context *onfinish);
+ tid_t stat(object_t oid, off_t *size, ceph_object_layout_t ol, Context *onfinish);
- tid_t lock(int op, object_t oid, ObjectLayout ol, Context *onack, Context *oncommit);
+ tid_t lock(int op, object_t oid, ceph_object_layout_t ol, Context *onack, Context *oncommit);
void ms_handle_failure(Message *m, entity_name_t dest, const entity_inst_t& inst);