From: Sage Weil Date: Mon, 9 Jun 2008 18:40:10 +0000 (-0700) Subject: os: remove read ops from transactions X-Git-Tag: v0.3~138 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=06d43b7895e7f47044ba6a21dbc202570e57c824;p=ceph.git os: remove read ops from transactions --- diff --git a/src/ebofs/Ebofs.cc b/src/ebofs/Ebofs.cc index e05bbdc21ca..5a79e4ebffc 100644 --- a/src/ebofs/Ebofs.cc +++ b/src/ebofs/Ebofs.cc @@ -2464,64 +2464,7 @@ unsigned Ebofs::_apply_transaction(Transaction& t) while (t.have_op()) { int op = t.get_op(); switch (op) { - case Transaction::OP_READ: - { - pobject_t oid; - t.get_oid(oid); - __u64 offset, len; - t.get_length(offset); - t.get_length(len); - bufferlist *pbl; - t.get_pbl(pbl); - if (_read(oid, offset, len, *pbl) < 0) { - dout(7) << "apply_transaction fail on _read" << dendl; - r &= bit; - } - } - break; - - case Transaction::OP_STAT: - { - pobject_t oid; - t.get_oid(oid); - struct stat *st; - t.get_pstat(st); - if (_stat(oid, st) < 0) { - dout(7) << "apply_transaction fail on _stat" << dendl; - r &= bit; - } - } - break; - - case Transaction::OP_GETATTR: - { - pobject_t oid; - t.get_oid(oid); - const char *attrname; - t.get_attrname(attrname); - pair pattrval; - t.get_pattrval(pattrval); - if ((*(pattrval.second) = _getattr(oid, attrname, pattrval.first, *(pattrval.second))) < 0) { - dout(7) << "apply_transaction fail on _getattr" << dendl; - r &= bit; - } - } - break; - - case Transaction::OP_GETATTRS: - { - pobject_t oid; - t.get_oid(oid); - map *pset; - t.get_pattrset(pset); - if (_getattrs(oid, *pset) < 0) { - dout(7) << "apply_transaction fail on _getattrs" << dendl; - r &= bit; - } - } - break; - - + case Transaction::OP_WRITE: { pobject_t oid; diff --git a/src/os/FileStore.cc b/src/os/FileStore.cc index dbe84670d5b..26cf7a10ac2 100644 --- a/src/os/FileStore.cc +++ b/src/os/FileStore.cc @@ -426,7 +426,7 @@ unsigned FileStore::apply_transaction(Transaction &t, Context *onsafe) return ObjectStore::apply_transaction(t, onsafe); // create transaction - int len = t.get_len() * 30; // very conservative! FIXME FIXME FIXME + int len = t.get_btrfs_len(); dout(20) << "apply_transaction allocation btrfs usertrans len " << len << dendl; btrfs_ioctl_usertrans *trans = (btrfs_ioctl_usertrans *)new char[sizeof(*trans) + len * sizeof(trans->ops[0])]; @@ -439,56 +439,7 @@ unsigned FileStore::apply_transaction(Transaction &t, Context *onsafe) int op = t.get_op(); switch (op) { - case Transaction::OP_READ: - { - coll_t cid; - pobject_t oid; - __u64 offset, len; - t.get_cid(cid); - t.get_oid(oid); - t.get_length(offset); - t.get_length(len); - bufferlist *pbl; - t.get_pbl(pbl); - read(cid, oid, offset, len, *pbl); - } - break; - case Transaction::OP_STAT: - { - coll_t cid; - t.get_cid(cid); - pobject_t oid; - t.get_oid(oid); - struct stat *st; - t.get_pstat(st); - stat(cid, oid, st); - } - break; - case Transaction::OP_GETATTR: - { - coll_t cid; - t.get_cid(cid); - pobject_t oid; - t.get_oid(oid); - const char *attrname; - t.get_attrname(attrname); - pair pattrval; - t.get_pattrval(pattrval); - *pattrval.second = getattr(cid, oid, attrname, pattrval.first, *pattrval.second); - } - break; - case Transaction::OP_GETATTRS: - { - coll_t cid; - t.get_cid(cid); - pobject_t oid; - t.get_oid(oid); - map *pset; - t.get_pattrset(pset); - getattrs(cid, oid, *pset); - } - break; - + case Transaction::OP_WRITE: case Transaction::OP_ZERO: // write actual zeros. { @@ -616,24 +567,32 @@ unsigned FileStore::apply_transaction(Transaction &t, Context *onsafe) trans->len++; } break; + + case Transaction::OP_SETATTRS: + case Transaction::OP_COLL_SETATTRS: { - coll_t cid; - t.get_cid(cid); - pobject_t oid; - t.get_oid(oid); - map *pattrset; - t.get_pattrset(pattrset); - //setattrs(cid, oid, *pattrset, 0); - // make note of old attrs map oldattrs; - getattrs(cid, oid, oldattrs); - - dout(10) << "setattrs " << cid << " " << oid << dendl; char *fn = new char[80]; str.push_back(fn); - get_coname(cid, oid, fn); + + if (op == Transaction::OP_SETATTRS) { + coll_t cid; + t.get_cid(cid); + pobject_t oid; + t.get_oid(oid); + getattrs(cid, oid, oldattrs); + get_coname(cid, oid, fn); + } else { + coll_t cid; + t.get_cid(cid); + collection_getattrs(cid, oldattrs); + get_cdir(cid, fn); + } + map *pattrset; + t.get_pattrset(pattrset); + for (map::iterator p = pattrset->begin(); p != pattrset->end(); p++) { @@ -713,10 +672,10 @@ unsigned FileStore::apply_transaction(Transaction &t, Context *onsafe) trans->ops[trans->len].args[0] = -2; trans->ops[trans->len].args[1] = -1; trans->len++; - trans->ops[trans->len].op = BTRFS_IOC_USERTRANS_CLONE; + trans->ops[trans->len].op = BTRFS_IOC_USERTRANS_CLOSE; trans->ops[trans->len].args[0] = -1; trans->len++; - trans->ops[trans->len].op = BTRFS_IOC_USERTRANS_CLONE; + trans->ops[trans->len].op = BTRFS_IOC_USERTRANS_CLOSE; trans->ops[trans->len].args[0] = -2; trans->len++; } @@ -817,9 +776,9 @@ unsigned FileStore::apply_transaction(Transaction &t, Context *onsafe) trans->ops[trans->len].args[0] = (__u64)fn; trans->ops[trans->len].args[1] = (__u64)attrname; trans->len++; - } + } break; - + default: cerr << "bad op " << op << std::endl; @@ -828,6 +787,7 @@ unsigned FileStore::apply_transaction(Transaction &t, Context *onsafe) } dout(20) << "apply_transaction final btrfs usertrans len is " << trans->len << dendl; + assert((int)trans->len <= (int)len); // apply int r = 0; diff --git a/src/os/JournalingObjectStore.h b/src/os/JournalingObjectStore.h index ff31a373513..fcb6c794b94 100644 --- a/src/os/JournalingObjectStore.h +++ b/src/os/JournalingObjectStore.h @@ -200,7 +200,7 @@ protected: } else queue_commit_waiter(onsafe); } - + void journal_collection_setattrs(coll_t cid, map& aset, Context *onsafe) { if (journal && journal->is_writeable()) { Transaction t; @@ -211,7 +211,7 @@ protected: } else queue_commit_waiter(onsafe); } - + void journal_sync(Context *onsafe) { if (journal) { // journal empty transaction diff --git a/src/os/ObjectStore.h b/src/os/ObjectStore.h index b3b2caeb7fb..8c9932c3279 100644 --- a/src/os/ObjectStore.h +++ b/src/os/ObjectStore.h @@ -43,6 +43,9 @@ using std::list; */ +static inline void encode(const map *attrset, bufferlist &bl) { + ::encode(*attrset, bl); +} class ObjectStore { public: @@ -73,16 +76,11 @@ public: */ class Transaction { public: - static const int OP_READ = 1; // oid, offset, len, pbl - static const int OP_STAT = 2; // oid, pstat - static const int OP_GETATTR = 3; // oid, attrname, pattrval - static const int OP_GETATTRS = 4; // oid, pattrset - static const int OP_WRITE = 10; // oid, offset, len, bl static const int OP_ZERO = 11; // oid, offset, len static const int OP_TRUNCATE = 12; // oid, len static const int OP_REMOVE = 13; // oid - static const int OP_SETATTR = 14; // oid, attrname, attrval + static const int OP_SETATTR = 14; // oid, attrname, bl static const int OP_SETATTRS = 15; // oid, attrset static const int OP_RMATTR = 16; // oid, attrname static const int OP_CLONE = 17; // oid, newoid @@ -93,27 +91,29 @@ public: static const int OP_RMCOLL = 21; // cid static const int OP_COLL_ADD = 22; // cid, oid static const int OP_COLL_REMOVE = 23; // cid, oid - static const int OP_COLL_SETATTR = 24; // cid, attrname, attrval + static const int OP_COLL_SETATTR = 24; // cid, attrname, bl static const int OP_COLL_RMATTR = 25; // cid, attrname static const int OP_COLL_SETATTRS = 26; // cid, attrset private: + int len; + int blen; // for btrfs transactions list ops; list bls; list oids; list cids; list lengths; + + // for these guys, just use a pointer. + // but, decode to a full value, and create pointers to that. list attrnames; list attrnames2; - - // for reads only (not encoded) - list pbls; - list psts; - list< pair > pattrvals; - list< map* > pattrsets; + list *> attrsets; + list > attrsets2; public: - int get_len() { return ops.size(); } // FIXME maintain a counter? + int get_len() { return len ? len : ops.size(); } + int get_btrfs_len() { return blen; } bool have_op() { return !ops.empty(); @@ -144,54 +144,9 @@ public: p = attrnames.front(); attrnames.pop_front(); } - void get_pbl(bufferlist* &pbl) { - pbl = pbls.front(); - pbls.pop_front(); - } - void get_pstat(struct stat* &pst) { - pst = psts.front(); - psts.pop_front(); - } - void get_pattrval(pair& p) { - p = pattrvals.front(); - pattrvals.pop_front(); - } void get_pattrset(map* &ps) { - ps = pattrsets.front(); - pattrsets.pop_front(); - } - - - void read(coll_t cid, pobject_t oid, __u64 off, size_t len, bufferlist *pbl) { - int op = OP_READ; - ops.push_back(op); - cids.push_back(cid); - oids.push_back(oid); - lengths.push_back(off); - lengths.push_back(len); - pbls.push_back(pbl); - } - void stat(coll_t cid, pobject_t oid, struct stat *st) { - int op = OP_STAT; - ops.push_back(op); - cids.push_back(cid); - oids.push_back(oid); - psts.push_back(st); - } - void getattr(coll_t cid, pobject_t oid, const char* name, void* val, int *plen) { - int op = OP_GETATTR; - ops.push_back(op); - cids.push_back(cid); - oids.push_back(oid); - attrnames.push_back(name); - pattrvals.push_back(pair(val,plen)); - } - void getattrs(coll_t cid, pobject_t oid, map& aset) { - int op = OP_GETATTRS; - ops.push_back(op); - cids.push_back(cid); - oids.push_back(oid); - pattrsets.push_back(&aset); + ps = attrsets.front(); + attrsets.pop_front(); } void write(coll_t cid, pobject_t oid, __u64 off, size_t len, const bufferlist& bl) { @@ -202,6 +157,8 @@ public: lengths.push_back(off); lengths.push_back(len); bls.push_back(bl); + len++; + blen += 3 + bl.buffers().size(); } void zero(coll_t cid, pobject_t oid, __u64 off, size_t len) { int op = OP_ZERO; @@ -210,6 +167,8 @@ public: oids.push_back(oid); lengths.push_back(off); lengths.push_back(len); + len++; + blen += 3 + 1; } void trim_from_cache(coll_t cid, pobject_t oid, __u64 off, size_t len) { int op = OP_TRIMCACHE; @@ -218,6 +177,7 @@ public: oids.push_back(oid); lengths.push_back(off); lengths.push_back(len); + len++; } void truncate(coll_t cid, pobject_t oid, __u64 off) { int op = OP_TRUNCATE; @@ -225,12 +185,16 @@ public: cids.push_back(cid); oids.push_back(oid); lengths.push_back(off); + len++; + blen++; } void remove(coll_t cid, pobject_t oid) { int op = OP_REMOVE; ops.push_back(op); cids.push_back(cid); oids.push_back(oid); + len++; + blen++; } void setattr(coll_t cid, pobject_t oid, const char* name, const void* val, int len) { int op = OP_SETATTR; @@ -238,17 +202,20 @@ public: cids.push_back(cid); oids.push_back(oid); attrnames.push_back(name); - //attrvals.push_back(pair(val,len)); bufferlist bl; bl.append((char*)val,len); bls.push_back(bl); + len++; + blen++; } void setattrs(coll_t cid, pobject_t oid, map& attrset) { int op = OP_SETATTRS; ops.push_back(op); cids.push_back(cid); oids.push_back(oid); - pattrsets.push_back(&attrset); + attrsets.push_back(&attrset); + len++; + blen += 5 + attrset.size(); // HACK allowance for removing old attrs } void rmattr(coll_t cid, pobject_t oid, const char* name) { int op = OP_RMATTR; @@ -256,6 +223,8 @@ public: cids.push_back(cid); oids.push_back(oid); attrnames.push_back(name); + len++; + blen++; } void clone(coll_t cid, pobject_t oid, pobject_t noid) { int op = OP_CLONE; @@ -263,16 +232,22 @@ public: cids.push_back(cid); oids.push_back(oid); oids.push_back(noid); + len++; + blen += 5; } void create_collection(coll_t cid) { int op = OP_MKCOLL; ops.push_back(op); cids.push_back(cid); + len++; + blen++; } void remove_collection(coll_t cid) { int op = OP_RMCOLL; ops.push_back(op); cids.push_back(cid); + len++; + blen++; } void collection_add(coll_t cid, coll_t ocid, pobject_t oid) { int op = OP_COLL_ADD; @@ -280,13 +255,17 @@ public: cids.push_back(cid); cids.push_back(ocid); oids.push_back(oid); + len++; + blen++; } void collection_remove(coll_t cid, pobject_t oid) { int op = OP_COLL_REMOVE; ops.push_back(op); cids.push_back(cid); oids.push_back(oid); - } + len++; + blen++; + } void collection_setattr(coll_t cid, const char* name, const void* val, int len) { int op = OP_COLL_SETATTR; ops.push_back(op); @@ -295,24 +274,31 @@ public: bufferlist bl; bl.append((char*)val, len); bls.push_back(bl); + len++; + blen++; } void collection_rmattr(coll_t cid, const char* name) { int op = OP_COLL_RMATTR; ops.push_back(op); cids.push_back(cid); attrnames.push_back(name); + len++; + blen++; } void collection_setattrs(coll_t cid, map& aset) { int op = OP_COLL_SETATTRS; ops.push_back(op); cids.push_back(cid); - pattrsets.push_back(&aset); + attrsets.push_back(&aset); + len++; + blen += 5 + aset.size(); } + // etc. - Transaction() {} - Transaction(bufferlist::iterator &p) { decode(p); } - Transaction(bufferlist &bl) { + Transaction() : len(0) {} + Transaction(bufferlist::iterator &p) : len(0) { decode(p); } + Transaction(bufferlist &bl) : len(0) { bufferlist::iterator p = bl.begin(); decode(p); } @@ -324,6 +310,7 @@ public: ::encode(cids, bl); ::encode(lengths, bl); ::encode(attrnames, bl); + ::encode(attrsets, bl); } void decode(bufferlist::iterator &bl) { ::decode(ops, bl); @@ -336,6 +323,11 @@ public: p != attrnames2.end(); ++p) attrnames.push_back((*p).c_str()); + ::decode(attrsets2, bl); + for (list >::iterator p = attrsets2.begin(); + p != attrsets2.end(); + ++p) + attrsets.push_back(&(*p)); } }; @@ -351,56 +343,6 @@ public: while (t.have_op()) { int op = t.get_op(); switch (op) { - case Transaction::OP_READ: - { - coll_t cid; - pobject_t oid; - __u64 offset, len; - t.get_cid(cid); - t.get_oid(oid); - t.get_length(offset); - t.get_length(len); - bufferlist *pbl; - t.get_pbl(pbl); - read(cid, oid, offset, len, *pbl); - } - break; - case Transaction::OP_STAT: - { - coll_t cid; - t.get_cid(cid); - pobject_t oid; - t.get_oid(oid); - struct stat *st; - t.get_pstat(st); - stat(cid, oid, st); - } - break; - case Transaction::OP_GETATTR: - { - coll_t cid; - t.get_cid(cid); - pobject_t oid; - t.get_oid(oid); - const char *attrname; - t.get_attrname(attrname); - pair pattrval; - t.get_pattrval(pattrval); - *pattrval.second = getattr(cid, oid, attrname, pattrval.first, *pattrval.second); - } - break; - case Transaction::OP_GETATTRS: - { - coll_t cid; - t.get_cid(cid); - pobject_t oid; - t.get_oid(oid); - map *pset; - t.get_pattrset(pset); - getattrs(cid, oid, *pset); - } - break; - case Transaction::OP_WRITE: { coll_t cid; diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index bf61abaf0ab..31a24e03961 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -1407,15 +1407,12 @@ void ReplicatedPG::push(pobject_t poid, int peer) // read data+attrs bufferlist bl; eversion_t v; - int vlen = sizeof(v); + size_t vlen = sizeof(v); map attrset; - ObjectStore::Transaction t; - t.read(info.pgid, poid, 0, 0, &bl); - t.getattr(info.pgid, poid, "version", &v, &vlen); - t.getattrs(info.pgid, poid, attrset); - unsigned tr = osd->store->apply_transaction(t); - assert(tr == 0); // !!! + osd->store->read(info.pgid, poid, 0, 0, bl); + osd->store->getattr(info.pgid, poid, "version", &v, vlen); + osd->store->getattrs(info.pgid, poid, attrset); // ok dout(7) << "push " << poid << " v " << v