From a32d6d32c1a92d3bc45399e235a7e80edd551fdd Mon Sep 17 00:00:00 2001 From: sageweil Date: Mon, 26 Feb 2007 00:13:49 +0000 Subject: [PATCH] fixed pg log storage (and the stupid recovery problems); fakestore cleanup git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@1129 29311d96-e01e-0410-9327-a35deaab8ce9 --- branches/riccardo/monitor2/include/object.h | 1 + branches/riccardo/monitor2/osd/FakeStore.cc | 5 ++- branches/riccardo/monitor2/osd/FakeStore.h | 45 ++++++-------------- branches/riccardo/monitor2/osd/OSD.cc | 2 +- branches/riccardo/monitor2/osd/ObjectStore.h | 8 ++-- branches/riccardo/monitor2/osd/PG.cc | 41 +++++++++++++----- branches/riccardo/monitor2/osd/PG.h | 6 +-- branches/riccardo/monitor2/osd/osd_types.h | 19 ++++++--- 8 files changed, 71 insertions(+), 56 deletions(-) diff --git a/branches/riccardo/monitor2/include/object.h b/branches/riccardo/monitor2/include/object.h index 3a66c4ab83d54..9773ecb4b3288 100644 --- a/branches/riccardo/monitor2/include/object.h +++ b/branches/riccardo/monitor2/include/object.h @@ -30,6 +30,7 @@ struct object_t { object_t() : ino(0), bno(0), rev(0) {} object_t(__uint64_t i, __uint32_t b) : ino(i), bno(b), rev(0) {} + object_t(__uint64_t i, __uint32_t b, __uint32_t r) : ino(i), bno(b), rev(r) {} }; diff --git a/branches/riccardo/monitor2/osd/FakeStore.cc b/branches/riccardo/monitor2/osd/FakeStore.cc index 1c8ff259e08e9..1ff08530e4cfd 100644 --- a/branches/riccardo/monitor2/osd/FakeStore.cc +++ b/branches/riccardo/monitor2/osd/FakeStore.cc @@ -452,6 +452,7 @@ int FakeStore::rmattr(object_t oid, const char *name, Context *onsafe) return r; } +/* int FakeStore::listattr(object_t oid, char *attrls, size_t size) { if (fake_attrs) return attrs.listattr(oid, attrls, size); @@ -459,6 +460,7 @@ int FakeStore::listattr(object_t oid, char *attrls, size_t size) get_oname(oid, fn); return ::listxattr(fn, attrls, size); } +*/ // collections @@ -485,12 +487,13 @@ int FakeStore::collection_getattr(coll_t c, const char *name, return 0; } +/* int FakeStore::collection_listattr(coll_t c, char *attrs, size_t size) { if (fake_attrs) return collection_listattr(c, attrs, size); return 0; } - +*/ // -------------------------- // collections diff --git a/branches/riccardo/monitor2/osd/FakeStore.h b/branches/riccardo/monitor2/osd/FakeStore.h index b4bf822aa294a..4ad2cb4a054e8 100644 --- a/branches/riccardo/monitor2/osd/FakeStore.h +++ b/branches/riccardo/monitor2/osd/FakeStore.h @@ -76,50 +76,33 @@ class FakeStore : public ObjectStore { int stat(object_t oid, struct stat *st); int remove(object_t oid, Context *onsafe); int truncate(object_t oid, off_t size, Context *onsafe); - int read(object_t oid, - off_t offset, size_t len, - bufferlist& bl); - int write(object_t oid, - off_t offset, size_t len, - bufferlist& bl, - Context *onsafe); + int read(object_t oid, off_t offset, size_t len, bufferlist& bl); + int write(object_t oid, off_t offset, size_t len, bufferlist& bl, Context *onsafe); void sync(); void sync(Context *onsafe); // attrs - int setattr(object_t oid, const char *name, - const void *value, size_t size, - Context *onsafe=0); + int setattr(object_t oid, const char *name, const void *value, size_t size, Context *onsafe=0); int setattrs(object_t oid, map& aset); - int getattr(object_t oid, const char *name, - void *value, size_t size); + int getattr(object_t oid, const char *name, void *value, size_t size); int getattrs(object_t oid, map& aset); - int rmattr(object_t oid, const char *name, - Context *onsafe=0); - int listattr(object_t oid, char *attrs, size_t size); - int collection_setattr(coll_t c, const char *name, - void *value, size_t size, - Context *onsafe=0); - int collection_rmattr(coll_t c, const char *name, - Context *onsafe=0); - int collection_getattr(coll_t c, const char *name, - void *value, size_t size); - int collection_listattr(coll_t c, char *attrs, size_t size); + int rmattr(object_t oid, const char *name, Context *onsafe=0); + //int listattr(object_t oid, char *attrs, size_t size); + int collection_setattr(coll_t c, const char *name, void *value, size_t size, Context *onsafe=0); + int collection_rmattr(coll_t c, const char *name, Context *onsafe=0); + int collection_getattr(coll_t c, const char *name, void *value, size_t size); + //int collection_listattr(coll_t c, char *attrs, size_t size); // collections int list_collections(list& ls); - int create_collection(coll_t c, - Context *onsafe=0); - int destroy_collection(coll_t c, - Context *onsafe=0); + int create_collection(coll_t c, Context *onsafe=0); + int destroy_collection(coll_t c, Context *onsafe=0); int collection_stat(coll_t c, struct stat *st); bool collection_exists(coll_t c); - int collection_add(coll_t c, object_t o, - Context *onsafe=0); - int collection_remove(coll_t c, object_t o, - Context *onsafe=0); + int collection_add(coll_t c, object_t o, Context *onsafe=0); + int collection_remove(coll_t c, object_t o, Context *onsafe=0); int collection_list(coll_t c, list& o); }; diff --git a/branches/riccardo/monitor2/osd/OSD.cc b/branches/riccardo/monitor2/osd/OSD.cc index b1290b132cac9..0d075ff4123f2 100644 --- a/branches/riccardo/monitor2/osd/OSD.cc +++ b/branches/riccardo/monitor2/osd/OSD.cc @@ -435,7 +435,7 @@ void OSD::_remove_pg(pg_t pgid) p++) t.remove(*p); t.remove_collection(pgid); - t.remove(object_t(1,pgid)); // log too + t.remove(pgid.to_object()); // log too } store->apply_transaction(t); diff --git a/branches/riccardo/monitor2/osd/ObjectStore.h b/branches/riccardo/monitor2/osd/ObjectStore.h index 70bc92dd653f7..9ff94adfcae99 100644 --- a/branches/riccardo/monitor2/osd/ObjectStore.h +++ b/branches/riccardo/monitor2/osd/ObjectStore.h @@ -466,7 +466,7 @@ public: return -1; } - virtual int listattr(object_t oid, char *attrs, size_t size) {return 0;} //= 0; + //virtual int listattr(object_t oid, char *attrs, size_t size) {return 0;} //= 0; // collections virtual int list_collections(list& ls) {return 0;}//= 0; @@ -489,10 +489,10 @@ public: Context *onsafe=0) {return 0;} //= 0; virtual int collection_getattr(coll_t cid, const char *name, void *value, size_t size) {return 0;} //= 0; - virtual int collection_listattr(coll_t cid, char *attrs, size_t size) {return 0;} //= 0; + //virtual int collection_listattr(coll_t cid, char *attrs, size_t size) {return 0;} //= 0; - virtual void sync(Context *onsync) {}; - virtual void sync() {}; + virtual void sync(Context *onsync) {} + virtual void sync() {} virtual void _fake_writes(bool b) {}; diff --git a/branches/riccardo/monitor2/osd/PG.cc b/branches/riccardo/monitor2/osd/PG.cc index fb87630b3306f..218f9eac36aae 100644 --- a/branches/riccardo/monitor2/osd/PG.cc +++ b/branches/riccardo/monitor2/osd/PG.cc @@ -826,7 +826,7 @@ void PG::activate(ObjectStore::Transaction& t) state_set(STATE_ACTIVE); state_clear(STATE_STRAY); if (is_crashed()) { - assert(is_replay()); + //assert(is_replay()); // HELP.. not on replica? state_clear(STATE_CRASHED); state_clear(STATE_REPLAY); } @@ -1174,6 +1174,8 @@ void PG::clean_replicas() void PG::write_log(ObjectStore::Transaction& t) { + dout(10) << "write_log" << endl; + // assemble buffer bufferlist bl; @@ -1186,12 +1188,16 @@ void PG::write_log(ObjectStore::Transaction& t) if (bl.length() % 4096 == 0) ondisklog.block_map[bl.length()] = p->version; bl.append((char*)&(*p), sizeof(*p)); + if (g_conf.osd_pad_pg_log) { // pad to 4k, until i fix ebofs reallocation crap. FIXME. + bufferptr bp(4096 - sizeof(*p)); + bl.push_back(bp); + } } ondisklog.top = bl.length(); // write it - t.remove( object_t(1,info.pgid) ); - t.write( object_t(1,info.pgid) , 0, bl.length(), bl); + t.remove( info.pgid.to_object() ); + t.write( info.pgid.to_object() , 0, bl.length(), bl); t.collection_setattr(info.pgid, "ondisklog_bottom", &ondisklog.bottom, sizeof(ondisklog.bottom)); t.collection_setattr(info.pgid, "ondisklog_top", &ondisklog.top, sizeof(ondisklog.top)); @@ -1234,6 +1240,8 @@ void PG::trim_ondisklog_to(ObjectStore::Transaction& t, eversion_t v) void PG::append_log(ObjectStore::Transaction& t, PG::Log::Entry& logentry, eversion_t trim_to) { + dout(10) << "append_log " << ondisklog.top << " " << logentry << endl; + // write entry on disk bufferlist bl; bl.append( (char*)&logentry, sizeof(logentry) ); @@ -1241,7 +1249,7 @@ void PG::append_log(ObjectStore::Transaction& t, PG::Log::Entry& logentry, bufferptr bp(4096 - sizeof(logentry)); bl.push_back(bp); } - t.write( object_t(1,info.pgid), ondisklog.top, bl.length(), bl ); + t.write( info.pgid.to_object(), ondisklog.top, bl.length(), bl ); // update block map? if (ondisklog.top % 4096 == 0) @@ -1263,30 +1271,43 @@ void PG::append_log(ObjectStore::Transaction& t, PG::Log::Entry& logentry, void PG::read_log(ObjectStore *store) { + int r; // load bounds ondisklog.bottom = ondisklog.top = 0; - store->collection_getattr(info.pgid, "ondisklog_bottom", &ondisklog.bottom, sizeof(ondisklog.bottom)); - store->collection_getattr(info.pgid, "ondisklog_top", &ondisklog.top, sizeof(ondisklog.top)); - + r = store->collection_getattr(info.pgid, "ondisklog_bottom", &ondisklog.bottom, sizeof(ondisklog.bottom)); + assert(r == sizeof(ondisklog.bottom)); + r = store->collection_getattr(info.pgid, "ondisklog_top", &ondisklog.top, sizeof(ondisklog.top)); + assert(r == sizeof(ondisklog.top)); + + dout(10) << "read_log [" << ondisklog.bottom << "," << ondisklog.top << ")" << endl; + log.backlog = info.log_backlog; log.bottom = info.log_bottom; if (ondisklog.top > 0) { // read bufferlist bl; - store->read(object_t(1,info.pgid), ondisklog.bottom, ondisklog.top-ondisklog.bottom, bl); + store->read(info.pgid.to_object(), ondisklog.bottom, ondisklog.top-ondisklog.bottom, bl); PG::Log::Entry e; off_t pos = ondisklog.bottom; + assert(log.log.empty()); while (pos < ondisklog.top) { bl.copy(pos-ondisklog.bottom, sizeof(e), (char*)&e); + dout(10) << "read_log " << pos << " " << e << endl; + if (e.version > log.bottom || log.backlog) { // ignore items below log.bottom if (pos % 4096 == 0) - ondisklog.block_map[pos] = e.version; + ondisklog.block_map[pos] = e.version; log.log.push_back(e); + } else { + dout(10) << "read_log ignoring entry at " << pos << endl; } - pos += sizeof(e); + if (g_conf.osd_pad_pg_log) // pad to 4k, until i fix ebofs reallocation crap. FIXME. + pos += 4096; + else + pos += sizeof(e); } } log.top = info.last_update; diff --git a/branches/riccardo/monitor2/osd/PG.h b/branches/riccardo/monitor2/osd/PG.h index 6d6de985eaf8e..f3b00cf935f91 100644 --- a/branches/riccardo/monitor2/osd/PG.h +++ b/branches/riccardo/monitor2/osd/PG.h @@ -613,7 +613,7 @@ inline ostream& operator<<(ostream& out, const PG::Info::History& h) inline ostream& operator<<(ostream& out, const PG::Info& pgi) { - out << "pginfo(" << hex << pgi.pgid << dec; + out << "pginfo(" << pgi.pgid; if (pgi.is_empty()) out << " empty"; else @@ -669,8 +669,8 @@ inline ostream& operator<<(ostream& out, const PG& pg) !pg.log.backlog) || (pg.log.log.rbegin()->version.version != pg.log.top.version)) { out << " (log bound mismatch, actual=[" - << pg.log.log.begin()->version << "," - << pg.log.log.rbegin()->version << "])"; + << pg.log.log.begin()->version << "," + << pg.log.log.rbegin()->version << "] len=" << pg.log.log.size() << ")"; } } diff --git a/branches/riccardo/monitor2/osd/osd_types.h b/branches/riccardo/monitor2/osd/osd_types.h index e86c074fa1b15..f8656e1f3e178 100644 --- a/branches/riccardo/monitor2/osd/osd_types.h +++ b/branches/riccardo/monitor2/osd/osd_types.h @@ -16,10 +16,12 @@ #include "include/reqid.h" +#define PG_INO 1 + + // osd types typedef __uint64_t coll_t; // collection id - // pg stuff typedef __uint16_t ps_t; typedef __uint8_t pruleset_t; @@ -28,13 +30,14 @@ typedef __uint8_t pruleset_t; struct pg_t { union { struct { - int preferred; - ps_t ps; - __uint8_t nrep; - pruleset_t ruleset; + __uint32_t preferred:32; // 32 + ps_t ps:16; // 16 + __uint8_t nrep:8; // 8 + pruleset_t ruleset:8; // 8 } fields; - __uint64_t val; + __uint64_t val; // 64 } u; + pg_t() { u.val = 0; } pg_t(const pg_t& o) { u.val = o.u.val; } pg_t(ps_t s, int p, unsigned char n, pruleset_t r=0) { @@ -52,6 +55,8 @@ struct pg_t { pg_t operator++() { ++u.val; return *this; } */ operator __uint64_t() const { return u.val; } + + object_t to_object() const { return object_t(PG_INO, u.val >> 32, u.val & 0xffffffff); } }; inline ostream& operator<<(ostream& out, pg_t pg) { @@ -62,6 +67,8 @@ inline ostream& operator<<(ostream& out, pg_t pg) { if (pg.u.fields.preferred) out << pg.u.fields.preferred << '.'; out << hex << pg.u.fields.ps << dec; + out << "=" << hex << pg.u.val << dec; + out << "=" << hex << (__uint64_t)pg << dec; return out; } -- 2.39.5