From 0bf1a8e1ffb0839ed02d5defd85646cd2ce240f2 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 30 Apr 2009 09:44:39 -0700 Subject: [PATCH] osd: more pg_pool, pg changes Remove 'type' from pg_t. It's implied based on the pool. Clean out file_layout. The pool implies a type and namespace. --- src/client/SyntheticClient.cc | 12 +++++----- src/config.cc | 5 ----- src/include/ceph_fs.h | 10 +++------ src/kernel/osd_client.c | 4 ++-- src/kernel/osdmap.c | 1 - src/mds/mdstypes.h | 2 +- src/mon/PGMonitor.cc | 3 +-- src/osd/OSD.cc | 5 ++--- src/osd/OSDMap.h | 19 +++++++++++----- src/osd/osd_types.h | 42 ++++++++++++++++------------------- 10 files changed, 48 insertions(+), 55 deletions(-) diff --git a/src/client/SyntheticClient.cc b/src/client/SyntheticClient.cc index 53be64de6b27..db10860889ff 100644 --- a/src/client/SyntheticClient.cc +++ b/src/client/SyntheticClient.cc @@ -1345,7 +1345,7 @@ int SyntheticClient::play_trace(Trace& t, string& prefix, bool metadata_only) int64_t ol = t.get_int(); object_t oid(oh, ol); lock.Lock(); - ceph_object_layout layout = client->osdmap->make_object_layout(oid, pg_t::TYPE_REP, 2, 0); + ceph_object_layout layout = client->osdmap->make_object_layout(oid, CEPH_CASDATA_RULE); __u64 size; client->objecter->stat(oid, layout, &size, 0, new C_SafeCond(&lock, &cond, &ack)); while (!ack) cond.Wait(lock); @@ -1358,7 +1358,7 @@ int SyntheticClient::play_trace(Trace& t, string& prefix, bool metadata_only) int64_t len = t.get_int(); object_t oid(oh, ol); lock.Lock(); - ceph_object_layout layout = client->osdmap->make_object_layout(oid, pg_t::TYPE_REP, 2, 0); + ceph_object_layout layout = client->osdmap->make_object_layout(oid, CEPH_CASDATA_RULE); bufferlist bl; client->objecter->read(oid, layout, off, len, &bl, 0, new C_SafeCond(&lock, &cond, &ack)); while (!ack) cond.Wait(lock); @@ -1371,7 +1371,7 @@ int SyntheticClient::play_trace(Trace& t, string& prefix, bool metadata_only) int64_t len = t.get_int(); object_t oid(oh, ol); lock.Lock(); - ceph_object_layout layout = client->osdmap->make_object_layout(oid, pg_t::TYPE_REP, 2, 0); + ceph_object_layout layout = client->osdmap->make_object_layout(oid, CEPH_CASDATA_RULE); bufferptr bp(len); bufferlist bl; bl.push_back(bp); @@ -1389,7 +1389,7 @@ int SyntheticClient::play_trace(Trace& t, string& prefix, bool metadata_only) int64_t len = t.get_int(); object_t oid(oh, ol); lock.Lock(); - ceph_object_layout layout = client->osdmap->make_object_layout(oid, pg_t::TYPE_REP, 2, 0); + ceph_object_layout layout = client->osdmap->make_object_layout(oid, CEPH_CASDATA_RULE); SnapContext snapc; client->objecter->zero(oid, layout, off, len, snapc, g_clock.now(), 0, new C_SafeCond(&lock, &cond, &ack), @@ -2146,7 +2146,7 @@ int SyntheticClient::create_objects(int nobj, int osize, int inflight) if (time_to_stop()) break; object_t oid(0x1000, i); - ceph_object_layout layout = client->osdmap->make_object_layout(oid, pg_t::TYPE_REP, 0); + ceph_object_layout layout = client->osdmap->make_object_layout(oid, CEPH_CASDATA_RULE); SnapContext snapc; if (i % inflight == 0) { @@ -2249,7 +2249,7 @@ int SyntheticClient::object_rw(int nobj, int osize, int wrpc, } object_t oid(0x1000, o); - ceph_object_layout layout = client->osdmap->make_object_layout(oid, pg_t::TYPE_REP, 0); + ceph_object_layout layout = client->osdmap->make_object_layout(oid, CEPH_CASDATA_RULE); SnapContext snapc; client->client_lock.Lock(); diff --git a/src/config.cc b/src/config.cc index edf553409911..21ee85b8f6c2 100644 --- a/src/config.cc +++ b/src/config.cc @@ -85,7 +85,6 @@ struct ceph_file_layout g_default_file_layout = { fl_cas_hash: init_le32(0), fl_object_stripe_unit: init_le32(0), fl_pg_preferred: init_le32(-1), - fl_pg_type: CEPH_PG_TYPE_REP, fl_pg_pool: {CEPH_DATA_RULE}, }; @@ -96,7 +95,6 @@ struct ceph_file_layout g_default_casdata_layout = { fl_cas_hash: init_le32(0), fl_object_stripe_unit: init_le32(0), fl_pg_preferred: init_le32(-1), - fl_pg_type: CEPH_PG_TYPE_REP, fl_pg_pool: {CEPH_CASDATA_RULE}, }; @@ -107,7 +105,6 @@ struct ceph_file_layout g_default_mds_dir_layout = { fl_cas_hash: init_le32(0), fl_object_stripe_unit: init_le32(0), fl_pg_preferred: init_le32(-1), - fl_pg_type: CEPH_PG_TYPE_REP, fl_pg_pool: {CEPH_METADATA_RULE}, }; @@ -118,7 +115,6 @@ struct ceph_file_layout g_default_mds_log_layout = { fl_cas_hash: init_le32(0), fl_object_stripe_unit: init_le32(0), fl_pg_preferred: init_le32(-1), - fl_pg_type: CEPH_PG_TYPE_REP, fl_pg_pool: {CEPH_METADATA_RULE}, }; @@ -129,7 +125,6 @@ struct ceph_file_layout g_default_mds_anchortable_layout = { fl_cas_hash: init_le32(0), fl_object_stripe_unit: init_le32(0), fl_pg_preferred: init_le32(-1), - fl_pg_type: CEPH_PG_TYPE_REP, fl_pg_pool: {CEPH_METADATA_RULE}, }; diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h index e79f583611fb..122c9b01770c 100644 --- a/src/include/ceph_fs.h +++ b/src/include/ceph_fs.h @@ -43,7 +43,7 @@ #define CEPH_OSD_PROTOCOL 5 /* cluster internal */ #define CEPH_MDS_PROTOCOL 9 /* cluster internal */ #define CEPH_MON_PROTOCOL 4 /* cluster internal */ -#define CEPH_OSDC_PROTOCOL 7 /* public/client */ +#define CEPH_OSDC_PROTOCOL 8 /* public/client */ #define CEPH_MDSC_PROTOCOL 18 /* public/client */ #define CEPH_MONC_PROTOCOL 11 /* public/client */ @@ -264,9 +264,7 @@ struct ceph_file_layout { /* object -> pg layout */ __le32 fl_pg_preferred; /* preferred primary for pg (-1 for none) */ - __u8 fl_pg_type; - __le16 fl_pg_pool; /* implies crush ruleset, rep level */ - __le16 fl_pg_ns; /* object namespace */ + __le32 fl_pg_pool; /* namespace, crush ruleset, rep level */ } __attribute__ ((packed)); #define ceph_file_layout_su(l) ((__s32)le32_to_cpu((l).fl_stripe_unit)) @@ -297,9 +295,7 @@ union ceph_pg { struct { __s16 preferred; /* preferred primary osd */ __u16 ps; /* placement seed */ - __u16 pool; /* implies crush ruleset */ - __u8 type; - __u8 __pad; + __u32 pool; /* implies crush ruleset */ } pg; } __attribute__ ((packed)); diff --git a/src/kernel/osd_client.c b/src/kernel/osd_client.c index 1fc0e0958dd8..2022b8cf47ec 100644 --- a/src/kernel/osd_client.c +++ b/src/kernel/osd_client.c @@ -333,10 +333,10 @@ static int map_osds(struct ceph_osd_client *osdc, return -1; pool = &osdc->osdmap->pg_pool[req->r_pgid.pg.pool]; ruleno = crush_find_rule(osdc->osdmap->crush, pool->v.crush_ruleset, - req->r_pgid.pg.type, pool->v.size); + pool->v.type, pool->v.size); if (ruleno < 0) { derr(0, "map_osds no crush rule for pool %d type %d size %d\n", - req->r_pgid.pg.pool, req->r_pgid.pg.type, pool->v.size); + req->r_pgid.pg.pool, pool->v.type, pool->v.size); return -1; } diff --git a/src/kernel/osdmap.c b/src/kernel/osdmap.c index 7030ef80f17a..4b3c43573fea 100644 --- a/src/kernel/osdmap.c +++ b/src/kernel/osdmap.c @@ -668,7 +668,6 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol, pgid.pg64 = 0; /* start with it zeroed out */ pgid.pg.ps = bno + crush_hash32_2(ino, ino>>32); pgid.pg.preferred = preferred; - pgid.pg.type = fl->fl_pg_type; pgid.pg.pool = fl->fl_pg_pool; ol->ol_pgid = cpu_to_le64(pgid.pg64); diff --git a/src/mds/mdstypes.h b/src/mds/mdstypes.h index 005d920444d2..5cde0b26c0b3 100644 --- a/src/mds/mdstypes.h +++ b/src/mds/mdstypes.h @@ -18,7 +18,7 @@ using namespace std; #include "include/xlist.h" #include "include/nstring.h" -#define CEPH_FS_ONDISK_MAGIC "ceph fs volume v002" +#define CEPH_FS_ONDISK_MAGIC "ceph fs volume v003" #define MDS_REF_SET // define me for improved debug output, sanity checking diff --git a/src/mon/PGMonitor.cc b/src/mon/PGMonitor.cc index e606932b927e..0738e51cfa05 100644 --- a/src/mon/PGMonitor.cc +++ b/src/mon/PGMonitor.cc @@ -435,7 +435,6 @@ bool PGMonitor::register_new_pgs() p++) { int poolid = p->first; pg_pool_t &pool = p->second; - int type = pool.get_type(); int ruleno = pool.get_crush_ruleset(); if (!mon->osdmon()->osdmap.crush.rule_exists(ruleno)) continue; @@ -449,7 +448,7 @@ bool PGMonitor::register_new_pgs() dout(10) << "register_new_pgs scanning " << pool << dendl; for (ps_t ps = 0; ps < pool.get_pg_num(); ps++) { - pg_t pgid(type, ps, poolid, -1); + pg_t pgid(ps, poolid, -1); if (pg_map.pg_stat.count(pgid)) { dout(20) << "register_new_pgs have " << pgid << dendl; continue; diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 461586e573e2..11149fc7ef09 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -552,7 +552,7 @@ PG *OSD::_open_lock_pg(pg_t pgid) // create PG *pg; - if (pgid.is_rep()) + if (osdmap->get_pg_type(pgid) == CEPH_PG_TYPE_REP) pg = new ReplicatedPG(this, pgid); //else if (pgid.is_raid4()) //pg = new RAID4PG(this, pgid); @@ -2457,8 +2457,7 @@ void OSD::split_pg(PG *parent, map& children, ObjectStore::Transaction for (vector::iterator p = olist.begin(); p != olist.end(); p++) { pobject_t poid = *p; - ceph_object_layout l = osdmap->make_object_layout(poid.oid, parentid.type(), - parentid.pool(), parentid.preferred()); + ceph_object_layout l = osdmap->make_object_layout(poid.oid, parentid.pool(), parentid.preferred()); if (le64_to_cpu(l.ol_pgid) != parentid.u.pg64) { pg_t pgid(le64_to_cpu(l.ol_pgid)); dout(20) << " moving " << poid << " from " << parentid << " -> " << pgid << dendl; diff --git a/src/osd/OSDMap.h b/src/osd/OSDMap.h index 34c4e8375b9c..58264419c45e 100644 --- a/src/osd/OSDMap.h +++ b/src/osd/OSDMap.h @@ -620,13 +620,12 @@ private: // oid -> pg ceph_object_layout file_to_object_layout(object_t oid, ceph_file_layout& layout) { - return make_object_layout(oid, layout.fl_pg_type, - layout.fl_pg_pool, + return make_object_layout(oid, layout.fl_pg_pool, ceph_file_layout_pg_preferred(layout), ceph_file_layout_object_su(layout)); } - ceph_object_layout make_object_layout(object_t oid, int pg_type, int pg_pool, int preferred=-1, int object_stripe_unit = 0) { + ceph_object_layout make_object_layout(object_t oid, int pg_pool, int preferred=-1, int object_stripe_unit = 0) { // calculate ps (placement seed) ps_t ps; // NOTE: keep full precision, here! switch (g_conf.osd_object_layout) { @@ -653,7 +652,7 @@ private: //cout << "preferred " << preferred << " num " << num << " mask " << num_mask << " ps " << ps << endl; // construct object layout - pg_t pgid = pg_t(pg_type, ps, pg_pool, preferred); + pg_t pgid = pg_t(ps, pg_pool, preferred); ceph_object_layout layout; layout.ol_pgid = pgid.u.pg64; layout.ol_stripe_unit = object_stripe_unit; @@ -675,7 +674,7 @@ private: case CEPH_PG_LAYOUT_CRUSH: { // what crush rule? - int ruleno = crush.find_rule(pool.get_crush_ruleset(), pg.type(), size); + int ruleno = crush.find_rule(pool.get_crush_ruleset(), pool.get_type(), size); if (ruleno >= 0) crush.do_rule(ruleno, pps, osds, size, pg.preferred(), osd_weight); } @@ -769,11 +768,21 @@ private: + const pg_pool_t& get_pg_pool(int p) { + assert(pools.count(p)); + return pools[p]; + } unsigned get_pg_size(pg_t pg) { assert(pools.count(pg.pool())); pg_pool_t &pool = pools[pg.pool()]; return pool.get_size(); } + int get_pg_type(pg_t pg) { + assert(pools.count(pg.pool())); + pg_pool_t &pool = pools[pg.pool()]; + return pool.get_type(); + } + pg_t raw_pg_to_pg(pg_t pg) { assert(pools.count(pg.pool())); diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index 9d074b817037..190ba850f121 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -25,7 +25,7 @@ -#define CEPH_OSD_ONDISK_MAGIC "ceph osd volume v012" +#define CEPH_OSD_ONDISK_MAGIC "ceph osd volume v013" @@ -109,19 +109,12 @@ enum { // placement group id struct pg_t { -public: - static const int TYPE_REP = CEPH_PG_TYPE_REP; - static const int TYPE_RAID4 = CEPH_PG_TYPE_RAID4; - - //private: union ceph_pg u; -public: pg_t() { u.pg64 = 0; } pg_t(const pg_t& o) { u.pg64 = o.u.pg64; } - pg_t(int type, ps_t seed, int pool, int pref) { + pg_t(ps_t seed, int pool, int pref) { u.pg64 = 0; - u.pg.type = type; u.pg.ps = seed; u.pg.pool = pool; u.pg.preferred = pref; // hack: avoid negative. @@ -132,10 +125,6 @@ public: u = cpg; } - int type() { return u.pg.type; } - bool is_rep() { return type() == TYPE_REP; } - bool is_raid4() { return type() == TYPE_RAID4; } - ps_t ps() { return u.pg.ps; } int pool() { return u.pg.pool; } int preferred() { return u.pg.preferred; } // hack: avoid negative. @@ -161,7 +150,6 @@ public: int r = sscanf(s, "%d.%x", &pool, &ps); if (r < 3) return false; - u.pg.type = TYPE_REP; u.pg.pool = pool; u.pg.ps = ps; u.pg.preferred = -1; @@ -431,6 +419,9 @@ struct pg_pool_t { int get_crush_ruleset() const { return v.crush_ruleset; } epoch_t get_last_change() const { return v.last_change; } + bool is_rep() const { return get_type() == CEPH_PG_TYPE_REP; } + bool is_raid4() const { return get_type() == CEPH_PG_TYPE_RAID4; } + void calc_pg_masks() { pg_num_mask = (1 << calc_bits_of(v.pg_num-1)) - 1; pgp_num_mask = (1 << calc_bits_of(v.pgp_num-1)) - 1; @@ -470,15 +461,20 @@ struct pg_pool_t { WRITE_CLASS_ENCODER(pg_pool_t) inline ostream& operator<<(ostream& out, const pg_pool_t& p) { - return out << "pg_pool(type " << p.get_type() - << " size " << p.get_size() - << " ruleset " << p.get_crush_ruleset() - << " pg_num " << p.get_pg_num() - << " pgp_num " << p.get_pgp_num() - << " lpg_num " << p.get_lpg_num() - << " lpgp_num " << p.get_lpgp_num() - << " last_change " << p.get_last_change() - << ")"; + out << "pg_pool("; + switch (p.get_type()) { + case CEPH_PG_TYPE_REP: out << "rep"; break; + default: out << "type " << p.get_type(); + } + out << " size " << p.get_size() + << " ruleset " << p.get_crush_ruleset() + << " pg_num " << p.get_pg_num() + << " pgp_num " << p.get_pgp_num() + << " lpg_num " << p.get_lpg_num() + << " lpgp_num " << p.get_lpgp_num() + << " last_change " << p.get_last_change() + << ")"; + return out; } /** pg_stat -- 2.47.3