From b5aa44daf69526b3295d8d899e0e878588055275 Mon Sep 17 00:00:00 2001 From: sageweil Date: Tue, 21 Nov 2006 00:16:08 +0000 Subject: [PATCH] pg_t stuff git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@966 29311d96-e01e-0410-9327-a35deaab8ce9 --- ceph/osd/OSD.cc | 95 +++++++++++++++++++------------ ceph/osd/OSDMap.h | 140 +++++++++++++++++++++++----------------------- 2 files changed, 128 insertions(+), 107 deletions(-) diff --git a/ceph/osd/OSD.cc b/ceph/osd/OSD.cc index 044869fb05cca..d37ef233901c0 100644 --- a/ceph/osd/OSD.cc +++ b/ceph/osd/OSD.cc @@ -450,10 +450,25 @@ void OSD::activate_pg(pg_t pgid, epoch_t epoch) _unlock_pg(pgid); } } - osd_lock.Unlock(); + + // finishers? + if (finished.empty()) { + osd_lock.Unlock(); + } else { + list waiting; + waiting.splice(waiting.begin(), finished); + + osd_lock.Unlock(); + + for (list::iterator it = waiting.begin(); + it != waiting.end(); + it++) { + dispatch(*it); + } + } // kick myself w/ a ping .. HACK - messenger->send_message(new MPing, MSG_ADDR_OSD(whoami)); + //messenger->send_message(new MPing, MSG_ADDR_OSD(whoami)); } @@ -995,6 +1010,7 @@ void OSD::advance_map(ObjectStore::Transaction& t) if (osdmap->is_mkfs()) { ps_t maxps = 1ULL << osdmap->get_pg_bits(); + ps_t maxlps = 1ULL << osdmap->get_localized_pg_bits(); dout(1) << "mkfs on " << osdmap->get_pg_bits() << " bits, " << maxps << " pgs" << endl; assert(osdmap->get_epoch() == 1); @@ -1005,43 +1021,45 @@ void OSD::advance_map(ObjectStore::Transaction& t) for (int nrep = 1; nrep <= MIN(g_conf.num_osd, g_conf.osd_max_rep); // for low osd counts.. hackish bleh nrep++) { - for (pg_t ps = 0; ps < maxps; ++ps) { - pg_t pgid = osdmap->ps_nrep_to_pg(ps, nrep); - vector acting; - int nrep = osdmap->pg_to_acting_osds(pgid, acting); - int role = osdmap->calc_pg_role(whoami, acting, nrep); - if (role < 0) continue; - - PG *pg = create_pg(pgid, t); - pg->set_role(role); - pg->acting.swap(acting); - pg->last_epoch_started_any = - pg->info.last_epoch_started = - pg->info.history.same_since = - pg->info.history.same_primary_since = - pg->info.history.same_acker_since = osdmap->get_epoch(); - pg->activate(t); - - dout(7) << "created " << *pg << endl; + for (ps_t ps = 0; ps < maxps; ++ps) { + vector acting; + pg_t pgid = osdmap->ps_nrep_to_pg(ps, nrep); + int nrep = osdmap->pg_to_acting_osds(pgid, acting); + int role = osdmap->calc_pg_role(whoami, acting, nrep); + if (role < 0) continue; + + PG *pg = create_pg(pgid, t); + pg->set_role(role); + pg->acting.swap(acting); + pg->last_epoch_started_any = + pg->info.last_epoch_started = + pg->info.history.same_since = + pg->info.history.same_primary_since = + pg->info.history.same_acker_since = osdmap->get_epoch(); + pg->activate(t); + + dout(7) << "created " << *pg << endl; } - // local PG too - pg_t pgid = osdmap->osd_nrep_to_pg(whoami, nrep); - vector acting; - int nrep = osdmap->pg_to_acting_osds(pgid, acting); - int role = osdmap->calc_pg_role(whoami, acting, nrep); - - PG *pg = create_pg(pgid, t); - pg->acting.swap(acting); - pg->set_role(role); - pg->last_epoch_started_any = - pg->info.last_epoch_started = - pg->info.history.same_primary_since = - pg->info.history.same_acker_since = - pg->info.history.same_since = osdmap->get_epoch(); - pg->activate(t); - - dout(7) << "created " << *pg << endl; + for (ps_t ps = 0; ps < maxlps; ++ps) { + // local PG too + vector acting; + pg_t pgid = osdmap->ps_osd_nrep_to_pg(ps, whoami, nrep); + int nrep = osdmap->pg_to_acting_osds(pgid, acting); + int role = osdmap->calc_pg_role(whoami, acting, nrep); + + PG *pg = create_pg(pgid, t); + pg->acting.swap(acting); + pg->set_role(role); + pg->last_epoch_started_any = + pg->info.last_epoch_started = + pg->info.history.same_primary_since = + pg->info.history.same_acker_since = + pg->info.history.same_since = osdmap->get_epoch(); + pg->activate(t); + + dout(7) << "created " << *pg << endl; + } } dout(1) << "mkfs done, created " << pg_map.size() << " pgs" << endl; @@ -1376,6 +1394,9 @@ bool OSD::pg_exists(pg_t pgid) PG *OSD::create_pg(pg_t pgid, ObjectStore::Transaction& t) { + if (pg_map.count(pgid)) { + dout(0) << "create_pg on " << pgid << ", already have " << *pg_map[pgid] << endl; + } assert(pg_map.count(pgid) == 0); assert(!pg_exists(pgid)); diff --git a/ceph/osd/OSDMap.h b/ceph/osd/OSDMap.h index 1d8cbc52f896b..6da41fc0de0b6 100644 --- a/ceph/osd/OSDMap.h +++ b/ceph/osd/OSDMap.h @@ -41,7 +41,7 @@ using namespace std; */ // from LSB to MSB, -#define PG_PS_BITS 24 // max bits for placement seed/group portion of PG +#define PG_PS_BITS 16 // max bits for placement seed/group portion of PG #define PG_REP_BITS 6 // up to 64 replicas #define PG_TYPE_BITS 2 #define PG_PS_MASK ((1LL< osds; // all osds set down_osds; // list of down disks @@ -120,7 +121,7 @@ private: friend class MDS; public: - OSDMap() : epoch(0), mon_epoch(0), pg_bits(5) {} + OSDMap() : epoch(0), mon_epoch(0), pg_bits(5), localized_pg_bits(3) {} // map info epoch_t get_epoch() const { return epoch; } @@ -128,6 +129,7 @@ private: int get_pg_bits() const { return pg_bits; } void set_pg_bits(int b) { pg_bits = b; } + int get_localized_pg_bits() const { return localized_pg_bits; } const utime_t& get_ctime() const { return ctime; } @@ -259,8 +261,8 @@ private: /**** mapping facilities ****/ - // oid -> ps - ps_t object_to_pg(object_t oid, FileLayout& layout) { + // oid -> pg + pg_t object_to_pg(object_t oid, FileLayout& layout) { static crush::Hash H(777); int policy = layout.object_layout; @@ -268,7 +270,7 @@ private: policy = g_conf.osd_object_layout; int type = PG_TYPE_RAND; - pg_t ps; + ps_t ps; switch (policy) { case OBJECT_LAYOUT_LINEAR: @@ -305,41 +307,41 @@ private: } // construct final PG - pg_t pg = type; + /*pg_t pg = type; pg = (pg << PG_REP_BITS) | (pg_t)layout.num_rep; pg = (pg << PG_PS_BITS) | ps; + */ //cout << "pg " << hex << pg << dec << endl; - return pg; + return pg_t(ps, 0, layout.num_rep); } // (ps, nrep) -> pg pg_t ps_nrep_to_pg(ps_t ps, int nrep) { - return ((pg_t)ps & ((1ULL< nrep int pg_to_nrep(pg_t pg) { - return (pg >> PG_PS_BITS) & ((1ULL << PG_REP_BITS)-1); + return pg.u.fields.nrep; + //return (pg >> PG_PS_BITS) & ((1ULL << PG_REP_BITS)-1); } // pg -> ps int pg_to_ps(pg_t pg) { - return pg & PG_PS_MASK; - } - - // pg -> pg_type - int pg_to_type(pg_t pg) { - return pg >> (PG_PS_BITS + PG_REP_BITS); + //return pg & PG_PS_MASK; + return pg.u.fields.ps; } - // pg -> (osd list) int pg_to_osds(pg_t pg, @@ -347,71 +349,69 @@ private: pg_t ps = pg_to_ps(pg); int num_rep = pg_to_nrep(pg); assert(num_rep > 0); - int type = pg_to_type(pg); - - // spread "on" ps bits around a bit (usually only low bits are set bc of pg_bits) - if (num_rep > 0) { - switch(g_conf.osd_pg_layout) { - case PG_LAYOUT_CRUSH: - crush.do_rule(crush.rules[num_rep], - ps, - osds, - out_osds, overload_osds); - break; - - case PG_LAYOUT_LINEAR: - for (int i=0; i 0) { + int osd = pg.u.fields.preferred-1; - if (type == PG_TYPE_STARTOSD) { // already in there? if (osds.empty()) { - osds.push_back((int)ps); + osds.push_back(osd); } else { assert(num_rep > 0); for (int i=1; i