From: Sage Weil Date: Tue, 11 Feb 2014 17:27:32 +0000 (-0800) Subject: osd/OSDMap: add osd_primary_affinity fields, accessors, encoding X-Git-Tag: v0.78~173^2~9 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=cee914290c5540eb1fb9d70faac70a581381c29b;p=ceph.git osd/OSDMap: add osd_primary_affinity fields, accessors, encoding Signed-off-by: Sage Weil --- diff --git a/src/include/rados.h b/src/include/rados.h index 59cc77bceebb..6693f9659ec5 100644 --- a/src/include/rados.h +++ b/src/include/rados.h @@ -121,6 +121,9 @@ extern const char *ceph_osd_state_name(int s); #define CEPH_OSD_IN 0x10000 #define CEPH_OSD_OUT 0 +#define CEPH_OSD_MAX_PRIMARY_AFFINITY 0x10000 +#define CEPH_OSD_DEFAULT_PRIMARY_AFFINITY 0x10000 + /* * osd map flag bits diff --git a/src/osd/OSDMap.cc b/src/osd/OSDMap.cc index 406327c23567..92591cf08a86 100644 --- a/src/osd/OSDMap.cc +++ b/src/osd/OSDMap.cc @@ -388,7 +388,7 @@ void OSDMap::Incremental::encode(bufferlist& bl, uint64_t features) const ENCODE_START(7, 7, bl); { - ENCODE_START(1, 1, bl); // client-usable data + ENCODE_START(2, 1, bl); // client-usable data ::encode(fsid, bl); ::encode(epoch, bl); ::encode(modified, bl); @@ -406,6 +406,7 @@ void OSDMap::Incremental::encode(bufferlist& bl, uint64_t features) const ::encode(new_weight, bl); ::encode(new_pg_temp, bl); ::encode(new_primary_temp, bl); + ::encode(new_primary_affinity, bl); ENCODE_FINISH(bl); // client-usable data } @@ -541,7 +542,7 @@ void OSDMap::Incremental::decode(bufferlist::iterator& bl) return; } { - DECODE_START(1, bl); // client-usable data + DECODE_START(2, bl); // client-usable data ::decode(fsid, bl); ::decode(epoch, bl); ::decode(modified, bl); @@ -559,6 +560,10 @@ void OSDMap::Incremental::decode(bufferlist::iterator& bl) ::decode(new_weight, bl); ::decode(new_pg_temp, bl); ::decode(new_primary_temp, bl); + if (struct_v >= 2) + ::decode(new_primary_affinity, bl); + else + new_primary_affinity.clear(); DECODE_FINISH(bl); // client-usable data } @@ -824,6 +829,8 @@ void OSDMap::set_max_osd(int m) osd_addrs->hb_back_addr.resize(m); osd_addrs->hb_front_addr.resize(m); osd_uuid->resize(m); + if (osd_primary_affinity) + osd_primary_affinity->resize(m, CEPH_OSD_DEFAULT_PRIMARY_AFFINITY); calc_num_osds(); } @@ -1168,6 +1175,12 @@ int OSDMap::apply_incremental(const Incremental &inc) osd_state[i->first] &= ~(CEPH_OSD_AUTOOUT | CEPH_OSD_NEW); } + for (map::const_iterator i = inc.new_primary_affinity.begin(); + i != inc.new_primary_affinity.end(); + ++i) { + set_primary_affinity(i->first, i->second); + } + // up/down for (map::const_iterator i = inc.new_state.begin(); i != inc.new_state.end(); @@ -1397,7 +1410,7 @@ int OSDMap::pg_to_osds(pg_t pg, vector *raw, int *primary) const const pg_pool_t *pool = get_pg_pool(pg.pool()); if (!pool) return 0; - int r = _pg_to_osds(*pool, pg, raw, primary); + int r = _pg_to_osds(*pool, pg, raw, primary, NULL); return r; } @@ -1589,7 +1602,7 @@ void OSDMap::encode(bufferlist& bl, uint64_t features) const ENCODE_START(7, 7, bl); { - ENCODE_START(1, 1, bl); // client-usable data + ENCODE_START(2, 1, bl); // client-usable data // base ::encode(fsid, bl); ::encode(epoch, bl); @@ -1609,6 +1622,12 @@ void OSDMap::encode(bufferlist& bl, uint64_t features) const ::encode(*pg_temp, bl); ::encode(*primary_temp, bl); + if (osd_primary_affinity) { + ::encode(*osd_primary_affinity, bl); + } else { + vector<__u32> v; + ::encode(v, bl); + } // crush bufferlist cbl; @@ -1746,6 +1765,8 @@ void OSDMap::decode_classic(bufferlist::iterator& p) else osd_addrs->hb_front_addr.resize(osd_addrs->hb_back_addr.size()); + osd_primary_affinity.reset(); + post_decode(); } @@ -1769,7 +1790,7 @@ void OSDMap::decode(bufferlist::iterator& bl) * Since we made it past that hurdle, we can use our normal paths. */ { - DECODE_START(1, bl); // client-usable data + DECODE_START(2, bl); // client-usable data // base ::decode(fsid, bl); ::decode(epoch, bl); @@ -1789,6 +1810,14 @@ void OSDMap::decode(bufferlist::iterator& bl) ::decode(*pg_temp, bl); ::decode(*primary_temp, bl); + if (struct_v >= 2) { + osd_primary_affinity.reset(new vector<__u32>); + ::decode(*osd_primary_affinity, bl); + if (osd_primary_affinity->empty()) + osd_primary_affinity.reset(); + } else { + osd_primary_affinity.reset(); + } // crush bufferlist cbl; @@ -1871,6 +1900,8 @@ void OSDMap::dump(Formatter *f) const f->dump_stream("uuid") << get_uuid(i); f->dump_int("up", is_up(i)); f->dump_int("in", is_in(i)); + f->dump_float("weight", get_weightf(i)); + f->dump_float("primary_affinity", get_primary_affinityf(i)); get_info(i).dump(f); f->dump_stream("public_addr") << get_addr(i); f->dump_stream("cluster_addr") << get_cluster_addr(i); diff --git a/src/osd/OSDMap.h b/src/osd/OSDMap.h index b44b28617669..2658cf6b90e6 100644 --- a/src/osd/OSDMap.h +++ b/src/osd/OSDMap.h @@ -135,6 +135,7 @@ public: map new_weight; map > new_pg_temp; // [] to remove map new_primary_temp; // [-1] to remove + map new_primary_affinity; map new_up_thru; map > new_last_clean_interval; map new_lost; @@ -208,6 +209,7 @@ private: vector osd_info; ceph::shared_ptr< map > > pg_temp; // temp pg mapping (e.g. while we rebuild) ceph::shared_ptr< map > primary_temp; // temp primary mapping (e.g. while we rebuild) + ceph::shared_ptr< vector<__u32> > osd_primary_affinity; ///< 16.16 fixed point, 0x10000 = baseline map pools; map pool_name; @@ -341,6 +343,23 @@ public: } void adjust_osd_weights(const map& weights, Incremental& inc) const; + void set_primary_affinity(int o, int w) { + assert(o < max_osd); + if (!osd_primary_affinity) + osd_primary_affinity.reset(new vector<__u32>(max_osd, + CEPH_OSD_DEFAULT_PRIMARY_AFFINITY)); + (*osd_primary_affinity)[o] = w; + } + unsigned get_primary_affinity(int o) const { + assert(o < max_osd); + if (!osd_primary_affinity) + return CEPH_OSD_DEFAULT_PRIMARY_AFFINITY; + return (*osd_primary_affinity)[o]; + } + float get_primary_affinityf(int o) const { + return (float)get_primary_affinity(o) / (float)CEPH_OSD_MAX_PRIMARY_AFFINITY; + } + bool exists(int osd) const { //assert(osd >= 0); return osd >= 0 && osd < max_osd && (osd_state[osd] & CEPH_OSD_EXISTS);