}
}
}
-
+
+void OSDMap::_apply_primary_affinity(ps_t seed,
+ const pg_pool_t& pool,
+ vector<int> *osds,
+ int *primary) const
+{
+ // do we have any non-default primary_affinity values for these osds?
+ if (!osd_primary_affinity)
+ return;
+
+ bool any = false;
+ for (vector<int>::const_iterator p = osds->begin(); p != osds->end(); ++p) {
+ if ((*osd_primary_affinity)[*p] != CEPH_OSD_DEFAULT_PRIMARY_AFFINITY) {
+ any = true;
+ }
+ }
+ if (!any)
+ return;
+
+ // pick the primary. feed both the seed (for the pg) and the osd
+ // into the hash/rng so that a proportional fraction of an osd's pgs
+ // get rejected as primary.
+ int pos = -1;
+ for (unsigned i = 0; i < osds->size(); ++i) {
+ int o = (*osds)[i];
+ if (o == CRUSH_ITEM_NONE)
+ continue;
+ unsigned a = (*osd_primary_affinity)[o];
+ if (a < CEPH_OSD_MAX_PRIMARY_AFFINITY &&
+ (crush_hash32_2(CRUSH_HASH_RJENKINS1,
+ seed, o) >> 16) >= a) {
+ // we chose not to use this primary. note it anyway as a
+ // fallback in case we don't pick anyone else, but keep looking.
+ if (pos < 0)
+ pos = i;
+ } else {
+ pos = i;
+ break;
+ }
+ }
+ if (pos < 0)
+ return;
+
+ *primary = (*osds)[pos];
+
+ if (pool.can_shift_osds() && pos > 0) {
+ // move the new primary to the front.
+ for (int i = pos; i > 0; --i) {
+ (*osds)[i] = (*osds)[i-1];
+ }
+ (*osds)[0] = *primary;
+ }
+}
+
void OSDMap::_get_temp_osds(const pg_pool_t& pool, pg_t pg,
vector<int> *temp_pg, int *temp_primary) const
{
return;
}
vector<int> raw;
- _pg_to_osds(*pool, pg, &raw, primary);
+ ps_t pps;
+ _pg_to_osds(*pool, pg, &raw, primary, &pps);
_raw_to_up_osds(*pool, raw, up, primary);
+ _apply_primary_affinity(pps, *pool, up, primary);
}
void OSDMap::_pg_to_up_acting_osds(pg_t pg, vector<int> *up, int *up_primary,
vector<int> _acting;
int _up_primary;
int _acting_primary;
- _pg_to_osds(*pool, pg, &raw, &_up_primary);
+ ps_t pps;
+ _pg_to_osds(*pool, pg, &raw, &_up_primary, &pps);
_raw_to_up_osds(*pool, raw, &_up, &_up_primary);
+ _apply_primary_affinity(pps, *pool, &_up, &_up_primary);
_get_temp_osds(*pool, pg, &_acting, &_acting_primary);
if (_acting.empty())
_acting = _up;
pending_inc.new_uuid[i] = sample_uuid;
}
osdmap.apply_incremental(pending_inc);
+
+ // kludge to get an erasure coding rule and pool
+ int r = osdmap.crush->add_simple_ruleset("erasure", "default", "osd",
+ "indep", pg_pool_t::TYPE_ERASURE,
+ &cerr);
+ pg_pool_t *p = (pg_pool_t *)osdmap.get_pg_pool(2);
+ p->type = pg_pool_t::TYPE_ERASURE;
+ p->crush_ruleset = r;
}
unsigned int get_num_osds() { return num_osds; }
+
+ void test_mappings(int pool,
+ int num,
+ vector<int> *any,
+ vector<int> *first,
+ vector<int> *primary) {
+ for (int i=0; i<num; ++i) {
+ vector<int> o;
+ int p;
+ pg_t pgid(i, pool);
+ osdmap.pg_to_acting_osds(pgid, &o, &p);
+ for (unsigned j=0; j<o.size(); ++j)
+ (*any)[o[j]]++;
+ if (!o.empty())
+ (*first)[o[0]]++;
+ if (p >= 0)
+ (*primary)[p]++;
+ }
+ }
};
TEST_F(OSDMapTest, Create) {
EXPECT_FALSE(pending_inc.new_pg_temp.count(pgid));
EXPECT_FALSE(pending_inc.new_primary_temp.count(pgid));
}
+
+TEST_F(OSDMapTest, PrimaryAffinity) {
+ set_up_map();
+
+ /*
+ osdmap.print(cout);
+ Formatter *f = new_formatter("json-pretty");
+ f->open_object_section("CRUSH");
+ osdmap.crush->dump(f);
+ f->close_section();
+ f->flush(cout);
+ delete f;
+ */
+
+ int n = get_num_osds();
+ for (map<int64_t,pg_pool_t>::const_iterator p = osdmap.get_pools().begin();
+ p != osdmap.get_pools().end();
+ ++p) {
+ int pool = p->first;
+ cout << "pool " << pool << std::endl;
+ {
+ vector<int> any(n, 0);
+ vector<int> first(n, 0);
+ vector<int> primary(n, 0);
+ test_mappings(0, 10000, &any, &first, &primary);
+ for (int i=0; i<n; ++i) {
+ //cout << "osd." << i << " " << any[i] << " " << first[i] << " " << primary[i] << std::endl;
+ ASSERT_LT(0, any[i]);
+ ASSERT_LT(0, first[i]);
+ ASSERT_LT(0, primary[i]);
+ }
+ }
+
+ osdmap.set_primary_affinity(0, 0);
+ osdmap.set_primary_affinity(1, 0);
+ {
+ vector<int> any(n, 0);
+ vector<int> first(n, 0);
+ vector<int> primary(n, 0);
+ test_mappings(pool, 10000, &any, &first, &primary);
+ for (int i=0; i<n; ++i) {
+ //cout << "osd." << i << " " << any[i] << " " << first[i] << " " << primary[i] << std::endl;
+ ASSERT_LT(0, any[i]);
+ if (i >= 2) {
+ ASSERT_LT(0, first[i]);
+ ASSERT_LT(0, primary[i]);
+ } else {
+ if (p->second.is_replicated())
+ ASSERT_EQ(0, first[i]);
+ ASSERT_EQ(0, primary[i]);
+ }
+ }
+ }
+
+ osdmap.set_primary_affinity(0, 0x8000);
+ osdmap.set_primary_affinity(1, 0);
+ {
+ vector<int> any(n, 0);
+ vector<int> first(n, 0);
+ vector<int> primary(n, 0);
+ test_mappings(pool, 10000, &any, &first, &primary);
+ for (int i=0; i<n; ++i) {
+ //cout << "osd." << i << " " << any[i] << " " << first[i] << " " << primary[i] << std::endl;
+ ASSERT_LT(0, any[i]);
+ if (i >= 2) {
+ ASSERT_LT(0, first[i]);
+ ASSERT_LT(0, primary[i]);
+ } else if (i == 1) {
+ if (p->second.is_replicated())
+ ASSERT_EQ(0, first[i]);
+ ASSERT_EQ(0, primary[i]);
+ } else {
+ ASSERT_LT(10000/6/4, primary[0]);
+ ASSERT_GT(10000/6/4*3, primary[0]);
+ }
+ }
+ }
+
+ osdmap.set_primary_affinity(0, 0x10000);
+ osdmap.set_primary_affinity(1, 0x10000);
+ }
+}