newp.peering_crush_bucket_count = new_site_count;
newp.peering_crush_mandatory_member = remaining_site;
newp.min_size = pgi.second.min_size / 2; // only support 2 zones now
+ newp.last_force_op_resend = pending_inc.epoch;
pending_inc.new_pools[pgi.first] = newp;
}
}
for (auto pgi : osdmap.pools) {
if (pgi.second.peering_crush_bucket_count) {
pg_pool_t newp(pgi.second);
- // bump up the min_size since we have extra replicas available...
+ newp.last_force_op_resend = pending_inc.epoch;
pending_inc.new_pools[pgi.first] = newp;
}
}
newp.peering_crush_bucket_count = osdmap.stretch_bucket_count;
newp.peering_crush_mandatory_member = 0;
newp.min_size = g_conf().get_val<uint64_t>("mon_stretch_pool_min_size");
+ newp.last_force_op_resend = pending_inc.epoch;
pending_inc.new_pools[pgi.first] = newp;
}
}
// didn't break them with earlier choices!
const pg_pool_t& pg_pool = pool.info;
if (pg_pool.is_stretch_pool()) {
- const uint32_t barrier_id = pg_pool.peering_crush_bucket_barrier;
- const uint32_t barrier_count = pg_pool.peering_crush_bucket_count;
- set<int> ancestors;
- const shared_ptr<CrushWrapper>& crush = osdmap_ref->crush;
- for (int osdid : want) {
- int ancestor = crush->get_parent_of_type(osdid, barrier_id,
- pg_pool.crush_rule);
- ancestors.insert(ancestor);
- }
- if (ancestors.size() < barrier_count) {
- psdout(5) << "peeering blocked: not enough crush buckets with OSDs in acting" << dendl;
- return false;
- } else if (pg_pool.peering_crush_mandatory_member &&
- !ancestors.count(pg_pool.peering_crush_mandatory_member)) {
- psdout(5) << "peering blocked: missing mandatory crush bucket member "
- << pg_pool.peering_crush_mandatory_member << dendl;
+ stringstream ss;
+ if (!pg_pool.stretch_set_can_peer(want, *get_osdmap(), &ss)) {
+ psdout(5) << "peering blocked by stretch_can_peer: " << ss.str() << dendl;
return false;
}
}
calc_grade_table();
}
+/**
+ * Check whether a candidate OSD set satisfies this pool's stretch
+ * peering constraints.
+ *
+ * Every OSD in @p want is mapped through the CRUSH map held by @p osdmap
+ * to its ancestor of type peering_crush_bucket_barrier (crush_rule is
+ * passed to the lookup). The set is rejected when fewer than
+ * peering_crush_bucket_count distinct ancestors are found, or when a
+ * non-zero peering_crush_mandatory_member is not among them.
+ *
+ * @param want    OSD ids being considered
+ * @param osdmap  map whose crush member is used for the ancestor lookups
+ * @param out     optional; if non-null, receives the reason for a rejection
+ * @return true if neither constraint is violated, false otherwise
+ */
+bool pg_pool_t::stretch_set_can_peer(const set<int>& want, const OSDMap& osdmap,
+                                     std::ostream * out) const
+{
+  const shared_ptr<CrushWrapper>& crush_map = osdmap.crush;
+  const uint32_t bucket_type = peering_crush_bucket_barrier;
+  const uint32_t buckets_needed = peering_crush_bucket_count;
+
+  // Collect the distinct ancestors (of the barrier type) of the candidates.
+  set<int> buckets_seen;
+  for (const int osd : want) {
+    buckets_seen.insert(
+      crush_map->get_parent_of_type(osd, bucket_type, crush_rule));
+  }
+
+  if (buckets_seen.size() < buckets_needed) {
+    if (out != nullptr) {
+      *out << __func__ << ": not enough crush buckets with OSDs in want set "
+           << want;
+    }
+    return false;
+  }
+
+  // A zero mandatory member means "no mandatory bucket configured".
+  const bool mandatory_missing =
+    peering_crush_mandatory_member != 0 &&
+    buckets_seen.find(peering_crush_mandatory_member) == buckets_seen.end();
+  if (mandatory_missing) {
+    if (out != nullptr) {
+      *out << __func__ << ": missing mandatory crush bucket member "
+           << peering_crush_mandatory_member;
+    }
+    return false;
+  }
+
+  return true;
+}
+
void pg_pool_t::generate_test_instances(list<pg_pool_t*>& o)
{
pg_pool_t a;
bool new_sort_bitwise,
bool old_recovery_deletes,
bool new_recovery_deletes,
+ uint32_t old_crush_count,
+ uint32_t new_crush_count,
+ uint32_t old_crush_target,
+ uint32_t new_crush_target,
+ uint32_t old_crush_barrier,
+ uint32_t new_crush_barrier,
+ int32_t old_crush_member,
+ int32_t new_crush_member,
pg_t pgid) {
return old_acting_primary != new_acting_primary ||
new_acting != old_acting ||
// merge target
pgid.is_merge_target(old_pg_num, new_pg_num) ||
old_sort_bitwise != new_sort_bitwise ||
- old_recovery_deletes != new_recovery_deletes;
+ old_recovery_deletes != new_recovery_deletes ||
+ old_crush_count != new_crush_count ||
+ old_crush_target != new_crush_target ||
+ old_crush_barrier != new_crush_barrier ||
+ old_crush_member != new_crush_member;
}
bool PastIntervals::is_new_interval(
osdmap->test_flag(CEPH_OSDMAP_SORTBITWISE),
lastmap->test_flag(CEPH_OSDMAP_RECOVERY_DELETES),
osdmap->test_flag(CEPH_OSDMAP_RECOVERY_DELETES),
+ plast->peering_crush_bucket_count, pi->peering_crush_bucket_count,
+ plast->peering_crush_bucket_target, pi->peering_crush_bucket_target,
+ plast->peering_crush_bucket_barrier, pi->peering_crush_bucket_barrier,
+ plast->peering_crush_mandatory_member, pi->peering_crush_mandatory_member,
pgid);
}
if (num_acting &&
i.primary != -1 &&
num_acting >= old_pg_pool.min_size &&
+ (!old_pg_pool.is_stretch_pool() ||
+ old_pg_pool.stretch_set_can_peer(old_acting, *lastmap, out)) &&
could_have_gone_active(old_acting_shards)) {
if (out)
*out << __func__ << " " << i
}
}
-
// true if the given map affects the prior set
bool PastIntervals::PriorSet::affected_by_map(
const OSDMap &osdmap,
};
WRITE_CLASS_ENCODER(pg_merge_meta_t)
+class OSDMap;
+
/*
* pg_pool
*/
return peering_crush_bucket_count != 0;
}
+  /**
+   * Check whether the OSDs in @p want satisfy this pool's stretch peering
+   * constraints (defined out of line).
+   *
+   * @param want    OSD ids being considered
+   * @param osdmap  map used for the CRUSH ancestor lookups
+   * @param out     optional stream that receives the reason for a rejection
+   */
+  bool stretch_set_can_peer(const set<int>& want, const OSDMap& osdmap,
+                            std::ostream *out) const;
+  /// Convenience overload: collapses @p want into a set (dropping duplicate
+  /// ids) and forwards to the set<int> overload.
+  bool stretch_set_can_peer(const vector<int>& want, const OSDMap& osdmap,
+                            std::ostream *out) const {
+    const set<int> unique_osds(want.begin(), want.end());
+    return stretch_set_can_peer(unique_osds, osdmap, out);
+  }
+
uint64_t target_max_bytes = 0; ///< tiering: target max pool size
uint64_t target_max_objects = 0; ///< tiering: target max pool size
WRITE_CLASS_ENCODER(pg_fast_info_t)
-class OSDMap;
/**
* PastIntervals -- information needed to determine the PriorSet and
* the might_have_unfound set
bool new_sort_bitwise,
bool old_recovery_deletes,
bool new_recovery_deletes,
+ uint32_t old_crush_count,
+ uint32_t new_crush_count,
+ uint32_t old_crush_target,
+ uint32_t new_crush_target,
+ uint32_t old_crush_barrier,
+ uint32_t new_crush_barrier,
+ int32_t old_crush_member,
+ int32_t new_crush_member,
pg_t pgid
);
sort_bitwise,
t->recovery_deletes,
recovery_deletes,
+ t->peering_crush_bucket_count,
+ pi->peering_crush_bucket_count,
+ t->peering_crush_bucket_target,
+ pi->peering_crush_bucket_target,
+ t->peering_crush_bucket_barrier,
+ pi->peering_crush_bucket_barrier,
+ t->peering_crush_mandatory_member,
+ pi->peering_crush_mandatory_member,
prev_pgid)) {
force_resend = true;
}
t->actual_pgid = spgid;
t->sort_bitwise = sort_bitwise;
t->recovery_deletes = recovery_deletes;
+ t->peering_crush_bucket_count = pi->peering_crush_bucket_count;
+ t->peering_crush_bucket_target = pi->peering_crush_bucket_target;
+ t->peering_crush_bucket_barrier = pi->peering_crush_bucket_barrier;
+ t->peering_crush_mandatory_member = pi->peering_crush_mandatory_member;
ldout(cct, 10) << __func__ << " "
<< " raw pgid " << pgid << " -> actual " << t->actual_pgid
<< " acting " << acting
int min_size = -1; ///< the min size of the pool when were were last mapped
bool sort_bitwise = false; ///< whether the hobject_t sort order is bitwise
bool recovery_deletes = false; ///< whether the deletes are performed during recovery instead of peering
+ uint32_t peering_crush_bucket_count = 0; ///< saved copy of pi->peering_crush_bucket_count from the last mapping; compared against the current value to detect a new interval
+ uint32_t peering_crush_bucket_target = 0; ///< saved copy of pi->peering_crush_bucket_target from the last mapping
+ uint32_t peering_crush_bucket_barrier = 0; ///< saved copy of pi->peering_crush_bucket_barrier from the last mapping
+ int32_t peering_crush_mandatory_member = 0; ///< saved copy of pi->peering_crush_mandatory_member from the last mapping (0 presumably means none -- confirm)
bool used_replica = false;
bool paused = false;