OPTION(osd_max_rep, OPT_INT, 10)
OPTION(osd_pool_default_crush_rule, OPT_INT, 0)
// Default replica count for newly created pools.
OPTION(osd_pool_default_size, OPT_INT, 2)
// Minimum number of replicas that must be up for a PG to go active
// (new option added by this change; see pg_pool_t::min_size).
+OPTION(osd_pool_default_min_size, OPT_INT, 2)
OPTION(osd_pool_default_pg_num, OPT_INT, 8)
OPTION(osd_pool_default_pgp_num, OPT_INT, 8)
OPTION(osd_map_dedup, OPT_BOOL, true)
pending_inc.new_pools[pool].type = pg_pool_t::TYPE_REP;
pending_inc.new_pools[pool].size = g_conf->osd_pool_default_size;
+ pending_inc.new_pools[pool].min_size =
+ g_conf->osd_pool_default_min_size;
if (crush_rule >= 0)
pending_inc.new_pools[pool].crush_ruleset = crush_rule;
else
if (pending_inc.new_pools.count(pool) == 0)
pending_inc.new_pools[pool] = *p;
pending_inc.new_pools[pool].size = n;
+ if (n < p->min_size)
+ pending_inc.new_pools[pool].min_size = n;
pending_inc.new_pools[pool].last_change = pending_inc.epoch;
ss << "set pool " << pool << " size to " << n;
getline(ss, rs);
paxos->wait_for_commit(new Monitor::C_Command(mon, m, 0, rs, paxos->get_version()));
return true;
+ } else if (m->cmd[4] == "min_size") {
+ if (pending_inc.new_pools.count(pool) == 0)
+ pending_inc.new_pools[pool] = *p;
+ pending_inc.new_pools[pool].min_size = n;
+ pending_inc.new_pools[pool].last_change = pending_inc.epoch;
+ ss << "set pool " << pool << " min_size to " << n;
+ getline(ss, rs);
+ paxos->wait_for_commit(new Monitor::C_Command(mon, m, 0, rs, paxos->get_version()));
+ return true;
} else if (m->cmd[4] == "crash_replay_interval") {
if (pending_inc.new_pools.count(pool) == 0)
pending_inc.new_pools[pool] = *p;
p.same_interval_since,
pg->info.history.last_epoch_clean,
cur_map, last_map,
+ pg->info.pgid.pool(),
&pg->past_intervals,
&debug);
if (new_interval) {
int64_t pool = ++pool_max;
pools[pool].type = pg_pool_t::TYPE_REP;
pools[pool].size = cct->_conf->osd_pool_default_size;
+ pools[pool].min_size = cct->_conf->osd_pool_default_min_size;
pools[pool].crush_ruleset = p->first;
pools[pool].object_hash = CEPH_STR_HASH_RJENKINS;
pools[pool].set_pg_num(poolbase << pg_bits);
int64_t pool = ++pool_max;
pools[pool].type = pg_pool_t::TYPE_REP;
pools[pool].size = cct->_conf->osd_pool_default_size;
+ pools[pool].min_size = cct->_conf->osd_pool_default_min_size;
pools[pool].crush_ruleset = p->first;
pools[pool].object_hash = CEPH_STR_HASH_RJENKINS;
pools[pool].set_pg_num((numosd + 1) << pg_bits);
info.history.last_epoch_clean,
cur_map,
last_map,
+ info.pgid.pool(),
&past_intervals,
&debug);
if (new_interval) {
return false;
}
+ if (want.size() < pool.info.min_size) {
+ want_acting.clear();
+ return false;
+ }
+
if (want != acting) {
dout(10) << "choose_acting want " << want << " != acting " << acting
<< ", requesting pg_temp change" << dendl;
info.history.same_interval_since,
info.history.last_epoch_clean,
osdmap,
- lastmap, &past_intervals);
+ lastmap, info.pgid.pool(), &past_intervals);
if (new_interval) {
dout(10) << " noting past " << past_intervals.rbegin()->second << dendl;
dirty_info = true;
pg->update_stats();
}
+/**
+ * React to an osdmap advance while in the Incomplete peering substate.
+ *
+ * If the pool's min_size changed between the previous and current map,
+ * the PG may now be able to go active, so restart peering via Reset
+ * (re-posting the AdvMap so Reset processes it too). Otherwise the
+ * event is forwarded to the outer Peering state.
+ */
+boost::statechart::result PG::RecoveryState::Incomplete::react(const AdvMap &advmap) {
+  PG *pg = context< RecoveryMachine >().pg;
+  int64_t poolnum = pg->info.pgid.pool();
+
+  // Reset if min_size changed, pg might now be able to go active
+  // NOTE(review): guard the lookups — an unchecked find() would
+  // dereference end() (undefined behavior) if the pool was deleted
+  // from either map.
+  const map<int64_t,pg_pool_t> &last_pools = advmap.lastmap->get_pools();
+  const map<int64_t,pg_pool_t> &cur_pools = advmap.osdmap->get_pools();
+  map<int64_t,pg_pool_t>::const_iterator lastp = last_pools.find(poolnum);
+  map<int64_t,pg_pool_t>::const_iterator curp = cur_pools.find(poolnum);
+  if (lastp != last_pools.end() && curp != cur_pools.end() &&
+      lastp->second.min_size != curp->second.min_size) {
+    post_event(advmap);
+    return transit< Reset >();
+  }
+
+  return forward_event();
+}
+
// On leaving the Incomplete substate, record the exit (state name and
// time entered) for logging/state-duration accounting.
void PG::RecoveryState::Incomplete::exit()
{
  context< RecoveryMachine >().log_exit(state_name, enter_time);
};
// Peering substate entered when the PG cannot go active with the
// current acting set; reacts to map changes (custom AdvMap reaction
// added by this change) so a min_size change can re-trigger peering.
struct Incomplete : boost::statechart::state< Incomplete, Peering>, NamedState {
+    // Handle AdvMap ourselves instead of letting Peering consume it.
+    typedef boost::mpl::list <
+      boost::statechart::custom_reaction< AdvMap >
+      > reactions;
  Incomplete(my_context ctx);
+    boost::statechart::result react(const AdvMap &advmap);
  void exit();
};
return;
}
- ENCODE_START(6, 5, bl);
+ ENCODE_START(7, 5, bl);
::encode(type, bl);
::encode(size, bl);
::encode(crush_ruleset, bl);
::encode(auid, bl);
::encode(flags, bl);
::encode(crash_replay_interval, bl);
+ ::encode(min_size, bl);
ENCODE_FINISH(bl);
}
void pg_pool_t::decode(bufferlist::iterator& bl)
{
- DECODE_START_LEGACY_COMPAT_LEN(6, 5, 5, bl);
+ DECODE_START_LEGACY_COMPAT_LEN(7, 5, 5, bl);
::decode(type, bl);
::decode(size, bl);
::decode(crush_ruleset, bl);
else
crash_replay_interval = 0;
}
+ if (struct_v >= 7) {
+ ::decode(min_size, bl);
+ } else {
+ min_size = MAX(size - 1, 1);
+ }
DECODE_FINISH(bl);
calc_pg_masks();
}
epoch_t last_epoch_clean,
OSDMapRef osdmap,
OSDMapRef lastmap,
+ int64_t pool_id,
map<epoch_t, pg_interval_t> *past_intervals,
std::ostream *out)
{
// remember past interval
- if (new_acting != old_acting || new_up != old_up) {
+ if (new_acting != old_acting || new_up != old_up ||
+ (!(lastmap->get_pools().count(pool_id))) ||
+ lastmap->get_pools().find(pool_id)->second.min_size !=
+ osdmap->get_pools().find(pool_id)->second.min_size) {
pg_interval_t& i = (*past_intervals)[same_interval_since];
i.first = same_interval_since;
i.last = osdmap->get_epoch() - 1;
i.acting = old_acting;
i.up = old_up;
- if (i.acting.size()) {
+ if (i.acting.size() >=
+ osdmap->get_pools().find(pool_id)->second.min_size) {
if (lastmap->get_up_thru(i.acting[0]) >= i.first &&
lastmap->get_up_from(i.acting[0]) <= i.first) {
i.maybe_went_rw = true;
uint64_t flags; /// FLAG_*
__u8 type; /// TYPE_*
- __u8 size; /// number of osds in each pg
+ __u8 size, min_size; /// number of osds in each pg
__u8 crush_ruleset; /// crush placement rule set
__u8 object_hash; /// hash mapping object name to ps
private:
int pg_num_mask, pgp_num_mask;
pg_pool_t()
- : flags(0), type(0), size(0), crush_ruleset(0), object_hash(0),
+ : flags(0), type(0), size(0), min_size(0),
+ crush_ruleset(0), object_hash(0),
pg_num(0), pgp_num(0),
last_change(0),
snap_seq(0), snap_epoch(0),
epoch_t last_epoch_clean, ///< [in] current
std::tr1::shared_ptr<const OSDMap> osdmap, ///< [in] current map
std::tr1::shared_ptr<const OSDMap> lastmap, ///< [in] last map
+ int64_t poolid, ///< [in] pool for pg
map<epoch_t, pg_interval_t> *past_intervals,///< [out] intervals
ostream *out = 0 ///< [out] debug ostream
);