{
dout(10) << "proc_master_log for osd." << from << ": "
<< olog << " " << omissing << dendl;
- assert(!is_active() && is_primary());
+ assert(!is_peered() && is_primary());
// merge log into our own log to build master log. no need to
// make any adjustments to their missing map; we are taking their
// log to be authoritative (i.e., their entries are by definition
// non-divergent).
++num_want_acting;
}
- // This is a bit of a problem, if we allow the pg to go active with
- // want.size() < min_size, we won't consider the pg to have been
- // maybe_went_rw in build_prior.
- if (num_want_acting < pool.info.min_size) {
+ // We go incomplete if below min_size for ec_pools since backfill
+ // does not currently maintain rollbackability
+ // Otherwise, we will go "peered", but not "active"
+ if (num_want_acting < pool.info.min_size &&
+ (pool.info.ec_pool() ||
+ !(get_min_peer_features() & CEPH_FEATURE_OSD_MIN_SIZE_RECOVERY))) {
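+     // an ec pool cannot go peered below min_size, and older peers
+     // without CEPH_FEATURE_OSD_MIN_SIZE_RECOVERY do not understand
+     // the peered state, so fail choose_acting as before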
want_acting.clear();
+ dout(10) << "choose_acting failed, below min size" << dendl;
return false;
}
}
if (!(*recoverable_predicate)(have)) {
want_acting.clear();
+ dout(10) << "choose_acting failed, not recoverable" << dendl;
return false;
}
struct C_PG_ActivateCommitted : public Context {
PGRef pg;
epoch_t epoch;
- C_PG_ActivateCommitted(PG *p, epoch_t e)
- : pg(p), epoch(e) {}
+ epoch_t activation_epoch;
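+ // epoch: map epoch in which the txn was queued, checked against
+ // pg_has_reset_since() on commit; activation_epoch: epoch activation
+ // began in, recorded as last_epoch_started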
+ C_PG_ActivateCommitted(PG *p, epoch_t e, epoch_t ae)
+ : pg(p), epoch(e), activation_epoch(ae) {}
void finish(int r) {
- pg->_activate_committed(epoch);
+ pg->_activate_committed(epoch, activation_epoch);
}
};
void PG::activate(ObjectStore::Transaction& t,
- epoch_t query_epoch,
+ epoch_t activation_epoch,
list<Context*>& tfin,
map<int, map<spg_t,pg_query_t> >& query_map,
map<int,
vector<pair<pg_notify_t, pg_interval_map_t> > > *activator_map,
RecoveryCtx *ctx)
{
- assert(!is_active());
+ assert(!is_peered());
assert(scrubber.callbacks.empty());
assert(callbacks_for_degraded_object.empty());
// -- crash recovery?
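+ // replay is only relevant if the pg will actually go active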
- if (is_primary() &&
+ if (acting.size() >= pool.info.min_size &&
+ is_primary() &&
pool.info.crash_replay_interval > 0 &&
may_need_replay(get_osdmap())) {
replay_until = ceph_clock_now(cct);
send_notify = false;
- if (is_acting(pg_whoami))
- info.last_epoch_started = query_epoch;
+ if (is_primary()) {
+ // only update primary last_epoch_started if we will go active
+ if (acting.size() >= pool.info.min_size)
+ info.last_epoch_started = activation_epoch;
+ } else if (is_acting(pg_whoami)) {
+ // update last_epoch_started on acting replica to whatever the primary sent
+ info.last_epoch_started = activation_epoch;
+ }
const pg_missing_t &missing = pg_log.get_missing();
dirty_big_info = true; // maybe
// find out when we commit
- t.register_on_complete(new C_PG_ActivateCommitted(this, query_epoch));
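+ // pass the current map epoch for the interval-change check and
+ // activation_epoch separately for last_epoch_started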
+ t.register_on_complete(
+ new C_PG_ActivateCommitted(
+ this,
+ get_osdmap()->get_epoch(),
+ activation_epoch));
// initialize snap_trimq
if (is_primary()) {
pi.last_update = info.last_update;
pi.last_complete = info.last_update;
pi.last_backfill = hobject_t();
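+ // keep the backfill peer's last_epoch_started consistent with ours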
+ pi.last_epoch_started = info.last_epoch_started;
pi.history = info.history;
pi.hit_set = info.hit_set;
pi.stats.stats.clear();
publish_stats_to_osd();
}
-void PG::_activate_committed(epoch_t e)
+void PG::_activate_committed(epoch_t epoch, epoch_t activation_epoch)
{
lock();
- if (pg_has_reset_since(e)) {
- dout(10) << "_activate_committed " << e << ", that was an old interval" << dendl;
+ if (pg_has_reset_since(epoch)) {
+ dout(10) << "_activate_committed " << epoch
+ << ", that was an old interval" << dendl;
} else if (is_primary()) {
peer_activated.insert(pg_whoami);
- dout(10) << "_activate_committed " << e << " peer_activated now " << peer_activated
+ dout(10) << "_activate_committed " << epoch
+ << " peer_activated now " << peer_activated
<< " last_epoch_started " << info.history.last_epoch_started
<< " same_interval_since " << info.history.same_interval_since << dendl;
assert(!actingbackfill.empty());
if (peer_activated.size() == actingbackfill.size())
all_activated_and_committed();
} else {
- dout(10) << "_activate_committed " << e << " telling primary" << dendl;
- MOSDPGInfo *m = new MOSDPGInfo(e);
+ dout(10) << "_activate_committed " << epoch << " telling primary" << dendl;
+ MOSDPGInfo *m = new MOSDPGInfo(epoch);
pg_notify_t i = pg_notify_t(
get_primary().shard, pg_whoami.shard,
get_osdmap()->get_epoch(),
get_osdmap()->get_epoch(),
info);
- i.info.history.last_epoch_started = e;
+
+ i.info.history.last_epoch_started = activation_epoch;
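+ // with fewer than min_size OSDs in the acting set the pg goes
+ // peered (consistent, but not serving client io) instead of active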
+ if (acting.size() >= pool.info.min_size) {
+ state_set(PG_STATE_ACTIVE);
+ } else {
+ state_set(PG_STATE_PEERED);
+ }
+
m->pg_list.push_back(make_pair(i, pg_interval_map_t()));
osd->send_message_osd_cluster(get_primary().osd, m, get_osdmap()->get_epoch());
- state_set(PG_STATE_ACTIVE);
// waiters
if (flushes_in_progress == 0) {
requeue_ops(waiting_for_peered);
info.stats.stats.calc_copies(MAX(target, actingbackfill.size()));
info.stats.stats.sum.num_objects_degraded = 0;
info.stats.stats.sum.num_objects_misplaced = 0;
- if ((is_degraded() || is_undersized() || !is_clean()) && is_active()) {
+ if ((is_degraded() || is_undersized() || !is_clean()) && is_peered()) {
// NOTE: we only generate copies, degraded, unfound values for
// the summation, not individual stat categories.
uint64_t num_objects = info.stats.stats.sum.num_objects;
// deactivate.
state_clear(PG_STATE_ACTIVE);
+ state_clear(PG_STATE_PEERED);
state_clear(PG_STATE_DOWN);
state_clear(PG_STATE_RECOVERY_WAIT);
state_clear(PG_STATE_RECOVERING);
<< "/" << pg.past_intervals.size();
}
- if (pg.is_active()) {
+ if (pg.is_peered()) {
if (pg.last_update_ondisk != pg.info.last_update)
out << " luod=" << pg.last_update_ondisk;
if (pg.last_update_applied != pg.info.last_update)
context< RecoveryMachine >().log_enter(state_name);
PG *pg = context< RecoveryMachine >().pg;
- assert(!pg->is_active());
+ assert(!pg->is_peered());
assert(!pg->is_peering());
assert(pg->is_primary());
pg->state_set(PG_STATE_PEERING);
// may be telling us they have activated (and committed) but we can't
// share that until _everyone_ does the same.
if (pg->is_actingbackfill(infoevt.from)) {
- assert(pg->info.history.last_epoch_started <
- pg->info.history.same_interval_since);
- assert(infoevt.info.history.last_epoch_started >=
- pg->info.history.same_interval_since);
dout(10) << " peer osd." << infoevt.from << " activated and committed"
<< dendl;
pg->peer_activated.insert(infoevt.from);
pg->state_clear(PG_STATE_ACTIVATING);
pg->state_clear(PG_STATE_CREATING);
- pg->state_set(PG_STATE_ACTIVE);
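+ // as on the replica: active only at or above min_size, otherwise
+ // merely peered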
+ if (pg->acting.size() >= pg->pool.info.min_size) {
+ pg->state_set(PG_STATE_ACTIVE);
+ } else {
+ pg->state_set(PG_STATE_PEERED);
+ }
// info.last_epoch_started is set during activate()
pg->info.history.last_epoch_started = pg->info.last_epoch_started;
pg->check_local();
// waiters
- if (!pg->is_replay() && pg->flushes_in_progress == 0) {
+ if (pg->flushes_in_progress == 0) {
pg->requeue_ops(pg->waiting_for_peered);
}
PG *pg = context< RecoveryMachine >().pg;
map<int, map<spg_t, pg_query_t> > query_map;
pg->activate(*context< RecoveryMachine >().get_cur_transaction(),
- actevt.query_epoch,
+ actevt.activation_epoch,
*context< RecoveryMachine >().get_on_safe_context_list(),
query_map, NULL, NULL);
dout(10) << "Activate Finished" << dendl;
context< RecoveryMachine >().log_enter(state_name);
PG *pg = context< RecoveryMachine >().pg;
- assert(!pg->is_active());
+ assert(!pg->is_peered());
assert(!pg->is_peering());
assert(!pg->is_primary());
pg->start_flush(
assert(pg->pg_log.get_head() == pg->info.last_update);
- post_event(Activate(logevt.msg->get_epoch()));
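+ // activate with the primary's last_epoch_started rather than the
+ // message epoch so the replica records the same les as the primary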
+ post_event(Activate(logevt.msg->info.last_epoch_started));
return transit<ReplicaActive>();
}
assert(infoevt.info.last_update == pg->info.last_update);
assert(pg->pg_log.get_head() == pg->info.last_update);
- post_event(Activate(infoevt.msg_epoch));
+ post_event(Activate(infoevt.info.last_epoch_started));
return transit<ReplicaActive>();
}