derr << "failed to list pgs: " << cpp_strerror(-r) << dendl;
}
+ int num = 0;
for (vector<coll_t>::iterator it = ls.begin();
it != ls.end();
++it) {
dout(10) << __func__ << " loaded " << *pg << " " << pg->pg_log.get_log()
<< dendl;
pg->unlock();
+ ++num;
}
- {
- RWLock::RLocker l(pg_map_lock);
- dout(0) << __func__ << " opened " << pg_map.size() << " pgs" << dendl;
- }
-
- build_past_intervals_parallel();
+ dout(0) << __func__ << " opened " << num << " pgs" << dendl;
}
-/*
- * build past_intervals efficiently on old, degraded, and buried
- * clusters. this is important for efficiently catching up osds that
- * are way behind on maps to the current cluster state.
- *
- * this is a parallel version of PG::generate_past_intervals().
- * follow the same logic, but do all pgs at the same time so that we
- * can make a single pass across the osdmap history.
- * ("parallel" here means all pgs are advanced together, epoch by
- * epoch, inside one loop; this function starts no threads.)
- *
- * outline:
- *  1. holding pg_map_lock for read, collect every pg whose stored
- *     past_intervals bounds do not enclose its required bounds, and
- *     compute the overall epoch range [cur_epoch, end_epoch] spanning
- *     everything those pgs need.
- *  2. walk that epoch range once; for each epoch fetch the osdmap with
- *     get_map() and run PastIntervals::check_new_interval() for every
- *     collected pg whose own range contains the epoch.
- *  3. for collected pgs whose info.history.same_interval_since is
- *     still 0, set it to the value tracked during the walk.
- *  4. mark every collected pg dirty and write it out, applying the
- *     transaction each time the pg count reaches
- *     osd_target_transaction_size, plus once more for any remainder.
- *
- * takes no arguments and returns nothing; results are stored in the
- * pgs themselves and in the object store.
- */
-void OSD::build_past_intervals_parallel()
-{
- struct pistate { // per-pg scratch state carried from one epoch to the next
- epoch_t start, end; // epoch range this pg needs (rpib.first / rpib.second)
- vector<int> old_acting, old_up; // acting/up sets recorded when the tracked interval began
- epoch_t same_interval_since; // epoch the tracked interval began; 0 = no map processed yet
- int primary; // acting primary recorded when the tracked interval began
- int up_primary; // up primary recorded when the tracked interval began
- };
- map<PG*,pistate> pis; // only pgs that actually need intervals built
-
- // calculate the overall epoch range covering what every pg needs
- epoch_t end_epoch = superblock.oldest_map; // seeded low; raised to the largest rpib.second
- epoch_t cur_epoch = superblock.newest_map; // seeded high; lowered to the smallest rpib.first
- {
- RWLock::RLocker l(pg_map_lock); // read lock held for the whole scan of pg_map
- for (ceph::unordered_map<spg_t, PG*>::iterator i = pg_map.begin();
- i != pg_map.end();
- ++i) {
- PG *pg = i->second;
-
- auto rpib = pg->get_required_past_interval_bounds( // rpib: bounds this pg requires
- pg->info,
- superblock.oldest_map);
- if (rpib.first >= rpib.second && pg->past_intervals.empty()) { // empty required range and nothing stored: skip
- if (pg->info.history.same_interval_since == 0) { // unset: seed from the required upper bound
- pg->info.history.same_interval_since = rpib.second; // NOTE(review): written without pg->lock() in this function; confirm callers make that safe
- }
- continue;
- } else {
- auto apib = pg->past_intervals.get_bounds(); // apib: bounds of the intervals already stored
- if (apib.second >= rpib.second &&
- apib.first <= rpib.first) { // stored bounds already enclose the required bounds: skip
- if (pg->info.history.same_interval_since == 0) {
- pg->info.history.same_interval_since = rpib.second;
- }
- continue;
- }
- }
-
- dout(10) << pg->pg_id << " needs " << rpib.first << "-"
- << rpib.second << dendl;
- pistate& p = pis[pg]; // creates the entry; old_up/old_acting/primary/up_primary are filled on the first epoch processed
- p.start = rpib.first;
- p.end = rpib.second;
- p.same_interval_since = 0; // 0 marks "first map not seen yet" for the epoch loop below
-
- if (rpib.first < cur_epoch) // widen the overall range downwards
- cur_epoch = rpib.first;
- if (rpib.second > end_epoch) // widen the overall range upwards
- end_epoch = rpib.second;
- }
- }
- if (pis.empty()) {
- dout(10) << __func__ << " nothing to build" << dendl;
- return;
- }
-
- dout(1) << __func__ << " over " << cur_epoch << "-" << end_epoch << dendl;
- assert(cur_epoch <= end_epoch);
-
- OSDMapRef cur_map, last_map;
- for ( ; cur_epoch <= end_epoch; cur_epoch++) { // single pass over the osdmap history
- dout(10) << __func__ << " epoch " << cur_epoch << dendl;
- last_map = cur_map; // still the default-constructed ref on the first iteration
- cur_map = get_map(cur_epoch);
-
- for (map<PG*,pistate>::iterator i = pis.begin(); i != pis.end(); ++i) {
- PG *pg = i->first;
- pistate& p = i->second;
-
- if (cur_epoch < p.start || cur_epoch > p.end) // epoch lies outside the range this pg needs
- continue;
-
- vector<int> acting, up;
- int up_primary;
- int primary;
- pg_t pgid = pg->pg_id.pgid;
- if (p.same_interval_since && last_map->get_pools().count(pgid.pool())) // nonzero same_interval_since means an earlier epoch was processed, so last_map is set here
- pgid = pgid.get_ancestor(last_map->get_pg_num(pgid.pool())); // use the ancestor pgid for the previous map's pg_num (presumably to handle pg splits; confirm)
- cur_map->pg_to_up_acting_osds(
- pgid, &up, &up_primary, &acting, &primary);
-
- if (p.same_interval_since == 0) { // first epoch for this pg: record a baseline, nothing to compare against yet
- dout(10) << __func__ << " epoch " << cur_epoch << " pg " << pg->pg_id
- << " first map, acting " << acting
- << " up " << up << ", same_interval_since = " << cur_epoch << dendl;
- p.same_interval_since = cur_epoch;
- p.old_up = up;
- p.old_acting = acting;
- p.primary = primary;
- p.up_primary = up_primary;
- continue;
- }
- assert(last_map);
-
- boost::scoped_ptr<IsPGRecoverablePredicate> recoverable(
- pg->get_is_recoverable_predicate());
- std::stringstream debug;
- bool new_interval = PastIntervals::check_new_interval( // recorded (old) vs current mapping; receives &pg->past_intervals, presumably to append the closed interval (confirm)
- p.primary,
- primary,
- p.old_acting, acting,
- p.up_primary,
- up_primary,
- p.old_up, up,
- p.same_interval_since,
- pg->info.history.last_epoch_clean,
- cur_map, last_map,
- pgid,
- recoverable.get(),
- &pg->past_intervals,
- &debug);
- if (new_interval) { // mapping changed: restart tracking from this epoch
- dout(10) << __func__ << " epoch " << cur_epoch << " pg " << pg->pg_id
- << " " << debug.str() << dendl;
- p.old_up = up;
- p.old_acting = acting;
- p.primary = primary;
- p.up_primary = up_primary;
- p.same_interval_since = cur_epoch;
- }
- }
- }
-
- // Now that past_intervals have been recomputed let's fix the same_interval_since
- // if it was cleared by import.
- for (map<PG*,pistate>::iterator i = pis.begin(); i != pis.end(); ++i) {
- PG *pg = i->first;
- pistate& p = i->second;
-
- if (pg->info.history.same_interval_since == 0) {
- assert(p.same_interval_since); // requires that the epoch loop processed at least one map for this pg
- dout(10) << __func__ << " fix same_interval_since " << p.same_interval_since << " pg " << *pg << dendl;
- dout(10) << __func__ << " past_intervals " << pg->past_intervals << dendl;
- // Fix it: adopt the interval start tracked during the epoch walk
- pg->info.history.same_interval_since = p.same_interval_since;
- }
- }
-
- // write info only at the end. this is necessary because we check
- // whether the past_intervals go far enough back or forward in time,
- // but we don't check for holes. we could avoid it by discarding
- // the previous past_intervals and rebuilding from scratch, or we
- // can just do this and commit all our work at the end.
- ObjectStore::Transaction t;
- int num = 0; // pgs queued in the current transaction
- for (map<PG*,pistate>::iterator i = pis.begin(); i != pis.end(); ++i) {
- PG *pg = i->first;
- pg->lock(); // the only phase of this function that takes the pg lock
- pg->dirty_big_info = true; // both dirty flags are set before write_if_dirty(); presumably this forces a full info write (confirm)
- pg->dirty_info = true;
- pg->write_if_dirty(t);
- pg->unlock();
-
- // don't let the transaction get too big
- if (++num >= cct->_conf->osd_target_transaction_size) {
- store->apply_transaction(service.meta_osr.get(), std::move(t)); // return value is not checked
- t = ObjectStore::Transaction(); // fresh transaction to replace the moved-from one
- num = 0;
- }
- }
- if (!t.empty()) // flush whatever is left from the last partial batch
- store->apply_transaction(service.meta_osr.get(), std::move(t)); // return value is not checked
-}
-
/*
* look up a pg. if we have it, great. if not, consider creating it IF the pg mapping
* hasn't changed since the given epoch and we are the primary.