From 1362d3e10dc6fc28063d7a0e01fd611f03f1d305 Mon Sep 17 00:00:00 2001 From: Samuel Just Date: Fri, 16 Dec 2011 18:04:32 -0800 Subject: [PATCH] calc_acting: Prefer up[0] as primary if possible Previously, we could get into a state where although up[0] has been fully backfilled, acting[0] could be selected as a primary if it is able to pull another peer into the acting set. This also collects the logic of choosing the best info into a helper function. Signed-off-by: Samuel Just --- src/osd/PG.cc | 142 ++++++++++++++++++++++++-------------------------- src/osd/PG.h | 1 + 2 files changed, 69 insertions(+), 74 deletions(-) diff --git a/src/osd/PG.cc b/src/osd/PG.cc index bedb961336bd5..008cb05c0402f 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -880,115 +880,109 @@ void PG::clear_primary_state() osd->snap_trim_wq.dequeue(this); } -/** - * calculate the desired acting set. - * - * Choose an appropriate acting set. Prefer up[0], unless it is - * incomplete, or another osd has a longer tail that allows us to - * bring other up nodes up to date. - */ -void PG::calc_acting(int& newest_update_osd_id, vector& want) const +map::const_iterator PG::find_best_info(const map &infos) const { - map all_info(peer_info.begin(), peer_info.end()); - all_info[osd->whoami] = info; - - for (map::iterator p = all_info.begin(); p != all_info.end(); ++p) { - dout(10) << "calc_acting osd." << p->first << " " << p->second << dendl; - } - + map::const_iterator best = infos.end(); // find osd with newest last_update. if there are multiples, prefer // - a longer tail, if it brings another peer into log contiguity // - the current primary - map::iterator newest_update_osd = all_info.find(osd->whoami); - for (map::iterator p = all_info.begin(); p != all_info.end(); ++p) { - if (p->second.last_update < newest_update_osd->second.last_update) + for (map::const_iterator p = infos.begin(); + p != infos.end(); + ++p) { + if (best == infos.end()) { + best = p; + continue; + } + // Prefer newer last_update + if (p->second.last_update < best->second.last_update) continue; - if (p->second.last_update > newest_update_osd->second.last_update) { - newest_update_osd = p; + if (p->second.last_update > best->second.last_update) { + best = p; continue; } - // prefer longer tail, if it will bring another peer in log contiguity - bool worse = false; - for (map::iterator q = all_info.begin(); q != all_info.end(); ++q) { + // Prefer longer tail if it brings another peer into contiguity + for (map::const_iterator q = infos.begin(); + q != infos.end(); + ++q) { if (q->second.is_incomplete()) continue; // don't care about log contiguity - if (q->second.last_update < newest_update_osd->second.log_tail && + if (q->second.last_update < best->second.log_tail && q->second.last_update >= p->second.log_tail) { dout(10) << "calc_acting prefer osd." << p->first << " because it brings osd." << q->first << " into log contiguity" << dendl; - newest_update_osd = p; + best = p; continue; } if (q->second.last_update < p->second.log_tail && - q->second.last_update >= newest_update_osd->second.log_tail) { - worse = true; - break; + q->second.last_update >= best->second.log_tail) { + continue; } } - if (worse) - continue; - // prefer current primary (usually the caller), all things being equal if (p->first == acting[0]) { dout(10) << "calc_acting prefer osd." << p->first << " because it is current primary" << dendl; - newest_update_osd = p; + best = p; continue; } } + return best; +} + +/** + * calculate the desired acting set. + * + * Choose an appropriate acting set. Prefer up[0], unless it is + * incomplete, or another osd has a longer tail that allows us to + * bring other up nodes up to date. + */ +void PG::calc_acting(int& newest_update_osd_id, vector& want) const +{ + map all_info(peer_info.begin(), peer_info.end()); + all_info[osd->whoami] = info; + + for (map::iterator p = all_info.begin(); p != all_info.end(); ++p) { + dout(10) << "calc_acting osd." << p->first << " " << p->second << dendl; + } + + map::const_iterator newest_update_osd = find_best_info(all_info); + dout(10) << "calc_acting newest update on osd." << newest_update_osd->first << " with " << newest_update_osd->second << dendl; newest_update_osd_id = newest_update_osd->first; // select primary - map::iterator primary; - if (up.size()) { + map::const_iterator primary; + if (up.size() && + !all_info[up[0]].is_incomplete() && + all_info[up[0]].last_update >= newest_update_osd->second.log_tail) { + dout(10) << "up[0](osd." << up[0] << ") selected as primary" << dendl; primary = all_info.find(up[0]); // prefer up[0], all thing being equal + } else if (!newest_update_osd->second.is_incomplete()) { + dout(10) << "up[0] needs backfill, osd." << newest_update_osd_id + << " selected as primary instead" << dendl; + primary = newest_update_osd; } else { - assert(acting.size()); - primary = all_info.find(acting[0]); // or the status quo - } - for (map::iterator p = all_info.begin(); p != all_info.end(); ++p) { - if (p == primary) - continue; - // require complete - if (p->second.is_incomplete()) - continue; - if (primary->second.is_incomplete()) { - dout(10) << "calc_acting prefer osd." << p->first << " because not incomplete" << dendl; - primary = p; - continue; - } - // require log continuity with newest_update_osd - if (p->second.last_update < newest_update_osd->second.log_tail) - continue; - if (primary->second.last_update < newest_update_osd->second.log_tail) { - dout(10) << "calc_acting prefer osd." << p->first - << " because log contiguous with newest osd." << newest_update_osd->first << dendl; - primary = p; - continue; + map complete_infos; + for (map::iterator i = all_info.begin(); + i != all_info.end(); + ++i) { + if (!i->second.is_incomplete()) + complete_infos.insert(*i); } - // prefer longer tail, if it will bring another peer in log contiguity - if (p->second.log_tail < primary->second.log_tail) { - for (map::iterator q = all_info.begin(); q != all_info.end(); ++q) { - if (!q->second.is_incomplete() && - q->second.last_update < primary->second.log_tail && - q->second.last_update >= p->second.log_tail) { - dout(10) << "calc_acting prefer osd." << p->first - << " because it brings osd." << q->first << " into log contiguity" << dendl; - primary = p; - continue; - } - } + primary = find_best_info(complete_infos); + if (primary == complete_infos.end() || + primary->second.last_update < newest_update_osd->second.log_tail) { + dout(10) << "calc_acting no acceptable primary, reverting to up " << up << dendl; + want = up; + return; + } else { + dout(10) << "up[0] and newest_update_osd need backfill, osd." + << newest_update_osd_id + << " selected as primary instead" << dendl; } } - if (primary->second.is_incomplete() || - primary->second.last_update < newest_update_osd->second.log_tail) { - dout(10) << "calc_acting no acceptable primary, reverting to up " << up << dendl; - want = up; - return; - } dout(10) << "calc_acting primary is osd." << primary->first << " with " << primary->second << dendl; diff --git a/src/osd/PG.h b/src/osd/PG.h index 9171c8a96a26b..703217106009a 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -1513,6 +1513,7 @@ public: void trim_write_ahead(); + map::const_iterator find_best_info(const map &infos) const; void calc_acting(int& newest_update_osd, vector& want) const; bool choose_acting(int& newest_update_osd); void build_might_have_unfound(); -- 2.39.5