From c7d92d1d3fe469f5e8e7c35185a670570c665029 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 10 Jan 2012 13:23:00 -0800 Subject: [PATCH] osd: fail to peer if interval lacks any !incomplete replicas We need at least one non-incomplete replica during a rw interval in order to peer. The backfilling/incomplete replicas get log entries, but not all object writes, so they are (mostly) excluded from the peering process (find_best_info(), in particular). We can't do this during the PriorSet calculation because we don't have their PG::Info yet. But, once we get it, we need to make sure at least one of the replicas during the last rw interval is not incomplete, or else we should mark the pg DOWN (just like the PriorSet calculation does). This logic mostly mirrors that of PriorSet, but additionally requires the replicas be !incomplete. Signed-off-by: Sage Weil --- src/osd/PG.cc | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 05a4f75bb95ce..e37da4b449752 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -4232,8 +4232,61 @@ boost::statechart::result PG::RecoveryState::GetInfo::react(const MNotifyRec& in get_infos(); } else { // are we done getting everything? - if (peer_info_requested.empty() && !prior_set->pg_down) + if (peer_info_requested.empty() && !prior_set->pg_down) { + /* + * make sure we have at least one !incomplete() osd from the + * last rw interval. the incomplete (backfilling) replicas + * get a copy of the log, but they don't get all the object + * updates, so they are insufficient to recover changes during + * that interval. 
+ */ + if (pg->info.history.last_epoch_started) { + for (map<epoch_t,Interval>::reverse_iterator p = pg->past_intervals.rbegin(); + p != pg->past_intervals.rend(); + ++p) { + if (p->first < pg->info.history.last_epoch_started) + break; + if (!p->second.maybe_went_rw) + continue; + Interval& interval = p->second; + dout(10) << " last maybe_went_rw interval was " << interval << dendl; + OSDMapRef osdmap = pg->get_osdmap(); + + /* + * this mirrors the PriorSet calculation: we wait if we + * don't have an up (AND !incomplete) node AND there are + * nodes down that might be usable. + */ + bool any_up_complete_now = false; + bool any_down_now = false; + for (unsigned i=0; i<interval.acting.size(); i++) { + int o = interval.acting[i]; + + if (!osdmap->exists(o) || osdmap->get_info(o).lost_at > interval.first) + continue; // dne or lost + if (osdmap->is_up(o)) { + PG::Info *pinfo; + if (o == pg->osd->whoami) { + pinfo = &pg->info; + } else { + assert(pg->peer_info.count(o)); + pinfo = &pg->peer_info[o]; + } + if (!pinfo->is_incomplete()) + any_up_complete_now = true; + } else { + any_down_now = true; + } + } + if (!any_up_complete_now && any_down_now) { + dout(10) << " no osds up+complete from interval " << interval << dendl; + pg->state_set(PG_STATE_DOWN); + return discard_event(); + } + break; + } + } post_event(GotInfo()); + } } } return discard_event(); -- 2.39.5