From: xie xingguo Date: Tue, 26 Mar 2019 12:04:15 +0000 (+0800) Subject: osd/PG: introduce all_missing_unfound helper X-Git-Tag: v13.2.6~17^2~1 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=47556b48b4d8a779cab35e201111ce10ddc81e1c;p=ceph.git osd/PG: introduce all_missing_unfound helper We use pg_log.missing to track each peer's missing objects separately, whereas missing_loc records the location of all (probably existing) good copies for both primary and replicas' missing objects. Hence an item from pg_log.missing or missing_loc is of different meaning and is not comparable. During recovery, we can skip recovering primary only if - primary is good, e.g., has no missing at all - or all of the primary's missing objects do exist in missing_loc and are currently unfound Obviously, the current "all missing objects are unfound" checker is broken. Fix by introducing an independent all_missing_unfound helper to make the count of missing objects that are currently unfound correct. Fixes: http://tracker.ceph.com/issues/38784 Signed-off-by: xie xingguo (cherry picked from commit d9497139a6f516ce015bb43a2e7f8958638cf8f8) --- diff --git a/src/osd/PG.h b/src/osd/PG.h index cf23ab41c1979..7ee3ecfe7a2a5 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -1510,6 +1510,16 @@ protected: uint64_t get_num_unfound() const { return missing_loc.num_unfound(); } + bool all_missing_unfound() const { + const auto& missing = pg_log.get_missing(); + if (!missing.have_missing()) + return false; + for (auto& m : missing.get_items()) { + if (!missing_loc.is_unfound(m.first)) + return false; + } + return true; + } virtual void check_local() = 0; diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc index 4f0bce054dd01..7eb43c2891384 100644 --- a/src/osd/PrimaryLogPG.cc +++ b/src/osd/PrimaryLogPG.cc @@ -12280,15 +12280,14 @@ bool PrimaryLogPG::start_recovery_ops( const auto &missing = pg_log.get_missing(); - unsigned int num_missing = missing.num_missing(); uint64_t num_unfound = get_num_unfound(); - if (num_missing == 0) { + if (!missing.have_missing()) { info.last_complete = info.last_update; } - if (num_missing == num_unfound) { - // All of the missing objects we have are unfound. + if (!missing.have_missing() || // Primary does not have missing + all_missing_unfound()) { // or all of the missing objects are unfound. // Recover the replicas. started = recover_replicas(max, handle, &recovery_started); }