From: xie xingguo Date: Tue, 26 Mar 2019 12:04:15 +0000 (+0800) Subject: osd/PG: introduce all_missing_unfound helper X-Git-Tag: v15.0.0~37^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=d9497139a6f516ce015bb43a2e7f8958638cf8f8;p=ceph.git osd/PG: introduce all_missing_unfound helper We use pg_log.missing to track each peer's missing objects separately, whereas missing_loc records the location of all (probably existing) good copies for both primary and replicas' missing objects. Hence an item from pg_log.missing or missing_loc is of different meaning and is not comparable. During recovery, we can skip recovering primary only if - primary is good, e.g., has no missing at all - or all of the primary's missing objects do exist in missing_loc and are currently unfound Obviously, the current "all missing objects are unfound" checker is broken. Fix by introducing an independent all_missing_unfound helper to make the count of missing objects that are currently unfound correct. Fixes: http://tracker.ceph.com/issues/38784 Signed-off-by: xie xingguo --- diff --git a/src/osd/PG.h b/src/osd/PG.h index 38c5dab2f392..913822b84f37 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -1609,6 +1609,16 @@ protected: uint64_t get_num_unfound() const { return missing_loc.num_unfound(); } + bool all_missing_unfound() const { + const auto& missing = pg_log.get_missing(); + if (!missing.have_missing()) + return false; + for (auto& m : missing.get_items()) { + if (!missing_loc.is_unfound(m.first)) + return false; + } + return true; + } virtual void check_local() = 0; diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc index 51713b294145..ae2f6d0c91fa 100644 --- a/src/osd/PrimaryLogPG.cc +++ b/src/osd/PrimaryLogPG.cc @@ -12443,15 +12443,14 @@ bool PrimaryLogPG::start_recovery_ops( const auto &missing = pg_log.get_missing(); - unsigned int num_missing = missing.num_missing(); uint64_t num_unfound = get_num_unfound(); - if (num_missing == 0) { + if (!missing.have_missing()) { info.last_complete = info.last_update; } - if (num_missing == num_unfound) { - // All of the missing objects we have are unfound. + if (!missing.have_missing() || // Primary does not have missing + all_missing_unfound()) { // or all of the missing objects are unfound. // Recover the replicas. started = recover_replicas(max, handle, &recovery_started); }