From 2fad7d77721c91a32697c958f65cbbbe75766dbc Mon Sep 17 00:00:00 2001 From: xie xingguo Date: Tue, 26 Mar 2019 20:04:15 +0800 Subject: [PATCH] osd/PG: introduce all_missing_unfound helper We use pg_log.missing to track each peer's missing objects separately, whereas missing_loc records the location of all (probably existing) good copies for both primary and replicas' missing objects. Hence an item from pg_log.missing or missing_loc is of different meaning and is not comparable. During recovery, we can skip recovering primary only if - primary is good, e.g., has no missing at all - or all of the primary's missing objects do exist in missing_loc and are currently unfound Obviously, the current "all missing objects are unfound" checker is broken. Fix by introducing an independent all_missing_unfound helper to make the count of missing objects that are currently unfound correct. Fixes: http://tracker.ceph.com/issues/38784 Signed-off-by: xie xingguo (cherry picked from commit d9497139a6f516ce015bb43a2e7f8958638cf8f8) --- src/osd/PG.h | 10 ++++++++++ src/osd/PrimaryLogPG.cc | 7 +++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/src/osd/PG.h b/src/osd/PG.h index dc64cedb15e58..87de99eb0a3f5 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -1259,6 +1259,16 @@ public: uint64_t get_num_unfound() const { return missing_loc.num_unfound(); } + bool all_missing_unfound() const { + const auto& missing = pg_log.get_missing(); + if (!missing.have_missing()) + return false; + for (auto& m : missing.get_items()) { + if (!missing_loc.is_unfound(m.first)) + return false; + } + return true; + } virtual void check_local() = 0; diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc index eb937f99b154d..d1b5953e22e9e 100644 --- a/src/osd/PrimaryLogPG.cc +++ b/src/osd/PrimaryLogPG.cc @@ -11549,15 +11549,14 @@ bool PrimaryLogPG::start_recovery_ops( const auto &missing = pg_log.get_missing(); - unsigned int num_missing = missing.num_missing(); uint64_t num_unfound = get_num_unfound(); - if (num_missing == 0) { + if (!missing.have_missing()) { info.last_complete = info.last_update; } - if (num_missing == num_unfound) { - // All of the missing objects we have are unfound. + if (!missing.have_missing() || // Primary does not have missing + all_missing_unfound()) { // or all of the missing objects are unfound. // Recover the replicas. started = recover_replicas(max, handle); } -- 2.39.5