From 4c617ecf1cf6f25cca42e5d59fa162dbc60f4de8 Mon Sep 17 00:00:00 2001 From: Neha Ojha Date: Wed, 24 Apr 2019 19:15:27 -0700 Subject: [PATCH] osd/PG: do not use approx_missing_objects pre-nautilus We changed async recovery cost calculation in nautilus to also take into account approx_missing_objects in ab241bf7e927cda2d0ed1698383d18dc4a4b601c This commit depends on https://github.com/ceph/ceph/pull/23663, hence wasn't backported to mimic. Mimic only uses the difference in length of logs as the cost. Due to this, the same OSD might have different costs in a mixed mimic and nautilus(or above) cluster. This can lead to choose_acting() cycling between OSDs, when trying to select the acting set and async_recovery_targets. Fixes: https://tracker.ceph.com/issues/39441 Signed-off-by: Neha Ojha --- src/osd/PG.cc | 63 ++++++++++++++++++++++++++++++++++----------------- src/osd/PG.h | 6 +++-- 2 files changed, 46 insertions(+), 23 deletions(-) diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 88219371434..e59397eb323 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -1553,7 +1553,8 @@ bool PG::recoverable_and_ge_min_size(const vector &want) const void PG::choose_async_recovery_ec(const map &all_info, const pg_info_t &auth_info, vector *want, - set *async_recovery) const + set *async_recovery, + const OSDMapRef osdmap) const { set > candidates_by_cost; for (uint8_t i = 0; i < want->size(); ++i) { @@ -1580,14 +1581,21 @@ void PG::choose_async_recovery_ec(const map &all_info, // past the authoritative last_update the same as those equal to it. version_t auth_version = auth_info.last_update.version; version_t candidate_version = shard_info.last_update.version; - auto approx_missing_objects = - shard_info.stats.stats.sum.num_objects_missing; - if (auth_version > candidate_version) { - approx_missing_objects += auth_version - candidate_version; - } - if (static_cast(approx_missing_objects) > - cct->_conf.get_val("osd_async_recovery_min_cost")) { - candidates_by_cost.emplace(approx_missing_objects, shard_i); + if (HAVE_FEATURE(osdmap->get_up_osd_features(), SERVER_NAUTILUS)) { + auto approx_missing_objects = + shard_info.stats.stats.sum.num_objects_missing; + if (auth_version > candidate_version) { + approx_missing_objects += auth_version - candidate_version; + } + if (static_cast(approx_missing_objects) > + cct->_conf.get_val("osd_async_recovery_min_cost")) { + candidates_by_cost.emplace(approx_missing_objects, shard_i); + } + } else { + if (auth_version > candidate_version && + (auth_version - candidate_version) > cct->_conf.get_val("osd_async_recovery_min_cost")) { + candidates_by_cost.insert(make_pair(auth_version - candidate_version, shard_i)); + } } } @@ -1612,7 +1620,8 @@ void PG::choose_async_recovery_ec(const map &all_info, void PG::choose_async_recovery_replicated(const map &all_info, const pg_info_t &auth_info, vector *want, - set *async_recovery) const + set *async_recovery, + const OSDMapRef osdmap) const { set > candidates_by_cost; for (auto osd_num : *want) { @@ -1631,16 +1640,28 @@ void PG::choose_async_recovery_replicated(const map &all_ // logs plus historical missing objects as the cost of recovery version_t auth_version = auth_info.last_update.version; version_t candidate_version = shard_info.last_update.version; - auto approx_missing_objects = - shard_info.stats.stats.sum.num_objects_missing; - if (auth_version > candidate_version) { - approx_missing_objects += auth_version - candidate_version; + if (HAVE_FEATURE(osdmap->get_up_osd_features(), SERVER_NAUTILUS)) { + auto approx_missing_objects = + shard_info.stats.stats.sum.num_objects_missing; + if (auth_version > candidate_version) { + approx_missing_objects += auth_version - candidate_version; + } else { + approx_missing_objects += candidate_version - auth_version; + } + if (static_cast(approx_missing_objects) > + cct->_conf.get_val("osd_async_recovery_min_cost")) { + candidates_by_cost.emplace(approx_missing_objects, shard_i); + } } else { - approx_missing_objects += candidate_version - auth_version; - } - if (static_cast(approx_missing_objects) > - cct->_conf.get_val("osd_async_recovery_min_cost")) { - candidates_by_cost.emplace(approx_missing_objects, shard_i); + size_t approx_entries; + if (auth_version > candidate_version) { + approx_entries = auth_version - candidate_version; + } else { + approx_entries = candidate_version - auth_version; + } + if (approx_entries > cct->_conf.get_val("osd_async_recovery_min_cost")) { + candidates_by_cost.insert(make_pair(approx_entries, shard_i)); + } } } @@ -1758,9 +1779,9 @@ bool PG::choose_acting(pg_shard_t &auth_log_shard_id, set want_async_recovery; if (HAVE_FEATURE(get_osdmap()->get_up_osd_features(), SERVER_MIMIC)) { if (pool.info.is_erasure()) { - choose_async_recovery_ec(all_info, auth_log_shard->second, &want, &want_async_recovery); + choose_async_recovery_ec(all_info, auth_log_shard->second, &want, &want_async_recovery, get_osdmap()); } else { - choose_async_recovery_replicated(all_info, auth_log_shard->second, &want, &want_async_recovery); + choose_async_recovery_replicated(all_info, auth_log_shard->second, &want, &want_async_recovery, get_osdmap()); } } if (want != acting) { diff --git a/src/osd/PG.h b/src/osd/PG.h index fd37b4e456e..4aab6e34801 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -1572,11 +1572,13 @@ protected: void choose_async_recovery_ec(const map &all_info, const pg_info_t &auth_info, vector *want, - set *async_recovery) const; + set *async_recovery, + const OSDMapRef osdmap) const; void choose_async_recovery_replicated(const map &all_info, const pg_info_t &auth_info, vector *want, - set *async_recovery) const; + set *async_recovery, + const OSDMapRef osdmap) const; bool recoverable_and_ge_min_size(const vector &want) const; bool choose_acting(pg_shard_t &auth_log_shard, -- 2.39.5