From: xie xingguo Date: Mon, 30 Jul 2018 10:56:56 +0000 (+0800) Subject: osd/PrimaryLogPG: fix potential pg-log overtrimming X-Git-Tag: v14.0.1~214^2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=3654d56985c67d15506fa37b56ef5b0c04e01a65;p=ceph.git osd/PrimaryLogPG: fix potential pg-log overtrimming In https://github.com/ceph/ceph/pull/21580 I set a trap to catch some wired and random segmentfaults and in a recent QA run I was able to observe it was successfully triggered by one of the test case, see: ``` http://qa-proxy.ceph.com/teuthology/xxg-2018-07-30_05:25:06-rados-wip-hb-peers-distro-basic-smithi/2837916/teuthology.log ``` The root cause is that there might be holes on log versions, thus the approx_size() method should (almost) always overestimate the actual number of log entries. As a result, we might be at the risk of overtrimming log entries. https://github.com/ceph/ceph/pull/18338 reveals a probably easier way to fix the above problem but unfortunately it also can cause big performance regression and hence comes this pr.. Signed-off-by: xie xingguo --- diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc index 0577e0bf6324b..da02a4398bd2c 100644 --- a/src/osd/PrimaryLogPG.cc +++ b/src/osd/PrimaryLogPG.cc @@ -1638,19 +1638,30 @@ void PrimaryLogPG::calc_trim_to() cct->_conf->osd_pg_log_trim_max >= cct->_conf->osd_pg_log_trim_min) { return; } - list::const_iterator it = pg_log.get_log().log.begin(); - eversion_t new_trim_to; - for (uint64_t i = 0; i < num_to_trim; ++i) { - new_trim_to = it->version; - ++it; - if (new_trim_to >= limit) { - new_trim_to = limit; - dout(10) << "calc_trim_to trimming to limit: " << limit << dendl; - break; + auto it = pg_log.get_log().log.begin(); // oldest log entry + auto rit = pg_log.get_log().log.rbegin(); + eversion_t by_n_to_keep; // start from tail + eversion_t by_n_to_trim = eversion_t::max(); // start from head + for (size_t i = 0; it != pg_log.get_log().log.end(); ++it, ++rit) { + i++; + if (i > target && by_n_to_keep == eversion_t()) { + by_n_to_keep = rit->version; + } + if (i >= num_to_trim && by_n_to_trim == eversion_t::max()) { + by_n_to_trim = it->version; + } + if (by_n_to_keep != eversion_t() && + by_n_to_trim != eversion_t::max()) { + break; } } - dout(10) << "calc_trim_to " << pg_trim_to << " -> " << new_trim_to << dendl; - pg_trim_to = new_trim_to; + + if (by_n_to_keep == eversion_t()) { + return; + } + + pg_trim_to = std::min({by_n_to_keep, by_n_to_trim, limit}); + dout(10) << __func__ << " pg_trim_to now " << pg_trim_to << dendl; ceph_assert(pg_trim_to <= pg_log.get_head()); } }