]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/PrimaryLogPG: fix potential pg-log overtrimming 23317/head
authorxie xingguo <xie.xingguo@zte.com.cn>
Mon, 30 Jul 2018 10:56:56 +0000 (18:56 +0800)
committerxie xingguo <xie.xingguo@zte.com.cn>
Thu, 20 Sep 2018 09:17:49 +0000 (17:17 +0800)
In https://github.com/ceph/ceph/pull/21580 I set a trap to catch some wired
and random segmentfaults and in a recent QA run I was able to observe it was
successfully triggered by one of the test case, see:

```
http://qa-proxy.ceph.com/teuthology/xxg-2018-07-30_05:25:06-rados-wip-hb-peers-distro-basic-smithi/2837916/teuthology.log
```

The root cause is that there might be holes on log versions, thus the
approx_size() method should (almost) always overestimate the actual number of log entries.
As a result, we might be at the risk of overtrimming log entries.

https://github.com/ceph/ceph/pull/18338 reveals a probably easier way
to fix the above problem but unfortunately it also can cause big performance regression
and hence comes this pr..

Signed-off-by: xie xingguo <xie.xingguo@zte.com.cn>
src/osd/PrimaryLogPG.cc

index 0577e0bf6324bc5f900197fa50cc8a954885d997..da02a4398bd2c366b3e315b0c805d2e41d929a86 100644 (file)
@@ -1638,19 +1638,30 @@ void PrimaryLogPG::calc_trim_to()
        cct->_conf->osd_pg_log_trim_max >= cct->_conf->osd_pg_log_trim_min) {
       return;
     }
-    list<pg_log_entry_t>::const_iterator it = pg_log.get_log().log.begin();
-    eversion_t new_trim_to;
-    for (uint64_t i = 0; i < num_to_trim; ++i) {
-      new_trim_to = it->version;
-      ++it;
-      if (new_trim_to >= limit) {
-       new_trim_to = limit;
-        dout(10) << "calc_trim_to trimming to limit: " << limit << dendl;
-       break;
+    auto it = pg_log.get_log().log.begin(); // oldest log entry
+    auto rit = pg_log.get_log().log.rbegin();
+    eversion_t by_n_to_keep; // start from tail
+    eversion_t by_n_to_trim = eversion_t::max(); // start from head
+    for (size_t i = 0; it != pg_log.get_log().log.end(); ++it, ++rit) {
+      i++;
+      if (i > target && by_n_to_keep == eversion_t()) {
+        by_n_to_keep = rit->version;
+      }
+      if (i >= num_to_trim && by_n_to_trim == eversion_t::max()) {
+        by_n_to_trim = it->version;
+      }
+      if (by_n_to_keep != eversion_t() &&
+          by_n_to_trim != eversion_t::max()) {
+        break;
       }
     }
-    dout(10) << "calc_trim_to " << pg_trim_to << " -> " << new_trim_to << dendl;
-    pg_trim_to = new_trim_to;
+
+    if (by_n_to_keep == eversion_t()) {
+      return;
+    }
+
+    pg_trim_to = std::min({by_n_to_keep, by_n_to_trim, limit});
+    dout(10) << __func__ << " pg_trim_to now " << pg_trim_to << dendl;
     ceph_assert(pg_trim_to <= pg_log.get_head());
   }
 }