PG: block writes and scan log for scrub using inclusive upper bound (11311/head)
author Samuel Just <sjust@redhat.com>
Mon, 3 Oct 2016 17:34:51 +0000 (10:34 -0700)
committer David Zafman <dzafman@redhat.com>
Tue, 4 Oct 2016 04:43:40 +0000 (21:43 -0700)
See comment in commit.

Signed-off-by: Samuel Just <sjust@redhat.com>
src/osd/PG.cc
src/osd/PG.h
src/osd/ReplicatedPG.cc
src/osd/osd_types.cc

src/osd/PG.cc
index c5b73247251d68e09280f1eb7c24d79820063074..d72b91fc3cc952f9705824781b2048ba120deeb4 100644
@@ -3482,10 +3482,16 @@ void PG::sub_op_scrub_map(OpRequestRef op)
   dout(10) << " got " << m->from << " scrub map" << dendl;
   bufferlist::iterator p = m->get_data().begin();
 
-  scrubber.received_maps[m->from].decode(p, info.pgid.pool());
+  ScrubMap &map = scrubber.received_maps[m->from];
+  map.reset_bitwise(get_sort_bitwise());
+  map.decode(p, info.pgid.pool());
   dout(10) << "map version is "
-            << scrubber.received_maps[m->from].valid_through
-            << dendl;
+          << map.valid_through
+          << dendl;
+
+  // Account for http://tracker.ceph.com/issues/17491
+  if (!map.objects.empty() && map.objects.rbegin()->first == scrubber.end)
+    map.objects.erase(scrubber.end);
 
   --scrubber.waiting_on;
   scrubber.waiting_on_whom.erase(m->from);
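
A minimal standalone sketch of the boundary trimming above, with std::string standing in for hobject_t and int for ScrubMap::object (both hypothetical simplifications; the real map also carries a configurable comparator):

#include <iostream>
#include <map>
#include <string>

// Hypothetical stand-ins: std::string for hobject_t, int for ScrubMap::object.
int main() {
  std::map<std::string, int> objects = {
      {"obj_a", 1}, {"obj_b", 2}, {"obj_end", 3}};
  const std::string scrub_end = "obj_end";  // scrubber.end in the real code

  // A hammer replica may have scanned the chunk's upper bound too
  // (http://tracker.ceph.com/issues/17491); if the last decoded object is
  // exactly the boundary, drop it so all maps cover the same object set.
  if (!objects.empty() && objects.rbegin()->first == scrub_end)
    objects.erase(scrub_end);

  for (const auto& [name, v] : objects)
    std::cout << name << " -> " << v << "\n";  // prints obj_a and obj_b only
}

Trimming on the primary presumably keeps primary and replica maps comparable over the same object set without requiring any replica-side change.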
@@ -4172,7 +4178,7 @@ void PG::chunky_scrub(ThreadPool::TPHandle &handle)
          if (!_range_available_for_scrub(scrubber.start, candidate_end)) {
            // we'll be requeued by whatever made us unavailable for scrub
            dout(10) << __func__ << ": scrub blocked somewhere in range "
-                    << "[" << scrubber.start << ", " << candidate_end << ")"
+                    << "[" << scrubber.start << ", " << candidate_end << "]"
                     << dendl;
            done = true;
            break;
@@ -4186,7 +4192,8 @@ void PG::chunky_scrub(ThreadPool::TPHandle &handle)
              p != pg_log.get_log().log.rend();
              ++p) {
           if (cmp(p->soid, scrubber.start, get_sort_bitwise()) >= 0 &&
-             cmp(p->soid, scrubber.end, get_sort_bitwise()) < 0) {
+             cmp(p->soid, scrubber.end, get_sort_bitwise()) <= 0) {
+           // inclusive upper bound, @see write_blocked_by_scrub
             scrubber.subset_last_update = p->version;
             break;
           }
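
The log-scan hunk above has the same shape; below is a sketch of picking the newest log entry inside the now-closed interval, assuming string/int stand-ins for hobject_t and eversion_t and a hypothetical helper name:

#include <iostream>
#include <optional>
#include <string>
#include <vector>

// Hypothetical stand-ins: std::string for hobject_t, int for eversion_t.
struct LogEntry { std::string soid; int version; };

// Walk the log newest-first and return the version of the first entry whose
// object falls inside the chunk -- mirroring the subset_last_update scan.
std::optional<int> last_update_in_chunk(const std::vector<LogEntry>& log,
                                        const std::string& start,
                                        const std::string& end) {
  for (auto p = log.rbegin(); p != log.rend(); ++p) {
    // inclusive upper bound, matching write_blocked_by_scrub
    if (p->soid >= start && p->soid <= end)
      return p->version;
  }
  return std::nullopt;
}

int main() {
  std::vector<LogEntry> log = {{"a", 1}, {"m", 2}, {"z", 3}, {"m", 4}};
  auto v = last_update_in_chunk(log, "a", "m");
  std::cout << (v ? *v : -1) << "\n";  // 4: the boundary object "m" now counts
}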
src/osd/PG.h
index cda845f68f1750e7821804962894b5f11034c9cc..7cb85b9e9c91516c2e7226216164d3e149fa16a5 100644
@@ -1230,11 +1230,28 @@ public:
 
     bool is_chunky_scrub_active() const { return state != INACTIVE; }
 
-    // classic (non chunk) scrubs block all writes
-    // chunky scrubs only block writes to a range
+    /* We use an inclusive upper bound here because the replicas scan .end
+     * as well in hammer (see http://tracker.ceph.com/issues/17491).
+     *
+     * The boundary can only be
+     * 1) Not an object (object boundary) or
+     * 2) A clone
+     * In case 1), it doesn't matter.  In case 2), we might fail to
+     * wait for an un-applied snap trim to complete, or fail to block an
+     * eviction on a tail object.  In such a case the replica might
+     * erroneously detect a snap_mapper/attr mismatch and "fix" the
+     * snap_mapper to the old value.
+     *
+     * @see _range_available_for_scrub
+     * @see chunky_scrub (the part where it determines the last relevant log
+     *      entry)
+     *
+     * TODO: switch this logic back to an exclusive upper bound once the
+     * replicas don't scan the upper boundary
+     */
     bool write_blocked_by_scrub(const hobject_t &soid, bool sort_bitwise) {
       if (cmp(soid, start, sort_bitwise) >= 0 &&
-         cmp(soid, end, sort_bitwise) < 0)
+         cmp(soid, end, sort_bitwise) <= 0)
        return true;
 
       return false;
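
A toy model of the predicate, with plain std::string ordering assumed in place of cmp(..., sort_bitwise), shows what the closed interval changes at the boundary:

#include <cassert>
#include <string>

// Hypothetical simplification: plain std::string ordering in place of
// cmp(soid, ..., sort_bitwise).
bool write_blocked_by_scrub(const std::string& soid,
                            const std::string& start,
                            const std::string& end) {
  return soid >= start && soid <= end;  // closed interval [start, end]
}

int main() {
  // Under the old exclusive bound, a write to the boundary object itself was
  // allowed even though a hammer replica could still be scanning it.
  assert(write_blocked_by_scrub("m", "a", "m"));   // boundary now blocked
  assert(!write_blocked_by_scrub("n", "a", "m"));  // past the chunk: allowed
}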
src/osd/ReplicatedPG.cc
index 6dfa97b7cb0bc346ab66c827f7ff3e002a243a22..f04ef6028a93ee9896170d401f5233ba4e489ea6 100644
@@ -12487,7 +12487,8 @@ bool ReplicatedPG::_range_available_for_scrub(
   next.second = object_contexts.lookup(begin);
   next.first = begin;
   bool more = true;
-  while (more && cmp(next.first, end, get_sort_bitwise()) < 0) {
+  // inclusive upper bound, @see write_blocked_by_scrub
+  while (more && cmp(next.first, end, get_sort_bitwise()) <= 0) {
     if (next.second && next.second->is_blocked()) {
       next.second->requeue_scrub_on_unblock = true;
       dout(10) << __func__ << ": scrub delayed, "
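
The availability walk uses the same inclusive bound; here is a sketch assuming a std::map of flags as a stand-in for the object-context cache and is_blocked():

#include <iostream>
#include <map>
#include <string>

// Hypothetical stand-ins: std::string keys for hobject_t, bool for
// ObjectContext::is_blocked().
bool range_available(const std::map<std::string, bool>& contexts,
                     const std::string& begin, const std::string& end) {
  for (auto it = contexts.lower_bound(begin);
       it != contexts.end() && it->first <= end;  // inclusive upper bound
       ++it) {
    if (it->second)  // a blocked object anywhere in [begin, end] delays scrub
      return false;
  }
  return true;
}

int main() {
  std::map<std::string, bool> contexts = {{"a", false}, {"m", true}};
  std::cout << range_available(contexts, "a", "l") << "\n";  // 1: "m" outside
  std::cout << range_available(contexts, "a", "m") << "\n";  // 0: boundary blocked
}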
src/osd/osd_types.cc
index 2334b70ed33b9bfd0c98668381d7d6784adb2d3c..b9222fed117ed2ea24f3c3351cfd9da5bc916ce6 100644
@@ -5292,7 +5292,7 @@ void ScrubMap::decode(bufferlist::iterator& bl, int64_t pool)
 
   // handle hobject_t upgrade
   if (struct_v < 3) {
-    map<hobject_t, object, hobject_t::ComparatorWithDefault> tmp;
+    map<hobject_t, object, hobject_t::ComparatorWithDefault> tmp(objects.key_comp());
     tmp.swap(objects);
     for (map<hobject_t, object, hobject_t::ComparatorWithDefault>::iterator i = tmp.begin();
         i != tmp.end();
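
The osd_types.cc change is subtle: std::map::swap exchanges comparators along with contents, so swapping objects with a default-constructed tmp would leave objects holding a default-state comparator after the upgrade. A sketch with a toy stateful comparator (the bool flag is an assumed stand-in for ComparatorWithDefault's configurable sort mode):

#include <cassert>
#include <map>
#include <string>

// Toy stateful comparator: the flag is an assumed stand-in for
// hobject_t::ComparatorWithDefault's configurable sort mode.
struct Cmp {
  bool descending = false;  // default-constructed state: ascending
  bool operator()(const std::string& a, const std::string& b) const {
    return descending ? b < a : a < b;
  }
};

int main() {
  std::map<std::string, int, Cmp> objects(Cmp{true});  // configured: descending
  objects = {{"a", 1}, {"b", 2}};

  // std::map::swap exchanges comparators too; a default-constructed tmp here
  // would hand its ascending comparator to `objects`. Copying key_comp(), as
  // the fix does, keeps the configured order on both sides of the swap.
  std::map<std::string, int, Cmp> tmp(objects.key_comp());
  tmp.swap(objects);

  objects.insert(tmp.begin(), tmp.end());  // mirrors the upgrade re-insert loop
  assert(objects.begin()->first == "b");   // still descending after the swap
}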