git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: set handle to start of next PG on pgnls end of pg
author: Sage Weil <sage@redhat.com>
Mon, 17 Aug 2015 18:41:41 +0000 (14:41 -0400)
committer: John Spray <john.spray@redhat.com>
Thu, 3 Dec 2015 14:57:38 +0000 (14:57 +0000)
For the namespaced PGNLS op, set the handle to the start
of the next PG when we reach the end of the current PG.

We do this OSD side rather than client side so that
the client doesn't have to wait for the OSD's map
epoch in order to correctly calculate the next hash
in the case of PG splitting.

Only do this when the cluster is in bitwise mode to avoid
totally weird results when the client is in nibblewise
mode--the nibblewise sort order doesn't map contiguous
ranges of the hash to PGs.

Signed-off-by: Sage Weil <sage@redhat.com>
src/osd/ReplicatedPG.cc

index 92201d0d6a97100b2bfda16bfb1086d4c0bb4b32..b07b80c6f78cf0362a8684bd8b0968bcc55dbc22 100644 (file)
@@ -936,7 +936,10 @@ void ReplicatedPG::do_pg_op(OpRequestRef op)
        }
 
        hobject_t next;
-       hobject_t current = response.handle;
+       hobject_t lower_bound = response.handle;
+        dout(10) << " pgnls lower_bound " << lower_bound << dendl;
+
+       hobject_t current = lower_bound;
        osr->flush();
        int r = pgbackend->objects_list_partial(
          current,
@@ -985,6 +988,9 @@ void ReplicatedPG::do_pg_op(OpRequestRef op)
            ++ls_iter;
          }
 
+          dout(10) << " pgnls candidate 0x" << std::hex << candidate.get_hash()
+            << " vs lower bound 0x" << lower_bound.get_hash() << dendl;
+
          if (cmp(candidate, next, get_sort_bitwise()) >= 0) {
            break;
          }
@@ -1036,18 +1042,37 @@ void ReplicatedPG::do_pg_op(OpRequestRef op)
          if (filter && !pgls_filter(filter, candidate, filter_out))
            continue;
 
+          dout(20) << "pgnls item 0x" << std::hex
+            << candidate.get_hash()
+            << ", rev 0x" << hobject_t::_reverse_bits(candidate.get_hash())
+            << std::dec << " "
+            << candidate.oid.name << dendl;
+
          librados::ListObjectImpl item;
          item.nspace = candidate.get_namespace();
          item.oid = candidate.oid.name;
          item.locator = candidate.get_key();
          response.entries.push_back(item);
        }
+
        if (next.is_max() &&
            missing_iter == pg_log.get_missing().missing.end() &&
            ls_iter == sentries.end()) {
          result = 1;
-       }
-       response.handle = next;
+
+         if (get_osdmap()->test_flag(CEPH_OSDMAP_SORTBITWISE)) {
+           // Set response.handle to the start of the next PG
+           // according to the object sort order.  Only do this if
+           // the cluster is in bitwise mode; with legacy nibblewise
+           // sort PGs don't always cover contiguous ranges of the
+           // hash order.
+           response.handle = info.pgid.pgid.get_hobj_end(
+             pool.info.get_pg_num());
+         }
+       } else {
+          response.handle = next;
+        }
+        dout(10) << "pgls handle=" << response.handle << dendl;
        ::encode(response, osd_op.outdata);
        if (filter)
          ::encode(filter_out, osd_op.outdata);