From: Sage Weil Date: Mon, 17 Aug 2015 18:41:41 +0000 (-0400) Subject: osd: set handle to start of next PG on pgnls end of pg X-Git-Tag: v10.0.3~215^2~10 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=3ee77318813b8d9c8ea840157ee7107ed1afa961;p=ceph.git osd: set handle to start of next PG on pgnls end of pg For the namespaced PGNLS op, set the handle to the start of the next PG when we reach the end of the current PG. We do this OSD side rather than client side so that the client doesn't have to wait for the OSD's map epoch in order to correctly calculate the next hash in the case of PG splitting. Only do this when the cluster is in bitwise mode to avoid totally weird results when the client is in nibblewise mode--the nibblewise sort order doesn't map contiguous ranges of the hash to PGs. Signed-off-by: Sage Weil --- diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 92201d0d6a9..b07b80c6f78 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -936,7 +936,10 @@ void ReplicatedPG::do_pg_op(OpRequestRef op) } hobject_t next; - hobject_t current = response.handle; + hobject_t lower_bound = response.handle; + dout(10) << " pgnls lower_bound " << lower_bound << dendl; + + hobject_t current = lower_bound; osr->flush(); int r = pgbackend->objects_list_partial( current, @@ -985,6 +988,9 @@ void ReplicatedPG::do_pg_op(OpRequestRef op) ++ls_iter; } + dout(10) << " pgnls candidate 0x" << std::hex << candidate.get_hash() + << " vs lower bound 0x" << lower_bound.get_hash() << dendl; + if (cmp(candidate, next, get_sort_bitwise()) >= 0) { break; } @@ -1036,18 +1042,37 @@ void ReplicatedPG::do_pg_op(OpRequestRef op) if (filter && !pgls_filter(filter, candidate, filter_out)) continue; + dout(20) << "pgnls item 0x" << std::hex + << candidate.get_hash() + << ", rev 0x" << hobject_t::_reverse_bits(candidate.get_hash()) + << std::dec << " " + << candidate.oid.name << dendl; + librados::ListObjectImpl item; item.nspace = candidate.get_namespace(); item.oid = candidate.oid.name; item.locator = candidate.get_key(); response.entries.push_back(item); } + if (next.is_max() && missing_iter == pg_log.get_missing().missing.end() && ls_iter == sentries.end()) { result = 1; - } - response.handle = next; + + if (get_osdmap()->test_flag(CEPH_OSDMAP_SORTBITWISE)) { + // Set response.handle to the start of the next PG + // according to the object sort order. Only do this if + // the cluster is in bitwise mode; with legacy nibblewise + // sort PGs don't always cover contiguous ranges of the + // hash order. + response.handle = info.pgid.pgid.get_hobj_end( + pool.info.get_pg_num()); + } + } else { + response.handle = next; + } + dout(10) << "pgls handle=" << response.handle << dendl; ::encode(response, osd_op.outdata); if (filter) ::encode(filter_out, osd_op.outdata);