From: Sage Weil Date: Thu, 26 Jan 2017 21:48:51 +0000 (-0500) Subject: osdc/Objecter: refactor list_nobjects to use hobject_t as position X-Git-Tag: v12.0.1~413^2~6 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=6fb487d1a022b65c40bec9342a6d18cf4bbc8d2b;p=ceph.git osdc/Objecter: refactor list_nobjects to use hobject_t as position Stop using current_pg as a position pointer; use the hobject_t cursor explicitly. We keep current_pg *only* for compatibility with !sortbitwise clusters, and we only use it when we get back MAX from a !sortbitwise OSD and need to determine where the start of the next PG is. In !sortbitwise mode we also have the legacy kludges to behave on PG split. Signed-off-by: Sage Weil --- diff --git a/src/osdc/Objecter.cc b/src/osdc/Objecter.cc index 6d954bbfbc680..6641945102630 100644 --- a/src/osdc/Objecter.cc +++ b/src/osdc/Objecter.cc @@ -3362,48 +3362,25 @@ uint32_t Objecter::list_nobjects_seek(NListContext *list_context, uint32_t pos) { shared_lock rl(rwlock); + list_context->pos = hobject_t(object_t(), string(), CEPH_NOSNAP, + pos, list_context->pool_id, string()); + ldout(cct, 10) << __func__ << list_context + << " pos " << pos << " -> " << list_context->pos << dendl; pg_t actual = osdmap->raw_pg_to_pg(pg_t(pos, list_context->pool_id)); - ldout(cct, 10) << "list_objects_seek " << list_context - << " pos " << pos << " -> " << actual << dendl; list_context->current_pg = actual.ps(); - list_context->cookie = collection_list_handle_t(); - list_context->at_end_of_pg = false; list_context->at_end_of_pool = false; - list_context->current_pg_epoch = 0; - return list_context->current_pg; + return pos; } void Objecter::list_nobjects(NListContext *list_context, Context *onfinish) { - ldout(cct, 10) << "list_objects" << dendl; - ldout(cct, 20) << " pool_id " << list_context->pool_id - << " pool_snap_seq " << list_context->pool_snap_seq - << " max_entries " << list_context->max_entries - << " list_context " << list_context - << " onfinish " << onfinish - << " list_context->current_pg " << list_context->current_pg - << " list_context->cookie " << list_context->cookie << dendl; - - if (list_context->at_end_of_pg) { - list_context->at_end_of_pg = false; - ++list_context->current_pg; - list_context->current_pg_epoch = 0; - list_context->cookie = collection_list_handle_t(); - if (list_context->current_pg >= list_context->starting_pg_num) { - list_context->at_end_of_pool = true; - ldout(cct, 20) << " no more pgs; reached end of pool" << dendl; - } else { - ldout(cct, 20) << " move to next pg " << list_context->current_pg - << dendl; - } - } - if (list_context->at_end_of_pool) { - // release the listing context's budget once all - // OPs (in the session) are finished - put_nlist_context_budget(list_context); - onfinish->complete(0); - return; - } + ldout(cct, 10) << __func__ << " pool_id " << list_context->pool_id + << " pool_snap_seq " << list_context->pool_snap_seq + << " max_entries " << list_context->max_entries + << " list_context " << list_context + << " onfinish " << onfinish + << " current_pg " << list_context->current_pg + << " pos " << list_context->pos << dendl; shared_lock rl(rwlock); const pg_pool_t *pool = osdmap->get_pg_pool(list_context->pool_id); @@ -3415,36 +3392,53 @@ void Objecter::list_nobjects(NListContext *list_context, Context *onfinish) } int pg_num = pool->get_pg_num(); bool sort_bitwise = osdmap->test_flag(CEPH_OSDMAP_SORTBITWISE); - rl.unlock(); - if (list_context->starting_pg_num == 0) { // there can't be zero pgs! - list_context->starting_pg_num = pg_num; + if (list_context->pos.is_min()) { + list_context->starting_pg_num = 0; list_context->sort_bitwise = sort_bitwise; - ldout(cct, 20) << pg_num << " placement groups" << dendl; + list_context->starting_pg_num = pg_num; } if (list_context->sort_bitwise != sort_bitwise) { - ldout(cct, 10) << " hobject sort order changed, restarting this pg" - << dendl; - list_context->cookie = collection_list_handle_t(); + list_context->pos = hobject_t( + object_t(), string(), CEPH_NOSNAP, + list_context->current_pg, list_context->pool_id, string()); list_context->sort_bitwise = sort_bitwise; + ldout(cct, 10) << " hobject sort order changed, restarting this pg at " + << list_context->pos << dendl; } if (list_context->starting_pg_num != pg_num) { - // start reading from the beginning; the pgs have changed - ldout(cct, 10) << " pg_num changed; restarting with " << pg_num << dendl; - list_context->current_pg = 0; - list_context->cookie = collection_list_handle_t(); - list_context->current_pg_epoch = 0; + if (!sort_bitwise) { + // start reading from the beginning; the pgs have changed + ldout(cct, 10) << " pg_num changed; restarting with " << pg_num << dendl; + list_context->pos = collection_list_handle_t(); + } list_context->starting_pg_num = pg_num; } - assert(list_context->current_pg < pg_num); + + if (list_context->pos.is_max()) { + ldout(cct, 20) << __func__ << " end of pool, list " + << list_context->list << dendl; + if (list_context->list.empty()) { + list_context->at_end_of_pool = true; + } + // release the listing context's budget once all + // OPs (in the session) are finished + put_nlist_context_budget(list_context); + onfinish->complete(0); + return; + } ObjectOperation op; op.pg_nls(list_context->max_entries, list_context->filter, - list_context->cookie, list_context->current_pg_epoch); + list_context->pos, osdmap->get_epoch()); list_context->bl.clear(); C_NList *onack = new C_NList(list_context, onfinish, this); object_locator_t oloc(list_context->pool_id, list_context->nspace); + // note current_pg in case we don't have (or lose) SORTBITWISE + list_context->current_pg = pool->raw_hash_to_pg(list_context->pos.get_hash()); + rl.unlock(); + pg_read(list_context->current_pg, oloc, op, &list_context->bl, 0, onack, &onack->epoch, &list_context->ctx_budget); @@ -3453,7 +3447,7 @@ void Objecter::list_nobjects(NListContext *list_context, Context *onfinish) void Objecter::_nlist_reply(NListContext *list_context, int r, Context *final_finish, epoch_t reply_epoch) { - ldout(cct, 10) << "_list_reply" << dendl; + ldout(cct, 10) << __func__ << " " << list_context << dendl; bufferlist::iterator iter = list_context->bl.begin(); pg_nls_response_t response; @@ -3462,38 +3456,39 @@ void Objecter::_nlist_reply(NListContext *list_context, int r, if (!iter.end()) { ::decode(extra_info, iter); } - list_context->cookie = response.handle; - if (!list_context->current_pg_epoch) { - // first pgls result, set epoch marker - ldout(cct, 20) << " first pgls piece, reply_epoch is " - << reply_epoch << dendl; - list_context->current_pg_epoch = reply_epoch; + + // if the osd returns 1 (newer code), or handle MAX, it means we + // hit the end of the pg. + if ((response.handle.is_max() || r == 1) && + !list_context->sort_bitwise) { + // legacy OSD and !sortbitwise, figure out the next PG on our own + ++list_context->current_pg; + if (list_context->current_pg == list_context->starting_pg_num) { + // end of pool + list_context->pos = hobject_t::get_max(); + } else { + // next pg + list_context->pos = hobject_t(object_t(), string(), CEPH_NOSNAP, + list_context->current_pg, + list_context->pool_id, string()); + } + } else { + list_context->pos = response.handle; } int response_size = response.entries.size(); ldout(cct, 20) << " response.entries.size " << response_size << ", response.entries " << response.entries - << ", handle " << response.handle << dendl; + << ", handle " << response.handle + << ", tentative new pos " << list_context->pos << dendl; list_context->extra_info.append(extra_info); if (response_size) { list_context->list.merge(response.entries); } - // if the osd returns 1 (newer code), or handle MAX, it means we - // hit the end of the pg. - if (response.handle.is_max() || r == 1) { - ldout(cct, 20) << " at end of pg" << dendl; - list_context->at_end_of_pg = true; - } else { - // there is more for this pg; get it? - if (response_size < list_context->max_entries) { - list_context->max_entries -= response_size; - list_nobjects(list_context, final_finish); - return; - } - } - if (!list_context->list.empty()) { - ldout(cct, 20) << " returning results so far" << dendl; + if (list_context->list.size() >= list_context->max_entries) { + ldout(cct, 20) << " hit max, returning results so far, " + << list_context->list << dendl; // release the listing context's budget once all // OPs (in the session) are finished put_nlist_context_budget(list_context); @@ -3505,14 +3500,15 @@ void Objecter::_nlist_reply(NListContext *list_context, int r, list_nobjects(list_context, final_finish); } -void Objecter::put_nlist_context_budget(NListContext *list_context) { - if (list_context->ctx_budget >= 0) { - ldout(cct, 10) << " release listing context's budget " << - list_context->ctx_budget << dendl; - put_op_budget_bytes(list_context->ctx_budget); - list_context->ctx_budget = -1; - } +void Objecter::put_nlist_context_budget(NListContext *list_context) +{ + if (list_context->ctx_budget >= 0) { + ldout(cct, 10) << " release listing context's budget " << + list_context->ctx_budget << dendl; + put_op_budget_bytes(list_context->ctx_budget); + list_context->ctx_budget = -1; } +} uint32_t Objecter::list_objects_seek(ListContext *list_context, uint32_t pos) diff --git a/src/osdc/Objecter.h b/src/osdc/Objecter.h index 2a37c58f508c3..53f87e482bdf0 100644 --- a/src/osdc/Objecter.h +++ b/src/osdc/Objecter.h @@ -1386,17 +1386,18 @@ public: // Pools and statistics struct NListContext { - int current_pg; - collection_list_handle_t cookie; - epoch_t current_pg_epoch; - int starting_pg_num; - bool at_end_of_pool; - bool at_end_of_pg; - bool sort_bitwise; + collection_list_handle_t pos; - int64_t pool_id; - int pool_snap_seq; - int max_entries; + // these are for !sortbitwise compat only + int current_pg = 0; + int starting_pg_num = 0; + bool sort_bitwise = false; + + bool at_end_of_pool = false; ///< publicly visible end flag + + int64_t pool_id = -1; + int pool_snap_seq = 0; + uint64_t max_entries = 0; string nspace; bufferlist bl; // raw data read to here @@ -1410,30 +1411,14 @@ public: // the budget is not get/released on OP basis, instead the budget // is acquired before sending the first OP and released upon receiving // the last op reply. - int ctx_budget; - - NListContext() : current_pg(0), - current_pg_epoch(0), - starting_pg_num(0), - at_end_of_pool(false), - at_end_of_pg(false), - sort_bitwise(false), - pool_id(0), - pool_snap_seq(0), - max_entries(0), - nspace(), - bl(), - list(), - filter(), - extra_info(), - ctx_budget(-1) {} + int ctx_budget = -1; bool at_end() const { return at_end_of_pool; } uint32_t get_pg_hash_position() const { - return current_pg; + return pos.get_hash(); } };