From ea4fe7629db1491f46b7c5774d3dcc6a79da529c Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 13 Jul 2015 12:36:46 -0400 Subject: [PATCH] os/HashIndex: handle legacy nibblewise sort We need to separate the bitwise and nibblewise implementations because they diverge too much. Start by converting the old code to a working nibblewise sort (compensating for the new ghobject_t sort and new args). Signed-off-by: Sage Weil --- src/os/HashIndex.cc | 102 +++++++++++++++++++++++++++----------------- src/os/HashIndex.h | 52 ++++++++++++++++++++-- 2 files changed, 112 insertions(+), 42 deletions(-) diff --git a/src/os/HashIndex.cc b/src/os/HashIndex.cc index b70912f0f2a0d..481c18736a4b3 100644 --- a/src/os/HashIndex.cc +++ b/src/os/HashIndex.cc @@ -322,6 +322,7 @@ int HashIndex::_lookup(const ghobject_t &oid, int HashIndex::_collection_list_partial(const ghobject_t &start, const ghobject_t &end, + bool sort_bitwise, int max_count, vector *ls, ghobject_t *next) { @@ -331,7 +332,7 @@ int HashIndex::_collection_list_partial(const ghobject_t &start, next = &_next; *next = start; dout(20) << __func__ << " start:" << start << " end:" << end << "-" << max_count << " ls.size " << ls->size() << dendl; - return list_by_hash(path, end, max_count, next, ls); + return list_by_hash(path, end, sort_bitwise, max_count, next, ls); } int HashIndex::prep_delete() { @@ -762,81 +763,106 @@ uint32_t HashIndex::hash_prefix_to_hash(string prefix) { return hash; } -int HashIndex::get_path_contents_by_hash(const vector &path, - const ghobject_t *next_object, - set *hash_prefixes, - set > *objects) { - vector subdirs_vec; +int HashIndex::get_path_contents_by_hash_nibblewise( + const vector &path, + const ghobject_t *next_object, + set *hash_prefixes, + set, CmpPairNibblewise > *objects) +{ map rev_objects; int r; - string cur_prefix; - for (vector::const_iterator i = path.begin(); - i != path.end(); - ++i) { - cur_prefix.append(*i); - } r = list_objects(path, 0, 0, &rev_objects); if (r < 0) return r; + for (map::iterator i = rev_objects.begin(); i != rev_objects.end(); ++i) { string hash_prefix = get_path_str(i->second); - if (next_object && i->second < *next_object) + if (next_object && cmp_nibblewise(i->second, *next_object) < 0) continue; hash_prefixes->insert(hash_prefix); objects->insert(pair(hash_prefix, i->second)); } - r = list_subdirs(path, &subdirs_vec); + + vector subdirs; + r = list_subdirs(path, &subdirs); if (r < 0) return r; - set subdirs; - subdirs.insert(subdirs_vec.begin(), subdirs_vec.end()); - for (set::iterator i = subdirs.begin(); + + // sort nibblewise (string sort of (reversed) hex digits) + std::sort(subdirs.begin(), subdirs.end()); + + string cur_prefix; + for (vector::const_iterator i = path.begin(); + i != path.end(); + ++i) { + cur_prefix.append(*i); + } + string next_object_string; + if (next_object) + next_object_string = get_path_str(*next_object); + + for (vector::iterator i = subdirs.begin(); i != subdirs.end(); ++i) { string candidate = cur_prefix + *i; - if (next_object && - (next_object->is_max() || - candidate < get_path_str(*next_object).substr(0, candidate.size()))) - continue; + if (next_object) { + if (next_object->is_max()) + continue; + if (candidate < next_object_string.substr(0, candidate.size())) + continue; + } hash_prefixes->insert(cur_prefix + *i); } return 0; } int HashIndex::list_by_hash(const vector &path, - ghobject_t end, + const ghobject_t &end, + bool sort_bitwise, int max_count, ghobject_t *next, - vector *out) { + vector *out) +{ assert(out); + assert(!sort_bitwise); + return list_by_hash_nibblewise(path, end, max_count, next, out); +} + +int HashIndex::list_by_hash_nibblewise( + const vector &path, + const ghobject_t& end, + int max_count, + ghobject_t *next, + vector *out) +{ vector next_path = path; next_path.push_back(""); set hash_prefixes; - set > objects; - int r = get_path_contents_by_hash(path, - next, - &hash_prefixes, - &objects); + set, CmpPairNibblewise > objects; + int r = get_path_contents_by_hash_nibblewise(path, + next, + &hash_prefixes, + &objects); if (r < 0) return r; - dout(20) << " prefixes " << hash_prefixes << dendl; for (set::iterator i = hash_prefixes.begin(); i != hash_prefixes.end(); ++i) { - set >::iterator j = objects.lower_bound( - make_pair(*i, ghobject_t())); + dout(20) << __func__ << " prefix " << *i << dendl; + set, CmpPairNibblewise >::iterator j = + objects.lower_bound(make_pair(*i, ghobject_t())); if (j == objects.end() || j->first != *i) { *(next_path.rbegin()) = *(i->rbegin()); ghobject_t next_recurse; if (next) next_recurse = *next; - r = list_by_hash(next_path, - end, - max_count, - &next_recurse, - out); + r = list_by_hash_nibblewise(next_path, + end, + max_count, + &next_recurse, + out); if (r < 0) return r; @@ -852,12 +878,12 @@ int HashIndex::list_by_hash(const vector &path, *next = j->second; return 0; } - if (j->second >= end) { + if (cmp_nibblewise(j->second, end) >= 0) { if (next) *next = ghobject_t::get_max(); return 0; } - if (!next || j->second >= *next) { + if (!next || cmp_nibblewise(j->second, *next) >= 0) { out->push_back(j->second); } ++j; diff --git a/src/os/HashIndex.h b/src/os/HashIndex.h index ee3de278006c3..d6f605635a029 100644 --- a/src/os/HashIndex.h +++ b/src/os/HashIndex.h @@ -19,6 +19,7 @@ #include "include/encoding.h" #include "LFNIndex.h" +extern string reverse_hexdigit_bits_string(string l); /** * Implements collection prehashing. @@ -191,6 +192,7 @@ protected: int _collection_list_partial( const ghobject_t &start, const ghobject_t &end, + bool sort_bitwise, int max_count, vector *ls, ghobject_t *next @@ -349,18 +351,60 @@ private: return str; } + struct CmpPairNibblewise { + bool operator()(const pair& l, + const pair& r) + { + if (l.first < r.first) + return true; + if (l.first > r.first) + return false; + if (cmp_nibblewise(l.second, r.second) < 0) + return true; + return false; + } + }; + + struct CmpHexdigitStringBitwise { + bool operator()(const string& l, const string& r) { + return reverse_hexdigit_bits_string(l) < reverse_hexdigit_bits_string(r); + } + }; + /// Get path contents by hash - int get_path_contents_by_hash( - const vector &path, /// [in] Path to list + int get_path_contents_by_hash_bitwise( + const vector &path, /// [in] Path to list const ghobject_t *next_object, /// [in] list > *next_object - set *hash_prefixes, /// [out] prefixes in dir + set *hash_prefixes, /// [out] prefixes in dir set > *objects /// [out] objects ); + int get_path_contents_by_hash_nibblewise( + const vector &path, /// [in] Path to list + const ghobject_t *next_object, /// [in] list > *next_object + set *hash_prefixes, /// [out] prefixes in dir + set, CmpPairNibblewise > *objects /// [out] objects + ); /// List objects in collection in ghobject_t order int list_by_hash( const vector &path, /// [in] Path to list - ghobject_t end, /// [in] List only objects < end + const ghobject_t &end, /// [in] List only objects < end + bool sort_bitwise, /// [in] sort bitwise + int max_count, /// [in] List at most max_count + ghobject_t *next, /// [in,out] List objects >= *next + vector *out /// [out] Listed objects + ); ///< @return Error Code, 0 on success + /// List objects in collection in ghobject_t order + int list_by_hash_bitwise( + const vector &path, /// [in] Path to list + const ghobject_t &end, /// [in] List only objects < end + int max_count, /// [in] List at most max_count + ghobject_t *next, /// [in,out] List objects >= *next + vector *out /// [out] Listed objects + ); ///< @return Error Code, 0 on success + int list_by_hash_nibblewise( + const vector &path, /// [in] Path to list + const ghobject_t &end, /// [in] List only objects < end int max_count, /// [in] List at most max_count ghobject_t *next, /// [in,out] List objects >= *next vector *out /// [out] Listed objects -- 2.39.5