From: Matt Benjamin Date: Thu, 21 May 2020 21:58:19 +0000 (-0400) Subject: rgw:gc: use XXHash for gc queue selection X-Git-Tag: v16.1.0~1777^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=0864588f8a2a48dde43abf045688091ed3f7d22b;p=ceph.git rgw:gc: use XXHash for gc queue selection We've observed significant imbalance in gc queue loading, most likely due to poor ceph_str_hash_linux dispersion for common key forms. XXHash64 should be an effective and fast replacement for all current RGW users of ceph_str_hash_linux (which also include bucket-index sharding and bucket-lifecycle shard selection). Unlike the other users, gc queue selection appears safe to "just switch" as only defer-gc operations (which are non-critical and of brief temporal locality) make any sort of rendezvous on specific enqueued tags. Signed-off-by: Matt Benjamin --- diff --git a/src/rgw/rgw_gc.cc b/src/rgw/rgw_gc.cc index 98be34bf4c28..b7b89e7b8697 100644 --- a/src/rgw/rgw_gc.cc +++ b/src/rgw/rgw_gc.cc @@ -17,6 +17,7 @@ #include <list> // XXX #include <sstream> +#include "xxhash.h" #define dout_context g_ceph_context #define dout_subsys ceph_subsys_rgw @@ -60,7 +61,7 @@ void RGWGC::finalize() int RGWGC::tag_index(const string& tag) { - return rgw_shard_id(tag, max_objs); + return rgw_shards_mod(XXH64(tag.c_str(), tag.size(), seed), max_objs); } int RGWGC::send_chain(cls_rgw_obj_chain& chain, const string& tag) diff --git a/src/rgw/rgw_gc.h b/src/rgw/rgw_gc.h index 5dda753fd548..9eafb5fbd875 100644 --- a/src/rgw/rgw_gc.h +++ b/src/rgw/rgw_gc.h @@ -25,6 +25,8 @@ class RGWGC : public DoutPrefixProvider { string *obj_names; std::atomic<bool> down_flag = { false }; + static constexpr uint64_t seed = 8675309; + int tag_index(const string& tag); class GCWorker : public Thread {