From d60dee0f0a0e83d98ca61bcf1f184fa446a70a54 Mon Sep 17 00:00:00 2001 From: Matt Benjamin Date: Tue, 18 Jun 2019 16:12:55 -0400 Subject: [PATCH] rgw: s/std::map/boost::container::flat_map/ cls_bucket_list_ordered (RGWRados and CLS). Probably faster, allocating less. Definitely not slower. Examples from single-OSD vstart.sh cluster, Ceph built at -O2 BEFORE [mbenjamin@lemon python]$ time s3cmd -c s3cfg_userx ls s3://DOCREQUEST_750/CSV/SUB1/ > /dev/null real 4m48.991s user 3m45.260s sys 0m7.174s (2nd run) radosgw Samples: 81K of event 'cycles:ppp', 4000 Hz, Event count (approx.): 3189324729 Overhead Shared Object Symbol 7.06% libtcmalloc.so.4.5.1 [.] tcmalloc::CentralFreeList::FetchFromOneSpans 6.85% libstdc++.so.6.0.25 [.] std::__ostream_insert > 6.15% librados.so.2.0.0 [.] ceph::buffer::v14_2_0::list::iterator_impl::copy 4.12% librados.so.2.0.0 [.] ceph::buffer::v14_2_0::ptr::copy_out 4.11% libstdc++.so.6.0.25 [.] std::basic_streambuf >::xsputn 3.49% libc-2.27.so [.] __memmove_avx_unaligned_erms 3.33% libtcmalloc.so.4.5.1 [.] tc_deletearray_aligned_nothrow 3.04% radosgw [.] std::_Rb_tree, std::allocator >, std::pa 2.46% radosgw [.] std::__cxx11::basic_string, std::allocator >::compare 2.45% libtcmalloc.so.4.5.1 [.] operator new[] 2.39% libstdc++.so.6.0.25 [.] std::ostream::sentry::sentry 2.36% radosgw [.] std::_Rb_tree, std::allocator >, std::pa 2.07% librados.so.2.0.0 [.] ceph::buffer::v14_2_0::list::iterator_impl::advance 1.94% libc-2.27.so [.] __memcmp_avx2_movbe 1.93% radosgw [.] std::_Rb_tree, std::allocator >, std::pa 1.85% librados.so.2.0.0 [.] ceph::buffer::v14_2_0::list::iterator_impl::copy 1.79% radosgw [.] rgw_bucket_dir::decode 1.76% libceph-common.so.0 [.] operator<< 1.42% radosgw [.] ceph::decode 1.35% libstdc++.so.6.0.25 [.] std::_Rb_tree_insert_and_rebalance 1.33% libtcmalloc.so.4.5.1 [.] tcmalloc::CentralFreeList::ReleaseToSpans 1.31% librados.so.2.0.0 [.] 
std::__cxx11::basic_string, std::allocator >::_M_mutate 1.24% [kernel] [k] copy_user_enhanced_fast_string For a higher level overview, try: perf top --sort comm,dso osd Samples: 23K of event 'cycles:ppp', 4000 Hz, Event count (approx.): 5059851086 Overhead Shared Object Symbol 8.57% libc-2.27.so [.] vfprintf 4.90% ceph-osd [.] ceph::logging::Log::_flush 4.66% libc-2.27.so [.] _IO_default_xsputn 3.49% libtcmalloc.so.4.5.1 [.] operator new[] 2.93% ceph-osd [.] StackStringBuf<4096ul>::xsputn 2.70% libstdc++.so.6.0.25 [.] std::__ostream_insert > 2.11% libpthread-2.27.so [.] __pthread_mutex_unlock_usercnt 1.91% libc-2.27.so [.] __memmove_avx_unaligned_erms 1.75% libstdc++.so.6.0.25 [.] std::num_put > >::_M_insert_int 1.10% ceph-osd [.] ceph::buffer::v14_2_0::ptr::append 1.04% libpthread-2.27.so [.] __pthread_mutex_lock 1.01% libc-2.27.so [.] __tz_convert 0.94% [kernel] [k] entry_SYSCALL_64 0.94% ceph-osd [.] ceph::buffer::v14_2_0::list::iterator_impl::copy 0.93% ceph-osd [.] ceph::logging::Log::submit_entry For a higher level overview, try: perf top --sort comm,dso AFTER [mbenjamin@lemon python]$ time s3cmd -c s3cfg_userx ls s3://DOCREQUEST_750/CSV/SUB1/ > /dev/null real 4m51.488s user 3m36.785s sys 0m5.689s (1st run) radosgw Samples: 52K of event 'cycles:ppp', 4000 Hz, Event count (approx.): 4426952205 Overhead Shared Object Symbol 6.11% librados.so.2.0.0 [.] ceph::buffer::v14_2_0::list::iterator_impl::copy 5.83% libstdc++.so.6.0.25 [.] std::__ostream_insert > 3.89% radosgw [.] rgw_bucket_dir::decode 3.73% radosgw [.] rgw_bucket_dir_entry::rgw_bucket_dir_entry 3.68% librados.so.2.0.0 [.] ceph::buffer::v14_2_0::ptr::copy_out 3.37% libstdc++.so.6.0.25 [.] std::basic_streambuf >::xsputn 3.14% libtcmalloc.so.4.5.1 [.] tcmalloc::CentralFreeList::FetchFromOneSpans 2.55% libc-2.27.so [.] __memmove_avx_unaligned_erms 2.22% libtcmalloc.so.4.5.1 [.] tc_deletearray_aligned_nothrow 2.02% libstdc++.so.6.0.25 [.] std::ostream::sentry::sentry 1.79% librados.so.2.0.0 [.] 
ceph::buffer::v14_2_0::list::iterator_impl::advance 1.77% [kernel] [k] n_tty_write 1.74% libc-2.27.so [.] vfprintf 1.71% libtcmalloc.so.4.5.1 [.] operator new[] 1.65% librados.so.2.0.0 [.] ceph::buffer::v14_2_0::list::iterator_impl::copy 1.58% radosgw [.] ceph::decode 1.38% librados.so.2.0.0 [.] std::__cxx11::basic_string, std::allocator >::_M_mutate 1.22% [kernel] [k] stackleak_erase 1.12% libceph-common.so.0 [.] operator<< 1.09% libc-2.27.so [.] _IO_default_xsputn 1.06% libc-2.27.so [.] __memcmp_avx2_movbe 1.04% [kernel] [k] copy_user_enhanced_fast_string 0.93% librados.so.2.0.0 [.] std::__cxx11::basic_string, std::allocator >::_M_append For a higher level overview, try: perf top --sort comm,dso osd Samples: 134K of event 'cycles:ppp', 4000 Hz, Event count (approx.): 26819176020 Overhead Shared Object Symbol 8.82% libc-2.27.so [.] vfprintf 6.88% ceph-osd [.] ceph::logging::Log::_flush 4.80% libc-2.27.so [.] _IO_default_xsputn 4.15% libpthread-2.27.so [.] __pthread_mutex_unlock_usercnt 2.69% ceph-osd [.] StackStringBuf<4096ul>::xsputn 2.54% libstdc++.so.6.0.25 [.] std::__ostream_insert > 2.40% libtcmalloc.so.4.5.1 [.] operator new[] 2.39% libc-2.27.so [.] __memmove_avx_unaligned_erms 2.13% libcls_rgw.so.1.0.0 [.] 
boost::container::dtl::pair, std::allocator > >::_M_insert_int --- src/cls/rgw/cls_rgw.cc | 2 +- src/cls/rgw/cls_rgw_types.cc | 4 ++-- src/cls/rgw/cls_rgw_types.h | 3 ++- src/rgw/rgw_admin.cc | 5 ++--- src/rgw/rgw_bucket.cc | 8 ++++---- src/rgw/rgw_rados.cc | 10 ++++++---- src/rgw/rgw_rados.h | 7 ++++++- 7 files changed, 23 insertions(+), 16 deletions(-) diff --git a/src/cls/rgw/cls_rgw.cc b/src/cls/rgw/cls_rgw.cc index 8babb8a7fae..6be74b681f4 100644 --- a/src/cls/rgw/cls_rgw.cc +++ b/src/cls/rgw/cls_rgw.cc @@ -433,7 +433,7 @@ int rgw_bucket_list(cls_method_context_t hctx, bufferlist *in, bufferlist *out) if (rc < 0) return rc; - std::map<string, rgw_bucket_dir_entry>& m = new_dir.m; + auto& m = new_dir.m; done = keys.empty(); diff --git a/src/cls/rgw/cls_rgw_types.cc b/src/cls/rgw/cls_rgw_types.cc index 892ef5617e0..969153b0aa3 100644 --- a/src/cls/rgw/cls_rgw_types.cc +++ b/src/cls/rgw/cls_rgw_types.cc @@ -577,9 +577,9 @@ void rgw_bucket_dir::dump(Formatter *f) const f->open_object_section("header"); header.dump(f); f->close_section(); - map<string, rgw_bucket_dir_entry>::const_iterator iter = m.begin(); + auto iter = m.cbegin(); f->open_array_section("map"); - for (; iter != m.end(); ++iter) { + for (; iter != m.cend(); ++iter) { f->dump_string("key", iter->first); f->open_object_section("dir_entry"); iter->second.dump(f); diff --git a/src/cls/rgw/cls_rgw_types.h b/src/cls/rgw/cls_rgw_types.h index d069e8f516d..7fb845b7662 100644 --- a/src/cls/rgw/cls_rgw_types.h +++ b/src/cls/rgw/cls_rgw_types.h @@ -4,6 +4,7 @@ #ifndef CEPH_CLS_RGW_TYPES_H #define CEPH_CLS_RGW_TYPES_H +#include <boost/container/flat_map.hpp> #include "common/ceph_time.h" #include "common/Formatter.h" @@ -770,7 +771,7 @@ WRITE_CLASS_ENCODER(rgw_bucket_dir_header) struct rgw_bucket_dir { rgw_bucket_dir_header header; - std::map<std::string, rgw_bucket_dir_entry> m; + boost::container::flat_map<std::string, rgw_bucket_dir_entry> m; void encode(bufferlist &bl) const { ENCODE_START(2, 2, bl); diff --git a/src/rgw/rgw_admin.cc b/src/rgw/rgw_admin.cc index d458c3799f1..e652ec5618c 100644 --- a/src/rgw/rgw_admin.cc +++ b/src/rgw/rgw_admin.cc @@ -6182,7
+6182,7 @@ next: formatter->dump_string("bucket", bucket_name); formatter->open_array_section("objects"); while (is_truncated) { - map<string, rgw_bucket_dir_entry> result; + RGWRados::ent_map_t result; int r = store->cls_bucket_list_ordered(bucket_info, RGW_NO_SHARD, marker, prefix, 1000, true, @@ -6197,8 +6197,7 @@ next: if (r == -ENOENT) break; - map<string, rgw_bucket_dir_entry>::iterator iter; - for (iter = result.begin(); iter != result.end(); ++iter) { + for (auto iter = result.begin(); iter != result.end(); ++iter) { rgw_obj_key key = iter->second.key; rgw_bucket_dir_entry& entry = iter->second; diff --git a/src/rgw/rgw_bucket.cc b/src/rgw/rgw_bucket.cc index ff93fd3218e..dff579b9ecb 100644 --- a/src/rgw/rgw_bucket.cc +++ b/src/rgw/rgw_bucket.cc @@ -1000,10 +1000,9 @@ int RGWBucket::remove_object(RGWBucketAdminOpState& op_state, std::string *err_m return 0; } -static void dump_bucket_index(map<string, rgw_bucket_dir_entry> result, Formatter *f) +static void dump_bucket_index(const RGWRados::ent_map_t& result, Formatter *f) { - map<string, rgw_bucket_dir_entry>::iterator iter; - for (iter = result.begin(); iter != result.end(); ++iter) { + for (auto iter = result.begin(); iter != result.end(); ++iter) { f->dump_string("object", iter->first); } } @@ -1167,7 +1166,8 @@ int RGWBucket::check_object_index(RGWBucketAdminOpState& op_state, Formatter *formatter = flusher.get_formatter(); formatter->open_object_section("objects"); while (is_truncated) { - map<string, rgw_bucket_dir_entry> result; + RGWRados::ent_map_t result; + result.reserve(1000); int r = store->cls_bucket_list_ordered(bucket_info, RGW_NO_SHARD, marker, prefix, 1000, true, diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc index 6b0d15b3ba0..726564f1313 100644 --- a/src/rgw/rgw_rados.cc +++ b/src/rgw/rgw_rados.cc @@ -2459,7 +2459,9 @@ int RGWRados::Bucket::List::list_objects_ordered(int64_t max_p, << "[" << cur_marker.instance << "]" << dendl; } - std::map<string, rgw_bucket_dir_entry> ent_map; + + ent_map_t ent_map; + ent_map.reserve(read_ahead); int r = store->cls_bucket_list_ordered(target->get_bucket_info(), shard_id, cur_marker, @@ -9123,7 +9125,7 @@ int
RGWRados::cls_bucket_list_ordered(RGWBucketInfo& bucket_info, const string& prefix, uint32_t num_entries, bool list_versions, - map<string, rgw_bucket_dir_entry>& m, + ent_map_t& m, bool *is_truncated, rgw_obj_index_key *last_entry, optional_yield y, @@ -9151,8 +9153,8 @@ int RGWRados::cls_bucket_list_ordered(RGWBucketInfo& bucket_info, return r; // Create a list of iterators that are used to iterate each shard - vector<map<string, rgw_bucket_dir_entry>::iterator> vcurrents; - vector<map<string, rgw_bucket_dir_entry>::iterator> vends; + vector<RGWRados::ent_map_t::iterator> vcurrents; + vector<RGWRados::ent_map_t::iterator> vends; vector<string> vnames; vcurrents.reserve(list_results.size()); vends.reserve(list_results.size()); diff --git a/src/rgw/rgw_rados.h b/src/rgw/rgw_rados.h index d62ac9cecbf..e7b9a134fa8 100644 --- a/src/rgw/rgw_rados.h +++ b/src/rgw/rgw_rados.h @@ -5,6 +5,7 @@ #define CEPH_RGWRADOS_H #include <functional> +#include <boost/container/flat_map.hpp> #include "include/rados/librados.hpp" #include "include/Context.h" @@ -2222,11 +2223,15 @@ public: ceph::real_time& removed_mtime, list<rgw_obj_index_key> *remove_objs, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr); int cls_obj_complete_cancel(BucketShard& bs, string& tag, rgw_obj& obj, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr); int cls_obj_set_bucket_tag_timeout(RGWBucketInfo& bucket_info, uint64_t timeout); + + using ent_map_t = + boost::container::flat_map<std::string, rgw_bucket_dir_entry>; + int cls_bucket_list_ordered(RGWBucketInfo& bucket_info, int shard_id, const rgw_obj_index_key& start, const string& prefix, uint32_t num_entries, bool list_versions, - map<string, rgw_bucket_dir_entry>& m, + ent_map_t& m, bool *is_truncated, rgw_obj_index_key *last_entry, optional_yield y, -- 2.39.5