--- /dev/null
+#!/usr/bin/env bash
+
+# -e: abort on the first failing command; -x: trace every command as it runs.
+set -ex
+
+#
+# Snapshot background system information and then record/report a series
+# of perf-c2c profiles. Every output file is created in the current
+# working directory, so the caller is expected to cd into a scratch
+# directory first. Takes no arguments.
+#
+function run_perf_c2c() {
+    local nodecnt i
+
+    # First get some background system info
+    uname -a > uname.out
+    lscpu > lscpu.out
+    cat /proc/cmdline > cmdline.out
+    # vmstat is interrupted by timeout after 10 seconds, hence the "|| true"
+    timeout -s INT 10 vmstat -w 1 > vmstat.out || true
+    sudo dmesg >& dmesg.out
+    cat /proc/cpuinfo > cpuinfo.out
+    ps axo psr,time,stat,ppid,pid,pcpu,comm > ps.1.out
+    ps -eafT > ps.2.out
+    sudo sysctl -a > sysctl.out
+
+    # One meminfo dump per NUMA node, the node count being the third field
+    # of the "NUMA node(s):" line printed by lscpu. The loop bound is
+    # referenced by name so that an empty value simply skips the loop.
+    nodecnt=$(lscpu | grep "NUMA node(" | awk '{print $3}')
+    for ((i = 0; i < nodecnt; i++))
+    do
+        sudo cat /sys/devices/system/node/node${i}/meminfo > meminfo.$i.out
+    done
+    # The command substitution is left unquoted on purpose: each path
+    # found must become a separate argument.
+    sudo more $(sudo find /proc -name status) > proc_parent_child_status.out
+    sudo more /proc/*/numa_maps > numa_maps.out
+
+    #
+    # Get separate kernel and user perf-c2c stats
+    #
+    sudo perf c2c record -a --ldlat=70 --all-user -o perf_c2c_a_all_user.data sleep 5
+    sudo perf c2c report --stdio -i perf_c2c_a_all_user.data > perf_c2c_a_all_user.out 2>&1
+    sudo perf c2c report --full-symbols --stdio -i perf_c2c_a_all_user.data > perf_c2c_full-sym_a_all_user.out 2>&1
+
+    sudo perf c2c record --call-graph dwarf -a --ldlat=70 --all-user -o perf_c2c_g_a_all_user.data sleep 5
+    sudo perf c2c report -g --stdio -i perf_c2c_g_a_all_user.data > perf_c2c_g_a_all_user.out 2>&1
+
+    sudo perf c2c record -a --ldlat=70 --all-kernel -o perf_c2c_a_all_kernel.data sleep 4
+    sudo perf c2c report --stdio -i perf_c2c_a_all_kernel.data > perf_c2c_a_all_kernel.out 2>&1
+
+    sudo perf c2c record --call-graph dwarf --ldlat=70 -a --all-kernel -o perf_c2c_g_a_all_kernel.data sleep 4
+    sudo perf c2c report -g --stdio -i perf_c2c_g_a_all_kernel.data > perf_c2c_g_a_all_kernel.out 2>&1
+
+    #
+    # Get combined kernel and user perf-c2c stats
+    #
+    sudo perf c2c record -a --ldlat=70 -o perf_c2c_a_both.data sleep 4
+    sudo perf c2c report --stdio -i perf_c2c_a_both.data > perf_c2c_a_both.out 2>&1
+
+    # No --all-user/--all-kernel here: this capture must contain both.
+    sudo perf c2c record --call-graph dwarf --ldlat=70 -a -o perf_c2c_g_a_both.data sleep 4
+    sudo perf c2c report -g --stdio -i perf_c2c_g_a_both.data > perf_c2c_g_a_both.out 2>&1
+
+    #
+    # Get all-user physical addr stats, in case multiple threads or processes are
+    # accessing shared memory with different vaddrs.
+    #
+    sudo perf c2c record --phys-data -a --ldlat=70 --all-user -o perf_c2c_a_all_user_phys_data.data sleep 5
+    sudo perf c2c report --stdio -i perf_c2c_a_all_user_phys_data.data > perf_c2c_a_all_user_phys_data.out 2>&1
+}
+
+#
+# Profile ceph_test_c2c while it is running.
+#
+#   $1    directory (re)created to receive the output of run_perf_c2c
+#   $2... extra arguments passed verbatim to ceph_test_c2c
+#
+# Exits the script with status 1 when the scratch directory cannot be
+# entered or when ceph_test_c2c is no longer running once the profiling
+# is complete.
+#
+function run() {
+    local dir=$1
+    shift
+    (
+        rm -fr "$dir"
+        mkdir "$dir"
+        # errexit is not honored inside this subshell because it is the
+        # left operand of "||": check explicitly so that the output is
+        # never written to the wrong directory.
+        cd "$dir" || exit 1
+        ceph_test_c2c --threads $(($(nproc) * 2)) "$@" &
+        # remember the pid right away instead of relying on $! later on
+        c2c_pid=$!
+        sleep 30 # let it warm up
+        run_perf_c2c
+        kill "$c2c_pid" || { echo "ceph_test_c2c WAS NOT RUNNING" ; exit 1 ; }
+    ) || exit 1
+}
+
+#
+# Run ceph_test_c2c for 30 seconds with and then without --sharding and
+# compare the counters it prints when terminated. Exits the script with
+# status 1 when a counter is missing or when the sharded run is not more
+# than twice the non sharded one.
+#
+function bench() {
+    local threads optimized not_optimized
+
+    threads=$(($(nproc) * 2))
+    # timeout terminates the run after 30 seconds, hence the "|| true"
+    optimized=$(timeout 30 ceph_test_c2c --threads "$threads" --sharding 2> /dev/null || true)
+    not_optimized=$(timeout 30 ceph_test_c2c --threads "$threads" 2> /dev/null || true)
+    # Without this check an empty output ends up as an arithmetic syntax
+    # error followed by a misleading message about the speedup.
+    if ! [[ $optimized =~ ^[0-9]+$ && $not_optimized =~ ^[0-9]+$ ]] ; then
+        echo "ceph_test_c2c did not print a counter (sharding: '$optimized', no sharding: '$not_optimized')"
+        exit 1
+    fi
+    if ! (( optimized > ( not_optimized * 2 ) )) ; then
+        echo "the optimization is expected to be at least x2 faster"
+        exit 1
+    fi
+}
+
+# Profile ceph_test_c2c with and without --sharding (the output goes to
+# the with-sharding and without-sharding directories respectively), then
+# compare the counters reported by both modes.
+run with-sharding --sharding
+run without-sharding
+bench
--- /dev/null
+# Job description: install the perf tooling and run the c2c workunit.
+# NOTE(review): arch presumably restricts the job to x86_64 machines - confirm.
+arch: x86_64
+roles:
+- - mon.a
+ - mgr.x
+ - osd.0
+ - client.0
+tasks:
+# The perf package has a different name on rpm and deb based systems.
+- install:
+ extra_system_packages:
+ rpm:
+ - perf
+ deb:
+ - linux-tools-generic
+# Run the c2c workunit, looked up under qa/standalone, for all clients.
+- workunit:
+ basedir: qa/standalone
+ clients:
+ all:
+ - c2c
num_shards = 1 << num_shard_bits
};
-// align shard to a cacheline
+//
+// Align shard to a cacheline.
+//
+// It would be possible to retrieve the value at runtime (for instance
+// with getconf LEVEL1_DCACHE_LINESIZE or grep -m1 cache_alignment
+// /proc/cpuinfo). It is easier to hard code the largest cache
+// linesize for all known processors (128 bytes). If the actual cache
+// linesize is smaller on a given processor, it will just waste a few
+// bytes.
+//
struct shard_t {
ceph::atomic<size_t> bytes = {0};
ceph::atomic<size_t> items = {0};
ceph_test_stress_watch
DESTINATION ${CMAKE_INSTALL_BINDIR})
+# ceph_test_c2c: multi-threaded counter stress program built from
+# test_c2c.cc and installed alongside the other test binaries.
+add_executable(ceph_test_c2c
+  test_c2c.cc
+  )
+# An executable has no consumers: link everything PRIVATE rather than
+# relying on the keyword-less signature.
+target_link_libraries(ceph_test_c2c
+  PRIVATE
+  global
+  ceph-common
+  pthread
+  ${EXTRALIBS}
+  ${CMAKE_DL_LIBS}
+  )
+install(TARGETS
+  ceph_test_c2c
+  DESTINATION ${CMAKE_INSTALL_BINDIR})
+
if(WITH_FUSE)
add_executable(ceph_test_cfuse_cache_invalidate
test_cfuse_cache_invalidate.cc
--- /dev/null
+#include "common/ceph_argparse.h"
+#include "common/debug.h"
+#include "common/config.h"
+#include "global/global_init.h"
+#include "global/signal_handler.h"
+
+#include "include/mempool.h"
+
+#include <iostream>
+#include <string>
+#include <thread>
+#include <vector>
+
+using std::cerr;
+using std::string;
+
+// Print the supported command line options on stderr.
+static void usage()
+{
+  cerr << "--threads number of threads (default 1)" << std::endl
+       << "--sharding activate sharding optimization" << std::endl;
+}
+
+
+// Counters shared by all threads: the workers started by main() increment
+// shards[i].bytes and sigterm_handler() prints the sum over all entries.
+mempool::shard_t shards[mempool::num_shards] = {0};
+
+// SIGTERM handler registered by main(): print the sum of the bytes
+// counters of all shards on stdout and exit with status 0.
+//
+// signum is not used.
+void sigterm_handler(int signum)
+{
+  size_t total = 0;
+  for (auto& shard : shards) {
+    total += shard.bytes;
+  }
+  // cout is qualified explicitly: the file only has using-declarations
+  // for cerr and string.
+  std::cout << total << std::endl;
+  exit(0);
+}
+
+// Start --threads workers that increment a shard counter forever, until
+// SIGTERM is received and sigterm_handler() prints the total and exits.
+//
+//   --threads N   number of worker threads (default 1, must be >= 1)
+//   --sharding    each worker picks its shard with
+//                 mempool::pool_t::pick_a_shard_int() instead of always
+//                 using shard 0
+//
+// Returns 2 when the command line is invalid.
+int main(int argc, const char **argv)
+{
+  std::vector<const char*> args;
+  argv_to_vec(argc, argv, args);
+  // NOTE(review): cct is not used below; presumably it must stay in scope
+  // to keep the context alive - confirm.
+  auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT,
+                         CODE_ENVIRONMENT_UTILITY,
+                         CINIT_FLAG_NO_DEFAULT_CONFIG_FILE);
+  common_init_finish(g_ceph_context);
+
+  int threads = 1;
+  bool sharding = false;
+  for (auto i = args.begin(); i != args.end(); ) {
+    if (ceph_argparse_double_dash(args, i)) {
+      break;
+    }
+    else if (ceph_argparse_witharg(args, i, &threads, cerr, "--threads", "-t", (char*)NULL)) {
+    }
+    else if (ceph_argparse_flag(args, i, "--sharding", "-s", (char*)NULL)) {
+      sharding = true;
+    }
+    else {
+      cerr << "unknown command line option: " << *i << std::endl;
+      cerr << std::endl;
+      usage();
+      return 2;
+    }
+  }
+
+  // Without at least one worker the program would return immediately and
+  // never print a counter.
+  if (threads < 1) {
+    cerr << "--threads must be at least 1" << std::endl;
+    cerr << std::endl;
+    usage();
+    return 2;
+  }
+
+  init_async_signal_handler();
+  register_async_signal_handler(SIGTERM, sigterm_handler);
+
+  std::vector<std::thread> workers;
+  workers.reserve(threads);
+  for (int n = 0; n < threads; n++) {
+    // sharding is captured by value, shards is a global
+    workers.emplace_back([sharding]() {
+      while (1) {
+        size_t shard = 0;
+        if (sharding) {
+          shard = mempool::pool_t::pick_a_shard_int();
+        }
+        shards[shard].bytes++;
+      }
+    });
+  }
+
+  // The workers never return: the program ends in sigterm_handler().
+  for (auto& t : workers) {
+    t.join();
+  }
+  workers.clear();
+
+  return 0;
+}