From: Samuel Just Date: Thu, 24 Apr 2025 22:13:04 +0000 (-0700) Subject: vstart.sh: simplify crimson core assignment, use assign_crimson_cores.py X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=3a38b2f86c24fd4fec8048e52c23ca9b3e280934;p=ceph.git vstart.sh: simplify crimson core assignment, use assign_crimson_cores.py This commit simplifies the internal flow in a few ways: - core assignment is entirely handled by prep_balance_cpu and do_balance_cpu. The latter simply does as the cpu_table instructs. - assign_crimson_cores calls lscpu and taskset internally, no need for temp files. It also changes some defaults: - if crimson-balance-cpu is unset or set to none, crimson-osd will not pin cpus at all rather than using the simple sequential allocation scheme, which could be much less efficient on platforms where cpuids 0,1,2,3,... are on socket 0,1,2,3,... "osd" and "socket" options provide numa aware assignments when requested. New features: - Alienstore cores are now assigned with assign_crimson_cores using the same balance strategy using --crimson-alien-num-cores. - --crimson-reactor-physical-only and --crimson-alienstore-physical-only will cause reactor or alienstore cpus respectively to be allocated with one cpu per physical core rather than including smt siblings. Fixes: https://tracker.ceph.com/issues/71096 Signed-off-by: Samuel Just (cherry picked from commit 1795f46ebbc2f061e26f0298815d891fa12c1b96) --- diff --git a/src/vstart.sh b/src/vstart.sh index adeeab9bb9a7a..ea3577bf4af86 100755 --- a/src/vstart.sh +++ b/src/vstart.sh @@ -275,7 +275,9 @@ options: --seastore-secondary-devs-type: device type of all secondary blockdevs. HDD, SSD(default), ZNS or RANDOM_BLOCK_SSD --crimson-smp: number of cores to use for crimson --crimson-alien-num-threads: number of alien-tp threads - --crimson-alien-num-cores: number of cores to use for alien-tp + --crimson-reactor-physical-only: use only one cpu per physical core for seastar reactors + --crimson-alien-num-cores: number of cpus to use for alien threads + --crimson-alienstore-physical-only: use only one cpu per physical core for alienstore --crimson-balance-cpu: distribute the Seastar reactors uniformly across OSDs (osd) or NUMA (socket) --osds-per-host: populate crush_location as each host holds the specified number of osds if set --require-osd-and-client-version: if supplied, do set-require-min-compat-client and require-osd-release to specified value @@ -350,40 +352,38 @@ parse_secondary_devs() { # Auxiliar function to prepare the CPU cores to pin Seastar reactors prep_balance_cpu() { - local crimson_smp=$1 - local balance_strategy=$2 - local in_file_name="/tmp/numa_args_${balance_strategy}.out" - local out_file_name="/tmp/numa_nodes.json" - local log_file_name="/tmp/numa_bal_${balance_strategy}.log" - local cmd - - # Check the table is empty - if [ "${#cpu_table[@]}" -eq 0 ]; then - # Ensure the file with the CPU mappings exist - if [ ! -f ${in_file_name} ]; then - debug echo "lscpu --json > ${out_file_name}" - lscpu --json > ${out_file_name} - MY_CPUS=$(taskset -acp $$ | awk -F : '{print $2}') - cmd="python3 ${CEPH_DIR}/../src/tools/contrib/balance_cpu.py -o ${CEPH_NUM_OSD}\ - -r ${crimson_smp} -b ${balance_strategy} -u ${out_file_name} -t ${MY_CPUS} > ${in_file_name}" - debug echo "$cmd" - eval "$cmd" >> ${log_file_name} - fi + if [ -z $crimson_balance_cpu ] || [ "${crimson_balance_cpu}" == "none" ] ; then + echo "Not assigning cpus for crimson" + return + fi - readarray -t cpu_table < ${in_file_name} - # Check the table is not empty, bail out otherwise - if [ "${#cpu_table[@]}" -ne 0 ]; then - debug echo "CPU table not empty with ${#cpu_table[@]} entries" - else - debug echo "CPU table empty, bailing out. Check ${log_file_name}" - fi + cmd="python3 ${CEPH_DIR}/../src/tools/contrib/assign_crimson_cores.py" + cmd+=" -o ${CEPH_NUM_OSD} -r ${crimson_smp} -a ${crimson_alien_num_cores}" + cmd+=" -b ${crimson_balance_cpu}" + if [ ${crimson_reactor_physical_only} != 0 ]; then + cmd+=" --physical-only-seastar" + fi + if [ ${crimson_alienstore_physical_only} != 0 ]; then + cmd+=" --physical-only-alienstore" + fi + + echo $cmd + readarray -t cpu_table < <($cmd) + # Check the table is not empty, bail out otherwise + if [ "${#cpu_table[@]}" -ne 0 ]; then + debug echo "CPU table not empty with ${#cpu_table[@]} entries" + else + debug echo "CPU table empty, bailing out." + exit 1 fi } # Default values for the crimson options crimson_smp=1 crimson_alien_num_threads=0 +crimson_reactor_physical_only=0 crimson_alien_num_cores=0 +crimson_alienstore_physical_only=0 crimson_balance_cpu="" # "osd", "socket" while [ $# -ge 1 ]; do @@ -620,10 +620,16 @@ case $1 in crimson_alien_num_threads=$2 shift ;; + --crimson-reactor-physical-only) + crimson_reactor_physical_only=1 + ;; --crimson-alien-num-cores) crimson_alien_num_cores=$2 shift ;; + --crimson-alienstore-physical-only) + crimson_alienstore_physical_only=1 + ;; --crimson-balance-cpu) crimson_balance_cpu=$2 shift @@ -1197,10 +1203,32 @@ start_cephexporter() { do_balance_cpu() { local osd=$1 + local alienstore_idx=$(( osd + CEPH_NUM_OSD )) + + local reactor_interval=${cpu_table[${osd}]} + if ! [ "${reactor_interval}" == "" ]; then + local cmd="$CEPH_BIN/ceph -c $conf_fn config set osd.$osd crimson_seastar_cpu_cores ${reactor_interval}" + echo $cmd + $cmd + else + echo "No cpu_table entry for osd $osd, setting crimson_seastar_num_reactors" + local cmd="$CEPH_BIN/ceph -c $conf_fn config set osd.$osd crimson_seastar_num_threads $crimson_smp" + echo $cmd + $cmd + return + fi + + + local alienstore_interval=${cpu_table[${alienstore_idx}]} + if [ ! "${alienstore_interval}" == "" ]; then + local cmd="$CEPH_BIN/ceph -c $conf_fn config set osd.$osd crimson_alien_thread_cpu_cores ${alienstore_interval}" + echo $cmd + $cmd + else + echo "No alienstore cpu_table entry for osd $osd" + return + fi - interval=${cpu_table[${osd}]} - echo "$CEPH_BIN/ceph -c $conf_fn config set osd.$osd crimson_seastar_cpu_cores $interval" - $CEPH_BIN/ceph -c $conf_fn config set "osd.$osd" crimson_seastar_cpu_cores "$interval" } start_osd() { @@ -1215,22 +1243,14 @@ start_osd() { fi local osds_wait # If the type of OSD is Crimson and the option to balance the Seastar reactors is true - if [ "$ceph_osd" == "crimson-osd" ] && [ ! -z "$crimson_balance_cpu" ]; then + if [ "$ceph_osd" == "crimson-osd" ]; then debug echo "Preparing balance CPU for Crimson" - prep_balance_cpu $crimson_smp $crimson_balance_cpu + prep_balance_cpu fi for osd in `seq $start $end` do if [ "$ceph_osd" == "crimson-osd" ]; then - if [ ! -z "$crimson_balance_cpu" ]; then - do_balance_cpu $osd - else - bottom_cpu=$(( osd * crimson_smp )) - top_cpu=$(( bottom_cpu + crimson_smp - 1 )) - # set exclusive CPU nodes for each osd - echo "$CEPH_BIN/ceph -c $conf_fn config set osd.$osd crimson_seastar_cpu_cores $bottom_cpu-$top_cpu" - $CEPH_BIN/ceph -c $conf_fn config set "osd.$osd" crimson_seastar_cpu_cores "$bottom_cpu-$top_cpu" - fi + do_balance_cpu $osd fi if [ "$new" -eq 1 -o $inc_osd_num -gt 0 ]; then wconf <