git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
vstart.sh: simplify crimson core assignment, use assign_crimson_cores.py
author: Samuel Just <sjust@redhat.com>
Thu, 24 Apr 2025 22:13:04 +0000 (15:13 -0700)
committer: Matan Breizman <mbreizma@redhat.com>
Tue, 13 May 2025 06:47:45 +0000 (09:47 +0300)
This commit simplifies the internal flow in a few ways:
- core assignment is entirely handled by prep_balance_cpu and
  do_balance_cpu.  The latter simply does as the cpu_table
  instructs.
- assign_crimson_cores.py calls lscpu and taskset internally, so
  temp files are no longer needed.

It also changes some defaults:
- if --crimson-balance-cpu is unset or set to "none", crimson-osd will
  not pin cpus at all.  Previously it fell back to a simple sequential
  allocation scheme, which could be much less efficient on platforms
  where cpuids 0,1,2,3,... are on sockets 0,1,2,3,...  The "osd" and
  "socket" options provide NUMA-aware assignments when requested.

New features:
- Alienstore cores are now assigned by assign_crimson_cores.py,
  following the same balance strategy as the reactors; the number
  of cores is set with --crimson-alien-num-cores.
- --crimson-reactor-physical-only and
  --crimson-alienstore-physical-only will cause reactor or
  alienstore cpus respectively to be allocated with one
  cpu per physical core rather than including smt siblings.

Fixes: https://tracker.ceph.com/issues/71096
Signed-off-by: Samuel Just <sjust@redhat.com>
(cherry picked from commit 1795f46ebbc2f061e26f0298815d891fa12c1b96)

src/vstart.sh

index adeeab9bb9a7a43553c9ebfd0f54b7ff1cfda21a..ea3577bf4af86e622cbf16cd9dde02edb394e2e9 100755 (executable)
@@ -275,7 +275,9 @@ options:
        --seastore-secondary-devs-type: device type of all secondary blockdevs. HDD, SSD(default), ZNS or RANDOM_BLOCK_SSD
        --crimson-smp: number of cores to use for crimson
        --crimson-alien-num-threads: number of alien-tp threads
-       --crimson-alien-num-cores: number of cores to use for alien-tp
+       --crimson-reactor-physical-only: use only one cpu per physical core for seastar reactors
+       --crimson-alien-num-cores: number of cpus to use for alien threads
+       --crimson-alienstore-physical-only: use only one cpu per physical core for alienstore
        --crimson-balance-cpu: distribute the Seastar reactors uniformly across OSDs (osd) or NUMA (socket)
        --osds-per-host: populate crush_location as each host holds the specified number of osds if set
        --require-osd-and-client-version: if supplied, do set-require-min-compat-client and require-osd-release to specified value
@@ -350,40 +352,38 @@ parse_secondary_devs() {
 
 # Auxiliar function to prepare the CPU cores to pin Seastar reactors
 prep_balance_cpu() {
-    local crimson_smp=$1
-    local balance_strategy=$2
-    local in_file_name="/tmp/numa_args_${balance_strategy}.out"
-    local out_file_name="/tmp/numa_nodes.json"
-    local log_file_name="/tmp/numa_bal_${balance_strategy}.log"
-    local cmd
-
-    # Check the table is empty
-    if [ "${#cpu_table[@]}" -eq 0 ]; then
-        # Ensure the file with the CPU mappings exist
-        if [ ! -f ${in_file_name} ]; then
-            debug echo "lscpu --json > ${out_file_name}"
-            lscpu --json > ${out_file_name}
-            MY_CPUS=$(taskset -acp $$ | awk -F : '{print $2}')
-            cmd="python3 ${CEPH_DIR}/../src/tools/contrib/balance_cpu.py -o ${CEPH_NUM_OSD}\
-                -r ${crimson_smp}  -b ${balance_strategy} -u ${out_file_name} -t ${MY_CPUS} > ${in_file_name}"
-            debug echo "$cmd"
-            eval "$cmd" >> ${log_file_name}
-        fi
+    if [ -z $crimson_balance_cpu ] || [ "${crimson_balance_cpu}" == "none" ] ; then
+        echo "Not assigning cpus for crimson"
+        return
+    fi
 
-        readarray -t cpu_table < ${in_file_name}
-        # Check the table is not empty, bail out otherwise
-        if [ "${#cpu_table[@]}" -ne 0 ]; then
-            debug echo "CPU table not empty with ${#cpu_table[@]} entries"
-        else
-            debug echo "CPU table empty, bailing out. Check  ${log_file_name}"
-        fi
+    cmd="python3 ${CEPH_DIR}/../src/tools/contrib/assign_crimson_cores.py"
+    cmd+=" -o ${CEPH_NUM_OSD} -r ${crimson_smp} -a ${crimson_alien_num_cores}"
+    cmd+=" -b ${crimson_balance_cpu}"
+    if [ ${crimson_reactor_physical_only} != 0 ]; then
+        cmd+=" --physical-only-seastar"
+    fi
+    if [ ${crimson_alienstore_physical_only} != 0 ]; then
+        cmd+=" --physical-only-alienstore"
+    fi
+
+    echo $cmd
+    readarray -t cpu_table < <($cmd)
+    # Check the table is not empty, bail out otherwise
+    if [ "${#cpu_table[@]}" -ne 0 ]; then
+        debug echo "CPU table not empty with ${#cpu_table[@]} entries"
+    else
+        debug echo "CPU table empty, bailing out."
+        exit 1
     fi
 }
 
 # Default values for the crimson options
 crimson_smp=1
 crimson_alien_num_threads=0
+crimson_reactor_physical_only=0
 crimson_alien_num_cores=0
+crimson_alienstore_physical_only=0
 crimson_balance_cpu="" # "osd", "socket"
 
 while [ $# -ge 1 ]; do
@@ -620,10 +620,16 @@ case $1 in
         crimson_alien_num_threads=$2
         shift
         ;;
+    --crimson-reactor-physical-only)
+        crimson_reactor_physical_only=1
+        ;;
     --crimson-alien-num-cores)
         crimson_alien_num_cores=$2
         shift
         ;;
+    --crimson-alienstore-physical-only)
+        crimson_alienstore_physical_only=1
+        ;;
     --crimson-balance-cpu)
         crimson_balance_cpu=$2
         shift
@@ -1197,10 +1203,32 @@ start_cephexporter() {
 
 do_balance_cpu() {
     local osd=$1
+    local alienstore_idx=$(( osd + CEPH_NUM_OSD ))
+
+    local reactor_interval=${cpu_table[${osd}]}
+    if ! [ "${reactor_interval}" == "" ]; then
+        local cmd="$CEPH_BIN/ceph -c $conf_fn config set osd.$osd crimson_seastar_cpu_cores ${reactor_interval}"
+        echo $cmd
+        $cmd
+    else
+        echo "No cpu_table entry for osd $osd, setting crimson_seastar_num_reactors"
+        local cmd="$CEPH_BIN/ceph -c $conf_fn config set osd.$osd crimson_seastar_num_threads $crimson_smp"
+        echo $cmd
+        $cmd
+        return
+    fi
+
+
+    local alienstore_interval=${cpu_table[${alienstore_idx}]}
+    if [ ! "${alienstore_interval}" == "" ]; then
+        local cmd="$CEPH_BIN/ceph -c $conf_fn config set osd.$osd crimson_alien_thread_cpu_cores ${alienstore_interval}"
+        echo $cmd
+        $cmd
+    else
+        echo "No alienstore cpu_table entry for osd $osd"
+        return
+    fi
 
-    interval=${cpu_table[${osd}]}
-    echo "$CEPH_BIN/ceph -c $conf_fn config set osd.$osd crimson_seastar_cpu_cores $interval"
-    $CEPH_BIN/ceph -c $conf_fn config set "osd.$osd" crimson_seastar_cpu_cores "$interval"
 }
 
 start_osd() {
@@ -1215,22 +1243,14 @@ start_osd() {
     fi
     local osds_wait
     # If the type of OSD is Crimson and the option to balance the Seastar reactors is true
-       if [ "$ceph_osd" == "crimson-osd" ] && [ ! -z "$crimson_balance_cpu" ]; then
+       if [ "$ceph_osd" == "crimson-osd" ]; then
         debug echo "Preparing balance CPU for Crimson"
-        prep_balance_cpu $crimson_smp $crimson_balance_cpu
+        prep_balance_cpu
     fi
     for osd in `seq $start $end`
     do
        if [ "$ceph_osd" == "crimson-osd" ]; then
-        if [ ! -z "$crimson_balance_cpu" ]; then
-            do_balance_cpu $osd
-        else
-            bottom_cpu=$(( osd * crimson_smp ))
-            top_cpu=$(( bottom_cpu + crimson_smp - 1 ))
-            # set exclusive CPU nodes for each osd
-            echo "$CEPH_BIN/ceph -c $conf_fn config set osd.$osd crimson_seastar_cpu_cores $bottom_cpu-$top_cpu"
-            $CEPH_BIN/ceph -c $conf_fn config set "osd.$osd" crimson_seastar_cpu_cores "$bottom_cpu-$top_cpu"
-        fi
+        do_balance_cpu $osd
     fi
        if [ "$new" -eq 1 -o $inc_osd_num -gt 0 ]; then
             wconf <<EOF
@@ -1759,38 +1779,9 @@ if [ "$ceph_osd" == "crimson-osd" ]; then
         extra_seastar_args=" --trace"
     fi
     if [ "$objectstore" == "bluestore" ]; then
-        # This condition verifies the number of logical CPU cores
-        if [ "$(expr $(nproc) - 1)" -gt "$(($CEPH_NUM_OSD * crimson_smp))" ]; then
-            if [ ! -z "$crimson_balance_cpu" ]; then
-                debug echo "Preparing balance CPU for Crimson"
-                prep_balance_cpu $crimson_smp $crimson_balance_cpu
-                available_cpus="${cpu_table[-1]}"
-                echo "crimson_alien_thread_cpu_cores: '$available_cpus'"
-                $CEPH_BIN/ceph -c $conf_fn config set osd crimson_alien_thread_cpu_cores "$available_cpus"
-            else
-                if [ $crimson_alien_num_cores -gt 0 ]; then
-                    alien_bottom_cpu=$(($CEPH_NUM_OSD * crimson_smp))
-                    alien_top_cpu=$(( alien_bottom_cpu + crimson_alien_num_cores - 1 ))
-                    # Ensure top value within range:
-                    if [ "$(($alien_top_cpu))" -gt "$(expr $(nproc) - 1)" ]; then
-                        alien_top_cpu=$(expr $(nproc) - 1)
-                    fi
-                    echo "crimson_alien_thread_cpu_cores: $alien_bottom_cpu-$alien_top_cpu"
-                    # This is a (logical) processor id range, it could be refined to encompass only physical processor ids
-                    # (equivalently, ignore hyperthreading sibling processor ids)
-                    $CEPH_BIN/ceph -c $conf_fn config set osd crimson_alien_thread_cpu_cores "$alien_bottom_cpu-$alien_top_cpu"
-                else
-                    # This is the legacy default case
-                    echo "crimson_alien_thread_cpu_cores:" $(($CEPH_NUM_OSD * crimson_smp))-"$(expr $(nproc) - 1)"
-                    $CEPH_BIN/ceph -c $conf_fn config set osd crimson_alien_thread_cpu_cores $(($CEPH_NUM_OSD * crimson_smp))-"$(expr $(nproc) - 1)"
-                fi
-            fi
-            if [ $crimson_alien_num_threads -gt 0 ]; then
-                echo "$CEPH_BIN/ceph -c $conf_fn config set osd crimson_alien_op_num_threads $crimson_alien_num_threads"
-                $CEPH_BIN/ceph -c $conf_fn config set osd crimson_alien_op_num_threads "$crimson_alien_num_threads"
-            fi
-        else
-          echo "No alien thread cpu core isolation"
+        if [ $crimson_alien_num_threads -gt 0 ]; then
+            echo "$CEPH_BIN/ceph -c $conf_fn config set osd crimson_alien_op_num_threads $crimson_alien_num_threads"
+            $CEPH_BIN/ceph -c $conf_fn config set osd crimson_alien_op_num_threads "$crimson_alien_num_threads"
         fi
     fi
 fi