declare -a bluestore_db_devs
declare -a bluestore_wal_devs
declare -a secondary_block_devs
+declare -a cpu_table
secondary_block_devs_type="SSD"
VSTART_SEC="client.vstart.sh"
--crimson-smp: number of cores to use for crimson
--crimson-alien-num-threads: number of alien-tp threads
--crimson-alien-num-cores: number of cores to use for alien-tp
+ --crimson-balance-cpu: distribute the Seastar reactors uniformly across OSDs (osd) or NUMA (socket)
--osds-per-host: populate crush_location as each host holds the specified number of osds if set
--require-osd-and-client-version: if supplied, do set-require-min-compat-client and require-osd-release to specified value
--use-crush-tunables: if supplied, set tunables to specified value
done
}
+# Auxiliar function to prepare the CPU cores to pin Seastar reactors
+prep_balance_cpu() {
+ local crimson_smp=$1
+ local balance_strategy=$2
+ local in_file_name="/tmp/numa_args_${balance_strategy}.out"
+ local out_file_name="/tmp/numa_nodes.json"
+ local log_file_name="/tmp/numa_bal_${balance_strategy}.log"
+ local cmd
+
+ # Check the table is empty
+ if [ "${#cpu_table[@]}" -eq 0 ]; then
+ # Ensure the file with the CPU mappings exist
+ if [ ! -f ${in_file_name} ]; then
+ debug echo "lscpu --json > ${out_file_name}"
+ lscpu --json > ${out_file_name}
+ MY_CPUS=$(taskset -acp $$ | awk -F : '{print $2}')
+ cmd="python3 ${CEPH_DIR}/../src/tools/contrib/balance_cpu.py -o ${CEPH_NUM_OSD}\
+ -r ${crimson_smp} -b ${balance_strategy} -u ${out_file_name} -t ${MY_CPUS} > ${in_file_name}"
+ debug echo "$cmd"
+ eval "$cmd" >> ${log_file_name}
+ fi
+
+ readarray -t cpu_table < ${in_file_name}
+ # Check the table is not empty, bail out otherwise
+ if [ "${#cpu_table[@]}" -ne 0 ]; then
+ debug echo "CPU table not empty with ${#cpu_table[@]} entries"
+ else
+ debug echo "CPU table empty, bailing out. Check ${log_file_name}"
+ fi
+ fi
+}
+
# Default values for the crimson options
crimson_smp=1
crimson_alien_num_threads=0
crimson_alien_num_cores=0
+crimson_balance_cpu="" # "osd", "socket"
while [ $# -ge 1 ]; do
case $1 in
crimson_alien_num_cores=$2
shift
;;
+ --crimson-balance-cpu)
+ crimson_balance_cpu=$2
+ shift
+ ;;
--bluestore-spdk)
[ -z "$2" ] && usage_exit
IFS=',' read -r -a bluestore_spdk_dev <<< "$2"
--addrs "$IP"
}
+do_balance_cpu() {
+ local osd=$1
+
+ interval=${cpu_table[${osd}]}
+ echo "$CEPH_BIN/ceph -c $conf_fn config set osd.$osd crimson_seastar_cpu_cores $interval"
+ $CEPH_BIN/ceph -c $conf_fn config set "osd.$osd" crimson_seastar_cpu_cores "$interval"
+}
+
start_osd() {
if [ $inc_osd_num -gt 0 ]; then
old_maxosd=$($CEPH_BIN/ceph osd getmaxosd | sed -e 's/max_osd = //' -e 's/ in epoch.*//')
end=$(($CEPH_NUM_OSD-1))
fi
local osds_wait
+ # If the type of OSD is Crimson and the option to balance the Seastar reactors is true
+ if [ "$ceph_osd" == "crimson-osd" ] && [ ! -z "$crimson_balance_cpu" ]; then
+ debug echo "Preparing balance CPU for Crimson"
+ prep_balance_cpu $crimson_smp $crimson_balance_cpu
+ fi
for osd in `seq $start $end`
do
if [ "$ceph_osd" == "crimson-osd" ]; then
- bottom_cpu=$(( osd * crimson_smp ))
- top_cpu=$(( bottom_cpu + crimson_smp - 1 ))
- # set exclusive CPU nodes for each osd
- echo "$CEPH_BIN/ceph -c $conf_fn config set osd.$osd crimson_seastar_cpu_cores $bottom_cpu-$top_cpu"
- $CEPH_BIN/ceph -c $conf_fn config set "osd.$osd" crimson_seastar_cpu_cores "$bottom_cpu-$top_cpu"
- fi
+ if [ ! -z "$crimson_balance_cpu" ]; then
+ do_balance_cpu $osd
+ else
+ bottom_cpu=$(( osd * crimson_smp ))
+ top_cpu=$(( bottom_cpu + crimson_smp - 1 ))
+ # set exclusive CPU nodes for each osd
+ echo "$CEPH_BIN/ceph -c $conf_fn config set osd.$osd crimson_seastar_cpu_cores $bottom_cpu-$top_cpu"
+ $CEPH_BIN/ceph -c $conf_fn config set "osd.$osd" crimson_seastar_cpu_cores "$bottom_cpu-$top_cpu"
+ fi
+ fi
if [ "$new" -eq 1 -o $inc_osd_num -gt 0 ]; then
wconf <<EOF
[osd.$osd]
extra_seastar_args=" --trace"
fi
if [ "$objectstore" == "bluestore" ]; then
+ # This condition verifies the number of logical CPU cores
if [ "$(expr $(nproc) - 1)" -gt "$(($CEPH_NUM_OSD * crimson_smp))" ]; then
- if [ $crimson_alien_num_cores -gt 0 ]; then
- alien_bottom_cpu=$(($CEPH_NUM_OSD * crimson_smp))
- alien_top_cpu=$(( alien_bottom_cpu + crimson_alien_num_cores - 1 ))
- # Ensure top value within range:
- if [ "$(($alien_top_cpu))" -gt "$(expr $(nproc) - 1)" ]; then
- alien_top_cpu=$(expr $(nproc) - 1)
- fi
- echo "crimson_alien_thread_cpu_cores: $alien_bottom_cpu-$alien_top_cpu"
- # This is a (logical) processor id range, it could be refined to encompass only physical processor ids
- # (equivalently, ignore hyperthreading sibling processor ids)
- $CEPH_BIN/ceph -c $conf_fn config set osd crimson_alien_thread_cpu_cores "$alien_bottom_cpu-$alien_top_cpu"
+ if [ ! -z "$crimson_balance_cpu" ]; then
+ debug echo "Preparing balance CPU for Crimson"
+ prep_balance_cpu $crimson_smp $crimson_balance_cpu
+ available_cpus="${cpu_table[-1]}"
+ echo "crimson_alien_thread_cpu_cores: '$available_cpus'"
+ $CEPH_BIN/ceph -c $conf_fn config set osd crimson_alien_thread_cpu_cores "$available_cpus"
else
- echo "crimson_alien_thread_cpu_cores:" $(($CEPH_NUM_OSD * crimson_smp))-"$(expr $(nproc) - 1)"
- $CEPH_BIN/ceph -c $conf_fn config set osd crimson_alien_thread_cpu_cores $(($CEPH_NUM_OSD * crimson_smp))-"$(expr $(nproc) - 1)"
+ if [ $crimson_alien_num_cores -gt 0 ]; then
+ alien_bottom_cpu=$(($CEPH_NUM_OSD * crimson_smp))
+ alien_top_cpu=$(( alien_bottom_cpu + crimson_alien_num_cores - 1 ))
+ # Ensure top value within range:
+ if [ "$(($alien_top_cpu))" -gt "$(expr $(nproc) - 1)" ]; then
+ alien_top_cpu=$(expr $(nproc) - 1)
+ fi
+ echo "crimson_alien_thread_cpu_cores: $alien_bottom_cpu-$alien_top_cpu"
+ # This is a (logical) processor id range, it could be refined to encompass only physical processor ids
+ # (equivalently, ignore hyperthreading sibling processor ids)
+ $CEPH_BIN/ceph -c $conf_fn config set osd crimson_alien_thread_cpu_cores "$alien_bottom_cpu-$alien_top_cpu"
+ else
+ # This is the legacy default case
+ echo "crimson_alien_thread_cpu_cores:" $(($CEPH_NUM_OSD * crimson_smp))-"$(expr $(nproc) - 1)"
+ $CEPH_BIN/ceph -c $conf_fn config set osd crimson_alien_thread_cpu_cores $(($CEPH_NUM_OSD * crimson_smp))-"$(expr $(nproc) - 1)"
+ fi
fi
if [ $crimson_alien_num_threads -gt 0 ]; then
echo "$CEPH_BIN/ceph -c $conf_fn config set osd crimson_alien_op_num_threads $crimson_alien_num_threads"