From 1676e1a57c6d582587b1b8bb1c830b4c250d483f Mon Sep 17 00:00:00 2001 From: Kefu Chai Date: Wed, 22 Aug 2018 15:27:32 +0800 Subject: [PATCH] ceph-*-build: set jobs number according to free memory limit the number of jobs used when building ceph/ceph pull requests and deb packages to (size of free memory in MB)/1800. presumably we are using more compile-time optimizations now, so compiling the ceph source requires more memory. sometimes, a single cc1plus takes more than 3GB of memory. that's why we are seeing more and more OOMs on our arm64 builders. following is a sample from omani09 -- an arm64 builder compiling a ceph/ceph PR targeting master: 30474 jenkins+ 20 0 2573092 2.388g 16220 R 100.0 3.8 9:11.04 cc1plus 34339 jenkins+ 20 0 3279768 3.005g 16296 R 100.0 4.8 5:07.63 cc1plus 36382 jenkins+ 20 0 322276 278932 12740 R 100.0 0.4 0:09.78 cc1plus ... also, the overall compile performance is impacted by the I/O subsystem, so lowering the number of jobs could actually reduce the time the compiling processes spend competing for the I/O queue of the local device. so we can use a conservative number to calculate an upper bound on the number of jobs for "make" instead of using $(nproc). in this change, $(free_memory_in_mega / 1800) is used as the upper limit of n_jobs. on a typical arm64 builder with 48 cores and 64 GB mem, the n_jobs is now 34. when building rpm packages, the number of build jobs is specified by the _smp_mflags macro, which is defined by /usr/lib/rpm/platform/*/macros and /usr/lib/rpm/redhat/macros. see https://github.com/rpm-software-management/rpm/blob/master/platform.in#L53 and rhel/centos use the following patch https://git.centos.org/blob/rpms!redhat-rpm-config.git/eaaa6282147d0797a3733f3b91671b7a0752d448/SOURCES!redhat-rpm-config-9.1.0-ncpus-max.patch;jsessionid=xv8lqw4ipwwetge0i19ejo9t so one cannot build rpm packages on centos/rhel with more than 16 jobs when using redhat-rpm-config. and 16 is a safe number for us. 
Signed-off-by: Kefu Chai --- ceph-build/build/build_deb | 3 ++- ceph-dev-build/build/build_deb | 3 ++- ceph-dev-new-build/build/build_deb | 3 ++- ceph-pull-requests-arm64/build/build | 8 ++++---- .../definitions/ceph-pull-requests-arm64.yml | 3 ++- scripts/build_utils.sh | 18 ++++++++++++++++++ 6 files changed, 30 insertions(+), 8 deletions(-) diff --git a/ceph-build/build/build_deb b/ceph-build/build/build_deb index 9a437239..e93b9ba5 100644 --- a/ceph-build/build/build_deb +++ b/ceph-build/build/build_deb @@ -74,16 +74,17 @@ echo deb vers $bpvers echo building debs for $DIST CEPH_EXTRA_CMAKE_ARGS="$CEPH_EXTRA_CMAKE_ARGS $(extra_cmake_args)" +DEB_BUILD_OPTIONS="parallel=$(get_nr_build_jobs)" # pass only those env vars specifically noted sudo \ CEPH_EXTRA_CMAKE_ARGS="$CEPH_EXTRA_CMAKE_ARGS" \ CEPH_EXTRA_CONFIGURE_ARGS="$CEPH_EXTRA_CONFIGURE_ARGS" \ + DEB_BUILD_OPTIONS="$DEB_BUILD_OPTIONS" \ pbuilder build \ --distribution $DIST \ --basetgz $pbuilddir/$DIST.tgz \ --buildresult $releasedir/$cephver \ - --debbuildopts "-j`grep -c processor /proc/cpuinfo`" \ $releasedir/$cephver/ceph_$bpvers.dsc # do lintian checks diff --git a/ceph-dev-build/build/build_deb b/ceph-dev-build/build/build_deb index ca4a23b5..98b3eef2 100644 --- a/ceph-dev-build/build/build_deb +++ b/ceph-dev-build/build/build_deb @@ -74,16 +74,17 @@ echo deb vers $bpvers echo building debs for $DIST CEPH_EXTRA_CMAKE_ARGS="$CEPH_EXTRA_CMAKE_ARGS $(extra_cmake_args)" +DEB_BUILD_OPTIONS="parallel=$(get_nr_build_jobs)" # pass only those env vars specifically noted sudo \ CEPH_EXTRA_CMAKE_ARGS="$CEPH_EXTRA_CMAKE_ARGS" \ CEPH_EXTRA_CONFIGURE_ARGS="$CEPH_EXTRA_CONFIGURE_ARGS" \ + DEB_BUILD_OPTIONS="$DEB_BUILD_OPTIONS" \ pbuilder build \ --distribution $DIST \ --basetgz $pbuilddir/$DIST.tgz \ --buildresult $releasedir/$cephver \ - --debbuildopts "-j`grep -c processor /proc/cpuinfo`" \ $releasedir/$cephver/ceph_$bpvers.dsc # do lintian checks diff --git a/ceph-dev-new-build/build/build_deb 
b/ceph-dev-new-build/build/build_deb index ca4a23b5..98b3eef2 100644 --- a/ceph-dev-new-build/build/build_deb +++ b/ceph-dev-new-build/build/build_deb @@ -74,16 +74,17 @@ echo deb vers $bpvers echo building debs for $DIST CEPH_EXTRA_CMAKE_ARGS="$CEPH_EXTRA_CMAKE_ARGS $(extra_cmake_args)" +DEB_BUILD_OPTIONS="parallel=$(get_nr_build_jobs)" # pass only those env vars specifically noted sudo \ CEPH_EXTRA_CMAKE_ARGS="$CEPH_EXTRA_CMAKE_ARGS" \ CEPH_EXTRA_CONFIGURE_ARGS="$CEPH_EXTRA_CONFIGURE_ARGS" \ + DEB_BUILD_OPTIONS="$DEB_BUILD_OPTIONS" \ pbuilder build \ --distribution $DIST \ --basetgz $pbuilddir/$DIST.tgz \ --buildresult $releasedir/$cephver \ - --debbuildopts "-j`grep -c processor /proc/cpuinfo`" \ $releasedir/$cephver/ceph_$bpvers.dsc # do lintian checks diff --git a/ceph-pull-requests-arm64/build/build b/ceph-pull-requests-arm64/build/build index 0efb93e3..244a1436 100644 --- a/ceph-pull-requests-arm64/build/build +++ b/ceph-pull-requests-arm64/build/build @@ -1,8 +1,8 @@ #!/bin/bash -ex -NPROC=$(nproc) -testnproc=$(($NPROC / 4)) -export CHECK_MAKEOPTS="-j${testnproc}" -export BUILD_MAKEOPTS="-j${NPROC}" +n_build_jobs=$(get_nr_build_jobs) +n_test_jobs=$(($(nproc) / 4)) +export CHECK_MAKEOPTS="-j${n_test_jobs}" +export BUILD_MAKEOPTS="-j${n_build_jobs}" timeout 3h ./run-make-check.sh sleep 5 ps -ef | grep ceph || true diff --git a/ceph-pull-requests-arm64/config/definitions/ceph-pull-requests-arm64.yml b/ceph-pull-requests-arm64/config/definitions/ceph-pull-requests-arm64.yml index 1fb71bff..6adcab91 100644 --- a/ceph-pull-requests-arm64/config/definitions/ceph-pull-requests-arm64.yml +++ b/ceph-pull-requests-arm64/config/definitions/ceph-pull-requests-arm64.yml @@ -4,7 +4,8 @@ builders: - shell: !include-raw: - - ../../build/build + - ../../../scripts/build_utils.sh + - ../../build/build concurrent: true disabled: false name: ceph-pull-requests-arm64 diff --git a/scripts/build_utils.sh b/scripts/build_utils.sh index d71b185a..64df4c55 100644 --- 
a/scripts/build_utils.sh +++ b/scripts/build_utils.sh @@ -830,3 +830,21 @@ teardown_vagrant_tests() { sudo virsh net-undefine $network || true done } + +get_nr_build_jobs() { + # assume each compiling job takes 1800 MiB memory on average + local nproc=$(nproc) + local max_build_jobs=$(vmstat --stats --unit m | \ + grep 'free memory' | \ + awk '{print int($1/1800)}') + if [[ $max_build_jobs -eq 0 ]]; then + # probably the system is under high load, use a safe number + max_build_jobs=16 + fi + if [[ $nproc -ge $max_build_jobs ]]; then + n_build_jobs=$max_build_jobs + else + n_build_jobs=$nproc + fi + echo $n_build_jobs +} -- 2.39.5