From 6f5612a3a7d5aae3253c91aa5638f19d896a9f2e Mon Sep 17 00:00:00 2001
From: Nizamudeen A
Date: Fri, 27 Sep 2024 12:17:16 +0530
Subject: [PATCH] mgr/dashboard: improve the kcli bootstrap process
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

I added a new script for starting the kcli cluster, called
quick-bootstrap.sh.

The goal is to use that script to download the ceph image locally
(rather than inside each VM) and then copy it over to all the VMs
spawned by kcli. This way every host gets the ceph image up front,
which makes the deployment a lot faster.
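As a quick sanity check of that flow, something like the following should work
(a sketch only, assuming the defaults set up below: a registry:2 container
listening on port 5000 and the image tagged localhost:5000/ceph by
load-podman-image.sh):

```
# On a provisioned node, the local registry should already serve the ceph
# image pushed by load-podman-image.sh (node name follows the plan prefix;
# curl is assumed to be available on the node).
kcli ssh -u root -- ceph-node-00 'curl -s http://localhost:5000/v2/_catalog'
# the output should include: {"repositories":["ceph"]}
```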
I also added a dnf.conf that raises max_parallel_downloads and picks the
fastest mirror when installing dependencies.

eg:

```
╰─$ ./quick-bootstrap.sh -h
+ set +x
Usage: ./quick-bootstrap.sh [OPTIONS]

Options:
  -u, --use-cached-image    Use the existing local podman image. Only use this if such an image is present.
  -dir, --ceph-dir          Provide the local ceph directory. eg. --ceph-dir=/path/to/ceph
  -e, --expanded-cluster    Add all the hosts and deploy OSDs on top of them.
  -h, --help                Display this help message.

Example:
  ./quick-bootstrap.sh --use-cached-image
```

```
./quick-bootstrap.sh -u --ceph-dir=/home/nia/projects/ceph
```
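Each of those flags simply maps onto a kcli plan parameter, so the invocation
above is roughly equivalent to the following (a sketch; it assumes
CEPHADM_IMAGE resolves to quay.ceph.io/ceph-ci/ceph:main, the image used in
this setup):

```
# what quick-bootstrap.sh does under the hood for the example above
# (-u skips the podman pull and reuses the cached image)
podman save -o ceph_image.tar quay.ceph.io/ceph-ci/ceph:main
kcli create plan -f ceph_cluster.yml -P quick_install=True \
    -P ceph_dev_folder=/home/nia/projects/ceph ceph
# the tarball is then scp'd to every node, where load-podman-image.sh
# loads it and pushes it to the local registry
```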
Signed-off-by: Nizamudeen A
---
 .../dashboard/ci/cephadm/bootstrap-cluster.sh | 15 +++-
 .../mgr/dashboard/ci/cephadm/ceph_cluster.yml | 11 ++-
 .../mgr/dashboard/ci/cephadm/dnf.conf.tpl     | 10 +++
 .../dashboard/ci/cephadm/initial-ceph.conf    |  9 ++
 .../dashboard/ci/cephadm/load-podman-image.sh | 23 +++++
 .../dashboard/ci/cephadm/quick-bootstrap.sh   | 86 +++++++++++++++++++
 .../mgr/dashboard/ci/cephadm/start-cluster.sh |  4 +-
 7 files changed, 154 insertions(+), 4 deletions(-)
 create mode 100644 src/pybind/mgr/dashboard/ci/cephadm/dnf.conf.tpl
 create mode 100644 src/pybind/mgr/dashboard/ci/cephadm/initial-ceph.conf
 create mode 100755 src/pybind/mgr/dashboard/ci/cephadm/load-podman-image.sh
 create mode 100755 src/pybind/mgr/dashboard/ci/cephadm/quick-bootstrap.sh

diff --git a/src/pybind/mgr/dashboard/ci/cephadm/bootstrap-cluster.sh b/src/pybind/mgr/dashboard/ci/cephadm/bootstrap-cluster.sh
index ae720e6d49b..08ce7618114 100755
--- a/src/pybind/mgr/dashboard/ci/cephadm/bootstrap-cluster.sh
+++ b/src/pybind/mgr/dashboard/ci/cephadm/bootstrap-cluster.sh
@@ -23,12 +23,25 @@ bootstrap_extra_options='--allow-fqdn-hostname --dashboard-password-noupdate'
 # {% if expanded_cluster is not defined %}
 #   bootstrap_extra_options+=" ${bootstrap_extra_options_not_expanded}"
 # {% endif %}
 
+quick_install_options=''
+{% if quick_install is defined %}
+  quick_install_options="--image localhost:5000/ceph"
+{% endif %}
+
+{% if nodes < 3 %}
+  bootstrap_extra_options+=" --config /root/initial-ceph.conf"
+{% endif %}
 
-$CEPHADM bootstrap --mon-ip $mon_ip --initial-dashboard-password {{ admin_password }} --shared_ceph_folder /mnt/{{ ceph_dev_folder }} ${bootstrap_extra_options}
+{% if ceph_dev_folder is defined %}
+  bootstrap_extra_options+=" --shared_ceph_folder /mnt/{{ ceph_dev_folder }}"
+{% endif %}
+
+$CEPHADM ${quick_install_options} bootstrap --mon-ip $mon_ip --initial-dashboard-password {{ admin_password }} ${bootstrap_extra_options}
 
 fsid=$(cat /etc/ceph/ceph.conf | grep fsid | awk '{ print $3}')
 cephadm_shell="$CEPHADM shell --fsid ${fsid} -c /etc/ceph/ceph.conf -k /etc/ceph/ceph.client.admin.keyring"
+
 {% for number in range(1, nodes) %}
   ssh-copy-id -f -i /etc/ceph/ceph.pub -o StrictHostKeyChecking=no root@192.168.100.10{{ number }}
 {% if expanded_cluster is defined %}
diff --git a/src/pybind/mgr/dashboard/ci/cephadm/ceph_cluster.yml b/src/pybind/mgr/dashboard/ci/cephadm/ceph_cluster.yml
index a334fbad5f6..3273cbc41eb 100755
--- a/src/pybind/mgr/dashboard/ci/cephadm/ceph_cluster.yml
+++ b/src/pybind/mgr/dashboard/ci/cephadm/ceph_cluster.yml
@@ -8,7 +8,7 @@ parameters:
   prefix: ceph
   numcpus: 1
   memory: 2048
-  image: fedora36
+  image: fedora40
   notify: false
   admin_password: password
   disks:
@@ -35,8 +35,17 @@ parameters:
     sharedfolders: [{{ ceph_dev_folder }}]
     files:
       - bootstrap-cluster.sh
+      - dnf.conf.tpl
+      - load-podman-image.sh
+      - initial-ceph.conf
     cmds:
+      # update dnf.conf to make dnf faster
+      - cp /root/dnf.conf.tpl /etc/dnf/dnf.conf
       - dnf -y install python3 chrony lvm2 podman
+      # set up an insecure podman registry and load the ceph image on all hosts
+      {% if quick_install is defined %}
+      - /root/load-podman-image.sh
+      {% endif %}
       - sed -i "s/SELINUX=enforcing/SELINUX=permissive/" /etc/selinux/config
       - setenforce 0
 {% if number == 0 %}
diff --git a/src/pybind/mgr/dashboard/ci/cephadm/dnf.conf.tpl b/src/pybind/mgr/dashboard/ci/cephadm/dnf.conf.tpl
new file mode 100644
index 00000000000..a53a68fd2a3
--- /dev/null
+++ b/src/pybind/mgr/dashboard/ci/cephadm/dnf.conf.tpl
@@ -0,0 +1,10 @@
+[main]
+fastestmirror=true
+max_parallel_downloads=10
+metadata_expire=1h
+clean_requirements_on_remove=true
+assumeyes=true
+gpgcheck=1
+keepcache=0
+plugins=1
+installonly_limit=3
diff --git a/src/pybind/mgr/dashboard/ci/cephadm/initial-ceph.conf b/src/pybind/mgr/dashboard/ci/cephadm/initial-ceph.conf
new file mode 100644
index 00000000000..397d01489d0
--- /dev/null
+++ b/src/pybind/mgr/dashboard/ci/cephadm/initial-ceph.conf
@@ -0,0 +1,9 @@
+[global]
+osd_pool_default_min_size=1
+osd_pool_default_size=1
+
+[mon]
+mon_allow_pool_size_one=true
+mon_allow_pool_delete=true
+mon_data_avail_crit=1
+mon_data_avail_warn=1
diff --git a/src/pybind/mgr/dashboard/ci/cephadm/load-podman-image.sh b/src/pybind/mgr/dashboard/ci/cephadm/load-podman-image.sh
new file mode 100755
index 00000000000..41ab402bca0
--- /dev/null
+++ b/src/pybind/mgr/dashboard/ci/cephadm/load-podman-image.sh
@@ -0,0 +1,23 @@
+#!/usr/bin/env bash
+
+echo -e "[registries.insecure]\n\
+registries = ['localhost:5000']" | sudo tee /etc/containers/registries.conf
+
+podman run -d -p 5000:5000 --name my-registry registry:2
+# Load the image and capture the output
+output=$(podman load -i /root/ceph_image.tar)
+
+# Extract image name from output
+image_name=$(echo "$output" | grep -oP '(?<=^Loaded image: ).*')
+
+if [[ -n "$image_name" ]]; then
+    echo "Image loaded: $image_name"
+    podman tag "$image_name" localhost:5000/ceph
+    echo "Tagged image as localhost:5000/ceph"
+else
+    echo "Failed to load image or extract image name."
+    exit 1
+fi
+
+podman push localhost:5000/ceph
+rm -f /root/ceph_image.tar
diff --git a/src/pybind/mgr/dashboard/ci/cephadm/quick-bootstrap.sh b/src/pybind/mgr/dashboard/ci/cephadm/quick-bootstrap.sh
new file mode 100755
index 00000000000..759747415f2
--- /dev/null
+++ b/src/pybind/mgr/dashboard/ci/cephadm/quick-bootstrap.sh
@@ -0,0 +1,86 @@
+#!/usr/bin/env bash
+
+source bootstrap-cluster.sh > /dev/null 2>&1
+
+set +x
+
+show_help() {
+    echo "Usage: ./quick-bootstrap.sh [OPTIONS]"
+    echo ""
+    echo "Options:"
+    echo "  -u, --use-cached-image    Use the existing local podman image. Only use this if such an image is present."
+    echo "  -dir, --ceph-dir          Provide the local ceph directory. eg. --ceph-dir=/path/to/ceph"
+    echo "  -e, --expanded-cluster    Add all the hosts and deploy OSDs on top of them."
+    echo "  -h, --help                Display this help message."
+    echo ""
+    echo "Example:"
+    echo "  ./quick-bootstrap.sh --use-cached-image"
+}
+
+use_cached_image=false
+extra_args="-P quick_install=True"
+
+for arg in "$@"; do
+    case "$arg" in
+        -u|--use-cached-image)
+            use_cached_image=true
+            ;;
+        -dir=*|--ceph-dir=*)
+            extra_args+=" -P ceph_dev_folder=${arg#*=}"
+            ;;
+        -e|--expanded-cluster)
+            extra_args+=" -P expanded_cluster=True"
+            ;;
+        -h|--help)
+            show_help
+            exit 0
+            ;;
+        *)
+            echo "Unknown option: $arg"
+            show_help
+            exit 1
+            ;;
+    esac
+done
+
+image_name="${CEPHADM_IMAGE}"
+ceph_cluster_yml='ceph_cluster.yml'
+node_count=$(awk '/nodes:/ {print $2}' "${ceph_cluster_yml}")
+
+if [[ ${use_cached_image} == false ]]; then
+    printf "Pulling the image: %s\n" "$image_name"
+    podman pull "${image_name}"
+fi
+
+rm -f ceph_image.tar
+
+printf "Saving the image: %s\n" "$image_name"
+podman save -o ceph_image.tar "${image_name}"
+
+printf "Creating the plan\n"
+kcli create plan -f ceph_cluster.yml ${extra_args} ceph
+
+attempt=0
+
+MAX_ATTEMPTS=10
+SLEEP_INTERVAL=5
+
+printf "Waiting for the host to be reachable\n"
+while [[ ${attempt} -lt ${MAX_ATTEMPTS} ]]; do
+    if ssh -o StrictHostKeyChecking=no -o BatchMode=yes -o ConnectTimeout=10 root@192.168.100.100 exit; then
+        break
+    else
+        echo "Waiting for ssh connection to be available..., attempt: ${attempt}"
+        ((attempt++))
+        sleep ${SLEEP_INTERVAL}
+    fi
+done
+
+printf "Copying the image to the hosts\n"
+
+for node in $(seq 0 $((node_count - 1))); do
+    scp -o StrictHostKeyChecking=no ceph_image.tar root@192.168.100.10"${node}":/root/
+done
+
+rm -f ceph_image.tar
+kcli ssh -u root -- ceph-node-00 'journalctl -n all -ft cloud-init'
diff --git a/src/pybind/mgr/dashboard/ci/cephadm/start-cluster.sh b/src/pybind/mgr/dashboard/ci/cephadm/start-cluster.sh
index cda0635bc08..16151f39153 100755
--- a/src/pybind/mgr/dashboard/ci/cephadm/start-cluster.sh
+++ b/src/pybind/mgr/dashboard/ci/cephadm/start-cluster.sh
@@ -59,8 +59,8 @@ fi
 npm run build ${FRONTEND_BUILD_OPTS} &
 
 cd ${CEPH_DEV_FOLDER}
-: ${VM_IMAGE:='fedora36'}
-: ${VM_IMAGE_URL:='https://archives.fedoraproject.org/pub/archive/fedora/linux/releases/36/Cloud/x86_64/images/Fedora-Cloud-Base-36-1.5.x86_64.qcow2'}
+: ${VM_IMAGE:='fedora40'}
+: ${VM_IMAGE_URL:='https://download.fedoraproject.org/pub/fedora/linux/releases/40/Cloud/x86_64/images/Fedora-Cloud-Base-Generic.x86_64-40-1.14.qcow2'}
 kcli download image -p ceph-dashboard -u ${VM_IMAGE_URL} ${VM_IMAGE}
 kcli delete plan -y ceph || true
 # Compile cephadm locally for the shared_ceph_folder to pick it up
-- 
2.39.5
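For reference, a typical end-to-end run of this change might look like the
following (illustrative only; the patch file name is whatever git format-patch
produced, and kcli/podman are assumed to be installed and configured):

```
# apply the patch, then run the quick bootstrap from the cephadm CI directory
git am 0001-mgr-dashboard-improve-the-kcli-bootstrap-process.patch
cd src/pybind/mgr/dashboard/ci/cephadm
./quick-bootstrap.sh --ceph-dir=/path/to/ceph
```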