From dc112c9b24f3e88ff0d24ddd4845d72e02859e52 Mon Sep 17 00:00:00 2001 From: Adam King Date: Tue, 13 Jun 2023 19:54:30 -0400 Subject: [PATCH] cephadm: run tcmu-runner through script to do restart on failure Currently, cephadm runs tcmu-runner as a background process inside the unit file deployed for iscsi (rbd-target-api is the primary process). This means if tcmu-runner crashes for whatever reason, systemd will not attempt to restart it. This commit sets up a script to serve as the container entrypoint for the tcmu-runner container that will run tcmu-runner and also restart it on failure (unless there are too many failures in a short period, at which point it gives up). The hope is to eventually drop use of this script for a better solution in squid onward, but this should be helpful on older releases (quincy and pacific at least) where we won't be able to bring that better solution. Fixes: https://tracker.ceph.com/issues/61667 Signed-off-by: Adam King (cherry picked from commit 47eb6b3f62afe993073429b02051ae0343d7aea3) Conflicts: src/cephadm/tests/test_cephadm.py --- src/cephadm/cephadm | 53 ++++++++++++++++++++++++++++++- src/cephadm/tests/test_cephadm.py | 4 +-- 2 files changed, 54 insertions(+), 3 deletions(-) diff --git a/src/cephadm/cephadm b/src/cephadm/cephadm index 85232507e6b91..96a28449c3e58 100755 --- a/src/cephadm/cephadm +++ b/src/cephadm/cephadm @@ -845,6 +845,7 @@ class CephIscsi(object): mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z' mounts[os.path.join(data_dir, 'iscsi-gateway.cfg')] = '/etc/ceph/iscsi-gateway.cfg:z' mounts[os.path.join(data_dir, 'configfs')] = '/sys/kernel/config' + mounts[os.path.join(data_dir, 'tcmu-runner-entrypoint.sh')] = '/usr/local/scripts/tcmu-runner-entrypoint.sh' mounts[log_dir] = '/var/log:z' mounts['/dev'] = '/dev' return mounts @@ -908,9 +909,19 @@ class CephIscsi(object): configfs_dir = os.path.join(data_dir, 'configfs') makedirs(configfs_dir, uid, gid, 0o755) + # set up the tcmu-runner 
entrypoint script + # to be mounted into the container. For more info + # on why we need this script, see the + # tcmu_runner_entrypoint_script function + self.files['tcmu-runner-entrypoint.sh'] = self.tcmu_runner_entrypoint_script() + # populate files from the config-json populate_files(data_dir, self.files, uid, gid) + # we want the tcmu runner entrypoint script to be executable + # populate_files will give it 0o600 by default + os.chmod(os.path.join(data_dir, 'tcmu-runner-entrypoint.sh'), 0o700) + @staticmethod def configfs_mount_umount(data_dir, mount=True): # type: (str, bool) -> List[str] @@ -923,13 +934,53 @@ class CephIscsi(object): 'umount {0}; fi'.format(mount_path) return cmd.split() + @staticmethod + def tcmu_runner_entrypoint_script() -> str: + # since we are having tcmu-runner be a background + # process in its systemd unit (rbd-target-api being + # the main process) systemd will not restart it when + # it fails. in order to try and get around that for now + # we can have a script mounted in the container that + # that attempts to do the restarting for us. This script + # can then become the entrypoint for the tcmu-runner + # container + + # This is intended to be dropped for a better solution + # for at least the squid release onward + return """#!/bin/bash +RUN_DIR=/var/run/tcmu-runner + +if [ ! -d "${RUN_DIR}" ] ; then + mkdir -p "${RUN_DIR}" +fi + +rm -rf "${RUN_DIR}"/* + +while true +do + touch "${RUN_DIR}"/start-up-$(date -Ins) + /usr/bin/tcmu-runner + + # If we got around 3 kills/segfaults in the last minute, + # don't start anymore + if [ $(find "${RUN_DIR}" -type f -cmin -1 | wc -l) -ge 3 ] ; then + exit 0 + fi + + sleep 1 +done +""" + def get_tcmu_runner_container(self): # type: () -> CephContainer # daemon_id, is used to generated the cid and pid files used by podman but as both tcmu-runner # and rbd-target-api have the same daemon_id, it conflits and prevent the second container from # starting. 
.tcmu runner is appended to the daemon_id to fix that. tcmu_container = get_deployment_container(self.ctx, self.fsid, self.daemon_type, str(self.daemon_id) + '.tcmu') - tcmu_container.entrypoint = '/usr/bin/tcmu-runner' + # TODO: Eventually we don't want to run tcmu-runner through this script. + # This is intended to be a workaround backported to older releases + # and should eventually be removed in at least squid onward + tcmu_container.entrypoint = '/usr/local/scripts/tcmu-runner-entrypoint.sh' tcmu_container.cname = self.get_container_name(desc='tcmu') return tcmu_container diff --git a/src/cephadm/tests/test_cephadm.py b/src/cephadm/tests/test_cephadm.py index 7559402d3983b..641483d8504f2 100644 --- a/src/cephadm/tests/test_cephadm.py +++ b/src/cephadm/tests/test_cephadm.py @@ -1751,11 +1751,11 @@ if ! grep -qs /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id # iscsi tcmu-runner container ! /usr/bin/podman rm -f ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9-iscsi.daemon_id-tcmu 2> /dev/null ! 
/usr/bin/podman rm -f ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9-iscsi-daemon_id-tcmu 2> /dev/null -/usr/bin/podman run --rm --ipc=host --stop-signal=SIGTERM --net=host --entrypoint /usr/bin/tcmu-runner --privileged --group-add=disk --init --name ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9-iscsi-daemon_id-tcmu --pids-limit=0 -e CONTAINER_IMAGE=ceph/ceph -e NODE_NAME=host1 -e CEPH_USE_RANDOM_NONCE=1 -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/config:/etc/ceph/ceph.conf:z -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/keyring:/etc/ceph/keyring:z -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/iscsi-gateway.cfg:/etc/ceph/iscsi-gateway.cfg:z -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/configfs:/sys/kernel/config -v /var/log/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9:/var/log:z -v /dev:/dev --mount type=bind,source=/lib/modules,destination=/lib/modules,ro=true ceph/ceph & +/usr/bin/podman run --rm --ipc=host --stop-signal=SIGTERM --net=host --entrypoint /usr/local/scripts/tcmu-runner-entrypoint.sh --privileged --group-add=disk --init --name ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9-iscsi-daemon_id-tcmu --pids-limit=0 -e CONTAINER_IMAGE=ceph/ceph -e NODE_NAME=host1 -e CEPH_USE_RANDOM_NONCE=1 -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/config:/etc/ceph/ceph.conf:z -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/keyring:/etc/ceph/keyring:z -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/iscsi-gateway.cfg:/etc/ceph/iscsi-gateway.cfg:z -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/configfs:/sys/kernel/config -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/tcmu-runner-entrypoint.sh:/usr/local/scripts/tcmu-runner-entrypoint.sh -v /var/log/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9:/var/log:z -v /dev:/dev --mount 
type=bind,source=/lib/modules,destination=/lib/modules,ro=true ceph/ceph & # iscsi.daemon_id ! /usr/bin/podman rm -f ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9-iscsi.daemon_id 2> /dev/null ! /usr/bin/podman rm -f ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9-iscsi-daemon_id 2> /dev/null -/usr/bin/podman run --rm --ipc=host --stop-signal=SIGTERM --net=host --entrypoint /usr/bin/rbd-target-api --privileged --group-add=disk --init --name ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9-iscsi-daemon_id --pids-limit=0 -e CONTAINER_IMAGE=ceph/ceph -e NODE_NAME=host1 -e CEPH_USE_RANDOM_NONCE=1 -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/config:/etc/ceph/ceph.conf:z -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/keyring:/etc/ceph/keyring:z -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/iscsi-gateway.cfg:/etc/ceph/iscsi-gateway.cfg:z -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/configfs:/sys/kernel/config -v /var/log/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9:/var/log:z -v /dev:/dev --mount type=bind,source=/lib/modules,destination=/lib/modules,ro=true ceph/ceph +/usr/bin/podman run --rm --ipc=host --stop-signal=SIGTERM --net=host --entrypoint /usr/bin/rbd-target-api --privileged --group-add=disk --init --name ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9-iscsi-daemon_id --pids-limit=0 -e CONTAINER_IMAGE=ceph/ceph -e NODE_NAME=host1 -e CEPH_USE_RANDOM_NONCE=1 -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/config:/etc/ceph/ceph.conf:z -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/keyring:/etc/ceph/keyring:z -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/iscsi-gateway.cfg:/etc/ceph/iscsi-gateway.cfg:z -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/configfs:/sys/kernel/config -v 
/var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/tcmu-runner-entrypoint.sh:/usr/local/scripts/tcmu-runner-entrypoint.sh -v /var/log/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9:/var/log:z -v /dev:/dev --mount type=bind,source=/lib/modules,destination=/lib/modules,ro=true ceph/ceph """ def test_get_container(self): -- 2.39.5