From d4e6763a19fb91cd0b25c835ed9f9ccbfe70c07f Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Thu, 21 Apr 2022 11:12:14 +0200 Subject: [PATCH] cephadm/box: Rootless podman box implementation Even though box is a rootless podman container, there are some capabilities that are needed, like SYS_ADMIN, NET_ADMIN and SYS_TIME. In rootless podman chronyd is not capable of using the adjtimex syscall, so we had to add the -x flag to the chronyd entrypoint, which states it will not try to call that function. To test, run `./box.py -v cluster start`. To fix: there are some problems with /dev permissions, so OSDs still cannot be deployed. `./box.py cluster start --expanded` won't fully work. Details: - Dockerfile: - Fedora image instead of CentOS. - Tons of basic packages aren't installed, therefore those are installed explicitly. - Some container folders are created. It isn't clear/tested whether those are needed. - .box_container is an empty file to identify if a command is run inside a container (see box.py for usage). - box.py - podman-compose is missing some features, but it compensates with args like --podman-run-args so you can add extra arguments whenever podman-compose calls podman run. - sudo ./box.py cluster start will exit since running it as root can be dangerous. - Added some sed trickeroo to modify chronyd options. (I don't know how I can pass OPTIONS to it :P) - host.py - setup_ssh is now needed on every box container. - util.py - prettier command printing :3. - run multiple commands with a multiline string. Signed-off-by: Pere Diaz Bou --- doc/dev/cephadm/developing-cephadm.rst | 67 +++++++-------- src/cephadm/box/Dockerfile | 71 +++++++++++----- src/cephadm/box/box.py | 113 +++++++++++++++++++------ src/cephadm/box/docker-compose.yml | 45 ++++++---- src/cephadm/box/host.py | 21 +++-- src/cephadm/box/osd.py | 54 ++++++------ src/cephadm/box/util.py | 49 +++++++++-- 7 files changed, 284 insertions(+), 136 deletions(-) diff --git a/doc/dev/cephadm/developing-cephadm.rst b/doc/dev/cephadm/developing-cephadm.rst index 61bffb35165b1..845f14bd994a7 100644 --- a/doc/dev/cephadm/developing-cephadm.rst +++ b/doc/dev/cephadm/developing-cephadm.rst @@ -228,15 +228,15 @@ When completed, you'll see:: Then you can reload your Dashboard browser tab. -Cephadm DiD (Docker in Docker) box development environment -========================================================== +Cephadm box container (Podman inside Podman) development environment +==================================================================== As kcli has a long startup time, we created an alternative which is faster using -Docker inside Docker. This approach has its downsides too as we have to +Podman inside Podman. This approach has its downsides too as we have to simulate the creation of osds and addition of devices with loopback devices. -Cephadm's DiD environment is a command which requires little to setup. The setup -requires you to get the required docker images for what we call boxes and ceph. +Cephadm's box environment is a command which requires little setup. The setup +requires you to get the required podman images for what we call boxes and ceph. A box is the first layer of docker containers which can be either a seed or a host. A seed is the main box which holds cephadm and where you bootstrap the cluster. On the other hand, you have hosts with an ssh server setup so you can @@ -250,7 +250,7 @@ seed box, requires the ceph image. 
Requirements ------------ -* `docker-compose `_ +* `podman-compose `_ * lvm Setup ===== In order to setup Cephadm's box run:: cd src/cephadm/box - sudo ln -sf "$PWD"/box.py /usr/bin/box - sudo box -v cluster setup + ./box.py -v cluster setup -.. note:: It is recommended to run box with verbose (-v). +.. note:: It is recommended to run box with verbose (-v) as it will show the output of + the shell commands being run. -After getting all needed images we can create a simple cluster without osds and hosts with:: +After getting all needed images we can create a simple cluster without OSDs and hosts with:: - sudo box -v cluster start + ./box.py -v cluster start -If you want to deploy the cluster with more osds and hosts:: +If you want to deploy the cluster with more OSDs and hosts:: # 3 osds and 3 hosts by default sudo box -v cluster start --extended # explicitly change number of hosts and osds sudo box -v cluster start --extended --osds 5 --hosts 5 -Without the extended option, explicitly adding either more hosts or osds won't change the state +.. warning:: OSDs are still not supported in the box implementation with podman. It is + a work in progress. + + +Without the extended option, explicitly adding either more hosts or OSDs won't change the state of the cluster. .. note:: Cluster start will try to setup even if cluster setup was not called. -.. note:: Osds are created with loopback devices and hence, sudo is needed to - create loopback devices capable of holding osds. +.. note:: OSDs are created with loopback devices and hence, sudo is needed to + create loopback devices capable of holding OSDs. .. note:: Each osd will require 5GiB of space. -After bootstrapping the cluster you can go inside the seed box in which you'll be +After bootstrapping the cluster you can go inside the seed box in which you'll be able to run cephadm commands:: - box -v cluster sh + ./box.py -v cluster sh [root@8d52a7860245] cephadm --help ... -If you want to navigate to the dashboard you can find the ip address after running:: - docker ps - docker inspect | grep IPAddress - -The address will be https://$IPADDRESS:8443 +If you want to navigate to the dashboard, enter https://localhost:8443 in your browser. You can also find the hostname and ip of each box container with:: - sudo box cluster list + ./box.py cluster list and you'll see something like:: @@ -310,15 +310,15 @@ and you'll see something like:: To remove the cluster and clean up run:: - box cluster down + ./box.py cluster down If you just want to clean up the last cluster created run:: - box cluster cleanup + ./box.py cluster cleanup To check all available commands run:: - box --help + ./box.py --help Known issues @@ -331,19 +331,20 @@ Known issues * Docker containers run with the --privileged flag enabled which has been seen to make some computers log out. - -* Sometimes when starting a cluster the osds won't get deployed because cephadm - takes a while to update the state. If this happens wait and call:: - - box -v osd deploy --vg vg1 +* If SELinux is not disabled you'll probably see unexpected behaviour. For example, + if the Ceph repo files aren't all owned by your user, podman-compose will probably + fail to start. +* If a command fails because podman couldn't find the container, you can debug it by + re-running the same podman-compose .. up command it displays + with the flag -v. Road map ------------ -* Run containers without --privileged +* Create osds with ceph-volume raw. 
* Enable ceph-volume to mark loopback devices as a valid block device in the inventory. -* Make DiD ready to run dashboard CI tests (including cluster expansion). +* Make the box ready to run dashboard CI tests (including cluster expansion). Note regarding network calls from CLI handlers ============================================== diff --git a/src/cephadm/box/Dockerfile b/src/cephadm/box/Dockerfile index e927bcb70793c..04d428ae338d8 100644 --- a/src/cephadm/box/Dockerfile +++ b/src/cephadm/box/Dockerfile @@ -1,32 +1,61 @@ -# https://developers.redhat.com/blog/2014/05/05/running-systemd-within-docker-container/ -FROM centos:8 as centos-systemd -ENV container docker +# stable/Dockerfile +# +# Build a Podman container image from the latest +# stable version of Podman on the Fedoras Updates System. +# https://bodhi.fedoraproject.org/updates/?search=podman +# This image can be used to create a secured container +# that runs safely with privileges within the container. +# +FROM fedora:34 + ENV CEPHADM_PATH=/usr/local/sbin/cephadm +# Don't include container-selinux and remove +# directories used by yum that are just taking +# up space. +RUN dnf -y update; rpm --restore shadow-utils 2>/dev/null; \ +yum -y install strace podman fuse-overlayfs --exclude container-selinux; \ +rm -rf /var/cache /var/log/dnf* /var/log/yum.* + +RUN dnf install which firewalld chrony procps systemd openssh openssh-server openssh-clients sshpass lvm2 -y + +ADD https://raw.githubusercontent.com/containers/podman/main/contrib/podmanimage/stable/containers.conf /etc/containers/containers.conf +ADD https://raw.githubusercontent.com/containers/podman/main/contrib/podmanimage/stable/podman-containers.conf /root/.config/containers/containers.conf + +RUN mkdir -p /root/.local/share/containers; # chown podman:podman -R /home/podman -# Centos met EOL and the content of the CentOS 8 repos has been moved to vault.centos.org -RUN sed -i 's/mirrorlist/#mirrorlist/g' /etc/yum.repos.d/CentOS-Linux-* -RUN sed -i 's|#baseurl=http://mirror.centos.org|baseurl=https://vault.centos.org|g' /etc/yum.repos.d/CentOS-Linux-* +# Note VOLUME options must always happen after the chown call above +# RUN commands can not modify existing volumes +VOLUME /var/lib/containers +VOLUME /root/.local/share/containers -RUN dnf -y install chrony firewalld lvm2 \ - openssh-server openssh-clients python3 \ - yum-utils sudo which && dnf clean all +# chmod containers.conf and adjust storage.conf to enable Fuse storage. 
+RUN chmod 644 /etc/containers/containers.conf; sed -i -e 's|^#mount_program|mount_program|g' -e '/additionalimage.*/a "/var/lib/shared",' -e 's|^mountopt[[:space:]]*=.*$|mountopt = "nodev,fsync=0"|g' /etc/containers/storage.conf +RUN mkdir -p /var/lib/shared/overlay-images /var/lib/shared/overlay-layers /var/lib/shared/vfs-images /var/lib/shared/vfs-layers; touch /var/lib/shared/overlay-images/images.lock; touch /var/lib/shared/overlay-layers/layers.lock; touch /var/lib/shared/vfs-images/images.lock; touch /var/lib/shared/vfs-layers/layers.lock -RUN systemctl enable chronyd firewalld sshd +RUN echo 'root:root' | chpasswd +RUN dnf install -y adjtimex # adjtimex syscall doesn't exist in fedora 35+ therefore we have to install it manually + # so chronyd works +RUN dnf -y install hostname iproute udev +ENV _CONTAINERS_USERNS_CONFIGURED="" -FROM centos-systemd as centos-systemd-docker -# To cache cephadm images -RUN yum-config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo -RUN dnf -y install docker-ce && \ - dnf clean all && systemctl enable docker +RUN useradd podman; \ +echo podman:0:5000 > /etc/subuid; \ +echo podman:0:5000 > /etc/subgid; \ +echo root:0:65535 > /etc/subuid; \ +echo root:0:65535 > /etc/subgid; -# ssh utilities -RUN dnf install epel-release -y && dnf makecache && dnf install sshpass -y +VOLUME /home/podman/.local/share/containers + +ADD https://raw.githubusercontent.com/containers/libpod/master/contrib/podmanimage/stable/containers.conf /etc/containers/containers.conf +ADD https://raw.githubusercontent.com/containers/libpod/master/contrib/podmanimage/stable/podman-containers.conf /home/podman/.config/containers/containers.conf + +RUN chown podman:podman -R /home/podman + +RUN echo 'podman:podman' | chpasswd +RUN touch /.box_container # empty file to check if inside a container EXPOSE 8443 EXPOSE 22 -FROM centos-systemd-docker -WORKDIR /root - -CMD [ "/usr/sbin/init" ] +ENTRYPOINT ["/usr/sbin/init"] diff --git a/src/cephadm/box/box.py b/src/cephadm/box/box.py index 6c4dd7fd78295..9507361312968 100755 --- a/src/cephadm/box/box.py +++ b/src/cephadm/box/box.py @@ -3,7 +3,6 @@ import argparse import os import stat import sys - import host import osd from util import ( @@ -14,17 +13,28 @@ from util import ( get_boxes_container_info, run_cephadm_shell_command, run_dc_shell_command, + run_dc_shell_commands, run_shell_command, + run_shell_commands, + colored, + Colors ) CEPH_IMAGE = 'quay.ceph.io/ceph-ci/ceph:main' BOX_IMAGE = 'cephadm-box:latest' # NOTE: this image tar is a trickeroo so cephadm won't pull the image everytime -# we deploy a cluster. Keep in mind that you'll be responsible of pulling the -# image yourself with `box cluster setup` +# we deploy a cluster. Keep in mind that you'll be responsible for pulling the +# image yourself with `./box.py -v cluster setup` CEPH_IMAGE_TAR = 'docker/ceph/image/quay.ceph.image.tar' +CEPH_ROOT = '../../../' +DASHBOARD_PATH = '../../../src/pybind/mgr/dashboard/frontend/' +root_error_msg = """ +WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING +sudo with this script can kill your computer, try again without sudo +if you value your time. 
+""" def remove_ceph_image_tar(): if os.path.exists(CEPH_IMAGE_TAR): @@ -40,7 +50,7 @@ def image_exists(image_name: str): # extract_tag assert image_name.find(':') image_name, tag = image_name.split(':') - images = run_shell_command('docker image ls').split('\n') + images = run_shell_command('podman image ls').split('\n') IMAGE_NAME = 0 TAG = 1 for image in images: @@ -54,23 +64,39 @@ def image_exists(image_name: str): def get_ceph_image(): print('Getting ceph image') - run_shell_command(f'docker pull {CEPH_IMAGE}') + run_shell_command(f'podman pull {CEPH_IMAGE}') # update - run_shell_command(f'docker build -t {CEPH_IMAGE} docker/ceph') + run_shell_command(f'podman build -t {CEPH_IMAGE} docker/ceph') if not os.path.exists('docker/ceph/image'): os.mkdir('docker/ceph/image') remove_ceph_image_tar() - run_shell_command(f'docker save {CEPH_IMAGE} -o {CEPH_IMAGE_TAR}') + run_shell_command(f'podman save {CEPH_IMAGE} -o {CEPH_IMAGE_TAR}') print('Ceph image added') def get_box_image(): print('Getting box image') - run_shell_command('docker build -t cephadm-box -f Dockerfile .') + run_shell_command('podman build -t cephadm-box -f Dockerfile .') print('Box image added') +def check_dashboard(): + if not os.path.exists(os.path.join(CEPH_ROOT, 'dist')): + print(colored('Missing build in dashboard', Colors.WARNING)) + +def check_cgroups(): + if not os.path.exists('/sys/fs/cgroup/cgroup.controllers'): + print(colored('cgroups v1 is not supported', Colors.FAIL)) + print('Enable cgroups v2 please') + sys.exit(666) + +def check_selinux(): + selinux = run_shell_command('getenforce') + if 'Disabled' not in selinux: + print(colored('selinux should be disabled, please disable it if you ' + 'don\'t want unexpected behaviour.', Colors.WARNING)) + class Cluster(Target): _help = 'Manage docker cephadm boxes' @@ -82,7 +108,7 @@ class Cluster(Target): ) self.parser.add_argument('--osds', type=int, default=3, help='Number of osds') - self.parser.add_argument('--hosts', type=int, default=2, help='Number of hosts') + self.parser.add_argument('--hosts', type=int, default=1, help='Number of hosts') self.parser.add_argument('--skip-deploy-osds', action='store_true', help='skip deploy osd') self.parser.add_argument('--skip-create-loop', action='store_true', help='skip create loopback device') self.parser.add_argument('--skip-monitoring-stack', action='store_true', help='skip monitoring stack') @@ -91,6 +117,11 @@ class Cluster(Target): @ensure_outside_container def setup(self): + run_shell_command('pip3 install https://github.com/containers/podman-compose/archive/devel.tar.gz') + + check_cgroups() + check_selinux() + get_ceph_image() get_box_image() @@ -103,14 +134,17 @@ class Cluster(Target): print('Running bootstrap on seed') cephadm_path = os.environ.get('CEPHADM_PATH') os.symlink('/cephadm/cephadm', cephadm_path) - run_shell_command( - 'systemctl restart docker' - ) # restart to ensure docker is using daemon.json + + + # restart to ensure docker is using daemon.json + # run_shell_command( + # 'systemctl restart docker' + # ) st = os.stat(cephadm_path) os.chmod(cephadm_path, st.st_mode | stat.S_IEXEC) - run_shell_command('docker load < /cephadm/box/docker/ceph/image/quay.ceph.image.tar') + run_shell_command('podman load < /cephadm/box/docker/ceph/image/quay.ceph.image.tar') # cephadm guid error because it sometimes tries to use quay.ceph.io/ceph-ci/ceph: # instead of main branch's tag run_shell_command('export CEPH_SOURCE_FOLDER=/ceph') @@ -166,19 +200,24 @@ class Cluster(Target): run_shell_command('cephadm ls') 
run_shell_command('ln -s /ceph/src/cephadm/box/box.py /usr/bin/box') - # NOTE: sometimes cephadm in the box takes a while to update the containers - # running in the cluster and it cannot deploy the osds. In this case - # run: box -v osd deploy --vg vg1 to deploy osds again. run_cephadm_shell_command('ceph -s') + + print(colored('dashboard available at https://localhost:8443', Colors.OKGREEN)) print('Bootstrap completed!') @ensure_outside_container def start(self): + check_cgroups() + check_selinux() osds = Config.get('osds') hosts = Config.get('hosts') # ensure boxes don't exist - run_shell_command('docker-compose down') + run_shell_command('podman-compose down') + I_am = run_shell_command('whoami') + if 'root' in I_am: + print(root_error_msg) + sys.exit(1) print('Checking docker images') if not image_exists(CEPH_IMAGE): @@ -186,24 +225,45 @@ class Cluster(Target): if not image_exists(BOX_IMAGE): get_box_image() + used_loop = "" if not Config.get('skip_create_loop'): print('Adding logical volumes (block devices) in loopback device...') - osd.create_loopback_devices(osds) + used_loop = osd.create_loopback_devices(osds) print(f'Added {osds} logical volumes in a loopback device') + loop_device_arg = "" + if used_loop: + loop_device_arg = f'--device {used_loop} -v /dev/vg1:/dev/vg1:Z' + for o in range(osds): + loop_device_arg += f' --device /dev/dm-{o}' print('Starting containers') dcflags = '-f docker-compose.yml' if not os.path.exists('/sys/fs/cgroup/cgroup.controllers'): dcflags += ' -f docker-compose.cgroup1.yml' - run_shell_command(f'docker-compose {dcflags} up --scale hosts={hosts} -d') + run_shell_command(f'podman-compose --podman-run-args "--group-add keep-groups --network=host --device /dev/fuse -it {loop_device_arg}" up --scale hosts={hosts} -d') + ip = run_dc_shell_command('hostname -i', 1, 'seed') + assert ip != '127.0.0.1' run_shell_command('sudo sysctl net.ipv4.conf.all.forwarding=1') run_shell_command('sudo iptables -P FORWARD ACCEPT') + # don't update clock with chronyd / setup chronyd on all boxes + chronyd_setup = """ + sed 's/$OPTIONS/-x/g' /usr/lib/systemd/system/chronyd.service -i + systemctl daemon-reload + systemctl start chronyd + systemctl status chronyd + """ + for h in range(hosts): + run_dc_shell_commands(h + 1, 'hosts', chronyd_setup) + run_dc_shell_commands(1, 'seed', chronyd_setup) + print('Seting up host ssh servers') for h in range(hosts): - host._setup_ssh(h + 1) + host._setup_ssh('hosts', h + 1) + + host._setup_ssh('seed', 1) verbose = '-v' if Config.get('verbose') else '' skip_deploy = '--skip-deploy-osds' if Config.get('skip-deploy-osds') else '' @@ -231,16 +291,17 @@ class Cluster(Target): if expanded: host._add_hosts(ips, hostnames) - if expanded and not Config.get('skip-deploy-osds'): - print('Deploying osds... This could take up to minutes') - osd.deploy_osds_in_vg('vg1') - print('Osds deployed') + # TODO: add osds + # if expanded and not Config.get('skip-deploy-osds'): + # print('Deploying osds... 
This could take up to minutes') + # osd.deploy_osds_in_vg('vg1') + # print('Osds deployed') print('Bootstrap finished successfully') @ensure_outside_container def down(self): - run_shell_command('docker-compose down') + run_shell_command('podman-compose down') cleanup_box() print('Successfully killed all boxes') @@ -258,7 +319,7 @@ class Cluster(Target): # we need verbose to see the prompt after running shell command Config.set('verbose', True) print('Seed bash') - run_shell_command('docker-compose exec seed bash') + run_shell_command('podman-compose exec seed bash') targets = { diff --git a/src/cephadm/box/docker-compose.yml b/src/cephadm/box/docker-compose.yml index 3d9d3ea9ac0f0..4e941a3eb0e50 100644 --- a/src/cephadm/box/docker-compose.yml +++ b/src/cephadm/box/docker-compose.yml @@ -7,39 +7,54 @@ services: - CEPH_BRANCH=master image: cephadm-box # probably not needed with rootless Docker and cgroups v2 - privileged: true - # cap_add: - # - SYS_ADMIN - # - NET_ADMIN - # - SYS_TIME - # - MKNOD + # privileged: true + cap_add: + - SYS_ADMIN + - NET_ADMIN + - SYS_TIME + - SYS_RAWIO + - MKNOD + - NET_RAW + - SETUID + - SETGID + - CHOWN + - SYS_PTRACE + - SYS_TTY_CONFIG + - CAP_AUDIT_WRITE + - CAP_AUDIT_CONTROL stop_signal: RTMIN+3 volumes: - - ../../../:/ceph - - ..:/cephadm - - ./daemon.json:/etc/docker/daemon.json + - ../../../:/ceph:z + - ..:/cephadm:z + # - ./daemon.json:/etc/docker/daemon.json # dangerous, maybe just map the loopback # https://stackoverflow.com/questions/36880565/why-dont-my-udev-rules-work-inside-of-a-running-docker-container - - /dev:/dev - networks: - - public + - /run/udev:/run/udev + - /sys/dev/block:/sys/dev/block + - /sys/fs/cgroup:/sys/fs/cgroup + - /dev/fuse:/dev/fuse + - /dev/disk:/dev/disk + - /dev/mapper:/dev/mapper + - /dev/mapper/control:/dev/mapper/control mem_limit: "20g" scale: -1 seed: extends: service: cephadm-host-base ports: + - "2222:22" - "3000:3000" + - "8888:8888" - "8443:8443" - "9095:9095" scale: 1 hosts: extends: service: cephadm-host-base - scale: 3 + scale: 1 volumes: var-lib-docker: -networks: - public: + +network_mode: public diff --git a/src/cephadm/box/host.py b/src/cephadm/box/host.py index d7907812d6ec8..d6d084eef1e7e 100644 --- a/src/cephadm/box/host.py +++ b/src/cephadm/box/host.py @@ -12,24 +12,24 @@ from util import ( ) -def _setup_ssh(container_index): +def _setup_ssh(container_type, container_index): if inside_container(): if not os.path.exists('/root/.ssh/known_hosts'): - run_shell_command('ssh-keygen -A') + run_shell_command('ssh-keygen -b 2048 -t rsa -f /root/.ssh/id_rsa -q -N ""') run_shell_command('echo "root:root" | chpasswd') with open('/etc/ssh/sshd_config', 'a+') as f: f.write('PermitRootLogin yes\n') f.write('PasswordAuthentication yes\n') f.flush() - run_shell_command('/usr/sbin/sshd') + run_shell_command('systemctl restart sshd') else: print('Redirecting to _setup_ssh to container') verbose = '-v' if Config.get('verbose') else '' run_dc_shell_command( - f'/cephadm/box/box.py {verbose} host setup_ssh {container_index}', + f'/cephadm/box/box.py {verbose} host setup_ssh {container_type} {container_index}', container_index, - 'hosts', + container_type, ) @@ -47,7 +47,7 @@ def _add_hosts(ips: Union[List[str], str], hostnames: Union[List[str], str]): hostnames = ' '.join(hostnames) hostnames = f'{hostnames}' run_dc_shell_command( - f'/cephadm/box/box.py {verbose} host add_hosts 1 --ips {ips} --hostnames {hostnames}', + f'/cephadm/box/box.py {verbose} host add_hosts seed 1 --ips {ips} --hostnames {hostnames}', 1, 'seed', ) 
@@ -73,7 +73,7 @@ def _copy_cluster_ssh_key(ips: Union[List[str], str]): ips = f'{ips}' # assume we only have one seed run_dc_shell_command( - f'/cephadm/box/box.py {verbose} host copy_cluster_ssh_key 1 --ips {ips}', + f'/cephadm/box/box.py {verbose} host copy_cluster_ssh_key seed 1 --ips {ips}', 1, 'seed', ) @@ -86,7 +86,10 @@ class Host(Target): def set_args(self): self.parser.add_argument('action', choices=Host.actions) self.parser.add_argument( - 'host_container_index', type=str, help='box_host_{index}' + 'container_type', type=str, help='box_{type}_{index}' + ) + self.parser.add_argument( + 'container_index', type=str, help='box_{type}_{index}' ) self.parser.add_argument('--ips', nargs='*', help='List of host ips') self.parser.add_argument( @@ -94,7 +97,7 @@ class Host(Target): ) def setup_ssh(self): - _setup_ssh(Config.get('host_container_index')) + _setup_ssh(Config.get('container_type') ,Config.get('container_index')) def add_hosts(self): ips = Config.get('ips') diff --git a/src/cephadm/box/osd.py b/src/cephadm/box/osd.py index 72693ac85bf58..d5fe1a533b87c 100644 --- a/src/cephadm/box/osd.py +++ b/src/cephadm/box/osd.py @@ -21,26 +21,27 @@ def remove_loop_img() -> None: os.remove(loop_image) -@ensure_outside_container + def create_loopback_devices(osds: int) -> None: + assert osds size = (5 * osds) + 1 print(f'Using {size}GB of data to store osds') - avail_loop = run_shell_command('sudo losetup -f') + # loop_dev = run_shell_command('sudo losetup -f') + loop_dev = '/dev/loop111' + run_shell_command(f'sudo rm -f {loop_dev}') + run_shell_command(f'sudo mknod -m 0777 {loop_dev} b 7 111') - # create loop if we cannot find it - if not os.path.exists(avail_loop): - num_loops = int(run_shell_command("lsmod | grep loop | awk '{print $3}'")) - num_loops += 1 - run_shell_command(f'mknod {avail_loop} b 7 {num_loops}') + # cleanup last call + cleanup() - if os.path.ismount(avail_loop): - os.umount(avail_loop) + if os.path.ismount(loop_dev): + os.umount(loop_dev) loop_devices = json.loads(run_shell_command('losetup -l -J', expect_error=True)) for dev in loop_devices['loopdevices']: - if dev['name'] == avail_loop: - run_shell_command(f'sudo losetup -d {avail_loop}') + if dev['name'] == loop_dev: + run_shell_command(f'sudo losetup -d {loop_dev}') if not os.path.exists('./loop-images'): os.mkdir('loop-images') @@ -49,19 +50,21 @@ def create_loopback_devices(osds: int) -> None: loop_image = Config.get('loop_img') run_shell_command(f'sudo dd if=/dev/zero of={loop_image} bs=1 count=0 seek={size}G') - run_shell_command(f'sudo losetup {avail_loop} {loop_image}') + run_shell_command(f'sudo losetup {loop_dev} {loop_image}') - # cleanup last call - cleanup() - run_shell_command(f'sudo pvcreate {avail_loop} ') - run_shell_command(f'sudo vgcreate vg1 {avail_loop}') + run_shell_command(f'sudo pvcreate {loop_dev} ') + run_shell_command(f'sudo vgcreate vg1 {loop_dev}') - p = int(100 / osds) + p = int(100 / osds) # FIXME: 100 osds is the maximum because of lvcreate pct (it doesn't seem to work with lots more decimals) for i in range(osds): run_shell_command('sudo vgchange --refresh') run_shell_command(f'sudo lvcreate -l {p}%VG --name lv{i} vg1') + # FIXME: use /dev/vg1/lv* links as it is less hacky (there could be unrelated dm devices) + run_shell_command(f'sudo chmod 777 /dev/dm-*') + return loop_dev + def get_lvm_osd_data(data: str) -> Dict[str, str]: osd_lvm_info = run_cephadm_shell_command(f'ceph-volume lvm list {data}') @@ -109,24 +112,23 @@ def deploy_osds_in_vg(vg: str): makes another process to run on 
the background """ if inside_container(): - lvs = json.loads(run_shell_command('lvs --reportformat json')) + print('xd') + else: + lvs = json.loads(run_shell_command('sudo lvs --reportformat json')) # distribute osds per host hosts = get_orch_hosts() host_index = 0 + verbose = '-v' if Config.get('verbose') else '' for lv in lvs['report'][0]['lv']: if lv['vg_name'] == vg: deployed = False while not deployed: - deployed = deploy_osd( - f'{vg}/{lv["lv_name"]}', hosts[host_index]['hostname'] + hostname = hosts[host_index]['hostname'] + deployed = run_dc_shell_command( + f'/cephadm/box/box.py -v osd deploy --data /dev/{vg}/{lv["lv_name"]} --hostname {hostname}', 1, 'seed' ) + deployed = 'created osd' in deployed.lower() host_index = (host_index + 1) % len(hosts) - else: - verbose = '-v' if Config.get('verbose') else '' - print('Redirecting deploy osd in vg to inside container') - run_dc_shell_command( - f'/cephadm/box/box.py {verbose} osd deploy --vg {vg}', 1, 'seed' - ) class Osd(Target): diff --git a/src/cephadm/box/util.py b/src/cephadm/box/util.py index 6b939b6befc6e..8105a3d3c450d 100644 --- a/src/cephadm/box/util.py +++ b/src/cephadm/box/util.py @@ -4,6 +4,16 @@ import subprocess import sys from typing import Any, Callable, Dict +class Colors: + HEADER = '\033[95m' + OKBLUE = '\033[94m' + OKCYAN = '\033[96m' + OKGREEN = '\033[92m' + WARNING = '\033[93m' + FAIL = '\033[91m' + ENDC = '\033[0m' + BOLD = '\033[1m' + UNDERLINE = '\033[4m' class Config: args = { @@ -74,9 +84,13 @@ def ensure_inside_container(func) -> bool: return wrapper +def colored(msg, color: Colors): + return color + msg + Colors.ENDC + def run_shell_command(command: str, expect_error=False) -> str: if Config.get('verbose'): - print(f'Running command: {command}') + print(f'{colored("Running command", Colors.HEADER)}: {colored(command, Colors.OKBLUE)}') + process = subprocess.Popen( command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE ) @@ -107,6 +121,20 @@ def run_shell_command(command: str, expect_error=False) -> str: return out +def run_dc_shell_commands(index, box_type, commands: str, expect_error=False) -> str: + for command in commands.split('\n'): + command = command.strip() + if not command: + continue + run_dc_shell_command(command.strip(), index, box_type, expect_error=expect_error) + +def run_shell_commands(commands: str, expect_error=False) -> str: + for command in commands.split('\n'): + command = command.strip() + if not command: + continue + run_shell_command(command, expect_error=expect_error) + @ensure_inside_container def run_cephadm_shell_command(command: str, expect_error=False) -> str: config = Config.get('config') @@ -123,15 +151,18 @@ def run_cephadm_shell_command(command: str, expect_error=False) -> str: def run_dc_shell_command( command: str, index: int, box_type: str, expect_error=False ) -> str: + container_id = get_container_id(f'{box_type}_{index}') + print(container_id) out = run_shell_command( - f'docker-compose exec --index={index} {box_type} {command}', expect_error + f'podman exec -it {container_id} {command}', expect_error ) return out - def inside_container() -> bool: - return os.path.exists('/.dockerenv') + return os.path.exists('/.box_container') +def get_container_id(container_name: str): + return run_shell_command("podman ps | \grep " + container_name + " | awk '{ print $1 }'") @ensure_outside_container def get_boxes_container_info(with_seed: bool = False) -> Dict[str, Any]: @@ -139,7 +170,7 @@ def get_boxes_container_info(with_seed: bool = False) -> Dict[str, Any]: IP = 
0 CONTAINER_NAME = 1 HOSTNAME = 2 - ips_query = "docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}} %tab% {{.Name}} %tab% {{.Config.Hostname}}' $(docker ps -aq) | sed 's#%tab%#\t#g' | sed 's#/##g' | sort -t . -k 1,1n -k 2,2n -k 3,3n -k 4,4n" + ips_query = "podman inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}} %tab% {{.Name}} %tab% {{.Config.Hostname}}' $(podman ps -aq) | sed 's#%tab%#\t#g' | sed 's#/##g' | sort -t . -k 1,1n -k 2,2n -k 3,3n -k 4,4n" out = run_shell_command(ips_query) # FIXME: if things get more complex a class representing a container info might be useful, # for now representing data this way is faster. @@ -157,6 +188,12 @@ def get_boxes_container_info(with_seed: bool = False) -> Dict[str, Any]: def get_orch_hosts(): - orch_host_ls_out = run_cephadm_shell_command('ceph orch host ls --format json') + if inside_container(): + orch_host_ls_out = run_cephadm_shell_command('ceph orch host ls --format json') + else: + orch_host_ls_out = run_dc_shell_command('cephadm shell --keyring /etc/ceph/ceph.keyring --config /etc/ceph/ceph.conf -- ceph orch host ls --format json', 1, 'seed') + sp = orch_host_ls_out.split('\n') + orch_host_ls_out = sp[len(sp) - 1] + print('xd', orch_host_ls_out) hosts = json.loads(orch_host_ls_out) return hosts -- 2.39.5
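
A quick smoke test of this change, using only the commands documented in the patch above (this is a sketch, not part of the patch itself; it assumes cgroups v2 is enabled, SELinux is disabled and a non-root user is used, as the new check_cgroups/check_selinux/root checks require)::

    cd src/cephadm/box
    ./box.py -v cluster setup    # install podman-compose, build/pull the box and ceph images
    ./box.py -v cluster start    # bring up the seed and host containers (OSDs are not deployed yet)
    ./box.py cluster list        # show the hostname and ip of each box container
    ./box.py -v cluster sh       # open a shell in the seed; try `cephadm --help` and `ceph -s`
    # the dashboard is served at https://localhost:8443
    ./box.py cluster down        # tear the cluster down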