From: Pere Diaz Bou Date: Wed, 24 Nov 2021 12:21:23 +0000 (+0100) Subject: cephadm/box: add box.py script X-Git-Tag: v17.1.0~148^2~6 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=cc55b767e4c0c0742b74cea444a0889066f831a8;p=ceph.git cephadm/box: add box.py script Signed-off-by: Pere Diaz Bou --- diff --git a/src/cephadm/box/box.py b/src/cephadm/box/box.py new file mode 100755 index 0000000000000..985867be734aa --- /dev/null +++ b/src/cephadm/box/box.py @@ -0,0 +1,220 @@ +#!/bin/python3 +import sys +import argparse +import os +import stat +import host +import osd + +from util import Config, run_shell_command, run_cephadm_shell_command, \ + ensure_inside_container, ensure_outside_container, get_boxes_container_info, \ + inside_container, get_host_ips, run_dc_shell_command + + + +CEPH_IMAGE = 'quay.ceph.io/ceph-ci/ceph:master' +BOX_IMAGE = 'cephadm-box:latest' + +def image_exists(image_name: str): + # extract_tag + assert image_name.find(':') + image_name, tag = image_name.split(':') + images = run_shell_command('docker image ls').split('\n') + IMAGE_NAME = 0 + TAG = 1 + for image in images: + image = image.split() + print(image) + print(image_name, tag) + if image[IMAGE_NAME] == image_name and image[TAG] == tag: + return True + return False + +def get_ceph_image(): + print('Getting ceph image') + run_shell_command(f'docker pull {CEPH_IMAGE}') + # update + run_shell_command(f'docker build -t {CEPH_IMAGE} docker/ceph') + if not os.path.exists('docker/ceph/image'): + os.mkdir('docker/ceph/image') + image_tar = 'docker/ceph/image/quay.ceph.image.tar' + if not os.path.exists(image_tar): + os.remove(image_tar) + run_shell_command(f'docker save {CEPH_IMAGE} -o {image_tar}') + print('Ceph image added') + +def get_box_image(): + print('Getting box image') + run_shell_command('docker build -t cephadm-box -f Dockerfile .') + print('Box image added') + +class Cluster: + _help = 'Manage docker cephadm boxes' + actions = ['bootstrap', 'start', 'down', 'list', 'sh', 'setup'] + parser = None + + @staticmethod + def add_parser(subparsers): + assert not Cluster.parser + Cluster.parser = subparsers.add_parser('cluster', help=Cluster._help) + parser = Cluster.parser + parser.add_argument('action', choices=Cluster.actions, help='Action to perform on the box') + parser.add_argument('--osds', type=int, default=1, help='Number of osds') + parser.add_argument('--hosts', type=int, default=1, help='Number of hosts') + + def __init__(self, argv): + self.argv = argv + + @ensure_outside_container + def setup(self): + get_ceph_image() + get_box_image() + + @ensure_inside_container + def bootstrap(self): + print('Running bootstrap on seed') + cephadm_path = os.environ.get('CEPHADM_PATH') + os.symlink('/cephadm/cephadm', cephadm_path) + st = os.stat(cephadm_path) + os.chmod(cephadm_path, st.st_mode | stat.S_IEXEC) + + run_shell_command('docker load < /cephadm/box/docker/ceph/image/quay.ceph.image.tar') + # cephadm guid error because it sometimes tries to use quay.ceph.io/ceph-ci/ceph: + # instead of master's tag + run_shell_command('export CEPH_SOURCE_FOLDER=/ceph') + run_shell_command('export CEPHADM_IMAGE=quay.ceph.io/ceph-ci/ceph:master') + run_shell_command('echo "export CEPHADM_IMAGE=quay.ceph.io/ceph-ci/ceph:master" >> ~/.bashrc') + + extra_args = [] + + shared_ceph_folder = os.environ.get('SHARED_CEPH_FOLDER') + if shared_ceph_folder: + extra_args.extend(['--shared_ceph_folder', 'shared_ceph_folder']) + + cephadm_image = os.environ.get('CEPHADM_IMAGE') + if shared_ceph_folder: + extra_args.append('--skip-pull') + + # cephadm prints in warning, let's redirect it to the output so shell_command doesn't + # complain + extra_args.append('2>&0') + + extra_args = ''.join(extra_args) + + cephadm_bootstrap_command = ( + '$CEPHADM_PATH --verbose bootstrap ' + '--mon-ip "$(hostname -i)" ' + '--allow-fqdn-hostname ' + '--initial-dashboard-password admin ' + '--dashboard-password-noupdate ' + '--shared_ceph_folder /ceph ' + f'{extra_args} ' + ) + + print('Running cephadm bootstrap...') + run_shell_command(cephadm_bootstrap_command) + print('Cephadm bootstrap complete') + + + run_shell_command('sudo vgchange --refresh') + + print('Deploying osds...') + osds = Config.get('osds') + for o in range(osds): + osd.deploy_osd(f'/dev/vg1/lv{o}') + print('Osds deployed') + print('Bootstrap completed!') + + + + @ensure_outside_container + def start(self): + osds = Config.get('osds') + hosts = Config.get('hosts') + + print('Checking docker images') + if not image_exists(CEPH_IMAGE): + get_ceph_image() + if not image_exists(BOX_IMAGE): + get_box_image() + + print('Adding logical volumes (block devices) in loopback device...') + osd.create_loopback_devices(osds) + print(f'Added {osds} logical volumes in a loopback device') + + print('Starting containers') + # ensure boxes don't exist + run_shell_command('docker-compose down') + + dcflags = '-f docker-compose.yml' + if not os.path.exists('/sys/fs/cgroup/cgroup.controllers'): + dcflags += ' -f docker-compose.cgroup1.yml' + run_shell_command(f'docker-compose {dcflags} up --scale hosts={hosts} -d') + + run_shell_command('sudo sysctl net.ipv4.conf.all.forwarding=1') + run_shell_command('sudo iptables -P FORWARD ACCEPT') + + print('Seting up host ssh servers') + ips = get_host_ips() + print(ips) + for h in range(hosts): + host._setup_ssh(h+1) + + verbose = '-v' if Config.get('verbose') else '' + run_dc_shell_command(f'/cephadm/box/box.py {verbose} cluster bootstrap --osds {osds} --hosts {hosts}', 1, 'seed') + + host._copy_cluster_ssh_key(ips) + + print('Bootstrap finished successfully') + + @ensure_outside_container + def down(self): + run_shell_command('docker-compose down') + print('Successfully killed all boxes') + + @ensure_outside_container + def list(self): + info = get_boxes_container_info() + for container in info: + print('\t'.join(container)) + + @ensure_outside_container + def sh(self): + print('Seed bash') + run_shell_command('docker-compose exec seed bash') + + def main(self): + parser = Cluster.parser + args = parser.parse_args(self.argv) + Config.add_args(vars(args)) + function = getattr(self, args.action) + function() + + +targets = { + 'cluster': Cluster, + 'osd': osd.Osd, + 'host': host.Host, +} + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('-v', action='store_true', dest='verbose', help='be more verbose') + + subparsers = parser.add_subparsers() + for _, target in targets.items(): + target.add_parser(subparsers) + + for count, arg in enumerate(sys.argv, 1): + if arg in targets: + instance = targets[arg](sys.argv[count:]) + if hasattr(instance, 'main'): + args = parser.parse_args() + Config.add_args(vars(args)) + instance.main() + sys.exit(0) + + parser.print_help() + +if __name__ == '__main__': + main() diff --git a/src/cephadm/box/host.py b/src/cephadm/box/host.py new file mode 100644 index 0000000000000..c91058d570098 --- /dev/null +++ b/src/cephadm/box/host.py @@ -0,0 +1,73 @@ +import argparse +import os +from typing import List +from util import inside_container, run_shell_command, run_dc_shell_command, Config + +def _setup_ssh(container_index): + if inside_container(): + if not os.path.exists('/root/.ssh/known_hosts'): + run_shell_command('ssh-keygen -A') + + run_shell_command('echo "root:root" | chpasswd') + with open('/etc/ssh/sshd_config', 'a+') as f: + f.write('PermitRootLogin yes\n') + f.write('PasswordAuthentication yes\n') + f.flush() + run_shell_command('/usr/sbin/sshd') + else: + print('Redirecting to _setup_ssh to container') + verbose = '-v' if Config.get('verbose') else '' + run_dc_shell_command(f'/cephadm/box/box.py {verbose} host setup_ssh {container_index}', container_index, 'hosts') + + +def _copy_cluster_ssh_key(ips: List[str]): + if inside_container(): + local_ip = run_shell_command('hostname -i') + for ip in ips: + if ip != local_ip: + run_shell_command(('sshpass -p "root" ssh-copy-id -f ' + f'-o StrictHostKeyChecking=no -i /etc/ceph/ceph.pub "root@{ip}"')) + + else: + print('Redirecting to _copy_cluster_ssh to container') + verbose = '-v' if Config.get('verbose') else '' + print(ips) + ips = ' '.join(ips) + ips = f"{ips}" + # assume we only have one seed + run_dc_shell_command(f'/cephadm/box/box.py {verbose} host copy_cluster_ssh_key 1 --ips {ips}', + 1, 'seed') +class Host: + _help = 'Run seed/host related commands' + actions = ['setup_ssh', 'copy_cluster_ssh_key'] + parser = None + + def __init__(self, argv): + self.argv = argv + + @staticmethod + def add_parser(subparsers): + assert not Host.parser + Host.parser = subparsers.add_parser('host', help=Host._help) + parser = Host.parser + parser.add_argument('action', choices=Host.actions) + parser.add_argument('host_container_index', type=str, help='box_host_{index}') + parser.add_argument('--ips', nargs='*', help='List of host ips') + + def setup_ssh(self): + _setup_ssh(Config.get('host_container_index')) + + + def copy_cluster_ssh_key(self): + ips = Config.get('ips') + if not ips: + ips = get_host_ips() + _copy_cluster_ssh_key(ips) + + + def main(self): + parser = Host.parser + args = parser.parse_args(self.argv) + Config.add_args(vars(args)) + function = getattr(self, args.action) + function() diff --git a/src/cephadm/box/osd.py b/src/cephadm/box/osd.py new file mode 100644 index 0000000000000..5454e115a46f2 --- /dev/null +++ b/src/cephadm/box/osd.py @@ -0,0 +1,112 @@ +from typing import Dict +import os +import argparse +from util import ensure_inside_container, ensure_outside_container, run_shell_command, \ + run_cephadm_shell_command, Config + +@ensure_outside_container +def create_loopback_devices(osds: int) -> None: + assert osds + size = (5 * osds) + 1 + print(f'Using {size}GB of data to store osds') + avail_loop = run_shell_command('sudo losetup -f') + base_name = os.path.basename(avail_loop) + + # create loop if we cannot find it + if not os.path.exists(avail_loop): + num_loops = int(run_shell_command('lsmod | grep loop | awk \'{print $3}\'')) + num_loops += 1 + run_shell_command(f'mknod {avail_loop} b 7 {num_loops}') + + if os.path.ismount(avail_loop): + os.umount(avail_loop) + + if run_shell_command(f'losetup -l | grep {avail_loop}', expect_error=True): + run_shell_command(f'sudo losetup -d {avail_loop}') + + if not os.path.exists('./loop-images'): + os.mkdir('loop-images') + + loop_image = 'loop-images/loop.img' + if os.path.exists(loop_image): + os.remove(loop_image) + + run_shell_command(f'sudo dd if=/dev/zero of={loop_image} bs=1G count={size}') + run_shell_command(f'sudo losetup {avail_loop} {loop_image}') + + vgs = run_shell_command('sudo vgs | grep vg1', expect_error=True) + if vgs: + run_shell_command('sudo lvm vgremove -f -y vg1') + + run_shell_command(f'sudo pvcreate {avail_loop}') + run_shell_command(f'sudo vgcreate vg1 {avail_loop}') + for i in range(osds): + run_shell_command('sudo vgchange --refresh') + run_shell_command(f'sudo lvcreate --size 5G --name lv{i} vg1') + +def get_lvm_osd_data(data: str) -> Dict[str, str]: + osd_lvm_info = run_cephadm_shell_command(f'ceph-volume lvm list {data}') + osd_data = {} + for line in osd_lvm_info.split('\n'): + line = line.strip() + if not line: + continue + line = line.split() + if line[0].startswith('===') or line[0].startswith('[block]'): + continue + # "block device" key -> "block_device" + key = '_'.join(line[:-1]) + osd_data[key] = line[-1] + return osd_data + +@ensure_inside_container +def deploy_osd(data: str): + assert data + out = run_shell_command(f'cephadm ceph-volume lvm zap {data}') + out = run_shell_command(f'cephadm ceph-volume --shared_ceph_folder /ceph lvm prepare --data {data} --no-systemd --no-tmpfs') + + osd_data = get_lvm_osd_data(data) + + osd = 'osd.' + osd_data['osd_id'] + run_shell_command(f'cephadm deploy --name {osd}') +class Osd: + _help = ''' + Deploy osds and create needed block devices with loopback devices: + Actions: + - deploy: Deploy an osd given a block device + - create_loop: Create needed loopback devices and block devices in logical volumes + for a number of osds. + ''' + actions = ['deploy', 'create_loop'] + parser = None + + def __init__(self, argv): + self.argv = argv + + @staticmethod + def add_parser(subparsers): + assert not Osd.parser + Osd.parser = subparsers.add_parser('osd', help=Osd._help) + parser = Osd.parser + parser.add_argument('action', choices=Osd.actions) + parser.add_argument('--data', type=str, help='path to a block device') + parser.add_argument('--osds', type=int, default=0, help='number of osds') + + @ensure_inside_container + def deploy(self): + data = Config.get('data') + deploy_osd(data) + + @ensure_outside_container + def create_loop(self): + osds = Config.get('osds') + create_loopback_devices(osds) + print('Successfully added logical volumes in loopback devices') + + def main(self): + parser = Osd.parser + args = parser.parse_args(self.argv) + Config.add_args(vars(args)) + function = getattr(self, args.action) + function() + diff --git a/src/cephadm/box/util.py b/src/cephadm/box/util.py new file mode 100644 index 0000000000000..e006ddd2cd50a --- /dev/null +++ b/src/cephadm/box/util.py @@ -0,0 +1,98 @@ +from typing import Dict, List +import argparse +import subprocess +import os +import sys + +class Config: + args = {} + + @staticmethod + def get(key): + if key in Config.args: + return Config.args[key] + return None + + @staticmethod + def add_args(args: Dict[str, str]) -> argparse.ArgumentParser: + Config.args.update(args) + +def ensure_outside_container(func) -> bool: + def wrapper(*args, **kwargs): + if not inside_container(): + return func(*args, **kwargs) + else: + raise RuntimeError('This command should be ran outside a container') + return wrapper + +def ensure_inside_container(func) -> bool: + def wrapper(*args, **kwargs): + if inside_container(): + return func(*args, **kwargs) + else: + raise RuntimeError('This command should be ran inside a container') + return wrapper + + +def run_shell_command(command: str, expect_error=False) -> str: + if Config.get('verbose'): + print(f'Running command: {command}') + process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + out = '' + # let's read when output comes so it is in real time + while True: + # TODO: improve performance of this part, I think this part is a problem + pout = process.stdout.read(1).decode('latin1') + if pout == '' and process.poll() is not None: + break + if pout: + if Config.get('verbose'): + sys.stdout.write(pout) + sys.stdout.flush() + out += pout + process.wait() + + # no last break line + err = process.stderr.read().decode().rstrip() # remove trailing whitespaces and new lines + out = out.strip() + + if process.returncode != 0 and not expect_error: + raise RuntimeError(f'Failed command: {command}\n{err}') + sys.exit(1) + return out + +@ensure_inside_container +def run_cephadm_shell_command(command: str, expect_error=False) -> str: + out = run_shell_command(f'cephadm shell -- {command}', expect_error) + return out + +def run_dc_shell_command(command: str, index: int, box_type: str, expect_error=False) -> str: + out = run_shell_command(f'docker-compose exec --index={index} {box_type} {command}', expect_error) + return out + +def inside_container() -> bool: + return os.path.exists('/.dockerenv') + +@ensure_outside_container +def get_host_ips() -> List[List[str]]: + containers_info = get_boxes_container_info() + print(containers_info) + ips = [] + for container in containers_info: + if container[1][:len('box_hosts')] == 'box_hosts': + ips.append(container[0]) + return ips + +@ensure_outside_container +def get_boxes_container_info() -> List[List[str]]: + ips_query = "docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}} %tab% {{.Name}} %tab% {{.Config.Hostname}}' $(docker ps -aq) | sed 's#%tab%#\t#g' | sed 's#/##g' | sort -t . -k 1,1n -k 2,2n -k 3,3n -k 4,4n" + out = run_shell_command(ips_query) + info = [] + for line in out.split('\n'): + container = line.split() + if container[1].strip()[:4] == 'box_': + info.append(container) + return info + +