From: shylesh kumar Date: Wed, 31 Oct 2018 16:25:17 +0000 (-0400) Subject: Modify ceph task to use ceph-volume lvm for osd creation X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=555af9f2e5e6cdaa678034fbeed8dfbca64ee904;p=ceph.git Modify ceph task to use ceph-volume lvm for osd creation Signed-off-by: shylesh kumar --- diff --git a/qa/suites/smoke/ceph-volume-lvm/% b/qa/suites/smoke/ceph-volume-lvm/% new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/qa/suites/smoke/ceph-volume-lvm/clusters/3osd_3_mon.yaml b/qa/suites/smoke/ceph-volume-lvm/clusters/3osd_3_mon.yaml new file mode 100644 index 0000000000000..979d2621c4034 --- /dev/null +++ b/qa/suites/smoke/ceph-volume-lvm/clusters/3osd_3_mon.yaml @@ -0,0 +1,13 @@ +roles: +- [mon.a, mds.a, mgr.x, osd.0, osd.1, osd.2] +- [mon.b, mon.c, mgr.y, osd.3, osd.4, osd.5] +- [client.0] + +log-rotate: + ceph-mds: 10G + ceph-osd: 10G +overrides: + ceph: + conf: + osd: + osd shutdown pgref assert: true diff --git a/qa/suites/smoke/ceph-volume-lvm/objectstore/bluestore.yaml b/qa/suites/smoke/ceph-volume-lvm/objectstore/bluestore.yaml new file mode 100644 index 0000000000000..dc223cfa3578c --- /dev/null +++ b/qa/suites/smoke/ceph-volume-lvm/objectstore/bluestore.yaml @@ -0,0 +1,21 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + ceph-volume: true + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore fsck on mount: true + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + diff --git a/qa/suites/smoke/ceph-volume-lvm/objectstore/filestore.yaml b/qa/suites/smoke/ceph-volume-lvm/objectstore/filestore.yaml new file mode 100644 index 0000000000000..b05045696a33e --- /dev/null +++ b/qa/suites/smoke/ceph-volume-lvm/objectstore/filestore.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + fs: xfs + conf: + osd: + osd objectstore: filestore + debug rocksdb: 10 + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + diff --git a/qa/suites/smoke/ceph-volume-lvm/tasks/rados_bench.yaml b/qa/suites/smoke/ceph-volume-lvm/tasks/rados_bench.yaml new file mode 100644 index 0000000000000..31cf27575bf2c --- /dev/null +++ b/qa/suites/smoke/ceph-volume-lvm/tasks/rados_bench.yaml @@ -0,0 +1,27 @@ +overrides: + ceph: + conf: + global: + ms inject delay max: 1 + ms inject delay probability: 0.005 + ms inject delay type: osd + ms inject internal delays: 0.002 + ms inject socket failures: 2500 +tasks: +- install: null +- ceph: + fs: xfs + log-whitelist: + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(POOL_ + - \(CACHE_POOL_ + - \(SMALLER_PGP_NUM\) + - \(OBJECT_ + - \(REQUEST_SLOW\) + - \(TOO_FEW_PGS\) +- radosbench: + clients: [client.0] + time: 150 diff --git a/qa/tasks/ceph.py b/qa/tasks/ceph.py index 4fa7fbcaa3e61..d3932ba5a940a 100644 --- a/qa/tasks/ceph.py +++ b/qa/tasks/ceph.py @@ -555,6 +555,25 @@ def cluster(ctx, config): ], ) + if ctx.ceph_volume: + log.info("Creating bootstrap-osd keyring") + ctx.cluster.only(firstmon).run(\ + args=[ + 'sudo', + 'adjust-ulimits', + 'ceph-authtool', + '--create-keyring', + '/var/lib/ceph/bootstrap-osd/{cluster}.keyring'.format\ + (cluster=cluster_name),\ + '--gen-key', + 
'-n', + 'client.bootstrap-osd', + '--cap', + 'mon', + '\'profile bootstrap-osd\'', + ] + ) + log.info('Copying monmap to all nodes...') keyring = teuthology.get_file( remote=mon0_remote, @@ -580,6 +599,25 @@ def cluster(ctx, config): data=monmap, ) + if ctx.ceph_volume: + log.info('Copying bootstrap-osd keys to all osd nodes') + keyring = teuthology.get_file( + remote=mon0_remote, + path='/var/lib/ceph/bootstrap-osd/{cluster}.keyring'.format( + cluster=cluster_name), + sudo=True, + ) + + for remote, roles in osds.remotes.iteritems(): + teuthology.sudo_write_file( + remote=remote, + path='/var/lib/ceph/bootstrap-osd/{cluster}.keyring'.format( + cluster=cluster_name), + data=keyring, + perms='0644' + ) + + log.info('Setting up mon nodes...') mons = ctx.cluster.only(teuthology.is_type('mon', cluster_name)) @@ -659,123 +697,126 @@ def cluster(ctx, config): teuthology.deep_merge(ctx.disk_config.remote_to_roles_to_dev, remote_to_roles_to_devs) teuthology.deep_merge(ctx.disk_config.remote_to_roles_to_journals, remote_to_roles_to_journals) - log.info("ctx.disk_config.remote_to_roles_to_dev: {r}".format(r=str(ctx.disk_config.remote_to_roles_to_dev))) - for remote, roles_for_host in osds.remotes.iteritems(): - roles_to_devs = remote_to_roles_to_devs[remote] - roles_to_journals = remote_to_roles_to_journals[remote] + if ctx.ceph_volume: + pass + else: + log.info("ctx.disk_config.remote_to_roles_to_dev: {r}".format(r=str(ctx.disk_config.remote_to_roles_to_dev))) + for remote, roles_for_host in osds.remotes.iteritems(): + roles_to_devs = remote_to_roles_to_devs[remote] + roles_to_journals = remote_to_roles_to_journals[remote] + + for role in teuthology.cluster_roles_of_type(roles_for_host, 'osd', cluster_name): + _, _, id_ = teuthology.split_role(role) + mnt_point = '/var/lib/ceph/osd/{cluster}-{id}'.format(cluster=cluster_name, id=id_) + remote.run( + args=[ + 'sudo', + 'mkdir', + '-p', + mnt_point, + ]) + log.info(str(roles_to_devs)) + log.info(str(roles_to_journals)) + log.info(role) + if roles_to_devs.get(role): + dev = roles_to_devs[role] + fs = config.get('fs') + package = None + mkfs_options = config.get('mkfs_options') + mount_options = config.get('mount_options') + if fs == 'btrfs': + # package = 'btrfs-tools' + if mount_options is None: + mount_options = ['noatime', 'user_subvol_rm_allowed'] + if mkfs_options is None: + mkfs_options = ['-m', 'single', + '-l', '32768', + '-n', '32768'] + if fs == 'xfs': + # package = 'xfsprogs' + if mount_options is None: + mount_options = ['noatime'] + if mkfs_options is None: + mkfs_options = ['-f', '-i', 'size=2048'] + if fs == 'ext4' or fs == 'ext3': + if mount_options is None: + mount_options = ['noatime', 'user_xattr'] - for role in teuthology.cluster_roles_of_type(roles_for_host, 'osd', cluster_name): - _, _, id_ = teuthology.split_role(role) - mnt_point = '/var/lib/ceph/osd/{cluster}-{id}'.format(cluster=cluster_name, id=id_) - remote.run( - args=[ - 'sudo', - 'mkdir', - '-p', - mnt_point, - ]) - log.info(str(roles_to_devs)) - log.info(str(roles_to_journals)) - log.info(role) - if roles_to_devs.get(role): - dev = roles_to_devs[role] - fs = config.get('fs') - package = None - mkfs_options = config.get('mkfs_options') - mount_options = config.get('mount_options') - if fs == 'btrfs': - # package = 'btrfs-tools' - if mount_options is None: - mount_options = ['noatime', 'user_subvol_rm_allowed'] - if mkfs_options is None: - mkfs_options = ['-m', 'single', - '-l', '32768', - '-n', '32768'] - if fs == 'xfs': - # package = 'xfsprogs' if mount_options is None: 
- mount_options = ['noatime'] + mount_options = [] if mkfs_options is None: - mkfs_options = ['-f', '-i', 'size=2048'] - if fs == 'ext4' or fs == 'ext3': - if mount_options is None: - mount_options = ['noatime', 'user_xattr'] - - if mount_options is None: - mount_options = [] - if mkfs_options is None: - mkfs_options = [] - mkfs = ['mkfs.%s' % fs] + mkfs_options - log.info('%s on %s on %s' % (mkfs, dev, remote)) - if package is not None: + mkfs_options = [] + mkfs = ['mkfs.%s' % fs] + mkfs_options + log.info('%s on %s on %s' % (mkfs, dev, remote)) + if package is not None: + remote.run( + args=[ + 'sudo', + 'apt-get', 'install', '-y', package + ], + stdout=StringIO(), + ) + + try: + remote.run(args=['yes', run.Raw('|')] + ['sudo'] + mkfs + [dev]) + except run.CommandFailedError: + # Newer btfs-tools doesn't prompt for overwrite, use -f + if '-f' not in mount_options: + mkfs_options.append('-f') + mkfs = ['mkfs.%s' % fs] + mkfs_options + log.info('%s on %s on %s' % (mkfs, dev, remote)) + remote.run(args=['yes', run.Raw('|')] + ['sudo'] + mkfs + [dev]) + + log.info('mount %s on %s -o %s' % (dev, remote, + ','.join(mount_options))) remote.run( args=[ 'sudo', - 'apt-get', 'install', '-y', package + 'mount', + '-t', fs, + '-o', ','.join(mount_options), + dev, + mnt_point, + ] + ) + remote.run( + args=[ + 'sudo', '/sbin/restorecon', mnt_point, ], - stdout=StringIO(), + check_status=False, ) + if not remote in ctx.disk_config.remote_to_roles_to_dev_mount_options: + ctx.disk_config.remote_to_roles_to_dev_mount_options[remote] = {} + ctx.disk_config.remote_to_roles_to_dev_mount_options[remote][role] = mount_options + if not remote in ctx.disk_config.remote_to_roles_to_dev_fstype: + ctx.disk_config.remote_to_roles_to_dev_fstype[remote] = {} + ctx.disk_config.remote_to_roles_to_dev_fstype[remote][role] = fs + devs_to_clean[remote].append(mnt_point) - try: - remote.run(args=['yes', run.Raw('|')] + ['sudo'] + mkfs + [dev]) - except run.CommandFailedError: - # Newer btfs-tools doesn't prompt for overwrite, use -f - if '-f' not in mount_options: - mkfs_options.append('-f') - mkfs = ['mkfs.%s' % fs] + mkfs_options - log.info('%s on %s on %s' % (mkfs, dev, remote)) - remote.run(args=['yes', run.Raw('|')] + ['sudo'] + mkfs + [dev]) - - log.info('mount %s on %s -o %s' % (dev, remote, - ','.join(mount_options))) + for role in teuthology.cluster_roles_of_type(roles_for_host, 'osd', cluster_name): + _, _, id_ = teuthology.split_role(role) remote.run( args=[ 'sudo', - 'mount', - '-t', fs, - '-o', ','.join(mount_options), - dev, - mnt_point, - ] - ) - remote.run( - args=[ - 'sudo', '/sbin/restorecon', mnt_point, + 'MALLOC_CHECK_=3', + 'adjust-ulimits', + 'ceph-coverage', + coverage_dir, + 'ceph-osd', + '--cluster', + cluster_name, + '--mkfs', + '--mkkey', + '-i', id_, + '--monmap', monmap_path, ], - check_status=False, ) - if not remote in ctx.disk_config.remote_to_roles_to_dev_mount_options: - ctx.disk_config.remote_to_roles_to_dev_mount_options[remote] = {} - ctx.disk_config.remote_to_roles_to_dev_mount_options[remote][role] = mount_options - if not remote in ctx.disk_config.remote_to_roles_to_dev_fstype: - ctx.disk_config.remote_to_roles_to_dev_fstype[remote] = {} - ctx.disk_config.remote_to_roles_to_dev_fstype[remote][role] = fs - devs_to_clean[remote].append(mnt_point) - - for role in teuthology.cluster_roles_of_type(roles_for_host, 'osd', cluster_name): - _, _, id_ = teuthology.split_role(role) - remote.run( - args=[ - 'sudo', - 'MALLOC_CHECK_=3', - 'adjust-ulimits', - 'ceph-coverage', - coverage_dir, - 
'ceph-osd', - '--cluster', - cluster_name, - '--mkfs', - '--mkkey', - '-i', id_, - '--monmap', monmap_path, - ], - ) log.info('Reading keys from all nodes...') keys_fp = StringIO() keys = [] for remote, roles_for_host in ctx.cluster.remotes.iteritems(): - for type_ in ['mgr', 'mds', 'osd']: + for type_ in [['mgr', 'mds', 'osd'], ['mgr', 'mds']][ctx.ceph_volume]: if type_ == 'mgr' and config.get('skip_mgr_daemons', False): continue for role in teuthology.cluster_roles_of_type(roles_for_host, type_, cluster_name): @@ -801,6 +842,18 @@ def cluster(ctx, config): keys.append(('client', id_, data)) keys_fp.write(data) + # add bootstrap-osd keyring as well + if ctx.ceph_volume: + id_ = 'bootstrap-osd' + data = teuthology.get_file( + remote=mon0_remote, + path='/var/lib/ceph/bootstrap-osd/{cluster}.keyring'.format\ + (cluster=cluster_name), + sudo=True, + ) + keys.append(('client', id_, data)) + keys_fp.write(data) + log.info('Adding keys to all mons...') writes = mons.run( args=[ @@ -1003,6 +1056,66 @@ def cluster(ctx, config): ) +@contextlib.contextmanager +def create_osd_with_lvm(ctx, config): + ''' + create osds with ceph-volume lvm + as of now doing only bluestore + TODO: filestore (because it needs extra journal device) + ''' + remote_to_roles_to_devs = ctx.disk_config.remote_to_roles_to_dev + log.info(remote_to_roles_to_devs) + log.info('Using ceph-volume lvm for osd creation') + objstore = 'bluestore' + for section, keys in config['conf'].iteritems(): + for key, val in keys.iteritems(): + if key == 'osd objectstore': + if val == 'filestore': + objstore = 'filestore' + + log.info("Using objectstore {}".format(objstore)) + ceph_lvm = "sudo ceph-volume lvm " + for remote, roles_to_dev in remote_to_roles_to_devs.iteritems(): + for osd, dev in roles_to_dev.iteritems(): + log.info("OSD={}, dev={}".format(osd, dev)) + cmd = ceph_lvm + "create --" + objstore + \ + " --data "+ dev + log.info("Running cmd={}".format(cmd)) + remote.run( + args=cmd, + ) + time.sleep(10) + try: + yield + + finally: + osd_path = "/var/lib/ceph/osd/" + + for remote, roles_to_dev in remote_to_roles_to_devs.iteritems(): + cmd="sudo systemctl stop ceph-osd@*" + remote.run( + args=cmd, + ) + time.sleep(10) + cmd = 'sudo ls '+osd_path + proc = remote.run( + args=cmd, + stdout=StringIO(), + ) + out = proc.stdout.getvalue() + log.info("OSDs on this node are {}".format(out)) + olist = out.split('\n') + for osd in olist: + if osd == '': + continue + cmd = "sudo umount {opath}{osd}".format(opath=osd_path, + osd=osd) + remote.run( + args=cmd, + ) + time.sleep(5) + + def osd_scrub_pgs(ctx, config): """ Scrub pgs when we exit. @@ -1594,6 +1707,9 @@ def task(ctx, config): if 'cluster' not in config: config['cluster'] = 'ceph' + ctx.ceph_volume = False + if 'ceph-volume' in config['conf']['osd']: + ctx.ceph_volume = config['conf']['osd']['ceph-volume'] validate_config(ctx, config) @@ -1622,7 +1738,8 @@ def task(ctx, config): lambda: run_daemon(ctx=ctx, config=config, type_='mon'), lambda: run_daemon(ctx=ctx, config=config, type_='mgr'), lambda: crush_setup(ctx=ctx, config=config), - lambda: run_daemon(ctx=ctx, config=config, type_='osd'), + (lambda: run_daemon(ctx=ctx, config=config, type_='osd'), + lambda: create_osd_with_lvm(ctx=ctx, config=config))[ctx.ceph_volume], lambda: create_rbd_pool(ctx=ctx, config=config), lambda: cephfs_setup(ctx=ctx, config=config), lambda: run_daemon(ctx=ctx, config=config, type_='mds'),
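
---

Note for reviewers skimming the patch: the core of the change is small. A `ceph-volume: true` key under the osd conf section (see bluestore.yaml above) turns on `ctx.ceph_volume` in `task()`, which switches OSD bring-up from the classic `ceph-osd --mkfs --mkkey` path to the new `create_osd_with_lvm` context manager, which in turn shells out to `ceph-volume lvm create`. The sketch below is a minimal, standalone illustration of that dispatch, assuming the config layout used by bluestore.yaml; the helper bodies and the `/dev/vdb` device are hypothetical stand-ins, not code from the patch.

    # Standalone sketch (not part of the patch): plain Python, runnable outside
    # teuthology.  The dict layout mirrors the bluestore.yaml fragment above;
    # the function bodies and the data device are hypothetical.

    def run_daemon_osds(config):
        # stands in for run_daemon(ctx, config, type_='osd')
        return "classic path: ceph-osd --mkfs --mkkey per OSD"

    def create_osd_with_lvm(config):
        # stands in for the new context manager; builds the same kind of command
        objstore = config['conf']['osd'].get('osd objectstore', 'bluestore')
        dev = '/dev/vdb'  # hypothetical data device
        return "sudo ceph-volume lvm create --{} --data {}".format(objstore, dev)

    config = {'conf': {'osd': {'ceph-volume': True,
                               'osd objectstore': 'bluestore'}}}

    # task() derives a boolean flag from the osd conf section ...
    ceph_volume = bool(config['conf']['osd'].get('ceph-volume', False))

    # ... and uses it to index a 2-tuple of callables, mirroring
    # (lambda: run_daemon(...), lambda: create_osd_with_lvm(...))[ctx.ceph_volume]
    bringup = (run_daemon_osds, create_osd_with_lvm)[ceph_volume]
    print(bringup(config))
    # -> sudo ceph-volume lvm create --bluestore --data /dev/vdb

Indexing a 2-tuple with the flag works because bool is a subclass of int (False selects index 0, True index 1); it keeps the parallel task list in task() uniform, though a plain if/else would read more explicitly.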