From 3fcdf411d481fd5984c5124adfe9d25f7fa6651b Mon Sep 17 00:00:00 2001 From: Vicente Cheng Date: Thu, 10 Sep 2015 16:37:03 +0800 Subject: [PATCH] ceph-disk: add deactivate feature Implement deactivate option on ceph-disk. - stop ceph-osd service if needed (if the osd is still in the osd map, mark it out first) - remove 'ready', 'active', and INIT-specific files - remove gpt partition type and change partition name (so udev does not trigger activation) - create deactive flag - umount device and remove mount point Signed-off-by: Vicente Cheng --- src/ceph-disk | 340 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 340 insertions(+) diff --git a/src/ceph-disk b/src/ceph-disk index 0ca368879cd0d..49886bd70bcda 100755 --- a/src/ceph-disk +++ b/src/ceph-disk @@ -51,13 +51,22 @@ Activate: - if encrypted, map the dmcrypt volume - mount the volume in a temp location - allocate an osd id (if needed) + - if deactivated, change the gpt partition info correctly - remount in the correct location /var/lib/ceph/osd/$cluster-$id + - remove the deactive flag - start ceph-osd - triggered by udev when it sees the OSD gpt partition type - triggered by admin 'ceph-disk activate ' - triggered on ceph service startup with 'ceph-disk activate-all' +Deactivate: + - stop ceph-osd service if needed (if the osd is still in the osd map, mark it out first) + - remove 'ready', 'active', and INIT-specific files + - remove gpt partition type and change partition name (so udev does not trigger activation) + - create deactive flag + - umount device and remove mount point + We rely on /dev/disk/by-partuuid to find partitions by their UUID; this is what the journal symlink inside the osd data volume normally points to. @@ -80,6 +89,7 @@ knew the GPT partition type. 
CEPH_OSD_ONDISK_MAGIC = 'ceph osd volume v026' +LINUX_RESERVED_TYPE = '8da63339-0007-60c0-c436-083ac8230908' JOURNAL_UUID = '45b0969e-9b03-4f30-b4c6-b4b80ceff106' MPATH_JOURNAL_UUID = '45b0969e-8ae0-4982-bf9d-5a8d867af560' DMCRYPT_JOURNAL_UUID = '45b0969e-9b03-4f30-b4c6-5ec00ceff106' @@ -96,6 +106,14 @@ DMCRYPT_JOURNAL_TOBE_UUID = '89c57f98-2fe5-4dc0-89c1-35865ceff2be' DEFAULT_FS_TYPE = 'xfs' SYSFS = '/sys' +""" +OSD STATUS Definition +""" +OSD_STATUS_OUT_DOWN = 0 +OSD_STATUS_OUT_UP = 1 +OSD_STATUS_IN_DOWN = 2 +OSD_STATUS_IN_UP = 3 + MOUNT_OPTIONS = dict( btrfs='noatime,user_subvol_rm_allowed', # user_xattr is default ever since linux 2.6.39 / 3.0, but we'll @@ -792,6 +810,27 @@ def check_osd_magic(path): raise BadMagicError(path) +def convert_osd_id(cluster, osd_id): + """ + Convert the OSD id to OS device (ex. sdx) + """ + mountsp_name = '%s-%s' % (cluster, osd_id) + + # mount_info's first fields means `device`, Second means `mount point` + mount_info = [] + with file('/proc/mounts', 'rb') as proc_mounts: + for line in proc_mounts: + if mountsp_name in line: + fields = line.split() + mount_info.append(fields[0]) + mount_info.append(fields[1]) + else: + continue + if not mount_info: + raise Error('Can not find mount point by osd-id') + return mount_info + + def check_osd_id(osd_id): """ Ensures osd id is numeric. @@ -2108,6 +2147,66 @@ def start_daemon( raise Error('ceph osd start failed', e) +def stop_daemon( + cluster, + osd_id, + ): + LOG.debug('Stoping %s osd.%s...', cluster, osd_id) + + path = (STATEDIR + '/osd/{cluster}-{osd_id}').format( + cluster=cluster, osd_id=osd_id) + + # upstart? + try: + if os.path.exists(os.path.join(path,'upstart')): + command_check_call( + [ + '/sbin/initctl', + 'stop', + # I remove --no-wait parameter because we must guarantee + # this service stop. 
+ 'ceph-osd', + 'cluster={cluster}'.format(cluster=cluster), + 'id={osd_id}'.format(osd_id=osd_id), + ], + ) + elif os.path.exists(os.path.join(path, 'sysvinit')): + if os.path.exists('/usr/sbin/service'): + svc = '/usr/sbin/service' + else: + svc = '/sbin/service' + command_check_call( + [ + svc, + 'ceph', + '--cluster', + '{cluster}'.format(cluster=cluster), + 'stop', + 'osd.{osd_id}'.format(osd_id=osd_id), + ], + ) + elif os.path.exists(os.path.join(path, 'systemd')): + command_check_call( + [ + 'systemctl', + 'disable', + 'ceph-osd@{osd_id}'.format(osd_id=osd_id), + ], + ) + command_check_call( + [ + 'systemctl', + 'stop', + 'ceph-osd@{osd_id}'.format(osd_id=osd_id), + ], + ) + else: + raise Error('{cluster} osd.{osd_id} is not tagged with an init '\ + ' system'.format(cluster=cluster,osd_id=osd_id,)) + except: + raise Error('ceph osd stop failed') + + def detect_fstype( dev, ): @@ -2191,11 +2290,60 @@ def mount_activate( path = mount(dev=dev, fstype=fstype, options=mount_options) + # check if the disk is deactive, change the journal owner, group + # mode for correct user and group. + if os.path.exists(os.path.join(path, 'deactive')): + # flag to activate a deactive osd. 
+ deactive = True + journal_dev = os.path.realpath(os.path.join(path,'journal')) + try: + if get_ceph_user() == 'ceph': + command( + [ + 'chown', '-R', 'ceph:ceph', + journal_dev, + ], + ) + command( + [ + 'chmod', '660', + journal_dev, + ] + ) + except OSError: + pass + else: + deactive = False + osd_id = None cluster = None try: (osd_id, cluster) = activate(path, activate_key_template, init) + # Now active successfully + # change the gpt partition type for bootup (meet the udev rules) + if deactive: + # Change OSD gpt partition type + if is_mpath(dev): + type_code = MPATH_OSD_UUID + else: + type_code = OSD_UUID + _change_gpt_partition_info(dev, type_code) + + # Change Journal gpt partition type + if is_mpath(journal_dev): + type_code = MPATH_JOURNAL_UUID + else: + type_code = JOURNAL_UUID + _change_gpt_partition_info(journal_dev, type_code) + + # Remove the deactive flag + try: + os.remove(os.path.join(path, 'deactive')) + LOG.info('Remove `deactive` file.') + except OSError: + pass + # check if the disk is already active, or if something else is already # mounted there active = False @@ -2461,6 +2609,176 @@ def main_activate(args): ########################### +def _mark_osd_out(cluster, osd_id): + LOG.info('Prepare to mark osd.%s out...', osd_id) + try: + out, ret = command( + [ + 'ceph', + 'osd', + 'out', + 'osd.%s' % osd_id, + ], + ) + except: + raise Error('Could not find osd.%s, is a vaild/exist osd id?' % osd_id) + + +def _check_osd_status(cluster, osd_id): + """ + report the osd status: + 00(0) : means OSD OUT AND DOWN + 01(1) : means OSD OUT AND UP + 10(2) : means OSD IN AND DOWN + 11(3) : means OSD IN AND UP + """ + LOG.info("Checking osd id: %s ..." 
% osd_id) + status_code = 0 + try: + out, ret = command( + [ + 'ceph', + 'osd', + 'find', + osd_id, + '--cluster={cluster}'.format( + cluster=cluster, + ), + '--format', + 'json', + ], + ) + except subprocess.CalledProcessError as e: + raise Error(e) + out_json = json.loads(out) + if out_json['status IN/OUT'] == u'IN': + status_code += 2 + if out_json['status UP/DOWN'] == u'UP': + status_code += 1 + return status_code + + +def _remove_osd_directory_files(mounted_path, cluster): + """ + To remove the 'ready', 'active', INIT-specific files. + """ + if os.path.exists(os.path.join(mounted_path, 'ready')): + try: + os.remove(os.path.join(mounted_path, 'ready')) + LOG.info('Remove `ready` file.') + except OSError: + pass + else: + LOG.info('`ready` file is already removed.') + + if os.path.exists(os.path.join(mounted_path, 'active')): + try: + os.remove(os.path.join(mounted_path, 'active')) + LOG.info('Remove `active` file.') + except OSError: + pass + else: + LOG.info('`active` file is already removed.') + + # Just check `upstart` and `sysvinit` directly if filename is init-spec. + conf_val = get_conf( + cluster=cluster, + variable='init' + ) + if conf_val is not None: + init = conf_val + else: + init = init_get() + try: + os.remove(os.path.join(mounted_path, init)) + LOG.info('Remove `%s` file.', init) + return + except OSError: + pass + + +def _change_gpt_partition_info(device_part, type_code=LINUX_RESERVED_TYPE): + """ + Due to udev rule 95-ceph-osd.rules, we need to remove the + gpt partition type to prevent trigger ceph-disk-activate. 
+ + Also change partition name for zap in destroy stage + """ + + (device, part_num) = split_dev_base_partnum(device_part) + + part_name = get_partition_name(device_part) + + if type_code is LINUX_RESERVED_TYPE: + part_name = part_name + ' (deactive)' + + if type_code is MPATH_JOURNAL_UUID or type_code is JOURNAL_UUID or \ + type_code is MPATH_OSD_UUID or type_code is OSD_UUID: + part_name = part_name.replace(" (deactive)", "") + + try: + command_check_call( + [ + 'sgdisk', + '--change-name=%s:%s' % (part_num, part_name), + '--typecode=%s:%s' % (part_num, type_code), + '--', + device, + ], + ) + except subprocess.CalledProcessError as e: + raise Error(e) + + +def main_deactivate(args): + if args.cluster is None: + args.cluster = 'ceph' + if args.osd_id is None: + raise Error("osd id can not be zero. Try to use --osd-id .") + # Do not do anything if osd is already down. + status_code = _check_osd_status(args.cluster, args.osd_id) + if status_code == OSD_STATUS_IN_UP: + _mark_osd_out(args.cluster, args.osd_id) + stop_daemon(args.cluster, args.osd_id) + elif status_code == OSD_STATUS_IN_DOWN: + _mark_osd_out(args.cluster, args.osd_id) + elif status_code == OSD_STATUS_OUT_UP: + stop_daemon(args.cluster, args.osd_id) + elif status_code == OSD_STATUS_OUT_DOWN: + LOG.info("OSD already out/down. Do not do anything now.") + return + + # GET the mounted device and mount point. + mount_info = convert_osd_id(args.cluster, args.osd_id) + + # remove 'ready', 'active', and INIT-specific files. 
+ _remove_osd_directory_files(mount_info[1], args.cluster) + + # Remove filesystem gpt partition type + _change_gpt_partition_info(mount_info[0], LINUX_RESERVED_TYPE) + + # Check journal + # if journal is exist, remove the gpt partition type + journal_path = os.path.join(mount_info[1], 'journal') + if os.path.exists(journal_path) and os.path.islink(journal_path): + _change_gpt_partition_info(os.path.realpath(journal_path), \ + LINUX_RESERVED_TYPE) + else: + LOG.info('Journal is not exist on osd.%s (or not symlink).', \ + args.osd_id) + + # Write deactivate to osd directory! + with file(os.path.join(mount_info[1], 'deactive'), 'w'): + path_set_context(os.path.join(mount_info[1], 'deactive')) + pass + + unmount(mount_info[1]) + LOG.info("Umount `%s` successfully.", mount_info[1]) + + return + +########################### + def get_journal_osd_uuid(path): if not os.path.exists(path): raise Error('%s does not exist' % path) @@ -2672,6 +2990,10 @@ def get_partition_type(part): def get_partition_uuid(part): return get_sgdisk_partition_info(part, 'Partition unique GUID: (\S+)') +def get_partition_name(part): + regexp = "Partition name: \'*([A-Za-z ]+[ ()A-Za-z]*)\'*" + return get_sgdisk_partition_info(part, regexp) + def get_sgdisk_partition_info(dev, regexp): (base, partnum) = split_dev_base_partnum(dev) out, _ = command(['sgdisk', '-i', partnum, base]) @@ -3252,6 +3574,7 @@ def parse_args(argv): make_activate_all_parser(subparsers) make_list_parser(subparsers) make_suppress_parser(subparsers) + make_deactivate_parser(subparsers) make_zap_parser(subparsers) make_trigger_parser(subparsers) @@ -3503,6 +3826,23 @@ def make_suppress_parser(subparsers): ) return suppress_parser +def make_deactivate_parser(subparsers): + deactivate_parser = subparsers.add_parser('deactivate', help='Deactivate a Ceph OSD') + deactivate_parser.add_argument( + '--cluster', + metavar='NAME', + default='ceph', + help='cluster name to assign this disk to', + ) + deactivate_parser.add_argument( + 
'--osd-id', + metavar='OSDID', + help='ID of OSD to deactivate' + ) + deactivate_parser.set_defaults( + func=main_deactivate, + ) + def make_zap_parser(subparsers): zap_parser = subparsers.add_parser('zap', help='Zap/erase/destroy a device\'s partition table (and contents)') zap_parser.add_argument( -- 2.39.5