]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
ceph-disk: add deactivate feature
authorVicente Cheng <freeze.bilsted@gmail.com>
Thu, 10 Sep 2015 08:37:03 +0000 (16:37 +0800)
committerVicente Cheng <freeze.bilsted@gmail.com>
Tue, 17 Nov 2015 01:24:42 +0000 (09:24 +0800)
  Implement deactivate option on ceph-disk.

  - stop the ceph-osd service if needed (if the OSD is still "in" in the osd map, mark it out first)
  - remove 'ready', 'active', and INIT-specific files
  - remove the gpt partition type and change the partition name (to prevent udev from triggering activation)
  - create deactive flag
  - umount device and remove mount point

Signed-off-by: Vicente Cheng <freeze.bilsted@gmail.com>
src/ceph-disk

index 0ca368879cd0d4cef78a0632c9947402a082ee87..49886bd70bcda570413f4a743795da7103fe48bf 100755 (executable)
@@ -51,13 +51,22 @@ Activate:
  - if encrypted, map the dmcrypt volume
  - mount the volume in a temp location
  - allocate an osd id (if needed)
+ - if deactivated, restore the correct gpt partition info
  - remount in the correct location /var/lib/ceph/osd/$cluster-$id
+ - remove the deactive flag
  - start ceph-osd
 
  - triggered by udev when it sees the OSD gpt partition type
  - triggered by admin 'ceph-disk activate <path>'
  - triggered on ceph service startup with 'ceph-disk activate-all'
 
+Deactivate:
+ - stop the ceph-osd service if needed (if the OSD is still "in" in the osd map, mark it out first)
+ - remove 'ready', 'active', and INIT-specific files
+ - remove the gpt partition type and change the partition name (to prevent udev from triggering activation)
+ - create deactive flag
+ - umount device and remove mount point
+
 We rely on /dev/disk/by-partuuid to find partitions by their UUID;
 this is what the journal symlink inside the osd data volume normally
 points to.
@@ -80,6 +89,7 @@ knew the GPT partition type.
 
 CEPH_OSD_ONDISK_MAGIC = 'ceph osd volume v026'
 
+LINUX_RESERVED_TYPE =       '8da63339-0007-60c0-c436-083ac8230908'
 JOURNAL_UUID =              '45b0969e-9b03-4f30-b4c6-b4b80ceff106'
 MPATH_JOURNAL_UUID =        '45b0969e-8ae0-4982-bf9d-5a8d867af560'
 DMCRYPT_JOURNAL_UUID =      '45b0969e-9b03-4f30-b4c6-5ec00ceff106'
@@ -96,6 +106,14 @@ DMCRYPT_JOURNAL_TOBE_UUID = '89c57f98-2fe5-4dc0-89c1-35865ceff2be'
 DEFAULT_FS_TYPE = 'xfs'
 SYSFS = '/sys'
 
+"""
+OSD STATUS Definition
+"""
+# Two-bit status code as computed by _check_osd_status():
+# 2 is added when the OSD is IN, 1 is added when the OSD is UP.
+OSD_STATUS_OUT_DOWN =  0
+OSD_STATUS_OUT_UP =    1
+OSD_STATUS_IN_DOWN =   2
+OSD_STATUS_IN_UP =     3
+
 MOUNT_OPTIONS = dict(
     btrfs='noatime,user_subvol_rm_allowed',
     # user_xattr is default ever since linux 2.6.39 / 3.0, but we'll
@@ -792,6 +810,27 @@ def check_osd_magic(path):
         raise BadMagicError(path)
 
 
+def convert_osd_id(cluster, osd_id):
+    """
+    Convert the OSD id to OS device (ex. sdx)
+
+    Scans /proc/mounts for the file system whose mount point's last path
+    component is `<cluster>-<osd_id>`.
+
+    Returns a list `[device, mount_point]` for the first matching entry.
+    Raises Error when no entry matches.
+    """
+    mountsp_name = '%s-%s' % (cluster, osd_id)
+
+    with open('/proc/mounts', 'r') as proc_mounts:
+        for line in proc_mounts:
+            fields = line.split()
+            if len(fields) < 2:
+                continue
+            # First field is the device, second field is the mount point.
+            # Compare the last path component exactly: a substring test on
+            # the whole line would let `ceph-1` match the mount of `ceph-10`.
+            if os.path.basename(fields[1].rstrip('/')) == mountsp_name:
+                return [fields[0], fields[1]]
+    raise Error('Can not find mount point by osd-id')
+
+
 def check_osd_id(osd_id):
     """
     Ensures osd id is numeric.
@@ -2108,6 +2147,66 @@ def start_daemon(
         raise Error('ceph osd start failed', e)
 
 
+def stop_daemon(
+    cluster,
+    osd_id,
+    ):
+    """
+    Stop the ceph-osd daemon of `cluster` osd.`osd_id`.
+
+    The init system is chosen from the tag file (`upstart`, `sysvinit` or
+    `systemd`) present in the OSD directory below STATEDIR.
+
+    Raises Error when the directory carries no init tag, or when one of
+    the stop commands fails.
+    """
+    LOG.debug('Stoping %s osd.%s...', cluster, osd_id)
+
+    path = (STATEDIR + '/osd/{cluster}-{osd_id}').format(
+        cluster=cluster, osd_id=osd_id)
+
+    if os.path.exists(os.path.join(path, 'upstart')):
+        # No --no-wait here: the call must only return once the service
+        # has really stopped.
+        commands = [
+            [
+                '/sbin/initctl',
+                'stop',
+                'ceph-osd',
+                'cluster={cluster}'.format(cluster=cluster),
+                'id={osd_id}'.format(osd_id=osd_id),
+                ],
+            ]
+    elif os.path.exists(os.path.join(path, 'sysvinit')):
+        if os.path.exists('/usr/sbin/service'):
+            svc = '/usr/sbin/service'
+        else:
+            svc = '/sbin/service'
+        commands = [
+            [
+                svc,
+                'ceph',
+                '--cluster',
+                '{cluster}'.format(cluster=cluster),
+                'stop',
+                'osd.{osd_id}'.format(osd_id=osd_id),
+                ],
+            ]
+    elif os.path.exists(os.path.join(path, 'systemd')):
+        unit = 'ceph-osd@{osd_id}'.format(osd_id=osd_id)
+        commands = [
+            ['systemctl', 'disable', unit],
+            ['systemctl', 'stop', unit],
+            ]
+    else:
+        # Raised outside the try block so this specific message is not
+        # replaced by the generic 'ceph osd stop failed'.
+        raise Error('{cluster} osd.{osd_id} is not tagged with an init '
+                    ' system'.format(cluster=cluster, osd_id=osd_id))
+
+    try:
+        for cmd in commands:
+            command_check_call(cmd)
+    except (subprocess.CalledProcessError, OSError) as e:
+        # Keep the cause attached, mirroring start_daemon().
+        raise Error('ceph osd stop failed', e)
+
+
 def detect_fstype(
     dev,
     ):
@@ -2191,11 +2290,60 @@ def mount_activate(
 
     path = mount(dev=dev, fstype=fstype, options=mount_options)
 
+    # check if the disk is deactive, change the journal owner, group
+    # mode for correct user and group.
+    if os.path.exists(os.path.join(path, 'deactive')):
+        # flag to activate a deactive osd.
+        deactive = True
+        journal_dev = os.path.realpath(os.path.join(path,'journal'))
+        try:
+            if get_ceph_user() == 'ceph':
+                command(
+                    [
+                    'chown', '-R', 'ceph:ceph',
+                    journal_dev,
+                    ],
+                )
+                command(
+                    [
+                    'chmod', '660',
+                    journal_dev,
+                    ]
+                )
+        except OSError:
+            pass
+    else:
+        deactive = False
+
     osd_id = None
     cluster = None
     try:
         (osd_id, cluster) = activate(path, activate_key_template, init)
 
+        # Now active successfully
+        # change the gpt partition type for bootup (meet the udev rules)
+        if deactive:
+            # Change OSD gpt partition type
+            if is_mpath(dev):
+                type_code = MPATH_OSD_UUID
+            else:
+                type_code = OSD_UUID
+            _change_gpt_partition_info(dev, type_code)
+
+            # Change Journal gpt partition type
+            if is_mpath(journal_dev):
+                type_code = MPATH_JOURNAL_UUID
+            else:
+                type_code = JOURNAL_UUID
+            _change_gpt_partition_info(journal_dev, type_code)
+
+            # Remove the deactive flag
+            try:
+                os.remove(os.path.join(path, 'deactive'))
+                LOG.info('Remove `deactive` file.')
+            except OSError:
+                pass
+
         # check if the disk is already active, or if something else is already
         # mounted there
         active = False
@@ -2461,6 +2609,176 @@ def main_activate(args):
 
 ###########################
 
+def _mark_osd_out(cluster, osd_id):
+    """
+    Mark osd.`osd_id` of `cluster` out by running `ceph osd out`.
+
+    Raises Error when the command cannot be executed.
+    """
+    LOG.info('Prepare to mark osd.%s out...', osd_id)
+    try:
+        command(
+                [
+                    'ceph',
+                    'osd',
+                    'out',
+                    'osd.%s' % osd_id,
+                    # Address the requested cluster, the same way
+                    # _check_osd_status() does; without this option the
+                    # `cluster` argument would be silently ignored.
+                    '--cluster={cluster}'.format(
+                        cluster=cluster,
+                        ),
+                    ],
+                )
+    except (subprocess.CalledProcessError, OSError):
+        raise Error('Could not find osd.%s, is a vaild/exist osd id?' % osd_id)
+
+
+def _check_osd_status(cluster, osd_id):
+    """
+    Ask `ceph osd find` about the OSD and encode the answer in two bits:
+
+    0 : OSD is OUT and DOWN
+    1 : OSD is OUT and UP
+    2 : OSD is IN and DOWN
+    3 : OSD is IN and UP
+
+    Raises Error when the command fails with CalledProcessError.
+    """
+    LOG.info("Checking osd id: %s ..." % osd_id)
+    find_cmd = [
+        'ceph',
+        'osd',
+        'find',
+        osd_id,
+        '--cluster={cluster}'.format(cluster=cluster),
+        '--format',
+        'json',
+        ]
+    try:
+        out, _ = command(find_cmd)
+    except subprocess.CalledProcessError as e:
+        raise Error(e)
+
+    osd_info = json.loads(out)
+    # High bit: IN/OUT, low bit: UP/DOWN.
+    in_bit = 2 if osd_info['status IN/OUT'] == u'IN' else 0
+    up_bit = 1 if osd_info['status UP/DOWN'] == u'UP' else 0
+    return in_bit + up_bit
+
+
+def _remove_osd_directory_files(mounted_path, cluster):
+    """
+    To remove the 'ready', 'active', INIT-specific files.
+
+    `mounted_path` is the mounted OSD directory; `cluster` is used to look
+    up the `init` configuration value that names the INIT-specific file.
+
+    Removal is best effort: a failure is logged instead of raised.
+    """
+    for flag in ('ready', 'active'):
+        flag_path = os.path.join(mounted_path, flag)
+        if not os.path.exists(flag_path):
+            LOG.info('`%s` file is already removed.', flag)
+            continue
+        try:
+            os.remove(flag_path)
+            LOG.info('Remove `%s` file.', flag)
+        except OSError as e:
+            # Do not abort, but make the failure visible in the log.
+            LOG.warning('Can not remove `%s` file: %s', flag, e)
+
+    # The INIT-specific file is named after the init system: take the
+    # configured value if there is one, otherwise the detected one.
+    conf_val = get_conf(
+        cluster=cluster,
+        variable='init'
+        )
+    if conf_val is not None:
+        init = conf_val
+    else:
+        init = init_get()
+    try:
+        os.remove(os.path.join(mounted_path, init))
+        LOG.info('Remove `%s` file.', init)
+    except OSError as e:
+        # The tag file may simply not exist; keep going, but leave a trace.
+        LOG.debug('Can not remove `%s` file: %s', init, e)
+
+
+def _change_gpt_partition_info(device_part, type_code=LINUX_RESERVED_TYPE):
+    """
+    Due to udev rule 95-ceph-osd.rules, we need to remove the
+    gpt partition type to prevent trigger ceph-disk-activate.
+
+    Also change partition name for zap in destroy stage
+
+    `device_part` is the partition to modify and `type_code` the GPT type
+    GUID to set on it. With LINUX_RESERVED_TYPE the partition name gets a
+    ' (deactive)' suffix; with one of the OSD/journal type codes that
+    suffix is stripped again.
+
+    Raises Error when sgdisk fails.
+    """
+
+    (device, part_num) = split_dev_base_partnum(device_part)
+
+    part_name = get_partition_name(device_part)
+
+    # Compare by value: `is` tests object identity and would miss an equal
+    # GUID string that is not the very same object as the constant.
+    if type_code == LINUX_RESERVED_TYPE:
+        # Do not stack the marker when the partition is already tagged.
+        if not part_name.endswith(' (deactive)'):
+            part_name = part_name + ' (deactive)'
+    elif type_code in (MPATH_JOURNAL_UUID, JOURNAL_UUID,
+                       MPATH_OSD_UUID, OSD_UUID):
+        part_name = part_name.replace(" (deactive)", "")
+
+    try:
+        command_check_call(
+                    [
+                        'sgdisk',
+                        '--change-name=%s:%s' % (part_num, part_name),
+                        '--typecode=%s:%s' % (part_num, type_code),
+                        '--',
+                        device,
+                    ],
+                )
+    except subprocess.CalledProcessError as e:
+        raise Error(e)
+
+
+def main_deactivate(args):
+    """
+    Entry point of the `deactivate` sub-command.
+
+    Reads `args.cluster` (defaults to 'ceph') and `args.osd_id`, then:
+      - marks the OSD out and/or stops its daemon, depending on its status
+        (returns early when it is already out and down)
+      - removes the 'ready', 'active' and INIT-specific files
+      - resets the gpt partition type of the data partition and, when the
+        `journal` entry is a symlink, of the journal partition
+      - creates the `deactive` flag file and unmounts the OSD directory
+
+    Raises Error when no osd id was given.
+    """
+    if args.cluster is None:
+        args.cluster = 'ceph'
+    if args.osd_id is None:
+        # The id is missing here; 0 itself is a perfectly valid OSD id.
+        raise Error("osd id is required. Try to use --osd-id <OSDID>.")
+
+    # Do not do anything if osd is already down.
+    status_code = _check_osd_status(args.cluster, args.osd_id)
+    if status_code == OSD_STATUS_OUT_DOWN:
+        LOG.info("OSD already out/down. Do not do anything now.")
+        return
+    if status_code in (OSD_STATUS_IN_UP, OSD_STATUS_IN_DOWN):
+        _mark_osd_out(args.cluster, args.osd_id)
+    if status_code in (OSD_STATUS_IN_UP, OSD_STATUS_OUT_UP):
+        stop_daemon(args.cluster, args.osd_id)
+
+    # GET the mounted device and mount point.
+    (mounted_dev, mounted_path) = convert_osd_id(args.cluster, args.osd_id)[:2]
+
+    # remove 'ready', 'active', and INIT-specific files.
+    _remove_osd_directory_files(mounted_path, args.cluster)
+
+    # Remove filesystem gpt partition type
+    _change_gpt_partition_info(mounted_dev, LINUX_RESERVED_TYPE)
+
+    # Check journal
+    # if the journal exists as a symlink, remove its gpt partition type too
+    journal_path = os.path.join(mounted_path, 'journal')
+    if os.path.exists(journal_path) and os.path.islink(journal_path):
+        _change_gpt_partition_info(os.path.realpath(journal_path),
+                                   LINUX_RESERVED_TYPE)
+    else:
+        LOG.info('Journal is not exist on osd.%s (or not symlink).',
+                 args.osd_id)
+
+    # Write the (empty) `deactive` flag file into the osd directory.
+    deactive_path = os.path.join(mounted_path, 'deactive')
+    with open(deactive_path, 'w'):
+        path_set_context(deactive_path)
+
+    unmount(mounted_path)
+    LOG.info("Umount `%s` successfully.", mounted_path)
+
+###########################
+
 def get_journal_osd_uuid(path):
     if not os.path.exists(path):
         raise Error('%s does not exist' % path)
@@ -2672,6 +2990,10 @@ def get_partition_type(part):
 def get_partition_uuid(part):
     return get_sgdisk_partition_info(part, 'Partition unique GUID: (\S+)')
 
+def get_partition_name(part):
+    """
+    Return what get_sgdisk_partition_info() extracts from `part` with the
+    `Partition name:` pattern.
+    """
+    return get_sgdisk_partition_info(
+        part,
+        "Partition name: \'*([A-Za-z ]+[ ()A-Za-z]*)\'*",
+        )
+
 def get_sgdisk_partition_info(dev, regexp):
     (base, partnum) = split_dev_base_partnum(dev)
     out, _ = command(['sgdisk', '-i', partnum, base])
@@ -3252,6 +3574,7 @@ def parse_args(argv):
     make_activate_all_parser(subparsers)
     make_list_parser(subparsers)
     make_suppress_parser(subparsers)
+    make_deactivate_parser(subparsers)
     make_zap_parser(subparsers)
     make_trigger_parser(subparsers)
 
@@ -3503,6 +3826,23 @@ def make_suppress_parser(subparsers):
         )
     return suppress_parser
 
+def make_deactivate_parser(subparsers):
+    """
+    Register the `deactivate` sub-command on `subparsers`.
+
+    The sub-command accepts `--cluster` (default 'ceph') and `--osd-id`
+    and dispatches to main_deactivate.
+
+    Returns the new parser, as make_suppress_parser does.
+    """
+    deactivate_parser = subparsers.add_parser('deactivate', help='Deactivate a Ceph OSD')
+    deactivate_parser.add_argument(
+        '--cluster',
+        metavar='NAME',
+        default='ceph',
+        help='name of the cluster the OSD belongs to',
+        )
+    deactivate_parser.add_argument(
+        '--osd-id',
+        metavar='OSDID',
+        help='ID of OSD to deactivate'
+        )
+    deactivate_parser.set_defaults(
+        func=main_deactivate,
+        )
+    return deactivate_parser
+
 def make_zap_parser(subparsers):
     zap_parser = subparsers.add_parser('zap', help='Zap/erase/destroy a device\'s partition table (and contents)')
     zap_parser.add_argument(