- if encrypted, map the dmcrypt volume
- mount the volume in a temp location
- allocate an osd id (if needed)
+ - if deactivated, change the gpt partition info back to the ceph values
- remount in the correct location /var/lib/ceph/osd/$cluster-$id
+ - remove the deactive flag
- start ceph-osd
- triggered by udev when it sees the OSD gpt partition type
- triggered by admin 'ceph-disk activate <path>'
- triggered on ceph service startup with 'ceph-disk activate-all'
+Deactivate:
+ - stop the ceph-osd service if needed (if the osd is still in the osd map,
+   mark it out first)
+ - remove 'ready', 'active', and INIT-specific files
+ - remove the gpt partition type and change the partition name (so that
+   udev does not trigger activation again)
+ - create the deactive flag
+ - umount the device and remove the mount point
+
We rely on /dev/disk/by-partuuid to find partitions by their UUID;
this is what the journal symlink inside the osd data volume normally
points to.
CEPH_OSD_ONDISK_MAGIC = 'ceph osd volume v026'
# GPT partition type written to a partition when its OSD is deactivated,
# so that the udev rules no longer trigger activation for it.
LINUX_RESERVED_TYPE = '8da63339-0007-60c0-c436-083ac8230908'
# GPT partition types of journal partitions.
JOURNAL_UUID = '45b0969e-9b03-4f30-b4c6-b4b80ceff106'
MPATH_JOURNAL_UUID = '45b0969e-8ae0-4982-bf9d-5a8d867af560'
DMCRYPT_JOURNAL_UUID = '45b0969e-9b03-4f30-b4c6-5ec00ceff106'
DEFAULT_FS_TYPE = 'xfs'
SYSFS = '/sys'
# OSD status definition: a two-bit code, where bit 1 (value 2) is set when
# the OSD is IN and bit 0 (value 1) is set when the OSD is UP.
OSD_STATUS_OUT_DOWN = 0
OSD_STATUS_OUT_UP = 1
OSD_STATUS_IN_DOWN = 2
OSD_STATUS_IN_UP = 3
+
MOUNT_OPTIONS = dict(
btrfs='noatime,user_subvol_rm_allowed',
# user_xattr is default ever since linux 2.6.39 / 3.0, but we'll
raise BadMagicError(path)
def convert_osd_id(cluster, osd_id):
    """
    Find the device and mount point of the OSD `<cluster>-<osd_id>`.

    Scans /proc/mounts for an entry whose mount point's last path
    component is exactly `<cluster>-<osd_id>`.

    :param cluster: cluster name
    :param osd_id: OSD id
    :return: a list `[device, mount_point]` for the first matching entry
    :raises Error: if no matching mount is found
    """
    mountsp_name = '%s-%s' % (cluster, osd_id)

    with open('/proc/mounts', 'r') as proc_mounts:
        for line in proc_mounts:
            # first field is the `device`, second the `mount point`
            fields = line.split()
            if len(fields) < 2:
                continue
            # Compare the whole last path component: a plain substring
            # test would make osd id 1 also match ceph-10, ceph-11, ...
            if os.path.basename(fields[1].rstrip('/')) == mountsp_name:
                return [fields[0], fields[1]]
    raise Error('Can not find mount point by osd-id')
+
+
def check_osd_id(osd_id):
"""
Ensures osd id is numeric.
raise Error('ceph osd start failed', e)
def stop_daemon(
    cluster,
    osd_id,
):
    """
    Stop the ceph-osd daemon of `osd_id` in `cluster`.

    The init system is chosen by the tag file (`upstart`, `sysvinit` or
    `systemd`) found in the OSD directory under STATEDIR.

    :param cluster: cluster name
    :param osd_id: OSD id
    :raises Error: if the OSD directory carries no init system tag, or if
                   the stop command fails
    """
    LOG.debug('Stoping %s osd.%s...', cluster, osd_id)

    path = (STATEDIR + '/osd/{cluster}-{osd_id}').format(
        cluster=cluster, osd_id=osd_id)

    try:
        if os.path.exists(os.path.join(path, 'upstart')):
            command_check_call(
                [
                    '/sbin/initctl',
                    'stop',
                    # no --no-wait here: we must guarantee that the
                    # service is really stopped before going on.
                    'ceph-osd',
                    'cluster={cluster}'.format(cluster=cluster),
                    'id={osd_id}'.format(osd_id=osd_id),
                ],
            )
        elif os.path.exists(os.path.join(path, 'sysvinit')):
            if os.path.exists('/usr/sbin/service'):
                svc = '/usr/sbin/service'
            else:
                svc = '/sbin/service'
            command_check_call(
                [
                    svc,
                    'ceph',
                    '--cluster',
                    '{cluster}'.format(cluster=cluster),
                    'stop',
                    'osd.{osd_id}'.format(osd_id=osd_id),
                ],
            )
        elif os.path.exists(os.path.join(path, 'systemd')):
            command_check_call(
                [
                    'systemctl',
                    'disable',
                    'ceph-osd@{osd_id}'.format(osd_id=osd_id),
                ],
            )
            command_check_call(
                [
                    'systemctl',
                    'stop',
                    'ceph-osd@{osd_id}'.format(osd_id=osd_id),
                ],
            )
        else:
            raise Error('{cluster} osd.{osd_id} is not tagged with an init '
                        ' system'.format(cluster=cluster, osd_id=osd_id,))
    except (subprocess.CalledProcessError, OSError) as e:
        # Only failures of the stop command itself are translated; the
        # "not tagged" Error above propagates with its own message, and
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        raise Error('ceph osd stop failed', e)
+
+
def detect_fstype(
dev,
):
path = mount(dev=dev, fstype=fstype, options=mount_options)
+ # check if the disk is deactive, change the journal owner, group
+ # mode for correct user and group.
+ if os.path.exists(os.path.join(path, 'deactive')):
+ # flag to activate a deactive osd.
+ deactive = True
+ journal_dev = os.path.realpath(os.path.join(path,'journal'))
+ try:
+ if get_ceph_user() == 'ceph':
+ command(
+ [
+ 'chown', '-R', 'ceph:ceph',
+ journal_dev,
+ ],
+ )
+ command(
+ [
+ 'chmod', '660',
+ journal_dev,
+ ]
+ )
+ except OSError:
+ pass
+ else:
+ deactive = False
+
osd_id = None
cluster = None
try:
(osd_id, cluster) = activate(path, activate_key_template, init)
+ # Now active successfully
+ # change the gpt partition type for bootup (meet the udev rules)
+ if deactive:
+ # Change OSD gpt partition type
+ if is_mpath(dev):
+ type_code = MPATH_OSD_UUID
+ else:
+ type_code = OSD_UUID
+ _change_gpt_partition_info(dev, type_code)
+
+ # Change Journal gpt partition type
+ if is_mpath(journal_dev):
+ type_code = MPATH_JOURNAL_UUID
+ else:
+ type_code = JOURNAL_UUID
+ _change_gpt_partition_info(journal_dev, type_code)
+
+ # Remove the deactive flag
+ try:
+ os.remove(os.path.join(path, 'deactive'))
+ LOG.info('Remove `deactive` file.')
+ except OSError:
+ pass
+
# check if the disk is already active, or if something else is already
# mounted there
active = False
###########################
def _mark_osd_out(cluster, osd_id):
    """
    Mark `osd.<osd_id>` out by running `ceph osd out`.

    :param cluster: cluster name, passed to the ceph command line
    :param osd_id: OSD id
    :raises Error: if the command could not be run
    """
    LOG.info('Prepare to mark osd.%s out...', osd_id)
    try:
        command(
            [
                'ceph',
                'osd',
                'out',
                'osd.%s' % osd_id,
                # address the requested cluster rather than the default one
                '--cluster={cluster}'.format(
                    cluster=cluster,
                ),
            ],
        )
    except (subprocess.CalledProcessError, OSError) as e:
        raise Error(
            'Could not find osd.%s, is a vaild/exist osd id?' % osd_id,
            e,
        )
+
+
def _check_osd_status(cluster, osd_id):
    """
    Report the osd status as a two-bit code:
        00(0) : means OSD OUT AND DOWN
        01(1) : means OSD OUT AND UP
        10(2) : means OSD IN AND DOWN
        11(3) : means OSD IN AND UP

    The status is read from the JSON output of `ceph osd find`, using
    its `status IN/OUT` and `status UP/DOWN` entries.

    :param cluster: cluster name
    :param osd_id: OSD id
    :return: the status code described above
    :raises Error: if the command fails or its output cannot be interpreted
    """
    LOG.info("Checking osd id: %s ...", osd_id)
    try:
        out, ret = command(
            [
                'ceph',
                'osd',
                'find',
                osd_id,
                '--cluster={cluster}'.format(
                    cluster=cluster,
                ),
                '--format',
                'json',
            ],
        )
    except subprocess.CalledProcessError as e:
        raise Error(e)

    # Report malformed output as an Error instead of letting a raw
    # ValueError/KeyError escape to the caller.
    try:
        out_json = json.loads(out)
        osd_in = out_json['status IN/OUT'] == u'IN'
        osd_up = out_json['status UP/DOWN'] == u'UP'
    except (ValueError, KeyError, TypeError) as e:
        raise Error('Could not read the status of osd.%s' % osd_id, e)

    status_code = 0
    if osd_in:
        status_code += 2
    if osd_up:
        status_code += 1
    return status_code
+
+
def _remove_osd_directory_files(mounted_path, cluster):
    """
    Remove the 'ready', 'active' and INIT-specific files from the OSD
    directory.

    Removal is best effort: a failure is logged and does not raise.

    :param mounted_path: mount point of the OSD data directory
    :param cluster: cluster name, used to look up the `init` setting
    """
    for name in ('ready', 'active'):
        flag_path = os.path.join(mounted_path, name)
        if not os.path.exists(flag_path):
            LOG.info('`%s` file is already removed.', name)
            continue
        try:
            os.remove(flag_path)
            LOG.info('Remove `%s` file.', name)
        except OSError as e:
            # keep going, but do not hide the failure
            LOG.warning('Could not remove `%s` file: %s', name, e)

    # The INIT-specific file is named after the init system: take it
    # from the configuration, or detect it when it is not configured.
    conf_val = get_conf(
        cluster=cluster,
        variable='init'
    )
    if conf_val is not None:
        init = conf_val
    else:
        init = init_get()
    try:
        os.remove(os.path.join(mounted_path, init))
        LOG.info('Remove `%s` file.', init)
    except OSError as e:
        LOG.debug('Could not remove `%s` file: %s', init, e)
+
+
def _change_gpt_partition_info(device_part, type_code=LINUX_RESERVED_TYPE):
    """
    Due to udev rule 95-ceph-osd.rules, we need to remove the
    gpt partition type to prevent trigger ceph-disk-activate.

    Also change partition name for zap in destroy stage

    :param device_part: partition device to change
    :param type_code: GPT partition type to set. With LINUX_RESERVED_TYPE
                      the partition name is tagged with ' (deactive)';
                      with one of the OSD/journal types the tag is removed.
    :raises Error: if sgdisk fails
    """

    (device, part_num) = split_dev_base_partnum(device_part)

    part_name = get_partition_name(device_part)

    # Compare by value: `is` tests object identity, which only happens
    # to hold for strings that are the very same object.
    if type_code == LINUX_RESERVED_TYPE:
        # do not stack the tag when the partition is already tagged
        if not part_name.endswith(' (deactive)'):
            part_name = part_name + ' (deactive)'

    if type_code in (MPATH_JOURNAL_UUID, JOURNAL_UUID,
                     MPATH_OSD_UUID, OSD_UUID):
        part_name = part_name.replace(" (deactive)", "")

    try:
        command_check_call(
            [
                'sgdisk',
                '--change-name=%s:%s' % (part_num, part_name),
                '--typecode=%s:%s' % (part_num, type_code),
                '--',
                device,
            ],
        )
    except subprocess.CalledProcessError as e:
        raise Error(e)
+
+
def main_deactivate(args):
    """
    Entry point of `ceph-disk deactivate`.

    Marks the OSD out and/or stops its daemon depending on its current
    status, removes the 'ready', 'active' and INIT-specific files, changes
    the gpt partition info of the data (and journal) partition, writes the
    `deactive` flag file and unmounts the OSD directory.

    :param args: parsed command line; reads `args.cluster` and
                 `args.osd_id`
    :raises Error: if no osd id was given
    """
    if args.cluster is None:
        args.cluster = 'ceph'
    if args.osd_id is None:
        raise Error("osd id is required. Try to use --osd-id <OSDID>.")

    # Do not do anything if osd is already down.
    status_code = _check_osd_status(args.cluster, args.osd_id)
    if status_code == OSD_STATUS_IN_UP:
        _mark_osd_out(args.cluster, args.osd_id)
        stop_daemon(args.cluster, args.osd_id)
    elif status_code == OSD_STATUS_IN_DOWN:
        _mark_osd_out(args.cluster, args.osd_id)
    elif status_code == OSD_STATUS_OUT_UP:
        stop_daemon(args.cluster, args.osd_id)
    elif status_code == OSD_STATUS_OUT_DOWN:
        LOG.info("OSD already out/down. Do not do anything now.")
        return

    # GET the mounted device and mount point.
    mount_info = convert_osd_id(args.cluster, args.osd_id)
    device = mount_info[0]
    mounted_path = mount_info[1]

    # remove 'ready', 'active', and INIT-specific files.
    _remove_osd_directory_files(mounted_path, args.cluster)

    # Remove filesystem gpt partition type
    _change_gpt_partition_info(device, LINUX_RESERVED_TYPE)

    # Check journal
    # if the journal exists as a symlink, remove its gpt partition type
    journal_path = os.path.join(mounted_path, 'journal')
    if os.path.exists(journal_path) and os.path.islink(journal_path):
        _change_gpt_partition_info(os.path.realpath(journal_path),
                                   LINUX_RESERVED_TYPE)
    else:
        LOG.info('Journal is not exist on osd.%s (or not symlink).',
                 args.osd_id)

    # Write deactivate to osd directory!
    deactive_flag = os.path.join(mounted_path, 'deactive')
    with open(deactive_flag, 'w'):
        path_set_context(deactive_flag)

    unmount(mounted_path)
    LOG.info("Umount `%s` successfully.", mounted_path)
+
+###########################
+
def get_journal_osd_uuid(path):
if not os.path.exists(path):
raise Error('%s does not exist' % path)
def get_partition_uuid(part):
    """
    Return what get_sgdisk_partition_info() extracts for the
    `Partition unique GUID` entry of partition `part`.
    """
    # raw string: `\S` is not a valid escape in a plain string literal
    return get_sgdisk_partition_info(part, r'Partition unique GUID: (\S+)')
def get_partition_name(part):
    """
    Return what get_sgdisk_partition_info() extracts for the
    `Partition name` entry of partition `part`.

    The pattern accepts letters, spaces and parentheses, optionally
    wrapped in single quotes.
    """
    return get_sgdisk_partition_info(
        part,
        "Partition name: \'*([A-Za-z ]+[ ()A-Za-z]*)\'*",
    )
+
def get_sgdisk_partition_info(dev, regexp):
(base, partnum) = split_dev_base_partnum(dev)
out, _ = command(['sgdisk', '-i', partnum, base])
make_activate_all_parser(subparsers)
make_list_parser(subparsers)
make_suppress_parser(subparsers)
+ make_deactivate_parser(subparsers)
make_zap_parser(subparsers)
make_trigger_parser(subparsers)
)
return suppress_parser
def make_deactivate_parser(subparsers):
    """
    Register the `deactivate` sub-command.

    :param subparsers: the argparse sub-parsers object to add the command to
    :return: the parser created for `deactivate`
    """
    deactivate_parser = subparsers.add_parser(
        'deactivate',
        help='Deactivate a Ceph OSD',
    )
    deactivate_parser.add_argument(
        '--cluster',
        metavar='NAME',
        default='ceph',
        help='cluster name to assign this disk to',
    )
    deactivate_parser.add_argument(
        '--osd-id',
        metavar='OSDID',
        help='ID of OSD to deactivate'
    )
    deactivate_parser.set_defaults(
        func=main_deactivate,
    )
    # returned for consistency with the other make_*_parser helpers
    return deactivate_parser
+
def make_zap_parser(subparsers):
zap_parser = subparsers.add_parser('zap', help='Zap/erase/destroy a device\'s partition table (and contents)')
zap_parser.add_argument(