ceph orch rm rgw.myrgw
+The same command accepts ``--force`` and ``--force-delete-data``. Use
+``ceph orch rm <service-name> --force --force-delete-data`` when you want
+cephadm to remove on-disk data for supported daemon types instead of relocating
+it under ``<fsid>/removed/``. The latter flag requires ``--force``. The same
+pair of flags applies to ``ceph orch daemon rm <daemon-name>`` when removing
+individual daemons.
+
.. _cephadm-spec-unmanaged:
.. prompt:: bash #
ceph orch rm grafana
- ceph orch rm prometheus --force # this will delete metrics data collected so far
+ ceph orch rm prometheus
ceph orch rm node-exporter
ceph orch rm alertmanager
ceph mgr module disable prometheus
+By default, cephadm moves Prometheus data aside under ``<fsid>/removed/`` on the
+host. To delete that data instead, use
+``ceph orch rm prometheus --force --force-delete-data`` in place of
+``ceph orch rm prometheus`` (``--force-delete-data`` requires ``--force``).
+
See also :ref:`orch-rm`.
Setting up RBD-Image Monitoring
ceph orch apply mds <fs_name> [--placement=<placement>] [--dry-run]
ceph orch apply rgw <name> [--realm=<realm>] [--zone=<zone>] [--port=<port>] [--ssl] [--placement=<placement>] [--dry-run]
ceph orch apply nfs <name> <pool> [--namespace=<namespace>] [--placement=<placement>] [--dry-run]
- ceph orch rm <service_name> [--force]
+ ceph orch rm <service_name> [--force] [--force-delete-data]
where ``placement`` is a :ref:`orchestrator-cli-placement-spec`.
+For ``ceph orch rm``, ``--force`` may be required in some cases (for example when
+removing an OSD service that would leave OSDs behind).
+
+The ``--force-delete-data`` flag requires that ``--force`` is also passed.
+This directs cephadm to delete on-disk data for certain daemon types instead
+of moving it to ``<fsid>/removed/`` on the host. These daemon types include
+``mon``, ``osd``, and ``prometheus``. This helps avoid filling up the
+underlying filesystem over time.
+
e.g., ``ceph orch apply mds myfs --placement="3 host1 host2 host3"``
Service Commands:
spec: ServiceSpec
rank_map: Optional[Dict[int, Dict[int, Optional[str]]]]
created: datetime.datetime
- deleted: Optional[datetime.datetime]
+ deleted: Optional[Tuple[datetime.datetime, bool]]
class SpecStore():
# service_name -> rank -> gen -> daemon_id
self._rank_maps = {} # type: Dict[str, Dict[int, Dict[int, Optional[str]]]]
self.spec_created = {} # type: Dict[str, datetime.datetime]
- self.spec_deleted = {} # type: Dict[str, datetime.datetime]
+ self.spec_deleted = {} # type: Dict[str, Tuple[datetime.datetime, bool]]
self.spec_preview = {} # type: Dict[str, ServiceSpec]
self._needs_configuration: Dict[str, bool] = {}
self.spec_created[service_name] = created
if 'deleted' in j:
- deleted = str_to_datetime(cast(str, j['deleted']))
- self.spec_deleted[service_name] = deleted
+ deleted_ts = str_to_datetime(cast(str, j['deleted']))
+ force_delete_data = cast(
+ bool, j.get('force_delete_data', False)
+ )
+ self.spec_deleted[service_name] = (deleted_ts, force_delete_data)
if 'needs_configuration' in j:
self._needs_configuration[service_name] = cast(bool, j['needs_configuration'])
if name in self._rank_maps:
data['rank_map'] = self._rank_maps[name]
if name in self.spec_deleted:
- data['deleted'] = datetime_to_str(self.spec_deleted[name])
+ deleted_time, force_delete_data = self.spec_deleted[name]
+ data['deleted'] = datetime_to_str(deleted_time)
+ data['force_delete_data'] = force_delete_data
if name in self._needs_configuration:
data['needs_configuration'] = self._needs_configuration[name]
service_name=nvmeof_spec.service_name(),
user_made=True)
- def rm(self, service_name: str) -> bool:
+ def rm(self, service_name: str, force_delete_data: bool = False) -> bool:
if service_name not in self._specs:
return False
self.finally_rm(service_name)
return True
- self.spec_deleted[service_name] = datetime_now()
+ self.spec_deleted[service_name] = (datetime_now(), force_delete_data)
self.save(self._specs[service_name], update_create=False)
return True
else:
size = spec.placement.get_target_count(self.cache.get_schedulable_hosts())
+ deleted_ts = self.spec_store.spec_deleted.get(nm)
svc_desc = orchestrator.ServiceDescription(
spec=spec,
size=size,
running=0,
events=self.events.get_for_service(spec.service_name()),
created=self.spec_store.spec_created[nm],
- deleted=self.spec_store.spec_deleted.get(nm, None),
+ deleted=deleted_ts[0] if deleted_ts else None,
virtual_ip=spec.get_virtual_ip(),
ports=spec.get_port_start(),
)
return msg
@handle_orch_error
- def remove_daemons(self, names):
- # type: (List[str]) -> List[str]
+ def remove_daemons(self,
+ names: List[str],
+ force_delete_data: bool = False) -> List[str]:
+ """
+ Remove specific daemon(s).
+
+ :param names: daemon names to remove
+ :param force: skip safety checks (PRECIOUS DATA warning)
+ :param force_delete_data: if True, request that cephadm delete the
+ daemon data instead of moving it under
+ <fsid>/removed/.
+ """
args = []
for host, dm in self.cache.daemons.items():
for name in names:
if name in dm:
- args.append((name, host))
+ args.append((name, host, force_delete_data))
if not args:
raise OrchestratorError('Unable to find daemon(s) %s' % (names), errno=errno.EINVAL)
self.log.info('Remove daemons %s' % ' '.join([a[0] for a in args]))
return self._remove_daemons(args)
@handle_orch_error
- def remove_service(self, service_name: str, force: bool = False) -> str:
- self.log.info('Remove service %s' % service_name)
+ def remove_service(
+ self,
+ service_name: str,
+ force: bool = False,
+ force_delete_data: bool = False
+ ) -> str:
+ """
+ Remove a service.
+
+ :param service_name: service to remove
+ :param force: skip safety checks (e.g., leftover OSDs)
+ :param force_delete_data: intent to delete backing daemon data instead
+ of moving it under <fsid>/removed/.
+ (actual effect depends on lower layers)
+ """
+ self.log.info(
+ 'Remove service %s (force=%s, force_delete_data=%s)' %
+ (service_name, force, force_delete_data)
+ )
self._trigger_preview_refresh(service_name=service_name)
if service_name in self.spec_store:
if self.spec_store[service_name].spec.service_type in ('mon', 'mgr'):
- return f'Unable to remove {service_name} service.\n' \
- f'Note, you might want to mark the {service_name} service as "unmanaged"'
+ return (
+ f'Unable to remove {service_name} service.\n'
+ f'Note, you might want to mark the {service_name} '
+ f'service as "unmanaged"'
+ )
else:
- return f"Invalid service '{service_name}'. Use 'ceph orch ls' to list available services.\n"
+ return (
+ f"Invalid service '{service_name}'. Use 'ceph orch ls' to "
+ f"list available services.\n"
+ )
# Report list of affected OSDs?
if not force and service_name.startswith('osd.'):
for h, ls in osds_msg.items():
msg += f'\thost {h}: {" ".join([f"osd.{id}" for id in ls])}'
raise OrchestratorError(
- f'If {service_name} is removed then the following OSDs will remain, --force to proceed anyway\n{msg}')
+ f'If {service_name} is removed then the following OSDs '
+ f'will remain, --force to proceed anyway\n{msg}'
+ )
- found = self.spec_store.rm(service_name)
+ found = self.spec_store.rm(service_name, force_delete_data)
if found and service_name.startswith('osd.'):
self.spec_store.finally_rm(service_name)
self._kick_serve_loop()
return previews_for_specs
@forall_hosts
- def _remove_daemons(self, name: str, host: str) -> str:
- return CephadmServe(self)._remove_daemon(name, host)
+ def _remove_daemons(self, name: str, host: str, force_delete_data: bool = False) -> str:
+ # pass force_delete_data by keyword: third positional arg is no_post_remove
+ return CephadmServe(self)._remove_daemon(
+ name, host, force_delete_data=force_delete_data)
def _check_pool_exists(self, pool: str, service_name: str) -> None:
logger.info(f'Checking pool "{pool}" exists for service {service_name}')
if not spec and dd.daemon_type not in ['mon', 'mgr', 'osd']:
# (mon and mgr specs should always exist; osds aren't matched
# to a service spec)
+ force_delete_data = False
+ if dd.service_name() in self.mgr.spec_store.spec_deleted:
+ _, force_delete_data = self.mgr.spec_store.spec_deleted[dd.service_name()]
self.log.info('Removing orphan daemon %s...' % dd.name())
- self._remove_daemon(dd.name(), dd.hostname)
-
- # ignore unmanaged services
- if spec and spec.unmanaged:
+ self._remove_daemon(dd.name(), dd.hostname, force_delete_data=force_delete_data)
+ # This daemon was removed from cache; skip any additional checks
+ # in this iteration to avoid looking up stale daemon entries.
continue
- # ignore daemons for deleted services
- if dd.service_name() in self.mgr.spec_store.spec_deleted:
+ if spec and spec.unmanaged:
continue
if dd.daemon_type == 'agent':
ic_params.append(ic.to_json(flatten_args=True))
return ic_meta
- def _remove_daemon(self, name: str, host: str, no_post_remove: bool = False) -> str:
+ def _remove_daemon(self, name: str, host: str, no_post_remove: bool = False, force_delete_data: bool = False) -> str:
"""
Remove a daemon
"""
service_registry.get_service(daemon_type_to_service(daemon_type)).pre_remove(daemon)
# NOTE: we are passing the 'force' flag here, which means
# we can delete a mon instances data.
+ args = ['--name', name, '--force']
+ if force_delete_data:
+ args.append('--force-delete-data')
if dd.ports:
- args = ['--name', name, '--force', '--tcp-ports', ' '.join(map(str, dd.ports))]
- else:
- args = ['--name', name, '--force']
+ args.extend(['--tcp-ports', ' '.join(map(str, dd.ports))])
self.log.info('Removing daemon %s from %s -- ports %s' % (name, host, dd.ports))
with self.mgr.async_timeout_handler(host, f'cephadm rm-daemon (daemon {name})'):
out = wait(cephadm_module, c)
assert out == ["Removed rgw.myrgw.myhost.myid from host 'test'"]
+ @pytest.mark.parametrize(
+ "daemon_name",
+ ["osd.424242", "prometheus.force_delete_data_regression"]
+ )
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_remove_daemon_force_delete_data_passes_cephadm_flag(self, _run_cephadm, cephadm_module, daemon_name):
+ """Regression: force_delete_data must be passed as a keyword to _remove_daemon.
+
+ Otherwise the third positional arg is bound to no_post_remove and cephadm
+ never receives --force-delete-data (see _remove_daemon parameter order).
+ """
+ ls_json = json.dumps([
+ dict(
+ name=daemon_name,
+ style='cephadm',
+ fsid='fsid',
+ container_id='container_id',
+ version='version',
+ state='running',
+ )
+ ])
+
+ async def side_effect(host, entity, command, args, **kwargs):
+ if command == 'ls':
+ return [ls_json], '', 0
+ return ['{}'], '', 0
+
+ _run_cephadm.side_effect = side_effect
+
+ with with_host(cephadm_module, 'test'):
+ CephadmServe(cephadm_module)._refresh_host_daemons('test')
+ c = cephadm_module.list_daemons()
+ wait(cephadm_module, c)
+ c = cephadm_module.remove_daemons([daemon_name], force_delete_data=True)
+ wait(cephadm_module, c)
+
+ rm_daemon_calls = [
+ call for call in _run_cephadm.call_args_list
+ if len(call[0]) >= 4 and call[0][2] == 'rm-daemon'
+ ]
+ assert len(rm_daemon_calls) == 1
+ rm_args = rm_daemon_calls[0][0][3]
+ assert '--force-delete-data' in rm_args
+
@mock.patch("cephadm.serve.CephadmServe._run_cephadm")
def test_remove_duplicate_osds(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
_run_cephadm.side_effect = async_side_effect(('{}', '', 0))
"""
raise NotImplementedError()
- def remove_daemons(self, names: List[str]) -> OrchResult[List[str]]:
+ def remove_daemons(self, names: List[str], force_delete_data: bool = False) -> OrchResult[List[str]]:
"""
Remove specific daemon(s).
"""
raise NotImplementedError()
- def remove_service(self, service_name: str, force: bool = False) -> OrchResult[str]:
+ def remove_service(self, service_name: str, force: bool = False, force_delete_data: bool = False) -> OrchResult[str]:
"""
Remove a service (a collection of daemons).
@OrchestratorCLICommand.Write('orch daemon rm')
def _daemon_rm(self,
names: List[str],
- force: Optional[bool] = False) -> HandleCommandResult:
- """Remove specific daemon(s)"""
+ force: bool = False,
+ force_delete_data: bool = False) -> HandleCommandResult:
+ """
+ Remove specific daemon(s).
+
+ When used with --force-delete-data, data for certain daemon types
+ (mon, osd, prometheus) will be deleted instead of being moved to
+ <fsid>/removed/.
+ """
for name in names:
if '.' not in name:
- return HandleCommandResult(stderr=f"{name} is not a valid daemon name", retval=-errno.EINVAL)
- (daemon_type) = name.split('.')[0]
+ return HandleCommandResult(
+ stderr=f"{name} is not a valid daemon name",
+ retval=-errno.EINVAL
+ )
+
+ daemon_type = name.split('.')[0]
+
if not force and daemon_type in ['osd', 'mon', 'prometheus']:
- return HandleCommandResult(stderr=f"must pass --force to REMOVE daemon with potentially PRECIOUS DATA for {name}", retval=-errno.EPERM)
- completion = self.remove_daemons(names)
+ return HandleCommandResult(
+ stderr=f"must pass --force to remove daemon with potentially precious data for {name}",
+ retval=-errno.EPERM
+ )
+
+ if force_delete_data and not force:
+ # extra safety: don’t allow delete-data without force
+ return HandleCommandResult(
+ stderr="--force-delete-data requires --force",
+ retval=-errno.EPERM
+ )
+
+ completion = self.remove_daemons(
+ names,
+ force_delete_data=force_delete_data,
+ )
return completion_to_result(completion)
@OrchestratorCLICommand.Write('orch rm')
def _service_rm(self,
service_name: str,
- force: bool = False) -> HandleCommandResult:
- """Remove a service"""
+ force: bool = False,
+ force_delete_data: bool = False) -> HandleCommandResult:
+ """
+ Remove a service.
+
+ When used with --force-delete-data, data for stateful daemons belonging
+ to this service (e.g. mon, osd, prometheus) will be deleted instead of
+ being moved to <fsid>/removed/.
+ """
if service_name in ['mon', 'mgr'] and not force:
raise OrchestratorError('The mon and mgr services cannot be removed')
- completion = self.remove_service(service_name, force=force)
+
+ if force_delete_data and not force:
+ # same safety rule as for daemon rm
+ raise OrchestratorError('--force-delete-data requires --force')
+
+ completion = self.remove_service(
+ service_name,
+ force=force,
+ force_delete_data=force_delete_data,
+ )
raise_if_exception(completion)
return HandleCommandResult(stdout=completion.result_str())