from ceph.deployment.drive_group import DeviceSelection
from ceph.utils import str_to_datetime, datetime_to_str, datetime_now
from ceph.cryptotools.select import choose_crypto_caller
-from cephadm.serve import CephadmServe, REQUIRES_POST_ACTIONS
+from cephadm.serve import CephadmServe
from cephadm.services.cephadmservice import CephadmDaemonDeploySpec
from cephadm.http_server import CephadmHttpServer
from cephadm.agent import CephadmAgentHelpers
'unknown': DaemonDescriptionStatus.error,
}[d['state']]
+ cached_dd = None
+ try:
+ cached_dd = self.cache.get_daemon(d['name'], host)
+ except OrchestratorError:
+ self.log.debug(f'Could not find daemon {d["name"]} in cache')
+
sd = orchestrator.DaemonDescription(
daemon_type=daemon_type,
daemon_id='.'.join(d['name'].split('.')[1:]),
rank_generation=rank_generation,
extra_container_args=d.get('extra_container_args'),
extra_entrypoint_args=d.get('extra_entrypoint_args'),
+ pending_daemon_config=cached_dd.pending_daemon_config if cached_dd else False,
+ user_stopped=cached_dd.user_stopped if cached_dd else False,
)
- if daemon_type in REQUIRES_POST_ACTIONS:
- # If post action is required for daemon, then restore value of pending_daemon_config
- try:
- cached_dd = self.cache.get_daemon(sd.name(), host)
- sd.update_pending_daemon_config(cached_dd.pending_daemon_config)
- except orchestrator.OrchestratorError:
- pass
-
dm[sd.name()] = sd
self.log.debug('Refreshed host %s daemons (%d)' % (host, len(dm)))
self.cache.update_host_daemons(host, dm)
# Stop tracking `host` as offline.
#
# If `host` is currently listed in self.offline_hosts it is removed from that
# collection; the change under review additionally calls
# self._invalidate_all_host_metadata_and_kick_serve(host).
# NOTE(review): indentation is not visible in this view -- presumably the new
# call belongs inside the `if`, so it only fires when the host was actually
# marked offline rather than on every call; confirm against the real file.
def offline_hosts_remove(self, host: str) -> None:
if host in self.offline_hosts:
self.offline_hosts.remove(host)
+ self._invalidate_all_host_metadata_and_kick_serve(host)
def update_failed_daemon_health_check(self) -> None:
failed_daemons = []
out, err, code = self.wait_async(CephadmServe(self)._run_cephadm(
daemon_spec.host, name, 'unit',
['--name', name, a]))
- except Exception:
- self.log.exception(f'`{daemon_spec.host}: cephadm unit {name} {a}` failed')
+ except Exception as exp:
+ if a == 'reset-failed' and daemon_spec.daemon_type in ['nfs'] and 'not loaded' in str(exp):
+ # Don't log exception if reset-failed fails because the unit is not loaded
+ pass
+ else:
+ self.log.exception(f'`{daemon_spec.host}: cephadm unit {name} {a}` failed')
self.cache.invalidate_host_daemons(daemon_spec.host)
msg = "{} {} from host '{}'".format(action, name, daemon_spec.host)
self.events.for_daemon(name, 'INFO', msg)
d = self.cache.get_daemon(daemon_name)
assert d.daemon_type is not None
assert d.daemon_id is not None
+ assert d.hostname
if (action == 'redeploy' or action == 'restart') and self.daemon_is_self(d.daemon_type, d.daemon_id) \
and not self.mgr_service.mgr_map_has_standby():
f'key rotation not supported for {d.daemon_type}'
)
+ # Track user-initiated stop/start actions
+ if action == 'stop':
+ d.user_stopped = True
+ self.cache.update_host_daemons(d.hostname, {d.name(): d})
+ elif action in ['start', 'restart']:
+ d.user_stopped = False
+ self.cache.update_host_daemons(d.hostname, {d.name(): d})
+
self._daemon_action_set_image(action, image, d.daemon_type, d.daemon_id)
self.log.info(f'Schedule {action} daemon {daemon_name}')
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Daemon type names for which a post action is required.
REQUIRES_POST_ACTIONS = ['grafana', 'iscsi', 'prometheus', 'alertmanager', 'rgw', 'nvmeof', 'mgmt-gateway']
# Daemon type names that the daemon check schedules a 'start' action for when
# the daemon reports status 0 and is not flagged user_stopped.
# NOTE(review): the name suggests "services that are disabled", which does not
# obviously describe that use -- confirm the intent or consider renaming.
+DISABLED_SERVICES = ['nfs']
# RemoteExecutable handles for the `which` command and for the cephadm binary
# at its fixed path /usr/bin/cephadm (presumably invoked on remote hosts over
# ssh -- verify against ssh.RemoteExecutable).
WHICH = ssh.RemoteExecutable('which')
CEPHADM_EXE = ssh.RemoteExecutable('/usr/bin/cephadm')
dd.daemon_type in CEPH_TYPES:
self.log.info('Reconfiguring %s (extra config changed)...' % dd.name())
action = 'reconfig'
+ elif dd.daemon_type in DISABLED_SERVICES:
+ if dd.status == 0 and not dd.user_stopped:
+ self.log.debug(f'Starting daemon {dd.name()}')
+ action = 'start'
if action:
if self.mgr.cache.get_scheduled_daemon_action(dd.hostname, dd.name()) == 'redeploy' \
rank_generation: Optional[int] = None,
extra_container_args: Optional[GeneralArgList] = None,
extra_entrypoint_args: Optional[GeneralArgList] = None,
- pending_daemon_config: bool = False
+ pending_daemon_config: bool = False,
+ user_stopped: bool = False
) -> None:
#: Host is at the same granularity as InventoryHost
self.extra_entrypoint_args = ArgumentSpec.from_general_args(
extra_entrypoint_args)
self.pending_daemon_config = pending_daemon_config
+ self.user_stopped = user_stopped
def __setattr__(self, name: str, value: Any) -> None:
if value is not None and name in ('extra_container_args', 'extra_entrypoint_args'):
out['rank_generation'] = self.rank_generation
out['systemd_unit'] = self.systemd_unit
out['pending_daemon_config'] = self.pending_daemon_config
+ out['user_stopped'] = self.user_stopped
for k in ['last_refresh', 'created', 'started', 'last_deployed',
'last_configured']:
out['ip'] = self.ip
out['systemd_unit'] = self.systemd_unit
out['pending_daemon_config'] = self.pending_daemon_config
+ out['user_stopped'] = self.user_stopped
for k in ['last_refresh', 'created', 'started', 'last_deployed',
'last_configured']: