# ensure the host lists are in sync
for h in self.inventory.keys():
if h not in self.cache.daemons:
- self.log.debug('adding service item for %s' % h)
self.cache.prime_empty_host(h)
for h in self.cache.get_hosts():
if h not in self.inventory:
self.cache.rm_host(h)
def shutdown(self):
    """Stop the module's background machinery.

    Drains and joins the remote-command worker pool, then clears the
    ``run`` flag so the serve loop exits on its next iteration.
    """
    # Demoted from info to debug: shutdown is routine, not noteworthy
    # (matches the surrounding patch's log-noise reduction).
    self.log.debug('shutdown')
    # Stop accepting new work, then wait for in-flight tasks to finish.
    self._worker_pool.close()
    self._worker_pool.join()
    # Signals the serve() loop to terminate.
    self.run = False
    # NOTE(review): re-publishes current health checks on shutdown;
    # presumably flushes any pending updates — confirm against caller.
    self.set_health_checks(self.health_checks)
def _update_upgrade_progress(self, progress):
- self.log.debug('upgrade progress %f' % progress)
if 'progress_id' not in self.upgrade_state:
self.upgrade_state['progress_id'] = str(uuid.uuid4())
self._save_upgrade_state()
out, err, code = self._run_cephadm(
d.hostname, None, 'inspect-image', [],
image=target_name, no_fsid=True, error_ok=True)
- self.log.debug('out %s code %s' % (out, code))
if code or json.loads(''.join(out)).get('image_id') != target_id:
self.log.info('Upgrade: Pulling %s on %s' % (target_name,
d.hostname))
'prefix': 'versions',
})
j = json.loads(out)
- self.log.debug('j %s' % j)
for version, count in j.get(daemon_type, {}).items():
if version != target_version:
self.log.warning(
if section.startswith(daemon_type + '.'):
to_clean.append(section)
if to_clean:
- self.log.info('Upgrade: Cleaning up container_image for %s...' %
- to_clean)
+ self.log.debug('Upgrade: Cleaning up container_image for %s...' %
+ to_clean)
for section in to_clean:
ret, image, err = self.mon_command({
'prefix': 'config rm',
def serve(self):
# type: () -> None
- self.log.info("serve starting")
+ self.log.debug("serve starting")
while self.run:
self._check_hosts()
break
if completion.exception is not None:
self.log.error(str(completion.exception))
- self.log.debug('did _do_upgrade')
else:
self._serve_sleep()
- self.log.info("serve exit")
+ self.log.debug("serve exit")
def config_notify(self):
"""
if len([d for d in existing if d.daemon_id == name]):
if not suffix:
raise orchestrator.OrchestratorValidationError('name %s already in use', name)
- self.log.warning('name %s exists, trying again', name)
+ self.log.debug('name %s exists, trying again', name)
continue
return name
self._ssh_options = ' '.join(ssh_options) # type: Optional[str]
else:
self._ssh_options = None
- self.log.info('ssh_options %s' % ssh_options)
if self.mode == 'root':
self.ssh_user = 'root'
Does nothing, as completions are processed in another thread.
"""
if completions:
- self.log.info("process: completions={0}".format(orchestrator.pretty_print(completions)))
+ self.log.debug("process: completions={0}".format(orchestrator.pretty_print(completions)))
for p in completions:
p.finalize()
if 'CEPHADM_HOST_CHECK_FAILED' in self.health_checks:
for item in self.health_checks['CEPHADM_HOST_CHECK_FAILED']['detail']:
if item.startswith('host %s ' % host):
- self.log.debug('kicking serve thread')
self.event.set()
return 0, '%s (%s) ok' % (host, addr), err
if 'CEPHADM_HOST_CHECK_FAILED' in self.health_checks:
for item in self.health_checks['CEPHADM_HOST_CHECK_FAILED']['detail']:
if item.startswith('host %s ' % host):
- self.log.debug('kicking serve thread')
self.event.set()
return 0, '%s (%s) ok' % (host, addr), err
self.log.debug('Have connection to %s' % host)
return conn_and_r
n = self.ssh_user + '@' + host
- self.log.info("Opening connection to {} with ssh options '{}'".format(
+ self.log.debug("Opening connection to {} with ssh options '{}'".format(
n, self._ssh_options))
conn = remoto.Connection(
n,
if not executable_path:
raise RuntimeError("Executable '{}' not found on host '{}'".format(
executable, conn.hostname))
- self.log.info("Found executable '{}' at path '{}'".format(executable,
+ self.log.debug("Found executable '{}' at path '{}'".format(executable,
executable_path))
return executable_path
if self.mode == 'root':
self.log.debug('args: %s' % (' '.join(final_args)))
- self.log.debug('stdin: %s' % stdin)
+ if stdin:
+ self.log.debug('stdin: %s' % stdin)
script = 'injected_argv = ' + json.dumps(final_args) + '\n'
if stdin:
script += 'injected_stdin = ' + json.dumps(stdin) + '\n'
else:
assert False, 'unsupported mode'
+ self.log.debug('code: %d' % code)
+ if out:
+ self.log.debug('out: %s' % '\n'.join(out))
+ if err:
+ self.log.debug('err: %s' % '\n'.join(err))
if code and not error_ok:
raise RuntimeError(
'cephadm exited with an error code: %d, stderr:%s' % (
"""
r = []
for hostname, info in self.inventory.items():
- self.log.debug('host %s info %s' % (hostname, info))
r.append(orchestrator.HostSpec(
hostname,
addr=info.get('addr', hostname),
dm = {}
for d in ls:
if not d['style'].startswith('cephadm'):
- self.log.debug('ignoring non-cephadm on %s: %s' % (host, d))
continue
if d['fsid'] != self._cluster_fsid:
- self.log.debug('ignoring foreign daemon on %s: %s' % (host, d))
continue
if '.' not in d['name']:
- self.log.debug('ignoring dot-less daemon on %s: %s' % (host, d))
continue
- self.log.debug('including %s %s' % (host, d))
sd = orchestrator.DaemonDescription()
sd.last_refresh = datetime.datetime.utcnow()
sd.daemon_type = d['name'].split('.')[0]
sd.status_desc = 'unknown'
sd.status = None
dm[sd.name()] = sd
- self.log.debug('Refreshed host %s daemons: %s' % (host, dm))
+ self.log.debug('Refreshed host %s daemons (%d)' % (host, len(dm)))
self.cache.update_host_daemons(host, dm)
self.cache.save_host(host)
return None
except Exception as e:
return 'host %s ceph-volume inventory failed: %s' % (host, e)
data = json.loads(''.join(out))
- self.log.debug('Refreshed host %s devices: %s' % (host, data))
+ self.log.debug('Refreshed host %s devices (%d)' % (host, len(data)))
devices = inventory.Devices.from_json(data)
self.cache.update_host_devices(host, devices.devices)
self.cache.save_host(host)
if daemon_id and daemon_id != dd.daemon_id:
continue
result.append(dd)
- self.log.debug('list_daemons result %s' % result)
return trivial_result(result)
def service_action(self, action, service_name):
- self.log.debug('service_action action %s name %s' % (
- action, service_name))
args = []
for host, dm in self.cache.daemons.items():
for name, d in dm.items():
['--name', name, a],
error_ok=True)
self.cache.invalidate_host_daemons(host)
- self.log.debug('_daemon_action code %s out %s' % (code, out))
return "{} {} from host '{}'".format(action, name, host)
def daemon_action(self, action, daemon_type, daemon_id):
host, 'osd', 'ceph-volume',
_cmd,
stdin=j)
- self.log.debug('ceph-volume prepare: %s' % out)
# check result
out, err, code = self._run_cephadm(
'lvm', 'list',
'--format', 'json',
])
- self.log.debug('code %s out %s' % (code, out))
osds_elems = json.loads('\n'.join(out))
fsid = self._cluster_fsid
osd_uuid_map = self.get_osd_uuid_map()
'--name', name,
] + extra_args,
stdin=j)
- self.log.debug('create_daemon code %s out %s' % (code, out))
if not code and host in self.cache.daemons:
# prime cached service state with what we (should have)
# just created
"""
Remove a daemon
"""
- self.log.debug('_remove_daemon %s on %s force=%s' % (name, host, force))
args = ['--name', name]
if force:
args.extend(['--force'])
out, err, code = self._run_cephadm(
host, name, 'rm-daemon', args)
- self.log.debug('_remove_daemon code %s out %s' % (code, out))
if not code:
# remove item from cache
self.cache.rm_daemon(host, name)
"""
Create a new monitor on the given host.
"""
- self.log.info("create_mon({}:{}): starting mon.{}".format(
- host, network, name))
-
# get mon. key
ret, keyring, err = self.mon_command({
'prefix': 'auth get',
"""
Create a new manager instance on a host.
"""
- self.log.info("create_mgr({}, mgr.{}): starting".format(host, mgr_id))
-
# get mgr. key
ret, keyring, err = self.mon_command({
'prefix': 'auth get-or-create',
break
if not host:
raise OrchestratorError('no hosts defined')
- self.log.debug('using host %s' % host)
out, err, code = self._run_cephadm(
host, None, 'pull', [],
image=image_name,