if by_name and '.' not in daemon_filter:
logger.warning(f'Trying to get container info using invalid daemon name {daemon_filter}')
return None
- daemons = list_daemons(ctx, detail=False)
- matching_daemons = [d for d in daemons if daemon_name_or_type(d) == daemon_filter and d['fsid'] == ctx.fsid]
+ if by_name:
+ matching_daemons = _get_matching_daemons_by_name(ctx, daemon_filter)
+ else:
+ # NOTE: we are passing detail=False here as in this case where we are not
+ # doing it by_name, we really only need the names of the daemons. Additionally,
+ # when not doing it by_name, we are getting the info for all daemons on the
+ # host, and doing this with detail=True tends to be slow.
+ daemons = list_daemons(ctx, detail=False)
+ matching_daemons = [d for d in daemons if daemon_name_or_type(d) == daemon_filter and d['fsid'] == ctx.fsid]
if matching_daemons:
- d_type, d_id = matching_daemons[0]['name'].split('.', 1)
- out, _, code = get_container_stats(ctx, ctx.container_engine.path, ctx.fsid, d_type, d_id)
- if not code:
- (container_id, image_name, image_id, start, version) = out.strip().split(',')
- return ContainerInfo(container_id, image_name, image_id, start, version)
+ if (
+ by_name
+ and 'state' in matching_daemons[0]
+ and matching_daemons[0]['state'] != 'running'
+ and 'container_image_name' in matching_daemons[0]
+ and matching_daemons[0]['container_image_name']
+ ):
+ # this daemon container is not running so the regular `podman/docker inspect` on the
+ # container will not help us. If we have the image name from the list_daemons output
+ # we can try that.
+ image_name = matching_daemons[0]['container_image_name']
+ out, _, code = get_container_stats_by_image_name(ctx, ctx.container_engine.path, image_name)
+ if not code:
+ # keep in mind, the daemon container is not running, so no container id here
+ (image_id, start, version) = out.strip().split(',')
+ return ContainerInfo(
+ container_id='',
+ image_name=image_name,
+ image_id=image_id,
+ start=start,
+ version=version)
+ else:
+ d_type, d_id = matching_daemons[0]['name'].split('.', 1)
+ out, _, code = get_container_stats(ctx, ctx.container_engine.path, ctx.fsid, d_type, d_id)
+ if not code:
+ (container_id, image_name, image_id, start, version) = out.strip().split(',')
+ return ContainerInfo(container_id, image_name, image_id, start, version)
return None
+def _get_matching_daemons_by_name(ctx: CephadmContext, daemon_filter: str) -> List[Dict[str, str]]:
+ """Return a one-element list holding the daemon entry matching daemon_filter.
+
+ daemon_filter is expected to be a full daemon name ('<type>.<id>'); the
+ caller validates this before calling. When multiple entries share the
+ name, prefer one whose state is 'running', otherwise fall back to the
+ first entry returned by list_daemons.
+ """
+ # NOTE: we are not passing detail=False to this list_daemons call
+ # as we want the container_image name in the case where we are
+ # doing this by name and this is skipped when detail=False
+ matching_daemons = list_daemons(ctx, daemon_name=daemon_filter)
+ if len(matching_daemons) > 1:
+ logger.warning(f'Found multiple daemons sharing same name: {daemon_filter}')
+ # Take the first daemon we find that is actually running, or just the
+ # first in the list if none are running
+ matched_daemon = None
+ for d in matching_daemons:
+ if 'state' in d and d['state'] == 'running':
+ matched_daemon = d
+ break
+ if not matched_daemon:
+ matched_daemon = matching_daemons[0]
+ matching_daemons = [matched_daemon]
+ return matching_daemons
+
+
def infer_local_ceph_image(ctx: CephadmContext, container_path: str) -> Optional[str]:
"""
Infer the local ceph image based on the following priority criteria:
def get_container_stats(ctx: CephadmContext, container_path: str, fsid: str, daemon_type: str, daemon_id: str) -> Tuple[str, str, int]:
+ """returns container id, image name, image id, created time, and ceph version if available"""
c = CephContainer.for_daemon(
ctx, DaemonIdentity(fsid, daemon_type, daemon_id), 'bash'
)
break
return out, err, code
+
+def get_container_stats_by_image_name(ctx: CephadmContext, container_path: str, image_name: str) -> Tuple[str, str, int]:
+ """Return (out, err, returncode) from `<engine> image inspect` of image_name.
+
+ On success, out is a comma-joined string of the image id, the image
+ created time, and the value of the io.ceph.version label (empty if the
+ image carries no such label).
+ """
+ # NOTE(review): this initial assignment is dead — the call() below always
+ # rebinds all three names; it could be dropped.
+ out, err, code = '', '', -1
+ cmd = [
+ container_path, 'image', 'inspect',
+ '--format', '{{.Id}},{{.Created}},{{index .Config.Labels "io.ceph.version"}}',
+ image_name
+ ]
+ out, err, code = call(ctx, cmd, verbosity=CallVerbosity.QUIET)
+ return out, err, code
+
##################################
# type: ignore
+import copy
import errno
import json
import mock
with mock.patch('cephadm.get_container_stats', return_value=container_stats):
assert _cephadm.get_container_info(ctx, daemon_filter, by_name) == output
+ @mock.patch('cephadm.list_daemons')
+ @mock.patch('cephadm.get_container_stats')
+ @mock.patch('cephadm.get_container_stats_by_image_name')
+ def test_get_container_info_daemon_down(self, _get_stats_by_name, _get_stats, _list_daemons):
+ ctx = _cephadm.CephadmContext()
+ ctx.fsid = '5e39c134-dfc5-11ee-a344-5254000ee071'
+ ctx.container_engine = mock_podman()
+
+ # list_daemons output taken from cephadm ls of an
+ # OSD that was stopped, with substitutions
+ # true -> True
+ # null -> None
+ down_osd_json = {
+ "style": "cephadm:v1",
+ "name": "osd.2",
+ "fsid": "5e39c134-dfc5-11ee-a344-5254000ee071",
+ "systemd_unit": "ceph-5e39c134-dfc5-11ee-a344-5254000ee071@osd.2",
+ "enabled": True,
+ "state": "stopped",
+ "service_name": "osd.foo",
+ "ports": [],
+ "ip": None,
+ "deployed_by": [
+ "quay.io/adk3798/ceph@sha256:7da0af22ce45aac97dff00125af590506d8e36ab97d78e5175149643562bfb0b"
+ ],
+ "rank": None,
+ "rank_generation": None,
+ "extra_container_args": None,
+ "extra_entrypoint_args": None,
+ "memory_request": None,
+ "memory_limit": None,
+ "container_id": None,
+ "container_image_name": "quay.io/adk3798/ceph@sha256:7da0af22ce45aac97dff00125af590506d8e36ab97d78e5175149643562bfb0b",
+ "container_image_id": None,
+ "container_image_digests": None,
+ "version": None,
+ "started": None,
+ "created": "2024-03-11T17:17:49.533757Z",
+ "deployed": "2024-03-11T17:37:23.520061Z",
+ "configured": "2024-03-11T17:37:28.494075Z"
+ }
+ _list_daemons.return_value = [down_osd_json]
+ _get_stats_by_name.return_value = (('a03c201ff4080204949932f367545cd381c4acee0d48dbc15f2eac1e35f22318,'
+ '2023-11-28 21:34:38.045413692 +0000 UTC,'),
+ '', 0)
+
+ expected_container_info = _cephadm.ContainerInfo(
+ container_id='',
+ image_name='quay.io/adk3798/ceph@sha256:7da0af22ce45aac97dff00125af590506d8e36ab97d78e5175149643562bfb0b',
+ image_id='a03c201ff4080204949932f367545cd381c4acee0d48dbc15f2eac1e35f22318',
+ start='2023-11-28 21:34:38.045413692 +0000 UTC',
+ version='')
+
+ assert _cephadm.get_container_info(ctx, 'osd.2', by_name=True) == expected_container_info
+ assert not _get_stats.called, 'only get_container_stats_by_image_name should have been called'
+
+ # If there is one down and one up daemon of the same name, it should use the up one
+ # In this case, we would be using the running container to get the image, so
+ # all the info will come from the return value of get_container_stats, rather
+ # than it partially being taken from the list_daemons output
+ up_osd_json = copy.deepcopy(down_osd_json)
+ up_osd_json['state'] = 'running'
+ _get_stats.return_value = (('container_id,image_name,image_id,the_past,'), '', 0)
+ _list_daemons.return_value = [down_osd_json, up_osd_json]
+
+ expected_container_info = _cephadm.ContainerInfo(
+ container_id='container_id',
+ image_name='image_name',
+ image_id='image_id',
+ start='the_past',
+ version='')
+
+ assert _cephadm.get_container_info(ctx, 'osd.2', by_name=True) == expected_container_info
+
def test_should_log_to_journald(self):
from cephadmlib import context_getters