From c35375948e5f778183117d88ab0b9b75780565ae Mon Sep 17 00:00:00 2001 From: Adam King Date: Mon, 30 Aug 2021 13:39:56 -0400 Subject: [PATCH] mgr/cephadm: convert networks from set to list + don't reset con on down agent hosts The networks need to be lists so they can be encoded in a JSON string. Stop resetting the connection on hosts where the agent isn't reporting (possibly offline hosts). Signed-off-by: Adam King --- src/cephadm/cephadm | 15 +++++++++++---- src/pybind/mgr/cephadm/agent.py | 1 - src/pybind/mgr/cephadm/serve.py | 9 ++++++--- src/pybind/mgr/cephadm/tests/fixtures.py | 2 ++ 4 files changed, 19 insertions(+), 8 deletions(-) diff --git a/src/cephadm/cephadm b/src/cephadm/cephadm index b7af2573c51..6aa0bd0cd33 100755 --- a/src/cephadm/cephadm +++ b/src/cephadm/cephadm @@ -40,7 +40,7 @@ from configparser import ConfigParser from functools import wraps from glob import glob from io import StringIO -from threading import Thread, RLock +from threading import Thread, RLock, Event from urllib.error import HTTPError from urllib.request import urlopen, Request from pathlib import Path @@ -3538,7 +3538,7 @@ class CephadmAgent(): self.listener_key_path = os.path.join(self.daemon_dir, 'listener.key') self.listener_port = '' self.ack = -1 - self.event = threading.Event() + self.event = Event() self.mgr_listener = MgrListener(self) self.device_enhanced_scan = False @@ -3651,16 +3651,23 @@ WantedBy=ceph-{fsid}.target while not self.stop: ack = self.ack - try: volume = self._ceph_volume(self.device_enhanced_scan) except Exception as e: logger.error(f'Failed to get ceph-volume metadata: {e}') volume = '' + # part of the networks info is returned as a set which is not JSON + # serializable. 
The set must be converted to a list + networks = list_networks(self.ctx) + networks_list = {} + for key in networks.keys(): + for k, v in networks[key].items(): + networks_list[key] = {k: list(v)} + data = json.dumps({'host': self.host, 'ls': list_daemons(self.ctx), - 'networks': list_networks(self.ctx), + 'networks': networks_list, 'facts': HostFacts(self.ctx).dump(), 'volume': volume, 'ack': str(ack), diff --git a/src/pybind/mgr/cephadm/agent.py b/src/pybind/mgr/cephadm/agent.py index abc56a1984c..cdb42d4bc72 100644 --- a/src/pybind/mgr/cephadm/agent.py +++ b/src/pybind/mgr/cephadm/agent.py @@ -171,7 +171,6 @@ class HostData: if 'facts' in data and data['facts']: self.mgr.cache.update_host_facts(host, json.loads(data['facts'])) if 'volume' in data and data['volume']: - self.mgr.log.error(data['volume']) ret = Devices.from_json(json.loads(data['volume'])) self.mgr.cache.update_host_devices(host, ret.devices) diff --git a/src/pybind/mgr/cephadm/serve.py b/src/pybind/mgr/cephadm/serve.py index 7b1b432e431..e11a0e7e8e2 100644 --- a/src/pybind/mgr/cephadm/serve.py +++ b/src/pybind/mgr/cephadm/serve.py @@ -3,7 +3,7 @@ import json import logging import uuid from collections import defaultdict -from typing import TYPE_CHECKING, Optional, List, cast, Dict, Any, Union, Tuple, Iterator, Set +from typing import TYPE_CHECKING, Optional, List, cast, Dict, Any, Union, Tuple, Set from ceph.deployment import inventory from ceph.deployment.drive_group import DriveGroupSpec @@ -269,7 +269,10 @@ class CephadmServe: if host in self.mgr.offline_hosts: return self.mgr.offline_hosts.add(host) - self.mgr._reset_con(host) + # In case host is actually offline, it's best to reset the connection to avoid + # a long timeout trying to use an existing connection to an offline host + # REVISIT AFTER https://github.com/ceph/ceph/pull/42919 + # self.mgr.ssh._reset_con(host) agents_down.append(host) # try to schedule redeploy of agent in case it is individually down try: @@ -280,7 +283,7 @@ class 
CephadmServe: self.log.debug( f'Failed to find entry for agent deployed on host {host}. Agent possibly never deployed: {e}') return - else: + elif self.mgr.use_agent: self.mgr.offline_hosts_remove(host) if self.mgr.cache.host_needs_check(host): diff --git a/src/pybind/mgr/cephadm/tests/fixtures.py b/src/pybind/mgr/cephadm/tests/fixtures.py index bc0fdeeba24..7fe868392b4 100644 --- a/src/pybind/mgr/cephadm/tests/fixtures.py +++ b/src/pybind/mgr/cephadm/tests/fixtures.py @@ -49,6 +49,7 @@ class MockEventLoopThread: loop.close() asyncio.set_event_loop(None) + def receive_agent_metadata(m: CephadmOrchestrator, host: str, ops: List[str] = None) -> None: to_update: Dict[str, Callable[[str, Any], None]] = { 'ls': m._process_ls_output, @@ -69,6 +70,7 @@ def receive_agent_metadata_all_hosts(m: CephadmOrchestrator) -> None: for host in m.cache.get_hosts(): receive_agent_metadata(m, host) + @contextmanager def with_cephadm_module(module_options=None, store=None): """ -- 2.39.5