mgr/cephadm: convert networks from set to list + don't reset con on down agent hosts
author Adam King <adking@redhat.com>
Mon, 30 Aug 2021 17:39:56 +0000 (13:39 -0400)
committer Adam King <adking@redhat.com>
Fri, 24 Sep 2021 11:23:51 +0000 (07:23 -0400)
the networks info needs to be a list so it can be encoded in a JSON string
don't reset the con on hosts where the agent isn't reporting (possibly offline hosts)

Signed-off-by: Adam King <adking@redhat.com>
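
The networks change exists because json.dumps cannot encode Python sets, and part of the list_networks() output comes back as a set of addresses per interface. Below is a minimal sketch of the failure and of a set-to-list conversion, using made-up sample data shaped roughly like the {subnet: {interface: addresses}} structure; the patch itself performs the conversion with an explicit loop inside the agent's reporting loop rather than a comprehension.

    import json

    # hypothetical sample, shaped like list_networks() output:
    # {subnet: {interface: set of addresses}}
    networks = {'10.0.0.0/24': {'eth0': {'10.0.0.5', '10.0.0.6'}}}

    try:
        json.dumps(networks)
    except TypeError as e:
        # raises "Object of type set is not JSON serializable"
        print(f'set cannot be encoded: {e}')

    # converting each address set to a list makes the structure serializable
    networks_list = {
        subnet: {iface: list(addrs) for iface, addrs in ifaces.items()}
        for subnet, ifaces in networks.items()
    }
    print(json.dumps(networks_list))
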
src/cephadm/cephadm
src/pybind/mgr/cephadm/agent.py
src/pybind/mgr/cephadm/serve.py
src/pybind/mgr/cephadm/tests/fixtures.py

index b7af2573c51e6d7552bf34c3013fe31bb877c85f..6aa0bd0cd335134bfd0adb3c81315ab5786b0ada 100755 (executable)
@@ -40,7 +40,7 @@ from configparser import ConfigParser
 from functools import wraps
 from glob import glob
 from io import StringIO
-from threading import Thread, RLock
+from threading import Thread, RLock, Event
 from urllib.error import HTTPError
 from urllib.request import urlopen, Request
 from pathlib import Path
@@ -3538,7 +3538,7 @@ class CephadmAgent():
         self.listener_key_path = os.path.join(self.daemon_dir, 'listener.key')
         self.listener_port = ''
         self.ack = -1
-        self.event = threading.Event()
+        self.event = Event()
         self.mgr_listener = MgrListener(self)
         self.device_enhanced_scan = False
 
@@ -3651,16 +3651,23 @@ WantedBy=ceph-{fsid}.target
 
         while not self.stop:
             ack = self.ack
-
             try:
                 volume = self._ceph_volume(self.device_enhanced_scan)
             except Exception as e:
                 logger.error(f'Failed to get ceph-volume metadata: {e}')
                 volume = ''
 
+            # part of the networks info is returned as a set which is not JSON
+            # serializable. The set must be converted to a list
+            networks = list_networks(self.ctx)
+            networks_list = {}
+            for key in networks.keys():
+                for k, v in networks[key].items():
+                    networks_list[key] = {k: list(v)}
+
             data = json.dumps({'host': self.host,
                               'ls': list_daemons(self.ctx),
-                               'networks': list_networks(self.ctx),
+                               'networks': networks_list,
                                'facts': HostFacts(self.ctx).dump(),
                                'volume': volume,
                                'ack': str(ack),
index abc56a1984c7ddd3f2b6f882334b9213a3e3ca5c..cdb42d4bc7293bbe6f526dccc9fc9de19c8864dd 100644 (file)
@@ -171,7 +171,6 @@ class HostData:
             if 'facts' in data and data['facts']:
                 self.mgr.cache.update_host_facts(host, json.loads(data['facts']))
             if 'volume' in data and data['volume']:
-                self.mgr.log.error(data['volume'])
                 ret = Devices.from_json(json.loads(data['volume']))
                 self.mgr.cache.update_host_devices(host, ret.devices)
 
index 7b1b432e431d3c0e77df9fbc8b3554e050326183..e11a0e7e8e2fab74b74be88d60d59d99ca65d2e9 100644 (file)
@@ -3,7 +3,7 @@ import json
 import logging
 import uuid
 from collections import defaultdict
-from typing import TYPE_CHECKING, Optional, List, cast, Dict, Any, Union, Tuple, Iterator, Set
+from typing import TYPE_CHECKING, Optional, List, cast, Dict, Any, Union, Tuple, Set
 
 from ceph.deployment import inventory
 from ceph.deployment.drive_group import DriveGroupSpec
@@ -269,7 +269,10 @@ class CephadmServe:
                 if host in self.mgr.offline_hosts:
                     return
                 self.mgr.offline_hosts.add(host)
-                self.mgr._reset_con(host)
+                # In case host is actually offline, it's best to reset the connection to avoid
+                # a long timeout trying to use an existing connection to an offline host
+                # REVISIT AFTER https://github.com/ceph/ceph/pull/42919
+                # self.mgr.ssh._reset_con(host)
                 agents_down.append(host)
                 # try to schedule redeploy of agent in case it is individually down
                 try:
@@ -280,7 +283,7 @@ class CephadmServe:
                     self.log.debug(
                         f'Failed to find entry for agent deployed on host {host}. Agent possibly never deployed: {e}')
                 return
-            else:
+            elif self.mgr.use_agent:
                 self.mgr.offline_hosts_remove(host)
 
             if self.mgr.cache.host_needs_check(host):
index bc0fdeeba244e76cf21768eb7f1db4d23dba105a..7fe868392b4dedea12ab3b9f5023ae232353da28 100644 (file)
@@ -49,6 +49,7 @@ class MockEventLoopThread:
             loop.close()
             asyncio.set_event_loop(None)
 
+
 def receive_agent_metadata(m: CephadmOrchestrator, host: str, ops: List[str] = None) -> None:
     to_update: Dict[str, Callable[[str, Any], None]] = {
         'ls': m._process_ls_output,
@@ -69,6 +70,7 @@ def receive_agent_metadata_all_hosts(m: CephadmOrchestrator) -> None:
     for host in m.cache.get_hosts():
         receive_agent_metadata(m, host)
 
+
 @contextmanager
 def with_cephadm_module(module_options=None, store=None):
     """