From 45e806676bc9bf85d4947eba9741387cec3ae2fd Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 3 Jun 2021 10:29:00 -0400 Subject: [PATCH] mgr/cephadm/inventory: do not try to resolve current mgr host The CNI configuration may set up a private network for the container, which is mapped to the hostname in /etc/hosts. For example, my test box sets up 10.88.0.0/24 because I was using crio + kubeadm on this host earlier (at least I think that's why): $ sudo podman run --rm --name test123 --entrypoint /bin/bash -it quay.ceph.io/ceph-ci/ceph:master -c "cat /etc/hosts" 127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4 ::1 localhost localhost.localdomain localhost6 localhost6.localdomain6 10.88.0.8 f9e91bf2478f test123 In any case, we should never trust a lookup of our own hostname from inside a container! Skipping our own host isn't quite sufficient, though: in a single-host cluster there is no other host for the mgr to fail over to, so nobody else would ever do the lookup. In that case we fall back to using get_mgr_ip(). That value may be distorted by the public_network option on the mgr, but we don't have any other good options here, and single-node clusters are unlikely to have complex network configs. Refactor a bit to avoid the try/except nesting. 
Signed-off-by: Sage Weil --- src/pybind/mgr/cephadm/inventory.py | 43 +++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/src/pybind/mgr/cephadm/inventory.py b/src/pybind/mgr/cephadm/inventory.py index c2ce50db8c86b..d0509b66124ee 100644 --- a/src/pybind/mgr/cephadm/inventory.py +++ b/src/pybind/mgr/cephadm/inventory.py @@ -3,6 +3,7 @@ from copy import copy import ipaddress import json import logging +import socket from typing import TYPE_CHECKING, Dict, List, Iterator, Optional, Any, Tuple, Set, Mapping, cast, \ NamedTuple, Type @@ -32,6 +33,14 @@ class Inventory: def __init__(self, mgr: 'CephadmOrchestrator'): self.mgr = mgr adjusted_addrs = False + + def is_valid_ip(ip: str) -> bool: + try: + ipaddress.ip_address(ip) + return True + except ValueError: + return False + # load inventory i = self.mgr.get_store('inventory') if i: @@ -42,18 +51,30 @@ class Inventory: v['hostname'] = k # convert legacy non-IP addr? - try: - ipaddress.ip_address(v.get('addr')) - except ValueError: + if is_valid_ip(str(v.get('addr'))): + continue + if len(self._inventory) > 1: + if k == socket.gethostname(): + # Never try to resolve our own host! This is + # fraught and can lead to either a loopback + # address (due to podman's futzing with + # /etc/hosts) or a private IP based on the CNI + # configuration. Instead, wait until the mgr + # fails over to another host and let them resolve + # this host. + continue ip = resolve_ip(cast(str, v.get('addr'))) - try: - ipaddress.ip_address(ip) - if not ip.startswith('127.0.'): - self.mgr.log.info(f"inventory: adjusted host {v['hostname']} addr '{v['addr']}' -> '{ip}'") - v['addr'] = ip - adjusted_addrs = True - except ValueError: - pass + else: + # we only have 1 node in the cluster, so we can't + # rely on another host doing the lookup. use the + # IP the mgr binds to. 
+ ip = self.mgr.get_mgr_ip() + if is_valid_ip(ip) and not ip.startswith('127.0.'): + self.mgr.log.info( + f"inventory: adjusted host {v['hostname']} addr '{v['addr']}' -> '{ip}'" + ) + v['addr'] = ip + adjusted_addrs = True if adjusted_addrs: self.save() else: -- 2.39.5