from ceph.deployment.service_spec import ServiceSpec
from orchestrator._interface import DaemonDescription
from orchestrator import OrchestratorValidationError
+from .utils import RESCHEDULE_FROM_OFFLINE_HOSTS_TYPES
logger = logging.getLogger(__name__)
T = TypeVar('T')
# get candidate hosts based on [hosts, label, host_pattern]
candidates = self.get_candidates() # type: List[DaemonPlacement]
+ if self.primary_daemon_type in RESCHEDULE_FROM_OFFLINE_HOSTS_TYPES:
+ # remove unreachable hosts that are not in maintenance so daemons
+ # on these hosts will be rescheduled
+ candidates = self.remove_non_maintenance_unreachable_candidates(candidates)
def expand_candidates(ls: List[DaemonPlacement], num: int) -> List[DaemonPlacement]:
r = []
final = sorted(ls)
random.Random(seed).shuffle(final)
return ls
+
+ def remove_non_maintenance_unreachable_candidates(self, candidates: List[DaemonPlacement]) -> List[DaemonPlacement]:
+ in_maintenance: Dict[str, bool] = {}
+ for h in self.hosts:
+ if h.status.lower() == 'maintenance':
+ in_maintenance[h.hostname] = True
+ continue
+ in_maintenance[h.hostname] = False
+ unreachable_hosts = [h.hostname for h in self.unreachable_hosts]
+ candidates = [c for c in candidates if c.hostname not in unreachable_hosts or in_maintenance[c.hostname]]
+ return candidates
).place()
assert sorted([h.hostname for h in to_add]) in expected_add
assert sorted([h.name() for h in to_remove]) in expected_remove
+
+
+class RescheduleFromOfflineTest(NamedTuple):
+ service_type: str
+ placement: PlacementSpec
+ hosts: List[str]
+ maintenance_hosts: List[str]
+ offline_hosts: List[str]
+ daemons: List[DaemonDescription]
+ expected_add: List[List[str]]
+ expected_remove: List[List[str]]
+
+
+@pytest.mark.parametrize("service_type,placement,hosts,maintenance_hosts,offline_hosts,daemons,expected_add,expected_remove",
+ [
+ RescheduleFromOfflineTest(
+ 'nfs',
+ PlacementSpec(count=2),
+ 'host1 host2 host3'.split(),
+ [],
+ ['host2'],
+ [
+ DaemonDescription('nfs', 'a', 'host1'),
+ DaemonDescription('nfs', 'b', 'host2'),
+ ],
+ [['host3']],
+ [[]],
+ ),
+ RescheduleFromOfflineTest(
+ 'nfs',
+ PlacementSpec(count=2),
+ 'host1 host2 host3'.split(),
+ ['host2'],
+ [],
+ [
+ DaemonDescription('nfs', 'a', 'host1'),
+ DaemonDescription('nfs', 'b', 'host2'),
+ ],
+ [[]],
+ [[]],
+ ),
+ RescheduleFromOfflineTest(
+ 'mon',
+ PlacementSpec(count=2),
+ 'host1 host2 host3'.split(),
+ [],
+ ['host2'],
+ [
+ DaemonDescription('mon', 'a', 'host1'),
+ DaemonDescription('mon', 'b', 'host2'),
+ ],
+ [[]],
+ [[]],
+ ),
+ ])
+def test_remove_from_offline(service_type, placement, hosts, maintenance_hosts, offline_hosts, daemons, expected_add, expected_remove):
+
+ spec = ServiceSpec(service_type=service_type,
+ service_id='test',
+ placement=placement)
+
+ host_specs = [HostSpec(h) for h in hosts]
+ for h in host_specs:
+ if h.hostname in offline_hosts:
+ h.status = 'offline'
+ if h.hostname in maintenance_hosts:
+ h.status = 'maintenance'
+
+ hosts, to_add, to_remove = HostAssignment(
+ spec=spec,
+ hosts=host_specs,
+ unreachable_hosts=[h for h in host_specs if h.status],
+ daemons=daemons,
+ ).place()
+ assert sorted([h.hostname for h in to_add]) in expected_add
+ assert sorted([h.name() for h in to_remove]) in expected_remove
CEPH_TYPES = ['mgr', 'mon', 'crash', 'osd', 'mds', 'rgw', 'rbd-mirror', 'cephfs-mirror']
GATEWAY_TYPES = ['iscsi', 'nfs']
MONITORING_STACK_TYPES = ['node-exporter', 'prometheus', 'alertmanager', 'grafana', 'loki', 'promtail']
+RESCHEDULE_FROM_OFFLINE_HOSTS_TYPES = ['nfs']
CEPH_UPGRADE_ORDER = CEPH_TYPES + GATEWAY_TYPES + MONITORING_STACK_TYPES