From ef3b125210162f10094f22cd0913c81c54885c49 Mon Sep 17 00:00:00 2001 From: John Mulligan Date: Mon, 15 Jul 2024 15:16:04 -0400 Subject: [PATCH] cephadm: add ctdb support to smb daemon type Signed-off-by: John Mulligan --- src/cephadm/cephadmlib/daemons/smb.py | 211 ++++++++++++++++++++++++-- 1 file changed, 199 insertions(+), 12 deletions(-) diff --git a/src/cephadm/cephadmlib/daemons/smb.py b/src/cephadm/cephadmlib/daemons/smb.py index 46246f4fafb4d..406e9c6964b0c 100644 --- a/src/cephadm/cephadmlib/daemons/smb.py +++ b/src/cephadm/cephadmlib/daemons/smb.py @@ -32,6 +32,11 @@ from ..net_utils import EndPoint logger = logging.getLogger() +# sambacc provided commands we will need (when clustered) +_SCC = '/usr/bin/samba-container' +_NODES_SUBCMD = [_SCC, 'ctdb-list-nodes'] +_MUTEX_SUBCMD = [_SCC, 'ctdb-rados-mutex'] # requires rados uri + class Features(enum.Enum): DOMAIN = 'domain' @@ -48,6 +53,7 @@ class Features(enum.Enum): class Config: + identity: DaemonIdentity instance_id: str source_config: str samba_debug_level: int @@ -60,10 +66,16 @@ class Config: smb_port: int ceph_config_entity: str vhostname: str + # clustering related values + rank: int + rank_generation: int + cluster_meta_uri: str + cluster_lock_uri: str def __init__( self, *, + identity: DaemonIdentity, instance_id: str, source_config: str, domain_member: bool, @@ -76,7 +88,12 @@ class Config: smb_port: int = 0, ceph_config_entity: str = 'client.admin', vhostname: str = '', + rank: int = -1, + rank_generation: int = -1, + cluster_meta_uri: str = '', + cluster_lock_uri: str = '', ) -> None: + self.identity = identity self.instance_id = instance_id self.source_config = source_config self.domain_member = domain_member @@ -89,6 +106,10 @@ class Config: self.smb_port = smb_port self.ceph_config_entity = ceph_config_entity self.vhostname = vhostname + self.rank = rank + self.rank_generation = rank_generation + self.cluster_meta_uri = cluster_meta_uri + self.cluster_lock_uri = cluster_lock_uri def __str__(self) -> str: return ( @@ -101,6 +122,10 @@ class Config: def config_uris(self) -> List[str]: uris = [self.source_config] uris.extend(self.user_sources or []) + if self.clustered: + # When clustered, we inject certain clustering related config vars + # via a config file generated by cephadm (elsewhere in this file) + uris.append('/etc/samba/container/ctdb.json') return uris @@ -128,8 +153,19 @@ class SambaContainerCommon: 'SAMBA_CONTAINER_ID': self.cfg.instance_id, 'SAMBACC_CONFIG': json.dumps(self.cfg.config_uris()), } + # The CTDB support in sambacc project is considered experimental + # and it refuses to run without setting the following environment + # variable. This can be dropped once sambacc no longer needs it, + # possibly after the next sambacc release. + environ['SAMBACC_CTDB'] = 'ctdb-is-experimental' if self.cfg.ceph_config_entity: environ['SAMBACC_CEPH_ID'] = f'name={self.cfg.ceph_config_entity}' + if self.cfg.rank >= 0: + # how the values are known to ceph (for debugging purposes...) + environ['RANK'] = str(self.cfg.rank) + environ['RANK_GENERATION'] = str(self.cfg.rank) + # samba container specific variant + environ['NODE_NUMBER'] = environ['RANK'] return environ def envs_list(self) -> List[str]: @@ -147,12 +183,34 @@ class SambaContainerCommon: return [] +class SambaNetworkedInitContainer(SambaContainerCommon): + """SambaContainerCommon subclass that enables additional networking + params for an init container by default. + NB: By networked we mean needs to use public network resources outside + the ceph cluster. + """ + + def container_args(self) -> List[str]: + cargs = _container_dns_args(self.cfg) + if self.cfg.clustered: + cargs.append('--network=host') + return cargs + + class SMBDContainer(SambaContainerCommon): def name(self) -> str: return 'smbd' def args(self) -> List[str]: - return super().args() + ['run', 'smbd'] + args = super().args() + args.append('run') + if self.cfg.clustered: + auth_kind = 'nsswitch' if self.cfg.domain_member else 'users' + args.append(f'--setup={auth_kind}') + args.append('--setup=smb_ctdb') + args.append('--wait-for=ctdb') + args.append('smbd') + return args def container_args(self) -> List[str]: cargs = [] @@ -167,7 +225,13 @@ class WinbindContainer(SambaContainerCommon): return 'winbindd' def args(self) -> List[str]: - return super().args() + ['run', 'winbindd'] + args = super().args() + args.append('run') + if self.cfg.clustered: + args.append('--setup=smb_ctdb') + args.append('--wait-for=ctdb') + args.append('winbindd') + return args class ConfigInitContainer(SambaContainerCommon): @@ -178,20 +242,22 @@ class ConfigInitContainer(SambaContainerCommon): return super().args() + ['init'] -class MustJoinContainer(SambaContainerCommon): +class MustJoinContainer(SambaNetworkedInitContainer): def name(self) -> str: return 'mustjoin' def args(self) -> List[str]: - args = super().args() + ['must-join'] + args = super().args() + if self.cfg.clustered: + # TODO: not only do we want to only do this on node 0, we only + # want to do it exactly ONCE per cluster even on pnn 0. This needs + # additional work to get that right. + args.append('--skip-if=env:NODE_NUMBER!=0') + args.append('must-join') for join_src in self.cfg.join_sources: args.append(f'-j{join_src}') return args - def container_args(self) -> List[str]: - cargs = _container_dns_args(self.cfg) - return cargs - class ConfigWatchContainer(SambaContainerCommon): def name(self) -> str: @@ -201,6 +267,77 @@ class ConfigWatchContainer(SambaContainerCommon): return super().args() + ['update-config', '--watch'] +class CTDBMigrateInitContainer(SambaContainerCommon): + def name(self) -> str: + return 'ctdbMigrate' + + def args(self) -> List[str]: + # TODO: not only do we want to only do this on node 0, we only + # want to do it exactly ONCE per cluster even on pnn 0. This needs + # additional work to get that right. + return super().args() + [ + '--skip-if=env:NODE_NUMBER!=0', + 'ctdb-migrate', + '--dest-dir=/var/lib/ctdb/persistent', + '--archive=/var/lib/samba/.migrated', + ] + + +class CTDBMustHaveNodeInitContainer(SambaContainerCommon): + def name(self) -> str: + return 'ctdbMustHaveNode' + + def args(self) -> List[str]: + args = super().args() + unique_name = self.cfg.identity.daemon_name + args += [ + 'ctdb-must-have-node', + # hostname is a misnomer (todo: fix in sambacc) + f'--hostname={unique_name}', + '--take-node-number-from-env', + ] + return args + + +class CTDBDaemonContainer(SambaContainerCommon): + def name(self) -> str: + return 'ctdbd' + + def args(self) -> List[str]: + return super().args() + [ + 'run', + 'ctdbd', + '--setup=smb_ctdb', + '--setup=ctdb_config', + '--setup=ctdb_etc', + ] + + def container_args(self) -> List[str]: + cargs = super().container_args() + # make conditional? + # CAP_NET_ADMIN is needed for event script to add public ips to iface + cargs.append('--cap-add=NET_ADMIN') + return cargs + + +class CTDBNodeMonitorContainer(SambaContainerCommon): + def name(self) -> str: + return 'ctdbNodes' + + def args(self) -> List[str]: + args = super().args() + unique_name = self.cfg.identity.daemon_name + args += [ + '--debug', + 'ctdb-monitor-nodes', + # hostname is a misnomer (todo: fix in sambacc) + f'--hostname={unique_name}', + '--take-node-number-from-env', + '--reload=all', + ] + return args + + class ContainerLayout: init_containers: List[SambaContainerCommon] primary: SambaContainerCommon @@ -237,6 +374,7 @@ class SMB(ContainerDaemonForm): self._raw_configs: Dict[str, Any] = context_getters.fetch_configs(ctx) self._config_keyring = context_getters.get_config_and_keyring(ctx) self._cached_layout: Optional[ContainerLayout] = None + self._rank_info = context_getters.fetch_rank_info(ctx) self.smb_port = 445 logger.debug('Created SMB ContainerDaemonForm instance') @@ -275,6 +413,8 @@ class SMB(ContainerDaemonForm): files = data_utils.dict_get(configs, 'files', {}) ceph_config_entity = configs.get('config_auth_entity', '') vhostname = configs.get('virtual_hostname', '') + cluster_meta_uri = configs.get('cluster_meta_uri', '') + cluster_lock_uri = configs.get('cluster_lock_uri', '') if not instance_id: raise Error('invalid instance (cluster) id') @@ -287,8 +427,6 @@ class SMB(ContainerDaemonForm): raise Error( f'invalid instance features: {", ".join(invalid_features)}' ) - if Features.CLUSTERED.value in instance_features: - raise NotImplementedError('clustered instance') if not vhostname: # if a virtual hostname is not provided, generate one by prefixing # the cluster/instanced id to the system hostname @@ -296,6 +434,7 @@ class SMB(ContainerDaemonForm): vhostname = f'{instance_id}-{hname}' self._instance_cfg = Config( + identity=self._identity, instance_id=instance_id, source_config=source_config, join_sources=join_sources, @@ -306,7 +445,14 @@ class SMB(ContainerDaemonForm): smb_port=self.smb_port, ceph_config_entity=ceph_config_entity, vhostname=vhostname, + cluster_meta_uri=cluster_meta_uri, + cluster_lock_uri=cluster_lock_uri, ) + if self._rank_info: + ( + self._instance_cfg.rank, + self._instance_cfg.rank_generation, + ) = self._rank_info self._files = files logger.debug('SMB Instance Config: %s', self._instance_cfg) logger.debug('Configured files: %s', self._files) @@ -354,6 +500,16 @@ class SMB(ContainerDaemonForm): init_ctrs.append(MustJoinContainer(self._cfg)) ctrs.append(WinbindContainer(self._cfg)) + if self._cfg.clustered: + init_ctrs += [ + CTDBMigrateInitContainer(self._cfg), + CTDBMustHaveNodeInitContainer(self._cfg), + ] + ctrs += [ + CTDBDaemonContainer(self._cfg), + CTDBNodeMonitorContainer(self._cfg), + ] + smbd = SMBDContainer(self._cfg) self._cached_layout = ContainerLayout(init_ctrs, smbd, ctrs) return self._cached_layout @@ -419,7 +575,7 @@ class SMB(ContainerDaemonForm): ) def container(self, ctx: CephadmContext) -> CephContainer: - ctr = daemon_to_container(ctx, self, host_network=False) + ctr = daemon_to_container(ctx, self, host_network=self._cfg.clustered) # We want to share the IPC ns between the samba containers for one # instance. Cephadm's default, host ipc, is not what we want. # Unsetting it works fine for podman but docker (on ubuntu 22.04) needs @@ -476,6 +632,15 @@ class SMB(ContainerDaemonForm): mounts[run_samba] = '/run:z' # TODO: make this a shared tmpfs mounts[config] = '/etc/ceph/ceph.conf:z' mounts[keyring] = '/etc/ceph/keyring:z' + if self._cfg.clustered: + ctdb_persistent = str(data_dir / 'ctdb/persistent') + ctdb_run = str(data_dir / 'ctdb/run') # TODO: tmpfs too! + ctdb_volatile = str(data_dir / 'ctdb/volatile') + ctdb_etc = str(data_dir / 'ctdb/etc') + mounts[ctdb_persistent] = '/var/lib/ctdb/persistent:z' + mounts[ctdb_run] = '/var/run/ctdb:z' + mounts[ctdb_volatile] = '/var/lib/ctdb/volatile:z' + mounts[ctdb_etc] = '/etc/ctdb:z' def customize_container_endpoints( self, endpoints: List[EndPoint], deployment_type: DeploymentType @@ -486,8 +651,30 @@ class SMB(ContainerDaemonForm): def prepare_data_dir(self, data_dir: str, uid: int, gid: int) -> None: self.validate() ddir = pathlib.Path(data_dir) - file_utils.makedirs(ddir / 'etc-samba-container', uid, gid, 0o770) + etc_samba_ctr = ddir / 'etc-samba-container' + file_utils.makedirs(etc_samba_ctr, uid, gid, 0o770) file_utils.makedirs(ddir / 'lib-samba', uid, gid, 0o770) file_utils.makedirs(ddir / 'run', uid, gid, 0o770) if self._files: file_utils.populate_files(data_dir, self._files, uid, gid) + if self._cfg.clustered: + file_utils.makedirs(ddir / 'ctdb/persistent', uid, gid, 0o770) + file_utils.makedirs(ddir / 'ctdb/run', uid, gid, 0o770) + file_utils.makedirs(ddir / 'ctdb/volatile', uid, gid, 0o770) + file_utils.makedirs(ddir / 'ctdb/etc', uid, gid, 0o770) + self._write_ctdb_stub_config(etc_samba_ctr / 'ctdb.json') + + def _write_ctdb_stub_config(self, path: pathlib.Path) -> None: + reclock_cmd = ' '.join(_MUTEX_SUBCMD + [self._cfg.cluster_lock_uri]) + nodes_cmd = ' '.join(_NODES_SUBCMD) + stub_config = { + 'samba-container-config': 'v0', + 'ctdb': { + # recovery_lock is passed directly to ctdb: needs '!' prefix + 'recovery_lock': f'!{reclock_cmd}', + 'cluster_meta_uri': self._cfg.cluster_meta_uri, + 'nodes_cmd': nodes_cmd, + }, + } + with file_utils.write_new(path) as fh: + json.dump(stub_config, fh) -- 2.39.5