---
-### This standalone playbook can be used to prep a COBBLER-IMAGED testnode
+### This role is used to prep a {FOG|MAAS}-IMAGED testnode
### so that it can be used to capture an OS image for FOG.
### This playbook is needed for a couple reasons
### - NIC configs get hard coded into the captured FOG images so nodes reimaged by FOG don't come up with network
+### - SSH host keys need to be deleted
+### - apt and cloud-init services need to be disabled
- hosts:
- testnodes
- become: true
+ roles:
+ - prep-fog-capture
gather_facts: false
- tasks:
-
- # (Missing in RHEL8)
- - name: Check for /usr/bin/python
- shell: echo marco
- register: polo
- ignore_errors: true
-
- - name: Set ansible_python_interpreter=/usr/bin/python3
- set_fact:
- ansible_python_interpreter: /usr/bin/python3
- when: polo is failed
-
- # Now that we know where python is, we can gather_facts
- - setup:
-
- # We need to leave /.cephlab_rc_local or else each FOG reimage would tell Cobbler to run ceph-cm-ansible
- - name: Remove lock files and udev rules
- file:
- path: "{{ item }}"
- state: absent
- with_items:
- - /etc/udev/rules.d/70-persistent-net.rules
- - /.cephlab_net_configured
- - /ceph-qa-ready
-
- - name: Get list of ifcfg scripts from host used to capture image
- shell: "ls -1 /etc/sysconfig/network-scripts/ifcfg-* | grep -v ifcfg-lo"
- register: ifcfg_scripts
- when: ansible_os_family == "RedHat"
- ignore_errors: true
-
- - name: Get list of ifcfg scripts from host used to capture image
- shell: "ls -1 /etc/sysconfig/network/ifcfg-* | grep -v ifcfg-lo"
- register: ifcfg_scripts
- when: ansible_os_family == "Suse"
- ignore_errors: true
-
- - name: Delete ifcfg scripts
- file:
- path: "{{ item }}"
- state: absent
- with_items: "{{ ifcfg_scripts.stdout_lines|default([]) }}"
- when: ifcfg_scripts is defined
-
- - name: Remove /var/lib/ceph mountpoint from fstab
- shell: sed -i '/\/var\/lib\/ceph/d' /etc/fstab
-
- - name: Unmount /var/lib/ceph
- ansible.posix.mount:
- path: /var/lib/ceph
- state: unmounted
-
- - name: Install one-shot service to regenerate SSH host keys on first boot
- copy:
- dest: /etc/systemd/system/regen-ssh-hostkeys.service
- owner: root
- group: root
- mode: '0644'
- content: |
- [Unit]
- Description=Regenerate SSH host keys on first boot
- ConditionPathExists=!/etc/ssh/ssh_host_ed25519_key
- Before=ssh.service
-
- [Service]
- Type=oneshot
- ExecStart=/usr/bin/ssh-keygen -A
- ExecStartPost=/bin/systemctl disable regen-ssh-hostkeys.service
-
- [Install]
- WantedBy=multi-user.target
-
- - name: Reload systemd daemon
- systemd:
- daemon_reload: true
-
- - name: Enable regen-ssh-hostkeys.service
- systemd:
- name: regen-ssh-hostkeys.service
- enabled: true
-
- - name: Get list of SSH host keys
- shell: "ls -1 /etc/ssh/ssh_host_*"
- register: ssh_host_keys
- ignore_errors: true
-
- # Key regeneration is done automatically on CentOS firstboot.
- # For Ubuntu, we'll add `dpkg-reconfigure openssh-server` to rc.local
- - name: Delete SSH host keys so they're generated during firstboot on cloned machines
- file:
- path: "{{ item }}"
- state: absent
- with_items: "{{ ssh_host_keys.stdout_lines|default([]) }}"
- when: ssh_host_keys is defined
-
- - name: Unsubscribe RHEL
- command: subscription-manager unregister
- when: ansible_distribution == "RedHat"
- failed_when: false
-
- # A file gets leftover when a testnode is registered with Satellite that caused
- # each registered subsequent testnode to report the wrong hostname
- - name: Clean up katello facts
- file:
- path: /etc/rhsm/facts/katello.facts
- state: absent
- when: ansible_distribution == "RedHat"
-
- # https://bugzilla.redhat.com/show_bug.cgi?id=1814337
- - name: Disable dnf-makecache service
- service:
- name: dnf-makecache.timer
- state: stopped
- enabled: no
- when:
- - ansible_os_family == "RedHat"
- - ansible_distribution_major_version|int >= 8
-
- # Hopefully fixes https://github.com/ceph/ceph-cm-ansible/pull/544#issuecomment-599076564
- - name: Clean DNF cache
- shell: "dnf clean all && rm -rf /var/cache/dnf/*"
- when:
- - ansible_os_family == "RedHat"
- - ansible_distribution_major_version|int >= 8
-
- - set_fact:
- ntp_service: ntp
- when: ansible_os_family == "Debian"
-
- - set_fact:
- ntp_service: ntpd
- when: ansible_os_family == "RedHat" and ansible_distribution_major_version|int <= 7
-
- - set_fact:
- ntp_service: chronyd
- when: (ansible_os_family == "RedHat" and ansible_distribution_major_version|int >= 8) or
- ansible_os_family == "Suse"
-
- - name: "Stop {{ ntp_service }} service"
- service:
- name: "{{ ntp_service }}"
- state: stopped
- when: '"ntp" in ntp_service'
-
- # The theory here is although we do have the ntp service running on boot,
- # if the time is off, it slowly drifts back in sync. Since our testnodes
- # are ephemeral, they don't ever have enough time to correctly drift
- # back to the correct time. So we'll force it in the captured OS images.
- - name: Install ntpdate command if missing
- package:
- name: ntpdate
- state: present
- when: '"ntp" in ntp_service'
-
- - name: Force time synchronization using stepping | ntp
- command: "ntpdate -b {{ ntp_servers|join(' ') }}"
- when: '"ntp" in ntp_service'
-
- - name: "Start {{ ntp_service }}"
- service:
- name: "{{ ntp_service }}"
- state: started
-
- # chronyd needs to be started in order to force time sync. This differs from ntpd.
- - name: Force time synchronization using stepping | chrony
- command: chronyc -a makestep
- when: '"chrony" in ntp_service'
-
- - name: Sync the hardware clock
- command: "hwclock --systohc"
+ become: true
--- /dev/null
+# One-shot unit that runs cephlab-set-hostname.sh at boot.
+[Unit]
+Description=Ceph Lab hostname configuration
+# Pull in nss-lookup.target; start only after it and network-online.target.
+Wants=nss-lookup.target
+After=network-online.target nss-lookup.target
+
+[Service]
+Type=oneshot
+ExecStart=/usr/local/sbin/cephlab-set-hostname.sh
+# Keep the unit reported as active once the script has exited.
+RemainAfterExit=yes
+# Script output goes to both the journal and the console.
+StandardOutput=journal+console
+StandardError=journal+console
+
+[Install]
+WantedBy=multi-user.target
--- /dev/null
+#!/usr/bin/env bash
+# Wait for /.cephlab_net_configured, then set hostname + /etc/hostname + /etc/hosts
+# Flow:
+# 1) Wait for DHCP/global IPv4
+# 2) Ping CHECK_HOST for up to 10 minutes (from any local IP)
+# 3) Once ping works, try reverse DNS for up to 10 minutes (for an IP that can ping)
+# 4) Set hostname and rewrite /etc/hostname + /etc/hosts
+# Abort on any command failure, unset variable, or failed pipeline stage.
+set -euo pipefail
+
+# --- Config ---
+CHECK_HOST="10.20.192.14" # soko04 (must be reachable before we trust DNS)
+DEFAULT_NAMESERVER="10.20.192.11" # override via env NAMESERVER or arg1
+
+# Flag file this script waits for before it starts probing the network.
+WAIT_FOR_FILE="/.cephlab_net_configured"
+# Marker touched after a successful run; when present, later runs exit early.
+HOSTNAME_IS_SET_FILE="/.cephlab_hostname_set"
+LOG="/var/log/cephlab-set-hostname.log"
+
+# Nameserver precedence: $NAMESERVER env var, then first argument, then default.
+NAMESERVER="${NAMESERVER:-${1:-${DEFAULT_NAMESERVER}}}"
+
+MAX_WAIT_SECONDS="300" # wait for /.cephlab_net_configured
+PING_WINDOW_SECONDS="600" # 10 minutes
+DNS_WINDOW_SECONDS="600" # 10 minutes
+LOOP_SLEEP_SECONDS="2"
+
+# --- Logging ---
+# Create the log world-readable, then mirror all stdout/stderr into it via tee.
+touch "$LOG"
+chmod 0644 "$LOG"
+exec > >(tee -a "$LOG") 2>&1
+
+# log MESSAGE...: write a UTC-timestamped (millisecond resolution) line to stderr.
+log() {
+ echo "$(date -u +%FT%T.%N | cut -c1-23) cephlab-set-hostname: $*" >&2
+}
+
+# --- Helpers ---
+get_my_ips() {
+  # Print every global-scope IPv4 address, one per line, with the prefix
+  # length removed. Addresses on docker0 are skipped. Never returns non-zero.
+  ip -4 -o addr show scope global 2>/dev/null \
+    | awk '$2 != "docker0" { split($4, cidr, "/"); print cidr[1] }' \
+    || true
+}
+
+# Reverse (PTR) lookup helper. Prints the resolved name without its trailing
+# dot, or an empty line when nothing resolves. Never returns non-zero, so it
+# is safe with set -euo pipefail.
+#   $1 - IPv4 address to look up
+#   $2 - nameserver to query (not passed to the getent fallback)
+reverse_lookup() {
+  local addr="$1"
+  local server="$2"
+  local ptr=""
+
+  # Use the first resolver tool that is installed: dig, then host, then getent.
+  if command -v dig >/dev/null 2>&1; then
+    ptr="$(dig +time=1 +tries=1 +short -x "${addr}" @"${server}" 2>/dev/null | head -n1 | sed 's/\.$//' || true)"
+  elif command -v host >/dev/null 2>&1; then
+    ptr="$(host -W 1 "${addr}" "${server}" 2>/dev/null | awk '/domain name pointer/ {print $5}' | sed 's/\.$//' | head -n1 || true)"
+  elif command -v getent >/dev/null 2>&1; then
+    ptr="$(getent hosts "${addr}" 2>/dev/null | awk '{print $2}' | head -n1 || true)"
+  fi
+
+  echo "${ptr}"
+}
+
+set_hostname() {
+  # Set the running hostname to $1, via hostnamectl when it is installed and
+  # via plain hostname(1) otherwise.
+  local new_name="$1"
+
+  if ! command -v hostnamectl >/dev/null 2>&1; then
+    hostname "${new_name}"
+    return
+  fi
+
+  hostnamectl set-hostname "${new_name}"
+}
+
+can_ping_from_ip() {
+  # Succeeds when CHECK_HOST answers a ping sourced from the address in $1.
+  # More tolerant per-attempt check but bounded:
+  # 3 packets, 1s apart, wait up to 2s each; hard cap 10s.
+  local source_addr="$1"
+  local -a probe=(ping -I "${source_addr}" -nq -c3 -i 1 -W 2 "${CHECK_HOST}")
+
+  timeout 10s "${probe[@]}" >/dev/null 2>&1
+}
+
+# --- Main ---
+# Nothing to do if an earlier run already set the hostname.
+if [[ -f "${HOSTNAME_IS_SET_FILE}" ]]; then
+  log "We've already set the hostname before. Exiting..."
+  exit 0
+fi
+
+# Block until the flag file appears, giving up after MAX_WAIT_SECONDS.
+log "Waiting for ${WAIT_FOR_FILE} (up to ${MAX_WAIT_SECONDS}s)..."
+flag_deadline=$((SECONDS + MAX_WAIT_SECONDS))
+until [[ -f "${WAIT_FOR_FILE}" ]]; do
+  if (( SECONDS >= flag_deadline )); then
+    log "Timed out waiting for ${WAIT_FOR_FILE}. Exiting."
+    exit 1
+  fi
+  sleep 1
+done
+log "Flag file present. Proceeding."
+
+# Early diagnostic only: this check does not wait. It logs a warning when no
+# global IPv4 address is present yet; the ping loop that follows re-reads the
+# address list and sleeps until one shows up.
+myips="$(get_my_ips)"
+if [[ -z "${myips}" ]]; then
+ log "No non-loopback IPv4 addresses found yet. Will continue, but ping/DNS will likely fail until DHCP is up."
+fi
+
+# 1) Ping CHECK_HOST for up to 10 minutes (find a working source IP)
+# On success good_ip holds the first local address that could ping CHECK_HOST;
+# if the window expires with good_ip still empty, the script exits 1.
+log "Checking connectivity to ${CHECK_HOST} for up to ${PING_WINDOW_SECONDS}s..."
+ping_deadline=$((SECONDS + PING_WINDOW_SECONDS))
+good_ip=""
+
+while (( SECONDS < ping_deadline )); do
+ # Re-read the address list on every pass; addresses may appear later.
+ myips="$(get_my_ips)"
+ if [[ -z "${myips}" ]]; then
+ log "No global IPv4 yet; waiting..."
+ sleep "${LOOP_SLEEP_SECONDS}"
+ continue
+ fi
+
+ # Try each local address as the ping source; stop at the first that works.
+ for ip in ${myips}; do
+ log "Pinging ${CHECK_HOST} from ${ip}..."
+ if can_ping_from_ip "${ip}"; then
+ good_ip="${ip}"
+ log "Connectivity confirmed: ${ip} -> ${CHECK_HOST}"
+ break
+ fi
+ log "Ping failed from ${ip}"
+ done
+
+ # Leave the outer loop as soon as a working source address was found.
+ [[ -n "${good_ip}" ]] && break
+ sleep "${LOOP_SLEEP_SECONDS}"
+done
+
+if [[ -z "${good_ip}" ]]; then
+ log "Timed out (${PING_WINDOW_SECONDS}s) waiting for connectivity to ${CHECK_HOST}. Nothing changed."
+ exit 1
+fi
+
+# 2) Now that we can reach CHECK_HOST, try reverse DNS for up to 10 minutes
+# On success newhostname holds the PTR name for good_ip; if the window expires
+# with newhostname still empty, the script exits 1 without changing anything.
+log "Connectivity is good. Attempting reverse DNS via ${NAMESERVER} for up to ${DNS_WINDOW_SECONDS}s..."
+dns_deadline=$((SECONDS + DNS_WINDOW_SECONDS))
+newhostname=""
+
+while (( SECONDS < dns_deadline )); do
+  # Prefer the IP that proved connectivity; if it disappeared, re-find a good one.
+  myips="$(get_my_ips)"
+  if [[ -z "${myips}" ]]; then
+    log "Lost all global-scope IPv4 addresses; waiting..."
+    sleep "${LOOP_SLEEP_SECONDS}"
+    continue
+  fi
+
+  # -F compares the address as a literal string, so its dots are not regex
+  # wildcards; -x requires a whole-line match (10.0.0.1 must not match
+  # 10.0.0.11); -- protects against a pattern that starts with a dash.
+  if ! echo "${myips}" | tr ' ' '\n' | grep -qxF -- "${good_ip}"; then
+    log "Previously-good IP ${good_ip} is gone; re-checking connectivity..."
+    good_ip=""
+    for ip in ${myips}; do
+      log "Pinging ${CHECK_HOST} from ${ip}..."
+      if can_ping_from_ip "${ip}"; then
+        good_ip="${ip}"
+        log "Connectivity confirmed: ${ip} -> ${CHECK_HOST}"
+        break
+      fi
+    done
+    # No address can reach CHECK_HOST right now: back off and start over.
+    [[ -z "${good_ip}" ]] && { sleep "${LOOP_SLEEP_SECONDS}"; continue; }
+  fi
+
+  log "Reverse lookup for ${good_ip} via ${NAMESERVER}..."
+  newhostname="$(reverse_lookup "${good_ip}" "${NAMESERVER}")"
+
+  if [[ -n "${newhostname}" ]]; then
+    log "Resolved ${good_ip} -> ${newhostname}"
+    break
+  fi
+
+  log "Reverse lookup failed/empty for ${good_ip}"
+  sleep "${LOOP_SLEEP_SECONDS}"
+done
+
+if [[ -z "${newhostname}" ]]; then
+  log "Timed out (${DNS_WINDOW_SECONDS}s) waiting for reverse DNS via ${NAMESERVER}. Nothing changed."
+  exit 1
+fi
+
+# Apply hostname + persist
+# short_name is the part of the resolved name before its first dot.
+short_name="${newhostname%%.*}"
+set_hostname "${newhostname}"
+echo "${newhostname}" > /etc/hostname
+
+log "Rewriting /etc/hosts from scratch"
+cat <<EOF > /etc/hosts
+127.0.0.1 localhost
+${good_ip} ${newhostname} ${short_name}
+
+# IPv6
+::1 localhost ip6-localhost ip6-loopback
+ff02::1 ip6-allnodes
+ff02::2 ip6-allrouters
+EOF
+
+log "Hostname updated: $(hostname); /etc/hostname and /etc/hosts rewritten."
+# Leave the marker so the next run exits early.
+touch "${HOSTNAME_IS_SET_FILE}"
+exit 0
--- /dev/null
+# One-shot unit that runs netplan-from-link.sh at boot.
+[Unit]
+Description=Write netplan from link carrier once
+# Pull in systemd-networkd; start only after it and the local filesystems.
+Wants=systemd-networkd.service
+After=systemd-networkd.service local-fs.target
+
+[Service]
+Type=oneshot
+ExecStart=/usr/local/sbin/netplan-from-link.sh
+# Keep the unit reported as active once the script has exited.
+RemainAfterExit=yes
+# Script output goes to both the journal and the console.
+StandardOutput=journal+console
+StandardError=journal+console
+
+[Install]
+WantedBy=multi-user.target
--- /dev/null
+#!/usr/bin/env bash
+
+# Abort on any command failure, unset variable, or failed pipeline stage.
+set -euo pipefail
+
+# Netplan file this script generates.
+OUT="/etc/netplan/01-fog.yaml"
+# Touched at the end of a run that found an interface.
+STAMP="/.cephlab_net_configured"
+LOG="/var/log/netplan-from-link.log"
+
+# Create the log world-readable, then mirror all stdout/stderr into it via tee.
+touch "$LOG"
+chmod 0644 "$LOG"
+exec > >(tee -a "$LOG") 2>&1
+
+# log MESSAGE...: write a UTC-timestamped (millisecond resolution) line to stderr.
+log() {
+ echo "$(date -u +%FT%T.%N | cut -c1-23) netplan-from-link: $*" >&2
+}
+
+log "starting"
+log "kernel=$(uname -r)"
+log "cmdline=$(cat /proc/cmdline || true)"
+
+# Remove every existing netplan YAML file up front; $OUT is written later.
+rm -f /etc/netplan/*.yaml || true
+
+# Print the name of the uplink interface on stdout.
+# Preference order: the first interface (skipping loopback and the virtual,
+# tunnel, bridge and bond name prefixes listed below) that reports carrier,
+# then the device of the IPv4 default route.
+# Returns 0 when a name was printed, 1 when nothing suitable was found.
+pick_iface() {
+  # Keep loop state local: the caller keeps its own global named iface.
+  local dev_path iface carrier_file carrier dflt
+
+  for dev_path in /sys/class/net/*; do
+    iface="$(basename "$dev_path")"
+    carrier_file="$dev_path/carrier"
+
+    case "$iface" in
+      lo|docker*|veth*|virbr*|br*|cni*|flannel*|weave*|zt*|wg*|tun*|tap*|sit*|ip6tnl*|gre*|gretap*|erspan*|bond* )
+        continue
+        ;;
+    esac
+
+    # Bring the link up before sampling carrier; a failed read yields "".
+    ip link set dev "$iface" up 2>/dev/null || true
+    carrier="$(cat "$carrier_file" 2>/dev/null || true)"
+    log "probe iface=$iface carrier='${carrier}' path=$carrier_file"
+    if [[ -r "$carrier_file" ]] && [[ "$carrier" == "1" ]]; then
+      log "selected iface=$iface via carrier"
+      echo "$iface"
+      return 0
+    fi
+  done
+
+  # Fallback: the device named after "dev" in the IPv4 default route.
+  dflt="$(ip -4 route show default 2>/dev/null | awk '{for(i=1;i<=NF;i++) if ($i=="dev") {print $(i+1); exit}}' || true)"
+  if [[ -n "${dflt:-}" ]]; then
+    log "selected iface=$dflt via default-route"
+    echo "$dflt"
+    return 0
+  fi
+
+  return 1
+}
+
+# Poll for an uplink interface: up to 30 attempts, one second apart.
+iface=""
+for ((attempt = 1; attempt <= 30; attempt++)); do
+  iface="$(pick_iface || true)"
+  if [[ -n "${iface:-}" ]]; then
+    break
+  fi
+  log "no iface yet (attempt ${attempt}/30); sleeping 1s"
+  sleep 1
+done
+
+# Nothing found: dump link, address and route state for debugging, then exit
+# with status 0 without writing a netplan file or touching the stamp.
+if [[ -z "${iface:-}" ]]; then
+  log "netplan-from-link could not find an uplink interface"
+  log "ip -o link:"
+  ip -o link show || true
+  log "ip -4 addr:"
+  ip -4 addr show || true
+  log "ip -4 route:"
+  ip -4 route show || true
+  exit 0
+fi
+
+# Write a single-interface DHCPv4 netplan config for the selected interface.
+# ${iface} is expanded by the shell inside the unquoted heredoc.
+log "writing netplan to $OUT for iface=$iface"
+cat >"$OUT" <<EOF
+network:
+ version: 2
+ renderer: networkd
+ ethernets:
+ ${iface}:
+ dhcp4: true
+ dhcp6: false
+ optional: false
+ dhcp4-overrides:
+ use-dns: true
+ use-hostname: true
+ nameservers:
+ addresses: [10.20.192.11]
+EOF
+
+# Restrict the generated file to owner read/write.
+chmod 0600 "$OUT"
+
+# Generate and apply are best effort: a failure is not fatal to this script.
+if command -v netplan >/dev/null 2>&1; then
+ log "netplan generate"
+ netplan generate || true
+ log "netplan apply"
+ netplan apply || true
+else
+ log "netplan not found; skipping generate/apply"
+fi
+
+log "final ip -4 addr for iface=$iface"
+ip -4 addr show dev "$iface" || true
+
+# Create the stamp file (the same path cephlab-set-hostname.sh waits for).
+touch "$STAMP"
+log "done; touched $STAMP"
--- /dev/null
+# One-shot unit that runs nm-from-link.sh at boot.
+[Unit]
+Description=Write NetworkManager connection from link carrier once
+# Pull in systemd-udev-settle; start only after it and the local filesystems.
+Wants=systemd-udev-settle.service
+After=systemd-udev-settle.service local-fs.target
+
+[Service]
+Type=oneshot
+ExecStart=/usr/local/sbin/nm-from-link.sh
+# Keep the unit reported as active once the script has exited.
+RemainAfterExit=yes
+# Script output goes to both the journal and the console.
+StandardOutput=journal+console
+StandardError=journal+console
+
+[Install]
+WantedBy=multi-user.target
--- /dev/null
+#!/usr/bin/env bash
+
+# Abort on any command failure, unset variable, or failed pipeline stage.
+set -euo pipefail
+
+# Touched at the end of a run that found an interface.
+STAMP="/.cephlab_net_configured"
+LOG="/var/log/nm-from-link.log"
+
+# Create the log world-readable, then mirror all stdout/stderr into it via tee.
+touch "$LOG"
+chmod 0644 "$LOG"
+exec > >(tee -a "$LOG") 2>&1
+
+# log MESSAGE...: write a UTC-timestamped (millisecond resolution) line to stderr.
+log() {
+ echo "$(date -u +%FT%T.%N | cut -c1-23) nm-from-link: $*" >&2
+}
+
+log "starting"
+
+# Print the name of the uplink interface on stdout (nothing if none is found).
+# Preference order: the first interface (skipping loopback and the virtual,
+# tunnel, bridge and bond name prefixes listed below) that reports carrier,
+# then the device of the IPv4 default route. Always returns 0.
+pick_iface() {
+  # Keep loop state local: the caller keeps its own global named iface.
+  local carrier_file iface carrier
+
+  for carrier_file in /sys/class/net/*/carrier; do
+    iface="$(basename "$(dirname "$carrier_file")")"
+
+    case "$iface" in
+      lo|docker*|veth*|virbr*|br*|cni*|flannel*|weave*|zt*|wg*|tun*|tap*|sit*|ip6tnl*|gre*|gretap*|erspan*|bond* )
+        continue
+        ;;
+    esac
+
+    # A failed carrier read (e.g. link is down) counts as "no carrier" and is
+    # kept quiet, so the 30x polling loop does not fill the log with cat errors.
+    carrier="$(cat "$carrier_file" 2>/dev/null || true)"
+    if [[ "$carrier" == "1" ]]; then
+      echo "$iface"
+      return 0
+    fi
+  done
+
+  # Fallback: the device named after "dev" in the IPv4 default route.
+  ip -4 route show default 2>/dev/null | awk '{for(i=1;i<=NF;i++) if ($i=="dev") {print $(i+1); exit}}' || true
+}
+
+# Poll for an uplink interface: up to 30 attempts, one second apart.
+iface=""
+for ((attempt = 1; attempt <= 30; attempt++)); do
+  iface="$(pick_iface || true)"
+  if [[ -n "${iface:-}" ]]; then
+    break
+  fi
+  sleep 1
+done
+
+# Nothing found: log it and exit with status 0 without touching the stamp.
+# (log already writes to stderr.)
+if [[ -z "${iface:-}" ]]; then
+  log "nm-from-link could not find an uplink interface"
+  exit 0
+fi
+
+# Make sure NetworkManager is enabled and running (best effort).
+systemctl enable --now NetworkManager || true
+
+IFACE="$iface"
+CONN="fog-dhcp-${IFACE}"
+
+# Remove existing connections pinned to this interface (prevents stale MAC/IP settings)
+nmcli -t -f NAME,DEVICE con show \
+  | awk -F: -v d="$IFACE" '$2==d {print $1}' \
+  | while read -r stale; do
+      if [[ -n "$stale" ]]; then
+        nmcli con delete "$stale" || true
+      fi
+    done
+
+# Remove same-named conn if present
+if nmcli -t -f NAME con show | grep -qx "$CONN"; then
+  nmcli con delete "$CONN" || true
+fi
+
+# Create a fresh profile: IPv4 via DHCP, IPv6 ignored, autoconnect on, and the
+# fixed DNS server 10.20.192.11 used instead of DHCP-supplied resolvers.
+nmcli con add type ethernet ifname "$IFACE" con-name "$CONN" ipv4.method auto ipv6.method ignore
+nmcli con mod "$CONN" connection.autoconnect yes
+nmcli con mod "$CONN" ipv4.ignore-auto-dns yes
+nmcli con mod "$CONN" ipv4.dns "10.20.192.11"
+nmcli con up "$CONN" || true
+
+# Create the stamp file (the same path cephlab-set-hostname.sh waits for).
+touch "$STAMP"
--- /dev/null
+# One-shot unit that creates missing SSH host keys and then disables itself.
+[Unit]
+Description=Regenerate SSH host keys on first boot
+# Ordered before ssh.service; skipped when the ed25519 host key already exists.
+Before=ssh.service
+ConditionPathExists=!/etc/ssh/ssh_host_ed25519_key
+
+[Service]
+Type=oneshot
+# ssh-keygen -A generates any host key types that are missing.
+ExecStart=/usr/bin/ssh-keygen -A
+ExecStartPost=/bin/systemctl disable regen-ssh-hostkeys.service
+
+[Install]
+WantedBy=multi-user.target
--- /dev/null
+---
+# Bring the image fully up to date before it is captured.
+- name: Update apt cache
+  when: ansible_facts.os_family == "Debian"
+  apt:
+    update_cache: true
+    cache_valid_time: 3600
+
+- name: Full upgrade (apt dist-upgrade)
+  when: ansible_facts.os_family == "Debian"
+  apt:
+    upgrade: dist
+
+# Registered result is consumed by the role's "Reboot if required" task.
+- name: Check if reboot is required (Debian/Ubuntu)
+  when: ansible_facts.os_family == "Debian"
+  stat:
+    path: /var/run/reboot-required
+  register: deb_reboot_required
+
+# Install and enable the unit that recreates SSH host keys on a cloned node.
+- name: Install one-shot service to regenerate SSH host keys on first boot
+  copy:
+    src: files/regen-ssh-hostkeys.service
+    dest: /etc/systemd/system/regen-ssh-hostkeys.service
+    mode: "0644"
+    owner: root
+    group: root
+
+- name: Reload systemd daemon
+  systemd:
+    daemon_reload: true
+
+- name: Enable regen-ssh-hostkeys.service
+  systemd:
+    enabled: true
+    name: regen-ssh-hostkeys.service
+
+# Time service name used by the time-sync tasks in main.yml.
+- set_fact:
+    ntp_service: ntp
+
+- name: Remove cloud init netplan file
+  file:
+    state: absent
+    path: /etc/netplan/50-cloud-init.yaml
+  failed_when: false
+
+# Script and unit that pick the uplink interface and write the netplan config.
+- name: Install netplan link selection script
+  copy:
+    src: files/netplan-from-link.sh
+    dest: /usr/local/sbin/netplan-from-link.sh
+    mode: "0755"
+    owner: root
+    group: root
+
+- name: Install netplan-from-link systemd unit
+  copy:
+    src: files/netplan-from-link.service
+    dest: /etc/systemd/system/netplan-from-link.service
+    mode: "0644"
+    owner: root
+    group: root
+
+- name: Enable netplan link selection systemd unit
+  systemd:
+    daemon_reload: true
+    name: netplan-from-link.service
+    state: started
+    enabled: true
+
+# Hand networking over from NetworkManager to systemd-networkd.
+- name: Disable NetworkManager
+  systemd:
+    name: NetworkManager
+    state: stopped
+    enabled: false
+  failed_when: false
+
+- name: Enable networkd
+  systemd:
+    daemon_reload: true
+    name: systemd-networkd
+    state: started
+    enabled: true
+
+- name: Avoid wait online hang
+  systemd:
+    name: systemd-networkd-wait-online
+    state: stopped
+    enabled: false
+  failed_when: false
+
+# Both netplan commands are best effort; a failure never aborts the play.
+- name: Fog prep netplan generate
+  command: netplan generate
+  failed_when: false
+  changed_when: false
+
+- name: Fog prep netplan apply
+  command: netplan apply
+  failed_when: false
+  changed_when: true
--- /dev/null
+---
+# Tasks common to all distros
+# We import tasks based on ansible_os_family about halfway through
+
+# Gather facts explicitly; ansible_os_family is used by the conditions below.
+- setup:
+
+- name: Remove lock files, udev rules, logs
+  file:
+    state: absent
+    path: "{{ item }}"
+  loop:
+    - /etc/udev/rules.d/70-persistent-net.rules
+    - /.cephlab_net_configured
+    - /.cephlab_hostname_set
+    - /ceph-qa-ready
+    - /var/log/netplan-from-link.log
+    - /var/log/nm-from-link.log
+    - /var/log/cephlab-set-hostname.log
+    - /var/log/cloud-init-output.log
+    - /var/log/cloud-init.log
+
+# Delete every fstab line that mentions /var/lib/ceph, then unmount it.
+- name: Remove /var/lib/ceph mountpoint from fstab
+  shell: sed -i '/\/var\/lib\/ceph/d' /etc/fstab
+
+- name: Unmount /var/lib/ceph
+  mount:
+    state: unmounted
+    path: /var/lib/ceph
+
+# Distro-specific tasks, selected by OS family.
+- name: Import tasks for RPM-based distros
+  import_tasks: rpm.yml
+  when: ansible_os_family in ["RedHat", "Suse"]
+
+- name: Import tasks for APT-based distros
+  import_tasks: apt.yml
+  when: ansible_os_family == "Debian"
+
+# If we updated the kernel in apt/rpm.yml
+# Each lookup is guarded with default(): a check that was skipped or never
+# registered (no task in this role registers suse_reboot_required) counts as
+# "no reboot needed" instead of failing the play on an undefined variable.
+# needs-restarting -r exits 1 when a reboot is required, so only rc == 1
+# triggers a reboot; other non-zero codes mean the check itself failed.
+- name: Reboot if required
+  reboot:
+    msg: "Rebooting trial node after kernel/package updates"
+    reboot_timeout: 1800
+    connect_timeout: 10
+    test_command: whoami
+  when: >
+    (ansible_facts.os_family == "Debian" and
+     (deb_reboot_required.stat.exists | default(false))) or
+    (ansible_facts.os_family == "RedHat" and
+     (rhel_needs_reboot.rc | default(0)) == 1) or
+    (ansible_facts.os_family == "Suse" and
+     (suse_reboot_required.stat.exists | default(false)))
+
+# ls exits non-zero when no key files exist; ignore_errors keeps the play
+# going and the delete loop then iterates over an empty list.
+- name: Get list of SSH host keys
+  shell: "ls -1 /etc/ssh/ssh_host_*"
+  ignore_errors: true
+  register: ssh_host_keys
+
+- name: Delete SSH host keys so they're generated during firstboot on cloned machines
+  file:
+    state: absent
+    path: "{{ item }}"
+  loop: "{{ ssh_host_keys.stdout_lines|default([]) }}"
+  when: ssh_host_keys is defined
+
+# el <= 7 = ntpd
+# el >= 8 = chronyd
+# Ubuntu = ntp
+- name: "Stop {{ ntp_service }} service"
+  when: "'ntp' in ntp_service"
+  service:
+    name: "{{ ntp_service }}"
+    state: stopped
+
+# The theory here is although we do have the ntp service running on boot,
+# if the time is off, it slowly drifts back in sync. Since our testnodes
+# are ephemeral, they don't ever have enough time to correctly drift
+# back to the correct time. So we'll force it in the captured OS images.
+- name: Install ntpdate command if missing
+  when: "'ntp' in ntp_service"
+  package:
+    name: ntpdate
+    state: present
+
+- name: Force time synchronization using stepping | ntp
+  when: "'ntp' in ntp_service"
+  command: "ntpdate -b {{ ntp_servers|join(' ') }}"
+
+- name: "Start {{ ntp_service }}"
+  service:
+    name: "{{ ntp_service }}"
+    state: started
+
+# chronyd needs to be started in order to force time sync. This differs from ntpd.
+- name: Force time synchronization using stepping | chrony
+  when: "'chrony' in ntp_service"
+  command: chronyc -a makestep
+
+# Write the system time to the hardware clock.
+- name: Sync the hardware clock
+  command: "hwclock --systohc"
+
+# Stop, disable and mask each listed unit. failed_when: false means a unit
+# that cannot be changed on this host does not fail the play.
+- name: Disable cloud init and disruptive apt services
+  systemd:
+    name: "{{ item }}"
+    state: stopped
+    enabled: false
+    masked: true
+  failed_when: false
+  loop:
+    - cloud-init-local.service
+    - cloud-init.service
+    - cloud-config.service
+    - cloud-final.service
+    - unattended-upgrades.service
+    - apt-daily.service
+    - apt-daily-upgrade.service
+    - apt-daily.timer
+    - apt-daily-upgrade.timer
+
+# All three cloud-init tasks are best effort (failed_when: false), so a host
+# where the /etc/cloud tree is missing does not abort the capture prep.
+- name: Disable cloud init networking config
+  copy:
+    dest: /etc/cloud/cloud.cfg.d/99-disable-network-config.cfg
+    owner: root
+    group: root
+    mode: "0644"
+    content: |
+      network:
+        config: disabled
+  failed_when: false
+
+# state: touch fails when the parent directory (/etc/cloud) does not exist,
+# so this task needs the same guard as the tasks around it.
+- name: Disable cloud init completely
+  file:
+    path: /etc/cloud/cloud-init.disabled
+    state: touch
+    owner: root
+    group: root
+    mode: "0644"
+  failed_when: false
+
+- name: Remove cloud init state
+  file:
+    path: /var/lib/cloud
+    state: absent
+  failed_when: false
+
+# Install the hostname script and its unit. The unit is enabled but not
+# started here.
+- name: Install cephlab-set-hostname script
+  copy:
+    src: files/cephlab-set-hostname.sh
+    dest: /usr/local/sbin/
+    mode: "0755"
+    owner: root
+    group: root
+
+- name: Install cephlab-set-hostname systemd unit
+  copy:
+    src: files/cephlab-set-hostname.service
+    dest: /etc/systemd/system/cephlab-set-hostname.service
+    mode: "0644"
+    owner: root
+    group: root
+
+- name: Enable cephlab-set-hostname service
+  systemd:
+    daemon_reload: true
+    name: cephlab-set-hostname.service
+    enabled: true
--- /dev/null
+---
+# Bring the image fully up to date before it is captured (RedHat family only).
+- name: Upgrade all packages to latest (dnf)
+  when: ansible_facts.os_family == "RedHat"
+  dnf:
+    name: "*"
+    state: latest
+    update_cache: true
+
+- name: Ensure dnf-utils present (for needs-restarting)
+  when: ansible_facts.os_family == "RedHat"
+  package:
+    name: dnf-utils
+    state: present
+
+# Never fails and never reports a change; only the registered return code
+# matters, and the role's "Reboot if required" task reads it.
+- name: Check if reboot is required (RHEL family)
+  when: ansible_facts.os_family == "RedHat"
+  command: needs-restarting -r
+  register: rhel_needs_reboot
+  failed_when: false
+  changed_when: false
+
+# List the ifcfg-* files (except ifcfg-lo) in the distro's network script
+# directory. The command only reads, so it is marked as never changing
+# anything; "|| true" keeps it successful when no files match.
+- name: Find existing ifcfg scripts
+  shell: |
+    ls -1 {{ ifcfg_dir }}/ifcfg-* 2>/dev/null | grep -v ifcfg-lo || true
+  vars:
+    ifcfg_dir: >-
+      {{ '/etc/sysconfig/network-scripts'
+         if ansible_os_family == 'RedHat'
+         else '/etc/sysconfig/network'
+         if ansible_os_family == 'Suse'
+         else '' }}
+  register: ifcfg_scripts
+  changed_when: false
+
+# Remove each file found above; an empty list makes this a no-op.
+- name: Delete ifcfg scripts
+  file:
+    path: "{{ item }}"
+    state: absent
+  loop: "{{ ifcfg_scripts.stdout_lines | default([]) }}"
+  when: ifcfg_scripts is defined
+
+# Best effort: a host that is not registered must not fail the play.
+- name: Unsubscribe RHEL
+  when: ansible_distribution == "RedHat"
+  command: subscription-manager unregister
+  failed_when: false
+
+# A file gets leftover when a testnode is registered with Satellite that caused
+# each registered subsequent testnode to report the wrong hostname
+- name: Clean up katello facts
+  when: ansible_distribution == "RedHat"
+  file:
+    state: absent
+    path: /etc/rhsm/facts/katello.facts
+
+# https://bugzilla.redhat.com/show_bug.cgi?id=1814337
+- name: Disable dnf-makecache service
+  when:
+    - ansible_os_family == "RedHat"
+    - ansible_distribution_major_version|int >= 8
+  service:
+    name: dnf-makecache.timer
+    state: stopped
+    enabled: false
+
+# Hopefully fixes https://github.com/ceph/ceph-cm-ansible/pull/544#issuecomment-599076564
+- name: Clean DNF cache
+  when:
+    - ansible_os_family == "RedHat"
+    - ansible_distribution_major_version|int >= 8
+  shell: "dnf clean all && rm -rf /var/cache/dnf/*"
+
+# NOTE(review): this runs for every family that imports rpm.yml; confirm that
+# sshd-keygen.target exists on all of them (EL7, Suse).
+- name: Ensure sshd-keygen is enabled so host keys get regenerated on boot
+  systemd:
+    enabled: true
+    name: sshd-keygen.target
+
+# Time service name used by the time-sync tasks in main.yml.
+- set_fact:
+    ntp_service: ntpd
+  when:
+    - ansible_os_family == "RedHat"
+    - ansible_distribution_major_version|int <= 7
+
+- set_fact:
+    ntp_service: chronyd
+  when: >-
+    (ansible_os_family == "RedHat" and ansible_distribution_major_version|int >= 8)
+    or ansible_os_family == "Suse"
+
+# nm-from-link.service runs /usr/local/sbin/nm-from-link.sh, so the script has
+# to be installed before the unit is started below.
+# NOTE(review): assumes the script ships in the role as files/nm-from-link.sh,
+# mirroring files/netplan-from-link.sh in apt.yml - confirm the path.
+- name: Install network manager link selection script
+  copy:
+    src: files/nm-from-link.sh
+    dest: /usr/local/sbin/nm-from-link.sh
+    owner: root
+    group: root
+    mode: "0755"
+
+- name: Install systemd unit for network manager link selection
+  copy:
+    src: files/nm-from-link.service
+    dest: /etc/systemd/system/nm-from-link.service
+    owner: root
+    group: root
+    mode: "0644"
+
+# daemon_reload makes systemd pick up the unit file installed just above.
+- name: Enable network manager link selection unit
+  systemd:
+    name: nm-from-link.service
+    enabled: true
+    state: started
+    daemon_reload: true