Ceph AIO installations, whether single-node or multi-node, do not work
well with loopback mounts; in particular, they always hit a deadlock
during graceful system reboot.
We already have an `rbdmap.service` that is friendly to graceful system
reboot, as shown below:
[Unit]
After=network-online.target
Before=remote-fs-pre.target
Wants=network-online.target remote-fs-pre.target
[Service]
ExecStart=/usr/bin/rbdmap map
ExecReload=/usr/bin/rbdmap map
ExecStop=/usr/bin/rbdmap unmap-all
This PR introduces:
- `ceph-mon.target`: Ensure startup after `network-online.target` and
before `remote-fs-pre.target`
- `ceph-*.target`: Ensure startup after `ceph-mon.target` and before
`remote-fs-pre.target`
- `rbdmap.service`: Once all `_netdev` mounts have been unmounted by
`remote-fs.target`, ensure all RBD devices are unmapped BEFORE any Ceph
components under `ceph.target` are stopped during shutdown
The logic has been validated as a proof of concept by
<https://github.com/alvistack/ansible-role-ceph_common/tree/develop>;
also works as expected with Ceph + Kubernetes deployment by
<https://github.com/alvistack/ansible-collection-kubernetes/tree/develop>.
No more deadlocks occur during graceful system reboot, for both
single-node and multi-node AIO setups with loopback mounts.
Also see:
- <https://github.com/ceph/ceph/pull/36776>
- <https://github.com/etcd-io/etcd/pull/12259>
- <https://github.com/cri-o/cri-o/pull/4128>
- <https://github.com/kubernetes/release/pull/1504>
Fixes: https://tracker.ceph.com/issues/47528
Signed-off-by: Wong Hoi Sing Edison <hswong3i@gmail.com>
(cherry picked from commit
d88c834ea44bd67cfde0bd11ec4ded079b76d11a)
Conflicts:
systemd/ceph-immutable-object-cache.target
- only exists in the octopus and master branches; does not exist in the nautilus branch
systemd/ceph-mgr@.service.in
systemd/ceph-mon@.service.in
- lines reordered because the original branch differs; no logical changes
Description=ceph target allowing to start/stop all ceph-fuse@.service instances at once
PartOf=ceph.target
Before=ceph.target
+
[Install]
WantedBy=remote-fs.target ceph.target
[Unit]
Description=ceph target allowing to start/stop all ceph-mds@.service instances at once
PartOf=ceph.target
+After=ceph-mon.target
Before=ceph.target
+Wants=ceph.target ceph-mon.target
+
[Install]
WantedBy=multi-user.target ceph.target
[Unit]
Description=Ceph metadata server daemon
-After=network-online.target local-fs.target time-sync.target
-Wants=network-online.target local-fs.target time-sync.target
PartOf=ceph-mds.target
+After=network-online.target local-fs.target time-sync.target
+Before=remote-fs-pre.target ceph-mds.target
+Wants=network-online.target local-fs.target time-sync.target remote-fs-pre.target ceph-mds.target
[Service]
LimitNOFILE=1048576
[Unit]
Description=ceph target allowing to start/stop all ceph-mgr@.service instances at once
PartOf=ceph.target
+After=ceph-mon.target
Before=ceph.target
+Wants=ceph.target ceph-mon.target
+
[Install]
WantedBy=multi-user.target ceph.target
[Unit]
Description=Ceph cluster manager daemon
-After=network-online.target local-fs.target time-sync.target
-Wants=network-online.target local-fs.target time-sync.target
PartOf=ceph-mgr.target
+After=network-online.target local-fs.target time-sync.target
+Before=remote-fs-pre.target ceph-mgr.target
+Wants=network-online.target local-fs.target time-sync.target remote-fs-pre.target ceph-mgr.target
[Service]
LimitNOFILE=1048576
ExecStart=/usr/bin/ceph-mgr -f --cluster ${CLUSTER} --id %i --setuser ceph --setgroup ceph
ExecReload=/bin/kill -HUP $MAINPID
LockPersonality=true
-
# We need to disable this protection as some python libraries generate
# dynamic code, like python-cffi, and require mmap calls to succeed
MemoryDenyWriteExecute=false
-
NoNewPrivileges=true
PrivateDevices=yes
ProtectControlGroups=true
Description=ceph target allowing to start/stop all ceph-mon@.service instances at once
PartOf=ceph.target
Before=ceph.target
+Wants=ceph.target
+
[Install]
WantedBy=multi-user.target ceph.target
[Unit]
Description=Ceph cluster monitor daemon
-
+PartOf=ceph-mon.target
# According to:
# http://www.freedesktop.org/wiki/Software/systemd/NetworkTarget
# these can be removed once ceph-mon will dynamically change network
# configuration.
After=network-online.target local-fs.target time-sync.target
-Wants=network-online.target local-fs.target time-sync.target
-
-PartOf=ceph-mon.target
+Before=remote-fs-pre.target ceph-mon.target
+Wants=network-online.target local-fs.target time-sync.target remote-fs-pre.target ceph-mon.target
[Service]
LimitNOFILE=1048576
[Unit]
Description=ceph target allowing to start/stop all ceph-osd@.service instances at once
PartOf=ceph.target
+After=ceph-mon.target
Before=ceph.target
+Wants=ceph.target ceph-mon.target
+
[Install]
WantedBy=multi-user.target ceph.target
[Unit]
Description=Ceph object storage daemon osd.%i
-After=network-online.target local-fs.target time-sync.target ceph-mon.target
-Wants=network-online.target local-fs.target time-sync.target
PartOf=ceph-osd.target
+After=network-online.target local-fs.target time-sync.target
+Before=remote-fs-pre.target ceph-osd.target
+Wants=network-online.target local-fs.target time-sync.target remote-fs-pre.target ceph-osd.target
[Service]
LimitNOFILE=1048576
[Unit]
Description=ceph target allowing to start/stop all ceph-radosgw@.service instances at once
PartOf=ceph.target
+After=ceph-mon.target
Before=ceph.target
+Wants=ceph.target ceph-mon.target
+
[Install]
WantedBy=multi-user.target ceph.target
[Unit]
Description=Ceph rados gateway
-After=network-online.target local-fs.target time-sync.target
-Wants=network-online.target local-fs.target time-sync.target
PartOf=ceph-radosgw.target
+After=network-online.target local-fs.target time-sync.target
+Before=remote-fs-pre.target ceph-radosgw.target
+Wants=network-online.target local-fs.target time-sync.target remote-fs-pre.target ceph-radosgw.target
[Service]
LimitNOFILE=1048576
Description=ceph target allowing to start/stop all ceph-rbd-mirror@.service instances at once
PartOf=ceph.target
Before=ceph.target
+
[Install]
WantedBy=multi-user.target ceph.target
[Unit]
Description=ceph target allowing to start/stop all ceph*@.service instances at once
+
[Install]
WantedBy=multi-user.target
[Unit]
Description=Map RBD devices
-
-After=network-online.target
+After=network-online.target ceph.target
Before=remote-fs-pre.target
-Wants=network-online.target remote-fs-pre.target
+Wants=network-online.target remote-fs-pre.target ceph.target
[Service]
EnvironmentFile=-@SYSTEMD_ENV_FILE@