LTDEPENDENCIES += $(LIBHANDLE) $(LIBFROG)
LLDFLAGS = -static
-default: depend $(LTCOMMAND)
+# Name of the systemd template unit for xfs_healer.  The same name is handed
+# to the C compiler as a string-literal macro, XFS_HEALER_SVCNAME.
+XFS_HEALER_SVCNAME=xfs_healer@.service
+CFLAGS += -DXFS_HEALER_SVCNAME=\"$(XFS_HEALER_SVCNAME)\"
+
+# Only when systemd support is enabled: append install-systemd to
+# INSTALL_HEALER, list the unit files (the slice plus the healer template
+# unit) in SYSTEMD_SERVICES, and add them to OPTIONAL_TARGETS.
+ifeq ($(HAVE_SYSTEMD),yes)
+INSTALL_HEALER += install-systemd
+SYSTEMD_SERVICES=\
+ system-xfs_healer.slice \
+ $(XFS_HEALER_SVCNAME)
+OPTIONAL_TARGETS += $(SYSTEMD_SERVICES)
+endif
+
+# The default build also brings the files listed in SYSTEMD_SERVICES up to
+# date; that list is only populated above when HAVE_SYSTEMD is "yes".
+default: depend $(LTCOMMAND) $(SYSTEMD_SERVICES)
+
+# Generate a unit file from its .service.in template by replacing every
+# @pkg_libexec_dir@ placeholder with $(PKG_LIBEXEC_DIR).  $(builddefs) is a
+# prerequisite, so the unit is regenerated whenever it is newer than the unit.
+%.service: %.service.in $(builddefs)
+ @echo " [SED] $@"
+ $(Q)$(SED) -e "s|@pkg_libexec_dir@|$(PKG_LIBEXEC_DIR)|g" \
+ < $< > $@
include $(BUILDRULES)
$(INSTALL) -m 755 -d $(PKG_LIBEXEC_DIR)
$(INSTALL) -m 755 $(LTCOMMAND) $(PKG_LIBEXEC_DIR)
+# Create the systemd unit directory, then install the files listed in
+# SYSTEMD_SERVICES into it as non-executable files (mode 644).
+install-systemd: default
+ $(INSTALL) -m 755 -d $(SYSTEMD_SYSTEM_UNIT_DIR)
+ $(INSTALL) -m 644 $(SYSTEMD_SERVICES) $(SYSTEMD_SYSTEM_UNIT_DIR)
+
install-dev:
-include .dep
--- /dev/null
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2024-2026 Oracle. All Rights Reserved.
+# Author: Darrick J. Wong <djwong@kernel.org>
+
+[Unit]
+Description=xfs_healer background service slice
+Before=slices.target
+
+[Slice]
+
+# If the CPU usage cgroup controller is available, don't use more than 2 cores
+# for all background processes. One thread to read events, another to run
+# repairs.
+CPUQuota=200%
+CPUAccounting=true
+
+[Install]
+# As of systemd 249, the systemd cgroupv2 configuration code will drop resource
+# controllers from the root and system.slice cgroups at startup if it doesn't
+# find any direct dependencies that require a given controller. Newly
+# activated units with resource control directives are created under the system
+# slice but do not cause a reconfiguration of the slice's resource controllers.
+# Hence we cannot put CPUQuota= into the xfs_healer service units directly.
+#
+# For the CPUQuota directive to have any effect, we must therefore create an
+# explicit definition file for the slice that systemd creates to contain the
+# xfs_healer instance units (e.g. xfs_healer@.service) and we must configure
+# this slice as a dependency of the system slice to establish the direct
+# dependency relation.
+WantedBy=system.slice
LOPT_HELP,
LOPT_QUIET,
LOPT_REPAIR,
+ LOPT_SVCNAME,
LOPT_MAX,
};
[LOPT_HELP] = {"help", no_argument, NULL, 0 },
[LOPT_QUIET] = {"quiet", no_argument, &ctx.log, 0 },
[LOPT_REPAIR] = {"repair", no_argument, &ctx.want_repair, 1 },
+ [LOPT_SVCNAME] = {"svcname", no_argument, &ctx.print_svcname, 1 },
[LOPT_MAX] = {NULL, 0, NULL, 0 },
};
ctx.mntpoint = argv[optind];
+ /*
+ * --svcname mode: print the systemd unit name built from the
+ * XFS_HEALER_SVCNAME template and the given mountpoint, then exit
+ * without setting up the monitor.
+ *
+ * NOTE(review): perror() reports errno; confirm that
+ * systemd_path_instance_unit_name() sets errno on failure rather than
+ * only returning an error code.
+ */
+ if (ctx.print_svcname) {
+ char unitname[PATH_MAX];
+
+ ret = systemd_path_instance_unit_name(XFS_HEALER_SVCNAME,
+ ctx.mntpoint, unitname, sizeof(unitname));
+ if (ret) {
+ perror(ctx.mntpoint);
+ return EXIT_FAILURE;
+ }
+
+ printf("%s\n", unitname);
+ return EXIT_SUCCESS;
+ }
+
ret = setup_monitor(&ctx);
if (ret)
goto out_events;
--- /dev/null
+# SPDX-License-Identifier: GPL-2.0-or-later
+#
+# Copyright (c) 2024-2026 Oracle. All Rights Reserved.
+# Author: Darrick J. Wong <djwong@kernel.org>
+
+[Unit]
+Description=Self Healing of XFS Metadata for %f
+
+# Explicitly require the capabilities that this program needs
+ConditionCapability=CAP_SYS_ADMIN
+ConditionCapability=CAP_DAC_OVERRIDE
+
+# Must be a mountpoint
+ConditionPathIsMountPoint=%f
+RequiresMountsFor=%f
+
+[Service]
+Type=exec
+Environment=SERVICE_MODE=1
+ExecStart=@pkg_libexec_dir@/xfs_healer %f
+SyslogIdentifier=%N
+
+# Create the service underneath the healer background service slice so that we
+# can control resource usage.
+Slice=system-xfs_healer.slice
+
+# No realtime CPU scheduling
+RestrictRealtime=true
+
+# xfs_healer avoids pinning mounted filesystems by recording the file handle
+# for the provided mountpoint (%f) before opening the health monitor, after
+# which it closes the fd for the mountpoint. If repairs are needed, it will
+# reopen the mountpoint, resample the file handle, and proceed only if the
+# handles match. If the filesystem is unmounted, the daemon exits. If the
+# mountpoint moves, repairs will not be attempted against the wrong filesystem.
+#
+# Due to this resampling behavior, xfs_healer must see the same filesystem
+# mount tree inside the service container as outside, with the same ro/rw
+# state. BindPaths doesn't work on the paths that are made readonly by
+# ProtectSystem and ProtectHome, so it is not possible to set either option.
+# DynamicUser sets ProtectSystem, so that also cannot be used. We cannot use
+# BindPaths to bind the desired mountpoint somewhere under /tmp like xfs_scrub
+# does because that pins the mount.
+#
+# Regrettably, this leaves xfs_healer less hardened than xfs_scrub.
+# Surprisingly, this doesn't affect xfs_healer's score dramatically.
+DynamicUser=false
+ProtectSystem=false
+ProtectHome=no
+PrivateTmp=true
+PrivateDevices=true
+
+# Don't let healer complain about paths in /etc/projects that have been hidden
+# by our sandboxing. healer doesn't care about project ids anyway.
+InaccessiblePaths=-/etc/projects
+
+# No network access
+PrivateNetwork=true
+ProtectHostname=true
+RestrictAddressFamilies=none
+IPAddressDeny=any
+
+# Don't let the program mess with the kernel configuration at all
+ProtectKernelLogs=true
+ProtectKernelModules=true
+ProtectKernelTunables=true
+ProtectControlGroups=true
+ProtectProc=invisible
+RestrictNamespaces=true
+
+# Hide everything in /proc, even /proc/mounts
+ProcSubset=pid
+
+# Only allow the default Linux personality
+LockPersonality=true
+
+# No memory pages that are both writable and executable
+MemoryDenyWriteExecute=true
+
+# Don't let our mounts leak out to the host
+PrivateMounts=true
+
+# Restrict system calls to the native arch and only enough to get things going
+SystemCallArchitectures=native
+SystemCallFilter=@system-service
+SystemCallFilter=~@privileged
+SystemCallFilter=~@resources
+SystemCallFilter=~@mount
+
+# xfs_healer needs these privileges to open the rootdir and monitor
+CapabilityBoundingSet=CAP_SYS_ADMIN CAP_DAC_OVERRIDE
+AmbientCapabilities=CAP_SYS_ADMIN CAP_DAC_OVERRIDE
+NoNewPrivileges=true
+
+# xfs_healer doesn't create files
+UMask=7777
+
+# No access to hardware /dev files, block devices included: this unit does not
+# whitelist anything with DeviceAllow=
+ProtectClock=true
+DevicePolicy=closed
+
+[Install]
+WantedBy=multi-user.target
+# If someone tries to enable the template itself, translate that into enabling
+# this service on the root directory at systemd startup time (the instance
+# name "-" is the systemd path escape for "/"). In the initramfs, the udev
+# rules in xfs_healer.rules run before systemd starts.
+DefaultInstance=-