From: Darrick J. Wong Date: Sun, 22 Feb 2026 22:41:13 +0000 (-0800) Subject: xfs_io: monitor filesystem health events X-Git-Tag: v7.0.0~49 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=eba72901c88ca51aeeeb162ab5679fbd02c6a0ba;p=xfsprogs-dev.git xfs_io: monitor filesystem health events Create a subcommand to monitor for health events generated by the kernel. Signed-off-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig --- diff --git a/io/Makefile b/io/Makefile index 444e2d6a..8e378335 100644 --- a/io/Makefile +++ b/io/Makefile @@ -25,6 +25,7 @@ CFILES = \ fsuuid.c \ fsync.c \ getrusage.c \ + healthmon.c \ imap.c \ init.c \ inject.c \ diff --git a/io/healthmon.c b/io/healthmon.c new file mode 100644 index 00000000..77042054 --- /dev/null +++ b/io/healthmon.c @@ -0,0 +1,191 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2024-2026 Oracle. All Rights Reserved. + * Author: Darrick J. Wong + */ +#include "libxfs.h" +#include "libfrog/fsgeom.h" +#include "libfrog/paths.h" +#include "libfrog/healthevent.h" +#include "command.h" +#include "init.h" +#include "io.h" + +static void +healthmon_help(void) +{ + printf(_( +"Monitor filesystem health events" +"\n" +"-c Replace the open file with the monitor file.\n" +"-d delay_ms Sleep this many milliseconds between reads.\n" +"-p Only probe for the existence of the ioctl.\n" +"-v Request all events.\n" +"\n")); +} + +static inline int +monitor_sleep( + int delay_ms) +{ + struct timespec ts; + + if (!delay_ms) + return 0; + + ts.tv_sec = delay_ms / 1000; + ts.tv_nsec = (delay_ms % 1000) * 1000000; + + return nanosleep(&ts, NULL); +} + +static int +monitor( + size_t bufsize, + bool consume, + int delay_ms, + bool verbose, + bool only_probe) +{ + struct xfs_health_monitor hmo = { + .format = XFS_HEALTH_MONITOR_FMT_V0, + }; + struct hme_prefix pfx; + void *buf; + ssize_t bytes_read; + int mon_fd; + int ret = 1; + + hme_prefix_init(&pfx, file->name); + + if (verbose) + hmo.flags |= XFS_HEALTH_MONITOR_ALL; + + mon_fd = ioctl(file->fd, XFS_IOC_HEALTH_MONITOR, &hmo); + if (mon_fd < 0) { + perror("XFS_IOC_HEALTH_MONITOR"); + return 1; + } + + if (only_probe) { + ret = 0; + goto out_mon; + } + + buf = malloc(bufsize); + if (!buf) { + perror("malloc"); + goto out_mon; + } + + if (consume) { + close(file->fd); + file->fd = mon_fd; + } + + monitor_sleep(delay_ms); + while ((bytes_read = read(mon_fd, buf, bufsize)) > 0) { + struct xfs_health_monitor_event *hme = buf; + + while (bytes_read >= sizeof(*hme)) { + hme_report_event(&pfx, hme); + hme++; + bytes_read -= sizeof(*hme); + } + if (bytes_read > 0) { + printf("healthmon: %zu bytes remain?\n", bytes_read); + fflush(stdout); + } + + monitor_sleep(delay_ms); + } + if (bytes_read < 0) { + perror("healthmon"); + goto out_buf; + } + + ret = 0; + +out_buf: + free(buf); +out_mon: + if (!consume) + close(mon_fd); + return ret; +} + +static int +healthmon_f( + int argc, + char **argv) +{ + size_t bufsize = 4096; + bool consume = false; + bool verbose = false; + bool only_probe = false; + int delay_ms = 0; + int c; + + while ((c = getopt(argc, argv, "b:cd:pv")) != EOF) { + switch (c) { + case 'b': + const size_t minsz = + sizeof(struct xfs_health_monitor_event); + + errno = 0; + c = atoi(optarg); + if (c < minsz || errno) { + printf( + "%s: bufsize must be at least one event %zd\n", + optarg, minsz); + exitcode = 1; + return 0; + } + bufsize = c; + break; + case 'c': + consume = true; + break; + case 'd': + errno = 0; + delay_ms = atoi(optarg); + if (delay_ms < 0 || errno) { + printf("%s: delay must be positive msecs\n", + optarg); + exitcode = 1; + return 0; + } + break; + case 'p': + only_probe = true; + break; + case 'v': + verbose = true; + break; + default: + exitcode = 1; + healthmon_help(); + return 0; + } + } + + return monitor(bufsize, consume, delay_ms, verbose, only_probe); +} + +static struct cmdinfo healthmon_cmd = { + .name = "healthmon", + .cfunc = healthmon_f, + .argmin = 0, + .argmax = -1, + .flags = CMD_FLAG_ONESHOT | CMD_NOMAP_OK, + .args = "[-c] [-d delay_ms] [-v]", + .help = healthmon_help, +}; + +void +healthmon_init(void) +{ + healthmon_cmd.oneline = _("monitor filesystem health events"); + + add_command(&healthmon_cmd); +} diff --git a/io/init.c b/io/init.c index 49e9e7cb..cb5573f4 100644 --- a/io/init.c +++ b/io/init.c @@ -92,6 +92,7 @@ init_commands(void) crc32cselftest_init(); exchangerange_init(); fsprops_init(); + healthmon_init(); } /* diff --git a/io/io.h b/io/io.h index 35fb8339..2f5262bc 100644 --- a/io/io.h +++ b/io/io.h @@ -162,3 +162,4 @@ extern void bulkstat_init(void); void exchangerange_init(void); void fsprops_init(void); void aginfo_init(void); +void healthmon_init(void); diff --git a/man/man8/xfs_io.8 b/man/man8/xfs_io.8 index 0a673322..f7f2956a 100644 --- a/man/man8/xfs_io.8 +++ b/man/man8/xfs_io.8 @@ -1356,6 +1356,31 @@ Only available in expert mode and requires privileges. .B thaw Undo the effects of a filesystem freeze operation. Only available in expert mode and requires privileges. +.TP +.BI "healthmon [ \-c " bufsize " ] [ \-c ] [ \-d " delay_ms " ] [ \-p ] [ \-v ]" +Watch for filesystem health events and write them to the console. +.RE +.RS 1.0i +.PD 0 +.TP +.BI "\-b " bufsize +Use a buffer of this size to read events from the kernel. +.TP +.BI \-c +Close the open file and replace it with the monitor file. +.TP +.BI "\-d " delay_ms +Sleep for this long between read attempts. +.TP +.B \-p +Probe for the existence of the functionality by opening the monitoring fd and +closing it immediately. +.TP +.BI \-v +Request all health events, even if nothing changed. +.PD +.RE + .TP .BI "inject [ " tag " ]" Inject errors into a filesystem to observe filesystem behavior at