]> git-server-git.apps.pok.os.sepia.ceph.com Git - xfsprogs-dev.git/commitdiff
libfrog: create healthmon event log library functions
authorDarrick J. Wong <djwong@kernel.org>
Sun, 22 Feb 2026 22:41:12 +0000 (14:41 -0800)
committerDarrick J. Wong <djwong@kernel.org>
Thu, 9 Apr 2026 22:30:16 +0000 (15:30 -0700)
Add some helper functions to log health monitoring events so that xfs_io
and xfs_healer can share logging code.

Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
libfrog/Makefile
libfrog/flagmap.c [new file with mode: 0644]
libfrog/flagmap.h [new file with mode: 0644]
libfrog/healthevent.c [new file with mode: 0644]
libfrog/healthevent.h [new file with mode: 0644]

index 927bd8d0957fabf3bf566af12594809e36147c31..bccd9289e5dd799e00a428f344b2e0e82d663f3d 100644 (file)
@@ -19,11 +19,13 @@ bulkstat.c \
 convert.c \
 crc32.c \
 file_exchange.c \
+flagmap.c \
 fsgeom.c \
 fsproperties.c \
 fsprops.c \
 getparents.c \
 histogram.c \
+healthevent.c \
 file_attr.c \
 list_sort.c \
 linux.c \
@@ -51,11 +53,13 @@ dahashselftest.h \
 div64.h \
 fakelibattr.h \
 file_exchange.h \
+flagmap.h \
 fsgeom.h \
 fsproperties.h \
 fsprops.h \
 getparents.h \
 handle_priv.h \
+healthevent.h \
 histogram.h \
 file_attr.h \
 logging.h \
diff --git a/libfrog/flagmap.c b/libfrog/flagmap.c
new file mode 100644 (file)
index 0000000..1a8bdd2
--- /dev/null
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2026 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+
+#include "platform_defs.h"
+#include "libfrog/flagmap.h"
+
+/*
+ * Given a mapping of bits to strings and a bitmask, format the bitmask as a
+ * list of strings and hexadecimal number representing bits not mapped to any
+ * string.  The output will be truncated if buf is not large enough.
+ */
+void
+mask_to_string(
+       const struct flag_map   *map,
+       unsigned long long      mask,
+       const char              *delimiter,
+       char                    *buf,
+       size_t                  bufsize)
+{
+       const char              *tag = "";
+       unsigned long long      seen = 0;
+       int                     w;
+
+       for (; map->string; map++) {
+               seen |= map->flag;
+
+               if (mask & map->flag) {
+                       w = snprintf(buf, bufsize, "%s%s", tag, _(map->string));
+                       if (w >= bufsize)
+                               return;
+
+                       buf += w;
+                       bufsize -= w;
+
+                       tag = delimiter;
+               }
+       }
+
+       if (mask & ~seen)
+               snprintf(buf, bufsize, "%s0x%llx", tag, mask & ~seen);
+}
+
+/*
+ * Given a mapping of values to strings and a value, return the matching string
+ * or confusion.
+ */
+const char *
+value_to_string(
+       const struct flag_map   *map,
+       unsigned long long      value)
+{
+       for (; map->string; map++) {
+               if (value == map->flag)
+                       return _(map->string);
+       }
+
+       return _("unknown value");
+}
diff --git a/libfrog/flagmap.h b/libfrog/flagmap.h
new file mode 100644 (file)
index 0000000..8031d75
--- /dev/null
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2025-2026 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#ifndef LIBFROG_FLAGMAP_H_
+#define LIBFROG_FLAGMAP_H_
+
+struct flag_map {
+       unsigned long long      flag;
+       const char              *string;
+};
+
+void mask_to_string(const struct flag_map *map, unsigned long long mask,
+               const char *delimiter, char *buf, size_t bufsize);
+
+const char *value_to_string(const struct flag_map *map,
+               unsigned long long value);
+
+#endif /* LIBFROG_FLAGMAP_H_ */
diff --git a/libfrog/healthevent.c b/libfrog/healthevent.c
new file mode 100644 (file)
index 0000000..8520cb3
--- /dev/null
@@ -0,0 +1,360 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2025-2026 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+
+#include "platform_defs.h"
+#include "libfrog/healthevent.h"
+#include "libfrog/flagmap.h"
+
+/*
+ * The healthmon log string format is as follows:
+ *
+ * WHICH OBJECT: STATUS
+ *
+ * /mnt: 32 events lost
+ * /mnt agno 0x5 bnobt, rmapbt: sick
+ * /mnt rgno 0x5 bitmap: sick
+ * /mnt ino 13 gen 0x3 bmbtd: sick
+ * /mnt/a bmbtd: sick
+ * /mnt ino 13 gen 0x3 pos 4096 len 4096: directio_write failed
+ * /mnt/a pos 4096 len 4096: directio_read failed
+ * /mnt datadev daddr 0x13 bbcount 0x5: media error
+ * /mnt: filesystem shut down due to shenanigans, badness
+ */
+
+static const struct flag_map device_domains[] = {
+       { XFS_HEALTH_MONITOR_DOMAIN_DATADEV,    N_("datadev") },
+       { XFS_HEALTH_MONITOR_DOMAIN_RTDEV,      N_("rtdev") },
+       { XFS_HEALTH_MONITOR_DOMAIN_LOGDEV,     N_("logdev") },
+       {0, NULL},
+};
+
+static inline const char *
+device_domain_string(
+       uint32_t                domain)
+{
+       return value_to_string(device_domains, domain);
+}
+
+static const struct flag_map fileio_types[] = {
+       { XFS_HEALTH_MONITOR_TYPE_BUFREAD,      N_("buffered_read") },
+       { XFS_HEALTH_MONITOR_TYPE_BUFWRITE,     N_("buffered_write") },
+       { XFS_HEALTH_MONITOR_TYPE_DIOREAD,      N_("directio_read") },
+       { XFS_HEALTH_MONITOR_TYPE_DIOWRITE,     N_("directio_write") },
+       { XFS_HEALTH_MONITOR_TYPE_DATALOST,     N_("media") },
+       {0, NULL},
+};
+
+static inline const char *
+fileio_type_string(
+       uint32_t                type)
+{
+       return value_to_string(fileio_types, type);
+}
+
+static const struct flag_map health_types[] = {
+       { XFS_HEALTH_MONITOR_TYPE_SICK,         N_("sick") },
+       { XFS_HEALTH_MONITOR_TYPE_CORRUPT,      N_("corrupt") },
+       { XFS_HEALTH_MONITOR_TYPE_HEALTHY,      N_("healthy") },
+       {0, NULL},
+};
+
+static inline const char *
+health_type_string(
+       uint32_t                type)
+{
+       return value_to_string(health_types, type);
+}
+
+/* Report that the kernel lost events. */
+static void
+report_lost(
+       const struct hme_prefix                 *pfx,
+       const struct xfs_health_monitor_event   *hme)
+{
+       printf("%s: %llu %s\n", pfx->mountpoint,
+                       (unsigned long long)hme->e.lost.count,
+                       _("events lost"));
+       fflush(stdout);
+}
+
+/* Report that the monitor is running. */
+static void
+report_running(
+       const struct hme_prefix                 *pfx,
+       const struct xfs_health_monitor_event   *hme)
+{
+       printf("%s: %s\n", pfx->mountpoint, _("monitoring started"));
+       fflush(stdout);
+}
+
+/* Report that the filesystem was unmounted. */
+static void
+report_unmounted(
+       const struct hme_prefix                 *pfx,
+       const struct xfs_health_monitor_event   *hme)
+{
+       printf("%s: %s\n", pfx->mountpoint, _("filesystem unmounted"));
+       fflush(stdout);
+}
+
+static const struct flag_map shutdown_reasons[] = {
+       { XFS_HEALTH_SHUTDOWN_META_IO_ERROR,    N_("metadata I/O error") },
+       { XFS_HEALTH_SHUTDOWN_LOG_IO_ERROR,     N_("log I/O error") },
+       { XFS_HEALTH_SHUTDOWN_FORCE_UMOUNT,     N_("forced unmount") },
+       { XFS_HEALTH_SHUTDOWN_CORRUPT_INCORE,   N_("in-memory state corruption") },
+       { XFS_HEALTH_SHUTDOWN_CORRUPT_ONDISK,   N_("ondisk metadata corruption") },
+       { XFS_HEALTH_SHUTDOWN_DEVICE_REMOVED,   N_("device removed") },
+       {0, NULL},
+};
+
+/* Report an abortive shutdown of the filesystem. */
+static void
+report_shutdown(
+       const struct hme_prefix                 *pfx,
+       const struct xfs_health_monitor_event   *hme)
+{
+       char                                    buf[512];
+
+       mask_to_string(shutdown_reasons, hme->e.shutdown.reasons, ", ", buf,
+                       sizeof(buf));
+
+       printf("%s: %s %s\n", pfx->mountpoint,
+                       _("filesystem shut down due to"), buf);
+       fflush(stdout);
+}
+
+static const struct flag_map inode_structs[] = {
+       { XFS_BS_SICK_INODE,    N_("core") },
+       { XFS_BS_SICK_BMBTD,    N_("datafork") },
+       { XFS_BS_SICK_BMBTA,    N_("attrfork") },
+       { XFS_BS_SICK_BMBTC,    N_("cowfork") },
+       { XFS_BS_SICK_DIR,      N_("directory") },
+       { XFS_BS_SICK_XATTR,    N_("xattr") },
+       { XFS_BS_SICK_SYMLINK,  N_("symlink") },
+       { XFS_BS_SICK_PARENT,   N_("parent") },
+       { XFS_BS_SICK_DIRTREE,  N_("dirtree") },
+       {0, NULL},
+};
+
+/* Report inode metadata corruption */
+static void
+report_inode(
+       const struct hme_prefix                 *pfx,
+       const struct xfs_health_monitor_event   *hme)
+{
+       char                                    buf[512];
+
+       mask_to_string(inode_structs, hme->e.inode.mask, ", ", buf,
+                       sizeof(buf));
+
+       if (hme_prefix_has_path(pfx))
+               printf("%s %s: %s\n",
+                               pfx->path,
+                               buf,
+                               health_type_string(hme->type));
+       else
+               printf("%s %s %llu %s 0x%x %s: %s\n",
+                               pfx->mountpoint,
+                               _("ino"),
+                               (unsigned long long)hme->e.inode.ino,
+                               _("gen"),
+                               hme->e.inode.gen,
+                               buf,
+                               health_type_string(hme->type));
+       fflush(stdout);
+}
+
+static const struct flag_map ag_structs[] = {
+       { XFS_AG_GEOM_SICK_SB,          N_("super") },
+       { XFS_AG_GEOM_SICK_AGF,         N_("agf") },
+       { XFS_AG_GEOM_SICK_AGFL,        N_("agfl") },
+       { XFS_AG_GEOM_SICK_AGI,         N_("agi") },
+       { XFS_AG_GEOM_SICK_BNOBT,       N_("bnobt") },
+       { XFS_AG_GEOM_SICK_CNTBT,       N_("cntbt") },
+       { XFS_AG_GEOM_SICK_INOBT,       N_("inobt") },
+       { XFS_AG_GEOM_SICK_FINOBT,      N_("finobt") },
+       { XFS_AG_GEOM_SICK_RMAPBT,      N_("rmapbt") },
+       { XFS_AG_GEOM_SICK_REFCNTBT,    N_("refcountbt") },
+       { XFS_AG_GEOM_SICK_INODES,      N_("inodes") },
+       {0, NULL},
+};
+
+/* Report AG metadata corruption */
+static void
+report_ag(
+       const struct hme_prefix                 *pfx,
+       const struct xfs_health_monitor_event   *hme)
+{
+       char                                    buf[512];
+
+       mask_to_string(ag_structs, hme->e.group.mask, ", ", buf,
+                       sizeof(buf));
+
+       printf("%s %s 0x%x %s: %s\n",
+                       pfx->mountpoint,
+                       _("agno"),
+                       hme->e.group.gno,
+                       buf,
+                       health_type_string(hme->type));
+       fflush(stdout);
+}
+
+static const struct flag_map rtgroup_structs[] = {
+       { XFS_RTGROUP_GEOM_SICK_SUPER,          N_("super") },
+       { XFS_RTGROUP_GEOM_SICK_BITMAP,         N_("bitmap") },
+       { XFS_RTGROUP_GEOM_SICK_SUMMARY,        N_("summary") },
+       { XFS_RTGROUP_GEOM_SICK_RMAPBT,         N_("rmapbt") },
+       { XFS_RTGROUP_GEOM_SICK_REFCNTBT,       N_("refcountbt") },
+       {0, NULL},
+};
+
+/* Report rtgroup metadata corruption */
+static void
+report_rtgroup(
+       const struct hme_prefix                 *pfx,
+       const struct xfs_health_monitor_event   *hme)
+{
+       char                                    buf[512];
+
+       mask_to_string(rtgroup_structs, hme->e.group.mask, ", ", buf,
+                       sizeof(buf));
+
+       printf("%s %s 0x%x %s: %s\n",
+                       pfx->mountpoint,
+                       _("rgno"),
+                       hme->e.group.gno,
+                       buf, health_type_string(hme->type));
+       fflush(stdout);
+}
+
+static const struct flag_map fs_structs[] = {
+       { XFS_FSOP_GEOM_SICK_COUNTERS,          N_("fscounters") },
+       { XFS_FSOP_GEOM_SICK_UQUOTA,            N_("usrquota") },
+       { XFS_FSOP_GEOM_SICK_GQUOTA,            N_("grpquota") },
+       { XFS_FSOP_GEOM_SICK_PQUOTA,            N_("prjquota") },
+       { XFS_FSOP_GEOM_SICK_RT_BITMAP,         N_("bitmap") },
+       { XFS_FSOP_GEOM_SICK_RT_SUMMARY,        N_("summary") },
+       { XFS_FSOP_GEOM_SICK_QUOTACHECK,        N_("quotacheck") },
+       { XFS_FSOP_GEOM_SICK_NLINKS,            N_("nlinks") },
+       { XFS_FSOP_GEOM_SICK_METADIR,           N_("metadir") },
+       { XFS_FSOP_GEOM_SICK_METAPATH,          N_("metapath") },
+       {0, NULL},
+};
+
+/* Report fs-wide metadata corruption */
+static void
+report_fs(
+       const struct hme_prefix                 *pfx,
+       const struct xfs_health_monitor_event   *hme)
+{
+       char                                    buf[512];
+
+       mask_to_string(fs_structs, hme->e.fs.mask, ", ", buf, sizeof(buf));
+
+       printf("%s %s: %s\n",
+                       pfx->mountpoint,
+                       buf,
+                       health_type_string(hme->type));
+       fflush(stdout);
+}
+
+/* Report device media corruption */
+static void
+report_device_error(
+       const struct hme_prefix                 *pfx,
+       const struct xfs_health_monitor_event   *hme)
+{
+       printf("%s %s %s 0x%llx %s 0x%llx: %s\n", pfx->mountpoint,
+                       device_domain_string(hme->domain),
+                       _("daddr"),
+                       (unsigned long long)hme->e.media.daddr,
+                       _("bbcount"),
+                       (unsigned long long)hme->e.media.bbcount,
+                       _("media error"));
+       fflush(stdout);
+}
+
+/* Report file range errors */
+static void
+report_file_range(
+       const struct hme_prefix                 *pfx,
+       const struct xfs_health_monitor_event   *hme)
+{
+       if (hme_prefix_has_path(pfx))
+               printf("%s ", pfx->path);
+       else
+               printf("%s %s %llu %s 0x%x ",
+                               pfx->mountpoint,
+                               _("ino"),
+                               (unsigned long long)hme->e.filerange.ino,
+                               _("gen"),
+                               hme->e.filerange.gen);
+       if (hme->type != XFS_HEALTH_MONITOR_TYPE_DATALOST &&
+           hme->e.filerange.error)
+               printf("%s %llu %s %llu: %s: %s\n",
+                               _("pos"),
+                               (unsigned long long)hme->e.filerange.pos,
+                               _("len"),
+                               (unsigned long long)hme->e.filerange.len,
+                               fileio_type_string(hme->type),
+                               strerror(hme->e.filerange.error));
+       else
+               printf("%s %llu %s %llu: %s %s\n",
+                               _("pos"),
+                               (unsigned long long)hme->e.filerange.pos,
+                               _("len"),
+                               (unsigned long long)hme->e.filerange.len,
+                               fileio_type_string(hme->type),
+                               _("failed"));
+       fflush(stdout);
+}
+
+/* Log a health monitoring event to stdout. */
+void
+hme_report_event(
+       const struct hme_prefix                 *pfx,
+       const struct xfs_health_monitor_event   *hme)
+{
+       switch (hme->domain) {
+       case XFS_HEALTH_MONITOR_DOMAIN_MOUNT:
+               switch (hme->type) {
+               case XFS_HEALTH_MONITOR_TYPE_LOST:
+                       report_lost(pfx, hme);
+                       return;
+               case XFS_HEALTH_MONITOR_TYPE_RUNNING:
+                       report_running(pfx, hme);
+                       return;
+               case XFS_HEALTH_MONITOR_TYPE_UNMOUNT:
+                       report_unmounted(pfx, hme);
+                       return;
+               case XFS_HEALTH_MONITOR_TYPE_SHUTDOWN:
+                       report_shutdown(pfx, hme);
+                       return;
+               }
+               break;
+       case XFS_HEALTH_MONITOR_DOMAIN_INODE:
+               report_inode(pfx, hme);
+               break;
+       case XFS_HEALTH_MONITOR_DOMAIN_AG:
+               report_ag(pfx, hme);
+               break;
+       case XFS_HEALTH_MONITOR_DOMAIN_RTGROUP:
+               report_rtgroup(pfx, hme);
+               break;
+       case XFS_HEALTH_MONITOR_DOMAIN_FS:
+               report_fs(pfx, hme);
+               break;
+       case XFS_HEALTH_MONITOR_DOMAIN_DATADEV:
+       case XFS_HEALTH_MONITOR_DOMAIN_RTDEV:
+       case XFS_HEALTH_MONITOR_DOMAIN_LOGDEV:
+               report_device_error(pfx, hme);
+               break;
+       case XFS_HEALTH_MONITOR_DOMAIN_FILERANGE:
+               report_file_range(pfx, hme);
+               break;
+       }
+}
diff --git a/libfrog/healthevent.h b/libfrog/healthevent.h
new file mode 100644 (file)
index 0000000..6de41bc
--- /dev/null
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2025-2026 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#ifndef LIBFROG_HEALTHEVENT_H_
+#define LIBFROG_HEALTHEVENT_H_
+
+struct hme_prefix {
+       /*
+        * Format a complete file path into this buffer to prevent the logging
+        * code from printing the mountpoint and a file handle.  Only works for
+        * file-related events.
+        */
+       char            path[MAXPATHLEN];
+
+       /* Set this to the mountpoint */
+       const char      *mountpoint;
+};
+
+static inline bool hme_prefix_has_path(const struct hme_prefix *pfx)
+{
+       return pfx->path[0] != 0;
+}
+
+static inline void hme_prefix_clear_path(struct hme_prefix *pfx)
+{
+       pfx->path[0] = 0;
+}
+
+static inline void
+hme_prefix_init(
+       struct hme_prefix       *pfx,
+       const char              *mountpoint)
+{
+       pfx->mountpoint = mountpoint;
+       hme_prefix_clear_path(pfx);
+}
+
+void hme_report_event(const struct hme_prefix *pfx,
+               const struct xfs_health_monitor_event *hme);
+
+#endif /* LIBFROG_HEALTHEVENT_H_ */