From: Darrick J. Wong
Date: Sun, 22 Feb 2026 22:41:12 +0000 (-0800)
Subject: libfrog: create healthmon event log library functions
X-Git-Tag: v7.0.0~55
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=3774cbe059392e81f0d5b44ade49953ca9da9682;p=xfsprogs-dev.git

libfrog: create healthmon event log library functions

Add some helper functions to log health monitoring events so that xfs_io
and xfs_healer can share logging code.

Signed-off-by: "Darrick J. Wong"
Reviewed-by: Christoph Hellwig
---

diff --git a/libfrog/Makefile b/libfrog/Makefile
index 927bd8d0..bccd9289 100644
--- a/libfrog/Makefile
+++ b/libfrog/Makefile
@@ -19,11 +19,13 @@ bulkstat.c \
 convert.c \
 crc32.c \
 file_exchange.c \
+flagmap.c \
 fsgeom.c \
 fsproperties.c \
 fsprops.c \
 getparents.c \
 histogram.c \
+healthevent.c \
 file_attr.c \
 list_sort.c \
 linux.c \
@@ -51,11 +53,13 @@ dahashselftest.h \
 div64.h \
 fakelibattr.h \
 file_exchange.h \
+flagmap.h \
 fsgeom.h \
 fsproperties.h \
 fsprops.h \
 getparents.h \
 handle_priv.h \
+healthevent.h \
 histogram.h \
 file_attr.h \
 logging.h \
diff --git a/libfrog/flagmap.c b/libfrog/flagmap.c
new file mode 100644
index 00000000..1a8bdd23
--- /dev/null
+++ b/libfrog/flagmap.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2026 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong
+ */
+#include "xfs.h"
+
+#include "platform_defs.h"
+#include "libfrog/flagmap.h"
+
+/*
+ * Given a mapping of bits to strings and a bitmask, format the bitmask as a
+ * list of strings and hexadecimal number representing bits not mapped to any
+ * string. The output will be truncated if buf is not large enough.
+ * If bufsize is nonzero, buf always holds a NUL-terminated string on return,
+ * even when no bit in the mask gets formatted.
+ */
+void
+mask_to_string(
+	const struct flag_map	*map,
+	unsigned long long	mask,
+	const char		*delimiter,
+	char			*buf,
+	size_t			bufsize)
+{
+	const char		*tag = "";
+	unsigned long long	seen = 0;
+	int			w;
+
+	/* Nowhere to put even the terminator. */
+	if (bufsize == 0)
+		return;
+
+	/*
+	 * Callers print buf unconditionally, so hand back an empty string if
+	 * nothing below ends up being written (e.g. mask == 0).
+	 */
+	buf[0] = '\0';
+
+	for (; map->string; map++) {
+		seen |= map->flag;
+
+		if (!(mask & map->flag))
+			continue;
+
+		w = snprintf(buf, bufsize, "%s%s", tag, _(map->string));
+		if (w < 0) {
+			/* Formatting failed; discard this partial element. */
+			*buf = '\0';
+			return;
+		}
+		if ((size_t)w >= bufsize)
+			return;		/* output was truncated */
+
+		buf += w;
+		bufsize -= (size_t)w;
+
+		/* Every element after the first is preceded by the delimiter. */
+		tag = delimiter;
+	}
+
+	/* Report bits that the table does not know about as a hex number. */
+	if (mask & ~seen)
+		snprintf(buf, bufsize, "%s0x%llx", tag, mask & ~seen);
+}
+
+/*
+ * Given a mapping of values to strings and a value, return the translated
+ * string of the first entry whose flag equals the value, or a translated
+ * "unknown value" placeholder if there is no such entry.
+ */
+const char *
+value_to_string(
+	const struct flag_map	*map,
+	unsigned long long	value)
+{
+	for (; map->string; map++) {
+		if (value == map->flag)
+			return _(map->string);
+	}
+
+	return _("unknown value");
+}
diff --git a/libfrog/flagmap.h b/libfrog/flagmap.h
new file mode 100644
index 00000000..8031d75a
--- /dev/null
+++ b/libfrog/flagmap.h
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2025-2026 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong
+ */
+#ifndef LIBFROG_FLAGMAP_H_
+#define LIBFROG_FLAGMAP_H_
+
+/*
+ * One entry of a table that maps flag bits (or discrete values) to names.
+ * A table ends with an entry whose string is NULL.
+ */
+struct flag_map {
+	unsigned long long	flag;		/* bit(s) or value to match */
+	const char		*string;	/* name, translated on lookup */
+};
+
+/* Format the bits set in @mask as a @delimiter-separated list of names. */
+void mask_to_string(const struct flag_map *map, unsigned long long mask,
+		const char *delimiter, char *buf, size_t bufsize);
+
+/* Return the name of the entry whose flag equals @value. */
+const char *value_to_string(const struct flag_map *map,
+		unsigned long long value);
+
+#endif /* LIBFROG_FLAGMAP_H_ */
diff --git a/libfrog/healthevent.c b/libfrog/healthevent.c
new file mode 100644
index 00000000..8520cb32
--- /dev/null
+++ b/libfrog/healthevent.c
@@ -0,0 +1,360 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2025-2026 Oracle. All Rights Reserved.
+ * Author: Darrick J.
Wong
+ */
+#include "xfs.h"
+
+#include "platform_defs.h"
+#include "libfrog/healthevent.h"
+#include "libfrog/flagmap.h"
+
+/*
+ * Each health monitoring event is logged as one line of the form:
+ *
+ * WHICH OBJECT: STATUS
+ *
+ * for example:
+ *
+ * /mnt: 32 events lost
+ * /mnt agno 0x5 bnobt, rmapbt: sick
+ * /mnt rgno 0x5 bitmap: sick
+ * /mnt ino 13 gen 0x3 datafork: sick
+ * /mnt/a datafork: sick
+ * /mnt ino 13 gen 0x3 pos 4096 len 4096: directio_write failed
+ * /mnt/a pos 4096 len 4096: directio_read failed
+ * /mnt datadev daddr 0x13 bbcount 0x5: media error
+ * /mnt: filesystem shut down due to metadata I/O error, forced unmount
+ */
+
+/* Names of the device domains that media errors are reported against. */
+static const struct flag_map device_domains[] = {
+	{
+		.flag	= XFS_HEALTH_MONITOR_DOMAIN_DATADEV,
+		.string	= N_("datadev"),
+	},
+	{
+		.flag	= XFS_HEALTH_MONITOR_DOMAIN_RTDEV,
+		.string	= N_("rtdev"),
+	},
+	{
+		.flag	= XFS_HEALTH_MONITOR_DOMAIN_LOGDEV,
+		.string	= N_("logdev"),
+	},
+	{ .flag = 0, .string = NULL },
+};
+
+/* Look up the name of a device domain. */
+static inline const char *
+device_domain_string(
+	uint32_t		domain)
+{
+	const unsigned long long	code = domain;
+
+	return value_to_string(device_domains, code);
+}
+
+/* Names of the event types that are reported against file ranges. */
+static const struct flag_map fileio_types[] = {
+	{
+		.flag	= XFS_HEALTH_MONITOR_TYPE_BUFREAD,
+		.string	= N_("buffered_read"),
+	},
+	{
+		.flag	= XFS_HEALTH_MONITOR_TYPE_BUFWRITE,
+		.string	= N_("buffered_write"),
+	},
+	{
+		.flag	= XFS_HEALTH_MONITOR_TYPE_DIOREAD,
+		.string	= N_("directio_read"),
+	},
+	{
+		.flag	= XFS_HEALTH_MONITOR_TYPE_DIOWRITE,
+		.string	= N_("directio_write"),
+	},
+	{
+		.flag	= XFS_HEALTH_MONITOR_TYPE_DATALOST,
+		.string	= N_("media"),
+	},
+	{ .flag = 0, .string = NULL },
+};
+
+/* Look up the name of a file I/O event type. */
+static inline const char *
+fileio_type_string(
+	uint32_t		type)
+{
+	const unsigned long long	code = type;
+
+	return value_to_string(fileio_types, code);
+}
+
+/* Names of the event types that describe metadata health. */
+static const struct flag_map health_types[] = {
+	{
+		.flag	= XFS_HEALTH_MONITOR_TYPE_SICK,
+		.string	= N_("sick"),
+	},
+	{
+		.flag	= XFS_HEALTH_MONITOR_TYPE_CORRUPT,
+		.string	= N_("corrupt"),
+	},
+	{
+		.flag	= XFS_HEALTH_MONITOR_TYPE_HEALTHY,
+		.string	= N_("healthy"),
+	},
+	{ .flag = 0, .string = NULL },
+};
+
+/* Look up the name of a metadata health event type. */
+static inline const char *
+health_type_string(
+	uint32_t		type)
+{
+	const unsigned long long	code = type;
+
+	return value_to_string(health_types, code);
+}
+
+/* Report that the kernel lost events.
*/
+static void
+report_lost(
+	const struct hme_prefix			*pfx,
+	const struct xfs_health_monitor_event	*hme)
+{
+	const unsigned long long	nr_lost = hme->e.lost.count;
+
+	printf("%s: %llu %s\n", pfx->mountpoint, nr_lost, _("events lost"));
+	fflush(stdout);
+}
+
+/* Announce that health monitoring has begun for this mountpoint. */
+static void
+report_running(
+	const struct hme_prefix			*pfx,
+	const struct xfs_health_monitor_event	*hme)
+{
+	const char		*status = _("monitoring started");
+
+	printf("%s: %s\n", pfx->mountpoint, status);
+	fflush(stdout);
+}
+
+/* Announce that the monitored filesystem has been unmounted. */
+static void
+report_unmounted(
+	const struct hme_prefix			*pfx,
+	const struct xfs_health_monitor_event	*hme)
+{
+	const char		*status = _("filesystem unmounted");
+
+	printf("%s: %s\n", pfx->mountpoint, status);
+	fflush(stdout);
+}
+
+/* Names of the reasons that can accompany a filesystem shutdown event. */
+static const struct flag_map shutdown_reasons[] = {
+	{
+		.flag	= XFS_HEALTH_SHUTDOWN_META_IO_ERROR,
+		.string	= N_("metadata I/O error"),
+	},
+	{
+		.flag	= XFS_HEALTH_SHUTDOWN_LOG_IO_ERROR,
+		.string	= N_("log I/O error"),
+	},
+	{
+		.flag	= XFS_HEALTH_SHUTDOWN_FORCE_UMOUNT,
+		.string	= N_("forced unmount"),
+	},
+	{
+		.flag	= XFS_HEALTH_SHUTDOWN_CORRUPT_INCORE,
+		.string	= N_("in-memory state corruption"),
+	},
+	{
+		.flag	= XFS_HEALTH_SHUTDOWN_CORRUPT_ONDISK,
+		.string	= N_("ondisk metadata corruption"),
+	},
+	{
+		.flag	= XFS_HEALTH_SHUTDOWN_DEVICE_REMOVED,
+		.string	= N_("device removed"),
+	},
+	{ .flag = 0, .string = NULL },
+};
+
+/* Report an abortive shutdown of the filesystem.
*/
+static void
+report_shutdown(
+	const struct hme_prefix		*pfx,
+	const struct xfs_health_monitor_event	*hme)
+{
+	/*
+	 * Start out empty so that a valid string is printed even if
+	 * mask_to_string() finds nothing to format.
+	 */
+	char			buf[512] = "";
+
+	mask_to_string(shutdown_reasons, hme->e.shutdown.reasons, ", ", buf,
+			sizeof(buf));
+
+	printf("%s: %s %s\n", pfx->mountpoint,
+			_("filesystem shut down due to"), buf);
+	fflush(stdout);
+}
+
+/* Names of the per-inode metadata structures that can be reported. */
+static const struct flag_map inode_structs[] = {
+	{ XFS_BS_SICK_INODE,		N_("core") },
+	{ XFS_BS_SICK_BMBTD,		N_("datafork") },
+	{ XFS_BS_SICK_BMBTA,		N_("attrfork") },
+	{ XFS_BS_SICK_BMBTC,		N_("cowfork") },
+	{ XFS_BS_SICK_DIR,		N_("directory") },
+	{ XFS_BS_SICK_XATTR,		N_("xattr") },
+	{ XFS_BS_SICK_SYMLINK,		N_("symlink") },
+	{ XFS_BS_SICK_PARENT,		N_("parent") },
+	{ XFS_BS_SICK_DIRTREE,		N_("dirtree") },
+	{0, NULL},
+};
+
+/*
+ * Report inode metadata corruption.  The file is identified by its path if
+ * the prefix carries one, or by mountpoint, inode number and generation
+ * otherwise.
+ */
+static void
+report_inode(
+	const struct hme_prefix		*pfx,
+	const struct xfs_health_monitor_event	*hme)
+{
+	/* Empty by default in case no structure name gets formatted. */
+	char			buf[512] = "";
+
+	mask_to_string(inode_structs, hme->e.inode.mask, ", ", buf,
+			sizeof(buf));
+
+	if (hme_prefix_has_path(pfx))
+		printf("%s %s: %s\n",
+				pfx->path,
+				buf,
+				health_type_string(hme->type));
+	else
+		printf("%s %s %llu %s 0x%x %s: %s\n",
+				pfx->mountpoint,
+				_("ino"),
+				(unsigned long long)hme->e.inode.ino,
+				_("gen"),
+				hme->e.inode.gen,
+				buf,
+				health_type_string(hme->type));
+	fflush(stdout);
+}
+
+/* Names of the allocation group metadata structures that can be reported. */
+static const struct flag_map ag_structs[] = {
+	{ XFS_AG_GEOM_SICK_SB,		N_("super") },
+	{ XFS_AG_GEOM_SICK_AGF,		N_("agf") },
+	{ XFS_AG_GEOM_SICK_AGFL,	N_("agfl") },
+	{ XFS_AG_GEOM_SICK_AGI,		N_("agi") },
+	{ XFS_AG_GEOM_SICK_BNOBT,	N_("bnobt") },
+	{ XFS_AG_GEOM_SICK_CNTBT,	N_("cntbt") },
+	{ XFS_AG_GEOM_SICK_INOBT,	N_("inobt") },
+	{ XFS_AG_GEOM_SICK_FINOBT,	N_("finobt") },
+	{ XFS_AG_GEOM_SICK_RMAPBT,	N_("rmapbt") },
+	{ XFS_AG_GEOM_SICK_REFCNTBT,	N_("refcountbt") },
+	{ XFS_AG_GEOM_SICK_INODES,	N_("inodes") },
+	{0, NULL},
+};
+
+/* Report AG metadata corruption */
+static void
+report_ag(
+	const struct hme_prefix		*pfx,
+	const struct xfs_health_monitor_event	*hme)
+{
+	/* Empty by default in case no structure name gets formatted. */
+	char			buf[512] = "";
+
+	mask_to_string(ag_structs, hme->e.group.mask, ", ", buf,
+			sizeof(buf));
+
+	printf("%s %s 0x%x %s: %s\n",
+			pfx->mountpoint,
+			_("agno"),
+			hme->e.group.gno,
+			buf,
+			health_type_string(hme->type));
+	fflush(stdout);
+}
+
+/* Names of the realtime group metadata structures that can be reported. */
+static const struct flag_map rtgroup_structs[] = {
+	{ XFS_RTGROUP_GEOM_SICK_SUPER,	N_("super") },
+	{ XFS_RTGROUP_GEOM_SICK_BITMAP,	N_("bitmap") },
+	{ XFS_RTGROUP_GEOM_SICK_SUMMARY, N_("summary") },
+	{ XFS_RTGROUP_GEOM_SICK_RMAPBT,	N_("rmapbt") },
+	{ XFS_RTGROUP_GEOM_SICK_REFCNTBT, N_("refcountbt") },
+	{0, NULL},
+};
+
+/* Report rtgroup metadata corruption */
+static void
+report_rtgroup(
+	const struct hme_prefix		*pfx,
+	const struct xfs_health_monitor_event	*hme)
+{
+	/* Empty by default in case no structure name gets formatted. */
+	char			buf[512] = "";
+
+	mask_to_string(rtgroup_structs, hme->e.group.mask, ", ", buf,
+			sizeof(buf));
+
+	printf("%s %s 0x%x %s: %s\n",
+			pfx->mountpoint,
+			_("rgno"),
+			hme->e.group.gno,
+			buf, health_type_string(hme->type));
+	fflush(stdout);
+}
+
+/* Names of the filesystem-wide metadata structures that can be reported. */
+static const struct flag_map fs_structs[] = {
+	{ XFS_FSOP_GEOM_SICK_COUNTERS,	N_("fscounters") },
+	{ XFS_FSOP_GEOM_SICK_UQUOTA,	N_("usrquota") },
+	{ XFS_FSOP_GEOM_SICK_GQUOTA,	N_("grpquota") },
+	{ XFS_FSOP_GEOM_SICK_PQUOTA,	N_("prjquota") },
+	{ XFS_FSOP_GEOM_SICK_RT_BITMAP,	N_("bitmap") },
+	{ XFS_FSOP_GEOM_SICK_RT_SUMMARY, N_("summary") },
+	{ XFS_FSOP_GEOM_SICK_QUOTACHECK, N_("quotacheck") },
+	{ XFS_FSOP_GEOM_SICK_NLINKS,	N_("nlinks") },
+	{ XFS_FSOP_GEOM_SICK_METADIR,	N_("metadir") },
+	{ XFS_FSOP_GEOM_SICK_METAPATH,	N_("metapath") },
+	{0, NULL},
+};
+
+/* Report fs-wide metadata corruption */
+static void
+report_fs(
+	const struct hme_prefix		*pfx,
+	const struct xfs_health_monitor_event	*hme)
+{
+	/* Empty by default in case no structure name gets formatted. */
+	char			buf[512] = "";
+
+	mask_to_string(fs_structs, hme->e.fs.mask, ", ", buf, sizeof(buf));
+
+	printf("%s %s: %s\n",
+			pfx->mountpoint,
+			buf,
+			health_type_string(hme->type));
+	fflush(stdout);
+}
+
+/*
+ * Report device media corruption.  The device is named after the event
+ * domain; the affected range is printed as daddr and bbcount in hex.
+ */
+static void
+report_device_error(
+	const struct hme_prefix		*pfx,
+	const struct xfs_health_monitor_event	*hme)
+{
+	printf("%s %s %s 0x%llx %s 0x%llx: %s\n", pfx->mountpoint,
+			device_domain_string(hme->domain),
+			_("daddr"),
+			(unsigned long long)hme->e.media.daddr,
+			_("bbcount"),
+			(unsigned long long)hme->e.media.bbcount,
+			_("media error"));
+	fflush(stdout);
+}
+
+/*
+ * Report file range errors.  The file is identified by its path if the
+ * prefix carries one, or by mountpoint, inode number and generation
+ * otherwise.  The status is the strerror() text of the recorded error when
+ * one is set and the event is not a data loss event; otherwise it is
+ * "<type> failed".
+ */
+static void
+report_file_range(
+	const struct hme_prefix		*pfx,
+	const struct xfs_health_monitor_event	*hme)
+{
+	if (hme_prefix_has_path(pfx))
+		printf("%s ", pfx->path);
+	else
+		printf("%s %s %llu %s 0x%x ",
+				pfx->mountpoint,
+				_("ino"),
+				(unsigned long long)hme->e.filerange.ino,
+				_("gen"),
+				hme->e.filerange.gen);
+	if (hme->type != XFS_HEALTH_MONITOR_TYPE_DATALOST &&
+	    hme->e.filerange.error)
+		printf("%s %llu %s %llu: %s: %s\n",
+				_("pos"),
+				(unsigned long long)hme->e.filerange.pos,
+				_("len"),
+				(unsigned long long)hme->e.filerange.len,
+				fileio_type_string(hme->type),
+				strerror(hme->e.filerange.error));
+	else
+		printf("%s %llu %s %llu: %s %s\n",
+				_("pos"),
+				(unsigned long long)hme->e.filerange.pos,
+				_("len"),
+				(unsigned long long)hme->e.filerange.len,
+				fileio_type_string(hme->type),
+				_("failed"));
+	fflush(stdout);
+}
+
+/* Log a health monitoring event to stdout.
*/
+void
+hme_report_event(
+	const struct hme_prefix			*pfx,
+	const struct xfs_health_monitor_event	*hme)
+{
+	/* Mount-wide events are told apart by their type. */
+	if (hme->domain == XFS_HEALTH_MONITOR_DOMAIN_MOUNT) {
+		switch (hme->type) {
+		case XFS_HEALTH_MONITOR_TYPE_LOST:
+			report_lost(pfx, hme);
+			break;
+		case XFS_HEALTH_MONITOR_TYPE_RUNNING:
+			report_running(pfx, hme);
+			break;
+		case XFS_HEALTH_MONITOR_TYPE_UNMOUNT:
+			report_unmounted(pfx, hme);
+			break;
+		case XFS_HEALTH_MONITOR_TYPE_SHUTDOWN:
+			report_shutdown(pfx, hme);
+			break;
+		}
+		return;
+	}
+
+	/* All other events are dispatched on the domain alone. */
+	switch (hme->domain) {
+	case XFS_HEALTH_MONITOR_DOMAIN_INODE:
+		report_inode(pfx, hme);
+		return;
+	case XFS_HEALTH_MONITOR_DOMAIN_AG:
+		report_ag(pfx, hme);
+		return;
+	case XFS_HEALTH_MONITOR_DOMAIN_RTGROUP:
+		report_rtgroup(pfx, hme);
+		return;
+	case XFS_HEALTH_MONITOR_DOMAIN_FS:
+		report_fs(pfx, hme);
+		return;
+	case XFS_HEALTH_MONITOR_DOMAIN_DATADEV:
+	case XFS_HEALTH_MONITOR_DOMAIN_RTDEV:
+	case XFS_HEALTH_MONITOR_DOMAIN_LOGDEV:
+		report_device_error(pfx, hme);
+		return;
+	case XFS_HEALTH_MONITOR_DOMAIN_FILERANGE:
+		report_file_range(pfx, hme);
+		return;
+	}
+}
diff --git a/libfrog/healthevent.h b/libfrog/healthevent.h
new file mode 100644
index 00000000..6de41bc7
--- /dev/null
+++ b/libfrog/healthevent.h
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2025-2026 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong
+ */
+#ifndef LIBFROG_HEALTHEVENT_H_
+#define LIBFROG_HEALTHEVENT_H_
+
+/* Describes what to print at the start of each logged event. */
+struct hme_prefix {
+	/*
+	 * If a complete file path is formatted into this buffer, file-related
+	 * events are logged with that path instead of the mountpoint plus
+	 * inode number and generation.  Other events do not use it.
+	 */
+	char			path[MAXPATHLEN];
+
+	/* Mountpoint of the monitored filesystem; set by the caller. */
+	const char		*mountpoint;
+};
+
+/* Has a file path been formatted into @pfx? */
+static inline bool
+hme_prefix_has_path(
+	const struct hme_prefix	*pfx)
+{
+	return pfx->path[0] != '\0';
+}
+
+/* Forget any file path stored in @pfx. */
+static inline void
+hme_prefix_clear_path(
+	struct hme_prefix	*pfx)
+{
+	pfx->path[0] = '\0';
+}
+
+/* Prepare @pfx for use with the given mountpoint and no file path. */
+static inline void
+hme_prefix_init(
+	struct hme_prefix	*pfx,
+	const char		*mountpoint)
+{
+	hme_prefix_clear_path(pfx);
+	pfx->mountpoint = mountpoint;
+}
+
+/* Log one health monitoring event to stdout. */
+void hme_report_event(const struct hme_prefix *pfx,
+		const struct xfs_health_monitor_event *hme);
+
+#endif /* LIBFROG_HEALTHEVENT_H_ */