common: disable infinite IO error retry for EIO shutdown tests

author Darrick J. Wong <djwong@kernel.org>

Wed, 3 Aug 2022 04:22:35 +0000 (21:22 -0700)

committer Zorro Lang <zlang@kernel.org>

Sun, 4 Sep 2022 13:44:05 +0000 (21:44 +0800)
author Darrick J. Wong <djwong@kernel.org>
Wed, 3 Aug 2022 04:22:35 +0000 (21:22 -0700)
committer Zorro Lang <zlang@kernel.org>
Sun, 4 Sep 2022 13:44:05 +0000 (21:44 +0800)
diff --git a/common/dmerror b/common/dmerror

index 0934d220343b22c3b9af3e2a4c065543217a6e89..54122b12ea5a24c8171beda51ebbdf7b3245f8d4 100644 (file)
--- a/common/dmerror
+++ b/common/dmerror
@@ -138,6 +138,10 @@ _dmerror_load_error_table()
                 suspend_opt="$*"
         fi
  
+       # If the full environment is set up, configure ourselves for shutdown
+       type _prepare_for_eio_shutdown &>/dev/null && \
+               _prepare_for_eio_shutdown $DMERROR_DEV
+
         # Suspend the scratch device before the log and realtime devices so
         # that the kernel can freeze and flush the filesystem if the caller
         # wanted a freeze.
diff --git a/common/fail_make_request b/common/fail_make_request

index 9f8ea500704940201cbde1e3ee47edaa56c44d92..b5370ba68f0ccd4afe6d4e6994e777bd87a39674 100644 (file)
--- a/common/fail_make_request
+++ b/common/fail_make_request
@@ -44,6 +44,7 @@ _start_fail_scratch_dev()
  {
      echo "Force SCRATCH_DEV device failure"
  
+    _prepare_for_eio_shutdown $SCRATCH_DEV
      _bdev_fail_make_request $SCRATCH_DEV 1
      [ "$USE_EXTERNAL" = yes -a ! -z "$SCRATCH_LOGDEV" ] && \
          _bdev_fail_make_request $SCRATCH_LOGDEV 1
diff --git a/common/rc b/common/rc

index c4cd773ed1b9821cfd5c5766f86b2937b833f24e..277678850aa7f8ae6137998da8848e7e44eef8fc 100644 (file)
--- a/common/rc
+++ b/common/rc
@@ -4212,6 +4212,20 @@ _check_dmesg()
         fi
  }
  
+# Apply any filesystem-specific tuning that has to be in place before a test
+# provokes a shutdown through unexpected IO errors during metadata updates.
+# The only argument is the fs device, e.g. $SCRATCH_DEV.
+_prepare_for_eio_shutdown()
+{
+       local fsdev="$1"
+
+       # Only xfs has a preparation hook; any other $FSTYP is a no-op.
+       if [ "$FSTYP" = "xfs" ]; then
+               _xfs_prepare_for_eio_shutdown "$fsdev"
+       fi
+}
+
+
  # capture the kmemleak report
  _capture_kmemleak()
  {
@@ -4474,7 +4488,7 @@ run_fsx()
  #
  # Usage example:
  #   _require_fs_sysfs error/fail_at_unmount
-_require_fs_sysfs()
+_has_fs_sysfs()
  {
         local attr=$1
         local dname
@@ -4490,9 +4504,18 @@ _require_fs_sysfs()
                 _fail "Usage: _require_fs_sysfs <sysfs_attr_path>"
         fi
  
-       if [ ! -e /sys/fs/${FSTYP}/${dname}/${attr} ];then
-               _notrun "This test requires /sys/fs/${FSTYP}/${dname}/${attr}"
-       fi
+       test -e /sys/fs/${FSTYP}/${dname}/${attr}
+}
+
+# Skip the test (via _notrun) unless /sys/fs/$FSTYP/DEV/$ATTR exists.
+_require_fs_sysfs()
+{
+       local attr=$1
+       if ! _has_fs_sysfs "$@"; then
+               # NOTE(review): confirm dname matches what _has_fs_sysfs probed.
+               local dname=$(_short_dev $TEST_DEV)
+               _notrun "This test requires /sys/fs/${FSTYP}/${dname}/${attr}"
+       fi
  }
  
  _require_statx()
diff --git a/common/xfs b/common/xfs

index 92c281c64c112601034208ec8e7542d0c677e4fd..65234c8baeb6dc91334d2d58e83de94783c24a9d 100644 (file)
--- a/common/xfs
+++ b/common/xfs
@@ -823,6 +823,35 @@ _scratch_xfs_unmount_dirty()
         _scratch_unmount
  }
  
+# Prepare a mounted filesystem for an IO error shutdown test by disabling retry
+# for metadata writes.  This prevents a (rare) log livelock when:
+#
+# - The log has given out all available grant space, preventing any new
+#   writers from tripping over IO errors (and shutting down the fs/log),
+# - All log buffers were written to disk, and
+# - The log tail is pinned because the AIL keeps hitting EIO trying to write
+#   committed changes back into the filesystem.
+#
+# Real users might want the default behavior of the AIL retrying writes forever
+# but for testing purposes we don't want to wait.
+#
+# The sole parameter should be the filesystem data device, e.g. $SCRATCH_DEV.
+_xfs_prepare_for_eio_shutdown()
+{
+       local dev="$1" ctl ctlfile="error/fail_at_unmount"
+       # Probe $dev's own sysfs dir; _has_fs_sysfs takes no device argument.
+       local sysfs="/sys/fs/${FSTYP}/$(_short_dev "$dev")"
+
+       # Don't retry any writes during the (presumably) post-shutdown unmount
+       test -e "$sysfs/$ctlfile" && _set_fs_sysfs_attr "$dev" "$ctlfile" 1
+
+       # Disable retry of metadata writes that fail with EIO
+       for ctl in max_retries retry_timeout_seconds; do
+               ctlfile="error/metadata/EIO/$ctl"
+               test -e "$sysfs/$ctlfile" && _set_fs_sysfs_attr "$dev" "$ctlfile" 0
+       done
+}
+
  # Skip if we are running an older binary without the stricter input checks.
  # Make multiple checks to be sure that there is no regression on the one
  # selected feature check, which would skew the result.
author	Darrick J. Wong <djwong@kernel.org>
	Wed, 3 Aug 2022 04:22:35 +0000 (21:22 -0700)
committer	Zorro Lang <zlang@kernel.org>
	Sun, 4 Sep 2022 13:44:05 +0000 (21:44 +0800)
common/dmerror		patch \| blob \| history
common/fail_make_request		patch \| blob \| history
common/rc		patch \| blob \| history
common/xfs		patch \| blob \| history