struct ptvar *verify_schedules;
struct read_verify_pool *rvp[XFS_DEV_RT + 1];
-
- struct bitmap *d_bad; /* bytes */
- struct bitmap *r_bad; /* bytes */
- struct bitmap *l_bad; /* bytes */
- bool d_trunc:1;
- bool r_trunc:1;
- bool l_trunc:1;
};
/* Return XFS device index from fsmap device. */
}
}
-/* Find the incore bad blocks bitmap for a given disk. */
-static struct bitmap *
-bitmap_for_disk(
- enum xfs_device dev,
- struct media_verify_state *vs)
-{
- switch (dev) {
- case XFS_DEV_DATA:
- return vs->d_bad;
- case XFS_DEV_RT:
- return vs->r_bad;
- case XFS_DEV_LOG:
- return vs->l_bad;
- default:
- return NULL;
- }
-}
-
struct disk_ioerr_report {
struct scrub_ctx *ctx;
enum xfs_device dev;
return 0;
}
+/*
+ * Pick the XFS device for a file: the realtime device if FS_XFLAG_REALTIME
+ * is set in @fsx->fsx_xflags, otherwise the data device.
+ */
+static inline enum xfs_device from_fsx(const struct fsxattr *fsx)
+{
+	const bool	is_rt = (fsx->fsx_xflags & FS_XFLAG_REALTIME) != 0;
+
+	return is_rt ? XFS_DEV_RT : XFS_DEV_DATA;
+}
+
/* Report if this extent overlaps a bad region. */
static int
report_data_loss(
{
struct badfile_report *br = arg;
struct media_verify_state *vs = br->vs;
- struct bitmap *bmp;
br->bmap = bmap;
if (bmap->bm_flags & (BMV_OF_PREALLOC | BMV_OF_DELALLOC))
return 0;
- if (fsx->fsx_xflags & FS_XFLAG_REALTIME)
- bmp = vs->r_bad;
- else
- bmp = vs->d_bad;
-
- return -bitmap_iterate_range(bmp, bmap->bm_physical, bmap->bm_length,
- report_badfile, br);
+ return read_verify_iterate_failed_range(vs->rvp[from_fsx(fsx)],
+ bmap->bm_physical, bmap->bm_length, report_badfile,
+ br);
}
/* Report if the extended attribute data overlaps a bad region. */
{
struct badfile_report *br = arg;
struct media_verify_state *vs = br->vs;
- struct bitmap *bmp = vs->d_bad;
/* Complain about attr fork extents that don't look right. */
if (bmap->bm_flags & (BMV_OF_PREALLOC | BMV_OF_DELALLOC)) {
return 0;
}
- if (bitmap_test(bmp, bmap->bm_physical, bmap->bm_length))
+ if (read_verify_has_failed(vs->rvp[XFS_DEV_DATA], bmap->bm_physical,
+ bmap->bm_length))
str_corrupt(ctx, br->descr,
_("media error in extended attribute data."));
&fr);
}
+/*
+ * Return the _()-wrapped "device truncated" complaint that matches @dev.
+ * Calls abort() if @dev is not the data, log, or realtime device.
+ */
+static inline const char *trunc_msg(enum xfs_device dev)
+{
+	if (dev == XFS_DEV_DATA)
+		return _("data device truncated");
+	if (dev == XFS_DEV_LOG)
+		return _("log device truncated");
+	if (dev == XFS_DEV_RT)
+		return _("rt device truncated");
+
+	/* No message exists for any other device value. */
+	abort();
+}
+
/* Report all the media errors found on a disk. */
static int
report_disk_ioerrs(
struct media_verify_state *vs,
enum xfs_device dev)
{
- struct bitmap *tree = bitmap_for_disk(dev, vs);
struct disk_ioerr_report dioerr = {
.ctx = ctx,
.dev = dev,
};
- if (!tree)
+ if (!vs->rvp[dev])
return 0;
- return -bitmap_iterate(tree, report_ioerr, &dioerr);
+
+ if (read_verify_truncated(vs->rvp[dev]))
+ str_corrupt(ctx, ctx->mntpoint, trunc_msg(dev));
+
+ return read_verify_iterate_failed(vs->rvp[dev], report_ioerr, &dioerr);
}
/* Given bad extent lists for the data & rtdev, find bad files. */
{
int ret;
- if (vs->d_trunc)
- str_corrupt(ctx, ctx->mntpoint, _("data device truncated"));
- if (vs->l_trunc)
- str_corrupt(ctx, ctx->mntpoint, _("log device truncated"));
- if (vs->r_trunc)
- str_corrupt(ctx, ctx->mntpoint, _("rt device truncated"));
-
ret = report_disk_ioerrs(ctx, vs, XFS_DEV_DATA);
if (ret) {
str_liberror(ctx, ret, _("walking datadev io errors"));
clean_pool(
struct media_verify_state *vs,
enum xfs_device dev,
- unsigned long long *bytes_checked)
+ unsigned long long *bytes_checked,
+ bool *ok)
{
struct read_verify_pool *rvp = vs->rvp[dev];
- uint64_t pool_checked;
int ret;
if (!rvp)
ret = read_verify_pool_flush(rvp);
if (ret)
- goto out_destroy;
-
- ret = read_verify_bytes(rvp, &pool_checked);
- if (ret)
- goto out_destroy;
-
- *bytes_checked += pool_checked;
-out_destroy:
- read_verify_pool_destroy(rvp);
- return ret;
-}
-
-/* Remember a media error for later. */
-static void
-remember_ioerr(
- struct scrub_ctx *ctx,
- enum xfs_device dev,
- uint64_t start,
- uint64_t length,
- int error,
- void *arg)
-{
- struct media_verify_state *vs = arg;
- struct bitmap *tree;
- int ret;
-
- if (!length) {
- switch (dev) {
- case XFS_DEV_DATA:
- vs->d_trunc = true;
- break;
- case XFS_DEV_LOG:
- vs->l_trunc = true;
- break;
- case XFS_DEV_RT:
- vs->r_trunc = true;
- break;
- }
- return;
- }
-
- tree = bitmap_for_disk(dev, vs);
- if (!tree) {
- str_liberror(ctx, ENOENT, _("finding bad block bitmap"));
- return;
- }
+ return ret;
- ret = -bitmap_set(tree, start, length);
- if (ret)
- str_liberror(ctx, ret, _("setting bad block bitmap"));
+ *bytes_checked += read_verify_progress(rvp);
+ if (!read_verify_ok(rvp))
+ *ok = false;
+ return 0;
}
static inline int
struct media_verify_state *vs,
enum xfs_device dev)
{
- return read_verify_pool_alloc(ctx, dev, remember_ioerr, vs,
- &vs->rvp[dev]);
+ return read_verify_pool_alloc(ctx, dev, &vs->rvp[dev]);
}
static inline void
struct scrub_ctx *ctx)
{
struct media_verify_state vs = { NULL };
+ bool ok = true;
int ret, ret2, ret3;
- ret = -bitmap_alloc(&vs.d_bad);
- if (ret) {
- str_liberror(ctx, ret, _("creating datadev badblock bitmap"));
- return ret;
- }
-
- ret = -bitmap_alloc(&vs.r_bad);
- if (ret) {
- str_liberror(ctx, ret, _("creating realtime badblock bitmap"));
- goto out_dbad;
- }
-
- ret = -bitmap_alloc(&vs.l_bad);
- if (ret) {
- str_liberror(ctx, ret, _("creating log badblock bitmap"));
- goto out_rbad;
- }
-
ret = alloc_pool(ctx, &vs, XFS_DEV_DATA);
if (ret) {
str_liberror(ctx, ret, _("creating datadev media verifier"));
- goto out_lbad;
+ return ret;
}
if (ctx->fsinfo.fs_log) {
ret = alloc_pool(ctx, &vs, XFS_DEV_LOG);
ptvar_free(vs.verify_schedules);
vs.verify_schedules = NULL;
- ret = clean_pool(&vs, XFS_DEV_DATA, &ctx->bytes_checked);
+ ret = clean_pool(&vs, XFS_DEV_DATA, &ctx->bytes_checked, &ok);
if (ret)
str_liberror(ctx, ret, _("flushing datadev verify pool"));
- ret2 = clean_pool(&vs, XFS_DEV_LOG, &ctx->bytes_checked);
+ ret2 = clean_pool(&vs, XFS_DEV_LOG, &ctx->bytes_checked, &ok);
if (ret2)
str_liberror(ctx, ret2, _("flushing logdev verify pool"));
- ret3 = clean_pool(&vs, XFS_DEV_RT, &ctx->bytes_checked);
+ ret3 = clean_pool(&vs, XFS_DEV_RT, &ctx->bytes_checked, &ok);
if (ret3)
str_liberror(ctx, ret3, _("flushing rtdev verify pool"));
* If the verify flush didn't work or we found no bad blocks, we're
* done! No errors detected.
*/
- if (ret || ret2 || ret3) {
+ if (ret || ret2 || ret3 || ok) {
ret |= ret2 | ret3; /* caller only cares about non-zero/zero */
- goto out_lbad;
+ goto out_rtpool;
}
- if (bitmap_empty(vs.d_bad) && !vs.d_trunc &&
- bitmap_empty(vs.r_bad) && !vs.r_trunc &&
- bitmap_empty(vs.l_bad) && !vs.l_trunc)
- goto out_lbad;
/* Scan the whole dir tree to see what matches the bad extents. */
ret = report_all_media_errors(ctx, &vs);
-
- bitmap_free(&vs.l_bad);
- bitmap_free(&vs.r_bad);
- bitmap_free(&vs.d_bad);
- return ret;
+ goto out_rtpool;
out_schedules:
ptvar_free(vs.verify_schedules);
free_pool(&vs, XFS_DEV_LOG);
out_datapool:
free_pool(&vs, XFS_DEV_DATA);
-out_lbad:
- bitmap_free(&vs.l_bad);
-out_rbad:
- bitmap_free(&vs.r_bad);
-out_dbad:
- bitmap_free(&vs.d_bad);
return ret;
}
#include <sys/statvfs.h>
#include "libfrog/workqueue.h"
#include "libfrog/paths.h"
+#include "libfrog/bitmap.h"
#include "xfs_scrub.h"
#include "common.h"
#include "counter.h"
struct scrub_ctx *ctx; /* scrub context */
void *readbuf; /* read buffer */
struct ptcounter *verified_bytes;
- void *ioerr_arg;
- read_verify_ioerr_fn_t ioerr_fn; /* io error callback */
size_t miniosz; /* minimum io size, bytes */
enum xfs_device dev; /* which device? */
* return it to the caller.
*/
int runtime_error;
+
+ /* outputs: a bad block bitmap and a truncated flag */
+ struct bitmap *failmap;
+ bool truncated;
};
unsigned int
/*
* Create a thread pool to run read verifiers.
- *
- * @ioerr_fn will be called when IO errors occur.
*/
int
read_verify_pool_alloc(
struct scrub_ctx *ctx,
enum xfs_device dev,
- read_verify_ioerr_fn_t ioerr_fn,
- void *ioerr_arg,
struct read_verify_pool **prvp)
{
struct read_verify_pool *rvp;
rvp->miniosz = ctx->mnt.fsgeom.blocksize;
rvp->ctx = ctx;
rvp->dev = dev;
- rvp->ioerr_fn = ioerr_fn;
- rvp->ioerr_arg = ioerr_arg;
ret = -workqueue_create(&rvp->wq, (struct xfs_mount *)rvp,
verifier_threads == 1 ? 0 : verifier_threads);
if (ret)
{
if (!rvp->runtime_error)
rvp->runtime_error = ECANCELED;
- workqueue_terminate(&rvp->wq);
+ if (!rvp->wq.terminated)
+ workqueue_terminate(&rvp->wq);
}
/* Finish up any read verification work. */
struct read_verify_pool *rvp)
{
workqueue_destroy(&rvp->wq);
+ bitmap_free(&rvp->failmap);
ptcounter_free(rvp->verified_bytes);
free(rvp->readbuf);
free(rvp);
single_step);
}
+/*
+ * Remember a media error for later.
+ *
+ * A report with a zero @length only marks the pool as truncated.  Any other
+ * report records @start/@length in the pool's bad block bitmap, which is
+ * allocated the first time it is needed.  @error is not recorded.
+ *
+ * The read-verify work function calls this, so (presumably) several
+ * workqueue threads can get here at once.  Both output fields of the pool
+ * are therefore only written while holding the mutex below; in particular
+ * rvp->failmap is tested and published under the lock rather than being
+ * peeked at without synchronisation first.
+ *
+ * NOTE(review): the mutex is function-static and thus shared by every pool;
+ * this only serialises the (rare) error path.
+ *
+ * Returns 0 on success.  If the bitmap cannot be allocated or updated, the
+ * failure is reported through str_liberror() and the nonzero error code is
+ * returned.
+ */
+static int
+read_verify_error(
+	struct read_verify_pool		*rvp,
+	uint64_t			start,
+	uint64_t			length,
+	int				error)
+{
+	static pthread_mutex_t		lock = PTHREAD_MUTEX_INITIALIZER;
+	int				ret = 0;
+
+	pthread_mutex_lock(&lock);
+	if (!length) {
+		rvp->truncated = true;
+	} else if (!rvp->failmap) {
+		struct bitmap		*failmap = NULL;
+
+		/* Only publish the bitmap once it is fully set up. */
+		ret = -bitmap_alloc(&failmap);
+		if (!ret)
+			rvp->failmap = failmap;
+	}
+	pthread_mutex_unlock(&lock);
+
+	if (!length)
+		return 0;
+
+	if (ret) {
+		str_liberror(rvp->ctx, ret,
+				_("allocating bad block bitmap"));
+		return ret;
+	}
+
+	/*
+	 * rvp->failmap was stored before we (or an earlier lock holder)
+	 * dropped the mutex and is never replaced afterwards, so it can be
+	 * used here without retaking the lock.
+	 */
+	ret = -bitmap_set(rvp->failmap, start, length);
+	if (ret) {
+		str_liberror(rvp->ctx, ret, _("setting bad block bitmap"));
+		return ret;
+	}
+
+	return 0;
+}
+
/*
* Issue a read-verify IO in big batches.
*/
ssize_t sz;
ssize_t len;
int read_error;
- int ret;
+ int ret = 0, ret2;
rvp = (struct read_verify_pool *)wq->wq_ctx;
if (rvp->runtime_error)
sz = rvp->miniosz - (rv->io_start % rvp->miniosz);
dbg_printf("IOERR %u @ %"PRIu64" %zu err %d\n",
rvp->dev, rv->io_start, sz, read_error);
- rvp->ioerr_fn(rvp->ctx, rvp->dev, rv->io_start, sz,
- read_error, rvp->ioerr_arg);
+ ret = read_verify_error(rvp, rv->io_start, sz,
+ read_error);
+ if (ret)
+ goto out_err;
} else if (sz == 0) {
/* No bytes at all? Did we hit the end of the disk? */
dbg_printf("EOF %u @ %"PRIu64" %zu err %d\n",
rvp->dev, rv->io_start, sz, read_error);
- rvp->ioerr_fn(rvp->ctx, rvp->dev, rv->io_start, sz,
- read_error, rvp->ioerr_arg);
+ ret = read_verify_error(rvp, rv->io_start, sz,
+ read_error);
+ if (ret)
+ goto out_err;
break;
} else if (sz < len) {
/*
background_sleep();
}
+out_err:
free(rv);
- ret = ptcounter_add(rvp->verified_bytes, verified);
+ ret2 = ptcounter_add(rvp->verified_bytes, verified);
+ if (!ret && ret2)
+ ret = ret2;
if (ret)
rvp->runtime_error = ret;
}
return false;
}
-/* How many bytes has this process verified? */
+/*
+ * Did read verification succeed?  True only if the pool was not marked
+ * truncated and never allocated a bad block bitmap.
+ */
+bool
+read_verify_ok(
+	const struct read_verify_pool	*rvp)
+{
+	if (rvp->truncated)
+		return false;
+
+	return rvp->failmap == NULL;
+}
+
+/*
+ * Report the pool's truncated flag, i.e. whether a zero-length media error
+ * report was recorded for this pool.
+ */
+bool
+read_verify_truncated(
+	const struct read_verify_pool	*rvp)
+{
+	const bool			hit_short_read = rvp->truncated;
+
+	return hit_short_read;
+}
+
+/*
+ * How many bytes has this pool verified?
+ *
+ * NOTE(review): the return value of ptcounter_value() is not checked; if it
+ * fails without touching its output, zero is reported -- TODO confirm that
+ * this is acceptable to callers.
+ */
+uint64_t
+read_verify_progress(
+	const struct read_verify_pool	*rvp)
+{
+	uint64_t			verified = 0;
+
+	ptcounter_value(rvp->verified_bytes, &verified);
+	return verified;
+}
+
+/*
+ * Call @fn for every media failure this pool observed.
+ *
+ * If the pool has no bad block bitmap (rvp->failmap is NULL), @fn is never
+ * called and 0 is returned.  Otherwise the bitmap is walked by
+ * bitmap_iterate(), which is handed @fn and @arg, and the sign-flipped
+ * result of that call is returned.
+ */
int
-read_verify_bytes(
+read_verify_iterate_failed(
struct read_verify_pool *rvp,
- uint64_t *bytes_checked)
+ int (*fn)(uint64_t, uint64_t, void *),
+ void *arg)
{
- return ptcounter_value(rvp->verified_bytes, bytes_checked);
+ /* No bitmap was ever allocated, so there is nothing to walk. */
+ if (!rvp->failmap)
+ return 0;
+
+ return -bitmap_iterate(rvp->failmap, fn, arg);
+}
+
+/*
+ * Call @fn for every media failure this pool observed in the given range.
+ *
+ * Returns 0 without calling @fn when the pool has no bad block bitmap;
+ * otherwise returns the sign-flipped result of bitmap_iterate_range() over
+ * @start/@length.
+ */
+int
+read_verify_iterate_failed_range(
+	struct read_verify_pool		*rvp,
+	uint64_t			start,
+	uint64_t			length,
+	int				(*fn)(uint64_t, uint64_t, void *),
+	void				*arg)
+{
+	return rvp->failmap ?
+		-bitmap_iterate_range(rvp->failmap, start, length, fn, arg) :
+		0;
+}
+
+/*
+ * Were there any media failures within the given range?  Always false when
+ * the pool has no bad block bitmap; otherwise whatever bitmap_test() says
+ * about @start/@length.
+ */
+bool
+read_verify_has_failed(
+	struct read_verify_pool		*rvp,
+	uint64_t			start,
+	uint64_t			length)
+{
+	return rvp->failmap != NULL &&
+	       bitmap_test(rvp->failmap, start, length);
}