}
int
-__krbd_flush(struct rbd_ctx *ctx)
+__krbd_flush(struct rbd_ctx *ctx, bool invalidate)
{
+ int ret;
+
if (o_direct)
return 0;
/*
- * fsync(2) on the block device does not sync the filesystem
- * mounted on top of it, but that's OK - we control the entire
- * lifetime of the block device and write directly to it.
+ * BLKFLSBUF will sync the filesystem on top of the device (we
+ * don't care about that here, since we write directly to it),
+ * write out any dirty buffers and invalidate the buffer cache.
+ * It won't do a hardware cache flush.
+ *
+ * fsync() will write out any dirty buffers and do a hardware
+ * cache flush (which we don't care about either, because for
+ * krbd it's a noop). It won't try to empty the buffer cache
+ * nor poke the filesystem before writing out.
+ *
+ * Given that, for our purposes, fsync is a flush, while
+ * BLKFLSBUF is a flush+invalidate.
*/
- if (fsync(ctx->krbd_fd) < 0) {
- int ret = -errno;
- prt("fsync failed\n");
+ if (invalidate)
+ ret = ioctl(ctx->krbd_fd, BLKFLSBUF, NULL);
+ else
+ ret = fsync(ctx->krbd_fd);
+ if (ret < 0) {
+ ret = -errno;
+ prt("%s failed\n", invalidate ? "BLKFLSBUF" : "fsync");
return ret;
}
int
krbd_flush(struct rbd_ctx *ctx)
{
- return __krbd_flush(ctx);
+ return __krbd_flush(ctx, false);
}
int
int ret;
/*
- * BLKDISCARD doesn't affect dirty pages. This means we can't
- * rely on discarded sectors to match good_buf (i.e. contain
- * zeros) without a preceding cache flush:
+ * BLKDISCARD goes straight to disk and doesn't do anything
+ * about dirty buffers. This means we need to flush so that
*
* write 0..3M
* discard 1..2M
*
- * results in "data data data" rather than "data 0000 data".
+ * results in "data 0000 data" rather than "data data data" on
+ * disk and invalidate so that
+ *
+ * discard 1..2M
+ * read 0..3M
+ *
+ * returns "data 0000 data" rather than "data data data" in
+ * case 1..2M was cached.
*/
- ret = __krbd_flush(ctx);
+ ret = __krbd_flush(ctx, true);
if (ret < 0)
return ret;
* which ends up calling invalidate_bdev(), which invalidates
* clean pages and does nothing about dirty pages beyond the
* new size. The preceding cache flush makes sure those pages
- * are invalidated, which is what we need on shrink:
+ * are invalidated, which is what we need on shrink so that
*
* write 0..1M
* resize 0
* resize 2M
- * write 1..2M
+ * read 0..2M
*
- * results in "data data" rather than "0000 data".
+ * returns "0000 0000" rather than "data 0000".
*/
- ret = __krbd_flush(ctx);
+ ret = __krbd_flush(ctx, false);
if (ret < 0)
return ret;
{
int ret;
- ret = __krbd_flush(ctx);
+ ret = __krbd_flush(ctx, false);
if (ret < 0)
return ret;
{
int ret;
- ret = __krbd_flush(ctx);
+ ret = __krbd_flush(ctx, false);
if (ret < 0)
return ret;