2017-10-20 05:24:03

by Ross Zwisler

[permalink] [raw]
Subject: [PATCH 1/2] dm log writes: Add support for inline data buffers

Currently dm-log-writes supports writing filesystem data via BIOs, and
writing internal metadata from a flat buffer via write_metadata().

For DAX writes, though, we won't have a BIO, but will instead have an
iterator that we'll want to use to fill a flat data buffer.

So, create write_inline_data() which allows us to write filesystem data
using a flat buffer as a source, and wire it up in log_one_block().

Signed-off-by: Ross Zwisler <ross.zwisler-VuQAYsv1563Yd54FQh9/[email protected]>
---
drivers/md/dm-log-writes.c | 90 +++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 86 insertions(+), 4 deletions(-)

diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index 8b80a9c..c65f9d1 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -246,27 +246,109 @@ static int write_metadata(struct log_writes_c *lc, void *entry,
return -1;
}

+static int write_inline_data(struct log_writes_c *lc, void *entry,
+ size_t entrylen, void *data, size_t datalen,
+ sector_t sector)
+{
+ int num_pages, bio_pages, pg_datalen, pg_sectorlen, i;
+ struct page *page;
+ struct bio *bio;
+ size_t ret;
+ void *ptr;
+
+ while (datalen) {
+ num_pages = ALIGN(datalen, PAGE_SIZE) >> PAGE_SHIFT;
+ bio_pages = min(num_pages, BIO_MAX_PAGES);
+
+ atomic_inc(&lc->io_blocks);
+
+ bio = bio_alloc(GFP_KERNEL, bio_pages);
+ if (!bio) {
+ DMERR("Couldn't alloc inline data bio");
+ goto error;
+ }
+
+ bio->bi_iter.bi_size = 0;
+ bio->bi_iter.bi_sector = sector;
+ bio_set_dev(bio, lc->logdev->bdev);
+ bio->bi_end_io = log_end_io;
+ bio->bi_private = lc;
+ bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+
+ for (i = 0; i < bio_pages; i++) {
+ pg_datalen = min(datalen, PAGE_SIZE);
+ pg_sectorlen = ALIGN(pg_datalen, lc->sectorsize);
+
+ page = alloc_page(GFP_KERNEL);
+ if (!page) {
+ DMERR("Couldn't alloc inline data page");
+ goto error_bio;
+ }
+
+ ptr = kmap_atomic(page);
+ memcpy(ptr, data, pg_datalen);
+ if (pg_sectorlen > pg_datalen)
+ memset(ptr + pg_datalen, 0,
+ pg_sectorlen - pg_datalen);
+ kunmap_atomic(ptr);
+
+ ret = bio_add_page(bio, page, pg_sectorlen, 0);
+ if (ret != pg_sectorlen) {
+ DMERR("Couldn't add page of inline data");
+ __free_page(page);
+ goto error_bio;
+ }
+
+ datalen -= pg_datalen;
+ data += pg_datalen;
+ }
+ submit_bio(bio);
+
+ sector += bio_pages * PAGE_SECTORS;
+ }
+ return 0;
+error_bio:
+ bio_free_pages(bio);
+ bio_put(bio);
+error:
+ put_io_block(lc);
+ return -1;
+}
+
static int log_one_block(struct log_writes_c *lc,
struct pending_block *block, sector_t sector)
{
struct bio *bio;
struct log_write_entry entry;
- size_t ret;
+ size_t metadlen, ret;
int i;

entry.sector = cpu_to_le64(block->sector);
entry.nr_sectors = cpu_to_le64(block->nr_sectors);
entry.flags = cpu_to_le64(block->flags);
entry.data_len = cpu_to_le64(block->datalen);
- if (write_metadata(lc, &entry, sizeof(entry), block->data,
- block->datalen, sector)) {
+
+ metadlen = (block->flags & LOG_MARK_FLAG) ? block->datalen : 0;
+ if (write_metadata(lc, &entry, sizeof(entry), block->data, metadlen,
+ sector)) {
free_pending_block(lc, block);
return -1;
}

+ sector += dev_to_bio_sectors(lc, 1);
+
+ if (block->datalen && metadlen == 0) {
+ if (write_inline_data(lc, &entry, sizeof(entry), block->data,
+ block->datalen, sector)) {
+ free_pending_block(lc, block);
+ return -1;
+ }
+ /* we don't support both inline data & bio data */
+ goto out;
+ }
+
if (!block->vec_cnt)
goto out;
- sector += dev_to_bio_sectors(lc, 1);

atomic_inc(&lc->io_blocks);
bio = bio_alloc(GFP_KERNEL, min(block->vec_cnt, BIO_MAX_PAGES));
--
2.9.5


2017-10-20 05:24:04

by Ross Zwisler

[permalink] [raw]
Subject: [PATCH 2/2] dm log writes: add support for DAX

Now that we have the ability to log filesystem writes using a flat buffer, add
support for DAX. Unfortunately we can't easily track data that has been
written via mmap() now that the dax_flush() abstraction was removed by this
commit:

commit c3ca015fab6d ("dax: remove the pmem_dax_ops->flush abstraction")

Otherwise we could just treat each flush as a big write, and store the data
that is being synced to media. It may be worthwhile to add the dax_flush()
entry point back, just as a notifier so we can do this logging.

The motivation for this support is the need for an xfstest that can test
the new MAP_SYNC DAX flag. By logging the filesystem activity with
dm-log-writes we can show that the MAP_SYNC page faults are writing out
their metadata as they happen, instead of requiring an explicit
msync/fsync.

Signed-off-by: Ross Zwisler <[email protected]>
---

Here's a link to Jan's latest MAP_SYNC set, which can be used for the
fstest:

https://www.spinics.net/lists/linux-xfs/msg11852.html

MAP_SYNC is not needed for basic DAX+dm-log-writes functionality.

---
drivers/md/dm-log-writes.c | 90 +++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 89 insertions(+), 1 deletion(-)

diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index c65f9d1..6a8d352 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -10,9 +10,11 @@
#include <linux/init.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
+#include <linux/dax.h>
#include <linux/slab.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
+#include <linux/uio.h>

#define DM_MSG_PREFIX "log-writes"

@@ -609,6 +611,50 @@ static int log_mark(struct log_writes_c *lc, char *data)
return 0;
}

+static int log_dax(struct log_writes_c *lc, sector_t sector, size_t bytes,
+ struct iov_iter *i)
+{
+ struct pending_block *block;
+
+ if (!bytes)
+ return 0;
+
+ block = kzalloc(sizeof(struct pending_block), GFP_KERNEL);
+ if (!block) {
+ DMERR("Error allocating dax pending block");
+ return -ENOMEM;
+ }
+
+ block->data = kzalloc(bytes, GFP_KERNEL);
+ if (!block->data) {
+ DMERR("Error allocating dax data space");
+ kfree(block);
+ return -ENOMEM;
+ }
+
+ /* write data provided via the iterator */
+ if (!copy_from_iter(block->data, bytes, i)) {
+ DMERR("Error copying dax data");
+ kfree(block->data);
+ kfree(block);
+ return -EIO;
+ }
+
+ /* rewind the iterator so that the block driver can use it */
+ iov_iter_revert(i, bytes);
+
+ block->datalen = bytes;
+ block->sector = bio_to_dev_sectors(lc, sector);
+ block->nr_sectors = ALIGN(bytes, lc->sectorsize) >> lc->sectorshift;
+
+ atomic_inc(&lc->pending_blocks);
+ spin_lock_irq(&lc->blocks_lock);
+ list_add_tail(&block->list, &lc->unflushed_blocks);
+ spin_unlock_irq(&lc->blocks_lock);
+ wake_up_process(lc->log_kthread);
+ return 0;
+}
+
static void log_writes_dtr(struct dm_target *ti)
{
struct log_writes_c *lc = ti->private;
@@ -874,9 +920,49 @@ static void log_writes_io_hints(struct dm_target *ti, struct queue_limits *limit
limits->io_min = limits->physical_block_size;
}

+static long log_writes_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
+ long nr_pages, void **kaddr, pfn_t *pfn)
+{
+ struct log_writes_c *lc = ti->private;
+ struct block_device *bdev = lc->dev->bdev;
+ struct dax_device *dax_dev = lc->dev->dax_dev;
+ sector_t sector = pgoff * PAGE_SECTORS;
+ int ret;
+
+ ret = bdev_dax_pgoff(bdev, sector, nr_pages * PAGE_SIZE, &pgoff);
+ if (ret)
+ return ret;
+ return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn);
+}
+
+static size_t log_writes_dax_copy_from_iter(struct dm_target *ti,
+ pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i)
+{
+ struct log_writes_c *lc = ti->private;
+ struct block_device *bdev = lc->dev->bdev;
+ struct dax_device *dax_dev = lc->dev->dax_dev;
+ sector_t sector = pgoff * PAGE_SECTORS;
+ int err;
+
+ if (bdev_dax_pgoff(bdev, sector, ALIGN(bytes, PAGE_SIZE), &pgoff))
+ return 0;
+
+ /* Don't bother doing anything if logging has been disabled */
+ if (!lc->logging_enabled)
+ goto dax_copy;
+
+ err = log_dax(lc, sector, bytes, i);
+ if (err) {
+ DMWARN("Error %d logging DAX write", err);
+ return 0;
+ }
+dax_copy:
+ return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i);
+}
+
static struct target_type log_writes_target = {
.name = "log-writes",
- .version = {1, 0, 0},
+ .version = {1, 0, 1},
.module = THIS_MODULE,
.ctr = log_writes_ctr,
.dtr = log_writes_dtr,
@@ -887,6 +973,8 @@ static struct target_type log_writes_target = {
.message = log_writes_message,
.iterate_devices = log_writes_iterate_devices,
.io_hints = log_writes_io_hints,
+ .direct_access = log_writes_dax_direct_access,
+ .dax_copy_from_iter = log_writes_dax_copy_from_iter,
};

static int __init dm_log_writes_init(void)
--
2.9.5


2017-10-20 05:29:43

by Ross Zwisler

[permalink] [raw]
Subject: [fstests PATCH] generic: add test for DAX MAP_SYNC support

Add a test that exercises DAX's new MAP_SYNC flag.

This test creates a file and writes to it via an mmap(), but never syncs
via fsync/msync. This process is tracked via dm-log-writes, then replayed.

If MAP_SYNC is working the dm-log-writes replay will show the test file
with the same size that we wrote via the mmap() because each allocating
page fault included an implicit metadata sync. If MAP_SYNC isn't working
(which you can test by fiddling with the parameters to mmap()) the file
will be smaller or missing entirely.

Note that dm-log-writes doesn't track the data that we write via the
mmap(), so we can't do any data integrity checking. We can only verify
that the metadata writes for the page faults happened.

Signed-off-by: Ross Zwisler <[email protected]>
---

For this test to run successfully you'll need both Jan's MAP_SYNC series:

https://www.spinics.net/lists/linux-xfs/msg11852.html

and my series adding DAX support to dm-log-writes:

https://lists.01.org/pipermail/linux-nvdimm/2017-October/012972.html

---
.gitignore | 1 +
common/dmlogwrites | 1 -
src/Makefile | 3 +-
src/t_map_sync.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++
tests/generic/466 | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++
tests/generic/466.out | 3 ++
tests/generic/group | 1 +
7 files changed, 158 insertions(+), 2 deletions(-)
create mode 100644 src/t_map_sync.c
create mode 100755 tests/generic/466
create mode 100644 tests/generic/466.out

diff --git a/.gitignore b/.gitignore
index 2014c08..9fc0695 100644
--- a/.gitignore
+++ b/.gitignore
@@ -119,6 +119,7 @@
/src/t_getcwd
/src/t_holes
/src/t_immutable
+/src/t_map_sync
/src/t_mmap_cow_race
/src/t_mmap_dio
/src/t_mmap_fallocate
diff --git a/common/dmlogwrites b/common/dmlogwrites
index 247c744..5b57df9 100644
--- a/common/dmlogwrites
+++ b/common/dmlogwrites
@@ -23,7 +23,6 @@ _require_log_writes()
[ -z "$LOGWRITES_DEV" -o ! -b "$LOGWRITES_DEV" ] && \
_notrun "This test requires a valid \$LOGWRITES_DEV"

- _exclude_scratch_mount_option dax
_require_dm_target log-writes
_require_test_program "log-writes/replay-log"
}
diff --git a/src/Makefile b/src/Makefile
index 3eb25b1..af7e7e9 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -13,7 +13,8 @@ TARGETS = dirstress fill fill2 getpagesize holes lstat64 \
multi_open_unlink dmiperf unwritten_sync genhashnames t_holes \
t_mmap_writev t_truncate_cmtime dirhash_collide t_rename_overwrite \
holetest t_truncate_self t_mmap_dio af_unix t_mmap_stale_pmd \
- t_mmap_cow_race t_mmap_fallocate fsync-err t_mmap_write_ro
+ t_mmap_cow_race t_mmap_fallocate fsync-err t_mmap_write_ro \
+ t_map_sync

LINUX_TARGETS = xfsctl bstat t_mtab getdevicesize preallo_rw_pattern_reader \
preallo_rw_pattern_writer ftrunc trunc fs_perms testx looptest \
diff --git a/src/t_map_sync.c b/src/t_map_sync.c
new file mode 100644
index 0000000..8190f3c
--- /dev/null
+++ b/src/t_map_sync.c
@@ -0,0 +1,74 @@
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#define MiB(a) ((a)*1024*1024)
+
+/*
+ * These two defines were added to the kernel via commits entitled
+ * "mm: Define MAP_SYNC and VM_SYNC flags" and
+ * "mm: introduce MAP_SHARED_VALIDATE, a mechanism to safely define new mmap
+ * flags", respectively.
+ */
+#define MAP_SYNC 0x80000
+#define MAP_SHARED_VALIDATE 0x3
+
+void err_exit(char *op)
+{
+ fprintf(stderr, "%s: %s\n", op, strerror(errno));
+ exit(1);
+}
+
+int main(int argc, char *argv[])
+{
+ int page_size = getpagesize();
+ int len = MiB(1);
+ int i, fd, err;
+ char *data;
+
+ if (argc < 2) {
+ printf("Usage: %s <file>\n", basename(argv[0]));
+ exit(0);
+ }
+
+ fd = open(argv[1], O_RDWR|O_CREAT, S_IRUSR|S_IWUSR);
+ if (fd < 0)
+ err_exit("fd");
+
+ ftruncate(fd, 0);
+ ftruncate(fd, len);
+
+ data = mmap(NULL, len, PROT_READ|PROT_WRITE,
+ MAP_SHARED_VALIDATE|MAP_SYNC, fd, 0);
+ if (data == MAP_FAILED)
+ err_exit("mmap");
+
+ /*
+ * We intentionally don't sync 'fd' manually. If MAP_SYNC is working
+ * these allocating page faults will cause the filesystem to sync its
+ * metadata so that when we replay the dm-log-writes log the test file
+ * will be 1 MiB in size.
+ *
+ * dm-log-writes doesn't track the data that we write via the mmap(),
+ * so we can't check that, we can only verify that the metadata writes
+ * happened.
+ */
+ for (i = 0; i < len; i+=page_size)
+ data[i] = 0xff;
+
+ err = munmap(data, len);
+ if (err < 0)
+ err_exit("munmap");
+
+ err = close(fd);
+ if (err < 0)
+ err_exit("close");
+
+ return 0;
+}
diff --git a/tests/generic/466 b/tests/generic/466
new file mode 100755
index 0000000..f39e6dc
--- /dev/null
+++ b/tests/generic/466
@@ -0,0 +1,77 @@
+#! /bin/bash
+# FS QA Test No. 466
+#
+# Use dm-log-writes to verify that MAP_SYNC actually syncs metadata during
+# page faults.
+#
+#-----------------------------------------------------------------------
+# Copyright (c) 2017 Intel Corporation. All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+#-----------------------------------------------------------------------
+#
+
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo "QA output created by $seq"
+
+here=`pwd`
+status=1 # failure is the default!
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+_cleanup()
+{
+ _log_writes_cleanup
+}
+
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/filter
+. ./common/dmlogwrites
+
+# remove previous $seqres.full before test
+rm -f $seqres.full
+
+# real QA test starts here
+_supported_fs generic
+_supported_os Linux
+_require_log_writes
+_require_scratch_dax
+_require_test_program "t_map_sync"
+
+_log_writes_init
+_log_writes_mkfs >> $seqres.full 2>&1
+_log_writes_mount -o dax
+
+src/t_map_sync $SCRATCH_MNT/test
+
+# Unmount the scratch dir and tear down the log writes target
+_log_writes_mark last
+_log_writes_unmount
+_log_writes_remove
+_check_scratch_fs
+
+# check pre umount
+_log_writes_replay_log last
+_scratch_mount
+
+# We should see $SCRATCH_MNT/test as 1MiB in size
+du -sh $SCRATCH_MNT/test | _filter_scratch | _filter_spaces
+
+_scratch_unmount
+_check_scratch_fs
+
+echo "Silence is golden"
+status=0
+exit
diff --git a/tests/generic/466.out b/tests/generic/466.out
new file mode 100644
index 0000000..4c54d7a
--- /dev/null
+++ b/tests/generic/466.out
@@ -0,0 +1,3 @@
+QA output created by 466
+1.0M SCRATCH_MNT/test
+Silence is golden
diff --git a/tests/generic/group b/tests/generic/group
index fbe0a7f..65328c6 100644
--- a/tests/generic/group
+++ b/tests/generic/group
@@ -468,3 +468,4 @@
463 auto quick clone dangerous
464 auto rw
465 auto rw quick aio
+466 auto quick dax
--
2.9.5

2017-10-20 06:51:23

by Amir Goldstein

[permalink] [raw]
Subject: Re: [fstests PATCH] generic: add test for DAX MAP_SYNC support

On Fri, Oct 20, 2017 at 8:29 AM, Ross Zwisler
<[email protected]> wrote:
> Add a test that exercises DAX's new MAP_SYNC flag.
>
> This test creates a file and writes to it via an mmap(), but never syncs
> via fsync/msync. This process is tracked via dm-log-writes, then replayed.
>
> If MAP_SYNC is working the dm-log-writes replay will show the test file
> with the same size that we wrote via the mmap() because each allocating
> page fault included an implicit metadata sync. If MAP_SYNC isn't working
> (which you can test by fiddling with the parameters to mmap()) the file
> will be smaller or missing entirely.
>
> Note that dm-log-writes doesn't track the data that we write via the
> mmap(), so we can't do any data integrity checking. We can only verify
> that the metadata writes for the page faults happened.
>
> Signed-off-by: Ross Zwisler <[email protected]>

Looks good. Some nitpicking...

> ---
>
> For this test to run successfully you'll need both Jan's MAP_SYNC series:
>
> https://www.spinics.net/lists/linux-xfs/msg11852.html
>
> and my series adding DAX support to dm-log-writes:
>
> https://lists.01.org/pipermail/linux-nvdimm/2017-October/012972.html
>
> ---
> .gitignore | 1 +
> common/dmlogwrites | 1 -
> src/Makefile | 3 +-
> src/t_map_sync.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++
> tests/generic/466 | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++
> tests/generic/466.out | 3 ++
> tests/generic/group | 1 +
> 7 files changed, 158 insertions(+), 2 deletions(-)
> create mode 100644 src/t_map_sync.c
> create mode 100755 tests/generic/466
> create mode 100644 tests/generic/466.out
>
> diff --git a/.gitignore b/.gitignore
> index 2014c08..9fc0695 100644
> --- a/.gitignore
> +++ b/.gitignore
> @@ -119,6 +119,7 @@
> /src/t_getcwd
> /src/t_holes
> /src/t_immutable
> +/src/t_map_sync
> /src/t_mmap_cow_race
> /src/t_mmap_dio
> /src/t_mmap_fallocate
> diff --git a/common/dmlogwrites b/common/dmlogwrites
> index 247c744..5b57df9 100644
> --- a/common/dmlogwrites
> +++ b/common/dmlogwrites
> @@ -23,7 +23,6 @@ _require_log_writes()
> [ -z "$LOGWRITES_DEV" -o ! -b "$LOGWRITES_DEV" ] && \
> _notrun "This test requires a valid \$LOGWRITES_DEV"
>
> - _exclude_scratch_mount_option dax
> _require_dm_target log-writes
> _require_test_program "log-writes/replay-log"
> }
> diff --git a/src/Makefile b/src/Makefile
> index 3eb25b1..af7e7e9 100644
> --- a/src/Makefile
> +++ b/src/Makefile
> @@ -13,7 +13,8 @@ TARGETS = dirstress fill fill2 getpagesize holes lstat64 \
> multi_open_unlink dmiperf unwritten_sync genhashnames t_holes \
> t_mmap_writev t_truncate_cmtime dirhash_collide t_rename_overwrite \
> holetest t_truncate_self t_mmap_dio af_unix t_mmap_stale_pmd \
> - t_mmap_cow_race t_mmap_fallocate fsync-err t_mmap_write_ro
> + t_mmap_cow_race t_mmap_fallocate fsync-err t_mmap_write_ro \
> + t_map_sync
>
> LINUX_TARGETS = xfsctl bstat t_mtab getdevicesize preallo_rw_pattern_reader \
> preallo_rw_pattern_writer ftrunc trunc fs_perms testx looptest \
> diff --git a/src/t_map_sync.c b/src/t_map_sync.c
> new file mode 100644
> index 0000000..8190f3c
> --- /dev/null
> +++ b/src/t_map_sync.c
> @@ -0,0 +1,74 @@
> +#include <errno.h>
> +#include <fcntl.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <sys/mman.h>
> +#include <sys/stat.h>
> +#include <sys/types.h>
> +#include <unistd.h>
> +
> +#define MiB(a) ((a)*1024*1024)
> +
> +/*
> + * These two defines were added to the kernel via commits entitled
> + * "mm: Define MAP_SYNC and VM_SYNC flags" and
> + * "mm: introduce MAP_SHARED_VALIDATE, a mechanism to safely define new mmap
> + * flags", respectively.

#ifndef?

> + */
> +#define MAP_SYNC 0x80000
> +#define MAP_SHARED_VALIDATE 0x3
> +
> +void err_exit(char *op)
> +{
> + fprintf(stderr, "%s: %s\n", op, strerror(errno));
> + exit(1);
> +}
> +
> +int main(int argc, char *argv[])
> +{
> + int page_size = getpagesize();
> + int len = MiB(1);
> + int i, fd, err;
> + char *data;
> +
> + if (argc < 2) {
> + printf("Usage: %s <file>\n", basename(argv[0]));
> + exit(0);
> + }
> +
> + fd = open(argv[1], O_RDWR|O_CREAT, S_IRUSR|S_IWUSR);
> + if (fd < 0)
> + err_exit("fd");
> +
> + ftruncate(fd, 0);

O_TRUNC?

> + ftruncate(fd, len);
> +
> + data = mmap(NULL, len, PROT_READ|PROT_WRITE,
> + MAP_SHARED_VALIDATE|MAP_SYNC, fd, 0);
> + if (data == MAP_FAILED)
> + err_exit("mmap");
> +
> + /*
> + * We intentionally don't sync 'fd' manually. If MAP_SYNC is working
> + * these allocating page faults will cause the filesystem to sync its
> + * metadata so that when we replay the dm-log-writes log the test file
> + * will be 1 MiB in size.
> + *
> + * dm-log-writes doesn't track the data that we write via the mmap(),
> + * so we can't check that, we can only verify that the metadata writes
> + * happened.
> + */
> + for (i = 0; i < len; i+=page_size)
> + data[i] = 0xff;
> +

Ideally, you would write the mark now (see mark_log() in fsx.c).
Otherwise, what you are testing is not that page faults allocate disk blocks,
but rather that page fault + munmap + close + process exit
allocate disk blocks.
I realize munmap/close are not supposed to sync dirty pages, but it is better
to eliminate this noise from the test.


> + err = munmap(data, len);
> + if (err < 0)
> + err_exit("munmap");
> +
> + err = close(fd);
> + if (err < 0)
> + err_exit("close");
> +
> + return 0;
> +}
> diff --git a/tests/generic/466 b/tests/generic/466
> new file mode 100755
> index 0000000..f39e6dc
> --- /dev/null
> +++ b/tests/generic/466
> @@ -0,0 +1,77 @@
> +#! /bin/bash
> +# FS QA Test No. 466
> +#
> +# Use md_log_writes to verify that MAP_SYNC actually syncs metadata during
> +# page faults.
> +#
> +#-----------------------------------------------------------------------
> +# Copyright (c) 2017 Intel Corporation. All Rights Reserved.
> +#
> +# This program is free software; you can redistribute it and/or
> +# modify it under the terms of the GNU General Public License as
> +# published by the Free Software Foundation.
> +#
> +# This program is distributed in the hope that it would be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with this program; if not, write the Free Software Foundation,
> +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
> +#-----------------------------------------------------------------------
> +#
> +
> +seq=`basename $0`
> +seqres=$RESULT_DIR/$seq
> +echo "QA output created by $seq"
> +
> +here=`pwd`
> +status=1 # failure is the default!
> +trap "_cleanup; exit \$status" 0 1 2 3 15
> +
> +_cleanup()
> +{
> + _log_writes_cleanup
> +}
> +
> +# get standard environment, filters and checks
> +. ./common/rc
> +. ./common/filter
> +. ./common/dmlogwrites
> +
> +# remove previous $seqres.full before test
> +rm -f $seqres.full
> +
> +# real QA test starts here
> +_supported_fs generic
> +_supported_os Linux
> +_require_log_writes
> +_require_scratch_dax
> +_require_test_program "t_map_sync"
> +
> +_log_writes_init
> +_log_writes_mkfs >> $seqres.full 2>&1
> +_log_writes_mount -o dax
> +
> +src/t_map_sync $SCRATCH_MNT/test
> +
> +# Unmount the scratch dir and tear down the log writes target
> +_log_writes_mark last

Feel free to use the lousy name I chose for the pre_unmount mark ;-)
However, if you follow my suggestion to move the mark into t_map_sync,
it would be better to call it pre_munmap or something similar.

> +_log_writes_unmount
> +_log_writes_remove
> +_check_scratch_fs
> +
> +# check pre umount
> +_log_writes_replay_log last
> +_scratch_mount
> +
> +# We should see $SCRATCH_MNT/test as 1MiB in size

nit: 1MiB in disk usage. It's 1MiB in size to begin with.

> +du -sh $SCRATCH_MNT/test | _filter_scratch | _filter_spaces
> +
> +_scratch_unmount
> +_check_scratch_fs
> +
> +echo "Silence is golden"
> +status=0
> +exit
> diff --git a/tests/generic/466.out b/tests/generic/466.out
> new file mode 100644
> index 0000000..4c54d7a
> --- /dev/null
> +++ b/tests/generic/466.out
> @@ -0,0 +1,3 @@
> +QA output created by 466
> +1.0M SCRATCH_MNT/test
> +Silence is golden
> diff --git a/tests/generic/group b/tests/generic/group
> index fbe0a7f..65328c6 100644
> --- a/tests/generic/group
> +++ b/tests/generic/group
> @@ -468,3 +468,4 @@
> 463 auto quick clone dangerous
> 464 auto rw
> 465 auto rw quick aio
> +466 auto quick dax
> --
> 2.9.5
>
> --
> To unsubscribe from this list: send the line "unsubscribe fstests" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html

2017-10-23 17:34:12

by Josef Bacik

[permalink] [raw]
Subject: Re: [PATCH 2/2] dm log writes: add support for DAX

On Thu, Oct 19, 2017 at 11:24:04PM -0600, Ross Zwisler wrote:
> Now that we have the ability log filesystem writes using a flat buffer, add
> support for DAX. Unfortunately we can't easily track data that has been
> written via mmap() now that the dax_flush() abstraction was removed by this
> commit:
>
> commit c3ca015fab6d ("dax: remove the pmem_dax_ops->flush abstraction")
>
> Otherwise we could just treat each flush as a big write, and store the data
> that is being synced to media. It may be worthwhile to add the dax_flush()
> entry point back, just as a notifier so we can do this logging.
>
> The motivation for this support is the need for an xfstest that can test
> the new MAP_SYNC DAX flag. By logging the filesystem activity with
> dm-log-writes we can show that the MAP_SYNC page faults are writing out
> their metadata as they happen, instead of requiring an explicit
> msync/fsync.
>
> Signed-off-by: Ross Zwisler <[email protected]>
> ---

Ok this is just my ignorance of how DAX works shining through, but do we need a
new flag to indicate this is DAX data? You are logging it like it's just normal
data going to a certain sector, is that good enough? If it is then hooray this
looks fine to me, I'm just slightly confused. Thanks,

Josef

2017-10-23 18:59:16

by Ross Zwisler

[permalink] [raw]
Subject: Re: [PATCH 2/2] dm log writes: add support for DAX

On Mon, Oct 23, 2017 at 01:34:09PM -0400, Josef Bacik wrote:
> On Thu, Oct 19, 2017 at 11:24:04PM -0600, Ross Zwisler wrote:
> > Now that we have the ability log filesystem writes using a flat buffer, add
> > support for DAX. Unfortunately we can't easily track data that has been
> > written via mmap() now that the dax_flush() abstraction was removed by this
> > commit:
> >
> > commit c3ca015fab6d ("dax: remove the pmem_dax_ops->flush abstraction")
> >
> > Otherwise we could just treat each flush as a big write, and store the data
> > that is being synced to media. It may be worthwhile to add the dax_flush()
> > entry point back, just as a notifier so we can do this logging.
> >
> > The motivation for this support is the need for an xfstest that can test
> > the new MAP_SYNC DAX flag. By logging the filesystem activity with
> > dm-log-writes we can show that the MAP_SYNC page faults are writing out
> > their metadata as they happen, instead of requiring an explicit
> > msync/fsync.
> >
> > Signed-off-by: Ross Zwisler <ross.zwisler-VuQAYsv1563Yd54FQh9/[email protected]>
> > ---
>
> Ok this is just my ignorance of how DAX works shining through, but do we need a
> new flag to indicate this is DAX data? You are logging it like it's just normal
> data going to a certain sector, is that good enough? If it is then hooray this
> looks fine to me, I'm just slightly confused. Thanks,
>
> Josef

I don't think we need a special flag to specify that it's DAX. Really it's
just the same as a normal filesystem write, except that we actually do the
work of writing the data via the FS DAX iomap code instead of bubbling it all
the way down to the block driver.

2017-10-24 19:14:32

by Mike Snitzer

[permalink] [raw]
Subject: Re: [PATCH 1/2] dm log writes: Add support for inline data buffers

On Fri, Oct 20 2017 at 1:24am -0400,
Ross Zwisler <[email protected]> wrote:

> Currently dm-log-writes supports writing filesystem data via BIOs, and
> writing internal metadata from a flat buffer via write_metadata().
>
> For DAX writes, though, we won't have a BIO, but will instead have an
> iterator that we'll want to use to fill a flat data buffer.
>
> So, create write_inline_data() which allows us to write filesystem data
> using a flat buffer as a source, and wire it up in log_one_block().
>
> Signed-off-by: Ross Zwisler <[email protected]>

Hi,

I picked this up but tweaked some whitespace and a couple of style nits, see:
https://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm.git/commit/?h=for-4.15/dm&id=6a697d036324c7fbe63fb49599027269006161e7

Thanks,
Mike

2017-10-24 19:22:23

by Mike Snitzer

[permalink] [raw]
Subject: Re: [PATCH 2/2] dm log writes: add support for DAX

On Fri, Oct 20 2017 at 1:24am -0400,
Ross Zwisler <ross.zwisler-VuQAYsv1563Yd54FQh9/[email protected]> wrote:

> Now that we have the ability log filesystem writes using a flat buffer, add
> support for DAX. Unfortunately we can't easily track data that has been
> written via mmap() now that the dax_flush() abstraction was removed by this
> commit:
>
> commit c3ca015fab6d ("dax: remove the pmem_dax_ops->flush abstraction")
>
> Otherwise we could just treat each flush as a big write, and store the data
> that is being synced to media. It may be worthwhile to add the dax_flush()
> entry point back, just as a notifier so we can do this logging.
>
> The motivation for this support is the need for an xfstest that can test
> the new MAP_SYNC DAX flag. By logging the filesystem activity with
> dm-log-writes we can show that the MAP_SYNC page faults are writing out
> their metadata as they happen, instead of requiring an explicit
> msync/fsync.
>
> Signed-off-by: Ross Zwisler <ross.zwisler-VuQAYsv1563Yd54FQh9/[email protected]>

I've picked this up, please see:
https://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm.git/commit/?h=for-4.15/dm&id=ae613bbb0144e84cb3c0ebfa9f4fd4d1507c2f0e

I tweaked the header and a couple of whitespace nits. I also
switched the version bump from 1.0.1 to 1.1.0.

Thanks,
Mike

2017-10-24 19:30:22

by Ross Zwisler

[permalink] [raw]
Subject: Re: [PATCH 2/2] dm log writes: add support for DAX

On Tue, Oct 24, 2017 at 03:22:23PM -0400, Mike Snitzer wrote:
> On Fri, Oct 20 2017 at 1:24am -0400,
> Ross Zwisler <ross.zwisler-VuQAYsv1563Yd54FQh9/[email protected]> wrote:
>
> > Now that we have the ability log filesystem writes using a flat buffer, add
> > support for DAX. Unfortunately we can't easily track data that has been
> > written via mmap() now that the dax_flush() abstraction was removed by this
> > commit:
> >
> > commit c3ca015fab6d ("dax: remove the pmem_dax_ops->flush abstraction")
> >
> > Otherwise we could just treat each flush as a big write, and store the data
> > that is being synced to media. It may be worthwhile to add the dax_flush()
> > entry point back, just as a notifier so we can do this logging.
> >
> > The motivation for this support is the need for an xfstest that can test
> > the new MAP_SYNC DAX flag. By logging the filesystem activity with
> > dm-log-writes we can show that the MAP_SYNC page faults are writing out
> > their metadata as they happen, instead of requiring an explicit
> > msync/fsync.
> >
> > Signed-off-by: Ross Zwisler <ross.zwisler-VuQAYsv1563Yd54FQh9/[email protected]>
>
> I've picked this up, please see:
> https://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm.git/commit/?h=for-4.15/dm&id=ae613bbb0144e84cb3c0ebfa9f4fd4d1507c2f0e
>
> I tweaked the header and tweaked a couple whitespace nits. Also
> switched version bump from 1.0.1 to 1.1.0.
>
> Thanks,
> Mike

Sure, your tweaks look fine. Thanks!