Hi Christoph,
This patch series addresses Linus' feedback on my most recent configfs patch,
adds unit tests for configfs and also includes one functional change that I
came up with while writing these unit tests. I will leave it to you to decide
whether to submit these patches for inclusion in kernel v5.14 or v5.15.
Thanks,
Bart.
Bart Van Assche (4):
configfs: Rework the overflow check in fill_write_buffer()
configfs: Fix writing at a non-zero offset
kunit: Add support for suite initialization and cleanup
configfs: Add unit tests
fs/configfs/Kconfig | 8 +
fs/configfs/Makefile | 2 +
fs/configfs/configfs-test.c | 369 ++++++++++++++++++++++++++++++++++++
fs/configfs/file.c | 17 +-
include/kunit/test.h | 4 +
lib/kunit/test.c | 14 ++
6 files changed, 406 insertions(+), 8 deletions(-)
create mode 100644 fs/configfs/configfs-test.c
Change 'if (SIMPLE_ATTR_SIZE - 1 - pos <= 0)' into
'if (pos >= SIMPLE_ATTR_SIZE - 1)'. Change the data type of 'to_copy'
from long long (loff_t) into int. Do not check whether pos < 0 since
rw_verify_area() checks this for us. As one can see on
https://lore.kernel.org/lkml/CAHk-=wjuDBQdUvaO=XaptgmvE_qeg_EuZjsUZf2vVoXPUMgAvg@mail.gmail.com/
these changes have been requested by Linus Torvalds.
Cc: Bodo Stroesser <[email protected]>
Cc: Martin K. Petersen <[email protected]>
Cc: Yanko Kaneti <[email protected]>
Cc: Brendan Higgins <[email protected]>
Signed-off-by: Bart Van Assche <[email protected]>
---
fs/configfs/file.c | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index 5a0be9985bae..8121bb1b2121 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -181,8 +181,7 @@ static ssize_t configfs_bin_read_iter(struct kiocb *iocb, struct iov_iter *to)
static int fill_write_buffer(struct configfs_buffer *buffer, loff_t pos,
struct iov_iter *from)
{
- loff_t to_copy;
- int copied;
+ int to_copy, copied;
u8 *to;
if (!buffer->page)
@@ -190,9 +189,9 @@ static int fill_write_buffer(struct configfs_buffer *buffer, loff_t pos,
if (!buffer->page)
return -ENOMEM;
- to_copy = SIMPLE_ATTR_SIZE - 1 - pos;
- if (to_copy <= 0)
+ if (pos >= SIMPLE_ATTR_SIZE - 1)
return 0;
+ to_copy = SIMPLE_ATTR_SIZE - 1 - pos;
to = buffer->page + pos;
copied = copy_from_iter(to, to_copy, from);
buffer->needs_read_fill = 1;
Cc: Brendan Higgins <[email protected]>
Cc: Bodo Stroesser <[email protected]>
Cc: Martin K. Petersen <[email protected]>
Cc: Yanko Kaneti <[email protected]>
Signed-off-by: Bart Van Assche <[email protected]>
---
include/kunit/test.h | 4 ++++
lib/kunit/test.c | 14 ++++++++++++++
2 files changed, 18 insertions(+)
diff --git a/include/kunit/test.h b/include/kunit/test.h
index 24b40e5c160b..a6eef96a409c 100644
--- a/include/kunit/test.h
+++ b/include/kunit/test.h
@@ -215,6 +215,8 @@ static inline char *kunit_status_to_ok_not_ok(enum kunit_status status)
* struct kunit_suite - describes a related collection of &struct kunit_case
*
* @name: the name of the test. Purely informational.
+ * @init_suite: called once per test suite before the test cases.
+ * @exit_suite: called once per test suite after all test cases.
* @init: called before every test case.
* @exit: called after every test case.
* @test_cases: a null terminated array of test cases.
@@ -229,6 +231,8 @@ static inline char *kunit_status_to_ok_not_ok(enum kunit_status status)
*/
struct kunit_suite {
const char name[256];
+ int (*init_suite)(void);
+ void (*exit_suite)(void);
int (*init)(struct kunit *test);
void (*exit)(struct kunit *test);
struct kunit_case *test_cases;
diff --git a/lib/kunit/test.c b/lib/kunit/test.c
index d79ecb86ea57..c271692ced93 100644
--- a/lib/kunit/test.c
+++ b/lib/kunit/test.c
@@ -397,9 +397,19 @@ int kunit_run_tests(struct kunit_suite *suite)
{
char param_desc[KUNIT_PARAM_DESC_SIZE];
struct kunit_case *test_case;
+ int res = 0;
kunit_print_subtest_start(suite);
+ if (suite->init_suite)
+ res = suite->init_suite();
+
+ if (res < 0) {
+ kunit_log(KERN_INFO, suite, KUNIT_SUBTEST_INDENT
+ "# Suite initialization failed (%d)\n", res);
+ goto end;
+ }
+
kunit_suite_for_each_test_case(suite, test_case) {
struct kunit test = { .param_value = NULL, .param_index = 0 };
test_case->status = KUNIT_SKIPPED;
@@ -439,6 +449,10 @@ int kunit_run_tests(struct kunit_suite *suite)
test.status_comment);
}
+ if (suite->exit_suite)
+ suite->exit_suite();
+
+end:
kunit_print_subtest_end(suite);
return 0;
Testing configfs read and write behavior is non-trivial and tedious.
Hence these configfs kunit tests that make it easier to test configfs
text and binary attribute support. This is how I run these tests:
set -e
if [ -e .config ]; then
make ARCH=um mrproper
fi
if [ ! -e .kunit/.kunitconfig ]; then
cat <<EOF >.kunit/.kunitconfig
CONFIG_CONFIGFS_FS=y
CONFIG_CONFIGFS_KUNIT_TEST=y
CONFIG_KUNIT=y
CONFIG_PROVE_LOCKING=y
CONFIG_SYSFS=y
CONFIG_UBSAN=y
EOF
cp .kunit/.kunitconfig .kunit/.config
fi
./tools/testing/kunit/kunit.py run
Cc: Bodo Stroesser <[email protected]>
Cc: Martin K. Petersen <[email protected]>
Cc: Yanko Kaneti <[email protected]>
Cc: Brendan Higgins <[email protected]>
Signed-off-by: Bart Van Assche <[email protected]>
---
fs/configfs/Kconfig | 8 +
fs/configfs/Makefile | 2 +
fs/configfs/configfs-test.c | 375 ++++++++++++++++++++++++++++++++++++
3 files changed, 385 insertions(+)
create mode 100644 fs/configfs/configfs-test.c
diff --git a/fs/configfs/Kconfig b/fs/configfs/Kconfig
index 272b64456999..9f8f3d8ca67d 100644
--- a/fs/configfs/Kconfig
+++ b/fs/configfs/Kconfig
@@ -10,3 +10,11 @@ config CONFIGFS_FS
Both sysfs and configfs can and should exist together on the
same system. One is not a replacement for the other.
+
+config CONFIGFS_KUNIT_TEST
+ tristate "Configfs Kunit test" if !KUNIT_ALL_TESTS
+ depends on CONFIGFS_FS && KUNIT
+ default KUNIT_ALL_TESTS
+ help
+ Run the configfs unit tests at boot time. For more information, see
+ also the Kunit documentation in Documentation/dev-tools/kunit/.
diff --git a/fs/configfs/Makefile b/fs/configfs/Makefile
index 0200498ede27..388003fa9f37 100644
--- a/fs/configfs/Makefile
+++ b/fs/configfs/Makefile
@@ -6,3 +6,5 @@
obj-$(CONFIG_CONFIGFS_FS) += configfs.o
configfs-objs := inode.o file.o dir.o symlink.o mount.o item.o
+
+obj-$(CONFIG_CONFIGFS_KUNIT_TEST) += configfs-test.o
diff --git a/fs/configfs/configfs-test.c b/fs/configfs/configfs-test.c
new file mode 100644
index 000000000000..ad907cd092f8
--- /dev/null
+++ b/fs/configfs/configfs-test.c
@@ -0,0 +1,375 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <kunit/test.h>
+#include <linux/configfs.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/uio.h>
+
+/*
+ * Maximum number of bytes supported by the configfs attributes in this unit
+ * test.
+ */
+enum { ATTR_MAX_SIZE = 256 };
+
+static struct test_item {
+ uint32_t nbytes;
+ char data[ATTR_MAX_SIZE];
+} bin_attr, text_attr;
+
+static ssize_t attr_read(struct test_item *ti, void *buf, size_t len)
+{
+ size_t nbytes = min_t(size_t, len, ti->nbytes);
+
+ memcpy(buf, ti->data, nbytes);
+ return nbytes;
+}
+
+static ssize_t attr_write(struct test_item *ti, const void *buf, size_t len)
+{
+ if (len > ATTR_MAX_SIZE)
+ return -EINVAL;
+ ti->nbytes = len;
+ memcpy(ti->data, buf, len);
+ return len;
+}
+
+static DEFINE_SEMAPHORE(bin_attr_written);
+
+static ssize_t bin_attr_read(struct config_item *item, void *buf, size_t len)
+{
+ WARN_ON_ONCE(!bin_attr.nbytes);
+ return buf ? attr_read(&bin_attr, buf, len) : bin_attr.nbytes;
+}
+
+static ssize_t bin_attr_write(struct config_item *item, const void *buf,
+ size_t len)
+{
+ up(&bin_attr_written);
+ return attr_write(&bin_attr, buf, len);
+}
+
+CONFIGFS_BIN_ATTR(, bin_attr, NULL, ATTR_MAX_SIZE);
+
+static struct configfs_bin_attribute *bin_attrs[] = {
+ &attr_bin_attr,
+ NULL,
+};
+
+static ssize_t text_attr_show(struct config_item *item, char *buf)
+{
+ return attr_read(&text_attr, buf, strlen(buf));
+}
+
+static ssize_t text_attr_store(struct config_item *item, const char *buf,
+ size_t size)
+{
+ return attr_write(&text_attr, buf, size);
+}
+
+CONFIGFS_ATTR(, text_attr);
+
+static struct configfs_attribute *text_attrs[] = {
+ &attr_text_attr,
+ NULL,
+};
+
+static const struct config_item_type test_configfs_type = {
+ .ct_owner = THIS_MODULE,
+ .ct_bin_attrs = bin_attrs,
+ .ct_attrs = text_attrs,
+};
+
+/*
+ * Return the file mode if @path exists or an error code if opening @path via
+ * filp_open() in read-only mode failed.
+ */
+int get_file_mode(const char *path)
+{
+ struct file *file;
+ int res;
+
+ file = filp_open(path, O_RDONLY, 0400);
+ if (IS_ERR(file)) {
+ res = PTR_ERR(file);
+ goto out;
+ }
+ res = file_inode(file)->i_mode;
+ filp_close(file, NULL);
+
+out:
+ return res;
+}
+
+static int mkdir(const char *name, umode_t mode)
+{
+ struct dentry *dentry;
+ struct path path;
+ int err;
+
+ err = get_file_mode(name);
+ if (err >= 0 && S_ISDIR(err))
+ return 0;
+
+ dentry = kern_path_create(AT_FDCWD, name, &path, LOOKUP_DIRECTORY);
+ if (IS_ERR(dentry))
+ return PTR_ERR(dentry);
+
+ err = vfs_mkdir(&init_user_ns, d_inode(path.dentry), dentry, mode);
+ done_path_create(&path, dentry);
+
+ return err;
+}
+
+static int mount_configfs(void)
+{
+ int res;
+
+ res = get_file_mode("/sys/kernel/config/unit-test");
+ if (res >= 0)
+ return 0;
+ res = mkdir("/sys", 0755);
+ if (res < 0)
+ return res;
+ res = mkdir("/sys/kernel", 0755);
+ if (res < 0)
+ return res;
+ res = mkdir("/sys/kernel/config", 0755);
+ if (res < 0)
+ return res;
+ pr_info("mounting configfs ...\n");
+ res = do_mount("", "/sys/kernel/config", "configfs", 0, NULL);
+ if (res < 0)
+ pr_err("mounting configfs failed: %d\n", res);
+ else
+ pr_info("mounted configfs.\n");
+ return res;
+}
+
+static void unmount_configfs(void)
+{
+ /* How to unmount a filesystem from kernel code? */
+}
+
+#define KUNIT_EXPECT_MODE(test, left_arg, mask, right) \
+({ \
+ const int left = (left_arg); \
+ \
+ KUNIT_EXPECT_TRUE_MSG(test, left >= 0 && (left & mask) == right,\
+ "(" #left_arg "(%d) & " #mask ") != " #right, left); \
+})
+
+static void configfs_mounted(struct kunit *test)
+{
+ KUNIT_EXPECT_MODE(test, get_file_mode("/"), 0500, 0500);
+ KUNIT_EXPECT_MODE(test, get_file_mode("/sys"), 0500, 0500);
+ KUNIT_EXPECT_MODE(test, get_file_mode("/sys/kernel"), 0500, 0500);
+ KUNIT_EXPECT_MODE(test, get_file_mode("/sys/kernel/config"), 0500, 0500);
+ KUNIT_EXPECT_MODE(test, get_file_mode("/sys/kernel/config/unit-test"),
+ 0500, 0500);
+ KUNIT_EXPECT_MODE(test, get_file_mode
+ ("/sys/kernel/config/unit-test/text_attr"),
+ 0700, 0600);
+}
+
+static void configfs_text_attr(struct kunit *test)
+{
+ struct file *f = filp_open("/sys/kernel/config/unit-test/text_attr",
+ O_RDWR, 0);
+ static const char text1[] =
+ "The quick brown fox jumps over the lazy dog";
+ const int off1 = 0;
+ const int len1 = strlen(text1);
+ static const char text2[] = "huge";
+ const int off2 = strlen(text1) - strlen(text2) - 4;
+ const int len2 = strlen(text2);
+ char text3[sizeof(text1)];
+ int res;
+ loff_t pos;
+
+ KUNIT_EXPECT_EQ(test, PTR_ERR_OR_ZERO(f), 0);
+ if (IS_ERR(f))
+ return;
+ pos = off1;
+ res = kernel_write(f, text1, len1, &pos);
+ KUNIT_EXPECT_EQ(test, res, len1);
+ KUNIT_EXPECT_EQ(test, pos, off1 + len1);
+ pos = off2;
+ res = kernel_write(f, text2, len2, &pos);
+ KUNIT_EXPECT_EQ(test, res, len2);
+ KUNIT_EXPECT_EQ(test, pos, off2 + len2);
+ pos = 4095;
+ res = kernel_write(f, text1, len1, &pos);
+ KUNIT_EXPECT_EQ(test, res, 0);
+ KUNIT_EXPECT_EQ(test, pos, 4095);
+ pos = 0;
+ res = kernel_read(f, text3, sizeof(text3), &pos);
+ KUNIT_EXPECT_EQ(test, res, off2 + len2);
+ KUNIT_EXPECT_EQ(test, pos, off2 + len2);
+ if (res >= 0) {
+ text3[res] = '\0';
+ KUNIT_EXPECT_STREQ(test, text3,
+ "The quick brown fox jumps over the huge");
+ }
+ pos = 1;
+ res = kernel_read(f, text3, sizeof(text3), &pos);
+ KUNIT_EXPECT_EQ(test, res, off2 + len2 - 1);
+ KUNIT_EXPECT_EQ(test, pos, off2 + len2);
+ if (res >= 0) {
+ text3[res] = '\0';
+ KUNIT_EXPECT_STREQ(test, text3,
+ "he quick brown fox jumps over the huge");
+ }
+ pos = -1;
+ res = kernel_write(f, text1, len1, &pos);
+ KUNIT_EXPECT_EQ(test, res, -EINVAL);
+ pos = LLONG_MAX - len1;
+ res = kernel_write(f, text1, len1, &pos);
+ KUNIT_EXPECT_EQ(test, res, 0);
+ pos = -1;
+ res = kernel_read(f, text3, sizeof(text3), &pos);
+ KUNIT_EXPECT_EQ(test, res, -EINVAL);
+ pos = LLONG_MAX - sizeof(text3);
+ res = kernel_read(f, text3, sizeof(text3), &pos);
+ KUNIT_EXPECT_EQ(test, res, 0);
+ filp_close(f, NULL);
+}
+
+#define KUNIT_EXPECT_MEMEQ(test, left, right, len) \
+ KUNIT_EXPECT_TRUE_MSG(test, memcmp(left, right, len) == 0, \
+ #left " != " #right ": %.*s <> %.*s", \
+ (int)len, left, (int)len, right)
+
+static void configfs_bin_attr(struct kunit *test)
+{
+ struct file *f = filp_open("/sys/kernel/config/unit-test/bin_attr",
+ O_RDWR, 0);
+ static const u8 data1[] =
+ "\xff\x00The quick brown fox jumps over the lazy dog";
+ const int off1 = 0;
+ const int len1 = sizeof(data1) - 1;
+ static const u8 data2[] = "huge";
+ const int off2 = len1 - strlen(data2) - 4;
+ const int len2 = strlen(data2);
+ u8 data3[sizeof(data1)];
+ int res;
+ loff_t pos;
+
+ bin_attr.nbytes = len1;
+
+ KUNIT_EXPECT_EQ(test, PTR_ERR_OR_ZERO(f), 0);
+ if (IS_ERR(f))
+ return;
+ pos = off1;
+ res = kernel_write(f, data1, len1, &pos);
+ KUNIT_EXPECT_EQ(test, res, len1);
+ KUNIT_EXPECT_EQ(test, pos, off1 + len1);
+ pos = off2;
+ res = kernel_write(f, data2, len2, &pos);
+ KUNIT_EXPECT_EQ(test, res, len2);
+ KUNIT_EXPECT_EQ(test, pos, off2 + len2);
+ filp_close(f, NULL);
+
+ /*
+ * buffer->bin_attr->write() is called from inside
+ * configfs_release_bin_file() and the latter function is
+ * called asynchronously. Hence the down() calls below to wait
+ * until the write method has been called.
+ */
+ down(&bin_attr_written);
+ down(&bin_attr_written);
+
+ f = filp_open("/sys/kernel/config/unit-test/bin_attr", O_RDONLY, 0);
+ KUNIT_EXPECT_EQ(test, PTR_ERR_OR_ZERO(f), 0);
+ if (IS_ERR(f))
+ return;
+ pos = 0;
+ res = kernel_read(f, data3, sizeof(data3), &pos);
+ KUNIT_EXPECT_EQ(test, res, len1);
+ KUNIT_EXPECT_EQ(test, pos, len1);
+ if (res >= 0) {
+ data3[res] = '\0';
+ KUNIT_EXPECT_MEMEQ(test, data3,
+ "\xff\x00The quick brown fox jumps over the huge dog",
+ len1);
+ }
+ pos = 1;
+ res = kernel_read(f, data3, sizeof(data3), &pos);
+ KUNIT_EXPECT_EQ(test, res, len1 - 1);
+ KUNIT_EXPECT_EQ(test, pos, len1);
+ if (res >= 0) {
+ data3[res] = '\0';
+ KUNIT_EXPECT_MEMEQ(test, data3,
+ "\x00The quick brown fox jumps over the huge dog",
+ len1 - 1);
+ }
+ filp_close(f, NULL);
+
+ f = filp_open("/sys/kernel/config/unit-test/bin_attr", O_RDWR, 0);
+ KUNIT_EXPECT_EQ(test, PTR_ERR_OR_ZERO(f), 0);
+ if (IS_ERR(f))
+ return;
+ pos = -1;
+ res = kernel_write(f, data1, len1, &pos);
+ KUNIT_EXPECT_EQ(test, res, -EINVAL);
+ pos = LLONG_MAX - len1;
+ res = kernel_write(f, data1, len1, &pos);
+ KUNIT_EXPECT_EQ(test, res, -EFBIG);
+ filp_close(f, NULL);
+
+ f = filp_open("/sys/kernel/config/unit-test/bin_attr", O_RDONLY, 0);
+ KUNIT_EXPECT_EQ(test, PTR_ERR_OR_ZERO(f), 0);
+ if (IS_ERR(f))
+ return;
+ pos = -1;
+ res = kernel_read(f, data3, sizeof(data3), &pos);
+ KUNIT_EXPECT_EQ(test, res, -EINVAL);
+ pos = LLONG_MAX - sizeof(data3);
+ res = kernel_read(f, data3, sizeof(data3), &pos);
+ KUNIT_EXPECT_EQ(test, res, 0);
+ filp_close(f, NULL);
+}
+
+static struct kunit_case configfs_test_cases[] = {
+ KUNIT_CASE(configfs_mounted),
+ KUNIT_CASE(configfs_text_attr),
+ KUNIT_CASE(configfs_bin_attr),
+ {},
+};
+
+static struct configfs_subsystem test_subsys = {
+ .su_group = {
+ .cg_item = {
+ .ci_namebuf = "unit-test",
+ .ci_type = &test_configfs_type,
+ }
+ },
+};
+
+static int configfs_suite_init(void)
+{
+ int res;
+
+ config_group_init(&test_subsys.su_group);
+ mutex_init(&test_subsys.su_mutex);
+ res = configfs_register_subsystem(&test_subsys);
+ if (res < 0) {
+ pr_err("Registration of configfs subsystem failed: %d\n", res);
+ return res;
+ }
+ return mount_configfs();
+}
+
+static void configfs_suite_exit(void)
+{
+ configfs_unregister_subsystem(&test_subsys);
+ unmount_configfs();
+}
+
+static struct kunit_suite configfs_test_module = {
+ .name = "configfs unit tests",
+ .init_suite = configfs_suite_init,
+ .exit_suite = configfs_suite_exit,
+ .test_cases = configfs_test_cases,
+};
+kunit_test_suites(&configfs_test_module);
Store data at the right offset when writing with a non-zero offset. I think
this patch fixes behavior introduced in the initial configfs commit. From a
source code comment in the initial configfs commit: "There is no easy way
for us to know if userspace is only doing a partial write, so we don't
support them."
Cc: Bodo Stroesser <[email protected]>
Cc: Martin K. Petersen <[email protected]>
Cc: Yanko Kaneti <[email protected]>
Cc: Brendan Higgins <[email protected]>
Signed-off-by: Bart Van Assche <[email protected]>
---
fs/configfs/file.c | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index 8121bb1b2121..3b2ddc2e5d42 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -226,14 +226,16 @@ static ssize_t configfs_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct configfs_buffer *buffer = file->private_data;
- ssize_t len;
+ int len, written = 0;
mutex_lock(&buffer->mutex);
len = fill_write_buffer(buffer, iocb->ki_pos, from);
if (len > 0)
- len = flush_write_buffer(file, buffer, len);
- if (len > 0)
- iocb->ki_pos += len;
+ written = flush_write_buffer(file, buffer, iocb->ki_pos + len);
+ if (written > 0) {
+ len = max_t(int, 0, written - iocb->ki_pos);
+ iocb->ki_pos = written;
+ }
mutex_unlock(&buffer->mutex);
return len;
}
On 23.07.21 23:23, Bart Van Assche wrote:
> Store data at the right offset when writing with a non-zero offset. I think
> this patch fixes behavior introduced in the initial configfs commit.
Unfortunately I think it does not.
Let's say user writes 5 times to configfs file while keeping it open.
On every write() call it writes 1 character only, e.g. first "A", then "B", ...
The original code before the changes 5 times called flush_write_buffer for the
strings "A\0", "B\0", ... (with the '\0' not included in the count parameter,
so count is 1 always, which is the length of the last write).
Since commit
420405ecde06 "configfs: fix the read and write iterators"
flush_write_buffer is called for the strings "A\0", "AB\0", "ABC\0", ...
but count still is 1.
This fix changes just the count parameter, so it now is 1, 2, 3, ... in our
example. But it still sends on every call to flush_write_buffer all the write
data gathered since last open().
I think, a simple way to restore the original behavior would be to revert the
part of commit 420405ecde06 that changed write routines.
Bodo
From a
> source code comment in the initial configfs commit: "There is no easy way
> for us to know if userspace is only doing a partial write, so we don't
> support them."
>
> Cc: Bodo Stroesser <[email protected]>
> Cc: Martin K. Petersen <[email protected]>
> Cc: Yanko Kaneti <[email protected]>
> Cc: Brendan Higgins <[email protected]>
> Signed-off-by: Bart Van Assche <[email protected]>
> ---
> fs/configfs/file.c | 10 ++++++----
> 1 file changed, 6 insertions(+), 4 deletions(-)
>
> diff --git a/fs/configfs/file.c b/fs/configfs/file.c
> index 8121bb1b2121..3b2ddc2e5d42 100644
> --- a/fs/configfs/file.c
> +++ b/fs/configfs/file.c
> @@ -226,14 +226,16 @@ static ssize_t configfs_write_iter(struct kiocb *iocb, struct iov_iter *from)
> {
> struct file *file = iocb->ki_filp;
> struct configfs_buffer *buffer = file->private_data;
> - ssize_t len;
> + int len, written = 0;
>
> mutex_lock(&buffer->mutex);
> len = fill_write_buffer(buffer, iocb->ki_pos, from);
> if (len > 0)
> - len = flush_write_buffer(file, buffer, len);
> - if (len > 0)
> - iocb->ki_pos += len;
> + written = flush_write_buffer(file, buffer, iocb->ki_pos + len);
> + if (written > 0) {
> + len = max_t(int, 0, written - iocb->ki_pos);
> + iocb->ki_pos = written;
> + }
> mutex_unlock(&buffer->mutex);
> return len;
> }
>
On 7/26/21 7:58 AM, Bodo Stroesser wrote:
> On 23.07.21 23:23, Bart Van Assche wrote:
> Let's say user writes 5 times to configfs file while keeping it open.
> On every write() call it writes 1 character only, e.g. first "A", then
> "B", ...
>
> The original code before the changes 5 times called flush_write_buffer
> for the
> strings "A\0", "B\0", ... (with the '\0' not included in the count
> parameter,
> so count is 1 always, which is the length of the last write).
Isn't that behavior a severe violation of how POSIX specifies that the
write() system call should be implemented?
Thanks,
Bart.
On 26.07.21 18:26, Bart Van Assche wrote:
> On 7/26/21 7:58 AM, Bodo Stroesser wrote:
>> On 23.07.21 23:23, Bart Van Assche wrote:
>> Let's say user writes 5 times to configfs file while keeping it open.
>> On every write() call it writes 1 character only, e.g. first "A", then
>> "B", ...
>>
>> The original code before the changes 5 times called flush_write_buffer
>> for the
>> strings "A\0", "B\0", ... (with the '\0' not included in the count
>> parameter,
>> so count is 1 always, which is the length of the last write).
>
> Isn't that behavior a severe violation of how POSIX specifies that the
> write() system call should be implemented?
Hmm. I'm not sure which detail should violate POSIX spec? Is there any
definition how data should be flushed from buffer internally? (I'm by
far not a POSIX expert!)
I would rather say the new behavior, to call flush_write_buffer during the
first write() for the data of that write, and then on the second write to
call flush_write_buffer for the concatenated data of the first and the
second write, could be a violation of POSIX, because the one times written
data of the first write is flushed twice.
I don't like the idea of breaking the "one write, one flush" principle that
was implemented before. The old comment:
"There is no easy way for us to know if userspace is only doing a partial
write, so we don't support them. We expect the entire buffer to come on the
first write."
as I interpret it, makes clear that configfs code has to work according to
that principle. (Or even block all but the first write, but that would even
more break compatibility to old implementation.)
Thank you,
Bodo
On 7/26/21 2:13 PM, Bodo Stroesser wrote:
> On 26.07.21 18:26, Bart Van Assche wrote:
>> On 7/26/21 7:58 AM, Bodo Stroesser wrote:
>>> On 23.07.21 23:23, Bart Van Assche wrote:
>>> Let's say user writes 5 times to configfs file while keeping it open.
>>> On every write() call it writes 1 character only, e.g. first "A",
>>> then "B", ...
>>>
>>> The original code before the changes 5 times called
>>> flush_write_buffer for the
>>> strings "A\0", "B\0", ... (with the '\0' not included in the count
>>> parameter,
>>> so count is 1 always, which is the length of the last write).
>>
>> Isn't that behavior a severe violation of how POSIX specifies that the
>> write() system call should be implemented?
>
> Hmm. I'm not sure which detail should violate POSIX spec? Is there any
> definition how data should be flushed from buffer internally? (I'm by
> far not a POSIX expert!)
>
> I would rather say the new behavior, to call flush_write_buffer during the
> first write() for the data of that write, and then on the second write to
> call flush_write_buffer for the concatenated data of the first and the
> second write, could be a violation of POSIX, because the one times written
> data of the first write is flushed twice.
>
> I don't like the idea of breaking the "one write, one flush" principle that
> was implemented before. The old comment:
> "There is no easy way for us to know if userspace is only doing a partial
> write, so we don't support them. We expect the entire buffer to come on the
> first write."
> as I interpret it, makes clear that configfs code has to work according to
> that principle. (Or even block all but the first write, but that would even
> more break compatibility to old implementation.)
Hi Bodo,
The private email that you sent me made it clear that you would like to
keep the behavior from kernel 5.13. That means passing "A\0", "B\0", ...
to the configfs store callback function if "AB..." is witten one byte at
a time. What is not clear to me is how a store callback with argument
"B\0" can know at which offset that write happened? From
<linux/configfs.h> (I have added argument names):
ssize_t (*store)(struct config_item *item, const char *page,
size_t count);
My understanding of the POSIX specification [1] is that writes should
happen at the proper offset. If user space software writes "A" at offset
0 and "B" at offset 1 then the string "AB" should be passed to the
configfs store callback.
Regarding the "action" attribute from your tcmu patch, how about
checking the last character of the string written into that attribute
instead of the first character? Would that be sufficient to write twice
into that attribute without having to call close() and open() between
the two write actions?
To me the following comment: "There is no easy way for us to know if
userspace is only doing a partial write, so we don't support them. We
expect the entire buffer to come on the first write." means that writing
"ABCD" by first writing "AB" and next "CD" will trigger two
item->store() calls. Triggering a single item->store() call for partial
writes is not supported.
Thanks,
Bart.
[1] https://pubs.opengroup.org/onlinepubs/9699919799/
On 26.07.21 23:52, Bart Van Assche wrote:
> On 7/26/21 2:13 PM, Bodo Stroesser wrote:
>> On 26.07.21 18:26, Bart Van Assche wrote:
>>> On 7/26/21 7:58 AM, Bodo Stroesser wrote:
>>>> On 23.07.21 23:23, Bart Van Assche wrote:
>>>> Let's say user writes 5 times to configfs file while keeping it open.
>>>> On every write() call it writes 1 character only, e.g. first "A",
>>>> then "B", ...
>>>>
>>>> The original code before the changes 5 times called
>>>> flush_write_buffer for the
>>>> strings "A\0", "B\0", ... (with the '\0' not included in the count
>>>> parameter,
>>>> so count is 1 always, which is the length of the last write).
>>>
>>> Isn't that behavior a severe violation of how POSIX specifies that
>>> the write() system call should be implemented?
>>
>> Hmm. I'm not sure which detail should violate POSIX spec? Is there any
>> definition how data should be flushed from buffer internally? (I'm by
>> far not a POSIX expert!)
>>
>> I would rather say the new behavior, to call flush_write_buffer during
>> the
>> first write() for the data of that write, and then on the second write to
>> call flush_write_buffer for the concatenated data of the first and the
>> second write, could be a violation of POSIX, because the one times
>> written
>> data of the first write is flushed twice.
>>
>> I don't like the idea of breaking the "one write, one flush" principle
>> that
>> was implemented before. The old comment:
>> "There is no easy way for us to know if userspace is only doing a partial
>> write, so we don't support them. We expect the entire buffer to come
>> on the
>> first write."
>> as I interpret it, makes clear that configfs code has to work
>> according to
>> that principle. (Or even block all but the first write, but that would
>> even
>> more break compatibility to old implementation.)
>
> Hi Bodo,
>
> The private email that you sent me made it clear that you would like to
> keep the behavior from kernel 5.13. That means passing "A\0", "B\0", ...
> to the configfs store callback function if "AB..." is witten one byte at
> a time. What is not clear to me is how a store callback with argument
> "B\0" can know at which offset that write happened? From
> <linux/configfs.h> (I have added argument names):
>
> ssize_t (*store)(struct config_item *item, const char *page,
> size_t count);
It does not know. It simply handles it as two separate store actions.
One could say, both start from offset 0.
>
> My understanding of the POSIX specification [1] is that writes should
> happen at the proper offset. If user space software writes "A" at offset
> 0 and "B" at offset 1 then the string "AB" should be passed to the
> configfs store callback.
The comment says, that such a concatenation is not supported. To add
such a support, we would have to buffer all writes and then have a
criterion that triggers the flush_write_buffer. For example that could
be done on close(). But that would also mean, that bad result from store
handler could be reported by close only. And it would mean, that again
the behavior changes, in that the new SW allows one store action only
after one open(). You have to close and re-open before you can start a
new store action.
To me it looks strange to write again all previous data from the
beginning at each new write. So I think this is not a good solution.
>
> Regarding the "action" attribute from your tcmu patch, how about
> checking the last character of the string written into that attribute
> instead of the first character? Would that be sufficient to write twice
> into that attribute without having to call close() and open() between
> the two write actions?
I'm not sure I understand what you mean. If userspace writes a string
byte by byte or in pieces of other sizes, would you still gather
data in the file's buffer and call flush_write_buffer on each write
with all the data gathered up to and including the current write?
Of so, do you want the store handler to detect the end of the string,
e.g by searching for '\n', and discard the write if not found? That
would not work well, because after the store handler detected the '\n',
during the next write it would get the same string again plus what was
added by the new write. Store handler would have to know, how much of
the entire buffer content it already had seen. After a couple of writes
we would even run out of buffer. So again close and re-open is needed.
After close and re-open, how does the store handler know, that the
buffer now is re-started from the beginning?
The new behavior can also cause trouble with existing store handlers.
Example:
The tcmu attribute files cmd_time_out and qfull_time_out just take a
string containing the decimal formatted number of seconds of the
timeout. Each number up to now had to be transferred in a single write.
Assume the old value is 30 and we want to change to 19. If userspace
writes byte by byte, you end up calling
store(item, "1\0", 1) and then
store(item, "19\9", 2).
If these quick changes do not cause trouble in tcmu's scsi cmd handling,
then think what happens, if userspace is interrupted between the two
writes. Allowing to split the writes cause a loss of "atomicity".
>
> To me the following comment: "There is no easy way for us to know if
> userspace is only doing a partial write, so we don't support them. We
> expect the entire buffer to come on the first write." means that writing
> "ABCD" by first writing "AB" and next "CD" will trigger two
> item->store() calls. Triggering a single item->store() call for partial
> writes is not supported.
Exactly. So IMHO we don't need to handle any offsets during write
processing, since for every write we again start at offset 0.
(We just add the trailing '\0' - not in count - to ease store handler's
work.)
Thank you,
Bodo
On 7/26/21 5:54 PM, Bodo Stroesser wrote:
> The new behavior can also cause trouble with existing store handlers.
> Example:
> The tcmu attribute files cmd_time_out and qfull_time_out just take a
> string containing the decimal formatted number of seconds of the
> timeout. Each number up to now had to be transferred in a single write.
> Assume the old value is 30 and we want to change to 19. If userspace
> writes byte by byte, you end up calling
> store(item, "1\0", 1) and then
> store(item, "19\9", 2).
> If these quick changes do not cause trouble in tcmu's scsi cmd handling,
> then think what happens, if userspace is interrupted between the two
> writes. Allowing to split the writes cause a loss of "atomicity".
From Documentation/filesystems/configfs.rst, for normal attributes:
"Configfs expects write(2) to store the entire buffer at once." In other
words, the behavior for partial writes is undocumented. My changes
preserve the behavior if a buffer is written in its entirety. I do not
agree that my changes can cause trouble for existing store handlers.
Bart.
On 27.07.21 05:17, Bart Van Assche wrote:
> On 7/26/21 5:54 PM, Bodo Stroesser wrote:
>> The new behavior can also cause trouble with existing store handlers.
>> Example:
>> The tcmu attribute files cmd_time_out and qfull_time_out just take a
>> string containing the decimal formatted number of seconds of the
>> timeout. Each number up to now had to be transferred in a single write.
>> Assume the old value is 30 and we want to change to 19. If userspace
>> writes byte by byte, you end up calling
>> store(item, "1\0", 1) and then
>> store(item, "19\9", 2).
>> If these quick changes do not cause trouble in tcmu's scsi cmd handling,
>> then think what happens, if userspace is interrupted between the two
>> writes. Allowing to split the writes cause a loss of "atomicity".
>
> From Documentation/filesystems/configfs.rst, for normal attributes:
> "Configfs expects write(2) to store the entire buffer at once." In other
> words, the behavior for partial writes is undocumented. My changes
> preserve the behavior if a buffer is written in its entirety. I do not
> agree that my changes can cause trouble for existing store handlers.
>
I agree. I was not precise.
What I meant is, that changing the source code in such a way, that
writing a buffer in multiple writes works in general, could cause
trouble in case userspace uses this.
But for special syscall sequences your changes still change the result
on existing configfs files. Example:
1) userspace program opens qfull_time_out
2) userspace program writes "90", count=2 to set timeout to 90 sec
3) userspace again wants to change timeout, so it writes "55", count=2
Before the changes we end up with timeout being 55 seconds. After the
change - due to data gathering - we finally have timeout 9055 seconds.
BR,
Bodo
On 7/27/21 12:27 AM, Bodo Stroesser wrote:
> What I meant is, that changing the source code in such a way, that
> writing a buffer in multiple writes works in general, could cause
> trouble in case userspace uses this.
>
> But for special syscall sequences your changes still change the result
> on existing configfs files. Example:
>
> 1) userspace program opens qfull_time_out
> 2) userspace program writes "90", count=2 to set timeout to 90 sec
> 3) userspace again wants to change timeout, so it writes "55", count=2
>
> Before the changes we end up with timeout being 55 seconds. After the
> change - due to data gathering - we finally have timeout 9055 seconds.
Hi Bodo,
How about replacing patches 1 and 2 from this series with the patch below?
Do you agree that this patch is sufficient to restore the behavior from
kernel v5.13 and before?
Thanks,
Bart.
Subject: [PATCH 1/3] configfs: Restore the kernel v5.13 text attribute write behavior
Instead of writing at the offset specified by the write() system call,
always write at offset zero.
Cc: Bodo Stroesser <[email protected]>
Cc: Martin K. Petersen <[email protected]>
Cc: Yanko Kaneti <[email protected]>
Cc: Brendan Higgins <[email protected]>
Reported-by: Bodo Stroesser <[email protected]>
Signed-off-by: Bart Van Assche <[email protected]>
---
fs/configfs/file.c | 20 ++++++++------------
1 file changed, 8 insertions(+), 12 deletions(-)
diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index 5a0be9985bae..8adf6250b207 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -177,12 +177,11 @@ static ssize_t configfs_bin_read_iter(struct kiocb *iocb, struct iov_iter *to)
return retval;
}
-/* Fill [buffer, buffer + pos) with data coming from @from. */
-static int fill_write_buffer(struct configfs_buffer *buffer, loff_t pos,
+/* Fill @buffer with data coming from @from. */
+static int fill_write_buffer(struct configfs_buffer *buffer,
struct iov_iter *from)
{
- loff_t to_copy;
- int copied;
+ int to_copy, copied;
u8 *to;
if (!buffer->page)
@@ -190,11 +189,8 @@ static int fill_write_buffer(struct configfs_buffer *buffer, loff_t pos,
if (!buffer->page)
return -ENOMEM;
- to_copy = SIMPLE_ATTR_SIZE - 1 - pos;
- if (to_copy <= 0)
- return 0;
- to = buffer->page + pos;
- copied = copy_from_iter(to, to_copy, from);
+ to = buffer->page;
+ copied = copy_from_iter(to, SIMPLE_ATTR_SIZE - 1, from);
buffer->needs_read_fill = 1;
/* if buf is assumed to contain a string, terminate it by \0,
* so e.g. sscanf() can scan the string easily */
@@ -227,14 +223,14 @@ static ssize_t configfs_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct configfs_buffer *buffer = file->private_data;
- ssize_t len;
+ int len;
mutex_lock(&buffer->mutex);
- len = fill_write_buffer(buffer, iocb->ki_pos, from);
+ len = fill_write_buffer(buffer, from);
if (len > 0)
len = flush_write_buffer(file, buffer, len);
if (len > 0)
- iocb->ki_pos += len;
+ iocb->ki_pos = len;
mutex_unlock(&buffer->mutex);
return len;
}
On Fri, Jul 23, 2021 at 2:24 PM Bart Van Assche <[email protected]> wrote:
>
> Cc: Brendan Higgins <[email protected]>
> Cc: Bodo Stroesser <[email protected]>
> Cc: Martin K. Petersen <[email protected]>
> Cc: Yanko Kaneti <[email protected]>
Please also CC [email protected], [email protected],
[email protected], and [email protected] for
KUnit changes in the future.
> Signed-off-by: Bart Van Assche <[email protected]>
This seems pretty sensible.
Reviewed-by: Brendan Higgins <[email protected]>
> ---
> include/kunit/test.h | 4 ++++
> lib/kunit/test.c | 14 ++++++++++++++
> 2 files changed, 18 insertions(+)
>
> diff --git a/include/kunit/test.h b/include/kunit/test.h
> index 24b40e5c160b..a6eef96a409c 100644
> --- a/include/kunit/test.h
> +++ b/include/kunit/test.h
> @@ -215,6 +215,8 @@ static inline char *kunit_status_to_ok_not_ok(enum kunit_status status)
> * struct kunit_suite - describes a related collection of &struct kunit_case
> *
> * @name: the name of the test. Purely informational.
> + * @init_suite: called once per test suite before the test cases.
> + * @exit_suite: called once per test suite after all test cases.
> * @init: called before every test case.
> * @exit: called after every test case.
> * @test_cases: a null terminated array of test cases.
> @@ -229,6 +231,8 @@ static inline char *kunit_status_to_ok_not_ok(enum kunit_status status)
> */
> struct kunit_suite {
> const char name[256];
> + int (*init_suite)(void);
> + void (*exit_suite)(void);
I like this idea. Many other unit testing libraries in other languages
have something similar.
I think it probably makes sense to not use any kind of context object
here (as you have done); nevertheless, I still think it is an
appropriate question for the list.
> int (*init)(struct kunit *test);
> void (*exit)(struct kunit *test);
> struct kunit_case *test_cases;
> diff --git a/lib/kunit/test.c b/lib/kunit/test.c
> index d79ecb86ea57..c271692ced93 100644
> --- a/lib/kunit/test.c
> +++ b/lib/kunit/test.c
> @@ -397,9 +397,19 @@ int kunit_run_tests(struct kunit_suite *suite)
> {
> char param_desc[KUNIT_PARAM_DESC_SIZE];
> struct kunit_case *test_case;
> + int res = 0;
>
> kunit_print_subtest_start(suite);
>
> + if (suite->init_suite)
> + res = suite->init_suite();
> +
> + if (res < 0) {
> + kunit_log(KERN_INFO, suite, KUNIT_SUBTEST_INDENT
> + "# Suite initialization failed (%d)\n", res);
> + goto end;
> + }
> +
> kunit_suite_for_each_test_case(suite, test_case) {
> struct kunit test = { .param_value = NULL, .param_index = 0 };
> test_case->status = KUNIT_SKIPPED;
> @@ -439,6 +449,10 @@ int kunit_run_tests(struct kunit_suite *suite)
> test.status_comment);
> }
>
> + if (suite->exit_suite)
> + suite->exit_suite();
> +
> +end:
> kunit_print_subtest_end(suite);
>
> return 0;
Hi Bart,
I reviewed and tested the new patch. For me it works fine.
Just one warning to fix:
fs/configfs/file.c: In function ‘fill_write_buffer’:
fs/configfs/file.c:184:6: warning: unused variable ‘to_copy’
[-Wunused-variable]
int to_copy, copied;
^~~~~~~
Apart from that you can add my tested-by or reviewed-by if you want.
Thank you,
Bodo
On 27.07.21 18:47, Bart Van Assche wrote:
> Hi Bodo,
>
> How about replacing patches 1 and 2 from this series with the patch below?
> Do you agree that this patch is sufficient to restore the behavior from
> kernel v5.13 and before?
>
> Thanks,
>
> Bart.
>
> Subject: [PATCH 1/3] configfs: Restore the kernel v5.13 text attribute
> write behavior
>
> Instead of writing at the offset specified by the write() system call,
> always write at offset zero.
>
> Cc: Bodo Stroesser <[email protected]>
> Cc: Martin K. Petersen <[email protected]>
> Cc: Yanko Kaneti <[email protected]>
> Cc: Brendan Higgins <[email protected]>
> Reported-by: Bodo Stroesser <[email protected]>
> Signed-off-by: Bart Van Assche <[email protected]>
> ---
> fs/configfs/file.c | 20 ++++++++------------
> 1 file changed, 8 insertions(+), 12 deletions(-)
>
> diff --git a/fs/configfs/file.c b/fs/configfs/file.c
> index 5a0be9985bae..8adf6250b207 100644
> --- a/fs/configfs/file.c
> +++ b/fs/configfs/file.c
> @@ -177,12 +177,11 @@ static ssize_t configfs_bin_read_iter(struct kiocb
> *iocb, struct iov_iter *to)
> return retval;
> }
>
> -/* Fill [buffer, buffer + pos) with data coming from @from. */
> -static int fill_write_buffer(struct configfs_buffer *buffer, loff_t pos,
> +/* Fill @buffer with data coming from @from. */
> +static int fill_write_buffer(struct configfs_buffer *buffer,
> struct iov_iter *from)
> {
> - loff_t to_copy;
> - int copied;
> + int to_copy, copied;
> u8 *to;
>
> if (!buffer->page)
> @@ -190,11 +189,8 @@ static int fill_write_buffer(struct configfs_buffer
> *buffer, loff_t pos,
> if (!buffer->page)
> return -ENOMEM;
>
> - to_copy = SIMPLE_ATTR_SIZE - 1 - pos;
> - if (to_copy <= 0)
> - return 0;
> - to = buffer->page + pos;
> - copied = copy_from_iter(to, to_copy, from);
> + to = buffer->page;
> + copied = copy_from_iter(to, SIMPLE_ATTR_SIZE - 1, from);
> buffer->needs_read_fill = 1;
> /* if buf is assumed to contain a string, terminate it by \0,
> * so e.g. sscanf() can scan the string easily */
> @@ -227,14 +223,14 @@ static ssize_t configfs_write_iter(struct kiocb
> *iocb, struct iov_iter *from)
> {
> struct file *file = iocb->ki_filp;
> struct configfs_buffer *buffer = file->private_data;
> - ssize_t len;
> + int len;
>
> mutex_lock(&buffer->mutex);
> - len = fill_write_buffer(buffer, iocb->ki_pos, from);
> + len = fill_write_buffer(buffer, from);
> if (len > 0)
> len = flush_write_buffer(file, buffer, len);
> if (len > 0)
> - iocb->ki_pos += len;
> + iocb->ki_pos = len;
> mutex_unlock(&buffer->mutex);
> return len;
> }
On 7/28/21 10:14 AM, Bodo Stroesser wrote:
> I reviewed and tested the new patch. For me it works fine.
>
> Just one warning to fix:
>
> fs/configfs/file.c: In function ‘fill_write_buffer’:
> fs/configfs/file.c:184:6: warning: unused variable ‘to_copy’ [-Wunused-variable]
> int to_copy, copied;
> ^~~~~~~
>
> Apart from that you can add my tested-by or reviewed-by if you want.
I will remove the 'to_copy' variable and also add your Tested-by. Thanks for
having tested this patch!
Bart.
On 7/27/21 2:26 PM, Brendan Higgins wrote:
> On Fri, Jul 23, 2021 at 2:24 PM Bart Van Assche <[email protected]> wrote:
>>
>> Cc: Brendan Higgins <[email protected]>
>> Cc: Bodo Stroesser <[email protected]>
>> Cc: Martin K. Petersen <[email protected]>
>> Cc: Yanko Kaneti <[email protected]>
>
> Please also CC [email protected], [email protected],
> [email protected], and [email protected] for
> KUnit changes in the future.
Will do.
>> Signed-off-by: Bart Van Assche <[email protected]>
>
> This seems pretty sensible.
>
> Reviewed-by: Brendan Higgins <[email protected]>
Thanks for the review!
Bart.