2022-08-12 05:37:50

by Alistair Popple

[permalink] [raw]
Subject: [PATCH 2/2] selftests/hmm-tests: Add test for dirty bits

We were not correctly copying PTE dirty bits to pages during
migrate_vma_setup() calls. This could potentially lead to data loss, so
add a test for this.

Signed-off-by: Alistair Popple <[email protected]>
---
tools/testing/selftests/vm/hmm-tests.c | 124 ++++++++++++++++++++++++++-
1 file changed, 124 insertions(+)

diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c
index 529f53b..70fdb49 100644
--- a/tools/testing/selftests/vm/hmm-tests.c
+++ b/tools/testing/selftests/vm/hmm-tests.c
@@ -1200,6 +1200,130 @@ TEST_F(hmm, migrate_multiple)
}
}

+static char cgroup[] = "/sys/fs/cgroup/hmm-test-XXXXXX";
+static int write_cgroup_param(char *cgroup_path, char *param, long value)
+{
+ int ret;
+ FILE *f;
+ char *filename;
+
+ if (asprintf(&filename, "%s/%s", cgroup_path, param) < 0)
+ return -1;
+
+ f = fopen(filename, "w");
+ if (!f) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = fprintf(f, "%ld\n", value);
+ if (ret < 0)
+ goto out1;
+
+ ret = 0;
+
+out1:
+ fclose(f);
+out:
+ free(filename);
+
+ return ret;
+}
+
+static int setup_cgroup(void)
+{
+ pid_t pid = getpid();
+ int ret;
+
+ if (!mkdtemp(cgroup))
+ return -1;
+
+ ret = write_cgroup_param(cgroup, "cgroup.procs", pid);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static int destroy_cgroup(void)
+{
+ pid_t pid = getpid();
+ int ret;
+
+ ret = write_cgroup_param("/sys/fs/cgroup/cgroup.procs",
+ "cgroup.proc", pid);
+ if (ret)
+ return ret;
+
+ if (rmdir(cgroup))
+ return -1;
+
+ return 0;
+}
+
+/*
+ * Try and migrate a dirty page that has previously been swapped to disk. This
+ * checks that we don't loose dirty bits.
+ */
+TEST_F(hmm, migrate_dirty_page)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int tmp = 0;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ ASSERT_EQ(setup_cgroup(), 0);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = 0;
+
+ ASSERT_FALSE(write_cgroup_param(cgroup, "memory.reclaim", 1UL<<30));
+
+ /* Fault pages back in from swap as clean pages */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ tmp += ptr[i];
+
+ /* Dirty the pte */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /*
+ * Attempt to migrate memory to device, which should fail because
+ * hopefully some pages are backed by swap storage.
+ */
+ ASSERT_TRUE(hmm_migrate_sys_to_dev(self->fd, buffer, npages));
+
+ ASSERT_FALSE(write_cgroup_param(cgroup, "memory.reclaim", 1UL<<30));
+
+ /* Check we still see the updated data after restoring from swap. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ hmm_buffer_free(buffer);
+ destroy_cgroup();
+}
+
/*
* Read anonymous memory multiple times.
*/
--
git-series 0.9.1


2022-08-12 08:14:04

by Mika Penttilä

[permalink] [raw]
Subject: Re: [PATCH 2/2] selftests/hmm-tests: Add test for dirty bits

Hi Alistair!

On 12.8.2022 8.22, Alistair Popple wrote:
> We were not correctly copying PTE dirty bits to pages during
> migrate_vma_setup() calls. This could potentially lead to data loss, so
> add a test for this.
>
> Signed-off-by: Alistair Popple <[email protected]>
> ---
> tools/testing/selftests/vm/hmm-tests.c | 124 ++++++++++++++++++++++++++-
> 1 file changed, 124 insertions(+)
>
> diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c
> index 529f53b..70fdb49 100644
> --- a/tools/testing/selftests/vm/hmm-tests.c
> +++ b/tools/testing/selftests/vm/hmm-tests.c
> @@ -1200,6 +1200,130 @@ TEST_F(hmm, migrate_multiple)
> }
> }
>
> +static char cgroup[] = "/sys/fs/cgroup/hmm-test-XXXXXX";
> +static int write_cgroup_param(char *cgroup_path, char *param, long value)
> +{
> + int ret;
> + FILE *f;
> + char *filename;
> +
> + if (asprintf(&filename, "%s/%s", cgroup_path, param) < 0)
> + return -1;
> +
> + f = fopen(filename, "w");
> + if (!f) {
> + ret = -1;
> + goto out;
> + }
> +
> + ret = fprintf(f, "%ld\n", value);
> + if (ret < 0)
> + goto out1;
> +
> + ret = 0;
> +
> +out1:
> + fclose(f);
> +out:
> + free(filename);
> +
> + return ret;
> +}
> +
> +static int setup_cgroup(void)
> +{
> + pid_t pid = getpid();
> + int ret;
> +
> + if (!mkdtemp(cgroup))
> + return -1;
> +
> + ret = write_cgroup_param(cgroup, "cgroup.procs", pid);
> + if (ret)
> + return ret;
> +
> + return 0;
> +}
> +
> +static int destroy_cgroup(void)
> +{
> + pid_t pid = getpid();
> + int ret;
> +
> + ret = write_cgroup_param("/sys/fs/cgroup/cgroup.procs",
> + "cgroup.proc", pid);
> + if (ret)
> + return ret;
> +
> + if (rmdir(cgroup))
> + return -1;
> +
> + return 0;
> +}
> +
> +/*
> + * Try and migrate a dirty page that has previously been swapped to disk. This
> + * checks that we don't loose dirty bits.
> + */
> +TEST_F(hmm, migrate_dirty_page)
> +{
> + struct hmm_buffer *buffer;
> + unsigned long npages;
> + unsigned long size;
> + unsigned long i;
> + int *ptr;
> + int tmp = 0;
> +
> + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
> + ASSERT_NE(npages, 0);
> + size = npages << self->page_shift;
> +
> + buffer = malloc(sizeof(*buffer));
> + ASSERT_NE(buffer, NULL);
> +
> + buffer->fd = -1;
> + buffer->size = size;
> + buffer->mirror = malloc(size);
> + ASSERT_NE(buffer->mirror, NULL);
> +
> + ASSERT_EQ(setup_cgroup(), 0);
> +
> + buffer->ptr = mmap(NULL, size,
> + PROT_READ | PROT_WRITE,
> + MAP_PRIVATE | MAP_ANONYMOUS,
> + buffer->fd, 0);
> + ASSERT_NE(buffer->ptr, MAP_FAILED);
> +
> + /* Initialize buffer in system memory. */
> + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
> + ptr[i] = 0;
> +
> + ASSERT_FALSE(write_cgroup_param(cgroup, "memory.reclaim", 1UL<<30));
> +
> + /* Fault pages back in from swap as clean pages */
> + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
> + tmp += ptr[i];
> +
> + /* Dirty the pte */
> + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
> + ptr[i] = i;
> +

The anon pages are quite likely in memory at this point, and dirty in pte.



> + /*
> + * Attempt to migrate memory to device, which should fail because
> + * hopefully some pages are backed by swap storage.
> + */
> + ASSERT_TRUE(hmm_migrate_sys_to_dev(self->fd, buffer, npages));


And pages marked dirty also now. But could you elaborate how and where
the above fails in more detail, couldn't immediately see it...



> +
> + ASSERT_FALSE(write_cgroup_param(cgroup, "memory.reclaim", 1UL<<30));
> +
> + /* Check we still see the updated data after restoring from swap. */
> + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
> + ASSERT_EQ(ptr[i], i);
> +
> + hmm_buffer_free(buffer);
> + destroy_cgroup();
> +}
> +
> /*
> * Read anonymous memory multiple times.
> */


--Mika

2022-08-15 03:21:30

by Mika Penttilä

[permalink] [raw]
Subject: Re: [PATCH 2/2] selftests/hmm-tests: Add test for dirty bits



On 15.8.2022 5.35, Alistair Popple wrote:
>
> Mika Penttilä <[email protected]> writes:
>
>> Hi Alistair!
>>
>> On 12.8.2022 8.22, Alistair Popple wrote:
>
> [...]
>
>>> + buffer->ptr = mmap(NULL, size,
>>> + PROT_READ | PROT_WRITE,
>>> + MAP_PRIVATE | MAP_ANONYMOUS,
>>> + buffer->fd, 0);
>>> + ASSERT_NE(buffer->ptr, MAP_FAILED);
>>> +
>>> + /* Initialize buffer in system memory. */
>>> + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
>>> + ptr[i] = 0;
>>> +
>>> + ASSERT_FALSE(write_cgroup_param(cgroup, "memory.reclaim", 1UL<<30));
>>> +
>>> + /* Fault pages back in from swap as clean pages */
>>> + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
>>> + tmp += ptr[i];
>>> +
>>> + /* Dirty the pte */
>>> + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
>>> + ptr[i] = i;
>>> +
>>
>> The anon pages are quite likely in memory at this point, and dirty in pte.
>
> Why would the pte be dirty? I just confirmed using some modified pagemap
> code that on my system at least this isn't the case.
>
>>> + /*
>>> + * Attempt to migrate memory to device, which should fail because
>>> + * hopefully some pages are backed by swap storage.
>>> + */
>>> + ASSERT_TRUE(hmm_migrate_sys_to_dev(self->fd, buffer, npages));
>>
>> And pages marked dirty also now. But could you elaborate how and where the above
>> fails in more detail, couldn't immediately see it...
>
> Not if you don't have patch 1 of this series applied. If the
> trylock_page() in migrate_vma_collect_pmd() succeeds (which it almost
> always does) it will have cleared the pte without setting PageDirty.
>

Ah yes but I meant with the patch 1 applied, the comment "Attempt to
migrate memory to device, which should fail because hopefully some pages
are backed by swap storage" indicates that hmm_migrate_sys_to_dev()
would fail..and there's that ASSERT_TRUE which means fail here.

So I understand the data loss but where is the hmm_migrate_sys_to_dev()
failing, with or wihtout patch 1 applied?




> So now we have a dirty page without PageDirty set and without a dirty
> pte. If this page gets swapped back to disk and is still in the swap
> cache data will be lost because reclaim will see a clean page and won't
> write it out again.
>
> At least that's my understanding - please let me know if you see
> something that doesn't make sense.
>
>>> +
>>> + ASSERT_FALSE(write_cgroup_param(cgroup, "memory.reclaim", 1UL<<30));
>>> +
>>> + /* Check we still see the updated data after restoring from swap. */
>>> + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
>>> + ASSERT_EQ(ptr[i], i);
>>> +
>>> + hmm_buffer_free(buffer);
>>> + destroy_cgroup();
>>> +}
>>> +
>>> /*
>>> * Read anonymous memory multiple times.
>>> */
>>
>>
>> --Mika
>

2022-08-15 03:24:30

by Alistair Popple

[permalink] [raw]
Subject: Re: [PATCH 2/2] selftests/hmm-tests: Add test for dirty bits


Mika Penttilä <[email protected]> writes:

> Hi Alistair!
>
> On 12.8.2022 8.22, Alistair Popple wrote:

[...]

>> + buffer->ptr = mmap(NULL, size,
>> + PROT_READ | PROT_WRITE,
>> + MAP_PRIVATE | MAP_ANONYMOUS,
>> + buffer->fd, 0);
>> + ASSERT_NE(buffer->ptr, MAP_FAILED);
>> +
>> + /* Initialize buffer in system memory. */
>> + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
>> + ptr[i] = 0;
>> +
>> + ASSERT_FALSE(write_cgroup_param(cgroup, "memory.reclaim", 1UL<<30));
>> +
>> + /* Fault pages back in from swap as clean pages */
>> + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
>> + tmp += ptr[i];
>> +
>> + /* Dirty the pte */
>> + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
>> + ptr[i] = i;
>> +
>
> The anon pages are quite likely in memory at this point, and dirty in pte.

Why would the pte be dirty? I just confirmed using some modified pagemap
code that on my system at least this isn't the case.

>> + /*
>> + * Attempt to migrate memory to device, which should fail because
>> + * hopefully some pages are backed by swap storage.
>> + */
>> + ASSERT_TRUE(hmm_migrate_sys_to_dev(self->fd, buffer, npages));
>
> And pages marked dirty also now. But could you elaborate how and where the above
> fails in more detail, couldn't immediately see it...

Not if you don't have patch 1 of this series applied. If the
trylock_page() in migrate_vma_collect_pmd() succeeds (which it almost
always does) it will have cleared the pte without setting PageDirty.

So now we have a dirty page without PageDirty set and without a dirty
pte. If this page gets swapped back to disk and is still in the swap
cache data will be lost because reclaim will see a clean page and won't
write it out again.

At least that's my understanding - please let me know if you see
something that doesn't make sense.

>> +
>> + ASSERT_FALSE(write_cgroup_param(cgroup, "memory.reclaim", 1UL<<30));
>> +
>> + /* Check we still see the updated data after restoring from swap. */
>> + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
>> + ASSERT_EQ(ptr[i], i);
>> +
>> + hmm_buffer_free(buffer);
>> + destroy_cgroup();
>> +}
>> +
>> /*
>> * Read anonymous memory multiple times.
>> */
>
>
> --Mika

2022-08-15 03:51:12

by Alistair Popple

[permalink] [raw]
Subject: Re: [PATCH 2/2] selftests/hmm-tests: Add test for dirty bits


Mika Penttilä <[email protected]> writes:

> On 15.8.2022 5.35, Alistair Popple wrote:
>> Mika Penttilä <[email protected]> writes:
>>
>>> Hi Alistair!
>>>
>>> On 12.8.2022 8.22, Alistair Popple wrote:
>> [...]
>>
>>>> + buffer->ptr = mmap(NULL, size,
>>>> + PROT_READ | PROT_WRITE,
>>>> + MAP_PRIVATE | MAP_ANONYMOUS,
>>>> + buffer->fd, 0);
>>>> + ASSERT_NE(buffer->ptr, MAP_FAILED);
>>>> +
>>>> + /* Initialize buffer in system memory. */
>>>> + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
>>>> + ptr[i] = 0;
>>>> +
>>>> + ASSERT_FALSE(write_cgroup_param(cgroup, "memory.reclaim", 1UL<<30));
>>>> +
>>>> + /* Fault pages back in from swap as clean pages */
>>>> + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
>>>> + tmp += ptr[i];
>>>> +
>>>> + /* Dirty the pte */
>>>> + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
>>>> + ptr[i] = i;
>>>> +
>>>
>>> The anon pages are quite likely in memory at this point, and dirty in pte.
>> Why would the pte be dirty? I just confirmed using some modified pagemap
>> code that on my system at least this isn't the case.
>>
>>>> + /*
>>>> + * Attempt to migrate memory to device, which should fail because
>>>> + * hopefully some pages are backed by swap storage.
>>>> + */
>>>> + ASSERT_TRUE(hmm_migrate_sys_to_dev(self->fd, buffer, npages));
>>>
>>> And pages marked dirty also now. But could you elaborate how and where the above
>>> fails in more detail, couldn't immediately see it...
>> Not if you don't have patch 1 of this series applied. If the
>> trylock_page() in migrate_vma_collect_pmd() succeeds (which it almost
>> always does) it will have cleared the pte without setting PageDirty.
>>
>
> Ah yes but I meant with the patch 1 applied, the comment "Attempt to migrate
> memory to device, which should fail because hopefully some pages are backed by
> swap storage" indicates that hmm_migrate_sys_to_dev() would fail..and there's
> that ASSERT_TRUE which means fail here.
>
> So I understand the data loss but where is the hmm_migrate_sys_to_dev() failing,
> with or wihtout patch 1 applied?

Oh right. hmm_migrate_sys_to_dev() will fail because the page is in the
swap cache, and migrate_vma_*() doesn't currently support migrating
pages with a mapping.

>> So now we have a dirty page without PageDirty set and without a dirty
>> pte. If this page gets swapped back to disk and is still in the swap
>> cache data will be lost because reclaim will see a clean page and won't
>> write it out again.
>> At least that's my understanding - please let me know if you see
>> something that doesn't make sense.
>>
>>>> +
>>>> + ASSERT_FALSE(write_cgroup_param(cgroup, "memory.reclaim", 1UL<<30));
>>>> +
>>>> + /* Check we still see the updated data after restoring from swap. */
>>>> + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
>>>> + ASSERT_EQ(ptr[i], i);
>>>> +
>>>> + hmm_buffer_free(buffer);
>>>> + destroy_cgroup();
>>>> +}
>>>> +
>>>> /*
>>>> * Read anonymous memory multiple times.
>>>> */
>>>
>>>
>>> --Mika
>>

2022-08-15 04:36:21

by Mika Penttilä

[permalink] [raw]
Subject: Re: [PATCH 2/2] selftests/hmm-tests: Add test for dirty bits



On 15.8.2022 6.21, Alistair Popple wrote:
>
> Mika Penttilä <[email protected]> writes:
>
>> On 15.8.2022 5.35, Alistair Popple wrote:
>>> Mika Penttilä <[email protected]> writes:
>>>
>>>> Hi Alistair!
>>>>
>>>> On 12.8.2022 8.22, Alistair Popple wrote:
>>> [...]
>>>
>>>>> + buffer->ptr = mmap(NULL, size,
>>>>> + PROT_READ | PROT_WRITE,
>>>>> + MAP_PRIVATE | MAP_ANONYMOUS,
>>>>> + buffer->fd, 0);
>>>>> + ASSERT_NE(buffer->ptr, MAP_FAILED);
>>>>> +
>>>>> + /* Initialize buffer in system memory. */
>>>>> + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
>>>>> + ptr[i] = 0;
>>>>> +
>>>>> + ASSERT_FALSE(write_cgroup_param(cgroup, "memory.reclaim", 1UL<<30));
>>>>> +
>>>>> + /* Fault pages back in from swap as clean pages */
>>>>> + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
>>>>> + tmp += ptr[i];
>>>>> +
>>>>> + /* Dirty the pte */
>>>>> + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
>>>>> + ptr[i] = i;
>>>>> +
>>>>
>>>> The anon pages are quite likely in memory at this point, and dirty in pte.
>>> Why would the pte be dirty? I just confirmed using some modified pagemap
>>> code that on my system at least this isn't the case.
>>>
>>>>> + /*
>>>>> + * Attempt to migrate memory to device, which should fail because
>>>>> + * hopefully some pages are backed by swap storage.
>>>>> + */
>>>>> + ASSERT_TRUE(hmm_migrate_sys_to_dev(self->fd, buffer, npages));
>>>>
>>>> And pages marked dirty also now. But could you elaborate how and where the above
>>>> fails in more detail, couldn't immediately see it...
>>> Not if you don't have patch 1 of this series applied. If the
>>> trylock_page() in migrate_vma_collect_pmd() succeeds (which it almost
>>> always does) it will have cleared the pte without setting PageDirty.
>>>
>>
>> Ah yes but I meant with the patch 1 applied, the comment "Attempt to migrate
>> memory to device, which should fail because hopefully some pages are backed by
>> swap storage" indicates that hmm_migrate_sys_to_dev() would fail..and there's
>> that ASSERT_TRUE which means fail here.
>>
>> So I understand the data loss but where is the hmm_migrate_sys_to_dev() failing,
>> with or wihtout patch 1 applied?
>
> Oh right. hmm_migrate_sys_to_dev() will fail because the page is in the
> swap cache, and migrate_vma_*() doesn't currently support migrating
> pages with a mapping.
>

Ok I forgot we skip also page cache pages, not just file pages...




>>> So now we have a dirty page without PageDirty set and without a dirty
>>> pte. If this page gets swapped back to disk and is still in the swap
>>> cache data will be lost because reclaim will see a clean page and won't
>>> write it out again.
>>> At least that's my understanding - please let me know if you see
>>> something that doesn't make sense.
>>>
>>>>> +
>>>>> + ASSERT_FALSE(write_cgroup_param(cgroup, "memory.reclaim", 1UL<<30));
>>>>> +
>>>>> + /* Check we still see the updated data after restoring from swap. */
>>>>> + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
>>>>> + ASSERT_EQ(ptr[i], i);
>>>>> +
>>>>> + hmm_buffer_free(buffer);
>>>>> + destroy_cgroup();
>>>>> +}
>>>>> +
>>>>> /*
>>>>> * Read anonymous memory multiple times.
>>>>> */
>>>>
>>>>
>>>> --Mika
>>>
>

2022-08-15 04:36:48

by Mika Penttilä

[permalink] [raw]
Subject: Re: [PATCH 2/2] selftests/hmm-tests: Add test for dirty bits



On 15.8.2022 7.05, Mika Penttilä wrote:
>
>
> On 15.8.2022 6.21, Alistair Popple wrote:
>>
>> Mika Penttilä <[email protected]> writes:
>>
>>> On 15.8.2022 5.35, Alistair Popple wrote:
>>>> Mika Penttilä <[email protected]> writes:
>>>>
>>>>> Hi Alistair!
>>>>>
>>>>> On 12.8.2022 8.22, Alistair Popple wrote:
>>>> [...]
>>>>
>>>>>> +    buffer->ptr = mmap(NULL, size,
>>>>>> +               PROT_READ | PROT_WRITE,
>>>>>> +               MAP_PRIVATE | MAP_ANONYMOUS,
>>>>>> +               buffer->fd, 0);
>>>>>> +    ASSERT_NE(buffer->ptr, MAP_FAILED);
>>>>>> +
>>>>>> +    /* Initialize buffer in system memory. */
>>>>>> +    for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
>>>>>> +        ptr[i] = 0;
>>>>>> +
>>>>>> +    ASSERT_FALSE(write_cgroup_param(cgroup, "memory.reclaim",
>>>>>> 1UL<<30));
>>>>>> +
>>>>>> +    /* Fault pages back in from swap as clean pages */
>>>>>> +    for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
>>>>>> +        tmp += ptr[i];
>>>>>> +
>>>>>> +    /* Dirty the pte */
>>>>>> +    for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
>>>>>> +        ptr[i] = i;
>>>>>> +
>>>>>
>>>>> The anon pages are quite likely in memory at this point, and dirty
>>>>> in pte.
>>>> Why would the pte be dirty? I just confirmed using some modified
>>>> pagemap
>>>> code that on my system at least this isn't the case.
>>>>
>>>>>> +    /*
>>>>>> +     * Attempt to migrate memory to device, which should fail
>>>>>> because
>>>>>> +     * hopefully some pages are backed by swap storage.
>>>>>> +     */
>>>>>> +    ASSERT_TRUE(hmm_migrate_sys_to_dev(self->fd, buffer, npages));
>>>>>
>>>>> And pages marked dirty also now. But could you elaborate how and
>>>>> where the above
>>>>> fails in more detail, couldn't immediately see it...
>>>> Not if you don't have patch 1 of this series applied. If the
>>>> trylock_page() in migrate_vma_collect_pmd() succeeds (which it almost
>>>> always does) it will have cleared the pte without setting PageDirty.
>>>>
>>>
>>> Ah yes but I meant with the patch 1 applied, the comment "Attempt to
>>> migrate
>>> memory to device, which should fail because hopefully some pages are
>>> backed by
>>> swap storage" indicates that hmm_migrate_sys_to_dev() would fail..and
>>> there's
>>> that ASSERT_TRUE which means fail here.
>>>
>>> So I understand the data loss but where is the
>>> hmm_migrate_sys_to_dev() failing,
>>> with or wihtout patch 1 applied?
>>
>> Oh right. hmm_migrate_sys_to_dev() will fail because the page is in the
>> swap cache, and migrate_vma_*() doesn't currently support migrating
>> pages with a mapping.
>>
>
> Ok I forgot we skip also page cache pages, not just file pages...

Meant we skip swap cache pages also, not just file pages..



>
>
>
>
>>>> So now we have a dirty page without PageDirty set and without a dirty
>>>> pte. If this page gets swapped back to disk and is still in the swap
>>>> cache data will be lost because reclaim will see a clean page and won't
>>>> write it out again.
>>>> At least that's my understanding - please let me know if you see
>>>> something that doesn't make sense.
>>>>
>>>>>> +
>>>>>> +    ASSERT_FALSE(write_cgroup_param(cgroup, "memory.reclaim",
>>>>>> 1UL<<30));
>>>>>> +
>>>>>> +    /* Check we still see the updated data after restoring from
>>>>>> swap. */
>>>>>> +    for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
>>>>>> +        ASSERT_EQ(ptr[i], i);
>>>>>> +
>>>>>> +    hmm_buffer_free(buffer);
>>>>>> +    destroy_cgroup();
>>>>>> +}
>>>>>> +
>>>>>>     /*
>>>>>>      * Read anonymous memory multiple times.
>>>>>>      */
>>>>>
>>>>>
>>>>> --Mika
>>>>
>>