2023-11-16 00:56:50

by Hongchen Zhang

[permalink] [raw]
Subject: [PATCH v2] PM: hibernate: use acquire/release ordering when compress/decompress image

When we test S4(suspend to disk) on LoongArch 3A6000 platform, the
test case sometimes fails. The dmesg log shows the following error:
Invalid LZO compressed length
After we dig into the code, we find out that:
When compress/decompress the image, the synchronization operation
between the control thread and the compress/decompress/crc thread
uses relaxed ordering interface, which is unreliable, and the
following situation may occur:
CPU 0 CPU 1
save_image_lzo lzo_compress_threadfn
atomic_set(&d->stop, 1);
atomic_read(&data[thr].stop)
data[thr].cmp = data[thr].cmp_len;
WRITE data[thr].cmp_len
Then CPU0 get a old cmp_len and write to disk. When cpu resume from S4,
wrong cmp_len is loaded.

To maintain data consistency between two threads, we should use the
acquire/release ordering interface. So we change atomic_read/atomic_set
to atomic_read_acquire/atomic_set_release.

Fixes: 081a9d043c98 ("PM / Hibernate: Improve performance of LZO/plain hibernation, checksum image")
Cc: [email protected]
Signed-off-by: Hongchen Zhang <[email protected]>
Signed-off-by: Weihao Li <[email protected]>
---
v1 -> v2:
1. add Cc: [email protected] in commit log
2. add Fixes: line in commit log
---
kernel/power/swap.c | 38 +++++++++++++++++++-------------------
1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index a2cb0babb5ec..d44f5937f1e5 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -606,11 +606,11 @@ static int crc32_threadfn(void *data)
unsigned i;

while (1) {
- wait_event(d->go, atomic_read(&d->ready) ||
+ wait_event(d->go, atomic_read_acquire(&d->ready) ||
kthread_should_stop());
if (kthread_should_stop()) {
d->thr = NULL;
- atomic_set(&d->stop, 1);
+ atomic_set_release(&d->stop, 1);
wake_up(&d->done);
break;
}
@@ -619,7 +619,7 @@ static int crc32_threadfn(void *data)
for (i = 0; i < d->run_threads; i++)
*d->crc32 = crc32_le(*d->crc32,
d->unc[i], *d->unc_len[i]);
- atomic_set(&d->stop, 1);
+ atomic_set_release(&d->stop, 1);
wake_up(&d->done);
}
return 0;
@@ -649,12 +649,12 @@ static int lzo_compress_threadfn(void *data)
struct cmp_data *d = data;

while (1) {
- wait_event(d->go, atomic_read(&d->ready) ||
+ wait_event(d->go, atomic_read_acquire(&d->ready) ||
kthread_should_stop());
if (kthread_should_stop()) {
d->thr = NULL;
d->ret = -1;
- atomic_set(&d->stop, 1);
+ atomic_set_release(&d->stop, 1);
wake_up(&d->done);
break;
}
@@ -663,7 +663,7 @@ static int lzo_compress_threadfn(void *data)
d->ret = lzo1x_1_compress(d->unc, d->unc_len,
d->cmp + LZO_HEADER, &d->cmp_len,
d->wrk);
- atomic_set(&d->stop, 1);
+ atomic_set_release(&d->stop, 1);
wake_up(&d->done);
}
return 0;
@@ -798,7 +798,7 @@ static int save_image_lzo(struct swap_map_handle *handle,

data[thr].unc_len = off;

- atomic_set(&data[thr].ready, 1);
+ atomic_set_release(&data[thr].ready, 1);
wake_up(&data[thr].go);
}

@@ -806,12 +806,12 @@ static int save_image_lzo(struct swap_map_handle *handle,
break;

crc->run_threads = thr;
- atomic_set(&crc->ready, 1);
+ atomic_set_release(&crc->ready, 1);
wake_up(&crc->go);

for (run_threads = thr, thr = 0; thr < run_threads; thr++) {
wait_event(data[thr].done,
- atomic_read(&data[thr].stop));
+ atomic_read_acquire(&data[thr].stop));
atomic_set(&data[thr].stop, 0);

ret = data[thr].ret;
@@ -850,7 +850,7 @@ static int save_image_lzo(struct swap_map_handle *handle,
}
}

- wait_event(crc->done, atomic_read(&crc->stop));
+ wait_event(crc->done, atomic_read_acquire(&crc->stop));
atomic_set(&crc->stop, 0);
}

@@ -1132,12 +1132,12 @@ static int lzo_decompress_threadfn(void *data)
struct dec_data *d = data;

while (1) {
- wait_event(d->go, atomic_read(&d->ready) ||
+ wait_event(d->go, atomic_read_acquire(&d->ready) ||
kthread_should_stop());
if (kthread_should_stop()) {
d->thr = NULL;
d->ret = -1;
- atomic_set(&d->stop, 1);
+ atomic_set_release(&d->stop, 1);
wake_up(&d->done);
break;
}
@@ -1150,7 +1150,7 @@ static int lzo_decompress_threadfn(void *data)
flush_icache_range((unsigned long)d->unc,
(unsigned long)d->unc + d->unc_len);

- atomic_set(&d->stop, 1);
+ atomic_set_release(&d->stop, 1);
wake_up(&d->done);
}
return 0;
@@ -1335,7 +1335,7 @@ static int load_image_lzo(struct swap_map_handle *handle,
}

if (crc->run_threads) {
- wait_event(crc->done, atomic_read(&crc->stop));
+ wait_event(crc->done, atomic_read_acquire(&crc->stop));
atomic_set(&crc->stop, 0);
crc->run_threads = 0;
}
@@ -1371,7 +1371,7 @@ static int load_image_lzo(struct swap_map_handle *handle,
pg = 0;
}

- atomic_set(&data[thr].ready, 1);
+ atomic_set_release(&data[thr].ready, 1);
wake_up(&data[thr].go);
}

@@ -1390,7 +1390,7 @@ static int load_image_lzo(struct swap_map_handle *handle,

for (run_threads = thr, thr = 0; thr < run_threads; thr++) {
wait_event(data[thr].done,
- atomic_read(&data[thr].stop));
+ atomic_read_acquire(&data[thr].stop));
atomic_set(&data[thr].stop, 0);

ret = data[thr].ret;
@@ -1421,7 +1421,7 @@ static int load_image_lzo(struct swap_map_handle *handle,
ret = snapshot_write_next(snapshot);
if (ret <= 0) {
crc->run_threads = thr + 1;
- atomic_set(&crc->ready, 1);
+ atomic_set_release(&crc->ready, 1);
wake_up(&crc->go);
goto out_finish;
}
@@ -1429,13 +1429,13 @@ static int load_image_lzo(struct swap_map_handle *handle,
}

crc->run_threads = thr;
- atomic_set(&crc->ready, 1);
+ atomic_set_release(&crc->ready, 1);
wake_up(&crc->go);
}

out_finish:
if (crc->run_threads) {
- wait_event(crc->done, atomic_read(&crc->stop));
+ wait_event(crc->done, atomic_read_acquire(&crc->stop));
atomic_set(&crc->stop, 0);
}
stop = ktime_get();
--
2.33.0


2023-12-12 13:04:20

by Rafael J. Wysocki

[permalink] [raw]
Subject: Re: [PATCH v2] PM: hibernate: use acquire/release ordering when compress/decompress image

On Thu, Nov 16, 2023 at 1:56 AM Hongchen Zhang
<[email protected]> wrote:
>
> When we test S4(suspend to disk) on LoongArch 3A6000 platform, the
> test case sometimes fails. The dmesg log shows the following error:
> Invalid LZO compressed length
> After we dig into the code, we find out that:
> When compress/decompress the image, the synchronization operation
> between the control thread and the compress/decompress/crc thread
> uses relaxed ordering interface, which is unreliable, and the
> following situation may occur:
> CPU 0 CPU 1
> save_image_lzo lzo_compress_threadfn
> atomic_set(&d->stop, 1);
> atomic_read(&data[thr].stop)
> data[thr].cmp = data[thr].cmp_len;
> WRITE data[thr].cmp_len
> Then CPU0 get a old cmp_len and write to disk. When cpu resume from S4,
> wrong cmp_len is loaded.
>
> To maintain data consistency between two threads, we should use the
> acquire/release ordering interface. So we change atomic_read/atomic_set
> to atomic_read_acquire/atomic_set_release.
>
> Fixes: 081a9d043c98 ("PM / Hibernate: Improve performance of LZO/plain hibernation, checksum image")
> Cc: [email protected]
> Signed-off-by: Hongchen Zhang <[email protected]>
> Signed-off-by: Weihao Li <[email protected]>

I was about to apply this patch when I noticed the S-o-b confusion.

Either a From: header pointing to Weihao Li <[email protected]> is
missing and the Hongchen Zhang <[email protected]> sign-off
means that the patch is sent on behalf of Weihao Li, or you both
worked on the patch together and a Co-developed-by: tag pointing to
Weihao Li <[email protected]> is missing.

Which of the above is the case?

> ---
> v1 -> v2:
> 1. add Cc: [email protected] in commit log
> 2. add Fixes: line in commit log
> ---
> kernel/power/swap.c | 38 +++++++++++++++++++-------------------
> 1 file changed, 19 insertions(+), 19 deletions(-)
>
> diff --git a/kernel/power/swap.c b/kernel/power/swap.c
> index a2cb0babb5ec..d44f5937f1e5 100644
> --- a/kernel/power/swap.c
> +++ b/kernel/power/swap.c
> @@ -606,11 +606,11 @@ static int crc32_threadfn(void *data)
> unsigned i;
>
> while (1) {
> - wait_event(d->go, atomic_read(&d->ready) ||
> + wait_event(d->go, atomic_read_acquire(&d->ready) ||
> kthread_should_stop());
> if (kthread_should_stop()) {
> d->thr = NULL;
> - atomic_set(&d->stop, 1);
> + atomic_set_release(&d->stop, 1);
> wake_up(&d->done);
> break;
> }
> @@ -619,7 +619,7 @@ static int crc32_threadfn(void *data)
> for (i = 0; i < d->run_threads; i++)
> *d->crc32 = crc32_le(*d->crc32,
> d->unc[i], *d->unc_len[i]);
> - atomic_set(&d->stop, 1);
> + atomic_set_release(&d->stop, 1);
> wake_up(&d->done);
> }
> return 0;
> @@ -649,12 +649,12 @@ static int lzo_compress_threadfn(void *data)
> struct cmp_data *d = data;
>
> while (1) {
> - wait_event(d->go, atomic_read(&d->ready) ||
> + wait_event(d->go, atomic_read_acquire(&d->ready) ||
> kthread_should_stop());
> if (kthread_should_stop()) {
> d->thr = NULL;
> d->ret = -1;
> - atomic_set(&d->stop, 1);
> + atomic_set_release(&d->stop, 1);
> wake_up(&d->done);
> break;
> }
> @@ -663,7 +663,7 @@ static int lzo_compress_threadfn(void *data)
> d->ret = lzo1x_1_compress(d->unc, d->unc_len,
> d->cmp + LZO_HEADER, &d->cmp_len,
> d->wrk);
> - atomic_set(&d->stop, 1);
> + atomic_set_release(&d->stop, 1);
> wake_up(&d->done);
> }
> return 0;
> @@ -798,7 +798,7 @@ static int save_image_lzo(struct swap_map_handle *handle,
>
> data[thr].unc_len = off;
>
> - atomic_set(&data[thr].ready, 1);
> + atomic_set_release(&data[thr].ready, 1);
> wake_up(&data[thr].go);
> }
>
> @@ -806,12 +806,12 @@ static int save_image_lzo(struct swap_map_handle *handle,
> break;
>
> crc->run_threads = thr;
> - atomic_set(&crc->ready, 1);
> + atomic_set_release(&crc->ready, 1);
> wake_up(&crc->go);
>
> for (run_threads = thr, thr = 0; thr < run_threads; thr++) {
> wait_event(data[thr].done,
> - atomic_read(&data[thr].stop));
> + atomic_read_acquire(&data[thr].stop));
> atomic_set(&data[thr].stop, 0);
>
> ret = data[thr].ret;
> @@ -850,7 +850,7 @@ static int save_image_lzo(struct swap_map_handle *handle,
> }
> }
>
> - wait_event(crc->done, atomic_read(&crc->stop));
> + wait_event(crc->done, atomic_read_acquire(&crc->stop));
> atomic_set(&crc->stop, 0);
> }
>
> @@ -1132,12 +1132,12 @@ static int lzo_decompress_threadfn(void *data)
> struct dec_data *d = data;
>
> while (1) {
> - wait_event(d->go, atomic_read(&d->ready) ||
> + wait_event(d->go, atomic_read_acquire(&d->ready) ||
> kthread_should_stop());
> if (kthread_should_stop()) {
> d->thr = NULL;
> d->ret = -1;
> - atomic_set(&d->stop, 1);
> + atomic_set_release(&d->stop, 1);
> wake_up(&d->done);
> break;
> }
> @@ -1150,7 +1150,7 @@ static int lzo_decompress_threadfn(void *data)
> flush_icache_range((unsigned long)d->unc,
> (unsigned long)d->unc + d->unc_len);
>
> - atomic_set(&d->stop, 1);
> + atomic_set_release(&d->stop, 1);
> wake_up(&d->done);
> }
> return 0;
> @@ -1335,7 +1335,7 @@ static int load_image_lzo(struct swap_map_handle *handle,
> }
>
> if (crc->run_threads) {
> - wait_event(crc->done, atomic_read(&crc->stop));
> + wait_event(crc->done, atomic_read_acquire(&crc->stop));
> atomic_set(&crc->stop, 0);
> crc->run_threads = 0;
> }
> @@ -1371,7 +1371,7 @@ static int load_image_lzo(struct swap_map_handle *handle,
> pg = 0;
> }
>
> - atomic_set(&data[thr].ready, 1);
> + atomic_set_release(&data[thr].ready, 1);
> wake_up(&data[thr].go);
> }
>
> @@ -1390,7 +1390,7 @@ static int load_image_lzo(struct swap_map_handle *handle,
>
> for (run_threads = thr, thr = 0; thr < run_threads; thr++) {
> wait_event(data[thr].done,
> - atomic_read(&data[thr].stop));
> + atomic_read_acquire(&data[thr].stop));
> atomic_set(&data[thr].stop, 0);
>
> ret = data[thr].ret;
> @@ -1421,7 +1421,7 @@ static int load_image_lzo(struct swap_map_handle *handle,
> ret = snapshot_write_next(snapshot);
> if (ret <= 0) {
> crc->run_threads = thr + 1;
> - atomic_set(&crc->ready, 1);
> + atomic_set_release(&crc->ready, 1);
> wake_up(&crc->go);
> goto out_finish;
> }
> @@ -1429,13 +1429,13 @@ static int load_image_lzo(struct swap_map_handle *handle,
> }
>
> crc->run_threads = thr;
> - atomic_set(&crc->ready, 1);
> + atomic_set_release(&crc->ready, 1);
> wake_up(&crc->go);
> }
>
> out_finish:
> if (crc->run_threads) {
> - wait_event(crc->done, atomic_read(&crc->stop));
> + wait_event(crc->done, atomic_read_acquire(&crc->stop));
> atomic_set(&crc->stop, 0);
> }
> stop = ktime_get();
> --
> 2.33.0
>

2023-12-13 01:04:26

by Hongchen Zhang

[permalink] [raw]
Subject: Re: [PATCH v2] PM: hibernate: use acquire/release ordering when compress/decompress image

Hi Rafael,
Thanks for your review.
On 2023/12/12 下午9:03, Rafael J. Wysocki wrote:
> On Thu, Nov 16, 2023 at 1:56 AM Hongchen Zhang
> <[email protected]> wrote:
>>
>> When we test S4(suspend to disk) on LoongArch 3A6000 platform, the
>> test case sometimes fails. The dmesg log shows the following error:
>> Invalid LZO compressed length
>> After we dig into the code, we find out that:
>> When compress/decompress the image, the synchronization operation
>> between the control thread and the compress/decompress/crc thread
>> uses relaxed ordering interface, which is unreliable, and the
>> following situation may occur:
>> CPU 0 CPU 1
>> save_image_lzo lzo_compress_threadfn
>> atomic_set(&d->stop, 1);
>> atomic_read(&data[thr].stop)
>> data[thr].cmp = data[thr].cmp_len;
>> WRITE data[thr].cmp_len
>> Then CPU0 get a old cmp_len and write to disk. When cpu resume from S4,
>> wrong cmp_len is loaded.
>>
>> To maintain data consistency between two threads, we should use the
>> acquire/release ordering interface. So we change atomic_read/atomic_set
>> to atomic_read_acquire/atomic_set_release.
>>
>> Fixes: 081a9d043c98 ("PM / Hibernate: Improve performance of LZO/plain hibernation, checksum image")
>> Cc: [email protected]
>> Signed-off-by: Hongchen Zhang <[email protected]>
>> Signed-off-by: Weihao Li <[email protected]>
>
> I was about to apply this patch when I noticed the S-o-b confusion.
>
> Either a From: header pointing to Weihao Li <[email protected]> is
> missing and the Hongchen Zhang <[email protected]> sign-off
> means that the patch is sent on behalf of Weihao Li, or you both
> worked on the patch together and a Co-developed-by: tag pointing to
> Weihao Li <[email protected]> is missing.
>
> Which of the above is the case?
Yes, we both worked on this patch,let me add
Co-developed-by: Weihao Li <[email protected]>
and send V3.

>
>> ---
>> v1 -> v2:
>> 1. add Cc: [email protected] in commit log
>> 2. add Fixes: line in commit log
>> ---
>> kernel/power/swap.c | 38 +++++++++++++++++++-------------------
>> 1 file changed, 19 insertions(+), 19 deletions(-)
>>
>> diff --git a/kernel/power/swap.c b/kernel/power/swap.c
>> index a2cb0babb5ec..d44f5937f1e5 100644
>> --- a/kernel/power/swap.c
>> +++ b/kernel/power/swap.c
>> @@ -606,11 +606,11 @@ static int crc32_threadfn(void *data)
>> unsigned i;
>>
>> while (1) {
>> - wait_event(d->go, atomic_read(&d->ready) ||
>> + wait_event(d->go, atomic_read_acquire(&d->ready) ||
>> kthread_should_stop());
>> if (kthread_should_stop()) {
>> d->thr = NULL;
>> - atomic_set(&d->stop, 1);
>> + atomic_set_release(&d->stop, 1);
>> wake_up(&d->done);
>> break;
>> }
>> @@ -619,7 +619,7 @@ static int crc32_threadfn(void *data)
>> for (i = 0; i < d->run_threads; i++)
>> *d->crc32 = crc32_le(*d->crc32,
>> d->unc[i], *d->unc_len[i]);
>> - atomic_set(&d->stop, 1);
>> + atomic_set_release(&d->stop, 1);
>> wake_up(&d->done);
>> }
>> return 0;
>> @@ -649,12 +649,12 @@ static int lzo_compress_threadfn(void *data)
>> struct cmp_data *d = data;
>>
>> while (1) {
>> - wait_event(d->go, atomic_read(&d->ready) ||
>> + wait_event(d->go, atomic_read_acquire(&d->ready) ||
>> kthread_should_stop());
>> if (kthread_should_stop()) {
>> d->thr = NULL;
>> d->ret = -1;
>> - atomic_set(&d->stop, 1);
>> + atomic_set_release(&d->stop, 1);
>> wake_up(&d->done);
>> break;
>> }
>> @@ -663,7 +663,7 @@ static int lzo_compress_threadfn(void *data)
>> d->ret = lzo1x_1_compress(d->unc, d->unc_len,
>> d->cmp + LZO_HEADER, &d->cmp_len,
>> d->wrk);
>> - atomic_set(&d->stop, 1);
>> + atomic_set_release(&d->stop, 1);
>> wake_up(&d->done);
>> }
>> return 0;
>> @@ -798,7 +798,7 @@ static int save_image_lzo(struct swap_map_handle *handle,
>>
>> data[thr].unc_len = off;
>>
>> - atomic_set(&data[thr].ready, 1);
>> + atomic_set_release(&data[thr].ready, 1);
>> wake_up(&data[thr].go);
>> }
>>
>> @@ -806,12 +806,12 @@ static int save_image_lzo(struct swap_map_handle *handle,
>> break;
>>
>> crc->run_threads = thr;
>> - atomic_set(&crc->ready, 1);
>> + atomic_set_release(&crc->ready, 1);
>> wake_up(&crc->go);
>>
>> for (run_threads = thr, thr = 0; thr < run_threads; thr++) {
>> wait_event(data[thr].done,
>> - atomic_read(&data[thr].stop));
>> + atomic_read_acquire(&data[thr].stop));
>> atomic_set(&data[thr].stop, 0);
>>
>> ret = data[thr].ret;
>> @@ -850,7 +850,7 @@ static int save_image_lzo(struct swap_map_handle *handle,
>> }
>> }
>>
>> - wait_event(crc->done, atomic_read(&crc->stop));
>> + wait_event(crc->done, atomic_read_acquire(&crc->stop));
>> atomic_set(&crc->stop, 0);
>> }
>>
>> @@ -1132,12 +1132,12 @@ static int lzo_decompress_threadfn(void *data)
>> struct dec_data *d = data;
>>
>> while (1) {
>> - wait_event(d->go, atomic_read(&d->ready) ||
>> + wait_event(d->go, atomic_read_acquire(&d->ready) ||
>> kthread_should_stop());
>> if (kthread_should_stop()) {
>> d->thr = NULL;
>> d->ret = -1;
>> - atomic_set(&d->stop, 1);
>> + atomic_set_release(&d->stop, 1);
>> wake_up(&d->done);
>> break;
>> }
>> @@ -1150,7 +1150,7 @@ static int lzo_decompress_threadfn(void *data)
>> flush_icache_range((unsigned long)d->unc,
>> (unsigned long)d->unc + d->unc_len);
>>
>> - atomic_set(&d->stop, 1);
>> + atomic_set_release(&d->stop, 1);
>> wake_up(&d->done);
>> }
>> return 0;
>> @@ -1335,7 +1335,7 @@ static int load_image_lzo(struct swap_map_handle *handle,
>> }
>>
>> if (crc->run_threads) {
>> - wait_event(crc->done, atomic_read(&crc->stop));
>> + wait_event(crc->done, atomic_read_acquire(&crc->stop));
>> atomic_set(&crc->stop, 0);
>> crc->run_threads = 0;
>> }
>> @@ -1371,7 +1371,7 @@ static int load_image_lzo(struct swap_map_handle *handle,
>> pg = 0;
>> }
>>
>> - atomic_set(&data[thr].ready, 1);
>> + atomic_set_release(&data[thr].ready, 1);
>> wake_up(&data[thr].go);
>> }
>>
>> @@ -1390,7 +1390,7 @@ static int load_image_lzo(struct swap_map_handle *handle,
>>
>> for (run_threads = thr, thr = 0; thr < run_threads; thr++) {
>> wait_event(data[thr].done,
>> - atomic_read(&data[thr].stop));
>> + atomic_read_acquire(&data[thr].stop));
>> atomic_set(&data[thr].stop, 0);
>>
>> ret = data[thr].ret;
>> @@ -1421,7 +1421,7 @@ static int load_image_lzo(struct swap_map_handle *handle,
>> ret = snapshot_write_next(snapshot);
>> if (ret <= 0) {
>> crc->run_threads = thr + 1;
>> - atomic_set(&crc->ready, 1);
>> + atomic_set_release(&crc->ready, 1);
>> wake_up(&crc->go);
>> goto out_finish;
>> }
>> @@ -1429,13 +1429,13 @@ static int load_image_lzo(struct swap_map_handle *handle,
>> }
>>
>> crc->run_threads = thr;
>> - atomic_set(&crc->ready, 1);
>> + atomic_set_release(&crc->ready, 1);
>> wake_up(&crc->go);
>> }
>>
>> out_finish:
>> if (crc->run_threads) {
>> - wait_event(crc->done, atomic_read(&crc->stop));
>> + wait_event(crc->done, atomic_read_acquire(&crc->stop));
>> atomic_set(&crc->stop, 0);
>> }
>> stop = ktime_get();
>> --
>> 2.33.0
>>


--
Best Regards
Hongchen Zhang