From: Daeho Jeong <[email protected]>
I found out f2fs_free_dic() is invoked in a wrong timing, but
f2fs_verify_bio() still needed the dic info and it triggered the
below kernel panic. It has been caused by the race condition of
pending_pages value between decompression and verity logic, when
the same compression cluster had been split in different bios.
By split bios, f2fs_verify_bio() ended up with decreasing
pending_pages value before it is reset to nr_cpages by
f2fs_decompress_pages() and caused the kernel panic.
[ 4416.564763] Unable to handle kernel NULL pointer dereference
at virtual address 0000000000000000
...
[ 4416.896016] Workqueue: fsverity_read_queue f2fs_verity_work
[ 4416.908515] pc : fsverity_verify_page+0x20/0x78
[ 4416.913721] lr : f2fs_verify_bio+0x11c/0x29c
[ 4416.913722] sp : ffffffc019533cd0
[ 4416.913723] x29: ffffffc019533cd0 x28: 0000000000000402
[ 4416.913724] x27: 0000000000000001 x26: 0000000000000100
[ 4416.913726] x25: 0000000000000001 x24: 0000000000000004
[ 4416.913727] x23: 0000000000001000 x22: 0000000000000000
[ 4416.913728] x21: 0000000000000000 x20: ffffffff2076f9c0
[ 4416.913729] x19: ffffffff2076f9c0 x18: ffffff8a32380c30
[ 4416.913731] x17: ffffffc01f966d97 x16: 0000000000000298
[ 4416.913732] x15: 0000000000000000 x14: 0000000000000000
[ 4416.913733] x13: f074faec89ffffff x12: 0000000000000000
[ 4416.913734] x11: 0000000000001000 x10: 0000000000001000
[ 4416.929176] x9 : ffffffff20d1f5c7 x8 : 0000000000000000
[ 4416.929178] x7 : 626d7464ff286b6b x6 : ffffffc019533ade
[ 4416.929179] x5 : 000000008049000e x4 : ffffffff2793e9e0
[ 4416.929180] x3 : 000000008049000e x2 : ffffff89ecfa74d0
[ 4416.929181] x1 : 0000000000000c40 x0 : ffffffff2076f9c0
[ 4416.929184] Call trace:
[ 4416.929187] fsverity_verify_page+0x20/0x78
[ 4416.929189] f2fs_verify_bio+0x11c/0x29c
[ 4416.929192] f2fs_verity_work+0x58/0x84
[ 4417.050667] process_one_work+0x270/0x47c
[ 4417.055354] worker_thread+0x27c/0x4d8
[ 4417.059784] kthread+0x13c/0x320
[ 4417.063693] ret_from_fork+0x10/0x18
Signed-off-by: Daeho Jeong <[email protected]>
Signed-off-by: Jaegeuk Kim <[email protected]>
---
v3: back to v1 and enabled verity in a unit of cluster
v2: merged verity_pages with pending_pages, and increased the
pending_pages count only if STEP_VERITY is set on bio
---
fs/f2fs/compress.c | 2 --
fs/f2fs/data.c | 51 ++++++++++++++++++++++++++++++++++++----------
fs/f2fs/f2fs.h | 1 +
3 files changed, 41 insertions(+), 13 deletions(-)
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 87090da8693d..832b19986caf 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -803,8 +803,6 @@ void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity)
if (cops->destroy_decompress_ctx)
cops->destroy_decompress_ctx(dic);
out_free_dic:
- if (verity)
- atomic_set(&dic->pending_pages, dic->nr_cpages);
if (!verity)
f2fs_decompress_end_io(dic->rpages, dic->cluster_size,
ret, false);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 42254d3859c7..861e5783a5fc 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -202,7 +202,7 @@ static void f2fs_verify_bio(struct bio *bio)
dic = (struct decompress_io_ctx *)page_private(page);
if (dic) {
- if (atomic_dec_return(&dic->pending_pages))
+ if (atomic_dec_return(&dic->verity_pages))
continue;
f2fs_verify_pages(dic->rpages,
dic->cluster_size);
@@ -1027,7 +1027,8 @@ static inline bool f2fs_need_verity(const struct inode *inode, pgoff_t idx)
static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
unsigned nr_pages, unsigned op_flag,
- pgoff_t first_idx, bool for_write)
+ pgoff_t first_idx, bool for_write,
+ bool for_verity)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct bio *bio;
@@ -1049,7 +1050,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
post_read_steps |= 1 << STEP_DECRYPT;
if (f2fs_compressed_file(inode))
post_read_steps |= 1 << STEP_DECOMPRESS_NOWQ;
- if (f2fs_need_verity(inode, first_idx))
+ if (for_verity && f2fs_need_verity(inode, first_idx))
post_read_steps |= 1 << STEP_VERITY;
if (post_read_steps) {
@@ -1079,7 +1080,7 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page,
struct bio *bio;
bio = f2fs_grab_read_bio(inode, blkaddr, 1, op_flags,
- page->index, for_write);
+ page->index, for_write, true);
if (IS_ERR(bio))
return PTR_ERR(bio);
@@ -2133,7 +2134,7 @@ static int f2fs_read_single_page(struct inode *inode, struct page *page,
if (bio == NULL) {
bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
is_readahead ? REQ_RAHEAD : 0, page->index,
- false);
+ false, true);
if (IS_ERR(bio)) {
ret = PTR_ERR(bio);
bio = NULL;
@@ -2180,6 +2181,8 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
const unsigned blkbits = inode->i_blkbits;
const unsigned blocksize = 1 << blkbits;
struct decompress_io_ctx *dic = NULL;
+ struct bio_post_read_ctx *ctx;
+ bool for_verity = false;
int i;
int ret = 0;
@@ -2245,10 +2248,22 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
goto out_put_dnode;
}
+ if (fsverity_active(cc->inode)) {
+ atomic_set(&dic->verity_pages, cc->nr_cpages);
+ for_verity = true;
+
+ if (bio) {
+ ctx = bio->bi_private;
+ if (!(ctx->enabled_steps & (1 << STEP_VERITY))) {
+ __submit_bio(sbi, bio, DATA);
+ bio = NULL;
+ }
+ }
+ }
+
for (i = 0; i < dic->nr_cpages; i++) {
struct page *page = dic->cpages[i];
block_t blkaddr;
- struct bio_post_read_ctx *ctx;
blkaddr = data_blkaddr(dn.inode, dn.node_page,
dn.ofs_in_node + i + 1);
@@ -2264,17 +2279,31 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
if (!bio) {
bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages,
is_readahead ? REQ_RAHEAD : 0,
- page->index, for_write);
+ page->index, for_write, for_verity);
if (IS_ERR(bio)) {
+ unsigned int remained = dic->nr_cpages - i;
+ bool release = false;
+
ret = PTR_ERR(bio);
dic->failed = true;
- if (!atomic_sub_return(dic->nr_cpages - i,
- &dic->pending_pages)) {
+
+ if (for_verity) {
+ if (!atomic_sub_return(remained,
+ &dic->verity_pages))
+ release = true;
+ } else {
+ if (!atomic_sub_return(remained,
+ &dic->pending_pages))
+ release = true;
+ }
+
+ if (release) {
f2fs_decompress_end_io(dic->rpages,
- cc->cluster_size, true,
- false);
+ cc->cluster_size, true,
+ false);
f2fs_free_dic(dic);
}
+
f2fs_put_dnode(&dn);
*bio_ret = NULL;
return ret;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 94d16bde5e24..f328f55fb0a0 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1341,6 +1341,7 @@ struct decompress_io_ctx {
size_t rlen; /* valid data length in rbuf */
size_t clen; /* valid data length in cbuf */
atomic_t pending_pages; /* in-flight compressed page count */
+ atomic_t verity_pages; /* in-flight page count for verity */
bool failed; /* indicate IO error during decompression */
void *private; /* payload buffer for specified decompression algorithm */
void *private2; /* extra payload buffer */
--
2.29.2.576.ga3fc446d84-goog
On 2020/12/5 12:26, Daeho Jeong wrote:
> From: Daeho Jeong <[email protected]>
>
> I found out f2fs_free_dic() is invoked in a wrong timing, but
> f2fs_verify_bio() still needed the dic info and it triggered the
> below kernel panic. It has been caused by the race condition of
> pending_pages value between decompression and verity logic, when
> the same compression cluster had been split in different bios.
> By split bios, f2fs_verify_bio() ended up with decreasing
> pending_pages value before it is reset to nr_cpages by
> f2fs_decompress_pages() and caused the kernel panic.
>
> [ 4416.564763] Unable to handle kernel NULL pointer dereference
> at virtual address 0000000000000000
> ...
> [ 4416.896016] Workqueue: fsverity_read_queue f2fs_verity_work
> [ 4416.908515] pc : fsverity_verify_page+0x20/0x78
> [ 4416.913721] lr : f2fs_verify_bio+0x11c/0x29c
> [ 4416.913722] sp : ffffffc019533cd0
> [ 4416.913723] x29: ffffffc019533cd0 x28: 0000000000000402
> [ 4416.913724] x27: 0000000000000001 x26: 0000000000000100
> [ 4416.913726] x25: 0000000000000001 x24: 0000000000000004
> [ 4416.913727] x23: 0000000000001000 x22: 0000000000000000
> [ 4416.913728] x21: 0000000000000000 x20: ffffffff2076f9c0
> [ 4416.913729] x19: ffffffff2076f9c0 x18: ffffff8a32380c30
> [ 4416.913731] x17: ffffffc01f966d97 x16: 0000000000000298
> [ 4416.913732] x15: 0000000000000000 x14: 0000000000000000
> [ 4416.913733] x13: f074faec89ffffff x12: 0000000000000000
> [ 4416.913734] x11: 0000000000001000 x10: 0000000000001000
> [ 4416.929176] x9 : ffffffff20d1f5c7 x8 : 0000000000000000
> [ 4416.929178] x7 : 626d7464ff286b6b x6 : ffffffc019533ade
> [ 4416.929179] x5 : 000000008049000e x4 : ffffffff2793e9e0
> [ 4416.929180] x3 : 000000008049000e x2 : ffffff89ecfa74d0
> [ 4416.929181] x1 : 0000000000000c40 x0 : ffffffff2076f9c0
> [ 4416.929184] Call trace:
> [ 4416.929187] fsverity_verify_page+0x20/0x78
> [ 4416.929189] f2fs_verify_bio+0x11c/0x29c
> [ 4416.929192] f2fs_verity_work+0x58/0x84
> [ 4417.050667] process_one_work+0x270/0x47c
> [ 4417.055354] worker_thread+0x27c/0x4d8
> [ 4417.059784] kthread+0x13c/0x320
> [ 4417.063693] ret_from_fork+0x10/0x18
Is race condition as below?
Thread A f2fs_post_read_wq fsverity_wq
- f2fs_read_multi_pages()
- f2fs_alloc_dic
- dic->pending_pages = 2
- submit_bio()
- submit_bio()
- f2fs_post_read_work() handle first bio
- f2fs_decompress_work()
- __read_end_io()
- f2fs_decompress_pages()
- dic->pending_pages--
- enqueue f2fs_verity_work()
- f2fs_verity_work() handle first bio
- f2fs_verify_bio()
- dic->pending_pages--
- f2fs_post_read_work() handle second bio
- f2fs_decompress_work()
- enqueue f2fs_verity_work()
- f2fs_verify_pages()
- f2fs_free_dic()
- f2fs_verity_work() handle second bio
- f2fs_verfy_bio()
- use-after-free on dic
If this is correct, could you please add this into commit message?
>
> Signed-off-by: Daeho Jeong <[email protected]>
> Signed-off-by: Jaegeuk Kim <[email protected]>
> ---
> v3: back to v1 and enabled verity in a unit of cluster
> v2: merged verity_pages with pending_pages, and increased the
> pending_pages count only if STEP_VERITY is set on bio
> ---
> fs/f2fs/compress.c | 2 --
> fs/f2fs/data.c | 51 ++++++++++++++++++++++++++++++++++++----------
> fs/f2fs/f2fs.h | 1 +
> 3 files changed, 41 insertions(+), 13 deletions(-)
>
> diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
> index 87090da8693d..832b19986caf 100644
> --- a/fs/f2fs/compress.c
> +++ b/fs/f2fs/compress.c
> @@ -803,8 +803,6 @@ void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity)
> if (cops->destroy_decompress_ctx)
> cops->destroy_decompress_ctx(dic);
> out_free_dic:
> - if (verity)
> - atomic_set(&dic->pending_pages, dic->nr_cpages);
> if (!verity)
> f2fs_decompress_end_io(dic->rpages, dic->cluster_size,
> ret, false);
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 42254d3859c7..861e5783a5fc 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -202,7 +202,7 @@ static void f2fs_verify_bio(struct bio *bio)
> dic = (struct decompress_io_ctx *)page_private(page);
>
> if (dic) {
> - if (atomic_dec_return(&dic->pending_pages))
> + if (atomic_dec_return(&dic->verity_pages))
> continue;
> f2fs_verify_pages(dic->rpages,
> dic->cluster_size);
> @@ -1027,7 +1027,8 @@ static inline bool f2fs_need_verity(const struct inode *inode, pgoff_t idx)
>
> static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
> unsigned nr_pages, unsigned op_flag,
> - pgoff_t first_idx, bool for_write)
> + pgoff_t first_idx, bool for_write,
> + bool for_verity)
> {
> struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> struct bio *bio;
> @@ -1049,7 +1050,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
> post_read_steps |= 1 << STEP_DECRYPT;
> if (f2fs_compressed_file(inode))
> post_read_steps |= 1 << STEP_DECOMPRESS_NOWQ;
> - if (f2fs_need_verity(inode, first_idx))
> + if (for_verity && f2fs_need_verity(inode, first_idx))
> post_read_steps |= 1 << STEP_VERITY;
>
> if (post_read_steps) {
> @@ -1079,7 +1080,7 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page,
> struct bio *bio;
>
> bio = f2fs_grab_read_bio(inode, blkaddr, 1, op_flags,
> - page->index, for_write);
> + page->index, for_write, true);
> if (IS_ERR(bio))
> return PTR_ERR(bio);
>
> @@ -2133,7 +2134,7 @@ static int f2fs_read_single_page(struct inode *inode, struct page *page,
> if (bio == NULL) {
> bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
> is_readahead ? REQ_RAHEAD : 0, page->index,
> - false);
> + false, true);
> if (IS_ERR(bio)) {
> ret = PTR_ERR(bio);
> bio = NULL;
> @@ -2180,6 +2181,8 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
> const unsigned blkbits = inode->i_blkbits;
> const unsigned blocksize = 1 << blkbits;
> struct decompress_io_ctx *dic = NULL;
> + struct bio_post_read_ctx *ctx;
> + bool for_verity = false;
> int i;
> int ret = 0;
>
> @@ -2245,10 +2248,22 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
> goto out_put_dnode;
> }
>
> + if (fsverity_active(cc->inode)) {
> + atomic_set(&dic->verity_pages, cc->nr_cpages);
> + for_verity = true;
> +
> + if (bio) {
> + ctx = bio->bi_private;
> + if (!(ctx->enabled_steps & (1 << STEP_VERITY))) {
It looks like it will be better to move this into merge condition?
if (bio && (!page_is_mergeable(sbi, bio,
*last_block_in_bio, blkaddr) ||
!f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL) ||
f2fs_verify_mergeable_bio())) {
Thanks,
> + __submit_bio(sbi, bio, DATA);
> + bio = NULL;
> + }
> + }
> + }
> +
> for (i = 0; i < dic->nr_cpages; i++) {
> struct page *page = dic->cpages[i];
> block_t blkaddr;
> - struct bio_post_read_ctx *ctx;
>
> blkaddr = data_blkaddr(dn.inode, dn.node_page,
> dn.ofs_in_node + i + 1);
> @@ -2264,17 +2279,31 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
> if (!bio) {
> bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages,
> is_readahead ? REQ_RAHEAD : 0,
> - page->index, for_write);
> + page->index, for_write, for_verity);
> if (IS_ERR(bio)) {
> + unsigned int remained = dic->nr_cpages - i;
> + bool release = false;
> +
> ret = PTR_ERR(bio);
> dic->failed = true;
> - if (!atomic_sub_return(dic->nr_cpages - i,
> - &dic->pending_pages)) {
> +
> + if (for_verity) {
> + if (!atomic_sub_return(remained,
> + &dic->verity_pages))
> + release = true;
> + } else {
> + if (!atomic_sub_return(remained,
> + &dic->pending_pages))
> + release = true;
> + }
> +
> + if (release) {
> f2fs_decompress_end_io(dic->rpages,
> - cc->cluster_size, true,
> - false);
> + cc->cluster_size, true,
> + false);
> f2fs_free_dic(dic);
> }
> +
> f2fs_put_dnode(&dn);
> *bio_ret = NULL;
> return ret;
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 94d16bde5e24..f328f55fb0a0 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -1341,6 +1341,7 @@ struct decompress_io_ctx {
> size_t rlen; /* valid data length in rbuf */
> size_t clen; /* valid data length in cbuf */
> atomic_t pending_pages; /* in-flight compressed page count */
> + atomic_t verity_pages; /* in-flight page count for verity */
> bool failed; /* indicate IO error during decompression */
> void *private; /* payload buffer for specified decompression algorithm */
> void *private2; /* extra payload buffer */
>
> It looks like it will be better to move this into merge condition?
>
> if (bio && (!page_is_mergeable(sbi, bio,
> *last_block_in_bio, blkaddr) ||
> !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL) ||
> f2fs_verify_mergeable_bio())) {
>
I tried this for the first time, but it requires unnecessary checks
within the compression cluster.
I wanted to just check one time in the beginning of the cluster.
What do you think?
On 2020/12/7 15:28, Daeho Jeong wrote:
>> It looks like it will be better to move this into merge condition?
>>
>> if (bio && (!page_is_mergeable(sbi, bio,
>> *last_block_in_bio, blkaddr) ||
>> !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL) ||
>> f2fs_verify_mergeable_bio())) {
>>
>
> I tried this for the first time, but it requires unnecessary checks
> within the compression cluster.
We only need to check f2fs_verify_mergeable_bio for i == 0 case? something like:
static bool f2fs_verify_mergeable_bio(struct bio *bio, bool verify, bool first_page)
{
if (!first_page)
return false;
if (!verify)
return false;
ctx = bio->bi_private;
if (!(ctx->enabled_steps & (1 << STEP_VERITY)))
return true;
}
Thoughts?
> I wanted to just check one time in the beginning of the cluster.
> What do you think?
It's trivial, but I'm think about the readability... at least, one line comment
is needed to describe why we submit previous bio. :)
Thanks,
> .
>
On 12/07, Chao Yu wrote:
> On 2020/12/7 15:28, Daeho Jeong wrote:
> > > It looks like it will be better to move this into merge condition?
> > >
> > > if (bio && (!page_is_mergeable(sbi, bio,
> > > *last_block_in_bio, blkaddr) ||
> > > !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL) ||
> > > f2fs_verify_mergeable_bio())) {
> > >
> >
> > I tried this for the first time, but it requires unnecessary checks
> > within the compression cluster.
>
> We only need to check f2fs_verify_mergeable_bio for i == 0 case? something like:
>
> static bool f2fs_verify_mergeable_bio(struct bio *bio, bool verify, bool first_page)
> {
> if (!first_page)
Agreed that we don't need to run this instruction for every pages.
> return false;
> if (!verify)
> return false;
>
> ctx = bio->bi_private;
> if (!(ctx->enabled_steps & (1 << STEP_VERITY)))
> return true;
> }
>
> Thoughts?
>
> > I wanted to just check one time in the beginning of the cluster.
> > What do you think?
>
> It's trivial, but I'm think about the readability... at least, one line comment
> is needed to describe why we submit previous bio. :)
I added like this. :P
https://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git/commit/?h=dev&id=f189a2471df2560e5834d999ab4ff68bc10853e4
>
> Thanks,
>
> > .
> >
On Sat, Dec 05, 2020 at 01:26:26PM +0900, Daeho Jeong wrote:
> From: Daeho Jeong <[email protected]>
>
> I found out f2fs_free_dic() is invoked in a wrong timing, but
> f2fs_verify_bio() still needed the dic info and it triggered the
> below kernel panic. It has been caused by the race condition of
> pending_pages value between decompression and verity logic, when
> the same compression cluster had been split in different bios.
> By split bios, f2fs_verify_bio() ended up with decreasing
> pending_pages value before it is reset to nr_cpages by
> f2fs_decompress_pages() and caused the kernel panic.
>
> [ 4416.564763] Unable to handle kernel NULL pointer dereference
> at virtual address 0000000000000000
> ...
> [ 4416.896016] Workqueue: fsverity_read_queue f2fs_verity_work
> [ 4416.908515] pc : fsverity_verify_page+0x20/0x78
> [ 4416.913721] lr : f2fs_verify_bio+0x11c/0x29c
> [ 4416.913722] sp : ffffffc019533cd0
> [ 4416.913723] x29: ffffffc019533cd0 x28: 0000000000000402
> [ 4416.913724] x27: 0000000000000001 x26: 0000000000000100
> [ 4416.913726] x25: 0000000000000001 x24: 0000000000000004
> [ 4416.913727] x23: 0000000000001000 x22: 0000000000000000
> [ 4416.913728] x21: 0000000000000000 x20: ffffffff2076f9c0
> [ 4416.913729] x19: ffffffff2076f9c0 x18: ffffff8a32380c30
> [ 4416.913731] x17: ffffffc01f966d97 x16: 0000000000000298
> [ 4416.913732] x15: 0000000000000000 x14: 0000000000000000
> [ 4416.913733] x13: f074faec89ffffff x12: 0000000000000000
> [ 4416.913734] x11: 0000000000001000 x10: 0000000000001000
> [ 4416.929176] x9 : ffffffff20d1f5c7 x8 : 0000000000000000
> [ 4416.929178] x7 : 626d7464ff286b6b x6 : ffffffc019533ade
> [ 4416.929179] x5 : 000000008049000e x4 : ffffffff2793e9e0
> [ 4416.929180] x3 : 000000008049000e x2 : ffffff89ecfa74d0
> [ 4416.929181] x1 : 0000000000000c40 x0 : ffffffff2076f9c0
> [ 4416.929184] Call trace:
> [ 4416.929187] fsverity_verify_page+0x20/0x78
> [ 4416.929189] f2fs_verify_bio+0x11c/0x29c
> [ 4416.929192] f2fs_verity_work+0x58/0x84
> [ 4417.050667] process_one_work+0x270/0x47c
> [ 4417.055354] worker_thread+0x27c/0x4d8
> [ 4417.059784] kthread+0x13c/0x320
> [ 4417.063693] ret_from_fork+0x10/0x18
>
> Signed-off-by: Daeho Jeong <[email protected]>
> Signed-off-by: Jaegeuk Kim <[email protected]>
> ---
> v3: back to v1 and enabled verity in a unit of cluster
> v2: merged verity_pages with pending_pages, and increased the
> pending_pages count only if STEP_VERITY is set on bio
I am trying to review this but it is very hard, as the f2fs compression code is
very hard to understand.
It looks like a 'struct decompress_io_ctx' represents the work to decompress a
particular cluster. Since the compressed data of the cluster can be read using
multiple bios, there is a reference count of how many pages are remaining to be
read before all the cluster's pages have been read and decompression can start.
What I don't understand is why that reference counting needs to work differently
depending on whether verity is enabled or not. Shouldn't it be exactly the
same?
There also seems to be some confusion about the scope of STEP_VERITY. Before
f2fs compression was added, it was a per-bio thing. But now in a compressed
file, it's really a per-cluster thing, since all decompressed pages in a
compressed cluster are verified (or not verified) at once.
Wouldn't it make a lot more sense to, when a cluster needs both compression and
verity, *not* set STEP_VERITY on the bios, but rather set a similar flag in the
decompress_io_ctx?
- Eric
Chao, Jaegeuk,
Thanks. I'll update it as your comments. :)
Eric,
Decompression and verity can be executed in different thread contexts
in different timing, so we need separate counts for each.
We already use STEP_VERITY for non-compression case, so I think using
this flag in here looks more making sense.
Thanks,
2020년 12월 8일 (화) 오전 5:31, Eric Biggers <[email protected]>님이 작성:
>
> On Sat, Dec 05, 2020 at 01:26:26PM +0900, Daeho Jeong wrote:
> > From: Daeho Jeong <[email protected]>
> >
> > I found out f2fs_free_dic() is invoked in a wrong timing, but
> > f2fs_verify_bio() still needed the dic info and it triggered the
> > below kernel panic. It has been caused by the race condition of
> > pending_pages value between decompression and verity logic, when
> > the same compression cluster had been split in different bios.
> > By split bios, f2fs_verify_bio() ended up with decreasing
> > pending_pages value before it is reset to nr_cpages by
> > f2fs_decompress_pages() and caused the kernel panic.
> >
> > [ 4416.564763] Unable to handle kernel NULL pointer dereference
> > at virtual address 0000000000000000
> > ...
> > [ 4416.896016] Workqueue: fsverity_read_queue f2fs_verity_work
> > [ 4416.908515] pc : fsverity_verify_page+0x20/0x78
> > [ 4416.913721] lr : f2fs_verify_bio+0x11c/0x29c
> > [ 4416.913722] sp : ffffffc019533cd0
> > [ 4416.913723] x29: ffffffc019533cd0 x28: 0000000000000402
> > [ 4416.913724] x27: 0000000000000001 x26: 0000000000000100
> > [ 4416.913726] x25: 0000000000000001 x24: 0000000000000004
> > [ 4416.913727] x23: 0000000000001000 x22: 0000000000000000
> > [ 4416.913728] x21: 0000000000000000 x20: ffffffff2076f9c0
> > [ 4416.913729] x19: ffffffff2076f9c0 x18: ffffff8a32380c30
> > [ 4416.913731] x17: ffffffc01f966d97 x16: 0000000000000298
> > [ 4416.913732] x15: 0000000000000000 x14: 0000000000000000
> > [ 4416.913733] x13: f074faec89ffffff x12: 0000000000000000
> > [ 4416.913734] x11: 0000000000001000 x10: 0000000000001000
> > [ 4416.929176] x9 : ffffffff20d1f5c7 x8 : 0000000000000000
> > [ 4416.929178] x7 : 626d7464ff286b6b x6 : ffffffc019533ade
> > [ 4416.929179] x5 : 000000008049000e x4 : ffffffff2793e9e0
> > [ 4416.929180] x3 : 000000008049000e x2 : ffffff89ecfa74d0
> > [ 4416.929181] x1 : 0000000000000c40 x0 : ffffffff2076f9c0
> > [ 4416.929184] Call trace:
> > [ 4416.929187] fsverity_verify_page+0x20/0x78
> > [ 4416.929189] f2fs_verify_bio+0x11c/0x29c
> > [ 4416.929192] f2fs_verity_work+0x58/0x84
> > [ 4417.050667] process_one_work+0x270/0x47c
> > [ 4417.055354] worker_thread+0x27c/0x4d8
> > [ 4417.059784] kthread+0x13c/0x320
> > [ 4417.063693] ret_from_fork+0x10/0x18
> >
> > Signed-off-by: Daeho Jeong <[email protected]>
> > Signed-off-by: Jaegeuk Kim <[email protected]>
> > ---
> > v3: back to v1 and enabled verity in a unit of cluster
> > v2: merged verity_pages with pending_pages, and increased the
> > pending_pages count only if STEP_VERITY is set on bio
>
> I am trying to review this but it is very hard, as the f2fs compression code is
> very hard to understand.
>
> It looks like a 'struct decompress_io_ctx' represents the work to decompress a
> particular cluster. Since the compressed data of the cluster can be read using
> multiple bios, there is a reference count of how many pages are remaining to be
> read before all the cluster's pages have been read and decompression can start.
>
> What I don't understand is why that reference counting needs to work differently
> depending on whether verity is enabled or not. Shouldn't it be exactly the
> same?
>
> There also seems to be some confusion about the scope of STEP_VERITY. Before
> f2fs compression was added, it was a per-bio thing. But now in a compressed
> file, it's really a per-cluster thing, since all decompressed pages in a
> compressed cluster are verified (or not verified) at once.
>
> Wouldn't it make a lot more sense to, when a cluster needs both compression and
> verity, *not* set STEP_VERITY on the bios, but rather set a similar flag in the
> decompress_io_ctx?
>
> - Eric
On Tue, Dec 08, 2020 at 08:51:45AM +0900, Daeho Jeong wrote:
> > I am trying to review this but it is very hard, as the f2fs compression code is
> > very hard to understand.
> >
> > It looks like a 'struct decompress_io_ctx' represents the work to decompress a
> > particular cluster. Since the compressed data of the cluster can be read using
> > multiple bios, there is a reference count of how many pages are remaining to be
> > read before all the cluster's pages have been read and decompression can start.
> >
> > What I don't understand is why that reference counting needs to work differently
> > depending on whether verity is enabled or not. Shouldn't it be exactly the
> > same?
> >
> > There also seems to be some confusion about the scope of STEP_VERITY. Before
> > f2fs compression was added, it was a per-bio thing. But now in a compressed
> > file, it's really a per-cluster thing, since all decompressed pages in a
> > compressed cluster are verified (or not verified) at once.
> >
> > Wouldn't it make a lot more sense to, when a cluster needs both compression and
> > verity, *not* set STEP_VERITY on the bios, but rather set a similar flag in the
> > decompress_io_ctx?
> >
>
> Eric,
>
> Decompression and verity can be executed in different thread contexts
> in different timing, so we need separate counts for each.
>
> We already use STEP_VERITY for non-compression case, so I think using
> this flag in here looks more making sense.
>
> Thanks,
That didn't really answer my questions.
I gave up trying to review this patch as the compression post-read handling is
just way too weird and hard to understand. I wrote a patch to clean it all up
instead, please take a look:
https://lkml.kernel.org/r/[email protected]
- Eric
On 12/07, Eric Biggers wrote:
> On Tue, Dec 08, 2020 at 08:51:45AM +0900, Daeho Jeong wrote:
> > > I am trying to review this but it is very hard, as the f2fs compression code is
> > > very hard to understand.
> > >
> > > It looks like a 'struct decompress_io_ctx' represents the work to decompress a
> > > particular cluster. Since the compressed data of the cluster can be read using
> > > multiple bios, there is a reference count of how many pages are remaining to be
> > > read before all the cluster's pages have been read and decompression can start.
> > >
> > > What I don't understand is why that reference counting needs to work differently
> > > depending on whether verity is enabled or not. Shouldn't it be exactly the
> > > same?
> > >
> > > There also seems to be some confusion about the scope of STEP_VERITY. Before
> > > f2fs compression was added, it was a per-bio thing. But now in a compressed
> > > file, it's really a per-cluster thing, since all decompressed pages in a
> > > compressed cluster are verified (or not verified) at once.
> > >
> > > Wouldn't it make a lot more sense to, when a cluster needs both compression and
> > > verity, *not* set STEP_VERITY on the bios, but rather set a similar flag in the
> > > decompress_io_ctx?
> > >
> >
> > Eric,
> >
> > Decompression and verity can be executed in different thread contexts
> > in different timing, so we need separate counts for each.
> >
> > We already use STEP_VERITY for non-compression case, so I think using
> > this flag in here looks more making sense.
> >
> > Thanks,
>
> That didn't really answer my questions.
>
> I gave up trying to review this patch as the compression post-read handling is
> just way too weird and hard to understand. I wrote a patch to clean it all up
> instead, please take a look:
> https://lkml.kernel.org/r/[email protected]
Eric,
I also tried to review your patch, but it's quite hard to follow quickly and
requires stress tests for a while. Given upcoming merge window and urgency of
the bug, let me apply Daeho's fix first. By any chance, may I ask revisiting
your clean-up on top of the fix in the next cycle?
Thanks,
>
> - Eric
On 2020/12/9 7:55, Jaegeuk Kim wrote:
> On 12/07, Eric Biggers wrote:
>> On Tue, Dec 08, 2020 at 08:51:45AM +0900, Daeho Jeong wrote:
>>>> I am trying to review this but it is very hard, as the f2fs compression code is
>>>> very hard to understand.
>>>>
>>>> It looks like a 'struct decompress_io_ctx' represents the work to decompress a
>>>> particular cluster. Since the compressed data of the cluster can be read using
>>>> multiple bios, there is a reference count of how many pages are remaining to be
>>>> read before all the cluster's pages have been read and decompression can start.
>>>>
>>>> What I don't understand is why that reference counting needs to work differently
>>>> depending on whether verity is enabled or not. Shouldn't it be exactly the
>>>> same?
>>>>
>>>> There also seems to be some confusion about the scope of STEP_VERITY. Before
>>>> f2fs compression was added, it was a per-bio thing. But now in a compressed
>>>> file, it's really a per-cluster thing, since all decompressed pages in a
>>>> compressed cluster are verified (or not verified) at once.
>>>>
>>>> Wouldn't it make a lot more sense to, when a cluster needs both compression and
>>>> verity, *not* set STEP_VERITY on the bios, but rather set a similar flag in the
>>>> decompress_io_ctx?
>>>>
>>>
>>> Eric,
>>>
>>> Decompression and verity can be executed in different thread contexts
>>> in different timing, so we need separate counts for each.
>>>
>>> We already use STEP_VERITY for non-compression case, so I think using
>>> this flag in here looks more making sense.
>>>
>>> Thanks,
>>
>> That didn't really answer my questions.
>>
>> I gave up trying to review this patch as the compression post-read handling is
>> just way too weird and hard to understand. I wrote a patch to clean it all up
>> instead, please take a look:
>> https://lkml.kernel.org/r/[email protected]
>
> Eric,
> I also tried to review your patch, but it's quite hard to follow quickly and
Me too, it needs more time to check whether the cleanup doesn't miss any cases.
Thanks,
> requires stress tests for a while. Given upcoming merge window and urgency of
> the bug, let me apply Daeho's fix first. By any chance, may I ask revisiting
> your clean-up on top of the fix in the next cycle?
>
> Thanks,
>
>>
>> - Eric
>
>
> _______________________________________________
> Linux-f2fs-devel mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
> .
>
On Wed, Dec 09, 2020 at 09:34:06AM +0800, Chao Yu wrote:
> On 2020/12/9 7:55, Jaegeuk Kim wrote:
> > On 12/07, Eric Biggers wrote:
> > > On Tue, Dec 08, 2020 at 08:51:45AM +0900, Daeho Jeong wrote:
> > > > > I am trying to review this but it is very hard, as the f2fs compression code is
> > > > > very hard to understand.
> > > > >
> > > > > It looks like a 'struct decompress_io_ctx' represents the work to decompress a
> > > > > particular cluster. Since the compressed data of the cluster can be read using
> > > > > multiple bios, there is a reference count of how many pages are remaining to be
> > > > > read before all the cluster's pages have been read and decompression can start.
> > > > >
> > > > > What I don't understand is why that reference counting needs to work differently
> > > > > depending on whether verity is enabled or not. Shouldn't it be exactly the
> > > > > same?
> > > > >
> > > > > There also seems to be some confusion about the scope of STEP_VERITY. Before
> > > > > f2fs compression was added, it was a per-bio thing. But now in a compressed
> > > > > file, it's really a per-cluster thing, since all decompressed pages in a
> > > > > compressed cluster are verified (or not verified) at once.
> > > > >
> > > > > Wouldn't it make a lot more sense to, when a cluster needs both compression and
> > > > > verity, *not* set STEP_VERITY on the bios, but rather set a similar flag in the
> > > > > decompress_io_ctx?
> > > > >
> > > >
> > > > Eric,
> > > >
> > > > Decompression and verity can be executed in different thread contexts
> > > > in different timing, so we need separate counts for each.
> > > >
> > > > We already use STEP_VERITY for non-compression case, so I think using
> > > > this flag in here looks more making sense.
> > > >
> > > > Thanks,
> > >
> > > That didn't really answer my questions.
> > >
> > > I gave up trying to review this patch as the compression post-read handling is
> > > just way too weird and hard to understand. I wrote a patch to clean it all up
> > > instead, please take a look:
> > > https://lkml.kernel.org/r/[email protected]
> >
> > Eric,
> > I also tried to review your patch, but it's quite hard to follow quickly and
>
> Me too, it needs more time to check whether the cleanup doesn't miss any cases.
>
> Thanks,
>
> > requires stress tests for a while. Given upcoming merge window and urgency of
> > the bug, let me apply Daeho's fix first. By any chance, may I ask revisiting
> > your clean-up on top of the fix in the next cycle?
> >
> > Thanks,
I'm not in a hurry, please just take a look when you have time.
- Eric