2019-08-19 12:30:56

by Juri Lelli

[permalink] [raw]
Subject: [RT PATCH v2] net/xfrm/xfrm_ipcomp: Protect scratch buffer with local_lock

The following BUG has been reported while running ipsec tests.

BUG: scheduling while atomic: irq/78-eno3-rx-/12023/0x00000002
Modules linked in: ipcomp xfrm_ipcomp ...
Preemption disabled at:
[<ffffffffc0b29730>] ipcomp_input+0xd0/0x9a0 [xfrm_ipcomp]
CPU: 1 PID: 12023 Comm: irq/78-eno3-rx- Kdump: loaded Not tainted [...] #1
Hardware name: [...]
Call Trace:
dump_stack+0x5c/0x80
? ipcomp_input+0xd0/0x9a0 [xfrm_ipcomp]
__schedule_bug.cold.81+0x44/0x51
__schedule+0x5bf/0x6a0
schedule+0x39/0xd0
rt_spin_lock_slowlock_locked+0x10e/0x2b0
rt_spin_lock_slowlock+0x50/0x80
get_page_from_freelist+0x609/0x1560
? zlib_updatewindow+0x5a/0xd0
__alloc_pages_nodemask+0xd9/0x280
ipcomp_input+0x299/0x9a0 [xfrm_ipcomp]
xfrm_input+0x5e3/0x960
xfrm4_ipcomp_rcv+0x34/0x50
ip_local_deliver_finish+0x22d/0x250
ip_local_deliver+0x6d/0x110
? ip_rcv_finish+0xac/0x480
ip_rcv+0x28e/0x3f9
? packet_rcv+0x43/0x4c0
__netif_receive_skb_core+0xb7c/0xd10
? inet_gro_receive+0x8e/0x2f0
netif_receive_skb_internal+0x4a/0x160
napi_gro_receive+0xee/0x110
tg3_rx+0x2a8/0x810 [tg3]
tg3_poll_work+0x3b3/0x830 [tg3]
tg3_poll_msix+0x3b/0x170 [tg3]
net_rx_action+0x1ff/0x470
? __switch_to_asm+0x41/0x70
do_current_softirqs+0x223/0x3e0
? irq_thread_check_affinity+0x20/0x20
__local_bh_enable+0x51/0x60
irq_forced_thread_fn+0x5e/0x80
? irq_finalize_oneshot.part.45+0xf0/0xf0
irq_thread+0x13d/0x1a0
? wake_threads_waitq+0x30/0x30
kthread+0x112/0x130
? kthread_create_worker_on_cpu+0x70/0x70
ret_from_fork+0x35/0x40

The problem resides in the fact that get_cpu(), called from
ipcomp_input() disables preemption, and that triggers the scheduling
while atomic BUG further down the callpath chain of
get_page_from_freelist(), i.e.,

ipcomp_input
ipcomp_decompress
<-- get_cpu()
alloc_page(GFP_ATOMIC)
alloc_pages(GFP_ATOMIC, 0)
alloc_pages_current
__alloc_pages_nodemask
get_page_from_freelist
(try_this_zone:) rmqueue
rmqueue_pcplist
__rmqueue_pcplist
rmqueue_bulk
<-- spin_lock(&zone->lock) - BUG

Fix this by replacing get_cpu() with a local lock to protect
ipcomp_scratches buffers used by ipcomp_(de)compress().

Suggested-by: Sebastian Andrzej Siewior <[email protected]>
Signed-off-by: Juri Lelli <[email protected]>

--
This v2 applies to v4.19.59-rt24.

v1 -> v2: Use a local lock instead of {get,put}_cpu_light(), as the
latter doesn't protect against multiple CPUs invoking (de)compress
function at the same time, thus concurently working on the same scratch
buffer.
---
net/xfrm/xfrm_ipcomp.c | 19 +++++++++++++------
1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c
index a00ec715aa46..3b4a38febf0a 100644
--- a/net/xfrm/xfrm_ipcomp.c
+++ b/net/xfrm/xfrm_ipcomp.c
@@ -18,6 +18,7 @@
#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/list.h>
+#include <linux/locallock.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
@@ -35,6 +36,7 @@ struct ipcomp_tfms {
};

static DEFINE_MUTEX(ipcomp_resource_mutex);
+static DEFINE_LOCAL_IRQ_LOCK(ipcomp_scratches_lock);
static void * __percpu *ipcomp_scratches;
static int ipcomp_scratch_users;
static LIST_HEAD(ipcomp_tfms_list);
@@ -45,12 +47,14 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb)
const int plen = skb->len;
int dlen = IPCOMP_SCRATCH_SIZE;
const u8 *start = skb->data;
- const int cpu = get_cpu();
- u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu);
- struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu);
- int err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen);
- int len;
+ u8 *scratch;
+ struct crypto_comp *tfm;
+ int err, len;

+ local_lock(ipcomp_scratches_lock);
+ scratch = *this_cpu_ptr(ipcomp_scratches);
+ tfm = *this_cpu_ptr(ipcd->tfms);
+ err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen);
if (err)
goto out;

@@ -103,7 +107,7 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb)
err = 0;

out:
- put_cpu();
+ local_unlock(ipcomp_scratches_lock);
return err;
}

@@ -146,6 +150,7 @@ static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb)
int err;

local_bh_disable();
+ local_lock(ipcomp_scratches_lock);
scratch = *this_cpu_ptr(ipcomp_scratches);
tfm = *this_cpu_ptr(ipcd->tfms);
err = crypto_comp_compress(tfm, start, plen, scratch, &dlen);
@@ -158,12 +163,14 @@ static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb)
}

memcpy(start + sizeof(struct ip_comp_hdr), scratch, dlen);
+ local_unlock(ipcomp_scratches_lock);
local_bh_enable();

pskb_trim(skb, dlen + sizeof(struct ip_comp_hdr));
return 0;

out:
+ local_unlock(ipcomp_scratches_lock);
local_bh_enable();
return err;
}
--
2.17.2


2019-08-19 19:58:44

by Steven Rostedt

[permalink] [raw]
Subject: Re: [RT PATCH v2] net/xfrm/xfrm_ipcomp: Protect scratch buffer with local_lock

On Mon, 19 Aug 2019 14:27:31 +0200
Juri Lelli <[email protected]> wrote:

> The following BUG has been reported while running ipsec tests.

Thanks!

I'm still in the process of backporting patches to fix some bugs that
showed up with the latest merge of upstream stable. I'll add this to
the queue to add.

-- Steve


>
> BUG: scheduling while atomic: irq/78-eno3-rx-/12023/0x00000002
> Modules linked in: ipcomp xfrm_ipcomp ...
> Preemption disabled at:
> [<ffffffffc0b29730>] ipcomp_input+0xd0/0x9a0 [xfrm_ipcomp]
> CPU: 1 PID: 12023 Comm: irq/78-eno3-rx- Kdump: loaded Not tainted [...] #1
> Hardware name: [...]
> Call Trace:
> dump_stack+0x5c/0x80
> ? ipcomp_input+0xd0/0x9a0 [xfrm_ipcomp]
> __schedule_bug.cold.81+0x44/0x51
> __schedule+0x5bf/0x6a0
> schedule+0x39/0xd0
> rt_spin_lock_slowlock_locked+0x10e/0x2b0
> rt_spin_lock_slowlock+0x50/0x80
> get_page_from_freelist+0x609/0x1560
> ? zlib_updatewindow+0x5a/0xd0
> __alloc_pages_nodemask+0xd9/0x280
> ipcomp_input+0x299/0x9a0 [xfrm_ipcomp]
> xfrm_input+0x5e3/0x960
> xfrm4_ipcomp_rcv+0x34/0x50
> ip_local_deliver_finish+0x22d/0x250
> ip_local_deliver+0x6d/0x110
> ? ip_rcv_finish+0xac/0x480
> ip_rcv+0x28e/0x3f9
> ? packet_rcv+0x43/0x4c0
> __netif_receive_skb_core+0xb7c/0xd10
> ? inet_gro_receive+0x8e/0x2f0
> netif_receive_skb_internal+0x4a/0x160
> napi_gro_receive+0xee/0x110
> tg3_rx+0x2a8/0x810 [tg3]
> tg3_poll_work+0x3b3/0x830 [tg3]
> tg3_poll_msix+0x3b/0x170 [tg3]
> net_rx_action+0x1ff/0x470
> ? __switch_to_asm+0x41/0x70
> do_current_softirqs+0x223/0x3e0
> ? irq_thread_check_affinity+0x20/0x20
> __local_bh_enable+0x51/0x60
> irq_forced_thread_fn+0x5e/0x80
> ? irq_finalize_oneshot.part.45+0xf0/0xf0
> irq_thread+0x13d/0x1a0
> ? wake_threads_waitq+0x30/0x30
> kthread+0x112/0x130
> ? kthread_create_worker_on_cpu+0x70/0x70
> ret_from_fork+0x35/0x40
>


2019-08-20 05:37:09

by kernel test robot

[permalink] [raw]
Subject: Re: [RT PATCH v2] net/xfrm/xfrm_ipcomp: Protect scratch buffer with local_lock

Hi Juri,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on linus/master]
[cannot apply to v5.3-rc5 next-20190819]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url: https://github.com/0day-ci/linux/commits/Juri-Lelli/net-xfrm-xfrm_ipcomp-Protect-scratch-buffer-with-local_lock/20190820-113542
config: ia64-allmodconfig (attached as .config)
compiler: ia64-linux-gcc (GCC) 7.4.0
reproduce:
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
GCC_VERSION=7.4.0 make.cross ARCH=ia64

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <[email protected]>

All errors (new ones prefixed by >>):

>> net//xfrm/xfrm_ipcomp.c:17:10: fatal error: linux/locallock.h: No such file or directory
#include <linux/locallock.h>
^~~~~~~~~~~~~~~~~~~
compilation terminated.

vim +17 net//xfrm/xfrm_ipcomp.c

> 17 #include <linux/locallock.h>
18 #include <linux/module.h>
19 #include <linux/mutex.h>
20 #include <linux/percpu.h>
21 #include <linux/slab.h>
22 #include <linux/smp.h>
23 #include <linux/vmalloc.h>
24 #include <net/ip.h>
25 #include <net/ipcomp.h>
26 #include <net/xfrm.h>
27

---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation


Attachments:
(No filename) (1.56 kB)
.config.gz (53.30 kB)
Download all attachments

2019-08-20 06:43:27

by Juri Lelli

[permalink] [raw]
Subject: Re: [RT PATCH v2] net/xfrm/xfrm_ipcomp: Protect scratch buffer with local_lock

Hi,

On 20/08/19 13:35, kbuild test robot wrote:
> Hi Juri,
>
> Thank you for the patch! Yet something to improve:
>
> [auto build test ERROR on linus/master]
> [cannot apply to v5.3-rc5 next-20190819]
> [if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

This seems to be indeed the case, as this patch is for RT v4.19-rt
stable tree:

git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-stable-rt.git v4.19-rt

I was under the impression that putting "RT" on the subject line (before
PATCH) would prevent build bot to pick this up, but maybe something
else/different is needed?

Thanks,

Juri

2019-08-20 06:45:36

by Juri Lelli

[permalink] [raw]
Subject: Re: [RT PATCH v2] net/xfrm/xfrm_ipcomp: Protect scratch buffer with local_lock

On 19/08/19 15:57, Steven Rostedt wrote:
> On Mon, 19 Aug 2019 14:27:31 +0200
> Juri Lelli <[email protected]> wrote:
>
> > The following BUG has been reported while running ipsec tests.
>
> Thanks!
>
> I'm still in the process of backporting patches to fix some bugs that
> showed up with the latest merge of upstream stable. I'll add this to
> the queue to add.

Great, thank you!

Best,

Juri

Subject: Re: [RT PATCH v2] net/xfrm/xfrm_ipcomp: Protect scratch buffer with local_lock

On 2019-08-19 14:27:31 [+0200], Juri Lelli wrote:
> This v2 applies to v4.19.59-rt24.

Looks good, I suggest to apply this to v4.19 and earlier.

For v5.2 and later (including upstream) please send a patch to simply
replace get_cpu() with smp_processor_id(). The reason is that get_cpu()
will not disable BH and according to the call path this function is
invoked in NAPI callback and the other sides does local_bh_disable().
Therefore the caller of this must ensure that BH is disabled and
disabling preemption is not enough.

Sebastian

2019-08-21 02:18:29

by Philip Li

[permalink] [raw]
Subject: RE: [RT PATCH v2] net/xfrm/xfrm_ipcomp: Protect scratch buffer with local_lock

> Subject: Re: [RT PATCH v2] net/xfrm/xfrm_ipcomp: Protect scratch buffer with
> local_lock
>
> Hi,
>
> On 20/08/19 13:35, kbuild test robot wrote:
> > Hi Juri,
> >
> > Thank you for the patch! Yet something to improve:
> >
> > [auto build test ERROR on linus/master]
> > [cannot apply to v5.3-rc5 next-20190819]
> > [if your patch is applied to the wrong git tree, please drop us a note to help
> improve the system]
>
> This seems to be indeed the case, as this patch is for RT v4.19-rt
> stable tree:
>
> git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-stable-rt.git v4.19-rt
>
> I was under the impression that putting "RT" on the subject line (before
> PATCH) would prevent build bot to pick this up, but maybe something
> else/different is needed?
Hi Juri, currently if the mail subject has RFC, we will test it but send report
privately to author only.

>
> Thanks,
>
> Juri

2019-08-21 06:45:26

by Juri Lelli

[permalink] [raw]
Subject: Re: [RT PATCH v2] net/xfrm/xfrm_ipcomp: Protect scratch buffer with local_lock

On 21/08/19 01:43, Li, Philip wrote:
> > Subject: Re: [RT PATCH v2] net/xfrm/xfrm_ipcomp: Protect scratch buffer with
> > local_lock
> >
> > Hi,
> >
> > On 20/08/19 13:35, kbuild test robot wrote:
> > > Hi Juri,
> > >
> > > Thank you for the patch! Yet something to improve:
> > >
> > > [auto build test ERROR on linus/master]
> > > [cannot apply to v5.3-rc5 next-20190819]
> > > [if your patch is applied to the wrong git tree, please drop us a note to help
> > improve the system]
> >
> > This seems to be indeed the case, as this patch is for RT v4.19-rt
> > stable tree:
> >
> > git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-stable-rt.git v4.19-rt
> >
> > I was under the impression that putting "RT" on the subject line (before
> > PATCH) would prevent build bot to pick this up, but maybe something
> > else/different is needed?
> Hi Juri, currently if the mail subject has RFC, we will test it but send report
> privately to author only.

OK. But, my email had "RT" and not "RFC" in the subject (since it is
meant for one of the PREEMPT_RT stable trees [1]).

Best,

Juri

1 - git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-stable-rt.git v4.19-rt

Subject: Re: [RT PATCH v2] net/xfrm/xfrm_ipcomp: Protect scratch buffer with local_lock

On 2019-08-21 08:44:22 [+0200], Juri Lelli wrote:
> > Hi Juri, currently if the mail subject has RFC, we will test it but send report
> > privately to author only.
>
> OK. But, my email had "RT" and not "RFC" in the subject (since it is
> meant for one of the PREEMPT_RT stable trees [1]).

Was the RT tag consider at all at some point? I remember I asked for it
and then the bot did not complain again or I don't remember. At the same
point my rt-devel tree was added to the trees-to-be-tested.

> Best,
>
> Juri
>
> 1 - git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-stable-rt.git v4.19-rt

Sebastian

2019-09-04 06:05:12

by Joerg Vehlow

[permalink] [raw]
Subject: Re: [RT PATCH v2] net/xfrm/xfrm_ipcomp: Protect scratch buffer with local_lock

I just happen to be analyzing an error in the kernel if ipcomp is used with rt
patches. The reason was the meissing lock around the scratch buffer for the
compress call. Now that I have applied Juris fix, I get another error:

[ 139.717259] BUG: unable to handle kernel NULL pointer dereference at 0000000000000518
[ 139.717260] PGD 0 P4D 0
[ 139.717262] Oops: 0000 [#1] PREEMPT SMP PTI
[ 139.717273] CPU: 2 PID: 11987 Comm: netstress Not tainted 4.19.59-rt24-preemt-rt #1
[ 139.717274] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-0-ga698c8995f-prebuilt.qemu.org 04/01/2014
[ 139.717306] RIP: 0010:xfrm_trans_reinject+0x97/0xd0
[ 139.717307] Code: 42 eb 45 83 6d b0 01 31 f6 48 8b 42 08 48 c7 42 08 00 00 00 00 48 8b 0a 48 c7 02 00 00 00 00 48 89 41 08 48 89 08 48 8b 42 10 <48> 8b b8 18 05 00 00 48 8b 42 40 e8 d9 e1 4b 00 48 8b 55 a0 48 39
[ 139.717307] RSP: 0018:ffffc900007b37e8 EFLAGS: 00010246
[ 139.717308] RAX: 0000000000000000 RBX: ffffc900007b37e8 RCX: ffff88807db206a8
[ 139.717309] RDX: ffff88807db206a8 RSI: 0000000000000000 RDI: 0000000000000000
[ 139.717309] RBP: ffffc900007b3848 R08: 0000000000000001 R09: ffffc900007b35c8
[ 139.717309] R10: ffffea0001dcfc00 R11: 00000000000890c4 R12: ffff88807db20680
[ 139.717310] R13: 00000000000f4240 R14: 0000000000000000 R15: 0000000000000000
[ 139.717310] FS: 00007f4643034700(0000) GS:ffff88807db00000(0000) knlGS:0000000000000000
[ 139.717311] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 139.717337] CR2: 0000000000000518 CR3: 00000000769c6000 CR4: 00000000000006e0
[ 139.717350] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 139.717350] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 139.717350] Call Trace:
[ 139.717387] tasklet_action_common.isra.18+0x6d/0xd0
[ 139.717388] tasklet_action+0x1d/0x20
[ 139.717389] do_current_softirqs+0x196/0x360
[ 139.717390] __local_bh_enable+0x51/0x60
[ 139.717397] ip_finish_output2+0x18b/0x3f0
[ 139.717408] ? task_rq_lock+0x53/0xe0
[ 139.717415] ip_finish_output+0xbe/0x1b0
[ 139.717416] ip_output+0x72/0x100
[ 139.717422] ? ipcomp_output+0x5e/0x280
[ 139.717424] xfrm_output_resume+0x4b5/0x540
[ 139.717436] ? refcount_dec_and_test_checked+0x11/0x20
[ 139.717443] ? kfree_skbmem+0x33/0x80
[ 139.717444] xfrm_output+0xd7/0x110
[ 139.717451] xfrm4_output_finish+0x2b/0x30
[ 139.717452] __xfrm4_output+0x3a/0x50
[ 139.717453] xfrm4_output+0x40/0xe0
[ 139.717454] ? xfrm_dst_check+0x174/0x250
[ 139.717455] ? xfrm4_output+0x40/0xe0
[ 139.717456] ? xfrm_dst_check+0x174/0x250
[ 139.717457] ip_local_out+0x3b/0x50
[ 139.717458] __ip_queue_xmit+0x16b/0x420
[ 139.717464] ip_queue_xmit+0x10/0x20
[ 139.717466] __tcp_transmit_skb+0x566/0xad0
[ 139.717467] tcp_write_xmit+0x3a4/0x1050
[ 139.717468] __tcp_push_pending_frames+0x35/0xe0
[ 139.717469] tcp_push+0xdb/0x100
[ 139.717469] tcp_sendmsg_locked+0x491/0xd70
[ 139.717470] tcp_sendmsg+0x2c/0x50
[ 139.717476] inet_sendmsg+0x3e/0xf0
[ 139.717483] sock_sendmsg+0x3e/0x50
[ 139.717484] __sys_sendto+0x114/0x1a0
[ 139.717491] ? __rt_mutex_unlock+0xe/0x10
[ 139.717492] ? _mutex_unlock+0xe/0x10
[ 139.717500] ? ksys_write+0xc5/0xe0
[ 139.717501] __x64_sys_sendto+0x28/0x30
[ 139.717503] do_syscall_64+0x4d/0x110
[ 139.717504] entry_SYSCALL_64_after_hwframe+0x44/0xa9

Did I find some other bug here? Can you maybe point me in a direction,
because I am quite lost now where to look.


Apart from that:
Also is the bh_disable/bh_enable in ipcomp_compress even required, if
a lock is used?

Joerg