2023-07-07 11:05:19

by Niklas Schnelle

[permalink] [raw]
Subject: [PATCH net v2 1/3] s390/ism: Fix locking for forwarding of IRQs and events to clients

The clients array references all registered clients and is protected by
the clients_lock. Besides its use as general list of clients the clients
array is accessed in ism_handle_irq() to forward ISM device events to
clients.

While the clients_lock is taken in the IRQ handler when calling
handle_event() it is however incorrectly not held during the
client->handle_irq() call and for the preceding clients[] access leaving
it unprotected against concurrent client (un-)registration.

Furthermore the accesses to ism->sba_client_arr[] in ism_register_dmb()
and ism_unregister_dmb() are not protected by any lock. This is
especially problematic as the client ID from the ism->sba_client_arr[]
is not checked against NO_CLIENT and neither is the client pointer
checked.

Instead of expanding the use of the clients_lock further add a separate
array in struct ism_dev which references clients subscribed to the
device's events and IRQs. This array is protected by ism->lock which is
already taken in ism_handle_irq() and can be taken outside the IRQ
handler when adding/removing subscribers or the accessing
ism->sba_client_arr[]. This also means that the clients_lock is no
longer taken in IRQ context.

Fixes: 89e7d2ba61b7 ("net/ism: Add new API for client registration")
Signed-off-by: Niklas Schnelle <[email protected]>
---
drivers/s390/net/ism_drv.c | 44 +++++++++++++++++++++++++++++++-------
include/linux/ism.h | 1 +
2 files changed, 37 insertions(+), 8 deletions(-)

diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c
index 9b5fccdbc7d6..b664e4a08645 100644
--- a/drivers/s390/net/ism_drv.c
+++ b/drivers/s390/net/ism_drv.c
@@ -47,6 +47,15 @@ static struct ism_dev_list ism_dev_list = {
.mutex = __MUTEX_INITIALIZER(ism_dev_list.mutex),
};

+static void ism_setup_forwarding(struct ism_client *client, struct ism_dev *ism)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&ism->lock, flags);
+ ism->subs[client->id] = client;
+ spin_unlock_irqrestore(&ism->lock, flags);
+}
+
int ism_register_client(struct ism_client *client)
{
struct ism_dev *ism;
@@ -71,6 +80,7 @@ int ism_register_client(struct ism_client *client)
list_for_each_entry(ism, &ism_dev_list.list, list) {
ism->priv[i] = NULL;
client->add(ism);
+ ism_setup_forwarding(client, ism);
}
}
mutex_unlock(&ism_dev_list.mutex);
@@ -92,6 +102,9 @@ int ism_unregister_client(struct ism_client *client)
max_client--;
spin_unlock_irqrestore(&clients_lock, flags);
list_for_each_entry(ism, &ism_dev_list.list, list) {
+ spin_lock_irqsave(&ism->lock, flags);
+ /* Stop forwarding IRQs and events */
+ ism->subs[client->id] = NULL;
for (int i = 0; i < ISM_NR_DMBS; ++i) {
if (ism->sba_client_arr[i] == client->id) {
pr_err("%s: attempt to unregister client '%s'"
@@ -101,6 +114,7 @@ int ism_unregister_client(struct ism_client *client)
goto out;
}
}
+ spin_unlock_irqrestore(&ism->lock, flags);
}
out:
mutex_unlock(&ism_dev_list.mutex);
@@ -328,6 +342,7 @@ int ism_register_dmb(struct ism_dev *ism, struct ism_dmb *dmb,
struct ism_client *client)
{
union ism_reg_dmb cmd;
+ unsigned long flags;
int ret;

ret = ism_alloc_dmb(ism, dmb);
@@ -351,7 +366,9 @@ int ism_register_dmb(struct ism_dev *ism, struct ism_dmb *dmb,
goto out;
}
dmb->dmb_tok = cmd.response.dmb_tok;
+ spin_lock_irqsave(&ism->lock, flags);
ism->sba_client_arr[dmb->sba_idx - ISM_DMB_BIT_OFFSET] = client->id;
+ spin_unlock_irqrestore(&ism->lock, flags);
out:
return ret;
}
@@ -360,6 +377,7 @@ EXPORT_SYMBOL_GPL(ism_register_dmb);
int ism_unregister_dmb(struct ism_dev *ism, struct ism_dmb *dmb)
{
union ism_unreg_dmb cmd;
+ unsigned long flags;
int ret;

memset(&cmd, 0, sizeof(cmd));
@@ -368,7 +386,9 @@ int ism_unregister_dmb(struct ism_dev *ism, struct ism_dmb *dmb)

cmd.request.dmb_tok = dmb->dmb_tok;

+ spin_lock_irqsave(&ism->lock, flags);
ism->sba_client_arr[dmb->sba_idx - ISM_DMB_BIT_OFFSET] = NO_CLIENT;
+ spin_unlock_irqrestore(&ism->lock, flags);

ret = ism_cmd(ism, &cmd);
if (ret && ret != ISM_ERROR)
@@ -491,6 +511,7 @@ static u16 ism_get_chid(struct ism_dev *ism)
static void ism_handle_event(struct ism_dev *ism)
{
struct ism_event *entry;
+ struct ism_client *clt;
int i;

while ((ism->ieq_idx + 1) != READ_ONCE(ism->ieq->header.idx)) {
@@ -499,21 +520,21 @@ static void ism_handle_event(struct ism_dev *ism)

entry = &ism->ieq->entry[ism->ieq_idx];
debug_event(ism_debug_info, 2, entry, sizeof(*entry));
- spin_lock(&clients_lock);
- for (i = 0; i < max_client; ++i)
- if (clients[i])
- clients[i]->handle_event(ism, entry);
- spin_unlock(&clients_lock);
+ for (i = 0; i < max_client; ++i) {
+ clt = ism->subs[i];
+ if (clt)
+ clt->handle_event(ism, entry);
+ }
}
}

static irqreturn_t ism_handle_irq(int irq, void *data)
{
struct ism_dev *ism = data;
- struct ism_client *clt;
unsigned long bit, end;
unsigned long *bv;
u16 dmbemask;
+ u8 client_id;

bv = (void *) &ism->sba->dmb_bits[ISM_DMB_WORD_OFFSET];
end = sizeof(ism->sba->dmb_bits) * BITS_PER_BYTE - ISM_DMB_BIT_OFFSET;
@@ -530,8 +551,10 @@ static irqreturn_t ism_handle_irq(int irq, void *data)
dmbemask = ism->sba->dmbe_mask[bit + ISM_DMB_BIT_OFFSET];
ism->sba->dmbe_mask[bit + ISM_DMB_BIT_OFFSET] = 0;
barrier();
- clt = clients[ism->sba_client_arr[bit]];
- clt->handle_irq(ism, bit + ISM_DMB_BIT_OFFSET, dmbemask);
+ client_id = ism->sba_client_arr[bit];
+ if (unlikely(client_id == NO_CLIENT || !ism->subs[client_id]))
+ continue;
+ ism->subs[client_id]->handle_irq(ism, bit + ISM_DMB_BIT_OFFSET, dmbemask);
}

if (ism->sba->e) {
@@ -554,6 +577,7 @@ static void ism_dev_add_work_func(struct work_struct *work)
add_work);

client->add(client->tgt_ism);
+ ism_setup_forwarding(client, client->tgt_ism);
atomic_dec(&client->tgt_ism->add_dev_cnt);
wake_up(&client->tgt_ism->waitq);
}
@@ -691,7 +715,11 @@ static void ism_dev_remove_work_func(struct work_struct *work)
{
struct ism_client *client = container_of(work, struct ism_client,
remove_work);
+ unsigned long flags;

+ spin_lock_irqsave(&client->tgt_ism->lock, flags);
+ client->tgt_ism->subs[client->id] = NULL;
+ spin_unlock_irqrestore(&client->tgt_ism->lock, flags);
client->remove(client->tgt_ism);
atomic_dec(&client->tgt_ism->free_clients_cnt);
wake_up(&client->tgt_ism->waitq);
diff --git a/include/linux/ism.h b/include/linux/ism.h
index ea2bcdae7401..5160d47e5ea9 100644
--- a/include/linux/ism.h
+++ b/include/linux/ism.h
@@ -44,6 +44,7 @@ struct ism_dev {
u64 local_gid;
int ieq_idx;

+ struct ism_client *subs[MAX_CLIENTS];
atomic_t free_clients_cnt;
atomic_t add_dev_cnt;
wait_queue_head_t waitq;
--
2.39.2



2023-07-07 11:25:07

by Niklas Schnelle

[permalink] [raw]
Subject: Re: [PATCH net v2 1/3] s390/ism: Fix locking for forwarding of IRQs and events to clients

On Fri, 2023-07-07 at 12:43 +0200, Niklas Schnelle wrote:
> The clients array references all registered clients and is protected by
> the clients_lock. Besides its use as general list of clients the clients
> array is accessed in ism_handle_irq() to forward ISM device events to
> clients.
>
> While the clients_lock is taken in the IRQ handler when calling
> handle_event() it is however incorrectly not held during the
> client->handle_irq() call and for the preceding clients[] access leaving
> it unprotected against concurrent client (un-)registration.
>
> Furthermore the accesses to ism->sba_client_arr[] in ism_register_dmb()
> and ism_unregister_dmb() are not protected by any lock. This is
> especially problematic as the client ID from the ism->sba_client_arr[]
> is not checked against NO_CLIENT and neither is the client pointer
> checked.
>
> Instead of expanding the use of the clients_lock further add a separate
> array in struct ism_dev which references clients subscribed to the
> device's events and IRQs. This array is protected by ism->lock which is
> already taken in ism_handle_irq() and can be taken outside the IRQ
> handler when adding/removing subscribers or the accessing
> ism->sba_client_arr[]. This also means that the clients_lock is no
> longer taken in IRQ context.
>
> Fixes: 89e7d2ba61b7 ("net/ism: Add new API for client registration")
> Signed-off-by: Niklas Schnelle <[email protected]>
> ---

Sorry for the mess. My get_maintainers.pl to-cmd setup in git send-
email stumbled over the cover letter and so the cover letter was not
sent to the right people/lists messing up the "in-reply-to" references.
So I had to resend it.

Thanks
Niklas

2023-07-08 14:07:48

by Simon Horman

[permalink] [raw]
Subject: Re: [PATCH net v2 1/3] s390/ism: Fix locking for forwarding of IRQs and events to clients

On Fri, Jul 07, 2023 at 12:43:57PM +0200, Niklas Schnelle wrote:
> The clients array references all registered clients and is protected by
> the clients_lock. Besides its use as general list of clients the clients
> array is accessed in ism_handle_irq() to forward ISM device events to
> clients.
>
> While the clients_lock is taken in the IRQ handler when calling
> handle_event() it is however incorrectly not held during the
> client->handle_irq() call and for the preceding clients[] access leaving
> it unprotected against concurrent client (un-)registration.
>
> Furthermore the accesses to ism->sba_client_arr[] in ism_register_dmb()
> and ism_unregister_dmb() are not protected by any lock. This is
> especially problematic as the client ID from the ism->sba_client_arr[]
> is not checked against NO_CLIENT and neither is the client pointer
> checked.
>
> Instead of expanding the use of the clients_lock further add a separate
> array in struct ism_dev which references clients subscribed to the
> device's events and IRQs. This array is protected by ism->lock which is
> already taken in ism_handle_irq() and can be taken outside the IRQ
> handler when adding/removing subscribers or the accessing
> ism->sba_client_arr[]. This also means that the clients_lock is no
> longer taken in IRQ context.
>
> Fixes: 89e7d2ba61b7 ("net/ism: Add new API for client registration")
> Signed-off-by: Niklas Schnelle <[email protected]>

...

> @@ -71,6 +80,7 @@ int ism_register_client(struct ism_client *client)
> list_for_each_entry(ism, &ism_dev_list.list, list) {
> ism->priv[i] = NULL;
> client->add(ism);
> + ism_setup_forwarding(client, ism);
> }
> }
> mutex_unlock(&ism_dev_list.mutex);

...

> @@ -92,6 +102,9 @@ int ism_unregister_client(struct ism_client *client)
> max_client--;
> spin_unlock_irqrestore(&clients_lock, flags);
> list_for_each_entry(ism, &ism_dev_list.list, list) {
> + spin_lock_irqsave(&ism->lock, flags);

Hi Niklas,

The lock is taken here.

> + /* Stop forwarding IRQs and events */
> + ism->subs[client->id] = NULL;
> for (int i = 0; i < ISM_NR_DMBS; ++i) {
> if (ism->sba_client_arr[i] == client->id) {
> pr_err("%s: attempt to unregister client '%s'"
> @@ -101,6 +114,7 @@ int ism_unregister_client(struct ism_client *client)
> goto out;

But it does not appear to be released
(by the call to spin_unlock_irqrestore() below)
if goto out is called here.

> }
> }
> + spin_unlock_irqrestore(&ism->lock, flags);
> }
> out:
> mutex_unlock(&ism_dev_list.mutex);

2023-07-10 07:00:32

by Alexandra Winter

[permalink] [raw]
Subject: Re: [PATCH net v2 1/3] s390/ism: Fix locking for forwarding of IRQs and events to clients



On 08.07.23 15:36, Simon Horman wrote:
> On Fri, Jul 07, 2023 at 12:43:57PM +0200, Niklas Schnelle wrote:
[...]
>> @@ -92,6 +102,9 @@ int ism_unregister_client(struct ism_client *client)
>> max_client--;
>> spin_unlock_irqrestore(&clients_lock, flags);
>> list_for_each_entry(ism, &ism_dev_list.list, list) {
>> + spin_lock_irqsave(&ism->lock, flags);
>
> Hi Niklas,
>
> The lock is taken here.
>
>> + /* Stop forwarding IRQs and events */
>> + ism->subs[client->id] = NULL;
>> for (int i = 0; i < ISM_NR_DMBS; ++i) {
>> if (ism->sba_client_arr[i] == client->id) {
>> pr_err("%s: attempt to unregister client '%s'"
>> @@ -101,6 +114,7 @@ int ism_unregister_client(struct ism_client *client)
>> goto out;
>
> But it does not appear to be released
> (by the call to spin_unlock_irqrestore() below)
> if goto out is called here.
>
>> }
>> }
>> + spin_unlock_irqrestore(&ism->lock, flags);
>> }
>> out:
>> mutex_unlock(&ism_dev_list.mutex);
>

Great catch, Simon.
@Niklas, the missing unlock accidentially got moved to "[PATCH net v2 3/3] s390/ism: Do not unregister clients with registered DMBs"

2023-07-10 07:43:35

by Niklas Schnelle

[permalink] [raw]
Subject: Re: [PATCH net v2 1/3] s390/ism: Fix locking for forwarding of IRQs and events to clients

On Sat, 2023-07-08 at 14:36 +0100, Simon Horman wrote:
> On Fri, Jul 07, 2023 at 12:43:57PM +0200, Niklas Schnelle wrote:
> > The clients array references all registered clients and is protected by
> > the clients_lock. Besides its use as general list of clients the clients
> > array is accessed in ism_handle_irq() to forward ISM device events to
> > clients.
> >
> > While the clients_lock is taken in the IRQ handler when calling
> > handle_event() it is however incorrectly not held during the
> > client->handle_irq() call and for the preceding clients[] access leaving
> > it unprotected against concurrent client (un-)registration.
> >
> > Furthermore the accesses to ism->sba_client_arr[] in ism_register_dmb()
> > and ism_unregister_dmb() are not protected by any lock. This is
> > especially problematic as the client ID from the ism->sba_client_arr[]
> > is not checked against NO_CLIENT and neither is the client pointer
> > checked.
> >
> > Instead of expanding the use of the clients_lock further add a separate
> > array in struct ism_dev which references clients subscribed to the
> > device's events and IRQs. This array is protected by ism->lock which is
> > already taken in ism_handle_irq() and can be taken outside the IRQ
> > handler when adding/removing subscribers or the accessing
> > ism->sba_client_arr[]. This also means that the clients_lock is no
> > longer taken in IRQ context.
> >
> > Fixes: 89e7d2ba61b7 ("net/ism: Add new API for client registration")
> > Signed-off-by: Niklas Schnelle <[email protected]>
>
> ...
>
> > @@ -71,6 +80,7 @@ int ism_register_client(struct ism_client *client)
> > list_for_each_entry(ism, &ism_dev_list.list, list) {
> > ism->priv[i] = NULL;
> > client->add(ism);
> > + ism_setup_forwarding(client, ism);
> > }
> > }
> > mutex_unlock(&ism_dev_list.mutex);
>
> ...
>
> > @@ -92,6 +102,9 @@ int ism_unregister_client(struct ism_client *client)
> > max_client--;
> > spin_unlock_irqrestore(&clients_lock, flags);
> > list_for_each_entry(ism, &ism_dev_list.list, list) {
> > + spin_lock_irqsave(&ism->lock, flags);
>
> Hi Niklas,
>
> The lock is taken here.
>
> > + /* Stop forwarding IRQs and events */
> > + ism->subs[client->id] = NULL;
> > for (int i = 0; i < ISM_NR_DMBS; ++i) {
> > if (ism->sba_client_arr[i] == client->id) {
> > pr_err("%s: attempt to unregister client '%s'"
> > @@ -101,6 +114,7 @@ int ism_unregister_client(struct ism_client *client)
> > goto out;
>
> But it does not appear to be released
> (by the call to spin_unlock_irqrestore() below)
> if goto out is called here.

Good catch. Yes I screwed this up while splitting the patch up. The
spin_unlock_irqrestore() is there after patch 3 but should have been
added in patch 1. As far as I can see all 3 patches have already been
applied, otherwise I'd send a v3. Thankfully even in the in between
state this error case can really onlt happen due to driver bugs so
maybe it's okay?

>
> > }
> > }
> > + spin_unlock_irqrestore(&ism->lock, flags);
> > }
> > out:
> > mutex_unlock(&ism_dev_list.mutex);


2023-07-10 07:58:28

by Simon Horman

[permalink] [raw]
Subject: Re: [PATCH net v2 1/3] s390/ism: Fix locking for forwarding of IRQs and events to clients

On Mon, Jul 10, 2023 at 09:28:20AM +0200, Niklas Schnelle wrote:
> On Sat, 2023-07-08 at 14:36 +0100, Simon Horman wrote:
> > On Fri, Jul 07, 2023 at 12:43:57PM +0200, Niklas Schnelle wrote:
> > > The clients array references all registered clients and is protected by
> > > the clients_lock. Besides its use as general list of clients the clients
> > > array is accessed in ism_handle_irq() to forward ISM device events to
> > > clients.
> > >
> > > While the clients_lock is taken in the IRQ handler when calling
> > > handle_event() it is however incorrectly not held during the
> > > client->handle_irq() call and for the preceding clients[] access leaving
> > > it unprotected against concurrent client (un-)registration.
> > >
> > > Furthermore the accesses to ism->sba_client_arr[] in ism_register_dmb()
> > > and ism_unregister_dmb() are not protected by any lock. This is
> > > especially problematic as the client ID from the ism->sba_client_arr[]
> > > is not checked against NO_CLIENT and neither is the client pointer
> > > checked.
> > >
> > > Instead of expanding the use of the clients_lock further add a separate
> > > array in struct ism_dev which references clients subscribed to the
> > > device's events and IRQs. This array is protected by ism->lock which is
> > > already taken in ism_handle_irq() and can be taken outside the IRQ
> > > handler when adding/removing subscribers or the accessing
> > > ism->sba_client_arr[]. This also means that the clients_lock is no
> > > longer taken in IRQ context.
> > >
> > > Fixes: 89e7d2ba61b7 ("net/ism: Add new API for client registration")
> > > Signed-off-by: Niklas Schnelle <[email protected]>
> >
> > ...
> >
> > > @@ -71,6 +80,7 @@ int ism_register_client(struct ism_client *client)
> > > list_for_each_entry(ism, &ism_dev_list.list, list) {
> > > ism->priv[i] = NULL;
> > > client->add(ism);
> > > + ism_setup_forwarding(client, ism);
> > > }
> > > }
> > > mutex_unlock(&ism_dev_list.mutex);
> >
> > ...
> >
> > > @@ -92,6 +102,9 @@ int ism_unregister_client(struct ism_client *client)
> > > max_client--;
> > > spin_unlock_irqrestore(&clients_lock, flags);
> > > list_for_each_entry(ism, &ism_dev_list.list, list) {
> > > + spin_lock_irqsave(&ism->lock, flags);
> >
> > Hi Niklas,
> >
> > The lock is taken here.
> >
> > > + /* Stop forwarding IRQs and events */
> > > + ism->subs[client->id] = NULL;
> > > for (int i = 0; i < ISM_NR_DMBS; ++i) {
> > > if (ism->sba_client_arr[i] == client->id) {
> > > pr_err("%s: attempt to unregister client '%s'"
> > > @@ -101,6 +114,7 @@ int ism_unregister_client(struct ism_client *client)
> > > goto out;
> >
> > But it does not appear to be released
> > (by the call to spin_unlock_irqrestore() below)
> > if goto out is called here.
>
> Good catch. Yes I screwed this up while splitting the patch up. The
> spin_unlock_irqrestore() is there after patch 3 but should have been
> added in patch 1. As far as I can see all 3 patches have already been
> applied, otherwise I'd send a v3. Thankfully even in the in between
> state this error case can really onlt happen due to driver bugs so
> maybe it's okay?

Hi Niklas,

I also saw the patches have been accepted after I sent my previous email.
So, given that the problem is resolved by another patch in the series,
I think the situation is as good as it is going to get.