2020-03-18 20:47:59

by Thomas Gleixner

[permalink] [raw]
Subject: [patch V2 02/15] pci/switchtec: Replace completion wait queue usage for poll

From: Sebastian Andrzej Siewior <[email protected]>

The poll callback is using the completion wait queue and sticks it into
poll_wait() to wake up pollers after a command has completed.

This works to some extent, but cannot provide EPOLLEXCLUSIVE support
because the waker side uses complete_all() which unconditionally wakes up
all waiters. complete_all() is required because completions internally use
exclusive wait and complete() only wakes up one waiter by default.

This mixes conceptually different mechanisms and relies on internal
implementation details of completions, which in turn puts constraints on
changing the internal implementation of completions.

Replace it with a regular wait queue and store the state in struct
switchtec_user.

Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
Acked-by: Peter Zijlstra (Intel) <[email protected]>
Cc: Kurt Schwemmer <[email protected]>
Cc: Logan Gunthorpe <[email protected]>
Cc: Bjorn Helgaas <[email protected]>
Cc: [email protected]
---
V2: Reworded changelog.
---
drivers/pci/switch/switchtec.c | 22 +++++++++++++---------
1 file changed, 13 insertions(+), 9 deletions(-)

--- a/drivers/pci/switch/switchtec.c
+++ b/drivers/pci/switch/switchtec.c
@@ -52,10 +52,11 @@ struct switchtec_user {

enum mrpc_state state;

- struct completion comp;
+ wait_queue_head_t cmd_comp;
struct kref kref;
struct list_head list;

+ bool cmd_done;
u32 cmd;
u32 status;
u32 return_code;
@@ -77,7 +78,7 @@ static struct switchtec_user *stuser_cre
stuser->stdev = stdev;
kref_init(&stuser->kref);
INIT_LIST_HEAD(&stuser->list);
- init_completion(&stuser->comp);
+ init_waitqueue_head(&stuser->cmd_comp);
stuser->event_cnt = atomic_read(&stdev->event_cnt);

dev_dbg(&stdev->dev, "%s: %p\n", __func__, stuser);
@@ -175,7 +176,7 @@ static int mrpc_queue_cmd(struct switcht
kref_get(&stuser->kref);
stuser->read_len = sizeof(stuser->data);
stuser_set_state(stuser, MRPC_QUEUED);
- reinit_completion(&stuser->comp);
+ stuser->cmd_done = false;
list_add_tail(&stuser->list, &stdev->mrpc_queue);

mrpc_cmd_submit(stdev);
@@ -222,7 +223,8 @@ static void mrpc_complete_cmd(struct swi
memcpy_fromio(stuser->data, &stdev->mmio_mrpc->output_data,
stuser->read_len);
out:
- complete_all(&stuser->comp);
+ stuser->cmd_done = true;
+ wake_up_interruptible(&stuser->cmd_comp);
list_del_init(&stuser->list);
stuser_put(stuser);
stdev->mrpc_busy = 0;
@@ -529,10 +531,11 @@ static ssize_t switchtec_dev_read(struct
mutex_unlock(&stdev->mrpc_mutex);

if (filp->f_flags & O_NONBLOCK) {
- if (!try_wait_for_completion(&stuser->comp))
+ if (!stuser->cmd_done)
return -EAGAIN;
} else {
- rc = wait_for_completion_interruptible(&stuser->comp);
+ rc = wait_event_interruptible(stuser->cmd_comp,
+ stuser->cmd_done);
if (rc < 0)
return rc;
}
@@ -580,7 +583,7 @@ static __poll_t switchtec_dev_poll(struc
struct switchtec_dev *stdev = stuser->stdev;
__poll_t ret = 0;

- poll_wait(filp, &stuser->comp.wait, wait);
+ poll_wait(filp, &stuser->cmd_comp, wait);
poll_wait(filp, &stdev->event_wq, wait);

if (lock_mutex_and_test_alive(stdev))
@@ -588,7 +591,7 @@ static __poll_t switchtec_dev_poll(struc

mutex_unlock(&stdev->mrpc_mutex);

- if (try_wait_for_completion(&stuser->comp))
+ if (stuser->cmd_done)
ret |= EPOLLIN | EPOLLRDNORM;

if (stuser->event_cnt != atomic_read(&stdev->event_cnt))
@@ -1272,7 +1275,8 @@ static void stdev_kill(struct switchtec_

/* Wake up and kill any users waiting on an MRPC request */
list_for_each_entry_safe(stuser, tmpuser, &stdev->mrpc_queue, list) {
- complete_all(&stuser->comp);
+ stuser->cmd_done = true;
+ wake_up_interruptible(&stuser->cmd_comp);
list_del_init(&stuser->list);
stuser_put(stuser);
}


2020-03-18 22:13:15

by Logan Gunthorpe

[permalink] [raw]
Subject: Re: [patch V2 02/15] pci/switchtec: Replace completion wait queue usage for poll



On 2020-03-18 2:43 p.m., Thomas Gleixner wrote:
> From: Sebastian Andrzej Siewior <[email protected]>
>
> The poll callback is using the completion wait queue and sticks it into
> poll_wait() to wake up pollers after a command has completed.
>
> This works to some extent, but cannot provide EPOLLEXCLUSIVE support
> because the waker side uses complete_all() which unconditionally wakes up
> all waiters. complete_all() is required because completions internally use
> exclusive wait and complete() only wakes up one waiter by default.
>
> This mixes conceptually different mechanisms and relies on internal
> implementation details of completions, which in turn puts constraints on
> changing the internal implementation of completions.
>
> Replace it with a regular wait queue and store the state in struct
> switchtec_user.
>
> Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
> Acked-by: Peter Zijlstra (Intel) <[email protected]>

While I've been against open coding the completion in this driver for a
while, I'm convinced by the EPOLLEXCLUSIVE argument for this change.
I've reviewed and lightly tested the change with hardware:

Reviewed-by: Logan Gunthorpe <[email protected]>

Thanks,

Logan

> Cc: Kurt Schwemmer <[email protected]>
> Cc: Logan Gunthorpe <[email protected]>
> Cc: Bjorn Helgaas <[email protected]>
> Cc: [email protected]
> ---
> V2: Reworded changelog.
> ---
> drivers/pci/switch/switchtec.c | 22 +++++++++++++---------
> 1 file changed, 13 insertions(+), 9 deletions(-)
>
> --- a/drivers/pci/switch/switchtec.c
> +++ b/drivers/pci/switch/switchtec.c
> @@ -52,10 +52,11 @@ struct switchtec_user {
>
> enum mrpc_state state;
>
> - struct completion comp;
> + wait_queue_head_t cmd_comp;
> struct kref kref;
> struct list_head list;
>
> + bool cmd_done;
> u32 cmd;
> u32 status;
> u32 return_code;
> @@ -77,7 +78,7 @@ static struct switchtec_user *stuser_cre
> stuser->stdev = stdev;
> kref_init(&stuser->kref);
> INIT_LIST_HEAD(&stuser->list);
> - init_completion(&stuser->comp);
> + init_waitqueue_head(&stuser->cmd_comp);
> stuser->event_cnt = atomic_read(&stdev->event_cnt);
>
> dev_dbg(&stdev->dev, "%s: %p\n", __func__, stuser);
> @@ -175,7 +176,7 @@ static int mrpc_queue_cmd(struct switcht
> kref_get(&stuser->kref);
> stuser->read_len = sizeof(stuser->data);
> stuser_set_state(stuser, MRPC_QUEUED);
> - reinit_completion(&stuser->comp);
> + stuser->cmd_done = false;
> list_add_tail(&stuser->list, &stdev->mrpc_queue);
>
> mrpc_cmd_submit(stdev);
> @@ -222,7 +223,8 @@ static void mrpc_complete_cmd(struct swi
> memcpy_fromio(stuser->data, &stdev->mmio_mrpc->output_data,
> stuser->read_len);
> out:
> - complete_all(&stuser->comp);
> + stuser->cmd_done = true;
> + wake_up_interruptible(&stuser->cmd_comp);
> list_del_init(&stuser->list);
> stuser_put(stuser);
> stdev->mrpc_busy = 0;
> @@ -529,10 +531,11 @@ static ssize_t switchtec_dev_read(struct
> mutex_unlock(&stdev->mrpc_mutex);
>
> if (filp->f_flags & O_NONBLOCK) {
> - if (!try_wait_for_completion(&stuser->comp))
> + if (!stuser->cmd_done)
> return -EAGAIN;
> } else {
> - rc = wait_for_completion_interruptible(&stuser->comp);
> + rc = wait_event_interruptible(stuser->cmd_comp,
> + stuser->cmd_done);
> if (rc < 0)
> return rc;
> }
> @@ -580,7 +583,7 @@ static __poll_t switchtec_dev_poll(struc
> struct switchtec_dev *stdev = stuser->stdev;
> __poll_t ret = 0;
>
> - poll_wait(filp, &stuser->comp.wait, wait);
> + poll_wait(filp, &stuser->cmd_comp, wait);
> poll_wait(filp, &stdev->event_wq, wait);
>
> if (lock_mutex_and_test_alive(stdev))
> @@ -588,7 +591,7 @@ static __poll_t switchtec_dev_poll(struc
>
> mutex_unlock(&stdev->mrpc_mutex);
>
> - if (try_wait_for_completion(&stuser->comp))
> + if (stuser->cmd_done)
> ret |= EPOLLIN | EPOLLRDNORM;
>
> if (stuser->event_cnt != atomic_read(&stdev->event_cnt))
> @@ -1272,7 +1275,8 @@ static void stdev_kill(struct switchtec_
>
> /* Wake up and kill any users waiting on an MRPC request */
> list_for_each_entry_safe(stuser, tmpuser, &stdev->mrpc_queue, list) {
> - complete_all(&stuser->comp);
> + stuser->cmd_done = true;
> + wake_up_interruptible(&stuser->cmd_comp);
> list_del_init(&stuser->list);
> stuser_put(stuser);
> }
>