2016-10-12 11:38:23

by Baolin Wang

[permalink] [raw]
Subject: [PATCH v2] usb: dwc3: gadget: Wait for end transfer complete before free irq

When we change the USB function with configfs frequently, sometimes it will
hang the system to crash. The reason is the gadget driver can not hanle the
end transfer complete event after free the gadget irq (since the xHCI will
share the same interrupt number with gadget, thus when free the gadget irq,
it will not shutdown this gadget irq line), which will trigger the interrupt
all the time.

Thus we should check if we need wait for the end transfer command completion
before free gadget irq.

Signed-off-by: Baolin Wang <[email protected]>
---
Changes since v1:
- Simply the operation of cleaning the dep flags.
---
drivers/usb/dwc3/core.h | 3 ++
drivers/usb/dwc3/gadget.c | 73 +++++++++++++++++++++++++++++++++++++++++++--
2 files changed, 73 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index 9128725..f01d8fd 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -537,6 +537,7 @@ struct dwc3_ep {
#define DWC3_EP_BUSY (1 << 4)
#define DWC3_EP_PENDING_REQUEST (1 << 5)
#define DWC3_EP_MISSED_ISOC (1 << 6)
+#define DWC3_EP_CMDCMPLT_BUSY (1 << 7)

/* This last one is specific to EP0 */
#define DWC3_EP0_DIR_IN (1 << 31)
@@ -746,6 +747,7 @@ struct dwc3_scratchpad_array {
* @ep0_bounce_addr: dma address of ep0_bounce
* @scratch_addr: dma address of scratchbuf
* @ep0_in_setup: One control transfer is completed and enter setup phase
+ * @cmd_complete: endpoint command completion
* @lock: for synchronizing
* @dev: pointer to our struct device
* @xhci: pointer to our xHCI child
@@ -845,6 +847,7 @@ struct dwc3 {
dma_addr_t scratch_addr;
struct dwc3_request ep0_usb_req;
struct completion ep0_in_setup;
+ struct completion cmd_complete;

/* device lock */
spinlock_t lock;
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index fef023a..32e3d4d 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -573,6 +573,7 @@ static int __dwc3_gadget_ep_enable(struct dwc3_ep *dep,
dep->comp_desc = comp_desc;
dep->type = usb_endpoint_type(desc);
dep->flags |= DWC3_EP_ENABLED;
+ dep->flags &= ~DWC3_EP_CMDCMPLT_BUSY;

reg = dwc3_readl(dwc->regs, DWC3_DALEPENA);
reg |= DWC3_DALEPENA_EP(dep->number);
@@ -650,7 +651,7 @@ static int __dwc3_gadget_ep_disable(struct dwc3_ep *dep)
dep->endpoint.desc = NULL;
dep->comp_desc = NULL;
dep->type = 0;
- dep->flags = 0;
+ dep->flags &= DWC3_EP_CMDCMPLT_BUSY;

return 0;
}
@@ -1732,6 +1733,54 @@ static void __dwc3_gadget_stop(struct dwc3 *dwc)
__dwc3_gadget_ep_disable(dwc->eps[1]);
}

+static void dwc3_wait_command_complete(struct dwc3 *dwc)
+{
+ u32 epnum, epstart = 2;
+ int ret, wait_cmd_complete = 0;
+ unsigned long flags;
+
+check_next:
+ spin_lock_irqsave(&dwc->lock, flags);
+ /*
+ * If the gadget has been in suspend state, then don't
+ * need to wait for the end transfer complete event.
+ */
+ if (pm_runtime_suspended(dwc->dev)) {
+ spin_unlock_irqrestore(&dwc->lock, flags);
+ return;
+ }
+
+ for (epnum = epstart; epnum < DWC3_ENDPOINTS_NUM; epnum++) {
+ struct dwc3_ep *dep;
+
+ dep = dwc->eps[epnum];
+ if (!dep)
+ continue;
+
+ if (dep->flags & DWC3_EP_CMDCMPLT_BUSY) {
+ reinit_completion(&dwc->cmd_complete);
+ epstart = epnum + 1;
+ wait_cmd_complete = 1;
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&dwc->lock, flags);
+
+ /*
+ * Wait for 500ms to complete the end transfer command before free irq.
+ */
+ if (wait_cmd_complete) {
+ wait_cmd_complete = 0;
+ ret = wait_for_completion_timeout(&dwc->cmd_complete,
+ msecs_to_jiffies(500));
+ if (ret == 0)
+ dev_warn(dwc->dev,
+ "timeout to wait for command complete.\n");
+
+ goto check_next;
+ }
+}
+
static int dwc3_gadget_stop(struct usb_gadget *g)
{
struct dwc3 *dwc = gadget_to_dwc(g);
@@ -1742,6 +1791,17 @@ static int dwc3_gadget_stop(struct usb_gadget *g)
dwc->gadget_driver = NULL;
spin_unlock_irqrestore(&dwc->lock, flags);

+ /*
+ * Since the xHCI will share the same interrupt with gadget, thus when
+ * free the gadget irq, it will not shutdown this gadget irq line. Then
+ * the gadget driver can not handle the end transfer command complete
+ * event after free the gadget irq, which will hang the system to crash.
+ *
+ * So we should wait for the end transfer command complete event before
+ * free it to avoid this situation.
+ */
+ dwc3_wait_command_complete(dwc);
+
free_irq(dwc->irq_gadget, dwc->ev_buf);

return 0;
@@ -2108,7 +2168,8 @@ static void dwc3_endpoint_interrupt(struct dwc3 *dwc,

dep = dwc->eps[epnum];

- if (!(dep->flags & DWC3_EP_ENABLED))
+ if (!(dep->flags & DWC3_EP_ENABLED) &&
+ !(dep->flags & DWC3_EP_CMDCMPLT_BUSY))
return;

if (epnum == 0 || epnum == 1) {
@@ -2180,6 +2241,11 @@ static void dwc3_endpoint_interrupt(struct dwc3 *dwc,
dwc3_trace(trace_dwc3_gadget, "%s FIFO Overrun", dep->name);
break;
case DWC3_DEPEVT_EPCMDCMPLT:
+ if (dep->flags & DWC3_EP_CMDCMPLT_BUSY) {
+ dep->flags &= ~DWC3_EP_CMDCMPLT_BUSY;
+ complete(&dwc->cmd_complete);
+ }
+
dwc3_trace(trace_dwc3_gadget, "Endpoint Command Complete");
break;
}
@@ -2278,7 +2344,7 @@ static void dwc3_stop_active_transfer(struct dwc3 *dwc, u32 epnum, bool force)
dep->flags &= ~DWC3_EP_BUSY;

if (dwc3_is_usb31(dwc) || dwc->revision < DWC3_REVISION_310A)
- udelay(100);
+ dep->flags |= DWC3_EP_CMDCMPLT_BUSY;
}

static void dwc3_stop_active_transfers(struct dwc3 *dwc)
@@ -2936,6 +3002,7 @@ int dwc3_gadget_init(struct dwc3 *dwc)
}

init_completion(&dwc->ep0_in_setup);
+ init_completion(&dwc->cmd_complete);

dwc->gadget.ops = &dwc3_gadget_ops;
dwc->gadget.speed = USB_SPEED_UNKNOWN;
--
1.7.9.5


2016-10-13 07:22:57

by Felipe Balbi

[permalink] [raw]
Subject: Re: [PATCH v2] usb: dwc3: gadget: Wait for end transfer complete before free irq


Hi,

Baolin Wang <[email protected]> writes:
> @@ -1742,6 +1791,17 @@ static int dwc3_gadget_stop(struct usb_gadget *g)
> dwc->gadget_driver = NULL;
> spin_unlock_irqrestore(&dwc->lock, flags);
>
> + /*
> + * Since the xHCI will share the same interrupt with gadget, thus when
> + * free the gadget irq, it will not shutdown this gadget irq line. Then
> + * the gadget driver can not handle the end transfer command complete
> + * event after free the gadget irq, which will hang the system to crash.
> + *
> + * So we should wait for the end transfer command complete event before
> + * free it to avoid this situation.
> + */
> + dwc3_wait_command_complete(dwc);

this doesn't make sense. We have already masked all interrupts before
getting here. We have also, already, disabled all endpoints.

I'm thinking this is a bug in configfs interface of Gadget API, not
dwc3. The only reason for this to happen would be if we get to
->udc_stop() with endpoints still enabled.

Can you check if that's the case? i.e. can you check if any endpoints
are still enabled when we get here?

--
balbi


Attachments:
signature.asc (800.00 B)

2016-10-13 07:35:05

by Baolin Wang

[permalink] [raw]
Subject: Re: [PATCH v2] usb: dwc3: gadget: Wait for end transfer complete before free irq

Hi,

On 13 October 2016 at 15:02, Felipe Balbi <[email protected]> wrote:
>
> Hi,
>
> Baolin Wang <[email protected]> writes:
>> @@ -1742,6 +1791,17 @@ static int dwc3_gadget_stop(struct usb_gadget *g)
>> dwc->gadget_driver = NULL;
>> spin_unlock_irqrestore(&dwc->lock, flags);
>>
>> + /*
>> + * Since the xHCI will share the same interrupt with gadget, thus when
>> + * free the gadget irq, it will not shutdown this gadget irq line. Then
>> + * the gadget driver can not handle the end transfer command complete
>> + * event after free the gadget irq, which will hang the system to crash.
>> + *
>> + * So we should wait for the end transfer command complete event before
>> + * free it to avoid this situation.
>> + */
>> + dwc3_wait_command_complete(dwc);
>
> this doesn't make sense. We have already masked all interrupts before
> getting here. We have also, already, disabled all endpoints.

No, you just mask the interrupts described in DEVTEN register, and you
did not disable the endpoint command complete event.

>
> I'm thinking this is a bug in configfs interface of Gadget API, not
> dwc3. The only reason for this to happen would be if we get to
> ->udc_stop() with endpoints still enabled.
>
> Can you check if that's the case? i.e. can you check if any endpoints
> are still enabled when we get here?

No, it is not any endpoints are still enabled. Like I said in commit
message, we will start end transfer command when disable the endpoint,
if the end transfer command complete event comes after we have freed
the gadget irq, it will trigger the interrupt line all the time to
crash the system.

--
Baolin.wang
Best Regards

2016-10-13 07:54:39

by Felipe Balbi

[permalink] [raw]
Subject: Re: [PATCH v2] usb: dwc3: gadget: Wait for end transfer complete before free irq


Hi,

Baolin Wang <[email protected]> writes:
> Hi,
>
> On 13 October 2016 at 15:02, Felipe Balbi <[email protected]> wrote:
>>
>> Hi,
>>
>> Baolin Wang <[email protected]> writes:
>>> @@ -1742,6 +1791,17 @@ static int dwc3_gadget_stop(struct usb_gadget *g)
>>> dwc->gadget_driver = NULL;
>>> spin_unlock_irqrestore(&dwc->lock, flags);
>>>
>>> + /*
>>> + * Since the xHCI will share the same interrupt with gadget, thus when
>>> + * free the gadget irq, it will not shutdown this gadget irq line. Then
>>> + * the gadget driver can not handle the end transfer command complete
>>> + * event after free the gadget irq, which will hang the system to crash.
>>> + *
>>> + * So we should wait for the end transfer command complete event before
>>> + * free it to avoid this situation.
>>> + */
>>> + dwc3_wait_command_complete(dwc);
>>
>> this doesn't make sense. We have already masked all interrupts before
>> getting here. We have also, already, disabled all endpoints.
>
> No, you just mask the interrupts described in DEVTEN register, and you
> did not disable the endpoint command complete event.

True that

>> I'm thinking this is a bug in configfs interface of Gadget API, not
>> dwc3. The only reason for this to happen would be if we get to
>> ->udc_stop() with endpoints still enabled.
>>
>> Can you check if that's the case? i.e. can you check if any endpoints
>> are still enabled when we get here?
>
> No, it is not any endpoints are still enabled. Like I said in commit
> message, we will start end transfer command when disable the endpoint,
> if the end transfer command complete event comes after we have freed
> the gadget irq, it will trigger the interrupt line all the time to
> crash the system.

I see what the problem is. Databook tells us we *must* set CMDIOC when
issuing EndTransfer command and we should always wait for Command
Complete IRQ. However, we took a shortcut and just delayed for 100us
after issuing End Transfer.

It seems as if *that* should be fixed. We should start actually waiting
for Command Complete IRQ.

--
balbi


Attachments:
signature.asc (800.00 B)

2016-10-13 08:00:45

by Baolin Wang

[permalink] [raw]
Subject: Re: [PATCH v2] usb: dwc3: gadget: Wait for end transfer complete before free irq

Hi,

On 13 October 2016 at 15:51, Felipe Balbi <[email protected]> wrote:
>
> Hi,
>
> Baolin Wang <[email protected]> writes:
>> Hi,
>>
>> On 13 October 2016 at 15:02, Felipe Balbi <[email protected]> wrote:
>>>
>>> Hi,
>>>
>>> Baolin Wang <[email protected]> writes:
>>>> @@ -1742,6 +1791,17 @@ static int dwc3_gadget_stop(struct usb_gadget *g)
>>>> dwc->gadget_driver = NULL;
>>>> spin_unlock_irqrestore(&dwc->lock, flags);
>>>>
>>>> + /*
>>>> + * Since the xHCI will share the same interrupt with gadget, thus when
>>>> + * free the gadget irq, it will not shutdown this gadget irq line. Then
>>>> + * the gadget driver can not handle the end transfer command complete
>>>> + * event after free the gadget irq, which will hang the system to crash.
>>>> + *
>>>> + * So we should wait for the end transfer command complete event before
>>>> + * free it to avoid this situation.
>>>> + */
>>>> + dwc3_wait_command_complete(dwc);
>>>
>>> this doesn't make sense. We have already masked all interrupts before
>>> getting here. We have also, already, disabled all endpoints.
>>
>> No, you just mask the interrupts described in DEVTEN register, and you
>> did not disable the endpoint command complete event.
>
> True that
>
>>> I'm thinking this is a bug in configfs interface of Gadget API, not
>>> dwc3. The only reason for this to happen would be if we get to
>>> ->udc_stop() with endpoints still enabled.
>>>
>>> Can you check if that's the case? i.e. can you check if any endpoints
>>> are still enabled when we get here?
>>
>> No, it is not any endpoints are still enabled. Like I said in commit
>> message, we will start end transfer command when disable the endpoint,
>> if the end transfer command complete event comes after we have freed
>> the gadget irq, it will trigger the interrupt line all the time to
>> crash the system.
>
> I see what the problem is. Databook tells us we *must* set CMDIOC when
> issuing EndTransfer command and we should always wait for Command
> Complete IRQ. However, we took a shortcut and just delayed for 100us
> after issuing End Transfer.

Yes, but 100us delay is not enough in some scenarios, like changing
function with configfs frequently, we will met problems.

>
> It seems as if *that* should be fixed. We should start actually waiting
> for Command Complete IRQ.
>
> --
> balbi



--
Baolin.wang
Best Regards

2016-10-13 09:59:10

by Felipe Balbi

[permalink] [raw]
Subject: Re: [PATCH v2] usb: dwc3: gadget: Wait for end transfer complete before free irq


Hi,

Baolin Wang <[email protected]> writes:
>>>> I'm thinking this is a bug in configfs interface of Gadget API, not
>>>> dwc3. The only reason for this to happen would be if we get to
>>>> ->udc_stop() with endpoints still enabled.
>>>>
>>>> Can you check if that's the case? i.e. can you check if any endpoints
>>>> are still enabled when we get here?
>>>
>>> No, it is not any endpoints are still enabled. Like I said in commit
>>> message, we will start end transfer command when disable the endpoint,
>>> if the end transfer command complete event comes after we have freed
>>> the gadget irq, it will trigger the interrupt line all the time to
>>> crash the system.
>>
>> I see what the problem is. Databook tells us we *must* set CMDIOC when
>> issuing EndTransfer command and we should always wait for Command
>> Complete IRQ. However, we took a shortcut and just delayed for 100us
>> after issuing End Transfer.
>
> Yes, but 100us delay is not enough in some scenarios, like changing
> function with configfs frequently, we will met problems.

heh, 100us *is* enough. However we still get an IRQ because we requested
for it. If you want to fix this, then you need to find a way to get rid
of that 100us udelay() and add a proper wait_for_completion() to delay
execution until command complete IRQ fires up.

--
balbi


Attachments:
signature.asc (800.00 B)

2016-10-13 10:57:52

by Felipe Balbi

[permalink] [raw]
Subject: Re: [PATCH v2] usb: dwc3: gadget: Wait for end transfer complete before free irq


Hi,

Felipe Balbi <[email protected]> writes:
> Hi,
>
> Baolin Wang <[email protected]> writes:
>>>>> I'm thinking this is a bug in configfs interface of Gadget API, not
>>>>> dwc3. The only reason for this to happen would be if we get to
>>>>> ->udc_stop() with endpoints still enabled.
>>>>>
>>>>> Can you check if that's the case? i.e. can you check if any endpoints
>>>>> are still enabled when we get here?
>>>>
>>>> No, it is not any endpoints are still enabled. Like I said in commit
>>>> message, we will start end transfer command when disable the endpoint,
>>>> if the end transfer command complete event comes after we have freed
>>>> the gadget irq, it will trigger the interrupt line all the time to
>>>> crash the system.
>>>
>>> I see what the problem is. Databook tells us we *must* set CMDIOC when
>>> issuing EndTransfer command and we should always wait for Command
>>> Complete IRQ. However, we took a shortcut and just delayed for 100us
>>> after issuing End Transfer.
>>
>> Yes, but 100us delay is not enough in some scenarios, like changing
>> function with configfs frequently, we will met problems.
>
> heh, 100us *is* enough. However we still get an IRQ because we requested
> for it. If you want to fix this, then you need to find a way to get rid
> of that 100us udelay() and add a proper wait_for_completion() to delay
> execution until command complete IRQ fires up.

I haven't tested this yet, but it shows the idea (I think we might still
have a race with ep_queue if we're still waiting for End Transfer, but
that's easy to sort out by checking for DWC3_EP_END_TRANSFER_PENDING
before calling kick_transfer). Could you have a look and, perhaps, run a
test?

diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index e878366ead00..24a77e9f9bba 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -26,6 +26,7 @@
#include <linux/dma-mapping.h>
#include <linux/mm.h>
#include <linux/debugfs.h>
+#include <linux/wait.h>

#include <linux/usb/ch9.h>
#include <linux/usb/gadget.h>
@@ -504,6 +505,7 @@ struct dwc3_event_buffer {
* @endpoint: usb endpoint
* @pending_list: list of pending requests for this endpoint
* @started_list: list of started requests on this endpoint
+ * @wait_end_transfer: wait_queue_head_t for waiting on End Transfer complete
* @lock: spinlock for endpoint request queue traversal
* @regs: pointer to first endpoint register
* @trb_pool: array of transaction buffers
@@ -529,6 +531,8 @@ struct dwc3_ep {
struct list_head pending_list;
struct list_head started_list;

+ wait_queue_head_t wait_end_transfer;
+
spinlock_t lock;
void __iomem *regs;

@@ -545,7 +549,7 @@ struct dwc3_ep {
#define DWC3_EP_BUSY (1 << 4)
#define DWC3_EP_PENDING_REQUEST (1 << 5)
#define DWC3_EP_MISSED_ISOC (1 << 6)
-
+#define DWC3_EP_END_TRANSFER_PENDING (1 << 7)
/* This last one is specific to EP0 */
#define DWC3_EP0_DIR_IN (1 << 31)

@@ -1047,6 +1051,9 @@ struct dwc3_event_depevt {
#define DEPEVT_TRANSFER_BUS_EXPIRY 2

u32 parameters:16;
+
+/* For Command Complete Events */
+#define DEPEVT_PARAMETER_CMD(n) (((n) & (0xf << 24)) >> 24)
} __packed;

/**
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 3ba05b12e49a..8037bff43485 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -598,6 +598,8 @@ static int __dwc3_gadget_ep_enable(struct dwc3_ep *dep,
reg |= DWC3_DALEPENA_EP(dep->number);
dwc3_writel(dwc->regs, DWC3_DALEPENA, reg);

+ init_waitqueue_head(&dep->wait_end_transfer);
+
if (usb_endpoint_xfer_control(desc))
return 0;

@@ -2156,6 +2158,7 @@ static void dwc3_endpoint_interrupt(struct dwc3 *dwc,
{
struct dwc3_ep *dep;
u8 epnum = event->endpoint_number;
+ u8 cmd;

dep = dwc->eps[epnum];

@@ -2200,8 +2203,13 @@ static void dwc3_endpoint_interrupt(struct dwc3 *dwc,
return;
}
break;
- case DWC3_DEPEVT_RXTXFIFOEVT:
case DWC3_DEPEVT_EPCMDCMPLT:
+ cmd = DEPEVT_PARAMETER_CMD(event->parameters);
+
+ if (cmd == DWC3_DEPCMD_ENDTRANSFER)
+ dep->flags &= ~DWC3_EP_END_TRANSFER_PENDING;
+ break;
+ case DWC3_DEPEVT_RXTXFIFOEVT:
break;
}
}
@@ -2246,6 +2254,7 @@ static void dwc3_reset_gadget(struct dwc3 *dwc)
}

static void dwc3_stop_active_transfer(struct dwc3 *dwc, u32 epnum, bool force)
+__releases(&dwc->lock) __acquires(&dwc->lock)
{
struct dwc3_ep *dep;
struct dwc3_gadget_ep_cmd_params params;
@@ -2295,6 +2304,12 @@ static void dwc3_stop_active_transfer(struct dwc3 *dwc, u32 epnum, bool force)
memset(&params, 0, sizeof(params));
ret = dwc3_send_gadget_ep_cmd(dep, cmd, &params);
WARN_ON_ONCE(ret);
+
+ dep->flags |= DWC3_EP_END_TRANSFER_PENDING;
+ wait_event_cmd(dep->wait_end_transfer,
+ !(dep->flags & DWC3_EP_END_TRANSFER_PENDING),
+ spin_unlock_irq(&dwc->lock), spin_lock_irq(&dwc->lock));
+
dep->resource_index = 0;
dep->flags &= ~DWC3_EP_BUSY;




--
balbi


Attachments:
signature.asc (800.00 B)

2016-10-13 11:18:52

by Baolin Wang

[permalink] [raw]
Subject: Re: [PATCH v2] usb: dwc3: gadget: Wait for end transfer complete before free irq

On 13 October 2016 at 18:56, Felipe Balbi <[email protected]> wrote:
>
> Hi,
>
> Felipe Balbi <[email protected]> writes:
>> Hi,
>>
>> Baolin Wang <[email protected]> writes:
>>>>>> I'm thinking this is a bug in configfs interface of Gadget API, not
>>>>>> dwc3. The only reason for this to happen would be if we get to
>>>>>> ->udc_stop() with endpoints still enabled.
>>>>>>
>>>>>> Can you check if that's the case? i.e. can you check if any endpoints
>>>>>> are still enabled when we get here?
>>>>>
>>>>> No, it is not any endpoints are still enabled. Like I said in commit
>>>>> message, we will start end transfer command when disable the endpoint,
>>>>> if the end transfer command complete event comes after we have freed
>>>>> the gadget irq, it will trigger the interrupt line all the time to
>>>>> crash the system.
>>>>
>>>> I see what the problem is. Databook tells us we *must* set CMDIOC when
>>>> issuing EndTransfer command and we should always wait for Command
>>>> Complete IRQ. However, we took a shortcut and just delayed for 100us
>>>> after issuing End Transfer.
>>>
>>> Yes, but 100us delay is not enough in some scenarios, like changing
>>> function with configfs frequently, we will met problems.
>>
>> heh, 100us *is* enough. However we still get an IRQ because we requested
>> for it. If you want to fix this, then you need to find a way to get rid
>> of that 100us udelay() and add a proper wait_for_completion() to delay
>> execution until command complete IRQ fires up.
>
> I haven't tested this yet, but it shows the idea (I think we might still
> have a race with ep_queue if we're still waiting for End Transfer, but

Yes, maybe we need check DWC3_EP_END_TRANSFER_PENDING flag when
queuing one request.

> that's easy to sort out by checking for DWC3_EP_END_TRANSFER_PENDING
> before calling kick_transfer). Could you have a look and, perhaps, run a
> test?

Sure. I will test it and send out the result tomorrow.

>
> diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
> index e878366ead00..24a77e9f9bba 100644
> --- a/drivers/usb/dwc3/core.h
> +++ b/drivers/usb/dwc3/core.h
> @@ -26,6 +26,7 @@
> #include <linux/dma-mapping.h>
> #include <linux/mm.h>
> #include <linux/debugfs.h>
> +#include <linux/wait.h>
>
> #include <linux/usb/ch9.h>
> #include <linux/usb/gadget.h>
> @@ -504,6 +505,7 @@ struct dwc3_event_buffer {
> * @endpoint: usb endpoint
> * @pending_list: list of pending requests for this endpoint
> * @started_list: list of started requests on this endpoint
> + * @wait_end_transfer: wait_queue_head_t for waiting on End Transfer complete
> * @lock: spinlock for endpoint request queue traversal
> * @regs: pointer to first endpoint register
> * @trb_pool: array of transaction buffers
> @@ -529,6 +531,8 @@ struct dwc3_ep {
> struct list_head pending_list;
> struct list_head started_list;
>
> + wait_queue_head_t wait_end_transfer;
> +
> spinlock_t lock;
> void __iomem *regs;
>
> @@ -545,7 +549,7 @@ struct dwc3_ep {
> #define DWC3_EP_BUSY (1 << 4)
> #define DWC3_EP_PENDING_REQUEST (1 << 5)
> #define DWC3_EP_MISSED_ISOC (1 << 6)
> -
> +#define DWC3_EP_END_TRANSFER_PENDING (1 << 7)
> /* This last one is specific to EP0 */
> #define DWC3_EP0_DIR_IN (1 << 31)
>
> @@ -1047,6 +1051,9 @@ struct dwc3_event_depevt {
> #define DEPEVT_TRANSFER_BUS_EXPIRY 2
>
> u32 parameters:16;
> +
> +/* For Command Complete Events */
> +#define DEPEVT_PARAMETER_CMD(n) (((n) & (0xf << 24)) >> 24)
> } __packed;
>
> /**
> diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
> index 3ba05b12e49a..8037bff43485 100644
> --- a/drivers/usb/dwc3/gadget.c
> +++ b/drivers/usb/dwc3/gadget.c
> @@ -598,6 +598,8 @@ static int __dwc3_gadget_ep_enable(struct dwc3_ep *dep,
> reg |= DWC3_DALEPENA_EP(dep->number);
> dwc3_writel(dwc->regs, DWC3_DALEPENA, reg);
>
> + init_waitqueue_head(&dep->wait_end_transfer);
> +
> if (usb_endpoint_xfer_control(desc))
> return 0;
>
> @@ -2156,6 +2158,7 @@ static void dwc3_endpoint_interrupt(struct dwc3 *dwc,
> {
> struct dwc3_ep *dep;
> u8 epnum = event->endpoint_number;
> + u8 cmd;
>
> dep = dwc->eps[epnum];
>
> @@ -2200,8 +2203,13 @@ static void dwc3_endpoint_interrupt(struct dwc3 *dwc,
> return;
> }
> break;
> - case DWC3_DEPEVT_RXTXFIFOEVT:
> case DWC3_DEPEVT_EPCMDCMPLT:
> + cmd = DEPEVT_PARAMETER_CMD(event->parameters);
> +
> + if (cmd == DWC3_DEPCMD_ENDTRANSFER)
> + dep->flags &= ~DWC3_EP_END_TRANSFER_PENDING;
> + break;
> + case DWC3_DEPEVT_RXTXFIFOEVT:
> break;
> }
> }
> @@ -2246,6 +2254,7 @@ static void dwc3_reset_gadget(struct dwc3 *dwc)
> }
>
> static void dwc3_stop_active_transfer(struct dwc3 *dwc, u32 epnum, bool force)
> +__releases(&dwc->lock) __acquires(&dwc->lock)
> {
> struct dwc3_ep *dep;
> struct dwc3_gadget_ep_cmd_params params;
> @@ -2295,6 +2304,12 @@ static void dwc3_stop_active_transfer(struct dwc3 *dwc, u32 epnum, bool force)
> memset(&params, 0, sizeof(params));
> ret = dwc3_send_gadget_ep_cmd(dep, cmd, &params);
> WARN_ON_ONCE(ret);
> +
> + dep->flags |= DWC3_EP_END_TRANSFER_PENDING;
> + wait_event_cmd(dep->wait_end_transfer,
> + !(dep->flags & DWC3_EP_END_TRANSFER_PENDING),
> + spin_unlock_irq(&dwc->lock), spin_lock_irq(&dwc->lock));
> +
> dep->resource_index = 0;
> dep->flags &= ~DWC3_EP_BUSY;
>
>
>
>
> --
> balbi



--
Baolin.wang
Best Regards

2016-10-13 11:26:24

by Felipe Balbi

[permalink] [raw]
Subject: Re: [PATCH v2] usb: dwc3: gadget: Wait for end transfer complete before free irq


Hi,

Baolin Wang <[email protected]> writes:
>>> Baolin Wang <[email protected]> writes:
>>>>>>> I'm thinking this is a bug in configfs interface of Gadget API, not
>>>>>>> dwc3. The only reason for this to happen would be if we get to
>>>>>>> ->udc_stop() with endpoints still enabled.
>>>>>>>
>>>>>>> Can you check if that's the case? i.e. can you check if any endpoints
>>>>>>> are still enabled when we get here?
>>>>>>
>>>>>> No, it is not any endpoints are still enabled. Like I said in commit
>>>>>> message, we will start end transfer command when disable the endpoint,
>>>>>> if the end transfer command complete event comes after we have freed
>>>>>> the gadget irq, it will trigger the interrupt line all the time to
>>>>>> crash the system.
>>>>>
>>>>> I see what the problem is. Databook tells us we *must* set CMDIOC when
>>>>> issuing EndTransfer command and we should always wait for Command
>>>>> Complete IRQ. However, we took a shortcut and just delayed for 100us
>>>>> after issuing End Transfer.
>>>>
>>>> Yes, but 100us delay is not enough in some scenarios, like changing
>>>> function with configfs frequently, we will met problems.
>>>
>>> heh, 100us *is* enough. However we still get an IRQ because we requested
>>> for it. If you want to fix this, then you need to find a way to get rid
>>> of that 100us udelay() and add a proper wait_for_completion() to delay
>>> execution until command complete IRQ fires up.
>>
>> I haven't tested this yet, but it shows the idea (I think we might still
>> have a race with ep_queue if we're still waiting for End Transfer, but
>
> Yes, maybe we need check DWC3_EP_END_TRANSFER_PENDING flag when
> queuing one request.

Yeah, I'll add that check later. I still need to make sure we would
still try to kick any transfers that might have been queued while
waiting for End Transfer Command Complete IRQ.

>> that's easy to sort out by checking for DWC3_EP_END_TRANSFER_PENDING
>> before calling kick_transfer). Could you have a look and, perhaps, run a
>> test?
>
> Sure. I will test it and send out the result tomorrow.

Thank you

--
balbi


Attachments:
signature.asc (800.00 B)

2016-10-13 13:08:25

by Baolin Wang

[permalink] [raw]
Subject: Re: [PATCH v2] usb: dwc3: gadget: Wait for end transfer complete before free irq

Hi Felipe,

On 13 October 2016 at 19:23, Felipe Balbi <[email protected]> wrote:
>
> Hi,
>
> Baolin Wang <[email protected]> writes:
>>>> Baolin Wang <[email protected]> writes:
>>>>>>>> I'm thinking this is a bug in configfs interface of Gadget API, not
>>>>>>>> dwc3. The only reason for this to happen would be if we get to
>>>>>>>> ->udc_stop() with endpoints still enabled.
>>>>>>>>
>>>>>>>> Can you check if that's the case? i.e. can you check if any endpoints
>>>>>>>> are still enabled when we get here?
>>>>>>>
>>>>>>> No, it is not any endpoints are still enabled. Like I said in commit
>>>>>>> message, we will start end transfer command when disable the endpoint,
>>>>>>> if the end transfer command complete event comes after we have freed
>>>>>>> the gadget irq, it will trigger the interrupt line all the time to
>>>>>>> crash the system.
>>>>>>
>>>>>> I see what the problem is. Databook tells us we *must* set CMDIOC when
>>>>>> issuing EndTransfer command and we should always wait for Command
>>>>>> Complete IRQ. However, we took a shortcut and just delayed for 100us
>>>>>> after issuing End Transfer.
>>>>>
>>>>> Yes, but 100us delay is not enough in some scenarios, like changing
>>>>> function with configfs frequently, we will met problems.
>>>>
>>>> heh, 100us *is* enough. However we still get an IRQ because we requested
>>>> for it. If you want to fix this, then you need to find a way to get rid
>>>> of that 100us udelay() and add a proper wait_for_completion() to delay
>>>> execution until command complete IRQ fires up.
>>>
>>> I haven't tested this yet, but it shows the idea (I think we might still
>>> have a race with ep_queue if we're still waiting for End Transfer, but
>>
>> Yes, maybe we need check DWC3_EP_END_TRANSFER_PENDING flag when
>> queuing one request.
>
> Yeah, I'll add that check later. I still need to make sure we would
> still try to kick any transfers that might have been queued while
> waiting for End Transfer Command Complete IRQ.

OK. But I still worried if there are other races in some places which
is not easy to find out by testing. No introducing race condition
would be one better solution, anyway I would like to test the patch
you send out firstly.

>
>>> that's easy to sort out by checking for DWC3_EP_END_TRANSFER_PENDING
>>> before calling kick_transfer). Could you have a look and, perhaps, run a
>>> test?
>>
>> Sure. I will test it and send out the result tomorrow.
>
> Thank you
>
> --
> balbi



--
Baolin.wang
Best Regards

2016-10-13 13:34:55

by Felipe Balbi

[permalink] [raw]
Subject: Re: [PATCH v2] usb: dwc3: gadget: Wait for end transfer complete before free irq



Hi,

Baolin Wang <[email protected]> writes:
>> Baolin Wang <[email protected]> writes:
>>>>> Baolin Wang <[email protected]> writes:
>>>>>>>>> I'm thinking this is a bug in configfs interface of Gadget API, not
>>>>>>>>> dwc3. The only reason for this to happen would be if we get to
>>>>>>>>> ->udc_stop() with endpoints still enabled.
>>>>>>>>>
>>>>>>>>> Can you check if that's the case? i.e. can you check if any endpoints
>>>>>>>>> are still enabled when we get here?
>>>>>>>>
>>>>>>>> No, it is not any endpoints are still enabled. Like I said in commit
>>>>>>>> message, we will start end transfer command when disable the endpoint,
>>>>>>>> if the end transfer command complete event comes after we have freed
>>>>>>>> the gadget irq, it will trigger the interrupt line all the time to
>>>>>>>> crash the system.
>>>>>>>
>>>>>>> I see what the problem is. Databook tells us we *must* set CMDIOC when
>>>>>>> issuing EndTransfer command and we should always wait for Command
>>>>>>> Complete IRQ. However, we took a shortcut and just delayed for 100us
>>>>>>> after issuing End Transfer.
>>>>>>
>>>>>> Yes, but 100us delay is not enough in some scenarios, like changing
>>>>>> function with configfs frequently, we will met problems.
>>>>>
>>>>> heh, 100us *is* enough. However we still get an IRQ because we requested
>>>>> for it. If you want to fix this, then you need to find a way to get rid
>>>>> of that 100us udelay() and add a proper wait_for_completion() to delay
>>>>> execution until command complete IRQ fires up.
>>>>
>>>> I haven't tested this yet, but it shows the idea (I think we might still
>>>> have a race with ep_queue if we're still waiting for End Transfer, but
>>>
>>> Yes, maybe we need check DWC3_EP_END_TRANSFER_PENDING flag when
>>> queuing one request.
>>
>> Yeah, I'll add that check later. I still need to make sure we would
>> still try to kick any transfers that might have been queued while
>> waiting for End Transfer Command Complete IRQ.
>
> OK. But I still worried if there are other races in some places which

There are no other places where this needs to be sorted out.

> is not easy to find out by testing. No introducing race condition
> would be one better solution, anyway I would like to test the patch
> you send out firstly.

Right, but your patch was working around another problem, rather then
fixing it.

Here's another version which should make sure everything remains working
as it should.

8<------------------------------------------------------------------------
>From 1f922a902a21d5cee32082be18bbfbf1d46137e4 Mon Sep 17 00:00:00 2001
From: Felipe Balbi <[email protected]>
Date: Thu, 13 Oct 2016 14:09:47 +0300
Subject: [PATCH] usb: dwc3: gadget: wait for End Transfer to complete

Instead of just delaying for 100us, we should
actually wait for End Transfer Command Complete
interrupt before moving on. Note that this should
only be done if we're dealing with one of the core
revisions that actually require the interrupt before
moving on.

Reported-by: Baolin Wang <[email protected]>
Signed-off-by: Felipe Balbi <[email protected]>
---
drivers/usb/dwc3/core.h | 10 +++++++++-
drivers/usb/dwc3/gadget.c | 31 ++++++++++++++++++++++++++++---
2 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index e878366ead00..cf495d932252 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -26,6 +26,7 @@
#include <linux/dma-mapping.h>
#include <linux/mm.h>
#include <linux/debugfs.h>
+#include <linux/wait.h>

#include <linux/usb/ch9.h>
#include <linux/usb/gadget.h>
@@ -504,6 +505,7 @@ struct dwc3_event_buffer {
* @endpoint: usb endpoint
* @pending_list: list of pending requests for this endpoint
* @started_list: list of started requests on this endpoint
+ * @wait_end_transfer: wait_queue_head_t for waiting on End Transfer complete
* @lock: spinlock for endpoint request queue traversal
* @regs: pointer to first endpoint register
* @trb_pool: array of transaction buffers
@@ -529,6 +531,8 @@ struct dwc3_ep {
struct list_head pending_list;
struct list_head started_list;

+ wait_queue_head_t wait_end_transfer;
+
spinlock_t lock;
void __iomem *regs;

@@ -545,7 +549,8 @@ struct dwc3_ep {
#define DWC3_EP_BUSY (1 << 4)
#define DWC3_EP_PENDING_REQUEST (1 << 5)
#define DWC3_EP_MISSED_ISOC (1 << 6)
-
+#define DWC3_EP_END_TRANSFER_PENDING (1 << 7)
+#define DWC3_EP_KICK_POST_END_TRANSFER (1 << 8)
/* This last one is specific to EP0 */
#define DWC3_EP0_DIR_IN (1 << 31)

@@ -1047,6 +1052,9 @@ struct dwc3_event_depevt {
#define DEPEVT_TRANSFER_BUS_EXPIRY 2

u32 parameters:16;
+
+/* For Command Complete Events */
+#define DEPEVT_PARAMETER_CMD(n) (((n) & (0xf << 24)) >> 24)
} __packed;

/**
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 3ba05b12e49a..a3f81b5ba901 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -598,6 +598,8 @@ static int __dwc3_gadget_ep_enable(struct dwc3_ep *dep,
reg |= DWC3_DALEPENA_EP(dep->number);
dwc3_writel(dwc->regs, DWC3_DALEPENA, reg);

+ init_waitqueue_head(&dep->wait_end_transfer);
+
if (usb_endpoint_xfer_control(desc))
return 0;

@@ -1151,6 +1153,11 @@ static int __dwc3_gadget_ep_queue(struct dwc3_ep *dep, struct dwc3_request *req)
if (!dwc3_calc_trbs_left(dep))
return 0;

+ if (dep->flags & DWC3_EP_END_TRANSFER_PENDING) {
+ dep->flags &= DWC3_EP_KICK_POST_END_TRANSFER;
+ return 0;
+ }
+
ret = __dwc3_gadget_kick_transfer(dep, 0);
if (ret && ret != -EBUSY)
dwc3_trace(trace_dwc3_gadget,
@@ -2156,6 +2163,7 @@ static void dwc3_endpoint_interrupt(struct dwc3 *dwc,
{
struct dwc3_ep *dep;
u8 epnum = event->endpoint_number;
+ u8 cmd;

dep = dwc->eps[epnum];

@@ -2200,8 +2208,13 @@ static void dwc3_endpoint_interrupt(struct dwc3 *dwc,
return;
}
break;
- case DWC3_DEPEVT_RXTXFIFOEVT:
case DWC3_DEPEVT_EPCMDCMPLT:
+ cmd = DEPEVT_PARAMETER_CMD(event->parameters);
+
+ if (cmd == DWC3_DEPCMD_ENDTRANSFER)
+ dep->flags &= ~DWC3_EP_END_TRANSFER_PENDING;
+ break;
+ case DWC3_DEPEVT_RXTXFIFOEVT:
break;
}
}
@@ -2246,6 +2259,7 @@ static void dwc3_reset_gadget(struct dwc3 *dwc)
}

static void dwc3_stop_active_transfer(struct dwc3 *dwc, u32 epnum, bool force)
+__releases(&dwc->lock) __acquires(&dwc->lock)
{
struct dwc3_ep *dep;
struct dwc3_gadget_ep_cmd_params params;
@@ -2295,11 +2309,22 @@ static void dwc3_stop_active_transfer(struct dwc3 *dwc, u32 epnum, bool force)
memset(&params, 0, sizeof(params));
ret = dwc3_send_gadget_ep_cmd(dep, cmd, &params);
WARN_ON_ONCE(ret);
+
+ if (dwc3_is_usb31(dwc) || dwc->revision < DWC3_REVISION_310A) {
+ dep->flags |= DWC3_EP_END_TRANSFER_PENDING;
+ wait_event_lock_irq(dep->wait_end_transfer,
+ !(dep->flags & DWC3_EP_END_TRANSFER_PENDING),
+ dwc->lock);
+ }
+
dep->resource_index = 0;
dep->flags &= ~DWC3_EP_BUSY;

- if (dwc3_is_usb31(dwc) || dwc->revision < DWC3_REVISION_310A)
- udelay(100);
+ if (dep->flags & DWC3_EP_KICK_POST_END_TRANSFER) {
+ dep->flags &= ~DWC3_EP_KICK_POST_END_TRANSFER;
+ ret = __dwc3_gadget_kick_transfer(dep, 0);
+ WARN_ON_ONCE(ret);
+ }
}

static void dwc3_stop_active_transfers(struct dwc3 *dwc)
--
2.10.0.440.g21f862b


--
balbi

2016-10-14 07:03:09

by Baolin Wang

[permalink] [raw]
Subject: Re: [PATCH v2] usb: dwc3: gadget: Wait for end transfer complete before free irq

Hi Felipe,

On 13 October 2016 at 21:34, Felipe Balbi <[email protected]> wrote:
>
>
> Hi,
>
> Baolin Wang <[email protected]> writes:
>>> Baolin Wang <[email protected]> writes:
>>>>>> Baolin Wang <[email protected]> writes:
>>>>>>>>>> I'm thinking this is a bug in configfs interface of Gadget API, not
>>>>>>>>>> dwc3. The only reason for this to happen would be if we get to
>>>>>>>>>> ->udc_stop() with endpoints still enabled.
>>>>>>>>>>
>>>>>>>>>> Can you check if that's the case? i.e. can you check if any endpoints
>>>>>>>>>> are still enabled when we get here?
>>>>>>>>>
>>>>>>>>> No, it is not any endpoints are still enabled. Like I said in commit
>>>>>>>>> message, we will start end transfer command when disable the endpoint,
>>>>>>>>> if the end transfer command complete event comes after we have freed
>>>>>>>>> the gadget irq, it will trigger the interrupt line all the time to
>>>>>>>>> crash the system.
>>>>>>>>
>>>>>>>> I see what the problem is. Databook tells us we *must* set CMDIOC when
>>>>>>>> issuing EndTransfer command and we should always wait for Command
>>>>>>>> Complete IRQ. However, we took a shortcut and just delayed for 100us
>>>>>>>> after issuing End Transfer.
>>>>>>>
>>>>>>> Yes, but 100us delay is not enough in some scenarios, like changing
>>>>>>> function with configfs frequently, we will met problems.
>>>>>>
>>>>>> heh, 100us *is* enough. However we still get an IRQ because we requested
>>>>>> for it. If you want to fix this, then you need to find a way to get rid
>>>>>> of that 100us udelay() and add a proper wait_for_completion() to delay
>>>>>> execution until command complete IRQ fires up.
>>>>>
>>>>> I haven't tested this yet, but it shows the idea (I think we might still
>>>>> have a race with ep_queue if we're still waiting for End Transfer, but
>>>>
>>>> Yes, maybe we need check DWC3_EP_END_TRANSFER_PENDING flag when
>>>> queuing one request.
>>>
>>> Yeah, I'll add that check later. I still need to make sure we would
>>> still try to kick any transfers that might have been queued while
>>> waiting for End Transfer Command Complete IRQ.
>>
>> OK. But I still worried if there are other races in some places which
>
> There are no other places where this needs to be sorted out.
>
>> is not easy to find out by testing. No introducing race condition
>> would be one better solution, anyway I would like to test the patch
>> you send out firstly.
>
> Right, but your patch was working around another problem, rather then
> fixing it.
>
> Here's another version which should make sure everything remains working
> as it should.
>
> 8<------------------------------------------------------------------------
> From 1f922a902a21d5cee32082be18bbfbf1d46137e4 Mon Sep 17 00:00:00 2001
> From: Felipe Balbi <[email protected]>
> Date: Thu, 13 Oct 2016 14:09:47 +0300
> Subject: [PATCH] usb: dwc3: gadget: wait for End Transfer to complete
>
> Instead of just delaying for 100us, we should
> actually wait for End Transfer Command Complete
> interrupt before moving on. Note that this should
> only be done if we're dealing with one of the core
> revisions that actually require the interrupt before
> moving on.
>
> Reported-by: Baolin Wang <[email protected]>
> Signed-off-by: Felipe Balbi <[email protected]>
> ---
> drivers/usb/dwc3/core.h | 10 +++++++++-
> drivers/usb/dwc3/gadget.c | 31 ++++++++++++++++++++++++++++---
> 2 files changed, 37 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
> index e878366ead00..cf495d932252 100644
> --- a/drivers/usb/dwc3/core.h
> +++ b/drivers/usb/dwc3/core.h
> @@ -26,6 +26,7 @@
> #include <linux/dma-mapping.h>
> #include <linux/mm.h>
> #include <linux/debugfs.h>
> +#include <linux/wait.h>
>
> #include <linux/usb/ch9.h>
> #include <linux/usb/gadget.h>
> @@ -504,6 +505,7 @@ struct dwc3_event_buffer {
> * @endpoint: usb endpoint
> * @pending_list: list of pending requests for this endpoint
> * @started_list: list of started requests on this endpoint
> + * @wait_end_transfer: wait_queue_head_t for waiting on End Transfer complete
> * @lock: spinlock for endpoint request queue traversal
> * @regs: pointer to first endpoint register
> * @trb_pool: array of transaction buffers
> @@ -529,6 +531,8 @@ struct dwc3_ep {
> struct list_head pending_list;
> struct list_head started_list;
>
> + wait_queue_head_t wait_end_transfer;
> +
> spinlock_t lock;
> void __iomem *regs;
>
> @@ -545,7 +549,8 @@ struct dwc3_ep {
> #define DWC3_EP_BUSY (1 << 4)
> #define DWC3_EP_PENDING_REQUEST (1 << 5)
> #define DWC3_EP_MISSED_ISOC (1 << 6)
> -
> +#define DWC3_EP_END_TRANSFER_PENDING (1 << 7)
> +#define DWC3_EP_KICK_POST_END_TRANSFER (1 << 8)
> /* This last one is specific to EP0 */
> #define DWC3_EP0_DIR_IN (1 << 31)
>
> @@ -1047,6 +1052,9 @@ struct dwc3_event_depevt {
> #define DEPEVT_TRANSFER_BUS_EXPIRY 2
>
> u32 parameters:16;
> +
> +/* For Command Complete Events */
> +#define DEPEVT_PARAMETER_CMD(n) (((n) & (0xf << 24)) >> 24)
> } __packed;
>
> /**
> diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
> index 3ba05b12e49a..a3f81b5ba901 100644
> --- a/drivers/usb/dwc3/gadget.c
> +++ b/drivers/usb/dwc3/gadget.c
> @@ -598,6 +598,8 @@ static int __dwc3_gadget_ep_enable(struct dwc3_ep *dep,
> reg |= DWC3_DALEPENA_EP(dep->number);
> dwc3_writel(dwc->regs, DWC3_DALEPENA, reg);
>
> + init_waitqueue_head(&dep->wait_end_transfer);
> +
> if (usb_endpoint_xfer_control(desc))
> return 0;
>
> @@ -1151,6 +1153,11 @@ static int __dwc3_gadget_ep_queue(struct dwc3_ep *dep, struct dwc3_request *req)
> if (!dwc3_calc_trbs_left(dep))
> return 0;
>
> + if (dep->flags & DWC3_EP_END_TRANSFER_PENDING) {
> + dep->flags &= DWC3_EP_KICK_POST_END_TRANSFER;
> + return 0;
> + }
> +
> ret = __dwc3_gadget_kick_transfer(dep, 0);
> if (ret && ret != -EBUSY)
> dwc3_trace(trace_dwc3_gadget,
> @@ -2156,6 +2163,7 @@ static void dwc3_endpoint_interrupt(struct dwc3 *dwc,
> {
> struct dwc3_ep *dep;
> u8 epnum = event->endpoint_number;
> + u8 cmd;
>
> dep = dwc->eps[epnum];
>
> @@ -2200,8 +2208,13 @@ static void dwc3_endpoint_interrupt(struct dwc3 *dwc,
> return;
> }
> break;
> - case DWC3_DEPEVT_RXTXFIFOEVT:
> case DWC3_DEPEVT_EPCMDCMPLT:
> + cmd = DEPEVT_PARAMETER_CMD(event->parameters);
> +
> + if (cmd == DWC3_DEPCMD_ENDTRANSFER)
> + dep->flags &= ~DWC3_EP_END_TRANSFER_PENDING;

I think you missed wake_up(&dep->wait_end_transfer) function here.

> + break;
> + case DWC3_DEPEVT_RXTXFIFOEVT:
> break;
> }
> }
> @@ -2246,6 +2259,7 @@ static void dwc3_reset_gadget(struct dwc3 *dwc)
> }
>
> static void dwc3_stop_active_transfer(struct dwc3 *dwc, u32 epnum, bool force)
> +__releases(&dwc->lock) __acquires(&dwc->lock)
> {
> struct dwc3_ep *dep;
> struct dwc3_gadget_ep_cmd_params params;
> @@ -2295,11 +2309,22 @@ static void dwc3_stop_active_transfer(struct dwc3 *dwc, u32 epnum, bool force)
> memset(&params, 0, sizeof(params));
> ret = dwc3_send_gadget_ep_cmd(dep, cmd, &params);
> WARN_ON_ONCE(ret);
> +
> + if (dwc3_is_usb31(dwc) || dwc->revision < DWC3_REVISION_310A) {
> + dep->flags |= DWC3_EP_END_TRANSFER_PENDING;
> + wait_event_lock_irq(dep->wait_end_transfer,
> + !(dep->flags & DWC3_EP_END_TRANSFER_PENDING),
> + dwc->lock);
> + }

I've tested your patch, unfortunately it can not work on my platform.
Since we can not issue wait_event_lock_irq() function in atomic
context, we will hold another 'cdev->lock' in composite_disconnect()
function. The dump stack as below:

[ 92.686810] c7 BUG: scheduling while atomic: init/1/0x00000002
[ 92.686825] c7 INFO: lockdep is turned off.
[ 92.686833] c0 Modules linked in: mtty marlin2_fm mali_kbase(O)
[ 92.686859] c0 Preemption disabled at:[<ffffffc00064cea8>]
composite_disconnect+0x30/0x90
[ 92.686879] c7
[ 92.686891] c7 CPU: 7 PID: 1 Comm: init Tainted: G W O 4.4.6+ #20
[ 92.686898] c7 Hardware name: Spreadtrum SP9860g Board (DT)
[ 92.686905] c7 Call trace:
[ 92.689519] c7 [<ffffffc00008b538>] dump_backtrace+0x0/0x170
[ 92.689528] c7 [<ffffffc00008b6c8>] show_stack+0x20/0x28
[ 92.689537] c7 [<ffffffc000424114>] dump_stack+0xa8/0xe0
[ 92.689547] c7 [<ffffffc0000fa7d8>] __schedule_bug+0x7c/0xd4
[ 92.689559] c7 [<ffffffc000aa5d20>] __schedule+0x724/0xab4
[ 92.689567] c7 [<ffffffc000aa62f8>] schedule+0x48/0xa8
[ 92.689578] c7 [<ffffffc000625fb8>]
dwc3_stop_active_transfer.constprop.27+0x98/0x158
[ 92.689587] c7 [<ffffffc000626c88>] dwc3_remove_requests+0x3c/0xa0
[ 92.689595] c7 [<ffffffc000627d08>] __dwc3_gadget_ep_disable+0x4c/0x12c
[ 92.689603] c7 [<ffffffc000628188>] dwc3_gadget_ep_disable+0x6c/0x100
[ 92.689613] c7 [<ffffffc0006708b4>] mtp_function_disable+0xc0/0x100
[ 92.689624] c7 [<ffffffc00064bfa0>] reset_config+0x4c/0x94
[ 92.689632] c7 [<ffffffc00064cebc>] composite_disconnect+0x44/0x90
[ 92.689641] c7 [<ffffffc0006509c4>] android_disconnect+0x60/0x70
[ 92.689649] c7 [<ffffffc000653080>] usb_gadget_remove_driver+0x6c/0xe0
[ 92.689657] c7 [<ffffffc000653170>] usb_gadget_unregister_driver+0x7c/0xc8
[ 92.689665] c7 [<ffffffc000651adc>] gadget_dev_desc_UDC_store+0x8c/0x128
[ 92.689675] c7 [<ffffffc0002c263c>] configfs_write_file+0xd0/0x13c
[ 92.689684] c7 [<ffffffc00023cb14>] vfs_write+0xb8/0x214
[ 92.689692] c7 [<ffffffc00023d594>] SyS_write+0x54/0xb0
[ 92.689701] c7 [<ffffffc000085ff0>] el0_svc_naked+0x24/0x28

> +
> dep->resource_index = 0;
> dep->flags &= ~DWC3_EP_BUSY;
>
> - if (dwc3_is_usb31(dwc) || dwc->revision < DWC3_REVISION_310A)
> - udelay(100);
> + if (dep->flags & DWC3_EP_KICK_POST_END_TRANSFER) {
> + dep->flags &= ~DWC3_EP_KICK_POST_END_TRANSFER;
> + ret = __dwc3_gadget_kick_transfer(dep, 0);
> + WARN_ON_ONCE(ret);
> + }
> }
>
> static void dwc3_stop_active_transfers(struct dwc3 *dwc)
> --
> 2.10.0.440.g21f862b
>
>
> --
> balbi



--
Baolin.wang
Best Regards

2016-10-14 07:47:12

by Felipe Balbi

[permalink] [raw]
Subject: Re: [PATCH v2] usb: dwc3: gadget: Wait for end transfer complete before free irq


Hi,

Baolin Wang <[email protected]> writes:
>>>>>>>>> I see what the problem is. Databook tells us we *must* set CMDIOC when
>>>>>>>>> issuing EndTransfer command and we should always wait for Command
>>>>>>>>> Complete IRQ. However, we took a shortcut and just delayed for 100us
>>>>>>>>> after issuing End Transfer.
>>>>>>>>
>>>>>>>> Yes, but 100us delay is not enough in some scenarios, like changing
>>>>>>>> function with configfs frequently, we will met problems.
>>>>>>>
>>>>>>> heh, 100us *is* enough. However we still get an IRQ because we requested
>>>>>>> for it. If you want to fix this, then you need to find a way to get rid
>>>>>>> of that 100us udelay() and add a proper wait_for_completion() to delay
>>>>>>> execution until command complete IRQ fires up.
>>>>>>
>>>>>> I haven't tested this yet, but it shows the idea (I think we might still
>>>>>> have a race with ep_queue if we're still waiting for End Transfer, but
>>>>>
>>>>> Yes, maybe we need check DWC3_EP_END_TRANSFER_PENDING flag when
>>>>> queuing one request.
>>>>
>>>> Yeah, I'll add that check later. I still need to make sure we would
>>>> still try to kick any transfers that might have been queued while
>>>> waiting for End Transfer Command Complete IRQ.
>>>
>>> OK. But I still worried if there are other races in some places which
>>
>> There are no other places where this needs to be sorted out.
>>
>>> is not easy to find out by testing. No introducing race condition
>>> would be one better solution, anyway I would like to test the patch
>>> you send out firstly.
>>
>> Right, but your patch was working around another problem, rather then
>> fixing it.
>>
>> Here's another version which should make sure everything remains working
>> as it should.
>>
>> 8<------------------------------------------------------------------------
>> From 1f922a902a21d5cee32082be18bbfbf1d46137e4 Mon Sep 17 00:00:00 2001
>> From: Felipe Balbi <[email protected]>
>> Date: Thu, 13 Oct 2016 14:09:47 +0300
>> Subject: [PATCH] usb: dwc3: gadget: wait for End Transfer to complete
>>
>> Instead of just delaying for 100us, we should
>> actually wait for End Transfer Command Complete
>> interrupt before moving on. Note that this should
>> only be done if we're dealing with one of the core
>> revisions that actually require the interrupt before
>> moving on.
>>
>> Reported-by: Baolin Wang <[email protected]>
>> Signed-off-by: Felipe Balbi <[email protected]>
>> ---
>> drivers/usb/dwc3/core.h | 10 +++++++++-
>> drivers/usb/dwc3/gadget.c | 31 ++++++++++++++++++++++++++++---
>> 2 files changed, 37 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
>> index e878366ead00..cf495d932252 100644
>> --- a/drivers/usb/dwc3/core.h
>> +++ b/drivers/usb/dwc3/core.h
>> @@ -26,6 +26,7 @@
>> #include <linux/dma-mapping.h>
>> #include <linux/mm.h>
>> #include <linux/debugfs.h>
>> +#include <linux/wait.h>
>>
>> #include <linux/usb/ch9.h>
>> #include <linux/usb/gadget.h>
>> @@ -504,6 +505,7 @@ struct dwc3_event_buffer {
>> * @endpoint: usb endpoint
>> * @pending_list: list of pending requests for this endpoint
>> * @started_list: list of started requests on this endpoint
>> + * @wait_end_transfer: wait_queue_head_t for waiting on End Transfer complete
>> * @lock: spinlock for endpoint request queue traversal
>> * @regs: pointer to first endpoint register
>> * @trb_pool: array of transaction buffers
>> @@ -529,6 +531,8 @@ struct dwc3_ep {
>> struct list_head pending_list;
>> struct list_head started_list;
>>
>> + wait_queue_head_t wait_end_transfer;
>> +
>> spinlock_t lock;
>> void __iomem *regs;
>>
>> @@ -545,7 +549,8 @@ struct dwc3_ep {
>> #define DWC3_EP_BUSY (1 << 4)
>> #define DWC3_EP_PENDING_REQUEST (1 << 5)
>> #define DWC3_EP_MISSED_ISOC (1 << 6)
>> -
>> +#define DWC3_EP_END_TRANSFER_PENDING (1 << 7)
>> +#define DWC3_EP_KICK_POST_END_TRANSFER (1 << 8)
>> /* This last one is specific to EP0 */
>> #define DWC3_EP0_DIR_IN (1 << 31)
>>
>> @@ -1047,6 +1052,9 @@ struct dwc3_event_depevt {
>> #define DEPEVT_TRANSFER_BUS_EXPIRY 2
>>
>> u32 parameters:16;
>> +
>> +/* For Command Complete Events */
>> +#define DEPEVT_PARAMETER_CMD(n) (((n) & (0xf << 24)) >> 24)
>> } __packed;
>>
>> /**
>> diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
>> index 3ba05b12e49a..a3f81b5ba901 100644
>> --- a/drivers/usb/dwc3/gadget.c
>> +++ b/drivers/usb/dwc3/gadget.c
>> @@ -598,6 +598,8 @@ static int __dwc3_gadget_ep_enable(struct dwc3_ep *dep,
>> reg |= DWC3_DALEPENA_EP(dep->number);
>> dwc3_writel(dwc->regs, DWC3_DALEPENA, reg);
>>
>> + init_waitqueue_head(&dep->wait_end_transfer);
>> +
>> if (usb_endpoint_xfer_control(desc))
>> return 0;
>>
>> @@ -1151,6 +1153,11 @@ static int __dwc3_gadget_ep_queue(struct dwc3_ep *dep, struct dwc3_request *req)
>> if (!dwc3_calc_trbs_left(dep))
>> return 0;
>>
>> + if (dep->flags & DWC3_EP_END_TRANSFER_PENDING) {
>> + dep->flags &= DWC3_EP_KICK_POST_END_TRANSFER;
>> + return 0;
>> + }
>> +
>> ret = __dwc3_gadget_kick_transfer(dep, 0);
>> if (ret && ret != -EBUSY)
>> dwc3_trace(trace_dwc3_gadget,
>> @@ -2156,6 +2163,7 @@ static void dwc3_endpoint_interrupt(struct dwc3 *dwc,
>> {
>> struct dwc3_ep *dep;
>> u8 epnum = event->endpoint_number;
>> + u8 cmd;
>>
>> dep = dwc->eps[epnum];
>>
>> @@ -2200,8 +2208,13 @@ static void dwc3_endpoint_interrupt(struct dwc3 *dwc,
>> return;
>> }
>> break;
>> - case DWC3_DEPEVT_RXTXFIFOEVT:
>> case DWC3_DEPEVT_EPCMDCMPLT:
>> + cmd = DEPEVT_PARAMETER_CMD(event->parameters);
>> +
>> + if (cmd == DWC3_DEPCMD_ENDTRANSFER)
>> + dep->flags &= ~DWC3_EP_END_TRANSFER_PENDING;
>
> I think you missed wake_up(&dep->wait_end_transfer) function here.
>
>> + break;
>> + case DWC3_DEPEVT_RXTXFIFOEVT:
>> break;
>> }
>> }
>> @@ -2246,6 +2259,7 @@ static void dwc3_reset_gadget(struct dwc3 *dwc)
>> }
>>
>> static void dwc3_stop_active_transfer(struct dwc3 *dwc, u32 epnum, bool force)
>> +__releases(&dwc->lock) __acquires(&dwc->lock)
>> {
>> struct dwc3_ep *dep;
>> struct dwc3_gadget_ep_cmd_params params;
>> @@ -2295,11 +2309,22 @@ static void dwc3_stop_active_transfer(struct dwc3 *dwc, u32 epnum, bool force)
>> memset(&params, 0, sizeof(params));
>> ret = dwc3_send_gadget_ep_cmd(dep, cmd, &params);
>> WARN_ON_ONCE(ret);
>> +
>> + if (dwc3_is_usb31(dwc) || dwc->revision < DWC3_REVISION_310A) {
>> + dep->flags |= DWC3_EP_END_TRANSFER_PENDING;
>> + wait_event_lock_irq(dep->wait_end_transfer,
>> + !(dep->flags & DWC3_EP_END_TRANSFER_PENDING),
>> + dwc->lock);
>> + }
>
> I've tested your patch, unfortunately it can not work on my platform.
> Since we can not issue wait_event_lock_irq() function in atomic
> context, we will hold another 'cdev->lock' in composite_disconnect()
> function. The dump stack as below:

argh, we have nested spinlocks :-( Well, we shouldn't call
usb_ep_disable() with locks held AFAICR. So the following should be
enough:

diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
index 919d7d1b611c..2e9359c58eb9 100644
--- a/drivers/usb/gadget/composite.c
+++ b/drivers/usb/gadget/composite.c
@@ -732,8 +732,10 @@ static void reset_config(struct usb_composite_dev *cdev)
DBG(cdev, "reset config\n");

list_for_each_entry(f, &cdev->config->functions, list) {
+ spin_unlock_irq(&cdev->lock);
if (f->disable)
f->disable(f);
+ spin_lock_irq(&cdev->lock);

bitmap_zero(f->endpoints, 32);
}

Alan, do you remember if we have a requirement for not holding locks
when calling usb_ep_disable()? I can't find Documentation about it, but
history shows me that usb_ep_disable() was called without locks and IRQs
enabled. Do you think we should update documentation about this?

(keeping dump below)

> [ 92.686810] c7 BUG: scheduling while atomic: init/1/0x00000002
> [ 92.686825] c7 INFO: lockdep is turned off.
> [ 92.686833] c0 Modules linked in: mtty marlin2_fm mali_kbase(O)
> [ 92.686859] c0 Preemption disabled at:[<ffffffc00064cea8>] composite_disconnect+0x30/0x90
> [ 92.686879] c7
> [ 92.686891] c7 CPU: 7 PID: 1 Comm: init Tainted: G W O 4.4.6+ #20
> [ 92.686898] c7 Hardware name: Spreadtrum SP9860g Board (DT)
> [ 92.686905] c7 Call trace:
> [ 92.689519] c7 [<ffffffc00008b538>] dump_backtrace+0x0/0x170
> [ 92.689528] c7 [<ffffffc00008b6c8>] show_stack+0x20/0x28
> [ 92.689537] c7 [<ffffffc000424114>] dump_stack+0xa8/0xe0
> [ 92.689547] c7 [<ffffffc0000fa7d8>] __schedule_bug+0x7c/0xd4
> [ 92.689559] c7 [<ffffffc000aa5d20>] __schedule+0x724/0xab4
> [ 92.689567] c7 [<ffffffc000aa62f8>] schedule+0x48/0xa8
> [ 92.689578] c7 [<ffffffc000625fb8>] dwc3_stop_active_transfer.constprop.27+0x98/0x158
> [ 92.689587] c7 [<ffffffc000626c88>] dwc3_remove_requests+0x3c/0xa0
> [ 92.689595] c7 [<ffffffc000627d08>] __dwc3_gadget_ep_disable+0x4c/0x12c
> [ 92.689603] c7 [<ffffffc000628188>] dwc3_gadget_ep_disable+0x6c/0x100
> [ 92.689613] c7 [<ffffffc0006708b4>] mtp_function_disable+0xc0/0x100
> [ 92.689624] c7 [<ffffffc00064bfa0>] reset_config+0x4c/0x94
> [ 92.689632] c7 [<ffffffc00064cebc>] composite_disconnect+0x44/0x90
> [ 92.689641] c7 [<ffffffc0006509c4>] android_disconnect+0x60/0x70
> [ 92.689649] c7 [<ffffffc000653080>] usb_gadget_remove_driver+0x6c/0xe0
> [ 92.689657] c7 [<ffffffc000653170>] usb_gadget_unregister_driver+0x7c/0xc8
> [ 92.689665] c7 [<ffffffc000651adc>] gadget_dev_desc_UDC_store+0x8c/0x128
> [ 92.689675] c7 [<ffffffc0002c263c>] configfs_write_file+0xd0/0x13c
> [ 92.689684] c7 [<ffffffc00023cb14>] vfs_write+0xb8/0x214
> [ 92.689692] c7 [<ffffffc00023d594>] SyS_write+0x54/0xb0
> [ 92.689701] c7 [<ffffffc000085ff0>] el0_svc_naked+0x24/0x28

--
balbi


Attachments:
signature.asc (800.00 B)

2016-10-14 09:11:02

by Baolin Wang

[permalink] [raw]
Subject: Re: [PATCH v2] usb: dwc3: gadget: Wait for end transfer complete before free irq

Hi Felipe,

On 14 October 2016 at 15:46, Felipe Balbi <[email protected]> wrote:
>
> Hi,
>
> Baolin Wang <[email protected]> writes:
>>>>>>>>>> I see what the problem is. Databook tells us we *must* set CMDIOC when
>>>>>>>>>> issuing EndTransfer command and we should always wait for Command
>>>>>>>>>> Complete IRQ. However, we took a shortcut and just delayed for 100us
>>>>>>>>>> after issuing End Transfer.
>>>>>>>>>
>>>>>>>>> Yes, but 100us delay is not enough in some scenarios, like changing
>>>>>>>>> function with configfs frequently, we will met problems.
>>>>>>>>
>>>>>>>> heh, 100us *is* enough. However we still get an IRQ because we requested
>>>>>>>> for it. If you want to fix this, then you need to find a way to get rid
>>>>>>>> of that 100us udelay() and add a proper wait_for_completion() to delay
>>>>>>>> execution until command complete IRQ fires up.
>>>>>>>
>>>>>>> I haven't tested this yet, but it shows the idea (I think we might still
>>>>>>> have a race with ep_queue if we're still waiting for End Transfer, but
>>>>>>
>>>>>> Yes, maybe we need check DWC3_EP_END_TRANSFER_PENDING flag when
>>>>>> queuing one request.
>>>>>
>>>>> Yeah, I'll add that check later. I still need to make sure we would
>>>>> still try to kick any transfers that might have been queued while
>>>>> waiting for End Transfer Command Complete IRQ.
>>>>
>>>> OK. But I still worried if there are other races in some places which
>>>
>>> There are no other places where this needs to be sorted out.
>>>
>>>> is not easy to find out by testing. No introducing race condition
>>>> would be one better solution, anyway I would like to test the patch
>>>> you send out firstly.
>>>
>>> Right, but your patch was working around another problem, rather then
>>> fixing it.
>>>
>>> Here's another version which should make sure everything remains working
>>> as it should.
>>>
>>> 8<------------------------------------------------------------------------
>>> From 1f922a902a21d5cee32082be18bbfbf1d46137e4 Mon Sep 17 00:00:00 2001
>>> From: Felipe Balbi <[email protected]>
>>> Date: Thu, 13 Oct 2016 14:09:47 +0300
>>> Subject: [PATCH] usb: dwc3: gadget: wait for End Transfer to complete
>>>
>>> Instead of just delaying for 100us, we should
>>> actually wait for End Transfer Command Complete
>>> interrupt before moving on. Note that this should
>>> only be done if we're dealing with one of the core
>>> revisions that actually require the interrupt before
>>> moving on.
>>>
>>> Reported-by: Baolin Wang <[email protected]>
>>> Signed-off-by: Felipe Balbi <[email protected]>
>>> ---
>>> drivers/usb/dwc3/core.h | 10 +++++++++-
>>> drivers/usb/dwc3/gadget.c | 31 ++++++++++++++++++++++++++++---
>>> 2 files changed, 37 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
>>> index e878366ead00..cf495d932252 100644
>>> --- a/drivers/usb/dwc3/core.h
>>> +++ b/drivers/usb/dwc3/core.h
>>> @@ -26,6 +26,7 @@
>>> #include <linux/dma-mapping.h>
>>> #include <linux/mm.h>
>>> #include <linux/debugfs.h>
>>> +#include <linux/wait.h>
>>>
>>> #include <linux/usb/ch9.h>
>>> #include <linux/usb/gadget.h>
>>> @@ -504,6 +505,7 @@ struct dwc3_event_buffer {
>>> * @endpoint: usb endpoint
>>> * @pending_list: list of pending requests for this endpoint
>>> * @started_list: list of started requests on this endpoint
>>> + * @wait_end_transfer: wait_queue_head_t for waiting on End Transfer complete
>>> * @lock: spinlock for endpoint request queue traversal
>>> * @regs: pointer to first endpoint register
>>> * @trb_pool: array of transaction buffers
>>> @@ -529,6 +531,8 @@ struct dwc3_ep {
>>> struct list_head pending_list;
>>> struct list_head started_list;
>>>
>>> + wait_queue_head_t wait_end_transfer;
>>> +
>>> spinlock_t lock;
>>> void __iomem *regs;
>>>
>>> @@ -545,7 +549,8 @@ struct dwc3_ep {
>>> #define DWC3_EP_BUSY (1 << 4)
>>> #define DWC3_EP_PENDING_REQUEST (1 << 5)
>>> #define DWC3_EP_MISSED_ISOC (1 << 6)
>>> -
>>> +#define DWC3_EP_END_TRANSFER_PENDING (1 << 7)
>>> +#define DWC3_EP_KICK_POST_END_TRANSFER (1 << 8)
>>> /* This last one is specific to EP0 */
>>> #define DWC3_EP0_DIR_IN (1 << 31)
>>>
>>> @@ -1047,6 +1052,9 @@ struct dwc3_event_depevt {
>>> #define DEPEVT_TRANSFER_BUS_EXPIRY 2
>>>
>>> u32 parameters:16;
>>> +
>>> +/* For Command Complete Events */
>>> +#define DEPEVT_PARAMETER_CMD(n) (((n) & (0xf << 24)) >> 24)
>>> } __packed;
>>>
>>> /**
>>> diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
>>> index 3ba05b12e49a..a3f81b5ba901 100644
>>> --- a/drivers/usb/dwc3/gadget.c
>>> +++ b/drivers/usb/dwc3/gadget.c
>>> @@ -598,6 +598,8 @@ static int __dwc3_gadget_ep_enable(struct dwc3_ep *dep,
>>> reg |= DWC3_DALEPENA_EP(dep->number);
>>> dwc3_writel(dwc->regs, DWC3_DALEPENA, reg);
>>>
>>> + init_waitqueue_head(&dep->wait_end_transfer);
>>> +
>>> if (usb_endpoint_xfer_control(desc))
>>> return 0;
>>>
>>> @@ -1151,6 +1153,11 @@ static int __dwc3_gadget_ep_queue(struct dwc3_ep *dep, struct dwc3_request *req)
>>> if (!dwc3_calc_trbs_left(dep))
>>> return 0;
>>>
>>> + if (dep->flags & DWC3_EP_END_TRANSFER_PENDING) {
>>> + dep->flags &= DWC3_EP_KICK_POST_END_TRANSFER;
>>> + return 0;
>>> + }
>>> +
>>> ret = __dwc3_gadget_kick_transfer(dep, 0);
>>> if (ret && ret != -EBUSY)
>>> dwc3_trace(trace_dwc3_gadget,
>>> @@ -2156,6 +2163,7 @@ static void dwc3_endpoint_interrupt(struct dwc3 *dwc,
>>> {
>>> struct dwc3_ep *dep;
>>> u8 epnum = event->endpoint_number;
>>> + u8 cmd;
>>>
>>> dep = dwc->eps[epnum];
>>>
>>> @@ -2200,8 +2208,13 @@ static void dwc3_endpoint_interrupt(struct dwc3 *dwc,
>>> return;
>>> }
>>> break;
>>> - case DWC3_DEPEVT_RXTXFIFOEVT:
>>> case DWC3_DEPEVT_EPCMDCMPLT:
>>> + cmd = DEPEVT_PARAMETER_CMD(event->parameters);
>>> +
>>> + if (cmd == DWC3_DEPCMD_ENDTRANSFER)
>>> + dep->flags &= ~DWC3_EP_END_TRANSFER_PENDING;
>>
>> I think you missed wake_up(&dep->wait_end_transfer) function here.
>>
>>> + break;
>>> + case DWC3_DEPEVT_RXTXFIFOEVT:
>>> break;
>>> }
>>> }
>>> @@ -2246,6 +2259,7 @@ static void dwc3_reset_gadget(struct dwc3 *dwc)
>>> }
>>>
>>> static void dwc3_stop_active_transfer(struct dwc3 *dwc, u32 epnum, bool force)
>>> +__releases(&dwc->lock) __acquires(&dwc->lock)
>>> {
>>> struct dwc3_ep *dep;
>>> struct dwc3_gadget_ep_cmd_params params;
>>> @@ -2295,11 +2309,22 @@ static void dwc3_stop_active_transfer(struct dwc3 *dwc, u32 epnum, bool force)
>>> memset(&params, 0, sizeof(params));
>>> ret = dwc3_send_gadget_ep_cmd(dep, cmd, &params);
>>> WARN_ON_ONCE(ret);
>>> +
>>> + if (dwc3_is_usb31(dwc) || dwc->revision < DWC3_REVISION_310A) {
>>> + dep->flags |= DWC3_EP_END_TRANSFER_PENDING;
>>> + wait_event_lock_irq(dep->wait_end_transfer,
>>> + !(dep->flags & DWC3_EP_END_TRANSFER_PENDING),
>>> + dwc->lock);
>>> + }
>>
>> I've tested your patch, unfortunately it can not work on my platform.
>> Since we can not issue wait_event_lock_irq() function in atomic
>> context, we will hold another 'cdev->lock' in composite_disconnect()
>> function. The dump stack as below:
>
> argh, we have nested spinlocks :-( Well, we shouldn't call
> usb_ep_disable() with locks held AFAICR. So the following should be
> enough:
>
> diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
> index 919d7d1b611c..2e9359c58eb9 100644
> --- a/drivers/usb/gadget/composite.c
> +++ b/drivers/usb/gadget/composite.c
> @@ -732,8 +732,10 @@ static void reset_config(struct usb_composite_dev *cdev)
> DBG(cdev, "reset config\n");
>
> list_for_each_entry(f, &cdev->config->functions, list) {
> + spin_unlock_irq(&cdev->lock);
> if (f->disable)
> f->disable(f);
> + spin_lock_irq(&cdev->lock);
>
> bitmap_zero(f->endpoints, 32);
> }

After applying above modification, there are no obvious errors, but I
still met problem: I can not change usb function with configfs. I need
some time to find out why.

>
> Alan, do you remember if we have a requirement for not holding locks
> when calling usb_ep_disable()? I can't find Documentation about it, but
> history shows me that usb_ep_disable() was called without locks and IRQs
> enabled. Do you think we should update documentation about this?
>
> (keeping dump below)
>
>> [ 92.686810] c7 BUG: scheduling while atomic: init/1/0x00000002
>> [ 92.686825] c7 INFO: lockdep is turned off.
>> [ 92.686833] c0 Modules linked in: mtty marlin2_fm mali_kbase(O)
>> [ 92.686859] c0 Preemption disabled at:[<ffffffc00064cea8>] composite_disconnect+0x30/0x90
>> [ 92.686879] c7
>> [ 92.686891] c7 CPU: 7 PID: 1 Comm: init Tainted: G W O 4.4.6+ #20
>> [ 92.686898] c7 Hardware name: Spreadtrum SP9860g Board (DT)
>> [ 92.686905] c7 Call trace:
>> [ 92.689519] c7 [<ffffffc00008b538>] dump_backtrace+0x0/0x170
>> [ 92.689528] c7 [<ffffffc00008b6c8>] show_stack+0x20/0x28
>> [ 92.689537] c7 [<ffffffc000424114>] dump_stack+0xa8/0xe0
>> [ 92.689547] c7 [<ffffffc0000fa7d8>] __schedule_bug+0x7c/0xd4
>> [ 92.689559] c7 [<ffffffc000aa5d20>] __schedule+0x724/0xab4
>> [ 92.689567] c7 [<ffffffc000aa62f8>] schedule+0x48/0xa8
>> [ 92.689578] c7 [<ffffffc000625fb8>] dwc3_stop_active_transfer.constprop.27+0x98/0x158
>> [ 92.689587] c7 [<ffffffc000626c88>] dwc3_remove_requests+0x3c/0xa0
>> [ 92.689595] c7 [<ffffffc000627d08>] __dwc3_gadget_ep_disable+0x4c/0x12c
>> [ 92.689603] c7 [<ffffffc000628188>] dwc3_gadget_ep_disable+0x6c/0x100
>> [ 92.689613] c7 [<ffffffc0006708b4>] mtp_function_disable+0xc0/0x100
>> [ 92.689624] c7 [<ffffffc00064bfa0>] reset_config+0x4c/0x94
>> [ 92.689632] c7 [<ffffffc00064cebc>] composite_disconnect+0x44/0x90
>> [ 92.689641] c7 [<ffffffc0006509c4>] android_disconnect+0x60/0x70
>> [ 92.689649] c7 [<ffffffc000653080>] usb_gadget_remove_driver+0x6c/0xe0
>> [ 92.689657] c7 [<ffffffc000653170>] usb_gadget_unregister_driver+0x7c/0xc8
>> [ 92.689665] c7 [<ffffffc000651adc>] gadget_dev_desc_UDC_store+0x8c/0x128
>> [ 92.689675] c7 [<ffffffc0002c263c>] configfs_write_file+0xd0/0x13c
>> [ 92.689684] c7 [<ffffffc00023cb14>] vfs_write+0xb8/0x214
>> [ 92.689692] c7 [<ffffffc00023d594>] SyS_write+0x54/0xb0
>> [ 92.689701] c7 [<ffffffc000085ff0>] el0_svc_naked+0x24/0x28
>
> --
> balbi



--
Baolin.wang
Best Regards

2016-10-14 09:50:55

by Felipe Balbi

[permalink] [raw]
Subject: Re: [PATCH v2] usb: dwc3: gadget: Wait for end transfer complete before free irq


Hi,

Baolin Wang <[email protected]> writes:
>> Baolin Wang <[email protected]> writes:
>>>>>>>>>>> I see what the problem is. Databook tells us we *must* set CMDIOC when
>>>>>>>>>>> issuing EndTransfer command and we should always wait for Command
>>>>>>>>>>> Complete IRQ. However, we took a shortcut and just delayed for 100us
>>>>>>>>>>> after issuing End Transfer.
>>>>>>>>>>
>>>>>>>>>> Yes, but 100us delay is not enough in some scenarios, like changing
>>>>>>>>>> function with configfs frequently, we will met problems.
>>>>>>>>>
>>>>>>>>> heh, 100us *is* enough. However we still get an IRQ because we requested
>>>>>>>>> for it. If you want to fix this, then you need to find a way to get rid
>>>>>>>>> of that 100us udelay() and add a proper wait_for_completion() to delay
>>>>>>>>> execution until command complete IRQ fires up.
>>>>>>>>
>>>>>>>> I haven't tested this yet, but it shows the idea (I think we might still
>>>>>>>> have a race with ep_queue if we're still waiting for End Transfer, but
>>>>>>>
>>>>>>> Yes, maybe we need check DWC3_EP_END_TRANSFER_PENDING flag when
>>>>>>> queuing one request.
>>>>>>
>>>>>> Yeah, I'll add that check later. I still need to make sure we would
>>>>>> still try to kick any transfers that might have been queued while
>>>>>> waiting for End Transfer Command Complete IRQ.
>>>>>
>>>>> OK. But I still worried if there are other races in some places which
>>>>
>>>> There are no other places where this needs to be sorted out.
>>>>
>>>>> is not easy to find out by testing. No introducing race condition
>>>>> would be one better solution, anyway I would like to test the patch
>>>>> you send out firstly.
>>>>
>>>> Right, but your patch was working around another problem, rather then
>>>> fixing it.
>>>>
>>>> Here's another version which should make sure everything remains working
>>>> as it should.
>>>>
>>>> 8<------------------------------------------------------------------------
>>>> From 1f922a902a21d5cee32082be18bbfbf1d46137e4 Mon Sep 17 00:00:00 2001
>>>> From: Felipe Balbi <[email protected]>
>>>> Date: Thu, 13 Oct 2016 14:09:47 +0300
>>>> Subject: [PATCH] usb: dwc3: gadget: wait for End Transfer to complete
>>>>
>>>> Instead of just delaying for 100us, we should
>>>> actually wait for End Transfer Command Complete
>>>> interrupt before moving on. Note that this should
>>>> only be done if we're dealing with one of the core
>>>> revisions that actually require the interrupt before
>>>> moving on.
>>>>
>>>> Reported-by: Baolin Wang <[email protected]>
>>>> Signed-off-by: Felipe Balbi <[email protected]>
>>>> ---
>>>> drivers/usb/dwc3/core.h | 10 +++++++++-
>>>> drivers/usb/dwc3/gadget.c | 31 ++++++++++++++++++++++++++++---
>>>> 2 files changed, 37 insertions(+), 4 deletions(-)
>>>>
>>>> diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
>>>> index e878366ead00..cf495d932252 100644
>>>> --- a/drivers/usb/dwc3/core.h
>>>> +++ b/drivers/usb/dwc3/core.h
>>>> @@ -26,6 +26,7 @@
>>>> #include <linux/dma-mapping.h>
>>>> #include <linux/mm.h>
>>>> #include <linux/debugfs.h>
>>>> +#include <linux/wait.h>
>>>>
>>>> #include <linux/usb/ch9.h>
>>>> #include <linux/usb/gadget.h>
>>>> @@ -504,6 +505,7 @@ struct dwc3_event_buffer {
>>>> * @endpoint: usb endpoint
>>>> * @pending_list: list of pending requests for this endpoint
>>>> * @started_list: list of started requests on this endpoint
>>>> + * @wait_end_transfer: wait_queue_head_t for waiting on End Transfer complete
>>>> * @lock: spinlock for endpoint request queue traversal
>>>> * @regs: pointer to first endpoint register
>>>> * @trb_pool: array of transaction buffers
>>>> @@ -529,6 +531,8 @@ struct dwc3_ep {
>>>> struct list_head pending_list;
>>>> struct list_head started_list;
>>>>
>>>> + wait_queue_head_t wait_end_transfer;
>>>> +
>>>> spinlock_t lock;
>>>> void __iomem *regs;
>>>>
>>>> @@ -545,7 +549,8 @@ struct dwc3_ep {
>>>> #define DWC3_EP_BUSY (1 << 4)
>>>> #define DWC3_EP_PENDING_REQUEST (1 << 5)
>>>> #define DWC3_EP_MISSED_ISOC (1 << 6)
>>>> -
>>>> +#define DWC3_EP_END_TRANSFER_PENDING (1 << 7)
>>>> +#define DWC3_EP_KICK_POST_END_TRANSFER (1 << 8)
>>>> /* This last one is specific to EP0 */
>>>> #define DWC3_EP0_DIR_IN (1 << 31)
>>>>
>>>> @@ -1047,6 +1052,9 @@ struct dwc3_event_depevt {
>>>> #define DEPEVT_TRANSFER_BUS_EXPIRY 2
>>>>
>>>> u32 parameters:16;
>>>> +
>>>> +/* For Command Complete Events */
>>>> +#define DEPEVT_PARAMETER_CMD(n) (((n) & (0xf << 24)) >> 24)
>>>> } __packed;
>>>>
>>>> /**
>>>> diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
>>>> index 3ba05b12e49a..a3f81b5ba901 100644
>>>> --- a/drivers/usb/dwc3/gadget.c
>>>> +++ b/drivers/usb/dwc3/gadget.c
>>>> @@ -598,6 +598,8 @@ static int __dwc3_gadget_ep_enable(struct dwc3_ep *dep,
>>>> reg |= DWC3_DALEPENA_EP(dep->number);
>>>> dwc3_writel(dwc->regs, DWC3_DALEPENA, reg);
>>>>
>>>> + init_waitqueue_head(&dep->wait_end_transfer);
>>>> +
>>>> if (usb_endpoint_xfer_control(desc))
>>>> return 0;
>>>>
>>>> @@ -1151,6 +1153,11 @@ static int __dwc3_gadget_ep_queue(struct dwc3_ep *dep, struct dwc3_request *req)
>>>> if (!dwc3_calc_trbs_left(dep))
>>>> return 0;
>>>>
>>>> + if (dep->flags & DWC3_EP_END_TRANSFER_PENDING) {
>>>> + dep->flags &= DWC3_EP_KICK_POST_END_TRANSFER;
>>>> + return 0;
>>>> + }
>>>> +
>>>> ret = __dwc3_gadget_kick_transfer(dep, 0);
>>>> if (ret && ret != -EBUSY)
>>>> dwc3_trace(trace_dwc3_gadget,
>>>> @@ -2156,6 +2163,7 @@ static void dwc3_endpoint_interrupt(struct dwc3 *dwc,
>>>> {
>>>> struct dwc3_ep *dep;
>>>> u8 epnum = event->endpoint_number;
>>>> + u8 cmd;
>>>>
>>>> dep = dwc->eps[epnum];
>>>>
>>>> @@ -2200,8 +2208,13 @@ static void dwc3_endpoint_interrupt(struct dwc3 *dwc,
>>>> return;
>>>> }
>>>> break;
>>>> - case DWC3_DEPEVT_RXTXFIFOEVT:
>>>> case DWC3_DEPEVT_EPCMDCMPLT:
>>>> + cmd = DEPEVT_PARAMETER_CMD(event->parameters);
>>>> +
>>>> + if (cmd == DWC3_DEPCMD_ENDTRANSFER)
>>>> + dep->flags &= ~DWC3_EP_END_TRANSFER_PENDING;
>>>
>>> I think you missed wake_up(&dep->wait_end_transfer) function here.
>>>
>>>> + break;
>>>> + case DWC3_DEPEVT_RXTXFIFOEVT:
>>>> break;
>>>> }
>>>> }
>>>> @@ -2246,6 +2259,7 @@ static void dwc3_reset_gadget(struct dwc3 *dwc)
>>>> }
>>>>
>>>> static void dwc3_stop_active_transfer(struct dwc3 *dwc, u32 epnum, bool force)
>>>> +__releases(&dwc->lock) __acquires(&dwc->lock)
>>>> {
>>>> struct dwc3_ep *dep;
>>>> struct dwc3_gadget_ep_cmd_params params;
>>>> @@ -2295,11 +2309,22 @@ static void dwc3_stop_active_transfer(struct dwc3 *dwc, u32 epnum, bool force)
>>>> memset(&params, 0, sizeof(params));
>>>> ret = dwc3_send_gadget_ep_cmd(dep, cmd, &params);
>>>> WARN_ON_ONCE(ret);
>>>> +
>>>> + if (dwc3_is_usb31(dwc) || dwc->revision < DWC3_REVISION_310A) {
>>>> + dep->flags |= DWC3_EP_END_TRANSFER_PENDING;
>>>> + wait_event_lock_irq(dep->wait_end_transfer,
>>>> + !(dep->flags & DWC3_EP_END_TRANSFER_PENDING),
>>>> + dwc->lock);
>>>> + }
>>>
>>> I've tested your patch, unfortunately it can not work on my platform.
>>> Since we can not issue wait_event_lock_irq() function in atomic
>>> context, we will hold another 'cdev->lock' in composite_disconnect()
>>> function. The dump stack as below:
>>
>> argh, we have nested spinlocks :-( Well, we shouldn't call
>> usb_ep_disable() with locks held AFAICR. So the following should be
>> enough:
>>
>> diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
>> index 919d7d1b611c..2e9359c58eb9 100644
>> --- a/drivers/usb/gadget/composite.c
>> +++ b/drivers/usb/gadget/composite.c
>> @@ -732,8 +732,10 @@ static void reset_config(struct usb_composite_dev *cdev)
>> DBG(cdev, "reset config\n");
>>
>> list_for_each_entry(f, &cdev->config->functions, list) {
>> + spin_unlock_irq(&cdev->lock);
>> if (f->disable)
>> f->disable(f);
>> + spin_lock_irq(&cdev->lock);
>>
>> bitmap_zero(f->endpoints, 32);
>> }
>
> After applying above modification, there are no obvious errors, but I
> still met problem: I can not change usb function with configfs. I need
> some time to find out why.

I'm also looking into this and testing it out here on SKL.

--
balbi


Attachments:
signature.asc (800.00 B)

2016-10-14 14:36:35

by Alan Stern

[permalink] [raw]
Subject: Re: [PATCH v2] usb: dwc3: gadget: Wait for end transfer complete before free irq

On Fri, 14 Oct 2016, Felipe Balbi wrote:

> argh, we have nested spinlocks :-( Well, we shouldn't call
> usb_ep_disable() with locks held AFAICR. So the following should be
> enough:
>
> diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
> index 919d7d1b611c..2e9359c58eb9 100644
> --- a/drivers/usb/gadget/composite.c
> +++ b/drivers/usb/gadget/composite.c
> @@ -732,8 +732,10 @@ static void reset_config(struct usb_composite_dev *cdev)
> DBG(cdev, "reset config\n");
>
> list_for_each_entry(f, &cdev->config->functions, list) {
> + spin_unlock_irq(&cdev->lock);
> if (f->disable)
> f->disable(f);
> + spin_lock_irq(&cdev->lock);
>
> bitmap_zero(f->endpoints, 32);
> }
>
> Alan, do you remember if we have a requirement for not holding locks
> when calling usb_ep_disable()? I can't find Documentation about it, but
> history shows me that usb_ep_disable() was called without locks and IRQs
> enabled. Do you think we should update documentation about this?

I don't think there is any requirement for interrupts to be enabled
when usb_ep_disable() runs. At least, a quick check shows that both
net2280 and dummy-hcd use spin_lock_irqsave() rather than spin_lock()
in their disable routines.

Holding locks is a different story. It should be okay for a gadget
driver to hold one of its own locks when disabling an endpoint (which
means that the gadget's disable routine shouldn't wait for a concurrent
giveback to finish), but we might want to avoid holding a lock in the
composite core. Although even that might be okay -- I can't think of
any reason why a udc driver would need to call back into the composite
core while disabling an endpoint. It should be a pretty self-contained
operation.

Alan Stern

2016-10-17 08:11:48

by Felipe Balbi

[permalink] [raw]
Subject: Re: [PATCH v2] usb: dwc3: gadget: Wait for end transfer complete before free irq


Hi,

(I have added you to another thread which is where we'll be collecting
discussion about this, however ...)

Alan Stern <[email protected]> writes:
> On Fri, 14 Oct 2016, Felipe Balbi wrote:
>
>> argh, we have nested spinlocks :-( Well, we shouldn't call
>> usb_ep_disable() with locks held AFAICR. So the following should be
>> enough:
>>
>> diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
>> index 919d7d1b611c..2e9359c58eb9 100644
>> --- a/drivers/usb/gadget/composite.c
>> +++ b/drivers/usb/gadget/composite.c
>> @@ -732,8 +732,10 @@ static void reset_config(struct usb_composite_dev *cdev)
>> DBG(cdev, "reset config\n");
>>
>> list_for_each_entry(f, &cdev->config->functions, list) {
>> + spin_unlock_irq(&cdev->lock);
>> if (f->disable)
>> f->disable(f);
>> + spin_lock_irq(&cdev->lock);
>>
>> bitmap_zero(f->endpoints, 32);
>> }
>>
>> Alan, do you remember if we have a requirement for not holding locks
>> when calling usb_ep_disable()? I can't find Documentation about it, but
>> history shows me that usb_ep_disable() was called without locks and IRQs
>> enabled. Do you think we should update documentation about this?
>
> I don't think there is any requirement for interrupts to be enabled
> when usb_ep_disable() runs. At least, a quick check shows that both
> net2280 and dummy-hcd use spin_lock_irqsave() rather than spin_lock()
> in their disable routines.
>
> Holding locks is a different story. It should be okay for a gadget
> driver to hold one of its own locks when disabling an endpoint (which
> means that the gadget's disable routine shouldn't wait for a concurrent
> giveback to finish), but we might want to avoid holding a lock in the
> composite core. Although even that might be okay -- I can't think of
> any reason why a udc driver would need to call back into the composite
> core while disabling an endpoint. It should be a pretty self-contained
> operation.

True, but how do we handle controllers which need to wait for an
interrupt in order to cancel a transfer? Maybe we should change the
calling context of usb_ep_disable() so that it _must_ be called with
IRQs enabled?

--
balbi


Attachments:
signature.asc (800.00 B)