2018-03-20 02:49:33

by Sinan Kaya

[permalink] [raw]
Subject: [PATCH v4 5/6] IB/nes: Eliminate duplicate barriers on weakly-ordered archs

Code includes barrier() followed by writel(). writel() already has a
barrier on some architectures like arm64.

This ends up CPU observing two barriers back to back before executing the
register write.

Create a new wrapper function with relaxed write operator. Use the new
wrapper when a write is following a barrier().

Signed-off-by: Sinan Kaya <[email protected]>
---
drivers/infiniband/hw/nes/nes.h | 5 +++++
drivers/infiniband/hw/nes/nes_hw.c | 21 ++++++++++++++-------
drivers/infiniband/hw/nes/nes_mgt.c | 15 ++++++++++-----
drivers/infiniband/hw/nes/nes_nic.c | 2 +-
drivers/infiniband/hw/nes/nes_utils.c | 3 ++-
drivers/infiniband/hw/nes/nes_verbs.c | 5 +++--
6 files changed, 35 insertions(+), 16 deletions(-)

diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h
index 00c27291..85e007d 100644
--- a/drivers/infiniband/hw/nes/nes.h
+++ b/drivers/infiniband/hw/nes/nes.h
@@ -387,6 +387,11 @@ static inline void nes_write_indexed(struct nes_device *nesdev, u32 reg_index, u
spin_unlock_irqrestore(&nesdev->indexed_regs_lock, flags);
}

+static inline void nes_write32_relaxed(void __iomem *addr, u32 val)
+{
+ writel_relaxed(val, addr);
+}
+
static inline void nes_write32(void __iomem *addr, u32 val)
{
writel(val, addr);
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index 18a7de1..568e17d 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -1257,7 +1257,8 @@ int nes_destroy_cqp(struct nes_device *nesdev)

barrier();
/* Ring doorbell (5 WQEs) */
- nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x05800000 | nesdev->cqp.qp_id);
+ nes_write32_relaxed(nesdev->regs+NES_WQE_ALLOC,
+ 0x05800000 | nesdev->cqp.qp_id);

spin_unlock_irqrestore(&nesdev->cqp.lock, flags);

@@ -1594,7 +1595,8 @@ static void nes_replenish_nic_rq(struct nes_vnic *nesvnic)
atomic_dec(&nesvnic->rx_skbs_needed);
barrier();
if (++rx_wqes_posted == 255) {
- nes_write32(nesdev->regs+NES_WQE_ALLOC, (rx_wqes_posted << 24) | nesnic->qp_id);
+ nes_write32_relaxed(nesdev->regs+NES_WQE_ALLOC,
+ (rx_wqes_posted << 24) | nesnic->qp_id);
rx_wqes_posted = 0;
}
} else {
@@ -1612,7 +1614,8 @@ static void nes_replenish_nic_rq(struct nes_vnic *nesvnic)
} while (atomic_read(&nesvnic->rx_skbs_needed));
barrier();
if (rx_wqes_posted)
- nes_write32(nesdev->regs+NES_WQE_ALLOC, (rx_wqes_posted << 24) | nesnic->qp_id);
+ nes_write32_relaxed(nesdev->regs+NES_WQE_ALLOC,
+ (rx_wqes_posted << 24) | nesnic->qp_id);
nesnic->replenishing_rq = 0;
}

@@ -1795,7 +1798,8 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev)
barrier();

/* Ring doorbell (2 WQEs) */
- nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x02800000 | nesdev->cqp.qp_id);
+ nes_write32_relaxed(nesdev->regs+NES_WQE_ALLOC,
+ 0x02800000 | nesdev->cqp.qp_id);

spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
nes_debug(NES_DBG_INIT, "Waiting for create NIC QP%u to complete.\n",
@@ -1844,7 +1848,8 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev)
do {
counter = min(wqe_count, ((u32)255));
wqe_count -= counter;
- nes_write32(nesdev->regs+NES_WQE_ALLOC, (counter << 24) | nesvnic->nic.qp_id);
+ nes_write32_relaxed(nesdev->regs+NES_WQE_ALLOC,
+ (counter << 24) | nesvnic->nic.qp_id);
} while (wqe_count);
timer_setup(&nesvnic->rq_wqes_timer, nes_rq_wqes_timeout, 0);
nes_debug(NES_DBG_INIT, "NAPI support Enabled\n");
@@ -1988,7 +1993,8 @@ void nes_destroy_nic_qp(struct nes_vnic *nesvnic)
barrier();

/* Ring doorbell (2 WQEs) */
- nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x02800000 | nesdev->cqp.qp_id);
+ nes_write32_relaxed(nesdev->regs+NES_WQE_ALLOC,
+ 0x02800000 | nesdev->cqp.qp_id);

spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
nes_debug(NES_DBG_SHUTDOWN, "Waiting for CQP, cqp_head=%u, cqp.sq_head=%u,"
@@ -3064,7 +3070,8 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq)
cqp_request, le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX])&0x3f, head);
/* Ring doorbell (1 WQEs) */
barrier();
- nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x01800000 | nesdev->cqp.qp_id);
+ nes_write32_relaxed(nesdev->regs+NES_WQE_ALLOC,
+ 0x01800000 | nesdev->cqp.qp_id);
}
spin_unlock_irqrestore(&nesdev->cqp.lock, flags);

diff --git a/drivers/infiniband/hw/nes/nes_mgt.c b/drivers/infiniband/hw/nes/nes_mgt.c
index 21e0ebd..5c5073c 100644
--- a/drivers/infiniband/hw/nes/nes_mgt.c
+++ b/drivers/infiniband/hw/nes/nes_mgt.c
@@ -96,7 +96,8 @@ static void nes_replenish_mgt_rq(struct nes_vnic_mgt *mgtvnic)
atomic_dec(&mgtvnic->rx_skbs_needed);
barrier();
if (++rx_wqes_posted == 255) {
- nes_write32(nesdev->regs + NES_WQE_ALLOC, (rx_wqes_posted << 24) | nesmgt->qp_id);
+ nes_write32_relaxed(nesdev->regs + NES_WQE_ALLOC,
+ (rx_wqes_posted << 24) | nesmgt->qp_id);
rx_wqes_posted = 0;
}
} else {
@@ -115,7 +116,8 @@ static void nes_replenish_mgt_rq(struct nes_vnic_mgt *mgtvnic)
} while (atomic_read(&mgtvnic->rx_skbs_needed));
barrier();
if (rx_wqes_posted)
- nes_write32(nesdev->regs + NES_WQE_ALLOC, (rx_wqes_posted << 24) | nesmgt->qp_id);
+ nes_write32_relaxed(nesdev->regs + NES_WQE_ALLOC,
+ (rx_wqes_posted << 24) | nesmgt->qp_id);
nesmgt->replenishing_rq = 0;
}

@@ -995,7 +997,8 @@ int nes_init_mgt_qp(struct nes_device *nesdev, struct net_device *netdev, struct
barrier();

/* Ring doorbell (2 WQEs) */
- nes_write32(nesdev->regs + NES_WQE_ALLOC, 0x02800000 | nesdev->cqp.qp_id);
+ nes_write32_relaxed(nesdev->regs + NES_WQE_ALLOC,
+ 0x02800000 | nesdev->cqp.qp_id);

spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
nes_debug(NES_DBG_INIT, "Waiting for create MGT QP%u to complete.\n",
@@ -1050,7 +1053,8 @@ int nes_init_mgt_qp(struct nes_device *nesdev, struct net_device *netdev, struct
do {
counter = min(wqe_count, ((u32)255));
wqe_count -= counter;
- nes_write32(nesdev->regs + NES_WQE_ALLOC, (counter << 24) | mgtvnic->mgt.qp_id);
+ nes_write32_relaxed(nesdev->regs + NES_WQE_ALLOC,
+ (counter << 24) | mgtvnic->mgt.qp_id);
} while (wqe_count);

nes_write32(nesdev->regs + NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT |
@@ -1124,7 +1128,8 @@ void nes_destroy_mgt(struct nes_vnic *nesvnic)
barrier();

/* Ring doorbell (2 WQEs) */
- nes_write32(nesdev->regs + NES_WQE_ALLOC, 0x02800000 | nesdev->cqp.qp_id);
+ nes_write32_relaxed(nesdev->regs + NES_WQE_ALLOC,
+ 0x02800000 | nesdev->cqp.qp_id);

spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
nes_debug(NES_DBG_SHUTDOWN, "Waiting for CQP, cqp_head=%u, cqp.sq_head=%u,"
diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c
index 0a75164..0653da0 100644
--- a/drivers/infiniband/hw/nes/nes_nic.c
+++ b/drivers/infiniband/hw/nes/nes_nic.c
@@ -683,7 +683,7 @@ static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev)
barrier();

if (wqe_count)
- nes_write32(nesdev->regs+NES_WQE_ALLOC,
+ nes_write32_relaxed(nesdev->regs+NES_WQE_ALLOC,
(wqe_count << 24) | (1 << 23) | nesvnic->nic.qp_id);

netif_trans_update(netdev);
diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c
index 21b4a83..79a3d98 100644
--- a/drivers/infiniband/hw/nes/nes_utils.c
+++ b/drivers/infiniband/hw/nes/nes_utils.c
@@ -661,7 +661,8 @@ void nes_post_cqp_request(struct nes_device *nesdev,
barrier();

/* Ring doorbell (1 WQEs) */
- nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x01800000 | nesdev->cqp.qp_id);
+ nes_write32_relaxed(nesdev->regs+NES_WQE_ALLOC,
+ 0x01800000 | nesdev->cqp.qp_id);

barrier();
} else {
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 162475a..87f8635 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -3310,7 +3310,7 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
while (wqe_count) {
counter = min(wqe_count, ((u32)255));
wqe_count -= counter;
- nes_write32(nesdev->regs + NES_WQE_ALLOC,
+ nes_write32_relaxed(nesdev->regs + NES_WQE_ALLOC,
(counter << 24) | 0x00800000 | nesqp->hwqp.qp_id);
}

@@ -3404,7 +3404,8 @@ static int nes_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
while (wqe_count) {
counter = min(wqe_count, ((u32)255));
wqe_count -= counter;
- nes_write32(nesdev->regs+NES_WQE_ALLOC, (counter<<24) | nesqp->hwqp.qp_id);
+ nes_write32_relaxed(nesdev->regs+NES_WQE_ALLOC,
+ (counter<<24) | nesqp->hwqp.qp_id);
}

spin_unlock_irqrestore(&nesqp->lock, flags);
--
2.7.4



2018-03-20 14:55:37

by Jason Gunthorpe

[permalink] [raw]
Subject: Re: [PATCH v4 5/6] IB/nes: Eliminate duplicate barriers on weakly-ordered archs

On Mon, Mar 19, 2018 at 10:47:47PM -0400, Sinan Kaya wrote:
> Code includes barrier() followed by writel(). writel() already has a
> barrier on some architectures like arm64.
>
> This ends up CPU observing two barriers back to back before executing the
> register write.
>
> Create a new wrapper function with relaxed write operator. Use the new
> wrapper when a write is following a barrier().
>
> Signed-off-by: Sinan Kaya <[email protected]>
> drivers/infiniband/hw/nes/nes.h | 5 +++++
> drivers/infiniband/hw/nes/nes_hw.c | 21 ++++++++++++++-------
> drivers/infiniband/hw/nes/nes_mgt.c | 15 ++++++++++-----
> drivers/infiniband/hw/nes/nes_nic.c | 2 +-
> drivers/infiniband/hw/nes/nes_utils.c | 3 ++-
> drivers/infiniband/hw/nes/nes_verbs.c | 5 +++--
> 6 files changed, 35 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h
> index 00c27291..85e007d 100644
> +++ b/drivers/infiniband/hw/nes/nes.h
> @@ -387,6 +387,11 @@ static inline void nes_write_indexed(struct nes_device *nesdev, u32 reg_index, u
> spin_unlock_irqrestore(&nesdev->indexed_regs_lock, flags);
> }
>
> +static inline void nes_write32_relaxed(void __iomem *addr, u32 val)
> +{
> + writel_relaxed(val, addr);
> +}

This wrapper is pointless, let us not add more..

> static inline void nes_write32(void __iomem *addr, u32 val)
> {
> writel(val, addr);
> diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
> index 18a7de1..568e17d 100644
> +++ b/drivers/infiniband/hw/nes/nes_hw.c
> @@ -1257,7 +1257,8 @@ int nes_destroy_cqp(struct nes_device *nesdev)
>
> barrier();
> /* Ring doorbell (5 WQEs) */
> - nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x05800000 | nesdev->cqp.qp_id);
> + nes_write32_relaxed(nesdev->regs+NES_WQE_ALLOC,
> + 0x05800000 | nesdev->cqp.qp_id);

barrier() is not strong enough to order writel, so this doesn't seem
right?

It is probably noteven strong enough for what this driver thinks it is
doing.. This driver is essentially dead and broken, probably just
don't change it.

Jason

2018-03-20 15:24:58

by Sinan Kaya

[permalink] [raw]
Subject: Re: [PATCH v4 5/6] IB/nes: Eliminate duplicate barriers on weakly-ordered archs

On 3/20/2018 9:54 AM, Jason Gunthorpe wrote:
> On Mon, Mar 19, 2018 at 10:47:47PM -0400, Sinan Kaya wrote:
>> Code includes barrier() followed by writel(). writel() already has a
>> barrier on some architectures like arm64.
>>
>> This ends up CPU observing two barriers back to back before executing the
>> register write.
>>
>> Create a new wrapper function with relaxed write operator. Use the new
>> wrapper when a write is following a barrier().
>>
>> Signed-off-by: Sinan Kaya <[email protected]>
>> drivers/infiniband/hw/nes/nes.h | 5 +++++
>> drivers/infiniband/hw/nes/nes_hw.c | 21 ++++++++++++++-------
>> drivers/infiniband/hw/nes/nes_mgt.c | 15 ++++++++++-----
>> drivers/infiniband/hw/nes/nes_nic.c | 2 +-
>> drivers/infiniband/hw/nes/nes_utils.c | 3 ++-
>> drivers/infiniband/hw/nes/nes_verbs.c | 5 +++--
>> 6 files changed, 35 insertions(+), 16 deletions(-)
>>
>> diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h
>> index 00c27291..85e007d 100644
>> +++ b/drivers/infiniband/hw/nes/nes.h
>> @@ -387,6 +387,11 @@ static inline void nes_write_indexed(struct nes_device *nesdev, u32 reg_index, u
>> spin_unlock_irqrestore(&nesdev->indexed_regs_lock, flags);
>> }
>>
>> +static inline void nes_write32_relaxed(void __iomem *addr, u32 val)
>> +{
>> + writel_relaxed(val, addr);
>> +}
>
> This wrapper is pointless, let us not add more..
>
>> static inline void nes_write32(void __iomem *addr, u32 val)
>> {
>> writel(val, addr);
>> diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
>> index 18a7de1..568e17d 100644
>> +++ b/drivers/infiniband/hw/nes/nes_hw.c
>> @@ -1257,7 +1257,8 @@ int nes_destroy_cqp(struct nes_device *nesdev)
>>
>> barrier();
>> /* Ring doorbell (5 WQEs) */
>> - nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x05800000 | nesdev->cqp.qp_id);
>> + nes_write32_relaxed(nesdev->regs+NES_WQE_ALLOC,
>> + 0x05800000 | nesdev->cqp.qp_id);
>
> barrier() is not strong enough to order writel, so this doesn't seem
> right?
>
> It is probably noteven strong enough for what this driver thinks it is
> doing.. This driver is essentially dead and broken, probably just
> don't change it.

Just for the sake of other changes in netdev directory and my education...

barrier() on ARM is a wmb(). It should be a compiler barrier on intel.

You are saying barrier() should have been a wmb(), right?

I have gone through similar exercise on netdev directory and changed

barrier()
writel()

to

barrier()
writel_relaxed()

Do you see any problem with this?

If the goal is to make memory changes observable to the hardware, it should
have been, right not barrier()?

wmb()
writel_relaxed()

>
> Jason
>


--
Sinan Kaya
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies, Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project.

2018-03-20 16:03:57

by Jason Gunthorpe

[permalink] [raw]
Subject: Re: [PATCH v4 5/6] IB/nes: Eliminate duplicate barriers on weakly-ordered archs

On Tue, Mar 20, 2018 at 10:23:16AM -0500, Sinan Kaya wrote:
> On 3/20/2018 9:54 AM, Jason Gunthorpe wrote:
> > On Mon, Mar 19, 2018 at 10:47:47PM -0400, Sinan Kaya wrote:
> >> Code includes barrier() followed by writel(). writel() already has a
> >> barrier on some architectures like arm64.
> >>
> >> This ends up CPU observing two barriers back to back before executing the
> >> register write.
> >>
> >> Create a new wrapper function with relaxed write operator. Use the new
> >> wrapper when a write is following a barrier().
> >>
> >> Signed-off-by: Sinan Kaya <[email protected]>
> >> drivers/infiniband/hw/nes/nes.h | 5 +++++
> >> drivers/infiniband/hw/nes/nes_hw.c | 21 ++++++++++++++-------
> >> drivers/infiniband/hw/nes/nes_mgt.c | 15 ++++++++++-----
> >> drivers/infiniband/hw/nes/nes_nic.c | 2 +-
> >> drivers/infiniband/hw/nes/nes_utils.c | 3 ++-
> >> drivers/infiniband/hw/nes/nes_verbs.c | 5 +++--
> >> 6 files changed, 35 insertions(+), 16 deletions(-)
> >>
> >> diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h
> >> index 00c27291..85e007d 100644
> >> +++ b/drivers/infiniband/hw/nes/nes.h
> >> @@ -387,6 +387,11 @@ static inline void nes_write_indexed(struct nes_device *nesdev, u32 reg_index, u
> >> spin_unlock_irqrestore(&nesdev->indexed_regs_lock, flags);
> >> }
> >>
> >> +static inline void nes_write32_relaxed(void __iomem *addr, u32 val)
> >> +{
> >> + writel_relaxed(val, addr);
> >> +}
> >
> > This wrapper is pointless, let us not add more..
> >
> >> static inline void nes_write32(void __iomem *addr, u32 val)
> >> {
> >> writel(val, addr);
> >> diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
> >> index 18a7de1..568e17d 100644
> >> +++ b/drivers/infiniband/hw/nes/nes_hw.c
> >> @@ -1257,7 +1257,8 @@ int nes_destroy_cqp(struct nes_device *nesdev)
> >>
> >> barrier();
> >> /* Ring doorbell (5 WQEs) */
> >> - nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x05800000 | nesdev->cqp.qp_id);
> >> + nes_write32_relaxed(nesdev->regs+NES_WQE_ALLOC,
> >> + 0x05800000 | nesdev->cqp.qp_id);
> >
> > barrier() is not strong enough to order writel, so this doesn't seem
> > right?
> >
> > It is probably noteven strong enough for what this driver thinks it is
> > doing.. This driver is essentially dead and broken, probably just
> > don't change it.
>
> Just for the sake of other changes in netdev directory and my education...
>
> barrier() on ARM is a wmb(). It should be a compiler barrier on intel.
>
> You are saying barrier() should have been a wmb(), right?

Yes, that is my understanding.. barrier() is supposed to be a very
weak barrier that just ensures the CPU is locally consistent with
itself. It doesn't say anything about DMA access, or SMP cases.

I don't think it is supposed to order anything related to
writel_relaxed()

> I have gone through similar exercise on netdev directory and changed
>
> barrier()
> writel()
>
> to
>
> barrier()
> writel_relaxed()
>
> Do you see any problem with this?

Seems dangerous as a mechanical change to me, it really depends on why
the driver author put barrier() there.

In this case, I strongly suspect nes really intended to say wmb()

Jason

2018-03-20 16:10:54

by Sinan Kaya

[permalink] [raw]
Subject: Re: [PATCH v4 5/6] IB/nes: Eliminate duplicate barriers on weakly-ordered archs

On 3/20/2018 11:01 AM, Jason Gunthorpe wrote:
> On Tue, Mar 20, 2018 at 10:23:16AM -0500, Sinan Kaya wrote:
>> On 3/20/2018 9:54 AM, Jason Gunthorpe wrote:
>>> On Mon, Mar 19, 2018 at 10:47:47PM -0400, Sinan Kaya wrote:
>>>> Code includes barrier() followed by writel(). writel() already has a
>>>> barrier on some architectures like arm64.
>>>>
>>>> This ends up CPU observing two barriers back to back before executing the
>>>> register write.
>>>>
>>>> Create a new wrapper function with relaxed write operator. Use the new
>>>> wrapper when a write is following a barrier().
>>>>
>>>> Signed-off-by: Sinan Kaya <[email protected]>
>>>> drivers/infiniband/hw/nes/nes.h | 5 +++++
>>>> drivers/infiniband/hw/nes/nes_hw.c | 21 ++++++++++++++-------
>>>> drivers/infiniband/hw/nes/nes_mgt.c | 15 ++++++++++-----
>>>> drivers/infiniband/hw/nes/nes_nic.c | 2 +-
>>>> drivers/infiniband/hw/nes/nes_utils.c | 3 ++-
>>>> drivers/infiniband/hw/nes/nes_verbs.c | 5 +++--
>>>> 6 files changed, 35 insertions(+), 16 deletions(-)
>>>>
>>>> diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h
>>>> index 00c27291..85e007d 100644
>>>> +++ b/drivers/infiniband/hw/nes/nes.h
>>>> @@ -387,6 +387,11 @@ static inline void nes_write_indexed(struct nes_device *nesdev, u32 reg_index, u
>>>> spin_unlock_irqrestore(&nesdev->indexed_regs_lock, flags);
>>>> }
>>>>
>>>> +static inline void nes_write32_relaxed(void __iomem *addr, u32 val)
>>>> +{
>>>> + writel_relaxed(val, addr);
>>>> +}
>>>
>>> This wrapper is pointless, let us not add more..
>>>
>>>> static inline void nes_write32(void __iomem *addr, u32 val)
>>>> {
>>>> writel(val, addr);
>>>> diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
>>>> index 18a7de1..568e17d 100644
>>>> +++ b/drivers/infiniband/hw/nes/nes_hw.c
>>>> @@ -1257,7 +1257,8 @@ int nes_destroy_cqp(struct nes_device *nesdev)
>>>>
>>>> barrier();
>>>> /* Ring doorbell (5 WQEs) */
>>>> - nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x05800000 | nesdev->cqp.qp_id);
>>>> + nes_write32_relaxed(nesdev->regs+NES_WQE_ALLOC,
>>>> + 0x05800000 | nesdev->cqp.qp_id);
>>>
>>> barrier() is not strong enough to order writel, so this doesn't seem
>>> right?
>>>
>>> It is probably noteven strong enough for what this driver thinks it is
>>> doing.. This driver is essentially dead and broken, probably just
>>> don't change it.
>>
>> Just for the sake of other changes in netdev directory and my education...
>>
>> barrier() on ARM is a wmb(). It should be a compiler barrier on intel.
>>
>> You are saying barrier() should have been a wmb(), right?
>
> Yes, that is my understanding.. barrier() is supposed to be a very
> weak barrier that just ensures the CPU is locally consistent with
> itself. It doesn't say anything about DMA access, or SMP cases.
>
> I don't think it is supposed to order anything related to
> writel_relaxed()
>
>> I have gone through similar exercise on netdev directory and changed
>>
>> barrier()
>> writel()
>>
>> to
>>
>> barrier()
>> writel_relaxed()
>>
>> Do you see any problem with this?
>
> Seems dangerous as a mechanical change to me, it really depends on why
> the driver author put barrier() there.

OK. I'll drop those changes.

>
> In this case, I strongly suspect nes really intended to say wmb()

Should I change barrier() to wmb() or leave it alone? I have no idea if
nes is actively being maintained or if it is EOL.

>
> Jason
>


--
Sinan Kaya
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies, Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project.

2018-03-20 16:31:02

by Jason Gunthorpe

[permalink] [raw]
Subject: Re: [PATCH v4 5/6] IB/nes: Eliminate duplicate barriers on weakly-ordered archs

On Tue, Mar 20, 2018 at 11:08:59AM -0500, Sinan Kaya wrote:

> > In this case, I strongly suspect nes really intended to say wmb()
>
> Should I change barrier() to wmb() or leave it alone? I have no idea if
> nes is actively being maintained or if it is EOL.

It is dead, and the HW doesn't work on 64 bit machines. Just ignore it
is my advice.

Jason