2021-12-04 12:01:03

by Sui Jingfeng

[permalink] [raw]
Subject: [PATCH] mips/loongson64: using __fast_iob implement __wbflush() instead of sync

1) Loongson's CPUs (ls3a3000, ls3a4000, ls3a5000) have uncached store buffers,
which are used for uncached acceleration.

Uncached Accelerated is the name under which the R10000 introduced
a cache mode that uses the CPU's write buffer to combine writes
but that otherwise is uncached.

wbflush is meant to empty the data gathered in the uncached store buffers
within the CPU.

2) The SYNC instruction in R10000

A SYNC instruction is not prevented from graduating if the uncached
buffer contains any uncached accelerated stores[1].

3) wbflush() implementation of IDT CPU.

IDT CPUs enforce strict write priority (all pending writes retired
to memory before main memory is read). Thus, implementing wbflush()
is as simple as implementing an uncached load.

For Loongson's CPUs, __wbflush should also be implemented with
__fast_iob, not sync.

[1] https://www.ele.uva.es/~jesman/BigSeti/ftp/Microprocesadores/MIPS/t5.ver.2.0.book.pdf

Signed-off-by: suijingfeng <[email protected]>
---
arch/mips/loongson64/Makefile | 1 +
arch/mips/loongson64/setup.c | 17 -----------------
arch/mips/loongson64/smp.c | 6 +++---
arch/mips/loongson64/wbflush.c | 28 ++++++++++++++++++++++++++++
4 files changed, 32 insertions(+), 20 deletions(-)
create mode 100644 arch/mips/loongson64/wbflush.c

diff --git a/arch/mips/loongson64/Makefile b/arch/mips/loongson64/Makefile
index e806280bbb85..ad00d92c2871 100644
--- a/arch/mips/loongson64/Makefile
+++ b/arch/mips/loongson64/Makefile
@@ -12,3 +12,4 @@ obj-$(CONFIG_SUSPEND) += pm.o
obj-$(CONFIG_PCI_QUIRKS) += vbios_quirk.o
obj-$(CONFIG_CPU_LOONGSON3_CPUCFG_EMULATION) += cpucfg-emul.o
obj-$(CONFIG_SYSFS) += boardinfo.o
+obj-$(CONFIG_CPU_HAS_WB) += wbflush.o
diff --git a/arch/mips/loongson64/setup.c b/arch/mips/loongson64/setup.c
index 6fe3ffffcaa6..cb10d14da433 100644
--- a/arch/mips/loongson64/setup.c
+++ b/arch/mips/loongson64/setup.c
@@ -3,10 +3,7 @@
* Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology
* Author: Fuxin Zhang, [email protected]
*/
-#include <linux/export.h>
#include <linux/init.h>
-
-#include <asm/wbflush.h>
#include <asm/bootinfo.h>
#include <linux/libfdt.h>
#include <linux/of_fdt.h>
@@ -17,20 +14,6 @@

void *loongson_fdt_blob;

-static void wbflush_loongson(void)
-{
- asm(".set\tpush\n\t"
- ".set\tnoreorder\n\t"
- ".set mips3\n\t"
- "sync\n\t"
- "nop\n\t"
- ".set\tpop\n\t"
- ".set mips0\n\t");
-}
-
-void (*__wbflush)(void) = wbflush_loongson;
-EXPORT_SYMBOL(__wbflush);
-
void __init plat_mem_setup(void)
{
if (loongson_fdt_blob)
diff --git a/arch/mips/loongson64/smp.c b/arch/mips/loongson64/smp.c
index 660e1de4412a..0d9f249c95f9 100644
--- a/arch/mips/loongson64/smp.c
+++ b/arch/mips/loongson64/smp.c
@@ -42,13 +42,13 @@ static uint32_t core0_c0count[NR_CPUS];
#define loongson3_ipi_write32(action, addr) \
do { \
writel(action, addr); \
- __wbflush(); \
+ wbflush(); \
} while (0)
/* write a 64bit value to ipi register */
#define loongson3_ipi_write64(action, addr) \
do { \
writeq(action, addr); \
- __wbflush(); \
+ wbflush(); \
} while (0)

static u32 (*ipi_read_clear)(int cpu);
@@ -418,7 +418,7 @@ static irqreturn_t loongson3_ipi_interrupt(int irq, void *dev_id)
c0count = c0count ? c0count : 1;
for (i = 1; i < nr_cpu_ids; i++)
core0_c0count[i] = c0count;
- __wbflush(); /* Let others see the result ASAP */
+ wbflush(); /* Let others see the result ASAP */
}

return IRQ_HANDLED;
diff --git a/arch/mips/loongson64/wbflush.c b/arch/mips/loongson64/wbflush.c
new file mode 100644
index 000000000000..49f0e4c53196
--- /dev/null
+++ b/arch/mips/loongson64/wbflush.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2021 [email protected]
+ */
+#include <linux/export.h>
+#include <linux/init.h>
+#include <asm/wbflush.h>
+#include <asm/barrier.h>
+
+#ifdef CONFIG_CPU_HAS_WB
+
+/*
+ * I/O ASIC systems use a standard writeback buffer that gets flushed
+ * upon an uncached read.
+ */
+static void wbflush_mips(void)
+{
+ __fast_iob();
+}
+
+void (*__wbflush)(void) = wbflush_mips;
+EXPORT_SYMBOL(__wbflush);
+
+#endif
--
2.25.1



2021-12-04 12:33:01

by Jiaxun Yang

[permalink] [raw]
Subject: Re: [PATCH] mips/loongson64: using __fast_iob implement __wbflush() instead of sync

Hi Jingfeng,
I'd suggest you not to mess with barriers on Loongson.
It's a hell.

Also Loongson had changed semantics of sync/synci many times.
They got redefined and swapped. So the present way is just the safest way.

Thanks.

- Jiaxun

在2021年12月4日十二月 下午12:00,suijingfeng写道:
> 1) loongson's cpu(ls3a3000, ls3a4000, ls3a5000) have uncache store buffers
> which is for uncache accleration.
>
> Uncached Accelerated is the name under which the R10000 introduced
> a cache mode that uses the CPU's write buffer to combine writes
> but that otherwise is uncached.
>
> wbflush is mean to empty data gathered in the uncache store buffers
> within the CPU.
>
> 2) The SYNC instruction in R10000
>
> A SYNC instruction is not prevented from graduating if the uncached
> buffer contains any uncached accelerated stores[1].
>
> 3) wbflush() implementation of IDT CPU.
>
> IDT CPUs enforce strict write priority (all pending writes retired
> to memory before main memory is read). Thus, implementing wbflush()
> is as simple as implementing an uncached load.
>
> for loongson's cpu, __wbflush should also be implemented with
> __fast_iob not sync.
>
> [1]
> https://www.ele.uva.es/~jesman/BigSeti/ftp/Microprocesadores/MIPS/t5.ver.2.0.book.pdf
>
> Signed-off-by: suijingfeng <[email protected]>
> ---
> arch/mips/loongson64/Makefile | 1 +
> arch/mips/loongson64/setup.c | 17 -----------------
> arch/mips/loongson64/smp.c | 6 +++---
> arch/mips/loongson64/wbflush.c | 28 ++++++++++++++++++++++++++++
> 4 files changed, 32 insertions(+), 20 deletions(-)
> create mode 100644 arch/mips/loongson64/wbflush.c
>
> diff --git a/arch/mips/loongson64/Makefile b/arch/mips/loongson64/Makefile
> index e806280bbb85..ad00d92c2871 100644
> --- a/arch/mips/loongson64/Makefile
> +++ b/arch/mips/loongson64/Makefile
> @@ -12,3 +12,4 @@ obj-$(CONFIG_SUSPEND) += pm.o
> obj-$(CONFIG_PCI_QUIRKS) += vbios_quirk.o
> obj-$(CONFIG_CPU_LOONGSON3_CPUCFG_EMULATION) += cpucfg-emul.o
> obj-$(CONFIG_SYSFS) += boardinfo.o
> +obj-$(CONFIG_CPU_HAS_WB) += wbflush.o
> diff --git a/arch/mips/loongson64/setup.c b/arch/mips/loongson64/setup.c
> index 6fe3ffffcaa6..cb10d14da433 100644
> --- a/arch/mips/loongson64/setup.c
> +++ b/arch/mips/loongson64/setup.c
> @@ -3,10 +3,7 @@
> * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology
> * Author: Fuxin Zhang, [email protected]
> */
> -#include <linux/export.h>
> #include <linux/init.h>
> -
> -#include <asm/wbflush.h>
> #include <asm/bootinfo.h>
> #include <linux/libfdt.h>
> #include <linux/of_fdt.h>
> @@ -17,20 +14,6 @@
>
> void *loongson_fdt_blob;
>
> -static void wbflush_loongson(void)
> -{
> - asm(".set\tpush\n\t"
> - ".set\tnoreorder\n\t"
> - ".set mips3\n\t"
> - "sync\n\t"
> - "nop\n\t"
> - ".set\tpop\n\t"
> - ".set mips0\n\t");
> -}
> -
> -void (*__wbflush)(void) = wbflush_loongson;
> -EXPORT_SYMBOL(__wbflush);
> -
> void __init plat_mem_setup(void)
> {
> if (loongson_fdt_blob)
> diff --git a/arch/mips/loongson64/smp.c b/arch/mips/loongson64/smp.c
> index 660e1de4412a..0d9f249c95f9 100644
> --- a/arch/mips/loongson64/smp.c
> +++ b/arch/mips/loongson64/smp.c
> @@ -42,13 +42,13 @@ static uint32_t core0_c0count[NR_CPUS];
> #define loongson3_ipi_write32(action, addr) \
> do { \
> writel(action, addr); \
> - __wbflush(); \
> + wbflush(); \
> } while (0)
> /* write a 64bit value to ipi register */
> #define loongson3_ipi_write64(action, addr) \
> do { \
> writeq(action, addr); \
> - __wbflush(); \
> + wbflush(); \
> } while (0)
>
> static u32 (*ipi_read_clear)(int cpu);
> @@ -418,7 +418,7 @@ static irqreturn_t loongson3_ipi_interrupt(int irq,
> void *dev_id)
> c0count = c0count ? c0count : 1;
> for (i = 1; i < nr_cpu_ids; i++)
> core0_c0count[i] = c0count;
> - __wbflush(); /* Let others see the result ASAP */
> + wbflush(); /* Let others see the result ASAP */
> }
>
> return IRQ_HANDLED;
> diff --git a/arch/mips/loongson64/wbflush.c b/arch/mips/loongson64/wbflush.c
> new file mode 100644
> index 000000000000..49f0e4c53196
> --- /dev/null
> +++ b/arch/mips/loongson64/wbflush.c
> @@ -0,0 +1,28 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * This file is subject to the terms and conditions of the GNU General Public
> + * License. See the file "COPYING" in the main directory of this archive
> + * for more details.
> + *
> + * Copyright (C) 2021 [email protected]
> + */
> +#include <linux/export.h>
> +#include <linux/init.h>
> +#include <asm/wbflush.h>
> +#include <asm/barrier.h>
> +
> +#ifdef CONFIG_CPU_HAS_WB
> +
> +/*
> + * I/O ASIC systems use a standard writeback buffer that gets flushed
> + * upon an uncached read.
> + */
> +static void wbflush_mips(void)
> +{
> + __fast_iob();
> +}
> +
> +void (*__wbflush)(void) = wbflush_mips;
> +EXPORT_SYMBOL(__wbflush);
> +
> +#endif
> --
> 2.25.1

--
- Jiaxun

2021-12-05 04:43:41

by Sui Jingfeng

[permalink] [raw]
Subject: Re: Re: [PATCH] mips/loongson64: using __fast_iob implement __wbflush() instead of sync

Hi Jiaxun,


My concern is not Loongson CPU barriers, but the semantics of wbflush().

And the present way is not necessarily the safest way:


Before applying this patch, wbflush() expands to two __sync() calls;

if __sync() can empty the uncached store buffer, a single one is enough.

After applying this patch, wbflush() expands to a __sync() followed by a __fast_iob();

this is safer than your version, which blindly copies code from loongson2ef/common/setup.c.

Note that the Uncached Accelerated cache mode is only implemented after ls3a2000.


"See MIPS Run" says:

Most write queues can be emptied out by performing an uncached store to

any location and then performing an operation that reads the same data back.

Put a sync instruction between the write and the read, and that should be effective

on any system compliant with MIPS32/64.

A write queue certainly can't permit the read to overtake the write, it would return stale data.

This is effective, but not necessarily efficient; you can minimize the overhead

by loading from the fastest memory available. Perhaps your system offers

something system-specific but faster.


Again, wbflush is meant to empty the data gathered in the uncached store buffers within the CPU.

__wbflush() is only meant to be used internally; wbflush() should be used outside of wbflush.h.

By separating __wbflush out of setup.c, the code becomes more modular. It allows you to deselect

CPU_HAS_WB easily and see what will happen.


Actually, nothing strange happens, because the uncached accelerated cache mode does not

get used in the upstream kernel.


> -----Original Messages-----
> From: "Jiaxun Yang" <[email protected]>
> Sent Time: 2021-12-04 20:32:37 (Saturday)
> To: suijingfeng <[email protected]>, "Huacai Chen" <[email protected]>, "Thomas Bogendoerfer" <[email protected]>
> Cc: "[email protected]" <[email protected]>, [email protected]
> Subject: Re: [PATCH] mips/loongson64: using __fast_iob implement __wbflush() instead of sync
>
> Hi Jingfeng,
> I'd suggest you not to mess with barriers on Loongson.
> It's a hell.
>
> Also Loongson had changed semantics of sync/synci many times.
> They got redefined and swapped. So the present way is just the safest way.
>
> Thanks.
>
> - Jiaxun
>
> 在2021年12月4日十二月 下午12:00,suijingfeng写道:
> > 1) loongson's cpu(ls3a3000, ls3a4000, ls3a5000) have uncache store buffers
> > which is for uncache accleration.
> >
> > Uncached Accelerated is the name under which the R10000 introduced
> > a cache mode that uses the CPU's write buffer to combine writes
> > but that otherwise is uncached.
> >
> > wbflush is mean to empty data gathered in the uncache store buffers
> > within the CPU.
> >
> > 2) The SYNC instruction in R10000
> >
> > A SYNC instruction is not prevented from graduating if the uncached
> > buffer contains any uncached accelerated stores[1].
> >
> > 3) wbflush() implementation of IDT CPU.
> >
> > IDT CPUs enforce strict write priority (all pending writes retired
> > to memory before main memory is read). Thus, implementing wbflush()
> > is as simple as implementing an uncached load.
> >
> > for loongson's cpu, __wbflush should also be implemented with
> > __fast_iob not sync.
> >
> >
> > Signed-off-by: suijingfeng <[email protected]>
> > ---
> > arch/mips/loongson64/Makefile | 1 +
> > arch/mips/loongson64/setup.c | 17 -----------------
> > arch/mips/loongson64/smp.c | 6 +++---
> > arch/mips/loongson64/wbflush.c | 28 ++++++++++++++++++++++++++++
> > 4 files changed, 32 insertions(+), 20 deletions(-)
> > create mode 100644 arch/mips/loongson64/wbflush.c
> >
> > diff --git a/arch/mips/loongson64/Makefile b/arch/mips/loongson64/Makefile
> > index e806280bbb85..ad00d92c2871 100644
> > --- a/arch/mips/loongson64/Makefile
> > +++ b/arch/mips/loongson64/Makefile
> > @@ -12,3 +12,4 @@ obj-$(CONFIG_SUSPEND) += pm.o
> > obj-$(CONFIG_PCI_QUIRKS) += vbios_quirk.o
> > obj-$(CONFIG_CPU_LOONGSON3_CPUCFG_EMULATION) += cpucfg-emul.o
> > obj-$(CONFIG_SYSFS) += boardinfo.o
> > +obj-$(CONFIG_CPU_HAS_WB) += wbflush.o
> > diff --git a/arch/mips/loongson64/setup.c b/arch/mips/loongson64/setup.c
> > index 6fe3ffffcaa6..cb10d14da433 100644
> > --- a/arch/mips/loongson64/setup.c
> > +++ b/arch/mips/loongson64/setup.c
> > @@ -3,10 +3,7 @@
> > * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology
> > * Author: Fuxin Zhang, [email protected]
> > */
> > -#include <linux/export.h>
> > #include <linux/init.h>
> > -
> > -#include <asm/wbflush.h>
> > #include <asm/bootinfo.h>
> > #include <linux/libfdt.h>
> > #include <linux/of_fdt.h>
> > @@ -17,20 +14,6 @@
> >
> > void *loongson_fdt_blob;
> >
> > -static void wbflush_loongson(void)
> > -{
> > - asm(".set\tpush\n\t"
> > - ".set\tnoreorder\n\t"
> > - ".set mips3\n\t"
> > - "sync\n\t"
> > - "nop\n\t"
> > - ".set\tpop\n\t"
> > - ".set mips0\n\t");
> > -}
> > -
> > -void (*__wbflush)(void) = wbflush_loongson;
> > -EXPORT_SYMBOL(__wbflush);
> > -
> > void __init plat_mem_setup(void)
> > {
> > if (loongson_fdt_blob)
> > diff --git a/arch/mips/loongson64/smp.c b/arch/mips/loongson64/smp.c
> > index 660e1de4412a..0d9f249c95f9 100644
> > --- a/arch/mips/loongson64/smp.c
> > +++ b/arch/mips/loongson64/smp.c
> > @@ -42,13 +42,13 @@ static uint32_t core0_c0count[NR_CPUS];
> > #define loongson3_ipi_write32(action, addr) \
> > do { \
> > writel(action, addr); \
> > - __wbflush(); \
> > + wbflush(); \
> > } while (0)
> > /* write a 64bit value to ipi register */
> > #define loongson3_ipi_write64(action, addr) \
> > do { \
> > writeq(action, addr); \
> > - __wbflush(); \
> > + wbflush(); \
> > } while (0)
> >
> > static u32 (*ipi_read_clear)(int cpu);
> > @@ -418,7 +418,7 @@ static irqreturn_t loongson3_ipi_interrupt(int irq,
> > void *dev_id)
> > c0count = c0count ? c0count : 1;
> > for (i = 1; i < nr_cpu_ids; i++)
> > core0_c0count[i] = c0count;
> > - __wbflush(); /* Let others see the result ASAP */
> > + wbflush(); /* Let others see the result ASAP */
> > }
> >
> > return IRQ_HANDLED;
> > diff --git a/arch/mips/loongson64/wbflush.c b/arch/mips/loongson64/wbflush.c
> > new file mode 100644
> > index 000000000000..49f0e4c53196
> > --- /dev/null
> > +++ b/arch/mips/loongson64/wbflush.c
> > @@ -0,0 +1,28 @@
> > +// SPDX-License-Identifier: GPL-2.0-or-later
> > +/*
> > + * This file is subject to the terms and conditions of the GNU General Public
> > + * License. See the file "COPYING" in the main directory of this archive
> > + * for more details.
> > + *
> > + * Copyright (C) 2021 [email protected]
> > + */
> > +#include <linux/export.h>
> > +#include <linux/init.h>
> > +#include <asm/wbflush.h>
> > +#include <asm/barrier.h>
> > +
> > +#ifdef CONFIG_CPU_HAS_WB
> > +
> > +/*
> > + * I/O ASIC systems use a standard writeback buffer that gets flushed
> > + * upon an uncached read.
> > + */
> > +static void wbflush_mips(void)
> > +{
> > + __fast_iob();
> > +}
> > +
> > +void (*__wbflush)(void) = wbflush_mips;
> > +EXPORT_SYMBOL(__wbflush);
> > +
> > +#endif
> > --
> > 2.25.1
>
> --
> - Jiaxun

本邮件及其附件含有龙芯中科的商业秘密信息,仅限于发送给上面地址中列出的个人或群组。禁止任何其他人以任何形式使用(包括但不限于全部或部分地泄露、复制或散发)本邮件及其附件中的信息。如果您错收本邮件,请您立即电话或邮件通知发件人并删除本邮件。
This email and its attachments contain confidential information from Loongson Technology , which is intended only for the person or entity whose address is listed above. Any use of the information contained herein in any way (including, but not limited to, total or partial disclosure, reproduction or dissemination) by persons other than the intended recipient(s) is prohibited. If you receive this email in error, please notify the sender by phone or email immediately and delete it.

2021-12-05 11:18:15

by Jiaxun Yang

[permalink] [raw]
Subject: Re: [PATCH] mips/loongson64: using __fast_iob implement __wbflush() instead of sync

Actually, I tried the same thing years ago and it broke some driver on 3A2000....

The write buffer is not limited to the UCA buffer; it also includes write-gathering buffers such as the store fill buffer. On Loongson, their write-back is tied to memory barriers.


Thanks.

- Jiaxun

在2021年12月5日十二月 上午4:29,隋景峰写道:
> Hi Jiaxun,
>
> I'm not toward to loongson cpu's barriers, but the semantic of wbflush().
>
> And it is not necessary the safest way:
>
>
>
> before applying this patch, wbflush() will be expanded to double __sync(),
>
> if __sync() can empty the uncached store buffer, single one is enough.
>
> After apply this patch, wbflush() will be expanded to a __sync()
> followed by a __fast_iob();
>
> this is more safe than your's version which blindly copy code from
> loongson2ef/common/setup.c.
>
> Note, Uncached Accelerated cache mode is only implemented after ls3a2000.
>
>
>
> Again, *wbflush is mean to empty data gathered in the uncache store
> buffers within the CPU*.
>
> __wbflush() is only mean to be used internally, wbflush() should be
> used outside of wbflush.h.
>
> by separate __wbflush out of setup.c, the code is more modularity. it
> allow you to deselect
>
> CPU_HAS_WB easily and see what will happen.
>
>
>
> Actually nothing strange is happen, because uncached accelerated cache
> mode does not
>
> get used in upstream kernel.
>
>
>
> See Mips Run Say:
>
> Most write queues can be emptied out by performing an uncached store to
>
> any location and then performing an operation that reads the same data back.
>
> Put a sync instruction between the write and the read, and that should
> be effective
>
> on any system compliant with MIPS32/64.
>
> A write queue certainly can't permit the read to overtake the write, it
> would return stale data.
>
> This is effective, but not necessarily efficient; you can minimize the overhead
>
> by loading from the fastest memory available. Perhaps your system offers
>
> something system specific but faster.
>
>
>
> On 2021/12/4 下午8:32, Jiaxun Yang wrote:
>> Hi Jingfeng,
>> I'd suggest you not to mess with barriers on Loongson.
>> It's a hell.
>>
>> Also Loongson had changed semantics of sync/synci many times.
>> They got redefined and swapped. So the present way is just the safest way.
>>
>> Thanks.
>>
>> - Jiaxun
>>
>> 在2021年12月4日十二月 下午12:00,suijingfeng写道:
>>> 1) loongson's cpu(ls3a3000, ls3a4000, ls3a5000) have uncache store buffers
>>> which is for uncache accleration.
>>>
>>> Uncached Accelerated is the name under which the R10000 introduced
>>> a cache mode that uses the CPU's write buffer to combine writes
>>> but that otherwise is uncached.
>>>
>>> wbflush is mean to empty data gathered in the uncache store buffers
>>> within the CPU.
>>>
>>> 2) The SYNC instruction in R10000
>>>
>>> A SYNC instruction is not prevented from graduating if the uncached
>>> buffer contains any uncached accelerated stores[1].
>>>
>>> 3) wbflush() implementation of IDT CPU.
>>>
>>> IDT CPUs enforce strict write priority (all pending writes retired
>>> to memory before main memory is read). Thus, implementing wbflush()
>>> is as simple as implementing an uncached load.
>>>
>>> for loongson's cpu, __wbflush should also be implemented with
>>> __fast_iob not sync.
>>>
>>> [1] https://www.ele.uva.es/~jesman/BigSeti/ftp/Microprocesadores/MIPS/t5.ver.2.0.book.pdf
>>>
>>> Signed-off-by: suijingfeng <[email protected]>
>>> ---
>>> arch/mips/loongson64/Makefile | 1 +
>>> arch/mips/loongson64/setup.c | 17 -----------------
>>> arch/mips/loongson64/smp.c | 6 +++---
>>> arch/mips/loongson64/wbflush.c | 28 ++++++++++++++++++++++++++++
>>> 4 files changed, 32 insertions(+), 20 deletions(-)
>>> create mode 100644 arch/mips/loongson64/wbflush.c
>>>
>>> diff --git a/arch/mips/loongson64/Makefile b/arch/mips/loongson64/Makefile
>>> index e806280bbb85..ad00d92c2871 100644
>>> --- a/arch/mips/loongson64/Makefile
>>> +++ b/arch/mips/loongson64/Makefile
>>> @@ -12,3 +12,4 @@ obj-$(CONFIG_SUSPEND) += pm.o
>>> obj-$(CONFIG_PCI_QUIRKS) += vbios_quirk.o
>>> obj-$(CONFIG_CPU_LOONGSON3_CPUCFG_EMULATION) += cpucfg-emul.o
>>> obj-$(CONFIG_SYSFS) += boardinfo.o
>>> +obj-$(CONFIG_CPU_HAS_WB) += wbflush.o
>>> diff --git a/arch/mips/loongson64/setup.c b/arch/mips/loongson64/setup.c
>>> index 6fe3ffffcaa6..cb10d14da433 100644
>>> --- a/arch/mips/loongson64/setup.c
>>> +++ b/arch/mips/loongson64/setup.c
>>> @@ -3,10 +3,7 @@
>>> * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology
>>> * Author: Fuxin Zhang, [email protected]
>>> */
>>> -#include <linux/export.h>
>>> #include <linux/init.h>
>>> -
>>> -#include <asm/wbflush.h>
>>> #include <asm/bootinfo.h>
>>> #include <linux/libfdt.h>
>>> #include <linux/of_fdt.h>
>>> @@ -17,20 +14,6 @@
>>>
>>> void *loongson_fdt_blob;
>>>
>>> -static void wbflush_loongson(void)
>>> -{
>>> - asm(".set\tpush\n\t"
>>> - ".set\tnoreorder\n\t"
>>> - ".set mips3\n\t"
>>> - "sync\n\t"
>>> - "nop\n\t"
>>> - ".set\tpop\n\t"
>>> - ".set mips0\n\t");
>>> -}
>>> -
>>> -void (*__wbflush)(void) = wbflush_loongson;
>>> -EXPORT_SYMBOL(__wbflush);
>>> -
>>> void __init plat_mem_setup(void)
>>> {
>>> if (loongson_fdt_blob)
>>> diff --git a/arch/mips/loongson64/smp.c b/arch/mips/loongson64/smp.c
>>> index 660e1de4412a..0d9f249c95f9 100644
>>> --- a/arch/mips/loongson64/smp.c
>>> +++ b/arch/mips/loongson64/smp.c
>>> @@ -42,13 +42,13 @@ static uint32_t core0_c0count[NR_CPUS];
>>> #define loongson3_ipi_write32(action, addr) \
>>> do { \
>>> writel(action, addr); \
>>> - __wbflush(); \
>>> + wbflush(); \
>>> } while (0)
>>> /* write a 64bit value to ipi register */
>>> #define loongson3_ipi_write64(action, addr) \
>>> do { \
>>> writeq(action, addr); \
>>> - __wbflush(); \
>>> + wbflush(); \
>>> } while (0)
>>>
>>> static u32 (*ipi_read_clear)(int cpu);
>>> @@ -418,7 +418,7 @@ static irqreturn_t loongson3_ipi_interrupt(int irq,
>>> void *dev_id)
>>> c0count = c0count ? c0count : 1;
>>> for (i = 1; i < nr_cpu_ids; i++)
>>> core0_c0count[i] = c0count;
>>> - __wbflush(); /* Let others see the result ASAP */
>>> + wbflush(); /* Let others see the result ASAP */
>>> }
>>>
>>> return IRQ_HANDLED;
>>> diff --git a/arch/mips/loongson64/wbflush.c b/arch/mips/loongson64/wbflush.c
>>> new file mode 100644
>>> index 000000000000..49f0e4c53196
>>> --- /dev/null
>>> +++ b/arch/mips/loongson64/wbflush.c
>>> @@ -0,0 +1,28 @@
>>> +// SPDX-License-Identifier: GPL-2.0-or-later
>>> +/*
>>> + * This file is subject to the terms and conditions of the GNU General Public
>>> + * License. See the file "COPYING" in the main directory of this archive
>>> + * for more details.
>>> + *
>>> + * Copyright (C) 2021 [email protected]
>>> + */
>>> +#include <linux/export.h>
>>> +#include <linux/init.h>
>>> +#include <asm/wbflush.h>
>>> +#include <asm/barrier.h>
>>> +
>>> +#ifdef CONFIG_CPU_HAS_WB
>>> +
>>> +/*
>>> + * I/O ASIC systems use a standard writeback buffer that gets flushed
>>> + * upon an uncached read.
>>> + */
>>> +static void wbflush_mips(void)
>>> +{
>>> + __fast_iob();
>>> +}
>>> +
>>> +void (*__wbflush)(void) = wbflush_mips;
>>> +EXPORT_SYMBOL(__wbflush);
>>> +
>>> +#endif
>>> --
>>> 2.25.1
>
>
> *本邮件及其附件含有龙芯中科的商业秘密信息,仅限于发送给上面地址中列出的个人或群组。禁止任何其他人以任何形式使用(包括但不限于全部或部分地泄露、复制或散发)本邮件及其附件中的信息。如果您错收本邮件,请您立即电话或邮件通知发件人并删除本邮件。
> This email and its attachments contain confidential information from
> Loongson Technology , which is intended only for the person or entity
> whose address is listed above. Any use of the information contained
> herein in any way (including, but not limited to, total or partial
> disclosure, reproduction or dissemination) by persons other than the
> intended recipient(s) is prohibited. If you receive this email in
> error, please notify the sender by phone or email immediately and
> delete it. *

--
- Jiaxun