2022-04-16 04:26:28

by Max Filippov

[permalink] [raw]
Subject: [PATCH 0/6] xtensa: support coprocessors on SMP

Hello,

this series does a bunch of small cleanups around the coprocessor handling
code and adds coprocessor support in SMP configurations.

Max Filippov (6):
xtensa: clean up function declarations in traps.c
xtensa: clean up declarations in coprocessor.h
xtensa: clean up excsave1 initialization
xtensa: use callx0 opcode in fast_coprocessor
xtensa: add xtensa_xsr macro
xtensa: support coprocessors on SMP

arch/xtensa/include/asm/coprocessor.h | 10 ++--
arch/xtensa/include/asm/processor.h | 7 +++
arch/xtensa/include/asm/thread_info.h | 7 ++-
arch/xtensa/include/asm/traps.h | 11 +++-
arch/xtensa/kernel/asm-offsets.c | 8 ++-
arch/xtensa/kernel/coprocessor.S | 61 +++++++++++++--------
arch/xtensa/kernel/entry.S | 17 ++++++
arch/xtensa/kernel/process.c | 70 +++++++++++++++---------
arch/xtensa/kernel/traps.c | 77 +++++++++++++++------------
9 files changed, 177 insertions(+), 91 deletions(-)

--
2.30.2


2022-04-16 04:26:35

by Max Filippov

[permalink] [raw]
Subject: [PATCH 1/6] xtensa: clean up function declarations in traps.c

Drop 'extern' from all function declarations. Add 'static' to
declarations and definitions that are only used locally. Add argument
names to declarations. Drop the second argument, which is unused and
never passed, from do_multihit and do_page_fault.

Signed-off-by: Max Filippov <[email protected]>
---
arch/xtensa/kernel/traps.c | 61 +++++++++++++++++++-------------------
1 file changed, 31 insertions(+), 30 deletions(-)

diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c
index 515719c7e750..a85992d60c11 100644
--- a/arch/xtensa/kernel/traps.c
+++ b/arch/xtensa/kernel/traps.c
@@ -48,25 +48,31 @@
* Machine specific interrupt handlers
*/

-extern void kernel_exception(void);
-extern void user_exception(void);
-
-extern void fast_illegal_instruction_user(void);
-extern void fast_syscall_user(void);
-extern void fast_alloca(void);
-extern void fast_unaligned(void);
-extern void fast_second_level_miss(void);
-extern void fast_store_prohibited(void);
-extern void fast_coprocessor(void);
-
-extern void do_illegal_instruction (struct pt_regs*);
-extern void do_interrupt (struct pt_regs*);
-extern void do_nmi(struct pt_regs *);
-extern void do_unaligned_user (struct pt_regs*);
-extern void do_multihit (struct pt_regs*, unsigned long);
-extern void do_page_fault (struct pt_regs*, unsigned long);
-extern void do_debug (struct pt_regs*);
-extern void system_call (struct pt_regs*);
+void kernel_exception(void);
+void user_exception(void);
+
+void fast_illegal_instruction_user(void);
+void fast_syscall_user(void);
+void fast_alloca(void);
+void fast_unaligned(void);
+void fast_second_level_miss(void);
+void fast_store_prohibited(void);
+void fast_coprocessor(void);
+
+void do_IRQ(int hwirq, struct pt_regs *regs);
+void do_page_fault(struct pt_regs *regs);
+void system_call(struct pt_regs *regs);
+
+static void do_illegal_instruction(struct pt_regs *regs);
+static void do_interrupt(struct pt_regs *regs);
+#if XTENSA_FAKE_NMI
+static void do_nmi(struct pt_regs *regs);
+#endif
+#if XCHAL_UNALIGNED_LOAD_EXCEPTION || XCHAL_UNALIGNED_STORE_EXCEPTION
+static void do_unaligned_user(struct pt_regs *regs);
+#endif
+static void do_multihit(struct pt_regs *regs);
+static void do_debug(struct pt_regs *regs);

/*
* The vector table must be preceded by a save area (which
@@ -197,7 +203,7 @@ void do_unhandled(struct pt_regs *regs, unsigned long exccause)
* Multi-hit exception. This if fatal!
*/

-void do_multihit(struct pt_regs *regs, unsigned long exccause)
+static void do_multihit(struct pt_regs *regs)
{
die("Caught multihit exception", regs, SIGKILL);
}
@@ -206,8 +212,6 @@ void do_multihit(struct pt_regs *regs, unsigned long exccause)
* IRQ handler.
*/

-extern void do_IRQ(int, struct pt_regs *);
-
#if XTENSA_FAKE_NMI

#define IS_POW2(v) (((v) & ((v) - 1)) == 0)
@@ -240,7 +244,7 @@ irqreturn_t xtensa_pmu_irq_handler(int irq, void *dev_id);

DEFINE_PER_CPU(unsigned long, nmi_count);

-void do_nmi(struct pt_regs *regs)
+static void do_nmi(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);

@@ -253,7 +257,7 @@ void do_nmi(struct pt_regs *regs)
}
#endif

-void do_interrupt(struct pt_regs *regs)
+static void do_interrupt(struct pt_regs *regs)
{
static const unsigned int_level_mask[] = {
0,
@@ -303,8 +307,7 @@ void do_interrupt(struct pt_regs *regs)
* Illegal instruction. Fatal if in kernel space.
*/

-void
-do_illegal_instruction(struct pt_regs *regs)
+static void do_illegal_instruction(struct pt_regs *regs)
{
__die_if_kernel("Illegal instruction in kernel", regs, SIGKILL);

@@ -324,8 +327,7 @@ do_illegal_instruction(struct pt_regs *regs)
*/

#if XCHAL_UNALIGNED_LOAD_EXCEPTION || XCHAL_UNALIGNED_STORE_EXCEPTION
-void
-do_unaligned_user (struct pt_regs *regs)
+static void do_unaligned_user(struct pt_regs *regs)
{
__die_if_kernel("Unhandled unaligned exception in kernel",
regs, SIGKILL);
@@ -346,8 +348,7 @@ do_unaligned_user (struct pt_regs *regs)
* breakpoint structures to debug registers intact, so that
* DEBUGCAUSE.DBNUM could be used in case of data breakpoint hit.
*/
-void
-do_debug(struct pt_regs *regs)
+static void do_debug(struct pt_regs *regs)
{
#ifdef CONFIG_HAVE_HW_BREAKPOINT
int ret = check_hw_breakpoint(regs);
--
2.30.2

2022-04-16 04:26:38

by Max Filippov

[permalink] [raw]
Subject: [PATCH 2/6] xtensa: clean up declarations in coprocessor.h

Drop 'extern' from all function declarations. Add argument names in
declarations.

Signed-off-by: Max Filippov <[email protected]>
---
arch/xtensa/include/asm/coprocessor.h | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/xtensa/include/asm/coprocessor.h b/arch/xtensa/include/asm/coprocessor.h
index 0fbe2a740b8d..a360efced7e7 100644
--- a/arch/xtensa/include/asm/coprocessor.h
+++ b/arch/xtensa/include/asm/coprocessor.h
@@ -143,10 +143,9 @@ typedef struct { XCHAL_CP7_SA_LIST(2) } xtregs_cp7_t
__attribute__ ((aligned (XCHAL_CP7_SA_ALIGN)));

extern struct thread_info* coprocessor_owner[XCHAL_CP_MAX];
-extern void coprocessor_flush(struct thread_info*, int);
-
-extern void coprocessor_release_all(struct thread_info*);
-extern void coprocessor_flush_all(struct thread_info*);
+void coprocessor_flush(struct thread_info *ti, int cp_index);
+void coprocessor_release_all(struct thread_info *ti);
+void coprocessor_flush_all(struct thread_info *ti);

#endif /* XTENSA_HAVE_COPROCESSORS */

--
2.30.2

2022-04-16 04:26:39

by Max Filippov

[permalink] [raw]
Subject: [PATCH 3/6] xtensa: clean up excsave1 initialization

Use xtensa_set_sr instead of inline assembly.
Rename the local variable exc_table in early_trap_init to avoid a conflict
with the per-CPU variable of the same name.

Signed-off-by: Max Filippov <[email protected]>
---
arch/xtensa/include/asm/traps.h | 4 ++--
arch/xtensa/kernel/traps.c | 3 +--
2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/xtensa/include/asm/traps.h b/arch/xtensa/include/asm/traps.h
index 6fa47cd8e02d..c9c5f59db420 100644
--- a/arch/xtensa/include/asm/traps.h
+++ b/arch/xtensa/include/asm/traps.h
@@ -45,11 +45,11 @@ void fast_second_level_miss(void);
/* Initialize minimal exc_table structure sufficient for basic paging */
static inline void __init early_trap_init(void)
{
- static struct exc_table exc_table __initdata = {
+ static struct exc_table init_exc_table __initdata = {
.fast_kernel_handler[EXCCAUSE_DTLB_MISS] =
fast_second_level_miss,
};
- __asm__ __volatile__("wsr %0, excsave1\n" : : "a" (&exc_table));
+ xtensa_set_sr(&init_exc_table, excsave1);
}

void secondary_trap_init(void);
diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c
index a85992d60c11..f6855eb92614 100644
--- a/arch/xtensa/kernel/traps.c
+++ b/arch/xtensa/kernel/traps.c
@@ -386,8 +386,7 @@ void * __init trap_set_handler(int cause, void *handler)

static void trap_init_excsave(void)
{
- unsigned long excsave1 = (unsigned long)this_cpu_ptr(&exc_table);
- __asm__ __volatile__("wsr %0, excsave1\n" : : "a" (excsave1));
+ xtensa_set_sr(this_cpu_ptr(&exc_table), excsave1);
}

static void trap_init_debug(void)
--
2.30.2

2022-04-16 04:26:47

by Max Filippov

[permalink] [raw]
Subject: [PATCH 4/6] xtensa: use callx0 opcode in fast_coprocessor

Instead of emulating callx0 in fast_coprocessor, use that opcode directly.
Use 'ret' instead of 'jx a0'.

Signed-off-by: Max Filippov <[email protected]>
---
arch/xtensa/kernel/coprocessor.S | 18 ++++++++----------
1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/arch/xtensa/kernel/coprocessor.S b/arch/xtensa/kernel/coprocessor.S
index c7b9f12896f2..8bcbabbff38a 100644
--- a/arch/xtensa/kernel/coprocessor.S
+++ b/arch/xtensa/kernel/coprocessor.S
@@ -30,7 +30,7 @@
.align 4; \
.Lsave_cp_regs_cp##x: \
xchal_cp##x##_store a2 a3 a4 a5 a6; \
- jx a0; \
+ ret; \
.endif

#define SAVE_CP_REGS_TAB(x) \
@@ -47,7 +47,7 @@
.align 4; \
.Lload_cp_regs_cp##x: \
xchal_cp##x##_load a2 a3 a4 a5 a6; \
- jx a0; \
+ ret; \
.endif

#define LOAD_CP_REGS_TAB(x) \
@@ -163,21 +163,20 @@ ENTRY(fast_coprocessor)
s32i a5, a4, THREAD_CPENABLE

/*
- * Get context save area and 'call' save routine.
+ * Get context save area and call save routine.
* (a4 still holds previous owner (thread_info), a3 CP number)
*/

movi a5, .Lsave_cp_regs_jump_table
- movi a0, 2f # a0: 'return' address
addx8 a3, a3, a5 # a3: coprocessor number
l32i a2, a3, 4 # a2: xtregs offset
l32i a3, a3, 0 # a3: jump address
add a2, a2, a4
- jx a3
+ callx0 a3

/* Note that only a0 and a1 were preserved. */

-2: rsr a3, exccause
+ rsr a3, exccause
addi a3, a3, -EXCCAUSE_COPROCESSOR0_DISABLED
movi a0, coprocessor_owner
addx4 a0, a3, a0
@@ -187,19 +186,18 @@ ENTRY(fast_coprocessor)
1: GET_THREAD_INFO (a4, a1)
s32i a4, a0, 0

- /* Get context save area and 'call' load routine. */
+ /* Get context save area and call load routine. */

movi a5, .Lload_cp_regs_jump_table
- movi a0, 1f
addx8 a3, a3, a5
l32i a2, a3, 4 # a2: xtregs offset
l32i a3, a3, 0 # a3: jump address
add a2, a2, a4
- jx a3
+ callx0 a3

/* Restore all registers and return from exception handler. */

-1: l32i a6, a1, PT_AREG6
+ l32i a6, a1, PT_AREG6
l32i a5, a1, PT_AREG5
l32i a4, a1, PT_AREG4

--
2.30.2

2022-04-16 04:26:56

by Max Filippov

[permalink] [raw]
Subject: [PATCH 5/6] xtensa: add xtensa_xsr macro

xtensa_xsr executes the XSR instruction on the specified special register:
it writes the given value to the register and returns the register's
previous value.

Signed-off-by: Max Filippov <[email protected]>
---
arch/xtensa/include/asm/processor.h | 7 +++++++
1 file changed, 7 insertions(+)

diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h
index 4489a27d527a..76bc63127c66 100644
--- a/arch/xtensa/include/asm/processor.h
+++ b/arch/xtensa/include/asm/processor.h
@@ -246,6 +246,13 @@ extern unsigned long __get_wchan(struct task_struct *p);
v; \
})

+#define xtensa_xsr(x, sr) \
+ ({ \
+ unsigned int __v__ = (unsigned int)(x); \
+ __asm__ __volatile__ ("xsr %0, " __stringify(sr) : "+a"(__v__)); \
+ __v__; \
+ })
+
#if XCHAL_HAVE_EXTERN_REGS

static inline void set_er(unsigned long value, unsigned long addr)
--
2.30.2

2022-04-16 04:27:07

by Max Filippov

[permalink] [raw]
Subject: [PATCH 6/6] xtensa: support coprocessors on SMP

Current coprocessor support on xtensa only works correctly on
uniprocessor configurations. Make it work on SMP too and keep it lazy.

Make the coprocessor_owner array per-CPU and move it to struct exc_table
for easy access from the fast_coprocessor exception handler. Allow a task
to have live coprocessors only on a single CPU, and record this CPU number
in struct thread_info::cp_owner_cpu. Change the meaning of
struct thread_info::cpenable to 'coprocessors live on cp_owner_cpu'.
Introduce a C-level coprocessor exception handler that flushes and
releases the live coprocessors of the task taking the 'coprocessor
disabled' exception, and call it from the fast_coprocessor handler when
the task has live coprocessors on another CPU.
Make coprocessor_flush_all and coprocessor_release_all work correctly
when called from any CPU by sending an IPI to the cp_owner_cpu. Add
coprocessor_flush_release_all to do a flush followed by a release
efficiently.

Signed-off-by: Max Filippov <[email protected]>
---
arch/xtensa/include/asm/coprocessor.h | 3 +-
arch/xtensa/include/asm/thread_info.h | 7 ++-
arch/xtensa/include/asm/traps.h | 7 +++
arch/xtensa/kernel/asm-offsets.c | 8 ++-
arch/xtensa/kernel/coprocessor.S | 43 +++++++++++-----
arch/xtensa/kernel/entry.S | 17 +++++++
arch/xtensa/kernel/process.c | 70 +++++++++++++++++----------
arch/xtensa/kernel/traps.c | 13 ++++-
8 files changed, 125 insertions(+), 43 deletions(-)

diff --git a/arch/xtensa/include/asm/coprocessor.h b/arch/xtensa/include/asm/coprocessor.h
index a360efced7e7..dc53bd015c5f 100644
--- a/arch/xtensa/include/asm/coprocessor.h
+++ b/arch/xtensa/include/asm/coprocessor.h
@@ -142,10 +142,11 @@ typedef struct { XCHAL_CP6_SA_LIST(2) } xtregs_cp6_t
typedef struct { XCHAL_CP7_SA_LIST(2) } xtregs_cp7_t
__attribute__ ((aligned (XCHAL_CP7_SA_ALIGN)));

-extern struct thread_info* coprocessor_owner[XCHAL_CP_MAX];
+struct thread_info;
void coprocessor_flush(struct thread_info *ti, int cp_index);
void coprocessor_release_all(struct thread_info *ti);
void coprocessor_flush_all(struct thread_info *ti);
+void coprocessor_flush_release_all(struct thread_info *ti);

#endif /* XTENSA_HAVE_COPROCESSORS */

diff --git a/arch/xtensa/include/asm/thread_info.h b/arch/xtensa/include/asm/thread_info.h
index f6fcbba1d02f..52974317a6b6 100644
--- a/arch/xtensa/include/asm/thread_info.h
+++ b/arch/xtensa/include/asm/thread_info.h
@@ -52,12 +52,17 @@ struct thread_info {
__u32 cpu; /* current CPU */
__s32 preempt_count; /* 0 => preemptable,< 0 => BUG*/

- unsigned long cpenable;
#if XCHAL_HAVE_EXCLUSIVE
/* result of the most recent exclusive store */
unsigned long atomctl8;
#endif

+ /*
+ * If i-th bit is set then coprocessor state is loaded into the
+ * coprocessor i on CPU cp_owner_cpu.
+ */
+ unsigned long cpenable;
+ u32 cp_owner_cpu;
/* Allocate storage for extra user states and coprocessor states. */
#if XTENSA_HAVE_COPROCESSORS
xtregs_coprocessor_t xtregs_cp;
diff --git a/arch/xtensa/include/asm/traps.h b/arch/xtensa/include/asm/traps.h
index c9c5f59db420..6b292facf7a7 100644
--- a/arch/xtensa/include/asm/traps.h
+++ b/arch/xtensa/include/asm/traps.h
@@ -12,6 +12,7 @@

#include <asm/ptrace.h>

+struct thread_info;
/*
* Per-CPU exception handling data structure.
* EXCSAVE1 points to it.
@@ -25,6 +26,10 @@ struct exc_table {
void *fixup;
/* For passing a parameter to fixup */
void *fixup_param;
+#if XTENSA_HAVE_COPROCESSORS
+ /* Pointers to owner struct thread_info */
+ struct thread_info *coprocessor_owner[XCHAL_CP_MAX];
+#endif
/* Fast user exception handlers */
void *fast_user_handler[EXCCAUSE_N];
/* Fast kernel exception handlers */
@@ -33,6 +38,8 @@ struct exc_table {
void *default_handler[EXCCAUSE_N];
};

+DECLARE_PER_CPU(struct exc_table, exc_table);
+
/*
* handler must be either of the following:
* void (*)(struct pt_regs *regs);
diff --git a/arch/xtensa/kernel/asm-offsets.c b/arch/xtensa/kernel/asm-offsets.c
index 37278e2785fb..e3b9cf4c2289 100644
--- a/arch/xtensa/kernel/asm-offsets.c
+++ b/arch/xtensa/kernel/asm-offsets.c
@@ -91,10 +91,12 @@ int main(void)
/* struct thread_info (offset from start_struct) */
DEFINE(THREAD_RA, offsetof (struct task_struct, thread.ra));
DEFINE(THREAD_SP, offsetof (struct task_struct, thread.sp));
- DEFINE(THREAD_CPENABLE, offsetof (struct thread_info, cpenable));
#if XCHAL_HAVE_EXCLUSIVE
DEFINE(THREAD_ATOMCTL8, offsetof (struct thread_info, atomctl8));
#endif
+ DEFINE(THREAD_CPENABLE, offsetof(struct thread_info, cpenable));
+ DEFINE(THREAD_CPU, offsetof(struct thread_info, cpu));
+ DEFINE(THREAD_CP_OWNER_CPU, offsetof(struct thread_info, cp_owner_cpu));
#if XTENSA_HAVE_COPROCESSORS
DEFINE(THREAD_XTREGS_CP0, offsetof(struct thread_info, xtregs_cp.cp0));
DEFINE(THREAD_XTREGS_CP1, offsetof(struct thread_info, xtregs_cp.cp1));
@@ -137,6 +139,10 @@ int main(void)
DEFINE(EXC_TABLE_DOUBLE_SAVE, offsetof(struct exc_table, double_save));
DEFINE(EXC_TABLE_FIXUP, offsetof(struct exc_table, fixup));
DEFINE(EXC_TABLE_PARAM, offsetof(struct exc_table, fixup_param));
+#if XTENSA_HAVE_COPROCESSORS
+ DEFINE(EXC_TABLE_COPROCESSOR_OWNER,
+ offsetof(struct exc_table, coprocessor_owner));
+#endif
DEFINE(EXC_TABLE_FAST_USER,
offsetof(struct exc_table, fast_user_handler));
DEFINE(EXC_TABLE_FAST_KERNEL,
diff --git a/arch/xtensa/kernel/coprocessor.S b/arch/xtensa/kernel/coprocessor.S
index 8bcbabbff38a..1e2bfcf9f0cf 100644
--- a/arch/xtensa/kernel/coprocessor.S
+++ b/arch/xtensa/kernel/coprocessor.S
@@ -115,9 +115,32 @@

ENTRY(fast_coprocessor)

+ s32i a3, a2, PT_AREG3
+
+#ifdef CONFIG_SMP
+ /*
+ * Check if any coprocessor context is live on another CPU
+ * and if so go through the C-level coprocessor exception handler
+ * to flush it to memory.
+ */
+
+ GET_THREAD_INFO (a0, a2)
+ l32i a3, a0, THREAD_CPENABLE
+ beqz a3, .Lload_local
+ l32i a3, a0, THREAD_CPU
+ l32i a0, a0, THREAD_CP_OWNER_CPU
+ beq a0, a3, .Lload_local
+
+ rsr a0, ps
+ l32i a3, a2, PT_AREG3
+ bbci.l a0, PS_UM_BIT, 1f
+ call0 user_exception
+1: call0 kernel_exception
+#endif
+
/* Save remaining registers a1-a3 and SAR */

- s32i a3, a2, PT_AREG3
+.Lload_local:
rsr a3, sar
s32i a1, a2, PT_AREG1
s32i a3, a2, PT_SAR
@@ -150,9 +173,9 @@ ENTRY(fast_coprocessor)

/* Retrieve previous owner. (a3 still holds CP number) */

- movi a0, coprocessor_owner # list of owners
+ rsr a0, excsave1 # exc_table
addx4 a0, a3, a0 # entry for CP
- l32i a4, a0, 0
+ l32i a4, a0, EXC_TABLE_COPROCESSOR_OWNER

beqz a4, 1f # skip 'save' if no previous owner

@@ -178,13 +201,15 @@ ENTRY(fast_coprocessor)

rsr a3, exccause
addi a3, a3, -EXCCAUSE_COPROCESSOR0_DISABLED
- movi a0, coprocessor_owner
+ rsr a0, excsave1 # exc_table
addx4 a0, a3, a0

/* Set new 'owner' (a0 points to the CP owner, a3 contains the CP nr) */

1: GET_THREAD_INFO (a4, a1)
- s32i a4, a0, 0
+ l32i a5, a4, THREAD_CPU
+ s32i a4, a0, EXC_TABLE_COPROCESSOR_OWNER
+ s32i a5, a4, THREAD_CP_OWNER_CPU

/* Get context save area and call load routine. */

@@ -245,12 +270,4 @@ ENTRY(coprocessor_flush)

ENDPROC(coprocessor_flush)

- .data
-
-ENTRY(coprocessor_owner)
-
- .fill XCHAL_CP_MAX, 4, 0
-
-END(coprocessor_owner)
-
#endif /* XTENSA_HAVE_COPROCESSORS */
diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S
index b9bcb2cd74dd..033443b4ce87 100644
--- a/arch/xtensa/kernel/entry.S
+++ b/arch/xtensa/kernel/entry.S
@@ -2087,9 +2087,26 @@ ENTRY(_switch_to)
/* Switch CPENABLE */

#if (XTENSA_HAVE_COPROCESSORS || XTENSA_HAVE_IO_PORTS)
+#ifdef CONFIG_SMP
l32i a3, a5, THREAD_CPENABLE
+ beqz a3, 1f
+ l32i a6, a5, THREAD_CP_OWNER_CPU
+ l32i a7, a5, THREAD_CPU
+ beq a6, a7, 1f # load 0 into CPENABLE if current CPU is not the owner
+ movi a3, 0
+1:
xsr a3, cpenable
+
+ l32i a6, a4, THREAD_CP_OWNER_CPU
+ l32i a7, a4, THREAD_CPU
+ bne a6, a7, 1f # skip saving CPENABLE if current CPU was not the owner
s32i a3, a4, THREAD_CPENABLE
+1:
+#else
+ l32i a3, a5, THREAD_CPENABLE
+ xsr a3, cpenable
+ s32i a3, a4, THREAD_CPENABLE
+#endif
#endif

#if XCHAL_HAVE_EXCLUSIVE
diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c
index e8bfbca5f001..a17c37fbd93c 100644
--- a/arch/xtensa/kernel/process.c
+++ b/arch/xtensa/kernel/process.c
@@ -47,6 +47,7 @@
#include <asm/asm-offsets.h>
#include <asm/regs.h>
#include <asm/hw_breakpoint.h>
+#include <asm/traps.h>

extern void ret_from_fork(void);
extern void ret_from_kernel_thread(void);
@@ -63,52 +64,70 @@ EXPORT_SYMBOL(__stack_chk_guard);

#if XTENSA_HAVE_COPROCESSORS

-void coprocessor_release_all(struct thread_info *ti)
+static void local_coprocessor_release_all(void *info)
{
- unsigned long cpenable;
+ struct thread_info *ti = info;
+ struct thread_info **coprocessor_owner;
int i;

- /* Make sure we don't switch tasks during this operation. */
-
- preempt_disable();
+ coprocessor_owner = this_cpu_ptr(&exc_table)->coprocessor_owner;

/* Walk through all cp owners and release it for the requested one. */

- cpenable = ti->cpenable;
-
for (i = 0; i < XCHAL_CP_MAX; i++) {
- if (coprocessor_owner[i] == ti) {
- coprocessor_owner[i] = 0;
- cpenable &= ~(1 << i);
- }
+ if (coprocessor_owner[i] == ti)
+ coprocessor_owner[i] = NULL;
}
-
- ti->cpenable = cpenable;
+ ti->cpenable = 0;
if (ti == current_thread_info())
xtensa_set_sr(0, cpenable);
+}

- preempt_enable();
+void coprocessor_release_all(struct thread_info *ti)
+{
+ if (ti->cpenable)
+ smp_call_function_single(ti->cp_owner_cpu,
+ local_coprocessor_release_all,
+ ti, true);
}

-void coprocessor_flush_all(struct thread_info *ti)
+static void local_coprocessor_flush_all(void *info)
{
- unsigned long cpenable, old_cpenable;
+ struct thread_info *ti = info;
+ struct thread_info **coprocessor_owner;
+ unsigned long old_cpenable;
int i;

- preempt_disable();
-
- old_cpenable = xtensa_get_sr(cpenable);
- cpenable = ti->cpenable;
- xtensa_set_sr(cpenable, cpenable);
+ coprocessor_owner = this_cpu_ptr(&exc_table)->coprocessor_owner;
+ old_cpenable = xtensa_xsr(ti->cpenable, cpenable);

for (i = 0; i < XCHAL_CP_MAX; i++) {
- if ((cpenable & 1) != 0 && coprocessor_owner[i] == ti)
+ if (coprocessor_owner[i] == ti)
coprocessor_flush(ti, i);
- cpenable >>= 1;
}
xtensa_set_sr(old_cpenable, cpenable);
+}
+
+void coprocessor_flush_all(struct thread_info *ti)
+{
+ if (ti->cpenable)
+ smp_call_function_single(ti->cp_owner_cpu,
+ local_coprocessor_flush_all,
+ ti, true);
+}

- preempt_enable();
+static void local_coprocessor_flush_release_all(void *info)
+{
+ local_coprocessor_flush_all(info);
+ local_coprocessor_release_all(info);
+}
+
+void coprocessor_flush_release_all(struct thread_info *ti)
+{
+ if (ti->cpenable)
+ smp_call_function_single(ti->cp_owner_cpu,
+ local_coprocessor_flush_release_all,
+ ti, true);
}

#endif
@@ -140,8 +159,7 @@ void flush_thread(void)
{
#if XTENSA_HAVE_COPROCESSORS
struct thread_info *ti = current_thread_info();
- coprocessor_flush_all(ti);
- coprocessor_release_all(ti);
+ coprocessor_flush_release_all(ti);
#endif
flush_ptrace_hw_breakpoint(current);
}
diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c
index f6855eb92614..9b8a76d4fc05 100644
--- a/arch/xtensa/kernel/traps.c
+++ b/arch/xtensa/kernel/traps.c
@@ -72,6 +72,9 @@ static void do_nmi(struct pt_regs *regs);
static void do_unaligned_user(struct pt_regs *regs);
#endif
static void do_multihit(struct pt_regs *regs);
+#if XTENSA_HAVE_COPROCESSORS
+static void do_coprocessor(struct pt_regs *regs);
+#endif
static void do_debug(struct pt_regs *regs);

/*
@@ -84,7 +87,8 @@ static void do_debug(struct pt_regs *regs);
#define USER 0x02

#define COPROCESSOR(x) \
-{ EXCCAUSE_COPROCESSOR ## x ## _DISABLED, USER, fast_coprocessor }
+{ EXCCAUSE_COPROCESSOR ## x ## _DISABLED, USER, fast_coprocessor }, \
+{ EXCCAUSE_COPROCESSOR ## x ## _DISABLED, 0, do_coprocessor }

typedef struct {
int cause;
@@ -342,6 +346,13 @@ static void do_unaligned_user(struct pt_regs *regs)
}
#endif

+#if XTENSA_HAVE_COPROCESSORS
+static void do_coprocessor(struct pt_regs *regs)
+{
+ coprocessor_flush_release_all(current_thread_info());
+}
+#endif
+
/* Handle debug events.
* When CONFIG_HAVE_HW_BREAKPOINT is on this handler is called with
* preemption disabled to avoid rescheduling and keep mapping of hardware
--
2.30.2