Hi,
This is a follow-up series on top of the cleanup series available here:
https://lkml.org/lkml/2020/2/12/215
It contains two parts:
1. Changes to the cpuinfo structure in patches 1 and 2
2. Defining SMP-safe operations instead of disabling IRQs
MicroBlaze has 32-bit exclusive load/store instructions which should be used
instead of IRQ enable/disable. For more information take a look at
https://www.xilinx.com/support/documentation/sw_manuals/xilinx2019_2/ug984-vivado-microblaze-ref.pdf
starting from page 25.
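For reference, the lwx/swx retry pattern used throughout the series looks
roughly like this (a minimal sketch only, not code taken from the patches;
register constraints simplified):

/*
 * Sketch of the exclusive load/store retry loop: swx sets MSR[C] when the
 * reservation was lost, so the carry flag is copied out with addic and
 * tested to decide whether to retry.
 */
static inline void atomic_add_sketch(int i, int *counter)
{
	int result, tmp;

	__asm__ __volatile__ (
		"1:	lwx	%0, %2, r0;\n"	/* load exclusive */
		"	add	%0, %0, %3;\n"	/* modify */
		"	swx	%0, %2, r0;\n"	/* store exclusive */
		"	addic	%1, r0, 0;\n"	/* copy MSR[C] */
		"	bnei	%1, 1b;\n"	/* reservation lost -> retry */
		: "=&r" (result), "=&r" (tmp)
		: "r" (counter), "r" (i)
		: "cc", "memory");
}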
Thanks,
Michal
Michal Simek (1):
microblaze: timer: Don't use cpu timer setting
Stefan Asserhall (5):
microblaze: Make cpuinfo structure SMP aware
microblaze: Define SMP safe bit operations
microblaze: Add SMP implementation of xchg and cmpxchg
microblaze: Remove disabling IRQ while pte_update() run
microblaze: Implement architecture spinlock
Stefan Asserhall load and store (1):
microblaze: Do atomic operations by using exclusive ops
arch/microblaze/include/asm/Kbuild | 1 -
arch/microblaze/include/asm/atomic.h | 265 ++++++++++++++++++-
arch/microblaze/include/asm/bitops.h | 189 +++++++++++++
arch/microblaze/include/asm/cmpxchg.h | 87 ++++++
arch/microblaze/include/asm/cpuinfo.h | 2 +-
arch/microblaze/include/asm/pgtable.h | 19 +-
arch/microblaze/include/asm/spinlock.h | 240 +++++++++++++++++
arch/microblaze/include/asm/spinlock_types.h | 25 ++
arch/microblaze/kernel/cpu/cache.c | 154 ++++++-----
arch/microblaze/kernel/cpu/cpuinfo.c | 38 ++-
arch/microblaze/kernel/cpu/mb.c | 207 ++++++++-------
arch/microblaze/kernel/timer.c | 2 +-
arch/microblaze/mm/consistent.c | 8 +-
13 files changed, 1040 insertions(+), 197 deletions(-)
create mode 100644 arch/microblaze/include/asm/bitops.h
create mode 100644 arch/microblaze/include/asm/spinlock.h
create mode 100644 arch/microblaze/include/asm/spinlock_types.h
--
2.25.0
From: Stefan Asserhall <[email protected]>
Define struct cpuinfo per CPU and fill it based on information from DT
or PVR.
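Consumers switch from the global cpuinfo to a per-CPU copy; the access
pattern mirrors the hunks below (illustrative sketch only, the caller
function is made up):

/* DECLARE_PER_CPU/per_cpu_ptr come from <linux/percpu.h>, smp_processor_id()
 * from <linux/smp.h>. */
DECLARE_PER_CPU(struct cpuinfo, cpu_info);

static void cpuinfo_example_user(void)		/* hypothetical caller */
{
	unsigned int cpu = smp_processor_id();
	struct cpuinfo *cpuinfo = per_cpu_ptr(&cpu_info, cpu);

	pr_debug("cpu %u dcache line length %d\n",
		 cpu, cpuinfo->dcache_line_length);
}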
Signed-off-by: Stefan Asserhall <[email protected]>
Signed-off-by: Michal Simek <[email protected]>
---
arch/microblaze/include/asm/cpuinfo.h | 2 +-
arch/microblaze/kernel/cpu/cache.c | 154 +++++++++++--------
arch/microblaze/kernel/cpu/cpuinfo.c | 38 +++--
arch/microblaze/kernel/cpu/mb.c | 207 ++++++++++++++------------
arch/microblaze/mm/consistent.c | 8 +-
5 files changed, 236 insertions(+), 173 deletions(-)
diff --git a/arch/microblaze/include/asm/cpuinfo.h b/arch/microblaze/include/asm/cpuinfo.h
index 786ffa669bf1..b8b04cd0095d 100644
--- a/arch/microblaze/include/asm/cpuinfo.h
+++ b/arch/microblaze/include/asm/cpuinfo.h
@@ -84,7 +84,7 @@ struct cpuinfo {
u32 pvr_user2;
};
-extern struct cpuinfo cpuinfo;
+DECLARE_PER_CPU(struct cpuinfo, cpu_info);
/* fwd declarations of the various CPUinfo populators */
void setup_cpuinfo(void);
diff --git a/arch/microblaze/kernel/cpu/cache.c b/arch/microblaze/kernel/cpu/cache.c
index dcba53803fa5..818152e37375 100644
--- a/arch/microblaze/kernel/cpu/cache.c
+++ b/arch/microblaze/kernel/cpu/cache.c
@@ -12,6 +12,7 @@
#include <asm/cacheflush.h>
#include <linux/cache.h>
+#include <linux/smp.h>
#include <asm/cpuinfo.h>
#include <asm/pvr.h>
@@ -158,6 +159,8 @@ do { \
static void __flush_icache_range_msr_irq(unsigned long start, unsigned long end)
{
+ unsigned int cpu = smp_processor_id();
+ struct cpuinfo *cpuinfo = per_cpu_ptr(&cpu_info, cpu);
unsigned long flags;
#ifndef ASM_LOOP
int i;
@@ -166,15 +169,15 @@ static void __flush_icache_range_msr_irq(unsigned long start, unsigned long end)
(unsigned int)start, (unsigned int) end);
CACHE_LOOP_LIMITS(start, end,
- cpuinfo.icache_line_length, cpuinfo.icache_size);
+ cpuinfo->icache_line_length, cpuinfo->icache_size);
local_irq_save(flags);
__disable_icache_msr();
#ifdef ASM_LOOP
- CACHE_RANGE_LOOP_1(start, end, cpuinfo.icache_line_length, wic);
+ CACHE_RANGE_LOOP_1(start, end, cpuinfo->icache_line_length, wic);
#else
- for (i = start; i < end; i += cpuinfo.icache_line_length)
+ for (i = start; i < end; i += cpuinfo->icache_line_length)
__asm__ __volatile__ ("wic %0, r0;" \
: : "r" (i));
#endif
@@ -185,6 +188,8 @@ static void __flush_icache_range_msr_irq(unsigned long start, unsigned long end)
static void __flush_icache_range_nomsr_irq(unsigned long start,
unsigned long end)
{
+ unsigned int cpu = smp_processor_id();
+ struct cpuinfo *cpuinfo = per_cpu_ptr(&cpu_info, cpu);
unsigned long flags;
#ifndef ASM_LOOP
int i;
@@ -193,15 +198,15 @@ static void __flush_icache_range_nomsr_irq(unsigned long start,
(unsigned int)start, (unsigned int) end);
CACHE_LOOP_LIMITS(start, end,
- cpuinfo.icache_line_length, cpuinfo.icache_size);
+ cpuinfo->icache_line_length, cpuinfo->icache_size);
local_irq_save(flags);
__disable_icache_nomsr();
#ifdef ASM_LOOP
- CACHE_RANGE_LOOP_1(start, end, cpuinfo.icache_line_length, wic);
+ CACHE_RANGE_LOOP_1(start, end, cpuinfo->icache_line_length, wic);
#else
- for (i = start; i < end; i += cpuinfo.icache_line_length)
+ for (i = start; i < end; i += cpuinfo->icache_line_length)
__asm__ __volatile__ ("wic %0, r0;" \
: : "r" (i));
#endif
@@ -213,6 +218,8 @@ static void __flush_icache_range_nomsr_irq(unsigned long start,
static void __flush_icache_range_noirq(unsigned long start,
unsigned long end)
{
+ unsigned int cpu = smp_processor_id();
+ struct cpuinfo *cpuinfo = per_cpu_ptr(&cpu_info, cpu);
#ifndef ASM_LOOP
int i;
#endif
@@ -220,11 +227,11 @@ static void __flush_icache_range_noirq(unsigned long start,
(unsigned int)start, (unsigned int) end);
CACHE_LOOP_LIMITS(start, end,
- cpuinfo.icache_line_length, cpuinfo.icache_size);
+ cpuinfo->icache_line_length, cpuinfo->icache_size);
#ifdef ASM_LOOP
- CACHE_RANGE_LOOP_1(start, end, cpuinfo.icache_line_length, wic);
+ CACHE_RANGE_LOOP_1(start, end, cpuinfo->icache_line_length, wic);
#else
- for (i = start; i < end; i += cpuinfo.icache_line_length)
+ for (i = start; i < end; i += cpuinfo->icache_line_length)
__asm__ __volatile__ ("wic %0, r0;" \
: : "r" (i));
#endif
@@ -232,6 +239,8 @@ static void __flush_icache_range_noirq(unsigned long start,
static void __flush_icache_all_msr_irq(void)
{
+ unsigned int cpu = smp_processor_id();
+ struct cpuinfo *cpuinfo = per_cpu_ptr(&cpu_info, cpu);
unsigned long flags;
#ifndef ASM_LOOP
int i;
@@ -241,11 +250,10 @@ static void __flush_icache_all_msr_irq(void)
local_irq_save(flags);
__disable_icache_msr();
#ifdef ASM_LOOP
- CACHE_ALL_LOOP(cpuinfo.icache_size, cpuinfo.icache_line_length, wic);
+ CACHE_ALL_LOOP(cpuinfo->icache_size, cpuinfo->icache_line_length, wic);
#else
- for (i = 0; i < cpuinfo.icache_size;
- i += cpuinfo.icache_line_length)
- __asm__ __volatile__ ("wic %0, r0;" \
+ for (i = 0; i < cpuinfo->icache_size; i += cpuinfo->icache_line_length)
+ __asm__ __volatile__ ("wic %0, r0;" \
: : "r" (i));
#endif
__enable_icache_msr();
@@ -254,6 +262,8 @@ static void __flush_icache_all_msr_irq(void)
static void __flush_icache_all_nomsr_irq(void)
{
+ unsigned int cpu = smp_processor_id();
+ struct cpuinfo *cpuinfo = per_cpu_ptr(&cpu_info, cpu);
unsigned long flags;
#ifndef ASM_LOOP
int i;
@@ -263,11 +273,10 @@ static void __flush_icache_all_nomsr_irq(void)
local_irq_save(flags);
__disable_icache_nomsr();
#ifdef ASM_LOOP
- CACHE_ALL_LOOP(cpuinfo.icache_size, cpuinfo.icache_line_length, wic);
+ CACHE_ALL_LOOP(cpuinfo->icache_size, cpuinfo->icache_line_length, wic);
#else
- for (i = 0; i < cpuinfo.icache_size;
- i += cpuinfo.icache_line_length)
- __asm__ __volatile__ ("wic %0, r0;" \
+ for (i = 0; i < cpuinfo->icache_size; i += cpuinfo->icache_line_length)
+ __asm__ __volatile__ ("wic %0, r0;" \
: : "r" (i));
#endif
__enable_icache_nomsr();
@@ -276,22 +285,25 @@ static void __flush_icache_all_nomsr_irq(void)
static void __flush_icache_all_noirq(void)
{
+ unsigned int cpu = smp_processor_id();
+ struct cpuinfo *cpuinfo = per_cpu_ptr(&cpu_info, cpu);
#ifndef ASM_LOOP
int i;
#endif
pr_debug("%s\n", __func__);
#ifdef ASM_LOOP
- CACHE_ALL_LOOP(cpuinfo.icache_size, cpuinfo.icache_line_length, wic);
+ CACHE_ALL_LOOP(cpuinfo->icache_size, cpuinfo->icache_line_length, wic);
#else
- for (i = 0; i < cpuinfo.icache_size;
- i += cpuinfo.icache_line_length)
- __asm__ __volatile__ ("wic %0, r0;" \
+ for (i = 0; i < cpuinfo->icache_size; i += cpuinfo->icache_line_length)
+ __asm__ __volatile__ ("wic %0, r0;" \
: : "r" (i));
#endif
}
static void __invalidate_dcache_all_msr_irq(void)
{
+ unsigned int cpu = smp_processor_id();
+ struct cpuinfo *cpuinfo = per_cpu_ptr(&cpu_info, cpu);
unsigned long flags;
#ifndef ASM_LOOP
int i;
@@ -301,11 +313,10 @@ static void __invalidate_dcache_all_msr_irq(void)
local_irq_save(flags);
__disable_dcache_msr();
#ifdef ASM_LOOP
- CACHE_ALL_LOOP(cpuinfo.dcache_size, cpuinfo.dcache_line_length, wdc);
+ CACHE_ALL_LOOP(cpuinfo->dcache_size, cpuinfo->dcache_line_length, wdc);
#else
- for (i = 0; i < cpuinfo.dcache_size;
- i += cpuinfo.dcache_line_length)
- __asm__ __volatile__ ("wdc %0, r0;" \
+ for (i = 0; i < cpuinfo->dcache_size; i += cpuinfo->dcache_line_length)
+ __asm__ __volatile__ ("wdc %0, r0;" \
: : "r" (i));
#endif
__enable_dcache_msr();
@@ -314,6 +325,8 @@ static void __invalidate_dcache_all_msr_irq(void)
static void __invalidate_dcache_all_nomsr_irq(void)
{
+ unsigned int cpu = smp_processor_id();
+ struct cpuinfo *cpuinfo = per_cpu_ptr(&cpu_info, cpu);
unsigned long flags;
#ifndef ASM_LOOP
int i;
@@ -323,11 +336,10 @@ static void __invalidate_dcache_all_nomsr_irq(void)
local_irq_save(flags);
__disable_dcache_nomsr();
#ifdef ASM_LOOP
- CACHE_ALL_LOOP(cpuinfo.dcache_size, cpuinfo.dcache_line_length, wdc);
+ CACHE_ALL_LOOP(cpuinfo->dcache_size, cpuinfo->dcache_line_length, wdc);
#else
- for (i = 0; i < cpuinfo.dcache_size;
- i += cpuinfo.dcache_line_length)
- __asm__ __volatile__ ("wdc %0, r0;" \
+ for (i = 0; i < cpuinfo->dcache_size; i += cpuinfo->dcache_line_length)
+ __asm__ __volatile__ ("wdc %0, r0;" \
: : "r" (i));
#endif
__enable_dcache_nomsr();
@@ -336,16 +348,17 @@ static void __invalidate_dcache_all_nomsr_irq(void)
static void __invalidate_dcache_all_noirq_wt(void)
{
+ unsigned int cpu = smp_processor_id();
+ struct cpuinfo *cpuinfo = per_cpu_ptr(&cpu_info, cpu);
#ifndef ASM_LOOP
int i;
#endif
pr_debug("%s\n", __func__);
#ifdef ASM_LOOP
- CACHE_ALL_LOOP(cpuinfo.dcache_size, cpuinfo.dcache_line_length, wdc);
+ CACHE_ALL_LOOP(cpuinfo->dcache_size, cpuinfo->dcache_line_length, wdc);
#else
- for (i = 0; i < cpuinfo.dcache_size;
- i += cpuinfo.dcache_line_length)
- __asm__ __volatile__ ("wdc %0, r0;" \
+ for (i = 0; i < cpuinfo->dcache_size; i += cpuinfo->dcache_line_length)
+ __asm__ __volatile__ ("wdc %0, r0;" \
: : "r" (i));
#endif
}
@@ -359,17 +372,18 @@ static void __invalidate_dcache_all_noirq_wt(void)
*/
static void __invalidate_dcache_all_wb(void)
{
+ unsigned int cpu = smp_processor_id();
+ struct cpuinfo *cpuinfo = per_cpu_ptr(&cpu_info, cpu);
#ifndef ASM_LOOP
int i;
#endif
pr_debug("%s\n", __func__);
#ifdef ASM_LOOP
- CACHE_ALL_LOOP(cpuinfo.dcache_size, cpuinfo.dcache_line_length,
+ CACHE_ALL_LOOP(cpuinfo->dcache_size, cpuinfo->dcache_line_length,
wdc);
#else
- for (i = 0; i < cpuinfo.dcache_size;
- i += cpuinfo.dcache_line_length)
- __asm__ __volatile__ ("wdc %0, r0;" \
+ for (i = 0; i < cpuinfo->dcache_size; i += cpuinfo->dcache_line_length)
+ __asm__ __volatile__ ("wdc %0, r0;" \
: : "r" (i));
#endif
}
@@ -377,6 +391,8 @@ static void __invalidate_dcache_all_wb(void)
static void __invalidate_dcache_range_wb(unsigned long start,
unsigned long end)
{
+ unsigned int cpu = smp_processor_id();
+ struct cpuinfo *cpuinfo = per_cpu_ptr(&cpu_info, cpu);
#ifndef ASM_LOOP
int i;
#endif
@@ -384,11 +400,11 @@ static void __invalidate_dcache_range_wb(unsigned long start,
(unsigned int)start, (unsigned int) end);
CACHE_LOOP_LIMITS(start, end,
- cpuinfo.dcache_line_length, cpuinfo.dcache_size);
+ cpuinfo->dcache_line_length, cpuinfo->dcache_size);
#ifdef ASM_LOOP
- CACHE_RANGE_LOOP_2(start, end, cpuinfo.dcache_line_length, wdc.clear);
+ CACHE_RANGE_LOOP_2(start, end, cpuinfo->dcache_line_length, wdc.clear);
#else
- for (i = start; i < end; i += cpuinfo.dcache_line_length)
+ for (i = start; i < end; i += cpuinfo->dcache_line_length)
__asm__ __volatile__ ("wdc.clear %0, r0;" \
: : "r" (i));
#endif
@@ -397,18 +413,20 @@ static void __invalidate_dcache_range_wb(unsigned long start,
static void __invalidate_dcache_range_nomsr_wt(unsigned long start,
unsigned long end)
{
+ unsigned int cpu = smp_processor_id();
+ struct cpuinfo *cpuinfo = per_cpu_ptr(&cpu_info, cpu);
#ifndef ASM_LOOP
int i;
#endif
pr_debug("%s: start 0x%x, end 0x%x\n", __func__,
(unsigned int)start, (unsigned int) end);
CACHE_LOOP_LIMITS(start, end,
- cpuinfo.dcache_line_length, cpuinfo.dcache_size);
+ cpuinfo->dcache_line_length, cpuinfo->dcache_size);
#ifdef ASM_LOOP
- CACHE_RANGE_LOOP_1(start, end, cpuinfo.dcache_line_length, wdc);
+ CACHE_RANGE_LOOP_1(start, end, cpuinfo->dcache_line_length, wdc);
#else
- for (i = start; i < end; i += cpuinfo.dcache_line_length)
+ for (i = start; i < end; i += cpuinfo->dcache_line_length)
__asm__ __volatile__ ("wdc %0, r0;" \
: : "r" (i));
#endif
@@ -417,6 +435,8 @@ static void __invalidate_dcache_range_nomsr_wt(unsigned long start,
static void __invalidate_dcache_range_msr_irq_wt(unsigned long start,
unsigned long end)
{
+ unsigned int cpu = smp_processor_id();
+ struct cpuinfo *cpuinfo = per_cpu_ptr(&cpu_info, cpu);
unsigned long flags;
#ifndef ASM_LOOP
int i;
@@ -424,15 +444,15 @@ static void __invalidate_dcache_range_msr_irq_wt(unsigned long start,
pr_debug("%s: start 0x%x, end 0x%x\n", __func__,
(unsigned int)start, (unsigned int) end);
CACHE_LOOP_LIMITS(start, end,
- cpuinfo.dcache_line_length, cpuinfo.dcache_size);
+ cpuinfo->dcache_line_length, cpuinfo->dcache_size);
local_irq_save(flags);
__disable_dcache_msr();
#ifdef ASM_LOOP
- CACHE_RANGE_LOOP_1(start, end, cpuinfo.dcache_line_length, wdc);
+ CACHE_RANGE_LOOP_1(start, end, cpuinfo->dcache_line_length, wdc);
#else
- for (i = start; i < end; i += cpuinfo.dcache_line_length)
+ for (i = start; i < end; i += cpuinfo->dcache_line_length)
__asm__ __volatile__ ("wdc %0, r0;" \
: : "r" (i));
#endif
@@ -444,6 +464,8 @@ static void __invalidate_dcache_range_msr_irq_wt(unsigned long start,
static void __invalidate_dcache_range_nomsr_irq(unsigned long start,
unsigned long end)
{
+ unsigned int cpu = smp_processor_id();
+ struct cpuinfo *cpuinfo = per_cpu_ptr(&cpu_info, cpu);
unsigned long flags;
#ifndef ASM_LOOP
int i;
@@ -452,15 +474,15 @@ static void __invalidate_dcache_range_nomsr_irq(unsigned long start,
(unsigned int)start, (unsigned int) end);
CACHE_LOOP_LIMITS(start, end,
- cpuinfo.dcache_line_length, cpuinfo.dcache_size);
+ cpuinfo->dcache_line_length, cpuinfo->dcache_size);
local_irq_save(flags);
__disable_dcache_nomsr();
#ifdef ASM_LOOP
- CACHE_RANGE_LOOP_1(start, end, cpuinfo.dcache_line_length, wdc);
+ CACHE_RANGE_LOOP_1(start, end, cpuinfo->dcache_line_length, wdc);
#else
- for (i = start; i < end; i += cpuinfo.dcache_line_length)
+ for (i = start; i < end; i += cpuinfo->dcache_line_length)
__asm__ __volatile__ ("wdc %0, r0;" \
: : "r" (i));
#endif
@@ -471,23 +493,26 @@ static void __invalidate_dcache_range_nomsr_irq(unsigned long start,
static void __flush_dcache_all_wb(void)
{
+ unsigned int cpu = smp_processor_id();
+ struct cpuinfo *cpuinfo = per_cpu_ptr(&cpu_info, cpu);
#ifndef ASM_LOOP
int i;
#endif
pr_debug("%s\n", __func__);
#ifdef ASM_LOOP
- CACHE_ALL_LOOP(cpuinfo.dcache_size, cpuinfo.dcache_line_length,
+ CACHE_ALL_LOOP(cpuinfo->dcache_size, cpuinfo->dcache_line_length,
wdc.flush);
#else
- for (i = 0; i < cpuinfo.dcache_size;
- i += cpuinfo.dcache_line_length)
- __asm__ __volatile__ ("wdc.flush %0, r0;" \
+ for (i = 0; i < cpuinfo->dcache_size; i += cpuinfo->dcache_line_length)
+ __asm__ __volatile__ ("wdc.flush %0, r0;" \
: : "r" (i));
#endif
}
static void __flush_dcache_range_wb(unsigned long start, unsigned long end)
{
+ unsigned int cpu = smp_processor_id();
+ struct cpuinfo *cpuinfo = per_cpu_ptr(&cpu_info, cpu);
#ifndef ASM_LOOP
int i;
#endif
@@ -495,11 +520,11 @@ static void __flush_dcache_range_wb(unsigned long start, unsigned long end)
(unsigned int)start, (unsigned int) end);
CACHE_LOOP_LIMITS(start, end,
- cpuinfo.dcache_line_length, cpuinfo.dcache_size);
+ cpuinfo->dcache_line_length, cpuinfo->dcache_size);
#ifdef ASM_LOOP
- CACHE_RANGE_LOOP_2(start, end, cpuinfo.dcache_line_length, wdc.flush);
+ CACHE_RANGE_LOOP_2(start, end, cpuinfo->dcache_line_length, wdc.flush);
#else
- for (i = start; i < end; i += cpuinfo.dcache_line_length)
+ for (i = start; i < end; i += cpuinfo->dcache_line_length)
__asm__ __volatile__ ("wdc.flush %0, r0;" \
: : "r" (i));
#endif
@@ -608,16 +633,19 @@ static const struct scache wt_nomsr_noirq = {
void microblaze_cache_init(void)
{
- if (cpuinfo.use_instr & PVR2_USE_MSR_INSTR) {
- if (cpuinfo.dcache_wb) {
+ unsigned int cpu = smp_processor_id();
+ struct cpuinfo *cpuinfo = per_cpu_ptr(&cpu_info, cpu);
+
+ if (cpuinfo->use_instr & PVR2_USE_MSR_INSTR) {
+ if (cpuinfo->dcache_wb) {
pr_info("wb_msr\n");
mbc = (struct scache *)&wb_msr;
- if (cpuinfo.ver_code <= CPUVER_7_20_D) {
+ if (cpuinfo->ver_code <= CPUVER_7_20_D) {
/* MS: problem with signal handling - hw bug */
pr_info("WB won't work properly\n");
}
} else {
- if (cpuinfo.ver_code >= CPUVER_7_20_A) {
+ if (cpuinfo->ver_code >= CPUVER_7_20_A) {
pr_info("wt_msr_noirq\n");
mbc = (struct scache *)&wt_msr_noirq;
} else {
@@ -626,15 +654,15 @@ void microblaze_cache_init(void)
}
}
} else {
- if (cpuinfo.dcache_wb) {
+ if (cpuinfo->dcache_wb) {
pr_info("wb_nomsr\n");
mbc = (struct scache *)&wb_nomsr;
- if (cpuinfo.ver_code <= CPUVER_7_20_D) {
+ if (cpuinfo->ver_code <= CPUVER_7_20_D) {
/* MS: problem with signal handling - hw bug */
pr_info("WB won't work properly\n");
}
} else {
- if (cpuinfo.ver_code >= CPUVER_7_20_A) {
+ if (cpuinfo->ver_code >= CPUVER_7_20_A) {
pr_info("wt_nomsr_noirq\n");
mbc = (struct scache *)&wt_nomsr_noirq;
} else {
diff --git a/arch/microblaze/kernel/cpu/cpuinfo.c b/arch/microblaze/kernel/cpu/cpuinfo.c
index cd9b4450763b..e2b87f136ba8 100644
--- a/arch/microblaze/kernel/cpu/cpuinfo.c
+++ b/arch/microblaze/kernel/cpu/cpuinfo.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (C) 2013-2020 Xilinx, Inc. All rights reserved.
* Copyright (C) 2007-2009 Michal Simek <[email protected]>
* Copyright (C) 2007-2009 PetaLogix
* Copyright (C) 2007 John Williams <[email protected]>
@@ -10,6 +11,7 @@
#include <linux/clk.h>
#include <linux/init.h>
+#include <linux/smp.h>
#include <asm/cpuinfo.h>
#include <asm/pvr.h>
@@ -56,7 +58,7 @@ const struct cpu_ver_key cpu_ver_lookup[] = {
};
/*
- * FIXME Not sure if the actual key is defined by Xilinx in the PVR
+ * The actual key is defined by Xilinx in the PVR
*/
const struct family_string_key family_string_lookup[] = {
{"virtex2", 0x4},
@@ -85,37 +87,40 @@ const struct family_string_key family_string_lookup[] = {
{NULL, 0},
};
-struct cpuinfo cpuinfo;
-static struct device_node *cpu;
+DEFINE_PER_CPU(struct cpuinfo, cpu_info);
void __init setup_cpuinfo(void)
{
- cpu = of_get_cpu_node(0, NULL);
+ struct device_node *cpu;
+ unsigned int cpu_id = smp_processor_id();
+ struct cpuinfo *cpuinfo = per_cpu_ptr(&cpu_info, cpu_id);
+
+ cpu = of_get_cpu_node(cpu_id, NULL);
if (!cpu)
pr_err("You don't have cpu or are missing cpu reg property!!!\n");
- pr_info("%s: initialising\n", __func__);
+ pr_info("%s: initialising cpu %d\n", __func__, cpu_id);
switch (cpu_has_pvr()) {
case 0:
pr_warn("%s: No PVR support. Using static CPU info from FDT\n",
__func__);
- set_cpuinfo_static(&cpuinfo, cpu);
+ set_cpuinfo_static(cpuinfo, cpu);
break;
/* FIXME I found weird behavior with MB 7.00.a/b 7.10.a
* please do not use FULL PVR with MMU */
case 1:
pr_info("%s: Using full CPU PVR support\n",
__func__);
- set_cpuinfo_static(&cpuinfo, cpu);
- set_cpuinfo_pvr_full(&cpuinfo, cpu);
+ set_cpuinfo_static(cpuinfo, cpu);
+ set_cpuinfo_pvr_full(cpuinfo, cpu);
break;
default:
pr_warn("%s: Unsupported PVR setting\n", __func__);
- set_cpuinfo_static(&cpuinfo, cpu);
+ set_cpuinfo_static(cpuinfo, cpu);
}
- if (cpuinfo.mmu_privins)
+ if (cpuinfo->mmu_privins)
pr_warn("%s: Stream instructions enabled"
" - USERSPACE CAN LOCK THIS KERNEL!\n", __func__);
@@ -125,17 +130,24 @@ void __init setup_cpuinfo(void)
void __init setup_cpuinfo_clk(void)
{
struct clk *clk;
+ struct device_node *cpu;
+ unsigned int cpu_id = smp_processor_id();
+ struct cpuinfo *cpuinfo = per_cpu_ptr(&cpu_info, cpu_id);
+
+ cpu = of_get_cpu_node(cpu_id, NULL);
+ if (!cpu)
+ pr_err("You don't have cpu or are missing cpu reg property!!!\n");
clk = of_clk_get(cpu, 0);
if (IS_ERR(clk)) {
pr_err("ERROR: CPU CCF input clock not found\n");
/* take timebase-frequency from DTS */
- cpuinfo.cpu_clock_freq = fcpu(cpu, "timebase-frequency");
+ cpuinfo->cpu_clock_freq = fcpu(cpu, "timebase-frequency");
} else {
- cpuinfo.cpu_clock_freq = clk_get_rate(clk);
+ cpuinfo->cpu_clock_freq = clk_get_rate(clk);
}
- if (!cpuinfo.cpu_clock_freq) {
+ if (!cpuinfo->cpu_clock_freq) {
pr_err("ERROR: CPU clock frequency not setup\n");
BUG();
}
diff --git a/arch/microblaze/kernel/cpu/mb.c b/arch/microblaze/kernel/cpu/mb.c
index 9581d194d9e4..7a6fc13f925a 100644
--- a/arch/microblaze/kernel/cpu/mb.c
+++ b/arch/microblaze/kernel/cpu/mb.c
@@ -1,6 +1,7 @@
/*
* CPU-version specific code
*
+ * Copyright (C) 2013-2020 Xilinx, Inc. All rights reserved
* Copyright (C) 2007-2009 Michal Simek <[email protected]>
* Copyright (C) 2006-2009 PetaLogix
*
@@ -27,117 +28,135 @@
static int show_cpuinfo(struct seq_file *m, void *v)
{
- char *fpga_family = "Unknown";
- char *cpu_ver = "Unknown";
- int i;
-
- /* Denormalised to get the fpga family string */
- for (i = 0; family_string_lookup[i].s != NULL; i++) {
- if (cpuinfo.fpga_family_code == family_string_lookup[i].k) {
- fpga_family = (char *)family_string_lookup[i].s;
- break;
+ unsigned int cpu;
+
+ for_each_online_cpu(cpu) {
+ struct cpuinfo *cpuinfo = per_cpu_ptr(&cpu_info, cpu);
+ char *fpga_family = "Unknown";
+ char *cpu_ver = "Unknown";
+ int i;
+
+ /* Denormalised to get the fpga family string */
+ for (i = 0; family_string_lookup[i].s != NULL; i++) {
+ if (cpuinfo->fpga_family_code ==
+ family_string_lookup[i].k) {
+ fpga_family = (char *)family_string_lookup[i].s;
+ break;
+ }
}
- }
- /* Denormalised to get the hw version string */
- for (i = 0; cpu_ver_lookup[i].s != NULL; i++) {
- if (cpuinfo.ver_code == cpu_ver_lookup[i].k) {
- cpu_ver = (char *)cpu_ver_lookup[i].s;
- break;
+ /* Denormalised to get the hw version string */
+ for (i = 0; cpu_ver_lookup[i].s != NULL; i++) {
+ if (cpuinfo->ver_code == cpu_ver_lookup[i].k) {
+ cpu_ver = (char *)cpu_ver_lookup[i].s;
+ break;
+ }
}
- }
- seq_printf(m,
- "CPU-Family: MicroBlaze\n"
- "FPGA-Arch: %s\n"
- "CPU-Ver: %s, %s endian\n"
- "CPU-MHz: %d.%02d\n"
- "BogoMips: %lu.%02lu\n",
- fpga_family,
- cpu_ver,
- cpuinfo.endian ? "little" : "big",
- cpuinfo.cpu_clock_freq / 1000000,
- cpuinfo.cpu_clock_freq % 1000000,
- loops_per_jiffy / (500000 / HZ),
- (loops_per_jiffy / (5000 / HZ)) % 100);
-
- seq_printf(m,
- "HW:\n Shift:\t\t%s\n"
- " MSR:\t\t%s\n"
- " PCMP:\t\t%s\n"
- " DIV:\t\t%s\n",
- (cpuinfo.use_instr & PVR0_USE_BARREL_MASK) ? "yes" : "no",
- (cpuinfo.use_instr & PVR2_USE_MSR_INSTR) ? "yes" : "no",
- (cpuinfo.use_instr & PVR2_USE_PCMP_INSTR) ? "yes" : "no",
- (cpuinfo.use_instr & PVR0_USE_DIV_MASK) ? "yes" : "no");
-
- seq_printf(m, " MMU:\t\t%x\n", cpuinfo.mmu);
-
- seq_printf(m,
- " MUL:\t\t%s\n"
- " FPU:\t\t%s\n",
- (cpuinfo.use_mult & PVR2_USE_MUL64_MASK) ? "v2" :
- (cpuinfo.use_mult & PVR0_USE_HW_MUL_MASK) ? "v1" : "no",
- (cpuinfo.use_fpu & PVR2_USE_FPU2_MASK) ? "v2" :
- (cpuinfo.use_fpu & PVR0_USE_FPU_MASK) ? "v1" : "no");
-
- seq_printf(m,
- " Exc:\t\t%s%s%s%s%s%s%s%s\n",
- (cpuinfo.use_exc & PVR2_OPCODE_0x0_ILL_MASK) ? "op0x0 " : "",
- (cpuinfo.use_exc & PVR2_UNALIGNED_EXC_MASK) ? "unal " : "",
- (cpuinfo.use_exc & PVR2_ILL_OPCODE_EXC_MASK) ? "ill " : "",
- (cpuinfo.use_exc & PVR2_IOPB_BUS_EXC_MASK) ? "iopb " : "",
- (cpuinfo.use_exc & PVR2_DOPB_BUS_EXC_MASK) ? "dopb " : "",
- (cpuinfo.use_exc & PVR2_DIV_ZERO_EXC_MASK) ? "zero " : "",
- (cpuinfo.use_exc & PVR2_FPU_EXC_MASK) ? "fpu " : "",
- (cpuinfo.use_exc & PVR2_USE_FSL_EXC) ? "fsl " : "");
-
- seq_printf(m,
- "Stream-insns:\t%sprivileged\n",
- cpuinfo.mmu_privins ? "un" : "");
-
- if (cpuinfo.use_icache)
seq_printf(m,
- "Icache:\t\t%ukB\tline length:\t%dB\n",
- cpuinfo.icache_size >> 10,
- cpuinfo.icache_line_length);
- else
- seq_puts(m, "Icache:\t\tno\n");
+ "Processor: %u\n"
+ "CPU-Family: MicroBlaze\n"
+ "FPGA-Arch: %s\n"
+ "CPU-Ver: %s, %s endian\n"
+ "CPU-MHz: %d.%02d\n"
+ "BogoMips: %lu.%02lu\n",
+ cpu,
+ fpga_family,
+ cpu_ver,
+ cpuinfo->endian ? "little" : "big",
+ cpuinfo->cpu_clock_freq / 1000000,
+ cpuinfo->cpu_clock_freq % 1000000,
+ loops_per_jiffy / (500000 / HZ),
+ (loops_per_jiffy / (5000 / HZ)) % 100);
+
+ seq_printf(m,
+ "HW:\n Shift:\t\t%s\n"
+ " MSR:\t\t%s\n"
+ " PCMP:\t\t%s\n"
+ " DIV:\t\t%s\n",
+ (cpuinfo->use_instr & PVR0_USE_BARREL_MASK) ?
+ "yes" : "no",
+ (cpuinfo->use_instr & PVR2_USE_MSR_INSTR) ?
+ "yes" : "no",
+ (cpuinfo->use_instr & PVR2_USE_PCMP_INSTR) ?
+ "yes" : "no",
+ (cpuinfo->use_instr & PVR0_USE_DIV_MASK) ?
+ "yes" : "no");
+
+ seq_printf(m, " MMU:\t\t%x\n", cpuinfo->mmu);
+
+ seq_printf(m,
+ " MUL:\t\t%s\n"
+ " FPU:\t\t%s\n",
+ (cpuinfo->use_mult & PVR2_USE_MUL64_MASK) ? "v2" :
+ (cpuinfo->use_mult & PVR0_USE_HW_MUL_MASK) ?
+ "v1" : "no",
+ (cpuinfo->use_fpu & PVR2_USE_FPU2_MASK) ? "v2" :
+ (cpuinfo->use_fpu & PVR0_USE_FPU_MASK) ? "v1" : "no");
+
+ seq_printf(m,
+ " Exc:\t\t%s%s%s%s%s%s%s%s\n",
+ (cpuinfo->use_exc & PVR2_OPCODE_0x0_ILL_MASK) ?
+ "op0x0 " : "",
+ (cpuinfo->use_exc & PVR2_UNALIGNED_EXC_MASK) ?
+ "unal " : "",
+ (cpuinfo->use_exc & PVR2_ILL_OPCODE_EXC_MASK) ?
+ "ill " : "",
+ (cpuinfo->use_exc & PVR2_IOPB_BUS_EXC_MASK) ?
+ "iopb " : "",
+ (cpuinfo->use_exc & PVR2_DOPB_BUS_EXC_MASK) ?
+ "dopb " : "",
+ (cpuinfo->use_exc & PVR2_DIV_ZERO_EXC_MASK) ?
+ "zero " : "",
+ (cpuinfo->use_exc & PVR2_FPU_EXC_MASK) ? "fpu " : "",
+ (cpuinfo->use_exc & PVR2_USE_FSL_EXC) ? "fsl " : "");
- if (cpuinfo.use_dcache) {
seq_printf(m,
- "Dcache:\t\t%ukB\tline length:\t%dB\n",
- cpuinfo.dcache_size >> 10,
- cpuinfo.dcache_line_length);
- seq_puts(m, "Dcache-Policy:\t");
- if (cpuinfo.dcache_wb)
- seq_puts(m, "write-back\n");
+ "Stream-insns:\t%sprivileged\n",
+ cpuinfo->mmu_privins ? "un" : "");
+
+ if (cpuinfo->use_icache)
+ seq_printf(m,
+ "Icache:\t\t%ukB\tline length:\t%dB\n",
+ cpuinfo->icache_size >> 10,
+ cpuinfo->icache_line_length);
else
- seq_puts(m, "write-through\n");
- } else {
- seq_puts(m, "Dcache:\t\tno\n");
- }
+ seq_puts(m, "Icache:\t\tno\n");
+
+ if (cpuinfo->use_dcache) {
+ seq_printf(m,
+ "Dcache:\t\t%ukB\tline length:\t%dB\n",
+ cpuinfo->dcache_size >> 10,
+ cpuinfo->dcache_line_length);
+ seq_puts(m, "Dcache-Policy:\t");
+ if (cpuinfo->dcache_wb)
+ seq_puts(m, "write-back\n");
+ else
+ seq_puts(m, "write-through\n");
+ } else {
+ seq_puts(m, "Dcache:\t\tno\n");
+ }
- seq_printf(m,
- "HW-Debug:\t%s\n",
- cpuinfo.hw_debug ? "yes" : "no");
+ seq_printf(m,
+ "HW-Debug:\t%s\n",
+ cpuinfo->hw_debug ? "yes" : "no");
- seq_printf(m,
- "PVR-USR1:\t%02x\n"
- "PVR-USR2:\t%08x\n",
- cpuinfo.pvr_user1,
- cpuinfo.pvr_user2);
+ seq_printf(m,
+ "PVR-USR1:\t%02x\n"
+ "PVR-USR2:\t%08x\n",
+ cpuinfo->pvr_user1,
+ cpuinfo->pvr_user2);
- seq_printf(m, "Page size:\t%lu\n", PAGE_SIZE);
+ seq_printf(m, "Page size:\t%lu\n", PAGE_SIZE);
+ seq_puts(m, "\n");
+ }
return 0;
}
static void *c_start(struct seq_file *m, loff_t *pos)
{
- int i = *pos;
-
- return i < NR_CPUS ? (void *) (i + 1) : NULL;
+ return *pos < 1 ? (void *) 1 : NULL;
}
static void *c_next(struct seq_file *m, void *v, loff_t *pos)
diff --git a/arch/microblaze/mm/consistent.c b/arch/microblaze/mm/consistent.c
index 8c5f0c332d8b..ddccc2a29fbf 100644
--- a/arch/microblaze/mm/consistent.c
+++ b/arch/microblaze/mm/consistent.c
@@ -35,7 +35,7 @@ void arch_dma_prep_coherent(struct page *page, size_t size)
* I have to use dcache values because I can't relate on ram size:
*/
#ifdef CONFIG_XILINX_UNCACHED_SHADOW
-#define UNCACHED_SHADOW_MASK (cpuinfo.dcache_high - cpuinfo.dcache_base + 1)
+#define UNCACHED_SHADOW_MASK (cpuinfo->dcache_high - cpuinfo->dcache_base + 1)
#else
#define UNCACHED_SHADOW_MASK 0
#endif /* CONFIG_XILINX_UNCACHED_SHADOW */
@@ -43,9 +43,11 @@ void arch_dma_prep_coherent(struct page *page, size_t size)
void *uncached_kernel_address(void *ptr)
{
unsigned long addr = (unsigned long)ptr;
+ unsigned int cpu = smp_processor_id();
+ struct cpuinfo *cpuinfo = per_cpu_ptr(&cpu_info, cpu);
addr |= UNCACHED_SHADOW_MASK;
- if (addr > cpuinfo.dcache_base && addr < cpuinfo.dcache_high)
+ if (addr > cpuinfo->dcache_base && addr < cpuinfo->dcache_high)
pr_warn("ERROR: Your cache coherent area is CACHED!!!\n");
return (void *)addr;
}
@@ -53,6 +55,8 @@ void *uncached_kernel_address(void *ptr)
void *cached_kernel_address(void *ptr)
{
unsigned long addr = (unsigned long)ptr;
+ unsigned int cpu = smp_processor_id();
+ struct cpuinfo *cpuinfo = per_cpu_ptr(&cpu_info, cpu);
return (void *)(addr & ~UNCACHED_SHADOW_MASK);
}
--
2.25.0
From: Stefan Asserhall <[email protected]>
There is no need to disable local IRQs for the pte update. Use exclusive
load/store instructions for it instead.
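In C terms the new sequence is equivalent to the loop below;
load_exclusive() and try_store_exclusive() are hypothetical stand-ins for
lwx and "swx + MSR[C] test" (sketch only, not part of the patch):

/* Pseudo-C view of the new pte_update(): retry until the exclusive store
 * succeeds instead of masking local interrupts. */
static unsigned long pte_update_sketch(unsigned long *p,
				       unsigned long clr, unsigned long set)
{
	unsigned long old;

	do {
		old = load_exclusive(p);		/* lwx  %0, %2, r0    */
	} while (!try_store_exclusive(p, (old & ~clr) | set));
							/* swx + addic + bnei */
	return old;
}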
Signed-off-by: Stefan Asserhall <[email protected]>
Signed-off-by: Michal Simek <[email protected]>
---
arch/microblaze/include/asm/pgtable.h | 19 +++++++++----------
1 file changed, 9 insertions(+), 10 deletions(-)
diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h
index 45b30878fc17..cf1e3095514b 100644
--- a/arch/microblaze/include/asm/pgtable.h
+++ b/arch/microblaze/include/asm/pgtable.h
@@ -372,20 +372,19 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
static inline unsigned long pte_update(pte_t *p, unsigned long clr,
unsigned long set)
{
- unsigned long flags, old, tmp;
-
- raw_local_irq_save(flags);
-
- __asm__ __volatile__( "lw %0, %2, r0 \n"
- "andn %1, %0, %3 \n"
- "or %1, %1, %4 \n"
- "sw %1, %2, r0 \n"
+ unsigned long old, tmp;
+
+ __asm__ __volatile__(
+ "1: lwx %0, %2, r0;\n"
+ " andn %1, %0, %3;\n"
+ " or %1, %1, %4;\n"
+ " swx %1, %2, r0;\n"
+ " addic %1, r0, 0;\n"
+ " bnei %1, 1b;\n"
: "=&r" (old), "=&r" (tmp)
: "r" ((unsigned long)(p + 1) - 4), "r" (clr), "r" (set)
: "cc");
- raw_local_irq_restore(flags);
-
return old;
}
--
2.25.0
From: Stefan Asserhall <[email protected]>
Using exclusive loads/stores to implement spinlocks which can be used on
SMP systems.
Signed-off-by: Stefan Asserhall <[email protected]>
Signed-off-by: Michal Simek <[email protected]>
---
arch/microblaze/include/asm/spinlock.h | 240 +++++++++++++++++++
arch/microblaze/include/asm/spinlock_types.h | 25 ++
2 files changed, 265 insertions(+)
create mode 100644 arch/microblaze/include/asm/spinlock.h
create mode 100644 arch/microblaze/include/asm/spinlock_types.h
diff --git a/arch/microblaze/include/asm/spinlock.h b/arch/microblaze/include/asm/spinlock.h
new file mode 100644
index 000000000000..0199ea9f7f0f
--- /dev/null
+++ b/arch/microblaze/include/asm/spinlock.h
@@ -0,0 +1,240 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2013-2020 Xilinx, Inc.
+ */
+
+#ifndef _ASM_MICROBLAZE_SPINLOCK_H
+#define _ASM_MICROBLAZE_SPINLOCK_H
+
+/*
+ * Unlocked value: 0
+ * Locked value: 1
+ */
+#define arch_spin_is_locked(x) (READ_ONCE((x)->lock) != 0)
+
+static inline void arch_spin_lock(arch_spinlock_t *lock)
+{
+ unsigned long tmp;
+
+ __asm__ __volatile__ (
+ /* load conditional address in %1 to %0 */
+ "1: lwx %0, %1, r0;\n"
+ /* not zero? try again */
+ " bnei %0, 1b;\n"
+ /* increment lock by 1 */
+ " addi %0, r0, 1;\n"
+ /* attempt store */
+ " swx %0, %1, r0;\n"
+ /* checking msr carry flag */
+ " addic %0, r0, 0;\n"
+ /* store failed (MSR[C] set)? try again */
+ " bnei %0, 1b;\n"
+ /* Outputs: temp variable for load result */
+ : "=&r" (tmp)
+ /* Inputs: lock address */
+ : "r" (&lock->lock)
+ : "cc", "memory"
+ );
+}
+
+static inline int arch_spin_trylock(arch_spinlock_t *lock)
+{
+ unsigned long prev, tmp;
+
+ __asm__ __volatile__ (
+ /* load conditional address in %2 to %0 */
+ "1: lwx %0, %2, r0;\n"
+ /* not zero? clear reservation */
+ " bneid %0, 2f;\n"
+ /* increment lock by one if lwx was successful */
+ " addi %1, r0, 1;\n"
+ /* attempt store */
+ " swx %1, %2, r0;\n"
+ /* checking msr carry flag */
+ " addic %1, r0, 0;\n"
+ /* store failed (MSR[C] set)? try again */
+ " bnei %1, 1b;\n"
+ "2:"
+ /* Outputs: temp variable for load result */
+ : "=&r" (prev), "=&r" (tmp)
+ /* Inputs: lock address */
+ : "r" (&lock->lock)
+ : "cc", "memory"
+ );
+
+ return (prev == 0);
+}
+
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
+{
+ unsigned long tmp;
+
+ __asm__ __volatile__ (
+ /* load conditional address in %1 to %0 */
+ "1: lwx %0, %1, r0;\n"
+ /* clear */
+ " swx r0, %1, r0;\n"
+ /* checking msr carry flag */
+ " addic %0, r0, 0;\n"
+ /* store failed (MSR[C] set)? try again */
+ " bnei %0, 1b;\n"
+ /* Outputs: temp variable for load result */
+ : "=&r" (tmp)
+ /* Inputs: lock address */
+ : "r" (&lock->lock)
+ : "cc", "memory"
+ );
+}
+
+/* RWLOCKS */
+static inline void arch_write_lock(arch_rwlock_t *rw)
+{
+ unsigned long tmp;
+
+ __asm__ __volatile__ (
+ /* load conditional address in %1 to %0 */
+ "1: lwx %0, %1, r0;\n"
+ /* not zero? try again */
+ " bneid %0, 1b;\n"
+ /* set tmp to -1 */
+ " addi %0, r0, -1;\n"
+ /* attempt store */
+ " swx %0, %1, r0;\n"
+ /* checking msr carry flag */
+ " addic %0, r0, 0;\n"
+ /* store failed (MSR[C] set)? try again */
+ " bnei %0, 1b;\n"
+ /* Outputs: temp variable for load result */
+ : "=&r" (tmp)
+ /* Inputs: lock address */
+ : "r" (&rw->lock)
+ : "cc", "memory"
+ );
+}
+
+static inline int arch_write_trylock(arch_rwlock_t *rw)
+{
+ unsigned long prev, tmp;
+
+ __asm__ __volatile__ (
+ /* load conditional address in %1 to tmp */
+ "1: lwx %0, %2, r0;\n"
+ /* not zero? abort */
+ " bneid %0, 2f;\n"
+ /* set tmp to -1 */
+ " addi %1, r0, -1;\n"
+ /* attempt store */
+ " swx %1, %2, r0;\n"
+ /* checking msr carry flag */
+ " addic %1, r0, 0;\n"
+ /* store failed (MSR[C] set)? try again */
+ " bnei %1, 1b;\n"
+ "2:"
+ /* Outputs: temp variable for load result */
+ : "=&r" (prev), "=&r" (tmp)
+ /* Inputs: lock address */
+ : "r" (&rw->lock)
+ : "cc", "memory"
+ );
+ /* prev value should be zero and MSR should be clear */
+ return (prev == 0);
+}
+
+static inline void arch_write_unlock(arch_rwlock_t *rw)
+{
+ unsigned long tmp;
+
+ __asm__ __volatile__ (
+ /* load conditional address in %1 to %0 */
+ "1: lwx %0, %1, r0;\n"
+ /* clear */
+ " swx r0, %1, r0;\n"
+ /* checking msr carry flag */
+ " addic %0, r0, 0;\n"
+ /* store failed (MSR[C] set)? try again */
+ " bnei %0, 1b;\n"
+ /* Outputs: temp variable for load result */
+ : "=&r" (tmp)
+ /* Inputs: lock address */
+ : "r" (&rw->lock)
+ : "cc", "memory"
+ );
+}
+
+/* Read locks */
+static inline void arch_read_lock(arch_rwlock_t *rw)
+{
+ unsigned long tmp;
+
+ __asm__ __volatile__ (
+ /* load conditional address in %1 to %0 */
+ "1: lwx %0, %1, r0;\n"
+ /* < 0 (WRITE LOCK active) try again */
+ " bltid %0, 1b;\n"
+ /* increment lock by 1 if lwx was successful */
+ " addi %0, %0, 1;\n"
+ /* attempt store */
+ " swx %0, %1, r0;\n"
+ /* checking msr carry flag */
+ " addic %0, r0, 0;\n"
+ /* store failed (MSR[C] set)? try again */
+ " bnei %0, 1b;\n"
+ /* Outputs: temp variable for load result */
+ : "=&r" (tmp)
+ /* Inputs: lock address */
+ : "r" (&rw->lock)
+ : "cc", "memory"
+ );
+}
+
+static inline void arch_read_unlock(arch_rwlock_t *rw)
+{
+ unsigned long tmp;
+
+ __asm__ __volatile__ (
+ /* load conditional address in %1 to tmp */
+ "1: lwx %0, %1, r0;\n"
+ /* tmp = tmp - 1 */
+ " addi %0, %0, -1;\n"
+ /* attempt store */
+ " swx %0, %1, r0;\n"
+ /* checking msr carry flag */
+ " addic %0, r0, 0;\n"
+ /* store failed (MSR[C] set)? try again */
+ " bnei %0, 1b;\n"
+ /* Outputs: temp variable for load result */
+ : "=&r" (tmp)
+ /* Inputs: lock address */
+ : "r" (&rw->lock)
+ : "cc", "memory"
+ );
+}
+
+static inline int arch_read_trylock(arch_rwlock_t *rw)
+{
+ unsigned long prev, tmp;
+
+ __asm__ __volatile__ (
+ /* load conditional address in %1 to %0 */
+ "1: lwx %0, %2, r0;\n"
+ /* < 0 bail, release lock */
+ " bltid %0, 2f;\n"
+ /* increment lock by 1 */
+ " addi %1, %0, 1;\n"
+ /* attempt store */
+ " swx %1, %2, r0;\n"
+ /* checking msr carry flag */
+ " addic %1, r0, 0;\n"
+ /* store failed (MSR[C] set)? try again */
+ " bnei %1, 1b;\n"
+ "2:"
+ /* Outputs: temp variable for load result */
+ : "=&r" (prev), "=&r" (tmp)
+ /* Inputs: lock address */
+ : "r" (&rw->lock)
+ : "cc", "memory"
+ );
+ return (prev >= 0);
+}
+
+#endif /* _ASM_MICROBLAZE_SPINLOCK_H */
diff --git a/arch/microblaze/include/asm/spinlock_types.h b/arch/microblaze/include/asm/spinlock_types.h
new file mode 100644
index 000000000000..ffd3588f6546
--- /dev/null
+++ b/arch/microblaze/include/asm/spinlock_types.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2013-2020 Xilinx, Inc.
+ */
+
+#ifndef __ASM_MICROBLAZE_SPINLOCK_TYPES_H
+#define __ASM_MICROBLAZE_SPINLOCK_TYPES_H
+
+#ifndef __LINUX_SPINLOCK_TYPES_H
+# error "please don't include this file directly"
+#endif
+
+typedef struct {
+ volatile unsigned int lock;
+} arch_spinlock_t;
+
+#define __ARCH_SPIN_LOCK_UNLOCKED { 0 }
+
+typedef struct {
+ volatile signed int lock;
+} arch_rwlock_t;
+
+#define __ARCH_RW_LOCK_UNLOCKED { 0 }
+
+#endif
--
2.25.0
From: Stefan Asserhall load and store <[email protected]>
Implement SMP aware atomic operations.
Signed-off-by: Stefan Asserhall <[email protected]>
Signed-off-by: Michal Simek <[email protected]>
---
arch/microblaze/include/asm/atomic.h | 265 +++++++++++++++++++++++++--
1 file changed, 253 insertions(+), 12 deletions(-)
diff --git a/arch/microblaze/include/asm/atomic.h b/arch/microblaze/include/asm/atomic.h
index 41e9aff23a62..522d704fad63 100644
--- a/arch/microblaze/include/asm/atomic.h
+++ b/arch/microblaze/include/asm/atomic.h
@@ -1,28 +1,269 @@
/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2013-2020 Xilinx, Inc.
+ */
+
#ifndef _ASM_MICROBLAZE_ATOMIC_H
#define _ASM_MICROBLAZE_ATOMIC_H
+#include <linux/types.h>
#include <asm/cmpxchg.h>
-#include <asm-generic/atomic.h>
-#include <asm-generic/atomic64.h>
+
+#define ATOMIC_INIT(i) { (i) }
+
+#define atomic_read(v) READ_ONCE((v)->counter)
+
+static inline void atomic_set(atomic_t *v, int i)
+{
+ int result, tmp;
+
+ __asm__ __volatile__ (
+ /* load conditional address in %2 to %0 */
+ "1: lwx %0, %2, r0;\n"
+ /* attempt store */
+ " swx %3, %2, r0;\n"
+ /* checking msr carry flag */
+ " addic %1, r0, 0;\n"
+ /* store failed (MSR[C] set)? try again */
+ " bnei %1, 1b;\n"
+ /* Outputs: result value */
+ : "=&r" (result), "=&r" (tmp)
+ /* Inputs: counter address */
+ : "r" (&v->counter), "r" (i)
+ : "cc", "memory"
+ );
+}
+#define atomic_set atomic_set
+
+/* Atomically perform op with v->counter and i, return result */
+#define ATOMIC_OP_RETURN(op, asm) \
+static inline int atomic_##op##_return_relaxed(int i, atomic_t *v) \
+{ \
+ int result, tmp; \
+ \
+ __asm__ __volatile__ ( \
+ /* load conditional address in %2 to %0 */ \
+ "1: lwx %0, %2, r0;\n" \
+ /* perform operation and save it to result */ \
+ #asm " %0, %3, %0;\n" \
+ /* attempt store */ \
+ " swx %0, %2, r0;\n" \
+ /* checking msr carry flag */ \
+ " addic %1, r0, 0;\n" \
+ /* store failed (MSR[C] set)? try again */ \
+ " bnei %1, 1b;\n" \
+ /* Outputs: result value */ \
+ : "=&r" (result), "=&r" (tmp) \
+ /* Inputs: counter address */ \
+ : "r" (&v->counter), "r" (i) \
+ : "cc", "memory" \
+ ); \
+ \
+ return result; \
+} \
+ \
+static inline void atomic_##op(int i, atomic_t *v) \
+{ \
+ atomic_##op##_return_relaxed(i, v); \
+}
+
+/* Atomically perform op with v->counter and i, return orig v->counter */
+#define ATOMIC_FETCH_OP_RELAXED(op, asm) \
+static inline int atomic_fetch_##op##_relaxed(int i, atomic_t *v) \
+{ \
+ int old, tmp; \
+ \
+ __asm__ __volatile__ ( \
+ /* load conditional address in %2 to %0 */ \
+ "1: lwx %0, %2, r0;\n" \
+ /* perform operation and save it to tmp */ \
+ #asm " %1, %3, %0;\n" \
+ /* attempt store */ \
+ " swx %1, %2, r0;\n" \
+ /* checking msr carry flag */ \
+ " addic %1, r0, 0;\n" \
+ /* store failed (MSR[C] set)? try again */ \
+ " bnei %1, 1b;\n" \
+ /* Outputs: old value */ \
+ : "=&r" (old), "=&r" (tmp) \
+ /* Inputs: counter address */ \
+ : "r" (&v->counter), "r" (i) \
+ : "cc", "memory" \
+ ); \
+ \
+ return old; \
+}
+
+#define ATOMIC_OPS(op, asm) \
+ ATOMIC_FETCH_OP_RELAXED(op, asm) \
+ ATOMIC_OP_RETURN(op, asm)
+
+ATOMIC_OPS(and, and)
+#define atomic_and atomic_and
+#define atomic_and_return_relaxed atomic_and_return_relaxed
+#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed
+
+ATOMIC_OPS(add, add)
+#define atomic_add atomic_add
+#define atomic_add_return_relaxed atomic_add_return_relaxed
+#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed
+
+ATOMIC_OPS(xor, xor)
+#define atomic_xor atomic_xor
+#define atomic_xor_return_relaxed atomic_xor_return_relaxed
+#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed
+
+ATOMIC_OPS(or, or)
+#define atomic_or atomic_or
+#define atomic_or_return_relaxed atomic_or_return_relaxed
+#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed
+
+ATOMIC_OPS(sub, rsub)
+#define atomic_sub atomic_sub
+#define atomic_sub_return_relaxed atomic_sub_return_relaxed
+#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed
+
+static inline int atomic_inc_return_relaxed(atomic_t *v)
+{
+ int result, tmp;
+
+ __asm__ __volatile__ (
+ /* load conditional address in %2 to %0 */
+ "1: lwx %0, %2, r0;\n"
+ /* increment counter by 1 */
+ " addi %0, %0, 1;\n"
+ /* attempt store */
+ " swx %0, %2, r0;\n"
+ /* checking msr carry flag */
+ " addic %1, r0, 0;\n"
+ /* store failed (MSR[C] set)? try again */
+ " bnei %1, 1b;\n"
+ /* Outputs: result value */
+ : "=&r" (result), "=&r" (tmp)
+ /* Inputs: counter address */
+ : "r" (&v->counter)
+ : "cc", "memory"
+ );
+
+ return result;
+}
+#define atomic_inc_return_relaxed atomic_inc_return_relaxed
+
+#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0)
+
+static inline int atomic_dec_return(atomic_t *v)
+{
+ int result, tmp;
+
+ __asm__ __volatile__ (
+ /* load conditional address in %2 to %0 */
+ "1: lwx %0, %2, r0;\n"
+ /* increment counter by -1 */
+ " addi %0, %0, -1;\n"
+ /* attempt store */
+ " swx %0, %2, r0;\n"
+ /* checking msr carry flag */
+ " addic %1, r0, 0;\n"
+ /* store failed (MSR[C] set)? try again */
+ " bnei %1, 1b;\n"
+ /* Outputs: result value */
+ : "=&r" (result), "=&r" (tmp)
+ /* Inputs: counter address */
+ : "r" (&v->counter)
+ : "cc", "memory"
+ );
+
+ return result;
+}
+#define atomic_dec_return atomic_dec_return
+
+static inline void atomic_dec(atomic_t *v)
+{
+ atomic_dec_return(v);
+}
+#define atomic_dec atomic_dec
+
+#define atomic_sub_and_test(a, v) (atomic_sub_return((a), (v)) == 0)
+#define atomic_dec_and_test(v) (atomic_dec_return((v)) == 0)
+
+#define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+
+/**
+ * atomic_add_unless - add unless the number is a given value
+ * @v: pointer of type atomic_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @v, so long as it was not @u.
+ * Returns the old value of @v.
+ */
+static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+{
+ int result, tmp;
+
+ __asm__ __volatile__ (
+ /* load conditional address in %2 to %0 */
+ "1: lwx %0, %2, r0;\n"
+ /* compare loaded value with old value*/
+ " cmp %1, %0, %3;\n"
+ /* equal to u, don't increment */
+ " beqid %1, 2f;\n"
+ /* increment counter by i */
+ " add %1, %0, %4;\n"
+ /* attempt store of new value*/
+ " swx %1, %2, r0;\n"
+ /* checking msr carry flag */
+ " addic %1, r0, 0;\n"
+ /* store failed (MSR[C] set)? try again */
+ " bnei %1, 1b;\n"
+ "2:"
+ /* Outputs: result value */
+ : "=&r" (result), "=&r" (tmp)
+ /* Inputs: counter address, old, new */
+ : "r" (&v->counter), "r" (u), "r" (a)
+ : "cc", "memory"
+ );
+
+ return result;
+}
/*
* Atomically test *v and decrement if it is greater than 0.
- * The function returns the old value of *v minus 1.
+ * The function returns the old value of *v minus 1, even if
+ * the atomic variable, v, was not decremented.
*/
static inline int atomic_dec_if_positive(atomic_t *v)
{
- unsigned long flags;
- int res;
+ int result, tmp;
- local_irq_save(flags);
- res = v->counter - 1;
- if (res >= 0)
- v->counter = res;
- local_irq_restore(flags);
+ __asm__ __volatile__ (
+ /* load conditional address in %2 to %0 */
+ "1: lwx %0, %2, r0;\n"
+ /* decrement counter by 1*/
+ " addi %0, %0, -1;\n"
+ /* if < 0 abort (*v was <= 0)*/
+ " blti %0, 2f;\n"
+ /* attempt store of new value*/
+ " swx %0, %2, r0;\n"
+ /* checking msr carry flag */
+ " addic %1, r0, 0;\n"
+ /* store failed (MSR[C] set)? try again */
+ " bnei %1, 1b;\n"
+ "2: "
+ /* Outputs: result value */
+ : "=&r" (result), "=&r" (tmp)
+ /* Inputs: counter address */
+ : "r" (&v->counter)
+ : "cc", "memory"
+ );
- return res;
+ return result;
}
-#define atomic_dec_if_positive atomic_dec_if_positive
+#define atomic_dec_if_positive atomic_dec_if_positive
+
+#define atomic_add_negative(i, v) (atomic_add_return(i, v) < 0)
+
+#include <asm-generic/atomic64.h>
#endif /* _ASM_MICROBLAZE_ATOMIC_H */
--
2.25.0
From: Stefan Asserhall <[email protected]>
For SMP-based systems there is a need for proper atomic bit operations.
MicroBlaze uses exclusive load and store instructions to implement them.
Signed-off-by: Stefan Asserhall <[email protected]>
Signed-off-by: Michal Simek <[email protected]>
---
arch/microblaze/include/asm/Kbuild | 1 -
arch/microblaze/include/asm/bitops.h | 189 +++++++++++++++++++++++++++
2 files changed, 189 insertions(+), 1 deletion(-)
create mode 100644 arch/microblaze/include/asm/bitops.h
diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index abb33619299b..0da195308ac9 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -1,6 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
generated-y += syscall_table.h
-generic-y += bitops.h
generic-y += bug.h
generic-y += bugs.h
generic-y += compat.h
diff --git a/arch/microblaze/include/asm/bitops.h b/arch/microblaze/include/asm/bitops.h
new file mode 100644
index 000000000000..a4f5ca09850f
--- /dev/null
+++ b/arch/microblaze/include/asm/bitops.h
@@ -0,0 +1,189 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Microblaze atomic bit operations.
+ *
+ * Copyright (C) 2013 - 2020 Xilinx, Inc.
+ *
+ * Merged version by David Gibson <[email protected]>.
+ * Based on ppc64 versions by: Dave Engebretsen, Todd Inglett, Don
+ * Reed, Pat McCarthy, Peter Bergner, Anton Blanchard. They
+ * originally took it from the ppc32 code.
+ *
+ * Within a word, bits are numbered LSB first. Lots of places make
+ * this assumption by directly testing bits with (val & (1<<nr)).
+ * This can cause confusion for large (> 1 word) bitmaps on a
+ * big-endian system because, unlike little endian, the number of each
+ * bit depends on the word size.
+ *
+ * The bitop functions are defined to work on unsigned longs, so for a
+ * ppc64 system the bits end up numbered:
+ * |63..............0|127............64|191...........128|255...........196|
+ * and on ppc32:
+ * |31.....0|63....31|95....64|127...96|159..128|191..160|223..192|255..224|
+ *
+ * There are a few little-endian macros used mostly for filesystem
+ * bitmaps, these work on similar bit arrays layouts, but
+ * byte-oriented:
+ * |7...0|15...8|23...16|31...24|39...32|47...40|55...48|63...56|
+ *
+ * The main difference is that bit 3-5 (64b) or 3-4 (32b) in the bit
+ * number field needs to be reversed compared to the big-endian bit
+ * fields. This can be achieved by XOR with 0x38 (64b) or 0x18 (32b).
+ */
+
+#ifndef _ASM_MICROBLAZE_BITOPS_H
+#define _ASM_MICROBLAZE_BITOPS_H
+
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
+#include <asm/types.h>
+#include <linux/compiler.h>
+#include <asm/asm-compat.h>
+#include <linux/stringify.h>
+
+/*
+ * clear_bit doesn't imply a memory barrier
+ */
+#define smp_mb__before_clear_bit() smp_mb()
+#define smp_mb__after_clear_bit() smp_mb()
+
+#define BITOP_MASK(nr) (1UL << ((nr) % BITS_PER_LONG))
+#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG)
+
+/* Macro for generating the ***_bits() functions */
+#define DEFINE_BITOP(fn, op) \
+static inline void fn(unsigned long mask, volatile unsigned long *_p) \
+{ \
+ unsigned long tmp; \
+ unsigned long *p = (unsigned long *)_p; \
+ \
+ __asm__ __volatile__ ( \
+ /* load conditional address in %2 to %0 */ \
+ "1: lwx %0, %3, r0;\n" \
+ /* perform bit operation with mask */ \
+ stringify_in_c(op)" %0, %0, %2;\n" \
+ /* attempt store */ \
+ " swx %0, %3, r0;\n" \
+ /* checking msr carry flag */ \
+ " addic %0, r0, 0;\n" \
+ /* store failed (MSR[C] set)? try again */ \
+ " bnei %0, 1b;\n" \
+ : "=&r" (tmp), "+m" (*p) /* Outputs: tmp, p */ \
+ : "r" (mask), "r" (p) /* Inputs: mask, p */ \
+ : "cc", "memory" \
+ ); \
+}
+
+DEFINE_BITOP(set_bits, or)
+DEFINE_BITOP(clear_bits, andn)
+DEFINE_BITOP(clear_bits_unlock, andn)
+DEFINE_BITOP(change_bits, xor)
+
+static inline void set_bit(int nr, volatile unsigned long *addr)
+{
+ set_bits(BITOP_MASK(nr), addr + BITOP_WORD(nr));
+}
+
+static inline void clear_bit(int nr, volatile unsigned long *addr)
+{
+ clear_bits(BITOP_MASK(nr), addr + BITOP_WORD(nr));
+}
+
+static inline void clear_bit_unlock(int nr, volatile unsigned long *addr)
+{
+ clear_bits_unlock(BITOP_MASK(nr), addr + BITOP_WORD(nr));
+}
+
+static inline void change_bit(int nr, volatile unsigned long *addr)
+{
+ change_bits(BITOP_MASK(nr), addr + BITOP_WORD(nr));
+}
+
+/*
+ * Like DEFINE_BITOP(), with changes to the arguments to 'op' and the output
+ * operands.
+ */
+#define DEFINE_TESTOP(fn, op) \
+static inline unsigned long fn(unsigned long mask, \
+ volatile unsigned long *_p) \
+{ \
+ unsigned long old, tmp; \
+ unsigned long *p = (unsigned long *)_p; \
+ \
+ __asm__ __volatile__ ( \
+ /* load conditional address in %4 to %0 */ \
+ "1: lwx %0, %4, r0;\n" \
+ /* perform bit operation with mask */ \
+ stringify_in_c(op)" %1, %0, %3;\n" \
+ /* attempt store */ \
+ " swx %1, %4, r0;\n" \
+ /* checking msr carry flag */ \
+ " addic %1, r0, 0;\n" \
+ /* store failed (MSR[C] set)? try again */ \
+ " bnei %1, 1b;\n" \
+ /* Outputs: old, tmp, p */ \
+ : "=&r" (old), "=&r" (tmp), "+m" (*p) \
+ /* Inputs: mask, p */ \
+ : "r" (mask), "r" (p) \
+ : "cc", "memory" \
+ ); \
+ return (old & mask); \
+}
+
+DEFINE_TESTOP(test_and_set_bits, or)
+DEFINE_TESTOP(test_and_set_bits_lock, or)
+DEFINE_TESTOP(test_and_clear_bits, andn)
+DEFINE_TESTOP(test_and_change_bits, xor)
+
+static inline int test_and_set_bit(unsigned long nr,
+ volatile unsigned long *addr)
+{
+ return test_and_set_bits(BITOP_MASK(nr), addr + BITOP_WORD(nr)) != 0;
+}
+
+static inline int test_and_set_bit_lock(unsigned long nr,
+ volatile unsigned long *addr)
+{
+ return test_and_set_bits_lock(BITOP_MASK(nr),
+ addr + BITOP_WORD(nr)) != 0;
+}
+
+static inline int test_and_clear_bit(unsigned long nr,
+ volatile unsigned long *addr)
+{
+ return test_and_clear_bits(BITOP_MASK(nr), addr + BITOP_WORD(nr)) != 0;
+}
+
+static inline int test_and_change_bit(unsigned long nr,
+ volatile unsigned long *addr)
+{
+ return test_and_change_bits(BITOP_MASK(nr), addr + BITOP_WORD(nr)) != 0;
+}
+
+#include <asm-generic/bitops/non-atomic.h>
+
+static inline void __clear_bit_unlock(int nr, volatile unsigned long *addr)
+{
+ __clear_bit(nr, addr);
+}
+
+#include <asm-generic/bitops/ffz.h>
+#include <asm-generic/bitops/__fls.h>
+#include <asm-generic/bitops/__ffs.h>
+#include <asm-generic/bitops/fls.h>
+#include <asm-generic/bitops/ffs.h>
+#include <asm-generic/bitops/hweight.h>
+#include <asm-generic/bitops/find.h>
+#include <asm-generic/bitops/fls64.h>
+
+/* Little-endian versions */
+#include <asm-generic/bitops/le.h>
+
+/* Bitmap functions for the ext2 filesystem */
+#include <asm-generic/bitops/ext2-atomic-setbit.h>
+
+#include <asm-generic/bitops/sched.h>
+
+#endif /* _ASM_MICROBLAZE_BITOPS_H */
--
2.25.0
It is no longer valid to assume that the timer runs at the same frequency as
the CPU. That's why the cpuinfo setting is not used for the timer clock base.
It is better to error out than to use an incorrect frequency.
Signed-off-by: Michal Simek <[email protected]>
Reviewed-by: Stefan Asserhall <[email protected]>
---
arch/microblaze/kernel/timer.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/microblaze/kernel/timer.c b/arch/microblaze/kernel/timer.c
index a6683484b3a1..65bce785e6ed 100644
--- a/arch/microblaze/kernel/timer.c
+++ b/arch/microblaze/kernel/timer.c
@@ -304,7 +304,7 @@ static int __init xilinx_timer_init(struct device_node *timer)
if (!timer_clock_freq) {
pr_err("ERROR: Using CPU clock frequency\n");
- timer_clock_freq = cpuinfo.cpu_clock_freq;
+ return -EINVAL;
}
freq_div_hz = timer_clock_freq / HZ;
--
2.25.0
From: Stefan Asserhall <[email protected]>
MicroBlaze supports only 32-bit exclusive loads and stores, which is why
only 4-byte operations are supported for SMP.
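The size check happens at compile time: any object other than 4 bytes falls
through to the intentionally undefined __xchg_called_with_bad_pointer(), so
misuse shows up as a link error. Rough usage sketch (variables are made up):

	u32 word = 1;
	u16 half = 1;

	(void)xchg(&word, 2);		/* OK: sizeof(*ptr) == 4, lwx/swx loop */
	(void)cmpxchg(&word, 2, 3);	/* OK for the same reason */
	(void)xchg(&half, 2);		/* link error: __xchg_called_with_bad_pointer() */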
Signed-off-by: Stefan Asserhall <[email protected]>
Signed-off-by: Michal Simek <[email protected]>
---
arch/microblaze/include/asm/cmpxchg.h | 87 +++++++++++++++++++++++++++
1 file changed, 87 insertions(+)
diff --git a/arch/microblaze/include/asm/cmpxchg.h b/arch/microblaze/include/asm/cmpxchg.h
index 3523b51aab36..0c24ac37df7f 100644
--- a/arch/microblaze/include/asm/cmpxchg.h
+++ b/arch/microblaze/include/asm/cmpxchg.h
@@ -4,6 +4,93 @@
#ifndef CONFIG_SMP
# include <asm-generic/cmpxchg.h>
+#else
+
+extern void __xchg_called_with_bad_pointer(void);
+
+static inline unsigned long __xchg_u32(volatile void *p, unsigned long val)
+{
+ unsigned long prev, temp;
+
+ __asm__ __volatile__ (
+ /* load conditional address in %3 to %0 */
+ "1: lwx %0, %3, r0;\n"
+ /* attempt store of new value */
+ " swx %4, %3, r0;\n"
+ /* checking msr carry flag */
+ " addic %1, r0, 0;\n"
+ /* store failed (MSR[C] set)? try again */
+ " bnei %1, 1b;\n"
+ /* Outputs: result value */
+ : "=&r" (prev), "=&r" (temp), "+m" (*(volatile unsigned int *)p)
+ /* Inputs: counter address */
+ : "r" (p), "r" (val)
+ : "cc", "memory"
+ );
+
+ return prev;
+}
+
+static inline unsigned long __xchg(unsigned long x, volatile void *ptr,
+ int size)
+{
+ if (size == 4)
+ return __xchg_u32(ptr, x);
+
+ __xchg_called_with_bad_pointer();
+ return x;
+}
+
+#define xchg(ptr, x) ({ \
+ ((__typeof__(*(ptr))) \
+ __xchg((unsigned long)(x), (ptr), sizeof(*(ptr)))); \
+})
+
+static inline unsigned long __cmpxchg_u32(volatile unsigned int *p,
+ unsigned long old, unsigned long new)
+{
+ int result, tmp;
+
+ __asm__ __volatile__ (
+ /* load conditional address in %3 to %0 */
+ "1: lwx %0, %3, r0;\n"
+ /* compare loaded value with old value */
+ " cmp %2, %0, %4;\n"
+ /* not equal to old value, write old value */
+ " bnei %2, 2f;\n"
+ /* attempt store of new value*/
+ " swx %5, %3, r0;\n"
+ /* checking msr carry flag */
+ " addic %2, r0, 0;\n"
+ /* store failed (MSR[C] set)? try again */
+ " bnei %2, 1b;\n"
+ "2: "
+ /* Outputs : result value */
+ : "=&r" (result), "+m" (*p), "=&r" (tmp)
+ /* Inputs : counter address, old, new */
+ : "r" (p), "r" (old), "r" (new), "r" (&tmp)
+ : "cc", "memory"
+ );
+
+ return result;
+}
+
+static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
+ unsigned long new, unsigned int size)
+{
+ if (size == 4)
+ return __cmpxchg_u32(ptr, old, new);
+
+ __xchg_called_with_bad_pointer();
+ return old;
+}
+
+#define cmpxchg(ptr, o, n) ({ \
+ ((__typeof__(*(ptr)))__cmpxchg((ptr), (unsigned long)(o), \
+ (unsigned long)(n), sizeof(*(ptr)))); \
+})
+
+
#endif
#endif /* _ASM_MICROBLAZE_CMPXCHG_H */
--
2.25.0
On Wed, Feb 12, 2020 at 04:42:28PM +0100, Michal Simek wrote:
> From: Stefan Asserhall <[email protected]>
>
> Using exclusive loads/stores to implement spinlocks which can be used on
> SMP systems.
>
> Signed-off-by: Stefan Asserhall <[email protected]>
> Signed-off-by: Michal Simek <[email protected]>
> ---
>
> arch/microblaze/include/asm/spinlock.h | 240 +++++++++++++++++++
> arch/microblaze/include/asm/spinlock_types.h | 25 ++
> 2 files changed, 265 insertions(+)
> create mode 100644 arch/microblaze/include/asm/spinlock.h
> create mode 100644 arch/microblaze/include/asm/spinlock_types.h
>
> diff --git a/arch/microblaze/include/asm/spinlock.h b/arch/microblaze/include/asm/spinlock.h
> new file mode 100644
> index 000000000000..0199ea9f7f0f
> --- /dev/null
> +++ b/arch/microblaze/include/asm/spinlock.h
> @@ -0,0 +1,240 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright (C) 2013-2020 Xilinx, Inc.
> + */
> +
> +#ifndef _ASM_MICROBLAZE_SPINLOCK_H
> +#define _ASM_MICROBLAZE_SPINLOCK_H
> +
> +/*
> + * Unlocked value: 0
> + * Locked value: 1
> + */
> +#define arch_spin_is_locked(x) (READ_ONCE((x)->lock) != 0)
> +
> +static inline void arch_spin_lock(arch_spinlock_t *lock)
> +{
> + unsigned long tmp;
> +
> + __asm__ __volatile__ (
> + /* load conditional address in %1 to %0 */
> + "1: lwx %0, %1, r0;\n"
> + /* not zero? try again */
> + " bnei %0, 1b;\n"
> + /* increment lock by 1 */
> + " addi %0, r0, 1;\n"
> + /* attempt store */
> + " swx %0, %1, r0;\n"
> + /* checking msr carry flag */
> + " addic %0, r0, 0;\n"
> + /* store failed (MSR[C] set)? try again */
> + " bnei %0, 1b;\n"
> + /* Outputs: temp variable for load result */
> + : "=&r" (tmp)
> + /* Inputs: lock address */
> + : "r" (&lock->lock)
> + : "cc", "memory"
> + );
> +}
That's a test-and-set spinlock if I read it correctly. Why? That's the
worst possible spinlock implementation.
On Wed, Feb 12, 2020 at 04:42:25PM +0100, Michal Simek wrote:
> From: Stefan Asserhall <[email protected]>
>
> For SMP based system there is a need to have proper bit operations.
> Microblaze is using exclusive load and store instructions.
>
> Signed-off-by: Stefan Asserhall <[email protected]>
> Signed-off-by: Michal Simek <[email protected]>
> +/*
> + * clear_bit doesn't imply a memory barrier
> + */
> +#define smp_mb__before_clear_bit() smp_mb()
> +#define smp_mb__after_clear_bit() smp_mb()
These macros no longer exist.
Also, might I draw your attention to:
include/asm-generic/bitops/atomic.h
This being an ll/sc arch, I'm thinking that if you do your atomic_t
implementation right, the generic atomic bitop code should be near
optimal.
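For reference, the generic code is roughly the following (paraphrased from
include/asm-generic/bitops/atomic.h; see the actual file for details), so
every atomic bitop ends up in your lwx/swx atomic loop anyway:

static inline void set_bit(unsigned int nr, volatile unsigned long *p)
{
        /* one atomic OR on the word containing the bit */
        p += BIT_WORD(nr);
        atomic_long_or(BIT_MASK(nr), (atomic_long_t *)p);
}

static inline void clear_bit(unsigned int nr, volatile unsigned long *p)
{
        /* one atomic AND-NOT on the word containing the bit */
        p += BIT_WORD(nr);
        atomic_long_andnot(BIT_MASK(nr), (atomic_long_t *)p);
}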
On Wed, Feb 12, 2020 at 04:42:29PM +0100, Michal Simek wrote:
> From: Stefan Asserhall load and store <[email protected]>
>
> Implement SMP aware atomic operations.
>
> Signed-off-by: Stefan Asserhall <[email protected]>
> Signed-off-by: Michal Simek <[email protected]>
> ---
>
> arch/microblaze/include/asm/atomic.h | 265 +++++++++++++++++++++++++--
> 1 file changed, 253 insertions(+), 12 deletions(-)
>
> diff --git a/arch/microblaze/include/asm/atomic.h b/arch/microblaze/include/asm/atomic.h
> index 41e9aff23a62..522d704fad63 100644
> --- a/arch/microblaze/include/asm/atomic.h
> +++ b/arch/microblaze/include/asm/atomic.h
> @@ -1,28 +1,269 @@
> /* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright (C) 2013-2020 Xilinx, Inc.
> + */
> +
> #ifndef _ASM_MICROBLAZE_ATOMIC_H
> #define _ASM_MICROBLAZE_ATOMIC_H
>
> +#include <linux/types.h>
> #include <asm/cmpxchg.h>
> -#include <asm-generic/atomic.h>
> -#include <asm-generic/atomic64.h>
> +
> +#define ATOMIC_INIT(i) { (i) }
> +
> +#define atomic_read(v) READ_ONCE((v)->counter)
> +
> +static inline void atomic_set(atomic_t *v, int i)
> +{
> + int result, tmp;
> +
> + __asm__ __volatile__ (
> + /* load conditional address in %2 to %0 */
> + "1: lwx %0, %2, r0;\n"
> + /* attempt store */
> + " swx %3, %2, r0;\n"
> + /* checking msr carry flag */
> + " addic %1, r0, 0;\n"
> + /* store failed (MSR[C] set)? try again */
> + " bnei %1, 1b;\n"
> + /* Outputs: result value */
> + : "=&r" (result), "=&r" (tmp)
> + /* Inputs: counter address */
> + : "r" (&v->counter), "r" (i)
> + : "cc", "memory"
> + );
> +}
> +#define atomic_set atomic_set
Uuuuhh.. *what* ?!?
Are you telling me your LL/SC implementation is so buggered that
atomic_set() being a WRITE_ONCE() does not in fact work?
On Wed, Feb 12, 2020 at 04:42:22PM +0100, Michal Simek wrote:
> Microblaze has 32bit exclusive load/store instructions which should be used
> instead of irq enable/disable. For more information take a look at
> https://www.xilinx.com/support/documentation/sw_manuals/xilinx2019_2/ug984-vivado-microblaze-ref.pdf
> starting from page 25.
> arch/microblaze/include/asm/Kbuild | 1 -
> arch/microblaze/include/asm/atomic.h | 265 ++++++++++++++++++-
> arch/microblaze/include/asm/bitops.h | 189 +++++++++++++
> arch/microblaze/include/asm/cmpxchg.h | 87 ++++++
> arch/microblaze/include/asm/cpuinfo.h | 2 +-
> arch/microblaze/include/asm/pgtable.h | 19 +-
> arch/microblaze/include/asm/spinlock.h | 240 +++++++++++++++++
> arch/microblaze/include/asm/spinlock_types.h | 25 ++
> arch/microblaze/kernel/cpu/cache.c | 154 ++++++-----
> arch/microblaze/kernel/cpu/cpuinfo.c | 38 ++-
> arch/microblaze/kernel/cpu/mb.c | 207 ++++++++-------
> arch/microblaze/kernel/timer.c | 2 +-
> arch/microblaze/mm/consistent.c | 8 +-
> 13 files changed, 1040 insertions(+), 197 deletions(-)
> create mode 100644 arch/microblaze/include/asm/bitops.h
> create mode 100644 arch/microblaze/include/asm/spinlock.h
> create mode 100644 arch/microblaze/include/asm/spinlock_types.h
I'm missing asm/barrier.h
Also that PDF (thanks for that!), seems light on memory ordering
details.
Your comment:
+/*
+ * clear_bit doesn't imply a memory barrier
+ */
worries me, because that would imply your ll/sc does not impose order,
but then you also don't have any explicit barriers in your locking
primitives or atomics where required.
In the PDF I only find MBAR; is that what smp_mb() ends up being?
On Wed, Feb 12, 2020 at 05:08:52PM +0100, Peter Zijlstra wrote:
> On Wed, Feb 12, 2020 at 04:42:22PM +0100, Michal Simek wrote:
>
> > Microblaze has 32bit exclusive load/store instructions which should be used
> > instead of irq enable/disable. For more information take a look at
> > https://www.xilinx.com/support/documentation/sw_manuals/xilinx2019_2/ug984-vivado-microblaze-ref.pdf
> > starting from page 25.
>
> > arch/microblaze/include/asm/Kbuild | 1 -
> > arch/microblaze/include/asm/atomic.h | 265 ++++++++++++++++++-
> > arch/microblaze/include/asm/bitops.h | 189 +++++++++++++
> > arch/microblaze/include/asm/cmpxchg.h | 87 ++++++
> > arch/microblaze/include/asm/cpuinfo.h | 2 +-
> > arch/microblaze/include/asm/pgtable.h | 19 +-
> > arch/microblaze/include/asm/spinlock.h | 240 +++++++++++++++++
> > arch/microblaze/include/asm/spinlock_types.h | 25 ++
> > arch/microblaze/kernel/cpu/cache.c | 154 ++++++-----
> > arch/microblaze/kernel/cpu/cpuinfo.c | 38 ++-
> > arch/microblaze/kernel/cpu/mb.c | 207 ++++++++-------
> > arch/microblaze/kernel/timer.c | 2 +-
> > arch/microblaze/mm/consistent.c | 8 +-
> > 13 files changed, 1040 insertions(+), 197 deletions(-)
> > create mode 100644 arch/microblaze/include/asm/bitops.h
> > create mode 100644 arch/microblaze/include/asm/spinlock.h
> > create mode 100644 arch/microblaze/include/asm/spinlock_types.h
>
> I'm missing asm/barrier.h
>
> Also that PDF (thanks for that!), seems light on memory ordering
> details.
>
> Your comment:
>
> +/*
> + * clear_bit doesn't imply a memory barrier
> + */
>
> worries me, because that would imply your ll/sc does not impose order,
> but then you also don't have any explicit barriers in your locking
> primitives or atomics where required.
>
> In the PDF I only find MBAR; is that what smp_mb() ends up being?
Bah, I'm sure I did patches at some point that made
asm-generic/barrier.h #error if you didn't define at least one memory
barrier on CONFIG_SMP, but it seems that all got lost somewhere.
On Wed, Feb 12, 2020 at 4:42 PM Michal Simek <[email protected]> wrote:
> static void __flush_icache_range_msr_irq(unsigned long start, unsigned long end)
> {
> + unsigned int cpu = smp_processor_id();
> + struct cpuinfo *cpuinfo = per_cpu_ptr(&cpu_info, cpu);
I think all the instances of smp_processor_id()/per_cpu_ptr() should
be replaced with get_cpu_ptr()/put_cpu_ptr(). Same for per_cpu_var()
-> get_cpu_var().
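I.e. something along these lines (just a sketch of the intended pattern,
with the existing flush body elided):

static void __flush_icache_range_msr_irq(unsigned long start, unsigned long end)
{
        /* get_cpu_ptr() disables preemption while the per-cpu data is in use */
        struct cpuinfo *cpuinfo = get_cpu_ptr(&cpu_info);

        /* ... existing flush loop using cpuinfo ... */

        put_cpu_ptr(&cpu_info);
}

That also closes the window where the task could migrate between reading
the CPU id and actually using the per-cpu data.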
Arnd
On 12. 02. 20 17:08, Peter Zijlstra wrote:
> On Wed, Feb 12, 2020 at 04:42:22PM +0100, Michal Simek wrote:
>
>> Microblaze has 32bit exclusive load/store instructions which should be used
>> instead of irq enable/disable. For more information take a look at
>> https://www.xilinx.com/support/documentation/sw_manuals/xilinx2019_2/ug984-vivado-microblaze-ref.pdf
>> starting from page 25.
>
>> arch/microblaze/include/asm/Kbuild | 1 -
>> arch/microblaze/include/asm/atomic.h | 265 ++++++++++++++++++-
>> arch/microblaze/include/asm/bitops.h | 189 +++++++++++++
>> arch/microblaze/include/asm/cmpxchg.h | 87 ++++++
>> arch/microblaze/include/asm/cpuinfo.h | 2 +-
>> arch/microblaze/include/asm/pgtable.h | 19 +-
>> arch/microblaze/include/asm/spinlock.h | 240 +++++++++++++++++
>> arch/microblaze/include/asm/spinlock_types.h | 25 ++
>> arch/microblaze/kernel/cpu/cache.c | 154 ++++++-----
>> arch/microblaze/kernel/cpu/cpuinfo.c | 38 ++-
>> arch/microblaze/kernel/cpu/mb.c | 207 ++++++++-------
>> arch/microblaze/kernel/timer.c | 2 +-
>> arch/microblaze/mm/consistent.c | 8 +-
>> 13 files changed, 1040 insertions(+), 197 deletions(-)
>> create mode 100644 arch/microblaze/include/asm/bitops.h
>> create mode 100644 arch/microblaze/include/asm/spinlock.h
>> create mode 100644 arch/microblaze/include/asm/spinlock_types.h
>
> I'm missing asm/barrier.h
This was sent in the previous patchset; the link was in this email.
>
> Also that PDF (thanks for that!), seems light on memory ordering
> details.
>
> Your comment:
>
> +/*
> + * clear_bit doesn't imply a memory barrier
> + */
>
> worries me, because that would imply your ll/sc does not impose order,
> but then you also don't have any explicit barriers in your locking
> primitives or atomics where required.
I think this is just a comment which shouldn't be there.
clear_bit calls clear_bits, which uses the exclusive load/store
instructions and should therefore impose ordering.
>
> In the PDF I only find MBAR; is that what smp_mb() ends up being?
Yes, all barriers should end up as mbar.
Stefan: please correct me if I am wrong.
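For reference, I'd expect the previously posted asm/barrier.h to boil down
to something like this; the exact mbar operand below is an assumption from
memory and needs to be checked against UG984:

#ifndef _ASM_MICROBLAZE_BARRIER_H
#define _ASM_MICROBLAZE_BARRIER_H

/* MBAR waits for outstanding accesses to complete; operand value assumed */
#define mb()    __asm__ __volatile__ ("mbar 1" : : : "memory")
#define rmb()   mb()
#define wmb()   mb()

#include <asm-generic/barrier.h>

#endif /* _ASM_MICROBLAZE_BARRIER_H */

With asm-generic/barrier.h included, smp_mb()/smp_rmb()/smp_wmb() then fall
back to mb() on SMP.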
Thanks,
Michal
On 12. 02. 20 16:47, Peter Zijlstra wrote:
> On Wed, Feb 12, 2020 at 04:42:28PM +0100, Michal Simek wrote:
>> From: Stefan Asserhall <[email protected]>
>>
>> Using exclusive loads/stores to implement spinlocks which can be used on
>> SMP systems.
>>
>> Signed-off-by: Stefan Asserhall <[email protected]>
>> Signed-off-by: Michal Simek <[email protected]>
>> ---
>>
>> arch/microblaze/include/asm/spinlock.h | 240 +++++++++++++++++++
>> arch/microblaze/include/asm/spinlock_types.h | 25 ++
>> 2 files changed, 265 insertions(+)
>> create mode 100644 arch/microblaze/include/asm/spinlock.h
>> create mode 100644 arch/microblaze/include/asm/spinlock_types.h
>>
>> diff --git a/arch/microblaze/include/asm/spinlock.h b/arch/microblaze/include/asm/spinlock.h
>> new file mode 100644
>> index 000000000000..0199ea9f7f0f
>> --- /dev/null
>> +++ b/arch/microblaze/include/asm/spinlock.h
>> @@ -0,0 +1,240 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +/*
>> + * Copyright (C) 2013-2020 Xilinx, Inc.
>> + */
>> +
>> +#ifndef _ASM_MICROBLAZE_SPINLOCK_H
>> +#define _ASM_MICROBLAZE_SPINLOCK_H
>> +
>> +/*
>> + * Unlocked value: 0
>> + * Locked value: 1
>> + */
>> +#define arch_spin_is_locked(x) (READ_ONCE((x)->lock) != 0)
>> +
>> +static inline void arch_spin_lock(arch_spinlock_t *lock)
>> +{
>> + unsigned long tmp;
>> +
>> + __asm__ __volatile__ (
>> + /* load conditional address in %1 to %0 */
>> + "1: lwx %0, %1, r0;\n"
>> + /* not zero? try again */
>> + " bnei %0, 1b;\n"
>> + /* increment lock by 1 */
>> + " addi %0, r0, 1;\n"
>> + /* attempt store */
>> + " swx %0, %1, r0;\n"
>> + /* checking msr carry flag */
>> + " addic %0, r0, 0;\n"
>> + /* store failed (MSR[C] set)? try again */
>> + " bnei %0, 1b;\n"
>> + /* Outputs: temp variable for load result */
>> + : "=&r" (tmp)
>> + /* Inputs: lock address */
>> + : "r" (&lock->lock)
>> + : "cc", "memory"
>> + );
>> +}
>
> That's a test-and-set spinlock if I read it correctly. Why? that's the
> worst possible spinlock implementation possible.
This was written by Stefan and it is aligned with the recommended
implementation. What other options do we have?
Thanks,
Michal
On Thu, Feb 13, 2020 at 08:51:38AM +0100, Michal Simek wrote:
> On 12. 02. 20 16:47, Peter Zijlstra wrote:
> >
> > That's a test-and-set spinlock if I read it correctly. Why? that's the
> > worst possible spinlock implementation possible.
>
> This was written by Stefan and it is aligned with recommended
> implementation. What other options do we have?
A ticket lock should be simple enough; ARM has one you can borrow from.
The problem with TaS spinlocks is that they're unfair and exhibit
horrible starvation issues.
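For illustration, the idea is just the following (a plain C sketch using
GCC/Clang __atomic builtins; ticket_lock_t, ticket_lock() and
ticket_unlock() are illustrative names, not the arch_spinlock API, and the
real thing would grab the ticket with a single lwx/swx sequence on the
combined 32-bit word, the way the ARM version does):

typedef struct {
        unsigned short owner;   /* ticket currently being served */
        unsigned short next;    /* next ticket to hand out */
} ticket_lock_t;

static inline void ticket_lock(ticket_lock_t *lock)
{
        /* atomically take the next ticket number */
        unsigned short ticket = __atomic_fetch_add(&lock->next, 1, __ATOMIC_RELAXED);

        /* spin until our number comes up; acquire pairs with the release below */
        while (__atomic_load_n(&lock->owner, __ATOMIC_ACQUIRE) != ticket)
                ;       /* cpu_relax() in the real thing */
}

static inline void ticket_unlock(ticket_lock_t *lock)
{
        /* only the lock holder advances owner, so a release store suffices */
        unsigned short next_owner = __atomic_load_n(&lock->owner, __ATOMIC_RELAXED) + 1;

        __atomic_store_n(&lock->owner, next_owner, __ATOMIC_RELEASE);
}

Waiters are served strictly in arrival order, which is what fixes the
starvation problem.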
On 12. 02. 20 16:55, Peter Zijlstra wrote:
> On Wed, Feb 12, 2020 at 04:42:29PM +0100, Michal Simek wrote:
>> From: Stefan Asserhall load and store <[email protected]>
>>
>> Implement SMP aware atomic operations.
>>
>> Signed-off-by: Stefan Asserhall <[email protected]>
>> Signed-off-by: Michal Simek <[email protected]>
>> ---
>>
>> arch/microblaze/include/asm/atomic.h | 265 +++++++++++++++++++++++++--
>> 1 file changed, 253 insertions(+), 12 deletions(-)
>>
>> diff --git a/arch/microblaze/include/asm/atomic.h b/arch/microblaze/include/asm/atomic.h
>> index 41e9aff23a62..522d704fad63 100644
>> --- a/arch/microblaze/include/asm/atomic.h
>> +++ b/arch/microblaze/include/asm/atomic.h
>> @@ -1,28 +1,269 @@
>> /* SPDX-License-Identifier: GPL-2.0 */
>> +/*
>> + * Copyright (C) 2013-2020 Xilinx, Inc.
>> + */
>> +
>> #ifndef _ASM_MICROBLAZE_ATOMIC_H
>> #define _ASM_MICROBLAZE_ATOMIC_H
>>
>> +#include <linux/types.h>
>> #include <asm/cmpxchg.h>
>> -#include <asm-generic/atomic.h>
>> -#include <asm-generic/atomic64.h>
>> +
>> +#define ATOMIC_INIT(i) { (i) }
>> +
>> +#define atomic_read(v) READ_ONCE((v)->counter)
>> +
>> +static inline void atomic_set(atomic_t *v, int i)
>> +{
>> + int result, tmp;
>> +
>> + __asm__ __volatile__ (
>> + /* load conditional address in %2 to %0 */
>> + "1: lwx %0, %2, r0;\n"
>> + /* attempt store */
>> + " swx %3, %2, r0;\n"
>> + /* checking msr carry flag */
>> + " addic %1, r0, 0;\n"
>> + /* store failed (MSR[C] set)? try again */
>> + " bnei %1, 1b;\n"
>> + /* Outputs: result value */
>> + : "=&r" (result), "=&r" (tmp)
>> + /* Inputs: counter address */
>> + : "r" (&v->counter), "r" (i)
>> + : "cc", "memory"
>> + );
>> +}
>> +#define atomic_set atomic_set
>
> Uuuuhh.. *what* ?!?
>
> Are you telling me your LL/SC implementation is so bugger that
> atomic_set() being a WRITE_ONCE() does not in fact work?
Just keep in mind that this code was written a long time ago, and IIRC a
lot of the techniques used back then were borrowed from powerpc. I hope
the review process will fix these things, and I really appreciate your
comments.
Stefan is the right person to say whether we really need to use the
exclusive load/store instructions here, or whether we can use what I see
in include/linux/compiler.h.
Please correct me if I am wrong.
WRITE_ONCE is __write_once_size, which is a normal C write that I expect
will be compiled to non-exclusive stores; a barrier is only involved for
sizes above 8 bytes.
READ_ONCE is a normal read followed by a barrier every time.
Also, is there any testsuite I should run to verify all these atomic
operations? That would really help, but I haven't seen any tool (though I
also didn't try hard to find one).
Thanks,
Michal
On Thu, Feb 13, 2020 at 08:49:40AM +0100, Michal Simek wrote:
> On 12. 02. 20 17:08, Peter Zijlstra wrote:
> > On Wed, Feb 12, 2020 at 04:42:22PM +0100, Michal Simek wrote:
> >
> >> Microblaze has 32bit exclusive load/store instructions which should be used
> >> instead of irq enable/disable. For more information take a look at
> >> https://www.xilinx.com/support/documentation/sw_manuals/xilinx2019_2/ug984-vivado-microblaze-ref.pdf
> >> starting from page 25.
> >
> >> arch/microblaze/include/asm/Kbuild | 1 -
> >> arch/microblaze/include/asm/atomic.h | 265 ++++++++++++++++++-
> >> arch/microblaze/include/asm/bitops.h | 189 +++++++++++++
> >> arch/microblaze/include/asm/cmpxchg.h | 87 ++++++
> >> arch/microblaze/include/asm/cpuinfo.h | 2 +-
> >> arch/microblaze/include/asm/pgtable.h | 19 +-
> >> arch/microblaze/include/asm/spinlock.h | 240 +++++++++++++++++
> >> arch/microblaze/include/asm/spinlock_types.h | 25 ++
> >> arch/microblaze/kernel/cpu/cache.c | 154 ++++++-----
> >> arch/microblaze/kernel/cpu/cpuinfo.c | 38 ++-
> >> arch/microblaze/kernel/cpu/mb.c | 207 ++++++++-------
> >> arch/microblaze/kernel/timer.c | 2 +-
> >> arch/microblaze/mm/consistent.c | 8 +-
> >> 13 files changed, 1040 insertions(+), 197 deletions(-)
> >> create mode 100644 arch/microblaze/include/asm/bitops.h
> >> create mode 100644 arch/microblaze/include/asm/spinlock.h
> >> create mode 100644 arch/microblaze/include/asm/spinlock_types.h
> >
> > I'm missing asm/barrier.h
>
> This has been sent in previous patchset. Link was in this email.
lkml.org links, please don't use them. The kernel.org links (example
below) have the Message-Id in them, which allows me to search for them
in my local storage without having to use a web browser.
You mean this one, right?
https://lkml.kernel.org/r/be707feb95d65b44435a0d9de946192f1fef30c6.1581497860.git.michal.simek@xilinx.com
So the PDF says this is a completion barrier, needed for DMA. This is a
very expensive option for smp_mb(), but if it is all you have and is
required, then yes, that should do.
On 13. 02. 20 9:11, Peter Zijlstra wrote:
> On Thu, Feb 13, 2020 at 08:49:40AM +0100, Michal Simek wrote:
>> On 12. 02. 20 17:08, Peter Zijlstra wrote:
>>> On Wed, Feb 12, 2020 at 04:42:22PM +0100, Michal Simek wrote:
>>>
>>>> Microblaze has 32bit exclusive load/store instructions which should be used
>>>> instead of irq enable/disable. For more information take a look at
>>>> https://www.xilinx.com/support/documentation/sw_manuals/xilinx2019_2/ug984-vivado-microblaze-ref.pdf
>>>> starting from page 25.
>>>
>>>> arch/microblaze/include/asm/Kbuild | 1 -
>>>> arch/microblaze/include/asm/atomic.h | 265 ++++++++++++++++++-
>>>> arch/microblaze/include/asm/bitops.h | 189 +++++++++++++
>>>> arch/microblaze/include/asm/cmpxchg.h | 87 ++++++
>>>> arch/microblaze/include/asm/cpuinfo.h | 2 +-
>>>> arch/microblaze/include/asm/pgtable.h | 19 +-
>>>> arch/microblaze/include/asm/spinlock.h | 240 +++++++++++++++++
>>>> arch/microblaze/include/asm/spinlock_types.h | 25 ++
>>>> arch/microblaze/kernel/cpu/cache.c | 154 ++++++-----
>>>> arch/microblaze/kernel/cpu/cpuinfo.c | 38 ++-
>>>> arch/microblaze/kernel/cpu/mb.c | 207 ++++++++-------
>>>> arch/microblaze/kernel/timer.c | 2 +-
>>>> arch/microblaze/mm/consistent.c | 8 +-
>>>> 13 files changed, 1040 insertions(+), 197 deletions(-)
>>>> create mode 100644 arch/microblaze/include/asm/bitops.h
>>>> create mode 100644 arch/microblaze/include/asm/spinlock.h
>>>> create mode 100644 arch/microblaze/include/asm/spinlock_types.h
>>>
>>> I'm missing asm/barrier.h
>>
>> This has been sent in previous patchset. Link was in this email.
>
> lkml.org link, please don't use them. The kernel.org links (example
> below) have the Message-Id in them, which allows me to search for them
> in my local storage without having to use a web browser.
>
> You mean this one, right?
>
> https://lkml.kernel.org/r/be707feb95d65b44435a0d9de946192f1fef30c6.1581497860.git.michal.simek@xilinx.com
>
> So the PDF says this is a completion barrier, needed for DMA. This is a
> very expensive option for smp_mb(), but if it is all you have and is
> required, then yes, that should do.
Yes that's the one.
Thanks,
Michal
On 12. 02. 20 16:53, Peter Zijlstra wrote:
> On Wed, Feb 12, 2020 at 04:42:25PM +0100, Michal Simek wrote:
>> From: Stefan Asserhall <[email protected]>
>>
>> For SMP based system there is a need to have proper bit operations.
>> Microblaze is using exclusive load and store instructions.
>>
>> Signed-off-by: Stefan Asserhall <[email protected]>
>> Signed-off-by: Michal Simek <[email protected]>
>
>> +/*
>> + * clear_bit doesn't imply a memory barrier
>> + */
>> +#define smp_mb__before_clear_bit() smp_mb()
>> +#define smp_mb__after_clear_bit() smp_mb()
>
> These macros no longer exist.
ok. Easy to remove.
>
> Also, might I draw your attention to:
>
> include/asm-generic/bitops/atomic.h
>
> This being a ll/sc arch, I'm thinking that if you do your atomic_t
> implementation right, the generic atomic bitop code should be near
> optimal.
>
From what I can see, I can replace the implementations in this file by
including the generic headers, which are built on top of the atomic
operations:
#include <asm-generic/bitops/atomic.h>
#include <asm-generic/bitops/lock.h>
Correct?
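I.e. the arch header would shrink to more or less the following (a sketch
only; the remaining ffs/fls/find style helpers would come from asm-generic
as well):

#ifndef _ASM_MICROBLAZE_BITOPS_H
#define _ASM_MICROBLAZE_BITOPS_H

/* atomic bitops built on top of the arch atomic_t/cmpxchg implementation */
#include <asm-generic/bitops/atomic.h>
#include <asm-generic/bitops/lock.h>

/* the non-atomic variants stay generic too */
#include <asm-generic/bitops/non-atomic.h>

#endif /* _ASM_MICROBLAZE_BITOPS_H */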
It would be good to run a testsuite to prove that all operations work as
expected. Is there any testsuite I can use to confirm that?
Thanks,
Michal
On Thu, Feb 13, 2020 at 09:06:24AM +0100, Michal Simek wrote:
> On 12. 02. 20 16:55, Peter Zijlstra wrote:
> > On Wed, Feb 12, 2020 at 04:42:29PM +0100, Michal Simek wrote:
> >> +static inline void atomic_set(atomic_t *v, int i)
> >> +{
> >> + int result, tmp;
> >> +
> >> + __asm__ __volatile__ (
> >> + /* load conditional address in %2 to %0 */
> >> + "1: lwx %0, %2, r0;\n"
> >> + /* attempt store */
> >> + " swx %3, %2, r0;\n"
> >> + /* checking msr carry flag */
> >> + " addic %1, r0, 0;\n"
> >> + /* store failed (MSR[C] set)? try again */
> >> + " bnei %1, 1b;\n"
> >> + /* Outputs: result value */
> >> + : "=&r" (result), "=&r" (tmp)
> >> + /* Inputs: counter address */
> >> + : "r" (&v->counter), "r" (i)
> >> + : "cc", "memory"
> >> + );
> >> +}
> >> +#define atomic_set atomic_set
> >
> > Uuuuhh.. *what* ?!?
> >
> > Are you telling me your LL/SC implementation is so bugger that
> > atomic_set() being a WRITE_ONCE() does not in fact work?
>
> Just keep in your mind that this code was written long time ago and
> there could be a lot of things/technique used at that time by IIRC
> powerpc and I hope that review process will fix these things and I
> really appreciation your comments.
I don't think I've ever seen Power do this, but I've not checked the git
history.
> Stefan is the right person to say if we really need to use exclusive
> loads/stores instructions or use what I see in include/linux/compiler.h.
>
> Please correct me if I am wrong.
> WRITE_ONCE is __write_once_size which is normal write in C which I
> expect will be converted in asm to non exclusive writes. And barrier is
> called only for cases above 8bytes.
>
> READ_ONCE is normal read follow by barrier all the time.
Right:
WRITE_ONCE() is something like:
*(volatile typeof(var)*)(&(var)) = val;
And should translate to just a regular store; the volatile just tells
the C compiler it should not do funny things with it.
READ_ONCE() is something like:
val = *(volatile typeof(var)*)(&(var));
And should translate to just a regular load; the volatile again tells
the compiler to not be funny about it.
No memory barriers what so ever, not even a compiler barrier as such.
The thing is, your bog standard LL/SC _SHOULD_ fail the SC if someone
else does a regular store to the same variable. See the example in
Documentation/atomic_t.txt.
That is, a competing SW/SWI should result in the interconnect responding
with something other than EXOKAY, the SWX should fail and MSR[C] <- 1.
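In other words, assuming the reservation really is lost on a competing
plain store as described above, the generic definition should be all you
need here:

static inline void atomic_set(atomic_t *v, int i)
{
        /* plain store; any concurrent lwx/swx sequence loses its reservation */
        WRITE_ONCE(v->counter, i);
}
#define atomic_set atomic_set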
> Also is there any testsuite I should run to verify all these atomics
> operations? That would really help but I haven't seen any tool (but also
> didn't try hard to find it out).
Will, Paul; can't this LKMM thing generate kernel modules to run? And do
we have a 'nice' collection of litmus tests that cover atomic_t ?
The one in atomic_t.txt should cover this one at least.
> On 12. 02. 20 16:53, Peter Zijlstra wrote:
> > On Wed, Feb 12, 2020 at 04:42:25PM +0100, Michal Simek wrote:
> >> From: Stefan Asserhall <[email protected]>
> >>
> >> For SMP based system there is a need to have proper bit operations.
> >> Microblaze is using exclusive load and store instructions.
> >>
> >> Signed-off-by: Stefan Asserhall <[email protected]>
> >> Signed-off-by: Michal Simek <[email protected]>
> >
> >> +/*
> >> + * clear_bit doesn't imply a memory barrier */
> >> +#define smp_mb__before_clear_bit() smp_mb()
> >> +#define smp_mb__after_clear_bit() smp_mb()
> >
> > These macros no longer exist.
>
> ok. Easy to remove.
>
> >
> > Also, might I draw your attention to:
> >
> > include/asm-generic/bitops/atomic.h
> >
> > This being a ll/sc arch, I'm thinking that if you do your atomic_t
> > implementation right, the generic atomic bitop code should be near
> > optimal.
> >
>
> Based on my look it looks like that I can replace implementations in this file by
> sourcing which will be using atomic operations.
>
> #include <asm-generic/bitops/atomic.h>
> #include <asm-generic/bitops/lock.h>
>
> Correct?
>
> Would be good to run any testsuite to prove that all operations works as
> expected. Is there any testsuite I can use to confirm it?
>
> Thanks,
> Michal
The comment in the generic bitops.h says "You should recode these in the
native assembly language, if at all possible". I don't think using the generic
implementation will be as efficient as the current arch specific one.
My recommendation is to stick with the arch specific implementation.
Thanks,
Stefan
On Thu, Feb 13, 2020 at 09:01:21AM +0000, Stefan Asserhall wrote:
> The comment in the generic bitops.h says "You should recode these in the
> native assembly language, if at all possible". I don't think using the generic
> implementation will be as efficient as the current arch specific one.
That is a very crusty old recommendation. Please look at the compiler
generated code.
We've extended the atomic_t operations the past few years and Will wrote
the generic atomic bitops for Arm64, we're looking to convert more LL/SC
archs to them.
There is currently one known issue with it, but Will has a patch-set
pending to solve that (IIRC that only matters if you have stack
protector on).
Also see this thread:
https://lkml.kernel.org/r/[email protected]
And these patches:
https://lkml.kernel.org/r/[email protected]
On Thu, Feb 13, 2020 at 09:58:49AM +0100, Peter Zijlstra wrote:
> The thing is, your bog standard LL/SC _SHOULD_ fail the SC if someone
> else does a regular store to the same variable. See the example in
> Documentation/atomic_t.txt.
>
> That is, a competing SW/SWI should result in the interconnect responding
> with something other than EXOKAY, the SWX should fail and MSR[C] <- 1.
The thing is, we have code that relies on this behaviour. There are a
few crusty SMP archs that sorta-kinda limp along (mostly by disabling
some of the code and praying the rest doesn't trigger too often), but we
really should not allow more broken SMP archs.
> On Thu, Feb 13, 2020 at 09:01:21AM +0000, Stefan Asserhall wrote:
> > The comment in the generic bitops.h says "You should recode these in
> > the native assembly language, if at all possible". I don't think using
> > the generic implementation will be as efficient as the current arch specific one.
>
> That is a very crusty old recommendation. Please look at the compiler generated
> code.
>
> We've extended the atomic_t operations the past few years and Will wrote the
> generic atomic bitops for Arm64, we're looking to convert more LL/SC archs to
> them.
>
> There is currently one known issue with it, but Will has a patch-set pending to
> solve that (IIRC that only matters if you have stack protector on).
>
> Also see this thread:
>
> https://lkml.kernel.org/r/[email protected]
>
> And these patches:
>
> https://lkml.kernel.org/r/[email protected]
Thanks for the links. Sure, I agree that it is better to use the generic
implementation if it is as efficient as the arch specific one, but I don't
think we should assume that it is.
Michal, would it be possible to replace the arch specific code and check
what we get?
Stefan
On Thu, Feb 13, 2020 at 10:16:51AM +0100, Peter Zijlstra wrote:
> On Thu, Feb 13, 2020 at 09:58:49AM +0100, Peter Zijlstra wrote:
>
> > The thing is, your bog standard LL/SC _SHOULD_ fail the SC if someone
> > else does a regular store to the same variable. See the example in
> > Documentation/atomic_t.txt.
> >
> > That is, a competing SW/SWI should result in the interconnect responding
> > with something other than EXOKAY, the SWX should fail and MSR[C] <- 1.
>
> The thing is; we have code that relies on this behaviour. There are a
> few crusty SMP archs that sorta-kinda limp along (mostly by disabling
> some of the code and praying the rest doesn't trigger too often), but we
> really should not allow more broken SMP archs.
I did find this in the linked pdf:
| If the store [swx] is successful, the sequence of instructions from
| the semaphore load to the semaphore store appear to be executed
| atomically - no other device modified the semaphore location between
| the read and the update.
which sounds like we're ok, although it could be better worded.
One part I haven't figured out is what happens if you take an interrupt
between the lwx and the swx and whether you can end up succeeding thanks
to somebody else's reservation. Also, the manual is silent about the
interaction with TLB invalidation and just refers to "address" when
talking about the reservation. What happens if a user thread triggers
CoW while another is in the middle of a lwx/swx?
Will
> On Thu, Feb 13, 2020 at 10:16:51AM +0100, Peter Zijlstra wrote:
> > On Thu, Feb 13, 2020 at 09:58:49AM +0100, Peter Zijlstra wrote:
> >
> > > The thing is, your bog standard LL/SC _SHOULD_ fail the SC if
> > > someone else does a regular store to the same variable. See the
> > > example in Documentation/atomic_t.txt.
> > >
> > > That is, a competing SW/SWI should result in the interconnect
> > > responding with something other than EXOKAY, the SWX should fail and
> MSR[C] <- 1.
> >
> > The thing is; we have code that relies on this behaviour. There are a
> > few crusty SMP archs that sorta-kinda limp along (mostly by disabling
> > some of the code and praying the rest doesn't trigger too often), but
> > we really should not allow more broken SMP archs.
>
> I did find this in the linked pdf:
>
> | If the store [swx] is successful, the sequence of instructions from
> | the semaphore load to the semaphore store appear to be executed
> | atomically - no other device modified the semaphore location between
> | the read and the update.
>
> which sounds like we're ok, although it could be better worded.
>
> One part I haven't figured out is what happens if you take an interrupt between
> the lwx and the swx and whether you can end up succeeding thanks to
> somebody else's reservation. Also, the manual is silent about the interaction
> with TLB invalidation and just refers to "address" when talking about the
> reservation. What happens if a user thread triggers CoW while another is in the
> middle of a lwx/swx?
>
> Will
The manual says "Reset, interrupts, exceptions, and breaks (including the BRK
and BRKI instructions) all clear the reservation." In case of a TLB invalidation
between lwx and swx, you will get a TLB miss exception when attempting the
swx, and the reservation will be cleared due to the exception.
Stefan
On Thu, Feb 13, 2020 at 10:04:03AM +0000, Will Deacon wrote:
> On Thu, Feb 13, 2020 at 10:16:51AM +0100, Peter Zijlstra wrote:
> > On Thu, Feb 13, 2020 at 09:58:49AM +0100, Peter Zijlstra wrote:
> >
> > > The thing is, your bog standard LL/SC _SHOULD_ fail the SC if someone
> > > else does a regular store to the same variable. See the example in
> > > Documentation/atomic_t.txt.
> > >
> > > That is, a competing SW/SWI should result in the interconnect responding
> > > with something other than EXOKAY, the SWX should fail and MSR[C] <- 1.
> >
> > The thing is; we have code that relies on this behaviour. There are a
> > few crusty SMP archs that sorta-kinda limp along (mostly by disabling
> > some of the code and praying the rest doesn't trigger too often), but we
> > really should not allow more broken SMP archs.
>
> I did find this in the linked pdf:
>
> | If the store [swx] is successful, the sequence of instructions from
> | the semaphore load to the semaphore store appear to be executed
> | atomically - no other device modified the semaphore location between
> | the read and the update.
>
> which sounds like we're ok, although it could be better worded.
>
> One part I haven't figured out is what happens if you take an interrupt
> between the lwx and the swx and whether you can end up succeeding thanks
> to somebody else's reservation. Also, the manual is silent about the
> interaction with TLB invalidation and just refers to "address" when
> talking about the reservation. What happens if a user thread triggers
> CoW while another is in the middle of a lwx/swx?
Page 79, Table 2-40 has the note:
"All of these events will clear the reservation bit, used together with
the LWX and SWX instructions to implement mutual exclusion,..."
On Thu, Feb 13, 2020 at 10:14:13AM +0000, Stefan Asserhall wrote:
> > On Thu, Feb 13, 2020 at 10:16:51AM +0100, Peter Zijlstra wrote:
> > > On Thu, Feb 13, 2020 at 09:58:49AM +0100, Peter Zijlstra wrote:
> > >
> > > > The thing is, your bog standard LL/SC _SHOULD_ fail the SC if
> > > > someone else does a regular store to the same variable. See the
> > > > example in Documentation/atomic_t.txt.
> > > >
> > > > That is, a competing SW/SWI should result in the interconnect
> > > > responding with something other than EXOKAY, the SWX should fail and
> > MSR[C] <- 1.
> > >
> > > The thing is; we have code that relies on this behaviour. There are a
> > > few crusty SMP archs that sorta-kinda limp along (mostly by disabling
> > > some of the code and praying the rest doesn't trigger too often), but
> > > we really should not allow more broken SMP archs.
> >
> > I did find this in the linked pdf:
> >
> > | If the store [swx] is successful, the sequence of instructions from
> > | the semaphore load to the semaphore store appear to be executed
> > | atomically - no other device modified the semaphore location between
> > | the read and the update.
> >
> > which sounds like we're ok, although it could be better worded.
> >
> > One part I haven't figured out is what happens if you take an interrupt between
> > the lwx and the swx and whether you can end up succeeding thanks to
> > somebody else's reservation. Also, the manual is silent about the interaction
> > with TLB invalidation and just refers to "address" when talking about the
> > reservation. What happens if a user thread triggers CoW while another is in the
> > middle of a lwx/swx?
> >
> > Will
>
> The manual says "Reset, interrupts, exceptions, and breaks (including the BRK
> and BRKI instructions) all clear the reservation." In case of a TLB invalidation
> between lwx and swx, you will get a TLB miss exception when attempting the
> swx, and the reservation will be cleared due to the exception.
Perfect, then I think we're good to go!
Will
On Thu, Feb 13, 2020 at 09:58:49AM +0100, Peter Zijlstra wrote:
[...]
>
> > Also is there any testsuite I should run to verify all these atomics
> > operations? That would really help but I haven't seen any tool (but also
> > didn't try hard to find it out).
>
> Will, Paul; can't this LKMM thing generate kernel modules to run? And do
The herd toolset does have something called klitmus:
"an experimental tool, similar to litmus7 that runs kernel memory model
tests as kernel modules."
I think Andrea knows more about how to use it.
> we have a 'nice' collection of litmus tests that cover atomic_t ?
>
There are a few in Paul's litmus repo:
https://github.com/paulmckrcu/litmus/tree/master/manual/atomic
Maybe a good start?
Regards,
Boqun
> The one in atomic_t.txt should cover this one at least.
(Forgot to copy Andrea in the previous email)
Andrea, could you tell us more about how to use klitmus to generate test
modules from a litmus test?
Regards,
Boqun
On Thu, Feb 13, 2020 at 07:34:32PM +0800, Boqun Feng wrote:
> On Thu, Feb 13, 2020 at 09:58:49AM +0100, Peter Zijlstra wrote:
> [...]
> >
> > > Also is there any testsuite I should run to verify all these atomics
> > > operations? That would really help but I haven't seen any tool (but also
> > > didn't try hard to find it out).
> >
> > Will, Paul; can't this LKMM thing generate kernel modules to run? And do
>
> The herd toolset does have something called klitmus:
>
> "an experimental tool, similar to litmus7 that runs kernel memory model
> tests as kernel modules."
>
> I think Andrea knows more about how to use it.
>
> > we have a 'nice' collection of litmus tests that cover atomic_t ?
> >
>
> There is a few in Paul's litmus repo:
>
> https://github.com/paulmckrcu/litmus/tree/master/manual/atomic
>
> Maybe a good start?
>
> Regards,
> Boqun
>
> > The one in atomic_t.txt should cover this one at least.
Hi,
On Thu, Feb 13, 2020 at 07:38:12PM +0800, Boqun Feng wrote:
> (Forget to copy Andrea in the previous email)
>
> Andrea, could you tell us more about how to use klitmus to generate test
> modules from litmus test?
The basic usage is described in "tools/memory-model/README", cf., in
particular, the section dedicated to klitmus7 and the "REQUIREMENTS"
section. For example, given the test,
andrea@andrea:~$ cat atomicity.litmus
C atomicity
{
atomic_t x = ATOMIC_INIT(0);
}
P0(atomic_t *x)
{
int r0;
r0 = atomic_fetch_inc_relaxed(x);
}
P1(atomic_t *x)
{
atomic_set(x, 2);
}
exists (0:r0=0 /\ x=1)
You should be able to do:
$ mkdir mymodules
$ klitmus7 -o mymodules atomicity.litmus
$ cd mymodules ; make
[...]
$ sudo sh run.sh
Thu 13 Feb 2020 02:21:52 PM CET
Compilation command: klitmus7 -o mymodules atomicity.litmus
OPT=
uname -r=5.3.0-29-generic
Test atomicity Allowed
Histogram (2 states)
1963399 :>0:r0=0; x=2;
2036601 :>0:r0=2; x=3;
No
Witnesses
Positive: 0, Negative: 4000000
Condition exists (0:r0=0 /\ x=1) is NOT validated
Hash=11bd2c90c4ca7a8acd9ca728a3d61d5f
Observation atomicity Never 0 4000000
Time atomicity 0.15
Thu 13 Feb 2020 02:21:52 PM CET
Where the "Positive: 0 Negative: 4000000" indicates that, during four
million trials, the state specified in the test's "exists" clause was
not reached/observed (as expected).
More information is available at:
http://diy.inria.fr/doc/litmus.html#klitmus
Thanks,
Andrea
On Thu, Feb 13, 2020 at 02:51:38PM +0100, Andrea Parri wrote:
> Hi,
>
> On Thu, Feb 13, 2020 at 07:38:12PM +0800, Boqun Feng wrote:
> > (Forget to copy Andrea in the previous email)
> >
> > Andrea, could you tell us more about how to use klitmus to generate test
> > modules from litmus test?
>
> The basic usage is described in "tools/memory-model/README", cf., in
> particular, the section dedicated to klitmus7 and the "REQUIREMENTS"
> section. For example, given the test,
>
> andrea@andrea:~$ cat atomicity.litmus
> C atomicity
>
> {
> atomic_t x = ATOMIC_INIT(0);
> }
>
> P0(atomic_t *x)
> {
> int r0;
>
> r0 = atomic_fetch_inc_relaxed(x);
> }
>
> P1(atomic_t *x)
> {
> atomic_set(x, 2);
> }
>
> exists (0:r0=0 /\ x=1)
>
> You should be able to do:
>
> $ mkdir mymodules
> $ klitmus7 -o mymodules atomicity.litmus
> $ cd mymodules ; make
> [...]
>
> $ sudo sh run.sh
> Thu 13 Feb 2020 02:21:52 PM CET
> Compilation command: klitmus7 -o mymodules atomicity.litmus
> OPT=
> uname -r=5.3.0-29-generic
>
> Test atomicity Allowed
> Histogram (2 states)
> 1963399 :>0:r0=0; x=2;
> 2036601 :>0:r0=2; x=3;
> No
>
> Witnesses
> Positive: 0, Negative: 4000000
> Condition exists (0:r0=0 /\ x=1) is NOT validated
> Hash=11bd2c90c4ca7a8acd9ca728a3d61d5f
> Observation atomicity Never 0 4000000
> Time atomicity 0.15
>
> Thu 13 Feb 2020 02:21:52 PM CET
>
> Where the "Positive: 0 Negative: 4000000" indicates that, during four
> million trials, the state specified in the test's "exists" clause was
> not reached/observed (as expected).
>
> More information are available at:
>
> http://diy.inria.fr/doc/litmus.html#klitmus
And I forgot to Cc: Luc and Jade, the main developers (and current
maintainers) of the tool suite in question.
Thanks,
Andrea