2023-12-08 05:55:41

by Samuel Holland

[permalink] [raw]
Subject: [RFC PATCH 00/12] Unified cross-architecture kernel-mode FPU API

This series supersedes my earier RISC-V specific series[1].

This series unifies the kernel-mode FPU API across several architectures
by wrapping the existing functions (where needed) in consistently-named
functions placed in a consistent header location, with mostly the same
semantics: they can be called from preemptible or non-preemptible task
context, and are not assumed to be reentrant. Architectures are also
expected to provide CFLAGS adjustments for compiling FPU-dependent code.
For the moment, SIMD/vector units are out of scope for this common API.

This allows us to remove the ifdeffery and duplicated Makefile logic at
each FPU user. It then implements the common API on RISC-V, and converts
a couple of users to the new API: the AMDGPU DRM driver, and the FPU
self test.

The underlying goal of this series is to allow using newer AMD GPUs
(e.g. Navi) on RISC-V boards such as SiFive's HiFive Unmatched. Those
GPUs need CONFIG_DRM_AMD_DC_FP to initialize, which requires kernel-mode
FPU support.

[1]: https://lore.kernel.org/linux-riscv/[email protected]/


Samuel Holland (12):
arch: Add ARCH_HAS_KERNEL_FPU_SUPPORT
ARM: Implement ARCH_HAS_KERNEL_FPU_SUPPORT
ARM: crypto: Use CC_FLAGS_FPU for NEON CFLAGS
arm64: Implement ARCH_HAS_KERNEL_FPU_SUPPORT
lib/raid6: Use CC_FLAGS_FPU for NEON CFLAGS
LoongArch: Implement ARCH_HAS_KERNEL_FPU_SUPPORT
powerpc: Implement ARCH_HAS_KERNEL_FPU_SUPPORT
x86: Implement ARCH_HAS_KERNEL_FPU_SUPPORT
riscv: Add support for kernel-mode FPU
drm/amd/display: Use ARCH_HAS_KERNEL_FPU_SUPPORT
selftests/fpu: Move FP code to a separate translation unit
selftests/fpu: Allow building on other architectures

Makefile | 4 ++
arch/Kconfig | 9 +++++
arch/arm/Kconfig | 1 +
arch/arm/Makefile | 7 ++++
arch/arm/include/asm/fpu.h | 17 +++++++++
arch/arm/lib/Makefile | 3 +-
arch/arm64/Kconfig | 1 +
arch/arm64/Makefile | 9 ++++-
arch/arm64/include/asm/fpu.h | 17 +++++++++
arch/loongarch/Kconfig | 1 +
arch/loongarch/Makefile | 5 ++-
arch/loongarch/include/asm/fpu.h | 1 +
arch/powerpc/Kconfig | 1 +
arch/powerpc/Makefile | 5 ++-
arch/powerpc/include/asm/fpu.h | 28 ++++++++++++++
arch/riscv/Kconfig | 1 +
arch/riscv/Makefile | 3 ++
arch/riscv/include/asm/fpu.h | 26 +++++++++++++
arch/riscv/kernel/Makefile | 1 +
arch/riscv/kernel/kernel_mode_fpu.c | 28 ++++++++++++++
arch/x86/Kconfig | 1 +
arch/x86/Makefile | 20 ++++++++++
arch/x86/include/asm/fpu.h | 13 +++++++
drivers/gpu/drm/amd/display/Kconfig | 2 +-
.../gpu/drm/amd/display/amdgpu_dm/dc_fpu.c | 33 +----------------
drivers/gpu/drm/amd/display/dc/dml/Makefile | 36 +-----------------
drivers/gpu/drm/amd/display/dc/dml2/Makefile | 36 +-----------------
lib/Kconfig.debug | 2 +-
lib/Makefile | 26 ++-----------
lib/raid6/Makefile | 31 ++++------------
lib/{test_fpu.c => test_fpu_glue.c} | 37 +++----------------
lib/test_fpu_impl.c | 35 ++++++++++++++++++
32 files changed, 255 insertions(+), 185 deletions(-)
create mode 100644 arch/arm/include/asm/fpu.h
create mode 100644 arch/arm64/include/asm/fpu.h
create mode 100644 arch/powerpc/include/asm/fpu.h
create mode 100644 arch/riscv/include/asm/fpu.h
create mode 100644 arch/riscv/kernel/kernel_mode_fpu.c
create mode 100644 arch/x86/include/asm/fpu.h
rename lib/{test_fpu.c => test_fpu_glue.c} (71%)
create mode 100644 lib/test_fpu_impl.c

--
2.42.0


2023-12-08 05:55:43

by Samuel Holland

[permalink] [raw]
Subject: [RFC PATCH 06/12] LoongArch: Implement ARCH_HAS_KERNEL_FPU_SUPPORT

LoongArch already provides kernel_fpu_begin() and kernel_fpu_end() in
asm/fpu.h, so it only needs to add kernel_fpu_available() and export
the CFLAGS adjustments.

Signed-off-by: Samuel Holland <[email protected]>
---

arch/loongarch/Kconfig | 1 +
arch/loongarch/Makefile | 5 ++++-
arch/loongarch/include/asm/fpu.h | 1 +
3 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index ee123820a476..65d4475565b8 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -15,6 +15,7 @@ config LOONGARCH
select ARCH_HAS_CPU_FINALIZE_INIT
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_KCOV
+ select ARCH_HAS_KERNEL_FPU_SUPPORT if CPU_HAS_FPU
select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
select ARCH_HAS_PTE_SPECIAL
diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
index 204b94b2e6aa..f5c4f7e921db 100644
--- a/arch/loongarch/Makefile
+++ b/arch/loongarch/Makefile
@@ -25,6 +25,9 @@ endif
32bit-emul = elf32loongarch
64bit-emul = elf64loongarch

+CC_FLAGS_FPU := -mfpu=64
+CC_FLAGS_NO_FPU := -msoft-float
+
ifdef CONFIG_DYNAMIC_FTRACE
KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
CC_FLAGS_FTRACE := -fpatchable-function-entry=2
@@ -46,7 +49,7 @@ ld-emul = $(64bit-emul)
cflags-y += -mabi=lp64s
endif

-cflags-y += -pipe -msoft-float
+cflags-y += -pipe $(CC_FLAGS_NO_FPU)
LDFLAGS_vmlinux += -static -n -nostdlib

# When the assembler supports explicit relocation hint, we must use it.
diff --git a/arch/loongarch/include/asm/fpu.h b/arch/loongarch/include/asm/fpu.h
index c2d8962fda00..3177674228f8 100644
--- a/arch/loongarch/include/asm/fpu.h
+++ b/arch/loongarch/include/asm/fpu.h
@@ -21,6 +21,7 @@

struct sigcontext;

+#define kernel_fpu_available() cpu_has_fpu
extern void kernel_fpu_begin(void);
extern void kernel_fpu_end(void);

--
2.42.0

2023-12-08 05:56:17

by Samuel Holland

[permalink] [raw]
Subject: [RFC PATCH 10/12] drm/amd/display: Use ARCH_HAS_KERNEL_FPU_SUPPORT

Now that all previously-supported architectures select
ARCH_HAS_KERNEL_FPU_SUPPORT, this code can depend on that symbol instead
of the existing list of architectures. It can also take advantage of the
common kernel-mode FPU API and method of adjusting CFLAGS.

Signed-off-by: Samuel Holland <[email protected]>
---

drivers/gpu/drm/amd/display/Kconfig | 2 +-
.../gpu/drm/amd/display/amdgpu_dm/dc_fpu.c | 33 +----------------
drivers/gpu/drm/amd/display/dc/dml/Makefile | 36 ++-----------------
drivers/gpu/drm/amd/display/dc/dml2/Makefile | 36 ++-----------------
4 files changed, 6 insertions(+), 101 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig
index 901d1961b739..5fcd4f778dc3 100644
--- a/drivers/gpu/drm/amd/display/Kconfig
+++ b/drivers/gpu/drm/amd/display/Kconfig
@@ -8,7 +8,7 @@ config DRM_AMD_DC
depends on BROKEN || !CC_IS_CLANG || ARM64 || RISCV || SPARC64 || X86_64
select SND_HDA_COMPONENT if SND_HDA_CORE
# !CC_IS_CLANG: https://github.com/ClangBuiltLinux/linux/issues/1752
- select DRM_AMD_DC_FP if (X86 || LOONGARCH || (PPC64 && ALTIVEC) || (ARM64 && KERNEL_MODE_NEON && !CC_IS_CLANG))
+ select DRM_AMD_DC_FP if ARCH_HAS_KERNEL_FPU_SUPPORT && (!ARM64 || !CC_IS_CLANG)
help
Choose this option if you want to use the new display engine
support for AMDGPU. This adds required support for Vega and
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
index 4ae4720535a5..b64f917174ca 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
@@ -26,16 +26,7 @@

#include "dc_trace.h"

-#if defined(CONFIG_X86)
-#include <asm/fpu/api.h>
-#elif defined(CONFIG_PPC64)
-#include <asm/switch_to.h>
-#include <asm/cputable.h>
-#elif defined(CONFIG_ARM64)
-#include <asm/neon.h>
-#elif defined(CONFIG_LOONGARCH)
#include <asm/fpu.h>
-#endif

/**
* DOC: DC FPU manipulation overview
@@ -87,20 +78,9 @@ void dc_fpu_begin(const char *function_name, const int line)
WARN_ON_ONCE(!in_task());
preempt_disable();
depth = __this_cpu_inc_return(fpu_recursion_depth);
-
if (depth == 1) {
-#if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH)
+ BUG_ON(!kernel_fpu_available());
kernel_fpu_begin();
-#elif defined(CONFIG_PPC64)
- if (cpu_has_feature(CPU_FTR_VSX_COMP))
- enable_kernel_vsx();
- else if (cpu_has_feature(CPU_FTR_ALTIVEC_COMP))
- enable_kernel_altivec();
- else if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE))
- enable_kernel_fp();
-#elif defined(CONFIG_ARM64)
- kernel_neon_begin();
-#endif
}

TRACE_DCN_FPU(true, function_name, line, depth);
@@ -122,18 +102,7 @@ void dc_fpu_end(const char *function_name, const int line)

depth = __this_cpu_dec_return(fpu_recursion_depth);
if (depth == 0) {
-#if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH)
kernel_fpu_end();
-#elif defined(CONFIG_PPC64)
- if (cpu_has_feature(CPU_FTR_VSX_COMP))
- disable_kernel_vsx();
- else if (cpu_has_feature(CPU_FTR_ALTIVEC_COMP))
- disable_kernel_altivec();
- else if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE))
- disable_kernel_fp();
-#elif defined(CONFIG_ARM64)
- kernel_neon_end();
-#endif
} else {
WARN_ON_ONCE(depth < 0);
}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
index ea7d60f9a9b4..5aad0f572ba3 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
@@ -25,40 +25,8 @@
# It provides the general basic services required by other DAL
# subcomponents.

-ifdef CONFIG_X86
-dml_ccflags-$(CONFIG_CC_IS_GCC) := -mhard-float
-dml_ccflags := $(dml_ccflags-y) -msse
-endif
-
-ifdef CONFIG_PPC64
-dml_ccflags := -mhard-float -maltivec
-endif
-
-ifdef CONFIG_ARM64
-dml_rcflags := -mgeneral-regs-only
-endif
-
-ifdef CONFIG_LOONGARCH
-dml_ccflags := -mfpu=64
-dml_rcflags := -msoft-float
-endif
-
-ifdef CONFIG_CC_IS_GCC
-ifneq ($(call gcc-min-version, 70100),y)
-IS_OLD_GCC = 1
-endif
-endif
-
-ifdef CONFIG_X86
-ifdef IS_OLD_GCC
-# Stack alignment mismatch, proceed with caution.
-# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
-# (8B stack alignment).
-dml_ccflags += -mpreferred-stack-boundary=4
-else
-dml_ccflags += -msse2
-endif
-endif
+dml_ccflags := $(CC_FLAGS_FPU)
+dml_rcflags := $(CC_FLAGS_NO_FPU)

ifneq ($(CONFIG_FRAME_WARN),0)
frame_warn_flag := -Wframe-larger-than=2048
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
index acff3449b8d7..4f6c804a26ad 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
@@ -24,40 +24,8 @@
#
# Makefile for dml2.

-ifdef CONFIG_X86
-dml2_ccflags-$(CONFIG_CC_IS_GCC) := -mhard-float
-dml2_ccflags := $(dml2_ccflags-y) -msse
-endif
-
-ifdef CONFIG_PPC64
-dml2_ccflags := -mhard-float -maltivec
-endif
-
-ifdef CONFIG_ARM64
-dml2_rcflags := -mgeneral-regs-only
-endif
-
-ifdef CONFIG_LOONGARCH
-dml2_ccflags := -mfpu=64
-dml2_rcflags := -msoft-float
-endif
-
-ifdef CONFIG_CC_IS_GCC
-ifeq ($(call cc-ifversion, -lt, 0701, y), y)
-IS_OLD_GCC = 1
-endif
-endif
-
-ifdef CONFIG_X86
-ifdef IS_OLD_GCC
-# Stack alignment mismatch, proceed with caution.
-# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
-# (8B stack alignment).
-dml2_ccflags += -mpreferred-stack-boundary=4
-else
-dml2_ccflags += -msse2
-endif
-endif
+dml2_ccflags := $(CC_FLAGS_FPU)
+dml2_rcflags := $(CC_FLAGS_NO_FPU)

ifneq ($(CONFIG_FRAME_WARN),0)
ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y)
--
2.42.0

2023-12-08 05:56:19

by Samuel Holland

[permalink] [raw]
Subject: [RFC PATCH 11/12] selftests/fpu: Move FP code to a separate translation unit

This ensures no compiler-generated floating-point code can appear
outside kernel_fpu_{begin,end}() sections, and some architectures
enforce this separation.

Signed-off-by: Samuel Holland <[email protected]>
---

lib/Makefile | 3 ++-
lib/{test_fpu.c => test_fpu_glue.c} | 32 +-------------------------
lib/test_fpu_impl.c | 35 +++++++++++++++++++++++++++++
3 files changed, 38 insertions(+), 32 deletions(-)
rename lib/{test_fpu.c => test_fpu_glue.c} (71%)
create mode 100644 lib/test_fpu_impl.c

diff --git a/lib/Makefile b/lib/Makefile
index 6b09731d8e61..e7cbd54944a2 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -132,7 +132,8 @@ FPU_CFLAGS += $(call cc-option,-msse -mpreferred-stack-boundary=3,-mpreferred-st
endif

obj-$(CONFIG_TEST_FPU) += test_fpu.o
-CFLAGS_test_fpu.o += $(FPU_CFLAGS)
+test_fpu-y := test_fpu_glue.o test_fpu_impl.o
+CFLAGS_test_fpu_impl.o += $(FPU_CFLAGS)

obj-$(CONFIG_TEST_LIVEPATCH) += livepatch/

diff --git a/lib/test_fpu.c b/lib/test_fpu_glue.c
similarity index 71%
rename from lib/test_fpu.c
rename to lib/test_fpu_glue.c
index e82db19fed84..2761b51117b0 100644
--- a/lib/test_fpu.c
+++ b/lib/test_fpu_glue.c
@@ -19,37 +19,7 @@
#include <linux/debugfs.h>
#include <asm/fpu/api.h>

-static int test_fpu(void)
-{
- /*
- * This sequence of operations tests that rounding mode is
- * to nearest and that denormal numbers are supported.
- * Volatile variables are used to avoid compiler optimizing
- * the calculations away.
- */
- volatile double a, b, c, d, e, f, g;
-
- a = 4.0;
- b = 1e-15;
- c = 1e-310;
-
- /* Sets precision flag */
- d = a + b;
-
- /* Result depends on rounding mode */
- e = a + b / 2;
-
- /* Denormal and very large values */
- f = b / c;
-
- /* Depends on denormal support */
- g = a + c * f;
-
- if (d > a && e > a && g > a)
- return 0;
- else
- return -EINVAL;
-}
+int test_fpu(void);

static int test_fpu_get(void *data, u64 *val)
{
diff --git a/lib/test_fpu_impl.c b/lib/test_fpu_impl.c
new file mode 100644
index 000000000000..2ff01980bc22
--- /dev/null
+++ b/lib/test_fpu_impl.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <linux/errno.h>
+
+int test_fpu(void)
+{
+ /*
+ * This sequence of operations tests that rounding mode is
+ * to nearest and that denormal numbers are supported.
+ * Volatile variables are used to avoid compiler optimizing
+ * the calculations away.
+ */
+ volatile double a, b, c, d, e, f, g;
+
+ a = 4.0;
+ b = 1e-15;
+ c = 1e-310;
+
+ /* Sets precision flag */
+ d = a + b;
+
+ /* Result depends on rounding mode */
+ e = a + b / 2;
+
+ /* Denormal and very large values */
+ f = b / c;
+
+ /* Depends on denormal support */
+ g = a + c * f;
+
+ if (d > a && e > a && g > a)
+ return 0;
+ else
+ return -EINVAL;
+}
--
2.42.0

2023-12-08 05:56:45

by Samuel Holland

[permalink] [raw]
Subject: [RFC PATCH 09/12] riscv: Add support for kernel-mode FPU

This is motivated by the amdgpu DRM driver, which needs floating-point
code to support recent hardware. That code is not performance-critical,
so only provide a minimal non-preemptible implementation for now.

Use a similar trick as ARM to force placing floating-point code in a
separate translation unit, so it is not possible for compiler-generated
floating-point code to appear outside kernel_fpu_{begin,end}().

Signed-off-by: Samuel Holland <[email protected]>
---

arch/riscv/Kconfig | 1 +
arch/riscv/Makefile | 3 +++
arch/riscv/include/asm/fpu.h | 26 ++++++++++++++++++++++++++
arch/riscv/kernel/Makefile | 1 +
arch/riscv/kernel/kernel_mode_fpu.c | 28 ++++++++++++++++++++++++++++
5 files changed, 59 insertions(+)
create mode 100644 arch/riscv/include/asm/fpu.h
create mode 100644 arch/riscv/kernel/kernel_mode_fpu.c

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 95a2a06acc6a..cf0967928e6d 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -27,6 +27,7 @@ config RISCV
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_GIGANTIC_PAGE
select ARCH_HAS_KCOV
+ select ARCH_HAS_KERNEL_FPU_SUPPORT if FPU
select ARCH_HAS_MMIOWB
select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
select ARCH_HAS_PMEM_API
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index a74be78678eb..2e719c369210 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -81,6 +81,9 @@ KBUILD_CFLAGS += -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64i

KBUILD_AFLAGS += -march=$(riscv-march-y)

+# For C code built with floating-point support, exclude V but keep F and D.
+CC_FLAGS_FPU := -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)([^v_]*)v?/\1\2/')
+
KBUILD_CFLAGS += -mno-save-restore
KBUILD_CFLAGS += -DCONFIG_PAGE_OFFSET=$(CONFIG_PAGE_OFFSET)

diff --git a/arch/riscv/include/asm/fpu.h b/arch/riscv/include/asm/fpu.h
new file mode 100644
index 000000000000..8cd027acc015
--- /dev/null
+++ b/arch/riscv/include/asm/fpu.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2023 SiFive
+ */
+
+#ifndef _ASM_RISCV_FPU_H
+#define _ASM_RISCV_FPU_H
+
+#include <asm/switch_to.h>
+
+#define kernel_fpu_available() has_fpu()
+
+#ifdef __riscv_f
+
+#define kernel_fpu_begin() \
+ static_assert(false, "floating-point code must use a separate translation unit")
+#define kernel_fpu_end() kernel_fpu_begin()
+
+#else
+
+void kernel_fpu_begin(void);
+void kernel_fpu_end(void);
+
+#endif
+
+#endif /* ! _ASM_RISCV_FPU_H */
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index fee22a3d1b53..662c483e338d 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -62,6 +62,7 @@ obj-$(CONFIG_MMU) += vdso.o vdso/

obj-$(CONFIG_RISCV_MISALIGNED) += traps_misaligned.o
obj-$(CONFIG_FPU) += fpu.o
+obj-$(CONFIG_FPU) += kernel_mode_fpu.o
obj-$(CONFIG_RISCV_ISA_V) += vector.o
obj-$(CONFIG_SMP) += smpboot.o
obj-$(CONFIG_SMP) += smp.o
diff --git a/arch/riscv/kernel/kernel_mode_fpu.c b/arch/riscv/kernel/kernel_mode_fpu.c
new file mode 100644
index 000000000000..9b2024cc056b
--- /dev/null
+++ b/arch/riscv/kernel/kernel_mode_fpu.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2023 SiFive
+ */
+
+#include <linux/export.h>
+#include <linux/preempt.h>
+
+#include <asm/csr.h>
+#include <asm/fpu.h>
+#include <asm/processor.h>
+#include <asm/switch_to.h>
+
+void kernel_fpu_begin(void)
+{
+ preempt_disable();
+ fstate_save(current, task_pt_regs(current));
+ csr_set(CSR_SSTATUS, SR_FS);
+}
+EXPORT_SYMBOL_GPL(kernel_fpu_begin);
+
+void kernel_fpu_end(void)
+{
+ csr_clear(CSR_SSTATUS, SR_FS);
+ fstate_restore(current, task_pt_regs(current));
+ preempt_enable();
+}
+EXPORT_SYMBOL_GPL(kernel_fpu_end);
--
2.42.0

2023-12-08 05:56:48

by Samuel Holland

[permalink] [raw]
Subject: [RFC PATCH 07/12] powerpc: Implement ARCH_HAS_KERNEL_FPU_SUPPORT

PowerPC provides an equivalent to the common kernel-mode FPU API, but in
a different header and using different function names. The PowerPC API
also requires a non-preemptible context. Add a wrapper header, and
export the CFLAGS adjustments.

Signed-off-by: Samuel Holland <[email protected]>
---

arch/powerpc/Kconfig | 1 +
arch/powerpc/Makefile | 5 ++++-
arch/powerpc/include/asm/fpu.h | 28 ++++++++++++++++++++++++++++
3 files changed, 33 insertions(+), 1 deletion(-)
create mode 100644 arch/powerpc/include/asm/fpu.h

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 6f105ee4f3cf..e96cb5b7c571 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -137,6 +137,7 @@ config PPC
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_HUGEPD if HUGETLB_PAGE
select ARCH_HAS_KCOV
+ select ARCH_HAS_KERNEL_FPU_SUPPORT if PPC_FPU
select ARCH_HAS_MEMBARRIER_CALLBACKS
select ARCH_HAS_MEMBARRIER_SYNC_CORE
select ARCH_HAS_MEMREMAP_COMPAT_ALIGN if PPC_64S_HASH_MMU
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index f19dbaa1d541..2d5f21baf6ff 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -142,6 +142,9 @@ CFLAGS-$(CONFIG_PPC32) += $(call cc-option, $(MULTIPLEWORD))

CFLAGS-$(CONFIG_PPC32) += $(call cc-option,-mno-readonly-in-sdata)

+CC_FLAGS_FPU := $(call cc-option,-mhard-float)
+CC_FLAGS_NO_FPU += $(call cc-option,-msoft-float)
+
ifdef CONFIG_FUNCTION_TRACER
ifdef CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY
KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
@@ -163,7 +166,7 @@ asinstr := $(call as-instr,lis 9$(comma)foo@high,-DHAVE_AS_ATHIGH=1)

KBUILD_CPPFLAGS += -I $(srctree)/arch/$(ARCH) $(asinstr)
KBUILD_AFLAGS += $(AFLAGS-y)
-KBUILD_CFLAGS += $(call cc-option,-msoft-float)
+KBUILD_CFLAGS += $(CC_FLAGS_NO_FPU)
KBUILD_CFLAGS += $(CFLAGS-y)
CPP = $(CC) -E $(KBUILD_CFLAGS)

diff --git a/arch/powerpc/include/asm/fpu.h b/arch/powerpc/include/asm/fpu.h
new file mode 100644
index 000000000000..ca584e4bc40f
--- /dev/null
+++ b/arch/powerpc/include/asm/fpu.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2023 SiFive
+ */
+
+#ifndef _ASM_POWERPC_FPU_H
+#define _ASM_POWERPC_FPU_H
+
+#include <linux/preempt.h>
+
+#include <asm/cpu_has_feature.h>
+#include <asm/switch_to.h>
+
+#define kernel_fpu_available() (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE))
+
+static inline void kernel_fpu_begin(void)
+{
+ preempt_disable();
+ enable_kernel_fp();
+}
+
+static inline void kernel_fpu_end(void)
+{
+ disable_kernel_fp();
+ preempt_enable();
+}
+
+#endif /* ! _ASM_POWERPC_FPU_H */
--
2.42.0

2023-12-08 05:56:53

by Samuel Holland

[permalink] [raw]
Subject: [RFC PATCH 12/12] selftests/fpu: Allow building on other architectures

Now that ARCH_HAS_KERNEL_FPU_SUPPORT provides a common way to compile
and run floating-point code, this test is no longer x86-specific.

Signed-off-by: Samuel Holland <[email protected]>
---

lib/Kconfig.debug | 2 +-
lib/Makefile | 25 ++-----------------------
lib/test_fpu_glue.c | 5 ++++-
3 files changed, 7 insertions(+), 25 deletions(-)

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index cc7d53d9dc01..bbab0b054e09 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2933,7 +2933,7 @@ config TEST_FREE_PAGES

config TEST_FPU
tristate "Test floating point operations in kernel space"
- depends on X86 && !KCOV_INSTRUMENT_ALL
+ depends on ARCH_HAS_KERNEL_FPU_SUPPORT && !KCOV_INSTRUMENT_ALL
help
Enable this option to add /sys/kernel/debug/selftest_helpers/test_fpu
which will trigger a sequence of floating point operations. This is used
diff --git a/lib/Makefile b/lib/Makefile
index e7cbd54944a2..b9f28558c9bd 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -109,31 +109,10 @@ CFLAGS_test_fprobe.o += $(CC_FLAGS_FTRACE)
obj-$(CONFIG_FPROBE_SANITY_TEST) += test_fprobe.o
obj-$(CONFIG_TEST_OBJPOOL) += test_objpool.o

-#
-# CFLAGS for compiling floating point code inside the kernel. x86/Makefile turns
-# off the generation of FPU/SSE* instructions for kernel proper but FPU_FLAGS
-# get appended last to CFLAGS and thus override those previous compiler options.
-#
-FPU_CFLAGS := -msse -msse2
-ifdef CONFIG_CC_IS_GCC
-# Stack alignment mismatch, proceed with caution.
-# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
-# (8B stack alignment).
-# See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383
-#
-# The "-msse" in the first argument is there so that the
-# -mpreferred-stack-boundary=3 build error:
-#
-# -mpreferred-stack-boundary=3 is not between 4 and 12
-#
-# can be triggered. Otherwise gcc doesn't complain.
-FPU_CFLAGS += -mhard-float
-FPU_CFLAGS += $(call cc-option,-msse -mpreferred-stack-boundary=3,-mpreferred-stack-boundary=4)
-endif
-
obj-$(CONFIG_TEST_FPU) += test_fpu.o
test_fpu-y := test_fpu_glue.o test_fpu_impl.o
-CFLAGS_test_fpu_impl.o += $(FPU_CFLAGS)
+CFLAGS_test_fpu_impl.o += $(CC_FLAGS_FPU)
+CFLAGS_REMOVE_test_fpu_impl.o += $(CC_FLAGS_NO_FPU)

obj-$(CONFIG_TEST_LIVEPATCH) += livepatch/

diff --git a/lib/test_fpu_glue.c b/lib/test_fpu_glue.c
index 2761b51117b0..2e0b4027a5e3 100644
--- a/lib/test_fpu_glue.c
+++ b/lib/test_fpu_glue.c
@@ -17,7 +17,7 @@
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/debugfs.h>
-#include <asm/fpu/api.h>
+#include <asm/fpu.h>

int test_fpu(void);

@@ -38,6 +38,9 @@ static struct dentry *selftest_dir;

static int __init test_fpu_init(void)
{
+ if (!kernel_fpu_available())
+ return -EINVAL;
+
selftest_dir = debugfs_create_dir("selftest_helpers", NULL);
if (!selftest_dir)
return -ENOMEM;
--
2.42.0

2023-12-08 05:57:01

by Samuel Holland

[permalink] [raw]
Subject: [RFC PATCH 08/12] x86: Implement ARCH_HAS_KERNEL_FPU_SUPPORT

x86 already provides kernel_fpu_begin() and kernel_fpu_end(), but in a
different header. Add a wrapper header, and export the CFLAGS
adjustments as found in lib/Makefile.

Signed-off-by: Samuel Holland <[email protected]>
---

arch/x86/Kconfig | 1 +
arch/x86/Makefile | 20 ++++++++++++++++++++
arch/x86/include/asm/fpu.h | 13 +++++++++++++
3 files changed, 34 insertions(+)
create mode 100644 arch/x86/include/asm/fpu.h

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 3762f41bb092..1fe7f2d8d017 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -81,6 +81,7 @@ config X86
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_KCOV if X86_64
+ select ARCH_HAS_KERNEL_FPU_SUPPORT
select ARCH_HAS_MEM_ENCRYPT
select ARCH_HAS_MEMBARRIER_SYNC_CORE
select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 1a068de12a56..71576c8dbe79 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -70,6 +70,26 @@ export BITS
KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx
KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2

+#
+# CFLAGS for compiling floating point code inside the kernel.
+#
+CC_FLAGS_FPU := -msse -msse2
+ifdef CONFIG_CC_IS_GCC
+# Stack alignment mismatch, proceed with caution.
+# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
+# (8B stack alignment).
+# See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383
+#
+# The "-msse" in the first argument is there so that the
+# -mpreferred-stack-boundary=3 build error:
+#
+# -mpreferred-stack-boundary=3 is not between 4 and 12
+#
+# can be triggered. Otherwise gcc doesn't complain.
+CC_FLAGS_FPU += -mhard-float
+CC_FLAGS_FPU += $(call cc-option,-msse -mpreferred-stack-boundary=3,-mpreferred-stack-boundary=4)
+endif
+
ifeq ($(CONFIG_X86_KERNEL_IBT),y)
#
# Kernel IBT has S_CET.NOTRACK_EN=0, as such the compilers must not generate
diff --git a/arch/x86/include/asm/fpu.h b/arch/x86/include/asm/fpu.h
new file mode 100644
index 000000000000..b2743fe19339
--- /dev/null
+++ b/arch/x86/include/asm/fpu.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2023 SiFive
+ */
+
+#ifndef _ASM_X86_FPU_H
+#define _ASM_X86_FPU_H
+
+#include <asm/fpu/api.h>
+
+#define kernel_fpu_available() true
+
+#endif /* ! _ASM_X86_FPU_H */
--
2.42.0

2023-12-11 12:29:58

by Michael Ellerman

[permalink] [raw]
Subject: Re: [RFC PATCH 10/12] drm/amd/display: Use ARCH_HAS_KERNEL_FPU_SUPPORT

Hi Samuel,

Thanks for trying to clean all this up.

One problem below.

Samuel Holland <[email protected]> writes:
> Now that all previously-supported architectures select
> ARCH_HAS_KERNEL_FPU_SUPPORT, this code can depend on that symbol instead
> of the existing list of architectures. It can also take advantage of the
> common kernel-mode FPU API and method of adjusting CFLAGS.
>
> Signed-off-by: Samuel Holland <[email protected]>
...
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
> index 4ae4720535a5..b64f917174ca 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
> @@ -87,20 +78,9 @@ void dc_fpu_begin(const char *function_name, const int line)
> WARN_ON_ONCE(!in_task());
> preempt_disable();
> depth = __this_cpu_inc_return(fpu_recursion_depth);
> -
> if (depth == 1) {
> -#if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH)
> + BUG_ON(!kernel_fpu_available());
> kernel_fpu_begin();
> -#elif defined(CONFIG_PPC64)
> - if (cpu_has_feature(CPU_FTR_VSX_COMP))
> - enable_kernel_vsx();
> - else if (cpu_has_feature(CPU_FTR_ALTIVEC_COMP))
> - enable_kernel_altivec();

Note altivec.

> - else if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE))
> - enable_kernel_fp();
> -#elif defined(CONFIG_ARM64)
> - kernel_neon_begin();
> -#endif
> }
>
> TRACE_DCN_FPU(true, function_name, line, depth);
> diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
> index ea7d60f9a9b4..5aad0f572ba3 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
> +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
> @@ -25,40 +25,8 @@
> # It provides the general basic services required by other DAL
> # subcomponents.
>
> -ifdef CONFIG_X86
> -dml_ccflags-$(CONFIG_CC_IS_GCC) := -mhard-float
> -dml_ccflags := $(dml_ccflags-y) -msse
> -endif
> -
> -ifdef CONFIG_PPC64
> -dml_ccflags := -mhard-float -maltivec
> -endif

And altivec is enabled in the flags there.

That doesn't match your implementation for powerpc in patch 7, which
only deals with float.

I suspect the AMD driver actually doesn't need altivec enabled, but I
don't know that for sure. It compiles without it, but I don't have a GPU
to actually test. I've added Timothy on Cc who added the support for
powerpc to the driver originally, hopefully he has a test system.

Anyway if that's true that it doesn't need altivec we should probably do
a lead-up patch that drops altivec from the AMD driver explicitly, eg.
as below.

cheers


diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
index 4ae4720535a5..0de16796466b 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
@@ -92,11 +92,7 @@ void dc_fpu_begin(const char *function_name, const int line)
#if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH)
kernel_fpu_begin();
#elif defined(CONFIG_PPC64)
- if (cpu_has_feature(CPU_FTR_VSX_COMP))
- enable_kernel_vsx();
- else if (cpu_has_feature(CPU_FTR_ALTIVEC_COMP))
- enable_kernel_altivec();
- else if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE))
+ if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE))
enable_kernel_fp();
#elif defined(CONFIG_ARM64)
kernel_neon_begin();
@@ -125,11 +121,7 @@ void dc_fpu_end(const char *function_name, const int line)
#if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH)
kernel_fpu_end();
#elif defined(CONFIG_PPC64)
- if (cpu_has_feature(CPU_FTR_VSX_COMP))
- disable_kernel_vsx();
- else if (cpu_has_feature(CPU_FTR_ALTIVEC_COMP))
- disable_kernel_altivec();
- else if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE))
+ if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE))
disable_kernel_fp();
#elif defined(CONFIG_ARM64)
kernel_neon_end();
diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
index 6042a5a6a44f..554c39024a40 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
@@ -31,7 +31,7 @@ dml_ccflags := $(dml_ccflags-y) -msse
endif

ifdef CONFIG_PPC64
-dml_ccflags := -mhard-float -maltivec
+dml_ccflags := -mhard-float
endif

ifdef CONFIG_ARM64
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
index acff3449b8d7..7b51364084b5 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
@@ -30,7 +30,7 @@ dml2_ccflags := $(dml2_ccflags-y) -msse
endif

ifdef CONFIG_PPC64
-dml2_ccflags := -mhard-float -maltivec
+dml2_ccflags := -mhard-float
endif

ifdef CONFIG_ARM64

2023-12-11 16:08:53

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [RFC PATCH 06/12] LoongArch: Implement ARCH_HAS_KERNEL_FPU_SUPPORT

On Thu, Dec 07, 2023 at 09:54:36PM -0800, Samuel Holland wrote:
> LoongArch already provides kernel_fpu_begin() and kernel_fpu_end() in
> asm/fpu.h, so it only needs to add kernel_fpu_available() and export
> the CFLAGS adjustments.

Looks good:

Reviewed-by: Christoph Hellwig <[email protected]>

2023-12-11 16:08:54

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [RFC PATCH 07/12] powerpc: Implement ARCH_HAS_KERNEL_FPU_SUPPORT

On Thu, Dec 07, 2023 at 09:54:37PM -0800, Samuel Holland wrote:
> PowerPC provides an equivalent to the common kernel-mode FPU API, but in
> a different header and using different function names. The PowerPC API
> also requires a non-preemptible context. Add a wrapper header, and
> export the CFLAGS adjustments.

Looks good:

Reviewed-by: Christoph Hellwig <[email protected]>

2023-12-11 16:09:39

by Christoph Hellwig

[permalink] [raw]

2023-12-11 16:11:46

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [RFC PATCH 09/12] riscv: Add support for kernel-mode FPU

> +#ifdef __riscv_f
> +
> +#define kernel_fpu_begin() \
> + static_assert(false, "floating-point code must use a separate translation unit")
> +#define kernel_fpu_end() kernel_fpu_begin()
> +
> +#else
> +
> +void kernel_fpu_begin(void);
> +void kernel_fpu_end(void);
> +
> +#endif

I'll assume this is related to trick that places code in a separate
translation unit, but I fail to understand it. Can you add a comment
explaining it?

2023-12-11 16:17:23

by Samuel Holland

[permalink] [raw]
Subject: Re: [RFC PATCH 09/12] riscv: Add support for kernel-mode FPU

On 2023-12-11 10:11 AM, Christoph Hellwig wrote:
>> +#ifdef __riscv_f
>> +
>> +#define kernel_fpu_begin() \
>> + static_assert(false, "floating-point code must use a separate translation unit")
>> +#define kernel_fpu_end() kernel_fpu_begin()
>> +
>> +#else
>> +
>> +void kernel_fpu_begin(void);
>> +void kernel_fpu_end(void);
>> +
>> +#endif
>
> I'll assume this is related to trick that places code in a separate
> translation unit, but I fail to understand it. Can you add a comment
> explaining it?

Yes, I can add a comment. Here, __riscv_f refers to RISC-V's F extension for
single-precision floating point, which is enabled by CC_FLAGS_FPU.

2023-12-11 16:18:59

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [RFC PATCH 11/12] selftests/fpu: Move FP code to a separate translation unit

> obj-$(CONFIG_TEST_FPU) += test_fpu.o
> -CFLAGS_test_fpu.o += $(FPU_CFLAGS)
> +test_fpu-y := test_fpu_glue.o test_fpu_impl.o
> +CFLAGS_test_fpu_impl.o += $(FPU_CFLAGS)

Btw, I really wonder if having a

modname-fpu += foo.o

syntax in kbuild wouldn't be preferable to this. Of coure that requires
someone who understands kbuild inside out.

> +int test_fpu(void);

This needs to go into a header.

And I think I underatand your way to enforce the use of a separate
compilation unit in the riscv patch now.

Can we just make that generic, e.g. have a <linux/fpu.h> that wraps
<asm/fpu.h> that does the guard based on a
-D_LINUX_FPU_COMPILATION_UNIT=1 on the command line so that all the
code becomes fully portable? Any legacy arch specific fpu users not
using <linux/fpu.h> would not be affected by it, although it would be
great to eventually migrate them to the common scheme.

2023-12-13 16:14:12

by WANG Xuerui

[permalink] [raw]
Subject: Re: [RFC PATCH 06/12] LoongArch: Implement ARCH_HAS_KERNEL_FPU_SUPPORT

On 12/8/23 13:54, Samuel Holland wrote:
> LoongArch already provides kernel_fpu_begin() and kernel_fpu_end() in
> asm/fpu.h, so it only needs to add kernel_fpu_available() and export
> the CFLAGS adjustments.
>
> Signed-off-by: Samuel Holland <[email protected]>
> ---
>
> arch/loongarch/Kconfig | 1 +
> arch/loongarch/Makefile | 5 ++++-
> arch/loongarch/include/asm/fpu.h | 1 +
> 3 files changed, 6 insertions(+), 1 deletion(-)

This is all intuitive wrapping, so:

Acked-by: WANG Xuerui <[email protected]>

Thanks!

--
WANG "xen0n" Xuerui

Linux/LoongArch mailing list: https://lore.kernel.org/loongarch/

2023-12-14 01:03:36

by Samuel Holland

[permalink] [raw]
Subject: Re: [RFC PATCH 10/12] drm/amd/display: Use ARCH_HAS_KERNEL_FPU_SUPPORT

On 2023-12-11 6:23 AM, Michael Ellerman wrote:
> Hi Samuel,
>
> Thanks for trying to clean all this up.
>
> One problem below.
>
> Samuel Holland <[email protected]> writes:
>> Now that all previously-supported architectures select
>> ARCH_HAS_KERNEL_FPU_SUPPORT, this code can depend on that symbol instead
>> of the existing list of architectures. It can also take advantage of the
>> common kernel-mode FPU API and method of adjusting CFLAGS.
>>
>> Signed-off-by: Samuel Holland <[email protected]>
> ...
>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
>> index 4ae4720535a5..b64f917174ca 100644
>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
>> @@ -87,20 +78,9 @@ void dc_fpu_begin(const char *function_name, const int line)
>> WARN_ON_ONCE(!in_task());
>> preempt_disable();
>> depth = __this_cpu_inc_return(fpu_recursion_depth);
>> -
>> if (depth == 1) {
>> -#if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH)
>> + BUG_ON(!kernel_fpu_available());
>> kernel_fpu_begin();
>> -#elif defined(CONFIG_PPC64)
>> - if (cpu_has_feature(CPU_FTR_VSX_COMP))
>> - enable_kernel_vsx();
>> - else if (cpu_has_feature(CPU_FTR_ALTIVEC_COMP))
>> - enable_kernel_altivec();
>
> Note altivec.
>
>> - else if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE))
>> - enable_kernel_fp();
>> -#elif defined(CONFIG_ARM64)
>> - kernel_neon_begin();
>> -#endif
>> }
>>
>> TRACE_DCN_FPU(true, function_name, line, depth);
>> diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
>> index ea7d60f9a9b4..5aad0f572ba3 100644
>> --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
>> +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
>> @@ -25,40 +25,8 @@
>> # It provides the general basic services required by other DAL
>> # subcomponents.
>>
>> -ifdef CONFIG_X86
>> -dml_ccflags-$(CONFIG_CC_IS_GCC) := -mhard-float
>> -dml_ccflags := $(dml_ccflags-y) -msse
>> -endif
>> -
>> -ifdef CONFIG_PPC64
>> -dml_ccflags := -mhard-float -maltivec
>> -endif
>
> And altivec is enabled in the flags there.
>
> That doesn't match your implementation for powerpc in patch 7, which
> only deals with float.
>
> I suspect the AMD driver actually doesn't need altivec enabled, but I
> don't know that for sure. It compiles without it, but I don't have a GPU
> to actually test. I've added Timothy on Cc who added the support for
> powerpc to the driver originally, hopefully he has a test system.

I tested this series on a POWER9 system with an AMD Radeon RX 6400 GPU (which
requires this FPU code to initialize), and got functioning graphics output.

> Anyway if that's true that it doesn't need altivec we should probably do
> a lead-up patch that drops altivec from the AMD driver explicitly, eg.
> as below.

That makes sense to me. Do you want to provide your Signed-off-by so I can send
this patch with your authorship?

Regards,
Samuel

> cheers
>
>
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
> index 4ae4720535a5..0de16796466b 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
> @@ -92,11 +92,7 @@ void dc_fpu_begin(const char *function_name, const int line)
> #if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH)
> kernel_fpu_begin();
> #elif defined(CONFIG_PPC64)
> - if (cpu_has_feature(CPU_FTR_VSX_COMP))
> - enable_kernel_vsx();
> - else if (cpu_has_feature(CPU_FTR_ALTIVEC_COMP))
> - enable_kernel_altivec();
> - else if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE))
> + if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE))
> enable_kernel_fp();
> #elif defined(CONFIG_ARM64)
> kernel_neon_begin();
> @@ -125,11 +121,7 @@ void dc_fpu_end(const char *function_name, const int line)
> #if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH)
> kernel_fpu_end();
> #elif defined(CONFIG_PPC64)
> - if (cpu_has_feature(CPU_FTR_VSX_COMP))
> - disable_kernel_vsx();
> - else if (cpu_has_feature(CPU_FTR_ALTIVEC_COMP))
> - disable_kernel_altivec();
> - else if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE))
> + if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE))
> disable_kernel_fp();
> #elif defined(CONFIG_ARM64)
> kernel_neon_end();
> diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
> index 6042a5a6a44f..554c39024a40 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
> +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
> @@ -31,7 +31,7 @@ dml_ccflags := $(dml_ccflags-y) -msse
> endif
>
> ifdef CONFIG_PPC64
> -dml_ccflags := -mhard-float -maltivec
> +dml_ccflags := -mhard-float
> endif
>
> ifdef CONFIG_ARM64
> diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
> index acff3449b8d7..7b51364084b5 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile
> +++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
> @@ -30,7 +30,7 @@ dml2_ccflags := $(dml2_ccflags-y) -msse
> endif
>
> ifdef CONFIG_PPC64
> -dml2_ccflags := -mhard-float -maltivec
> +dml2_ccflags := -mhard-float
> endif
>
> ifdef CONFIG_ARM64

2023-12-14 03:26:06

by Timothy Pearson

[permalink] [raw]
Subject: Re: [RFC PATCH 10/12] drm/amd/display: Use ARCH_HAS_KERNEL_FPU_SUPPORT



----- Original Message -----
> From: "Samuel Holland" <[email protected]>
> To: "Michael Ellerman" <[email protected]>
> Cc: "linux-kernel" <[email protected]>, "amd-gfx" <[email protected]>, "linux-arch"
> <[email protected]>, "linux-arm-kernel" <[email protected]>, [email protected],
> "linuxppc-dev" <[email protected]>, "x86" <[email protected]>, [email protected], "Christoph
> Hellwig" <[email protected]>, "Timothy Pearson" <[email protected]>
> Sent: Wednesday, December 13, 2023 7:03:20 PM
> Subject: Re: [RFC PATCH 10/12] drm/amd/display: Use ARCH_HAS_KERNEL_FPU_SUPPORT

> On 2023-12-11 6:23 AM, Michael Ellerman wrote:
>> Hi Samuel,
>>
>> Thanks for trying to clean all this up.
>>
>> One problem below.
>>
>> Samuel Holland <[email protected]> writes:
>>> Now that all previously-supported architectures select
>>> ARCH_HAS_KERNEL_FPU_SUPPORT, this code can depend on that symbol instead
>>> of the existing list of architectures. It can also take advantage of the
>>> common kernel-mode FPU API and method of adjusting CFLAGS.
>>>
>>> Signed-off-by: Samuel Holland <[email protected]>
>> ...
>>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
>>> b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
>>> index 4ae4720535a5..b64f917174ca 100644
>>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
>>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
>>> @@ -87,20 +78,9 @@ void dc_fpu_begin(const char *function_name, const int line)
>>> WARN_ON_ONCE(!in_task());
>>> preempt_disable();
>>> depth = __this_cpu_inc_return(fpu_recursion_depth);
>>> -
>>> if (depth == 1) {
>>> -#if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH)
>>> + BUG_ON(!kernel_fpu_available());
>>> kernel_fpu_begin();
>>> -#elif defined(CONFIG_PPC64)
>>> - if (cpu_has_feature(CPU_FTR_VSX_COMP))
>>> - enable_kernel_vsx();
>>> - else if (cpu_has_feature(CPU_FTR_ALTIVEC_COMP))
>>> - enable_kernel_altivec();
>>
>> Note altivec.
>>
>>> - else if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE))
>>> - enable_kernel_fp();
>>> -#elif defined(CONFIG_ARM64)
>>> - kernel_neon_begin();
>>> -#endif
>>> }
>>>
>>> TRACE_DCN_FPU(true, function_name, line, depth);
>>> diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile
>>> b/drivers/gpu/drm/amd/display/dc/dml/Makefile
>>> index ea7d60f9a9b4..5aad0f572ba3 100644
>>> --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
>>> +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
>>> @@ -25,40 +25,8 @@
>>> # It provides the general basic services required by other DAL
>>> # subcomponents.
>>>
>>> -ifdef CONFIG_X86
>>> -dml_ccflags-$(CONFIG_CC_IS_GCC) := -mhard-float
>>> -dml_ccflags := $(dml_ccflags-y) -msse
>>> -endif
>>> -
>>> -ifdef CONFIG_PPC64
>>> -dml_ccflags := -mhard-float -maltivec
>>> -endif
>>
>> And altivec is enabled in the flags there.
>>
>> That doesn't match your implementation for powerpc in patch 7, which
>> only deals with float.
>>
>> I suspect the AMD driver actually doesn't need altivec enabled, but I
>> don't know that for sure. It compiles without it, but I don't have a GPU
>> to actually test. I've added Timothy on Cc who added the support for
>> powerpc to the driver originally, hopefully he has a test system.

If you would like me to test I'm happy to do so, but I am travelling until Friday so would need to wait until then.

Thanks!

2023-12-14 04:45:51

by Michael Ellerman

[permalink] [raw]
Subject: Re: [RFC PATCH 10/12] drm/amd/display: Use ARCH_HAS_KERNEL_FPU_SUPPORT

Samuel Holland <[email protected]> writes:
> On 2023-12-11 6:23 AM, Michael Ellerman wrote:
>> Hi Samuel,
>>
>> Thanks for trying to clean all this up.
>>
>> One problem below.
>>
>> Samuel Holland <[email protected]> writes:
>>> Now that all previously-supported architectures select
>>> ARCH_HAS_KERNEL_FPU_SUPPORT, this code can depend on that symbol instead
>>> of the existing list of architectures. It can also take advantage of the
>>> common kernel-mode FPU API and method of adjusting CFLAGS.
>>>
>>> Signed-off-by: Samuel Holland <[email protected]>
>> ...
>>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
>>> index 4ae4720535a5..b64f917174ca 100644
>>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
>>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
>>> @@ -87,20 +78,9 @@ void dc_fpu_begin(const char *function_name, const int line)
>>> WARN_ON_ONCE(!in_task());
>>> preempt_disable();
>>> depth = __this_cpu_inc_return(fpu_recursion_depth);
>>> -
>>> if (depth == 1) {
>>> -#if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH)
>>> + BUG_ON(!kernel_fpu_available());
>>> kernel_fpu_begin();
>>> -#elif defined(CONFIG_PPC64)
>>> - if (cpu_has_feature(CPU_FTR_VSX_COMP))
>>> - enable_kernel_vsx();
>>> - else if (cpu_has_feature(CPU_FTR_ALTIVEC_COMP))
>>> - enable_kernel_altivec();
>>
>> Note altivec.
>>
>>> - else if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE))
>>> - enable_kernel_fp();
>>> -#elif defined(CONFIG_ARM64)
>>> - kernel_neon_begin();
>>> -#endif
>>> }
>>>
>>> TRACE_DCN_FPU(true, function_name, line, depth);
>>> diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
>>> index ea7d60f9a9b4..5aad0f572ba3 100644
>>> --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
>>> +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
>>> @@ -25,40 +25,8 @@
>>> # It provides the general basic services required by other DAL
>>> # subcomponents.
>>>
>>> -ifdef CONFIG_X86
>>> -dml_ccflags-$(CONFIG_CC_IS_GCC) := -mhard-float
>>> -dml_ccflags := $(dml_ccflags-y) -msse
>>> -endif
>>> -
>>> -ifdef CONFIG_PPC64
>>> -dml_ccflags := -mhard-float -maltivec
>>> -endif
>>
>> And altivec is enabled in the flags there.
>>
>> That doesn't match your implementation for powerpc in patch 7, which
>> only deals with float.
>>
>> I suspect the AMD driver actually doesn't need altivec enabled, but I
>> don't know that for sure. It compiles without it, but I don't have a GPU
>> to actually test. I've added Timothy on Cc who added the support for
>> powerpc to the driver originally, hopefully he has a test system.
>
> I tested this series on a POWER9 system with an AMD Radeon RX 6400 GPU (which
> requires this FPU code to initialize), and got functioning graphics output.

Awesome.

>> Anyway if that's true that it doesn't need altivec we should probably do
>> a lead-up patch that drops altivec from the AMD driver explicitly, eg.
>> as below.
>
> That makes sense to me. Do you want to provide your Signed-off-by so I can send
> this patch with your authorship?

Yeah that'd be great. Patch below. Feel free to adjust the commit
message as you see fit.

cheers


From c8a2862d2ebe76a023eceb3267fd85262925c0ba Mon Sep 17 00:00:00 2001
From: Michael Ellerman <[email protected]>
Date: Thu, 14 Dec 2023 15:39:05 +1100
Subject: [PATCH] drm/amd/display: Only use hard-float, not altivec on powerpc

The compiler flags enable altivec, but that is not required, hard-float
is sufficient for the code to build and function.

Drop altivec from the compiler flags and adjust the enable/disable code
to only enable FPU use.

Signed-off-by: Michael Ellerman <[email protected]>
---
drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c | 12 ++----------
drivers/gpu/drm/amd/display/dc/dml/Makefile | 2 +-
drivers/gpu/drm/amd/display/dc/dml2/Makefile | 2 +-
3 files changed, 4 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
index 4ae4720535a5..0de16796466b 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
@@ -92,11 +92,7 @@ void dc_fpu_begin(const char *function_name, const int line)
#if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH)
kernel_fpu_begin();
#elif defined(CONFIG_PPC64)
- if (cpu_has_feature(CPU_FTR_VSX_COMP))
- enable_kernel_vsx();
- else if (cpu_has_feature(CPU_FTR_ALTIVEC_COMP))
- enable_kernel_altivec();
- else if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE))
+ if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE))
enable_kernel_fp();
#elif defined(CONFIG_ARM64)
kernel_neon_begin();
@@ -125,11 +121,7 @@ void dc_fpu_end(const char *function_name, const int line)
#if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH)
kernel_fpu_end();
#elif defined(CONFIG_PPC64)
- if (cpu_has_feature(CPU_FTR_VSX_COMP))
- disable_kernel_vsx();
- else if (cpu_has_feature(CPU_FTR_ALTIVEC_COMP))
- disable_kernel_altivec();
- else if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE))
+ if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE))
disable_kernel_fp();
#elif defined(CONFIG_ARM64)
kernel_neon_end();
diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
index 6042a5a6a44f..554c39024a40 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
@@ -31,7 +31,7 @@ dml_ccflags := $(dml_ccflags-y) -msse
endif

ifdef CONFIG_PPC64
-dml_ccflags := -mhard-float -maltivec
+dml_ccflags := -mhard-float
endif

ifdef CONFIG_ARM64
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
index acff3449b8d7..7b51364084b5 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
@@ -30,7 +30,7 @@ dml2_ccflags := $(dml2_ccflags-y) -msse
endif

ifdef CONFIG_PPC64
-dml2_ccflags := -mhard-float -maltivec
+dml2_ccflags := -mhard-float
endif

ifdef CONFIG_ARM64
--
2.43.0