RISCV_HWPROBE_KEY_CPUPERF_0 was mistakenly flagged as a bitmask in
hwprobe_key_is_bitmask(), when in reality it was an enum value. This
causes problems when used in conjunction with RISCV_HWPROBE_WHICH_CPUS,
since SLOW, FAST, and EMULATED have values whose bits overlap with
each other. If the caller asked for the set of CPUs that was SLOW or
EMULATED, the returned set would also include CPUs that were FAST.
Introduce a new hwprobe key, RISCV_HWPROBE_KEY_MISALIGNED_PERF, which
returns the same values in response to a direct query (with no flags),
but is properly handled as an enumerated value. As a result, SLOW,
FAST, and EMULATED are all correctly treated as distinct values under
the new key when queried with the WHICH_CPUS flag.
Leave the old key in place to avoid disturbing applications which may
have already come to rely on the broken behavior.
Fixes: e178bf146e4b ("RISC-V: hwprobe: Introduce which-cpus flag")
Signed-off-by: Evan Green <[email protected]>
---
Note: Yangyu also has a fix out for this issue at [1]. That fix is much
tidier, but comes with the slight risk that some very broken userspace
application may break now that FAST cpus are not included for the query
of which cpus are SLOW or EMULATED. I wanted to get this fix out so that
we have both as options, and can discuss. These fixes are mutually
exclusive, don't take both.
[1] https://lore.kernel.org/linux-riscv/[email protected]/
---
Documentation/arch/riscv/hwprobe.rst | 8 ++++++--
arch/riscv/include/asm/hwprobe.h | 2 +-
arch/riscv/include/uapi/asm/hwprobe.h | 1 +
arch/riscv/kernel/sys_hwprobe.c | 1 +
4 files changed, 9 insertions(+), 3 deletions(-)
diff --git a/Documentation/arch/riscv/hwprobe.rst b/Documentation/arch/riscv/hwprobe.rst
index 204cd4433af5..616ee372adaf 100644
--- a/Documentation/arch/riscv/hwprobe.rst
+++ b/Documentation/arch/riscv/hwprobe.rst
@@ -192,8 +192,12 @@ The following keys are defined:
supported as defined in the RISC-V ISA manual starting from commit
d8ab5c78c207 ("Zihintpause is ratified").
-* :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: A bitmask that contains performance
- information about the selected set of processors.
+* :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: Deprecated. Returns similar values to
+ :c:macro:`RISCV_HWPROBE_KEY_MISALIGNED_PERF`, but the key was mistakenly
+ classified as a bitmask rather than a value.
+
+* :c:macro:`RISCV_HWPROBE_KEY_MISALIGNED_PERF`: An enum value describing the
+ performance of misaligned scalar accesses on the selected set of processors.
* :c:macro:`RISCV_HWPROBE_MISALIGNED_UNKNOWN`: The performance of misaligned
accesses is unknown.
diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h
index 630507dff5ea..150a9877b0af 100644
--- a/arch/riscv/include/asm/hwprobe.h
+++ b/arch/riscv/include/asm/hwprobe.h
@@ -8,7 +8,7 @@
#include <uapi/asm/hwprobe.h>
-#define RISCV_HWPROBE_MAX_KEY 6
+#define RISCV_HWPROBE_MAX_KEY 7
static inline bool riscv_hwprobe_key_is_valid(__s64 key)
{
diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h
index dda76a05420b..bc34e33fef23 100644
--- a/arch/riscv/include/uapi/asm/hwprobe.h
+++ b/arch/riscv/include/uapi/asm/hwprobe.h
@@ -68,6 +68,7 @@ struct riscv_hwprobe {
#define RISCV_HWPROBE_MISALIGNED_UNSUPPORTED (4 << 0)
#define RISCV_HWPROBE_MISALIGNED_MASK (7 << 0)
#define RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE 6
+#define RISCV_HWPROBE_KEY_MISALIGNED_PERF 7
/* Increase RISCV_HWPROBE_MAX_KEY when adding items. */
/* Flags */
diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c
index 969ef3d59dbe..c8b7d57eb55e 100644
--- a/arch/riscv/kernel/sys_hwprobe.c
+++ b/arch/riscv/kernel/sys_hwprobe.c
@@ -208,6 +208,7 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair,
break;
case RISCV_HWPROBE_KEY_CPUPERF_0:
+ case RISCV_HWPROBE_KEY_MISALIGNED_PERF:
pair->value = hwprobe_misaligned(cpus);
break;
--
2.34.1
On Wed, May 29, 2024 at 11:26:48AM -0700, Evan Green wrote:
> RISCV_HWPROBE_KEY_CPUPERF_0 was mistakenly flagged as a bitmask in
> hwprobe_key_is_bitmask(), when in reality it was an enum value. This
> causes problems when used in conjunction with RISCV_HWPROBE_WHICH_CPUS,
> since SLOW, FAST, and EMULATED have values whose bits overlap with
> each other. If the caller asked for the set of CPUs that was SLOW or
> EMULATED, the returned set would also include CPUs that were FAST.
>
> Introduce a new hwprobe key, RISCV_HWPROBE_KEY_MISALIGNED_PERF, which
> returns the same values in response to a direct query (with no flags),
> but is properly handled as an enumerated value. As a result, SLOW,
> FAST, and EMULATED are all correctly treated as distinct values under
> the new key when queried with the WHICH_CPUS flag.
>
> Leave the old key in place to avoid disturbing applications which may
> have already come to rely on the broken behavior.
>
> Fixes: e178bf146e4b ("RISC-V: hwprobe: Introduce which-cpus flag")
> Signed-off-by: Evan Green <[email protected]>
>
> ---
>
>
> Note: Yangyu also has a fix out for this issue at [1]. That fix is much
> tidier, but comes with the slight risk that some very broken userspace
> application may break now that FAST cpus are not included for the query
> of which cpus are SLOW or EMULATED. I wanted to get this fix out so that
> we have both as options, and can discuss. These fixes are mutually
> exclusive, don't take both.
>
> [1] https://lore.kernel.org/linux-riscv/[email protected]/
>
> ---
> Documentation/arch/riscv/hwprobe.rst | 8 ++++++--
> arch/riscv/include/asm/hwprobe.h | 2 +-
> arch/riscv/include/uapi/asm/hwprobe.h | 1 +
> arch/riscv/kernel/sys_hwprobe.c | 1 +
> 4 files changed, 9 insertions(+), 3 deletions(-)
>
> diff --git a/Documentation/arch/riscv/hwprobe.rst b/Documentation/arch/riscv/hwprobe.rst
> index 204cd4433af5..616ee372adaf 100644
> --- a/Documentation/arch/riscv/hwprobe.rst
> +++ b/Documentation/arch/riscv/hwprobe.rst
> @@ -192,8 +192,12 @@ The following keys are defined:
> supported as defined in the RISC-V ISA manual starting from commit
> d8ab5c78c207 ("Zihintpause is ratified").
>
> -* :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: A bitmask that contains performance
> - information about the selected set of processors.
> +* :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: Deprecated. Returns similar values to
> + :c:macro:`RISCV_HWPROBE_KEY_MISALIGNED_PERF`, but the key was mistakenly
> + classified as a bitmask rather than a value.
> +
> +* :c:macro:`RISCV_HWPROBE_KEY_MISALIGNED_PERF`: An enum value describing the
> + performance of misaligned scalar accesses on the selected set of processors.
>
> * :c:macro:`RISCV_HWPROBE_MISALIGNED_UNKNOWN`: The performance of misaligned
> accesses is unknown.
> diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h
> index 630507dff5ea..150a9877b0af 100644
> --- a/arch/riscv/include/asm/hwprobe.h
> +++ b/arch/riscv/include/asm/hwprobe.h
> @@ -8,7 +8,7 @@
>
> #include <uapi/asm/hwprobe.h>
>
> -#define RISCV_HWPROBE_MAX_KEY 6
> +#define RISCV_HWPROBE_MAX_KEY 7
>
> static inline bool riscv_hwprobe_key_is_valid(__s64 key)
> {
> diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h
> index dda76a05420b..bc34e33fef23 100644
> --- a/arch/riscv/include/uapi/asm/hwprobe.h
> +++ b/arch/riscv/include/uapi/asm/hwprobe.h
> @@ -68,6 +68,7 @@ struct riscv_hwprobe {
> #define RISCV_HWPROBE_MISALIGNED_UNSUPPORTED (4 << 0)
> #define RISCV_HWPROBE_MISALIGNED_MASK (7 << 0)
> #define RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE 6
> +#define RISCV_HWPROBE_KEY_MISALIGNED_PERF 7
> /* Increase RISCV_HWPROBE_MAX_KEY when adding items. */
>
> /* Flags */
> diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c
> index 969ef3d59dbe..c8b7d57eb55e 100644
> --- a/arch/riscv/kernel/sys_hwprobe.c
> +++ b/arch/riscv/kernel/sys_hwprobe.c
> @@ -208,6 +208,7 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair,
> break;
>
> case RISCV_HWPROBE_KEY_CPUPERF_0:
> + case RISCV_HWPROBE_KEY_MISALIGNED_PERF:
> pair->value = hwprobe_misaligned(cpus);
> break;
>
> --
> 2.34.1
>
>
> _______________________________________________
> linux-riscv mailing list
> [email protected]
> http://lists.infradead.org/mailman/listinfo/linux-riscv
I am not sure what the "best" solution is here but making a new key is
most logical to me.
Reviewed-by: Charlie Jenkins <[email protected]>
On 2024/5/30 02:26, Evan Green wrote:
> RISCV_HWPROBE_KEY_CPUPERF_0 was mistakenly flagged as a bitmask in
> hwprobe_key_is_bitmask(), when in reality it was an enum value. This
> causes problems when used in conjunction with RISCV_HWPROBE_WHICH_CPUS,
> since SLOW, FAST, and EMULATED have values whose bits overlap with
> each other. If the caller asked for the set of CPUs that was SLOW or
> EMULATED, the returned set would also include CPUs that were FAST.
>
> Introduce a new hwprobe key, RISCV_HWPROBE_KEY_MISALIGNED_PERF, which
> returns the same values in response to a direct query (with no flags),
> but is properly handled as an enumerated value. As a result, SLOW,
> FAST, and EMULATED are all correctly treated as distinct values under
> the new key when queried with the WHICH_CPUS flag.
>
> Leave the old key in place to avoid disturbing applications which may
> have already come to rely on the broken behavior.
>
> Fixes: e178bf146e4b ("RISC-V: hwprobe: Introduce which-cpus flag")
> Signed-off-by: Evan Green <[email protected]>
>
> ---
>
>
> Note: Yangyu also has a fix out for this issue at [1]. That fix is much
> tidier, but comes with the slight risk that some very broken userspace
> application may break now that FAST cpus are not included for the query
> of which cpus are SLOW or EMULATED.
Indeed. Since the value of FAST is 0b11, the SLOW and EMULATED are 0b10 and
0b01 respectively.
When this key is treated as a bitmask and query with
RISCV_HWPROBE_WHICH_CPUS if a CPU has a superset bitmask of the requested
value on the requested key, it will remain in the CPU mask. Otherwise, the
CPU will be clear in the CPU mask. But when a key is treated as a value, we
will just do a comparison. if it is not equal, then the CPU will be clear
in the CPU. That's why FAST cpus are included when querying with SLOW or
EMULATED with RISCV_HWPROBE_KEY_CPUPERF_0 key now.
For me, deprecating the original hwprobe key and introducing a new key
would be a better solution than changing the behavior as my patch did.
> I wanted to get this fix out so that
> we have both as options, and can discuss. These fixes are mutually
> exclusive, don't take both.
It's better to note this strange behavior on
Documentation/arch/riscv/hwprobe.rst so users can quickly understand the
differences on the behavior of these two keys.
The C code part looks good to me.
>
> [1] https://lore.kernel.org/linux-riscv/[email protected]/
>
> ---
> Documentation/arch/riscv/hwprobe.rst | 8 ++++++--
> arch/riscv/include/asm/hwprobe.h | 2 +-
> arch/riscv/include/uapi/asm/hwprobe.h | 1 +
> arch/riscv/kernel/sys_hwprobe.c | 1 +
> 4 files changed, 9 insertions(+), 3 deletions(-)
>
> diff --git a/Documentation/arch/riscv/hwprobe.rst b/Documentation/arch/riscv/hwprobe.rst
> index 204cd4433af5..616ee372adaf 100644
> --- a/Documentation/arch/riscv/hwprobe.rst
> +++ b/Documentation/arch/riscv/hwprobe.rst
> @@ -192,8 +192,12 @@ The following keys are defined:
> supported as defined in the RISC-V ISA manual starting from commit
> d8ab5c78c207 ("Zihintpause is ratified").
>
> -* :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: A bitmask that contains performance
> - information about the selected set of processors.
> +* :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: Deprecated. Returns similar values to
> + :c:macro:`RISCV_HWPROBE_KEY_MISALIGNED_PERF`, but the key was mistakenly
> + classified as a bitmask rather than a value.
> +
> +* :c:macro:`RISCV_HWPROBE_KEY_MISALIGNED_PERF`: An enum value describing the
> + performance of misaligned scalar accesses on the selected set of processors.
>
> * :c:macro:`RISCV_HWPROBE_MISALIGNED_UNKNOWN`: The performance of misaligned
> accesses is unknown.
> diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h
> index 630507dff5ea..150a9877b0af 100644
> --- a/arch/riscv/include/asm/hwprobe.h
> +++ b/arch/riscv/include/asm/hwprobe.h
> @@ -8,7 +8,7 @@
>
> #include <uapi/asm/hwprobe.h>
>
> -#define RISCV_HWPROBE_MAX_KEY 6
> +#define RISCV_HWPROBE_MAX_KEY 7
>
> static inline bool riscv_hwprobe_key_is_valid(__s64 key)
> {
> diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h
> index dda76a05420b..bc34e33fef23 100644
> --- a/arch/riscv/include/uapi/asm/hwprobe.h
> +++ b/arch/riscv/include/uapi/asm/hwprobe.h
> @@ -68,6 +68,7 @@ struct riscv_hwprobe {
> #define RISCV_HWPROBE_MISALIGNED_UNSUPPORTED (4 << 0)
> #define RISCV_HWPROBE_MISALIGNED_MASK (7 << 0)
> #define RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE 6
> +#define RISCV_HWPROBE_KEY_MISALIGNED_PERF 7
> /* Increase RISCV_HWPROBE_MAX_KEY when adding items. */
>
> /* Flags */
> diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c
> index 969ef3d59dbe..c8b7d57eb55e 100644
> --- a/arch/riscv/kernel/sys_hwprobe.c
> +++ b/arch/riscv/kernel/sys_hwprobe.c
> @@ -208,6 +208,7 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair,
> break;
>
> case RISCV_HWPROBE_KEY_CPUPERF_0:
> + case RISCV_HWPROBE_KEY_MISALIGNED_PERF:
> pair->value = hwprobe_misaligned(cpus);
> break;
>
On Wed, May 29, 2024 at 11:26:48AM GMT, Evan Green wrote:
> RISCV_HWPROBE_KEY_CPUPERF_0 was mistakenly flagged as a bitmask in
> hwprobe_key_is_bitmask(), when in reality it was an enum value. This
> causes problems when used in conjunction with RISCV_HWPROBE_WHICH_CPUS,
> since SLOW, FAST, and EMULATED have values whose bits overlap with
> each other. If the caller asked for the set of CPUs that was SLOW or
> EMULATED, the returned set would also include CPUs that were FAST.
>
> Introduce a new hwprobe key, RISCV_HWPROBE_KEY_MISALIGNED_PERF, which
> returns the same values in response to a direct query (with no flags),
> but is properly handled as an enumerated value. As a result, SLOW,
> FAST, and EMULATED are all correctly treated as distinct values under
> the new key when queried with the WHICH_CPUS flag.
>
> Leave the old key in place to avoid disturbing applications which may
> have already come to rely on the broken behavior.
I appreciate the paranoia, even if I think we could probably get away
with fixing CPUPERF_0.
>
> Fixes: e178bf146e4b ("RISC-V: hwprobe: Introduce which-cpus flag")
> Signed-off-by: Evan Green <[email protected]>
>
> ---
>
>
> Note: Yangyu also has a fix out for this issue at [1]. That fix is much
> tidier, but comes with the slight risk that some very broken userspace
> application may break now that FAST cpus are not included for the query
> of which cpus are SLOW or EMULATED. I wanted to get this fix out so that
> we have both as options, and can discuss. These fixes are mutually
> exclusive, don't take both.
>
> [1] https://lore.kernel.org/linux-riscv/[email protected]/
>
> ---
> Documentation/arch/riscv/hwprobe.rst | 8 ++++++--
> arch/riscv/include/asm/hwprobe.h | 2 +-
> arch/riscv/include/uapi/asm/hwprobe.h | 1 +
> arch/riscv/kernel/sys_hwprobe.c | 1 +
> 4 files changed, 9 insertions(+), 3 deletions(-)
>
> diff --git a/Documentation/arch/riscv/hwprobe.rst b/Documentation/arch/riscv/hwprobe.rst
> index 204cd4433af5..616ee372adaf 100644
> --- a/Documentation/arch/riscv/hwprobe.rst
> +++ b/Documentation/arch/riscv/hwprobe.rst
> @@ -192,8 +192,12 @@ The following keys are defined:
> supported as defined in the RISC-V ISA manual starting from commit
> d8ab5c78c207 ("Zihintpause is ratified").
>
> -* :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: A bitmask that contains performance
> - information about the selected set of processors.
> +* :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: Deprecated. Returns similar values to
> + :c:macro:`RISCV_HWPROBE_KEY_MISALIGNED_PERF`, but the key was mistakenly
> + classified as a bitmask rather than a value.
> +
> +* :c:macro:`RISCV_HWPROBE_KEY_MISALIGNED_PERF`: An enum value describing the
> + performance of misaligned scalar accesses on the selected set of processors.
>
> * :c:macro:`RISCV_HWPROBE_MISALIGNED_UNKNOWN`: The performance of misaligned
> accesses is unknown.
> diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h
> index 630507dff5ea..150a9877b0af 100644
> --- a/arch/riscv/include/asm/hwprobe.h
> +++ b/arch/riscv/include/asm/hwprobe.h
> @@ -8,7 +8,7 @@
>
> #include <uapi/asm/hwprobe.h>
>
> -#define RISCV_HWPROBE_MAX_KEY 6
> +#define RISCV_HWPROBE_MAX_KEY 7
>
> static inline bool riscv_hwprobe_key_is_valid(__s64 key)
> {
> diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h
> index dda76a05420b..bc34e33fef23 100644
> --- a/arch/riscv/include/uapi/asm/hwprobe.h
> +++ b/arch/riscv/include/uapi/asm/hwprobe.h
> @@ -68,6 +68,7 @@ struct riscv_hwprobe {
> #define RISCV_HWPROBE_MISALIGNED_UNSUPPORTED (4 << 0)
> #define RISCV_HWPROBE_MISALIGNED_MASK (7 << 0)
Can we also remove the unnecessary ( << 0) shifts for each of the
MISALIGNED_* values? The shifts imply bits of a bitmask (to me).
> #define RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE 6
> +#define RISCV_HWPROBE_KEY_MISALIGNED_PERF 7
> /* Increase RISCV_HWPROBE_MAX_KEY when adding items. */
>
> /* Flags */
> diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c
> index 969ef3d59dbe..c8b7d57eb55e 100644
> --- a/arch/riscv/kernel/sys_hwprobe.c
> +++ b/arch/riscv/kernel/sys_hwprobe.c
> @@ -208,6 +208,7 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair,
> break;
>
> case RISCV_HWPROBE_KEY_CPUPERF_0:
> + case RISCV_HWPROBE_KEY_MISALIGNED_PERF:
> pair->value = hwprobe_misaligned(cpus);
> break;
>
> --
> 2.34.1
>
Otherwise,
Reviewed-by: Andrew Jones <[email protected]>
Thanks,
drew
On Wed, 29 May 2024 20:36:45 PDT (-0700), [email protected] wrote:
> On 2024/5/30 02:26, Evan Green wrote:
>> RISCV_HWPROBE_KEY_CPUPERF_0 was mistakenly flagged as a bitmask in
>> hwprobe_key_is_bitmask(), when in reality it was an enum value. This
>> causes problems when used in conjunction with RISCV_HWPROBE_WHICH_CPUS,
>> since SLOW, FAST, and EMULATED have values whose bits overlap with
>> each other. If the caller asked for the set of CPUs that was SLOW or
>> EMULATED, the returned set would also include CPUs that were FAST.
>>
>> Introduce a new hwprobe key, RISCV_HWPROBE_KEY_MISALIGNED_PERF, which
>> returns the same values in response to a direct query (with no flags),
>> but is properly handled as an enumerated value. As a result, SLOW,
>> FAST, and EMULATED are all correctly treated as distinct values under
>> the new key when queried with the WHICH_CPUS flag.
>>
>> Leave the old key in place to avoid disturbing applications which may
>> have already come to rely on the broken behavior.
>>
>> Fixes: e178bf146e4b ("RISC-V: hwprobe: Introduce which-cpus flag")
>> Signed-off-by: Evan Green <[email protected]>
>>
>> ---
>>
>>
>> Note: Yangyu also has a fix out for this issue at [1]. That fix is much
>> tidier, but comes with the slight risk that some very broken userspace
>> application may break now that FAST cpus are not included for the query
>> of which cpus are SLOW or EMULATED.
>
> Indeed. Since the value of FAST is 0b11, the SLOW and EMULATED are 0b10 and
> 0b01 respectively.
>
> When this key is treated as a bitmask and query with
> RISCV_HWPROBE_WHICH_CPUS if a CPU has a superset bitmask of the requested
> value on the requested key, it will remain in the CPU mask. Otherwise, the
> CPU will be clear in the CPU mask. But when a key is treated as a value, we
> will just do a comparison. if it is not equal, then the CPU will be clear
> in the CPU. That's why FAST cpus are included when querying with SLOW or
> EMULATED with RISCV_HWPROBE_KEY_CPUPERF_0 key now.
>
> For me, deprecating the original hwprobe key and introducing a new key
> would be a better solution than changing the behavior as my patch did.
OK. I don't have a strong feeling either way: if someone has code that
tries to read this as a btimask then it'd be broken, but it would
technically be following the docs.
That said, we're relying on this as a pretty core userspace portability
construct. So maybe the right answer here is to just be really strict
about compatibility and eat the pain when we make a mistake, just to
make sure we set the right example about not breaking stuff.
So unless anyone's opposed, I'll pick this up for 6.11.
>> I wanted to get this fix out so that
>> we have both as options, and can discuss. These fixes are mutually
>> exclusive, don't take both.
>
> It's better to note this strange behavior on
> Documentation/arch/riscv/hwprobe.rst so users can quickly understand the
> differences on the behavior of these two keys.
>
> The C code part looks good to me.
>
>>
>> [1] https://lore.kernel.org/linux-riscv/[email protected]/
>>
>> ---
>> Documentation/arch/riscv/hwprobe.rst | 8 ++++++--
>> arch/riscv/include/asm/hwprobe.h | 2 +-
>> arch/riscv/include/uapi/asm/hwprobe.h | 1 +
>> arch/riscv/kernel/sys_hwprobe.c | 1 +
>> 4 files changed, 9 insertions(+), 3 deletions(-)
>>
>> diff --git a/Documentation/arch/riscv/hwprobe.rst b/Documentation/arch/riscv/hwprobe.rst
>> index 204cd4433af5..616ee372adaf 100644
>> --- a/Documentation/arch/riscv/hwprobe.rst
>> +++ b/Documentation/arch/riscv/hwprobe.rst
>> @@ -192,8 +192,12 @@ The following keys are defined:
>> supported as defined in the RISC-V ISA manual starting from commit
>> d8ab5c78c207 ("Zihintpause is ratified").
>>
>> -* :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: A bitmask that contains performance
>> - information about the selected set of processors.
>> +* :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: Deprecated. Returns similar values to
>> + :c:macro:`RISCV_HWPROBE_KEY_MISALIGNED_PERF`, but the key was mistakenly
>> + classified as a bitmask rather than a value.
>> +
>> +* :c:macro:`RISCV_HWPROBE_KEY_MISALIGNED_PERF`: An enum value describing the
>> + performance of misaligned scalar accesses on the selected set of processors.
>>
>> * :c:macro:`RISCV_HWPROBE_MISALIGNED_UNKNOWN`: The performance of misaligned
>> accesses is unknown.
>> diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h
>> index 630507dff5ea..150a9877b0af 100644
>> --- a/arch/riscv/include/asm/hwprobe.h
>> +++ b/arch/riscv/include/asm/hwprobe.h
>> @@ -8,7 +8,7 @@
>>
>> #include <uapi/asm/hwprobe.h>
>>
>> -#define RISCV_HWPROBE_MAX_KEY 6
>> +#define RISCV_HWPROBE_MAX_KEY 7
>>
>> static inline bool riscv_hwprobe_key_is_valid(__s64 key)
>> {
>> diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h
>> index dda76a05420b..bc34e33fef23 100644
>> --- a/arch/riscv/include/uapi/asm/hwprobe.h
>> +++ b/arch/riscv/include/uapi/asm/hwprobe.h
>> @@ -68,6 +68,7 @@ struct riscv_hwprobe {
>> #define RISCV_HWPROBE_MISALIGNED_UNSUPPORTED (4 << 0)
>> #define RISCV_HWPROBE_MISALIGNED_MASK (7 << 0)
>> #define RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE 6
>> +#define RISCV_HWPROBE_KEY_MISALIGNED_PERF 7
>> /* Increase RISCV_HWPROBE_MAX_KEY when adding items. */
>>
>> /* Flags */
>> diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c
>> index 969ef3d59dbe..c8b7d57eb55e 100644
>> --- a/arch/riscv/kernel/sys_hwprobe.c
>> +++ b/arch/riscv/kernel/sys_hwprobe.c
>> @@ -208,6 +208,7 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair,
>> break;
>>
>> case RISCV_HWPROBE_KEY_CPUPERF_0:
>> + case RISCV_HWPROBE_KEY_MISALIGNED_PERF:
>> pair->value = hwprobe_misaligned(cpus);
>> break;
>>
On Mon, 03 Jun 2024 10:57:21 PDT (-0700), Palmer Dabbelt wrote:
> On Wed, 29 May 2024 20:36:45 PDT (-0700), [email protected] wrote:
>> On 2024/5/30 02:26, Evan Green wrote:
>>> RISCV_HWPROBE_KEY_CPUPERF_0 was mistakenly flagged as a bitmask in
>>> hwprobe_key_is_bitmask(), when in reality it was an enum value. This
>>> causes problems when used in conjunction with RISCV_HWPROBE_WHICH_CPUS,
>>> since SLOW, FAST, and EMULATED have values whose bits overlap with
>>> each other. If the caller asked for the set of CPUs that was SLOW or
>>> EMULATED, the returned set would also include CPUs that were FAST.
>>>
>>> Introduce a new hwprobe key, RISCV_HWPROBE_KEY_MISALIGNED_PERF, which
>>> returns the same values in response to a direct query (with no flags),
>>> but is properly handled as an enumerated value. As a result, SLOW,
>>> FAST, and EMULATED are all correctly treated as distinct values under
>>> the new key when queried with the WHICH_CPUS flag.
>>>
>>> Leave the old key in place to avoid disturbing applications which may
>>> have already come to rely on the broken behavior.
>>>
>>> Fixes: e178bf146e4b ("RISC-V: hwprobe: Introduce which-cpus flag")
>>> Signed-off-by: Evan Green <[email protected]>
>>>
>>> ---
>>>
>>>
>>> Note: Yangyu also has a fix out for this issue at [1]. That fix is much
>>> tidier, but comes with the slight risk that some very broken userspace
>>> application may break now that FAST cpus are not included for the query
>>> of which cpus are SLOW or EMULATED.
>>
>> Indeed. Since the value of FAST is 0b11, the SLOW and EMULATED are 0b10 and
>> 0b01 respectively.
>>
>> When this key is treated as a bitmask and query with
>> RISCV_HWPROBE_WHICH_CPUS if a CPU has a superset bitmask of the requested
>> value on the requested key, it will remain in the CPU mask. Otherwise, the
>> CPU will be clear in the CPU mask. But when a key is treated as a value, we
>> will just do a comparison. if it is not equal, then the CPU will be clear
>> in the CPU. That's why FAST cpus are included when querying with SLOW or
>> EMULATED with RISCV_HWPROBE_KEY_CPUPERF_0 key now.
>>
>> For me, deprecating the original hwprobe key and introducing a new key
>> would be a better solution than changing the behavior as my patch did.
>
> OK. I don't have a strong feeling either way: if someone has code that
> tries to read this as a btimask then it'd be broken, but it would
> technically be following the docs.
>
> That said, we're relying on this as a pretty core userspace portability
> construct. So maybe the right answer here is to just be really strict
> about compatibility and eat the pain when we make a mistake, just to
> make sure we set the right example about not breaking stuff.
>
> So unless anyone's opposed, I'll pick this up for 6.11.
Though a few of us were talking and it looks like it's probably best to
just make this extensible and put the vector stuff in here too. So
something like
diff --git a/Documentation/arch/riscv/hwprobe.rst b/Documentation/arch/riscv/hwprobe.rst
index df5045103e73..e74727e1b7c0 100644
--- a/Documentation/arch/riscv/hwprobe.rst
+++ b/Documentation/arch/riscv/hwprobe.rst
@@ -211,25 +211,30 @@ The following keys are defined:
:c:macro:`RISCV_HWPROBE_KEY_MISALIGNED_PERF`, but the key was mistakenly
classified as a bitmask rather than a value.
-* :c:macro:`RISCV_HWPROBE_KEY_MISALIGNED_PERF`: An enum value describing the
- performance of misaligned scalar accesses on the selected set of processors.
+* :c:macro:`RISCV_HWPROBE_KEY_MISALIGNED_PERF`: An set of enum values describing the
+ performance of misaligned accesses on the selected set of processors.
- * :c:macro:`RISCV_HWPROBE_MISALIGNED_UNKNOWN`: The performance of misaligned
+ * :c:macro: `RISCV_HWPROBE_SCALAR_MISALIGNED_MASK`: Describes the performance
+ of scalar misaligned accesses.
+
+ * :c:macro:`RISCV_HWPROBE_SCALAR_MISALIGNED_UNKNOWN`: The performance of misaligned
accesses is unknown.
- * :c:macro:`RISCV_HWPROBE_MISALIGNED_EMULATED`: Misaligned accesses are
+ * :c:macro:`RISCV_HWPROBE_SCALAR_MISALIGNED_EMULATED`: Misaligned accesses are
emulated via software, either in or below the kernel. These accesses are
always extremely slow.
- * :c:macro:`RISCV_HWPROBE_MISALIGNED_SLOW`: Misaligned accesses are slower
+ * :c:macro:`RISCV_HWPROBE_SCALAR_MISALIGNED_SLOW`: Misaligned accesses are slower
than equivalent byte accesses. Misaligned accesses may be supported
directly in hardware, or trapped and emulated by software.
- * :c:macro:`RISCV_HWPROBE_MISALIGNED_FAST`: Misaligned accesses are faster
+ * :c:macro:`RISCV_HWPROBE_SCALAR_MISALIGNED_FAST`: Misaligned accesses are faster
than equivalent byte accesses.
- * :c:macro:`RISCV_HWPROBE_MISALIGNED_UNSUPPORTED`: Misaligned accesses are
+ * :c:macro:`RISCV_HWPROBE_SCALAR_MISALIGNED_UNSUPPORTED`: Misaligned accesses are
not supported at all and will generate a misaligned address fault.
+ * :c:macro: `RISCV_HWPROBE_VECTOR_MISALIGNED_MASK`: ... vector ...
+
* :c:macro:`RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE`: An unsigned int which
represents the size of the Zicboz block in bytes.
Jesse is looking at the vector misaligned stuff that's showing up on the
hardware, so I'll just hold off on this for a bit -- that way we can get
the vector stuff sorted out at the same time.
>
>>> I wanted to get this fix out so that
>>> we have both as options, and can discuss. These fixes are mutually
>>> exclusive, don't take both.
>>
>> It's better to note this strange behavior on
>> Documentation/arch/riscv/hwprobe.rst so users can quickly understand the
>> differences on the behavior of these two keys.
>>
>> The C code part looks good to me.
>>
>>>
>>> [1] https://lore.kernel.org/linux-riscv/[email protected]/
>>>
>>> ---
>>> Documentation/arch/riscv/hwprobe.rst | 8 ++++++--
>>> arch/riscv/include/asm/hwprobe.h | 2 +-
>>> arch/riscv/include/uapi/asm/hwprobe.h | 1 +
>>> arch/riscv/kernel/sys_hwprobe.c | 1 +
>>> 4 files changed, 9 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/Documentation/arch/riscv/hwprobe.rst b/Documentation/arch/riscv/hwprobe.rst
>>> index 204cd4433af5..616ee372adaf 100644
>>> --- a/Documentation/arch/riscv/hwprobe.rst
>>> +++ b/Documentation/arch/riscv/hwprobe.rst
>>> @@ -192,8 +192,12 @@ The following keys are defined:
>>> supported as defined in the RISC-V ISA manual starting from commit
>>> d8ab5c78c207 ("Zihintpause is ratified").
>>>
>>> -* :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: A bitmask that contains performance
>>> - information about the selected set of processors.
>>> +* :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: Deprecated. Returns similar values to
>>> + :c:macro:`RISCV_HWPROBE_KEY_MISALIGNED_PERF`, but the key was mistakenly
>>> + classified as a bitmask rather than a value.
>>> +
>>> +* :c:macro:`RISCV_HWPROBE_KEY_MISALIGNED_PERF`: An enum value describing the
>>> + performance of misaligned scalar accesses on the selected set of processors.
>>>
>>> * :c:macro:`RISCV_HWPROBE_MISALIGNED_UNKNOWN`: The performance of misaligned
>>> accesses is unknown.
>>> diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h
>>> index 630507dff5ea..150a9877b0af 100644
>>> --- a/arch/riscv/include/asm/hwprobe.h
>>> +++ b/arch/riscv/include/asm/hwprobe.h
>>> @@ -8,7 +8,7 @@
>>>
>>> #include <uapi/asm/hwprobe.h>
>>>
>>> -#define RISCV_HWPROBE_MAX_KEY 6
>>> +#define RISCV_HWPROBE_MAX_KEY 7
>>>
>>> static inline bool riscv_hwprobe_key_is_valid(__s64 key)
>>> {
>>> diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h
>>> index dda76a05420b..bc34e33fef23 100644
>>> --- a/arch/riscv/include/uapi/asm/hwprobe.h
>>> +++ b/arch/riscv/include/uapi/asm/hwprobe.h
>>> @@ -68,6 +68,7 @@ struct riscv_hwprobe {
>>> #define RISCV_HWPROBE_MISALIGNED_UNSUPPORTED (4 << 0)
>>> #define RISCV_HWPROBE_MISALIGNED_MASK (7 << 0)
>>> #define RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE 6
>>> +#define RISCV_HWPROBE_KEY_MISALIGNED_PERF 7
>>> /* Increase RISCV_HWPROBE_MAX_KEY when adding items. */
>>>
>>> /* Flags */
>>> diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c
>>> index 969ef3d59dbe..c8b7d57eb55e 100644
>>> --- a/arch/riscv/kernel/sys_hwprobe.c
>>> +++ b/arch/riscv/kernel/sys_hwprobe.c
>>> @@ -208,6 +208,7 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair,
>>> break;
>>>
>>> case RISCV_HWPROBE_KEY_CPUPERF_0:
>>> + case RISCV_HWPROBE_KEY_MISALIGNED_PERF:
>>> pair->value = hwprobe_misaligned(cpus);
>>> break;
>>>