2020-12-14 09:44:54

by Tony W Wang-oc

[permalink] [raw]
Subject: [PATCH] crypto: x86/crc32c-intel - Don't match some Zhaoxin CPUs

The crc32c-intel driver matches CPUs supporting X86_FEATURE_XMM4_2.
On platforms with Zhaoxin CPUs supporting this X86 feature, when
crc32c-intel and crc32c-generic are both registered, the system will
use crc32c-intel because its .cra_priority is greater than that of
crc32c-generic.

When running the lmbench3 Create and Delete file test on ext4 partitions
with metadata checksums enabled, it was found that the crc32c-generic
driver gives about a 20% performance gain over the crc32c-intel driver
on some Zhaoxin CPUs.

These Zhaoxin CPUs should therefore use the crc32c-generic driver
to get this performance gain, so remove support for these Zhaoxin CPUs
from crc32c-intel.

Signed-off-by: Tony W Wang-oc <[email protected]>
---
arch/x86/crypto/crc32c-intel_glue.c | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c
index feccb52..2cbbdde 100644
--- a/arch/x86/crypto/crc32c-intel_glue.c
+++ b/arch/x86/crypto/crc32c-intel_glue.c
@@ -215,6 +215,12 @@ static struct shash_alg alg = {
};

static const struct x86_cpu_id crc32c_cpu_id[] = {
+ X86_MATCH_VENDOR_FAM_FEATURE(ZHAOXIN, 0x6, X86_FEATURE_XMM4_2, 1),
+ X86_MATCH_VENDOR_FAM_MODEL_FEATURE(ZHAOXIN, 0x7, 0x1b, X86_FEATURE_XMM4_2, 1),
+ X86_MATCH_VENDOR_FAM_MODEL_FEATURE(ZHAOXIN, 0x7, 0x3b, X86_FEATURE_XMM4_2, 1),
+ X86_MATCH_VENDOR_FAM_FEATURE(CENTAUR, 0x6, X86_FEATURE_XMM4_2, 1),
+ X86_MATCH_VENDOR_FAM_MODEL_FEATURE(CENTAUR, 0x7, 0x1b, X86_FEATURE_XMM4_2, 1),
+ X86_MATCH_VENDOR_FAM_MODEL_FEATURE(CENTAUR, 0x7, 0x3b, X86_FEATURE_XMM4_2, 1),
X86_MATCH_FEATURE(X86_FEATURE_XMM4_2, NULL),
{}
};
@@ -222,7 +228,9 @@ MODULE_DEVICE_TABLE(x86cpu, crc32c_cpu_id);

static int __init crc32c_intel_mod_init(void)
{
- if (!x86_match_cpu(crc32c_cpu_id))
+ const struct x86_cpu_id *m = x86_match_cpu(crc32c_cpu_id);
+
+ if (!m || m->driver_data)
return -ENODEV;
#ifdef CONFIG_X86_64
if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) {
--
2.7.4


2020-12-15 09:01:13

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH] crypto: x86/crc32c-intel - Don't match some Zhaoxin CPUs

On Mon, Dec 14, 2020 at 11:59:52AM +0800, Tony W Wang-oc wrote:

Didn't I mention something about a comment?

> static const struct x86_cpu_id crc32c_cpu_id[] = {
> + X86_MATCH_VENDOR_FAM_FEATURE(ZHAOXIN, 0x6, X86_FEATURE_XMM4_2, 1),
> + X86_MATCH_VENDOR_FAM_MODEL_FEATURE(ZHAOXIN, 0x7, 0x1b, X86_FEATURE_XMM4_2, 1),
> + X86_MATCH_VENDOR_FAM_MODEL_FEATURE(ZHAOXIN, 0x7, 0x3b, X86_FEATURE_XMM4_2, 1),
> + X86_MATCH_VENDOR_FAM_FEATURE(CENTAUR, 0x6, X86_FEATURE_XMM4_2, 1),
> + X86_MATCH_VENDOR_FAM_MODEL_FEATURE(CENTAUR, 0x7, 0x1b, X86_FEATURE_XMM4_2, 1),
> + X86_MATCH_VENDOR_FAM_MODEL_FEATURE(CENTAUR, 0x7, 0x3b, X86_FEATURE_XMM4_2, 1),
> X86_MATCH_FEATURE(X86_FEATURE_XMM4_2, NULL),
> {}

Also, the above is weird in that it has the negative entries marked
positive, and 1/NULL are inconsistent.

Something like so then?

---

diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c
index feccb5254c7e..f6e6669a5102 100644
--- a/arch/x86/crypto/crc32c-intel_glue.c
+++ b/arch/x86/crypto/crc32c-intel_glue.c
@@ -215,14 +215,31 @@ static struct shash_alg alg = {
};

static const struct x86_cpu_id crc32c_cpu_id[] = {
- X86_MATCH_FEATURE(X86_FEATURE_XMM4_2, NULL),
+ /*
+ * Negative entries; exclude these chips from using this driver.
+ * They match the positive rule below, but their CRC32 instruction
+ * implementation is so slow, it doesn't merrit use.
+ */
+ X86_MATCH_VENDOR_FAM_FEATURE(ZHAOXIN, 0x6, X86_FEATURE_XMM4_2, false),
+ X86_MATCH_VENDOR_FAM_MODEL_FEATURE(ZHAOXIN, 0x7, 0x1b, X86_FEATURE_XMM4_2, false),
+ X86_MATCH_VENDOR_FAM_MODEL_FEATURE(ZHAOXIN, 0x7, 0x3b, X86_FEATURE_XMM4_2, false),
+ X86_MATCH_VENDOR_FAM_FEATURE(CENTAUR, 0x6, X86_FEATURE_XMM4_2, false),
+ X86_MATCH_VENDOR_FAM_MODEL_FEATURE(CENTAUR, 0x7, 0x1b, X86_FEATURE_XMM4_2, false),
+ X86_MATCH_VENDOR_FAM_MODEL_FEATURE(CENTAUR, 0x7, 0x3b, X86_FEATURE_XMM4_2, false),
+ /*
+ * Positive entry; SSE-4.2 instructions include special purpose CRC32
+ * instructions.
+ */
+ X86_MATCH_FEATURE(X86_FEATURE_XMM4_2, true),
{}
};
MODULE_DEVICE_TABLE(x86cpu, crc32c_cpu_id);

static int __init crc32c_intel_mod_init(void)
{
- if (!x86_match_cpu(crc32c_cpu_id))
+ const struct x86_cpu_id *m = x86_match_cpu(crc32c_cpu_id);
+
+ if (!m || !m->driver_data)
return -ENODEV;
#ifdef CONFIG_X86_64
if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) {

2020-12-15 10:04:33

by Tony W Wang-oc

[permalink] [raw]
Subject: Re: [PATCH] crypto: x86/crc32c-intel - Don't match some Zhaoxin CPUs


On 15/12/2020 16:58, Peter Zijlstra wrote:
> On Mon, Dec 14, 2020 at 11:59:52AM +0800, Tony W Wang-oc wrote:
>
> Didn't I mention something about a comment?
>
Really sorry for this.

>> static const struct x86_cpu_id crc32c_cpu_id[] = {
>> + X86_MATCH_VENDOR_FAM_FEATURE(ZHAOXIN, 0x6, X86_FEATURE_XMM4_2, 1),
>> + X86_MATCH_VENDOR_FAM_MODEL_FEATURE(ZHAOXIN, 0x7, 0x1b, X86_FEATURE_XMM4_2, 1),
>> + X86_MATCH_VENDOR_FAM_MODEL_FEATURE(ZHAOXIN, 0x7, 0x3b, X86_FEATURE_XMM4_2, 1),
>> + X86_MATCH_VENDOR_FAM_FEATURE(CENTAUR, 0x6, X86_FEATURE_XMM4_2, 1),
>> + X86_MATCH_VENDOR_FAM_MODEL_FEATURE(CENTAUR, 0x7, 0x1b, X86_FEATURE_XMM4_2, 1),
>> + X86_MATCH_VENDOR_FAM_MODEL_FEATURE(CENTAUR, 0x7, 0x3b, X86_FEATURE_XMM4_2, 1),
>> X86_MATCH_FEATURE(X86_FEATURE_XMM4_2, NULL),
>> {}
>
> Also, the above is weird in that it has the negative entries marked
> positive, and 1/NULL are inconsistent.
>
> Something like so then?
That's better!

> ---
>
> diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c
> index feccb5254c7e..f6e6669a5102 100644
> --- a/arch/x86/crypto/crc32c-intel_glue.c
> +++ b/arch/x86/crypto/crc32c-intel_glue.c
> @@ -215,14 +215,31 @@ static struct shash_alg alg = {
> };
>
> static const struct x86_cpu_id crc32c_cpu_id[] = {
> - X86_MATCH_FEATURE(X86_FEATURE_XMM4_2, NULL),
> + /*
> + * Negative entries; exclude these chips from using this driver.
> + * They match the positive rule below, but their CRC32 instruction
> + * implementation is so slow, it doesn't merrit use.
Will fix the typo merrit -> merit and resend the patch.

Sincerely
Tony

> + */
> + X86_MATCH_VENDOR_FAM_FEATURE(ZHAOXIN, 0x6, X86_FEATURE_XMM4_2, false),
> + X86_MATCH_VENDOR_FAM_MODEL_FEATURE(ZHAOXIN, 0x7, 0x1b, X86_FEATURE_XMM4_2, false),
> + X86_MATCH_VENDOR_FAM_MODEL_FEATURE(ZHAOXIN, 0x7, 0x3b, X86_FEATURE_XMM4_2, false),
> + X86_MATCH_VENDOR_FAM_FEATURE(CENTAUR, 0x6, X86_FEATURE_XMM4_2, false),
> + X86_MATCH_VENDOR_FAM_MODEL_FEATURE(CENTAUR, 0x7, 0x1b, X86_FEATURE_XMM4_2, false),
> + X86_MATCH_VENDOR_FAM_MODEL_FEATURE(CENTAUR, 0x7, 0x3b, X86_FEATURE_XMM4_2, false),
> + /*
> + * Positive entry; SSE-4.2 instructions include special purpose CRC32
> + * instructions.
> + */
> + X86_MATCH_FEATURE(X86_FEATURE_XMM4_2, true),
> {}
> };
> MODULE_DEVICE_TABLE(x86cpu, crc32c_cpu_id);
>
> static int __init crc32c_intel_mod_init(void)
> {
> - if (!x86_match_cpu(crc32c_cpu_id))
> + const struct x86_cpu_id *m = x86_match_cpu(crc32c_cpu_id);
> +
> + if (!m || !m->driver_data)
> return -ENODEV;
> #ifdef CONFIG_X86_64
> if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) {
> .
>