2014-10-19 01:46:57

by Daniel J Blueman

[permalink] [raw]
Subject: [PATCH v2 1/5] Numachip: Fix build failure with trunk GCC

Fix APIC declaration to be consistent with definition; this addresses
a compilation failure with the development branch of GCC, see:
https://bugzilla.kernel.org/show_bug.cgi?id=78251

Signed-off-by: Daniel J Blueman <[email protected]>
---
arch/x86/kernel/apic/apic_numachip.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index ae91539..81d70ba 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -32,7 +32,7 @@

static int numachip_system __read_mostly;

-static const struct apic apic_numachip __read_mostly;
+static const struct apic apic_numachip __refconst;

static unsigned int get_apic_id(unsigned long x)
{
--
1.9.1


2014-10-19 01:47:07

by Daniel J Blueman

[permalink] [raw]
Subject: [PATCH v2 3/5] Numachip: Add safe is-present function

Add safe function to check if Numachip is detected, to be used elsewhere.

Signed-off-by: Daniel J Blueman <[email protected]>
---
arch/x86/include/asm/numachip/numachip.h | 9 +++++++++
arch/x86/kernel/apic/apic_numachip.c | 9 +++++++--
2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/numachip/numachip.h b/arch/x86/include/asm/numachip/numachip.h
index 1c6f7f6..3e1f4f9 100644
--- a/arch/x86/include/asm/numachip/numachip.h
+++ b/arch/x86/include/asm/numachip/numachip.h
@@ -16,4 +16,13 @@

extern int __init pci_numachip_init(void);

+#ifdef CONFIG_X86_NUMACHIP
+extern bool is_numachip_system(void);
+#else
+static inline bool is_numachip_system(void)
+{
+ return 0;
+}
+#endif
+
#endif /* _ASM_X86_NUMACHIP_NUMACHIP_H */
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index bd083c0..c965b69 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -30,7 +30,7 @@
#include <asm/apic_flat_64.h>
#include <asm/pgtable.h>

-static int numachip_system __read_mostly;
+static bool numachip_system __read_mostly;

static const struct apic apic_numachip __refconst;

@@ -173,11 +173,16 @@ static void fixup_cpu_id(struct cpuinfo_x86 *c, int node)
}
}

+bool is_numachip_system(void)
+{
+ return numachip_system;
+}
+
static int __init numachip_system_init(void)
{
unsigned int val;

- if (!numachip_system)
+ if (!is_numachip_system())
return 0;

x86_cpuinit.fixup_cpu_id = fixup_cpu_id;
--
1.9.1

2014-10-19 01:47:23

by Daniel J Blueman

[permalink] [raw]
Subject: [PATCH v2 5/5] Numachip: use 2GB memory block size

Use appropriate memory block size to reduce sysfs entry creation time
by 16x.

Boot-tested with the four permutations of X86_UV and X86_NUMACHIP.

Signed-off-by: Daniel J Blueman <[email protected]>
---
arch/x86/mm/init_64.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 5621c47..22ea6de 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -53,6 +53,7 @@
#include <asm/cacheflush.h>
#include <asm/init.h>
#include <asm/uv/uv.h>
+#include <asm/numachip/numachip.h>
#include <asm/setup.h>

#include "mm_internal.h"
@@ -1235,9 +1236,9 @@ static unsigned long probe_memory_block_size(void)
/* start from 2g */
unsigned long bz = 1UL<<31;

-#ifdef CONFIG_X86_UV
- if (is_uv_system()) {
- printk(KERN_INFO "UV: memory block size 2GB\n");
+#ifdef CONFIG_X86_64
+ if (is_uv_system() || is_numachip_system()) {
+ pr_info("Memory block size 2GB for large-SMP system\n");
return 2UL * 1024 * 1024 * 1024;
}
#endif
--
1.9.1

2014-10-19 01:47:21

by Daniel J Blueman

[permalink] [raw]
Subject: [PATCH v2 4/5] Numachip: APIC driver cleanups

Drop printing that serves no purpose, as it's printing fixed or known
values, and mark constant structure appropriately.

Signed-off-by: Daniel J Blueman <[email protected]>
---
arch/x86/kernel/apic/apic_numachip.c | 22 +++-------------------
arch/x86/pci/numachip.c | 2 +-
2 files changed, 4 insertions(+), 20 deletions(-)

diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index c965b69..6374d94 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -153,20 +153,8 @@ static int __init numachip_probe(void)
return apic == &apic_numachip;
}

-static void __init map_csrs(void)
-{
- printk(KERN_INFO "NumaChip: Mapping local CSR space (%016llx - %016llx)\n",
- NUMACHIP_LCSR_BASE, NUMACHIP_LCSR_BASE + NUMACHIP_LCSR_SIZE - 1);
- init_extra_mapping_uc(NUMACHIP_LCSR_BASE, NUMACHIP_LCSR_SIZE);
-
- printk(KERN_INFO "NumaChip: Mapping global CSR space (%016llx - %016llx)\n",
- NUMACHIP_GCSR_BASE, NUMACHIP_GCSR_BASE + NUMACHIP_GCSR_SIZE - 1);
- init_extra_mapping_uc(NUMACHIP_GCSR_BASE, NUMACHIP_GCSR_SIZE);
-}
-
static void fixup_cpu_id(struct cpuinfo_x86 *c, int node)
{
-
if (c->phys_proc_id != node) {
c->phys_proc_id = node;
per_cpu(cpu_llc_id, smp_processor_id()) = node;
@@ -180,19 +168,15 @@ bool is_numachip_system(void)

static int __init numachip_system_init(void)
{
- unsigned int val;
-
if (!is_numachip_system())
return 0;

+ init_extra_mapping_uc(NUMACHIP_LCSR_BASE, NUMACHIP_LCSR_SIZE);
+ init_extra_mapping_uc(NUMACHIP_GCSR_BASE, NUMACHIP_GCSR_SIZE);
+
x86_cpuinit.fixup_cpu_id = fixup_cpu_id;
x86_init.pci.arch_init = pci_numachip_init;

- map_csrs();
-
- val = read_lcsr(CSR_G0_NODE_IDS);
- printk(KERN_INFO "NumaChip: Local NodeID = %08x\n", val);
-
return 0;
}
early_initcall(numachip_system_init);
diff --git a/arch/x86/pci/numachip.c b/arch/x86/pci/numachip.c
index 7307d9d..2e565e6 100644
--- a/arch/x86/pci/numachip.c
+++ b/arch/x86/pci/numachip.c
@@ -103,7 +103,7 @@ static int pci_mmcfg_write_numachip(unsigned int seg, unsigned int bus,
return 0;
}

-const struct pci_raw_ops pci_mmcfg_numachip = {
+static const struct pci_raw_ops pci_mmcfg_numachip = {
.read = pci_mmcfg_read_numachip,
.write = pci_mmcfg_write_numachip,
};
--
1.9.1

2014-10-19 01:47:20

by Daniel J Blueman

[permalink] [raw]
Subject: [PATCH v2 2/5] Numachip: APIC fixes

Fix 16-bit APIC ID truncation and redundant APIC ICR idle polling for IPI
to self (AMD64 APICs are documented in the system developer manuals to
queue APIC writes).

Signed-off-by: Daniel J Blueman <[email protected]>
---
arch/x86/kernel/apic/apic_numachip.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index 81d70ba..bd083c0 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -40,7 +40,7 @@ static unsigned int get_apic_id(unsigned long x)
unsigned int id;

rdmsrl(MSR_FAM10H_NODE_ID, value);
- id = ((x >> 24) & 0xffU) | ((value << 2) & 0x3f00U);
+ id = ((x >> 24) & 0xffU) | ((value << 2) & 0xff00U);

return id;
}
@@ -145,7 +145,7 @@ static void numachip_send_IPI_all(int vector)

static void numachip_send_IPI_self(int vector)
{
- __default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
+ apic_write(APIC_SELF_IPI, vector);
}

static int __init numachip_probe(void)
--
1.9.1

2014-10-19 09:23:32

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH v2 5/5] Numachip: use 2GB memory block size


* Daniel J Blueman <[email protected]> wrote:

> Use appropriate memory block size to reduce sysfs entry creation time
> by 16x.
>
> Boot-tested with the four permutations of X86_UV and X86_NUMACHIP.
>
> Signed-off-by: Daniel J Blueman <[email protected]>
> ---
> arch/x86/mm/init_64.c | 7 ++++---
> 1 file changed, 4 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
> index 5621c47..22ea6de 100644
> --- a/arch/x86/mm/init_64.c
> +++ b/arch/x86/mm/init_64.c
> @@ -53,6 +53,7 @@
> #include <asm/cacheflush.h>
> #include <asm/init.h>
> #include <asm/uv/uv.h>
> +#include <asm/numachip/numachip.h>
> #include <asm/setup.h>
>
> #include "mm_internal.h"
> @@ -1235,9 +1236,9 @@ static unsigned long probe_memory_block_size(void)
> /* start from 2g */
> unsigned long bz = 1UL<<31;
>
> -#ifdef CONFIG_X86_UV
> - if (is_uv_system()) {
> - printk(KERN_INFO "UV: memory block size 2GB\n");
> +#ifdef CONFIG_X86_64
> + if (is_uv_system() || is_numachip_system()) {
> + pr_info("Memory block size 2GB for large-SMP system\n");
> return 2UL * 1024 * 1024 * 1024;

It would be a lot cleaner and more robust to have a more
intelligent decision here.

Is there a reliable indicator for large 'sysfs entry creation
time', such as a lot of RAM present?

Also, it would be nice to list the pros/cons of this change: an
advantage is reduced overhead - what are the disadvantages?

Thanks,

Ingo

2014-10-19 09:24:55

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH v2 2/5] Numachip: APIC fixes


* Daniel J Blueman <[email protected]> wrote:

> Fix 16-bit APIC ID truncation and redundant APIC ICR idle polling for IPI
> to self (AMD64 APICs are documented in the system developer manuals to
> queue APIC writes).

That is again two fixes - there should be one fix per patch, with
a good description of what the effects of the bug were and what
the advantages of the change are - for each change separately.

Thanks,

Ingo

2014-10-20 07:04:14

by Daniel J Blueman

[permalink] [raw]
Subject: Re: [PATCH v2 5/5] Numachip: use 2GB memory block size

On 19/10/2014 17:23, Ingo Molnar wrote:
>
> * Daniel J Blueman <[email protected]> wrote:
>
>> Use appropriate memory block size to reduce sysfs entry creation time
>> by 16x.
>>
>> Boot-tested with the four permutations of X86_UV and X86_NUMACHIP.
>>
>> Signed-off-by: Daniel J Blueman <[email protected]>
>> ---
>> arch/x86/mm/init_64.c | 7 ++++---
>> 1 file changed, 4 insertions(+), 3 deletions(-)
>>
>> diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
>> index 5621c47..22ea6de 100644
>> --- a/arch/x86/mm/init_64.c
>> +++ b/arch/x86/mm/init_64.c
>> @@ -53,6 +53,7 @@
>> #include <asm/cacheflush.h>
>> #include <asm/init.h>
>> #include <asm/uv/uv.h>
>> +#include <asm/numachip/numachip.h>
>> #include <asm/setup.h>
>>
>> #include "mm_internal.h"
>> @@ -1235,9 +1236,9 @@ static unsigned long probe_memory_block_size(void)
>> /* start from 2g */
>> unsigned long bz = 1UL<<31;
>>
>> -#ifdef CONFIG_X86_UV
>> - if (is_uv_system()) {
>> - printk(KERN_INFO "UV: memory block size 2GB\n");
>> +#ifdef CONFIG_X86_64
>> + if (is_uv_system() || is_numachip_system()) {
>> + pr_info("Memory block size 2GB for large-SMP system\n");
>> return 2UL * 1024 * 1024 * 1024;
>
> It would be a lot cleaner and more robust to have a more
> intelligent decision here.
>
> Is there a reliable indicator for large 'sysfs entry creation
> time', such as a lot of RAM present?

Yes, agreed exactly.

> Also, it would be nice to list the pros/cons of this change, an
> advantage is reduced overhead - what are the disadvantages?

The single disadvantage is that small-memory systems won't be able to
have finer control of memory offlining, though the impact of that depends
on why the user is offlining memory of course.

If it seems reasonable for x86-64 systems with >64GB memory to have 2GB
memory block sizes, I could prepare that change instead and document the
above if preferred?

Thanks,
Daniel
--
Daniel J Blueman
Principal Software Engineer, Numascale

2014-10-20 12:02:51

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH v2 5/5] Numachip: use 2GB memory block size


* Daniel J Blueman <[email protected]> wrote:

> On 19/10/2014 17:23, Ingo Molnar wrote:
> >
> >* Daniel J Blueman <[email protected]> wrote:
> >
> >>Use appropriate memory block size to reduce sysfs entry creation time
> >>by 16x.
> >>
> >>Boot-tested with the four permutations of X86_UV and X86_NUMACHIP.
> >>
> >>Signed-off-by: Daniel J Blueman <[email protected]>
> >>---
> >> arch/x86/mm/init_64.c | 7 ++++---
> >> 1 file changed, 4 insertions(+), 3 deletions(-)
> >>
> >>diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
> >>index 5621c47..22ea6de 100644
> >>--- a/arch/x86/mm/init_64.c
> >>+++ b/arch/x86/mm/init_64.c
> >>@@ -53,6 +53,7 @@
> >> #include <asm/cacheflush.h>
> >> #include <asm/init.h>
> >> #include <asm/uv/uv.h>
> >>+#include <asm/numachip/numachip.h>
> >> #include <asm/setup.h>
> >>
> >> #include "mm_internal.h"
> >>@@ -1235,9 +1236,9 @@ static unsigned long probe_memory_block_size(void)
> >> /* start from 2g */
> >> unsigned long bz = 1UL<<31;
> >>
> >>-#ifdef CONFIG_X86_UV
> >>- if (is_uv_system()) {
> >>- printk(KERN_INFO "UV: memory block size 2GB\n");
> >>+#ifdef CONFIG_X86_64
> >>+ if (is_uv_system() || is_numachip_system()) {
> >>+ pr_info("Memory block size 2GB for large-SMP system\n");
> >> return 2UL * 1024 * 1024 * 1024;
> >
> >It would be a lot cleaner and more robust to have a more
> >intelligent decision here.
> >
> >Is there a reliable indicator for large 'sysfs entry creation
> >time', such as a lot of RAM present?
>
> Yes, agreed exactly.
>
> > Also, it would be nice to list the pros/cons of this change,
> > an advantage is reduced overhead - what are the
> > disadvantages?
>
> The single disadvantage is that small-memory systems won't be
> able to have finer control of memory offlining, though the
> impact of that depends on why the user is offlining memory of
> course.
>
> If it seems reasonable for x86-64 systems with >64GB memory to
> have 2GB memory block sizes, I could prepare that change
> instead and document the above if preferred?

I'd make it >= 64GB, but yes, that sounds like a good limit.

Thanks,

Ingo