2011-05-13 18:21:46

by Suresh Siddha

[permalink] [raw]
Subject: [patch 3/5] x86, x2apic: Track the x2apic cluster sibling map

From: Cyrill Gorcunov <[email protected]>
Subject: x86, x2apic: Track the x2apic cluster sibling map

In the case of x2apic cluster mode, we can group IPI register writes based on
the cluster group instead of individual per-cpu destination messages.

For this purpose, track the cpu's that belong to the same x2apic cluster.

Signed-off-by: Cyrill Gorcunov <[email protected]>
Signed-off-by: Suresh Siddha <[email protected]>
---
arch/x86/kernel/apic/x2apic_cluster.c | 72 +++++++++++++++++++++++++++++++++-
1 file changed, 70 insertions(+), 2 deletions(-)

Index: linux-2.6-tip/arch/x86/kernel/apic/x2apic_cluster.c
===================================================================
--- linux-2.6-tip.orig/arch/x86/kernel/apic/x2apic_cluster.c
+++ linux-2.6-tip/arch/x86/kernel/apic/x2apic_cluster.c
@@ -11,6 +11,7 @@
#include <asm/ipi.h>

static DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid);
+static DEFINE_PER_CPU(cpumask_var_t, cpus_in_cluster);

static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
@@ -48,6 +49,11 @@ static void
native_x2apic_icr_write(cfg, apicid);
}

+static inline u32 x2apic_cluster(int cpu)
+{
+ return per_cpu(x86_cpu_to_logical_apicid, cpu) >> 16;
+}
+
/*
* for now, we send the IPI's one by one in the cpumask.
* TBD: Based on the cpu mask, we can send the IPI's to the cluster group
@@ -163,14 +169,76 @@ static void x2apic_send_IPI_self(int vec

static void init_x2apic_ldr(void)
{
+ unsigned int this_cpu = smp_processor_id();
+ unsigned int cpu;
+
+ per_cpu(x86_cpu_to_logical_apicid, this_cpu) = apic_read(APIC_LDR);
+
+ __cpu_set(this_cpu, per_cpu(cpus_in_cluster, this_cpu));
+ for_each_online_cpu(cpu) {
+ if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu))
+ continue;
+ __cpu_set(this_cpu, per_cpu(cpus_in_cluster, cpu));
+ __cpu_set(cpu, per_cpu(cpus_in_cluster, this_cpu));
+ }
+}
+
+ /*
+ * At CPU state changes, update the x2apic cluster sibling info.
+ */
+static int __cpuinit
+update_clusterinfo(struct notifier_block *nfb, unsigned long action, void *hcpu)
+{
+ unsigned int this_cpu = (unsigned long)hcpu;
+ unsigned int cpu;
+ int err = 0;
+
+ switch (action) {
+ case CPU_UP_PREPARE:
+ if (!zalloc_cpumask_var(&per_cpu(cpus_in_cluster, this_cpu),
+ GFP_KERNEL)) {
+ err = -ENOMEM;
+ }
+ break;
+ case CPU_UP_CANCELED:
+ case CPU_UP_CANCELED_FROZEN:
+ case CPU_DEAD:
+ for_each_online_cpu(cpu) {
+ if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu))
+ continue;
+ __cpu_clear(this_cpu, per_cpu(cpus_in_cluster, cpu));
+ __cpu_clear(cpu, per_cpu(cpus_in_cluster, this_cpu));
+ }
+ free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu));
+ break;
+ }
+
+ return notifier_from_errno(err);
+}
+
+static struct notifier_block __refdata x2apic_cpu_notifier = {
+ .notifier_call = update_clusterinfo,
+};
+
+static int x2apic_init_cpu_notifier(void)
+{
int cpu = smp_processor_id();

- per_cpu(x86_cpu_to_logical_apicid, cpu) = apic_read(APIC_LDR);
+ zalloc_cpumask_var(&per_cpu(cpus_in_cluster, cpu), GFP_KERNEL);
+
+ BUG_ON(!per_cpu(cpus_in_cluster, cpu));
+
+ __cpu_set(cpu, per_cpu(cpus_in_cluster, cpu));
+ register_hotcpu_notifier(&x2apic_cpu_notifier);
+ return 1;
}

static int x2apic_cluster_probe(void)
{
- return x2apic_mode;
+ if (x2apic_mode)
+ return x2apic_init_cpu_notifier();
+ else
+ return 0;
}

struct apic apic_x2apic_cluster = {