2008-03-24 18:23:36

by tip-bot for Jack Steiner

[permalink] [raw]
Subject: [RFC 8/8] x86_64: Support for new UV apic


UV supports really big systems. So big, in fact, that the APICID register
does not contain enough bits to contain an APICID that is unique across all
cpus.

The UV BIOS supports 3 APICID modes:

- legacy mode. This mode uses the old APIC mode where
APICID is in bits [31:24] of the APICID register.

- x2apic mode. This mode is whitebox-compatible. APICIDs
are unique across all cpus. Standard x2apic APIC operations
(Intel-defined) can be used for IPIs. The node identifier
fits within the Intel-defined portion of the APICID register.

- x2apic-uv mode. In this mode, the APICIDs on each node have
unique IDs, but IDs on different node are not unique. For example,
if each mode has 32 cpus, the APICIDs on each node might be
0 - 31. Every node has the same set of IDs.
The UV hub is used to route IPIs/interrupts to the correct node.
Traditional APIC operations WILL NOT WORK.

In x2apic-uv mode, the ACPI tables all contain a full unique ID (note:
exact bit layout still changing but the following is close):

nnnnnnnnnnlc0cch
n = unique node number
l = socket number on board
c = core
h = hyperthread

Only the "lc0cch" bits are written to the APICID register. The remaining bits are
supplied by having the get_apic_id() function "OR" the extra bits into the value
read from the APICID register. (Hmmm.. why not keep the ENTIRE APICID register
in per-cpu data....)

The x2apic-uv mode is recognized by the MADT table containing:
oem_id = "SGI"
oem_table_id = "UV-X"


(NOTE: a work-in-progress. Pieces missing....)


Signed-off-by: Jack Steiner <[email protected]>

---
arch/x86/kernel/Makefile | 2
arch/x86/kernel/genapic_64.c | 15 +
arch/x86/kernel/genx2apic_uv_x.c | 305 +++++++++++++++++++++++++++++++++++++++
arch/x86/kernel/setup64.c | 4
arch/x86/kernel/smpboot_64.c | 7
include/asm-x86/genapic_64.h | 5
6 files changed, 335 insertions(+), 3 deletions(-)

Index: linux/arch/x86/kernel/genapic_64.c
===================================================================
--- linux.orig/arch/x86/kernel/genapic_64.c 2008-03-21 15:37:05.000000000 -0500
+++ linux/arch/x86/kernel/genapic_64.c 2008-03-21 15:49:38.000000000 -0500
@@ -30,6 +30,7 @@ u16 x86_cpu_to_apicid_init[NR_CPUS] __in
void *x86_cpu_to_apicid_early_ptr;
DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID;
EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
+DEFINE_PER_CPU(int, x2apic_extra_bits);

struct genapic __read_mostly *genapic = &apic_flat;

@@ -40,6 +41,9 @@ static enum uv_system_type uv_system_typ
*/
void __init setup_apic_routing(void)
{
+ if (uv_system_type == UV_NON_UNIQUE_APIC)
+ genapic = &apic_x2apic_uv_x;
+ else
#ifdef CONFIG_ACPI
/*
* Quirk: some x86_64 machines can only use physical APIC mode
@@ -69,7 +73,16 @@ void send_IPI_self(int vector)

unsigned int get_apic_id(void)
{
- return (apic_read(APIC_ID) >> 24) & 0xFFu;
+ unsigned int id;
+
+ preempt_disable();
+ id = apic_read(APIC_ID);
+ if (uv_system_type >= UV_X2APIC)
+ id |= __get_cpu_var(x2apic_extra_bits);
+ else
+ id = (id >> 24) & 0xFFu;;
+ preempt_enable();
+ return id;
}

int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
Index: linux/arch/x86/kernel/genx2apic_uv_x.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux/arch/x86/kernel/genx2apic_uv_x.c 2008-03-24 09:21:56.000000000 -0500
@@ -0,0 +1,305 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * SGI UV APIC functions (note: not an Intel compatible APIC)
+ *
+ * Copyright (C) 2007 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/bootmem.h>
+#include <linux/module.h>
+#include <asm/smp.h>
+#include <asm/ipi.h>
+#include <asm/genapic.h>
+#include <asm/uv_mmrs.h>
+#include <asm/uv_hub.h>
+
+DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
+EXPORT_PER_CPU_SYMBOL(__uv_hub_info);
+
+struct uv_blade_info *uv_blade_info;
+EXPORT_SYMBOL_GPL(uv_blade_info);
+
+short *uv_node_to_blade;
+EXPORT_SYMBOL_GPL(uv_node_to_blade);
+
+short *uv_cpu_to_blade;
+EXPORT_SYMBOL_GPL(uv_cpu_to_blade);
+
+short uv_possible_blades;
+EXPORT_SYMBOL_GPL(uv_possible_blades);
+
+/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
+/* Probably incorrect for UV ZZZ */
+
+static cpumask_t uv_target_cpus(void)
+{
+ return cpumask_of_cpu(0);
+}
+
+static cpumask_t uv_vector_allocation_domain(int cpu)
+{
+ cpumask_t domain = CPU_MASK_NONE;
+ cpu_set(cpu, domain);
+ return domain;
+}
+
+int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip)
+{
+ unsigned long val;
+ int nasid;
+
+ printk(KERN_DEBUG "ZZZZZZZZZZZ send SIPI to apicid 0x%x, start 0x%x\n",
+ phys_apicid, start_rip);
+ nasid = uv_apicid_to_nasid(phys_apicid);
+ val = (1UL << UVH_IPI_INT_SEND_SHFT) |
+ (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
+ (((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
+ (6 << UVH_IPI_INT_DELIVERY_MODE_SHFT);
+ printk(KERN_DEBUG "ZZZZZZZZZZZ nasid %d, val 0x%lx\n", nasid, val);
+ uv_write_global_mmr64(nasid, UVH_IPI_INT, val);
+ return 0;
+}
+
+static void uv_send_IPI_one(int cpu, int vector)
+{
+ unsigned long val, apicid;
+ int nasid;
+
+ apicid = per_cpu(x86_cpu_to_apicid, cpu); /* ZZZ - cache node-local ? */
+ nasid = uv_apicid_to_nasid(apicid);
+ val =
+ (1UL << UVH_IPI_INT_SEND_SHFT) | (apicid <<
+ UVH_IPI_INT_APIC_ID_SHFT) |
+ (vector << UVH_IPI_INT_VECTOR_SHFT);
+ uv_write_global_mmr64(nasid, UVH_IPI_INT, val);
+ printk(KERN_DEBUG
+ "UV: IPI to cpu %d, apicid 0x%lx, vec %d, nasid%d, val 0x%lx\n",
+ cpu, apicid, vector, nasid, val);
+}
+
+static void uv_send_IPI_mask(cpumask_t mask, int vector)
+{
+ unsigned long flags;
+ unsigned int cpu;
+
+ local_irq_save(flags);
+ for (cpu = 0; cpu < NR_CPUS; ++cpu)
+ if (cpu_isset(cpu, mask))
+ uv_send_IPI_one(cpu, vector);
+ local_irq_restore(flags);
+}
+
+static void uv_send_IPI_allbutself(int vector)
+{
+ cpumask_t mask = cpu_online_map;
+
+ cpu_clear(smp_processor_id(), mask);
+
+ if (!cpus_empty(mask))
+ uv_send_IPI_mask(mask, vector);
+}
+
+static void uv_send_IPI_all(int vector)
+{
+ uv_send_IPI_mask(cpu_online_map, vector);
+}
+
+static int uv_apic_id_registered(void)
+{
+ return 1;
+}
+
+static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
+{
+ int cpu;
+
+ /*
+ * We're using fixed IRQ delivery, can only return one phys APIC ID.
+ * May as well be the first.
+ */
+ cpu = first_cpu(cpumask);
+ if ((unsigned)cpu < NR_CPUS)
+ return per_cpu(x86_cpu_to_apicid, cpu);
+ else
+ return BAD_APICID;
+}
+
+static unsigned int phys_pkg_id(int index_msb)
+{
+ return get_apic_id() >> index_msb;
+}
+
+#ifdef ZZZ
+static void uv_send_IPI_self(int vector)
+{
+ apic_write(APIC_SELF_IPI, vector);
+}
+#endif
+
+struct genapic apic_x2apic_uv_x = {
+ .name = "UV large system",
+ .int_delivery_mode = dest_Fixed,
+ .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
+ .target_cpus = uv_target_cpus,
+ .vector_allocation_domain = uv_vector_allocation_domain,/* Fixme ZZZ */
+ .apic_id_registered = uv_apic_id_registered,
+ .send_IPI_all = uv_send_IPI_all,
+ .send_IPI_allbutself = uv_send_IPI_allbutself,
+ .send_IPI_mask = uv_send_IPI_mask,
+ /* ZZZ.send_IPI_self = uv_send_IPI_self, */
+ .cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
+ .phys_pkg_id = phys_pkg_id, /* Fixme ZZZ */
+};
+
+static __cpuinit void set_x2apic_extra_bits(int nasid)
+{
+ __get_cpu_var(x2apic_extra_bits) = ((nasid >> 1) << 6);
+}
+
+/*
+ * Called on boot cpu.
+ */
+static __init void uv_system_init(void)
+{
+ union uvh_si_addr_map_config_u m_n_config;
+ int bytes, nid, cpu, lcpu, nasid, last_nasid, blade;
+ unsigned long mmr_base;
+
+ m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG);
+ mmr_base =
+ uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) &
+ ~UV_MMR_ENABLE;
+ printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base);
+
+ last_nasid = -1;
+ for_each_possible_cpu(cpu) {
+ nid = cpu_to_node(cpu);
+ nasid = uv_apicid_to_nasid(per_cpu(x86_cpu_to_apicid, cpu));
+ if (nasid != last_nasid)
+ uv_possible_blades++;
+ last_nasid = nasid;
+ }
+ printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades());
+
+ bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
+ uv_blade_info = alloc_bootmem_pages(bytes);
+ memset(uv_blade_info, 255, bytes);
+
+ bytes = sizeof(uv_node_to_blade[0]) * num_possible_nodes();
+ uv_node_to_blade = alloc_bootmem_pages(bytes);
+ memset(uv_node_to_blade, 255, bytes);
+
+ bytes = sizeof(uv_cpu_to_blade[0]) * num_possible_cpus();
+ uv_cpu_to_blade = alloc_bootmem_pages(bytes);
+ memset(uv_cpu_to_blade, 255, bytes);
+
+ last_nasid = -1;
+ blade = -1;
+ lcpu = -1;
+ for_each_possible_cpu(cpu) {
+ nid = cpu_to_node(cpu);
+ nasid = uv_apicid_to_nasid(per_cpu(x86_cpu_to_apicid, cpu));
+ if (nasid != last_nasid) {
+ blade++;
+ lcpu = -1;
+ uv_blade_info[blade].nr_posible_cpus = 0;
+ uv_blade_info[blade].nr_online_cpus = 0;
+ }
+ last_nasid = nasid;
+ lcpu++;
+
+ uv_cpu_hub_info(cpu)->m_val = m_n_config.s.m_skt;
+ uv_cpu_hub_info(cpu)->n_val = m_n_config.s.n_skt;
+ uv_cpu_hub_info(cpu)->numa_blade_id = blade;
+ uv_cpu_hub_info(cpu)->blade_processor_id = lcpu;
+ uv_cpu_hub_info(cpu)->local_nasid = nasid;
+ uv_cpu_hub_info(cpu)->gnode_upper =
+ nasid & ~((1 << uv_hub_info->n_val) - 1);
+ uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
+ uv_cpu_hub_info(cpu)->coherency_domain_number = 0;/* ZZZ */
+ uv_blade_info[blade].nasid = nasid;
+ uv_blade_info[blade].nr_posible_cpus++;
+ uv_node_to_blade[nid] = blade;
+ uv_cpu_to_blade[cpu] = blade;
+
+ printk(KERN_DEBUG "UV cpu %d, apicid 0x%x, nasid %d, nid %d\n",
+ cpu, per_cpu(x86_cpu_to_apicid, cpu), nasid, nid);
+ printk(KERN_DEBUG "UV lcpu %d, blade %d\n", lcpu, blade);
+
+#ifdef ZZZ
+ printk(KERN_DEBUG "UV ZZZZ nasid %d\n", nasid);
+ printk(KERN_DEBUG "UV ZZZ local paddr %p\n",
+ __pa(uv_local_mmr_address
+ (UVH_LB_BAU_SB_DESCRIPTOR_BASE)));
+ printk(KERN_DEBUG "UV ZZZ global paddr %p\n",
+ __pa(uv_global_mmr64_address
+ (nasid, UVH_LB_BAU_SB_DESCRIPTOR_BASE)));
+ printk(KERN_DEBUG "UV ZZZ global32 paddr %p\n",
+ __pa(uv_global_mmr32_address
+ (nasid, UVH_LB_BAU_SB_DESCRIPTOR_BASE_32)));
+ printk(KERN_DEBUG "UV ZZZ local addr %p\n",
+ uv_local_mmr_address(UVH_LB_BAU_SB_DESCRIPTOR_BASE));
+ printk(KERN_DEBUG "UV ZZZ global addr %p\n",
+ uv_global_mmr64_address(nasid,
+ UVH_LB_BAU_SB_DESCRIPTOR_BASE));
+ printk(KERN_DEBUG "UV ZZZ global32 addr %p\n",
+ uv_global_mmr32_address(nasid,
+ UVH_LB_BAU_SB_DESCRIPTOR_BASE_32));
+
+ printk(KERN_DEBUG "UV ZZZ local 0x%lx\n",
+ uv_read_local_mmr(UVH_LB_BAU_SB_DESCRIPTOR_BASE));
+ printk(KERN_DEBUG "UV ZZZ global 0x%lx\n",
+ uv_read_global_mmr64(nasid,
+ UVH_LB_BAU_SB_DESCRIPTOR_BASE));
+ printk(KERN_DEBUG "UV ZZZ global32 0x%lx\n",
+ uv_read_global_mmr32(nasid,
+ UVH_LB_BAU_SB_DESCRIPTOR_BASE_32));
+#endif
+ }
+}
+
+/*
+ * Called on each cpu to initialize the per_cpu UV data area.
+ */
+void __cpuinit uv_cpu_init(void)
+{
+ if (!uv_node_to_blade)
+ uv_system_init();
+
+ uv_blade_info[uv_numa_blade_id()].nr_online_cpus++;
+
+ if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
+ set_x2apic_extra_bits(uv_hub_info->local_nasid);
+
+#ifndef ZZZ
+ printk(KERN_DEBUG
+ "UV cpu %d, lcpu %d, blade %d, nasid %d/%d/%d, possible %d,"
+ " online %d, cputoblade %d, uv_node_to_blade_id %d\n",
+ smp_processor_id(), uv_blade_processor_id(), uv_numa_blade_id(),
+ uv_blade_to_nasid(uv_numa_blade_id()),
+ uv_cpu_to_nasid(smp_processor_id()),
+ uv_node_to_nasid(numa_node_id()),
+ uv_blade_nr_possible_cpus(uv_numa_blade_id()),
+ uv_blade_nr_online_cpus(uv_numa_blade_id()),
+ uv_cpu_to_blade_id(smp_processor_id()),
+ uv_node_to_blade_id(numa_node_id()));
+
+ printk(KERN_DEBUG
+ "UV cpu %d: hdw_apic_id 0x%x, extra_apic 0x%x, nasid 0x%x, "
+ "M %d, N %d, nasid_h 0x%x, mmrs 0x%lx\n",
+ smp_processor_id(), apic_read(APIC_ID),
+ __get_cpu_var(x2apic_extra_bits), uv_hub_info->local_nasid,
+ uv_hub_info->m_val, uv_hub_info->n_val, uv_hub_info->gnode_upper,
+ uv_hub_info->global_mmr_base);
+#endif
+}
Index: linux/arch/x86/kernel/setup64.c
===================================================================
--- linux.orig/arch/x86/kernel/setup64.c 2008-03-21 15:36:35.000000000 -0500
+++ linux/arch/x86/kernel/setup64.c 2008-03-21 15:49:38.000000000 -0500
@@ -24,6 +24,7 @@
#include <asm/proto.h>
#include <asm/sections.h>
#include <asm/setup.h>
+#include <asm/genapic.h>

#ifndef CONFIG_DEBUG_BOOT_PARAMS
struct boot_params __initdata boot_params;
@@ -355,4 +356,7 @@ void __cpuinit cpu_init (void)
fpu_init();

raw_local_save_flags(kernel_eflags);
+
+ if (is_uv_system())
+ uv_cpu_init();
}
Index: linux/arch/x86/kernel/Makefile
===================================================================
--- linux.orig/arch/x86/kernel/Makefile 2008-03-21 15:36:35.000000000 -0500
+++ linux/arch/x86/kernel/Makefile 2008-03-21 15:49:38.000000000 -0500
@@ -90,7 +90,7 @@ scx200-y += scx200_32.o
###
# 64 bit specific files
ifeq ($(CONFIG_X86_64),y)
- obj-y += genapic_64.o genapic_flat_64.o
+ obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o
obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
obj-$(CONFIG_AUDIT) += audit_64.o

Index: linux/arch/x86/kernel/smpboot_64.c
===================================================================
--- linux.orig/arch/x86/kernel/smpboot_64.c 2008-03-21 15:36:45.000000000 -0500
+++ linux/arch/x86/kernel/smpboot_64.c 2008-03-21 15:49:38.000000000 -0500
@@ -60,6 +60,7 @@
#include <asm/hw_irq.h>
#include <asm/numa.h>
#include <asm/trampoline.h>
+#include <asm/genapic.h>

/* Number of siblings per CPU package */
int smp_num_siblings = 1;
@@ -418,6 +419,9 @@ static int __cpuinit wakeup_secondary_vi
unsigned long send_status, accept_status = 0;
int maxlvt, num_starts, j;

+ if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
+ return uv_wakeup_secondary(phys_apicid, start_rip);
+
Dprintk("Asserting INIT.\n");

/*
@@ -679,7 +683,8 @@ do_rest:
/* trampoline code not run */
printk("Not responding.\n");
#ifdef APIC_DEBUG
- inquire_remote_apic(apicid);
+ if (get_uv_system_type() != UV_NON_UNIQUE_APIC)
+ inquire_remote_apic(apicid);
#endif
}
}
Index: linux/include/asm-x86/genapic_64.h
===================================================================
--- linux.orig/include/asm-x86/genapic_64.h 2008-03-21 15:37:05.000000000 -0500
+++ linux/include/asm-x86/genapic_64.h 2008-03-21 15:49:38.000000000 -0500
@@ -39,4 +39,9 @@ enum uv_system_type {UV_NONE, UV_LEGACY_
extern enum uv_system_type get_uv_system_type(void);
extern int is_uv_system(void);

+extern struct genapic apic_x2apic_uv_x;
+DECLARE_PER_CPU(int, x2apic_extra_bits);
+extern void uv_cpu_init(void);
+extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip);
+
#endif


2008-03-25 10:25:45

by Andi Kleen

[permalink] [raw]
Subject: Re: [RFC 8/8] x86_64: Support for new UV apic

Jack Steiner <[email protected]> writes:

> unsigned int get_apic_id(void)
> {
> - return (apic_read(APIC_ID) >> 24) & 0xFFu;
> + unsigned int id;
> +
> + preempt_disable();
> + id = apic_read(APIC_ID);
> + if (uv_system_type >= UV_X2APIC)
> + id |= __get_cpu_var(x2apic_extra_bits);
> + else
> + id = (id >> 24) & 0xFFu;;
> + preempt_enable();
> + return id;

Really caller should have done preempt_disable(), otherwise
the value can be wrong as soon as you return.

Better probably to just WARN_ON if preemption is on

(just be careful it does not trigger in oopses and machine checks)

> +
> +DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
> +EXPORT_PER_CPU_SYMBOL(__uv_hub_info);

GPL export too?

> +
> +struct uv_blade_info *uv_blade_info;
> +EXPORT_SYMBOL_GPL(uv_blade_info);
> +
> +short *uv_node_to_blade;
> +EXPORT_SYMBOL_GPL(uv_node_to_blade);
> +
> +short *uv_cpu_to_blade;
> +EXPORT_SYMBOL_GPL(uv_cpu_to_blade);
> +
> +short uv_possible_blades;
> +EXPORT_SYMBOL_GPL(uv_possible_blades);
> +
> +/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
> +/* Probably incorrect for UV ZZZ */

Actually it should be correct. Except for UV you likely really need a
NUMA aware irqbalanced. I used to have some old very hackish patches
to implement that in irqbalanced, but never pushed it because the
systems I was working on didn't really need it.


> +
> +static void uv_send_IPI_one(int cpu, int vector)
> +{
> + unsigned long val, apicid;
> + int nasid;
> +
> + apicid = per_cpu(x86_cpu_to_apicid, cpu); /* ZZZ - cache node-local ? */

Instead of doing that it might be better to implement __read_mostly per CPU variables
(should not be very hard)

> +static void uv_send_IPI_mask(cpumask_t mask, int vector)
> +{
> + unsigned long flags;
> + unsigned int cpu;
> +
> + local_irq_save(flags);
> + for (cpu = 0; cpu < NR_CPUS; ++cpu)
> + if (cpu_isset(cpu, mask))
> + uv_send_IPI_one(cpu, vector);
> + local_irq_restore(flags);

This could disable interrupts for a long time could't it? Really needed?


> + bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
> + uv_blade_info = alloc_bootmem_pages(bytes);
> + memset(uv_blade_info, 255, bytes);

255? Strange poison value.

> ===================================================================
> --- linux.orig/arch/x86/kernel/Makefile 2008-03-21 15:36:35.000000000 -0500
> +++ linux/arch/x86/kernel/Makefile 2008-03-21 15:49:38.000000000 -0500
> @@ -90,7 +90,7 @@ scx200-y += scx200_32.o
> ###
> # 64 bit specific files
> ifeq ($(CONFIG_X86_64),y)
> - obj-y += genapic_64.o genapic_flat_64.o
> + obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o

Definitely should be a CONFIG

> @@ -418,6 +419,9 @@ static int __cpuinit wakeup_secondary_vi
> unsigned long send_status, accept_status = 0;
> int maxlvt, num_starts, j;
>
> + if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
> + return uv_wakeup_secondary(phys_apicid, start_rip);
> +

This should be probably factored properly (didn't Jeremy have smp_ops
for this some time ago) so that even the default case is a call.

> Dprintk("Asserting INIT.\n");
>
> /*
> @@ -679,7 +683,8 @@ do_rest:
> /* trampoline code not run */
> printk("Not responding.\n");
> #ifdef APIC_DEBUG
> - inquire_remote_apic(apicid);
> + if (get_uv_system_type() != UV_NON_UNIQUE_APIC)
> + inquire_remote_apic(apicid);

Dito.


-Andi

2008-03-25 14:31:23

by Ingo Molnar

[permalink] [raw]
Subject: Re: [RFC 8/8] x86_64: Support for new UV apic


* Jack Steiner <[email protected]> wrote:

> Index: linux/arch/x86/kernel/genapic_64.c

> @@ -69,7 +73,16 @@ void send_IPI_self(int vector)
>
> unsigned int get_apic_id(void)
> {
> - return (apic_read(APIC_ID) >> 24) & 0xFFu;
> + unsigned int id;
> +
> + preempt_disable();
> + id = apic_read(APIC_ID);
> + if (uv_system_type >= UV_X2APIC)
> + id |= __get_cpu_var(x2apic_extra_bits);
> + else
> + id = (id >> 24) & 0xFFu;;
> + preempt_enable();
> + return id;

dont we want to put get_apic_id() into struct genapic instead? We
already have ID management there.

also, we want to unify 32-bit and 64-bit genapic code and just have
genapic all across x86.

Ingo

2008-03-25 16:31:20

by tip-bot for Jack Steiner

[permalink] [raw]
Subject: Re: [RFC 8/8] x86_64: Support for new UV apic

On Tue, Mar 25, 2008 at 03:30:59PM +0100, Ingo Molnar wrote:
>
> * Jack Steiner <[email protected]> wrote:
>
> > Index: linux/arch/x86/kernel/genapic_64.c
>
> > @@ -69,7 +73,16 @@ void send_IPI_self(int vector)
> >
> > unsigned int get_apic_id(void)
> > {
> > - return (apic_read(APIC_ID) >> 24) & 0xFFu;
> > + unsigned int id;
> > +
> > + preempt_disable();
> > + id = apic_read(APIC_ID);
> > + if (uv_system_type >= UV_X2APIC)
> > + id |= __get_cpu_var(x2apic_extra_bits);
> > + else
> > + id = (id >> 24) & 0xFFu;;
> > + preempt_enable();
> > + return id;
>
> dont we want to put get_apic_id() into struct genapic instead? We
> already have ID management there.
>
> also, we want to unify 32-bit and 64-bit genapic code and just have
> genapic all across x86.

Long term, I think that makes sense. However, I think that should be a
separate series of patches since there are significant differences between
the 32-bit and 64-bit genapic structs.

--- jack

2008-03-25 17:57:27

by tip-bot for Jack Steiner

[permalink] [raw]
Subject: Re: [RFC 8/8] x86_64: Support for new UV apic

On Tue, Mar 25, 2008 at 11:25:34AM +0100, Andi Kleen wrote:
> Jack Steiner <[email protected]> writes:
>
> > unsigned int get_apic_id(void)
> > {
> > - return (apic_read(APIC_ID) >> 24) & 0xFFu;
> > + unsigned int id;
> > +
> > + preempt_disable();
> > + id = apic_read(APIC_ID);
> > + if (uv_system_type >= UV_X2APIC)
> > + id |= __get_cpu_var(x2apic_extra_bits);
> > + else
> > + id = (id >> 24) & 0xFFu;;
> > + preempt_enable();
> > + return id;
>
> Really caller should have done preempt_disable(), otherwise
> the value can be wrong as soon as you return.
>
> Better probably to just WARN_ON if preemption is on

Will do (assuming it doesn't ripple thru too much code eliminating
warnings - doesn't look bad at first glance).


>
> (just be careful it does not trigger in oopses and machine checks)
>
> > +
> > +DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
> > +EXPORT_PER_CPU_SYMBOL(__uv_hub_info);
>
> GPL export too?

Yes.


>
> > +
> > +struct uv_blade_info *uv_blade_info;
> > +EXPORT_SYMBOL_GPL(uv_blade_info);
> > +
> > +short *uv_node_to_blade;
> > +EXPORT_SYMBOL_GPL(uv_node_to_blade);
> > +
> > +short *uv_cpu_to_blade;
> > +EXPORT_SYMBOL_GPL(uv_cpu_to_blade);
> > +
> > +short uv_possible_blades;
> > +EXPORT_SYMBOL_GPL(uv_possible_blades);
> > +
> > +/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
> > +/* Probably incorrect for UV ZZZ */
>
> Actually it should be correct. Except for UV you likely really need a
> NUMA aware irqbalanced. I used to have some old very hackish patches
> to implement that in irqbalanced, but never pushed it because the
> systems I was working on didn't really need it.

Deleted comment.

>
>
> > +
> > +static void uv_send_IPI_one(int cpu, int vector)
> > +{
> > + unsigned long val, apicid;
> > + int nasid;
> > +
> > + apicid = per_cpu(x86_cpu_to_apicid, cpu); /* ZZZ - cache node-local ? */
>
> Instead of doing that it might be better to implement __read_mostly per CPU variables
> (should not be very hard)

Added to list of loose-ends that need addressing.


>
> > +static void uv_send_IPI_mask(cpumask_t mask, int vector)
> > +{
> > + unsigned long flags;
> > + unsigned int cpu;
> > +
> > + local_irq_save(flags);
> > + for (cpu = 0; cpu < NR_CPUS; ++cpu)
> > + if (cpu_isset(cpu, mask))
> > + uv_send_IPI_one(cpu, vector);
> > + local_irq_restore(flags);
>
> This could disable interrupts for a long time could't it? Really needed?

No, not sure why I did that. Deleted the irq disable...


>
>
> > + bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
> > + uv_blade_info = alloc_bootmem_pages(bytes);
> > + memset(uv_blade_info, 255, bytes);
>
> 255? Strange poison value.

Deleted the memset. Should not be depending on poison values. Was useful
in debugging but it has outlived it's usefulness.


>
> > ===================================================================
> > --- linux.orig/arch/x86/kernel/Makefile 2008-03-21 15:36:35.000000000 -0500
> > +++ linux/arch/x86/kernel/Makefile 2008-03-21 15:49:38.000000000 -0500
> > @@ -90,7 +90,7 @@ scx200-y += scx200_32.o
> > ###
> > # 64 bit specific files
> > ifeq ($(CONFIG_X86_64),y)
> > - obj-y += genapic_64.o genapic_flat_64.o
> > + obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o
>
> Definitely should be a CONFIG

Not sure that I understand why. The overhead of UV is minimal & we want UV
enabled in all distro kernels. OTOH, small embedded systems probably want to
eliminate every last bit of unneeded code.

Might make sense to have a config option. Thoughts????


>
> > @@ -418,6 +419,9 @@ static int __cpuinit wakeup_secondary_vi
> > unsigned long send_status, accept_status = 0;
> > int maxlvt, num_starts, j;
> >
> > + if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
> > + return uv_wakeup_secondary(phys_apicid, start_rip);
> > +
>
> This should be probably factored properly (didn't Jeremy have smp_ops
> for this some time ago) so that even the default case is a call.

By factored, do you means something like:
is_uv_legacy_system()
is_us_non_unique_apicid_system()
...

Or maybe:
is_uv_system_type(x) # where x is UV_NON_UNIQUE_APIC, etc


> -Andi

Thanks for the careful review.

--- jack

2008-03-25 18:03:18

by Andi Kleen

[permalink] [raw]
Subject: Re: [RFC 8/8] x86_64: Support for new UV apic

> > This should be probably factored properly (didn't Jeremy have smp_ops
> > for this some time ago) so that even the default case is a call.
>
> By factored, do you means something like:
> is_uv_legacy_system()
> is_us_non_unique_apicid_system()
> ...
>
> Or maybe:
> is_uv_system_type(x) # where x is UV_NON_UNIQUE_APIC, etc

No instead of having lots of if (xyz_system) do_xyz_special()
go through smp_ops for the whole thing so that UV would just have a
special smp_ops that has special implementions or wrappers.

Oops I see smp_ops are currently only implemented
for 32bit. Ok do it only once smp_ops exist on 64bit too.

-Andi

2008-03-26 02:28:17

by Jeremy Fitzhardinge

[permalink] [raw]
Subject: Re: [RFC 8/8] x86_64: Support for new UV apic

Andi Kleen wrote:
> No instead of having lots of if (xyz_system) do_xyz_special()
> go through smp_ops for the whole thing so that UV would just have a
> special smp_ops that has special implementions or wrappers.
>
> Oops I see smp_ops are currently only implemented
> for 32bit. Ok do it only once smp_ops exist on 64bit too.
>

I think glommer has patches to unify smp stuff, which should include
smp_ops.

J

2008-03-26 03:22:19

by Glauber Costa

[permalink] [raw]
Subject: Re: [RFC 8/8] x86_64: Support for new UV apic

On Tue, Mar 25, 2008 at 11:23 PM, Jeremy Fitzhardinge <[email protected]> wrote:
> Andi Kleen wrote:
> > No instead of having lots of if (xyz_system) do_xyz_special()
> > go through smp_ops for the whole thing so that UV would just have a
> > special smp_ops that has special implementions or wrappers.
> >
> > Oops I see smp_ops are currently only implemented
> > for 32bit. Ok do it only once smp_ops exist on 64bit too.
> >
>
> I think glommer has patches to unify smp stuff, which should include
> smp_ops.
>

They are already merged in ingo's tree.

I'm still about to post some last-minute issues, but the full smp_ops
support is there.
--
Glauber Costa.
"Free as in Freedom"
http://glommer.net

"The less confident you are, the more serious you have to act."

2008-03-26 03:24:18

by Glauber Costa

[permalink] [raw]
Subject: Re: [RFC 8/8] x86_64: Support for new UV apic

On Tue, Mar 25, 2008 at 1:31 PM, Jack Steiner <[email protected]> wrote:
> On Tue, Mar 25, 2008 at 03:30:59PM +0100, Ingo Molnar wrote:
> >
> > * Jack Steiner <[email protected]> wrote:
> >
> > > Index: linux/arch/x86/kernel/genapic_64.c
> >
> > > @@ -69,7 +73,16 @@ void send_IPI_self(int vector)
> > >
> > > unsigned int get_apic_id(void)
> > > {
> > > - return (apic_read(APIC_ID) >> 24) & 0xFFu;
> > > + unsigned int id;
> > > +
> > > + preempt_disable();
> > > + id = apic_read(APIC_ID);
> > > + if (uv_system_type >= UV_X2APIC)
> > > + id |= __get_cpu_var(x2apic_extra_bits);
> > > + else
> > > + id = (id >> 24) & 0xFFu;;
> > > + preempt_enable();
> > > + return id;
> >
> > dont we want to put get_apic_id() into struct genapic instead? We
> > already have ID management there.
> >
> > also, we want to unify 32-bit and 64-bit genapic code and just have
> > genapic all across x86.
>
> Long term, I think that makes sense. However, I think that should be a
> separate series of patches since there are significant differences between
> the 32-bit and 64-bit genapic structs.
>
However, if you add more code, they'll keep diverging. The moment you
touch them, and get your
hands warmed up, is the perfect moment for an integration series.

--
Glauber Costa.
"Free as in Freedom"
http://glommer.net

"The less confident you are, the more serious you have to act."

2008-03-26 07:30:32

by Ingo Molnar

[permalink] [raw]
Subject: Re: [RFC 8/8] x86_64: Support for new UV apic


* Jeremy Fitzhardinge <[email protected]> wrote:

> Andi Kleen wrote:
>> No instead of having lots of if (xyz_system) do_xyz_special()
>> go through smp_ops for the whole thing so that UV would just have a
>> special smp_ops that has special implementions or wrappers.
>> Oops I see smp_ops are currently only implemented
>> for 32bit. Ok do it only once smp_ops exist on 64bit too.
>
> I think glommer has patches to unify smp stuff, which should include
> smp_ops.

yep, x86.git/latest has all that work included.

Ingo

2008-03-26 07:38:47

by Ingo Molnar

[permalink] [raw]
Subject: Re: [RFC 8/8] x86_64: Support for new UV apic


* Jack Steiner <[email protected]> wrote:

> > > - obj-y += genapic_64.o genapic_flat_64.o
> > > + obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o
> >
> > Definitely should be a CONFIG
>
> Not sure that I understand why. The overhead of UV is minimal & we
> want UV enabled in all distro kernels. OTOH, small embedded systems
> probably want to eliminate every last bit of unneeded code.
>
> Might make sense to have a config option. Thoughts????

i wouldnt mind having UV enabled by default (it can be a config option
but default-enabled on generic kernels so all distros will pick this hw
support up), but we definitely need the genapic unification before we
can add more features.

Ingo

2008-03-30 20:23:24

by Yinghai Lu

[permalink] [raw]
Subject: Re: [RFC 8/8] x86_64: Support for new UV apic

On Wed, Mar 26, 2008 at 12:38 AM, Ingo Molnar <[email protected]> wrote:
>
> * Jack Steiner <[email protected]> wrote:
>
> > > > - obj-y += genapic_64.o genapic_flat_64.o
> > > > + obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o
> > >
> > > Definitely should be a CONFIG
> >
> > Not sure that I understand why. The overhead of UV is minimal & we
> > want UV enabled in all distro kernels. OTOH, small embedded systems
> > probably want to eliminate every last bit of unneeded code.
> >
> > Might make sense to have a config option. Thoughts????
>
> i wouldnt mind having UV enabled by default (it can be a config option
> but default-enabled on generic kernels so all distros will pick this hw
> support up), but we definitely need the genapic unification before we
> can add more features.

config option would be reasonable.
for x86_64
subarch already have X86_PC, X86_VSMP.
we have X86_UVSMP

YH

2008-03-30 20:42:04

by Yinghai Lu

[permalink] [raw]
Subject: Re: [RFC 8/8] x86_64: Support for new UV apic

On Mon, Mar 24, 2008 at 11:21 AM, Jack Steiner <[email protected]> wrote:
>
> UV supports really big systems. So big, in fact, that the APICID register
> does not contain enough bits to contain an APICID that is unique across all
> cpus.
>
> The UV BIOS supports 3 APICID modes:
>
> - legacy mode. This mode uses the old APIC mode where
> APICID is in bits [31:24] of the APICID register.
>
> - x2apic mode. This mode is whitebox-compatible. APICIDs
> are unique across all cpus. Standard x2apic APIC operations
> (Intel-defined) can be used for IPIs. The node identifier
> fits within the Intel-defined portion of the APICID register.
>
> - x2apic-uv mode. In this mode, the APICIDs on each node have
> unique IDs, but IDs on different node are not unique. For example,
> if each mode has 32 cpus, the APICIDs on each node might be
> 0 - 31. Every node has the same set of IDs.
> The UV hub is used to route IPIs/interrupts to the correct node.
> Traditional APIC operations WILL NOT WORK.
>
> In x2apic-uv mode, the ACPI tables all contain a full unique ID (note:
> exact bit layout still changing but the following is close):
>
> nnnnnnnnnnlc0cch
> n = unique node number
> l = socket number on board
> c = core
> h = hyperthread
>
> Only the "lc0cch" bits are written to the APICID register. The remaining bits are
> supplied by having the get_apic_id() function "OR" the extra bits into the value
> read from the APICID register. (Hmmm.. why not keep the ENTIRE APICID register
> in per-cpu data....)
>
> The x2apic-uv mode is recognized by the MADT table containing:
> oem_id = "SGI"
> oem_table_id = "UV-X"
>
>
> (NOTE: a work-in-progress. Pieces missing....)
>
>
> Signed-off-by: Jack Steiner <[email protected]>
>
> ---
> arch/x86/kernel/Makefile | 2
> arch/x86/kernel/genapic_64.c | 15 +
> arch/x86/kernel/genx2apic_uv_x.c | 305 +++++++++++++++++++++++++++++++++++++++
> arch/x86/kernel/setup64.c | 4
> arch/x86/kernel/smpboot_64.c | 7
> include/asm-x86/genapic_64.h | 5
> 6 files changed, 335 insertions(+), 3 deletions(-)
>
> Index: linux/arch/x86/kernel/genapic_64.c
> ===================================================================
> --- linux.orig/arch/x86/kernel/genapic_64.c 2008-03-21 15:37:05.000000000 -0500
> +++ linux/arch/x86/kernel/genapic_64.c 2008-03-21 15:49:38.000000000 -0500
> @@ -30,6 +30,7 @@ u16 x86_cpu_to_apicid_init[NR_CPUS] __in
> void *x86_cpu_to_apicid_early_ptr;
> DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID;
> EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
> +DEFINE_PER_CPU(int, x2apic_extra_bits);
>
> struct genapic __read_mostly *genapic = &apic_flat;
>
> @@ -40,6 +41,9 @@ static enum uv_system_type uv_system_typ
> */
> void __init setup_apic_routing(void)
> {
> + if (uv_system_type == UV_NON_UNIQUE_APIC)
> + genapic = &apic_x2apic_uv_x;
> + else
> #ifdef CONFIG_ACPI
> /*
> * Quirk: some x86_64 machines can only use physical APIC mode
> @@ -69,7 +73,16 @@ void send_IPI_self(int vector)
>
> unsigned int get_apic_id(void)
> {
> - return (apic_read(APIC_ID) >> 24) & 0xFFu;
> + unsigned int id;
> +
> + preempt_disable();
> + id = apic_read(APIC_ID);
> + if (uv_system_type >= UV_X2APIC)
> + id |= __get_cpu_var(x2apic_extra_bits);
> + else
> + id = (id >> 24) & 0xFFu;;
> + preempt_enable();
> + return id;
>

you can not shift id here.

GET_APIC_ID will shift that again.

you apic id will be 0 for all cpu

YH

2008-03-30 21:04:16

by tip-bot for Jack Steiner

[permalink] [raw]
Subject: Re: [RFC 8/8] x86_64: Support for new UV apic

On Sun, Mar 30, 2008 at 01:23:12PM -0700, Yinghai Lu wrote:
> On Wed, Mar 26, 2008 at 12:38 AM, Ingo Molnar <[email protected]> wrote:
> >
> > * Jack Steiner <[email protected]> wrote:
> >
> > > > > - obj-y += genapic_64.o genapic_flat_64.o
> > > > > + obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o
> > > >
> > > > Definitely should be a CONFIG
> > >
> > > Not sure that I understand why. The overhead of UV is minimal & we
> > > want UV enabled in all distro kernels. OTOH, small embedded systems
> > > probably want to eliminate every last bit of unneeded code.
> > >
> > > Might make sense to have a config option. Thoughts????
> >
> > i wouldnt mind having UV enabled by default (it can be a config option
> > but default-enabled on generic kernels so all distros will pick this hw
> > support up), but we definitely need the genapic unification before we
> > can add more features.
>
> config option would be reasonable.
> for x86_64
> subarch already have X86_PC, X86_VSMP.
> we have X86_UVSMP

If there was a significant differece between UV and generic kernels
(or hardware), then I would agree. However, the only significant
difference is the APIC model on large systems. Small systems are
exactly compatible.

The problem with subarch is that we want 1 binary kernel to support
both generic hardware AND uv hardware. This restriction is desirable
for the distros and software vendors. Otherwise, additional kernel
images would have to be built, released, & certified.

--- jack

2008-03-30 21:08:54

by tip-bot for Jack Steiner

[permalink] [raw]
Subject: Re: [RFC 8/8] x86_64: Support for new UV apic

> > unsigned int get_apic_id(void)
> > {
> > - return (apic_read(APIC_ID) >> 24) & 0xFFu;
> > + unsigned int id;
> > +
> > + preempt_disable();
> > + id = apic_read(APIC_ID);
> > + if (uv_system_type >= UV_X2APIC)
> > + id |= __get_cpu_var(x2apic_extra_bits);
> > + else
> > + id = (id >> 24) & 0xFFu;;
> > + preempt_enable();
> > + return id;
> >
>
> you can not shift id here.
>
> GET_APIC_ID will shift that again.
>
> you apic id will be 0 for all cpu
>

I think this is fixed in the patch that I submitted on Friday. I
had to rework the GET_APIC_ID() changes because of the unification
of -32 & -64 apic code. I think the new code is much cleaner...


--- jack

2008-03-30 21:15:28

by Andi Kleen

[permalink] [raw]
Subject: Re: [RFC 8/8] x86_64: Support for new UV apic

> If there was a significant differece between UV and generic kernels
> (or hardware), then I would agree. However, the only significant
> difference is the APIC model on large systems. Small systems are
> exactly compatible.
>
> The problem with subarch is that we want 1 binary kernel to support

x86-64 subarchs are more options than true subarchs. They generally
do not prevent the kernel from running on other systems, just
control addition of some additional code or special data layout. They are
quite different from the i386 subarchs or those of other architectures.

The main reason vSMP is called a subarch is that it pads a lot
of data structures to 4K and you don't really want that on your
normal kernel, but there isn't anything in there that would
prevent booting on a normal system.

The UV option certainly doesn't have this issue.

> both generic hardware AND uv hardware. This restriction is desirable
> for the distros and software vendors. Otherwise, additional kernel
> images would have to be built, released, & certified.

I think an option would be fine, just don't call it a subarch. I don't
feel strongly about it, as you point out it is not really very much
code.

-Andi

2008-03-30 23:24:39

by Yinghai Lu

[permalink] [raw]
Subject: Re: [RFC 8/8] x86_64: Support for new UV apic

On Sun, Mar 30, 2008 at 2:08 PM, Jack Steiner <[email protected]> wrote:
> > > unsigned int get_apic_id(void)
> > > {
> > > - return (apic_read(APIC_ID) >> 24) & 0xFFu;
> > > + unsigned int id;
> > > +
> > > + preempt_disable();
> > > + id = apic_read(APIC_ID);
> > > + if (uv_system_type >= UV_X2APIC)
> > > + id |= __get_cpu_var(x2apic_extra_bits);
> > > + else
> > > + id = (id >> 24) & 0xFFu;;
> > > + preempt_enable();
> > > + return id;
> > >
> >
> > you can not shift id here.
> >
> > GET_APIC_ID will shift that again.
> >
> > you apic id will be 0 for all cpu
> >
>
> I think this is fixed in the patch that I submitted on Friday. I
> had to rework the GET_APIC_ID() changes because of the unification
> of -32 & -64 apic code. I think the new code is much cleaner...

i think Ingo already put you Friday's version in x86.git#latest.

that is wrong too with extra shift.

YH

2008-03-30 23:31:14

by Yinghai Lu

[permalink] [raw]
Subject: Re: [RFC 8/8] x86_64: Support for new UV apic

On Sun, Mar 30, 2008 at 2:18 PM, Andi Kleen <[email protected]> wrote:
> > If there was a significant differece between UV and generic kernels
> > (or hardware), then I would agree. However, the only significant
> > difference is the APIC model on large systems. Small systems are
> > exactly compatible.
> >
> > The problem with subarch is that we want 1 binary kernel to support
>
> x86-64 subarchs are more options than true subarchs. They generally
> do not prevent the kernel from running on other systems, just
> control addition of some additional code or special data layout. They are
> quite different from the i386 subarchs or those of other architectures.
>
> The main reason vSMP is called a subarch is that it pads a lot
> of data structures to 4K and you don't really want that on your
> normal kernel, but there isn't anything in there that would
> prevent booting on a normal system.
>
> The UV option certainly doesn't have this issue.
>
>
> > both generic hardware AND uv hardware. This restriction is desirable
> > for the distros and software vendors. Otherwise, additional kernel
> > images would have to be built, released, & certified.
>
> I think an option would be fine, just don't call it a subarch. I don't
> feel strongly about it, as you point out it is not really very much
> code.

if the calling path like GET_APIC_ID is keeping checking if it is UV
box after boot time, that may not good.

don't need make other hundreds of machine keep running the code only
for several big box all the time.

YH

2008-03-31 02:18:32

by tip-bot for Jack Steiner

[permalink] [raw]
Subject: Re: [RFC 8/8] x86_64: Support for new UV apic

>
> if the calling path like GET_APIC_ID is keeping checking if it is UV
> box after boot time, that may not good.
>
> don't need make other hundreds of machine keep running the code only
> for several big box all the time.
>
> YH


I added trace code to see how often GET_APIC_ID() is called. For
my 8p AMD box, the function is called 6 times per cpu during boot.
I have not seen any other calls to the function after early boot
although I'm they occur under some circumstances.

Can you think of a workload where the frequency of calls is
high enough to be significant??

--- jack

2008-03-31 02:20:25

by Yinghai Lu

[permalink] [raw]
Subject: Re: [RFC 8/8] x86_64: Support for new UV apic

On Sun, Mar 30, 2008 at 7:18 PM, Jack Steiner <[email protected]> wrote:
> >
> > if the calling path like GET_APIC_ID is keeping checking if it is UV
> > box after boot time, that may not good.
> >
> > don't need make other hundreds of machine keep running the code only
> > for several big box all the time.
> >
> > YH
>
>
> I added trace code to see how often GET_APIC_ID() is called. For
> my 8p AMD box, the function is called 6 times per cpu during boot.
> I have not seen any other calls to the function after early boot
> although I'm they occur under some circumstances.

then it is ok.

YH

2008-03-31 06:45:08

by Andi Kleen

[permalink] [raw]
Subject: Re: [RFC 8/8] x86_64: Support for new UV apic

> if the calling path like GET_APIC_ID is keeping checking if it is UV
> box after boot time, that may not good.

I don't think GET_APIC_ID is anywhere on a critical path. As long
as it doesn't lead to code bloat that shouldn't be an issue.

-Andi

2008-03-31 12:34:11

by Ingo Molnar

[permalink] [raw]
Subject: Re: [RFC 8/8] x86_64: Support for new UV apic


* Yinghai Lu <[email protected]> wrote:

> On Sun, Mar 30, 2008 at 7:18 PM, Jack Steiner <[email protected]> wrote:
> > >
> > > if the calling path like GET_APIC_ID is keeping checking if it is
> > > UV box after boot time, that may not good.
> > >
> > > don't need make other hundreds of machine keep running the code
> > > only for several big box all the time.
> > >
> > > YH
> >
> >
> > I added trace code to see how often GET_APIC_ID() is called. For my
> > 8p AMD box, the function is called 6 times per cpu during boot. I
> > have not seen any other calls to the function after early boot
> > although I'm they occur under some circumstances.
>
> then it is ok.

yes - and even if it were called more frequently, having generic code
and having the possibility of an as generic as possible kernel image
(and kernel rpms) is still a very important feature. In that sense
subarch support is actively harmful and we are trying to move away from
that model.

It is very nice that Jack has managed to make UV a generic platform
instead of a subarch - and i'd encourage all future PC platform
extensions to work via that model. The status of current PC
subarchitectures is the following:

- mach-visws: obsolete. We could drop it today - it's been years since
i saw real VISWS bugreports.

- mach-voyager: obsolete.

- mach-es7000: on the way out - latest ES7000's are generic.

- mach-rdc321x: it's being de-sub-architectured. It's about one patch
away from becoming a non-subarch.

so we are just a few patches and a few well-directed voltage spikes away
from being able to remove the subarch complication from x86 altogether
;-)

Ingo

2008-03-31 12:48:41

by Andi Kleen

[permalink] [raw]
Subject: Re: [RFC 8/8] x86_64: Support for new UV apic

> - mach-voyager: obsolete.

You just have to convince James @)

>
> - mach-es7000: on the way out - latest ES7000's are generic.

That would mean dropping support for prev-gen es7000 which are not that
old actually (only a few years). But I think with a little effort the
old es7000 code could be fit into the generic architecture. It is not
that far away from a normal PC.

>
> - mach-rdc321x: it's being de-sub-architectured. It's about one patch
> away from becoming a non-subarch.

mach-numaq (not in arch, but spread out all over the port)
I hear there are only a one or two machines running left.
Unfortunately they are in test.kernel.org, but hopefully
they will die soon.
NUMAQ has quite a lot of ugly ifdefs and special cases that would be
great to eliminate

mach-bigsmp/mach-summit (also in asm only)
obsolete, should be deprecated for mach-generic
(just generic currently pulls in code from them)
A good first step would be to just disable the separate CONFIG
options and only allow using them through generic.

-Andi

2008-03-31 18:42:20

by Yinghai Lu

[permalink] [raw]
Subject: Re: [RFC 8/8] x86_64: Support for new UV apic

On Mon, Mar 31, 2008 at 5:33 AM, Ingo Molnar <[email protected]> wrote:
>
>
> * Yinghai Lu <[email protected]> wrote:
>
> > On Sun, Mar 30, 2008 at 7:18 PM, Jack Steiner <[email protected]> wrote:
> > > >
> > > > if the calling path like GET_APIC_ID is keeping checking if it is
> > > > UV box after boot time, that may not good.
> > > >
> > > > don't need make other hundreds of machine keep running the code
> > > > only for several big box all the time.
> > > >
> > > > YH
> > >
> > >
> > > I added trace code to see how often GET_APIC_ID() is called. For my
> > > 8p AMD box, the function is called 6 times per cpu during boot. I
> > > have not seen any other calls to the function after early boot
> > > although I'm they occur under some circumstances.
> >
> > then it is ok.
>
> yes - and even if it were called more frequently, having generic code
> and having the possibility of an as generic as possible kernel image
> (and kernel rpms) is still a very important feature. In that sense
> subarch support is actively harmful and we are trying to move away from
> that model.

regarding LinuxBIOS = coreboot + TinyKernel. some box need to use
64bit kernel, because 32 bit kernel could mess up the 64 bit
resources, and final kernel kexeced is 64 bit.

and TinyKernel need to stay with coreboot in MB flash rom, and that
flash is about 2M...

So hope it is easier to use MACRO to mask platform detect code out.

YH