On Saturday March 13, [email protected] wrote:
> On Thursday March 11, [email protected] wrote:
> > > I thought I might try selectively removing patches, but it isn't clear
> > > what order the broken-out patches were applied in.
> > > If you have an ordered list, I can try a binary search.
> >
> > See the `series' file in the broken-out directory.
> >
>
> Ahh... would you consider moving that up one level, or spelling it
> "Series" or "00series" or something to make it stand out for the
> uninitiated??
>
> > > Or if you can suggest some patches that I can try backing out....
> >
> > Maybe turn off -mregparm? Or back off the 4g/4g patches? Maybe they broke
> > non-4:4 code somehow.
> >
>
> Looks like it might be a good guess....
>
And it turns out it was spot on.
Applying 4g-2.6.0-test2-mm2-A5.patch (on top of preceding -mm1
patches) causes my server not to boot.
I tried all the broken out patches except
4g-2.6.0-test2-mm2-A5.patch
4g4g-locked-userspace-copy.patch
but then it couldn't find /sbin/init, even though the root filesystem
was successfully found (I don't know if that is significant).
With all of mm1 applied, and with X86_4G selected, it boots fine.
But without X86_4G it doesn't get to print out any messages at all,
even with 'earlyprintk=vga' set.
I am happy to experiment if you or anyone has patches you would
like tested.
NeilBrown
* Neil Brown <[email protected]> wrote:
> And it turns out it was spot on. Applying 4g-2.6.0-test2-mm2-A5.patch
> (on top of preceding -mm1 patches) causes my server not to boot.
weird. 2.6.4-mm2 boots fine here (both 4g and non-4g). Could you send me
your .config?
(btw., 2.6.4-mm2 needs the attached trivial fix to compile.)
Ingo
--- Makefile.orig
+++ Makefile
@@ -988,7 +988,7 @@ if_changed_dep = $(if $(strip $? $(filte
@set -e; \
$(if $($(quiet)cmd_$(1)),echo ' $(subst ','\'',$($(quiet)cmd_$(1)))';) \
$(cmd_$(1)); \
- scripts/fixdep $(depfile) $@ '$(subst $$,$$$$,$(subst ','\'',$(cmd_$(1))))' > $(@D)/.$(@F).tmp; \
+ scripts/basic/fixdep $(depfile) $@ '$(subst $$,$$$$,$(subst ','\'',$(cmd_$(1))))' > $(@D)/.$(@F).tmp; \
rm -f $(depfile); \
mv -f $(@D)/.$(@F).tmp $(@D)/.$(@F).cmd)
* Neil Brown <[email protected]> wrote:
> And it turns out it was spot on. Applying 4g-2.6.0-test2-mm2-A5.patch
> (on top of preceding -mm1 patches) causes my server not to boot.
hm. Since your .config boots on akpm's box, this is some BIOS dependency
creating an early-boot problem i fear. Debugging such bugs is hard. One
way would be via the PC speaker:
movb $0x3,%al; outb %al,$0x61
this will cause a continuous beep on a typical PC - it works in 16-bit
code too, doesn't have any memory-model assumptions, etc.
the first place to put this would be startup_32 - do we get to this
point at all? (check CONFIG_4G first, to make sure the beep triggers.)
If it beeps, then move it down until you find the place that crashes.
Ingo
On Mon, 15 Mar 2004, Ingo Molnar wrote:
>
> * Neil Brown <[email protected]> wrote:
>
> > And it turns out it was spot on. Applying 4g-2.6.0-test2-mm2-A5.patch
> > (on top of preceding -mm1 patches) causes my server not to boot.
>
> weird. 2.6.4-mm2 boots fine here (both 4g and non-4g). Could you send me
> your .config?
>
> (btw., 2.6.4-mm2 needs the attached trivial fix to compile.)
Yes actually it is scrogged, but i'm sceptical as to whether it is 4G/4G,
here is a broken configuration.
Zwane Mwaikambo <[email protected]> wrote:
>
> On Mon, 15 Mar 2004, Ingo Molnar wrote:
>
> >
> > * Neil Brown <[email protected]> wrote:
> >
> > > And it turns out it was spot on. Applying 4g-2.6.0-test2-mm2-A5.patch
> > > (on top of preceding -mm1 patches) causes my server not to boot.
> >
> > weird. 2.6.4-mm2 boots fine here (both 4g and non-4g). Could you send me
> > your .config?
> >
> > (btw., 2.6.4-mm2 needs the attached trivial fix to compile.)
>
> Yes actually it is scrogged, but i'm sceptical as to whether it is 4G/4G,
> here is a broken configuration.
Manfred found and fixed one bug, but that was newly added in mm2.
DEBUG_PAGEALLOC switches to nopentium mode: otherwise a change_page_attr
could cause a split of a 4 MB page, which could cause a gfp which might
deadlock.
Two parts of the cpu initialization cannot be done early: the f00f bug
workaround needs a working paging setup, and loops_per_jiffies is only
initialized after the timer is running. And everything is now done in setup_arch
instead of before start_kernel: the first printk line should be the
"Linux version xy" line from start_kernel.
---
25-akpm/arch/i386/kernel/cpu/common.c | 40 ++++++++++++++++++++++------------
25-akpm/arch/i386/kernel/setup.c | 3 --
25-akpm/include/asm-i386/processor.h | 2 -
3 files changed, 29 insertions(+), 16 deletions(-)
diff -puN arch/i386/kernel/cpu/common.c~early-x86-cpu-detection-fix arch/i386/kernel/cpu/common.c
--- 25/arch/i386/kernel/cpu/common.c~early-x86-cpu-detection-fix 2004-03-15 11:36:05.144371392 -0800
+++ 25-akpm/arch/i386/kernel/cpu/common.c 2004-03-15 11:36:05.149370632 -0800
@@ -234,7 +234,7 @@ void __init generic_identify(struct cpui
if ( (xlvl & 0xffff0000) == 0x80000000 ) {
if ( xlvl >= 0x80000001 )
c->x86_capability[1] = cpuid_edx(0x80000001);
- if ( xlvl >= 0x80000004)
+ if ( xlvl >= 0x80000004 )
get_model_name(c); /* Default name */
}
}
@@ -263,18 +263,22 @@ static int __init x86_serial_nr_setup(ch
}
__setup("serialnumber", x86_serial_nr_setup);
-void __init early_identify_cpu(struct cpuinfo_x86 *c)
+/*
+ * This does the hard work of actually picking apart the CPU stuff...
+ */
+void __init identify_cpu(struct cpuinfo_x86 *c)
{
- c->loops_per_jiffy = loops_per_jiffy;
+ int i;
+
c->x86_cache_size = -1;
c->x86_vendor = X86_VENDOR_UNKNOWN;
c->cpuid_level = -1; /* CPUID not detected */
c->x86_model = c->x86_mask = 0; /* So far unknown... */
c->x86_vendor_id[0] = '\0'; /* Unset */
c->x86_model_id[0] = '\0'; /* Unset */
- c->x86_clflush_size = 32;
memset(&c->x86_capability, 0, sizeof c->x86_capability);
+ c->x86_clflush_size = 0;
if (!have_cpuid_p()) {
/* First of all, decide if this is a 486 or higher */
/* It's a 486 if we can modify the AC flag */
@@ -285,16 +289,14 @@ void __init early_identify_cpu(struct cp
}
generic_identify(c);
-}
-
-/*
- * This does the hard work of actually picking apart the CPU stuff...
- */
-void __init identify_cpu(struct cpuinfo_x86 *c)
-{
- int i;
- early_identify_cpu(c);
+ if (!c->x86_clflush_size) {
+ /* No cache line size autodetected - manual estimate: */
+ if (c->x86 <= 4)
+ c->x86_clflush_size = 16;
+ else
+ c->x86_clflush_size = 32;
+ }
printk(KERN_DEBUG "CPU: After generic identify, caps: %08lx %08lx %08lx %08lx\n",
c->x86_capability[0],
@@ -382,7 +384,19 @@ void __init identify_cpu(struct cpuinfo_
#ifdef CONFIG_X86_MCE
mcheck_init(c);
#endif
+ late_identify_cpu(c);
+}
+
+void __init late_identify_cpu(struct cpuinfo_x86 *c)
+{
+ /*
+ * The timer is not yet running when identify cpu is called for the
+ * first cpu - check_bugs() calls late_identify_cpu and transfers
+ * loops_per_jiffy from calibrate_delay into the cpu data area.
+ */
+ c->loops_per_jiffy = loops_per_jiffy;
}
+
/*
* Perform early boot up checks for a valid TSC. See arch/i386/kernel/time.c
*/
diff -puN arch/i386/kernel/setup.c~early-x86-cpu-detection-fix arch/i386/kernel/setup.c
--- 25/arch/i386/kernel/setup.c~early-x86-cpu-detection-fix 2004-03-15 11:36:05.145371240 -0800
+++ 25-akpm/arch/i386/kernel/setup.c 2004-03-15 11:36:05.150370480 -0800
@@ -1124,6 +1124,7 @@ void __init setup_arch(char **cmdline_p)
max_low_pfn = setup_memory();
+ identify_cpu(&boot_cpu_data);
/*
* NOTE: before this point _nobody_ is allowed to allocate
* any memory using the bootmem allocator.
@@ -1149,8 +1150,6 @@ void __init setup_arch(char **cmdline_p)
dmi_scan_machine();
- early_identify_cpu(&boot_cpu_data);
-
#ifdef CONFIG_X86_GENERICARCH
generic_apic_probe(*cmdline_p);
#endif
diff -puN include/asm-i386/processor.h~early-x86-cpu-detection-fix include/asm-i386/processor.h
--- 25/include/asm-i386/processor.h~early-x86-cpu-detection-fix 2004-03-15 11:36:05.147370936 -0800
+++ 25-akpm/include/asm-i386/processor.h 2004-03-15 11:36:05.151370328 -0800
@@ -98,6 +98,7 @@ extern struct cpuinfo_x86 cpu_data[];
extern char ignore_fpu_irq;
extern void identify_cpu(struct cpuinfo_x86 *);
+extern void late_identify_cpu(struct cpuinfo_x86 *c);
extern void print_cpu_info(struct cpuinfo_x86 *);
extern void dodgy_tsc(void);
@@ -653,6 +654,5 @@ extern void select_idle_routine(const st
#endif
#define cache_line_size() (boot_cpu_data.x86_clflush_size)
-extern void early_identify_cpu(struct cpuinfo_x86 *c);
#endif /* __ASM_I386_PROCESSOR_H */
_