LinuxLists.cc - Boot error: x86/efi/ioremap kconfig:x86

2012-05-17 03:06:59

Subject: Boot error: x86/efi/ioremap kconfig:x86_64-nfsroot commit:a790068

Hi Matt,

FYI, starting from this commit, the kernel immediately reboots in my
kvm boot tests..

Thanks,
Fengguang

commit a790068f4435a053fa16562eceed520f9b027c41
Author: Matt Fleming <[email protected]>
Date: Mon Mar 5 14:21:35 2012 +0000

x86, efi: Fixup efi_ioremap() breakage

Don't map holes.

* Is the e820 map the correct memory map to use?
* Is it OK to call find_early_table_space multiple times?
* Do we need add_efi_memmap anymore? Unconditionally do it?
* Should we reduce freq of printk(init_memory_mapping);?
* Are there parts of the memmap not in the e820 map?
* Do we even need to map ACPI stuff? is it ioremap'd?
* Is it save to reserve pagetables in init_memory_mapping(), are the other callers (gart_iommu) called after bootmem?

Signed-off-by: Matt Fleming <[email protected]>

diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index bce688d..4f95f23 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -51,8 +51,9 @@ static inline phys_addr_t get_max_mapped(void)
return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT;
}

-extern unsigned long init_memory_mapping(unsigned long start,
- unsigned long end);
+extern unsigned long init_memory_mapping(void);
+extern unsigned long __init_memory_mapping(unsigned long start,
+ unsigned long end);

extern void initmem_init(void);
extern void free_initmem(void);
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index b1e7c7f..f52ced2 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -769,7 +769,7 @@ int __init gart_iommu_init(void)

if (end_pfn > max_low_pfn_mapped) {
start_pfn = (aper_base>>PAGE_SHIFT);
- init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
+ __init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
}

pr_info("PCI-DMA: using GART IOMMU.\n");
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index e22bb08..b65d287 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -690,8 +690,6 @@ early_param("reservelow", parse_reservelow);

void __init setup_arch(char **cmdline_p)
{
- unsigned long end_pfn;
-
#ifdef CONFIG_X86_32
memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
visws_early_detect();
@@ -927,31 +925,11 @@ void __init setup_arch(char **cmdline_p)

init_gbpages();

- /* max_pfn_mapped is updated here */
- end_pfn = max_low_pfn;
-
-#ifdef CONFIG_X86_64
- /*
- * There may be regions after the last E820_RAM region that we
- * want to include in the kernel direct mapping because their
- * contents are needed at runtime.
- */
- if (efi_enabled) {
- unsigned long efi_end;
-
- efi_end = e820_end_pfn(MAXMEM>>PAGE_SHIFT, E820_RESERVED_EFI);
- if (efi_end > end_pfn)
- end_pfn = efi_end;
- }
-#endif
-
- max_low_pfn_mapped = init_memory_mapping(0, end_pfn << PAGE_SHIFT);
- max_pfn_mapped = max_low_pfn_mapped;
+ /* max_low_pfn_mapped is updated here */
+ max_pfn_mapped = init_memory_mapping();

#ifdef CONFIG_X86_64
if (max_pfn > max_low_pfn) {
- max_pfn_mapped = init_memory_mapping(1UL<<32,
- max_pfn<<PAGE_SHIFT);
/* can we preseve max_low_pfn ?*/
max_low_pfn = max_pfn;
}
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 6cabf65..f1835e9 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -118,8 +118,8 @@ static int __meminit save_mr(struct map_range *mr, int nr_range,
* This runs before bootmem is initialized and gets pages directly from
* the physical memory. To access them they are temporarily mapped.
*/
-unsigned long __init_refok init_memory_mapping(unsigned long start,
- unsigned long end)
+unsigned long __init_refok __init_memory_mapping(unsigned long start,
+ unsigned long end)
{
unsigned long page_size_mask = 0;
unsigned long start_pfn, end_pfn;
@@ -301,6 +301,59 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
return ret >> PAGE_SHIFT;
}

+/*
+ * Traverse the E820 memory map and add RAM/ACPI/EFI regions to the
+ * initial memory mapping.
+ */
+unsigned long __init_refok init_memory_mapping(void)
+{
+ struct e820entry *entry;
+ unsigned long last_pfn_mapped = 0;
+ unsigned long start, end;
+ int i;
+
+ /* Map the legacy region until we fix all drivers */
+ __init_memory_mapping(0, 1 << 20);
+
+ for (i = 0; i < e820.nr_map; i++) {
+ entry = &e820.map[i];
+ start = entry->addr;
+ end = start + entry->size;
+
+ /* We've already mapped below 1MB */
+ if (end < (1 << 20))
+ continue;
+
+ if (start < (1 << 20))
+ start = 1 << 20;
+#ifdef CONFIG_X86_32
+ /*
+ * The map is sorted, so bail once we hit a region
+ * that's above max_low_pfn.
+ */
+ if (start >= max_low_pfn << PAGE_SHIFT)
+ break;
+
+ if (end > max_low_pfn << PAGE_SHIFT)
+ end = max_low_pfn << PAGE_SHIFT;
+#endif
+ switch (entry->type) {
+ case E820_RAM:
+ case E820_RESERVED_EFI:
+ case E820_ACPI:
+ case E820_NVS:
+ last_pfn_mapped = __init_memory_mapping(start, end);
+ break;
+ default:
+ continue;
+ }
+
+ if (end <= max_low_pfn << PAGE_SHIFT)
+ max_low_pfn_mapped = last_pfn_mapped;
+ }
+
+ return last_pfn_mapped;
+}

/*
* devmem_is_allowed() checks to see if /dev/mem access to a certain address
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 436a030..e86e370 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -659,7 +659,7 @@ int arch_add_memory(int nid, u64 start, u64 size)
unsigned long nr_pages = size >> PAGE_SHIFT;
int ret;

- last_mapped_pfn = init_memory_mapping(start, start + size);
+ last_mapped_pfn = __init_memory_mapping(start, start + size);
if (last_mapped_pfn > max_pfn_mapped)
max_pfn_mapped = last_mapped_pfn;

diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 264cc6e..ef0a725 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -573,8 +573,7 @@ void __init efi_init(void)
printk(KERN_WARNING
"Kernel-defined memdesc doesn't match the one from EFI!\n");

- if (add_efi_memmap)
- do_add_efi_memmap();
+ do_add_efi_memmap();

#ifdef CONFIG_X86_32
x86_platform.get_wallclock = efi_get_time;

Attachments:

(No filename) (6.13 kB)
config-3.3.0-rc6+ (81.43 kB)
Download all attachments

2012-05-17 06:35:14

by Matt Fleming

[permalink] [raw]

Subject: Re: Boot error: x86/efi/ioremap kconfig:x86_64-nfsroot commit:a790068

On Thu, 2012-05-17 at 11:06 +0800, [email protected] wrote:
> Hi Matt,
>
> FYI, starting from this commit, the kernel immediately reboots in my
> kvm boot tests..

Hi Fengguang,

Yeah, this commit is broken. I pushed it to my repository so that I
could test it on all of my machines, but quickly discovered that it
contains some bugs.

How come you are running are kernel built from that branch? I only
created it yesterday. The majority of branches in my repository are work
in progress, at least until they appear in a pull request on lkml. Which
means they may contain blatantly broken commits.

But anyway, thanks for testing as it confirms that the commit is indeed
buggy!

2012-05-17 06:59:26

by Wu Fengguang

[permalink] [raw]

Subject: Re: Boot error: x86/efi/ioremap kconfig:x86_64-nfsroot commit:a790068

Hi Matt,

On Thu, May 17, 2012 at 07:34:55AM +0100, Matt Fleming wrote:
> On Thu, 2012-05-17 at 11:06 +0800, [email protected] wrote:
> > Hi Matt,
> >
> > FYI, starting from this commit, the kernel immediately reboots in my
> > kvm boot tests..
>
> Hi Fengguang,
>
> Yeah, this commit is broken. I pushed it to my repository so that I
> could test it on all of my machines, but quickly discovered that it
> contains some bugs.

OK, I got it.

> How come you are running are kernel built from that branch? I only
> created it yesterday. The majority of branches in my repository are work
> in progress, at least until they appear in a pull request on lkml. Which
> means they may contain blatantly broken commits.

I happen to be building a kernel test backend for our team and
hopefully for the wider kernel community.

What I do is to fetch a number of git trees (including yours) into my
test server, iterate through *all* remote branches and test out *every
single* commits there.

> But anyway, thanks for testing as it confirms that the commit is indeed
> buggy!

My pleasure :)

Thanks,
Fengguang

2012-05-17 08:11:51

by Matt Fleming

[permalink] [raw]

Subject: Re: Boot error: x86/efi/ioremap kconfig:x86_64-nfsroot commit:a790068

On Thu, 2012-05-17 at 14:59 +0800, Fengguang Wu wrote:
> I happen to be building a kernel test backend for our team and
> hopefully for the wider kernel community.
>
> What I do is to fetch a number of git trees (including yours) into my
> test server, iterate through *all* remote branches and test out *every
> single* commits there.

That is very cool! I'll keep this in mind in future to make sure that my
trees don't contain known buggy commits.

2012-05-17 12:28:11

by Wu Fengguang

[permalink] [raw]

Subject: 0day kernel testing back-end

On Thu, May 17, 2012 at 09:11:12AM +0100, Matt Fleming wrote:
> On Thu, 2012-05-17 at 14:59 +0800, Fengguang Wu wrote:
> > I happen to be building a kernel test backend for our team and
> > hopefully for the wider kernel community.
> >
> > What I do is to fetch a number of git trees (including yours) into my
> > test server, iterate through *all* remote branches and test out *every
> > single* commits there.
>
> That is very cool! I'll keep this in mind in future to make sure that my
> trees don't contain known buggy commits.

Heh. On the other hand, you are welcome to take advantage of this
test infrastructure by creating a random test branch and pushing
commits there. Within hours you should be able to receive (private)
email notifications about possible compile/boot errors.

[ more words on the motivations ]

Of cause one shall still try to code it right in the first place and
carry out the more oriented in-house tests. However in general not all
kernel developers can afford to do comprehensive compile&boot tests on
a variety of kconfigs and hardwares. In a global view it's also not
economical for everyone to setup and run such test environments.

So I'm setting up this "0day" kernel test service with highlights:

0) 0 efforts to use
1) 1-hour 24x7 response (aka. 0day)
2) commit-by-commit tests
3) covers all branches of a git tree

It would be a fast responding system because I would really really
like bugs be found and fixed ASAP. So that they don't land linux-next
at all. IMHO linux-next was supposed to be the "integration" test tree,
however the bugs landing it are mostly non-integration bugs..

linux-next is over-used. The result is bad experiences on running
linux-next kernels. People run into silly mistakes by the others which
could be otherwise avoided if the tests can be carried out in the very
moment new commits are pushed to the git tree. At the time when the
developer is still "hot" to fix any problems. At the time no others
have been disturbed or even aware of the problem being fixed.

Look at the attached compile status graphs. The X-axis is the git
commits in a branch and the Y-axis is the kconfigs. Each blue 'C'/'B'
character means a successful compile/boot. Red 'c'/'b' characters
indicate failures. 'r' means the compiled kernel is to be ran. 'R'
kernel is being boot tested.

As you can see, some kconfig sees a complete red line. And there are
lines that has a long run of red 'C' or 'b' in the middle of line.
Which means the bug silently appears and disappears, or the bug is
found/fixed late and the git branch is an important one that cannot be
rebased. (The mm tree deliberately keeps standalone bug fixes, which
is fine to us users other than making the graph looking ugly.)

Interestingly, it only requires several (perhaps trivial) bugs to
impact a lot of commits, kconfigs and users.

Overall, the red portions in the compile status graphs are significant
and that's a common phenomenon over the big subsystem trees. What I'd
like to see is for the bugs be fixed promptly in each end developers'
topic or testing branches before they are merged into linux-next (and
found by others there in the painful way). I'd like the linux-next be
more pleasant for me and other developers to use. If so, linux-next will
be able to attract more users, resulting even better quality for itself
and then Linus' tree. Hopefully the whole user base and code quality
chain can be moved a bit forward in this way.

On the technical side, I'm currently running a 16-core compile server
which can compile test one commit in 2 minutes on average, covering
about 20 kconfigs. Another 6-core boot server will run 5 kvm instances
each can boot test a kernel in about 1 minute. Note that only several
kconfigs will be boot tested for now. Overall the current setup can
test up to 700 commits each day. Hopefully the hardware pool can be
further expanded on demand, given that it's proved to be important for
the community.

There are a lot of rooms for future improvements.

- more kconfigs are desirable

- it'd be valuable to include some stress tests in the boot test, so
as to trigger more possible kernel panics

- physical test boxes with different hw profile will be allocated to
improve coverage of the boot tests

- micro performance benchmarks are also good candidate features to
catch performance regressions early, though performance drops are
far less painful as kernel panics for the other developers

- the test scripts can be improved over time

- more test back-ends could be established (with different focus) as
long as there are interests and resources: there are never too many
tests!

Feedbacks are welcome! Please drop me a mail if you would like me to
add (or drop) your tree from the 0day kernel tests :-)

Thanks,
Fengguang

Attachments:

(No filename) (4.70 kB)
mm.png (70.79 kB)
net.png (72.95 kB)
xfs.png (71.86 kB)
drm.png (73.99 kB)
Download all attachments