2007-06-07 22:31:20

by Jesse Barnes

[permalink] [raw]
Subject: [PATCH] trim memory not covered by WB MTRRs

On some machines, buggy BIOSes don't properly set up WB MTRRs to
cover all available RAM, meaning the last few megs (or even gigs)
of memory will be marked uncached. Since Linux tends to allocate
from high memory addresses first, this causes the machine to be
unusably slow as soon as the kernel starts really using memory
(i.e. right around init time).

This patch works around the problem by scanning the MTRRs at
boot and figuring out whether the current end_pfn value (set up
by early e820 code) goes beyond the highest WB MTRR range, and
if so, trimming it to match. A fairly obnoxious KERN_WARNING
is printed too, letting the user know that not all of their
memory is available due to a likely BIOS bug.

Something similar could be done on i386 if needed, but the boot
ordering would be slightly different, since the MTRR code on i386
depends on the boot_cpu_data structure being set up.

This patch incorporates the feedback from Eric and Andi:
- use MAX_VAR_RANGES instead of NUM_VAR_RANGES
- move array declaration to header file as an extern
- add command line disable option "disable_mtrr_trim"
- don't run the trim code if the MTRR default type is cacheable
- don't run the trim code on non-Intel machines

Justin, feel free to test again if you have time and add your
"Tested-by" signoff.

Andi, as for large pages, do you think this is ok as is, or should
I trim at a larger granularity? If so, what granularity?

Signed-off-by: Jesse Barnes <[email protected]>

Thanks,
Jesse

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 5d0283c..cb728a8 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -553,6 +553,12 @@ and is between 256 and 4096 characters. It is defined in the file
See drivers/char/README.epca and
Documentation/digiepca.txt.

+ disable_mtrr_trim [X86-64]
+ By default the kernel will trim any uncacheable
+ memory out of your available memory pool based on
+ MTRR settings. This parameter disables that behavior,
+ possibly causing your machine to run very slowly.
+
dmascc= [HW,AX25,SERIAL] AX.25 Z80SCC driver with DMA
support available.
Format: <io_dev0>[,<io_dev1>[,..<io_dev32>]]
diff --git a/arch/i386/kernel/cpu/mtrr/generic.c b/arch/i386/kernel/cpu/mtrr/generic.c
index c4ebb51..8eb3085 100644
--- a/arch/i386/kernel/cpu/mtrr/generic.c
+++ b/arch/i386/kernel/cpu/mtrr/generic.c
@@ -13,7 +13,7 @@
#include "mtrr.h"

struct mtrr_state {
- struct mtrr_var_range *var_ranges;
+ struct mtrr_var_range var_ranges[MAX_VAR_RANGES];
mtrr_type fixed_ranges[NUM_FIXED_RANGES];
unsigned char enabled;
unsigned char have_fixed;
@@ -84,12 +84,6 @@ void get_mtrr_state(void)
struct mtrr_var_range *vrs;
unsigned lo, dummy;

- if (!mtrr_state.var_ranges) {
- mtrr_state.var_ranges = kmalloc(num_var_ranges * sizeof (struct mtrr_var_range),
- GFP_KERNEL);
- if (!mtrr_state.var_ranges)
- return;
- }
vrs = mtrr_state.var_ranges;

rdmsr(MTRRcap_MSR, lo, dummy);
diff --git a/arch/i386/kernel/cpu/mtrr/if.c b/arch/i386/kernel/cpu/mtrr/if.c
index c7d8f17..0e34a67 100644
--- a/arch/i386/kernel/cpu/mtrr/if.c
+++ b/arch/i386/kernel/cpu/mtrr/if.c
@@ -11,10 +11,6 @@
#include <asm/mtrr.h>
#include "mtrr.h"

-/* RED-PEN: this is accessed without any locking */
-extern unsigned int *usage_table;
-
-
#define FILE_FCOUNT(f) (((struct seq_file *)((f)->private_data))->private)

static const char *const mtrr_strings[MTRR_NUM_TYPES] =
diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c
index 7202b98..ef552ba 100644
--- a/arch/i386/kernel/cpu/mtrr/main.c
+++ b/arch/i386/kernel/cpu/mtrr/main.c
@@ -38,8 +38,8 @@
#include <linux/cpu.h>
#include <linux/mutex.h>

+#include <asm/e820.h>
#include <asm/mtrr.h>
-
#include <asm/uaccess.h>
#include <asm/processor.h>
#include <asm/msr.h>
@@ -47,7 +47,7 @@

u32 num_var_ranges = 0;

-unsigned int *usage_table;
+unsigned int usage_table[MAX_VAR_RANGES];
static DEFINE_MUTEX(mtrr_mutex);

u64 size_or_mask, size_and_mask;
@@ -121,11 +121,6 @@ static void __init init_table(void)
int i, max;

max = num_var_ranges;
- if ((usage_table = kmalloc(max * sizeof *usage_table, GFP_KERNEL))
- == NULL) {
- printk(KERN_ERR "mtrr: could not allocate\n");
- return;
- }
for (i = 0; i < max; i++)
usage_table[i] = 1;
}
@@ -589,16 +584,11 @@ struct mtrr_value {
unsigned long lsize;
};

-static struct mtrr_value * mtrr_state;
+static struct mtrr_value mtrr_state[MAX_VAR_RANGES];

static int mtrr_save(struct sys_device * sysdev, pm_message_t state)
{
int i;
- int size = num_var_ranges * sizeof(struct mtrr_value);
-
- mtrr_state = kzalloc(size,GFP_ATOMIC);
- if (!mtrr_state)
- return -ENOMEM;

for (i = 0; i < num_var_ranges; i++) {
mtrr_if->get(i,
@@ -620,7 +610,6 @@ static int mtrr_restore(struct sys_device * sysdev)
mtrr_state[i].lsize,
mtrr_state[i].ltype);
}
- kfree(mtrr_state);
return 0;
}

@@ -631,6 +620,57 @@ static struct sysdev_driver mtrr_sysdev_driver = {
.resume = mtrr_restore,
};

+static int disable_mtrr_trim;
+
+static int __init disable_mtrr_trim_setup(char *str)
+{
+ disable_mtrr_trim = 1;
+ return 0;
+}
+early_param("disable_mtrr_trim", disable_mtrr_trim_setup);
+
+/**
+ * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
+ *
+ * Some buggy BIOSes don't setup the MTRRs properly for systems with certain
+ * memory configurations. This routine checks to make sure the MTRRs having
+ * a write back type cover all of the memory the kernel is intending to use.
+ * If not, it'll trim any memory off the end by adjusting end_pfn, removing
+ * it from the kernel's allocation pools, warning the user with an obnoxious
+ * message.
+ */
+void __init mtrr_trim_uncached_memory(void)
+{
+ unsigned long i, base, size, highest_addr = 0, def, dummy;
+ mtrr_type type;
+
+ /* Make sure we only trim uncachable memory on Intel machines */
+ rdmsr(MTRRdefType_MSR, def, dummy);
+ def &= 0xff;
+ if (!use_intel() || disable_mtrr_trim || def != MTRR_TYPE_UNCACHABLE)
+ return;
+
+ /* Find highest cached pfn */
+ for (i = 0; i < num_var_ranges; i++) {
+ mtrr_if->get(i, &base, &size, &type);
+ if (type != MTRR_TYPE_WRBACK)
+ continue;
+ base <<= PAGE_SHIFT;
+ size <<= PAGE_SHIFT;
+ if (highest_addr < base + size)
+ highest_addr = base + size;
+ }
+
+ if ((highest_addr >> PAGE_SHIFT) != end_pfn) {
+ printk(KERN_WARNING "***************\n");
+ printk(KERN_WARNING "**** WARNING: likely BIOS bug\n");
+ printk(KERN_WARNING "**** MTRRs don't cover all of "
+ "memory, trimmed %ld pages\n", end_pfn -
+ (highest_addr >> PAGE_SHIFT));
+ printk(KERN_WARNING "***************\n");
+ end_pfn = highest_addr >> PAGE_SHIFT;
+ }
+}

/**
* mtrr_bp_init - initialize mtrrs on the boot CPU
diff --git a/arch/i386/kernel/cpu/mtrr/mtrr.h b/arch/i386/kernel/cpu/mtrr/mtrr.h
index 289dfe6..627b339 100644
--- a/arch/i386/kernel/cpu/mtrr/mtrr.h
+++ b/arch/i386/kernel/cpu/mtrr/mtrr.h
@@ -14,6 +14,7 @@
#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)

#define NUM_FIXED_RANGES 88
+#define MAX_VAR_RANGES 256
#define MTRRfix64K_00000_MSR 0x250
#define MTRRfix16K_80000_MSR 0x258
#define MTRRfix16K_A0000_MSR 0x259
@@ -34,6 +35,8 @@
an 8 bit field: */
typedef u8 mtrr_type;

+extern unsigned int usage_table[MAX_VAR_RANGES];
+
struct mtrr_ops {
u32 vendor;
u32 use_intel_if;
diff --git a/arch/x86_64/kernel/bugs.c b/arch/x86_64/kernel/bugs.c
index c3c6b91..c138eac 100644
--- a/arch/x86_64/kernel/bugs.c
+++ b/arch/x86_64/kernel/bugs.c
@@ -14,7 +14,6 @@
void __init check_bugs(void)
{
identify_cpu(&boot_cpu_data);
- mtrr_bp_init();
#if !defined(CONFIG_SMP)
printk("CPU: ");
print_cpu_info(&boot_cpu_data);
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index eb6524f..409b63c 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -266,6 +266,10 @@ void __init setup_arch(char **cmdline_p)
* we are rounding upwards:
*/
end_pfn = e820_end_of_ram();
+ /* Trim memory not covered by WB MTRRs */
+ mtrr_bp_init();
+ mtrr_trim_uncached_memory();
+
num_physpages = end_pfn;

check_efer();
diff --git a/include/asm-x86_64/mtrr.h b/include/asm-x86_64/mtrr.h
index b557c48..cc62bd8 100644
--- a/include/asm-x86_64/mtrr.h
+++ b/include/asm-x86_64/mtrr.h
@@ -78,6 +78,7 @@ extern int mtrr_add_page (unsigned long base, unsigned long size,
unsigned int type, char increment);
extern int mtrr_del (int reg, unsigned long base, unsigned long size);
extern int mtrr_del_page (int reg, unsigned long base, unsigned long size);
+extern void mtrr_trim_uncached_memory(void);
# else
static __inline__ int mtrr_add (unsigned long base, unsigned long size,
unsigned int type, char increment)


2007-06-07 22:50:36

by Justin Piszcz

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

Will see if it still patches against -rc4.

On Thu, 7 Jun 2007, Jesse Barnes wrote:

> On some machines, buggy BIOSes don't properly setup WB MTRRs to
> cover all available RAM, meaning the last few megs (or even gigs)
> of memory will be marked uncached. Since Linux tends to allocate
> from high memory addresses first, this causes the machine to be
> unusably slow as soon as the kernel starts really using memory
> (i.e. right around init time).
>
> This patch works around the problem by scanning the MTRRs at
> boot and figuring out whether the current end_pfn value (setup
> by early e820 code) goes beyond the highest WB MTRR range, and
> if so, trimming it to match. A fairly obnoxious KERN_WARNING
> is printed too, letting the user know that not all of their
> memory is available due to a likely BIOS bug.
>
> Something similar could be done on i386 if needed, but the boot
> ordering would be slightly different, since the MTRR code on i386
> depends on the boot_cpu_data structure being setup.
>
> This patch incorporates the feedback from Eric and Andi:
> - use MAX_VAR_RANGES instead of NUM_VAR_RANGES
> - move array declaration to header file as an extern
> - add command line disable option "disable_mtrr_trim"
> - don't run the trim code if the MTRR default type is cacheable
> - don't run the trim code on non-Intel machines
>
> Justin, feel free to test again if you have time and add your
> "Tested-by" signoff.
>
> Andi, as for large pages, do you think this is ok as is, or should
> I trim a larger granularity? If so, what granularity?
>
> Signed-off-by: Jesse Barnes <[email protected]>
>
> Thanks,
> Jesse
>
> diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
> index 5d0283c..cb728a8 100644
> --- a/Documentation/kernel-parameters.txt
> +++ b/Documentation/kernel-parameters.txt
> @@ -553,6 +553,12 @@ and is between 256 and 4096 characters. It is defined in the file
> See drivers/char/README.epca and
> Documentation/digiepca.txt.
>
> + disable_mtrr_trim [X86-64]
> + By default the kernel will trim any uncacheable
> + memory out of your available memory pool based on
> + MTRR settings. This parameter disables that behavior,
> + possibly causing your machine to run very slowly.
> +
> dmascc= [HW,AX25,SERIAL] AX.25 Z80SCC driver with DMA
> support available.
> Format: <io_dev0>[,<io_dev1>[,..<io_dev32>]]
> diff --git a/arch/i386/kernel/cpu/mtrr/generic.c b/arch/i386/kernel/cpu/mtrr/generic.c
> index c4ebb51..8eb3085 100644
> --- a/arch/i386/kernel/cpu/mtrr/generic.c
> +++ b/arch/i386/kernel/cpu/mtrr/generic.c
> @@ -13,7 +13,7 @@
> #include "mtrr.h"
>
> struct mtrr_state {
> - struct mtrr_var_range *var_ranges;
> + struct mtrr_var_range var_ranges[MAX_VAR_RANGES];
> mtrr_type fixed_ranges[NUM_FIXED_RANGES];
> unsigned char enabled;
> unsigned char have_fixed;
> @@ -84,12 +84,6 @@ void get_mtrr_state(void)
> struct mtrr_var_range *vrs;
> unsigned lo, dummy;
>
> - if (!mtrr_state.var_ranges) {
> - mtrr_state.var_ranges = kmalloc(num_var_ranges * sizeof (struct mtrr_var_range),
> - GFP_KERNEL);
> - if (!mtrr_state.var_ranges)
> - return;
> - }
> vrs = mtrr_state.var_ranges;
>
> rdmsr(MTRRcap_MSR, lo, dummy);
> diff --git a/arch/i386/kernel/cpu/mtrr/if.c b/arch/i386/kernel/cpu/mtrr/if.c
> index c7d8f17..0e34a67 100644
> --- a/arch/i386/kernel/cpu/mtrr/if.c
> +++ b/arch/i386/kernel/cpu/mtrr/if.c
> @@ -11,10 +11,6 @@
> #include <asm/mtrr.h>
> #include "mtrr.h"
>
> -/* RED-PEN: this is accessed without any locking */
> -extern unsigned int *usage_table;
> -
> -
> #define FILE_FCOUNT(f) (((struct seq_file *)((f)->private_data))->private)
>
> static const char *const mtrr_strings[MTRR_NUM_TYPES] =
> diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c
> index 7202b98..ef552ba 100644
> --- a/arch/i386/kernel/cpu/mtrr/main.c
> +++ b/arch/i386/kernel/cpu/mtrr/main.c
> @@ -38,8 +38,8 @@
> #include <linux/cpu.h>
> #include <linux/mutex.h>
>
> +#include <asm/e820.h>
> #include <asm/mtrr.h>
> -
> #include <asm/uaccess.h>
> #include <asm/processor.h>
> #include <asm/msr.h>
> @@ -47,7 +47,7 @@
>
> u32 num_var_ranges = 0;
>
> -unsigned int *usage_table;
> +unsigned int usage_table[MAX_VAR_RANGES];
> static DEFINE_MUTEX(mtrr_mutex);
>
> u64 size_or_mask, size_and_mask;
> @@ -121,11 +121,6 @@ static void __init init_table(void)
> int i, max;
>
> max = num_var_ranges;
> - if ((usage_table = kmalloc(max * sizeof *usage_table, GFP_KERNEL))
> - == NULL) {
> - printk(KERN_ERR "mtrr: could not allocate\n");
> - return;
> - }
> for (i = 0; i < max; i++)
> usage_table[i] = 1;
> }
> @@ -589,16 +584,11 @@ struct mtrr_value {
> unsigned long lsize;
> };
>
> -static struct mtrr_value * mtrr_state;
> +static struct mtrr_value mtrr_state[MAX_VAR_RANGES];
>
> static int mtrr_save(struct sys_device * sysdev, pm_message_t state)
> {
> int i;
> - int size = num_var_ranges * sizeof(struct mtrr_value);
> -
> - mtrr_state = kzalloc(size,GFP_ATOMIC);
> - if (!mtrr_state)
> - return -ENOMEM;
>
> for (i = 0; i < num_var_ranges; i++) {
> mtrr_if->get(i,
> @@ -620,7 +610,6 @@ static int mtrr_restore(struct sys_device * sysdev)
> mtrr_state[i].lsize,
> mtrr_state[i].ltype);
> }
> - kfree(mtrr_state);
> return 0;
> }
>
> @@ -631,6 +620,57 @@ static struct sysdev_driver mtrr_sysdev_driver = {
> .resume = mtrr_restore,
> };
>
> +static int disable_mtrr_trim;
> +
> +static int __init disable_mtrr_trim_setup(char *str)
> +{
> + disable_mtrr_trim = 1;
> + return 0;
> +}
> +early_param("disable_mtrr_trim", disable_mtrr_trim_setup);
> +
> +/**
> + * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
> + *
> + * Some buggy BIOSes don't setup the MTRRs properly for systems with certain
> + * memory configurations. This routine checks to make sure the MTRRs having
> + * a write back type cover all of the memory the kernel is intending to use.
> + * If not, it'll trim any memory off the end by adjusting end_pfn, removing
> + * it from the kernel's allocation pools, warning the user with an obnoxious
> + * message.
> + */
> +void __init mtrr_trim_uncached_memory(void)
> +{
> + unsigned long i, base, size, highest_addr = 0, def, dummy;
> + mtrr_type type;
> +
> + /* Make sure we only trim uncachable memory on Intel machines */
> + rdmsr(MTRRdefType_MSR, def, dummy);
> + def &= 0xff;
> + if (!use_intel() || disable_mtrr_trim || def != MTRR_TYPE_UNCACHABLE)
> + return;
> +
> + /* Find highest cached pfn */
> + for (i = 0; i < num_var_ranges; i++) {
> + mtrr_if->get(i, &base, &size, &type);
> + if (type != MTRR_TYPE_WRBACK)
> + continue;
> + base <<= PAGE_SHIFT;
> + size <<= PAGE_SHIFT;
> + if (highest_addr < base + size)
> + highest_addr = base + size;
> + }
> +
> + if ((highest_addr >> PAGE_SHIFT) != end_pfn) {
> + printk(KERN_WARNING "***************\n");
> + printk(KERN_WARNING "**** WARNING: likely BIOS bug\n");
> + printk(KERN_WARNING "**** MTRRs don't cover all of "
> + "memory, trimmed %ld pages\n", end_pfn -
> + (highest_addr >> PAGE_SHIFT));
> + printk(KERN_WARNING "***************\n");
> + end_pfn = highest_addr >> PAGE_SHIFT;
> + }
> +}
>
> /**
> * mtrr_bp_init - initialize mtrrs on the boot CPU
> diff --git a/arch/i386/kernel/cpu/mtrr/mtrr.h b/arch/i386/kernel/cpu/mtrr/mtrr.h
> index 289dfe6..627b339 100644
> --- a/arch/i386/kernel/cpu/mtrr/mtrr.h
> +++ b/arch/i386/kernel/cpu/mtrr/mtrr.h
> @@ -14,6 +14,7 @@
> #define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
>
> #define NUM_FIXED_RANGES 88
> +#define MAX_VAR_RANGES 256
> #define MTRRfix64K_00000_MSR 0x250
> #define MTRRfix16K_80000_MSR 0x258
> #define MTRRfix16K_A0000_MSR 0x259
> @@ -34,6 +35,8 @@
> an 8 bit field: */
> typedef u8 mtrr_type;
>
> +extern unsigned int usage_table[MAX_VAR_RANGES];
> +
> struct mtrr_ops {
> u32 vendor;
> u32 use_intel_if;
> diff --git a/arch/x86_64/kernel/bugs.c b/arch/x86_64/kernel/bugs.c
> index c3c6b91..c138eac 100644
> --- a/arch/x86_64/kernel/bugs.c
> +++ b/arch/x86_64/kernel/bugs.c
> @@ -14,7 +14,6 @@
> void __init check_bugs(void)
> {
> identify_cpu(&boot_cpu_data);
> - mtrr_bp_init();
> #if !defined(CONFIG_SMP)
> printk("CPU: ");
> print_cpu_info(&boot_cpu_data);
> diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
> index eb6524f..409b63c 100644
> --- a/arch/x86_64/kernel/setup.c
> +++ b/arch/x86_64/kernel/setup.c
> @@ -266,6 +266,10 @@ void __init setup_arch(char **cmdline_p)
> * we are rounding upwards:
> */
> end_pfn = e820_end_of_ram();
> + /* Trim memory not covered by WB MTRRs */
> + mtrr_bp_init();
> + mtrr_trim_uncached_memory();
> +
> num_physpages = end_pfn;
>
> check_efer();
> diff --git a/include/asm-x86_64/mtrr.h b/include/asm-x86_64/mtrr.h
> index b557c48..cc62bd8 100644
> --- a/include/asm-x86_64/mtrr.h
> +++ b/include/asm-x86_64/mtrr.h
> @@ -78,6 +78,7 @@ extern int mtrr_add_page (unsigned long base, unsigned long size,
> unsigned int type, char increment);
> extern int mtrr_del (int reg, unsigned long base, unsigned long size);
> extern int mtrr_del_page (int reg, unsigned long base, unsigned long size);
> +extern void mtrr_trim_uncached_memory(void);
> # else
> static __inline__ int mtrr_add (unsigned long base, unsigned long size,
> unsigned int type, char increment)
>

2007-06-07 22:53:51

by Justin Piszcz

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

p34:/usr/src/linux# patch -p1 < ../mtrr-v2.patch
patching file Documentation/kernel-parameters.txt
patching file arch/i386/kernel/cpu/mtrr/generic.c
patching file arch/i386/kernel/cpu/mtrr/if.c
patching file arch/i386/kernel/cpu/mtrr/main.c
patching file arch/i386/kernel/cpu/mtrr/mtrr.h
patching file arch/x86_64/kernel/bugs.c
patching file arch/x86_64/kernel/setup.c
patching file include/asm-x86_64/mtrr.h
p34:/usr/src/linux#

Applies clean to 2.6.22-rc4, verifying shortly.

On Thu, 7 Jun 2007, Jesse Barnes wrote:

> On some machines, buggy BIOSes don't properly setup WB MTRRs to
> cover all available RAM, meaning the last few megs (or even gigs)
> of memory will be marked uncached. Since Linux tends to allocate
> from high memory addresses first, this causes the machine to be
> unusably slow as soon as the kernel starts really using memory
> (i.e. right around init time).
>
> This patch works around the problem by scanning the MTRRs at
> boot and figuring out whether the current end_pfn value (setup
> by early e820 code) goes beyond the highest WB MTRR range, and
> if so, trimming it to match. A fairly obnoxious KERN_WARNING
> is printed too, letting the user know that not all of their
> memory is available due to a likely BIOS bug.
>
> Something similar could be done on i386 if needed, but the boot
> ordering would be slightly different, since the MTRR code on i386
> depends on the boot_cpu_data structure being setup.
>
> This patch incorporates the feedback from Eric and Andi:
> - use MAX_VAR_RANGES instead of NUM_VAR_RANGES
> - move array declaration to header file as an extern
> - add command line disable option "disable_mtrr_trim"
> - don't run the trim code if the MTRR default type is cacheable
> - don't run the trim code on non-Intel machines
>
> Justin, feel free to test again if you have time and add your
> "Tested-by" signoff.
>
> Andi, as for large pages, do you think this is ok as is, or should
> I trim a larger granularity? If so, what granularity?
>
> Signed-off-by: Jesse Barnes <[email protected]>
>
> Thanks,
> Jesse
>
> diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
> index 5d0283c..cb728a8 100644
> --- a/Documentation/kernel-parameters.txt
> +++ b/Documentation/kernel-parameters.txt
> @@ -553,6 +553,12 @@ and is between 256 and 4096 characters. It is defined in the file
> See drivers/char/README.epca and
> Documentation/digiepca.txt.
>
> + disable_mtrr_trim [X86-64]
> + By default the kernel will trim any uncacheable
> + memory out of your available memory pool based on
> + MTRR settings. This parameter disables that behavior,
> + possibly causing your machine to run very slowly.
> +
> dmascc= [HW,AX25,SERIAL] AX.25 Z80SCC driver with DMA
> support available.
> Format: <io_dev0>[,<io_dev1>[,..<io_dev32>]]
> diff --git a/arch/i386/kernel/cpu/mtrr/generic.c b/arch/i386/kernel/cpu/mtrr/generic.c
> index c4ebb51..8eb3085 100644
> --- a/arch/i386/kernel/cpu/mtrr/generic.c
> +++ b/arch/i386/kernel/cpu/mtrr/generic.c
> @@ -13,7 +13,7 @@
> #include "mtrr.h"
>
> struct mtrr_state {
> - struct mtrr_var_range *var_ranges;
> + struct mtrr_var_range var_ranges[MAX_VAR_RANGES];
> mtrr_type fixed_ranges[NUM_FIXED_RANGES];
> unsigned char enabled;
> unsigned char have_fixed;
> @@ -84,12 +84,6 @@ void get_mtrr_state(void)
> struct mtrr_var_range *vrs;
> unsigned lo, dummy;
>
> - if (!mtrr_state.var_ranges) {
> - mtrr_state.var_ranges = kmalloc(num_var_ranges * sizeof (struct mtrr_var_range),
> - GFP_KERNEL);
> - if (!mtrr_state.var_ranges)
> - return;
> - }
> vrs = mtrr_state.var_ranges;
>
> rdmsr(MTRRcap_MSR, lo, dummy);
> diff --git a/arch/i386/kernel/cpu/mtrr/if.c b/arch/i386/kernel/cpu/mtrr/if.c
> index c7d8f17..0e34a67 100644
> --- a/arch/i386/kernel/cpu/mtrr/if.c
> +++ b/arch/i386/kernel/cpu/mtrr/if.c
> @@ -11,10 +11,6 @@
> #include <asm/mtrr.h>
> #include "mtrr.h"
>
> -/* RED-PEN: this is accessed without any locking */
> -extern unsigned int *usage_table;
> -
> -
> #define FILE_FCOUNT(f) (((struct seq_file *)((f)->private_data))->private)
>
> static const char *const mtrr_strings[MTRR_NUM_TYPES] =
> diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c
> index 7202b98..ef552ba 100644
> --- a/arch/i386/kernel/cpu/mtrr/main.c
> +++ b/arch/i386/kernel/cpu/mtrr/main.c
> @@ -38,8 +38,8 @@
> #include <linux/cpu.h>
> #include <linux/mutex.h>
>
> +#include <asm/e820.h>
> #include <asm/mtrr.h>
> -
> #include <asm/uaccess.h>
> #include <asm/processor.h>
> #include <asm/msr.h>
> @@ -47,7 +47,7 @@
>
> u32 num_var_ranges = 0;
>
> -unsigned int *usage_table;
> +unsigned int usage_table[MAX_VAR_RANGES];
> static DEFINE_MUTEX(mtrr_mutex);
>
> u64 size_or_mask, size_and_mask;
> @@ -121,11 +121,6 @@ static void __init init_table(void)
> int i, max;
>
> max = num_var_ranges;
> - if ((usage_table = kmalloc(max * sizeof *usage_table, GFP_KERNEL))
> - == NULL) {
> - printk(KERN_ERR "mtrr: could not allocate\n");
> - return;
> - }
> for (i = 0; i < max; i++)
> usage_table[i] = 1;
> }
> @@ -589,16 +584,11 @@ struct mtrr_value {
> unsigned long lsize;
> };
>
> -static struct mtrr_value * mtrr_state;
> +static struct mtrr_value mtrr_state[MAX_VAR_RANGES];
>
> static int mtrr_save(struct sys_device * sysdev, pm_message_t state)
> {
> int i;
> - int size = num_var_ranges * sizeof(struct mtrr_value);
> -
> - mtrr_state = kzalloc(size,GFP_ATOMIC);
> - if (!mtrr_state)
> - return -ENOMEM;
>
> for (i = 0; i < num_var_ranges; i++) {
> mtrr_if->get(i,
> @@ -620,7 +610,6 @@ static int mtrr_restore(struct sys_device * sysdev)
> mtrr_state[i].lsize,
> mtrr_state[i].ltype);
> }
> - kfree(mtrr_state);
> return 0;
> }
>
> @@ -631,6 +620,57 @@ static struct sysdev_driver mtrr_sysdev_driver = {
> .resume = mtrr_restore,
> };
>
> +static int disable_mtrr_trim;
> +
> +static int __init disable_mtrr_trim_setup(char *str)
> +{
> + disable_mtrr_trim = 1;
> + return 0;
> +}
> +early_param("disable_mtrr_trim", disable_mtrr_trim_setup);
> +
> +/**
> + * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
> + *
> + * Some buggy BIOSes don't setup the MTRRs properly for systems with certain
> + * memory configurations. This routine checks to make sure the MTRRs having
> + * a write back type cover all of the memory the kernel is intending to use.
> + * If not, it'll trim any memory off the end by adjusting end_pfn, removing
> + * it from the kernel's allocation pools, warning the user with an obnoxious
> + * message.
> + */
> +void __init mtrr_trim_uncached_memory(void)
> +{
> + unsigned long i, base, size, highest_addr = 0, def, dummy;
> + mtrr_type type;
> +
> + /* Make sure we only trim uncachable memory on Intel machines */
> + rdmsr(MTRRdefType_MSR, def, dummy);
> + def &= 0xff;
> + if (!use_intel() || disable_mtrr_trim || def != MTRR_TYPE_UNCACHABLE)
> + return;
> +
> + /* Find highest cached pfn */
> + for (i = 0; i < num_var_ranges; i++) {
> + mtrr_if->get(i, &base, &size, &type);
> + if (type != MTRR_TYPE_WRBACK)
> + continue;
> + base <<= PAGE_SHIFT;
> + size <<= PAGE_SHIFT;
> + if (highest_addr < base + size)
> + highest_addr = base + size;
> + }
> +
> + if ((highest_addr >> PAGE_SHIFT) != end_pfn) {
> + printk(KERN_WARNING "***************\n");
> + printk(KERN_WARNING "**** WARNING: likely BIOS bug\n");
> + printk(KERN_WARNING "**** MTRRs don't cover all of "
> + "memory, trimmed %ld pages\n", end_pfn -
> + (highest_addr >> PAGE_SHIFT));
> + printk(KERN_WARNING "***************\n");
> + end_pfn = highest_addr >> PAGE_SHIFT;
> + }
> +}
>
> /**
> * mtrr_bp_init - initialize mtrrs on the boot CPU
> diff --git a/arch/i386/kernel/cpu/mtrr/mtrr.h b/arch/i386/kernel/cpu/mtrr/mtrr.h
> index 289dfe6..627b339 100644
> --- a/arch/i386/kernel/cpu/mtrr/mtrr.h
> +++ b/arch/i386/kernel/cpu/mtrr/mtrr.h
> @@ -14,6 +14,7 @@
> #define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
>
> #define NUM_FIXED_RANGES 88
> +#define MAX_VAR_RANGES 256
> #define MTRRfix64K_00000_MSR 0x250
> #define MTRRfix16K_80000_MSR 0x258
> #define MTRRfix16K_A0000_MSR 0x259
> @@ -34,6 +35,8 @@
> an 8 bit field: */
> typedef u8 mtrr_type;
>
> +extern unsigned int usage_table[MAX_VAR_RANGES];
> +
> struct mtrr_ops {
> u32 vendor;
> u32 use_intel_if;
> diff --git a/arch/x86_64/kernel/bugs.c b/arch/x86_64/kernel/bugs.c
> index c3c6b91..c138eac 100644
> --- a/arch/x86_64/kernel/bugs.c
> +++ b/arch/x86_64/kernel/bugs.c
> @@ -14,7 +14,6 @@
> void __init check_bugs(void)
> {
> identify_cpu(&boot_cpu_data);
> - mtrr_bp_init();
> #if !defined(CONFIG_SMP)
> printk("CPU: ");
> print_cpu_info(&boot_cpu_data);
> diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
> index eb6524f..409b63c 100644
> --- a/arch/x86_64/kernel/setup.c
> +++ b/arch/x86_64/kernel/setup.c
> @@ -266,6 +266,10 @@ void __init setup_arch(char **cmdline_p)
> * we are rounding upwards:
> */
> end_pfn = e820_end_of_ram();
> + /* Trim memory not covered by WB MTRRs */
> + mtrr_bp_init();
> + mtrr_trim_uncached_memory();
> +
> num_physpages = end_pfn;
>
> check_efer();
> diff --git a/include/asm-x86_64/mtrr.h b/include/asm-x86_64/mtrr.h
> index b557c48..cc62bd8 100644
> --- a/include/asm-x86_64/mtrr.h
> +++ b/include/asm-x86_64/mtrr.h
> @@ -78,6 +78,7 @@ extern int mtrr_add_page (unsigned long base, unsigned long size,
> unsigned int type, char increment);
> extern int mtrr_del (int reg, unsigned long base, unsigned long size);
> extern int mtrr_del_page (int reg, unsigned long base, unsigned long size);
> +extern void mtrr_trim_uncached_memory(void);
> # else
> static __inline__ int mtrr_add (unsigned long base, unsigned long size,
> unsigned int type, char increment)
>

2007-06-07 23:00:57

by Justin Piszcz

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs



On Thu, 7 Jun 2007, Jesse Barnes wrote:

> On some machines, buggy BIOSes don't properly setup WB MTRRs to
> cover all available RAM, meaning the last few megs (or even gigs)
> of memory will be marked uncached. Since Linux tends to allocate
> from high memory addresses first, this causes the machine to be
> unusably slow as soon as the kernel starts really using memory
> (i.e. right around init time).
>
> This patch works around the problem by scanning the MTRRs at
> boot and figuring out whether the current end_pfn value (setup
> by early e820 code) goes beyond the highest WB MTRR range, and
> if so, trimming it to match. A fairly obnoxious KERN_WARNING
> is printed too, letting the user know that not all of their
> memory is available due to a likely BIOS bug.
>
> Something similar could be done on i386 if needed, but the boot
> ordering would be slightly different, since the MTRR code on i386
> depends on the boot_cpu_data structure being setup.
>
> This patch incorporates the feedback from Eric and Andi:
> - use MAX_VAR_RANGES instead of NUM_VAR_RANGES
> - move array declaration to header file as an extern
> - add command line disable option "disable_mtrr_trim"
> - don't run the trim code if the MTRR default type is cacheable
> - don't run the trim code on non-Intel machines
>
> Justin, feel free to test again if you have time and add your
> "Tested-by" signoff.
>
> Andi, as for large pages, do you think this is ok as is, or should
> I trim a larger granularity? If so, what granularity?
>
> Signed-off-by: Jesse Barnes <[email protected]>
>
> Thanks,
> Jesse
>

v1 of your patch:

top - 18:53:46 up 1 day, 1 min, 27 users, load average: 2.82, 1.11,
0.90
Tasks: 356 total, 7 running, 348 sleeping, 1 stopped, 0 zombie
Cpu(s): 2.2%us, 0.4%sy, 0.0%ni, 97.0%id, 0.1%wa, 0.0%hi, 0.2%si,
0.0%st
Mem: 8039576k total, 7962376k used, 77200k free, 716k buffers
Swap: 16787768k total, 128k used, 16787640k free, 6713332k cached

v2 of your patch: (dmesg also attached)

top - 18:58:59 up 2 min, 4 users, load average: 0.12, 0.13, 0.05
Tasks: 155 total, 1 running, 154 sleeping, 0 stopped, 0 zombie
Cpu(s): 2.0%us, 1.1%sy, 0.5%ni, 94.8%id, 1.5%wa, 0.0%hi, 0.0%si,
0.0%st
Mem: 8039576k total, 982192k used, 7057384k free, 1876k buffers
Swap: 16787768k total, 0k used, 16787768k free, 114492k cached

If the box has no issues over the next 8 hours with me pounding it with
backups, bzip2s etc I'll consider it good, so far it boots fine etc, no
issues, but I'll let it cook for a bit. Will update tomorrow.

Thanks,

Justin.


Attachments:
dmesg2.txt (48.42 kB)

2007-06-08 01:58:29

by Robert Hancock

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

Justin Piszcz wrote:
>> Note that your boot also mentions this:
>>
>> [ 106.449661] mtrr: no more MTRRs available
>>
>> which indicates that things like X may not be able to map the
>> framebuffer with the 'write-combine' attribute, which will hurt
>> performance. I've heard reports that turning off 'Intel QST fan
>> control' in your BIOS settings will prevent all your MTRRs from being
>> used (improperly, probably another BIOS bug) so that X will perform
>> well. But if you don't use X on this machine, you don't have to worry
>> about it. The other option would be to remap your MTRRs by hand to
>> free one up for X, you can do that by combining the last one or two
>> entries into a single MTRR using the API described in
>> Documentation/mtrr.txt before you start X.
>>
>> Jesse
>>
>
> FYI--
>
> [ 106.449661] mtrr: no more MTRRs available
>
> This has always occurred, even with mem=8832M setting.
>
> Justin.

Yes, it's another consequence of the way your BIOS configured the MTRRs
(wastefully, using more of the precious register entries than it needed
to, in addition to not covering all of the RAM).

--
Robert Hancock Saskatoon, SK, Canada
To email, remove "nospam" from [email protected]
Home Page: http://www.roberthancock.com/

2007-06-08 08:21:17

by Justin Piszcz

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs



On Thu, 7 Jun 2007, Jesse Barnes wrote:

> On some machines, buggy BIOSes don't properly setup WB MTRRs to
> cover all available RAM, meaning the last few megs (or even gigs)
> of memory will be marked uncached. Since Linux tends to allocate
> from high memory addresses first, this causes the machine to be
> unusably slow as soon as the kernel starts really using memory
> (i.e. right around init time).
>
> This patch works around the problem by scanning the MTRRs at
> boot and figuring out whether the current end_pfn value (setup
> by early e820 code) goes beyond the highest WB MTRR range, and
> if so, trimming it to match. A fairly obnoxious KERN_WARNING
> is printed too, letting the user know that not all of their
> memory is available due to a likely BIOS bug.
>
> Something similar could be done on i386 if needed, but the boot
> ordering would be slightly different, since the MTRR code on i386
> depends on the boot_cpu_data structure being setup.
>
> This patch incorporates the feedback from Eric and Andi:
> - use MAX_VAR_RANGES instead of NUM_VAR_RANGES
> - move array declaration to header file as an extern
> - add command line disable option "disable_mtrr_trim"
> - don't run the trim code if the MTRR default type is cacheable
> - don't run the trim code on non-Intel machines
>
> Justin, feel free to test again if you have time and add your
> "Tested-by" signoff.
>
> Andi, as for large pages, do you think this is ok as is, or should
> I trim a larger granularity? If so, what granularity?
>
> Signed-off-by: Jesse Barnes <[email protected]>
>
> Thanks,
> Jesse
>

Looks good, it sustained many backups, bzip2, and even some encoding
processes, no issues. Let me know if you need me to test any future
iterations of the patch, so far each has been fine, no problems to
report using the 965WH motherboard with 8GB of memory.

Signed-off-by: Justin Piszcz <[email protected]>

2007-06-12 14:50:28

by Pavel Machek

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

Hi!

> On some machines, buggy BIOSes don't properly setup WB MTRRs to
> cover all available RAM, meaning the last few megs (or even gigs)
> of memory will be marked uncached. Since Linux tends to allocate
> from high memory addresses first, this causes the machine to be
> unusably slow as soon as the kernel starts really using memory
> (i.e. right around init time).

> + if ((highest_addr >> PAGE_SHIFT) != end_pfn) {
> + printk(KERN_WARNING "***************\n");
> + printk(KERN_WARNING "**** WARNING: likely BIOS bug\n");
> + printk(KERN_WARNING "**** MTRRs don't cover all of "
> + "memory, trimmed %ld pages\n", end_pfn -
> + (highest_addr >> PAGE_SHIFT));
> + printk(KERN_WARNING "***************\n");
> + end_pfn = highest_addr >> PAGE_SHIFT;

Missing 4K of memory is not worth 4K of junk in syslog per boot. Can
you drop the stars and stop shouting?

Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

2007-06-12 15:29:30

by Jesse Barnes

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

On Tuesday, June 12, 2007 7:50:08 Pavel Machek wrote:
> Hi!
>
> > On some machines, buggy BIOSes don't properly setup WB MTRRs to
> > cover all available RAM, meaning the last few megs (or even gigs)
> > of memory will be marked uncached. Since Linux tends to allocate
> > from high memory addresses first, this causes the machine to be
> > unusably slow as soon as the kernel starts really using memory
> > (i.e. right around init time).
> >
> > + if ((highest_addr >> PAGE_SHIFT) != end_pfn) {
> > + printk(KERN_WARNING "***************\n");
> > + printk(KERN_WARNING "**** WARNING: likely BIOS bug\n");
> > + printk(KERN_WARNING "**** MTRRs don't cover all of "
> > + "memory, trimmed %ld pages\n", end_pfn -
> > + (highest_addr >> PAGE_SHIFT));
> > + printk(KERN_WARNING "***************\n");
> > + end_pfn = highest_addr >> PAGE_SHIFT;
>
> Missing 4K of memory is not worth 4K of junk in syslog per boot. Can
> you drop the stars and stop shouting?

How about missing 1G of memory? We already discussed this, and Andi and
Venki felt that either a panic or a really obnoxious message was the
way to go...

Jesse

2007-06-12 15:48:59

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

> > Missing 4K of memory is not worth 4K of junk in syslog per boot. Can
> > you drop the stars and stop shouting?
>
> How missing about 1G of memory? We already discussed this, and Andi and
> Venki felt that either a panic or a really obnoxious message was the
> way to go...

Perhaps you could vary the number of stars based on the missing amount
to satisfy Pavel @)

-Andi

2007-06-12 21:30:47

by Pavel Machek

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

Hi!

> > > On some machines, buggy BIOSes don't properly setup WB MTRRs to
> > > cover all available RAM, meaning the last few megs (or even gigs)
> > > of memory will be marked uncached. Since Linux tends to allocate
> > > from high memory addresses first, this causes the machine to be
> > > unusably slow as soon as the kernel starts really using memory
> > > (i.e. right around init time).
> > >
> > > + if ((highest_addr >> PAGE_SHIFT) != end_pfn) {
> > > + printk(KERN_WARNING "***************\n");
> > > + printk(KERN_WARNING "**** WARNING: likely BIOS bug\n");
> > > + printk(KERN_WARNING "**** MTRRs don't cover all of "
> > > + "memory, trimmed %ld pages\n", end_pfn -
> > > + (highest_addr >> PAGE_SHIFT));
> > > + printk(KERN_WARNING "***************\n");
> > > + end_pfn = highest_addr >> PAGE_SHIFT;
> >
> > Missing 4K of memory is not worth 4K of junk in syslog per boot. Can
> > you drop the stars and stop shouting?
>
> How missing about 1G of memory? We already discussed this, and Andi and
> Venki felt that either a panic or a really obnoxious message was the
> way to go...

Just use panic, then.
Pavel,
who still thinks anyone missing 1GB of ram will not miss
friendly notice in dmesg, even if it goes without 20 stars.

--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

2007-06-12 21:31:52

by Justin Piszcz

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs



On Tue, 12 Jun 2007, Pavel Machek wrote:

> Hi!
>
>>>> On some machines, buggy BIOSes don't properly setup WB MTRRs to
>>>> cover all available RAM, meaning the last few megs (or even gigs)
>>>> of memory will be marked uncached. Since Linux tends to allocate
>>>> from high memory addresses first, this causes the machine to be
>>>> unusably slow as soon as the kernel starts really using memory
>>>> (i.e. right around init time).
>>>>
>>>> + if ((highest_addr >> PAGE_SHIFT) != end_pfn) {
>>>> + printk(KERN_WARNING "***************\n");
>>>> + printk(KERN_WARNING "**** WARNING: likely BIOS bug\n");
>>>> + printk(KERN_WARNING "**** MTRRs don't cover all of "
>>>> + "memory, trimmed %ld pages\n", end_pfn -
>>>> + (highest_addr >> PAGE_SHIFT));
>>>> + printk(KERN_WARNING "***************\n");
>>>> + end_pfn = highest_addr >> PAGE_SHIFT;
>>>
>>> Missing 4K of memory is not worth 4K of junk in syslog per boot. Can
>>> you drop the stars and stop shouting?
>>
>> How missing about 1G of memory? We already discussed this, and Andi and
>> Venki felt that either a panic or a really obnoxious message was the
>> way to go...
>
> Just use panic, then.
> Pavel,
> who still thinks anyone missing 1GB of ram will not miss
> friendly notice in dmesg, even if it goes without 20 stars.
>
> --
> (english) http://www.livejournal.com/~pavelmachek
> (cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
>

What is wrong with Jesse's patch? I've been using it for quite a few days
now, no issues.

Justin.

2007-06-12 21:38:37

by Ray Lee

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

On 6/12/07, Pavel Machek <[email protected]> wrote:
> > > > On some machines, buggy BIOSes don't properly setup WB MTRRs to
> > > > cover all available RAM, meaning the last few megs (or even gigs)
> > > > of memory will be marked uncached. Since Linux tends to allocate
> > > > from high memory addresses first, this causes the machine to be
> > > > unusably slow as soon as the kernel starts really using memory
> > > > (i.e. right around init time).
> > > >
> > > > + if ((highest_addr >> PAGE_SHIFT) != end_pfn) {
> > > > + printk(KERN_WARNING "***************\n");
> > > > + printk(KERN_WARNING "**** WARNING: likely BIOS bug\n");
> > > > + printk(KERN_WARNING "**** MTRRs don't cover all of "
> > > > + "memory, trimmed %ld pages\n", end_pfn -
> > > > + (highest_addr >> PAGE_SHIFT));
> > > > + printk(KERN_WARNING "***************\n");
> > > > + end_pfn = highest_addr >> PAGE_SHIFT;
> > >
> > > Missing 4K of memory is not worth 4K of junk in syslog per boot. Can
> > > you drop the stars and stop shouting?
> >
> > How missing about 1G of memory? We already discussed this, and Andi and
> > Venki felt that either a panic or a really obnoxious message was the
> > way to go...
>
> Just use panic, then.
> Pavel,
> who still thinks anyone missing 1GB of ram will not miss
> friendly notice in dmesg, even if it goes without 20 stars.

Panicking when it's not necessary is anti-social. If the kernel can
continue, then it should, unless it's a correctness issue that may
cause data corruption. Given that the kernel can even work around the
problem now, throwing a panic is even less warranted.

Ray

2007-06-12 21:55:27

by Pavel Machek

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

On Tue 2007-06-12 14:38:28, Ray Lee wrote:
> On 6/12/07, Pavel Machek <[email protected]> wrote:
> >> > > On some machines, buggy BIOSes don't properly setup WB MTRRs to
> >> > > cover all available RAM, meaning the last few megs (or even gigs)
> >> > > of memory will be marked uncached. Since Linux tends to allocate
> >> > > from high memory addresses first, this causes the machine to be
> >> > > unusably slow as soon as the kernel starts really using memory
> >> > > (i.e. right around init time).
> >> > >
> >> > > + if ((highest_addr >> PAGE_SHIFT) != end_pfn) {
> >> > > + printk(KERN_WARNING "***************\n");
> >> > > + printk(KERN_WARNING "**** WARNING: likely BIOS bug\n");
> >> > > + printk(KERN_WARNING "**** MTRRs don't cover all of "
> >> > > + "memory, trimmed %ld pages\n", end_pfn -
> >> > > + (highest_addr >> PAGE_SHIFT));
> >> > > + printk(KERN_WARNING "***************\n");
> >> > > + end_pfn = highest_addr >> PAGE_SHIFT;
> >> >
> >> > Missing 4K of memory is not worth 4K of junk in syslog per boot. Can
> >> > you drop the stars and stop shouting?
> >>
> >> How missing about 1G of memory? We already discussed this, and Andi and
> >> Venki felt that either a panic or a really obnoxious message was the
> >> way to go...
> >
> >Just use panic, then.
> > Pavel,
> > who still thinks anyone missing 1GB of ram will not miss
> > friendly notice in dmesg, even if it goes without 20 stars.
>
> Panicking when it's not necessary is anti-social. If the kernel can
> continue, then it should, unless it's a correctness issue that may
> cause data corruption. Given that the kernel can even work around the
> problem now, throwing a panic is even less warranted.

Printk("*********************** WARNING")

is anti-social, too.

Here's what my dmesg looks like. Lots of uninteresting, unnecessary crap.

Instead of removing some of that crap, this patch starts "I'm more
important than you" wars, trying to get attention with stars. How do
you like the dump below?

Come on, we have printk loglevels. They are meant for getting
attention. A shouting printk with a ton of stars is not.

Pavel

Linux version 2.6.22-rc4 (pavel@amd) (gcc version 4.1.2 20061115
(prerelease) (Debian 4.1.1-21)) #440 SMP Sat Jun 9 15:25:43 CEST 2007
...
DMI present.
ACPI: RSDP 000F6880, 0024 (r2 LENOVO)
ACPI: XSDT 7F6E6621, 0074 (r1 LENOVO TP-7B 1060 LTP 0)
ACPI: FACP 7F6E6700, 00F4 (r3 LENOVO TP-7B 1060 LNVO 1)
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!ACPI WARNING (TBFADT-0434): OPTIONAL FIELD
!!!!!!!!!!!!!!!!!!"GPE1BLOCK" HAS ZERO ADDRESS OR LENGTH: 000000000000102C/0 [20070126]
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
ACPI: DSDT 7F6E68E7, C463 (r1 LENOVO TP-7B 1060 MSFT 100000E)
ACPI: FACS 7F6F4000, 0040
ACPI: SSDT 7F6E68B4, 0033 (r1 LENOVO TP-7B 1060 MSFT 100000E)
ACPI: ECDT 7F6F2D4A, 0052 (r1 LENOVO TP-7B 1060 LNVO 1)
ACPI: TCPA 7F6F2D9C, 0032 (r2 LENOVO TP-7B 1060 LNVO 1)
ACPI: APIC 7F6F2DCE, 0068 (r1 LENOVO TP-7B 1060 LNVO 1)
ACPI: MCFG 7F6F2E36, 003E (r1 LENOVO TP-7B 1060 LNVO 1)
ACPI: HPET 7F6F2E74, 0038 (r1 LENOVO TP-7B 1060 LNVO 1)
ACPI: BOOT 7F6F2FD8, 0028 (r1 LENOVO TP-7B 1060 LTP 1)
ACPI: SSDT 7F6E5BDC, 0507 (r1 LENOVO TP-7B 1060 INTL 20050513)
ACPI: SSDT 7F6E5A04, 01D8 (r1 LENOVO TP-7B 1060 INTL 20050513)
ACPI: PM-Timer IO Port: 0x1008
ACPI: Local APIC address 0xfee00000
ACPI: LAPIC (acpi_id[0x00] lapic_id[0x00] enabled)
Processor #0 6:14 APIC version 20
ACPI: LAPIC (acpi_id[0x01] lapic_id[0x01] enabled)
Processor #1 6:14 APIC version 20
ACPI: LAPIC_NMI (acpi_id[0x00] high edge lint[0x1])
ACPI: LAPIC_NMI (acpi_id[0x01] high edge lint[0x1])
ACPI: IOAPIC (id[0x01] address[0xfec00000] gsi_base[0])
IOAPIC[0]: apic_id 1, version 32, address 0xfec00000, GSI 0-23
ACPI: INT_SRC_OVR (bus 0 bus_irq 0 global_irq 2 dfl dfl)
ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 high level)
////////////////////////////////////////////////////////////////////
/////////////////////////////ACPI: IRQ0 used by override.
///////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////
/////////////////////////////ACPI: IRQ2 used by override.
////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////
/////////////////////////////ACPI: IRQ9 used by override.
////////////////////////////////////////////////////////////////////
Enabling APIC mode: Flat. Using 1 I/O APICs
Using ACPI (MADT) for SMP configuration information
Allocating PCI resources starting at 88000000 (gap: 80000000:70000000)
Built 1 zonelists. Total pages: 517875
Kernel command line: root=/dev/sda4 resume=/dev/sda1
psmouse.psmouse_proto=imps psmouse_proto=imps psmouse.proto=imps
vga=791 init=/tmp/swsusp-init
************ Unknown boot option `psmouse.psmouse_proto=imps': ignoring
mapped APIC to ffffd000 (fee00000)
mapped IOAPIC to ffffc000 (fec00000)
Enabling fast FPU save and restore... done.
....
Total of 2 processors activated (7318.93 BogoMIPS).
ENABLING IO-APIC IRQs
..TIMER: vector=0x31 apic1=0 pin1=2 apic2=-1 pin2=-1
checking TSC synchronization [CPU#0 -> CPU#1]:
************
*
*
*
* Measured 627605 cycles TSC warp between CPUs, turning off TSC clock.
*
*
*
*
*
**********************
##########################################################################
##### Marking TSC unstable due to: check_tsc_sync_source failed. ########
########################################################################
Brought up 2 CPUs
migration_cost=10000

Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

2007-06-13 00:25:20

by Ray Lee

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

On 6/12/07, Pavel Machek <[email protected]> wrote:
> On Tue 2007-06-12 14:38:28, Ray Lee wrote:
> > Panicking when it's not necessary is anti-social. If the kernel can
> > continue, then it should, unless it's a correctness issue that may
> > cause data corruption. Given that the kernel can even work around the
> > problem now, throwing a panic is even less warranted.
>
> Printk("*********************** WARNING")
>
> is anti-social, too.

Pavel, this warning isn't even going to print on any of your systems.
So it's completely different than the straw-man you're proposing (that
I snipped).

Look, if you want to argue that the stars should go away, then sure,
I'm not going to stop you. But panicking over a BIOS misconfiguration
issue? One that can be corrected by the kernel? That's just plain
stupid.

Ray

2007-06-13 06:53:04

by Bodo Eggert

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

Jesse Barnes <[email protected]> wrote:

> On some machines, buggy BIOSes don't properly setup WB MTRRs to
> cover all available RAM, meaning the last few megs (or even gigs)
> of memory will be marked uncached. Since Linux tends to allocate
> from high memory addresses first, this causes the machine to be
> unusably slow as soon as the kernel starts really using memory
> (i.e. right around init time).
>
> This patch works around the problem by scanning the MTRRs at
> boot and figuring out whether the current end_pfn value (setup
> by early e820 code) goes beyond the highest WB MTRR range, and
> if so, trimming it to match. A fairly obnoxious KERN_WARNING
> is printed too, letting the user know that not all of their
> memory is available due to a likely BIOS bug.

Wouldn't it be better to correct the MTRR, if possible? As far as I read
here (LKML), the BIOS did not merge the entries, and this waste caused the
last part of the memory not to be covered. Of course you can't DTRT for all
buggy MTRR setups, but if you're lucky, optionally merging the MTRR and
adding the rest of the memory may sometimes do the trick ...
--
Funny quotes:
10. Nothing is fool proof to a talented fool.

Fri?, Spammer: [email protected]

2007-06-13 08:23:19

by Pavel Machek

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

Hi!

> >> Panicking when it's not necessary is anti-social. If the kernel can
> >> continue, then it should, unless it's a correctness issue that may
> >> cause data corruption. Given that the kernel can even work around the
> >> problem now, throwing a panic is even less warranted.
> >
> >Printk("*********************** WARNING")
> >
> >is anti-social, too.
>
> Pavel, this warning isn't even going to print on any of your systems.
> So it's completely different than the straw-man you're proposing (that
> I snipped).
>
> Look, if you want to argue that the stars should go away, then sure,
> I'm not going to stop you. But panicking over a BIOS misconfiguration
> issue? One that can be corrected by the kernel? That's just plain
> stupid.

Well, either the warning is _really_ important. Then it is not really
a warning, but a fatal problem, and we should panic for it (so that the user
sees the message) and ask for a command line option (so we really
really know user wants to ignore that warning).

Or it is important but not _that_ important. We have
printk(KERN_EMERG) for that.

Or maybe it is not so important. We have printk(KERN_WARNING) for
that.

Pick one, but doing "KERN_WARNING" level with message
"************************* I'm extremely important warning,
************************** uhuh maybe there is something bad in your
bios but I'm not really sure" is just wrong.

Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

2007-06-13 16:20:03

by Dave Jones

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

On Wed, Jun 13, 2007 at 08:52:23AM +0200, Bodo Eggert wrote:
> Jesse Barnes <[email protected]> wrote:
>
> > On some machines, buggy BIOSes don't properly setup WB MTRRs to
> > cover all available RAM, meaning the last few megs (or even gigs)
> > of memory will be marked uncached. Since Linux tends to allocate
> > from high memory addresses first, this causes the machine to be
> > unusably slow as soon as the kernel starts really using memory
> > (i.e. right around init time).
> >
> > This patch works around the problem by scanning the MTRRs at
> > boot and figuring out whether the current end_pfn value (setup
> > by early e820 code) goes beyond the highest WB MTRR range, and
> > if so, trimming it to match. A fairly obnoxious KERN_WARNING
> > is printed too, letting the user know that not all of their
> > memory is available due to a likely BIOS bug.
>
> Wouldn't it be better to correct the MTRR, if possible? As far as I read
> here (LKML), the BIOS did not merge the entries

The size/alignment constraints of MTRRs (must be a power of 2)
means that the best-fit method of covering non power of 2 memory sizes
is the, well.. best fit. There's nothing that can be merged.

Dave

--
http://www.codemonkey.org.uk

2007-06-14 19:46:20

by Pim Zandbergen

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

Thanks for this patch. I was having the exact same symptoms as Justin
Piszcz, on a different, but similar motherboard:

Motherboard: GigaByte GA-G33-DS3R
BIOS rev: F2
Chipset: Intel G33
Memory: 8GB
Distro: Fedora 7 x86_64
Kernel: kernel-2.6.21-1.3194.fc7

Building vanilla 2.6.22-rc4 with your patch solved the problem.

I'm now seeing this in the syslog

***************
**** WARNING: likely BIOS bug
**** MTRRs don't cover all of memory, trimmed 196608 pages
***************

leaving me 7416672 kB of usable memory.

If there's any way I can help with more info or testing,
then let me know.

Thanks,
Pim

2007-06-14 20:26:17

by Justin Piszcz

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs



On Thu, 14 Jun 2007, Pim Zandbergen wrote:

> Thanks for this patch. I was having the exact same symptoms as Justin Piszcz,
> on a different, but similar motherboard:
>
> Motherboard: GigaByte GA-G33-DS3R
> BIOS rev: F2
> Chipset: Intel G33
> Memory: 8GB
> Distro: Fedora 7 x86_64
> Kernel: kernel-2.6.21-1.3194.fc7
>
> Building vanilla 2.6.22-rc4 with your patch solved the problem.
>
> I'm now seeing this in the syslog
>
> ***************
> **** WARNING: likely BIOS bug
> **** MTRRs don't cover all of memory, trimmed 196608 pages
> ***************
>
> leaving me 7416672 kB of usable memory.
>
> If there's any way I can help with more info or testing,
> then let me know.
>
> Thanks,
> Pim
>

That's strange, I guess different chipsets 'chew' up different amounts of
memory OR you have your DVT(?) (video-card memory/aperture) set to 256MB?
I have mine set to 128MB, in top:

Mem: 8039576k total, 6187304k used, 1852272k free, 696k buffers

What type of memory are you using and what is your DVT set to?

Justin.

2007-06-14 21:19:03

by Jesse Barnes

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

On Thursday, June 14, 2007 1:26:07 Justin Piszcz wrote:
> On Thu, 14 Jun 2007, Pim Zandbergen wrote:
> > Thanks for this patch. I was having the exact same symptoms as
> > Justin Piszcz, on a different, but similar motherboard:
> >
> > Motherboard: GigaByte GA-G33-DS3R
> > BIOS rev: F2
> > Chipset: Intel G33
> > Memory: 8GB
> > Distro: Fedora 7 x86_64
> > Kernel: kernel-2.6.21-1.3194.fc7
> >
> > Building vanilla 2.6.22-rc4 with your patch solved the problem.
> >
> > I'm now seeing this in the syslog
> >
> > ***************
> > **** WARNING: likely BIOS bug
> > **** MTRRs don't cover all of memory, trimmed 196608 pages
> > ***************
> >
> > leaving me 7416672 kB of usable memory.
> >
> > If there's any way I can help with more info or testing,
> > then let me know.
> >
> > Thanks,
> > Pim

Thanks for testing, Pim. Glad it works for you. Keep an eye out for
BIOS upgrades, the next version might fix it.

> That's strange, I guess different chipsets 'chew' up different
> amounts of memory OR you have your DVT(?) (video-card
> memory/aperature) set to 256MB? I have mine set to 128MB, in top:
>
> Mem: 8039576k total, 6187304k used, 1852272k free, 696k
> buffers
>
> What type of memory are you using and what is your DVT set to?

Different BIOSes will map things differently, so I'd expect differences
in the "trimmed xxx pages" message across machines. But yeah, BIOS
config options can also affect things, in particular I've heard that
the fan control options change MTRR setup significantly.

Jesse

2007-06-14 21:21:42

by Justin Piszcz

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs



On Thu, 14 Jun 2007, Jesse Barnes wrote:

> On Thursday, June 14, 2007 1:26:07 Justin Piszcz wrote:
>> On Thu, 14 Jun 2007, Pim Zandbergen wrote:
>>> Thanks for this patch. I was having the exact same symptoms as
>>> Justin Piszcz, on a different, but similar motherboard:
>>>
>>> Motherboard: GigaByte GA-G33-DS3R
>>> BIOS rev: F2
>>> Chipset: Intel G33
>>> Memory: 8GB
>>> Distro: Fedora 7 x86_64
>>> Kernel: kernel-2.6.21-1.3194.fc7
>>>
>>> Building vanilla 2.6.22-rc4 with your patch solved the problem.
>>>
>>> I'm now seeing this in the syslog
>>>
>>> ***************
>>> **** WARNING: likely BIOS bug
>>> **** MTRRs don't cover all of memory, trimmed 196608 pages
>>> ***************
>>>
>>> leaving me 7416672 kB of usable memory.
>>>
>>> If there's any way I can help with more info or testing,
>>> then let me know.
>>>
>>> Thanks,
>>> Pim
>
> Thanks for testing, Pim. Glad it works for you. Keep an eye out for
> BIOS upgrades, the next version might fix it.
>
>> That's strange, I guess different chipsets 'chew' up different
>> amounts of memory OR you have your DVT(?) (video-card
>> memory/aperature) set to 256MB? I have mine set to 128MB, in top:
>>
>> Mem: 8039576k total, 6187304k used, 1852272k free, 696k
>> buffers
>>
>> What type of memory are you using and what is your DVT set to?
>
> Different BIOSes will map things differently, so I'd expect differences
> in the "trimmmed xxx pages" message across machines. But yeah, BIOS
> config options can also affect things, in particular I've heard that
> the fan control options change MTRR setup significantly.
>
> Jesse
>

To Intel,

When will HECI be supported via the kernel? When it becomes supported,
would that alter the MTRR map at all?

Justin.

2007-06-14 21:26:49

by Jesse Barnes

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

On Thursday, June 14, 2007 2:21:16 Justin Piszcz wrote:
> To Intel,
>
> When will HECI be supported via the kernel? When it becomes
> supported, would that alter the MTRR map at all?

I *think* HECI is related to our IT remote management stuff, but I don't
work on it. It *may* affect the MTRR mappings, but I think it's just a
PCI device, so I don't think enabling it will change the MTRR layout.

Arjan, do you know who's doing our AMT stuff?

Thanks,
Jesse

2007-06-15 10:17:34

by Pim Zandbergen

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

Justin Piszcz wrote:
> That's strange, I guess different chipsets 'chew' up different amounts
> of memory OR you have your DVT(?) (video-card memory/aperature) set to
> 256MB? I have mine set to 128MB, in top:
>
> Mem: 8039576k total, 6187304k used, 1852272k free, 696k buffers
Me:
Mem: 7416672k total, 378988k used, 7037684k free, 13592k buffers

> What type of memory are you using
2x Kingston KVR667D2N5K2/4G
> and what is your DVT set to?
GigaByte's BIOS config options of the onboard graphics controller are
very limited compared to those on your Intel motherboard.

I can only choose the graphics buffer size, between
"1MB+1~2MB for GTT" or "8MB+1~2MB for GTT".
I chose the latter. The POST says 9MB are taken for video.

Not that it matters much, as the current i810/intel xorg driver does
not yet support the GMA3100, so I'm using the vesa driver.

Thanks,
Pim

2007-06-15 10:21:38

by Pim Zandbergen

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

Jesse Barnes wrote:

> Thanks for testing, Pim. Glad it works for you.
The pleasure was all on my side.

> Keep an eye out for BIOS upgrades, the next version might fix it.
>

What, are you going to report this to GigaByte?

Thanks,
Pim

2007-06-15 10:35:05

by Justin Piszcz

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs



On Fri, 15 Jun 2007, Pim Zandbergen wrote:

> Justin Piszcz wrote:
>> That's strange, I guess different chipsets 'chew' up different amounts of
>> memory OR you have your DVT(?) (video-card memory/aperature) set to 256MB?
>> I have mine set to 128MB, in top:
>>
>> Mem: 8039576k total, 6187304k used, 1852272k free, 696k buffers
> Me:
> Mem: 7416672k total, 378988k used, 7037684k free, 13592k buffers
>
>> What type of memory are you using
> 2x Kingston KVR667D2N5K2/4G
>> and what is your DVT set to?
> GigaByte's BIOS config options of the onboard graphics controller are
> very limited compared to those on your Intel motherboard.
>
> I can only choose the graphics buffer size, between
> "1MB+1~2MB for GTT" or "8MB+1~2MB for GTT".
> I chose the latter. The POST says 9MB are taken for video.
>
> Not that it matters much, as the current i810/intel xorg driver does
> not yet support the GMA3100, so I'm using the vesa driver.
>
> Thanks,
> Pim
>

I use the exact same memory model. So it must be the difference in BIOS
MTRR/E820 memory mappings.

Justin.

2007-06-15 16:20:52

by Jesse Barnes

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

On Friday, June 15, 2007 3:21:17 Pim Zandbergen wrote:
> Jesse Barnes wrote:
> > Thanks for testing, Pim. Glad it works for you.
>
> The pleasure was all on my side.
>
> > Keep an eye out for BIOS upgrades, the next version might fix it.
>
> What, are you going to report this to GigaByte?

No, but you should if you haven't already. I think GigaByte probably
gets its BIOS from another BIOS vendor (maybe Intel), so when that
vendor provides them with an update, they'll probably provide it on
their website. And from what I understand, an Intel BIOS update is in
the works to address this issue for Intel boards, so a GigaByte version
may follow shortly. I don't have an exact timeframe though...

Jesse

2007-06-15 17:29:10

by Jesse Barnes

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

On Friday, June 15, 2007 3:17:11 Pim Zandbergen wrote:
> Not that it matters much, as the current i810/intel xorg driver does
> not yet support the GMA3100, so I'm using the vesa driver.

I *think* the latest trees support that chip. If you're feeling brave,
checkout the latest version of the xf86-video-intel driver from
freedesktop.org and give it a try (to get 3d you'll also need newer DRM
and AGP bits).

Jesse

2007-06-20 13:55:36

by Pim Zandbergen

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

Jesse Barnes wrote:
> On Friday, June 15, 2007 3:17:11 Pim Zandbergen wrote:
>
>> Not that it matters much, as the current i810/intel xorg driver does
>> not yet support the GMA3100, so I'm using the vesa driver.
>>
>
> I *think* the latest trees support that chip. If you're feeling brave,
> checkout the latest version of the xf86-video-intel driver from
> freedesktop.org and give it a try
As it happens, the Fedora guys just released a new Intel xorg driver RPM
that supports the G33. It works.
> (to get 3d you'll also need newer DRM and AGP bits).
>
No 3D indeed. The newer DRM and AGP bits probably go into the kernel.

But hey, it's a server. I've got another machine with the exact same
hardware running Windows 2003. No video driver support there at all.

Thanks,
Pim

2007-06-21 14:25:11

by Pim Zandbergen

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

Jesse Barnes wrote:
>> What, are you going to report this to GigaByte?
>>
>
> No, but you should if you haven't already. I think GigaByte probably
> gets its BIOS from another BIOS vendor (maybe Intel), so when that
> vendor provides them with an update, they'll probably provide it on
> their website. And from what I understand, an Intel BIOS update is in
> the works to address this issue for Intel boards, so a GigaByte version
> may follow shortly. I don't have an exact timeframe though...
>
>
I reported this to GigaByte, and lo and behold, they sent me a fixed
BIOS within 48 hours.
Kudos to Taipei!

They sent the BIOS image in a private message, so it might take a while
before it's available on their website.


2007-06-21 14:28:46

by Justin Piszcz

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs



On Thu, 21 Jun 2007, Pim Zandbergen wrote:

> Jesse Barnes wrote:
>>> What, are you going to report this to GigaByte?
>>>
>>
>> No, but you should if you haven't already. I think GigaByte probably gets
>> its BIOS from another BIOS vendor (maybe Intel), so when that vendor
>> provides them with an update, they'll probably provide it on their website.
>> And from what I understand, an Intel BIOS update is in the works to address
>> this issue for Intel boards, so a GigaByte version may follow shortly. I
>> don't have an exact timeframe though...
>>
>>
> I reported this to GigaByte, and lo and behold, they sent me a fixed BIOS
> within 48 hours.
> Kudos to Taipeh!
>
> They sent the BIOS image in a private message, so it might take a while
> before it's available
> on their website.
>
>

Wow, totally the opposite from Intel.

Justin.

2007-06-21 19:41:16

by Yinghai Lu

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

On 6/7/07, Jesse Barnes <[email protected]> wrote:
> On some machines, buggy BIOSes don't properly setup WB MTRRs to
> cover all available RAM, meaning the last few megs (or even gigs)
> of memory will be marked uncached. Since Linux tends to allocate
> from high memory addresses first, this causes the machine to be
> unusably slow as soon as the kernel starts really using memory
> (i.e. right around init time).
>
> This patch works around the problem by scanning the MTRRs at
> boot and figuring out whether the current end_pfn value (setup
> by early e820 code) goes beyond the highest WB MTRR range, and
> if so, trimming it to match. A fairly obnoxious KERN_WARNING
> is printed too, letting the user know that not all of their
> memory is available due to a likely BIOS bug.
>
> Something similar could be done on i386 if needed, but the boot
> ordering would be slightly different, since the MTRR code on i386
> depends on the boot_cpu_data structure being setup.
>
> This patch incorporates the feedback from Eric and Andi:
> - use MAX_VAR_RANGES instead of NUM_VAR_RANGES
> - move array declaration to header file as an extern
> - add command line disable option "disable_mtrr_trim"
> - don't run the trim code if the MTRR default type is cacheable
> - don't run the trim code on non-Intel machines
>
> Justin, feel free to test again if you have time and add your
> "Tested-by" signoff.
>
> Andi, as for large pages, do you think this is ok as is, or should
> I trim a larger granularity? If so, what granularity?
>
> Signed-off-by: Jesse Barnes <[email protected]>
>
> Thanks,
> Jesse
>

NAK.

On AMD Rev F Opteron and later CPUs, the BIOS does not set WB in the
MTRRs for memory above 4G.

This patch would throw away that RAM.

YH

2007-06-21 19:57:01

by Jesse Barnes

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

On Thursday, June 21, 2007 12:40:58 Yinghai Lu wrote:
> On 6/7/07, Jesse Barnes <[email protected]> wrote:
> > On some machines, buggy BIOSes don't properly setup WB MTRRs to
> > cover all available RAM, meaning the last few megs (or even gigs)
> > of memory will be marked uncached. Since Linux tends to allocate
> > from high memory addresses first, this causes the machine to be
> > unusably slow as soon as the kernel starts really using memory
> > (i.e. right around init time).
> >
> > This patch works around the problem by scanning the MTRRs at
> > boot and figuring out whether the current end_pfn value (setup
> > by early e820 code) goes beyond the highest WB MTRR range, and
> > if so, trimming it to match. A fairly obnoxious KERN_WARNING
> > is printed too, letting the user know that not all of their
> > memory is available due to a likely BIOS bug.
> >
> > Something similar could be done on i386 if needed, but the boot
> > ordering would be slightly different, since the MTRR code on i386
> > depends on the boot_cpu_data structure being setup.
> >
> > This patch incorporates the feedback from Eric and Andi:
> > - use MAX_VAR_RANGES instead of NUM_VAR_RANGES
> > - move array declaration to header file as an extern
> > - add command line disable option "disable_mtrr_trim"
> > - don't run the trim code if the MTRR default type is cacheable
> > - don't run the trim code on non-Intel machines
> >
> > Justin, feel free to test again if you have time and add your
> > "Tested-by" signoff.
> >
> > Andi, as for large pages, do you think this is ok as is, or should
> > I trim a larger granularity? If so, what granularity?
> >
> > Signed-off-by: Jesse Barnes <[email protected]>
> >
> > Thanks,
> > Jesse
>
> NAK.
>
> for AMD Rev F Opteron later CPU, BIOS will not set WB in MTRR for 4G
> above mem.
>
> This patch will get rid of those RAM.

Yeah, Eric already mentioned that. I'll rework it to only run on Intel
CPUs per Eric's last mail.

Thanks,
Jesse

2007-06-25 16:31:58

by Pim Zandbergen

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

Pim Zandbergen wrote:
>>
> I reported this to GigaByte, and lo and behold, they sent me a fixed
> BIOS within 48 hours.
> Kudos to Taipeh!
>
> They sent the BIOS image in a private message, so it might take a
> while before it's available
> on their website.
It is now, and it is described as "Fix Vista boot lag with 8GB memory
issue" ...

Pim

2007-06-25 16:34:17

by Justin Piszcz

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

Impressive.

Jesse, can you touch base with Intel's BIOS department? Also, what are
the chances of that patch making it into 2.6.22-rc6/7 if it hasn't
already?

On Mon, 25 Jun 2007, Pim Zandbergen wrote:

> Pim Zandbergen wrote
>>>
>> I reported this to GigaByte, and lo and behold, they sent me a fixed BIOS
>> within 48 hours.
>> Kudos to Taipeh!
>>
>> They sent the BIOS image in a private message, so it might take a while
>> before it's available
>> on their website.
> It is now, and it is described as "Fix Vista boot lag with 8GB memory issue"
> ...
>
> Pim
>