2007-06-25 21:35:36

by Jesse Barnes

[permalink] [raw]
Subject: [PATCH] trim memory not covered by WB MTRRs

On some machines, buggy BIOSes don't properly setup WB MTRRs to
cover all available RAM, meaning the last few megs (or even gigs)
of memory will be marked uncached. Since Linux tends to allocate
from high memory addresses first, this causes the machine to be
unusably slow as soon as the kernel starts really using memory
(i.e. right around init time).

This patch works around the problem by scanning the MTRRs at
boot and figuring out whether the current end_pfn value (setup
by early e820 code) goes beyond the highest WB MTRR range, and
if so, trimming it to match. A fairly obnoxious KERN_WARNING
is printed too, letting the user know that not all of their
memory is available due to a likely BIOS bug.

Something similar could be done on i386 if needed, but the boot
ordering would be slightly different, since the MTRR code on i386
depends on the boot_cpu_data structure being setup.

This patch fixes a bug in the last patch that caused the code to
run on non-Intel machines (AMD machines apparently don't need it
and it's untested on other non-Intel machines, so best keep it
off).

akpm -- this one should replace all the mtrr patches currently
in your tree.

Yinghai, maybe you can test this on one of your AMD machines to
make sure I got the CPU code right?

Signed-off-by: Jesse Barnes <[email protected]>

Thanks,
Jesse

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 5d0283c..642db9b 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -553,6 +553,12 @@ and is between 256 and 4096 characters. It is defined in the file
See drivers/char/README.epca and
Documentation/digiepca.txt.

+ disable_mtrr_trim [X86-64, Intel only]
+ By default the kernel will trim any uncacheable
+ memory out of your available memory pool based on
+ MTRR settings. This parameter disables that behavior,
+ possibly causing your machine to run very slowly.
+
dmascc= [HW,AX25,SERIAL] AX.25 Z80SCC driver with DMA
support available.
Format: <io_dev0>[,<io_dev1>[,..<io_dev32>]]
diff --git a/arch/i386/kernel/cpu/mtrr/generic.c b/arch/i386/kernel/cpu/mtrr/generic.c
index 6d59378..bf70d2d 100644
--- a/arch/i386/kernel/cpu/mtrr/generic.c
+++ b/arch/i386/kernel/cpu/mtrr/generic.c
@@ -13,7 +13,7 @@
#include "mtrr.h"

struct mtrr_state {
- struct mtrr_var_range *var_ranges;
+ struct mtrr_var_range var_ranges[MAX_VAR_RANGES];
mtrr_type fixed_ranges[NUM_FIXED_RANGES];
unsigned char enabled;
unsigned char have_fixed;
@@ -84,12 +84,6 @@ void get_mtrr_state(void)
struct mtrr_var_range *vrs;
unsigned lo, dummy;

- if (!mtrr_state.var_ranges) {
- mtrr_state.var_ranges = kmalloc(num_var_ranges * sizeof (struct mtrr_var_range),
- GFP_KERNEL);
- if (!mtrr_state.var_ranges)
- return;
- }
vrs = mtrr_state.var_ranges;

rdmsr(MTRRcap_MSR, lo, dummy);
diff --git a/arch/i386/kernel/cpu/mtrr/if.c b/arch/i386/kernel/cpu/mtrr/if.c
index c7d8f17..1b3a09c 100644
--- a/arch/i386/kernel/cpu/mtrr/if.c
+++ b/arch/i386/kernel/cpu/mtrr/if.c
@@ -11,10 +11,6 @@
#include <asm/mtrr.h>
#include "mtrr.h"

-/* RED-PEN: this is accessed without any locking */
-extern unsigned int *usage_table;
-
-
#define FILE_FCOUNT(f) (((struct seq_file *)((f)->private_data))->private)

static const char *const mtrr_strings[MTRR_NUM_TYPES] =
@@ -396,7 +392,7 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset)
for (i = 0; i < max; i++) {
mtrr_if->get(i, &base, &size, &type);
if (size == 0)
- usage_table[i] = 0;
+ mtrr_usage_table[i] = 0;
else {
if (size < (0x100000 >> PAGE_SHIFT)) {
/* less than 1MB */
@@ -410,7 +406,7 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset)
len += seq_printf(seq,
"reg%02i: base=0x%05lx000 (%4luMB), size=%4lu%cB: %s, count=%d\n",
i, base, base >> (20 - PAGE_SHIFT), size, factor,
- mtrr_attrib_to_str(type), usage_table[i]);
+ mtrr_attrib_to_str(type), mtrr_usage_table[i]);
}
}
return 0;
diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c
index 55b0051..0e4be8b 100644
--- a/arch/i386/kernel/cpu/mtrr/main.c
+++ b/arch/i386/kernel/cpu/mtrr/main.c
@@ -38,8 +38,8 @@
#include <linux/cpu.h>
#include <linux/mutex.h>

+#include <asm/e820.h>
#include <asm/mtrr.h>
-
#include <asm/uaccess.h>
#include <asm/processor.h>
#include <asm/msr.h>
@@ -47,7 +47,7 @@

u32 num_var_ranges = 0;

-unsigned int *usage_table;
+unsigned int mtrr_usage_table[MAX_VAR_RANGES];
static DEFINE_MUTEX(mtrr_mutex);

u64 size_or_mask, size_and_mask;
@@ -121,13 +121,8 @@ static void __init init_table(void)
int i, max;

max = num_var_ranges;
- if ((usage_table = kmalloc(max * sizeof *usage_table, GFP_KERNEL))
- == NULL) {
- printk(KERN_ERR "mtrr: could not allocate\n");
- return;
- }
for (i = 0; i < max; i++)
- usage_table[i] = 1;
+ mtrr_usage_table[i] = 1;
}

struct set_mtrr_data {
@@ -381,7 +376,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
goto out;
}
if (increment)
- ++usage_table[i];
+ ++mtrr_usage_table[i];
error = i;
goto out;
}
@@ -390,12 +385,13 @@ int mtrr_add_page(unsigned long base, unsigned long size,
if (i >= 0) {
set_mtrr(i, base, size, type);
if (likely(replace < 0))
- usage_table[i] = 1;
+ mtrr_usage_table[i] = 1;
else {
- usage_table[i] = usage_table[replace] + !!increment;
+ mtrr_usage_table[i] = mtrr_usage_table[replace] +
+ !!increment;
if (unlikely(replace != i)) {
set_mtrr(replace, 0, 0, 0);
- usage_table[replace] = 0;
+ mtrr_usage_table[replace] = 0;
}
}
} else
@@ -525,11 +521,11 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
printk(KERN_WARNING "mtrr: MTRR %d not used\n", reg);
goto out;
}
- if (usage_table[reg] < 1) {
+ if (mtrr_usage_table[reg] < 1) {
printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg);
goto out;
}
- if (--usage_table[reg] < 1)
+ if (--mtrr_usage_table[reg] < 1)
set_mtrr(reg, 0, 0, 0);
error = reg;
out:
@@ -589,16 +585,11 @@ struct mtrr_value {
unsigned long lsize;
};

-static struct mtrr_value * mtrr_state;
+static struct mtrr_value mtrr_state[MAX_VAR_RANGES];

static int mtrr_save(struct sys_device * sysdev, pm_message_t state)
{
int i;
- int size = num_var_ranges * sizeof(struct mtrr_value);
-
- mtrr_state = kzalloc(size,GFP_ATOMIC);
- if (!mtrr_state)
- return -ENOMEM;

for (i = 0; i < num_var_ranges; i++) {
mtrr_if->get(i,
@@ -620,7 +611,6 @@ static int mtrr_restore(struct sys_device * sysdev)
mtrr_state[i].lsize,
mtrr_state[i].ltype);
}
- kfree(mtrr_state);
return 0;
}

@@ -631,6 +621,59 @@ static struct sysdev_driver mtrr_sysdev_driver = {
.resume = mtrr_restore,
};

+static int disable_mtrr_trim;
+
+static int __init disable_mtrr_trim_setup(char *str)
+{
+ disable_mtrr_trim = 1;
+ return 0;
+}
+early_param("disable_mtrr_trim", disable_mtrr_trim_setup);
+
+#ifdef CONFIG_X86_64
+/**
+ * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
+ *
+ * Some buggy BIOSes don't setup the MTRRs properly for systems with certain
+ * memory configurations. This routine checks to make sure the MTRRs having
+ * a write back type cover all of the memory the kernel is intending to use.
+ * If not, it'll trim any memory off the end by adjusting end_pfn, removing
+ * it from the kernel's allocation pools, warning the user with an obnoxious
+ * message.
+ */
+void __init mtrr_trim_uncached_memory(void)
+{
+ unsigned long i, base, size, highest_addr = 0, def, dummy;
+ mtrr_type type;
+
+ /* Make sure we only trim uncachable memory on Intel machines */
+ rdmsr(MTRRdefType_MSR, def, dummy);
+ def &= 0xff;
+ if (!is_cpu(INTEL) || disable_mtrr_trim || def != MTRR_TYPE_UNCACHABLE)
+ return;
+
+ /* Find highest cached pfn */
+ for (i = 0; i < num_var_ranges; i++) {
+ mtrr_if->get(i, &base, &size, &type);
+ if (type != MTRR_TYPE_WRBACK)
+ continue;
+ base <<= PAGE_SHIFT;
+ size <<= PAGE_SHIFT;
+ if (highest_addr < base + size)
+ highest_addr = base + size;
+ }
+
+ if ((highest_addr >> PAGE_SHIFT) != end_pfn) {
+ printk(KERN_WARNING "***************\n");
+ printk(KERN_WARNING "**** WARNING: likely BIOS bug\n");
+ printk(KERN_WARNING "**** MTRRs don't cover all of "
+ "memory, trimmed %ld pages\n", end_pfn -
+ (highest_addr >> PAGE_SHIFT));
+ printk(KERN_WARNING "***************\n");
+ end_pfn = highest_addr >> PAGE_SHIFT;
+ }
+}
+#endif

/**
* mtrr_bp_init - initialize mtrrs on the boot CPU
diff --git a/arch/i386/kernel/cpu/mtrr/mtrr.h b/arch/i386/kernel/cpu/mtrr/mtrr.h
index 289dfe6..14fb88b 100644
--- a/arch/i386/kernel/cpu/mtrr/mtrr.h
+++ b/arch/i386/kernel/cpu/mtrr/mtrr.h
@@ -14,6 +14,7 @@
#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)

#define NUM_FIXED_RANGES 88
+#define MAX_VAR_RANGES 256
#define MTRRfix64K_00000_MSR 0x250
#define MTRRfix16K_80000_MSR 0x258
#define MTRRfix16K_A0000_MSR 0x259
@@ -34,6 +35,8 @@
an 8 bit field: */
typedef u8 mtrr_type;

+extern unsigned int mtrr_usage_table[MAX_VAR_RANGES];
+
struct mtrr_ops {
u32 vendor;
u32 use_intel_if;
diff --git a/arch/x86_64/kernel/bugs.c b/arch/x86_64/kernel/bugs.c
index c3c6b91..c138eac 100644
--- a/arch/x86_64/kernel/bugs.c
+++ b/arch/x86_64/kernel/bugs.c
@@ -14,7 +14,6 @@
void __init check_bugs(void)
{
identify_cpu(&boot_cpu_data);
- mtrr_bp_init();
#if !defined(CONFIG_SMP)
printk("CPU: ");
print_cpu_info(&boot_cpu_data);
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index eb6524f..409b63c 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -266,6 +266,10 @@ void __init setup_arch(char **cmdline_p)
* we are rounding upwards:
*/
end_pfn = e820_end_of_ram();
+ /* Trim memory not covered by WB MTRRs */
+ mtrr_bp_init();
+ mtrr_trim_uncached_memory();
+
num_physpages = end_pfn;

check_efer();
diff --git a/include/asm-x86_64/mtrr.h b/include/asm-x86_64/mtrr.h
index b557c48..51ad788 100644
--- a/include/asm-x86_64/mtrr.h
+++ b/include/asm-x86_64/mtrr.h
@@ -78,6 +78,7 @@ extern int mtrr_add_page (unsigned long base, unsigned long size,
unsigned int type, char increment);
extern int mtrr_del (int reg, unsigned long base, unsigned long size);
extern int mtrr_del_page (int reg, unsigned long base, unsigned long size);
+extern void mtrr_trim_uncached_memory(void);
# else
static __inline__ int mtrr_add (unsigned long base, unsigned long size,
unsigned int type, char increment)
@@ -99,7 +100,9 @@ static __inline__ int mtrr_del_page (int reg, unsigned long base,
{
return -ENODEV;
}
-
+static __inline__ void mtrr_trim_uncached_memory(void)
+{
+}
#endif /* CONFIG_MTRR */

#ifdef CONFIG_COMPAT


2007-06-25 21:45:55

by Justin Piszcz

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

Will try this patch shortly w/ 2.6.22-rc6.

p34:/usr/src/linux-2.6.22-rc6# patch -p1 < ../mtrr-v3.patch
patching file Documentation/kernel-parameters.txt
patching file arch/i386/kernel/cpu/mtrr/generic.c
patching file arch/i386/kernel/cpu/mtrr/if.c
patching file arch/i386/kernel/cpu/mtrr/main.c
patching file arch/i386/kernel/cpu/mtrr/mtrr.h
patching file arch/x86_64/kernel/bugs.c
patching file arch/x86_64/kernel/setup.c
patching file include/asm-x86_64/mtrr.h
p34:/usr/src/linux-2.6.22-rc6#

So far so good.

On Mon, 25 Jun 2007, Jesse Barnes wrote:

> On some machines, buggy BIOSes don't properly setup WB MTRRs to
> cover all available RAM, meaning the last few megs (or even gigs)
> of memory will be marked uncached. Since Linux tends to allocate
> from high memory addresses first, this causes the machine to be
> unusably slow as soon as the kernel starts really using memory
> (i.e. right around init time).
>
> This patch works around the problem by scanning the MTRRs at
> boot and figuring out whether the current end_pfn value (setup
> by early e820 code) goes beyond the highest WB MTRR range, and
> if so, trimming it to match. A fairly obnoxious KERN_WARNING
> is printed too, letting the user know that not all of their
> memory is available due to a likely BIOS bug.
>
> Something similar could be done on i386 if needed, but the boot
> ordering would be slightly different, since the MTRR code on i386
> depends on the boot_cpu_data structure being setup.
>
> This patch fixes a bug in the last patch that caused the code to
> run on non-Intel machines (AMD machines apparently don't need it
> and it's untested on other non-Intel machines, so best keep it
> off).
>
> akpm -- this one should replace all the mtrr patches currently
> in your tree.
>
> Yinghai, maybe you can test this on one of your AMD machines to
> make sure I got the CPU code right?
>
> Signed-off-by: Jesse Barnes <[email protected]>
>
> Thanks,
> Jesse
>
> diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
> index 5d0283c..642db9b 100644
> --- a/Documentation/kernel-parameters.txt
> +++ b/Documentation/kernel-parameters.txt
> @@ -553,6 +553,12 @@ and is between 256 and 4096 characters. It is defined in the file
> See drivers/char/README.epca and
> Documentation/digiepca.txt.
>
> + disable_mtrr_trim [X86-64, Intel only]
> + By default the kernel will trim any uncacheable
> + memory out of your available memory pool based on
> + MTRR settings. This parameter disables that behavior,
> + possibly causing your machine to run very slowly.
> +
> dmascc= [HW,AX25,SERIAL] AX.25 Z80SCC driver with DMA
> support available.
> Format: <io_dev0>[,<io_dev1>[,..<io_dev32>]]
> diff --git a/arch/i386/kernel/cpu/mtrr/generic.c b/arch/i386/kernel/cpu/mtrr/generic.c
> index 6d59378..bf70d2d 100644
> --- a/arch/i386/kernel/cpu/mtrr/generic.c
> +++ b/arch/i386/kernel/cpu/mtrr/generic.c
> @@ -13,7 +13,7 @@
> #include "mtrr.h"
>
> struct mtrr_state {
> - struct mtrr_var_range *var_ranges;
> + struct mtrr_var_range var_ranges[MAX_VAR_RANGES];
> mtrr_type fixed_ranges[NUM_FIXED_RANGES];
> unsigned char enabled;
> unsigned char have_fixed;
> @@ -84,12 +84,6 @@ void get_mtrr_state(void)
> struct mtrr_var_range *vrs;
> unsigned lo, dummy;
>
> - if (!mtrr_state.var_ranges) {
> - mtrr_state.var_ranges = kmalloc(num_var_ranges * sizeof (struct mtrr_var_range),
> - GFP_KERNEL);
> - if (!mtrr_state.var_ranges)
> - return;
> - }
> vrs = mtrr_state.var_ranges;
>
> rdmsr(MTRRcap_MSR, lo, dummy);
> diff --git a/arch/i386/kernel/cpu/mtrr/if.c b/arch/i386/kernel/cpu/mtrr/if.c
> index c7d8f17..1b3a09c 100644
> --- a/arch/i386/kernel/cpu/mtrr/if.c
> +++ b/arch/i386/kernel/cpu/mtrr/if.c
> @@ -11,10 +11,6 @@
> #include <asm/mtrr.h>
> #include "mtrr.h"
>
> -/* RED-PEN: this is accessed without any locking */
> -extern unsigned int *usage_table;
> -
> -
> #define FILE_FCOUNT(f) (((struct seq_file *)((f)->private_data))->private)
>
> static const char *const mtrr_strings[MTRR_NUM_TYPES] =
> @@ -396,7 +392,7 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset)
> for (i = 0; i < max; i++) {
> mtrr_if->get(i, &base, &size, &type);
> if (size == 0)
> - usage_table[i] = 0;
> + mtrr_usage_table[i] = 0;
> else {
> if (size < (0x100000 >> PAGE_SHIFT)) {
> /* less than 1MB */
> @@ -410,7 +406,7 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset)
> len += seq_printf(seq,
> "reg%02i: base=0x%05lx000 (%4luMB), size=%4lu%cB: %s, count=%d\n",
> i, base, base >> (20 - PAGE_SHIFT), size, factor,
> - mtrr_attrib_to_str(type), usage_table[i]);
> + mtrr_attrib_to_str(type), mtrr_usage_table[i]);
> }
> }
> return 0;
> diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c
> index 55b0051..0e4be8b 100644
> --- a/arch/i386/kernel/cpu/mtrr/main.c
> +++ b/arch/i386/kernel/cpu/mtrr/main.c
> @@ -38,8 +38,8 @@
> #include <linux/cpu.h>
> #include <linux/mutex.h>
>
> +#include <asm/e820.h>
> #include <asm/mtrr.h>
> -
> #include <asm/uaccess.h>
> #include <asm/processor.h>
> #include <asm/msr.h>
> @@ -47,7 +47,7 @@
>
> u32 num_var_ranges = 0;
>
> -unsigned int *usage_table;
> +unsigned int mtrr_usage_table[MAX_VAR_RANGES];
> static DEFINE_MUTEX(mtrr_mutex);
>
> u64 size_or_mask, size_and_mask;
> @@ -121,13 +121,8 @@ static void __init init_table(void)
> int i, max;
>
> max = num_var_ranges;
> - if ((usage_table = kmalloc(max * sizeof *usage_table, GFP_KERNEL))
> - == NULL) {
> - printk(KERN_ERR "mtrr: could not allocate\n");
> - return;
> - }
> for (i = 0; i < max; i++)
> - usage_table[i] = 1;
> + mtrr_usage_table[i] = 1;
> }
>
> struct set_mtrr_data {
> @@ -381,7 +376,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
> goto out;
> }
> if (increment)
> - ++usage_table[i];
> + ++mtrr_usage_table[i];
> error = i;
> goto out;
> }
> @@ -390,12 +385,13 @@ int mtrr_add_page(unsigned long base, unsigned long size,
> if (i >= 0) {
> set_mtrr(i, base, size, type);
> if (likely(replace < 0))
> - usage_table[i] = 1;
> + mtrr_usage_table[i] = 1;
> else {
> - usage_table[i] = usage_table[replace] + !!increment;
> + mtrr_usage_table[i] = mtrr_usage_table[replace] +
> + !!increment;
> if (unlikely(replace != i)) {
> set_mtrr(replace, 0, 0, 0);
> - usage_table[replace] = 0;
> + mtrr_usage_table[replace] = 0;
> }
> }
> } else
> @@ -525,11 +521,11 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
> printk(KERN_WARNING "mtrr: MTRR %d not used\n", reg);
> goto out;
> }
> - if (usage_table[reg] < 1) {
> + if (mtrr_usage_table[reg] < 1) {
> printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg);
> goto out;
> }
> - if (--usage_table[reg] < 1)
> + if (--mtrr_usage_table[reg] < 1)
> set_mtrr(reg, 0, 0, 0);
> error = reg;
> out:
> @@ -589,16 +585,11 @@ struct mtrr_value {
> unsigned long lsize;
> };
>
> -static struct mtrr_value * mtrr_state;
> +static struct mtrr_value mtrr_state[MAX_VAR_RANGES];
>
> static int mtrr_save(struct sys_device * sysdev, pm_message_t state)
> {
> int i;
> - int size = num_var_ranges * sizeof(struct mtrr_value);
> -
> - mtrr_state = kzalloc(size,GFP_ATOMIC);
> - if (!mtrr_state)
> - return -ENOMEM;
>
> for (i = 0; i < num_var_ranges; i++) {
> mtrr_if->get(i,
> @@ -620,7 +611,6 @@ static int mtrr_restore(struct sys_device * sysdev)
> mtrr_state[i].lsize,
> mtrr_state[i].ltype);
> }
> - kfree(mtrr_state);
> return 0;
> }
>
> @@ -631,6 +621,59 @@ static struct sysdev_driver mtrr_sysdev_driver = {
> .resume = mtrr_restore,
> };
>
> +static int disable_mtrr_trim;
> +
> +static int __init disable_mtrr_trim_setup(char *str)
> +{
> + disable_mtrr_trim = 1;
> + return 0;
> +}
> +early_param("disable_mtrr_trim", disable_mtrr_trim_setup);
> +
> +#ifdef CONFIG_X86_64
> +/**
> + * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
> + *
> + * Some buggy BIOSes don't setup the MTRRs properly for systems with certain
> + * memory configurations. This routine checks to make sure the MTRRs having
> + * a write back type cover all of the memory the kernel is intending to use.
> + * If not, it'll trim any memory off the end by adjusting end_pfn, removing
> + * it from the kernel's allocation pools, warning the user with an obnoxious
> + * message.
> + */
> +void __init mtrr_trim_uncached_memory(void)
> +{
> + unsigned long i, base, size, highest_addr = 0, def, dummy;
> + mtrr_type type;
> +
> + /* Make sure we only trim uncachable memory on Intel machines */
> + rdmsr(MTRRdefType_MSR, def, dummy);
> + def &= 0xff;
> + if (!is_cpu(INTEL) || disable_mtrr_trim || def != MTRR_TYPE_UNCACHABLE)
> + return;
> +
> + /* Find highest cached pfn */
> + for (i = 0; i < num_var_ranges; i++) {
> + mtrr_if->get(i, &base, &size, &type);
> + if (type != MTRR_TYPE_WRBACK)
> + continue;
> + base <<= PAGE_SHIFT;
> + size <<= PAGE_SHIFT;
> + if (highest_addr < base + size)
> + highest_addr = base + size;
> + }
> +
> + if ((highest_addr >> PAGE_SHIFT) != end_pfn) {
> + printk(KERN_WARNING "***************\n");
> + printk(KERN_WARNING "**** WARNING: likely BIOS bug\n");
> + printk(KERN_WARNING "**** MTRRs don't cover all of "
> + "memory, trimmed %ld pages\n", end_pfn -
> + (highest_addr >> PAGE_SHIFT));
> + printk(KERN_WARNING "***************\n");
> + end_pfn = highest_addr >> PAGE_SHIFT;
> + }
> +}
> +#endif
>
> /**
> * mtrr_bp_init - initialize mtrrs on the boot CPU
> diff --git a/arch/i386/kernel/cpu/mtrr/mtrr.h b/arch/i386/kernel/cpu/mtrr/mtrr.h
> index 289dfe6..14fb88b 100644
> --- a/arch/i386/kernel/cpu/mtrr/mtrr.h
> +++ b/arch/i386/kernel/cpu/mtrr/mtrr.h
> @@ -14,6 +14,7 @@
> #define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
>
> #define NUM_FIXED_RANGES 88
> +#define MAX_VAR_RANGES 256
> #define MTRRfix64K_00000_MSR 0x250
> #define MTRRfix16K_80000_MSR 0x258
> #define MTRRfix16K_A0000_MSR 0x259
> @@ -34,6 +35,8 @@
> an 8 bit field: */
> typedef u8 mtrr_type;
>
> +extern unsigned int mtrr_usage_table[MAX_VAR_RANGES];
> +
> struct mtrr_ops {
> u32 vendor;
> u32 use_intel_if;
> diff --git a/arch/x86_64/kernel/bugs.c b/arch/x86_64/kernel/bugs.c
> index c3c6b91..c138eac 100644
> --- a/arch/x86_64/kernel/bugs.c
> +++ b/arch/x86_64/kernel/bugs.c
> @@ -14,7 +14,6 @@
> void __init check_bugs(void)
> {
> identify_cpu(&boot_cpu_data);
> - mtrr_bp_init();
> #if !defined(CONFIG_SMP)
> printk("CPU: ");
> print_cpu_info(&boot_cpu_data);
> diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
> index eb6524f..409b63c 100644
> --- a/arch/x86_64/kernel/setup.c
> +++ b/arch/x86_64/kernel/setup.c
> @@ -266,6 +266,10 @@ void __init setup_arch(char **cmdline_p)
> * we are rounding upwards:
> */
> end_pfn = e820_end_of_ram();
> + /* Trim memory not covered by WB MTRRs */
> + mtrr_bp_init();
> + mtrr_trim_uncached_memory();
> +
> num_physpages = end_pfn;
>
> check_efer();
> diff --git a/include/asm-x86_64/mtrr.h b/include/asm-x86_64/mtrr.h
> index b557c48..51ad788 100644
> --- a/include/asm-x86_64/mtrr.h
> +++ b/include/asm-x86_64/mtrr.h
> @@ -78,6 +78,7 @@ extern int mtrr_add_page (unsigned long base, unsigned long size,
> unsigned int type, char increment);
> extern int mtrr_del (int reg, unsigned long base, unsigned long size);
> extern int mtrr_del_page (int reg, unsigned long base, unsigned long size);
> +extern void mtrr_trim_uncached_memory(void);
> # else
> static __inline__ int mtrr_add (unsigned long base, unsigned long size,
> unsigned int type, char increment)
> @@ -99,7 +100,9 @@ static __inline__ int mtrr_del_page (int reg, unsigned long base,
> {
> return -ENODEV;
> }
> -
> +static __inline__ void mtrr_trim_uncached_memory(void)
> +{
> +}
> #endif /* CONFIG_MTRR */
>
> #ifdef CONFIG_COMPAT
>

2007-06-25 22:03:01

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

On Mon, 25 Jun 2007 14:34:42 -0700
Jesse Barnes <[email protected]> wrote:

> akpm -- this one should replace all the mtrr patches currently
> in your tree.

fear, uncertainty, doubt.

box:/usr/src/25> grep mtrr series
x86_64-mm-bug-in-i386-mtrr-initialization.patch
x86-fix-section-mismatch-warnings-in-mtrr.patch
i386-x86_64-trim-memory-not-covered-by-wb-mtrrs.patch
i386-x86_64-trim-memory-not-covered-by-wb-mtrrs-fix.patch
i386-x86_64-trim-memory-not-covered-by-wb-mtrrs-fix-2.patch
i386-mtrr-clean-up-usage_table.patch

Not all of those, I'm sure. I think you're referring to the final
four. I guess if it doesn't compile (again) I'll find out ;)

2007-06-25 22:06:18

by Jesse Barnes

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

On Monday, June 25, 2007 3:01:27 Andrew Morton wrote:
> On Mon, 25 Jun 2007 14:34:42 -0700
>
> Jesse Barnes <[email protected]> wrote:
> > akpm -- this one should replace all the mtrr patches currently
> > in your tree.
>
> fear, uncertainty, doubt.
>
> box:/usr/src/25> grep mtrr series
> x86_64-mm-bug-in-i386-mtrr-initialization.patch
> x86-fix-section-mismatch-warnings-in-mtrr.patch
> i386-x86_64-trim-memory-not-covered-by-wb-mtrrs.patch
> i386-x86_64-trim-memory-not-covered-by-wb-mtrrs-fix.patch
> i386-x86_64-trim-memory-not-covered-by-wb-mtrrs-fix-2.patch
> i386-mtrr-clean-up-usage_table.patch
>
> Not all of those, I'm sure. I think you're referring to the final
> four. I guess if it doesn't compile (again) I'll find out ;)

Oops, correct. I forgot you were carrying some MTRR patches not
authored by me. :) Yeah, the last four are the ones I meant.

Thanks,
Jesse

2007-06-25 22:29:32

by Justin Piszcz

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs



On Mon, 25 Jun 2007, Jesse Barnes wrote:

> On Monday, June 25, 2007 3:01:27 Andrew Morton wrote:
>> On Mon, 25 Jun 2007 14:34:42 -0700
>>
>> Jesse Barnes <[email protected]> wrote:
>>> akpm -- this one should replace all the mtrr patches currently
>>> in your tree.
>>
>> fear, uncertainty, doubt.
>>
>> box:/usr/src/25> grep mtrr series
>> x86_64-mm-bug-in-i386-mtrr-initialization.patch
>> x86-fix-section-mismatch-warnings-in-mtrr.patch
>> i386-x86_64-trim-memory-not-covered-by-wb-mtrrs.patch
>> i386-x86_64-trim-memory-not-covered-by-wb-mtrrs-fix.patch
>> i386-x86_64-trim-memory-not-covered-by-wb-mtrrs-fix-2.patch
>> i386-mtrr-clean-up-usage_table.patch
>>
>> Not all of those, I'm sure. I think you're referring to the final
>> four. I guess if it doesn't compile (again) I'll find out ;)
>
> Oops, correct. I forgot you were carrying some MTRR patches not
> authored by me. :) Yeah, the last four are the ones I meant.
>
> Thanks,
> Jesse
>

Hi Jesse,

Tested your patch against 2.6.22-rc6, working good so far!

$ uname -r
2.6.22-rc6

Mem: 8039576k total, 1056500k used, 6983076k free, 2420k buffers
Swap: 16787768k total, 0k used, 16787768k free, 127128k cached

Justin.

2007-06-25 23:34:43

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

> This patch fixes a bug in the last patch that caused the code to
> run on non-Intel machines (AMD machines apparently don't need it

Actually the problem can happen on AMD too, but the symptoms can
be different and there can be more wrong than just the MTRRs.

-Andi

2007-06-25 23:37:45

by Jesse Barnes

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

On Monday, June 25, 2007 4:34:33 Andi Kleen wrote:
> > This patch fixes a bug in the last patch that caused the code to
> > run on non-Intel machines (AMD machines apparently don't need it
>
> Actually the problem can happen on AMD too, but the symptoms can
> be different and there can be more wrong than just the MTRRs.

I should have been more specific in the changelog. My understanding is
that AMD systems don't need it for memory above 4G, and since the code
doesn't handle holes (no test systems, nor any real reports that I've
seen), it's not that useful for finding problems below 4G. We can
always change that later if needed though.

Thanks,
Jesse

2007-06-26 00:56:11

by Eric W. Biederman

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

Jesse Barnes <[email protected]> writes:

> On Monday, June 25, 2007 4:34:33 Andi Kleen wrote:
>> > This patch fixes a bug in the last patch that caused the code to
>> > run on non-Intel machines (AMD machines apparently don't need it
>>
>> Actually the problem can happen on AMD too, but the symptoms can
>> be different and there can be more wrong than just the MTRRs.
>
> I should have been more specific in the changelog. My understanding is
> that AMD systems don't need it for memory above 4G, and since the code
> doesn't handle holes (no test systems, nor any real reports that I've
> seen), it's not that useful for finding problems below 4G. We can
> always change that later if needed though.

For the K7 and K8 cores AMD systems are exactly like Intel systems
with respect to MTRRs (although AMD systems also have additional registers)
For the K9 core (i.e. AMD socket F or the K8 with DDR2 support) there
is an additional mechanism that makes everything above 4G write-back
cacheable without using any MTRRs.

So only on the very latest AMD cpus would this code not be applicable.

Eric

2007-06-26 03:29:57

by Jesse Barnes

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

On Monday, June 25, 2007 5:54:49 pm Eric W. Biederman wrote:
> Jesse Barnes <[email protected]> writes:
> > On Monday, June 25, 2007 4:34:33 Andi Kleen wrote:
> >> > This patch fixes a bug in the last patch that caused the code to
> >> > run on non-Intel machines (AMD machines apparently don't need it
> >>
> >> Actually the problem can happen on AMD too, but the symptoms can
> >> be different and there can be more wrong than just the MTRRs.
> >
> > I should have been more specific in the changelog. My understanding is
> > that AMD systems don't need it for memory above 4G, and since the code
> > doesn't handle holes (no test systems, nor any real reports that I've
> > seen), it's not that useful for finding problems below 4G. We can
> > always change that later if needed though.
>
> For the K7 and K8 cores AMD systems are exactly like Intel systems
> with respect to MTRRs (although AMD systems also have additional registers)
> For the K9 core (i.e. AMD socket F or the K8 with DDR2 support) there
> is an additional mechanism that makes everything above 4G write-back
> cacheable without using any MTRRs.
>
> So only on the very latest AMD cpus would this code not be applicable.

Is there an is_cpu() check to differentiate between those? Anyway I'd rather
not enable it unless we see reports though... So far I've only seen reports
of this problem on some recent Intel based systems.

Jesse


2007-06-26 03:31:17

by Jesse Barnes

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

On Monday, June 25, 2007 8:29:35 pm Jesse Barnes wrote:
> Is there an is_cpu() check to differentiate between those? Anyway I'd
> rather not enable it unless we see reports though... So far I've only seen
> reports of this problem on some recent Intel based systems.

Oh, and FYI I've seen new systems with a default mapping type of WB, with a
few uncached holes for MMIO. That means PAT will be absolutely required on
those systems if we want to use WC for the framebuffer or other memory. I
hope it's ready for mainline soon...

Jesse

2007-06-26 15:03:04

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

On Mon, Jun 25, 2007 at 04:36:52PM -0700, Jesse Barnes wrote:
> On Monday, June 25, 2007 4:34:33 Andi Kleen wrote:
> > > This patch fixes a bug in the last patch that caused the code to
> > > run on non-Intel machines (AMD machines apparently don't need it
> >
> > Actually the problem can happen on AMD too, but the symptoms can
> > be different and there can be more wrong than just the MTRRs.
>
> I should have been more specific in the changelog. My understanding is
> that AMD systems don't need it for memory above 4G, and since the code

AMD systems need MTRRs to get cached memory too, or what is your point?

There was a new memory remapping feature in RevE but I didn't think
it obsoleted MTRRs.

We've also had systems with this issue.

-Andi

2007-06-26 15:03:58

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

On Mon, Jun 25, 2007 at 08:30:52PM -0700, Jesse Barnes wrote:
> Oh, and FYI I've seen new systems with a default mapping type of WB, with a
> few uncached holes for MMIO. That means PAT will be absolutely required on
> those systems if we want to use WC for the framebuffer or other memory.

Why? As long as there are still MTRRs free you could use them for
WC even in this setup.

-Andi

2007-06-26 15:08:15

by Jesse Barnes

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

On Tuesday, June 26, 2007 8:03:48 am Andi Kleen wrote:
> On Mon, Jun 25, 2007 at 08:30:52PM -0700, Jesse Barnes wrote:
> > Oh, and FYI I've seen new systems with a default mapping type of WB, with
> > a few uncached holes for MMIO. That means PAT will be absolutely
> > required on those systems if we want to use WC for the framebuffer or
> > other memory.
>
> Why? As long as there are still MTRRs free you could use them for
> WC even in this setup.

According to the manual, you can't use WC or WT MTRRs on top of a WB MTRR
range. Only UC MTRRs can override WB MTRRs, but PAT doesn't have this
restriction.

Jesse

2007-06-26 15:18:55

by Jesse Barnes

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

On Tuesday, June 26, 2007 8:07:45 am Jesse Barnes wrote:
> On Tuesday, June 26, 2007 8:03:48 am Andi Kleen wrote:
> > On Mon, Jun 25, 2007 at 08:30:52PM -0700, Jesse Barnes wrote:
> > > Oh, and FYI I've seen new systems with a default mapping type of WB,
> > > with a few uncached holes for MMIO. That means PAT will be absolutely
> > > required on those systems if we want to use WC for the framebuffer or
> > > other memory.
> >
> > Why? As long as there are still MTRRs free you could use them for
> > WC even in this setup.
>
> According to the manual, you can't use WC or WT MTRRs on top of a WB MTRR
> range. Only UC MTRRs can override WB MTRRs, but PAT doesn't have this
> restriction.

Actually, it looks like WT on top of WB is ok, but not WC on top of WB,
quoting from the manual:

> 1. If the physical address falls within the first 1 MByte of physical
> memory and fixed MTRRs are enabled, the processor uses the memory type
> stored for the appropriate fixed-range MTRR.
>
> 2. Otherwise, the processor attempts to match the physical address with a
> memory type set by the variable-range MTRRs:
>
> a. If one variable memory range matches, the processor uses the memory
> type stored in the IA32_MTRR_PHYSBASEn register for that range.
> b. If two or more variable memory ranges match and the memory types are
> identical, then that memory type is used.
> c. If two or more variable memory ranges match and one of the memory
> types is UC, the UC memory type used.
> d. If two or more variable memory ranges match and the memory types are
> WT and WB, the WT memory type is used.
> e. For overlaps not defined by the above rules, processor behavior is
> undefined.
>
> 3. If no fixed or variable memory range matches, the processor uses the
> default memory type.

2007-06-26 15:38:27

by Jesse Barnes

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

On Tuesday, June 26, 2007 8:02:49 am Andi Kleen wrote:
> On Mon, Jun 25, 2007 at 04:36:52PM -0700, Jesse Barnes wrote:
> > On Monday, June 25, 2007 4:34:33 Andi Kleen wrote:
> > > > This patch fixes a bug in the last patch that caused the code to
> > > > run on non-Intel machines (AMD machines apparently don't need it
> > >
> > > Actually the problem can happen on AMD too, but the symptoms can
> > > be different and there can be more wrong than just the MTRRs.
> >
> > I should have been more specific in the changelog. My understanding is
> > that AMD systems don't need it for memory above 4G, and since the code
>
> AMD systems need MTRRs to get cached memory too, or what is your point?

My point is that yes, this problem can happen on AMD, but the code doesn't
handle the problems that AMD systems might have, since it doesn't look for
problems in low memory (e.g. if you have an AMD system with 6G of memory, the
code will probably trim everything above 4G since it doesn't know about the
new AMD mapping stuff from RevE), as Eric pointed out.

Both you and Eric say you've seen AMD systems with problems, but handling them
would make the code more complex than it is now, and I haven't seen the
actual reports (memory maps & MTRR setups) so I can't really fix them anyway.
And since this patch solves real problems as-is, it seems like we should push
it upstream then rework it, if necessary (i.e. real user machines with this
problem) later.

What do you think?

Jesse

2007-06-26 15:39:22

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

> For the K7 and K8 cores AMD systems are exactly like Intel systems
> with respect to MTRRs (although AMD systems also have additional registers)
> For the K9 core (i.e. AMD socket F or the K8 with DDR2 support) there

It's called K8RevE, not K9

> is an additional mechanism that makes everything above 4G write-back
> cacheable without using any MTRRs.

... but not BIOS use this mechanism (often there are BIOS switches
for several MTRR models or it is just the wrong one hardcoded), so Linux
should detect the broken cases.

-Andi

2007-06-26 15:55:09

by Yinghai Lu

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

On 6/26/07, Andi Kleen <[email protected]> wrote:
> > For the K7 and K8 cores AMD systems are exactly like Intel systems
> > with respect to MTRRs (although AMD systems also have additional registers)
> > For the K9 core (i.e. AMD socket F or the K8 with DDR2 support) there
>
> It's called K8RevE, not K9

For K8 Rev F and later, if you are using TOM2, cpu will assume the mem
between 4G and TOM2 is WB.

I think rule could be:
scan the var mtrrs to find out if there is any mtrr is used for 4G
above is set to WB, if it is true, will use trimming tricks.

YH

2007-06-26 16:07:27

by Eric W. Biederman

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

Andi Kleen <[email protected]> writes:

>> For the K7 and K8 cores AMD systems are exactly like Intel systems
>> with respect to MTRRs (although AMD systems also have additional registers)
>> For the K9 core (i.e. AMD socket F or the K8 with DDR2 support) there
>
> It's called K8RevE, not K9

revF not revE. I think AMD was code-naming that K9 before the socket F
part was released.

revE was the last DDR rev of the K8 core.

>> is an additional mechanism that makes everything above 4G write-back
>> cacheable without using any MTRRs.
>
> ... but not BIOS use this mechanism (often there are BIOS switches
> for several MTRR models or it is just the wrong one hardcoded), so Linux
> should detect the broken cases.

Yes. They are and I have seen at least two motherboards that fit this
description. I almost freaked out looking at a system with 16G and
only 4G setup to be cached.

The painful bit is I have also seen such a system with not all of the
lower 4G cached. Which caused some interesting booting issues.

Eric

2007-06-26 17:38:29

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

On Tue, Jun 26, 2007 at 10:06:41AM -0600, Eric W. Biederman wrote:
> Andi Kleen <[email protected]> writes:
>
> >> For the K7 and K8 cores AMD systems are exactly like Intel systems
> >> with respect to MTRRs (although AMD systems also have additional registers)
> >> For the K9 core (i.e. AMD socket F or the K8 with DDR2 support) there
> >
> > It's called K8RevE, not K9
>
> revF not revE. I think AMD was code-naming that K9 before the socket F
> part was released.

I didn't think so.

>
> revE was the last DDR rev of the K8 core.

RevE had a new memory remapping scheme (memory hoisting) at least, which
I think you refered to earlier. There might have been more changes in F.

-Andi

2007-06-26 18:55:53

by Yinghai Lu

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

On 6/26/07, Andi Kleen <[email protected]> wrote:
> RevE had a new memory remapping scheme (memory hoisting) at least, which
> I think you refered to earlier. There might have been more changes in F.

yes, REV E has hardware memory remapping.
Rev F you don't need to use mtrr to set MEM to WE above 4G.

YH

2007-06-27 10:45:25

by Pim Zandbergen

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

First:
Jesse saved my life by releasing a patch that made my GigaByte Intel G33
based motherboard use all of its 8GB RAM and not be slow as hell.

Then:
GigaByte released a BIOS update that fixed the root of the problem.
I went back from patched vanilla kernel to "official" Fedora kernel.

Now:
Jesse released a new patch and I tried if for fun on 2.6.22-rc6
It looks like the patch is releasing memory rather than trimming it:

Jun 27 12:22:56 corneille kernel: Linux version 2.6.22-rc6
(pim@corneille) (gcc version 4.1.2 20070502 (Red Hat 4.1.2-12)) #1 SMP
Wed Jun 27 11:41:29 CEST 2007
Jun 27 12:22:56 corneille kernel: Command line: root=LABEL=/
console=ttyS0,38400
Jun 27 12:22:56 corneille kernel: BIOS-provided physical RAM map:
Jun 27 12:22:56 corneille kernel: BIOS-e820: 0000000000000000 -
000000000009e800 (usable)
Jun 27 12:22:56 corneille kernel: BIOS-e820: 000000000009f800 -
00000000000a0000 (reserved)
Jun 27 12:22:56 corneille kernel: BIOS-e820: 00000000000f0000 -
0000000000100000 (reserved)
Jun 27 12:22:56 corneille kernel: BIOS-e820: 0000000000100000 -
00000000cf5e0000 (usable)
Jun 27 12:22:56 corneille kernel: BIOS-e820: 00000000cf5e0000 -
00000000cf5e3000 (ACPI NVS)
Jun 27 12:22:56 corneille kernel: BIOS-e820: 00000000cf5e3000 -
00000000cf5f0000 (ACPI data)
Jun 27 12:22:56 corneille kernel: BIOS-e820: 00000000cf5f0000 -
00000000cf600000 (reserved)
Jun 27 12:22:56 corneille kernel: BIOS-e820: 00000000d0000000 -
00000000e0000000 (reserved)
Jun 27 12:22:56 corneille kernel: BIOS-e820: 00000000fec00000 -
0000000100000000 (reserved)
Jun 27 12:22:56 corneille kernel: BIOS-e820: 0000000100000000 -
0000000230000000 (usable)
Jun 27 12:22:56 corneille kernel: end_pfn_map = 2293760
Jun 27 12:22:56 corneille kernel: ***************
Jun 27 12:22:56 corneille kernel: **** WARNING: likely BIOS bug
Jun 27 12:22:56 corneille kernel: **** MTRRs don't cover all of memory,
trimmed -65536 pages
Jun 27 12:22:56 corneille kernel: ***************

I tried the earlier patch as well and it gives the same result.

2007-06-27 11:22:27

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

On Wed, Jun 27, 2007 at 12:44:01PM +0200, Pim Zandbergen wrote:
> Jesse saved my life by releasing a patch that made my GigaByte Intel G33
> based motherboard use all of its 8GB RAM and not be slow as hell.

That's impossible. Either it limited your RAM or it didn't change anything.

-Andi

2007-06-27 11:40:51

by Pim Zandbergen

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

Andi Kleen wrote:

> That's impossible. Either it limited your RAM or it didn't change anything.

OK, maybe it's cosmetic, but I would not expect a negative number

With old BIOS it printed
**** MTRRs don't cover all of memory, trimmed 196608 pages

with new BIOS it prints
**** MTRRs don't cover all of memory, trimmed -65536 pages

Pim

2007-06-27 11:44:18

by Justin Piszcz

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs



On Wed, 27 Jun 2007, Pim Zandbergen wrote:

> Andi Kleen wrote:
>
>> That's impossible. Either it limited your RAM or it didn't change anything.
>
> OK, maybe it's cosmetic, but I would not expect a negative number
>
> With old BIOS it printed
> **** MTRRs don't cover all of memory, trimmed 196608 pages
>
> with new BIOS it prints
> **** MTRRs don't cover all of memory, trimmed -65536 pages
>
> Pim
>

It works ok for me:

[ 0.000000] **** MTRRs don't cover all of memory, trimmed 16384 pages

This is using his latest patch with 2.6.22-rc6.

Justin.

2007-06-27 14:22:33

by Mauro Giachero

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

On 6/27/07, Pim Zandbergen <[email protected]> wrote:
> Now:
> Jesse released a new patch and I tried if for fun on 2.6.22-rc6
> It looks like the patch is releasing memory rather than trimming it:
>
> [...]
> Jun 27 12:22:56 corneille kernel: **** MTRRs don't cover all of memory,
> trimmed -65536 pages
> [...]

>From Jesse's patch:
> + unsigned long i, base, size, highest_addr = 0, def, dummy;
> [...]
> + printk(KERN_WARNING "**** MTRRs don't cover all of "
> + "memory, trimmed %ld pages\n", end_pfn -
> + (highest_addr >> PAGE_SHIFT));

Since both end_pfn (from arch/x86_64/kernel/e820.c) and highest_addr
are unsigned long, maybe the problem is just that %ld in the kprintf
format string? Shouldn't that be %lu?

Mauro

2007-06-27 15:05:52

by Jesse Barnes

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

On Wednesday, June 27, 2007 7:22:24 Mauro Giachero wrote:
> On 6/27/07, Pim Zandbergen <[email protected]> wrote:
> > Now:
> > Jesse released a new patch and I tried if for fun on 2.6.22-rc6
> > It looks like the patch is releasing memory rather than trimming
> > it:
> >
> > [...]
> > Jun 27 12:22:56 corneille kernel: **** MTRRs don't cover all of
> > memory, trimmed -65536 pages
> > [...]
>
> From Jesse's patch:
> > + unsigned long i, base, size, highest_addr = 0, def, dummy;
> > [...]
> > + printk(KERN_WARNING "**** MTRRs don't cover all of
> > " + "memory, trimmed %ld pages\n", end_pfn - +
> > (highest_addr >> PAGE_SHIFT));
>
> Since both end_pfn (from arch/x86_64/kernel/e820.c) and highest_addr
> are unsigned long, maybe the problem is just that %ld in the kprintf
> format string? Shouldn't that be %lu?

Yeah, you're right I should use an unsigned format string. Pim, if you
change it to %lu does the printk in your dmesg look better?

Thanks,
Jesse

2007-06-27 16:01:32

by Pim Zandbergen

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

Jesse Barnes wrote:

> Yeah, you're right I should use an unsigned format string. Pim, if you
> change it to %lu does the printk in your dmesg look better?

Er, no.

**** MTRRs don't cover all of memory, trimmed 18446744073709486080 pages

Thanks,
Pim

2007-06-27 16:08:25

by Jesse Barnes

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

On Wednesday, June 27, 2007 9:00:33 Pim Zandbergen wrote:
> Jesse Barnes wrote:
> > Yeah, you're right I should use an unsigned format string. Pim, if
> > you change it to %lu does the printk in your dmesg look better?
>
> Er, no.
>
> **** MTRRs don't cover all of memory, trimmed 18446744073709486080
> pages

Hm, that's what I was afraid of. So something else is wrong. :)

Jesse

2007-06-27 16:23:05

by Jesse Barnes

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

On Wednesday, June 27, 2007 9:00:33 Pim Zandbergen wrote:
> Jesse Barnes wrote:
> > Yeah, you're right I should use an unsigned format string. Pim, if
> > you change it to %lu does the printk in your dmesg look better?
>
> Er, no.
>
> **** MTRRs don't cover all of memory, trimmed 18446744073709486080
> pages

It looks like end_pfn might be ~0UL now... can you print that out in
your configuration?

Thanks,
Jesse

2007-06-27 17:03:17

by Pim Zandbergen

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

Jesse Barnes wrote:

> It looks like end_pfn might be ~0UL now... can you print that out in
> your configuration?

Er, do you need the value of end_pfn ?
Here's what I changed:

if ((highest_addr >> PAGE_SHIFT) != end_pfn) {
printk(KERN_WARNING "***************\n");
printk(KERN_WARNING "**** WARNING: likely BIOS bug\n");
printk(KERN_WARNING "**** MTRRs don't cover all of "
"memory, trimmed %lu pages\n", end_pfn -
(highest_addr >> PAGE_SHIFT));
printk(KERN_WARNING "**** end_pfn before = %lu\n", end_pfn);
end_pfn = highest_addr >> PAGE_SHIFT;
printk(KERN_WARNING "**** end_pfn after = %lu\n", end_pfn);
printk(KERN_WARNING "***************\n");
}

Here's the result:
***************
**** WARNING: likely BIOS bug
**** MTRRs don't cover all of memory, trimmed 18446744073709486080 pages
**** end_pfn before = 2293760
**** end_pfn after = 2359296
***************

Hope that's what you needed.
Pim

2007-06-27 17:07:20

by Jesse Barnes

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

On Wednesday, June 27, 2007 10:02:35 Pim Zandbergen wrote:
> Jesse Barnes wrote:
> > It looks like end_pfn might be ~0UL now... can you print that out
> > in your configuration?
>
> Er, do you need the value of end_pfn ?
> Here's what I changed:
>
> if ((highest_addr >> PAGE_SHIFT) != end_pfn) {
> printk(KERN_WARNING "***************\n");
> printk(KERN_WARNING "**** WARNING: likely BIOS bug\n");
> printk(KERN_WARNING "**** MTRRs don't cover all of "
> "memory, trimmed %lu pages\n", end_pfn -
> (highest_addr >> PAGE_SHIFT));
> printk(KERN_WARNING "**** end_pfn before = %lu\n", end_pfn);
> end_pfn = highest_addr >> PAGE_SHIFT;
> printk(KERN_WARNING "**** end_pfn after = %lu\n", end_pfn);
> printk(KERN_WARNING "***************\n");
> }
>
> Here's the result:
> ***************
> **** WARNING: likely BIOS bug
> **** MTRRs don't cover all of memory, trimmed 18446744073709486080
> pages **** end_pfn before = 2293760
> **** end_pfn after = 2359296
> ***************
>
> Hope that's what you needed.

Yeah, that's what I needed. end_pfn looks ok, but I guess my test is a
little too precise. It should be if ((highest_addr >> PAGE_SHIFT) <
end_pfn) rather than !=. That should keep it from trying to extend
your memory with bogus math. I thought that the two values should
always match, but I guess slightly different memory configurations
might break that assumption.

Thanks,
Jesse

2007-06-27 17:17:36

by Pim Zandbergen

[permalink] [raw]
Subject: Re: [PATCH] trim memory not covered by WB MTRRs

Jesse Barnes wrote:

> Yeah, that's what I needed. end_pfn looks ok, but I guess my test is a
> little too precise. It should be if ((highest_addr >> PAGE_SHIFT) <
> end_pfn) rather than !=.

Right. That made the message disappear. Nice to know my BIOS is really
fixed.

Thanks,
Pim