From: "Luis R. Rodriguez" <[email protected]>
This provides a bit of clean up on PAT and then adds addresses
the agreed upon compromise of how we will address drivers with broken
PAT support by using pat_enabled() and recommending to boot with "nopat"
kernel parameter.
With this and the rest of the easier changes submitted we should
soon see a patch trickle down which buries MTRR from direct driver
access. That will only be done once *all* the series addressing the
conversion have been merged.
The two driver changes that are implicated here have been reviewed
by the respective maintainers, this goes with their respective ACKs.
Luis R. Rodriguez (8):
x86/mm/pat: use pr_info() and friends
init.h: add __read_mostly to early_param_on_off()
init.h: add early_param_on() and early_param_off()
x86/mm/pat: use early_param_off() and redefine pat_enabled()
arch/x86/mm/pat: export pat_enabled()
ivtv: use arch_phys_wc_add() and require PAT disabled
IB/ipath: add counting for MTRR
IB/ipath: use arch_phys_wc_add() and require PAT disabled
arch/x86/include/asm/pat.h | 7 +--
arch/x86/kernel/cpu/mtrr/main.c | 2 +-
arch/x86/mm/iomap_32.c | 2 +-
arch/x86/mm/ioremap.c | 4 +-
arch/x86/mm/pageattr.c | 2 +-
arch/x86/mm/pat.c | 86 ++++++++++++---------------
arch/x86/pci/i386.c | 6 +-
drivers/infiniband/hw/ipath/Kconfig | 3 +
drivers/infiniband/hw/ipath/ipath_driver.c | 18 ++++--
drivers/infiniband/hw/ipath/ipath_kernel.h | 4 +-
drivers/infiniband/hw/ipath/ipath_wc_x86_64.c | 43 +++-----------
drivers/media/pci/ivtv/Kconfig | 3 +
drivers/media/pci/ivtv/ivtvfb.c | 58 +++++++-----------
include/linux/init.h | 25 +++++++-
14 files changed, 123 insertions(+), 140 deletions(-)
--
2.3.2.209.gd67f9d5.dirty
From: "Luis R. Rodriguez" <[email protected]>
Cc: Andy Walls <[email protected]>
Cc: Doug Ledford <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Juergen Gross <[email protected]>
Cc: Daniel Vetter <[email protected]>
Cc: Dave Airlie <[email protected]>
Cc: Bjorn Helgaas <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Michael S. Tsirkin <[email protected]>
Cc: [email protected]
Cc: [email protected]
Signed-off-by: Luis R. Rodriguez <[email protected]>
---
arch/x86/mm/pat.c | 47 +++++++++++++++++++++++------------------------
1 file changed, 23 insertions(+), 24 deletions(-)
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 372ad42..917155d 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -39,7 +39,7 @@ int __read_mostly pat_enabled = 1;
static inline void pat_disable(const char *reason)
{
pat_enabled = 0;
- printk(KERN_INFO "%s\n", reason);
+ pr_info("%s\n", reason);
}
static int __init nopat(char *str)
@@ -211,8 +211,7 @@ void pat_init(void)
* switched to PAT on the boot CPU. We have no way to
* undo PAT.
*/
- printk(KERN_ERR "PAT enabled, "
- "but not supported by secondary CPU\n");
+ pr_err("PAT enabled, but not supported by secondary CPU\n");
BUG();
}
}
@@ -451,9 +450,9 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type,
err = rbt_memtype_check_insert(new, new_type);
if (err) {
- printk(KERN_INFO "reserve_memtype failed [mem %#010Lx-%#010Lx], track %s, req %s\n",
- start, end - 1,
- cattr_name(new->type), cattr_name(req_type));
+ pr_info("reserve_memtype failed [mem %#010Lx-%#010Lx], track %s, req %s\n",
+ start, end - 1,
+ cattr_name(new->type), cattr_name(req_type));
kfree(new);
spin_unlock(&memtype_lock);
@@ -462,9 +461,9 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type,
spin_unlock(&memtype_lock);
- dprintk("reserve_memtype added [mem %#010Lx-%#010Lx], track %s, req %s, ret %s\n",
- start, end - 1, cattr_name(new->type), cattr_name(req_type),
- new_type ? cattr_name(*new_type) : "-");
+ pr_debug("reserve_memtype added [mem %#010Lx-%#010Lx], track %s, req %s, ret %s\n",
+ start, end - 1, cattr_name(new->type), cattr_name(req_type),
+ new_type ? cattr_name(*new_type) : "-");
return err;
}
@@ -497,14 +496,15 @@ int free_memtype(u64 start, u64 end)
spin_unlock(&memtype_lock);
if (!entry) {
- printk(KERN_INFO "%s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n",
- current->comm, current->pid, start, end - 1);
+ pr_info("%s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n",
+ current->comm, current->pid, start, end - 1);
return -EINVAL;
}
kfree(entry);
- dprintk("free_memtype request [mem %#010Lx-%#010Lx]\n", start, end - 1);
+ pr_debug("free_memtype request [mem %#010Lx-%#010Lx]\n",
+ start, end - 1);
return 0;
}
@@ -628,8 +628,8 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size)
while (cursor < to) {
if (!devmem_is_allowed(pfn)) {
- printk(KERN_INFO "Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx], PAT prevents it\n",
- current->comm, from, to - 1);
+ pr_info("Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx], PAT prevents it\n",
+ current->comm, from, to - 1);
return 0;
}
cursor += PAGE_SIZE;
@@ -698,8 +698,7 @@ int kernel_map_sync_memtype(u64 base, unsigned long size,
size;
if (ioremap_change_attr((unsigned long)__va(base), id_sz, pcm) < 0) {
- printk(KERN_INFO "%s:%d ioremap_change_attr failed %s "
- "for [mem %#010Lx-%#010Lx]\n",
+ pr_info("%s:%d ioremap_change_attr failed %s for [mem %#010Lx-%#010Lx]\n",
current->comm, current->pid,
cattr_name(pcm),
base, (unsigned long long)(base + size-1));
@@ -734,7 +733,7 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
pcm = lookup_memtype(paddr);
if (want_pcm != pcm) {
- printk(KERN_WARNING "%s:%d map pfn RAM range req %s for [mem %#010Lx-%#010Lx], got %s\n",
+ pr_warn("%s:%d map pfn RAM range req %s for [mem %#010Lx-%#010Lx], got %s\n",
current->comm, current->pid,
cattr_name(want_pcm),
(unsigned long long)paddr,
@@ -755,13 +754,13 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
if (strict_prot ||
!is_new_memtype_allowed(paddr, size, want_pcm, pcm)) {
free_memtype(paddr, paddr + size);
- printk(KERN_ERR "%s:%d map pfn expected mapping type %s"
- " for [mem %#010Lx-%#010Lx], got %s\n",
- current->comm, current->pid,
- cattr_name(want_pcm),
- (unsigned long long)paddr,
- (unsigned long long)(paddr + size - 1),
- cattr_name(pcm));
+ pr_err(KERN_ERR "%s:%d map pfn expected mapping type %s"
+ " for [mem %#010Lx-%#010Lx], got %s\n",
+ current->comm, current->pid,
+ cattr_name(want_pcm),
+ (unsigned long long)paddr,
+ (unsigned long long)(paddr + size - 1),
+ cattr_name(pcm));
return -EINVAL;
}
/*
--
2.3.2.209.gd67f9d5.dirty
From: "Luis R. Rodriguez" <[email protected]>
Cc: Andy Walls <[email protected]>
Cc: Doug Ledford <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Juergen Gross <[email protected]>
Cc: Daniel Vetter <[email protected]>
Cc: Dave Airlie <[email protected]>
Cc: Bjorn Helgaas <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Michael S. Tsirkin <[email protected]>
Cc: [email protected]
Cc: [email protected]
Signed-off-by: Luis R. Rodriguez <[email protected]>
---
include/linux/init.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/linux/init.h b/include/linux/init.h
index 21b6d76..a0385cc 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -272,7 +272,7 @@ struct obs_kernel_param {
#define early_param_on_off(str_on, str_off, var, config) \
\
- int var = IS_ENABLED(config); \
+ int __read_mostly var = IS_ENABLED(config); \
\
static int __init parse_##var##_on(char *arg) \
{ \
--
2.3.2.209.gd67f9d5.dirty
From: "Luis R. Rodriguez" <[email protected]>
At times all we need is an enabler / disabler.
Cc: Andy Walls <[email protected]>
Cc: Doug Ledford <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Juergen Gross <[email protected]>
Cc: Daniel Vetter <[email protected]>
Cc: Dave Airlie <[email protected]>
Cc: Bjorn Helgaas <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Michael S. Tsirkin <[email protected]>
Cc: [email protected]
Cc: [email protected]
Signed-off-by: Luis R. Rodriguez <[email protected]>
---
include/linux/init.h | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+)
diff --git a/include/linux/init.h b/include/linux/init.h
index a0385cc..7773883 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -270,6 +270,29 @@ struct obs_kernel_param {
#define early_param(str, fn) \
__setup_param(str, fn, fn, 1)
+#define early_param_on(str_on, var, config) \
+ \
+ int __read_mostly var = IS_ENABLED(config); \
+ \
+ static int __init parse_##var##_on(char *arg) \
+ { \
+ var = 1; \
+ return 0; \
+ } \
+ __setup_param(str_on, parse_##var##_on, parse_##var##_on, 1); \
+ \
+
+#define early_param_off(str_off, var, config) \
+ \
+ int __read_mostly var = IS_ENABLED(config); \
+ \
+ static int __init parse_##var##_off(char *arg) \
+ { \
+ var = 0; \
+ return 0; \
+ } \
+ __setup_param(str_off, parse_##var##_off, parse_##var##_off, 1)
+
#define early_param_on_off(str_on, str_off, var, config) \
\
int __read_mostly var = IS_ENABLED(config); \
--
2.3.2.209.gd67f9d5.dirty
From: "Luis R. Rodriguez" <[email protected]>
We use pat_enabled on x86 specific code to see if PAT is
enabled or not, we however are granting full access to the
variable even though readers do not need to set it.
This uses the early_param_off() to simplify the definition of
the old pat_enabled under a new static variable __pat_enabled,
for access to see if its enabled / disabled folks can just use
pat_enabled() now. Code that set this is purely internal to
pat.c. Apart from the early parameter to disable it we also have
a few cases that disable it later, these were wrapped under
and ifdef but since that code cannot run unless PAT was enabled
its not required to wrap it with ifdefs.
Cc: Andy Walls <[email protected]>
Cc: Doug Ledford <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Juergen Gross <[email protected]>
Cc: Daniel Vetter <[email protected]>
Cc: Dave Airlie <[email protected]>
Cc: Bjorn Helgaas <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Michael S. Tsirkin <[email protected]>
Cc: [email protected]
Cc: [email protected]
Signed-off-by: Luis R. Rodriguez <[email protected]>
---
arch/x86/include/asm/pat.h | 7 +------
arch/x86/kernel/cpu/mtrr/main.c | 2 +-
arch/x86/mm/iomap_32.c | 2 +-
arch/x86/mm/ioremap.c | 4 ++--
arch/x86/mm/pageattr.c | 2 +-
arch/x86/mm/pat.c | 38 ++++++++++++++------------------------
arch/x86/pci/i386.c | 6 +++---
7 files changed, 23 insertions(+), 38 deletions(-)
diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h
index 91bc4ba..cdcff7f 100644
--- a/arch/x86/include/asm/pat.h
+++ b/arch/x86/include/asm/pat.h
@@ -4,12 +4,7 @@
#include <linux/types.h>
#include <asm/pgtable_types.h>
-#ifdef CONFIG_X86_PAT
-extern int pat_enabled;
-#else
-static const int pat_enabled;
-#endif
-
+bool pat_enabled(void);
extern void pat_init(void);
void pat_init_cache_modes(void);
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index bfef424..f094d36 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -556,7 +556,7 @@ int arch_phys_wc_add(unsigned long base, unsigned long size)
{
int ret;
- if (pat_enabled || !mtrr_enabled)
+ if (pat_enabled() || !mtrr_enabled)
return 0; /* Success! (We don't need to do anything.) */
ret = mtrr_add(base, size, MTRR_TYPE_WRCOMB, true);
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index 9ca35fc..3a2ec87 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -82,7 +82,7 @@ iomap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot)
* MTRR is UC or WC. UC_MINUS gets the real intention, of the
* user, which is "WC if the MTRR is WC, UC if you can't do that."
*/
- if (!pat_enabled && pgprot_val(prot) ==
+ if (!pat_enabled() && pgprot_val(prot) ==
(__PAGE_KERNEL | cachemode2protval(_PAGE_CACHE_MODE_WC)))
prot = __pgprot(__PAGE_KERNEL |
cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS));
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index a493bb8..82d63ed 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -234,7 +234,7 @@ void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size)
{
/*
* Ideally, this should be:
- * pat_enabled ? _PAGE_CACHE_MODE_UC : _PAGE_CACHE_MODE_UC_MINUS;
+ * pat_enabled() ? _PAGE_CACHE_MODE_UC : _PAGE_CACHE_MODE_UC_MINUS;
*
* Till we fix all X drivers to use ioremap_wc(), we will use
* UC MINUS. Drivers that are certain they need or can already
@@ -292,7 +292,7 @@ EXPORT_SYMBOL_GPL(ioremap_uc);
*/
void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size)
{
- if (pat_enabled)
+ if (pat_enabled())
return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WC,
__builtin_return_address(0));
else
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 49660c0..0aa8dd8 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -1574,7 +1574,7 @@ int set_memory_wc(unsigned long addr, int numpages)
{
int ret;
- if (!pat_enabled)
+ if (!pat_enabled())
return set_memory_uc(addr, numpages);
ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 917155d..b1f9a67 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -33,28 +33,18 @@
#include "pat_internal.h"
#include "mm_internal.h"
-#ifdef CONFIG_X86_PAT
-int __read_mostly pat_enabled = 1;
+static early_param_off("nopat", __pat_enabled, CONFIG_X86_PAT);
static inline void pat_disable(const char *reason)
{
- pat_enabled = 0;
+ __pat_enabled = 0;
pr_info("%s\n", reason);
}
-static int __init nopat(char *str)
-{
- pat_disable("PAT support disabled.");
- return 0;
-}
-early_param("nopat", nopat);
-#else
-static inline void pat_disable(const char *reason)
+bool pat_enabled(void)
{
- (void)reason;
+ return !!__pat_enabled;
}
-#endif
-
int pat_debug_enable;
@@ -198,7 +188,7 @@ void pat_init(void)
u64 pat;
bool boot_cpu = !boot_pat_state;
- if (!pat_enabled)
+ if (!pat_enabled())
return;
if (!cpu_has_pat) {
@@ -399,7 +389,7 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type,
BUG_ON(start >= end); /* end is exclusive */
- if (!pat_enabled) {
+ if (!pat_enabled()) {
/* This is identical to page table setting without PAT */
if (new_type) {
if (req_type == _PAGE_CACHE_MODE_WC)
@@ -474,7 +464,7 @@ int free_memtype(u64 start, u64 end)
int is_range_ram;
struct memtype *entry;
- if (!pat_enabled)
+ if (!pat_enabled())
return 0;
/* Low ISA region is always mapped WB. No need to track */
@@ -623,7 +613,7 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size)
u64 to = from + size;
u64 cursor = from;
- if (!pat_enabled)
+ if (!pat_enabled())
return 1;
while (cursor < to) {
@@ -659,7 +649,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
* caching for the high addresses through the KEN pin, but
* we maintain the tradition of paranoia in this code.
*/
- if (!pat_enabled &&
+ if (!pat_enabled() &&
!(boot_cpu_has(X86_FEATURE_MTRR) ||
boot_cpu_has(X86_FEATURE_K6_MTRR) ||
boot_cpu_has(X86_FEATURE_CYRIX_ARR) ||
@@ -728,7 +718,7 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
* the type requested matches the type of first page in the range.
*/
if (is_ram) {
- if (!pat_enabled)
+ if (!pat_enabled())
return 0;
pcm = lookup_memtype(paddr);
@@ -843,7 +833,7 @@ int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
return ret;
}
- if (!pat_enabled)
+ if (!pat_enabled())
return 0;
/*
@@ -871,7 +861,7 @@ int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
{
enum page_cache_mode pcm;
- if (!pat_enabled)
+ if (!pat_enabled())
return 0;
/* Set prot based on lookup */
@@ -912,7 +902,7 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
pgprot_t pgprot_writecombine(pgprot_t prot)
{
- if (pat_enabled)
+ if (pat_enabled())
return __pgprot(pgprot_val(prot) |
cachemode2protval(_PAGE_CACHE_MODE_WC));
else
@@ -995,7 +985,7 @@ static const struct file_operations memtype_fops = {
static int __init pat_memtype_list_init(void)
{
- if (pat_enabled) {
+ if (pat_enabled()) {
debugfs_create_file("pat_memtype_list", S_IRUSR,
arch_debugfs_dir, NULL, &memtype_fops);
}
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 349c0d3..0a9f2ca 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -429,12 +429,12 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
* Caller can followup with UC MINUS request and add a WC mtrr if there
* is a free mtrr slot.
*/
- if (!pat_enabled && write_combine)
+ if (!pat_enabled() && write_combine)
return -EINVAL;
- if (pat_enabled && write_combine)
+ if (pat_enabled() && write_combine)
prot |= cachemode2protval(_PAGE_CACHE_MODE_WC);
- else if (pat_enabled || boot_cpu_data.x86 > 3)
+ else if (pat_enabled() || boot_cpu_data.x86 > 3)
/*
* ioremap() and ioremap_nocache() defaults to UC MINUS for now.
* To avoid attribute conflicts, request UC MINUS here
--
2.3.2.209.gd67f9d5.dirty
From: "Luis R. Rodriguez" <[email protected]>
Two Linux device drivers cannot work with PAT and the work
required to make them work is significant. There is not
enough motivation to convert these drivers over to use
PAT properly, the compromise reached is to let drivers
that cannot be ported to PAT check if PAT was enabled
and if so fail on probe with a recommendation to boot
with the "nopat" kernel parameter.
Cc: Andy Walls <[email protected]>
Cc: Doug Ledford <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Juergen Gross <[email protected]>
Cc: Daniel Vetter <[email protected]>
Cc: Dave Airlie <[email protected]>
Cc: Bjorn Helgaas <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Michael S. Tsirkin <[email protected]>
Cc: [email protected]
Cc: [email protected]
Signed-off-by: Luis R. Rodriguez <[email protected]>
---
arch/x86/mm/pat.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index b1f9a67..4524b78 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -45,6 +45,7 @@ bool pat_enabled(void)
{
return !!__pat_enabled;
}
+EXPORT_SYMBOL_GPL(pat_enabled);
int pat_debug_enable;
--
2.3.2.209.gd67f9d5.dirty
From: "Luis R. Rodriguez" <[email protected]>
We are burrying direct access to MTRR code support on
x86 in order to take advantage of PAT. In the future we
also want to make the default behaviour of ioremap_nocache()
to use strong UC, use of mtrr_add() on those systems
would make write-combining void.
In order to help both enable us to later make strong
UC default and in order to phase out direct MTRR access
code port the driver over to arch_phys_wc_add() and
annotate that the device driver requires systems to
boot with PAT disabled, with the nopat kernel parameter.
This is a worthy comprmise given that the hardware is
really rare these days, and perhaps only some lost souls
in some third world country are expected to be using this
feature of the device driver.
Acked-by: Andy Walls <[email protected]>
Cc: Andy Walls <[email protected]>
Cc: Doug Ledford <[email protected]>
Cc: Mauro Carvalho Chehab <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Suresh Siddha <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Juergen Gross <[email protected]>
Cc: Daniel Vetter <[email protected]>
Cc: Dave Airlie <[email protected]>
Cc: Bjorn Helgaas <[email protected]>
Cc: Antonino Daplas <[email protected]>
Cc: Jean-Christophe Plagniol-Villard <[email protected]>
Cc: Tomi Valkeinen <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: Arnd Bergmann <[email protected]>
Cc: Michael S. Tsirkin <[email protected]>
Cc: Stefan Bader <[email protected]>
Cc: Ville Syrjälä <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: Vlastimil Babka <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Davidlohr Bueso <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: Roger Pau Monné <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Signed-off-by: Luis R. Rodriguez <[email protected]>
---
drivers/media/pci/ivtv/Kconfig | 3 +++
drivers/media/pci/ivtv/ivtvfb.c | 58 ++++++++++++++++-------------------------
2 files changed, 26 insertions(+), 35 deletions(-)
diff --git a/drivers/media/pci/ivtv/Kconfig b/drivers/media/pci/ivtv/Kconfig
index dd6ee57e..b2a7f88 100644
--- a/drivers/media/pci/ivtv/Kconfig
+++ b/drivers/media/pci/ivtv/Kconfig
@@ -57,5 +57,8 @@ config VIDEO_FB_IVTV
This is used in the Hauppauge PVR-350 card. There is a driver
homepage at <http://www.ivtvdriver.org>.
+ If you have this hardware you will need to boot with PAT disabled
+ on your x86 systems, use the nopat kernel parameter.
+
To compile this driver as a module, choose M here: the
module will be called ivtvfb.
diff --git a/drivers/media/pci/ivtv/ivtvfb.c b/drivers/media/pci/ivtv/ivtvfb.c
index 9ff1230..7685ae3 100644
--- a/drivers/media/pci/ivtv/ivtvfb.c
+++ b/drivers/media/pci/ivtv/ivtvfb.c
@@ -44,8 +44,8 @@
#include <linux/ivtvfb.h>
#include <linux/slab.h>
-#ifdef CONFIG_MTRR
-#include <asm/mtrr.h>
+#ifdef CONFIG_X86_64
+#include <asm/pat.h>
#endif
#include "ivtv-driver.h"
@@ -155,12 +155,11 @@ struct osd_info {
/* Buffer size */
u32 video_buffer_size;
-#ifdef CONFIG_MTRR
/* video_base rounded down as required by hardware MTRRs */
unsigned long fb_start_aligned_physaddr;
/* video_base rounded up as required by hardware MTRRs */
unsigned long fb_end_aligned_physaddr;
-#endif
+ int wc_cookie;
/* Store the buffer offset */
int set_osd_coords_x;
@@ -1099,6 +1098,8 @@ static int ivtvfb_init_vidmode(struct ivtv *itv)
static int ivtvfb_init_io(struct ivtv *itv)
{
struct osd_info *oi = itv->osd_info;
+ /* Find the largest power of two that maps the whole buffer */
+ int size_shift = 31;
mutex_lock(&itv->serialize_lock);
if (ivtv_init_on_first_open(itv)) {
@@ -1132,29 +1133,16 @@ static int ivtvfb_init_io(struct ivtv *itv)
oi->video_pbase, oi->video_vbase,
oi->video_buffer_size / 1024);
-#ifdef CONFIG_MTRR
- {
- /* Find the largest power of two that maps the whole buffer */
- int size_shift = 31;
-
- while (!(oi->video_buffer_size & (1 << size_shift))) {
- size_shift--;
- }
- size_shift++;
- oi->fb_start_aligned_physaddr = oi->video_pbase & ~((1 << size_shift) - 1);
- oi->fb_end_aligned_physaddr = oi->video_pbase + oi->video_buffer_size;
- oi->fb_end_aligned_physaddr += (1 << size_shift) - 1;
- oi->fb_end_aligned_physaddr &= ~((1 << size_shift) - 1);
- if (mtrr_add(oi->fb_start_aligned_physaddr,
- oi->fb_end_aligned_physaddr - oi->fb_start_aligned_physaddr,
- MTRR_TYPE_WRCOMB, 1) < 0) {
- IVTVFB_INFO("disabled mttr\n");
- oi->fb_start_aligned_physaddr = 0;
- oi->fb_end_aligned_physaddr = 0;
- }
- }
-#endif
-
+ while (!(oi->video_buffer_size & (1 << size_shift)))
+ size_shift--;
+ size_shift++;
+ oi->fb_start_aligned_physaddr = oi->video_pbase & ~((1 << size_shift) - 1);
+ oi->fb_end_aligned_physaddr = oi->video_pbase + oi->video_buffer_size;
+ oi->fb_end_aligned_physaddr += (1 << size_shift) - 1;
+ oi->fb_end_aligned_physaddr &= ~((1 << size_shift) - 1);
+ oi->wc_cookie = arch_phys_wc_add(oi->fb_start_aligned_physaddr,
+ oi->fb_end_aligned_physaddr -
+ oi->fb_start_aligned_physaddr);
/* Blank the entire osd. */
memset_io(oi->video_vbase, 0, oi->video_buffer_size);
@@ -1172,14 +1160,7 @@ static void ivtvfb_release_buffers (struct ivtv *itv)
/* Release pseudo palette */
kfree(oi->ivtvfb_info.pseudo_palette);
-
-#ifdef CONFIG_MTRR
- if (oi->fb_end_aligned_physaddr) {
- mtrr_del(-1, oi->fb_start_aligned_physaddr,
- oi->fb_end_aligned_physaddr - oi->fb_start_aligned_physaddr);
- }
-#endif
-
+ arch_phys_wc_del(oi->wc_cookie);
kfree(oi);
itv->osd_info = NULL;
}
@@ -1284,6 +1265,13 @@ static int __init ivtvfb_init(void)
int registered = 0;
int err;
+#ifdef CONFIG_X86_64
+ if (WARN(pat_enabled(),
+ "ivtvfb needs PAT disabled, boot with nopat kernel parameter\n")) {
+ return EINVAL;
+ }
+#endif
+
if (ivtvfb_card_id < -1 || ivtvfb_card_id >= IVTV_MAX_CARDS) {
printk(KERN_ERR "ivtvfb: ivtvfb_card_id parameter is out of range (valid range: -1 - %d)\n",
IVTV_MAX_CARDS - 1);
--
2.3.2.209.gd67f9d5.dirty
From: "Luis R. Rodriguez" <[email protected]>
There is no good reason not to, we eventually delete it as well.
Cc: Toshi Kani <[email protected]>
Cc: Roland Dreier <[email protected]>
Cc: Sean Hefty <[email protected]>
Cc: Hal Rosenstock <[email protected]>
Cc: Suresh Siddha <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Juergen Gross <[email protected]>
Cc: Daniel Vetter <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Dave Airlie <[email protected]>
Cc: Antonino Daplas <[email protected]>
Cc: Jean-Christophe Plagniol-Villard <[email protected]>
Cc: Tomi Valkeinen <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Signed-off-by: Luis R. Rodriguez <[email protected]>
---
drivers/infiniband/hw/ipath/ipath_wc_x86_64.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c b/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c
index 4ad0b93..70c1f3a 100644
--- a/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c
+++ b/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c
@@ -127,7 +127,7 @@ int ipath_enable_wc(struct ipath_devdata *dd)
"(addr %llx, len=0x%llx)\n",
(unsigned long long) pioaddr,
(unsigned long long) piolen);
- cookie = mtrr_add(pioaddr, piolen, MTRR_TYPE_WRCOMB, 0);
+ cookie = mtrr_add(pioaddr, piolen, MTRR_TYPE_WRCOMB, 1);
if (cookie < 0) {
{
dev_info(&dd->pcidev->dev,
--
2.3.2.209.gd67f9d5.dirty
From: "Luis R. Rodriguez" <[email protected]>
We are burrying direct access to MTRR code support on
x86 in order to take advantage of PAT. In the future we
also want to make the default behaviour of ioremap_nocache()
to use strong UC, use of mtrr_add() on those systems
would make write-combining void.
In order to help both enable us to later make strong
UC default and in order to phase out direct MTRR access
code port the driver over to arch_phys_wc_add() and
annotate that the device driver requires systems to
boot with PAT disabled, with the nopat kernel parameter.
This is a worthy compromise given that the ipath device
driver powers the old HTX bus cards that only work in
AMD systems, while the newer IB/qib device driver
powers all PCI-e cards. The ipath device driver is
obsolete, hardware hard to find and because of this
this its a reasonable compromise to make to require
users of ipath to boot with nopat.
Acked-by: Doug Ledford <[email protected]>
Cc: Doug Ledford <[email protected]>
Cc: Andy Walls <[email protected]>
Cc: Hal Rosenstock <[email protected]>
Cc: Sean Hefty <[email protected]>
Cc: Suresh Siddha <[email protected]>
Cc: Rickard Strandqvist <[email protected]>
Cc: Mike Marciniszyn <[email protected]>
Cc: Roland Dreier <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Juergen Gross <[email protected]>
Cc: Daniel Vetter <[email protected]>
Cc: Dave Airlie <[email protected]>
Cc: Bjorn Helgaas <[email protected]>
Cc: Antonino Daplas <[email protected]>
Cc: Jean-Christophe Plagniol-Villard <[email protected]>
Cc: Tomi Valkeinen <[email protected]>
Cc: Ville Syrjälä <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: Vlastimil Babka <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Davidlohr Bueso <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: Arnd Bergmann <[email protected]>
Cc: Michael S. Tsirkin <[email protected]>
Cc: Stefan Bader <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: Roger Pau Monné <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Signed-off-by: Luis R. Rodriguez <[email protected]>
---
drivers/infiniband/hw/ipath/Kconfig | 3 ++
drivers/infiniband/hw/ipath/ipath_driver.c | 18 +++++++----
drivers/infiniband/hw/ipath/ipath_kernel.h | 4 +--
drivers/infiniband/hw/ipath/ipath_wc_x86_64.c | 43 ++++++---------------------
4 files changed, 26 insertions(+), 42 deletions(-)
diff --git a/drivers/infiniband/hw/ipath/Kconfig b/drivers/infiniband/hw/ipath/Kconfig
index 1d9bb11..8fe54ff 100644
--- a/drivers/infiniband/hw/ipath/Kconfig
+++ b/drivers/infiniband/hw/ipath/Kconfig
@@ -9,3 +9,6 @@ config INFINIBAND_IPATH
as IP-over-InfiniBand as well as with userspace applications
(in conjunction with InfiniBand userspace access).
For QLogic PCIe QLE based cards, use the QIB driver instead.
+
+ If you have this hardware you will need to boot with PAT disabled
+ on your x86-64 systems, use the nopat kernel parameter.
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index bd0caed..441cfe5 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -42,6 +42,9 @@
#include <linux/bitmap.h>
#include <linux/slab.h>
#include <linux/module.h>
+#ifdef CONFIG_X86_64
+#include <asm/pat.h>
+#endif
#include "ipath_kernel.h"
#include "ipath_verbs.h"
@@ -395,6 +398,14 @@ static int ipath_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
unsigned long long addr;
u32 bar0 = 0, bar1 = 0;
+#ifdef CONFIG_X86_64
+ if (WARN(pat_enabled(),
+ "ipath needs PAT disabled, boot with nopat kernel parameter\n")) {
+ ret = EINVAL;
+ goto bail;
+ }
+#endif
+
dd = ipath_alloc_devdata(pdev);
if (IS_ERR(dd)) {
ret = PTR_ERR(dd);
@@ -542,6 +553,7 @@ static int ipath_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
dd->ipath_kregbase = __ioremap(addr, len,
(_PAGE_NO_CACHE|_PAGE_WRITETHRU));
#else
+ /* XXX: split this properly to enable on PAT */
dd->ipath_kregbase = ioremap_nocache(addr, len);
#endif
@@ -587,12 +599,8 @@ static int ipath_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
ret = ipath_enable_wc(dd);
- if (ret) {
- ipath_dev_err(dd, "Write combining not enabled "
- "(err %d): performance may be poor\n",
- -ret);
+ if (ret)
ret = 0;
- }
ipath_verify_pioperf(dd);
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index e08db70..f0f9471 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -463,9 +463,7 @@ struct ipath_devdata {
/* offset in HT config space of slave/primary interface block */
u8 ipath_ht_slave_off;
/* for write combining settings */
- unsigned long ipath_wc_cookie;
- unsigned long ipath_wc_base;
- unsigned long ipath_wc_len;
+ int wc_cookie;
/* ref count for each pkey */
atomic_t ipath_pkeyrefs[4];
/* shadow copy of struct page *'s for exp tid pages */
diff --git a/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c b/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c
index 70c1f3a..7b6e4c8 100644
--- a/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c
+++ b/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c
@@ -37,7 +37,6 @@
*/
#include <linux/pci.h>
-#include <asm/mtrr.h>
#include <asm/processor.h>
#include "ipath_kernel.h"
@@ -122,27 +121,14 @@ int ipath_enable_wc(struct ipath_devdata *dd)
}
if (!ret) {
- int cookie;
- ipath_cdbg(VERBOSE, "Setting mtrr for chip to WC "
- "(addr %llx, len=0x%llx)\n",
- (unsigned long long) pioaddr,
- (unsigned long long) piolen);
- cookie = mtrr_add(pioaddr, piolen, MTRR_TYPE_WRCOMB, 1);
- if (cookie < 0) {
- {
- dev_info(&dd->pcidev->dev,
- "mtrr_add() WC for PIO bufs "
- "failed (%d)\n",
- cookie);
- ret = -EINVAL;
- }
- } else {
- ipath_cdbg(VERBOSE, "Set mtrr for chip to WC, "
- "cookie is %d\n", cookie);
- dd->ipath_wc_cookie = cookie;
- dd->ipath_wc_base = (unsigned long) pioaddr;
- dd->ipath_wc_len = (unsigned long) piolen;
- }
+ dd->wc_cookie = arch_phys_wc_add(pioaddr, piolen);
+ if (dd->wc_cookie < 0) {
+ ipath_dev_err(dd, "Seting mtrr failed on PIO buffers\n");
+ ret = -ENODEV;
+ } else if (dd->wc_cookie == 0)
+ ipath_cdbg(VERBOSE, "Set mtrr for chip to WC not needed\n");
+ else
+ ipath_cdbg(VERBOSE, "Set mtrr for chip to WC\n");
}
return ret;
@@ -154,16 +140,5 @@ int ipath_enable_wc(struct ipath_devdata *dd)
*/
void ipath_disable_wc(struct ipath_devdata *dd)
{
- if (dd->ipath_wc_cookie) {
- int r;
- ipath_cdbg(VERBOSE, "undoing WCCOMB on pio buffers\n");
- r = mtrr_del(dd->ipath_wc_cookie, dd->ipath_wc_base,
- dd->ipath_wc_len);
- if (r < 0)
- dev_info(&dd->pcidev->dev,
- "mtrr_del(%lx, %lx, %lx) failed: %d\n",
- dd->ipath_wc_cookie, dd->ipath_wc_base,
- dd->ipath_wc_len, r);
- dd->ipath_wc_cookie = 0; /* even on failure */
- }
+ arch_phys_wc_del(dd->wc_cookie);
}
--
2.3.2.209.gd67f9d5.dirty
On Wed, Apr 29, 2015 at 02:44:20PM -0700, Luis R. Rodriguez wrote:
> From: "Luis R. Rodriguez" <[email protected]>
>
Please add a commit message, albeit a single trivial sentence.
> Cc: Andy Walls <[email protected]>
> Cc: Doug Ledford <[email protected]>
> Cc: Ingo Molnar <[email protected]>
> Cc: Thomas Gleixner <[email protected]>
> Cc: Juergen Gross <[email protected]>
> Cc: Daniel Vetter <[email protected]>
> Cc: Dave Airlie <[email protected]>
> Cc: Bjorn Helgaas <[email protected]>
> Cc: Borislav Petkov <[email protected]>
> Cc: Michael S. Tsirkin <[email protected]>
> Cc: [email protected]
> Cc: [email protected]
> Signed-off-by: Luis R. Rodriguez <[email protected]>
> ---
> arch/x86/mm/pat.c | 47 +++++++++++++++++++++++------------------------
> 1 file changed, 23 insertions(+), 24 deletions(-)
>
> diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
> index 372ad42..917155d 100644
> --- a/arch/x86/mm/pat.c
> +++ b/arch/x86/mm/pat.c
#define pr_fmt(fmt) "PAT: " fmt
or so, so that it has a prefix and lookers can know where the message
comes from.
> @@ -39,7 +39,7 @@ int __read_mostly pat_enabled = 1;
> static inline void pat_disable(const char *reason)
> {
> pat_enabled = 0;
> - printk(KERN_INFO "%s\n", reason);
> + pr_info("%s\n", reason);
> }
>
> static int __init nopat(char *str)
> @@ -211,8 +211,7 @@ void pat_init(void)
> * switched to PAT on the boot CPU. We have no way to
> * undo PAT.
> */
> - printk(KERN_ERR "PAT enabled, "
> - "but not supported by secondary CPU\n");
> + pr_err("PAT enabled, but not supported by secondary CPU\n");
> BUG();
> }
> }
> @@ -451,9 +450,9 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type,
>
> err = rbt_memtype_check_insert(new, new_type);
> if (err) {
> - printk(KERN_INFO "reserve_memtype failed [mem %#010Lx-%#010Lx], track %s, req %s\n",
> - start, end - 1,
> - cattr_name(new->type), cattr_name(req_type));
> + pr_info("reserve_memtype failed [mem %#010Lx-%#010Lx], track %s, req %s\n",
> + start, end - 1,
> + cattr_name(new->type), cattr_name(req_type));
> kfree(new);
> spin_unlock(&memtype_lock);
>
> @@ -462,9 +461,9 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type,
>
> spin_unlock(&memtype_lock);
>
> - dprintk("reserve_memtype added [mem %#010Lx-%#010Lx], track %s, req %s, ret %s\n",
> - start, end - 1, cattr_name(new->type), cattr_name(req_type),
> - new_type ? cattr_name(*new_type) : "-");
This was using "debugpat"...
> + pr_debug("reserve_memtype added [mem %#010Lx-%#010Lx], track %s, req %s, ret %s\n",
> + start, end - 1, cattr_name(new->type), cattr_name(req_type),
> + new_type ? cattr_name(*new_type) : "-");
... and now that pr_debug() ugliness which I can't enable at boot time.
>From looking at Documentation/dynamic-debug-howto.txt apparently I can
using dyndbg= but let's not change that.
> return err;
> }
> @@ -497,14 +496,15 @@ int free_memtype(u64 start, u64 end)
> spin_unlock(&memtype_lock);
>
> if (!entry) {
> - printk(KERN_INFO "%s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n",
> - current->comm, current->pid, start, end - 1);
> + pr_info("%s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n",
> + current->comm, current->pid, start, end - 1);
> return -EINVAL;
> }
>
> kfree(entry);
>
> - dprintk("free_memtype request [mem %#010Lx-%#010Lx]\n", start, end - 1);
> + pr_debug("free_memtype request [mem %#010Lx-%#010Lx]\n",
> + start, end - 1);
Ditto.
>
> return 0;
> }
> @@ -628,8 +628,8 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size)
>
> while (cursor < to) {
> if (!devmem_is_allowed(pfn)) {
> - printk(KERN_INFO "Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx], PAT prevents it\n",
> - current->comm, from, to - 1);
> + pr_info("Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx], PAT prevents it\n",
> + current->comm, from, to - 1);
> return 0;
> }
> cursor += PAGE_SIZE;
> @@ -698,8 +698,7 @@ int kernel_map_sync_memtype(u64 base, unsigned long size,
> size;
>
> if (ioremap_change_attr((unsigned long)__va(base), id_sz, pcm) < 0) {
> - printk(KERN_INFO "%s:%d ioremap_change_attr failed %s "
> - "for [mem %#010Lx-%#010Lx]\n",
> + pr_info("%s:%d ioremap_change_attr failed %s for [mem %#010Lx-%#010Lx]\n",
> current->comm, current->pid,
> cattr_name(pcm),
> base, (unsigned long long)(base + size-1));
> @@ -734,7 +733,7 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
>
> pcm = lookup_memtype(paddr);
> if (want_pcm != pcm) {
> - printk(KERN_WARNING "%s:%d map pfn RAM range req %s for [mem %#010Lx-%#010Lx], got %s\n",
> + pr_warn("%s:%d map pfn RAM range req %s for [mem %#010Lx-%#010Lx], got %s\n",
> current->comm, current->pid,
> cattr_name(want_pcm),
> (unsigned long long)paddr,
> @@ -755,13 +754,13 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
> if (strict_prot ||
> !is_new_memtype_allowed(paddr, size, want_pcm, pcm)) {
> free_memtype(paddr, paddr + size);
> - printk(KERN_ERR "%s:%d map pfn expected mapping type %s"
> - " for [mem %#010Lx-%#010Lx], got %s\n",
> - current->comm, current->pid,
> - cattr_name(want_pcm),
> - (unsigned long long)paddr,
> - (unsigned long long)(paddr + size - 1),
> - cattr_name(pcm));
> + pr_err(KERN_ERR "%s:%d map pfn expected mapping type %s"
KERN_ERR forgotten.
Use checkpatch but don't take it too seriously :-)
> + " for [mem %#010Lx-%#010Lx], got %s\n",
> + current->comm, current->pid,
> + cattr_name(want_pcm),
> + (unsigned long long)paddr,
> + (unsigned long long)(paddr + size - 1),
> + cattr_name(pcm));
> return -EINVAL;
> }
> /*
> --
> 2.3.2.209.gd67f9d5.dirty
>
--
Regards/Gruss,
Boris.
ECO tip #101: Trim your mails when you reply.
--
On Wed, Apr 29, 2015 at 02:44:21PM -0700, Luis R. Rodriguez wrote:
> From: "Luis R. Rodriguez" <[email protected]>
So this one is missing a commit message too but we need to talk about
this. Why are we adding __read_mostly in the macro? This would put every
param declared this way into __section__(".data..read_mostly") and I'm
not really convinced that is needed.
Those setup params get accessed a couple of times only during boot, in
non-critical paths so we don't really care.
If you care about pat_enabled being read a lot, you could do:
int __read_mostly pat_enabled;
only for it.
But for every param declared this way? I'm not sure we want that...
> Cc: Andy Walls <[email protected]>
> Cc: Doug Ledford <[email protected]>
> Cc: Andy Lutomirski <[email protected]>
> Cc: Ingo Molnar <[email protected]>
> Cc: Thomas Gleixner <[email protected]>
> Cc: Juergen Gross <[email protected]>
> Cc: Daniel Vetter <[email protected]>
> Cc: Dave Airlie <[email protected]>
> Cc: Bjorn Helgaas <[email protected]>
> Cc: Borislav Petkov <[email protected]>
> Cc: Michael S. Tsirkin <[email protected]>
> Cc: [email protected]
> Cc: [email protected]
> Signed-off-by: Luis R. Rodriguez <[email protected]>
> ---
> include/linux/init.h | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/include/linux/init.h b/include/linux/init.h
> index 21b6d76..a0385cc 100644
> --- a/include/linux/init.h
> +++ b/include/linux/init.h
> @@ -272,7 +272,7 @@ struct obs_kernel_param {
>
> #define early_param_on_off(str_on, str_off, var, config) \
> \
> - int var = IS_ENABLED(config); \
> + int __read_mostly var = IS_ENABLED(config); \
> \
> static int __init parse_##var##_on(char *arg) \
> { \
--
Regards/Gruss,
Boris.
ECO tip #101: Trim your mails when you reply.
--
On Thu, Apr 30, 2015 at 03:56:41PM +0200, Borislav Petkov wrote:
> On Wed, Apr 29, 2015 at 02:44:20PM -0700, Luis R. Rodriguez wrote:
> > From: "Luis R. Rodriguez" <[email protected]>
> >
>
> Please add a commit message, albeit a single trivial sentence.
>
> > Cc: Andy Walls <[email protected]>
> > Cc: Doug Ledford <[email protected]>
> > Cc: Ingo Molnar <[email protected]>
> > Cc: Thomas Gleixner <[email protected]>
> > Cc: Juergen Gross <[email protected]>
> > Cc: Daniel Vetter <[email protected]>
> > Cc: Dave Airlie <[email protected]>
> > Cc: Bjorn Helgaas <[email protected]>
> > Cc: Borislav Petkov <[email protected]>
> > Cc: Michael S. Tsirkin <[email protected]>
> > Cc: [email protected]
> > Cc: [email protected]
> > Signed-off-by: Luis R. Rodriguez <[email protected]>
> > ---
> > arch/x86/mm/pat.c | 47 +++++++++++++++++++++++------------------------
> > 1 file changed, 23 insertions(+), 24 deletions(-)
> >
> > diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
> > index 372ad42..917155d 100644
> > --- a/arch/x86/mm/pat.c
> > +++ b/arch/x86/mm/pat.c
>
> #define pr_fmt(fmt) "PAT: " fmt
>
> or so, so that it has a prefix and lookers can know where the message
> comes from.
Sure, I didn't as I thought this was already automatically going to be
added for is, after I added the build complained about it being
defined. I checked an we udef it and redefine it for some other x86
code so will do the same. I'll use "x86-PAT".
> > @@ -39,7 +39,7 @@ int __read_mostly pat_enabled = 1;
> > static inline void pat_disable(const char *reason)
> > {
> > pat_enabled = 0;
> > - printk(KERN_INFO "%s\n", reason);
> > + pr_info("%s\n", reason);
> > }
> >
> > static int __init nopat(char *str)
> > @@ -211,8 +211,7 @@ void pat_init(void)
> > * switched to PAT on the boot CPU. We have no way to
> > * undo PAT.
> > */
> > - printk(KERN_ERR "PAT enabled, "
> > - "but not supported by secondary CPU\n");
> > + pr_err("PAT enabled, but not supported by secondary CPU\n");
> > BUG();
> > }
> > }
> > @@ -451,9 +450,9 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type,
> >
> > err = rbt_memtype_check_insert(new, new_type);
> > if (err) {
> > - printk(KERN_INFO "reserve_memtype failed [mem %#010Lx-%#010Lx], track %s, req %s\n",
> > - start, end - 1,
> > - cattr_name(new->type), cattr_name(req_type));
> > + pr_info("reserve_memtype failed [mem %#010Lx-%#010Lx], track %s, req %s\n",
> > + start, end - 1,
> > + cattr_name(new->type), cattr_name(req_type));
> > kfree(new);
> > spin_unlock(&memtype_lock);
> >
> > @@ -462,9 +461,9 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type,
> >
> > spin_unlock(&memtype_lock);
> >
> > - dprintk("reserve_memtype added [mem %#010Lx-%#010Lx], track %s, req %s, ret %s\n",
> > - start, end - 1, cattr_name(new->type), cattr_name(req_type),
> > - new_type ? cattr_name(*new_type) : "-");
>
> This was using "debugpat"...
>
> > + pr_debug("reserve_memtype added [mem %#010Lx-%#010Lx], track %s, req %s, ret %s\n",
> > + start, end - 1, cattr_name(new->type), cattr_name(req_type),
> > + new_type ? cattr_name(*new_type) : "-");
>
> ... and now that pr_debug() ugliness which I can't enable at boot time.
> From looking at Documentation/dynamic-debug-howto.txt apparently I can
> using dyndbg= but let's not change that.
OK sure, I'll leave this and mention the few remaining non-converted stragglers
on the commit log, I'll also add a "x86-PAT-debug" prefix to dprintk().
> > return err;
> > }
> > @@ -497,14 +496,15 @@ int free_memtype(u64 start, u64 end)
> > spin_unlock(&memtype_lock);
> >
> > if (!entry) {
> > - printk(KERN_INFO "%s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n",
> > - current->comm, current->pid, start, end - 1);
> > + pr_info("%s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n",
> > + current->comm, current->pid, start, end - 1);
> > return -EINVAL;
> > }
> >
> > kfree(entry);
> >
> > - dprintk("free_memtype request [mem %#010Lx-%#010Lx]\n", start, end - 1);
> > + pr_debug("free_memtype request [mem %#010Lx-%#010Lx]\n",
> > + start, end - 1);
>
> Ditto.
OK, fixed.
> >
> > return 0;
> > }
> > @@ -628,8 +628,8 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size)
> >
> > while (cursor < to) {
> > if (!devmem_is_allowed(pfn)) {
> > - printk(KERN_INFO "Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx], PAT prevents it\n",
> > - current->comm, from, to - 1);
> > + pr_info("Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx], PAT prevents it\n",
> > + current->comm, from, to - 1);
> > return 0;
> > }
> > cursor += PAGE_SIZE;
> > @@ -698,8 +698,7 @@ int kernel_map_sync_memtype(u64 base, unsigned long size,
> > size;
> >
> > if (ioremap_change_attr((unsigned long)__va(base), id_sz, pcm) < 0) {
> > - printk(KERN_INFO "%s:%d ioremap_change_attr failed %s "
> > - "for [mem %#010Lx-%#010Lx]\n",
> > + pr_info("%s:%d ioremap_change_attr failed %s for [mem %#010Lx-%#010Lx]\n",
> > current->comm, current->pid,
> > cattr_name(pcm),
> > base, (unsigned long long)(base + size-1));
> > @@ -734,7 +733,7 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
> >
> > pcm = lookup_memtype(paddr);
> > if (want_pcm != pcm) {
> > - printk(KERN_WARNING "%s:%d map pfn RAM range req %s for [mem %#010Lx-%#010Lx], got %s\n",
> > + pr_warn("%s:%d map pfn RAM range req %s for [mem %#010Lx-%#010Lx], got %s\n",
> > current->comm, current->pid,
> > cattr_name(want_pcm),
> > (unsigned long long)paddr,
> > @@ -755,13 +754,13 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
> > if (strict_prot ||
> > !is_new_memtype_allowed(paddr, size, want_pcm, pcm)) {
> > free_memtype(paddr, paddr + size);
> > - printk(KERN_ERR "%s:%d map pfn expected mapping type %s"
> > - " for [mem %#010Lx-%#010Lx], got %s\n",
> > - current->comm, current->pid,
> > - cattr_name(want_pcm),
> > - (unsigned long long)paddr,
> > - (unsigned long long)(paddr + size - 1),
> > - cattr_name(pcm));
> > + pr_err(KERN_ERR "%s:%d map pfn expected mapping type %s"
>
> KERN_ERR forgotten.
>
> Use checkpatch but don't take it too seriously :-)
That was a fat finger rather. Fixed, thanks.
Luis
On Thu, Apr 30, 2015 at 06:00:44PM +0200, Borislav Petkov wrote:
> On Wed, Apr 29, 2015 at 02:44:21PM -0700, Luis R. Rodriguez wrote:
> > From: "Luis R. Rodriguez" <[email protected]>
>
> So this one is missing a commit message too but we need to talk about
> this. Why are we adding __read_mostly in the macro? This would put every
> param declared this way into __section__(".data..read_mostly") and I'm
> not really convinced that is needed.
>
> Those setup params get accessed a couple of times only during boot, in
> non-critical paths so we don't really care.
>
> If you care about pat_enabled being read a lot, you could do:
>
> int __read_mostly pat_enabled;
>
> only for it.
>
> But for every param declared this way? I'm not sure we want that...
I'm glad you bring this up, Christoph can you or anyone else can you provide
advise since you added __read_mostly ?
I added this to early_param_on_off() given I saw a prevalence of __read_mostly
in similar taste elsewhere in the kernel on kernel parameters, and since this
patch tried to convert one kernel parameter which used __read_mostly over to
early_param_on_off() I decided to add it and now we need to really decide if
its ideal for kernel parameters or not.
In this particular case early_param_on_off() will be used for boot time kernel
parameters which we do not intend on dynamically change a lot at run time, we
at run time in early boot have to disable a feature but this will typically
happen only once.
Luis
> > diff --git a/include/linux/init.h b/include/linux/init.h
> > index 21b6d76..a0385cc 100644
> > --- a/include/linux/init.h
> > +++ b/include/linux/init.h
> > @@ -272,7 +272,7 @@ struct obs_kernel_param {
> >
> > #define early_param_on_off(str_on, str_off, var, config) \
> > \
> > - int var = IS_ENABLED(config); \
> > + int __read_mostly var = IS_ENABLED(config); \
> > \
> > static int __init parse_##var##_on(char *arg) \
> > { \
On Thu, Apr 30, 2015 at 11:32 AM, Luis R. Rodriguez <[email protected]> wrote:
> , we
> at run time in early boot have to disable a feature but this will typically
> happen only once.
Sorry I meant that we may need to disable a feature early at boot
time, but this typically happens only once with the variable.
Luis
On Thu, Apr 30, 2015 at 08:32:00PM +0200, Luis R. Rodriguez wrote:
> In this particular case early_param_on_off() will be used for boot
> time kernel parameters which we do not intend on dynamically change a
> lot at run time, we
It is not about whether you're going to dynamically change it but more
about how often it is accessed. If it is accessed only a couple of times
tops during boot and then never again during the system lifetime - which
is generally the case for most boot params - then you certainly don't
want to pollute the read.mostly section with dead weight. You can just
as well use a stinking normal variable.
--
Regards/Gruss,
Boris.
ECO tip #101: Trim your mails when you reply.
--
On Thu, 30 Apr 2015, Luis R. Rodriguez wrote:
> > But for every param declared this way? I'm not sure we want that...
>
> I'm glad you bring this up, Christoph can you or anyone else can you provide
> advise since you added __read_mostly ?
Well it should be reseved for data that is actually used frequently in hot
paths. Initially we only moved variables into __read_mostly after we saw
in performance traces that there was contention on an item because a
neighboring variable was frequently written to.
You want the __read_mostly data to be tighly packed. In the best case
multiple frequently read variables for a hot path will be next to each
other in order to reduce the number of cachelines needed to execute a
critical path. That means being selective and aware of which variables
actually benefit.