The PAT bit gets relocated to bit 12 when PUD and PMD mappings are
used. This bit 12, however, is not covered by PTE_FLAGS_MASK, which
is currently used for masking the flag bits for all cases.
Patch 1/2 fixes pud_flags() and pmd_flags() to handle the PAT bit
when PUD and PMD mappings are used.
Patch 2/2 fixes /sys/kernel/debug/kernel_page_tables to show the
PAT bit properly.
Note, the PAT bit is first enabled in 4.2-rc1 with WT mappings.
---
Toshi Kani (2):
1/2 x86: Fix pXd_flags() to handle _PAGE_PAT_LARGE
2/2 x86, mm: Fix page table dump to show PAT bit
---
arch/x86/include/asm/pgtable_types.h | 16 ++++++++++++---
arch/x86/mm/dump_pagetables.c | 39 +++++++++++++++++++-----------------
2 files changed, 34 insertions(+), 21 deletions(-)
The PAT bit gets relocated to bit 12 when PUD and PMD mappings are
used. This bit 12, however, is not covered by PTE_FLAGS_MASK, which
is currently used for masking the flag bits for all cases.
Fix pud_flags() and pmd_flags() to cover the PAT bit, _PAGE_PAT_LARGE,
when they are used to map a large page with _PAGE_PSE set.
Signed-off-by: Toshi Kani <[email protected]>
Cc: Juergen Gross <[email protected]>
Cc: Konrad Wilk <[email protected]>
Cc: Robert Elliott <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: H. Peter Anvin <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Andrew Morton <[email protected]>
---
arch/x86/include/asm/pgtable_types.h | 16 +++++++++++++---
1 file changed, 13 insertions(+), 3 deletions(-)
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 13f310b..caaf45c 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -212,9 +212,13 @@ enum page_cache_mode {
/* PTE_PFN_MASK extracts the PFN from a (pte|pmd|pud|pgd)val_t */
#define PTE_PFN_MASK ((pteval_t)PHYSICAL_PAGE_MASK)
-/* PTE_FLAGS_MASK extracts the flags from a (pte|pmd|pud|pgd)val_t */
+/* Extracts the flags from a (pte|pmd|pud|pgd)val_t of a 4KB page */
#define PTE_FLAGS_MASK (~PTE_PFN_MASK)
+/* Extracts the flags from a (pmd|pud)val_t of a (1GB|2MB) page */
+#define PMD_FLAGS_MASK_LARGE ((~PTE_PFN_MASK) | _PAGE_PAT_LARGE)
+#define PUD_FLAGS_MASK_LARGE ((~PTE_PFN_MASK) | _PAGE_PAT_LARGE)
+
typedef struct pgprot { pgprotval_t pgprot; } pgprot_t;
typedef struct { pgdval_t pgd; } pgd_t;
@@ -278,12 +282,18 @@ static inline pmdval_t native_pmd_val(pmd_t pmd)
static inline pudval_t pud_flags(pud_t pud)
{
- return native_pud_val(pud) & PTE_FLAGS_MASK;
+ if (native_pud_val(pud) & _PAGE_PSE)
+ return native_pud_val(pud) & PUD_FLAGS_MASK_LARGE;
+ else
+ return native_pud_val(pud) & PTE_FLAGS_MASK;
}
static inline pmdval_t pmd_flags(pmd_t pmd)
{
- return native_pmd_val(pmd) & PTE_FLAGS_MASK;
+ if (native_pmd_val(pmd) & _PAGE_PSE)
+ return native_pmd_val(pmd) & PMD_FLAGS_MASK_LARGE;
+ else
+ return native_pmd_val(pmd) & PTE_FLAGS_MASK;
}
static inline pte_t native_make_pte(pteval_t val)
/sys/kernel/debug/kernel_page_tables does not show the PAT bit
for PUD and PMD mappings. This is because walk_pud_level(),
walk_pmd_level() and note_page() mask the flags with PTE_FLAGS_MASK,
which does not cover their PAT bit, _PAGE_PAT_LARGE.
Fix it by replacing the use of PTE_FLAGS_MASK with pXd_flags(),
which mask the flags properly.
Change also to show the PAT bit as "PAT" to be consistent with
other bits.
Reported-by: Robert Elliott <[email protected]>
Signed-off-by: Toshi Kani <[email protected]>
Cc: Juergen Gross <[email protected]>
Cc: Konrad Wilk <[email protected]>
Cc: Robert Elliott <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: H. Peter Anvin <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Andrew Morton <[email protected]>
---
arch/x86/mm/dump_pagetables.c | 39 +++++++++++++++++++++------------------
1 file changed, 21 insertions(+), 18 deletions(-)
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index f0cedf3..71ab2d7 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -155,7 +155,7 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg)
pt_dump_cont_printf(m, dmsg, " ");
if ((level == 4 && pr & _PAGE_PAT) ||
((level == 3 || level == 2) && pr & _PAGE_PAT_LARGE))
- pt_dump_cont_printf(m, dmsg, "pat ");
+ pt_dump_cont_printf(m, dmsg, "PAT ");
else
pt_dump_cont_printf(m, dmsg, " ");
if (pr & _PAGE_GLOBAL)
@@ -198,8 +198,8 @@ static void note_page(struct seq_file *m, struct pg_state *st,
* we have now. "break" is either changing perms, levels or
* address space marker.
*/
- prot = pgprot_val(new_prot) & PTE_FLAGS_MASK;
- cur = pgprot_val(st->current_prot) & PTE_FLAGS_MASK;
+ prot = pgprot_val(new_prot);
+ cur = pgprot_val(st->current_prot);
if (!st->level) {
/* First entry */
@@ -269,13 +269,13 @@ static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr,
{
int i;
pte_t *start;
+ pgprotval_t prot;
start = (pte_t *) pmd_page_vaddr(addr);
for (i = 0; i < PTRS_PER_PTE; i++) {
- pgprot_t prot = pte_pgprot(*start);
-
+ prot = pte_flags(*start);
st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT);
- note_page(m, st, prot, 4);
+ note_page(m, st, __pgprot(prot), 4);
start++;
}
}
@@ -287,18 +287,19 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
{
int i;
pmd_t *start;
+ pgprotval_t prot;
start = (pmd_t *) pud_page_vaddr(addr);
for (i = 0; i < PTRS_PER_PMD; i++) {
st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT);
if (!pmd_none(*start)) {
- pgprotval_t prot = pmd_val(*start) & PTE_FLAGS_MASK;
-
- if (pmd_large(*start) || !pmd_present(*start))
+ if (pmd_large(*start) || !pmd_present(*start)) {
+ prot = pmd_flags(*start);
note_page(m, st, __pgprot(prot), 3);
- else
+ } else {
walk_pte_level(m, st, *start,
P + i * PMD_LEVEL_MULT);
+ }
} else
note_page(m, st, __pgprot(0), 3);
start++;
@@ -318,19 +319,20 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
{
int i;
pud_t *start;
+ pgprotval_t prot;
start = (pud_t *) pgd_page_vaddr(addr);
for (i = 0; i < PTRS_PER_PUD; i++) {
st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT);
if (!pud_none(*start)) {
- pgprotval_t prot = pud_val(*start) & PTE_FLAGS_MASK;
-
- if (pud_large(*start) || !pud_present(*start))
+ if (pud_large(*start) || !pud_present(*start)) {
+ prot = pud_flags(*start);
note_page(m, st, __pgprot(prot), 2);
- else
+ } else {
walk_pmd_level(m, st, *start,
P + i * PUD_LEVEL_MULT);
+ }
} else
note_page(m, st, __pgprot(0), 2);
@@ -351,6 +353,7 @@ void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
#else
pgd_t *start = swapper_pg_dir;
#endif
+ pgprotval_t prot;
int i;
struct pg_state st = {};
@@ -362,13 +365,13 @@ void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
for (i = 0; i < PTRS_PER_PGD; i++) {
st.current_address = normalize_addr(i * PGD_LEVEL_MULT);
if (!pgd_none(*start)) {
- pgprotval_t prot = pgd_val(*start) & PTE_FLAGS_MASK;
-
- if (pgd_large(*start) || !pgd_present(*start))
+ if (pgd_large(*start) || !pgd_present(*start)) {
+ prot = pgd_flags(*start);
note_page(m, &st, __pgprot(prot), 1);
- else
+ } else {
walk_pud_level(m, &st, *start,
i * PGD_LEVEL_MULT);
+ }
} else
note_page(m, &st, __pgprot(0), 1);
On 07/09/2015 07:03 PM, Toshi Kani wrote:
> The PAT bit gets relocated to bit 12 when PUD and PMD mappings are
> used. This bit 12, however, is not covered by PTE_FLAGS_MASK, which
> is corrently used for masking the flag bits for all cases.
>
> Fix pud_flags() and pmd_flags() to cover the PAT bit, _PAGE_PAT_LARGE,
> when they are used to map a large page with _PAGE_PSE set.
>
> Signed-off-by: Toshi Kani <[email protected]>
> Cc: Juergen Gross <[email protected]>
> Cc: Konrad Wilk <[email protected]>
> Cc: Robert Elliott <[email protected]>
> Cc: Thomas Gleixner <[email protected]>
> Cc: H. Peter Anvin <[email protected]>
> Cc: Ingo Molnar <[email protected]>
> Cc: Borislav Petkov <[email protected]>
> Cc: Andrew Morton <[email protected]>
> ---
> arch/x86/include/asm/pgtable_types.h | 16 +++++++++++++---
> 1 file changed, 13 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
> index 13f310b..caaf45c 100644
> --- a/arch/x86/include/asm/pgtable_types.h
> +++ b/arch/x86/include/asm/pgtable_types.h
> @@ -212,9 +212,13 @@ enum page_cache_mode {
> /* PTE_PFN_MASK extracts the PFN from a (pte|pmd|pud|pgd)val_t */
> #define PTE_PFN_MASK ((pteval_t)PHYSICAL_PAGE_MASK)
>
> -/* PTE_FLAGS_MASK extracts the flags from a (pte|pmd|pud|pgd)val_t */
> +/* Extracts the flags from a (pte|pmd|pud|pgd)val_t of a 4KB page */
> #define PTE_FLAGS_MASK (~PTE_PFN_MASK)
>
> +/* Extracts the flags from a (pmd|pud)val_t of a (1GB|2MB) page */
> +#define PMD_FLAGS_MASK_LARGE ((~PTE_PFN_MASK) | _PAGE_PAT_LARGE)
> +#define PUD_FLAGS_MASK_LARGE ((~PTE_PFN_MASK) | _PAGE_PAT_LARGE)
> +
> typedef struct pgprot { pgprotval_t pgprot; } pgprot_t;
>
> typedef struct { pgdval_t pgd; } pgd_t;
> @@ -278,12 +282,18 @@ static inline pmdval_t native_pmd_val(pmd_t pmd)
>
> static inline pudval_t pud_flags(pud_t pud)
> {
> - return native_pud_val(pud) & PTE_FLAGS_MASK;
> + if (native_pud_val(pud) & _PAGE_PSE)
> + return native_pud_val(pud) & PUD_FLAGS_MASK_LARGE;
> + else
> + return native_pud_val(pud) & PTE_FLAGS_MASK;
> }
>
> static inline pmdval_t pmd_flags(pmd_t pmd)
> {
> - return native_pmd_val(pmd) & PTE_FLAGS_MASK;
> + if (native_pmd_val(pmd) & _PAGE_PSE)
> + return native_pmd_val(pmd) & PMD_FLAGS_MASK_LARGE;
> + else
> + return native_pmd_val(pmd) & PTE_FLAGS_MASK;
> }
Hmm, I think this covers only half of the problem. pud_pfn() and
pmd_pfn() will return wrong results for large pages with PAT bit
set as well.
I'd rather use something like:
static inline unsigned long pmd_pfn_mask(pmd_t pmd)
{
if (pmd_large(pmd))
return PMD_PAGE_MASK & PHYSICAL_PAGE_MASK;
else
return PTE_PFN_MASK;
}
static inline unsigned long pmd_flags_mask(pmd_t pmd)
{
if (pmd_large(pmd))
return ~(PMD_PAGE_MASK & PHYSICAL_PAGE_MASK);
else
return ~PTE_PFN_MASK;
}
/* PFN the pmd entry maps, honouring the large-page PFN field layout. */
static inline unsigned long pmd_pfn(pmd_t pmd)
{
	pmdval_t pfn_bits = pmd_val(pmd) & pmd_pfn_mask(pmd);

	return pfn_bits >> PAGE_SHIFT;
}
/*
 * Flag bits of a pmd entry, including _PAGE_PAT_LARGE (bit 12) for 2MB
 * mappings.
 *
 * Bug fix: pmd_flags_mask() already returns the flags mask (the
 * complement of the PFN mask), so the extra '~' in the original
 * 'native_pmd_val(pmd) & ~pmd_flags_mask(pmd)' selected the PFN bits
 * instead of the flags.  And with the mask directly.
 */
static inline pmdval_t pmd_flags(pmd_t pmd)
{
	return native_pmd_val(pmd) & pmd_flags_mask(pmd);
}
Juergen
On Fri, 2015-07-10 at 05:57 +0200, Juergen Gross wrote:
> On 07/09/2015 07:03 PM, Toshi Kani wrote:
> > The PAT bit gets relocated to bit 12 when PUD and PMD mappings are
> > used. This bit 12, however, is not covered by PTE_FLAGS_MASK,
> > which
> > is corrently used for masking the flag bits for all cases.
> >
> > Fix pud_flags() and pmd_flags() to cover the PAT bit,
> > _PAGE_PAT_LARGE,
> > when they are used to map a large page with _PAGE_PSE set.
:
> Hmm, I think this covers only half of the problem. pud_pfn() and
> pmd_pfn() will return wrong results for large pages with PAT bit
> set as well.
>
> I'd rather use something like:
>
> static inline unsigned long pmd_pfn_mask(pmd_t pmd)
> {
> if (pmd_large(pmd))
> return PMD_PAGE_MASK & PHYSICAL_PAGE_MASK;
> else
> return PTE_PFN_MASK;
> }
>
> static inline unsigned long pmd_flags_mask(pmd_t pmd)
> {
> if (pmd_large(pmd))
> return ~(PMD_PAGE_MASK & PHYSICAL_PAGE_MASK);
> else
> return ~PTE_PFN_MASK;
> }
>
> static inline unsigned long pmd_pfn(pmd_t pmd)
> {
> return (pmd_val(pmd) & pmd_pfn_mask(pmd)) >> PAGE_SHIFT;
> }
>
> static inline pmdval_t pmd_flags(pmd_t pmd)
> {
> return native_pmd_val(pmd) & ~pmd_flags_mask(pmd);
> }
Thanks for the suggestion! I agree that it is cleaner in this way. I
am updating the patches and found the following changes are needed as
well:
- Define PGTABLE_LEVELS to 2 in
"arch/x86/entry/vdso/vdso32/vclock_gettime.c". This file redefines to
X86_32. Setting to 2 levels (since X86_PAE is not set) allows <asm
-generic/pgtable-nopmd.h> be included to define PMD_SHIFT.
- Move PUD_PAGE_SIZE & PUD_PAGE_MASK from <asm/page_64_types.h> to
<asm/page_types.h>. This allows X86_32 to refer the PUD macros.
- Nit: pmd_large()/pud_large() cannot be used in the pXd_xxx_mask()
helpers since they call pXd_flags(), which would recurse. Test the raw
value with (native_pXd_val(pXd) & _PAGE_PSE) instead.
Thanks,
-Toshi