2013-07-30 20:47:06

by Cyrill Gorcunov

[permalink] [raw]
Subject: [patch 2/2] [PATCH] mm: Save soft-dirty bits on file pages

Andy reported that if a file page gets reclaimed we lose the soft-dirty bit
if it was set, so save the _PAGE_BIT_SOFT_DIRTY bit when the page address
gets encoded into the pte entry. Thus when a #pf happens on such a non-present
pte we can restore it.

Reported-by: Andy Lutomirski <[email protected]>
Signed-off-by: Cyrill Gorcunov <[email protected]>
Cc: Pavel Emelyanov <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Matt Mackall <[email protected]>
Cc: Xiao Guangrong <[email protected]>
Cc: Marcelo Tosatti <[email protected]>
Cc: KOSAKI Motohiro <[email protected]>
Cc: Stephen Rothwell <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: "Aneesh Kumar K.V" <[email protected]>
---
arch/x86/include/asm/pgtable-2level.h | 48 +++++++++++++++++++++++++++++++++-
arch/x86/include/asm/pgtable-3level.h | 3 ++
arch/x86/include/asm/pgtable.h | 15 ++++++++++
arch/x86/include/asm/pgtable_types.h | 4 ++
fs/proc/task_mmu.c | 2 +
include/asm-generic/pgtable.h | 15 ++++++++++
mm/fremap.c | 11 +++++--
mm/memory.c | 11 +++++--
mm/rmap.c | 8 ++++-
9 files changed, 107 insertions(+), 10 deletions(-)

Index: linux-2.6.git/arch/x86/include/asm/pgtable-2level.h
===================================================================
--- linux-2.6.git.orig/arch/x86/include/asm/pgtable-2level.h
+++ linux-2.6.git/arch/x86/include/asm/pgtable-2level.h
@@ -55,9 +55,53 @@ static inline pmd_t native_pmdp_get_and_
#define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp)
#endif

+#ifdef CONFIG_MEM_SOFT_DIRTY
+
+/*
+ * Bits _PAGE_BIT_PRESENT, _PAGE_BIT_FILE, _PAGE_BIT_SOFT_DIRTY and
+ * _PAGE_BIT_PROTNONE are taken, split up the 28 bits of offset
+ * into this range.
+ */
+#define PTE_FILE_MAX_BITS 28
+#define PTE_FILE_SHIFT1 (_PAGE_BIT_PRESENT + 1)
+#define PTE_FILE_SHIFT2 (_PAGE_BIT_FILE + 1)
+#define PTE_FILE_SHIFT3 (_PAGE_BIT_PROTNONE + 1)
+#define PTE_FILE_SHIFT4 (_PAGE_BIT_SOFT_DIRTY + 1)
+#define PTE_FILE_BITS1 (PTE_FILE_SHIFT2 - PTE_FILE_SHIFT1 - 1)
+#define PTE_FILE_BITS2 (PTE_FILE_SHIFT3 - PTE_FILE_SHIFT2 - 1)
+#define PTE_FILE_BITS3 (PTE_FILE_SHIFT4 - PTE_FILE_SHIFT3 - 1)
+
+#define pte_to_pgoff(pte) \
+ ((((pte).pte_low >> (PTE_FILE_SHIFT1)) \
+ & ((1U << PTE_FILE_BITS1) - 1))) \
+ + ((((pte).pte_low >> (PTE_FILE_SHIFT2)) \
+ & ((1U << PTE_FILE_BITS2) - 1)) \
+ << (PTE_FILE_BITS1)) \
+ + ((((pte).pte_low >> (PTE_FILE_SHIFT3)) \
+ & ((1U << PTE_FILE_BITS3) - 1)) \
+ << (PTE_FILE_BITS1 + PTE_FILE_BITS2)) \
+ + ((((pte).pte_low >> (PTE_FILE_SHIFT4))) \
+ << (PTE_FILE_BITS1 + PTE_FILE_BITS2 + PTE_FILE_BITS3))
+
+#define pgoff_to_pte(off) \
+ ((pte_t) { .pte_low = \
+ ((((off)) & ((1U << PTE_FILE_BITS1) - 1)) << PTE_FILE_SHIFT1) \
+ + ((((off) >> PTE_FILE_BITS1) \
+ & ((1U << PTE_FILE_BITS2) - 1)) \
+ << PTE_FILE_SHIFT2) \
+ + ((((off) >> (PTE_FILE_BITS1 + PTE_FILE_BITS2)) \
+ & ((1U << PTE_FILE_BITS3) - 1)) \
+ << PTE_FILE_SHIFT3) \
+ + ((((off) >> \
+ (PTE_FILE_BITS1 + PTE_FILE_BITS2 + PTE_FILE_BITS3))) \
+ << PTE_FILE_SHIFT4) \
+ + _PAGE_FILE })
+
+#else /* CONFIG_MEM_SOFT_DIRTY */
+
/*
* Bits _PAGE_BIT_PRESENT, _PAGE_BIT_FILE and _PAGE_BIT_PROTNONE are taken,
- * split up the 29 bits of offset into this range:
+ * split up the 29 bits of offset into this range.
*/
#define PTE_FILE_MAX_BITS 29
#define PTE_FILE_SHIFT1 (_PAGE_BIT_PRESENT + 1)
@@ -88,6 +132,8 @@ static inline pmd_t native_pmdp_get_and_
<< PTE_FILE_SHIFT3) \
+ _PAGE_FILE })

+#endif /* CONFIG_MEM_SOFT_DIRTY */
+
/* Encode and de-code a swap entry */
#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
#define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1)
Index: linux-2.6.git/arch/x86/include/asm/pgtable-3level.h
===================================================================
--- linux-2.6.git.orig/arch/x86/include/asm/pgtable-3level.h
+++ linux-2.6.git/arch/x86/include/asm/pgtable-3level.h
@@ -179,6 +179,9 @@ static inline pmd_t native_pmdp_get_and_
/*
* Bits 0, 6 and 7 are taken in the low part of the pte,
* put the 32 bits of offset into the high part.
+ *
+ * For soft-dirty tracking, bit 11 is taken from
+ * the low part of the pte as well.
*/
#define pte_to_pgoff(pte) ((pte).pte_high)
#define pgoff_to_pte(off) \
Index: linux-2.6.git/arch/x86/include/asm/pgtable.h
===================================================================
--- linux-2.6.git.orig/arch/x86/include/asm/pgtable.h
+++ linux-2.6.git/arch/x86/include/asm/pgtable.h
@@ -329,6 +329,21 @@ static inline pte_t pte_swp_clear_soft_d
return pte_clear_flags(pte, _PAGE_SWP_SOFT_DIRTY);
}

+static inline pte_t pte_file_clear_soft_dirty(pte_t pte)
+{
+ return pte_clear_flags(pte, _PAGE_SOFT_DIRTY);
+}
+
+static inline pte_t pte_file_mksoft_dirty(pte_t pte)
+{
+ return pte_set_flags(pte, _PAGE_SOFT_DIRTY);
+}
+
+static inline int pte_file_soft_dirty(pte_t pte)
+{
+ return pte_flags(pte) & _PAGE_SOFT_DIRTY;
+}
+
/*
* Mask out unsupported bits in a present pgprot. Non-present pgprots
* can use those bits for other purposes, so leave them be.
Index: linux-2.6.git/arch/x86/include/asm/pgtable_types.h
===================================================================
--- linux-2.6.git.orig/arch/x86/include/asm/pgtable_types.h
+++ linux-2.6.git/arch/x86/include/asm/pgtable_types.h
@@ -61,8 +61,10 @@
* they do not conflict with each other.
*/

+#define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_HIDDEN
+
#ifdef CONFIG_MEM_SOFT_DIRTY
-#define _PAGE_SOFT_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_HIDDEN)
+#define _PAGE_SOFT_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_SOFT_DIRTY)
#else
#define _PAGE_SOFT_DIRTY (_AT(pteval_t, 0))
#endif
Index: linux-2.6.git/fs/proc/task_mmu.c
===================================================================
--- linux-2.6.git.orig/fs/proc/task_mmu.c
+++ linux-2.6.git/fs/proc/task_mmu.c
@@ -736,6 +736,8 @@ static inline void clear_soft_dirty(stru
ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY);
} else if (is_swap_pte(ptent)) {
ptent = pte_swp_clear_soft_dirty(ptent);
+ } else if (pte_file(ptent)) {
+ ptent = pte_file_clear_soft_dirty(ptent);
}

set_pte_at(vma->vm_mm, addr, pte, ptent);
Index: linux-2.6.git/include/asm-generic/pgtable.h
===================================================================
--- linux-2.6.git.orig/include/asm-generic/pgtable.h
+++ linux-2.6.git/include/asm-generic/pgtable.h
@@ -432,6 +432,21 @@ static inline pte_t pte_swp_clear_soft_d
{
return pte;
}
+
+static inline pte_t pte_file_clear_soft_dirty(pte_t pte)
+{
+ return pte;
+}
+
+static inline pte_t pte_file_mksoft_dirty(pte_t pte)
+{
+ return pte;
+}
+
+static inline int pte_file_soft_dirty(pte_t pte)
+{
+ return 0;
+}
#endif

#ifndef __HAVE_PFNMAP_TRACKING
Index: linux-2.6.git/mm/fremap.c
===================================================================
--- linux-2.6.git.orig/mm/fremap.c
+++ linux-2.6.git/mm/fremap.c
@@ -57,17 +57,22 @@ static int install_file_pte(struct mm_st
unsigned long addr, unsigned long pgoff, pgprot_t prot)
{
int err = -ENOMEM;
- pte_t *pte;
+ pte_t *pte, ptfile;
spinlock_t *ptl;

pte = get_locked_pte(mm, addr, &ptl);
if (!pte)
goto out;

- if (!pte_none(*pte))
+ ptfile = pgoff_to_pte(pgoff);
+
+ if (!pte_none(*pte)) {
+ if (pte_present(*pte) && pte_soft_dirty(*pte))
+ pte_file_mksoft_dirty(ptfile);
zap_pte(mm, vma, addr, pte);
+ }

- set_pte_at(mm, addr, pte, pgoff_to_pte(pgoff));
+ set_pte_at(mm, addr, pte, ptfile);
/*
* We don't need to run update_mmu_cache() here because the "file pte"
* being installed by install_file_pte() is not a real pte - it's a
Index: linux-2.6.git/mm/memory.c
===================================================================
--- linux-2.6.git.orig/mm/memory.c
+++ linux-2.6.git/mm/memory.c
@@ -1141,9 +1141,12 @@ again:
continue;
if (unlikely(details) && details->nonlinear_vma
&& linear_page_index(details->nonlinear_vma,
- addr) != page->index)
- set_pte_at(mm, addr, pte,
- pgoff_to_pte(page->index));
+ addr) != page->index) {
+ pte_t ptfile = pgoff_to_pte(page->index);
+ if (pte_soft_dirty(ptent))
+ pte_file_mksoft_dirty(ptfile);
+ set_pte_at(mm, addr, pte, ptfile);
+ }
if (PageAnon(page))
rss[MM_ANONPAGES]--;
else {
@@ -3410,6 +3413,8 @@ static int __do_fault(struct mm_struct *
entry = mk_pte(page, vma->vm_page_prot);
if (flags & FAULT_FLAG_WRITE)
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+ else if (pte_file(orig_pte) && pte_file_soft_dirty(orig_pte))
+ pte_mksoft_dirty(entry);
if (anon) {
inc_mm_counter_fast(mm, MM_ANONPAGES);
page_add_new_anon_rmap(page, vma, address);
Index: linux-2.6.git/mm/rmap.c
===================================================================
--- linux-2.6.git.orig/mm/rmap.c
+++ linux-2.6.git/mm/rmap.c
@@ -1405,8 +1405,12 @@ static int try_to_unmap_cluster(unsigned
pteval = ptep_clear_flush(vma, address, pte);

/* If nonlinear, store the file page offset in the pte. */
- if (page->index != linear_page_index(vma, address))
- set_pte_at(mm, address, pte, pgoff_to_pte(page->index));
+ if (page->index != linear_page_index(vma, address)) {
+ pte_t ptfile = pgoff_to_pte(page->index);
+ if (pte_soft_dirty(pteval))
+ pte_file_mksoft_dirty(ptfile);
+ set_pte_at(mm, address, pte, ptfile);
+ }

/* Move the dirty bit to the physical page now the pte is gone. */
if (pte_dirty(pteval))


2013-07-31 08:17:15

by Pavel Emelyanov

[permalink] [raw]
Subject: Re: [patch 2/2] [PATCH] mm: Save soft-dirty bits on file pages

On 07/31/2013 12:41 AM, Cyrill Gorcunov wrote:

> Andy reported that if file page get reclaimed we loose soft-dirty bit
> if it was there, so save _PAGE_BIT_SOFT_DIRTY bit when page address
> get encoded into pte entry. Thus when #pf happens on such non-present
> pte we can restore it back.
>
> Reported-by: Andy Lutomirski <[email protected]>
> Signed-off-by: Cyrill Gorcunov <[email protected]>

Acked-by: Pavel Emelyanov <[email protected]>

2013-08-07 20:28:15

by Andrew Morton

[permalink] [raw]
Subject: Re: [patch 2/2] [PATCH] mm: Save soft-dirty bits on file pages

On Wed, 31 Jul 2013 00:41:56 +0400 Cyrill Gorcunov <[email protected]> wrote:

> +#define pte_to_pgoff(pte) \
> + ((((pte).pte_low >> (PTE_FILE_SHIFT1)) \
> + & ((1U << PTE_FILE_BITS1) - 1))) \
> + + ((((pte).pte_low >> (PTE_FILE_SHIFT2)) \
> + & ((1U << PTE_FILE_BITS2) - 1)) \
> + << (PTE_FILE_BITS1)) \
> + + ((((pte).pte_low >> (PTE_FILE_SHIFT3)) \
> + & ((1U << PTE_FILE_BITS3) - 1)) \
> + << (PTE_FILE_BITS1 + PTE_FILE_BITS2)) \
> + + ((((pte).pte_low >> (PTE_FILE_SHIFT4))) \
> + << (PTE_FILE_BITS1 + PTE_FILE_BITS2 + PTE_FILE_BITS3))
> +
> +#define pgoff_to_pte(off) \
> + ((pte_t) { .pte_low = \
> + ((((off)) & ((1U << PTE_FILE_BITS1) - 1)) << PTE_FILE_SHIFT1) \
> + + ((((off) >> PTE_FILE_BITS1) \
> + & ((1U << PTE_FILE_BITS2) - 1)) \
> + << PTE_FILE_SHIFT2) \
> + + ((((off) >> (PTE_FILE_BITS1 + PTE_FILE_BITS2)) \
> + & ((1U << PTE_FILE_BITS3) - 1)) \
> + << PTE_FILE_SHIFT3) \
> + + ((((off) >> \
> + (PTE_FILE_BITS1 + PTE_FILE_BITS2 + PTE_FILE_BITS3))) \
> + << PTE_FILE_SHIFT4) \
> + + _PAGE_FILE })

Good god.

I wonder if these can be turned into out-of-line functions in some form
which humans can understand.

or

#define pte_to_pgoff(pte)
frob(pte, PTE_FILE_SHIFT1, PTE_FILE_BITS1) +
frob(PTE_FILE_SHIFT2, PTE_FILE_BITS2) +
frob(PTE_FILE_SHIFT3, PTE_FILE_BITS3) +
frob(PTE_FILE_SHIFT4, PTE_FILE_BITS1 + PTE_FILE_BITS2 + PTE_FILE_BITS3)

2013-08-07 20:31:09

by Cyrill Gorcunov

[permalink] [raw]
Subject: Re: [patch 2/2] [PATCH] mm: Save soft-dirty bits on file pages

On Wed, Aug 07, 2013 at 01:28:12PM -0700, Andrew Morton wrote:
>
> Good god.
>
> I wonder if these can be turned into out-of-line functions in some form
> which humans can understand.
>
> or
>
> #define pte_to_pgoff(pte)
> frob(pte, PTE_FILE_SHIFT1, PTE_FILE_BITS1) +
> frob(PTE_FILE_SHIFT2, PTE_FILE_BITS2) +
> frob(PTE_FILE_SHIFT3, PTE_FILE_BITS3) +
> frob(PTE_FILE_SHIFT4, PTE_FILE_BITS1 + PTE_FILE_BITS2 + PTE_FILE_BITS3)

I copied this code from the existing one; it's not my invention ;)
I'll clean it up on top.

2013-08-08 17:49:14

by Cyrill Gorcunov

[permalink] [raw]
Subject: Re: [patch 2/2] [PATCH] mm: Save soft-dirty bits on file pages

On Wed, Aug 07, 2013 at 01:28:12PM -0700, Andrew Morton wrote:
>
> Good god.
>
> I wonder if these can be turned into out-of-line functions in some form
> which humans can understand.
>
> or
>
> #define pte_to_pgoff(pte)
> frob(pte, PTE_FILE_SHIFT1, PTE_FILE_BITS1) +
> frob(PTE_FILE_SHIFT2, PTE_FILE_BITS2) +
> frob(PTE_FILE_SHIFT3, PTE_FILE_BITS3) +
> frob(PTE_FILE_SHIFT4, PTE_FILE_BITS1 + PTE_FILE_BITS2 + PTE_FILE_BITS3)

Hi, here is what I ended up with. Please take a look (I decided to post
the patch in this thread since it's related to the context of these mails).


Attachments:
(No filename) (576.00 B)
pte-sft-dirty-file-cleanup-2 (5.43 kB)
Download all attachments

2013-08-12 21:57:24

by Andrew Morton

[permalink] [raw]
Subject: Re: [patch 2/2] [PATCH] mm: Save soft-dirty bits on file pages

On Thu, 8 Aug 2013 18:51:20 +0400 Cyrill Gorcunov <[email protected]> wrote:

> On Wed, Aug 07, 2013 at 01:28:12PM -0700, Andrew Morton wrote:
> >
> > Good god.
> >
> > I wonder if these can be turned into out-of-line functions in some form
> > which humans can understand.
> >
> > or
> >
> > #define pte_to_pgoff(pte)
> > frob(pte, PTE_FILE_SHIFT1, PTE_FILE_BITS1) +
> > frob(PTE_FILE_SHIFT2, PTE_FILE_BITS2) +
> > frob(PTE_FILE_SHIFT3, PTE_FILE_BITS3) +
> > frob(PTE_FILE_SHIFT4, PTE_FILE_BITS1 + PTE_FILE_BITS2 + PTE_FILE_BITS3)
>
> Hi, here is what I ended up with. Please take a look (I decided to post
> patch in the thread since it's related to the context of the mails).

You could have #undefed _mfrob and __frob after using them, but whatever.

I saved this patch to wave at the x86 guys for 3.12. I plan to merge
mm-save-soft-dirty-bits-on-file-pages.patch for 3.11.

> Guys, is there a reason for "if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE"
> test present in this pgtable-2level.h file at all? I can't imagine
> where it can be false on x86.

I doubt if "Guys" read this. x86 maintainers cc'ed.





From: Cyrill Gorcunov <[email protected]>
Subject: arch/x86/include/asm/pgtable-2level.h: clean up pte_to_pgoff and pgoff_to_pte helpers

Andrew asked if there is a way to make the pte_to_pgoff and pgoff_to_pte macro
helpers somehow more readable.

With this patch it should be easier to understand what happens to the
bits when they are moved to and from a pte entry.

Signed-off-by: Cyrill Gorcunov <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
---

arch/x86/include/asm/pgtable-2level.h | 82 ++++++++++++------------
1 file changed, 41 insertions(+), 41 deletions(-)

diff -puN arch/x86/include/asm/pgtable-2level.h~arch-x86-include-asm-pgtable-2levelh-clean-up-pte_to_pgoff-and-pgoff_to_pte-helpers arch/x86/include/asm/pgtable-2level.h
--- a/arch/x86/include/asm/pgtable-2level.h~arch-x86-include-asm-pgtable-2levelh-clean-up-pte_to_pgoff-and-pgoff_to_pte-helpers
+++ a/arch/x86/include/asm/pgtable-2level.h
@@ -55,6 +55,9 @@ static inline pmd_t native_pmdp_get_and_
#define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp)
#endif

+#define _mfrob(v,r,m,l) ((((v) >> (r)) & (m)) << (l))
+#define __frob(v,r,l) (((v) >> (r)) << (l))
+
#ifdef CONFIG_MEM_SOFT_DIRTY

/*
@@ -71,31 +74,27 @@ static inline pmd_t native_pmdp_get_and_
#define PTE_FILE_BITS2 (PTE_FILE_SHIFT3 - PTE_FILE_SHIFT2 - 1)
#define PTE_FILE_BITS3 (PTE_FILE_SHIFT4 - PTE_FILE_SHIFT3 - 1)

-#define pte_to_pgoff(pte) \
- ((((pte).pte_low >> (PTE_FILE_SHIFT1)) \
- & ((1U << PTE_FILE_BITS1) - 1))) \
- + ((((pte).pte_low >> (PTE_FILE_SHIFT2)) \
- & ((1U << PTE_FILE_BITS2) - 1)) \
- << (PTE_FILE_BITS1)) \
- + ((((pte).pte_low >> (PTE_FILE_SHIFT3)) \
- & ((1U << PTE_FILE_BITS3) - 1)) \
- << (PTE_FILE_BITS1 + PTE_FILE_BITS2)) \
- + ((((pte).pte_low >> (PTE_FILE_SHIFT4))) \
- << (PTE_FILE_BITS1 + PTE_FILE_BITS2 + PTE_FILE_BITS3))
-
-#define pgoff_to_pte(off) \
- ((pte_t) { .pte_low = \
- ((((off)) & ((1U << PTE_FILE_BITS1) - 1)) << PTE_FILE_SHIFT1) \
- + ((((off) >> PTE_FILE_BITS1) \
- & ((1U << PTE_FILE_BITS2) - 1)) \
- << PTE_FILE_SHIFT2) \
- + ((((off) >> (PTE_FILE_BITS1 + PTE_FILE_BITS2)) \
- & ((1U << PTE_FILE_BITS3) - 1)) \
- << PTE_FILE_SHIFT3) \
- + ((((off) >> \
- (PTE_FILE_BITS1 + PTE_FILE_BITS2 + PTE_FILE_BITS3))) \
- << PTE_FILE_SHIFT4) \
- + _PAGE_FILE })
+#define PTE_FILE_MASK1 ((1U << PTE_FILE_BITS1) - 1)
+#define PTE_FILE_MASK2 ((1U << PTE_FILE_BITS2) - 1)
+#define PTE_FILE_MASK3 ((1U << PTE_FILE_BITS3) - 1)
+
+#define PTE_FILE_LSHIFT2 (PTE_FILE_BITS1)
+#define PTE_FILE_LSHIFT3 (PTE_FILE_BITS1 + PTE_FILE_BITS2)
+#define PTE_FILE_LSHIFT4 (PTE_FILE_BITS1 + PTE_FILE_BITS2 + PTE_FILE_BITS3)
+
+#define pte_to_pgoff(pte) \
+ (_mfrob((pte).pte_low, PTE_FILE_SHIFT1, PTE_FILE_MASK1, 0) + \
+ _mfrob((pte).pte_low, PTE_FILE_SHIFT2, PTE_FILE_MASK2, PTE_FILE_LSHIFT2) + \
+ _mfrob((pte).pte_low, PTE_FILE_SHIFT3, PTE_FILE_MASK3, PTE_FILE_LSHIFT3) + \
+ __frob((pte).pte_low, PTE_FILE_SHIFT4, PTE_FILE_LSHIFT4))
+
+#define pgoff_to_pte(off) \
+ ((pte_t) { .pte_low = \
+ _mfrob(off, 0, PTE_FILE_MASK1, PTE_FILE_SHIFT1) + \
+ _mfrob(off, PTE_FILE_LSHIFT2, PTE_FILE_MASK2, PTE_FILE_SHIFT2) + \
+ _mfrob(off, PTE_FILE_LSHIFT3, PTE_FILE_MASK3, PTE_FILE_SHIFT3) + \
+ __frob(off, PTE_FILE_LSHIFT4, PTE_FILE_SHIFT4) + \
+ _PAGE_FILE })

#else /* CONFIG_MEM_SOFT_DIRTY */

@@ -115,22 +114,23 @@ static inline pmd_t native_pmdp_get_and_
#define PTE_FILE_BITS1 (PTE_FILE_SHIFT2 - PTE_FILE_SHIFT1 - 1)
#define PTE_FILE_BITS2 (PTE_FILE_SHIFT3 - PTE_FILE_SHIFT2 - 1)

-#define pte_to_pgoff(pte) \
- ((((pte).pte_low >> PTE_FILE_SHIFT1) \
- & ((1U << PTE_FILE_BITS1) - 1)) \
- + ((((pte).pte_low >> PTE_FILE_SHIFT2) \
- & ((1U << PTE_FILE_BITS2) - 1)) << PTE_FILE_BITS1) \
- + (((pte).pte_low >> PTE_FILE_SHIFT3) \
- << (PTE_FILE_BITS1 + PTE_FILE_BITS2)))
-
-#define pgoff_to_pte(off) \
- ((pte_t) { .pte_low = \
- (((off) & ((1U << PTE_FILE_BITS1) - 1)) << PTE_FILE_SHIFT1) \
- + ((((off) >> PTE_FILE_BITS1) & ((1U << PTE_FILE_BITS2) - 1)) \
- << PTE_FILE_SHIFT2) \
- + (((off) >> (PTE_FILE_BITS1 + PTE_FILE_BITS2)) \
- << PTE_FILE_SHIFT3) \
- + _PAGE_FILE })
+#define PTE_FILE_MASK1 ((1U << PTE_FILE_BITS1) - 1)
+#define PTE_FILE_MASK2 ((1U << PTE_FILE_BITS2) - 1)
+
+#define PTE_FILE_LSHIFT2 (PTE_FILE_BITS1)
+#define PTE_FILE_LSHIFT3 (PTE_FILE_BITS1 + PTE_FILE_BITS2)
+
+#define pte_to_pgoff(pte) \
+ (_mfrob((pte).pte_low, PTE_FILE_SHIFT1, PTE_FILE_MASK1, 0) + \
+ _mfrob((pte).pte_low, PTE_FILE_SHIFT2, PTE_FILE_MASK2, PTE_FILE_LSHIFT2) + \
+ __frob((pte).pte_low, PTE_FILE_SHIFT3, PTE_FILE_LSHIFT3))
+
+#define pgoff_to_pte(off) \
+ ((pte_t) { .pte_low = \
+ _mfrob(off, 0, PTE_FILE_MASK1, PTE_FILE_SHIFT1) + \
+ _mfrob(off, PTE_FILE_LSHIFT2, PTE_FILE_MASK2, PTE_FILE_SHIFT2) + \
+ __frob(off, PTE_FILE_LSHIFT3, PTE_FILE_SHIFT3) + \
+ _PAGE_FILE })

#endif /* CONFIG_MEM_SOFT_DIRTY */

_

2013-08-12 22:28:38

by Andy Lutomirski

[permalink] [raw]
Subject: Re: [patch 2/2] [PATCH] mm: Save soft-dirty bits on file pages

On Mon, Aug 12, 2013 at 2:57 PM, Andrew Morton
<[email protected]> wrote:
> On Thu, 8 Aug 2013 18:51:20 +0400 Cyrill Gorcunov <[email protected]> wrote:
>
>> On Wed, Aug 07, 2013 at 01:28:12PM -0700, Andrew Morton wrote:
>> >
>> > Good god.
>> >
>> > I wonder if these can be turned into out-of-line functions in some form
>> > which humans can understand.
>> >
>> > or
>> >
>> > #define pte_to_pgoff(pte)
>> > frob(pte, PTE_FILE_SHIFT1, PTE_FILE_BITS1) +
>> > frob(PTE_FILE_SHIFT2, PTE_FILE_BITS2) +
>> > frob(PTE_FILE_SHIFT3, PTE_FILE_BITS3) +
>> > frob(PTE_FILE_SHIFT4, PTE_FILE_BITS1 + PTE_FILE_BITS2 + PTE_FILE_BITS3)
>>
>> Hi, here is what I ended up with. Please take a look (I decided to post
>> patch in the thread since it's related to the context of the mails).
>
> You could have #undefed _mfrob and __frob after using them, but whatever.
>
> I saved this patch to wave at the x86 guys for 3.12. I plan to merge
> mm-save-soft-dirty-bits-on-file-pages.patch for 3.11.
>
>> Guys, is there a reason for "if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE"
>> test present in this pgtable-2level.h file at all? I can't imagine
>> where it can be false on x86.
>
> I doubt if "Guys" read this. x86 maintainers cc'ed.
>
>
>
>
>
> From: Cyrill Gorcunov <[email protected]>
> Subject: arch/x86/include/asm/pgtable-2level.h: clean up pte_to_pgoff and pgoff_to_pte helpers
>
> Andrew asked if there a way to make pte_to_pgoff and pgoff_to_pte macro
> helpers somehow more readable.
>
> With this patch it should be more understandable what is happening with
> bits when they come to and from pte entry.
>
> Signed-off-by: Cyrill Gorcunov <[email protected]>
> Cc: Ingo Molnar <[email protected]>
> Cc: "H. Peter Anvin" <[email protected]>
> Cc: Thomas Gleixner <[email protected]>
> Signed-off-by: Andrew Morton <[email protected]>
> ---
>
> arch/x86/include/asm/pgtable-2level.h | 82 ++++++++++++------------
> 1 file changed, 41 insertions(+), 41 deletions(-)
>
> diff -puN arch/x86/include/asm/pgtable-2level.h~arch-x86-include-asm-pgtable-2levelh-clean-up-pte_to_pgoff-and-pgoff_to_pte-helpers arch/x86/include/asm/pgtable-2level.h
> --- a/arch/x86/include/asm/pgtable-2level.h~arch-x86-include-asm-pgtable-2levelh-clean-up-pte_to_pgoff-and-pgoff_to_pte-helpers
> +++ a/arch/x86/include/asm/pgtable-2level.h
> @@ -55,6 +55,9 @@ static inline pmd_t native_pmdp_get_and_
> #define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp)
> #endif
>
> +#define _mfrob(v,r,m,l) ((((v) >> (r)) & (m)) << (l))
> +#define __frob(v,r,l) (((v) >> (r)) << (l))
> +
> #ifdef CONFIG_MEM_SOFT_DIRTY
>

If I'm understanding this right, the idea is to take the bits in the
range a..b of v and stick them at c..d, where a-b == c-d. Would it
make sense to change this to look something like

#define __frob(v, inmsb, inlsb, outlsb) ((v >> inlsb) & ((1<<(inmsb -
inlsb + 1)-1) << outlsb)

For extra fun, there could be an __unfrob macro that takes the same
inmsb, inlsb, outlsb parameters but undoes it so that it's (more)
clear that the operations that are supposed to be inverses are indeed
inverses.

--Andy

2013-08-12 22:37:28

by Andrew Morton

[permalink] [raw]
Subject: Re: [patch 2/2] [PATCH] mm: Save soft-dirty bits on file pages

On Mon, 12 Aug 2013 15:28:06 -0700 Andy Lutomirski <[email protected]> wrote:

> > +#define _mfrob(v,r,m,l) ((((v) >> (r)) & (m)) << (l))
> > +#define __frob(v,r,l) (((v) >> (r)) << (l))
> > +
> > #ifdef CONFIG_MEM_SOFT_DIRTY
> >
>
> If I'm understanding this right, the idea is to take the bits in the
> range a..b of v and stick them at c..d, where a-b == c-d. Would it
> make sense to change this to look something like
>
> #define __frob(v, inmsb, inlsb, outlsb) ((v >> inlsb) & ((1<<(inmsb -
> inlsb + 1)-1) << outlsb)
>
> For extra fun, there could be an __unfrob macro that takes the same
> inmsg, inlsb, outlsb parameters but undoes it so that it's (more)
> clear that the operations that are supposed to be inverses are indeed
> inverses.

hm, I seem to remember writing
drivers/net/ethernet/3com/3c59x.c:BFINS() and BFEXT() shortly after the
invention of the electronic computer.

I'm kinda surprised that we don't already have something like this in
kernel.h or somewhere - there's surely a ton of code which does such
things.

2013-08-13 05:02:18

by Cyrill Gorcunov

[permalink] [raw]
Subject: Re: [patch 2/2] [PATCH] mm: Save soft-dirty bits on file pages

On Mon, Aug 12, 2013 at 03:28:06PM -0700, Andy Lutomirski wrote:
> >
> > You could have #undefed _mfrob and __frob after using them, but whatever.

Sure, for some reason I forgot to do that. Will send update on top.

> > I saved this patch to wave at the x86 guys for 3.12. I plan to merge
> > mm-save-soft-dirty-bits-on-file-pages.patch for 3.11.
> >
> >> Guys, is there a reason for "if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE"
> >> test present in this pgtable-2level.h file at all? I can't imagine
> >> where it can be false on x86.
> >
> > I doubt if "Guys" read this. x86 maintainers cc'ed.

Thanks!

> > +#define _mfrob(v,r,m,l) ((((v) >> (r)) & (m)) << (l))
> > +#define __frob(v,r,l) (((v) >> (r)) << (l))
> > +
> > #ifdef CONFIG_MEM_SOFT_DIRTY
>
> If I'm understanding this right, the idea is to take the bits in the
> range a..b of v and stick them at c..d, where a-b == c-d. Would it
> make sense to change this to look something like
>
> #define __frob(v, inmsb, inlsb, outlsb) ((v >> inlsb) & ((1<<(inmsb -
> inlsb + 1)-1) << outlsb)

There is a case where you don't need a mask at all. And because this
pte conversion is on a hot path and time critical, I kept the generated code
as it was (even if that led to slightly less clear source code).

> For extra fun, there could be an __unfrob macro that takes the same
> inmsg, inlsb, outlsb parameters but undoes it so that it's (more)
> clear that the operations that are supposed to be inverses are indeed
> inverses.

2013-08-13 15:15:59

by H. Peter Anvin

[permalink] [raw]
Subject: Re: [patch 2/2] [PATCH] mm: Save soft-dirty bits on file pages

On 08/12/2013 10:02 PM, Cyrill Gorcunov wrote:
>
> There is a case when you don't need a mask completely. And because this
> pte conversion is on hot path and time critical I kept generated code
> as it was (even if that lead to slightly less clear source code).
>

Does it actually matter, generated-code-wise, or is the compiler smart
enough to figure it out? The reason I'm asking is because it makes the
code much harder to follow.

The other thing is can we please pretty please call it something other
than "frob"?

-hpa



2013-08-13 15:37:08

by Cyrill Gorcunov

[permalink] [raw]
Subject: Re: [patch 2/2] [PATCH] mm: Save soft-dirty bits on file pages

On Tue, Aug 13, 2013 at 08:14:39AM -0700, H. Peter Anvin wrote:
> On 08/12/2013 10:02 PM, Cyrill Gorcunov wrote:
> >
> > There is a case when you don't need a mask completely. And because this
> > pte conversion is on hot path and time critical I kept generated code
> > as it was (even if that lead to slightly less clear source code).
> >
>
> Does it actually matter, generated-code-wise, or is the compiler smart
> enough to figure it out? The reason I'm asking is because it makes the

gcc-4.7.2 is smart enough to suppress useless masking (i.e. ((1u << 31) - 1))
completely, but I don't know if this can be assumed for all gcc versions.

> code much harder to follow.

I see. OK, I'll try to prepare more readable macro helpers.

>
> The other thing is can we please pretty please call it something other
> than "frob"?

Sure.

2013-08-13 16:44:24

by H. Peter Anvin

[permalink] [raw]
Subject: Re: [patch 2/2] [PATCH] mm: Save soft-dirty bits on file pages

On 08/13/2013 08:37 AM, Cyrill Gorcunov wrote:
>>
>> Does it actually matter, generated-code-wise, or is the compiler smart
>> enough to figure it out? The reason I'm asking is because it makes the
>
> gcc-4.7.2 is smart enough to suppress useless masking (ie ((1u << 31) - 1))
> completely but I don't know if this can be assumed for all gcc series.
>

I would be highly surprised if it wasn't the case for any gcc we care about.

-hpa

2013-08-13 21:29:03

by Cyrill Gorcunov

[permalink] [raw]
Subject: Re: [patch 2/2] [PATCH] mm: Save soft-dirty bits on file pages

On Tue, Aug 13, 2013 at 09:43:23AM -0700, H. Peter Anvin wrote:
> On 08/13/2013 08:37 AM, Cyrill Gorcunov wrote:
> >>
> >> Does it actually matter, generated-code-wise, or is the compiler smart
> >> enough to figure it out? The reason I'm asking is because it makes the
> >
> > gcc-4.7.2 is smart enough to suppress useless masking (ie ((1u << 31) - 1))
> > completely but I don't know if this can be assumed for all gcc series.
> >
>
> I would be highly surprised if it wasn't the case for any gcc we care about.

Does the one below look better? (Btw, what about the snippet we have there as well

#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
#define PTE_FILE_SHIFT2 (_PAGE_BIT_FILE + 1)
#define PTE_FILE_SHIFT3 (_PAGE_BIT_PROTNONE + 1)
#else
#define PTE_FILE_SHIFT2 (_PAGE_BIT_PROTNONE + 1)
#define PTE_FILE_SHIFT3 (_PAGE_BIT_FILE + 1)
#endif

where

#define _PAGE_BIT_PROTNONE _PAGE_BIT_GLOBAL -> 8
#define _PAGE_BIT_FILE _PAGE_BIT_DIRTY -> 6

so I wonder where on x86 there are cases when _PAGE_BIT_FILE > _PAGE_BIT_PROTNONE;
what am I missing here?)

---
arch/x86/include/asm/pgtable-2level.h | 37 +++++++++++++++++++---------------
1 file changed, 21 insertions(+), 16 deletions(-)

Index: linux-2.6.git/arch/x86/include/asm/pgtable-2level.h
===================================================================
--- linux-2.6.git.orig/arch/x86/include/asm/pgtable-2level.h
+++ linux-2.6.git/arch/x86/include/asm/pgtable-2level.h
@@ -55,8 +55,11 @@ static inline pmd_t native_pmdp_get_and_
#define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp)
#endif

-#define _mfrob(v,r,m,l) ((((v) >> (r)) & (m)) << (l))
-#define __frob(v,r,l) (((v) >> (r)) << (l))
+/*
+ * For readable bitfield manipulations.
+ */
+#define PTE_FILE_NOMASK (-1U)
+#define __bfop(v,r,m,l) ((((v) >> (r)) & (m)) << (l))

#ifdef CONFIG_MEM_SOFT_DIRTY

@@ -83,17 +86,17 @@ static inline pmd_t native_pmdp_get_and_
#define PTE_FILE_LSHIFT4 (PTE_FILE_BITS1 + PTE_FILE_BITS2 + PTE_FILE_BITS3)

#define pte_to_pgoff(pte) \
- (_mfrob((pte).pte_low, PTE_FILE_SHIFT1, PTE_FILE_MASK1, 0) + \
- _mfrob((pte).pte_low, PTE_FILE_SHIFT2, PTE_FILE_MASK2, PTE_FILE_LSHIFT2) + \
- _mfrob((pte).pte_low, PTE_FILE_SHIFT3, PTE_FILE_MASK3, PTE_FILE_LSHIFT3) + \
- __frob((pte).pte_low, PTE_FILE_SHIFT4, PTE_FILE_LSHIFT4))
+ (__bfop((pte).pte_low, PTE_FILE_SHIFT1, PTE_FILE_MASK1, 0) + \
+ __bfop((pte).pte_low, PTE_FILE_SHIFT2, PTE_FILE_MASK2, PTE_FILE_LSHIFT2) + \
+ __bfop((pte).pte_low, PTE_FILE_SHIFT3, PTE_FILE_MASK3, PTE_FILE_LSHIFT3) + \
+ __bfop((pte).pte_low, PTE_FILE_SHIFT4, PTE_FILE_NOMASK, PTE_FILE_LSHIFT4))

#define pgoff_to_pte(off) \
((pte_t) { .pte_low = \
- _mfrob(off, 0, PTE_FILE_MASK1, PTE_FILE_SHIFT1) + \
- _mfrob(off, PTE_FILE_LSHIFT2, PTE_FILE_MASK2, PTE_FILE_SHIFT2) + \
- _mfrob(off, PTE_FILE_LSHIFT3, PTE_FILE_MASK3, PTE_FILE_SHIFT3) + \
- __frob(off, PTE_FILE_LSHIFT4, PTE_FILE_SHIFT4) + \
+ __bfop(off, 0, PTE_FILE_MASK1, PTE_FILE_SHIFT1) + \
+ __bfop(off, PTE_FILE_LSHIFT2, PTE_FILE_MASK2, PTE_FILE_SHIFT2) + \
+ __bfop(off, PTE_FILE_LSHIFT3, PTE_FILE_MASK3, PTE_FILE_SHIFT3) + \
+ __bfop(off, PTE_FILE_LSHIFT4, PTE_FILE_NOMASK, PTE_FILE_SHIFT4) + \
_PAGE_FILE })

#else /* CONFIG_MEM_SOFT_DIRTY */
@@ -121,19 +124,21 @@ static inline pmd_t native_pmdp_get_and_
#define PTE_FILE_LSHIFT3 (PTE_FILE_BITS1 + PTE_FILE_BITS2)

#define pte_to_pgoff(pte) \
- (_mfrob((pte).pte_low, PTE_FILE_SHIFT1, PTE_FILE_MASK1, 0) + \
- _mfrob((pte).pte_low, PTE_FILE_SHIFT2, PTE_FILE_MASK2, PTE_FILE_LSHIFT2) + \
- __frob((pte).pte_low, PTE_FILE_SHIFT3, PTE_FILE_LSHIFT3))
+ (__bfop((pte).pte_low, PTE_FILE_SHIFT1, PTE_FILE_MASK1, 0) + \
+ __bfop((pte).pte_low, PTE_FILE_SHIFT2, PTE_FILE_MASK2, PTE_FILE_LSHIFT2) + \
+ __bfop((pte).pte_low, PTE_FILE_SHIFT3, PTE_FILE_NOMASK, PTE_FILE_LSHIFT3))

#define pgoff_to_pte(off) \
((pte_t) { .pte_low = \
- _mfrob(off, 0, PTE_FILE_MASK1, PTE_FILE_SHIFT1) + \
- _mfrob(off, PTE_FILE_LSHIFT2, PTE_FILE_MASK2, PTE_FILE_SHIFT2) + \
- __frob(off, PTE_FILE_LSHIFT3, PTE_FILE_SHIFT3) + \
+ __bfop(off, 0, PTE_FILE_MASK1, PTE_FILE_SHIFT1) + \
+ __bfop(off, PTE_FILE_LSHIFT2, PTE_FILE_MASK2, PTE_FILE_SHIFT2) + \
+ __bfop(off, PTE_FILE_LSHIFT3, PTE_FILE_NOMASK, PTE_FILE_SHIFT3) + \
_PAGE_FILE })

#endif /* CONFIG_MEM_SOFT_DIRTY */

+#undef __bfop
+
/* Encode and de-code a swap entry */
#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
#define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1)