Andy Lutomirski reported that if a page with the _PAGE_SOFT_DIRTY
bit set gets swapped out, the bit is lost and is no longer
available when the pte is read back.
To resolve this we introduce the _PAGE_SWP_SOFT_DIRTY bit, which is
saved in the pte entry for the page being swapped out. When such a page
is read back from the swap cache we check for the bit's presence
and, if it's there, we clear it and restore the former _PAGE_SOFT_DIRTY
bit.
One of the problems was finding a place in the pte entry where we can
save the _PAGE_SWP_SOFT_DIRTY bit while the page is in swap. The
_PAGE_PSE bit was chosen for that, since it doesn't intersect with the
swap entry format stored in the pte.
Reported-by: Andy Lutomirski <[email protected]>
Signed-off-by: Pavel Emelyanov <[email protected]>
Signed-off-by: Cyrill Gorcunov <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Matt Mackall <[email protected]>
Cc: Xiao Guangrong <[email protected]>
Cc: Marcelo Tosatti <[email protected]>
Cc: KOSAKI Motohiro <[email protected]>
Cc: Stephen Rothwell <[email protected]>
---
While I've intensively tested this patch on x86-64/32, I would
really appreciate a detailed review, to be sure I've not missed
places where borrowing the _PAGE_PSE bit for our own needs
causes problems. Thanks!
arch/x86/include/asm/pgtable.h | 15 +++++++++++++++
arch/x86/include/asm/pgtable_types.h | 13 +++++++++++++
fs/proc/task_mmu.c | 23 +++++++++++++++++------
include/linux/swapops.h | 4 ++++
mm/memory.c | 4 ++++
mm/rmap.c | 6 +++++-
6 files changed, 58 insertions(+), 7 deletions(-)
Index: linux-2.6.git/arch/x86/include/asm/pgtable.h
===================================================================
--- linux-2.6.git.orig/arch/x86/include/asm/pgtable.h
+++ linux-2.6.git/arch/x86/include/asm/pgtable.h
@@ -314,6 +314,21 @@ static inline pmd_t pmd_mksoft_dirty(pmd
return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY);
}
+static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
+{
+ return pte_set_flags(pte, _PAGE_SWP_SOFT_DIRTY);
+}
+
+static inline int pte_swp_soft_dirty(pte_t pte)
+{
+ return pte_flags(pte) & _PAGE_SWP_SOFT_DIRTY;
+}
+
+static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
+{
+ return pte_clear_flags(pte, _PAGE_SWP_SOFT_DIRTY);
+}
+
/*
* Mask out unsupported bits in a present pgprot. Non-present pgprots
* can use those bits for other purposes, so leave them be.
Index: linux-2.6.git/arch/x86/include/asm/pgtable_types.h
===================================================================
--- linux-2.6.git.orig/arch/x86/include/asm/pgtable_types.h
+++ linux-2.6.git/arch/x86/include/asm/pgtable_types.h
@@ -67,6 +67,19 @@
#define _PAGE_SOFT_DIRTY (_AT(pteval_t, 0))
#endif
+/*
+ * Tracking the soft dirty bit when a page goes to swap is tricky.
+ * We need a bit which can be stored in pte _and_ not conflict
+ * with swap entry format. On x86 bits 6 and 7 are *not* involved
+ * in swap entry computation, but bit 6 is used for nonlinear
+ * file mapping, so we borrow bit 7 for soft dirty tracking.
+ */
+#ifdef CONFIG_MEM_SOFT_DIRTY
+#define _PAGE_SWP_SOFT_DIRTY _PAGE_PSE
+#else
+#define _PAGE_SWP_SOFT_DIRTY (_AT(pteval_t, 0))
+#endif
+
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
#define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX)
#else
Index: linux-2.6.git/fs/proc/task_mmu.c
===================================================================
--- linux-2.6.git.orig/fs/proc/task_mmu.c
+++ linux-2.6.git/fs/proc/task_mmu.c
@@ -730,8 +730,14 @@ static inline void clear_soft_dirty(stru
* of how soft-dirty works.
*/
pte_t ptent = *pte;
- ptent = pte_wrprotect(ptent);
- ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY);
+
+ if (pte_present(ptent)) {
+ ptent = pte_wrprotect(ptent);
+ ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY);
+ } else if (pte_swp_soft_dirty(ptent)) {
+ ptent = pte_swp_clear_soft_dirty(ptent);
+ }
+
set_pte_at(vma->vm_mm, addr, pte, ptent);
#endif
}
@@ -752,14 +758,15 @@ static int clear_refs_pte_range(pmd_t *p
pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
for (; addr != end; pte++, addr += PAGE_SIZE) {
ptent = *pte;
- if (!pte_present(ptent))
- continue;
if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
clear_soft_dirty(vma, addr, pte);
continue;
}
+ if (!pte_present(ptent))
+ continue;
+
page = vm_normal_page(vma, addr, ptent);
if (!page)
continue;
@@ -930,8 +937,12 @@ static void pte_to_pagemap_entry(pagemap
flags = PM_PRESENT;
page = vm_normal_page(vma, addr, pte);
} else if (is_swap_pte(pte)) {
- swp_entry_t entry = pte_to_swp_entry(pte);
-
+ swp_entry_t entry;
+#ifdef CONFIG_MEM_SOFT_DIRTY
+ if (pte_swp_soft_dirty(pte))
+ flags2 |= __PM_SOFT_DIRTY;
+#endif
+ entry = pte_to_swp_entry(pte);
frame = swp_type(entry) |
(swp_offset(entry) << MAX_SWAPFILES_SHIFT);
flags = PM_SWAP;
Index: linux-2.6.git/include/linux/swapops.h
===================================================================
--- linux-2.6.git.orig/include/linux/swapops.h
+++ linux-2.6.git/include/linux/swapops.h
@@ -67,6 +67,10 @@ static inline swp_entry_t pte_to_swp_ent
swp_entry_t arch_entry;
BUG_ON(pte_file(pte));
+#ifdef CONFIG_MEM_SOFT_DIRTY
+ if (pte_swp_soft_dirty(pte))
+ pte = pte_swp_clear_soft_dirty(pte);
+#endif
arch_entry = __pte_to_swp_entry(pte);
return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry));
}
Index: linux-2.6.git/mm/memory.c
===================================================================
--- linux-2.6.git.orig/mm/memory.c
+++ linux-2.6.git/mm/memory.c
@@ -3115,6 +3115,10 @@ static int do_swap_page(struct mm_struct
exclusive = 1;
}
flush_icache_page(vma, page);
+#ifdef CONFIG_MEM_SOFT_DIRTY
+ if (pte_swp_soft_dirty(orig_pte))
+ pte = pte_mksoft_dirty(pte);
+#endif
set_pte_at(mm, address, page_table, pte);
if (page == swapcache)
do_page_add_anon_rmap(page, vma, address, exclusive);
Index: linux-2.6.git/mm/rmap.c
===================================================================
--- linux-2.6.git.orig/mm/rmap.c
+++ linux-2.6.git/mm/rmap.c
@@ -1236,6 +1236,7 @@ int try_to_unmap_one(struct page *page,
swp_entry_to_pte(make_hwpoison_entry(page)));
} else if (PageAnon(page)) {
swp_entry_t entry = { .val = page_private(page) };
+ pte_t swp_pte;
if (PageSwapCache(page)) {
/*
@@ -1264,7 +1265,10 @@ int try_to_unmap_one(struct page *page,
BUG_ON(TTU_ACTION(flags) != TTU_MIGRATION);
entry = make_migration_entry(page, pte_write(pteval));
}
- set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
+ swp_pte = swp_entry_to_pte(entry);
+ if (pte_soft_dirty(pteval))
+ swp_pte = pte_swp_mksoft_dirty(swp_pte);
+ set_pte_at(mm, address, pte, swp_pte);
BUG_ON(pte_file(*pte));
} else if (IS_ENABLED(CONFIG_MIGRATION) &&
(TTU_ACTION(flags) == TTU_MIGRATION)) {
On Wed, Jul 24, 2013 at 9:08 AM, Cyrill Gorcunov <[email protected]> wrote:
> Andy Lutomirski reported that in case if a page with _PAGE_SOFT_DIRTY
> bit set get swapped out, the bit is getting lost and no longer
> available when pte read back.
Potentially silly question (due to my complete lack of understanding
of how swapping works in Linux): what about file-backed pages?
(Arguably these would be best supported by filesystems instead of by
the core vm, in which case it might make sense to drop soft-dirty
support for these pages entirely.)
--Andy
On Wed, Jul 24, 2013 at 09:23:14AM -0700, Andy Lutomirski wrote:
> On Wed, Jul 24, 2013 at 9:08 AM, Cyrill Gorcunov <[email protected]> wrote:
> > Andy Lutomirski reported that in case if a page with _PAGE_SOFT_DIRTY
> > bit set get swapped out, the bit is getting lost and no longer
> > available when pte read back.
>
> Potentially silly question (due to my completely lack of understanding
> of how swapping works in Linux): what about file-backed pages?
> (Arguably these would be best supported by filesystems instead of by
> the core vm, in which case it might make sense to drop soft-dirty
> support for these pages entirely.)
Hi Andy, if I understand you correctly, "file-backed pages" are carried
in ptes with the _PAGE_FILE bit set, and the swap soft-dirty bit won't be
used on them, but _PAGE_SOFT_DIRTY will be set on write, unless I've
missed something obvious (Pavel?).
On Wed, Jul 24, 2013 at 9:37 AM, Cyrill Gorcunov <[email protected]> wrote:
> On Wed, Jul 24, 2013 at 09:23:14AM -0700, Andy Lutomirski wrote:
>> On Wed, Jul 24, 2013 at 9:08 AM, Cyrill Gorcunov <[email protected]> wrote:
>> > Andy Lutomirski reported that in case if a page with _PAGE_SOFT_DIRTY
>> > bit set get swapped out, the bit is getting lost and no longer
>> > available when pte read back.
>>
>> Potentially silly question (due to my completely lack of understanding
>> of how swapping works in Linux): what about file-backed pages?
>> (Arguably these would be best supported by filesystems instead of by
>> the core vm, in which case it might make sense to drop soft-dirty
>> support for these pages entirely.)
>
> Hi Andy, if I understand you correctly "file-backed pages" are carried
> in pte with _PAGE_FILE bit set and the swap soft-dirty bit won't be
> used on them but _PAGE_SOFT_DIRTY will be set on write if only I've
> not missed something obvious (Pavel?).
If I understand this stuff correctly, the vmscan code calls
try_to_unmap when it reclaims memory, which makes its way into
try_to_unmap_one, which clears the pte (and loses the soft-dirty bit).
--Andy
On Wed, Jul 24, 2013 at 10:06:53AM -0700, Andy Lutomirski wrote:
> > Hi Andy, if I understand you correctly "file-backed pages" are carried
> > in pte with _PAGE_FILE bit set and the swap soft-dirty bit won't be
> > used on them but _PAGE_SOFT_DIRTY will be set on write if only I've
> > not missed something obvious (Pavel?).
>
> If I understand this stuff correctly, the vmscan code calls
> try_to_unmap when it reclaims memory, which makes its way into
> try_to_unmap_one, which clears the pte (and loses the soft-dirty bit).
Indeed, I was staring at swap so hard that I forgot about files. I'll do
a separate patch for that, thanks!
On Wed, 2013-07-24 at 21:17 +0400, Cyrill Gorcunov wrote:
> On Wed, Jul 24, 2013 at 10:06:53AM -0700, Andy Lutomirski wrote:
> > > Hi Andy, if I understand you correctly "file-backed pages" are carried
> > > in pte with _PAGE_FILE bit set and the swap soft-dirty bit won't be
> > > used on them but _PAGE_SOFT_DIRTY will be set on write if only I've
> > > not missed something obvious (Pavel?).
> >
> > If I understand this stuff correctly, the vmscan code calls
> > try_to_unmap when it reclaims memory, which makes its way into
> > try_to_unmap_one, which clears the pte (and loses the soft-dirty bit).
>
> Indeed, I was so stareing into swap that forgot about files. I'll do
> a separate patch for that, thanks!
Let's just be clear about the problem first: the vmscan pass referred to
above happens only on clean pages, so the soft dirty bit could only be
set if the page was previously dirty and got written back. Now it's an
exercise for the reader whether we want to reinstantiate a cleaned
evicted page for the purpose of doing an iterative migration or whether
we want to flip the page in the migrated entity to be evicted (so if it
gets referred to, it pulls in an up to date copy) ... assuming the
backing file also gets transferred, of course.
James
On Wed, Jul 24, 2013 at 10:36 AM, James Bottomley
<[email protected]> wrote:
> On Wed, 2013-07-24 at 21:17 +0400, Cyrill Gorcunov wrote:
>> On Wed, Jul 24, 2013 at 10:06:53AM -0700, Andy Lutomirski wrote:
>> > > Hi Andy, if I understand you correctly "file-backed pages" are carried
>> > > in pte with _PAGE_FILE bit set and the swap soft-dirty bit won't be
>> > > used on them but _PAGE_SOFT_DIRTY will be set on write if only I've
>> > > not missed something obvious (Pavel?).
>> >
>> > If I understand this stuff correctly, the vmscan code calls
>> > try_to_unmap when it reclaims memory, which makes its way into
>> > try_to_unmap_one, which clears the pte (and loses the soft-dirty bit).
>>
>> Indeed, I was so stareing into swap that forgot about files. I'll do
>> a separate patch for that, thanks!
>
> Lets just be clear about the problem first: the vmscan pass referred to
> above happens only on clean pages, so the soft dirty bit could only be
> set if the page was previously dirty and got written back. Now it's an
> exercise for the reader whether we want to reinstantiate a cleaned
> evicted page for the purpose of doing an iterative migration or whether
> we want to flip the page in the migrated entity to be evicted (so if it
> gets referred to, it pulls in an up to date copy) ... assuming the
> backing file also gets transferred, of course.
I think I understand your distinction. Nonetheless, given the loss of
the soft-dirty bit, the migration tool could fail to notice that the
page was dirtied and subsequently cleaned and evicted. I'm
unconvinced that doing this on a per-PTE basis is the right way,
though.
I've long wanted a feature to efficiently see what changed on a
filesystem by comparing, say, a hash tree. NTFS can do this (sort
of), but I don't think that anything else can. I think that btrfs
should be able to, but there's no API that I've ever seen.
--Andy
On Wed, Jul 24, 2013 at 10:42:24AM -0700, Andy Lutomirski wrote:
> >
> > Lets just be clear about the problem first: the vmscan pass referred to
> > above happens only on clean pages, so the soft dirty bit could only be
> > set if the page was previously dirty and got written back. Now it's an
> > exercise for the reader whether we want to reinstantiate a cleaned
> > evicted page for the purpose of doing an iterative migration or whether
> > we want to flip the page in the migrated entity to be evicted (so if it
> > gets referred to, it pulls in an up to date copy) ... assuming the
> > backing file also gets transferred, of course.
Good question! I'd rather forward it to Pavel, as the author of the
soft-dirty bit feature. Pavel?
> I think I understand your distinction. Nonetheless, given the loss of
> the soft-dirty bit, the migration tool could fail to notice that the
> pages was dirtied and subsequently cleaned and evicted. I'm
> unconvinced that doing this on a per-PTE basis is the right way,
> though.
I fear that for tracking the soft-dirty bit of swapped entries we simply
have no other place than the pte (still, I'm quite open to ideas; maybe
there is a better way which I've missed).
> I've long wanted a feature to efficiently see what changed on a
> filesystem by comparing, say, a hash tree. NTFS can do this (sort
> of), but I don't think that anything else can. I think that btrfs
> should be able to, but there's no API that I've ever seen.
On Wed, Jul 24, 2013 at 11:15 AM, Cyrill Gorcunov <[email protected]> wrote:
> On Wed, Jul 24, 2013 at 10:42:24AM -0700, Andy Lutomirski wrote:
>> >
>> > Lets just be clear about the problem first: the vmscan pass referred to
>> > above happens only on clean pages, so the soft dirty bit could only be
>> > set if the page was previously dirty and got written back. Now it's an
>> > exercise for the reader whether we want to reinstantiate a cleaned
>> > evicted page for the purpose of doing an iterative migration or whether
>> > we want to flip the page in the migrated entity to be evicted (so if it
>> > gets referred to, it pulls in an up to date copy) ... assuming the
>> > backing file also gets transferred, of course.
>
> Good question! I rather forward it to Pavel as an author for soft dirty
> bit feature. Pavel?
>
>> I think I understand your distinction. Nonetheless, given the loss of
>> the soft-dirty bit, the migration tool could fail to notice that the
>> pages was dirtied and subsequently cleaned and evicted. I'm
>> unconvinced that doing this on a per-PTE basis is the right way,
>> though.
>
> I fear for tracking soft-dirty-bit for swapped entries we sinply have
> no other place than pte (still i'm quite open for ideas, maybe there
> are a better way which I've missed).
I know approximately nothing about how swap and anon_vma work.
For files, sticking it in struct page seems potentially nicer,
although finding a free bit might be tough. (FWIW, I have plans to
free up a page flag on x86 some time moderately soon as part of a
completely unrelated project.) I think this stuff really belongs to
the address_space more than it belongs to the pte.
How do you handle the write syscall?
--Andy
On 07/24/2013 08:23 PM, Andy Lutomirski wrote:
> On Wed, Jul 24, 2013 at 9:08 AM, Cyrill Gorcunov <[email protected]> wrote:
>> Andy Lutomirski reported that in case if a page with _PAGE_SOFT_DIRTY
>> bit set get swapped out, the bit is getting lost and no longer
>> available when pte read back.
>
> Potentially silly question (due to my completely lack of understanding
> of how swapping works in Linux): what about file-backed pages?
Strictly speaking file-backed mappings should also be fixed to keep the
soft-dirty bit, yes.
But in checkpoint-restore _shared_ file mappings are not of interest, as
all the data (changed or not) sits in the file and we just don't need to
take it into the dump. If the file mapping is _private_, then pages that
are written to become anonymous, end up in the swap cache, and are handled
by this patch.
> (Arguably these would be best supported by filesystems instead of by
> the core vm, in which case it might make sense to drop soft-dirty
> support for these pages entirely.)
>
> --Andy
> .
>
On 07/24/2013 08:08 PM, Cyrill Gorcunov wrote:
> Andy Lutomirski reported that in case if a page with _PAGE_SOFT_DIRTY
> bit set get swapped out, the bit is getting lost and no longer
> available when pte read back.
>
> To resolve this we introduce _PTE_SWP_SOFT_DIRTY bit which is
> saved in pte entry for the page being swapped out. When such page
> is to be read back from a swap cache we check for bit presence
> and if it's there we clear it and restore the former _PAGE_SOFT_DIRTY
> bit back.
>
> One of the problem was to find a place in pte entry where we can
> save the _PTE_SWP_SOFT_DIRTY bit while page is in swap. The
> _PAGE_PSE was chosen for that, it doesn't intersect with swap
> entry format stored in pte.
>
> Reported-by: Andy Lutomirski <[email protected]>
> Signed-off-by: Pavel Emelyanov <[email protected]>
> Signed-off-by: Cyrill Gorcunov <[email protected]>
> Cc: Andrew Morton <[email protected]>
> Cc: Matt Mackall <[email protected]>
> Cc: Xiao Guangrong <[email protected]>
> Cc: Marcelo Tosatti <[email protected]>
> Cc: KOSAKI Motohiro <[email protected]>
> Cc: Stephen Rothwell <[email protected]>
Acked-by: Pavel Emelyanov <[email protected]>
On Wed, Jul 24, 2013 at 11:21:46AM -0700, Andy Lutomirski wrote:
> >
> > I fear for tracking soft-dirty-bit for swapped entries we sinply have
> > no other place than pte (still i'm quite open for ideas, maybe there
> > are a better way which I've missed).
>
> I know approximately nothing about how swap and anon_vma work.
>
> For files, sticking it in struct page seems potentially nicer,
> although finding a free bit might be tough. (FWIW, I have plans to
> free up a page flag on x86 some time moderately soon as part of a
> completely unrelated project.) I think this stuff really belongs to
> the address_space more than it belongs to the pte.
Well, some of the information already lies in the pte (such as the 'file'
bit and swap entries), so I think it looks natural to work at this level.
But let me think about whether using struct page for that would be more
convenient...
>
> How do you handle the write syscall?
I fear I somehow missed your point here; could you please elaborate a bit?
There is no additional code that I know of that is write()-specific, just
the code for #PF exceptions.
On 07/24/2013 10:52 PM, Cyrill Gorcunov wrote:
> On Wed, Jul 24, 2013 at 11:21:46AM -0700, Andy Lutomirski wrote:
>>>
>>> I fear for tracking soft-dirty-bit for swapped entries we sinply have
>>> no other place than pte (still i'm quite open for ideas, maybe there
>>> are a better way which I've missed).
>>
>> I know approximately nothing about how swap and anon_vma work.
>>
>> For files, sticking it in struct page seems potentially nicer,
>> although finding a free bit might be tough. (FWIW, I have plans to
>> free up a page flag on x86 some time moderately soon as part of a
>> completely unrelated project.) I think this stuff really belongs to
>> the address_space more than it belongs to the pte.
>
> Well, some part of information already lays in pte (such as 'file' bit,
> swap entries) so it looks natural i think to work on this level. but
> letme think if use page struct for that be more convenient...
It hardly will be. Consider that we have a page shared between two tasks;
the first one "touches" it and soft-dirty is put onto its PTE and,
subsequently, onto the page itself. Then we go and clear soft-dirty for
the 2nd task. What should we do with the soft-dirty bit on the page?
The soft-dirty thing watches changes in the virtual memory, not in
the physical one.
>>
>> How do you handle the write syscall?
>
> I fear I somehow miss your point here, could please alaborate a bit?
> There is no additional code I know of being write() specific, just
> a code for #PF exceptions.
> .
>
On Wed, Jul 24, 2013 at 10:55:41PM +0400, Pavel Emelyanov wrote:
> >
> > Well, some part of information already lays in pte (such as 'file' bit,
> > swap entries) so it looks natural i think to work on this level. but
> > letme think if use page struct for that be more convenient...
>
> It hardly will be. Consider we have a page shared between two tasks,
> then first one "touches" it and soft-dirty is put onto his PTE and,
> subsequently, the page itself. The we go and clear sofr-dirty for the
> 2nd task. What should we do with the soft-dirty bit on the page?
Indeed, this won't help. Well then, bippidy-boppidy-boo, our
pants are metaphorically on fire (c)
>
> The soft-dirty thing watches changes in the virtual memory, not in
> the physical one.
On Wed, Jul 24, 2013 at 11:04:53PM +0400, Cyrill Gorcunov wrote:
> On Wed, Jul 24, 2013 at 10:55:41PM +0400, Pavel Emelyanov wrote:
> > >
> > > Well, some part of information already lays in pte (such as 'file' bit,
> > > swap entries) so it looks natural i think to work on this level. but
> > > letme think if use page struct for that be more convenient...
> >
> > It hardly will be. Consider we have a page shared between two tasks,
> > then first one "touches" it and soft-dirty is put onto his PTE and,
> > subsequently, the page itself. The we go and clear sofr-dirty for the
> > 2nd task. What should we do with the soft-dirty bit on the page?
>
> Indeed, this won't help. Well then, bippidy-boppidy-boo, our
> pants are metaphorically on fire (c)
(i meant page flags wont help)
On Wed, Jul 24, 2013 at 12:04 PM, Cyrill Gorcunov <[email protected]> wrote:
> On Wed, Jul 24, 2013 at 10:55:41PM +0400, Pavel Emelyanov wrote:
>> >
>> > Well, some part of information already lays in pte (such as 'file' bit,
>> > swap entries) so it looks natural i think to work on this level. but
>> > letme think if use page struct for that be more convenient...
>>
>> It hardly will be. Consider we have a page shared between two tasks,
>> then first one "touches" it and soft-dirty is put onto his PTE and,
>> subsequently, the page itself. The we go and clear sofr-dirty for the
>> 2nd task. What should we do with the soft-dirty bit on the page?
>
> Indeed, this won't help. Well then, bippidy-boppidy-boo, our
> pants are metaphorically on fire (c)
Hmm. So there are at least three kinds of memory:
Anonymous pages: soft-dirty works
Shared file-backed pages: soft-dirty does not work
Private file-backed pages: soft-dirty works (but see below)
Perhaps another bit should be allocated to expose to userspace either
"soft-dirty", "soft-clean", or "soft-dirty unsupported"?
There's another possible issue with private file-backed pages, though:
how do you distinguish clean-and-not-cowed from cowed-but-soft-clean?
(The former will reflect changes in the underlying file, I think, but
the latter won't.)
--Andy
On Wed, Jul 24, 2013 at 12:40:22PM -0700, Andy Lutomirski wrote:
>
> Hmm. So there are at least three kinds of memory:
>
> Anonymous pages: soft-dirty works
> Shared file-backed pages: soft-dirty does not work
> Private file-backed pages: soft-dirty works (but see below)
>
> Perhaps another bit should be allocated to expose to userspace either
> "soft-dirty", "soft-clean", or "soft-dirty unsupported"?
> There's another possible issue with private file-backed pages, though:
> how do you distinguish clean-and-not-cowed from cowed-but-soft-clean?
> (The former will reflect changes in the underlying file, I think, but
> the latter won't.)
When a fault happens with cow allocation (on write), the pte gets the
soft-dirty bit set (the code uses pte_mkdirty(entry) in __do_fault), and
until we explicitly clear the bit it remains set. Or do you mean something
else?
On 07/24/2013 11:40 PM, Andy Lutomirski wrote:
> On Wed, Jul 24, 2013 at 12:04 PM, Cyrill Gorcunov <[email protected]> wrote:
>> On Wed, Jul 24, 2013 at 10:55:41PM +0400, Pavel Emelyanov wrote:
>>>>
>>>> Well, some part of information already lays in pte (such as 'file' bit,
>>>> swap entries) so it looks natural i think to work on this level. but
>>>> letme think if use page struct for that be more convenient...
>>>
>>> It hardly will be. Consider we have a page shared between two tasks,
>>> then first one "touches" it and soft-dirty is put onto his PTE and,
>>> subsequently, the page itself. The we go and clear sofr-dirty for the
>>> 2nd task. What should we do with the soft-dirty bit on the page?
>>
>> Indeed, this won't help. Well then, bippidy-boppidy-boo, our
>> pants are metaphorically on fire (c)
>
> Hmm. So there are at least three kinds of memory:
>
> Anonymous pages: soft-dirty works
> Shared file-backed pages: soft-dirty does not work
> Private file-backed pages: soft-dirty works (but see below)
The shared file-backed pages case works, but unmap-map case doesn't
preserve the soft-dirty bit. Just like the private file did. We'll
fix this case next.
> Perhaps another bit should be allocated to expose to userspace either
> "soft-dirty", "soft-clean", or "soft-dirty unsupported"?
>
> There's another possible issue with private file-backed pages, though:
> how do you distinguish clean-and-not-cowed from cowed-but-soft-clean?
> (The former will reflect changes in the underlying file, I think, but
> the latter won't.)
There's a bit called PAGE_FILE bit in /proc/pagemap file introduced with
the 052fb0d635df5d49dfc85687d94e1a87bf09378d commit.
Plz, refer to Documentation/vm/pagemap.txt and soft-dirty.txt, all this
is described there pretty well.
> --Andy
Thanks,
Pavel
On 07/25/2013 03:29 PM, Pavel Emelyanov wrote:
> On 07/24/2013 11:40 PM, Andy Lutomirski wrote:
>> On Wed, Jul 24, 2013 at 12:04 PM, Cyrill Gorcunov <[email protected]> wrote:
>>> On Wed, Jul 24, 2013 at 10:55:41PM +0400, Pavel Emelyanov wrote:
>>>>> Well, some part of information already lays in pte (such as 'file' bit,
>>>>> swap entries) so it looks natural i think to work on this level. but
>>>>> letme think if use page struct for that be more convenient...
>>>> It hardly will be. Consider we have a page shared between two tasks,
>>>> then first one "touches" it and soft-dirty is put onto his PTE and,
>>>> subsequently, the page itself. The we go and clear sofr-dirty for the
>>>> 2nd task. What should we do with the soft-dirty bit on the page?
>>> Indeed, this won't help. Well then, bippidy-boppidy-boo, our
>>> pants are metaphorically on fire (c)
>> Hmm. So there are at least three kinds of memory:
>>
>> Anonymous pages: soft-dirty works
>> Shared file-backed pages: soft-dirty does not work
>> Private file-backed pages: soft-dirty works (but see below)
> The shared file-backed pages case works, but unmap-map case doesn't
What's the meaning of unmap-map case?
> preserve the soft-dirty bit. Just like the private file did. We'll
> fix this case next.
>
>> Perhaps another bit should be allocated to expose to userspace either
>> "soft-dirty", "soft-clean", or "soft-dirty unsupported"?
>>
>> There's another possible issue with private file-backed pages, though:
>> how do you distinguish clean-and-not-cowed from cowed-but-soft-clean?
>> (The former will reflect changes in the underlying file, I think, but
>> the latter won't.)
> There's a bit called PAGE_FILE bit in /proc/pagemap file introduced with
> the 052fb0d635df5d49dfc85687d94e1a87bf09378d commit.
>
> Plz, refer to Documentation/vm/pagemap.txt and soft-dirty.txt, all this
> is described there pretty well.
>
>> --Andy
> Thanks,
> Pavel
>
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to [email protected]. For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"[email protected]"> [email protected] </a>
On 07/25/2013 12:26 PM, Hush Bensen wrote:
> On 07/25/2013 03:29 PM, Pavel Emelyanov wrote:
>> On 07/24/2013 11:40 PM, Andy Lutomirski wrote:
>>> On Wed, Jul 24, 2013 at 12:04 PM, Cyrill Gorcunov <[email protected]> wrote:
>>>> On Wed, Jul 24, 2013 at 10:55:41PM +0400, Pavel Emelyanov wrote:
>>>>>> Well, some part of information already lays in pte (such as 'file' bit,
>>>>>> swap entries) so it looks natural i think to work on this level. but
>>>>>> letme think if use page struct for that be more convenient...
>>>>> It hardly will be. Consider we have a page shared between two tasks,
>>>>> then first one "touches" it and soft-dirty is put onto his PTE and,
>>>>> subsequently, the page itself. The we go and clear sofr-dirty for the
>>>>> 2nd task. What should we do with the soft-dirty bit on the page?
>>>> Indeed, this won't help. Well then, bippidy-boppidy-boo, our
>>>> pants are metaphorically on fire (c)
>>> Hmm. So there are at least three kinds of memory:
>>>
>>> Anonymous pages: soft-dirty works
>>> Shared file-backed pages: soft-dirty does not work
>>> Private file-backed pages: soft-dirty works (but see below)
>> The shared file-backed pages case works, but unmap-map case doesn't
>
> What's the meaning of unmap-map case?
Unmap is what happens when Linux runs out of memory, starts the memory
reclaim procedure and removes a page from a task's address space, replacing
the respective pte with information about where (in swap or in a file) the
page can be found.
Map is what occurs when the task touches the unmapped page again: the
respective page is read back from file/swap and the respective pte is
filled with its pfn.
This patch fixes the soft-dirty bit preservation during the unmap-map cycle
for pages, that go to swap.
Sorry for confusion.
>> preserve the soft-dirty bit. Just like the private file did. We'll
>> fix this case next.
Thanks,
Pavel
On Thu, Jul 25, 2013 at 12:29 AM, Pavel Emelyanov <[email protected]> wrote:
> On 07/24/2013 11:40 PM, Andy Lutomirski wrote:
>> On Wed, Jul 24, 2013 at 12:04 PM, Cyrill Gorcunov <[email protected]> wrote:
>>> On Wed, Jul 24, 2013 at 10:55:41PM +0400, Pavel Emelyanov wrote:
>>>>>
>
>> Perhaps another bit should be allocated to expose to userspace either
>> "soft-dirty", "soft-clean", or "soft-dirty unsupported"?
>>
>> There's another possible issue with private file-backed pages, though:
>> how do you distinguish clean-and-not-cowed from cowed-but-soft-clean?
>> (The former will reflect changes in the underlying file, I think, but
>> the latter won't.)
>
> There's a bit called PAGE_FILE bit in /proc/pagemap file introduced with
> the 052fb0d635df5d49dfc85687d94e1a87bf09378d commit.
>
> Plz, refer to Documentation/vm/pagemap.txt and soft-dirty.txt, all this
> is described there pretty well.
>
Fair enough. I'm still a little bit concerned that it will be hard
for userspace to distinguish between things for which soft-dirty works
(which will be more things once the patches are in) and things for
which soft-dirty doesn't work, assuming any are left. But maybe this
is silly.
--Andy