Signed-off-by: Jared E. Hulbert <[email protected]>
Signed-off-by: Moussa Ba <[email protected]>
--- a/fs/proc/task_mmu.c 2009-07-21 14:30:01.000000000 -0700
+++ b/fs/proc/task_mmu.c 2009-07-21 14:32:56.000000000 -0700
@@ -461,6 +461,33 @@
cond_resched();
return 0;
}
+static void walk_vma_area(struct mm_walk *this_walk, struct
vm_area_struct *vma,
+ int type)
+{
+ switch (type) {
+ /* Clear Anon VMAs only */
+ case 1:
+ if (!is_vm_hugetlb_page(vma) && !vma->vm_file)
+ walk_page_range(vma->vm_start,
+ vma->vm_end,
+ this_walk);
+ break;
+ /* Clear Mapped VMAs only */
+ case 2:
+ if (!is_vm_hugetlb_page(vma) && vma->vm_file)
+ walk_page_range(vma->vm_start,
+ vma->vm_end,
+ this_walk);
+ break;
+ /* Clear All VMAs */
+ default:
+ if (!is_vm_hugetlb_page(vma))
+ walk_page_range(vma->vm_start,
+ vma->vm_end,
+ this_walk);
+ break;
+ }
+}
static ssize_t clear_refs_write(struct file *file, const char __user *
buf,
size_t count, loff_t * ppos)
@@ -469,13 +496,15 @@
char buffer[PROC_NUMBUF], *end;
struct mm_struct *mm;
struct vm_area_struct *vma;
+ int type;
memset(buffer, 0, sizeof(buffer));
if (count > sizeof(buffer) - 1)
count = sizeof(buffer) - 1;
if (copy_from_user(buffer, buf, count))
return -EFAULT;
- if (!simple_strtol(buffer, &end, 0))
+ type = simple_strtol(buffer, &end, 0);
+ if (!type)
return -EINVAL;
if (*end == '\n')
end++;
@@ -491,9 +520,7 @@
down_read(&mm->mmap_sem);
for (vma = mm->mmap; vma; vma = vma->vm_next) {
clear_refs_walk.private = vma;
- if (!is_vm_hugetlb_page(vma))
- walk_page_range(vma->vm_start, vma->vm_end,
- &clear_refs_walk);
+ walk_vma_area(&clear_refs_walk, vma, type);
}
flush_tlb_mm(mm);
up_read(&mm->mmap_sem);
On Thu, Jul 23, 2009 at 08:57:22PM -0700, Moussa A. Ba wrote:
>
> Signed-off-by: Jared E. Hulbert <[email protected]>
> Signed-off-by: Moussa Ba <[email protected]>
> --- a/fs/proc/task_mmu.c 2009-07-21 14:30:01.000000000 -0700
> +++ b/fs/proc/task_mmu.c 2009-07-21 14:32:56.000000000 -0700
> @@ -461,6 +461,33 @@
> cond_resched();
> return 0;
> }
> +static void walk_vma_area(struct mm_walk *this_walk, struct
> vm_area_struct *vma,
> + int type)
> +{
> + switch (type) {
> + /* Clear Anon VMAs only */
> + case 1:
> + if (!is_vm_hugetlb_page(vma) && !vma->vm_file)
> + walk_page_range(vma->vm_start,
> + vma->vm_end,
> + this_walk);
> + break;
> + /* Clear Mapped VMAs only */
> + case 2:
> + if (!is_vm_hugetlb_page(vma) && vma->vm_file)
> + walk_page_range(vma->vm_start,
> + vma->vm_end,
> + this_walk);
> + break;
> + /* Clear All VMAs */
> + default:
> + if (!is_vm_hugetlb_page(vma))
> + walk_page_range(vma->vm_start,
> + vma->vm_end,
> + this_walk);
> + break;
> + }
> +}
>
First, can you make these plain numbers as macros or enum types?
Second, I believe the above code can be simiplified, how about below?
{
if (!is_vm_hugetlb_page(vma)) {
if (type == 1 && vma->vm_file)
return;
if (type == 2 && !vma->vm_file)
return;
walk_page_range(vma->vm_start, vma->end, this_walk);
}
}
> static ssize_t clear_refs_write(struct file *file, const char __user *
> buf,
> size_t count, loff_t * ppos)
> @@ -469,13 +496,15 @@
> char buffer[PROC_NUMBUF], *end;
> struct mm_struct *mm;
> struct vm_area_struct *vma;
> + int type;
>
> memset(buffer, 0, sizeof(buffer));
> if (count > sizeof(buffer) - 1)
> count = sizeof(buffer) - 1;
> if (copy_from_user(buffer, buf, count))
> return -EFAULT;
> - if (!simple_strtol(buffer, &end, 0))
> + type = simple_strtol(buffer, &end, 0);
> + if (!type)
> return -EINVAL;
> if (*end == '\n')
> end++;
> @@ -491,9 +520,7 @@
> down_read(&mm->mmap_sem);
> for (vma = mm->mmap; vma; vma = vma->vm_next) {
> clear_refs_walk.private = vma;
> - if (!is_vm_hugetlb_page(vma))
> - walk_page_range(vma->vm_start, vma->vm_end,
> - &clear_refs_walk);
> + walk_vma_area(&clear_refs_walk, vma, type);
> }
> flush_tlb_mm(mm);
> up_read(&mm->mmap_sem);
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
On Fri, Jul 24, 2009 at 1:39 AM, Amerigo Wang <[email protected]> wrote:
>
> On Thu, Jul 23, 2009 at 08:57:22PM -0700, Moussa A. Ba wrote:
> >
> > Signed-off-by: Jared E. Hulbert <[email protected]>
> > Signed-off-by: Moussa Ba <[email protected]>
> > --- a/fs/proc/task_mmu.c ? ? ?2009-07-21 14:30:01.000000000 -0700
> > +++ b/fs/proc/task_mmu.c ? ? ?2009-07-21 14:32:56.000000000 -0700
> > @@ -461,6 +461,33 @@
> > ? ? ? cond_resched();
> > ? ? ? return 0;
> > ?}
> > +static void walk_vma_area(struct mm_walk *this_walk, struct
> > vm_area_struct *vma,
> > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? int type)
> > +{
> > + ? ? switch (type) {
> > + ? ? ? ? ? ? /* Clear Anon VMAs only */
> > + ? ? case 1:
> > + ? ? ? ? ? ? if (!is_vm_hugetlb_page(vma) && !vma->vm_file)
> > + ? ? ? ? ? ? ? ? ? ? walk_page_range(vma->vm_start,
> > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? vma->vm_end,
> > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? this_walk);
> > + ? ? ? ? ? ? break;
> > + ? ? ? ? ? ? /* Clear Mapped VMAs only */
> > + ? ? case 2:
> > + ? ? ? ? ? ? if (!is_vm_hugetlb_page(vma) && vma->vm_file)
> > + ? ? ? ? ? ? ? ? ? ? walk_page_range(vma->vm_start,
> > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? vma->vm_end,
> > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? this_walk);
> > + ? ? ? ? ? ? break;
> > + ? ? ? ? ? ? /* Clear All VMAs */
> > + ? ? default:
> > + ? ? ? ? ? ? if (!is_vm_hugetlb_page(vma))
> > + ? ? ? ? ? ? ? ? ? ? walk_page_range(vma->vm_start,
> > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? vma->vm_end,
> > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? this_walk);
> > + ? ? ? ? ? ? break;
> > + ? ? }
> > +}
> >
>
>
> First, can you make these plain numbers as macros or enum types?
>
> Second, I believe the above code can be simiplified, how about below?
>
> {
> ? ?if (!is_vm_hugetlb_page(vma)) {
> ? ? ? ?if (type == 1 && vma->vm_file)
> ? ? ? ? ? ? return;
> ? ? ? ?if (type == 2 && !vma->vm_file)
> ? ? ? ? ? ? return;
> ? ? ? ?walk_page_range(vma->vm_start, vma->end, this_walk);
> ? ?}
> }
>
It would indeed work, it can probably be written as:
{
if (is_vm_hugetlb_page(vma))
return;
if (type == 2 && vma->vm_file)
return;
if (type == 3 && !vma->vm_file)
return;
walk_page_range(vma->vm_start, vma->vm_end, this_walk);
}
The change in numbers also addresses Matt's concern that writing a 1 is the
default behavior where all vmas are cleared, hence it is better to not break
that and make the selective clearing options 2 and 3. There is no
documentation for clear_refs beyond a 1 liner that states "Clears page
referenced bits shown in smaps output". I will also update the Documentation
and add it to the patch.
>
>
> > ?static ssize_t clear_refs_write(struct file *file, const char __user *
> > buf,
> > ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? size_t count, loff_t * ppos)
> > @@ -469,13 +496,15 @@
> > ? ? ? char buffer[PROC_NUMBUF], *end;
> > ? ? ? struct mm_struct *mm;
> > ? ? ? struct vm_area_struct *vma;
> > + ? ? int type;
> >
> > ? ? ? memset(buffer, 0, sizeof(buffer));
> > ? ? ? if (count > sizeof(buffer) - 1)
> > ? ? ? ? ? ? ? count = sizeof(buffer) - 1;
> > ? ? ? if (copy_from_user(buffer, buf, count))
> > ? ? ? ? ? ? ? return -EFAULT;
> > - ? ? if (!simple_strtol(buffer, &end, 0))
> > + ? ? type = simple_strtol(buffer, &end, 0);
> > + ? ? if (!type)
> > ? ? ? ? ? ? ? return -EINVAL;
> > ? ? ? if (*end == '\n')
> > ? ? ? ? ? ? ? end++;
> > @@ -491,9 +520,7 @@
> > ? ? ? ? ? ? ? down_read(&mm->mmap_sem);
> > ? ? ? ? ? ? ? for (vma = mm->mmap; vma; vma = vma->vm_next) {
> > ? ? ? ? ? ? ? ? ? ? ? clear_refs_walk.private = vma;
> > - ? ? ? ? ? ? ? ? ? ? if (!is_vm_hugetlb_page(vma))
> > - ? ? ? ? ? ? ? ? ? ? ? ? ? ? walk_page_range(vma->vm_start, vma->vm_end,
> > - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? &clear_refs_walk);
> > + ? ? ? ? ? ? ? ? ? ? walk_vma_area(&clear_refs_walk, vma, type);
> > ? ? ? ? ? ? ? }
> > ? ? ? ? ? ? ? flush_tlb_mm(mm);
> > ? ? ? ? ? ? ? up_read(&mm->mmap_sem);
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> > the body of a message to [email protected]
> > More majordomo info at ?http://vger.kernel.org/majordomo-info.html
> > Please read the FAQ at ?http://www.tux.org/lkml/
This patch makes the clear_refs proc interface a bit more versatile.
It adds support for clearing anonymous pages, file mapped pages or both.
The clear_refs entry is used to reset the Referenced bits on virtual and
physical pages associated with a process.
echo 1 > /proc/PID/clear_refs clears all pages associated with the process
echo 2 > /proc/PID/clear_refs clears anonymous pages only
echo 3 > /proc/PID/clear_refs clears file mapped pages only
Any other value written to the proc entry will clear all pages.
Selective clearing the pages has a measurable impact on performance as it
limits the number of page walks. We have been using this interface and this
adds flexibility to the user user space application implementing the reference
clearing.
Signed-off-by: Jared Hulbert ([email protected])
Signed-off-by: Moussa A. Ba ([email protected])
-------
Documentation/filesystems/proc.txt | 7 +++++++
fs/proc/task_mmu.c | 29 +++++++++++++++++++++++++----
2 files changed, 32 insertions(+), 4 deletions(-)
--- a/fs/proc/task_mmu.c 2009-07-21 14:30:01.000000000 -0700
+++ b/fs/proc/task_mmu.c 2009-07-27 11:46:05.000000000 -0700
@@ -462,6 +462,27 @@
return 0;
}
+static void walk_vma_area(struct mm_walk *this_walk,
+ struct vm_area_struct *vma, int type)
+{
+
+ /* Writing 2 to /proc/pid/clear_refs will clear all Anonymous
+ * pages.
+ *
+ * Writing 3 to /proc/pid/clear_refs will clear all file mapped
+ * pages.
+ *
+ * Writing any other value including 1 will clear all pages
+ */
+ if (is_vm_hugetlb_page(vma))
+ return;
+ if (type == 2 && vma->vm_file)
+ return;
+ if (type == 3 && !vma->vm_file)
+ return;
+ walk_page_range(vma->vm_start, vma->vm_end, this_walk);
+}
+
static ssize_t clear_refs_write(struct file *file, const char __user * buf,
size_t count, loff_t * ppos)
{
@@ -469,13 +490,15 @@
char buffer[PROC_NUMBUF], *end;
struct mm_struct *mm;
struct vm_area_struct *vma;
+ int type;
memset(buffer, 0, sizeof(buffer));
if (count > sizeof(buffer) - 1)
count = sizeof(buffer) - 1;
if (copy_from_user(buffer, buf, count))
return -EFAULT;
- if (!simple_strtol(buffer, &end, 0))
+ type = strict_strtol(buffer, &end, 0);
+ if (!type)
return -EINVAL;
if (*end == '\n')
end++;
@@ -491,9 +514,7 @@
down_read(&mm->mmap_sem);
for (vma = mm->mmap; vma; vma = vma->vm_next) {
clear_refs_walk.private = vma;
- if (!is_vm_hugetlb_page(vma))
- walk_page_range(vma->vm_start, vma->vm_end,
- &clear_refs_walk);
+ walk_vma_area(&clear_refs_walk, vma, type);
}
flush_tlb_mm(mm);
up_read(&mm->mmap_sem);
--- a/Documentation/filesystems/proc.txt 2009-07-20 17:29:11.000000000 -0700
+++ b/Documentation/filesystems/proc.txt 2009-07-27 12:08:34.000000000 -0700
@@ -375,6 +375,13 @@
This file is only present if the CONFIG_MMU kernel configuration option is
enabled.
+The clear_refs entry is used to reset the Referenced bits on virtual and physical
+pages associated with a process.
+echo 1 > /proc/PID/clear_refs clears all pages associated with the process
+echo 2 > /proc/PID/clear_refs clears anonymous pages only
+echo 3 > /proc/PID/clear_refs clears file mapped pages only
+Any other value written to the proc entry will clear all pages.
+
1.2 Kernel data
---------------
On Mon, 2009-07-27 at 13:19 -0700, Moussa A. Ba wrote:
> This patch makes the clear_refs proc interface a bit more versatile.
> It adds support for clearing anonymous pages, file mapped pages or both.
>
> The clear_refs entry is used to reset the Referenced bits on virtual and
> physical pages associated with a process.
> echo 1 > /proc/PID/clear_refs clears all pages associated with the process
> echo 2 > /proc/PID/clear_refs clears anonymous pages only
> echo 3 > /proc/PID/clear_refs clears file mapped pages only
> Any other value written to the proc entry will clear all pages.
>
> Selective clearing the pages has a measurable impact on performance as it
> limits the number of page walks. We have been using this interface and this
> adds flexibility to the user user space application implementing the reference
> clearing.
Looks ok to me.
Acked-by: Matt Mackall <[email protected]>
--
http://selenic.com : development and support for Mercurial and Linux
On Mon, 27 Jul 2009, Moussa A. Ba wrote:
> This patch makes the clear_refs proc interface a bit more versatile.
> It adds support for clearing anonymous pages, file mapped pages or both.
>
It already has support for clearing both, so you're only adding anonymous
and file-backed filters.
> The clear_refs entry is used to reset the Referenced bits on virtual and
> physical pages associated with a process.
> echo 1 > /proc/PID/clear_refs clears all pages associated with the process
> echo 2 > /proc/PID/clear_refs clears anonymous pages only
> echo 3 > /proc/PID/clear_refs clears file mapped pages only
> Any other value written to the proc entry will clear all pages.
>
clear_refs currently accepts any non-zero value, so it's possible that
this will break user scripts that, for whatever reason, write '2' or '3'.
I think that's acceptable, but it would be helpful to make all other
values a no-op similar to drop_caches at this point to avoid the potential
for breakage if this is ever extended any further.
> Selective clearing the pages has a measurable impact on performance as it
> limits the number of page walks. We have been using this interface and this
> adds flexibility to the user user space application implementing the reference
> clearing.
>
> Signed-off-by: Jared Hulbert ([email protected])
> Signed-off-by: Moussa A. Ba ([email protected])
Email addresses in < > braces, please.
The first sign-off line normally indicates who wrote the patch, but your
submission lacks a From: line, so git would indicate you wrote it. If
that's incorrect, please add a From: line as described in
Documentation/SubmittingPatches. If it's correct, please reorder your
sign-off lines.
> -------
> Documentation/filesystems/proc.txt | 7 +++++++
> fs/proc/task_mmu.c | 29 +++++++++++++++++++++++++----
> 2 files changed, 32 insertions(+), 4 deletions(-)
>
> --- a/fs/proc/task_mmu.c 2009-07-21 14:30:01.000000000 -0700
> +++ b/fs/proc/task_mmu.c 2009-07-27 11:46:05.000000000 -0700
> @@ -462,6 +462,27 @@
> return 0;
> }
>
> +static void walk_vma_area(struct mm_walk *this_walk,
> + struct vm_area_struct *vma, int type)
> +{
This is a very generic name for something that is only applicable to
clear_refs, so please name it accordingly. This will also avoid having to
pass the struct mm_walk * in since its only user is clear_refs_walk.
> +
> + /* Writing 2 to /proc/pid/clear_refs will clear all Anonymous
> + * pages.
> + *
> + * Writing 3 to /proc/pid/clear_refs will clear all file mapped
> + * pages.
> + *
> + * Writing any other value including 1 will clear all pages
> + */
Documentation/CodingStyle would suggest this format:
/*
* Multi-line kernel comments always start ..
* with an empty first line.
*/
> + if (is_vm_hugetlb_page(vma))
> + return;
> + if (type == 2 && vma->vm_file)
> + return;
> + if (type == 3 && !vma->vm_file)
> + return;
> + walk_page_range(vma->vm_start, vma->vm_end, this_walk);
> +}
K&R would suggest #define's (or enums) for those hard-coded values. I
think that's already been suggested for this patch, actually.
> +
> static ssize_t clear_refs_write(struct file *file, const char __user * buf,
> size_t count, loff_t * ppos)
> {
> @@ -469,13 +490,15 @@
> char buffer[PROC_NUMBUF], *end;
> struct mm_struct *mm;
> struct vm_area_struct *vma;
> + int type;
>
> memset(buffer, 0, sizeof(buffer));
> if (count > sizeof(buffer) - 1)
> count = sizeof(buffer) - 1;
> if (copy_from_user(buffer, buf, count))
> return -EFAULT;
> - if (!simple_strtol(buffer, &end, 0))
> + type = strict_strtol(buffer, &end, 0);
> + if (!type)
> return -EINVAL;
> if (*end == '\n')
> end++;
> @@ -491,9 +514,7 @@
> down_read(&mm->mmap_sem);
> for (vma = mm->mmap; vma; vma = vma->vm_next) {
> clear_refs_walk.private = vma;
> - if (!is_vm_hugetlb_page(vma))
> - walk_page_range(vma->vm_start, vma->vm_end,
> - &clear_refs_walk);
> + walk_vma_area(&clear_refs_walk, vma, type);
> }
> flush_tlb_mm(mm);
> up_read(&mm->mmap_sem);
> --- a/Documentation/filesystems/proc.txt 2009-07-20 17:29:11.000000000
> -0700
> +++ b/Documentation/filesystems/proc.txt 2009-07-27 12:08:34.000000000
> -0700
> @@ -375,6 +375,13 @@
> This file is only present if the CONFIG_MMU kernel configuration option is
> enabled.
>
> +The clear_refs entry is used to reset the Referenced bits on virtual and physical
> +pages associated with a process.
> +echo 1 > /proc/PID/clear_refs clears all pages associated with the process
> +echo 2 > /proc/PID/clear_refs clears anonymous pages only
> +echo 3 > /proc/PID/clear_refs clears file mapped pages only
> +Any other value written to the proc entry will clear all pages.
> +
Please follow the format in this document for how other /proc/PID/*
entries are described.
That format could really be improved here, perhaps you could clean
proc.txt up a little bit while you're here?
Also, as the author of clear_refs, please cc me on future revisions of
this patch.
On Mon, Jul 27, 2009 at 1:57 PM, David Rientjes<[email protected]> wrote:
> On Mon, 27 Jul 2009, Moussa A. Ba wrote:
>
>> This patch makes the clear_refs proc interface a bit more versatile.
>> It adds support ?for clearing anonymous pages, file mapped pages or both.
>>
>
> It already has support for clearing both, so you're only adding anonymous
> and file-backed filters.
>
>> The clear_refs entry is used to reset the Referenced bits on virtual and
>> physical pages associated with a process.
>> echo 1 > /proc/PID/clear_refs clears all pages associated with the process
>> echo 2 > /proc/PID/clear_refs clears anonymous pages only
>> echo 3 > /proc/PID/clear_refs clears file mapped pages only
>> Any other value written to the proc entry will clear all pages.
>>
>
> clear_refs currently accepts any non-zero value, so it's possible that
> this will break user scripts that, for whatever reason, write '2' or '3'.
> I think that's acceptable, but it would be helpful to make all other
> values a no-op similar to drop_caches at this point to avoid the potential
> for breakage if this is ever extended any further.
>
>> Selective clearing the pages has a measurable impact on performance as it
>> limits the number of page walks. ?We have been using this interface and ?this
>> adds flexibility to the user user space application implementing the reference
>> clearing.
>>
>> Signed-off-by: Jared Hulbert ([email protected])
>> Signed-off-by: Moussa A. Ba ([email protected])
>
> Email addresses in < > braces, please.
>
> The first sign-off line normally indicates who wrote the patch, but your
> submission lacks a From: line, so git would indicate you wrote it. ?If
> that's incorrect, please add a From: line as described in
> Documentation/SubmittingPatches. ?If it's correct, please reorder your
> sign-off lines.
>
I will reorder the sign-off lines
>> -------
>> Documentation/filesystems/proc.txt | ? ?7 +++++++
>> fs/proc/task_mmu.c ? ? ? ? ? ? ? ? | ? 29 +++++++++++++++++++++++++----
>> 2 files changed, 32 insertions(+), 4 deletions(-)
>>
>> --- a/fs/proc/task_mmu.c ? ? ?2009-07-21 14:30:01.000000000 -0700
>> +++ b/fs/proc/task_mmu.c ? ? ?2009-07-27 11:46:05.000000000 -0700
>> @@ -462,6 +462,27 @@
>> ? ? ? return 0;
>> ?}
>>
>> +static void walk_vma_area(struct mm_walk *this_walk,
>> + ? ? ? ? ? ? ? ? ? ? ? struct vm_area_struct *vma, int type)
>> +{
>
> This is a very generic name for something that is only applicable to
> clear_refs, so please name it accordingly. ?This will also avoid having to
> pass the struct mm_walk * in since its only user is clear_refs_walk.
>
Done.
>> +
>> + ? ? /* Writing 2 to /proc/pid/clear_refs will clear all Anonymous
>> + ? ? ?* pages.
>> + ? ? ?*
>> + ? ? ?* Writing 3 to /proc/pid/clear_refs will clear all file mapped
>> + ? ? ?* pages.
>> + ? ? ?*
>> + ? ? ?* Writing any other value including 1 will clear all pages
>> + ? ? ?*/
>
> Documentation/CodingStyle would suggest this format:
>
> ? ? ? ?/*
> ? ? ? ? * Multi-line kernel comments always start ..
> ? ? ? ? * with an empty first line.
> ? ? ? ? */
>
Done.
>> + ? ? if (is_vm_hugetlb_page(vma))
>> + ? ? ? ? ? ? return;
>> + ? ? if (type == 2 && vma->vm_file)
>> + ? ? ? ? ? ? return;
>> + ? ? if (type == 3 && !vma->vm_file)
>> + ? ? ? ? ? ? return;
>> + ? ? walk_page_range(vma->vm_start, vma->vm_end, this_walk);
>> +}
>
> K&R would suggest #define's (or enums) for those hard-coded values. ?I
> think that's already been suggested for this patch, actually.
>
Would this be acceptable?
enum clear_refs_walk_type {
CLEAR_REFS_ALL = 1,
CLEAR_REFS_ANON = 2,
CLEAR_REFS_MAPPED = 3
};
static void clear_refs_walk_vma_area(struct mm_walk *this_walk,
struct vm_area_struct *vma, enum clear_refs_walk_type type)
{
/*
* Writing 1 to /proc/pid/clear_refs clears all pages.
* Writing 2 to /proc/pid/clear_refs clears Anonymous pages.
* Writing 3 to /proc/pid/clear_refs clears file mapped pages.
*/
if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED)
return;
if (is_vm_hugetlb_page(vma))
return;
if (type == CLEAR_REFS_ANON && vma->vm_file)
return;
if (type == CLEAR_REFS_MAPPED && !vma->vm_file)
return;
walk_page_range(vma->vm_start, vma->vm_end, this_walk);
}
>> +
>> ?static ssize_t clear_refs_write(struct file *file, const char __user * buf,
>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? size_t count, loff_t * ppos)
>> ?{
>> @@ -469,13 +490,15 @@
>> ? ? ? char buffer[PROC_NUMBUF], *end;
>> ? ? ? struct mm_struct *mm;
>> ? ? ? struct vm_area_struct *vma;
>> + ? ? int type;
>>
>> ? ? ? memset(buffer, 0, sizeof(buffer));
>> ? ? ? if (count > sizeof(buffer) - 1)
>> ? ? ? ? ? ? ? count = sizeof(buffer) - 1;
>> ? ? ? if (copy_from_user(buffer, buf, count))
>> ? ? ? ? ? ? ? return -EFAULT;
>> - ? ? if (!simple_strtol(buffer, &end, 0))
>> + ? ? type = strict_strtol(buffer, &end, 0);
>> + ? ? if (!type)
>> ? ? ? ? ? ? ? return -EINVAL;
>> ? ? ? if (*end == '\n')
>> ? ? ? ? ? ? ? end++;
>> @@ -491,9 +514,7 @@
>> ? ? ? ? ? ? ? down_read(&mm->mmap_sem);
>> ? ? ? ? ? ? ? for (vma = mm->mmap; vma; vma = vma->vm_next) {
>> ? ? ? ? ? ? ? ? ? ? ? clear_refs_walk.private = vma;
>> - ? ? ? ? ? ? ? ? ? ? if (!is_vm_hugetlb_page(vma))
>> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? walk_page_range(vma->vm_start, vma->vm_end,
>> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? &clear_refs_walk);
>> + ? ? ? ? ? ? ? ? ? ? walk_vma_area(&clear_refs_walk, vma, type);
>> ? ? ? ? ? ? ? }
>> ? ? ? ? ? ? ? flush_tlb_mm(mm);
>> ? ? ? ? ? ? ? up_read(&mm->mmap_sem);
>> --- a/Documentation/filesystems/proc.txt ? ? ?2009-07-20 17:29:11.000000000
>> -0700
>> +++ b/Documentation/filesystems/proc.txt ? ? ?2009-07-27 12:08:34.000000000
>> -0700
>> @@ -375,6 +375,13 @@
>> ?This file is only present if the CONFIG_MMU kernel configuration option is
>> ?enabled.
>>
>> +The clear_refs entry is used to reset the Referenced bits on virtual and physical
>> +pages associated with a process.
>> +echo 1 > /proc/PID/clear_refs clears all pages associated with the process
>> +echo 2 > /proc/PID/clear_refs clears anonymous pages only
>> +echo 3 > /proc/PID/clear_refs clears file mapped pages only
>> +Any other value written to the proc entry will clear all pages.
>> +
>
> Please follow the format in this document for how other /proc/PID/*
> entries are described.
>
> That format could really be improved here, perhaps you could clean
> proc.txt up a little bit while you're here?
>
>
I am not sure what you mean by "clean" proc.txt, I did not detect much
formatting in the PID proc enries description, beyond what I rewrote
below:
The /proc/PID/clear_refs is used to reset the Referenced bits on virtual and
physical pages associated with a process.
To clear all pages associated with the process
> echo 1 > /proc/PID/clear_refs
To clear all anonymous pages associated with the process
> echo 2 > /proc/PID/clear_refs
To clear all file mapped pages associated with the process
> echo 3 > /proc/PID/clear_refs
Any other value written to /proc/PID/clear_refs will have no effect.
> Also, as the author of clear_refs, please cc me on future revisions of
> this patch.
>
I shall.
On Mon, 2009-07-27 at 15:14 -0700, Moussa Ba wrote:
> On Mon, Jul 27, 2009 at 1:57 PM, David Rientjes<[email protected]> wrote:
> > On Mon, 27 Jul 2009, Moussa A. Ba wrote:
> >
> >> This patch makes the clear_refs proc interface a bit more versatile.
> >> It adds support for clearing anonymous pages, file mapped pages or both.
> >>
> >
> > It already has support for clearing both, so you're only adding anonymous
> > and file-backed filters.
> >
> >> The clear_refs entry is used to reset the Referenced bits on virtual and
> >> physical pages associated with a process.
> >> echo 1 > /proc/PID/clear_refs clears all pages associated with the process
> >> echo 2 > /proc/PID/clear_refs clears anonymous pages only
> >> echo 3 > /proc/PID/clear_refs clears file mapped pages only
> >> Any other value written to the proc entry will clear all pages.
> >>
> >
> > clear_refs currently accepts any non-zero value, so it's possible that
> > this will break user scripts that, for whatever reason, write '2' or '3'.
> > I think that's acceptable, but it would be helpful to make all other
> > values a no-op similar to drop_caches at this point to avoid the potential
> > for breakage if this is ever extended any further.
> >
> >> Selective clearing the pages has a measurable impact on performance as it
> >> limits the number of page walks. We have been using this interface and this
> >> adds flexibility to the user user space application implementing the reference
> >> clearing.
> >>
> >> Signed-off-by: Jared Hulbert ([email protected])
> >> Signed-off-by: Moussa A. Ba ([email protected])
> >
> > Email addresses in < > braces, please.
> >
> > The first sign-off line normally indicates who wrote the patch, but your
> > submission lacks a From: line, so git would indicate you wrote it. If
> > that's incorrect, please add a From: line as described in
> > Documentation/SubmittingPatches. If it's correct, please reorder your
> > sign-off lines.
> >
> I will reorder the sign-off lines
> >> -------
> >> Documentation/filesystems/proc.txt | 7 +++++++
> >> fs/proc/task_mmu.c | 29 +++++++++++++++++++++++++----
> >> 2 files changed, 32 insertions(+), 4 deletions(-)
> >>
> >> --- a/fs/proc/task_mmu.c 2009-07-21 14:30:01.000000000 -0700
> >> +++ b/fs/proc/task_mmu.c 2009-07-27 11:46:05.000000000 -0700
> >> @@ -462,6 +462,27 @@
> >> return 0;
> >> }
> >>
> >> +static void walk_vma_area(struct mm_walk *this_walk,
> >> + struct vm_area_struct *vma, int type)
> >> +{
> >
> > This is a very generic name for something that is only applicable to
> > clear_refs, so please name it accordingly. This will also avoid having to
> > pass the struct mm_walk * in since its only user is clear_refs_walk.
> >
> Done.
> >> +
> >> + /* Writing 2 to /proc/pid/clear_refs will clear all Anonymous
> >> + * pages.
> >> + *
> >> + * Writing 3 to /proc/pid/clear_refs will clear all file mapped
> >> + * pages.
> >> + *
> >> + * Writing any other value including 1 will clear all pages
> >> + */
> >
> > Documentation/CodingStyle would suggest this format:
> >
> > /*
> > * Multi-line kernel comments always start ..
> > * with an empty first line.
> > */
> >
> Done.
> >> + if (is_vm_hugetlb_page(vma))
> >> + return;
> >> + if (type == 2 && vma->vm_file)
> >> + return;
> >> + if (type == 3 && !vma->vm_file)
> >> + return;
> >> + walk_page_range(vma->vm_start, vma->vm_end, this_walk);
> >> +}
> >
> > K&R would suggest #define's (or enums) for those hard-coded values. I
> > think that's already been suggested for this patch, actually.
> >
>
> Would this be acceptable?
>
> enum clear_refs_walk_type {
> CLEAR_REFS_ALL = 1,
> CLEAR_REFS_ANON = 2,
> CLEAR_REFS_MAPPED = 3
> };
I don't see a scenario where we use these values in more than one place,
so I don't really see this as much of an improvement given that the
magic numbers are already documented clearly in situ. But I think we
tend to lean towards #defines rather than enums in any case.
> static void clear_refs_walk_vma_area(struct mm_walk *this_walk,
> struct vm_area_struct *vma, enum clear_refs_walk_type type)
> {
>
> /*
> * Writing 1 to /proc/pid/clear_refs clears all pages.
> * Writing 2 to /proc/pid/clear_refs clears Anonymous pages.
> * Writing 3 to /proc/pid/clear_refs clears file mapped pages.
> */
> if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED)
> return;
> if (is_vm_hugetlb_page(vma))
> return;
> if (type == CLEAR_REFS_ANON && vma->vm_file)
> return;
> if (type == CLEAR_REFS_MAPPED && !vma->vm_file)
> return;
> walk_page_range(vma->vm_start, vma->vm_end, this_walk);
> }
>
>
> >> +
> >> static ssize_t clear_refs_write(struct file *file, const char __user * buf,
> >> size_t count, loff_t * ppos)
> >> {
> >> @@ -469,13 +490,15 @@
> >> char buffer[PROC_NUMBUF], *end;
> >> struct mm_struct *mm;
> >> struct vm_area_struct *vma;
> >> + int type;
> >>
> >> memset(buffer, 0, sizeof(buffer));
> >> if (count > sizeof(buffer) - 1)
> >> count = sizeof(buffer) - 1;
> >> if (copy_from_user(buffer, buf, count))
> >> return -EFAULT;
> >> - if (!simple_strtol(buffer, &end, 0))
> >> + type = strict_strtol(buffer, &end, 0);
> >> + if (!type)
> >> return -EINVAL;
> >> if (*end == '\n')
> >> end++;
> >> @@ -491,9 +514,7 @@
> >> down_read(&mm->mmap_sem);
> >> for (vma = mm->mmap; vma; vma = vma->vm_next) {
> >> clear_refs_walk.private = vma;
> >> - if (!is_vm_hugetlb_page(vma))
> >> - walk_page_range(vma->vm_start, vma->vm_end,
> >> - &clear_refs_walk);
> >> + walk_vma_area(&clear_refs_walk, vma, type);
> >> }
> >> flush_tlb_mm(mm);
> >> up_read(&mm->mmap_sem);
> >> --- a/Documentation/filesystems/proc.txt 2009-07-20 17:29:11.000000000
> >> -0700
> >> +++ b/Documentation/filesystems/proc.txt 2009-07-27 12:08:34.000000000
> >> -0700
> >> @@ -375,6 +375,13 @@
> >> This file is only present if the CONFIG_MMU kernel configuration option is
> >> enabled.
> >>
> >> +The clear_refs entry is used to reset the Referenced bits on virtual and physical
> >> +pages associated with a process.
> >> +echo 1 > /proc/PID/clear_refs clears all pages associated with the process
> >> +echo 2 > /proc/PID/clear_refs clears anonymous pages only
> >> +echo 3 > /proc/PID/clear_refs clears file mapped pages only
> >> +Any other value written to the proc entry will clear all pages.
> >> +
> >
> > Please follow the format in this document for how other /proc/PID/*
> > entries are described.
> >
> > That format could really be improved here, perhaps you could clean
> > proc.txt up a little bit while you're here?
> >
> >
>
> I am not sure what you mean by "clean" proc.txt, I did not detect much
> formatting in the PID proc enries description, beyond what I rewrote
> below:
>
>
> The /proc/PID/clear_refs is used to reset the Referenced bits on virtual and
> physical pages associated with a process.
> To clear all pages associated with the process
> > echo 1 > /proc/PID/clear_refs
>
> To clear all anonymous pages associated with the process
> > echo 2 > /proc/PID/clear_refs
>
> To clear all file mapped pages associated with the process
> > echo 3 > /proc/PID/clear_refs
> Any other value written to /proc/PID/clear_refs will have no effect.
>
>
> > Also, as the author of clear_refs, please cc me on future revisions of
> > this patch.
> >
>
> I shall.
--
http://selenic.com : development and support for Mercurial and Linux
On Mon, 27 Jul 2009, Moussa Ba wrote:
> > clear_refs currently accepts any non-zero value, so it's possible that
> > this will break user scripts that, for whatever reason, write '2' or '3'.
> > I think that's acceptable, but it would be helpful to make all other
> > values a no-op similar to drop_caches at this point to avoid the potential
> > for breakage if this is ever extended any further.
> >
In your latest post, I see you implemented this in
clear_refs_walk_vma_area() by checking for
CLEAR_REFS_ALL > type > CLEAR_REFS_MAPPED. Thanks! Don't forget to
update Documentation/filesystems/proc.txt to specify that.
> >> Selective clearing the pages has a measurable impact on performance as it
> >> limits the number of page walks. ?We have been using this interface and ?this
> >> adds flexibility to the user user space application implementing the reference
> >> clearing.
> >>
> >> Signed-off-by: Jared Hulbert ([email protected])
> >> Signed-off-by: Moussa A. Ba ([email protected])
> >
> > Email addresses in < > braces, please.
> >
> > The first sign-off line normally indicates who wrote the patch, but your
> > submission lacks a From: line, so git would indicate you wrote it. ?If
> > that's incorrect, please add a From: line as described in
> > Documentation/SubmittingPatches. ?If it's correct, please reorder your
> > sign-off lines.
> >
> I will reorder the sign-off lines
Ok, that removes the ambiguity concerning authorship, thanks.
> >> -------
> >> Documentation/filesystems/proc.txt | ? ?7 +++++++
> >> fs/proc/task_mmu.c ? ? ? ? ? ? ? ? | ? 29 +++++++++++++++++++++++++----
> >> 2 files changed, 32 insertions(+), 4 deletions(-)
> >>
> >> --- a/fs/proc/task_mmu.c ? ? ?2009-07-21 14:30:01.000000000 -0700
> >> +++ b/fs/proc/task_mmu.c ? ? ?2009-07-27 11:46:05.000000000 -0700
> >> @@ -462,6 +462,27 @@
> >> ? ? ? return 0;
> >> ?}
> >>
> >> +static void walk_vma_area(struct mm_walk *this_walk,
> >> + ? ? ? ? ? ? ? ? ? ? ? struct vm_area_struct *vma, int type)
> >> +{
> >
> > This is a very generic name for something that is only applicable to
> > clear_refs, so please name it accordingly. ?This will also avoid having to
> > pass the struct mm_walk * in since its only user is clear_refs_walk.
> >
> Done.
> >> +
> >> + ? ? /* Writing 2 to /proc/pid/clear_refs will clear all Anonymous
> >> + ? ? ?* pages.
> >> + ? ? ?*
> >> + ? ? ?* Writing 3 to /proc/pid/clear_refs will clear all file mapped
> >> + ? ? ?* pages.
> >> + ? ? ?*
> >> + ? ? ?* Writing any other value including 1 will clear all pages
> >> + ? ? ?*/
> >
> > Documentation/CodingStyle would suggest this format:
> >
> > ? ? ? ?/*
> > ? ? ? ? * Multi-line kernel comments always start ..
> > ? ? ? ? * with an empty first line.
> > ? ? ? ? */
> >
> Done.
> >> + ? ? if (is_vm_hugetlb_page(vma))
> >> + ? ? ? ? ? ? return;
> >> + ? ? if (type == 2 && vma->vm_file)
> >> + ? ? ? ? ? ? return;
> >> + ? ? if (type == 3 && !vma->vm_file)
> >> + ? ? ? ? ? ? return;
> >> + ? ? walk_page_range(vma->vm_start, vma->vm_end, this_walk);
> >> +}
> >
> > K&R would suggest #define's (or enums) for those hard-coded values. ?I
> > think that's already been suggested for this patch, actually.
> >
>
> Would this be acceptable?
>
> enum clear_refs_walk_type {
> CLEAR_REFS_ALL = 1,
> CLEAR_REFS_ANON = 2,
> CLEAR_REFS_MAPPED = 3
> };
>
#define seems more appropriate for this particular use case. We simply
try to avoid hard-coded integers in source code (rationale: K&R).
> static void clear_refs_walk_vma_area(struct mm_walk *this_walk,
> struct vm_area_struct *vma, enum clear_refs_walk_type type)
> {
Ddi you consider my suggestion of dropping the struct mm_walk * formal
since this is now a clear_refs-specific function? walk_page_range() will
always take clear_refs_walk, there's no need to pass it in.
>
> /*
> * Writing 1 to /proc/pid/clear_refs clears all pages.
> * Writing 2 to /proc/pid/clear_refs clears Anonymous pages.
> * Writing 3 to /proc/pid/clear_refs clears file mapped pages.
> */
> if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED)
> return;
> if (is_vm_hugetlb_page(vma))
> return;
> if (type == CLEAR_REFS_ANON && vma->vm_file)
> return;
> if (type == CLEAR_REFS_MAPPED && !vma->vm_file)
> return;
> walk_page_range(vma->vm_start, vma->vm_end, this_walk);
> }
>
>
> >> +
> >> ?static ssize_t clear_refs_write(struct file *file, const char __user * buf,
> >> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? size_t count, loff_t * ppos)
> >> ?{
> >> @@ -469,13 +490,15 @@
> >> ? ? ? char buffer[PROC_NUMBUF], *end;
> >> ? ? ? struct mm_struct *mm;
> >> ? ? ? struct vm_area_struct *vma;
> >> + ? ? int type;
> >>
> >> ? ? ? memset(buffer, 0, sizeof(buffer));
> >> ? ? ? if (count > sizeof(buffer) - 1)
> >> ? ? ? ? ? ? ? count = sizeof(buffer) - 1;
> >> ? ? ? if (copy_from_user(buffer, buf, count))
> >> ? ? ? ? ? ? ? return -EFAULT;
> >> - ? ? if (!simple_strtol(buffer, &end, 0))
> >> + ? ? type = strict_strtol(buffer, &end, 0);
> >> + ? ? if (!type)
> >> ? ? ? ? ? ? ? return -EINVAL;
> >> ? ? ? if (*end == '\n')
> >> ? ? ? ? ? ? ? end++;
> >> @@ -491,9 +514,7 @@
> >> ? ? ? ? ? ? ? down_read(&mm->mmap_sem);
> >> ? ? ? ? ? ? ? for (vma = mm->mmap; vma; vma = vma->vm_next) {
> >> ? ? ? ? ? ? ? ? ? ? ? clear_refs_walk.private = vma;
> >> - ? ? ? ? ? ? ? ? ? ? if (!is_vm_hugetlb_page(vma))
> >> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? walk_page_range(vma->vm_start, vma->vm_end,
> >> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? &clear_refs_walk);
> >> + ? ? ? ? ? ? ? ? ? ? walk_vma_area(&clear_refs_walk, vma, type);
> >> ? ? ? ? ? ? ? }
> >> ? ? ? ? ? ? ? flush_tlb_mm(mm);
> >> ? ? ? ? ? ? ? up_read(&mm->mmap_sem);
> >> --- a/Documentation/filesystems/proc.txt ? ? ?2009-07-20 17:29:11.000000000
> >> -0700
> >> +++ b/Documentation/filesystems/proc.txt ? ? ?2009-07-27 12:08:34.000000000
> >> -0700
> >> @@ -375,6 +375,13 @@
> >> ?This file is only present if the CONFIG_MMU kernel configuration option is
> >> ?enabled.
> >>
> >> +The clear_refs entry is used to reset the Referenced bits on virtual and physical
> >> +pages associated with a process.
> >> +echo 1 > /proc/PID/clear_refs clears all pages associated with the process
> >> +echo 2 > /proc/PID/clear_refs clears anonymous pages only
> >> +echo 3 > /proc/PID/clear_refs clears file mapped pages only
> >> +Any other value written to the proc entry will clear all pages.
> >> +
> >
> > Please follow the format in this document for how other /proc/PID/*
> > entries are described.
> >
> > That format could really be improved here, perhaps you could clean
> > proc.txt up a little bit while you're here?
> >
> >
>
> I am not sure what you mean by "clean" proc.txt, I did not detect much
> formatting in the PID proc enries description, beyond what I rewrote
> below:
>
Right, the file is pretty sloppy, so I was wondering if you wanted to take
the time to clean it up a little so there's a more consistent style.
>
> The /proc/PID/clear_refs is used to reset the Referenced bits on virtual and
Probably better to say PG_referenced instead of Referenced.
> physical pages associated with a process.
> To clear all pages associated with the process
> > echo 1 > /proc/PID/clear_refs
>
> To clear all anonymous pages associated with the process
> > echo 2 > /proc/PID/clear_refs
>
> To clear all file mapped pages associated with the process
> > echo 3 > /proc/PID/clear_refs
> Any other value written to /proc/PID/clear_refs will have no effect.
>
You should probably say these all clear the bit instead of saying they
"clear pages," which doesn't make a lot of sense.
On Mon, Jul 27, 2009 at 3:49 PM, David Rientjes<[email protected]> wrote:
> On Mon, 27 Jul 2009, Moussa Ba wrote:
>
>> > clear_refs currently accepts any non-zero value, so it's possible that
>> > this will break user scripts that, for whatever reason, write '2' or '3'.
>> > I think that's acceptable, but it would be helpful to make all other
>> > values a no-op similar to drop_caches at this point to avoid the potential
>> > for breakage if this is ever extended any further.
>> >
>
> In your latest post, I see you implemented this in
> clear_refs_walk_vma_area() by checking for
> CLEAR_REFS_ALL > type > CLEAR_REFS_MAPPED. ?Thanks! ?Don't forget to
> update Documentation/filesystems/proc.txt to specify that.
>
>> >> Selective clearing the pages has a measurable impact on performance as it
>> >> limits the number of page walks. ?We have been using this interface and ?this
>> >> adds flexibility to the user user space application implementing the reference
>> >> clearing.
>> >>
>> >> Signed-off-by: Jared Hulbert ([email protected])
>> >> Signed-off-by: Moussa A. Ba ([email protected])
>> >
>> > Email addresses in < > braces, please.
>> >
>> > The first sign-off line normally indicates who wrote the patch, but your
>> > submission lacks a From: line, so git would indicate you wrote it. ?If
>> > that's incorrect, please add a From: line as described in
>> > Documentation/SubmittingPatches. ?If it's correct, please reorder your
>> > sign-off lines.
>> >
>> I will reorder the sign-off lines
>
> Ok, that removes the ambiguity concerning authorship, thanks.
>
>> >> -------
>> >> Documentation/filesystems/proc.txt | ? ?7 +++++++
>> >> fs/proc/task_mmu.c ? ? ? ? ? ? ? ? | ? 29 +++++++++++++++++++++++++----
>> >> 2 files changed, 32 insertions(+), 4 deletions(-)
>> >>
>> >> --- a/fs/proc/task_mmu.c ? ? ?2009-07-21 14:30:01.000000000 -0700
>> >> +++ b/fs/proc/task_mmu.c ? ? ?2009-07-27 11:46:05.000000000 -0700
>> >> @@ -462,6 +462,27 @@
>> >> ? ? ? return 0;
>> >> ?}
>> >>
>> >> +static void walk_vma_area(struct mm_walk *this_walk,
>> >> + ? ? ? ? ? ? ? ? ? ? ? struct vm_area_struct *vma, int type)
>> >> +{
>> >
>> > This is a very generic name for something that is only applicable to
>> > clear_refs, so please name it accordingly. ?This will also avoid having to
>> > pass the struct mm_walk * in since its only user is clear_refs_walk.
>> >
>> Done.
>> >> +
>> >> + ? ? /* Writing 2 to /proc/pid/clear_refs will clear all Anonymous
>> >> + ? ? ?* pages.
>> >> + ? ? ?*
>> >> + ? ? ?* Writing 3 to /proc/pid/clear_refs will clear all file mapped
>> >> + ? ? ?* pages.
>> >> + ? ? ?*
>> >> + ? ? ?* Writing any other value including 1 will clear all pages
>> >> + ? ? ?*/
>> >
>> > Documentation/CodingStyle would suggest this format:
>> >
>> > ? ? ? ?/*
>> > ? ? ? ? * Multi-line kernel comments always start ..
>> > ? ? ? ? * with an empty first line.
>> > ? ? ? ? */
>> >
>> Done.
>> >> + ? ? if (is_vm_hugetlb_page(vma))
>> >> + ? ? ? ? ? ? return;
>> >> + ? ? if (type == 2 && vma->vm_file)
>> >> + ? ? ? ? ? ? return;
>> >> + ? ? if (type == 3 && !vma->vm_file)
>> >> + ? ? ? ? ? ? return;
>> >> + ? ? walk_page_range(vma->vm_start, vma->vm_end, this_walk);
>> >> +}
>> >
>> > K&R would suggest #define's (or enums) for those hard-coded values. ?I
>> > think that's already been suggested for this patch, actually.
>> >
>>
>> Would this be acceptable?
>>
>> enum clear_refs_walk_type {
>> ? ? ? CLEAR_REFS_ALL ?= 1,
>> ? ? ? CLEAR_REFS_ANON = 2,
>> ? ? ? CLEAR_REFS_MAPPED = 3
>> };
>>
>
> #define seems more appropriate for this particular use case. ?We simply
> try to avoid hard-coded integers in source code (rationale: K&R).
>
>> static void clear_refs_walk_vma_area(struct mm_walk *this_walk,
>> ? ? ? ? ? ? ? ? ? ? ? ? struct vm_area_struct *vma, enum clear_refs_walk_type type)
>> {
>
> Ddi you consider my suggestion of dropping the struct mm_walk * formal
> since this is now a clear_refs-specific function? ?walk_page_range() will
> always take clear_refs_walk, there's no need to pass it in.
>
Well, I did but I would have to either pass clear_refs_walk or mm and
build the mm_walk entry inside clear_refs_walk_vma. Simply passing
the clear_refs_walk structure seemed simpler and more logical than
having to rebuild it inside the function every time it is called.
The other alternative would be to just forgo the additional function
clear_refs_walk_vma and rewrite the for loop as:
for (vma = mm->mmap; vma; vma = vma->vm_next) {
clear_refs_walk.private = vma;
if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED)
continue;
if (is_vm_hugetlb_page(vma))
continue;
if (type == CLEAR_REFS_ANON && vma->vm_file)
continue;
if (type == CLEAR_REFS_MAPPED && !vma->vm_file)
continue;
walk_page_range(vma->vm_start, vma->vm_end, this_walk);
}
>>
>> ? ? ? /*
>> ? ? ? ?* Writing 1 to /proc/pid/clear_refs clears all pages.
>> ? ? ? ?* Writing 2 to /proc/pid/clear_refs clears Anonymous pages.
>> ? ? ? ?* Writing 3 to /proc/pid/clear_refs clears file mapped pages.
>> ? ? ? ?*/
>> ? ? ? if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED)
>> ? ? ? ? ? ? ? return;
>> ? ? ? if (is_vm_hugetlb_page(vma))
>> ? ? ? ? ? ? ? return;
>> ? ? ? if (type == CLEAR_REFS_ANON && vma->vm_file)
>> ? ? ? ? ? ? ? return;
>> ? ? ? if (type == CLEAR_REFS_MAPPED && !vma->vm_file)
>> ? ? ? ? ? ? ? return;
>> ? ? ? walk_page_range(vma->vm_start, vma->vm_end, this_walk);
>> }
>>
>>
>> >> +
>> >> ?static ssize_t clear_refs_write(struct file *file, const char __user * buf,
>> >> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? size_t count, loff_t * ppos)
>> >> ?{
>> >> @@ -469,13 +490,15 @@
>> >> ? ? ? char buffer[PROC_NUMBUF], *end;
>> >> ? ? ? struct mm_struct *mm;
>> >> ? ? ? struct vm_area_struct *vma;
>> >> + ? ? int type;
>> >>
>> >> ? ? ? memset(buffer, 0, sizeof(buffer));
>> >> ? ? ? if (count > sizeof(buffer) - 1)
>> >> ? ? ? ? ? ? ? count = sizeof(buffer) - 1;
>> >> ? ? ? if (copy_from_user(buffer, buf, count))
>> >> ? ? ? ? ? ? ? return -EFAULT;
>> >> - ? ? if (!simple_strtol(buffer, &end, 0))
>> >> + ? ? type = strict_strtol(buffer, &end, 0);
>> >> + ? ? if (!type)
>> >> ? ? ? ? ? ? ? return -EINVAL;
>> >> ? ? ? if (*end == '\n')
>> >> ? ? ? ? ? ? ? end++;
>> >> @@ -491,9 +514,7 @@
>> >> ? ? ? ? ? ? ? down_read(&mm->mmap_sem);
>> >> ? ? ? ? ? ? ? for (vma = mm->mmap; vma; vma = vma->vm_next) {
>> >> ? ? ? ? ? ? ? ? ? ? ? clear_refs_walk.private = vma;
>> >> - ? ? ? ? ? ? ? ? ? ? if (!is_vm_hugetlb_page(vma))
>> >> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? walk_page_range(vma->vm_start, vma->vm_end,
>> >> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? &clear_refs_walk);
>> >> + ? ? ? ? ? ? ? ? ? ? walk_vma_area(&clear_refs_walk, vma, type);
>> >> ? ? ? ? ? ? ? }
>> >> ? ? ? ? ? ? ? flush_tlb_mm(mm);
>> >> ? ? ? ? ? ? ? up_read(&mm->mmap_sem);
>> >> --- a/Documentation/filesystems/proc.txt ? ? ?2009-07-20 17:29:11.000000000
>> >> -0700
>> >> +++ b/Documentation/filesystems/proc.txt ? ? ?2009-07-27 12:08:34.000000000
>> >> -0700
>> >> @@ -375,6 +375,13 @@
>> >> ?This file is only present if the CONFIG_MMU kernel configuration option is
>> >> ?enabled.
>> >>
>> >> +The clear_refs entry is used to reset the Referenced bits on virtual and physical
>> >> +pages associated with a process.
>> >> +echo 1 > /proc/PID/clear_refs clears all pages associated with the process
>> >> +echo 2 > /proc/PID/clear_refs clears anonymous pages only
>> >> +echo 3 > /proc/PID/clear_refs clears file mapped pages only
>> >> +Any other value written to the proc entry will clear all pages.
>> >> +
>> >
>> > Please follow the format in this document for how other /proc/PID/*
>> > entries are described.
>> >
>> > That format could really be improved here, perhaps you could clean
>> > proc.txt up a little bit while you're here?
>> >
>> >
>>
>> I am not sure what you mean by "clean" proc.txt, I did not detect much
>> formatting in the PID proc enries description, beyond what I rewrote
>> below:
>>
>
> Right, the file is pretty sloppy, so I was wondering if you wanted to take
> the time to clean it up a little so there's a more consistent style.
>
>>
>> The /proc/PID/clear_refs is used to reset the Referenced bits on virtual and
>
> Probably better to say PG_referenced instead of Referenced.
Okay, though it would have to also state that the pte bits are cleared as well.
>
>> physical pages associated with a process.
>> To clear all pages associated with the process
>> ? ? > echo 1 > /proc/PID/clear_refs
>>
>> To clear all anonymous pages associated with the process
>> ? ? > echo 2 > /proc/PID/clear_refs
>>
>> To clear all file mapped pages associated with the process
>> ? ? > echo 3 > /proc/PID/clear_refs
>> Any other value written to /proc/PID/clear_refs will have no effect.
>>
>
> You should probably say these all clear the bit instead of saying they
> "clear pages," which doesn't make a lot of sense.
You are correct.
On Mon, Jul 27, 2009 at 4:38 PM, Moussa Ba<[email protected]> wrote:
> On Mon, Jul 27, 2009 at 3:49 PM, David Rientjes<[email protected]> wrote:
>> On Mon, 27 Jul 2009, Moussa Ba wrote:
>>
>>> > clear_refs currently accepts any non-zero value, so it's possible that
>>> > this will break user scripts that, for whatever reason, write '2' or '3'.
>>> > I think that's acceptable, but it would be helpful to make all other
>>> > values a no-op similar to drop_caches at this point to avoid the potential
>>> > for breakage if this is ever extended any further.
>>> >
>>
>> In your latest post, I see you implemented this in
>> clear_refs_walk_vma_area() by checking for
>> CLEAR_REFS_ALL > type > CLEAR_REFS_MAPPED. ?Thanks! ?Don't forget to
>> update Documentation/filesystems/proc.txt to specify that.
>>
>>> >> Selective clearing the pages has a measurable impact on performance as it
>>> >> limits the number of page walks. ?We have been using this interface and ?this
>>> >> adds flexibility to the user user space application implementing the reference
>>> >> clearing.
>>> >>
>>> >> Signed-off-by: Jared Hulbert ([email protected])
>>> >> Signed-off-by: Moussa A. Ba ([email protected])
>>> >
>>> > Email addresses in < > braces, please.
>>> >
>>> > The first sign-off line normally indicates who wrote the patch, but your
>>> > submission lacks a From: line, so git would indicate you wrote it. ?If
>>> > that's incorrect, please add a From: line as described in
>>> > Documentation/SubmittingPatches. ?If it's correct, please reorder your
>>> > sign-off lines.
>>> >
>>> I will reorder the sign-off lines
>>
>> Ok, that removes the ambiguity concerning authorship, thanks.
>>
>>> >> -------
>>> >> Documentation/filesystems/proc.txt | ? ?7 +++++++
>>> >> fs/proc/task_mmu.c ? ? ? ? ? ? ? ? | ? 29 +++++++++++++++++++++++++----
>>> >> 2 files changed, 32 insertions(+), 4 deletions(-)
>>> >>
>>> >> --- a/fs/proc/task_mmu.c ? ? ?2009-07-21 14:30:01.000000000 -0700
>>> >> +++ b/fs/proc/task_mmu.c ? ? ?2009-07-27 11:46:05.000000000 -0700
>>> >> @@ -462,6 +462,27 @@
>>> >> ? ? ? return 0;
>>> >> ?}
>>> >>
>>> >> +static void walk_vma_area(struct mm_walk *this_walk,
>>> >> + ? ? ? ? ? ? ? ? ? ? ? struct vm_area_struct *vma, int type)
>>> >> +{
>>> >
>>> > This is a very generic name for something that is only applicable to
>>> > clear_refs, so please name it accordingly. ?This will also avoid having to
>>> > pass the struct mm_walk * in since its only user is clear_refs_walk.
>>> >
>>> Done.
>>> >> +
>>> >> + ? ? /* Writing 2 to /proc/pid/clear_refs will clear all Anonymous
>>> >> + ? ? ?* pages.
>>> >> + ? ? ?*
>>> >> + ? ? ?* Writing 3 to /proc/pid/clear_refs will clear all file mapped
>>> >> + ? ? ?* pages.
>>> >> + ? ? ?*
>>> >> + ? ? ?* Writing any other value including 1 will clear all pages
>>> >> + ? ? ?*/
>>> >
>>> > Documentation/CodingStyle would suggest this format:
>>> >
>>> > ? ? ? ?/*
>>> > ? ? ? ? * Multi-line kernel comments always start ..
>>> > ? ? ? ? * with an empty first line.
>>> > ? ? ? ? */
>>> >
>>> Done.
>>> >> + ? ? if (is_vm_hugetlb_page(vma))
>>> >> + ? ? ? ? ? ? return;
>>> >> + ? ? if (type == 2 && vma->vm_file)
>>> >> + ? ? ? ? ? ? return;
>>> >> + ? ? if (type == 3 && !vma->vm_file)
>>> >> + ? ? ? ? ? ? return;
>>> >> + ? ? walk_page_range(vma->vm_start, vma->vm_end, this_walk);
>>> >> +}
>>> >
>>> > K&R would suggest #define's (or enums) for those hard-coded values. ?I
>>> > think that's already been suggested for this patch, actually.
>>> >
>>>
>>> Would this be acceptable?
>>>
>>> enum clear_refs_walk_type {
>>> ? ? ? CLEAR_REFS_ALL ?= 1,
>>> ? ? ? CLEAR_REFS_ANON = 2,
>>> ? ? ? CLEAR_REFS_MAPPED = 3
>>> };
>>>
>>
>> #define seems more appropriate for this particular use case. ?We simply
>> try to avoid hard-coded integers in source code (rationale: K&R).
>>
>>> static void clear_refs_walk_vma_area(struct mm_walk *this_walk,
>>> ? ? ? ? ? ? ? ? ? ? ? ? struct vm_area_struct *vma, enum clear_refs_walk_type type)
>>> {
>>
>> Ddi you consider my suggestion of dropping the struct mm_walk * formal
>> since this is now a clear_refs-specific function? ?walk_page_range() will
>> always take clear_refs_walk, there's no need to pass it in.
>>
>
> Well, I did but I would have to either pass clear_refs_walk or mm and
> build the mm_walk entry inside clear_refs_walk_vma. ?Simply passing
> the clear_refs_walk structure seemed simpler and more logical than
> having to rebuild it inside the function every time it is called.
>
> The other alternative would be to just forgo the additional function
> clear_refs_walk_vma and rewrite the for loop as:
>
> ? ? ? ?for (vma = mm->mmap; vma; vma = vma->vm_next) {
> ? ? ? ? ? ? ? ? ? ? ? ?clear_refs_walk.private = vma;
> if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED)
> continue;
> ? ? ? ? ? ? ? ? ? ? ? ?if (is_vm_hugetlb_page(vma))
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?continue;
> ? ? ? ? ? ? ? ? ? ? ? ?if (type == CLEAR_REFS_ANON && vma->vm_file)
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?continue;
> ? ? ? ? ? ? ? ? ? ? ? ?if (type == CLEAR_REFS_MAPPED && !vma->vm_file)
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?continue;
> ? ? ? ? ? ? ? ? ? ? ? ?walk_page_range(vma->vm_start, vma->vm_end, this_walk);
> ? ? ? ?}
>
>
Apologies the checking of the range of values should be outside the for loop.
if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED)
return -EINVAl;
for (vma = mm->mmap; vma; vma = vma->vm_next) {
clear_refs_walk.private = vma;
if (is_vm_hugetlb_page(vma))
continue;
if (type == CLEAR_REFS_ANON && vma->vm_file)
continue;
if (type == CLEAR_REFS_MAPPED && !vma->vm_file)
continue;
walk_page_range(vma->vm_start, vma->vm_end, this_walk);
}
>
>>>
>>> ? ? ? /*
>>> ? ? ? ?* Writing 1 to /proc/pid/clear_refs clears all pages.
>>> ? ? ? ?* Writing 2 to /proc/pid/clear_refs clears Anonymous pages.
>>> ? ? ? ?* Writing 3 to /proc/pid/clear_refs clears file mapped pages.
>>> ? ? ? ?*/
>>> ? ? ? if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED)
>>> ? ? ? ? ? ? ? return;
>>> ? ? ? if (is_vm_hugetlb_page(vma))
>>> ? ? ? ? ? ? ? return;
>>> ? ? ? if (type == CLEAR_REFS_ANON && vma->vm_file)
>>> ? ? ? ? ? ? ? return;
>>> ? ? ? if (type == CLEAR_REFS_MAPPED && !vma->vm_file)
>>> ? ? ? ? ? ? ? return;
>>> ? ? ? walk_page_range(vma->vm_start, vma->vm_end, this_walk);
>>> }
>>>
>>>
>>> >> +
>>> >> ?static ssize_t clear_refs_write(struct file *file, const char __user * buf,
>>> >> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? size_t count, loff_t * ppos)
>>> >> ?{
>>> >> @@ -469,13 +490,15 @@
>>> >> ? ? ? char buffer[PROC_NUMBUF], *end;
>>> >> ? ? ? struct mm_struct *mm;
>>> >> ? ? ? struct vm_area_struct *vma;
>>> >> + ? ? int type;
>>> >>
>>> >> ? ? ? memset(buffer, 0, sizeof(buffer));
>>> >> ? ? ? if (count > sizeof(buffer) - 1)
>>> >> ? ? ? ? ? ? ? count = sizeof(buffer) - 1;
>>> >> ? ? ? if (copy_from_user(buffer, buf, count))
>>> >> ? ? ? ? ? ? ? return -EFAULT;
>>> >> - ? ? if (!simple_strtol(buffer, &end, 0))
>>> >> + ? ? type = strict_strtol(buffer, &end, 0);
>>> >> + ? ? if (!type)
>>> >> ? ? ? ? ? ? ? return -EINVAL;
>>> >> ? ? ? if (*end == '\n')
>>> >> ? ? ? ? ? ? ? end++;
>>> >> @@ -491,9 +514,7 @@
>>> >> ? ? ? ? ? ? ? down_read(&mm->mmap_sem);
>>> >> ? ? ? ? ? ? ? for (vma = mm->mmap; vma; vma = vma->vm_next) {
>>> >> ? ? ? ? ? ? ? ? ? ? ? clear_refs_walk.private = vma;
>>> >> - ? ? ? ? ? ? ? ? ? ? if (!is_vm_hugetlb_page(vma))
>>> >> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? walk_page_range(vma->vm_start, vma->vm_end,
>>> >> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? &clear_refs_walk);
>>> >> + ? ? ? ? ? ? ? ? ? ? walk_vma_area(&clear_refs_walk, vma, type);
>>> >> ? ? ? ? ? ? ? }
>>> >> ? ? ? ? ? ? ? flush_tlb_mm(mm);
>>> >> ? ? ? ? ? ? ? up_read(&mm->mmap_sem);
>>> >> --- a/Documentation/filesystems/proc.txt ? ? ?2009-07-20 17:29:11.000000000
>>> >> -0700
>>> >> +++ b/Documentation/filesystems/proc.txt ? ? ?2009-07-27 12:08:34.000000000
>>> >> -0700
>>> >> @@ -375,6 +375,13 @@
>>> >> ?This file is only present if the CONFIG_MMU kernel configuration option is
>>> >> ?enabled.
>>> >>
>>> >> +The clear_refs entry is used to reset the Referenced bits on virtual and physical
>>> >> +pages associated with a process.
>>> >> +echo 1 > /proc/PID/clear_refs clears all pages associated with the process
>>> >> +echo 2 > /proc/PID/clear_refs clears anonymous pages only
>>> >> +echo 3 > /proc/PID/clear_refs clears file mapped pages only
>>> >> +Any other value written to the proc entry will clear all pages.
>>> >> +
>>> >
>>> > Please follow the format in this document for how other /proc/PID/*
>>> > entries are described.
>>> >
>>> > That format could really be improved here, perhaps you could clean
>>> > proc.txt up a little bit while you're here?
>>> >
>>> >
>>>
>>> I am not sure what you mean by "clean" proc.txt, I did not detect much
>>> formatting in the PID proc enries description, beyond what I rewrote
>>> below:
>>>
>>
>> Right, the file is pretty sloppy, so I was wondering if you wanted to take
>> the time to clean it up a little so there's a more consistent style.
>>
>>>
>>> The /proc/PID/clear_refs is used to reset the Referenced bits on virtual and
>>
>> Probably better to say PG_referenced instead of Referenced.
>
> Okay, though it would have to also state that the pte bits are cleared as well.
>>
>>> physical pages associated with a process.
>>> To clear all pages associated with the process
>>> ? ? > echo 1 > /proc/PID/clear_refs
>>>
>>> To clear all anonymous pages associated with the process
>>> ? ? > echo 2 > /proc/PID/clear_refs
>>>
>>> To clear all file mapped pages associated with the process
>>> ? ? > echo 3 > /proc/PID/clear_refs
>>> Any other value written to /proc/PID/clear_refs will have no effect.
>>>
>>
>> You should probably say these all clear the bit instead of saying they
>> "clear pages," which doesn't make a lot of sense.
>
> You are correct.
>
On Mon, 27 Jul 2009, Moussa Ba wrote:
> The other alternative would be to just forgo the additional function
> clear_refs_walk_vma and rewrite the for loop as:
>
> for (vma = mm->mmap; vma; vma = vma->vm_next) {
> clear_refs_walk.private = vma;
> if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED)
> continue;
> if (is_vm_hugetlb_page(vma))
> continue;
> if (type == CLEAR_REFS_ANON && vma->vm_file)
> continue;
> if (type == CLEAR_REFS_MAPPED && !vma->vm_file)
> continue;
> walk_page_range(vma->vm_start, vma->vm_end, this_walk);
> }
>
That looks good, with the exception of the check for
type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED being in the loop.
On Mon, Jul 27, 2009 at 04:58:20PM -0700, David Rientjes wrote:
>On Mon, 27 Jul 2009, Moussa Ba wrote:
>
>> The other alternative would be to just forgo the additional function
>> clear_refs_walk_vma and rewrite the for loop as:
>>
>> for (vma = mm->mmap; vma; vma = vma->vm_next) {
>> clear_refs_walk.private = vma;
>> if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED)
>> continue;
>> if (is_vm_hugetlb_page(vma))
>> continue;
>> if (type == CLEAR_REFS_ANON && vma->vm_file)
>> continue;
>> if (type == CLEAR_REFS_MAPPED && !vma->vm_file)
>> continue;
>> walk_page_range(vma->vm_start, vma->vm_end, this_walk);
>> }
>>
>
>That looks good, with the exception of the check for
>type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED being in the loop.
And also that it looks like is_vm_hugetlb_page() is already checked
outside, no need to check it again. :-)
Thanks.
This patch adds anonymous and file backed filters to the clear_refs interface.
echo 1 > /proc/PID/clear_refs resets the bits on all pages
echo 2 > /proc/PID/clear_refs resets the bits on anonymous pages only
echo 3 > /proc/PID/clear_refs resets the bits on file backed pages only
Any other value is ignored.
---
Signed-off-by: Moussa A. Ba <[email protected]>
Signed-off-by: Jared E. Hulbert <[email protected]>
--- a/Documentation/filesystems/proc.txt 2009-07-20 17:29:11.000000000 -0700
+++ b/Documentation/filesystems/proc.txt 2009-07-27 16:58:05.000000000 -0700
@@ -375,6 +375,18 @@ of memory currently marked as referenced
This file is only present if the CONFIG_MMU kernel configuration option is
enabled.
+The /proc/PID/clear_refs is used to reset the PG_Referenced and ACCESSED/YOUNG
+bits on both physical and virtual pages associated with a process.
+To clear the bits for all the pages associated with the process
+ > echo 1 > /proc/PID/clear_refs
+
+To clear the bits for the anonymous pages associated with the process
+ > echo 2 > /proc/PID/clear_refs
+
+To clear the bits for the file backed pages associated with the process
+ > echo 3 > /proc/PID/clear_refs
+Any other value written to /proc/PID/clear_refs will have no effect.
+
1.2 Kernel data
---------------
--- a/fs/proc/task_mmu.c 2009-07-21 14:30:01.000000000 -0700
+++ b/fs/proc/task_mmu.c 2009-07-27 17:05:25.000000000 -0700
@@ -462,6 +462,10 @@ static int clear_refs_pte_range(pmd_t *
return 0;
}
+#define CLEAR_REFS_ALL 1
+#define CLEAR_REFS_ANON 2
+#define CLEAR_REFS_MAPPED 3
+
static ssize_t clear_refs_write(struct file *file, const char __user * buf,
size_t count, loff_t * ppos)
{
@@ -469,13 +473,15 @@ static ssize_t clear_refs_write(struct f
char buffer[PROC_NUMBUF], *end;
struct mm_struct *mm;
struct vm_area_struct *vma;
+ int type;
memset(buffer, 0, sizeof(buffer));
if (count > sizeof(buffer) - 1)
count = sizeof(buffer) - 1;
if (copy_from_user(buffer, buf, count))
return -EFAULT;
- if (!simple_strtol(buffer, &end, 0))
+ type = simple_strtol(buffer, &end, 0);
+ if (!type || type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED)
return -EINVAL;
if (*end == '\n')
end++;
@@ -491,9 +497,23 @@ static ssize_t clear_refs_write(struct f
down_read(&mm->mmap_sem);
for (vma = mm->mmap; vma; vma = vma->vm_next) {
clear_refs_walk.private = vma;
- if (!is_vm_hugetlb_page(vma))
- walk_page_range(vma->vm_start, vma->vm_end,
- &clear_refs_walk);
+ if (is_vm_hugetlb_page(vma))
+ continue;
+ /*
+ * Writing 1 to /proc/pid/clear_refs affects all pages.
+ *
+ * Writing 2 to /proc/pid/clear_refs only affects
+ * Anonymous pages.
+ *
+ * Writing 3 to /proc/pid/clear_refs only affects file
+ * backed pages.
+ */
+ if (type == CLEAR_REFS_ANON && vma->vm_file)
+ continue;
+ if (type == CLEAR_REFS_MAPPED && !vma->vm_file)
+ continue;
+ walk_page_range(vma->vm_start, vma->vm_end,
+ &clear_refs_walk);
}
flush_tlb_mm(mm);
up_read(&mm->mmap_sem);
On Tue, 28 Jul 2009, Moussa A. Ba wrote:
> --- a/fs/proc/task_mmu.c 2009-07-21 14:30:01.000000000 -0700
> +++ b/fs/proc/task_mmu.c 2009-07-27 17:05:25.000000000 -0700
> @@ -462,6 +462,10 @@ static int clear_refs_pte_range(pmd_t *
> return 0;
> }
>
> +#define CLEAR_REFS_ALL 1
> +#define CLEAR_REFS_ANON 2
> +#define CLEAR_REFS_MAPPED 3
> +
> static ssize_t clear_refs_write(struct file *file, const char __user * buf,
> size_t count, loff_t * ppos)
> {
> @@ -469,13 +473,15 @@ static ssize_t clear_refs_write(struct f
> char buffer[PROC_NUMBUF], *end;
> struct mm_struct *mm;
> struct vm_area_struct *vma;
> + int type;
>
> memset(buffer, 0, sizeof(buffer));
> if (count > sizeof(buffer) - 1)
> count = sizeof(buffer) - 1;
> if (copy_from_user(buffer, buf, count))
> return -EFAULT;
> - if (!simple_strtol(buffer, &end, 0))
> + type = simple_strtol(buffer, &end, 0);
> + if (!type || type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED)
> return -EINVAL;
> if (*end == '\n')
> end++;
The test for type < CLEAR_REFS_ALL covers !type.
This patch adds anonymous and file backed filters to the clear_refs interface.
echo 1 > /proc/PID/clear_refs resets the bits on all pages
echo 2 > /proc/PID/clear_refs resets the bits on anonymous pages only
echo 3 > /proc/PID/clear_refs resets the bits on file backed pages only
Any other value is ignored
---
Signed-off-by: Moussa A. Ba <[email protected]>
Signed-off-by: Jared E. Hulbert <[email protected]>
Acked-by: David Rientjes <[email protected]>
--- a/Documentation/filesystems/proc.txt 2009-07-20 17:29:11.000000000 -0700
+++ b/Documentation/filesystems/proc.txt 2009-07-27 16:58:05.000000000 -0700
@@ -375,6 +375,18 @@ of memory currently marked as referenced
This file is only present if the CONFIG_MMU kernel configuration option is
enabled.
+The /proc/PID/clear_refs is used to reset the PG_Referenced and ACCESSED/YOUNG
+bits on both physical and virtual pages associated with a process.
+To clear the bits for all the pages associated with the process
+ > echo 1 > /proc/PID/clear_refs
+
+To clear the bits for the anonymous pages associated with the process
+ > echo 2 > /proc/PID/clear_refs
+
+To clear the bits for the file mapped pages associated with the process
+ > echo 3 > /proc/PID/clear_refs
+Any other value written to /proc/PID/clear_refs will have no effect.
+
1.2 Kernel data
---------------
--- a/fs/proc/task_mmu.c 2009-07-21 14:30:01.000000000 -0700
+++ b/fs/proc/task_mmu.c 2009-07-27 17:05:25.000000000 -0700
@@ -462,6 +462,10 @@ static int clear_refs_pte_range(pmd_t *
return 0;
}
+#define CLEAR_REFS_ALL 1
+#define CLEAR_REFS_ANON 2
+#define CLEAR_REFS_MAPPED 3
+
static ssize_t clear_refs_write(struct file *file, const char __user * buf,
size_t count, loff_t * ppos)
{
@@ -469,13 +473,15 @@ static ssize_t clear_refs_write(struct f
char buffer[PROC_NUMBUF], *end;
struct mm_struct *mm;
struct vm_area_struct *vma;
+ int type;
memset(buffer, 0, sizeof(buffer));
if (count > sizeof(buffer) - 1)
count = sizeof(buffer) - 1;
if (copy_from_user(buffer, buf, count))
return -EFAULT;
- if (!simple_strtol(buffer, &end, 0))
+ type = simple_strtol(buffer, &end, 0);
+ if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED)
return -EINVAL;
if (*end == '\n')
end++;
@@ -491,9 +497,23 @@ static ssize_t clear_refs_write(struct f
down_read(&mm->mmap_sem);
for (vma = mm->mmap; vma; vma = vma->vm_next) {
clear_refs_walk.private = vma;
- if (!is_vm_hugetlb_page(vma))
- walk_page_range(vma->vm_start, vma->vm_end,
- &clear_refs_walk);
+ if (is_vm_hugetlb_page(vma))
+ continue;
+ /*
+ * Writing 1 to /proc/pid/clear_refs affects all pages.
+ *
+ * Writing 2 to /proc/pid/clear_refs only affects
+ * Anonymous pages.
+ *
+ * Writing 3 to /proc/pid/clear_refs only affects file
+ * mapped pages.
+ */
+ if (type == CLEAR_REFS_ANON && vma->vm_file)
+ continue;
+ if (type == CLEAR_REFS_MAPPED && !vma->vm_file)
+ continue;
+ walk_page_range(vma->vm_start, vma->vm_end,
+ &clear_refs_walk);
}
flush_tlb_mm(mm);
up_read(&mm->mmap_sem);
On Tue, 28 Jul 2009 15:52:05 -0700
"Moussa A. Ba" <[email protected]> wrote:
> This patch adds anonymous and file backed filters to the clear_refs interface.
> echo 1 > /proc/PID/clear_refs resets the bits on all pages
> echo 2 > /proc/PID/clear_refs resets the bits on anonymous pages only
> echo 3 > /proc/PID/clear_refs resets the bits on file backed pages only
> Any other value is ignored
The changelog is missing any rationale for making this change.
Originally we were told that it "makes the clear_refs proc interface a
bit more versatile", but that's a bit thin.
How do we justify making this change to Linux? What value does it
have? Can you describe a usage scenario which would help people
understand the usefulness of the change?
Thanks.
The patch makes the clear_refs more versatile in adding the option to
select anonymous pages or file backed pages for clearing. This
addition has a measurable impact on user space application performance
as it decreases the number of pagewalks in scenarios where one is only
interested in a specific type of page (anonymous or file mapped).
On Tue, Jul 28, 2009 at 4:52 PM, Andrew Morton<[email protected]> wrote:
> On Tue, 28 Jul 2009 15:52:05 -0700
> "Moussa A. Ba" <[email protected]> wrote:
>
>> This patch adds anonymous and file backed filters to the clear_refs interface.
>> echo 1 > /proc/PID/clear_refs resets the bits on all pages
>> echo 2 > /proc/PID/clear_refs resets the bits on anonymous pages only
>> echo 3 > /proc/PID/clear_refs resets the bits on file backed pages only
>> Any other value is ignored
>
> The changelog is missing any rationale for making this change.
> Originally we were told that it "makes the clear_refs proc interface a
> bit more versatile", but that's a bit thin.
>
> How do we justify making this change to Linux? ?What value does it
> have? ?Can you describe a usage scenario which would help people
> understand the usefulness of the change?
>
> Thanks.
>
On Tue, Jul 28, 2009 at 05:00:54PM -0700, Moussa Ba wrote:
> The patch makes the clear_refs more versatile in adding the option to
> select anonymous pages or file backed pages for clearing. This
> addition has a measurable impact on user space application performance
> as it decreases the number of pagewalks in scenarios where one is only
> interested in a specific type of page (anonymous or file mapped).
>
I think what Andrew might be looking for is a repeat of the use-case for
using clear_refs at all and what the additional usecase is that makes this
patch beneficial. To be honest, I had to go digging for a bit to find out
why clear_refs is used at all and the potential benefits of this patch were
initially unclear to me although they were obvious to others. I confess I
haven't read the patch closely to determine if it's behaving as advertised
or not.
Bonus points for a wee illustration of a script that measures the apparent
working set of a process using clear_refs, showing the working set for anon
vs filebacked and the difference of measuring just anon versus the full
process for example. Such a script could be added to Documentation/vm as
I didn't spot any example in there already.
Total aside, is there potential to really mess with LRU aging by abusing
clear_refs a lot, partcularly as clear_refs is writable by the process
owner? Does it matter?
>
> On Tue, Jul 28, 2009 at 4:52 PM, Andrew Morton<[email protected]> wrote:
> > On Tue, 28 Jul 2009 15:52:05 -0700
> > "Moussa A. Ba" <[email protected]> wrote:
> >
> >> This patch adds anonymous and file backed filters to the clear_refs interface.
> >> echo 1 > /proc/PID/clear_refs resets the bits on all pages
> >> echo 2 > /proc/PID/clear_refs resets the bits on anonymous pages only
> >> echo 3 > /proc/PID/clear_refs resets the bits on file backed pages only
> >> Any other value is ignored
> >
> > The changelog is missing any rationale for making this change.
> > Originally we were told that it "makes the clear_refs proc interface a
> > bit more versatile", but that's a bit thin.
> >
> > How do we justify making this change to Linux? ?What value does it
> > have? ?Can you describe a usage scenario which would help people
> > understand the usefulness of the change?
> >
> > Thanks.
> >
>
--
Mel Gorman
Part-time Phd Student Linux Technology Center
University of Limerick IBM Dublin Software Lab
On Wed, 2009-07-29 at 15:58 +0100, Mel Gorman wrote:
> Total aside, is there potential to really mess with LRU aging by abusing
> clear_refs a lot, partcularly as clear_refs is writable by the process
> owner? Does it matter?
Absolutely. However, it's generally the process owner who will suffer.
--
http://selenic.com : development and support for Mercurial and Linux
Mel,
here are two scripts that make use of the interface to "measure"
Referenced anon vs. mapped pages.
save the awk script to refs.awk and run the bash script as follows
./refscount PID secs
PID is the process you want to monitor while secs is the number of
seconds to sleep before clearing the refs. The output after x secs
is a the total number of refs before and immediately after clearing.
Here is an example run using firefox as I was writing this email.
/usr/lib/firefox-3.0.3/firefox
12:10:58 Mapped = 12856 kB Anon = 27896 kB
12:10:58 Mapped = 760 kB Anon = 4 kB
12:11:08 Mapped = 5884 kB Anon = 20340 kB
.
.
.
.
12:16:19 Mapped = 476 kB Anon = 0 kB
12:16:29 Mapped = 7016 kB Anon = 13128 kB
12:16:29 Mapped = 476 kB Anon = 0 kB
12:16:39 Mapped = 9276 kB Anon = 21588 kB
12:16:39 Mapped = 476 kB Anon = 0 kB
12:16:49 Mapped = 10288 kB Anon = 16000 kB
12:16:49 Mapped = 716 kB Anon = 12 kB
BEGIN BASH SCRIPT
--------------------------------
#!/bin/bash
echo `cat /proc/$1/cmdline`
#Intial state of the process
cat /proc/$1/smaps | awk -f refs.awk
while [ true ]
do
echo 1 > /proc/$1/clear_refs
# Immidiately after clearing
cat /proc/$1/smaps | awk -f refs.awk
sleep $2
# after x seconds
cat /proc/$1/smaps | awk -f refs.awk
done
---------------------------
END BASH SCRIPT
BEGIN AWK SCRIPT
-----------------------
BEGIN {refsanon = 0;refsmapped=0;}
{
if (NF == 6) {
if ($4 == "00:00")
type = 1;
else
type = 2;
}
if ($1 == "Referenced:") {
if (type == 1)
refsanon += $2;
if (type == 2)
refsmapped += $2;
type = 0;
}
}
END {printf("%s ",strftime("%H:%M:%S")); print "Mapped = " refsmapped
" kB Anon = " refsanon " kB";}
---------------------------
END AWK SCRIPT
On Wed, Jul 29, 2009 at 7:58 AM, Mel Gorman<[email protected]> wrote:
> On Tue, Jul 28, 2009 at 05:00:54PM -0700, Moussa Ba wrote:
>> The patch makes the clear_refs more versatile in adding the option to
>> select anonymous pages or file backed pages for clearing. ?This
>> addition has a measurable impact on user space application performance
>> as it decreases the number of pagewalks in scenarios where one is only
>> interested in a specific type of page (anonymous or file mapped).
>>
>
> I think what Andrew might be looking for is a repeat of the use-case for
> using clear_refs at all and what the additional usecase is that makes this
> patch beneficial. To be honest, I had to go digging for a bit to find out
> why clear_refs is used at all and the potential benefits of this patch were
> initially unclear to me although they were obvious to others. I confess I
> haven't read the patch closely to determine if it's behaving as advertised
> or not.
>
> Bonus points for a wee illustration of a script that measures the apparent
> working set of a process using clear_refs, showing the working set for anon
> vs filebacked and the difference of measuring just anon versus the full
> process for example. Such a script could be added to Documentation/vm as
> I didn't spot any example in there already.
>
> Total aside, is there potential to really mess with LRU aging by abusing
> clear_refs a lot, partcularly as clear_refs is writable by the process
> owner? Does it matter?
>
>>
>> On Tue, Jul 28, 2009 at 4:52 PM, Andrew Morton<[email protected]> wrote:
>> > On Tue, 28 Jul 2009 15:52:05 -0700
>> > "Moussa A. Ba" <[email protected]> wrote:
>> >
>> >> This patch adds anonymous and file backed filters to the clear_refs interface.
>> >> echo 1 > /proc/PID/clear_refs resets the bits on all pages
>> >> echo 2 > /proc/PID/clear_refs resets the bits on anonymous pages only
>> >> echo 3 > /proc/PID/clear_refs resets the bits on file backed pages only
>> >> Any other value is ignored
>> >
>> > The changelog is missing any rationale for making this change.
>> > Originally we were told that it "makes the clear_refs proc interface a
>> > bit more versatile", but that's a bit thin.
>> >
>> > How do we justify making this change to Linux? ?What value does it
>> > have? ?Can you describe a usage scenario which would help people
>> > understand the usefulness of the change?
>> >
>> > Thanks.
>> >
>>
>
> --
> Mel Gorman
> Part-time Phd Student ? ? ? ? ? ? ? ? ? ? ? ? ?Linux Technology Center
> University of Limerick ? ? ? ? ? ? ? ? ? ? ? ? IBM Dublin Software Lab
>