2019-05-14 13:18:37

by Oleksandr Natalenko

Subject: [PATCH RFC v2 3/4] mm/ksm: introduce force_madvise knob

Present a new sysfs knob to mark a task's anonymous memory as mergeable.

To force merging of a task's VMAs, echo its PID into the write-only file:

# echo PID > /sys/kernel/mm/ksm/force_madvise

Force unmerging is done similarly, but with a minus sign:

# echo -PID > /sys/kernel/mm/ksm/force_madvise

"0" or "-0" can be used to control the current task.

To achieve this, the previously introduced ksm_enter()/ksm_leave() helpers
are called from the knob's "store" handler.
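
The knob can also be driven programmatically. A minimal C sketch (the
helper name is illustrative; the sysfs path and the sign convention are
the ones described above, and writing the file normally requires root):

#include <stdio.h>
#include <sys/types.h>

/* Ask KSM to merge (merge != 0) or unmerge the VMAs of task "pid". */
static int ksm_force_madvise(pid_t pid, int merge)
{
	FILE *f = fopen("/sys/kernel/mm/ksm/force_madvise", "w");

	if (!f)
		return -1;
	fprintf(f, "%s%ld", merge ? "" : "-", (long)pid);
	return fclose(f); /* the write is flushed and checked here */
}

Per VMA, the effect roughly corresponds to what a process can already
request for itself with madvise(2); the knob applies the same policy to
another task's address space from the outside. For comparison, a rough
sketch of that existing self-service interface (not part of this patch):

#define _DEFAULT_SOURCE /* for MADV_MERGEABLE/MADV_UNMERGEABLE on glibc */
#include <sys/mman.h>

/* The owning process marks its own anonymous region (un)mergeable. */
static int mark_mergeable(void *addr, size_t len, int merge)
{
	return madvise(addr, len,
		       merge ? MADV_MERGEABLE : MADV_UNMERGEABLE);
}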

Signed-off-by: Oleksandr Natalenko <[email protected]>
---
mm/ksm.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 68 insertions(+)

diff --git a/mm/ksm.c b/mm/ksm.c
index e9f3901168bb..22c59fb03d3a 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -2879,10 +2879,77 @@ static void wait_while_offlining(void)

#define KSM_ATTR_RO(_name) \
	static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
+#define KSM_ATTR_WO(_name) \
+	static struct kobj_attribute _name##_attr = __ATTR_WO(_name)
#define KSM_ATTR(_name) \
	static struct kobj_attribute _name##_attr = \
		__ATTR(_name, 0644, _name##_show, _name##_store)

+static ssize_t force_madvise_store(struct kobject *kobj,
+				   struct kobj_attribute *attr,
+				   const char *buf, size_t count)
+{
+	int err;
+	pid_t pid;
+	bool merge = true;
+	struct task_struct *tsk;
+	struct mm_struct *mm;
+	struct vm_area_struct *vma;
+
+	err = kstrtoint(buf, 10, &pid);
+	if (err)
+		return -EINVAL;
+
+	if (pid < 0) {
+		pid = abs(pid);
+		merge = false;
+	}
+
+	if (!pid && *buf == '-')
+		merge = false;
+
+	rcu_read_lock();
+	if (pid) {
+		tsk = find_task_by_vpid(pid);
+		if (!tsk) {
+			err = -ESRCH;
+			rcu_read_unlock();
+			goto out;
+		}
+	} else {
+		tsk = current;
+	}
+
+	tsk = tsk->group_leader;
+
+	get_task_struct(tsk);
+	rcu_read_unlock();
+
+	mm = get_task_mm(tsk);
+	if (!mm) {
+		err = -EINVAL;
+		goto out_put_task_struct;
+	}
+	down_write(&mm->mmap_sem);
+	vma = mm->mmap;
+	while (vma) {
+		if (merge)
+			ksm_enter(vma->vm_mm, vma, &vma->vm_flags);
+		else
+			ksm_leave(vma, vma->vm_start, vma->vm_end, &vma->vm_flags);
+		vma = vma->vm_next;
+	}
+	up_write(&mm->mmap_sem);
+	mmput(mm);
+
+out_put_task_struct:
+	put_task_struct(tsk);
+
+out:
+	return err ? err : count;
+}
+KSM_ATTR_WO(force_madvise);
+
static ssize_t sleep_millisecs_show(struct kobject *kobj,
				    struct kobj_attribute *attr, char *buf)
{
@@ -3185,6 +3252,7 @@ static ssize_t full_scans_show(struct kobject *kobj,
KSM_ATTR_RO(full_scans);

static struct attribute *ksm_attrs[] = {
+	&force_madvise_attr.attr,
	&sleep_millisecs_attr.attr,
	&pages_to_scan_attr.attr,
	&run_attr.attr,
--
2.21.0


2019-05-14 13:25:35

by Aaron Tomlin

Subject: Re: [PATCH RFC v2 3/4] mm/ksm: introduce force_madvise knob

On Tue 2019-05-14 15:16 +0200, Oleksandr Natalenko wrote:
> [...]

Looks fine to me.

Reviewed-by: Aaron Tomlin <[email protected]>

--
Aaron Tomlin

2019-05-15 00:51:26

by Timofey Titovets

Subject: Re: [PATCH RFC v2 3/4] mm/ksm: introduce force_madvise knob

LGTM

Reviewed-by: Timofey Titovets <[email protected]>

On Tue, 14 May 2019 at 16:22, Aaron Tomlin <[email protected]> wrote:
> [...]



--
Have a nice day,
Timofey.