2023-11-26 03:09:22

by Rong Tao

[permalink] [raw]
Subject: [PATCH v2] prctl: Get private anonymous memory region name

From: Rong Tao <[email protected]>

In commit 9a10064f5625 ("mm: add a field to store names for private anony-
mous memory") add PR_SET_VMA options and PR_SET_VMA_ANON_NAME for the prctl
system call, then the PR_GET_VMA interface should be provided accordingly,
which is necessary, as the userspace program usually wants to know what
VMA name it has configured for the anonymous page.

Userspace can set the name for a region of memory by calling:

prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, start, len, (unsigned long)name);

Userspace can then get the name of a memory region by calling:

char buf[80];
prctl(PR_GET_VMA, PR_GET_VMA_ANON_NAME, start, buf, 0);

Changes for prctl(2) manual page (in the options section):

PR_GET_VMA
Gets an attribute specified in arg2 for the virtual memory area
found at the address specified in arg3. arg4 points to a buffer
in the caller's memory that receives the value of the attribute.
arg5 is currently unused.

Currently, arg2 must be one of:

PR_GET_VMA_ANON_NAME
Get the name of an anonymous virtual memory area. arg4 must
point to a buffer in the caller's memory. The buffer should
be at least 80 bytes long, unless the caller knows the length
of the name it set with PR_SET_VMA_ANON_NAME; with a shorter
buffer the prctl call may fail because the name cannot be
copied out.

This feature is available only if the kernel is built with
the CONFIG_ANON_VMA_NAME option enabled.

Signed-off-by: Rong Tao <[email protected]>
---
v2: Simplify code implementation.
v1: https://lore.kernel.org/all/[email protected]/
---
include/linux/mm.h | 7 +++++++
include/uapi/linux/prctl.h | 3 +++
kernel/sys.c | 39 ++++++++++++++++++++++++++++++++++++++
mm/madvise.c | 15 +++++++++++++++
4 files changed, 64 insertions(+)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 418d26608ece..f7c242f1bceb 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4106,6 +4106,8 @@ static inline int seal_check_write(int seals, struct vm_area_struct *vma)
}

#ifdef CONFIG_ANON_VMA_NAME
+struct anon_vma_name *madvise_get_anon_name(struct mm_struct *mm,
+ unsigned long start);
int madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
unsigned long len_in,
struct anon_vma_name *anon_name);
@@ -4115,6 +4117,11 @@ madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
unsigned long len_in, struct anon_vma_name *anon_name) {
return 0;
}
+static inline /* NOTE(review): presumably the !CONFIG_ANON_VMA_NAME fallback (it sits just before the #endif) - confirm */
+struct anon_vma_name *madvise_get_anon_name(struct mm_struct *mm,
+ unsigned long start) {
+ return NULL; /* never yields a name; mm and start are ignored */
+}
#endif

#ifdef CONFIG_UNACCEPTED_MEMORY
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 370ed14b1ae0..8ba0016d77de 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -291,6 +291,9 @@ struct prctl_mm_map {
#define PR_SET_VMA 0x53564d41
# define PR_SET_VMA_ANON_NAME 0

+#define PR_GET_VMA 0x53564d42
+# define PR_GET_VMA_ANON_NAME 0
+
#define PR_GET_AUXV 0x41555856

#define PR_SET_MEMORY_MERGE 67
diff --git a/kernel/sys.c b/kernel/sys.c
index e219fcfa112d..b1cbcb276e1a 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2359,12 +2359,48 @@ static int prctl_set_vma(unsigned long opt, unsigned long addr,
return error;
}

+/*
+ * prctl_get_vma - handle prctl(PR_GET_VMA, opt, addr, buf, arg)
+ * @opt:  attribute to read; only PR_GET_VMA_ANON_NAME is recognised
+ * @addr: address used to look up the VMA in the caller's mm
+ * @buf:  user-space pointer that receives the NUL-terminated name
+ * @arg:  unused
+ *
+ * Returns 0 on success, -EFAULT if no name was found or the copy to user
+ * space failed, and -EINVAL for an unknown @opt.
+ *
+ * NOTE(review): "no name" and "bad user buffer" both report -EFAULT; a
+ * distinct errno for the former may be preferable - confirm before merging.
+ */
+static int prctl_get_vma(unsigned long opt, unsigned long addr,
+			 unsigned long buf, unsigned long arg)
+{
+	struct mm_struct *mm = current->mm;
+	/* Declared here: a declaration may not directly follow a case label. */
+	struct anon_vma_name *anon_name;
+	char __user *u_buf = (char __user *)buf;
+	int error = 0;
+
+	switch (opt) {
+	case PR_GET_VMA_ANON_NAME:
+		/*
+		 * Only the lookup needs mmap_lock. madvise_get_anon_name()
+		 * takes a reference on the name, so it stays valid after the
+		 * unlock, and copy_to_user() - which may fault and take
+		 * mmap_lock itself - must not run with the lock held.
+		 */
+		mmap_read_lock(mm);
+		anon_name = madvise_get_anon_name(mm, addr);
+		mmap_read_unlock(mm);
+
+		if (!anon_name)
+			return -EFAULT;
+
+		/* Copy the name including its terminating NUL. */
+		if (copy_to_user(u_buf, anon_name->name,
+				 strlen(anon_name->name) + 1))
+			error = -EFAULT;
+
+		anon_vma_name_put(anon_name);
+		break;
+	default:
+		error = -EINVAL;
+	}
+	return error;
+}
+
#else /* CONFIG_ANON_VMA_NAME */
static int prctl_set_vma(unsigned long opt, unsigned long start,
unsigned long size, unsigned long arg)
{
return -EINVAL;
}
+
+static int prctl_get_vma(unsigned long opt, unsigned long start,
+ unsigned long u_buf, unsigned long arg)
+{ /* CONFIG_ANON_VMA_NAME is off: every option is rejected and all arguments are ignored */
+ return -EINVAL;
+}
#endif /* CONFIG_ANON_VMA_NAME */

static inline unsigned long get_current_mdwe(void)
@@ -2712,6 +2748,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
case PR_SET_VMA:
error = prctl_set_vma(arg2, arg3, arg4, arg5);
break;
+ case PR_GET_VMA:
+ error = prctl_get_vma(arg2, arg3, arg4, arg5);
+ break;
case PR_GET_AUXV:
if (arg4 || arg5)
return -EINVAL;
diff --git a/mm/madvise.c b/mm/madvise.c
index cf4d694280e9..bad7b4167d2c 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -1287,6 +1287,21 @@ static int madvise_vma_anon_name(struct vm_area_struct *vma,
return error;
}

+/*
+ * madvise_get_anon_name - look up the anon name of the VMA covering @start
+ * @mm:    address space to search
+ * @start: address to look up
+ *
+ * Returns the name attached to the VMA that contains @start, after calling
+ * anon_vma_name_get() on it, or NULL when @start lies in no VMA or the VMA
+ * has no name. The caller releases the result with anon_vma_name_put().
+ * prctl_get_vma() calls this with mmap_lock held for read.
+ */
+struct anon_vma_name *madvise_get_anon_name(struct mm_struct *mm,
+					    unsigned long start)
+{
+	struct vm_area_struct *vma;
+	struct anon_vma_name *anon_name;
+
+	/*
+	 * find_vma() returns the first VMA that ends above @start, which may
+	 * also begin above @start. Reject that case so an address inside an
+	 * unmapped hole does not report the name of the next mapping.
+	 */
+	vma = find_vma(mm, start);
+	if (!vma || start < vma->vm_start)
+		return NULL;
+
+	anon_name = anon_vma_name(vma);
+	anon_vma_name_get(anon_name);
+
+	return anon_name;
+}
+
int madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
unsigned long len_in, struct anon_vma_name *anon_name)
{
--
2.43.0


2023-11-26 13:58:24

by Oleg Nesterov

[permalink] [raw]
Subject: Re: [PATCH v2] prctl: Get private anonymous memory region name

On 11/26, Rong Tao wrote:
>
> then the PR_GET_VMA interface should be provided accordingly,
> which is necessary, as the userspace program usually wants to know what
> VMA name it has configured for the anonymous page.

I don't really understand the use-case for PR_GET_VMA ...

But the patch looks reasonable and correct to me.

Reviewed-by: Oleg Nesterov <[email protected]>

2023-11-27 14:53:16

by David Hildenbrand

[permalink] [raw]
Subject: Re: [PATCH v2] prctl: Get private anonymous memory region name

On 26.11.23 14:56, Oleg Nesterov wrote:
> On 11/26, Rong Tao wrote:
>>
>> then the PR_GET_VMA interface should be provided accordingly,
>> which is necessary, as the userspace program usually wants to know what
>> VMA name it has configured for the anonymous page.
>
> I don't really understand the use-case for PR_GET_VMA ...
>

Can't we simply read "/proc/PID/maps" and just have that information
from there?

Also, I don't understand the exact use case, that should be clarified --
especially, why the existing way is insufficient.

--
Cheers,

David / dhildenb

2023-11-28 01:03:07

by Rong Tao

[permalink] [raw]
Subject: Re: [PATCH v2] prctl: Get private anonymous memory region name


On 11/27/23 22:52, David Hildenbrand wrote:
> On 26.11.23 14:56, Oleg Nesterov wrote:
>> On 11/26, Rong Tao wrote:
>>>
>>> then the PR_GET_VMA interface should be provided accordingly,
>>> which is necessary, as the userspace program usually wants to know what
>>> VMA name it has configured for the anonymous page.
>>
>> I don't really understand the use-case for PR_GET_VMA ...
>>
>
> Can't we simply read "/proc/PID/maps" and just have that information
> from there?


Thank you, David.

The relationship between PR_GET_VMA and /proc/PID/maps is like the
relationship between PR_GET_NAME and /proc/PID/comm. Obviously, both
methods can obtain the corresponding name. However, prctl(2) can be
called directly from code, while reading proc is not as convenient or
efficient. Moreover, reading proc is more like something done from the
bash command line than from C code.


>
> Also, I don't understand the exact use case, that should be clarified
> -- especially, why the existing way is insufficient.
>

As for the use case: I am developing a user-mode patch tool that needs
to map the patch file into the target process (using ptrace(2) and
pread/pwrite on "/proc/self/mem"). I initially used shared files:

    00400000-00401000 r--p 00000000 08:00 241933181
/home/sda/git-repos/upatch/tests/hello/hello
    00401000-00402000 r-xp 00001000 08:00 241933181
/home/sda/git-repos/upatch/tests/hello/hello
    00402000-00403000 r--p 00002000 08:00 241933181
/home/sda/git-repos/upatch/tests/hello/hello
    00403000-00404000 r--p 00002000 08:00 241933181
/home/sda/git-repos/upatch/tests/hello/hello
    00404000-00405000 rw-p 00003000 08:00 241933181
/home/sda/git-repos/upatch/tests/hello/hello
    01136000-01157000 rw-p 00000000 00:00 0 [heap]
    7f21472c0000-7f21472c2000 rw-p 00000000 00:00 0
    7f21472c2000-7f21472e8000 r--p 00000000 103:03 3705
/usr/lib64/libc.so.6
    7f21472e8000-7f2147448000 r-xp 00026000 103:03 3705
/usr/lib64/libc.so.6
    7f2147448000-7f2147496000 r--p 00186000 103:03 3705
/usr/lib64/libc.so.6
    7f2147496000-7f214749a000 r--p 001d3000 103:03 3705
/usr/lib64/libc.so.6
    7f214749a000-7f214749c000 rw-p 001d7000 103:03 3705
/usr/lib64/libc.so.6
    7f214749c000-7f21474a6000 rw-p 00000000 00:00 0
    7f21474be000-7f21474bf000 rwxs 00000000 00:27 7794
/tmp/upatch/62984/map_files/patch-FKSYTp   <<
    7f21474bf000-7f21474c0000 rwxs 00000000 00:27 7793
/tmp/upatch/62984/map_files/patch-KFaQNU <<
    7f21474c0000-7f21474c1000 r--p 00000000 103:03 3702
/usr/lib64/ld-linux-x86-64.so.2
    7f21474c1000-7f21474e8000 r-xp 00001000 103:03 3702
/usr/lib64/ld-linux-x86-64.so.2
    7f21474e8000-7f21474f2000 r--p 00028000 103:03 3702
/usr/lib64/ld-linux-x86-64.so.2
    7f21474f2000-7f21474f4000 r--p 00031000 103:03 3702
/usr/lib64/ld-linux-x86-64.so.2
    7f21474f4000-7f21474f6000 rw-p 00033000 103:03 3702
/usr/lib64/ld-linux-x86-64.so.2
    7ffec158b000-7ffec15ad000 rw-p 00000000 00:00 0 [stack]
    7ffec15cf000-7ffec15d3000 r--p 00000000 00:00 0 [vvar]
    7ffec15d3000-7ffec15d5000 r-xp 00000000 00:00 0 [vdso]
    ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]

However, this is obviously not the best approach. I want to use
anonymous pages instead:

    7f21474be000-7f21474bf000 rwxp 00000000 00:27 7794 [anon:patch1]
    7f21474bf000-7f21474c0000 rwxp 00000000 00:27 7793 [anon:patch2]

I hope to use the address to obtain the VMA name (here "patch1")
directly. This is very convenient, because the program can do it
without parsing /proc/PID/maps in its source code.

Thanks again.

Rong Tao