2022-08-15 04:30:56

by Kirill A. Shutemov

[permalink] [raw]
Subject: [PATCHv6 06/11] x86/mm: Provide arch_prctl() interface for LAM

Add a couple of arch_prctl() handles:

- ARCH_ENABLE_TAGGED_ADDR enables LAM. The argument is the required number
of tag bits. It is rounded up to the nearest LAM mode that can
provide it. For now only LAM_U57 is supported, with 6 tag bits.

- ARCH_GET_UNTAG_MASK returns the untag mask. It indicates where the tag
bits are located in the address.

- ARCH_GET_MAX_TAG_BITS returns the maximum tag bits user can request.
Zero if LAM is not supported.

Signed-off-by: Kirill A. Shutemov <[email protected]>
---
arch/x86/include/uapi/asm/prctl.h | 4 ++
arch/x86/kernel/process_64.c | 65 ++++++++++++++++++++++++++++++-
2 files changed, 68 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h
index 500b96e71f18..a31e27b95b19 100644
--- a/arch/x86/include/uapi/asm/prctl.h
+++ b/arch/x86/include/uapi/asm/prctl.h
@@ -20,4 +20,8 @@
#define ARCH_MAP_VDSO_32 0x2002
#define ARCH_MAP_VDSO_64 0x2003

+#define ARCH_GET_UNTAG_MASK 0x4001
+#define ARCH_ENABLE_TAGGED_ADDR 0x4002
+#define ARCH_GET_MAX_TAG_BITS 0x4003
+
#endif /* _ASM_X86_PRCTL_H */
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 1962008fe743..4f9f0f8ccd26 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -742,6 +742,60 @@ static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr)
}
#endif

+static void enable_lam_func(void *mm)
+{
+ struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
+ unsigned long lam_mask;
+ unsigned long cr3;
+
+ if (loaded_mm != mm)
+ return;
+
+ lam_mask = READ_ONCE(loaded_mm->context.lam_cr3_mask);
+
+ /* Update CR3 to get LAM active on the CPU */
+ cr3 = __read_cr3();
+ cr3 &= ~(X86_CR3_LAM_U48 | X86_CR3_LAM_U57);
+ cr3 |= lam_mask;
+ write_cr3(cr3);
+ set_tlbstate_cr3_lam_mask(lam_mask);
+}
+
+static int prctl_enable_tagged_addr(struct mm_struct *mm, unsigned long nr_bits)
+{
+ int ret = 0;
+
+ if (!cpu_feature_enabled(X86_FEATURE_LAM))
+ return -ENODEV;
+
+ mutex_lock(&mm->context.lock);
+
+ /* Already enabled? */
+ if (mm->context.lam_cr3_mask) {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ if (!nr_bits) {
+ ret = -EINVAL;
+ goto out;
+ } else if (nr_bits <= 6) {
+ mm->context.lam_cr3_mask = X86_CR3_LAM_U57;
+ mm->context.untag_mask = ~GENMASK(62, 57);
+ } else {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* Make lam_cr3_mask and untag_mask visible on other CPUs */
+ smp_mb();
+
+ on_each_cpu_mask(mm_cpumask(mm), enable_lam_func, mm, true);
+out:
+ mutex_unlock(&mm->context.lock);
+ return ret;
+}
+
long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
{
int ret = 0;
@@ -829,7 +883,16 @@ long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
case ARCH_MAP_VDSO_64:
return prctl_map_vdso(&vdso_image_64, arg2);
#endif
-
+ case ARCH_GET_UNTAG_MASK:
+ return put_user(task->mm->context.untag_mask,
+ (unsigned long __user *)arg2);
+ case ARCH_ENABLE_TAGGED_ADDR:
+ return prctl_enable_tagged_addr(task->mm, arg2);
+ case ARCH_GET_MAX_TAG_BITS:
+ if (!cpu_feature_enabled(X86_FEATURE_LAM))
+ return put_user(0, (unsigned long __user *)arg2);
+ else
+ return put_user(6, (unsigned long __user *)arg2);
default:
ret = -EINVAL;
break;
--
2.35.1


2022-08-15 13:44:54

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCHv6 06/11] x86/mm: Provide arch_prctl() interface for LAM

On Mon, Aug 15, 2022 at 07:17:58AM +0300, Kirill A. Shutemov wrote:

> +static void enable_lam_func(void *mm)
> +{
> + struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
> + unsigned long lam_mask;
> + unsigned long cr3;
> +
> + if (loaded_mm != mm)
> + return;
> +
> + lam_mask = READ_ONCE(loaded_mm->context.lam_cr3_mask);
> +
> + /* Update CR3 to get LAM active on the CPU */
> + cr3 = __read_cr3();
> + cr3 &= ~(X86_CR3_LAM_U48 | X86_CR3_LAM_U57);
> + cr3 |= lam_mask;
> + write_cr3(cr3);
> + set_tlbstate_cr3_lam_mask(lam_mask);
> +}
> +
> +static int prctl_enable_tagged_addr(struct mm_struct *mm, unsigned long nr_bits)
> +{
> + int ret = 0;
> +
> + if (!cpu_feature_enabled(X86_FEATURE_LAM))
> + return -ENODEV;
> +
> + mutex_lock(&mm->context.lock);
> +
> + /* Already enabled? */
> + if (mm->context.lam_cr3_mask) {
> + ret = -EBUSY;
> + goto out;
> + }
> +
> + if (!nr_bits) {
> + ret = -EINVAL;
> + goto out;
> + } else if (nr_bits <= 6) {
> + mm->context.lam_cr3_mask = X86_CR3_LAM_U57;
> + mm->context.untag_mask = ~GENMASK(62, 57);
> + } else {
> + ret = -EINVAL;
> + goto out;
> + }
> +
> + /* Make lam_cr3_mask and untag_mask visible on other CPUs */
> + smp_mb();

smp_mb() doesn't make visible -- it is about ordering, what does it
order against that the below on_each_cpu_mask() doesn't already take
care of?

> +
> + on_each_cpu_mask(mm_cpumask(mm), enable_lam_func, mm, true);
> +out:
> + mutex_unlock(&mm->context.lock);
> + return ret;
> +}

2022-08-15 18:04:40

by Kirill A. Shutemov

[permalink] [raw]
Subject: Re: [PATCHv6 06/11] x86/mm: Provide arch_prctl() interface for LAM

On Mon, Aug 15, 2022 at 03:37:16PM +0200, Peter Zijlstra wrote:
> On Mon, Aug 15, 2022 at 07:17:58AM +0300, Kirill A. Shutemov wrote:
>
> > +static void enable_lam_func(void *mm)
> > +{
> > + struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
> > + unsigned long lam_mask;
> > + unsigned long cr3;
> > +
> > + if (loaded_mm != mm)
> > + return;
> > +
> > + lam_mask = READ_ONCE(loaded_mm->context.lam_cr3_mask);
> > +
> > + /* Update CR3 to get LAM active on the CPU */
> > + cr3 = __read_cr3();
> > + cr3 &= ~(X86_CR3_LAM_U48 | X86_CR3_LAM_U57);
> > + cr3 |= lam_mask;
> > + write_cr3(cr3);
> > + set_tlbstate_cr3_lam_mask(lam_mask);
> > +}
> > +
> > +static int prctl_enable_tagged_addr(struct mm_struct *mm, unsigned long nr_bits)
> > +{
> > + int ret = 0;
> > +
> > + if (!cpu_feature_enabled(X86_FEATURE_LAM))
> > + return -ENODEV;
> > +
> > + mutex_lock(&mm->context.lock);
> > +
> > + /* Already enabled? */
> > + if (mm->context.lam_cr3_mask) {
> > + ret = -EBUSY;
> > + goto out;
> > + }
> > +
> > + if (!nr_bits) {
> > + ret = -EINVAL;
> > + goto out;
> > + } else if (nr_bits <= 6) {
> > + mm->context.lam_cr3_mask = X86_CR3_LAM_U57;
> > + mm->context.untag_mask = ~GENMASK(62, 57);
> > + } else {
> > + ret = -EINVAL;
> > + goto out;
> > + }
> > +
> > + /* Make lam_cr3_mask and untag_mask visible on other CPUs */
> > + smp_mb();
>
> smp_mb() doesn't make visible -- it is about ordering, what does it
> order against that the below on_each_cpu_mask() doesn't already take
> care of?

You are right it is redundant. I will drop it.

--
Kiryl Shutsemau / Kirill A. Shutemov

2022-08-16 06:40:28

by Kirill A. Shutemov

[permalink] [raw]
Subject: [PATCHv6.1 06/11] x86/mm: Provide arch_prctl() interface for LAM

Add a couple of arch_prctl() handles:

- ARCH_ENABLE_TAGGED_ADDR enables LAM. The argument is the required number
of tag bits. It is rounded up to the nearest LAM mode that can
provide it. For now only LAM_U57 is supported, with 6 tag bits.

- ARCH_GET_UNTAG_MASK returns the untag mask. It indicates where the tag
bits are located in the address.

- ARCH_GET_MAX_TAG_BITS returns the maximum tag bits user can request.
Zero if LAM is not supported.

Signed-off-by: Kirill A. Shutemov <[email protected]>
---
v6.1:
- Drop redundant smp_mb() in prctl_enable_tagged_addr();
---
arch/x86/include/uapi/asm/prctl.h | 4 ++
arch/x86/kernel/process_64.c | 62 ++++++++++++++++++++++++++++++-
2 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h
index 500b96e71f18..a31e27b95b19 100644
--- a/arch/x86/include/uapi/asm/prctl.h
+++ b/arch/x86/include/uapi/asm/prctl.h
@@ -20,4 +20,8 @@
#define ARCH_MAP_VDSO_32 0x2002
#define ARCH_MAP_VDSO_64 0x2003

+#define ARCH_GET_UNTAG_MASK 0x4001
+#define ARCH_ENABLE_TAGGED_ADDR 0x4002
+#define ARCH_GET_MAX_TAG_BITS 0x4003
+
#endif /* _ASM_X86_PRCTL_H */
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 1962008fe743..28b9657ce2d0 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -742,6 +742,57 @@ static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr)
}
#endif

+static void enable_lam_func(void *mm)
+{
+ struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
+ unsigned long lam_mask;
+ unsigned long cr3;
+
+ if (loaded_mm != mm)
+ return;
+
+ lam_mask = READ_ONCE(loaded_mm->context.lam_cr3_mask);
+
+ /* Update CR3 to get LAM active on the CPU */
+ cr3 = __read_cr3();
+ cr3 &= ~(X86_CR3_LAM_U48 | X86_CR3_LAM_U57);
+ cr3 |= lam_mask;
+ write_cr3(cr3);
+ set_tlbstate_cr3_lam_mask(lam_mask);
+}
+
+static int prctl_enable_tagged_addr(struct mm_struct *mm, unsigned long nr_bits)
+{
+ int ret = 0;
+
+ if (!cpu_feature_enabled(X86_FEATURE_LAM))
+ return -ENODEV;
+
+ mutex_lock(&mm->context.lock);
+
+ /* Already enabled? */
+ if (mm->context.lam_cr3_mask) {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ if (!nr_bits) {
+ ret = -EINVAL;
+ goto out;
+ } else if (nr_bits <= 6) {
+ mm->context.lam_cr3_mask = X86_CR3_LAM_U57;
+ mm->context.untag_mask = ~GENMASK(62, 57);
+ } else {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ on_each_cpu_mask(mm_cpumask(mm), enable_lam_func, mm, true);
+out:
+ mutex_unlock(&mm->context.lock);
+ return ret;
+}
+
long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
{
int ret = 0;
@@ -829,7 +880,16 @@ long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
case ARCH_MAP_VDSO_64:
return prctl_map_vdso(&vdso_image_64, arg2);
#endif
-
+ case ARCH_GET_UNTAG_MASK:
+ return put_user(task->mm->context.untag_mask,
+ (unsigned long __user *)arg2);
+ case ARCH_ENABLE_TAGGED_ADDR:
+ return prctl_enable_tagged_addr(task->mm, arg2);
+ case ARCH_GET_MAX_TAG_BITS:
+ if (!cpu_feature_enabled(X86_FEATURE_LAM))
+ return put_user(0, (unsigned long __user *)arg2);
+ else
+ return put_user(6, (unsigned long __user *)arg2);
default:
ret = -EINVAL;
break;
--
2.35.1

2022-08-22 09:56:53

by Alexander Potapenko

[permalink] [raw]
Subject: Re: [PATCHv6 06/11] x86/mm: Provide arch_prctl() interface for LAM

On Mon, Aug 15, 2022 at 6:15 AM Kirill A. Shutemov
<[email protected]> wrote:
>
> Add a couple of arch_prctl() handles:
>
> - ARCH_ENABLE_TAGGED_ADDR enabled LAM. The argument is required number
> of tag bits. It is rounded up to the nearest LAM mode that can
> provide it. For now only LAM_U57 is supported, with 6 tag bits.
>
> - ARCH_GET_UNTAG_MASK returns untag mask. It can indicates where tag
> bits located in the address.
>
> - ARCH_GET_MAX_TAG_BITS returns the maximum tag bits user can request.
> Zero if LAM is not supported.
>
> Signed-off-by: Kirill A. Shutemov <[email protected]>
Reviewed-by: Alexander Potapenko <[email protected]>

(with a nit, see below)

> +static int prctl_enable_tagged_addr(struct mm_struct *mm, unsigned long nr_bits)
> +{
> + int ret = 0;
> +
> + if (!cpu_feature_enabled(X86_FEATURE_LAM))
> + return -ENODEV;
> +
> + mutex_lock(&mm->context.lock);
> +
> + /* Already enabled? */
> + if (mm->context.lam_cr3_mask) {
> + ret = -EBUSY;
> + goto out;
> + }
> +
> + if (!nr_bits) {
> + ret = -EINVAL;
> + goto out;
> + } else if (nr_bits <= 6) {

Can you please make this 6 a #define?


> + return put_user(6, (unsigned long __user *)arg2);
... and use it at least here (could also express masks in terms of
this number, but maybe it's enough to just declare them in the same
header next to each other).

2022-08-23 17:22:24

by Alexander Potapenko

[permalink] [raw]
Subject: Re: [PATCHv6 06/11] x86/mm: Provide arch_prctl() interface for LAM

On Mon, Aug 22, 2022 at 11:32 AM Alexander Potapenko <[email protected]> wrote:
>
> On Mon, Aug 15, 2022 at 6:15 AM Kirill A. Shutemov
> <[email protected]> wrote:
> >
> > Add a couple of arch_prctl() handles:
> >
> > - ARCH_ENABLE_TAGGED_ADDR enabled LAM. The argument is required number
> > of tag bits. It is rounded up to the nearest LAM mode that can
> > provide it. For now only LAM_U57 is supported, with 6 tag bits.
> >
> > - ARCH_GET_UNTAG_MASK returns untag mask. It can indicates where tag
> > bits located in the address.
> >
> > - ARCH_GET_MAX_TAG_BITS returns the maximum tag bits user can request.
> > Zero if LAM is not supported.
> >
> > Signed-off-by: Kirill A. Shutemov <[email protected]>
> Reviewed-by: Alexander Potapenko <[email protected]>
And also:

Tested-by: Alexander Potapenko <[email protected]>