This is required to support time namespaces where a time namespace data
page is different for each namespace.
Reviewed-by: Vincenzo Frascino <[email protected]>
Reviewed-by: Dmitry Safonov <[email protected]>
Signed-off-by: Andrei Vagin <[email protected]>
---
arch/arm64/kernel/vdso.c | 24 +++++++++++++++---------
1 file changed, 15 insertions(+), 9 deletions(-)
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index 4e016574bd91..b0aec4e8c9b4 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -108,28 +108,32 @@ static int __vdso_init(enum vdso_abi abi)
PAGE_SHIFT;
/* Allocate the vDSO pagelist, plus a page for the data. */
- vdso_pagelist = kcalloc(vdso_info[abi].vdso_pages + 1,
+ vdso_pagelist = kcalloc(vdso_info[abi].vdso_pages,
sizeof(struct page *),
GFP_KERNEL);
if (vdso_pagelist == NULL)
return -ENOMEM;
- /* Grab the vDSO data page. */
- vdso_pagelist[0] = phys_to_page(__pa_symbol(vdso_data));
-
-
/* Grab the vDSO code pages. */
pfn = sym_to_pfn(vdso_info[abi].vdso_code_start);
for (i = 0; i < vdso_info[abi].vdso_pages; i++)
- vdso_pagelist[i + 1] = pfn_to_page(pfn + i);
+ vdso_pagelist[i] = pfn_to_page(pfn + i);
- vdso_info[abi].dm->pages = &vdso_pagelist[0];
- vdso_info[abi].cm->pages = &vdso_pagelist[1];
+ vdso_info[abi].cm->pages = vdso_pagelist;
return 0;
}
+static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
+ struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ if (vmf->pgoff == 0)
+ return vmf_insert_pfn(vma, vmf->address,
+ sym_to_pfn(vdso_data));
+ return VM_FAULT_SIGBUS;
+}
+
static int __setup_additional_pages(enum vdso_abi abi,
struct mm_struct *mm,
struct linux_binprm *bprm,
@@ -150,7 +154,7 @@ static int __setup_additional_pages(enum vdso_abi abi,
}
ret = _install_special_mapping(mm, vdso_base, PAGE_SIZE,
- VM_READ|VM_MAYREAD,
+ VM_READ|VM_MAYREAD|VM_PFNMAP,
vdso_info[abi].dm);
if (IS_ERR(ret))
goto up_fail;
@@ -209,6 +213,7 @@ static struct vm_special_mapping aarch32_vdso_maps[] = {
#ifdef CONFIG_COMPAT_VDSO
[AA32_MAP_VVAR] = {
.name = "[vvar]",
+ .fault = vvar_fault,
},
[AA32_MAP_VDSO] = {
.name = "[vdso]",
@@ -376,6 +381,7 @@ enum aarch64_map {
static struct vm_special_mapping aarch64_vdso_maps[] __ro_after_init = {
[AA64_MAP_VVAR] = {
.name = "[vvar]",
+ .fault = vvar_fault,
},
[AA64_MAP_VDSO] = {
.name = "[vdso]",
--
2.24.1
On Tue, Jun 16, 2020 at 12:55:40AM -0700, Andrei Vagin wrote:
> This is required to support time namespaces where a time namespace data
> page is different for each namespace.
Can you please give a bit more of an introduction to the changes here?
As-is, this doesn't give reviewers the necessary context to understand
the change, nor does it justify it.
Ideally, a commit message for this should look something like:
| Currently the vdso has no awareness of time namespaces, which may
| apply distinct offsets to processes in different namespaces. To handle
| this within the vdso, we'll need to expose a per-namespace data page.
|
| As a preparatory step, this patch separates the vdso data page from
| the code pages, and has it faulted in via its own fault callback.
| Subsquent patches will extend this to support distinct pages per time
| namespace.
Otherwise, I have a few minor comments below.
> Reviewed-by: Vincenzo Frascino <[email protected]>
> Reviewed-by: Dmitry Safonov <[email protected]>
> Signed-off-by: Andrei Vagin <[email protected]>
> ---
> arch/arm64/kernel/vdso.c | 24 +++++++++++++++---------
> 1 file changed, 15 insertions(+), 9 deletions(-)
>
> diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
> index 4e016574bd91..b0aec4e8c9b4 100644
> --- a/arch/arm64/kernel/vdso.c
> +++ b/arch/arm64/kernel/vdso.c
> @@ -108,28 +108,32 @@ static int __vdso_init(enum vdso_abi abi)
> PAGE_SHIFT;
>
> /* Allocate the vDSO pagelist, plus a page for the data. */
> - vdso_pagelist = kcalloc(vdso_info[abi].vdso_pages + 1,
> + vdso_pagelist = kcalloc(vdso_info[abi].vdso_pages,
> sizeof(struct page *),
> GFP_KERNEL);
The comment above is now stale. Can you please update it, or (event
better) delete it entirely?
> if (vdso_pagelist == NULL)
> return -ENOMEM;
>
> - /* Grab the vDSO data page. */
> - vdso_pagelist[0] = phys_to_page(__pa_symbol(vdso_data));
> -
> -
> /* Grab the vDSO code pages. */
> pfn = sym_to_pfn(vdso_info[abi].vdso_code_start);
>
> for (i = 0; i < vdso_info[abi].vdso_pages; i++)
> - vdso_pagelist[i + 1] = pfn_to_page(pfn + i);
> + vdso_pagelist[i] = pfn_to_page(pfn + i);
>
> - vdso_info[abi].dm->pages = &vdso_pagelist[0];
> - vdso_info[abi].cm->pages = &vdso_pagelist[1];
> + vdso_info[abi].cm->pages = vdso_pagelist;
>
> return 0;
> }
>
> +static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
> + struct vm_area_struct *vma, struct vm_fault *vmf)
> +{
> + if (vmf->pgoff == 0)
> + return vmf_insert_pfn(vma, vmf->address,
> + sym_to_pfn(vdso_data));
> + return VM_FAULT_SIGBUS;
> +}
This might look better as:
| if (vmf->pgoff != 0)
| return VM_FAULT_SIGBUS;
|
| return vmf_insert_pfn(vma, vmf->address, sym_to_pfn(vdso_data));
> +
> static int __setup_additional_pages(enum vdso_abi abi,
> struct mm_struct *mm,
> struct linux_binprm *bprm,
> @@ -150,7 +154,7 @@ static int __setup_additional_pages(enum vdso_abi abi,
> }
>
> ret = _install_special_mapping(mm, vdso_base, PAGE_SIZE,
> - VM_READ|VM_MAYREAD,
> + VM_READ|VM_MAYREAD|VM_PFNMAP,
This change needs to be explained in the commit message. WHy is it
necessary, and why only so for the data page?
Thanks,
Mark.
> vdso_info[abi].dm);
> if (IS_ERR(ret))
> goto up_fail;
> @@ -209,6 +213,7 @@ static struct vm_special_mapping aarch32_vdso_maps[] = {
> #ifdef CONFIG_COMPAT_VDSO
> [AA32_MAP_VVAR] = {
> .name = "[vvar]",
> + .fault = vvar_fault,
> },
> [AA32_MAP_VDSO] = {
> .name = "[vdso]",
> @@ -376,6 +381,7 @@ enum aarch64_map {
> static struct vm_special_mapping aarch64_vdso_maps[] __ro_after_init = {
> [AA64_MAP_VVAR] = {
> .name = "[vvar]",
> + .fault = vvar_fault,
> },
> [AA64_MAP_VDSO] = {
> .name = "[vdso]",
> --
> 2.24.1
>
On Tue, Jun 16, 2020 at 11:59:20AM +0100, Mark Rutland wrote:
> On Tue, Jun 16, 2020 at 12:55:40AM -0700, Andrei Vagin wrote:
> > This is required to support time namespaces where a time namespace data
> > page is different for each namespace.
>
> Can you please give a bit more of an introduction to the changes here?
> As-is, this doesn't give reviewers the necessary context to understand
> the change, nor does it justify it.
>
> Ideally, a commit message for this should look something like:
Mark, thank you for the review. I will write more details message and
fix this patch according with your other comments.
>
> | Currently the vdso has no awareness of time namespaces, which may
> | apply distinct offsets to processes in different namespaces. To handle
> | this within the vdso, we'll need to expose a per-namespace data page.
> |
> | As a preparatory step, this patch separates the vdso data page from
> | the code pages, and has it faulted in via its own fault callback.
> | Subsquent patches will extend this to support distinct pages per time
> | namespace.
>
> Otherwise, I have a few minor comments below.
>
> > Reviewed-by: Vincenzo Frascino <[email protected]>
> > Reviewed-by: Dmitry Safonov <[email protected]>
> > Signed-off-by: Andrei Vagin <[email protected]>
> > ---
> > arch/arm64/kernel/vdso.c | 24 +++++++++++++++---------
> > 1 file changed, 15 insertions(+), 9 deletions(-)
> >
> > diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
> > index 4e016574bd91..b0aec4e8c9b4 100644
> > --- a/arch/arm64/kernel/vdso.c
> > +++ b/arch/arm64/kernel/vdso.c
> > @@ -108,28 +108,32 @@ static int __vdso_init(enum vdso_abi abi)
> > PAGE_SHIFT;
> >
> > /* Allocate the vDSO pagelist, plus a page for the data. */
> > - vdso_pagelist = kcalloc(vdso_info[abi].vdso_pages + 1,
> > + vdso_pagelist = kcalloc(vdso_info[abi].vdso_pages,
> > sizeof(struct page *),
> > GFP_KERNEL);
>
> The comment above is now stale. Can you please update it, or (event
> better) delete it entirely?
I will delete it.
>
> > if (vdso_pagelist == NULL)
> > return -ENOMEM;
> >
> > - /* Grab the vDSO data page. */
> > - vdso_pagelist[0] = phys_to_page(__pa_symbol(vdso_data));
> > -
> > -
> > /* Grab the vDSO code pages. */
> > pfn = sym_to_pfn(vdso_info[abi].vdso_code_start);
> >
> > for (i = 0; i < vdso_info[abi].vdso_pages; i++)
> > - vdso_pagelist[i + 1] = pfn_to_page(pfn + i);
> > + vdso_pagelist[i] = pfn_to_page(pfn + i);
> >
> > - vdso_info[abi].dm->pages = &vdso_pagelist[0];
> > - vdso_info[abi].cm->pages = &vdso_pagelist[1];
> > + vdso_info[abi].cm->pages = vdso_pagelist;
> >
> > return 0;
> > }
> >
> > +static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
> > + struct vm_area_struct *vma, struct vm_fault *vmf)
> > +{
> > + if (vmf->pgoff == 0)
> > + return vmf_insert_pfn(vma, vmf->address,
> > + sym_to_pfn(vdso_data));
> > + return VM_FAULT_SIGBUS;
> > +}
>
> This might look better as:
In the next patch, this function will handle more pages and this form
will look better.
>
> | if (vmf->pgoff != 0)
> | return VM_FAULT_SIGBUS;
> |
> | return vmf_insert_pfn(vma, vmf->address, sym_to_pfn(vdso_data));
>
> > +
> > static int __setup_additional_pages(enum vdso_abi abi,
> > struct mm_struct *mm,
> > struct linux_binprm *bprm,
> > @@ -150,7 +154,7 @@ static int __setup_additional_pages(enum vdso_abi abi,
> > }
> >
> > ret = _install_special_mapping(mm, vdso_base, PAGE_SIZE,
> > - VM_READ|VM_MAYREAD,
> > + VM_READ|VM_MAYREAD|VM_PFNMAP,
>
> This change needs to be explained in the commit message. WHy is it
> necessary, and why only so for the data page?
I will update the commit message. VM_PFNMAP is required to handle faults
from a vma fault callback. In this case, it is vvar_fault.
>
> Thanks,
> Mark.
>
> > vdso_info[abi].dm);
> > if (IS_ERR(ret))
> > goto up_fail;
> > @@ -209,6 +213,7 @@ static struct vm_special_mapping aarch32_vdso_maps[] = {
> > #ifdef CONFIG_COMPAT_VDSO
> > [AA32_MAP_VVAR] = {
> > .name = "[vvar]",
> > + .fault = vvar_fault,
> > },
> > [AA32_MAP_VDSO] = {
> > .name = "[vdso]",
> > @@ -376,6 +381,7 @@ enum aarch64_map {
> > static struct vm_special_mapping aarch64_vdso_maps[] __ro_after_init = {
> > [AA64_MAP_VVAR] = {
> > .name = "[vvar]",
> > + .fault = vvar_fault,
> > },
> > [AA64_MAP_VDSO] = {
> > .name = "[vdso]",
> > --
> > 2.24.1
> >