2022-11-15 16:21:45

by Kristen Carlson Accardi

Subject: [PATCH v2] x86/sgx: Replace kmap/kunmap_atomic calls

kmap_local_page() is the preferred way to create temporary mappings
when it is feasible, because the mappings are thread-local and
CPU-local. kmap_local_page() uses per-task maps rather than per-CPU
maps. This in effect removes the need to preemption in the
local CPU while kmap is active, and thus vastly reduces overall
system latency. It is also valid to take pagefaults.

The use of kmap_atomic() in the SGX code was not an explicit design
choice to disable page faults or preemption, and there is no compelling
design reason to using kmap_atomic() vs. kmap_local_page().

Link: https://lore.kernel.org/linux-sgx/Y0biN3%[email protected]/

For more information on the use of kmap_local_page() vs.
kmap_atomic(), please see Documentation/mm/highmem.rst

Signed-off-by: Kristen Carlson Accardi <[email protected]>
---
Changes since V1:

- Reword commit message to make it more clear why it is preferred
to use kmap_local_page() vs. kmap_atomic().

arch/x86/kernel/cpu/sgx/encl.c | 12 ++++++------
arch/x86/kernel/cpu/sgx/ioctl.c | 4 ++--
arch/x86/kernel/cpu/sgx/main.c | 8 ++++----
3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
index 2c258255a629..68f8b18d2278 100644
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -160,8 +160,8 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
return ret;

pginfo.addr = encl_page->desc & PAGE_MASK;
- pginfo.contents = (unsigned long)kmap_atomic(b.contents);
- pcmd_page = kmap_atomic(b.pcmd);
+ pginfo.contents = (unsigned long)kmap_local_page(b.contents);
+ pcmd_page = kmap_local_page(b.pcmd);
pginfo.metadata = (unsigned long)pcmd_page + b.pcmd_offset;

if (secs_page)
@@ -187,8 +187,8 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
*/
pcmd_page_empty = !memchr_inv(pcmd_page, 0, PAGE_SIZE);

- kunmap_atomic(pcmd_page);
- kunmap_atomic((void *)(unsigned long)pginfo.contents);
+ kunmap_local(pcmd_page);
+ kunmap_local((void *)(unsigned long)pginfo.contents);

get_page(b.pcmd);
sgx_encl_put_backing(&b);
@@ -197,10 +197,10 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,

if (pcmd_page_empty && !reclaimer_writing_to_pcmd(encl, pcmd_first_page)) {
sgx_encl_truncate_backing_page(encl, PFN_DOWN(page_pcmd_off));
- pcmd_page = kmap_atomic(b.pcmd);
+ pcmd_page = kmap_local_page(b.pcmd);
if (memchr_inv(pcmd_page, 0, PAGE_SIZE))
pr_warn("PCMD page not empty after truncate.\n");
- kunmap_atomic(pcmd_page);
+ kunmap_local(pcmd_page);
}

put_page(b.pcmd);
diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
index ef874828fa6b..03c50f11ad75 100644
--- a/arch/x86/kernel/cpu/sgx/ioctl.c
+++ b/arch/x86/kernel/cpu/sgx/ioctl.c
@@ -221,11 +221,11 @@ static int __sgx_encl_add_page(struct sgx_encl *encl,
pginfo.secs = (unsigned long)sgx_get_epc_virt_addr(encl->secs.epc_page);
pginfo.addr = encl_page->desc & PAGE_MASK;
pginfo.metadata = (unsigned long)secinfo;
- pginfo.contents = (unsigned long)kmap_atomic(src_page);
+ pginfo.contents = (unsigned long)kmap_local_page(src_page);

ret = __eadd(&pginfo, sgx_get_epc_virt_addr(epc_page));

- kunmap_atomic((void *)pginfo.contents);
+ kunmap_local((void *)pginfo.contents);
put_page(src_page);

return ret ? -EIO : 0;
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index 0aad028f04d4..e5a37b6e9aa5 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -165,17 +165,17 @@ static int __sgx_encl_ewb(struct sgx_epc_page *epc_page, void *va_slot,
pginfo.addr = 0;
pginfo.secs = 0;

- pginfo.contents = (unsigned long)kmap_atomic(backing->contents);
- pginfo.metadata = (unsigned long)kmap_atomic(backing->pcmd) +
+ pginfo.contents = (unsigned long)kmap_local_page(backing->contents);
+ pginfo.metadata = (unsigned long)kmap_local_page(backing->pcmd) +
backing->pcmd_offset;

ret = __ewb(&pginfo, sgx_get_epc_virt_addr(epc_page), va_slot);
set_page_dirty(backing->pcmd);
set_page_dirty(backing->contents);

- kunmap_atomic((void *)(unsigned long)(pginfo.metadata -
+ kunmap_local((void *)(unsigned long)(pginfo.metadata -
backing->pcmd_offset));
- kunmap_atomic((void *)(unsigned long)pginfo.contents);
+ kunmap_local((void *)(unsigned long)pginfo.contents);

return ret;
}
--
2.38.1



2022-11-16 00:18:12

by Jarkko Sakkinen

Subject: Re: [PATCH v2] x86/sgx: Replace kmap/kunmap_atomic calls

On Tue, Nov 15, 2022 at 08:16:26AM -0800, Kristen Carlson Accardi wrote:
> kmap_local_page() is the preferred way to create temporary mappings
> when it is feasible, because the mappings are thread-local and
> CPU-local. kmap_local_page() uses per-task maps rather than per-CPU
> maps. This in effect removes the need to preemption in the
> local CPU while kmap is active, and thus vastly reduces overall
> system latency. It is also valid to take pagefaults.
>
> The use of kmap_atomic() in the SGX code was not an explicit design
> choice to disable page faults or preemption, and there is no compelling
> design reason to using kmap_atomic() vs. kmap_local_page().
>
> Link: https://lore.kernel.org/linux-sgx/Y0biN3%[email protected]/
>
> For more information on the use of kmap_local_page() vs.
> kmap_atomic(), please see Documentation/mm/highmem.rst
>
> Signed-off-by: Kristen Carlson Accardi <[email protected]>
> ---
> Changes since V1:
>
> - Reword commit message to make it more clear why it is preferred
> to use kmap_local_page() vs. kmap_atomic().
>
> arch/x86/kernel/cpu/sgx/encl.c | 12 ++++++------
> arch/x86/kernel/cpu/sgx/ioctl.c | 4 ++--
> arch/x86/kernel/cpu/sgx/main.c | 8 ++++----
> 3 files changed, 12 insertions(+), 12 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
> index 2c258255a629..68f8b18d2278 100644
> --- a/arch/x86/kernel/cpu/sgx/encl.c
> +++ b/arch/x86/kernel/cpu/sgx/encl.c
> @@ -160,8 +160,8 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
> return ret;
>
> pginfo.addr = encl_page->desc & PAGE_MASK;
> - pginfo.contents = (unsigned long)kmap_atomic(b.contents);
> - pcmd_page = kmap_atomic(b.pcmd);
> + pginfo.contents = (unsigned long)kmap_local_page(b.contents);
> + pcmd_page = kmap_local_page(b.pcmd);
> pginfo.metadata = (unsigned long)pcmd_page + b.pcmd_offset;
>
> if (secs_page)
> @@ -187,8 +187,8 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
> */
> pcmd_page_empty = !memchr_inv(pcmd_page, 0, PAGE_SIZE);
>
> - kunmap_atomic(pcmd_page);
> - kunmap_atomic((void *)(unsigned long)pginfo.contents);
> + kunmap_local(pcmd_page);
> + kunmap_local((void *)(unsigned long)pginfo.contents);
>
> get_page(b.pcmd);
> sgx_encl_put_backing(&b);
> @@ -197,10 +197,10 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
>
> if (pcmd_page_empty && !reclaimer_writing_to_pcmd(encl, pcmd_first_page)) {
> sgx_encl_truncate_backing_page(encl, PFN_DOWN(page_pcmd_off));
> - pcmd_page = kmap_atomic(b.pcmd);
> + pcmd_page = kmap_local_page(b.pcmd);
> if (memchr_inv(pcmd_page, 0, PAGE_SIZE))
> pr_warn("PCMD page not empty after truncate.\n");
> - kunmap_atomic(pcmd_page);
> + kunmap_local(pcmd_page);
> }
>
> put_page(b.pcmd);
> diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
> index ef874828fa6b..03c50f11ad75 100644
> --- a/arch/x86/kernel/cpu/sgx/ioctl.c
> +++ b/arch/x86/kernel/cpu/sgx/ioctl.c
> @@ -221,11 +221,11 @@ static int __sgx_encl_add_page(struct sgx_encl *encl,
> pginfo.secs = (unsigned long)sgx_get_epc_virt_addr(encl->secs.epc_page);
> pginfo.addr = encl_page->desc & PAGE_MASK;
> pginfo.metadata = (unsigned long)secinfo;
> - pginfo.contents = (unsigned long)kmap_atomic(src_page);
> + pginfo.contents = (unsigned long)kmap_local_page(src_page);
>
> ret = __eadd(&pginfo, sgx_get_epc_virt_addr(epc_page));
>
> - kunmap_atomic((void *)pginfo.contents);
> + kunmap_local((void *)pginfo.contents);
> put_page(src_page);
>
> return ret ? -EIO : 0;
> diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
> index 0aad028f04d4..e5a37b6e9aa5 100644
> --- a/arch/x86/kernel/cpu/sgx/main.c
> +++ b/arch/x86/kernel/cpu/sgx/main.c
> @@ -165,17 +165,17 @@ static int __sgx_encl_ewb(struct sgx_epc_page *epc_page, void *va_slot,
> pginfo.addr = 0;
> pginfo.secs = 0;
>
> - pginfo.contents = (unsigned long)kmap_atomic(backing->contents);
> - pginfo.metadata = (unsigned long)kmap_atomic(backing->pcmd) +
> + pginfo.contents = (unsigned long)kmap_local_page(backing->contents);
> + pginfo.metadata = (unsigned long)kmap_local_page(backing->pcmd) +
> backing->pcmd_offset;
>
> ret = __ewb(&pginfo, sgx_get_epc_virt_addr(epc_page), va_slot);
> set_page_dirty(backing->pcmd);
> set_page_dirty(backing->contents);
>
> - kunmap_atomic((void *)(unsigned long)(pginfo.metadata -
> + kunmap_local((void *)(unsigned long)(pginfo.metadata -
> backing->pcmd_offset));
> - kunmap_atomic((void *)(unsigned long)pginfo.contents);
> + kunmap_local((void *)(unsigned long)pginfo.contents);
>
> return ret;
> }
> --
> 2.38.1
>

Reviewed-by: Jarkko Sakkinen <[email protected]>

BR, Jarkko

2022-11-16 11:28:48

by Fabio M. De Francesco

Subject: Re: [PATCH v2] x86/sgx: Replace kmap/kunmap_atomic calls

On Tuesday, 15 November 2022 at 17:16:26 CET Kristen Carlson Accardi wrote:
> kmap_local_page() is the preferred way to create temporary mappings
> when it is feasible, because the mappings are thread-local and
> CPU-local.

The atomic mappings are thread and CPU local too.

Instead, please note that the most important difference is that
kmap_local_page() won't ever disable (1) pagefaults and (2) (depending on
PREEMPT_RT) preemption.
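
Roughly, on a CONFIG_HIGHMEM build the implied side effects compare like this
(a simplified sketch, not the exact implementation; details differ with
PREEMPT_RT):

        vaddr = kmap_atomic(page);      /* preempt_disable() + pagefault_disable() */
        /* no sleeping and no faulting allowed in here */
        kunmap_atomic(vaddr);           /* pagefault_enable() + preempt_enable() */

        vaddr = kmap_local_page(page);  /* only an implicit migrate_disable() */
        /* sleeping and faulting are fine in here */
        kunmap_local(vaddr);            /* migrate_enable() */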

To make it clearer: I think that your conversions are correct and welcome. I'm
only asking for clarifications about how you present the reasons why they are
good.

> kmap_local_page() uses per-task maps rather than per-CPU
> maps. This in effect removes the need to preemption in the
> local CPU while kmap is active, and thus vastly reduces overall
> system latency.

I'm also having difficulties with the paragraph above. What do you mean
by "This in effect removes the need to preemption in the local CPU while kmap
is active"? I understand that for me, being a non-native English speaker, some
sentences may appear weird even in cases where they are perfectly fine.

I can't see why you are expressing it the way you are, since kmap_local_page()
does not ever disable preemption.

Let's focus on the call sites you are converting: you know that the code
between kmap_atomic() and kunmap_atomic() can run in non-atomic context, thus
there is no real reason to rely on any kmap_atomic() side effects (please look
above again, if necessary).

Therefore, the real reasons for converting are (1) avoiding the unnecessary
disabling of page faults and (2) avoiding the probable disabling of preemption.

> local CPU while kmap is active

> It is also valid to take pagefaults.
>
> The use of kmap_atomic() in the SGX code was not an explicit design
> choice to disable page faults or preemption, and there is no compelling
> design reason to using kmap_atomic() vs. kmap_local_page().

This is at the core of the reasons why you are converting, that is to avoid
the potential overhead (in 32 bit architectures) of switching in atomic
context where it is not required.

> Link: https://lore.kernel.org/linux-sgx/Y0biN3%[email protected]/
>
> For more information on the use of kmap_local_page() vs.
> kmap_atomic(), please see Documentation/mm/highmem.rst
>
> Signed-off-by: Kristen Carlson Accardi <[email protected]>
> ---
> Changes since V1:
>
> - Reword commit message to make it more clear why it is preferred
> to use kmap_local_page() vs. kmap_atomic().
>
> arch/x86/kernel/cpu/sgx/encl.c | 12 ++++++------
> arch/x86/kernel/cpu/sgx/ioctl.c | 4 ++--
> arch/x86/kernel/cpu/sgx/main.c | 8 ++++----
> 3 files changed, 12 insertions(+), 12 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
> index 2c258255a629..68f8b18d2278 100644
> --- a/arch/x86/kernel/cpu/sgx/encl.c
> +++ b/arch/x86/kernel/cpu/sgx/encl.c
> @@ -160,8 +160,8 @@ static int __sgx_encl_eldu(struct sgx_encl_page
> *encl_page, return ret;
>
> pginfo.addr = encl_page->desc & PAGE_MASK;
> - pginfo.contents = (unsigned long)kmap_atomic(b.contents);
> - pcmd_page = kmap_atomic(b.pcmd);
> + pginfo.contents = (unsigned long)kmap_local_page(b.contents);
> + pcmd_page = kmap_local_page(b.pcmd);
> pginfo.metadata = (unsigned long)pcmd_page + b.pcmd_offset;
>
> if (secs_page)
> @@ -187,8 +187,8 @@ static int __sgx_encl_eldu(struct sgx_encl_page
> *encl_page, */
> pcmd_page_empty = !memchr_inv(pcmd_page, 0, PAGE_SIZE);
>
> - kunmap_atomic(pcmd_page);
> - kunmap_atomic((void *)(unsigned long)pginfo.contents);
> + kunmap_local(pcmd_page);
> + kunmap_local((void *)(unsigned long)pginfo.contents);
>
> get_page(b.pcmd);
> sgx_encl_put_backing(&b);
> @@ -197,10 +197,10 @@ static int __sgx_encl_eldu(struct sgx_encl_page
> *encl_page,
>
> if (pcmd_page_empty && !reclaimer_writing_to_pcmd(encl,
pcmd_first_page)) {
> sgx_encl_truncate_backing_page(encl,
PFN_DOWN(page_pcmd_off));
> - pcmd_page = kmap_atomic(b.pcmd);
> + pcmd_page = kmap_local_page(b.pcmd);
> if (memchr_inv(pcmd_page, 0, PAGE_SIZE))
> pr_warn("PCMD page not empty after truncate.
\n");
> - kunmap_atomic(pcmd_page);
> + kunmap_local(pcmd_page);
> }
>
> put_page(b.pcmd);
> diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/
ioctl.c
> index ef874828fa6b..03c50f11ad75 100644
> --- a/arch/x86/kernel/cpu/sgx/ioctl.c
> +++ b/arch/x86/kernel/cpu/sgx/ioctl.c
> @@ -221,11 +221,11 @@ static int __sgx_encl_add_page(struct sgx_encl *encl,
> pginfo.secs = (unsigned long)sgx_get_epc_virt_addr(encl-
>secs.epc_page);
> pginfo.addr = encl_page->desc & PAGE_MASK;
> pginfo.metadata = (unsigned long)secinfo;
> - pginfo.contents = (unsigned long)kmap_atomic(src_page);
> + pginfo.contents = (unsigned long)kmap_local_page(src_page);
>
> ret = __eadd(&pginfo, sgx_get_epc_virt_addr(epc_page));
>
> - kunmap_atomic((void *)pginfo.contents);
> + kunmap_local((void *)pginfo.contents);
> put_page(src_page);
>
> return ret ? -EIO : 0;
> diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
> index 0aad028f04d4..e5a37b6e9aa5 100644
> --- a/arch/x86/kernel/cpu/sgx/main.c
> +++ b/arch/x86/kernel/cpu/sgx/main.c
> @@ -165,17 +165,17 @@ static int __sgx_encl_ewb(struct sgx_epc_page
*epc_page,
> void *va_slot, pginfo.addr = 0;
> pginfo.secs = 0;
>
> - pginfo.contents = (unsigned long)kmap_atomic(backing->contents);
> - pginfo.metadata = (unsigned long)kmap_atomic(backing->pcmd) +
> + pginfo.contents = (unsigned long)kmap_local_page(backing->contents);
> + pginfo.metadata = (unsigned long)kmap_local_page(backing->pcmd) +
> backing->pcmd_offset;
>
> ret = __ewb(&pginfo, sgx_get_epc_virt_addr(epc_page), va_slot);
> set_page_dirty(backing->pcmd);
> set_page_dirty(backing->contents);
>
> - kunmap_atomic((void *)(unsigned long)(pginfo.metadata -
> + kunmap_local((void *)(unsigned long)(pginfo.metadata -
> backing-
>pcmd_offset));

> - kunmap_atomic((void *)(unsigned long)pginfo.contents);
> + kunmap_local((void *)(unsigned long)pginfo.contents);
>
> return ret;
> }
> --
> 2.38.1

2022-11-16 13:29:29

by Thomas Gleixner

Subject: Re: [PATCH v2] x86/sgx: Replace kmap/kunmap_atomic calls

On Wed, Nov 16 2022 at 11:16, Fabio M. De Francesco wrote:
> On Tuesday, 15 November 2022 at 17:16:26 CET Kristen Carlson Accardi wrote:
>> The use of kmap_atomic() in the SGX code was not an explicit design
>> choice to disable page faults or preemption, and there is no compelling
>> design reason to using kmap_atomic() vs. kmap_local_page().
>
> This is at the core of the reasons why you are converting, that is to avoid
> the potential overhead (in 32 bit architectures) of switching in atomic
> context where it is not required.

No. The point is that kmap_atomic() is an historical artifact of 32bit
HIGHMEM support.

The back then chosen implementation was to disable preemption and
pagefaults and use a temporary per CPU mapping. Disabling preemption was
required to protect the temporary mapping against a context switch.

Disabling pagefaults was an implicit side effect of disabling
preemption. The separation of preempt_disable() and pagefault_disable()
happened way later.

On 64bit and on 32bit systems with CONFIG_HIGHMEM=n this is not required
at all because the pages are already in the direct map.

That means support for 32bit highmem forces code to accommodate the
preemption-disabled section, where in most cases this is absolutely
not required. That often results in suboptimal and horrible code:

again:
        kmap_atomic();
        remaining = copy_to_user_inatomic();
        kunmap_atomic();
        if (remaining) {
                if (try_to_handle_fault())
                        goto again;
                ret = -EFAULT;
        }

instead of:

        kmap_local();
        ret = copy_to_user();
        kunmap_local();

It obviously does not allow sleeping or taking sleeping locks in the
kmap_atomic() section, which puts further restrictions on code just to
accommodate 32bit highmem.

So a few years ago we implemented kmap_local() and related interfaces to
replace kmap_atomic() completely, but we could not do a scripted
wholesale conversion because there are a few code paths which rely on
the implicit preemption disable and of course something like the above
monstrosity needs manual conversion.

kmap_local() puts a penalty exclusively on 32bit highmem systems. The
temporary mapping is still strictly per CPU, which is guaranteed by an
implicit migrate_disable(), and it is context switched in case of
[in]voluntary scheduling.

On plain 32bit and 64bit systems kmap_local() is pretty much a NOP. All
it does is to return the page address. It does not disable migration in
that case either. kunmap_local() is a complete NOP.
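
In that configuration it boils down to roughly this (a simplified sketch, not
the exact kernel code):

        static inline void *kmap_local_page(struct page *page)
        {
                return page_address(page);
        }

        static inline void kunmap_local(const void *vaddr)
        {
                /* nothing to undo, the page is in the permanent direct map */
        }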

The goal is to eliminate _all_ use cases of kmap_atomic*() and replace
them with kmap_local*(). This is a prerequisite for system protection
keys and other things.

Thanks,

tglx









2022-11-16 14:25:40

by Fabio M. De Francesco

Subject: Re: [PATCH v2] x86/sgx: Replace kmap/kunmap_atomic calls

On Wednesday, 16 November 2022 at 12:58:41 CET Thomas Gleixner wrote:
> On Wed, Nov 16 2022 at 11:16, Fabio M. De Francesco wrote:
> > On Tuesday, 15 November 2022 at 17:16:26 CET Kristen Carlson Accardi wrote:
> >> The use of kmap_atomic() in the SGX code was not an explicit design
> >> choice to disable page faults or preemption, and there is no compelling
> >> design reason to using kmap_atomic() vs. kmap_local_page().
> >
> > This is at the core of the reasons why you are converting, that is to avoid
> > the potential overhead (in 32 bit architectures) of switching in atomic
> > context where it is not required.
>
> No. The point is that kmap_atomic() is an historical artifact of 32bit
> HIGHMEM support.

I just saw that the last part of my email is missing. In fact, it is clear
enough that what I was saying was still incomplete and that no signature was
visible.

I meant that, before we had kmap_local_page(), the choice was between kmap()
and kmap_atomic().

Although I suppose that nobody should rely on those kmap_atomic() side effects,
I have been observing that a large part of the users of the Highmem API used
to call kmap_atomic() for its implicit pagefault_disable() and
preempt_disable() side effects, and so switched into atomic context only
between mappings and unmappings. Others used to call the atomic variant of
kmap() just because they wanted to default to it, instead of calling kmap().

Since 2019 we have had kmap_local_page(), and kmap() / kmap_atomic() have
recently been deprecated, but we can't know why the author of a piece of code
decided to use kmap_atomic()...

1) Are we already in atomic context when kmap_atomic() is invoked?
2) Do we call kmap_atomic() instead of kmap() only to switch from non-atomic
to atomic context between the mapping and the unmapping (although I stress
that we shouldn't ever rely on these side effects)?

I think that whoever converts should say whether or not we must explicitly, for
example, disable page faults and/or preemption along with converting to
kmap_local_page().
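
If a section between the mapping and the unmapping really did depend on those
side effects, the conversion should make them explicit, along the lines of
this hypothetical sketch (not taken from the SGX code):

        vaddr = kmap_local_page(page);
        pagefault_disable();    /* only if the code in between truly needs it */
        preempt_disable();      /* only if the code in between truly needs it */
        /* code that genuinely depends on the old kmap_atomic() semantics */
        preempt_enable();
        pagefault_enable();
        kunmap_local(vaddr);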

This is why I think that Kristen did well with the inspection of what happens
in those sections in between.

I expressed this concept in a bad way and furthermore the lines below that
sentence disappeared, making the whole much more confusing.

I think that later I'm saying a large part of what you are detailing because I
had to learn all these things for reworking highmem.rst and the kdocs of the
relevant highmem-related files.

The new documentation has been upstream for months and you've been Cc'ed. If
anything is wrong, please help me correct what I wrote, or do it yourself if
you prefer it that way.

> The back then chosen implementation was to disable preemption and
> pagefaults and use a temporary per CPU mapping. Disabling preemption was
> required to protect the temporary mapping against a context switch.
>
> Disabling pagefaults was an implicit side effect of disabling
> preemption. The separation of preempt_disable() and pagefault_disable()
> happened way later.
>
> On 64bit and on 32bit systems with CONFIG_HIGHMEM=n this is not required
> at all because the pages are already in the direct map.

Unfortunately, this is one of the things that were missing at the end of my
previous email. I'm aware that with HIGHMEM=n all these kmap_local_page()
calls are plain page_address(). I wrote this too in my email (and in the
documentation) but it disappeared when I sent my message.

> That means support for 32bit highmem forces code to accomodate with the
> preemption disabled section, where in the most cases this is absolutely
> not required.

This is the core: Kristen knows that the code between mapping / unmapping does
not need preemption or page fault disabling. Therefore kmap_local_page() will
work with no need to run in atomic context. We all agree on the necessity to
convert, but sometimes we are not sure about how to convert. Otherwise a script
would be enough to convert to kmap_local_page(). :-)

> That results often in suboptimal and horrible code:
>
> again:
> kmap_atomic();
> remaining = copy_to_user_inatomic();
> kunmap_atomic();
> if (remaining) {
> if (try_to_handle_fault())
> goto again;
> ret = -EFAULT;
> }
>
> instead of:
>
> kmap_local();
> ret = copy_to_user();
> kunmap_local();
>
> It obsiously does not allow to sleep or take sleeping locks in the
> kmap_atomic() section which puts further restrictions on code just to
> accomodate 32bit highmem.

I understand and agree. I'm sorry for the missing parts and for expressing
myself with not so good English proficiency.

> So a few years ago we implemented kmap_local() and related interfaces to
> replace kmap_atomic() completely, but we could not do a scripted
> wholesale conversion because there are a few code pathes which rely on
> the implicit preemption disable and of course something like the above
> monstrosity needs manual conversion.
>
> kmap_local() puts a penalty exclusively on 32bit highmem systems. The
> temporary mapping is still strict per CPU, which is guaranteed by an
> implicit migrate_disable(), and it is context switched in case of
> [un]voluntary scheduling.
>
> On plain 32bit and 64bit systems kmap_local() is pretty much a NOP. All
> it does is to return the page address. It does not disable migration in
> that case either. kunmap_local() is a complete NOP.

> The goal is to eliminate _all_ use cases of kmap_atomic*() and replace
> them with kmap_local*(). This is a prerequisite for system protection
> keys and other things.

Ira confirmed that system protection keys aren't affected. I'm not sure what
you are referring to when talking about "other things".

> Thanks,
>
> tglx

Can you please say whether you noticed any further misleading or plainly wrong
sentences in my message? Ira and I are coordinating these conversion efforts
(from kmap() and kmap_atomic() to kmap_local_page()), so I'm very interested
to know whether or not I'm wrong about fundamental details.

Thanks for helping and clarifying,

Fabio

2022-11-16 18:48:27

by Ira Weiny

Subject: Re: [PATCH v2] x86/sgx: Replace kmap/kunmap_atomic calls

On Wed, Nov 16, 2022 at 03:01:56PM +0100, Fabio M. De Francesco wrote:
> On Wednesday, 16 November 2022 at 12:58:41 CET Thomas Gleixner wrote:
> > On Wed, Nov 16 2022 at 11:16, Fabio M. De Francesco wrote:
> > > On Tuesday, 15 November 2022 at 17:16:26 CET Kristen Carlson Accardi wrote:
> > >> The use of kmap_atomic() in the SGX code was not an explicit design
> > >> choice to disable page faults or preemption, and there is no compelling
> > >> design reason to using kmap_atomic() vs. kmap_local_page().
> > >
> > > This is at the core of the reasons why you are converting, that is to avoid
> > > the potential overhead (in 32 bit architectures) of switching in atomic
> > > context where it is not required.
> >
> > No. The point is that kmap_atomic() is an historical artifact of 32bit
> > HIGHMEM support.
>
> I just saw that the last part of my email is missing. In fact it's enough
> clear that what I was saying was still incomplete and that no signature was
> visible.
>
> I meant that, before we had kmap_local_page(), the choice was between kmap()
> and kmap_atomic().
>
> Despite I suppose that nobody should rely on those kmap_atomic() side effects.
> I have been observing that a large part of the users of the Highmem API used
> to call kmap_atomic() for its implicit pagefault_disable() and
> preempt_disable() side effects

Fabio I think you missed the point here. Just because we have found _some_
places where the side effect was required does not mean that "a large part of
the users..." do.

While I value your review of these conversions I think Kristen did her
homework here. She checked with Jarkko (the original author of the code) and
verified that the side effects are not necessary here. That is all that needs
to be mentioned in the commit message.

> and so switching in atomic context only between
> mappings and unmappings. Others used to call the atomic variant of kmap() just
> because they wanted to default to it, instead of calling kmap().
>
> Since 2019 we have kmap_local_page() and kmap() / kmap_atomic() have been
> recently deprecated but we can't know why the author of a piece of code
> decided to use kmap_atomic()...
>
> 1) Are we already in atomic context when kmap_atomic() is invoked?
> 2) Do we call kmap_atomic() instead of kmap() only to switch from non atomic
> to atomic context between the mapping the unmapping (although I stress that we
> shouldn't ever rely on these side effects)?
>
> I think that who converts should tell whether or not we must explicitly, for
> example, disable page faults and/or preemption along with converting to
> kmap_local_page().
>
> This is why I think that Kristen did well with the inspection of what happens
> in those sections in between.
>
> I expressed this concept in a bad way and furthermore the lines below that
> sentence disappeared, making the whole much more confusing.
>
> I think that later I'm saying a large part of what you are detailing because I
> had to learn all these things for reworking highmem.rst and the kdocs of the
> relevant highmem-related files.
>
> The new documentation is upstream since months and you've been Cd'ed. If
> anything is wrong please help me to correct what I wrote or do it yourself if
> you prefer that other way.
>
> > The back then chosen implementation was to disable preemption and
> > pagefaults and use a temporary per CPU mapping. Disabling preemption was
> > required to protect the temporary mapping against a context switch.
> >
> > Disabling pagefaults was an implicit side effect of disabling
> > preemption. The separation of preempt_disable() and pagefault_disable()
> > happened way later.
> >
> > On 64bit and on 32bit systems with CONFIG_HIGHMEM=n this is not required
> > at all because the pages are already in the direct map.
>
> Unfortunately, this is one of the things that were missing at the end of my
> previous email. I'm aware that with HIGHMEM=n all these kmap_local_page() are
> plain page_address(). I wrote this too in my email (and in the documentation)
> but it disappeared when I sent my message.
>
> > That means support for 32bit highmem forces code to accomodate with the
> > preemption disabled section, where in the most cases this is absolutely
> > not required.
>
> This is the core: Kristen knows that the code between mapping / unmapping does
> not need preemption and page faults disabling. Therefore kmap_local_page()
> will work with no need to run in atomic. We all agree to the necessity to
> convert but sometimes we are not sure about how to convert.

But we are sure about this conversion.

> Otherwise a script
> would be enough to convert to kmap_local_page(). :-)
>
> > That results often in suboptimal and horrible code:
> >
> > again:
> > kmap_atomic();
> > remaining = copy_to_user_inatomic();
> > kunmap_atomic();
> > if (remaining) {
> > if (try_to_handle_fault())
> > goto again;
> > ret = -EFAULT;
> > }
> >
> > instead of:
> >
> > kmap_local();
> > ret = copy_to_user();
> > kunmap_local();
> >
> > It obsiously does not allow to sleep or take sleeping locks in the
> > kmap_atomic() section which puts further restrictions on code just to
> > accomodate 32bit highmem.
>
> I understand and agree. I'm sorry for the missing parts and for expressing
> with not so good English proficiency.
>
> > So a few years ago we implemented kmap_local() and related interfaces to
> > replace kmap_atomic() completely, but we could not do a scripted
> > wholesale conversion because there are a few code pathes which rely on
> > the implicit preemption disable and of course something like the above
> > monstrosity needs manual conversion.
> >
> > kmap_local() puts a penalty exclusively on 32bit highmem systems. The
> > temporary mapping is still strict per CPU, which is guaranteed by an
> > implicit migrate_disable(), and it is context switched in case of
> > [un]voluntary scheduling.
> >
> > On plain 32bit and 64bit systems kmap_local() is pretty much a NOP. All
> > it does is to return the page address. It does not disable migration in
> > that case either. kunmap_local() is a complete NOP.
>
> > The goal is to eliminate _all_ use cases of kmap_atomic*() and replace
> > them with kmap_local*(). This is a prerequisite for system protection
> > keys and other things.
>
> Ira confirmed that system protection keys aren't affected.

I'm not sure what you mean here. This work started because of system
protection keys. But since they have yet to have a good use case they are not
yet upstream. Therefore some _specific_ conversions have used page_address()
directly because those drivers know _exactly_ where and how the pages were
mapped. That is a very specific use case which is different from what Thomas
is talking about. General code which gets a page may still need to have
additional operations performed prior to using a kernel map.
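
For instance, a driver which allocated the page itself from lowmem can use the
direct map without any kmap at all; a hypothetical sketch (not from the SGX
code):

        /* GFP_KERNEL pages are never highmem, so they are always in the direct map */
        struct page *page = alloc_page(GFP_KERNEL);
        void *vaddr = page_address(page);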

> I'm not sure about
> what you are referring to when talking about "other things".

There are a number of other ideas where memory may not be in the direct map.

>
> > Thanks,
> >
> > tglx
>
> Can you please say if you noticed other further misleading or plainly wrong
> sentences in my message? Ira and I are coordinating this conversions efforts
> (from kmap() and kmap_atomic() to kmap_local_page), so I'm much interested to
> know whether or not I'm wrong with regard to fundamental details.

I think the issue is you are trying to get way too much detail for patches
which don't require it. If you review a patch like this and you find
kmap_atomic() was being used for the side effects, note that and NAK the patch.
If you feel there is some evidence but you are unsure, a quick email asking is
fine. But if there is no evidence the patch is fine, let's tag it with a review
and move on.

In this case people are getting frustrated with the 'bikeshedding'[1]. We are
never going to complete all these conversions if we spend this much time on the
easy ones.

Ira

[1] https://en.wiktionary.org/wiki/bikeshedding

>
> Thanks for helping and clarify,
>
> Fabio

2022-11-16 20:09:18

by Fabio M. De Francesco

Subject: Re: [PATCH v2] x86/sgx: Replace kmap/kunmap_atomic calls

On Wednesday, 16 November 2022 at 18:21:50 CET Ira Weiny wrote:
> On Wed, Nov 16, 2022 at 03:01:56PM +0100, Fabio M. De Francesco wrote:
> > On Wednesday, 16 November 2022 at 12:58:41 CET Thomas Gleixner wrote:
> > > On Wed, Nov 16 2022 at 11:16, Fabio M. De Francesco wrote:
> > > > On Tuesday, 15 November 2022 at 17:16:26 CET Kristen Carlson Accardi wrote:
> > > >> The use of kmap_atomic() in the SGX code was not an explicit design
> > > >> choice to disable page faults or preemption, and there is no compelling
> > > >> design reason to using kmap_atomic() vs. kmap_local_page().
> > > >
> > > > This is at the core of the reasons why you are converting, that is to avoid
> > > > the potential overhead (in 32 bit architectures) of switching in atomic
> > > > context where it is not required.
> > >
> > > No. The point is that kmap_atomic() is an historical artifact of 32bit
> > > HIGHMEM support.
> >
> > I just saw that the last part of my email is missing. In fact it's enough
> > clear that what I was saying was still incomplete and that no signature was
> > visible.
> >
> > I meant that, before we had kmap_local_page(), the choice was between kmap()
> > and kmap_atomic().
> >
> > Despite I suppose that nobody should rely on those kmap_atomic() side
> > effects, I have been observing that a large part of the users of the
> > Highmem API used to call kmap_atomic() for its implicit
> > pagefault_disable()
> > and preempt_disable() side effects.
>
> Fabio I think you missed the point here. Just because we have found _some_
> places where the side effect was required does not mean that "a large part
> of the users..." do.

You are right. The numbers don't support "a large part of the users...", only
that it happened and will happen again. Let's delete "large". However, this is
not the point. The real point is below...

> While I value your review of these conversions I think Kristen did her home
> work here.

I agree with you: she did her homework and the patch is good.

I don't know why I have not been able to convey that I appreciated her
homework. I especially appreciated that she took care to check that the code
between mappings / un-mappings does not rely on any of the disabling side
effects of kmap_atomic(), so she could simply replace it with kmap_local_page().

I'm not sure why the message that this patch is good didn't come across. I
suspect that I put too much stress on what I considered minor inaccuracies.

Meanwhile, I failed to say what really matters, i.e. that the changes are good
and that they will work properly.

> She checked with Jarkko (the original author of the code) and
> verified that the side effects are not necessary here. That is all that needs
> to be mentioned in the commit message.

I should avoid that kind of excessive focus on less relevant things. Instead,
the overall end product is what matters most.

[snip]

> > This is the core: Kristen knows that the code between mapping / unmapping
> > does not need preemption and page faults disabling. Therefore
> > kmap_local_page() will work with no need to run in atomic. We all agree to
> > the necessity to convert but sometimes we are not sure about how to
> > convert.
>
> But we are sure about this conversion.
>

Yes, we are. Did you notice that I wrote that above?

I supposed that that was enough to show that I'm aware that Kristen knows what
she did and that she did it correctly. My objections were only about minor
details in the commit message. I didn't ask for another version, but I admit
that several parts of my message were ambiguous.

[snip]

> > Ira confirmed that system protection keys aren't affected.
>
> I'm not sure what you mean here. This work started because of system
> protection keys. But since they have yet to have a good use case they are not
> yet upstream. Therefore some _specific_ conversions have used page_address()
> directly because those drivers know _exactly_ where and how the pages were
> mapped. That is a very specific use case which is different from what Thomas
> is talking about. General code which gets a page may still need to have
> additional operations performed prior to using a kernel map.

I was referring to an email from you to Jonathan Corbet.[1]
Did I misunderstand it?

> > I'm not sure about
> > what you are referring to when talking about "other things".
>
> There are a number of other ideas where memory may not be in the direct map.

OK, I'm not yet aware of them.

[snip]

> I think the issue is you are trying to get way too much detail for patches
> which don't require it. If you review a patch like this and you find
> kmap_atomic() was being used for the side effects note that and NAK the patch.
> If you feel there is some evidence but you are unsure a quick email asking is
> fine. But if there is no evidence the patch is fine lets tag it with a
> review and move on.
>
> In this case people are getting frustrated with the 'bikesheding'[1]. We are
> never going to complete all these conversions if we spend this much time on
> the easy ones.
>
> Ira
>
> [1] https://en.wiktionary.org/wiki/bikeshedding
>

You are right. I'm sorry especially because we recently talked about
this topic.
But I fell again into the same trap :-(

Thanks,

Fabio

[1] https://lore.kernel.org/lkml/Ytny132kWjXvu1Ql@iweiny-desk3/

2022-11-16 22:25:50

by Ira Weiny

Subject: Re: [PATCH v2] x86/sgx: Replace kmap/kunmap_atomic calls

On Tue, Nov 15, 2022 at 08:16:26AM -0800, Kristen Carlson Accardi wrote:
> kmap_local_page() is the preferred way to create temporary mappings
> when it is feasible, because the mappings are thread-local and
> CPU-local. kmap_local_page() uses per-task maps rather than per-CPU
> maps. This in effect removes the need to preemption in the
> local CPU while kmap is active, and thus vastly reduces overall
> system latency. It is also valid to take pagefaults.
>
> The use of kmap_atomic() in the SGX code was not an explicit design
> choice to disable page faults or preemption, and there is no compelling
> design reason to using kmap_atomic() vs. kmap_local_page().
>
> Link: https://lore.kernel.org/linux-sgx/Y0biN3%[email protected]/
>
> For more information on the use of kmap_local_page() vs.
> kmap_atomic(), please see Documentation/mm/highmem.rst
>
> Signed-off-by: Kristen Carlson Accardi <[email protected]>
> ---
> Changes since V1:
>
> - Reword commit message to make it more clear why it is preferred
> to use kmap_local_page() vs. kmap_atomic().
>
> arch/x86/kernel/cpu/sgx/encl.c | 12 ++++++------
> arch/x86/kernel/cpu/sgx/ioctl.c | 4 ++--
> arch/x86/kernel/cpu/sgx/main.c | 8 ++++----
> 3 files changed, 12 insertions(+), 12 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
> index 2c258255a629..68f8b18d2278 100644
> --- a/arch/x86/kernel/cpu/sgx/encl.c
> +++ b/arch/x86/kernel/cpu/sgx/encl.c
> @@ -160,8 +160,8 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
> return ret;
>
> pginfo.addr = encl_page->desc & PAGE_MASK;
> - pginfo.contents = (unsigned long)kmap_atomic(b.contents);
> - pcmd_page = kmap_atomic(b.pcmd);
> + pginfo.contents = (unsigned long)kmap_local_page(b.contents);
> + pcmd_page = kmap_local_page(b.pcmd);
> pginfo.metadata = (unsigned long)pcmd_page + b.pcmd_offset;
>
> if (secs_page)
> @@ -187,8 +187,8 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
> */
> pcmd_page_empty = !memchr_inv(pcmd_page, 0, PAGE_SIZE);
>
> - kunmap_atomic(pcmd_page);
> - kunmap_atomic((void *)(unsigned long)pginfo.contents);
> + kunmap_local(pcmd_page);
> + kunmap_local((void *)(unsigned long)pginfo.contents);
>
> get_page(b.pcmd);
> sgx_encl_put_backing(&b);
> @@ -197,10 +197,10 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
>
> if (pcmd_page_empty && !reclaimer_writing_to_pcmd(encl, pcmd_first_page)) {
> sgx_encl_truncate_backing_page(encl, PFN_DOWN(page_pcmd_off));
> - pcmd_page = kmap_atomic(b.pcmd);
> + pcmd_page = kmap_local_page(b.pcmd);
> if (memchr_inv(pcmd_page, 0, PAGE_SIZE))
> pr_warn("PCMD page not empty after truncate.\n");
> - kunmap_atomic(pcmd_page);
> + kunmap_local(pcmd_page);
> }
>
> put_page(b.pcmd);
> diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
> index ef874828fa6b..03c50f11ad75 100644
> --- a/arch/x86/kernel/cpu/sgx/ioctl.c
> +++ b/arch/x86/kernel/cpu/sgx/ioctl.c
> @@ -221,11 +221,11 @@ static int __sgx_encl_add_page(struct sgx_encl *encl,
> pginfo.secs = (unsigned long)sgx_get_epc_virt_addr(encl->secs.epc_page);
> pginfo.addr = encl_page->desc & PAGE_MASK;
> pginfo.metadata = (unsigned long)secinfo;
> - pginfo.contents = (unsigned long)kmap_atomic(src_page);
> + pginfo.contents = (unsigned long)kmap_local_page(src_page);
>
> ret = __eadd(&pginfo, sgx_get_epc_virt_addr(epc_page));
>
> - kunmap_atomic((void *)pginfo.contents);
> + kunmap_local((void *)pginfo.contents);
> put_page(src_page);
>
> return ret ? -EIO : 0;
> diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
> index 0aad028f04d4..e5a37b6e9aa5 100644
> --- a/arch/x86/kernel/cpu/sgx/main.c
> +++ b/arch/x86/kernel/cpu/sgx/main.c
> @@ -165,17 +165,17 @@ static int __sgx_encl_ewb(struct sgx_epc_page *epc_page, void *va_slot,
> pginfo.addr = 0;
> pginfo.secs = 0;
>
> - pginfo.contents = (unsigned long)kmap_atomic(backing->contents);
> - pginfo.metadata = (unsigned long)kmap_atomic(backing->pcmd) +
> + pginfo.contents = (unsigned long)kmap_local_page(backing->contents);
> + pginfo.metadata = (unsigned long)kmap_local_page(backing->pcmd) +
> backing->pcmd_offset;
>
> ret = __ewb(&pginfo, sgx_get_epc_virt_addr(epc_page), va_slot);
> set_page_dirty(backing->pcmd);
> set_page_dirty(backing->contents);
>
> - kunmap_atomic((void *)(unsigned long)(pginfo.metadata -
> + kunmap_local((void *)(unsigned long)(pginfo.metadata -
> backing->pcmd_offset));

NIT: Technically you don't have to pass the pointer to the beginning of the
page here but this works fine.
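
Something like this (an untested sketch) should also be valid, since
kunmap_local() masks the address down to the page it belongs to:

        kunmap_local((void *)(unsigned long)pginfo.metadata);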

Reviewed-by: Ira Weiny <[email protected]>

> - kunmap_atomic((void *)(unsigned long)pginfo.contents);
> + kunmap_local((void *)(unsigned long)pginfo.contents);
>
> return ret;
> }
> --
> 2.38.1
>

2022-11-16 22:27:35

by Thomas Gleixner

Subject: Re: [PATCH v2] x86/sgx: Replace kmap/kunmap_atomic calls

On Wed, Nov 16 2022 at 21:00, Fabio M. De Francesco wrote:
> On Wednesday, 16 November 2022 at 18:21:50 CET Ira Weiny wrote:
>> > Despite I suppose that nobody should rely on those kmap_atomic() side
>> > effects, I have been observing that a large part of the users of the
>> > Highmem API used to call kmap_atomic() for its implicit
>> > pagefault_disable()
>> > and preempt_disable() side effects.
>>
>> Fabio I think you missed the point here. Just because we have found _some_
>> places where the side effect was required does not mean that "a large part
>> of the users..." do.
>
> You are right. Numbers don't support that "a large part of the users..." but
> that it happened and will happen again.

There is a fundamental misconception here. You argue about requirement
instead of dependency. Those are absolutely not the same.

Technically there is no requirement at all to use kmap_atomic().

kmap_atomic() in its original implementation had the requirement of
disabling preemption to protect the temporary mapping. kmap_atomic()
is nothing else than a convenience wrapper:

        preempt_disable();
        establish_mapping();

and later on when pagefault_disable() was separated from
preempt_disable() it became:

        preempt_disable();
        pagefault_disable();
        establish_mapping();

which is again technically not required at all.

It was done because there were usage sites where the implementation of
the kmap_atomic() section _depended_ on the implicit pagefault_disable()
of preempt_disable(). So the pagefault_disable() was added in order not
to break these usage sites.

But did any of the usage sites have a hard technical requirement to have
pagefaults or preemption disabled by kmap_atomic()? No. Not a single
one. All of them could have been converted to disable preemption and/or
pagefaults explicitly.

The implementation dependencies are a consequence of the original
kmap_atomic() implementation and not the other way round.

There is no single context where the implicit preemption and pagefault
disable of kmap_atomic() is required due to the context itself.

If there is code which runs, e.g. with interrupts disabled and needs a
temporary mapping which is then used for a copy to user operation, which
can obviously fault, then there is still no requirement for
kmap_atomic() to disable preemption or pagefaults.

The requirement to disable pagefaults comes from the
copy_to_user_inatomic() which is in turn necessary because the code runs
with interrupts disabled.
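
In such a context the honest form spells that constraint out explicitly, e.g.
(a sketch, reusing the made-up names from the example below):

        vaddr = kmap_local_page(page);
        pagefault_disable();    /* required by the _inatomic() copy, not by the mapping */
        remaining = copy_to_user_inatomic(uptr, vaddr, len);
        pagefault_enable();
        kunmap_local(vaddr);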

So now we have kmap_local() which does not rely on preemption disable
anymore because the protection of the temporary mapping is done
differently.

So you can convert any instance of kmap_atomic() over to kmap_local(),
but you have to analyze whether any code inside the mapped section
requires protection against preemption or pagefault handling.

If there is code which requires that, then you have to figure out
whether that code has been written in that way due to the semantics of
kmap_atomic() or if there is a different requirement for it.

In the previous example I gave you vs. copy_to_user_inatomic()

again:
        kmap_atomic();
        remaining = copy_to_user_inatomic();
        kunmap_atomic();
        if (remaining) {
                if (try_to_handle_fault())
                        goto again;
                ret = -EFAULT;
        }

is clearly no requirement neither for preemption nor for pagefault
disable. The semantics of kmap_atomic() enforced to write the code this
way. The replacement code:

        kmap_local();
        ret = copy_to_user();
        kunmap_local();

is completely equivalent. It can be written this way because
kmap_local() has no implicit side effects which prevent using
copy_to_user() which in turn can fault, handle the fault ...

This is all about implementation details which depend on the original
kmap_atomic() semantics and have been enforced by them.

E.g. something like this:

        kmap_atomic();
        smp_processor_id();
        kunmap_atomic();

has a dependency, but not a requirement.

        kmap_local();
        preempt_disable();
        smp_processor_id();
        preempt_enable();
        kunmap_local();

is completely equivalent and it makes it entirely clear why preemption
needs to be disabled: because smp_processor_id() requires it.

There are only the three classes of usage sites which are affected:

1) The code which is enforced to be stupid because of kmap_atomic()

2) The code which (ab)uses the side effects of kmap_atomic()

3) The combination of #1 and #2

All of them can be replaced by functionally equivalent code.

There is not a single usage site which has a hard requirement on the
kmap_atomic() semantics due to the context in which kmap_atomic() is
invoked.

Ergo _ALL_ usage of kmap_atomic() and related interfaces can be
eliminated completely.

Claiming that even a single place

>> > used to call kmap_atomic() for its implicit pagefault_disable() and
>> > preempt_disable() side effects.

belongs in fairy tale territory and has nothing to do with technical
reality and facts. Why? Simply because there was no other solution to
handle highmem in a hotpath or in a non-sleepable context.

Ergo developers were forced to use kmap_atomic() and then obviously had
to accommodate the code in the mapping section. Of course they utilized
the side effects because that's what we do with spinlocks and other
primitives as well.

So can we just go and fix these places instead of indulging in
handwaving and bikeshedding?

Thanks,

tglx









2022-11-16 23:09:28

by Fabio M. De Francesco

Subject: Re: [PATCH v2] x86/sgx: Replace kmap/kunmap_atomic calls

On Tuesday, 15 November 2022 at 17:16:26 CET Kristen Carlson Accardi wrote:
> kmap_local_page() is the preferred way to create temporary mappings
> when it is feasible, because the mappings are thread-local and
> CPU-local. kmap_local_page() uses per-task maps rather than per-CPU
> maps. This in effect removes the need to preemption in the
> local CPU while kmap is active, and thus vastly reduces overall
> system latency. It is also valid to take pagefaults.
>
> The use of kmap_atomic() in the SGX code was not an explicit design
> choice to disable page faults or preemption, and there is no compelling
> design reason to using kmap_atomic() vs. kmap_local_page().
>
> Link: https://lore.kernel.org/linux-sgx/Y0biN3%[email protected]/
>
> For more information on the use of kmap_local_page() vs.
> kmap_atomic(), please see Documentation/mm/highmem.rst
>
> Signed-off-by: Kristen Carlson Accardi <[email protected]>
> ---
> Changes since V1:
>
> - Reword commit message to make it more clear why it is preferred
> to use kmap_local_page() vs. kmap_atomic().
>
> arch/x86/kernel/cpu/sgx/encl.c | 12 ++++++------
> arch/x86/kernel/cpu/sgx/ioctl.c | 4 ++--
> arch/x86/kernel/cpu/sgx/main.c | 8 ++++----
> 3 files changed, 12 insertions(+), 12 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
> index 2c258255a629..68f8b18d2278 100644
> --- a/arch/x86/kernel/cpu/sgx/encl.c
> +++ b/arch/x86/kernel/cpu/sgx/encl.c
> @@ -160,8 +160,8 @@ static int __sgx_encl_eldu(struct sgx_encl_page
> *encl_page, return ret;
>
> pginfo.addr = encl_page->desc & PAGE_MASK;
> - pginfo.contents = (unsigned long)kmap_atomic(b.contents);
> - pcmd_page = kmap_atomic(b.pcmd);
> + pginfo.contents = (unsigned long)kmap_local_page(b.contents);
> + pcmd_page = kmap_local_page(b.pcmd);
> pginfo.metadata = (unsigned long)pcmd_page + b.pcmd_offset;
>
> if (secs_page)
> @@ -187,8 +187,8 @@ static int __sgx_encl_eldu(struct sgx_encl_page
> *encl_page, */
> pcmd_page_empty = !memchr_inv(pcmd_page, 0, PAGE_SIZE);
>
> - kunmap_atomic(pcmd_page);
> - kunmap_atomic((void *)(unsigned long)pginfo.contents);
> + kunmap_local(pcmd_page);
> + kunmap_local((void *)(unsigned long)pginfo.contents);
>
> get_page(b.pcmd);
> sgx_encl_put_backing(&b);
> @@ -197,10 +197,10 @@ static int __sgx_encl_eldu(struct sgx_encl_page
> *encl_page,
>
> if (pcmd_page_empty && !reclaimer_writing_to_pcmd(encl,
pcmd_first_page)) {
> sgx_encl_truncate_backing_page(encl,
PFN_DOWN(page_pcmd_off));
> - pcmd_page = kmap_atomic(b.pcmd);
> + pcmd_page = kmap_local_page(b.pcmd);
> if (memchr_inv(pcmd_page, 0, PAGE_SIZE))
> pr_warn("PCMD page not empty after truncate.
\n");
> - kunmap_atomic(pcmd_page);
> + kunmap_local(pcmd_page);
> }
>
> put_page(b.pcmd);
> diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/
ioctl.c
> index ef874828fa6b..03c50f11ad75 100644
> --- a/arch/x86/kernel/cpu/sgx/ioctl.c
> +++ b/arch/x86/kernel/cpu/sgx/ioctl.c
> @@ -221,11 +221,11 @@ static int __sgx_encl_add_page(struct sgx_encl *encl,
> pginfo.secs = (unsigned long)sgx_get_epc_virt_addr(encl-
>secs.epc_page);
> pginfo.addr = encl_page->desc & PAGE_MASK;
> pginfo.metadata = (unsigned long)secinfo;
> - pginfo.contents = (unsigned long)kmap_atomic(src_page);
> + pginfo.contents = (unsigned long)kmap_local_page(src_page);
>
> ret = __eadd(&pginfo, sgx_get_epc_virt_addr(epc_page));
>
> - kunmap_atomic((void *)pginfo.contents);
> + kunmap_local((void *)pginfo.contents);
> put_page(src_page);
>
> return ret ? -EIO : 0;
> diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
> index 0aad028f04d4..e5a37b6e9aa5 100644
> --- a/arch/x86/kernel/cpu/sgx/main.c
> +++ b/arch/x86/kernel/cpu/sgx/main.c
> @@ -165,17 +165,17 @@ static int __sgx_encl_ewb(struct sgx_epc_page
*epc_page,
> void *va_slot, pginfo.addr = 0;
> pginfo.secs = 0;
>
> - pginfo.contents = (unsigned long)kmap_atomic(backing->contents);
> - pginfo.metadata = (unsigned long)kmap_atomic(backing->pcmd) +
> + pginfo.contents = (unsigned long)kmap_local_page(backing->contents);
> + pginfo.metadata = (unsigned long)kmap_local_page(backing->pcmd) +
> backing->pcmd_offset;
>
> ret = __ewb(&pginfo, sgx_get_epc_virt_addr(epc_page), va_slot);
> set_page_dirty(backing->pcmd);
> set_page_dirty(backing->contents);
>
> - kunmap_atomic((void *)(unsigned long)(pginfo.metadata -
> + kunmap_local((void *)(unsigned long)(pginfo.metadata -
> backing-
>pcmd_offset));
> - kunmap_atomic((void *)(unsigned long)pginfo.contents);
> + kunmap_local((void *)(unsigned long)pginfo.contents);
>
> return ret;
> }
> --
> 2.38.1

Despite my tendency to focus attention on details of little importance for the
task at hand, I think that your conversions are good and that your analysis
proves them safe because there is no need to explicitly disable page faults
and/or preemption along with the conversions.

Reviewed-by: Fabio M. De Francesco <[email protected]>

Thanks,

Fabio

Subject: [tip: x86/sgx] x86/sgx: Replace kmap/kunmap_atomic() calls

The following commit has been merged into the x86/sgx branch of tip:

Commit-ID: 89e927bbcd45d507e5612ef72fda04182e544a38
Gitweb: https://git.kernel.org/tip/89e927bbcd45d507e5612ef72fda04182e544a38
Author: Kristen Carlson Accardi <[email protected]>
AuthorDate: Tue, 15 Nov 2022 08:16:26 -08:00
Committer: Thomas Gleixner <[email protected]>
CommitterDate: Fri, 02 Dec 2022 14:59:56 +01:00

x86/sgx: Replace kmap/kunmap_atomic() calls

kmap_local_page() is the preferred way to create temporary mappings when it
is feasible, because the mappings are thread-local and CPU-local.

kmap_local_page() uses per-task maps rather than per-CPU maps. This in
effect removes the need to disable preemption on the local CPU while the
mapping is active, and thus vastly reduces overall system latency. It is
also valid to take pagefaults within the mapped region.

The use of kmap_atomic() in the SGX code was not an explicit design choice
to disable page faults or preemption, and there is no compelling design
reason to using kmap_atomic() vs. kmap_local_page().

Signed-off-by: Kristen Carlson Accardi <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Reviewed-by: Jarkko Sakkinen <[email protected]>
Reviewed-by: Fabio M. De Francesco <[email protected]>
Reviewed-by: Ira Weiny <[email protected]>
Link: https://lore.kernel.org/linux-sgx/Y0biN3%[email protected]/
Link: https://lore.kernel.org/r/[email protected]
---
arch/x86/kernel/cpu/sgx/encl.c | 12 ++++++------
arch/x86/kernel/cpu/sgx/ioctl.c | 4 ++--
arch/x86/kernel/cpu/sgx/main.c | 8 ++++----
3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
index 2c25825..68f8b18 100644
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -160,8 +160,8 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
return ret;

pginfo.addr = encl_page->desc & PAGE_MASK;
- pginfo.contents = (unsigned long)kmap_atomic(b.contents);
- pcmd_page = kmap_atomic(b.pcmd);
+ pginfo.contents = (unsigned long)kmap_local_page(b.contents);
+ pcmd_page = kmap_local_page(b.pcmd);
pginfo.metadata = (unsigned long)pcmd_page + b.pcmd_offset;

if (secs_page)
@@ -187,8 +187,8 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
*/
pcmd_page_empty = !memchr_inv(pcmd_page, 0, PAGE_SIZE);

- kunmap_atomic(pcmd_page);
- kunmap_atomic((void *)(unsigned long)pginfo.contents);
+ kunmap_local(pcmd_page);
+ kunmap_local((void *)(unsigned long)pginfo.contents);

get_page(b.pcmd);
sgx_encl_put_backing(&b);
@@ -197,10 +197,10 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,

if (pcmd_page_empty && !reclaimer_writing_to_pcmd(encl, pcmd_first_page)) {
sgx_encl_truncate_backing_page(encl, PFN_DOWN(page_pcmd_off));
- pcmd_page = kmap_atomic(b.pcmd);
+ pcmd_page = kmap_local_page(b.pcmd);
if (memchr_inv(pcmd_page, 0, PAGE_SIZE))
pr_warn("PCMD page not empty after truncate.\n");
- kunmap_atomic(pcmd_page);
+ kunmap_local(pcmd_page);
}

put_page(b.pcmd);
diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
index ef87482..03c50f1 100644
--- a/arch/x86/kernel/cpu/sgx/ioctl.c
+++ b/arch/x86/kernel/cpu/sgx/ioctl.c
@@ -221,11 +221,11 @@ static int __sgx_encl_add_page(struct sgx_encl *encl,
pginfo.secs = (unsigned long)sgx_get_epc_virt_addr(encl->secs.epc_page);
pginfo.addr = encl_page->desc & PAGE_MASK;
pginfo.metadata = (unsigned long)secinfo;
- pginfo.contents = (unsigned long)kmap_atomic(src_page);
+ pginfo.contents = (unsigned long)kmap_local_page(src_page);

ret = __eadd(&pginfo, sgx_get_epc_virt_addr(epc_page));

- kunmap_atomic((void *)pginfo.contents);
+ kunmap_local((void *)pginfo.contents);
put_page(src_page);

return ret ? -EIO : 0;
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index 0aad028..e5a37b6 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -165,17 +165,17 @@ static int __sgx_encl_ewb(struct sgx_epc_page *epc_page, void *va_slot,
pginfo.addr = 0;
pginfo.secs = 0;

- pginfo.contents = (unsigned long)kmap_atomic(backing->contents);
- pginfo.metadata = (unsigned long)kmap_atomic(backing->pcmd) +
+ pginfo.contents = (unsigned long)kmap_local_page(backing->contents);
+ pginfo.metadata = (unsigned long)kmap_local_page(backing->pcmd) +
backing->pcmd_offset;

ret = __ewb(&pginfo, sgx_get_epc_virt_addr(epc_page), va_slot);
set_page_dirty(backing->pcmd);
set_page_dirty(backing->contents);

- kunmap_atomic((void *)(unsigned long)(pginfo.metadata -
+ kunmap_local((void *)(unsigned long)(pginfo.metadata -
backing->pcmd_offset));
- kunmap_atomic((void *)(unsigned long)pginfo.contents);
+ kunmap_local((void *)(unsigned long)pginfo.contents);

return ret;
}