LinuxLists.cc - [PATCH] x86/sgx: fix a NULL pointer

[permalink] [raw]

Subject: [PATCH] x86/sgx: fix a NULL pointer

Under heavy load, the SGX EPC reclaimers (current ksgxd or future EPC
cgroup worker) may reclaim the SECS EPC page for an enclave and set
encl->secs.epc_page to NULL. But the SECS EPC page is used for EAUG in
the SGX #PF handler without checking for NULL and reloading.

Fix this by checking if SECS is loaded before EAUG and load it if it was
reclaimed.

Fixes: 5a90d2c3f5ef8 ("x86/sgx: Support adding of pages to an initialized enclave")
Cc: [email protected]
Signed-off-by: Haitao Huang <[email protected]>
---
arch/x86/kernel/cpu/sgx/encl.c | 25 ++++++++++++++++++++-----
arch/x86/kernel/cpu/sgx/main.c | 4 ++++
2 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
index 2a0e90fe2abc..2ab544da1664 100644
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -235,6 +235,16 @@ static struct sgx_epc_page *sgx_encl_eldu(struct sgx_encl_page *encl_page,
return epc_page;
}

+static struct sgx_epc_page *sgx_encl_load_secs(struct sgx_encl *encl)
+{
+ struct sgx_epc_page *epc_page = encl->secs.epc_page;
+
+ if (!epc_page)
+ epc_page = sgx_encl_eldu(&encl->secs, NULL);
+
+ return epc_page;
+}
+
static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl,
struct sgx_encl_page *entry)
{
@@ -248,11 +258,9 @@ static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl,
return entry;
}

- if (!(encl->secs.epc_page)) {
- epc_page = sgx_encl_eldu(&encl->secs, NULL);
- if (IS_ERR(epc_page))
- return ERR_CAST(epc_page);
- }
+ epc_page = sgx_encl_load_secs(encl);
+ if (IS_ERR(epc_page))
+ return ERR_CAST(epc_page);

epc_page = sgx_encl_eldu(entry, encl->secs.epc_page);
if (IS_ERR(epc_page))
@@ -339,6 +347,13 @@ static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,

mutex_lock(&encl->lock);

+ epc_page = sgx_encl_load_secs(encl);
+ if (IS_ERR(epc_page)) {
+ if (PTR_ERR(epc_page) == -EBUSY)
+ vmret = VM_FAULT_NOPAGE;
+ goto err_out_unlock;
+ }
+
epc_page = sgx_alloc_epc_page(encl_page, false);
if (IS_ERR(epc_page)) {
if (PTR_ERR(epc_page) == -EBUSY)
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index 166692f2d501..4662a364ce62 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -257,6 +257,10 @@ static void sgx_reclaimer_write(struct sgx_epc_page *epc_page,

mutex_lock(&encl->lock);

+ /* Should not be possible */
+ if (WARN_ON(!(encl->secs.epc_page)))
+ goto out;
+
sgx_encl_ewb(epc_page, backing);
encl_page->epc_page = NULL;
encl->secs_child_cnt--;
--
2.25.1

2023-07-17 23:17:00

[permalink] [raw]

Subject: Re: [PATCH] x86/sgx: fix a NULL pointer

On Mon, 2023-07-17 at 13:29 -0700, Haitao Huang wrote:
> Under heavy load, the SGX EPC reclaimers (current ksgxd or future EPC
> cgroup worker) may reclaim the SECS EPC page for an enclave and set
> encl->secs.epc_page to NULL.
>

As a bug fix, I don't think you need to mention "future EPC cgroup worker".

> But the SECS EPC page is used for EAUG in
> the SGX #PF handler without checking for NULL and reloading.
>
> Fix this by checking if SECS is loaded before EAUG and load it if it was
^
loading
> reclaimed.
>
> Fixes: 5a90d2c3f5ef8 ("x86/sgx: Support adding of pages to an initialized enclave")
> Cc: [email protected]
> Signed-off-by: Haitao Huang <[email protected]>
> ---
> arch/x86/kernel/cpu/sgx/encl.c | 25 ++++++++++++++++++++-----
> arch/x86/kernel/cpu/sgx/main.c | 4 ++++
> 2 files changed, 24 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
> index 2a0e90fe2abc..2ab544da1664 100644
> --- a/arch/x86/kernel/cpu/sgx/encl.c
> +++ b/arch/x86/kernel/cpu/sgx/encl.c
> @@ -235,6 +235,16 @@ static struct sgx_epc_page *sgx_encl_eldu(struct sgx_encl_page *encl_page,
> return epc_page;
> }
>
> +static struct sgx_epc_page *sgx_encl_load_secs(struct sgx_encl *encl)
> +{
> + struct sgx_epc_page *epc_page = encl->secs.epc_page;
> +
> + if (!epc_page)
> + epc_page = sgx_encl_eldu(&encl->secs, NULL);
> +
> + return epc_page;
> +}
> +
> static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl,
> struct sgx_encl_page *entry)
> {
> @@ -248,11 +258,9 @@ static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl,
> return entry;
> }
>
> - if (!(encl->secs.epc_page)) {
> - epc_page = sgx_encl_eldu(&encl->secs, NULL);
> - if (IS_ERR(epc_page))
> - return ERR_CAST(epc_page);
> - }
> + epc_page = sgx_encl_load_secs(encl);
> + if (IS_ERR(epc_page))
> + return ERR_CAST(epc_page);
>
> epc_page = sgx_encl_eldu(entry, encl->secs.epc_page);
> if (IS_ERR(epc_page))
> @@ -339,6 +347,13 @@ static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
>
> mutex_lock(&encl->lock);
>
> + epc_page = sgx_encl_load_secs(encl);
> + if (IS_ERR(epc_page)) {
> + if (PTR_ERR(epc_page) == -EBUSY)
> + vmret = VM_FAULT_NOPAGE;
> + goto err_out_unlock;
> + }
> +
> epc_page = sgx_alloc_epc_page(encl_page, false);
> if (IS_ERR(epc_page)) {
> if (PTR_ERR(epc_page) == -EBUSY)
> diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
> index 166692f2d501..4662a364ce62 100644
> --- a/arch/x86/kernel/cpu/sgx/main.c
> +++ b/arch/x86/kernel/cpu/sgx/main.c
> @@ -257,6 +257,10 @@ static void sgx_reclaimer_write(struct sgx_epc_page *epc_page,
>
> mutex_lock(&encl->lock);
>
> + /* Should not be possible */
> + if (WARN_ON(!(encl->secs.epc_page)))
> + goto out;
> +

This shouldn't be a mandatory part of this fix, no?

If there's good reason to do, then probably you should describe the reason in
the changelog.

> sgx_encl_ewb(epc_page, backing);
> encl_page->epc_page = NULL;
> encl->secs_child_cnt--;

2023-07-18 02:12:27

[permalink] [raw]

Subject: Re: [PATCH] x86/sgx: fix a NULL pointer

On Mon, 2023-07-17 at 17:45 -0700, Haitao Huang wrote:
> Under heavy load, the SGX EPC reclaimer (ksgxd) may reclaim the SECS EPC
> page for an enclave and set encl->secs.epc_page to NULL. But the SECS
> EPC page is used for EAUG in the SGX #PF handler without checking for
> NULL and reloading.
>
> Fix this by checking if SECS is loaded before EAUG and loading it if it was
> reclaimed.

Nit:

Looks the sentence break of the second paragraph isn't consistent with the first
paragraph, i.e., looks the first line is too long and the "was" should be broken
to the second line.

And I think even for this simple patch, you are sending too frequently. The
patch subject should contain the version number too so people can distinguish it
from the previous one. Even better, please use "--base=auto" when generating
the patch so people can know the base commit to apply to.

>
> Fixes: 5a90d2c3f5ef ("x86/sgx: Support adding of pages to an initialized enclave")
> Cc: [email protected]
> Signed-off-by: Haitao Huang <[email protected]>
> ---
> arch/x86/kernel/cpu/sgx/encl.c | 25 ++++++++++++++++++++-----
> 1 file changed, 20 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
> index 2a0e90fe2abc..2ab544da1664 100644
> --- a/arch/x86/kernel/cpu/sgx/encl.c
> +++ b/arch/x86/kernel/cpu/sgx/encl.c
> @@ -235,6 +235,16 @@ static struct sgx_epc_page *sgx_encl_eldu(struct sgx_encl_page *encl_page,
> return epc_page;
> }
>
> +static struct sgx_epc_page *sgx_encl_load_secs(struct sgx_encl *encl)
> +{
> + struct sgx_epc_page *epc_page = encl->secs.epc_page;
> +
> + if (!epc_page)
> + epc_page = sgx_encl_eldu(&encl->secs, NULL);
> +
> + return epc_page;
> +}
> +
> static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl,
> struct sgx_encl_page *entry)
> {
> @@ -248,11 +258,9 @@ static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl,
> return entry;
> }
>
> - if (!(encl->secs.epc_page)) {
> - epc_page = sgx_encl_eldu(&encl->secs, NULL);
> - if (IS_ERR(epc_page))
> - return ERR_CAST(epc_page);
> - }
> + epc_page = sgx_encl_load_secs(encl);
> + if (IS_ERR(epc_page))
> + return ERR_CAST(epc_page);
>
> epc_page = sgx_encl_eldu(entry, encl->secs.epc_page);
> if (IS_ERR(epc_page))
> @@ -339,6 +347,13 @@ static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
>
> mutex_lock(&encl->lock);
>
> + epc_page = sgx_encl_load_secs(encl);
> + if (IS_ERR(epc_page)) {
> + if (PTR_ERR(epc_page) == -EBUSY)
> + vmret = VM_FAULT_NOPAGE;
^
Nit: two spaces here (yeah you copied from the existing code, and sorry forgot
to point out in the previous version).

> + goto err_out_unlock;
> + }
> +
> epc_page = sgx_alloc_epc_page(encl_page, false);
> if (IS_ERR(epc_page)) {
> if (PTR_ERR(epc_page) == -EBUSY)

2023-07-18 02:14:27

[permalink] [raw]

Subject: [PATCH] x86/sgx: fix a NULL pointer

Under heavy load, the SGX EPC reclaimer (ksgxd) may reclaim the SECS EPC
page for an enclave and set encl->secs.epc_page to NULL. But the SECS
EPC page is used for EAUG in the SGX #PF handler without checking for
NULL and reloading.

Fix this by checking if SECS is loaded before EAUG and loading it if it was
reclaimed.

Fixes: 5a90d2c3f5ef ("x86/sgx: Support adding of pages to an initialized enclave")
Cc: [email protected]
Signed-off-by: Haitao Huang <[email protected]>
---
arch/x86/kernel/cpu/sgx/encl.c | 25 ++++++++++++++++++++-----
1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
index 2a0e90fe2abc..2ab544da1664 100644
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -235,6 +235,16 @@ static struct sgx_epc_page *sgx_encl_eldu(struct sgx_encl_page *encl_page,
return epc_page;
}

+static struct sgx_epc_page *sgx_encl_load_secs(struct sgx_encl *encl)
+{
+ struct sgx_epc_page *epc_page = encl->secs.epc_page;
+
+ if (!epc_page)
+ epc_page = sgx_encl_eldu(&encl->secs, NULL);
+
+ return epc_page;
+}
+
static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl,
struct sgx_encl_page *entry)
{
@@ -248,11 +258,9 @@ static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl,
return entry;
}

- if (!(encl->secs.epc_page)) {
- epc_page = sgx_encl_eldu(&encl->secs, NULL);
- if (IS_ERR(epc_page))
- return ERR_CAST(epc_page);
- }
+ epc_page = sgx_encl_load_secs(encl);
+ if (IS_ERR(epc_page))
+ return ERR_CAST(epc_page);

epc_page = sgx_encl_eldu(entry, encl->secs.epc_page);
if (IS_ERR(epc_page))
@@ -339,6 +347,13 @@ static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,

mutex_lock(&encl->lock);

+ epc_page = sgx_encl_load_secs(encl);
+ if (IS_ERR(epc_page)) {
+ if (PTR_ERR(epc_page) == -EBUSY)
+ vmret = VM_FAULT_NOPAGE;
+ goto err_out_unlock;
+ }
+
epc_page = sgx_alloc_epc_page(encl_page, false);
if (IS_ERR(epc_page)) {
if (PTR_ERR(epc_page) == -EBUSY)
--
2.25.1

2023-07-18 03:19:07

[permalink] [raw]

Subject: Re: [PATCH] x86/sgx: fix a NULL pointer

Hi
On Mon, 17 Jul 2023 20:39:31 -0500, Huang, Kai <[email protected]> wrote:

> On Mon, 2023-07-17 at 17:45 -0700, Haitao Huang wrote:
>> Under heavy load, the SGX EPC reclaimer (ksgxd) may reclaim the SECS EPC
>> page for an enclave and set encl->secs.epc_page to NULL. But the SECS
>> EPC page is used for EAUG in the SGX #PF handler without checking for
>> NULL and reloading.
>>
>> Fix this by checking if SECS is loaded before EAUG and loading it if it
>> was
>> reclaimed.
>
> Nit:
>
> Looks the sentence break of the second paragraph isn't consistent with
> the first
> paragraph, i.e., looks the first line is too long and the "was" should
> be broken
> to the second line.
>
Yeah, I think I forgot to reformat this line after revising.

> And I think even for this simple patch, you are sending too frequently.
> The
> patch subject should contain the version number too so people can
> distinguish it
> from the previous one. Even better, please use "--base=auto" when
> generating
> the patch so people can know the base commit to apply to.

I'll send the next one as V2 and start a separate email thread.

>>
>> Fixes: 5a90d2c3f5ef ("x86/sgx: Support adding of pages to an
>> initialized enclave")
>> Cc: [email protected]
>> Signed-off-by: Haitao Huang <[email protected]>
>> ---
>> arch/x86/kernel/cpu/sgx/encl.c | 25 ++++++++++++++++++++-----
>> 1 file changed, 20 insertions(+), 5 deletions(-)
>>
>> diff --git a/arch/x86/kernel/cpu/sgx/encl.c
>> b/arch/x86/kernel/cpu/sgx/encl.c
>> index 2a0e90fe2abc..2ab544da1664 100644
>> --- a/arch/x86/kernel/cpu/sgx/encl.c
>> +++ b/arch/x86/kernel/cpu/sgx/encl.c
>> @@ -235,6 +235,16 @@ static struct sgx_epc_page *sgx_encl_eldu(struct
>> sgx_encl_page *encl_page,
>> return epc_page;
>> }
>>
>> +static struct sgx_epc_page *sgx_encl_load_secs(struct sgx_encl *encl)
>> +{
>> + struct sgx_epc_page *epc_page = encl->secs.epc_page;
>> +
>> + if (!epc_page)
>> + epc_page = sgx_encl_eldu(&encl->secs, NULL);
>> +
>> + return epc_page;
>> +}
>> +
>> static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl
>> *encl,
>> struct sgx_encl_page *entry)
>> {
>> @@ -248,11 +258,9 @@ static struct sgx_encl_page
>> *__sgx_encl_load_page(struct sgx_encl *encl,
>> return entry;
>> }
>>
>> - if (!(encl->secs.epc_page)) {
>> - epc_page = sgx_encl_eldu(&encl->secs, NULL);
>> - if (IS_ERR(epc_page))
>> - return ERR_CAST(epc_page);
>> - }
>> + epc_page = sgx_encl_load_secs(encl);
>> + if (IS_ERR(epc_page))
>> + return ERR_CAST(epc_page);
>>
>> epc_page = sgx_encl_eldu(entry, encl->secs.epc_page);
>> if (IS_ERR(epc_page))
>> @@ -339,6 +347,13 @@ static vm_fault_t sgx_encl_eaug_page(struct
>> vm_area_struct *vma,
>>
>> mutex_lock(&encl->lock);
>>
>> + epc_page = sgx_encl_load_secs(encl);
>> + if (IS_ERR(epc_page)) {
>> + if (PTR_ERR(epc_page) == -EBUSY)
>> + vmret = VM_FAULT_NOPAGE;
> ^
> Nit: two spaces here (yeah you copied from the existing code, and sorry
> forgot
> to point out in the previous version).
>
Sure. will fix in V2.

Thanks
Haitao

2023-07-18 14:51:08

[permalink] [raw]

Subject: Re: [PATCH] x86/sgx: fix a NULL pointer

On 7/17/23 13:29, Haitao Huang wrote:
...
> @@ -248,11 +258,9 @@ static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl,
> return entry;
> }
>
> - if (!(encl->secs.epc_page)) {
> - epc_page = sgx_encl_eldu(&encl->secs, NULL);
> - if (IS_ERR(epc_page))
> - return ERR_CAST(epc_page);
> - }
> + epc_page = sgx_encl_load_secs(encl);
> + if (IS_ERR(epc_page))
> + return ERR_CAST(epc_page);
>
> epc_page = sgx_encl_eldu(entry, encl->secs.epc_page);
> if (IS_ERR(epc_page))
> @@ -339,6 +347,13 @@ static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
>
> mutex_lock(&encl->lock);
>
> + epc_page = sgx_encl_load_secs(encl);
> + if (IS_ERR(epc_page)) {
> + if (PTR_ERR(epc_page) == -EBUSY)
> + vmret = VM_FAULT_NOPAGE;
> + goto err_out_unlock;
> + }

Whenever I see one of these "make sure it isn't NULL", I always jump to
asking what *keeps* it from becoming NULL again. In both cases here, I
think that's encl->lock.

A comment would be really nice here, maybe on sgx_encl_load_secs(). Maybe:

/*
* Ensure the SECS page is not swapped out. Must be called with
* encl->lock to protect _____ and ensure the SECS page is not
* swapped out again.
*/

> diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
> index 166692f2d501..4662a364ce62 100644
> --- a/arch/x86/kernel/cpu/sgx/main.c
> +++ b/arch/x86/kernel/cpu/sgx/main.c
> @@ -257,6 +257,10 @@ static void sgx_reclaimer_write(struct sgx_epc_page *epc_page,
>
> mutex_lock(&encl->lock);
>
> + /* Should not be possible */
> + if (WARN_ON(!(encl->secs.epc_page)))
> + goto out;

That comment isn't super helpful. We generally don't WARN_ON() things
that should happen. *Why* is it not possible?

2023-07-18 15:33:49

[permalink] [raw]

Subject: Re: [PATCH] x86/sgx: fix a NULL pointer

On 7/17/23 13:29, Haitao Huang wrote:
> Under heavy load, the SGX EPC reclaimers (current ksgxd or future EPC
> cgroup worker) may reclaim the SECS EPC page for an enclave and set
> encl->secs.epc_page to NULL. But the SECS EPC page is used for EAUG in
> the SGX #PF handler without checking for NULL and reloading.
>
> Fix this by checking if SECS is loaded before EAUG and load it if it was
> reclaimed.

It would be nice to see a _bit_ more theory of the bug in here.

What is an SECS page and why is it special in a reclaim context? Why is
this so hard to hit? What led you to discover this issue now? What is
EAUG?

2023-07-18 16:17:28

by Jarkko Sakkinen

[permalink] [raw]

Subject: Re: [PATCH] x86/sgx: fix a NULL pointer

On Mon Jul 17, 2023 at 11:29 PM EEST, Haitao Huang wrote:
> Under heavy load, the SGX EPC reclaimers (current ksgxd or future EPC
> cgroup worker) may reclaim the SECS EPC page for an enclave and set
> encl->secs.epc_page to NULL. But the SECS EPC page is used for EAUG in
> the SGX #PF handler without checking for NULL and reloading.
>
> Fix this by checking if SECS is loaded before EAUG and load it if it was
> reclaimed.
>
> Fixes: 5a90d2c3f5ef8 ("x86/sgx: Support adding of pages to an initialized enclave")
> Cc: [email protected]

Given that

$ git describe --contains 5a90d2c3f5ef8
v6.0-rc1~102^2~16

You could also describe this as:

Cc: [email protected] # v6.0+

> Signed-off-by: Haitao Huang <[email protected]>
> ---
> arch/x86/kernel/cpu/sgx/encl.c | 25 ++++++++++++++++++++-----
> arch/x86/kernel/cpu/sgx/main.c | 4 ++++
> 2 files changed, 24 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
> index 2a0e90fe2abc..2ab544da1664 100644
> --- a/arch/x86/kernel/cpu/sgx/encl.c
> +++ b/arch/x86/kernel/cpu/sgx/encl.c
> @@ -235,6 +235,16 @@ static struct sgx_epc_page *sgx_encl_eldu(struct sgx_encl_page *encl_page,
> return epc_page;
> }
>
> +static struct sgx_epc_page *sgx_encl_load_secs(struct sgx_encl *encl)
> +{
> + struct sgx_epc_page *epc_page = encl->secs.epc_page;
> +
> + if (!epc_page)
> + epc_page = sgx_encl_eldu(&encl->secs, NULL);
> +
> + return epc_page;
> +}
> +
> static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl,
> struct sgx_encl_page *entry)
> {
> @@ -248,11 +258,9 @@ static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl,
> return entry;
> }
>
> - if (!(encl->secs.epc_page)) {
> - epc_page = sgx_encl_eldu(&encl->secs, NULL);
> - if (IS_ERR(epc_page))
> - return ERR_CAST(epc_page);
> - }
> + epc_page = sgx_encl_load_secs(encl);
> + if (IS_ERR(epc_page))
> + return ERR_CAST(epc_page);
>
> epc_page = sgx_encl_eldu(entry, encl->secs.epc_page);
> if (IS_ERR(epc_page))
> @@ -339,6 +347,13 @@ static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
>
> mutex_lock(&encl->lock);
>
> + epc_page = sgx_encl_load_secs(encl);
> + if (IS_ERR(epc_page)) {
> + if (PTR_ERR(epc_page) == -EBUSY)
> + vmret = VM_FAULT_NOPAGE;
> + goto err_out_unlock;
> + }
> +
> epc_page = sgx_alloc_epc_page(encl_page, false);
> if (IS_ERR(epc_page)) {
> if (PTR_ERR(epc_page) == -EBUSY)
> diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
> index 166692f2d501..4662a364ce62 100644
> --- a/arch/x86/kernel/cpu/sgx/main.c
> +++ b/arch/x86/kernel/cpu/sgx/main.c
> @@ -257,6 +257,10 @@ static void sgx_reclaimer_write(struct sgx_epc_page *epc_page,
>
> mutex_lock(&encl->lock);
>
> + /* Should not be possible */
> + if (WARN_ON(!(encl->secs.epc_page)))
> + goto out;
> +
> sgx_encl_ewb(epc_page, backing);
> encl_page->epc_page = NULL;
> encl->secs_child_cnt--;
> --
> 2.25.1

Reviewed-by: Jarkko Sakkinen <[email protected]>

BR, Jarkko

2023-07-18 16:55:39

[permalink] [raw]

Subject: Re: [PATCH] x86/sgx: fix a NULL pointer

On Tue, 18 Jul 2023 09:30:11 -0500, Dave Hansen <[email protected]>
wrote:

> On 7/17/23 13:29, Haitao Huang wrote:
> ...
>> @@ -248,11 +258,9 @@ static struct sgx_encl_page
>> *__sgx_encl_load_page(struct sgx_encl *encl,
>> return entry;
>> }
>>
>> - if (!(encl->secs.epc_page)) {
>> - epc_page = sgx_encl_eldu(&encl->secs, NULL);
>> - if (IS_ERR(epc_page))
>> - return ERR_CAST(epc_page);
>> - }
>> + epc_page = sgx_encl_load_secs(encl);
>> + if (IS_ERR(epc_page))
>> + return ERR_CAST(epc_page);
>>
>> epc_page = sgx_encl_eldu(entry, encl->secs.epc_page);
>> if (IS_ERR(epc_page))
>> @@ -339,6 +347,13 @@ static vm_fault_t sgx_encl_eaug_page(struct
>> vm_area_struct *vma,
>>
>> mutex_lock(&encl->lock);
>>
>> + epc_page = sgx_encl_load_secs(encl);
>> + if (IS_ERR(epc_page)) {
>> + if (PTR_ERR(epc_page) == -EBUSY)
>> + vmret = VM_FAULT_NOPAGE;
>> + goto err_out_unlock;
>> + }
>
> Whenever I see one of these "make sure it isn't NULL", I always jump to
> asking what *keeps* it from becoming NULL again. In both cases here, I
> think that's encl->lock.
>
Yes, encl->lock protects all enclave states, the xarray holding
encl_pages, SECS, VAs, etc.

> A comment would be really nice here, maybe on sgx_encl_load_secs().
> Maybe:
>
> /*
> * Ensure the SECS page is not swapped out. Must be called with
> * encl->lock to protect _____ and ensure the SECS page is not
> * swapped out again.
> */
>
Thanks for the suggestion. Lock should be held for the duration of SECS
usage.
So something like this?
/*
* Ensure the SECS page is not swapped out. Must be called with
* encl->lock to protect the enclave states including SECS and
* ensure the SECS page is not swapped out again while being used.
*/

>> diff --git a/arch/x86/kernel/cpu/sgx/main.c
>> b/arch/x86/kernel/cpu/sgx/main.c
>> index 166692f2d501..4662a364ce62 100644
>> --- a/arch/x86/kernel/cpu/sgx/main.c
>> +++ b/arch/x86/kernel/cpu/sgx/main.c
>> @@ -257,6 +257,10 @@ static void sgx_reclaimer_write(struct
>> sgx_epc_page *epc_page,
>>
>> mutex_lock(&encl->lock);
>>
>> + /* Should not be possible */
>> + if (WARN_ON(!(encl->secs.epc_page)))
>> + goto out;
>
> That comment isn't super helpful. We generally don't WARN_ON() things
> that should happen. *Why* is it not possible?
>

When this part of code is reached, the reclaimer is holding at least one
reclaimable EPC page to reclaim for the enclave and the code below only
reclaims SECS when no reclaimable EPCs (number of SECS children being
zero) of the enclave left. So it should not be possible.
I'll remove this change because this is really not needed for fixing the
bug as Kai pointed out.

I added this for sanity check when implementing multiple EPC tracking
lists for cgroups. At one point there were list corruption issues if
moving EPCs between lists not managed well. With those straightened out,
and clear definitions of EPC states for moving them from one list to
another, I no longer see much value to keep this even in later cgroup
patches.

Thanks
Haitao

2023-07-18 18:56:40

[permalink] [raw]

Subject: Re: [PATCH] x86/sgx: fix a NULL pointer

On Tue, 18 Jul 2023 09:27:49 -0500, Dave Hansen <[email protected]>
wrote:

> On 7/17/23 13:29, Haitao Huang wrote:
>> Under heavy load, the SGX EPC reclaimers (current ksgxd or future EPC
>> cgroup worker) may reclaim the SECS EPC page for an enclave and set
>> encl->secs.epc_page to NULL. But the SECS EPC page is used for EAUG in
>> the SGX #PF handler without checking for NULL and reloading.
>>
>> Fix this by checking if SECS is loaded before EAUG and load it if it was
>> reclaimed.
>
> It would be nice to see a _bit_ more theory of the bug in here.
>
> What is an SECS page and why is it special in a reclaim context? Why is
> this so hard to hit? What led you to discover this issue now? What is
> EAUG?

Let me know if this clarify things.

The SECS page holds global states of an enclave, and all reclaimable pages
tracked by the SGX EPC reclaimer (ksgxd) are considered 'child' pages of
the SECS page corresponding to that enclave. The reclaimer only reclaims
the SECS page when all its children are reclaimed. That can happen on
system under high EPC pressure where multiple large enclaves demanding
much more EPC page than physically available. In a rare case, the
reclaimer may reclaim all EPC pages of an enclave and it SECS page,
setting encl->secs.epc_page to NULL, right before the #PF handler get the
chance to handle a #PF for that enclave. In that case, if that #PF happens
to require kernel to invoke the EAUG instruction to add a new EPC page for
the enclave, then a NULL pointer results as current code does not check if
encl->secs.epc_page is NULL before using it.

The bug is easier to reproduce with the EPC cgroup implementation when a
low EPC limit is set for a group of enclave hosting processes. Without the
EPC cgroup it's hard to trigger the reclaimer to reclaim all child pages
of an SECS page. And it'd also require a machine configured with large RAM
relative to EPC so no OOM killer triggered before this happens.

Thanks
Haitao

2023-07-18 19:25:23

[permalink] [raw]

Subject: Re: [PATCH] x86/sgx: fix a NULL pointer

On 7/18/23 11:11, Haitao Huang wrote:
> On Tue, 18 Jul 2023 09:27:49 -0500, Dave Hansen <[email protected]>
> wrote:
>
>> On 7/17/23 13:29, Haitao Huang wrote:
>>> Under heavy load, the SGX EPC reclaimers (current ksgxd or future EPC
>>> cgroup worker) may reclaim the SECS EPC page for an enclave and set
>>> encl->secs.epc_page to NULL. But the SECS EPC page is used for EAUG in
>>> the SGX #PF handler without checking for NULL and reloading.
>>>
>>> Fix this by checking if SECS is loaded before EAUG and load it if it was
>>> reclaimed.
>>
>> It would be nice to see a _bit_ more theory of the bug in here.
>>
>> What is an SECS page and why is it special in a reclaim context? Why is
>> this so hard to hit? What led you to discover this issue now? What is
>> EAUG?
>
> Let me know if this clarify things.
>
> The SECS page holds global states of an enclave, and all reclaimable
> pages tracked by the SGX EPC reclaimer (ksgxd) are considered 'child'
> pages of the SECS page corresponding to that enclave. The reclaimer
> only reclaims the SECS page when all its children are reclaimed. That
> can happen on system under high EPC pressure where multiple large
> enclaves demanding much more EPC page than physically available. In a
> rare case, the reclaimer may reclaim all EPC pages of an enclave and it
> SECS page, setting encl->secs.epc_page to NULL, right before the #PF
> handler get the chance to handle a #PF for that enclave. In that case,
> if that #PF happens to require kernel to invoke the EAUG instruction to
> add a new EPC page for the enclave, then a NULL pointer results as
> current code does not check if encl->secs.epc_page is NULL before using it.

Better, but that's *REALLY* verbose and really imprecise. It doesn't
_require_ "high pressure". It could literally happen at very, very low
pressures over a long period of time. Please stick to the facts and
it'll actually simplify the description.

The SECS page holds global enclave metadata. It can only be
reclaimed when there are no other enclave pages remaining. At
that point, virtually nothing can be done with the enclave until
the SECS page is paged back in.

An enclave can not run nor generate page faults without without
a resident SECS page. But it is still possible for a #PF for a
non-SECS page to race with paging out the SECS page.

Hitting this bug requires triggering that race.

> The bug is easier to reproduce with the EPC cgroup implementation when a
> low EPC limit is set for a group of enclave hosting processes. Without
> the EPC cgroup it's hard to trigger the reclaimer to reclaim all child
> pages of an SECS page. And it'd also require a machine configured with
> large RAM relative to EPC so no OOM killer triggered before this happens.

Isn't this the _normal_ case? EPC is relatively tiny compared to RAM
normally.

2023-07-18 20:44:31

[permalink] [raw]

Subject: Re: [PATCH] x86/sgx: fix a NULL pointer

On Tue, 18 Jul 2023 13:53:47 -0500, Dave Hansen <[email protected]>
wrote:

> On 7/18/23 11:11, Haitao Huang wrote:
>> On Tue, 18 Jul 2023 09:27:49 -0500, Dave Hansen <[email protected]>
>> wrote:
>>
>>> On 7/17/23 13:29, Haitao Huang wrote:
>>>> Under heavy load, the SGX EPC reclaimers (current ksgxd or future EPC
>>>> cgroup worker) may reclaim the SECS EPC page for an enclave and set
>>>> encl->secs.epc_page to NULL. But the SECS EPC page is used for EAUG in
>>>> the SGX #PF handler without checking for NULL and reloading.
>>>>
>>>> Fix this by checking if SECS is loaded before EAUG and load it if it
>>>> was
>>>> reclaimed.
>>>
>>> It would be nice to see a _bit_ more theory of the bug in here.
>>>
>>> What is an SECS page and why is it special in a reclaim context? Why
>>> is
>>> this so hard to hit? What led you to discover this issue now? What is
>>> EAUG?
>>
>> Let me know if this clarify things.
>>
>> The SECS page holds global states of an enclave, and all reclaimable
>> pages tracked by the SGX EPC reclaimer (ksgxd) are considered 'child'
>> pages of the SECS page corresponding to that enclave. The reclaimer
>> only reclaims the SECS page when all its children are reclaimed. That
>> can happen on system under high EPC pressure where multiple large
>> enclaves demanding much more EPC page than physically available. In a
>> rare case, the reclaimer may reclaim all EPC pages of an enclave and it
>> SECS page, setting encl->secs.epc_page to NULL, right before the #PF
>> handler get the chance to handle a #PF for that enclave. In that case,
>> if that #PF happens to require kernel to invoke the EAUG instruction to
>> add a new EPC page for the enclave, then a NULL pointer results as
>> current code does not check if encl->secs.epc_page is NULL before using
>> it.
>
> Better, but that's *REALLY* verbose and really imprecise. It doesn't
> _require_ "high pressure". It could literally happen at very, very low
> pressures over a long period of time.

I don't quite get this part. In low pressure scenario, the reclaimer never
need to reclaim all children of SECs. So it would not reclaim SECS no
matter how long you run?

Ignore VA pages for now. Say for a system with 10 page EPC, 2 enclaves,
each needs 5 pages non-SECS so total demand would be 12 pages. The ksgxd
would only need to swap out 2 pages at the most to get one enclave fully
loaded with 6 pages, and the other one with 4 pages. There is no chance
the ksgxd would swap any one of two SECS pages.

We would need at least one enclave A of 10 pages total to squeeze out the
other B completely. For that to happen B pretty much has to be sleeping
all the time so the LRU based reclaiming would hit it but not pages of A.
So no chance to hit #PF on pages of B still.

So some minimal pressure is needed to ensure SECS swapped. The higher the
pressure the higher the chance to hit #PF while SECS is swapped.

> Please stick to the facts and
> it'll actually simplify the description.
>
> The SECS page holds global enclave metadata. It can only be
> reclaimed when there are no other enclave pages remaining. At
> that point, virtually nothing can be done with the enclave until
> the SECS page is paged back in.
>
> An enclave can not run nor generate page faults without without
> a resident SECS page. But it is still possible for a #PF for a
> non-SECS page to race with paging out the SECS page.
>
> Hitting this bug requires triggering that race.
>
Thanks for the suggestion. I agree on those.

>> The bug is easier to reproduce with the EPC cgroup implementation when a
>> low EPC limit is set for a group of enclave hosting processes. Without
>> the EPC cgroup it's hard to trigger the reclaimer to reclaim all child
>> pages of an SECS page. And it'd also require a machine configured with
>> large RAM relative to EPC so no OOM killer triggered before this
>> happens.
>
> Isn't this the _normal_ case? EPC is relatively tiny compared to RAM
> normally.

I don't know what is perceived as normal here. But for this to happen, the
swapping space should be able to hold content much bigger than EPC, if my
reasoning above for high pressure required is correct. I tried 70
concurrent sgx selftests instances on a server with 4G EPC, 512G RAM and
no disk swapping, and encountered OOM first. Those selftests instance each
demand about 8G EPC.

Thanks
Haitao

2023-07-18 21:25:50

[permalink] [raw]

Subject: Re: [PATCH] x86/sgx: fix a NULL pointer

On 7/18/23 13:32, Haitao Huang wrote:
...
> Ignore VA pages for now. Say for a system with 10 page EPC, 2 enclaves,
> each needs 5 pages non-SECS so total demand would be 12 pages. The ksgxd
> would only need to swap out 2 pages at the most to get one enclave fully
> loaded with 6 pages, and the other one with 4 pages. There is no chance
> the ksgxd would swap any one of two SECS pages.
>
> We would need at least one enclave A of 10 pages total to squeeze out
> the other B completely. For that to happen B pretty much has to be
> sleeping all the time so the LRU based reclaiming would hit it but not
> pages of A. So no chance to hit #PF on pages of B still.
>
> So some minimal pressure is needed to ensure SECS swapped. The higher
> the pressure the higher the chance to hit #PF while SECS is swapped.

What would the second-to-last non-SECS page be? A thread control page?
VA page?

As long as *that* page can generate a page fault, then you only need two
pages for this scenario to happen:

1. Reclaimer takes encl->lock
2. #PF occurs from another thread, blocks on encl->lock
3. SECS is reclaimed
4. encl->lock released
5. #PF sees reclaimed SECS

2023-07-18 21:26:55

[permalink] [raw]

Subject: Re: [PATCH] x86/sgx: fix a NULL pointer

On Tue, 18 Jul 2023 15:56:27 -0500, Dave Hansen <[email protected]>
wrote:

> On 7/18/23 13:32, Haitao Huang wrote:
> ...
>> Ignore VA pages for now. Say for a system with 10 page EPC, 2 enclaves,
>> each needs 5 pages non-SECS so total demand would be 12 pages. The ksgxd
>> would only need to swap out 2 pages at the most to get one enclave fully
>> loaded with 6 pages, and the other one with 4 pages. There is no chance
>> the ksgxd would swap any one of two SECS pages.
>>
>> We would need at least one enclave A of 10 pages total to squeeze out
>> the other B completely. For that to happen B pretty much has to be
>> sleeping all the time so the LRU based reclaiming would hit it but not
>> pages of A. So no chance to hit #PF on pages of B still.
>>
>> So some minimal pressure is needed to ensure SECS swapped. The higher
>> the pressure the higher the chance to hit #PF while SECS is swapped.
>
> What would the second-to-last non-SECS page be? A thread control page?
> VA page?
>
> As long as *that* page can generate a page fault, then you only need two
> pages for this scenario to happen:
>
> 1. Reclaimer takes encl->lock
> 2. #PF occurs from another thread, blocks on encl->lock
> 3. SECS is reclaimed
> 4. encl->lock released
> 5. #PF sees reclaimed SECS
>
>
I agree this is the race. But for this to happen, that is at #1 you have
only one non-SECS page left so #3 can happen. That means it is already
high pressure because reclaimer has swapped all other non-SECS.
In my example of two enclaves of 5 non-EPC pages. #3 won't happen because
you don't reach #1 with only one non-SECS left.

Thanks
Haitao

2023-07-18 22:00:38

[permalink] [raw]

Subject: Re: [PATCH] x86/sgx: fix a NULL pointer

On 7/18/23 14:22, Haitao Huang wrote:
> I agree this is the race. But for this to happen, that is at #1 you have
> only one non-SECS page left so #3 can happen. That means it is already
> high pressure

I think our definitions of memory pressure differ.

Pressure is raised by allocations and dropped by reclaim. This
raise->drop cycle is (or should be) time-limited and can't take forever.
The reclaim either works in a short period of time or something dies.
If allocations are transient, pressure is transient.

Let's say a pressure blip (a one-time event) comes along and pages out
that second-to-last page. That's pretty low pressure. Years pass. The
enclave never gets run. Nothing pages the second-to-last page back in.
A second pressure blip comes along. The SECS page gets paged out.

That's two pressure blips in, say 10 years. Is that "high pressure"?

> because reclaimer has swapped all other non-SECS.
> In my example of two enclaves of 5 non-EPC pages. #3 won't happen
> because you don't reach #1 with only one non-SECS left.

2023-07-18 22:18:19

[permalink] [raw]

Subject: Re: [PATCH] x86/sgx: fix a NULL pointer

On 7/18/23 14:57, Haitao Huang wrote:
> Okay, that explains. I would consider it still triggered by high
> pressure blips ????
I'm talking about a "blip" being a single allocation. It's *LITERALLY*
the smallest (aka. lowest) possible quantum of memory pressure.

So go ahead and try to write the changelog without "high" or "low".
But, sheesh, if you and are somehow using "high" and "low" to describe
the exact same condition, I think that's a rather large communication or
understanding problem somewhere. It doesn't bode well for this simple
patch.

2023-07-18 22:19:57

[permalink] [raw]

Subject: Re: [PATCH] x86/sgx: fix a NULL pointer

On Tue, 18 Jul 2023 16:36:53 -0500, Dave Hansen <[email protected]>
wrote:

> On 7/18/23 14:22, Haitao Huang wrote:
>> I agree this is the race. But for this to happen, that is at #1 you have
>> only one non-SECS page left so #3 can happen. That means it is already
>> high pressure
>
> I think our definitions of memory pressure differ.
>
> Pressure is raised by allocations and dropped by reclaim. This
> raise->drop cycle is (or should be) time-limited and can't take forever.
> The reclaim either works in a short period of time or something dies.
> If allocations are transient, pressure is transient.
>
> Let's say a pressure blip (a one-time event) comes along and pages out
> that second-to-last page. That's pretty low pressure. Years pass. The
> enclave never gets run. Nothing pages the second-to-last page back in.
> A second pressure blip comes along. The SECS page gets paged out.
>
> That's two pressure blips in, say 10 years. Is that "high pressure"?

Okay, that explains. I would consider it still triggered by high pressure
blips :-)

But I agree we can drop the mentioning of pressure altogether and just
state the race so no confusions.
Thanks
Haitao

2023-07-19 00:01:33

[permalink] [raw]

Subject: Re: [PATCH] x86/sgx: fix a NULL pointer

On Tue, 18 Jul 2023 10:37:45 -0500, Jarkko Sakkinen <[email protected]>
wrote:

> On Mon Jul 17, 2023 at 11:29 PM EEST, Haitao Huang wrote:
>> Under heavy load, the SGX EPC reclaimers (current ksgxd or future EPC
>> cgroup worker) may reclaim the SECS EPC page for an enclave and set
>> encl->secs.epc_page to NULL. But the SECS EPC page is used for EAUG in
>> the SGX #PF handler without checking for NULL and reloading.
>>
>> Fix this by checking if SECS is loaded before EAUG and load it if it was
>> reclaimed.
>>
>> Fixes: 5a90d2c3f5ef8 ("x86/sgx: Support adding of pages to an
>> initialized enclave")
>> Cc: [email protected]
>
> Given that
>
> $ git describe --contains 5a90d2c3f5ef8
> v6.0-rc1~102^2~16
>
> You could also describe this as:
>
> Cc: [email protected] # v6.0+

Will add

>
...
>
> Reviewed-by: Jarkko Sakkinen <[email protected]>
>

Thank you.
Haitao

2023-07-19 00:51:01

[permalink] [raw]

Subject: Re: [PATCH] x86/sgx: fix a NULL pointer

On Tue, 2023-07-18 at 16:57 -0500, Haitao Huang wrote:
> On Tue, 18 Jul 2023 16:36:53 -0500, Dave Hansen <[email protected]>
> wrote:
>
> > On 7/18/23 14:22, Haitao Huang wrote:
> > > I agree this is the race. But for this to happen, that is at #1 you have
> > > only one non-SECS page left so #3 can happen. That means it is already
> > > high pressure
> >
> > I think our definitions of memory pressure differ.
> >
> > Pressure is raised by allocations and dropped by reclaim. This
> > raise->drop cycle is (or should be) time-limited and can't take forever.
> > The reclaim either works in a short period of time or something dies.
> > If allocations are transient, pressure is transient.
> >
> > Let's say a pressure blip (a one-time event) comes along and pages out
> > that second-to-last page. That's pretty low pressure. Years pass. The
> > enclave never gets run. Nothing pages the second-to-last page back in.
> > A second pressure blip comes along. The SECS page gets paged out.
> >
> > That's two pressure blips in, say 10 years. Is that "high pressure"?
>
> Okay, that explains. I would consider it still triggered by high pressure
> blips :-)
>
> But I agree we can drop the mentioning of pressure altogether and just
> state the race so no confusions.

Also perhaps the patch title is too vague. Adding more information doesn't hurt
I think, e.g., mentioning it is a fix for NULL pointer dereference in the EAUG
flow.

2023-07-19 00:51:21

[permalink] [raw]

Subject: Re: [PATCH] x86/sgx: fix a NULL pointer

On Tue, 18 Jul 2023 17:05:59 -0500, Dave Hansen <[email protected]>
wrote:

> On 7/18/23 14:57, Haitao Huang wrote:
>> Okay, that explains. I would consider it still triggered by high
>> pressure blips ????
> I'm talking about a "blip" being a single allocation. It's *LITERALLY*
> the smallest (aka. lowest) possible quantum of memory pressure.
>
> So go ahead and try to write the changelog without "high" or "low".
> But, sheesh, if you and are somehow using "high" and "low" to describe
> the exact same condition, I think that's a rather large communication or
> understanding problem somewhere. It doesn't bode well for this simple
> patch.

Sorry that did not come across right. I'll send v3 (please skip v2) with
the comments added as you suggested.

Thanks a lot for your time and the review.
BR
Haitao

2023-07-19 00:56:40

[permalink] [raw]

Subject: Re: [PATCH] x86/sgx: fix a NULL pointer

On 7/18/23 17:14, Huang, Kai wrote:
> Also perhaps the patch title is too vague. Adding more information doesn't hurt
> I think, e.g., mentioning it is a fix for NULL pointer dereference in the EAUG
> flow.

Yeah, let's say something like:

x86/sgx: Resolve SECS reclaim vs. page fault race

please

2023-07-19 14:21:24

[permalink] [raw]

Subject: Re: [PATCH] x86/sgx: fix a NULL pointer

Hi Dave and Kai
On Tue, 18 Jul 2023 19:21:54 -0500, Dave Hansen <[email protected]>
wrote:

> On 7/18/23 17:14, Huang, Kai wrote:
>> Also perhaps the patch title is too vague. Adding more information
>> doesn't hurt
>> I think, e.g., mentioning it is a fix for NULL pointer dereference in
>> the EAUG
>> flow.
>
> Yeah, let's say something like:
>
> x86/sgx: Resolve SECS reclaim vs. page fault race
>
The patch is not to resolve SECS vs #PF race though the race is a
necessary condition to cause the NULL pointer. The same condition does not
cause NULL pointer in the ELDU path of #PF, only in EAUG path of #PF.

And the issue really is the NULL pointer not checked and fix was to reuse
the same code to reload SECS in ELDU code path for EAUG code path

How about this:

x86/sgx: Reload reclaimed SECS for EAUG on #PF

or

x86/sgx: Fix a NULL pointer to SECS used for EAUG on #PF

BR
Haitao

2023-07-21 01:15:25

[permalink] [raw]

Subject: Re: [PATCH] x86/sgx: fix a NULL pointer

On Wed, 2023-07-19 at 08:53 -0500, Haitao Huang wrote:
> Hi Dave and Kai
> On Tue, 18 Jul 2023 19:21:54 -0500, Dave Hansen <[email protected]>
> wrote:
>
> > On 7/18/23 17:14, Huang, Kai wrote:
> > > Also perhaps the patch title is too vague. Adding more information
> > > doesn't hurt
> > > I think, e.g., mentioning it is a fix for NULL pointer dereference in
> > > the EAUG
> > > flow.
> >
> > Yeah, let's say something like:
> >
> > x86/sgx: Resolve SECS reclaim vs. page fault race
> >
> The patch is not to resolve SECS vs #PF race though the race is a
> necessary condition to cause the NULL pointer. The same condition does not
> cause NULL pointer in the ELDU path of #PF, only in EAUG path of #PF.
>
> And the issue really is the NULL pointer not checked and fix was to reuse
> the same code to reload SECS in ELDU code path for EAUG code path
>
>
> How about this:
>
> x86/sgx: Reload reclaimed SECS for EAUG on #PF
>
> or
>
> x86/sgx: Fix a NULL pointer to SECS used for EAUG on #PF
>

Perhaps you can add "EAUG" part to what Dave suggested?

x86/sgx: Resolves SECS reclaim vs. page fault race on EAUG

(assuming Dave is fine with this :-))

2023-07-21 01:57:16

[permalink] [raw]

Subject: Re: [PATCH] x86/sgx: fix a NULL pointer

On Fri, 2023-07-21 at 00:32 +0000, Huang, Kai wrote:
> On Wed, 2023-07-19 at 08:53 -0500, Haitao Huang wrote:
> > Hi Dave and Kai
> > On Tue, 18 Jul 2023 19:21:54 -0500, Dave Hansen <[email protected]>
> > wrote:
> >
> > > On 7/18/23 17:14, Huang, Kai wrote:
> > > > Also perhaps the patch title is too vague. Adding more information
> > > > doesn't hurt
> > > > I think, e.g., mentioning it is a fix for NULL pointer dereference in
> > > > the EAUG
> > > > flow.
> > >
> > > Yeah, let's say something like:
> > >
> > > x86/sgx: Resolve SECS reclaim vs. page fault race
> > >
> > The patch is not to resolve SECS vs #PF race though the race is a
> > necessary condition to cause the NULL pointer. The same condition does not
> > cause NULL pointer in the ELDU path of #PF, only in EAUG path of #PF.
> >
> > And the issue really is the NULL pointer not checked and fix was to reuse
> > the same code to reload SECS in ELDU code path for EAUG code path
> >
> >
> > How about this:
> >
> > x86/sgx: Reload reclaimed SECS for EAUG on #PF
> >
> > or
> >
> > x86/sgx: Fix a NULL pointer to SECS used for EAUG on #PF
> >
>
> Perhaps you can add "EAUG" part to what Dave suggested?
>
> x86/sgx: Resolves SECS reclaim vs. page fault race on EAUG
>
> (assuming Dave is fine with this :-))

Btw, do you have a real call trace? If you have, I think you can add that to
the changelog too because that catches people's eye immediately.

2023-07-26 17:32:53

[permalink] [raw]

Subject: Re: [PATCH] x86/sgx: fix a NULL pointer

On Thu, 20 Jul 2023 19:52:22 -0500, Huang, Kai <[email protected]> wrote:

> On Fri, 2023-07-21 at 00:32 +0000, Huang, Kai wrote:
>> On Wed, 2023-07-19 at 08:53 -0500, Haitao Huang wrote:
>> > Hi Dave and Kai
>> > On Tue, 18 Jul 2023 19:21:54 -0500, Dave Hansen
>> <[email protected]>
>> > wrote:
>> >
>> > > On 7/18/23 17:14, Huang, Kai wrote:
>> > > > Also perhaps the patch title is too vague. Adding more
>> information
>> > > > doesn't hurt
>> > > > I think, e.g., mentioning it is a fix for NULL pointer
>> dereference in
>> > > > the EAUG
>> > > > flow.
>> > >
>> > > Yeah, let's say something like:
>> > >
>> > > x86/sgx: Resolve SECS reclaim vs. page fault race
>> > >
>> > The patch is not to resolve SECS vs #PF race though the race is a
>> > necessary condition to cause the NULL pointer. The same condition
>> does not
>> > cause NULL pointer in the ELDU path of #PF, only in EAUG path of #PF.
>> >
>> > And the issue really is the NULL pointer not checked and fix was to
>> reuse
>> > the same code to reload SECS in ELDU code path for EAUG code path
>> >
>> >
>> > How about this:
>> >
>> > x86/sgx: Reload reclaimed SECS for EAUG on #PF
>> >
>> > or
>> >
>> > x86/sgx: Fix a NULL pointer to SECS used for EAUG on #PF
>> >
>>
>> Perhaps you can add "EAUG" part to what Dave suggested?
>>
>> x86/sgx: Resolves SECS reclaim vs. page fault race on EAUG
>>
>> (assuming Dave is fine with this :-))
Sure, I can use this too.

> Btw, do you have a real call trace? If you have, I think you can add
> that to
> the changelog too because that catches people's eye immediately.

Previously I was not able to reproduce without SGX cgroup patches. Now I
managed to get a trace with a QEMU setup with small EPC (8M), large RAM
(128G) and 128 vCPUs:

[ 1682.914263] BUG: kernel NULL pointer dereference, address:
0000000000000000
[ 1682.922966] #PF: supervisor read access in kernel mode
[ 1682.929115] #PF: error_code(0x0000) - not-present page
[ 1682.935264] PGD 0 P4D 0
[ 1682.938383] Oops: 0000 [#1] PREEMPT SMP NOPTI
[ 1682.943620] CPU: 43 PID: 2681 Comm: test_sgx Not tainted
6.3.0-rc4sgxcet #12
[ 1682.951989] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
rel-1.16.2-0-gea1b7a073390-prebuilt.qemu.org 04/01/2014
[ 1682.965504] RIP: 0010:sgx_encl_eaug_page+0xc7/0x210
[ 1682.971359] Code: 25 49 8b 96 98 04 00 00 48 8d 40 48 48 89 42 08 48 89
56 48 49 8d 96 98 04 00 00 48 89 56 50 49 89 86 98 04 00 00 49 8b 46 60
<8b> 10 48 c1 e2 05 488
[ 1682.993330] RSP: 0000:ffffb2e64725bc00 EFLAGS: 00010246
[ 1682.999585] RAX: 0000000000000000 RBX: ffff987e5abac428 RCX:
0000000000000000
[ 1683.008059] RDX: 0000000000000001 RSI: 0000000000000000 RDI:
ffff987e61aee000
[ 1683.016533] RBP: ffffb2e64725bcf0 R08: 0000000000000000 R09:
ffffb2e64725bb58
[ 1683.025008] R10: 0000000000000000 R11: 00007f3f5c418fff R12:
ffff987e61aee020
[ 1683.033479] R13: ffff987e505bc080 R14: ffff987e61aee000 R15:
ffffb2e6420fcb20
[ 1683.041949] FS: 00007f3f5cb48740(0000) GS:ffff989cfe8c0000(0000)
knlGS:0000000000000000
[ 1683.051540] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 1683.058478] CR2: 0000000000000000 CR3: 0000000115896002 CR4:
0000000000770ee0
[ 1683.067018] DR0: 0000000000000000 DR1: 0000000000000000 DR2:
0000000000000000
[ 1683.075539] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7:
0000000000000400
[ 1683.084085] PKRU: 55555554
[ 1683.087465] Call Trace:
[ 1683.090547] <TASK>
[ 1683.093220] ? __kmem_cache_alloc_node+0x16a/0x440
[ 1683.099034] ? xa_load+0x6e/0xa0
[ 1683.103038] sgx_vma_fault+0x119/0x230
[ 1683.107630] __do_fault+0x36/0x140
[ 1683.111828] do_fault+0x12f/0x400
[ 1683.115928] __handle_mm_fault+0x728/0x1110
[ 1683.121050] handle_mm_fault+0x105/0x310
[ 1683.125850] do_user_addr_fault+0x1ee/0x750
[ 1683.130957] ? __this_cpu_preempt_check+0x13/0x20
[ 1683.136667] exc_page_fault+0x76/0x180
[ 1683.141265] asm_exc_page_fault+0x27/0x30
[ 1683.146160] RIP: 0033:0x7ffc6496beea
[ 1683.150563] Code: 43 48 8b 4d 10 48 c7 c3 28 00 00 00 48 83 3c 19 00 75
31 48 83 c3 08 48 81 fb 00 01 00 00 75 ec 48 8b 19 48 8d 0d 00 00 00 00
<0f> 01 d7 48 8b 5d 101
[ 1683.172773] RSP: 002b:00007ffc64935b68 EFLAGS: 00000202
[ 1683.179138] RAX: 0000000000000003 RBX: 00007f3800000000 RCX:
00007ffc6496beea
[ 1683.187675] RDX: 0000000000000000 RSI: 0000000000000000 RDI:
0000000000000000
[ 1683.196200] RBP: 00007ffc64935b70 R08: 0000000000000000 R09:
0000000000000000
[ 1683.204724] R10: 0000000000000000 R11: 0000000000000000 R12:
0000000000000000
[ 1683.213310] R13: 0000000000000000 R14: 0000000000000000 R15:
0000000000000000
[ 1683.221850] </TASK>
[ 1683.224636] Modules linked in: isofs intel_rapl_msr intel_rapl_common
binfmt_misc kvm_intel nls_iso8859_1 kvm ppdev irqbypass input_leds
parport_pc joydev parport rapi
[ 1683.291173] CR2: 0000000000000000
[ 1683.295271] ---[ end trace 0000000000000000 ]---

I'll add this to the commit as well.

Thanks
Haitao

2023-07-26 21:45:21

[permalink] [raw]

Subject: [PATCH v4] x86/sgx: Resolves SECS reclaim vs. page fault for EAUG race

Under heavy load, the SGX EPC reclaimer (ksgxd) may reclaim the SECS EPC
page for an enclave and set encl->secs.epc_page to NULL. But the SECS
EPC page is used for EAUG in the SGX page fault handler without checking
for NULL and reloading.

Fix this by checking if SECS is loaded before EAUG and loading it if it
was reclaimed.

The SECS page holds global enclave metadata. It can only be reclaimed
when there are no other enclave pages remaining. At that point,
virtually nothing can be done with the enclave until the SECS page is
paged back in.

An enclave can not run nor generate page faults without a resident SECS
page. But it is still possible for a #PF for a non-SECS page to race
with paging out the SECS page: when the last resident non-SECS page A
triggers a #PF in a non-resident page B, and then page A and the SECS
both are paged out before the #PF on B is handled.

Hitting this bug requires that race triggered with a #PF for EAUG.
Following is a trace when it happens.

[ 1682.914263] BUG: kernel NULL pointer dereference, address: 0000000000000000
[ 1682.922966] #PF: supervisor read access in kernel mode
[ 1682.929115] #PF: error_code(0x0000) - not-present page
[ 1682.935264] PGD 0 P4D 0
[ 1682.938383] Oops: 0000 [#1] PREEMPT SMP NOPTI
[ 1682.943620] CPU: 43 PID: 2681 Comm: test_sgx Not tainted 6.3.0-rc4sgxcet #12
[ 1682.951989] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.2-0-gea1b7a073390-prebuilt.qemu.org 04/01/2014
[ 1682.965504] RIP: 0010:sgx_encl_eaug_page+0xc7/0x210
[ 1682.971359] Code: 25 49 8b 96 98 04 00 00 48 8d 40 48 48 89 42 08 48 89 56 48 49 8d 96 98 04 00 00 48 89 56 50 49 89 86 98 04 00 00 49 8b 46 60 <8b> 10 48 c1 e2 05 488
[ 1682.993330] RSP: 0000:ffffb2e64725bc00 EFLAGS: 00010246
[ 1682.999585] RAX: 0000000000000000 RBX: ffff987e5abac428 RCX: 0000000000000000
[ 1683.008059] RDX: 0000000000000001 RSI: 0000000000000000 RDI: ffff987e61aee000
[ 1683.016533] RBP: ffffb2e64725bcf0 R08: 0000000000000000 R09: ffffb2e64725bb58
[ 1683.025008] R10: 0000000000000000 R11: 00007f3f5c418fff R12: ffff987e61aee020
[ 1683.033479] R13: ffff987e505bc080 R14: ffff987e61aee000 R15: ffffb2e6420fcb20
[ 1683.041949] FS: 00007f3f5cb48740(0000) GS:ffff989cfe8c0000(0000) knlGS:0000000000000000
[ 1683.051540] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 1683.058478] CR2: 0000000000000000 CR3: 0000000115896002 CR4: 0000000000770ee0
[ 1683.067018] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 1683.075539] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 1683.084085] PKRU: 55555554
[ 1683.087465] Call Trace:
[ 1683.090547] <TASK>
[ 1683.093220] ? __kmem_cache_alloc_node+0x16a/0x440
[ 1683.099034] ? xa_load+0x6e/0xa0
[ 1683.103038] sgx_vma_fault+0x119/0x230
[ 1683.107630] __do_fault+0x36/0x140
[ 1683.111828] do_fault+0x12f/0x400
[ 1683.115928] __handle_mm_fault+0x728/0x1110
[ 1683.121050] handle_mm_fault+0x105/0x310
[ 1683.125850] do_user_addr_fault+0x1ee/0x750
[ 1683.130957] ? __this_cpu_preempt_check+0x13/0x20
[ 1683.136667] exc_page_fault+0x76/0x180
[ 1683.141265] asm_exc_page_fault+0x27/0x30
[ 1683.146160] RIP: 0033:0x7ffc6496beea
[ 1683.150563] Code: 43 48 8b 4d 10 48 c7 c3 28 00 00 00 48 83 3c 19 00 75 31 48 83 c3 08 48 81 fb 00 01 00 00 75 ec 48 8b 19 48 8d 0d 00 00 00 00 <0f> 01 d7 48 8b 5d 101
[ 1683.172773] RSP: 002b:00007ffc64935b68 EFLAGS: 00000202
[ 1683.179138] RAX: 0000000000000003 RBX: 00007f3800000000 RCX: 00007ffc6496beea
[ 1683.187675] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
[ 1683.196200] RBP: 00007ffc64935b70 R08: 0000000000000000 R09: 0000000000000000
[ 1683.204724] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
[ 1683.213310] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
[ 1683.221850] </TASK>
[ 1683.224636] Modules linked in: isofs intel_rapl_msr intel_rapl_common binfmt_misc kvm_intel nls_iso8859_1 kvm ppdev irqbypass input_leds parport_pc joydev parport rapi
[ 1683.291173] CR2: 0000000000000000
[ 1683.295271] ---[ end trace 0000000000000000 ]---

Fixes: 5a90d2c3f5ef ("x86/sgx: Support adding of pages to an initialized enclave")
Cc: [email protected] # v6.0+
Signed-off-by: Haitao Huang <[email protected]>
Reviewed-by: Jarkko Sakkinen <[email protected]>
---
v4:
- Refined the title (Kai, Dave)
- Added a trace to commit meesage (Kai)
- Added a few details for the race.

v3:
- Added comments on sgx_encl_load_secs(). (Dave)
- Added theory of the race condition to hit the bug. (Dave)
- Added Reviewed-by, and applicable stable release. (Jarkko)

v2:
- Fixes for style, commit message (Jarkko, Kai)
- Removed unneeded WARN_ON (Kai)
---
arch/x86/kernel/cpu/sgx/encl.c | 30 +++++++++++++++++++++++++-----
1 file changed, 25 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
index 91fa70e51004..279148e72459 100644
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -235,6 +235,21 @@ static struct sgx_epc_page *sgx_encl_eldu(struct sgx_encl_page *encl_page,
return epc_page;
}

+/*
+ * Ensure the SECS page is not swapped out. Must be called with encl->lock
+ * to protect the enclave states including SECS and ensure the SECS page is
+ * not swapped out again while being used.
+ */
+static struct sgx_epc_page *sgx_encl_load_secs(struct sgx_encl *encl)
+{
+ struct sgx_epc_page *epc_page = encl->secs.epc_page;
+
+ if (!epc_page)
+ epc_page = sgx_encl_eldu(&encl->secs, NULL);
+
+ return epc_page;
+}
+
static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl,
struct sgx_encl_page *entry)
{
@@ -248,11 +263,9 @@ static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl,
return entry;
}

- if (!(encl->secs.epc_page)) {
- epc_page = sgx_encl_eldu(&encl->secs, NULL);
- if (IS_ERR(epc_page))
- return ERR_CAST(epc_page);
- }
+ epc_page = sgx_encl_load_secs(encl);
+ if (IS_ERR(epc_page))
+ return ERR_CAST(epc_page);

epc_page = sgx_encl_eldu(entry, encl->secs.epc_page);
if (IS_ERR(epc_page))
@@ -339,6 +352,13 @@ static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,

mutex_lock(&encl->lock);

+ epc_page = sgx_encl_load_secs(encl);
+ if (IS_ERR(epc_page)) {
+ if (PTR_ERR(epc_page) == -EBUSY)
+ vmret = VM_FAULT_NOPAGE;
+ goto err_out_unlock;
+ }
+
epc_page = sgx_alloc_epc_page(encl_page, false);
if (IS_ERR(epc_page)) {
if (PTR_ERR(epc_page) == -EBUSY)

base-commit: 6eaae198076080886b9e7d57f4ae06fa782f90ef
--
2.25.1

2023-07-26 22:45:01

by Reinette Chatre

[permalink] [raw]

Subject: Re: [PATCH v4] x86/sgx: Resolves SECS reclaim vs. page fault for EAUG race

Hi Haitao,

On 7/26/2023 1:50 PM, Haitao Huang wrote:
> Under heavy load, the SGX EPC reclaimer (ksgxd) may reclaim the SECS EPC
> page for an enclave and set encl->secs.epc_page to NULL. But the SECS
> EPC page is used for EAUG in the SGX page fault handler without checking
> for NULL and reloading.
>
> Fix this by checking if SECS is loaded before EAUG and loading it if it
> was reclaimed.
>
> The SECS page holds global enclave metadata. It can only be reclaimed
> when there are no other enclave pages remaining. At that point,
> virtually nothing can be done with the enclave until the SECS page is
> paged back in.
>
> An enclave can not run nor generate page faults without a resident SECS
> page. But it is still possible for a #PF for a non-SECS page to race
> with paging out the SECS page: when the last resident non-SECS page A
> triggers a #PF in a non-resident page B, and then page A and the SECS
> both are paged out before the #PF on B is handled.
>
> Hitting this bug requires that race triggered with a #PF for EAUG.
> Following is a trace when it happens.
>

Thank you very much for finding this issue as well as providing
a fix.

> [ 1682.914263] BUG: kernel NULL pointer dereference, address: 0000000000000000
> [ 1682.922966] #PF: supervisor read access in kernel mode
> [ 1682.929115] #PF: error_code(0x0000) - not-present page
> [ 1682.935264] PGD 0 P4D 0
> [ 1682.938383] Oops: 0000 [#1] PREEMPT SMP NOPTI
> [ 1682.943620] CPU: 43 PID: 2681 Comm: test_sgx Not tainted 6.3.0-rc4sgxcet #12
> [ 1682.951989] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.2-0-gea1b7a073390-prebuilt.qemu.org 04/01/2014
> [ 1682.965504] RIP: 0010:sgx_encl_eaug_page+0xc7/0x210
> [ 1682.971359] Code: 25 49 8b 96 98 04 00 00 48 8d 40 48 48 89 42 08 48 89 56 48 49 8d 96 98 04 00 00 48 89 56 50 49 89 86 98 04 00 00 49 8b 46 60 <8b> 10 48 c1 e2 05 488
> [ 1682.993330] RSP: 0000:ffffb2e64725bc00 EFLAGS: 00010246
> [ 1682.999585] RAX: 0000000000000000 RBX: ffff987e5abac428 RCX: 0000000000000000
> [ 1683.008059] RDX: 0000000000000001 RSI: 0000000000000000 RDI: ffff987e61aee000
> [ 1683.016533] RBP: ffffb2e64725bcf0 R08: 0000000000000000 R09: ffffb2e64725bb58
> [ 1683.025008] R10: 0000000000000000 R11: 00007f3f5c418fff R12: ffff987e61aee020
> [ 1683.033479] R13: ffff987e505bc080 R14: ffff987e61aee000 R15: ffffb2e6420fcb20
> [ 1683.041949] FS: 00007f3f5cb48740(0000) GS:ffff989cfe8c0000(0000) knlGS:0000000000000000
> [ 1683.051540] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 1683.058478] CR2: 0000000000000000 CR3: 0000000115896002 CR4: 0000000000770ee0
> [ 1683.067018] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> [ 1683.075539] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> [ 1683.084085] PKRU: 55555554
> [ 1683.087465] Call Trace:
> [ 1683.090547] <TASK>
> [ 1683.093220] ? __kmem_cache_alloc_node+0x16a/0x440
> [ 1683.099034] ? xa_load+0x6e/0xa0
> [ 1683.103038] sgx_vma_fault+0x119/0x230
> [ 1683.107630] __do_fault+0x36/0x140
> [ 1683.111828] do_fault+0x12f/0x400
> [ 1683.115928] __handle_mm_fault+0x728/0x1110
> [ 1683.121050] handle_mm_fault+0x105/0x310
> [ 1683.125850] do_user_addr_fault+0x1ee/0x750
> [ 1683.130957] ? __this_cpu_preempt_check+0x13/0x20
> [ 1683.136667] exc_page_fault+0x76/0x180
> [ 1683.141265] asm_exc_page_fault+0x27/0x30
> [ 1683.146160] RIP: 0033:0x7ffc6496beea
> [ 1683.150563] Code: 43 48 8b 4d 10 48 c7 c3 28 00 00 00 48 83 3c 19 00 75 31 48 83 c3 08 48 81 fb 00 01 00 00 75 ec 48 8b 19 48 8d 0d 00 00 00 00 <0f> 01 d7 48 8b 5d 101
> [ 1683.172773] RSP: 002b:00007ffc64935b68 EFLAGS: 00000202
> [ 1683.179138] RAX: 0000000000000003 RBX: 00007f3800000000 RCX: 00007ffc6496beea
> [ 1683.187675] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
> [ 1683.196200] RBP: 00007ffc64935b70 R08: 0000000000000000 R09: 0000000000000000
> [ 1683.204724] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
> [ 1683.213310] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
> [ 1683.221850] </TASK>
> [ 1683.224636] Modules linked in: isofs intel_rapl_msr intel_rapl_common binfmt_misc kvm_intel nls_iso8859_1 kvm ppdev irqbypass input_leds parport_pc joydev parport rapi
> [ 1683.291173] CR2: 0000000000000000
> [ 1683.295271] ---[ end trace 0000000000000000 ]---

Could you please trim this trace? There is more detail in
Documentation/process/submitting-patches.rst (search for
"Backtraces in commit messages"), but the ideal trace should
have just the information needed to describe the issue (no
timestamps, register dumps, etc.).

With that addressed, feel free to add:

Acked-by: Reinette Chatre <[email protected]>

Reinette

2023-07-27 01:17:16

[permalink] [raw]

Subject: [PATCH v5] x86/sgx: Resolves SECS reclaim vs. page fault for EAUG race

2023-07-27 03:05:55

[permalink] [raw]

Subject: Re: [PATCH v5] x86/sgx: Resolves SECS reclaim vs. page fault for EAUG race

On Wed, 2023-07-26 at 18:02 -0700, Haitao Huang wrote:
> Under heavy load, the SGX EPC reclaimer (ksgxd) may reclaim the SECS EPC

If I read correctly, Dave suggested to not use "high" (heavy in this sentence)
or "low" pressure:

https://lore.kernel.org/lkml/[email protected]/T/#m9120eac6a4a94daa7c9fcc47709f241cd181e5dc

And I agree. For instance, consider this happens to one extremely "small"
enclave, while there's a new "big" enclave starts to run. I don't think we
should say this is "under heavy load". Just stick to the fact that the
reclaimer may reclaim the SECS page.

> page for an enclave and set encl->secs.epc_page to NULL. But the SECS
> EPC page is used for EAUG in the SGX page fault handler without checking
> for NULL and reloading.
>
> Fix this by checking if SECS is loaded before EAUG and loading it if it
> was reclaimed.
>
> The SECS page holds global enclave metadata. It can only be reclaimed
> when there are no other enclave pages remaining. At that point,
> virtually nothing can be done with the enclave until the SECS page is
> paged back in.
>
> An enclave can not run nor generate page faults without a resident SECS
> page.
>

I am not sure whether "nor generate page faults without a resident SECS page" is
accurate? When SECS is swapped out, I suppose the first EENTER should trigger a
#PF on the TSC page and in the #PF handler the SECS will be swapped in first.

I guess you can just remove this sentence?

> But it is still possible for a #PF for a non-SECS page to race
> with paging out the SECS page: when the last resident non-SECS page A
> triggers a #PF in a non-resident page B, and then page A and the SECS
> both are paged out before the #PF on B is handled.
>
> Hitting this bug requires that race triggered with a #PF for EAUG.

The above race can happen for the normal ELDU path too, thus I suppose it will
be better to mention why the normal ELDU path doesn't have this issue: it
already does what this fix does.

> Following is a trace when it happens.
>
> BUG: kernel NULL pointer dereference, address: 0000000000000000
> RIP: 0010:sgx_encl_eaug_page+0xc7/0x210
> Call Trace:
> ? __kmem_cache_alloc_node+0x16a/0x440
> ? xa_load+0x6e/0xa0
> sgx_vma_fault+0x119/0x230
> __do_fault+0x36/0x140
> do_fault+0x12f/0x400
> __handle_mm_fault+0x728/0x1110
> handle_mm_fault+0x105/0x310
> do_user_addr_fault+0x1ee/0x750
> ? __this_cpu_preempt_check+0x13/0x20
> exc_page_fault+0x76/0x180
> asm_exc_page_fault+0x27/0x30
>
> Fixes: 5a90d2c3f5ef ("x86/sgx: Support adding of pages to an initialized enclave")
> Cc: [email protected] # v6.0+
> Signed-off-by: Haitao Huang <[email protected]>
> Reviewed-by: Jarkko Sakkinen <[email protected]>
> Acked-by: Reinette Chatre <[email protected]>

With above addressed, feel free to add:

Reviewed-by: Kai Huang <[email protected]>

2023-07-27 03:37:21

[permalink] [raw]

Subject: Re: [PATCH v5] x86/sgx: Resolves SECS reclaim vs. page fault for EAUG race

On Thu, 2023-07-27 at 02:50 +0000, Huang, Kai wrote:
> > An enclave can not run nor generate page faults without a resident SECS
> > page.
> >
>
> I am not sure whether "nor generate page faults without a resident SECS page" is
> accurate? When SECS is swapped out, I suppose the first EENTER should trigger a
> #PF on the TSC page and in the #PF handler the SECS will be swapped in first.
>
> I guess you can just remove this sentence?

Hmm.. Probably I should interpret this sentence as the enclave "code" itself
cannot generate page faults without a resident SECS. This is true. So feel
free to ignore this comment.

2023-07-27 14:58:32

[permalink] [raw]

Subject: Re: [PATCH v5] x86/sgx: Resolves SECS reclaim vs. page fault for EAUG race

On Wed, 26 Jul 2023 21:50:02 -0500, Huang, Kai <[email protected]> wrote:

> On Wed, 2023-07-26 at 18:02 -0700, Haitao Huang wrote:
>> Under heavy load, the SGX EPC reclaimer (ksgxd) may reclaim the SECS EPC
>
> If I read correctly, Dave suggested to not use "high" (heavy in this
> sentence)
> or "low" pressure:
>
> https://lore.kernel.org/lkml/[email protected]/T/#m9120eac6a4a94daa7c9fcc47709f241cd181e5dc
>
> And I agree. For instance, consider this happens to one extremely
> "small"
> enclave, while there's a new "big" enclave starts to run. I don't think
> we
> should say this is "under heavy load". Just stick to the fact that the
> reclaimer may reclaim the SECS page.
>
Mybe I have some confusion here but I did not think Dave had issues with
'heavy load'. When this happens, the last page causing #PF (page A below)
should be the the "youngest" in PTE and it got paged out together with the
SECS before the #PF is even handled. Based on that the ksgxd moves 'young'
pages to the back of the queue for reclaiming, for that to happen, almost
all EPC pages must be paged out for all enclaves at that time, so it means
heavy load to me. And that's also consistent with my tests.

>> page for an enclave and set encl->secs.epc_page to NULL. But the SECS
>> EPC page is used for EAUG in the SGX page fault handler without checking
>> for NULL and reloading.
>>
>> Fix this by checking if SECS is loaded before EAUG and loading it if it
>> was reclaimed.
>>
>> The SECS page holds global enclave metadata. It can only be reclaimed
>> when there are no other enclave pages remaining. At that point,
>> virtually nothing can be done with the enclave until the SECS page is
>> paged back in.
...
>> But it is still possible for a #PF for a non-SECS page to race
>> with paging out the SECS page: when the last resident non-SECS page A
>> triggers a #PF in a non-resident page B, and then page A and the SECS
>> both are paged out before the #PF on B is handled.
>>
>> Hitting this bug requires that race triggered with a #PF for EAUG.
>
> The above race can happen for the normal ELDU path too, thus I suppose
> it will
> be better to mention why the normal ELDU path doesn't have this issue: it
> already does what this fix does.
>
Should we focus on the bug and fix itself instead of explaining a non-bug
case?
And the simple changes in this patch clearly show that too if people look
for that.

Thanks
Haitao

2023-07-27 23:51:27

[permalink] [raw]

Subject: Re: [PATCH v5] x86/sgx: Resolves SECS reclaim vs. page fault for EAUG race

On Thu, 2023-07-27 at 09:16 -0500, Haitao Huang wrote:
> On Wed, 26 Jul 2023 21:50:02 -0500, Huang, Kai <[email protected]> wrote:
>
> > On Wed, 2023-07-26 at 18:02 -0700, Haitao Huang wrote:
> > > Under heavy load, the SGX EPC reclaimer (ksgxd) may reclaim the SECS EPC
> >
> > If I read correctly, Dave suggested to not use "high" (heavy in this
> > sentence)
> > or "low" pressure:
> >
> > https://lore.kernel.org/lkml/[email protected]/T/#m9120eac6a4a94daa7c9fcc47709f241cd181e5dc
> >
> > And I agree. For instance, consider this happens to one extremely
> > "small"
> > enclave, while there's a new "big" enclave starts to run. I don't think
> > we
> > should say this is "under heavy load". Just stick to the fact that the
> > reclaimer may reclaim the SECS page.
> >
> Mybe I have some confusion here but I did not think Dave had issues with
> 'heavy load'. When this happens, the last page causing #PF (page A below)
> should be the the "youngest" in PTE and it got paged out together with the
> SECS before the #PF is even handled. Based on that the ksgxd moves 'young'
> pages to the back of the queue for reclaiming, for that to happen, almost
> all EPC pages must be paged out for all enclaves at that time, so it means
> heavy load to me. And that's also consistent with my tests.

I already provided an example: swapping out an "extreme small" enclave.

Anyway, no big deal to me.

>
> > > page for an enclave and set encl->secs.epc_page to NULL. But the SECS
> > > EPC page is used for EAUG in the SGX page fault handler without checking
> > > for NULL and reloading.
> > >
> > > Fix this by checking if SECS is loaded before EAUG and loading it if it
> > > was reclaimed.
> > >
> > > The SECS page holds global enclave metadata. It can only be reclaimed
> > > when there are no other enclave pages remaining. At that point,
> > > virtually nothing can be done with the enclave until the SECS page is
> > > paged back in.
> ...
> > > But it is still possible for a #PF for a non-SECS page to race
> > > with paging out the SECS page: when the last resident non-SECS page A
> > > triggers a #PF in a non-resident page B, and then page A and the SECS
> > > both are paged out before the #PF on B is handled.
> > >
> > > Hitting this bug requires that race triggered with a #PF for EAUG.
> >
> > The above race can happen for the normal ELDU path too, thus I suppose
> > it will
> > be better to mention why the normal ELDU path doesn't have this issue: it
> > already does what this fix does.
> >
> Should we focus on the bug and fix itself instead of explaining a non-bug
> case?
> And the simple changes in this patch clearly show that too if people look
> for that.

So you spent a lot of text explaining the race condition, but such race
condition applies to both ELDU and EAUG. I personally went to see the code
whether ELDU has such issue too, and it turned out only EAUG has issue. If you
mention this in the changelog perhaps I wouldn't need to go to read the code.

Anyway, just my 2cents.

And I don't want to let those block this patch, so feel free to add my tag:

Reviewed-by: Kai Huang <[email protected]>

2023-07-28 06:05:24

[permalink] [raw]

Subject: [PATCH v6] x86/sgx: Resolves SECS reclaim vs. page fault for EAUG race

The SGX EPC reclaimer (ksgxd) may reclaim the SECS EPC page for an
enclave and set secs.epc_page to NULL. The SECS page is used for EAUG
and ELDU in the SGX page fault handler. However, the NULL check for
secs.epc_page is only done for ELDU, not EAUG before being used.

Fix this by doing the same NULL check and reloading of the SECS page as
needed for both EAUG and ELDU.

The SECS page holds global enclave metadata. It can only be reclaimed
when there are no other enclave pages remaining. At that point,
virtually nothing can be done with the enclave until the SECS page is
paged back in.

An enclave can not run nor generate page faults without a resident SECS
page. But it is still possible for a #PF for a non-SECS page to race
with paging out the SECS page: when the last resident non-SECS page A
triggers a #PF in a non-resident page B, and then page A and the SECS
both are paged out before the #PF on B is handled.

Hitting this bug requires that race triggered with a #PF for EAUG.
Following is a trace when it happens.

BUG: kernel NULL pointer dereference, address: 0000000000000000
RIP: 0010:sgx_encl_eaug_page+0xc7/0x210
Call Trace:
? __kmem_cache_alloc_node+0x16a/0x440
? xa_load+0x6e/0xa0
sgx_vma_fault+0x119/0x230
__do_fault+0x36/0x140
do_fault+0x12f/0x400
__handle_mm_fault+0x728/0x1110
handle_mm_fault+0x105/0x310
do_user_addr_fault+0x1ee/0x750
? __this_cpu_preempt_check+0x13/0x20
exc_page_fault+0x76/0x180
asm_exc_page_fault+0x27/0x30

Fixes: 5a90d2c3f5ef ("x86/sgx: Support adding of pages to an initialized enclave")
Cc: [email protected] # v6.0+
Signed-off-by: Haitao Huang <[email protected]>
Reviewed-by: Jarkko Sakkinen <[email protected]>
Reviewed-by: Kai Huang <[email protected]>
Acked-by: Reinette Chatre <[email protected]>
---
v6:
- Removed 'Under heavy load' as it is not the required condition though
it makes the bug more likely happen. (Kai)
- Added mentioning of the NULL check and reloading already done for ELDU (Kai)
- Added Reviewed-by (Kai)

v5:
- Trimmed trace and added Acked-by (Reinette)

v4:
- Refined the title (Kai, Dave)
- Added a trace to commit meesage (Kai)
- Added a few details for the race.

v3:
- Added comments on sgx_encl_load_secs(). (Dave)
- Added theory of the race condition to hit the bug. (Dave)
- Added Reviewed-by, and applicable stable release. (Jarkko)

v2:
- Fixes for style, commit message (Jarkko, Kai)
- Removed unneeded WARN_ON (Kai)
---
arch/x86/kernel/cpu/sgx/encl.c | 30 +++++++++++++++++++++++++-----
1 file changed, 25 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
index 91fa70e51004..279148e72459 100644
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -235,6 +235,21 @@ static struct sgx_epc_page *sgx_encl_eldu(struct sgx_encl_page *encl_page,
return epc_page;
}

+/*
+ * Ensure the SECS page is not swapped out. Must be called with encl->lock
+ * to protect the enclave states including SECS and ensure the SECS page is
+ * not swapped out again while being used.
+ */
+static struct sgx_epc_page *sgx_encl_load_secs(struct sgx_encl *encl)
+{
+ struct sgx_epc_page *epc_page = encl->secs.epc_page;
+
+ if (!epc_page)
+ epc_page = sgx_encl_eldu(&encl->secs, NULL);
+
+ return epc_page;
+}
+
static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl,
struct sgx_encl_page *entry)
{
@@ -248,11 +263,9 @@ static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl,
return entry;
}

- if (!(encl->secs.epc_page)) {
- epc_page = sgx_encl_eldu(&encl->secs, NULL);
- if (IS_ERR(epc_page))
- return ERR_CAST(epc_page);
- }
+ epc_page = sgx_encl_load_secs(encl);
+ if (IS_ERR(epc_page))
+ return ERR_CAST(epc_page);

epc_page = sgx_encl_eldu(entry, encl->secs.epc_page);
if (IS_ERR(epc_page))
@@ -339,6 +352,13 @@ static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,

mutex_lock(&encl->lock);

+ epc_page = sgx_encl_load_secs(encl);
+ if (IS_ERR(epc_page)) {
+ if (PTR_ERR(epc_page) == -EBUSY)
+ vmret = VM_FAULT_NOPAGE;
+ goto err_out_unlock;
+ }
+
epc_page = sgx_alloc_epc_page(encl_page, false);
if (IS_ERR(epc_page)) {
if (PTR_ERR(epc_page) == -EBUSY)

base-commit: 6eaae198076080886b9e7d57f4ae06fa782f90ef
--
2.25.1

2023-09-29 00:14:23