2021-07-05 14:38:41

by Jarkko Sakkinen

[permalink] [raw]
Subject: [PATCH 4/4] selftests/sgx: Trigger the reclaimer and #PF handler

Create a heap for the test enclave, which has the same size as all
available Enclave Page Cache (EPC) pages in the system. This will guarantee
that all test_encl.elf pages *and* SGX Enclave Control Structure (SECS)
have been swapped out by the page reclaimer during the load time. Actually,
this adds a bit more stress than that since part of the EPC gets reserved
for the Version Array (VA) pages.

For each test, the page fault handler gets triggered on two occasions:

- When SGX_IOC_ENCLAVE_INIT is performed, SECS gets swapped in by the
page fault handler.
- During the execution, each page that is referenced gets swapped in
by the page fault handler.

Signed-off-by: Jarkko Sakkinen <[email protected]>
---
tools/testing/selftests/sgx/load.c | 33 +++++++++++++++----
tools/testing/selftests/sgx/main.c | 42 ++++++++++++++++++++++++-
tools/testing/selftests/sgx/main.h | 3 +-
tools/testing/selftests/sgx/sigstruct.c | 6 ++--
4 files changed, 74 insertions(+), 10 deletions(-)

diff --git a/tools/testing/selftests/sgx/load.c b/tools/testing/selftests/sgx/load.c
index 9946fab2a3d6..a312a132ac33 100644
--- a/tools/testing/selftests/sgx/load.c
+++ b/tools/testing/selftests/sgx/load.c
@@ -21,6 +21,8 @@

void encl_delete(struct encl *encl)
{
+ struct encl_segment *heap_seg = &encl->segment_tbl[encl->nr_segments - 1];
+
if (encl->encl_base)
munmap((void *)encl->encl_base, encl->encl_size);

@@ -30,6 +32,8 @@ void encl_delete(struct encl *encl)
if (encl->fd)
close(encl->fd);

+ munmap(heap_seg->src, heap_seg->size);
+
if (encl->segment_tbl)
free(encl->segment_tbl);

@@ -111,7 +115,10 @@ static bool encl_ioc_add_pages(struct encl *encl, struct encl_segment *seg)
ioc.offset = seg->offset;
ioc.length = seg->size;
ioc.secinfo = (unsigned long)&secinfo;
- ioc.flags = SGX_PAGE_MEASURE;
+ if (seg->measure)
+ ioc.flags = SGX_PAGE_MEASURE;
+ else
+ ioc.flags = 0;

rc = ioctl(encl->fd, SGX_IOC_ENCLAVE_ADD_PAGES, &ioc);
if (rc < 0) {
@@ -124,9 +131,10 @@ static bool encl_ioc_add_pages(struct encl *encl, struct encl_segment *seg)



-bool encl_load(const char *path, struct encl *encl)
+bool encl_load(const char *path, struct encl *encl, unsigned long heap_size)
{
const char device_path[] = "/dev/sgx_enclave";
+ struct encl_segment *seg;
Elf64_Phdr *phdr_tbl;
off_t src_offset;
Elf64_Ehdr *ehdr;
@@ -188,6 +196,8 @@ bool encl_load(const char *path, struct encl *encl)
ehdr = encl->bin;
phdr_tbl = encl->bin + ehdr->e_phoff;

+ encl->nr_segments = 1; /* one for the heap */
+
for (i = 0; i < ehdr->e_phnum; i++) {
Elf64_Phdr *phdr = &phdr_tbl[i];

@@ -203,7 +213,6 @@ bool encl_load(const char *path, struct encl *encl)
for (i = 0, j = 0; i < ehdr->e_phnum; i++) {
Elf64_Phdr *phdr = &phdr_tbl[i];
unsigned int flags = phdr->p_flags;
- struct encl_segment *seg;

if (phdr->p_type != PT_LOAD)
continue;
@@ -240,14 +249,26 @@ bool encl_load(const char *path, struct encl *encl)
seg->offset = (phdr->p_offset & PAGE_MASK) - src_offset;
seg->size = (phdr->p_filesz + PAGE_SIZE - 1) & PAGE_MASK;
seg->src = encl->src + seg->offset;
+ seg->measure = true;

j++;
}

- assert(j == encl->nr_segments);
+ assert(j == encl->nr_segments - 1);
+
+ seg = &encl->segment_tbl[j];
+ seg->offset = encl->segment_tbl[j - 1].offset + encl->segment_tbl[j - 1].size;
+ seg->size = heap_size;
+ seg->src = mmap(NULL, heap_size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ seg->prot = PROT_READ | PROT_WRITE;
+ seg->flags = (SGX_PAGE_TYPE_REG << 8) | seg->prot;
+ seg->measure = false;
+
+ if (seg->src == MAP_FAILED)
+ goto err;

- encl->src_size = encl->segment_tbl[j - 1].offset +
- encl->segment_tbl[j - 1].size;
+ encl->src_size = encl->segment_tbl[j].offset + encl->segment_tbl[j].size;

for (encl->encl_size = 4096; encl->encl_size < encl->src_size; )
encl->encl_size <<= 1;
diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c
index e252015e0c15..772ba1d72619 100644
--- a/tools/testing/selftests/sgx/main.c
+++ b/tools/testing/selftests/sgx/main.c
@@ -112,17 +112,57 @@ FIXTURE(enclave) {
struct sgx_enclave_run run;
};

+#define SGX_NR_ALL_PAGES_PATH "/sys/kernel/debug/x86/sgx_nr_all_pages"
+
+static int sysfs_get_ulong(const char *path, unsigned long *value)
+{
+ struct stat sbuf;
+ ssize_t ret = 0;
+ char buf[128];
+ int fd;
+
+ ret = stat(path, &sbuf);
+ if (ret)
+ return ret;
+
+ fd = open(path, O_RDONLY);
+ if (fd < 0)
+ return fd;
+
+ ret = read(fd, buf, sizeof(buf));
+ if (ret < 0)
+ goto out;
+
+ /* Clear the read bytes count. */
+ ret = 0;
+
+ errno = 0;
+ *value = strtoul(buf, NULL, 0);
+ if (errno)
+ ret = -1;
+
+out:
+ close(fd);
+ return ret;
+}
+
FIXTURE_SETUP(enclave)
{
Elf64_Sym *sgx_enter_enclave_sym = NULL;
+ unsigned long nr_all_pages;
struct vdso_symtab symtab;
struct encl_segment *seg;
char maps_line[256];
FILE *maps_file;
unsigned int i;
void *addr;
+ int ret;
+
+ ret = sysfs_get_ulong(SGX_NR_ALL_PAGES_PATH, &nr_all_pages);
+ if (ret)
+ ksft_exit_skip("Failed to read " SGX_NR_ALL_PAGES_PATH "\n");

- if (!encl_load("test_encl.elf", &self->encl)) {
+ if (!encl_load("test_encl.elf", &self->encl, nr_all_pages * 4096)) {
encl_delete(&self->encl);
ksft_exit_skip("cannot load enclaves\n");
}
diff --git a/tools/testing/selftests/sgx/main.h b/tools/testing/selftests/sgx/main.h
index 452d11dc4889..a286861dc289 100644
--- a/tools/testing/selftests/sgx/main.h
+++ b/tools/testing/selftests/sgx/main.h
@@ -12,6 +12,7 @@ struct encl_segment {
size_t size;
unsigned int prot;
unsigned int flags;
+ bool measure;
};

struct encl {
@@ -32,7 +33,7 @@ extern unsigned char sign_key[];
extern unsigned char sign_key_end[];

void encl_delete(struct encl *ctx);
-bool encl_load(const char *path, struct encl *encl);
+bool encl_load(const char *path, struct encl *encl, unsigned long heap_size);
bool encl_measure(struct encl *encl);
bool encl_build(struct encl *encl);

diff --git a/tools/testing/selftests/sgx/sigstruct.c b/tools/testing/selftests/sgx/sigstruct.c
index 202a96fd81bf..50c5ab1aa6fa 100644
--- a/tools/testing/selftests/sgx/sigstruct.c
+++ b/tools/testing/selftests/sgx/sigstruct.c
@@ -296,8 +296,10 @@ static bool mrenclave_segment(EVP_MD_CTX *ctx, struct encl *encl,
if (!mrenclave_eadd(ctx, seg->offset + offset, seg->flags))
return false;

- if (!mrenclave_eextend(ctx, seg->offset + offset, seg->src + offset))
- return false;
+ if (seg->measure) {
+ if (!mrenclave_eextend(ctx, seg->offset + offset, seg->src + offset))
+ return false;
+ }
}

return true;
--
2.32.0


2021-07-06 18:38:20

by Reinette Chatre

[permalink] [raw]
Subject: Re: [PATCH 4/4] selftests/sgx: Trigger the reclaimer and #PF handler

Hi Jarkko,

On 7/5/2021 7:36 AM, Jarkko Sakkinen wrote:
> Create a heap for the test enclave, which has the same size as all
> available Enclave Page Cache (EPC) pages in the system. This will guarantee
> that all test_encl.elf pages *and* SGX Enclave Control Structure (SECS)
> have been swapped out by the page reclaimer during the load time. Actually,
> this adds a bit more stress than that since part of the EPC gets reserved
> for the Version Array (VA) pages.
>
> For each test, the page fault handler gets triggered in two occasions:
>
> - When SGX_IOC_ENCLAVE_INIT is performed, SECS gets swapped in by the
> page fault handler.
> - During the execution, each page that is referenced gets swapped in
> by the page fault handler.
>

If I understand this correctly, all EPC pages are now being consumed
during fixture setup and thus every SGX test, no matter how big or
small, now becomes a stress test of the reclaimer instead of there being
a unique reclaimer test. Since an enclave is set up and torn down for
every test this seems like a significant addition. It also seems like
this would impact future tests of dynamic page addition where not all
scenarios could be tested with all EPC pages already consumed.

Reinette

2021-07-06 23:54:38

by Jarkko Sakkinen

[permalink] [raw]
Subject: Re: [PATCH 4/4] selftests/sgx: Trigger the reclaimer and #PF handler

On Tue, Jul 06, 2021 at 11:34:54AM -0700, Reinette Chatre wrote:
> Hi Jarkko,
>
> On 7/5/2021 7:36 AM, Jarkko Sakkinen wrote:
> > Create a heap for the test enclave, which has the same size as all
> > available Enclave Page Cache (EPC) pages in the system. This will guarantee
> > that all test_encl.elf pages *and* SGX Enclave Control Structure (SECS)
> > have been swapped out by the page reclaimer during the load time. Actually,
> > this adds a bit more stress than that since part of the EPC gets reserved
> > for the Version Array (VA) pages.
> >
> > For each test, the page fault handler gets triggered in two occasions:
> >
> > - When SGX_IOC_ENCLAVE_INIT is performed, SECS gets swapped in by the
> > page fault handler.
> > - During the execution, each page that is referenced gets swapped in
> > by the page fault handler.
> >
>
> If I understand this correctly, all EPC pages are now being consumed during
> fixture setup and thus every SGX test, no matter how big or small, now
> becomes a stress test of the reclaimer instead of there being a unique
> reclaimer test. Since an enclave is set up and torn down for every test this
> seems like a significant addition. It also seems like this would impact
> future tests of dynamic page addition where not all scenarios could be
> tested with all EPC pages already consumed.
>
> Reinette

Re-initializing the test enclave is a mandatory thing to do for all tests
because it has internal state.

/Jarkko

2021-07-07 00:15:34

by Reinette Chatre

[permalink] [raw]
Subject: Re: [PATCH 4/4] selftests/sgx: Trigger the reclaimer and #PF handler

Hi Jarkko,

On 7/6/2021 4:50 PM, Jarkko Sakkinen wrote:
> On Tue, Jul 06, 2021 at 11:34:54AM -0700, Reinette Chatre wrote:
>> Hi Jarkko,
>>
>> On 7/5/2021 7:36 AM, Jarkko Sakkinen wrote:
>>> Create a heap for the test enclave, which has the same size as all
>>> available Enclave Page Cache (EPC) pages in the system. This will guarantee
>>> that all test_encl.elf pages *and* SGX Enclave Control Structure (SECS)
>>> have been swapped out by the page reclaimer during the load time. Actually,
>>> this adds a bit more stress than that since part of the EPC gets reserved
>>> for the Version Array (VA) pages.
>>>
>>> For each test, the page fault handler gets triggered in two occasions:
>>>
>>> - When SGX_IOC_ENCLAVE_INIT is performed, SECS gets swapped in by the
>>> page fault handler.
>>> - During the execution, each page that is referenced gets swapped in
>>> by the page fault handler.
>>>
>>
>> If I understand this correctly, all EPC pages are now being consumed during
>> fixture setup and thus every SGX test, no matter how big or small, now
>> becomes a stress test of the reclaimer instead of there being a unique
>> reclaimer test. Since an enclave is set up and torn down for every test this
>> seems like a significant addition. It also seems like this would impact
>> future tests of dynamic page addition where not all scenarios could be
>> tested with all EPC pages already consumed.
>>
>> Reinette
>
> Re-initializing the test enclave is mandatory thing to do for all tests
> because it has an internals state.
>

Right, but not all tests require the same enclave. In kselftest
terminology I think you are attempting to force all tests to depend on
the same test fixture. Is it not possible to have a separate "reclaimer"
test fixture that would build an enclave with a large heap and then have
reclaimer tests that exercise it by being tests that are specific to
this "reclaimer fixture"?

Reinette

2021-07-07 09:18:31

by Jarkko Sakkinen

[permalink] [raw]
Subject: Re: [PATCH 4/4] selftests/sgx: Trigger the reclaimer and #PF handler

On Tue, Jul 06, 2021 at 05:10:38PM -0700, Reinette Chatre wrote:
> Hi Jarkko,
>
> On 7/6/2021 4:50 PM, Jarkko Sakkinen wrote:
> > On Tue, Jul 06, 2021 at 11:34:54AM -0700, Reinette Chatre wrote:
> > > Hi Jarkko,
> > >
> > > On 7/5/2021 7:36 AM, Jarkko Sakkinen wrote:
> > > > Create a heap for the test enclave, which has the same size as all
> > > > available Enclave Page Cache (EPC) pages in the system. This will guarantee
> > > > that all test_encl.elf pages *and* SGX Enclave Control Structure (SECS)
> > > > have been swapped out by the page reclaimer during the load time. Actually,
> > > > this adds a bit more stress than that since part of the EPC gets reserved
> > > > for the Version Array (VA) pages.
> > > >
> > > > For each test, the page fault handler gets triggered in two occasions:
> > > >
> > > > - When SGX_IOC_ENCLAVE_INIT is performed, SECS gets swapped in by the
> > > > page fault handler.
> > > > - During the execution, each page that is referenced gets swapped in
> > > > by the page fault handler.
> > > >
> > >
> > > If I understand this correctly, all EPC pages are now being consumed during
> > > fixture setup and thus every SGX test, no matter how big or small, now
> > > becomes a stress test of the reclaimer instead of there being a unique
> > > reclaimer test. Since an enclave is set up and torn down for every test this
> > > seems like a significant addition. It also seems like this would impact
> > > future tests of dynamic page addition where not all scenarios could be
> > > tested with all EPC pages already consumed.
> > >
> > > Reinette
> >
> > Re-initializing the test enclave is mandatory thing to do for all tests
> > because it has an internals state.
> >
>
> Right, but not all tests require the same enclave. In kselftest terminology
> I think you are attempting to force all tests to depend on the same test
> fixture. Is it not possible to have a separate "reclaimer" test fixture that
> would build an enclave with a large heap and then have reclaimer tests that
> exercise it by being tests that are specific to this "reclaimer fixture"?
>
> Reinette

Why add that complexity?

/Jarkko

2021-07-07 15:06:16

by Reinette Chatre

[permalink] [raw]
Subject: Re: [PATCH 4/4] selftests/sgx: Trigger the reclaimer and #PF handler

Hi Jarkko,

On 7/7/2021 2:17 AM, Jarkko Sakkinen wrote:
> On Tue, Jul 06, 2021 at 05:10:38PM -0700, Reinette Chatre wrote:
>> Hi Jarkko,
>>
>> On 7/6/2021 4:50 PM, Jarkko Sakkinen wrote:
>>> On Tue, Jul 06, 2021 at 11:34:54AM -0700, Reinette Chatre wrote:
>>>> Hi Jarkko,
>>>>
>>>> On 7/5/2021 7:36 AM, Jarkko Sakkinen wrote:
>>>>> Create a heap for the test enclave, which has the same size as all
>>>>> available Enclave Page Cache (EPC) pages in the system. This will guarantee
>>>>> that all test_encl.elf pages *and* SGX Enclave Control Structure (SECS)
>>>>> have been swapped out by the page reclaimer during the load time. Actually,
>>>>> this adds a bit more stress than that since part of the EPC gets reserved
>>>>> for the Version Array (VA) pages.
>>>>>
>>>>> For each test, the page fault handler gets triggered in two occasions:
>>>>>
>>>>> - When SGX_IOC_ENCLAVE_INIT is performed, SECS gets swapped in by the
>>>>> page fault handler.
>>>>> - During the execution, each page that is referenced gets swapped in
>>>>> by the page fault handler.
>>>>>
>>>>
>>>> If I understand this correctly, all EPC pages are now being consumed during
>>>> fixture setup and thus every SGX test, no matter how big or small, now
>>>> becomes a stress test of the reclaimer instead of there being a unique
>>>> reclaimer test. Since an enclave is set up and torn down for every test this
>>>> seems like a significant addition. It also seems like this would impact
>>>> future tests of dynamic page addition where not all scenarios could be
>>>> tested with all EPC pages already consumed.
>>>>
>>>> Reinette
>>>
>>> Re-initializing the test enclave is mandatory thing to do for all tests
>>> because it has an internals state.
>>>
>>
>> Right, but not all tests require the same enclave. In kselftest terminology
>> I think you are attempting to force all tests to depend on the same test
>> fixture. Is it not possible to have a separate "reclaimer" test fixture that
>> would build an enclave with a large heap and then have reclaimer tests that
>> exercise it by being tests that are specific to this "reclaimer fixture"?
>>
>> Reinette
>
> Why add that complexity?
>

With this change every test is turned into a pseudo reclaimer test
without there being any explicit testing (with pass/fail criteria) of
reclaimer behavior. This is an expensive addition and reduces the
scenarios that the tests can exercise.

Reinette

2021-07-07 21:52:20

by Reinette Chatre

[permalink] [raw]
Subject: Re: [PATCH 4/4] selftests/sgx: Trigger the reclaimer and #PF handler

Hi Jarkko,

On 7/7/2021 1:50 PM, Jarkko Sakkinen wrote:
> On Wed, Jul 07, 2021 at 08:02:42AM -0700, Reinette Chatre wrote:
>> Hi Jarkko,
>>
>> On 7/7/2021 2:17 AM, Jarkko Sakkinen wrote:
>>> On Tue, Jul 06, 2021 at 05:10:38PM -0700, Reinette Chatre wrote:
>>>> Hi Jarkko,
>>>>
>>>> On 7/6/2021 4:50 PM, Jarkko Sakkinen wrote:
>>>>> On Tue, Jul 06, 2021 at 11:34:54AM -0700, Reinette Chatre wrote:
>>>>>> Hi Jarkko,
>>>>>>
>>>>>> On 7/5/2021 7:36 AM, Jarkko Sakkinen wrote:
>>>>>>> Create a heap for the test enclave, which has the same size as all
>>>>>>> available Enclave Page Cache (EPC) pages in the system. This will guarantee
>>>>>>> that all test_encl.elf pages *and* SGX Enclave Control Structure (SECS)
>>>>>>> have been swapped out by the page reclaimer during the load time. Actually,
>>>>>>> this adds a bit more stress than that since part of the EPC gets reserved
>>>>>>> for the Version Array (VA) pages.
>>>>>>>
>>>>>>> For each test, the page fault handler gets triggered in two occasions:
>>>>>>>
>>>>>>> - When SGX_IOC_ENCLAVE_INIT is performed, SECS gets swapped in by the
>>>>>>> page fault handler.
>>>>>>> - During the execution, each page that is referenced gets swapped in
>>>>>>> by the page fault handler.
>>>>>>>
>>>>>>
>>>>>> If I understand this correctly, all EPC pages are now being consumed during
>>>>>> fixture setup and thus every SGX test, no matter how big or small, now
>>>>>> becomes a stress test of the reclaimer instead of there being a unique
>>>>>> reclaimer test. Since an enclave is set up and torn down for every test this
>>>>>> seems like a significant addition. It also seems like this would impact
>>>>>> future tests of dynamic page addition where not all scenarios could be
>>>>>> tested with all EPC pages already consumed.
>>>>>>
>>>>>> Reinette
>>>>>
>>>>> Re-initializing the test enclave is mandatory thing to do for all tests
>>>>> because it has an internals state.
>>>>>
>>>>
>>>> Right, but not all tests require the same enclave. In kselftest terminology
>>>> I think you are attempting to force all tests to depend on the same test
>>>> fixture. Is it not possible to have a separate "reclaimer" test fixture that
>>>> would build an enclave with a large heap and then have reclaimer tests that
>>>> exercise it by being tests that are specific to this "reclaimer fixture"?
>>>>
>>>> Reinette
>>>
>>> Why add that complexity?
>>>
>>
>> With this change every test is turned into a pseudo reclaimer test without
>> there being any explicit testing (with pass/fail criteria) of reclaimer
>> behavior. This is an expensive addition and reduces the scenarios that the
>> tests can exercise.
>>
>> Reinette
>
> There is consistent known behaviour how reclaimer and also the page fault
> are exercised for each test. I think that is what matters most right now
> that the basic behaviour of both the page reclaimer and page fault handler
> gets exercised.

I believe the basic behavior of page fault handler is currently
exercised in each test, this is required.

>
> I don't understand the real-world gain of doing something factors more
> complex than necessary at a particular point of time, when you don't
> really need to hang yourself into it forever.

Your argument about "hang yourself into it forever" can go both ways -
why should all tests now unnecessarily consume the entire EPC forever?

If I understand correctly adding a separate reclaimer test is not
complex but would require refactoring code.

> This patch does increase the coverage in a deterministic manner to the code
> paths that were not previously exercised, i.e. we know the code paths, and
> could even calculate the exact number of times that they are triggered. And
> without doing anything obscure. That's what matters to me.

On the contrary this is indeed obfuscating the SGX tests: if an issue
shows up in the reclaimer then all tests would fail. If there is a
unique reclaimer test then that would help point to where the issue may be.

Reinette

2021-07-07 21:54:23

by Dave Hansen

[permalink] [raw]
Subject: Re: [PATCH 4/4] selftests/sgx: Trigger the reclaimer and #PF handler

On 7/7/21 1:50 PM, Jarkko Sakkinen wrote:
> There is consistent known behaviour how reclaimer and also the page fault
> are exercised for each test. I think that is what matters most right now
> that the basic behaviour of both the page reclaimer and page fault handler
> gets exercised.

There's also a lot of value to ensuring that tests can run _quickly_.
If you have a test that fails one out of a million executions, it's a
lot easier to find and debug if it takes 1 ms versus 10 seconds.

In other words, I think I'd prefer if we run two enclaves in each
execution of the selftest. One can be as small as possible. The other
can be the reclaim-triggering one.

That's good both for test coverage, and it makes it a *bit* more
straightforward to hack out the reclaim test if you need things to run
faster.

The pkeys selftest isn't a bad example here either. It has a couple of
different "malloc()" options: THP, hugetlbfs, small-page mmap(), and a
bunch of tests it runs on each type. As we add more SGX tests, we might
end up with "do reclaim" just being an option we pass.

2021-07-07 22:13:46

by Jarkko Sakkinen

[permalink] [raw]
Subject: Re: [PATCH 4/4] selftests/sgx: Trigger the reclaimer and #PF handler

On Wed, Jul 07, 2021 at 08:02:42AM -0700, Reinette Chatre wrote:
> Hi Jarkko,
>
> On 7/7/2021 2:17 AM, Jarkko Sakkinen wrote:
> > On Tue, Jul 06, 2021 at 05:10:38PM -0700, Reinette Chatre wrote:
> > > Hi Jarkko,
> > >
> > > On 7/6/2021 4:50 PM, Jarkko Sakkinen wrote:
> > > > On Tue, Jul 06, 2021 at 11:34:54AM -0700, Reinette Chatre wrote:
> > > > > Hi Jarkko,
> > > > >
> > > > > On 7/5/2021 7:36 AM, Jarkko Sakkinen wrote:
> > > > > > Create a heap for the test enclave, which has the same size as all
> > > > > > available Enclave Page Cache (EPC) pages in the system. This will guarantee
> > > > > > that all test_encl.elf pages *and* SGX Enclave Control Structure (SECS)
> > > > > > have been swapped out by the page reclaimer during the load time. Actually,
> > > > > > this adds a bit more stress than that since part of the EPC gets reserved
> > > > > > for the Version Array (VA) pages.
> > > > > >
> > > > > > For each test, the page fault handler gets triggered in two occasions:
> > > > > >
> > > > > > - When SGX_IOC_ENCLAVE_INIT is performed, SECS gets swapped in by the
> > > > > > page fault handler.
> > > > > > - During the execution, each page that is referenced gets swapped in
> > > > > > by the page fault handler.
> > > > > >
> > > > >
> > > > > If I understand this correctly, all EPC pages are now being consumed during
> > > > > fixture setup and thus every SGX test, no matter how big or small, now
> > > > > becomes a stress test of the reclaimer instead of there being a unique
> > > > > reclaimer test. Since an enclave is set up and torn down for every test this
> > > > > seems like a significant addition. It also seems like this would impact
> > > > > future tests of dynamic page addition where not all scenarios could be
> > > > > tested with all EPC pages already consumed.
> > > > >
> > > > > Reinette
> > > >
> > > > Re-initializing the test enclave is mandatory thing to do for all tests
> > > > because it has an internals state.
> > > >
> > >
> > > Right, but not all tests require the same enclave. In kselftest terminology
> > > I think you are attempting to force all tests to depend on the same test
> > > fixture. Is it not possible to have a separate "reclaimer" test fixture that
> > > would build an enclave with a large heap and then have reclaimer tests that
> > > exercise it by being tests that are specific to this "reclaimer fixture"?
> > >
> > > Reinette
> >
> > Why add that complexity?
> >
>
> With this change every test is turned into a pseudo reclaimer test without
> there being any explicit testing (with pass/fail criteria) of reclaimer
> behavior. This is an expensive addition and reduces the scenarios that the
> tests can exercise.
>
> Reinette

There is consistent known behaviour how reclaimer and also the page fault
are exercised for each test. I think that is what matters most right now
that the basic behaviour of both the page reclaimer and page fault handler
gets exercised.

I don't understand the real-world gain of doing something factors more
complex than necessary at a particular point of time, when you don't
really need to hang yourself into it forever.

This patch does increase the coverage in a deterministic manner to the code
paths that were not previously exercised, i.e. we know the code paths, and
could even calculate the exact number of times that they are triggered. And
without doing anything obscure. That's what matters to me.

/Jarkko

2021-07-09 16:24:15

by Jarkko Sakkinen

[permalink] [raw]
Subject: Re: [PATCH 4/4] selftests/sgx: Trigger the reclaimer and #PF handler

On Wed, Jul 07, 2021 at 02:20:04PM -0700, Reinette Chatre wrote:
> Hi Jarkko,
>
> On 7/7/2021 1:50 PM, Jarkko Sakkinen wrote:
> > On Wed, Jul 07, 2021 at 08:02:42AM -0700, Reinette Chatre wrote:
> > > Hi Jarkko,
> > >
> > > On 7/7/2021 2:17 AM, Jarkko Sakkinen wrote:
> > > > On Tue, Jul 06, 2021 at 05:10:38PM -0700, Reinette Chatre wrote:
> > > > > Hi Jarkko,
> > > > >
> > > > > On 7/6/2021 4:50 PM, Jarkko Sakkinen wrote:
> > > > > > On Tue, Jul 06, 2021 at 11:34:54AM -0700, Reinette Chatre wrote:
> > > > > > > Hi Jarkko,
> > > > > > >
> > > > > > > On 7/5/2021 7:36 AM, Jarkko Sakkinen wrote:
> > > > > > > > Create a heap for the test enclave, which has the same size as all
> > > > > > > > available Enclave Page Cache (EPC) pages in the system. This will guarantee
> > > > > > > > that all test_encl.elf pages *and* SGX Enclave Control Structure (SECS)
> > > > > > > > have been swapped out by the page reclaimer during the load time. Actually,
> > > > > > > > this adds a bit more stress than that since part of the EPC gets reserved
> > > > > > > > for the Version Array (VA) pages.
> > > > > > > >
> > > > > > > > For each test, the page fault handler gets triggered in two occasions:
> > > > > > > >
> > > > > > > > - When SGX_IOC_ENCLAVE_INIT is performed, SECS gets swapped in by the
> > > > > > > > page fault handler.
> > > > > > > > - During the execution, each page that is referenced gets swapped in
> > > > > > > > by the page fault handler.
> > > > > > > >
> > > > > > >
> > > > > > > If I understand this correctly, all EPC pages are now being consumed during
> > > > > > > fixture setup and thus every SGX test, no matter how big or small, now
> > > > > > > becomes a stress test of the reclaimer instead of there being a unique
> > > > > > > reclaimer test. Since an enclave is set up and torn down for every test this
> > > > > > > seems like a significant addition. It also seems like this would impact
> > > > > > > future tests of dynamic page addition where not all scenarios could be
> > > > > > > tested with all EPC pages already consumed.
> > > > > > >
> > > > > > > Reinette
> > > > > >
> > > > > > Re-initializing the test enclave is mandatory thing to do for all tests
> > > > > > because it has an internals state.
> > > > > >
> > > > >
> > > > > Right, but not all tests require the same enclave. In kselftest terminology
> > > > > I think you are attempting to force all tests to depend on the same test
> > > > > fixture. Is it not possible to have a separate "reclaimer" test fixture that
> > > > > would build an enclave with a large heap and then have reclaimer tests that
> > > > > exercise it by being tests that are specific to this "reclaimer fixture"?
> > > > >
> > > > > Reinette
> > > >
> > > > Why add that complexity?
> > > >
> > >
> > > With this change every test is turned into a pseudo reclaimer test without
> > > there being any explicit testing (with pass/fail criteria) of reclaimer
> > > behavior. This is an expensive addition and reduces the scenarios that the
> > > tests can exercise.
> > >
> > > Reinette
> >
> > There is consistent known behaviour how reclaimer and also the page fault
> > are exercised for each test. I think that is what matters most right now
> > that the basic behaviour of both the page reclaimer and page fault handler
> > gets exercised.
>
> I believe the basic behavior of page fault handler is currently exercised in
> each test, this is required.

This is not true. The current test does not exercise the ELDU code path.

>
> >
> > I don't understand the real-world gain of doing something factors more
> > complex than necessary at a particular point of time, when you don't
> > really need to hang yourself into it forever.
>
> Your argument about "hang yourself into it forever" can go both ways - why
> should all tests now unnecessarily consume the entire EPC forever?
>
> If I understand correctly adding a separate reclaimer test is not complex
> but would require refactoring code.

What does it matter anyway if code needs to be refactored?

> > This patch does increase the coverage in a deterministic manner to the code
> > paths that were not previously exercised, i.e. we know the code paths, and
> > could even calculate the exact number of times that they are triggered. And
> > without doing anything obscure. That's what matters to me.
>
> On the contrary this is indeed obfuscating the SGX tests: if an issue shows
> up in the reclaimer then all tests would fail. If there is a unique
> reclaimer test then that would help point to where the issue may be.

I tend to disagree with this. I'll add a separate reclaimer test if I need
to test something that this does not scale to. It's an iterative process.

/Jarkko

2021-07-09 16:27:19

by Jarkko Sakkinen

[permalink] [raw]
Subject: Re: [PATCH 4/4] selftests/sgx: Trigger the reclaimer and #PF handler

On Wed, Jul 07, 2021 at 02:20:07PM -0700, Dave Hansen wrote:
> On 7/7/21 1:50 PM, Jarkko Sakkinen wrote:
> > There is consistent known behaviour how reclaimer and also the page fault
> > are exercised for each test. I think that is what matters most right now
> > that the basic behaviour of both the page reclaimer and page fault handler
> > gets exercised.
>
> There's also a lot of value to ensuring that tests can run _quickly_.
> If you have a test that fails one out of a million executions, it's a
> lot easier find and debug if it takes 1 ms versus 10 seconds.
>
> In other words, I think I'd prefer if we run two enclaves in each
> execution of the selftest. One can be as small as possible. The other
> can be the reclaim-triggering one.
>
> That's good both for test coverage, and it makes it a *bit* more
> straightforward to hack out the reclaim test if you need things to run
> faster.
>
> The pkeys selftest isn't a bad example here either. It has a couple of
> different "malloc()" options: THP, hugetlbfs, small-page mmap(), and a
> bunch of tests it runs on each type. As we add more SGX tests, we might
> end up with "do reclaim" just being an option we pass.

Even with large EPCs, the current test runs quite fast, because the heap is
left unmeasured. It's the EEXTEND operations that would cause a major
slow-down.

I would go only to something "more complex" when the current test hits
the roof. I don't like to make code more complicated, when that does not
happen.

When there are no compatibility requirements, it's not hard to refactor it
later on.

/Jarkko