An issue was found in the processing of event logs when the output
buffer length was not reset.[1]
This bug was not caught with cxl-test for 2 reasons. First, the test
harness mbox_send command [mock_get_event()] does not set the output
size based on the amount of data returned like the hardware command
does. Second, the simplistic event log testing always returned the same
number of elements per-get command.
Enhance the simulation of the event log mailbox to better match the bug
found with real hardware to cover potential regressions.
NOTE: These changes will cause cxl-events.sh in ndctl to fail without
the fix from Kwangjin. However, no changes to the user space test was
required. Therefore ndctl itself will be compatible with old or new
kernels once both patches land in the new kernel.
[1] Link: https://lore.kernel.org/all/[email protected]/
Cc: Kwangjin Ko <[email protected]>
Signed-off-by: Ira Weiny <[email protected]>
---
tools/testing/cxl/test/mem.c | 19 +++++++++++++++----
1 file changed, 15 insertions(+), 4 deletions(-)
diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c
index 35ee41e435ab..6584443144de 100644
--- a/tools/testing/cxl/test/mem.c
+++ b/tools/testing/cxl/test/mem.c
@@ -127,7 +127,7 @@ static struct {
#define CXL_TEST_EVENT_CNT_MAX 15
/* Set a number of events to return at a time for simulation. */
-#define CXL_TEST_EVENT_CNT 3
+#define CXL_TEST_EVENT_RET_MAX 4
struct mock_event_log {
u16 clear_idx;
@@ -222,6 +222,12 @@ static void mes_add_event(struct mock_event_store *mes,
log->nr_events++;
}
+/*
+ * Vary the number of events returned to simulate events occuring while the
+ * logs are being read.
+ */
+static int ret_limit = 0;
+
static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd)
{
struct cxl_get_event_payload *pl;
@@ -233,14 +239,18 @@ static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd)
if (cmd->size_in != sizeof(log_type))
return -EINVAL;
- if (cmd->size_out < struct_size(pl, records, CXL_TEST_EVENT_CNT))
+ ret_limit = (ret_limit + 1) % CXL_TEST_EVENT_RET_MAX;
+ if (!ret_limit)
+ ret_limit = 1;
+
+ if (cmd->size_out < struct_size(pl, records, ret_limit))
return -EINVAL;
log_type = *((u8 *)cmd->payload_in);
if (log_type >= CXL_EVENT_TYPE_MAX)
return -EINVAL;
- memset(cmd->payload_out, 0, cmd->size_out);
+ memset(cmd->payload_out, 0, struct_size(pl, records, 0));
log = event_find_log(dev, log_type);
if (!log || event_log_empty(log))
@@ -248,7 +258,7 @@ static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd)
pl = cmd->payload_out;
- for (i = 0; i < CXL_TEST_EVENT_CNT && !event_log_empty(log); i++) {
+ for (i = 0; i < ret_limit && !event_log_empty(log); i++) {
memcpy(&pl->records[i], event_get_current(log),
sizeof(pl->records[i]));
pl->records[i].event.generic.hdr.handle =
@@ -256,6 +266,7 @@ static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd)
log->cur_idx++;
}
+ cmd->size_out = struct_size(pl, records, i);
pl->record_count = cpu_to_le16(i);
if (!event_log_empty(log))
pl->flags |= CXL_GET_EVENT_FLAG_MORE_RECORDS;
---
base-commit: 8d025e2092e29bfd13e56c78e22af25fac83c8ec
change-id: 20240401-enhance-event-test-cebeff5189cd
Best regards,
--
Ira Weiny <[email protected]>
On Mon, 01 Apr 2024 22:31:58 -0700
Ira Weiny <[email protected]> wrote:
> An issue was found in the processing of event logs when the output
> buffer length was not reset.[1]
>
> This bug was not caught with cxl-test for 2 reasons. First, the test
> harness mbox_send command [mock_get_event()] does not set the output
> size based on the amount of data returned like the hardware command
> does. Second, the simplistic event log testing always returned the same
> number of elements per-get command.
>
> Enhance the simulation of the event log mailbox to better match the bug
> found with real hardware to cover potential regressions.
>
> NOTE: These changes will cause cxl-events.sh in ndctl to fail without
> the fix from Kwangjin. However, no changes to the user space test was
> required. Therefore ndctl itself will be compatible with old or new
> kernels once both patches land in the new kernel.
Good to state how many events are read out (22 I think) as that allows
reader of this patch to see that it will cycle all the way around, so we
will get the 4 to 1 reduction between two reads ensuring things work
correctly when smaller number of events are returned.
1, 2, 3, 4, 1, 2, 3, 4, 1, 1
I think.
>
> [1] Link: https://lore.kernel.org/all/[email protected]/
>
> Cc: Kwangjin Ko <[email protected]>
> Signed-off-by: Ira Weiny <[email protected]>
Reviewed-by: Jonathan Cameron <[email protected]>
> ---
> tools/testing/cxl/test/mem.c | 19 +++++++++++++++----
> 1 file changed, 15 insertions(+), 4 deletions(-)
>
> diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c
> index 35ee41e435ab..6584443144de 100644
> --- a/tools/testing/cxl/test/mem.c
> +++ b/tools/testing/cxl/test/mem.c
> @@ -127,7 +127,7 @@ static struct {
> #define CXL_TEST_EVENT_CNT_MAX 15
>
> /* Set a number of events to return at a time for simulation. */
> -#define CXL_TEST_EVENT_CNT 3
> +#define CXL_TEST_EVENT_RET_MAX 4
>
> struct mock_event_log {
> u16 clear_idx;
> @@ -222,6 +222,12 @@ static void mes_add_event(struct mock_event_store *mes,
> log->nr_events++;
> }
>
> +/*
> + * Vary the number of events returned to simulate events occuring while the
> + * logs are being read.
> + */
> +static int ret_limit = 0;
> +
> static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd)
> {
> struct cxl_get_event_payload *pl;
> @@ -233,14 +239,18 @@ static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd)
> if (cmd->size_in != sizeof(log_type))
> return -EINVAL;
>
> - if (cmd->size_out < struct_size(pl, records, CXL_TEST_EVENT_CNT))
> + ret_limit = (ret_limit + 1) % CXL_TEST_EVENT_RET_MAX;
> + if (!ret_limit)
> + ret_limit = 1;
> +
> + if (cmd->size_out < struct_size(pl, records, ret_limit))
> return -EINVAL;
>
> log_type = *((u8 *)cmd->payload_in);
> if (log_type >= CXL_EVENT_TYPE_MAX)
> return -EINVAL;
>
> - memset(cmd->payload_out, 0, cmd->size_out);
> + memset(cmd->payload_out, 0, struct_size(pl, records, 0));
>
> log = event_find_log(dev, log_type);
> if (!log || event_log_empty(log))
> @@ -248,7 +258,7 @@ static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd)
>
> pl = cmd->payload_out;
>
> - for (i = 0; i < CXL_TEST_EVENT_CNT && !event_log_empty(log); i++) {
> + for (i = 0; i < ret_limit && !event_log_empty(log); i++) {
> memcpy(&pl->records[i], event_get_current(log),
> sizeof(pl->records[i]));
> pl->records[i].event.generic.hdr.handle =
> @@ -256,6 +266,7 @@ static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd)
> log->cur_idx++;
> }
>
> + cmd->size_out = struct_size(pl, records, i);
> pl->record_count = cpu_to_le16(i);
> if (!event_log_empty(log))
> pl->flags |= CXL_GET_EVENT_FLAG_MORE_RECORDS;
>
> ---
> base-commit: 8d025e2092e29bfd13e56c78e22af25fac83c8ec
> change-id: 20240401-enhance-event-test-cebeff5189cd
>
> Best regards,
Jonathan Cameron wrote:
> On Mon, 01 Apr 2024 22:31:58 -0700
> Ira Weiny <[email protected]> wrote:
>
> > An issue was found in the processing of event logs when the output
> > buffer length was not reset.[1]
> >
> > This bug was not caught with cxl-test for 2 reasons. First, the test
> > harness mbox_send command [mock_get_event()] does not set the output
> > size based on the amount of data returned like the hardware command
> > does. Second, the simplistic event log testing always returned the same
> > number of elements per-get command.
> >
> > Enhance the simulation of the event log mailbox to better match the bug
> > found with real hardware to cover potential regressions.
> >
> > NOTE: These changes will cause cxl-events.sh in ndctl to fail without
> > the fix from Kwangjin. However, no changes to the user space test was
> > required. Therefore ndctl itself will be compatible with old or new
> > kernels once both patches land in the new kernel.
>
> Good to state how many events are read out (22 I think) as that allows
> reader of this patch to see that it will cycle all the way around, so we
> will get the 4 to 1 reduction between two reads ensuring things work
> correctly when smaller number of events are returned.
It is actually:
num_overflow_expected=1
num_fatal_expected=2
num_failure_expected=16
num_info_expected=3
..because each log is read separately. But the 16 failures is what tests
this.
>
> 1, 2, 3, 4, 1, 2, 3, 4, 1, 1
> I think.
1, 2, 3
> >
> > [1] Link: https://lore.kernel.org/all/[email protected]/
> >
> > Cc: Kwangjin Ko <[email protected]>
> > Signed-off-by: Ira Weiny <[email protected]>
>
> Reviewed-by: Jonathan Cameron <[email protected]>
Thanks, Dave do you want me to reroll with an updated commit message?
Ira
>
>
> > ---
> > tools/testing/cxl/test/mem.c | 19 +++++++++++++++----
> > 1 file changed, 15 insertions(+), 4 deletions(-)
> >
> > diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c
> > index 35ee41e435ab..6584443144de 100644
> > --- a/tools/testing/cxl/test/mem.c
> > +++ b/tools/testing/cxl/test/mem.c
> > @@ -127,7 +127,7 @@ static struct {
> > #define CXL_TEST_EVENT_CNT_MAX 15
> >
> > /* Set a number of events to return at a time for simulation. */
> > -#define CXL_TEST_EVENT_CNT 3
> > +#define CXL_TEST_EVENT_RET_MAX 4
> >
> > struct mock_event_log {
> > u16 clear_idx;
> > @@ -222,6 +222,12 @@ static void mes_add_event(struct mock_event_store *mes,
> > log->nr_events++;
> > }
> >
> > +/*
> > + * Vary the number of events returned to simulate events occuring while the
> > + * logs are being read.
> > + */
> > +static int ret_limit = 0;
> > +
> > static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd)
> > {
> > struct cxl_get_event_payload *pl;
> > @@ -233,14 +239,18 @@ static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd)
> > if (cmd->size_in != sizeof(log_type))
> > return -EINVAL;
> >
> > - if (cmd->size_out < struct_size(pl, records, CXL_TEST_EVENT_CNT))
> > + ret_limit = (ret_limit + 1) % CXL_TEST_EVENT_RET_MAX;
> > + if (!ret_limit)
> > + ret_limit = 1;
> > +
> > + if (cmd->size_out < struct_size(pl, records, ret_limit))
> > return -EINVAL;
> >
> > log_type = *((u8 *)cmd->payload_in);
> > if (log_type >= CXL_EVENT_TYPE_MAX)
> > return -EINVAL;
> >
> > - memset(cmd->payload_out, 0, cmd->size_out);
> > + memset(cmd->payload_out, 0, struct_size(pl, records, 0));
> >
> > log = event_find_log(dev, log_type);
> > if (!log || event_log_empty(log))
> > @@ -248,7 +258,7 @@ static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd)
> >
> > pl = cmd->payload_out;
> >
> > - for (i = 0; i < CXL_TEST_EVENT_CNT && !event_log_empty(log); i++) {
> > + for (i = 0; i < ret_limit && !event_log_empty(log); i++) {
> > memcpy(&pl->records[i], event_get_current(log),
> > sizeof(pl->records[i]));
> > pl->records[i].event.generic.hdr.handle =
> > @@ -256,6 +266,7 @@ static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd)
> > log->cur_idx++;
> > }
> >
> > + cmd->size_out = struct_size(pl, records, i);
> > pl->record_count = cpu_to_le16(i);
> > if (!event_log_empty(log))
> > pl->flags |= CXL_GET_EVENT_FLAG_MORE_RECORDS;
> >
> > ---
> > base-commit: 8d025e2092e29bfd13e56c78e22af25fac83c8ec
> > change-id: 20240401-enhance-event-test-cebeff5189cd
> >
> > Best regards,
>