2024-01-18 21:11:50

by Stuart Hayes

[permalink] [raw]
Subject: [PATCH v2] nvme_core: scan namespaces asynchronously

Use async function calls to make namespace scanning happen in parallel.

Without the patch, NVME namespaces are scanned serially, so it can take a
long time for all of a controller's namespaces to become available,
especially with a slower (TCP) interface with a large number of namespaces.

The time it took for all namespaces to show up after connecting (via TCP)
to a controller with 1002 namespaces was measured:

network latency without patch with patch
0 6s 1s
50ms 210s 10s
100ms 417s 18s

Signed-off-by: Stuart Hayes <[email protected]>

--
V2: remove module param to enable/disable async scanning
add scan time measurements to commit message

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 0af612387083..069350f85b83 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -4,6 +4,7 @@
* Copyright (c) 2011-2014, Intel Corporation.
*/

+#include <linux/async.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/blk-integrity.h>
@@ -3812,12 +3813,38 @@ static void nvme_validate_ns(struct nvme_ns *ns, struct nvme_ns_info *info)
nvme_ns_remove(ns);
}

-static void nvme_scan_ns(struct nvme_ctrl *ctrl, unsigned nsid)
+/*
+ * struct nvme_scan_state - keeps track of controller & NSIDs to scan
+ * @ctrl: Controller on which namespaces are being scanned
+ * @count: Next NSID to scan (for sequential scan), or
+ * Index of next NSID to scan in ns_list (for list scan)
+ * @ns_list: pointer to list of NSIDs to scan (NULL if sequential scan)
+ */
+struct nvme_scan_state {
+ struct nvme_ctrl *ctrl;
+ atomic_t count;
+ __le32 *ns_list;
+};
+
+static void nvme_scan_ns(void *data, async_cookie_t cookie)
{
- struct nvme_ns_info info = { .nsid = nsid };
+ struct nvme_ns_info info = {};
+ struct nvme_scan_state *scan_state;
+ struct nvme_ctrl *ctrl;
+ u32 nsid;
struct nvme_ns *ns;
int ret;

+ scan_state = data;
+ ctrl = scan_state->ctrl;
+ nsid = (u32)atomic_fetch_add(1, &scan_state->count);
+ /*
+ * get NSID from list (if scanning from a list, not sequentially)
+ */
+ if (scan_state->ns_list)
+ nsid = le32_to_cpu(scan_state->ns_list[nsid]);
+
+ info.nsid = nsid;
if (nvme_identify_ns_descs(ctrl, &info))
return;

@@ -3881,11 +3908,15 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl)
__le32 *ns_list;
u32 prev = 0;
int ret = 0, i;
+ ASYNC_DOMAIN(domain);
+ struct nvme_scan_state scan_state;

ns_list = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL);
if (!ns_list)
return -ENOMEM;

+ scan_state.ctrl = ctrl;
+ scan_state.ns_list = ns_list;
for (;;) {
struct nvme_command cmd = {
.identify.opcode = nvme_admin_identify,
@@ -3901,19 +3932,25 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl)
goto free;
}

+ /*
+ * scan list starting at list offset 0
+ */
+ atomic_set(&scan_state.count, 0);
for (i = 0; i < nr_entries; i++) {
u32 nsid = le32_to_cpu(ns_list[i]);

if (!nsid) /* end of the list? */
goto out;
- nvme_scan_ns(ctrl, nsid);
+ async_schedule_domain(nvme_scan_ns, &scan_state, &domain);
while (++prev < nsid)
nvme_ns_remove_by_nsid(ctrl, prev);
}
+ async_synchronize_full_domain(&domain);
}
out:
nvme_remove_invalid_namespaces(ctrl, prev);
free:
+ async_synchronize_full_domain(&domain);
kfree(ns_list);
return ret;
}
@@ -3922,14 +3959,23 @@ static void nvme_scan_ns_sequential(struct nvme_ctrl *ctrl)
{
struct nvme_id_ctrl *id;
u32 nn, i;
+ ASYNC_DOMAIN(domain);
+ struct nvme_scan_state scan_state;

if (nvme_identify_ctrl(ctrl, &id))
return;
nn = le32_to_cpu(id->nn);
kfree(id);

+ scan_state.ctrl = ctrl;
+ /*
+ * scan sequentially starting at NSID 1
+ */
+ atomic_set(&scan_state.count, 1);
+ scan_state.ns_list = NULL;
for (i = 1; i <= nn; i++)
- nvme_scan_ns(ctrl, i);
+ async_schedule_domain(nvme_scan_ns, &scan_state, &domain);
+ async_synchronize_full_domain(&domain);

nvme_remove_invalid_namespaces(ctrl, nn);
}
--
2.39.3



2024-01-22 09:24:11

by Sagi Grimberg

[permalink] [raw]
Subject: Re: [PATCH v2] nvme_core: scan namespaces asynchronously

Resending... didn't make it to the list, probably smtp issues....

On 1/22/24 11:13, Sagi Grimberg wrote:
>
>
> On 1/18/24 23:03, Stuart Hayes wrote:
>> Use async function calls to make namespace scanning happen in parallel.
>>
>> Without the patch, NVME namespaces are scanned serially, so it can take a
>> long time for all of a controller's namespaces to become available,
>> especially with a slower (TCP) interface with large number of namespaces.
>>
>> The time it took for all namespaces to show up after connecting (via TCP)
>> to a controller with 1002 namespaces was measured:
>>
>> network latency   without patch   with patch
>>       0                 6s            1s
>>      50ms             210s           10s
>>     100ms             417s           18s
>>
>
> Impressive speedup. Not a very common use-case though...
>
>> Signed-off-by: Stuart Hayes <[email protected]>
>>
>> --
>> V2: remove module param to enable/disable async scanning
>>      add scan time measurements to commit message
>>
>> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
>> index 0af612387083..069350f85b83 100644
>> --- a/drivers/nvme/host/core.c
>> +++ b/drivers/nvme/host/core.c
>> @@ -4,6 +4,7 @@
>>    * Copyright (c) 2011-2014, Intel Corporation.
>>    */
>> +#include <linux/async.h>
>>   #include <linux/blkdev.h>
>>   #include <linux/blk-mq.h>
>>   #include <linux/blk-integrity.h>
>> @@ -3812,12 +3813,38 @@ static void nvme_validate_ns(struct nvme_ns
>> *ns, struct nvme_ns_info *info)
>>           nvme_ns_remove(ns);
>>   }
>> -static void nvme_scan_ns(struct nvme_ctrl *ctrl, unsigned nsid)
>> +/*
>> + * struct nvme_scan_state - keeps track of controller & NSIDs to scan
>> + * @ctrl:    Controller on which namespaces are being scanned
>> + * @count:    Next NSID to scan (for sequential scan), or
>> + *        Index of next NSID to scan in ns_list (for list scan)
>> + * @ns_list:    pointer to list of NSIDs to scan (NULL if sequential
>> scan)
>> + */
>> +struct nvme_scan_state {
>> +    struct nvme_ctrl *ctrl;
>> +    atomic_t count;
>> +    __le32 *ns_list;
>> +};
>> +
>> +static void nvme_scan_ns(void *data, async_cookie_t cookie)
>
> I think its better to call it nvme_scan_ns_async to indicate what
> it is.
>
>>   {
>> -    struct nvme_ns_info info = { .nsid = nsid };
>> +    struct nvme_ns_info info = {};
>> +    struct nvme_scan_state *scan_state;
>> +    struct nvme_ctrl *ctrl;
>> +    u32 nsid;
>>       struct nvme_ns *ns;
>>       int ret;
>> +    scan_state = data;
>> +    ctrl = scan_state->ctrl;
>
> I think these assignments can be done on the declaration.
>
>> +    nsid = (u32)atomic_fetch_add(1, &scan_state->count);
>> +    /*
>> +     * get NSID from list (if scanning from a list, not sequentially)
>> +     */
>> +    if (scan_state->ns_list)
>> +        nsid = le32_to_cpu(scan_state->ns_list[nsid]);
>> +
>
> This is awkward. ns_list passed in optionally.
> How about we limit this change to only operate on nvme_scan_ns_list?
> If the controller is old or quirked to support only a sequential scan
> it does not benefit from a parallel scan. I doubt that these controllers
> are likely to expose a large number of namespaces anyways.
>
>> +    info.nsid = nsid;
>>       if (nvme_identify_ns_descs(ctrl, &info))
>>           return;
>> @@ -3881,11 +3908,15 @@ static int nvme_scan_ns_list(struct nvme_ctrl
>> *ctrl)
>>       __le32 *ns_list;
>>       u32 prev = 0;
>>       int ret = 0, i;
>> +    ASYNC_DOMAIN(domain);
>> +    struct nvme_scan_state scan_state;
>>       ns_list = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL);
>>       if (!ns_list)
>>           return -ENOMEM;
>> +    scan_state.ctrl = ctrl;
>> +    scan_state.ns_list = ns_list;
>
> Is there a need to have a local ns_list variable here?
>
>>       for (;;) {
>>           struct nvme_command cmd = {
>>               .identify.opcode    = nvme_admin_identify,
>> @@ -3901,19 +3932,25 @@ static int nvme_scan_ns_list(struct nvme_ctrl
>> *ctrl)
>>               goto free;
>>           }
>> +        /*
>> +         * scan list starting at list offset 0
>> +         */
>> +        atomic_set(&scan_state.count, 0);
>>           for (i = 0; i < nr_entries; i++) {
>>               u32 nsid = le32_to_cpu(ns_list[i]);
>>               if (!nsid)    /* end of the list? */
>>                   goto out;
>> -            nvme_scan_ns(ctrl, nsid);
>> +            async_schedule_domain(nvme_scan_ns, &scan_state, &domain);
>>               while (++prev < nsid)
>>                   nvme_ns_remove_by_nsid(ctrl, prev);
>>           }
>> +        async_synchronize_full_domain(&domain);
>>       }
>>    out:
>>       nvme_remove_invalid_namespaces(ctrl, prev);
>
> Is it a good idea to remove the invalid namespaces before synchronizing
> the async scans?
>
>>    free:
>> +    async_synchronize_full_domain(&domain);
>>       kfree(ns_list);
>>       return ret;
>>   }
>> @@ -3922,14 +3959,23 @@ static void nvme_scan_ns_sequential(struct
>> nvme_ctrl *ctrl)
>>   {
>>       struct nvme_id_ctrl *id;
>>       u32 nn, i;
>> +    ASYNC_DOMAIN(domain);
>> +    struct nvme_scan_state scan_state;
>>       if (nvme_identify_ctrl(ctrl, &id))
>>           return;
>>       nn = le32_to_cpu(id->nn);
>>       kfree(id);
>> +    scan_state.ctrl = ctrl;
>> +    /*
>> +     * scan sequentially starting at NSID 1
>> +     */
>> +    atomic_set(&scan_state.count, 1);
>> +    scan_state.ns_list = NULL;
>>       for (i = 1; i <= nn; i++)
>> -        nvme_scan_ns(ctrl, i);
>> +        async_schedule_domain(nvme_scan_ns, &scan_state, &domain);
>> +    async_synchronize_full_domain(&domain);
>>       nvme_remove_invalid_namespaces(ctrl, nn);
>>   }
>
> I think we need a blktest for this. ns scanning has been notorious when
> running simultaneously with controller reset/reconnect/remove
> sequences... Ideally a test with a larger number of namespaces to
> exercise the code.
>
> Also, make sure that blktest suite does not complain about anything
> else.

2024-01-22 14:26:19

by Sagi Grimberg

[permalink] [raw]
Subject: Re: [PATCH v2] nvme_core: scan namespaces asynchronously



On 1/18/24 23:03, Stuart Hayes wrote:
> Use async function calls to make namespace scanning happen in parallel.
>
> Without the patch, NVME namespaces are scanned serially, so it can take a
> long time for all of a controller's namespaces to become available,
> especially with a slower (TCP) interface with large number of namespaces.
>
> The time it took for all namespaces to show up after connecting (via TCP)
> to a controller with 1002 namespaces was measured:
>
> network latency without patch with patch
> 0 6s 1s
> 50ms 210s 10s
> 100ms 417s 18s
>

Impressive speedup. Not a very common use-case though...

> Signed-off-by: Stuart Hayes <[email protected]>
>
> --
> V2: remove module param to enable/disable async scanning
> add scan time measurements to commit message
>
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 0af612387083..069350f85b83 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -4,6 +4,7 @@
> * Copyright (c) 2011-2014, Intel Corporation.
> */
>
> +#include <linux/async.h>
> #include <linux/blkdev.h>
> #include <linux/blk-mq.h>
> #include <linux/blk-integrity.h>
> @@ -3812,12 +3813,38 @@ static void nvme_validate_ns(struct nvme_ns *ns, struct nvme_ns_info *info)
> nvme_ns_remove(ns);
> }
>
> -static void nvme_scan_ns(struct nvme_ctrl *ctrl, unsigned nsid)
> +/*
> + * struct nvme_scan_state - keeps track of controller & NSIDs to scan
> + * @ctrl: Controller on which namespaces are being scanned
> + * @count: Next NSID to scan (for sequential scan), or
> + * Index of next NSID to scan in ns_list (for list scan)
> + * @ns_list: pointer to list of NSIDs to scan (NULL if sequential scan)
> + */
> +struct nvme_scan_state {
> + struct nvme_ctrl *ctrl;
> + atomic_t count;
> + __le32 *ns_list;
> +};
> +
> +static void nvme_scan_ns(void *data, async_cookie_t cookie)

I think it's better to call it nvme_scan_ns_async to indicate what
it is.

> {
> - struct nvme_ns_info info = { .nsid = nsid };
> + struct nvme_ns_info info = {};
> + struct nvme_scan_state *scan_state;
> + struct nvme_ctrl *ctrl;
> + u32 nsid;
> struct nvme_ns *ns;
> int ret;
>
> + scan_state = data;
> + ctrl = scan_state->ctrl;

I think these assignments can be done on the declaration.

> + nsid = (u32)atomic_fetch_add(1, &scan_state->count);
> + /*
> + * get NSID from list (if scanning from a list, not sequentially)
> + */
> + if (scan_state->ns_list)
> + nsid = le32_to_cpu(scan_state->ns_list[nsid]);
> +

This is awkward. ns_list passed in optionally.
How about we limit this change to only operate on nvme_scan_ns_list?
If the controller is old or quirked to support only a sequential scan
it does not benefit from a parallel scan. I doubt that these controllers
are likely to expose a large number of namespaces anyways.

> + info.nsid = nsid;
> if (nvme_identify_ns_descs(ctrl, &info))
> return;
>
> @@ -3881,11 +3908,15 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl)
> __le32 *ns_list;
> u32 prev = 0;
> int ret = 0, i;
> + ASYNC_DOMAIN(domain);
> + struct nvme_scan_state scan_state;
>
> ns_list = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL);
> if (!ns_list)
> return -ENOMEM;
>
> + scan_state.ctrl = ctrl;
> + scan_state.ns_list = ns_list;

Is there a need to have a local ns_list variable here?

> for (;;) {
> struct nvme_command cmd = {
> .identify.opcode = nvme_admin_identify,
> @@ -3901,19 +3932,25 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl)
> goto free;
> }
>
> + /*
> + * scan list starting at list offset 0
> + */
> + atomic_set(&scan_state.count, 0);
> for (i = 0; i < nr_entries; i++) {
> u32 nsid = le32_to_cpu(ns_list[i]);
>
> if (!nsid) /* end of the list? */
> goto out;
> - nvme_scan_ns(ctrl, nsid);
> + async_schedule_domain(nvme_scan_ns, &scan_state, &domain);
> while (++prev < nsid)
> nvme_ns_remove_by_nsid(ctrl, prev);
> }
> + async_synchronize_full_domain(&domain);
> }
> out:
> nvme_remove_invalid_namespaces(ctrl, prev);

Is it a good idea to remove the invalid namespaces before synchronizing
the async scans?

> free:
> + async_synchronize_full_domain(&domain);
> kfree(ns_list);
> return ret;
> }
> @@ -3922,14 +3959,23 @@ static void nvme_scan_ns_sequential(struct nvme_ctrl *ctrl)
> {
> struct nvme_id_ctrl *id;
> u32 nn, i;
> + ASYNC_DOMAIN(domain);
> + struct nvme_scan_state scan_state;
>
> if (nvme_identify_ctrl(ctrl, &id))
> return;
> nn = le32_to_cpu(id->nn);
> kfree(id);
>
> + scan_state.ctrl = ctrl;
> + /*
> + * scan sequentially starting at NSID 1
> + */
> + atomic_set(&scan_state.count, 1);
> + scan_state.ns_list = NULL;
> for (i = 1; i <= nn; i++)
> - nvme_scan_ns(ctrl, i);
> + async_schedule_domain(nvme_scan_ns, &scan_state, &domain);
> + async_synchronize_full_domain(&domain);
>
> nvme_remove_invalid_namespaces(ctrl, nn);
> }

I think we need a blktest for this. ns scanning has been notorious when
running simultaneously with controller reset/reconnect/remove
sequences... Ideally a test with a larger number of namespaces to
exercise the code.

Also, make sure that blktest suite does not complain about anything
else.

2024-01-23 16:44:22

by Keith Busch

[permalink] [raw]
Subject: Re: [PATCH v2] nvme_core: scan namespaces asynchronously

On Mon, Jan 22, 2024 at 11:13:15AM +0200, Sagi Grimberg wrote:
> On 1/18/24 23:03, Stuart Hayes wrote:
> > @@ -3901,19 +3932,25 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl)
> > goto free;
> > }
> > + /*
> > + * scan list starting at list offset 0
> > + */
> > + atomic_set(&scan_state.count, 0);
> > for (i = 0; i < nr_entries; i++) {
> > u32 nsid = le32_to_cpu(ns_list[i]);
> > if (!nsid) /* end of the list? */
> > goto out;
> > - nvme_scan_ns(ctrl, nsid);
> > + async_schedule_domain(nvme_scan_ns, &scan_state, &domain);
> > while (++prev < nsid)
> > nvme_ns_remove_by_nsid(ctrl, prev);
> > }
> > + async_synchronize_full_domain(&domain);

You mentioned async scanning was an improvement if you have 1000
namespaces, but wouldn't this be worse if you have very few namespaces?
IOW, the decision to use the async schedule should be based on
nr_entries, right?

2024-01-23 20:33:39

by Chaitanya Kulkarni

[permalink] [raw]
Subject: Re: [PATCH v2] nvme_core: scan namespaces asynchronously

On 1/23/2024 8:37 AM, Keith Busch wrote:
> On Mon, Jan 22, 2024 at 11:13:15AM +0200, Sagi Grimberg wrote:
>> On 1/18/24 23:03, Stuart Hayes wrote:
>>> @@ -3901,19 +3932,25 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl)
>>> goto free;
>>> }
>>> + /*
>>> + * scan list starting at list offset 0
>>> + */
>>> + atomic_set(&scan_state.count, 0);
>>> for (i = 0; i < nr_entries; i++) {
>>> u32 nsid = le32_to_cpu(ns_list[i]);
>>> if (!nsid) /* end of the list? */
>>> goto out;
>>> - nvme_scan_ns(ctrl, nsid);
>>> + async_schedule_domain(nvme_scan_ns, &scan_state, &domain);
>>> while (++prev < nsid)
>>> nvme_ns_remove_by_nsid(ctrl, prev);
>>> }
>>> + async_synchronize_full_domain(&domain);
>
> You mentioned async scanning was an improvement if you have 1000
> namespaces, but wouldn't this be worse if you have very few namespaces?
> IOW, the decision to use the async schedule should be based on
> nr_entries, right?
>

Perhaps it's also helpful to documents the data for small number of
namespaces, we can think of collecting data something like this:-

NR Namespaces Seq Scan Async Scan
2
4
8
16
32
64
128
256
512
1024

If we find that difference is not that much then we can go ahead with
this patch, if the difference is not acceptable to the point that it
will regress for common setups then we can make it configurable ?

-ck


2024-01-24 19:38:48

by Stuart Hayes

[permalink] [raw]
Subject: Re: [PATCH v2] nvme_core: scan namespaces asynchronously



On 1/22/2024 3:13 AM, Sagi Grimberg wrote:
>
>
> On 1/18/24 23:03, Stuart Hayes wrote:
>> Use async function calls to make namespace scanning happen in parallel.
>>
>> Without the patch, NVME namespaces are scanned serially, so it can take a
>> long time for all of a controller's namespaces to become available,
>> especially with a slower (TCP) interface with large number of namespaces.
>>
>> The time it took for all namespaces to show up after connecting (via TCP)
>> to a controller with 1002 namespaces was measured:
>>
>> network latency   without patch   with patch
>>       0                 6s            1s
>>      50ms             210s           10s
>>     100ms             417s           18s
>>
>
> Impressive speedup. Not a very common use-case though...
>
>> Signed-off-by: Stuart Hayes <[email protected]>
>>
>> --
>> V2: remove module param to enable/disable async scanning
>>      add scan time measurements to commit message
>>
>> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
>> index 0af612387083..069350f85b83 100644
>> --- a/drivers/nvme/host/core.c
>> +++ b/drivers/nvme/host/core.c
>> @@ -4,6 +4,7 @@
>>    * Copyright (c) 2011-2014, Intel Corporation.
>>    */
>> +#include <linux/async.h>
>>   #include <linux/blkdev.h>
>>   #include <linux/blk-mq.h>
>>   #include <linux/blk-integrity.h>
>> @@ -3812,12 +3813,38 @@ static void nvme_validate_ns(struct nvme_ns *ns, struct nvme_ns_info *info)
>>           nvme_ns_remove(ns);
>>   }
>> -static void nvme_scan_ns(struct nvme_ctrl *ctrl, unsigned nsid)
>> +/*
>> + * struct nvme_scan_state - keeps track of controller & NSIDs to scan
>> + * @ctrl:    Controller on which namespaces are being scanned
>> + * @count:    Next NSID to scan (for sequential scan), or
>> + *        Index of next NSID to scan in ns_list (for list scan)
>> + * @ns_list:    pointer to list of NSIDs to scan (NULL if sequential scan)
>> + */
>> +struct nvme_scan_state {
>> +    struct nvme_ctrl *ctrl;
>> +    atomic_t count;
>> +    __le32 *ns_list;
>> +};
>> +
>> +static void nvme_scan_ns(void *data, async_cookie_t cookie)
>
> I think its better to call it nvme_scan_ns_async to indicate what
> it is.
>
>>   {
>> -    struct nvme_ns_info info = { .nsid = nsid };
>> +    struct nvme_ns_info info = {};
>> +    struct nvme_scan_state *scan_state;
>> +    struct nvme_ctrl *ctrl;
>> +    u32 nsid;
>>       struct nvme_ns *ns;
>>       int ret;
>> +    scan_state = data;
>> +    ctrl = scan_state->ctrl;
>
> I think these assignments can be done on the declaration.
>
>> +    nsid = (u32)atomic_fetch_add(1, &scan_state->count);
>> +    /*
>> +     * get NSID from list (if scanning from a list, not sequentially)
>> +     */
>> +    if (scan_state->ns_list)
>> +        nsid = le32_to_cpu(scan_state->ns_list[nsid]);
>> +
>
> This is awkward. ns_list passed in optionally.
> How about we limit this change to only operate on nvme_scan_ns_list?
> If the controller is old or quirked to support only a sequential scan
> it does not benefit from a parallel scan. I doubt that these controllers
> are likely to expose a large number of namespaces anyways.
>
>> +    info.nsid = nsid;
>>       if (nvme_identify_ns_descs(ctrl, &info))
>>           return;
>> @@ -3881,11 +3908,15 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl)
>>       __le32 *ns_list;
>>       u32 prev = 0;
>>       int ret = 0, i;
>> +    ASYNC_DOMAIN(domain);
>> +    struct nvme_scan_state scan_state;
>>       ns_list = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL);
>>       if (!ns_list)
>>           return -ENOMEM;
>> +    scan_state.ctrl = ctrl;
>> +    scan_state.ns_list = ns_list;
>
> Is there a need to have a local ns_list variable here?
>
>>       for (;;) {
>>           struct nvme_command cmd = {
>>               .identify.opcode    = nvme_admin_identify,
>> @@ -3901,19 +3932,25 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl)
>>               goto free;
>>           }
>> +        /*
>> +         * scan list starting at list offset 0
>> +         */
>> +        atomic_set(&scan_state.count, 0);
>>           for (i = 0; i < nr_entries; i++) {
>>               u32 nsid = le32_to_cpu(ns_list[i]);
>>               if (!nsid)    /* end of the list? */
>>                   goto out;
>> -            nvme_scan_ns(ctrl, nsid);
>> +            async_schedule_domain(nvme_scan_ns, &scan_state, &domain);
>>               while (++prev < nsid)
>>                   nvme_ns_remove_by_nsid(ctrl, prev);
>>           }
>> +        async_synchronize_full_domain(&domain);
>>       }
>>    out:
>>       nvme_remove_invalid_namespaces(ctrl, prev);
>
> Is it a good idea to remove the invalid namespaces before synchronizing
> the async scans?
>
>>    free:
>> +    async_synchronize_full_domain(&domain);
>>       kfree(ns_list);
>>       return ret;
>>   }
>> @@ -3922,14 +3959,23 @@ static void nvme_scan_ns_sequential(struct nvme_ctrl *ctrl)
>>   {
>>       struct nvme_id_ctrl *id;
>>       u32 nn, i;
>> +    ASYNC_DOMAIN(domain);
>> +    struct nvme_scan_state scan_state;
>>       if (nvme_identify_ctrl(ctrl, &id))
>>           return;
>>       nn = le32_to_cpu(id->nn);
>>       kfree(id);
>> +    scan_state.ctrl = ctrl;
>> +    /*
>> +     * scan sequentially starting at NSID 1
>> +     */
>> +    atomic_set(&scan_state.count, 1);
>> +    scan_state.ns_list = NULL;
>>       for (i = 1; i <= nn; i++)
>> -        nvme_scan_ns(ctrl, i);
>> +        async_schedule_domain(nvme_scan_ns, &scan_state, &domain);
>> +    async_synchronize_full_domain(&domain);
>>       nvme_remove_invalid_namespaces(ctrl, nn);
>>   }
>
> I think we need a blktest for this. ns scanning has been notorious when
> running simultaneously with controller reset/reconnect/remove
> sequences... Ideally a test with a larger number of namespaces to
> exercise the code.
>
> Also, make sure that blktest suite does not complain about anything
> else.

Thank you for the feedback on the patch, I agree with it.

I'm not sure how to implement a blktest suite for this, though. I can look into it.

2024-01-24 19:55:22

by Stuart Hayes

[permalink] [raw]
Subject: Re: [PATCH v2] nvme_core: scan namespaces asynchronously



On 1/23/2024 2:21 PM, Chaitanya Kulkarni wrote:
> On 1/23/2024 8:37 AM, Keith Busch wrote:
>> On Mon, Jan 22, 2024 at 11:13:15AM +0200, Sagi Grimberg wrote:
>>> On 1/18/24 23:03, Stuart Hayes wrote:
>>>> @@ -3901,19 +3932,25 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl)
>>>> goto free;
>>>> }
>>>> + /*
>>>> + * scan list starting at list offset 0
>>>> + */
>>>> + atomic_set(&scan_state.count, 0);
>>>> for (i = 0; i < nr_entries; i++) {
>>>> u32 nsid = le32_to_cpu(ns_list[i]);
>>>> if (!nsid) /* end of the list? */
>>>> goto out;
>>>> - nvme_scan_ns(ctrl, nsid);
>>>> + async_schedule_domain(nvme_scan_ns, &scan_state, &domain);
>>>> while (++prev < nsid)
>>>> nvme_ns_remove_by_nsid(ctrl, prev);
>>>> }
>>>> + async_synchronize_full_domain(&domain);
>>
>> You mentioned async scanning was an improvement if you have 1000
>> namespaces, but wouldn't this be worse if you have very few namespaces?
>> IOW, the decision to use the async schedule should be based on
>> nr_entries, right?
>>
>
> Perhaps it's also helpful to documents the data for small number of
> namespaces, we can think of collecting data something like this:-
>
> NR Namespaces Seq Scan Async Scan
> 2
> 4
> 8
> 16
> 32
> 64
> 128
> 256
> 512
> 1024
>
> If we find that difference is not that much then we can go ahead with
> this patch, if it the difference is not acceptable to the point that it
> will regress for common setups then we can make it configurable ?
>
> -ck
>
>
I believe the only reason the async scanning should take any longer than
sync scanning is that nvme_scan_ns() has to wait on the workqueue until it
is scheduled.

Testing on my system (with pcie nvme devices with a single namespace), it
looks like it only takes a fraction of a ms (100us or less typically) for
that to happen. Then it takes 6-10ms or more for the actual namespace scan.

So scanning asynchronously, even using a local pcie device with a single
namespace, doesn't take significantly longer. Of course I guess it might
take a bit longer on a busy system, but I wouldn't think that scanning
namespaces is a critical path where a few milliseconds would make much
difference (?). It wouldn't be too hard to make it scan synchronously if
there aren't more than, say, a couple namespaces, but my opinion is that
the minimal benefit wouldn't be worth the extra code.