2022-08-17 17:07:45

by Fabio M. De Francesco

[permalink] [raw]
Subject: [PATCH] nvmet-tcp: Don't kmap() pages which can't come from HIGHMEM

kmap() is being deprecated in favor of kmap_local_page().[1]

There are two main problems with kmap(): (1) it comes with an overhead,
as the mapping space is restricted and protected by a global lock for
synchronization, and (2) it requires global TLB invalidation when the
kmap pool wraps, and it might block until a slot becomes available when
the mapping space is fully utilized.

The pages which will be mapped are allocated in nvmet_tcp_map_data(),
using the GFP_KERNEL flag. This ensures that they cannot come from
HIGHMEM. This implies that a straight page_address() can replace the kmap()
of sg_page(sg) in nvmet_tcp_map_pdu_iovec(). As a side effect, we can
also delete the field "nr_mapped" from struct "nvmet_tcp_cmd" because,
after removing the kmap() calls, there would no longer be any need for it.

Therefore, replace the kmap() of sg_page(sg) with a page_address() and
delete the "nr_mapped" field from "nvmet_tcp_cmd".

[1] "[PATCH] checkpatch: Add kmap and kmap_atomic to the deprecated
list" https://lore.kernel.org/all/[email protected]/

Cc: Chaitanya Kulkarni <[email protected]>
Cc: Keith Busch <[email protected]>
Cc: Sagi Grimberg <[email protected]>
Suggested-by: Ira Weiny <[email protected]>
Signed-off-by: Fabio M. De Francesco <[email protected]>
---

Thanks so much to Chaitanya, Keith, and Sagi for the answers and the comments
on the RFC which gave birth to this patch. The RFC is at:
https://lore.kernel.org/all/[email protected]/

drivers/nvme/target/tcp.c | 26 ++------------------------
1 file changed, 2 insertions(+), 24 deletions(-)

diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index dc3b4dc8fe08..6ed44531de55 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -77,7 +77,6 @@ struct nvmet_tcp_cmd {
u32 pdu_len;
u32 pdu_recv;
int sg_idx;
- int nr_mapped;
struct msghdr recv_msg;
struct kvec *iov;
u32 flags;
@@ -167,7 +166,6 @@ static const struct nvmet_fabrics_ops nvmet_tcp_ops;
static void nvmet_tcp_free_cmd(struct nvmet_tcp_cmd *c);
static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd);
static void nvmet_tcp_free_cmd_buffers(struct nvmet_tcp_cmd *cmd);
-static void nvmet_tcp_unmap_pdu_iovec(struct nvmet_tcp_cmd *cmd);

static inline u16 nvmet_tcp_cmd_tag(struct nvmet_tcp_queue *queue,
struct nvmet_tcp_cmd *cmd)
@@ -301,27 +299,12 @@ static int nvmet_tcp_check_ddgst(struct nvmet_tcp_queue *queue, void *pdu)

static void nvmet_tcp_free_cmd_buffers(struct nvmet_tcp_cmd *cmd)
{
- WARN_ON(unlikely(cmd->nr_mapped > 0));
-
kfree(cmd->iov);
sgl_free(cmd->req.sg);
cmd->iov = NULL;
cmd->req.sg = NULL;
}

-static void nvmet_tcp_unmap_pdu_iovec(struct nvmet_tcp_cmd *cmd)
-{
- struct scatterlist *sg;
- int i;
-
- sg = &cmd->req.sg[cmd->sg_idx];
-
- for (i = 0; i < cmd->nr_mapped; i++)
- kunmap(sg_page(&sg[i]));
-
- cmd->nr_mapped = 0;
-}
-
static void nvmet_tcp_map_pdu_iovec(struct nvmet_tcp_cmd *cmd)
{
struct kvec *iov = cmd->iov;
@@ -329,7 +312,6 @@ static void nvmet_tcp_map_pdu_iovec(struct nvmet_tcp_cmd *cmd)
u32 length, offset, sg_offset;

length = cmd->pdu_len;
- cmd->nr_mapped = DIV_ROUND_UP(length, PAGE_SIZE);
offset = cmd->rbytes_done;
cmd->sg_idx = offset / PAGE_SIZE;
sg_offset = offset % PAGE_SIZE;
@@ -338,7 +320,7 @@ static void nvmet_tcp_map_pdu_iovec(struct nvmet_tcp_cmd *cmd)
while (length) {
u32 iov_len = min_t(u32, length, sg->length - sg_offset);

- iov->iov_base = kmap(sg_page(sg)) + sg->offset + sg_offset;
+ iov->iov_base = page_address(sg_page(sg)) + sg->offset + sg_offset;
iov->iov_len = iov_len;

length -= iov_len;
@@ -347,8 +329,7 @@ static void nvmet_tcp_map_pdu_iovec(struct nvmet_tcp_cmd *cmd)
sg_offset = 0;
}

- iov_iter_kvec(&cmd->recv_msg.msg_iter, READ, cmd->iov,
- cmd->nr_mapped, cmd->pdu_len);
+ iov_iter_kvec(&cmd->recv_msg.msg_iter, READ, cmd->iov, 0, cmd->pdu_len);
}

static void nvmet_tcp_fatal_error(struct nvmet_tcp_queue *queue)
@@ -1141,7 +1122,6 @@ static int nvmet_tcp_try_recv_data(struct nvmet_tcp_queue *queue)
cmd->rbytes_done += ret;
}

- nvmet_tcp_unmap_pdu_iovec(cmd);
if (queue->data_digest) {
nvmet_tcp_prep_recv_ddgst(cmd);
return 0;
@@ -1411,7 +1391,6 @@ static void nvmet_tcp_restore_socket_callbacks(struct nvmet_tcp_queue *queue)
static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd)
{
nvmet_req_uninit(&cmd->req);
- nvmet_tcp_unmap_pdu_iovec(cmd);
nvmet_tcp_free_cmd_buffers(cmd);
}

@@ -1424,7 +1403,6 @@ static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue)
if (nvmet_tcp_need_data_in(cmd))
nvmet_req_uninit(&cmd->req);

- nvmet_tcp_unmap_pdu_iovec(cmd);
nvmet_tcp_free_cmd_buffers(cmd);
}

--
2.37.1


2022-08-17 17:55:05

by Keith Busch

[permalink] [raw]
Subject: Re: [PATCH] nvmet-tcp: Don't kmap() pages which can't come from HIGHMEM

On Wed, Aug 17, 2022 at 07:04:27PM +0200, Fabio M. De Francesco wrote:
> @@ -329,7 +312,6 @@ static void nvmet_tcp_map_pdu_iovec(struct nvmet_tcp_cmd *cmd)
> u32 length, offset, sg_offset;
>
> length = cmd->pdu_len;
> - cmd->nr_mapped = DIV_ROUND_UP(length, PAGE_SIZE);
> offset = cmd->rbytes_done;
> cmd->sg_idx = offset / PAGE_SIZE;
> sg_offset = offset % PAGE_SIZE;
> @@ -338,7 +320,7 @@ static void nvmet_tcp_map_pdu_iovec(struct nvmet_tcp_cmd *cmd)
> while (length) {
> u32 iov_len = min_t(u32, length, sg->length - sg_offset);
>
> - iov->iov_base = kmap(sg_page(sg)) + sg->offset + sg_offset;
> + iov->iov_base = page_address(sg_page(sg)) + sg->offset + sg_offset;
> iov->iov_len = iov_len;
>
> length -= iov_len;
> @@ -347,8 +329,7 @@ static void nvmet_tcp_map_pdu_iovec(struct nvmet_tcp_cmd *cmd)
> sg_offset = 0;
> }
>
> - iov_iter_kvec(&cmd->recv_msg.msg_iter, READ, cmd->iov,
> - cmd->nr_mapped, cmd->pdu_len);
> + iov_iter_kvec(&cmd->recv_msg.msg_iter, READ, cmd->iov, 0, cmd->pdu_len);
> }

I earlier meant just use a local variable for 'nr_mapped' rather than replace
it with '0'. I don't think that 0 segments would result in usable kvec.

I'm not even sure the existing code is correct, though. The sg->length can be
higher order than a PAGE_SIZE, so it may be over-reporting nr_segs. It's just
supposed to be the number of initialized iov's.

Anyway, the rest looks good.

2022-08-17 19:05:44

by Sagi Grimberg

[permalink] [raw]
Subject: Re: [PATCH] nvmet-tcp: Don't kmap() pages which can't come from HIGHMEM


>> @@ -329,7 +312,6 @@ static void nvmet_tcp_map_pdu_iovec(struct nvmet_tcp_cmd *cmd)
>> u32 length, offset, sg_offset;
>>
>> length = cmd->pdu_len;
>> - cmd->nr_mapped = DIV_ROUND_UP(length, PAGE_SIZE);
>> offset = cmd->rbytes_done;
>> cmd->sg_idx = offset / PAGE_SIZE;
>> sg_offset = offset % PAGE_SIZE;
>> @@ -338,7 +320,7 @@ static void nvmet_tcp_map_pdu_iovec(struct nvmet_tcp_cmd *cmd)
>> while (length) {
>> u32 iov_len = min_t(u32, length, sg->length - sg_offset);
>>
>> - iov->iov_base = kmap(sg_page(sg)) + sg->offset + sg_offset;
>> + iov->iov_base = page_address(sg_page(sg)) + sg->offset + sg_offset;
>> iov->iov_len = iov_len;
>>
>> length -= iov_len;
>> @@ -347,8 +329,7 @@ static void nvmet_tcp_map_pdu_iovec(struct nvmet_tcp_cmd *cmd)
>> sg_offset = 0;
>> }
>>
>> - iov_iter_kvec(&cmd->recv_msg.msg_iter, READ, cmd->iov,
>> - cmd->nr_mapped, cmd->pdu_len);
>> + iov_iter_kvec(&cmd->recv_msg.msg_iter, READ, cmd->iov, 0, cmd->pdu_len);
>> }
>
> I earlier meant just use a local variable for 'nr_mapped' rather than replace
> it with '0'. I don't think that 0 segments would result in usable kvec.

yes, that is wrong. Need a local int nr_mapped variable.

>
> I'm not even sure the existing code is correct, though. The sg->length can be
> higher order than a PAGE_SIZE, so it may be over-reporting nr_segs. It's just
> supposed to be the number of initialized iov's.

The sg is allocated with sgl_alloc so it is for sure 0-order.