The use of kmap() and kmap_atomic() is being deprecated in favor of
kmap_local_page().
There are two main problems with kmap(): (1) It comes with an overhead as
the mapping space is restricted and protected by a global lock for
synchronization and (2) it also requires global TLB invalidation when the
kmap’s pool wraps and it might block when the mapping space is fully
utilized until a slot becomes available.
With kmap_local_page() the mappings are per thread and CPU local, can take
page faults, and can be created from any context (including interrupts).
It is faster than kmap() in kernels with HIGHMEM enabled. Furthermore,
the tasks can be preempted and, when they are scheduled to run again, the
kernel virtual addresses are restored and still valid.
The use of kmap_local_page() in fs/aio.c is "safe" in the sense that the
code doesn't hand the returned kernel virtual addresses to other threads
and there are no nestings which should be handled with the stack based
(LIFO) mappings/un-mappings order. Furthermore, the code between the old
kmap_atomic()/kunmap_atomic() did not depend on disabling page-faults
and/or preemption, so that there is no need to call pagefault_disable()
and/or preempt_disable() before the mappings.
Therefore, replace kmap() and kmap_atomic() with kmap_local_page() in
fs/aio.c.
Tested with xfstests on a QEMU/KVM x86_32 VM, 6GB RAM, booting a kernel
with HIGHMEM64GB enabled.
Cc: "Venkataramanan, Anirudh" <[email protected]>
Suggested-by: Ira Weiny <[email protected]>
Reviewed-by: Ira Weiny <[email protected]>
Reviewed-by: Jeff Moyer <[email protected]>
Signed-off-by: Fabio M. De Francesco <[email protected]>
---
I've tested with "./check -g aio". The tests in this group fail 3/26
times, with and without my patch. Therefore, these changes don't introduce
further errors. I'm not aware of any other tests which I may run, so that
any suggestions would be precious and much appreciated :-)
I'm resending this patch because some recipients were missing in the
previous submissions. In the meantime I'm also adding some more information
in the commit message. There are no changes in the code.
Changes from v1:
Add further information in the commit message, and the
"Reviewed-by" tags from Ira and Jeff (thanks!).
fs/aio.c | 32 ++++++++++++++++----------------
1 file changed, 16 insertions(+), 16 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c
index 3c249b938632..343fea0c6d1a 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -567,7 +567,7 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
ctx->user_id = ctx->mmap_base;
ctx->nr_events = nr_events; /* trusted copy */
- ring = kmap_atomic(ctx->ring_pages[0]);
+ ring = kmap_local_page(ctx->ring_pages[0]);
ring->nr = nr_events; /* user copy */
ring->id = ~0U;
ring->head = ring->tail = 0;
@@ -575,7 +575,7 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
ring->compat_features = AIO_RING_COMPAT_FEATURES;
ring->incompat_features = AIO_RING_INCOMPAT_FEATURES;
ring->header_length = sizeof(struct aio_ring);
- kunmap_atomic(ring);
+ kunmap_local(ring);
flush_dcache_page(ctx->ring_pages[0]);
return 0;
@@ -678,9 +678,9 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
* we are protected from page migration
* changes ring_pages by ->ring_lock.
*/
- ring = kmap_atomic(ctx->ring_pages[0]);
+ ring = kmap_local_page(ctx->ring_pages[0]);
ring->id = ctx->id;
- kunmap_atomic(ring);
+ kunmap_local(ring);
return 0;
}
@@ -1024,9 +1024,9 @@ static void user_refill_reqs_available(struct kioctx *ctx)
* against ctx->completed_events below will make sure we do the
* safe/right thing.
*/
- ring = kmap_atomic(ctx->ring_pages[0]);
+ ring = kmap_local_page(ctx->ring_pages[0]);
head = ring->head;
- kunmap_atomic(ring);
+ kunmap_local(ring);
refill_reqs_available(ctx, head, ctx->tail);
}
@@ -1132,12 +1132,12 @@ static void aio_complete(struct aio_kiocb *iocb)
if (++tail >= ctx->nr_events)
tail = 0;
- ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
+ ev_page = kmap_local_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
event = ev_page + pos % AIO_EVENTS_PER_PAGE;
*event = iocb->ki_res;
- kunmap_atomic(ev_page);
+ kunmap_local(ev_page);
flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
pr_debug("%p[%u]: %p: %p %Lx %Lx %Lx\n", ctx, tail, iocb,
@@ -1151,10 +1151,10 @@ static void aio_complete(struct aio_kiocb *iocb)
ctx->tail = tail;
- ring = kmap_atomic(ctx->ring_pages[0]);
+ ring = kmap_local_page(ctx->ring_pages[0]);
head = ring->head;
ring->tail = tail;
- kunmap_atomic(ring);
+ kunmap_local(ring);
flush_dcache_page(ctx->ring_pages[0]);
ctx->completed_events++;
@@ -1214,10 +1214,10 @@ static long aio_read_events_ring(struct kioctx *ctx,
mutex_lock(&ctx->ring_lock);
/* Access to ->ring_pages here is protected by ctx->ring_lock. */
- ring = kmap_atomic(ctx->ring_pages[0]);
+ ring = kmap_local_page(ctx->ring_pages[0]);
head = ring->head;
tail = ring->tail;
- kunmap_atomic(ring);
+ kunmap_local(ring);
/*
* Ensure that once we've read the current tail pointer, that
@@ -1249,10 +1249,10 @@ static long aio_read_events_ring(struct kioctx *ctx,
avail = min(avail, nr - ret);
avail = min_t(long, avail, AIO_EVENTS_PER_PAGE - pos);
- ev = kmap(page);
+ ev = kmap_local_page(page);
copy_ret = copy_to_user(event + ret, ev + pos,
sizeof(*ev) * avail);
- kunmap(page);
+ kunmap_local(ev);
if (unlikely(copy_ret)) {
ret = -EFAULT;
@@ -1264,9 +1264,9 @@ static long aio_read_events_ring(struct kioctx *ctx,
head %= ctx->nr_events;
}
- ring = kmap_atomic(ctx->ring_pages[0]);
+ ring = kmap_local_page(ctx->ring_pages[0]);
ring->head = head;
- kunmap_atomic(ring);
+ kunmap_local(ring);
flush_dcache_page(ctx->ring_pages[0]);
pr_debug("%li h%u t%u\n", ret, head, tail);
--
2.36.1
On Mon, Jan 09, 2023 at 06:56:29PM +0100, Fabio M. De Francesco wrote:
> - ring = kmap_atomic(ctx->ring_pages[0]);
> + ring = kmap_local_page(ctx->ring_pages[0]);
> ring->nr = nr_events; /* user copy */
> ring->id = ~0U;
> ring->head = ring->tail = 0;
> @@ -575,7 +575,7 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
> ring->compat_features = AIO_RING_COMPAT_FEATURES;
> ring->incompat_features = AIO_RING_INCOMPAT_FEATURES;
> ring->header_length = sizeof(struct aio_ring);
> - kunmap_atomic(ring);
> + kunmap_local(ring);
> flush_dcache_page(ctx->ring_pages[0]);
I wonder if it would be more readable as memcpy_to_page(), actually...
>
> return 0;
> @@ -678,9 +678,9 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
> * we are protected from page migration
> * changes ring_pages by ->ring_lock.
> */
> - ring = kmap_atomic(ctx->ring_pages[0]);
> + ring = kmap_local_page(ctx->ring_pages[0]);
> ring->id = ctx->id;
> - kunmap_atomic(ring);
> + kunmap_local(ring);
Incidentally, does it need flush_dcache_page()?
Hi, Al,
Al Viro <[email protected]> writes:
> On Mon, Jan 09, 2023 at 06:56:29PM +0100, Fabio M. De Francesco wrote:
>
>> - ring = kmap_atomic(ctx->ring_pages[0]);
>> + ring = kmap_local_page(ctx->ring_pages[0]);
>> ring->nr = nr_events; /* user copy */
>> ring->id = ~0U;
>> ring->head = ring->tail = 0;
>> @@ -575,7 +575,7 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
>> ring->compat_features = AIO_RING_COMPAT_FEATURES;
>> ring->incompat_features = AIO_RING_INCOMPAT_FEATURES;
>> ring->header_length = sizeof(struct aio_ring);
>> - kunmap_atomic(ring);
>> + kunmap_local(ring);
>> flush_dcache_page(ctx->ring_pages[0]);
>
> I wonder if it would be more readable as memcpy_to_page(), actually...
I'm not sure I understand what you're suggesting.
>>
>> return 0;
>> @@ -678,9 +678,9 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
>> * we are protected from page migration
>> * changes ring_pages by ->ring_lock.
>> */
>> - ring = kmap_atomic(ctx->ring_pages[0]);
>> + ring = kmap_local_page(ctx->ring_pages[0]);
>> ring->id = ctx->id;
>> - kunmap_atomic(ring);
>> + kunmap_local(ring);
>
> Incidentally, does it need flush_dcache_page()?
Yes, good catch.
Cheers,
Jeff
Al Viro <[email protected]> writes:
> On Wed, Jan 11, 2023 at 09:13:40AM -0500, Jeff Moyer wrote:
>> Hi, Al,
>>
>> Al Viro <[email protected]> writes:
>>
>> > On Mon, Jan 09, 2023 at 06:56:29PM +0100, Fabio M. De Francesco wrote:
>> >
>> >> - ring = kmap_atomic(ctx->ring_pages[0]);
>> >> + ring = kmap_local_page(ctx->ring_pages[0]);
>> >> ring->nr = nr_events; /* user copy */
>> >> ring->id = ~0U;
>> >> ring->head = ring->tail = 0;
>> >> @@ -575,7 +575,7 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
>> >> ring->compat_features = AIO_RING_COMPAT_FEATURES;
>> >> ring->incompat_features = AIO_RING_INCOMPAT_FEATURES;
>> >> ring->header_length = sizeof(struct aio_ring);
>> >> - kunmap_atomic(ring);
>> >> + kunmap_local(ring);
>> >> flush_dcache_page(ctx->ring_pages[0]);
>> >
>> > I wonder if it would be more readable as memcpy_to_page(), actually...
>>
>> I'm not sure I understand what you're suggesting.
>
> memcpy_to_page(ctx->ring_pages[0], 0, &(struct aio_ring){
> .nr = nr_events, .id = ~0U, .magic = AIO_RING_MAGIC,
> .compat_features = AIO_RING_COMPAT_FEATURES,
> .in_compat_features = AIO_RING_INCOMPAT_FEATURES,
> .header_length = sizeof(struct aio_ring)},
> sizeof(struct aio_ring));
>
> instead of the lines from kmap_atomic to flush_dcache_page...
Thanks for spelling it out. I guess it's a matter of opinion, but I
don't find that more readable.
Cheers,
Jeff
On Wed, Jan 11, 2023 at 04:02:26PM +0000, Al Viro wrote:
> On Wed, Jan 11, 2023 at 09:13:40AM -0500, Jeff Moyer wrote:
> > Hi, Al,
> >
> > Al Viro <[email protected]> writes:
> >
> > > On Mon, Jan 09, 2023 at 06:56:29PM +0100, Fabio M. De Francesco wrote:
> > >
> > >> - ring = kmap_atomic(ctx->ring_pages[0]);
> > >> + ring = kmap_local_page(ctx->ring_pages[0]);
> > >> ring->nr = nr_events; /* user copy */
> > >> ring->id = ~0U;
> > >> ring->head = ring->tail = 0;
> > >> @@ -575,7 +575,7 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
> > >> ring->compat_features = AIO_RING_COMPAT_FEATURES;
> > >> ring->incompat_features = AIO_RING_INCOMPAT_FEATURES;
> > >> ring->header_length = sizeof(struct aio_ring);
> > >> - kunmap_atomic(ring);
> > >> + kunmap_local(ring);
> > >> flush_dcache_page(ctx->ring_pages[0]);
> > >
> > > I wonder if it would be more readable as memcpy_to_page(), actually...
> >
> > I'm not sure I understand what you're suggesting.
>
> memcpy_to_page(ctx->ring_pages[0], 0, &(struct aio_ring){
> .nr = nr_events, .id = ~0U, .magic = AIO_RING_MAGIC,
> .compat_features = AIO_RING_COMPAT_FEATURES,
> .in_compat_features = AIO_RING_INCOMPAT_FEATURES,
> .header_length = sizeof(struct aio_ring)},
> sizeof(struct aio_ring));
>
> instead of the lines from kmap_atomic to flush_dcache_page...
For us mere mortals I think this may parse easier as:
struct aio_ring r;
...
r = (struct aio_ring) {
.nr = nr_events,
.id = ~0U,
.magic = AIO_RING_MAGIC,
.compat_features = AIO_RING_COMPAT_FEATURES,
.incompat_features = AIO_RING_INCOMPAT_FEATURES,
.header_length = sizeof(struct aio_ring),
};
memcpy_to_page(ctx->ring_pages[0], 0, &r, sizeof(r));
...
Is there any concern with the extra assignments to the stack only to copy into
the page? I guess this is not a critical path?
Ira
> >
> > >>
> > >> return 0;
> > >> @@ -678,9 +678,9 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
> > >> * we are protected from page migration
> > >> * changes ring_pages by ->ring_lock.
> > >> */
> > >> - ring = kmap_atomic(ctx->ring_pages[0]);
> > >> + ring = kmap_local_page(ctx->ring_pages[0]);
> > >> ring->id = ctx->id;
> > >> - kunmap_atomic(ring);
> > >> + kunmap_local(ring);
> > >
> > > Incidentally, does it need flush_dcache_page()?
> >
> > Yes, good catch.
On Wed, Jan 11, 2023 at 09:13:40AM -0500, Jeff Moyer wrote:
> Hi, Al,
>
> Al Viro <[email protected]> writes:
>
> > On Mon, Jan 09, 2023 at 06:56:29PM +0100, Fabio M. De Francesco wrote:
> >
> >> - ring = kmap_atomic(ctx->ring_pages[0]);
> >> + ring = kmap_local_page(ctx->ring_pages[0]);
> >> ring->nr = nr_events; /* user copy */
> >> ring->id = ~0U;
> >> ring->head = ring->tail = 0;
> >> @@ -575,7 +575,7 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
> >> ring->compat_features = AIO_RING_COMPAT_FEATURES;
> >> ring->incompat_features = AIO_RING_INCOMPAT_FEATURES;
> >> ring->header_length = sizeof(struct aio_ring);
> >> - kunmap_atomic(ring);
> >> + kunmap_local(ring);
> >> flush_dcache_page(ctx->ring_pages[0]);
> >
> > I wonder if it would be more readable as memcpy_to_page(), actually...
>
> I'm not sure I understand what you're suggesting.
memcpy_to_page(ctx->ring_pages[0], 0, &(struct aio_ring){
.nr = nr_events, .id = ~0U, .magic = AIO_RING_MAGIC,
.compat_features = AIO_RING_COMPAT_FEATURES,
.in_compat_features = AIO_RING_INCOMPAT_FEATURES,
.header_length = sizeof(struct aio_ring)},
sizeof(struct aio_ring));
instead of the lines from kmap_atomic to flush_dcache_page...
>
> >>
> >> return 0;
> >> @@ -678,9 +678,9 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
> >> * we are protected from page migration
> >> * changes ring_pages by ->ring_lock.
> >> */
> >> - ring = kmap_atomic(ctx->ring_pages[0]);
> >> + ring = kmap_local_page(ctx->ring_pages[0]);
> >> ring->id = ctx->id;
> >> - kunmap_atomic(ring);
> >> + kunmap_local(ring);
> >
> > Incidentally, does it need flush_dcache_page()?
>
> Yes, good catch.
On mercoledì 11 gennaio 2023 17:02:26 CET Al Viro wrote:
> On Wed, Jan 11, 2023 at 09:13:40AM -0500, Jeff Moyer wrote:
> > Hi, Al,
> >
> > Al Viro <[email protected]> writes:
> > > On Mon, Jan 09, 2023 at 06:56:29PM +0100, Fabio M. De Francesco wrote:
> > >> - ring = kmap_atomic(ctx->ring_pages[0]);
> > >> + ring = kmap_local_page(ctx->ring_pages[0]);
> > >>
> > >> ring->nr = nr_events; /* user copy */
> > >> ring->id = ~0U;
> > >> ring->head = ring->tail = 0;
> > >>
> > >> @@ -575,7 +575,7 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
> > >> ring->compat_features = AIO_RING_COMPAT_FEATURES;
> > >> ring->incompat_features = AIO_RING_INCOMPAT_FEATURES;
> > >> ring->header_length = sizeof(struct aio_ring);
> > >>
> > >> - kunmap_atomic(ring);
> > >> + kunmap_local(ring);
> > >>
> > >> flush_dcache_page(ctx->ring_pages[0]);
> > >
> > > I wonder if it would be more readable as memcpy_to_page(), actually...
> >
> > I'm not sure I understand what you're suggesting.
>
> memcpy_to_page(ctx->ring_pages[0], 0, &(struct aio_ring){
> .nr = nr_events, .id = ~0U, .magic = AIO_RING_MAGIC,
> .compat_features = AIO_RING_COMPAT_FEATURES,
> .in_compat_features = AIO_RING_INCOMPAT_FEATURES,
> .header_length = sizeof(struct aio_ring)},
> sizeof(struct aio_ring));
>
> instead of the lines from kmap_atomic to flush_dcache_page...
Actually, I'd prefer Ira's solution for readability, but I have nothing
against yours. I will send you the code in the way you rewrote it.
> > >> return 0;
> > >>
> > >> @@ -678,9 +678,9 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
> > >> * we are protected from page migration
> > >> * changes ring_pages by ->ring_lock.
> > >> */
> > >>
> > >> - ring = kmap_atomic(ctx->ring_pages[0]);
> > >> + ring = kmap_local_page(ctx->ring_pages[0]);
> > >>
> > >> ring->id = ctx->id;
> > >>
> > >> - kunmap_atomic(ring);
> > >> + kunmap_local(ring);
> > >
> > > Incidentally, does it need flush_dcache_page()?
> >
> > Yes, good catch.
Yes, I missed it :-(
However, with the use of memcpy_to_page() we no longer need that explicit call
to flush_dcache_page().
Thank you,
Fabio