2021-09-14 09:50:02

by Hamza Mahfooz

[permalink] [raw]
Subject: [PATCH] aio: convert active_reqs into a hashtable

Commit 833f4154ed56 ("aio: fold lookup_kiocb() into its sole caller")
suggests that, the fact that active_reqs is a linked-list means aio_kiocb
lookups in io_cancel() are inefficient. So, to get faster lookups (on
average) while maintaining similar insertion and deletion characteristics,
turn active_reqs into a hashtable.

Signed-off-by: Hamza Mahfooz <[email protected]>
---
fs/aio.c | 76 ++++++++++++++++++++++++++++++++++++--------------------
1 file changed, 49 insertions(+), 27 deletions(-)

diff --git a/fs/aio.c b/fs/aio.c
index 51b08ab01dff..4d692d9d6fb9 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -116,6 +116,8 @@ struct kioctx {
*/
unsigned max_reqs;

+ unsigned int hash_bits;
+
/* Size of ringbuffer, in units of struct io_event */
unsigned nr_events;

@@ -146,7 +148,7 @@ struct kioctx {

struct {
spinlock_t ctx_lock;
- struct list_head active_reqs; /* used for cancellation */
+ struct hlist_head *active_reqs; /* used for cancellation */
} ____cacheline_aligned_in_smp;

struct {
@@ -206,7 +208,7 @@ struct aio_kiocb {

struct io_event ki_res;

- struct list_head ki_list; /* the aio core uses this
+ struct hlist_node ki_node; /* the aio core uses this
* for cancellation */
refcount_t ki_refcnt;

@@ -563,11 +565,13 @@ void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
struct kioctx *ctx = req->ki_ctx;
unsigned long flags;

- if (WARN_ON_ONCE(!list_empty(&req->ki_list)))
+ if (WARN_ON_ONCE(hash_hashed(&req->ki_node)))
return;

spin_lock_irqsave(&ctx->ctx_lock, flags);
- list_add_tail(&req->ki_list, &ctx->active_reqs);
+ hlist_add_head(&req->ki_node,
+ &ctx->active_reqs[hash_min(req->ki_res.obj,
+ ctx->hash_bits)]);
req->ki_cancel = cancel;
spin_unlock_irqrestore(&ctx->ctx_lock, flags);
}
@@ -584,6 +588,7 @@ static void free_ioctx(struct work_struct *work)
free_rwork);
pr_debug("freeing %p\n", ctx);

+ kfree(ctx->active_reqs);
aio_free_ring(ctx);
free_percpu(ctx->cpu);
percpu_ref_exit(&ctx->reqs);
@@ -611,16 +616,21 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
*/
static void free_ioctx_users(struct percpu_ref *ref)
{
+ int i;
struct kioctx *ctx = container_of(ref, struct kioctx, users);
struct aio_kiocb *req;
+ struct hlist_head *list;
+ struct hlist_node *node;

spin_lock_irq(&ctx->ctx_lock);

- while (!list_empty(&ctx->active_reqs)) {
- req = list_first_entry(&ctx->active_reqs,
- struct aio_kiocb, ki_list);
- req->ki_cancel(&req->rw);
- list_del_init(&req->ki_list);
+ for (i = 0; i < (1U << ctx->hash_bits); i++) {
+ list = &ctx->active_reqs[i];
+
+ hlist_for_each_entry_safe(req, node, list, ki_node) {
+ req->ki_cancel(&req->rw);
+ hash_del(&req->ki_node);
+ }
}

spin_unlock_irq(&ctx->ctx_lock);
@@ -735,6 +745,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
return ERR_PTR(-ENOMEM);

ctx->max_reqs = max_reqs;
+ ctx->hash_bits = ilog2(max_reqs);

spin_lock_init(&ctx->ctx_lock);
spin_lock_init(&ctx->completion_lock);
@@ -744,7 +755,14 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
mutex_lock(&ctx->ring_lock);
init_waitqueue_head(&ctx->wait);

- INIT_LIST_HEAD(&ctx->active_reqs);
+ ctx->active_reqs = kmalloc_array(1U << ctx->hash_bits,
+ sizeof(struct hlist_head),
+ GFP_KERNEL);
+
+ if (!ctx->active_reqs)
+ goto err;
+
+ __hash_init(ctx->active_reqs, 1U << ctx->hash_bits);

if (percpu_ref_init(&ctx->users, free_ioctx_users, 0, GFP_KERNEL))
goto err;
@@ -799,6 +817,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
aio_free_ring(ctx);
err:
mutex_unlock(&ctx->ring_lock);
+ kfree(ctx->active_reqs);
free_percpu(ctx->cpu);
percpu_ref_exit(&ctx->reqs);
percpu_ref_exit(&ctx->users);
@@ -1037,7 +1056,7 @@ static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx)

percpu_ref_get(&ctx->reqs);
req->ki_ctx = ctx;
- INIT_LIST_HEAD(&req->ki_list);
+ INIT_HLIST_NODE(&req->ki_node);
refcount_set(&req->ki_refcnt, 2);
req->ki_eventfd = NULL;
return req;
@@ -1407,22 +1426,17 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
return -EINVAL;
}

-static void aio_remove_iocb(struct aio_kiocb *iocb)
+static void aio_complete_rw(struct kiocb *kiocb, long res, long res2)
{
- struct kioctx *ctx = iocb->ki_ctx;
unsigned long flags;
+ struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, rw);
+ struct kioctx *ctx = iocb->ki_ctx;

spin_lock_irqsave(&ctx->ctx_lock, flags);
- list_del(&iocb->ki_list);
+ if (hash_hashed(&iocb->ki_node))
+ hlist_del(&iocb->ki_node);
spin_unlock_irqrestore(&ctx->ctx_lock, flags);
-}

-static void aio_complete_rw(struct kiocb *kiocb, long res, long res2)
-{
- struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, rw);
-
- if (!list_empty_careful(&iocb->ki_list))
- aio_remove_iocb(iocb);

if (kiocb->ki_flags & IOCB_WRITE) {
struct inode *inode = file_inode(kiocb->ki_filp);
@@ -1644,7 +1658,7 @@ static void aio_poll_complete_work(struct work_struct *work)
spin_unlock_irq(&ctx->ctx_lock);
return;
}
- list_del_init(&iocb->ki_list);
+ hash_del(&iocb->ki_node);
iocb->ki_res.res = mangle_poll(mask);
req->done = true;
spin_unlock_irq(&ctx->ctx_lock);
@@ -1692,7 +1706,7 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
* call this function with IRQs disabled and because IRQs
* have to be disabled before ctx_lock is obtained.
*/
- list_del(&iocb->ki_list);
+ hlist_del(&iocb->ki_node);
iocb->ki_res.res = mangle_poll(mask);
req->done = true;
if (iocb->ki_eventfd && eventfd_signal_allowed()) {
@@ -1739,6 +1753,7 @@ static int aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
struct aio_poll_table apt;
bool cancel = false;
__poll_t mask;
+ struct hlist_head *list;

/* reject any unknown events outside the normal event mask. */
if ((u16)iocb->aio_buf != iocb->aio_buf)
@@ -1778,7 +1793,9 @@ static int aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
} else if (cancel) {
WRITE_ONCE(req->cancelled, true);
} else if (!req->done) { /* actually waiting for an event */
- list_add_tail(&aiocb->ki_list, &ctx->active_reqs);
+ list = &ctx->active_reqs[hash_min(aiocb->ki_res.obj,
+ ctx->hash_bits)];
+ hlist_add_head(&aiocb->ki_node, list);
aiocb->ki_cancel = aio_poll_cancel;
}
spin_unlock(&req->head->lock);
@@ -2007,6 +2024,8 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
struct aio_kiocb *kiocb;
int ret = -EINVAL;
u32 key;
+ struct hlist_head *list;
+ struct hlist_node *node;
u64 obj = (u64)(unsigned long)iocb;

if (unlikely(get_user(key, &iocb->aio_key)))
@@ -2019,14 +2038,17 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
return -EINVAL;

spin_lock_irq(&ctx->ctx_lock);
- /* TODO: use a hash or array, this sucks. */
- list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) {
+
+ list = &ctx->active_reqs[hash_min(kiocb->ki_res.obj, ctx->hash_bits)];
+
+ hlist_for_each_entry_safe(kiocb, node, list, ki_node) {
if (kiocb->ki_res.obj == obj) {
ret = kiocb->ki_cancel(&kiocb->rw);
- list_del_init(&kiocb->ki_list);
+ hash_del(&kiocb->ki_node);
break;
}
}
+
spin_unlock_irq(&ctx->ctx_lock);

if (!ret) {
--
2.33.0


2021-09-14 11:47:58

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH] aio: convert active_reqs into a hashtable

On Tue, Sep 14, 2021 at 05:46:25AM -0400, Hamza Mahfooz wrote:
> Commit 833f4154ed56 ("aio: fold lookup_kiocb() into its sole caller")
> suggests that, the fact that active_reqs is a linked-list means aio_kiocb
> lookups in io_cancel() are inefficient. So, to get faster lookups (on
> average) while maintaining similar insertion and deletion characteristics,
> turn active_reqs into a hashtable.

What workload cares about AIO cancellation performance?

2021-09-19 16:25:09

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH] aio: convert active_reqs into a hashtable

Hi Hamza,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on linus/master]
[also build test WARNING on linux/master v5.15-rc1 next-20210916]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url: https://github.com/0day-ci/linux/commits/Hamza-Mahfooz/aio-convert-active_reqs-into-a-hashtable/20210914-174924
base: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git d0ee23f9d78be5531c4b055ea424ed0b489dfe9b
:::::: branch date: 3 days ago
:::::: commit date: 3 days ago
config: arm-randconfig-c002-20210916 (attached as .config)
compiler: clang version 14.0.0 (https://github.com/llvm/llvm-project 8cbbd7e0b2aa21ce7e416cfb63d9965518948c35)
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# install arm cross compiling tool for clang build
# apt-get install binutils-arm-linux-gnueabi
# https://github.com/0day-ci/linux/commit/ab7dca103bc74aed4baada06420395f4bead4e6c
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Hamza-Mahfooz/aio-convert-active_reqs-into-a-hashtable/20210914-174924
git checkout ab7dca103bc74aed4baada06420395f4bead4e6c
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=arm clang-analyzer

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <[email protected]>


clang-analyzer warnings: (new ones prefixed by >>)

>> fs/aio.c:2042:36: warning: Dereference of undefined pointer value [clang-analyzer-core.NullDereference]
list = &ctx->active_reqs[hash_min(kiocb->ki_res.obj, ctx->hash_bits)];
^

vim +2042 fs/aio.c

c00d2c7e898800 Al Viro 2016-12-20 2009
^1da177e4c3f41 Linus Torvalds 2005-04-16 2010 /* sys_io_cancel:
^1da177e4c3f41 Linus Torvalds 2005-04-16 2011 * Attempts to cancel an iocb previously passed to io_submit. If
^1da177e4c3f41 Linus Torvalds 2005-04-16 2012 * the operation is successfully cancelled, the resulting event is
^1da177e4c3f41 Linus Torvalds 2005-04-16 2013 * copied into the memory pointed to by result without being placed
^1da177e4c3f41 Linus Torvalds 2005-04-16 2014 * into the completion queue and 0 is returned. May fail with
^1da177e4c3f41 Linus Torvalds 2005-04-16 2015 * -EFAULT if any of the data structures pointed to are invalid.
^1da177e4c3f41 Linus Torvalds 2005-04-16 2016 * May fail with -EINVAL if aio_context specified by ctx_id is
^1da177e4c3f41 Linus Torvalds 2005-04-16 2017 * invalid. May fail with -EAGAIN if the iocb specified was not
^1da177e4c3f41 Linus Torvalds 2005-04-16 2018 * cancelled. Will fail with -ENOSYS if not implemented.
^1da177e4c3f41 Linus Torvalds 2005-04-16 2019 */
002c8976ee5377 Heiko Carstens 2009-01-14 2020 SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
002c8976ee5377 Heiko Carstens 2009-01-14 2021 struct io_event __user *, result)
^1da177e4c3f41 Linus Torvalds 2005-04-16 2022 {
^1da177e4c3f41 Linus Torvalds 2005-04-16 2023 struct kioctx *ctx;
04b2fa9f8f36ec Christoph Hellwig 2015-02-02 2024 struct aio_kiocb *kiocb;
888933f8fdf06e Christoph Hellwig 2018-05-23 2025 int ret = -EINVAL;
^1da177e4c3f41 Linus Torvalds 2005-04-16 2026 u32 key;
ab7dca103bc74a Hamza Mahfooz 2021-09-14 2027 struct hlist_head *list;
ab7dca103bc74a Hamza Mahfooz 2021-09-14 2028 struct hlist_node *node;
a9339b7855094b Al Viro 2019-03-07 2029 u64 obj = (u64)(unsigned long)iocb;
^1da177e4c3f41 Linus Torvalds 2005-04-16 2030
f3a2752a43de18 Christoph Hellwig 2018-03-30 2031 if (unlikely(get_user(key, &iocb->aio_key)))
^1da177e4c3f41 Linus Torvalds 2005-04-16 2032 return -EFAULT;
f3a2752a43de18 Christoph Hellwig 2018-03-30 2033 if (unlikely(key != KIOCB_KEY))
f3a2752a43de18 Christoph Hellwig 2018-03-30 2034 return -EINVAL;
^1da177e4c3f41 Linus Torvalds 2005-04-16 2035
^1da177e4c3f41 Linus Torvalds 2005-04-16 2036 ctx = lookup_ioctx(ctx_id);
^1da177e4c3f41 Linus Torvalds 2005-04-16 2037 if (unlikely(!ctx))
^1da177e4c3f41 Linus Torvalds 2005-04-16 2038 return -EINVAL;
^1da177e4c3f41 Linus Torvalds 2005-04-16 2039
^1da177e4c3f41 Linus Torvalds 2005-04-16 2040 spin_lock_irq(&ctx->ctx_lock);
ab7dca103bc74a Hamza Mahfooz 2021-09-14 2041
ab7dca103bc74a Hamza Mahfooz 2021-09-14 @2042 list = &ctx->active_reqs[hash_min(kiocb->ki_res.obj, ctx->hash_bits)];
ab7dca103bc74a Hamza Mahfooz 2021-09-14 2043
ab7dca103bc74a Hamza Mahfooz 2021-09-14 2044 hlist_for_each_entry_safe(kiocb, node, list, ki_node) {
a9339b7855094b Al Viro 2019-03-07 2045 if (kiocb->ki_res.obj == obj) {
888933f8fdf06e Christoph Hellwig 2018-05-23 2046 ret = kiocb->ki_cancel(&kiocb->rw);
ab7dca103bc74a Hamza Mahfooz 2021-09-14 2047 hash_del(&kiocb->ki_node);
833f4154ed5602 Al Viro 2019-03-11 2048 break;
833f4154ed5602 Al Viro 2019-03-11 2049 }
888933f8fdf06e Christoph Hellwig 2018-05-23 2050 }
ab7dca103bc74a Hamza Mahfooz 2021-09-14 2051
^1da177e4c3f41 Linus Torvalds 2005-04-16 2052 spin_unlock_irq(&ctx->ctx_lock);
^1da177e4c3f41 Linus Torvalds 2005-04-16 2053
^1da177e4c3f41 Linus Torvalds 2005-04-16 2054 if (!ret) {
bec68faaf3ba74 Kent Overstreet 2013-05-13 2055 /*
bec68faaf3ba74 Kent Overstreet 2013-05-13 2056 * The result argument is no longer used - the io_event is
bec68faaf3ba74 Kent Overstreet 2013-05-13 2057 * always delivered via the ring buffer. -EINPROGRESS indicates
bec68faaf3ba74 Kent Overstreet 2013-05-13 2058 * cancellation is progress:
^1da177e4c3f41 Linus Torvalds 2005-04-16 2059 */
bec68faaf3ba74 Kent Overstreet 2013-05-13 2060 ret = -EINPROGRESS;
^1da177e4c3f41 Linus Torvalds 2005-04-16 2061 }
^1da177e4c3f41 Linus Torvalds 2005-04-16 2062
723be6e39d1425 Kent Overstreet 2013-05-28 2063 percpu_ref_put(&ctx->users);
^1da177e4c3f41 Linus Torvalds 2005-04-16 2064
^1da177e4c3f41 Linus Torvalds 2005-04-16 2065 return ret;
^1da177e4c3f41 Linus Torvalds 2005-04-16 2066 }
^1da177e4c3f41 Linus Torvalds 2005-04-16 2067

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/[email protected]


Attachments:
.config.gz (31.40 kB)