Our current memeory allocation routines suffer form an API imbalance,
for one we have kmalloc_array() and kcalloc() which check for
overflows in size multiplication and we have kmalloc_node() and
kzalloc_node() which allow for memory allocation on a certain NUMA
node but don't check for eventual overflows.
Johannes Thumshirn (6):
mm: add kmalloc_array_node and kcalloc_node
block: use kmalloc_array_node
IB/qib: use kmalloc_array_node
IB/rdmavt: use kmalloc_array_node
mm, mempool: use kmalloc_array_node
rds: ib: use kmalloc_array_node
block/blk-mq.c | 2 +-
drivers/infiniband/hw/qib/qib_init.c | 5 +++--
drivers/infiniband/sw/rdmavt/qp.c | 2 +-
include/linux/slab.h | 16 ++++++++++++++++
mm/mempool.c | 2 +-
net/rds/ib_fmr.c | 4 ++--
6 files changed, 24 insertions(+), 7 deletions(-)
--
2.13.5
We have kmalloc_array() and kcalloc() wrappers on top of kmalloc() which
ensure us overflow free multiplication for the size of a memory
allocation but these implementations are not NUMA-aware.
Likewise we have kmalloc_node() which is a NUMA-aware version of
kmalloc() but the implementation is not aware of any possible overflows in
eventual size calculations.
Introduce a combination of the two above cases to have a NUMA-node aware
version of kmalloc_array() and kcalloc().
Signed-off-by: Johannes Thumshirn <[email protected]>
---
include/linux/slab.h | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 41473df6dfb0..aaf4723e41b3 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -635,6 +635,22 @@ extern void *__kmalloc_track_caller(size_t, gfp_t, unsigned long);
#define kmalloc_track_caller(size, flags) \
__kmalloc_track_caller(size, flags, _RET_IP_)
+static inline void *kmalloc_array_node(size_t n, size_t size, gfp_t flags,
+ int node)
+{
+ if (size != 0 && n > SIZE_MAX / size)
+ return NULL;
+ if (__builtin_constant_p(n) && __builtin_constant_p(size))
+ return kmalloc_node(n * size, flags, node);
+ return __kmalloc_node(n * size, flags, node);
+}
+
+static inline void *kcalloc_node(size_t n, size_t size, gfp_t flags, int node)
+{
+ return kmalloc_array_node(n, size, flags | __GFP_ZERO, node);
+}
+
+
#ifdef CONFIG_NUMA
extern void *__kmalloc_node_track_caller(size_t, gfp_t, int, unsigned long);
#define kmalloc_node_track_caller(size, flags, node) \
--
2.13.5
Now that we have a NUMA-aware version of kmalloc_array() we can use it
instead of kmalloc_node() without an overflow check in the size
calculation.
Signed-off-by: Johannes Thumshirn <[email protected]>
---
drivers/infiniband/hw/qib/qib_init.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index c5a4c65636d6..6aebf4e707b9 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -1674,8 +1674,9 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd)
}
if (!rcd->rcvegrbuf_phys) {
rcd->rcvegrbuf_phys =
- kmalloc_node(chunk * sizeof(rcd->rcvegrbuf_phys[0]),
- GFP_KERNEL, rcd->node_id);
+ kmalloc_array_node(chunk,
+ sizeof(rcd->rcvegrbuf_phys[0]),
+ GFP_KERNEL, rcd->node_id);
if (!rcd->rcvegrbuf_phys)
goto bail_rcvegrbuf;
}
--
2.13.5
Now that we have a NUMA-aware version of kmalloc_array() we can use it
instead of kmalloc_node() without an overflow check in the size
calculation.
Signed-off-by: Johannes Thumshirn <[email protected]>
---
mm/mempool.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/mempool.c b/mm/mempool.c
index 1c0294858527..26f1b70c4a4e 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -188,7 +188,7 @@ mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn,
pool = kzalloc_node(sizeof(*pool), gfp_mask, node_id);
if (!pool)
return NULL;
- pool->elements = kmalloc_node(min_nr * sizeof(void *),
+ pool->elements = kmalloc_array_node(min_nr, sizeof(void *),
gfp_mask, node_id);
if (!pool->elements) {
kfree(pool);
--
2.13.5
Now that we have a NUMA-aware version of kmalloc_array() we can use it
instead of kmalloc_node() without an overflow check in the size
calculation.
Signed-off-by: Johannes Thumshirn <[email protected]>
---
net/rds/ib_fmr.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/net/rds/ib_fmr.c b/net/rds/ib_fmr.c
index 86ef907067bb..e0f70c4051b6 100644
--- a/net/rds/ib_fmr.c
+++ b/net/rds/ib_fmr.c
@@ -139,8 +139,8 @@ static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev,
return -EINVAL;
}
- dma_pages = kmalloc_node(sizeof(u64) * page_cnt, GFP_ATOMIC,
- rdsibdev_to_node(rds_ibdev));
+ dma_pages = kmalloc_array_node(sizeof(u64), page_cnt, GFP_ATOMIC,
+ rdsibdev_to_node(rds_ibdev));
if (!dma_pages) {
ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL);
return -ENOMEM;
--
2.13.5
Now that we have a NUMA-aware version of kmalloc_array() we can use it
instead of kmalloc_node() without an overflow check in the size
calculation.
Signed-off-by: Johannes Thumshirn <[email protected]>
---
drivers/infiniband/sw/rdmavt/qp.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 22df09ae809e..b8e904905f47 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -238,7 +238,7 @@ int rvt_driver_qp_init(struct rvt_dev_info *rdi)
rdi->qp_dev->qp_table_size = rdi->dparms.qp_table_size;
rdi->qp_dev->qp_table_bits = ilog2(rdi->dparms.qp_table_size);
rdi->qp_dev->qp_table =
- kmalloc_node(rdi->qp_dev->qp_table_size *
+ kmalloc_array_node(rdi->qp_dev->qp_table_size,
sizeof(*rdi->qp_dev->qp_table),
GFP_KERNEL, rdi->dparms.node);
if (!rdi->qp_dev->qp_table)
--
2.13.5
Now that we have a NUMA-aware version of kmalloc_array() we can use it
instead of kmalloc_node() without an overflow check in the size
calculation.
Signed-off-by: Johannes Thumshirn <[email protected]>
---
block/blk-mq.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 98a18609755e..49f9dc0eb47c 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1979,7 +1979,7 @@ static int blk_mq_init_hctx(struct request_queue *q,
* Allocate space for all possible cpus to avoid allocation at
* runtime
*/
- hctx->ctxs = kmalloc_node(nr_cpu_ids * sizeof(void *),
+ hctx->ctxs = kmalloc_array_node(nr_cpu_ids, sizeof(void *),
GFP_KERNEL, node);
if (!hctx->ctxs)
goto unregister_cpu_notifier;
--
2.13.5
On Wed 27-09-17 10:20:33, Johannes Thumshirn wrote:
> We have kmalloc_array() and kcalloc() wrappers on top of kmalloc() which
> ensure us overflow free multiplication for the size of a memory
> allocation but these implementations are not NUMA-aware.
>
> Likewise we have kmalloc_node() which is a NUMA-aware version of
> kmalloc() but the implementation is not aware of any possible overflows in
> eventual size calculations.
>
> Introduce a combination of the two above cases to have a NUMA-node aware
> version of kmalloc_array() and kcalloc().
Yes, this is helpful. I am just wondering why we cannot have
kmalloc_array to call kmalloc_array_node with the local node as a
parameter. Maybe some sort of an optimization?
> Signed-off-by: Johannes Thumshirn <[email protected]>
Anyway
Acked-by: Michal Hocko <[email protected]>
> ---
> include/linux/slab.h | 16 ++++++++++++++++
> 1 file changed, 16 insertions(+)
>
> diff --git a/include/linux/slab.h b/include/linux/slab.h
> index 41473df6dfb0..aaf4723e41b3 100644
> --- a/include/linux/slab.h
> +++ b/include/linux/slab.h
> @@ -635,6 +635,22 @@ extern void *__kmalloc_track_caller(size_t, gfp_t, unsigned long);
> #define kmalloc_track_caller(size, flags) \
> __kmalloc_track_caller(size, flags, _RET_IP_)
>
> +static inline void *kmalloc_array_node(size_t n, size_t size, gfp_t flags,
> + int node)
> +{
> + if (size != 0 && n > SIZE_MAX / size)
> + return NULL;
> + if (__builtin_constant_p(n) && __builtin_constant_p(size))
> + return kmalloc_node(n * size, flags, node);
> + return __kmalloc_node(n * size, flags, node);
> +}
> +
> +static inline void *kcalloc_node(size_t n, size_t size, gfp_t flags, int node)
> +{
> + return kmalloc_array_node(n, size, flags | __GFP_ZERO, node);
> +}
> +
> +
> #ifdef CONFIG_NUMA
> extern void *__kmalloc_node_track_caller(size_t, gfp_t, int, unsigned long);
> #define kmalloc_node_track_caller(size, flags, node) \
> --
> 2.13.5
>
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to [email protected]. For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"[email protected]"> [email protected] </a>
--
Michal Hocko
SUSE Labs
On Wed, 27 Sep 2017, Johannes Thumshirn wrote:
> +static inline void *kmalloc_array_node(size_t n, size_t size, gfp_t flags,
> + int node)
> +{
> + if (size != 0 && n > SIZE_MAX / size)
> + return NULL;
> + if (__builtin_constant_p(n) && __builtin_constant_p(size))
> + return kmalloc_node(n * size, flags, node);
Isnt the same check done by kmalloc_node already? The result of
multiplying two constants is a constant after all.
Reviewed-by: Christoph Lameter <[email protected]>
Reviewed-by: Christoph Lameter <[email protected]>
On Wed, 27 Sep 2017, Michal Hocko wrote:
> > Introduce a combination of the two above cases to have a NUMA-node aware
> > version of kmalloc_array() and kcalloc().
>
> Yes, this is helpful. I am just wondering why we cannot have
> kmalloc_array to call kmalloc_array_node with the local node as a
> parameter. Maybe some sort of an optimization?
Well the regular kmalloc without node is supposed to follow memory
policies. An explicit mentioning of a node requires allocation from that
node and will override memory allocation policies.
Note that node local policy is the default for allocations but that can be
overridden by the application or at the command line level. Assumptions
that this is always the case come up frequently but if we do that we will
loose the ability to control memory locality for user space.
Reviewed-by: Christoph Lameter <[email protected]>
Reviewed-by: Christoph Lameter <[email protected]>
Reviewed-by: Christoph Lameter <[email protected]>
On Wed 27-09-17 04:03:01, Cristopher Lameter wrote:
> On Wed, 27 Sep 2017, Michal Hocko wrote:
>
> > > Introduce a combination of the two above cases to have a NUMA-node aware
> > > version of kmalloc_array() and kcalloc().
> >
> > Yes, this is helpful. I am just wondering why we cannot have
> > kmalloc_array to call kmalloc_array_node with the local node as a
> > parameter. Maybe some sort of an optimization?
>
> Well the regular kmalloc without node is supposed to follow memory
> policies. An explicit mentioning of a node requires allocation from that
> node and will override memory allocation policies.
I see. Thanks for the clarification
--
Michal Hocko
SUSE Labs
On 09/27/2017 10:20 AM, Johannes Thumshirn wrote:
> We have kmalloc_array() and kcalloc() wrappers on top of kmalloc() which
> ensure us overflow free multiplication for the size of a memory
> allocation but these implementations are not NUMA-aware.
>
> Likewise we have kmalloc_node() which is a NUMA-aware version of
> kmalloc() but the implementation is not aware of any possible overflows in
> eventual size calculations.
>
> Introduce a combination of the two above cases to have a NUMA-node aware
> version of kmalloc_array() and kcalloc().
>
> Signed-off-by: Johannes Thumshirn <[email protected]>
Sounds better than custom open-coded stuff indeed.
Acked-by: Vlastimil Babka <[email protected]>
> ---
> include/linux/slab.h | 16 ++++++++++++++++
> 1 file changed, 16 insertions(+)
>
> diff --git a/include/linux/slab.h b/include/linux/slab.h
> index 41473df6dfb0..aaf4723e41b3 100644
> --- a/include/linux/slab.h
> +++ b/include/linux/slab.h
> @@ -635,6 +635,22 @@ extern void *__kmalloc_track_caller(size_t, gfp_t, unsigned long);
> #define kmalloc_track_caller(size, flags) \
> __kmalloc_track_caller(size, flags, _RET_IP_)
>
> +static inline void *kmalloc_array_node(size_t n, size_t size, gfp_t flags,
> + int node)
> +{
> + if (size != 0 && n > SIZE_MAX / size)
> + return NULL;
> + if (__builtin_constant_p(n) && __builtin_constant_p(size))
> + return kmalloc_node(n * size, flags, node);
> + return __kmalloc_node(n * size, flags, node);
> +}
> +
> +static inline void *kcalloc_node(size_t n, size_t size, gfp_t flags, int node)
> +{
> + return kmalloc_array_node(n, size, flags | __GFP_ZERO, node);
> +}
> +
> +
> #ifdef CONFIG_NUMA
> extern void *__kmalloc_node_track_caller(size_t, gfp_t, int, unsigned long);
> #define kmalloc_node_track_caller(size, flags, node) \
>