2023-05-24 15:45:28

by David Howells

[permalink] [raw]
Subject: [PATCH net-next 03/12] mm: Make the page_frag_cache allocator alignment param a pow-of-2

Make the page_frag_cache allocator's alignment parameter a power of 2
rather than a mask and give a warning if it isn't.

This means that it's consistent with {napi,netdev}_alloc_frag_align() and
allows __{napi,netdev}_alloc_frag_align() to be removed.

Signed-off-by: David Howells <[email protected]>
cc: "David S. Miller" <[email protected]>
cc: Eric Dumazet <[email protected]>
cc: Jakub Kicinski <[email protected]>
cc: Paolo Abeni <[email protected]>
cc: Jens Axboe <[email protected]>
cc: Jeroen de Borst <[email protected]>
cc: Catherine Sullivan <[email protected]>
cc: Shailend Chand <[email protected]>
cc: Felix Fietkau <[email protected]>
cc: John Crispin <[email protected]>
cc: Sean Wang <[email protected]>
cc: Mark Lee <[email protected]>
cc: Lorenzo Bianconi <[email protected]>
cc: Matthias Brugger <[email protected]>
cc: AngeloGioacchino Del Regno <[email protected]>
cc: Keith Busch <[email protected]>
cc: Jens Axboe <[email protected]>
cc: Christoph Hellwig <[email protected]>
cc: Sagi Grimberg <[email protected]>
cc: Chaitanya Kulkarni <[email protected]>
cc: Andrew Morton <[email protected]>
cc: Matthew Wilcox <[email protected]>
cc: [email protected]
cc: [email protected]
cc: [email protected]
cc: [email protected]
cc: [email protected]
---
include/linux/gfp.h | 4 ++--
include/linux/skbuff.h | 22 ++++------------------
mm/page_frag_alloc.c | 8 +++++---
net/core/skbuff.c | 14 +++++++-------
4 files changed, 18 insertions(+), 30 deletions(-)

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 03504beb51e4..fa30100f46ad 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -306,12 +306,12 @@ struct page_frag_cache;
extern void __page_frag_cache_drain(struct page *page, unsigned int count);
extern void *page_frag_alloc_align(struct page_frag_cache *nc,
unsigned int fragsz, gfp_t gfp_mask,
- unsigned int align_mask);
+ unsigned int align);

static inline void *page_frag_alloc(struct page_frag_cache *nc,
unsigned int fragsz, gfp_t gfp_mask)
{
- return page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u);
+ return page_frag_alloc_align(nc, fragsz, gfp_mask, 1);
}

void page_frag_cache_clear(struct page_frag_cache *nc);
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 1b2ebf6113e0..41b63e72c6c3 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3158,7 +3158,7 @@ void skb_queue_purge(struct sk_buff_head *list);

unsigned int skb_rbtree_purge(struct rb_root *root);

-void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask);
+void *netdev_alloc_frag_align(unsigned int fragsz, unsigned int align);

/**
* netdev_alloc_frag - allocate a page fragment
@@ -3169,14 +3169,7 @@ void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask);
*/
static inline void *netdev_alloc_frag(unsigned int fragsz)
{
- return __netdev_alloc_frag_align(fragsz, ~0u);
-}
-
-static inline void *netdev_alloc_frag_align(unsigned int fragsz,
- unsigned int align)
-{
- WARN_ON_ONCE(!is_power_of_2(align));
- return __netdev_alloc_frag_align(fragsz, -align);
+ return netdev_alloc_frag_align(fragsz, 1);
}

struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int length,
@@ -3236,18 +3229,11 @@ static inline void skb_free_frag(void *addr)
page_frag_free(addr);
}

-void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask);
+void *napi_alloc_frag_align(unsigned int fragsz, unsigned int align);

static inline void *napi_alloc_frag(unsigned int fragsz)
{
- return __napi_alloc_frag_align(fragsz, ~0u);
-}
-
-static inline void *napi_alloc_frag_align(unsigned int fragsz,
- unsigned int align)
-{
- WARN_ON_ONCE(!is_power_of_2(align));
- return __napi_alloc_frag_align(fragsz, -align);
+ return napi_alloc_frag_align(fragsz, 1);
}

struct sk_buff *__napi_alloc_skb(struct napi_struct *napi,
diff --git a/mm/page_frag_alloc.c b/mm/page_frag_alloc.c
index e02b81d68dc4..9d3f6fbd9a07 100644
--- a/mm/page_frag_alloc.c
+++ b/mm/page_frag_alloc.c
@@ -64,13 +64,15 @@ void page_frag_cache_clear(struct page_frag_cache *nc)
EXPORT_SYMBOL(page_frag_cache_clear);

void *page_frag_alloc_align(struct page_frag_cache *nc,
- unsigned int fragsz, gfp_t gfp_mask,
- unsigned int align_mask)
+ unsigned int fragsz, gfp_t gfp_mask,
+ unsigned int align)
{
unsigned int size = PAGE_SIZE;
struct page *page;
int offset;

+ WARN_ON_ONCE(!is_power_of_2(align));
+
if (unlikely(!nc->va)) {
refill:
page = __page_frag_cache_refill(nc, gfp_mask);
@@ -129,7 +131,7 @@ void *page_frag_alloc_align(struct page_frag_cache *nc,
}

nc->pagecnt_bias--;
- offset &= align_mask;
+ offset &= ~(align - 1);
nc->offset = offset;

return nc->va + offset;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f4a5b51aed22..cc507433b357 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -289,17 +289,17 @@ void napi_get_frags_check(struct napi_struct *napi)
local_bh_enable();
}

-void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
+void *napi_alloc_frag_align(unsigned int fragsz, unsigned int align)
{
struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);

fragsz = SKB_DATA_ALIGN(fragsz);

- return page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, align_mask);
+ return page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, align);
}
-EXPORT_SYMBOL(__napi_alloc_frag_align);
+EXPORT_SYMBOL(napi_alloc_frag_align);

-void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
+void *netdev_alloc_frag_align(unsigned int fragsz, unsigned int align)
{
void *data;

@@ -307,18 +307,18 @@ void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
if (in_hardirq() || irqs_disabled()) {
struct page_frag_cache *nc = this_cpu_ptr(&netdev_alloc_cache);

- data = page_frag_alloc_align(nc, fragsz, GFP_ATOMIC, align_mask);
+ data = page_frag_alloc_align(nc, fragsz, GFP_ATOMIC, align);
} else {
struct napi_alloc_cache *nc;

local_bh_disable();
nc = this_cpu_ptr(&napi_alloc_cache);
- data = page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, align_mask);
+ data = page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, align);
local_bh_enable();
}
return data;
}
-EXPORT_SYMBOL(__netdev_alloc_frag_align);
+EXPORT_SYMBOL(netdev_alloc_frag_align);

static struct sk_buff *napi_skb_cache_get(void)
{



2023-05-27 16:13:43

by Alexander Duyck

[permalink] [raw]
Subject: Re: [PATCH net-next 03/12] mm: Make the page_frag_cache allocator alignment param a pow-of-2

On Wed, 2023-05-24 at 16:33 +0100, David Howells wrote:
> Make the page_frag_cache allocator's alignment parameter a power of 2
> rather than a mask and give a warning if it isn't.
>
> This means that it's consistent with {napi,netdev}_alloc_frag_align() and
> allows __{napi,netdev}_alloc_frag_align() to be removed.
>

This goes against the original intention of these functions. One of the
reasons why this is being used is because when somebody enables
something like 2K jumbo frames they don't necessarily want to have to
allocate 4K SLABs. Instead they can just add a bit of overhead and get
almost twice the utilization out of an order 3 page.

The requirement should only be cache alignment, not power of 2
alignment. This isn't meant to be a slab allocator. We are just
sectioning up pages to handle mixed workloads. In the case of
networking we can end up getting everything from 60B packets, to 1514B
in the standard cases. That was why we started sectioning up pages in
the first place so putting a power of 2 requirement on it doesn't fit
our use case at all and is what we were trying to get away from with
the SLAB allocators.

2023-06-16 16:01:55

by David Howells

[permalink] [raw]
Subject: Re: [PATCH net-next 03/12] mm: Make the page_frag_cache allocator alignment param a pow-of-2

Alexander H Duyck <[email protected]> wrote:

> The requirement should only be cache alignment, not power of 2
> alignment.

Sure, but, upstream, page_frag_alloc_align() allows the specification of an
alignment and applies that alignment by:

offset &= align_mask;

which doesn't really make sense unless the alignment boils down to being a
power of two. Now, it might make sense to kill off the align_mask parameter
and just go with SMP_CACHE_BYTES (which will be a power of two).

Note, though, that most users seem to use an align_mask of ~0u which feels a
bit dodgy (it's not an unsigned long), but is probably fine.

David


2023-06-16 16:14:51

by Alexander Duyck

[permalink] [raw]
Subject: Re: [PATCH net-next 03/12] mm: Make the page_frag_cache allocator alignment param a pow-of-2

On Fri, Jun 16, 2023 at 8:28 AM David Howells <[email protected]> wrote:
>
> Alexander H Duyck <[email protected]> wrote:
>
> > The requirement should only be cache alignment, not power of 2
> > alignment.
>
> Sure, but, upstream, page_frag_alloc_align() allows the specification of an
> alignment and applies that alignment by:
>
> offset &= align_mask;
>
> which doesn't really make sense unless the alignment boils down to being a
> power of two. Now, it might make sense to kill off the align_mask parameter
> and just go with SMP_CACHE_BYTES (which will be a power of two).
>
> Note, though, that most users seem to use an align_mask of ~0u which feels a
> bit dodgy (it's not an unsigned long), but is probably fine.

Yeah, now that I think about it this should be fine. I must have been
responding to this without enough sleep. When I originally read it I
saw it as limiting it to a power of 2 allocation, not limiting the
alignment mask to a power of 2.