2024-04-08 16:12:42

by Alexander Duyck

[permalink] [raw]
Subject: Re: [PATCH net-next v1 02/12] mm: page_frag: use initial zero offset for page_frag_alloc_align()

On Mon, Apr 8, 2024 at 6:39 AM Yunsheng Lin <[email protected]> wrote:
>
> On 2024/4/8 1:52, Alexander H Duyck wrote:
> > On Sun, 2024-04-07 at 21:08 +0800, Yunsheng Lin wrote:
> >> We are above to use page_frag_alloc_*() API to not just
> >> allocate memory for skb->data, but also use them to do
> >> the memory allocation for skb frag too. Currently the
> >> implementation of page_frag in mm subsystem is running
> >> the offset as a countdown rather than count-up value,
> >> there may have several advantages to that as mentioned
> >> in [1], but it may have some disadvantages, for example,
> >> it may disable skb frag coaleasing and more correct cache
> >> prefetching
> >>
> >> We have a trade-off to make in order to have a unified
> >> implementation and API for page_frag, so use a initial zero
> >> offset in this patch, and the following patch will try to
> >> make some optimization to aovid the disadvantages as much
> >> as possible.
> >>
> >> 1. https://lore.kernel.org/all/[email protected]/
> >>
> >> CC: Alexander Duyck <[email protected]>
> >> Signed-off-by: Yunsheng Lin <[email protected]>
> >> ---
> >> mm/page_frag_cache.c | 31 ++++++++++++++-----------------
> >> 1 file changed, 14 insertions(+), 17 deletions(-)
> >>
> >> diff --git a/mm/page_frag_cache.c b/mm/page_frag_cache.c
> >> index a0f90ba25200..3e3e88d9af90 100644
> >> --- a/mm/page_frag_cache.c
> >> +++ b/mm/page_frag_cache.c
> >> @@ -67,9 +67,8 @@ void *__page_frag_alloc_align(struct page_frag_cache *nc,
> >> unsigned int fragsz, gfp_t gfp_mask,
> >> unsigned int align_mask)
> >> {
> >> - unsigned int size = PAGE_SIZE;
> >> + unsigned int size, offset;
> >> struct page *page;
> >> - int offset;
> >>
> >> if (unlikely(!nc->va)) {
> >> refill:
> >> @@ -77,10 +76,6 @@ void *__page_frag_alloc_align(struct page_frag_cache *nc,
> >> if (!page)
> >> return NULL;
> >>
> >> -#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
> >> - /* if size can vary use size else just use PAGE_SIZE */
> >> - size = nc->size;
> >> -#endif
> >> /* Even if we own the page, we do not use atomic_set().
> >> * This would break get_page_unless_zero() users.
> >> */
> >> @@ -89,11 +84,18 @@ void *__page_frag_alloc_align(struct page_frag_cache *nc,
> >> /* reset page count bias and offset to start of new frag */
> >> nc->pfmemalloc = page_is_pfmemalloc(page);
> >> nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
> >> - nc->offset = size;
> >> + nc->offset = 0;
> >> }
> >>
> >> - offset = nc->offset - fragsz;
> >> - if (unlikely(offset < 0)) {
> >> +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
> >> + /* if size can vary use size else just use PAGE_SIZE */
> >> + size = nc->size;
> >> +#else
> >> + size = PAGE_SIZE;
> >> +#endif
> >> +
> >> + offset = ALIGN(nc->offset, -align_mask);
> >> + if (unlikely(offset + fragsz > size)) {
> >
> > Rather than using "ALIGN" with a negative value it would probably make
> > more sense to use __ALIGN_KERNEL_MASK with ~align_mask. I am not sure
> > how well the compiler sorts out the use of negatives to flip values
> > that are then converted to masks with the "(a) - 1".
>
> The next patch will remove the '-' in '-align_mask' as the 'ALIGN' operation
> is done in the inline helper. I am not sure that matter much to use
> __ALIGN_KERNEL_MASK with ~align_mask?

It is a matter of making the negations more obvious. Basically you
could achieve the same alignment by doing:
(offset + (~align_mask)) & ~(~align_mask)
rather than:
(offset + ((-align_mask) - 1)) & ~((-align_mask) - 1)

I'm not sure the compiler will pick up on the fact that the two are
identical and can save a number of operations. Also my suggested
approach is closer to how it used to work. Technically the one you are
using only works if align_mask is a negative power of 2.


2024-04-09 08:00:16

by Yunsheng Lin

[permalink] [raw]
Subject: Re: [PATCH net-next v1 02/12] mm: page_frag: use initial zero offset for page_frag_alloc_align()

On 2024/4/9 0:11, Alexander Duyck wrote:
> On Mon, Apr 8, 2024 at 6:39 AM Yunsheng Lin <[email protected]> wrote:
>>
>> On 2024/4/8 1:52, Alexander H Duyck wrote:
>>> On Sun, 2024-04-07 at 21:08 +0800, Yunsheng Lin wrote:
>>>> We are above to use page_frag_alloc_*() API to not just
>>>> allocate memory for skb->data, but also use them to do
>>>> the memory allocation for skb frag too. Currently the
>>>> implementation of page_frag in mm subsystem is running
>>>> the offset as a countdown rather than count-up value,
>>>> there may have several advantages to that as mentioned
>>>> in [1], but it may have some disadvantages, for example,
>>>> it may disable skb frag coaleasing and more correct cache
>>>> prefetching
>>>>
>>>> We have a trade-off to make in order to have a unified
>>>> implementation and API for page_frag, so use a initial zero
>>>> offset in this patch, and the following patch will try to
>>>> make some optimization to aovid the disadvantages as much
>>>> as possible.
>>>>
>>>> 1. https://lore.kernel.org/all/[email protected]/
>>>>
>>>> CC: Alexander Duyck <[email protected]>
>>>> Signed-off-by: Yunsheng Lin <[email protected]>
>>>> ---
>>>> mm/page_frag_cache.c | 31 ++++++++++++++-----------------
>>>> 1 file changed, 14 insertions(+), 17 deletions(-)
>>>>
>>>> diff --git a/mm/page_frag_cache.c b/mm/page_frag_cache.c
>>>> index a0f90ba25200..3e3e88d9af90 100644
>>>> --- a/mm/page_frag_cache.c
>>>> +++ b/mm/page_frag_cache.c
>>>> @@ -67,9 +67,8 @@ void *__page_frag_alloc_align(struct page_frag_cache *nc,
>>>> unsigned int fragsz, gfp_t gfp_mask,
>>>> unsigned int align_mask)
>>>> {
>>>> - unsigned int size = PAGE_SIZE;
>>>> + unsigned int size, offset;
>>>> struct page *page;
>>>> - int offset;
>>>>
>>>> if (unlikely(!nc->va)) {
>>>> refill:
>>>> @@ -77,10 +76,6 @@ void *__page_frag_alloc_align(struct page_frag_cache *nc,
>>>> if (!page)
>>>> return NULL;
>>>>
>>>> -#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
>>>> - /* if size can vary use size else just use PAGE_SIZE */
>>>> - size = nc->size;
>>>> -#endif
>>>> /* Even if we own the page, we do not use atomic_set().
>>>> * This would break get_page_unless_zero() users.
>>>> */
>>>> @@ -89,11 +84,18 @@ void *__page_frag_alloc_align(struct page_frag_cache *nc,
>>>> /* reset page count bias and offset to start of new frag */
>>>> nc->pfmemalloc = page_is_pfmemalloc(page);
>>>> nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
>>>> - nc->offset = size;
>>>> + nc->offset = 0;
>>>> }
>>>>
>>>> - offset = nc->offset - fragsz;
>>>> - if (unlikely(offset < 0)) {
>>>> +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
>>>> + /* if size can vary use size else just use PAGE_SIZE */
>>>> + size = nc->size;
>>>> +#else
>>>> + size = PAGE_SIZE;
>>>> +#endif
>>>> +
>>>> + offset = ALIGN(nc->offset, -align_mask);
>>>> + if (unlikely(offset + fragsz > size)) {
>>>
>>> Rather than using "ALIGN" with a negative value it would probably make
>>> more sense to use __ALIGN_KERNEL_MASK with ~align_mask. I am not sure
>>> how well the compiler sorts out the use of negatives to flip values
>>> that are then converted to masks with the "(a) - 1".
>>
>> The next patch will remove the '-' in '-align_mask' as the 'ALIGN' operation
>> is done in the inline helper. I am not sure that matter much to use
>> __ALIGN_KERNEL_MASK with ~align_mask?
>
> It is a matter of making the negations more obvious. Basically you
> could achieve the same alignment by doing:
> (offset + (~align_mask)) & ~(~align_mask)
> rather than:
> (offset + ((-align_mask) - 1)) & ~((-align_mask) - 1)
>
> I'm not sure the compiler will pick up on the fact that the two are
> identical and can save a number of operations. Also my suggested
> approach is closer to how it used to work. Technically the one you are
> using only works if align_mask is a negative power of 2.

In patch 3, we have below, so the above trick is not really needed after
patch 3:


@@ -94,7 +93,7 @@ void *__page_frag_alloc_align(struct page_frag_cache *nc,
size = PAGE_SIZE;
#endif

- offset = ALIGN(nc->offset, -align_mask);
+ offset = nc->offset;
if (unlikely(offset + fragsz > size)) {
page = virt_to_page(nc->va);

@@ -131,7 +130,7 @@ void *__page_frag_alloc_align(struct page_frag_cache *nc,

return nc->va + offset;
}
-EXPORT_SYMBOL(__page_frag_alloc_align);
+EXPORT_SYMBOL(page_frag_alloc);

..

+static inline void *__page_frag_alloc_align(struct page_frag_cache *nc,
+ unsigned int fragsz, gfp_t gfp_mask,
+ unsigned int align)
+{
+ nc->offset = ALIGN(nc->offset, align);
+
+ return page_frag_alloc(nc, fragsz, gfp_mask);
+}

static inline void *page_frag_alloc_align(struct page_frag_cache *nc,
unsigned int fragsz, gfp_t gfp_mask,
unsigned int align)
{
WARN_ON_ONCE(!is_power_of_2(align));
- return __page_frag_alloc_align(nc, fragsz, gfp_mask, -align);
-}



> .
>