This series is largely motivated by a recent discussion where there was
some confusion on how to properly ref/unref pp pages vs non pp pages:
https://lore.kernel.org/netdev/CAHS8izOoO-EovwMwAm9tLYetwikNPxC0FKyVGu1TPJWSz4bGoA@mail.gmail.com/T/#t
There is some subtlety there because pp uses page->pp_ref_count for
refcounting, while non-pp uses get_page()/put_page() for ref counting.
Getting the refcounting pairs wrong can lead to a kernel crash.
Additionally, it currently may not be obvious to skb users unaware of
page pool internals how to properly acquire a ref on a pp frag. It
requires checking skb->pp_recycle & is_pp_page() to make the correct
calls, and may require handling at the call site that is aware of what
are arguably pp internals.
This series is a minor refactor with a couple of goals:
1. skb users should be able to ref/unref a frag using
[__]skb_frag_[un]ref() functions without needing to understand pp
concepts and pp_ref_count vs get/put_page() differences.
2. reference counting functions should have a mirror opposite. I.e. there
should be a foo_unref() to every foo_ref() with a mirror opposite
implementation (as much as possible).
This is RFC to collect feedback if this change is desirable, but also so
that I don't race with the fix for the issue Dragos is seeing for his
crash.
https://lore.kernel.org/lkml/CAHS8izN436pn3SndrzsCyhmqvJHLyxgCeDpWXA4r1ANt3RCDLQ@mail.gmail.com/T/
Cc: Dragos Tatulea <[email protected]>
Mina Almasry (2):
net: mirror skb frag ref/unref helpers
net: remove napi_frag_[un]ref
drivers/net/ethernet/marvell/sky2.c | 2 +-
drivers/net/ethernet/mellanox/mlx4/en_rx.c | 2 +-
include/linux/skbuff.h | 45 +++++++++-------
net/core/skbuff.c | 60 ++++++++--------------
net/tls/tls_device.c | 2 +-
net/tls/tls_strp.c | 2 +-
6 files changed, 51 insertions(+), 62 deletions(-)
--
2.44.0.278.ge034bb2e1d-goog
Refactor some of the skb frag ref/unref helpers for improved clarity.
Implement napi_pp_get_page() to be the mirror counterpart of
napi_pp_put_page().
Implement napi_frag_ref() to be the mirror counterpart of
napi_frag_unref().
Improve __skb_frag_ref() to become a mirror counterpart of
__skb_frag_unref(). Previously unref could handle pp & non-pp pages,
while the ref could only handle non-pp pages. Now both the ref & unref
helpers can correctly handle both pp & non-pp pages.
Now that __skb_frag_ref() can handle both pp & non-pp pages, remove
skb_pp_frag_ref(), and use __skb_frag_ref() instead. This lets us
remove pp specific handling from skb_try_coalesce.
Signed-off-by: Mina Almasry <[email protected]>
---
include/linux/skbuff.h | 24 +++++++++++++++---
net/core/skbuff.c | 56 ++++++++++++++----------------------------
2 files changed, 39 insertions(+), 41 deletions(-)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index d577e0bee18d..51316b0e20bc 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3477,15 +3477,31 @@ static inline struct page *skb_frag_page(const skb_frag_t *frag)
return netmem_to_page(frag->netmem);
}
+bool napi_pp_get_page(struct page *page);
+
+static inline void napi_frag_ref(skb_frag_t *frag, bool recycle)
+{
+#ifdef CONFIG_PAGE_POOL
+ struct page *page = skb_frag_page(frag);
+
+ if (recycle && napi_pp_get_page(page))
+ return;
+#endif
+ get_page(page);
+}
+
/**
* __skb_frag_ref - take an addition reference on a paged fragment.
* @frag: the paged fragment
+ * @recycle: skb->pp_recycle param of the parent skb.
*
- * Takes an additional reference on the paged fragment @frag.
+ * Takes an additional reference on the paged fragment @frag. Obtains the
+ * correct reference count depending on whether skb->pp_recycle is set and
+ * whether the frag is a page pool frag.
*/
-static inline void __skb_frag_ref(skb_frag_t *frag)
+static inline void __skb_frag_ref(skb_frag_t *frag, bool recycle)
{
- get_page(skb_frag_page(frag));
+ napi_frag_ref(frag, recycle);
}
/**
@@ -3497,7 +3513,7 @@ static inline void __skb_frag_ref(skb_frag_t *frag)
*/
static inline void skb_frag_ref(struct sk_buff *skb, int f)
{
- __skb_frag_ref(&skb_shinfo(skb)->frags[f]);
+ __skb_frag_ref(&skb_shinfo(skb)->frags[f], skb->pp_recycle);
}
int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb,
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 1f918e602bc4..6d234faa9d9e 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1006,6 +1006,21 @@ int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb,
EXPORT_SYMBOL(skb_cow_data_for_xdp);
#if IS_ENABLED(CONFIG_PAGE_POOL)
+bool napi_pp_get_page(struct page *page)
+{
+
+ struct page *head_page;
+
+ head_page = compound_head(page);
+
+ if (!is_pp_page(page))
+ return false;
+
+ page_pool_ref_page(head_page);
+ return true;
+}
+EXPORT_SYMBOL(napi_pp_get_page);
+
bool napi_pp_put_page(struct page *page, bool napi_safe)
{
bool allow_direct = false;
@@ -1058,37 +1073,6 @@ static bool skb_pp_recycle(struct sk_buff *skb, void *data, bool napi_safe)
return napi_pp_put_page(virt_to_page(data), napi_safe);
}
-/**
- * skb_pp_frag_ref() - Increase fragment references of a page pool aware skb
- * @skb: page pool aware skb
- *
- * Increase the fragment reference count (pp_ref_count) of a skb. This is
- * intended to gain fragment references only for page pool aware skbs,
- * i.e. when skb->pp_recycle is true, and not for fragments in a
- * non-pp-recycling skb. It has a fallback to increase references on normal
- * pages, as page pool aware skbs may also have normal page fragments.
- */
-static int skb_pp_frag_ref(struct sk_buff *skb)
-{
- struct skb_shared_info *shinfo;
- struct page *head_page;
- int i;
-
- if (!skb->pp_recycle)
- return -EINVAL;
-
- shinfo = skb_shinfo(skb);
-
- for (i = 0; i < shinfo->nr_frags; i++) {
- head_page = compound_head(skb_frag_page(&shinfo->frags[i]));
- if (likely(is_pp_page(head_page)))
- page_pool_ref_page(head_page);
- else
- page_ref_inc(head_page);
- }
- return 0;
-}
-
static void skb_kfree_head(void *head, unsigned int end_offset)
{
if (end_offset == SKB_SMALL_HEAD_HEADROOM)
@@ -4199,7 +4183,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
to++;
} else {
- __skb_frag_ref(fragfrom);
+ __skb_frag_ref(fragfrom, skb->pp_recycle);
skb_frag_page_copy(fragto, fragfrom);
skb_frag_off_copy(fragto, fragfrom);
skb_frag_size_set(fragto, todo);
@@ -4849,7 +4833,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
}
*nskb_frag = (i < 0) ? skb_head_frag_to_page_desc(frag_skb) : *frag;
- __skb_frag_ref(nskb_frag);
+ __skb_frag_ref(nskb_frag, nskb->pp_recycle);
size = skb_frag_size(nskb_frag);
if (pos < offset) {
@@ -5980,10 +5964,8 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
/* if the skb is not cloned this does nothing
* since we set nr_frags to 0.
*/
- if (skb_pp_frag_ref(from)) {
- for (i = 0; i < from_shinfo->nr_frags; i++)
- __skb_frag_ref(&from_shinfo->frags[i]);
- }
+ for (i = 0; i < from_shinfo->nr_frags; i++)
+ __skb_frag_ref(&from_shinfo->frags[i], from->pp_recycle);
to->truesize += delta;
to->len += len;
--
2.44.0.278.ge034bb2e1d-goog
With the changes in the last patch, the napi_frag_[un]ref() helpers become
redundant. Remove them, and use __skb_frag_[un]ref() directly.
Signed-off-by: Mina Almasry <[email protected]>
---
drivers/net/ethernet/marvell/sky2.c | 2 +-
drivers/net/ethernet/mellanox/mlx4/en_rx.c | 2 +-
include/linux/skbuff.h | 45 +++++++++-------------
net/core/skbuff.c | 4 +-
net/tls/tls_device.c | 2 +-
net/tls/tls_strp.c | 2 +-
6 files changed, 24 insertions(+), 33 deletions(-)
diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c
index 07720841a8d7..8e00a5856856 100644
--- a/drivers/net/ethernet/marvell/sky2.c
+++ b/drivers/net/ethernet/marvell/sky2.c
@@ -2501,7 +2501,7 @@ static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space,
if (length == 0) {
/* don't need this page */
- __skb_frag_unref(frag, false);
+ __skb_frag_unref(frag, false, false);
--skb_shinfo(skb)->nr_frags;
} else {
size = min(length, (unsigned) PAGE_SIZE);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index eac49657bd07..4dbf29b46979 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -526,7 +526,7 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv,
fail:
while (nr > 0) {
nr--;
- __skb_frag_unref(skb_shinfo(skb)->frags + nr, false);
+ __skb_frag_unref(skb_shinfo(skb)->frags + nr, false, false);
}
return 0;
}
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 51316b0e20bc..9cd04c315592 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3479,17 +3479,6 @@ static inline struct page *skb_frag_page(const skb_frag_t *frag)
bool napi_pp_get_page(struct page *page);
-static inline void napi_frag_ref(skb_frag_t *frag, bool recycle)
-{
-#ifdef CONFIG_PAGE_POOL
- struct page *page = skb_frag_page(frag);
-
- if (recycle && napi_pp_get_page(page))
- return;
-#endif
- get_page(page);
-}
-
/**
* __skb_frag_ref - take an addition reference on a paged fragment.
* @frag: the paged fragment
@@ -3501,7 +3490,13 @@ static inline void napi_frag_ref(skb_frag_t *frag, bool recycle)
*/
static inline void __skb_frag_ref(skb_frag_t *frag, bool recycle)
{
- napi_frag_ref(frag, recycle);
+#ifdef CONFIG_PAGE_POOL
+ struct page *page = skb_frag_page(frag);
+
+ if (recycle && napi_pp_get_page(page))
+ return;
+#endif
+ get_page(page);
}
/**
@@ -3522,29 +3517,25 @@ int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb,
struct bpf_prog *prog);
bool napi_pp_put_page(struct page *page, bool napi_safe);
-static inline void
-napi_frag_unref(skb_frag_t *frag, bool recycle, bool napi_safe)
-{
- struct page *page = skb_frag_page(frag);
-
-#ifdef CONFIG_PAGE_POOL
- if (recycle && napi_pp_put_page(page, napi_safe))
- return;
-#endif
- put_page(page);
-}
-
/**
* __skb_frag_unref - release a reference on a paged fragment.
* @frag: the paged fragment
* @recycle: recycle the page if allocated via page_pool
+ * @napi_safe: set to true if running in the same napi context as where the
+ * consumer would run.
*
* Releases a reference on the paged fragment @frag
* or recycles the page via the page_pool API.
*/
-static inline void __skb_frag_unref(skb_frag_t *frag, bool recycle)
+static inline void __skb_frag_unref(skb_frag_t *frag, bool recycle, bool napi_safe)
{
- napi_frag_unref(frag, recycle, false);
+ struct page *page = skb_frag_page(frag);
+
+#ifdef CONFIG_PAGE_POOL
+ if (recycle && napi_pp_put_page(page, napi_safe))
+ return;
+#endif
+ put_page(page);
}
/**
@@ -3559,7 +3550,7 @@ static inline void skb_frag_unref(struct sk_buff *skb, int f)
struct skb_shared_info *shinfo = skb_shinfo(skb);
if (!skb_zcopy_managed(skb))
- __skb_frag_unref(&shinfo->frags[f], skb->pp_recycle);
+ __skb_frag_unref(&shinfo->frags[f], skb->pp_recycle, false);
}
/**
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 6d234faa9d9e..ed7f7e960b78 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1114,7 +1114,7 @@ static void skb_release_data(struct sk_buff *skb, enum skb_drop_reason reason,
}
for (i = 0; i < shinfo->nr_frags; i++)
- napi_frag_unref(&shinfo->frags[i], skb->pp_recycle, napi_safe);
+ __skb_frag_unref(&shinfo->frags[i], skb->pp_recycle, napi_safe);
free_head:
if (shinfo->frag_list)
@@ -4205,7 +4205,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
fragto = &skb_shinfo(tgt)->frags[merge];
skb_frag_size_add(fragto, skb_frag_size(fragfrom));
- __skb_frag_unref(fragfrom, skb->pp_recycle);
+ __skb_frag_unref(fragfrom, skb->pp_recycle, false);
}
/* Reposition in the original skb */
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index bf8ed36b1ad6..5dc6381f34fb 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -140,7 +140,7 @@ static void destroy_record(struct tls_record_info *record)
int i;
for (i = 0; i < record->num_frags; i++)
- __skb_frag_unref(&record->frags[i], false);
+ __skb_frag_unref(&record->frags[i], false, false);
kfree(record);
}
diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c
index ca1e0e198ceb..85b41f226978 100644
--- a/net/tls/tls_strp.c
+++ b/net/tls/tls_strp.c
@@ -196,7 +196,7 @@ static void tls_strp_flush_anchor_copy(struct tls_strparser *strp)
DEBUG_NET_WARN_ON_ONCE(atomic_read(&shinfo->dataref) != 1);
for (i = 0; i < shinfo->nr_frags; i++)
- __skb_frag_unref(&shinfo->frags[i], false);
+ __skb_frag_unref(&shinfo->frags[i], false, false);
shinfo->nr_frags = 0;
if (strp->copy_mode) {
kfree_skb_list(shinfo->frag_list);
--
2.44.0.278.ge034bb2e1d-goog
On Wed, 2024-03-06 at 15:59 -0800, Mina Almasry wrote:
> Refactor some of the skb frag ref/unref helpers for improved clarity.
>
> Implement napi_pp_get_page() to be the mirror counterpart of
> napi_pp_put_page().
>
> Implement napi_frag_ref() to be the mirror counterpart of
> napi_frag_unref().
>
> Improve __skb_frag_ref() to become a mirror counterpart of
> __skb_frag_unref(). Previously unref could handle pp & non-pp pages,
> while the ref could only handle non-pp pages. Now both the ref & unref
> helpers can correctly handle both pp & non-pp pages.
>
> Now that __skb_frag_ref() can handle both pp & non-pp pages, remove
> skb_pp_frag_ref(), and use __skb_frag_ref() instead. This lets us
> remove pp specific handling from skb_try_coalesce.
>
> Signed-off-by: Mina Almasry <[email protected]>
>
> ---
> include/linux/skbuff.h | 24 +++++++++++++++---
> net/core/skbuff.c | 56 ++++++++++++++----------------------------
> 2 files changed, 39 insertions(+), 41 deletions(-)
>
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index d577e0bee18d..51316b0e20bc 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -3477,15 +3477,31 @@ static inline struct page *skb_frag_page(const skb_frag_t *frag)
> return netmem_to_page(frag->netmem);
> }
>
> +bool napi_pp_get_page(struct page *page);
> +
> +static inline void napi_frag_ref(skb_frag_t *frag, bool recycle)
> +{
> +#ifdef CONFIG_PAGE_POOL
> + struct page *page = skb_frag_page(frag);
> +
Move assignment out of ifdef.
> + if (recycle && napi_pp_get_page(page))
> + return;
> +#endif
> + get_page(page);
> +}
> +
> /**
> * __skb_frag_ref - take an addition reference on a paged fragment.
> * @frag: the paged fragment
> + * @recycle: skb->pp_recycle param of the parent skb.
> *
> - * Takes an additional reference on the paged fragment @frag.
> + * Takes an additional reference on the paged fragment @frag. Obtains the
> + * correct reference count depending on whether skb->pp_recycle is set and
> + * whether the frag is a page pool frag.
> */
> -static inline void __skb_frag_ref(skb_frag_t *frag)
> +static inline void __skb_frag_ref(skb_frag_t *frag, bool recycle)
> {
> - get_page(skb_frag_page(frag));
> + napi_frag_ref(frag, recycle);
> }
>
> /**
> @@ -3497,7 +3513,7 @@ static inline void __skb_frag_ref(skb_frag_t *frag)
> */
> static inline void skb_frag_ref(struct sk_buff *skb, int f)
> {
> - __skb_frag_ref(&skb_shinfo(skb)->frags[f]);
> + __skb_frag_ref(&skb_shinfo(skb)->frags[f], skb->pp_recycle);
> }
>
> int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb,
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index 1f918e602bc4..6d234faa9d9e 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -1006,6 +1006,21 @@ int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb,
> EXPORT_SYMBOL(skb_cow_data_for_xdp);
>
> #if IS_ENABLED(CONFIG_PAGE_POOL)
> +bool napi_pp_get_page(struct page *page)
> +{
> +
> + struct page *head_page;
> +
> + head_page = compound_head(page);
> +
> + if (!is_pp_page(page))
> + return false;
> +
> + page_pool_ref_page(head_page);
> + return true;
> +}
> +EXPORT_SYMBOL(napi_pp_get_page);
> +
> bool napi_pp_put_page(struct page *page, bool napi_safe)
> {
> bool allow_direct = false;
> @@ -1058,37 +1073,6 @@ static bool skb_pp_recycle(struct sk_buff *skb, void *data, bool napi_safe)
> return napi_pp_put_page(virt_to_page(data), napi_safe);
> }
>
> -/**
> - * skb_pp_frag_ref() - Increase fragment references of a page pool aware skb
> - * @skb: page pool aware skb
> - *
> - * Increase the fragment reference count (pp_ref_count) of a skb. This is
> - * intended to gain fragment references only for page pool aware skbs,
> - * i.e. when skb->pp_recycle is true, and not for fragments in a
> - * non-pp-recycling skb. It has a fallback to increase references on normal
> - * pages, as page pool aware skbs may also have normal page fragments.
> - */
> -static int skb_pp_frag_ref(struct sk_buff *skb)
> -{
> - struct skb_shared_info *shinfo;
> - struct page *head_page;
> - int i;
> -
> - if (!skb->pp_recycle)
> - return -EINVAL;
> -
> - shinfo = skb_shinfo(skb);
> -
> - for (i = 0; i < shinfo->nr_frags; i++) {
> - head_page = compound_head(skb_frag_page(&shinfo->frags[i]));
> - if (likely(is_pp_page(head_page)))
> - page_pool_ref_page(head_page);
> - else
> - page_ref_inc(head_page);
> - }
> - return 0;
> -}
> -
> static void skb_kfree_head(void *head, unsigned int end_offset)
> {
> if (end_offset == SKB_SMALL_HEAD_HEADROOM)
> @@ -4199,7 +4183,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
> to++;
>
> } else {
> - __skb_frag_ref(fragfrom);
> + __skb_frag_ref(fragfrom, skb->pp_recycle);
> skb_frag_page_copy(fragto, fragfrom);
> skb_frag_off_copy(fragto, fragfrom);
> skb_frag_size_set(fragto, todo);
> @@ -4849,7 +4833,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
> }
>
> *nskb_frag = (i < 0) ? skb_head_frag_to_page_desc(frag_skb) : *frag;
> - __skb_frag_ref(nskb_frag);
> + __skb_frag_ref(nskb_frag, nskb->pp_recycle);
> size = skb_frag_size(nskb_frag);
>
> if (pos < offset) {
> @@ -5980,10 +5964,8 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
> /* if the skb is not cloned this does nothing
> * since we set nr_frags to 0.
> */
> - if (skb_pp_frag_ref(from)) {
> - for (i = 0; i < from_shinfo->nr_frags; i++)
> - __skb_frag_ref(&from_shinfo->frags[i]);
> - }
> + for (i = 0; i < from_shinfo->nr_frags; i++)
> + __skb_frag_ref(&from_shinfo->frags[i], from->pp_recycle);
>
> to->truesize += delta;
> to->len += len;
On Wed, 2024-03-06 at 15:59 -0800, Mina Almasry wrote:
> With the changes in the last patch, napi_frag_[un]ref() helpers become
> reduandant. Remove them, and use __skb_frag_[un]ref() directly.
>
Typo: reduandant
> Signed-off-by: Mina Almasry <[email protected]>
>
> ---
> drivers/net/ethernet/marvell/sky2.c | 2 +-
> drivers/net/ethernet/mellanox/mlx4/en_rx.c | 2 +-
> include/linux/skbuff.h | 45 +++++++++-------------
> net/core/skbuff.c | 4 +-
> net/tls/tls_device.c | 2 +-
> net/tls/tls_strp.c | 2 +-
> 6 files changed, 24 insertions(+), 33 deletions(-)
>
> diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c
> index 07720841a8d7..8e00a5856856 100644
> --- a/drivers/net/ethernet/marvell/sky2.c
> +++ b/drivers/net/ethernet/marvell/sky2.c
> @@ -2501,7 +2501,7 @@ static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space,
>
> if (length == 0) {
> /* don't need this page */
> - __skb_frag_unref(frag, false);
> + __skb_frag_unref(frag, false, false);
> --skb_shinfo(skb)->nr_frags;
> } else {
> size = min(length, (unsigned) PAGE_SIZE);
> diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
> index eac49657bd07..4dbf29b46979 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
> +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
> @@ -526,7 +526,7 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv,
> fail:
> while (nr > 0) {
> nr--;
> - __skb_frag_unref(skb_shinfo(skb)->frags + nr, false);
> + __skb_frag_unref(skb_shinfo(skb)->frags + nr, false, false);
> }
> return 0;
> }
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index 51316b0e20bc..9cd04c315592 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -3479,17 +3479,6 @@ static inline struct page *skb_frag_page(const skb_frag_t *frag)
>
> bool napi_pp_get_page(struct page *page);
>
> -static inline void napi_frag_ref(skb_frag_t *frag, bool recycle)
> -{
> -#ifdef CONFIG_PAGE_POOL
> - struct page *page = skb_frag_page(frag);
> -
> - if (recycle && napi_pp_get_page(page))
> - return;
> -#endif
> - get_page(page);
> -}
> -
> /**
> * __skb_frag_ref - take an addition reference on a paged fragment.
> * @frag: the paged fragment
> @@ -3501,7 +3490,13 @@ static inline void napi_frag_ref(skb_frag_t *frag, bool recycle)
> */
> static inline void __skb_frag_ref(skb_frag_t *frag, bool recycle)
> {
> - napi_frag_ref(frag, recycle);
> +#ifdef CONFIG_PAGE_POOL
> + struct page *page = skb_frag_page(frag);
> +
Issue from previous patch propagates here.
> + if (recycle && napi_pp_get_page(page))
> + return;
> +#endif
> + get_page(page);
> }
>
> /**
> @@ -3522,29 +3517,25 @@ int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb,
> struct bpf_prog *prog);
> bool napi_pp_put_page(struct page *page, bool napi_safe);
>
> -static inline void
> -napi_frag_unref(skb_frag_t *frag, bool recycle, bool napi_safe)
> -{
> - struct page *page = skb_frag_page(frag);
> -
> -#ifdef CONFIG_PAGE_POOL
> - if (recycle && napi_pp_put_page(page, napi_safe))
> - return;
> -#endif
> - put_page(page);
> -}
> -
> /**
> * __skb_frag_unref - release a reference on a paged fragment.
> * @frag: the paged fragment
> * @recycle: recycle the page if allocated via page_pool
> + * @napi_safe: set to true if running in the same napi context as where the
> + * consumer would run.
> *
> * Releases a reference on the paged fragment @frag
> * or recycles the page via the page_pool API.
> */
> -static inline void __skb_frag_unref(skb_frag_t *frag, bool recycle)
> +static inline void __skb_frag_unref(skb_frag_t *frag, bool recycle, bool napi_safe)
I think it would make sense to have an additional wrapper on top of this
function that takes in a const sk_buff * instead of recycle. This would hide pp
specific details from the caller. It is in line with one of Jakub's comments on
my patch:
https://lore.kernel.org/netdev/[email protected]/
Would look good as a 3rd patch maybe. The hard work of coming up with a name
would be all yours :).
> {
> - napi_frag_unref(frag, recycle, false);
> + struct page *page = skb_frag_page(frag);
> +
> +#ifdef CONFIG_PAGE_POOL
> + if (recycle && napi_pp_put_page(page, napi_safe))
> + return;
> +#endif
> + put_page(page);
> }
>
> /**
> @@ -3559,7 +3550,7 @@ static inline void skb_frag_unref(struct sk_buff *skb, int f)
> struct skb_shared_info *shinfo = skb_shinfo(skb);
>
> if (!skb_zcopy_managed(skb))
> - __skb_frag_unref(&shinfo->frags[f], skb->pp_recycle);
> + __skb_frag_unref(&shinfo->frags[f], skb->pp_recycle, false);
> }
>
> /**
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index 6d234faa9d9e..ed7f7e960b78 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -1114,7 +1114,7 @@ static void skb_release_data(struct sk_buff *skb, enum skb_drop_reason reason,
> }
>
> for (i = 0; i < shinfo->nr_frags; i++)
> - napi_frag_unref(&shinfo->frags[i], skb->pp_recycle, napi_safe);
> + __skb_frag_unref(&shinfo->frags[i], skb->pp_recycle, napi_safe);
>
> free_head:
> if (shinfo->frag_list)
> @@ -4205,7 +4205,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
> fragto = &skb_shinfo(tgt)->frags[merge];
>
> skb_frag_size_add(fragto, skb_frag_size(fragfrom));
> - __skb_frag_unref(fragfrom, skb->pp_recycle);
> + __skb_frag_unref(fragfrom, skb->pp_recycle, false);
> }
>
> /* Reposition in the original skb */
> diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
> index bf8ed36b1ad6..5dc6381f34fb 100644
> --- a/net/tls/tls_device.c
> +++ b/net/tls/tls_device.c
> @@ -140,7 +140,7 @@ static void destroy_record(struct tls_record_info *record)
> int i;
>
> for (i = 0; i < record->num_frags; i++)
> - __skb_frag_unref(&record->frags[i], false);
> + __skb_frag_unref(&record->frags[i], false, false);
> kfree(record);
> }
>
> diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c
> index ca1e0e198ceb..85b41f226978 100644
> --- a/net/tls/tls_strp.c
> +++ b/net/tls/tls_strp.c
> @@ -196,7 +196,7 @@ static void tls_strp_flush_anchor_copy(struct tls_strparser *strp)
> DEBUG_NET_WARN_ON_ONCE(atomic_read(&shinfo->dataref) != 1);
>
> for (i = 0; i < shinfo->nr_frags; i++)
> - __skb_frag_unref(&shinfo->frags[i], false);
> + __skb_frag_unref(&shinfo->frags[i], false, false);
> shinfo->nr_frags = 0;
> if (strp->copy_mode) {
> kfree_skb_list(shinfo->frag_list);
I like the cleanup and simplifications in these patches. Thanks Mina!
After the small issues are addressed you can add on both patches:
Reviewed-by: Dragos Tatulea <[email protected]>
On 2024/3/7 7:59, Mina Almasry wrote:
..
>
> int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb,
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index 1f918e602bc4..6d234faa9d9e 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -1006,6 +1006,21 @@ int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb,
> EXPORT_SYMBOL(skb_cow_data_for_xdp);
>
> #if IS_ENABLED(CONFIG_PAGE_POOL)
> +bool napi_pp_get_page(struct page *page)
> +{
> +
> + struct page *head_page;
> +
> + head_page = compound_head(page);
> +
> + if (!is_pp_page(page))
I would use the head_page for is_pp_page(), I am not sure it
matters that much, but I believe it is the precedent.
Maybe do the below and remove the head_page variable:
page = compound_head(page);
> + return false;
> +
> + page_pool_ref_page(head_page);
> + return true;
> +}
> +EXPORT_SYMBOL(napi_pp_get_page);
> +
..
> -
> static void skb_kfree_head(void *head, unsigned int end_offset)
> {
> if (end_offset == SKB_SMALL_HEAD_HEADROOM)
> @@ -4199,7 +4183,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
> to++;
>
> } else {
> - __skb_frag_ref(fragfrom);
> + __skb_frag_ref(fragfrom, skb->pp_recycle);
> skb_frag_page_copy(fragto, fragfrom);
> skb_frag_off_copy(fragto, fragfrom);
> skb_frag_size_set(fragto, todo);
> @@ -4849,7 +4833,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
> }
>
> *nskb_frag = (i < 0) ? skb_head_frag_to_page_desc(frag_skb) : *frag;
> - __skb_frag_ref(nskb_frag);
> + __skb_frag_ref(nskb_frag, nskb->pp_recycle);
> size = skb_frag_size(nskb_frag);
>
> if (pos < offset) {
> @@ -5980,10 +5964,8 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
> /* if the skb is not cloned this does nothing
> * since we set nr_frags to 0.
> */
> - if (skb_pp_frag_ref(from)) {
I guess it is worth mentioning that skb->pp_recycle was only checked once
before, while after this patch it is checked for every frag.
> - for (i = 0; i < from_shinfo->nr_frags; i++)
> - __skb_frag_ref(&from_shinfo->frags[i]);
> - }
> + for (i = 0; i < from_shinfo->nr_frags; i++)
> + __skb_frag_ref(&from_shinfo->frags[i], from->pp_recycle);
>
> to->truesize += delta;
> to->len += len;
>