The presented fixes address the following test case:
* Run xdpsock on queue 10
* Change the number of combined channels to 20
* Observe an error on the xdpsock side
The first two patches deal with the errors; the last one addresses the lack
of traffic.
Larysa Zaremba (3):
ice: remove af_xdp_zc_qps bitmap
ice: add flag to distinguish reset from .ndo_bpf in XDP rings config
ice: map XDP queues to vectors in ice_vsi_map_rings_to_vectors()
drivers/net/ethernet/intel/ice/ice.h | 44 +++++---
drivers/net/ethernet/intel/ice/ice_base.c | 3 +
drivers/net/ethernet/intel/ice/ice_lib.c | 27 ++---
drivers/net/ethernet/intel/ice/ice_main.c | 118 +++++++++++++---------
drivers/net/ethernet/intel/ice/ice_xsk.c | 13 ++-
5 files changed, 119 insertions(+), 86 deletions(-)
--
2.43.0
Referenced commit has introduced a bitmap to distinguish between ZC and
copy-mode AF_XDP queues, because xsk_get_pool_from_qid() does not do this
for us.
The bitmap would be especially useful when restoring previous state after
rebuild, if only it was not reallocated in the process. This leads to e.g.
xdpsock dying after changing number of queues.
Instead of preserving the bitmap during the rebuild, remove it completely
and distinguish between ZC and copy-mode queues based on the presence of
a device associated with the pool.
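For illustration, the check boils down to something like the sketch below;
ice_qid_is_zc() is a hypothetical name used only here, the actual patch
folds this logic into ice_get_xp_from_qid():

  /* xsk_get_pool_from_qid() returns a pool in both modes, but pool->dev
   * is only set once the pool has been DMA-mapped for zero-copy, so a
   * copy-mode pool leaves it NULL.
   */
  static bool ice_qid_is_zc(struct ice_vsi *vsi, u16 qid)
  {
          struct xsk_buff_pool *pool = xsk_get_pool_from_qid(vsi->netdev, qid);

          return pool && pool->dev;
  }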
Fixes: e102db780e1c ("ice: track AF_XDP ZC enabled queues in bitmap")
Reviewed-by: Przemek Kitszel <[email protected]>
Signed-off-by: Larysa Zaremba <[email protected]>
---
drivers/net/ethernet/intel/ice/ice.h | 32 ++++++++++++++++--------
drivers/net/ethernet/intel/ice/ice_lib.c | 8 ------
drivers/net/ethernet/intel/ice/ice_xsk.c | 13 +++++-----
3 files changed, 27 insertions(+), 26 deletions(-)
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 6ad8002b22e1..d4d840729bda 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -409,7 +409,6 @@ struct ice_vsi {
struct ice_tc_cfg tc_cfg;
struct bpf_prog *xdp_prog;
struct ice_tx_ring **xdp_rings; /* XDP ring array */
- unsigned long *af_xdp_zc_qps; /* tracks AF_XDP ZC enabled qps */
u16 num_xdp_txq; /* Used XDP queues */
u8 xdp_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */
@@ -746,6 +745,25 @@ static inline void ice_set_ring_xdp(struct ice_tx_ring *ring)
ring->flags |= ICE_TX_FLAGS_RING_XDP;
}
+/**
+ * ice_get_xp_from_qid - get ZC XSK buffer pool bound to a queue ID
+ * @vsi: pointer to VSI
+ * @qid: index of a queue to look at XSK buff pool presence
+ *
+ * Returns a pointer to xsk_buff_pool structure if there is a buffer pool
+ * attached and configured as zero-copy, NULL otherwise.
+ */
+static inline struct xsk_buff_pool *ice_get_xp_from_qid(struct ice_vsi *vsi,
+ u16 qid)
+{
+ struct xsk_buff_pool *pool = xsk_get_pool_from_qid(vsi->netdev, qid);
+
+ if (!ice_is_xdp_ena_vsi(vsi))
+ return NULL;
+
+ return (pool && pool->dev) ? pool : NULL;
+}
+
/**
* ice_xsk_pool - get XSK buffer pool bound to a ring
* @ring: Rx ring to use
@@ -758,10 +776,7 @@ static inline struct xsk_buff_pool *ice_xsk_pool(struct ice_rx_ring *ring)
struct ice_vsi *vsi = ring->vsi;
u16 qid = ring->q_index;
- if (!ice_is_xdp_ena_vsi(vsi) || !test_bit(qid, vsi->af_xdp_zc_qps))
- return NULL;
-
- return xsk_get_pool_from_qid(vsi->netdev, qid);
+ return ice_get_xp_from_qid(vsi, qid);
}
/**
@@ -786,12 +801,7 @@ static inline void ice_tx_xsk_pool(struct ice_vsi *vsi, u16 qid)
if (!ring)
return;
- if (!ice_is_xdp_ena_vsi(vsi) || !test_bit(qid, vsi->af_xdp_zc_qps)) {
- ring->xsk_pool = NULL;
- return;
- }
-
- ring->xsk_pool = xsk_get_pool_from_qid(vsi->netdev, qid);
+ ring->xsk_pool = ice_get_xp_from_qid(vsi, qid);
}
/**
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 5371e91f6bbb..c0a7ff6c7e87 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -114,14 +114,8 @@ static int ice_vsi_alloc_arrays(struct ice_vsi *vsi)
if (!vsi->q_vectors)
goto err_vectors;
- vsi->af_xdp_zc_qps = bitmap_zalloc(max_t(int, vsi->alloc_txq, vsi->alloc_rxq), GFP_KERNEL);
- if (!vsi->af_xdp_zc_qps)
- goto err_zc_qps;
-
return 0;
-err_zc_qps:
- devm_kfree(dev, vsi->q_vectors);
err_vectors:
devm_kfree(dev, vsi->rxq_map);
err_rxq_map:
@@ -309,8 +303,6 @@ static void ice_vsi_free_arrays(struct ice_vsi *vsi)
dev = ice_pf_to_dev(pf);
- bitmap_free(vsi->af_xdp_zc_qps);
- vsi->af_xdp_zc_qps = NULL;
/* free the ring and vector containers */
devm_kfree(dev, vsi->q_vectors);
vsi->q_vectors = NULL;
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index aa81d1162b81..2015f66b0cf9 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -269,7 +269,6 @@ static int ice_xsk_pool_disable(struct ice_vsi *vsi, u16 qid)
if (!pool)
return -EINVAL;
- clear_bit(qid, vsi->af_xdp_zc_qps);
xsk_pool_dma_unmap(pool, ICE_RX_DMA_ATTR);
return 0;
@@ -300,8 +299,6 @@ ice_xsk_pool_enable(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid)
if (err)
return err;
- set_bit(qid, vsi->af_xdp_zc_qps);
-
return 0;
}
@@ -349,11 +346,13 @@ ice_realloc_rx_xdp_bufs(struct ice_rx_ring *rx_ring, bool pool_present)
int ice_realloc_zc_buf(struct ice_vsi *vsi, bool zc)
{
struct ice_rx_ring *rx_ring;
- unsigned long q;
+ uint i;
+
+ ice_for_each_rxq(vsi, i) {
+ rx_ring = vsi->rx_rings[i];
+ if (!rx_ring->xsk_pool)
+ continue;
- for_each_set_bit(q, vsi->af_xdp_zc_qps,
- max_t(int, vsi->alloc_txq, vsi->alloc_rxq)) {
- rx_ring = vsi->rx_rings[q];
if (ice_realloc_rx_xdp_bufs(rx_ring, zc))
return -ENOMEM;
}
--
2.43.0
ice_pf_dcb_recfg() re-maps queues to vectors with
ice_vsi_map_rings_to_vectors(), which does not restore the previous
state for XDP queues. This leads to no AF_XDP traffic after rebuild.
Map XDP queues to vectors in ice_vsi_map_rings_to_vectors().
Also, move the code around, so XDP queues are mapped independently only
through .ndo_bpf().
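As a simplified sketch (not verbatim driver code), after this change the
rebuild path distributes all rings, XDP ones included, in a single place:

  void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi)
  {
          /* ...existing Tx/Rx ring to vector distribution... */

          /* XDP rings are distributed here as well, so rebuilds that only
           * re-run this function (e.g. ice_pf_dcb_recfg()) restore them too.
           */
          if (ice_is_xdp_ena_vsi(vsi))
                  ice_map_xdp_rings(vsi);
  }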
Fixes: 6624e780a577 ("ice: split ice_vsi_setup into smaller functions")
Reviewed-by: Przemek Kitszel <[email protected]>
Signed-off-by: Larysa Zaremba <[email protected]>
---
drivers/net/ethernet/intel/ice/ice.h | 1 +
drivers/net/ethernet/intel/ice/ice_base.c | 3 +
drivers/net/ethernet/intel/ice/ice_lib.c | 14 ++--
drivers/net/ethernet/intel/ice/ice_main.c | 96 ++++++++++++++---------
4 files changed, 68 insertions(+), 46 deletions(-)
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index b91b2594b29d..da8c8afebc93 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -940,6 +940,7 @@ int ice_vsi_determine_xdp_res(struct ice_vsi *vsi);
int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog,
enum ice_xdp_cfg cfg_type);
int ice_destroy_xdp_rings(struct ice_vsi *vsi, enum ice_xdp_cfg cfg_type);
+void ice_map_xdp_rings(struct ice_vsi *vsi);
int
ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
u32 flags);
diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
index 687f6cb2b917..5d396c1a7731 100644
--- a/drivers/net/ethernet/intel/ice/ice_base.c
+++ b/drivers/net/ethernet/intel/ice/ice_base.c
@@ -842,6 +842,9 @@ void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi)
}
rx_rings_rem -= rx_rings_per_v;
}
+
+ if (ice_is_xdp_ena_vsi(vsi))
+ ice_map_xdp_rings(vsi);
}
/**
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index dd8b374823ee..7629b0190578 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -2274,13 +2274,6 @@ static int ice_vsi_cfg_def(struct ice_vsi *vsi)
if (ret)
goto unroll_vector_base;
- ice_vsi_map_rings_to_vectors(vsi);
-
- /* Associate q_vector rings to napi */
- ice_vsi_set_napi_queues(vsi);
-
- vsi->stat_offsets_loaded = false;
-
if (ice_is_xdp_ena_vsi(vsi)) {
ret = ice_vsi_determine_xdp_res(vsi);
if (ret)
@@ -2291,6 +2284,13 @@ static int ice_vsi_cfg_def(struct ice_vsi *vsi)
goto unroll_vector_base;
}
+ ice_vsi_map_rings_to_vectors(vsi);
+
+ /* Associate q_vector rings to napi */
+ ice_vsi_set_napi_queues(vsi);
+
+ vsi->stat_offsets_loaded = false;
+
/* ICE_VSI_CTRL does not need RSS so skip RSS processing */
if (vsi->type != ICE_VSI_CTRL)
/* Do not exit if configuring RSS had an issue, at
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 2a270aacd24a..1b61ca3a6eb6 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -2707,6 +2707,60 @@ static void ice_vsi_assign_bpf_prog(struct ice_vsi *vsi, struct bpf_prog *prog)
bpf_prog_put(old_prog);
}
+static struct ice_tx_ring *ice_xdp_ring_from_qid(struct ice_vsi *vsi, int qid)
+{
+ struct ice_q_vector *q_vector;
+ struct ice_tx_ring *ring;
+
+ if (static_key_enabled(&ice_xdp_locking_key))
+ return vsi->xdp_rings[qid % vsi->num_xdp_txq];
+
+ q_vector = vsi->rx_rings[qid]->q_vector;
+ ice_for_each_tx_ring(ring, q_vector->tx)
+ if (ice_ring_is_xdp(ring))
+ return ring;
+
+ return NULL;
+}
+
+/**
+ * ice_map_xdp_rings - Map XDP rings to interrupt vectors
+ * @vsi: the VSI with XDP rings being configured
+ *
+ * Map XDP rings to interrupt vectors and perform the configuration steps
+ * dependent on the mapping.
+ */
+void ice_map_xdp_rings(struct ice_vsi *vsi)
+{
+ int xdp_rings_rem = vsi->num_xdp_txq;
+ int v_idx, q_idx;
+
+ /* follow the logic from ice_vsi_map_rings_to_vectors */
+ ice_for_each_q_vector(vsi, v_idx) {
+ struct ice_q_vector *q_vector = vsi->q_vectors[v_idx];
+ int xdp_rings_per_v, q_id, q_base;
+
+ xdp_rings_per_v = DIV_ROUND_UP(xdp_rings_rem,
+ vsi->num_q_vectors - v_idx);
+ q_base = vsi->num_xdp_txq - xdp_rings_rem;
+
+ for (q_id = q_base; q_id < (q_base + xdp_rings_per_v); q_id++) {
+ struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_id];
+
+ xdp_ring->q_vector = q_vector;
+ xdp_ring->next = q_vector->tx.tx_ring;
+ q_vector->tx.tx_ring = xdp_ring;
+ }
+ xdp_rings_rem -= xdp_rings_per_v;
+ }
+
+ ice_for_each_rxq(vsi, q_idx) {
+ vsi->rx_rings[q_idx]->xdp_ring = ice_xdp_ring_from_qid(vsi,
+ q_idx);
+ ice_tx_xsk_pool(vsi, q_idx);
+ }
+}
+
/**
* ice_prepare_xdp_rings - Allocate, configure and setup Tx rings for XDP
* @vsi: VSI to bring up Tx rings used by XDP
@@ -2719,7 +2773,6 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog,
enum ice_xdp_cfg cfg_type)
{
u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
- int xdp_rings_rem = vsi->num_xdp_txq;
struct ice_pf *pf = vsi->back;
struct ice_qs_cfg xdp_qs_cfg = {
.qs_mutex = &pf->avail_q_mutex,
@@ -2732,8 +2785,7 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog,
.mapping_mode = ICE_VSI_MAP_CONTIG
};
struct device *dev;
- int i, v_idx;
- int status;
+ int status, i;
dev = ice_pf_to_dev(pf);
vsi->xdp_rings = devm_kcalloc(dev, vsi->num_xdp_txq,
@@ -2752,42 +2804,6 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog,
if (ice_xdp_alloc_setup_rings(vsi))
goto clear_xdp_rings;
- /* follow the logic from ice_vsi_map_rings_to_vectors */
- ice_for_each_q_vector(vsi, v_idx) {
- struct ice_q_vector *q_vector = vsi->q_vectors[v_idx];
- int xdp_rings_per_v, q_id, q_base;
-
- xdp_rings_per_v = DIV_ROUND_UP(xdp_rings_rem,
- vsi->num_q_vectors - v_idx);
- q_base = vsi->num_xdp_txq - xdp_rings_rem;
-
- for (q_id = q_base; q_id < (q_base + xdp_rings_per_v); q_id++) {
- struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_id];
-
- xdp_ring->q_vector = q_vector;
- xdp_ring->next = q_vector->tx.tx_ring;
- q_vector->tx.tx_ring = xdp_ring;
- }
- xdp_rings_rem -= xdp_rings_per_v;
- }
-
- ice_for_each_rxq(vsi, i) {
- if (static_key_enabled(&ice_xdp_locking_key)) {
- vsi->rx_rings[i]->xdp_ring = vsi->xdp_rings[i % vsi->num_xdp_txq];
- } else {
- struct ice_q_vector *q_vector = vsi->rx_rings[i]->q_vector;
- struct ice_tx_ring *ring;
-
- ice_for_each_tx_ring(ring, q_vector->tx) {
- if (ice_ring_is_xdp(ring)) {
- vsi->rx_rings[i]->xdp_ring = ring;
- break;
- }
- }
- }
- ice_tx_xsk_pool(vsi, i);
- }
-
/* omit the scheduler update if in reset path; XDP queues will be
* taken into account at the end of ice_vsi_rebuild, where
* ice_cfg_vsi_lan is being called
@@ -2795,6 +2811,8 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog,
if (cfg_type == ICE_XDP_CFG_PART)
return 0;
+ ice_map_xdp_rings(vsi);
+
/* tell the Tx scheduler that right now we have
* additional queues
*/
--
2.43.0
Commit 6624e780a577 ("ice: split ice_vsi_setup into smaller functions")
has placed ice_vsi_free_q_vectors() after ice_destroy_xdp_rings() in
the rebuild process. The behaviour of the XDP rings config functions is
context-dependent, so the change of order has led to
ice_destroy_xdp_rings() doing additional work and removing XDP prog, when
it was supposed to be preserved.
Also, dependency on the PF state reset flags creates an additional,
fortunately less common problem:
* PFR is requested e.g. by tx_timeout handler
* .ndo_bpf() is asked to delete the program, calls ice_destroy_xdp_rings(),
but reset flag is set, so rings are destroyed without deleting the
program
* ice_vsi_rebuild tries to delete non-existent XDP rings, because the
program is still on the VSI
* system crashes
With a similar race, when requested to attach a program,
ice_prepare_xdp_rings() can actually skip setting the program in the VSI
and nevertheless report success.
Instead of reverting to the old order of function calls, add an enum
argument to both ice_prepare_xdp_rings() and ice_destroy_xdp_rings() in
order to distinguish between calls from rebuild and .ndo_bpf().
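A simplified sketch of the intended call sites (not verbatim driver code):
.ndo_bpf() applies the full configuration, while the rebuild path only
saves/restores the ring part and leaves the program and Tx scheduler state
to the rest of the rebuild:

  /* .ndo_bpf() attach / detach */
  err = ice_prepare_xdp_rings(vsi, prog, ICE_XDP_CFG_FULL);
  err = ice_destroy_xdp_rings(vsi, ICE_XDP_CFG_FULL);

  /* VSI rebuild: ice_vsi_cfg_def() / ice_vsi_decfg() */
  err = ice_prepare_xdp_rings(vsi, vsi->xdp_prog, ICE_XDP_CFG_PART);
  ice_destroy_xdp_rings(vsi, ICE_XDP_CFG_PART);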
Fixes: efc2214b6047 ("ice: Add support for XDP")
Reviewed-by: Igor Bagnucki <[email protected]>
Signed-off-by: Larysa Zaremba <[email protected]>
---
drivers/net/ethernet/intel/ice/ice.h | 11 +++++++++--
drivers/net/ethernet/intel/ice/ice_lib.c | 5 +++--
drivers/net/ethernet/intel/ice/ice_main.c | 22 ++++++++++++----------
3 files changed, 24 insertions(+), 14 deletions(-)
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index d4d840729bda..b91b2594b29d 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -930,9 +930,16 @@ int ice_down(struct ice_vsi *vsi);
int ice_down_up(struct ice_vsi *vsi);
int ice_vsi_cfg_lan(struct ice_vsi *vsi);
struct ice_vsi *ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi);
+
+enum ice_xdp_cfg {
+ ICE_XDP_CFG_FULL, /* Fully apply new config in .ndo_bpf() */
+ ICE_XDP_CFG_PART, /* Save/use part of config in VSI rebuild */
+};
+
int ice_vsi_determine_xdp_res(struct ice_vsi *vsi);
-int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog);
-int ice_destroy_xdp_rings(struct ice_vsi *vsi);
+int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog,
+ enum ice_xdp_cfg cfg_type);
+int ice_destroy_xdp_rings(struct ice_vsi *vsi, enum ice_xdp_cfg cfg_type);
int
ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
u32 flags);
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index c0a7ff6c7e87..dd8b374823ee 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -2285,7 +2285,8 @@ static int ice_vsi_cfg_def(struct ice_vsi *vsi)
ret = ice_vsi_determine_xdp_res(vsi);
if (ret)
goto unroll_vector_base;
- ret = ice_prepare_xdp_rings(vsi, vsi->xdp_prog);
+ ret = ice_prepare_xdp_rings(vsi, vsi->xdp_prog,
+ ICE_XDP_CFG_PART);
if (ret)
goto unroll_vector_base;
}
@@ -2429,7 +2430,7 @@ void ice_vsi_decfg(struct ice_vsi *vsi)
/* return value check can be skipped here, it always returns
* 0 if reset is in progress
*/
- ice_destroy_xdp_rings(vsi);
+ ice_destroy_xdp_rings(vsi, ICE_XDP_CFG_PART);
ice_vsi_clear_rings(vsi);
ice_vsi_free_q_vectors(vsi);
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index f60c022f7960..2a270aacd24a 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -2711,10 +2711,12 @@ static void ice_vsi_assign_bpf_prog(struct ice_vsi *vsi, struct bpf_prog *prog)
* ice_prepare_xdp_rings - Allocate, configure and setup Tx rings for XDP
* @vsi: VSI to bring up Tx rings used by XDP
* @prog: bpf program that will be assigned to VSI
+ * @cfg_type: create from scratch or restore the existing configuration
*
* Return 0 on success and negative value on error
*/
-int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog)
+int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog,
+ enum ice_xdp_cfg cfg_type)
{
u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
int xdp_rings_rem = vsi->num_xdp_txq;
@@ -2790,7 +2792,7 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog)
* taken into account at the end of ice_vsi_rebuild, where
* ice_cfg_vsi_lan is being called
*/
- if (ice_is_reset_in_progress(pf->state))
+ if (cfg_type == ICE_XDP_CFG_PART)
return 0;
/* tell the Tx scheduler that right now we have
@@ -2842,22 +2844,21 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog)
/**
* ice_destroy_xdp_rings - undo the configuration made by ice_prepare_xdp_rings
* @vsi: VSI to remove XDP rings
+ * @cfg_type: disable XDP permanently or allow it to be restored later
*
* Detach XDP rings from irq vectors, clean up the PF bitmap and free
* resources
*/
-int ice_destroy_xdp_rings(struct ice_vsi *vsi)
+int ice_destroy_xdp_rings(struct ice_vsi *vsi, enum ice_xdp_cfg cfg_type)
{
u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
struct ice_pf *pf = vsi->back;
int i, v_idx;
/* q_vectors are freed in reset path so there's no point in detaching
- * rings; in case of rebuild being triggered not from reset bits
- * in pf->state won't be set, so additionally check first q_vector
- * against NULL
+ * rings
*/
- if (ice_is_reset_in_progress(pf->state) || !vsi->q_vectors[0])
+ if (cfg_type == ICE_XDP_CFG_PART)
goto free_qmap;
ice_for_each_q_vector(vsi, v_idx) {
@@ -2898,7 +2899,7 @@ int ice_destroy_xdp_rings(struct ice_vsi *vsi)
if (static_key_enabled(&ice_xdp_locking_key))
static_branch_dec(&ice_xdp_locking_key);
- if (ice_is_reset_in_progress(pf->state) || !vsi->q_vectors[0])
+ if (cfg_type == ICE_XDP_CFG_PART)
return 0;
ice_vsi_assign_bpf_prog(vsi, NULL);
@@ -3009,7 +3010,8 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
if (xdp_ring_err) {
NL_SET_ERR_MSG_MOD(extack, "Not enough Tx resources for XDP");
} else {
- xdp_ring_err = ice_prepare_xdp_rings(vsi, prog);
+ xdp_ring_err = ice_prepare_xdp_rings(vsi, prog,
+ ICE_XDP_CFG_FULL);
if (xdp_ring_err)
NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Tx resources failed");
}
@@ -3020,7 +3022,7 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Rx resources failed");
} else if (ice_is_xdp_ena_vsi(vsi) && !prog) {
xdp_features_clear_redirect_target(vsi->netdev);
- xdp_ring_err = ice_destroy_xdp_rings(vsi);
+ xdp_ring_err = ice_destroy_xdp_rings(vsi, ICE_XDP_CFG_FULL);
if (xdp_ring_err)
NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Tx resources failed");
/* reallocate Rx queues that were used for zero-copy */
--
2.43.0
On Wed, May 15, 2024 at 06:02:16PM +0200, Larysa Zaremba wrote:
> ice_pf_dcb_recfg() re-maps queues to vectors with
> ice_vsi_map_rings_to_vectors(), which does not restore the previous
> state for XDP queues. This leads to no AF_XDP traffic after rebuild.
>
> Map XDP queues to vectors in ice_vsi_map_rings_to_vectors().
> Also, move the code around, so XDP queues are mapped independently only
> through .ndo_bpf().
Hi Larysa,
I take it the last sentence refers to the placement of ice_map_xdp_rings()
in ice_prepare_xdp_rings() after rather than before the
(cfg_type == ICE_XDP_CFG_PART) condition.
If so, I see that it is a small change. But I do wonder if it is separate
from fixing the issue described in the first paragraph. And thus would
be better as a separate patch.
Also, (I'm raising a separate issue :) breaking out logic into
ice_xdp_ring_from_qid() seems very nice. But I wonder if this ought to be
part of a cleanup patch for 'iwl-next' rather than a fixes patch for 'iwl-net'.
OTOH, I do see that breaking out ice_map_xdp_rings() makes sense in the
context of this fix as the same logic is to be called in two places.
Splitting patches aside, the resulting code looks good to me.
...
On Wed, May 15, 2024 at 06:02:15PM +0200, Larysa Zaremba wrote:
> Commit 6624e780a577 ("ice: split ice_vsi_setup into smaller functions")
> has placed ice_vsi_free_q_vectors() after ice_destroy_xdp_rings() in
> the rebuild process. The behaviour of the XDP rings config functions is
> context-dependent, so the change of order has led to
> ice_destroy_xdp_rings() doing additional work and removing XDP prog, when
> it was supposed to be preserved.
>
> Also, dependency on the PF state reset flags creates an additional,
> fortunately less common problem:
>
> * PFR is requested e.g. by tx_timeout handler
> * .ndo_bpf() is asked to delete the program, calls ice_destroy_xdp_rings(),
> but reset flag is set, so rings are destroyed without deleting the
> program
> * ice_vsi_rebuild tries to delete non-existent XDP rings, because the
> program is still on the VSI
> * system crashes
>
> With a similar race, when requested to attach a program,
> ice_prepare_xdp_rings() can actually skip setting the program in the VSI
> and nevertheless report success.
>
> Instead of reverting to the old order of function calls, add an enum
> argument to both ice_prepare_xdp_rings() and ice_destroy_xdp_rings() in
> order to distinguish between calls from rebuild and .ndo_bpf().
>
> Fixes: efc2214b6047 ("ice: Add support for XDP")
> Reviewed-by: Igor Bagnucki <[email protected]>
> Signed-off-by: Larysa Zaremba <[email protected]>
Reviewed-by: Simon Horman <[email protected]>
On Wed, May 15, 2024 at 06:02:14PM +0200, Larysa Zaremba wrote:
> Referenced commit has introduced a bitmap to distinguish between ZC and
> copy-mode AF_XDP queues, because xsk_get_pool_from_qid() does not do this
> for us.
>
> The bitmap would be especially useful when restoring previous state after
> rebuild, if only it was not reallocated in the process. This leads to e.g.
> xdpsock dying after changing number of queues.
>
> Instead of preserving the bitmap during the rebuild, remove it completely
> and distinguish between ZC and copy-mode queues based on the presence of
> a device associated with the pool.
>
> Fixes: e102db780e1c ("ice: track AF_XDP ZC enabled queues in bitmap")
> Reviewed-by: Przemek Kitszel <[email protected]>
> Signed-off-by: Larysa Zaremba <[email protected]>
Reviewed-by: Simon Horman <[email protected]>
This patch also fixes an issue where XDP programs become detached from the RX rings when the number of channels is reconfigured.
Regards,
Sergey
On Thu, May 16, 2024 at 09:27:13AM +0100, Simon Horman wrote:
> On Wed, May 15, 2024 at 06:02:16PM +0200, Larysa Zaremba wrote:
> > ice_pf_dcb_recfg() re-maps queues to vectors with
> > ice_vsi_map_rings_to_vectors(), which does not restore the previous
> > state for XDP queues. This leads to no AF_XDP traffic after rebuild.
> >
> > Map XDP queues to vectors in ice_vsi_map_rings_to_vectors().
> > Also, move the code around, so XDP queues are mapped independently only
> > through .ndo_bpf().
>
> Hi Larysa,
>
> I take it the last sentence refers to the placement of ice_map_xdp_rings()
> in ice_prepare_xdp_rings() after rather than before the
> (cfg_type == ICE_XDP_CFG_PART) condition.
>
> If so, I see that it is a small change. But I do wonder if it is separate
> from fixing the issue described in the first paragraph. And thus would
> be better as a separate patch.
This is not necessary for the fix to work, but I think it is integral to
making the change properly. I mean, both before and after the change, the
rebuild path maps XDP rings to vectors only once; previously this happened
in ice_prepare_xdp_rings() and now it happens in
ice_vsi_map_rings_to_vectors().
>
> Also, (I'm raising a separate issue :) breaking out logic into
> ice_xdp_ring_from_qid() seems very nice. But I wonder if this ought to be
> part of a cleanup patch for 'iwl-next' rather than a fixes patch for 'iwl-net'.
>
I have separated this into a separate function, because two lines exceeded 80
characters, which is not in line with our current style for drivers.
I do not think that this small function creates any additional problems when
applying this patch, and the change is small enough to see that the logic
stays the same.
> OTOH, I do see that breaking out ice_map_xdp_rings() makes sense in the
> context of this fix as the same logic is to be called in two places.
>
> Splitting patches aside, the resulting code looks good to me.
>
> ...
>
On Thu, May 16, 2024 at 01:43:18PM +0200, Larysa Zaremba wrote:
> On Thu, May 16, 2024 at 09:27:13AM +0100, Simon Horman wrote:
> > On Wed, May 15, 2024 at 06:02:16PM +0200, Larysa Zaremba wrote:
> > > ice_pf_dcb_recfg() re-maps queues to vectors with
> > > ice_vsi_map_rings_to_vectors(), which does not restore the previous
> > > state for XDP queues. This leads to no AF_XDP traffic after rebuild.
> > >
> > > Map XDP queues to vectors in ice_vsi_map_rings_to_vectors().
> > > Also, move the code around, so XDP queues are mapped independently only
> > > through .ndo_bpf().
> >
> > Hi Larysa,
> >
> > I take it the last sentence refers to the placement of ice_map_xdp_rings()
> > in ice_prepare_xdp_rings() after rather than before the
> > (cfg_type == ICE_XDP_CFG_PART) condition.
> >
> > If so, I see that it is a small change. But I do wonder if it is separate
> > from fixing the issue described in the first paragraph. And thus would
> > be better as a separate patch.
>
> This is not necessary for the fix to work, but I think it is integral to
> making the change properly. I mean, both before and after the change, the
> rebuild path maps XDP rings to vectors only once; previously this happened
> in ice_prepare_xdp_rings() and now it happens in
> ice_vsi_map_rings_to_vectors().
>
> >
> > Also, (I'm raising a separate issue :) breaking out logic into
> > ice_xdp_ring_from_qid() seems very nice. But I wonder if this ought to be
> > part of a cleanup patch for 'iwl-next' rather than a fixes patch for 'iwl-net'.
> >
>
> I have separated this into a separate function, because two lines exceeded 80
> characters, which is not in line with our current style for drivers.
> I do not think that this small function creates any additional problems when
> applying this patch, and the change is small enough to see that the logic
> stays the same.
>
> > OTOH, I do see that breaking out ice_map_xdp_rings() makes sense in the
> > context of this fix as the same logic is to be called in two places.
> >
> > Splitting patches aside, the resulting code looks good to me.
> >
> > ...
Hi Larysa,
Thanks for your explanation, this all seems reasonable to me.
Reviewed-by: Simon Horman <[email protected]>
>-----Original Message-----
>From: Intel-wired-lan <[email protected]> On Behalf Of
>Zaremba, Larysa
>Sent: Wednesday, May 15, 2024 9:32 PM
>To: [email protected]; Keller, Jacob E <[email protected]>
>Cc: Fijalkowski, Maciej <[email protected]>; Jesper Dangaard Brouer
><[email protected]>; Daniel Borkmann <[email protected]>; Zaremba,
>Larysa <[email protected]>; Kitszel, Przemyslaw
><[email protected]>; John Fastabend
><[email protected]>; Alexei Starovoitov <[email protected]>; David S.
>Miller <[email protected]>; Eric Dumazet <[email protected]>;
>[email protected]; Jakub Kicinski <[email protected]>;
>[email protected]; Paolo Abeni <[email protected]>; Magnus Karlsson
><[email protected]>; Bagnucki, Igor <[email protected]>;
>[email protected]
>Subject: [Intel-wired-lan] [PATCH iwl-net 1/3] ice: remove af_xdp_zc_qps
>bitmap
>
>Referenced commit has introduced a bitmap to distinguish between ZC and
>copy-mode AF_XDP queues, because xsk_get_pool_from_qid() does not do
>this for us.
>
>The bitmap would be especially useful when restoring previous state after
>rebuild, if only it was not reallocated in the process. This leads to e.g.
>xdpsock dying after changing number of queues.
>
>Instead of preserving the bitmap during the rebuild, remove it completely and
>distinguish between ZC and copy-mode queues based on the presence of a
>device associated with the pool.
>
>Fixes: e102db780e1c ("ice: track AF_XDP ZC enabled queues in bitmap")
>Reviewed-by: Przemek Kitszel <[email protected]>
>Signed-off-by: Larysa Zaremba <[email protected]>
>---
> drivers/net/ethernet/intel/ice/ice.h | 32 ++++++++++++++++--------
> drivers/net/ethernet/intel/ice/ice_lib.c | 8 ------
>drivers/net/ethernet/intel/ice/ice_xsk.c | 13 +++++-----
> 3 files changed, 27 insertions(+), 26 deletions(-)
>
Tested-by: Chandan Kumar Rout <[email protected]> (A Contingent Worker at Intel)
>-----Original Message-----
>From: Intel-wired-lan <[email protected]> On Behalf Of
>Zaremba, Larysa
>Sent: Wednesday, May 15, 2024 9:32 PM
>To: [email protected]; Keller, Jacob E <[email protected]>
>Cc: Fijalkowski, Maciej <[email protected]>; Jesper Dangaard Brouer
><[email protected]>; Daniel Borkmann <[email protected]>; Zaremba,
>Larysa <[email protected]>; Kitszel, Przemyslaw
><[email protected]>; John Fastabend
><[email protected]>; Alexei Starovoitov <[email protected]>; David S.
>Miller <[email protected]>; Eric Dumazet <[email protected]>;
>[email protected]; Jakub Kicinski <[email protected]>;
>[email protected]; Paolo Abeni <[email protected]>; Magnus Karlsson
><[email protected]>; Bagnucki, Igor <[email protected]>;
>[email protected]
>Subject: [Intel-wired-lan] [PATCH iwl-net 2/3] ice: add flag to distinguish reset
>from .ndo_bpf in XDP rings config
>
>Commit 6624e780a577 ("ice: split ice_vsi_setup into smaller functions") has
>placed ice_vsi_free_q_vectors() after ice_destroy_xdp_rings() in the rebuild
>process. The behaviour of the XDP rings config functions is context-dependent,
>so the change of order has led to
>ice_destroy_xdp_rings() doing additional work and removing XDP prog, when it
>was supposed to be preserved.
>
>Also, dependency on the PF state reset flags creates an additional, fortunately
>less common problem:
>
>* PFR is requested e.g. by tx_timeout handler
>* .ndo_bpf() is asked to delete the program, calls ice_destroy_xdp_rings(),
> but reset flag is set, so rings are destroyed without deleting the
> program
>* ice_vsi_rebuild tries to delete non-existent XDP rings, because the
> program is still on the VSI
>* system crashes
>
>With a similar race, when requested to attach a program,
>ice_prepare_xdp_rings() can actually skip setting the program in the VSI and
>nevertheless report success.
>
>Instead of reverting to the old order of function calls, add an enum argument
>to both ice_prepare_xdp_rings() and ice_destroy_xdp_rings() in order to
>distinguish between calls from rebuild and .ndo_bpf().
>
>Fixes: efc2214b6047 ("ice: Add support for XDP")
>Reviewed-by: Igor Bagnucki <[email protected]>
>Signed-off-by: Larysa Zaremba <[email protected]>
>---
> drivers/net/ethernet/intel/ice/ice.h | 11 +++++++++--
> drivers/net/ethernet/intel/ice/ice_lib.c | 5 +++--
>drivers/net/ethernet/intel/ice/ice_main.c | 22 ++++++++++++----------
> 3 files changed, 24 insertions(+), 14 deletions(-)
>
Tested-by: Chandan Kumar Rout <[email protected]> (A Contingent Worker at Intel)
>-----Original Message-----
>From: Intel-wired-lan <[email protected]> On Behalf Of
>Zaremba, Larysa
>Sent: Wednesday, May 15, 2024 9:32 PM
>To: [email protected]; Keller, Jacob E <[email protected]>
>Cc: Fijalkowski, Maciej <[email protected]>; Jesper Dangaard Brouer
><[email protected]>; Daniel Borkmann <[email protected]>; Zaremba,
>Larysa <[email protected]>; Kitszel, Przemyslaw
><[email protected]>; John Fastabend
><[email protected]>; Alexei Starovoitov <[email protected]>; David S.
>Miller <[email protected]>; Eric Dumazet <[email protected]>;
>[email protected]; Jakub Kicinski <[email protected]>;
>[email protected]; Paolo Abeni <[email protected]>; Magnus Karlsson
><[email protected]>; Bagnucki, Igor <[email protected]>;
>[email protected]
>Subject: [Intel-wired-lan] [PATCH iwl-net 3/3] ice: map XDP queues to vectors
>in ice_vsi_map_rings_to_vectors()
>
>ice_pf_dcb_recfg() re-maps queues to vectors with
>ice_vsi_map_rings_to_vectors(), which does not restore the previous state for
>XDP queues. This leads to no AF_XDP traffic after rebuild.
>
>Map XDP queues to vectors in ice_vsi_map_rings_to_vectors().
>Also, move the code around, so XDP queues are mapped independently only
>through .ndo_bpf().
>
>Fixes: 6624e780a577 ("ice: split ice_vsi_setup into smaller functions")
>Reviewed-by: Przemek Kitszel <[email protected]>
>Signed-off-by: Larysa Zaremba <[email protected]>
>---
> drivers/net/ethernet/intel/ice/ice.h | 1 +
> drivers/net/ethernet/intel/ice/ice_base.c | 3 +
>drivers/net/ethernet/intel/ice/ice_lib.c | 14 ++--
>drivers/net/ethernet/intel/ice/ice_main.c | 96 ++++++++++++++---------
> 4 files changed, 68 insertions(+), 46 deletions(-)
>
Tested-by: Chandan Kumar Rout <[email protected]> (A Contingent Worker at Intel)