On 2020-07-25 10:16, Hillf Danton wrote:
> Hi folks
>
> Below is a minimum PoC implementation I can imagine on top of a workqueue
> to make NAPI threaded. Thoughts are appreciated.

Hi Hillf,
For some reason I don't see your mails on linux-wireless/netdev.
I've cleaned up your implementation a bit and I ran some tests with mt76
on an mt7621 embedded board. The results look pretty nice; performance
is a lot more consistent in my tests now.

Here are the changes that I've made compared to your version:
- remove the #ifdef; I think it's unnecessary
- add a state bit for threaded NAPI
- make netif_threaded_napi_add inline
- run queue_work outside of local_irq_save/restore (queue_work already
  disables interrupts internally)
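
Since netif_threaded_napi_add() is a drop-in replacement for
netif_napi_add(), converting a driver is a one-line change. Here is a
minimal sketch of the intended usage (struct my_priv, my_poll and
my_setup are made-up names for illustration, not part of the patch):

	struct my_priv {
		struct napi_struct napi;
		/* ... driver-private state ... */
	};

	/* standard NAPI poll function, unchanged by threaded mode */
	static int my_poll(struct napi_struct *napi, int budget)
	{
		int done = 0;

		/* ... process up to 'budget' rx packets, counting them in 'done' ... */

		if (done < budget)
			napi_complete_done(napi, done);

		return done;
	}

	static void my_setup(struct net_device *dev, struct my_priv *priv)
	{
		/* same signature as netif_napi_add(); the poll function
		 * now runs from the high priority napi_workq instead of
		 * softirq context
		 */
		netif_threaded_napi_add(dev, &priv->napi, my_poll,
					NAPI_POLL_WEIGHT);
	}
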
If you don't mind, I'd like to propose this to netdev soon. Can I have
your Signed-off-by for that?
Thanks,
- Felix
---
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -347,6 +347,7 @@ struct napi_struct {
 	struct list_head dev_list;
 	struct hlist_node napi_hash_node;
 	unsigned int napi_id;
+	struct work_struct work;
 };
 
 enum {
@@ -357,6 +358,7 @@ enum {
 	NAPI_STATE_HASHED,	/* In NAPI hash (busy polling possible) */
 	NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */
 	NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */
+	NAPI_STATE_THREADED,	/* Use threaded NAPI */
 };
 
 enum {
@@ -367,6 +369,7 @@ enum {
 	NAPIF_STATE_HASHED	 = BIT(NAPI_STATE_HASHED),
 	NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL),
 	NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL),
+	NAPIF_STATE_THREADED	 = BIT(NAPI_STATE_THREADED),
 };
 
 enum gro_result {
@@ -2315,6 +2318,26 @@ static inline void *netdev_priv(const struct net_device *dev)
 void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
 		    int (*poll)(struct napi_struct *, int), int weight);
 
+/**
+ * netif_threaded_napi_add - initialize a NAPI context
+ * @dev: network device
+ * @napi: NAPI context
+ * @poll: polling function
+ * @weight: default weight
+ *
+ * This variant of netif_napi_add() should be used from drivers using NAPI
+ * with CPU intensive poll functions.
+ * This will schedule polling from a high priority workqueue.
+ */
+static inline void netif_threaded_napi_add(struct net_device *dev,
+					   struct napi_struct *napi,
+					   int (*poll)(struct napi_struct *, int),
+					   int weight)
+{
+	set_bit(NAPI_STATE_THREADED, &napi->state);
+	netif_napi_add(dev, napi, poll, weight);
+}
+
 /**
  * netif_tx_napi_add - initialize a NAPI context
  * @dev: network device
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -158,6 +158,7 @@ static DEFINE_SPINLOCK(offload_lock);
 struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
 struct list_head ptype_all __read_mostly;	/* Taps */
 static struct list_head offload_base __read_mostly;
+static struct workqueue_struct *napi_workq;
 
 static int netif_rx_internal(struct sk_buff *skb);
 static int call_netdevice_notifiers_info(unsigned long val,
@@ -6286,6 +6287,11 @@ void __napi_schedule(struct napi_struct *n)
 {
 	unsigned long flags;
 
+	if (test_bit(NAPI_STATE_THREADED, &n->state)) {
+		queue_work(napi_workq, &n->work);
+		return;
+	}
+
 	local_irq_save(flags);
 	____napi_schedule(this_cpu_ptr(&softnet_data), n);
 	local_irq_restore(flags);
@@ -6333,6 +6339,11 @@ EXPORT_SYMBOL(napi_schedule_prep);
  */
 void __napi_schedule_irqoff(struct napi_struct *n)
 {
+	if (test_bit(NAPI_STATE_THREADED, &n->state)) {
+		queue_work(napi_workq, &n->work);
+		return;
+	}
+
 	____napi_schedule(this_cpu_ptr(&softnet_data), n);
 }
 EXPORT_SYMBOL(__napi_schedule_irqoff);
@@ -6601,6 +6612,29 @@ static void init_gro_hash(struct napi_struct *napi)
 	napi->gro_bitmask = 0;
 }
 
+static void napi_workfn(struct work_struct *work)
+{
+	struct napi_struct *n = container_of(work, struct napi_struct, work);
+
+	for (;;) {
+		if (!test_bit(NAPI_STATE_SCHED, &n->state))
+			return;
+
+		if (n->poll(n, n->weight) < n->weight)
+			return;
+
+		if (need_resched()) {
+			/*
+			 * yield the CPU here; we pay the latency of a
+			 * task switch even though NAPI is still scheduled
+			 */
+			if (test_bit(NAPI_STATE_SCHED, &n->state))
+				queue_work(napi_workq, work);
+			return;
+		}
+	}
+}
+
 void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
 		    int (*poll)(struct napi_struct *, int), int weight)
 {
@@ -6621,6 +6655,7 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
 #ifdef CONFIG_NETPOLL
 	napi->poll_owner = -1;
 #endif
+	INIT_WORK(&napi->work, napi_workfn);
 	set_bit(NAPI_STATE_SCHED, &napi->state);
 	napi_hash_add(napi);
 }
@@ -10676,6 +10711,10 @@ static int __init net_dev_init(void)
 		sd->backlog.weight = weight_p;
 	}
 
+	napi_workq = alloc_workqueue("napi_workq", WQ_UNBOUND | WQ_HIGHPRI,
+				     WQ_UNBOUND_MAX_ACTIVE);
+	BUG_ON(!napi_workq);
+
 	dev_boot_phase = 0;
 
 	/* The loopback device is special if any other network devices