Currently, all netlink protocol handlers for updating rules, actions and
qdiscs are protected with single global rtnl lock which removes any
possibility for parallelism. This patch set is a first step to remove
rtnl lock dependency from TC rules update path. It updates act API to
use atomic operations, rcu and spinlocks for fine-grained locking. It
also extend API with functions that are needed to update existing
actions for parallel execution.
Outline of changes:
- Change tc action to use atomic reference and bind counters, rcu
mechanism for cookie update.
- Extend action ops API with 'delete' function and 'unlocked' flag.
- Change action API to work with actions in lockless manner based on
primitives implemented in previous patches.
- Extend action API with new functions necessary to implement unlocked
actions.
Vlad Buslov (14):
net: sched: use rcu for action cookie update
net: sched: change type of reference and bind counters
net: sched: add 'delete' function to action ops
net: sched: implement unlocked action init API
net: sched: always take reference to action
net: sched: implement reference counted action release
net: sched: use reference counting action init
net: sched: account for temporary action reference
net: sched: don't release reference on action overwrite
net: sched: extend act API for lockless actions
net: core: add new/replace rate estimator lock parameter
net: sched: retry action check-insert on concurrent modification
net: sched: use unique idr insert function in unlocked actions
net: sched: implement delete for all actions
include/net/act_api.h | 16 ++-
include/net/gen_stats.h | 2 +
include/net/pkt_cls.h | 1 +
net/core/gen_estimator.c | 58 ++++++---
net/netfilter/xt_RATEEST.c | 2 +-
net/sched/act_api.c | 298 ++++++++++++++++++++++++++++++++-------------
net/sched/act_bpf.c | 33 +++--
net/sched/act_connmark.c | 29 +++--
net/sched/act_csum.c | 33 +++--
net/sched/act_gact.c | 30 +++--
net/sched/act_ife.c | 37 ++++--
net/sched/act_ipt.c | 41 +++++--
net/sched/act_mirred.c | 32 +++--
net/sched/act_nat.c | 29 +++--
net/sched/act_pedit.c | 30 +++--
net/sched/act_police.c | 35 ++++--
net/sched/act_sample.c | 33 +++--
net/sched/act_simple.c | 31 +++--
net/sched/act_skbedit.c | 30 +++--
net/sched/act_skbmod.c | 33 +++--
net/sched/act_tunnel_key.c | 34 ++++--
net/sched/act_vlan.c | 34 ++++--
net/sched/cls_api.c | 6 +-
net/sched/sch_api.c | 2 +
net/sched/sch_cbq.c | 4 +-
net/sched/sch_drr.c | 4 +-
net/sched/sch_hfsc.c | 4 +-
net/sched/sch_htb.c | 4 +-
net/sched/sch_qfq.c | 4 +-
29 files changed, 673 insertions(+), 256 deletions(-)
--
2.7.5
Implement functions to atomically update and free action cookie
using rcu mechanism.
Signed-off-by: Vlad Buslov <[email protected]>
---
include/net/act_api.h | 2 +-
include/net/pkt_cls.h | 1 +
net/sched/act_api.c | 44 ++++++++++++++++++++++++++++++--------------
3 files changed, 32 insertions(+), 15 deletions(-)
diff --git a/include/net/act_api.h b/include/net/act_api.h
index 9e59ebf..f7b59ef 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -37,7 +37,7 @@ struct tc_action {
spinlock_t tcfa_lock;
struct gnet_stats_basic_cpu __percpu *cpu_bstats;
struct gnet_stats_queue __percpu *cpu_qstats;
- struct tc_cookie *act_cookie;
+ struct tc_cookie __rcu *act_cookie;
struct tcf_chain *goto_chain;
};
#define tcf_index common.tcfa_index
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index e828d31..3068cc8 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -769,6 +769,7 @@ struct tc_mqprio_qopt_offload {
struct tc_cookie {
u8 *data;
u32 len;
+ struct rcu_head rcu;
};
struct tc_qopt_offload_stats {
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 72251241..ce829a3 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -55,6 +55,24 @@ static void tcf_action_goto_chain_exec(const struct tc_action *a,
res->goto_tp = rcu_dereference_bh(chain->filter_chain);
}
+static void tcf_free_cookie_rcu(struct rcu_head *p)
+{
+ struct tc_cookie *cookie = container_of(p, struct tc_cookie, rcu);
+
+ kfree(cookie->data);
+ kfree(cookie);
+}
+
+static void tcf_set_action_cookie(struct tc_cookie __rcu **old_cookie,
+ struct tc_cookie *new_cookie)
+{
+ struct tc_cookie *old;
+
+ old = xchg(old_cookie, new_cookie);
+ if (old)
+ call_rcu(&old->rcu, tcf_free_cookie_rcu);
+}
+
/* XXX: For standalone actions, we don't need a RCU grace period either, because
* actions are always connected to filters and filters are already destroyed in
* RCU callbacks, so after a RCU grace period actions are already disconnected
@@ -65,10 +83,7 @@ static void free_tcf(struct tc_action *p)
free_percpu(p->cpu_bstats);
free_percpu(p->cpu_qstats);
- if (p->act_cookie) {
- kfree(p->act_cookie->data);
- kfree(p->act_cookie);
- }
+ tcf_set_action_cookie(&p->act_cookie, NULL);
if (p->goto_chain)
tcf_action_goto_chain_fini(p);
@@ -567,16 +582,22 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
int err = -EINVAL;
unsigned char *b = skb_tail_pointer(skb);
struct nlattr *nest;
+ struct tc_cookie *cookie;
if (nla_put_string(skb, TCA_KIND, a->ops->kind))
goto nla_put_failure;
if (tcf_action_copy_stats(skb, a, 0))
goto nla_put_failure;
- if (a->act_cookie) {
- if (nla_put(skb, TCA_ACT_COOKIE, a->act_cookie->len,
- a->act_cookie->data))
+
+ rcu_read_lock();
+ cookie = rcu_dereference(a->act_cookie);
+ if (cookie) {
+ if (nla_put(skb, TCA_ACT_COOKIE, cookie->len, cookie->data)) {
+ rcu_read_unlock();
goto nla_put_failure;
+ }
}
+ rcu_read_unlock();
nest = nla_nest_start(skb, TCA_OPTIONS);
if (nest == NULL)
@@ -719,13 +740,8 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
if (err < 0)
goto err_mod;
- if (name == NULL && tb[TCA_ACT_COOKIE]) {
- if (a->act_cookie) {
- kfree(a->act_cookie->data);
- kfree(a->act_cookie);
- }
- a->act_cookie = cookie;
- }
+ if (!name && tb[TCA_ACT_COOKIE])
+ tcf_set_action_cookie(&a->act_cookie, cookie);
/* module count goes up only when brand new policy is created
* if it exists and is only bound to in a_o->init() then
--
2.7.5
Retry check-insert sequence in action init functions if action with same
index was inserted concurrently.
Signed-off-by: Vlad Buslov <[email protected]>
---
net/sched/act_bpf.c | 8 +++++++-
net/sched/act_connmark.c | 8 +++++++-
net/sched/act_csum.c | 8 +++++++-
net/sched/act_gact.c | 8 +++++++-
net/sched/act_ife.c | 8 +++++++-
net/sched/act_ipt.c | 8 +++++++-
net/sched/act_mirred.c | 8 +++++++-
net/sched/act_nat.c | 8 +++++++-
net/sched/act_pedit.c | 8 +++++++-
net/sched/act_police.c | 9 ++++++++-
net/sched/act_sample.c | 8 +++++++-
net/sched/act_simple.c | 9 ++++++++-
net/sched/act_skbedit.c | 8 +++++++-
net/sched/act_skbmod.c | 8 +++++++-
net/sched/act_tunnel_key.c | 9 ++++++++-
net/sched/act_vlan.c | 9 ++++++++-
16 files changed, 116 insertions(+), 16 deletions(-)
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 5554bf7..7e20fdc 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -299,10 +299,16 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
parm = nla_data(tb[TCA_ACT_BPF_PARMS]);
+replay:
if (!tcf_idr_check(tn, parm->index, act, bind)) {
ret = tcf_idr_create(tn, parm->index, est, act,
&act_bpf_ops, bind, true);
- if (ret < 0)
+ /* Action with specified index was created concurrently.
+ * Check again.
+ */
+ if (parm->index && ret == -ENOSPC)
+ goto replay;
+ else if (ret)
return ret;
res = ACT_P_CREATED;
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 2a4c3da..6ff45af 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -118,10 +118,16 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
parm = nla_data(tb[TCA_CONNMARK_PARMS]);
+replay:
if (!tcf_idr_check(tn, parm->index, a, bind)) {
ret = tcf_idr_create(tn, parm->index, est, a,
&act_connmark_ops, bind, false);
- if (ret)
+ /* Action with specified index was created concurrently.
+ * Check again.
+ */
+ if (parm->index && ret == -ENOSPC)
+ goto replay;
+ else if (ret)
return ret;
ci = to_connmark(*a);
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 74f5dce..49d06c3 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -67,10 +67,16 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
return -EINVAL;
parm = nla_data(tb[TCA_CSUM_PARMS]);
+replay:
if (!tcf_idr_check(tn, parm->index, a, bind)) {
ret = tcf_idr_create(tn, parm->index, est, a,
&act_csum_ops, bind, true);
- if (ret)
+ /* Action with specified index was created concurrently.
+ * Check again.
+ */
+ if (parm->index && ret == -ENOSPC)
+ goto replay;
+ else if (ret)
return ret;
ret = ACT_P_CREATED;
} else {
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 9d7d000..2edefeb 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -91,10 +91,16 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
}
#endif
+replay:
if (!tcf_idr_check(tn, parm->index, a, bind)) {
ret = tcf_idr_create(tn, parm->index, est, a,
&act_gact_ops, bind, true);
- if (ret)
+ /* Action with specified index was created concurrently.
+ * Check again.
+ */
+ if (parm->index && ret == -ENOSPC)
+ goto replay;
+ else if (ret)
return ret;
ret = ACT_P_CREATED;
} else {
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index b57c5ba..665790f 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -483,6 +483,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
if (!p)
return -ENOMEM;
+replay:
exists = tcf_idr_check(tn, parm->index, a, bind);
if (exists && bind) {
kfree(p);
@@ -492,7 +493,12 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
if (!exists) {
ret = tcf_idr_create(tn, parm->index, est, a, &act_ife_ops,
bind, true);
- if (ret) {
+ /* Action with specified index was created concurrently.
+ * Check again.
+ */
+ if (parm->index && ret == -ENOSPC) {
+ goto replay;
+ } else if (ret) {
kfree(p);
return ret;
}
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 7c26ce1..946193e 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -119,6 +119,7 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
if (tb[TCA_IPT_INDEX] != NULL)
index = nla_get_u32(tb[TCA_IPT_INDEX]);
+replay:
exists = tcf_idr_check(tn, index, a, bind);
if (exists && bind)
return 0;
@@ -139,7 +140,12 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
if (!exists) {
ret = tcf_idr_create(tn, index, est, a, ops, bind,
false);
- if (ret)
+ /* Action with specified index was created concurrently.
+ * Check again.
+ */
+ if (index && ret == -ENOSPC)
+ goto replay;
+ else if (ret)
return ret;
ret = ACT_P_CREATED;
} else {
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index b9b7b96..4c8bd26 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -94,6 +94,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
}
parm = nla_data(tb[TCA_MIRRED_PARMS]);
+replay:
exists = tcf_idr_check(tn, parm->index, a, bind);
if (exists && bind)
return 0;
@@ -129,7 +130,12 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
}
ret = tcf_idr_create(tn, parm->index, est, a,
&act_mirred_ops, bind, true);
- if (ret)
+ /* Action with specified index was created concurrently.
+ * Check again.
+ */
+ if (parm->index && ret == -ENOSPC)
+ goto replay;
+ else if (ret)
return ret;
ret = ACT_P_CREATED;
} else if (!ovr) {
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 77badb2..a1a1885 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -57,10 +57,16 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
return -EINVAL;
parm = nla_data(tb[TCA_NAT_PARMS]);
+replay:
if (!tcf_idr_check(tn, parm->index, a, bind)) {
ret = tcf_idr_create(tn, parm->index, est, a,
&act_nat_ops, bind, false);
- if (ret)
+ /* Action with specified index was created concurrently.
+ * Check again.
+ */
+ if (parm->index && ret == -ENOSPC)
+ goto replay;
+ else if (ret)
return ret;
ret = ACT_P_CREATED;
} else {
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 8c39adc..e5e93e2 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -167,12 +167,18 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
if (IS_ERR(keys_ex))
return PTR_ERR(keys_ex);
+replay:
if (!tcf_idr_check(tn, parm->index, a, bind)) {
if (!parm->nkeys)
return -EINVAL;
ret = tcf_idr_create(tn, parm->index, est, a,
&act_pedit_ops, bind, false);
- if (ret)
+ /* Action with specified index was created concurrently.
+ * Check again.
+ */
+ if (parm->index && ret == -ENOSPC)
+ goto replay;
+ else if (ret)
return ret;
p = to_pedit(*a);
keys = kmalloc(ksize, GFP_KERNEL);
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index c480d68..ced6b1f 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -101,6 +101,8 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla,
return -EINVAL;
parm = nla_data(tb[TCA_POLICE_TBF]);
+
+replay:
exists = tcf_idr_check(tn, parm->index, a, bind);
if (exists && bind)
return 0;
@@ -108,7 +110,12 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla,
if (!exists) {
ret = tcf_idr_create(tn, parm->index, NULL, a,
&act_police_ops, bind, false);
- if (ret)
+ /* Action with specified index was created concurrently.
+ * Check again.
+ */
+ if (parm->index && ret == -ENOSPC)
+ goto replay;
+ else if (ret)
return ret;
ret = ACT_P_CREATED;
} else if (!ovr) {
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index d2b0394..7411805 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -59,6 +59,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
parm = nla_data(tb[TCA_SAMPLE_PARMS]);
+replay:
exists = tcf_idr_check(tn, parm->index, a, bind);
if (exists && bind)
return 0;
@@ -66,7 +67,12 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
if (!exists) {
ret = tcf_idr_create(tn, parm->index, est, a,
&act_sample_ops, bind, false);
- if (ret)
+ /* Action with specified index was created concurrently.
+ * Check again.
+ */
+ if (parm->index && ret == -ENOSPC)
+ goto replay;
+ else if (ret)
return ret;
ret = ACT_P_CREATED;
} else if (!ovr) {
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 26eb153..a4b2aca 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -101,6 +101,8 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
return -EINVAL;
parm = nla_data(tb[TCA_DEF_PARMS]);
+
+replay:
exists = tcf_idr_check(tn, parm->index, a, bind);
if (exists && bind)
return 0;
@@ -116,7 +118,12 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
if (!exists) {
ret = tcf_idr_create(tn, parm->index, est, a,
&act_simp_ops, bind, false);
- if (ret)
+ /* Action with specified index was created concurrently.
+ * Check again.
+ */
+ if (parm->index && ret == -ENOSPC)
+ goto replay;
+ else if (ret)
return ret;
d = to_defact(*a);
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index bb416b7..7750b77 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -117,6 +117,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
parm = nla_data(tb[TCA_SKBEDIT_PARMS]);
+replay:
exists = tcf_idr_check(tn, parm->index, a, bind);
if (exists && bind)
return 0;
@@ -129,7 +130,12 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
if (!exists) {
ret = tcf_idr_create(tn, parm->index, est, a,
&act_skbedit_ops, bind, false);
- if (ret)
+ /* Action with specified index was created concurrently.
+ * Check again.
+ */
+ if (parm->index && ret == -ENOSPC)
+ goto replay;
+ else if (ret)
return ret;
d = to_skbedit(*a);
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index e1c2e1c..bbc5092 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -128,6 +128,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
if (parm->flags & SKBMOD_F_SWAPMAC)
lflags = SKBMOD_F_SWAPMAC;
+replay:
exists = tcf_idr_check(tn, parm->index, a, bind);
if (exists && bind)
return 0;
@@ -138,7 +139,12 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
if (!exists) {
ret = tcf_idr_create(tn, parm->index, est, a,
&act_skbmod_ops, bind, true);
- if (ret)
+ /* Action with specified index was created concurrently.
+ * Check again.
+ */
+ if (parm->index && ret == -ENOSPC)
+ goto replay;
+ else if (ret)
return ret;
ret = ACT_P_CREATED;
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index d88c151..4367962 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -99,6 +99,8 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
return -EINVAL;
parm = nla_data(tb[TCA_TUNNEL_KEY_PARMS]);
+
+replay:
exists = tcf_idr_check(tn, parm->index, a, bind);
if (exists && bind)
return 0;
@@ -161,7 +163,12 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
if (!exists) {
ret = tcf_idr_create(tn, parm->index, est, a,
&act_tunnel_key_ops, bind, true);
- if (ret)
+ /* Action with specified index was created concurrently.
+ * Check again.
+ */
+ if (parm->index && ret == -ENOSPC)
+ goto replay;
+ else if (ret)
return ret;
ret = ACT_P_CREATED;
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index f747fb6..adc4e6e 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -134,6 +134,8 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
if (!tb[TCA_VLAN_PARMS])
return -EINVAL;
parm = nla_data(tb[TCA_VLAN_PARMS]);
+
+replay:
exists = tcf_idr_check(tn, parm->index, a, bind);
if (exists && bind)
return 0;
@@ -181,7 +183,12 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
if (!exists) {
ret = tcf_idr_create(tn, parm->index, est, a,
&act_vlan_ops, bind, true);
- if (ret)
+ /* Action with specified index was created concurrently.
+ * Check again.
+ */
+ if (parm->index && ret == -ENOSPC)
+ goto replay;
+ else if (ret)
return ret;
ret = ACT_P_CREATED;
--
2.7.5
Implement delete function that is required to delete actions without
holding rtnl lock. Use action API function that atomically deletes action
only if it is still in action idr. This implementation prevents concurrent
threads from deleting same action twice.
Signed-off-by: Vlad Buslov <[email protected]>
---
net/sched/act_bpf.c | 8 ++++++++
net/sched/act_connmark.c | 8 ++++++++
net/sched/act_csum.c | 8 ++++++++
net/sched/act_gact.c | 8 ++++++++
net/sched/act_ife.c | 8 ++++++++
net/sched/act_ipt.c | 16 ++++++++++++++++
net/sched/act_mirred.c | 8 ++++++++
net/sched/act_nat.c | 8 ++++++++
net/sched/act_pedit.c | 8 ++++++++
net/sched/act_police.c | 8 ++++++++
net/sched/act_sample.c | 8 ++++++++
net/sched/act_simple.c | 8 ++++++++
net/sched/act_skbedit.c | 8 ++++++++
net/sched/act_skbmod.c | 8 ++++++++
net/sched/act_tunnel_key.c | 8 ++++++++
net/sched/act_vlan.c | 8 ++++++++
16 files changed, 136 insertions(+)
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 0bf4ecf..36f7f66 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -394,6 +394,13 @@ static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index,
return tcf_idr_search(tn, a, index);
}
+static int tcf_bpf_delete(struct net *net, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, bpf_net_id);
+
+ return tcf_idr_find_delete(tn, index);
+}
+
static struct tc_action_ops act_bpf_ops __read_mostly = {
.kind = "bpf",
.type = TCA_ACT_BPF,
@@ -404,6 +411,7 @@ static struct tc_action_ops act_bpf_ops __read_mostly = {
.init = tcf_bpf_init,
.walk = tcf_bpf_walker,
.lookup = tcf_bpf_search,
+ .delete = tcf_bpf_delete,
.size = sizeof(struct tcf_bpf),
};
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index a4e9f21..346ede7 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -200,6 +200,13 @@ static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index,
return tcf_idr_search(tn, a, index);
}
+static int tcf_connmark_delete(struct net *net, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, connmark_net_id);
+
+ return tcf_idr_find_delete(tn, index);
+}
+
static struct tc_action_ops act_connmark_ops = {
.kind = "connmark",
.type = TCA_ACT_CONNMARK,
@@ -209,6 +216,7 @@ static struct tc_action_ops act_connmark_ops = {
.init = tcf_connmark_init,
.walk = tcf_connmark_walker,
.lookup = tcf_connmark_search,
+ .delete = tcf_connmark_delete,
.size = sizeof(struct tcf_connmark_info),
};
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index d9836d2..b0a244e 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -655,6 +655,13 @@ static int tcf_csum_search(struct net *net, struct tc_action **a, u32 index,
return tcf_idr_search(tn, a, index);
}
+static int tcf_csum_delete(struct net *net, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, csum_net_id);
+
+ return tcf_idr_find_delete(tn, index);
+}
+
static struct tc_action_ops act_csum_ops = {
.kind = "csum",
.type = TCA_ACT_CSUM,
@@ -665,6 +672,7 @@ static struct tc_action_ops act_csum_ops = {
.cleanup = tcf_csum_cleanup,
.walk = tcf_csum_walker,
.lookup = tcf_csum_search,
+ .delete = tcf_csum_delete,
.size = sizeof(struct tcf_csum),
};
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 79266590..f6ff668 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -238,6 +238,13 @@ static size_t tcf_gact_get_fill_size(const struct tc_action *act)
return sz;
}
+static int tcf_gact_delete(struct net *net, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, gact_net_id);
+
+ return tcf_idr_find_delete(tn, index);
+}
+
static struct tc_action_ops act_gact_ops = {
.kind = "gact",
.type = TCA_ACT_GACT,
@@ -249,6 +256,7 @@ static struct tc_action_ops act_gact_ops = {
.walk = tcf_gact_walker,
.lookup = tcf_gact_search,
.get_fill_size = tcf_gact_get_fill_size,
+ .delete = tcf_gact_delete,
.size = sizeof(struct tcf_gact),
};
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 060144e..d627060 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -845,6 +845,13 @@ static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index,
return tcf_idr_search(tn, a, index);
}
+static int tcf_ife_delete(struct net *net, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, ife_net_id);
+
+ return tcf_idr_find_delete(tn, index);
+}
+
static struct tc_action_ops act_ife_ops = {
.kind = "ife",
.type = TCA_ACT_IFE,
@@ -855,6 +862,7 @@ static struct tc_action_ops act_ife_ops = {
.init = tcf_ife_init,
.walk = tcf_ife_walker,
.lookup = tcf_ife_search,
+ .delete = tcf_ife_delete,
.size = sizeof(struct tcf_ife_info),
};
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index ff8cf9d..d7bad79 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -331,6 +331,13 @@ static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index,
return tcf_idr_search(tn, a, index);
}
+static int tcf_ipt_delete(struct net *net, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, ipt_net_id);
+
+ return tcf_idr_find_delete(tn, index);
+}
+
static struct tc_action_ops act_ipt_ops = {
.kind = "ipt",
.type = TCA_ACT_IPT,
@@ -341,6 +348,7 @@ static struct tc_action_ops act_ipt_ops = {
.init = tcf_ipt_init,
.walk = tcf_ipt_walker,
.lookup = tcf_ipt_search,
+ .delete = tcf_ipt_delete,
.size = sizeof(struct tcf_ipt),
};
@@ -381,6 +389,13 @@ static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index,
return tcf_idr_search(tn, a, index);
}
+static int tcf_xt_delete(struct net *net, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, xt_net_id);
+
+ return tcf_idr_find_delete(tn, index);
+}
+
static struct tc_action_ops act_xt_ops = {
.kind = "xt",
.type = TCA_ACT_XT,
@@ -391,6 +406,7 @@ static struct tc_action_ops act_xt_ops = {
.init = tcf_xt_init,
.walk = tcf_xt_walker,
.lookup = tcf_xt_search,
+ .delete = tcf_xt_delete,
.size = sizeof(struct tcf_ipt),
};
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 7ab8d0c..62ac34b 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -327,6 +327,13 @@ static struct net_device *tcf_mirred_get_dev(const struct tc_action *a)
return rtnl_dereference(m->tcfm_dev);
}
+static int tcf_mirred_delete(struct net *net, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, mirred_net_id);
+
+ return tcf_idr_find_delete(tn, index);
+}
+
static struct tc_action_ops act_mirred_ops = {
.kind = "mirred",
.type = TCA_ACT_MIRRED,
@@ -340,6 +347,7 @@ static struct tc_action_ops act_mirred_ops = {
.lookup = tcf_mirred_search,
.size = sizeof(struct tcf_mirred),
.get_dev = tcf_mirred_get_dev,
+ .delete = tcf_mirred_delete,
};
static __net_init int mirred_init_net(struct net *net)
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index a15c4a9..acf4064 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -301,6 +301,13 @@ static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index,
return tcf_idr_search(tn, a, index);
}
+static int tcf_nat_delete(struct net *net, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, nat_net_id);
+
+ return tcf_idr_find_delete(tn, index);
+}
+
static struct tc_action_ops act_nat_ops = {
.kind = "nat",
.type = TCA_ACT_NAT,
@@ -310,6 +317,7 @@ static struct tc_action_ops act_nat_ops = {
.init = tcf_nat_init,
.walk = tcf_nat_walker,
.lookup = tcf_nat_search,
+ .delete = tcf_nat_delete,
.size = sizeof(struct tcf_nat),
};
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 49034d4..4eb605d 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -443,6 +443,13 @@ static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index,
return tcf_idr_search(tn, a, index);
}
+static int tcf_pedit_delete(struct net *net, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, pedit_net_id);
+
+ return tcf_idr_find_delete(tn, index);
+}
+
static struct tc_action_ops act_pedit_ops = {
.kind = "pedit",
.type = TCA_ACT_PEDIT,
@@ -453,6 +460,7 @@ static struct tc_action_ops act_pedit_ops = {
.init = tcf_pedit_init,
.walk = tcf_pedit_walker,
.lookup = tcf_pedit_search,
+ .delete = tcf_pedit_delete,
.size = sizeof(struct tcf_pedit),
};
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index eb4e458..b9827bb 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -319,6 +319,13 @@ static int tcf_police_search(struct net *net, struct tc_action **a, u32 index,
return tcf_idr_search(tn, a, index);
}
+static int tcf_police_delete(struct net *net, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, police_net_id);
+
+ return tcf_idr_find_delete(tn, index);
+}
+
MODULE_AUTHOR("Alexey Kuznetsov");
MODULE_DESCRIPTION("Policing actions");
MODULE_LICENSE("GPL");
@@ -332,6 +339,7 @@ static struct tc_action_ops act_police_ops = {
.init = tcf_act_police_init,
.walk = tcf_act_police_walker,
.lookup = tcf_police_search,
+ .delete = tcf_police_delete,
.size = sizeof(struct tcf_police),
};
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 5a650d4..5843ce6 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -224,6 +224,13 @@ static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index,
return tcf_idr_search(tn, a, index);
}
+static int tcf_sample_delete(struct net *net, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, sample_net_id);
+
+ return tcf_idr_find_delete(tn, index);
+}
+
static struct tc_action_ops act_sample_ops = {
.kind = "sample",
.type = TCA_ACT_SAMPLE,
@@ -234,6 +241,7 @@ static struct tc_action_ops act_sample_ops = {
.cleanup = tcf_sample_cleanup,
.walk = tcf_sample_walker,
.lookup = tcf_sample_search,
+ .delete = tcf_sample_delete,
.size = sizeof(struct tcf_sample),
};
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 13809e5..c9857b2 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -195,6 +195,13 @@ static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index,
return tcf_idr_search(tn, a, index);
}
+static int tcf_simp_delete(struct net *net, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, simp_net_id);
+
+ return tcf_idr_find_delete(tn, index);
+}
+
static struct tc_action_ops act_simp_ops = {
.kind = "simple",
.type = TCA_ACT_SIMP,
@@ -205,6 +212,7 @@ static struct tc_action_ops act_simp_ops = {
.init = tcf_simp_init,
.walk = tcf_simp_walker,
.lookup = tcf_simp_search,
+ .delete = tcf_simp_delete,
.size = sizeof(struct tcf_defact),
};
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index bf87679..ee4c3f5 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -232,6 +232,13 @@ static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index,
return tcf_idr_search(tn, a, index);
}
+static int tcf_skbedit_delete(struct net *net, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, skbedit_net_id);
+
+ return tcf_idr_find_delete(tn, index);
+}
+
static struct tc_action_ops act_skbedit_ops = {
.kind = "skbedit",
.type = TCA_ACT_SKBEDIT,
@@ -241,6 +248,7 @@ static struct tc_action_ops act_skbedit_ops = {
.init = tcf_skbedit_init,
.walk = tcf_skbedit_walker,
.lookup = tcf_skbedit_search,
+ .delete = tcf_skbedit_delete,
.size = sizeof(struct tcf_skbedit),
};
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index dc36e6f..a8b31e9 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -254,6 +254,13 @@ static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index,
return tcf_idr_search(tn, a, index);
}
+static int tcf_skbmod_delete(struct net *net, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, skbmod_net_id);
+
+ return tcf_idr_find_delete(tn, index);
+}
+
static struct tc_action_ops act_skbmod_ops = {
.kind = "skbmod",
.type = TCA_ACT_SKBMOD,
@@ -264,6 +271,7 @@ static struct tc_action_ops act_skbmod_ops = {
.cleanup = tcf_skbmod_cleanup,
.walk = tcf_skbmod_walker,
.lookup = tcf_skbmod_search,
+ .delete = tcf_skbmod_delete,
.size = sizeof(struct tcf_skbmod),
};
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 16926c7..9175580 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -315,6 +315,13 @@ static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index,
return tcf_idr_search(tn, a, index);
}
+static int tunnel_key_delete(struct net *net, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
+
+ return tcf_idr_find_delete(tn, index);
+}
+
static struct tc_action_ops act_tunnel_key_ops = {
.kind = "tunnel_key",
.type = TCA_ACT_TUNNEL_KEY,
@@ -325,6 +332,7 @@ static struct tc_action_ops act_tunnel_key_ops = {
.cleanup = tunnel_key_release,
.walk = tunnel_key_walker,
.lookup = tunnel_key_search,
+ .delete = tunnel_key_delete,
.size = sizeof(struct tcf_tunnel_key),
};
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 02fbf76..6116e54 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -290,6 +290,13 @@ static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index,
return tcf_idr_search(tn, a, index);
}
+static int tcf_vlan_delete(struct net *net, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, vlan_net_id);
+
+ return tcf_idr_find_delete(tn, index);
+}
+
static struct tc_action_ops act_vlan_ops = {
.kind = "vlan",
.type = TCA_ACT_VLAN,
@@ -300,6 +307,7 @@ static struct tc_action_ops act_vlan_ops = {
.cleanup = tcf_vlan_cleanup,
.walk = tcf_vlan_walker,
.lookup = tcf_vlan_search,
+ .delete = tcf_vlan_delete,
.size = sizeof(struct tcf_vlan),
};
--
2.7.5
Implement new action API function to atomically delete action with
specified index and to atomically insert unique action. These functions are
required to implement init and delete functions for specific actions that
do not rely on rtnl lock.
Signed-off-by: Vlad Buslov <[email protected]>
---
include/net/act_api.h | 2 ++
net/sched/act_api.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 47 insertions(+)
diff --git a/include/net/act_api.h b/include/net/act_api.h
index a8c8570..bce0cf1 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -153,7 +153,9 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
struct tc_action **a, const struct tc_action_ops *ops,
int bind, bool cpustats);
void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a);
+void tcf_idr_insert_unique(struct tc_action_net *tn, struct tc_action *a);
+int tcf_idr_find_delete(struct tc_action_net *tn, u32 index);
int __tcf_idr_release(struct tc_action *a, bool bind, bool strict);
static inline int tcf_idr_release(struct tc_action *a, bool bind)
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 2772276e..a5193dc 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -330,6 +330,41 @@ bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
}
EXPORT_SYMBOL(tcf_idr_check);
+int tcf_idr_find_delete(struct tc_action_net *tn, u32 index)
+{
+ struct tcf_idrinfo *idrinfo = tn->idrinfo;
+ struct tc_action *p;
+ int ret = 0;
+
+ spin_lock_bh(&idrinfo->lock);
+ p = idr_find(&idrinfo->action_idr, index);
+ if (!p) {
+ spin_unlock(&idrinfo->lock);
+ return -ENOENT;
+ }
+
+ if (!atomic_read(&p->tcfa_bindcnt)) {
+ if (refcount_dec_and_test(&p->tcfa_refcnt)) {
+ struct module *owner = p->ops->owner;
+
+ WARN_ON(p != idr_remove(&idrinfo->action_idr,
+ p->tcfa_index));
+ spin_unlock_bh(&idrinfo->lock);
+
+ tcf_action_cleanup(p);
+ module_put(owner);
+ return 0;
+ }
+ ret = 0;
+ } else {
+ ret = -EPERM;
+ }
+
+ spin_unlock_bh(&idrinfo->lock);
+ return ret;
+}
+EXPORT_SYMBOL(tcf_idr_find_delete);
+
int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
struct tc_action **a, const struct tc_action_ops *ops,
int bind, bool cpustats)
@@ -407,6 +442,16 @@ void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a)
}
EXPORT_SYMBOL(tcf_idr_insert);
+void tcf_idr_insert_unique(struct tc_action_net *tn, struct tc_action *a)
+{
+ struct tcf_idrinfo *idrinfo = tn->idrinfo;
+
+ spin_lock_bh(&idrinfo->lock);
+ WARN_ON(idr_replace(&idrinfo->action_idr, a, a->tcfa_index));
+ spin_unlock_bh(&idrinfo->lock);
+}
+EXPORT_SYMBOL(tcf_idr_insert_unique);
+
void tcf_idrinfo_destroy(const struct tc_action_ops *ops,
struct tcf_idrinfo *idrinfo)
{
--
2.7.5
Extend rate estimator new and replace APIs with additional spinlock
parameter used by lockless actions to protect rate_est pointer from
concurrent modification.
Signed-off-by: Vlad Buslov <[email protected]>
---
include/net/gen_stats.h | 2 ++
net/core/gen_estimator.c | 58 +++++++++++++++++++++++++++++++++++-----------
net/netfilter/xt_RATEEST.c | 2 +-
net/sched/act_api.c | 2 +-
net/sched/act_police.c | 2 +-
net/sched/sch_api.c | 2 ++
net/sched/sch_cbq.c | 4 ++--
net/sched/sch_drr.c | 4 ++--
net/sched/sch_hfsc.c | 4 ++--
net/sched/sch_htb.c | 4 ++--
net/sched/sch_qfq.c | 4 ++--
11 files changed, 61 insertions(+), 27 deletions(-)
diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h
index 0304ba2..d1ef63d 100644
--- a/include/net/gen_stats.h
+++ b/include/net/gen_stats.h
@@ -59,12 +59,14 @@ int gnet_stats_finish_copy(struct gnet_dump *d);
int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
struct gnet_stats_basic_cpu __percpu *cpu_bstats,
struct net_rate_estimator __rcu **rate_est,
+ spinlock_t *rate_est_lock,
spinlock_t *stats_lock,
seqcount_t *running, struct nlattr *opt);
void gen_kill_estimator(struct net_rate_estimator __rcu **ptr);
int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
struct gnet_stats_basic_cpu __percpu *cpu_bstats,
struct net_rate_estimator __rcu **ptr,
+ spinlock_t *rate_est_lock,
spinlock_t *stats_lock,
seqcount_t *running, struct nlattr *opt);
bool gen_estimator_active(struct net_rate_estimator __rcu **ptr);
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 98fd127..3512720 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -107,11 +107,43 @@ static void est_timer(struct timer_list *t)
mod_timer(&est->timer, est->next_jiffies);
}
+static void __replace_estimator(struct net_rate_estimator __rcu **rate_est,
+ struct net_rate_estimator *new)
+{
+ struct net_rate_estimator *old = rcu_dereference_protected(*rate_est,
+ 1);
+
+ if (old) {
+ del_timer_sync(&old->timer);
+ new->avbps = old->avbps;
+ new->avpps = old->avpps;
+ }
+
+ new->next_jiffies = jiffies + ((HZ/4) << new->intvl_log);
+ timer_setup(&new->timer, est_timer, 0);
+ mod_timer(&new->timer, new->next_jiffies);
+
+ rcu_assign_pointer(*rate_est, new);
+
+ if (old)
+ kfree_rcu(old, rcu);
+}
+
+static void replace_estimator(struct net_rate_estimator __rcu **rate_est,
+ struct net_rate_estimator *new,
+ spinlock_t *rate_est_lock)
+{
+ spin_lock(rate_est_lock);
+ __replace_estimator(rate_est, new);
+ spin_unlock(rate_est_lock);
+}
+
/**
* gen_new_estimator - create a new rate estimator
* @bstats: basic statistics
* @cpu_bstats: bstats per cpu
* @rate_est: rate estimator statistics
+ * @rate_est_lock: rate_est lock (might be NULL)
* @stats_lock: statistics lock
* @running: qdisc running seqcount
* @opt: rate estimator configuration TLV
@@ -128,12 +160,13 @@ static void est_timer(struct timer_list *t)
int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
struct gnet_stats_basic_cpu __percpu *cpu_bstats,
struct net_rate_estimator __rcu **rate_est,
+ spinlock_t *rate_est_lock,
spinlock_t *stats_lock,
seqcount_t *running,
struct nlattr *opt)
{
struct gnet_estimator *parm = nla_data(opt);
- struct net_rate_estimator *old, *est;
+ struct net_rate_estimator *est;
struct gnet_stats_basic_packed b;
int intvl_log;
@@ -167,20 +200,15 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
local_bh_enable();
est->last_bytes = b.bytes;
est->last_packets = b.packets;
- old = rcu_dereference_protected(*rate_est, 1);
- if (old) {
- del_timer_sync(&old->timer);
- est->avbps = old->avbps;
- est->avpps = old->avpps;
- }
- est->next_jiffies = jiffies + ((HZ/4) << intvl_log);
- timer_setup(&est->timer, est_timer, 0);
- mod_timer(&est->timer, est->next_jiffies);
+ if (rate_est_lock)
+ replace_estimator(rate_est, est, rate_est_lock);
+ else
+ /* If no spinlock argument provided,
+ * then assume that caller is already synchronized.
+ */
+ __replace_estimator(rate_est, est);
- rcu_assign_pointer(*rate_est, est);
- if (old)
- kfree_rcu(old, rcu);
return 0;
}
EXPORT_SYMBOL(gen_new_estimator);
@@ -209,6 +237,7 @@ EXPORT_SYMBOL(gen_kill_estimator);
* @bstats: basic statistics
* @cpu_bstats: bstats per cpu
* @rate_est: rate estimator statistics
+ * @rate_est_lock: rate_est lock (might be NULL)
* @stats_lock: statistics lock
* @running: qdisc running seqcount (might be NULL)
* @opt: rate estimator configuration TLV
@@ -221,10 +250,11 @@ EXPORT_SYMBOL(gen_kill_estimator);
int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
struct gnet_stats_basic_cpu __percpu *cpu_bstats,
struct net_rate_estimator __rcu **rate_est,
+ spinlock_t *rate_est_lock,
spinlock_t *stats_lock,
seqcount_t *running, struct nlattr *opt)
{
- return gen_new_estimator(bstats, cpu_bstats, rate_est,
+ return gen_new_estimator(bstats, cpu_bstats, rate_est, rate_est_lock,
stats_lock, running, opt);
}
EXPORT_SYMBOL(gen_replace_estimator);
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index dec843c..8e79bd5 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -154,7 +154,7 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
cfg.est.interval = info->interval;
cfg.est.ewma_log = info->ewma_log;
- ret = gen_new_estimator(&est->bstats, NULL, &est->rate_est,
+ ret = gen_new_estimator(&est->bstats, NULL, &est->rate_est, NULL,
&est->lock, NULL, &cfg.opt);
if (ret < 0)
goto err2;
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index a5193dc..1dc092e 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -409,7 +409,7 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
p->tcfa_tm.firstuse = 0;
if (est) {
err = gen_new_estimator(&p->tcfa_bstats, p->cpu_bstats,
- &p->tcfa_rate_est,
+ &p->tcfa_rate_est, NULL,
&p->tcfa_lock, NULL, est);
if (err)
goto err4;
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 86d9417..c480d68 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -133,7 +133,7 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla,
if (est) {
err = gen_replace_estimator(&police->tcf_bstats, NULL,
- &police->tcf_rate_est,
+ &police->tcf_rate_est, NULL,
&police->tcf_lock,
NULL, est);
if (err)
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 106dae7e..de6a297 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1178,6 +1178,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
sch->cpu_bstats,
&sch->rate_est,
NULL,
+ NULL,
running,
tca[TCA_RATE]);
if (err) {
@@ -1253,6 +1254,7 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca,
sch->cpu_bstats,
&sch->rate_est,
NULL,
+ NULL,
qdisc_root_sleeping_running(sch),
tca[TCA_RATE]);
}
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index f42025d..2a7ff53 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1503,7 +1503,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
if (tca[TCA_RATE]) {
err = gen_replace_estimator(&cl->bstats, NULL,
- &cl->rate_est,
+ &cl->rate_est, NULL,
NULL,
qdisc_root_sleeping_running(sch),
tca[TCA_RATE]);
@@ -1605,7 +1605,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
if (tca[TCA_RATE]) {
err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
- NULL,
+ NULL, NULL,
qdisc_root_sleeping_running(sch),
tca[TCA_RATE]);
if (err) {
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index e0b0cf8..0896e23 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -95,7 +95,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (cl != NULL) {
if (tca[TCA_RATE]) {
err = gen_replace_estimator(&cl->bstats, NULL,
- &cl->rate_est,
+ &cl->rate_est, NULL,
NULL,
qdisc_root_sleeping_running(sch),
tca[TCA_RATE]);
@@ -129,7 +129,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (tca[TCA_RATE]) {
err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est,
- NULL,
+ NULL, NULL,
qdisc_root_sleeping_running(sch),
tca[TCA_RATE]);
if (err) {
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 3ae9877..f341324 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -972,7 +972,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (tca[TCA_RATE]) {
err = gen_replace_estimator(&cl->bstats, NULL,
- &cl->rate_est,
+ &cl->rate_est, NULL,
NULL,
qdisc_root_sleeping_running(sch),
tca[TCA_RATE]);
@@ -1042,7 +1042,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (tca[TCA_RATE]) {
err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
- NULL,
+ NULL, NULL,
qdisc_root_sleeping_running(sch),
tca[TCA_RATE]);
if (err) {
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 2a4ab7c..acc0355 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1407,7 +1407,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
if (htb_rate_est || tca[TCA_RATE]) {
err = gen_new_estimator(&cl->bstats, NULL,
&cl->rate_est,
- NULL,
+ NULL, NULL,
qdisc_root_sleeping_running(sch),
tca[TCA_RATE] ? : &est.nla);
if (err) {
@@ -1473,7 +1473,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
} else {
if (tca[TCA_RATE]) {
err = gen_replace_estimator(&cl->bstats, NULL,
- &cl->rate_est,
+ &cl->rate_est, NULL,
NULL,
qdisc_root_sleeping_running(sch),
tca[TCA_RATE]);
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index bb1a9c1..8026c1e 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -461,7 +461,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (cl != NULL) { /* modify existing class */
if (tca[TCA_RATE]) {
err = gen_replace_estimator(&cl->bstats, NULL,
- &cl->rate_est,
+ &cl->rate_est, NULL,
NULL,
qdisc_root_sleeping_running(sch),
tca[TCA_RATE]);
@@ -488,7 +488,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (tca[TCA_RATE]) {
err = gen_new_estimator(&cl->bstats, NULL,
&cl->rate_est,
- NULL,
+ NULL, NULL,
qdisc_root_sleeping_running(sch),
tca[TCA_RATE]);
if (err)
--
2.7.5
Implement helper function to delete action using new action ops delete
function implemented by each action for lockless execution.
Implement action put function that releases reference to action and frees
it if necessary. Refactor action deletion code to use new put function and
not to rely on rtnl lock. Remove rtnl lock assertions that are no longer
needed.
Signed-off-by: Vlad Buslov <[email protected]>
---
net/sched/act_api.c | 98 ++++++++++++++++++++++++++++++++++++++++++++---------
net/sched/cls_api.c | 1 -
2 files changed, 82 insertions(+), 17 deletions(-)
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 9459cce..d324a4c 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -90,21 +90,39 @@ static void free_tcf(struct tc_action *p)
kfree(p);
}
-static void tcf_idr_remove(struct tcf_idrinfo *idrinfo, struct tc_action *p)
+static void tcf_action_cleanup(struct tc_action *p)
{
- spin_lock_bh(&idrinfo->lock);
- idr_remove(&idrinfo->action_idr, p->tcfa_index);
- spin_unlock_bh(&idrinfo->lock);
+ if (p->ops->cleanup)
+ p->ops->cleanup(p);
+
gen_kill_estimator(&p->tcfa_rate_est);
free_tcf(p);
}
+static int __tcf_action_put(struct tc_action *p, bool bind)
+{
+ struct tcf_idrinfo *idrinfo = p->idrinfo;
+
+ if (refcount_dec_and_lock(&p->tcfa_refcnt, &idrinfo->lock)) {
+ if (bind)
+ atomic_dec(&p->tcfa_bindcnt);
+ idr_remove(&idrinfo->action_idr, p->tcfa_index);
+ spin_unlock(&idrinfo->lock);
+
+ tcf_action_cleanup(p);
+ return 1;
+ }
+
+ if (bind)
+ atomic_dec(&p->tcfa_bindcnt);
+
+ return 0;
+}
+
int __tcf_idr_release(struct tc_action *p, bool bind, bool strict)
{
int ret = 0;
- ASSERT_RTNL();
-
/* Release with strict==1 and bind==0 is only called through act API
* interface (classifiers always bind). Only case when action with
* positive reference count and zero bind count can exist is when it was
@@ -118,18 +136,11 @@ int __tcf_idr_release(struct tc_action *p, bool bind, bool strict)
* are acceptable.
*/
if (p) {
- if (bind)
- atomic_dec(&p->tcfa_bindcnt);
- else if (strict && atomic_read(&p->tcfa_bindcnt) > 0)
+ if (!bind && strict && atomic_read(&p->tcfa_bindcnt) > 0)
return -EPERM;
- if (atomic_read(&p->tcfa_bindcnt) <= 0 &&
- refcount_dec_and_test(&p->tcfa_refcnt)) {
- if (p->ops->cleanup)
- p->ops->cleanup(p);
- tcf_idr_remove(p->idrinfo, p);
+ if (__tcf_action_put(p, bind))
ret = ACT_P_DELETED;
- }
}
return ret;
@@ -576,6 +587,11 @@ int tcf_action_destroy(struct list_head *actions, int bind)
return ret;
}
+static int tcf_action_put(struct tc_action *p)
+{
+ return __tcf_action_put(p, false);
+}
+
int
tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
{
@@ -975,6 +991,26 @@ static struct tc_action *tcf_action_get_1(struct net *net, struct nlattr *nla,
return ERR_PTR(err);
}
+static int tcf_action_del_1(struct net *net, char *kind, u32 index,
+ struct netlink_ext_ack *extack)
+{
+ const struct tc_action_ops *ops;
+ int err = -EINVAL;
+
+ ops = tc_lookup_action_n(kind);
+ if (!ops) {
+ NL_SET_ERR_MSG(extack, "Specified TC action not found");
+ goto err_out;
+ }
+
+ if (ops->delete)
+ err = ops->delete(net, index);
+
+ module_put(ops->owner);
+err_out:
+ return err;
+}
+
static int tca_action_flush(struct net *net, struct nlattr *nla,
struct nlmsghdr *n, u32 portid,
struct netlink_ext_ack *extack)
@@ -1052,6 +1088,36 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
return err;
}
+static int tcf_action_delete(struct net *net, struct list_head *actions,
+ struct netlink_ext_ack *extack)
+{
+ int ret;
+ struct tc_action *a, *tmp;
+ char kind[IFNAMSIZ];
+ u32 act_index;
+
+ list_for_each_entry_safe(a, tmp, actions, list) {
+ const struct tc_action_ops *ops = a->ops;
+
+ /* Actions can be deleted concurrently
+ * so we must save their type and id to search again
+ * after reference is released.
+ */
+ strncpy(kind, a->ops->kind, sizeof(kind) - 1);
+ act_index = a->tcfa_index;
+
+ list_del(&a->list);
+ if (tcf_action_put(a))
+ module_put(ops->owner);
+
+ /* now do the delete */
+ ret = tcf_action_del_1(net, kind, act_index, extack);
+ if (ret < 0)
+ return ret;
+ }
+ return 0;
+}
+
static int
tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
u32 portid, size_t attr_size, struct netlink_ext_ack *extack)
@@ -1072,7 +1138,7 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
}
/* now do the delete */
- ret = tcf_action_destroy(actions, 0);
+ ret = tcf_action_delete(net, actions, extack);
if (ret < 0) {
NL_SET_ERR_MSG(extack, "Failed to delete TC action");
kfree_skb(skb);
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index bcba94b..00b88e5 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1417,7 +1417,6 @@ void tcf_exts_destroy(struct tcf_exts *exts)
#ifdef CONFIG_NET_CLS_ACT
LIST_HEAD(actions);
- ASSERT_RTNL();
tcf_exts_to_list(exts, &actions);
tcf_action_destroy(&actions, TCA_ACT_UNBIND);
kfree(exts->actions);
--
2.7.5
tca_get_fill function has 'bind' and 'ref' arguments that get passed
down to action dump function. These arguments values are subtracted from
actual reference and bind counter values before writing them to skb.
In order to prevent concurrent action delete, RTM_GETACTION handler
acquires a reference to action before 'dumping' it and releases it
afterwards. This reference is temporal and should not be accounted by
userspace clients. (both logically and to preserver current API
behavior)
Use existing infrastructure of tca_get_fill arguments to subtract that
temporary reference and not expose it to userspace.
Signed-off-by: Vlad Buslov <[email protected]>
---
net/sched/act_api.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 3f02cd1..2772276e 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -935,7 +935,7 @@ tcf_get_notify(struct net *net, u32 portid, struct nlmsghdr *n,
if (!skb)
return -ENOBUFS;
if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, event,
- 0, 0) <= 0) {
+ 0, 1) <= 0) {
NL_SET_ERR_MSG(extack, "Failed to fill netlink attributes while adding TC action");
kfree_skb(skb);
return -EINVAL;
@@ -1125,7 +1125,7 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
return -ENOBUFS;
if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, RTM_DELACTION,
- 0, 1) <= 0) {
+ 0, 2) <= 0) {
NL_SET_ERR_MSG(extack, "Failed to fill netlink TC action attributes");
kfree_skb(skb);
return -EINVAL;
--
2.7.5
Substitute calls to action insert function with calls to action insert
unique function that warns if insertion overwrites index in idr.
Signed-off-by: Vlad Buslov <[email protected]>
---
net/sched/act_bpf.c | 2 +-
net/sched/act_connmark.c | 2 +-
net/sched/act_csum.c | 2 +-
net/sched/act_gact.c | 2 +-
net/sched/act_ife.c | 2 +-
net/sched/act_ipt.c | 2 +-
net/sched/act_mirred.c | 2 +-
net/sched/act_nat.c | 2 +-
net/sched/act_pedit.c | 2 +-
net/sched/act_police.c | 2 +-
net/sched/act_sample.c | 2 +-
net/sched/act_simple.c | 2 +-
net/sched/act_skbedit.c | 2 +-
net/sched/act_skbmod.c | 2 +-
net/sched/act_tunnel_key.c | 2 +-
net/sched/act_vlan.c | 2 +-
16 files changed, 16 insertions(+), 16 deletions(-)
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 7e20fdc..0bf4ecf 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -354,7 +354,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
rcu_assign_pointer(prog->filter, cfg.filter);
if (res == ACT_P_CREATED) {
- tcf_idr_insert(tn, *act);
+ tcf_idr_insert_unique(tn, *act);
} else {
/* make sure the program being replaced is no longer executing */
synchronize_rcu();
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 6ff45af..a4e9f21 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -135,7 +135,7 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
ci->net = net;
ci->zone = parm->zone;
- tcf_idr_insert(tn, *a);
+ tcf_idr_insert_unique(tn, *a);
ret = ACT_P_CREATED;
} else {
ci = to_connmark(*a);
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 49d06c3..d9836d2 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -105,7 +105,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
kfree_rcu(params_old, rcu);
if (ret == ACT_P_CREATED)
- tcf_idr_insert(tn, *a);
+ tcf_idr_insert_unique(tn, *a);
return ret;
}
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 2edefeb..79266590 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -128,7 +128,7 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
}
#endif
if (ret == ACT_P_CREATED)
- tcf_idr_insert(tn, *a);
+ tcf_idr_insert_unique(tn, *a);
return ret;
}
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 665790f..060144e 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -590,7 +590,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
kfree_rcu(p_old, rcu);
if (ret == ACT_P_CREATED)
- tcf_idr_insert(tn, *a);
+ tcf_idr_insert_unique(tn, *a);
return ret;
}
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 946193e..ff8cf9d 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -188,7 +188,7 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
ipt->tcfi_hook = hook;
spin_unlock_bh(&ipt->tcf_lock);
if (ret == ACT_P_CREATED)
- tcf_idr_insert(tn, *a);
+ tcf_idr_insert_unique(tn, *a);
return ret;
err3:
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 4c8bd26..7ab8d0c 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -157,7 +157,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
if (ret == ACT_P_CREATED) {
list_add(&m->tcfm_list, &mirred_list);
- tcf_idr_insert(tn, *a);
+ tcf_idr_insert_unique(tn, *a);
}
return ret;
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index a1a1885..a15c4a9 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -89,7 +89,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
spin_unlock_bh(&p->tcf_lock);
if (ret == ACT_P_CREATED)
- tcf_idr_insert(tn, *a);
+ tcf_idr_insert_unique(tn, *a);
return ret;
}
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index e5e93e2..49034d4 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -220,7 +220,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
spin_unlock_bh(&p->tcf_lock);
if (ret == ACT_P_CREATED)
- tcf_idr_insert(tn, *a);
+ tcf_idr_insert_unique(tn, *a);
return ret;
}
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index ced6b1f..eb4e458 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -194,7 +194,7 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla,
return ret;
police->tcfp_t_c = ktime_get_ns();
- tcf_idr_insert(tn, *a);
+ tcf_idr_insert_unique(tn, *a);
return ret;
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 7411805..5a650d4 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -97,7 +97,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
}
if (ret == ACT_P_CREATED)
- tcf_idr_insert(tn, *a);
+ tcf_idr_insert_unique(tn, *a);
return ret;
}
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index a4b2aca..13809e5 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -146,7 +146,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
}
if (ret == ACT_P_CREATED)
- tcf_idr_insert(tn, *a);
+ tcf_idr_insert_unique(tn, *a);
return ret;
}
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 7750b77..bf87679 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -169,7 +169,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
spin_unlock_bh(&d->tcf_lock);
if (ret == ACT_P_CREATED)
- tcf_idr_insert(tn, *a);
+ tcf_idr_insert_unique(tn, *a);
return ret;
}
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index bbc5092..dc36e6f 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -185,7 +185,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
kfree_rcu(p_old, rcu);
if (ret == ACT_P_CREATED)
- tcf_idr_insert(tn, *a);
+ tcf_idr_insert_unique(tn, *a);
return ret;
}
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 4367962..16926c7 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -198,7 +198,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
kfree_rcu(params_old, rcu);
if (ret == ACT_P_CREATED)
- tcf_idr_insert(tn, *a);
+ tcf_idr_insert_unique(tn, *a);
return ret;
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index adc4e6e..02fbf76 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -221,7 +221,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
kfree_rcu(p_old, rcu);
if (ret == ACT_P_CREATED)
- tcf_idr_insert(tn, *a);
+ tcf_idr_insert_unique(tn, *a);
return ret;
}
--
2.7.5
Add additional 'unlocked' argument to act API init functions.
Argument is true when rtnl lock is not taken and false otherwise.
It is required to implement actions that need to release rtnl lock before
loading kernel module and reacquire if afterwards.
Signed-off-by: Vlad Buslov <[email protected]>
---
include/net/act_api.h | 6 ++++--
net/sched/act_api.c | 17 ++++++++++-------
net/sched/act_bpf.c | 3 ++-
net/sched/act_connmark.c | 2 +-
net/sched/act_csum.c | 3 ++-
net/sched/act_gact.c | 3 ++-
net/sched/act_ife.c | 3 ++-
net/sched/act_ipt.c | 6 ++++--
net/sched/act_mirred.c | 5 +++--
net/sched/act_nat.c | 2 +-
net/sched/act_pedit.c | 3 ++-
net/sched/act_police.c | 2 +-
net/sched/act_sample.c | 3 ++-
net/sched/act_simple.c | 3 ++-
net/sched/act_skbedit.c | 3 ++-
net/sched/act_skbmod.c | 3 ++-
net/sched/act_tunnel_key.c | 3 ++-
net/sched/act_vlan.c | 3 ++-
net/sched/cls_api.c | 5 +++--
19 files changed, 49 insertions(+), 29 deletions(-)
diff --git a/include/net/act_api.h b/include/net/act_api.h
index 73175a3..a8c8570 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -92,7 +92,8 @@ struct tc_action_ops {
struct netlink_ext_ack *extack);
int (*init)(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **act, int ovr,
- int bind, struct netlink_ext_ack *extack);
+ int bind, bool unlocked,
+ struct netlink_ext_ack *extack);
int (*walk)(struct net *, struct sk_buff *,
struct netlink_callback *, int,
const struct tc_action_ops *,
@@ -168,11 +169,12 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
int nr_actions, struct tcf_result *res);
int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
struct nlattr *est, char *name, int ovr, int bind,
- struct list_head *actions, size_t *attr_size,
+ struct list_head *actions, size_t *attr_size, bool unlocked,
struct netlink_ext_ack *extack);
struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
struct nlattr *nla, struct nlattr *est,
char *name, int ovr, int bind,
+ bool unlocked,
struct netlink_ext_ack *extack);
int tcf_action_dump(struct sk_buff *skb, struct list_head *, int, int);
int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int);
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 96b0878..1331beb 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -671,6 +671,7 @@ static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb)
struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
struct nlattr *nla, struct nlattr *est,
char *name, int ovr, int bind,
+ bool unlocked,
struct netlink_ext_ack *extack)
{
struct tc_action *a;
@@ -721,9 +722,11 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
a_o = tc_lookup_action_n(act_name);
if (a_o == NULL) {
#ifdef CONFIG_MODULES
- rtnl_unlock();
+ if (!unlocked)
+ rtnl_unlock();
request_module("act_%s", act_name);
- rtnl_lock();
+ if (!unlocked)
+ rtnl_lock();
a_o = tc_lookup_action_n(act_name);
@@ -746,9 +749,9 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
/* backward compatibility for policer */
if (name == NULL)
err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind,
- extack);
+ unlocked, extack);
else
- err = a_o->init(net, nla, est, &a, ovr, bind, extack);
+ err = a_o->init(net, nla, est, &a, ovr, bind, unlocked, extack);
if (err < 0)
goto err_mod;
@@ -799,7 +802,7 @@ static void cleanup_a(struct list_head *actions, int ovr)
int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
struct nlattr *est, char *name, int ovr, int bind,
- struct list_head *actions, size_t *attr_size,
+ struct list_head *actions, size_t *attr_size, bool unlocked,
struct netlink_ext_ack *extack)
{
struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
@@ -814,7 +817,7 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind,
- extack);
+ unlocked, extack);
if (IS_ERR(act)) {
err = PTR_ERR(act);
goto err;
@@ -1173,7 +1176,7 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
LIST_HEAD(actions);
ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, &actions,
- &attr_size, extack);
+ &attr_size, false, extack);
if (ret)
return ret;
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 15a2a53..5d95c43 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -276,7 +276,8 @@ static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog,
static int tcf_bpf_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **act,
- int replace, int bind, struct netlink_ext_ack *extack)
+ int replace, int bind, bool unlocked,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, bpf_net_id);
struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 1888650..ff6444e 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -96,7 +96,7 @@ static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = {
static int tcf_connmark_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind,
+ int ovr, int bind, bool unlocked,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, connmark_net_id);
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 0572682..a692ac1 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -46,7 +46,8 @@ static struct tc_action_ops act_csum_ops;
static int tcf_csum_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
- int bind, struct netlink_ext_ack *extack)
+ int bind, bool unlocked,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, csum_net_id);
struct tcf_csum_params *params_old, *params_new;
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index ca83deb..0c536cd 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -56,7 +56,8 @@ static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = {
static int tcf_gact_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, struct netlink_ext_ack *extack)
+ int ovr, int bind, bool unlocked,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, gact_net_id);
struct nlattr *tb[TCA_GACT_MAX + 1];
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 689f63e..cb155cd 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -447,7 +447,8 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb,
static int tcf_ife_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, struct netlink_ext_ack *extack)
+ int ovr, int bind, bool unlocked,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, ife_net_id);
struct nlattr *tb[TCA_IFE_MAX + 1];
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 7bce88d..0acf784 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -196,7 +196,8 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
static int tcf_ipt_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
- int bind, struct netlink_ext_ack *extack)
+ int bind, bool unlocked,
+ struct netlink_ext_ack *extack)
{
return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops, ovr,
bind);
@@ -204,7 +205,8 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla,
static int tcf_xt_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
- int bind, struct netlink_ext_ack *extack)
+ int bind, bool unlocked,
+ struct netlink_ext_ack *extack)
{
return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops, ovr,
bind);
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 82a8bdd..607da4b 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -68,8 +68,9 @@ static unsigned int mirred_net_id;
static struct tc_action_ops act_mirred_ops;
static int tcf_mirred_init(struct net *net, struct nlattr *nla,
- struct nlattr *est, struct tc_action **a, int ovr,
- int bind, struct netlink_ext_ack *extack)
+ struct nlattr *est, struct tc_action **a,
+ int ovr, int bind, bool unlocked,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, mirred_net_id);
struct nlattr *tb[TCA_MIRRED_MAX + 1];
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 457c2ae..2f2f045 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -37,7 +37,7 @@ static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
};
static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
- struct tc_action **a, int ovr, int bind,
+ struct tc_action **a, int ovr, int bind, bool unlocked,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, nat_net_id);
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 0102b29..117e486 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -132,7 +132,8 @@ static int tcf_pedit_key_ex_dump(struct sk_buff *skb,
static int tcf_pedit_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, struct netlink_ext_ack *extack)
+ int ovr, int bind, bool unlocked,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, pedit_net_id);
struct nlattr *tb[TCA_PEDIT_MAX + 1];
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index a789b80..2971ba3 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -75,7 +75,7 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
static int tcf_act_police_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind,
+ int ovr, int bind, bool unlocked,
struct netlink_ext_ack *extack)
{
int ret = 0, err;
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 4a46978..9bbd8e9 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -37,7 +37,8 @@ static const struct nla_policy sample_policy[TCA_SAMPLE_MAX + 1] = {
static int tcf_sample_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
- int bind, struct netlink_ext_ack *extack)
+ int bind, bool unlocked,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, sample_net_id);
struct nlattr *tb[TCA_SAMPLE_MAX + 1];
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 95d5985..162f091 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -79,7 +79,8 @@ static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = {
static int tcf_simp_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, struct netlink_ext_ack *extack)
+ int ovr, int bind, bool unlocked,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, simp_net_id);
struct nlattr *tb[TCA_DEF_MAX + 1];
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index adf0a72..4b9d616 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -66,7 +66,8 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, struct netlink_ext_ack *extack)
+ int ovr, int bind, bool unlocked,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, skbedit_net_id);
struct nlattr *tb[TCA_SKBEDIT_MAX + 1];
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index a0d9abb..c1f9eda 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -84,7 +84,8 @@ static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = {
static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, struct netlink_ext_ack *extack)
+ int ovr, int bind, bool unlocked,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, skbmod_net_id);
struct nlattr *tb[TCA_SKBMOD_MAX + 1];
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index c6e5069..e4f9718 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -70,7 +70,8 @@ static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = {
static int tunnel_key_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, struct netlink_ext_ack *extack)
+ int ovr, int bind, bool unlocked,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1];
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 8dda784..6a949f5 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -109,7 +109,8 @@ static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = {
static int tcf_vlan_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, struct netlink_ext_ack *extack)
+ int ovr, int bind, bool unlocked,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, vlan_net_id);
struct nlattr *tb[TCA_VLAN_MAX + 1];
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index b66754f..bcba94b 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1438,7 +1438,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
if (exts->police && tb[exts->police]) {
act = tcf_action_init_1(net, tp, tb[exts->police],
rate_tlv, "police", ovr,
- TCA_ACT_BIND, extack);
+ TCA_ACT_BIND, false, extack);
if (IS_ERR(act))
return PTR_ERR(act);
@@ -1451,7 +1451,8 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
err = tcf_action_init(net, tp, tb[exts->action],
rate_tlv, NULL, ovr, TCA_ACT_BIND,
- &actions, &attr_size, extack);
+ &actions, &attr_size, false,
+ extack);
if (err)
return err;
list_for_each_entry(act, &actions, list)
--
2.7.5
Return from action init function with reference to action taken,
even when overwriting existing action.
Action init API initializes its fourth argument (pointer to pointer to
tc action) to either existing action with same index or newly created
action. In case of existing index(and bind argument is zero), init
function returns without incrementing action reference counter. Caller
of action init then proceeds working with action without actually
holding reference to it. This means that action could be deleted
concurrently. To prevent such scenario this patch changes action init
behavior to always take reference to action before returning
successfully.
Signed-off-by: Vlad Buslov <[email protected]>
---
net/sched/act_bpf.c | 8 ++++----
net/sched/act_connmark.c | 5 +++--
net/sched/act_csum.c | 8 ++++----
net/sched/act_gact.c | 5 +++--
net/sched/act_ife.c | 12 +++++-------
net/sched/act_ipt.c | 5 +++--
net/sched/act_mirred.c | 5 ++---
net/sched/act_nat.c | 5 +++--
net/sched/act_pedit.c | 5 +++--
net/sched/act_police.c | 8 +++-----
net/sched/act_sample.c | 8 +++-----
net/sched/act_simple.c | 5 +++--
net/sched/act_skbedit.c | 5 +++--
net/sched/act_skbmod.c | 8 +++-----
net/sched/act_tunnel_key.c | 8 +++-----
net/sched/act_vlan.c | 8 +++-----
16 files changed, 51 insertions(+), 57 deletions(-)
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 5d95c43..5554bf7 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -311,9 +311,10 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
if (bind)
return 0;
- tcf_idr_release(*act, bind);
- if (!replace)
+ if (!replace) {
+ tcf_idr_release(*act, bind);
return -EEXIST;
+ }
}
is_bpf = tb[TCA_ACT_BPF_OPS_LEN] && tb[TCA_ACT_BPF_OPS];
@@ -356,8 +357,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
return res;
out:
- if (res == ACT_P_CREATED)
- tcf_idr_release(*act, bind);
+ tcf_idr_release(*act, bind);
return ret;
}
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index ff6444e..2a4c3da 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -135,9 +135,10 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
ci = to_connmark(*a);
if (bind)
return 0;
- tcf_idr_release(*a, bind);
- if (!ovr)
+ if (!ovr) {
+ tcf_idr_release(*a, bind);
return -EEXIST;
+ }
/* replacing action and zone */
ci->tcf_action = parm->action;
ci->zone = parm->zone;
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index a692ac1..74f5dce 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -76,9 +76,10 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
} else {
if (bind)/* dont override defaults */
return 0;
- tcf_idr_release(*a, bind);
- if (!ovr)
+ if (!ovr) {
+ tcf_idr_release(*a, bind);
return -EEXIST;
+ }
}
p = to_tcf_csum(*a);
@@ -86,8 +87,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
if (unlikely(!params_new)) {
- if (ret == ACT_P_CREATED)
- tcf_idr_release(*a, bind);
+ tcf_idr_release(*a, bind);
return -ENOMEM;
}
params_old = rtnl_dereference(p->params);
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 0c536cd..9d7d000 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -100,9 +100,10 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
} else {
if (bind)/* dont override defaults */
return 0;
- tcf_idr_release(*a, bind);
- if (!ovr)
+ if (!ovr) {
+ tcf_idr_release(*a, bind);
return -EEXIST;
+ }
}
gact = to_gact(*a);
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index cb155cd..b57c5ba 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -497,12 +497,10 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
return ret;
}
ret = ACT_P_CREATED;
- } else {
+ } else if (!ovr) {
tcf_idr_release(*a, bind);
- if (!ovr) {
- kfree(p);
- return -EEXIST;
- }
+ kfree(p);
+ return -EEXIST;
}
ife = to_ife(*a);
@@ -544,13 +542,13 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
NULL, NULL);
if (err) {
metadata_parse_err:
- if (exists)
- tcf_idr_release(*a, bind);
if (ret == ACT_P_CREATED)
_tcf_ife_cleanup(*a);
if (exists)
spin_unlock_bh(&ife->tcf_lock);
+ tcf_idr_release(*a, bind);
+
kfree(p);
return err;
}
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 0acf784..7c26ce1 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -145,10 +145,11 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
} else {
if (bind)/* dont override defaults */
return 0;
- tcf_idr_release(*a, bind);
- if (!ovr)
+ if (!ovr) {
+ tcf_idr_release(*a, bind);
return -EEXIST;
+ }
}
hook = nla_get_u32(tb[TCA_IPT_HOOK]);
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 607da4b..b9b7b96 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -132,10 +132,9 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
if (ret)
return ret;
ret = ACT_P_CREATED;
- } else {
+ } else if (!ovr) {
tcf_idr_release(*a, bind);
- if (!ovr)
- return -EEXIST;
+ return -EEXIST;
}
m = to_mirred(*a);
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 2f2f045..77badb2 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -66,9 +66,10 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
} else {
if (bind)
return 0;
- tcf_idr_release(*a, bind);
- if (!ovr)
+ if (!ovr) {
+ tcf_idr_release(*a, bind);
return -EEXIST;
+ }
}
p = to_tcf_nat(*a);
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 117e486..8c39adc 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -185,9 +185,10 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
} else {
if (bind)
return 0;
- tcf_idr_release(*a, bind);
- if (!ovr)
+ if (!ovr) {
+ tcf_idr_release(*a, bind);
return -EEXIST;
+ }
p = to_pedit(*a);
if (p->tcfp_nkeys && p->tcfp_nkeys != parm->nkeys) {
keys = kmalloc(ksize, GFP_KERNEL);
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 2971ba3..86d9417 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -111,10 +111,9 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla,
if (ret)
return ret;
ret = ACT_P_CREATED;
- } else {
+ } else if (!ovr) {
tcf_idr_release(*a, bind);
- if (!ovr)
- return -EEXIST;
+ return -EEXIST;
}
police = to_police(*a);
@@ -195,8 +194,7 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla,
failure:
qdisc_put_rtab(P_tab);
qdisc_put_rtab(R_tab);
- if (ret == ACT_P_CREATED)
- tcf_idr_release(*a, bind);
+ tcf_idr_release(*a, bind);
return err;
}
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 9bbd8e9..d2b0394 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -69,10 +69,9 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
if (ret)
return ret;
ret = ACT_P_CREATED;
- } else {
+ } else if (!ovr) {
tcf_idr_release(*a, bind);
- if (!ovr)
- return -EEXIST;
+ return -EEXIST;
}
s = to_sample(*a);
@@ -81,8 +80,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
s->psample_group_num = nla_get_u32(tb[TCA_SAMPLE_PSAMPLE_GROUP]);
psample_group = psample_group_get(net, s->psample_group_num);
if (!psample_group) {
- if (ret == ACT_P_CREATED)
- tcf_idr_release(*a, bind);
+ tcf_idr_release(*a, bind);
return -ENOMEM;
}
RCU_INIT_POINTER(s->psample_group, psample_group);
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 162f091..26eb153 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -130,9 +130,10 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
} else {
d = to_defact(*a);
- tcf_idr_release(*a, bind);
- if (!ovr)
+ if (!ovr) {
+ tcf_idr_release(*a, bind);
return -EEXIST;
+ }
reset_policy(d, defdata, parm);
}
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 4b9d616..bb416b7 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -136,9 +136,10 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
ret = ACT_P_CREATED;
} else {
d = to_skbedit(*a);
- tcf_idr_release(*a, bind);
- if (!ovr)
+ if (!ovr) {
+ tcf_idr_release(*a, bind);
return -EEXIST;
+ }
}
spin_lock_bh(&d->tcf_lock);
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index c1f9eda..e1c2e1c 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -142,10 +142,9 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
return ret;
ret = ACT_P_CREATED;
- } else {
+ } else if (!ovr) {
tcf_idr_release(*a, bind);
- if (!ovr)
- return -EEXIST;
+ return -EEXIST;
}
d = to_skbmod(*a);
@@ -153,8 +152,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
ASSERT_RTNL();
p = kzalloc(sizeof(struct tcf_skbmod_params), GFP_KERNEL);
if (unlikely(!p)) {
- if (ret == ACT_P_CREATED)
- tcf_idr_release(*a, bind);
+ tcf_idr_release(*a, bind);
return -ENOMEM;
}
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index e4f9718..d88c151 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -165,10 +165,9 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
return ret;
ret = ACT_P_CREATED;
- } else {
+ } else if (!ovr) {
tcf_idr_release(*a, bind);
- if (!ovr)
- return -EEXIST;
+ return -EEXIST;
}
t = to_tunnel_key(*a);
@@ -176,8 +175,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
ASSERT_RTNL();
params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
if (unlikely(!params_new)) {
- if (ret == ACT_P_CREATED)
- tcf_idr_release(*a, bind);
+ tcf_idr_release(*a, bind);
return -ENOMEM;
}
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 6a949f5..f747fb6 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -185,10 +185,9 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
return ret;
ret = ACT_P_CREATED;
- } else {
+ } else if (!ovr) {
tcf_idr_release(*a, bind);
- if (!ovr)
- return -EEXIST;
+ return -EEXIST;
}
v = to_vlan(*a);
@@ -196,8 +195,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
ASSERT_RTNL();
p = kzalloc(sizeof(*p), GFP_KERNEL);
if (!p) {
- if (ret == ACT_P_CREATED)
- tcf_idr_release(*a, bind);
+ tcf_idr_release(*a, bind);
return -ENOMEM;
}
--
2.7.5
Change type of action reference counter to refcount_t.
Change type of action bind counter to atomic_t.
This type is used to allow decrementing bind counter without testing
for 0 result.
Signed-off-by: Vlad Buslov <[email protected]>
---
include/net/act_api.h | 5 +++--
net/sched/act_api.c | 32 ++++++++++++++++++++++----------
net/sched/act_bpf.c | 4 ++--
net/sched/act_connmark.c | 4 ++--
net/sched/act_csum.c | 4 ++--
net/sched/act_gact.c | 4 ++--
net/sched/act_ife.c | 4 ++--
net/sched/act_ipt.c | 4 ++--
net/sched/act_mirred.c | 4 ++--
net/sched/act_nat.c | 4 ++--
net/sched/act_pedit.c | 4 ++--
net/sched/act_police.c | 4 ++--
net/sched/act_sample.c | 4 ++--
net/sched/act_simple.c | 4 ++--
net/sched/act_skbedit.c | 4 ++--
net/sched/act_skbmod.c | 4 ++--
net/sched/act_tunnel_key.c | 4 ++--
net/sched/act_vlan.c | 4 ++--
18 files changed, 57 insertions(+), 44 deletions(-)
diff --git a/include/net/act_api.h b/include/net/act_api.h
index f7b59ef..e634014 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -6,6 +6,7 @@
* Public action API for classifiers/qdiscs
*/
+#include <linux/refcount.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>
#include <net/net_namespace.h>
@@ -26,8 +27,8 @@ struct tc_action {
struct tcf_idrinfo *idrinfo;
u32 tcfa_index;
- int tcfa_refcnt;
- int tcfa_bindcnt;
+ refcount_t tcfa_refcnt;
+ atomic_t tcfa_bindcnt;
u32 tcfa_capab;
int tcfa_action;
struct tcf_t tcfa_tm;
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index ce829a3..96b0878 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -105,14 +105,26 @@ int __tcf_idr_release(struct tc_action *p, bool bind, bool strict)
ASSERT_RTNL();
+ /* Release with strict==1 and bind==0 is only called through act API
+ * interface (classifiers always bind). Only case when action with
+ * positive reference count and zero bind count can exist is when it was
+ * also created with act API (unbinding last classifier will destroy the
+ * action if it was created by classifier). So only case when bind count
+ * can be changed after initial check is when unbound action is
+ * destroyed by act API while classifier binds to action with same id
+ * concurrently. This result either creation of new action(same behavior
+ * as before), or reusing existing action if concurrent process
+ * increments reference count before action is deleted. Both scenarios
+ * are acceptable.
+ */
if (p) {
if (bind)
- p->tcfa_bindcnt--;
- else if (strict && p->tcfa_bindcnt > 0)
+ atomic_dec(&p->tcfa_bindcnt);
+ else if (strict && atomic_read(&p->tcfa_bindcnt) > 0)
return -EPERM;
- p->tcfa_refcnt--;
- if (p->tcfa_bindcnt <= 0 && p->tcfa_refcnt <= 0) {
+ if (atomic_read(&p->tcfa_bindcnt) <= 0 &&
+ refcount_dec_and_test(&p->tcfa_refcnt)) {
if (p->ops->cleanup)
p->ops->cleanup(p);
tcf_idr_remove(p->idrinfo, p);
@@ -304,8 +316,8 @@ bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
if (index && p) {
if (bind)
- p->tcfa_bindcnt++;
- p->tcfa_refcnt++;
+ atomic_inc(&p->tcfa_bindcnt);
+ refcount_inc(&p->tcfa_refcnt);
*a = p;
return true;
}
@@ -324,9 +336,9 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
if (unlikely(!p))
return -ENOMEM;
- p->tcfa_refcnt = 1;
+ refcount_set(&p->tcfa_refcnt, 1);
if (bind)
- p->tcfa_bindcnt = 1;
+ atomic_set(&p->tcfa_bindcnt, 1);
if (cpustats) {
p->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
@@ -782,7 +794,7 @@ static void cleanup_a(struct list_head *actions, int ovr)
return;
list_for_each_entry(a, actions, list)
- a->tcfa_refcnt--;
+ refcount_dec(&a->tcfa_refcnt);
}
int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
@@ -810,7 +822,7 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
act->order = i;
sz += tcf_action_fill_size(act);
if (ovr)
- act->tcfa_refcnt++;
+ refcount_inc(&act->tcfa_refcnt);
list_add_tail(&act->list, actions);
}
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 18089c0..15a2a53 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -141,8 +141,8 @@ static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *act,
struct tcf_bpf *prog = to_bpf(act);
struct tc_act_bpf opt = {
.index = prog->tcf_index,
- .refcnt = prog->tcf_refcnt - ref,
- .bindcnt = prog->tcf_bindcnt - bind,
+ .refcnt = refcount_read(&prog->tcf_refcnt) - ref,
+ .bindcnt = atomic_read(&prog->tcf_bindcnt) - bind,
.action = prog->tcf_action,
};
struct tcf_t tm;
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index e4b880f..1888650 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -154,8 +154,8 @@ static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a,
struct tc_connmark opt = {
.index = ci->tcf_index,
- .refcnt = ci->tcf_refcnt - ref,
- .bindcnt = ci->tcf_bindcnt - bind,
+ .refcnt = refcount_read(&ci->tcf_refcnt) - ref,
+ .bindcnt = atomic_read(&ci->tcf_bindcnt) - bind,
.action = ci->tcf_action,
.zone = ci->zone,
};
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 7e28b2c..0572682 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -597,8 +597,8 @@ static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind,
struct tcf_csum_params *params;
struct tc_csum opt = {
.index = p->tcf_index,
- .refcnt = p->tcf_refcnt - ref,
- .bindcnt = p->tcf_bindcnt - bind,
+ .refcnt = refcount_read(&p->tcf_refcnt) - ref,
+ .bindcnt = atomic_read(&p->tcf_bindcnt) - bind,
};
struct tcf_t t;
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 4dc4f15..ca83deb 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -169,8 +169,8 @@ static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a,
struct tcf_gact *gact = to_gact(a);
struct tc_gact opt = {
.index = gact->tcf_index,
- .refcnt = gact->tcf_refcnt - ref,
- .bindcnt = gact->tcf_bindcnt - bind,
+ .refcnt = refcount_read(&gact->tcf_refcnt) - ref,
+ .bindcnt = atomic_read(&gact->tcf_bindcnt) - bind,
.action = gact->tcf_action,
};
struct tcf_t t;
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index a5994cf..689f63e 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -598,8 +598,8 @@ static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind,
struct tcf_ife_params *p = rtnl_dereference(ife->params);
struct tc_ife opt = {
.index = ife->tcf_index,
- .refcnt = ife->tcf_refcnt - ref,
- .bindcnt = ife->tcf_bindcnt - bind,
+ .refcnt = refcount_read(&ife->tcf_refcnt) - ref,
+ .bindcnt = atomic_read(&ife->tcf_bindcnt) - bind,
.action = ife->tcf_action,
.flags = p->flags,
};
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 14c312d..7bce88d 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -280,8 +280,8 @@ static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind,
if (unlikely(!t))
goto nla_put_failure;
- c.bindcnt = ipt->tcf_bindcnt - bind;
- c.refcnt = ipt->tcf_refcnt - ref;
+ c.bindcnt = atomic_read(&ipt->tcf_bindcnt) - bind;
+ c.refcnt = refcount_read(&ipt->tcf_refcnt) - ref;
strcpy(t->u.user.name, ipt->tcfi_t->u.kernel.target->name);
if (nla_put(skb, TCA_IPT_TARG, ipt->tcfi_t->u.user.target_size, t) ||
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index fd34015..82a8bdd 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -250,8 +250,8 @@ static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind,
struct tc_mirred opt = {
.index = m->tcf_index,
.action = m->tcf_action,
- .refcnt = m->tcf_refcnt - ref,
- .bindcnt = m->tcf_bindcnt - bind,
+ .refcnt = refcount_read(&m->tcf_refcnt) - ref,
+ .bindcnt = atomic_read(&m->tcf_bindcnt) - bind,
.eaction = m->tcfm_eaction,
.ifindex = dev ? dev->ifindex : 0,
};
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 4b5848b..457c2ae 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -257,8 +257,8 @@ static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a,
.index = p->tcf_index,
.action = p->tcf_action,
- .refcnt = p->tcf_refcnt - ref,
- .bindcnt = p->tcf_bindcnt - bind,
+ .refcnt = refcount_read(&p->tcf_refcnt) - ref,
+ .bindcnt = atomic_read(&p->tcf_bindcnt) - bind,
};
struct tcf_t t;
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 8a925c7..0102b29 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -391,8 +391,8 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,
opt->nkeys = p->tcfp_nkeys;
opt->flags = p->tcfp_flags;
opt->action = p->tcf_action;
- opt->refcnt = p->tcf_refcnt - ref;
- opt->bindcnt = p->tcf_bindcnt - bind;
+ opt->refcnt = refcount_read(&p->tcf_refcnt) - ref;
+ opt->bindcnt = atomic_read(&p->tcf_bindcnt) - bind;
if (p->tcfp_keys_ex) {
tcf_pedit_key_ex_dump(skb, p->tcfp_keys_ex, p->tcfp_nkeys);
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 4e72bc2..a789b80 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -274,8 +274,8 @@ static int tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a,
.action = police->tcf_action,
.mtu = police->tcfp_mtu,
.burst = PSCHED_NS2TICKS(police->tcfp_burst),
- .refcnt = police->tcf_refcnt - ref,
- .bindcnt = police->tcf_bindcnt - bind,
+ .refcnt = refcount_read(&police->tcf_refcnt) - ref,
+ .bindcnt = atomic_read(&police->tcf_bindcnt) - bind,
};
struct tcf_t t;
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 5db3584..4a46978 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -173,8 +173,8 @@ static int tcf_sample_dump(struct sk_buff *skb, struct tc_action *a,
struct tc_sample opt = {
.index = s->tcf_index,
.action = s->tcf_action,
- .refcnt = s->tcf_refcnt - ref,
- .bindcnt = s->tcf_bindcnt - bind,
+ .refcnt = refcount_read(&s->tcf_refcnt) - ref,
+ .bindcnt = atomic_read(&s->tcf_bindcnt) - bind,
};
struct tcf_t t;
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 9618b4a8..95d5985 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -148,8 +148,8 @@ static int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
struct tcf_defact *d = to_defact(a);
struct tc_defact opt = {
.index = d->tcf_index,
- .refcnt = d->tcf_refcnt - ref,
- .bindcnt = d->tcf_bindcnt - bind,
+ .refcnt = refcount_read(&d->tcf_refcnt) - ref,
+ .bindcnt = atomic_read(&d->tcf_bindcnt) - bind,
.action = d->tcf_action,
};
struct tcf_t t;
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index ddf69fc..adf0a72 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -172,8 +172,8 @@ static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
struct tcf_skbedit *d = to_skbedit(a);
struct tc_skbedit opt = {
.index = d->tcf_index,
- .refcnt = d->tcf_refcnt - ref,
- .bindcnt = d->tcf_bindcnt - bind,
+ .refcnt = refcount_read(&d->tcf_refcnt) - ref,
+ .bindcnt = atomic_read(&d->tcf_bindcnt) - bind,
.action = d->tcf_action,
};
struct tcf_t t;
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index bbcbdce..a0d9abb 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -202,8 +202,8 @@ static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a,
struct tcf_skbmod_params *p = rtnl_dereference(d->skbmod_p);
struct tc_skbmod opt = {
.index = d->tcf_index,
- .refcnt = d->tcf_refcnt - ref,
- .bindcnt = d->tcf_bindcnt - bind,
+ .refcnt = refcount_read(&d->tcf_refcnt) - ref,
+ .bindcnt = atomic_read(&d->tcf_bindcnt) - bind,
.action = d->tcf_action,
};
struct tcf_t t;
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 626dac8..c6e5069 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -252,8 +252,8 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a,
struct tcf_tunnel_key_params *params;
struct tc_tunnel_key opt = {
.index = t->tcf_index,
- .refcnt = t->tcf_refcnt - ref,
- .bindcnt = t->tcf_bindcnt - bind,
+ .refcnt = refcount_read(&t->tcf_refcnt) - ref,
+ .bindcnt = atomic_read(&t->tcf_bindcnt) - bind,
};
struct tcf_t tm;
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 8536046..8dda784 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -237,8 +237,8 @@ static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a,
struct tcf_vlan_params *p = rtnl_dereference(v->vlan_p);
struct tc_vlan opt = {
.index = v->tcf_index,
- .refcnt = v->tcf_refcnt - ref,
- .bindcnt = v->tcf_bindcnt - bind,
+ .refcnt = refcount_read(&v->tcf_refcnt) - ref,
+ .bindcnt = atomic_read(&v->tcf_bindcnt) - bind,
.action = v->tcf_action,
.v_action = p->tcfv_action,
};
--
2.7.5
Without rtnl lock protection it is no longer safe to use pointer to tc
action without holding reference to it. (it can be destroyed concurrently)
Remove unsafe action idr lookup function. Instead of it, implement safe tcf
idr check function that atomically looks up action in idr and increments
its reference and bind counters.
Implement both action search and check using new safe function.
Signed-off-by: Vlad Buslov <[email protected]>
---
net/sched/act_api.c | 38 ++++++++++++++++----------------------
1 file changed, 16 insertions(+), 22 deletions(-)
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 1331beb..9459cce 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -284,44 +284,38 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
}
EXPORT_SYMBOL(tcf_generic_walker);
-static struct tc_action *tcf_idr_lookup(u32 index, struct tcf_idrinfo *idrinfo)
+bool __tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
+ int bind)
{
- struct tc_action *p = NULL;
+ struct tcf_idrinfo *idrinfo = tn->idrinfo;
+ struct tc_action *p;
spin_lock_bh(&idrinfo->lock);
p = idr_find(&idrinfo->action_idr, index);
+ if (p) {
+ refcount_inc(&p->tcfa_refcnt);
+ if (bind)
+ atomic_inc(&p->tcfa_bindcnt);
+ }
spin_unlock_bh(&idrinfo->lock);
- return p;
+ if (p) {
+ *a = p;
+ return true;
+ }
+ return false;
}
int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index)
{
- struct tcf_idrinfo *idrinfo = tn->idrinfo;
- struct tc_action *p = tcf_idr_lookup(index, idrinfo);
-
- if (p) {
- *a = p;
- return 1;
- }
- return 0;
+ return __tcf_idr_check(tn, index, a, 0);
}
EXPORT_SYMBOL(tcf_idr_search);
bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
int bind)
{
- struct tcf_idrinfo *idrinfo = tn->idrinfo;
- struct tc_action *p = tcf_idr_lookup(index, idrinfo);
-
- if (index && p) {
- if (bind)
- atomic_inc(&p->tcfa_bindcnt);
- refcount_inc(&p->tcfa_refcnt);
- *a = p;
- return true;
- }
- return false;
+ return __tcf_idr_check(tn, index, a, bind);
}
EXPORT_SYMBOL(tcf_idr_check);
--
2.7.5
Change action API to assume that action init function always takes
reference to action, even when overwriting existing action. This is
necessary because action API continues to use action pointer after init
function is done. At this point action becomes accessible for concurrent
modifications so user must always hold reference to it.
Implement helper put list function to atomically release list of actions
after action API init code is done using them.
Signed-off-by: Vlad Buslov <[email protected]>
---
net/sched/act_api.c | 38 +++++++++++++++++---------------------
1 file changed, 17 insertions(+), 21 deletions(-)
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index d324a4c..3f02cd1 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -592,6 +592,18 @@ static int tcf_action_put(struct tc_action *p)
return __tcf_action_put(p, false);
}
+static void tcf_action_put_lst(struct list_head *actions)
+{
+ struct tc_action *a, *tmp;
+
+ list_for_each_entry_safe(a, tmp, actions, list) {
+ const struct tc_action_ops *ops = a->ops;
+
+ if (tcf_action_put(a))
+ module_put(ops->owner);
+ }
+}
+
int
tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
{
@@ -799,17 +811,6 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
return ERR_PTR(err);
}
-static void cleanup_a(struct list_head *actions, int ovr)
-{
- struct tc_action *a;
-
- if (!ovr)
- return;
-
- list_for_each_entry(a, actions, list)
- refcount_dec(&a->tcfa_refcnt);
-}
-
int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
struct nlattr *est, char *name, int ovr, int bind,
struct list_head *actions, size_t *attr_size, bool unlocked,
@@ -834,17 +835,10 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
}
act->order = i;
sz += tcf_action_fill_size(act);
- if (ovr)
- refcount_inc(&act->tcfa_refcnt);
list_add_tail(&act->list, actions);
}
*attr_size = tcf_action_full_attrs_size(sz);
-
- /* Remove the temp refcnt which was necessary to protect against
- * destroying an existing action which was being replaced
- */
- cleanup_a(actions, ovr);
return 0;
err:
@@ -1196,8 +1190,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
return ret;
}
err:
- if (event != RTM_GETACTION)
- tcf_action_destroy(&actions, 0);
+ tcf_action_put_lst(&actions);
return ret;
}
@@ -1239,8 +1232,11 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
&attr_size, false, extack);
if (ret)
return ret;
+ ret = tcf_add_notify(net, n, &actions, portid, attr_size, extack);
+ if (ovr)
+ tcf_action_put_lst(&actions);
- return tcf_add_notify(net, n, &actions, portid, attr_size, extack);
+ return ret;
}
static u32 tcaa_root_flags_allowed = TCA_FLAG_LARGE_DUMP_ON;
--
2.7.5
Extend action ops with 'delete' function. Each action type to implement its
own delete function that doesn't depend on rtnl lock.
Signed-off-by: Vlad Buslov <[email protected]>
---
include/net/act_api.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/include/net/act_api.h b/include/net/act_api.h
index e634014..73175a3 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -100,6 +100,7 @@ struct tc_action_ops {
void (*stats_update)(struct tc_action *, u64, u32, u64);
size_t (*get_fill_size)(const struct tc_action *act);
struct net_device *(*get_dev)(const struct tc_action *a);
+ int (*delete)(struct net *net, u32 index);
};
struct tc_action_net {
--
2.7.5
Mon, May 14, 2018 at 04:27:02PM CEST, [email protected] wrote:
>Implement functions to atomically update and free action cookie
>using rcu mechanism.
>
>Signed-off-by: Vlad Buslov <[email protected]>
Signed-off-by: Jiri Pirko <[email protected]>
Mon, May 14, 2018 at 04:27:03PM CEST, [email protected] wrote:
>Change type of action reference counter to refcount_t.
>
>Change type of action bind counter to atomic_t.
>This type is used to allow decrementing bind counter without testing
>for 0 result.
>
>Signed-off-by: Vlad Buslov <[email protected]>
Signed-off-by: Jiri Pirko <[email protected]>
Mon, May 14, 2018 at 04:27:04PM CEST, [email protected] wrote:
>Extend action ops with 'delete' function. Each action type to implement its
>own delete function that doesn't depend on rtnl lock.
>
>Signed-off-by: Vlad Buslov <[email protected]>
>---
> include/net/act_api.h | 1 +
> 1 file changed, 1 insertion(+)
>
>diff --git a/include/net/act_api.h b/include/net/act_api.h
>index e634014..73175a3 100644
>--- a/include/net/act_api.h
>+++ b/include/net/act_api.h
>@@ -100,6 +100,7 @@ struct tc_action_ops {
> void (*stats_update)(struct tc_action *, u64, u32, u64);
> size_t (*get_fill_size)(const struct tc_action *act);
> struct net_device *(*get_dev)(const struct tc_action *a);
>+ int (*delete)(struct net *net, u32 index);
Probably better to squash this to patch 14.
Mon, May 14, 2018 at 04:27:05PM CEST, [email protected] wrote:
>Add additional 'unlocked' argument to act API init functions.
>Argument is true when rtnl lock is not taken and false otherwise.
>It is required to implement actions that need to release rtnl lock before
>loading kernel module and reacquire if afterwards.
>
>Signed-off-by: Vlad Buslov <[email protected]>
[...]
>@@ -721,9 +722,11 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
> a_o = tc_lookup_action_n(act_name);
> if (a_o == NULL) {
> #ifdef CONFIG_MODULES
>- rtnl_unlock();
>+ if (!unlocked)
>+ rtnl_unlock();
> request_module("act_%s", act_name);
>- rtnl_lock();
>+ if (!unlocked)
>+ rtnl_lock();
Although I don't like this conditional locking scheme, I see no other
way to solve this :/ But I think would be better perhaps to rename
"unlocked" to something like "rtnl_held".
[...]
Mon, May 14, 2018 at 04:27:06PM CEST, [email protected] wrote:
>Without rtnl lock protection it is no longer safe to use pointer to tc
>action without holding reference to it. (it can be destroyed concurrently)
>
>Remove unsafe action idr lookup function. Instead of it, implement safe tcf
>idr check function that atomically looks up action in idr and increments
>its reference and bind counters.
>
>Implement both action search and check using new safe function.
>
>Signed-off-by: Vlad Buslov <[email protected]>
>---
> net/sched/act_api.c | 38 ++++++++++++++++----------------------
> 1 file changed, 16 insertions(+), 22 deletions(-)
>
>diff --git a/net/sched/act_api.c b/net/sched/act_api.c
>index 1331beb..9459cce 100644
>--- a/net/sched/act_api.c
>+++ b/net/sched/act_api.c
>@@ -284,44 +284,38 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
> }
> EXPORT_SYMBOL(tcf_generic_walker);
>
>-static struct tc_action *tcf_idr_lookup(u32 index, struct tcf_idrinfo *idrinfo)
>+bool __tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
>+ int bind)
> {
>- struct tc_action *p = NULL;
>+ struct tcf_idrinfo *idrinfo = tn->idrinfo;
>+ struct tc_action *p;
>
> spin_lock_bh(&idrinfo->lock);
Why "_bh" variant is necessary here?
> p = idr_find(&idrinfo->action_idr, index);
>+ if (p) {
>+ refcount_inc(&p->tcfa_refcnt);
>+ if (bind)
>+ atomic_inc(&p->tcfa_bindcnt);
>+ }
> spin_unlock_bh(&idrinfo->lock);
[...]
Mon, May 14, 2018 at 04:27:07PM CEST, [email protected] wrote:
>Implement helper function to delete action using new action ops delete
>function implemented by each action for lockless execution.
Reading this sentense for 4 times. I still don't understand what you say :(
>
>Implement action put function that releases reference to action and frees
When you write "Implement action put function" it is cryptic. Just
say "Implement function __tcf_action_put()".
>it if necessary. Refactor action deletion code to use new put function and
>not to rely on rtnl lock. Remove rtnl lock assertions that are no longer
>needed.
[...]
Mon, May 14, 2018 at 05:12:22PM CEST, [email protected] wrote:
>Mon, May 14, 2018 at 04:27:04PM CEST, [email protected] wrote:
>>Extend action ops with 'delete' function. Each action type to implement its
>>own delete function that doesn't depend on rtnl lock.
>>
>>Signed-off-by: Vlad Buslov <[email protected]>
>>---
>> include/net/act_api.h | 1 +
>> 1 file changed, 1 insertion(+)
>>
>>diff --git a/include/net/act_api.h b/include/net/act_api.h
>>index e634014..73175a3 100644
>>--- a/include/net/act_api.h
>>+++ b/include/net/act_api.h
>>@@ -100,6 +100,7 @@ struct tc_action_ops {
>> void (*stats_update)(struct tc_action *, u64, u32, u64);
>> size_t (*get_fill_size)(const struct tc_action *act);
>> struct net_device *(*get_dev)(const struct tc_action *a);
>>+ int (*delete)(struct net *net, u32 index);
>
>Probably better to squash this to patch 14.
Oh, I see you call it in patch 6. Fine.
Mon, May 14, 2018 at 04:27:07PM CEST, [email protected] wrote:
[...]
>+static int tcf_action_del_1(struct net *net, char *kind, u32 index,
>+ struct netlink_ext_ack *extack)
>+{
>+ const struct tc_action_ops *ops;
>+ int err = -EINVAL;
>+
>+ ops = tc_lookup_action_n(kind);
>+ if (!ops) {
How this can happen? Apparently you hold reference to the module since
you put it couple lines below. In that case, I don't really understand
why you need to lookup and just don't use "ops" pointer you have in
tcf_action_delete().
>+ NL_SET_ERR_MSG(extack, "Specified TC action not found");
>+ goto err_out;
>+ }
>+
>+ if (ops->delete)
>+ err = ops->delete(net, index);
>+
>+ module_put(ops->owner);
>+err_out:
>+ return err;
>+}
>+
> static int tca_action_flush(struct net *net, struct nlattr *nla,
> struct nlmsghdr *n, u32 portid,
> struct netlink_ext_ack *extack)
>@@ -1052,6 +1088,36 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
> return err;
> }
>
>+static int tcf_action_delete(struct net *net, struct list_head *actions,
>+ struct netlink_ext_ack *extack)
>+{
>+ int ret;
>+ struct tc_action *a, *tmp;
>+ char kind[IFNAMSIZ];
>+ u32 act_index;
>+
>+ list_for_each_entry_safe(a, tmp, actions, list) {
>+ const struct tc_action_ops *ops = a->ops;
>+
>+ /* Actions can be deleted concurrently
>+ * so we must save their type and id to search again
>+ * after reference is released.
>+ */
>+ strncpy(kind, a->ops->kind, sizeof(kind) - 1);
>+ act_index = a->tcfa_index;
>+
>+ list_del(&a->list);
>+ if (tcf_action_put(a))
>+ module_put(ops->owner);
>+
>+ /* now do the delete */
>+ ret = tcf_action_del_1(net, kind, act_index, extack);
>+ if (ret < 0)
>+ return ret;
>+ }
>+ return 0;
>+}
>+
[...]
On 14/05/18 10:27 AM, Vlad Buslov wrote:
> Currently, all netlink protocol handlers for updating rules, actions and
> qdiscs are protected with single global rtnl lock which removes any
> possibility for parallelism. This patch set is a first step to remove
> rtnl lock dependency from TC rules update path. It updates act API to
> use atomic operations, rcu and spinlocks for fine-grained locking. It
> also extend API with functions that are needed to update existing
> actions for parallel execution.
>
> Outline of changes:
> - Change tc action to use atomic reference and bind counters, rcu
> mechanism for cookie update.
> - Extend action ops API with 'delete' function and 'unlocked' flag.
> - Change action API to work with actions in lockless manner based on
> primitives implemented in previous patches.
> - Extend action API with new functions necessary to implement unlocked
> actions.
Please run all the tdc tests with these changes. This area has almost
good test coverage at this point. If you need help just ping me.
cheers,
jamal
On Mon 14 May 2018 at 16:23, Jiri Pirko <[email protected]> wrote:
> Mon, May 14, 2018 at 04:27:06PM CEST, [email protected] wrote:
>>Without rtnl lock protection it is no longer safe to use pointer to tc
>>action without holding reference to it. (it can be destroyed concurrently)
>>
>>Remove unsafe action idr lookup function. Instead of it, implement safe tcf
>>idr check function that atomically looks up action in idr and increments
>>its reference and bind counters.
>>
>>Implement both action search and check using new safe function.
>>
>>Signed-off-by: Vlad Buslov <[email protected]>
>>---
>> net/sched/act_api.c | 38 ++++++++++++++++----------------------
>> 1 file changed, 16 insertions(+), 22 deletions(-)
>>
>>diff --git a/net/sched/act_api.c b/net/sched/act_api.c
>>index 1331beb..9459cce 100644
>>--- a/net/sched/act_api.c
>>+++ b/net/sched/act_api.c
>>@@ -284,44 +284,38 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
>> }
>> EXPORT_SYMBOL(tcf_generic_walker);
>>
>>-static struct tc_action *tcf_idr_lookup(u32 index, struct tcf_idrinfo *idrinfo)
>>+bool __tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
>>+ int bind)
>> {
>>- struct tc_action *p = NULL;
>>+ struct tcf_idrinfo *idrinfo = tn->idrinfo;
>>+ struct tc_action *p;
>>
>> spin_lock_bh(&idrinfo->lock);
>
> Why "_bh" variant is necessary here?
It is not my code.
>
>> p = idr_find(&idrinfo->action_idr, index);
>>+ if (p) {
>>+ refcount_inc(&p->tcfa_refcnt);
>>+ if (bind)
>>+ atomic_inc(&p->tcfa_bindcnt);
>>+ }
>> spin_unlock_bh(&idrinfo->lock);
>
> [...]
On Mon 14 May 2018 at 16:47, Jiri Pirko <[email protected]> wrote:
> Mon, May 14, 2018 at 04:27:07PM CEST, [email protected] wrote:
>
> [...]
>
>
>>+static int tcf_action_del_1(struct net *net, char *kind, u32 index,
>>+ struct netlink_ext_ack *extack)
>>+{
>>+ const struct tc_action_ops *ops;
>>+ int err = -EINVAL;
>>+
>>+ ops = tc_lookup_action_n(kind);
>>+ if (!ops) {
>
> How this can happen? Apparently you hold reference to the module since
> you put it couple lines below. In that case, I don't really understand
> why you need to lookup and just don't use "ops" pointer you have in
> tcf_action_delete().
The problem is that I cant really delete action while holding reference
to it. I can try to decrement reference twice, however that might result
race condition if another task tries to delete that action at the same
time.
Imagine situation:
1. Action is in actions idr, refcount==1
2. Task tries to delete action, takes reference while working with it,
refcount==2
3. Another task tries to delete same action, takes reference,
refcount==3
4. First task decrements refcount twice, refcount==1
5. At the same time second task decrements refcount twice, refcount==-1
My solution is to save action index, but release the reference. Then try
to lookup action again and delete it if it is still in idr. (was not
concurrently deleted)
Now same potential race condition with this patch:
1. Action is in actions idr, refcount==1
2. Task tries to delete action, takes reference while working with it,
refcount==2
3. Another task tries to delete same action, takes reference,
refcount==3
4. First task releases reference and deletes actions from idr, which
results another refcount decrement, refcount==1
5. At the same time second task releases reference to action,
refcount==0, action is deleted
6. When task tries to lookup-delete action from idr by index, action is
not found. This case is handled correctly by code and no negative
overflow of refcount happens.
>
>
>>+ NL_SET_ERR_MSG(extack, "Specified TC action not found");
>>+ goto err_out;
>>+ }
>>+
>>+ if (ops->delete)
>>+ err = ops->delete(net, index);
>>+
>>+ module_put(ops->owner);
>>+err_out:
>>+ return err;
>>+}
>>+
>> static int tca_action_flush(struct net *net, struct nlattr *nla,
>> struct nlmsghdr *n, u32 portid,
>> struct netlink_ext_ack *extack)
>>@@ -1052,6 +1088,36 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
>> return err;
>> }
>>
>>+static int tcf_action_delete(struct net *net, struct list_head *actions,
>>+ struct netlink_ext_ack *extack)
>>+{
>>+ int ret;
>>+ struct tc_action *a, *tmp;
>>+ char kind[IFNAMSIZ];
>>+ u32 act_index;
>>+
>>+ list_for_each_entry_safe(a, tmp, actions, list) {
>>+ const struct tc_action_ops *ops = a->ops;
>>+
>>+ /* Actions can be deleted concurrently
>>+ * so we must save their type and id to search again
>>+ * after reference is released.
>>+ */
>>+ strncpy(kind, a->ops->kind, sizeof(kind) - 1);
>>+ act_index = a->tcfa_index;
>>+
>>+ list_del(&a->list);
>>+ if (tcf_action_put(a))
>>+ module_put(ops->owner);
>>+
>>+ /* now do the delete */
>>+ ret = tcf_action_del_1(net, kind, act_index, extack);
>>+ if (ret < 0)
>>+ return ret;
>>+ }
>>+ return 0;
>>+}
>>+
>
> [...]
On Mon 14 May 2018 at 18:03, Jamal Hadi Salim <[email protected]> wrote:
> On 14/05/18 10:27 AM, Vlad Buslov wrote:
>> Currently, all netlink protocol handlers for updating rules, actions and
>> qdiscs are protected with single global rtnl lock which removes any
>> possibility for parallelism. This patch set is a first step to remove
>> rtnl lock dependency from TC rules update path. It updates act API to
>> use atomic operations, rcu and spinlocks for fine-grained locking. It
>> also extend API with functions that are needed to update existing
>> actions for parallel execution.
>>
>> Outline of changes:
>> - Change tc action to use atomic reference and bind counters, rcu
>> mechanism for cookie update.
>> - Extend action ops API with 'delete' function and 'unlocked' flag.
>> - Change action API to work with actions in lockless manner based on
>> primitives implemented in previous patches.
>> - Extend action API with new functions necessary to implement unlocked
>> actions.
>
> Please run all the tdc tests with these changes. This area has almost
> good test coverage at this point. If you need help just ping me.
>
> cheers,
> jamal
Hello Jamal,
I'm trying to run tdc, but keep getting following error even on clean
branch without my patches:
Test d190: Add police action with maximum index
-----> teardown stage *** Could not execute: "$TC actions flush action mirred"
-----> teardown stage *** Error message: "RTNETLINK answers: Invalid argument
We have an error flushing
"
-----> teardown stage *** Aborting test run.
<_io.BufferedReader name=3> *** stdout ***
<_io.BufferedReader name=5> *** stderr ***
"-----> teardown stage" did not complete successfully
Exception <class '__main__.PluginMgrTestFail'> ('teardown', 'RTNETLINK answers: Invalid argument\nWe have an error talking to the kernel\nCommand "action" is unknown, try "tc actions help".\n', '"-----> teardown stage" did not complete suc
cessfully') (caught in test_runner, running test 54 d190 Add police action with maximum index stage teardown)
---------------
traceback
File "./tdc.py", line 293, in test_runner
tap += run_one_test(pm, args, index, tidx)
File "./tdc.py", line 229, in run_one_test
prepare_env(args, pm, 'teardown', '-----> teardown stage', tidx['teardown'], procout)
File "./tdc.py", line 177, in prepare_env
'"{}" did not complete successfully'.format(prefix))
---------------
accumulated output for this test:
RTNETLINK answers: Invalid argument
We have an error talking to the kernel
Command "action" is unknown, try "tc actions help".
---------------
I've updated both iproute2 and kernel to latest master and net-next
respectively, but error persists. Am I missing something? Maybe I should
you some specific "blessed" version of TC?
Thanks,
Vlad
Hi Vlad,
Thank you for the patch! Perhaps something to improve:
[auto build test WARNING on net/master]
[also build test WARNING on v4.17-rc5 next-20180514]
[cannot apply to net-next/master]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
url: https://github.com/0day-ci/linux/commits/Vlad-Buslov/Modify-action-API-for-implementing-lockless-actions/20180515-025420
reproduce:
# apt-get install sparse
make ARCH=x86_64 allmodconfig
make C=1 CF=-D__CHECK_ENDIAN__
sparse warnings: (new ones prefixed by >>)
>> net/sched/act_api.c:71:15: sparse: incorrect type in initializer (different address spaces) @@ expected struct tc_cookie [noderef] <asn:4>*__ret @@ got [noderef] <asn:4>*__ret @@
net/sched/act_api.c:71:15: expected struct tc_cookie [noderef] <asn:4>*__ret
net/sched/act_api.c:71:15: got struct tc_cookie *new_cookie
>> net/sched/act_api.c:71:13: sparse: incorrect type in assignment (different address spaces) @@ expected struct tc_cookie *old @@ got struct tc_cookie [noderef] <struct tc_cookie *old @@
net/sched/act_api.c:71:13: expected struct tc_cookie *old
net/sched/act_api.c:71:13: got struct tc_cookie [noderef] <asn:4>*[assigned] __ret
>> net/sched/act_api.c:132:48: sparse: dereference of noderef expression
vim +71 net/sched/act_api.c
65
66 static void tcf_set_action_cookie(struct tc_cookie __rcu **old_cookie,
67 struct tc_cookie *new_cookie)
68 {
69 struct tc_cookie *old;
70
> 71 old = xchg(old_cookie, new_cookie);
72 if (old)
73 call_rcu(&old->rcu, tcf_free_cookie_rcu);
74 }
75
76 /* XXX: For standalone actions, we don't need a RCU grace period either, because
77 * actions are always connected to filters and filters are already destroyed in
78 * RCU callbacks, so after a RCU grace period actions are already disconnected
79 * from filters. Readers later can not find us.
80 */
81 static void free_tcf(struct tc_action *p)
82 {
83 free_percpu(p->cpu_bstats);
84 free_percpu(p->cpu_qstats);
85
86 tcf_set_action_cookie(&p->act_cookie, NULL);
87 if (p->goto_chain)
88 tcf_action_goto_chain_fini(p);
89
90 kfree(p);
91 }
92
93 static void tcf_idr_remove(struct tcf_idrinfo *idrinfo, struct tc_action *p)
94 {
95 spin_lock_bh(&idrinfo->lock);
96 idr_remove(&idrinfo->action_idr, p->tcfa_index);
97 spin_unlock_bh(&idrinfo->lock);
98 gen_kill_estimator(&p->tcfa_rate_est);
99 free_tcf(p);
100 }
101
102 int __tcf_idr_release(struct tc_action *p, bool bind, bool strict)
103 {
104 int ret = 0;
105
106 ASSERT_RTNL();
107
108 if (p) {
109 if (bind)
110 p->tcfa_bindcnt--;
111 else if (strict && p->tcfa_bindcnt > 0)
112 return -EPERM;
113
114 p->tcfa_refcnt--;
115 if (p->tcfa_bindcnt <= 0 && p->tcfa_refcnt <= 0) {
116 if (p->ops->cleanup)
117 p->ops->cleanup(p);
118 tcf_idr_remove(p->idrinfo, p);
119 ret = ACT_P_DELETED;
120 }
121 }
122
123 return ret;
124 }
125 EXPORT_SYMBOL(__tcf_idr_release);
126
127 static size_t tcf_action_shared_attrs_size(const struct tc_action *act)
128 {
129 u32 cookie_len = 0;
130
131 if (act->act_cookie)
> 132 cookie_len = nla_total_size(act->act_cookie->len);
133
134 return nla_total_size(0) /* action number nested */
135 + nla_total_size(IFNAMSIZ) /* TCA_ACT_KIND */
136 + cookie_len /* TCA_ACT_COOKIE */
137 + nla_total_size(0) /* TCA_ACT_STATS nested */
138 /* TCA_STATS_BASIC */
139 + nla_total_size_64bit(sizeof(struct gnet_stats_basic))
140 /* TCA_STATS_QUEUE */
141 + nla_total_size_64bit(sizeof(struct gnet_stats_queue))
142 + nla_total_size(0) /* TCA_OPTIONS nested */
143 + nla_total_size(sizeof(struct tcf_t)); /* TCA_GACT_TM */
144 }
145
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
Fixes: 446adedb5339 ("net: sched: always take reference to action")
Signed-off-by: Fengguang Wu <[email protected]>
---
act_api.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 9459cce..27e80cf 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -284,8 +284,8 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
}
EXPORT_SYMBOL(tcf_generic_walker);
-bool __tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
- int bind)
+static bool __tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
+ int bind)
{
struct tcf_idrinfo *idrinfo = tn->idrinfo;
struct tc_action *p;
Hi Vlad,
Thank you for the patch! Perhaps something to improve:
[auto build test WARNING on net/master]
[also build test WARNING on v4.17-rc5 next-20180514]
[cannot apply to net-next/master]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
url: https://github.com/0day-ci/linux/commits/Vlad-Buslov/Modify-action-API-for-implementing-lockless-actions/20180515-025420
reproduce:
# apt-get install sparse
make ARCH=x86_64 allmodconfig
make C=1 CF=-D__CHECK_ENDIAN__
sparse warnings: (new ones prefixed by >>)
net/sched/act_api.c:71:15: sparse: incorrect type in initializer (different address spaces) @@ expected struct tc_cookie [noderef] <asn:4>*__ret @@ got [noderef] <asn:4>*__ret @@
net/sched/act_api.c:71:15: expected struct tc_cookie [noderef] <asn:4>*__ret
net/sched/act_api.c:71:15: got struct tc_cookie *new_cookie
net/sched/act_api.c:71:13: sparse: incorrect type in assignment (different address spaces) @@ expected struct tc_cookie *old @@ got struct tc_cookie [noderef] <struct tc_cookie *old @@
net/sched/act_api.c:71:13: expected struct tc_cookie *old
net/sched/act_api.c:71:13: got struct tc_cookie [noderef] <asn:4>*[assigned] __ret
>> net/sched/act_api.c:287:6: sparse: symbol '__tcf_idr_check' was not declared. Should it be static?
net/sched/act_api.c:144:48: sparse: dereference of noderef expression
Please review and possibly fold the followup patch.
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
Mon, May 14, 2018 at 08:03:20PM CEST, [email protected] wrote:
>On 14/05/18 10:27 AM, Vlad Buslov wrote:
>> Currently, all netlink protocol handlers for updating rules, actions and
>> qdiscs are protected with single global rtnl lock which removes any
>> possibility for parallelism. This patch set is a first step to remove
>> rtnl lock dependency from TC rules update path. It updates act API to
>> use atomic operations, rcu and spinlocks for fine-grained locking. It
>> also extend API with functions that are needed to update existing
>> actions for parallel execution.
>>
>> Outline of changes:
>> - Change tc action to use atomic reference and bind counters, rcu
>> mechanism for cookie update.
>> - Extend action ops API with 'delete' function and 'unlocked' flag.
>> - Change action API to work with actions in lockless manner based on
>> primitives implemented in previous patches.
>> - Extend action API with new functions necessary to implement unlocked
>> actions.
>
>Please run all the tdc tests with these changes. This area has almost
>good test coverage at this point. If you need help just ping me.
Oh, that reminds me. Vlad, please run also:
tools/testing/selftests/net/forwarding/tc_*
Thanks!
Mon, May 14, 2018 at 08:49:07PM CEST, [email protected] wrote:
>
>On Mon 14 May 2018 at 16:23, Jiri Pirko <[email protected]> wrote:
>> Mon, May 14, 2018 at 04:27:06PM CEST, [email protected] wrote:
>>>Without rtnl lock protection it is no longer safe to use pointer to tc
>>>action without holding reference to it. (it can be destroyed concurrently)
>>>
>>>Remove unsafe action idr lookup function. Instead of it, implement safe tcf
>>>idr check function that atomically looks up action in idr and increments
>>>its reference and bind counters.
>>>
>>>Implement both action search and check using new safe function.
>>>
>>>Signed-off-by: Vlad Buslov <[email protected]>
>>>---
>>> net/sched/act_api.c | 38 ++++++++++++++++----------------------
>>> 1 file changed, 16 insertions(+), 22 deletions(-)
>>>
>>>diff --git a/net/sched/act_api.c b/net/sched/act_api.c
>>>index 1331beb..9459cce 100644
>>>--- a/net/sched/act_api.c
>>>+++ b/net/sched/act_api.c
>>>@@ -284,44 +284,38 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
>>> }
>>> EXPORT_SYMBOL(tcf_generic_walker);
>>>
>>>-static struct tc_action *tcf_idr_lookup(u32 index, struct tcf_idrinfo *idrinfo)
>>>+bool __tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
>>>+ int bind)
>>> {
>>>- struct tc_action *p = NULL;
>>>+ struct tcf_idrinfo *idrinfo = tn->idrinfo;
>>>+ struct tc_action *p;
>>>
>>> spin_lock_bh(&idrinfo->lock);
>>
>> Why "_bh" variant is necessary here?
>
>It is not my code.
Yeah, yet still I wonder :)
Mon, May 14, 2018 at 09:07:06PM CEST, [email protected] wrote:
>
>On Mon 14 May 2018 at 16:47, Jiri Pirko <[email protected]> wrote:
>> Mon, May 14, 2018 at 04:27:07PM CEST, [email protected] wrote:
>>
>> [...]
>>
>>
>>>+static int tcf_action_del_1(struct net *net, char *kind, u32 index,
>>>+ struct netlink_ext_ack *extack)
>>>+{
>>>+ const struct tc_action_ops *ops;
>>>+ int err = -EINVAL;
>>>+
>>>+ ops = tc_lookup_action_n(kind);
>>>+ if (!ops) {
>>
>> How this can happen? Apparently you hold reference to the module since
>> you put it couple lines below. In that case, I don't really understand
>> why you need to lookup and just don't use "ops" pointer you have in
>> tcf_action_delete().
>
>The problem is that I cant really delete action while holding reference
Wait a sec. I was talking about a "module reference" (module_put())
>to it. I can try to decrement reference twice, however that might result
>race condition if another task tries to delete that action at the same
>time.
>
>Imagine situation:
>1. Action is in actions idr, refcount==1
>2. Task tries to delete action, takes reference while working with it,
>refcount==2
>3. Another task tries to delete same action, takes reference,
>refcount==3
>4. First task decrements refcount twice, refcount==1
>5. At the same time second task decrements refcount twice, refcount==-1
>
>My solution is to save action index, but release the reference. Then try
>to lookup action again and delete it if it is still in idr. (was not
>concurrently deleted)
>
>Now same potential race condition with this patch:
>1. Action is in actions idr, refcount==1
>2. Task tries to delete action, takes reference while working with it,
>refcount==2
>3. Another task tries to delete same action, takes reference,
>refcount==3
>4. First task releases reference and deletes actions from idr, which
>results another refcount decrement, refcount==1
>5. At the same time second task releases reference to action,
>refcount==0, action is deleted
>6. When task tries to lookup-delete action from idr by index, action is
>not found. This case is handled correctly by code and no negative
>overflow of refcount happens.
>
>>
>>
>>>+ NL_SET_ERR_MSG(extack, "Specified TC action not found");
>>>+ goto err_out;
>>>+ }
>>>+
>>>+ if (ops->delete)
>>>+ err = ops->delete(net, index);
>>>+
>>>+ module_put(ops->owner);
>>>+err_out:
>>>+ return err;
>>>+}
>>>+
>>> static int tca_action_flush(struct net *net, struct nlattr *nla,
>>> struct nlmsghdr *n, u32 portid,
>>> struct netlink_ext_ack *extack)
>>>@@ -1052,6 +1088,36 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
>>> return err;
>>> }
>>>
>>>+static int tcf_action_delete(struct net *net, struct list_head *actions,
>>>+ struct netlink_ext_ack *extack)
>>>+{
>>>+ int ret;
>>>+ struct tc_action *a, *tmp;
>>>+ char kind[IFNAMSIZ];
>>>+ u32 act_index;
>>>+
>>>+ list_for_each_entry_safe(a, tmp, actions, list) {
>>>+ const struct tc_action_ops *ops = a->ops;
>>>+
>>>+ /* Actions can be deleted concurrently
>>>+ * so we must save their type and id to search again
>>>+ * after reference is released.
>>>+ */
>>>+ strncpy(kind, a->ops->kind, sizeof(kind) - 1);
>>>+ act_index = a->tcfa_index;
>>>+
>>>+ list_del(&a->list);
>>>+ if (tcf_action_put(a))
>>>+ module_put(ops->owner);
>>>+
>>>+ /* now do the delete */
>>>+ ret = tcf_action_del_1(net, kind, act_index, extack);
>>>+ if (ret < 0)
>>>+ return ret;
>>>+ }
>>>+ return 0;
>>>+}
>>>+
>>
>> [...]
>
On Tue 15 May 2018 at 09:03, Jiri Pirko <[email protected]> wrote:
> Mon, May 14, 2018 at 09:07:06PM CEST, [email protected] wrote:
>>
>>On Mon 14 May 2018 at 16:47, Jiri Pirko <[email protected]> wrote:
>>> Mon, May 14, 2018 at 04:27:07PM CEST, [email protected] wrote:
>>>
>>> [...]
>>>
>>>
>>>>+static int tcf_action_del_1(struct net *net, char *kind, u32 index,
>>>>+ struct netlink_ext_ack *extack)
>>>>+{
>>>>+ const struct tc_action_ops *ops;
>>>>+ int err = -EINVAL;
>>>>+
>>>>+ ops = tc_lookup_action_n(kind);
>>>>+ if (!ops) {
>>>
>>> How this can happen? Apparently you hold reference to the module since
>>> you put it couple lines below. In that case, I don't really understand
>>> why you need to lookup and just don't use "ops" pointer you have in
>>> tcf_action_delete().
>>
>>The problem is that I cant really delete action while holding reference
>
> Wait a sec. I was talking about a "module reference" (module_put())
I misunderstood your question. Yes, I guess I can just save return value
of tcf_action_put to variable, continue using ops pointer and only
call module_put after delete.
>
>
>>to it. I can try to decrement reference twice, however that might result
>>race condition if another task tries to delete that action at the same
>>time.
>>
>>Imagine situation:
>>1. Action is in actions idr, refcount==1
>>2. Task tries to delete action, takes reference while working with it,
>>refcount==2
>>3. Another task tries to delete same action, takes reference,
>>refcount==3
>>4. First task decrements refcount twice, refcount==1
>>5. At the same time second task decrements refcount twice, refcount==-1
>>
>>My solution is to save action index, but release the reference. Then try
>>to lookup action again and delete it if it is still in idr. (was not
>>concurrently deleted)
>>
>>Now same potential race condition with this patch:
>>1. Action is in actions idr, refcount==1
>>2. Task tries to delete action, takes reference while working with it,
>>refcount==2
>>3. Another task tries to delete same action, takes reference,
>>refcount==3
>>4. First task releases reference and deletes actions from idr, which
>>results another refcount decrement, refcount==1
>>5. At the same time second task releases reference to action,
>>refcount==0, action is deleted
>>6. When task tries to lookup-delete action from idr by index, action is
>>not found. This case is handled correctly by code and no negative
>>overflow of refcount happens.
>>
>>>
>>>
>>>>+ NL_SET_ERR_MSG(extack, "Specified TC action not found");
>>>>+ goto err_out;
>>>>+ }
>>>>+
>>>>+ if (ops->delete)
>>>>+ err = ops->delete(net, index);
>>>>+
>>>>+ module_put(ops->owner);
>>>>+err_out:
>>>>+ return err;
>>>>+}
>>>>+
>>>> static int tca_action_flush(struct net *net, struct nlattr *nla,
>>>> struct nlmsghdr *n, u32 portid,
>>>> struct netlink_ext_ack *extack)
>>>>@@ -1052,6 +1088,36 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
>>>> return err;
>>>> }
>>>>
>>>>+static int tcf_action_delete(struct net *net, struct list_head *actions,
>>>>+ struct netlink_ext_ack *extack)
>>>>+{
>>>>+ int ret;
>>>>+ struct tc_action *a, *tmp;
>>>>+ char kind[IFNAMSIZ];
>>>>+ u32 act_index;
>>>>+
>>>>+ list_for_each_entry_safe(a, tmp, actions, list) {
>>>>+ const struct tc_action_ops *ops = a->ops;
>>>>+
>>>>+ /* Actions can be deleted concurrently
>>>>+ * so we must save their type and id to search again
>>>>+ * after reference is released.
>>>>+ */
>>>>+ strncpy(kind, a->ops->kind, sizeof(kind) - 1);
>>>>+ act_index = a->tcfa_index;
>>>>+
>>>>+ list_del(&a->list);
>>>>+ if (tcf_action_put(a))
>>>>+ module_put(ops->owner);
>>>>+
>>>>+ /* now do the delete */
>>>>+ ret = tcf_action_del_1(net, kind, act_index, extack);
>>>>+ if (ret < 0)
>>>>+ return ret;
>>>>+ }
>>>>+ return 0;
>>>>+}
>>>>+
>>>
>>> [...]
>>
Mon, May 14, 2018 at 04:27:08PM CEST, [email protected] wrote:
>Change action API to assume that action init function always takes
>reference to action, even when overwriting existing action. This is
>necessary because action API continues to use action pointer after init
>function is done. At this point action becomes accessible for concurrent
>modifications so user must always hold reference to it.
>
>Implement helper put list function to atomically release list of actions
>after action API init code is done using them.
>
>Signed-off-by: Vlad Buslov <[email protected]>
>---
> net/sched/act_api.c | 38 +++++++++++++++++---------------------
> 1 file changed, 17 insertions(+), 21 deletions(-)
>
[...]
>@@ -1196,8 +1190,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
> return ret;
> }
> err:
>- if (event != RTM_GETACTION)
Howcome you do this for RTM_GETACTION now too? Where is the related "get"?
>- tcf_action_destroy(&actions, 0);
>+ tcf_action_put_lst(&actions);
> return ret;
> }
>
[...]
On Tue 15 May 2018 at 11:24, Jiri Pirko <[email protected]> wrote:
> Mon, May 14, 2018 at 04:27:08PM CEST, [email protected] wrote:
>>Change action API to assume that action init function always takes
>>reference to action, even when overwriting existing action. This is
>>necessary because action API continues to use action pointer after init
>>function is done. At this point action becomes accessible for concurrent
>>modifications so user must always hold reference to it.
>>
>>Implement helper put list function to atomically release list of actions
>>after action API init code is done using them.
>>
>>Signed-off-by: Vlad Buslov <[email protected]>
>>---
>> net/sched/act_api.c | 38 +++++++++++++++++---------------------
>> 1 file changed, 17 insertions(+), 21 deletions(-)
>>
>
> [...]
>
>
>>@@ -1196,8 +1190,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
>> return ret;
>> }
>> err:
>>- if (event != RTM_GETACTION)
>
> Howcome you do this for RTM_GETACTION now too? Where is the related
> "get"?
In patch 5. There is always a possibility of concurrent delete without
rtnl lock so all usages of action pointers were converted to hold
reference to action.
>
>
>>- tcf_action_destroy(&actions, 0);
>>+ tcf_action_put_lst(&actions);
>> return ret;
>> }
>>
>
> [...]
Tue, May 15, 2018 at 01:32:51PM CEST, [email protected] wrote:
>
>On Tue 15 May 2018 at 11:24, Jiri Pirko <[email protected]> wrote:
>> Mon, May 14, 2018 at 04:27:08PM CEST, [email protected] wrote:
>>>Change action API to assume that action init function always takes
>>>reference to action, even when overwriting existing action. This is
>>>necessary because action API continues to use action pointer after init
>>>function is done. At this point action becomes accessible for concurrent
>>>modifications so user must always hold reference to it.
>>>
>>>Implement helper put list function to atomically release list of actions
>>>after action API init code is done using them.
>>>
>>>Signed-off-by: Vlad Buslov <[email protected]>
>>>---
>>> net/sched/act_api.c | 38 +++++++++++++++++---------------------
>>> 1 file changed, 17 insertions(+), 21 deletions(-)
>>>
>>
>> [...]
>>
>>
>>>@@ -1196,8 +1190,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
>>> return ret;
>>> }
>>> err:
>>>- if (event != RTM_GETACTION)
>>
>> Howcome you do this for RTM_GETACTION now too? Where is the related
>> "get"?
>
>In patch 5. There is always a possibility of concurrent delete without
>rtnl lock so all usages of action pointers were converted to hold
>reference to action.
So that means that if you run kernel in between, with patch 5 but
without patch 7 and you do RTM_GETACTION, you leak a reference, right?
>
>>
>>
>>>- tcf_action_destroy(&actions, 0);
>>>+ tcf_action_put_lst(&actions);
>>> return ret;
>>> }
>>>
>>
>> [...]
>
On Tue 15 May 2018 at 11:39, Jiri Pirko <[email protected]> wrote:
> Tue, May 15, 2018 at 01:32:51PM CEST, [email protected] wrote:
>>
>>On Tue 15 May 2018 at 11:24, Jiri Pirko <[email protected]> wrote:
>>> Mon, May 14, 2018 at 04:27:08PM CEST, [email protected] wrote:
>>>>Change action API to assume that action init function always takes
>>>>reference to action, even when overwriting existing action. This is
>>>>necessary because action API continues to use action pointer after init
>>>>function is done. At this point action becomes accessible for concurrent
>>>>modifications so user must always hold reference to it.
>>>>
>>>>Implement helper put list function to atomically release list of actions
>>>>after action API init code is done using them.
>>>>
>>>>Signed-off-by: Vlad Buslov <[email protected]>
>>>>---
>>>> net/sched/act_api.c | 38 +++++++++++++++++---------------------
>>>> 1 file changed, 17 insertions(+), 21 deletions(-)
>>>>
>>>
>>> [...]
>>>
>>>
>>>>@@ -1196,8 +1190,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
>>>> return ret;
>>>> }
>>>> err:
>>>>- if (event != RTM_GETACTION)
>>>
>>> Howcome you do this for RTM_GETACTION now too? Where is the related
>>> "get"?
>>
>>In patch 5. There is always a possibility of concurrent delete without
>>rtnl lock so all usages of action pointers were converted to hold
>>reference to action.
>
> So that means that if you run kernel in between, with patch 5 but
> without patch 7 and you do RTM_GETACTION, you leak a reference, right?
Right.
>
>
>>
>>>
>>>
>>>>- tcf_action_destroy(&actions, 0);
>>>>+ tcf_action_put_lst(&actions);
>>>> return ret;
>>>> }
>>>>
>>>
>>> [...]
>>
On Tue 15 May 2018 at 08:58, Jiri Pirko <[email protected]> wrote:
> Mon, May 14, 2018 at 08:49:07PM CEST, [email protected] wrote:
>>
>>On Mon 14 May 2018 at 16:23, Jiri Pirko <[email protected]> wrote:
>>> Mon, May 14, 2018 at 04:27:06PM CEST, [email protected] wrote:
>>>>Without rtnl lock protection it is no longer safe to use pointer to tc
>>>>action without holding reference to it. (it can be destroyed concurrently)
>>>>
>>>>Remove unsafe action idr lookup function. Instead of it, implement safe tcf
>>>>idr check function that atomically looks up action in idr and increments
>>>>its reference and bind counters.
>>>>
>>>>Implement both action search and check using new safe function.
>>>>
>>>>Signed-off-by: Vlad Buslov <[email protected]>
>>>>---
>>>> net/sched/act_api.c | 38 ++++++++++++++++----------------------
>>>> 1 file changed, 16 insertions(+), 22 deletions(-)
>>>>
>>>>diff --git a/net/sched/act_api.c b/net/sched/act_api.c
>>>>index 1331beb..9459cce 100644
>>>>--- a/net/sched/act_api.c
>>>>+++ b/net/sched/act_api.c
>>>>@@ -284,44 +284,38 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
>>>> }
>>>> EXPORT_SYMBOL(tcf_generic_walker);
>>>>
>>>>-static struct tc_action *tcf_idr_lookup(u32 index, struct tcf_idrinfo *idrinfo)
>>>>+bool __tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
>>>>+ int bind)
>>>> {
>>>>- struct tc_action *p = NULL;
>>>>+ struct tcf_idrinfo *idrinfo = tn->idrinfo;
>>>>+ struct tc_action *p;
>>>>
>>>> spin_lock_bh(&idrinfo->lock);
>>>
>>> Why "_bh" variant is necessary here?
>>
>>It is not my code.
>
> Yeah, yet still I wonder :)
I've been looking into it and I don't understand why it is used either.
Looking at the commit history I see that long time ago rw_lock was used
instead of spinlock, and some actions implemented they own lookup
methods. So it seems that it might have been used to protect lookups
running in bh context. However, in current architecture classifiers hold
direct reference to action so action lookup is not used when processing
packets.
Tue, May 15, 2018 at 01:41:45PM CEST, [email protected] wrote:
>
>On Tue 15 May 2018 at 11:39, Jiri Pirko <[email protected]> wrote:
>> Tue, May 15, 2018 at 01:32:51PM CEST, [email protected] wrote:
>>>
>>>On Tue 15 May 2018 at 11:24, Jiri Pirko <[email protected]> wrote:
>>>> Mon, May 14, 2018 at 04:27:08PM CEST, [email protected] wrote:
>>>>>Change action API to assume that action init function always takes
>>>>>reference to action, even when overwriting existing action. This is
>>>>>necessary because action API continues to use action pointer after init
>>>>>function is done. At this point action becomes accessible for concurrent
>>>>>modifications so user must always hold reference to it.
>>>>>
>>>>>Implement helper put list function to atomically release list of actions
>>>>>after action API init code is done using them.
>>>>>
>>>>>Signed-off-by: Vlad Buslov <[email protected]>
>>>>>---
>>>>> net/sched/act_api.c | 38 +++++++++++++++++---------------------
>>>>> 1 file changed, 17 insertions(+), 21 deletions(-)
>>>>>
>>>>
>>>> [...]
>>>>
>>>>
>>>>>@@ -1196,8 +1190,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
>>>>> return ret;
>>>>> }
>>>>> err:
>>>>>- if (event != RTM_GETACTION)
>>>>
>>>> Howcome you do this for RTM_GETACTION now too? Where is the related
>>>> "get"?
>>>
>>>In patch 5. There is always a possibility of concurrent delete without
>>>rtnl lock so all usages of action pointers were converted to hold
>>>reference to action.
>>
>> So that means that if you run kernel in between, with patch 5 but
>> without patch 7 and you do RTM_GETACTION, you leak a reference, right?
>
>Right.
That is an issue. You need to make sure that the code is without bugs
like this after every applied patch. You need to make sure the code is
bisectable.
Thanks.
On Tue 15 May 2018 at 11:57, Jiri Pirko <[email protected]> wrote:
> Tue, May 15, 2018 at 01:41:45PM CEST, [email protected] wrote:
>>
>>On Tue 15 May 2018 at 11:39, Jiri Pirko <[email protected]> wrote:
>>> Tue, May 15, 2018 at 01:32:51PM CEST, [email protected] wrote:
>>>>
>>>>On Tue 15 May 2018 at 11:24, Jiri Pirko <[email protected]> wrote:
>>>>> Mon, May 14, 2018 at 04:27:08PM CEST, [email protected] wrote:
>>>>>>Change action API to assume that action init function always takes
>>>>>>reference to action, even when overwriting existing action. This is
>>>>>>necessary because action API continues to use action pointer after init
>>>>>>function is done. At this point action becomes accessible for concurrent
>>>>>>modifications so user must always hold reference to it.
>>>>>>
>>>>>>Implement helper put list function to atomically release list of actions
>>>>>>after action API init code is done using them.
>>>>>>
>>>>>>Signed-off-by: Vlad Buslov <[email protected]>
>>>>>>---
>>>>>> net/sched/act_api.c | 38 +++++++++++++++++---------------------
>>>>>> 1 file changed, 17 insertions(+), 21 deletions(-)
>>>>>>
>>>>>
>>>>> [...]
>>>>>
>>>>>
>>>>>>@@ -1196,8 +1190,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
>>>>>> return ret;
>>>>>> }
>>>>>> err:
>>>>>>- if (event != RTM_GETACTION)
>>>>>
>>>>> Howcome you do this for RTM_GETACTION now too? Where is the related
>>>>> "get"?
>>>>
>>>>In patch 5. There is always a possibility of concurrent delete without
>>>>rtnl lock so all usages of action pointers were converted to hold
>>>>reference to action.
>>>
>>> So that means that if you run kernel in between, with patch 5 but
>>> without patch 7 and you do RTM_GETACTION, you leak a reference, right?
>>
>>Right.
>
> That is an issue. You need to make sure that the code is without bugs
> like this after every applied patch. You need to make sure the code is
> bisectable.
Got it. Will fix in V2.
>
> Thanks.
On 14/05/18 04:46 PM, Vlad Buslov wrote:
>
> On Mon 14 May 2018 at 18:03, Jamal Hadi Salim <[email protected]> wrote:
>> On 14/05/18 10:27 AM, Vlad Buslov wrote:
> Hello Jamal,
>
> I'm trying to run tdc, but keep getting following error even on clean
> branch without my patches:
Vlad, not sure if you saw my email:
Apply Roman's patch and try again
https://marc.info/?l=linux-netdev&m=152639369112020&w=2
cheers,
jamal
On Tue 15 May 2018 at 18:25, Jamal Hadi Salim <[email protected]> wrote:
> On 14/05/18 04:46 PM, Vlad Buslov wrote:
>>
>> On Mon 14 May 2018 at 18:03, Jamal Hadi Salim <[email protected]> wrote:
>>> On 14/05/18 10:27 AM, Vlad Buslov wrote:
>
>
>> Hello Jamal,
>>
>> I'm trying to run tdc, but keep getting following error even on clean
>> branch without my patches:
>
> Vlad, not sure if you saw my email:
> Apply Roman's patch and try again
>
> https://marc.info/?l=linux-netdev&m=152639369112020&w=2
>
> cheers,
> jamal
With patch applied I get following error:
Test 7d50: Add skbmod action to set destination mac
exit: 255 0
dst MAC address <11:22:33:44:55:66>
RTNETLINK answers: No such file or directory
We have an error talking to the kernel
-----> teardown stage *** Could not execute: "$TC actions flush action skbmod"
-----> teardown stage *** Error message: "RTNETLINK answers: Invalid argument
We have an error flushing
"
-----> teardown stage *** Aborting test run.
<_io.BufferedReader name=3> *** stdout ***
<_io.BufferedReader name=5> *** stderr ***
"-----> teardown stage" did not complete successfully
Exception <class '__main__.PluginMgrTestFail'> ('teardown', 'dst MAC address <11:22:33:44:55:66>\nRTNETLINK answers: No such file or directory\nWe have an error talking to the kernel\n', '"-----> teardown stage" did not complete successful
ly') (caught in test_runner, running test 87 7d50 Add skbmod action to set destination mac stage teardown)
---------------
traceback
File "./tdc.py", line 293, in test_runner
tap += run_one_test(pm, args, index, tidx)
File "./tdc.py", line 229, in run_one_test
prepare_env(args, pm, 'teardown', '-----> teardown stage', tidx['teardown'], procout)
File "./tdc.py", line 177, in prepare_env
'"{}" did not complete successfully'.format(prefix))
---------------
accumulated output for this test:
dst MAC address <11:22:33:44:55:66>
RTNETLINK answers: No such file or directory
We have an error talking to the kernel
---------------
Regards,
Vlad
Mon, May 14, 2018 at 04:27:09PM CEST, [email protected] wrote:
>tca_get_fill function has 'bind' and 'ref' arguments that get passed
>down to action dump function. These arguments values are subtracted from
>actual reference and bind counter values before writing them to skb.
>
>In order to prevent concurrent action delete, RTM_GETACTION handler
>acquires a reference to action before 'dumping' it and releases it
>afterwards. This reference is temporal and should not be accounted by
>userspace clients. (both logically and to preserver current API
>behavior)
>
>Use existing infrastructure of tca_get_fill arguments to subtract that
>temporary reference and not expose it to userspace.
>
>Signed-off-by: Vlad Buslov <[email protected]>
>---
> net/sched/act_api.c | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
>diff --git a/net/sched/act_api.c b/net/sched/act_api.c
>index 3f02cd1..2772276e 100644
>--- a/net/sched/act_api.c
>+++ b/net/sched/act_api.c
>@@ -935,7 +935,7 @@ tcf_get_notify(struct net *net, u32 portid, struct nlmsghdr *n,
> if (!skb)
> return -ENOBUFS;
> if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, event,
>- 0, 0) <= 0) {
>+ 0, 1) <= 0) {
> NL_SET_ERR_MSG(extack, "Failed to fill netlink attributes while adding TC action");
> kfree_skb(skb);
> return -EINVAL;
>@@ -1125,7 +1125,7 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
> return -ENOBUFS;
>
> if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, RTM_DELACTION,
>- 0, 1) <= 0) {
>+ 0, 2) <= 0) {
So now you are adjusting dump because of a change in a different patch
right? This also breaks bisect.
Mon, May 14, 2018 at 04:27:10PM CEST, [email protected] wrote:
>Return from action init function with reference to action taken,
>even when overwriting existing action.
>
>Action init API initializes its fourth argument (pointer to pointer to
>tc action) to either existing action with same index or newly created
>action. In case of existing index(and bind argument is zero), init
>function returns without incrementing action reference counter. Caller
>of action init then proceeds working with action without actually
>holding reference to it. This means that action could be deleted
>concurrently. To prevent such scenario this patch changes action init
Be imperative to the codebase in the patch description.
>behavior to always take reference to action before returning
>successfully.
Where's the balance? Who does the release instead? I'm probably missing
something.
>
>Signed-off-by: Vlad Buslov <[email protected]>
>---
> net/sched/act_bpf.c | 8 ++++----
> net/sched/act_connmark.c | 5 +++--
> net/sched/act_csum.c | 8 ++++----
> net/sched/act_gact.c | 5 +++--
> net/sched/act_ife.c | 12 +++++-------
> net/sched/act_ipt.c | 5 +++--
> net/sched/act_mirred.c | 5 ++---
> net/sched/act_nat.c | 5 +++--
> net/sched/act_pedit.c | 5 +++--
> net/sched/act_police.c | 8 +++-----
> net/sched/act_sample.c | 8 +++-----
> net/sched/act_simple.c | 5 +++--
> net/sched/act_skbedit.c | 5 +++--
> net/sched/act_skbmod.c | 8 +++-----
> net/sched/act_tunnel_key.c | 8 +++-----
> net/sched/act_vlan.c | 8 +++-----
> 16 files changed, 51 insertions(+), 57 deletions(-)
>
>diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
>index 5d95c43..5554bf7 100644
>--- a/net/sched/act_bpf.c
>+++ b/net/sched/act_bpf.c
>@@ -311,9 +311,10 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
> if (bind)
> return 0;
>
>- tcf_idr_release(*act, bind);
>- if (!replace)
>+ if (!replace) {
>+ tcf_idr_release(*act, bind);
> return -EEXIST;
>+ }
> }
>
> is_bpf = tb[TCA_ACT_BPF_OPS_LEN] && tb[TCA_ACT_BPF_OPS];
[...]
On Wed 16 May 2018 at 07:43, Jiri Pirko <[email protected]> wrote:
> Mon, May 14, 2018 at 04:27:10PM CEST, [email protected] wrote:
>>Return from action init function with reference to action taken,
>>even when overwriting existing action.
>>
>>Action init API initializes its fourth argument (pointer to pointer to
>>tc action) to either existing action with same index or newly created
>>action. In case of existing index(and bind argument is zero), init
>>function returns without incrementing action reference counter. Caller
>>of action init then proceeds working with action without actually
>>holding reference to it. This means that action could be deleted
>>concurrently. To prevent such scenario this patch changes action init
>
> Be imperative to the codebase in the patch description.
>
>
>>behavior to always take reference to action before returning
>>successfully.
>
> Where's the balance? Who does the release instead? I'm probably missing
> something.
I've resplit these patches for V2 to always do take/release in same
patch.
>
>>
>>Signed-off-by: Vlad Buslov <[email protected]>
>>---
>> net/sched/act_bpf.c | 8 ++++----
>> net/sched/act_connmark.c | 5 +++--
>> net/sched/act_csum.c | 8 ++++----
>> net/sched/act_gact.c | 5 +++--
>> net/sched/act_ife.c | 12 +++++-------
>> net/sched/act_ipt.c | 5 +++--
>> net/sched/act_mirred.c | 5 ++---
>> net/sched/act_nat.c | 5 +++--
>> net/sched/act_pedit.c | 5 +++--
>> net/sched/act_police.c | 8 +++-----
>> net/sched/act_sample.c | 8 +++-----
>> net/sched/act_simple.c | 5 +++--
>> net/sched/act_skbedit.c | 5 +++--
>> net/sched/act_skbmod.c | 8 +++-----
>> net/sched/act_tunnel_key.c | 8 +++-----
>> net/sched/act_vlan.c | 8 +++-----
>> 16 files changed, 51 insertions(+), 57 deletions(-)
>>
>>diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
>>index 5d95c43..5554bf7 100644
>>--- a/net/sched/act_bpf.c
>>+++ b/net/sched/act_bpf.c
>>@@ -311,9 +311,10 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
>> if (bind)
>> return 0;
>>
>>- tcf_idr_release(*act, bind);
>>- if (!replace)
>>+ if (!replace) {
>>+ tcf_idr_release(*act, bind);
>> return -EEXIST;
>>+ }
>> }
>>
>> is_bpf = tb[TCA_ACT_BPF_OPS_LEN] && tb[TCA_ACT_BPF_OPS];
>
> [...]
Mon, May 14, 2018 at 04:27:11PM CEST, [email protected] wrote:
>Implement new action API function to atomically delete action with
>specified index and to atomically insert unique action. These functions are
>required to implement init and delete functions for specific actions that
>do not rely on rtnl lock.
>
>Signed-off-by: Vlad Buslov <[email protected]>
>---
> include/net/act_api.h | 2 ++
> net/sched/act_api.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 47 insertions(+)
>
>diff --git a/include/net/act_api.h b/include/net/act_api.h
>index a8c8570..bce0cf1 100644
>--- a/include/net/act_api.h
>+++ b/include/net/act_api.h
>@@ -153,7 +153,9 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
> struct tc_action **a, const struct tc_action_ops *ops,
> int bind, bool cpustats);
> void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a);
>+void tcf_idr_insert_unique(struct tc_action_net *tn, struct tc_action *a);
>
>+int tcf_idr_find_delete(struct tc_action_net *tn, u32 index);
> int __tcf_idr_release(struct tc_action *a, bool bind, bool strict);
>
> static inline int tcf_idr_release(struct tc_action *a, bool bind)
>diff --git a/net/sched/act_api.c b/net/sched/act_api.c
>index 2772276e..a5193dc 100644
>--- a/net/sched/act_api.c
>+++ b/net/sched/act_api.c
>@@ -330,6 +330,41 @@ bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
> }
> EXPORT_SYMBOL(tcf_idr_check);
>
>+int tcf_idr_find_delete(struct tc_action_net *tn, u32 index)
>+{
>+ struct tcf_idrinfo *idrinfo = tn->idrinfo;
>+ struct tc_action *p;
>+ int ret = 0;
>+
>+ spin_lock_bh(&idrinfo->lock);
Why "_bh" is needed here?
>+ p = idr_find(&idrinfo->action_idr, index);
>+ if (!p) {
>+ spin_unlock(&idrinfo->lock);
>+ return -ENOENT;
>+ }
>+
>+ if (!atomic_read(&p->tcfa_bindcnt)) {
>+ if (refcount_dec_and_test(&p->tcfa_refcnt)) {
>+ struct module *owner = p->ops->owner;
>+
>+ WARN_ON(p != idr_remove(&idrinfo->action_idr,
>+ p->tcfa_index));
>+ spin_unlock_bh(&idrinfo->lock);
>+
>+ tcf_action_cleanup(p);
>+ module_put(owner);
>+ return 0;
>+ }
>+ ret = 0;
>+ } else {
>+ ret = -EPERM;
I wonder if "-EPERM" is the best error code for this...
>+ }
>+
>+ spin_unlock_bh(&idrinfo->lock);
>+ return ret;
>+}
>+EXPORT_SYMBOL(tcf_idr_find_delete);
>+
> int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
> struct tc_action **a, const struct tc_action_ops *ops,
> int bind, bool cpustats)
>@@ -407,6 +442,16 @@ void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a)
> }
> EXPORT_SYMBOL(tcf_idr_insert);
>
>+void tcf_idr_insert_unique(struct tc_action_net *tn, struct tc_action *a)
>+{
>+ struct tcf_idrinfo *idrinfo = tn->idrinfo;
>+
>+ spin_lock_bh(&idrinfo->lock);
>+ WARN_ON(idr_replace(&idrinfo->action_idr, a, a->tcfa_index));
Under which condition this WARN_ON is hit?
>+ spin_unlock_bh(&idrinfo->lock);
>+}
>+EXPORT_SYMBOL(tcf_idr_insert_unique);
>+
> void tcf_idrinfo_destroy(const struct tc_action_ops *ops,
> struct tcf_idrinfo *idrinfo)
> {
>--
>2.7.5
>
Wed, May 16, 2018 at 09:47:32AM CEST, [email protected] wrote:
>
>On Wed 16 May 2018 at 07:43, Jiri Pirko <[email protected]> wrote:
>> Mon, May 14, 2018 at 04:27:10PM CEST, [email protected] wrote:
>>>Return from action init function with reference to action taken,
>>>even when overwriting existing action.
>>>
>>>Action init API initializes its fourth argument (pointer to pointer to
>>>tc action) to either existing action with same index or newly created
>>>action. In case of existing index(and bind argument is zero), init
>>>function returns without incrementing action reference counter. Caller
>>>of action init then proceeds working with action without actually
>>>holding reference to it. This means that action could be deleted
>>>concurrently. To prevent such scenario this patch changes action init
>>
>> Be imperative to the codebase in the patch description.
>>
>>
>>>behavior to always take reference to action before returning
>>>successfully.
>>
>> Where's the balance? Who does the release instead? I'm probably missing
>> something.
>
>I've resplit these patches for V2 to always do take/release in same
>patch.
Good. Thanks.
>
>>
>>>
>>>Signed-off-by: Vlad Buslov <[email protected]>
>>>---
>>> net/sched/act_bpf.c | 8 ++++----
>>> net/sched/act_connmark.c | 5 +++--
>>> net/sched/act_csum.c | 8 ++++----
>>> net/sched/act_gact.c | 5 +++--
>>> net/sched/act_ife.c | 12 +++++-------
>>> net/sched/act_ipt.c | 5 +++--
>>> net/sched/act_mirred.c | 5 ++---
>>> net/sched/act_nat.c | 5 +++--
>>> net/sched/act_pedit.c | 5 +++--
>>> net/sched/act_police.c | 8 +++-----
>>> net/sched/act_sample.c | 8 +++-----
>>> net/sched/act_simple.c | 5 +++--
>>> net/sched/act_skbedit.c | 5 +++--
>>> net/sched/act_skbmod.c | 8 +++-----
>>> net/sched/act_tunnel_key.c | 8 +++-----
>>> net/sched/act_vlan.c | 8 +++-----
>>> 16 files changed, 51 insertions(+), 57 deletions(-)
>>>
>>>diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
>>>index 5d95c43..5554bf7 100644
>>>--- a/net/sched/act_bpf.c
>>>+++ b/net/sched/act_bpf.c
>>>@@ -311,9 +311,10 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
>>> if (bind)
>>> return 0;
>>>
>>>- tcf_idr_release(*act, bind);
>>>- if (!replace)
>>>+ if (!replace) {
>>>+ tcf_idr_release(*act, bind);
>>> return -EEXIST;
>>>+ }
>>> }
>>>
>>> is_bpf = tb[TCA_ACT_BPF_OPS_LEN] && tb[TCA_ACT_BPF_OPS];
>>
>> [...]
>
On Wed 16 May 2018 at 07:50, Jiri Pirko <[email protected]> wrote:
> Mon, May 14, 2018 at 04:27:11PM CEST, [email protected] wrote:
>>Implement new action API function to atomically delete action with
>>specified index and to atomically insert unique action. These functions are
>>required to implement init and delete functions for specific actions that
>>do not rely on rtnl lock.
>>
>>Signed-off-by: Vlad Buslov <[email protected]>
>>---
>> include/net/act_api.h | 2 ++
>> net/sched/act_api.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
>> 2 files changed, 47 insertions(+)
>>
>>diff --git a/include/net/act_api.h b/include/net/act_api.h
>>index a8c8570..bce0cf1 100644
>>--- a/include/net/act_api.h
>>+++ b/include/net/act_api.h
>>@@ -153,7 +153,9 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
>> struct tc_action **a, const struct tc_action_ops *ops,
>> int bind, bool cpustats);
>> void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a);
>>+void tcf_idr_insert_unique(struct tc_action_net *tn, struct tc_action *a);
>>
>>+int tcf_idr_find_delete(struct tc_action_net *tn, u32 index);
>> int __tcf_idr_release(struct tc_action *a, bool bind, bool strict);
>>
>> static inline int tcf_idr_release(struct tc_action *a, bool bind)
>>diff --git a/net/sched/act_api.c b/net/sched/act_api.c
>>index 2772276e..a5193dc 100644
>>--- a/net/sched/act_api.c
>>+++ b/net/sched/act_api.c
>>@@ -330,6 +330,41 @@ bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
>> }
>> EXPORT_SYMBOL(tcf_idr_check);
>>
>>+int tcf_idr_find_delete(struct tc_action_net *tn, u32 index)
>>+{
>>+ struct tcf_idrinfo *idrinfo = tn->idrinfo;
>>+ struct tc_action *p;
>>+ int ret = 0;
>>+
>>+ spin_lock_bh(&idrinfo->lock);
>
> Why "_bh" is needed here?
Original idr remove function used _bh version so I used it here as well.
As I already replied to your previous question about idrinfo lock usage,
I don't see any particular reason for locking with _bh at this point.
I've contacted the author(Chris Mi) and he said that he just preserved
locking the same way as it was before he changed hash table to idr for
action lookup.
You want me to do standalone patch that cleans up idrinfo locking?
>
>
>>+ p = idr_find(&idrinfo->action_idr, index);
>>+ if (!p) {
>>+ spin_unlock(&idrinfo->lock);
>>+ return -ENOENT;
>>+ }
>>+
>>+ if (!atomic_read(&p->tcfa_bindcnt)) {
>>+ if (refcount_dec_and_test(&p->tcfa_refcnt)) {
>>+ struct module *owner = p->ops->owner;
>>+
>>+ WARN_ON(p != idr_remove(&idrinfo->action_idr,
>>+ p->tcfa_index));
>>+ spin_unlock_bh(&idrinfo->lock);
>>+
>>+ tcf_action_cleanup(p);
>>+ module_put(owner);
>>+ return 0;
>>+ }
>>+ ret = 0;
>>+ } else {
>>+ ret = -EPERM;
>
> I wonder if "-EPERM" is the best error code for this...
This is what original code returned so I decided to preserve
compatibility.
>
>
>>+ }
>>+
>>+ spin_unlock_bh(&idrinfo->lock);
>>+ return ret;
>>+}
>>+EXPORT_SYMBOL(tcf_idr_find_delete);
>>+
>> int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
>> struct tc_action **a, const struct tc_action_ops *ops,
>> int bind, bool cpustats)
>>@@ -407,6 +442,16 @@ void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a)
>> }
>> EXPORT_SYMBOL(tcf_idr_insert);
>>
>>+void tcf_idr_insert_unique(struct tc_action_net *tn, struct tc_action *a)
>>+{
>>+ struct tcf_idrinfo *idrinfo = tn->idrinfo;
>>+
>>+ spin_lock_bh(&idrinfo->lock);
>>+ WARN_ON(idr_replace(&idrinfo->action_idr, a, a->tcfa_index));
>
> Under which condition this WARN_ON is hit?
When idr replace returns non-NULL pointer, which means that somehow
concurrent insertion of action with same index has happened and we are
leaking memory.
By the way I'm still not sure if having this insert unique function is
warranted or I should just add WARN to regular idr insert. What is your
opinion on this?
>
>
>>+ spin_unlock_bh(&idrinfo->lock);
>>+}
>>+EXPORT_SYMBOL(tcf_idr_insert_unique);
>>+
>> void tcf_idrinfo_destroy(const struct tc_action_ops *ops,
>> struct tcf_idrinfo *idrinfo)
>> {
>>--
>>2.7.5
>>
Wed, May 16, 2018 at 10:16:13AM CEST, [email protected] wrote:
>
>On Wed 16 May 2018 at 07:50, Jiri Pirko <[email protected]> wrote:
>> Mon, May 14, 2018 at 04:27:11PM CEST, [email protected] wrote:
>>>Implement new action API function to atomically delete action with
>>>specified index and to atomically insert unique action. These functions are
>>>required to implement init and delete functions for specific actions that
>>>do not rely on rtnl lock.
>>>
>>>Signed-off-by: Vlad Buslov <[email protected]>
>>>---
>>> include/net/act_api.h | 2 ++
>>> net/sched/act_api.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
>>> 2 files changed, 47 insertions(+)
>>>
>>>diff --git a/include/net/act_api.h b/include/net/act_api.h
>>>index a8c8570..bce0cf1 100644
>>>--- a/include/net/act_api.h
>>>+++ b/include/net/act_api.h
>>>@@ -153,7 +153,9 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
>>> struct tc_action **a, const struct tc_action_ops *ops,
>>> int bind, bool cpustats);
>>> void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a);
>>>+void tcf_idr_insert_unique(struct tc_action_net *tn, struct tc_action *a);
>>>
>>>+int tcf_idr_find_delete(struct tc_action_net *tn, u32 index);
>>> int __tcf_idr_release(struct tc_action *a, bool bind, bool strict);
>>>
>>> static inline int tcf_idr_release(struct tc_action *a, bool bind)
>>>diff --git a/net/sched/act_api.c b/net/sched/act_api.c
>>>index 2772276e..a5193dc 100644
>>>--- a/net/sched/act_api.c
>>>+++ b/net/sched/act_api.c
>>>@@ -330,6 +330,41 @@ bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
>>> }
>>> EXPORT_SYMBOL(tcf_idr_check);
>>>
>>>+int tcf_idr_find_delete(struct tc_action_net *tn, u32 index)
>>>+{
>>>+ struct tcf_idrinfo *idrinfo = tn->idrinfo;
>>>+ struct tc_action *p;
>>>+ int ret = 0;
>>>+
>>>+ spin_lock_bh(&idrinfo->lock);
>>
>> Why "_bh" is needed here?
>
>Original idr remove function used _bh version so I used it here as well.
>As I already replied to your previous question about idrinfo lock usage,
>I don't see any particular reason for locking with _bh at this point.
>I've contacted the author(Chris Mi) and he said that he just preserved
>locking the same way as it was before he changed hash table to idr for
>action lookup.
>
>You want me to do standalone patch that cleans up idrinfo locking?
Yes please. You can send it separately, not as a part of this patchset.
>
>>
>>
>>>+ p = idr_find(&idrinfo->action_idr, index);
>>>+ if (!p) {
>>>+ spin_unlock(&idrinfo->lock);
>>>+ return -ENOENT;
>>>+ }
>>>+
>>>+ if (!atomic_read(&p->tcfa_bindcnt)) {
>>>+ if (refcount_dec_and_test(&p->tcfa_refcnt)) {
>>>+ struct module *owner = p->ops->owner;
>>>+
>>>+ WARN_ON(p != idr_remove(&idrinfo->action_idr,
>>>+ p->tcfa_index));
>>>+ spin_unlock_bh(&idrinfo->lock);
>>>+
>>>+ tcf_action_cleanup(p);
>>>+ module_put(owner);
>>>+ return 0;
>>>+ }
>>>+ ret = 0;
>>>+ } else {
>>>+ ret = -EPERM;
>>
>> I wonder if "-EPERM" is the best error code for this...
>
>This is what original code returned so I decided to preserve
>compatibility.
Okay.
>
>>
>>
>>>+ }
>>>+
>>>+ spin_unlock_bh(&idrinfo->lock);
>>>+ return ret;
>>>+}
>>>+EXPORT_SYMBOL(tcf_idr_find_delete);
>>>+
>>> int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
>>> struct tc_action **a, const struct tc_action_ops *ops,
>>> int bind, bool cpustats)
>>>@@ -407,6 +442,16 @@ void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a)
>>> }
>>> EXPORT_SYMBOL(tcf_idr_insert);
>>>
>>>+void tcf_idr_insert_unique(struct tc_action_net *tn, struct tc_action *a)
>>>+{
>>>+ struct tcf_idrinfo *idrinfo = tn->idrinfo;
>>>+
>>>+ spin_lock_bh(&idrinfo->lock);
>>>+ WARN_ON(idr_replace(&idrinfo->action_idr, a, a->tcfa_index));
>>
>> Under which condition this WARN_ON is hit?
>
>When idr replace returns non-NULL pointer, which means that somehow
>concurrent insertion of action with same index has happened and we are
>leaking memory.
Is that possible to happen? Meaning, can I as a user cause this by doing
something in a wrong/unexpected way?
>
>By the way I'm still not sure if having this insert unique function is
>warranted or I should just add WARN to regular idr insert. What is your
>opinion on this?
I have to check where you use this.
>
>>
>>
>>>+ spin_unlock_bh(&idrinfo->lock);
>>>+}
>>>+EXPORT_SYMBOL(tcf_idr_insert_unique);
>>>+
>>> void tcf_idrinfo_destroy(const struct tc_action_ops *ops,
>>> struct tcf_idrinfo *idrinfo)
>>> {
>>>--
>>>2.7.5
>>>
>
On Wed 16 May 2018 at 08:56, Jiri Pirko <[email protected]> wrote:
> Wed, May 16, 2018 at 10:16:13AM CEST, [email protected] wrote:
>>
>>On Wed 16 May 2018 at 07:50, Jiri Pirko <[email protected]> wrote:
>>> Mon, May 14, 2018 at 04:27:11PM CEST, [email protected] wrote:
>>>>Implement new action API function to atomically delete action with
>>>>specified index and to atomically insert unique action. These functions are
>>>>required to implement init and delete functions for specific actions that
>>>>do not rely on rtnl lock.
>>>>
>>>>Signed-off-by: Vlad Buslov <[email protected]>
>>>>---
>>>> include/net/act_api.h | 2 ++
>>>> net/sched/act_api.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
>>>> 2 files changed, 47 insertions(+)
>>>>
>>>>diff --git a/include/net/act_api.h b/include/net/act_api.h
>>>>index a8c8570..bce0cf1 100644
>>>>--- a/include/net/act_api.h
>>>>+++ b/include/net/act_api.h
>>>>@@ -153,7 +153,9 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
>>>> struct tc_action **a, const struct tc_action_ops *ops,
>>>> int bind, bool cpustats);
>>>> void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a);
>>>>+void tcf_idr_insert_unique(struct tc_action_net *tn, struct tc_action *a);
>>>>
>>>>+int tcf_idr_find_delete(struct tc_action_net *tn, u32 index);
>>>> int __tcf_idr_release(struct tc_action *a, bool bind, bool strict);
>>>>
>>>> static inline int tcf_idr_release(struct tc_action *a, bool bind)
>>>>diff --git a/net/sched/act_api.c b/net/sched/act_api.c
>>>>index 2772276e..a5193dc 100644
>>>>--- a/net/sched/act_api.c
>>>>+++ b/net/sched/act_api.c
>>>>@@ -330,6 +330,41 @@ bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
>>>> }
>>>> EXPORT_SYMBOL(tcf_idr_check);
>>>>
>>>>+int tcf_idr_find_delete(struct tc_action_net *tn, u32 index)
>>>>+{
>>>>+ struct tcf_idrinfo *idrinfo = tn->idrinfo;
>>>>+ struct tc_action *p;
>>>>+ int ret = 0;
>>>>+
>>>>+ spin_lock_bh(&idrinfo->lock);
>>>
>>> Why "_bh" is needed here?
>>
>>Original idr remove function used _bh version so I used it here as well.
>>As I already replied to your previous question about idrinfo lock usage,
>>I don't see any particular reason for locking with _bh at this point.
>>I've contacted the author(Chris Mi) and he said that he just preserved
>>locking the same way as it was before he changed hash table to idr for
>>action lookup.
>>
>>You want me to do standalone patch that cleans up idrinfo locking?
>
> Yes please. You can send it separately, not as a part of this
> patchset.
Okay.
>
>
>
>>
>>>
>>>
>>>>+ p = idr_find(&idrinfo->action_idr, index);
>>>>+ if (!p) {
>>>>+ spin_unlock(&idrinfo->lock);
>>>>+ return -ENOENT;
>>>>+ }
>>>>+
>>>>+ if (!atomic_read(&p->tcfa_bindcnt)) {
>>>>+ if (refcount_dec_and_test(&p->tcfa_refcnt)) {
>>>>+ struct module *owner = p->ops->owner;
>>>>+
>>>>+ WARN_ON(p != idr_remove(&idrinfo->action_idr,
>>>>+ p->tcfa_index));
>>>>+ spin_unlock_bh(&idrinfo->lock);
>>>>+
>>>>+ tcf_action_cleanup(p);
>>>>+ module_put(owner);
>>>>+ return 0;
>>>>+ }
>>>>+ ret = 0;
>>>>+ } else {
>>>>+ ret = -EPERM;
>>>
>>> I wonder if "-EPERM" is the best error code for this...
>>
>>This is what original code returned so I decided to preserve
>>compatibility.
>
> Okay.
>
>
>>
>>>
>>>
>>>>+ }
>>>>+
>>>>+ spin_unlock_bh(&idrinfo->lock);
>>>>+ return ret;
>>>>+}
>>>>+EXPORT_SYMBOL(tcf_idr_find_delete);
>>>>+
>>>> int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
>>>> struct tc_action **a, const struct tc_action_ops *ops,
>>>> int bind, bool cpustats)
>>>>@@ -407,6 +442,16 @@ void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a)
>>>> }
>>>> EXPORT_SYMBOL(tcf_idr_insert);
>>>>
>>>>+void tcf_idr_insert_unique(struct tc_action_net *tn, struct tc_action *a)
>>>>+{
>>>>+ struct tcf_idrinfo *idrinfo = tn->idrinfo;
>>>>+
>>>>+ spin_lock_bh(&idrinfo->lock);
>>>>+ WARN_ON(idr_replace(&idrinfo->action_idr, a, a->tcfa_index));
>>>
>>> Under which condition this WARN_ON is hit?
>>
>>When idr replace returns non-NULL pointer, which means that somehow
>>concurrent insertion of action with same index has happened and we are
>>leaking memory.
>
> Is that possible to happen? Meaning, can I as a user cause this by doing
> something in a wrong/unexpected way?
No, it shouldn't be possible unless there is a race condition.
Otherwise I would put some proper error handling code there.
>
>
>>
>>By the way I'm still not sure if having this insert unique function is
>>warranted or I should just add WARN to regular idr insert. What is your
>>opinion on this?
>
> I have to check where you use this.
Every action init function uses this.
>
>
>>
>>>
>>>
>>>>+ spin_unlock_bh(&idrinfo->lock);
>>>>+}
>>>>+EXPORT_SYMBOL(tcf_idr_insert_unique);
>>>>+
>>>> void tcf_idrinfo_destroy(const struct tc_action_ops *ops,
>>>> struct tcf_idrinfo *idrinfo)
>>>> {
>>>>--
>>>>2.7.5
>>>>
>>
Mon, May 14, 2018 at 04:27:15PM CEST, [email protected] wrote:
>Implement delete function that is required to delete actions without
>holding rtnl lock. Use action API function that atomically deletes action
>only if it is still in action idr. This implementation prevents concurrent
>threads from deleting same action twice.
>
>Signed-off-by: Vlad Buslov <[email protected]>
>---
> net/sched/act_bpf.c | 8 ++++++++
> net/sched/act_connmark.c | 8 ++++++++
> net/sched/act_csum.c | 8 ++++++++
> net/sched/act_gact.c | 8 ++++++++
> net/sched/act_ife.c | 8 ++++++++
> net/sched/act_ipt.c | 16 ++++++++++++++++
> net/sched/act_mirred.c | 8 ++++++++
> net/sched/act_nat.c | 8 ++++++++
> net/sched/act_pedit.c | 8 ++++++++
> net/sched/act_police.c | 8 ++++++++
> net/sched/act_sample.c | 8 ++++++++
> net/sched/act_simple.c | 8 ++++++++
> net/sched/act_skbedit.c | 8 ++++++++
> net/sched/act_skbmod.c | 8 ++++++++
> net/sched/act_tunnel_key.c | 8 ++++++++
> net/sched/act_vlan.c | 8 ++++++++
> 16 files changed, 136 insertions(+)
>
>diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
>index 0bf4ecf..36f7f66 100644
>--- a/net/sched/act_bpf.c
>+++ b/net/sched/act_bpf.c
>@@ -394,6 +394,13 @@ static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index,
> return tcf_idr_search(tn, a, index);
> }
>
>+static int tcf_bpf_delete(struct net *net, u32 index)
>+{
>+ struct tc_action_net *tn = net_generic(net, bpf_net_id);
>+
>+ return tcf_idr_find_delete(tn, index);
>+}
>+
> static struct tc_action_ops act_bpf_ops __read_mostly = {
> .kind = "bpf",
> .type = TCA_ACT_BPF,
>@@ -404,6 +411,7 @@ static struct tc_action_ops act_bpf_ops __read_mostly = {
> .init = tcf_bpf_init,
> .walk = tcf_bpf_walker,
> .lookup = tcf_bpf_search,
>+ .delete = tcf_bpf_delete,
I wonder, right before this patch, how the idr index got removed?
delete op is NULL and I didn't find anyone else to do it.
Also, after this patch, does it make sense to have following check in
tcf_action_del_1()?
if (ops->delete)
err = ops->delete(net, index);
Looks like ops->delete is non-null for all.
Seems to me that you need to introduce this patch filling up the delete
op in all acts and only after that introduce a code that actually calls
it.
[...]
Mon, May 14, 2018 at 04:27:14PM CEST, [email protected] wrote:
>Substitute calls to action insert function with calls to action insert
>unique function that warns if insertion overwrites index in idr.
>
>Signed-off-by: Vlad Buslov <[email protected]>
>---
> net/sched/act_bpf.c | 2 +-
> net/sched/act_connmark.c | 2 +-
> net/sched/act_csum.c | 2 +-
> net/sched/act_gact.c | 2 +-
> net/sched/act_ife.c | 2 +-
> net/sched/act_ipt.c | 2 +-
> net/sched/act_mirred.c | 2 +-
> net/sched/act_nat.c | 2 +-
> net/sched/act_pedit.c | 2 +-
> net/sched/act_police.c | 2 +-
> net/sched/act_sample.c | 2 +-
> net/sched/act_simple.c | 2 +-
> net/sched/act_skbedit.c | 2 +-
> net/sched/act_skbmod.c | 2 +-
> net/sched/act_tunnel_key.c | 2 +-
> net/sched/act_vlan.c | 2 +-
> 16 files changed, 16 insertions(+), 16 deletions(-)
>
>diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
>index 7e20fdc..0bf4ecf 100644
>--- a/net/sched/act_bpf.c
>+++ b/net/sched/act_bpf.c
>@@ -354,7 +354,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
> rcu_assign_pointer(prog->filter, cfg.filter);
>
> if (res == ACT_P_CREATED) {
>- tcf_idr_insert(tn, *act);
>+ tcf_idr_insert_unique(tn, *act);
Seems to me that tcf_idr_insert() is unused after this patch. If that is
the case, I think that you don't need to introduce
tcf_idr_insert_unique() and just do what you need to do in
tcf_idr_insert()
[...]
Mon, May 14, 2018 at 04:27:12PM CEST, [email protected] wrote:
>Extend rate estimator new and replace APIs with additional spinlock
>parameter used by lockless actions to protect rate_est pointer from
>concurrent modification.
>
>Signed-off-by: Vlad Buslov <[email protected]>
[...]
> /**
> * gen_new_estimator - create a new rate estimator
> * @bstats: basic statistics
> * @cpu_bstats: bstats per cpu
> * @rate_est: rate estimator statistics
>+ * @rate_est_lock: rate_est lock (might be NULL)
I cannot find a place you actually use this new arg in this patchset.
Did I miss it?
> * @stats_lock: statistics lock
> * @running: qdisc running seqcount
> * @opt: rate estimator configuration TLV
[...]
On Wed 16 May 2018 at 09:50, Jiri Pirko <[email protected]> wrote:
> Mon, May 14, 2018 at 04:27:14PM CEST, [email protected] wrote:
>>Substitute calls to action insert function with calls to action insert
>>unique function that warns if insertion overwrites index in idr.
>>
>>Signed-off-by: Vlad Buslov <[email protected]>
>>---
>> net/sched/act_bpf.c | 2 +-
>> net/sched/act_connmark.c | 2 +-
>> net/sched/act_csum.c | 2 +-
>> net/sched/act_gact.c | 2 +-
>> net/sched/act_ife.c | 2 +-
>> net/sched/act_ipt.c | 2 +-
>> net/sched/act_mirred.c | 2 +-
>> net/sched/act_nat.c | 2 +-
>> net/sched/act_pedit.c | 2 +-
>> net/sched/act_police.c | 2 +-
>> net/sched/act_sample.c | 2 +-
>> net/sched/act_simple.c | 2 +-
>> net/sched/act_skbedit.c | 2 +-
>> net/sched/act_skbmod.c | 2 +-
>> net/sched/act_tunnel_key.c | 2 +-
>> net/sched/act_vlan.c | 2 +-
>> 16 files changed, 16 insertions(+), 16 deletions(-)
>>
>>diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
>>index 7e20fdc..0bf4ecf 100644
>>--- a/net/sched/act_bpf.c
>>+++ b/net/sched/act_bpf.c
>>@@ -354,7 +354,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
>> rcu_assign_pointer(prog->filter, cfg.filter);
>>
>> if (res == ACT_P_CREATED) {
>>- tcf_idr_insert(tn, *act);
>>+ tcf_idr_insert_unique(tn, *act);
>
> Seems to me that tcf_idr_insert() is unused after this patch. If that is
> the case, I think that you don't need to introduce
> tcf_idr_insert_unique() and just do what you need to do in
> tcf_idr_insert()
Got it.
>
> [...]
On Wed 16 May 2018 at 09:48, Jiri Pirko <[email protected]> wrote:
> Mon, May 14, 2018 at 04:27:15PM CEST, [email protected] wrote:
>>Implement delete function that is required to delete actions without
>>holding rtnl lock. Use action API function that atomically deletes action
>>only if it is still in action idr. This implementation prevents concurrent
>>threads from deleting same action twice.
>>
>>Signed-off-by: Vlad Buslov <[email protected]>
>>---
>> net/sched/act_bpf.c | 8 ++++++++
>> net/sched/act_connmark.c | 8 ++++++++
>> net/sched/act_csum.c | 8 ++++++++
>> net/sched/act_gact.c | 8 ++++++++
>> net/sched/act_ife.c | 8 ++++++++
>> net/sched/act_ipt.c | 16 ++++++++++++++++
>> net/sched/act_mirred.c | 8 ++++++++
>> net/sched/act_nat.c | 8 ++++++++
>> net/sched/act_pedit.c | 8 ++++++++
>> net/sched/act_police.c | 8 ++++++++
>> net/sched/act_sample.c | 8 ++++++++
>> net/sched/act_simple.c | 8 ++++++++
>> net/sched/act_skbedit.c | 8 ++++++++
>> net/sched/act_skbmod.c | 8 ++++++++
>> net/sched/act_tunnel_key.c | 8 ++++++++
>> net/sched/act_vlan.c | 8 ++++++++
>> 16 files changed, 136 insertions(+)
>>
>>diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
>>index 0bf4ecf..36f7f66 100644
>>--- a/net/sched/act_bpf.c
>>+++ b/net/sched/act_bpf.c
>>@@ -394,6 +394,13 @@ static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index,
>> return tcf_idr_search(tn, a, index);
>> }
>>
>>+static int tcf_bpf_delete(struct net *net, u32 index)
>>+{
>>+ struct tc_action_net *tn = net_generic(net, bpf_net_id);
>>+
>>+ return tcf_idr_find_delete(tn, index);
>>+}
>>+
>> static struct tc_action_ops act_bpf_ops __read_mostly = {
>> .kind = "bpf",
>> .type = TCA_ACT_BPF,
>>@@ -404,6 +411,7 @@ static struct tc_action_ops act_bpf_ops __read_mostly = {
>> .init = tcf_bpf_init,
>> .walk = tcf_bpf_walker,
>> .lookup = tcf_bpf_search,
>>+ .delete = tcf_bpf_delete,
>
> I wonder, right before this patch, how the idr index got removed?
> delete op is NULL and I didn't find anyone else to do it.
>
> Also, after this patch, does it make sense to have following check in
> tcf_action_del_1()?
>
> if (ops->delete)
> err = ops->delete(net, index);
>
> Looks like ops->delete is non-null for all.
>
> Seems to me that you need to introduce this patch filling up the delete
> op in all acts and only after that introduce a code that actually calls
> it.
Already moved this for V2 patchset to:
- Add delete callback to ops and implement it for all actions in single
patch.
- Move this patch before delete first use.
Will now remove the conditional as well.
>
> [...]
Mon, May 14, 2018 at 04:27:13PM CEST, [email protected] wrote:
>Retry check-insert sequence in action init functions if action with same
>index was inserted concurrently.
>
>Signed-off-by: Vlad Buslov <[email protected]>
>---
> net/sched/act_bpf.c | 8 +++++++-
> net/sched/act_connmark.c | 8 +++++++-
> net/sched/act_csum.c | 8 +++++++-
> net/sched/act_gact.c | 8 +++++++-
> net/sched/act_ife.c | 8 +++++++-
> net/sched/act_ipt.c | 8 +++++++-
> net/sched/act_mirred.c | 8 +++++++-
> net/sched/act_nat.c | 8 +++++++-
> net/sched/act_pedit.c | 8 +++++++-
> net/sched/act_police.c | 9 ++++++++-
> net/sched/act_sample.c | 8 +++++++-
> net/sched/act_simple.c | 9 ++++++++-
> net/sched/act_skbedit.c | 8 +++++++-
> net/sched/act_skbmod.c | 8 +++++++-
> net/sched/act_tunnel_key.c | 9 ++++++++-
> net/sched/act_vlan.c | 9 ++++++++-
> 16 files changed, 116 insertions(+), 16 deletions(-)
>
>diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
>index 5554bf7..7e20fdc 100644
>--- a/net/sched/act_bpf.c
>+++ b/net/sched/act_bpf.c
>@@ -299,10 +299,16 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
>
> parm = nla_data(tb[TCA_ACT_BPF_PARMS]);
>
>+replay:
> if (!tcf_idr_check(tn, parm->index, act, bind)) {
> ret = tcf_idr_create(tn, parm->index, est, act,
> &act_bpf_ops, bind, true);
>- if (ret < 0)
>+ /* Action with specified index was created concurrently.
>+ * Check again.
>+ */
>+ if (parm->index && ret == -ENOSPC)
>+ goto replay;
>+ else if (ret)
Hmm, looks like you are doing the same/very similar thing in every act
code. I think it would make sense to introduce a helper function for
this purpose.
[...]
On Wed 16 May 2018 at 09:54, Jiri Pirko <[email protected]> wrote:
> Mon, May 14, 2018 at 04:27:12PM CEST, [email protected] wrote:
>>Extend rate estimator new and replace APIs with additional spinlock
>>parameter used by lockless actions to protect rate_est pointer from
>>concurrent modification.
>>
>>Signed-off-by: Vlad Buslov <[email protected]>
>
> [...]
>
>
>> /**
>> * gen_new_estimator - create a new rate estimator
>> * @bstats: basic statistics
>> * @cpu_bstats: bstats per cpu
>> * @rate_est: rate estimator statistics
>>+ * @rate_est_lock: rate_est lock (might be NULL)
>
> I cannot find a place you actually use this new arg in this patchset.
> Did I miss it?
It is used by specific action init function. However, that code was
moved to next patchset due to patchset size limit.
>
>
>> * @stats_lock: statistics lock
>> * @running: qdisc running seqcount
>> * @opt: rate estimator configuration TLV
>
> [...]
Wed, May 16, 2018 at 12:00:57PM CEST, [email protected] wrote:
>
>On Wed 16 May 2018 at 09:54, Jiri Pirko <[email protected]> wrote:
>> Mon, May 14, 2018 at 04:27:12PM CEST, [email protected] wrote:
>>>Extend rate estimator new and replace APIs with additional spinlock
>>>parameter used by lockless actions to protect rate_est pointer from
>>>concurrent modification.
>>>
>>>Signed-off-by: Vlad Buslov <[email protected]>
>>
>> [...]
>>
>>
>>> /**
>>> * gen_new_estimator - create a new rate estimator
>>> * @bstats: basic statistics
>>> * @cpu_bstats: bstats per cpu
>>> * @rate_est: rate estimator statistics
>>>+ * @rate_est_lock: rate_est lock (might be NULL)
>>
>> I cannot find a place you actually use this new arg in this patchset.
>> Did I miss it?
>
>It is used by specific action init function. However, that code was
>moved to next patchset due to patchset size limit.
Please move this patch too.
>
>>
>>
>>> * @stats_lock: statistics lock
>>> * @running: qdisc running seqcount
>>> * @opt: rate estimator configuration TLV
>>
>> [...]
>
On Wed 16 May 2018 at 09:59, Jiri Pirko <[email protected]> wrote:
> Mon, May 14, 2018 at 04:27:13PM CEST, [email protected] wrote:
>>Retry check-insert sequence in action init functions if action with same
>>index was inserted concurrently.
>>
>>Signed-off-by: Vlad Buslov <[email protected]>
>>---
>> net/sched/act_bpf.c | 8 +++++++-
>> net/sched/act_connmark.c | 8 +++++++-
>> net/sched/act_csum.c | 8 +++++++-
>> net/sched/act_gact.c | 8 +++++++-
>> net/sched/act_ife.c | 8 +++++++-
>> net/sched/act_ipt.c | 8 +++++++-
>> net/sched/act_mirred.c | 8 +++++++-
>> net/sched/act_nat.c | 8 +++++++-
>> net/sched/act_pedit.c | 8 +++++++-
>> net/sched/act_police.c | 9 ++++++++-
>> net/sched/act_sample.c | 8 +++++++-
>> net/sched/act_simple.c | 9 ++++++++-
>> net/sched/act_skbedit.c | 8 +++++++-
>> net/sched/act_skbmod.c | 8 +++++++-
>> net/sched/act_tunnel_key.c | 9 ++++++++-
>> net/sched/act_vlan.c | 9 ++++++++-
>> 16 files changed, 116 insertions(+), 16 deletions(-)
>>
>>diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
>>index 5554bf7..7e20fdc 100644
>>--- a/net/sched/act_bpf.c
>>+++ b/net/sched/act_bpf.c
>>@@ -299,10 +299,16 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
>>
>> parm = nla_data(tb[TCA_ACT_BPF_PARMS]);
>>
>>+replay:
>> if (!tcf_idr_check(tn, parm->index, act, bind)) {
>> ret = tcf_idr_create(tn, parm->index, est, act,
>> &act_bpf_ops, bind, true);
>>- if (ret < 0)
>>+ /* Action with specified index was created concurrently.
>>+ * Check again.
>>+ */
>>+ if (parm->index && ret == -ENOSPC)
>>+ goto replay;
>>+ else if (ret)
>
> Hmm, looks like you are doing the same/very similar thing in every act
> code. I think it would make sense to introduce a helper function for
> this purpose.
This code uses goto so it can't be easily refactored into standalone
function. Could you specify which part of this code you suggest to
extract?
>
> [...]
Wed, May 16, 2018 at 01:55:06PM CEST, [email protected] wrote:
>
>On Wed 16 May 2018 at 09:59, Jiri Pirko <[email protected]> wrote:
>> Mon, May 14, 2018 at 04:27:13PM CEST, [email protected] wrote:
>>>Retry check-insert sequence in action init functions if action with same
>>>index was inserted concurrently.
>>>
>>>Signed-off-by: Vlad Buslov <[email protected]>
>>>---
>>> net/sched/act_bpf.c | 8 +++++++-
>>> net/sched/act_connmark.c | 8 +++++++-
>>> net/sched/act_csum.c | 8 +++++++-
>>> net/sched/act_gact.c | 8 +++++++-
>>> net/sched/act_ife.c | 8 +++++++-
>>> net/sched/act_ipt.c | 8 +++++++-
>>> net/sched/act_mirred.c | 8 +++++++-
>>> net/sched/act_nat.c | 8 +++++++-
>>> net/sched/act_pedit.c | 8 +++++++-
>>> net/sched/act_police.c | 9 ++++++++-
>>> net/sched/act_sample.c | 8 +++++++-
>>> net/sched/act_simple.c | 9 ++++++++-
>>> net/sched/act_skbedit.c | 8 +++++++-
>>> net/sched/act_skbmod.c | 8 +++++++-
>>> net/sched/act_tunnel_key.c | 9 ++++++++-
>>> net/sched/act_vlan.c | 9 ++++++++-
>>> 16 files changed, 116 insertions(+), 16 deletions(-)
>>>
>>>diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
>>>index 5554bf7..7e20fdc 100644
>>>--- a/net/sched/act_bpf.c
>>>+++ b/net/sched/act_bpf.c
>>>@@ -299,10 +299,16 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
>>>
>>> parm = nla_data(tb[TCA_ACT_BPF_PARMS]);
>>>
>>>+replay:
>>> if (!tcf_idr_check(tn, parm->index, act, bind)) {
>>> ret = tcf_idr_create(tn, parm->index, est, act,
>>> &act_bpf_ops, bind, true);
>>>- if (ret < 0)
>>>+ /* Action with specified index was created concurrently.
>>>+ * Check again.
>>>+ */
>>>+ if (parm->index && ret == -ENOSPC)
>>>+ goto replay;
>>>+ else if (ret)
>>
>> Hmm, looks like you are doing the same/very similar thing in every act
>> code. I think it would make sense to introduce a helper function for
>> this purpose.
>
>This code uses goto so it can't be easily refactored into standalone
>function. Could you specify which part of this code you suggest to
>extract?
Hmm, looking at the code, I think that what would help is to have a
helper that would atomically check if index exists and if not, it would
allocate one. Something like:
int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index,
struct tc_action **a, int bind)
{
struct tcf_idrinfo *idrinfo = tn->idrinfo;
struct tc_action *p;
int err;
spin_lock(&idrinfo->lock);
if (*index) {
p = idr_find(&idrinfo->action_idr, *index);
if (p) {
if (bind)
p->tcfa_bindcnt++;
p->tcfa_refcnt++;
*a = p;
err = 0;
} else {
*a = NULL;
err = idr_alloc_u32(idr, NULL, index,
*index, GFP_ATOMIC);
}
} else {
*index = 1;
*a = NULL;
err = idr_alloc_u32(idr, NULL, index, UINT_MAX, GFP_ATOMIC);
}
spin_unlock(&idrinfo->lock);
return err;
}
The act code would just check if "a" is NULL and if so, it would call
tcf_idr_create() with allocated index as arg.
>
>>
>> [...]
>
On Wed 16 May 2018 at 12:26, Jiri Pirko <[email protected]> wrote:
> Wed, May 16, 2018 at 01:55:06PM CEST, [email protected] wrote:
>>
>>On Wed 16 May 2018 at 09:59, Jiri Pirko <[email protected]> wrote:
>>> Mon, May 14, 2018 at 04:27:13PM CEST, [email protected] wrote:
>>>>Retry check-insert sequence in action init functions if action with same
>>>>index was inserted concurrently.
>>>>
>>>>Signed-off-by: Vlad Buslov <[email protected]>
>>>>---
>>>> net/sched/act_bpf.c | 8 +++++++-
>>>> net/sched/act_connmark.c | 8 +++++++-
>>>> net/sched/act_csum.c | 8 +++++++-
>>>> net/sched/act_gact.c | 8 +++++++-
>>>> net/sched/act_ife.c | 8 +++++++-
>>>> net/sched/act_ipt.c | 8 +++++++-
>>>> net/sched/act_mirred.c | 8 +++++++-
>>>> net/sched/act_nat.c | 8 +++++++-
>>>> net/sched/act_pedit.c | 8 +++++++-
>>>> net/sched/act_police.c | 9 ++++++++-
>>>> net/sched/act_sample.c | 8 +++++++-
>>>> net/sched/act_simple.c | 9 ++++++++-
>>>> net/sched/act_skbedit.c | 8 +++++++-
>>>> net/sched/act_skbmod.c | 8 +++++++-
>>>> net/sched/act_tunnel_key.c | 9 ++++++++-
>>>> net/sched/act_vlan.c | 9 ++++++++-
>>>> 16 files changed, 116 insertions(+), 16 deletions(-)
>>>>
>>>>diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
>>>>index 5554bf7..7e20fdc 100644
>>>>--- a/net/sched/act_bpf.c
>>>>+++ b/net/sched/act_bpf.c
>>>>@@ -299,10 +299,16 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
>>>>
>>>> parm = nla_data(tb[TCA_ACT_BPF_PARMS]);
>>>>
>>>>+replay:
>>>> if (!tcf_idr_check(tn, parm->index, act, bind)) {
>>>> ret = tcf_idr_create(tn, parm->index, est, act,
>>>> &act_bpf_ops, bind, true);
>>>>- if (ret < 0)
>>>>+ /* Action with specified index was created concurrently.
>>>>+ * Check again.
>>>>+ */
>>>>+ if (parm->index && ret == -ENOSPC)
>>>>+ goto replay;
>>>>+ else if (ret)
>>>
>>> Hmm, looks like you are doing the same/very similar thing in every act
>>> code. I think it would make sense to introduce a helper function for
>>> this purpose.
>>
>>This code uses goto so it can't be easily refactored into standalone
>>function. Could you specify which part of this code you suggest to
>>extract?
>
> Hmm, looking at the code, I think that what would help is to have a
> helper that would atomically check if index exists and if not, it would
> allocate one. Something like:
>
>
> int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index,
> struct tc_action **a, int bind)
> {
> struct tcf_idrinfo *idrinfo = tn->idrinfo;
> struct tc_action *p;
> int err;
>
> spin_lock(&idrinfo->lock);
> if (*index) {
> p = idr_find(&idrinfo->action_idr, *index);
> if (p) {
> if (bind)
> p->tcfa_bindcnt++;
> p->tcfa_refcnt++;
> *a = p;
> err = 0;
> } else {
> *a = NULL;
> err = idr_alloc_u32(idr, NULL, index,
> *index, GFP_ATOMIC);
> }
> } else {
> *index = 1;
> *a = NULL;
> err = idr_alloc_u32(idr, NULL, index, UINT_MAX, GFP_ATOMIC);
> }
> spin_unlock(&idrinfo->lock);
> return err;
> }
>
> The act code would just check if "a" is NULL and if so, it would call
> tcf_idr_create() with allocated index as arg.
What about multiple actions that have arbitrary code between initial
check and idr allocation that is currently inside tcf_idr_create()?
>
>
>>
>>>
>>> [...]
>>
Wed, May 16, 2018 at 02:43:58PM CEST, [email protected] wrote:
>
>On Wed 16 May 2018 at 12:26, Jiri Pirko <[email protected]> wrote:
>> Wed, May 16, 2018 at 01:55:06PM CEST, [email protected] wrote:
>>>
>>>On Wed 16 May 2018 at 09:59, Jiri Pirko <[email protected]> wrote:
>>>> Mon, May 14, 2018 at 04:27:13PM CEST, [email protected] wrote:
>>>>>Retry check-insert sequence in action init functions if action with same
>>>>>index was inserted concurrently.
>>>>>
>>>>>Signed-off-by: Vlad Buslov <[email protected]>
>>>>>---
>>>>> net/sched/act_bpf.c | 8 +++++++-
>>>>> net/sched/act_connmark.c | 8 +++++++-
>>>>> net/sched/act_csum.c | 8 +++++++-
>>>>> net/sched/act_gact.c | 8 +++++++-
>>>>> net/sched/act_ife.c | 8 +++++++-
>>>>> net/sched/act_ipt.c | 8 +++++++-
>>>>> net/sched/act_mirred.c | 8 +++++++-
>>>>> net/sched/act_nat.c | 8 +++++++-
>>>>> net/sched/act_pedit.c | 8 +++++++-
>>>>> net/sched/act_police.c | 9 ++++++++-
>>>>> net/sched/act_sample.c | 8 +++++++-
>>>>> net/sched/act_simple.c | 9 ++++++++-
>>>>> net/sched/act_skbedit.c | 8 +++++++-
>>>>> net/sched/act_skbmod.c | 8 +++++++-
>>>>> net/sched/act_tunnel_key.c | 9 ++++++++-
>>>>> net/sched/act_vlan.c | 9 ++++++++-
>>>>> 16 files changed, 116 insertions(+), 16 deletions(-)
>>>>>
>>>>>diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
>>>>>index 5554bf7..7e20fdc 100644
>>>>>--- a/net/sched/act_bpf.c
>>>>>+++ b/net/sched/act_bpf.c
>>>>>@@ -299,10 +299,16 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
>>>>>
>>>>> parm = nla_data(tb[TCA_ACT_BPF_PARMS]);
>>>>>
>>>>>+replay:
>>>>> if (!tcf_idr_check(tn, parm->index, act, bind)) {
>>>>> ret = tcf_idr_create(tn, parm->index, est, act,
>>>>> &act_bpf_ops, bind, true);
>>>>>- if (ret < 0)
>>>>>+ /* Action with specified index was created concurrently.
>>>>>+ * Check again.
>>>>>+ */
>>>>>+ if (parm->index && ret == -ENOSPC)
>>>>>+ goto replay;
>>>>>+ else if (ret)
>>>>
>>>> Hmm, looks like you are doing the same/very similar thing in every act
>>>> code. I think it would make sense to introduce a helper function for
>>>> this purpose.
>>>
>>>This code uses goto so it can't be easily refactored into standalone
>>>function. Could you specify which part of this code you suggest to
>>>extract?
>>
>> Hmm, looking at the code, I think that what would help is to have a
>> helper that would atomically check if index exists and if not, it would
>> allocate one. Something like:
>>
>>
>> int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index,
>> struct tc_action **a, int bind)
>> {
>> struct tcf_idrinfo *idrinfo = tn->idrinfo;
>> struct tc_action *p;
>> int err;
>>
>> spin_lock(&idrinfo->lock);
>> if (*index) {
>> p = idr_find(&idrinfo->action_idr, *index);
>> if (p) {
>> if (bind)
>> p->tcfa_bindcnt++;
>> p->tcfa_refcnt++;
>> *a = p;
>> err = 0;
>> } else {
>> *a = NULL;
>> err = idr_alloc_u32(idr, NULL, index,
>> *index, GFP_ATOMIC);
>> }
>> } else {
>> *index = 1;
>> *a = NULL;
>> err = idr_alloc_u32(idr, NULL, index, UINT_MAX, GFP_ATOMIC);
>> }
>> spin_unlock(&idrinfo->lock);
>> return err;
>> }
>>
>> The act code would just check if "a" is NULL and if so, it would call
>> tcf_idr_create() with allocated index as arg.
>
>What about multiple actions that have arbitrary code between initial
>check and idr allocation that is currently inside tcf_idr_create()?
Why it would be a problem to have them after the allocation?
There is one issue though with my draft. tcf_idr_insert() function
which actually assigns a "p" pointer to the idr index is called later on.
Until that happens, the idr_find() would return NULL even if the index
is actually allocated. We cannot assign "p" in tcf_idr_check_alloc()
because it is allocated only later on in tcf_idr_create(). But that is
resolvable by the following trick:
int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index,
struct tc_action **a, int bind)
{
struct tcf_idrinfo *idrinfo = tn->idrinfo;
struct tc_action *p;
int err;
again:
spin_lock(&idrinfo->lock);
if (*index) {
p = idr_find(&idrinfo->action_idr, *index);
if (IS_ERR(p)) {
/* This means that another process allocated
* index but did not assign the pointer yet.
*/
spin_unlock(&idrinfo->lock);
goto again;
}
if (p) {
if (bind)
p->tcfa_bindcnt++;
p->tcfa_refcnt++;
*a = p;
err = 0;
} else {
*a = NULL;
err = idr_alloc_u32(idr, NULL, index,
*index, GFP_ATOMIC);
idr_replace(&idrinfo->action_idr,
ERR_PTR(-EBUSY), *index);
}
} else {
*index = 1;
*a = NULL;
err = idr_alloc_u32(idr, NULL, index, UINT_MAX, GFP_ATOMIC);
idr_replace(&idrinfo->action_idr, ERR_PTR(-EBUSY), *index);
}
spin_unlock(&idrinfo->lock);
return err;
}
>
>>
>>
>>>
>>>>
>>>> [...]
>>>
>
On Wed 16 May 2018 at 13:21, Jiri Pirko <[email protected]> wrote:
> Wed, May 16, 2018 at 02:43:58PM CEST, [email protected] wrote:
>>
>>On Wed 16 May 2018 at 12:26, Jiri Pirko <[email protected]> wrote:
>>> Wed, May 16, 2018 at 01:55:06PM CEST, [email protected] wrote:
>>>>
>>>>On Wed 16 May 2018 at 09:59, Jiri Pirko <[email protected]> wrote:
>>>>> Mon, May 14, 2018 at 04:27:13PM CEST, [email protected] wrote:
>>>>>>Retry check-insert sequence in action init functions if action with same
>>>>>>index was inserted concurrently.
>>>>>>
>>>>>>Signed-off-by: Vlad Buslov <[email protected]>
>>>>>>---
>>>>>> net/sched/act_bpf.c | 8 +++++++-
>>>>>> net/sched/act_connmark.c | 8 +++++++-
>>>>>> net/sched/act_csum.c | 8 +++++++-
>>>>>> net/sched/act_gact.c | 8 +++++++-
>>>>>> net/sched/act_ife.c | 8 +++++++-
>>>>>> net/sched/act_ipt.c | 8 +++++++-
>>>>>> net/sched/act_mirred.c | 8 +++++++-
>>>>>> net/sched/act_nat.c | 8 +++++++-
>>>>>> net/sched/act_pedit.c | 8 +++++++-
>>>>>> net/sched/act_police.c | 9 ++++++++-
>>>>>> net/sched/act_sample.c | 8 +++++++-
>>>>>> net/sched/act_simple.c | 9 ++++++++-
>>>>>> net/sched/act_skbedit.c | 8 +++++++-
>>>>>> net/sched/act_skbmod.c | 8 +++++++-
>>>>>> net/sched/act_tunnel_key.c | 9 ++++++++-
>>>>>> net/sched/act_vlan.c | 9 ++++++++-
>>>>>> 16 files changed, 116 insertions(+), 16 deletions(-)
>>>>>>
>>>>>>diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
>>>>>>index 5554bf7..7e20fdc 100644
>>>>>>--- a/net/sched/act_bpf.c
>>>>>>+++ b/net/sched/act_bpf.c
>>>>>>@@ -299,10 +299,16 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
>>>>>>
>>>>>> parm = nla_data(tb[TCA_ACT_BPF_PARMS]);
>>>>>>
>>>>>>+replay:
>>>>>> if (!tcf_idr_check(tn, parm->index, act, bind)) {
>>>>>> ret = tcf_idr_create(tn, parm->index, est, act,
>>>>>> &act_bpf_ops, bind, true);
>>>>>>- if (ret < 0)
>>>>>>+ /* Action with specified index was created concurrently.
>>>>>>+ * Check again.
>>>>>>+ */
>>>>>>+ if (parm->index && ret == -ENOSPC)
>>>>>>+ goto replay;
>>>>>>+ else if (ret)
>>>>>
>>>>> Hmm, looks like you are doing the same/very similar thing in every act
>>>>> code. I think it would make sense to introduce a helper function for
>>>>> this purpose.
>>>>
>>>>This code uses goto so it can't be easily refactored into standalone
>>>>function. Could you specify which part of this code you suggest to
>>>>extract?
>>>
>>> Hmm, looking at the code, I think that what would help is to have a
>>> helper that would atomically check if index exists and if not, it would
>>> allocate one. Something like:
>>>
>>>
>>> int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index,
>>> struct tc_action **a, int bind)
>>> {
>>> struct tcf_idrinfo *idrinfo = tn->idrinfo;
>>> struct tc_action *p;
>>> int err;
>>>
>>> spin_lock(&idrinfo->lock);
>>> if (*index) {
>>> p = idr_find(&idrinfo->action_idr, *index);
>>> if (p) {
>>> if (bind)
>>> p->tcfa_bindcnt++;
>>> p->tcfa_refcnt++;
>>> *a = p;
>>> err = 0;
>>> } else {
>>> *a = NULL;
>>> err = idr_alloc_u32(idr, NULL, index,
>>> *index, GFP_ATOMIC);
>>> }
>>> } else {
>>> *index = 1;
>>> *a = NULL;
>>> err = idr_alloc_u32(idr, NULL, index, UINT_MAX, GFP_ATOMIC);
>>> }
>>> spin_unlock(&idrinfo->lock);
>>> return err;
>>> }
>>>
>>> The act code would just check if "a" is NULL and if so, it would call
>>> tcf_idr_create() with allocated index as arg.
>>
>>What about multiple actions that have arbitrary code between initial
>>check and idr allocation that is currently inside tcf_idr_create()?
>
> Why it would be a problem to have them after the allocation?
Lets look at mirred for exmple:
exists = tcf_idr_check(tn, parm->index, a, bind);
if (exists && bind)
return 0;
switch (parm->eaction) {
case TCA_EGRESS_MIRROR:
case TCA_EGRESS_REDIR:
case TCA_INGRESS_REDIR:
case TCA_INGRESS_MIRROR:
break;
default:
if (exists)
tcf_idr_release(*a, bind);
NL_SET_ERR_MSG_MOD(extack, "Unknown mirred option");
return -EINVAL;
}
if (parm->ifindex) {
dev = __dev_get_by_index(net, parm->ifindex);
if (dev == NULL) {
if (exists)
tcf_idr_release(*a, bind);
return -ENODEV;
}
mac_header_xmit = dev_is_mac_header_xmit(dev);
} else {
dev = NULL;
}
if (!exists) {
if (!dev) {
NL_SET_ERR_MSG_MOD(extack, "Specified device does not exist");
return -EINVAL;
}
ret = tcf_idr_create(tn, parm->index, est, a,
&act_mirred_ops, bind, true);
/* Action with specified index was created concurrently.
* Check again.
*/
if (parm->index && ret == -ENOSPC)
goto replay;
else if (ret)
return ret;
There are several returns and cleanup is only performed when action
exists. So all code like that will have to be audited to also remove
index from idr, otherwise idr handles leak on return.
>
> There is one issue though with my draft. tcf_idr_insert() function
> which actually assigns a "p" pointer to the idr index is called later on.
> Until that happens, the idr_find() would return NULL even if the index
> is actually allocated. We cannot assign "p" in tcf_idr_check_alloc()
> because it is allocated only later on in tcf_idr_create(). But that is
> resolvable by the following trick:
>
> int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index,
> struct tc_action **a, int bind)
> {
> struct tcf_idrinfo *idrinfo = tn->idrinfo;
> struct tc_action *p;
> int err;
>
> again:
> spin_lock(&idrinfo->lock);
> if (*index) {
> p = idr_find(&idrinfo->action_idr, *index);
> if (IS_ERR(p)) {
> /* This means that another process allocated
> * index but did not assign the pointer yet.
> */
> spin_unlock(&idrinfo->lock);
> goto again;
> }
> if (p) {
> if (bind)
> p->tcfa_bindcnt++;
> p->tcfa_refcnt++;
> *a = p;
> err = 0;
> } else {
> *a = NULL;
> err = idr_alloc_u32(idr, NULL, index,
> *index, GFP_ATOMIC);
> idr_replace(&idrinfo->action_idr,
> ERR_PTR(-EBUSY), *index);
> }
> } else {
> *index = 1;
> *a = NULL;
> err = idr_alloc_u32(idr, NULL, index, UINT_MAX, GFP_ATOMIC);
> idr_replace(&idrinfo->action_idr, ERR_PTR(-EBUSY), *index);
> }
> spin_unlock(&idrinfo->lock);
> return err;
> }
>
So users of action idr that might perform concurrent lookups are also
have to be changed to check for error pointers, that now can be inserted
into idr? Seems like a complex change...
>
>
>
>
>>
>>>
>>>
>>>>
>>>>>
>>>>> [...]
>>>>
>>
Wed, May 16, 2018 at 03:52:20PM CEST, [email protected] wrote:
>
>On Wed 16 May 2018 at 13:21, Jiri Pirko <[email protected]> wrote:
>> Wed, May 16, 2018 at 02:43:58PM CEST, [email protected] wrote:
>>>
>>>On Wed 16 May 2018 at 12:26, Jiri Pirko <[email protected]> wrote:
>>>> Wed, May 16, 2018 at 01:55:06PM CEST, [email protected] wrote:
>>>>>
>>>>>On Wed 16 May 2018 at 09:59, Jiri Pirko <[email protected]> wrote:
>>>>>> Mon, May 14, 2018 at 04:27:13PM CEST, [email protected] wrote:
>>>>>>>Retry check-insert sequence in action init functions if action with same
>>>>>>>index was inserted concurrently.
>>>>>>>
>>>>>>>Signed-off-by: Vlad Buslov <[email protected]>
>>>>>>>---
>>>>>>> net/sched/act_bpf.c | 8 +++++++-
>>>>>>> net/sched/act_connmark.c | 8 +++++++-
>>>>>>> net/sched/act_csum.c | 8 +++++++-
>>>>>>> net/sched/act_gact.c | 8 +++++++-
>>>>>>> net/sched/act_ife.c | 8 +++++++-
>>>>>>> net/sched/act_ipt.c | 8 +++++++-
>>>>>>> net/sched/act_mirred.c | 8 +++++++-
>>>>>>> net/sched/act_nat.c | 8 +++++++-
>>>>>>> net/sched/act_pedit.c | 8 +++++++-
>>>>>>> net/sched/act_police.c | 9 ++++++++-
>>>>>>> net/sched/act_sample.c | 8 +++++++-
>>>>>>> net/sched/act_simple.c | 9 ++++++++-
>>>>>>> net/sched/act_skbedit.c | 8 +++++++-
>>>>>>> net/sched/act_skbmod.c | 8 +++++++-
>>>>>>> net/sched/act_tunnel_key.c | 9 ++++++++-
>>>>>>> net/sched/act_vlan.c | 9 ++++++++-
>>>>>>> 16 files changed, 116 insertions(+), 16 deletions(-)
>>>>>>>
>>>>>>>diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
>>>>>>>index 5554bf7..7e20fdc 100644
>>>>>>>--- a/net/sched/act_bpf.c
>>>>>>>+++ b/net/sched/act_bpf.c
>>>>>>>@@ -299,10 +299,16 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
>>>>>>>
>>>>>>> parm = nla_data(tb[TCA_ACT_BPF_PARMS]);
>>>>>>>
>>>>>>>+replay:
>>>>>>> if (!tcf_idr_check(tn, parm->index, act, bind)) {
>>>>>>> ret = tcf_idr_create(tn, parm->index, est, act,
>>>>>>> &act_bpf_ops, bind, true);
>>>>>>>- if (ret < 0)
>>>>>>>+ /* Action with specified index was created concurrently.
>>>>>>>+ * Check again.
>>>>>>>+ */
>>>>>>>+ if (parm->index && ret == -ENOSPC)
>>>>>>>+ goto replay;
>>>>>>>+ else if (ret)
>>>>>>
>>>>>> Hmm, looks like you are doing the same/very similar thing in every act
>>>>>> code. I think it would make sense to introduce a helper function for
>>>>>> this purpose.
>>>>>
>>>>>This code uses goto so it can't be easily refactored into standalone
>>>>>function. Could you specify which part of this code you suggest to
>>>>>extract?
>>>>
>>>> Hmm, looking at the code, I think that what would help is to have a
>>>> helper that would atomically check if index exists and if not, it would
>>>> allocate one. Something like:
>>>>
>>>>
>>>> int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index,
>>>> struct tc_action **a, int bind)
>>>> {
>>>> struct tcf_idrinfo *idrinfo = tn->idrinfo;
>>>> struct tc_action *p;
>>>> int err;
>>>>
>>>> spin_lock(&idrinfo->lock);
>>>> if (*index) {
>>>> p = idr_find(&idrinfo->action_idr, *index);
>>>> if (p) {
>>>> if (bind)
>>>> p->tcfa_bindcnt++;
>>>> p->tcfa_refcnt++;
>>>> *a = p;
>>>> err = 0;
>>>> } else {
>>>> *a = NULL;
>>>> err = idr_alloc_u32(idr, NULL, index,
>>>> *index, GFP_ATOMIC);
>>>> }
>>>> } else {
>>>> *index = 1;
>>>> *a = NULL;
>>>> err = idr_alloc_u32(idr, NULL, index, UINT_MAX, GFP_ATOMIC);
>>>> }
>>>> spin_unlock(&idrinfo->lock);
>>>> return err;
>>>> }
>>>>
>>>> The act code would just check if "a" is NULL and if so, it would call
>>>> tcf_idr_create() with allocated index as arg.
>>>
>>>What about multiple actions that have arbitrary code between initial
>>>check and idr allocation that is currently inside tcf_idr_create()?
>>
>> Why it would be a problem to have them after the allocation?
>
>Lets look at mirred for exmple:
> exists = tcf_idr_check(tn, parm->index, a, bind);
> if (exists && bind)
> return 0;
>
> switch (parm->eaction) {
> case TCA_EGRESS_MIRROR:
> case TCA_EGRESS_REDIR:
> case TCA_INGRESS_REDIR:
> case TCA_INGRESS_MIRROR:
> break;
> default:
> if (exists)
> tcf_idr_release(*a, bind);
> NL_SET_ERR_MSG_MOD(extack, "Unknown mirred option");
> return -EINVAL;
> }
> if (parm->ifindex) {
> dev = __dev_get_by_index(net, parm->ifindex);
> if (dev == NULL) {
> if (exists)
> tcf_idr_release(*a, bind);
> return -ENODEV;
> }
> mac_header_xmit = dev_is_mac_header_xmit(dev);
> } else {
> dev = NULL;
> }
>
> if (!exists) {
> if (!dev) {
> NL_SET_ERR_MSG_MOD(extack, "Specified device does not exist");
> return -EINVAL;
> }
> ret = tcf_idr_create(tn, parm->index, est, a,
> &act_mirred_ops, bind, true);
> /* Action with specified index was created concurrently.
> * Check again.
> */
> if (parm->index && ret == -ENOSPC)
> goto replay;
> else if (ret)
> return ret;
>
>There are several returns and cleanup is only performed when action
>exists. So all code like that will have to be audited to also remove
>index from idr, otherwise idr handles leak on return.
Yeah. You have to take care of the error path.
>
>>
>> There is one issue though with my draft. tcf_idr_insert() function
>> which actually assigns a "p" pointer to the idr index is called later on.
>> Until that happens, the idr_find() would return NULL even if the index
>> is actually allocated. We cannot assign "p" in tcf_idr_check_alloc()
>> because it is allocated only later on in tcf_idr_create(). But that is
>> resolvable by the following trick:
>>
>> int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index,
>> struct tc_action **a, int bind)
>> {
>> struct tcf_idrinfo *idrinfo = tn->idrinfo;
>> struct tc_action *p;
>> int err;
>>
>> again:
>> spin_lock(&idrinfo->lock);
>> if (*index) {
>> p = idr_find(&idrinfo->action_idr, *index);
>> if (IS_ERR(p)) {
>> /* This means that another process allocated
>> * index but did not assign the pointer yet.
>> */
>> spin_unlock(&idrinfo->lock);
>> goto again;
>> }
>> if (p) {
>> if (bind)
>> p->tcfa_bindcnt++;
>> p->tcfa_refcnt++;
>> *a = p;
>> err = 0;
>> } else {
>> *a = NULL;
>> err = idr_alloc_u32(idr, NULL, index,
>> *index, GFP_ATOMIC);
>> idr_replace(&idrinfo->action_idr,
>> ERR_PTR(-EBUSY), *index);
>> }
>> } else {
>> *index = 1;
>> *a = NULL;
>> err = idr_alloc_u32(idr, NULL, index, UINT_MAX, GFP_ATOMIC);
>> idr_replace(&idrinfo->action_idr, ERR_PTR(-EBUSY), *index);
>> }
>> spin_unlock(&idrinfo->lock);
>> return err;
>> }
>>
>
>So users of action idr that might perform concurrent lookups are also
>have to be changed to check for error pointers, that now can be inserted
>into idr? Seems like a complex change...
You can just add a simple check into tcf_idr_lookup(). Where else?
>
>>
>>
>>
>>
>>>
>>>>
>>>>
>>>>>
>>>>>>
>>>>>> [...]
>>>>>
>>>
>
On Wed 16 May 2018 at 14:13, Jiri Pirko <[email protected]> wrote:
> Wed, May 16, 2018 at 03:52:20PM CEST, [email protected] wrote:
>>
>>On Wed 16 May 2018 at 13:21, Jiri Pirko <[email protected]> wrote:
>>> Wed, May 16, 2018 at 02:43:58PM CEST, [email protected] wrote:
>>>>
>>>>On Wed 16 May 2018 at 12:26, Jiri Pirko <[email protected]> wrote:
>>>>> Wed, May 16, 2018 at 01:55:06PM CEST, [email protected] wrote:
>>>>>>
>>>>>>On Wed 16 May 2018 at 09:59, Jiri Pirko <[email protected]> wrote:
>>>>>>> Mon, May 14, 2018 at 04:27:13PM CEST, [email protected] wrote:
>>>>>>>>Retry check-insert sequence in action init functions if action with same
>>>>>>>>index was inserted concurrently.
>>>>>>>>
>>>>>>>>Signed-off-by: Vlad Buslov <[email protected]>
>>>>>>>>---
>>>>>>>> net/sched/act_bpf.c | 8 +++++++-
>>>>>>>> net/sched/act_connmark.c | 8 +++++++-
>>>>>>>> net/sched/act_csum.c | 8 +++++++-
>>>>>>>> net/sched/act_gact.c | 8 +++++++-
>>>>>>>> net/sched/act_ife.c | 8 +++++++-
>>>>>>>> net/sched/act_ipt.c | 8 +++++++-
>>>>>>>> net/sched/act_mirred.c | 8 +++++++-
>>>>>>>> net/sched/act_nat.c | 8 +++++++-
>>>>>>>> net/sched/act_pedit.c | 8 +++++++-
>>>>>>>> net/sched/act_police.c | 9 ++++++++-
>>>>>>>> net/sched/act_sample.c | 8 +++++++-
>>>>>>>> net/sched/act_simple.c | 9 ++++++++-
>>>>>>>> net/sched/act_skbedit.c | 8 +++++++-
>>>>>>>> net/sched/act_skbmod.c | 8 +++++++-
>>>>>>>> net/sched/act_tunnel_key.c | 9 ++++++++-
>>>>>>>> net/sched/act_vlan.c | 9 ++++++++-
>>>>>>>> 16 files changed, 116 insertions(+), 16 deletions(-)
>>>>>>>>
>>>>>>>>diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
>>>>>>>>index 5554bf7..7e20fdc 100644
>>>>>>>>--- a/net/sched/act_bpf.c
>>>>>>>>+++ b/net/sched/act_bpf.c
>>>>>>>>@@ -299,10 +299,16 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
>>>>>>>>
>>>>>>>> parm = nla_data(tb[TCA_ACT_BPF_PARMS]);
>>>>>>>>
>>>>>>>>+replay:
>>>>>>>> if (!tcf_idr_check(tn, parm->index, act, bind)) {
>>>>>>>> ret = tcf_idr_create(tn, parm->index, est, act,
>>>>>>>> &act_bpf_ops, bind, true);
>>>>>>>>- if (ret < 0)
>>>>>>>>+ /* Action with specified index was created concurrently.
>>>>>>>>+ * Check again.
>>>>>>>>+ */
>>>>>>>>+ if (parm->index && ret == -ENOSPC)
>>>>>>>>+ goto replay;
>>>>>>>>+ else if (ret)
>>>>>>>
>>>>>>> Hmm, looks like you are doing the same/very similar thing in every act
>>>>>>> code. I think it would make sense to introduce a helper function for
>>>>>>> this purpose.
>>>>>>
>>>>>>This code uses goto so it can't be easily refactored into standalone
>>>>>>function. Could you specify which part of this code you suggest to
>>>>>>extract?
>>>>>
>>>>> Hmm, looking at the code, I think that what would help is to have a
>>>>> helper that would atomically check if index exists and if not, it would
>>>>> allocate one. Something like:
>>>>>
>>>>>
>>>>> int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index,
>>>>> struct tc_action **a, int bind)
>>>>> {
>>>>> struct tcf_idrinfo *idrinfo = tn->idrinfo;
>>>>> struct tc_action *p;
>>>>> int err;
>>>>>
>>>>> spin_lock(&idrinfo->lock);
>>>>> if (*index) {
>>>>> p = idr_find(&idrinfo->action_idr, *index);
>>>>> if (p) {
>>>>> if (bind)
>>>>> p->tcfa_bindcnt++;
>>>>> p->tcfa_refcnt++;
>>>>> *a = p;
>>>>> err = 0;
>>>>> } else {
>>>>> *a = NULL;
>>>>> err = idr_alloc_u32(idr, NULL, index,
>>>>> *index, GFP_ATOMIC);
>>>>> }
>>>>> } else {
>>>>> *index = 1;
>>>>> *a = NULL;
>>>>> err = idr_alloc_u32(idr, NULL, index, UINT_MAX, GFP_ATOMIC);
>>>>> }
>>>>> spin_unlock(&idrinfo->lock);
>>>>> return err;
>>>>> }
>>>>>
>>>>> The act code would just check if "a" is NULL and if so, it would call
>>>>> tcf_idr_create() with allocated index as arg.
>>>>
>>>>What about multiple actions that have arbitrary code between initial
>>>>check and idr allocation that is currently inside tcf_idr_create()?
>>>
>>> Why it would be a problem to have them after the allocation?
>>
>>Lets look at mirred for exmple:
>> exists = tcf_idr_check(tn, parm->index, a, bind);
>> if (exists && bind)
>> return 0;
>>
>> switch (parm->eaction) {
>> case TCA_EGRESS_MIRROR:
>> case TCA_EGRESS_REDIR:
>> case TCA_INGRESS_REDIR:
>> case TCA_INGRESS_MIRROR:
>> break;
>> default:
>> if (exists)
>> tcf_idr_release(*a, bind);
>> NL_SET_ERR_MSG_MOD(extack, "Unknown mirred option");
>> return -EINVAL;
>> }
>> if (parm->ifindex) {
>> dev = __dev_get_by_index(net, parm->ifindex);
>> if (dev == NULL) {
>> if (exists)
>> tcf_idr_release(*a, bind);
>> return -ENODEV;
>> }
>> mac_header_xmit = dev_is_mac_header_xmit(dev);
>> } else {
>> dev = NULL;
>> }
>>
>> if (!exists) {
>> if (!dev) {
>> NL_SET_ERR_MSG_MOD(extack, "Specified device does not exist");
>> return -EINVAL;
>> }
>> ret = tcf_idr_create(tn, parm->index, est, a,
>> &act_mirred_ops, bind, true);
>> /* Action with specified index was created concurrently.
>> * Check again.
>> */
>> if (parm->index && ret == -ENOSPC)
>> goto replay;
>> else if (ret)
>> return ret;
>>
>>There are several returns and cleanup is only performed when action
>>exists. So all code like that will have to be audited to also remove
>>index from idr, otherwise idr handles leak on return.
>
> Yeah. You have to take care of the error path.
>
>
>>
>>>
>>> There is one issue though with my draft. tcf_idr_insert() function
>>> which actually assigns a "p" pointer to the idr index is called later on.
>>> Until that happens, the idr_find() would return NULL even if the index
>>> is actually allocated. We cannot assign "p" in tcf_idr_check_alloc()
>>> because it is allocated only later on in tcf_idr_create(). But that is
>>> resolvable by the following trick:
>>>
>>> int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index,
>>> struct tc_action **a, int bind)
>>> {
>>> struct tcf_idrinfo *idrinfo = tn->idrinfo;
>>> struct tc_action *p;
>>> int err;
>>>
>>> again:
>>> spin_lock(&idrinfo->lock);
>>> if (*index) {
>>> p = idr_find(&idrinfo->action_idr, *index);
>>> if (IS_ERR(p)) {
>>> /* This means that another process allocated
>>> * index but did not assign the pointer yet.
>>> */
>>> spin_unlock(&idrinfo->lock);
>>> goto again;
>>> }
>>> if (p) {
>>> if (bind)
>>> p->tcfa_bindcnt++;
>>> p->tcfa_refcnt++;
>>> *a = p;
>>> err = 0;
>>> } else {
>>> *a = NULL;
>>> err = idr_alloc_u32(idr, NULL, index,
>>> *index, GFP_ATOMIC);
>>> idr_replace(&idrinfo->action_idr,
>>> ERR_PTR(-EBUSY), *index);
>>> }
>>> } else {
>>> *index = 1;
>>> *a = NULL;
>>> err = idr_alloc_u32(idr, NULL, index, UINT_MAX, GFP_ATOMIC);
>>> idr_replace(&idrinfo->action_idr, ERR_PTR(-EBUSY), *index);
>>> }
>>> spin_unlock(&idrinfo->lock);
>>> return err;
>>> }
>>>
>>
>>So users of action idr that might perform concurrent lookups are also
>>have to be changed to check for error pointers, that now can be inserted
>>into idr? Seems like a complex change...
>
> You can just add a simple check into tcf_idr_lookup(). Where else?
>
To me it looks like we take something simple and already working, and
make it complex to save few lines of code in action init...
Anyway, how should I do patch split for this?
Patch to implement function you outlined and another one to modify all
actions to use it(with all refactoring to not leak references)? Or patch
per action is better approach?
Wed, May 16, 2018 at 04:26:40PM CEST, [email protected] wrote:
>
>On Wed 16 May 2018 at 14:13, Jiri Pirko <[email protected]> wrote:
>> Wed, May 16, 2018 at 03:52:20PM CEST, [email protected] wrote:
>>>
>>>On Wed 16 May 2018 at 13:21, Jiri Pirko <[email protected]> wrote:
>>>> Wed, May 16, 2018 at 02:43:58PM CEST, [email protected] wrote:
>>>>>
>>>>>On Wed 16 May 2018 at 12:26, Jiri Pirko <[email protected]> wrote:
>>>>>> Wed, May 16, 2018 at 01:55:06PM CEST, [email protected] wrote:
>>>>>>>
>>>>>>>On Wed 16 May 2018 at 09:59, Jiri Pirko <[email protected]> wrote:
>>>>>>>> Mon, May 14, 2018 at 04:27:13PM CEST, [email protected] wrote:
>>>>>>>>>Retry check-insert sequence in action init functions if action with same
>>>>>>>>>index was inserted concurrently.
>>>>>>>>>
>>>>>>>>>Signed-off-by: Vlad Buslov <[email protected]>
>>>>>>>>>---
>>>>>>>>> net/sched/act_bpf.c | 8 +++++++-
>>>>>>>>> net/sched/act_connmark.c | 8 +++++++-
>>>>>>>>> net/sched/act_csum.c | 8 +++++++-
>>>>>>>>> net/sched/act_gact.c | 8 +++++++-
>>>>>>>>> net/sched/act_ife.c | 8 +++++++-
>>>>>>>>> net/sched/act_ipt.c | 8 +++++++-
>>>>>>>>> net/sched/act_mirred.c | 8 +++++++-
>>>>>>>>> net/sched/act_nat.c | 8 +++++++-
>>>>>>>>> net/sched/act_pedit.c | 8 +++++++-
>>>>>>>>> net/sched/act_police.c | 9 ++++++++-
>>>>>>>>> net/sched/act_sample.c | 8 +++++++-
>>>>>>>>> net/sched/act_simple.c | 9 ++++++++-
>>>>>>>>> net/sched/act_skbedit.c | 8 +++++++-
>>>>>>>>> net/sched/act_skbmod.c | 8 +++++++-
>>>>>>>>> net/sched/act_tunnel_key.c | 9 ++++++++-
>>>>>>>>> net/sched/act_vlan.c | 9 ++++++++-
>>>>>>>>> 16 files changed, 116 insertions(+), 16 deletions(-)
>>>>>>>>>
>>>>>>>>>diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
>>>>>>>>>index 5554bf7..7e20fdc 100644
>>>>>>>>>--- a/net/sched/act_bpf.c
>>>>>>>>>+++ b/net/sched/act_bpf.c
>>>>>>>>>@@ -299,10 +299,16 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
>>>>>>>>>
>>>>>>>>> parm = nla_data(tb[TCA_ACT_BPF_PARMS]);
>>>>>>>>>
>>>>>>>>>+replay:
>>>>>>>>> if (!tcf_idr_check(tn, parm->index, act, bind)) {
>>>>>>>>> ret = tcf_idr_create(tn, parm->index, est, act,
>>>>>>>>> &act_bpf_ops, bind, true);
>>>>>>>>>- if (ret < 0)
>>>>>>>>>+ /* Action with specified index was created concurrently.
>>>>>>>>>+ * Check again.
>>>>>>>>>+ */
>>>>>>>>>+ if (parm->index && ret == -ENOSPC)
>>>>>>>>>+ goto replay;
>>>>>>>>>+ else if (ret)
>>>>>>>>
>>>>>>>> Hmm, looks like you are doing the same/very similar thing in every act
>>>>>>>> code. I think it would make sense to introduce a helper function for
>>>>>>>> this purpose.
>>>>>>>
>>>>>>>This code uses goto so it can't be easily refactored into standalone
>>>>>>>function. Could you specify which part of this code you suggest to
>>>>>>>extract?
>>>>>>
>>>>>> Hmm, looking at the code, I think that what would help is to have a
>>>>>> helper that would atomically check if index exists and if not, it would
>>>>>> allocate one. Something like:
>>>>>>
>>>>>>
>>>>>> int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index,
>>>>>> struct tc_action **a, int bind)
>>>>>> {
>>>>>> struct tcf_idrinfo *idrinfo = tn->idrinfo;
>>>>>> struct tc_action *p;
>>>>>> int err;
>>>>>>
>>>>>> spin_lock(&idrinfo->lock);
>>>>>> if (*index) {
>>>>>> p = idr_find(&idrinfo->action_idr, *index);
>>>>>> if (p) {
>>>>>> if (bind)
>>>>>> p->tcfa_bindcnt++;
>>>>>> p->tcfa_refcnt++;
>>>>>> *a = p;
>>>>>> err = 0;
>>>>>> } else {
>>>>>> *a = NULL;
>>>>>> err = idr_alloc_u32(idr, NULL, index,
>>>>>> *index, GFP_ATOMIC);
>>>>>> }
>>>>>> } else {
>>>>>> *index = 1;
>>>>>> *a = NULL;
>>>>>> err = idr_alloc_u32(idr, NULL, index, UINT_MAX, GFP_ATOMIC);
>>>>>> }
>>>>>> spin_unlock(&idrinfo->lock);
>>>>>> return err;
>>>>>> }
>>>>>>
>>>>>> The act code would just check if "a" is NULL and if so, it would call
>>>>>> tcf_idr_create() with allocated index as arg.
>>>>>
>>>>>What about multiple actions that have arbitrary code between initial
>>>>>check and idr allocation that is currently inside tcf_idr_create()?
>>>>
>>>> Why it would be a problem to have them after the allocation?
>>>
>>>Lets look at mirred for exmple:
>>> exists = tcf_idr_check(tn, parm->index, a, bind);
>>> if (exists && bind)
>>> return 0;
>>>
>>> switch (parm->eaction) {
>>> case TCA_EGRESS_MIRROR:
>>> case TCA_EGRESS_REDIR:
>>> case TCA_INGRESS_REDIR:
>>> case TCA_INGRESS_MIRROR:
>>> break;
>>> default:
>>> if (exists)
>>> tcf_idr_release(*a, bind);
>>> NL_SET_ERR_MSG_MOD(extack, "Unknown mirred option");
>>> return -EINVAL;
>>> }
>>> if (parm->ifindex) {
>>> dev = __dev_get_by_index(net, parm->ifindex);
>>> if (dev == NULL) {
>>> if (exists)
>>> tcf_idr_release(*a, bind);
>>> return -ENODEV;
>>> }
>>> mac_header_xmit = dev_is_mac_header_xmit(dev);
>>> } else {
>>> dev = NULL;
>>> }
>>>
>>> if (!exists) {
>>> if (!dev) {
>>> NL_SET_ERR_MSG_MOD(extack, "Specified device does not exist");
>>> return -EINVAL;
>>> }
>>> ret = tcf_idr_create(tn, parm->index, est, a,
>>> &act_mirred_ops, bind, true);
>>> /* Action with specified index was created concurrently.
>>> * Check again.
>>> */
>>> if (parm->index && ret == -ENOSPC)
>>> goto replay;
>>> else if (ret)
>>> return ret;
>>>
>>>There are several returns and cleanup is only performed when action
>>>exists. So all code like that will have to be audited to also remove
>>>index from idr, otherwise idr handles leak on return.
>>
>> Yeah. You have to take care of the error path.
>>
>>
>>>
>>>>
>>>> There is one issue though with my draft. tcf_idr_insert() function
>>>> which actually assigns a "p" pointer to the idr index is called later on.
>>>> Until that happens, the idr_find() would return NULL even if the index
>>>> is actually allocated. We cannot assign "p" in tcf_idr_check_alloc()
>>>> because it is allocated only later on in tcf_idr_create(). But that is
>>>> resolvable by the following trick:
>>>>
>>>> int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index,
>>>> struct tc_action **a, int bind)
>>>> {
>>>> struct tcf_idrinfo *idrinfo = tn->idrinfo;
>>>> struct tc_action *p;
>>>> int err;
>>>>
>>>> again:
>>>> spin_lock(&idrinfo->lock);
>>>> if (*index) {
>>>> p = idr_find(&idrinfo->action_idr, *index);
>>>> if (IS_ERR(p)) {
>>>> /* This means that another process allocated
>>>> * index but did not assign the pointer yet.
>>>> */
>>>> spin_unlock(&idrinfo->lock);
>>>> goto again;
>>>> }
>>>> if (p) {
>>>> if (bind)
>>>> p->tcfa_bindcnt++;
>>>> p->tcfa_refcnt++;
>>>> *a = p;
>>>> err = 0;
>>>> } else {
>>>> *a = NULL;
>>>> err = idr_alloc_u32(idr, NULL, index,
>>>> *index, GFP_ATOMIC);
>>>> idr_replace(&idrinfo->action_idr,
>>>> ERR_PTR(-EBUSY), *index);
>>>> }
>>>> } else {
>>>> *index = 1;
>>>> *a = NULL;
>>>> err = idr_alloc_u32(idr, NULL, index, UINT_MAX, GFP_ATOMIC);
>>>> idr_replace(&idrinfo->action_idr, ERR_PTR(-EBUSY), *index);
>>>> }
>>>> spin_unlock(&idrinfo->lock);
>>>> return err;
>>>> }
>>>>
>>>
>>>So users of action idr that might perform concurrent lookups are also
>>>have to be changed to check for error pointers, that now can be inserted
>>>into idr? Seems like a complex change...
>>
>> You can just add a simple check into tcf_idr_lookup(). Where else?
>>
>
>To me it looks like we take something simple and already working, and
>make it complex to save few lines of code in action init...
Well it is not only about saving lines of code but also to avoid
duplications of code. And duplications lead to bugs. That's why I
believe that helper here is needed.
>
>Anyway, how should I do patch split for this?
>Patch to implement function you outlined and another one to modify all
>actions to use it(with all refactoring to not leak references)? Or patch
>per action is better approach?
If you do it per-action, you would end up with many patches. It is not
needed I believe.
On Mon, May 14, 2018 at 05:27:03PM +0300, Vlad Buslov wrote:
> Change type of action reference counter to refcount_t.
>
> Change type of action bind counter to atomic_t.
> This type is used to allow decrementing bind counter without testing
> for 0 result.
... and in what does not testing for 0 result helps?
Marcelo
On Mon, May 14, 2018 at 05:16:41PM +0200, Jiri Pirko wrote:
> Mon, May 14, 2018 at 04:27:05PM CEST, [email protected] wrote:
> >Add additional 'unlocked' argument to act API init functions.
> >Argument is true when rtnl lock is not taken and false otherwise.
> >It is required to implement actions that need to release rtnl lock before
> >loading kernel module and reacquire if afterwards.
> >
> >Signed-off-by: Vlad Buslov <[email protected]>
>
> [...]
>
>
> >@@ -721,9 +722,11 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
> > a_o = tc_lookup_action_n(act_name);
> > if (a_o == NULL) {
> > #ifdef CONFIG_MODULES
> >- rtnl_unlock();
> >+ if (!unlocked)
> >+ rtnl_unlock();
> > request_module("act_%s", act_name);
> >- rtnl_lock();
> >+ if (!unlocked)
> >+ rtnl_lock();
>
> Although I don't like this conditional locking scheme, I see no other
> way to solve this :/ But I think would be better perhaps to rename
> "unlocked" to something like "rtnl_held".
Agreed. "rtnl_held" also removes the double negation, "!un...".
On Mon, May 14, 2018 at 05:27:07PM +0300, Vlad Buslov wrote:
...
> @@ -1052,6 +1088,36 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
> return err;
> }
>
> +static int tcf_action_delete(struct net *net, struct list_head *actions,
> + struct netlink_ext_ack *extack)
> +{
> + int ret;
Reverse christmass tree.. this line should be the last in variable
declarations.
> + struct tc_action *a, *tmp;
> + char kind[IFNAMSIZ];
> + u32 act_index;
> +
> + list_for_each_entry_safe(a, tmp, actions, list) {
> + const struct tc_action_ops *ops = a->ops;
> +
> + /* Actions can be deleted concurrently
> + * so we must save their type and id to search again
> + * after reference is released.
> + */
> + strncpy(kind, a->ops->kind, sizeof(kind) - 1);
This may be problematic. Why strncpy here?
a->ops->kind is also of size IFNAMSIZ. If a->ops->kind is actually
IFNAMSIZ-1 long, kind here won't be NULL terminated, as kind is not
initialized and strncpy won't add the NULL.
> + act_index = a->tcfa_index;
> +
> + list_del(&a->list);
> + if (tcf_action_put(a))
> + module_put(ops->owner);
> +
> + /* now do the delete */
> + ret = tcf_action_del_1(net, kind, act_index, extack);
> + if (ret < 0)
> + return ret;
> + }
> + return 0;
> +}
On Mon, May 14, 2018 at 05:27:10PM +0300, Vlad Buslov wrote:
> Return from action init function with reference to action taken,
> even when overwriting existing action.
Isn't this patch necessary before patch 7, to not break things up?
AFAICU after patchset 7 it assumes the action init function is already
behaving like this.
Please use a more meaningful patch summary. This one is too generic.
On Mon, May 14, 2018 at 05:27:11PM +0300, Vlad Buslov wrote:
...
> +int tcf_idr_find_delete(struct tc_action_net *tn, u32 index)
What about naming it tcf_idr_delete_index() instead?
The find operation is always implicit when you don't specify the
object itself directly, and then it describes which key will be used.
> +{
> + struct tcf_idrinfo *idrinfo = tn->idrinfo;
> + struct tc_action *p;
> + int ret = 0;
> +
> + spin_lock_bh(&idrinfo->lock);
> + p = idr_find(&idrinfo->action_idr, index);
> + if (!p) {
> + spin_unlock(&idrinfo->lock);
> + return -ENOENT;
> + }
> +
> + if (!atomic_read(&p->tcfa_bindcnt)) {
> + if (refcount_dec_and_test(&p->tcfa_refcnt)) {
> + struct module *owner = p->ops->owner;
> +
> + WARN_ON(p != idr_remove(&idrinfo->action_idr,
> + p->tcfa_index));
> + spin_unlock_bh(&idrinfo->lock);
> +
> + tcf_action_cleanup(p);
> + module_put(owner);
> + return 0;
> + }
> + ret = 0;
> + } else {
> + ret = -EPERM;
> + }
> +
> + spin_unlock_bh(&idrinfo->lock);
> + return ret;
> +}
> +EXPORT_SYMBOL(tcf_idr_find_delete);
...
On Mon, May 14, 2018 at 05:27:14PM +0300, Vlad Buslov wrote:
> Substitute calls to action insert function with calls to action insert
> unique function that warns if insertion overwrites index in idr.
I know this patch may be gone on V2, but a general comment, please use
the function names themselves instead of a textualized version. I.e.,
s/action insert unique/tcf_idr_insert_unique/
On Wed, May 16, 2018 at 03:21:35PM +0200, Jiri Pirko wrote:
...
> int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index,
> struct tc_action **a, int bind)
Considering we don't have any other variant of it, maybe just
"tcf_idr_alloc()" instead?
On Wed, May 16, 2018 at 12:58:38PM +0300, Vlad Buslov wrote:
>
> On Wed 16 May 2018 at 09:48, Jiri Pirko <[email protected]> wrote:
> > Mon, May 14, 2018 at 04:27:15PM CEST, [email protected] wrote:
> >>Implement delete function that is required to delete actions without
> >>holding rtnl lock. Use action API function that atomically deletes action
> >>only if it is still in action idr. This implementation prevents concurrent
> >>threads from deleting same action twice.
> >>
> >>Signed-off-by: Vlad Buslov <[email protected]>
> >>---
> >> net/sched/act_bpf.c | 8 ++++++++
> >> net/sched/act_connmark.c | 8 ++++++++
> >> net/sched/act_csum.c | 8 ++++++++
> >> net/sched/act_gact.c | 8 ++++++++
> >> net/sched/act_ife.c | 8 ++++++++
> >> net/sched/act_ipt.c | 16 ++++++++++++++++
> >> net/sched/act_mirred.c | 8 ++++++++
> >> net/sched/act_nat.c | 8 ++++++++
> >> net/sched/act_pedit.c | 8 ++++++++
> >> net/sched/act_police.c | 8 ++++++++
> >> net/sched/act_sample.c | 8 ++++++++
> >> net/sched/act_simple.c | 8 ++++++++
> >> net/sched/act_skbedit.c | 8 ++++++++
> >> net/sched/act_skbmod.c | 8 ++++++++
> >> net/sched/act_tunnel_key.c | 8 ++++++++
> >> net/sched/act_vlan.c | 8 ++++++++
> >> 16 files changed, 136 insertions(+)
> >>
> >>diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
> >>index 0bf4ecf..36f7f66 100644
> >>--- a/net/sched/act_bpf.c
> >>+++ b/net/sched/act_bpf.c
> >>@@ -394,6 +394,13 @@ static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index,
> >> return tcf_idr_search(tn, a, index);
> >> }
> >>
> >>+static int tcf_bpf_delete(struct net *net, u32 index)
> >>+{
> >>+ struct tc_action_net *tn = net_generic(net, bpf_net_id);
> >>+
> >>+ return tcf_idr_find_delete(tn, index);
> >>+}
> >>+
> >> static struct tc_action_ops act_bpf_ops __read_mostly = {
> >> .kind = "bpf",
> >> .type = TCA_ACT_BPF,
> >>@@ -404,6 +411,7 @@ static struct tc_action_ops act_bpf_ops __read_mostly = {
> >> .init = tcf_bpf_init,
> >> .walk = tcf_bpf_walker,
> >> .lookup = tcf_bpf_search,
> >>+ .delete = tcf_bpf_delete,
> >
> > I wonder, right before this patch, how the idr index got removed?
> > delete op is NULL and I didn't find anyone else to do it.
AFAICT this also means it is leaking idr's till this patch is applied.
Maybe I missed something.
> >
> > Also, after this patch, does it make sense to have following check in
> > tcf_action_del_1()?
> >
> > if (ops->delete)
> > err = ops->delete(net, index);
> >
> > Looks like ops->delete is non-null for all.
> >
> > Seems to me that you need to introduce this patch filling up the delete
> > op in all acts and only after that introduce a code that actually calls
> > it.
>
> Already moved this for V2 patchset to:
> - Add delete callback to ops and implement it for all actions in single
> patch.
> - Move this patch before delete first use.
>
> Will now remove the conditional as well.
>
> >
> > [...]
>
Sat, May 19, 2018 at 11:43:27PM CEST, [email protected] wrote:
>On Mon, May 14, 2018 at 05:27:07PM +0300, Vlad Buslov wrote:
>...
>> @@ -1052,6 +1088,36 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
>> return err;
>> }
>>
>> +static int tcf_action_delete(struct net *net, struct list_head *actions,
>> + struct netlink_ext_ack *extack)
>> +{
>> + int ret;
>
>Reverse christmass tree.. this line should be the last in variable
>declarations.
>
>> + struct tc_action *a, *tmp;
>> + char kind[IFNAMSIZ];
>> + u32 act_index;
>> +
>> + list_for_each_entry_safe(a, tmp, actions, list) {
>> + const struct tc_action_ops *ops = a->ops;
>> +
>> + /* Actions can be deleted concurrently
>> + * so we must save their type and id to search again
>> + * after reference is released.
>> + */
>> + strncpy(kind, a->ops->kind, sizeof(kind) - 1);
>
>This may be problematic. Why strncpy here?
This is not necessary if Vlad is going to hold module referece, ops
cannot disappear.
>
>a->ops->kind is also of size IFNAMSIZ. If a->ops->kind is actually
>IFNAMSIZ-1 long, kind here won't be NULL terminated, as kind is not
>initialized and strncpy won't add the NULL.
>
>> + act_index = a->tcfa_index;
>> +
>> + list_del(&a->list);
>> + if (tcf_action_put(a))
>> + module_put(ops->owner);
>> +
>> + /* now do the delete */
>> + ret = tcf_action_del_1(net, kind, act_index, extack);
>> + if (ret < 0)
>> + return ret;
>> + }
>> + return 0;
>> +}
On Sat 19 May 2018 at 21:04, Marcelo Ricardo Leitner <[email protected]> wrote:
> On Mon, May 14, 2018 at 05:27:03PM +0300, Vlad Buslov wrote:
>> Change type of action reference counter to refcount_t.
>>
>> Change type of action bind counter to atomic_t.
>> This type is used to allow decrementing bind counter without testing
>> for 0 result.
>
> ... and in what does not testing for 0 result helps?
>
> Marcelo
Atomic operations don't WARN in this case.
On Sun 20 May 2018 at 06:22, Jiri Pirko <[email protected]> wrote:
> Sat, May 19, 2018 at 11:43:27PM CEST, [email protected] wrote:
>>On Mon, May 14, 2018 at 05:27:07PM +0300, Vlad Buslov wrote:
>>...
>>> @@ -1052,6 +1088,36 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
>>> return err;
>>> }
>>>
>>> +static int tcf_action_delete(struct net *net, struct list_head *actions,
>>> + struct netlink_ext_ack *extack)
>>> +{
>>> + int ret;
>>
>>Reverse christmass tree.. this line should be the last in variable
>>declarations.
>>
>>> + struct tc_action *a, *tmp;
>>> + char kind[IFNAMSIZ];
>>> + u32 act_index;
>>> +
>>> + list_for_each_entry_safe(a, tmp, actions, list) {
>>> + const struct tc_action_ops *ops = a->ops;
>>> +
>>> + /* Actions can be deleted concurrently
>>> + * so we must save their type and id to search again
>>> + * after reference is released.
>>> + */
>>> + strncpy(kind, a->ops->kind, sizeof(kind) - 1);
>>
>>This may be problematic. Why strncpy here?
>
> This is not necessary if Vlad is going to hold module referece, ops
> cannot disappear.
Yes, I've already refactored this code to reuse ops.
>
>
>>
>>a->ops->kind is also of size IFNAMSIZ. If a->ops->kind is actually
>>IFNAMSIZ-1 long, kind here won't be NULL terminated, as kind is not
>>initialized and strncpy won't add the NULL.
>>
>>> + act_index = a->tcfa_index;
>>> +
>>> + list_del(&a->list);
>>> + if (tcf_action_put(a))
>>> + module_put(ops->owner);
>>> +
>>> + /* now do the delete */
>>> + ret = tcf_action_del_1(net, kind, act_index, extack);
>>> + if (ret < 0)
>>> + return ret;
>>> + }
>>> + return 0;
>>> +}
On Sat 19 May 2018 at 21:52, Marcelo Ricardo Leitner <[email protected]> wrote:
> On Mon, May 14, 2018 at 05:27:10PM +0300, Vlad Buslov wrote:
>> Return from action init function with reference to action taken,
>> even when overwriting existing action.
>
> Isn't this patch necessary before patch 7, to not break things up?
> AFAICU after patchset 7 it assumes the action init function is already
> behaving like this.
I have re-split these patches for V2.
On Sun, May 20, 2018 at 1:20 AM, Marcelo Ricardo Leitner
<[email protected]> wrote:
> On Mon, May 14, 2018 at 05:27:14PM +0300, Vlad Buslov wrote:
>> Substitute calls to action insert function with calls to action insert
>> unique function that warns if insertion overwrites index in idr.
>
> I know this patch may be gone on V2, but a general comment, please use
> the function names themselves instead of a textualized version. I.e.,
> s/action insert unique/tcf_idr_insert_unique/
disagree. While doing reviews I found out that if I ask the developer
to use higher
level / descriptive language and specifically to avoid putting
variable / function names in
patch titles and change logs, the quality gets ++ big time, vs if the
developer is allowed to say
net/mlx5: Changed add_vovo_bobo()
Added variable do_it_right to add_vovo_bobo(), now we are terribly good.
On Mon, May 21, 2018 at 12:13:06AM +0300, Or Gerlitz wrote:
> On Sun, May 20, 2018 at 1:20 AM, Marcelo Ricardo Leitner
> <[email protected]> wrote:
> > On Mon, May 14, 2018 at 05:27:14PM +0300, Vlad Buslov wrote:
> >> Substitute calls to action insert function with calls to action insert
> >> unique function that warns if insertion overwrites index in idr.
> >
> > I know this patch may be gone on V2, but a general comment, please use
> > the function names themselves instead of a textualized version. I.e.,
> > s/action insert unique/tcf_idr_insert_unique/
>
> disagree. While doing reviews I found out that if I ask the developer
> to use higher
> level / descriptive language and specifically to avoid putting
> variable / function names in
> patch titles and change logs, the quality gets ++ big time, vs if the
> developer is allowed to say
>
> net/mlx5: Changed add_vovo_bobo()
>
> Added variable do_it_right to add_vovo_bobo(), now we are terribly good.
In your example I agree that it is not helping and it is even allowing
such empty changelog, just as in the section I highlighted, the
descriptive language is also not helping IMHO.
I had to read it 3 times to make sure I wasn't missing a modifier word
when comparing the two functions and well, it's just saying
"Substitute calls to foo function to bar function". I don't see how
the textualized version helps in this case while, at least in this
one, I would have visually recognized the function names way faster.
Sounds like 2 bad examples for either approach.
On Mon, May 21, 2018 at 12:33 AM, Marcelo Ricardo Leitner
<[email protected]> wrote:
> On Mon, May 21, 2018 at 12:13:06AM +0300, Or Gerlitz wrote:
>> On Sun, May 20, 2018 at 1:20 AM, Marcelo Ricardo Leitner
>> <[email protected]> wrote:
>> > On Mon, May 14, 2018 at 05:27:14PM +0300, Vlad Buslov wrote:
>> >> Substitute calls to action insert function with calls to action insert
>> >> unique function that warns if insertion overwrites index in idr.
>> >
>> > I know this patch may be gone on V2, but a general comment, please use
>> > the function names themselves instead of a textualized version. I.e.,
>> > s/action insert unique/tcf_idr_insert_unique/
>>
>> disagree. While doing reviews I found out that if I ask the developer
>> to use higher
>> level / descriptive language and specifically to avoid putting
>> variable / function names in
>> patch titles and change logs, the quality gets ++ big time, vs if the
>> developer is allowed to say
>>
>> net/mlx5: Changed add_vovo_bobo()
>>
>> Added variable do_it_right to add_vovo_bobo(), now we are terribly good.
>
> In your example I agree that it is not helping and it is even allowing
> such empty changelog, just as in the section I highlighted, the
> descriptive language is also not helping IMHO.
>
> I had to read it 3 times to make sure I wasn't missing a modifier word
> when comparing the two functions and well, it's just saying
> "Substitute calls to foo function to bar function". I don't see how
> the textualized version helps in this case while, at least in this
> one, I would have visually recognized the function names way faster.
>
> Sounds like 2 bad examples for either approach.
Properly written descriptive language is maybe harder to come up with
(specifically for those of us who are not native english speakers, like me)
but is more expressive and helpful for reviews and maintenance. Lets try
to add Vlad to come up with the right higher language and not ask him to
quote function and variable named.
On Mon, May 21, 2018 at 12:40:54AM +0300, Or Gerlitz wrote:
> On Mon, May 21, 2018 at 12:33 AM, Marcelo Ricardo Leitner
> <[email protected]> wrote:
> > On Mon, May 21, 2018 at 12:13:06AM +0300, Or Gerlitz wrote:
> >> On Sun, May 20, 2018 at 1:20 AM, Marcelo Ricardo Leitner
> >> <[email protected]> wrote:
> >> > On Mon, May 14, 2018 at 05:27:14PM +0300, Vlad Buslov wrote:
> >> >> Substitute calls to action insert function with calls to action insert
> >> >> unique function that warns if insertion overwrites index in idr.
> >> >
> >> > I know this patch may be gone on V2, but a general comment, please use
> >> > the function names themselves instead of a textualized version. I.e.,
> >> > s/action insert unique/tcf_idr_insert_unique/
> >>
> >> disagree. While doing reviews I found out that if I ask the developer
> >> to use higher
> >> level / descriptive language and specifically to avoid putting
> >> variable / function names in
> >> patch titles and change logs, the quality gets ++ big time, vs if the
> >> developer is allowed to say
> >>
> >> net/mlx5: Changed add_vovo_bobo()
> >>
> >> Added variable do_it_right to add_vovo_bobo(), now we are terribly good.
> >
> > In your example I agree that it is not helping and it is even allowing
> > such empty changelog, just as in the section I highlighted, the
> > descriptive language is also not helping IMHO.
> >
> > I had to read it 3 times to make sure I wasn't missing a modifier word
> > when comparing the two functions and well, it's just saying
> > "Substitute calls to foo function to bar function". I don't see how
> > the textualized version helps in this case while, at least in this
> > one, I would have visually recognized the function names way faster.
> >
> > Sounds like 2 bad examples for either approach.
>
> Properly written descriptive language is maybe harder to come up with
> (specifically for those of us who are not native english speakers, like me)
> but is more expressive and helpful for reviews and maintenance. Lets try
> to add Vlad to come up with the right higher language and not ask him to
> quote function and variable named.
Alright.
On Mon, May 14, 2018 at 7:27 AM, Vlad Buslov <[email protected]> wrote:
> Currently, all netlink protocol handlers for updating rules, actions and
> qdiscs are protected with single global rtnl lock which removes any
> possibility for parallelism. This patch set is a first step to remove
> rtnl lock dependency from TC rules update path. It updates act API to
> use atomic operations, rcu and spinlocks for fine-grained locking. It
> also extend API with functions that are needed to update existing
> actions for parallel execution.
Can you give a summary here for what and how it is achieved?
You said this is the first step, what do you want to achieve in this
very first step? And how do you achieve it? Do you break the RTNL
lock down to, for a quick example, a per-device lock? Or perhaps you
completely remove it because of what reason?
I go through all the descriptions of your 14 patches (but not any code),
I still have no clue how you successfully avoid RTNL. Please don't
let me read into your code to understand that, there must be some
high-level justification on how it works. Without it, I don't event want
to read into the code.
Thanks.
On Thu 24 May 2018 at 23:34, Cong Wang <[email protected]> wrote:
> On Mon, May 14, 2018 at 7:27 AM, Vlad Buslov <[email protected]> wrote:
>> Currently, all netlink protocol handlers for updating rules, actions and
>> qdiscs are protected with single global rtnl lock which removes any
>> possibility for parallelism. This patch set is a first step to remove
>> rtnl lock dependency from TC rules update path. It updates act API to
>> use atomic operations, rcu and spinlocks for fine-grained locking. It
>> also extend API with functions that are needed to update existing
>> actions for parallel execution.
>
> Can you give a summary here for what and how it is achieved?
Got it, will expand cover letter in V2 with summary.
>
> You said this is the first step, what do you want to achieve in this
> very first step? And how do you achieve it? Do you break the RTNL
But aren't this questions answered in paragraph you quoted?
What: Change act API to not rely on one-big-global-RTNL-lock and to use
more fine-grained synchronization methods to allow safe concurrent
execution.
How: Refactor act API code to use atomics, rcu and spinlocks, etc. for
protecting shared data structures, add new functions required to update
specific actions implementation for parallel execution. (step 2)
If you feel that this cover letter is too terse, I will add outline of
changes in V2.
> lock down to, for a quick example, a per-device lock? Or perhaps you
> completely remove it because of what reason?
I want to remove RTNL _dependency_ from act API data structures and
code. I probably should me more specific in this case:
Florian recently made a change that allows registering netlink protocol
handlers with flag RTNL_FLAG_DOIT_UNLOCKED. Handlers registered with
this flag are called without RTNL taken. My end goal is to have rule
update handlers(RTM_NEWTFILTER, RTM_DELTFILTER, etc.) to be registered
with UNLOCKED flag to allow parallel execution.
I do not intend to globally remove or break RTNL.
>
> I go through all the descriptions of your 14 patches (but not any code),
> I still have no clue how you successfully avoid RTNL. Please don't
> let me read into your code to understand that, there must be some
> high-level justification on how it works. Without it, I don't event want
> to read into the code.
On internal code review I've been asked not to duplicate info from
commit messages in cover letter, but I guess I can expand it with some
high level outline in V2.
>
> Thanks.
Thank you for your feedback!
On Fri, May 25, 2018 at 1:39 PM, Vlad Buslov <[email protected]> wrote:
>
> On Thu 24 May 2018 at 23:34, Cong Wang <[email protected]> wrote:
>> On Mon, May 14, 2018 at 7:27 AM, Vlad Buslov <[email protected]> wrote:
>>> Currently, all netlink protocol handlers for updating rules, actions and
>>> qdiscs are protected with single global rtnl lock which removes any
>>> possibility for parallelism. This patch set is a first step to remove
>>> rtnl lock dependency from TC rules update path. It updates act API to
>>> use atomic operations, rcu and spinlocks for fine-grained locking. It
>>> also extend API with functions that are needed to update existing
>>> actions for parallel execution.
>>
>> Can you give a summary here for what and how it is achieved?
>
> Got it, will expand cover letter in V2 with summary.
>>
>> You said this is the first step, what do you want to achieve in this
>> very first step? And how do you achieve it? Do you break the RTNL
>
> But aren't this questions answered in paragraph you quoted?
Obviously not, you said to remove it, but never explains why it can
be removed and how it is removed. This is crucial for review.
"use atomic operations, rcu and spinlocks for fine-grained locking"
is literately nothing, why atomic/rcu makes RTNL unnecessary?
How RCU is used? What spinlocks are you talking about? What
do these spinlocks protect after removing RTNL? Why are they
safe with other netdevice and netns operations?
You explain _nothing_ here. Really. Please don't force people to
read 14 patches to understand how it works. In fact, no one wants
to read the code unless there is some high-level explanation that
makes basic sense.
> What: Change act API to not rely on one-big-global-RTNL-lock and to use
> more fine-grained synchronization methods to allow safe concurrent
> execution.
Sure, how fine-grained it is after your patchset? Why this fine-grained
lock could safely replace RTNL?
Could you stop letting us guess your puzzle words? It would save your
time from exchanging emails with me, it would save my time from
guessing you too. It is a win-win.
> How: Refactor act API code to use atomics, rcu and spinlocks, etc. for
> protecting shared data structures, add new functions required to update
What shared data structures? The per-netns idr which is already protected
by a spinlock? The TC hierarchy? The shared standalone actions? Hey,
why do I have to guess? :-/
> specific actions implementation for parallel execution. (step 2)
Claim is easy, prove is hard. I can easily claim I break RTNL down
to a per-netns lock, but I can't prove it really works. :-D
>
> If you feel that this cover letter is too terse, I will add outline of
> changes in V2.
It is not my rule, it is how you have to help people to review your
14 patches. I think it is a fair game: you help people like me to
review your patches, we help you to get them reviewed and merged
if they all make sense.
>
>> lock down to, for a quick example, a per-device lock? Or perhaps you
>> completely remove it because of what reason?
>
> I want to remove RTNL _dependency_ from act API data structures and
> code. I probably should me more specific in this case:
>
> Florian recently made a change that allows registering netlink protocol
> handlers with flag RTNL_FLAG_DOIT_UNLOCKED. Handlers registered with
> this flag are called without RTNL taken. My end goal is to have rule
> update handlers(RTM_NEWTFILTER, RTM_DELTFILTER, etc.) to be registered
> with UNLOCKED flag to allow parallel execution.
Please add this paragraph in your cover letter, it is very important for review.
>
> I do not intend to globally remove or break RTNL.
>
>>
>> I go through all the descriptions of your 14 patches (but not any code),
>> I still have no clue how you successfully avoid RTNL. Please don't
>> let me read into your code to understand that, there must be some
>> high-level justification on how it works. Without it, I don't event want
>> to read into the code.
>
> On internal code review I've been asked not to duplicate info from
> commit messages in cover letter, but I guess I can expand it with some
> high level outline in V2.
In cover letter, you should put a high-level overview of "why" and "how".
If, in the worst case, on high-level it doesn't make sense, why should
we bother to read the code? In short, you have to convince people to
read your code here.
In each patch description, you should explain what a single patch does.
I don't see any duplication.