Add support for psample sampling via two new attributes to the
OVS_ACTION_ATTR_SAMPLE action.
OVS_SAMPLE_ATTR_PSAMPLE_GROUP used to pass an integer psample group_id.
OVS_SAMPLE_ATTR_PSAMPLE_COOKIE used to pass a variable-length binary
cookie that will be forwared to psample.
The maximum length of the user-defined cookie is set to 16, same as
tc_cookie, to discourage using cookies that will not be offloadable.
In order to simplify the internal processing of the action and given the
maximum size of the cookie is relatively small, add both fields to the
internal-only struct sample_arg.
The presence of a group_id mandates that the action shall called the
psample module to multicast the packet with such group_id and the
user-provided cookie if present. This behavior is orthonogal to
also executing the nested actions if present.
Signed-off-by: Adrian Moreno <[email protected]>
---
Documentation/netlink/specs/ovs_flow.yaml | 6 ++
include/uapi/linux/openvswitch.h | 49 ++++++++++----
net/openvswitch/actions.c | 51 +++++++++++++--
net/openvswitch/flow_netlink.c | 80 ++++++++++++++++++-----
4 files changed, 153 insertions(+), 33 deletions(-)
diff --git a/Documentation/netlink/specs/ovs_flow.yaml b/Documentation/netlink/specs/ovs_flow.yaml
index 4fdfc6b5cae9..5543c2937225 100644
--- a/Documentation/netlink/specs/ovs_flow.yaml
+++ b/Documentation/netlink/specs/ovs_flow.yaml
@@ -825,6 +825,12 @@ attribute-sets:
name: actions
type: nest
nested-attributes: action-attrs
+ -
+ name: psample_group
+ type: u32
+ -
+ name: psample_cookie
+ type: binary
-
name: userspace-attrs
enum-name: ovs-userspace-attr
diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index efc82c318fa2..e9cd6f3a952d 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -639,6 +639,7 @@ enum ovs_flow_attr {
#define OVS_UFID_F_OMIT_MASK (1 << 1)
#define OVS_UFID_F_OMIT_ACTIONS (1 << 2)
+#define OVS_PSAMPLE_COOKIE_MAX_SIZE 16
/**
* enum ovs_sample_attr - Attributes for %OVS_ACTION_ATTR_SAMPLE action.
* @OVS_SAMPLE_ATTR_PROBABILITY: 32-bit fraction of packets to sample with
@@ -646,15 +647,27 @@ enum ovs_flow_attr {
* %UINT32_MAX samples all packets and intermediate values sample intermediate
* fractions of packets.
* @OVS_SAMPLE_ATTR_ACTIONS: Set of actions to execute in sampling event.
- * Actions are passed as nested attributes.
+ * Actions are passed as nested attributes. Optional if
+ * OVS_SAMPLE_ATTR_PSAMPLE_GROUP is set.
+ * @OVS_SAMPLE_ATTR_PSAMPLE_GROUP: A 32-bit number to be used as psample group.
+ * Optional if OVS_SAMPLE_ATTR_ACTIONS is set.
+ * @OVS_SAMPLE_ATTR_PSAMPLE_COOKIE: A variable-length binary cookie that, if
+ * provided, will be copied to the psample cookie.
*
- * Executes the specified actions with the given probability on a per-packet
- * basis.
+ * Either OVS_SAMPLE_ATTR_PSAMPLE_GROUP or OVS_SAMPLE_ATTR_ACTIONS must be
+ * specified.
+ *
+ * Executes the specified actions and/or sends the packet to psample
+ * with the given probability on a per-packet basis.
*/
enum ovs_sample_attr {
OVS_SAMPLE_ATTR_UNSPEC,
- OVS_SAMPLE_ATTR_PROBABILITY, /* u32 number */
- OVS_SAMPLE_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */
+ OVS_SAMPLE_ATTR_PROBABILITY, /* u32 number */
+ OVS_SAMPLE_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_
+ * attributes.
+ */
+ OVS_SAMPLE_ATTR_PSAMPLE_GROUP, /* u32 number */
+ OVS_SAMPLE_ATTR_PSAMPLE_COOKIE, /* binary */
__OVS_SAMPLE_ATTR_MAX,
#ifdef __KERNEL__
@@ -665,13 +678,27 @@ enum ovs_sample_attr {
#define OVS_SAMPLE_ATTR_MAX (__OVS_SAMPLE_ATTR_MAX - 1)
#ifdef __KERNEL__
+
+/* Definition for flags in struct sample_arg. */
+enum {
+ /* When set, actions in sample will not change the flows. */
+ OVS_SAMPLE_ARG_FLAG_EXEC = 1 << 0,
+ /* When set, the packet will be sent to psample. */
+ OVS_SAMPLE_ARG_FLAG_PSAMPLE = 1 << 1,
+};
+
struct sample_arg {
- bool exec; /* When true, actions in sample will not
- * change flow keys. False otherwise.
- */
- u32 probability; /* Same value as
- * 'OVS_SAMPLE_ATTR_PROBABILITY'.
- */
+ u16 flags; /* Flags that modify the behavior of the
+ * action. See SAMPLE_ARG_FLAG_*.
+ */
+ u32 probability; /* Same value as
+ * 'OVS_SAMPLE_ATTR_PROBABILITY'.
+ */
+ u32 group_id; /* Same value as
+ * 'OVS_SAMPLE_ATTR_PSAMPLE_GROUP'.
+ */
+ u8 cookie_len; /* Length of psample cookie. */
+ char cookie[OVS_PSAMPLE_COOKIE_MAX_SIZE]; /* psample cookie data. */
};
#endif
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 6fcd7e2ca81f..eb3166986fd2 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -24,6 +24,7 @@
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/mpls.h>
+#include <net/psample.h>
#include <net/sctp/checksum.h>
#include "datapath.h"
@@ -1025,6 +1026,34 @@ static int dec_ttl_exception_handler(struct datapath *dp, struct sk_buff *skb,
return 0;
}
+static int ovs_psample_packet(struct datapath *dp, struct sw_flow_key *key,
+ const struct sample_arg *arg,
+ struct sk_buff *skb)
+{
+ struct psample_group psample_group = {};
+ struct psample_metadata md = {};
+ struct vport *input_vport;
+ u32 rate;
+
+ psample_group.group_num = arg->group_id;
+ psample_group.net = ovs_dp_get_net(dp);
+
+ input_vport = ovs_vport_rcu(dp, key->phy.in_port);
+ if (!input_vport)
+ input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);
+
+ md.in_ifindex = input_vport->dev->ifindex;
+ md.user_cookie = arg->cookie_len ? &arg->cookie[0] : NULL;
+ md.user_cookie_len = arg->cookie_len;
+ md.trunc_size = skb->len;
+
+ rate = arg->probability ? U32_MAX / arg->probability : 0;
+
+ psample_sample_packet(&psample_group, skb, rate, &md);
+
+ return 0;
+}
+
/* When 'last' is true, sample() should always consume the 'skb'.
* Otherwise, sample() should keep 'skb' intact regardless what
* actions are executed within sample().
@@ -1033,11 +1062,12 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
struct sw_flow_key *key, const struct nlattr *attr,
bool last)
{
- struct nlattr *actions;
+ const struct sample_arg *arg;
struct nlattr *sample_arg;
int rem = nla_len(attr);
- const struct sample_arg *arg;
+ struct nlattr *actions;
bool clone_flow_key;
+ int ret;
/* The first action is always 'OVS_SAMPLE_ATTR_ARG'. */
sample_arg = nla_data(attr);
@@ -1051,9 +1081,20 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
return 0;
}
- clone_flow_key = !arg->exec;
- return clone_execute(dp, skb, key, 0, actions, rem, last,
- clone_flow_key);
+ if (arg->flags & OVS_SAMPLE_ARG_FLAG_PSAMPLE) {
+ ret = ovs_psample_packet(dp, key, arg, skb);
+ if (ret)
+ return ret;
+ }
+
+ if (nla_ok(actions, rem)) {
+ clone_flow_key = !(arg->flags & OVS_SAMPLE_ARG_FLAG_EXEC);
+ ret = clone_execute(dp, skb, key, 0, actions, rem, last,
+ clone_flow_key);
+ } else if (last) {
+ ovs_kfree_skb_reason(skb, OVS_DROP_LAST_ACTION);
+ }
+ return ret;
}
/* When 'last' is true, clone() should always consume the 'skb'.
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index f224d9bcea5e..1a348d3905fc 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -2561,6 +2561,9 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
u32 mpls_label_count, bool log,
u32 depth);
+static int copy_action(const struct nlattr *from,
+ struct sw_flow_actions **sfa, bool log);
+
static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa,
@@ -2569,10 +2572,10 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
u32 depth)
{
const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
- const struct nlattr *probability, *actions;
+ const struct nlattr *probability, *actions, *group, *cookie;
+ struct sample_arg arg = {};
const struct nlattr *a;
int rem, start, err;
- struct sample_arg arg;
memset(attrs, 0, sizeof(attrs));
nla_for_each_nested(a, attr, rem) {
@@ -2589,7 +2592,19 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
return -EINVAL;
actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
- if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
+ if (actions && (!nla_len(actions) || nla_len(actions) < NLA_HDRLEN))
+ return -EINVAL;
+
+ group = attrs[OVS_SAMPLE_ATTR_PSAMPLE_GROUP];
+ if (group && nla_len(group) != sizeof(u32))
+ return -EINVAL;
+
+ cookie = attrs[OVS_SAMPLE_ATTR_PSAMPLE_COOKIE];
+ if (cookie &&
+ (!group || nla_len(cookie) > OVS_PSAMPLE_COOKIE_MAX_SIZE))
+ return -EINVAL;
+
+ if (!group && !actions)
return -EINVAL;
/* validation done, copy sample action. */
@@ -2608,7 +2623,19 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
* If the sample is the last action, it can always be excuted
* rather than deferred.
*/
- arg.exec = last || !actions_may_change_flow(actions);
+ if (actions && (last || !actions_may_change_flow(actions)))
+ arg.flags |= OVS_SAMPLE_ARG_FLAG_EXEC;
+
+ if (group) {
+ arg.flags |= OVS_SAMPLE_ARG_FLAG_PSAMPLE;
+ arg.group_id = nla_get_u32(group);
+ }
+
+ if (cookie) {
+ memcpy(&arg.cookie[0], nla_data(cookie), nla_len(cookie));
+ arg.cookie_len = nla_len(cookie);
+ }
+
arg.probability = nla_get_u32(probability);
err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_ARG, &arg, sizeof(arg),
@@ -2616,12 +2643,13 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
if (err)
return err;
- err = __ovs_nla_copy_actions(net, actions, key, sfa,
- eth_type, vlan_tci, mpls_label_count, log,
- depth + 1);
-
- if (err)
- return err;
+ if (actions) {
+ err = __ovs_nla_copy_actions(net, actions, key, sfa,
+ eth_type, vlan_tci,
+ mpls_label_count, log, depth + 1);
+ if (err)
+ return err;
+ }
add_nested_action_end(*sfa, start);
@@ -3553,20 +3581,38 @@ static int sample_action_to_attr(const struct nlattr *attr,
goto out;
}
- ac_start = nla_nest_start_noflag(skb, OVS_SAMPLE_ATTR_ACTIONS);
- if (!ac_start) {
- err = -EMSGSIZE;
- goto out;
+ if (arg->flags & OVS_SAMPLE_ARG_FLAG_PSAMPLE) {
+ if (nla_put_u32(skb, OVS_SAMPLE_ATTR_PSAMPLE_GROUP,
+ arg->group_id)) {
+ err = -EMSGSIZE;
+ goto out;
+ }
+
+ if (arg->cookie_len &&
+ nla_put(skb, OVS_SAMPLE_ATTR_PSAMPLE_COOKIE,
+ arg->cookie_len, &arg->cookie[0])) {
+ err = -EMSGSIZE;
+ goto out;
+ }
}
- err = ovs_nla_put_actions(actions, rem, skb);
+ if (nla_ok(actions, rem)) {
+ ac_start = nla_nest_start_noflag(skb, OVS_SAMPLE_ATTR_ACTIONS);
+ if (!ac_start) {
+ err = -EMSGSIZE;
+ goto out;
+ }
+ err = ovs_nla_put_actions(actions, rem, skb);
+ }
out:
if (err) {
- nla_nest_cancel(skb, ac_start);
+ if (ac_start)
+ nla_nest_cancel(skb, ac_start);
nla_nest_cancel(skb, start);
} else {
- nla_nest_end(skb, ac_start);
+ if (ac_start)
+ nla_nest_end(skb, ac_start);
nla_nest_end(skb, start);
}
--
2.44.0
Hi Adrian,
kernel test robot noticed the following build warnings:
url: https://github.com/intel-lab-lkp/linux/commits/Adrian-Moreno/net-netlink-export-genl-private-pointer-getters/20240424-215821
base: net-next/main
patch link: https://lore.kernel.org/r/20240424135109.3524355-7-amorenoz%40redhat.com
patch subject: [PATCH net-next 6/8] net:openvswitch: add psample support
config: i386-randconfig-141-20240430 (https://download.01.org/0day-ci/archive/20240430/[email protected]/config)
compiler: gcc-13 (Ubuntu 13.2.0-4ubuntu3) 13.2.0
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Reported-by: Dan Carpenter <[email protected]>
| Closes: https://lore.kernel.org/r/[email protected]/
New smatch warnings:
net/openvswitch/actions.c:1097 sample() error: uninitialized symbol 'ret'.
vim +/ret +1097 net/openvswitch/actions.c
ccb1352e76cff0 Jesse Gross 2011-10-25 1061 static int sample(struct datapath *dp, struct sk_buff *skb,
ccea74457bbdaf Neil McKee 2015-05-26 1062 struct sw_flow_key *key, const struct nlattr *attr,
798c166173ffb5 andy zhou 2017-03-20 1063 bool last)
ccb1352e76cff0 Jesse Gross 2011-10-25 1064 {
ccc0b9e4657efd Adrian Moreno 2024-04-24 1065 const struct sample_arg *arg;
798c166173ffb5 andy zhou 2017-03-20 1066 struct nlattr *sample_arg;
798c166173ffb5 andy zhou 2017-03-20 1067 int rem = nla_len(attr);
ccc0b9e4657efd Adrian Moreno 2024-04-24 1068 struct nlattr *actions;
bef7f7567a104a andy zhou 2017-03-20 1069 bool clone_flow_key;
ccc0b9e4657efd Adrian Moreno 2024-04-24 1070 int ret;
ccb1352e76cff0 Jesse Gross 2011-10-25 1071
798c166173ffb5 andy zhou 2017-03-20 1072 /* The first action is always 'OVS_SAMPLE_ATTR_ARG'. */
798c166173ffb5 andy zhou 2017-03-20 1073 sample_arg = nla_data(attr);
798c166173ffb5 andy zhou 2017-03-20 1074 arg = nla_data(sample_arg);
798c166173ffb5 andy zhou 2017-03-20 1075 actions = nla_next(sample_arg, &rem);
e05176a3283822 Wenyu Zhang 2015-08-05 1076
798c166173ffb5 andy zhou 2017-03-20 1077 if ((arg->probability != U32_MAX) &&
a251c17aa558d8 Jason A. Donenfeld 2022-10-05 1078 (!arg->probability || get_random_u32() > arg->probability)) {
798c166173ffb5 andy zhou 2017-03-20 1079 if (last)
9d802da40b7c82 Adrian Moreno 2023-08-11 1080 ovs_kfree_skb_reason(skb, OVS_DROP_LAST_ACTION);
ccb1352e76cff0 Jesse Gross 2011-10-25 1081 return 0;
ccb1352e76cff0 Jesse Gross 2011-10-25 1082 }
651887b0c22cff Simon Horman 2014-07-21 1083
ccc0b9e4657efd Adrian Moreno 2024-04-24 1084 if (arg->flags & OVS_SAMPLE_ARG_FLAG_PSAMPLE) {
ccc0b9e4657efd Adrian Moreno 2024-04-24 1085 ret = ovs_psample_packet(dp, key, arg, skb);
ccc0b9e4657efd Adrian Moreno 2024-04-24 1086 if (ret)
ccc0b9e4657efd Adrian Moreno 2024-04-24 1087 return ret;
ccc0b9e4657efd Adrian Moreno 2024-04-24 1088 }
ccc0b9e4657efd Adrian Moreno 2024-04-24 1089
ccc0b9e4657efd Adrian Moreno 2024-04-24 1090 if (nla_ok(actions, rem)) {
ccc0b9e4657efd Adrian Moreno 2024-04-24 1091 clone_flow_key = !(arg->flags & OVS_SAMPLE_ARG_FLAG_EXEC);
ccc0b9e4657efd Adrian Moreno 2024-04-24 1092 ret = clone_execute(dp, skb, key, 0, actions, rem, last,
bef7f7567a104a andy zhou 2017-03-20 1093 clone_flow_key);
ccc0b9e4657efd Adrian Moreno 2024-04-24 1094 } else if (last) {
ccc0b9e4657efd Adrian Moreno 2024-04-24 1095 ovs_kfree_skb_reason(skb, OVS_DROP_LAST_ACTION);
ret is uninitialized on this last path.
ccc0b9e4657efd Adrian Moreno 2024-04-24 1096 }
ccc0b9e4657efd Adrian Moreno 2024-04-24 @1097 return ret;
971427f353f3c4 Andy Zhou 2014-09-15 1098 }
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
On 24 Apr 2024, at 15:50, Adrian Moreno wrote:
> Add support for psample sampling via two new attributes to the
> OVS_ACTION_ATTR_SAMPLE action.
>
> OVS_SAMPLE_ATTR_PSAMPLE_GROUP used to pass an integer psample group_id.
> OVS_SAMPLE_ATTR_PSAMPLE_COOKIE used to pass a variable-length binary
> cookie that will be forwared to psample.
>
> The maximum length of the user-defined cookie is set to 16, same as
> tc_cookie, to discourage using cookies that will not be offloadable.
>
> In order to simplify the internal processing of the action and given the
> maximum size of the cookie is relatively small, add both fields to the
> internal-only struct sample_arg.
>
> The presence of a group_id mandates that the action shall called the
> psample module to multicast the packet with such group_id and the
> user-provided cookie if present. This behavior is orthonogal to
> also executing the nested actions if present.
>
> Signed-off-by: Adrian Moreno <[email protected]>
This is not a full review yet. Just some comments, as I’m looking at the user-space patch first and added similar comments.
I’ll do a proper review of this series once I’m done with user-space part.
//Eelco
> ---
> Documentation/netlink/specs/ovs_flow.yaml | 6 ++
> include/uapi/linux/openvswitch.h | 49 ++++++++++----
> net/openvswitch/actions.c | 51 +++++++++++++--
> net/openvswitch/flow_netlink.c | 80 ++++++++++++++++++-----
> 4 files changed, 153 insertions(+), 33 deletions(-)
>
> diff --git a/Documentation/netlink/specs/ovs_flow.yaml b/Documentation/netlink/specs/ovs_flow.yaml
> index 4fdfc6b5cae9..5543c2937225 100644
> --- a/Documentation/netlink/specs/ovs_flow.yaml
> +++ b/Documentation/netlink/specs/ovs_flow.yaml
> @@ -825,6 +825,12 @@ attribute-sets:
> name: actions
> type: nest
> nested-attributes: action-attrs
> + -
> + name: psample_group
> + type: u32
> + -
> + name: psample_cookie
> + type: binary
> -
> name: userspace-attrs
> enum-name: ovs-userspace-attr
> diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
> index efc82c318fa2..e9cd6f3a952d 100644
> --- a/include/uapi/linux/openvswitch.h
> +++ b/include/uapi/linux/openvswitch.h
> @@ -639,6 +639,7 @@ enum ovs_flow_attr {
> #define OVS_UFID_F_OMIT_MASK (1 << 1)
> #define OVS_UFID_F_OMIT_ACTIONS (1 << 2)
>
> +#define OVS_PSAMPLE_COOKIE_MAX_SIZE 16
> /**
> * enum ovs_sample_attr - Attributes for %OVS_ACTION_ATTR_SAMPLE action.
> * @OVS_SAMPLE_ATTR_PROBABILITY: 32-bit fraction of packets to sample with
> @@ -646,15 +647,27 @@ enum ovs_flow_attr {
> * %UINT32_MAX samples all packets and intermediate values sample intermediate
> * fractions of packets.
> * @OVS_SAMPLE_ATTR_ACTIONS: Set of actions to execute in sampling event.
> - * Actions are passed as nested attributes.
> + * Actions are passed as nested attributes. Optional if
> + * OVS_SAMPLE_ATTR_PSAMPLE_GROUP is set.
> + * @OVS_SAMPLE_ATTR_PSAMPLE_GROUP: A 32-bit number to be used as psample group.
> + * Optional if OVS_SAMPLE_ATTR_ACTIONS is set.
> + * @OVS_SAMPLE_ATTR_PSAMPLE_COOKIE: A variable-length binary cookie that, if
> + * provided, will be copied to the psample cookie.
As there is a limit of to the cookie should we mention it here?
> *
> - * Executes the specified actions with the given probability on a per-packet
> - * basis.
> + * Either OVS_SAMPLE_ATTR_PSAMPLE_GROUP or OVS_SAMPLE_ATTR_ACTIONS must be
> + * specified.
> + *
> + * Executes the specified actions and/or sends the packet to psample
> + * with the given probability on a per-packet basis.
> */
> enum ovs_sample_attr {
> OVS_SAMPLE_ATTR_UNSPEC,
> - OVS_SAMPLE_ATTR_PROBABILITY, /* u32 number */
> - OVS_SAMPLE_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */
> + OVS_SAMPLE_ATTR_PROBABILITY, /* u32 number */
> + OVS_SAMPLE_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_
Missing * after OVS_ACTION_ATTR_
> + * attributes.
> + */
> + OVS_SAMPLE_ATTR_PSAMPLE_GROUP, /* u32 number */
> + OVS_SAMPLE_ATTR_PSAMPLE_COOKIE, /* binary */
As these are general sample options, I would not add the PSAMPLE reference. Other data paths could use a different implementation. So I guess OVS_SAMPLE_ATTR_GROUP_ID and OVS_SAMPLE_ATTR_COOKIE would be enough.
> __OVS_SAMPLE_ATTR_MAX,
>
> #ifdef __KERNEL__
> @@ -665,13 +678,27 @@ enum ovs_sample_attr {
> #define OVS_SAMPLE_ATTR_MAX (__OVS_SAMPLE_ATTR_MAX - 1)
>
> #ifdef __KERNEL__
> +
> +/* Definition for flags in struct sample_arg. */
> +enum {
> + /* When set, actions in sample will not change the flows. */
> + OVS_SAMPLE_ARG_FLAG_EXEC = 1 << 0,
> + /* When set, the packet will be sent to psample. */
> + OVS_SAMPLE_ARG_FLAG_PSAMPLE = 1 << 1,
> +};
> +
> struct sample_arg {
> - bool exec; /* When true, actions in sample will not
> - * change flow keys. False otherwise.
> - */
> - u32 probability; /* Same value as
> - * 'OVS_SAMPLE_ATTR_PROBABILITY'.
> - */
Not sure if you can actually do this, you are changing a structure that is part of the UAPI. This change breaks backwards compatibility.
> + u16 flags; /* Flags that modify the behavior of the
> + * action. See SAMPLE_ARG_FLAG_*.
> + */
> + u32 probability; /* Same value as
> + * 'OVS_SAMPLE_ATTR_PROBABILITY'.
> + */
> + u32 group_id; /* Same value as
> + * 'OVS_SAMPLE_ATTR_PSAMPLE_GROUP'.
> + */
> + u8 cookie_len; /* Length of psample cookie. */
> + char cookie[OVS_PSAMPLE_COOKIE_MAX_SIZE]; /* psample cookie data. */
Would it make sense for the cookie also to be u8?
> };
> #endif
>
> diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
> index 6fcd7e2ca81f..eb3166986fd2 100644
> --- a/net/openvswitch/actions.c
> +++ b/net/openvswitch/actions.c
> @@ -24,6 +24,7 @@
> #include <net/checksum.h>
> #include <net/dsfield.h>
> #include <net/mpls.h>
> +#include <net/psample.h>
> #include <net/sctp/checksum.h>
>
> #include "datapath.h"
> @@ -1025,6 +1026,34 @@ static int dec_ttl_exception_handler(struct datapath *dp, struct sk_buff *skb,
> return 0;
> }
>
> +static int ovs_psample_packet(struct datapath *dp, struct sw_flow_key *key,
> + const struct sample_arg *arg,
> + struct sk_buff *skb)
> +{
> + struct psample_group psample_group = {};
> + struct psample_metadata md = {};
> + struct vport *input_vport;
> + u32 rate;
> +
> + psample_group.group_num = arg->group_id;
> + psample_group.net = ovs_dp_get_net(dp);
> +
> + input_vport = ovs_vport_rcu(dp, key->phy.in_port);
> + if (!input_vport)
> + input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);
> +
> + md.in_ifindex = input_vport->dev->ifindex;
> + md.user_cookie = arg->cookie_len ? &arg->cookie[0] : NULL;
> + md.user_cookie_len = arg->cookie_len;
> + md.trunc_size = skb->len;
> +
> + rate = arg->probability ? U32_MAX / arg->probability : 0;
> +
> + psample_sample_packet(&psample_group, skb, rate, &md);
Does this mean now the ovs module, now is dependent on the presence of psample? I think we should only support sampling to psample if the module exists, else we should return an error. There might be distributions not including psample by default.
> +
> + return 0;
> +}
> +
> /* When 'last' is true, sample() should always consume the 'skb'.
> * Otherwise, sample() should keep 'skb' intact regardless what
> * actions are executed within sample().
> @@ -1033,11 +1062,12 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
> struct sw_flow_key *key, const struct nlattr *attr,
> bool last)
> {
> - struct nlattr *actions;
> + const struct sample_arg *arg;
> struct nlattr *sample_arg;
> int rem = nla_len(attr);
> - const struct sample_arg *arg;
> + struct nlattr *actions;
> bool clone_flow_key;
> + int ret;
>
> /* The first action is always 'OVS_SAMPLE_ATTR_ARG'. */
> sample_arg = nla_data(attr);
> @@ -1051,9 +1081,20 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
> return 0;
> }
>
> - clone_flow_key = !arg->exec;
> - return clone_execute(dp, skb, key, 0, actions, rem, last,
> - clone_flow_key);
> + if (arg->flags & OVS_SAMPLE_ARG_FLAG_PSAMPLE) {
> + ret = ovs_psample_packet(dp, key, arg, skb);
> + if (ret)
> + return ret;
> + }
> +
> + if (nla_ok(actions, rem)) {
> + clone_flow_key = !(arg->flags & OVS_SAMPLE_ARG_FLAG_EXEC);
> + ret = clone_execute(dp, skb, key, 0, actions, rem, last,
> + clone_flow_key);
> + } else if (last) {
> + ovs_kfree_skb_reason(skb, OVS_DROP_LAST_ACTION);
> + }
> + return ret;
> }
>
> /* When 'last' is true, clone() should always consume the 'skb'.
> diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
> index f224d9bcea5e..1a348d3905fc 100644
> --- a/net/openvswitch/flow_netlink.c
> +++ b/net/openvswitch/flow_netlink.c
> @@ -2561,6 +2561,9 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
> u32 mpls_label_count, bool log,
> u32 depth);
>
> +static int copy_action(const struct nlattr *from,
> + struct sw_flow_actions **sfa, bool log);
> +
> static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
> const struct sw_flow_key *key,
> struct sw_flow_actions **sfa,
> @@ -2569,10 +2572,10 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
> u32 depth)
> {
> const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
> - const struct nlattr *probability, *actions;
> + const struct nlattr *probability, *actions, *group, *cookie;
> + struct sample_arg arg = {};
> const struct nlattr *a;
> int rem, start, err;
> - struct sample_arg arg;
>
> memset(attrs, 0, sizeof(attrs));
> nla_for_each_nested(a, attr, rem) {
> @@ -2589,7 +2592,19 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
> return -EINVAL;
>
> actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
> - if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
> + if (actions && (!nla_len(actions) || nla_len(actions) < NLA_HDRLEN))
> + return -EINVAL;
> +
> + group = attrs[OVS_SAMPLE_ATTR_PSAMPLE_GROUP];
> + if (group && nla_len(group) != sizeof(u32))
> + return -EINVAL;
> +
> + cookie = attrs[OVS_SAMPLE_ATTR_PSAMPLE_COOKIE];
> + if (cookie &&
> + (!group || nla_len(cookie) > OVS_PSAMPLE_COOKIE_MAX_SIZE))
> + return -EINVAL;
> +
> + if (!group && !actions)
> return -EINVAL;
>
> /* validation done, copy sample action. */
> @@ -2608,7 +2623,19 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
> * If the sample is the last action, it can always be excuted
> * rather than deferred.
> */
> - arg.exec = last || !actions_may_change_flow(actions);
> + if (actions && (last || !actions_may_change_flow(actions)))
> + arg.flags |= OVS_SAMPLE_ARG_FLAG_EXEC;
> +
> + if (group) {
> + arg.flags |= OVS_SAMPLE_ARG_FLAG_PSAMPLE;
> + arg.group_id = nla_get_u32(group);
> + }
> +
> + if (cookie) {
> + memcpy(&arg.cookie[0], nla_data(cookie), nla_len(cookie));
> + arg.cookie_len = nla_len(cookie);
> + }
> +
> arg.probability = nla_get_u32(probability);
>
> err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_ARG, &arg, sizeof(arg),
> @@ -2616,12 +2643,13 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
> if (err)
> return err;
>
> - err = __ovs_nla_copy_actions(net, actions, key, sfa,
> - eth_type, vlan_tci, mpls_label_count, log,
> - depth + 1);
> -
> - if (err)
> - return err;
> + if (actions) {
> + err = __ovs_nla_copy_actions(net, actions, key, sfa,
> + eth_type, vlan_tci,
> + mpls_label_count, log, depth + 1);
> + if (err)
> + return err;
> + }
>
> add_nested_action_end(*sfa, start);
>
> @@ -3553,20 +3581,38 @@ static int sample_action_to_attr(const struct nlattr *attr,
> goto out;
> }
>
> - ac_start = nla_nest_start_noflag(skb, OVS_SAMPLE_ATTR_ACTIONS);
> - if (!ac_start) {
> - err = -EMSGSIZE;
> - goto out;
> + if (arg->flags & OVS_SAMPLE_ARG_FLAG_PSAMPLE) {
> + if (nla_put_u32(skb, OVS_SAMPLE_ATTR_PSAMPLE_GROUP,
> + arg->group_id)) {
> + err = -EMSGSIZE;
> + goto out;
> + }
> +
> + if (arg->cookie_len &&
> + nla_put(skb, OVS_SAMPLE_ATTR_PSAMPLE_COOKIE,
> + arg->cookie_len, &arg->cookie[0])) {
> + err = -EMSGSIZE;
> + goto out;
> + }
> }
>
> - err = ovs_nla_put_actions(actions, rem, skb);
> + if (nla_ok(actions, rem)) {
> + ac_start = nla_nest_start_noflag(skb, OVS_SAMPLE_ATTR_ACTIONS);
> + if (!ac_start) {
> + err = -EMSGSIZE;
> + goto out;
> + }
> + err = ovs_nla_put_actions(actions, rem, skb);
> + }
>
> out:
> if (err) {
> - nla_nest_cancel(skb, ac_start);
> + if (ac_start)
> + nla_nest_cancel(skb, ac_start);
> nla_nest_cancel(skb, start);
> } else {
> - nla_nest_end(skb, ac_start);
> + if (ac_start)
> + nla_nest_end(skb, ac_start);
> nla_nest_end(skb, start);
> }
>
> --
> 2.44.0
On 5/3/24 11:43 AM, Eelco Chaudron wrote:
>
>
> On 24 Apr 2024, at 15:50, Adrian Moreno wrote:
>
>> Add support for psample sampling via two new attributes to the
>> OVS_ACTION_ATTR_SAMPLE action.
>>
>> OVS_SAMPLE_ATTR_PSAMPLE_GROUP used to pass an integer psample group_id.
>> OVS_SAMPLE_ATTR_PSAMPLE_COOKIE used to pass a variable-length binary
>> cookie that will be forwared to psample.
>>
>> The maximum length of the user-defined cookie is set to 16, same as
>> tc_cookie, to discourage using cookies that will not be offloadable.
>>
>> In order to simplify the internal processing of the action and given the
>> maximum size of the cookie is relatively small, add both fields to the
>> internal-only struct sample_arg.
>>
>> The presence of a group_id mandates that the action shall called the
>> psample module to multicast the packet with such group_id and the
>> user-provided cookie if present. This behavior is orthonogal to
>> also executing the nested actions if present.
>>
>> Signed-off-by: Adrian Moreno <[email protected]>
>
> This is not a full review yet. Just some comments, as I’m looking at the user-space patch first and added similar comments.
>
> I’ll do a proper review of this series once I’m done with user-space part.
>
> //Eelco
>
>> ---
>> Documentation/netlink/specs/ovs_flow.yaml | 6 ++
>> include/uapi/linux/openvswitch.h | 49 ++++++++++----
>> net/openvswitch/actions.c | 51 +++++++++++++--
>> net/openvswitch/flow_netlink.c | 80 ++++++++++++++++++-----
>> 4 files changed, 153 insertions(+), 33 deletions(-)
>>
>> diff --git a/Documentation/netlink/specs/ovs_flow.yaml b/Documentation/netlink/specs/ovs_flow.yaml
>> index 4fdfc6b5cae9..5543c2937225 100644
>> --- a/Documentation/netlink/specs/ovs_flow.yaml
>> +++ b/Documentation/netlink/specs/ovs_flow.yaml
>> @@ -825,6 +825,12 @@ attribute-sets:
>> name: actions
>> type: nest
>> nested-attributes: action-attrs
>> + -
>> + name: psample_group
>> + type: u32
>> + -
>> + name: psample_cookie
>> + type: binary
>> -
>> name: userspace-attrs
>> enum-name: ovs-userspace-attr
>> diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
>> index efc82c318fa2..e9cd6f3a952d 100644
>> --- a/include/uapi/linux/openvswitch.h
>> +++ b/include/uapi/linux/openvswitch.h
>> @@ -639,6 +639,7 @@ enum ovs_flow_attr {
>> #define OVS_UFID_F_OMIT_MASK (1 << 1)
>> #define OVS_UFID_F_OMIT_ACTIONS (1 << 2)
>>
>> +#define OVS_PSAMPLE_COOKIE_MAX_SIZE 16
>> /**
>> * enum ovs_sample_attr - Attributes for %OVS_ACTION_ATTR_SAMPLE action.
>> * @OVS_SAMPLE_ATTR_PROBABILITY: 32-bit fraction of packets to sample with
>> @@ -646,15 +647,27 @@ enum ovs_flow_attr {
>> * %UINT32_MAX samples all packets and intermediate values sample intermediate
>> * fractions of packets.
>> * @OVS_SAMPLE_ATTR_ACTIONS: Set of actions to execute in sampling event.
>> - * Actions are passed as nested attributes.
>> + * Actions are passed as nested attributes. Optional if
>> + * OVS_SAMPLE_ATTR_PSAMPLE_GROUP is set.
>> + * @OVS_SAMPLE_ATTR_PSAMPLE_GROUP: A 32-bit number to be used as psample group.
>> + * Optional if OVS_SAMPLE_ATTR_ACTIONS is set.
>> + * @OVS_SAMPLE_ATTR_PSAMPLE_COOKIE: A variable-length binary cookie that, if
>> + * provided, will be copied to the psample cookie.
>
> As there is a limit of to the cookie should we mention it here?
>
I thought OVS_PSAMPLE_COOKIE_MAX_SIZE was expressive enough but sure, we can
also mention it here.
>> *
>> - * Executes the specified actions with the given probability on a per-packet
>> - * basis.
>> + * Either OVS_SAMPLE_ATTR_PSAMPLE_GROUP or OVS_SAMPLE_ATTR_ACTIONS must be
>> + * specified.
>> + *
>> + * Executes the specified actions and/or sends the packet to psample
>> + * with the given probability on a per-packet basis.
>> */
>> enum ovs_sample_attr {
>> OVS_SAMPLE_ATTR_UNSPEC,
>> - OVS_SAMPLE_ATTR_PROBABILITY, /* u32 number */
>> - OVS_SAMPLE_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */
>> + OVS_SAMPLE_ATTR_PROBABILITY, /* u32 number */
>> + OVS_SAMPLE_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_
>
> Missing * after OVS_ACTION_ATTR_
As a matter of fact, adding an * makes checkpatch generate a warning IIRC.
That's why I initially removed it. I can look at fixing checkpatch instead.
>
>> + * attributes.
>> + */
>> + OVS_SAMPLE_ATTR_PSAMPLE_GROUP, /* u32 number */
>> + OVS_SAMPLE_ATTR_PSAMPLE_COOKIE, /* binary */
>
> As these are general sample options, I would not add the PSAMPLE reference. Other data paths could use a different implementation. So I guess OVS_SAMPLE_ATTR_GROUP_ID and OVS_SAMPLE_ATTR_COOKIE would be enough.
>
OK. But isn't the API already psample-ish? I mean that the group_id is something
specific to psample that might not be present in other datapath implementation.
>> __OVS_SAMPLE_ATTR_MAX,
>>
>> #ifdef __KERNEL__
>> @@ -665,13 +678,27 @@ enum ovs_sample_attr {
>> #define OVS_SAMPLE_ATTR_MAX (__OVS_SAMPLE_ATTR_MAX - 1)
>>
>> #ifdef __KERNEL__
>> +
>> +/* Definition for flags in struct sample_arg. */
>> +enum {
>> + /* When set, actions in sample will not change the flows. */
>> + OVS_SAMPLE_ARG_FLAG_EXEC = 1 << 0,
>> + /* When set, the packet will be sent to psample. */
>> + OVS_SAMPLE_ARG_FLAG_PSAMPLE = 1 << 1,
>> +};
>> +
>> struct sample_arg {
>> - bool exec; /* When true, actions in sample will not
>> - * change flow keys. False otherwise.
>> - */
>> - u32 probability; /* Same value as
>> - * 'OVS_SAMPLE_ATTR_PROBABILITY'.
>> - */
>
>
> Not sure if you can actually do this, you are changing a structure that is part of the UAPI. This change breaks backwards compatibility.
>
Hmmm... this the internal argument structure protected by #ifdef __KERNEL__. It
is used in several actions to optimize the internal action handling (i.e: using
a compact struct instead of a list of netlink attributes). I guess the reason
for having it in this file is because the attribute enum is being reused, but I
wouldn't think of this struct as part of the uAPI.
If the (#ifdef __KERNEL__) does not exclude this struct from the uAPI I think we
should move it (all of them actually) to some internal file.
>
>> + u16 flags; /* Flags that modify the behavior of the
>> + * action. See SAMPLE_ARG_FLAG_*.
>> + */
>> + u32 probability; /* Same value as
>> + * 'OVS_SAMPLE_ATTR_PROBABILITY'.
>> + */
>> + u32 group_id; /* Same value as
>> + * 'OVS_SAMPLE_ATTR_PSAMPLE_GROUP'.
>> + */
>> + u8 cookie_len; /* Length of psample cookie. */
>> + char cookie[OVS_PSAMPLE_COOKIE_MAX_SIZE]; /* psample cookie data. */
>
> Would it make sense for the cookie also to be u8?
>
Yes, probably.
>> };
>> #endif
>>
>> diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
>> index 6fcd7e2ca81f..eb3166986fd2 100644
>> --- a/net/openvswitch/actions.c
>> +++ b/net/openvswitch/actions.c
>> @@ -24,6 +24,7 @@
>> #include <net/checksum.h>
>> #include <net/dsfield.h>
>> #include <net/mpls.h>
>> +#include <net/psample.h>
>> #include <net/sctp/checksum.h>
>>
>> #include "datapath.h"
>> @@ -1025,6 +1026,34 @@ static int dec_ttl_exception_handler(struct datapath *dp, struct sk_buff *skb,
>> return 0;
>> }
>>
>> +static int ovs_psample_packet(struct datapath *dp, struct sw_flow_key *key,
>> + const struct sample_arg *arg,
>> + struct sk_buff *skb)
>> +{
>> + struct psample_group psample_group = {};
>> + struct psample_metadata md = {};
>> + struct vport *input_vport;
>> + u32 rate;
>> +
>> + psample_group.group_num = arg->group_id;
>> + psample_group.net = ovs_dp_get_net(dp);
>> +
>> + input_vport = ovs_vport_rcu(dp, key->phy.in_port);
>> + if (!input_vport)
>> + input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);
>> +
>> + md.in_ifindex = input_vport->dev->ifindex;
>> + md.user_cookie = arg->cookie_len ? &arg->cookie[0] : NULL;
>> + md.user_cookie_len = arg->cookie_len;
>> + md.trunc_size = skb->len;
>> +
>> + rate = arg->probability ? U32_MAX / arg->probability : 0;
>> +
>> + psample_sample_packet(&psample_group, skb, rate, &md);
>
> Does this mean now the ovs module, now is dependent on the presence of psample? I think we should only support sampling to psample if the module exists, else we should return an error. There might be distributions not including psample by default.
Agree. I'll add some compile-time checks the same way we do with nf_nat.
>
>> +
>> + return 0;
>> +}
>> +
>> /* When 'last' is true, sample() should always consume the 'skb'.
>> * Otherwise, sample() should keep 'skb' intact regardless what
>> * actions are executed within sample().
>> @@ -1033,11 +1062,12 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
>> struct sw_flow_key *key, const struct nlattr *attr,
>> bool last)
>> {
>> - struct nlattr *actions;
>> + const struct sample_arg *arg;
>> struct nlattr *sample_arg;
>> int rem = nla_len(attr);
>> - const struct sample_arg *arg;
>> + struct nlattr *actions;
>> bool clone_flow_key;
>> + int ret;
>>
>> /* The first action is always 'OVS_SAMPLE_ATTR_ARG'. */
>> sample_arg = nla_data(attr);
>> @@ -1051,9 +1081,20 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
>> return 0;
>> }
>>
>> - clone_flow_key = !arg->exec;
>> - return clone_execute(dp, skb, key, 0, actions, rem, last,
>> - clone_flow_key);
>> + if (arg->flags & OVS_SAMPLE_ARG_FLAG_PSAMPLE) {
>> + ret = ovs_psample_packet(dp, key, arg, skb);
>> + if (ret)
>> + return ret;
>> + }
>> +
>> + if (nla_ok(actions, rem)) {
>> + clone_flow_key = !(arg->flags & OVS_SAMPLE_ARG_FLAG_EXEC);
>> + ret = clone_execute(dp, skb, key, 0, actions, rem, last,
>> + clone_flow_key);
>> + } else if (last) {
>> + ovs_kfree_skb_reason(skb, OVS_DROP_LAST_ACTION);
>> + }
>> + return ret;
>> }
>>
>> /* When 'last' is true, clone() should always consume the 'skb'.
>> diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
>> index f224d9bcea5e..1a348d3905fc 100644
>> --- a/net/openvswitch/flow_netlink.c
>> +++ b/net/openvswitch/flow_netlink.c
>> @@ -2561,6 +2561,9 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
>> u32 mpls_label_count, bool log,
>> u32 depth);
>>
>> +static int copy_action(const struct nlattr *from,
>> + struct sw_flow_actions **sfa, bool log);
>> +
>> static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>> const struct sw_flow_key *key,
>> struct sw_flow_actions **sfa,
>> @@ -2569,10 +2572,10 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>> u32 depth)
>> {
>> const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
>> - const struct nlattr *probability, *actions;
>> + const struct nlattr *probability, *actions, *group, *cookie;
>> + struct sample_arg arg = {};
>> const struct nlattr *a;
>> int rem, start, err;
>> - struct sample_arg arg;
>>
>> memset(attrs, 0, sizeof(attrs));
>> nla_for_each_nested(a, attr, rem) {
>> @@ -2589,7 +2592,19 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>> return -EINVAL;
>>
>> actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
>> - if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
>> + if (actions && (!nla_len(actions) || nla_len(actions) < NLA_HDRLEN))
>> + return -EINVAL;
>> +
>> + group = attrs[OVS_SAMPLE_ATTR_PSAMPLE_GROUP];
>> + if (group && nla_len(group) != sizeof(u32))
>> + return -EINVAL;
>> +
>> + cookie = attrs[OVS_SAMPLE_ATTR_PSAMPLE_COOKIE];
>> + if (cookie &&
>> + (!group || nla_len(cookie) > OVS_PSAMPLE_COOKIE_MAX_SIZE))
>> + return -EINVAL;
>> +
>> + if (!group && !actions)
>> return -EINVAL;
>>
>> /* validation done, copy sample action. */
>> @@ -2608,7 +2623,19 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>> * If the sample is the last action, it can always be excuted
>> * rather than deferred.
>> */
>> - arg.exec = last || !actions_may_change_flow(actions);
>> + if (actions && (last || !actions_may_change_flow(actions)))
>> + arg.flags |= OVS_SAMPLE_ARG_FLAG_EXEC;
>> +
>> + if (group) {
>> + arg.flags |= OVS_SAMPLE_ARG_FLAG_PSAMPLE;
>> + arg.group_id = nla_get_u32(group);
>> + }
>> +
>> + if (cookie) {
>> + memcpy(&arg.cookie[0], nla_data(cookie), nla_len(cookie));
>> + arg.cookie_len = nla_len(cookie);
>> + }
>> +
>> arg.probability = nla_get_u32(probability);
>>
>> err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_ARG, &arg, sizeof(arg),
>> @@ -2616,12 +2643,13 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>> if (err)
>> return err;
>>
>> - err = __ovs_nla_copy_actions(net, actions, key, sfa,
>> - eth_type, vlan_tci, mpls_label_count, log,
>> - depth + 1);
>> -
>> - if (err)
>> - return err;
>> + if (actions) {
>> + err = __ovs_nla_copy_actions(net, actions, key, sfa,
>> + eth_type, vlan_tci,
>> + mpls_label_count, log, depth + 1);
>> + if (err)
>> + return err;
>> + }
>>
>> add_nested_action_end(*sfa, start);
>>
>> @@ -3553,20 +3581,38 @@ static int sample_action_to_attr(const struct nlattr *attr,
>> goto out;
>> }
>>
>> - ac_start = nla_nest_start_noflag(skb, OVS_SAMPLE_ATTR_ACTIONS);
>> - if (!ac_start) {
>> - err = -EMSGSIZE;
>> - goto out;
>> + if (arg->flags & OVS_SAMPLE_ARG_FLAG_PSAMPLE) {
>> + if (nla_put_u32(skb, OVS_SAMPLE_ATTR_PSAMPLE_GROUP,
>> + arg->group_id)) {
>> + err = -EMSGSIZE;
>> + goto out;
>> + }
>> +
>> + if (arg->cookie_len &&
>> + nla_put(skb, OVS_SAMPLE_ATTR_PSAMPLE_COOKIE,
>> + arg->cookie_len, &arg->cookie[0])) {
>> + err = -EMSGSIZE;
>> + goto out;
>> + }
>> }
>>
>> - err = ovs_nla_put_actions(actions, rem, skb);
>> + if (nla_ok(actions, rem)) {
>> + ac_start = nla_nest_start_noflag(skb, OVS_SAMPLE_ATTR_ACTIONS);
>> + if (!ac_start) {
>> + err = -EMSGSIZE;
>> + goto out;
>> + }
>> + err = ovs_nla_put_actions(actions, rem, skb);
>> + }
>>
>> out:
>> if (err) {
>> - nla_nest_cancel(skb, ac_start);
>> + if (ac_start)
>> + nla_nest_cancel(skb, ac_start);
>> nla_nest_cancel(skb, start);
>> } else {
>> - nla_nest_end(skb, ac_start);
>> + if (ac_start)
>> + nla_nest_end(skb, ac_start);
>> nla_nest_end(skb, start);
>> }
>>
>> --
>> 2.44.0
>
On 7 May 2024, at 16:18, Adrian Moreno wrote:
> On 5/3/24 11:43 AM, Eelco Chaudron wrote:
>>
>>
>> On 24 Apr 2024, at 15:50, Adrian Moreno wrote:
>>
>>> Add support for psample sampling via two new attributes to the
>>> OVS_ACTION_ATTR_SAMPLE action.
>>>
>>> OVS_SAMPLE_ATTR_PSAMPLE_GROUP used to pass an integer psample group_id.
>>> OVS_SAMPLE_ATTR_PSAMPLE_COOKIE used to pass a variable-length binary
>>> cookie that will be forwared to psample.
>>>
>>> The maximum length of the user-defined cookie is set to 16, same as
>>> tc_cookie, to discourage using cookies that will not be offloadable.
>>>
>>> In order to simplify the internal processing of the action and given the
>>> maximum size of the cookie is relatively small, add both fields to the
>>> internal-only struct sample_arg.
>>>
>>> The presence of a group_id mandates that the action shall called the
>>> psample module to multicast the packet with such group_id and the
>>> user-provided cookie if present. This behavior is orthonogal to
>>> also executing the nested actions if present.
>>>
>>> Signed-off-by: Adrian Moreno <[email protected]>
>>
>> This is not a full review yet. Just some comments, as I’m looking at the user-space patch first and added similar comments.
>>
>> I’ll do a proper review of this series once I’m done with user-space part.
>>
>> //Eelco
>>
>>> ---
>>> Documentation/netlink/specs/ovs_flow.yaml | 6 ++
>>> include/uapi/linux/openvswitch.h | 49 ++++++++++----
>>> net/openvswitch/actions.c | 51 +++++++++++++--
>>> net/openvswitch/flow_netlink.c | 80 ++++++++++++++++++-----
>>> 4 files changed, 153 insertions(+), 33 deletions(-)
>>>
>>> diff --git a/Documentation/netlink/specs/ovs_flow.yaml b/Documentation/netlink/specs/ovs_flow.yaml
>>> index 4fdfc6b5cae9..5543c2937225 100644
>>> --- a/Documentation/netlink/specs/ovs_flow.yaml
>>> +++ b/Documentation/netlink/specs/ovs_flow.yaml
>>> @@ -825,6 +825,12 @@ attribute-sets:
>>> name: actions
>>> type: nest
>>> nested-attributes: action-attrs
>>> + -
>>> + name: psample_group
>>> + type: u32
>>> + -
>>> + name: psample_cookie
>>> + type: binary
>>> -
>>> name: userspace-attrs
>>> enum-name: ovs-userspace-attr
>>> diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
>>> index efc82c318fa2..e9cd6f3a952d 100644
>>> --- a/include/uapi/linux/openvswitch.h
>>> +++ b/include/uapi/linux/openvswitch.h
>>> @@ -639,6 +639,7 @@ enum ovs_flow_attr {
>>> #define OVS_UFID_F_OMIT_MASK (1 << 1)
>>> #define OVS_UFID_F_OMIT_ACTIONS (1 << 2)
>>>
>>> +#define OVS_PSAMPLE_COOKIE_MAX_SIZE 16
>>> /**
>>> * enum ovs_sample_attr - Attributes for %OVS_ACTION_ATTR_SAMPLE action.
>>> * @OVS_SAMPLE_ATTR_PROBABILITY: 32-bit fraction of packets to sample with
>>> @@ -646,15 +647,27 @@ enum ovs_flow_attr {
>>> * %UINT32_MAX samples all packets and intermediate values sample intermediate
>>> * fractions of packets.
>>> * @OVS_SAMPLE_ATTR_ACTIONS: Set of actions to execute in sampling event.
>>> - * Actions are passed as nested attributes.
>>> + * Actions are passed as nested attributes. Optional if
>>> + * OVS_SAMPLE_ATTR_PSAMPLE_GROUP is set.
>>> + * @OVS_SAMPLE_ATTR_PSAMPLE_GROUP: A 32-bit number to be used as psample group.
>>> + * Optional if OVS_SAMPLE_ATTR_ACTIONS is set.
>>> + * @OVS_SAMPLE_ATTR_PSAMPLE_COOKIE: A variable-length binary cookie that, if
>>> + * provided, will be copied to the psample cookie.
>>
>> As there is a limit of to the cookie should we mention it here?
>>
>
> I thought OVS_PSAMPLE_COOKIE_MAX_SIZE was expressive enough but sure, we can also mention it here.
>
>>> *
>>> - * Executes the specified actions with the given probability on a per-packet
>>> - * basis.
>>> + * Either OVS_SAMPLE_ATTR_PSAMPLE_GROUP or OVS_SAMPLE_ATTR_ACTIONS must be
>>> + * specified.
>>> + *
>>> + * Executes the specified actions and/or sends the packet to psample
>>> + * with the given probability on a per-packet basis.
>>> */
>>> enum ovs_sample_attr {
>>> OVS_SAMPLE_ATTR_UNSPEC,
>>> - OVS_SAMPLE_ATTR_PROBABILITY, /* u32 number */
>>> - OVS_SAMPLE_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */
>>> + OVS_SAMPLE_ATTR_PROBABILITY, /* u32 number */
>>> + OVS_SAMPLE_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_
>>
>> Missing * after OVS_ACTION_ATTR_
>
> As a matter of fact, adding an * makes checkpatch generate a warning IIRC. That's why I initially removed it. I can look at fixing checkpatch instead.
>
>>
>>> + * attributes.
>>> + */
>>> + OVS_SAMPLE_ATTR_PSAMPLE_GROUP, /* u32 number */
>>> + OVS_SAMPLE_ATTR_PSAMPLE_COOKIE, /* binary */
>>
>> As these are general sample options, I would not add the PSAMPLE reference. Other data paths could use a different implementation. So I guess OVS_SAMPLE_ATTR_GROUP_ID and OVS_SAMPLE_ATTR_COOKIE would be enough.
>>
>
> OK. But isn't the API already psample-ish? I mean that the group_id is something specific to psample that might not be present in other datapath implementation.
Well, I see it as a general GROUP and COOKIE attribute that should be available as part of the SAMPLE being taken/provided by the Datapath implementation.
Not sure what we should call PSAMPLE, but some suggestions are;
OVS_SAMPLE_ATTR_OFFLOAD/DESTINATION/ENDPOINT/COLLECTOR/TARGET_COOKIE
>>> __OVS_SAMPLE_ATTR_MAX,
>>>
>>> #ifdef __KERNEL__
>>> @@ -665,13 +678,27 @@ enum ovs_sample_attr {
>>> #define OVS_SAMPLE_ATTR_MAX (__OVS_SAMPLE_ATTR_MAX - 1)
>>>
>>> #ifdef __KERNEL__
>>> +
>>> +/* Definition for flags in struct sample_arg. */
>>> +enum {
>>> + /* When set, actions in sample will not change the flows. */
>>> + OVS_SAMPLE_ARG_FLAG_EXEC = 1 << 0,
>>> + /* When set, the packet will be sent to psample. */
>>> + OVS_SAMPLE_ARG_FLAG_PSAMPLE = 1 << 1,
>>> +};
>>> +
>>> struct sample_arg {
>>> - bool exec; /* When true, actions in sample will not
>>> - * change flow keys. False otherwise.
>>> - */
>>> - u32 probability; /* Same value as
>>> - * 'OVS_SAMPLE_ATTR_PROBABILITY'.
>>> - */
>>
>>
>> Not sure if you can actually do this, you are changing a structure that is part of the UAPI. This change breaks backwards compatibility.
>>
>
> Hmmm... this the internal argument structure protected by #ifdef __KERNEL__. It is used in several actions to optimize the internal action handling (i.e: using a compact struct instead of a list of netlink attributes). I guess the reason for having it in this file is because the attribute enum is being reused, but I wouldn't think of this struct as part of the uAPI.
>
> If the (#ifdef __KERNEL__) does not exclude this struct from the uAPI I think we should move it (all of them actually) to some internal file.
You are right, I missed the ‘#ifdef __KERNEL__’ kernel.
>>
>>> + u16 flags; /* Flags that modify the behavior of the
>>> + * action. See SAMPLE_ARG_FLAG_*.
>>> + */
>>> + u32 probability; /* Same value as
>>> + * 'OVS_SAMPLE_ATTR_PROBABILITY'.
>>> + */
>>> + u32 group_id; /* Same value as
>>> + * 'OVS_SAMPLE_ATTR_PSAMPLE_GROUP'.
>>> + */
>>> + u8 cookie_len; /* Length of psample cookie. */
>>> + char cookie[OVS_PSAMPLE_COOKIE_MAX_SIZE]; /* psample cookie data. */
>>
>> Would it make sense for the cookie also to be u8?
>>
>
> Yes, probably.
>
>>> };
>>> #endif
>>>
>>> diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
>>> index 6fcd7e2ca81f..eb3166986fd2 100644
>>> --- a/net/openvswitch/actions.c
>>> +++ b/net/openvswitch/actions.c
>>> @@ -24,6 +24,7 @@
>>> #include <net/checksum.h>
>>> #include <net/dsfield.h>
>>> #include <net/mpls.h>
>>> +#include <net/psample.h>
>>> #include <net/sctp/checksum.h>
>>>
>>> #include "datapath.h"
>>> @@ -1025,6 +1026,34 @@ static int dec_ttl_exception_handler(struct datapath *dp, struct sk_buff *skb,
>>> return 0;
>>> }
>>>
>>> +static int ovs_psample_packet(struct datapath *dp, struct sw_flow_key *key,
>>> + const struct sample_arg *arg,
>>> + struct sk_buff *skb)
>>> +{
>>> + struct psample_group psample_group = {};
>>> + struct psample_metadata md = {};
>>> + struct vport *input_vport;
>>> + u32 rate;
>>> +
>>> + psample_group.group_num = arg->group_id;
>>> + psample_group.net = ovs_dp_get_net(dp);
>>> +
>>> + input_vport = ovs_vport_rcu(dp, key->phy.in_port);
>>> + if (!input_vport)
>>> + input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);
>>> +
>>> + md.in_ifindex = input_vport->dev->ifindex;
>>> + md.user_cookie = arg->cookie_len ? &arg->cookie[0] : NULL;
>>> + md.user_cookie_len = arg->cookie_len;
>>> + md.trunc_size = skb->len;
>>> +
>>> + rate = arg->probability ? U32_MAX / arg->probability : 0;
>>> +
>>> + psample_sample_packet(&psample_group, skb, rate, &md);
>>
>> Does this mean now the ovs module, now is dependent on the presence of psample? I think we should only support sampling to psample if the module exists, else we should return an error. There might be distributions not including psample by default.
>
> Agree. I'll add some compile-time checks the same way we do with nf_nat.
>
>>
>>> +
>>> + return 0;
>>> +}
>>> +
>>> /* When 'last' is true, sample() should always consume the 'skb'.
>>> * Otherwise, sample() should keep 'skb' intact regardless what
>>> * actions are executed within sample().
>>> @@ -1033,11 +1062,12 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
>>> struct sw_flow_key *key, const struct nlattr *attr,
>>> bool last)
>>> {
>>> - struct nlattr *actions;
>>> + const struct sample_arg *arg;
>>> struct nlattr *sample_arg;
>>> int rem = nla_len(attr);
>>> - const struct sample_arg *arg;
>>> + struct nlattr *actions;
>>> bool clone_flow_key;
>>> + int ret;
>>>
>>> /* The first action is always 'OVS_SAMPLE_ATTR_ARG'. */
>>> sample_arg = nla_data(attr);
>>> @@ -1051,9 +1081,20 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
>>> return 0;
>>> }
>>>
>>> - clone_flow_key = !arg->exec;
>>> - return clone_execute(dp, skb, key, 0, actions, rem, last,
>>> - clone_flow_key);
>>> + if (arg->flags & OVS_SAMPLE_ARG_FLAG_PSAMPLE) {
>>> + ret = ovs_psample_packet(dp, key, arg, skb);
>>> + if (ret)
>>> + return ret;
>>> + }
>>> +
>>> + if (nla_ok(actions, rem)) {
>>> + clone_flow_key = !(arg->flags & OVS_SAMPLE_ARG_FLAG_EXEC);
>>> + ret = clone_execute(dp, skb, key, 0, actions, rem, last,
>>> + clone_flow_key);
>>> + } else if (last) {
>>> + ovs_kfree_skb_reason(skb, OVS_DROP_LAST_ACTION);
>>> + }
>>> + return ret;
>>> }
>>>
>>> /* When 'last' is true, clone() should always consume the 'skb'.
>>> diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
>>> index f224d9bcea5e..1a348d3905fc 100644
>>> --- a/net/openvswitch/flow_netlink.c
>>> +++ b/net/openvswitch/flow_netlink.c
>>> @@ -2561,6 +2561,9 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
>>> u32 mpls_label_count, bool log,
>>> u32 depth);
>>>
>>> +static int copy_action(const struct nlattr *from,
>>> + struct sw_flow_actions **sfa, bool log);
>>> +
>>> static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>>> const struct sw_flow_key *key,
>>> struct sw_flow_actions **sfa,
>>> @@ -2569,10 +2572,10 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>>> u32 depth)
>>> {
>>> const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
>>> - const struct nlattr *probability, *actions;
>>> + const struct nlattr *probability, *actions, *group, *cookie;
>>> + struct sample_arg arg = {};
>>> const struct nlattr *a;
>>> int rem, start, err;
>>> - struct sample_arg arg;
>>>
>>> memset(attrs, 0, sizeof(attrs));
>>> nla_for_each_nested(a, attr, rem) {
>>> @@ -2589,7 +2592,19 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>>> return -EINVAL;
>>>
>>> actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
>>> - if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
>>> + if (actions && (!nla_len(actions) || nla_len(actions) < NLA_HDRLEN))
>>> + return -EINVAL;
>>> +
>>> + group = attrs[OVS_SAMPLE_ATTR_PSAMPLE_GROUP];
>>> + if (group && nla_len(group) != sizeof(u32))
>>> + return -EINVAL;
>>> +
>>> + cookie = attrs[OVS_SAMPLE_ATTR_PSAMPLE_COOKIE];
>>> + if (cookie &&
>>> + (!group || nla_len(cookie) > OVS_PSAMPLE_COOKIE_MAX_SIZE))
>>> + return -EINVAL;
>>> +
>>> + if (!group && !actions)
>>> return -EINVAL;
>>>
>>> /* validation done, copy sample action. */
>>> @@ -2608,7 +2623,19 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>>> * If the sample is the last action, it can always be excuted
>>> * rather than deferred.
>>> */
>>> - arg.exec = last || !actions_may_change_flow(actions);
>>> + if (actions && (last || !actions_may_change_flow(actions)))
>>> + arg.flags |= OVS_SAMPLE_ARG_FLAG_EXEC;
>>> +
>>> + if (group) {
>>> + arg.flags |= OVS_SAMPLE_ARG_FLAG_PSAMPLE;
>>> + arg.group_id = nla_get_u32(group);
>>> + }
>>> +
>>> + if (cookie) {
>>> + memcpy(&arg.cookie[0], nla_data(cookie), nla_len(cookie));
>>> + arg.cookie_len = nla_len(cookie);
>>> + }
>>> +
>>> arg.probability = nla_get_u32(probability);
>>>
>>> err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_ARG, &arg, sizeof(arg),
>>> @@ -2616,12 +2643,13 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>>> if (err)
>>> return err;
>>>
>>> - err = __ovs_nla_copy_actions(net, actions, key, sfa,
>>> - eth_type, vlan_tci, mpls_label_count, log,
>>> - depth + 1);
>>> -
>>> - if (err)
>>> - return err;
>>> + if (actions) {
>>> + err = __ovs_nla_copy_actions(net, actions, key, sfa,
>>> + eth_type, vlan_tci,
>>> + mpls_label_count, log, depth + 1);
>>> + if (err)
>>> + return err;
>>> + }
>>>
>>> add_nested_action_end(*sfa, start);
>>>
>>> @@ -3553,20 +3581,38 @@ static int sample_action_to_attr(const struct nlattr *attr,
>>> goto out;
>>> }
>>>
>>> - ac_start = nla_nest_start_noflag(skb, OVS_SAMPLE_ATTR_ACTIONS);
>>> - if (!ac_start) {
>>> - err = -EMSGSIZE;
>>> - goto out;
>>> + if (arg->flags & OVS_SAMPLE_ARG_FLAG_PSAMPLE) {
>>> + if (nla_put_u32(skb, OVS_SAMPLE_ATTR_PSAMPLE_GROUP,
>>> + arg->group_id)) {
>>> + err = -EMSGSIZE;
>>> + goto out;
>>> + }
>>> +
>>> + if (arg->cookie_len &&
>>> + nla_put(skb, OVS_SAMPLE_ATTR_PSAMPLE_COOKIE,
>>> + arg->cookie_len, &arg->cookie[0])) {
>>> + err = -EMSGSIZE;
>>> + goto out;
>>> + }
>>> }
>>>
>>> - err = ovs_nla_put_actions(actions, rem, skb);
>>> + if (nla_ok(actions, rem)) {
>>> + ac_start = nla_nest_start_noflag(skb, OVS_SAMPLE_ATTR_ACTIONS);
>>> + if (!ac_start) {
>>> + err = -EMSGSIZE;
>>> + goto out;
>>> + }
>>> + err = ovs_nla_put_actions(actions, rem, skb);
>>> + }
>>>
>>> out:
>>> if (err) {
>>> - nla_nest_cancel(skb, ac_start);
>>> + if (ac_start)
>>> + nla_nest_cancel(skb, ac_start);
>>> nla_nest_cancel(skb, start);
>>> } else {
>>> - nla_nest_end(skb, ac_start);
>>> + if (ac_start)
>>> + nla_nest_end(skb, ac_start);
>>> nla_nest_end(skb, start);
>>> }
>>>
>>> --
>>> 2.44.0
>>
Adrian Moreno <[email protected]> writes:
> On 5/3/24 11:43 AM, Eelco Chaudron wrote:
>> On 24 Apr 2024, at 15:50, Adrian Moreno wrote:
>>
>>> Add support for psample sampling via two new attributes to the
>>> OVS_ACTION_ATTR_SAMPLE action.
>>>
>>> OVS_SAMPLE_ATTR_PSAMPLE_GROUP used to pass an integer psample group_id.
>>> OVS_SAMPLE_ATTR_PSAMPLE_COOKIE used to pass a variable-length binary
>>> cookie that will be forwared to psample.
>>>
>>> The maximum length of the user-defined cookie is set to 16, same as
>>> tc_cookie, to discourage using cookies that will not be offloadable.
>>>
>>> In order to simplify the internal processing of the action and given the
>>> maximum size of the cookie is relatively small, add both fields to the
>>> internal-only struct sample_arg.
>>>
>>> The presence of a group_id mandates that the action shall called the
>>> psample module to multicast the packet with such group_id and the
>>> user-provided cookie if present. This behavior is orthonogal to
>>> also executing the nested actions if present.
>>>
>>> Signed-off-by: Adrian Moreno <[email protected]>
>> This is not a full review yet. Just some comments, as I’m looking at
>> the user-space patch first and added similar comments.
>> I’ll do a proper review of this series once I’m done with user-space
>> part.
>> //Eelco
>>
>>> ---
>>> Documentation/netlink/specs/ovs_flow.yaml | 6 ++
>>> include/uapi/linux/openvswitch.h | 49 ++++++++++----
>>> net/openvswitch/actions.c | 51 +++++++++++++--
>>> net/openvswitch/flow_netlink.c | 80 ++++++++++++++++++-----
>>> 4 files changed, 153 insertions(+), 33 deletions(-)
>>>
>>> diff --git a/Documentation/netlink/specs/ovs_flow.yaml b/Documentation/netlink/specs/ovs_flow.yaml
>>> index 4fdfc6b5cae9..5543c2937225 100644
>>> --- a/Documentation/netlink/specs/ovs_flow.yaml
>>> +++ b/Documentation/netlink/specs/ovs_flow.yaml
>>> @@ -825,6 +825,12 @@ attribute-sets:
>>> name: actions
>>> type: nest
>>> nested-attributes: action-attrs
>>> + -
>>> + name: psample_group
>>> + type: u32
>>> + -
>>> + name: psample_cookie
>>> + type: binary
>>> -
>>> name: userspace-attrs
>>> enum-name: ovs-userspace-attr
>>> diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
>>> index efc82c318fa2..e9cd6f3a952d 100644
>>> --- a/include/uapi/linux/openvswitch.h
>>> +++ b/include/uapi/linux/openvswitch.h
>>> @@ -639,6 +639,7 @@ enum ovs_flow_attr {
>>> #define OVS_UFID_F_OMIT_MASK (1 << 1)
>>> #define OVS_UFID_F_OMIT_ACTIONS (1 << 2)
>>>
>>> +#define OVS_PSAMPLE_COOKIE_MAX_SIZE 16
>>> /**
>>> * enum ovs_sample_attr - Attributes for %OVS_ACTION_ATTR_SAMPLE action.
>>> * @OVS_SAMPLE_ATTR_PROBABILITY: 32-bit fraction of packets to sample with
>>> @@ -646,15 +647,27 @@ enum ovs_flow_attr {
>>> * %UINT32_MAX samples all packets and intermediate values sample intermediate
>>> * fractions of packets.
>>> * @OVS_SAMPLE_ATTR_ACTIONS: Set of actions to execute in sampling event.
>>> - * Actions are passed as nested attributes.
>>> + * Actions are passed as nested attributes. Optional if
>>> + * OVS_SAMPLE_ATTR_PSAMPLE_GROUP is set.
>>> + * @OVS_SAMPLE_ATTR_PSAMPLE_GROUP: A 32-bit number to be used as psample group.
>>> + * Optional if OVS_SAMPLE_ATTR_ACTIONS is set.
>>> + * @OVS_SAMPLE_ATTR_PSAMPLE_COOKIE: A variable-length binary cookie that, if
>>> + * provided, will be copied to the psample cookie.
>> As there is a limit of to the cookie should we mention it here?
>>
>
> I thought OVS_PSAMPLE_COOKIE_MAX_SIZE was expressive enough but sure,
> we can also mention it here.
>
>>> *
>>> - * Executes the specified actions with the given probability on a per-packet
>>> - * basis.
>>> + * Either OVS_SAMPLE_ATTR_PSAMPLE_GROUP or OVS_SAMPLE_ATTR_ACTIONS must be
>>> + * specified.
>>> + *
>>> + * Executes the specified actions and/or sends the packet to psample
>>> + * with the given probability on a per-packet basis.
>>> */
>>> enum ovs_sample_attr {
>>> OVS_SAMPLE_ATTR_UNSPEC,
>>> - OVS_SAMPLE_ATTR_PROBABILITY, /* u32 number */
>>> - OVS_SAMPLE_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */
>>> + OVS_SAMPLE_ATTR_PROBABILITY, /* u32 number */
>>> + OVS_SAMPLE_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_
>> Missing * after OVS_ACTION_ATTR_
>
> As a matter of fact, adding an * makes checkpatch generate a warning
> IIRC. That's why I initially removed it. I can look at fixing
> checkpatch instead.
I think we can ignore the warning. Alternatively, consider not changing
the comment spacing for the existing comments.
>>
>>> + * attributes.
>>> + */
>>> + OVS_SAMPLE_ATTR_PSAMPLE_GROUP, /* u32 number */
>>> + OVS_SAMPLE_ATTR_PSAMPLE_COOKIE, /* binary */
>> As these are general sample options, I would not add the PSAMPLE
>> reference. Other data paths could use a different implementation. So
>> I guess OVS_SAMPLE_ATTR_GROUP_ID and OVS_SAMPLE_ATTR_COOKIE would be
>> enough.
>>
>
> OK. But isn't the API already psample-ish? I mean that the group_id is
> something specific to psample that might not be present in other
> datapath implementation.
>
>
>>> __OVS_SAMPLE_ATTR_MAX,
>>>
>>> #ifdef __KERNEL__
>>> @@ -665,13 +678,27 @@ enum ovs_sample_attr {
>>> #define OVS_SAMPLE_ATTR_MAX (__OVS_SAMPLE_ATTR_MAX - 1)
>>>
>>> #ifdef __KERNEL__
>>> +
>>> +/* Definition for flags in struct sample_arg. */
>>> +enum {
>>> + /* When set, actions in sample will not change the flows. */
>>> + OVS_SAMPLE_ARG_FLAG_EXEC = 1 << 0,
>>> + /* When set, the packet will be sent to psample. */
>>> + OVS_SAMPLE_ARG_FLAG_PSAMPLE = 1 << 1,
>>> +};
>>> +
>>> struct sample_arg {
>>> - bool exec; /* When true, actions in sample will not
>>> - * change flow keys. False otherwise.
>>> - */
>>> - u32 probability; /* Same value as
>>> - * 'OVS_SAMPLE_ATTR_PROBABILITY'.
>>> - */
>> Not sure if you can actually do this, you are changing a structure
>> that is part of the UAPI. This change breaks backwards
>> compatibility.
>>
>
> Hmmm... this the internal argument structure protected by #ifdef
> __KERNEL__. It is used in several actions to optimize the internal
> action handling (i.e: using a compact struct instead of a list of
> netlink attributes). I guess the reason for having it in this file is
> because the attribute enum is being reused, but I wouldn't think of
> this struct as part of the uAPI.
>
> If the (#ifdef __KERNEL__) does not exclude this struct from the uAPI
> I think we should move it (all of them actually) to some internal
> file.
>
>>
>>> + u16 flags; /* Flags that modify the behavior of the
>>> + * action. See SAMPLE_ARG_FLAG_*.
>>> + */
>>> + u32 probability; /* Same value as
>>> + * 'OVS_SAMPLE_ATTR_PROBABILITY'.
>>> + */
>>> + u32 group_id; /* Same value as
>>> + * 'OVS_SAMPLE_ATTR_PSAMPLE_GROUP'.
>>> + */
>>> + u8 cookie_len; /* Length of psample cookie. */
>>> + char cookie[OVS_PSAMPLE_COOKIE_MAX_SIZE]; /* psample cookie data. */
>> Would it make sense for the cookie also to be u8?
>>
>
> Yes, probably.
>
>>> };
>>> #endif
>>>
>>> diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
>>> index 6fcd7e2ca81f..eb3166986fd2 100644
>>> --- a/net/openvswitch/actions.c
>>> +++ b/net/openvswitch/actions.c
>>> @@ -24,6 +24,7 @@
>>> #include <net/checksum.h>
>>> #include <net/dsfield.h>
>>> #include <net/mpls.h>
>>> +#include <net/psample.h>
>>> #include <net/sctp/checksum.h>
>>>
>>> #include "datapath.h"
>>> @@ -1025,6 +1026,34 @@ static int dec_ttl_exception_handler(struct datapath *dp, struct sk_buff *skb,
>>> return 0;
>>> }
>>>
>>> +static int ovs_psample_packet(struct datapath *dp, struct sw_flow_key *key,
>>> + const struct sample_arg *arg,
>>> + struct sk_buff *skb)
>>> +{
>>> + struct psample_group psample_group = {};
>>> + struct psample_metadata md = {};
>>> + struct vport *input_vport;
>>> + u32 rate;
>>> +
>>> + psample_group.group_num = arg->group_id;
>>> + psample_group.net = ovs_dp_get_net(dp);
>>> +
>>> + input_vport = ovs_vport_rcu(dp, key->phy.in_port);
>>> + if (!input_vport)
>>> + input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);
>>> +
>>> + md.in_ifindex = input_vport->dev->ifindex;
>>> + md.user_cookie = arg->cookie_len ? &arg->cookie[0] : NULL;
>>> + md.user_cookie_len = arg->cookie_len;
>>> + md.trunc_size = skb->len;
>>> +
>>> + rate = arg->probability ? U32_MAX / arg->probability : 0;
>>> +
>>> + psample_sample_packet(&psample_group, skb, rate, &md);
>> Does this mean now the ovs module, now is dependent on the presence
>> of psample? I think we should only support sampling to psample if
>> the module exists, else we should return an error. There might be
>> distributions not including psample by default.
>
> Agree. I'll add some compile-time checks the same way we do with nf_nat.
>
>>
>>> +
>>> + return 0;
>>> +}
>>> +
>>> /* When 'last' is true, sample() should always consume the 'skb'.
>>> * Otherwise, sample() should keep 'skb' intact regardless what
>>> * actions are executed within sample().
>>> @@ -1033,11 +1062,12 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
>>> struct sw_flow_key *key, const struct nlattr *attr,
>>> bool last)
>>> {
>>> - struct nlattr *actions;
>>> + const struct sample_arg *arg;
>>> struct nlattr *sample_arg;
>>> int rem = nla_len(attr);
>>> - const struct sample_arg *arg;
>>> + struct nlattr *actions;
>>> bool clone_flow_key;
>>> + int ret;
>>>
>>> /* The first action is always 'OVS_SAMPLE_ATTR_ARG'. */
>>> sample_arg = nla_data(attr);
>>> @@ -1051,9 +1081,20 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
>>> return 0;
>>> }
>>>
>>> - clone_flow_key = !arg->exec;
>>> - return clone_execute(dp, skb, key, 0, actions, rem, last,
>>> - clone_flow_key);
>>> + if (arg->flags & OVS_SAMPLE_ARG_FLAG_PSAMPLE) {
>>> + ret = ovs_psample_packet(dp, key, arg, skb);
>>> + if (ret)
>>> + return ret;
>>> + }
>>> +
>>> + if (nla_ok(actions, rem)) {
>>> + clone_flow_key = !(arg->flags & OVS_SAMPLE_ARG_FLAG_EXEC);
>>> + ret = clone_execute(dp, skb, key, 0, actions, rem, last,
>>> + clone_flow_key);
>>> + } else if (last) {
>>> + ovs_kfree_skb_reason(skb, OVS_DROP_LAST_ACTION);
>>> + }
>>> + return ret;
>>> }
>>>
>>> /* When 'last' is true, clone() should always consume the 'skb'.
>>> diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
>>> index f224d9bcea5e..1a348d3905fc 100644
>>> --- a/net/openvswitch/flow_netlink.c
>>> +++ b/net/openvswitch/flow_netlink.c
>>> @@ -2561,6 +2561,9 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
>>> u32 mpls_label_count, bool log,
>>> u32 depth);
>>>
>>> +static int copy_action(const struct nlattr *from,
>>> + struct sw_flow_actions **sfa, bool log);
>>> +
>>> static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>>> const struct sw_flow_key *key,
>>> struct sw_flow_actions **sfa,
>>> @@ -2569,10 +2572,10 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>>> u32 depth)
>>> {
>>> const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
>>> - const struct nlattr *probability, *actions;
>>> + const struct nlattr *probability, *actions, *group, *cookie;
>>> + struct sample_arg arg = {};
>>> const struct nlattr *a;
>>> int rem, start, err;
>>> - struct sample_arg arg;
>>>
>>> memset(attrs, 0, sizeof(attrs));
>>> nla_for_each_nested(a, attr, rem) {
>>> @@ -2589,7 +2592,19 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>>> return -EINVAL;
>>>
>>> actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
>>> - if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
>>> + if (actions && (!nla_len(actions) || nla_len(actions) < NLA_HDRLEN))
>>> + return -EINVAL;
>>> +
>>> + group = attrs[OVS_SAMPLE_ATTR_PSAMPLE_GROUP];
>>> + if (group && nla_len(group) != sizeof(u32))
>>> + return -EINVAL;
>>> +
>>> + cookie = attrs[OVS_SAMPLE_ATTR_PSAMPLE_COOKIE];
>>> + if (cookie &&
>>> + (!group || nla_len(cookie) > OVS_PSAMPLE_COOKIE_MAX_SIZE))
>>> + return -EINVAL;
>>> +
>>> + if (!group && !actions)
>>> return -EINVAL;
>>>
>>> /* validation done, copy sample action. */
>>> @@ -2608,7 +2623,19 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>>> * If the sample is the last action, it can always be excuted
>>> * rather than deferred.
>>> */
>>> - arg.exec = last || !actions_may_change_flow(actions);
>>> + if (actions && (last || !actions_may_change_flow(actions)))
>>> + arg.flags |= OVS_SAMPLE_ARG_FLAG_EXEC;
>>> +
>>> + if (group) {
>>> + arg.flags |= OVS_SAMPLE_ARG_FLAG_PSAMPLE;
>>> + arg.group_id = nla_get_u32(group);
>>> + }
>>> +
>>> + if (cookie) {
>>> + memcpy(&arg.cookie[0], nla_data(cookie), nla_len(cookie));
>>> + arg.cookie_len = nla_len(cookie);
>>> + }
>>> +
>>> arg.probability = nla_get_u32(probability);
>>>
>>> err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_ARG, &arg, sizeof(arg),
>>> @@ -2616,12 +2643,13 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>>> if (err)
>>> return err;
>>>
>>> - err = __ovs_nla_copy_actions(net, actions, key, sfa,
>>> - eth_type, vlan_tci, mpls_label_count, log,
>>> - depth + 1);
>>> -
>>> - if (err)
>>> - return err;
>>> + if (actions) {
>>> + err = __ovs_nla_copy_actions(net, actions, key, sfa,
>>> + eth_type, vlan_tci,
>>> + mpls_label_count, log, depth + 1);
>>> + if (err)
>>> + return err;
>>> + }
>>>
>>> add_nested_action_end(*sfa, start);
>>>
>>> @@ -3553,20 +3581,38 @@ static int sample_action_to_attr(const struct nlattr *attr,
>>> goto out;
>>> }
>>>
>>> - ac_start = nla_nest_start_noflag(skb, OVS_SAMPLE_ATTR_ACTIONS);
>>> - if (!ac_start) {
>>> - err = -EMSGSIZE;
>>> - goto out;
>>> + if (arg->flags & OVS_SAMPLE_ARG_FLAG_PSAMPLE) {
>>> + if (nla_put_u32(skb, OVS_SAMPLE_ATTR_PSAMPLE_GROUP,
>>> + arg->group_id)) {
>>> + err = -EMSGSIZE;
>>> + goto out;
>>> + }
>>> +
>>> + if (arg->cookie_len &&
>>> + nla_put(skb, OVS_SAMPLE_ATTR_PSAMPLE_COOKIE,
>>> + arg->cookie_len, &arg->cookie[0])) {
>>> + err = -EMSGSIZE;
>>> + goto out;
>>> + }
>>> }
>>>
>>> - err = ovs_nla_put_actions(actions, rem, skb);
>>> + if (nla_ok(actions, rem)) {
>>> + ac_start = nla_nest_start_noflag(skb, OVS_SAMPLE_ATTR_ACTIONS);
>>> + if (!ac_start) {
>>> + err = -EMSGSIZE;
>>> + goto out;
>>> + }
>>> + err = ovs_nla_put_actions(actions, rem, skb);
>>> + }
>>>
>>> out:
>>> if (err) {
>>> - nla_nest_cancel(skb, ac_start);
>>> + if (ac_start)
>>> + nla_nest_cancel(skb, ac_start);
>>> nla_nest_cancel(skb, start);
>>> } else {
>>> - nla_nest_end(skb, ac_start);
>>> + if (ac_start)
>>> + nla_nest_end(skb, ac_start);
>>> nla_nest_end(skb, start);
>>> }
>>>
>>> -- 2.44.0
>>