2017-12-09 15:25:15

by Eric Leblond

[permalink] [raw]
Subject: [PATCH net-next] libbpf: add function to setup XDP

Most of the code is taken from set_link_xdp_fd() in bpf_load.c and
slightly modified to be library compliant.

Signed-off-by: Eric Leblond <[email protected]>
---
tools/lib/bpf/bpf.c | 108 ++++++++++++++++++++++++++++++++++++++++++++++++-
tools/lib/bpf/libbpf.c | 2 +
tools/lib/bpf/libbpf.h | 4 ++
3 files changed, 112 insertions(+), 2 deletions(-)

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 5128677e4117..bea173be66fc 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -25,6 +25,10 @@
#include <asm/unistd.h>
#include <linux/bpf.h>
#include "bpf.h"
+#include "libbpf.h"
+#include <linux/rtnetlink.h>
+#include <sys/socket.h>
+#include <errno.h>

/*
* When building perf, unistd.h is overridden. __NR_bpf is
@@ -46,8 +50,6 @@
# endif
#endif

-#define min(x, y) ((x) < (y) ? (x) : (y))
-
static inline __u64 ptr_to_u64(const void *ptr)
{
return (__u64) (unsigned long) ptr;
@@ -413,3 +415,105 @@ int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len)

return err;
}
+
+int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags)
+{
+ struct sockaddr_nl sa;
+ int sock, seq = 0, len, ret = -1;
+ char buf[4096];
+ struct nlattr *nla, *nla_xdp;
+ struct {
+ struct nlmsghdr nh;
+ struct ifinfomsg ifinfo;
+ char attrbuf[64];
+ } req;
+ struct nlmsghdr *nh;
+ struct nlmsgerr *err;
+
+ memset(&sa, 0, sizeof(sa));
+ sa.nl_family = AF_NETLINK;
+
+ sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+ if (sock < 0) {
+ return -errno;
+ }
+
+ if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
+ ret = -errno;
+ goto cleanup;
+ }
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_type = RTM_SETLINK;
+ req.nh.nlmsg_pid = 0;
+ req.nh.nlmsg_seq = ++seq;
+ req.ifinfo.ifi_family = AF_UNSPEC;
+ req.ifinfo.ifi_index = ifindex;
+
+ /* started nested attribute for XDP */
+ nla = (struct nlattr *)(((char *)&req)
+ + NLMSG_ALIGN(req.nh.nlmsg_len));
+ nla->nla_type = NLA_F_NESTED | 43/*IFLA_XDP*/;
+ nla->nla_len = NLA_HDRLEN;
+
+ /* add XDP fd */
+ nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
+ nla_xdp->nla_type = 1/*IFLA_XDP_FD*/;
+ nla_xdp->nla_len = NLA_HDRLEN + sizeof(int);
+ memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd));
+ nla->nla_len += nla_xdp->nla_len;
+
+ /* if user passed in any flags, add those too */
+ if (flags) {
+ nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
+ nla_xdp->nla_type = 3/*IFLA_XDP_FLAGS*/;
+ nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags);
+ memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags));
+ nla->nla_len += nla_xdp->nla_len;
+ }
+
+ req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);
+
+ if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ ret = -errno;
+ goto cleanup;
+ }
+
+ len = recv(sock, buf, sizeof(buf), 0);
+ if (len < 0) {
+ ret = -errno;
+ goto cleanup;
+ }
+
+ for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
+ nh = NLMSG_NEXT(nh, len)) {
+ if (nh->nlmsg_pid != getpid()) {
+ ret = -LIBBPF_ERRNO__WRNGPID;
+ goto cleanup;
+ }
+ if (nh->nlmsg_seq != seq) {
+ ret = -LIBBPF_ERRNO__INVSEQ;
+ goto cleanup;
+ }
+ switch (nh->nlmsg_type) {
+ case NLMSG_ERROR:
+ err = (struct nlmsgerr *)NLMSG_DATA(nh);
+ if (!err->error)
+ continue;
+ ret = err->error;
+ goto cleanup;
+ case NLMSG_DONE:
+ break;
+ default:
+ break;
+ }
+ }
+
+ ret = 0;
+
+cleanup:
+ close(sock);
+ return ret;
+}
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 5aa45f89da93..931e98c097a8 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -106,6 +106,8 @@ static const char *libbpf_strerror_table[NR_ERRNO] = {
[ERRCODE_OFFSET(PROG2BIG)] = "Program too big",
[ERRCODE_OFFSET(KVER)] = "Incorrect kernel version",
[ERRCODE_OFFSET(PROGTYPE)] = "Kernel doesn't support this program type",
+ [ERRCODE_OFFSET(WRNGPID)] = "Wrong pid in netlink message",
+ [ERRCODE_OFFSET(INVSEQ)] = "Invalid netlink sequence",
};

int libbpf_strerror(int err, char *buf, size_t size)
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 6e20003109e0..e42f96900318 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -42,6 +42,8 @@ enum libbpf_errno {
LIBBPF_ERRNO__PROG2BIG, /* Program too big */
LIBBPF_ERRNO__KVER, /* Incorrect kernel version */
LIBBPF_ERRNO__PROGTYPE, /* Kernel doesn't support this program type */
+ LIBBPF_ERRNO__WRNGPID, /* Wrong pid in netlink message */
+ LIBBPF_ERRNO__INVSEQ, /* Invalid netlink sequence */
__LIBBPF_ERRNO__END,
};

@@ -246,4 +248,6 @@ long libbpf_get_error(const void *ptr);

int bpf_prog_load(const char *file, enum bpf_prog_type type,
struct bpf_object **pobj, int *prog_fd);
+
+int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags);
#endif
--
2.15.1


2017-12-09 16:34:51

by David Ahern

[permalink] [raw]
Subject: Re: [PATCH net-next] libbpf: add function to setup XDP

On 12/9/17 7:43 AM, Eric Leblond wrote:
> + /* started nested attribute for XDP */
> + nla = (struct nlattr *)(((char *)&req)
> + + NLMSG_ALIGN(req.nh.nlmsg_len));
> + nla->nla_type = NLA_F_NESTED | 43/*IFLA_XDP*/;

as a part of the move into libbpf can the magic numbers be replaced by
the names directly and there as a comment?

There are more below.


> + nla->nla_len = NLA_HDRLEN;
> +
> + /* add XDP fd */
> + nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
> + nla_xdp->nla_type = 1/*IFLA_XDP_FD*/;
> + nla_xdp->nla_len = NLA_HDRLEN + sizeof(int);
> + memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd));
> + nla->nla_len += nla_xdp->nla_len;
> +
> + /* if user passed in any flags, add those too */
> + if (flags) {
> + nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
> + nla_xdp->nla_type = 3/*IFLA_XDP_FLAGS*/;
> + nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags);
> + memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags));
> + nla->nla_len += nla_xdp->nla_len;
> + }
> +

2017-12-09 17:49:46

by Alexei Starovoitov

[permalink] [raw]
Subject: Re: [PATCH net-next] libbpf: add function to setup XDP

On Sat, Dec 09, 2017 at 09:34:46AM -0700, David Ahern wrote:
> On 12/9/17 7:43 AM, Eric Leblond wrote:
> > + /* started nested attribute for XDP */
> > + nla = (struct nlattr *)(((char *)&req)
> > + + NLMSG_ALIGN(req.nh.nlmsg_len));
> > + nla->nla_type = NLA_F_NESTED | 43/*IFLA_XDP*/;
>
> as a part of the move into libbpf can the magic numbers be replaced by
> the names directly and there as a comment?

In general it would be nice to use names instead of numbers,
but it's much bigger change then this patch, since it would require
copying and syncing a bunch of headers into tools/ which may not be such
a good idea in the end.

Only removal of min() looks a bit suspicious to me.
Eric, is it because it now comes from some header?

2017-12-09 23:57:25

by Jakub Kicinski

[permalink] [raw]
Subject: Re: [PATCH net-next] libbpf: add function to setup XDP

On Sat, 9 Dec 2017 15:43:15 +0100, Eric Leblond wrote:
> + for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
> + nh = NLMSG_NEXT(nh, len)) {
> + if (nh->nlmsg_pid != getpid()) {
> + ret = -LIBBPF_ERRNO__WRNGPID;
> + goto cleanup;
> + }
> + if (nh->nlmsg_seq != seq) {
> + ret = -LIBBPF_ERRNO__INVSEQ;
> + goto cleanup;
> + }
> + switch (nh->nlmsg_type) {
> + case NLMSG_ERROR:
> + err = (struct nlmsgerr *)NLMSG_DATA(nh);
> + if (!err->error)
> + continue;
> + ret = err->error;
> + goto cleanup;
> + case NLMSG_DONE:
> + break;
> + default:
> + break;
> + }

Would it be possible to print out or preferably return to the caller
the ext ack error message? A couple of drivers are using it for XDP
mis-configuration reporting instead of printks. We should encourage
other to do the same and support it in all user space since ext ack
msgs lead to much better user experience.

2017-12-10 20:34:26

by Eric Leblond

[permalink] [raw]
Subject: Re: [PATCH net-next] libbpf: add function to setup XDP

Hello,

On Sat, 2017-12-09 at 15:57 -0800, Jakub Kicinski wrote:
> On Sat, 9 Dec 2017 15:43:15 +0100, Eric Leblond wrote:
> > + for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
> > + nh = NLMSG_NEXT(nh, len)) {
> > + if (nh->nlmsg_pid != getpid()) {
> > + ret = -LIBBPF_ERRNO__WRNGPID;
> > + goto cleanup;
> > + }
> > + if (nh->nlmsg_seq != seq) {
> > + ret = -LIBBPF_ERRNO__INVSEQ;
> > + goto cleanup;
> > + }
> > + switch (nh->nlmsg_type) {
> > + case NLMSG_ERROR:
> > + err = (struct nlmsgerr *)NLMSG_DATA(nh);
> > + if (!err->error)
> > + continue;
> > + ret = err->error;
> > + goto cleanup;
> > + case NLMSG_DONE:
> > + break;
> > + default:
> > + break;
> > + }
>
> Would it be possible to print out or preferably return to the caller
> the ext ack error message? A couple of drivers are using it for XDP
> mis-configuration reporting instead of printks. We should encourage
> other to do the same and support it in all user space since ext ack
> msgs lead to much better user experience.

I've seen the kind of messages displayed by reading at kernel log. They
are really useful and it looks almost mandatory to be able to display
them.

Kernel code seems to not have a parser for the ext ack error message.
Did I miss something here ?

Looking at tc code, it seems it is using libmnl to parse them and I
doubt it is a good idea to use that in libbpf as it is introducing a
dependency.

Does someone has an existing parsing code or should I write on my own ?

BR,
--
Eric Leblond <[email protected]>
Blog: https://home.regit.org/

2017-12-10 21:07:18

by David Ahern

[permalink] [raw]
Subject: Re: [PATCH net-next] libbpf: add function to setup XDP

On 12/10/17 1:34 PM, Eric Leblond wrote:
>> Would it be possible to print out or preferably return to the caller
>> the ext ack error message? A couple of drivers are using it for XDP
>> mis-configuration reporting instead of printks. We should encourage
>> other to do the same and support it in all user space since ext ack
>> msgs lead to much better user experience.
>
> I've seen the kind of messages displayed by reading at kernel log. They
> are really useful and it looks almost mandatory to be able to display
> them.
>
> Kernel code seems to not have a parser for the ext ack error message.
> Did I miss something here ?
>
> Looking at tc code, it seems it is using libmnl to parse them and I
> doubt it is a good idea to use that in libbpf as it is introducing a
> dependency.
>
> Does someone has an existing parsing code or should I write on my own ?

I had worked on extack for libbpf but seem to have lost the changes.

Look at the commits here:
https://github.com/dsahern/iproute2/commits/ext-ack

I suggest using this:

https://github.com/dsahern/iproute2/commit/b61e4c7dd54a5d3ff98640da4b480441cee497b2

to bring in nlattr from lib/nlattr (as I recall lib/nlattr can not be
used directly). From there, use this one:

https://github.com/dsahern/iproute2/commit/261f7251e6704d565b91e310faabbbb7e18d14a1

to see what is needed for extack support.

Really not that much code to add.

2017-12-11 02:25:26

by Toshiaki Makita

[permalink] [raw]
Subject: Re: [PATCH net-next] libbpf: add function to setup XDP

On 2017/12/09 23:43, Eric Leblond wrote:
> Most of the code is taken from set_link_xdp_fd() in bpf_load.c and
> slightly modified to be library compliant.
>
> Signed-off-by: Eric Leblond <[email protected]>
...
> +int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags)
...
> + for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
> + nh = NLMSG_NEXT(nh, len)) {
> + if (nh->nlmsg_pid != getpid()) {

Generally nlmsg_pid should not be compared with process id.
See man netlink and
https://github.com/iovisor/bcc/pull/1275/commits/69ce96a54c55960c8de3392061254c97b6306a6d

> + ret = -LIBBPF_ERRNO__WRNGPID;
> + goto cleanup;
> + }

--
Toshiaki Makita

2017-12-12 02:23:13

by Daniel Borkmann

[permalink] [raw]
Subject: Re: [PATCH net-next] libbpf: add function to setup XDP

On 12/10/2017 10:07 PM, David Ahern wrote:
> On 12/10/17 1:34 PM, Eric Leblond wrote:
>>> Would it be possible to print out or preferably return to the caller
>>> the ext ack error message? A couple of drivers are using it for XDP
>>> mis-configuration reporting instead of printks. We should encourage
>>> other to do the same and support it in all user space since ext ack
>>> msgs lead to much better user experience.
>>
>> I've seen the kind of messages displayed by reading at kernel log. They
>> are really useful and it looks almost mandatory to be able to display
>> them.
>>
>> Kernel code seems to not have a parser for the ext ack error message.
>> Did I miss something here ?
>>
>> Looking at tc code, it seems it is using libmnl to parse them and I
>> doubt it is a good idea to use that in libbpf as it is introducing a
>> dependency.
>>
>> Does someone has an existing parsing code or should I write on my own ?
>
> I had worked on extack for libbpf but seem to have lost the changes.
>
> Look at the commits here:
> https://github.com/dsahern/iproute2/commits/ext-ack
>
> I suggest using this:
>
> https://github.com/dsahern/iproute2/commit/b61e4c7dd54a5d3ff98640da4b480441cee497b2
>
> to bring in nlattr from lib/nlattr (as I recall lib/nlattr can not be
> used directly). From there, use this one:
>
> https://github.com/dsahern/iproute2/commit/261f7251e6704d565b91e310faabbbb7e18d14a1
>
> to see what is needed for extack support.
>
> Really not that much code to add.

+1, ext ack support would improve troubleshooting a lot here; please
add and respin. Thanks, Eric!

2017-12-12 15:53:57

by David Miller

[permalink] [raw]
Subject: Re: [PATCH net-next] libbpf: add function to setup XDP

From: Toshiaki Makita <[email protected]>
Date: Mon, 11 Dec 2017 11:24:12 +0900

> On 2017/12/09 23:43, Eric Leblond wrote:
>> Most of the code is taken from set_link_xdp_fd() in bpf_load.c and
>> slightly modified to be library compliant.
>>
>> Signed-off-by: Eric Leblond <[email protected]>
> ...
>> +int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags)
> ...
>> + for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
>> + nh = NLMSG_NEXT(nh, len)) {
>> + if (nh->nlmsg_pid != getpid()) {
>
> Generally nlmsg_pid should not be compared with process id.
> See man netlink and
> https://github.com/iovisor/bcc/pull/1275/commits/69ce96a54c55960c8de3392061254c97b6306a6d

Right. I wish we had never named this "pid", it gets misinterpreted
way too easily.