2019-08-15 14:33:25

by Ivan Khoronzhuk

[permalink] [raw]
Subject: [PATCH bpf-next v2 1/3] libbpf: use LFS (_FILE_OFFSET_BITS) instead of direct mmap2 syscall

Drop the __NR_mmap2 fork in favor of LFS, that is, the _FILE_OFFSET_BITS=64
(glibc & bionic) / _LARGEFILE64_SOURCE (for musl) approach. It allows
mmap() to take a 64-bit offset that is passed on to the mmap2 syscall. As a
result, pgoff is not truncated and there is no need to call mmap2 directly
on 32-bit systems.

Signed-off-by: Ivan Khoronzhuk <[email protected]>
---
tools/lib/bpf/Makefile | 1 +
tools/lib/bpf/xsk.c | 49 ++++++++++++------------------------------
2 files changed, 15 insertions(+), 35 deletions(-)

diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index 9312066a1ae3..844f6cd79c03 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -113,6 +113,7 @@ override CFLAGS += -Werror -Wall
override CFLAGS += -fPIC
override CFLAGS += $(INCLUDES)
override CFLAGS += -fvisibility=hidden
+override CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64

ifeq ($(VERBOSE),1)
Q =
diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index 680e63066cf3..7392f428c07b 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -74,23 +74,6 @@ struct xsk_nl_info {
int fd;
};

-/* For 32-bit systems, we need to use mmap2 as the offsets are 64-bit.
- * Unfortunately, it is not part of glibc.
- */
-static inline void *xsk_mmap(void *addr, size_t length, int prot, int flags,
- int fd, __u64 offset)
-{
-#ifdef __NR_mmap2
- unsigned int page_shift = __builtin_ffs(getpagesize()) - 1;
- long ret = syscall(__NR_mmap2, addr, length, prot, flags, fd,
- (off_t)(offset >> page_shift));
-
- return (void *)ret;
-#else
- return mmap(addr, length, prot, flags, fd, offset);
-#endif
-}
-
int xsk_umem__fd(const struct xsk_umem *umem)
{
return umem ? umem->fd : -EINVAL;
@@ -210,10 +193,9 @@ int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, __u64 size,
goto out_socket;
}

- map = xsk_mmap(NULL, off.fr.desc +
- umem->config.fill_size * sizeof(__u64),
- PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
- umem->fd, XDP_UMEM_PGOFF_FILL_RING);
+ map = mmap(NULL, off.fr.desc + umem->config.fill_size * sizeof(__u64),
+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, umem->fd,
+ XDP_UMEM_PGOFF_FILL_RING);
if (map == MAP_FAILED) {
err = -errno;
goto out_socket;
@@ -227,10 +209,9 @@ int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, __u64 size,
fill->ring = map + off.fr.desc;
fill->cached_cons = umem->config.fill_size;

- map = xsk_mmap(NULL,
- off.cr.desc + umem->config.comp_size * sizeof(__u64),
- PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
- umem->fd, XDP_UMEM_PGOFF_COMPLETION_RING);
+ map = mmap(NULL, off.cr.desc + umem->config.comp_size * sizeof(__u64),
+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, umem->fd,
+ XDP_UMEM_PGOFF_COMPLETION_RING);
if (map == MAP_FAILED) {
err = -errno;
goto out_mmap;
@@ -550,11 +531,10 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
}

if (rx) {
- rx_map = xsk_mmap(NULL, off.rx.desc +
- xsk->config.rx_size * sizeof(struct xdp_desc),
- PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_POPULATE,
- xsk->fd, XDP_PGOFF_RX_RING);
+ rx_map = mmap(NULL, off.rx.desc +
+ xsk->config.rx_size * sizeof(struct xdp_desc),
+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
+ xsk->fd, XDP_PGOFF_RX_RING);
if (rx_map == MAP_FAILED) {
err = -errno;
goto out_socket;
@@ -569,11 +549,10 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
xsk->rx = rx;

if (tx) {
- tx_map = xsk_mmap(NULL, off.tx.desc +
- xsk->config.tx_size * sizeof(struct xdp_desc),
- PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_POPULATE,
- xsk->fd, XDP_PGOFF_TX_RING);
+ tx_map = mmap(NULL, off.tx.desc +
+ xsk->config.tx_size * sizeof(struct xdp_desc),
+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
+ xsk->fd, XDP_PGOFF_TX_RING);
if (tx_map == MAP_FAILED) {
err = -errno;
goto out_mmap_rx;
--
2.17.1


2019-08-16 00:08:28

by Yonghong Song

[permalink] [raw]
Subject: Re: [PATCH bpf-next v2 1/3] libbpf: use LFS (_FILE_OFFSET_BITS) instead of direct mmap2 syscall



On 8/15/19 5:13 AM, Ivan Khoronzhuk wrote:
> Drop __NR_mmap2 fork in flavor of LFS, that is _FILE_OFFSET_BITS=64
> (glibc & bionic) / LARGEFILE64_SOURCE (for musl) decision. It allows
> mmap() to use 64bit offset that is passed to mmap2 syscall. As result
> pgoff is not truncated and no need to use direct access to mmap2 for
> 32 bits systems.
>
> Signed-off-by: Ivan Khoronzhuk <[email protected]>

Acked-by: Yonghong Song <[email protected]>