2006-08-29 13:41:38

by Yi Yang

[permalink] [raw]
Subject: vmsplice can't work well

Hi, Jens

I try to trace vmsplice and find it can't work in both ppc64 and i386, it always return -EFAULT because of the address of iovec.iov_base no matter it is page alignment or not, I don't know if I should file a bug for it, do you test it on i386 or ppc64?

This is the test program I used.

/*
* Use vmsplice to fill some user memory into a pipe. vmsplice writes
* to stdout, so that must be a pipe.
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <limits.h>
#include <sys/poll.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <error.h>
#include <errno.h>

//#include "splice.h"
#if defined(__i386__)
#define __NR_splice 313
#define __NR_tee 315
#define __NR_vmsplice 316
#elif defined(__x86_64__)
#define __NR_splice 275
#define __NR_tee 276
#define __NR_vmsplice 277
#elif defined(__powerpc__) || defined(__powerpc64__)
#define __NR_splice 283
#define __NR_tee 284
#define __NR_vmsplice 285
#else
#error unsupported arch
#endif

#define SPLICE_SIZE 4096
#ifndef F_SETPSZ
#define F_SETPSZ 15 /* for pipes. */
#define F_GETPSZ 16 /* for pipes. */
#endif

#define ALIGN_BUF

#ifdef ALIGN_BUF
#define ALIGN_MASK (65535) /* 64k-1, should just be PAGE_SIZE - 1 */
#define ALIGN(buf) (void *) (((unsigned long) (buf) + ALIGN_MASK) & ~ALIGN_MASK)
#else
#define ALIGN_MASK (0)
#define ALIGN(buf) (buf)
#endif

#define min(a,b) (((a)>(b))?(b):(a))

static inline int xerror(const char * str)
{
int err = errno;
perror(str);
return err;
}

static inline long vmsplice(int fd, const struct iovec *iov,
unsigned long nr_segs, unsigned int flags)
{
return syscall(__NR_vmsplice, fd, iov, nr_segs, flags);
}

int do_vmsplice(int fd, void *buffer, int len)
{
struct pollfd pfd = { .fd = fd, .events = POLLOUT, };
int written;
struct iovec v;

v.iov_base = buffer;
v.iov_len = len;

while (len) {
/*
* in a real app you'd be more clever with poll of course,
* here we are basically just blocking on output room and
* not using the free time for anything interesting.
*/
if (poll(&pfd, 1, -1) < 0)
return xerror("poll");

written = vmsplice(fd, &v, 1, 0);
printf("here: len = %d, written = %d\n", len, written);

if (written <= 0)
return xerror("vmsplice");
fprintf(stderr, "written len = %d\n", written);

len -= written;
}

return 0;
}

int main(int argc, char *argv[])
{
unsigned char *buffer;
struct stat sb;
long page_size;
int i, ret;

if (fstat(STDOUT_FILENO, &sb) < 0)
return xerror("stat");
if (!S_ISFIFO(sb.st_mode)) {
fprintf(stderr, "stdout must be a pipe\n");
return 1;
}

page_size = sysconf(_SC_PAGESIZE);
if (page_size < 0)
return xerror("_SC_PAGESIZE");

fprintf(stderr, "getpagesize = %d\n", getpagesize());
fprintf(stderr, "page size: %d bytes\n", page_size);

buffer = malloc(2 * 65536);
buffer[0]='A';
for (i = 1; i < 2 * SPLICE_SIZE; i++)
buffer[i] = (i & 0xff);

do {
/*
* vmsplice the first half of the buffer into the pipe
*/
if (do_vmsplice(STDOUT_FILENO, buffer, SPLICE_SIZE))
break;

/*
* first half is now in pipe, but we don't quite know when
* we can reuse it.
*/

/*
* vmsplice second half
*/
//if (do_vmsplice(STDOUT_FILENO, buffer + SPLICE_SIZE, SPLICE_SIZE))
// break;

/*
* We still don't know when we can reuse the second half of
* the buffer, but we do now know that all parts of the first
* half have been consumed from the pipe - so we can reuse that.
*/
} while (0);

free(buffer);

return 0;
}


2006-08-29 14:03:00

by Jens Axboe

[permalink] [raw]
Subject: Re: vmsplice can't work well

On Tue, Aug 29 2006, Yi Yang wrote:
> Hi, Jens
>
> I try to trace vmsplice and find it can't work in both ppc64 and i386,
> it always return -EFAULT because of the address of iovec.iov_base no
> matter it is page alignment or not, I don't know if I should file a
> bug for it, do you test it on i386 or ppc64?

Please provide an strace of the problem, it works fine for me (on x86-64
and x86, I've previously also tested ppc64 and ia64). Also please see
the splice tools here for more examples:

http://brick.kernel.dk/snaps/splice-git-20060711102502.tar.gz

I patched your program to fix the x86-64 syscall number and one/two
bugs, diff attached. Output for me:

axboe@nelson:/home/axboe $ ./f | cat > /dev/null
getpagesize = 4096
page size: 4096 bytes
written len = 4096

--- f.c~ 2006-08-29 16:02:21.000000000 +0200
+++ f.c 2006-08-29 16:04:41.000000000 +0200
@@ -22,7 +22,7 @@
#elif defined(__x86_64__)
#define __NR_splice 275
#define __NR_tee 276
-#define __NR_vmsplice 277
+#define __NR_vmsplice 278
#elif defined(__powerpc__) || defined(__powerpc64__)
#define __NR_splice 283
#define __NR_tee 284
@@ -71,23 +71,26 @@
v.iov_base = buffer;
v.iov_len = len;

- while (len) {
+ while (v.iov_len) {
/*
* in a real app you'd be more clever with poll of course,
* here we are basically just blocking on output room and
* not using the free time for anything interesting.
*/
if (poll(&pfd, 1, -1) < 0)
- return xerror("poll");
+ return xerror("poll");

written = vmsplice(fd, &v, 1, 0);
printf("here: len = %d, written = %d\n", len, written);

- if (written <= 0)
+ if (!written)
+ break;
+ else if (written < 0)
return xerror("vmsplice");
fprintf(stderr, "written len = %d\n", written);

- len -= written;
+ v.iov_len -= written;
+ v.iov_base += written;
}

return 0;
@@ -98,7 +101,7 @@
unsigned char *buffer;
struct stat sb;
long page_size;
- int i, ret;
+ int i;

if (fstat(STDOUT_FILENO, &sb) < 0)
return xerror("stat");
@@ -112,7 +115,7 @@
return xerror("_SC_PAGESIZE");

fprintf(stderr, "getpagesize = %d\n", getpagesize());
- fprintf(stderr, "page size: %d bytes\n", page_size);
+ fprintf(stderr, "page size: %d bytes\n", (int) page_size);

buffer = malloc(2 * 65536);
buffer[0]='A';

--
Jens Axboe