Hi,
This patchset allows vhost_net to be used with legacy virtio
when guest and host have a different endianness. It is based
on previous work by Cédric Le Goater:
https://www.mail-archive.com/[email protected]/msg09848.html
As suggested by MST:
- the API now asks for a specific format (big endian) instead of the hint
whether byteswap is needed or not (patch 1)
- rebased on top of the virtio-1 accessors (patch 2)
Patch 3 is a separate fix: I think it is also valid for virtio-1.
Please comment.
---
Greg Kurz (3):
vhost: add VHOST_VRING_F_LEGACY_BIG_ENDIAN flag
vhost: add support for legacy virtio
vhost_net: fix virtio_net header endianness
drivers/vhost/net.c | 32 ++++++++++++++++++++++++++------
drivers/vhost/vhost.c | 6 +++++-
drivers/vhost/vhost.h | 23 +++++++++++++++++------
include/uapi/linux/vhost.h | 2 ++
4 files changed, 50 insertions(+), 13 deletions(-)
--
Greg
The VHOST_VRING_F_LEGACY_BIG_ENDIAN flag informs the kernel that the
associated device is big endian. Of course, this only makes sense for
legacy virtio devices since modern virtio devices are always little
endian.
It will be used by the vhost memory accessors to byteswap vring data when
we have a legacy device, in case host and guest endianness differ.
Signed-off-by: Greg Kurz <[email protected]>
---
drivers/vhost/vhost.c | 6 +++++-
drivers/vhost/vhost.h | 3 +++
include/uapi/linux/vhost.h | 2 ++
3 files changed, 10 insertions(+), 1 deletion(-)
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 2ee2826..dad3c37 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -199,6 +199,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
vq->call = NULL;
vq->log_ctx = NULL;
vq->memory = NULL;
+ vq->legacy_big_endian = false;
}
static int vhost_worker(void *data)
@@ -701,7 +702,8 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp)
r = -EFAULT;
break;
}
- if (a.flags & ~(0x1 << VHOST_VRING_F_LOG)) {
+ if (a.flags & ~(0x1 << VHOST_VRING_F_LOG|
+ 0x1 << VHOST_VRING_F_LEGACY_BIG_ENDIAN)) {
r = -EOPNOTSUPP;
break;
}
@@ -751,6 +753,8 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp)
vq->avail = (void __user *)(unsigned long)a.avail_user_addr;
vq->log_addr = a.log_guest_addr;
vq->used = (void __user *)(unsigned long)a.used_user_addr;
+ vq->legacy_big_endian =
+ !!(a.flags & (0x1 << VHOST_VRING_F_LEGACY_BIG_ENDIAN));
break;
case VHOST_SET_VRING_KICK:
if (copy_from_user(&f, argp, sizeof f)) {
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 8c1c792..ce2c68e 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -106,6 +106,9 @@ struct vhost_virtqueue {
/* Log write descriptors */
void __user *log_base;
struct vhost_log *log;
+
+ /* We need to know the device endianness with legacy virtio. */
+ bool legacy_big_endian;
};
struct vhost_dev {
diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h
index bb6a5b4..0bf4491 100644
--- a/include/uapi/linux/vhost.h
+++ b/include/uapi/linux/vhost.h
@@ -34,6 +34,8 @@ struct vhost_vring_addr {
/* Flag values: */
/* Whether log address is valid. If set enables logging. */
#define VHOST_VRING_F_LOG 0
+ /* Whether we have a big-endian legacy virtio device. */
+#define VHOST_VRING_F_LEGACY_BIG_ENDIAN 1
/* Start of array of descriptors (virtually contiguous) */
__u64 desc_user_addr;
Signed-off-by: Greg Kurz <[email protected]>
---
drivers/vhost/vhost.h | 20 ++++++++++++++------
1 file changed, 14 insertions(+), 6 deletions(-)
Michael,
The vhost_is_little_endian() helper adds unconditionnal overhead to fixed
endian architectures: that is all architectures except arm and ppc64. This
was addressed in QEMU with a TARGET_IS_BIENDIAN macro. Please give an
advice about how to address this in the vhost code.
Thanks.
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index ce2c68e..21e7d6a 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -176,34 +176,42 @@ static inline bool vhost_has_feature(struct vhost_virtqueue *vq, int bit)
return vq->acked_features & (1ULL << bit);
}
+static inline bool vhost_is_little_endian(struct vhost_virtqueue *vq)
+{
+ if (vhost_has_feature(vq, VIRTIO_F_VERSION_1))
+ return true;
+ else
+ return !vq->legacy_big_endian;
+}
+
/* Memory accessors */
static inline u16 vhost16_to_cpu(struct vhost_virtqueue *vq, __virtio16 val)
{
- return __virtio16_to_cpu(vhost_has_feature(vq, VIRTIO_F_VERSION_1), val);
+ return __virtio16_to_cpu(vhost_is_little_endian(vq), val);
}
static inline __virtio16 cpu_to_vhost16(struct vhost_virtqueue *vq, u16 val)
{
- return __cpu_to_virtio16(vhost_has_feature(vq, VIRTIO_F_VERSION_1), val);
+ return __cpu_to_virtio16(vhost_is_little_endian(vq), val);
}
static inline u32 vhost32_to_cpu(struct vhost_virtqueue *vq, __virtio32 val)
{
- return __virtio32_to_cpu(vhost_has_feature(vq, VIRTIO_F_VERSION_1), val);
+ return __virtio32_to_cpu(vhost_is_little_endian(vq), val);
}
static inline __virtio32 cpu_to_vhost32(struct vhost_virtqueue *vq, u32 val)
{
- return __cpu_to_virtio32(vhost_has_feature(vq, VIRTIO_F_VERSION_1), val);
+ return __cpu_to_virtio32(vhost_is_little_endian(vq), val);
}
static inline u64 vhost64_to_cpu(struct vhost_virtqueue *vq, __virtio64 val)
{
- return __virtio64_to_cpu(vhost_has_feature(vq, VIRTIO_F_VERSION_1), val);
+ return __virtio64_to_cpu(vhost_is_little_endian(vq), val);
}
static inline __virtio64 cpu_to_vhost64(struct vhost_virtqueue *vq, u64 val)
{
- return __cpu_to_virtio64(vhost_has_feature(vq, VIRTIO_F_VERSION_1), val);
+ return __cpu_to_virtio64(vhost_is_little_endian(vq), val);
}
#endif
Without this patch, packets are being silently dropped by the tap backend.
Signed-off-by: Greg Kurz <[email protected]>
---
drivers/vhost/net.c | 32 ++++++++++++++++++++++++++------
1 file changed, 26 insertions(+), 6 deletions(-)
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index afa06d2..2923eee 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -288,6 +288,16 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
rcu_read_unlock_bh();
}
+static void fix_virtio_net_hdr(struct vhost_virtqueue *vq)
+{
+ struct virtio_net_hdr *hdr = vq->iov[0].iov_base;
+
+ hdr->hdr_len = vhost16_to_cpu(vq, hdr->hdr_len);
+ hdr->gso_size = vhost16_to_cpu(vq, hdr->gso_size);
+ hdr->csum_start = vhost16_to_cpu(vq, hdr->csum_start);
+ hdr->csum_offset = vhost16_to_cpu(vq, hdr->csum_offset);
+}
+
/* Expects to be always run from workqueue - which acts as
* read-size critical section for our kind of RCU. */
static void handle_tx(struct vhost_net *net)
@@ -352,6 +362,10 @@ static void handle_tx(struct vhost_net *net)
"out %d, int %d\n", out, in);
break;
}
+
+ if (!hdr_size)
+ fix_virtio_net_hdr(vq);
+
/* Skip header. TODO: support TSO. */
len = iov_length(vq->iov, out);
iov_iter_init(&msg.msg_iter, WRITE, vq->iov, out, len);
@@ -609,12 +623,18 @@ static void handle_rx(struct vhost_net *net)
continue;
}
/* Supply virtio_net_hdr if VHOST_NET_F_VIRTIO_NET_HDR */
- if (unlikely(vhost_hlen) &&
- copy_to_iter(&hdr, sizeof(hdr), &fixup) != sizeof(hdr)) {
- vq_err(vq, "Unable to write vnet_hdr at addr %p\n",
- vq->iov->iov_base);
- break;
- }
+ if (unlikely(vhost_hlen)) {
+ size_t len = copy_to_iter(&hdr, sizeof(hdr), &fixup);
+
+ if (len != sizeof(hdr)) {
+ vq_err(vq,
+ "Unable to write vnet_hdr at addr %p\n",
+ vq->iov->iov_base);
+ break;
+ }
+ } else
+ fix_virtio_net_hdr(vq);
+
/* TODO: Should check and handle checksum. */
num_buffers = cpu_to_vhost16(vq, headcount);
On Fri, Feb 20, 2015 at 11:15:05AM +0100, Greg Kurz wrote:
> Without this patch, packets are being silently dropped by the tap backend.
>
> Signed-off-by: Greg Kurz <[email protected]>
I think it's the wrong place to fix this. You want a tun/macvtap ioctl
to enable legacy big endian stuff. Treat it same way as vhost, with a
config option to enable.
> ---
> drivers/vhost/net.c | 32 ++++++++++++++++++++++++++------
> 1 file changed, 26 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index afa06d2..2923eee 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -288,6 +288,16 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
> rcu_read_unlock_bh();
> }
>
> +static void fix_virtio_net_hdr(struct vhost_virtqueue *vq)
> +{
> + struct virtio_net_hdr *hdr = vq->iov[0].iov_base;
> +
> + hdr->hdr_len = vhost16_to_cpu(vq, hdr->hdr_len);
> + hdr->gso_size = vhost16_to_cpu(vq, hdr->gso_size);
> + hdr->csum_start = vhost16_to_cpu(vq, hdr->csum_start);
> + hdr->csum_offset = vhost16_to_cpu(vq, hdr->csum_offset);
> +}
> +
> /* Expects to be always run from workqueue - which acts as
> * read-size critical section for our kind of RCU. */
> static void handle_tx(struct vhost_net *net)
> @@ -352,6 +362,10 @@ static void handle_tx(struct vhost_net *net)
> "out %d, int %d\n", out, in);
> break;
> }
> +
> + if (!hdr_size)
> + fix_virtio_net_hdr(vq);
> +
> /* Skip header. TODO: support TSO. */
> len = iov_length(vq->iov, out);
> iov_iter_init(&msg.msg_iter, WRITE, vq->iov, out, len);
> @@ -609,12 +623,18 @@ static void handle_rx(struct vhost_net *net)
> continue;
> }
> /* Supply virtio_net_hdr if VHOST_NET_F_VIRTIO_NET_HDR */
> - if (unlikely(vhost_hlen) &&
> - copy_to_iter(&hdr, sizeof(hdr), &fixup) != sizeof(hdr)) {
> - vq_err(vq, "Unable to write vnet_hdr at addr %p\n",
> - vq->iov->iov_base);
> - break;
> - }
> + if (unlikely(vhost_hlen)) {
> + size_t len = copy_to_iter(&hdr, sizeof(hdr), &fixup);
> +
> + if (len != sizeof(hdr)) {
> + vq_err(vq,
> + "Unable to write vnet_hdr at addr %p\n",
> + vq->iov->iov_base);
> + break;
> + }
> + } else
> + fix_virtio_net_hdr(vq);
> +
> /* TODO: Should check and handle checksum. */
>
> num_buffers = cpu_to_vhost16(vq, headcount);
On Fri, Feb 20, 2015 at 11:14:47AM +0100, Greg Kurz wrote:
> Signed-off-by: Greg Kurz <[email protected]>
> ---
> drivers/vhost/vhost.h | 20 ++++++++++++++------
> 1 file changed, 14 insertions(+), 6 deletions(-)
>
> Michael,
>
> The vhost_is_little_endian() helper adds unconditionnal overhead to fixed
> endian architectures: that is all architectures except arm and ppc64. This
> was addressed in QEMU with a TARGET_IS_BIENDIAN macro. Please give an
> advice about how to address this in the vhost code.
>
> Thanks.
I suggest creating a config option for this, default to off.
When disabled the flag won't be set so userspace
can discover it's availability.
> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> index ce2c68e..21e7d6a 100644
> --- a/drivers/vhost/vhost.h
> +++ b/drivers/vhost/vhost.h
> @@ -176,34 +176,42 @@ static inline bool vhost_has_feature(struct vhost_virtqueue *vq, int bit)
> return vq->acked_features & (1ULL << bit);
> }
>
> +static inline bool vhost_is_little_endian(struct vhost_virtqueue *vq)
> +{
> + if (vhost_has_feature(vq, VIRTIO_F_VERSION_1))
> + return true;
> + else
> + return !vq->legacy_big_endian;
> +}
> +
> /* Memory accessors */
> static inline u16 vhost16_to_cpu(struct vhost_virtqueue *vq, __virtio16 val)
> {
> - return __virtio16_to_cpu(vhost_has_feature(vq, VIRTIO_F_VERSION_1), val);
> + return __virtio16_to_cpu(vhost_is_little_endian(vq), val);
> }
>
> static inline __virtio16 cpu_to_vhost16(struct vhost_virtqueue *vq, u16 val)
> {
> - return __cpu_to_virtio16(vhost_has_feature(vq, VIRTIO_F_VERSION_1), val);
> + return __cpu_to_virtio16(vhost_is_little_endian(vq), val);
> }
>
> static inline u32 vhost32_to_cpu(struct vhost_virtqueue *vq, __virtio32 val)
> {
> - return __virtio32_to_cpu(vhost_has_feature(vq, VIRTIO_F_VERSION_1), val);
> + return __virtio32_to_cpu(vhost_is_little_endian(vq), val);
> }
>
> static inline __virtio32 cpu_to_vhost32(struct vhost_virtqueue *vq, u32 val)
> {
> - return __cpu_to_virtio32(vhost_has_feature(vq, VIRTIO_F_VERSION_1), val);
> + return __cpu_to_virtio32(vhost_is_little_endian(vq), val);
> }
>
> static inline u64 vhost64_to_cpu(struct vhost_virtqueue *vq, __virtio64 val)
> {
> - return __virtio64_to_cpu(vhost_has_feature(vq, VIRTIO_F_VERSION_1), val);
> + return __virtio64_to_cpu(vhost_is_little_endian(vq), val);
> }
>
> static inline __virtio64 cpu_to_vhost64(struct vhost_virtqueue *vq, u64 val)
> {
> - return __cpu_to_virtio64(vhost_has_feature(vq, VIRTIO_F_VERSION_1), val);
> + return __cpu_to_virtio64(vhost_is_little_endian(vq), val);
> }
> #endif
On Fri, Feb 20, 2015 at 11:07:39AM +0100, Greg Kurz wrote:
> The VHOST_VRING_F_LEGACY_BIG_ENDIAN flag informs the kernel that the
> associated device is big endian. Of course, this only makes sense for
> legacy virtio devices since modern virtio devices are always little
> endian.
>
> It will be used by the vhost memory accessors to byteswap vring data when
> we have a legacy device, in case host and guest endianness differ.
>
> Signed-off-by: Greg Kurz <[email protected]>
> ---
> drivers/vhost/vhost.c | 6 +++++-
> drivers/vhost/vhost.h | 3 +++
> include/uapi/linux/vhost.h | 2 ++
> 3 files changed, 10 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index 2ee2826..dad3c37 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -199,6 +199,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
> vq->call = NULL;
> vq->log_ctx = NULL;
> vq->memory = NULL;
> + vq->legacy_big_endian = false;
> }
>
> static int vhost_worker(void *data)
> @@ -701,7 +702,8 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp)
> r = -EFAULT;
> break;
> }
> - if (a.flags & ~(0x1 << VHOST_VRING_F_LOG)) {
> + if (a.flags & ~(0x1 << VHOST_VRING_F_LOG|
> + 0x1 << VHOST_VRING_F_LEGACY_BIG_ENDIAN)) {
whitespace damage here
> r = -EOPNOTSUPP;
> break;
> }
need to also make sure LEGACY_BIG_ENDIAN isn't set with VERSION_1.
> @@ -751,6 +753,8 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp)
> vq->avail = (void __user *)(unsigned long)a.avail_user_addr;
> vq->log_addr = a.log_guest_addr;
> vq->used = (void __user *)(unsigned long)a.used_user_addr;
> + vq->legacy_big_endian =
> + !!(a.flags & (0x1 << VHOST_VRING_F_LEGACY_BIG_ENDIAN));
> break;
> case VHOST_SET_VRING_KICK:
> if (copy_from_user(&f, argp, sizeof f)) {
> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> index 8c1c792..ce2c68e 100644
> --- a/drivers/vhost/vhost.h
> +++ b/drivers/vhost/vhost.h
> @@ -106,6 +106,9 @@ struct vhost_virtqueue {
> /* Log write descriptors */
> void __user *log_base;
> struct vhost_log *log;
> +
> + /* We need to know the device endianness with legacy virtio. */
> + bool legacy_big_endian;
> };
>
> struct vhost_dev {
> diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h
> index bb6a5b4..0bf4491 100644
> --- a/include/uapi/linux/vhost.h
> +++ b/include/uapi/linux/vhost.h
> @@ -34,6 +34,8 @@ struct vhost_vring_addr {
> /* Flag values: */
> /* Whether log address is valid. If set enables logging. */
> #define VHOST_VRING_F_LOG 0
> + /* Whether we have a big-endian legacy virtio device. */
> +#define VHOST_VRING_F_LEGACY_BIG_ENDIAN 1
>
> /* Start of array of descriptors (virtually contiguous) */
> __u64 desc_user_addr;
On Fri, Feb 20, 2015 at 11:07:24AM +0100, Greg Kurz wrote:
> Hi,
>
> This patchset allows vhost_net to be used with legacy virtio
> when guest and host have a different endianness. It is based
> on previous work by C?dric Le Goater:
>
> https://www.mail-archive.com/[email protected]/msg09848.html
>
> As suggested by MST:
> - the API now asks for a specific format (big endian) instead of the hint
> whether byteswap is needed or not (patch 1)
> - rebased on top of the virtio-1 accessors (patch 2)
>
> Patch 3 is a separate fix: I think it is also valid for virtio-1.
I don't think so. See e.g. this code in tun:
gso.csum_offset = cpu_to_tun16(tun, skb->csum_offset);
looks like it has the correct endian-ness for virtio-1.
> Please comment.
>
> ---
>
> Greg Kurz (3):
> vhost: add VHOST_VRING_F_LEGACY_BIG_ENDIAN flag
> vhost: add support for legacy virtio
> vhost_net: fix virtio_net header endianness
>
>
> drivers/vhost/net.c | 32 ++++++++++++++++++++++++++------
> drivers/vhost/vhost.c | 6 +++++-
> drivers/vhost/vhost.h | 23 +++++++++++++++++------
> include/uapi/linux/vhost.h | 2 ++
> 4 files changed, 50 insertions(+), 13 deletions(-)
>
> --
> Greg
On Sun, 22 Feb 2015 10:53:51 +0100
"Michael S. Tsirkin" <[email protected]> wrote:
> On Fri, Feb 20, 2015 at 11:07:24AM +0100, Greg Kurz wrote:
> > Hi,
> >
> > This patchset allows vhost_net to be used with legacy virtio
> > when guest and host have a different endianness. It is based
> > on previous work by Cédric Le Goater:
> >
> > https://www.mail-archive.com/[email protected]/msg09848.html
> >
> > As suggested by MST:
> > - the API now asks for a specific format (big endian) instead of the hint
> > whether byteswap is needed or not (patch 1)
> > - rebased on top of the virtio-1 accessors (patch 2)
> >
> > Patch 3 is a separate fix: I think it is also valid for virtio-1.
>
> I don't think so. See e.g. this code in tun:
> gso.csum_offset = cpu_to_tun16(tun, skb->csum_offset);
> looks like it has the correct endian-ness for virtio-1.
>
>
Indeed. I will fix tun/macvtap as you suggested.
Thanks for the review.
--
Greg
>
> > Please comment.
> >
> > ---
> >
> > Greg Kurz (3):
> > vhost: add VHOST_VRING_F_LEGACY_BIG_ENDIAN flag
> > vhost: add support for legacy virtio
> > vhost_net: fix virtio_net header endianness
> >
> >
> > drivers/vhost/net.c | 32 ++++++++++++++++++++++++++------
> > drivers/vhost/vhost.c | 6 +++++-
> > drivers/vhost/vhost.h | 23 +++++++++++++++++------
> > include/uapi/linux/vhost.h | 2 ++
> > 4 files changed, 50 insertions(+), 13 deletions(-)
> >
> > --
> > Greg
>