Hi,
This patchset allows vhost to be used with legacy virtio when guest and host
have a different endianness.
Patch 7 got rewritten according to Cornelia's and Michael's comments. I have
also introduced patch 8 that brings BE vnet headers support to tun/macvtap.
This series is enough to have vhost_net working flawlessly. I could
succesfully reboot guests from ppc64 to ppc64le and vice-versa on ppc64
and ppc64le hosts.
---
Greg Kurz (8):
virtio: introduce virtio_is_little_endian() helper
tun: add tun_is_little_endian() helper
macvtap: introduce macvtap_is_little_endian() helper
vringh: introduce vringh_is_little_endian() helper
vhost: introduce vhost_is_little_endian() helper
virtio: add explicit big-endian support to memory accessors
vhost: feature to set the vring endianness
macvtap/tun: add VNET_BE flag
drivers/net/Kconfig | 12 ++++++
drivers/net/macvtap.c | 69 ++++++++++++++++++++++++++++++++++-
drivers/net/tun.c | 71 +++++++++++++++++++++++++++++++++++-
drivers/vhost/Kconfig | 10 +++++
drivers/vhost/vhost.c | 76 ++++++++++++++++++++++++++++++++++++++
drivers/vhost/vhost.h | 25 ++++++++++---
include/linux/virtio_byteorder.h | 24 +++++++-----
include/linux/virtio_config.h | 19 +++++++---
include/linux/vringh.h | 19 +++++++---
include/uapi/linux/if_tun.h | 2 +
include/uapi/linux/vhost.h | 9 +++++
11 files changed, 303 insertions(+), 33 deletions(-)
--
Greg
Signed-off-by: Greg Kurz <[email protected]>
---
include/linux/virtio_config.h | 17 +++++++++++------
1 file changed, 11 insertions(+), 6 deletions(-)
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index ca3ed78..bd1a582 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -205,35 +205,40 @@ int virtqueue_set_affinity(struct virtqueue *vq, int cpu)
return 0;
}
+static inline bool virtio_is_little_endian(struct virtio_device *vdev)
+{
+ return virtio_has_feature(vdev, VIRTIO_F_VERSION_1);
+}
+
/* Memory accessors */
static inline u16 virtio16_to_cpu(struct virtio_device *vdev, __virtio16 val)
{
- return __virtio16_to_cpu(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
+ return __virtio16_to_cpu(virtio_is_little_endian(vdev), val);
}
static inline __virtio16 cpu_to_virtio16(struct virtio_device *vdev, u16 val)
{
- return __cpu_to_virtio16(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
+ return __cpu_to_virtio16(virtio_is_little_endian(vdev), val);
}
static inline u32 virtio32_to_cpu(struct virtio_device *vdev, __virtio32 val)
{
- return __virtio32_to_cpu(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
+ return __virtio32_to_cpu(virtio_is_little_endian(vdev), val);
}
static inline __virtio32 cpu_to_virtio32(struct virtio_device *vdev, u32 val)
{
- return __cpu_to_virtio32(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
+ return __cpu_to_virtio32(virtio_is_little_endian(vdev), val);
}
static inline u64 virtio64_to_cpu(struct virtio_device *vdev, __virtio64 val)
{
- return __virtio64_to_cpu(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
+ return __virtio64_to_cpu(virtio_is_little_endian(vdev), val);
}
static inline __virtio64 cpu_to_virtio64(struct virtio_device *vdev, u64 val)
{
- return __cpu_to_virtio64(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
+ return __cpu_to_virtio64(virtio_is_little_endian(vdev), val);
}
/* Config space accessors. */
Signed-off-by: Greg Kurz <[email protected]>
---
drivers/net/tun.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 857dca4..3c3d6c0 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -206,14 +206,19 @@ struct tun_struct {
u32 flow_count;
};
+static inline bool tun_is_little_endian(struct tun_struct *tun)
+{
+ return tun->flags & TUN_VNET_LE;
+}
+
static inline u16 tun16_to_cpu(struct tun_struct *tun, __virtio16 val)
{
- return __virtio16_to_cpu(tun->flags & TUN_VNET_LE, val);
+ return __virtio16_to_cpu(tun_is_little_endian(tun), val);
}
static inline __virtio16 cpu_to_tun16(struct tun_struct *tun, u16 val)
{
- return __cpu_to_virtio16(tun->flags & TUN_VNET_LE, val);
+ return __cpu_to_virtio16(tun_is_little_endian(tun), val);
}
static inline u32 tun_hashfn(u32 rxhash)
Signed-off-by: Greg Kurz <[email protected]>
---
drivers/net/macvtap.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 27ecc5c..a2f2958 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -49,14 +49,19 @@ struct macvtap_queue {
#define MACVTAP_VNET_LE 0x80000000
+static inline bool macvtap_is_little_endian(struct macvtap_queue *q)
+{
+ return q->flags & MACVTAP_VNET_LE;
+}
+
static inline u16 macvtap16_to_cpu(struct macvtap_queue *q, __virtio16 val)
{
- return __virtio16_to_cpu(q->flags & MACVTAP_VNET_LE, val);
+ return __virtio16_to_cpu(macvtap_is_little_endian(q), val);
}
static inline __virtio16 cpu_to_macvtap16(struct macvtap_queue *q, u16 val)
{
- return __cpu_to_virtio16(q->flags & MACVTAP_VNET_LE, val);
+ return __cpu_to_virtio16(macvtap_is_little_endian(q), val);
}
static struct proto macvtap_proto = {
Signed-off-by: Greg Kurz <[email protected]>
---
include/linux/vringh.h | 17 +++++++++++------
1 file changed, 11 insertions(+), 6 deletions(-)
diff --git a/include/linux/vringh.h b/include/linux/vringh.h
index a3fa537..3ed62ef 100644
--- a/include/linux/vringh.h
+++ b/include/linux/vringh.h
@@ -226,33 +226,38 @@ static inline void vringh_notify(struct vringh *vrh)
vrh->notify(vrh);
}
+static inline bool vringh_is_little_endian(const struct vringh *vrh)
+{
+ return vrh->little_endian;
+}
+
static inline u16 vringh16_to_cpu(const struct vringh *vrh, __virtio16 val)
{
- return __virtio16_to_cpu(vrh->little_endian, val);
+ return __virtio16_to_cpu(vringh_is_little_endian(vrh), val);
}
static inline __virtio16 cpu_to_vringh16(const struct vringh *vrh, u16 val)
{
- return __cpu_to_virtio16(vrh->little_endian, val);
+ return __cpu_to_virtio16(vringh_is_little_endian(vrh), val);
}
static inline u32 vringh32_to_cpu(const struct vringh *vrh, __virtio32 val)
{
- return __virtio32_to_cpu(vrh->little_endian, val);
+ return __virtio32_to_cpu(vringh_is_little_endian(vrh), val);
}
static inline __virtio32 cpu_to_vringh32(const struct vringh *vrh, u32 val)
{
- return __cpu_to_virtio32(vrh->little_endian, val);
+ return __cpu_to_virtio32(vringh_is_little_endian(vrh), val);
}
static inline u64 vringh64_to_cpu(const struct vringh *vrh, __virtio64 val)
{
- return __virtio64_to_cpu(vrh->little_endian, val);
+ return __virtio64_to_cpu(vringh_is_little_endian(vrh), val);
}
static inline __virtio64 cpu_to_vringh64(const struct vringh *vrh, u64 val)
{
- return __cpu_to_virtio64(vrh->little_endian, val);
+ return __cpu_to_virtio64(vringh_is_little_endian(vrh), val);
}
#endif /* _LINUX_VRINGH_H */
Signed-off-by: Greg Kurz <[email protected]>
---
drivers/vhost/vhost.h | 17 +++++++++++------
1 file changed, 11 insertions(+), 6 deletions(-)
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 8c1c792..6a49960 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -173,34 +173,39 @@ static inline bool vhost_has_feature(struct vhost_virtqueue *vq, int bit)
return vq->acked_features & (1ULL << bit);
}
+static inline bool vhost_is_little_endian(struct vhost_virtqueue *vq)
+{
+ return vhost_has_feature(vq, VIRTIO_F_VERSION_1);
+}
+
/* Memory accessors */
static inline u16 vhost16_to_cpu(struct vhost_virtqueue *vq, __virtio16 val)
{
- return __virtio16_to_cpu(vhost_has_feature(vq, VIRTIO_F_VERSION_1), val);
+ return __virtio16_to_cpu(vhost_is_little_endian(vq), val);
}
static inline __virtio16 cpu_to_vhost16(struct vhost_virtqueue *vq, u16 val)
{
- return __cpu_to_virtio16(vhost_has_feature(vq, VIRTIO_F_VERSION_1), val);
+ return __cpu_to_virtio16(vhost_is_little_endian(vq), val);
}
static inline u32 vhost32_to_cpu(struct vhost_virtqueue *vq, __virtio32 val)
{
- return __virtio32_to_cpu(vhost_has_feature(vq, VIRTIO_F_VERSION_1), val);
+ return __virtio32_to_cpu(vhost_is_little_endian(vq), val);
}
static inline __virtio32 cpu_to_vhost32(struct vhost_virtqueue *vq, u32 val)
{
- return __cpu_to_virtio32(vhost_has_feature(vq, VIRTIO_F_VERSION_1), val);
+ return __cpu_to_virtio32(vhost_is_little_endian(vq), val);
}
static inline u64 vhost64_to_cpu(struct vhost_virtqueue *vq, __virtio64 val)
{
- return __virtio64_to_cpu(vhost_has_feature(vq, VIRTIO_F_VERSION_1), val);
+ return __virtio64_to_cpu(vhost_is_little_endian(vq), val);
}
static inline __virtio64 cpu_to_vhost64(struct vhost_virtqueue *vq, u64 val)
{
- return __cpu_to_virtio64(vhost_has_feature(vq, VIRTIO_F_VERSION_1), val);
+ return __cpu_to_virtio64(vhost_is_little_endian(vq), val);
}
#endif
The current memory accessors logic is:
- little endian if little_endian
- native endian (i.e. no byteswap) if !little_endian
If we want to fully support cross-endian vhost, we also need to be
able to convert to big endian.
Instead of changing the little_endian argument to some 3-value enum, this
patch changes the logic to:
- little endian if little_endian
- big endian if !little_endian
The native endian case is handled by all users with a trivial helper. This
patch doesn't change any functionality, nor it does add overhead.
Signed-off-by: Greg Kurz <[email protected]>
---
drivers/net/macvtap.c | 4 +++-
drivers/net/tun.c | 4 +++-
drivers/vhost/vhost.h | 4 +++-
include/linux/virtio_byteorder.h | 24 ++++++++++++++----------
include/linux/virtio_config.h | 4 +++-
include/linux/vringh.h | 4 +++-
6 files changed, 29 insertions(+), 15 deletions(-)
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index a2f2958..0a03a66 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -51,7 +51,9 @@ struct macvtap_queue {
static inline bool macvtap_is_little_endian(struct macvtap_queue *q)
{
- return q->flags & MACVTAP_VNET_LE;
+ if (q->flags & MACVTAP_VNET_LE)
+ return true;
+ return virtio_legacy_is_little_endian();
}
static inline u16 macvtap16_to_cpu(struct macvtap_queue *q, __virtio16 val)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 3c3d6c0..053f9b6 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -208,7 +208,9 @@ struct tun_struct {
static inline bool tun_is_little_endian(struct tun_struct *tun)
{
- return tun->flags & TUN_VNET_LE;
+ if (tun->flags & TUN_VNET_LE)
+ return true;
+ return virtio_legacy_is_little_endian();
}
static inline u16 tun16_to_cpu(struct tun_struct *tun, __virtio16 val)
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 6a49960..4e9a186 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -175,7 +175,9 @@ static inline bool vhost_has_feature(struct vhost_virtqueue *vq, int bit)
static inline bool vhost_is_little_endian(struct vhost_virtqueue *vq)
{
- return vhost_has_feature(vq, VIRTIO_F_VERSION_1);
+ if (vhost_has_feature(vq, VIRTIO_F_VERSION_1))
+ return true;
+ return virtio_legacy_is_little_endian();
}
/* Memory accessors */
diff --git a/include/linux/virtio_byteorder.h b/include/linux/virtio_byteorder.h
index 51865d0..ce63a2c 100644
--- a/include/linux/virtio_byteorder.h
+++ b/include/linux/virtio_byteorder.h
@@ -3,17 +3,21 @@
#include <linux/types.h>
#include <uapi/linux/virtio_types.h>
-/*
- * Low-level memory accessors for handling virtio in modern little endian and in
- * compatibility native endian format.
- */
+static inline bool virtio_legacy_is_little_endian(void)
+{
+#ifdef __LITTLE_ENDIAN
+ return true;
+#else
+ return false;
+#endif
+}
static inline u16 __virtio16_to_cpu(bool little_endian, __virtio16 val)
{
if (little_endian)
return le16_to_cpu((__force __le16)val);
else
- return (__force u16)val;
+ return be16_to_cpu((__force __be16)val);
}
static inline __virtio16 __cpu_to_virtio16(bool little_endian, u16 val)
@@ -21,7 +25,7 @@ static inline __virtio16 __cpu_to_virtio16(bool little_endian, u16 val)
if (little_endian)
return (__force __virtio16)cpu_to_le16(val);
else
- return (__force __virtio16)val;
+ return (__force __virtio16)cpu_to_be16(val);
}
static inline u32 __virtio32_to_cpu(bool little_endian, __virtio32 val)
@@ -29,7 +33,7 @@ static inline u32 __virtio32_to_cpu(bool little_endian, __virtio32 val)
if (little_endian)
return le32_to_cpu((__force __le32)val);
else
- return (__force u32)val;
+ return be32_to_cpu((__force __be32)val);
}
static inline __virtio32 __cpu_to_virtio32(bool little_endian, u32 val)
@@ -37,7 +41,7 @@ static inline __virtio32 __cpu_to_virtio32(bool little_endian, u32 val)
if (little_endian)
return (__force __virtio32)cpu_to_le32(val);
else
- return (__force __virtio32)val;
+ return (__force __virtio32)cpu_to_be32(val);
}
static inline u64 __virtio64_to_cpu(bool little_endian, __virtio64 val)
@@ -45,7 +49,7 @@ static inline u64 __virtio64_to_cpu(bool little_endian, __virtio64 val)
if (little_endian)
return le64_to_cpu((__force __le64)val);
else
- return (__force u64)val;
+ return be64_to_cpu((__force __be64)val);
}
static inline __virtio64 __cpu_to_virtio64(bool little_endian, u64 val)
@@ -53,7 +57,7 @@ static inline __virtio64 __cpu_to_virtio64(bool little_endian, u64 val)
if (little_endian)
return (__force __virtio64)cpu_to_le64(val);
else
- return (__force __virtio64)val;
+ return (__force __virtio64)cpu_to_be64(val);
}
#endif /* _LINUX_VIRTIO_BYTEORDER */
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index bd1a582..36a6daa 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -207,7 +207,9 @@ int virtqueue_set_affinity(struct virtqueue *vq, int cpu)
static inline bool virtio_is_little_endian(struct virtio_device *vdev)
{
- return virtio_has_feature(vdev, VIRTIO_F_VERSION_1);
+ if (virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
+ return true;
+ return virtio_legacy_is_little_endian();
}
/* Memory accessors */
diff --git a/include/linux/vringh.h b/include/linux/vringh.h
index 3ed62ef..d786c2d 100644
--- a/include/linux/vringh.h
+++ b/include/linux/vringh.h
@@ -228,7 +228,9 @@ static inline void vringh_notify(struct vringh *vrh)
static inline bool vringh_is_little_endian(const struct vringh *vrh)
{
- return vrh->little_endian;
+ if (vrh->little_endian)
+ return true;
+ return virtio_legacy_is_little_endian();
}
static inline u16 vringh16_to_cpu(const struct vringh *vrh, __virtio16 val)
This patch brings cross-endian support to vhost when used to implement
legacy virtio devices. Since it is a relatively rare situation, the
feature availability is controlled by a kernel config option (not set
by default).
The vq->is_le boolean field is added to cache the endianness to be
used for ring accesses. It defaults to native endian, as expected
by legacy virtio devices. When the ring gets active, we force little
endian if the device is modern. When the ring is deactivated, we
revert to the native endian default.
If cross-endian was compiled in, a vq->user_be boolean field is added
so that userspace may request a specific endianness. This field is
used to override the default when activating the ring of a legacy
device. It has no effect on modern devices.
Signed-off-by: Greg Kurz <[email protected]>
---
drivers/vhost/Kconfig | 10 ++++++
drivers/vhost/vhost.c | 76 +++++++++++++++++++++++++++++++++++++++++++-
drivers/vhost/vhost.h | 12 +++++--
include/uapi/linux/vhost.h | 9 +++++
4 files changed, 103 insertions(+), 4 deletions(-)
Changes since v3:
- VHOST_SET_VRING_ENDIAN_LEGACY ioctl renamed to VHOST_SET_VRING_BIG_ENDIAN
- ioctl API is now: 0 for le, 1 for be, other values are EINVAL
- ioctl doesn't filter out modern devices
- ioctl stubs return ENOIOCTLCMD
- forbid endianness changes when vring is active
- logic now handled with vq->is_le and vq->user_be according to device
start/stop as suggested by Michael
diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig
index 017a1e8..0aec88c 100644
--- a/drivers/vhost/Kconfig
+++ b/drivers/vhost/Kconfig
@@ -32,3 +32,13 @@ config VHOST
---help---
This option is selected by any driver which needs to access
the core of vhost.
+
+config VHOST_SET_ENDIAN_LEGACY
+ bool "Cross-endian support for host kernel accelerator"
+ default n
+ ---help---
+ This option allows vhost to support guests with a different byte
+ ordering from host. It is disabled by default since it adds overhead
+ and it is only needed by a few platforms (powerpc and arm).
+
+ If unsure, say "N".
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 2ee2826..3eb756b 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -199,6 +199,10 @@ static void vhost_vq_reset(struct vhost_dev *dev,
vq->call = NULL;
vq->log_ctx = NULL;
vq->memory = NULL;
+ vq->is_le = virtio_legacy_is_little_endian();
+#ifdef CONFIG_VHOST_SET_ENDIAN_LEGACY
+ vq->user_be = !vq->is_le;
+#endif
}
static int vhost_worker(void *data)
@@ -630,6 +634,53 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
return 0;
}
+#ifdef CONFIG_VHOST_SET_ENDIAN_LEGACY
+static long vhost_set_vring_big_endian(struct vhost_virtqueue *vq,
+ int __user *argp)
+{
+ struct vhost_vring_state s;
+
+ if (vq->private_data)
+ return -EBUSY;
+
+ if (copy_from_user(&s, argp, sizeof(s)))
+ return -EFAULT;
+
+ if (s.num && s.num != 1)
+ return -EINVAL;
+
+ vq->user_be = s.num;
+
+ return 0;
+}
+
+static long vhost_get_vring_big_endian(struct vhost_virtqueue *vq, u32 idx,
+ int __user *argp)
+{
+ struct vhost_vring_state s = {
+ .index = idx,
+ .num = vq->user_be
+ };
+
+ if (copy_to_user(argp, &s, sizeof(s)))
+ return -EFAULT;
+
+ return 0;
+}
+#else
+static long vhost_set_vring_big_endian(struct vhost_virtqueue *vq,
+ int __user *argp)
+{
+ return -ENOIOCTLCMD;
+}
+
+static long vhost_get_vring_big_endian(struct vhost_virtqueue *vq, u32 idx,
+ int __user *argp)
+{
+ return -ENOIOCTLCMD;
+}
+#endif /* CONFIG_VHOST_SET_ENDIAN_LEGACY */
+
long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp)
{
struct file *eventfp, *filep = NULL;
@@ -806,6 +857,12 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp)
} else
filep = eventfp;
break;
+ case VHOST_SET_VRING_BIG_ENDIAN:
+ r = vhost_set_vring_big_endian(vq, argp);
+ break;
+ case VHOST_GET_VRING_BIG_ENDIAN:
+ r = vhost_get_vring_big_endian(vq, idx, argp);
+ break;
default:
r = -ENOIOCTLCMD;
}
@@ -1040,12 +1097,29 @@ static int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 avail_event)
return 0;
}
+#ifdef CONFIG_VHOST_SET_ENDIAN_LEGACY
+static void vhost_init_is_le(struct vhost_virtqueue *vq)
+{
+ vq->is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1) || !vq->user_be;
+}
+#else
+static void vhost_init_is_le(struct vhost_virtqueue *vq)
+{
+ if (vhost_has_feature(vq, VIRTIO_F_VERSION_1))
+ vq->is_le = true;
+}
+#endif
+
int vhost_init_used(struct vhost_virtqueue *vq)
{
__virtio16 last_used_idx;
int r;
- if (!vq->private_data)
+ if (!vq->private_data) {
+ vq->is_le = virtio_legacy_is_little_endian();
return 0;
+ }
+
+ vhost_init_is_le(vq);
r = vhost_update_used_flags(vq);
if (r)
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 4e9a186..04b2add 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -106,6 +106,14 @@ struct vhost_virtqueue {
/* Log write descriptors */
void __user *log_base;
struct vhost_log *log;
+
+ /* Ring endianness. Defaults to legacy native endianness.
+ * Set to true when starting a modern virtio device. */
+ bool is_le;
+#ifdef CONFIG_VHOST_SET_ENDIAN_LEGACY
+ /* Ring endianness requested by userspace for cross-endian support. */
+ bool user_be;
+#endif
};
struct vhost_dev {
@@ -175,9 +183,7 @@ static inline bool vhost_has_feature(struct vhost_virtqueue *vq, int bit)
static inline bool vhost_is_little_endian(struct vhost_virtqueue *vq)
{
- if (vhost_has_feature(vq, VIRTIO_F_VERSION_1))
- return true;
- return virtio_legacy_is_little_endian();
+ return vq->is_le;
}
/* Memory accessors */
diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h
index bb6a5b4..5cdebbc 100644
--- a/include/uapi/linux/vhost.h
+++ b/include/uapi/linux/vhost.h
@@ -103,6 +103,15 @@ struct vhost_memory {
/* Get accessor: reads index, writes value in num */
#define VHOST_GET_VRING_BASE _IOWR(VHOST_VIRTIO, 0x12, struct vhost_vring_state)
+/* Set the vring byte order in num. This is a legacy only API that is simply
+ * ignored when VIRTIO_F_VERSION_1 is set.
+ * 0 to set to little-endian
+ * 1 to set to big-endian
+ * other values return EINVAL.
+ */
+#define VHOST_SET_VRING_BIG_ENDIAN _IOW(VHOST_VIRTIO, 0x13, struct vhost_vring_state)
+#define VHOST_GET_VRING_BIG_ENDIAN _IOW(VHOST_VIRTIO, 0x14, struct vhost_vring_state)
+
/* The following ioctls use eventfd file descriptors to signal and poll
* for events. */
The VNET_LE flag was introduced to fix accesses to virtio 1.0 headers
that are always little-endian. It can also be used to handle the special
case of a legacy little-endian device implemented by a big-endian host.
Let's add a flag and ioctls for big-endian devices as well. If both flags
are set, little-endian wins.
Since this is isn't a common usecase, the feature is controlled by a kernel
config option (not set by default).
Both macvtap and tun are covered by this patch since they share the same
API with userland.
Signed-off-by: Greg Kurz <[email protected]>
---
drivers/net/Kconfig | 12 ++++++++
drivers/net/macvtap.c | 60 +++++++++++++++++++++++++++++++++++++++++-
drivers/net/tun.c | 62 ++++++++++++++++++++++++++++++++++++++++++-
include/uapi/linux/if_tun.h | 2 +
4 files changed, 134 insertions(+), 2 deletions(-)
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index df51d60..f0e23a0 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -244,6 +244,18 @@ config TUN
If you don't know what to use this for, you don't need it.
+config TUN_VNET_BE
+ bool "Support for big-endian vnet headers"
+ default n
+ ---help---
+ This option allows TUN/TAP and MACVTAP device drivers to parse
+ vnet headers that are in big-endian byte order. It is useful
+ when the headers come from a big-endian legacy virtio driver and
+ the host is little-endian.
+
+ Unless you have a little-endian system hosting a big-endian virtual
+ machine with a virtio NIC, you should say N.
+
config VETH
tristate "Virtual ethernet pair device"
---help---
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 0a03a66..e0ab1b7 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -48,12 +48,27 @@ struct macvtap_queue {
#define MACVTAP_FEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE)
#define MACVTAP_VNET_LE 0x80000000
+#define MACVTAP_VNET_BE 0x40000000
+
+#ifdef CONFIG_TUN_VNET_BE
+static inline bool macvtap_legacy_is_little_endian(struct macvtap_queue *q)
+{
+ if (q->flags & MACVTAP_VNET_BE)
+ return false;
+ return virtio_legacy_is_little_endian();
+}
+#else
+static inline bool macvtap_legacy_is_little_endian(struct macvtap_queue *q)
+{
+ return virtio_legacy_is_little_endian();
+}
+#endif
static inline bool macvtap_is_little_endian(struct macvtap_queue *q)
{
if (q->flags & MACVTAP_VNET_LE)
return true;
- return virtio_legacy_is_little_endian();
+ return macvtap_legacy_is_little_endian(q);
}
static inline u16 macvtap16_to_cpu(struct macvtap_queue *q, __virtio16 val)
@@ -1000,6 +1015,43 @@ static int set_offload(struct macvtap_queue *q, unsigned long arg)
return 0;
}
+#ifdef CONFIG_TUN_VNET_BE
+static long macvtap_get_vnet_be(struct macvtap_queue *q, int __user *sp)
+{
+ int s = !!(q->flags & MACVTAP_VNET_BE);
+
+ if (put_user(s, sp))
+ return -EFAULT;
+
+ return 0;
+}
+
+static long macvtap_set_vnet_be(struct macvtap_queue *q, int __user *sp)
+{
+ int s;
+
+ if (get_user(s, sp))
+ return -EFAULT;
+
+ if (s)
+ q->flags |= MACVTAP_VNET_BE;
+ else
+ q->flags &= ~MACVTAP_VNET_BE;
+
+ return 0;
+}
+#else
+static long macvtap_get_vnet_be(struct macvtap_queue *q, int __user *argp)
+{
+ return -EINVAL;
+}
+
+static long macvtap_set_vnet_be(struct macvtap_queue *q, int __user *argp)
+{
+ return -EINVAL;
+}
+#endif /* CONFIG_TUN_VNET_BE */
+
/*
* provide compatibility with generic tun/tap interface
*/
@@ -1097,6 +1149,12 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd,
q->flags &= ~MACVTAP_VNET_LE;
return 0;
+ case TUNGETVNETBE:
+ return macvtap_get_vnet_be(q, sp);
+
+ case TUNSETVNETBE:
+ return macvtap_set_vnet_be(q, sp);
+
case TUNSETOFFLOAD:
/* let the user check for future flags */
if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 053f9b6..4e12488 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -111,6 +111,7 @@ do { \
#define TUN_FASYNC IFF_ATTACH_QUEUE
/* High bits in flags field are unused. */
#define TUN_VNET_LE 0x80000000
+#define TUN_VNET_BE 0x40000000
#define TUN_FEATURES (IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR | \
IFF_MULTI_QUEUE)
@@ -206,11 +207,25 @@ struct tun_struct {
u32 flow_count;
};
+#ifdef CONFIG_TUN_VNET_BE
+static inline bool tun_legacy_is_little_endian(struct tun_struct *tun)
+{
+ if (tun->flags & TUN_VNET_BE)
+ return false;
+ return virtio_legacy_is_little_endian();
+}
+#else
+static inline bool tun_legacy_is_little_endian(struct tun_struct *tun)
+{
+ return virtio_legacy_is_little_endian();
+}
+#endif
+
static inline bool tun_is_little_endian(struct tun_struct *tun)
{
if (tun->flags & TUN_VNET_LE)
return true;
- return virtio_legacy_is_little_endian();
+ return tun_legacy_is_little_endian(tun);
}
static inline u16 tun16_to_cpu(struct tun_struct *tun, __virtio16 val)
@@ -1836,6 +1851,43 @@ unlock:
return ret;
}
+#ifdef CONFIG_TUN_VNET_BE
+static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp)
+{
+ int be = !!(tun->flags & TUN_VNET_BE);
+
+ if (put_user(be, argp))
+ return EFAULT;
+
+ return 0;
+}
+
+static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp)
+{
+ int be;
+
+ if (get_user(be, argp))
+ return -EFAULT;
+
+ if (be)
+ tun->flags |= TUN_VNET_BE;
+ else
+ tun->flags &= ~TUN_VNET_BE;
+
+ return 0;
+}
+#else
+static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp)
+{
+ return -EINVAL;
+}
+
+static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp)
+{
+ return -EINVAL;
+}
+#endif /* CONFIG_TUN_VNET_BE */
+
static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
unsigned long arg, int ifreq_len)
{
@@ -2065,6 +2117,14 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
tun->flags &= ~TUN_VNET_LE;
break;
+ case TUNGETVNETBE:
+ ret = tun_get_vnet_be(tun, argp);
+ break;
+
+ case TUNSETVNETBE:
+ ret = tun_set_vnet_be(tun, argp);
+ break;
+
case TUNATTACHFILTER:
/* Can be set only for TAPs */
ret = -EINVAL;
diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h
index 50ae243..bcac4c0 100644
--- a/include/uapi/linux/if_tun.h
+++ b/include/uapi/linux/if_tun.h
@@ -50,6 +50,8 @@
#define TUNGETFILTER _IOR('T', 219, struct sock_fprog)
#define TUNSETVNETLE _IOW('T', 220, int)
#define TUNGETVNETLE _IOR('T', 221, int)
+#define TUNSETVNETBE _IOW('T', 222, int)
+#define TUNGETVNETBE _IOR('T', 223, int)
/* TUNSETIFF ifr flags */
#define IFF_TUN 0x0001
On Fri, 10 Apr 2015 12:19:16 +0200
Greg Kurz <[email protected]> wrote:
> This patch brings cross-endian support to vhost when used to implement
> legacy virtio devices. Since it is a relatively rare situation, the
> feature availability is controlled by a kernel config option (not set
> by default).
>
> The vq->is_le boolean field is added to cache the endianness to be
> used for ring accesses. It defaults to native endian, as expected
> by legacy virtio devices. When the ring gets active, we force little
> endian if the device is modern. When the ring is deactivated, we
> revert to the native endian default.
>
> If cross-endian was compiled in, a vq->user_be boolean field is added
> so that userspace may request a specific endianness. This field is
> used to override the default when activating the ring of a legacy
> device. It has no effect on modern devices.
>
> Signed-off-by: Greg Kurz <[email protected]>
> ---
> drivers/vhost/Kconfig | 10 ++++++
> drivers/vhost/vhost.c | 76 +++++++++++++++++++++++++++++++++++++++++++-
> drivers/vhost/vhost.h | 12 +++++--
> include/uapi/linux/vhost.h | 9 +++++
> 4 files changed, 103 insertions(+), 4 deletions(-)
> +#ifdef CONFIG_VHOST_SET_ENDIAN_LEGACY
> +static long vhost_set_vring_big_endian(struct vhost_virtqueue *vq,
> + int __user *argp)
> +{
> + struct vhost_vring_state s;
> +
> + if (vq->private_data)
> + return -EBUSY;
> +
> + if (copy_from_user(&s, argp, sizeof(s)))
> + return -EFAULT;
> +
> + if (s.num && s.num != 1)
Checking for s.num > 1 might be more obvious at a glance?
> + return -EINVAL;
> +
> + vq->user_be = s.num;
> +
> + return 0;
> +}
> +
(...)
> +#ifdef CONFIG_VHOST_SET_ENDIAN_LEGACY
> +static void vhost_init_is_le(struct vhost_virtqueue *vq)
> +{
> + vq->is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1) || !vq->user_be;
So if the endianness is not explicitly set to BE, it will be forced to
LE (instead of native endian)? Won't that break userspace that does not
yet use the new interface when CONFIG_VHOST_SET_ENDIAN_LEGACY is set?
> +}
> +#else
> +static void vhost_init_is_le(struct vhost_virtqueue *vq)
> +{
> + if (vhost_has_feature(vq, VIRTIO_F_VERSION_1))
> + vq->is_le = true;
> +}
> +#endif
> +
> int vhost_init_used(struct vhost_virtqueue *vq)
> {
> __virtio16 last_used_idx;
> int r;
> - if (!vq->private_data)
> + if (!vq->private_data) {
> + vq->is_le = virtio_legacy_is_little_endian();
> return 0;
> + }
> +
> + vhost_init_is_le(vq);
>
> r = vhost_update_used_flags(vq);
> if (r)
On Tue, 14 Apr 2015 16:20:23 +0200
Cornelia Huck <[email protected]> wrote:
> On Fri, 10 Apr 2015 12:19:16 +0200
> Greg Kurz <[email protected]> wrote:
>
> > This patch brings cross-endian support to vhost when used to implement
> > legacy virtio devices. Since it is a relatively rare situation, the
> > feature availability is controlled by a kernel config option (not set
> > by default).
> >
> > The vq->is_le boolean field is added to cache the endianness to be
> > used for ring accesses. It defaults to native endian, as expected
> > by legacy virtio devices. When the ring gets active, we force little
> > endian if the device is modern. When the ring is deactivated, we
> > revert to the native endian default.
> >
> > If cross-endian was compiled in, a vq->user_be boolean field is added
> > so that userspace may request a specific endianness. This field is
> > used to override the default when activating the ring of a legacy
> > device. It has no effect on modern devices.
> >
> > Signed-off-by: Greg Kurz <[email protected]>
> > ---
> > drivers/vhost/Kconfig | 10 ++++++
> > drivers/vhost/vhost.c | 76 +++++++++++++++++++++++++++++++++++++++++++-
> > drivers/vhost/vhost.h | 12 +++++--
> > include/uapi/linux/vhost.h | 9 +++++
> > 4 files changed, 103 insertions(+), 4 deletions(-)
>
> > +#ifdef CONFIG_VHOST_SET_ENDIAN_LEGACY
> > +static long vhost_set_vring_big_endian(struct vhost_virtqueue *vq,
> > + int __user *argp)
> > +{
> > + struct vhost_vring_state s;
> > +
> > + if (vq->private_data)
> > + return -EBUSY;
> > +
> > + if (copy_from_user(&s, argp, sizeof(s)))
> > + return -EFAULT;
> > +
> > + if (s.num && s.num != 1)
>
> Checking for s.num > 1 might be more obvious at a glance?
>
Sure since s.num is unsigned.
> > + return -EINVAL;
> > +
> > + vq->user_be = s.num;
> > +
> > + return 0;
> > +}
> > +
>
> (...)
>
> > +#ifdef CONFIG_VHOST_SET_ENDIAN_LEGACY
> > +static void vhost_init_is_le(struct vhost_virtqueue *vq)
> > +{
> > + vq->is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1) || !vq->user_be;
>
> So if the endianness is not explicitly set to BE, it will be forced to
> LE (instead of native endian)? Won't that break userspace that does not
> yet use the new interface when CONFIG_VHOST_SET_ENDIAN_LEGACY is set?
>
If userspace doesn't use the new interface, then vq->user_be will retain its
default value that was set in vhost_vq_reset(), i.e. :
vq->user_be = !virtio_legacy_is_little_endian();
This means default is native endian.
What about adding this comment ?
static void vhost_init_is_le(struct vhost_virtqueue *vq)
{
/* Note for legacy virtio: user_be is initialized in vhost_vq_reset()
* according to the host endianness. If userspace does not set an
* explicit endianness, the default behavior is native endian, as
* expected by legacy virtio.
*/
vq->is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1) || !vq->user_be;
}
> > +}
> > +#else
> > +static void vhost_init_is_le(struct vhost_virtqueue *vq)
> > +{
> > + if (vhost_has_feature(vq, VIRTIO_F_VERSION_1))
> > + vq->is_le = true;
> > +}
> > +#endif
> > +
> > int vhost_init_used(struct vhost_virtqueue *vq)
> > {
> > __virtio16 last_used_idx;
> > int r;
> > - if (!vq->private_data)
> > + if (!vq->private_data) {
> > + vq->is_le = virtio_legacy_is_little_endian();
> > return 0;
> > + }
> > +
> > + vhost_init_is_le(vq);
> >
> > r = vhost_update_used_flags(vq);
> > if (r)
On Tue, 14 Apr 2015 17:13:52 +0200
Greg Kurz <[email protected]> wrote:
> On Tue, 14 Apr 2015 16:20:23 +0200
> Cornelia Huck <[email protected]> wrote:
>
> > On Fri, 10 Apr 2015 12:19:16 +0200
> > Greg Kurz <[email protected]> wrote:
> >
> > > +#ifdef CONFIG_VHOST_SET_ENDIAN_LEGACY
> > > +static void vhost_init_is_le(struct vhost_virtqueue *vq)
> > > +{
> > > + vq->is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1) || !vq->user_be;
> >
> > So if the endianness is not explicitly set to BE, it will be forced to
> > LE (instead of native endian)? Won't that break userspace that does not
> > yet use the new interface when CONFIG_VHOST_SET_ENDIAN_LEGACY is set?
> >
>
> If userspace doesn't use the new interface, then vq->user_be will retain its
> default value that was set in vhost_vq_reset(), i.e. :
>
> vq->user_be = !virtio_legacy_is_little_endian();
>
> This means default is native endian.
>
> What about adding this comment ?
>
> static void vhost_init_is_le(struct vhost_virtqueue *vq)
> {
> /* Note for legacy virtio: user_be is initialized in vhost_vq_reset()
> * according to the host endianness. If userspace does not set an
> * explicit endianness, the default behavior is native endian, as
> * expected by legacy virtio.
> */
Good idea, as this is easy to miss.
> vq->is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1) || !vq->user_be;
> }
>
> > > +}
> > > +#else
> > > +static void vhost_init_is_le(struct vhost_virtqueue *vq)
> > > +{
> > > + if (vhost_has_feature(vq, VIRTIO_F_VERSION_1))
> > > + vq->is_le = true;
> > > +}
> > > +#endif
> > > +
> > > int vhost_init_used(struct vhost_virtqueue *vq)
> > > {
> > > __virtio16 last_used_idx;
> > > int r;
> > > - if (!vq->private_data)
> > > + if (!vq->private_data) {
> > > + vq->is_le = virtio_legacy_is_little_endian();
> > > return 0;
> > > + }
> > > +
> > > + vhost_init_is_le(vq);
> > >
> > > r = vhost_update_used_flags(vq);
> > > if (r)
>
On Fri, 10 Apr 2015 12:15:00 +0200
Greg Kurz <[email protected]> wrote:
> Hi,
>
> This patchset allows vhost to be used with legacy virtio when guest and host
> have a different endianness.
>
> Patch 7 got rewritten according to Cornelia's and Michael's comments. I have
> also introduced patch 8 that brings BE vnet headers support to tun/macvtap.
>
> This series is enough to have vhost_net working flawlessly. I could
> succesfully reboot guests from ppc64 to ppc64le and vice-versa on ppc64
> and ppc64le hosts.
>
Michael,
I am ready to post v5 with the changes suggested by Cornelia... Do you have any
extra comments on this v4 ?
--
Greg
> ---
>
> Greg Kurz (8):
> virtio: introduce virtio_is_little_endian() helper
> tun: add tun_is_little_endian() helper
> macvtap: introduce macvtap_is_little_endian() helper
> vringh: introduce vringh_is_little_endian() helper
> vhost: introduce vhost_is_little_endian() helper
> virtio: add explicit big-endian support to memory accessors
> vhost: feature to set the vring endianness
> macvtap/tun: add VNET_BE flag
>
>
> drivers/net/Kconfig | 12 ++++++
> drivers/net/macvtap.c | 69 ++++++++++++++++++++++++++++++++++-
> drivers/net/tun.c | 71 +++++++++++++++++++++++++++++++++++-
> drivers/vhost/Kconfig | 10 +++++
> drivers/vhost/vhost.c | 76 ++++++++++++++++++++++++++++++++++++++
> drivers/vhost/vhost.h | 25 ++++++++++---
> include/linux/virtio_byteorder.h | 24 +++++++-----
> include/linux/virtio_config.h | 19 +++++++---
> include/linux/vringh.h | 19 +++++++---
> include/uapi/linux/if_tun.h | 2 +
> include/uapi/linux/vhost.h | 9 +++++
> 11 files changed, 303 insertions(+), 33 deletions(-)
>
> --
> Greg
>
> _______________________________________________
> Virtualization mailing list
> [email protected]
> https://lists.linuxfoundation.org/mailman/listinfo/virtualization
>
On Fri, 17 Apr 2015 11:18:13 +0200
Greg Kurz <[email protected]> wrote:
> On Fri, 10 Apr 2015 12:15:00 +0200
> Greg Kurz <[email protected]> wrote:
>
> > Hi,
> >
> > This patchset allows vhost to be used with legacy virtio when guest and host
> > have a different endianness.
> >
> > Patch 7 got rewritten according to Cornelia's and Michael's comments. I have
> > also introduced patch 8 that brings BE vnet headers support to tun/macvtap.
> >
> > This series is enough to have vhost_net working flawlessly. I could
> > succesfully reboot guests from ppc64 to ppc64le and vice-versa on ppc64
> > and ppc64le hosts.
> >
>
> Michael,
>
> I am ready to post v5 with the changes suggested by Cornelia... Do you have any
> extra comments on this v4 ?
>
Ping ?
> --
> Greg
>
> > ---
> >
> > Greg Kurz (8):
> > virtio: introduce virtio_is_little_endian() helper
> > tun: add tun_is_little_endian() helper
> > macvtap: introduce macvtap_is_little_endian() helper
> > vringh: introduce vringh_is_little_endian() helper
> > vhost: introduce vhost_is_little_endian() helper
> > virtio: add explicit big-endian support to memory accessors
> > vhost: feature to set the vring endianness
> > macvtap/tun: add VNET_BE flag
> >
> >
> > drivers/net/Kconfig | 12 ++++++
> > drivers/net/macvtap.c | 69 ++++++++++++++++++++++++++++++++++-
> > drivers/net/tun.c | 71 +++++++++++++++++++++++++++++++++++-
> > drivers/vhost/Kconfig | 10 +++++
> > drivers/vhost/vhost.c | 76 ++++++++++++++++++++++++++++++++++++++
> > drivers/vhost/vhost.h | 25 ++++++++++---
> > include/linux/virtio_byteorder.h | 24 +++++++-----
> > include/linux/virtio_config.h | 19 +++++++---
> > include/linux/vringh.h | 19 +++++++---
> > include/uapi/linux/if_tun.h | 2 +
> > include/uapi/linux/vhost.h | 9 +++++
> > 11 files changed, 303 insertions(+), 33 deletions(-)
> >
> > --
> > Greg
> >
> > _______________________________________________
> > Virtualization mailing list
> > [email protected]
> > https://lists.linuxfoundation.org/mailman/listinfo/virtualization
> >
>
On Fri, Apr 10, 2015 at 12:19:16PM +0200, Greg Kurz wrote:
> This patch brings cross-endian support to vhost when used to implement
> legacy virtio devices. Since it is a relatively rare situation, the
> feature availability is controlled by a kernel config option (not set
> by default).
>
> The vq->is_le boolean field is added to cache the endianness to be
> used for ring accesses. It defaults to native endian, as expected
> by legacy virtio devices. When the ring gets active, we force little
> endian if the device is modern. When the ring is deactivated, we
> revert to the native endian default.
>
> If cross-endian was compiled in, a vq->user_be boolean field is added
> so that userspace may request a specific endianness. This field is
> used to override the default when activating the ring of a legacy
> device. It has no effect on modern devices.
>
> Signed-off-by: Greg Kurz <[email protected]>
> ---
> drivers/vhost/Kconfig | 10 ++++++
> drivers/vhost/vhost.c | 76 +++++++++++++++++++++++++++++++++++++++++++-
> drivers/vhost/vhost.h | 12 +++++--
> include/uapi/linux/vhost.h | 9 +++++
> 4 files changed, 103 insertions(+), 4 deletions(-)
>
> Changes since v3:
> - VHOST_SET_VRING_ENDIAN_LEGACY ioctl renamed to VHOST_SET_VRING_BIG_ENDIAN
> - ioctl API is now: 0 for le, 1 for be, other values are EINVAL
> - ioctl doesn't filter out modern devices
> - ioctl stubs return ENOIOCTLCMD
> - forbid endianness changes when vring is active
> - logic now handled with vq->is_le and vq->user_be according to device
> start/stop as suggested by Michael
>
> diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig
> index 017a1e8..0aec88c 100644
> --- a/drivers/vhost/Kconfig
> +++ b/drivers/vhost/Kconfig
> @@ -32,3 +32,13 @@ config VHOST
> ---help---
> This option is selected by any driver which needs to access
> the core of vhost.
> +
> +config VHOST_SET_ENDIAN_LEGACY
I'd prefer namin this VHOST_CROSS_ENDIAN_LEGACY
> + bool "Cross-endian support for host kernel accelerator"
> + default n
> + ---help---
> + This option allows vhost to support guests with a different byte
> + ordering from host. It is disabled by default since it adds overhead
> + and it is only needed by a few platforms (powerpc and arm).
and is only useful on a few platforms (powerpc and arm).
"it" seems to refer to "overhead", which is rarely needed.
needed is a bit too strong, you can always e.g. run virtio
in userspace.
> +
> + If unsure, say "N".
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index 2ee2826..3eb756b 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -199,6 +199,10 @@ static void vhost_vq_reset(struct vhost_dev *dev,
> vq->call = NULL;
> vq->log_ctx = NULL;
> vq->memory = NULL;
> + vq->is_le = virtio_legacy_is_little_endian();
> +#ifdef CONFIG_VHOST_SET_ENDIAN_LEGACY
> + vq->user_be = !vq->is_le;
> +#endif
add a wrapper for this too?
> }
>
> static int vhost_worker(void *data)
> @@ -630,6 +634,53 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
> return 0;
> }
>
> +#ifdef CONFIG_VHOST_SET_ENDIAN_LEGACY
> +static long vhost_set_vring_big_endian(struct vhost_virtqueue *vq,
> + int __user *argp)
> +{
> + struct vhost_vring_state s;
> +
> + if (vq->private_data)
> + return -EBUSY;
> +
> + if (copy_from_user(&s, argp, sizeof(s)))
> + return -EFAULT;
> +
> + if (s.num && s.num != 1)
s.num & ~0x1
> + return -EINVAL;
> +
> + vq->user_be = s.num;
> +
> + return 0;
> +}
> +
> +static long vhost_get_vring_big_endian(struct vhost_virtqueue *vq, u32 idx,
> + int __user *argp)
> +{
> + struct vhost_vring_state s = {
> + .index = idx,
> + .num = vq->user_be
> + };
> +
> + if (copy_to_user(argp, &s, sizeof(s)))
> + return -EFAULT;
> +
> + return 0;
> +}
> +#else
> +static long vhost_set_vring_big_endian(struct vhost_virtqueue *vq,
> + int __user *argp)
> +{
> + return -ENOIOCTLCMD;
> +}
> +
> +static long vhost_get_vring_big_endian(struct vhost_virtqueue *vq, u32 idx,
> + int __user *argp)
> +{
> + return -ENOIOCTLCMD;
> +}
> +#endif /* CONFIG_VHOST_SET_ENDIAN_LEGACY */
> +
> long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp)
> {
> struct file *eventfp, *filep = NULL;
> @@ -806,6 +857,12 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp)
> } else
> filep = eventfp;
> break;
> + case VHOST_SET_VRING_BIG_ENDIAN:
> + r = vhost_set_vring_big_endian(vq, argp);
> + break;
> + case VHOST_GET_VRING_BIG_ENDIAN:
> + r = vhost_get_vring_big_endian(vq, idx, argp);
> + break;
> default:
> r = -ENOIOCTLCMD;
> }
> @@ -1040,12 +1097,29 @@ static int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 avail_event)
> return 0;
> }
>
> +#ifdef CONFIG_VHOST_SET_ENDIAN_LEGACY
> +static void vhost_init_is_le(struct vhost_virtqueue *vq)
> +{
> + vq->is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1) || !vq->user_be;
> +}
> +#else
> +static void vhost_init_is_le(struct vhost_virtqueue *vq)
> +{
> + if (vhost_has_feature(vq, VIRTIO_F_VERSION_1))
> + vq->is_le = true;
> +}
> +#endif
> +
I'd prefer localizing ifdefery somewhere near top of file.
> int vhost_init_used(struct vhost_virtqueue *vq)
> {
> __virtio16 last_used_idx;
> int r;
> - if (!vq->private_data)
> + if (!vq->private_data) {
> + vq->is_le = virtio_legacy_is_little_endian();
> return 0;
> + }
> +
> + vhost_init_is_le(vq);
>
> r = vhost_update_used_flags(vq);
> if (r)
> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> index 4e9a186..04b2add 100644
> --- a/drivers/vhost/vhost.h
> +++ b/drivers/vhost/vhost.h
> @@ -106,6 +106,14 @@ struct vhost_virtqueue {
> /* Log write descriptors */
> void __user *log_base;
> struct vhost_log *log;
> +
> + /* Ring endianness. Defaults to legacy native endianness.
> + * Set to true when starting a modern virtio device. */
> + bool is_le;
> +#ifdef CONFIG_VHOST_SET_ENDIAN_LEGACY
> + /* Ring endianness requested by userspace for cross-endian support. */
> + bool user_be;
> +#endif
> };
>
> struct vhost_dev {
> @@ -175,9 +183,7 @@ static inline bool vhost_has_feature(struct vhost_virtqueue *vq, int bit)
>
> static inline bool vhost_is_little_endian(struct vhost_virtqueue *vq)
> {
> - if (vhost_has_feature(vq, VIRTIO_F_VERSION_1))
> - return true;
> - return virtio_legacy_is_little_endian();
> + return vq->is_le;
> }
>
> /* Memory accessors */
> diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h
> index bb6a5b4..5cdebbc 100644
> --- a/include/uapi/linux/vhost.h
> +++ b/include/uapi/linux/vhost.h
> @@ -103,6 +103,15 @@ struct vhost_memory {
> /* Get accessor: reads index, writes value in num */
> #define VHOST_GET_VRING_BASE _IOWR(VHOST_VIRTIO, 0x12, struct vhost_vring_state)
>
> +/* Set the vring byte order in num. This is a legacy only API that is simply
> + * ignored when VIRTIO_F_VERSION_1 is set.
> + * 0 to set to little-endian
> + * 1 to set to big-endian
How about defines for these?
> + * other values return EINVAL.
> + */
> +#define VHOST_SET_VRING_BIG_ENDIAN _IOW(VHOST_VIRTIO, 0x13, struct vhost_vring_state)
> +#define VHOST_GET_VRING_BIG_ENDIAN _IOW(VHOST_VIRTIO, 0x14, struct vhost_vring_state)
> +
> /* The following ioctls use eventfd file descriptors to signal and poll
> * for events. */
>
On Fri, Apr 10, 2015 at 12:20:21PM +0200, Greg Kurz wrote:
> The VNET_LE flag was introduced to fix accesses to virtio 1.0 headers
> that are always little-endian. It can also be used to handle the special
> case of a legacy little-endian device implemented by a big-endian host.
>
> Let's add a flag and ioctls for big-endian devices as well. If both flags
> are set, little-endian wins.
>
> Since this is isn't a common usecase, the feature is controlled by a kernel
> config option (not set by default).
>
> Both macvtap and tun are covered by this patch since they share the same
> API with userland.
>
> Signed-off-by: Greg Kurz <[email protected]>
> ---
> drivers/net/Kconfig | 12 ++++++++
> drivers/net/macvtap.c | 60 +++++++++++++++++++++++++++++++++++++++++-
> drivers/net/tun.c | 62 ++++++++++++++++++++++++++++++++++++++++++-
> include/uapi/linux/if_tun.h | 2 +
> 4 files changed, 134 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
> index df51d60..f0e23a0 100644
> --- a/drivers/net/Kconfig
> +++ b/drivers/net/Kconfig
> @@ -244,6 +244,18 @@ config TUN
>
> If you don't know what to use this for, you don't need it.
>
> +config TUN_VNET_BE
> + bool "Support for big-endian vnet headers"
> + default n
> + ---help---
> + This option allows TUN/TAP and MACVTAP device drivers to parse
> + vnet headers that are in big-endian byte order. It is useful
> + when the headers come from a big-endian legacy virtio driver and
> + the host is little-endian.
> +
> + Unless you have a little-endian system hosting a big-endian virtual
> + machine with a virtio NIC, you should say N.
> +
should mention cross-endian, not big-endian, right?
> config VETH
> tristate "Virtual ethernet pair device"
> ---help---
> diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
> index 0a03a66..e0ab1b7 100644
> --- a/drivers/net/macvtap.c
> +++ b/drivers/net/macvtap.c
> @@ -48,12 +48,27 @@ struct macvtap_queue {
> #define MACVTAP_FEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE)
>
> #define MACVTAP_VNET_LE 0x80000000
> +#define MACVTAP_VNET_BE 0x40000000
> +
> +#ifdef CONFIG_TUN_VNET_BE
> +static inline bool macvtap_legacy_is_little_endian(struct macvtap_queue *q)
> +{
> + if (q->flags & MACVTAP_VNET_BE)
> + return false;
> + return virtio_legacy_is_little_endian();
> +}
> +#else
> +static inline bool macvtap_legacy_is_little_endian(struct macvtap_queue *q)
> +{
> + return virtio_legacy_is_little_endian();
> +}
> +#endif
>
> static inline bool macvtap_is_little_endian(struct macvtap_queue *q)
> {
> if (q->flags & MACVTAP_VNET_LE)
> return true;
> - return virtio_legacy_is_little_endian();
> + return macvtap_legacy_is_little_endian(q);
> }
>
> static inline u16 macvtap16_to_cpu(struct macvtap_queue *q, __virtio16 val)
> @@ -1000,6 +1015,43 @@ static int set_offload(struct macvtap_queue *q, unsigned long arg)
> return 0;
> }
>
> +#ifdef CONFIG_TUN_VNET_BE
> +static long macvtap_get_vnet_be(struct macvtap_queue *q, int __user *sp)
> +{
> + int s = !!(q->flags & MACVTAP_VNET_BE);
> +
> + if (put_user(s, sp))
> + return -EFAULT;
> +
> + return 0;
> +}
> +
> +static long macvtap_set_vnet_be(struct macvtap_queue *q, int __user *sp)
> +{
> + int s;
> +
> + if (get_user(s, sp))
> + return -EFAULT;
> +
> + if (s)
> + q->flags |= MACVTAP_VNET_BE;
> + else
> + q->flags &= ~MACVTAP_VNET_BE;
> +
> + return 0;
> +}
> +#else
> +static long macvtap_get_vnet_be(struct macvtap_queue *q, int __user *argp)
> +{
> + return -EINVAL;
> +}
> +
> +static long macvtap_set_vnet_be(struct macvtap_queue *q, int __user *argp)
> +{
> + return -EINVAL;
> +}
> +#endif /* CONFIG_TUN_VNET_BE */
> +
> /*
> * provide compatibility with generic tun/tap interface
> */
> @@ -1097,6 +1149,12 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd,
> q->flags &= ~MACVTAP_VNET_LE;
> return 0;
>
> + case TUNGETVNETBE:
> + return macvtap_get_vnet_be(q, sp);
> +
> + case TUNSETVNETBE:
> + return macvtap_set_vnet_be(q, sp);
> +
> case TUNSETOFFLOAD:
> /* let the user check for future flags */
> if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index 053f9b6..4e12488 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -111,6 +111,7 @@ do { \
> #define TUN_FASYNC IFF_ATTACH_QUEUE
> /* High bits in flags field are unused. */
> #define TUN_VNET_LE 0x80000000
> +#define TUN_VNET_BE 0x40000000
>
> #define TUN_FEATURES (IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR | \
> IFF_MULTI_QUEUE)
> @@ -206,11 +207,25 @@ struct tun_struct {
> u32 flow_count;
> };
>
> +#ifdef CONFIG_TUN_VNET_BE
> +static inline bool tun_legacy_is_little_endian(struct tun_struct *tun)
> +{
> + if (tun->flags & TUN_VNET_BE)
> + return false;
> + return virtio_legacy_is_little_endian();
> +}
> +#else
> +static inline bool tun_legacy_is_little_endian(struct tun_struct *tun)
> +{
> + return virtio_legacy_is_little_endian();
> +}
> +#endif
> +
> static inline bool tun_is_little_endian(struct tun_struct *tun)
> {
> if (tun->flags & TUN_VNET_LE)
> return true;
> - return virtio_legacy_is_little_endian();
> + return tun_legacy_is_little_endian(tun);
> }
>
> static inline u16 tun16_to_cpu(struct tun_struct *tun, __virtio16 val)
> @@ -1836,6 +1851,43 @@ unlock:
> return ret;
> }
>
> +#ifdef CONFIG_TUN_VNET_BE
> +static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp)
> +{
> + int be = !!(tun->flags & TUN_VNET_BE);
> +
> + if (put_user(be, argp))
> + return EFAULT;
> +
> + return 0;
> +}
> +
> +static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp)
> +{
> + int be;
> +
> + if (get_user(be, argp))
> + return -EFAULT;
> +
> + if (be)
> + tun->flags |= TUN_VNET_BE;
> + else
> + tun->flags &= ~TUN_VNET_BE;
> +
> + return 0;
> +}
> +#else
> +static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp)
> +{
> + return -EINVAL;
> +}
> +
> +static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp)
> +{
> + return -EINVAL;
> +}
> +#endif /* CONFIG_TUN_VNET_BE */
> +
> static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
> unsigned long arg, int ifreq_len)
> {
> @@ -2065,6 +2117,14 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
> tun->flags &= ~TUN_VNET_LE;
> break;
>
> + case TUNGETVNETBE:
> + ret = tun_get_vnet_be(tun, argp);
> + break;
> +
> + case TUNSETVNETBE:
> + ret = tun_set_vnet_be(tun, argp);
> + break;
> +
> case TUNATTACHFILTER:
> /* Can be set only for TAPs */
> ret = -EINVAL;
> diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h
> index 50ae243..bcac4c0 100644
> --- a/include/uapi/linux/if_tun.h
> +++ b/include/uapi/linux/if_tun.h
> @@ -50,6 +50,8 @@
> #define TUNGETFILTER _IOR('T', 219, struct sock_fprog)
> #define TUNSETVNETLE _IOW('T', 220, int)
> #define TUNGETVNETLE _IOR('T', 221, int)
> +#define TUNSETVNETBE _IOW('T', 222, int)
> +#define TUNGETVNETBE _IOR('T', 223, int)
>
> /* TUNSETIFF ifr flags */
> #define IFF_TUN 0x0001
On Fri, Apr 10, 2015 at 12:16:20PM +0200, Greg Kurz wrote:
> The current memory accessors logic is:
> - little endian if little_endian
> - native endian (i.e. no byteswap) if !little_endian
>
> If we want to fully support cross-endian vhost, we also need to be
> able to convert to big endian.
>
> Instead of changing the little_endian argument to some 3-value enum, this
> patch changes the logic to:
> - little endian if little_endian
> - big endian if !little_endian
>
> The native endian case is handled by all users with a trivial helper. This
> patch doesn't change any functionality, nor it does add overhead.
>
> Signed-off-by: Greg Kurz <[email protected]>
> ---
OK overall. Style comment:
> drivers/net/macvtap.c | 4 +++-
> drivers/net/tun.c | 4 +++-
> drivers/vhost/vhost.h | 4 +++-
> include/linux/virtio_byteorder.h | 24 ++++++++++++++----------
> include/linux/virtio_config.h | 4 +++-
> include/linux/vringh.h | 4 +++-
> 6 files changed, 29 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
> index a2f2958..0a03a66 100644
> --- a/drivers/net/macvtap.c
> +++ b/drivers/net/macvtap.c
> @@ -51,7 +51,9 @@ struct macvtap_queue {
>
> static inline bool macvtap_is_little_endian(struct macvtap_queue *q)
> {
> - return q->flags & MACVTAP_VNET_LE;
> + if (q->flags & MACVTAP_VNET_LE)
> + return true;
> + return virtio_legacy_is_little_endian();
> }
>
I'd prefer a bit more symmetry:
+ if (q->flags & MACVTAP_VNET_LE)
+ return true;
+ else
+ return virtio_legacy_is_little_endian();
Or better just:
return (q->flags & MACVTAP_VNET_LE) ? true : virtio_legacy_is_little_endian();
might make line long, but your follow-up patch makes it short again,
so that's ok.
> static inline u16 macvtap16_to_cpu(struct macvtap_queue *q, __virtio16 val)
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index 3c3d6c0..053f9b6 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -208,7 +208,9 @@ struct tun_struct {
>
> static inline bool tun_is_little_endian(struct tun_struct *tun)
> {
> - return tun->flags & TUN_VNET_LE;
> + if (tun->flags & TUN_VNET_LE)
> + return true;
> + return virtio_legacy_is_little_endian();
> }
>
> static inline u16 tun16_to_cpu(struct tun_struct *tun, __virtio16 val)
> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> index 6a49960..4e9a186 100644
> --- a/drivers/vhost/vhost.h
> +++ b/drivers/vhost/vhost.h
> @@ -175,7 +175,9 @@ static inline bool vhost_has_feature(struct vhost_virtqueue *vq, int bit)
>
> static inline bool vhost_is_little_endian(struct vhost_virtqueue *vq)
> {
> - return vhost_has_feature(vq, VIRTIO_F_VERSION_1);
> + if (vhost_has_feature(vq, VIRTIO_F_VERSION_1))
> + return true;
> + return virtio_legacy_is_little_endian();
> }
>
> /* Memory accessors */
> diff --git a/include/linux/virtio_byteorder.h b/include/linux/virtio_byteorder.h
> index 51865d0..ce63a2c 100644
> --- a/include/linux/virtio_byteorder.h
> +++ b/include/linux/virtio_byteorder.h
> @@ -3,17 +3,21 @@
> #include <linux/types.h>
> #include <uapi/linux/virtio_types.h>
>
> -/*
> - * Low-level memory accessors for handling virtio in modern little endian and in
> - * compatibility native endian format.
> - */
> +static inline bool virtio_legacy_is_little_endian(void)
> +{
> +#ifdef __LITTLE_ENDIAN
> + return true;
> +#else
> + return false;
> +#endif
> +}
>
> static inline u16 __virtio16_to_cpu(bool little_endian, __virtio16 val)
> {
> if (little_endian)
> return le16_to_cpu((__force __le16)val);
> else
> - return (__force u16)val;
> + return be16_to_cpu((__force __be16)val);
> }
>
> static inline __virtio16 __cpu_to_virtio16(bool little_endian, u16 val)
> @@ -21,7 +25,7 @@ static inline __virtio16 __cpu_to_virtio16(bool little_endian, u16 val)
> if (little_endian)
> return (__force __virtio16)cpu_to_le16(val);
> else
> - return (__force __virtio16)val;
> + return (__force __virtio16)cpu_to_be16(val);
> }
>
> static inline u32 __virtio32_to_cpu(bool little_endian, __virtio32 val)
> @@ -29,7 +33,7 @@ static inline u32 __virtio32_to_cpu(bool little_endian, __virtio32 val)
> if (little_endian)
> return le32_to_cpu((__force __le32)val);
> else
> - return (__force u32)val;
> + return be32_to_cpu((__force __be32)val);
> }
>
> static inline __virtio32 __cpu_to_virtio32(bool little_endian, u32 val)
> @@ -37,7 +41,7 @@ static inline __virtio32 __cpu_to_virtio32(bool little_endian, u32 val)
> if (little_endian)
> return (__force __virtio32)cpu_to_le32(val);
> else
> - return (__force __virtio32)val;
> + return (__force __virtio32)cpu_to_be32(val);
> }
>
> static inline u64 __virtio64_to_cpu(bool little_endian, __virtio64 val)
> @@ -45,7 +49,7 @@ static inline u64 __virtio64_to_cpu(bool little_endian, __virtio64 val)
> if (little_endian)
> return le64_to_cpu((__force __le64)val);
> else
> - return (__force u64)val;
> + return be64_to_cpu((__force __be64)val);
> }
>
> static inline __virtio64 __cpu_to_virtio64(bool little_endian, u64 val)
> @@ -53,7 +57,7 @@ static inline __virtio64 __cpu_to_virtio64(bool little_endian, u64 val)
> if (little_endian)
> return (__force __virtio64)cpu_to_le64(val);
> else
> - return (__force __virtio64)val;
> + return (__force __virtio64)cpu_to_be64(val);
> }
>
> #endif /* _LINUX_VIRTIO_BYTEORDER */
> diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
> index bd1a582..36a6daa 100644
> --- a/include/linux/virtio_config.h
> +++ b/include/linux/virtio_config.h
> @@ -207,7 +207,9 @@ int virtqueue_set_affinity(struct virtqueue *vq, int cpu)
>
> static inline bool virtio_is_little_endian(struct virtio_device *vdev)
> {
> - return virtio_has_feature(vdev, VIRTIO_F_VERSION_1);
> + if (virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
> + return true;
> + return virtio_legacy_is_little_endian();
> }
>
> /* Memory accessors */
> diff --git a/include/linux/vringh.h b/include/linux/vringh.h
> index 3ed62ef..d786c2d 100644
> --- a/include/linux/vringh.h
> +++ b/include/linux/vringh.h
> @@ -228,7 +228,9 @@ static inline void vringh_notify(struct vringh *vrh)
>
> static inline bool vringh_is_little_endian(const struct vringh *vrh)
> {
> - return vrh->little_endian;
> + if (vrh->little_endian)
> + return true;
> + return virtio_legacy_is_little_endian();
> }
>
> static inline u16 vringh16_to_cpu(const struct vringh *vrh, __virtio16 val)
On Fri, Apr 10, 2015 at 12:15:00PM +0200, Greg Kurz wrote:
> Hi,
>
> This patchset allows vhost to be used with legacy virtio when guest and host
> have a different endianness.
>
> Patch 7 got rewritten according to Cornelia's and Michael's comments. I have
> also introduced patch 8 that brings BE vnet headers support to tun/macvtap.
>
> This series is enough to have vhost_net working flawlessly. I could
> succesfully reboot guests from ppc64 to ppc64le and vice-versa on ppc64
> and ppc64le hosts.
Looks good overall.
A couple of style comments.
Thanks!
> ---
>
> Greg Kurz (8):
> virtio: introduce virtio_is_little_endian() helper
> tun: add tun_is_little_endian() helper
> macvtap: introduce macvtap_is_little_endian() helper
> vringh: introduce vringh_is_little_endian() helper
> vhost: introduce vhost_is_little_endian() helper
> virtio: add explicit big-endian support to memory accessors
> vhost: feature to set the vring endianness
> macvtap/tun: add VNET_BE flag
>
>
> drivers/net/Kconfig | 12 ++++++
> drivers/net/macvtap.c | 69 ++++++++++++++++++++++++++++++++++-
> drivers/net/tun.c | 71 +++++++++++++++++++++++++++++++++++-
> drivers/vhost/Kconfig | 10 +++++
> drivers/vhost/vhost.c | 76 ++++++++++++++++++++++++++++++++++++++
> drivers/vhost/vhost.h | 25 ++++++++++---
> include/linux/virtio_byteorder.h | 24 +++++++-----
> include/linux/virtio_config.h | 19 +++++++---
> include/linux/vringh.h | 19 +++++++---
> include/uapi/linux/if_tun.h | 2 +
> include/uapi/linux/vhost.h | 9 +++++
> 11 files changed, 303 insertions(+), 33 deletions(-)
>
> --
> Greg
On Tue, 21 Apr 2015 16:04:23 +0200
"Michael S. Tsirkin" <[email protected]> wrote:
> On Fri, Apr 10, 2015 at 12:19:16PM +0200, Greg Kurz wrote:
> > This patch brings cross-endian support to vhost when used to implement
> > legacy virtio devices. Since it is a relatively rare situation, the
> > feature availability is controlled by a kernel config option (not set
> > by default).
> >
> > The vq->is_le boolean field is added to cache the endianness to be
> > used for ring accesses. It defaults to native endian, as expected
> > by legacy virtio devices. When the ring gets active, we force little
> > endian if the device is modern. When the ring is deactivated, we
> > revert to the native endian default.
> >
> > If cross-endian was compiled in, a vq->user_be boolean field is added
> > so that userspace may request a specific endianness. This field is
> > used to override the default when activating the ring of a legacy
> > device. It has no effect on modern devices.
> >
> > Signed-off-by: Greg Kurz <[email protected]>
> > ---
> > drivers/vhost/Kconfig | 10 ++++++the
> > drivers/vhost/vhost.c | 76 +++++++++++++++++++++++++++++++++++++++++++-
> > drivers/vhost/vhost.h | 12 +++++--
> > include/uapi/linux/vhost.h | 9 +++++
> > 4 files changed, 103 insertions(+), 4 deletions(-)
> >
> > Changes since v3:
> > - VHOST_SET_VRING_ENDIAN_LEGACY ioctl renamed to VHOST_SET_VRING_BIG_ENDIAN
> > - ioctl API is now: 0 for le, 1 for be, other values are EINVAL
> > - ioctl doesn't filter out modern devices
> > - ioctl stubs return ENOIOCTLCMD
> > - forbid endianness changes when vring is active
> > - logic now handled with vq->is_le and vq->user_be according to device
> > start/stop as suggested by Michael
> >
> > diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig
> > index 017a1e8..0aec88c 100644
> > --- a/drivers/vhost/Kconfig
> > +++ b/drivers/vhost/Kconfig
> > @@ -32,3 +32,13 @@ config VHOST
> > ---help---
> > This option is selected by any driver which needs to access
> > the core of vhost.
> > +
> > +config VHOST_SET_ENDIAN_LEGACY
>
> I'd prefer namin this VHOST_CROSS_ENDIAN_LEGACY
>
Yes, makes sense. I'll make sure most of the cross-endian changes are
easy to grep.
> > + bool "Cross-endian support for host kernel accelerator"
> > + default n
> > + ---help---
> > + This option allows vhost to support guests with a different byte
> > + ordering from host. It is disabled by default since it adds overhead
> > + and it is only needed by a few platforms (powerpc and arm).
>
> and is only useful on a few platforms (powerpc and arm).
>
> "it" seems to refer to "overhead", which is rarely needed.
> needed is a bit too strong, you can always e.g. run virtio
> in userspace.
>
My poor English again... it seems you understood what I wanted to
write though :)
> > +
> > + If unsure, say "N".
> > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> > index 2ee2826..3eb756b 100644the
> > --- a/drivers/vhost/vhost.c
> > +++ b/drivers/vhost/vhost.c
> > @@ -199,6 +199,10 @@ static void vhost_vq_reset(struct vhost_dev *dev,
> > vq->call = NULL;
> > vq->log_ctx = NULL;
> > vq->memory = NULL;
> > + vq->is_le = virtio_legacy_is_little_endian();
> > +#ifdef CONFIG_VHOST_SET_ENDIAN_LEGACY
> > + vq->user_be = !vq->is_le;
> > +#endif
>
> add a wrapper for this too?
>
Will do.
> > }
> >
> > static int vhost_worker(void *data)
> > @@ -630,6 +634,53 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
> > return 0;
> > }
> >
> > +#ifdef CONFIG_VHOST_SET_ENDIAN_LEGACY
> > +static long vhost_set_vring_big_endian(struct vhost_virtqueue *vq,
> > + int __user *argp)
> > +{
> > + struct vhost_vring_state s;
> > +
> > + if (vq->private_data)
> > + return -EBUSY;
> > +
> > + if (copy_from_user(&s, argp, sizeof(s)))
> > + return -EFAULT;
> > +
> > + if (s.num && s.num != 1)
>
> s.num & ~0x1
>
Since s.num is unsigned and I assume this won't change, what about
s.num > 1 as suggested by Cornelia ?
>
> > + return -EINVAL;
> > +
> > + vq->user_be = s.num;
> > +
> > + return 0;
> > +}
> > +
> > +static long vhost_get_vring_big_endian(struct vhost_virtqueue *vq, u32 idx,
> > + int __user *argp)
> > +{
> > + struct vhost_vring_state s = {
> > + .index = idx,
> > + .num = vq->user_be
> > + };
> > +
> > + if (copy_to_user(argp, &s, sizeof(s)))
> > + return -EFAULT;
> > +
> > + return 0;
> > +}
> > +#else
> > +static long vhost_set_vring_big_endian(struct vhost_virtqueue *vq,
> > + int __user *argp)
> > +{
> > + return -ENOIOCTLCMD;
> > +}
> > +
> > +static long vhost_get_vring_big_endian(struct vhost_virtqueue *vq, u32 idx,
> > + int __user *argp)
> > +{
> > + return -ENOIOCTLCMD;
> > +}
> > +#endif /* CONFIG_VHOST_SET_ENDIAN_LEGACY */
> > +
> > long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp)
> > {
> > struct file *eventfp, *filep = NULL;
> > @@ -806,6 +857,12 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp)
> > } else
> > filep = eventfp;the
> > break;
> > + case VHOST_SET_VRING_BIG_ENDIAN:
> > + r = vhost_set_vring_big_endian(vq, argp);
> > + break;
> > + case VHOST_GET_VRING_BIG_ENDIAN:
> > + r = vhost_get_vring_big_endian(vq, idx, argp);
> > + break;
> > default:
> > r = -ENOIOCTLCMD;
> > }
> > @@ -1040,12 +1097,29 @@ static int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 avail_event)
> > return 0;
> > }
> >
> > +#ifdef CONFIG_VHOST_SET_ENDIAN_LEGACY
> > +static void vhost_init_is_le(struct vhost_virtqueue *vq)
> > +{
> > + vq->is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1) || !vq->user_be;
> > +}
> > +#else
> > +static void vhost_init_is_le(struct vhost_virtqueue *vq)
> > +{
> > + if (vhost_has_feature(vq, VIRTIO_F_VERSION_1))
> > + vq->is_le = true;
> > +}
> > +#endif
> > +
>
> I'd prefer localizing ifdefery somewhere near top of file.
>
Will do.
> > int vhost_init_used(struct vhost_virtqueue *vq)
> > {
> > __virtio16 last_used_idx;
> > int r;
> > - if (!vq->private_data)
> > + if (!vq->private_data) {
> > + vq->is_le = virtio_legacy_is_little_endian();
> > return 0;
> > + }
> > +
> > + vhost_init_is_le(vq);
> >
> > r = vhost_update_used_flags(vq);
> > if (r)
> > diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> > index 4e9a186..04b2add 100644
> > --- a/drivers/vhost/vhost.h
> > +++ b/drivers/vhost/vhost.h
> > @@ -106,6 +106,14 @@ struct vhost_virtqueue {
> > /* Log write descriptors */
> > void __user *log_base;
> > struct vhost_log *log;
> > +
> > + /* Ring endianness. Defaults to legacy native endianness.
> > + * Set to true when starting a modern virtio device. */
> > + bool is_le;
> > +#ifdef CONFIG_VHOST_SET_ENDIAN_LEGACY
> > + /* Ring endianness requested by userspace for cross-endian support. */
> > + bool user_be;
> > +#endif
> > };
> >
> > struct vhost_dev {
> > @@ -175,9 +183,7 @@ static inline bool vhost_has_feature(struct vhost_virtqueue *vq, int bit)
> >
> > static inline bool vhost_is_little_endian(struct vhost_virtqueue *vq)
> > {
> > - if (vhost_has_feature(vq, VIRTIO_F_VERSION_1))
> > - return true;
> > - return virtio_legacy_is_little_endian();
> > + return vq->is_le;
> > }
> >
> > /* Memory accessors */
> > diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h
> > index bb6a5b4..5cdebbc 100644
> > --- a/include/uapi/linux/vhost.h
> > +++ b/include/uapi/linux/vhost.h
> > @@ -103,6 +103,15 @@ struct vhost_memory {
> > /* Get accessor: reads index, writes value in num */
> > #define VHOST_GET_VRING_BASE _IOWR(VHOST_VIRTIO, 0x12, struct vhost_vring_state)
> >
> > +/* Set the vring byte order in num. This is a legacy only API that is simply
> > + * ignored when VIRTIO_F_VERSION_1 is set.
> > + * 0 to set to little-endian
> > + * 1 to set to big-endian
>
> How about defines for these?
>
Ok. I'll put the defines here so that all the cross-endian stuff
lies in the same hunk. Is it ok for you ?
> > + * other values return EINVAL.
> > + */
> > +#define VHOST_SET_VRING_BIG_ENDIAN _IOW(VHOST_VIRTIO, 0x13, struct vhost_vring_state)
> > +#define VHOST_GET_VRING_BIG_ENDIAN _IOW(VHOST_VIRTIO, 0x14, struct vhost_vring_state)
> > +
> > /* The following ioctls use eventfd file descriptors to signal and poll
> > * for events. */
> >
>
On Tue, 21 Apr 2015 16:06:33 +0200
"Michael S. Tsirkin" <[email protected]> wrote:
> On Fri, Apr 10, 2015 at 12:20:21PM +0200, Greg Kurz wrote:
> > The VNET_LE flag was introduced to fix accesses to virtio 1.0 headers
> > that are always little-endian. It can also be used to handle the special
> > case of a legacy little-endian device implemented by a big-endian host.
> >
> > Let's add a flag and ioctls for big-endian devices as well. If both flags
> > are set, little-endian wins.
> >
> > Since this is isn't a common usecase, the feature is controlled by a kernel
> > config option (not set by default).
> >
> > Both macvtap and tun are covered by this patch since they share the same
> > API with userland.
> >
> > Signed-off-by: Greg Kurz <[email protected]>
> > ---
> > drivers/net/Kconfig | 12 ++++++++
> > drivers/net/macvtap.c | 60 +++++++++++++++++++++++++++++++++++++++++-
> > drivers/net/tun.c | 62 ++++++++++++++++++++++++++++++++++++++++++-
> > include/uapi/linux/if_tun.h | 2 +
> > 4 files changed, 134 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
> > index df51d60..f0e23a0 100644
> > --- a/drivers/net/Kconfig
> > +++ b/drivers/net/Kconfig
> > @@ -244,6 +244,18 @@ config TUN
> >
> > If you don't know what to use this for, you don't need it.
> >
> > +config TUN_VNET_BE
> > + bool "Support for big-endian vnet headers"
> > + default n
> > + ---help---
> > + This option allows TUN/TAP and MACVTAP device drivers to parse
> > + vnet headers that are in big-endian byte order. It is useful
> > + when the headers come from a big-endian legacy virtio driver and
> > + the host is little-endian.
> > +
> > + Unless you have a little-endian system hosting a big-endian virtual
> > + machine with a virtio NIC, you should say N.
> > +
>
> should mention cross-endian, not big-endian, right?
>
The current TUN_VNET_LE related code is already doing cross-endian: without
this patch, one can already run a LE guest on a BE host... wouldn't it be
confusing to mention cross-endian only when the guest is BE ?
What about having a completely distinct implementation for cross-endian that
don't reuse the existing code and defines then ?
> > config VETH
> > tristate "Virtual ethernet pair device"
> > ---help---
> > diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
> > index 0a03a66..e0ab1b7 100644
> > --- a/drivers/net/macvtap.c
> > +++ b/drivers/net/macvtap.c
> > @@ -48,12 +48,27 @@ struct macvtap_queue {
> > #define MACVTAP_FEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE)
> >
> > #define MACVTAP_VNET_LE 0x80000000
> > +#define MACVTAP_VNET_BE 0x40000000
> > +
> > +#ifdef CONFIG_TUN_VNET_BE
> > +static inline bool macvtap_legacy_is_little_endian(struct macvtap_queue *q)
> > +{
> > + if (q->flags & MACVTAP_VNET_BE)
> > + return false;
> > + return virtio_legacy_is_little_endian();
> > +}
> > +#else
> > +static inline bool macvtap_legacy_is_little_endian(struct macvtap_queue *q)
> > +{
> > + return virtio_legacy_is_little_endian();
> > +}
> > +#endif
> >
> > static inline bool macvtap_is_little_endian(struct macvtap_queue *q)
> > {
> > if (q->flags & MACVTAP_VNET_LE)
> > return true;
> > - return virtio_legacy_is_little_endian();
> > + return macvtap_legacy_is_little_endian(q);
> > }
> >
> > static inline u16 macvtap16_to_cpu(struct macvtap_queue *q, __virtio16 val)
> > @@ -1000,6 +1015,43 @@ static int set_offload(struct macvtap_queue *q, unsigned long arg)
> > return 0;
> > }
> >
> > +#ifdef CONFIG_TUN_VNET_BE
> > +static long macvtap_get_vnet_be(struct macvtap_queue *q, int __user *sp)
> > +{
> > + int s = !!(q->flags & MACVTAP_VNET_BE);
> > +
> > + if (put_user(s, sp))
> > + return -EFAULT;
> > +
> > + return 0;
> > +}
> > +
> > +static long macvtap_set_vnet_be(struct macvtap_queue *q, int __user *sp)
> > +{
> > + int s;
> > +
> > + if (get_user(s, sp))
> > + return -EFAULT;
> > +
> > + if (s)
> > + q->flags |= MACVTAP_VNET_BE;
> > + else
> > + q->flags &= ~MACVTAP_VNET_BE;
> > +
> > + return 0;
> > +}
> > +#else
> > +static long macvtap_get_vnet_be(struct macvtap_queue *q, int __user *argp)
> > +{
> > + return -EINVAL;
> > +}
> > +
> > +static long macvtap_set_vnet_be(struct macvtap_queue *q, int __user *argp)
> > +{
> > + return -EINVAL;
> > +}
> > +#endif /* CONFIG_TUN_VNET_BE */
> > +
> > /*
> > * provide compatibility with generic tun/tap interface
> > */
> > @@ -1097,6 +1149,12 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd,
> > q->flags &= ~MACVTAP_VNET_LE;
> > return 0;
> >
> > + case TUNGETVNETBE:
> > + return macvtap_get_vnet_be(q, sp);
> > +
> > + case TUNSETVNETBE:
> > + return macvtap_set_vnet_be(q, sp);
> > +
> > case TUNSETOFFLOAD:
> > /* let the user check for future flags */
> > if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |
> > diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> > index 053f9b6..4e12488 100644
> > --- a/drivers/net/tun.c
> > +++ b/drivers/net/tun.c
> > @@ -111,6 +111,7 @@ do { \
> > #define TUN_FASYNC IFF_ATTACH_QUEUE
> > /* High bits in flags field are unused. */
> > #define TUN_VNET_LE 0x80000000
> > +#define TUN_VNET_BE 0x40000000
> >
> > #define TUN_FEATURES (IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR | \
> > IFF_MULTI_QUEUE)
> > @@ -206,11 +207,25 @@ struct tun_struct {
> > u32 flow_count;
> > };
> >
> > +#ifdef CONFIG_TUN_VNET_BE
> > +static inline bool tun_legacy_is_little_endian(struct tun_struct *tun)
> > +{
> > + if (tun->flags & TUN_VNET_BE)
> > + return false;
> > + return virtio_legacy_is_little_endian();
> > +}
> > +#else
> > +static inline bool tun_legacy_is_little_endian(struct tun_struct *tun)
> > +{
> > + return virtio_legacy_is_little_endian();
> > +}
> > +#endif
> > +
> > static inline bool tun_is_little_endian(struct tun_struct *tun)
> > {
> > if (tun->flags & TUN_VNET_LE)
> > return true;
> > - return virtio_legacy_is_little_endian();
> > + return tun_legacy_is_little_endian(tun);
> > }
> >
> > static inline u16 tun16_to_cpu(struct tun_struct *tun, __virtio16 val)
> > @@ -1836,6 +1851,43 @@ unlock:
> > return ret;
> > }
> >
> > +#ifdef CONFIG_TUN_VNET_BE
> > +static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp)
> > +{
> > + int be = !!(tun->flags & TUN_VNET_BE);
> > +
> > + if (put_user(be, argp))
> > + return EFAULT;
> > +
> > + return 0;
> > +}
> > +
> > +static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp)
> > +{
> > + int be;
> > +
> > + if (get_user(be, argp))
> > + return -EFAULT;
> > +
> > + if (be)
> > + tun->flags |= TUN_VNET_BE;
> > + else
> > + tun->flags &= ~TUN_VNET_BE;
> > +
> > + return 0;
> > +}
> > +#else
> > +static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp)
> > +{
> > + return -EINVAL;
> > +}
> > +
> > +static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp)
> > +{
> > + return -EINVAL;
> > +}
> > +#endif /* CONFIG_TUN_VNET_BE */
> > +
> > static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
> > unsigned long arg, int ifreq_len)
> > {
> > @@ -2065,6 +2117,14 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
> > tun->flags &= ~TUN_VNET_LE;
> > break;
> >
> > + case TUNGETVNETBE:
> > + ret = tun_get_vnet_be(tun, argp);
> > + break;
> > +
> > + case TUNSETVNETBE:
> > + ret = tun_set_vnet_be(tun, argp);
> > + break;
> > +
> > case TUNATTACHFILTER:
> > /* Can be set only for TAPs */
> > ret = -EINVAL;
> > diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h
> > index 50ae243..bcac4c0 100644
> > --- a/include/uapi/linux/if_tun.h
> > +++ b/include/uapi/linux/if_tun.h
> > @@ -50,6 +50,8 @@
> > #define TUNGETFILTER _IOR('T', 219, struct sock_fprog)
> > #define TUNSETVNETLE _IOW('T', 220, int)
> > #define TUNGETVNETLE _IOR('T', 221, int)
> > +#define TUNSETVNETBE _IOW('T', 222, int)
> > +#define TUNGETVNETBE _IOR('T', 223, int)
> >
> > /* TUNSETIFF ifr flags */
> > #define IFF_TUN 0x0001
>
On Tue, 21 Apr 2015 16:09:44 +0200
"Michael S. Tsirkin" <[email protected]> wrote:
> On Fri, Apr 10, 2015 at 12:16:20PM +0200, Greg Kurz wrote:
> > The current memory accessors logic is:
> > - little endian if little_endian
> > - native endian (i.e. no byteswap) if !little_endian
> >
> > If we want to fully support cross-endian vhost, we also need to be
> > able to convert to big endian.
> >
> > Instead of changing the little_endian argument to some 3-value enum, this
> > patch changes the logic to:
> > - little endian if little_endian
> > - big endian if !little_endian
> >
> > The native endian case is handled by all users with a trivial helper. This
> > patch doesn't change any functionality, nor it does add overhead.
> >
> > Signed-off-by: Greg Kurz <[email protected]>
> > ---
>
> OK overall. Style comment:
>
> > drivers/net/macvtap.c | 4 +++-
> > drivers/net/tun.c | 4 +++-
> > drivers/vhost/vhost.h | 4 +++-
> > include/linux/virtio_byteorder.h | 24 ++++++++++++++----------
> > include/linux/virtio_config.h | 4 +++-
> > include/linux/vringh.h | 4 +++-
> > 6 files changed, 29 insertions(+), 15 deletions(-)
> >
> > diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
> > index a2f2958..0a03a66 100644
> > --- a/drivers/net/macvtap.c
> > +++ b/drivers/net/macvtap.c
> > @@ -51,7 +51,9 @@ struct macvtap_queue {
> >
> > static inline bool macvtap_is_little_endian(struct macvtap_queue *q)
> > {
> > - return q->flags & MACVTAP_VNET_LE;
> > + if (q->flags & MACVTAP_VNET_LE)
> > + return true;
> > + return virtio_legacy_is_little_endian();
> > }
> >
>
> I'd prefer a bit more symmetry:
>
> + if (q->flags & MACVTAP_VNET_LE)
> + return true;
> + else
> + return virtio_legacy_is_little_endian();
>
> Or better just:
> return (q->flags & MACVTAP_VNET_LE) ? true : virtio_legacy_is_little_endian();
>
> might make line long, but your follow-up patch makes it short again,
> so that's ok.
>
Will do.
>
> > static inline u16 macvtap16_to_cpu(struct macvtap_queue *q, __virtio16 val)
> > diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> > index 3c3d6c0..053f9b6 100644
> > --- a/drivers/net/tun.c
> > +++ b/drivers/net/tun.c
> > @@ -208,7 +208,9 @@ struct tun_struct {
> >
> > static inline bool tun_is_little_endian(struct tun_struct *tun)
> > {
> > - return tun->flags & TUN_VNET_LE;
> > + if (tun->flags & TUN_VNET_LE)
> > + return true;
> > + return virtio_legacy_is_little_endian();
> > }
> >
> > static inline u16 tun16_to_cpu(struct tun_struct *tun, __virtio16 val)
> > diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> > index 6a49960..4e9a186 100644
> > --- a/drivers/vhost/vhost.h
> > +++ b/drivers/vhost/vhost.h
> > @@ -175,7 +175,9 @@ static inline bool vhost_has_feature(struct vhost_virtqueue *vq, int bit)
> >
> > static inline bool vhost_is_little_endian(struct vhost_virtqueue *vq)
> > {
> > - return vhost_has_feature(vq, VIRTIO_F_VERSION_1);
> > + if (vhost_has_feature(vq, VIRTIO_F_VERSION_1))
> > + return true;
> > + return virtio_legacy_is_little_endian();
> > }
> >
> > /* Memory accessors */
> > diff --git a/include/linux/virtio_byteorder.h b/include/linux/virtio_byteorder.h
> > index 51865d0..ce63a2c 100644
> > --- a/include/linux/virtio_byteorder.h
> > +++ b/include/linux/virtio_byteorder.h
> > @@ -3,17 +3,21 @@
> > #include <linux/types.h>
> > #include <uapi/linux/virtio_types.h>
> >
> > -/*
> > - * Low-level memory accessors for handling virtio in modern little endian and in
> > - * compatibility native endian format.
> > - */
> > +static inline bool virtio_legacy_is_little_endian(void)
> > +{
> > +#ifdef __LITTLE_ENDIAN
> > + return true;
> > +#else
> > + return false;
> > +#endif
> > +}
> >
> > static inline u16 __virtio16_to_cpu(bool little_endian, __virtio16 val)
> > {
> > if (little_endian)
> > return le16_to_cpu((__force __le16)val);
> > else
> > - return (__force u16)val;
> > + return be16_to_cpu((__force __be16)val);
> > }
> >
> > static inline __virtio16 __cpu_to_virtio16(bool little_endian, u16 val)
> > @@ -21,7 +25,7 @@ static inline __virtio16 __cpu_to_virtio16(bool little_endian, u16 val)
> > if (little_endian)
> > return (__force __virtio16)cpu_to_le16(val);
> > else
> > - return (__force __virtio16)val;
> > + return (__force __virtio16)cpu_to_be16(val);
> > }
> >
> > static inline u32 __virtio32_to_cpu(bool little_endian, __virtio32 val)
> > @@ -29,7 +33,7 @@ static inline u32 __virtio32_to_cpu(bool little_endian, __virtio32 val)
> > if (little_endian)
> > return le32_to_cpu((__force __le32)val);
> > else
> > - return (__force u32)val;
> > + return be32_to_cpu((__force __be32)val);
> > }
> >
> > static inline __virtio32 __cpu_to_virtio32(bool little_endian, u32 val)
> > @@ -37,7 +41,7 @@ static inline __virtio32 __cpu_to_virtio32(bool little_endian, u32 val)
> > if (little_endian)
> > return (__force __virtio32)cpu_to_le32(val);
> > else
> > - return (__force __virtio32)val;
> > + return (__force __virtio32)cpu_to_be32(val);
> > }
> >
> > static inline u64 __virtio64_to_cpu(bool little_endian, __virtio64 val)
> > @@ -45,7 +49,7 @@ static inline u64 __virtio64_to_cpu(bool little_endian, __virtio64 val)
> > if (little_endian)
> > return le64_to_cpu((__force __le64)val);
> > else
> > - return (__force u64)val;
> > + return be64_to_cpu((__force __be64)val);
> > }
> >
> > static inline __virtio64 __cpu_to_virtio64(bool little_endian, u64 val)
> > @@ -53,7 +57,7 @@ static inline __virtio64 __cpu_to_virtio64(bool little_endian, u64 val)
> > if (little_endian)
> > return (__force __virtio64)cpu_to_le64(val);
> > else
> > - return (__force __virtio64)val;
> > + return (__force __virtio64)cpu_to_be64(val);
> > }
> >
> > #endif /* _LINUX_VIRTIO_BYTEORDER */
> > diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
> > index bd1a582..36a6daa 100644
> > --- a/include/linux/virtio_config.h
> > +++ b/include/linux/virtio_config.h
> > @@ -207,7 +207,9 @@ int virtqueue_set_affinity(struct virtqueue *vq, int cpu)
> >
> > static inline bool virtio_is_little_endian(struct virtio_device *vdev)
> > {
> > - return virtio_has_feature(vdev, VIRTIO_F_VERSION_1);
> > + if (virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
> > + return true;
> > + return virtio_legacy_is_little_endian();
> > }
> >
> > /* Memory accessors */
> > diff --git a/include/linux/vringh.h b/include/linux/vringh.h
> > index 3ed62ef..d786c2d 100644
> > --- a/include/linux/vringh.h
> > +++ b/include/linux/vringh.h
> > @@ -228,7 +228,9 @@ static inline void vringh_notify(struct vringh *vrh)
> >
> > static inline bool vringh_is_little_endian(const struct vringh *vrh)
> > {
> > - return vrh->little_endian;
> > + if (vrh->little_endian)
> > + return true;
> > + return virtio_legacy_is_little_endian();
> > }
> >
> > static inline u16 vringh16_to_cpu(const struct vringh *vrh, __virtio16 val)
>
On Tue, 21 Apr 2015 16:10:18 +0200
"Michael S. Tsirkin" <[email protected]> wrote:
> On Fri, Apr 10, 2015 at 12:15:00PM +0200, Greg Kurz wrote:
> > Hi,
> >
> > This patchset allows vhost to be used with legacy virtio when guest and host
> > have a different endianness.
> >
> > Patch 7 got rewritten according to Cornelia's and Michael's comments. I have
> > also introduced patch 8 that brings BE vnet headers support to tun/macvtap.
> >
> > This series is enough to have vhost_net working flawlessly. I could
> > succesfully reboot guests from ppc64 to ppc64le and vice-versa on ppc64
> > and ppc64le hosts.
>
> Looks good overall.
> A couple of style comments.
>
> Thanks!
>
Thanks for your time Michael.
> > ---
> >
> > Greg Kurz (8):
> > virtio: introduce virtio_is_little_endian() helper
> > tun: add tun_is_little_endian() helper
> > macvtap: introduce macvtap_is_little_endian() helper
> > vringh: introduce vringh_is_little_endian() helper
> > vhost: introduce vhost_is_little_endian() helper
> > virtio: add explicit big-endian support to memory accessors
> > vhost: feature to set the vring endianness
> > macvtap/tun: add VNET_BE flag
> >
> >
> > drivers/net/Kconfig | 12 ++++++
> > drivers/net/macvtap.c | 69 ++++++++++++++++++++++++++++++++++-
> > drivers/net/tun.c | 71 +++++++++++++++++++++++++++++++++++-
> > drivers/vhost/Kconfig | 10 +++++
> > drivers/vhost/vhost.c | 76 ++++++++++++++++++++++++++++++++++++++
> > drivers/vhost/vhost.h | 25 ++++++++++---
> > include/linux/virtio_byteorder.h | 24 +++++++-----
> > include/linux/virtio_config.h | 19 +++++++---
> > include/linux/vringh.h | 19 +++++++---
> > include/uapi/linux/if_tun.h | 2 +
> > include/uapi/linux/vhost.h | 9 +++++
> > 11 files changed, 303 insertions(+), 33 deletions(-)
> >
> > --
> > Greg
>
On Tue, Apr 21, 2015 at 05:48:20PM +0200, Greg Kurz wrote:
> On Tue, 21 Apr 2015 16:04:23 +0200
> "Michael S. Tsirkin" <[email protected]> wrote:
>
> > On Fri, Apr 10, 2015 at 12:19:16PM +0200, Greg Kurz wrote:
> > > This patch brings cross-endian support to vhost when used to implement
> > > legacy virtio devices. Since it is a relatively rare situation, the
> > > feature availability is controlled by a kernel config option (not set
> > > by default).
> > >
> > > The vq->is_le boolean field is added to cache the endianness to be
> > > used for ring accesses. It defaults to native endian, as expected
> > > by legacy virtio devices. When the ring gets active, we force little
> > > endian if the device is modern. When the ring is deactivated, we
> > > revert to the native endian default.
> > >
> > > If cross-endian was compiled in, a vq->user_be boolean field is added
> > > so that userspace may request a specific endianness. This field is
> > > used to override the default when activating the ring of a legacy
> > > device. It has no effect on modern devices.
> > >
> > > Signed-off-by: Greg Kurz <[email protected]>
> > > ---
> > > drivers/vhost/Kconfig | 10 ++++++the
> > > drivers/vhost/vhost.c | 76 +++++++++++++++++++++++++++++++++++++++++++-
> > > drivers/vhost/vhost.h | 12 +++++--
> > > include/uapi/linux/vhost.h | 9 +++++
> > > 4 files changed, 103 insertions(+), 4 deletions(-)
> > >
> > > Changes since v3:
> > > - VHOST_SET_VRING_ENDIAN_LEGACY ioctl renamed to VHOST_SET_VRING_BIG_ENDIAN
> > > - ioctl API is now: 0 for le, 1 for be, other values are EINVAL
> > > - ioctl doesn't filter out modern devices
> > > - ioctl stubs return ENOIOCTLCMD
> > > - forbid endianness changes when vring is active
> > > - logic now handled with vq->is_le and vq->user_be according to device
> > > start/stop as suggested by Michael
> > >
> > > diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig
> > > index 017a1e8..0aec88c 100644
> > > --- a/drivers/vhost/Kconfig
> > > +++ b/drivers/vhost/Kconfig
> > > @@ -32,3 +32,13 @@ config VHOST
> > > ---help---
> > > This option is selected by any driver which needs to access
> > > the core of vhost.
> > > +
> > > +config VHOST_SET_ENDIAN_LEGACY
> >
> > I'd prefer namin this VHOST_CROSS_ENDIAN_LEGACY
> >
>
> Yes, makes sense. I'll make sure most of the cross-endian changes are
> easy to grep.
>
> > > + bool "Cross-endian support for host kernel accelerator"
> > > + default n
> > > + ---help---
> > > + This option allows vhost to support guests with a different byte
> > > + ordering from host. It is disabled by default since it adds overhead
> > > + and it is only needed by a few platforms (powerpc and arm).
> >
> > and is only useful on a few platforms (powerpc and arm).
> >
> > "it" seems to refer to "overhead", which is rarely needed.
> > needed is a bit too strong, you can always e.g. run virtio
> > in userspace.
> >
>
> My poor English again... it seems you understood what I wanted to
> write though :)
>
> > > +
> > > + If unsure, say "N".
> > > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> > > index 2ee2826..3eb756b 100644the
> > > --- a/drivers/vhost/vhost.c
> > > +++ b/drivers/vhost/vhost.c
> > > @@ -199,6 +199,10 @@ static void vhost_vq_reset(struct vhost_dev *dev,
> > > vq->call = NULL;
> > > vq->log_ctx = NULL;
> > > vq->memory = NULL;
> > > + vq->is_le = virtio_legacy_is_little_endian();
> > > +#ifdef CONFIG_VHOST_SET_ENDIAN_LEGACY
> > > + vq->user_be = !vq->is_le;
> > > +#endif
> >
> > add a wrapper for this too?
> >
>
> Will do.
>
> > > }
> > >
> > > static int vhost_worker(void *data)
> > > @@ -630,6 +634,53 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
> > > return 0;
> > > }
> > >
> > > +#ifdef CONFIG_VHOST_SET_ENDIAN_LEGACY
> > > +static long vhost_set_vring_big_endian(struct vhost_virtqueue *vq,
> > > + int __user *argp)
> > > +{
> > > + struct vhost_vring_state s;
> > > +
> > > + if (vq->private_data)
> > > + return -EBUSY;
> > > +
> > > + if (copy_from_user(&s, argp, sizeof(s)))
> > > + return -EFAULT;
> > > +
> > > + if (s.num && s.num != 1)
> >
> > s.num & ~0x1
> >
>
> Since s.num is unsigned and I assume this won't change, what about
> s.num > 1 as suggested by Cornelia ?
I just tried and gcc optimizes
s.num != 0 && s.num != 1 to s.num > 1
The former will be more readable once we
replace 0 and 1 with defines.
So ignore my advice, keep code as is but use defines.
> >
> > > + return -EINVAL;
> > > +
> > > + vq->user_be = s.num;
> > > +
> > > + return 0;
> > > +}
> > > +
> > > +static long vhost_get_vring_big_endian(struct vhost_virtqueue *vq, u32 idx,
> > > + int __user *argp)
> > > +{
> > > + struct vhost_vring_state s = {
> > > + .index = idx,
> > > + .num = vq->user_be
> > > + };
> > > +
> > > + if (copy_to_user(argp, &s, sizeof(s)))
> > > + return -EFAULT;
> > > +
> > > + return 0;
> > > +}
> > > +#else
> > > +static long vhost_set_vring_big_endian(struct vhost_virtqueue *vq,
> > > + int __user *argp)
> > > +{
> > > + return -ENOIOCTLCMD;
> > > +}
> > > +
> > > +static long vhost_get_vring_big_endian(struct vhost_virtqueue *vq, u32 idx,
> > > + int __user *argp)
> > > +{
> > > + return -ENOIOCTLCMD;
> > > +}
> > > +#endif /* CONFIG_VHOST_SET_ENDIAN_LEGACY */
> > > +
> > > long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp)
> > > {
> > > struct file *eventfp, *filep = NULL;
> > > @@ -806,6 +857,12 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp)
> > > } else
> > > filep = eventfp;the
> > > break;
> > > + case VHOST_SET_VRING_BIG_ENDIAN:
> > > + r = vhost_set_vring_big_endian(vq, argp);
> > > + break;
> > > + case VHOST_GET_VRING_BIG_ENDIAN:
> > > + r = vhost_get_vring_big_endian(vq, idx, argp);
> > > + break;
> > > default:
> > > r = -ENOIOCTLCMD;
> > > }
> > > @@ -1040,12 +1097,29 @@ static int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 avail_event)
> > > return 0;
> > > }
> > >
> > > +#ifdef CONFIG_VHOST_SET_ENDIAN_LEGACY
> > > +static void vhost_init_is_le(struct vhost_virtqueue *vq)
> > > +{
> > > + vq->is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1) || !vq->user_be;
> > > +}
> > > +#else
> > > +static void vhost_init_is_le(struct vhost_virtqueue *vq)
> > > +{
> > > + if (vhost_has_feature(vq, VIRTIO_F_VERSION_1))
> > > + vq->is_le = true;
> > > +}
> > > +#endif
> > > +
> >
> > I'd prefer localizing ifdefery somewhere near top of file.
> >
>
> Will do.
>
> > > int vhost_init_used(struct vhost_virtqueue *vq)
> > > {
> > > __virtio16 last_used_idx;
> > > int r;
> > > - if (!vq->private_data)
> > > + if (!vq->private_data) {
> > > + vq->is_le = virtio_legacy_is_little_endian();
> > > return 0;
> > > + }
> > > +
> > > + vhost_init_is_le(vq);
> > >
> > > r = vhost_update_used_flags(vq);
> > > if (r)
> > > diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> > > index 4e9a186..04b2add 100644
> > > --- a/drivers/vhost/vhost.h
> > > +++ b/drivers/vhost/vhost.h
> > > @@ -106,6 +106,14 @@ struct vhost_virtqueue {
> > > /* Log write descriptors */
> > > void __user *log_base;
> > > struct vhost_log *log;
> > > +
> > > + /* Ring endianness. Defaults to legacy native endianness.
> > > + * Set to true when starting a modern virtio device. */
> > > + bool is_le;
> > > +#ifdef CONFIG_VHOST_SET_ENDIAN_LEGACY
> > > + /* Ring endianness requested by userspace for cross-endian support. */
> > > + bool user_be;
> > > +#endif
> > > };
> > >
> > > struct vhost_dev {
> > > @@ -175,9 +183,7 @@ static inline bool vhost_has_feature(struct vhost_virtqueue *vq, int bit)
> > >
> > > static inline bool vhost_is_little_endian(struct vhost_virtqueue *vq)
> > > {
> > > - if (vhost_has_feature(vq, VIRTIO_F_VERSION_1))
> > > - return true;
> > > - return virtio_legacy_is_little_endian();
> > > + return vq->is_le;
> > > }
> > >
> > > /* Memory accessors */
> > > diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h
> > > index bb6a5b4..5cdebbc 100644
> > > --- a/include/uapi/linux/vhost.h
> > > +++ b/include/uapi/linux/vhost.h
> > > @@ -103,6 +103,15 @@ struct vhost_memory {
> > > /* Get accessor: reads index, writes value in num */
> > > #define VHOST_GET_VRING_BASE _IOWR(VHOST_VIRTIO, 0x12, struct vhost_vring_state)
> > >
> > > +/* Set the vring byte order in num. This is a legacy only API that is simply
> > > + * ignored when VIRTIO_F_VERSION_1 is set.
> > > + * 0 to set to little-endian
> > > + * 1 to set to big-endian
> >
> > How about defines for these?
> >
>
> Ok. I'll put the defines here so that all the cross-endian stuff
> lies in the same hunk. Is it ok for you ?
Fine.
> > > + * other values return EINVAL.
Pls also add a note saying that not all kernel configurations support this ioctl,
but all configurations that support SET also support GET.
> > > + */
> > > +#define VHOST_SET_VRING_BIG_ENDIAN _IOW(VHOST_VIRTIO, 0x13, struct vhost_vring_state)
> > > +#define VHOST_GET_VRING_BIG_ENDIAN _IOW(VHOST_VIRTIO, 0x14, struct vhost_vring_state)
> > > +
> > > /* The following ioctls use eventfd file descriptors to signal and poll
> > > * for events. */
> > >
> >
I'm inclined to think VHOST_SET_VRING_ENDIAN is a slightly better name.
What do you think?
--
MST
On Tue, Apr 21, 2015 at 06:22:20PM +0200, Greg Kurz wrote:
> On Tue, 21 Apr 2015 16:06:33 +0200
> "Michael S. Tsirkin" <[email protected]> wrote:
>
> > On Fri, Apr 10, 2015 at 12:20:21PM +0200, Greg Kurz wrote:
> > > The VNET_LE flag was introduced to fix accesses to virtio 1.0 headers
> > > that are always little-endian. It can also be used to handle the special
> > > case of a legacy little-endian device implemented by a big-endian host.
> > >
> > > Let's add a flag and ioctls for big-endian devices as well. If both flags
> > > are set, little-endian wins.
> > >
> > > Since this is isn't a common usecase, the feature is controlled by a kernel
> > > config option (not set by default).
> > >
> > > Both macvtap and tun are covered by this patch since they share the same
> > > API with userland.
> > >
> > > Signed-off-by: Greg Kurz <[email protected]>
> > > ---
> > > drivers/net/Kconfig | 12 ++++++++
> > > drivers/net/macvtap.c | 60 +++++++++++++++++++++++++++++++++++++++++-
> > > drivers/net/tun.c | 62 ++++++++++++++++++++++++++++++++++++++++++-
> > > include/uapi/linux/if_tun.h | 2 +
> > > 4 files changed, 134 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
> > > index df51d60..f0e23a0 100644
> > > --- a/drivers/net/Kconfig
> > > +++ b/drivers/net/Kconfig
> > > @@ -244,6 +244,18 @@ config TUN
> > >
> > > If you don't know what to use this for, you don't need it.
> > >
> > > +config TUN_VNET_BE
> > > + bool "Support for big-endian vnet headers"
> > > + default n
> > > + ---help---
> > > + This option allows TUN/TAP and MACVTAP device drivers to parse
> > > + vnet headers that are in big-endian byte order. It is useful
> > > + when the headers come from a big-endian legacy virtio driver and
> > > + the host is little-endian.
> > > +
> > > + Unless you have a little-endian system hosting a big-endian virtual
> > > + machine with a virtio NIC, you should say N.
> > > +
> >
> > should mention cross-endian, not big-endian, right?
> >
>
> The current TUN_VNET_LE related code is already doing cross-endian: without
> this patch, one can already run a LE guest on a BE host... wouldn't it be
> confusing to mention cross-endian only when the guest is BE ?
Hmm I think no - LE is also useful for virtio 1 - this is what it was
intended for after all.
> What about having a completely distinct implementation for cross-endian that
> don't reuse the existing code and defines then ?
I think implementation and interface are fine, just the documentation
can be improved a bit.
How about:
"Support for cross-endian vnet headers on little-endian kernels".
Accordingly CONFIG_TUN_VNET_CROSS_LE
?
--
MST
On Tue, 21 Apr 2015 20:25:03 +0200
"Michael S. Tsirkin" <[email protected]> wrote:
[ ... ]
> > > > @@ -630,6 +634,53 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
> > > > return 0;
> > > > }
> > > >
> > > > +#ifdef CONFIG_VHOST_SET_ENDIAN_LEGACY
> > > > +static long vhost_set_vring_big_endian(struct vhost_virtqueue *vq,
> > > > + int __user *argp)
> > > > +{
> > > > + struct vhost_vring_state s;
> > > > +
> > > > + if (vq->private_data)
> > > > + return -EBUSY;
> > > > +
> > > > + if (copy_from_user(&s, argp, sizeof(s)))
> > > > + return -EFAULT;
> > > > +
> > > > + if (s.num && s.num != 1)
> > >
> > > s.num & ~0x1
> > >
> >
> > Since s.num is unsigned and I assume this won't change, what about
> > s.num > 1 as suggested by Cornelia ?
>
> I just tried and gcc optimizes
> s.num != 0 && s.num != 1 to s.num > 1
>
> The former will be more readable once we
> replace 0 and 1 with defines.
>
> So ignore my advice, keep code as is but use defines.
>
Ok.
[ ... ]
> > > > --- a/include/uapi/linux/vhost.h
> > > > +++ b/include/uapi/linux/vhost.h
> > > > @@ -103,6 +103,15 @@ struct vhost_memory {
> > > > /* Get accessor: reads index, writes value in num */
> > > > #define VHOST_GET_VRING_BASE _IOWR(VHOST_VIRTIO, 0x12, struct vhost_vring_state)
> > > >
> > > > +/* Set the vring byte order in num. This is a legacy only API that is simply
> > > > + * ignored when VIRTIO_F_VERSION_1 is set.
> > > > + * 0 to set to little-endian
> > > > + * 1 to set to big-endian
> > >
> > > How about defines for these?
> > >
> >
> > Ok. I'll put the defines here so that all the cross-endian stuff
> > lies in the same hunk. Is it ok for you ?
>
> Fine.
>
> > > > + * other values return EINVAL.
>
> Pls also add a note saying that not all kernel configurations support this ioctl,
> but all configurations that support SET also support GET.
>
Ok.
> > > > + */
> > > > +#define VHOST_SET_VRING_BIG_ENDIAN _IOW(VHOST_VIRTIO, 0x13, struct vhost_vring_state)
> > > > +#define VHOST_GET_VRING_BIG_ENDIAN _IOW(VHOST_VIRTIO, 0x14, struct vhost_vring_state)
> > > > +
> > > > /* The following ioctls use eventfd file descriptors to signal and poll
> > > > * for events. */
> > > >
> > >
>
> I'm inclined to think VHOST_SET_VRING_ENDIAN is a slightly better name.
> What do you think?
>
Or VHOST_SET_VRING_CROSS_ENDIAN ? I like the idea to keep a hint that this
API is for cross-endian only... like the rest of this series.
--
Greg
On Tue, 21 Apr 2015 20:30:23 +0200
"Michael S. Tsirkin" <[email protected]> wrote:
> On Tue, Apr 21, 2015 at 06:22:20PM +0200, Greg Kurz wrote:
> > On Tue, 21 Apr 2015 16:06:33 +0200
> > "Michael S. Tsirkin" <[email protected]> wrote:
> >
> > > On Fri, Apr 10, 2015 at 12:20:21PM +0200, Greg Kurz wrote:
> > > > The VNET_LE flag was introduced to fix accesses to virtio 1.0 headers
> > > > that are always little-endian. It can also be used to handle the special
> > > > case of a legacy little-endian device implemented by a big-endian host.
> > > >
> > > > Let's add a flag and ioctls for big-endian devices as well. If both flags
> > > > are set, little-endian wins.
> > > >
> > > > Since this is isn't a common usecase, the feature is controlled by a kernel
> > > > config option (not set by default).
> > > >
> > > > Both macvtap and tun are covered by this patch since they share the same
> > > > API with userland.
> > > >
> > > > Signed-off-by: Greg Kurz <[email protected]>
> > > > ---
> > > > drivers/net/Kconfig | 12 ++++++++
> > > > drivers/net/macvtap.c | 60 +++++++++++++++++++++++++++++++++++++++++-
> > > > drivers/net/tun.c | 62 ++++++++++++++++++++++++++++++++++++++++++-
> > > > include/uapi/linux/if_tun.h | 2 +
> > > > 4 files changed, 134 insertions(+), 2 deletions(-)
> > > >
> > > > diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
> > > > index df51d60..f0e23a0 100644
> > > > --- a/drivers/net/Kconfig
> > > > +++ b/drivers/net/Kconfig
> > > > @@ -244,6 +244,18 @@ config TUN
> > > >
> > > > If you don't know what to use this for, you don't need it.
> > > >
> > > > +config TUN_VNET_BE
> > > > + bool "Support for big-endian vnet headers"
> > > > + default n
> > > > + ---help---
> > > > + This option allows TUN/TAP and MACVTAP device drivers to parse
> > > > + vnet headers that are in big-endian byte order. It is useful
> > > > + when the headers come from a big-endian legacy virtio driver and
> > > > + the host is little-endian.
> > > > +
> > > > + Unless you have a little-endian system hosting a big-endian virtual
> > > > + machine with a virtio NIC, you should say N.
> > > > +
> > >
> > > should mention cross-endian, not big-endian, right?
> > >
> >
> > The current TUN_VNET_LE related code is already doing cross-endian: without
> > this patch, one can already run a LE guest on a BE host... wouldn't it be
> > confusing to mention cross-endian only when the guest is BE ?
>
> Hmm I think no - LE is also useful for virtio 1 - this is what it was
> intended for after all.
>
> > What about having a completely distinct implementation for cross-endian that
> > don't reuse the existing code and defines then ?
>
> I think implementation and interface are fine, just the documentation
> can be improved a bit.
>
> How about:
> "Support for cross-endian vnet headers on little-endian kernels".
>
> Accordingly CONFIG_TUN_VNET_CROSS_LE
>
> ?
>
Sure. And what about also renaming the ioctl to TUNSETVNETCROSSLE then ?
--
Greg
On Wed, Apr 22, 2015 at 11:08:54AM +0200, Greg Kurz wrote:
> On Tue, 21 Apr 2015 20:25:03 +0200
> "Michael S. Tsirkin" <[email protected]> wrote:
> [ ... ]
> > > > > @@ -630,6 +634,53 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
> > > > > return 0;
> > > > > }
> > > > >
> > > > > +#ifdef CONFIG_VHOST_SET_ENDIAN_LEGACY
> > > > > +static long vhost_set_vring_big_endian(struct vhost_virtqueue *vq,
> > > > > + int __user *argp)
> > > > > +{
> > > > > + struct vhost_vring_state s;
> > > > > +
> > > > > + if (vq->private_data)
> > > > > + return -EBUSY;
> > > > > +
> > > > > + if (copy_from_user(&s, argp, sizeof(s)))
> > > > > + return -EFAULT;
> > > > > +
> > > > > + if (s.num && s.num != 1)
> > > >
> > > > s.num & ~0x1
> > > >
> > >
> > > Since s.num is unsigned and I assume this won't change, what about
> > > s.num > 1 as suggested by Cornelia ?
> >
> > I just tried and gcc optimizes
> > s.num != 0 && s.num != 1 to s.num > 1
> >
> > The former will be more readable once we
> > replace 0 and 1 with defines.
> >
> > So ignore my advice, keep code as is but use defines.
> >
>
> Ok.
>
> [ ... ]
> > > > > --- a/include/uapi/linux/vhost.h
> > > > > +++ b/include/uapi/linux/vhost.h
> > > > > @@ -103,6 +103,15 @@ struct vhost_memory {
> > > > > /* Get accessor: reads index, writes value in num */
> > > > > #define VHOST_GET_VRING_BASE _IOWR(VHOST_VIRTIO, 0x12, struct vhost_vring_state)
> > > > >
> > > > > +/* Set the vring byte order in num. This is a legacy only API that is simply
> > > > > + * ignored when VIRTIO_F_VERSION_1 is set.
> > > > > + * 0 to set to little-endian
> > > > > + * 1 to set to big-endian
> > > >
> > > > How about defines for these?
> > > >
> > >
> > > Ok. I'll put the defines here so that all the cross-endian stuff
> > > lies in the same hunk. Is it ok for you ?
> >
> > Fine.
> >
> > > > > + * other values return EINVAL.
> >
> > Pls also add a note saying that not all kernel configurations support this ioctl,
> > but all configurations that support SET also support GET.
> >
>
> Ok.
>
> > > > > + */
> > > > > +#define VHOST_SET_VRING_BIG_ENDIAN _IOW(VHOST_VIRTIO, 0x13, struct vhost_vring_state)
> > > > > +#define VHOST_GET_VRING_BIG_ENDIAN _IOW(VHOST_VIRTIO, 0x14, struct vhost_vring_state)
> > > > > +
> > > > > /* The following ioctls use eventfd file descriptors to signal and poll
> > > > > * for events. */
> > > > >
> > > >
> >
> > I'm inclined to think VHOST_SET_VRING_ENDIAN is a slightly better name.
> > What do you think?
> >
>
> Or VHOST_SET_VRING_CROSS_ENDIAN ? I like the idea to keep a hint that this
> API is for cross-endian only... like the rest of this series.
>
> --
> Greg
I think VHOST_SET_VRING_CROSS_ENDIAN is not a good name -
it would imply 1 for cross endian, 0 for native endian.
--
MST
On Wed, Apr 22, 2015 at 12:01:29PM +0200, Greg Kurz wrote:
> On Tue, 21 Apr 2015 20:30:23 +0200
> "Michael S. Tsirkin" <[email protected]> wrote:
>
> > On Tue, Apr 21, 2015 at 06:22:20PM +0200, Greg Kurz wrote:
> > > On Tue, 21 Apr 2015 16:06:33 +0200
> > > "Michael S. Tsirkin" <[email protected]> wrote:
> > >
> > > > On Fri, Apr 10, 2015 at 12:20:21PM +0200, Greg Kurz wrote:
> > > > > The VNET_LE flag was introduced to fix accesses to virtio 1.0 headers
> > > > > that are always little-endian. It can also be used to handle the special
> > > > > case of a legacy little-endian device implemented by a big-endian host.
> > > > >
> > > > > Let's add a flag and ioctls for big-endian devices as well. If both flags
> > > > > are set, little-endian wins.
> > > > >
> > > > > Since this is isn't a common usecase, the feature is controlled by a kernel
> > > > > config option (not set by default).
> > > > >
> > > > > Both macvtap and tun are covered by this patch since they share the same
> > > > > API with userland.
> > > > >
> > > > > Signed-off-by: Greg Kurz <[email protected]>
> > > > > ---
> > > > > drivers/net/Kconfig | 12 ++++++++
> > > > > drivers/net/macvtap.c | 60 +++++++++++++++++++++++++++++++++++++++++-
> > > > > drivers/net/tun.c | 62 ++++++++++++++++++++++++++++++++++++++++++-
> > > > > include/uapi/linux/if_tun.h | 2 +
> > > > > 4 files changed, 134 insertions(+), 2 deletions(-)
> > > > >
> > > > > diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
> > > > > index df51d60..f0e23a0 100644
> > > > > --- a/drivers/net/Kconfig
> > > > > +++ b/drivers/net/Kconfig
> > > > > @@ -244,6 +244,18 @@ config TUN
> > > > >
> > > > > If you don't know what to use this for, you don't need it.
> > > > >
> > > > > +config TUN_VNET_BE
> > > > > + bool "Support for big-endian vnet headers"
> > > > > + default n
> > > > > + ---help---
> > > > > + This option allows TUN/TAP and MACVTAP device drivers to parse
> > > > > + vnet headers that are in big-endian byte order. It is useful
> > > > > + when the headers come from a big-endian legacy virtio driver and
> > > > > + the host is little-endian.
> > > > > +
> > > > > + Unless you have a little-endian system hosting a big-endian virtual
> > > > > + machine with a virtio NIC, you should say N.
> > > > > +
> > > >
> > > > should mention cross-endian, not big-endian, right?
> > > >
> > >
> > > The current TUN_VNET_LE related code is already doing cross-endian: without
> > > this patch, one can already run a LE guest on a BE host... wouldn't it be
> > > confusing to mention cross-endian only when the guest is BE ?
> >
> > Hmm I think no - LE is also useful for virtio 1 - this is what it was
> > intended for after all.
> >
> > > What about having a completely distinct implementation for cross-endian that
> > > don't reuse the existing code and defines then ?
> >
> > I think implementation and interface are fine, just the documentation
> > can be improved a bit.
> >
> > How about:
> > "Support for cross-endian vnet headers on little-endian kernels".
> >
> > Accordingly CONFIG_TUN_VNET_CROSS_LE
> >
> > ?
> >
>
> Sure. And what about also renaming the ioctl to TUNSETVNETCROSSLE then ?
>
> --
> Greg
I think not.
--
MST