2007-05-11 01:17:52

by Rusty Russell

[permalink] [raw]
Subject: [PATCH 0/5] lguest feedback tidyups

Hi all,

Gratefully-received recent feedback from CC'd was applied to excellent
effect (and the advice from Matt Mackall about my personal appearance is
best unrequited).

The patch is split in 5 parts to correspond with the 9 parts Andrew
sent out before, but here's the summary:

1) Sparse (thanks Christoph Hellwig):
- lguest_const can be static now
- lguest.c should include "lguest_bus.h" for lguest_devices declaration.
- page_tables.c unnecessary initialization
- But the cost was high: lots of __force casts 8(
2) Jeff Garzik
- Use netdev_priv instead of dev->priv.
- Check for ioremap failure
- iounmap on failure.
- Wrap SEND_DMA and BIND_DMA calls
- Don't set NETIF_F_SG unless we set NETIF_F_NO_CSUM
- Use SET_NETDEV_DEV()
- Don't set dev->irq, mem_start & mem_end (deprecated)
3) Sam Ravnborg
- lg-objs is deprecated, use lg-y.

Cheers,
Rusty.


2007-05-11 01:19:36

by Rusty Russell

[permalink] [raw]
Subject: [PATCH 1/5] lguest host feedback tidyups

1) Sam Ravnborg says lg-objs is deprecated, use lg-y.
2) Sparse: page_tables.c unnecessary initialization
3) Lots of __force to shut sparse up: guest "physical" addresses are
userspace virtual.
4) Change prototype of run_lguest and do cast in caller instead (when we add
__iomem to cast, it runs over another line).

Signed-off-by: Rusty Russell <[email protected]>
---
drivers/lguest/Makefile | 2 +-
drivers/lguest/core.c | 16 ++++++++--------
drivers/lguest/hypercalls.c | 3 ++-
drivers/lguest/interrupts_and_traps.c | 4 ++--
drivers/lguest/lg.h | 2 +-
drivers/lguest/lguest_user.c | 2 +-
drivers/lguest/page_tables.c | 2 +-
7 files changed, 16 insertions(+), 15 deletions(-)

===================================================================
--- a/drivers/lguest/Makefile
+++ b/drivers/lguest/Makefile
@@ -3,5 +3,5 @@ obj-$(CONFIG_LGUEST_GUEST) += lguest.o l

# Host requires the other files, which can be a module.
obj-$(CONFIG_LGUEST) += lg.o
-lg-objs := core.o hypercalls.o page_tables.o interrupts_and_traps.o \
+lg-y := core.o hypercalls.o page_tables.o interrupts_and_traps.o \
segments.o io.o lguest_user.o switcher.o
===================================================================
--- a/drivers/lguest/core.c
+++ b/drivers/lguest/core.c
@@ -218,7 +218,7 @@ u32 lgread_u32(struct lguest *lg, u32 ad

/* Don't let them access lguest binary */
if (!lguest_address_ok(lg, addr, sizeof(val))
- || get_user(val, (u32 __user *)addr) != 0)
+ || get_user(val, (__force u32 __user *)addr) != 0)
kill_guest(lg, "bad read address %u", addr);
return val;
}
@@ -226,14 +226,14 @@ void lgwrite_u32(struct lguest *lg, u32
void lgwrite_u32(struct lguest *lg, u32 addr, u32 val)
{
if (!lguest_address_ok(lg, addr, sizeof(val))
- || put_user(val, (u32 __user *)addr) != 0)
+ || put_user(val, (__force u32 __user *)addr) != 0)
kill_guest(lg, "bad write address %u", addr);
}

void lgread(struct lguest *lg, void *b, u32 addr, unsigned bytes)
{
if (!lguest_address_ok(lg, addr, bytes)
- || copy_from_user(b, (void __user *)addr, bytes) != 0) {
+ || copy_from_user(b, (__force void __user *)addr, bytes) != 0) {
/* copy_from_user should do this, but as we rely on it... */
memset(b, 0, bytes);
kill_guest(lg, "bad read address %u len %u", addr, bytes);
@@ -243,7 +243,7 @@ void lgwrite(struct lguest *lg, u32 addr
void lgwrite(struct lguest *lg, u32 addr, const void *b, unsigned bytes)
{
if (!lguest_address_ok(lg, addr, bytes)
- || copy_to_user((void __user *)addr, b, bytes) != 0)
+ || copy_to_user((__force void __user *)addr, b, bytes) != 0)
kill_guest(lg, "bad write address %u len %u", addr, bytes);
}

@@ -294,7 +294,7 @@ static void run_guest_once(struct lguest
: "memory", "%edx", "%ecx", "%edi", "%esi");
}

-int run_guest(struct lguest *lg, char *__user user)
+int run_guest(struct lguest *lg, unsigned long __user *user)
{
while (!lg->dead) {
unsigned int cr2 = 0; /* Damn gcc */
@@ -302,8 +302,8 @@ int run_guest(struct lguest *lg, char *_
/* Hypercalls first: we might have been out to userspace */
do_hypercalls(lg);
if (lg->dma_is_pending) {
- if (put_user(lg->pending_dma, (unsigned long *)user) ||
- put_user(lg->pending_key, (unsigned long *)user+1))
+ if (put_user(lg->pending_dma, user) ||
+ put_user(lg->pending_key, user+1))
return -EFAULT;
return sizeof(unsigned long)*2;
}
@@ -420,7 +420,7 @@ static int __init init(void)
lock_cpu_hotplug();
if (cpu_has_pge) { /* We have a broader idea of "global". */
cpu_had_pge = 1;
- on_each_cpu(adjust_pge, 0, 0, 1);
+ on_each_cpu(adjust_pge, (void *)0, 0, 1);
clear_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability);
}
unlock_cpu_hotplug();
===================================================================
--- a/drivers/lguest/hypercalls.c
+++ b/drivers/lguest/hypercalls.c
@@ -83,7 +83,8 @@ static void do_hcall(struct lguest *lg,
guest_set_pmd(lg, regs->edx, regs->ebx);
break;
case LHCALL_LOAD_TLS:
- guest_load_tls(lg, (struct desc_struct __user*)regs->edx);
+ guest_load_tls(lg,
+ (__force struct desc_struct __user*)regs->edx);
break;
case LHCALL_TS:
lg->ts = regs->edx;
===================================================================
--- a/drivers/lguest/interrupts_and_traps.c
+++ b/drivers/lguest/interrupts_and_traps.c
@@ -18,7 +18,7 @@ static int idt_present(u32 lo, u32 hi)

static void push_guest_stack(struct lguest *lg, u32 __user **gstack, u32 val)
{
- lgwrite_u32(lg, (u32)--(*gstack), val);
+ lgwrite_u32(lg, (__force u32)--(*gstack), val);
}

static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err)
@@ -53,7 +53,7 @@ static void set_guest_interrupt(struct l

/* Change the real stack so switcher returns to trap handler */
lg->regs->ss = ss;
- lg->regs->esp = (u32)gstack + lg->page_offset;
+ lg->regs->esp = (__force u32)gstack + lg->page_offset;
lg->regs->cs = (__KERNEL_CS|GUEST_PL);
lg->regs->eip = idt_address(lo, hi);

===================================================================
--- a/drivers/lguest/lg.h
+++ b/drivers/lguest/lg.h
@@ -182,7 +182,7 @@ int find_free_guest(void);
int find_free_guest(void);
int lguest_address_ok(const struct lguest *lg,
unsigned long addr, unsigned long len);
-int run_guest(struct lguest *lg, char *__user user);
+int run_guest(struct lguest *lg, unsigned long __user *user);


/* interrupts_and_traps.c: */
===================================================================
--- a/drivers/lguest/lguest_user.c
+++ b/drivers/lguest/lguest_user.c
@@ -65,7 +65,7 @@ static ssize_t read(struct file *file, c
if (lg->dma_is_pending)
lg->dma_is_pending = 0;

- return run_guest(lg, user);
+ return run_guest(lg, (unsigned long __user *)user);
}

/* Take: pfnlimit, pgdir, start, pageoffset. */
===================================================================
--- a/drivers/lguest/page_tables.c
+++ b/drivers/lguest/page_tables.c
@@ -13,7 +13,7 @@
#define PTES_PER_PAGE (1 << PTES_PER_PAGE_SHIFT)
#define SWITCHER_PGD_INDEX (PTES_PER_PAGE - 1)

-static DEFINE_PER_CPU(spte_t *, switcher_pte_pages) = { NULL };
+static DEFINE_PER_CPU(spte_t *, switcher_pte_pages);
#define switcher_pte_page(cpu) per_cpu(switcher_pte_pages, cpu)

static unsigned vaddr_to_pgd_index(unsigned long vaddr)


2007-05-11 01:21:53

by Rusty Russell

[permalink] [raw]
Subject: [PATCH 2/5] lguest guest feedback tidyups

1) send-dma and bind-dma hypercall wrappers for drivers to use,
2) formalization of the convention that devices can use the irq
corresponding to their index on the lguest_bus.
3) ___force to shut up sparse: guests *can* use ioremap as virtual mem.
4) lguest.c should include "lguest_bus.h" for lguest_devices declaration.

Signed-off-by: Rusty Russell <[email protected]>
---
drivers/lguest/lguest.c | 20 ++++++++++++++++++++
drivers/lguest/lguest_bus.c | 2 +-
include/linux/lguest_bus.h | 13 ++++++++++++-
3 files changed, 33 insertions(+), 2 deletions(-)

===================================================================
--- a/include/linux/lguest_bus.h
+++ b/include/linux/lguest_bus.h
@@ -7,7 +7,6 @@

struct lguest_device {
/* Unique busid, and index into lguest_page->devices[] */
- /* By convention, each device can use irq index+1 if it wants to. */
unsigned int index;

struct device dev;
@@ -15,6 +14,18 @@ struct lguest_device {
/* Driver can hang data off here. */
void *private;
};
+
+/* By convention, each device can use irq index+1 if it wants to. */
+static inline int lgdev_irq(const struct lguest_device *dev)
+{
+ return dev->index + 1;
+}
+
+/* dma args must not be vmalloced! */
+void lguest_send_dma(unsigned long key, struct lguest_dma *dma);
+int lguest_bind_dma(unsigned long key, struct lguest_dma *dmas,
+ unsigned int num, u8 irq);
+void lguest_unbind_dma(unsigned long key, struct lguest_dma *dmas);

struct lguest_driver {
const char *name;
===================================================================
--- a/drivers/lguest/lguest.c
+++ b/drivers/lguest/lguest.c
@@ -27,6 +27,7 @@
#include <linux/interrupt.h>
#include <linux/lguest.h>
#include <linux/lguest_launcher.h>
+#include <linux/lguest_bus.h>
#include <asm/paravirt.h>
#include <asm/param.h>
#include <asm/page.h>
@@ -99,6 +100,25 @@ void async_hcall(unsigned long call,
next_call = 0;
}
local_irq_restore(flags);
+}
+
+void lguest_send_dma(unsigned long key, struct lguest_dma *dma)
+{
+ dma->used_len = 0;
+ hcall(LHCALL_SEND_DMA, key, __pa(dma), 0);
+}
+
+int lguest_bind_dma(unsigned long key, struct lguest_dma *dmas,
+ unsigned int num, u8 irq)
+{
+ if (!hcall(LHCALL_BIND_DMA, key, __pa(dmas), (num << 8) | irq))
+ return -ENOMEM;
+ return 0;
+}
+
+void lguest_unbind_dma(unsigned long key, struct lguest_dma *dmas)
+{
+ hcall(LHCALL_BIND_DMA, key, __pa(dmas), 0);
}

static unsigned long save_fl(void)
===================================================================
--- a/drivers/lguest/lguest_bus.c
+++ b/drivers/lguest/lguest_bus.c
@@ -136,7 +136,7 @@ static int __init lguest_bus_init(void)
return 0;

/* Devices are in page above top of "normal" mem. */
- lguest_devices = ioremap(max_pfn << PAGE_SHIFT, PAGE_SIZE);
+ lguest_devices = (__force void*)ioremap(max_pfn<<PAGE_SHIFT,PAGE_SIZE);

if (bus_register(&lguest_bus.bus) != 0
|| device_register(&lguest_bus.dev) != 0)


2007-05-11 01:23:18

by Rusty Russell

[permalink] [raw]
Subject: [PATCH 3/5] lguest network driver feedback tidyups

Feedback from Jeff Garzik:
1) Use netdev_priv instead of dev->priv.
2) Check for ioremap failure
3) iounmap on failure.
4) Wrap SEND_DMA and BIND_DMA calls
5) Don't set NETIF_F_SG unless we set NETIF_F_NO_CSUM
6) Use SET_NETDEV_DEV()
7) Don't set dev->irq, mem_start & mem_end (deprecated)

Sparse warnings:
8) __force the ioremap: guests can use it as normal memory.

Signed-off-by: Rusty Russell <[email protected]>
---
drivers/net/lguest_net.c | 53 ++++++++++++++++++++++++++--------------------
1 file changed, 31 insertions(+), 22 deletions(-)

===================================================================
--- a/drivers/net/lguest_net.c
+++ b/drivers/net/lguest_net.c
@@ -35,6 +35,9 @@ struct lguestnet_info
unsigned long peer_phys;
unsigned long mapsize;

+ /* The lguest_device I come from */
+ struct lguest_device *lgdev;
+
/* My peerid. */
unsigned int me;

@@ -84,7 +87,7 @@ static void skb_to_dma(const struct sk_b

static void lguestnet_set_multicast(struct net_device *dev)
{
- struct lguestnet_info *info = dev->priv;
+ struct lguestnet_info *info = netdev_priv(dev);

if ((dev->flags & (IFF_PROMISC|IFF_ALLMULTI)) || dev->mc_count)
info->peer[info->me].mac[0] |= PROMISC_BIT;
@@ -110,13 +113,13 @@ static void transfer_packet(struct net_d
struct sk_buff *skb,
unsigned int peernum)
{
- struct lguestnet_info *info = dev->priv;
+ struct lguestnet_info *info = netdev_priv(dev);
struct lguest_dma dma;

skb_to_dma(skb, skb_headlen(skb), &dma);
pr_debug("xfer length %04x (%u)\n", htons(skb->len), skb->len);

- hcall(LHCALL_SEND_DMA, peer_key(info,peernum), __pa(&dma), 0);
+ lguest_send_dma(peer_key(info, peernum), &dma);
if (dma.used_len != skb->len) {
dev->stats.tx_carrier_errors++;
pr_debug("Bad xfer to peer %i: %i of %i (dma %p/%i)\n",
@@ -137,7 +140,7 @@ static int lguestnet_start_xmit(struct s
{
unsigned int i;
int broadcast;
- struct lguestnet_info *info = dev->priv;
+ struct lguestnet_info *info = netdev_priv(dev);
const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;

pr_debug("%s: xmit %02x:%02x:%02x:%02x:%02x:%02x\n",
@@ -162,7 +165,7 @@ static int lguestnet_start_xmit(struct s
/* Find a new skb to put in this slot in shared mem. */
static int fill_slot(struct net_device *dev, unsigned int slot)
{
- struct lguestnet_info *info = dev->priv;
+ struct lguestnet_info *info = netdev_priv(dev);
/* Try to create and register a new one. */
info->skb[slot] = netdev_alloc_skb(dev, ETH_HLEN + ETH_DATA_LEN);
if (!info->skb[slot]) {
@@ -180,7 +183,7 @@ static irqreturn_t lguestnet_rcv(int irq
static irqreturn_t lguestnet_rcv(int irq, void *dev_id)
{
struct net_device *dev = dev_id;
- struct lguestnet_info *info = dev->priv;
+ struct lguestnet_info *info = netdev_priv(dev);
unsigned int i, done = 0;

for (i = 0; i < ARRAY_SIZE(info->dma); i++) {
@@ -220,7 +223,7 @@ static int lguestnet_open(struct net_dev
static int lguestnet_open(struct net_device *dev)
{
int i;
- struct lguestnet_info *info = dev->priv;
+ struct lguestnet_info *info = netdev_priv(dev);

/* Set up our MAC address */
memcpy(info->peer[info->me].mac, dev->dev_addr, ETH_ALEN);
@@ -232,8 +235,8 @@ static int lguestnet_open(struct net_dev
if (fill_slot(dev, i) != 0)
goto cleanup;
}
- if (!hcall(LHCALL_BIND_DMA, peer_key(info, info->me), __pa(info->dma),
- (NUM_SKBS << 8) | dev->irq))
+ if (lguest_bind_dma(peer_key(info,info->me), info->dma,
+ NUM_SKBS, lgdev_irq(info->lgdev)) != 0)
goto cleanup;
return 0;

@@ -246,13 +249,13 @@ static int lguestnet_close(struct net_de
static int lguestnet_close(struct net_device *dev)
{
unsigned int i;
- struct lguestnet_info *info = dev->priv;
+ struct lguestnet_info *info = netdev_priv(dev);

/* Clear all trace: others might deliver packets, we'll ignore it. */
memset(&info->peer[info->me], 0, sizeof(info->peer[info->me]));

/* Deregister sg lists. */
- hcall(LHCALL_BIND_DMA, peer_key(info, info->me), __pa(info->dma), 0);
+ lguest_unbind_dma(peer_key(info, info->me), info->dma);
for (i = 0; i < ARRAY_SIZE(info->dma); i++)
dev_kfree_skb(info->skb[i]);
return 0;
@@ -290,30 +293,34 @@ static int lguestnet_probe(struct lguest
/* Turning on/off promisc will call dev->set_multicast_list.
* We don't actually support multicast yet */
dev->set_multicast_list = lguestnet_set_multicast;
- dev->mem_start = ((unsigned long)desc->pfn << PAGE_SHIFT);
- dev->mem_end = dev->mem_start + PAGE_SIZE * desc->num_pages;
- dev->irq = lgdev->index+1;
- dev->features = NETIF_F_SG;
+ SET_NETDEV_DEV(dev, &lgdev->dev);
if (desc->features & LGUEST_NET_F_NOCSUM)
- dev->features |= NETIF_F_NO_CSUM;
-
- info = dev->priv;
+ dev->features = NETIF_F_SG|NETIF_F_NO_CSUM;
+
+ info = netdev_priv(dev);
info->mapsize = PAGE_SIZE * desc->num_pages;
info->peer_phys = ((unsigned long)desc->pfn << PAGE_SHIFT);
- info->peer = (void *)ioremap(info->peer_phys, info->mapsize);
+ info->lgdev = lgdev;
+ info->peer = (__force void *)ioremap(info->peer_phys, info->mapsize);
+ if (!info->peer) {
+ err = -ENOMEM;
+ goto free;
+ }
+
/* This stores our peerid (upper bits reserved for future). */
info->me = (desc->features & (info->mapsize-1));

err = register_netdev(dev);
if (err) {
pr_debug("lguestnet: registering device failed\n");
- goto free;
+ goto unmap;
}

if (lguest_devices[lgdev->index].features & LGUEST_DEVICE_F_RANDOMNESS)
irqf |= IRQF_SAMPLE_RANDOM;
- if (request_irq(dev->irq, lguestnet_rcv, irqf, "lguestnet", dev) != 0) {
- pr_debug("lguestnet: could not get net irq %i\n", dev->irq);
+ if (request_irq(lgdev_irq(lgdev), lguestnet_rcv, irqf, "lguestnet",
+ dev) != 0) {
+ pr_debug("lguestnet: cannot get irq %i\n", lgdev_irq(lgdev));
goto unregister;
}

@@ -323,6 +330,8 @@ static int lguestnet_probe(struct lguest

unregister:
unregister_netdev(dev);
+unmap:
+ iounmap((__force void __iomem *)info->peer);
free:
free_netdev(dev);
return err;


2007-05-11 01:24:19

by Rusty Russell

[permalink] [raw]
Subject: [PATCH 4/5] lguest block driver feedback tidyups

1) Use new dma wrapper functions, and handle bind failure (may happen
in future)
2) Use new lgdev_irq() "get me a good interrupt number" function.
3) __force the ioremap: guests can use it as normal memory.

Signed-off-by: Rusty Russell <[email protected]>
---
drivers/block/lguest_blk.c | 16 ++++++++++------
1 file changed, 10 insertions(+), 6 deletions(-)

===================================================================
--- a/drivers/block/lguest_blk.c
+++ b/drivers/block/lguest_blk.c
@@ -123,7 +123,7 @@ static void do_write(struct blockdev *bd
pr_debug("lgb: WRITE sector %li\n", (long)req->sector);
setup_req(bd, 1, req, &send);

- hcall(LHCALL_SEND_DMA, bd->phys_addr, __pa(&send), 0);
+ lguest_send_dma(bd->phys_addr, &send);
}

static void do_read(struct blockdev *bd, struct request *req)
@@ -134,7 +134,7 @@ static void do_read(struct blockdev *bd,
setup_req(bd, 0, req, &bd->dma);

empty_dma(&ping);
- hcall(LHCALL_SEND_DMA, bd->phys_addr, __pa(&ping), 0);
+ lguest_send_dma(bd->phys_addr, &ping);
}

static void do_lgb_request(request_queue_t *q)
@@ -183,13 +183,13 @@ static int lguestblk_probe(struct lguest
return -ENOMEM;

spin_lock_init(&bd->lock);
- bd->irq = lgdev->index+1;
+ bd->irq = lgdev_irq(lgdev);
bd->req = NULL;
bd->dma.used_len = 0;
bd->dma.len[0] = 0;
bd->phys_addr = (lguest_devices[lgdev->index].pfn << PAGE_SHIFT);

- bd->lb_page = (void *)ioremap(bd->phys_addr, PAGE_SIZE);
+ bd->lb_page = (__force void *)ioremap(bd->phys_addr, PAGE_SIZE);
if (!bd->lb_page) {
err = -ENOMEM;
goto out_free_bd;
@@ -225,7 +225,9 @@ static int lguestblk_probe(struct lguest
if (err)
goto out_cleanup_queue;

- hcall(LHCALL_BIND_DMA, bd->phys_addr, __pa(&bd->dma), (1<<8)+bd->irq);
+ err = lguest_bind_dma(bd->phys_addr, &bd->dma, 1, bd->irq);
+ if (err)
+ goto out_free_irq;

bd->disk->major = bd->major;
bd->disk->first_minor = 0;
@@ -241,6 +243,8 @@ static int lguestblk_probe(struct lguest
lgdev->private = bd;
return 0;

+out_free_irq:
+ free_irq(bd->irq, bd);
out_cleanup_queue:
blk_cleanup_queue(bd->disk->queue);
out_put_disk:
@@ -248,7 +252,7 @@ out_unregister_blkdev:
out_unregister_blkdev:
unregister_blkdev(bd->major, "lguestblk");
out_unmap:
- iounmap(bd->lb_page);
+ iounmap((__force void *__iomem)bd->lb_page);
out_free_bd:
kfree(bd);
return err;


2007-05-11 01:25:12

by Rusty Russell

[permalink] [raw]
Subject: [PATCH 5/5] lguest console driver feedback tidyups

1) Use new lguest_send_dma & lguest_bind_dma functions.
2) sparse: lguest_cons can be static.

Signed-off-by: Rusty Russell <[email protected]>
---
drivers/char/hvc_lguest.c | 15 +++++++++------
1 file changed, 9 insertions(+), 6 deletions(-)

===================================================================
--- a/drivers/char/hvc_lguest.c
+++ b/drivers/char/hvc_lguest.c
@@ -36,7 +36,7 @@ static int put_chars(u32 vtermno, const
dma.len[1] = 0;
dma.addr[0] = __pa(buf);

- hcall(LHCALL_SEND_DMA, LGUEST_CONSOLE_DMA_KEY, __pa(&dma), 0);
+ lguest_send_dma(LGUEST_CONSOLE_DMA_KEY, &dma);
return count;
}

@@ -59,7 +59,7 @@ static int get_chars(u32 vtermno, char *
return count;
}

-struct hv_ops lguest_cons = {
+static struct hv_ops lguest_cons = {
.get_chars = get_chars,
.put_chars = put_chars,
};
@@ -75,14 +75,17 @@ console_initcall(cons_init);

static int lguestcons_probe(struct lguest_device *lgdev)
{
- lgdev->private = hvc_alloc(0, lgdev->index+1, &lguest_cons, 256);
+ int err;
+
+ lgdev->private = hvc_alloc(0, lgdev_irq(lgdev), &lguest_cons, 256);
if (IS_ERR(lgdev->private))
return PTR_ERR(lgdev->private);

- if (!hcall(LHCALL_BIND_DMA, LGUEST_CONSOLE_DMA_KEY, __pa(&cons_input),
- (1<<8) + lgdev->index+1))
+ err = lguest_bind_dma(LGUEST_CONSOLE_DMA_KEY, &cons_input, 1,
+ lgdev_irq(lgdev));
+ if (err)
printk("lguest console: failed to bind buffer.\n");
- return 0;
+ return err;
}

static struct lguest_driver lguestcons_drv = {


2007-05-11 06:54:20

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 0/5] lguest feedback tidyups

On Fri, May 11, 2007 at 11:17:26AM +1000, Rusty Russell wrote:
> - But the cost was high: lots of __force casts 8(

That sounds like something is very fishy.

2007-05-11 06:56:25

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 2/5] lguest guest feedback tidyups

On Fri, May 11, 2007 at 11:21:30AM +1000, Rusty Russell wrote:
> 1) send-dma and bind-dma hypercall wrappers for drivers to use,
> 2) formalization of the convention that devices can use the irq
> corresponding to their index on the lguest_bus.
> 3) ___force to shut up sparse: guests *can* use ioremap as virtual mem.

No, they can't. Even if in your case the underlying address spaces
happen to be the same anything returned by ioremap must use the proper
accessors. That's the whole point of having this separation, otherwise
you wouldn't need to use ioremap at all. So instead of sprinkling cast
around add lguest_read*/lguest_write* accessors that do the __force cast
once and make sure the ioremap return value is always accessed using those.

2007-05-11 07:31:31

by Rusty Russell

[permalink] [raw]
Subject: Re: [PATCH 2/5] lguest guest feedback tidyups

On Fri, 2007-05-11 at 07:56 +0100, Christoph Hellwig wrote:
> On Fri, May 11, 2007 at 11:21:30AM +1000, Rusty Russell wrote:
> > 1) send-dma and bind-dma hypercall wrappers for drivers to use,
> > 2) formalization of the convention that devices can use the irq
> > corresponding to their index on the lguest_bus.
> > 3) ___force to shut up sparse: guests *can* use ioremap as virtual mem.
>
> No, they can't. Even if in your case the underlying address spaces
> happen to be the same anything returned by ioremap must use the proper
> accessors. That's the whole point of having this separation, otherwise
> you wouldn't need to use ioremap at all.

Hi Christoph!

Well, without ioremap, the memory wouldn't normally be mapped. Is
there something better to use?

> So instead of sprinkling cast
> around add lguest_read*/lguest_write* accessors that do the __force cast
> once and make sure the ioremap return value is always accessed using those.

And that's nothing to do with iremap. They're required because guest
"physical" == host virtual, and casting a long to a "__user void *"
seems to require a __force.

I enjoy a good Hellwigging as much as anyone, but your aim is off.

Cheers,
Rusty.

2007-05-11 07:41:07

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 2/5] lguest guest feedback tidyups

On Fri, May 11, 2007 at 05:31:06PM +1000, Rusty Russell wrote:
> Well, without ioremap, the memory wouldn't normally be mapped. Is
> there something better to use?

Either use accessors or use your own lguest-specific remapping function that
doesn't return __iomem function

> > So instead of sprinkling cast
> > around add lguest_read*/lguest_write* accessors that do the __force cast
> > once and make sure the ioremap return value is always accessed using those.
>
> And that's nothing to do with iremap. They're required because guest
> "physical" == host virtual, and casting a long to a "__user void *"
> seems to require a __force.

Well, it's the same problem really. You want to treat it as host virtual
in some places and and guest physical in others, but you need to keep
the abstraction clean. To keep that absraction clean you introduce
accessors that contain the __force cast. Now that you have these accessors
instead of random casts you need to think a bit where the host virtual
abstraction makes more sense and were the the guest virtual abstraction
makes more sense and use it consistantly there with as few as possible
uses of the accessors in between.

Now to something different than the technical content of this mail:

> Hi Christoph!

> I enjoy a good Hellwigging as much as anyone, but your aim is off.

Please stop this crap. I know who I am, so there's no need to waste
mail estate with saying Hi. Also please stop being a total fuckass
and abusing my lastname just because you didn't get what was in the
mail.

2007-05-13 01:01:20

by Rusty Russell

[permalink] [raw]
Subject: Re: [PATCH 2/5] lguest guest feedback tidyups

On Fri, 2007-05-11 at 08:40 +0100, Christoph Hellwig wrote:
> On Fri, May 11, 2007 at 05:31:06PM +1000, Rusty Russell wrote:
> > Hi Christoph!
>
> > I enjoy a good Hellwigging as much as anyone, but your aim is off.
>
> Please stop this crap. I know who I am, so there's no need to waste
> mail estate with saying Hi. Also please stop being a total fuckass
> and abusing my lastname just because you didn't get what was in the
> mail.

On the contrary, I was honestly trying to being polite. Perhaps I
failed.

You made a mistake. None of us like to be corrected, so I did it as
gently as I could. But you were right about one thing: describing your
previous mail as "a good Hellwigging" was inappropriate.

This latest mail... *that's* good Hellwigging!
Rusty.

2007-05-13 03:49:18

by Rusty Russell

[permalink] [raw]
Subject: Re: [PATCH 2/5] lguest guest feedback tidyups

On Fri, 2007-05-11 at 07:56 +0100, Christoph Hellwig wrote:
> On Fri, May 11, 2007 at 11:21:30AM +1000, Rusty Russell wrote:
> > 1) send-dma and bind-dma hypercall wrappers for drivers to use,
> > 2) formalization of the convention that devices can use the irq
> > corresponding to their index on the lguest_bus.
> > 3) ___force to shut up sparse: guests *can* use ioremap as virtual mem.
>
> No, they can't.

OK, here's the updated patch (lguest_map/lguest_unmap).

==
Jeff Garzik argued forcefully that __pa() should not appear in
drivers, and that struct netdevice's irq field should not be used.
Christoph Hellwig suggested that I run sparse, and provide an
lguest-specific wrapper for mapping/unmapping virtual device memory.

Results:
1) send-dma and bind-dma hypercall wrappers for drivers to use,
2) formalization of the convention that devices can use the irq
corresponding to their index on the lguest_bus.
3) Helper to map and unmap virtual device memory (not classic __iomem).
4) lguest.c should include "lguest_bus.h" for lguest_devices declaration.

Signed-off-by: Rusty Russell <[email protected]>
---
drivers/lguest/lguest.c | 33 +++++++++++++++++++++++++++++++++
drivers/lguest/lguest_bus.c | 2 +-
include/linux/lguest_bus.h | 17 ++++++++++++++++-
3 files changed, 50 insertions(+), 2 deletions(-)

===================================================================
--- a/drivers/lguest/lguest.c
+++ b/drivers/lguest/lguest.c
@@ -27,6 +27,7 @@
#include <linux/interrupt.h>
#include <linux/lguest.h>
#include <linux/lguest_launcher.h>
+#include <linux/lguest_bus.h>
#include <asm/paravirt.h>
#include <asm/param.h>
#include <asm/page.h>
@@ -35,6 +36,7 @@
#include <asm/setup.h>
#include <asm/e820.h>
#include <asm/mce.h>
+#include <asm/io.h>

/* Declarations for definitions in lguest_guest.S */
extern char lguest_noirq_start[], lguest_noirq_end[];
@@ -99,6 +101,37 @@ void async_hcall(unsigned long call,
next_call = 0;
}
local_irq_restore(flags);
+}
+
+void lguest_send_dma(unsigned long key, struct lguest_dma *dma)
+{
+ dma->used_len = 0;
+ hcall(LHCALL_SEND_DMA, key, __pa(dma), 0);
+}
+
+int lguest_bind_dma(unsigned long key, struct lguest_dma *dmas,
+ unsigned int num, u8 irq)
+{
+ if (!hcall(LHCALL_BIND_DMA, key, __pa(dmas), (num << 8) | irq))
+ return -ENOMEM;
+ return 0;
+}
+
+void lguest_unbind_dma(unsigned long key, struct lguest_dma *dmas)
+{
+ hcall(LHCALL_BIND_DMA, key, __pa(dmas), 0);
+}
+
+/* For guests, device memory can be used as normal memory, so we cast away the
+ * __iomem to quieten sparse. */
+void *lguest_map(unsigned long phys_addr, unsigned long pages)
+{
+ return (__force void *)ioremap(phys_addr, PAGE_SIZE*pages);
+}
+
+void lguest_unmap(void *addr)
+{
+ iounmap((__force void __iomem *)addr);
}

static unsigned long save_fl(void)
===================================================================
--- a/drivers/lguest/lguest_bus.c
+++ b/drivers/lguest/lguest_bus.c
@@ -136,7 +136,7 @@ static int __init lguest_bus_init(void)
return 0;

/* Devices are in page above top of "normal" mem. */
- lguest_devices = ioremap(max_pfn << PAGE_SHIFT, PAGE_SIZE);
+ lguest_devices = lguest_map(max_pfn<<PAGE_SHIFT, 1);

if (bus_register(&lguest_bus.bus) != 0
|| device_register(&lguest_bus.dev) != 0)
===================================================================
--- a/include/linux/lguest_bus.h
+++ b/include/linux/lguest_bus.h
@@ -7,7 +7,6 @@

struct lguest_device {
/* Unique busid, and index into lguest_page->devices[] */
- /* By convention, each device can use irq index+1 if it wants to. */
unsigned int index;

struct device dev;
@@ -15,6 +14,22 @@ struct lguest_device {
/* Driver can hang data off here. */
void *private;
};
+
+/* By convention, each device can use irq index+1 if it wants to. */
+static inline int lgdev_irq(const struct lguest_device *dev)
+{
+ return dev->index + 1;
+}
+
+/* dma args must not be vmalloced! */
+void lguest_send_dma(unsigned long key, struct lguest_dma *dma);
+int lguest_bind_dma(unsigned long key, struct lguest_dma *dmas,
+ unsigned int num, u8 irq);
+void lguest_unbind_dma(unsigned long key, struct lguest_dma *dmas);
+
+/* Map the virtual device space */
+void *lguest_map(unsigned long phys_addr, unsigned long pages);
+void lguest_unmap(void *);

struct lguest_driver {
const char *name;


2007-05-13 03:52:24

by Rusty Russell

[permalink] [raw]
Subject: Re: [PATCH 3/5] lguest network driver feedback tidyups

On Fri, 2007-05-11 at 11:22 +1000, Rusty Russell wrote:
> Feedback from Jeff Garzik:
> 1) Use netdev_priv instead of dev->priv.
> 2) Check for ioremap failure
> 3) iounmap on failure.
> 4) Wrap SEND_DMA and BIND_DMA calls
> 5) Don't set NETIF_F_SG unless we set NETIF_F_NO_CSUM
> 6) Use SET_NETDEV_DEV()
> 7) Don't set dev->irq, mem_start & mem_end (deprecated)
>
> Sparse warnings:
> 8) __force the ioremap: guests can use it as normal memory.

And here's the update which uses lguest_map/lguest_unmap() instead of
__force:

Feedback from Jeff Garzik:
1) Use netdev_priv instead of dev->priv.
2) Check for ioremap failure
3) iounmap on failure.
4) Wrap SEND_DMA and BIND_DMA calls
5) Don't set NETIF_F_SG unless we set NETIF_F_NO_CSUM
6) Use SET_NETDEV_DEV()
7) Don't set dev->irq, mem_start & mem_end (deprecated)

Feedback from Chrisoph Hellwig:
8) Use lguest_map()/lguest_unmap() helpers instead of ioremap/iounmap.

Signed-off-by: Rusty Russell <[email protected]>
---
drivers/net/lguest_net.c | 56 ++++++++++++++++++++++++++--------------------
1 file changed, 32 insertions(+), 24 deletions(-)

===================================================================
--- a/drivers/net/lguest_net.c
+++ b/drivers/net/lguest_net.c
@@ -22,7 +22,6 @@
#include <linux/module.h>
#include <linux/mm_types.h>
#include <linux/lguest_bus.h>
-#include <asm/io.h>

#define SHARED_SIZE PAGE_SIZE
#define MAX_LANS 4
@@ -34,6 +33,9 @@ struct lguestnet_info
struct lguest_net *peer;
unsigned long peer_phys;
unsigned long mapsize;
+
+ /* The lguest_device I come from */
+ struct lguest_device *lgdev;

/* My peerid. */
unsigned int me;
@@ -84,7 +86,7 @@ static void skb_to_dma(const struct sk_b

static void lguestnet_set_multicast(struct net_device *dev)
{
- struct lguestnet_info *info = dev->priv;
+ struct lguestnet_info *info = netdev_priv(dev);

if ((dev->flags & (IFF_PROMISC|IFF_ALLMULTI)) || dev->mc_count)
info->peer[info->me].mac[0] |= PROMISC_BIT;
@@ -110,16 +112,16 @@ static void transfer_packet(struct net_d
struct sk_buff *skb,
unsigned int peernum)
{
- struct lguestnet_info *info = dev->priv;
+ struct lguestnet_info *info = netdev_priv(dev);
struct lguest_dma dma;

skb_to_dma(skb, skb_headlen(skb), &dma);
pr_debug("xfer length %04x (%u)\n", htons(skb->len), skb->len);

- hcall(LHCALL_SEND_DMA, peer_key(info,peernum), __pa(&dma), 0);
+ lguest_send_dma(peer_key(info, peernum), &dma);
if (dma.used_len != skb->len) {
dev->stats.tx_carrier_errors++;
- pr_debug("Bad xfer to peer %i: %i of %i (dma %p/%i)\n",
+ pr_debug("Bad xfer to peer %i: %li of %i (dma %p/%i)\n",
peernum, dma.used_len, skb->len,
(void *)dma.addr[0], dma.len[0]);
} else {
@@ -137,7 +139,7 @@ static int lguestnet_start_xmit(struct s
{
unsigned int i;
int broadcast;
- struct lguestnet_info *info = dev->priv;
+ struct lguestnet_info *info = netdev_priv(dev);
const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;

pr_debug("%s: xmit %02x:%02x:%02x:%02x:%02x:%02x\n",
@@ -162,7 +164,7 @@ static int lguestnet_start_xmit(struct s
/* Find a new skb to put in this slot in shared mem. */
static int fill_slot(struct net_device *dev, unsigned int slot)
{
- struct lguestnet_info *info = dev->priv;
+ struct lguestnet_info *info = netdev_priv(dev);
/* Try to create and register a new one. */
info->skb[slot] = netdev_alloc_skb(dev, ETH_HLEN + ETH_DATA_LEN);
if (!info->skb[slot]) {
@@ -180,7 +182,7 @@ static irqreturn_t lguestnet_rcv(int irq
static irqreturn_t lguestnet_rcv(int irq, void *dev_id)
{
struct net_device *dev = dev_id;
- struct lguestnet_info *info = dev->priv;
+ struct lguestnet_info *info = netdev_priv(dev);
unsigned int i, done = 0;

for (i = 0; i < ARRAY_SIZE(info->dma); i++) {
@@ -220,7 +222,7 @@ static int lguestnet_open(struct net_dev
static int lguestnet_open(struct net_device *dev)
{
int i;
- struct lguestnet_info *info = dev->priv;
+ struct lguestnet_info *info = netdev_priv(dev);

/* Set up our MAC address */
memcpy(info->peer[info->me].mac, dev->dev_addr, ETH_ALEN);
@@ -232,8 +234,8 @@ static int lguestnet_open(struct net_dev
if (fill_slot(dev, i) != 0)
goto cleanup;
}
- if (!hcall(LHCALL_BIND_DMA, peer_key(info, info->me), __pa(info->dma),
- (NUM_SKBS << 8) | dev->irq))
+ if (lguest_bind_dma(peer_key(info,info->me), info->dma,
+ NUM_SKBS, lgdev_irq(info->lgdev)) != 0)
goto cleanup;
return 0;

@@ -246,13 +248,13 @@ static int lguestnet_close(struct net_de
static int lguestnet_close(struct net_device *dev)
{
unsigned int i;
- struct lguestnet_info *info = dev->priv;
+ struct lguestnet_info *info = netdev_priv(dev);

/* Clear all trace: others might deliver packets, we'll ignore it. */
memset(&info->peer[info->me], 0, sizeof(info->peer[info->me]));

/* Deregister sg lists. */
- hcall(LHCALL_BIND_DMA, peer_key(info, info->me), __pa(info->dma), 0);
+ lguest_unbind_dma(peer_key(info, info->me), info->dma);
for (i = 0; i < ARRAY_SIZE(info->dma); i++)
dev_kfree_skb(info->skb[i]);
return 0;
@@ -290,30 +292,34 @@ static int lguestnet_probe(struct lguest
/* Turning on/off promisc will call dev->set_multicast_list.
* We don't actually support multicast yet */
dev->set_multicast_list = lguestnet_set_multicast;
- dev->mem_start = ((unsigned long)desc->pfn << PAGE_SHIFT);
- dev->mem_end = dev->mem_start + PAGE_SIZE * desc->num_pages;
- dev->irq = lgdev->index+1;
- dev->features = NETIF_F_SG;
+ SET_NETDEV_DEV(dev, &lgdev->dev);
if (desc->features & LGUEST_NET_F_NOCSUM)
- dev->features |= NETIF_F_NO_CSUM;
-
- info = dev->priv;
+ dev->features = NETIF_F_SG|NETIF_F_NO_CSUM;
+
+ info = netdev_priv(dev);
info->mapsize = PAGE_SIZE * desc->num_pages;
info->peer_phys = ((unsigned long)desc->pfn << PAGE_SHIFT);
- info->peer = (void *)ioremap(info->peer_phys, info->mapsize);
+ info->lgdev = lgdev;
+ info->peer = lguest_map(info->peer_phys, desc->num_pages);
+ if (!info->peer) {
+ err = -ENOMEM;
+ goto free;
+ }
+
/* This stores our peerid (upper bits reserved for future). */
info->me = (desc->features & (info->mapsize-1));

err = register_netdev(dev);
if (err) {
pr_debug("lguestnet: registering device failed\n");
- goto free;
+ goto unmap;
}

if (lguest_devices[lgdev->index].features & LGUEST_DEVICE_F_RANDOMNESS)
irqf |= IRQF_SAMPLE_RANDOM;
- if (request_irq(dev->irq, lguestnet_rcv, irqf, "lguestnet", dev) != 0) {
- pr_debug("lguestnet: could not get net irq %i\n", dev->irq);
+ if (request_irq(lgdev_irq(lgdev), lguestnet_rcv, irqf, "lguestnet",
+ dev) != 0) {
+ pr_debug("lguestnet: cannot get irq %i\n", lgdev_irq(lgdev));
goto unregister;
}

@@ -323,6 +329,8 @@ static int lguestnet_probe(struct lguest

unregister:
unregister_netdev(dev);
+unmap:
+ lguest_unmap(info->peer);
free:
free_netdev(dev);
return err;


2007-05-13 04:07:55

by Rusty Russell

[permalink] [raw]
Subject: Re: [PATCH 1/5] lguest host feedback tidyups

On Fri, 2007-05-11 at 11:19 +1000, Rusty Russell wrote:
> 1) Sam Ravnborg says lg-objs is deprecated, use lg-y.
> 2) Sparse: page_tables.c unnecessary initialization
> 3) Lots of __force to shut sparse up: guest "physical" addresses are
> userspace virtual.
> 4) Change prototype of run_lguest and do cast in caller instead (when we add
> __iomem to cast, it runs over another line).

(Andrew: replacement for lguest-the-host-code-tidyups.patch)

Christoph Hellwig said runs sparse:
1) page_tables.c unnecessary initialization
2) Change prototype of run_lguest and do cast in caller instead (when we add
__user to cast, it runs over another line).

Most importantly, I now realize that Christoph's incorrect ranting
about lgread_u32 et al was in fact a subtle ploy to make me diagnose
the real issue: sparse 0.3 complains about casting a __user pointer
to/from u32, but not an "unsigned long". They are (currently)
equivalent for lguest, but this is a much better solution than __force.

Kudos, Christoph, for such masterful manipulation!

Signed-off-by: Rusty Russell <[email protected]>
---
drivers/lguest/core.c | 37 ++++++++++++++++-----------------
drivers/lguest/hypercalls.c | 9 +++-----
drivers/lguest/interrupts_and_traps.c | 4 +--
drivers/lguest/io.c | 2 -
drivers/lguest/lg.h | 37 ++++++++++++++++-----------------
drivers/lguest/lguest_user.c | 2 -
drivers/lguest/page_tables.c | 2 -
drivers/lguest/segments.c | 6 ++---
include/linux/lguest_launcher.h | 4 +--
10 files changed, 52 insertions(+), 53 deletions(-)

diff -r 35c8b37f8d3c drivers/lguest/core.c
--- a/drivers/lguest/core.c Wed May 09 23:23:56 2007 +1000
+++ b/drivers/lguest/core.c Sun May 13 13:05:13 2007 +1000
@@ -212,39 +212,40 @@ int lguest_address_ok(const struct lgues
}

/* Just like get_user, but don't let guest access lguest binary. */
-u32 lgread_u32(struct lguest *lg, u32 addr)
+u32 lgread_u32(struct lguest *lg, unsigned long addr)
{
u32 val = 0;

/* Don't let them access lguest binary */
if (!lguest_address_ok(lg, addr, sizeof(val))
|| get_user(val, (u32 __user *)addr) != 0)
- kill_guest(lg, "bad read address %u", addr);
+ kill_guest(lg, "bad read address %#lx", addr);
return val;
}

-void lgwrite_u32(struct lguest *lg, u32 addr, u32 val)
+void lgwrite_u32(struct lguest *lg, unsigned long addr, u32 val)
{
if (!lguest_address_ok(lg, addr, sizeof(val))
|| put_user(val, (u32 __user *)addr) != 0)
- kill_guest(lg, "bad write address %u", addr);
-}
-
-void lgread(struct lguest *lg, void *b, u32 addr, unsigned bytes)
+ kill_guest(lg, "bad write address %#lx", addr);
+}
+
+void lgread(struct lguest *lg, void *b, unsigned long addr, unsigned bytes)
{
if (!lguest_address_ok(lg, addr, bytes)
|| copy_from_user(b, (void __user *)addr, bytes) != 0) {
/* copy_from_user should do this, but as we rely on it... */
memset(b, 0, bytes);
- kill_guest(lg, "bad read address %u len %u", addr, bytes);
- }
-}
-
-void lgwrite(struct lguest *lg, u32 addr, const void *b, unsigned bytes)
+ kill_guest(lg, "bad read address %#lx len %u", addr, bytes);
+ }
+}
+
+void lgwrite(struct lguest *lg, unsigned long addr, const void *b,
+ unsigned bytes)
{
if (!lguest_address_ok(lg, addr, bytes)
|| copy_to_user((void __user *)addr, b, bytes) != 0)
- kill_guest(lg, "bad write address %u len %u", addr, bytes);
+ kill_guest(lg, "bad write address %#lx len %u", addr, bytes);
}

static void set_ts(void)
@@ -294,7 +295,7 @@ static void run_guest_once(struct lguest
: "memory", "%edx", "%ecx", "%edi", "%esi");
}

-int run_guest(struct lguest *lg, char *__user user)
+int run_guest(struct lguest *lg, unsigned long __user *user)
{
while (!lg->dead) {
unsigned int cr2 = 0; /* Damn gcc */
@@ -302,8 +303,8 @@ int run_guest(struct lguest *lg, char *_
/* Hypercalls first: we might have been out to userspace */
do_hypercalls(lg);
if (lg->dma_is_pending) {
- if (put_user(lg->pending_dma, (unsigned long *)user) ||
- put_user(lg->pending_key, (unsigned long *)user+1))
+ if (put_user(lg->pending_dma, user) ||
+ put_user(lg->pending_key, user+1))
return -EFAULT;
return sizeof(unsigned long)*2;
}
@@ -367,7 +368,7 @@ int run_guest(struct lguest *lg, char *_
if (deliver_trap(lg, lg->regs->trapnum))
continue;

- kill_guest(lg, "unhandled trap %i at %#x (%#x)",
+ kill_guest(lg, "unhandled trap %li at %#lx (%#lx)",
lg->regs->trapnum, lg->regs->eip,
lg->regs->trapnum == 14 ? cr2 : lg->regs->errcode);
}
@@ -420,7 +421,7 @@ static int __init init(void)
lock_cpu_hotplug();
if (cpu_has_pge) { /* We have a broader idea of "global". */
cpu_had_pge = 1;
- on_each_cpu(adjust_pge, 0, 0, 1);
+ on_each_cpu(adjust_pge, (void *)0, 0, 1);
clear_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability);
}
unlock_cpu_hotplug();
diff -r 35c8b37f8d3c drivers/lguest/hypercalls.c
--- a/drivers/lguest/hypercalls.c Wed May 09 23:23:56 2007 +1000
+++ b/drivers/lguest/hypercalls.c Sun May 13 13:07:20 2007 +1000
@@ -83,7 +83,7 @@ static void do_hcall(struct lguest *lg,
guest_set_pmd(lg, regs->edx, regs->ebx);
break;
case LHCALL_LOAD_TLS:
- guest_load_tls(lg, (struct desc_struct __user*)regs->edx);
+ guest_load_tls(lg, regs->edx);
break;
case LHCALL_TS:
lg->ts = regs->edx;
@@ -92,7 +92,7 @@ static void do_hcall(struct lguest *lg,
lg->halted = 1;
break;
default:
- kill_guest(lg, "Bad hypercall %i\n", regs->eax);
+ kill_guest(lg, "Bad hypercall %li\n", regs->eax);
}
}

@@ -137,15 +137,14 @@ static void initialize(struct lguest *lg
static void initialize(struct lguest *lg)
{
if (lg->regs->eax != LHCALL_LGUEST_INIT) {
- kill_guest(lg, "hypercall %i before LGUEST_INIT",
+ kill_guest(lg, "hypercall %li before LGUEST_INIT",
lg->regs->eax);
return;
}

lg->lguest_data = (struct lguest_data __user *)lg->regs->edx;
/* We check here so we can simply copy_to_user/from_user */
- if (!lguest_address_ok(lg, (long)lg->lguest_data,
- sizeof(*lg->lguest_data))) {
+ if (!lguest_address_ok(lg, lg->regs->edx, sizeof(*lg->lguest_data))) {
kill_guest(lg, "bad guest page %p", lg->lguest_data);
return;
}
diff -r 35c8b37f8d3c drivers/lguest/interrupts_and_traps.c
--- a/drivers/lguest/interrupts_and_traps.c Wed May 09 23:23:56 2007 +1000
+++ b/drivers/lguest/interrupts_and_traps.c Sun May 13 13:20:40 2007 +1000
@@ -18,7 +18,7 @@ static int idt_present(u32 lo, u32 hi)

static void push_guest_stack(struct lguest *lg, u32 __user **gstack, u32 val)
{
- lgwrite_u32(lg, (u32)--(*gstack), val);
+ lgwrite_u32(lg, (unsigned long)--(*gstack), val);
}

static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err)
@@ -53,7 +53,7 @@ static void set_guest_interrupt(struct l

/* Change the real stack so switcher returns to trap handler */
lg->regs->ss = ss;
- lg->regs->esp = (u32)gstack + lg->page_offset;
+ lg->regs->esp = (unsigned long)gstack + lg->page_offset;
lg->regs->cs = (__KERNEL_CS|GUEST_PL);
lg->regs->eip = idt_address(lo, hi);

diff -r 35c8b37f8d3c drivers/lguest/io.c
--- a/drivers/lguest/io.c Wed May 09 23:23:56 2007 +1000
+++ b/drivers/lguest/io.c Sun May 13 13:09:20 2007 +1000
@@ -52,7 +52,7 @@ static int check_dma_list(struct lguest
return 1;

kill:
- kill_guest(lg, "bad DMA entry: %u@%#x", dma->len[i], dma->addr[i]);
+ kill_guest(lg, "bad DMA entry: %u@%#lx", dma->len[i], dma->addr[i]);
return 0;
}

diff -r 35c8b37f8d3c drivers/lguest/lg.h
--- a/drivers/lguest/lg.h Wed May 09 23:23:56 2007 +1000
+++ b/drivers/lguest/lg.h Sun May 13 13:06:53 2007 +1000
@@ -25,18 +25,18 @@ struct lguest_regs
struct lguest_regs
{
/* Manually saved part. */
- u32 ebx, ecx, edx;
- u32 esi, edi, ebp;
- u32 gs;
- u32 eax;
- u32 fs, ds, es;
- u32 trapnum, errcode;
+ unsigned long ebx, ecx, edx;
+ unsigned long esi, edi, ebp;
+ unsigned long gs;
+ unsigned long eax;
+ unsigned long fs, ds, es;
+ unsigned long trapnum, errcode;
/* Trap pushed part */
- u32 eip;
- u32 cs;
- u32 eflags;
- u32 esp;
- u32 ss;
+ unsigned long eip;
+ unsigned long cs;
+ unsigned long eflags;
+ unsigned long esp;
+ unsigned long ss;
};

void free_pagetables(void);
@@ -175,14 +175,14 @@ extern struct mutex lguest_lock;
extern struct mutex lguest_lock;

/* core.c: */
-u32 lgread_u32(struct lguest *lg, u32 addr);
-void lgwrite_u32(struct lguest *lg, u32 val, u32 addr);
-void lgread(struct lguest *lg, void *buf, u32 addr, unsigned bytes);
-void lgwrite(struct lguest *lg, u32 addr, const void *buf, unsigned bytes);
+u32 lgread_u32(struct lguest *lg, unsigned long addr);
+void lgwrite_u32(struct lguest *lg, unsigned long addr, u32 val);
+void lgread(struct lguest *lg, void *buf, unsigned long addr, unsigned len);
+void lgwrite(struct lguest *lg, unsigned long, const void *buf, unsigned len);
int find_free_guest(void);
int lguest_address_ok(const struct lguest *lg,
unsigned long addr, unsigned long len);
-int run_guest(struct lguest *lg, char *__user user);
+int run_guest(struct lguest *lg, unsigned long __user *user);


/* interrupts_and_traps.c: */
@@ -199,9 +199,8 @@ void copy_traps(const struct lguest *lg,
/* segments.c: */
void setup_default_gdt_entries(struct lguest_ro_state *state);
void setup_guest_gdt(struct lguest *lg);
-void load_guest_gdt(struct lguest *lg, u32 table, u32 num);
-void guest_load_tls(struct lguest *lg,
- const struct desc_struct __user *tls_array);
+void load_guest_gdt(struct lguest *lg, unsigned long table, u32 num);
+void guest_load_tls(struct lguest *lg, unsigned long tls_array);
void copy_gdt(const struct lguest *lg, struct desc_struct *gdt);

/* page_tables.c: */
diff -r 35c8b37f8d3c drivers/lguest/lguest_user.c
--- a/drivers/lguest/lguest_user.c Wed May 09 23:23:56 2007 +1000
+++ b/drivers/lguest/lguest_user.c Sun May 13 11:48:43 2007 +1000
@@ -65,7 +65,7 @@ static ssize_t read(struct file *file, c
if (lg->dma_is_pending)
lg->dma_is_pending = 0;

- return run_guest(lg, user);
+ return run_guest(lg, (unsigned long __user *)user);
}

/* Take: pfnlimit, pgdir, start, pageoffset. */
diff -r 35c8b37f8d3c drivers/lguest/page_tables.c
--- a/drivers/lguest/page_tables.c Wed May 09 23:23:56 2007 +1000
+++ b/drivers/lguest/page_tables.c Sun May 13 11:48:43 2007 +1000
@@ -13,7 +13,7 @@
#define PTES_PER_PAGE (1 << PTES_PER_PAGE_SHIFT)
#define SWITCHER_PGD_INDEX (PTES_PER_PAGE - 1)

-static DEFINE_PER_CPU(spte_t *, switcher_pte_pages) = { NULL };
+static DEFINE_PER_CPU(spte_t *, switcher_pte_pages);
#define switcher_pte_page(cpu) per_cpu(switcher_pte_pages, cpu)

static unsigned vaddr_to_pgd_index(unsigned long vaddr)
diff -r 35c8b37f8d3c drivers/lguest/segments.c
--- a/drivers/lguest/segments.c Wed May 09 23:23:56 2007 +1000
+++ b/drivers/lguest/segments.c Sun May 13 13:07:01 2007 +1000
@@ -95,7 +95,7 @@ void copy_gdt(const struct lguest *lg, s
gdt[i] = lg->gdt[i];
}

-void load_guest_gdt(struct lguest *lg, u32 table, u32 num)
+void load_guest_gdt(struct lguest *lg, unsigned long table, u32 num)
{
if (num > ARRAY_SIZE(lg->gdt))
kill_guest(lg, "too many gdt entries %i", num);
@@ -105,11 +105,11 @@ void load_guest_gdt(struct lguest *lg, u
lg->changed |= CHANGED_GDT;
}

-void guest_load_tls(struct lguest *lg, const struct desc_struct __user *gtls)
+void guest_load_tls(struct lguest *lg, unsigned long gtls)
{
struct desc_struct *tls = &lg->gdt[GDT_ENTRY_TLS_MIN];

- lgread(lg, tls, (u32)gtls, sizeof(*tls)*GDT_ENTRY_TLS_ENTRIES);
+ lgread(lg, tls, gtls, sizeof(*tls)*GDT_ENTRY_TLS_ENTRIES);
fixup_gdt_table(lg);
lg->changed |= CHANGED_GDT;
}
diff -r 35c8b37f8d3c include/linux/lguest_launcher.h
--- a/include/linux/lguest_launcher.h Wed May 09 23:23:56 2007 +1000
+++ b/include/linux/lguest_launcher.h Sun May 13 14:01:33 2007 +1000
@@ -13,7 +13,7 @@ struct lguest_dma
{
/* 0 if free to be used, filled by hypervisor. */
u32 used_len;
- u32 addr[LGUEST_MAX_DMA_SECTIONS];
+ unsigned long addr[LGUEST_MAX_DMA_SECTIONS];
u16 len[LGUEST_MAX_DMA_SECTIONS];
};



2007-05-13 04:09:20

by Rusty Russell

[permalink] [raw]
Subject: Re: [PATCH 4/5] lguest block driver feedback tidyups

On Fri, 2007-05-11 at 11:23 +1000, Rusty Russell wrote:
> 1) Use new dma wrapper functions, and handle bind failure (may happen
> in future)
> 2) Use new lgdev_irq() "get me a good interrupt number" function.
> 3) __force the ioremap: guests can use it as normal memory.

And here is the update using lguest_map/unmap instead of ioremap +
__force:

1) Use new dma wrapper functions, and handle bind failure (may happen
in future)
2) Use new lgdev_irq() "get me a good interrupt number" function.
3) Use new lguest_map()/lguest_unmap() instead of ioremap/iounmap.

Signed-off-by: Rusty Russell <[email protected]>
---
drivers/block/lguest_blk.c | 18 +++++++++++-------
1 file changed, 11 insertions(+), 7 deletions(-)

===================================================================
--- a/drivers/block/lguest_blk.c
+++ b/drivers/block/lguest_blk.c
@@ -37,7 +37,7 @@ struct blockdev
int irq;

unsigned long phys_addr;
- /* The ioremap'ed block page. */
+ /* The mapped block page. */
struct lguest_block_page *lb_page;

/* We only have a single request outstanding at a time. */
@@ -123,7 +123,7 @@ static void do_write(struct blockdev *bd
pr_debug("lgb: WRITE sector %li\n", (long)req->sector);
setup_req(bd, 1, req, &send);

- hcall(LHCALL_SEND_DMA, bd->phys_addr, __pa(&send), 0);
+ lguest_send_dma(bd->phys_addr, &send);
}

static void do_read(struct blockdev *bd, struct request *req)
@@ -134,7 +134,7 @@ static void do_read(struct blockdev *bd,
setup_req(bd, 0, req, &bd->dma);

empty_dma(&ping);
- hcall(LHCALL_SEND_DMA, bd->phys_addr, __pa(&ping), 0);
+ lguest_send_dma(bd->phys_addr, &ping);
}

static void do_lgb_request(request_queue_t *q)
@@ -183,13 +183,13 @@ static int lguestblk_probe(struct lguest
return -ENOMEM;

spin_lock_init(&bd->lock);
- bd->irq = lgdev->index+1;
+ bd->irq = lgdev_irq(lgdev);
bd->req = NULL;
bd->dma.used_len = 0;
bd->dma.len[0] = 0;
bd->phys_addr = (lguest_devices[lgdev->index].pfn << PAGE_SHIFT);

- bd->lb_page = (void *)ioremap(bd->phys_addr, PAGE_SIZE);
+ bd->lb_page = lguest_map(bd->phys_addr, 1);
if (!bd->lb_page) {
err = -ENOMEM;
goto out_free_bd;
@@ -225,7 +225,9 @@ static int lguestblk_probe(struct lguest
if (err)
goto out_cleanup_queue;

- hcall(LHCALL_BIND_DMA, bd->phys_addr, __pa(&bd->dma), (1<<8)+bd->irq);
+ err = lguest_bind_dma(bd->phys_addr, &bd->dma, 1, bd->irq);
+ if (err)
+ goto out_free_irq;

bd->disk->major = bd->major;
bd->disk->first_minor = 0;
@@ -241,6 +243,8 @@ static int lguestblk_probe(struct lguest
lgdev->private = bd;
return 0;

+out_free_irq:
+ free_irq(bd->irq, bd);
out_cleanup_queue:
blk_cleanup_queue(bd->disk->queue);
out_put_disk:
@@ -248,7 +252,7 @@ out_unregister_blkdev:
out_unregister_blkdev:
unregister_blkdev(bd->major, "lguestblk");
out_unmap:
- iounmap(bd->lb_page);
+ lguest_unmap(bd->lb_page);
out_free_bd:
kfree(bd);
return err;


2007-05-13 04:12:34

by Rusty Russell

[permalink] [raw]
Subject: Re: [PATCH 3/5] lguest network driver feedback tidyups

On Sun, 2007-05-13 at 13:52 +1000, Rusty Russell wrote:
> --- a/drivers/net/lguest_net.c
> +++ b/drivers/net/lguest_net.c
...
> if (dma.used_len != skb->len) {
> dev->stats.tx_carrier_errors++;
> - pr_debug("Bad xfer to peer %i: %i of %i (dma %p/%i)\n",
> + pr_debug("Bad xfer to peer %i: %li of %i (dma %p/%i)\n",
> peernum, dma.used_len, skb->len,
> (void *)dma.addr[0], dma.len[0]);
> } else {

That change slipped in by mistake. Here's the version without it:

Feedback from Jeff Garzik:
1) Use netdev_priv instead of dev->priv.
2) Check for ioremap failure
3) iounmap on failure.
4) Wrap SEND_DMA and BIND_DMA calls
5) Don't set NETIF_F_SG unless we set NETIF_F_NO_CSUM
6) Use SET_NETDEV_DEV()
7) Don't set dev->irq, mem_start & mem_end (deprecated)

Feedback from Chrisoph Hellwig:
8) Use lguest_map()/lguest_unmap() helpers instead of ioremap/iounmap.

Signed-off-by: Rusty Russell <[email protected]>
---
drivers/net/lguest_net.c | 56 ++++++++++++++++++++++++++--------------------
1 file changed, 32 insertions(+), 24 deletions(-)

===================================================================
--- a/drivers/net/lguest_net.c
+++ b/drivers/net/lguest_net.c
@@ -22,7 +22,6 @@
#include <linux/module.h>
#include <linux/mm_types.h>
#include <linux/lguest_bus.h>
-#include <asm/io.h>

#define SHARED_SIZE PAGE_SIZE
#define MAX_LANS 4
@@ -34,6 +33,9 @@ struct lguestnet_info
struct lguest_net *peer;
unsigned long peer_phys;
unsigned long mapsize;
+
+ /* The lguest_device I come from */
+ struct lguest_device *lgdev;

/* My peerid. */
unsigned int me;
@@ -84,7 +86,7 @@ static void skb_to_dma(const struct sk_b

static void lguestnet_set_multicast(struct net_device *dev)
{
- struct lguestnet_info *info = dev->priv;
+ struct lguestnet_info *info = netdev_priv(dev);

if ((dev->flags & (IFF_PROMISC|IFF_ALLMULTI)) || dev->mc_count)
info->peer[info->me].mac[0] |= PROMISC_BIT;
@@ -110,16 +112,16 @@ static void transfer_packet(struct net_d
struct sk_buff *skb,
unsigned int peernum)
{
- struct lguestnet_info *info = dev->priv;
+ struct lguestnet_info *info = netdev_priv(dev);
struct lguest_dma dma;

skb_to_dma(skb, skb_headlen(skb), &dma);
pr_debug("xfer length %04x (%u)\n", htons(skb->len), skb->len);

- hcall(LHCALL_SEND_DMA, peer_key(info,peernum), __pa(&dma), 0);
+ lguest_send_dma(peer_key(info, peernum), &dma);
if (dma.used_len != skb->len) {
dev->stats.tx_carrier_errors++;
- pr_debug("Bad xfer to peer %i: %i of %i (dma %p/%i)\n",
+ pr_debug("Bad xfer to peer %i: %li of %i (dma %p/%i)\n",
peernum, dma.used_len, skb->len,
(void *)dma.addr[0], dma.len[0]);
} else {
@@ -137,7 +139,7 @@ static int lguestnet_start_xmit(struct s
{
unsigned int i;
int broadcast;
- struct lguestnet_info *info = dev->priv;
+ struct lguestnet_info *info = netdev_priv(dev);
const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;

pr_debug("%s: xmit %02x:%02x:%02x:%02x:%02x:%02x\n",
@@ -162,7 +164,7 @@ static int lguestnet_start_xmit(struct s
/* Find a new skb to put in this slot in shared mem. */
static int fill_slot(struct net_device *dev, unsigned int slot)
{
- struct lguestnet_info *info = dev->priv;
+ struct lguestnet_info *info = netdev_priv(dev);
/* Try to create and register a new one. */
info->skb[slot] = netdev_alloc_skb(dev, ETH_HLEN + ETH_DATA_LEN);
if (!info->skb[slot]) {
@@ -180,7 +182,7 @@ static irqreturn_t lguestnet_rcv(int irq
static irqreturn_t lguestnet_rcv(int irq, void *dev_id)
{
struct net_device *dev = dev_id;
- struct lguestnet_info *info = dev->priv;
+ struct lguestnet_info *info = netdev_priv(dev);
unsigned int i, done = 0;

for (i = 0; i < ARRAY_SIZE(info->dma); i++) {
@@ -220,7 +222,7 @@ static int lguestnet_open(struct net_dev
static int lguestnet_open(struct net_device *dev)
{
int i;
- struct lguestnet_info *info = dev->priv;
+ struct lguestnet_info *info = netdev_priv(dev);

/* Set up our MAC address */
memcpy(info->peer[info->me].mac, dev->dev_addr, ETH_ALEN);
@@ -232,8 +234,8 @@ static int lguestnet_open(struct net_dev
if (fill_slot(dev, i) != 0)
goto cleanup;
}
- if (!hcall(LHCALL_BIND_DMA, peer_key(info, info->me), __pa(info->dma),
- (NUM_SKBS << 8) | dev->irq))
+ if (lguest_bind_dma(peer_key(info,info->me), info->dma,
+ NUM_SKBS, lgdev_irq(info->lgdev)) != 0)
goto cleanup;
return 0;

@@ -246,13 +248,13 @@ static int lguestnet_close(struct net_de
static int lguestnet_close(struct net_device *dev)
{
unsigned int i;
- struct lguestnet_info *info = dev->priv;
+ struct lguestnet_info *info = netdev_priv(dev);

/* Clear all trace: others might deliver packets, we'll ignore it. */
memset(&info->peer[info->me], 0, sizeof(info->peer[info->me]));

/* Deregister sg lists. */
- hcall(LHCALL_BIND_DMA, peer_key(info, info->me), __pa(info->dma), 0);
+ lguest_unbind_dma(peer_key(info, info->me), info->dma);
for (i = 0; i < ARRAY_SIZE(info->dma); i++)
dev_kfree_skb(info->skb[i]);
return 0;
@@ -290,30 +292,34 @@ static int lguestnet_probe(struct lguest
/* Turning on/off promisc will call dev->set_multicast_list.
* We don't actually support multicast yet */
dev->set_multicast_list = lguestnet_set_multicast;
- dev->mem_start = ((unsigned long)desc->pfn << PAGE_SHIFT);
- dev->mem_end = dev->mem_start + PAGE_SIZE * desc->num_pages;
- dev->irq = lgdev->index+1;
- dev->features = NETIF_F_SG;
+ SET_NETDEV_DEV(dev, &lgdev->dev);
if (desc->features & LGUEST_NET_F_NOCSUM)
- dev->features |= NETIF_F_NO_CSUM;
-
- info = dev->priv;
+ dev->features = NETIF_F_SG|NETIF_F_NO_CSUM;
+
+ info = netdev_priv(dev);
info->mapsize = PAGE_SIZE * desc->num_pages;
info->peer_phys = ((unsigned long)desc->pfn << PAGE_SHIFT);
- info->peer = (void *)ioremap(info->peer_phys, info->mapsize);
+ info->lgdev = lgdev;
+ info->peer = lguest_map(info->peer_phys, desc->num_pages);
+ if (!info->peer) {
+ err = -ENOMEM;
+ goto free;
+ }
+
/* This stores our peerid (upper bits reserved for future). */
info->me = (desc->features & (info->mapsize-1));

err = register_netdev(dev);
if (err) {
pr_debug("lguestnet: registering device failed\n");
- goto free;
+ goto unmap;
}

if (lguest_devices[lgdev->index].features & LGUEST_DEVICE_F_RANDOMNESS)
irqf |= IRQF_SAMPLE_RANDOM;
- if (request_irq(dev->irq, lguestnet_rcv, irqf, "lguestnet", dev) != 0) {
- pr_debug("lguestnet: could not get net irq %i\n", dev->irq);
+ if (request_irq(lgdev_irq(lgdev), lguestnet_rcv, irqf, "lguestnet",
+ dev) != 0) {
+ pr_debug("lguestnet: cannot get irq %i\n", lgdev_irq(lgdev));
goto unregister;
}

@@ -323,6 +329,8 @@ static int lguestnet_probe(struct lguest

unregister:
unregister_netdev(dev);
+unmap:
+ lguest_unmap(info->peer);
free:
free_netdev(dev);
return err;


2007-05-13 18:39:50

by Al Viro

[permalink] [raw]
Subject: Re: [PATCH 1/5] lguest host feedback tidyups

On Fri, May 11, 2007 at 11:19:14AM +1000, Rusty Russell wrote:
> @@ -218,7 +218,7 @@ u32 lgread_u32(struct lguest *lg, u32 ad
>
> /* Don't let them access lguest binary */
> if (!lguest_address_ok(lg, addr, sizeof(val))
> - || get_user(val, (u32 __user *)addr) != 0)
> + || get_user(val, (__force u32 __user *)addr) != 0)
> kill_guest(lg, "bad read address %u", addr);
> return val;

*Ahem*

What kind of address are we really getting there? IOW, where does it
ultimately come from?

> lock_cpu_hotplug();
> if (cpu_has_pge) { /* We have a broader idea of "global". */
> cpu_had_pge = 1;
> - on_each_cpu(adjust_pge, 0, 0, 1);
> + on_each_cpu(adjust_pge, (void *)0, 0, 1);

That's called NULL...

> case LHCALL_LOAD_TLS:
> - guest_load_tls(lg, (struct desc_struct __user*)regs->edx);
> + guest_load_tls(lg,
> + (__force struct desc_struct __user*)regs->edx);

Umm... That's borderline OK, but...

> static void push_guest_stack(struct lguest *lg, u32 __user **gstack, u32 val)
> {
> - lgwrite_u32(lg, (u32)--(*gstack), val);
> + lgwrite_u32(lg, (__force u32)--(*gstack), val);
> }

Now, _that_ is just plain dumb. Why not declare that lgwrite_u32() as taking
u32 __user * as argument and kill the casts?

> - lg->regs->esp = (u32)gstack + lg->page_offset;
> + lg->regs->esp = (__force u32)gstack + lg->page_offset;

Yuck. Cast to unsigned long (or uintptr_t), please. In this case it is
legitimate.

2007-05-13 18:43:53

by Al Viro

[permalink] [raw]
Subject: Re: [PATCH 2/5] lguest guest feedback tidyups

On Fri, May 11, 2007 at 11:21:30AM +1000, Rusty Russell wrote:
> /* Devices are in page above top of "normal" mem. */
> - lguest_devices = ioremap(max_pfn << PAGE_SHIFT, PAGE_SIZE);
> + lguest_devices = (__force void*)ioremap(max_pfn<<PAGE_SHIFT,PAGE_SIZE);

Er... What's being done here? What are we mapping and what's going to
be accessed when we dereference that sucker?

2007-05-13 18:48:07

by Al Viro

[permalink] [raw]
Subject: Re: [PATCH 1/5] lguest host feedback tidyups

On Sun, May 13, 2007 at 02:07:25PM +1000, Rusty Russell wrote:
> Most importantly, I now realize that Christoph's incorrect ranting
> about lgread_u32 et al was in fact a subtle ploy to make me diagnose
> the real issue: sparse 0.3 complains about casting a __user pointer
> to/from u32, but not an "unsigned long". They are (currently)
> equivalent for lguest, but this is a much better solution than __force.

The real fix would be to declare it with u32 __user * as argument and deal
with callers.

2007-05-14 01:30:53

by Rusty Russell

[permalink] [raw]
Subject: Re: [PATCH 1/5] lguest host feedback tidyups

On Sun, 2007-05-13 at 19:47 +0100, Al Viro wrote:
> On Sun, May 13, 2007 at 02:07:25PM +1000, Rusty Russell wrote:
> > Most importantly, I now realize that Christoph's incorrect ranting
> > about lgread_u32 et al was in fact a subtle ploy to make me diagnose
> > the real issue: sparse 0.3 complains about casting a __user pointer
> > to/from u32, but not an "unsigned long". They are (currently)
> > equivalent for lguest, but this is a much better solution than __force.
>
> The real fix would be to declare it with u32 __user * as argument and deal
> with callers.

Hi Al.

Arguments come in from the guest in registers. I could use a funky
union in the structure definition, but I don't think we're winning
anything at that point.

Hope that clarifies,
Rusty.

2007-05-14 01:44:44

by Rusty Russell

[permalink] [raw]
Subject: Re: [PATCH 1/5] lguest host feedback tidyups

On Sun, 2007-05-13 at 19:39 +0100, Al Viro wrote:
> On Fri, May 11, 2007 at 11:19:14AM +1000, Rusty Russell wrote:
> > @@ -218,7 +218,7 @@ u32 lgread_u32(struct lguest *lg, u32 ad
> >
> > /* Don't let them access lguest binary */
> > if (!lguest_address_ok(lg, addr, sizeof(val))
> > - || get_user(val, (u32 __user *)addr) != 0)
> > + || get_user(val, (__force u32 __user *)addr) != 0)
> > kill_guest(lg, "bad read address %u", addr);
> > return val;
>
> *Ahem*
>
> What kind of address are we really getting there? IOW, where does it
> ultimately come from?

Hi Al,

This patch has been superseded. But to clarify, the address generally
comes from a guest register. My confusion came from sparse warnings on
the above code like the following:

warning: cast removes address space of expression

I inserted __force, but it's actually caused by the "addr" being a
"u32": if it's an "unsigned long" sparse doesn't warn. This is a win
anyway: although the code is i386-specific at the moment, that will
change. I prefer to use u32 rather than unsigned long to declare
registers (eg. if I ever wanted to support 32 bit guests on a 64 bit
host), but that's not even a consideration at this stage.

> > lock_cpu_hotplug();
> > if (cpu_has_pge) { /* We have a broader idea of "global". */
> > cpu_had_pge = 1;
> > - on_each_cpu(adjust_pge, 0, 0, 1);
> > + on_each_cpu(adjust_pge, (void *)0, 0, 1);
>
> That's called NULL...

Yes, but this is clearer. Here's adjust_pge:

static void adjust_pge(void *on)
{
if (on)
write_cr4(read_cr4() | X86_CR4_PGE);
else
write_cr4(read_cr4() & ~X86_CR4_PGE);
}

And here's the two calls to it:

on_each_cpu(adjust_pge, (void *)0, 0, 1);
...
on_each_cpu(adjust_pge, (void *)1, 0, 1);

> > case LHCALL_LOAD_TLS:
> > - guest_load_tls(lg, (struct desc_struct __user*)regs->edx);
> > + guest_load_tls(lg,
> > + (__force struct desc_struct __user*)regs->edx);
>
> Umm... That's borderline OK, but...

Yeah, gone in replacement patch.

> > static void push_guest_stack(struct lguest *lg, u32 __user **gstack, u32 val)
> > {
> > - lgwrite_u32(lg, (u32)--(*gstack), val);
> > + lgwrite_u32(lg, (__force u32)--(*gstack), val);
> > }
>
> Now, _that_ is just plain dumb. Why not declare that lgwrite_u32() as taking
> u32 __user * as argument and kill the casts?

Last I tried, this turns out to create even more casts.

> > - lg->regs->esp = (u32)gstack + lg->page_offset;
> > + lg->regs->esp = (__force u32)gstack + lg->page_offset;
>
> Yuck. Cast to unsigned long (or uintptr_t), please. In this case it is
> legitimate.

Indeed, replacement patch uses unsigned long.

Thanks,
Rusty.

2007-05-14 05:38:28

by Rusty Russell

[permalink] [raw]
Subject: Re: [PATCH 1/5] lguest host feedback tidyups

On Mon, 2007-05-14 at 11:44 +1000, Rusty Russell wrote:
> On Sun, 2007-05-13 at 19:39 +0100, Al Viro wrote:
> > > static void push_guest_stack(struct lguest *lg, u32 __user **gstack, u32 val)
> > > {
> > > - lgwrite_u32(lg, (u32)--(*gstack), val);
> > > + lgwrite_u32(lg, (__force u32)--(*gstack), val);
> > > }
> >
> > Now, _that_ is just plain dumb. Why not declare that lgwrite_u32() as taking
> > u32 __user * as argument and kill the casts?
>
> Last I tried, this turns out to create even more casts.

But after thinking harder... changing the gstack type gets rid of three
casts, including that one. Thanks!

How's this?
Rusty.
==
Reduce number of casts in set_guest_interrupt().

Al Viro pointed out the ugly cast in push_lguest_stack(); as this is
only called from set_guest_interrupt() which casts the arg in the
first place, let's stick with unsigned long the whole way through.

Signed-off-by: Rusty Russell <[email protected]>

diff -r 16e417365c64 drivers/lguest/interrupts_and_traps.c
--- a/drivers/lguest/interrupts_and_traps.c Mon May 14 15:05:48 2007 +1000
+++ b/drivers/lguest/interrupts_and_traps.c Mon May 14 15:22:38 2007 +1000
@@ -16,24 +16,25 @@ static int idt_present(u32 lo, u32 hi)
return (hi & 0x8000);
}

-static void push_guest_stack(struct lguest *lg, u32 __user **gstack, u32 val)
-{
- lgwrite_u32(lg, (unsigned long)--(*gstack), val);
+static void push_guest_stack(struct lguest *lg, unsigned long *gstack, u32 val)
+{
+ *gstack -= 4;
+ lgwrite_u32(lg, *gstack, val);
}

static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err)
{
- u32 __user *gstack;
+ unsigned long gstack;
u32 eflags, ss, irq_enable;

/* If they want a ring change, we use new stack and push old ss/esp */
if ((lg->regs->ss&0x3) != GUEST_PL) {
- gstack = (u32 __user *)guest_pa(lg, lg->esp1);
+ gstack = guest_pa(lg, lg->esp1);
ss = lg->ss1;
push_guest_stack(lg, &gstack, lg->regs->ss);
push_guest_stack(lg, &gstack, lg->regs->esp);
} else {
- gstack = (u32 __user *)guest_pa(lg, lg->regs->esp);
+ gstack = guest_pa(lg, lg->regs->esp);
ss = lg->regs->ss;
}

@@ -53,7 +54,7 @@ static void set_guest_interrupt(struct l

/* Change the real stack so switcher returns to trap handler */
lg->regs->ss = ss;
- lg->regs->esp = (unsigned long)gstack + lg->page_offset;
+ lg->regs->esp = gstack + lg->page_offset;
lg->regs->cs = (__KERNEL_CS|GUEST_PL);
lg->regs->eip = idt_address(lo, hi);



2007-05-14 07:12:25

by Rusty Russell

[permalink] [raw]
Subject: Re: [PATCH 2/5] lguest guest feedback tidyups

On Sun, 2007-05-13 at 19:43 +0100, Al Viro wrote:
> On Fri, May 11, 2007 at 11:21:30AM +1000, Rusty Russell wrote:
> > /* Devices are in page above top of "normal" mem. */
> > - lguest_devices = ioremap(max_pfn << PAGE_SHIFT, PAGE_SIZE);
> > + lguest_devices = (__force void*)ioremap(max_pfn<<PAGE_SHIFT,PAGE_SIZE);
>
> Er... What's being done here? What are we mapping and what's going to
> be accessed when we dereference that sucker?

This, too, is now wrapped in lguest_map() in the updated patch.

Guest virtual device memory can be directly accessed, but Jeff Garzik
and Christoph Hellwig made good points about not encouraging such
abnormal use of ioremap in drivers.

BTW, I've done a new rollup patch of lguest against 2.6.21, if you would
like to review the whole thing. This version includes the big code
documentation patch not yet in -mm.

http://lguest.ozlabs.org/lguest-2.6.21-290.patch.gz

Thanks,
Rusty.