Hi,
So here finally is, Jesse!
I've cooked this set against linux-2.6 linus's tree (7638d532). Please point
me out if I need to apply to some other tree.
Thank you,
Tiago
Tiago Vignatti (2):
vga: implements VGA arbitration on Linux
vga: drops a documentation regarding the VGA arbiter
Documentation/vgaarbiter.txt | 197 ++++++++
drivers/gpu/Makefile | 2 +-
drivers/gpu/vga/Kconfig | 8 +
drivers/gpu/vga/Makefile | 1 +
drivers/gpu/vga/vgaarb.c | 1120 ++++++++++++++++++++++++++++++++++++++++++
drivers/gpu/vga/vgaarb.h | 168 +++++++
drivers/video/Kconfig | 2 +
7 files changed, 1497 insertions(+), 1 deletions(-)
create mode 100644 Documentation/vgaarbiter.txt
create mode 100644 drivers/gpu/vga/Kconfig
create mode 100644 drivers/gpu/vga/Makefile
create mode 100644 drivers/gpu/vga/vgaarb.c
create mode 100644 drivers/gpu/vga/vgaarb.h
Signed-off-by: Tiago Vignatti <[email protected]>
---
drivers/gpu/Makefile | 2 +-
drivers/gpu/vga/Kconfig | 8 +
drivers/gpu/vga/Makefile | 1 +
drivers/gpu/vga/vgaarb.c | 1120 ++++++++++++++++++++++++++++++++++++++++++++++
drivers/gpu/vga/vgaarb.h | 168 +++++++
drivers/video/Kconfig | 2 +
6 files changed, 1300 insertions(+), 1 deletions(-)
create mode 100644 drivers/gpu/vga/Kconfig
create mode 100644 drivers/gpu/vga/Makefile
create mode 100644 drivers/gpu/vga/vgaarb.c
create mode 100644 drivers/gpu/vga/vgaarb.h
diff --git a/drivers/gpu/Makefile b/drivers/gpu/Makefile
index de566cf..30879df 100644
--- a/drivers/gpu/Makefile
+++ b/drivers/gpu/Makefile
@@ -1 +1 @@
-obj-y += drm/
+obj-y += drm/ vga/
diff --git a/drivers/gpu/vga/Kconfig b/drivers/gpu/vga/Kconfig
new file mode 100644
index 0000000..06e0ab4
--- /dev/null
+++ b/drivers/gpu/vga/Kconfig
@@ -0,0 +1,8 @@
+config VGA_ARB
+ tristate "VGA arbitration"
+ help
+ Some "legacy" VGA devices implemented on PCI typically have the same
+ hard-decoded addresses as they did on ISA. When multiple PCI devices
+ are accessed at same time they need some kind of coordination. Please
+ see Documentation/vgaarbiter.txt for more details. Select this to
+ enable VGA arbiter.
diff --git a/drivers/gpu/vga/Makefile b/drivers/gpu/vga/Makefile
new file mode 100644
index 0000000..7cc8c1e
--- /dev/null
+++ b/drivers/gpu/vga/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_VGA_ARB) += vgaarb.o
diff --git a/drivers/gpu/vga/vgaarb.c b/drivers/gpu/vga/vgaarb.c
new file mode 100644
index 0000000..db89eee
--- /dev/null
+++ b/drivers/gpu/vga/vgaarb.c
@@ -0,0 +1,1120 @@
+/*
+ * vgaarb.c
+ *
+ * (C) Copyright 2005 Benjamin Herrenschmidt <[email protected]>
+ * (C) Copyright 2007 Paulo R. Zanoni <[email protected]>
+ * (C) Copyright 2007, 2009 Tiago Vignatti <[email protected]>
+ *
+ * Implements the VGA arbitration. For details refer to
+ * Documentation/vgaarbiter.txt
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/spinlock.h>
+#include <linux/poll.h>
+#include <linux/miscdevice.h>
+
+#include <linux/uaccess.h>
+
+#include "vgaarb.h"
+
+/*
+ * We keep a list of all vga devices in the system to speed
+ * up the various operations of the arbiter
+ */
+struct vga_device {
+ struct list_head list;
+ struct pci_dev *pdev;
+ unsigned int decodes; /* what does it decodes */
+ unsigned int owns; /* what does it owns */
+ unsigned int locks; /* what does it locks */
+ unsigned int io_lock_cnt; /* legacy IO lock count */
+ unsigned int mem_lock_cnt; /* legacy MEM lock count */
+ unsigned int io_norm_cnt; /* normal IO count */
+ unsigned int mem_norm_cnt; /* normal MEM count */
+};
+
+static LIST_HEAD(vga_list);
+static DEFINE_SPINLOCK(vga_lock);
+static DECLARE_WAIT_QUEUE_HEAD(vga_wait_queue);
+
+#ifndef __ARCH_HAS_VGA_DEFAULT_DEVICE
+static struct pci_dev *vga_default;
+#endif
+
+static void vga_arb_device_card_gone(struct pci_dev *pdev);
+
+/* Find somebody in our list */
+static struct vga_device *vgadev_find(struct pci_dev *pdev)
+{
+ struct vga_device *vgadev;
+
+ list_for_each_entry(vgadev, &vga_list, list)
+ if (pdev == vgadev->pdev)
+ return vgadev;
+ return NULL;
+}
+
+/* Returns the default VGA device (vgacon's babe) */
+#ifndef __ARCH_HAS_VGA_DEFAULT_DEVICE
+struct pci_dev *vga_default_device(void)
+{
+ return vga_default;
+}
+#endif
+
+/* Architecture can override enabling/disabling of a given
+ * device resources here
+ *
+ * TODO: <benh> you can try to be "smart" and if for example you have 2 cards
+ * that are behind separate bridges, you need only swapping the bridge controls
+ * and you can keep the card master IO and memory enable.
+ */
+#ifndef __ARCH_HAS_VGA_DISABLE_RESOURCES
+static inline void vga_disable_resources(struct pci_dev *pdev,
+ unsigned int rsrc,
+ unsigned int change_bridge)
+{
+ struct pci_bus *bus;
+ struct pci_dev *bridge;
+ u16 cmd;
+
+#ifdef DEBUG
+ printk(KERN_DEBUG "%s\n", __func__);
+#endif
+ pci_read_config_word(pdev, PCI_COMMAND, &cmd);
+ if (rsrc & (VGA_RSRC_LEGACY_IO | VGA_RSRC_NORMAL_IO))
+ cmd &= ~PCI_COMMAND_IO;
+ if (rsrc & (VGA_RSRC_LEGACY_MEM | VGA_RSRC_NORMAL_MEM))
+ cmd &= ~PCI_COMMAND_MEMORY;
+ pci_write_config_word(pdev, PCI_COMMAND, cmd);
+
+ if (!change_bridge)
+ return;
+
+ bus = pdev->bus;
+ while (bus) {
+ bridge = bus->self;
+ if (bridge) {
+ pci_read_config_word(bridge, PCI_BRIDGE_CONTROL,
+ &cmd);
+ if (cmd & PCI_BRIDGE_CTL_VGA) {
+ cmd &= ~PCI_BRIDGE_CTL_VGA;
+ pci_write_config_word(bridge,
+ PCI_BRIDGE_CONTROL,
+ cmd);
+ }
+ }
+ bus = bus->parent;
+ }
+}
+#endif
+
+#ifndef __ARCH_HAS_VGA_ENABLE_RESOURCES
+static inline void vga_enable_resources(struct pci_dev *pdev,
+ unsigned int rsrc)
+{
+ struct pci_bus *bus;
+ struct pci_dev *bridge;
+ u16 cmd;
+
+#ifdef DEBUG
+ printk(KERN_DEBUG "%s\n", __func__);
+#endif
+ pci_read_config_word(pdev, PCI_COMMAND, &cmd);
+ if (rsrc & (VGA_RSRC_LEGACY_IO | VGA_RSRC_NORMAL_IO))
+ cmd |= PCI_COMMAND_IO;
+ if (rsrc & (VGA_RSRC_LEGACY_MEM | VGA_RSRC_NORMAL_MEM))
+ cmd |= PCI_COMMAND_MEMORY;
+ pci_write_config_word(pdev, PCI_COMMAND, cmd);
+
+ if (!(rsrc & VGA_RSRC_LEGACY_MASK))
+ return;
+
+ bus = pdev->bus;
+ while (bus) {
+ bridge = bus->self;
+ if (bridge) {
+ pci_read_config_word(bridge, PCI_BRIDGE_CONTROL,
+ &cmd);
+ if (!(cmd & PCI_BRIDGE_CTL_VGA)) {
+ cmd |= PCI_BRIDGE_CTL_VGA;
+ pci_write_config_word(bridge,
+ PCI_BRIDGE_CONTROL,
+ cmd);
+ }
+ }
+ bus = bus->parent;
+ }
+}
+#endif
+
+static struct vga_device *__vga_tryget(struct vga_device *vgadev,
+ unsigned int rsrc)
+{
+ unsigned int wants, legacy_wants, match;
+ struct vga_device *conflict;
+
+ /* Account for "normal" resources to lock. If we decode the legacy,
+ * counterpart, we need to request it as well
+ */
+ if ((rsrc & VGA_RSRC_NORMAL_IO) &&
+ (vgadev->decodes & VGA_RSRC_LEGACY_IO))
+ rsrc |= VGA_RSRC_LEGACY_IO;
+ if ((rsrc & VGA_RSRC_NORMAL_MEM) &&
+ (vgadev->decodes & VGA_RSRC_LEGACY_MEM))
+ rsrc |= VGA_RSRC_LEGACY_MEM;
+
+#ifdef DEBUG
+ printk(KERN_DEBUG "%s: %d\n", __func__, rsrc);
+ printk(KERN_DEBUG "%s: owns: %d\n", __func__, vgadev->owns);
+#endif
+
+ /* Check what resources we need to acquire */
+ wants = rsrc & ~vgadev->owns;
+
+ /* We already own everything, just mark locked & bye bye */
+ if (wants == 0)
+ goto lock_them;
+
+ /* We don't need to request a legacy resource, we just enable
+ * appropriate decoding and go
+ */
+ legacy_wants = wants & VGA_RSRC_LEGACY_MASK;
+ if (legacy_wants == 0)
+ goto enable_them;
+
+ /* Ok, we don't, let's find out how we need to kick off */
+ list_for_each_entry(conflict, &vga_list, list) {
+ unsigned int lwants = legacy_wants;
+ unsigned int change_bridge = 0;
+
+ /* Don't conflict with myself */
+ if (vgadev == conflict)
+ continue;
+
+ /* Check if the architecture allows a conflict between those
+ * 2 devices or if they are on separate domains
+ */
+ if (!vga_conflicts(vgadev->pdev, conflict->pdev))
+ continue;
+
+ /* We have a possible conflict. before we go further, we must
+ * check if we sit on the same bus as the conflicting device.
+ * if we don't, then we must tie both IO and MEM resources
+ * together since there is only a single bit controlling
+ * VGA forwarding on P2P bridges
+ */
+ if (vgadev->pdev->bus != conflict->pdev->bus) {
+ change_bridge = 1;
+ lwants = VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM;
+ }
+
+ /* Check if the guy has a lock on the resource. If he does,
+ * return the conflicting entry
+ */
+ if (conflict->locks & lwants)
+ return conflict;
+
+ /* Ok, now check if he owns the resource we want. We don't need
+ * to check "decodes" since it should be impossible to own
+ * own legacy resources you don't decode unless I have a bug
+ * in this code...
+ */
+ WARN_ON(conflict->owns & ~conflict->decodes);
+ match = lwants & conflict->owns;
+ if (!match)
+ continue;
+
+ /* looks like he doesn't have a lock, we can steal
+ * them from him
+ */
+ vga_disable_resources(conflict->pdev, lwants,
+ change_bridge);
+ conflict->owns &= ~lwants;
+ /* If he also owned non-legacy, that is no longer the case */
+ if (lwants & VGA_RSRC_LEGACY_MEM)
+ conflict->owns &= ~VGA_RSRC_NORMAL_MEM;
+ if (lwants & VGA_RSRC_LEGACY_IO)
+ conflict->owns &= ~VGA_RSRC_NORMAL_IO;
+ }
+
+enable_them:
+ /* ok dude, we got it, everybody conflicting has been disabled, let's
+ * enable us. Make sure we don't mark a bit in "owns" that we don't
+ * also have in "decodes". We can lock resources we don't decode but
+ * not own them.
+ */
+ vga_enable_resources(vgadev->pdev, wants);
+ vgadev->owns |= (wants & vgadev->decodes);
+lock_them:
+ vgadev->locks |= (rsrc & VGA_RSRC_LEGACY_MASK);
+ if (rsrc & VGA_RSRC_LEGACY_IO)
+ vgadev->io_lock_cnt++;
+ if (rsrc & VGA_RSRC_LEGACY_MEM)
+ vgadev->mem_lock_cnt++;
+ if (rsrc & VGA_RSRC_NORMAL_IO)
+ vgadev->io_norm_cnt++;
+ if (rsrc & VGA_RSRC_NORMAL_MEM)
+ vgadev->mem_norm_cnt++;
+
+ return NULL;
+}
+
+static void __vga_put(struct vga_device *vgadev, unsigned int rsrc)
+{
+ unsigned int old_locks = vgadev->locks;
+
+#ifdef DEBUG
+ printk(KERN_DEBUG "%s\n", __func__);
+#endif
+
+ /* Update our counters, and account for equivalent legacy resources
+ * if we decode them
+ */
+ if ((rsrc & VGA_RSRC_NORMAL_IO) && vgadev->io_norm_cnt > 0) {
+ vgadev->io_norm_cnt--;
+ if (vgadev->decodes & VGA_RSRC_LEGACY_IO)
+ rsrc |= VGA_RSRC_LEGACY_IO;
+ }
+ if ((rsrc & VGA_RSRC_NORMAL_MEM) && vgadev->mem_norm_cnt > 0) {
+ vgadev->mem_norm_cnt--;
+ if (vgadev->decodes & VGA_RSRC_LEGACY_MEM)
+ rsrc |= VGA_RSRC_LEGACY_MEM;
+ }
+ if ((rsrc & VGA_RSRC_LEGACY_IO) && vgadev->io_lock_cnt > 0)
+ vgadev->io_lock_cnt--;
+ if ((rsrc & VGA_RSRC_LEGACY_MEM) && vgadev->mem_lock_cnt > 0)
+ vgadev->mem_lock_cnt--;
+
+ /* Just clear lock bits, we do lazy operations so we don't really
+ * have to bother about anything else at this point
+ */
+ if (vgadev->io_lock_cnt == 0)
+ vgadev->locks &= ~VGA_RSRC_LEGACY_IO;
+ if (vgadev->mem_lock_cnt == 0)
+ vgadev->locks &= ~VGA_RSRC_LEGACY_MEM;
+
+ /* Kick the wait queue in case somebody was waiting if we actually
+ * released something
+ */
+ if (old_locks != vgadev->locks)
+ wake_up_all(&vga_wait_queue);
+}
+
+int vga_get(struct pci_dev *pdev, unsigned int rsrc, int interruptible)
+{
+ struct vga_device *vgadev, *conflict;
+ unsigned long flags;
+ wait_queue_t wait;
+ int rc = 0;
+
+ /* The one who calls us should check for this, but lets be sure... */
+ if (pdev == NULL)
+ pdev = vga_default_device();
+ if (pdev == NULL)
+ return 0;
+
+ for (;;) {
+ spin_lock_irqsave(&vga_lock, flags);
+ vgadev = vgadev_find(pdev);
+ if (vgadev == NULL) {
+ spin_unlock_irqrestore(&vga_lock, flags);
+ rc = -ENODEV;
+ break;
+ }
+ conflict = __vga_tryget(vgadev, rsrc);
+ spin_unlock_irqrestore(&vga_lock, flags);
+ if (conflict == NULL)
+ break;
+
+
+ /* We have a conflict, we wait until somebody kicks the
+ * work queue. Currently we have one work queue that we
+ * kick each time some resources are released, but it would
+ * be fairly easy to have a per device one so that we only
+ * need to attach to the conflicting device
+ */
+ init_waitqueue_entry(&wait, current);
+ add_wait_queue(&vga_wait_queue, &wait);
+ set_current_state(interruptible ?
+ TASK_INTERRUPTIBLE :
+ TASK_UNINTERRUPTIBLE);
+ if (signal_pending(current)) {
+ rc = -EINTR;
+ break;
+ }
+ schedule();
+ remove_wait_queue(&vga_wait_queue, &wait);
+ set_current_state(TASK_RUNNING);
+ }
+ return rc;
+}
+
+int vga_tryget(struct pci_dev *pdev, unsigned int rsrc)
+{
+ struct vga_device *vgadev;
+ unsigned long flags;
+ int rc = 0;
+
+ /* The one who calls us should check for this, but lets be sure... */
+ if (pdev == NULL)
+ pdev = vga_default_device();
+ if (pdev == NULL)
+ return 0;
+ spin_lock_irqsave(&vga_lock, flags);
+ vgadev = vgadev_find(pdev);
+ if (vgadev == NULL) {
+ rc = -ENODEV;
+ goto bail;
+ }
+ if (__vga_tryget(vgadev, rsrc))
+ rc = -EBUSY;
+bail:
+ spin_unlock_irqrestore(&vga_lock, flags);
+ return rc;
+}
+
+void vga_put(struct pci_dev *pdev, unsigned int rsrc)
+{
+ struct vga_device *vgadev;
+ unsigned long flags;
+
+ /* The one who calls us should check for this, but lets be sure... */
+ if (pdev == NULL)
+ pdev = vga_default_device();
+ if (pdev == NULL)
+ return;
+ spin_lock_irqsave(&vga_lock, flags);
+ vgadev = vgadev_find(pdev);
+ if (vgadev == NULL)
+ goto bail;
+ __vga_put(vgadev, rsrc);
+bail:
+ spin_unlock_irqrestore(&vga_lock, flags);
+}
+
+/*
+ * Currently, we assume that the "initial" setup of the system is
+ * not sane, that is we come up with conflicting devices and let
+ * the arbiter's client decides if devices decodes or not legacy
+ * things.
+ */
+static void vga_arbiter_add_pci_device(struct pci_dev *pdev)
+{
+ struct vga_device *vgadev;
+ unsigned long flags;
+ struct pci_bus *bus;
+ struct pci_dev *bridge;
+ u16 cmd;
+
+ /* Only deal with VGA class devices */
+ if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA)
+ return;
+
+ /* Allocate structure */
+ vgadev = kmalloc(sizeof(struct vga_device), GFP_KERNEL);
+ if (vgadev == NULL) {
+ /* What to do on allocation failure ? For now, let's
+ * just do nothing, I'm not sure there is anything saner
+ * to be done
+ */
+ return;
+ }
+
+ memset(vgadev, 0, sizeof(*vgadev));
+
+ /* Take lock & check for duplicates */
+ spin_lock_irqsave(&vga_lock, flags);
+ if (vgadev_find(pdev) != NULL) {
+ BUG_ON(1);
+ goto fail;
+ }
+ vgadev->pdev = pdev;
+
+ /* By default, assume we decode everything */
+ vgadev->decodes = VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
+ VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
+
+ /* Mark that we "own" resources based on our enables, we will
+ * clear that below if the bridge isn't forwarding
+ */
+ pci_read_config_word(pdev, PCI_COMMAND, &cmd);
+ if (cmd & PCI_COMMAND_IO)
+ vgadev->owns |= VGA_RSRC_LEGACY_IO;
+ if (cmd & PCI_COMMAND_MEMORY)
+ vgadev->owns |= VGA_RSRC_LEGACY_MEM;
+
+ /* Check if VGA cycles can get down to us */
+ bus = pdev->bus;
+ while (bus) {
+ bridge = bus->self;
+ if (bridge) {
+ u16 l;
+ pci_read_config_word(bridge, PCI_BRIDGE_CONTROL,
+ &l);
+ if (!(l & PCI_BRIDGE_CTL_VGA)) {
+ vgadev->owns = 0;
+ break;
+ }
+ }
+ bus = bus->parent;
+ }
+
+ /* Deal with VGA default device. Use first enabled one
+ * by default if arch doesn't have it's own hook
+ */
+#ifndef __ARCH_HAS_VGA_DEFAULT_DEVICE
+ if (vga_default == NULL && (vgadev->owns & VGA_RSRC_LEGACY_MEM) &&
+ (vgadev->owns & VGA_RSRC_LEGACY_IO))
+ vga_default = pdev;
+
+#endif
+
+ /* Add to the list */
+ list_add(&vgadev->list, &vga_list);
+ printk(KERN_INFO "VGA Arbiter: device added: %d %d %d\n",
+ pdev->device, pdev->vendor, pdev->devfn);
+ spin_unlock_irqrestore(&vga_lock, flags);
+ return;
+fail:
+ spin_unlock_irqrestore(&vga_lock, flags);
+ kfree(vgadev);
+}
+
+static void vga_arbiter_del_pci_device(struct pci_dev *pdev)
+{
+ struct vga_device *vgadev;
+ unsigned long flags;
+
+ spin_lock_irqsave(&vga_lock, flags);
+ vgadev = vgadev_find(pdev);
+ if (vgadev == NULL)
+ goto bail;
+
+ /* Remove entry from list */
+ list_del(&vgadev->list);
+
+ /* Notify userland driver that the device is gone so it discards
+ * it's copies of the pci_dev pointer
+ */
+ vga_arb_device_card_gone(pdev);
+
+ /* Wake up all possible waiters */
+ wake_up_all(&vga_wait_queue);
+bail:
+ spin_unlock_irqrestore(&vga_lock, flags);
+ kfree(vgadev);
+}
+
+void vga_set_legacy_decoding(struct pci_dev *pdev, unsigned int decodes)
+{
+ struct vga_device *vgadev;
+ unsigned long flags;
+
+ decodes &= VGA_RSRC_LEGACY_MASK;
+
+ spin_lock_irqsave(&vga_lock, flags);
+ vgadev = vgadev_find(pdev);
+ if (vgadev == NULL)
+ goto bail;
+
+ vgadev->decodes = decodes;
+ vgadev->owns &= decodes;
+
+ /* XXX if somebody is going from "doesn't decode" to "decodes" state
+ * here, additional care must be taken as we may have pending owner
+ * ship of non-legacy region ...
+ */
+bail:
+ spin_unlock_irqrestore(&vga_lock, flags);
+}
+
+
+/*
+ * Char driver implementation
+ *
+ * Semantics is:
+ *
+ * open : open user instance of the arbitrer. by default, it's
+ * attached to the default VGA device of the system.
+ *
+ * close : close user instance, release locks
+ *
+ * read : return a string indicating the status of the target.
+ * an IO state string is of the form {io,mem,io+mem,none},
+ * mc and ic are respectively mem and io lock counts (for
+ * debugging/diagnostic only). "decodes" indicate what the
+ * card currently decodes, "owns" indicates what is currently
+ * enabled on it, and "locks" indicates what is locked by this
+ * card. If the card is unplugged, we get "invalid" then for
+ * card_ID and an -ENODEV error is returned for any command
+ * until a new card is targeted
+ *
+ * "<card_ID>,decodes=<io_state>,owns=<io_state>,locks=<io_state> (ic,mc)"
+ *
+ * write : write a command to the arbiter. List of commands is:
+ *
+ * target <card_ID> : switch target to card <card_ID> (see below)
+ * lock <io_state> : acquires locks on target ("none" is invalid io_state)
+ * trylock <io_state> : non-blocking acquire locks on target
+ * unlock <io_state> : release locks on target
+ * unlock all : release all locks on target held by this user
+ * decodes <io_state> : set the legacy decoding attributes for the card
+ *
+ * poll : event if something change on any card (not just the target)
+ *
+ * card_ID is of the form "PCI:domain:bus:dev.fn". It can be set to "default"
+ * to go back to the system default card (TODO: not implemented yet).
+ * Currently, only PCI is supported as a prefix, but the userland API may
+ * support other bus types in the future, even if the current kernel
+ * implementation doesn't.
+ *
+ * Note about locks:
+ *
+ * The driver keeps track of which user has what locks on which card. It
+ * supports stacking, like the kernel one. This complexifies the implementation
+ * a bit, but makes the arbiter more tolerant to userspace problems and able
+ * to properly cleanup in all cases when a process dies.
+ * Currently, a max of 16 cards simultaneously can have locks issued from
+ * userspace for a given user (file descriptor instance) of the arbiter.
+ *
+ * If the device is hot-unplugged, there is a hook inside the module to notify
+ * they being added/removed in the system and automatically added/removed in
+ * the arbiter.
+ */
+
+#define MAX_USER_CARDS 16
+#define PCI_INVALID_CARD ((struct pci_dev *)-1UL)
+
+/*
+ * Each user has an array of these, tracking which cards have locks
+ */
+struct vga_arb_user_card {
+ struct pci_dev *pdev;
+ unsigned int mem_cnt;
+ unsigned int io_cnt;
+};
+
+struct vga_arb_private {
+ struct list_head list;
+ struct pci_dev *target;
+ struct vga_arb_user_card cards[MAX_USER_CARDS];
+ spinlock_t lock;
+};
+
+static LIST_HEAD(vga_user_list);
+static DEFINE_SPINLOCK(vga_user_lock);
+
+static const char *vga_iostate_to_str(unsigned int iostate)
+{
+ /* Ignore VGA_RSRC_IO and VGA_RSRC_MEM */
+ iostate &= VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM;
+ switch (iostate) {
+ case VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM:
+ return "io+mem";
+ case VGA_RSRC_LEGACY_IO:
+ return "io";
+ case VGA_RSRC_LEGACY_MEM:
+ return "mem";
+ }
+ return "none";
+}
+
+static int vga_str_to_iostate(char *buf, int str_size, int *io_state)
+{
+ /* XXX We're not chekcing the str_size! */
+ if (strncmp(buf, "io+mem", 6) == 0)
+ *io_state = VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM;
+ else if (strncmp(buf, "io", 2) == 0)
+ *io_state = VGA_RSRC_LEGACY_IO;
+ else if (strncmp(buf, "mem", 3) == 0)
+ *io_state = VGA_RSRC_LEGACY_MEM;
+ else if (strncmp(buf, "none", 4) == 0)
+ *io_state = VGA_RSRC_NONE;
+ else
+ return 0;
+ return 1;
+}
+
+/*
+ * This function gets a string in the format: "PCI:domain:bus:dev.fn" and
+ * returns the respective values. If the string is not in this format,
+ * it returns 0.
+ */
+static int vga_pci_str_to_vars(char *buf, int count, unsigned int *domain,
+ unsigned int *bus, unsigned int *devfn)
+{
+ int n;
+ unsigned int slot, func;
+
+ n = sscanf(buf, "PCI:%d:%d:%d.%d", domain, bus, &slot, &func);
+ if (n != 4)
+ return 0;
+
+ *devfn = PCI_DEVFN(slot, func);
+
+ return 1;
+}
+
+static ssize_t vga_arb_read(struct file *file, char __user * buf,
+ size_t count, loff_t *ppos)
+{
+ struct vga_arb_private *priv = file->private_data;
+ struct vga_device *vgadev;
+ struct pci_dev *pdev;
+ unsigned long flags;
+ size_t len;
+ int rc;
+ char *lbuf;
+
+#ifdef DEBUG
+ printk(KERN_DEBUG "vga_arb_read()\n");
+#endif
+ lbuf = kmalloc(1024, GFP_KERNEL);
+ if (lbuf == NULL)
+ return -ENOMEM;
+
+ /* Shields against vga_arb_device_card_gone (pci_dev going
+ * away), and allows access to vga list
+ */
+ spin_lock_irqsave(&vga_lock, flags);
+
+ /* If we are targetting the default, use it */
+ pdev = priv->target;
+ if (pdev == NULL || pdev == PCI_INVALID_CARD) {
+ spin_unlock_irqrestore(&vga_lock, flags);
+ len = sprintf(lbuf, "invalid");
+ goto done;
+ }
+
+ /* Find card vgadev structure */
+ vgadev = vgadev_find(pdev);
+ if (vgadev == NULL) {
+ /* Wow, it's not in the list, that shouldn't happen,
+ * let's fix us up and return invalid card
+ */
+ if (pdev == priv->target)
+ vga_arb_device_card_gone(pdev);
+ spin_unlock_irqrestore(&vga_lock, flags);
+ len = sprintf(lbuf, "invalid");
+ goto done;
+ }
+
+ /* Fill the buffer with infos */
+ len = snprintf(lbuf, 1024,
+ "PCI:%s,decodes=%s,owns=%s,locks=%s (%d,%d)\n",
+ pci_name(pdev),
+ vga_iostate_to_str(vgadev->decodes),
+ vga_iostate_to_str(vgadev->owns),
+ vga_iostate_to_str(vgadev->locks),
+ vgadev->io_lock_cnt, vgadev->mem_lock_cnt);
+
+ spin_unlock_irqrestore(&vga_lock, flags);
+done:
+
+ /* Copy that to user */
+ if (len > count)
+ len = count;
+ rc = copy_to_user(buf, lbuf, len);
+ kfree(lbuf);
+ if (rc)
+ return -EFAULT;
+ return len;
+}
+
+/*
+ * TODO: To avoid parsing inside kernel and to improve the speed we may
+ * consider use ioctl here
+ */
+static ssize_t vga_arb_write(struct file *file, const char __user * buf,
+ size_t count, loff_t *ppos)
+{
+ struct vga_arb_private *priv = file->private_data;
+ struct pci_dev *pdev;
+
+ unsigned int io_state;
+
+ char *kbuf, *curr_pos;
+ size_t remaining = count;
+
+ int ret_val;
+ int i;
+
+
+ kbuf = kmalloc(count + 1, GFP_KERNEL);
+ if (!kbuf)
+ return -ENOMEM;
+
+ if (copy_from_user(kbuf, buf, count)) {
+ kfree(kbuf);
+ return -EFAULT;
+ }
+ curr_pos = kbuf;
+ kbuf[count] = '\0'; /* Just to make sure... */
+
+ if (strncmp(curr_pos, "lock ", 5) == 0) {
+ curr_pos += 5;
+ remaining -= 5;
+#ifdef DEBUG
+ printk(KERN_DEBUG "client 0x%X called 'lock'\n", (int)priv);
+#endif
+
+ if (!vga_str_to_iostate(curr_pos, remaining, &io_state)) {
+ ret_val = -EPROTO;
+ goto done;
+ }
+ if (io_state == VGA_RSRC_NONE) {
+ ret_val = -EPROTO;
+ goto done;
+ }
+
+ pdev = priv->target;
+ if (priv->target == NULL) {
+ ret_val = -ENODEV;
+ goto done;
+ }
+
+ vga_get_uninterruptible(pdev, io_state);
+
+ /* Update the client's locks lists... */
+ for (i = 0; i < MAX_USER_CARDS; i++) {
+ if (priv->cards[i].pdev == pdev) {
+ if (io_state & VGA_RSRC_LEGACY_IO)
+ priv->cards[i].io_cnt++;
+ if (io_state & VGA_RSRC_LEGACY_MEM)
+ priv->cards[i].mem_cnt++;
+ break;
+ }
+ }
+
+ ret_val = count;
+ goto done;
+ } else if (strncmp(curr_pos, "unlock ", 7) == 0) {
+ curr_pos += 7;
+ remaining -= 7;
+#ifdef DEBUG
+ printk(KERN_DEBUG "client 0x%X called 'unlock'\n", (int)priv);
+#endif
+
+ if (strncmp(curr_pos, "all", 3) == 0)
+ io_state = VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM;
+ else {
+ if (!vga_str_to_iostate
+ (curr_pos, remaining, &io_state)) {
+ ret_val = -EPROTO;
+ goto done;
+ }
+ /* TODO: Add this?
+ if (io_state == VGA_RSRC_NONE) {
+ ret_val = -EPROTO;
+ goto done;
+ }
+ */
+ }
+
+ pdev = priv->target;
+ if (priv->target == NULL) {
+ ret_val = -ENODEV;
+ goto done;
+ }
+
+ vga_put(pdev, io_state);
+ for (i = 0; i < MAX_USER_CARDS; i++) {
+ if (priv->cards[i].pdev == pdev) {
+ if (io_state & VGA_RSRC_LEGACY_IO)
+ priv->cards[i].io_cnt--;
+ if (io_state & VGA_RSRC_LEGACY_MEM)
+ priv->cards[i].mem_cnt--;
+ break;
+ }
+ }
+
+ ret_val = count;
+ goto done;
+ } else if (strncmp(curr_pos, "trylock ", 8) == 0) {
+ curr_pos += 8;
+ remaining -= 8;
+
+#ifdef DEBUG
+ printk(KERN_DEBUG "client 0x%X called 'trylock'\n", (int)priv);
+#endif
+ if (!vga_str_to_iostate(curr_pos, remaining, &io_state)) {
+ ret_val = -EPROTO;
+ goto done;
+ }
+ /* TODO: Add this?
+ if (io_state == VGA_RSRC_NONE) {
+ ret_val = -EPROTO;
+ goto done;
+ }
+ */
+
+ pdev = priv->target;
+ if (priv->target == NULL) {
+ ret_val = -ENODEV;
+ goto done;
+ }
+
+ if (vga_tryget(pdev, io_state)) {
+ /* Update the client's locks lists... */
+ for (i = 0; i < MAX_USER_CARDS; i++) {
+ if (priv->cards[i].pdev == pdev) {
+ if (io_state & VGA_RSRC_LEGACY_IO)
+ priv->cards[i].io_cnt++;
+ if (io_state & VGA_RSRC_LEGACY_MEM)
+ priv->cards[i].mem_cnt++;
+ break;
+ }
+ }
+ ret_val = count;
+ goto done;
+ } else {
+ ret_val = -EBUSY;
+ goto done;
+ }
+
+ } else if (strncmp(curr_pos, "target ", 7) == 0) {
+ unsigned int domain, bus, devfn;
+ struct vga_device *vgadev;
+
+ curr_pos += 7;
+ remaining -= 7;
+#ifdef DEBUG
+ printk(KERN_DEBUG "client 0x%X called 'target'\n", (int)priv);
+#endif
+ if (!vga_pci_str_to_vars(curr_pos, remaining,
+ &domain, &bus, &devfn)) {
+ ret_val = -EPROTO;
+ goto done;
+ }
+
+ pdev = pci_get_bus_and_slot(bus, devfn);
+ if (!pdev) {
+ printk(KERN_INFO "Invalid pci address!\n");
+ ret_val = -ENODEV;
+ goto done;
+ }
+
+ vgadev = vgadev_find(pdev);
+ if (vgadev == NULL) {
+ printk(KERN_INFO "This PCI is not a vga device\n");
+ ret_val = -ENODEV;
+ goto done;
+ }
+
+ priv->target = pdev;
+ for (i = 0; i < MAX_USER_CARDS; i++) {
+ if (priv->cards[i].pdev == pdev)
+ break;
+ if (priv->cards[i].pdev == NULL) {
+ priv->cards[i].pdev = pdev;
+ priv->cards[i].io_cnt = 0;
+ priv->cards[i].mem_cnt = 0;
+ break;
+ }
+ }
+ if (i == MAX_USER_CARDS) {
+ printk(KERN_ERR "Maximum user cards number reached!\n");
+ /* XXX: which value to return? */
+ ret_val = -ENOMEM;
+ goto done;
+ }
+
+ ret_val = count;
+ goto done;
+
+
+ } else if (strncmp(curr_pos, "decodes ", 8) == 0) {
+ curr_pos += 8;
+ remaining -= 8;
+#ifdef DEBUG
+ printk(KERN_DEBUG "client 0x%X called 'decodes'\n", (int)priv);
+#endif
+
+ if (!vga_str_to_iostate(curr_pos, remaining, &io_state)) {
+ ret_val = -EPROTO;
+ goto done;
+ }
+ pdev = priv->target;
+ if (priv->target == NULL) {
+ ret_val = -ENODEV;
+ goto done;
+ }
+
+ vga_set_legacy_decoding(pdev, io_state);
+ ret_val = count;
+ goto done;
+ }
+ /* If we got here, the message written is not part of the protocol! */
+ kfree(kbuf);
+ return -EPROTO;
+
+done:
+ kfree(kbuf);
+ return ret_val;
+}
+
+static unsigned int vga_arb_fpoll(struct file *file, poll_table * wait)
+{
+ struct vga_arb_private *priv = file->private_data;
+
+#ifdef DEBUG
+ printk(KERN_DEBUG "vga_arb_fpoll()\n");
+#endif
+
+ if (priv == NULL)
+ return -ENODEV;
+ poll_wait(file, &vga_wait_queue, wait);
+ return POLLIN;
+}
+
+static int vga_arb_open(struct inode *inode, struct file *file)
+{
+ struct vga_arb_private *priv;
+ unsigned long flags;
+
+#ifdef DEBUG
+ printk(KERN_DEBUG "vga_arb_open()\n");
+#endif
+ priv = kmalloc(sizeof(struct vga_arb_private), GFP_KERNEL);
+ if (priv == NULL)
+ return -ENOMEM;
+ memset(priv, 0, sizeof(*priv));
+ spin_lock_init(&priv->lock);
+ file->private_data = priv;
+
+ spin_lock_irqsave(&vga_user_lock, flags);
+ list_add(&priv->list, &vga_user_list);
+ spin_unlock_irqrestore(&vga_user_lock, flags);
+
+ /* Set the client' lists of locks */
+ priv->target = vga_default_device(); /* Maybe this is still null! */
+ priv->cards[0].pdev = priv->target;
+ priv->cards[0].io_cnt = 0;
+ priv->cards[0].mem_cnt = 0;
+
+
+ return 0;
+}
+
+static int vga_arb_release(struct inode *inode, struct file *file)
+{
+ struct vga_arb_private *priv = file->private_data;
+ struct vga_arb_user_card *uc;
+ unsigned long flags;
+ int i;
+
+#ifdef DEBUG
+ printk(KERN_DEBUG "vga_arb_release()\n");
+#endif
+ if (priv == NULL)
+ return -ENODEV;
+
+ spin_lock_irqsave(&vga_user_lock, flags);
+ list_del(&priv->list);
+ for (i = 0; i < MAX_USER_CARDS; i++) {
+ uc = &priv->cards[i];
+ if (uc->pdev == NULL)
+ continue;
+#ifdef DEBUG
+ printk(KERN_DEBUG "uc->io_cnt == %d, uc->mem_cnt == %d\n",
+ uc->io_cnt, uc->mem_cnt);
+#endif
+ while (uc->io_cnt--)
+ vga_put(uc->pdev, VGA_RSRC_LEGACY_IO);
+ while (uc->mem_cnt--)
+ vga_put(uc->pdev, VGA_RSRC_LEGACY_MEM);
+ }
+ spin_unlock_irqrestore(&vga_user_lock, flags);
+
+ kfree(priv);
+
+ return 0;
+}
+
+static void vga_arb_device_card_gone(struct pci_dev *pdev)
+{
+}
+
+static int pci_notify(struct notifier_block *nb, unsigned long action,
+ void *data)
+{
+ struct device *dev = data;
+ struct pci_dev *pdev = to_pci_dev(dev);
+
+#ifdef DEBUG
+ printk(KERN_DEBUG "pci_notify()\n");
+#endif
+ /* For now we're only intereted in devices added and removed. I didn't
+ * test this thing here, so someone needs to double check for the
+ * cases of hotplugable vga cards. */
+ if (action == BUS_NOTIFY_ADD_DEVICE)
+ vga_arbiter_add_pci_device(pdev);
+ else if (action == BUS_NOTIFY_DEL_DEVICE)
+ vga_arbiter_del_pci_device(pdev);
+
+ return 0;
+}
+
+static struct notifier_block pci_notifier = {
+ .notifier_call = pci_notify,
+};
+
+static const struct file_operations vga_arb_device_fops = {
+ .read = vga_arb_read,
+ .write = vga_arb_write,
+ .poll = vga_arb_fpoll,
+ .open = vga_arb_open,
+ .release = vga_arb_release,
+};
+
+static struct miscdevice vga_arb_device = {
+ MISC_DYNAMIC_MINOR, "vga_arbiter", &vga_arb_device_fops
+};
+
+static int __init vga_arb_device_init(void)
+{
+ int rc;
+ struct pci_dev *pdev;
+
+ rc = misc_register(&vga_arb_device);
+ if (rc < 0)
+ printk(KERN_ERR
+ "VGA Arbiter: error %d registering device\n", rc);
+
+ bus_register_notifier(&pci_bus_type, &pci_notifier);
+
+ /* We add all pci devices satisfying vga class in the arbiter by
+ * default */
+ pdev = NULL;
+ while ((pdev =
+ pci_get_subsys(PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
+ PCI_ANY_ID, pdev)) != NULL)
+ vga_arbiter_add_pci_device(pdev);
+
+ printk(KERN_INFO "VGA Arbiter: loaded\n");
+ return rc;
+}
+
+static void __exit vga_arb_device_exit(void)
+{
+ misc_deregister(&vga_arb_device);
+#ifdef DEBUG
+ printk(KERN_DEBUG "VGA Arbiter: unloaded\n");
+#endif
+}
+
+MODULE_AUTHOR("Benjamin Herrenschmidt, Paulo R. Zanoni, Tiago Vignatti");
+MODULE_DESCRIPTION("VGA arbiter");
+MODULE_LICENSE("GPL");
+
+module_init(vga_arb_device_init);
+module_exit(vga_arb_device_exit);
diff --git a/drivers/gpu/vga/vgaarb.h b/drivers/gpu/vga/vgaarb.h
new file mode 100644
index 0000000..4dcff9e
--- /dev/null
+++ b/drivers/gpu/vga/vgaarb.h
@@ -0,0 +1,168 @@
+/*
+ * vgaarb.c
+ *
+ * (C) Copyright 2005 Benjamin Herrenschmidt <[email protected]>
+ * (C) Copyright 2007 Paulo R. Zanoni <[email protected]>
+ * (C) Copyright 2007, 2009 Tiago Vignatti <[email protected]>
+ */
+
+#ifndef LINUX_VGA_H
+
+#include <asm/vga.h>
+
+/* Legacy VGA regions */
+#define VGA_RSRC_NONE 0x00
+#define VGA_RSRC_LEGACY_IO 0x01
+#define VGA_RSRC_LEGACY_MEM 0x02
+#define VGA_RSRC_LEGACY_MASK (VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM)
+/* Non-legacy access */
+#define VGA_RSRC_NORMAL_IO 0x04
+#define VGA_RSRC_NORMAL_MEM 0x08
+
+/* Passing that instead of a pci_dev to use the system "default"
+ * device, that is the one used by vgacon. Archs will probably
+ * have to provide their own vga_default_device();
+ */
+#define VGA_DEFAULT_DEVICE (NULL)
+
+/* For use by clients */
+
+/**
+ * vga_set_legacy_decoding
+ *
+ * @pdev: pci device of the VGA card
+ * @decodes: bit mask of what legacy regions the card decodes
+ *
+ * Indicates to the arbiter if the card decodes legacy VGA IOs,
+ * legacy VGA Memory, both, or none. All cards default to both,
+ * the card driver (fbdev for example) should tell the arbiter
+ * if it has disabled legacy decoding, so the card can be left
+ * out of the arbitration process (and can be safe to take
+ * interrupts at any time.
+ */
+extern void vga_set_legacy_decoding(struct pci_dev *pdev,
+ unsigned int decodes);
+
+/**
+ * vga_get - acquire & locks VGA resources
+ *
+ * pdev: pci device of the VGA card or NULL for the system default
+ * rsrc: bit mask of resources to acquire and lock
+ * interruptible: blocking should be interruptible by signals ?
+ *
+ * This function acquires VGA resources for the given
+ * card and mark those resources locked. If the resource requested
+ * are "normal" (and not legacy) resources, the arbiter will first check
+ * wether the card is doing legacy decoding for that type of resource. If
+ * yes, the lock is "converted" into a legacy resource lock.
+ * The arbiter will first look for all VGA cards that might conflict
+ * and disable their IOs and/or Memory access, inlcuding VGA forwarding
+ * on P2P bridges if necessary, so that the requested resources can
+ * be used. Then, the card is marked as locking these resources and
+ * the IO and/or Memory accesse are enabled on the card (including
+ * VGA forwarding on parent P2P bridges if any).
+ * This function will block if some conflicting card is already locking
+ * one of the required resources (or any resource on a different bus
+ * segment, since P2P bridges don't differenciate VGA memory and IO
+ * afaik). You can indicate wether this blocking should be interruptible
+ * by a signal (for userland interface) or not.
+ * Must not be called at interrupt time or in atomic context.
+ * If the card already owns the resources, the function succeeds.
+ * Nested calls are supported (a per-resource counter is maintained)
+ */
+
+extern int vga_get(struct pci_dev *pdev, unsigned int rsrc,
+ int interruptible);
+
+/**
+ * vga_get_interruptible
+ *
+ * Shortcut to vga_get
+ */
+
+static inline int vga_get_interruptible(struct pci_dev *pdev,
+ unsigned int rsrc)
+{
+ return vga_get(pdev, rsrc, 1);
+}
+
+/**
+ * vga_get_interruptible
+ *
+ * Shortcut to vga_get
+ */
+
+static inline int vga_get_uninterruptible(struct pci_dev *pdev,
+ unsigned int rsrc)
+{
+ return vga_get(pdev, rsrc, 0);
+}
+
+/**
+ * vga_tryget - try to acquire & lock legacy VGA resources
+ *
+ * @pdev: pci devivce of VGA card or NULL for system default
+ * @rsrc: bit mask of resources to acquire and lock
+ *
+ * This function performs the same operation as vga_get(), but
+ * will return an error (-EBUSY) instead of blocking if the resources
+ * are already locked by another card. It can be called in any context
+ */
+
+extern int vga_tryget(struct pci_dev *pdev, unsigned int rsrc);
+
+/**
+ * vga_put - release lock on legacy VGA resources
+ *
+ * @pdev: pci device of VGA card or NULL for system default
+ * @rsrc: but mask of resource to release
+ *
+ * This function releases resources previously locked by vga_get()
+ * or vga_tryget(). The resources aren't disabled right away, so
+ * that a subsequence vga_get() on the same card will succeed
+ * immediately. Resources have a counter, so locks are only
+ * released if the counter reaches 0.
+ */
+
+extern void vga_put(struct pci_dev *pdev, unsigned int rsrc);
+
+
+/**
+ * vga_default_device
+ *
+ * This can be defined by the platform. The default implementation
+ * is rather dumb and will probably only work properly on single
+ * vga card setups and/or x86 platforms.
+ *
+ * If your VGA default device is not PCI, you'll have to return
+ * NULL here. In this case, I assume it will not conflict with
+ * any PCI card. If this is not true, I'll have to define two archs
+ * hooks for enabling/disabling the VGA default device if that is
+ * possible. This may be a problem with real _ISA_ VGA cards, in
+ * addition to a PCI one. I don't know at this point how to deal
+ * with that card. Can theirs IOs be disabled at all ? If not, then
+ * I suppose it's a matter of having the proper arch hook telling
+ * us about it, so we basically never allow anybody to succeed a
+ * vga_get()...
+ */
+
+#ifndef __ARCH_HAS_VGA_DEFAULT_DEVICE
+extern struct pci_dev *vga_default_device(void);
+#endif
+
+/**
+ * vga_conflicts
+ *
+ * Architectures should define this if they have several
+ * independant PCI domains that can afford concurrent VGA
+ * decoding
+ */
+
+#ifndef __ARCH_HAS_VGA_CONFLICT
+static inline int vga_conflicts(struct pci_dev *p1, struct pci_dev *p2)
+{
+ return 1;
+}
+#endif
+
+#endif /* LINUX_VGA_H */
diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index 8afcf08..f4ed145 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -7,6 +7,8 @@ menu "Graphics support"
source "drivers/char/agp/Kconfig"
+source "drivers/gpu/vga/Kconfig"
+
source "drivers/gpu/drm/Kconfig"
config VGASTATE
--
1.5.6.3
Signed-off-by: Tiago Vignatti <[email protected]>
---
Documentation/vgaarbiter.txt | 197 ++++++++++++++++++++++++++++++++++++++++++
1 files changed, 197 insertions(+), 0 deletions(-)
create mode 100644 Documentation/vgaarbiter.txt
diff --git a/Documentation/vgaarbiter.txt b/Documentation/vgaarbiter.txt
new file mode 100644
index 0000000..37d3126
--- /dev/null
+++ b/Documentation/vgaarbiter.txt
@@ -0,0 +1,197 @@
+
+VGA Arbiter
+===========
+
+Graphic devices are accessed through ranges in I/O or memory space. While most
+modern devices allow relocation of such ranges, some "Legacy" VGA devices
+implemented on PCI will typically have the same "hard-decoded" addresses as
+they did on ISA. For more details see "PCI Bus Binding to IEEE Std 1275-1994
+Standard for Boot (Initialization Configuration) Firmware Revision 2.1"
+Section 7, Legacy Devices.
+
+The Resource Access Control (RAC) module inside the X server currently does
+the task of arbitration when more than one legacy device co-exists on the same
+machine. But the problem happens when these devices are trying to be accessed
+by different userspace clients (e.g. two server in parallel). Their address
+assignments conflict. Therefore an arbitration scheme _outside_ of the X
+server is needed to control the sharing of these resources. This document
+introduces the operation of the VGA arbiter implemented for Linux kernel.
+
+----------------------------------------------------------------------------
+
+I. Details and Theory of Operation
+ I.1 vgaarb
+ I.2 libpciaccess
+ I.3 xf86VGAArbiter (X server implementation)
+II. Open Issues / Bugs
+III.Credits
+VI. References
+
+
+I. Details and Theory of Operation
+==================================
+
+I.1 vgaarb
+----------
+
+The vgaarb is a module of the Linux Kernel. When it is initially loaded, it
+scans all PCI devices and add the VGA ones inside the arbitration. The arbiter
+then enables/disables the decoding of VGA legacy instructions by different
+devices which are trying to use them on the same machine. The device which
+does not want/need to use the arbiter may explicitly tell it by calling
+vga_arb_decodes() from the VGA library function. Driver must take a special
+care here.
+
+Basically the kernel exports a char device interface (/dev/vga_arbiter) to the
+clients. It has the usual semantics:
+
+ open : open user instance of the arbiter. By default, it's attached to
+ the default VGA device of the system.
+
+ close : close user instance. Release locks made by the user
+
+ read : return a string indicating the status of the target like:
+
+ "<card_ID>,decodes=<io_state>,owns=<io_state>,locks=<io_state> (ic,mc)"
+
+ An IO state string is of the form {io,mem,io+mem,none}, mc and
+ ic are respectively mem and io lock counts (for debugging/
+ diagnostic only). "decodes" indicate what the card currently
+ decodes, "owns" indicates what is currently enabled on it, and
+ "locks" indicates what is locked by this card. If the card is
+ unplugged, we get "invalid" then for card_ID and an -ENODEV
+ error is returned for any command until a new card is targeted.
+
+
+ write : write a command to the arbiter. List of commands:
+
+ target <card_ID> : switch target to card <card_ID> (see below)
+ lock <io_state> : acquires locks on target ("none" is an invalid io_state)
+ trylock <io_state> : non-blocking acquire locks on target (returns EBUSY if
+ unsuccessful)
+ unlock <io_state> : release locks on target
+ unlock all : release all locks on target held by this user (not
+ implemented yet)
+ decodes <io_state> : set the legacy decoding attributes for the card
+
+ poll : event if something changes on any card (not just the
+ target)
+
+ card_ID is of the form "PCI:domain:bus:dev.fn". It can be set to "default"
+ to go back to the system default card (TODO: not implemented yet). Currently,
+ only PCI is supported as a prefix, but the userland API may support other bus
+ types in the future, even if the current kernel implementation doesn't.
+
+Note about locks:
+
+The driver keeps track of which user has which locks on which card. It
+supports stacking, like the kernel one. This complexifies the implementation
+a bit, but makes the arbiter more tolerant to user space problems and able
+to properly cleanup in all cases when a process dies.
+Currently, a max of 16 cards can have locks simultaneously issued from
+user space for a given user (file descriptor instance) of the arbiter.
+
+If the device is hot-{un,}plugged, there is a hook inside the module to notify
+them being added/removed in the system and automatically added/removed in
+the arbiter.
+
+There's also a in-kernel API of the arbiter in the case of DRM, vgacon and
+others which may use the arbiter.
+
+
+I.2 libpciaccess
+----------------
+
+To use the vga arbiter char device it was implemented a serie of functions
+inside the pciaccess library. Two fields were added to struct pci_device for
+this be possible:
+
+ /* the type of resource decoded by the device */
+ int vgaarb_rsrc;
+ /* the file descriptor (/dev/vga_arbiter) */
+ int vgaarb_fd;
+
+
+and the functions:
+
+These functions below acquire VGA resources for the given card and mark those
+resources as locked. If the resources requested are "normal" (and not legacy)
+resources, the arbiter will first check whether the card is doing legacy
+decoding for that type of resource. If yes, the lock is "converted" into a
+legacy resource lock. The arbiter will first look for all VGA cards that
+might conflict and disable their IOs and/or Memory access, including VGA
+forwarding on P2P bridges if necessary, so that the requested resources can
+be used. Then, the card is marked as locking these resources and the IO and/or
+Memory access is enabled on the card (including VGA forwarding on parent
+P2P bridges if any). In the case of vga_arb_lock(), the function will block
+if some conflicting card is already locking one of the required resources (or
+any resource on a different bus segment, since P2P bridges don't differentiate
+VGA memory and IO afaik). If the card already owns the resources, the function
+succeeds. vga_arb_trylock() will return (-EBUSY) instead of blocking. Nested
+calls are supported (a per-resource counter is maintained).
+
+
+Set the target device of this client.
+ int pci_device_vgaarb_set_target (struct pci_device *dev);
+
+
+For instance, in x86 if two devices on the same bus want to lock different
+resources, both will succeed (lock). If devices are in different buses and
+trying to lock different resources, only the first who tried succeeds.
+ int pci_device_vgaarb_lock (struct pci_device *dev);
+ int pci_device_vgaarb_trylock (struct pci_device *dev);
+
+Unlock resources of device.
+ int pci_device_vgaarb_unlock (struct pci_device *dev);
+
+Indicates to the arbiter if the card decodes legacy VGA IOs, legacy VGA
+Memory, both, or none. All cards default to both, the card driver (fbdev for
+example) should tell the arbiter if it has disabled legacy decoding, so the
+card can be left out of the arbitration process (and can be safe to take
+interrupts at any time.
+ int pci_device_vgaarb_decodes (struct pci_device *dev);
+
+Connects to the arbiter device, allocates the struct
+ int pci_device_vgaarb_init (struct pci_device *dev);
+
+Close the connection
+ void pci_device_vgaarb_fini (struct pci_device *dev);
+
+
+I.3 xf86VGAArbiter (X server implementation)
+--------------------------------------------
+
+(TODO)
+
+This work will probably remove the RAC (Resource Access Control) code inside
+Xorg DDX. Complain: RAC is non-OS dependent. VGA Arbiter is.
+
+
+II. Open Issues / Bugs
+======================
+
+Current status:
+ - two X servers is parallel works (multiseat style)
+ - non-bottable card works
+ - secondary card works
+ - xinemara-like _doesn't_ works (need to trace more paths paths that touch
+ the registers and bracket with lock/unlock)
+
+
+III. Credits
+===========
+
+Benjamin Herrenschmidt (IBM?) started this work when he discussed such design
+with the Xorg community in 2005 [1, 2]. In the end of 2007, Paulo Zanoni and
+Tiago Vignatti (both of C3SL/Federal University of Paraná) proceeded his work
+enhancing the kernel code to adapt as a kernel module and also did the
+implementation of the user space side [3]. Now (2009) Tiago Vignatti is
+finally trying to push such work upstream.
+
+
+VI. References
+==============
+
+[1] http://lists.freedesktop.org/archives/xorg/2005-March/006663.html
+[2] http://lists.freedesktop.org/archives/xorg/2005-March/006745.html
+[3] http://lists.freedesktop.org/archives/xorg/2007-October/029507.html
--
1.5.6.3
> +#ifndef __ARCH_HAS_VGA_ENABLE_RESOURCES
> +static inline void vga_enable_resources(struct pci_dev *pdev,
> + unsigned int rsrc)
> +{
> + struct pci_bus *bus;
> + struct pci_dev *bridge;
> + u16 cmd;
> +
> +#ifdef DEBUG
> + printk(KERN_DEBUG "%s\n", __func__);
> +#endif
> + pci_read_config_word(pdev, PCI_COMMAND, &cmd);
> + if (rsrc & (VGA_RSRC_LEGACY_IO | VGA_RSRC_NORMAL_IO))
> + cmd |= PCI_COMMAND_IO;
> + if (rsrc & (VGA_RSRC_LEGACY_MEM | VGA_RSRC_NORMAL_MEM))
> + cmd |= PCI_COMMAND_MEMORY;
> + pci_write_config_word(pdev, PCI_COMMAND, cmd);
Locking question - what locks this lot against hotplug also touching
bridge settings ?
> +
> + if (!(rsrc & VGA_RSRC_LEGACY_MASK))
> + return;
> +
> + bus = pdev->bus;
> + while (bus) {
> + bridge = bus->self;
> + if (bridge) {
> + pci_read_config_word(bridge, PCI_BRIDGE_CONTROL,
> + &cmd);
> + if (!(cmd & PCI_BRIDGE_CTL_VGA)) {
> + cmd |= PCI_BRIDGE_CTL_VGA;
> + pci_write_config_word(bridge,
> + PCI_BRIDGE_CONTROL,
> + cmd);
> + }
> + }
> + bus = bus->parent;
> + }
> +}
> +#endif
> + /* The one who calls us should check for this, but lets be sure... */
> + if (pdev == NULL)
> + pdev = vga_default_device();
What if the BIOS provided device was hot unplugged ?
> + conflict = __vga_tryget(vgadev, rsrc);
> + spin_unlock_irqrestore(&vga_lock, flags);
> + if (conflict == NULL)
> + break;
> +
> +
> + /* We have a conflict, we wait until somebody kicks the
> + * work queue. Currently we have one work queue that we
If two drivers own half the resources and both are waiting for the rest
what handles the deadlock
> + * kick each time some resources are released, but it would
> + * be fairly easy to have a per device one so that we only
> + * need to attach to the conflicting device
> + */
> + init_waitqueue_entry(&wait, current);
> + add_wait_queue(&vga_wait_queue, &wait);
> + set_current_state(interruptible ?
> + TASK_INTERRUPTIBLE :
> + TASK_UNINTERRUPTIBLE);
> + if (signal_pending(current)) {
> + rc = -EINTR;
> + break;
> + }
> + schedule();
> + remove_wait_queue(&vga_wait_queue, &wait);
> + set_current_state(TASK_RUNNING);
Seems a very long winded way to write
wait_event_interruptible(...)
> + /* Allocate structure */
> + vgadev = kmalloc(sizeof(struct vga_device), GFP_KERNEL);
> + if (vgadev == NULL) {
> + /* What to do on allocation failure ? For now, let's
> + * just do nothing, I'm not sure there is anything saner
> + * to be done
> + */
If this is an "oh dear" moment then at least printk something
>
> + /* Set the client' lists of locks */
> + priv->target = vga_default_device(); /* Maybe this is still null! */
PCI device refcounting ?
Minor comment:
> +#ifdef DEBUG
> + printk(KERN_DEBUG "%s\n", __func__);
> +#endif
You should just use 'dev_dbg() for any debugging statments like this.
You can turn them on and off dynamically, and you get all of the proper
device information as to what is going on automatically.
Plus, there's no need for a #ifdef in the code, which is generally
frowned apon in .c files.
thanks,
greg k-h
On Wed, Jul 15, 2009 at 12:35 AM, Alan Cox<[email protected]> wrote:
>> +#ifndef __ARCH_HAS_VGA_ENABLE_RESOURCES
>> +static inline void vga_enable_resources(struct pci_dev *pdev,
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? unsigned int rsrc)
>> +{
>> + ? ? struct pci_bus *bus;
>> + ? ? struct pci_dev *bridge;
>> + ? ? u16 cmd;
>> +
>> +#ifdef DEBUG
>> + ? ? printk(KERN_DEBUG "%s\n", __func__);
>> +#endif
>> + ? ? pci_read_config_word(pdev, PCI_COMMAND, &cmd);
>> + ? ? if (rsrc & (VGA_RSRC_LEGACY_IO | VGA_RSRC_NORMAL_IO))
>> + ? ? ? ? ? ? cmd |= PCI_COMMAND_IO;
>> + ? ? if (rsrc & (VGA_RSRC_LEGACY_MEM | VGA_RSRC_NORMAL_MEM))
>> + ? ? ? ? ? ? cmd |= PCI_COMMAND_MEMORY;
>> + ? ? pci_write_config_word(pdev, PCI_COMMAND, cmd);
>
> Locking question - what locks this lot against hotplug also touching
> bridge settings ?
>
>> +
>> + ? ? if (!(rsrc & VGA_RSRC_LEGACY_MASK))
>> + ? ? ? ? ? ? return;
>> +
>> + ? ? bus = pdev->bus;
>> + ? ? while (bus) {
>> + ? ? ? ? ? ? bridge = bus->self;
>> + ? ? ? ? ? ? if (bridge) {
>> + ? ? ? ? ? ? ? ? ? ? pci_read_config_word(bridge, PCI_BRIDGE_CONTROL,
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?&cmd);
>> + ? ? ? ? ? ? ? ? ? ? if (!(cmd & PCI_BRIDGE_CTL_VGA)) {
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? cmd |= PCI_BRIDGE_CTL_VGA;
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? pci_write_config_word(bridge,
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? PCI_BRIDGE_CONTROL,
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? cmd);
>> + ? ? ? ? ? ? ? ? ? ? }
>> + ? ? ? ? ? ? }
>> + ? ? ? ? ? ? bus = bus->parent;
>> + ? ? }
>> +}
>> +#endif
>
>
>> + ? ? /* The one who calls us should check for this, but lets be sure... */
>> + ? ? if (pdev == NULL)
>> + ? ? ? ? ? ? pdev = vga_default_device();
>
> What if the BIOS provided device was hot unplugged ?
>
>> + ? ? ? ? ? ? conflict = __vga_tryget(vgadev, rsrc);
>> + ? ? ? ? ? ? spin_unlock_irqrestore(&vga_lock, flags);
>> + ? ? ? ? ? ? if (conflict == NULL)
>> + ? ? ? ? ? ? ? ? ? ? break;
>> +
>> +
>> + ? ? ? ? ? ? /* We have a conflict, we wait until somebody kicks the
>> + ? ? ? ? ? ? ?* work queue. Currently we have one work queue that we
>
> If two drivers own half the resources and both are waiting for the rest
> what handles the deadlock
I'm not sure we should care about resource locking granularity below,
I have all resources, I have no resources, I don't think we gain anything
by it, since really we want to avoid using vga arb as much as we can.
Tiago, the other issue I noticed when using this is we need to provide
a hook for drm drivers that use an irq to have the irq disabled around
the mem/io space disables otherwise they can get an irq with no mem/io
and boom all fall down.
Old X userspace RAC never worried about interrupts and assumed the
GPU wasn't using one which was true in the 80s.
Dave.
On Wed, Jul 15, 2009 at 2:15 AM, Greg KH<[email protected]> wrote:
> Minor comment:
>
>> +#ifdef DEBUG
>> + ? ? printk(KERN_DEBUG "%s\n", __func__);
>> +#endif
>
> You should just use 'dev_dbg() for any debugging statments like this.
> You can turn them on and off dynamically, and you get all of the proper
> device information as to what is going on automatically.
>
> Plus, there's no need for a #ifdef in the code, which is generally
> frowned apon in .c files.
>
We don't have a device in this case, vga arb is more of an abstraction than
an actual device.
I suppose if we were feeling crazy we could add a platform device for it.
Dave.
On Thu, Jul 16, 2009 at 01:54:42PM +1000, Dave Airlie wrote:
> On Wed, Jul 15, 2009 at 2:15 AM, Greg KH<[email protected]> wrote:
> > Minor comment:
> >
> >> +#ifdef DEBUG
> >> + ? ? printk(KERN_DEBUG "%s\n", __func__);
> >> +#endif
> >
> > You should just use 'dev_dbg() for any debugging statments like this.
> > You can turn them on and off dynamically, and you get all of the proper
> > device information as to what is going on automatically.
> >
> > Plus, there's no need for a #ifdef in the code, which is generally
> > frowned apon in .c files.
> >
>
> We don't have a device in this case, vga arb is more of an abstraction than
> an actual device.
>
> I suppose if we were feeling crazy we could add a platform device for it.
No, then use the debug_printk() functions, so that you tie into the
kernel-wide debug message system. Please don't roll your own anymore,
there is no need to.
thanks,
greg k-h
On Thu, Jul 16, 2009 at 2:02 PM, Greg KH<[email protected]> wrote:
> On Thu, Jul 16, 2009 at 01:54:42PM +1000, Dave Airlie wrote:
>> On Wed, Jul 15, 2009 at 2:15 AM, Greg KH<[email protected]> wrote:
>> > Minor comment:
>> >
>> >> +#ifdef DEBUG
>> >> + ? ? printk(KERN_DEBUG "%s\n", __func__);
>> >> +#endif
>> >
>> > You should just use 'dev_dbg() for any debugging statments like this.
>> > You can turn them on and off dynamically, and you get all of the proper
>> > device information as to what is going on automatically.
>> >
>> > Plus, there's no need for a #ifdef in the code, which is generally
>> > frowned apon in .c files.
>> >
>>
>> We don't have a device in this case, vga arb is more of an abstraction than
>> an actual device.
>>
>> I suppose if we were feeling crazy we could add a platform device for it.
>
> No, then use the debug_printk() functions, so that you tie into the
> kernel-wide debug message system. ?Please don't roll your own anymore,
> there is no need to.
Ah I assume you mean pr_devel and friends.
Dave.
>
> thanks,
>
> greg k-h
>
Hi Alan,
some hopeful answers,
On Wed, Jul 15, 2009 at 12:35 AM, Alan Cox<[email protected]> wrote:
>> +#ifndef __ARCH_HAS_VGA_ENABLE_RESOURCES
>> +static inline void vga_enable_resources(struct pci_dev *pdev,
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? unsigned int rsrc)
>> +{
>> + ? ? struct pci_bus *bus;
>> + ? ? struct pci_dev *bridge;
>> + ? ? u16 cmd;
>> +
>> +#ifdef DEBUG
>> + ? ? printk(KERN_DEBUG "%s\n", __func__);
>> +#endif
>> + ? ? pci_read_config_word(pdev, PCI_COMMAND, &cmd);
>> + ? ? if (rsrc & (VGA_RSRC_LEGACY_IO | VGA_RSRC_NORMAL_IO))
>> + ? ? ? ? ? ? cmd |= PCI_COMMAND_IO;
>> + ? ? if (rsrc & (VGA_RSRC_LEGACY_MEM | VGA_RSRC_NORMAL_MEM))
>> + ? ? ? ? ? ? cmd |= PCI_COMMAND_MEMORY;
>> + ? ? pci_write_config_word(pdev, PCI_COMMAND, cmd);
>
> Locking question - what locks this lot against hotplug also touching
> bridge settings ?
well here we just bang on device config space registers which means we
can probably
race against lots of other things that rmw the PCI_COMMAND not just hotplug.
Perhaps we need some sort per device PCI command space lock,
granted this still means we race against anyone directly hacking it
behind our backs.
As for the bridge settings, it sounds like we need to have a per
bridge pci spinlock
if hotplug is also doing this.
>
>
>> + ? ? /* The one who calls us should check for this, but lets be sure... */
>> + ? ? if (pdev == NULL)
>> + ? ? ? ? ? ? pdev = vga_default_device();
>
> What if the BIOS provided device was hot unplugged ?
we just use the pdev as a cookie, if it was hot unplugged we'll
have gotten a callback to remove it from the VGA device list
and the lookup which happens 5 lines later inside the spinlock
will fail.
>
>> + ? ? ? ? ? ? conflict = __vga_tryget(vgadev, rsrc);
>> + ? ? ? ? ? ? spin_unlock_irqrestore(&vga_lock, flags);
>> + ? ? ? ? ? ? if (conflict == NULL)
>> + ? ? ? ? ? ? ? ? ? ? break;
>> +
>> +
>> + ? ? ? ? ? ? /* We have a conflict, we wait until somebody kicks the
>> + ? ? ? ? ? ? ?* work queue. Currently we have one work queue that we
>
> If two drivers own half the resources and both are waiting for the rest
> what handles the deadlock
>
>> + ? ? ? ? ? ? ?* kick each time some resources are released, but it would
>> + ? ? ? ? ? ? ?* be fairly easy to have a per device one so that we only
>> + ? ? ? ? ? ? ?* need to attach to the conflicting device
>> + ? ? ? ? ? ? ?*/
>> + ? ? ? ? ? ? init_waitqueue_entry(&wait, current);
>> + ? ? ? ? ? ? add_wait_queue(&vga_wait_queue, &wait);
>> + ? ? ? ? ? ? set_current_state(interruptible ?
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? TASK_INTERRUPTIBLE :
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? TASK_UNINTERRUPTIBLE);
>> + ? ? ? ? ? ? if (signal_pending(current)) {
>> + ? ? ? ? ? ? ? ? ? ? rc = -EINTR;
>> + ? ? ? ? ? ? ? ? ? ? break;
>> + ? ? ? ? ? ? }
>> + ? ? ? ? ? ? schedule();
>> + ? ? ? ? ? ? remove_wait_queue(&vga_wait_queue, &wait);
>> + ? ? ? ? ? ? set_current_state(TASK_RUNNING);
>
> Seems a very long winded way to write
>
> ? ? ? ?wait_event_interruptible(...)
Is it? it looks close to wait_event_interruptible(vga_wait_queue, 1);
maybe __wait_even_interruptible(vga_wait_queue, 1)
maybe we can restructure the whole locking above to make it
more like a simple condition.
>
>
>> + ? ? /* Allocate structure */
>> + ? ? vgadev = kmalloc(sizeof(struct vga_device), GFP_KERNEL);
>> + ? ? if (vgadev == NULL) {
>> + ? ? ? ? ? ? /* What to do on allocation failure ? For now, let's
>> + ? ? ? ? ? ? ?* just do nothing, I'm not sure there is anything saner
>> + ? ? ? ? ? ? ?* to be done
>> + ? ? ? ? ? ? ?*/
>
> If this is an "oh dear" moment then at least printk something
>
Cool, fixed that one.
>>
>> + ? ? /* Set the client' lists of locks */
>> + ? ? priv->target = vga_default_device(); /* Maybe this is still null! */
>
> PCI device refcounting ?
Again its just used a cookie for a later lookup in our vgadev array,
its gone away it'll have been removed from the array,
We could use pci_dev_get/pci_dev_put I suppose but its only used as
a cookie so far.
Dave.
> We don't have a device in this case, vga arb is more of an abstraction than
> an actual device.
>
> I suppose if we were feeling crazy we could add a platform device for it.
Probably a good idea and it gives you something more meaningful to hang
the list of devices being arbitrated.
Alan
> >> + ? ? pci_read_config_word(pdev, PCI_COMMAND, &cmd);
> >> + ? ? if (rsrc & (VGA_RSRC_LEGACY_IO | VGA_RSRC_NORMAL_IO))
> >> + ? ? ? ? ? ? cmd |= PCI_COMMAND_IO;
> >> + ? ? if (rsrc & (VGA_RSRC_LEGACY_MEM | VGA_RSRC_NORMAL_MEM))
> >> + ? ? ? ? ? ? cmd |= PCI_COMMAND_MEMORY;
> >> + ? ? pci_write_config_word(pdev, PCI_COMMAND, cmd);
> >
> > Locking question - what locks this lot against hotplug also touching
> > bridge settings ?
>
> well here we just bang on device config space registers which means we
> can probably
> race against lots of other things that rmw the PCI_COMMAND not just hotplug.
>
> Perhaps we need some sort per device PCI command space lock,
> granted this still means we race against anyone directly hacking it
> behind our backs.
I suspect the right thing to do is to move that function into the
drivers/pci code and lock it properly there. That would keep all the
locking detail internal and private (and someone else's problem ;))
> >> + ? ? ? ? ? ? pdev = vga_default_device();
> >
> > What if the BIOS provided device was hot unplugged ?
>
> we just use the pdev as a cookie, if it was hot unplugged we'll
> have gotten a callback to remove it from the VGA device list
> and the lookup which happens 5 lines later inside the spinlock
> will fail.
What if I inserted a new device - the allocator might give me a new
device with the same pointer if its reusing the same slab entry for that
size. Unlikely but given a zillion boxes this starts to occur 8(
> >> + ? ? ? ? ? ? /* We have a conflict, we wait until somebody kicks the
> >> + ? ? ? ? ? ? ?* work queue. Currently we have one work queue that we
> >
> > If two drivers own half the resources and both are waiting for the rest
> > what handles the deadlock
> >
Not commented on - but a serious question would be "do we actually care
enough or are there really devices with just I/O and just vga memory
access used ?"
> > PCI device refcounting ?
>
> Again its just used a cookie for a later lookup in our vgadev array,
> its gone away it'll have been removed from the array,
>
> We could use pci_dev_get/pci_dev_put I suppose but its only used as
> a cookie so far.
Your cookie validity is suspect I fear. Also holding the device reference
means you stop that exact set of resources being reissued too early and
you (or clients) scribbling on them through unfortunate timing. So I
think you actually do need to grab references properly, Doesn't make the
code much more complex that I can see.
Alan
On Thu, Jul 16, 2009 at 6:48 PM, Alan Cox<[email protected]> wrote:
>> >> + ? ? pci_read_config_word(pdev, PCI_COMMAND, &cmd);
>> >> + ? ? if (rsrc & (VGA_RSRC_LEGACY_IO | VGA_RSRC_NORMAL_IO))
>> >> + ? ? ? ? ? ? cmd |= PCI_COMMAND_IO;
>> >> + ? ? if (rsrc & (VGA_RSRC_LEGACY_MEM | VGA_RSRC_NORMAL_MEM))
>> >> + ? ? ? ? ? ? cmd |= PCI_COMMAND_MEMORY;
>> >> + ? ? pci_write_config_word(pdev, PCI_COMMAND, cmd);
>> >
>> > Locking question - what locks this lot against hotplug also touching
>> > bridge settings ?
>>
>> well here we just bang on device config space registers which means we
>> can probably
>> race against lots of other things that rmw the PCI_COMMAND not just hotplug.
>>
>> Perhaps we need some sort per device PCI command space lock,
>> granted this still means we race against anyone directly hacking it
>> behind our backs.
>
> I suspect the right thing to do is to move that function into the
> drivers/pci code and lock it properly there. That would keep all the
> locking detail internal and private (and someone else's problem ;))
I'll have a look, Jesse any ideas? the hotplug bridge bashing looks unfun.
I noticed a fair few drivers seem to bash these things, and really
pci_enable_device
could possible race with hotplug.
>
>> >> + ? ? ? ? ? ? pdev = vga_default_device();
>> >
>> > What if the BIOS provided device was hot unplugged ?
>>
>> we just use the pdev as a cookie, if it was hot unplugged we'll
>> have gotten a callback to remove it from the VGA device list
>> and the lookup which happens 5 lines later inside the spinlock
>> will fail.
>
> What if I inserted a new device - the allocator might give me a new
> device with the same pointer if its reusing the same slab entry for that
> size. Unlikely but given a zillion boxes this starts to occur 8(
A zillion boxes with hotplug VGA devices, I wish :-), but I'll fix it up
to do the pci_dev_get/puts.
>
>> >> + ? ? ? ? ? ? /* We have a conflict, we wait until somebody kicks the
>> >> + ? ? ? ? ? ? ?* work queue. Currently we have one work queue that we
>> >
>> > If two drivers own half the resources and both are waiting for the rest
>> > what handles the deadlock
>> >
>
> Not commented on - but a serious question would be "do we actually care
> enough or are there really devices with just I/O and just vga memory
> access used ?"
>
Oops yes I want to avoid doing anything except VGA resources on/off
I don't think the granularity matters, will talk to BenH tomorrow see what his
original idea was.
>> > PCI device refcounting ?
>>
>> Again its just used a cookie for a later lookup in our vgadev array,
>> its gone away it'll have been removed from the array,
>>
>> We could use pci_dev_get/pci_dev_put I suppose but its only used as
>> a cookie so far.
>
> Your cookie validity is suspect I fear. Also holding the device reference
> means you stop that exact set of resources being reissued too early and
> you (or clients) scribbling on them through unfortunate timing. So I
> think you actually do need to grab references properly, Doesn't make the
> code much more complex that I can see.
I'll fix it seems like a good plan just in case.
Dave.
On Thu, 16 Jul 2009 20:38:44 +1000
Dave Airlie <[email protected]> wrote:
> On Thu, Jul 16, 2009 at 6:48 PM, Alan Cox<[email protected]>
> wrote:
> >> >> + pci_read_config_word(pdev, PCI_COMMAND, &cmd);
> >> >> + if (rsrc & (VGA_RSRC_LEGACY_IO | VGA_RSRC_NORMAL_IO))
> >> >> + cmd |= PCI_COMMAND_IO;
> >> >> + if (rsrc & (VGA_RSRC_LEGACY_MEM | VGA_RSRC_NORMAL_MEM))
> >> >> + cmd |= PCI_COMMAND_MEMORY;
> >> >> + pci_write_config_word(pdev, PCI_COMMAND, cmd);
> >> >
> >> > Locking question - what locks this lot against hotplug also
> >> > touching bridge settings ?
> >>
> >> well here we just bang on device config space registers which
> >> means we can probably
> >> race against lots of other things that rmw the PCI_COMMAND not
> >> just hotplug.
> >>
> >> Perhaps we need some sort per device PCI command space lock,
> >> granted this still means we race against anyone directly hacking it
> >> behind our backs.
> >
> > I suspect the right thing to do is to move that function into the
> > drivers/pci code and lock it properly there. That would keep all the
> > locking detail internal and private (and someone else's problem ;))
>
> I'll have a look, Jesse any ideas? the hotplug bridge bashing looks
> unfun.
>
> I noticed a fair few drivers seem to bash these things, and really
> pci_enable_device
> could possible race with hotplug.
Oh hm, yeah moving that sort of thing into the PCI core probably makes
sense. Hotplug interactions would be good to get right here, since it
might be possible to implement switchable graphics machines that way...
--
Jesse Barnes, Intel Open Source Technology Center
On Thu, 2009-07-16 at 09:48 +0100, Alan Cox wrote:
>
> > >> + pdev = vga_default_device();
> > >
> > > What if the BIOS provided device was hot unplugged ?
> >
> > we just use the pdev as a cookie, if it was hot unplugged we'll
> > have gotten a callback to remove it from the VGA device list
> > and the lookup which happens 5 lines later inside the spinlock
> > will fail.
>
> What if I inserted a new device - the allocator might give me a new
> device with the same pointer if its reusing the same slab entry for
> that
> size. Unlikely but given a zillion boxes this starts to occur 8(
The original proof-of concept draft code I wrote (and this is -very-
close to it :-) didn't quite handle hotplug of the default device. That
does need to be sorted.
But then, I have a hard time finding any useful locking to use vs. PCI
hotplug, so it's a non trivial matter.
> Not commented on - but a serious question would be "do we actually
> care enough or are there really devices with just I/O and just vga
> memory access used ?"
I honestly cannot remember why I split the 4 resource types that way
back then. I have the nagging feeling that I had a good reason to do so
but it totally eludes me today :-)
The one main thing I wanted was to keep legacy vs. standard resources so
that the all the portions of a driver (DDX, DRM, etc...) that use
standard resources can continue to lock/unlock just these, without
having to know whether one of the elements disable legacy decoding (or
is trying to request it again).
But maybe somebody with a fresh new look on all this will rightfully say
"fuck it, that's too complicated" and turn the whole thing into a single
token to pass around.
> Your cookie validity is suspect I fear.
Yes, it is sadly.
> Also holding the device reference means you stop that exact set of
> resources being reissued too early and
> you (or clients) scribbling on them through unfortunate timing. So I
> think you actually do need to grab references properly, Doesn't make
> the
> code much more complex that I can see.
>
Well, we are in the device addition/removal path, so the list of vga
devices doesn't need to hold a reference here. However, we probably need
to make sure we take one when we peek something from that list.
Cheers,
Ben.
On Thu, 2009-07-16 at 20:38 +1000, Dave Airlie wrote:
> Oops yes I want to avoid doing anything except VGA resources on/off
> I don't think the granularity matters, will talk to BenH tomorrow see
> what his original idea was.
Well, it's more than VGA on/off... if your card decodes VGA, then it's
all memory (or IO) resources on/off...
I -think- it may be that I split memory and IO with the idea that DRM
mostly don't need IO for irqs etc... and so we could technically steal
only IO for some things, like VGA based mode switching on the other
card, without disturbing the guy next door.
Cheers,
Ben.
On Thu, 2009-07-16 at 13:54 +1000, Dave Airlie wrote:
> On Wed, Jul 15, 2009 at 2:15 AM, Greg KH<[email protected]> wrote:
> > Minor comment:
> >
> >> +#ifdef DEBUG
> >> + printk(KERN_DEBUG "%s\n", __func__);
> >> +#endif
> >
> > You should just use 'dev_dbg() for any debugging statments like this.
> > You can turn them on and off dynamically, and you get all of the proper
> > device information as to what is going on automatically.
> >
> > Plus, there's no need for a #ifdef in the code, which is generally
> > frowned apon in .c files.
> >
>
> We don't have a device in this case, vga arb is more of an abstraction than
> an actual device.
>
> I suppose if we were feeling crazy we could add a platform device for it.
Then use pr_debug :-)
Ben.
On Thu, 2009-07-16 at 09:41 +0100, Alan Cox wrote:
> > We don't have a device in this case, vga arb is more of an abstraction than
> > an actual device.
> >
> > I suppose if we were feeling crazy we could add a platform device for it.
>
> Probably a good idea and it gives you something more meaningful to hang
> the list of devices being arbitrated.
That's a good point, it would be useful for diagnosis purposes at least
to have a sysfs dir with symlinks to the devices being arbitrated.
Cheers,
Ben.
On Tue, 2009-07-14 at 15:35 +0100, Alan Cox wrote:
> > +#ifndef __ARCH_HAS_VGA_ENABLE_RESOURCES
> > +static inline void vga_enable_resources(struct pci_dev *pdev,
> > + unsigned int rsrc)
> > +{
> > + struct pci_bus *bus;
> > + struct pci_dev *bridge;
> > + u16 cmd;
> > +
> > +#ifdef DEBUG
> > + printk(KERN_DEBUG "%s\n", __func__);
> > +#endif
> > + pci_read_config_word(pdev, PCI_COMMAND, &cmd);
> > + if (rsrc & (VGA_RSRC_LEGACY_IO | VGA_RSRC_NORMAL_IO))
> > + cmd |= PCI_COMMAND_IO;
> > + if (rsrc & (VGA_RSRC_LEGACY_MEM | VGA_RSRC_NORMAL_MEM))
> > + cmd |= PCI_COMMAND_MEMORY;
> > + pci_write_config_word(pdev, PCI_COMMAND, cmd);
>
> Locking question - what locks this lot against hotplug also touching
> bridge settings ?
>
> > +
> > + if (!(rsrc & VGA_RSRC_LEGACY_MASK))
> > + return;
> > +
> > + bus = pdev->bus;
> > + while (bus) {
> > + bridge = bus->self;
> > + if (bridge) {
> > + pci_read_config_word(bridge, PCI_BRIDGE_CONTROL,
> > + &cmd);
> > + if (!(cmd & PCI_BRIDGE_CTL_VGA)) {
> > + cmd |= PCI_BRIDGE_CTL_VGA;
> > + pci_write_config_word(bridge,
> > + PCI_BRIDGE_CONTROL,
> > + cmd);
> > + }
> > + }
> > + bus = bus->parent;
> > + }
> > +}
> > +#endif
>
>
> > + /* The one who calls us should check for this, but lets be sure... */
> > + if (pdev == NULL)
> > + pdev = vga_default_device();
>
> What if the BIOS provided device was hot unplugged ?
>
> > + conflict = __vga_tryget(vgadev, rsrc);
> > + spin_unlock_irqrestore(&vga_lock, flags);
> > + if (conflict == NULL)
> > + break;
> > +
> > +
> > + /* We have a conflict, we wait until somebody kicks the
> > + * work queue. Currently we have one work queue that we
>
> If two drivers own half the resources and both are waiting for the rest
> what handles the deadlock
Okay I've been thinking a bit about this one, and I think the best
answer is if you hold a lock you can't upgrade, you must drop all the
locks and relock with the new list of things you want. This means if you
have an IO lock you can't go getting the mem lock without dropping the
io lock first and getting both of them in one hit.
Anyone see any issues with that?
I'm also going to add normal locks so I'll change the userspace API to
pass 4 values or'ed together, legacyio, legacymem, normalio, normalmem
or none, then the locking should act like this:
device doesn't decode VGA: normal io/mem locks always pass, legact
io/mem locks always fail. don't touch main lock
device decodes VGA: (acts like read/write lock)
asks for normal mem/io locks: if no locks or only other normal locks are
held by devices that decode VGA, then take main lock as a reader and
enable decodes. If someone holds lock as a write block waiting for it.
asks for VGA mem/io locks, if no locks are held, take main lock as a
writer. If main lock has readers already, block waiting for a write lock
on it.
Does this sound sane?
Dave.
On Fri, 2009-07-17 at 15:00 +1000, Dave Airlie wrote:
>
> device doesn't decode VGA: normal io/mem locks always pass, legact
> io/mem locks always fail. don't touch main lock
>
> device decodes VGA: (acts like read/write lock)
Well, my initial idea was that even if you are marked as not decoding
legacy, you could still lock it.
The idea here is that if I'm going to just run a quick BIOS routine, ie
temporarily turn on legacy decoding and back off, I can just lock it, do
that, and unlock it.
No need to globally switch yourself to be a legacy decoder globally for
the arbiter (ie, you lock it so you can't lose it, and you'll have
re-disabled legacy decoding by the time you unlock, so there's no need
in getting notified of loss etc....
But you don't have to support that behaviour :-) I just thought it could
come handy.
> asks for normal mem/io locks: if no locks or only other normal locks
> are
> held by devices that decode VGA, then take main lock as a reader and
> enable decodes. If someone holds lock as a write block waiting for it.
>
> asks for VGA mem/io locks, if no locks are held, take main lock as a
> writer. If main lock has readers already, block waiting for a write
> lock
> on it.
>
> Does this sound sane?
Cursory glance it does.
Cheers,
Ben.
> Dave.
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pci"
> in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
On Tue 2009-07-14 15:57:29, Tiago Vignatti wrote:
> Signed-off-by: Tiago Vignatti <[email protected]>
> ---
Some patch description might be useful.
> @@ -0,0 +1,1120 @@
> +/*
> + * vgaarb.c
> + *
> + * (C) Copyright 2005 Benjamin Herrenschmidt <[email protected]>
> + * (C) Copyright 2007 Paulo R. Zanoni <[email protected]>
> + * (C) Copyright 2007, 2009 Tiago Vignatti <[email protected]>
> + *
If you have copyright, GPL would be cool.
> +struct vga_device {
> + struct list_head list;
> + struct pci_dev *pdev;
> + unsigned int decodes; /* what does it decodes */
> + unsigned int owns; /* what does it owns */
> + unsigned int locks; /* what does it locks */
spelling in comments? And ...how can int tell me what it owns?
> +
> +static LIST_HEAD(vga_list);
> +static DEFINE_SPINLOCK(vga_lock);
> +static DECLARE_WAIT_QUEUE_HEAD(vga_wait_queue);
> +
> +#ifndef __ARCH_HAS_VGA_DEFAULT_DEVICE
> +static struct pci_dev *vga_default;
> +#endif
> +
> +static void vga_arb_device_card_gone(struct pci_dev *pdev);
> +
> +/* Find somebody in our list */
> +static struct vga_device *vgadev_find(struct pci_dev *pdev)
> +{
> + struct vga_device *vgadev;
> +
> + list_for_each_entry(vgadev, &vga_list, list)
> + if (pdev == vgadev->pdev)
> + return vgadev;
> + return NULL;
> +}
Indent using tabs. checkpatch?
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
> +I. Details and Theory of Operation
> +==================================
> +
> +I.1 vgaarb
> +----------
> +
> +The vgaarb is a module of the Linux Kernel. When it is initially loaded, it
> +scans all PCI devices and add the VGA ones inside the arbitration. The arbiter
and adds?
> +If the device is hot-{un,}plugged, there is a hook inside the module to notify
> +them being added/removed in the system and automatically added/removed in
> +the arbiter.
I don't see who's 'them' here.
> +I.2 libpciaccess
> +----------------
> +
> +To use the vga arbiter char device it was implemented a serie of functions
series?
> +inside the pciaccess library. Two fields were added to struct pci_device for
> +this be possible:
> +
> + /* the type of resource decoded by the device */
> + int vgaarb_rsrc;
> + /* the file descriptor (/dev/vga_arbiter) */
> + int vgaarb_fd;
> +
> +
> +and the functions:
> +
What functions?
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
On Sat, Jul 18, 2009 at 01:48:17PM +0200, ext Pavel Machek wrote:
>
> > +I. Details and Theory of Operation
> > +==================================
> > +
> > +I.1 vgaarb
> > +----------
> > +
> > +The vgaarb is a module of the Linux Kernel. When it is initially loaded, it
> > +scans all PCI devices and add the VGA ones inside the arbitration. The arbiter
>
> and adds?
>
>
> > +If the device is hot-{un,}plugged, there is a hook inside the module to notify
> > +them being added/removed in the system and automatically added/removed in
> > +the arbiter.
>
> I don't see who's 'them' here.
>
> > +I.2 libpciaccess
> > +----------------
> > +
> > +To use the vga arbiter char device it was implemented a serie of functions
>
> series?
>
> > +inside the pciaccess library. Two fields were added to struct pci_device for
> > +this be possible:
> > +
> > + /* the type of resource decoded by the device */
> > + int vgaarb_rsrc;
> > + /* the file descriptor (/dev/vga_arbiter) */
> > + int vgaarb_fd;
> > +
> > +
> > +and the functions:
> > +
>
> What functions?
Thanks for the review. I already updated the documentation here with your
comments.
Cheers,
Tiago