Hi,
this is the first post of the initial driver for AMD IOMMU hardware. The driver
has been tested on hardware under various loads (disk and network) and showed
no problems.
It currently supports DMA remapping using the dma_ops API in the x86
architecture code. It also supports isolation of device DMA address spaces as
far as the hardware allows that (means each device can get its own DMA address
space and can't access the DMA memory of other devices).
Please give this code a good review and send me your comments about it so that
I can fix all possible bugs and objections and move this driver forward to
merging quality.
Kind regards,
Joerg Roedel
git diff --stat:
Documentation/kernel-parameters.txt | 12 +
MAINTAINERS | 6 +
arch/x86/Kconfig | 7 +
arch/x86/kernel/Makefile | 1 +
arch/x86/kernel/amd_iommu.c | 955 +++++++++++++++++++++++++++++++++++
arch/x86/kernel/amd_iommu_init.c | 842 ++++++++++++++++++++++++++++++
arch/x86/kernel/pci-dma.c | 5 +
include/asm-x86/amd_iommu.h | 32 ++
include/asm-x86/amd_iommu_types.h | 242 +++++++++
9 files changed, 2102 insertions(+), 0 deletions(-)
This patch adds the detection of AMD IOMMU hardware based on information from
the ACPI tables provided by the BIOS.
Signed-off-by: Joerg Roedel <[email protected]>
---
arch/x86/kernel/amd_iommu_init.c | 78 ++++++++++++++++++++++++++++++++++++++
1 files changed, 78 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 8ec48f1..3f4f7b8 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -478,3 +478,81 @@ static int __init init_iommu_devices(struct amd_iommu *iommu)
return 0;
}
+/* Release the resources held by a single IOMMU: its command buffer and
+ * the MMIO mapping of its register space. */
+static void __init free_iommu_one(struct amd_iommu *iommu)
+{
+ free_command_buffer(iommu);
+ iommu_unmap_mmio_space(iommu);
+}
+
+/* Tear down all IOMMUs on amd_iommu_list and free their struct
+ * amd_iommu allocations (allocated in init_iommu_all()). */
+static void __init free_iommu_all(void)
+{
+ struct amd_iommu *iommu, *next;
+
+ list_for_each_entry_safe(iommu, next, &amd_iommu_list, list) {
+ list_del(&iommu->list);
+ free_iommu_one(iommu);
+ kfree(iommu);
+ }
+}
+
+/*
+ * Initialize one IOMMU from its IVHD table entry: map its MMIO register
+ * space, allocate its command buffer and read per-device settings from
+ * PCI config space and the ACPI table.
+ *
+ * The iommu is added to amd_iommu_list before any allocation can fail,
+ * so on error the caller can clean up via free_iommu_all().
+ * Returns 0 on success, -ENOMEM on allocation/mapping failure.
+ */
+static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
+{
+ spin_lock_init(&iommu->lock);
+ list_add_tail(&iommu->list, &amd_iommu_list);
+
+ /*
+ * Copy data from ACPI table entry to the iommu struct
+ */
+ iommu->devid = h->devid;
+ iommu->cap_ptr = h->cap_ptr;
+ iommu->mmio_phys = h->mmio_phys;
+ iommu->mmio_base = iommu_map_mmio_space(h->mmio_phys);
+ if (!iommu->mmio_base)
+ return -ENOMEM;
+
+ iommu_set_device_table(iommu);
+ iommu->cmd_buf = alloc_command_buffer(iommu);
+ if (!iommu->cmd_buf)
+ return -ENOMEM;
+
+ init_iommu_from_pci(iommu);
+ init_iommu_from_acpi(iommu, h);
+ init_iommu_devices(iommu);
+
+ return 0;
+}
+
+/*
+ * Walk the IVRS ACPI table and initialize one struct amd_iommu for
+ * every IVHD (type 0x10) entry found. Other entry types are skipped.
+ *
+ * On failure the partially initialized IOMMUs remain on
+ * amd_iommu_list; the caller is expected to unwind with
+ * free_iommu_all().
+ */
+static int __init init_iommu_all(struct acpi_table_header *table)
+{
+ u8 *p = (u8 *)table, *end = (u8 *)table;
+ struct ivhd_header *h;
+ struct amd_iommu *iommu;
+ int ret;
+
+ INIT_LIST_HEAD(&amd_iommu_list);
+
+ end += table->length;
+ p += IVRS_HEADER_LENGTH;
+
+ while (p < end) {
+ h = (struct ivhd_header *)p;
+ switch (*p) {
+ case ACPI_IVHD_TYPE:
+ iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL);
+ if (iommu == NULL)
+ return -ENOMEM;
+ ret = init_iommu_one(iommu, h);
+ if (ret)
+ return ret;
+ break;
+ default:
+ break;
+ }
+ p += h->length;
+
+ }
+ /* all entries carry their own length, so we must land exactly on
+ * the table end */
+ WARN_ON(p != end);
+
+ return 0;
+}
+
+
--
1.5.3.7
This patch adds the data structures which will contain the information read
from the ACPI table.
Signed-off-by: Joerg Roedel <[email protected]>
---
arch/x86/kernel/amd_iommu_init.c | 17 +++++++++++++++++
1 files changed, 17 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 6fce5ab..0ad8cf9 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -99,3 +99,20 @@ struct ivmd_header {
u64 range_length;
} __attribute__((packed));
+/* set by "amd_iommu=off" on the kernel command line */
+static int __initdata amd_iommu_disable;
+
+/* largest PCI Bus/Dev/Func found in the ACPI table (inclusive maximum) */
+u16 amd_iommu_last_bdf;
+/* list of required unity mappings parsed from IVMD entries */
+struct list_head amd_iommu_unity_map;
+/* size of the DMA aperture as a power of two; 26 == 64 MB default */
+unsigned amd_iommu_aperture_order = 26;
+/* set by "amd_iommu=isolate": give each device its own domain */
+int amd_iommu_isolate;
+
+/* list of all IOMMUs found in the system */
+struct list_head amd_iommu_list;
+/* device table shared by all IOMMUs, indexed by device id */
+struct dev_table_entry *amd_iommu_dev_table;
+/* maps a device id to the device id the IOMMU actually sees */
+u16 *amd_iommu_alias_table;
+/* maps a device id to the IOMMU handling that device */
+struct amd_iommu **amd_iommu_rlookup_table;
+/* maps a device id to its protection domain */
+struct protection_domain **amd_iommu_pd_table;
+/* bitmap of allocated protection domain ids */
+unsigned long *amd_iommu_pd_alloc_bitmap;
+
+/* sizes of the tables above, computed from amd_iommu_last_bdf */
+static u32 dev_table_size;
+static u32 alias_table_size;
+static u32 rlookup_table_size;
--
1.5.3.7
This patch adds generic stuff used by the mapping and unmapping code for the
AMD IOMMU.
Signed-off-by: Joerg Roedel <[email protected]>
---
arch/x86/kernel/amd_iommu.c | 40 ++++++++++++++++++++++++++++++++++++++++
1 files changed, 40 insertions(+), 0 deletions(-)
create mode 100644 arch/x86/kernel/amd_iommu.c
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
new file mode 100644
index 0000000..90392c7
--- /dev/null
+++ b/arch/x86/kernel/amd_iommu.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
+ * Author: Joerg Roedel <[email protected]>
+ * Leo Duran <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/pci.h>
+#include <linux/gfp.h>
+#include <linux/bitops.h>
+#include <linux/scatterlist.h>
+#include <linux/iommu-helper.h>
+#include <asm/proto.h>
+#include <asm/gart.h>
+#include <asm/amd_iommu_types.h>
+
+/* encode the command opcode into bits 31:28 of the second dword */
+#define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
+
+/* number of pages touched by a buffer of 'size' starting at 'addr' */
+#define to_pages(addr, size) \
+ (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
+
+/* protects the shared device table against concurrent updates */
+static DEFINE_RWLOCK(amd_iommu_devtable_lock);
+
+/* a 128 bit command as written into the IOMMU command ring buffer */
+struct command {
+ u32 data[4];
+};
+
+
--
1.5.3.7
This patch adds generic handling function as well as all functions to send
specific commands to the IOMMU hardware as required by this driver.
Signed-off-by: Joerg Roedel <[email protected]>
---
arch/x86/kernel/amd_iommu.c | 106 +++++++++++++++++++++++++++++++++++++++++++
1 files changed, 106 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 90392c7..a24ee4a 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -37,4 +37,110 @@ struct command {
u32 data[4];
};
+/*
+ * Write a command into the IOMMU's command ring buffer and publish it
+ * by advancing the tail register. Returns -ENOMEM when the ring is
+ * full; in that case the entry copied into the free slot is never seen
+ * by the hardware because the tail register is left untouched.
+ * Caller must hold iommu->lock.
+ */
+static int __iommu_queue_command(struct amd_iommu *iommu, struct command *cmd)
+{
+ u32 tail, head;
+ u8 *target;
+
+ tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
+ target = (iommu->cmd_buf + tail);
+ memcpy_toio(target, cmd, sizeof(*cmd));
+ tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
+ head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
+ if (tail == head)
+ return -ENOMEM;
+ writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
+
+ return 0;
+}
+
+/* Locked wrapper around __iommu_queue_command(); safe to call from
+ * any context that may take iommu->lock with interrupts disabled. */
+static int iommu_queue_command(struct amd_iommu *iommu, struct command *cmd)
+{
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&iommu->lock, flags);
+ ret = __iommu_queue_command(iommu, cmd);
+ spin_unlock_irqrestore(&iommu->lock, flags);
+
+ return ret;
+}
+
+/*
+ * Queue a COMPLETION_WAIT command and busy-wait until the IOMMU has
+ * finished all previously queued commands and stored the value 1 to
+ * 'ready'. Using virt_to_phys() on a stack variable works because
+ * kernel stacks live in the direct mapping -- TODO confirm for all
+ * configurations this driver targets.
+ * NOTE(review): the wait loop has no timeout; a wedged IOMMU would
+ * hang the CPU here. Consider a bounded poll with error reporting.
+ */
+static int iommu_completion_wait(struct amd_iommu *iommu)
+{
+ int ret;
+ struct command cmd;
+ volatile u64 ready = 0;
+ unsigned long ready_phys = virt_to_phys(&ready);
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.data[0] = LOW_U32(ready_phys) | CMD_COMPL_WAIT_STORE_MASK;
+ cmd.data[1] = HIGH_U32(ready_phys);
+ cmd.data[2] = 1; /* value written to 'ready' */
+ CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
+
+ iommu->need_sync = 0;
+
+ ret = iommu_queue_command(iommu, &cmd);
+
+ if (ret)
+ return ret;
+
+ while (!ready)
+ cpu_relax();
+
+ return 0;
+}
+
+/*
+ * Queue an INVALIDATE_DEVTAB_ENTRY command so the IOMMU re-reads the
+ * device table entry for 'devid'. Sets need_sync; the caller is
+ * expected to follow up with iommu_completion_wait().
+ */
+static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
+{
+ struct command cmd;
+
+ BUG_ON(iommu == NULL);
+
+ memset(&cmd, 0, sizeof(cmd));
+ CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY);
+ cmd.data[0] = devid;
+
+ iommu->need_sync = 1;
+
+ return iommu_queue_command(iommu, &cmd);
+}
+
+/*
+ * Queue an INVALIDATE_IOMMU_PAGES command for one page of domain
+ * 'domid'. 'pde' also invalidates cached page directory entries,
+ * 's' marks the address as a size-encoded range. Sets need_sync;
+ * caller follows up with iommu_completion_wait().
+ */
+static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
+ u64 address, u16 domid, int pde, int s)
+{
+ struct command cmd;
+
+ memset(&cmd, 0, sizeof(cmd));
+ address &= PAGE_MASK;
+ CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES);
+ cmd.data[1] |= domid;
+ cmd.data[2] = LOW_U32(address);
+ cmd.data[3] = HIGH_U32(address);
+ if (s)
+ cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
+ if (pde)
+ cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
+
+ iommu->need_sync = 1;
+
+ return iommu_queue_command(iommu, &cmd);
+}
+
+/*
+ * Invalidate the IOTLB entries of a [address, address+size) range in
+ * domain 'domid', one page at a time.
+ * NOTE(review): for large ranges a single size-encoded invalidation
+ * (the 's' flag of iommu_queue_inv_iommu_pages) might be cheaper --
+ * worth measuring.
+ */
+static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid,
+ u64 address, size_t size)
+{
+ int i;
+ unsigned pages = to_pages(address, size);
+
+ address &= PAGE_MASK;
+
+ for (i = 0; i < pages; ++i) {
+ iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 0);
+ address += PAGE_SIZE;
+ }
+
+ return 0;
+}
--
1.5.3.7
This patch adds the required data structures and constants required to parse
the ACPI table for the AMD IOMMU.
Signed-off-by: Joerg Roedel <[email protected]>
---
arch/x86/kernel/amd_iommu_init.c | 101 ++++++++++++++++++++++++++++++++++++++
1 files changed, 101 insertions(+), 0 deletions(-)
create mode 100644 arch/x86/kernel/amd_iommu_init.c
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
new file mode 100644
index 0000000..6fce5ab
--- /dev/null
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
+ * Author: Joerg Roedel <[email protected]>
+ * Leo Duran <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/pci.h>
+#include <linux/acpi.h>
+#include <linux/gfp.h>
+#include <linux/list.h>
+#include <asm/pci-direct.h>
+#include <asm/amd_iommu_types.h>
+#include <asm/gart.h>
+
+/*
+ * definitions for the ACPI scanning code
+ */
+/*
+ * Track the highest PCI Bus/Dev/Func the IOMMUs must handle; used to
+ * size the shared device/alias/rlookup tables.
+ * No semicolon after while (0): the macro must expand to a single
+ * statement so it is usable in un-braced if/else bodies.
+ */
+#define UPDATE_LAST_BDF(x) do {\
+ if ((x) > amd_iommu_last_bdf) \
+ amd_iommu_last_bdf = (x); \
+ } while (0)
+
+/* build a 16 bit device id from bus number and devfn */
+#define DEVID(bus, devfn) (((bus) << 8) | (devfn))
+#define PCI_BUS(x) (((x) >> 8) & 0xff)
+/* fixed length of the IVRS table header before the first entry */
+#define IVRS_HEADER_LENGTH 48
+/* page-aligned size of a per-device table with entries of 'x' bytes */
+#define TBL_SIZE(x) (1 << (PAGE_SHIFT + get_order(amd_iommu_last_bdf * (x))))
+
+/* IVRS entry types: one IVHD (hardware definition) and the three IVMD
+ * (memory definition) variants */
+#define ACPI_IVHD_TYPE 0x10
+#define ACPI_IVMD_TYPE_ALL 0x20
+#define ACPI_IVMD_TYPE 0x21
+#define ACPI_IVMD_TYPE_RANGE 0x22
+
+/* device entry types inside an IVHD block */
+#define IVHD_DEV_ALL 0x01
+#define IVHD_DEV_SELECT 0x02
+#define IVHD_DEV_SELECT_RANGE_START 0x03
+#define IVHD_DEV_RANGE_END 0x04
+#define IVHD_DEV_ALIAS 0x42
+#define IVHD_DEV_ALIAS_RANGE 0x43
+#define IVHD_DEV_EXT_SELECT 0x46
+#define IVHD_DEV_EXT_SELECT_RANGE 0x47
+
+/* bit numbers in the IVHD flags field */
+#define IVHD_FLAG_HT_TUN_EN 0x00
+#define IVHD_FLAG_PASSPW_EN 0x01
+#define IVHD_FLAG_RESPASSPW_EN 0x02
+#define IVHD_FLAG_ISOC_EN 0x03
+
+/* IVMD flags: exclusion range vs. required unity mapping */
+#define IVMD_FLAG_EXCL_RANGE 0x08
+#define IVMD_FLAG_UNITY_MAP 0x01
+
+/* per-device flags from IVHD device entries */
+#define ACPI_DEVFLAG_INITPASS 0x01
+#define ACPI_DEVFLAG_EXTINT 0x02
+#define ACPI_DEVFLAG_NMI 0x04
+#define ACPI_DEVFLAG_SYSMGT1 0x10
+#define ACPI_DEVFLAG_SYSMGT2 0x20
+#define ACPI_DEVFLAG_LINT0 0x40
+#define ACPI_DEVFLAG_LINT1 0x80
+#define ACPI_DEVFLAG_ATSDIS 0x10000000
+
+/* on-disk layout of one IVHD entry -- must match the ACPI table
+ * byte-for-byte, hence packed */
+struct ivhd_header {
+ u8 type;
+ u8 flags;
+ u16 length;
+ u16 devid;
+ u16 cap_ptr;
+ u64 mmio_phys;
+ u16 pci_seg;
+ u16 info;
+ u32 reserved;
+} __attribute__((packed));
+
+/* one device entry inside an IVHD block */
+struct ivhd_entry {
+ u8 type;
+ u16 devid;
+ u8 flags;
+ u32 ext;
+} __attribute__((packed));
+
+/* on-disk layout of one IVMD (memory definition) entry */
+struct ivmd_header {
+ u8 type;
+ u8 flags;
+ u16 length;
+ u16 devid;
+ u16 aux;
+ u64 resv;
+ u64 range_start;
+ u64 range_length;
+} __attribute__((packed));
+
+
--
1.5.3.7
This patch adds the new amd_iommu.c file to the architecture kernel Makefile
for x86.
Signed-off-by: Joerg Roedel <[email protected]>
---
arch/x86/kernel/Makefile | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 1e4e00a..b34e0bb 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -100,7 +100,7 @@ ifeq ($(CONFIG_X86_64),y)
obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o
obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o
- obj-$(CONFIG_AMD_IOMMU) += amd_iommu_init.o
+ obj-$(CONFIG_AMD_IOMMU) += amd_iommu_init.o amd_iommu.o
obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o
obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o
--
1.5.3.7
This patch adds the Kconfig entry for the AMD IOMMU driver.
Signed-off-by: Joerg Roedel <[email protected]>
---
arch/x86/Kconfig | 7 +++++++
1 files changed, 7 insertions(+), 0 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e0edaaa..5a82f18 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -549,6 +549,13 @@ config CALGARY_IOMMU_ENABLED_BY_DEFAULT
Calgary anyway, pass 'iommu=calgary' on the kernel command line.
If unsure, say Y.
+config AMD_IOMMU
+ bool "AMD IOMMU support"
+ select SWIOTLB
+ depends on X86_64 && PCI
+ help
+ Select this to get support for AMD IOMMU hardware in your system.
+
# need this always selected by IOMMU for the VIA workaround
config SWIOTLB
bool
--
1.5.3.7
This patch adds two parameters to the kernel command line to control behavior
of the AMD IOMMU.
Signed-off-by: Joerg Roedel <[email protected]>
---
arch/x86/kernel/amd_iommu_init.c | 34 ++++++++++++++++++++++++++++++++++
1 files changed, 34 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index d7a75bf..ec6f13b 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -789,3 +789,37 @@ void __init amd_iommu_detect(void)
}
}
+/*
+ * Parse "amd_iommu=" options: "off" disables the driver, "isolate"
+ * gives every device its own protection domain.
+ * NOTE(review): strcmp() is applied at every character offset, so a
+ * keyword matches only when the remainder of the string equals it
+ * exactly (e.g. "xoff" matches via its "off" suffix) -- confirm this
+ * is the intended matching behavior.
+ */
+static int __init parse_amd_iommu_options(char *str)
+{
+ for (; *str; ++str) {
+ if (strcmp(str, "off") == 0)
+ amd_iommu_disable = 1;
+ if (strcmp(str, "isolate") == 0)
+ amd_iommu_isolate = 1;
+ }
+
+ return 1;
+}
+
+/*
+ * Parse "amd_iommu_size=" and translate the human readable aperture
+ * size into a power-of-two order (2^order bytes); unknown values
+ * leave the default (64M, order 26) untouched.
+ */
+static int __init parse_amd_iommu_size_options(char *str)
+{
+ for (; *str; ++str) {
+ if (strcmp(str, "32M") == 0)
+ amd_iommu_aperture_order = 25;
+ if (strcmp(str, "64M") == 0)
+ amd_iommu_aperture_order = 26;
+ if (strcmp(str, "128M") == 0)
+ amd_iommu_aperture_order = 27;
+ if (strcmp(str, "256M") == 0)
+ amd_iommu_aperture_order = 28;
+ if (strcmp(str, "512M") == 0)
+ amd_iommu_aperture_order = 29;
+ if (strcmp(str, "1G") == 0)
+ amd_iommu_aperture_order = 30;
+ }
+
+ return 1;
+}
+
+__setup("amd_iommu=", parse_amd_iommu_options);
+__setup("amd_iommu_size=", parse_amd_iommu_size_options);
--
1.5.3.7
This patch ties the AMD IOMMU ACPI table parsing and hardware initialization
functions together into the main initialization routine.
Signed-off-by: Joerg Roedel <[email protected]>
---
arch/x86/kernel/amd_iommu_init.c | 126 ++++++++++++++++++++++++++++++++++++++
1 files changed, 126 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 555fcc9..c792ddc 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -643,3 +643,129 @@ static int __init init_memory_definitions(struct acpi_table_header *table)
return 0;
}
+/*
+ * Main initialization routine for the AMD IOMMU driver. Parses the
+ * ACPI IVRS table, allocates the shared data structures (device
+ * table, alias table, rlookup table, protection domain table and the
+ * domain id bitmap) and then scans the IOMMUs and memory definitions
+ * described by the firmware.
+ *
+ * Returns 0 on success (or when disabled on the command line),
+ * -ENODEV when no usable IVRS table is found, -ENOMEM when the data
+ * structures cannot be allocated.
+ */
+int __init amd_iommu_init(void)
+{
+ int i, ret = 0;
+
+ if (amd_iommu_disable) {
+ printk(KERN_INFO "AMD IOMMU disabled by kernel command line\n");
+ return 0;
+ }
+
+ /*
+ * First parse ACPI tables to find the largest Bus/Dev/Func
+ * we need to handle. Upon this information the shared data
+ * structures for the IOMMUs in the system will be allocated
+ */
+ if (acpi_table_parse("IVRS", find_last_devid_acpi) != 0)
+ return -ENODEV;
+
+ dev_table_size = TBL_SIZE(DEV_TABLE_ENTRY_SIZE);
+ alias_table_size = TBL_SIZE(ALIAS_TABLE_ENTRY_SIZE);
+ rlookup_table_size = TBL_SIZE(RLOOKUP_TABLE_ENTRY_SIZE);
+
+ ret = -ENOMEM;
+
+ /* Device table - directly used by all IOMMUs */
+ amd_iommu_dev_table = (void *)__get_free_pages(GFP_KERNEL,
+ get_order(dev_table_size));
+ if (amd_iommu_dev_table == NULL)
+ goto out;
+
+ /*
+ * Alias table - maps a PCI Bus/Dev/Func to the Bus/Dev/Func the
+ * IOMMU sees for that device
+ */
+ amd_iommu_alias_table = (void *)__get_free_pages(GFP_KERNEL,
+ get_order(alias_table_size));
+ if (amd_iommu_alias_table == NULL)
+ goto free;
+
+ /* IOMMU rlookup table - find the IOMMU for a specific device */
+ amd_iommu_rlookup_table = (void *)__get_free_pages(GFP_KERNEL,
+ get_order(rlookup_table_size));
+ if (amd_iommu_rlookup_table == NULL)
+ goto free;
+
+ /*
+ * Protection Domain table - maps devices to protection domains
+ * This table has the same size as the rlookup_table
+ */
+ amd_iommu_pd_table = (void *)__get_free_pages(GFP_KERNEL,
+ get_order(rlookup_table_size));
+ if (amd_iommu_pd_table == NULL)
+ goto free;
+
+ amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages(GFP_KERNEL,
+ get_order(MAX_DOMAIN_ID/8));
+ if (amd_iommu_pd_alloc_bitmap == NULL)
+ goto free;
+
+ /*
+ * memory is allocated now; initialize the device table with all
+ * zeroes and let all alias entries point to themselves
+ * (amd_iommu_last_bdf is an inclusive maximum, hence <=)
+ */
+ memset(amd_iommu_dev_table, 0, dev_table_size);
+ for (i = 0; i <= amd_iommu_last_bdf; ++i)
+ amd_iommu_alias_table[i] = i;
+
+ memset(amd_iommu_pd_table, 0, rlookup_table_size);
+ memset(amd_iommu_pd_alloc_bitmap, 0, MAX_DOMAIN_ID / 8);
+
+ /*
+ * never allocate domain 0 because its used as the non-allocated and
+ * error value placeholder
+ */
+ amd_iommu_pd_alloc_bitmap[0] = 1;
+
+ /*
+ * now the data structures are allocated and basically initialized
+ * start the real acpi table scan
+ */
+ ret = -ENODEV;
+ if (acpi_table_parse("IVRS", init_iommu_all) != 0)
+ goto free;
+
+ if (acpi_table_parse("IVRS", init_memory_definitions) != 0)
+ goto free;
+
+ printk(KERN_INFO "AMD IOMMU: aperture size is %d MB\n",
+ (1 << (amd_iommu_aperture_order-20)));
+
+ printk(KERN_INFO "AMD IOMMU: device isolation ");
+ if (amd_iommu_isolate)
+ printk("enabled\n");
+ else
+ printk("disabled\n");
+
+ /* everything worked out - report success, not the stale -ENODEV */
+ ret = 0;
+
+out:
+ return ret;
+
+free:
+ /*
+ * unwind all allocations done above; pointers for stages never
+ * reached are still NULL. The bitmap must be freed with the same
+ * order it was allocated with.
+ */
+ if (amd_iommu_pd_alloc_bitmap)
+ free_pages((unsigned long)amd_iommu_pd_alloc_bitmap,
+ get_order(MAX_DOMAIN_ID/8));
+
+ if (amd_iommu_pd_table)
+ free_pages((unsigned long)amd_iommu_pd_table,
+ get_order(rlookup_table_size));
+
+ if (amd_iommu_rlookup_table)
+ free_pages((unsigned long)amd_iommu_rlookup_table,
+ get_order(rlookup_table_size));
+
+ if (amd_iommu_alias_table)
+ free_pages((unsigned long)amd_iommu_alias_table,
+ get_order(alias_table_size));
+
+ if (amd_iommu_dev_table)
+ free_pages((unsigned long)amd_iommu_dev_table,
+ get_order(dev_table_size));
+
+ free_iommu_all();
+
+ free_unity_maps();
+
+ goto out;
+}
+
--
1.5.3.7
This patch adds the function calls to initialize the AMD IOMMU hardware and
dma_ops to the generic DMA code for the x86 architecture.
Signed-off-by: Joerg Roedel <[email protected]>
---
arch/x86/kernel/pci-dma.c | 5 +++++
1 files changed, 5 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index dc00a13..dea0167 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -7,6 +7,7 @@
#include <asm/dma.h>
#include <asm/gart.h>
#include <asm/calgary.h>
+#include <asm/amd_iommu.h>
int forbid_dac __read_mostly;
EXPORT_SYMBOL(forbid_dac);
@@ -122,6 +123,8 @@ void __init pci_iommu_alloc(void)
detect_intel_iommu();
+ amd_iommu_detect();
+
#ifdef CONFIG_SWIOTLB
pci_swiotlb_init();
#endif
@@ -502,6 +505,8 @@ static int __init pci_iommu_init(void)
intel_iommu_init();
+ amd_iommu_init();
+
#ifdef CONFIG_GART_IOMMU
gart_iommu_init();
#endif
--
1.5.3.7
This patch adds the generic functions to map and unmap pages to a protection
domain for dma_ops usage.
Signed-off-by: Joerg Roedel <[email protected]>
---
arch/x86/kernel/amd_iommu.c | 105 +++++++++++++++++++++++++++++++++++++++++++
1 files changed, 105 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 47e80b5..e00a3e7 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -536,3 +536,108 @@ static int get_device_resources(struct device *dev,
return 1;
}
+/*
+ * Insert one page mapping (io-virtual address -> physical page) into
+ * a dma_ops domain's page table, encoding read/write permissions from
+ * the DMA direction. 'address' must be page aligned and inside the
+ * aperture; returns it unchanged as the dma address.
+ */
+static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu,
+ struct dma_ops_domain *dom,
+ unsigned long address,
+ phys_addr_t paddr,
+ int direction)
+{
+ u64 *pte, __pte;
+
+ WARN_ON(address > dom->aperture_size);
+
+ paddr &= PAGE_MASK;
+
+ pte = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)];
+ pte += IOMMU_PTE_L0_INDEX(address);
+
+ __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC;
+
+ if (direction == DMA_TO_DEVICE)
+ __pte |= IOMMU_PTE_IR;
+ else if (direction == DMA_FROM_DEVICE)
+ __pte |= IOMMU_PTE_IW;
+ else if (direction == DMA_BIDIRECTIONAL)
+ __pte |= IOMMU_PTE_IR | IOMMU_PTE_IW;
+
+ /* the slot must be empty -- overlapping mappings are a bug */
+ WARN_ON(*pte);
+
+ *pte = __pte;
+
+ return (dma_addr_t)address;
+}
+
+/*
+ * Clear one page mapping from a dma_ops domain's page table. Out of
+ * aperture addresses are silently ignored.
+ */
+static void dma_ops_domain_unmap(struct amd_iommu *iommu,
+ struct dma_ops_domain *dom,
+ unsigned long address)
+{
+ u64 *pte;
+
+ if (address >= dom->aperture_size)
+ return;
+
+ /* NOTE(review): after the early return above the second clause of
+ * this WARN_ON can never fire -- dead condition */
+ WARN_ON(address & 0xfffULL || address > dom->aperture_size);
+
+ pte = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)];
+ pte += IOMMU_PTE_L0_INDEX(address);
+
+ /* unmapping a page that was never mapped is a bug */
+ WARN_ON(!*pte);
+
+ *pte = 0ULL;
+}
+
+/*
+ * Map a physically contiguous buffer into a dma_ops domain: allocate
+ * a range of io-virtual addresses and create one page mapping per
+ * page of the buffer. Returns the dma address (with the sub-page
+ * offset re-applied) or bad_dma_address when the aperture is full.
+ * Caller must hold the domain lock.
+ */
+static dma_addr_t __map_single(struct device *dev,
+ struct amd_iommu *iommu,
+ struct dma_ops_domain *dma_dom,
+ phys_addr_t paddr,
+ size_t size,
+ int dir)
+{
+ dma_addr_t offset = paddr & ~PAGE_MASK;
+ dma_addr_t address, start;
+ unsigned int pages;
+ int i;
+
+ pages = to_pages(paddr, size);
+ paddr &= PAGE_MASK;
+
+ address = dma_ops_alloc_addresses(dev, dma_dom, pages);
+ if (unlikely(address == bad_dma_address))
+ goto out;
+
+ start = address;
+ for (i = 0; i < pages; ++i) {
+ dma_ops_domain_map(iommu, dma_dom, start, paddr, dir);
+ paddr += PAGE_SIZE;
+ start += PAGE_SIZE;
+ }
+ address += offset;
+
+out:
+ return address;
+}
+
+/*
+ * Undo __map_single(): clear the page mappings of the range and give
+ * the io-virtual addresses back to the aperture allocator.
+ * The dma_addr == 0 check rejects the error sentinel -- presumably
+ * bad_dma_address == 0 in this driver (aperture page 0 is reserved);
+ * verify against the dma_ops setup.
+ * Caller must hold the domain lock.
+ */
+static void __unmap_single(struct amd_iommu *iommu,
+ struct dma_ops_domain *dma_dom,
+ dma_addr_t dma_addr,
+ size_t size,
+ int dir)
+{
+ dma_addr_t i, start;
+ unsigned int pages;
+
+ if ((dma_addr == 0) || (dma_addr + size > dma_dom->aperture_size))
+ return;
+
+ pages = to_pages(dma_addr, size);
+ dma_addr &= PAGE_MASK;
+ start = dma_addr;
+
+ for (i = 0; i < pages; ++i) {
+ dma_ops_domain_unmap(iommu, dma_dom, start);
+ start += PAGE_SIZE;
+ }
+
+ dma_ops_free_addresses(dma_dom, dma_addr, pages);
+}
+
+
--
1.5.3.7
This patch adds the dma_ops functions for mapping and unmapping scatter gather
lists.
Signed-off-by: Joerg Roedel <[email protected]>
---
arch/x86/kernel/amd_iommu.c | 98 +++++++++++++++++++++++++++++++++++++++++++
1 files changed, 98 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index b4079f6..f4747fe 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -700,3 +700,101 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr,
spin_unlock_irqrestore(&domain->lock, flags);
}
+/*
+ * Fallback scatter/gather mapping for devices not handled by any
+ * IOMMU: dma address == physical address (identity mapping).
+ */
+static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist,
+ int nelems, int dir)
+{
+ struct scatterlist *s;
+ int i;
+
+ for_each_sg(sglist, s, nelems, i) {
+ s->dma_address = (dma_addr_t)sg_phys(s);
+ s->dma_length = s->length;
+ }
+
+ return nelems;
+}
+
+/*
+ * dma_ops map_sg callback: map every scatterlist element through the
+ * device's protection domain. On failure all elements mapped so far
+ * are unmapped again and 0 is returned.
+ * The truth test on s->dma_address below assumes bad_dma_address == 0
+ * (the error value __map_single returns) -- TODO confirm.
+ */
+static int map_sg(struct device *dev, struct scatterlist *sglist,
+ int nelems, int dir)
+{
+ unsigned long flags;
+ struct amd_iommu *iommu;
+ struct protection_domain *domain;
+ u16 devid;
+ int i;
+ struct scatterlist *s;
+ phys_addr_t paddr;
+ int mapped_elems = 0;
+
+ get_device_resources(dev, &iommu, &domain, &devid);
+
+ /* device not behind an IOMMU -> identity map the list */
+ if (!iommu || !domain)
+ return map_sg_no_iommu(dev, sglist, nelems, dir);
+
+ spin_lock_irqsave(&domain->lock, flags);
+
+ for_each_sg(sglist, s, nelems, i) {
+ paddr = sg_phys(s);
+
+ s->dma_address = __map_single(dev, iommu, domain->priv,
+ paddr, s->length, dir);
+
+ if (s->dma_address) {
+ s->dma_length = s->length;
+ mapped_elems++;
+ } else
+ goto unmap;
+ /* flush the IOTLB only on IOMMUs without a coherent
+ * page table walker cache */
+ if (iommu_has_npcache(iommu))
+ iommu_flush_pages(iommu, domain->id, s->dma_address,
+ s->dma_length);
+ }
+
+ if (iommu->need_sync)
+ iommu_completion_wait(iommu);
+
+out:
+ spin_unlock_irqrestore(&domain->lock, flags);
+
+ return mapped_elems;
+unmap:
+ /* NOTE(review): this rollback does not flush the IOTLB for the
+ * unmapped elements -- confirm whether that is needed here */
+ for_each_sg(sglist, s, mapped_elems, i) {
+ if (s->dma_address)
+ __unmap_single(iommu, domain->priv, s->dma_address,
+ s->dma_length, dir);
+ s->dma_address = s->dma_length = 0;
+ }
+
+ mapped_elems = 0;
+
+ goto out;
+}
+
+/*
+ * dma_ops unmap_sg callback: unmap every scatterlist element and
+ * invalidate the corresponding IOTLB entries. A no-op for devices
+ * not handled by an IOMMU.
+ */
+static void unmap_sg(struct device *dev, struct scatterlist *sglist,
+ int nelems, int dir)
+{
+ unsigned long flags;
+ struct amd_iommu *iommu;
+ struct protection_domain *domain;
+ struct scatterlist *s;
+ u16 devid;
+ int i;
+
+ if (!get_device_resources(dev, &iommu, &domain, &devid))
+ return;
+
+ spin_lock_irqsave(&domain->lock, flags);
+
+ for_each_sg(sglist, s, nelems, i) {
+ __unmap_single(iommu, domain->priv, s->dma_address,
+ s->dma_length, dir);
+ iommu_flush_pages(iommu, domain->id, s->dma_address,
+ s->dma_length);
+ s->dma_address = s->dma_length = 0;
+ }
+
+ if (iommu->need_sync)
+ iommu_completion_wait(iommu);
+
+ spin_unlock_irqrestore(&domain->lock, flags);
+}
+
+
--
1.5.3.7
This patch contains two functions to map and unmap the MMIO region of an IOMMU.
Signed-off-by: Joerg Roedel <[email protected]>
---
arch/x86/kernel/amd_iommu_init.c | 23 +++++++++++++++++++++++
1 files changed, 23 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index ee0b2da..3147e69 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -117,6 +117,29 @@ static u32 dev_table_size;
static u32 alias_table_size;
static u32 rlookup_table_size;
+/*
+ * Reserve and ioremap the MMIO register space of one IOMMU.
+ * Returns the virtual base address, or NULL on failure -- in which
+ * case the memory region is not left reserved.
+ */
+static u8 * __init iommu_map_mmio_space(u64 address)
+{
+ u8 *ret;
+
+ if (!request_mem_region(address, MMIO_REGION_LENGTH, "amd_iommu"))
+ return NULL;
+
+ ret = ioremap_nocache(address, MMIO_REGION_LENGTH);
+ if (ret != NULL)
+ return ret;
+
+ release_mem_region(address, MMIO_REGION_LENGTH);
+
+ return NULL;
+}
+
+/*
+ * Unmap an IOMMU's MMIO register space and release the memory region.
+ * The region is held only when iommu_map_mmio_space() fully succeeded
+ * (mmio_base != NULL); releasing it unconditionally would release a
+ * region that was never (or no longer) reserved.
+ */
+static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu)
+{
+ if (!iommu->mmio_base)
+ return;
+
+ iounmap(iommu->mmio_base);
+ release_mem_region(iommu->mmio_phys, MMIO_REGION_LENGTH);
+}
+
static int __init find_last_devid_on_pci(int bus, int dev, int fn, int cap_ptr)
{
u32 cap;
--
1.5.3.7
This patch adds the functions to allocate and free protection domains for the
IOMMU.
Signed-off-by: Joerg Roedel <[email protected]>
---
arch/x86/kernel/amd_iommu.c | 147 +++++++++++++++++++++++++++++++++++++++++++
1 files changed, 147 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 69d8d02..c43d15d 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -314,3 +314,150 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom,
iommu_area_free(dom->bitmap, address, pages);
}
+/*
+ * Allocate a free protection domain id from the global bitmap.
+ * Returns 0 when the id space is exhausted (id 0 is pre-reserved as
+ * the error/unallocated placeholder, so the BUG_ON guards against
+ * bitmap corruption).
+ */
+static u16 domain_id_alloc(void)
+{
+ unsigned long flags;
+ int id;
+
+ write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+ id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID);
+ BUG_ON(id == 0);
+ if (id > 0 && id < MAX_DOMAIN_ID)
+ __set_bit(id, amd_iommu_pd_alloc_bitmap);
+ else
+ id = 0;
+ write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+
+ return id;
+}
+
+/*
+ * Mark a page range of the aperture as allocated so the address
+ * allocator never hands it out (used for hardware exclusion ranges).
+ * Ranges reaching beyond the aperture are clamped.
+ * NOTE(review): if start_page itself lies beyond the aperture the
+ * subtraction underflows 'pages' -- confirm callers never pass that.
+ */
+static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
+ unsigned long start_page,
+ unsigned int pages)
+{
+ unsigned int last_page = dom->aperture_size >> PAGE_SHIFT;
+
+ if (start_page + pages > last_page)
+ pages = last_page - start_page;
+
+ set_bit_string(dom->bitmap, start_page, pages);
+}
+
+/*
+ * Free the complete 3-level IO page table of a dma_ops domain, leaf
+ * pages first, then the intermediate directories, then the root.
+ * Note the inner loop must advance j (the original code incremented
+ * i here, skipping outer entries and leaking the level-3 pages).
+ */
+static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom)
+{
+ int i, j;
+ u64 *p1, *p2, *p3;
+
+ p1 = dma_dom->domain.pt_root;
+
+ if (!p1)
+ return;
+
+ for (i = 0; i < 512; ++i) {
+ if (!IOMMU_PTE_PRESENT(p1[i]))
+ continue;
+
+ p2 = IOMMU_PTE_PAGE(p1[i]);
+ for (j = 0; j < 512; ++j) {
+ if (!IOMMU_PTE_PRESENT(p2[j]))
+ continue;
+ p3 = IOMMU_PTE_PAGE(p2[j]);
+ free_page((unsigned long)p3);
+ }
+
+ free_page((unsigned long)p2);
+ }
+
+ free_page((unsigned long)p1);
+}
+
+/*
+ * Free a dma_ops domain and everything it owns: page table, address
+ * allocation bitmap and the pte_pages index. Safe to call on a
+ * partially constructed domain (NULL members are tolerated).
+ */
+static void dma_ops_domain_free(struct dma_ops_domain *dom)
+{
+ if (!dom)
+ return;
+
+ dma_ops_free_pagetable(dom);
+
+ kfree(dom->pte_pages);
+
+ kfree(dom->bitmap);
+
+ kfree(dom);
+}
+
+/*
+ * Allocate and fully populate a dma_ops protection domain with an
+ * aperture of 2^order bytes: domain id, 3-level page table root,
+ * address allocation bitmap and pre-allocated level-0 pte pages
+ * (indexed via pte_pages so mapping never has to allocate).
+ *
+ * All error paths funnel through dma_ops_domain_free(), which copes
+ * with the partially constructed state. Pages already linked into
+ * the page table (l2_pde, pte_pages) are reclaimed by the page table
+ * walk in dma_ops_free_pagetable().
+ * Returns the new domain or NULL on failure.
+ */
+static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
+ unsigned order)
+{
+ struct dma_ops_domain *dma_dom;
+ unsigned i, num_pte_pages;
+ u64 *l2_pde;
+ u64 address;
+
+ /*
+ * Currently the DMA aperture must be between 32 MB and 1GB in size
+ */
+ if ((order < 25) || (order > 30))
+ return NULL;
+
+ dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL);
+ if (!dma_dom)
+ return NULL;
+
+ spin_lock_init(&dma_dom->domain.lock);
+
+ dma_dom->domain.id = domain_id_alloc();
+ if (dma_dom->domain.id == 0)
+ goto free_dma_dom;
+ dma_dom->domain.mode = PAGE_MODE_3_LEVEL;
+ dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
+ dma_dom->domain.priv = dma_dom;
+ if (!dma_dom->domain.pt_root)
+ goto free_dma_dom;
+ dma_dom->aperture_size = (1ULL << order);
+ /* one bit per aperture page */
+ dma_dom->bitmap = kzalloc(dma_dom->aperture_size / (PAGE_SIZE * 8),
+ GFP_KERNEL);
+ if (!dma_dom->bitmap)
+ goto free_dma_dom;
+ /*
+ * mark the first page as allocated so we never return 0 as
+ * a valid dma-address. So we can use 0 as error value
+ */
+ dma_dom->bitmap[0] = 1;
+ dma_dom->next_bit = 0;
+
+ /* keep the hardware exclusion range out of the allocator */
+ if (iommu->exclusion_start &&
+ iommu->exclusion_start < dma_dom->aperture_size) {
+ unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT;
+ int pages = to_pages(iommu->exclusion_start,
+ iommu->exclusion_length);
+ dma_ops_reserve_addresses(dma_dom, startpage, pages);
+ }
+
+ /* one level-0 page holds 512 ptes -> one pte page per 2 MB */
+ num_pte_pages = dma_dom->aperture_size / (PAGE_SIZE * 512);
+ dma_dom->pte_pages = kzalloc(num_pte_pages * sizeof(void *),
+ GFP_KERNEL);
+ if (!dma_dom->pte_pages)
+ goto free_dma_dom;
+
+ l2_pde = (u64 *)get_zeroed_page(GFP_KERNEL);
+ if (l2_pde == NULL)
+ goto free_dma_dom;
+
+ dma_dom->domain.pt_root[0] = IOMMU_L2_PDE(virt_to_phys(l2_pde));
+
+ for (i = 0; i < num_pte_pages; ++i) {
+ dma_dom->pte_pages[i] = (u64 *)get_zeroed_page(GFP_KERNEL);
+ if (!dma_dom->pte_pages[i])
+ goto free_dma_dom;
+ address = virt_to_phys(dma_dom->pte_pages[i]);
+ l2_pde[i] = IOMMU_L1_PDE(address);
+ }
+
+ return dma_dom;
+
+free_dma_dom:
+ dma_ops_domain_free(dma_dom);
+
+ return NULL;
+}
+
+
--
1.5.3.7
This patch adds the functions to parse the information about IOMMU exclusion
ranges and required unity mappings for the devices handled by the IOMMU.
Signed-off-by: Joerg Roedel <[email protected]>
---
arch/x86/kernel/amd_iommu_init.c | 87 ++++++++++++++++++++++++++++++++++++++
1 files changed, 87 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 3f4f7b8..555fcc9 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -556,3 +556,90 @@ static int __init init_iommu_all(struct acpi_table_header *table)
return 0;
}
+static void __init free_unity_maps(void)
+{
+ struct unity_map_entry *entry, *next;
+
+ list_for_each_entry_safe(entry, next, &amd_iommu_unity_map, list) {
+ list_del(&entry->list);
+ kfree(entry);
+ }
+}
+
/*
 * Apply one IVMD exclusion range definition: depending on the entry
 * type the range covers a single device, every device, or a device id
 * range. Unknown types are silently ignored. Always returns 0.
 */
static int __init init_exclusion_range(struct ivmd_header *m)
{
        int i;

        switch (m->type) {
        case ACPI_IVMD_TYPE:
                set_device_exclusion_range(m->devid, m);
                break;
        case ACPI_IVMD_TYPE_ALL:
                /*
                 * NOTE(review): init_unity_map_range() treats
                 * amd_iommu_last_bdf as an inclusive last id — confirm
                 * whether this loop should use '<='.
                 */
                for (i = 0; i < amd_iommu_last_bdf; ++i)
                        set_device_exclusion_range(i, m);
                break;
        case ACPI_IVMD_TYPE_RANGE:
                /* m->aux holds the inclusive end device id */
                for (i = m->devid; i <= m->aux; ++i)
                        set_device_exclusion_range(i, m);
                break;
        default:
                break;
        }

        return 0;
}
+
+static int __init init_unity_map_range(struct ivmd_header *m)
+{
+ struct unity_map_entry *e = 0;
+
+ e = kzalloc(sizeof(*e), GFP_KERNEL);
+ if (e == NULL)
+ return -ENOMEM;
+
+ switch (m->type) {
+ default:
+ case ACPI_IVMD_TYPE:
+ e->devid_start = e->devid_end = m->devid;
+ break;
+ case ACPI_IVMD_TYPE_ALL:
+ e->devid_start = 0;
+ e->devid_end = amd_iommu_last_bdf;
+ break;
+ case ACPI_IVMD_TYPE_RANGE:
+ e->devid_start = m->devid;
+ e->devid_end = m->aux;
+ break;
+ }
+ e->address_start = PAGE_ALIGN(m->range_start);
+ e->address_end = e->address_start + PAGE_ALIGN(m->range_length);
+ e->prot = m->flags >> 1;
+
+ list_add_tail(&e->list, &amd_iommu_unity_map);
+
+ return 0;
+}
+
+static int __init init_memory_definitions(struct acpi_table_header *table)
+{
+ u8 *p = (u8 *)table, *end = (u8 *)table;
+ struct ivmd_header *m;
+
+ INIT_LIST_HEAD(&amd_iommu_unity_map);
+
+ end += table->length;
+ p += IVRS_HEADER_LENGTH;
+
+ while (p < end) {
+ m = (struct ivmd_header *)p;
+ if (m->flags & IVMD_FLAG_EXCL_RANGE)
+ init_exclusion_range(m);
+ else if (m->flags & IVMD_FLAG_UNITY_MAP)
+ init_unity_map_range(m);
+
+ p += m->length;
+ }
+
+ return 0;
+}
+
--
1.5.3.7
This patch adds the function to initialize the dma_ops for the AMD IOMMU.
Signed-off-by: Joerg Roedel <[email protected]>
---
arch/x86/kernel/amd_iommu.c | 46 +++++++++++++++++++++++++++++++++++++++++++
1 files changed, 46 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index bed5f82..c282bde 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -906,3 +906,49 @@ void prealloc_protection_domains(void)
}
}
/*
 * The dma_ops table the AMD IOMMU driver registers with the x86
 * architecture code (see amd_iommu_init_dma_ops()). All DMA mapping
 * requests for devices behind an AMD IOMMU are routed through these
 * callbacks.
 * NOTE(review): no mapping_error or sync_* callbacks are provided —
 * confirm the dma-layer defaults are acceptable for this driver.
 */
static struct dma_mapping_ops amd_iommu_dma_ops = {
        .alloc_coherent = alloc_coherent,
        .free_coherent = free_coherent,
        .map_single = map_single,
        .unmap_single = unmap_single,
        .map_sg = map_sg,
        .unmap_sg = unmap_sg,
};
+
+int __init amd_iommu_init_dma_ops(void)
+{
+ struct amd_iommu *iommu;
+ int order = amd_iommu_aperture_order;
+ int ret;
+
+ list_for_each_entry(iommu, &amd_iommu_list, list) {
+ iommu->default_dom = dma_ops_domain_alloc(iommu, order);
+ if (iommu->default_dom == NULL)
+ return -ENOMEM;
+ ret = iommu_init_unity_mappings(iommu);
+ if (ret)
+ goto free_domains;
+ }
+
+ if (amd_iommu_isolate)
+ prealloc_protection_domains();
+
+ iommu_detected = 1;
+ force_iommu = 1;
+ bad_dma_address = 0;
+ gart_iommu_aperture_disabled = 1;
+ gart_iommu_aperture = 0;
+
+ dma_ops = &amd_iommu_dma_ops;
+
+ return 0;
+
+free_domains:
+
+ list_for_each_entry(iommu, &amd_iommu_list, list) {
+ if (iommu->default_dom)
+ dma_ops_domain_free(iommu->default_dom);
+ }
+
+ return ret;
+}
--
1.5.3.7
Add an entry to the MAINTAINERS file for AMD IOMMU.
Signed-off-by: Joerg Roedel <[email protected]>
---
MAINTAINERS | 6 ++++++
1 files changed, 6 insertions(+), 0 deletions(-)
diff --git a/MAINTAINERS b/MAINTAINERS
index 8f0ec46..5e89cea 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -376,6 +376,12 @@ L: [email protected] (moderated for non-subscribers)
W: http://www.amd.com/us-en/ConnectivitySolutions/TechnicalResources/0,,50_2334_2452_11363,00.html
S: Supported
+AMD IOMMU (AMD-VI)
+P: Joerg Roedel
+M: [email protected]
+L: [email protected]
+S: Supported
+
AMS (Apple Motion Sensor) DRIVER
P: Stelian Pop
M: [email protected]
--
1.5.3.7
This patch adds functions necessary to find the IOMMU resources for a specific
device.
Signed-off-by: Joerg Roedel <[email protected]>
---
arch/x86/kernel/amd_iommu.c | 75 +++++++++++++++++++++++++++++++++++++++++++
1 files changed, 75 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index c43d15d..47e80b5 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -461,3 +461,78 @@ free_dma_dom:
return NULL;
}
+static struct protection_domain *domain_for_device(u16 devid)
+{
+ struct protection_domain *dom;
+ unsigned long flags;
+
+ read_lock_irqsave(&amd_iommu_devtable_lock, flags);
+ dom = amd_iommu_pd_table[devid];
+ read_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+
+ return dom;
+}
+
/*
 * Attach a device to a protection domain: build the device table entry
 * (page table root pointer, paging mode, permissions) under the device
 * table lock, record the assignment in amd_iommu_pd_table and flush the
 * IOMMU's cached copy of the entry.
 */
static void set_device_domain(struct amd_iommu *iommu,
                              struct protection_domain *domain,
                              u16 devid)
{
        unsigned long flags;

        u64 pte_root = virt_to_phys(domain->pt_root);

        /* paging mode lives in bits 11:9 of the DTE; bit 1 enables
         * translation (TV), IR/IW/P grant read/write/present */
        pte_root |= (domain->mode & 0x07) << 9;
        pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | 2;

        write_lock_irqsave(&amd_iommu_devtable_lock, flags);
        amd_iommu_dev_table[devid].data[0] = pte_root;
        amd_iommu_dev_table[devid].data[1] = pte_root >> 32;
        amd_iommu_dev_table[devid].data[2] = domain->id;

        amd_iommu_pd_table[devid] = domain;
        write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);

        /* invalidate the IOMMU's cached device table entry */
        iommu_queue_inv_dev_entry(iommu, devid);

        iommu->need_sync = 1;
}
+
/*
 * Resolve the IOMMU, protection domain and (alias-translated) device id
 * responsible for a struct device.
 *
 * Returns 1 when the device is translated by an IOMMU (all three out
 * parameters valid), 0 when the device is outside IOMMU control (DMA
 * should then pass through untranslated).
 *
 * A device seen for the first time is attached to its IOMMU's default
 * domain here.
 */
static int get_device_resources(struct device *dev,
                                struct amd_iommu **iommu,
                                struct protection_domain **domain,
                                u16 *bdf)
{
        struct dma_ops_domain *dma_dom;
        struct pci_dev *pcidev;
        u16 _bdf;

        /* only PCI devices with a dma_mask are handled by this driver */
        BUG_ON(!dev || dev->bus != &pci_bus_type || !dev->dma_mask);

        pcidev = to_pci_dev(dev);
        _bdf = (pcidev->bus->number << 8) | pcidev->devfn;

        /*
         * NOTE(review): init_unity_map_range() uses amd_iommu_last_bdf as an
         * inclusive last id (devid_end = amd_iommu_last_bdf) while this check
         * treats it as exclusive — confirm whether this should be '>'.
         */
        if (_bdf >= amd_iommu_last_bdf) {
                *iommu = NULL;
                *domain = NULL;
                *bdf = 0xffff;
                return 0;
        }

        /* requests may come in under an alias id (e.g. behind a bridge) */
        *bdf = amd_iommu_alias_table[_bdf];

        *iommu = amd_iommu_rlookup_table[*bdf];
        if (*iommu == NULL)
                return 0;
        dma_dom = (*iommu)->default_dom;
        *domain = domain_for_device(*bdf);
        if (*domain == NULL) {
                /* first request from this device: use the default domain */
                *domain = &dma_dom->domain;
                set_device_domain(*iommu, *domain, *bdf);
                printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
                       "device ", (*domain)->id);
                print_devid(_bdf, 1);
        }

        return 1;
}
+
--
1.5.3.7
This patch adds the dma_ops specific mapping functions for single mappings.
Signed-off-by: Joerg Roedel <[email protected]>
---
arch/x86/kernel/amd_iommu.c | 59 +++++++++++++++++++++++++++++++++++++++++++
1 files changed, 59 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index e00a3e7..b4079f6 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -40,6 +40,11 @@ struct command {
static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
struct unity_map_entry *e);
/*
 * True when the IOMMU caches non-present page table entries; such
 * hardware needs an IOTLB flush after new mappings are created.
 * NOTE(review): this assumes IOMMU_CAP_NPCACHE is a bit mask — if it is
 * defined as a bit *number*, the test needs (1 << IOMMU_CAP_NPCACHE);
 * confirm against the capability register layout.
 */
static int iommu_has_npcache(struct amd_iommu *iommu)
{
        return iommu->cap & IOMMU_CAP_NPCACHE;
}
+
static int __iommu_queue_command(struct amd_iommu *iommu, struct command *cmd)
{
u32 tail, head;
@@ -641,3 +646,57 @@ static void __unmap_single(struct amd_iommu *iommu,
dma_ops_free_addresses(dma_dom, dma_addr, pages);
}
/*
 * dma_ops map_single callback: map one physically contiguous buffer
 * into the device's DMA address space.
 * Returns the DMA address, the physical address unchanged when the
 * device is not behind an IOMMU, or bad_dma_address on failure.
 */
static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
                             size_t size, int dir)
{
        unsigned long flags;
        struct amd_iommu *iommu;
        struct protection_domain *domain;
        u16 devid;
        dma_addr_t addr;

        get_device_resources(dev, &iommu, &domain, &devid);

        if (iommu == NULL || domain == NULL)
                /* device not handled by the IOMMU -> pass through */
                return (dma_addr_t)paddr;

        spin_lock_irqsave(&domain->lock, flags);
        addr = __map_single(dev, iommu, domain->priv, paddr, size, dir);
        if (addr == bad_dma_address)
                goto out;

        /* hardware caching non-present entries must see the new PTEs */
        if (iommu_has_npcache(iommu))
                iommu_flush_pages(iommu, domain->id, addr, size);

        if (iommu->need_sync)
                iommu_completion_wait(iommu);

out:
        spin_unlock_irqrestore(&domain->lock, flags);

        return addr;
}
+
/*
 * dma_ops unmap_single callback: tear down a mapping created by
 * map_single and flush the IOTLB for the affected range. A no-op for
 * devices not handled by an IOMMU.
 */
static void unmap_single(struct device *dev, dma_addr_t dma_addr,
                         size_t size, int dir)
{
        unsigned long flags;
        struct amd_iommu *iommu;
        struct protection_domain *domain;
        u16 devid;

        if (!get_device_resources(dev, &iommu, &domain, &devid))
                /* device was never translated -> nothing to unmap */
                return;

        spin_lock_irqsave(&domain->lock, flags);

        __unmap_single(iommu, domain->priv, dma_addr, size, dir);

        /* the IOMMU must not use the stale translation anymore */
        iommu_flush_pages(iommu, domain->id, dma_addr, size);

        if (iommu->need_sync)
                iommu_completion_wait(iommu);

        spin_unlock_irqrestore(&domain->lock, flags);
}
+
--
1.5.3.7
This patch adds the amd_iommu.h file which will be included in the generic
code.
Signed-off-by: Joerg Roedel <[email protected]>
---
arch/x86/kernel/amd_iommu.c | 1 +
arch/x86/kernel/amd_iommu_init.c | 1 +
include/asm-x86/amd_iommu.h | 32 ++++++++++++++++++++++++++++++++
3 files changed, 34 insertions(+), 0 deletions(-)
create mode 100644 include/asm-x86/amd_iommu.h
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index c282bde..134dea1 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -25,6 +25,7 @@
#include <asm/proto.h>
#include <asm/gart.h>
#include <asm/amd_iommu_types.h>
+#include <asm/amd_iommu.h>
#define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index ec6f13b..bae4a76 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -23,6 +23,7 @@
#include <linux/list.h>
#include <asm/pci-direct.h>
#include <asm/amd_iommu_types.h>
+#include <asm/amd_iommu.h>
#include <asm/gart.h>
/*
diff --git a/include/asm-x86/amd_iommu.h b/include/asm-x86/amd_iommu.h
new file mode 100644
index 0000000..30a1204
--- /dev/null
+++ b/include/asm-x86/amd_iommu.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
+ * Author: Joerg Roedel <[email protected]>
+ * Leo Duran <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _ASM_X86_AMD_IOMMU_H
+#define _ASM_X86_AMD_IOMMU_H
+
+#ifdef CONFIG_AMD_IOMMU
+extern int amd_iommu_init(void);
+extern int amd_iommu_init_dma_ops(void);
+extern void amd_iommu_detect(void);
+#else
+static inline int amd_iommu_init(void) { return -ENODEV; }
+static inline void amd_iommu_detect(void) { }
+#endif
+
+#endif
--
1.5.3.7
This patch adds the dma_ops functions for coherent mappings.
Signed-off-by: Joerg Roedel <[email protected]>
---
arch/x86/kernel/amd_iommu.c | 74 +++++++++++++++++++++++++++++++++++++++++++
1 files changed, 74 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index f4747fe..aab9125 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -798,3 +798,77 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
spin_unlock_irqrestore(&domain->lock, flags);
}
+static void *alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t *dma_addr, gfp_t flag)
+{
+ unsigned long flags;
+ void *virt_addr;
+ struct amd_iommu *iommu;
+ struct protection_domain *domain;
+ u16 devid;
+ phys_addr_t paddr;
+
+ virt_addr = (void *)__get_free_pages(flag, get_order(size));
+ if (!virt_addr)
+ return 0;
+
+ memset(virt_addr, 0, size);
+ paddr = virt_to_phys(virt_addr);
+
+ get_device_resources(dev, &iommu, &domain, &devid);
+
+ if (!iommu || !domain) {
+ *dma_addr = (dma_addr_t)paddr;
+ return virt_addr;
+ }
+
+ spin_lock_irqsave(&domain->lock, flags);
+
+ *dma_addr = __map_single(dev, iommu, domain->priv, paddr,
+ size, DMA_BIDIRECTIONAL);
+
+ if (*dma_addr == bad_dma_address) {
+ free_pages((unsigned long)virt_addr, get_order(size));
+ virt_addr = NULL;
+ goto out;
+ }
+
+ if (iommu_has_npcache(iommu))
+ iommu_flush_pages(iommu, domain->id, *dma_addr, size);
+
+ if (iommu->need_sync)
+ iommu_completion_wait(iommu);
+
+out:
+ spin_unlock_irqrestore(&domain->lock, flags);
+
+ return virt_addr;
+}
+
/*
 * dma_ops free_coherent callback: unmap memory obtained from
 * alloc_coherent (if it was mapped) and free the pages backing it.
 */
static void free_coherent(struct device *dev, size_t size,
                          void *virt_addr, dma_addr_t dma_addr)
{
        unsigned long flags;
        struct amd_iommu *iommu;
        struct protection_domain *domain;
        u16 devid;

        get_device_resources(dev, &iommu, &domain, &devid);

        if (!iommu || !domain)
                /* never mapped through an IOMMU -> just free the pages */
                goto free_mem;

        spin_lock_irqsave(&domain->lock, flags);

        __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
        iommu_flush_pages(iommu, domain->id, dma_addr, size);

        if (iommu->need_sync)
                iommu_completion_wait(iommu);

        spin_unlock_irqrestore(&domain->lock, flags);

free_mem:
        free_pages((unsigned long)virt_addr, get_order(size));
}
+
--
1.5.3.7
Add documentation for the kernel parameters introduced with this driver.
Signed-off-by: Joerg Roedel <[email protected]>
---
Documentation/kernel-parameters.txt | 12 ++++++++++++
1 files changed, 12 insertions(+), 0 deletions(-)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index e07c432..fc8f936 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -271,6 +271,18 @@ and is between 256 and 4096 characters. It is defined in the file
aic79xx= [HW,SCSI]
See Documentation/scsi/aic79xx.txt.
+ amd_iommu= [HW,X86-64]
+ Pass parameters to the AMD IOMMU driver in the system.
+ Possible values are:
+ off - disable the driver for AMD IOMMU
+ isolate - enable device isolation (each device, as far
+ as possible, will get its own protection
+ domain)
+ amd_iommu_size= [HW,X86-64]
+ Define the size of the aperture for the AMD IOMMU
+ driver. Possible values are:
+ '32M', '64M' (default), '128M', '256M', '512M', '1G'
+
amijoy.map= [HW,JOY] Amiga joystick support
Map of devices attached to JOY0DAT and JOY1DAT
Format: <a>,<b>
--
1.5.3.7
This patch adds the functions required to program the IOMMU with the MMIO
space.
Signed-off-by: Joerg Roedel <[email protected]>
---
arch/x86/kernel/amd_iommu_init.c | 60 ++++++++++++++++++++++++++++++++++++++
1 files changed, 60 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 3147e69..ffb8ac8 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -117,6 +117,66 @@ static u32 dev_table_size;
static u32 alias_table_size;
static u32 rlookup_table_size;
/*
 * Program the hardware exclusion range registers of one IOMMU.
 * DMA targeting [start, limit] is forwarded untranslated. A zero
 * exclusion_start means no range is configured; the registers are then
 * left untouched.
 * NOTE(review): limit is rounded *down* by the PAGE_MASK and — assuming
 * the register holds an inclusive limit — confirm whether the last
 * partial page should be included instead.
 */
static void __init iommu_set_exclusion_range(struct amd_iommu *iommu)
{
        u64 start = iommu->exclusion_start & PAGE_MASK;
        u64 limit = (start + iommu->exclusion_length) & PAGE_MASK;
        u64 entry;

        if (!iommu->exclusion_start)
                return;

        entry = start | MMIO_EXCL_ENABLE_MASK;
        memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET,
                    &entry, sizeof(entry));

        entry = limit;
        memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET,
                    &entry, sizeof(entry));
}
+
/*
 * Tell the IOMMU where the (shared) device table lives: physical base
 * address plus the table size encoded in the low bits as
 * (size / 4K) - 1, as the hardware register format requires.
 */
static void __init iommu_set_device_table(struct amd_iommu *iommu)
{
        u32 entry;

        BUG_ON(iommu->mmio_base == NULL);

        entry = virt_to_phys(amd_iommu_dev_table);
        entry |= (dev_table_size >> 12) - 1;
        memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET,
                    &entry, sizeof(entry));
}
+
+static void __init iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
+{
+ u32 ctrl;
+
+ ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET);
+ ctrl |= (1 << bit);
+ writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
+}
+
+static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
+{
+ u32 ctrl;
+
+ ctrl = (u64)readl(iommu->mmio_base + MMIO_CONTROL_OFFSET);
+ ctrl &= ~(1 << bit);
+ writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
+}
+
/*
 * Switch one IOMMU into enabled state (translation active) and log
 * which device/capability pointer it lives behind.
 */
void __init iommu_enable(struct amd_iommu *iommu)
{
        u32 ctrl;

        printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at ");
        print_devid(iommu->devid, 0);
        printk(" cap 0x%hx\n", iommu->cap_ptr);

        iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
        /*
         * NOTE(review): ctrl is read back but never used — presumably a
         * posted-write flush; if so, drop the variable, otherwise drop
         * the read (it triggers a set-but-unused warning).
         */
        ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET);
}
+
static u8 * __init iommu_map_mmio_space(u64 address)
{
u8 *ret;
--
1.5.3.7
This patch adds functions to initialize the IOMMU hardware with information
from ACPI and PCI.
Signed-off-by: Joerg Roedel <[email protected]>
---
arch/x86/kernel/amd_iommu_init.c | 125 ++++++++++++++++++++++++++++++++++++++
1 files changed, 125 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 4c37abb..8ec48f1 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -353,3 +353,128 @@ static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m)
}
}
/*
 * Read runtime information from the IOMMU's PCI capability block:
 * the capability header and the range register, from which the first
 * and last device id handled by this IOMMU are derived.
 */
static void __init init_iommu_from_pci(struct amd_iommu *iommu)
{
        int bus = PCI_BUS(iommu->devid);
        int dev = PCI_SLOT(iommu->devid);
        int fn = PCI_FUNC(iommu->devid);
        int cap_ptr = iommu->cap_ptr;
        u32 range;

        iommu->cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_CAP_HDR_OFFSET);

        range = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET);
        iommu->first_device = DEVID(MMIO_GET_BUS(range), MMIO_GET_FD(range));
        iommu->last_device = DEVID(MMIO_GET_BUS(range), MMIO_GET_LD(range));
}
+
+static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
+ struct ivhd_header *h)
+{
+ u8 *p = (u8 *)h;
+ u8 *end = p, flags = 0;
+ u16 dev_i, devid = 0, devid_start = 0, devid_to = 0;
+ u32 ext_flags = 0;
+ bool alias = 0;
+ struct ivhd_entry *e;
+
+ /*
+ * First set the recommended feature enable bits from ACPI
+ * into the IOMMU control registers
+ */
+ h->flags & IVHD_FLAG_HT_TUN_EN ?
+ iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) :
+ iommu_feature_disable(iommu, CONTROL_HT_TUN_EN);
+
+ h->flags & IVHD_FLAG_PASSPW_EN ?
+ iommu_feature_enable(iommu, CONTROL_PASSPW_EN) :
+ iommu_feature_disable(iommu, CONTROL_PASSPW_EN);
+
+ h->flags & IVHD_FLAG_RESPASSPW_EN ?
+ iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) :
+ iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN);
+
+ h->flags & IVHD_FLAG_ISOC_EN ?
+ iommu_feature_enable(iommu, CONTROL_ISOC_EN) :
+ iommu_feature_disable(iommu, CONTROL_ISOC_EN);
+
+ /*
+ * make IOMMU memory accesses cache coherent
+ */
+ iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
+
+ /*
+ * Done. Now parse the device entries
+ */
+ p += sizeof(struct ivhd_header);
+ end += h->length;
+
+ while (p < end) {
+ e = (struct ivhd_entry *)p;
+ switch (e->type) {
+ case IVHD_DEV_ALL:
+ for (dev_i = iommu->first_device;
+ dev_i <= iommu->last_device; ++dev_i)
+ set_dev_entry_from_acpi(dev_i, e->flags, 0);
+ break;
+ case IVHD_DEV_SELECT:
+ devid = e->devid;
+ set_dev_entry_from_acpi(devid, e->flags, 0);
+ break;
+ case IVHD_DEV_SELECT_RANGE_START:
+ devid_start = e->devid;
+ flags = e->flags;
+ ext_flags = 0;
+ alias = 0;
+ break;
+ case IVHD_DEV_ALIAS:
+ devid = e->devid;
+ devid_to = e->ext >> 8;
+ set_dev_entry_from_acpi(devid, e->flags, 0);
+ amd_iommu_alias_table[devid] = devid_to;
+ break;
+ case IVHD_DEV_ALIAS_RANGE:
+ devid_start = e->devid;
+ flags = e->flags;
+ devid_to = e->ext >> 8;
+ ext_flags = 0;
+ alias = 1;
+ break;
+ case IVHD_DEV_EXT_SELECT:
+ devid = e->devid;
+ set_dev_entry_from_acpi(devid, e->flags, e->ext);
+ break;
+ case IVHD_DEV_EXT_SELECT_RANGE:
+ devid_start = e->devid;
+ flags = e->flags;
+ ext_flags = e->ext;
+ alias = 0;
+ break;
+ case IVHD_DEV_RANGE_END:
+ devid = e->devid;
+ for (dev_i = devid_start; dev_i <= devid; ++dev_i) {
+ if (alias)
+ amd_iommu_alias_table[dev_i] = devid_to;
+ set_dev_entry_from_acpi(
+ amd_iommu_alias_table[dev_i],
+ flags, ext_flags);
+ }
+ break;
+ default:
+ break;
+ }
+
+ p += 0x04 << (e->type >> 6);
+ }
+}
+
+static int __init init_iommu_devices(struct amd_iommu *iommu)
+{
+ u16 i;
+
+ for (i = iommu->first_device; i <= iommu->last_device; ++i)
+ set_iommu_for_device(iommu, i);
+
+ return 0;
+}
+
--
1.5.3.7
This patch adds functions necessary to initialize the device table from the
ACPI definitions.
Signed-off-by: Joerg Roedel <[email protected]>
---
arch/x86/kernel/amd_iommu_init.c | 45 ++++++++++++++++++++++++++++++++++++++
1 files changed, 45 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index c2be3ad..4c37abb 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -308,3 +308,48 @@ static void __init free_command_buffer(struct amd_iommu *iommu)
get_order(CMD_BUFFER_SIZE));
}
+static void set_dev_entry_bit(u16 devid, u8 bit)
+{
+ int i = (bit >> 5) & 0x07;
+ int _bit = bit & 0x1f;
+
+ amd_iommu_dev_table[devid].data[i] |= (1 << _bit);
+}
+
+static void __init set_dev_entry_from_acpi(u16 devid, u32 flags, u32 ext_flags)
+{
+ if (flags & ACPI_DEVFLAG_INITPASS)
+ set_dev_entry_bit(devid, DEV_ENTRY_INIT_PASS);
+ if (flags & ACPI_DEVFLAG_EXTINT)
+ set_dev_entry_bit(devid, DEV_ENTRY_EINT_PASS);
+ if (flags & ACPI_DEVFLAG_NMI)
+ set_dev_entry_bit(devid, DEV_ENTRY_NMI_PASS);
+ if (flags & ACPI_DEVFLAG_SYSMGT1)
+ set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1);
+ if (flags & ACPI_DEVFLAG_SYSMGT2)
+ set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2);
+ if (flags & ACPI_DEVFLAG_LINT0)
+ set_dev_entry_bit(devid, DEV_ENTRY_LINT0_PASS);
+ if (flags & ACPI_DEVFLAG_LINT1)
+ set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS);
+}
+
/* Record which IOMMU is responsible for translating this device id. */
static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid)
{
        amd_iommu_rlookup_table[devid] = iommu;
}
+
+static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m)
+{
+ struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
+
+ if (!(m->flags & IVMD_FLAG_EXCL_RANGE))
+ return;
+
+ if (iommu) {
+ set_dev_entry_bit(m->devid, DEV_ENTRY_EX);
+ iommu->exclusion_start = m->range_start;
+ iommu->exclusion_length = m->range_length;
+ }
+}
+
--
1.5.3.7
This patch adds the functions which will initialize the unity mappings in the
device page tables.
Signed-off-by: Joerg Roedel <[email protected]>
---
arch/x86/kernel/amd_iommu.c | 122 +++++++++++++++++++++++++++++++++++++++++++
1 files changed, 122 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index a24ee4a..1d70f5e 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -37,6 +37,9 @@ struct command {
u32 data[4];
};
+static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
+ struct unity_map_entry *e);
+
static int __iommu_queue_command(struct amd_iommu *iommu, struct command *cmd)
{
u32 tail, head;
@@ -144,3 +147,122 @@ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid,
return 0;
}
+static int iommu_map(struct protection_domain *dom,
+ unsigned long bus_addr,
+ unsigned long phys_addr,
+ int prot)
+{
+ u64 __pte, *pte, *page;
+
+ bus_addr = PAGE_ALIGN(bus_addr);
+ phys_addr = PAGE_ALIGN(bus_addr);
+
+ /* only support 512GB address spaces for now */
+ if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK))
+ return -EINVAL;
+
+ pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)];
+
+ if (!IOMMU_PTE_PRESENT(*pte)) {
+ page = (u64 *)get_zeroed_page(GFP_KERNEL);
+ if (!page)
+ return -ENOMEM;
+ *pte = IOMMU_L2_PDE(virt_to_phys(page));
+ }
+
+ pte = IOMMU_PTE_PAGE(*pte);
+ pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];
+
+ if (!IOMMU_PTE_PRESENT(*pte)) {
+ page = (u64 *)get_zeroed_page(GFP_KERNEL);
+ if (!page)
+ return -ENOMEM;
+ *pte = IOMMU_L1_PDE(virt_to_phys(page));
+ }
+
+ pte = IOMMU_PTE_PAGE(*pte);
+ pte = &pte[IOMMU_PTE_L0_INDEX(bus_addr)];
+
+ if (IOMMU_PTE_PRESENT(*pte))
+ return -EBUSY;
+
+ __pte = phys_addr | IOMMU_PTE_P;
+ if (prot & IOMMU_PROT_IR)
+ __pte |= IOMMU_PTE_IR;
+ if (prot & IOMMU_PROT_IW)
+ __pte |= IOMMU_PTE_IW;
+
+ *pte = __pte;
+
+ return 0;
+}
+
+static int iommu_for_unity_map(struct amd_iommu *iommu,
+ struct unity_map_entry *entry)
+{
+ u16 bdf, i;
+
+ for (i = entry->devid_start; i <= entry->devid_end; ++i) {
+ bdf = amd_iommu_alias_table[i];
+ if (amd_iommu_rlookup_table[bdf] == iommu)
+ return 1;
+ }
+
+ return 0;
+}
+
/*
 * Install all unity mappings relevant for this IOMMU into its default
 * protection domain. Returns 0 or the first mapping error.
 */
static int iommu_init_unity_mappings(struct amd_iommu *iommu)
{
        struct unity_map_entry *entry;
        int ret;

        list_for_each_entry(entry, &amd_iommu_unity_map, list) {
                /* skip entries for devices behind other IOMMUs */
                if (!iommu_for_unity_map(iommu, entry))
                        continue;
                ret = dma_ops_unity_map(iommu->default_dom, entry);
                if (ret)
                        return ret;
        }

        return 0;
}
+
/*
 * Create the 1:1 mappings described by a unity map entry in the given
 * dma_ops domain, page by page, and reserve the covered addresses in
 * the domain's allocation bitmap so the allocator never hands them out.
 * Returns 0 or the first iommu_map() error.
 */
static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
                             struct unity_map_entry *e)
{
        u64 addr;
        int ret;

        for (addr = e->address_start; addr < e->address_end;
             addr += PAGE_SIZE) {
                ret = iommu_map(&dma_dom->domain, addr, addr, e->prot);
                if (ret)
                        return ret;
                /*
                 * if unity mapping is in aperture range mark the page
                 * as allocated in the aperture
                 */
                if (addr < dma_dom->aperture_size)
                        __set_bit(addr >> PAGE_SHIFT, dma_dom->bitmap);
        }

        return 0;
}
+
+static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
+ u16 devid)
+{
+ struct unity_map_entry *e;
+ int ret;
+
+ list_for_each_entry(e, &amd_iommu_unity_map, list) {
+ if (!(devid >= e->devid_start && devid <= e->devid_end))
+ continue;
+ ret = dma_ops_unity_map(dma_dom, e);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
--
1.5.3.7
This patch adds code to detect an AMD IOMMU early during boot before the early
detection code of other IOMMUs run.
Signed-off-by: Joerg Roedel <[email protected]>
---
arch/x86/kernel/amd_iommu_init.c | 20 ++++++++++++++++++++
1 files changed, 20 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index c792ddc..d7a75bf 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -769,3 +769,23 @@ free:
goto out;
}
/*
 * ACPI callback for the early IVRS probe. The table contents are
 * parsed later in amd_iommu_init(); here only the table's existence
 * matters (acpi_table_parse() returning 0 signals it was found).
 */
static int __init early_amd_iommu_detect(struct acpi_table_header *table)
{
        return 0;
}
+
/*
 * Early boot detection of AMD IOMMU hardware via the IVRS ACPI table.
 * Runs before other IOMMU drivers detect; when an IVRS table exists we
 * claim the IOMMU slot and keep the GART driver from touching the
 * aperture. Does nothing when another IOMMU was already detected or
 * the user disabled IOMMUs / this driver.
 */
void __init amd_iommu_detect(void)
{
        if (swiotlb || no_iommu || iommu_detected)
                return;

        if (amd_iommu_disable)
                return;

        if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) {
                iommu_detected = 1;
                gart_iommu_aperture_disabled = 1;
                gart_iommu_aperture = 0;
        }
}
+
--
1.5.3.7
This patch adds a call to the driver specific dma_ops initialization routine
from the AMD IOMMU hardware initialization. Further it adds the necessary code
to finally enable AMD IOMMU hardware.
Signed-off-by: Joerg Roedel <[email protected]>
---
arch/x86/kernel/amd_iommu_init.c | 16 ++++++++++++++++
1 files changed, 16 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index bae4a76..d1aa234 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -644,6 +644,16 @@ static int __init init_memory_definitions(struct acpi_table_header *table)
return 0;
}
+static void __init enable_iommus(void)
+{
+ struct amd_iommu *iommu;
+
+ list_for_each_entry(iommu, &amd_iommu_list, list) {
+ iommu_set_exclusion_range(iommu);
+ iommu_enable(iommu);
+ }
+}
+
int __init amd_iommu_init(void)
{
int i, ret = 0;
@@ -731,6 +741,12 @@ int __init amd_iommu_init(void)
if (acpi_table_parse("IVRS", init_memory_definitions) != 0)
goto free;
+ ret = amd_iommu_init_dma_ops();
+ if (ret)
+ goto free;
+
+ enable_iommus();
+
printk(KERN_INFO "AMD IOMMU: aperture size is %d MB\n",
(1 << (amd_iommu_aperture_order-20)));
--
1.5.3.7
This patch adds the functions to allocate and deallocate the command buffer for
one IOMMU in the system.
Signed-off-by: Joerg Roedel <[email protected]>
---
arch/x86/kernel/amd_iommu_init.c | 30 ++++++++++++++++++++++++++++++
1 files changed, 30 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index ffb8ac8..c2be3ad 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -278,3 +278,33 @@ static int __init find_last_devid_acpi(struct acpi_table_header *table)
return 0;
}
+static u8 * __init alloc_command_buffer(struct amd_iommu *iommu)
+{
+ u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL,
+ get_order(CMD_BUFFER_SIZE));
+ u64 entry = 0;
+
+ if (cmd_buf == NULL)
+ return NULL;
+
+ iommu->cmd_buf_size = CMD_BUFFER_SIZE;
+
+ memset(cmd_buf, 0, CMD_BUFFER_SIZE);
+
+ entry = (u64)virt_to_phys(cmd_buf);
+ entry |= MMIO_CMD_SIZE_512;
+ memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
+ &entry, sizeof(entry));
+
+ iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
+
+ return cmd_buf;
+}
+
+static void __init free_command_buffer(struct amd_iommu *iommu)
+{
+ if (iommu->cmd_buf)
+ free_pages((unsigned long)iommu->cmd_buf,
+ get_order(CMD_BUFFER_SIZE));
+}
+
--
1.5.3.7
This patch adds the source file amd_iommu_init.c to the kernel Makefile for the
x86 architecture.
Signed-off-by: Joerg Roedel <[email protected]>
---
arch/x86/kernel/Makefile | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 77807d4..1e4e00a 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -100,6 +100,7 @@ ifeq ($(CONFIG_X86_64),y)
obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o
obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o
+ obj-$(CONFIG_AMD_IOMMU) += amd_iommu_init.o
obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o
obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o
--
1.5.3.7
This patch adds a function to pre-allocate protection domains. So we don't have
to allocate it on the first request for a device (which can happen in atomic
mode).
Signed-off-by: Joerg Roedel <[email protected]>
---
arch/x86/kernel/amd_iommu.c | 34 ++++++++++++++++++++++++++++++++++
1 files changed, 34 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index aab9125..bed5f82 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -872,3 +872,37 @@ free_mem:
free_pages((unsigned long)virt_addr, get_order(size));
}
+/*
+ * If the driver core informs the DMA layer if a driver grabs a device
+ * we don't need to preallocate the protection domains anymore.
+ * For now we have to.
+ */
/*
 * If the driver core informs the DMA layer if a driver grabs a device
 * we don't need to preallocate the protection domains anymore.
 * For now we have to.
 *
 * Walks all PCI devices and gives each one handled by an AMD IOMMU its
 * own dma_ops domain, so no allocation happens later in (possibly
 * atomic) mapping paths. Allocation failures are deliberately skipped:
 * the device then falls back to the default domain on first use.
 */
void prealloc_protection_domains(void)
{
        struct pci_dev *dev = NULL;
        struct dma_ops_domain *dma_dom;
        struct amd_iommu *iommu;
        int order = amd_iommu_aperture_order;
        u16 devid;

        while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
                devid = (dev->bus->number << 8) | dev->devfn;
                /*
                 * NOTE(review): other code treats amd_iommu_last_bdf as an
                 * inclusive last id — confirm whether this should be '>'.
                 */
                if (devid >= amd_iommu_last_bdf)
                        continue;
                devid = amd_iommu_alias_table[devid];
                if (domain_for_device(devid))
                        continue;       /* already has a domain */
                iommu = amd_iommu_rlookup_table[devid];
                if (!iommu)
                        continue;       /* not behind an AMD IOMMU */
                dma_dom = dma_ops_domain_alloc(iommu, order);
                if (!dma_dom)
                        continue;
                init_unity_mappings_for_device(dma_dom, devid);
                set_device_domain(iommu, &dma_dom->domain, devid);
                printk(KERN_INFO "AMD IOMMU: Allocated domain %d for device ",
                       dma_dom->domain.id);
                print_devid(devid, 1);
        }
}
+
--
1.5.3.7
This patch adds the address allocator to the AMD IOMMU code.
Signed-off-by: Joerg Roedel <[email protected]>
---
arch/x86/kernel/amd_iommu.c | 48 +++++++++++++++++++++++++++++++++++++++++++
1 files changed, 48 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 1d70f5e..69d8d02 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -266,3 +266,51 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
return 0;
}
+static unsigned long dma_mask_to_pages(unsigned long mask)
+{
+ return (mask >> PAGE_SHIFT) +
+ (PAGE_ALIGN(mask & ~PAGE_MASK) >> PAGE_SHIFT);
+}
+
+static unsigned long dma_ops_alloc_addresses(struct device *dev,
+ struct dma_ops_domain *dom,
+ unsigned int pages)
+{
+ unsigned long limit = dma_mask_to_pages(*dev->dma_mask);
+ unsigned long address;
+ unsigned long size = dom->aperture_size >> PAGE_SHIFT;
+ unsigned long boundary_size;
+
+ boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
+ PAGE_SIZE) >> PAGE_SHIFT;
+ limit = limit < size ? limit : size;
+
+ if (dom->next_bit >= limit)
+ dom->next_bit = 0;
+
+ address = iommu_area_alloc(dom->bitmap, limit, dom->next_bit, pages,
+ 0 , boundary_size, 0);
+ if (address == -1)
+ address = iommu_area_alloc(dom->bitmap, limit, 0, pages,
+ 0, boundary_size, 0);
+
+ if (likely(address != -1)) {
+ set_bit_string(dom->bitmap, address, pages);
+ dom->next_bit = address + pages;
+ address <<= PAGE_SHIFT;
+ } else
+ address = bad_dma_address;
+
+ WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size);
+
+ return address;
+}
+
+static void dma_ops_free_addresses(struct dma_ops_domain *dom,
+ unsigned long address,
+ unsigned int pages)
+{
+ address >>= PAGE_SHIFT;
+ iommu_area_free(dom->bitmap, address, pages);
+}
+
--
1.5.3.7
This patch adds a header file local to the AMD IOMMU driver with constants and
data structures needed in the code.
Signed-off-by: Joerg Roedel <[email protected]>
---
include/asm-x86/amd_iommu_types.h | 242 +++++++++++++++++++++++++++++++++++++
1 files changed, 242 insertions(+), 0 deletions(-)
create mode 100644 include/asm-x86/amd_iommu_types.h
diff --git a/include/asm-x86/amd_iommu_types.h b/include/asm-x86/amd_iommu_types.h
new file mode 100644
index 0000000..0f39550
--- /dev/null
+++ b/include/asm-x86/amd_iommu_types.h
@@ -0,0 +1,242 @@
+/*
+ * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
+ * Author: Joerg Roedel <[email protected]>
+ * Leo Duran <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __AMD_IOMMU_TYPES_H__
+#define __AMD_IOMMU_TYPES_H__
+
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+/*
+ * some size calculation constants
+ */
+#define DEV_TABLE_ENTRY_SIZE 256
+#define ALIAS_TABLE_ENTRY_SIZE 2
+#define RLOOKUP_TABLE_ENTRY_SIZE (sizeof(void *))
+
+/* helper macros */
+#define LOW_U32(x) ((x) & ((1ULL << 32)-1))
+#define HIGH_U32(x) (LOW_U32((x) >> 32))
+
+/* Length of the MMIO region for the AMD IOMMU */
+#define MMIO_REGION_LENGTH 0x4000
+
+/* Capability offsets used by the driver */
+#define MMIO_CAP_HDR_OFFSET 0x00
+#define MMIO_RANGE_OFFSET 0x0c
+
+/* Masks, shifts and macros to parse the device range capability */
+#define MMIO_RANGE_LD_MASK 0xff000000
+#define MMIO_RANGE_FD_MASK 0x00ff0000
+#define MMIO_RANGE_BUS_MASK 0x0000ff00
+#define MMIO_RANGE_LD_SHIFT 24
+#define MMIO_RANGE_FD_SHIFT 16
+#define MMIO_RANGE_BUS_SHIFT 8
+#define MMIO_GET_LD(x) (((x) & MMIO_RANGE_LD_MASK) >> MMIO_RANGE_LD_SHIFT)
+#define MMIO_GET_FD(x) (((x) & MMIO_RANGE_FD_MASK) >> MMIO_RANGE_FD_SHIFT)
+#define MMIO_GET_BUS(x) (((x) & MMIO_RANGE_BUS_MASK) >> MMIO_RANGE_BUS_SHIFT)
+
+/* Flag masks for the AMD IOMMU exclusion range */
+#define MMIO_EXCL_ENABLE_MASK 0x01ULL
+#define MMIO_EXCL_ALLOW_MASK 0x02ULL
+
+/* Used offsets into the MMIO space */
+#define MMIO_DEV_TABLE_OFFSET 0x0000
+#define MMIO_CMD_BUF_OFFSET 0x0008
+#define MMIO_EVT_BUF_OFFSET 0x0010
+#define MMIO_CONTROL_OFFSET 0x0018
+#define MMIO_EXCL_BASE_OFFSET 0x0020
+#define MMIO_EXCL_LIMIT_OFFSET 0x0028
+#define MMIO_CMD_HEAD_OFFSET 0x2000
+#define MMIO_CMD_TAIL_OFFSET 0x2008
+#define MMIO_EVT_HEAD_OFFSET 0x2010
+#define MMIO_EVT_TAIL_OFFSET 0x2018
+#define MMIO_STATUS_OFFSET 0x2020
+
+/* feature control bits */
+#define CONTROL_IOMMU_EN 0x00ULL
+#define CONTROL_HT_TUN_EN 0x01ULL
+#define CONTROL_EVT_LOG_EN 0x02ULL
+#define CONTROL_EVT_INT_EN 0x03ULL
+#define CONTROL_COMWAIT_EN 0x04ULL
+#define CONTROL_PASSPW_EN 0x08ULL
+#define CONTROL_RESPASSPW_EN 0x09ULL
+#define CONTROL_COHERENT_EN 0x0aULL
+#define CONTROL_ISOC_EN 0x0bULL
+#define CONTROL_CMDBUF_EN 0x0cULL
+#define CONTROL_PPFLOG_EN 0x0dULL
+#define CONTROL_PPFINT_EN 0x0eULL
+
+/* command specific defines */
+#define CMD_COMPL_WAIT 0x01
+#define CMD_INV_DEV_ENTRY 0x02
+#define CMD_INV_IOMMU_PAGES 0x03
+
+#define CMD_COMPL_WAIT_STORE_MASK 0x01
+#define CMD_INV_IOMMU_PAGES_SIZE_MASK 0x01
+#define CMD_INV_IOMMU_PAGES_PDE_MASK 0x02
+
+/* macros and definitions for device table entries */
+#define DEV_ENTRY_VALID 0x00
+#define DEV_ENTRY_TRANSLATION 0x01
+#define DEV_ENTRY_IR 0x3d
+#define DEV_ENTRY_IW 0x3e
+#define DEV_ENTRY_EX 0x67
+#define DEV_ENTRY_SYSMGT1 0x68
+#define DEV_ENTRY_SYSMGT2 0x69
+#define DEV_ENTRY_INIT_PASS 0xb8
+#define DEV_ENTRY_EINT_PASS 0xb9
+#define DEV_ENTRY_NMI_PASS 0xba
+#define DEV_ENTRY_LINT0_PASS 0xbe
+#define DEV_ENTRY_LINT1_PASS 0xbf
+
+/* constants to configure the command buffer */
+#define CMD_BUFFER_SIZE 8192
+#define CMD_BUFFER_ENTRIES 512
+#define MMIO_CMD_SIZE_SHIFT 56
+#define MMIO_CMD_SIZE_512 (0x9ULL << MMIO_CMD_SIZE_SHIFT)
+
+#define PAGE_MODE_1_LEVEL 0x01
+#define PAGE_MODE_2_LEVEL 0x02
+#define PAGE_MODE_3_LEVEL 0x03
+
+#define IOMMU_PDE_NL_0 0x000ULL
+#define IOMMU_PDE_NL_1 0x200ULL
+#define IOMMU_PDE_NL_2 0x400ULL
+#define IOMMU_PDE_NL_3 0x600ULL
+
+#define IOMMU_PTE_L2_INDEX(address) (((address) >> 30) & 0x1ffULL)
+#define IOMMU_PTE_L1_INDEX(address) (((address) >> 21) & 0x1ffULL)
+#define IOMMU_PTE_L0_INDEX(address) (((address) >> 12) & 0x1ffULL)
+
+#define IOMMU_MAP_SIZE_L1 (1ULL << 21)
+#define IOMMU_MAP_SIZE_L2 (1ULL << 30)
+#define IOMMU_MAP_SIZE_L3 (1ULL << 39)
+
+#define IOMMU_PTE_P (1ULL << 0)
+#define IOMMU_PTE_U (1ULL << 59)
+#define IOMMU_PTE_FC (1ULL << 60)
+#define IOMMU_PTE_IR (1ULL << 61)
+#define IOMMU_PTE_IW (1ULL << 62)
+
+#define IOMMU_L1_PDE(address) \
+ ((address) | IOMMU_PDE_NL_1 | IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW)
+#define IOMMU_L2_PDE(address) \
+ ((address) | IOMMU_PDE_NL_2 | IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW)
+
+#define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL)
+#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P)
+#define IOMMU_PTE_PAGE(pte) (phys_to_virt((pte) & IOMMU_PAGE_MASK))
+#define IOMMU_PTE_MODE(pte) (((pte) >> 9) & 0x07)
+
+#define IOMMU_PROT_MASK 0x03
+#define IOMMU_PROT_IR 0x01
+#define IOMMU_PROT_IW 0x02
+
+/* IOMMU capabilities */
+#define IOMMU_CAP_IOTLB 24
+#define IOMMU_CAP_NPCACHE 26
+
+#define MAX_DOMAIN_ID 65536
+
+struct protection_domain {
+ spinlock_t lock;
+ u16 id;
+ int mode;
+ u64 *pt_root;
+ void *priv;
+};
+
+struct dma_ops_domain {
+ struct list_head list;
+ struct protection_domain domain;
+ unsigned long aperture_size;
+ unsigned long next_bit;
+ unsigned long *bitmap;
+ u64 **pte_pages;
+};
+
+struct amd_iommu {
+ struct list_head list;
+ spinlock_t lock;
+
+ u16 devid;
+ u16 cap_ptr;
+
+ u64 mmio_phys;
+ u8 *mmio_base;
+ u32 cap;
+ u16 first_device;
+ u16 last_device;
+ u64 exclusion_start;
+ u64 exclusion_length;
+
+ u8 *cmd_buf;
+ u32 cmd_buf_size;
+
+ int need_sync;
+
+ struct dma_ops_domain *default_dom;
+};
+
+extern struct list_head amd_iommu_list;
+
+struct dev_table_entry {
+ u32 data[8];
+};
+
+struct unity_map_entry {
+ struct list_head list;
+ u16 devid_start;
+ u16 devid_end;
+ u64 address_start;
+ u64 address_end;
+ int prot;
+};
+
+extern struct list_head amd_iommu_unity_map;
+
+/* data structures for device handling */
+extern struct dev_table_entry *amd_iommu_dev_table;
+extern u16 *amd_iommu_alias_table;
+extern struct amd_iommu **amd_iommu_rlookup_table;
+
+extern unsigned amd_iommu_aperture_order;
+
+extern u16 amd_iommu_last_bdf;
+
+/* data structures for protection domain handling */
+extern struct protection_domain **amd_iommu_pd_table;
+extern unsigned long *amd_iommu_pd_alloc_bitmap;
+
+extern int amd_iommu_isolate;
+
+static inline void print_devid(u16 devid, int nl)
+{
+ int bus = devid >> 8;
+ int dev = devid >> 3 & 0x1f;
+ int fn = devid & 0x07;
+
+ printk("%02x:%02x.%x", bus, dev, fn);
+ if (nl)
+ printk("\n");
+}
+
+#endif
--
1.5.3.7
This patch adds functions to find the last PCI bus/device/function the IOMMU
driver has to handle. This information is used later to allocate the memory for
the data structures.
Signed-off-by: Joerg Roedel <[email protected]>
---
arch/x86/kernel/amd_iommu_init.c | 79 ++++++++++++++++++++++++++++++++++++++
1 files changed, 79 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 0ad8cf9..ee0b2da 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -116,3 +116,82 @@ unsigned long *amd_iommu_pd_alloc_bitmap;
static u32 dev_table_size;
static u32 alias_table_size;
static u32 rlookup_table_size;
+
+static int __init find_last_devid_on_pci(int bus, int dev, int fn, int cap_ptr)
+{
+ u32 cap;
+
+ cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET);
+ UPDATE_LAST_BDF(DEVID(MMIO_GET_BUS(cap), MMIO_GET_LD(cap)));
+
+ return 0;
+}
+
+static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
+{
+ u8 *p = (void *)h, *end = (void *)h;
+ struct ivhd_entry *dev;
+
+ p += sizeof(*h);
+ end += h->length;
+
+ find_last_devid_on_pci(PCI_BUS(h->devid),
+ PCI_SLOT(h->devid),
+ PCI_FUNC(h->devid),
+ h->cap_ptr);
+
+ while (p < end) {
+ dev = (struct ivhd_entry *)p;
+ switch (dev->type) {
+ case IVHD_DEV_SELECT:
+ case IVHD_DEV_RANGE_END:
+ case IVHD_DEV_ALIAS:
+ case IVHD_DEV_EXT_SELECT:
+ UPDATE_LAST_BDF(dev->devid);
+ break;
+ default:
+ break;
+ }
+ p += 0x04 << (*p >> 6);
+ }
+
+ WARN_ON(p != end);
+
+ return 0;
+}
+
+static int __init find_last_devid_acpi(struct acpi_table_header *table)
+{
+ int i;
+ u8 checksum = 0, *p = (u8 *)table, *end = (u8 *)table;
+ struct ivhd_header *h;
+
+ /*
+ * Validate checksum here so we don't need to do it when
+ * we actually parse the table
+ */
+ for (i = 0; i < table->length; ++i)
+ checksum += p[i];
+ if (checksum != 0)
+ /* ACPI table corrupt */
+ return -ENODEV;
+
+ p += IVRS_HEADER_LENGTH;
+
+ end += table->length;
+ while (p < end) {
+ h = (struct ivhd_header *)p;
+ switch (h->type) {
+ case ACPI_IVHD_TYPE:
+ find_last_devid_from_ivhd(h);
+ break;
+ default:
+ break;
+ }
+ p += h->length;
+ }
+ WARN_ON(p != end);
+
+ return 0;
+}
+
--
1.5.3.7
On Thursday, 26 of June 2008, Joerg Roedel wrote:
> Hi,
Hi,
> this is the first post of the initial driver for AMD IOMMU hardware. The driver
> is tested on hardware with various loads (disk and network) and showed no
> problems.
>
> It currently supports DMA remapping using the dma_ops API in the x86
> architecture code. It also supports isolation of device DMA address spaces as
> far as the hardware allows that (means each device can get its own DMA address
> space and can't access the DMA memory of other devices).
>
> Please give this code a good review and send me your comments about it so that
> I can fix all possible bugs and objections and move this driver forward to
> merging quality.
Do you implement suspend/resume callbacks for the IOMMU and if so, which patch
in the series does introduce them?
Rafael
On Thu, Jun 26, 2008 at 10:37:14PM +0200, Rafael J. Wysocki wrote:
> On Thursday, 26 of June 2008, Joerg Roedel wrote:
> > Hi,
>
> Hi,
>
> > this is the first post of the initial driver for AMD IOMMU hardware. The driver
> > is tested on hardware with various loads (disk and network) and showed no
> > problems.
> >
> > It currently supports DMA remapping using the dma_ops API in the x86
> > architecture code. It also supports isolation of device DMA address spaces as
> > far as the hardware allows that (means each device can get its own DMA address
> > space and can't access the DMA memory of other devices).
> >
> > Please give this code a good review and send me your comments about it so that
> > I can fix all possible bugs and objections and move this driver forward to
> > merging quality.
>
> Do you implement suspend/resume callbacks for the IOMMU and if so, which patch
> in the series does introduce them?
No, these callbacks are not implemented yet. But I plan to do so in a
later version.
Joerg
--
| AMD Saxony Limited Liability Company & Co. KG
Operating | Wilschdorfer Landstr. 101, 01109 Dresden, Germany
System | Register Court Dresden: HRA 4896
Research | General Partner authorized to represent:
Center | AMD Saxony LLC (Wilmington, Delaware, US)
| General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy
On Thursday, 26 of June 2008, Joerg Roedel wrote:
> On Thu, Jun 26, 2008 at 10:37:14PM +0200, Rafael J. Wysocki wrote:
> > On Thursday, 26 of June 2008, Joerg Roedel wrote:
> > > Hi,
> >
> > Hi,
> >
> > > this is the first post of the initial driver for AMD IOMMU hardware. The driver
> > > is tested on hardware with various loads (disk and network) and showed no
> > > problems.
> > >
> > > It currently supports DMA remapping using the dma_ops API in the x86
> > > architecture code. It also supports isolation of device DMA address spaces as
> > > far as the hardware allows that (means each device can get its own DMA address
> > > space and can't access the DMA memory of other devices).
> > >
> > > Please give this code a good review and send me your comments about it so that
> > > I can fix all possible bugs and objections and move this driver forward to
> > > merging quality.
> >
> > Do you implement suspend/resume callbacks for the IOMMU and if so, which patch
> > in the series does introduce them?
>
> No, these callbacks are not implemented yet. But I plan to do so in a
> later version.
OK, thanks.
Rafael
* Joerg Roedel <[email protected]> wrote:
> Hi,
>
> this is the first post of the initial driver for AMD IOMMU hardware.
> The driver is tested on hardware with various loads (disk and network)
> and showed no problems.
>
> It currently supports DMA remapping using the dma_ops API in the x86
> architecture code. It also supports isolation of device DMA address
> spaces as far as the hardware allows that (means each device can get
> its own DMA address space and can't access the DMA memory of other
> devices).
the code looks very clean. I guess down the line we might want to think
about making all the externally visible knobs transparent: there should
be only one way to disable the iommu via the boot command line (be that
an AMD or Intel one), etc.
i have created a new -tip topic for this: tip/x86/amd-iommu and have
applied your patches there - thanks Joerg. You can pick up the
integrated tree via:
http://people.redhat.com/mingo/tip.git/README
please check whether the integrated end result still works fine.
[ Obviously nobody but you can test this on real hw ;-) ]
Ingo
-tip testing found a build failure with:
arch/x86/kernel/amd_iommu_init.c:247: warning: 'struct acpi_table_header' declared inside parameter list
arch/x86/kernel/amd_iommu_init.c:247: warning: its scope is only this definition or declaration, which is probably not what you want
arch/x86/kernel/amd_iommu_init.c: In function 'find_last_devid_acpi':
arch/x86/kernel/amd_iommu_init.c:257: error: dereferencing pointer to incomplete type
http://redhat.com/~mingo/misc/config-Fri_Jun_27_10_29_06_CEST_2008.bad
this is due to !CONFIG_ACPI.
since AMD IOMMU discovery depends on ACPI anyway, i've fixed this in the
Kconfig space. (Eventually, if you want to make it directly discoverable
via the PCI space in the future, you might want to fix this dependency.)
i've pushed out the fix below into -tip.
Ingo
Hi Ingo,
On Fri, Jun 27, 2008 at 12:07:51PM +0200, Ingo Molnar wrote:
>
> -tip testing found a build failure with:
>
> arch/x86/kernel/amd_iommu_init.c:247: warning: 'struct acpi_table_header' declared inside parameter list
> arch/x86/kernel/amd_iommu_init.c:247: warning: its scope is only this definition or declaration, which is probably not what you want
> arch/x86/kernel/amd_iommu_init.c: In function 'find_last_devid_acpi':
> arch/x86/kernel/amd_iommu_init.c:257: error: dereferencing pointer to incomplete type
>
> http://redhat.com/~mingo/misc/config-Fri_Jun_27_10_29_06_CEST_2008.bad
>
> this is due to !CONFIG_ACPI.
>
> since AMD IOMMU discovery depends on ACPI anyway, i've fixed this in the
> Kconfig space. (Eventually, if you want to make it directly discoverable
> via the PCI space in the future, you might want to fix this dependency.)
>
> i've pushed out the fix below into -tip.
Thank you for your testing and the fixes. I will test the result from
your tree.
Joerg
--
| AMD Saxony Limited Liability Company & Co. KG
Operating | Wilschdorfer Landstr. 101, 01109 Dresden, Germany
System | Register Court Dresden: HRA 4896
Research | General Partner authorized to represent:
Center | AMD Saxony LLC (Wilmington, Delaware, US)
| General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy
On Thu, Jun 26, 2008 at 09:27:37PM +0200, Joerg Roedel wrote:
> This patch adds the Kconfig entry for the AMD IOMMU driver.
>
> Signed-off-by: Joerg Roedel <[email protected]>
> ---
> arch/x86/Kconfig | 7 +++++++
> 1 files changed, 7 insertions(+), 0 deletions(-)
>
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index e0edaaa..5a82f18 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -549,6 +549,13 @@ config CALGARY_IOMMU_ENABLED_BY_DEFAULT
> Calgary anyway, pass 'iommu=calgary' on the kernel command line.
> If unsure, say Y.
>
> +config AMD_IOMMU
> + bool "AMD IOMMU support"
> + select SWIOTLB
> + depends on X86_64 && PCI
> + help
> + Select this to get support for AMD IOMMU hardware in your system.
> +
>...
This tells neither what as IOMMU is nor whether my system actually has
one.
Please shortly describe in the help text what an IOMMU is and which
AMD systems have an IOMMU.
The goal is that a system administrator building a kernel for his system
gets enough information for deciding whether to enable or disable this
option.
Thanks
Adrian
--
"Is there not promise of rain?" Ling Tan asked suddenly out
of the darkness. There had been need of rain for many days.
"Only a promise," Lao Er said.
Pearl S. Buck - Dragon Seed
Adrian Bunk <[email protected]> writes:
>
> This tells neither what as IOMMU is nor whether my system actually has
> one.
>
> Please shortly describe in the help text what an IOMMU is and which
> AMD systems have an IOMMU.
>
> The goal is that a system administrator building a kernel for his system
> gets enough information for deciding whether to enable or disable this
> option.
Also it should ideally describe that there is a trade off between
reliability and performance with IOMMU enabled.
-Andi
On Fri, Jun 27, 2008 at 04:47:29PM +0200, Andi Kleen wrote:
> Also it should ideally describe that there is a trade off between
> reliability and performance with IOMMU enabled.
I agree that there's a performance tradeoff with current generation
hardware and ingerfaces, but it wouldn't be fair to single out a
single IOMMU implementation ;=)
config DMAR
bool "Support for DMA Remapping Devices (EXPERIMENTAL)"
depends on X86_64 && PCI_MSI && ACPI && EXPERIMENTAL
help
DMA remapping (DMAR) devices support enables independent address
translations for Direct Memory Access (DMA) from devices.
These DMA remapping devices are reported via ACPI tables
and include PCI device scope covered by these DMA
remapping devices.
Cheers,
Muli
On Fri, Jun 27, 2008 at 12:39:45PM -0400, Muli Ben-Yehuda wrote:
> On Fri, Jun 27, 2008 at 04:47:29PM +0200, Andi Kleen wrote:
>
> > Also it should ideally describe that there is a trade off between
> > reliability and performance with IOMMU enabled.
>
> I agree that there's a performance tradeoff with current generation
> hardware and interfaces, but it wouldn't be fair to single out a
> single IOMMU implementation ;=)
True. At least for the case without device isolation I have some
optimizations in mind which will minimize the performance tradeoff. I
hope to have them ready for 2.6.28 :)
Joerg.
On Fri, Jun 27, 2008 at 06:54:30PM +0200, Joerg Roedel wrote:
> True. At least for the case without device isolation I have some
> optimizations in mind which will minimize the performance
> tradeoff. I hope to have them ready for 2.6.28 :)
Do you mean the case where you have a single I/O address space which
is shared by all devices?
Cheers,
Muli
On Fri, Jun 27, 2008 at 12:59:47PM -0400, Muli Ben-Yehuda wrote:
> On Fri, Jun 27, 2008 at 06:54:30PM +0200, Joerg Roedel wrote:
>
> > True. At least for the case without device isolation I have some
> > optimizations in mind which will minimize the performance
> > tradeoff. I hope to have them ready for 2.6.28 :)
>
> Do you mean the case where you have a single I/O address space which
> is shared by all devices?
Yes. I think this will be the case used most when IOMMU is used for
virtualization and to handle devices with limited DMA address ranges. In
this case there is a lot to optimize.
Joerg
On Fri, Jun 27, 2008 at 05:25:58PM +0300, Adrian Bunk wrote:
> On Thu, Jun 26, 2008 at 09:27:37PM +0200, Joerg Roedel wrote:
> > This patch adds the Kconfig entry for the AMD IOMMU driver.
> >
> > Signed-off-by: Joerg Roedel <[email protected]>
> > ---
> > arch/x86/Kconfig | 7 +++++++
> > 1 files changed, 7 insertions(+), 0 deletions(-)
> >
> > diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> > index e0edaaa..5a82f18 100644
> > --- a/arch/x86/Kconfig
> > +++ b/arch/x86/Kconfig
> > @@ -549,6 +549,13 @@ config CALGARY_IOMMU_ENABLED_BY_DEFAULT
> > Calgary anyway, pass 'iommu=calgary' on the kernel command line.
> > If unsure, say Y.
> >
> > +config AMD_IOMMU
> > + bool "AMD IOMMU support"
> > + select SWIOTLB
> > + depends on X86_64 && PCI
> > + help
> > + Select this to get support for AMD IOMMU hardware in your system.
> > +
> >...
>
> This tells neither what as IOMMU is nor whether my system actually has
> one.
>
> Please shortly describe in the help text what an IOMMU is and which
> AMD systems have an IOMMU.
>
> The goal is that a system administrator building a kernel for his system
> gets enough information for deciding whether to enable or disable this
> option.
Ok, I will provide a new help text as an update to this patch series.
Joerg
On Fri, Jun 27, 2008 at 07:05:46PM +0200, Joerg Roedel wrote:
> On Fri, Jun 27, 2008 at 12:59:47PM -0400, Muli Ben-Yehuda wrote:
> > On Fri, Jun 27, 2008 at 06:54:30PM +0200, Joerg Roedel wrote:
> >
> > > True. At least for the case without device isolation I have some
> > > optimizations in mind which will minimize the performance
> > > tradeoff. I hope to have them ready for 2.6.28 :)
> >
> > Do you mean the case where you have a single I/O address space which
> > is shared by all devices?
>
> Yes. I think this will be the case used most when IOMMU is used for
> virtualization
Could you elaborate on what you mean here? I assume you're thinking
one I/O address space for the host, and one I/O address space per
guest with assigned devices?
> and to handle devices with limited DMA address ranges.
I'd be pretty surprised if you'll find such devices on machines which
will have AMD's IOMMU...
> In this case there is a lot to optimize.
Looking forward to seeing what you have in mind :-)
Cheers,
Muli
On Fri, Jun 27, 2008 at 01:12:01PM -0400, Muli Ben-Yehuda wrote:
> On Fri, Jun 27, 2008 at 07:05:46PM +0200, Joerg Roedel wrote:
> > On Fri, Jun 27, 2008 at 12:59:47PM -0400, Muli Ben-Yehuda wrote:
> > > On Fri, Jun 27, 2008 at 06:54:30PM +0200, Joerg Roedel wrote:
> > >
> > > > True. At least for the case without device isolation I have some
> > > > optimizations in mind which will minimize the performance
> > > > tradeoff. I hope to have them ready for 2.6.28 :)
> > >
> > > Do you mean the case where you have a single I/O address space which
> > > is shared by all devices?
> >
> > Yes. I think this will be the case used most when IOMMU is used for
> > virtualization
>
> Could you elaborate on what you mean here? I assume you're thinking
> one I/O address space for the host, and one I/O address space per
> guest with assigned devices?
I think we can create an address space which almost direct-maps the
physical memory and let some room free for the aperture at the beginning
(say 64MB). If a mapping request arrives the code looks if it has to do
mapping (physical address of memory to map is in the first 64MB or
not in the device address range). If this is not the case it simply
> returns the physical address as dma_addr. Otherwise it does the
expensive mapping. This way we could minimize the default overhead which
we will get with an IOMMU and still use it for virtualization and as a
GART replacement.
> > and to handle devices with limited DMA address ranges.
>
> I'd be pretty surprised if you'll find such devices on machines which
> will have AMD's IOMMU...
Think of 32bit PCI devices in a host with more than 4GB memory :)
Cheers,
Joerg
On Fri, Jun 27, 2008 at 07:20:30PM +0200, Joerg Roedel wrote:
> > Could you elaborate on what you mean here? I assume you're
> > thinking one I/O address space for the host, and one I/O address
> > space per guest with assigned devices?
>
> I think we can create an address space which almost direct-maps the
> physical memory and let some room free for the aperture at the
> beginning (say 64MB). If a mapping request arrives the code looks if
> it has to do mapping (physical address of memory to map is in the
> first 64MB or not in the device address range). If this is not the
> case it simply returns the physical address as dma_addr. Otherwise
> it does the expensive mapping. This way we could minimize the
> default overhead which we will get with an IOMMU and still use it
> for virtualization and as a GART replacement.
What you are suggesting is an "almost-direct-map" approach for the
host I/O address space, which provides no protection from mis-behaving
host drivers. If we could avoid needing a GART replacement (see below
for why I think we could), you could simply avoid enabling translation
for host devices and be done with it.
In my humble opinion it's more interesting to try and figure out how
to get protection from mis-behaving host drivers while still keeping
performance as close as possible to native.
> > > and to handle devices with limited DMA address ranges.
> >
> > I'd be pretty surprised if you'll find such devices on machines which
> > will have AMD's IOMMU...
>
> Think of 32bit PCI devices in a host with more than 4GB memory :)
I am thinking of them and I'd be surprised if you'd find any in such
machines. Certainly I assume none of the on-board devices will have
this ancient limitation. But hey, it could happen ;-)
Cheers,
Muli
On Fri, Jun 27, 2008 at 01:31:00PM -0400, Muli Ben-Yehuda wrote:
> On Fri, Jun 27, 2008 at 07:20:30PM +0200, Joerg Roedel wrote:
>
> > > Could you elaborate on what you mean here? I assume you're
> > > thinking one I/O address space for the host, and one I/O address
> > > space per guest with assigned devices?
> >
> > I think we can create an address space which almost direct-maps the
> > physical memory and let some room free for the aperture at the
> > beginning (say 64MB). If a mapping request arrives the code looks if
> > it has to do mapping (physical address of memory to map is in the
> > first 64MB or not in the device address range). If this is not the
> > case it simply returns the physical address as dma_addr. otherwise
> > it does the expensive mapping. This way we could minimize the
> > default overhead which we will get with an IOMMU and still use it
> > for virtualization and as a GART replacement.
>
> What you are suggesting is an "almost-direct-map" approach for the
> host I/O address space, which provides no protection from mis-behaving
> host drivers. If we could avoid needing a GART replacement (see below
> for why I think we could), you could simply avoid enabling translation
> for host devices and be done with it.
Yes. As I said, this is for the non-isolating case. For the isolation
case (which is needed for protection) it is harder to optimize. But
there I think about some sort of lazy IOMMU TLB flushing. The flushing
and 'wait for the flush to finish' is the most expensive part in the
mapping and unmapping code path. But this needs some experiments.
> In my humble opinion it's more interesting to try and figure out how
> to get protection from mis-behaving host drivers while still keeping
> performance as close as possible to native.
True. But I also see IOMMU as an device usable to pass devices to
virtualization guests. In this case you don't necessarily want device
isolation in the host (for devices only the host uses). So the
optimization for the non-isolation case is also important imho.
> > > > and to handle devices with limited DMA address ranges.
> > >
> > > I'd be pretty surprised if you'll find such devices on machines which
> > > will have AMD's IOMMU...
> >
> > Think of 32bit PCI devices in a host with more than 4GB memory :)
>
> I am thinking of them and I'd be surprised if you'd find any in such
> machines. Certainly I assume none of the on-board devices will have
> this ancient limitation. But hey, it could happen ;-)
The IOMMU machine under my desk has a 32bit PCI slot with a card in it
:-)
Cheers,
Joerg
On Fri, Jun 27, 2008 at 07:40:35PM +0200, Joerg Roedel wrote:
> Yes. As I said, this is for the non-isolating case. For the
> isolation case (which is needed for protection) it is harder to
> optimize. But there I think about some sort of lazy IOMMU TLB
> flushing. The flushing and 'wait for the flush to finish' is the
> most expensive part in the mapping and unmapping code path. But this
> needs some experiments.
Agreed. The paper I just posted to the iommu mailing list has some
ideas and experimental data.
> > I am thinking of them and I'd be surprised if you'd find any in
> > such machines. Certainly I assume none of the on-board devices
> > will have this ancient limitation. But hey, it could happen ;-)
>
> The IOMMU machine under my desk has a 32bit PCI slot with a card in
> it :-)
Touche :-)
Cheers,
Muli
On Fri, Jun 27, 2008 at 01:44:15PM -0400, Muli Ben-Yehuda wrote:
> On Fri, Jun 27, 2008 at 07:40:35PM +0200, Joerg Roedel wrote:
>
> > Yes. As I said, this is for the non-isolating case. For the
> > isolation case (which is needed for protection) it is harder to
> > optimize. But there I think about some sort of lazy IOMMU TLB
> > flushing. The flushing and 'wait for the flush to finish' is the
> > most expensive part in the mapping and unmapping code path. But this
> > needs some experiments.
>
> Agreed. The paper I just posted to the iommu mailing list has some
> ideas and experimental data.
Cool. I wasn't aware of that paper. I will read it next week. Thanks for
the pointer to it.
Joerg
Joerg Roedel <[email protected]> writes:
> On Fri, Jun 27, 2008 at 12:39:45PM -0400, Muli Ben-Yehuda wrote:
>> On Fri, Jun 27, 2008 at 04:47:29PM +0200, Andi Kleen wrote:
>>
>> > Also it should ideally describe that there is a trade off between
>> > reliability and performance with IOMMU enabled.
>>
>> I agree that there's a performance tradeoff with current generation
>> hardware and interfaces, but it wouldn't be fair to single out a
>> single IOMMU implementation ;=)
>
> True. At least for the case without device isolation I have some
> optimizations in mind which will minimize the performance tradeoff. I
> hope to have them ready for 2.6.28 :)
Not sure that would be very useful. Outside of virtualization device
isolation is the main feature of an IOMMU on a native kernel.
-Andi
On Fri, Jun 27, 2008 at 12:06 PM, Joerg Roedel wrote
> On Fri, Jun 27, 2008 at 12:59:47PM -0400, Muli Ben-Yehuda wrote:
> > On Fri, Jun 27, 2008 at 06:54:30PM +0200, Joerg Roedel wrote:
> >
> > > True. At least for the case without device isolation I have some
> > > optimizations in mind which will minimize the performance
> > > tradeoff. I hope to have them ready for 2.6.28 :)
> >
> > Do you mean the case where you have a single I/O address space which
> > is shared by all devices?
>
> Yes. I think this will be the case used most when IOMMU is used for
> virtualization and to handle devices with limited DMA address ranges.
> In
> this case there is a lot to optimize.
>
With the AMD IOMMU there are basically two options with regards to
(untranslated) DMA handling:
1) IOMMU will translate using page tables, which can be set on per
device-table-entry basis; sharing page tables between devices could be
considered a 'resource usage optimization', with the caveat of not being
able to provide protection for devices sharing the page tables.
2) IOMMU will not translate if the exclusion range has been enabled, and
the DMA address falls inside that range.
The exclusion range can be enabled for specific devices, or for all
devices... Enabling the exclusion range can be considered a 'performance
optimization' (no table-walks), with the caveat of not being able to
provide protection for devices sharing the exclusion range (BTW, there's
a single exclusion address range per IOMMU).
Leo.
On Fri, Jun 27, 2008 at 10:18:08AM +0200, Ingo Molnar wrote:
>
> * Joerg Roedel <[email protected]> wrote:
>
> > Hi,
> >
> > this is the first post of the initial driver for AMD IOMMU hardware.
> > The driver is tested on hardware with various loads (disk and network)
> > and showed no problems.
> >
> > It currently supports DMA remapping using the dma_ops API in the x86
> > architecture code. It also supports isolation of device DMA address
> > spaces as far as the hardware allows that (means each device can get
> > its own DMA address space and can't access the DMA memory of other
> > devices).
>
> the code looks very clean. I guess down the line we might want to think
> about making all the externally visible knobs transparent: there should
> be only one way to disable the iommu via the boot command line (be that
> an AMD or Intel one), etc.
>
> i have created a new -tip topic for this: tip/x86/amd-iommu and have
> applied your patches there - thanks Joerg. You can pick up the
> integrated tree via:
>
> http://people.redhat.com/mingo/tip.git/README
>
> please check whether the integrated end result still works fine.
> [ Obviously nobody but you can test this on real hw ;-) ]
Ok, I am testing the tip-branch for AMD IOMMU currently on real hardware
and everything looks very good. I will continue with load tests over the
weekend to be sure.
Joerg
--
| AMD Saxony Limited Liability Company & Co. KG
Operating | Wilschdorfer Landstr. 101, 01109 Dresden, Germany
System | Register Court Dresden: HRA 4896
Research | General Partner authorized to represent:
Center | AMD Saxony LLC (Wilmington, Delaware, US)
| General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy
On Fri, Jun 27, 2008 at 03:39:31PM -0500, Duran, Leo wrote:
> 2) IOMMU will not translate if the exclusion range has been enabled, and
> the DMA address falls inside that range.
> The exclusion range can be enabled for specific devices, or for all
> devices... Enabling the exclusion range can be considered a 'performance
> optimization' (no table-walks), with the caveat of not being able to
> provide protection for devices sharing the exclusion range (BTW, there's
> a single exclusion address range per IOMMU).
So, if I understand this correctly, could we implement Joerg's
"almost-direct-map" by having 0-64MB translated for host-owned
devices, and then 64MB-end excluded (again, for host-owned devices
only)? If yes, it should provide a small boost in performance (may or
may not be measurable) over having 64MB-end be an identity
translation.
Cheers,
Muli
On Friday, June 27, 2008 5:30 PM, Muli Ben-Yehuda wrote:
> > 2) IOMMU will not translate if the exclusion range has been enabled,
> and
> > the DMA address falls inside that range.
> > The exclusion range can be enabled for specific devices, or for all
> > devices... Enabling the exclusion range can be considered a
> 'performance
> > optimization' (no table-walks), with the caveat of not being able to
> > provide protection for devices sharing the exclusion range (BTW,
> there's
> > a single exclusion address range per IOMMU).
>
> So, if I understand this correctly, could we implement Joerg's
> "almost-direct-map" by having 0-64MB translated for host-owned
> devices, and then 64MB-end excluded (again, for host-owned devices
> only)? If yes, it should provide a small boost in performance (may or
> may not be measurable) over having 64MB-end be an identity
> translation.
>
> Cheers,
> Muli
Hi Muli,
Yes, you could set an exclusion range for addresses above the virtual
address space (or 'translation aperture').
I played with that in the context of "dma.ops" (not surprisingly, the
exclusion range was pretty busy!).
Again, keep in mind: no protection in the exclusion range (sort of
defeating the purpose of having an IOMMU in the first place), and only
ONE exclusion range per IOMMU.
Leo.
On Fri, Jun 27, 2008 at 08:54:59PM +0200, Andi Kleen wrote:
> Joerg Roedel <[email protected]> writes:
>
> > On Fri, Jun 27, 2008 at 12:39:45PM -0400, Muli Ben-Yehuda wrote:
> >> On Fri, Jun 27, 2008 at 04:47:29PM +0200, Andi Kleen wrote:
> >>
> >> > Also it should ideally describe that there is a trade off between
> >> > reliability and performance with IOMMU enabled.
> >>
> >> I agree that there's a performance tradeoff with current generation
> >> hardware and ingerfaces, but it wouldn't be fair to single out a
> >> single IOMMU implementation ;=)
> >
> > True. At least for the case without device isolation I have some
> > optimizations in mind which will minimize the performance tradeoff. I
> > hope to have them ready for 2.6.28 :)
>
> Not sure that would be very useful. Outside of virtualization device
> isolation is the main feature of an IOMMU on a native kernel.
I agree that device isolation is one of the main usage scenarios for
an IOMMU. But if somebody want to use it for virtualization and doesn't
need device isolation this optimization would be useful.
Joerg
On Fri, Jun 27, 2008 at 05:47:56PM -0500, Duran, Leo wrote:
> On Friday, June 27, 2008 5:30 PM, Muli Ben-Yehuda wrote:
>
> > > 2) IOMMU will not translate if the exclusion range has been enabled,
> > and
> > > the DMA address falls inside that range.
> > > The exclusion range can be enabled for specific devices, or for all
> > > devices... Enabling the exclusion range can be considered a
> > 'performance
> > > optimization' (no table-walks), with the caveat of not being able to
> > > provide protection for devices sharing the exclusion range (BTW,
> > there's
> > > a single exclusion address range per IOMMU).
> >
> > So, if I understand this correctly, could we implement Joerg's
> > "almost-direct-map" by having 0-64MB translated for host-owned
> > devices, and then 64MB-end excluded (again, for host-owned devices
> > only)? If yes, it should provide a small boost in performance (may or
> > may not be measurable) over having 64MB-end be an identity
> > translation.
> >
> > Cheers,
> > Muli
>
> Hi Muli,
>
> Yes, you could set an exclusion range for addresses about the virtual
> address space (or 'translation aperture').
> I played with that in the context of "dma.ops" (not surprisingly, the
> exclusion range was pretty busy!).
>
> Again, keep in mind: no protection in the exclusion range (sort of
> defeating the purpose of having an IOMMU in the first place), and only
> ONE exclusion range per IOMMU.
Yes, there is only one exclusion range per IOMMU. The problem is that an
exclusion range from 64MB to the end may not be possible because there is
an exclusion range already configured in the ACPI table. On my System for
example the exclusion range is somewhere in the first megabyte of RAM.
In this case the direct mapping using page tables is needed. If there is
no exclusion range defined in ACPI this idea would work of course.
Joerg
On Saturday, June 28, 2008 6:04 AM, Joerg Roedel wrote:
> Yes, there is only one exclusion range per IOMMU. The problem is that
> an
> exclusion range from 64MB to the end may not be possible because there
> is
> an exclusion range already configured in the ACPI table. On my System
> for
> example the exclusion range is somewhere in the first megabyte of RAM.
> In this case the direct mapping using page tables is needed. If there
> is
> no exclusion range defined in ACPI this idea would work of course.
>
> Joerg
>
Direct 1:1 mappings have a couple of issues:
1) They don't provide a performance optimization (i.e., table-walk still
required)
2) Your aperture would have to be large enough so that virtual==physical
(i.e., lots of memory for page-tables)
Leo.
On Saturday, June 28, 2008 6:04 AM, Joerg Roedel wrote:
> Yes, there is only one exclusion range per IOMMU. The problem is that
> an exclusion range from 64MB to the end may not be possible because
> there is an exclusion range already configured in the ACPI table. On
> my System for example the exclusion range is somewhere in the first
> megabyte of RAM.
> In this case the direct mapping using page tables is needed. If there
> is no exclusion range defined in ACPI this idea would work of course.
>
> Joerg
>
It would be OK to have ONE all inclusive exclusion range (e.g., 64MB to
'end' if you want).
That is, any pre-defined areas would simply fall inside the larger
range.
And, if the ACPI table calls for exclusion of a range inside the
aperture (e.g., inside 64MB),
that range would have to be 'reserved', and set for 1:1 mapping.
Leo.
On Sat, Jun 28, 2008 at 09:40:57AM -0500, Duran, Leo wrote:
> On Saturday, June 28, 2008 6:04 AM, Joerg Roedel wrote:
>
> > Yes, there is only one exclusion range per IOMMU. The problem is that
> > an
> > exclusion range from 64MB to the end may not be possible because there
> > is
> > an exclusion range already configured in the ACPI table. On my System
> > for
> > example the exclusion range is somewhere in the first megabyte of RAM.
> > In this case the direct mapping using page tables is needed. If there
> > is
> > no exclusion range defined in ACPI this idea would work of course.
> >
> > Joerg
> >
>
> Direct 1:1 mappings have a couple of issues:
> 1) They don't provide a performance optimization (i.e., table-walk still
> required)
True. But the table walk is nothing which could be optimized in
software. The optimization here is that we don't need to map/unmap on
each dma_ops call. Mapping/Unmapping requires TLB flushing and
completion_wait. This is expensive for the CPU. But direct mapping of
exclusion ranges inside the aperture and marking the area from 64MB to the
end as the exclusion range as you suggested could be a real option.
> 2) Your aperture would have to be large enough so that virtual==physical
> (i.e., lots of memory for page-tables)
The AMD IOMMU supports multiple page sizes from 4kb up to 4GB. I don't
think that we need too much memory for the page tables.
Joerg
On Thu, 26 Jun 2008 21:28:02 +0200
Joerg Roedel <[email protected]> wrote:
> This patch adds the dma_ops functions for mapping and unmapping scatter gather
> lists.
>
> Signed-off-by: Joerg Roedel <[email protected]>
> ---
> arch/x86/kernel/amd_iommu.c | 98 +++++++++++++++++++++++++++++++++++++++++++
> 1 files changed, 98 insertions(+), 0 deletions(-)
>
> diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
> index b4079f6..f4747fe 100644
> --- a/arch/x86/kernel/amd_iommu.c
> +++ b/arch/x86/kernel/amd_iommu.c
> @@ -700,3 +700,101 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr,
> spin_unlock_irqrestore(&domain->lock, flags);
> }
>
> +static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist,
> + int nelems, int dir)
> +{
> + struct scatterlist *s;
> + int i;
> +
> + for_each_sg(sglist, s, nelems, i) {
> + s->dma_address = (dma_addr_t)sg_phys(s);
> + s->dma_length = s->length;
> + }
> +
> + return nelems;
> +}
> +
> +static int map_sg(struct device *dev, struct scatterlist *sglist,
> + int nelems, int dir)
> +{
> + unsigned long flags;
> + struct amd_iommu *iommu;
> + struct protection_domain *domain;
> + u16 devid;
> + int i;
> + struct scatterlist *s;
> + phys_addr_t paddr;
> + int mapped_elems = 0;
> +
> + get_device_resources(dev, &iommu, &domain, &devid);
> +
> + if (!iommu || !domain)
> + return map_sg_no_iommu(dev, sglist, nelems, dir);
VT-d put a pointer in struct dev_archdata to store IOMMU info per
device. Is it useful for you too?
On Thu, 26 Jun 2008 21:27:38 +0200
Joerg Roedel <[email protected]> wrote:
> This patch adds a header file local to the AMD IOMMU driver with constants and
> data structures needed in the code.
>
> Signed-off-by: Joerg Roedel <[email protected]>
> ---
> include/asm-x86/amd_iommu_types.h | 242 +++++++++++++++++++++++++++++++++++++
> 1 files changed, 242 insertions(+), 0 deletions(-)
> create mode 100644 include/asm-x86/amd_iommu_types.h
Is it necessary to make this file visible to everyone by putting this in
include/asm?
On Thu, 26 Jun 2008 21:28:10 +0200
Joerg Roedel <[email protected]> wrote:
> Add documentation for the kernel parameters introduced with this driver.
>
> Signed-off-by: Joerg Roedel <[email protected]>
> ---
> Documentation/kernel-parameters.txt | 12 ++++++++++++
> 1 files changed, 12 insertions(+), 0 deletions(-)
>
> diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
> index e07c432..fc8f936 100644
> --- a/Documentation/kernel-parameters.txt
> +++ b/Documentation/kernel-parameters.txt
> @@ -271,6 +271,18 @@ and is between 256 and 4096 characters. It is defined in the file
> aic79xx= [HW,SCSI]
> See Documentation/scsi/aic79xx.txt.
>
> + amd_iommu= [HW,X86-84]
> + Pass parameters to the AMD IOMMU driver in the system.
> + Possible values are:
> + off - disable the driver for AMD IOMMU
> + isolate - enable device isolation (each device, as far
> + as possible, will get its own protection
> + domain)
> + amd_iommu_size= [HW,X86-64]
> + Define the size of the aperture for the AMD IOMMU
> + driver. Possible values are:
> + '32M', '64M' (default), '128M', '256M', '512M', '1G'
> +
> amijoy.map= [HW,JOY] Amiga joystick support
> Map of devices attached to JOY0DAT and JOY1DAT
> Format: <a>,<b>
Documentation/x86_64/boot-options.txt would be a better place?
Probably, it would be better to clean up x86_64 IOMMU options.
On Thu, 26 Jun 2008 21:27:57 +0200
Joerg Roedel <[email protected]> wrote:
> This patch adds the address allocator to the AMD IOMMU code.
>
> Signed-off-by: Joerg Roedel <[email protected]>
> ---
> arch/x86/kernel/amd_iommu.c | 48 +++++++++++++++++++++++++++++++++++++++++++
> 1 files changed, 48 insertions(+), 0 deletions(-)
>
> diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
> index 1d70f5e..69d8d02 100644
> --- a/arch/x86/kernel/amd_iommu.c
> +++ b/arch/x86/kernel/amd_iommu.c
> @@ -266,3 +266,51 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
> return 0;
> }
>
> +static unsigned long dma_mask_to_pages(unsigned long mask)
> +{
> + return (mask >> PAGE_SHIFT) +
> + (PAGE_ALIGN(mask & ~PAGE_MASK) >> PAGE_SHIFT);
> +}
> +
> +static unsigned long dma_ops_alloc_addresses(struct device *dev,
> + struct dma_ops_domain *dom,
> + unsigned int pages)
> +{
> + unsigned long limit = dma_mask_to_pages(*dev->dma_mask);
> + unsigned long address;
> + unsigned long size = dom->aperture_size >> PAGE_SHIFT;
> + unsigned long boundary_size;
> +
> + boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
> + PAGE_SIZE) >> PAGE_SHIFT;
> + limit = limit < size ? limit : size;
> +
> + if (dom->next_bit >= limit)
> + dom->next_bit = 0;
> +
> + address = iommu_area_alloc(dom->bitmap, limit, dom->next_bit, pages,
> + 0 , boundary_size, 0);
> + if (address == -1)
> + address = iommu_area_alloc(dom->bitmap, limit, 0, pages,
> + 0, boundary_size, 0);
> +
> + if (likely(address != -1)) {
> + set_bit_string(dom->bitmap, address, pages);
Is set_bit_string() necessary here? iommu_area_alloc calls it internally.
On Mon, Jun 30, 2008 at 12:07:10AM +0900, FUJITA Tomonori wrote:
> On Thu, 26 Jun 2008 21:27:57 +0200
> Joerg Roedel <[email protected]> wrote:
>
> > This patch adds the address allocator to the AMD IOMMU code.
> >
> > Signed-off-by: Joerg Roedel <[email protected]>
> > ---
> > arch/x86/kernel/amd_iommu.c | 48 +++++++++++++++++++++++++++++++++++++++++++
> > 1 files changed, 48 insertions(+), 0 deletions(-)
> >
> > diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
> > index 1d70f5e..69d8d02 100644
> > --- a/arch/x86/kernel/amd_iommu.c
> > +++ b/arch/x86/kernel/amd_iommu.c
> > @@ -266,3 +266,51 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
> > return 0;
> > }
> >
> > +static unsigned long dma_mask_to_pages(unsigned long mask)
> > +{
> > + return (mask >> PAGE_SHIFT) +
> > + (PAGE_ALIGN(mask & ~PAGE_MASK) >> PAGE_SHIFT);
> > +}
> > +
> > +static unsigned long dma_ops_alloc_addresses(struct device *dev,
> > + struct dma_ops_domain *dom,
> > + unsigned int pages)
> > +{
> > + unsigned long limit = dma_mask_to_pages(*dev->dma_mask);
> > + unsigned long address;
> > + unsigned long size = dom->aperture_size >> PAGE_SHIFT;
> > + unsigned long boundary_size;
> > +
> > + boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
> > + PAGE_SIZE) >> PAGE_SHIFT;
> > + limit = limit < size ? limit : size;
> > +
> > + if (dom->next_bit >= limit)
> > + dom->next_bit = 0;
> > +
> > + address = iommu_area_alloc(dom->bitmap, limit, dom->next_bit, pages,
> > + 0 , boundary_size, 0);
> > + if (address == -1)
> > + address = iommu_area_alloc(dom->bitmap, limit, 0, pages,
> > + 0, boundary_size, 0);
> > +
> > + if (likely(address != -1)) {
> > + set_bit_string(dom->bitmap, address, pages);
>
> Is set_bit_string() necessary here? iommu_area_alloc calls it internally.
>
Probably not. I added it during debugging because the GART driver has it
in that code path too. I will remove it with a follow-up patch.
Joerg
--
| AMD Saxony Limited Liability Company & Co. KG
Operating | Wilschdorfer Landstr. 101, 01109 Dresden, Germany
System | Register Court Dresden: HRA 4896
Research | General Partner authorized to represent:
Center | AMD Saxony LLC (Wilmington, Delaware, US)
| General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy
On Mon, Jun 30, 2008 at 12:07:15AM +0900, FUJITA Tomonori wrote:
> On Thu, 26 Jun 2008 21:27:38 +0200
> Joerg Roedel <[email protected]> wrote:
>
> > This patch adds a header file local to the AMD IOMMU driver with constants and
> > data structures needed in the code.
> >
> > Signed-off-by: Joerg Roedel <[email protected]>
> > ---
> > include/asm-x86/amd_iommu_types.h | 242 +++++++++++++++++++++++++++++++++++++
> > 1 files changed, 242 insertions(+), 0 deletions(-)
> > create mode 100644 include/asm-x86/amd_iommu_types.h
>
> Is necessary to make this file visible to everyone by putting this in
> include/asm?
The alternative was to put this file in arch/x86/kernel/. But I don't
think it's the right place for include files. We have include/ for that.
Joerg
--
| AMD Saxony Limited Liability Company & Co. KG
Operating | Wilschdorfer Landstr. 101, 01109 Dresden, Germany
System | Register Court Dresden: HRA 4896
Research | General Partner authorized to represent:
Center | AMD Saxony LLC (Wilmington, Delaware, US)
| General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy
On Sun, 29 Jun 2008 17:17:15 +0200
Joerg Roedel <[email protected]> wrote:
> On Mon, Jun 30, 2008 at 12:07:10AM +0900, FUJITA Tomonori wrote:
> > On Thu, 26 Jun 2008 21:27:57 +0200
> > Joerg Roedel <[email protected]> wrote:
> >
> > > This patch adds the address allocator to the AMD IOMMU code.
> > >
> > > Signed-off-by: Joerg Roedel <[email protected]>
> > > ---
> > > arch/x86/kernel/amd_iommu.c | 48 +++++++++++++++++++++++++++++++++++++++++++
> > > 1 files changed, 48 insertions(+), 0 deletions(-)
> > >
> > > diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
> > > index 1d70f5e..69d8d02 100644
> > > --- a/arch/x86/kernel/amd_iommu.c
> > > +++ b/arch/x86/kernel/amd_iommu.c
> > > @@ -266,3 +266,51 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
> > > return 0;
> > > }
> > >
> > > +static unsigned long dma_mask_to_pages(unsigned long mask)
> > > +{
> > > + return (mask >> PAGE_SHIFT) +
> > > + (PAGE_ALIGN(mask & ~PAGE_MASK) >> PAGE_SHIFT);
> > > +}
> > > +
> > > +static unsigned long dma_ops_alloc_addresses(struct device *dev,
> > > + struct dma_ops_domain *dom,
> > > + unsigned int pages)
> > > +{
> > > + unsigned long limit = dma_mask_to_pages(*dev->dma_mask);
> > > + unsigned long address;
> > > + unsigned long size = dom->aperture_size >> PAGE_SHIFT;
> > > + unsigned long boundary_size;
> > > +
> > > + boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
> > > + PAGE_SIZE) >> PAGE_SHIFT;
> > > + limit = limit < size ? limit : size;
> > > +
> > > + if (dom->next_bit >= limit)
> > > + dom->next_bit = 0;
> > > +
> > > + address = iommu_area_alloc(dom->bitmap, limit, dom->next_bit, pages,
> > > + 0 , boundary_size, 0);
> > > + if (address == -1)
> > > + address = iommu_area_alloc(dom->bitmap, limit, 0, pages,
> > > + 0, boundary_size, 0);
> > > +
> > > + if (likely(address != -1)) {
> > > + set_bit_string(dom->bitmap, address, pages);
> >
> > Is set_bit_string() necessary here? iommu_area_alloc calls it internally.
> >
>
> Probably not. I added it during debugging because the GART driver has it
> in that code path too. I will remove it with an follow-up patch.
Oops, seems that I left set_bit_string in the GART driver when I
converted it to use the IOMMU helpers.
On Sun, 29 Jun 2008 17:14:54 +0200
Joerg Roedel <[email protected]> wrote:
> On Mon, Jun 30, 2008 at 12:07:15AM +0900, FUJITA Tomonori wrote:
> > On Thu, 26 Jun 2008 21:27:38 +0200
> > Joerg Roedel <[email protected]> wrote:
> >
> > > This patch adds a header file local to the AMD IOMMU driver with constants and
> > > data structures needed in the code.
> > >
> > > Signed-off-by: Joerg Roedel <[email protected]>
> > > ---
> > > include/asm-x86/amd_iommu_types.h | 242 +++++++++++++++++++++++++++++++++++++
> > > 1 files changed, 242 insertions(+), 0 deletions(-)
> > > create mode 100644 include/asm-x86/amd_iommu_types.h
> >
> > Is necessary to make this file visible to everyone by putting this in
> > include/asm?
>
> The alternative was to put this file in arch/x86/kernel/. But I don't
> think its the right place for include files. We have include/ for that.
Hmm, really? It's the right thing in the SCSI subsystem at least. You
don't need to put a header file having only private stuff in include/.
* FUJITA Tomonori <[email protected]> wrote:
> > > > Signed-off-by: Joerg Roedel <[email protected]>
> > > > ---
> > > > include/asm-x86/amd_iommu_types.h | 242 +++++++++++++++++++++++++++++++++++++
> > > > 1 files changed, 242 insertions(+), 0 deletions(-)
> > > > create mode 100644 include/asm-x86/amd_iommu_types.h
> > >
> > > Is necessary to make this file visible to everyone by putting this in
> > > include/asm?
> >
> > The alternative was to put this file in arch/x86/kernel/. But I don't
> > think its the right place for include files. We have include/ for that.
>
> Hmm, really? It's the right thing in the SCSI subsystem at least. You
> don't need to put a header file having only private stuff in include/.
it's OK to have it in include/asm-x86/ (in fact i'd prefer it to stay
there). It includes hardware API details, etc.
Ingo
* FUJITA Tomonori <[email protected]> wrote:
> On Thu, 26 Jun 2008 21:28:10 +0200
> Joerg Roedel <[email protected]> wrote:
>
> > Add documentation for the kernel parameters introduced with this driver.
> >
> > Signed-off-by: Joerg Roedel <[email protected]>
> > ---
> > Documentation/kernel-parameters.txt | 12 ++++++++++++
> > 1 files changed, 12 insertions(+), 0 deletions(-)
> >
> > diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
> > index e07c432..fc8f936 100644
> > --- a/Documentation/kernel-parameters.txt
> > +++ b/Documentation/kernel-parameters.txt
> > @@ -271,6 +271,18 @@ and is between 256 and 4096 characters. It is defined in the file
> > aic79xx= [HW,SCSI]
> > See Documentation/scsi/aic79xx.txt.
> >
> > + amd_iommu= [HW,X86-84]
> > + Pass parameters to the AMD IOMMU driver in the system.
> > + Possible values are:
> > + off - disable the driver for AMD IOMMU
> > + isolate - enable device isolation (each device, as far
> > + as possible, will get its own protection
> > + domain)
> > + amd_iommu_size= [HW,X86-64]
> > + Define the size of the aperture for the AMD IOMMU
> > + driver. Possible values are:
> > + '32M', '64M' (default), '128M', '256M', '512M', '1G'
> > +
> > amijoy.map= [HW,JOY] Amiga joystick support
> > Map of devices attached to JOY0DAT and JOY1DAT
> > Format: <a>,<b>
>
> Documentation/x86_64/boot-options.txt would be a better place?
>
> Probabaly, it would be better to clean up x86_64 IOMMU options.
yeah, agreed. There should be a single set of options that is largely
transparent to the type of iommu used (for those options which are
commonly implemented - like iommu_size= and iommu= both are).
That belongs in a followup patchset that is decoupled from this IOMMU
driver submission and should include all current IOMMU(-ish) drivers.
(If you'd like to work on that we could try any such patchset in -tip,
ontop of tip/x86/amd-iommu - to keep integration simple.)
Ingo
On Mon, Jun 30, 2008 at 12:07:16AM +0900, FUJITA Tomonori wrote:
> On Thu, 26 Jun 2008 21:28:02 +0200
> Joerg Roedel <[email protected]> wrote:
>
> > This patch adds the dma_ops functions for mapping and unmapping scatter gather
> > lists.
> >
> > Signed-off-by: Joerg Roedel <[email protected]>
> > ---
> > arch/x86/kernel/amd_iommu.c | 98 +++++++++++++++++++++++++++++++++++++++++++
> > 1 files changed, 98 insertions(+), 0 deletions(-)
> >
> > diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
> > index b4079f6..f4747fe 100644
> > --- a/arch/x86/kernel/amd_iommu.c
> > +++ b/arch/x86/kernel/amd_iommu.c
> > @@ -700,3 +700,101 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr,
> > spin_unlock_irqrestore(&domain->lock, flags);
> > }
> >
> > +static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist,
> > + int nelems, int dir)
> > +{
> > + struct scatterlist *s;
> > + int i;
> > +
> > + for_each_sg(sglist, s, nelems, i) {
> > + s->dma_address = (dma_addr_t)sg_phys(s);
> > + s->dma_length = s->length;
> > + }
> > +
> > + return nelems;
> > +}
> > +
> > +static int map_sg(struct device *dev, struct scatterlist *sglist,
> > + int nelems, int dir)
> > +{
> > + unsigned long flags;
> > + struct amd_iommu *iommu;
> > + struct protection_domain *domain;
> > + u16 devid;
> > + int i;
> > + struct scatterlist *s;
> > + phys_addr_t paddr;
> > + int mapped_elems = 0;
> > +
> > + get_device_resources(dev, &iommu, &domain, &devid);
> > +
> > + if (!iommu || !domain)
> > + return map_sg_no_iommu(dev, sglist, nelems, dir);
>
> VT-d put a pointer in struct dev_archdata to store IOMMU info per
> device. Is is useful for you too?
Hmm yes. Thank you for that hint. I can save at least one of my arrays
with that method.
Joerg
--
| AMD Saxony Limited Liability Company & Co. KG
Operating | Wilschdorfer Landstr. 101, 01109 Dresden, Germany
System | Register Court Dresden: HRA 4896
Research | General Partner authorized to represent:
Center | AMD Saxony LLC (Wilmington, Delaware, US)
| General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy
On Friday 27 June 2008 23:10:35 Joerg Roedel wrote:
> On Fri, Jun 27, 2008 at 01:31:00PM -0400, Muli Ben-Yehuda wrote:
> > > > > and to handle devices with limited DMA address ranges.
> > > >
> > > > I'd be pretty surprised if you'll find such devices on machines which
> > > > will have AMD's IOMMU...
> > >
> > > Think of 32bit PCI devices in a host with more than 4GB memory :)
> >
> > I am thinking of them and I'd be surprised if you'd find any in such
> > machines. Certainly I assume none of the on-board devices will have
> > this ancient limitation. But hey, it could happen ;-)
>
> The IOMMU machine under my desk has a 32bit PCI slot with a card in it
Also, remember the "odd-ball devices" we had in our "why is PCI passthrough
needed" slide? People might want to upgrade to newer servers and still retain
their old PCI devices and assign them to guests.
Amit
> > > > Think of 32bit PCI devices in a host with more than 4GB memory :)
> > >
> > > I am thinking of them and I'd be surprised if you'd find any in such
> > > machines. Certainly I assume none of the on-board devices will have
> > > this ancient limitation. But hey, it could happen ;-)
> >
> > The IOMMU machine under my desk has a 32bit PCI slot with a card in it
>
> Also, remember the "odd-ball devices" we had in our "why is PCI passthrough
> needed" slide? People might want to upgrade to newer servers and still retain
> their old PCI devices and assign them to guests.
Nothing odd ball at all.
All SFF ATA controllers are 32bit (thats the PATA port on most PCs
still), even the Intel ICH controllers are 32bit in SFF mode (which is
still how most PCs come with it configured). Most PC hardware is still
32bit. A few bits are 31 or 28 bit just to ruin the party - 31bit being
quite common as old Windows had a 2/2GB split.
On Thu 2008-06-26 21:27:37, Joerg Roedel wrote:
> This patch adds the Kconfig entry for the AMD IOMMU driver.
>
> Signed-off-by: Joerg Roedel <[email protected]>
> ---
> arch/x86/Kconfig | 7 +++++++
> 1 files changed, 7 insertions(+), 0 deletions(-)
>
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index e0edaaa..5a82f18 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -549,6 +549,13 @@ config CALGARY_IOMMU_ENABLED_BY_DEFAULT
> Calgary anyway, pass 'iommu=calgary' on the kernel command line.
> If unsure, say Y.
>
> +config AMD_IOMMU
> + bool "AMD IOMMU support"
> + select SWIOTL
SWIOTLB?
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
* Pavel Machek <[email protected]> wrote:
> > +config AMD_IOMMU
> > + bool "AMD IOMMU support"
> > + select SWIOTL
>
> SWIOTLB?
yep, that typo got already fixed via the patch below.
Ingo
--------------->
commit 07c40e8a1acdb56fca485a6deeb252ebf19509a1
Author: Ingo Molnar <[email protected]>
Date: Fri Jun 27 11:31:28 2008 +0200
x86, AMD IOMMU: build fix #3
fix typo causing:
arch/x86/kernel/built-in.o: In function `__unmap_single':
amd_iommu.c:(.text+0x17771): undefined reference to `iommu_area_free'
arch/x86/kernel/built-in.o: In function `__map_single':
amd_iommu.c:(.text+0x1797a): undefined reference to `iommu_area_alloc'
amd_iommu.c:(.text+0x179a2): undefined reference to `iommu_area_alloc'
Signed-off-by: Ingo Molnar <[email protected]>
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 62a2820..8aae462 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -551,7 +551,7 @@ config CALGARY_IOMMU_ENABLED_BY_DEFAULT
config AMD_IOMMU
bool "AMD IOMMU support"
- select SWIOTL
+ select SWIOTLB
depends on X86_64 && PCI && ACPI
help
Select this to get support for AMD IOMMU hardware in your system.
On Wed, 9 Jul 2008 10:48:37 +0200
Ingo Molnar <[email protected]> wrote:
>
> * Pavel Machek <[email protected]> wrote:
>
> > > +config AMD_IOMMU
> > > + bool "AMD IOMMU support"
> > > + select SWIOTL
> >
> > SWIOTLB?
>
> yep, that typo got already fixed via the patch below.
>
> Ingo
>
> --------------->
> commit 07c40e8a1acdb56fca485a6deeb252ebf19509a1
> Author: Ingo Molnar <[email protected]>
> Date: Fri Jun 27 11:31:28 2008 +0200
>
> x86, AMD IOMMU: build fix #3
>
> fix typo causing:
>
> arch/x86/kernel/built-in.o: In function `__unmap_single':
> amd_iommu.c:(.text+0x17771): undefined reference to `iommu_area_free'
> arch/x86/kernel/built-in.o: In function `__map_single':
> amd_iommu.c:(.text+0x1797a): undefined reference to `iommu_area_alloc'
> amd_iommu.c:(.text+0x179a2): undefined reference to `iommu_area_alloc'
>
> Signed-off-by: Ingo Molnar <[email protected]>
>
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 62a2820..8aae462 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -551,7 +551,7 @@ config CALGARY_IOMMU_ENABLED_BY_DEFAULT
>
> config AMD_IOMMU
> bool "AMD IOMMU support"
> - select SWIOTL
> + select SWIOTLB
> depends on X86_64 && PCI && ACPI
> help
> Select this to get support for AMD IOMMU hardware in your system.
This fix is fine though the explicit fix is that AMD_IOMMU depends on
IOMMU_HELPER since they are the IOMMU helper functions. SWIOTLB
requires IOMMU_HELPER so declaring that AMD_IOMMU depends on SWIOTLB
properly fixes the problems.
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f98e1c3..4d85501 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -579,7 +579,7 @@ config SWIOTLB
3 GB of memory. If unsure, say Y.
config IOMMU_HELPER
- def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB)
+ def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU)
config MAXSMP
bool "Configure Maximum number of SMP Processors and NUMA Nodes"
depends on X86_64 && SMP
On Thu, 26 Jun 2008 21:27:38 +0200 Joerg Roedel <[email protected]> wrote:
> +/* helper macros */
> +#define LOW_U32(x) ((x) & ((1ULL << 32)-1))
> +#define HIGH_U32(x) (LOW_U32((x) >> 32))
Please avoid putting general-purpose helpers into private header files.
If we already have helper functions for this then use them.
If we don't have such helpers, let's write them, get them reviewed and put
them into kernel.h.
If we don't want these helpers in kernel.h then we don't want them in the
iommu driver either.
This cleanup work can be done separately from reviewing, testing and
merging the IOMMU driver.
On Thu, 26 Jun 2008 21:27:39 +0200 Joerg Roedel <[email protected]> wrote:
> This patch adds the required data structures and constants required to parse
> the ACPI table for the AMD IOMMU.
>
> Signed-off-by: Joerg Roedel <[email protected]>
> ---
> arch/x86/kernel/amd_iommu_init.c | 101 ++++++++++++++++++++++++++++++++++++++
> 1 files changed, 101 insertions(+), 0 deletions(-)
> create mode 100644 arch/x86/kernel/amd_iommu_init.c
>
> diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
> new file mode 100644
> index 0000000..6fce5ab
> --- /dev/null
> +++ b/arch/x86/kernel/amd_iommu_init.c
> @@ -0,0 +1,101 @@
> +/*
> + * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
> + * Author: Joerg Roedel <[email protected]>
> + * Leo Duran <[email protected]>
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU General Public License version 2 as published
> + * by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
> + */
> +
> +#include <linux/pci.h>
> +#include <linux/acpi.h>
> +#include <linux/gfp.h>
> +#include <linux/list.h>
> +#include <asm/pci-direct.h>
> +#include <asm/amd_iommu_types.h>
> +#include <asm/gart.h>
> +
> +/*
> + * definitions for the ACPI scanning code
> + */
> +#define UPDATE_LAST_BDF(x) do {\
> + if ((x) > amd_iommu_last_bdf) \
> + amd_iommu_last_bdf = (x); \
> + } while (0);
Does this need to exist?
If so, it could and should be implemented as a C function, not as a macro.
> +#define DEVID(bus, devfn) (((bus) << 8) | (devfn))
I suspect that a helper function for this already exists.
> +#define PCI_BUS(x) (((x) >> 8) & 0xff)
And this. If it doesn't exist, probably it should?
> +#define IVRS_HEADER_LENGTH 48
> +#define TBL_SIZE(x) (1 << (PAGE_SHIFT + get_order(amd_iommu_last_bdf * (x))))
Can be implemented in C.
> +#define ACPI_IVHD_TYPE 0x10
> +#define ACPI_IVMD_TYPE_ALL 0x20
> +#define ACPI_IVMD_TYPE 0x21
> +#define ACPI_IVMD_TYPE_RANGE 0x22
> +
> +#define IVHD_DEV_ALL 0x01
> +#define IVHD_DEV_SELECT 0x02
> +#define IVHD_DEV_SELECT_RANGE_START 0x03
> +#define IVHD_DEV_RANGE_END 0x04
> +#define IVHD_DEV_ALIAS 0x42
> +#define IVHD_DEV_ALIAS_RANGE 0x43
> +#define IVHD_DEV_EXT_SELECT 0x46
> +#define IVHD_DEV_EXT_SELECT_RANGE 0x47
> +
> +#define IVHD_FLAG_HT_TUN_EN 0x00
> +#define IVHD_FLAG_PASSPW_EN 0x01
> +#define IVHD_FLAG_RESPASSPW_EN 0x02
> +#define IVHD_FLAG_ISOC_EN 0x03
> +
> +#define IVMD_FLAG_EXCL_RANGE 0x08
> +#define IVMD_FLAG_UNITY_MAP 0x01
> +
> +#define ACPI_DEVFLAG_INITPASS 0x01
> +#define ACPI_DEVFLAG_EXTINT 0x02
> +#define ACPI_DEVFLAG_NMI 0x04
> +#define ACPI_DEVFLAG_SYSMGT1 0x10
> +#define ACPI_DEVFLAG_SYSMGT2 0x20
> +#define ACPI_DEVFLAG_LINT0 0x40
> +#define ACPI_DEVFLAG_LINT1 0x80
> +#define ACPI_DEVFLAG_ATSDIS 0x10000000
> +
> +struct ivhd_header {
> + u8 type;
> + u8 flags;
> + u16 length;
> + u16 devid;
> + u16 cap_ptr;
> + u64 mmio_phys;
> + u16 pci_seg;
> + u16 info;
> + u32 reserved;
> +} __attribute__((packed));
> +
> +struct ivhd_entry {
> + u8 type;
> + u16 devid;
> + u8 flags;
> + u32 ext;
> +} __attribute__((packed));
> +
> +struct ivmd_header {
> + u8 type;
> + u8 flags;
> + u16 length;
> + u16 devid;
> + u16 aux;
> + u64 resv;
> + u64 range_start;
> + u64 range_length;
> +} __attribute__((packed));
I can make a guess as to why these are "packed", but a code comment would
prevent guesswork.
On Thu, 26 Jun 2008 21:27:40 +0200 Joerg Roedel <[email protected]> wrote:
> This patch adds the data structures which will contain the information read
> from the ACPI table.
>
> Signed-off-by: Joerg Roedel <[email protected]>
> ---
> arch/x86/kernel/amd_iommu_init.c | 17 +++++++++++++++++
> 1 files changed, 17 insertions(+), 0 deletions(-)
>
> diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
> index 6fce5ab..0ad8cf9 100644
> --- a/arch/x86/kernel/amd_iommu_init.c
> +++ b/arch/x86/kernel/amd_iommu_init.c
> @@ -99,3 +99,20 @@ struct ivmd_header {
> u64 range_length;
> } __attribute__((packed));
>
> +static int __initdata amd_iommu_disable;
> +
> +u16 amd_iommu_last_bdf;
> +struct list_head amd_iommu_unity_map;
> +unsigned amd_iommu_aperture_order = 26;
> +int amd_iommu_isolate;
> +
> +struct list_head amd_iommu_list;
> +struct dev_table_entry *amd_iommu_dev_table;
> +u16 *amd_iommu_alias_table;
> +struct amd_iommu **amd_iommu_rlookup_table;
> +struct protection_domain **amd_iommu_pd_table;
> +unsigned long *amd_iommu_pd_alloc_bitmap;
> +
> +static u32 dev_table_size;
> +static u32 alias_table_size;
> +static u32 rlookup_table_size;
hm, one would expect to see the header file which declares the globals in
the same patch as the .c file which defines them. Whatever.
What locks the list_heads? It would be good to add a code comment at the
definition site describing that locking.
On Wed, 9 Jul 2008 18:38:23 -0700
Andrew Morton <[email protected]> wrote:
> On Thu, 26 Jun 2008 21:27:38 +0200 Joerg Roedel
> <[email protected]> wrote:
>
> > +/* helper macros */
> > +#define LOW_U32(x) ((x) & ((1ULL << 32)-1))
> > +#define HIGH_U32(x) (LOW_U32((x) >> 32))
>
> Please avoid putting general-purpose helpers into private header
> files.
especially broken helpers.
A >> 32 on something that may be a 32 bit entry is bad; int32 >> 32...
gcc can (and does!) optimize that out.
(because it first gets translated into a SHR x86 instruction which then
notices it's encoded as a zero shift.. which then gets deleted)
--
If you want to reach me at my work email, use [email protected]
For development, discussion and tips for power savings,
visit http://www.lesswatts.org
On Thu, 26 Jun 2008 21:27:45 +0200 Joerg Roedel <[email protected]> wrote:
> This patch adds the functions to allocate and deallocate the command buffer for
> one IOMMU in the system.
>
> Signed-off-by: Joerg Roedel <[email protected]>
> ---
> arch/x86/kernel/amd_iommu_init.c | 30 ++++++++++++++++++++++++++++++
> 1 files changed, 30 insertions(+), 0 deletions(-)
>
> diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
> index ffb8ac8..c2be3ad 100644
> --- a/arch/x86/kernel/amd_iommu_init.c
> +++ b/arch/x86/kernel/amd_iommu_init.c
> @@ -278,3 +278,33 @@ static int __init find_last_devid_acpi(struct acpi_table_header *table)
> return 0;
> }
>
> +static u8 * __init alloc_command_buffer(struct amd_iommu *iommu)
> +{
> + u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL,
> + get_order(CMD_BUFFER_SIZE));
Can use __GFP_ZERO here
> + u64 entry = 0;
Unneeded initialisation.
> + if (cmd_buf == NULL)
> + return NULL;
> +
> + iommu->cmd_buf_size = CMD_BUFFER_SIZE;
> +
> + memset(cmd_buf, 0, CMD_BUFFER_SIZE);
And remove this.
> + entry = (u64)virt_to_phys(cmd_buf);
> + entry |= MMIO_CMD_SIZE_512;
> + memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
> + &entry, sizeof(entry));
> +
> + iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
> +
> + return cmd_buf;
> +}
> +
On Thu, 26 Jun 2008 21:27:47 +0200 Joerg Roedel <[email protected]> wrote:
> This patch adds functions to initialize the IOMMU hardware with information
> from ACPI and PCI.
>
> Signed-off-by: Joerg Roedel <[email protected]>
> ---
> arch/x86/kernel/amd_iommu_init.c | 125 ++++++++++++++++++++++++++++++++++++++
> 1 files changed, 125 insertions(+), 0 deletions(-)
>
> diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
> index 4c37abb..8ec48f1 100644
> --- a/arch/x86/kernel/amd_iommu_init.c
> +++ b/arch/x86/kernel/amd_iommu_init.c
> @@ -353,3 +353,128 @@ static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m)
> }
> }
>
> +static void __init init_iommu_from_pci(struct amd_iommu *iommu)
> +{
> + int bus = PCI_BUS(iommu->devid);
> + int dev = PCI_SLOT(iommu->devid);
> + int fn = PCI_FUNC(iommu->devid);
> + int cap_ptr = iommu->cap_ptr;
> + u32 range;
> +
> + iommu->cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_CAP_HDR_OFFSET);
> +
> + range = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET);
> + iommu->first_device = DEVID(MMIO_GET_BUS(range), MMIO_GET_FD(range));
> + iommu->last_device = DEVID(MMIO_GET_BUS(range), MMIO_GET_LD(range));
> +}
> +
> +static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
> + struct ivhd_header *h)
> +{
> + u8 *p = (u8 *)h;
> + u8 *end = p, flags = 0;
> + u16 dev_i, devid = 0, devid_start = 0, devid_to = 0;
> + u32 ext_flags = 0;
> + bool alias = 0;
Using `false' would be more conventional.
> + alias = 1;
`true'.
etc.
On Thu, 26 Jun 2008 21:27:48 +0200 Joerg Roedel <[email protected]> wrote:
> This patch adds the detection of AMD IOMMU hardware provided on information
> from ACPI provided by the BIOS.
>
> ...
>
> +static int __init init_iommu_all(struct acpi_table_header *table)
> +{
> + u8 *p = (u8 *)table, *end = (u8 *)table;
> + struct ivhd_header *h;
> + struct amd_iommu *iommu;
> + int ret;
> +
> + INIT_LIST_HEAD(&amd_iommu_list);
Initialisation of a global list_head should be unnecessary. Just define
the list_head with LIST_HEAD() and do it at compile-time.
On Thu, 26 Jun 2008 21:27:49 +0200 Joerg Roedel <[email protected]> wrote:
> +static int __init init_memory_definitions(struct acpi_table_header *table)
> +{
> + u8 *p = (u8 *)table, *end = (u8 *)table;
> + struct ivmd_header *m;
> +
> + INIT_LIST_HEAD(&amd_iommu_unity_map);
Define with LIST_HEAD().
On Thu, 26 Jun 2008 21:27:50 +0200 Joerg Roedel <[email protected]> wrote:
> This patch puts the AMD IOMMU ACPI table parsing and hardware initialization
> functions together to the main initialization routine.
>
> index 555fcc9..c792ddc 100644
> --- a/arch/x86/kernel/amd_iommu_init.c
> +++ b/arch/x86/kernel/amd_iommu_init.c
> @@ -643,3 +643,129 @@ static int __init init_memory_definitions(struct acpi_table_header *table)
> return 0;
> }
>
> +int __init amd_iommu_init(void)
> +{
> + int i, ret = 0;
> +
> +
> + if (amd_iommu_disable) {
> + printk(KERN_INFO "AMD IOMMU disabled by kernel command line\n");
> + return 0;
> + }
> +
> + /*
> + * First parse ACPI tables to find the largest Bus/Dev/Func
> + * we need to handle. Upon this information the shared data
> + * structures for the IOMMUs in the system will be allocated
> + */
> + if (acpi_table_parse("IVRS", find_last_devid_acpi) != 0)
> + return -ENODEV;
> +
> + dev_table_size = TBL_SIZE(DEV_TABLE_ENTRY_SIZE);
> + alias_table_size = TBL_SIZE(ALIAS_TABLE_ENTRY_SIZE);
> + rlookup_table_size = TBL_SIZE(RLOOKUP_TABLE_ENTRY_SIZE);
> +
> + ret = -ENOMEM;
> +
> + /* Device table - directly used by all IOMMUs */
> + amd_iommu_dev_table = (void *)__get_free_pages(GFP_KERNEL,
> + get_order(dev_table_size));
Is there a reason for using __get_free_pages() in this manner everywhere?
kmalloc() or kzalloc() are unsuitable?
> + if (amd_iommu_dev_table == NULL)
> + goto out;
> +
> + /*
> + * Alias table - map PCI Bus/Dev/Func to Bus/Dev/Func the
> + * IOMMU see for that device
> + */
> + amd_iommu_alias_table = (void *)__get_free_pages(GFP_KERNEL,
> + get_order(alias_table_size));
> + if (amd_iommu_alias_table == NULL)
> + goto free;
> +
> + /* IOMMU rlookup table - find the IOMMU for a specific device */
> + amd_iommu_rlookup_table = (void *)__get_free_pages(GFP_KERNEL,
> + get_order(rlookup_table_size));
> + if (amd_iommu_rlookup_table == NULL)
> + goto free;
> +
> + /*
> + * Protection Domain table - maps devices to protection domains
> + * This table has the same size as the rlookup_table
> + */
> + amd_iommu_pd_table = (void *)__get_free_pages(GFP_KERNEL,
> + get_order(rlookup_table_size));
> + if (amd_iommu_pd_table == NULL)
> + goto free;
> +
> + amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages(GFP_KERNEL,
> + get_order(MAX_DOMAIN_ID/8));
> + if (amd_iommu_pd_alloc_bitmap == NULL)
> + goto free;
> +
> + /*
> + * memory is allocated now; initialize the device table with all zeroes
> + * and let all alias entries point to itself
> + */
> + memset(amd_iommu_dev_table, 0, dev_table_size);
> + for (i = 0; i < amd_iommu_last_bdf; ++i)
> + amd_iommu_alias_table[i] = i;
> +
> + memset(amd_iommu_pd_table, 0, rlookup_table_size);
> + memset(amd_iommu_pd_alloc_bitmap, 0, MAX_DOMAIN_ID / 8);
Use of __GFP_ZERO in here would relieve us of a lot of memsets.
> + /*
> + * never allocate domain 0 because its used as the non-allocated and
> + * error value placeholder
> + */
> + amd_iommu_pd_alloc_bitmap[0] = 1;
> +
> + /*
> + * now the data structures are allocated and basically initialized
> + * start the real acpi table scan
> + */
> + ret = -ENODEV;
> + if (acpi_table_parse("IVRS", init_iommu_all) != 0)
> + goto free;
> +
> + if (acpi_table_parse("IVRS", init_memory_definitions) != 0)
> + goto free;
> +
> + printk(KERN_INFO "AMD IOMMU: aperture size is %d MB\n",
> + (1 << (amd_iommu_aperture_order-20)));
> +
> + printk(KERN_INFO "AMD IOMMU: device isolation ");
> + if (amd_iommu_isolate)
> + printk("enabled\n");
> + else
> + printk("disabled\n");
> +
> +out:
> + return ret;
> +
> +free:
> + if (amd_iommu_pd_alloc_bitmap)
> + free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, 1);
> +
> + if (amd_iommu_pd_table)
> + free_pages((unsigned long)amd_iommu_pd_table,
> + get_order(rlookup_table_size));
> +
> + if (amd_iommu_rlookup_table)
> + free_pages((unsigned long)amd_iommu_rlookup_table,
> + get_order(rlookup_table_size));
> +
> + if (amd_iommu_alias_table)
> + free_pages((unsigned long)amd_iommu_alias_table,
> + get_order(alias_table_size));
> +
> + if (amd_iommu_dev_table)
> + free_pages((unsigned long)amd_iommu_dev_table,
> + get_order(dev_table_size));
free_pages(0) is legal, so all these tests are unneeded, I expect.
This remains true if you decide to switch to kzalloc().
> + free_iommu_all();
> +
> + free_unity_maps();
> +
> + goto out;
> +}
On Thu, 26 Jun 2008 21:27:52 +0200 Joerg Roedel <[email protected]> wrote:
> This patch adds two parameters to the kernel command line to control behavior
> of the AMD IOMMU.
>
> Signed-off-by: Joerg Roedel <[email protected]>
> ---
> arch/x86/kernel/amd_iommu_init.c | 34 ++++++++++++++++++++++++++++++++++
> 1 files changed, 34 insertions(+), 0 deletions(-)
>
> diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
> index d7a75bf..ec6f13b 100644
> --- a/arch/x86/kernel/amd_iommu_init.c
> +++ b/arch/x86/kernel/amd_iommu_init.c
> @@ -789,3 +789,37 @@ void __init amd_iommu_detect(void)
> }
> }
>
> +static int __init parse_amd_iommu_options(char *str)
> +{
> + for (; *str; ++str) {
> + if (strcmp(str, "off") == 0)
> + amd_iommu_disable = 1;
> + if (strcmp(str, "isolate") == 0)
> + amd_iommu_isolate = 1;
> + }
> +
> + return 1;
> +}
> +
> +static int __init parse_amd_iommu_size_options(char *str)
> +{
> + for (; *str; ++str) {
> + if (strcmp(str, "32M") == 0)
> + amd_iommu_aperture_order = 25;
> + if (strcmp(str, "64M") == 0)
> + amd_iommu_aperture_order = 26;
> + if (strcmp(str, "128M") == 0)
> + amd_iommu_aperture_order = 27;
> + if (strcmp(str, "256M") == 0)
> + amd_iommu_aperture_order = 28;
> + if (strcmp(str, "512M") == 0)
> + amd_iommu_aperture_order = 29;
> + if (strcmp(str, "1G") == 0)
> + amd_iommu_aperture_order = 30;
> + }
> +
> + return 1;
> +}
> +
> +__setup("amd_iommu=", parse_amd_iommu_options);
> +__setup("amd_iommu_size=", parse_amd_iommu_size_options);
Please document kernel boot parameters in Documentation/kernel-parameters.txt.
I think you just reimplemented memparse().
On Thu, 26 Jun 2008 21:27:53 +0200 Joerg Roedel <[email protected]> wrote:
> This patch adds generic stuff used by the mapping and unmapping code for the
> AMD IOMMU.
>
> Signed-off-by: Joerg Roedel <[email protected]>
> ---
> arch/x86/kernel/amd_iommu.c | 40 ++++++++++++++++++++++++++++++++++++++++
> 1 files changed, 40 insertions(+), 0 deletions(-)
> create mode 100644 arch/x86/kernel/amd_iommu.c
>
> diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
> new file mode 100644
> index 0000000..90392c7
> --- /dev/null
> +++ b/arch/x86/kernel/amd_iommu.c
> @@ -0,0 +1,40 @@
> +/*
> + * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
> + * Author: Joerg Roedel <[email protected]>
> + * Leo Duran <[email protected]>
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU General Public License version 2 as published
> + * by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
> + */
> +
> +#include <linux/pci.h>
> +#include <linux/gfp.h>
> +#include <linux/bitops.h>
> +#include <linux/scatterlist.h>
> +#include <linux/iommu-helper.h>
> +#include <asm/proto.h>
> +#include <asm/gart.h>
> +#include <asm/amd_iommu_types.h>
> +
> +#define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
Doesn't need to be implemented in a macro, hence should be implemented in C,
please.
> +#define to_pages(addr, size) \
> + (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
Does this patchset add any comments *at all*? Is it really all so obvious
that this was justifiable?
I don't know what this function does, but it shouldn't be doing it here.
Either it duplicates an existing helper or it should be implemented in mm.h
Preferably not as a macro.
Preferably with some documentation.
It is unobvious whether this haler has correct behaviour with all mixes of
32-bit and 64-bit signed and unsigned types.
> +static DEFINE_RWLOCK(amd_iommu_devtable_lock);
> +
> +struct command {
> + u32 data[4];
> +};
Nothing in this file is used by anything. I assume that later patches add
stuff to it. This made it a bit hard to review.
On Thu, 26 Jun 2008 21:27:55 +0200 Joerg Roedel <[email protected]> wrote:
> This patch adds generic handling function as well as all functions to send
> specific commands to the IOMMU hardware as required by this driver.
>
> Signed-off-by: Joerg Roedel <[email protected]>
> ---
> arch/x86/kernel/amd_iommu.c | 106 +++++++++++++++++++++++++++++++++++++++++++
> 1 files changed, 106 insertions(+), 0 deletions(-)
>
> diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
> index 90392c7..a24ee4a 100644
> --- a/arch/x86/kernel/amd_iommu.c
> +++ b/arch/x86/kernel/amd_iommu.c
> @@ -37,4 +37,110 @@ struct command {
> u32 data[4];
> };
>
> +static int __iommu_queue_command(struct amd_iommu *iommu, struct command *cmd)
> +{
> + u32 tail, head;
> + u8 *target;
> +
> + tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
> + target = (iommu->cmd_buf + tail);
> + memcpy_toio(target, cmd, sizeof(*cmd));
> + tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
> + head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
> + if (tail == head)
> + return -ENOMEM;
> + writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
> +
> + return 0;
> +}
> +
> +static int iommu_queue_command(struct amd_iommu *iommu, struct command *cmd)
> +{
> + unsigned long flags;
> + int ret;
> +
> + spin_lock_irqsave(&iommu->lock, flags);
> + ret = __iommu_queue_command(iommu, cmd);
> + spin_unlock_irqrestore(&iommu->lock, flags);
> +
> + return ret;
> +}
> +
> +static int iommu_completion_wait(struct amd_iommu *iommu)
> +{
> + int ret;
> + struct command cmd;
> + volatile u64 ready = 0;
> + unsigned long ready_phys = virt_to_phys(&ready);
> +
> + memset(&cmd, 0, sizeof(cmd));
> + cmd.data[0] = LOW_U32(ready_phys) | CMD_COMPL_WAIT_STORE_MASK;
> + cmd.data[1] = HIGH_U32(ready_phys);
> + cmd.data[2] = 1; /* value written to 'ready' */
> + CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
> +
> + iommu->need_sync = 0;
> +
> + ret = iommu_queue_command(iommu, &cmd);
> +
> + if (ret)
> + return ret;
> +
> + while (!ready)
> + cpu_relax();
A busywait is a big red flag. An uncommented one is not acceptable IMO.
What is the maximum duration?
Is it not possible to sleep and await an interrupt?
Should we implement a timeout in case the hardware is busted?
> + return 0;
> +}
> +
> +static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
> +{
> + struct command cmd;
`command' was a poorly-chosen identifier for this structure. Too generic.
> + BUG_ON(iommu == NULL);
> +
> + memset(&cmd, 0, sizeof(cmd));
> + CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY);
> + cmd.data[0] = devid;
> +
> + iommu->need_sync = 1;
> +
> + return iommu_queue_command(iommu, &cmd);
> +}
> +
> +static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
> + u64 address, u16 domid, int pde, int s)
> +{
> + struct command cmd;
> +
> + memset(&cmd, 0, sizeof(cmd));
> + address &= PAGE_MASK;
> + CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES);
> + cmd.data[1] |= domid;
> + cmd.data[2] = LOW_U32(address);
> + cmd.data[3] = HIGH_U32(address);
> + if (s)
> + cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
> + if (pde)
> + cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
> +
> + iommu->need_sync = 1;
> +
> + return iommu_queue_command(iommu, &cmd);
> +}
> +
> +static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid,
> + u64 address, size_t size)
> +{
> + int i;
> + unsigned pages = to_pages(address, size);
> +
> + address &= PAGE_MASK;
> +
> + for (i = 0; i < pages; ++i) {
> + iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 0);
> + address += PAGE_SIZE;
> + }
> +
> + return 0;
> +}
On Thu, 26 Jun 2008 21:27:58 +0200 Joerg Roedel <[email protected]> wrote:
> +static u16 domain_id_alloc(void)
> +{
> + unsigned long flags;
> + int id;
> +
> + write_lock_irqsave(&amd_iommu_devtable_lock, flags);
> + id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID);
> + BUG_ON(id == 0);
> + if (id > 0 && id < MAX_DOMAIN_ID)
> + __set_bit(id, amd_iommu_pd_alloc_bitmap);
> + else
> + id = 0;
> + write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
> +
> + return id;
> +}
Presumably this is not performance-critical. If it is, an IDR
tree would help.
On Thu, 26 Jun 2008 21:27:59 +0200 Joerg Roedel <[email protected]> wrote:
> This patch adds functions necessary to find the IOMMU resources for a specific
> device.
>
> Signed-off-by: Joerg Roedel <[email protected]>
> ---
> arch/x86/kernel/amd_iommu.c | 75 +++++++++++++++++++++++++++++++++++++++++++
> 1 files changed, 75 insertions(+), 0 deletions(-)
>
> diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
> index c43d15d..47e80b5 100644
> --- a/arch/x86/kernel/amd_iommu.c
> +++ b/arch/x86/kernel/amd_iommu.c
> @@ -461,3 +461,78 @@ free_dma_dom:
> return NULL;
> }
>
> +static struct protection_domain *domain_for_device(u16 devid)
> +{
> + struct protection_domain *dom;
> + unsigned long flags;
> +
> + read_lock_irqsave(&amd_iommu_devtable_lock, flags);
Why is this cheerfully undocumented lock irq-safe? Is it ever taken from
IRQ context?
> + dom = amd_iommu_pd_table[devid];
> + read_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
> +
> + return dom;
> +}
The locking in this function makes no sense. We drop the lock then return
a value which the caller cannot use in a race-free fashion, because the
lock is no longer held.
On Thu, 26 Jun 2008 21:28:01 +0200 Joerg Roedel <[email protected]> wrote:
> This patch adds the dma_ops specific mapping functions for single mappings.
>
> Signed-off-by: Joerg Roedel <[email protected]>
> ---
> arch/x86/kernel/amd_iommu.c | 59 +++++++++++++++++++++++++++++++++++++++++++
> 1 files changed, 59 insertions(+), 0 deletions(-)
>
> diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
> index e00a3e7..b4079f6 100644
> --- a/arch/x86/kernel/amd_iommu.c
> +++ b/arch/x86/kernel/amd_iommu.c
> @@ -40,6 +40,11 @@ struct command {
> static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
> struct unity_map_entry *e);
>
> +static int iommu_has_npcache(struct amd_iommu *iommu)
> +{
> + return iommu->cap & IOMMU_CAP_NPCACHE;
> +}
> +
> static int __iommu_queue_command(struct amd_iommu *iommu, struct command *cmd)
> {
> u32 tail, head;
> @@ -641,3 +646,57 @@ static void __unmap_single(struct amd_iommu *iommu,
> dma_ops_free_addresses(dma_dom, dma_addr, pages);
> }
>
> +static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
> + size_t size, int dir)
> +{
> + unsigned long flags;
> + struct amd_iommu *iommu;
> + struct protection_domain *domain;
> + u16 devid;
> + dma_addr_t addr;
> +
> + get_device_resources(dev, &iommu, &domain, &devid);
> +
> + if (iommu == NULL || domain == NULL)
> + return (dma_addr_t)paddr;
OK, a test case. A reader of your code (ie: me) wants to find out what
this code is doing. What is the *meaning* of iommu == NULL || domain ==
NULL here?
I go look at the (undocumented) get_device_resources() and I see that this:
if (_bdf >= amd_iommu_last_bdf) {
happened. I don't know what that semantically means and I gave up.
I'm not saying that the code is unmaintainable, but I would assert that it
is a heck of a lot harder to maintain than it could be, and than it should
be.
get_device_resources() has an open-coded copy of your DEVID() macro, btw.
> + spin_lock_irqsave(&domain->lock, flags);
> + addr = __map_single(dev, iommu, domain->priv, paddr, size, dir);
> + if (addr == bad_dma_address)
> + goto out;
> +
> + if (iommu_has_npcache(iommu))
> + iommu_flush_pages(iommu, domain->id, addr, size);
> +
> + if (iommu->need_sync)
> + iommu_completion_wait(iommu);
> +
> +out:
> + spin_unlock_irqrestore(&domain->lock, flags);
> +
> + return addr;
> +}
> +
> +static void unmap_single(struct device *dev, dma_addr_t dma_addr,
> + size_t size, int dir)
> +{
> + unsigned long flags;
> + struct amd_iommu *iommu;
> + struct protection_domain *domain;
> + u16 devid;
> +
> + if (!get_device_resources(dev, &iommu, &domain, &devid))
> + return;
> +
> + spin_lock_irqsave(&domain->lock, flags);
> +
> + __unmap_single(iommu, domain->priv, dma_addr, size, dir);
> +
> + iommu_flush_pages(iommu, domain->id, dma_addr, size);
> +
> + if (iommu->need_sync)
> + iommu_completion_wait(iommu);
> +
> + spin_unlock_irqrestore(&domain->lock, flags);
> +}
On Wed, 9 Jul 2008 18:50:55 -0700 Arjan van de Ven <[email protected]> wrote:
> On Wed, 9 Jul 2008 18:38:23 -0700
> Andrew Morton <[email protected]> wrote:
>
> > On Thu, 26 Jun 2008 21:27:38 +0200 Joerg Roedel
> > <[email protected]> wrote:
> >
> > > +/* helper macros */
> > > +#define LOW_U32(x) ((x) & ((1ULL << 32)-1))
> > > +#define HIGH_U32(x) (LOW_U32((x) >> 32))
> >
> > Please avoid putting general-purpose helpers into private header
> > files.
>
> especially broken helpers.
>
> A >> 32 on something that may be a 32 bit entry is bad; int32 >> 32...
> gcc can (and does!) optimize that out.
>
> (because it first gets translated into a SHR x86 instruction which then
> notices it's encoded as a zero shift.. which then gets deleted)
>
Well yeah. upper_32_bits() gets it all corect. We could do a
lower_32_bits() I suppose, if it's useful.
On Wed, 9 Jul 2008 19:01:40 -0700 Andrew Morton <[email protected]> wrote:
> It is unobvious whether this haler has correct behaviour with all mixes of
helper
> 32-bit and 64-bit signed and unsigned types.
One of my more wild typos.
On Wed, 9 Jul 2008 19:01:40 -0700
Andrew Morton <[email protected]> wrote:
> On Thu, 26 Jun 2008 21:27:53 +0200 Joerg Roedel <[email protected]> wrote:
>
> > This patch adds generic stuff used by the mapping and unmapping code for the
> > AMD IOMMU.
> >
> > Signed-off-by: Joerg Roedel <[email protected]>
> > ---
> > arch/x86/kernel/amd_iommu.c | 40 ++++++++++++++++++++++++++++++++++++++++
> > 1 files changed, 40 insertions(+), 0 deletions(-)
> > create mode 100644 arch/x86/kernel/amd_iommu.c
> >
> > diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
> > new file mode 100644
> > index 0000000..90392c7
> > --- /dev/null
> > +++ b/arch/x86/kernel/amd_iommu.c
> > @@ -0,0 +1,40 @@
> > +/*
> > + * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
> > + * Author: Joerg Roedel <[email protected]>
> > + * Leo Duran <[email protected]>
> > + *
> > + * This program is free software; you can redistribute it and/or modify it
> > + * under the terms of the GNU General Public License version 2 as published
> > + * by the Free Software Foundation.
> > + *
> > + * This program is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> > + * GNU General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU General Public License
> > + * along with this program; if not, write to the Free Software
> > + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
> > + */
> > +
> > +#include <linux/pci.h>
> > +#include <linux/gfp.h>
> > +#include <linux/bitops.h>
> > +#include <linux/scatterlist.h>
> > +#include <linux/iommu-helper.h>
> > +#include <asm/proto.h>
> > +#include <asm/gart.h>
> > +#include <asm/amd_iommu_types.h>
> > +
> > +#define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
>
> Doesn't need to be implemented in a macro, hence should be implemented in C,
> please.
>
> > +#define to_pages(addr, size) \
> > + (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
>
> Does this patchset add any comments *at all*? Is it really all so obvious
> that this was justifiable?
>
> I don't know what this function does, but it shouldn't be doing it here.
> Either it duplicates an existing helper or it should be implemented in mm.h
I think that this calculates the number of IOMMU pages for a
request. Every IOMMUs has a similar function so it might be better to
add a helper function to include/linux/iommu-helper.h.
Someone except for IOMMU need this? If so, it would be better to add
this to a different header file.
diff --git a/include/linux/iommu-helper.h b/include/linux/iommu-helper.h
index c975caf..9c29e65 100644
--- a/include/linux/iommu-helper.h
+++ b/include/linux/iommu-helper.h
@@ -8,3 +8,14 @@ extern unsigned long iommu_area_alloc(unsigned long *map, unsigned long size,
unsigned long align_mask);
extern void iommu_area_free(unsigned long *map, unsigned long start,
unsigned int nr);
+
+static inline unsigned long iommu_num_pages(unsigned long addr,
+ unsigned long len)
+{
+ unsigned long npages;
+
+ npages = ALIGN(addr + len, 1UL << IOMMU_PAGE_SHIFT) -
+ (addr & ~((1UL << IOMMU_PAGE_SHIFT) - 1));
+
+ return (npages >> IOMMU_PAGE_SHIFT);
+}
On Wed, 9 Jul 2008 18:56:34 -0700
Andrew Morton <[email protected]> wrote:
> On Thu, 26 Jun 2008 21:27:52 +0200 Joerg Roedel <[email protected]> wrote:
>
> > This patch adds two parameters to the kernel command line to control behavior
> > of the AMD IOMMU.
> >
> > Signed-off-by: Joerg Roedel <[email protected]>
> > ---
> > arch/x86/kernel/amd_iommu_init.c | 34 ++++++++++++++++++++++++++++++++++
> > 1 files changed, 34 insertions(+), 0 deletions(-)
> >
> > diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
> > index d7a75bf..ec6f13b 100644
> > --- a/arch/x86/kernel/amd_iommu_init.c
> > +++ b/arch/x86/kernel/amd_iommu_init.c
> > @@ -789,3 +789,37 @@ void __init amd_iommu_detect(void)
> > }
> > }
> >
> > +static int __init parse_amd_iommu_options(char *str)
> > +{
> > + for (; *str; ++str) {
> > + if (strcmp(str, "off") == 0)
> > + amd_iommu_disable = 1;
> > + if (strcmp(str, "isolate") == 0)
> > + amd_iommu_isolate = 1;
> > + }
> > +
> > + return 1;
> > +}
> > +
> > +static int __init parse_amd_iommu_size_options(char *str)
> > +{
> > + for (; *str; ++str) {
> > + if (strcmp(str, "32M") == 0)
> > + amd_iommu_aperture_order = 25;
> > + if (strcmp(str, "64M") == 0)
> > + amd_iommu_aperture_order = 26;
> > + if (strcmp(str, "128M") == 0)
> > + amd_iommu_aperture_order = 27;
> > + if (strcmp(str, "256M") == 0)
> > + amd_iommu_aperture_order = 28;
> > + if (strcmp(str, "512M") == 0)
> > + amd_iommu_aperture_order = 29;
> > + if (strcmp(str, "1G") == 0)
> > + amd_iommu_aperture_order = 30;
> > + }
> > +
> > + return 1;
> > +}
> > +
> > +__setup("amd_iommu=", parse_amd_iommu_options);
> > +__setup("amd_iommu_size=", parse_amd_iommu_size_options);
>
> Please document kernel boot parameters in Documentation/kernel-parameters.txt.
[PATCH 34/34] does (this should have included [PATCH 34/34], I guess).
I have one related question: should we document all the kernel boot
parameters in Documentation/kernel-parameters.txt?
Some of the x86_64 boot parameters are in
Documentation/x86/x86_64/boot-options.txt. The parameters for the
various IOMMUs are scattered; it's confusing.
http://marc.info/?t=121541873700001&r=1&w=2
On Thu, 10 Jul 2008 13:25:23 +0900 FUJITA Tomonori <[email protected]> wrote:
> I have one related question, we should document all the kernel boot
> parameters in Documentation/kernel-parameters.txt?
>
> Some of x86_64 boot parameters are in
> Documentations/x86/x86_64/boot-options.txt. The parameters of IOMMUs
> are scattered. It's confusing.
A single monolithic file has advantages. I think I'd prefer that
personally. I'd forgotten that the x86-specific one exists, actually.
If we _do_ want to split this all out into separate files, they should all
be in a dedicated subdirectory under ./Documentation/, no?
* Andrew Morton <[email protected]> wrote:
> On Thu, 10 Jul 2008 13:25:23 +0900 FUJITA Tomonori <[email protected]> wrote:
>
> > I have one related question, we should document all the kernel boot
> > parameters in Documentation/kernel-parameters.txt?
> >
> > Some of x86_64 boot parameters are in
> > Documentations/x86/x86_64/boot-options.txt. The parameters of IOMMUs
> > are scattered. It's confusing.
>
> A single monolithic file has advantages. I think I'd prefer that
> personally. I'd forgotten that the x86-specific one exists, actually.
yeah, it should be all in Documentation/kernel-parameters.txt.
Ingo
On Thu, Jun 26, 2008 at 12:27 PM, Joerg Roedel <[email protected]> wrote:
> This patch adds two parameters to the kernel command line to control behavior
> of the AMD IOMMU.
>
> Signed-off-by: Joerg Roedel <[email protected]>
> ---
> arch/x86/kernel/amd_iommu_init.c | 34 ++++++++++++++++++++++++++++++++++
> 1 files changed, 34 insertions(+), 0 deletions(-)
>
> diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
> index d7a75bf..ec6f13b 100644
> --- a/arch/x86/kernel/amd_iommu_init.c
> +++ b/arch/x86/kernel/amd_iommu_init.c
> @@ -789,3 +789,37 @@ void __init amd_iommu_detect(void)
> }
> }
>
> +static int __init parse_amd_iommu_options(char *str)
> +{
> + for (; *str; ++str) {
> + if (strcmp(str, "off") == 0)
> + amd_iommu_disable = 1;
> + if (strcmp(str, "isolate") == 0)
> + amd_iommu_isolate = 1;
> + }
> +
> + return 1;
> +}
> +
> +static int __init parse_amd_iommu_size_options(char *str)
> +{
> + for (; *str; ++str) {
> + if (strcmp(str, "32M") == 0)
> + amd_iommu_aperture_order = 25;
> + if (strcmp(str, "64M") == 0)
> + amd_iommu_aperture_order = 26;
> + if (strcmp(str, "128M") == 0)
> + amd_iommu_aperture_order = 27;
> + if (strcmp(str, "256M") == 0)
> + amd_iommu_aperture_order = 28;
> + if (strcmp(str, "512M") == 0)
> + amd_iommu_aperture_order = 29;
> + if (strcmp(str, "1G") == 0)
> + amd_iommu_aperture_order = 30;
> + }
could use
memparse(str, &str);
and weight...
YH
* FUJITA Tomonori <[email protected]> wrote:
> On Wed, 9 Jul 2008 10:48:37 +0200
> Ingo Molnar <[email protected]> wrote:
>
> >
> > * Pavel Machek <[email protected]> wrote:
> >
> > > > +config AMD_IOMMU
> > > > + bool "AMD IOMMU support"
> > > > + select SWIOTL
> > >
> > > SWIOTLB?
> >
> > yep, that typo got already fixed via the patch below.
> >
> > Ingo
> >
> > --------------->
> > commit 07c40e8a1acdb56fca485a6deeb252ebf19509a1
> > Author: Ingo Molnar <[email protected]>
> > Date: Fri Jun 27 11:31:28 2008 +0200
> >
> > x86, AMD IOMMU: build fix #3
> >
> > fix typo causing:
> >
> > arch/x86/kernel/built-in.o: In function `__unmap_single':
> > amd_iommu.c:(.text+0x17771): undefined reference to `iommu_area_free'
> > arch/x86/kernel/built-in.o: In function `__map_single':
> > amd_iommu.c:(.text+0x1797a): undefined reference to `iommu_area_alloc'
> > amd_iommu.c:(.text+0x179a2): undefined reference to `iommu_area_alloc'
> >
> > Signed-off-by: Ingo Molnar <[email protected]>
> >
> > diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> > index 62a2820..8aae462 100644
> > --- a/arch/x86/Kconfig
> > +++ b/arch/x86/Kconfig
> > @@ -551,7 +551,7 @@ config CALGARY_IOMMU_ENABLED_BY_DEFAULT
> >
> > config AMD_IOMMU
> > bool "AMD IOMMU support"
> > - select SWIOTL
> > + select SWIOTLB
> > depends on X86_64 && PCI && ACPI
> > help
> > Select this to get support for AMD IOMMU hardware in your system.
>
> This fix is fine though the explicit fix is that AMD_IOMMU depends on
> IOMMU_HELPER since they are the IOMMU helper functions. SWIOTLB
> requires IOMMU_HELPER so declaring that AMD_IOMMU depends on SWIOTLB
> properly fixes the problems.
>
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index f98e1c3..4d85501 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -579,7 +579,7 @@ config SWIOTLB
> 3 GB of memory. If unsure, say Y.
>
> config IOMMU_HELPER
> - def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB)
> + def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU)
applied to tip/x86/core - thanks.
Ingo
On Wed, Jul 09, 2008 at 07:36:01PM -0700, Andrew Morton wrote:
> On Wed, 9 Jul 2008 18:50:55 -0700 Arjan van de Ven <[email protected]> wrote:
>
> > On Wed, 9 Jul 2008 18:38:23 -0700
> > Andrew Morton <[email protected]> wrote:
> >
> > > On Thu, 26 Jun 2008 21:27:38 +0200 Joerg Roedel
> > > <[email protected]> wrote:
> > >
> > > > +/* helper macros */
> > > > +#define LOW_U32(x) ((x) & ((1ULL << 32)-1))
> > > > +#define HIGH_U32(x) (LOW_U32((x) >> 32))
> > >
> > > Please avoid putting general-purpose helpers into private header
> > > files.
> >
> > especially broken helpers.
> >
> > A >> 32 on something that may be a 32 bit entry is bad; int32 >> 32...
> > gcc can (and does!) optimize that out.
> >
> > (because it first gets translated into a SHR x86 instruction which then
> > notices it's encoded as a zero shift.. which then gets deleted)
> >
>
> Well yeah. upper_32_bits() gets it all corect. We could do a
> lower_32_bits() I suppose, if it's useful.
Ok cool, I agree. I will replace the HIGH_U32 macro with the
upper_32_bits() function and the other one with lower_32_bits() if it
exists. Who will take the patch for the introduction of that function?
Joerg
--
| AMD Saxony Limited Liability Company & Co. KG
Operating | Wilschdorfer Landstr. 101, 01109 Dresden, Germany
System | Register Court Dresden: HRA 4896
Research | General Partner authorized to represent:
Center | AMD Saxony LLC (Wilmington, Delaware, US)
| General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy
On Wed, Jul 09, 2008 at 06:41:41PM -0700, Andrew Morton wrote:
> On Thu, 26 Jun 2008 21:27:39 +0200 Joerg Roedel <[email protected]> wrote:
>
> > This patch adds the required data structures and constants required to parse
> > the ACPI table for the AMD IOMMU.
> >
> > Signed-off-by: Joerg Roedel <[email protected]>
> > ---
> > arch/x86/kernel/amd_iommu_init.c | 101 ++++++++++++++++++++++++++++++++++++++
> > 1 files changed, 101 insertions(+), 0 deletions(-)
> > create mode 100644 arch/x86/kernel/amd_iommu_init.c
> >
> > diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
> > new file mode 100644
> > index 0000000..6fce5ab
> > --- /dev/null
> > +++ b/arch/x86/kernel/amd_iommu_init.c
> > @@ -0,0 +1,101 @@
> > +/*
> > + * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
> > + * Author: Joerg Roedel <[email protected]>
> > + * Leo Duran <[email protected]>
> > + *
> > + * This program is free software; you can redistribute it and/or modify it
> > + * under the terms of the GNU General Public License version 2 as published
> > + * by the Free Software Foundation.
> > + *
> > + * This program is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> > + * GNU General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU General Public License
> > + * along with this program; if not, write to the Free Software
> > + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
> > + */
> > +
> > +#include <linux/pci.h>
> > +#include <linux/acpi.h>
> > +#include <linux/gfp.h>
> > +#include <linux/list.h>
> > +#include <asm/pci-direct.h>
> > +#include <asm/amd_iommu_types.h>
> > +#include <asm/gart.h>
> > +
> > +/*
> > + * definitions for the ACPI scanning code
> > + */
> > +#define UPDATE_LAST_BDF(x) do {\
> > + if ((x) > amd_iommu_last_bdf) \
> > + amd_iommu_last_bdf = (x); \
> > + } while (0);
>
> Does this need to exist?
>
> If so, it could and should be implemented as a C function, not as a macro.
Yes, this macro is useful because it makes the code look cleaner. But I
will convert it into a function.
> > +#define DEVID(bus, devfn) (((bus) << 8) | (devfn))
>
> I suspect that a helper function for this already exists.
I have not found one. Macros exist for slot/function of PCI devices. But
nothing which also includes the bus.
> > +#define PCI_BUS(x) (((x) >> 8) & 0xff)
>
> And this. If it doesn't exist, probably it should?
Ok, I move that one to linux/pci.h.
>
> > +#define IVRS_HEADER_LENGTH 48
> > +#define TBL_SIZE(x) (1 << (PAGE_SHIFT + get_order(amd_iommu_last_bdf * (x))))
>
> Can be implemented in C.
Right. I will convert that into a function too.
>
> > +#define ACPI_IVHD_TYPE 0x10
> > +#define ACPI_IVMD_TYPE_ALL 0x20
> > +#define ACPI_IVMD_TYPE 0x21
> > +#define ACPI_IVMD_TYPE_RANGE 0x22
> > +
> > +#define IVHD_DEV_ALL 0x01
> > +#define IVHD_DEV_SELECT 0x02
> > +#define IVHD_DEV_SELECT_RANGE_START 0x03
> > +#define IVHD_DEV_RANGE_END 0x04
> > +#define IVHD_DEV_ALIAS 0x42
> > +#define IVHD_DEV_ALIAS_RANGE 0x43
> > +#define IVHD_DEV_EXT_SELECT 0x46
> > +#define IVHD_DEV_EXT_SELECT_RANGE 0x47
> > +
> > +#define IVHD_FLAG_HT_TUN_EN 0x00
> > +#define IVHD_FLAG_PASSPW_EN 0x01
> > +#define IVHD_FLAG_RESPASSPW_EN 0x02
> > +#define IVHD_FLAG_ISOC_EN 0x03
> > +
> > +#define IVMD_FLAG_EXCL_RANGE 0x08
> > +#define IVMD_FLAG_UNITY_MAP 0x01
> > +
> > +#define ACPI_DEVFLAG_INITPASS 0x01
> > +#define ACPI_DEVFLAG_EXTINT 0x02
> > +#define ACPI_DEVFLAG_NMI 0x04
> > +#define ACPI_DEVFLAG_SYSMGT1 0x10
> > +#define ACPI_DEVFLAG_SYSMGT2 0x20
> > +#define ACPI_DEVFLAG_LINT0 0x40
> > +#define ACPI_DEVFLAG_LINT1 0x80
> > +#define ACPI_DEVFLAG_ATSDIS 0x10000000
> > +
> > +struct ivhd_header {
> > + u8 type;
> > + u8 flags;
> > + u16 length;
> > + u16 devid;
> > + u16 cap_ptr;
> > + u64 mmio_phys;
> > + u16 pci_seg;
> > + u16 info;
> > + u32 reserved;
> > +} __attribute__((packed));
> > +
> > +struct ivhd_entry {
> > + u8 type;
> > + u16 devid;
> > + u8 flags;
> > + u32 ext;
> > +} __attribute__((packed));
> > +
> > +struct ivmd_header {
> > + u8 type;
> > + u8 flags;
> > + u16 length;
> > + u16 devid;
> > + u16 aux;
> > + u64 resv;
> > + u64 range_start;
> > + u64 range_length;
> > +} __attribute__((packed));
>
> I can make a guess as to why these are "packed", but a code comment would
> prevent guesswork.
I am currently preparing patches to add a lot of comments to the code. I
will also add a comment to these structures.
Joerg
--
| AMD Saxony Limited Liability Company & Co. KG
Operating | Wilschdorfer Landstr. 101, 01109 Dresden, Germany
System | Register Court Dresden: HRA 4896
Research | General Partner authorized to represent:
Center | AMD Saxony LLC (Wilmington, Delaware, US)
| General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy
On Wed, Jul 09, 2008 at 06:43:42PM -0700, Andrew Morton wrote:
> On Thu, 26 Jun 2008 21:27:40 +0200 Joerg Roedel <[email protected]> wrote:
>
> > This patch adds the data structures which will contain the information read
> > from the ACPI table.
> >
> > Signed-off-by: Joerg Roedel <[email protected]>
> > ---
> > arch/x86/kernel/amd_iommu_init.c | 17 +++++++++++++++++
> > 1 files changed, 17 insertions(+), 0 deletions(-)
> >
> > diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
> > index 6fce5ab..0ad8cf9 100644
> > --- a/arch/x86/kernel/amd_iommu_init.c
> > +++ b/arch/x86/kernel/amd_iommu_init.c
> > @@ -99,3 +99,20 @@ struct ivmd_header {
> > u64 range_length;
> > } __attribute__((packed));
> >
> > +static int __initdata amd_iommu_disable;
> > +
> > +u16 amd_iommu_last_bdf;
> > +struct list_head amd_iommu_unity_map;
> > +unsigned amd_iommu_aperture_order = 26;
> > +int amd_iommu_isolate;
> > +
> > +struct list_head amd_iommu_list;
> > +struct dev_table_entry *amd_iommu_dev_table;
> > +u16 *amd_iommu_alias_table;
> > +struct amd_iommu **amd_iommu_rlookup_table;
> > +struct protection_domain **amd_iommu_pd_table;
> > +unsigned long *amd_iommu_pd_alloc_bitmap;
> > +
> > +static u32 dev_table_size;
> > +static u32 alias_table_size;
> > +static u32 rlookup_table_size;
>
> hm, one would expect to see the header file which declares the globals in
> the same patch as the .c file which defines them. Whatever.
>
> What locks the list_heads? It would be good to add a code comment at the
> definition site describing that locking.
True. I will add comments that describe the locking of the driver too.
The amd_iommu_list is not locked, for example, because it is only used at
driver initialization time or at resume (when that is finally implemented).
The list elements themselves have their own locks, as do the protection
domains.
Joerg
--
| AMD Saxony Limited Liability Company & Co. KG
Operating | Wilschdorfer Landstr. 101, 01109 Dresden, Germany
System | Register Court Dresden: HRA 4896
Research | General Partner authorized to represent:
Center | AMD Saxony LLC (Wilmington, Delaware, US)
| General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy
On Wed, Jul 09, 2008 at 06:55:12PM -0700, Andrew Morton wrote:
> On Thu, 26 Jun 2008 21:27:50 +0200 Joerg Roedel <[email protected]> wrote:
>
> > This patch puts the AMD IOMMU ACPI table parsing and hardware initialization
> > functions together to the main intialization routine.
> >
> > index 555fcc9..c792ddc 100644
> > --- a/arch/x86/kernel/amd_iommu_init.c
> > +++ b/arch/x86/kernel/amd_iommu_init.c
> > @@ -643,3 +643,129 @@ static int __init init_memory_definitions(struct acpi_table_header *table)
> > return 0;
> > }
> >
> > +int __init amd_iommu_init(void)
> > +{
> > + int i, ret = 0;
> > +
> > +
> > + if (amd_iommu_disable) {
> > + printk(KERN_INFO "AMD IOMMU disabled by kernel command line\n");
> > + return 0;
> > + }
> > +
> > + /*
> > + * First parse ACPI tables to find the largest Bus/Dev/Func
> > + * we need to handle. Upon this information the shared data
> > + * structures for the IOMMUs in the system will be allocated
> > + */
> > + if (acpi_table_parse("IVRS", find_last_devid_acpi) != 0)
> > + return -ENODEV;
> > +
> > + dev_table_size = TBL_SIZE(DEV_TABLE_ENTRY_SIZE);
> > + alias_table_size = TBL_SIZE(ALIAS_TABLE_ENTRY_SIZE);
> > + rlookup_table_size = TBL_SIZE(RLOOKUP_TABLE_ENTRY_SIZE);
> > +
> > + ret = -ENOMEM;
> > +
> > + /* Device table - directly used by all IOMMUs */
> > + amd_iommu_dev_table = (void *)__get_free_pages(GFP_KERNEL,
> > + get_order(dev_table_size));
>
> Is there a reason for using __get_free_pages() in this manner everywhere?
> kmalloc() or kzalloc() are unsuitable?
Yes. The device table has to be page aligned and has a maximum size of
2 MB.
> > + if (amd_iommu_dev_table == NULL)
> > + goto out;
> > +
> > + /*
> > + * Alias table - map PCI Bus/Dev/Func to Bus/Dev/Func the
> > + * IOMMU see for that device
> > + */
> > + amd_iommu_alias_table = (void *)__get_free_pages(GFP_KERNEL,
> > + get_order(alias_table_size));
Hmm, maybe this table will fit into a kmalloc allocation. Its maximum
size is 128 kb.
> > + if (amd_iommu_alias_table == NULL)
> > + goto free;
> > +
> > + /* IOMMU rlookup table - find the IOMMU for a specific device */
> > + amd_iommu_rlookup_table = (void *)__get_free_pages(GFP_KERNEL,
> > + get_order(rlookup_table_size));
> > + if (amd_iommu_rlookup_table == NULL)
> > + goto free;
> > +
> > + /*
> > + * Protection Domain table - maps devices to protection domains
> > + * This table has the same size as the rlookup_table
> > + */
> > + amd_iommu_pd_table = (void *)__get_free_pages(GFP_KERNEL,
> > + get_order(rlookup_table_size));
The rlookup and the pd table have a maximum size of 1 MB.
> > + if (amd_iommu_pd_table == NULL)
> > + goto free;
> > +
> > + amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages(GFP_KERNEL,
> > + get_order(MAX_DOMAIN_ID/8));
> > + if (amd_iommu_pd_alloc_bitmap == NULL)
> > + goto free;
> > +
> > + /*
> > + * memory is allocated now; initialize the device table with all zeroes
> > + * and let all alias entries point to itself
> > + */
> > + memset(amd_iommu_dev_table, 0, dev_table_size);
> > + for (i = 0; i < amd_iommu_last_bdf; ++i)
> > + amd_iommu_alias_table[i] = i;
> > +
> > + memset(amd_iommu_pd_table, 0, rlookup_table_size);
> > + memset(amd_iommu_pd_alloc_bitmap, 0, MAX_DOMAIN_ID / 8);
>
> Use of __GFP_ZERO in here would relieve us of a lot of memsets.
>
> > + /*
> > + * never allocate domain 0 because its used as the non-allocated and
> > + * error value placeholder
> > + */
> > + amd_iommu_pd_alloc_bitmap[0] = 1;
> > +
> > + /*
> > + * now the data structures are allocated and basically initialized
> > + * start the real acpi table scan
> > + */
> > + ret = -ENODEV;
> > + if (acpi_table_parse("IVRS", init_iommu_all) != 0)
> > + goto free;
> > +
> > + if (acpi_table_parse("IVRS", init_memory_definitions) != 0)
> > + goto free;
> > +
> > + printk(KERN_INFO "AMD IOMMU: aperture size is %d MB\n",
> > + (1 << (amd_iommu_aperture_order-20)));
> > +
> > + printk(KERN_INFO "AMD IOMMU: device isolation ");
> > + if (amd_iommu_isolate)
> > + printk("enabled\n");
> > + else
> > + printk("disabled\n");
> > +
> > +out:
> > + return ret;
> > +
> > +free:
> > + if (amd_iommu_pd_alloc_bitmap)
> > + free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, 1);
> > +
> > + if (amd_iommu_pd_table)
> > + free_pages((unsigned long)amd_iommu_pd_table,
> > + get_order(rlookup_table_size));
> > +
> > + if (amd_iommu_rlookup_table)
> > + free_pages((unsigned long)amd_iommu_rlookup_table,
> > + get_order(rlookup_table_size));
> > +
> > + if (amd_iommu_alias_table)
> > + free_pages((unsigned long)amd_iommu_alias_table,
> > + get_order(alias_table_size));
> > +
> > + if (amd_iommu_dev_table)
> > + free_pages((unsigned long)amd_iommu_dev_table,
> > + get_order(dev_table_size));
>
> free_pages(0) is legal, so all these tests are unneeded, I expect.
>
> This remains true if you decide to switch to kzalloc().
Thanks for that hint. I will remove the checks.
> > + free_iommu_all();
> > +
> > + free_unity_maps();
> > +
> > + goto out;
> > +}
>
>
--
| AMD Saxony Limited Liability Company & Co. KG
Operating | Wilschdorfer Landstr. 101, 01109 Dresden, Germany
System | Register Court Dresden: HRA 4896
Research | General Partner authorized to represent:
Center | AMD Saxony LLC (Wilmington, Delaware, US)
| General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy
On Thu, Jul 10, 2008 at 12:04:17AM -0700, Yinghai Lu wrote:
> On Thu, Jun 26, 2008 at 12:27 PM, Joerg Roedel <[email protected]> wrote:
> > This patch adds two parameters to the kernel command line to control behavior
> > of the AMD IOMMU.
> >
> > Signed-off-by: Joerg Roedel <[email protected]>
> > ---
> > arch/x86/kernel/amd_iommu_init.c | 34 ++++++++++++++++++++++++++++++++++
> > 1 files changed, 34 insertions(+), 0 deletions(-)
> >
> > diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
> > index d7a75bf..ec6f13b 100644
> > --- a/arch/x86/kernel/amd_iommu_init.c
> > +++ b/arch/x86/kernel/amd_iommu_init.c
> > @@ -789,3 +789,37 @@ void __init amd_iommu_detect(void)
> > }
> > }
> >
> > +static int __init parse_amd_iommu_options(char *str)
> > +{
> > + for (; *str; ++str) {
> > + if (strcmp(str, "off") == 0)
> > + amd_iommu_disable = 1;
> > + if (strcmp(str, "isolate") == 0)
> > + amd_iommu_isolate = 1;
> > + }
> > +
> > + return 1;
> > +}
> > +
> > +static int __init parse_amd_iommu_size_options(char *str)
> > +{
> > + for (; *str; ++str) {
> > + if (strcmp(str, "32M") == 0)
> > + amd_iommu_aperture_order = 25;
> > + if (strcmp(str, "64M") == 0)
> > + amd_iommu_aperture_order = 26;
> > + if (strcmp(str, "128M") == 0)
> > + amd_iommu_aperture_order = 27;
> > + if (strcmp(str, "256M") == 0)
> > + amd_iommu_aperture_order = 28;
> > + if (strcmp(str, "512M") == 0)
> > + amd_iommu_aperture_order = 29;
> > + if (strcmp(str, "1G") == 0)
> > + amd_iommu_aperture_order = 30;
> > + }
>
> could use
> memparse(str, &str);
>
> and weight...
Thanks for that hint. I wasn't aware of memparse.
Joerg
--
| AMD Saxony Limited Liability Company & Co. KG
Operating | Wilschdorfer Landstr. 101, 01109 Dresden, Germany
System | Register Court Dresden: HRA 4896
Research | General Partner authorized to represent:
Center | AMD Saxony LLC (Wilmington, Delaware, US)
| General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy
On Thu, Jul 10, 2008 at 01:25:26PM +0900, FUJITA Tomonori wrote:
> On Wed, 9 Jul 2008 19:01:40 -0700
> Andrew Morton <[email protected]> wrote:
>
> > On Thu, 26 Jun 2008 21:27:53 +0200 Joerg Roedel <[email protected]> wrote:
> >
> > > This patch adds generic stuff used by the mapping and unmapping code for the
> > > AMD IOMMU.
> > >
> > > Signed-off-by: Joerg Roedel <[email protected]>
> > > ---
> > > arch/x86/kernel/amd_iommu.c | 40 ++++++++++++++++++++++++++++++++++++++++
> > > 1 files changed, 40 insertions(+), 0 deletions(-)
> > > create mode 100644 arch/x86/kernel/amd_iommu.c
> > >
> > > diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
> > > new file mode 100644
> > > index 0000000..90392c7
> > > --- /dev/null
> > > +++ b/arch/x86/kernel/amd_iommu.c
> > > @@ -0,0 +1,40 @@
> > > +/*
> > > + * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
> > > + * Author: Joerg Roedel <[email protected]>
> > > + * Leo Duran <[email protected]>
> > > + *
> > > + * This program is free software; you can redistribute it and/or modify it
> > > + * under the terms of the GNU General Public License version 2 as published
> > > + * by the Free Software Foundation.
> > > + *
> > > + * This program is distributed in the hope that it will be useful,
> > > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> > > + * GNU General Public License for more details.
> > > + *
> > > + * You should have received a copy of the GNU General Public License
> > > + * along with this program; if not, write to the Free Software
> > > + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
> > > + */
> > > +
> > > +#include <linux/pci.h>
> > > +#include <linux/gfp.h>
> > > +#include <linux/bitops.h>
> > > +#include <linux/scatterlist.h>
> > > +#include <linux/iommu-helper.h>
> > > +#include <asm/proto.h>
> > > +#include <asm/gart.h>
> > > +#include <asm/amd_iommu_types.h>
> > > +
> > > +#define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
> >
> > Doesn't need to be implemented in a macro, hence should be implemented in C,
> > please.
> >
> > > +#define to_pages(addr, size) \
> > > + (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
> >
> > Does this patchset add any comments *at all*? Is it really all so obvious
> > that this was justifiable?
> >
> > I don't know what this function does, but it shouldn't be doing it here.
> > Either it duplicates an existing helper or it should be implemented in mm.h
>
> I think that this calculates the number of IOMMU pages for a
> request. Every IOMMUs has a similar function so it might be better to
> add a helper function to include/linux/iommu-helper.h.
>
> Someone except for IOMMU need this? If so, it would be better to add
> this to a different header file.
>
>
> diff --git a/include/linux/iommu-helper.h b/include/linux/iommu-helper.h
> index c975caf..9c29e65 100644
> --- a/include/linux/iommu-helper.h
> +++ b/include/linux/iommu-helper.h
> @@ -8,3 +8,14 @@ extern unsigned long iommu_area_alloc(unsigned long *map, unsigned long size,
> unsigned long align_mask);
> extern void iommu_area_free(unsigned long *map, unsigned long start,
> unsigned int nr);
> +
> +static inline unsigned long iommu_num_pages(unsigned long addr,
> + unsigned long len)
> +{
> + unsigned long npages;
> +
> + npages = ALIGN(addr + len, 1UL << IOMMU_PAGE_SHIFT) -
> + (addr & ~((1UL << IOMMU_PAGE_SHIFT) - 1));
> +
> + return (npages >> IOMMU_PAGE_SHIFT);
> +}
>
Yeah. I fully agree that this function would make sense in the
iommu-helper code. Will you send a patch introducing that function
upstream? I will change my code to use it if it is merged.
Thanks,
Joerg
--
| AMD Saxony Limited Liability Company & Co. KG
Operating | Wilschdorfer Landstr. 101, 01109 Dresden, Germany
System | Register Court Dresden: HRA 4896
Research | General Partner authorized to represent:
Center | AMD Saxony LLC (Wilmington, Delaware, US)
| General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy
On Wed, Jul 09, 2008 at 07:04:53PM -0700, Andrew Morton wrote:
> On Thu, 26 Jun 2008 21:27:55 +0200 Joerg Roedel <[email protected]> wrote:
> > +static int iommu_completion_wait(struct amd_iommu *iommu)
> > +{
> > + int ret;
> > + struct command cmd;
> > + volatile u64 ready = 0;
> > + unsigned long ready_phys = virt_to_phys(&ready);
> > +
> > + memset(&cmd, 0, sizeof(cmd));
> > + cmd.data[0] = LOW_U32(ready_phys) | CMD_COMPL_WAIT_STORE_MASK;
> > + cmd.data[1] = HIGH_U32(ready_phys);
> > + cmd.data[2] = 1; /* value written to 'ready' */
> > + CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
> > +
> > + iommu->need_sync = 0;
> > +
> > + ret = iommu_queue_command(iommu, &cmd);
> > +
> > + if (ret)
> > + return ret;
> > +
> > + while (!ready)
> > + cpu_relax();
>
> I busywait is a big red flag. An uncommented one is not acceptable IMO.
>
> What is the maximum duration?
This depends on the hardware and how many commands have to be executed
before that command.
> Is it not possible to sleep and await an interrupt?
Yes it is. I plan to do so as an optimization to the current code. But
this needs changes to the address allocator to not allocate addresses
which have been freed but not yet flushed from the IOMMU TLB.
> Should we implement a timeout in case the hardware is busted?
That is the best option, I think. Granted, if the hardware is busted the
whole machine has a problem anyway, because DMA no longer works reliably.
But I will add a counter to that loop to provide an emergency exit from it
until the optimization is implemented (which is a bit more work).
> > + return 0;
> > +}
> > +
> > +static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
> > +{
> > + struct command cmd;
>
> `command' was a poorly-chosen identifier for this structure. Too generic.
Hmm, maybe iommu_cmd. I don't want the identifier to be too long, like
amd_iommu_command, because it is only used in that file and thus it's
clear that it belongs to the AMD IOMMU.
Joerg
--
| AMD Saxony Limited Liability Company & Co. KG
Operating | Wilschdorfer Landstr. 101, 01109 Dresden, Germany
System | Register Court Dresden: HRA 4896
Research | General Partner authorized to represent:
Center | AMD Saxony LLC (Wilmington, Delaware, US)
| General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy
On Wed, Jul 09, 2008 at 07:14:59PM -0700, Andrew Morton wrote:
> On Thu, 26 Jun 2008 21:27:58 +0200 Joerg Roedel <[email protected]> wrote:
>
> > +static u16 domain_id_alloc(void)
> > +{
> > + unsigned long flags;
> > + int id;
> > +
> > + write_lock_irqsave(&amd_iommu_devtable_lock, flags);
> > + id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID);
> > + BUG_ON(id == 0);
> > + if (id > 0 && id < MAX_DOMAIN_ID)
> > + __set_bit(id, amd_iommu_pd_alloc_bitmap);
> > + else
> > + id = 0;
> > + write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
> > +
> > + return id;
> > +}
>
> Presumably this is not performance-critical. If it is, an IDR
> tree would help.
No, it's not performance-critical. Domain ID allocation is currently only
done at IOMMU initialization. In the future it may also be used at
virtual-machine creation, once we have device passthrough for KVM.
Joerg
--
| AMD Saxony Limited Liability Company & Co. KG
Operating | Wilschdorfer Landstr. 101, 01109 Dresden, Germany
System | Register Court Dresden: HRA 4896
Research | General Partner authorized to represent:
Center | AMD Saxony LLC (Wilmington, Delaware, US)
| General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy
On Thu, 10 Jul 2008 14:12:22 +0200 Joerg Roedel <[email protected]> wrote:
> On Wed, Jul 09, 2008 at 07:36:01PM -0700, Andrew Morton wrote:
> > On Wed, 9 Jul 2008 18:50:55 -0700 Arjan van de Ven <[email protected]> wrote:
> >
> > > On Wed, 9 Jul 2008 18:38:23 -0700
> > > Andrew Morton <[email protected]> wrote:
> > >
> > > > On Thu, 26 Jun 2008 21:27:38 +0200 Joerg Roedel
> > > > <[email protected]> wrote:
> > > >
> > > > > +/* helper macros */
> > > > > +#define LOW_U32(x) ((x) & ((1ULL << 32)-1))
> > > > > +#define HIGH_U32(x) (LOW_U32((x) >> 32))
> > > >
> > > > Please avoid putting general-purpose helpers into private header
> > > > files.
> > >
> > > especially broken helpers.
> > >
> > > A >> 32 on something that may be a 32 bit entry is bad; int32 >> 32...
> > > gcc can (and does!) optimize that out.
> > >
> > > (because it first gets translated into a SHR x86 instruction which then
> > > notices it's encoded as a zero shift.. which then gets deleted)
> > >
> >
> > Well yeah. upper_32_bits() gets it all corect. We could do a
> > lower_32_bits() I suppose, if it's useful.
>
> Ok cool, I agree. I will replace the HIGH_U32 macro with the
> upper_32_bits() function and the other one with lower_32_bits() if it
> exists. Who will take the patch for the introduction of that function?
>
I can do that. We can slip it into mainline as soon as it's ready so that
the for-2.6.27 patches continue to work.
Or we can just do it later, as a followup thing. Probably this is better.
On Thu, 10 Jul 2008 14:37:47 +0200 Joerg Roedel <[email protected]> wrote:
> > Is there a reason for using __get_free_pages() in this manner everywhere?
> > kmalloc() or kzalloc() are unsuitable?
>
> Yes. The device table has to be page aligned and has a maximum size of
> 2 MB.
OK
> > > + if (amd_iommu_dev_table == NULL)
> > > + goto out;
> > > +
> > > + /*
> > > + * Alias table - map PCI Bus/Dev/Func to Bus/Dev/Func the
> > > + * IOMMU see for that device
> > > + */
> > > + amd_iommu_alias_table = (void *)__get_free_pages(GFP_KERNEL,
> > > + get_order(alias_table_size));
>
> Hmm, maybe this table will fit into a kmalloc allocation. Its maximum
> size is 128 kb.
Might as well keep it all consistent in that case.
On Wed, Jul 09, 2008 at 07:26:06PM -0700, Andrew Morton wrote:
> On Thu, 26 Jun 2008 21:28:01 +0200 Joerg Roedel <[email protected]> wrote:
>
> > This patch adds the dma_ops specific mapping functions for single mappings.
> >
> > Signed-off-by: Joerg Roedel <[email protected]>
> > ---
> > arch/x86/kernel/amd_iommu.c | 59 +++++++++++++++++++++++++++++++++++++++++++
> > 1 files changed, 59 insertions(+), 0 deletions(-)
> >
> > diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
> > index e00a3e7..b4079f6 100644
> > --- a/arch/x86/kernel/amd_iommu.c
> > +++ b/arch/x86/kernel/amd_iommu.c
> > @@ -40,6 +40,11 @@ struct command {
> > static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
> > struct unity_map_entry *e);
> >
> > +static int iommu_has_npcache(struct amd_iommu *iommu)
> > +{
> > + return iommu->cap & IOMMU_CAP_NPCACHE;
> > +}
> > +
> > static int __iommu_queue_command(struct amd_iommu *iommu, struct command *cmd)
> > {
> > u32 tail, head;
> > @@ -641,3 +646,57 @@ static void __unmap_single(struct amd_iommu *iommu,
> > dma_ops_free_addresses(dma_dom, dma_addr, pages);
> > }
> >
> > +static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
> > + size_t size, int dir)
> > +{
> > + unsigned long flags;
> > + struct amd_iommu *iommu;
> > + struct protection_domain *domain;
> > + u16 devid;
> > + dma_addr_t addr;
> > +
> > + get_device_resources(dev, &iommu, &domain, &devid);
> > +
> > + if (iommu == NULL || domain == NULL)
> > + return (dma_addr_t)paddr;
>
> OK, a test case. A reader of your code (ie: me) wants to find out what
> this code is doing. What is the *meaning* of iommu == NULL || domain ==
> NULL here?
>
> I go look at the (undocumented) get_device_resources() and I see that this:
>
> if (_bdf >= amd_iommu_last_bdf) {
>
> happened. I don't know what that semantically means and I gave up.
>
> I'm not saying that the code is unmaintainable, but I would assert that it
> is a heck of a lot harder to maintain than it could be, and than it should
> be.
>
> get_device_resources() has an open-coded copy of your DEVID() macro, btw.
Ok, that's a bit weird. If the check above fails it means that we have a
device the IOMMU is not translating. In this case the code just returns
the physical address of the buffer. I will add a comment for that too.
Joerg
--
| AMD Saxony Limited Liability Company & Co. KG
Operating | Wilschdorfer Landstr. 101, 01109 Dresden, Germany
System | Register Court Dresden: HRA 4896
Research | General Partner authorized to represent:
Center | AMD Saxony LLC (Wilmington, Delaware, US)
| General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy
On Thu, Jul 10, 2008 at 06:03:51AM -0700, Andrew Morton wrote:
> On Thu, 10 Jul 2008 14:37:47 +0200 Joerg Roedel <[email protected]> wrote:
>
> > > Is there a reason for using __get_free_pages() in this manner everywhere?
> > > kmalloc() or kzalloc() are unsuitable?
> >
> > Yes. The device table has to be page aligned and has a maximum size of
> > 2 MB.
>
> OK
>
> > > > + if (amd_iommu_dev_table == NULL)
> > > > + goto out;
> > > > +
> > > > + /*
> > > > + * Alias table - map PCI Bus/Dev/Func to Bus/Dev/Func the
> > > > + * IOMMU see for that device
> > > > + */
> > > > + amd_iommu_alias_table = (void *)__get_free_pages(GFP_KERNEL,
> > > > + get_order(alias_table_size));
> >
> > Hmm, maybe this table will fit into a kmalloc allocation. Its maximum
> > size is 128 kb.
>
> Might as well keep it all consistent in that case.
Yeah ok, I agree. Thanks for your review and comments. I try to push out
the updates today.
Joerg
--
| AMD Saxony Limited Liability Company & Co. KG
Operating | Wilschdorfer Landstr. 101, 01109 Dresden, Germany
System | Register Court Dresden: HRA 4896
Research | General Partner authorized to represent:
Center | AMD Saxony LLC (Wilmington, Delaware, US)
| General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy
On Wed, Jul 09, 2008 at 07:18:27PM -0700, Andrew Morton wrote:
> On Thu, 26 Jun 2008 21:27:59 +0200 Joerg Roedel <[email protected]> wrote:
>
> > This patch adds functions necessary to find the IOMMU resources for a specific
> > device.
> >
> > Signed-off-by: Joerg Roedel <[email protected]>
> > ---
> > arch/x86/kernel/amd_iommu.c | 75 +++++++++++++++++++++++++++++++++++++++++++
> > 1 files changed, 75 insertions(+), 0 deletions(-)
> >
> > diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
> > index c43d15d..47e80b5 100644
> > --- a/arch/x86/kernel/amd_iommu.c
> > +++ b/arch/x86/kernel/amd_iommu.c
> > @@ -461,3 +461,78 @@ free_dma_dom:
> > return NULL;
> > }
> >
> > +static struct protection_domain *domain_for_device(u16 devid)
> > +{
> > + struct protection_domain *dom;
> > + unsigned long flags;
> > +
> > + read_lock_irqsave(&amd_iommu_devtable_lock, flags);
>
> Why is this cheerfully undocumented lock irq-safe? Is it ever taken from
> IRQ context?
This function is called from the dma-mapping path. As far as I know the
DMA mapping functions can be called from interrupt context.
>
> > + dom = amd_iommu_pd_table[devid];
> > + read_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
> > +
> > + return dom;
> > +}
>
> The locking in this function makes no sense. We drop the lock then return
> a value which the caller cannot use in a race-free fashion, because the
> lock is no longer held.
The lock only protects the protection domain table (and the device
table) itself. It does not protect the values the pointers in that list
point to. In this case its also not racy because a value to that list is
only written once and then never changed again (currently). If this
changes in the future (and it will) I will change the locking too. This
will also need reference counting for 'struct protection_domain' which
is not implemented yet.
Joerg
--
| AMD Saxony Limited Liability Company & Co. KG
Operating | Wilschdorfer Landstr. 101, 01109 Dresden, Germany
System | Register Court Dresden: HRA 4896
Research | General Partner authorized to represent:
Center | AMD Saxony LLC (Wilmington, Delaware, US)
| General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy
On Thu, Jul 10, 2008 at 03:31:36PM +0200, Joerg Roedel wrote:
>
> Yeah ok, I agree. Thanks for your review and comments. I try to push out
> the updates today.
Ok, I am done with the patches (except the ones that do not only touch
my code). They are not as small as I expected so I will test them over
the night before posting. If everything goes well I will send them out
tomorrow morning.
Joerg
--
| AMD Saxony Limited Liability Company & Co. KG
Operating | Wilschdorfer Landstr. 101, 01109 Dresden, Germany
System | Register Court Dresden: HRA 4896
Research | General Partner authorized to represent:
Center | AMD Saxony LLC (Wilmington, Delaware, US)
| General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy
On Thu, 10 Jul 2008 08:26:03 +0200 Ingo Molnar wrote:
>
> * Andrew Morton <[email protected]> wrote:
>
> > On Thu, 10 Jul 2008 13:25:23 +0900 FUJITA Tomonori <[email protected]> wrote:
> >
> > > I have one related question, we should document all the kernel boot
> > > parameters in Documentation/kernel-parameters.txt?
> > >
> > > Some of x86_64 boot parameters are in
> > > Documentations/x86/x86_64/boot-options.txt. The parameters of IOMMUs
> > > are scattered. It's confusing.
> >
> > A single monolithic file has advantages. I think I'd prefer that
> > personally. I'd forgotten that the x86-specific one exists, actually.
>
> yeah, it should be all in Documentation/kernel-parameters.txt.
Agreed.
---
~Randy
Linux Plumbers Conference, 17-19 September 2008, Portland, Oregon USA
http://linuxplumbersconf.org/
On Thu, 10 Jul 2008 18:46:44 +0200 Joerg Roedel <[email protected]> wrote:
> > > +static struct protection_domain *domain_for_device(u16 devid)
> > > +{
> > > + struct protection_domain *dom;
> > > + unsigned long flags;
> > > +
> > > + read_lock_irqsave(&amd_iommu_devtable_lock, flags);
> >
> > Why is this cheerfully undocumented lock irq-safe? Is it ever taken from
> > IRQ context?
>
> This function is called from the dma-mapping path. As far as I know the
> DMA mapping functions can be called from interrupt context.
>
> >
> > > + dom = amd_iommu_pd_table[devid];
> > > + read_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
> > > +
> > > + return dom;
> > > +}
> >
> > The locking in this function makes no sense. We drop the lock then return
> > a value which the caller cannot use in a race-free fashion, because the
> > lock is no longer held.
>
> The lock only protects the protection domain table (and the device
> table) itself. It does not protect the values the pointers in that list
> point to. In this case its also not racy because a value to that list is
> only written once and then never changed again (currently). If this
> changes in the future (and it will) I will change the locking too. This
> will also need reference counting for 'struct protection_domain' which
> is not implemented yet.
So no locking is needed in this function.
Joerg Roedel <[email protected]> writes:
> Hi,
>
> this is the first post of the initial driver for AMD IOMMU hardware. The driver
> is tested on hardware with various loads (disk and network) and showed no
> problems.
>
> It currently supports DMA remapping using the dma_ops API in the x86
> architecture code. It also supports isolation of device DMA address spaces as
> far as the hardware allows that (means each device can get its own DMA address
> space and can't access the DMA memory of other devices).
>
> Please give this code a good review and send me your comments about it so that
> I can fix all possible bugs and objections and move this driver forward to
> merging quality.
Small question. Does this IOMMU also support irq remapping?
Intel is starting to merge support for their iommu that supports irq
remapping and I am curious what the implications are going to be to
support both of these iommus?
Assuming there is irq remapping support, it isn't tied to x2apic
support in the cpus is it?
Eric
On Fri, Jul 11, 2008 at 03:22:01AM -0700, Eric W. Biederman wrote:
> Joerg Roedel <[email protected]> writes:
>
> > Hi,
> >
> > this is the first post of the initial driver for AMD IOMMU hardware. The driver
> > is tested on hardware with various loads (disk and network) and showed no
> > problems.
> >
> > It currently supports DMA remapping using the dma_ops API in the x86
> > architecture code. It also supports isolation of device DMA address spaces as
> > far as the hardware allows that (means each device can get its own DMA address
> > space and can't access the DMA memory of other devices).
> >
> > Please give this code a good review and send me your comments about it so that
> > I can fix all possible bugs and objections and move this driver forward to
> > merging quality.
>
> Small question. Does this IOMMU also support irq remapping?
>
> Intel is starting to merge support for their iommu that supports irq
> remapping and I am curious what the implications are going to be to
> support both of these iommus?
Yes, AMD IOMMU also has interrupt remapping support.
> Assuming their is irq remapping support it isn't tied to x2apic
> support in the cpus is it?
Not sure what you mean here. I am not aware of an x2apic on AMD.
Joerg
--
| AMD Saxony Limited Liability Company & Co. KG
Operating | Wilschdorfer Landstr. 101, 01109 Dresden, Germany
System | Register Court Dresden: HRA 4896
Research | General Partner authorized to represent:
Center | AMD Saxony LLC (Wilmington, Delaware, US)
| General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy
On Friday, July 11, 2008 5:22 AM, Eric W. Biederman wrote:
> Small question. Does this IOMMU also support irq remapping?
>
> Intel is starting to merge support for their iommu that supports irq
> remapping and I am curious what the implications are going to be to
> support both of these iommus?
>
> Assuming their is irq remapping support it isn't tied to x2apic
> support in the cpus is it?
>
Follow-up questions:
(1) Outside of a virtualization environment, is there functional value
for "irq remapping" by an IOMMU? And, if there is, (2) How would this
functionality get abstracted? (e.g., as dma_ops abstracts 'address
remapping' by the IOMMU)
Leo.
"Duran, Leo" <[email protected]> writes:
> On Friday, July 11, 2008 5:22 AM, Eric W. Biederman wrote:
>
>> Small question. Does this IOMMU also support irq remapping?
>>
>> Intel is starting to merge support for their iommu that supports irq
>> remapping and I am curious what the implications are going to be to
>> support both of these iommus?
>>
>> Assuming their is irq remapping support it isn't tied to x2apic
>> support in the cpus is it?
>>
> Follow-up questions:
> (1) Outside of a virtualization environment, is there functional value
> for "irq remapping" by an IOMMU? And, if there is, (2) How would this
> functionality get abstracted? (e.g., as dma_ops abstracts 'address
> remapping' by the IOMMU)
Current x86 hardware ioapics have significant issues with irq
migration. So things like real cpu hotplug are not really
supportable unless there is better hardware or an iommu.
There is a good case for linux to support kernel bypass for some hardware.
From kvm, where linux acts as a hypervisor, to crazy people wanting to run
device drivers in user space, to the classic HPC mpi stack that
wants to go as fast as possible.
Then there is the Intel case where they are extending their cpu apicids
to 32bit and require going through a hypervisor to encode all of the
necessary bits. I think we can go to 16bit cpus without doing that
but it is a squeeze.
If we are going to run under a hypervisor we need at least the
abstraction of going through an iommu (in that case a hypercall)
so we can run under a hypervisor.
So right now we are just starting to design the abstraction, and I'm
hoping we can get as large a perspective on what it should look like
as we can.
I expect the api will look something like:
struct irq_map_info x86_map_irq(irq, cpumask);
Where struct irq_map_info would return the architecture defined bits
that we can program into an msi or an ioapic routing entry.
With x86_map_irq looking up an appropriate ops structure and calling
the iommu specific operation.
Eric
On Thu, 10 Jul 2008 08:26:03 +0200
Ingo Molnar <[email protected]> wrote:
>
> * Andrew Morton <[email protected]> wrote:
>
> > On Thu, 10 Jul 2008 13:25:23 +0900 FUJITA Tomonori <[email protected]> wrote:
> >
> > > I have one related question, we should document all the kernel boot
> > > parameters in Documentation/kernel-parameters.txt?
> > >
> > > Some of x86_64 boot parameters are in
> > > Documentations/x86/x86_64/boot-options.txt. The parameters of IOMMUs
> > > are scattered. It's confusing.
> >
> > A single monolithic file has advantages. I think I'd prefer that
> > personally. I'd forgotten that the x86-specific one exists, actually.
>
> yeah, it should be all in Documentation/kernel-parameters.txt.
Then, let's integrate them right after rc1 (to avoid a lot of conflicts
with patches that change Documentation/kernel-parameters.txt).