Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1759644AbZCMRJn (ORCPT ); Fri, 13 Mar 2009 13:09:43 -0400 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1760160AbZCMRCH (ORCPT ); Fri, 13 Mar 2009 13:02:07 -0400 Received: from gw.goop.org ([64.81.55.164]:57399 "EHLO abulafia.goop.org" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1759466AbZCMRCE (ORCPT ); Fri, 13 Mar 2009 13:02:04 -0400 From: Jeremy Fitzhardinge To: "H. Peter Anvin" Cc: the arch/x86 maintainers , Linux Kernel Mailing List , Xen-devel , David Airlie , Alex Nixon , Jeremy Fitzhardinge , Ian Campbell Subject: [PATCH 07/27] Xen/x86/PCI: Add support for the Xen PCI subsystem Date: Fri, 13 Mar 2009 09:59:52 -0700 Message-Id: <1236963612-14287-8-git-send-email-jeremy@goop.org> X-Mailer: git-send-email 1.6.0.6 In-Reply-To: <1236963612-14287-1-git-send-email-jeremy@goop.org> References: <1236963612-14287-1-git-send-email-jeremy@goop.org> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 12909 Lines: 508 From: Alex Nixon On boot, the system will search to see if a Xen iommu/pci subsystem is available. If the kernel detects it's running in a domain rather than on bare hardware, this subsystem will be used. Otherwise, it falls back to using hardware as usual. The frontend stub lives in arch/x86/pci/xen.c, alongside other sub-arch PCI init code (e.g. olpc.c) (All subsequent fixes, API changes and swiotlb operations folded in.) 
Signed-off-by: Alex Nixon Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ian Campbell --- arch/x86/Kconfig | 4 + arch/x86/include/asm/pci_x86.h | 1 + arch/x86/include/asm/xen/iommu.h | 12 ++ arch/x86/kernel/pci-dma.c | 3 + arch/x86/pci/Makefile | 1 + arch/x86/pci/init.c | 6 + arch/x86/pci/xen.c | 52 +++++++ drivers/pci/Makefile | 2 + drivers/pci/xen-iommu.c | 294 ++++++++++++++++++++++++++++++++++++++ 9 files changed, 375 insertions(+), 0 deletions(-) create mode 100644 arch/x86/include/asm/xen/iommu.h create mode 100644 arch/x86/pci/xen.c create mode 100644 drivers/pci/xen-iommu.c diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 15ec8a2..9092750 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1828,6 +1828,10 @@ config PCI_OLPC def_bool y depends on PCI && OLPC && (PCI_GOOLPC || PCI_GOANY) +config PCI_XEN + def_bool y + depends on XEN_PCI_PASSTHROUGH || XEN_DOM0_PCI + config PCI_DOMAINS def_bool y depends on PCI diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index 5401ca2..34f03a4 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -107,6 +107,7 @@ extern int pci_direct_probe(void); extern void pci_direct_init(int type); extern void pci_pcbios_init(void); extern int pci_olpc_init(void); +extern int pci_xen_init(void); extern void __init dmi_check_pciprobe(void); extern void __init dmi_check_skip_isa_align(void); diff --git a/arch/x86/include/asm/xen/iommu.h b/arch/x86/include/asm/xen/iommu.h new file mode 100644 index 0000000..75df312 --- /dev/null +++ b/arch/x86/include/asm/xen/iommu.h @@ -0,0 +1,12 @@ +#ifndef ASM_X86__XEN_IOMMU_H + +#ifdef CONFIG_PCI_XEN +extern void xen_iommu_init(void); +#else +static inline void xen_iommu_init(void) +{ +} +#endif + +#endif + diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index f293a8d..361fde2 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -9,6 +9,7 @@ #include #include #include +#include static int 
forbid_dac __read_mostly; @@ -265,6 +266,8 @@ EXPORT_SYMBOL(dma_supported); static int __init pci_iommu_init(void) { + xen_iommu_init(); + calgary_iommu_init(); intel_iommu_init(); diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile index d49202e..64182c5 100644 --- a/arch/x86/pci/Makefile +++ b/arch/x86/pci/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_PCI_BIOS) += pcbios.o obj-$(CONFIG_PCI_MMCONFIG) += mmconfig_$(BITS).o direct.o mmconfig-shared.o obj-$(CONFIG_PCI_DIRECT) += direct.o obj-$(CONFIG_PCI_OLPC) += olpc.o +obj-$(CONFIG_PCI_XEN) += xen.o obj-y += fixup.o obj-$(CONFIG_ACPI) += acpi.o diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c index 25a1f8e..4e2f90a 100644 --- a/arch/x86/pci/init.c +++ b/arch/x86/pci/init.c @@ -15,10 +15,16 @@ static __init int pci_arch_init(void) if (!(pci_probe & PCI_PROBE_NOEARLY)) pci_mmcfg_early_init(); +#ifdef CONFIG_PCI_XEN + if (!pci_xen_init()) + return 0; +#endif + #ifdef CONFIG_PCI_OLPC if (!pci_olpc_init()) return 0; /* skip additional checks if it's an XO */ #endif + #ifdef CONFIG_PCI_BIOS pci_pcbios_init(); #endif diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c new file mode 100644 index 0000000..76f803f --- /dev/null +++ b/arch/x86/pci/xen.c @@ -0,0 +1,52 @@ +/* + * Xen PCI Frontend Stub - puts some "dummy" functions in to the Linux + * x86 PCI core to support the Xen PCI Frontend + * + * Author: Ryan Wilson + */ +#include +#include +#include +#include + +#include + +#include + +static int xen_pcifront_enable_irq(struct pci_dev *dev) +{ + return 0; +} + +extern int isapnp_disable; + +int __init pci_xen_init(void) +{ + if (!xen_pv_domain() || xen_initial_domain()) + return -ENODEV; + + printk(KERN_INFO "PCI: setting up Xen PCI frontend stub\n"); + + pcibios_set_cache_line_size(); + + pcibios_enable_irq = xen_pcifront_enable_irq; + pcibios_disable_irq = NULL; + +#ifdef CONFIG_ACPI + /* Keep ACPI out of the picture */ + acpi_noirq = 1; +#endif + +#ifdef CONFIG_ISAPNP + /* Stop isapnp from probing */ + 
isapnp_disable = 1; +#endif + + /* Ensure a device still gets scanned even if it's fn number + * is non-zero. + */ + pci_scan_all_fns = 1; + + return 0; +} + diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile index 3d07ce2..106404e 100644 --- a/drivers/pci/Makefile +++ b/drivers/pci/Makefile @@ -27,6 +27,8 @@ obj-$(CONFIG_HT_IRQ) += htirq.o # Build Intel IOMMU support obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o +# Build Xen IOMMU support +obj-$(CONFIG_PCI_XEN) += xen-iommu.o obj-$(CONFIG_INTR_REMAP) += dmar.o intr_remapping.o # diff --git a/drivers/pci/xen-iommu.c b/drivers/pci/xen-iommu.c new file mode 100644 index 0000000..5b701e8 --- /dev/null +++ b/drivers/pci/xen-iommu.c @@ -0,0 +1,294 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +#define IOMMU_BUG_ON(test) \ +do { \ + if (unlikely(test)) { \ + printk(KERN_ALERT "Fatal DMA error! " \ + "Please use 'swiotlb=force'\n"); \ + BUG(); \ + } \ +} while (0) + +/* Print address range with message */ +#define PAR(msg, addr, size) \ +do { \ + printk(msg "[%#llx - %#llx]\n", \ + (unsigned long long)addr, \ + (unsigned long long)addr + size); \ +} while (0) + +struct dma_coherent_mem { + void *virt_base; + u32 device_base; + int size; + int flags; + unsigned long *bitmap; +}; + +static inline int address_needs_mapping(struct device *hwdev, + dma_addr_t addr) +{ + dma_addr_t mask = 0xffffffff; + int ret; + + /* If the device has a mask, use it, otherwise default to 32 bits */ + if (hwdev && hwdev->dma_mask) + mask = *hwdev->dma_mask; + + ret = (addr & ~mask) != 0; + + if (ret) { + printk(KERN_ERR "dma address needs mapping\n"); + printk(KERN_ERR "mask: %#llx\n address: [%#llx]\n", mask, addr); + } + return ret; +} + +static int check_pages_physically_contiguous(unsigned long pfn, + unsigned int offset, + size_t length) +{ + unsigned long next_mfn; + int i; + int nr_pages; + + next_mfn = 
pfn_to_mfn(pfn); + nr_pages = (offset + length + PAGE_SIZE-1) >> PAGE_SHIFT; + + for (i = 1; i < nr_pages; i++) { + if (pfn_to_mfn(++pfn) != ++next_mfn) + return 0; + } + return 1; +} + +static int range_straddles_page_boundary(phys_addr_t p, size_t size) +{ + unsigned long pfn = PFN_DOWN(p); + unsigned int offset = p & ~PAGE_MASK; + + if (offset + size <= PAGE_SIZE) + return 0; + if (check_pages_physically_contiguous(pfn, offset, size)) + return 0; + return 1; +} + +static inline void xen_dma_unmap_page(struct page *page) +{ + /* Xen TODO: 2.6.18 xen calls __gnttab_dma_unmap_page here + * to deal with foreign pages. We'll need similar logic here at + * some point. + */ +} + +/* Gets dma address of a page */ +static inline dma_addr_t xen_dma_map_page(struct page *page) +{ + /* Xen TODO: 2.6.18 xen calls __gnttab_dma_map_page here to deal + * with foreign pages. We'll need similar logic here at some + * point. + */ + return ((dma_addr_t)pfn_to_mfn(page_to_pfn(page))) << PAGE_SHIFT; +} + +static int xen_map_sg(struct device *hwdev, struct scatterlist *sg, + int nents, + enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + struct scatterlist *s; + struct page *page; + int i, rc; + + BUG_ON(direction == DMA_NONE); + WARN_ON(nents == 0 || sg[0].length == 0); + + for_each_sg(sg, s, nents, i) { + BUG_ON(!sg_page(s)); + page = sg_page(s); + s->dma_address = xen_dma_map_page(page) + s->offset; + s->dma_length = s->length; + IOMMU_BUG_ON(range_straddles_page_boundary( + page_to_phys(page), s->length)); + } + + rc = nents; + + flush_write_buffers(); + return rc; +} + +static void xen_unmap_sg(struct device *hwdev, struct scatterlist *sg, + int nents, + enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + struct scatterlist *s; + struct page *page; + int i; + + for_each_sg(sg, s, nents, i) { + page = pfn_to_page(mfn_to_pfn(PFN_DOWN(s->dma_address))); + xen_dma_unmap_page(page); + } +} + +static void *xen_alloc_coherent(struct device *dev, size_t 
size, + dma_addr_t *dma_handle, gfp_t gfp) +{ + void *ret; + struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; + unsigned int order = get_order(size); + unsigned long vstart; + u64 mask; + + /* ignore region specifiers */ + gfp &= ~(__GFP_DMA | __GFP_HIGHMEM); + + if (mem) { + int page = bitmap_find_free_region(mem->bitmap, mem->size, + order); + if (page >= 0) { + *dma_handle = mem->device_base + (page << PAGE_SHIFT); + ret = mem->virt_base + (page << PAGE_SHIFT); + memset(ret, 0, size); + return ret; + } + if (mem->flags & DMA_MEMORY_EXCLUSIVE) + return NULL; + } + + if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff)) + gfp |= GFP_DMA; + + vstart = __get_free_pages(gfp, order); + ret = (void *)vstart; + + if (dev != NULL && dev->coherent_dma_mask) + mask = dev->coherent_dma_mask; + else + mask = 0xffffffff; + + if (ret != NULL) { + if (xen_create_contiguous_region(vstart, order, + fls64(mask)) != 0) { + free_pages(vstart, order); + return NULL; + } + memset(ret, 0, size); + *dma_handle = virt_to_machine(ret).maddr; + } + return ret; +} + +static void xen_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_addr) +{ + struct dma_coherent_mem *mem = dev ? 
dev->dma_mem : NULL; + int order = get_order(size); + + if (mem && vaddr >= mem->virt_base && + vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) { + int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; + bitmap_release_region(mem->bitmap, page, order); + } else { + xen_destroy_contiguous_region((unsigned long)vaddr, order); + free_pages((unsigned long)vaddr, order); + } +} + +static dma_addr_t xen_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + dma_addr_t dma; + + BUG_ON(direction == DMA_NONE); + + WARN_ON(size == 0); + + dma = xen_dma_map_page(page) + offset; + + IOMMU_BUG_ON(address_needs_mapping(dev, dma)); + flush_write_buffers(); + return dma; +} + +static void xen_unmap_page(struct device *dev, dma_addr_t dma_addr, + size_t size, + enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + BUG_ON(direction == DMA_NONE); + xen_dma_unmap_page(pfn_to_page(mfn_to_pfn(PFN_DOWN(dma_addr)))); +} + +static struct dma_map_ops xen_dma_ops = { + .dma_supported = NULL, + + .alloc_coherent = xen_alloc_coherent, + .free_coherent = xen_free_coherent, + + .map_page = xen_map_page, + .unmap_page = xen_unmap_page, + + .map_sg = xen_map_sg, + .unmap_sg = xen_unmap_sg, + + .mapping_error = NULL, + + .is_phys = 0, +}; + +static struct dma_map_ops xen_swiotlb_dma_ops = { + .dma_supported = swiotlb_dma_supported, + + .alloc_coherent = xen_alloc_coherent, + .free_coherent = xen_free_coherent, + + .map_page = swiotlb_map_page, + .unmap_page = swiotlb_unmap_page, + + .map_sg = swiotlb_map_sg_attrs, + .unmap_sg = swiotlb_unmap_sg_attrs, + + .mapping_error = swiotlb_dma_mapping_error, + + .is_phys = 0, +}; + +void __init xen_iommu_init(void) +{ + if (!xen_pv_domain()) + return; + + printk(KERN_INFO "Xen: Initializing Xen DMA ops\n"); + + force_iommu = 0; + dma_ops = &xen_dma_ops; + + if (swiotlb) { + printk(KERN_INFO "Xen: Enabling DMA fallback to swiotlb\n"); + dma_ops = 
&xen_swiotlb_dma_ops; + } +} + -- 1.6.0.6 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/