Per Andi Kleen's suggestion in the review of our Calgary IOMMU code, I
tried to use the alloc_bootmem_nopanic that Andi recently added.
Unfortunately, it needs low mem for our translation tables, so we needed
a new function to do this.
I have updated swiotlb to take advantage of this new function (and
added an error path to lib/swiotlb.c and resulting fallout from
calling functions).
This patch has been tested individually and cumulatively on x86_64 and
cross-compile tested on IA64. Since I have no IA64 hardware, any
testing on that platform would be appreciated.
Thanks,
Jon
Signed-off-by: Jon Mason <[email protected]>
diff -r b5bb5fea7490 -r 62dc1eb0c5e2 arch/x86_64/kernel/pci-swiotlb.c
--- a/arch/x86_64/kernel/pci-swiotlb.c Tue Apr 25 18:18:55 2006
+++ b/arch/x86_64/kernel/pci-swiotlb.c Wed Apr 26 16:12:39 2006
@@ -30,13 +30,19 @@
void pci_swiotlb_init(void)
{
+ int rc;
+
/* don't initialize swiotlb if iommu=off (no_iommu=1) */
if (!iommu_aperture && !no_iommu &&
(end_pfn > MAX_DMA32_PFN || force_iommu))
swiotlb = 1;
if (swiotlb) {
- printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n");
- swiotlb_init();
- dma_ops = &swiotlb_dma_ops;
+ printk(KERN_INFO "PCI-DMA: Using software bounce buffering for "
+ "IO (SWIOTLB)\n");
+ rc = swiotlb_init();
+ if (!rc)
+ dma_ops = &swiotlb_dma_ops;
+ else
+ swiotlb = 0;
}
}
diff -r b5bb5fea7490 -r 62dc1eb0c5e2 include/asm-ia64/machvec.h
--- a/include/asm-ia64/machvec.h Tue Apr 25 18:18:55 2006
+++ b/include/asm-ia64/machvec.h Wed Apr 26 16:12:39 2006
@@ -38,7 +38,7 @@
typedef void ia64_mv_migrate_t(struct task_struct * task);
/* DMA-mapping interface: */
-typedef void ia64_mv_dma_init (void);
+typedef int ia64_mv_dma_init (void);
typedef void *ia64_mv_dma_alloc_coherent (struct device *, size_t, dma_addr_t *, gfp_t);
typedef void ia64_mv_dma_free_coherent (struct device *, size_t, void *, dma_addr_t);
typedef dma_addr_t ia64_mv_dma_map_single (struct device *, void *, size_t, int);
diff -r b5bb5fea7490 -r 62dc1eb0c5e2 include/asm-x86_64/swiotlb.h
--- a/include/asm-x86_64/swiotlb.h Tue Apr 25 18:18:55 2006
+++ b/include/asm-x86_64/swiotlb.h Wed Apr 26 16:12:39 2006
@@ -41,7 +41,7 @@
extern void swiotlb_free_coherent (struct device *hwdev, size_t size,
void *vaddr, dma_addr_t dma_handle);
extern int swiotlb_dma_supported(struct device *hwdev, u64 mask);
-extern void swiotlb_init(void);
+extern int swiotlb_init(void);
#ifdef CONFIG_SWIOTLB
extern int swiotlb;
diff -r b5bb5fea7490 -r 62dc1eb0c5e2 include/linux/bootmem.h
--- a/include/linux/bootmem.h Tue Apr 25 18:18:55 2006
+++ b/include/linux/bootmem.h Wed Apr 26 16:12:39 2006
@@ -46,6 +46,7 @@
extern void __init free_bootmem (unsigned long addr, unsigned long size);
extern void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal);
extern void * __init __alloc_bootmem_nopanic (unsigned long size, unsigned long align, unsigned long goal);
+extern void * __init __alloc_bootmem_low_nopanic(unsigned long size, unsigned long align, unsigned long goal);
extern void * __init __alloc_bootmem_low(unsigned long size,
unsigned long align,
unsigned long goal);
diff -r b5bb5fea7490 -r 62dc1eb0c5e2 lib/swiotlb.c
--- a/lib/swiotlb.c Tue Apr 25 18:18:55 2006
+++ b/lib/swiotlb.c Wed Apr 26 16:12:39 2006
@@ -126,7 +126,7 @@
* Statically reserve bounce buffer space and initialize bounce buffer data
* structures for the software IO TLB used to implement the DMA API.
*/
-void
+int
swiotlb_init(void)
{
unsigned long i;
@@ -140,10 +140,11 @@
/*
* Get IO TLB memory from the low pages
*/
- io_tlb_start = alloc_bootmem_low_pages(io_tlb_nslabs *
- (1 << IO_TLB_SHIFT));
+ io_tlb_start = __alloc_bootmem_low_nopanic(io_tlb_nslabs *
+ (1 << IO_TLB_SHIFT), PAGE_SIZE, 0);
if (!io_tlb_start)
- panic("Cannot allocate SWIOTLB buffer");
+ goto nomem_error;
+
io_tlb_end = io_tlb_start + io_tlb_nslabs * (1 << IO_TLB_SHIFT);
/*
@@ -152,18 +153,39 @@
* between io_tlb_start and io_tlb_end.
*/
io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int));
+ if (!io_tlb_list)
+ goto free_io_tlb_start;
+
for (i = 0; i < io_tlb_nslabs; i++)
io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
io_tlb_index = 0;
io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(char *));
+ if (!io_tlb_orig_addr)
+ goto free_io_tlb_list;
/*
* Get the overflow emergency buffer
*/
io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow);
+ if (!io_tlb_overflow_buffer)
+ goto free_io_tlb_orig_addr;
+
printk(KERN_INFO "Placing %dMB software IO TLB between 0x%lx - 0x%lx\n",
(int) (io_tlb_nslabs * (1 << IO_TLB_SHIFT)) >> 20,
virt_to_phys(io_tlb_start), virt_to_phys(io_tlb_end));
+
+ return 0;
+
+free_io_tlb_orig_addr:
+ free_bootmem(__pa(io_tlb_orig_addr), io_tlb_nslabs * sizeof(char *));
+free_io_tlb_list:
+ free_bootmem(__pa(io_tlb_list), io_tlb_nslabs * sizeof(int));
+free_io_tlb_start:
+ free_bootmem(__pa(io_tlb_start), io_tlb_nslabs * (1 << IO_TLB_SHIFT));
+nomem_error:
+ printk(KERN_ERR "SWIOTLB: Unable to allocate memory, disabling "
+ "IOMMU\n");
+ return -ENOMEM;
}
/*
diff -r b5bb5fea7490 -r 62dc1eb0c5e2 mm/bootmem.c
--- a/mm/bootmem.c Tue Apr 25 18:18:55 2006
+++ b/mm/bootmem.c Wed Apr 26 16:12:39 2006
@@ -463,3 +463,16 @@
{
return __alloc_bootmem_core(pgdat->bdata, size, align, goal, LOW32LIMIT);
}
+
+void * __init __alloc_bootmem_low_nopanic(unsigned long size,
+ unsigned long align, unsigned long goal)
+{
+ bootmem_data_t *bdata;
+ void *ptr;
+
+ list_for_each_entry(bdata, &bdata_list, list)
+ if ((ptr = __alloc_bootmem_core(bdata, size, align, goal,
+ LOW32LIMIT)))
+ return(ptr);
+ return NULL;
+}
On Thu, May 04, 2006 at 03:59:29PM -0500, Jon Mason wrote:
> Per Andi Kleen's suggestion in the review of our Calgary IOMMU code, I
> tried to use the alloc_bootmem_nopanic that Andi recently added.
> Unfortunately, it needs low mem for our translation tables, so we needed
> a new function to do this.
>
> I have updated swiotlb to take advantage of this new function (and
> added an error path to lib/swiotlb.c and resulting fallout from
> calling functions).
>
> This patch has been tested individually and cumulatively on x86_64 and
> cross-compile tested on IA64. Since I have no IA64 hardware, any
> testing on that platform would be appreciated.
A couple of minor nits below, otherwise looks good.
> Signed-off-by: Jon Mason <[email protected]>
Acked-by: Muli Ben-Yehuda <[email protected]>
> diff -r b5bb5fea7490 -r 62dc1eb0c5e2 include/linux/bootmem.h
> --- a/include/linux/bootmem.h Tue Apr 25 18:18:55 2006
> +++ b/include/linux/bootmem.h Wed Apr 26 16:12:39 2006
> @@ -46,6 +46,7 @@
> extern void __init free_bootmem (unsigned long addr, unsigned long size);
> extern void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal);
> extern void * __init __alloc_bootmem_nopanic (unsigned long size, unsigned long align, unsigned long goal);
> +extern void * __init __alloc_bootmem_low_nopanic(unsigned long size, unsigned long align, unsigned long goal);
Would be nice to convert this and the preceding declarations to 80
chars per line, please.
> diff -r b5bb5fea7490 -r 62dc1eb0c5e2 mm/bootmem.c
> --- a/mm/bootmem.c Tue Apr 25 18:18:55 2006
> +++ b/mm/bootmem.c Wed Apr 26 16:12:39 2006
> @@ -463,3 +463,16 @@
> {
> return __alloc_bootmem_core(pgdat->bdata, size, align, goal, LOW32LIMIT);
> }
> +
> +void * __init __alloc_bootmem_low_nopanic(unsigned long size,
> + unsigned long align, unsigned long goal)
> +{
> + bootmem_data_t *bdata;
> + void *ptr;
> +
> + list_for_each_entry(bdata, &bdata_list, list)
> + if ((ptr = __alloc_bootmem_core(bdata, size, align, goal,
> + LOW32LIMIT)))
> + return(ptr);
This should be 'return ptr';
Cheers,
Muli
On Sun, May 07, 2006 at 11:50:36AM +0300, Muli Ben-Yehuda wrote:
> On Thu, May 04, 2006 at 03:59:29PM -0500, Jon Mason wrote:
>
> > Per Andi Kleen's suggestion in the review of our Calgary IOMMU code, I
> > tried to use the alloc_bootmem_nopanic that Andi recently added.
> > Unfortunately, it needs low mem for our translation tables, so we needed
> > a new function to do this.
> >
> > I have updated swiotlb to take advantage of this new function (and
> > added an error path to lib/swiotlb.c and resulting fallout from
> > calling functions).
> >
> > This patch has been tested individually and cumulatively on x86_64 and
> > cross-compile tested on IA64. Since I have no IA64 hardware, any
> > testing on that platform would be appreciated.
>
> A couple of minor nits below, otherwise looks good.
>
> > Signed-off-by: Jon Mason <[email protected]>
>
> Acked-by: Muli Ben-Yehuda <[email protected]>
>
> > diff -r b5bb5fea7490 -r 62dc1eb0c5e2 include/linux/bootmem.h
> > --- a/include/linux/bootmem.h Tue Apr 25 18:18:55 2006
> > +++ b/include/linux/bootmem.h Wed Apr 26 16:12:39 2006
> > @@ -46,6 +46,7 @@
> > extern void __init free_bootmem (unsigned long addr, unsigned long size);
> > extern void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal);
> > extern void * __init __alloc_bootmem_nopanic (unsigned long size, unsigned long align, unsigned long goal);
> > +extern void * __init __alloc_bootmem_low_nopanic(unsigned long size, unsigned long align, unsigned long goal);
>
> Would be nice to convert this and the preceding declarations to 80
> chars per line, please.
>
> > diff -r b5bb5fea7490 -r 62dc1eb0c5e2 mm/bootmem.c
> > --- a/mm/bootmem.c Tue Apr 25 18:18:55 2006
> > +++ b/mm/bootmem.c Wed Apr 26 16:12:39 2006
> > @@ -463,3 +463,16 @@
> > {
> > return __alloc_bootmem_core(pgdat->bdata, size, align, goal, LOW32LIMIT);
> > }
> > +
> > +void * __init __alloc_bootmem_low_nopanic(unsigned long size,
> > + unsigned long align, unsigned long goal)
> > +{
> > + bootmem_data_t *bdata;
> > + void *ptr;
> > +
> > + list_for_each_entry(bdata, &bdata_list, list)
> > + if ((ptr = __alloc_bootmem_core(bdata, size, align, goal,
> > + LOW32LIMIT)))
> > + return(ptr);
>
> This should be 'return ptr';
I completely agree with both nits, but the changes follow the style
currently in the files. I'll do a clean-up patch to both of the
files in question. :)
Thanks,
Jon
>
> Cheers,
> Muli
Jon,
Trailing white space added by part2 and part3 annoys "git" (29
total places between the two parts).
Generic builds (based on arch/ia64/defconfig or arch/ia64/configs/gensparse_defconfig)
throws out 3 warnings, one each from arch/ia64/hp/zx1/hpzx1_machvec.c,
arch/ia64/hp/zx1/hpzx1_swiotlb_machvec.c and arch/ia64/sn/kernel/machvec.c
include/asm/machvec_init.h:32: warning: initialization from incompatible pointer type
The problems is "platform_dma_init" which can be defined as
"machvec_noop()" which returns "void", not "int" at required
by your change to the ia64_mv_dma_init type.
On the plus side it does boot on ia64 Intel Tiger ... but more of
the fancier bits of this code are used by the zx1 and zx1_swiotlb
versions which I didn't get time to test today.
-Tony
On Mon, May 08, 2006 at 04:28:07PM -0700, Luck, Tony wrote:
> Jon,
>
> Trailing white space added by part2 and part3 annoys "git" (29
> total places between the two parts).
Oops. I'll fix that with the next release (in the next few hours or
so).
>
> Generic builds (based on arch/ia64/defconfig or arch/ia64/configs/gensparse_defconfig)
> throws out 3 warnings, one each from arch/ia64/hp/zx1/hpzx1_machvec.c,
> arch/ia64/hp/zx1/hpzx1_swiotlb_machvec.c and arch/ia64/sn/kernel/machvec.c
>
> include/asm/machvec_init.h:32: warning: initialization from incompatible pointer type
>
> The problems is "platform_dma_init" which can be defined as
> "machvec_noop()" which returns "void", not "int" at required
> by your change to the ia64_mv_dma_init type.
I "fixed" it with the hack below. Please let me know if this is not
palatable for you.
> On the plus side it does boot on ia64 Intel Tiger ... but more of
> the fancier bits of this code are used by the zx1 and zx1_swiotlb
> versions which I didn't get time to test today.
Fantastic! I appreciate you taking the time to look at and test these
patches.
Thanks,
Jon
>
> -Tony
diff -r 99976643895a include/asm-ia64/machvec.h
--- a/include/asm-ia64/machvec.h Tue May 2 23:10:14 2006
+++ b/include/asm-ia64/machvec.h Tue May 9 14:50:17 2006
@@ -77,9 +77,15 @@
typedef unsigned int ia64_mv_readl_relaxed_t (const volatile void __iomem *);
typedef unsigned long ia64_mv_readq_relaxed_t (const volatile void __iomem *);
-static inline void
+static inline void
machvec_noop (void)
{
+}
+
+static inline int
+machvec_noop_dma (void)
+{
+ return 0;
}
static inline void
diff -r 99976643895a include/asm-ia64/machvec_hpzx1.h
--- a/include/asm-ia64/machvec_hpzx1.h Tue May 2 23:10:14 2006
+++ b/include/asm-ia64/machvec_hpzx1.h Tue May 9 14:50:17 2006
@@ -20,7 +20,7 @@
*/
#define platform_name "hpzx1"
#define platform_setup dig_setup
-#define platform_dma_init machvec_noop
+#define platform_dma_init machvec_noop_dma
#define platform_dma_alloc_coherent sba_alloc_coherent
#define platform_dma_free_coherent sba_free_coherent
#define platform_dma_map_single sba_map_single
diff -r 99976643895a include/asm-ia64/machvec_hpzx1_swiotlb.h
--- a/include/asm-ia64/machvec_hpzx1_swiotlb.h Tue May 2 23:10:14 2006
+++ b/include/asm-ia64/machvec_hpzx1_swiotlb.h Tue May 9 14:50:17 2006
@@ -25,7 +25,7 @@
#define platform_name "hpzx1_swiotlb"
#define platform_setup dig_setup
-#define platform_dma_init machvec_noop
+#define platform_dma_init machvec_noop_dma
#define platform_dma_alloc_coherent hwsw_alloc_coherent
#define platform_dma_free_coherent hwsw_free_coherent
#define platform_dma_map_single hwsw_map_single
diff -r 99976643895a include/asm-ia64/machvec_sn2.h
--- a/include/asm-ia64/machvec_sn2.h Tue May 2 23:10:14 2006
+++ b/include/asm-ia64/machvec_sn2.h Tue May 9 14:50:17 2006
@@ -103,7 +103,7 @@
#define platform_pci_get_legacy_mem sn_pci_get_legacy_mem
#define platform_pci_legacy_read sn_pci_legacy_read
#define platform_pci_legacy_write sn_pci_legacy_write
-#define platform_dma_init machvec_noop
+#define platform_dma_init machvec_noop_dma
#define platform_dma_alloc_coherent sn_dma_alloc_coherent
#define platform_dma_free_coherent sn_dma_free_coherent
#define platform_dma_map_single sn_dma_map_single
> I "fixed" it with the hack below. Please let me know if this is not
> palatable for you.
Not really. The only use of platform_dma_init() that I can see is in
arch/ia64/mm/init.c:mem_init() ...
platform_dma_init();
so "void" looks to be the right return value. Why did it get changed to
be "int" (here's where I admit that I've only looked superficially at your
patch).
-Tony
On Tue, May 09, 2006 at 02:04:08PM -0700, Luck, Tony wrote:
> > I "fixed" it with the hack below. Please let me know if this is not
> > palatable for you.
>
> Not really. The only use of platform_dma_init() that I can see is in
> arch/ia64/mm/init.c:mem_init() ...
>
> platform_dma_init();
>
> so "void" looks to be the right return value. Why did it get changed to
> be "int" (here's where I admit that I've only looked superficially at your
> patch).
Ah, then I better describe it. The patch makes it possible to recover
from an insufficient amount of bootmem during swiotlb_init (instead of
panicing). For x86_64, I have it bailing out (via the returned int from
swiotlb_init and using the non-iommu DMA routines from
arch/x86_64/kernel/pci-nommu.c). For ia64, its not that simple.
There are no alternative DMA routines to switch to incase of an error.
Also, There is no way to "bail-out" from its mem_init. I could add a
panic there, if that is more palatable.
Thanks,
Jon
>
> -Tony
> Ah, then I better describe it. The patch makes it possible to recover
> from an insufficient amount of bootmem during swiotlb_init (instead of
> panicing). For x86_64, I have it bailing out (via the returned int from
> swiotlb_init and using the non-iommu DMA routines from
> arch/x86_64/kernel/pci-nommu.c). For ia64, its not that simple.
> There are no alternative DMA routines to switch to incase of an error.
> Also, There is no way to "bail-out" from its mem_init. I could add a
> panic there, if that is more palatable.
Presumably if you have insufficient memory to allocate the swiotlb
buffers, then you actually don't need *any* swiotlb buffers at all
as all your memory is at low enough addresses to be directly accessed
by whatever devices you have (offer void on bizarre discontig systems
that put the only memory they have in an address range that can't be
used by the devices that need to access it ... but perhaps in that
case it would be better to buy a different computer :-)
But ignoring that ... a new "noop" routine that returns an int
does seem to be the right solution. Looking at the others that
are already there, calling it machvec_noop_dma() isn't a bad fit
with the style of the other "_noop" functions that are already
there.
-Tony