This is the latter half of my iommu work to make the IOMMUs respect
LLDs' restrictions.
IOMMUs allocate memory areas without considering a low-level driver's
(LLD's) segment boundary limits, so we have some workarounds: splitting
sg segments again in LLDs, or reserving all I/O space that spans a 4GB
boundary in IOMMUs (on the assumption that all LLDs have 4GB boundary
restrictions). The goal is to kill all these workarounds.
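To be clear about the terminology: a memory area "spans a segment
boundary" when its first and last bytes fall in different
boundary-sized windows. A minimal sketch of the check (illustration
only, not code from this patchset):

static int crosses_boundary(unsigned long addr, size_t len,
			    unsigned long mask)
{
	/* mask = 0xffffffff means a 4GB window; the bits above the
	 * mask identify which window an address falls in */
	return (addr & ~mask) != ((addr + len - 1) & ~mask);
}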
This patchset adds new accessors for segment_boundary_mask in
device_dma_parameters structure in the same way as the first half of
my work did for max_segment_size.
For now, I've fixed only swiotlb. Next, I'll either generalize
swiotlb's free area management and convert all the IOMMUs to use it,
or generalize the bitmap-based free area management that most of the
IOMMUs use and convert them to that.
This is against 2.6.24-rc3-mm1.
The first half of my iommu work is:
http://thread.gmane.org/gmane.linux.scsi/35602
This adds a PCI accessor for segment_boundary_mask in
device_dma_parameters.
The default segment boundary is set to 0xffffffff, the same as the
block layer's default value (the scsi mid layer uses the same value).
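With this accessor in place, an LLD with a stricter restriction can
override the default at probe time; a hypothetical example (the 64KB
mask is made up for illustration):

	/* hypothetical: this device cannot cross a 64KB boundary */
	err = pci_set_dma_seg_boundary(pdev, 0xffff);
	if (err)
		return err;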
Signed-off-by: FUJITA Tomonori <[email protected]>
---
drivers/pci/pci.c | 8 ++++++++
drivers/pci/probe.c | 1 +
include/linux/pci.h | 2 ++
3 files changed, 11 insertions(+), 0 deletions(-)
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index de623cf..3b7e0e0 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1435,6 +1435,14 @@ int pci_set_dma_max_seg_size(struct pci_dev *dev, unsigned int size)
EXPORT_SYMBOL(pci_set_dma_max_seg_size);
#endif
+#ifndef HAVE_ARCH_PCI_SET_DMA_SEGMENT_BOUNDARY
+int pci_set_dma_seg_boundary(struct pci_dev *dev, unsigned long mask)
+{
+ return dma_set_seg_boundary(&dev->dev, mask);
+}
+EXPORT_SYMBOL(pci_set_dma_seg_boundary);
+#endif
+
/**
* pcix_get_max_mmrbc - get PCI-X maximum designed memory read byte count
* @dev: PCI device to query
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index aa343e1..2e8b539 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -987,6 +987,7 @@ void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
dev->dev.coherent_dma_mask = 0xffffffffull;
pci_set_dma_max_seg_size(dev, 65536);
+ pci_set_dma_seg_boundary(dev, 0xffffffff);
/* Fix up broken headers */
pci_fixup_device(pci_fixup_header, dev);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index d56d0b6..a05a843 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -567,6 +567,7 @@ void pci_msi_off(struct pci_dev *dev);
int pci_set_dma_mask(struct pci_dev *dev, u64 mask);
int pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask);
int pci_set_dma_max_seg_size(struct pci_dev *dev, unsigned int size);
+int pci_set_dma_seg_boundary(struct pci_dev *dev, unsigned long mask);
int pcix_get_max_mmrbc(struct pci_dev *dev);
int pcix_get_mmrbc(struct pci_dev *dev);
int pcix_set_mmrbc(struct pci_dev *dev, int mmrbc);
@@ -753,6 +754,7 @@ static inline int pci_enable_device(struct pci_dev *dev) { return -EIO; }
static inline void pci_disable_device(struct pci_dev *dev) { }
static inline int pci_set_dma_mask(struct pci_dev *dev, u64 mask) { return -EIO; }
static inline int pci_set_dma_max_seg_size(struct pci_dev *dev, unsigned int size) { return -EIO; }
+static inline int pci_set_dma_seg_boundary(struct pci_dev *dev, unsigned long mask) { return -EIO; }
static inline int pci_assign_resource(struct pci_dev *dev, int i) { return -EBUSY;}
static inline int __pci_register_driver(struct pci_driver *drv, struct module *owner) { return 0;}
static inline int pci_register_driver(struct pci_driver *drv) { return 0;}
--
1.5.3.4
This adds new accessors for segment_boundary_mask in the
device_dma_parameters structure, in the same way I did for
max_segment_size, so we can easily change where struct
device_dma_parameters lives in the future.
dma_get_seg_boundary returns 0xffffffff if dma_parms in struct
device isn't set up properly; 0xffffffff is the default value used by
the block layer and the scsi mid layer.
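A consumer (an IOMMU or a dma_map_sg implementation) can then query
the limit without knowing where dma_parms lives. A sketch of the
intended use (illustration only; start_new_segment is a hypothetical
helper):

	unsigned long mask = dma_get_seg_boundary(dev);

	/* two bus addresses share a boundary window iff their bits
	 * above the mask match; merging across a window edge would
	 * violate the LLD's restriction */
	if ((start & ~mask) != (end & ~mask))
		start_new_segment();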
Signed-off-by: FUJITA Tomonori <[email protected]>
---
include/linux/dma-mapping.h | 15 +++++++++++++++
1 files changed, 15 insertions(+), 0 deletions(-)
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 71972ca..7d157ed 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -75,6 +75,21 @@ static inline unsigned int dma_set_max_seg_size(struct device *dev,
return -EIO;
}
+static inline unsigned long dma_get_seg_boundary(struct device *dev)
+{
+ return dev->dma_parms ?
+ dev->dma_parms->segment_boundary_mask : 0xffffffff;
+}
+
+static inline int dma_set_seg_boundary(struct device *dev, unsigned long mask)
+{
+ if (dev->dma_parms) {
+ dev->dma_parms->segment_boundary_mask = mask;
+ return 0;
+ } else
+ return -EIO;
+}
+
/* flags for the coherent memory api */
#define DMA_MEMORY_MAP 0x01
#define DMA_MEMORY_IO 0x02
--
1.5.3.4
This is a one-line patch to add the following to __scsi_alloc_queue():
dma_set_seg_boundary(dev, shost->dma_boundary);
This is the simplest approach, but the result looks odd;
__scsi_alloc_queue() ends up doing:
blk_queue_segment_boundary(q, shost->dma_boundary);
dma_set_seg_boundary(dev, shost->dma_boundary);
blk_queue_max_segment_size(q, dma_get_max_seg_size(dev));
I think it would be better to set up the segment boundary in the same
way as we did for the maximum segment size: remove
shost->dma_boundary and have LLDs call pci_set_dma_seg_boundary (or
its friends).
Then __scsi_alloc_queue() can set up both limits in the same way:
blk_queue_segment_boundary(q, dma_get_seg_boundary(dev));
blk_queue_max_segment_size(q, dma_get_max_seg_size(dev));
Killing dma_boundary in scsi_host_template needs a large patch for
libata (dma_boundary is used only by libata and sym53c8xx). I'll send
a patch to do that if it is acceptable. James and Jeff?
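For illustration, a converted LLD would look roughly like this
(hypothetical sketch; the driver name is made up and this is not part
of this patch):

static int foo_probe(struct pci_dev *pdev,
		     const struct pci_device_id *id)
{
	int err;

	/* instead of setting .dma_boundary in foo's
	 * scsi_host_template */
	err = pci_set_dma_seg_boundary(pdev, 0xffffffff);
	if (err)
		return err;

	/* ... allocate and add the Scsi_Host as before ... */
	return 0;
}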
Signed-off-by: FUJITA Tomonori <[email protected]>
---
drivers/scsi/scsi_lib.c | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 733176d..2a15a3b 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1767,6 +1767,7 @@ struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost,
blk_queue_max_sectors(q, shost->max_sectors);
blk_queue_bounce_limit(q, scsi_calculate_bounce_limit(shost));
blk_queue_segment_boundary(q, shost->dma_boundary);
+ dma_set_seg_boundary(dev, shost->dma_boundary);
blk_queue_max_segment_size(q, dma_get_max_seg_size(dev));
--
1.5.3.4
This patch makes swiotlb avoid allocating a memory area that spans an
LLD's segment boundary.
is_span_boundary() judges whether a memory area spans the LLD's
segment boundary. If map_single() finds such an area, it tries the
next available memory area.
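To illustrate the arithmetic with made-up numbers: suppose the mask
is 0xffffff (a 16MB boundary) and IO_TLB_SHIFT is 11 (2KB slots), so
max_slots = 16MB / 2KB = 8192, and suppose the aperture starts 8000
slots into its 16MB window (offset_slots = 8000):

	is_span_boundary(0, 256, 8000, 8192):
		offset = (8000 + 0) & 8191 = 8000
		8000 + 256 = 8256 > 8192	-> spans, skip this index

	is_span_boundary(192, 256, 8000, 8192):
		offset = (8000 + 192) & 8191 = 0
		0 + 256 = 256 <= 8192		-> fits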
Signed-off-by: FUJITA Tomonori <[email protected]>
---
lib/swiotlb.c | 41 +++++++++++++++++++++++++++++++++++------
1 files changed, 35 insertions(+), 6 deletions(-)
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 1a8050a..4bb5a11 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -282,6 +282,15 @@ address_needs_mapping(struct device *hwdev, dma_addr_t addr)
return (addr & ~mask) != 0;
}
+static inline unsigned int is_span_boundary(unsigned int index,
+ unsigned int nslots,
+ unsigned long offset_slots,
+ unsigned long max_slots)
+{
+ unsigned long offset = (offset_slots + index) & (max_slots - 1);
+ return offset + nslots > max_slots;
+}
+
/*
* Allocates bounce buffer and returns its kernel virtual address.
*/
@@ -292,6 +301,16 @@ map_single(struct device *hwdev, char *buffer, size_t size, int dir)
char *dma_addr;
unsigned int nslots, stride, index, wrap;
int i;
+ unsigned long start_dma_addr;
+ unsigned long mask;
+ unsigned long offset_slots;
+ unsigned long max_slots;
+
+ mask = dma_get_seg_boundary(hwdev);
+ start_dma_addr = virt_to_bus(io_tlb_start) & mask;
+
+ offset_slots = ALIGN(start_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
+ max_slots = ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
/*
* For mappings greater than a page, we limit the stride (and
@@ -311,10 +330,17 @@ map_single(struct device *hwdev, char *buffer, size_t size, int dir)
*/
spin_lock_irqsave(&io_tlb_lock, flags);
{
- wrap = index = ALIGN(io_tlb_index, stride);
-
+ index = ALIGN(io_tlb_index, stride);
if (index >= io_tlb_nslabs)
- wrap = index = 0;
+ index = 0;
+
+ while (is_span_boundary(index, nslots, offset_slots,
+ max_slots)) {
+ index += stride;
+ if (index >= io_tlb_nslabs)
+ index = 0;
+ }
+ wrap = index;
do {
/*
@@ -341,9 +367,12 @@ map_single(struct device *hwdev, char *buffer, size_t size, int dir)
goto found;
}
- index += stride;
- if (index >= io_tlb_nslabs)
- index = 0;
+ do {
+ index += stride;
+ if (index >= io_tlb_nslabs)
+ index = 0;
+ } while (is_span_boundary(index, nslots, offset_slots,
+ max_slots));
} while (index != wrap);
spin_unlock_irqrestore(&io_tlb_lock, flags);
--
1.5.3.4