2016-04-18 10:56:36

by Yongji Xie

[permalink] [raw]
Subject: [RFC v6 01/10] PCI: Ignore resource_alignment if PCI_PROBE_ONLY was set

The resource_alignment will releases memory resources allocated
by firmware so that kernel can reassign new resources later on.
But this will cause the problem that no resources can be
allocated by kernel if PCI_PROBE_ONLY was set, e.g. on pSeries
platform because PCI_PROBE_ONLY force kernel to use firmware
setup and not to reassign any resources.

To solve this problem, this patch ignores resource_alignment
if PCI_PROBE_ONLY was set.

Signed-off-by: Yongji Xie <[email protected]>
---
drivers/pci/pci.c | 6 ++++++
1 file changed, 6 insertions(+)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 602eb42..77b7494 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4607,6 +4607,12 @@ static resource_size_t pci_specified_resource_alignment(struct pci_dev *dev)
spin_lock(&resource_alignment_lock);
p = resource_alignment_param;
while (*p) {
+ if (pci_has_flag(PCI_PROBE_ONLY)) {
+ printk(KERN_ERR "PCI: Ignore resource_alignment parameter: %s with PCI_PROBE_ONLY set\n",
+ p);
+ *p = 0;
+ break;
+ }
count = 0;
if (sscanf(p, "%d%n", &align_order, &count) == 1 &&
p[count] == '@') {
--
1.7.9.5


2016-04-18 10:57:10

by Yongji Xie

[permalink] [raw]
Subject: [RFC v6 04/10] PCI: Add support for enforcing all MMIO BARs to be page aligned

When vfio passthrough a PCI device of which MMIO BARs are
smaller than PAGE_SIZE, guest will not handle the mmio
accesses to the BARs which leads to mmio emulations in host.

This is because vfio will not allow to passthrough one BAR's
mmio page which may be shared with other BARs. Otherwise,
there will be a backdoor that guest can use to access BARs
of other guest.

To solve this issue, this patch modifies resource_alignment
to support syntax where multiple devices get the same
alignment. So we can use something like
"pci=resource_alignment=*:*:*.*:noresize" to enforce the
alignment of all MMIO BARs to be at least PAGE_SIZE so that
one BAR's mmio page would not be shared with other BARs.

And we also define a macro PCIBIOS_MIN_ALIGNMENT to enable this
automatically on PPC64 platform which can easily hit this issue
because its PAGE_SIZE is 64KB.

Signed-off-by: Yongji Xie <[email protected]>
---
Documentation/kernel-parameters.txt | 2 ++
arch/powerpc/include/asm/pci.h | 2 ++
drivers/pci/pci.c | 64 +++++++++++++++++++++++++++++------
3 files changed, 57 insertions(+), 11 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index d8b29ab..542be4a 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2918,6 +2918,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
aligned memory resources.
If <order of align> is not specified,
PAGE_SIZE is used as alignment.
+ <domain>, <bus>, <slot> and <func> can be set to
+ "*" which means match all values.
PCI-PCI bridge can be specified, if resource
windows need to be expanded.
noresize: Don't change the resources' sizes when
diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h
index 6f8065a..78f230f 100644
--- a/arch/powerpc/include/asm/pci.h
+++ b/arch/powerpc/include/asm/pci.h
@@ -30,6 +30,8 @@
#define PCIBIOS_MIN_IO 0x1000
#define PCIBIOS_MIN_MEM 0x10000000

+#define PCIBIOS_MIN_ALIGNMENT PAGE_SIZE
+
struct pci_dev;

/* Values for the `which' argument to sys_pciconfig_iobase syscall. */
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 7564ccc..0381c28 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4605,7 +4605,12 @@ static resource_size_t pci_specified_resource_alignment(struct pci_dev *dev,
int seg, bus, slot, func, align_order, count;
resource_size_t align = 0;
char *p;
+ bool invalid = false;

+#ifdef PCIBIOS_MIN_ALIGNMENT
+ align = PCIBIOS_MIN_ALIGNMENT;
+ *resize = false;
+#endif
spin_lock(&resource_alignment_lock);
p = resource_alignment_param;
while (*p) {
@@ -4622,16 +4627,49 @@ static resource_size_t pci_specified_resource_alignment(struct pci_dev *dev,
} else {
align_order = -1;
}
- if (sscanf(p, "%x:%x:%x.%x%n",
- &seg, &bus, &slot, &func, &count) != 4) {
+ if (p[0] == '*' && p[1] == ':') {
+ seg = -1;
+ count = 1;
+ } else if (sscanf(p, "%x%n", &seg, &count) != 1 ||
+ p[count] != ':') {
+ invalid = true;
+ break;
+ }
+ p += count + 1;
+ if (*p == '*') {
+ bus = -1;
+ count = 1;
+ } else if (sscanf(p, "%x%n", &bus, &count) != 1) {
+ invalid = true;
+ break;
+ }
+ p += count;
+ if (*p == '.') {
+ slot = bus;
+ bus = seg;
seg = 0;
- if (sscanf(p, "%x:%x.%x%n",
- &bus, &slot, &func, &count) != 3) {
- /* Invalid format */
- printk(KERN_ERR "PCI: Can't parse resource_alignment parameter: %s\n",
- p);
+ p++;
+ } else if (*p == ':') {
+ p++;
+ if (p[0] == '*' && p[1] == '.') {
+ slot = -1;
+ count = 1;
+ } else if (sscanf(p, "%x%n", &slot, &count) != 1 ||
+ p[count] != '.') {
+ invalid = true;
break;
}
+ p += count + 1;
+ } else {
+ invalid = true;
+ break;
+ }
+ if (*p == '*') {
+ func = -1;
+ count = 1;
+ } else if (sscanf(p, "%x%n", &func, &count) != 1) {
+ invalid = true;
+ break;
}
p += count;
if (!strncmp(p, ":noresize", 9)) {
@@ -4639,10 +4677,10 @@ static resource_size_t pci_specified_resource_alignment(struct pci_dev *dev,
p += 9;
} else
*resize = true;
- if (seg == pci_domain_nr(dev->bus) &&
- bus == dev->bus->number &&
- slot == PCI_SLOT(dev->devfn) &&
- func == PCI_FUNC(dev->devfn)) {
+ if ((seg == pci_domain_nr(dev->bus) || seg == -1) &&
+ (bus == dev->bus->number || bus == -1) &&
+ (slot == PCI_SLOT(dev->devfn) || slot == -1) &&
+ (func == PCI_FUNC(dev->devfn) || func == -1)) {
if (align_order == -1)
align = PAGE_SIZE;
else
@@ -4652,10 +4690,14 @@ static resource_size_t pci_specified_resource_alignment(struct pci_dev *dev,
}
if (*p != ';' && *p != ',') {
/* End of param or invalid format */
+ invalid = true;
break;
}
p++;
}
+ if (invalid)
+ printk(KERN_ERR "PCI: Can't parse resource_alignment parameter:%s\n",
+ p);
spin_unlock(&resource_alignment_lock);
return align;
}
--
1.7.9.5

2016-04-18 10:57:41

by Yongji Xie

[permalink] [raw]
Subject: [RFC v6 02/10] PCI: Do not Use IORESOURCE_STARTALIGN to identify bridge resources

Now we use the IORESOURCE_STARTALIGN to identify bridge resources
in __assign_resources_sorted(). That's quite fragile. We can't
make sure that the PCI devices' resources will not use
IORESOURCE_STARTALIGN any more.

In this patch, we try to use a more robust way to identify
bridge resources.

Signed-off-by: Yongji Xie <[email protected]>
---
drivers/pci/setup-bus.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index 7796d0a..45fc484 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -391,6 +391,7 @@ static void __assign_resources_sorted(struct list_head *head,
struct pci_dev_resource *dev_res, *tmp_res, *dev_res2;
unsigned long fail_type;
resource_size_t add_align, align;
+ int index;

/* Check if optional add_size is there */
if (!realloc_head || list_empty(realloc_head))
@@ -411,11 +412,13 @@ static void __assign_resources_sorted(struct list_head *head,

/*
* There are two kinds of additional resources in the list:
- * 1. bridge resource -- IORESOURCE_STARTALIGN
- * 2. SR-IOV resource -- IORESOURCE_SIZEALIGN
+ * 1. bridge resource
+ * 2. SR-IOV resource
* Here just fix the additional alignment for bridge
*/
- if (!(dev_res->res->flags & IORESOURCE_STARTALIGN))
+ index = dev_res->res - dev_res->dev->resource;
+ if (index < PCI_BRIDGE_RESOURCES ||
+ index > PCI_BRIDGE_RESOURCE_END)
continue;

add_align = get_res_add_align(realloc_head, dev_res->res);
--
1.7.9.5

2016-04-18 10:58:14

by Yongji Xie

[permalink] [raw]
Subject: [RFC v6 03/10] PCI: Add a new option for resource_alignment to reassign alignment

When using resource_alignment kernel parameter, the current
implement reassigns the alignment by changing resources' size
which can potentially break some drivers. For example, the driver
uses the size to locate some register whose length is related
to the size.

This patch adds a new option "noresize" for the parameter to
solve this problem.

Signed-off-by: Yongji Xie <[email protected]>
---
Documentation/kernel-parameters.txt | 5 ++++-
drivers/pci/pci.c | 35 +++++++++++++++++++++++++----------
2 files changed, 29 insertions(+), 11 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 9a53c92..d8b29ab 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2912,13 +2912,16 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
window. The default value is 64 megabytes.
resource_alignment=
Format:
- [<order of align>@][<domain>:]<bus>:<slot>.<func>[; ...]
+ [<order of align>@][<domain>:]<bus>:<slot>.<func>
+ [:noresize][; ...]
Specifies alignment and device to reassign
aligned memory resources.
If <order of align> is not specified,
PAGE_SIZE is used as alignment.
PCI-PCI bridge can be specified, if resource
windows need to be expanded.
+ noresize: Don't change the resources' sizes when
+ reassigning alignment.
ecrc= Enable/disable PCIe ECRC (transaction layer
end-to-end CRC checking).
bios: Use BIOS/firmware settings. This is the
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 77b7494..7564ccc 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4594,11 +4594,13 @@ static DEFINE_SPINLOCK(resource_alignment_lock);
/**
* pci_specified_resource_alignment - get resource alignment specified by user.
* @dev: the PCI device to get
+ * @resize: whether or not to change resources' size when reassigning alignment
*
* RETURNS: Resource alignment if it is specified.
* Zero if it is not specified.
*/
-static resource_size_t pci_specified_resource_alignment(struct pci_dev *dev)
+static resource_size_t pci_specified_resource_alignment(struct pci_dev *dev,
+ bool *resize)
{
int seg, bus, slot, func, align_order, count;
resource_size_t align = 0;
@@ -4632,6 +4634,11 @@ static resource_size_t pci_specified_resource_alignment(struct pci_dev *dev)
}
}
p += count;
+ if (!strncmp(p, ":noresize", 9)) {
+ *resize = false;
+ p += 9;
+ } else
+ *resize = true;
if (seg == pci_domain_nr(dev->bus) &&
bus == dev->bus->number &&
slot == PCI_SLOT(dev->devfn) &&
@@ -4664,11 +4671,12 @@ void pci_reassigndev_resource_alignment(struct pci_dev *dev)
{
int i;
struct resource *r;
+ bool resize = true;
resource_size_t align, size;
u16 command;

/* check if specified PCI is target device to reassign */
- align = pci_specified_resource_alignment(dev);
+ align = pci_specified_resource_alignment(dev, &resize);
if (!align)
return;

@@ -4690,15 +4698,22 @@ void pci_reassigndev_resource_alignment(struct pci_dev *dev)
if (!(r->flags & IORESOURCE_MEM))
continue;
size = resource_size(r);
- if (size < align) {
- size = align;
- dev_info(&dev->dev,
- "Rounding up size of resource #%d to %#llx.\n",
- i, (unsigned long long)size);
+ if (resize) {
+ if (size < align) {
+ size = align;
+ dev_info(&dev->dev,
+ "Rounding up size of resource #%d to %#llx.\n",
+ i, (unsigned long long)size);
+ }
+ r->flags |= IORESOURCE_UNSET;
+ r->end = size - 1;
+ r->start = 0;
+ } else {
+ r->flags &= ~IORESOURCE_SIZEALIGN;
+ r->flags |= IORESOURCE_STARTALIGN | IORESOURCE_UNSET;
+ r->start = max(align, size);
+ r->end = r->start + size - 1;
}
- r->flags |= IORESOURCE_UNSET;
- r->end = size - 1;
- r->start = 0;
}
/* Need to disable bridge's resource window,
* to enable the kernel to reassign new resource
--
1.7.9.5

2016-04-18 10:58:25

by Yongji Xie

[permalink] [raw]
Subject: [RFC v6 05/10] vfio-pci: Allow to mmap sub-page MMIO BARs if the mmio page is exclusive

Current vfio-pci implementation disallows to mmap
sub-page(size < PAGE_SIZE) MMIO BARs because these BARs' mmio
page may be shared with other BARs.

But we should allow to mmap these sub-page MMIO BARs if we can
make sure these BARs' mmio page will not be shared with other BARs.

To acheive that, we firstly need to enforce all PCI MMIO BARs
to be page aligned like the commit "PCI: Add support for
enforcing all MMIO BARs to be page aligned" does. Most of PCI
BARs will be assigned into an exclusive page with a hole. Then,
we must make sure that hot-add device's BAR will never be assigned
into the hole. So we add shadow resources and put them into the
hole in this patch. With these two guarantees, I think it should
be safe to mmap sub-page BAR.

Signed-off-by: Yongji Xie <[email protected]>
---
drivers/vfio/pci/vfio_pci.c | 58 ++++++++++++++++++++++++++++++-----
drivers/vfio/pci/vfio_pci_private.h | 8 +++++
2 files changed, 59 insertions(+), 7 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 98059df..dc1779c 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -110,13 +110,47 @@ static inline bool vfio_pci_is_vga(struct pci_dev *pdev)
return (pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA;
}

+static bool vfio_pci_bar_mmap_supported(struct vfio_pci_device *vdev, int index)
+{
+ struct resource *res = vdev->pdev->resource + index;
+ struct vfio_pci_shadow_resource *shadow_res;
+
+ if (IS_ENABLED(CONFIG_VFIO_PCI_MMAP) && res->flags & IORESOURCE_MEM &&
+ resource_size(res) > 0) {
+ if (resource_size(res) >= PAGE_SIZE)
+ return true;
+
+ if (!(res->start & ~PAGE_MASK)) {
+ /*
+ * Add shadow resource for sub-page bar whose mmio
+ * page is exclusive in case that hot-add device's
+ * bar is assigned into the mem hole.
+ */
+ shadow_res = kzalloc(sizeof(*shadow_res), GFP_KERNEL);
+ shadow_res->resource.start = res->end + 1;
+ shadow_res->resource.end = res->start + PAGE_SIZE - 1;
+ shadow_res->resource.flags = res->flags;
+ if (request_resource(res->parent,
+ &shadow_res->resource)) {
+ kfree(shadow_res);
+ return false;
+ }
+ shadow_res->index = index;
+ list_add(&shadow_res->res_next,
+ &vdev->shadow_resources_list);
+ return true;
+ }
+ }
+ return false;
+}
+
static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev);
static void vfio_pci_disable(struct vfio_pci_device *vdev);

static int vfio_pci_enable(struct vfio_pci_device *vdev)
{
struct pci_dev *pdev = vdev->pdev;
- int ret;
+ int ret, bar;
u16 cmd;
u8 msix_pos;

@@ -183,12 +217,17 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev)
}
}

+ for (bar = PCI_STD_RESOURCES; bar <= PCI_STD_RESOURCE_END; bar++) {
+ vdev->bar_mmap_supported[bar] =
+ vfio_pci_bar_mmap_supported(vdev, bar);
+ }
return 0;
}

static void vfio_pci_disable(struct vfio_pci_device *vdev)
{
struct pci_dev *pdev = vdev->pdev;
+ struct vfio_pci_shadow_resource *shadow_res, *tmp;
int i, bar;

/* Stop the device from further DMA */
@@ -217,6 +256,13 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev)
vdev->barmap[bar] = NULL;
}

+ list_for_each_entry_safe(shadow_res, tmp,
+ &vdev->shadow_resources_list, res_next) {
+ list_del(&shadow_res->res_next);
+ release_resource(&shadow_res->resource);
+ kfree(shadow_res);
+ }
+
vdev->needs_reset = true;

/*
@@ -587,9 +633,7 @@ static long vfio_pci_ioctl(void *device_data,

info.flags = VFIO_REGION_INFO_FLAG_READ |
VFIO_REGION_INFO_FLAG_WRITE;
- if (IS_ENABLED(CONFIG_VFIO_PCI_MMAP) &&
- pci_resource_flags(pdev, info.index) &
- IORESOURCE_MEM && info.size >= PAGE_SIZE) {
+ if (vdev->bar_mmap_supported[info.index]) {
info.flags |= VFIO_REGION_INFO_FLAG_MMAP;
if (info.index == vdev->msix_bar) {
ret = msix_sparse_mmap_cap(vdev, &caps);
@@ -1011,16 +1055,16 @@ static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma)
return -EINVAL;
if (index >= VFIO_PCI_ROM_REGION_INDEX)
return -EINVAL;
- if (!(pci_resource_flags(pdev, index) & IORESOURCE_MEM))
+ if (!vdev->bar_mmap_supported[index])
return -EINVAL;

- phys_len = pci_resource_len(pdev, index);
+ phys_len = PAGE_ALIGN(pci_resource_len(pdev, index));
req_len = vma->vm_end - vma->vm_start;
pgoff = vma->vm_pgoff &
((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
req_start = pgoff << PAGE_SHIFT;

- if (phys_len < PAGE_SIZE || req_start + req_len > phys_len)
+ if (req_start + req_len > phys_len)
return -EINVAL;

if (index == vdev->msix_bar) {
diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
index 8a7d546..0ea4c62 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -57,9 +57,16 @@ struct vfio_pci_region {
u32 flags;
};

+struct vfio_pci_shadow_resource {
+ struct resource resource;
+ int index;
+ struct list_head res_next;
+};
+
struct vfio_pci_device {
struct pci_dev *pdev;
void __iomem *barmap[PCI_STD_RESOURCE_END + 1];
+ bool bar_mmap_supported[PCI_STD_RESOURCE_END + 1];
u8 *pci_config_map;
u8 *vconfig;
struct perm_bits *msi_perm;
@@ -87,6 +94,7 @@ struct vfio_pci_device {
int refcnt;
struct eventfd_ctx *err_trigger;
struct eventfd_ctx *req_trigger;
+ struct list_head shadow_resources_list;
};

#define is_intx(vdev) (vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX)
--
1.7.9.5

2016-04-26 05:41:25

by Alexey Kardashevskiy

[permalink] [raw]
Subject: Re: [RFC v6 04/10] PCI: Add support for enforcing all MMIO BARs to be page aligned

On 04/18/2016 08:56 PM, Yongji Xie wrote:
> When vfio passthrough a PCI device of which MMIO BARs are
> smaller than PAGE_SIZE, guest will not handle the mmio
> accesses to the BARs which leads to mmio emulations in host.
>
> This is because vfio will not allow to passthrough one BAR's
> mmio page which may be shared with other BARs. Otherwise,
> there will be a backdoor that guest can use to access BARs
> of other guest.
>
> To solve this issue, this patch modifies resource_alignment
> to support syntax where multiple devices get the same
> alignment. So we can use something like
> "pci=resource_alignment=*:*:*.*:noresize" to enforce the
> alignment of all MMIO BARs to be at least PAGE_SIZE so that
> one BAR's mmio page would not be shared with other BARs.
>
> And we also define a macro PCIBIOS_MIN_ALIGNMENT to enable this
> automatically on PPC64 platform which can easily hit this issue
> because its PAGE_SIZE is 64KB.
>
> Signed-off-by: Yongji Xie <[email protected]>
> ---
> Documentation/kernel-parameters.txt | 2 ++
> arch/powerpc/include/asm/pci.h | 2 ++
> drivers/pci/pci.c | 64 +++++++++++++++++++++++++++++------
> 3 files changed, 57 insertions(+), 11 deletions(-)
>
> diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
> index d8b29ab..542be4a 100644
> --- a/Documentation/kernel-parameters.txt
> +++ b/Documentation/kernel-parameters.txt
> @@ -2918,6 +2918,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
> aligned memory resources.
> If <order of align> is not specified,
> PAGE_SIZE is used as alignment.
> + <domain>, <bus>, <slot> and <func> can be set to
> + "*" which means match all values.
> PCI-PCI bridge can be specified, if resource
> windows need to be expanded.
> noresize: Don't change the resources' sizes when
> diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h
> index 6f8065a..78f230f 100644
> --- a/arch/powerpc/include/asm/pci.h
> +++ b/arch/powerpc/include/asm/pci.h
> @@ -30,6 +30,8 @@
> #define PCIBIOS_MIN_IO 0x1000
> #define PCIBIOS_MIN_MEM 0x10000000
>
> +#define PCIBIOS_MIN_ALIGNMENT PAGE_SIZE
> +
> struct pci_dev;
>
> /* Values for the `which' argument to sys_pciconfig_iobase syscall. */
> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> index 7564ccc..0381c28 100644
> --- a/drivers/pci/pci.c
> +++ b/drivers/pci/pci.c
> @@ -4605,7 +4605,12 @@ static resource_size_t pci_specified_resource_alignment(struct pci_dev *dev,
> int seg, bus, slot, func, align_order, count;
> resource_size_t align = 0;
> char *p;
> + bool invalid = false;
>
> +#ifdef PCIBIOS_MIN_ALIGNMENT
> + align = PCIBIOS_MIN_ALIGNMENT;
> + *resize = false;
> +#endif
> spin_lock(&resource_alignment_lock);
> p = resource_alignment_param;
> while (*p) {
> @@ -4622,16 +4627,49 @@ static resource_size_t pci_specified_resource_alignment(struct pci_dev *dev,
> } else {
> align_order = -1;
> }
> - if (sscanf(p, "%x:%x:%x.%x%n",
> - &seg, &bus, &slot, &func, &count) != 4) {


I'd replace the above lines with:

char segstr[5] = "*", busstr[3] = "*";
char slotstr[3] = "*", funstr[2] = "*";

if (sscanf(p, "%4[^:]:%2[^:]:%2[^.].%1s%n",
&segstr, &busstr, &slotstr, &funcstr, &count) != 4) {


and add some wrapper like:

static bool glob_match_hex(char const *pat, int val)
{
char valstr[5]; /* 5 should be enough for PCI */
snprintf(valstr, sizeof(valstr) - 1, "%4x", val);
return glob_match(pat, valstr);
}

and then use glob_match_hex() (or make a wrapper like above on top of
fnmatch()), this would enable better mask handling.

If anyone finds this useful (which I am not sure about).




> + if (p[0] == '*' && p[1] == ':') {
> + seg = -1;
> + count = 1;
> + } else if (sscanf(p, "%x%n", &seg, &count) != 1 ||
> + p[count] != ':') {
> + invalid = true;
> + break;
> + }
> + p += count + 1;
> + if (*p == '*') {
> + bus = -1;
> + count = 1;
> + } else if (sscanf(p, "%x%n", &bus, &count) != 1) {
> + invalid = true;
> + break;
> + }
> + p += count;
> + if (*p == '.') {
> + slot = bus;
> + bus = seg;
> seg = 0;
> - if (sscanf(p, "%x:%x.%x%n",
> - &bus, &slot, &func, &count) != 3) {
> - /* Invalid format */
> - printk(KERN_ERR "PCI: Can't parse resource_alignment parameter: %s\n",
> - p);
> + p++;
> + } else if (*p == ':') {
> + p++;
> + if (p[0] == '*' && p[1] == '.') {
> + slot = -1;
> + count = 1;
> + } else if (sscanf(p, "%x%n", &slot, &count) != 1 ||
> + p[count] != '.') {
> + invalid = true;
> break;
> }
> + p += count + 1;
> + } else {
> + invalid = true;
> + break;
> + }
> + if (*p == '*') {
> + func = -1;
> + count = 1;
> + } else if (sscanf(p, "%x%n", &func, &count) != 1) {
> + invalid = true;
> + break;
> }
> p += count;
> if (!strncmp(p, ":noresize", 9)) {
> @@ -4639,10 +4677,10 @@ static resource_size_t pci_specified_resource_alignment(struct pci_dev *dev,
> p += 9;
> } else
> *resize = true;
> - if (seg == pci_domain_nr(dev->bus) &&
> - bus == dev->bus->number &&
> - slot == PCI_SLOT(dev->devfn) &&
> - func == PCI_FUNC(dev->devfn)) {
> + if ((seg == pci_domain_nr(dev->bus) || seg == -1) &&
> + (bus == dev->bus->number || bus == -1) &&
> + (slot == PCI_SLOT(dev->devfn) || slot == -1) &&
> + (func == PCI_FUNC(dev->devfn) || func == -1)) {
> if (align_order == -1)
> align = PAGE_SIZE;
> else
> @@ -4652,10 +4690,14 @@ static resource_size_t pci_specified_resource_alignment(struct pci_dev *dev,
> }
> if (*p != ';' && *p != ',') {
> /* End of param or invalid format */
> + invalid = true;
> break;
> }
> p++;
> }
> + if (invalid)
> + printk(KERN_ERR "PCI: Can't parse resource_alignment parameter:%s\n",
> + p);
> spin_unlock(&resource_alignment_lock);
> return align;
> }
>


--
Alexey

2016-04-26 06:44:43

by Yongji Xie

[permalink] [raw]
Subject: Re: [RFC v6 04/10] PCI: Add support for enforcing all MMIO BARs to be page aligned

On 2016/4/26 13:41, Alexey Kardashevskiy wrote:

> On 04/18/2016 08:56 PM, Yongji Xie wrote:
>> When vfio passthrough a PCI device of which MMIO BARs are
>> smaller than PAGE_SIZE, guest will not handle the mmio
>> accesses to the BARs which leads to mmio emulations in host.
>>
>> This is because vfio will not allow to passthrough one BAR's
>> mmio page which may be shared with other BARs. Otherwise,
>> there will be a backdoor that guest can use to access BARs
>> of other guest.
>>
>> To solve this issue, this patch modifies resource_alignment
>> to support syntax where multiple devices get the same
>> alignment. So we can use something like
>> "pci=resource_alignment=*:*:*.*:noresize" to enforce the
>> alignment of all MMIO BARs to be at least PAGE_SIZE so that
>> one BAR's mmio page would not be shared with other BARs.
>>
>> And we also define a macro PCIBIOS_MIN_ALIGNMENT to enable this
>> automatically on PPC64 platform which can easily hit this issue
>> because its PAGE_SIZE is 64KB.
>>
>> Signed-off-by: Yongji Xie <[email protected]>
>> ---
>> Documentation/kernel-parameters.txt | 2 ++
>> arch/powerpc/include/asm/pci.h | 2 ++
>> drivers/pci/pci.c | 64
>> +++++++++++++++++++++++++++++------
>> 3 files changed, 57 insertions(+), 11 deletions(-)
>>
>> diff --git a/Documentation/kernel-parameters.txt
>> b/Documentation/kernel-parameters.txt
>> index d8b29ab..542be4a 100644
>> --- a/Documentation/kernel-parameters.txt
>> +++ b/Documentation/kernel-parameters.txt
>> @@ -2918,6 +2918,8 @@ bytes respectively. Such letter suffixes can
>> also be entirely omitted.
>> aligned memory resources.
>> If <order of align> is not specified,
>> PAGE_SIZE is used as alignment.
>> + <domain>, <bus>, <slot> and <func> can be set to
>> + "*" which means match all values.
>> PCI-PCI bridge can be specified, if resource
>> windows need to be expanded.
>> noresize: Don't change the resources' sizes when
>> diff --git a/arch/powerpc/include/asm/pci.h
>> b/arch/powerpc/include/asm/pci.h
>> index 6f8065a..78f230f 100644
>> --- a/arch/powerpc/include/asm/pci.h
>> +++ b/arch/powerpc/include/asm/pci.h
>> @@ -30,6 +30,8 @@
>> #define PCIBIOS_MIN_IO 0x1000
>> #define PCIBIOS_MIN_MEM 0x10000000
>>
>> +#define PCIBIOS_MIN_ALIGNMENT PAGE_SIZE
>> +
>> struct pci_dev;
>>
>> /* Values for the `which' argument to sys_pciconfig_iobase
>> syscall. */
>> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
>> index 7564ccc..0381c28 100644
>> --- a/drivers/pci/pci.c
>> +++ b/drivers/pci/pci.c
>> @@ -4605,7 +4605,12 @@ static resource_size_t
>> pci_specified_resource_alignment(struct pci_dev *dev,
>> int seg, bus, slot, func, align_order, count;
>> resource_size_t align = 0;
>> char *p;
>> + bool invalid = false;
>>
>> +#ifdef PCIBIOS_MIN_ALIGNMENT
>> + align = PCIBIOS_MIN_ALIGNMENT;
>> + *resize = false;
>> +#endif
>> spin_lock(&resource_alignment_lock);
>> p = resource_alignment_param;
>> while (*p) {
>> @@ -4622,16 +4627,49 @@ static resource_size_t
>> pci_specified_resource_alignment(struct pci_dev *dev,
>> } else {
>> align_order = -1;
>> }
>> - if (sscanf(p, "%x:%x:%x.%x%n",
>> - &seg, &bus, &slot, &func, &count) != 4) {
>
>
> I'd replace the above lines with:
>
> char segstr[5] = "*", busstr[3] = "*";
> char slotstr[3] = "*", funstr[2] = "*";
>
> if (sscanf(p, "%4[^:]:%2[^:]:%2[^.].%1s%n",
> &segstr, &busstr, &slotstr, &funcstr, &count) != 4) {
>
>

It seems the current implement of sscanf() in kernel
is not able to support the syntax: "%4[^:]:%2[^:]:%2[^.]".

Thanks,
Yongji

> and add some wrapper like:
>
> static bool glob_match_hex(char const *pat, int val)
> {
> char valstr[5]; /* 5 should be enough for PCI */
> snprintf(valstr, sizeof(valstr) - 1, "%4x", val);
> return glob_match(pat, valstr);
> }
>
> and then use glob_match_hex() (or make a wrapper like above on top of
> fnmatch()), this would enable better mask handling.
>
> If anyone finds this useful (which I am not sure about).
>
>
>
>
>> + if (p[0] == '*' && p[1] == ':') {
>> + seg = -1;
>> + count = 1;
>> + } else if (sscanf(p, "%x%n", &seg, &count) != 1 ||
>> + p[count] != ':') {
>> + invalid = true;
>> + break;
>> + }
>> + p += count + 1;
>> + if (*p == '*') {
>> + bus = -1;
>> + count = 1;
>> + } else if (sscanf(p, "%x%n", &bus, &count) != 1) {
>> + invalid = true;
>> + break;
>> + }
>> + p += count;
>> + if (*p == '.') {
>> + slot = bus;
>> + bus = seg;
>> seg = 0;
>> - if (sscanf(p, "%x:%x.%x%n",
>> - &bus, &slot, &func, &count) != 3) {
>> - /* Invalid format */
>> - printk(KERN_ERR "PCI: Can't parse resource_alignment
>> parameter: %s\n",
>> - p);
>> + p++;
>> + } else if (*p == ':') {
>> + p++;
>> + if (p[0] == '*' && p[1] == '.') {
>> + slot = -1;
>> + count = 1;
>> + } else if (sscanf(p, "%x%n", &slot, &count) != 1 ||
>> + p[count] != '.') {
>> + invalid = true;
>> break;
>> }
>> + p += count + 1;
>> + } else {
>> + invalid = true;
>> + break;
>> + }
>> + if (*p == '*') {
>> + func = -1;
>> + count = 1;
>> + } else if (sscanf(p, "%x%n", &func, &count) != 1) {
>> + invalid = true;
>> + break;
>> }
>> p += count;
>> if (!strncmp(p, ":noresize", 9)) {
>> @@ -4639,10 +4677,10 @@ static resource_size_t
>> pci_specified_resource_alignment(struct pci_dev *dev,
>> p += 9;
>> } else
>> *resize = true;
>> - if (seg == pci_domain_nr(dev->bus) &&
>> - bus == dev->bus->number &&
>> - slot == PCI_SLOT(dev->devfn) &&
>> - func == PCI_FUNC(dev->devfn)) {
>> + if ((seg == pci_domain_nr(dev->bus) || seg == -1) &&
>> + (bus == dev->bus->number || bus == -1) &&
>> + (slot == PCI_SLOT(dev->devfn) || slot == -1) &&
>> + (func == PCI_FUNC(dev->devfn) || func == -1)) {
>> if (align_order == -1)
>> align = PAGE_SIZE;
>> else
>> @@ -4652,10 +4690,14 @@ static resource_size_t
>> pci_specified_resource_alignment(struct pci_dev *dev,
>> }
>> if (*p != ';' && *p != ',') {
>> /* End of param or invalid format */
>> + invalid = true;
>> break;
>> }
>> p++;
>> }
>> + if (invalid)
>> + printk(KERN_ERR "PCI: Can't parse resource_alignment
>> parameter:%s\n",
>> + p);
>> spin_unlock(&resource_alignment_lock);
>> return align;
>> }
>>
>
>