For regular BAR, drivers can use pci_resize_resource to resize it to the desired
size provided that it is supported by the hardware, which the driver can query
using pci_rebar_get_possible_sizes.
This series expands the API to work with IOV BAR as well.
Thanks!
-Michał
Michał Winiarski (2):
PCI: Add support for VF Resizable Bar extended cap
PCI: Don't fail BAR resize if nothing is reassigned
drivers/pci/pci.c | 25 ++++++++++++++--
drivers/pci/setup-res.c | 55 +++++++++++++++++++++++++++++++----
include/uapi/linux/pci_regs.h | 1 +
3 files changed, 73 insertions(+), 8 deletions(-)
--
2.34.0
Similar to regular resizable BAR, VF BAR can also be resized.
The structures are very similar, which means we can reuse most of the
implementation. See PCIe r4.0, sec 9.3.7.4.
Signed-off-by: Michał Winiarski <[email protected]>
---
drivers/pci/pci.c | 25 +++++++++++++++--
drivers/pci/setup-res.c | 53 ++++++++++++++++++++++++++++++++---
include/uapi/linux/pci_regs.h | 1 +
3 files changed, 72 insertions(+), 7 deletions(-)
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 3d2fb394986a4..89448c5104e46 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1682,12 +1682,15 @@ static void pci_restore_config_space(struct pci_dev *pdev)
}
}
-static void pci_restore_rebar_state(struct pci_dev *pdev)
+static void __pci_restore_rebar_state(struct pci_dev *pdev, int cap)
{
unsigned int pos, nbars, i;
u32 ctrl;
- pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_REBAR);
+ if (WARN_ON(cap != PCI_EXT_CAP_ID_REBAR && cap != PCI_EXT_CAP_ID_VF_REBAR))
+ return;
+
+ pos = pci_find_ext_capability(pdev, cap);
if (!pos)
return;
@@ -1709,6 +1712,14 @@ static void pci_restore_rebar_state(struct pci_dev *pdev)
}
}
+static void pci_restore_rebar_state(struct pci_dev *pdev)
+{
+ __pci_restore_rebar_state(pdev, PCI_EXT_CAP_ID_REBAR);
+#ifdef CONFIG_PCI_IOV
+ __pci_restore_rebar_state(pdev, PCI_EXT_CAP_ID_VF_REBAR);
+#endif
+}
+
/**
* pci_restore_state - Restore the saved state of a PCI device
* @dev: PCI device that we're dealing with
@@ -3639,10 +3650,18 @@ void pci_acs_init(struct pci_dev *dev)
*/
static int pci_rebar_find_pos(struct pci_dev *pdev, int bar)
{
+ int cap = PCI_EXT_CAP_ID_REBAR;
unsigned int pos, nbars, i;
u32 ctrl;
- pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_REBAR);
+#ifdef CONFIG_PCI_IOV
+ if (bar >= PCI_IOV_RESOURCES) {
+ cap = PCI_EXT_CAP_ID_VF_REBAR;
+ bar -= PCI_IOV_RESOURCES;
+ }
+#endif
+
+ pos = pci_find_ext_capability(pdev, cap);
if (!pos)
return -ENOTSUPP;
diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c
index 7f1acb3918d0c..1946e52e7678a 100644
--- a/drivers/pci/setup-res.c
+++ b/drivers/pci/setup-res.c
@@ -407,13 +407,36 @@ void pci_release_resource(struct pci_dev *dev, int resno)
}
EXPORT_SYMBOL(pci_release_resource);
+static int pci_memory_decoding(struct pci_dev *dev)
+{
+ u16 cmd;
+
+ pci_read_config_word(dev, PCI_COMMAND, &cmd);
+ if (cmd & PCI_COMMAND_MEMORY)
+ return -EBUSY;
+
+ return 0;
+}
+
+#ifdef CONFIG_PCI_IOV
+static int pci_vf_memory_decoding(struct pci_dev *dev)
+{
+ u16 cmd;
+
+ pci_read_config_word(dev, dev->sriov->pos + PCI_SRIOV_CTRL, &cmd);
+ if (cmd & PCI_SRIOV_CTRL_MSE)
+ return -EBUSY;
+
+ return 0;
+}
+#endif
+
int pci_resize_resource(struct pci_dev *dev, int resno, int size)
{
struct resource *res = dev->resource + resno;
struct pci_host_bridge *host;
int old, ret;
u32 sizes;
- u16 cmd;
/* Check if we must preserve the firmware's resource assignment */
host = pci_find_host_bridge(dev->bus);
@@ -424,9 +447,14 @@ int pci_resize_resource(struct pci_dev *dev, int resno, int size)
if (!(res->flags & IORESOURCE_UNSET))
return -EBUSY;
- pci_read_config_word(dev, PCI_COMMAND, &cmd);
- if (cmd & PCI_COMMAND_MEMORY)
- return -EBUSY;
+#ifdef CONFIG_PCI_IOV
+ if (resno >= PCI_IOV_RESOURCES)
+ ret = pci_vf_memory_decoding(dev);
+ else
+#endif
+ ret = pci_memory_decoding(dev);
+ if (ret)
+ return ret;
sizes = pci_rebar_get_possible_sizes(dev, resno);
if (!sizes)
@@ -445,6 +473,14 @@ int pci_resize_resource(struct pci_dev *dev, int resno, int size)
res->end = res->start + pci_rebar_size_to_bytes(size) - 1;
+#ifdef CONFIG_PCI_IOV
+ if (resno >= PCI_IOV_RESOURCES) {
+ dev->sriov->barsz[resno - PCI_IOV_RESOURCES] = pci_rebar_size_to_bytes(size);
+ res->end = res->start +
+ resource_size(res) * pci_sriov_get_totalvfs(dev) - 1;
+ }
+#endif
+
/* Check if the new config works by trying to assign everything. */
if (dev->bus->self) {
ret = pci_reassign_bridge_resources(dev->bus->self, res->flags);
@@ -456,6 +492,15 @@ int pci_resize_resource(struct pci_dev *dev, int resno, int size)
error_resize:
pci_rebar_set_size(dev, resno, old);
res->end = res->start + pci_rebar_size_to_bytes(old) - 1;
+
+#ifdef CONFIG_PCI_IOV
+ if (resno >= PCI_IOV_RESOURCES) {
+ dev->sriov->barsz[resno - PCI_IOV_RESOURCES] = pci_rebar_size_to_bytes(old);
+ res->end = res->start +
+ pci_rebar_size_to_bytes(old) * pci_sriov_get_totalvfs(dev) - 1;
+ }
+#endif
+
return ret;
}
EXPORT_SYMBOL(pci_resize_resource);
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index ff6ccbc6efe96..7f5726d23b038 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -734,6 +734,7 @@
#define PCI_EXT_CAP_ID_L1SS 0x1E /* L1 PM Substates */
#define PCI_EXT_CAP_ID_PTM 0x1F /* Precision Time Measurement */
#define PCI_EXT_CAP_ID_DVSEC 0x23 /* Designated Vendor-Specific */
+#define PCI_EXT_CAP_ID_VF_REBAR 0x24 /* VF Resizable BAR */
#define PCI_EXT_CAP_ID_DLF 0x25 /* Data Link Feature */
#define PCI_EXT_CAP_ID_PL_16GT 0x26 /* Physical Layer 16.0 GT/s */
#define PCI_EXT_CAP_ID_MAX PCI_EXT_CAP_ID_PL_16GT
--
2.34.0
When pci_reassign_bridge_resources returns -ENOENT, it means that no
resources needed to be "moved". This can happen when the resource was
resized to be smaller, and it's completely fine - there's no need to treat
this as an error and go back to the original BAR size.
Signed-off-by: Michał Winiarski <[email protected]>
---
drivers/pci/setup-res.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c
index 1946e52e7678a..5de5129055e0a 100644
--- a/drivers/pci/setup-res.c
+++ b/drivers/pci/setup-res.c
@@ -484,7 +484,7 @@ int pci_resize_resource(struct pci_dev *dev, int resno, int size)
/* Check if the new config works by trying to assign everything. */
if (dev->bus->self) {
ret = pci_reassign_bridge_resources(dev->bus->self, res->flags);
- if (ret)
+ if (ret && ret != -ENOENT)
goto error_resize;
}
return 0;
--
2.34.0
Hi Michał,
[...]
> +static int pci_memory_decoding(struct pci_dev *dev)
> +{
> + u16 cmd;
> +
> + pci_read_config_word(dev, PCI_COMMAND, &cmd);
> + if (cmd & PCI_COMMAND_MEMORY)
> + return -EBUSY;
> +
> + return 0;
> +}
> +
> +#ifdef CONFIG_PCI_IOV
> +static int pci_vf_memory_decoding(struct pci_dev *dev)
> +{
> + u16 cmd;
> +
> + pci_read_config_word(dev, dev->sriov->pos + PCI_SRIOV_CTRL, &cmd);
> + if (cmd & PCI_SRIOV_CTRL_MSE)
> + return -EBUSY;
> +
> + return 0;
> +}
> +#endif
> +
> int pci_resize_resource(struct pci_dev *dev, int resno, int size)
> {
> struct resource *res = dev->resource + resno;
> struct pci_host_bridge *host;
> int old, ret;
> u32 sizes;
> - u16 cmd;
>
> /* Check if we must preserve the firmware's resource assignment */
> host = pci_find_host_bridge(dev->bus);
> @@ -424,9 +447,14 @@ int pci_resize_resource(struct pci_dev *dev, int resno, int size)
> if (!(res->flags & IORESOURCE_UNSET))
> return -EBUSY;
>
> - pci_read_config_word(dev, PCI_COMMAND, &cmd);
> - if (cmd & PCI_COMMAND_MEMORY)
> - return -EBUSY;
> +#ifdef CONFIG_PCI_IOV
> + if (resno >= PCI_IOV_RESOURCES)
> + ret = pci_vf_memory_decoding(dev);
> + else
> +#endif
> + ret = pci_memory_decoding(dev);
> + if (ret)
> + return ret;
The above else works, however, it does trip our static analysis tools, and
lack of the indentation makes it slightly confusing to read, at least at
the first glance. Thus, I wonder whether it would be possible to combine
the pci_vf_memory_decoding() and pci_memory_decoding() somehow neatly into
a private helper that takes a boolean to indicate whether it's a VF or not.
Then, we could drop the if-statement, since some of the logic could live
within the helper.
What do you think?
Krzysztof
Am 15.12.21 um 15:16 schrieb Michał Winiarski:
> When pci_reassign_bridge_resources returns -ENOENT, it means that no
> resources needed to be "moved". This can happen when the resource was
> resized to be smaller, and it's completely fine - there's no need to treat
> this as an error and go back to the original BAR size.
Well that doesn't make much sense as far as I can see.
Drivers mandatory need to call pci_release_resource() on all resources
which might need to move for a resize, including the one which is about
to be resized.
When you get -ENOENT from pci_reassign_bridge_resources() it just means
that the function was not able to do it's work because the driver failed
to release it's resources before the resize.
Technically we could indeed skip this step if the new size is smaller
than the old size, but then the question is why would somebody resize in
the first place? The freed up address space is not usable if you don't
do this.
Regards,
Christian.
>
> Signed-off-by: Michał Winiarski <[email protected]>
> ---
> drivers/pci/setup-res.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c
> index 1946e52e7678a..5de5129055e0a 100644
> --- a/drivers/pci/setup-res.c
> +++ b/drivers/pci/setup-res.c
> @@ -484,7 +484,7 @@ int pci_resize_resource(struct pci_dev *dev, int resno, int size)
> /* Check if the new config works by trying to assign everything. */
> if (dev->bus->self) {
> ret = pci_reassign_bridge_resources(dev->bus->self, res->flags);
> - if (ret)
> + if (ret && ret != -ENOENT)
> goto error_resize;
> }
> return 0;
Am 15.12.21 um 15:16 schrieb Michał Winiarski:
> Similar to regular resizable BAR, VF BAR can also be resized.
> The structures are very similar, which means we can reuse most of the
> implementation. See PCIe r4.0, sec 9.3.7.4.
>
> Signed-off-by: Michał Winiarski <[email protected]>
> ---
> drivers/pci/pci.c | 25 +++++++++++++++--
> drivers/pci/setup-res.c | 53 ++++++++++++++++++++++++++++++++---
> include/uapi/linux/pci_regs.h | 1 +
> 3 files changed, 72 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> index 3d2fb394986a4..89448c5104e46 100644
> --- a/drivers/pci/pci.c
> +++ b/drivers/pci/pci.c
> @@ -1682,12 +1682,15 @@ static void pci_restore_config_space(struct pci_dev *pdev)
> }
> }
>
> -static void pci_restore_rebar_state(struct pci_dev *pdev)
> +static void __pci_restore_rebar_state(struct pci_dev *pdev, int cap)
> {
> unsigned int pos, nbars, i;
> u32 ctrl;
>
> - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_REBAR);
> + if (WARN_ON(cap != PCI_EXT_CAP_ID_REBAR && cap != PCI_EXT_CAP_ID_VF_REBAR))
> + return;
> +
> + pos = pci_find_ext_capability(pdev, cap);
> if (!pos)
> return;
>
> @@ -1709,6 +1712,14 @@ static void pci_restore_rebar_state(struct pci_dev *pdev)
> }
> }
>
> +static void pci_restore_rebar_state(struct pci_dev *pdev)
> +{
> + __pci_restore_rebar_state(pdev, PCI_EXT_CAP_ID_REBAR);
> +#ifdef CONFIG_PCI_IOV
> + __pci_restore_rebar_state(pdev, PCI_EXT_CAP_ID_VF_REBAR);
> +#endif
> +}
> +
It's probably cleaner to let the caller specify the capability to
restore directly.
> /**
> * pci_restore_state - Restore the saved state of a PCI device
> * @dev: PCI device that we're dealing with
> @@ -3639,10 +3650,18 @@ void pci_acs_init(struct pci_dev *dev)
> */
> static int pci_rebar_find_pos(struct pci_dev *pdev, int bar)
> {
> + int cap = PCI_EXT_CAP_ID_REBAR;
> unsigned int pos, nbars, i;
> u32 ctrl;
>
> - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_REBAR);
> +#ifdef CONFIG_PCI_IOV
> + if (bar >= PCI_IOV_RESOURCES) {
> + cap = PCI_EXT_CAP_ID_VF_REBAR;
> + bar -= PCI_IOV_RESOURCES;
> + }
> +#endif
> +
> + pos = pci_find_ext_capability(pdev, cap);
> if (!pos)
> return -ENOTSUPP;
>
> diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c
> index 7f1acb3918d0c..1946e52e7678a 100644
> --- a/drivers/pci/setup-res.c
> +++ b/drivers/pci/setup-res.c
> @@ -407,13 +407,36 @@ void pci_release_resource(struct pci_dev *dev, int resno)
> }
> EXPORT_SYMBOL(pci_release_resource);
>
> +static int pci_memory_decoding(struct pci_dev *dev)
> +{
> + u16 cmd;
> +
> + pci_read_config_word(dev, PCI_COMMAND, &cmd);
> + if (cmd & PCI_COMMAND_MEMORY)
> + return -EBUSY;
> +
> + return 0;
> +}
> +
> +#ifdef CONFIG_PCI_IOV
> +static int pci_vf_memory_decoding(struct pci_dev *dev)
> +{
> + u16 cmd;
> +
> + pci_read_config_word(dev, dev->sriov->pos + PCI_SRIOV_CTRL, &cmd);
> + if (cmd & PCI_SRIOV_CTRL_MSE)
> + return -EBUSY;
> +
> + return 0;
> +}
> +#endif
> +
> int pci_resize_resource(struct pci_dev *dev, int resno, int size)
> {
> struct resource *res = dev->resource + resno;
> struct pci_host_bridge *host;
> int old, ret;
> u32 sizes;
> - u16 cmd;
>
> /* Check if we must preserve the firmware's resource assignment */
> host = pci_find_host_bridge(dev->bus);
> @@ -424,9 +447,14 @@ int pci_resize_resource(struct pci_dev *dev, int resno, int size)
> if (!(res->flags & IORESOURCE_UNSET))
> return -EBUSY;
>
> - pci_read_config_word(dev, PCI_COMMAND, &cmd);
> - if (cmd & PCI_COMMAND_MEMORY)
> - return -EBUSY;
> +#ifdef CONFIG_PCI_IOV
> + if (resno >= PCI_IOV_RESOURCES)
> + ret = pci_vf_memory_decoding(dev);
> + else
> +#endif
> + ret = pci_memory_decoding(dev);
> + if (ret)
> + return ret;
Way to many #ifdef spread around inside the code, please restructure that.
For example concentrating the logic in a single function should help:
static int pci_check_decoding_disabled(..., inr resno)
{
#ifdef CONFIG_PCI_IOV
if (resno...
return -EBUSY;
else
return 0;
#endif
if (...)
return -EBUSY;
return 0;
}
>
> sizes = pci_rebar_get_possible_sizes(dev, resno);
> if (!sizes)
> @@ -445,6 +473,14 @@ int pci_resize_resource(struct pci_dev *dev, int resno, int size)
>
> res->end = res->start + pci_rebar_size_to_bytes(size) - 1;
>
> +#ifdef CONFIG_PCI_IOV
> + if (resno >= PCI_IOV_RESOURCES) {
> + dev->sriov->barsz[resno - PCI_IOV_RESOURCES] = pci_rebar_size_to_bytes(size);
> + res->end = res->start +
> + resource_size(res) * pci_sriov_get_totalvfs(dev) - 1;
> + }
> +#endif
> +
> /* Check if the new config works by trying to assign everything. */
> if (dev->bus->self) {
> ret = pci_reassign_bridge_resources(dev->bus->self, res->flags);
> @@ -456,6 +492,15 @@ int pci_resize_resource(struct pci_dev *dev, int resno, int size)
> error_resize:
> pci_rebar_set_size(dev, resno, old);
> res->end = res->start + pci_rebar_size_to_bytes(old) - 1;
> +
> +#ifdef CONFIG_PCI_IOV
> + if (resno >= PCI_IOV_RESOURCES) {
> + dev->sriov->barsz[resno - PCI_IOV_RESOURCES] = pci_rebar_size_to_bytes(old);
> + res->end = res->start +
> + pci_rebar_size_to_bytes(old) * pci_sriov_get_totalvfs(dev) - 1;
> + }
> +#endif
> +
Looks like this deserves it's own function.
Regards,
Christian.
> return ret;
> }
> EXPORT_SYMBOL(pci_resize_resource);
> diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
> index ff6ccbc6efe96..7f5726d23b038 100644
> --- a/include/uapi/linux/pci_regs.h
> +++ b/include/uapi/linux/pci_regs.h
> @@ -734,6 +734,7 @@
> #define PCI_EXT_CAP_ID_L1SS 0x1E /* L1 PM Substates */
> #define PCI_EXT_CAP_ID_PTM 0x1F /* Precision Time Measurement */
> #define PCI_EXT_CAP_ID_DVSEC 0x23 /* Designated Vendor-Specific */
> +#define PCI_EXT_CAP_ID_VF_REBAR 0x24 /* VF Resizable BAR */
> #define PCI_EXT_CAP_ID_DLF 0x25 /* Data Link Feature */
> #define PCI_EXT_CAP_ID_PL_16GT 0x26 /* Physical Layer 16.0 GT/s */
> #define PCI_EXT_CAP_ID_MAX PCI_EXT_CAP_ID_PL_16GT
On 16.12.2021 08:50, Christian König wrote:
>
>
> Am 15.12.21 um 15:16 schrieb Michał Winiarski:
>> Similar to regular resizable BAR, VF BAR can also be resized.
>> The structures are very similar, which means we can reuse most of the
>> implementation. See PCIe r4.0, sec 9.3.7.4.
>>
>> Signed-off-by: Michał Winiarski <[email protected]>
>> ---
>> drivers/pci/pci.c | 25 +++++++++++++++--
>> drivers/pci/setup-res.c | 53 ++++++++++++++++++++++++++++++++---
>> include/uapi/linux/pci_regs.h | 1 +
>> 3 files changed, 72 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
>> index 3d2fb394986a4..89448c5104e46 100644
>> --- a/drivers/pci/pci.c
>> +++ b/drivers/pci/pci.c
>> @@ -1682,12 +1682,15 @@ static void pci_restore_config_space(struct
>> pci_dev *pdev)
>> }
>> }
>> -static void pci_restore_rebar_state(struct pci_dev *pdev)
>> +static void __pci_restore_rebar_state(struct pci_dev *pdev, int cap)
>> {
>> unsigned int pos, nbars, i;
>> u32 ctrl;
>> - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_REBAR);
>> + if (WARN_ON(cap != PCI_EXT_CAP_ID_REBAR && cap !=
>> PCI_EXT_CAP_ID_VF_REBAR))
>> + return;
>> +
>> + pos = pci_find_ext_capability(pdev, cap);
>> if (!pos)
>> return;
>> @@ -1709,6 +1712,14 @@ static void pci_restore_rebar_state(struct
>> pci_dev *pdev)
>> }
>> }
>> +static void pci_restore_rebar_state(struct pci_dev *pdev)
>> +{
>> + __pci_restore_rebar_state(pdev, PCI_EXT_CAP_ID_REBAR);
>> +#ifdef CONFIG_PCI_IOV
>> + __pci_restore_rebar_state(pdev, PCI_EXT_CAP_ID_VF_REBAR);
>> +#endif
>> +}
>> +
>
> It's probably cleaner to let the caller specify the capability to
> restore directly.
Ok - I'll go with:
pci_restore_rebar_state(dev);
pci_restore_vf_rebar_state(dev);
in pci_restore_state() (where pci_restore_vf_rebar_state() will be
"no-op" if there's no CONFIG_PCI_IOV).
>
>> /**
>> * pci_restore_state - Restore the saved state of a PCI device
>> * @dev: PCI device that we're dealing with
>> @@ -3639,10 +3650,18 @@ void pci_acs_init(struct pci_dev *dev)
>> */
>> static int pci_rebar_find_pos(struct pci_dev *pdev, int bar)
>> {
>> + int cap = PCI_EXT_CAP_ID_REBAR;
>> unsigned int pos, nbars, i;
>> u32 ctrl;
>> - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_REBAR);
>> +#ifdef CONFIG_PCI_IOV
>> + if (bar >= PCI_IOV_RESOURCES) {
>> + cap = PCI_EXT_CAP_ID_VF_REBAR;
>> + bar -= PCI_IOV_RESOURCES;
>> + }
>> +#endif
>> +
>> + pos = pci_find_ext_capability(pdev, cap);
>> if (!pos)
>> return -ENOTSUPP;
>> diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c
>> index 7f1acb3918d0c..1946e52e7678a 100644
>> --- a/drivers/pci/setup-res.c
>> +++ b/drivers/pci/setup-res.c
>> @@ -407,13 +407,36 @@ void pci_release_resource(struct pci_dev *dev,
>> int resno)
>> }
>> EXPORT_SYMBOL(pci_release_resource);
>> +static int pci_memory_decoding(struct pci_dev *dev)
>> +{
>> + u16 cmd;
>> +
>> + pci_read_config_word(dev, PCI_COMMAND, &cmd);
>> + if (cmd & PCI_COMMAND_MEMORY)
>> + return -EBUSY;
>> +
>> + return 0;
>> +}
>> +
>> +#ifdef CONFIG_PCI_IOV
>> +static int pci_vf_memory_decoding(struct pci_dev *dev)
>> +{
>> + u16 cmd;
>> +
>> + pci_read_config_word(dev, dev->sriov->pos + PCI_SRIOV_CTRL, &cmd);
>> + if (cmd & PCI_SRIOV_CTRL_MSE)
>> + return -EBUSY;
>> +
>> + return 0;
>> +}
>> +#endif
>> +
>> int pci_resize_resource(struct pci_dev *dev, int resno, int size)
>> {
>> struct resource *res = dev->resource + resno;
>> struct pci_host_bridge *host;
>> int old, ret;
>> u32 sizes;
>> - u16 cmd;
>> /* Check if we must preserve the firmware's resource assignment */
>> host = pci_find_host_bridge(dev->bus);
>> @@ -424,9 +447,14 @@ int pci_resize_resource(struct pci_dev *dev, int
>> resno, int size)
>> if (!(res->flags & IORESOURCE_UNSET))
>> return -EBUSY;
>> - pci_read_config_word(dev, PCI_COMMAND, &cmd);
>> - if (cmd & PCI_COMMAND_MEMORY)
>> - return -EBUSY;
>> +#ifdef CONFIG_PCI_IOV
>> + if (resno >= PCI_IOV_RESOURCES)
>> + ret = pci_vf_memory_decoding(dev);
>> + else
>> +#endif
>> + ret = pci_memory_decoding(dev);
>> + if (ret)
>> + return ret;
>
> Way to many #ifdef spread around inside the code, please restructure that.
>
> For example concentrating the logic in a single function should help:
>
> static int pci_check_decoding_disabled(..., inr resno)
> {
>
> #ifdef CONFIG_PCI_IOV
> if (resno...
> return -EBUSY;
> else
> return 0;
> #endif
>
> if (...)
> return -EBUSY;
> return 0;
>
> }
>
Ok.
>> sizes = pci_rebar_get_possible_sizes(dev, resno);
>> if (!sizes)
>> @@ -445,6 +473,14 @@ int pci_resize_resource(struct pci_dev *dev, int
>> resno, int size)
>> res->end = res->start + pci_rebar_size_to_bytes(size) - 1;
>> +#ifdef CONFIG_PCI_IOV
>> + if (resno >= PCI_IOV_RESOURCES) {
>> + dev->sriov->barsz[resno - PCI_IOV_RESOURCES] =
>> pci_rebar_size_to_bytes(size);
>> + res->end = res->start +
>> + resource_size(res) * pci_sriov_get_totalvfs(dev) - 1;
>> + }
>> +#endif
>> +
>> /* Check if the new config works by trying to assign everything. */
>> if (dev->bus->self) {
>> ret = pci_reassign_bridge_resources(dev->bus->self,
>> res->flags);
>> @@ -456,6 +492,15 @@ int pci_resize_resource(struct pci_dev *dev, int
>> resno, int size)
>> error_resize:
>> pci_rebar_set_size(dev, resno, old);
>> res->end = res->start + pci_rebar_size_to_bytes(old) - 1;
>> +
>> +#ifdef CONFIG_PCI_IOV
>> + if (resno >= PCI_IOV_RESOURCES) {
>> + dev->sriov->barsz[resno - PCI_IOV_RESOURCES] =
>> pci_rebar_size_to_bytes(old);
>> + res->end = res->start +
>> + pci_rebar_size_to_bytes(old) *
>> pci_sriov_get_totalvfs(dev) - 1;
>> + }
>> +#endif
>> +
>
> Looks like this deserves it's own function.
I'll extract the whole "restore after failed resize" to separate
function (for both native and IOV).
Thanks!
-Michał
>
> Regards,
> Christian.
>
>> return ret;
>> }
>> EXPORT_SYMBOL(pci_resize_resource);
>> diff --git a/include/uapi/linux/pci_regs.h
>> b/include/uapi/linux/pci_regs.h
>> index ff6ccbc6efe96..7f5726d23b038 100644
>> --- a/include/uapi/linux/pci_regs.h
>> +++ b/include/uapi/linux/pci_regs.h
>> @@ -734,6 +734,7 @@
>> #define PCI_EXT_CAP_ID_L1SS 0x1E /* L1 PM Substates */
>> #define PCI_EXT_CAP_ID_PTM 0x1F /* Precision Time Measurement */
>> #define PCI_EXT_CAP_ID_DVSEC 0x23 /* Designated
>> Vendor-Specific */
>> +#define PCI_EXT_CAP_ID_VF_REBAR 0x24 /* VF Resizable BAR */
>> #define PCI_EXT_CAP_ID_DLF 0x25 /* Data Link Feature */
>> #define PCI_EXT_CAP_ID_PL_16GT 0x26 /* Physical Layer 16.0
>> GT/s */
>> #define PCI_EXT_CAP_ID_MAX PCI_EXT_CAP_ID_PL_16GT
>
On 16.12.2021 08:12, Christian König wrote:
> Am 15.12.21 um 15:16 schrieb Michał Winiarski:
>> When pci_reassign_bridge_resources returns -ENOENT, it means that no
>> resources needed to be "moved". This can happen when the resource was
>> resized to be smaller, and it's completely fine - there's no need to
>> treat
>> this as an error and go back to the original BAR size.
>
> Well that doesn't make much sense as far as I can see.
>
> Drivers mandatory need to call pci_release_resource() on all resources
> which might need to move for a resize, including the one which is about
> to be resized.
Since IOV BARs have their own memory-decoding enabled bit, which is
usually tied to the lifetime of virtual functions, the PF driver could
do IOV BAR resize during its lifetime (without releasing its own resources).
>
> When you get -ENOENT from pci_reassign_bridge_resources() it just means
> that the function was not able to do it's work because the driver failed
> to release it's resources before the resize.
>
> Technically we could indeed skip this step if the new size is smaller
> than the old size, but then the question is why would somebody resize in
> the first place? The freed up address space is not usable if you don't
> do this.
With regular BAR, the size of MMIO resource is equal to bar_size.
With IOV BAR, the size of MMIO resource is equal to iov_bar_size *
total_vfs.
It means that the driver could use the pci_resize_resource in two ways,
it could just call it like for the native BAR - overall MMIO resource is
going to grow, or it could limit its total_vfs (overall MMIO resource is
going to shrink, but from VF perspective, its individual BAR is going to
be larger).
To ilustrate:
Native:
1G 2G
+--+ +--+
| | | |
+--+ | |
| |
+--+
Resource grows from 1G to 2G. No surprises.
IOV 4 VFs:
1G 2G
+--+ +--+
| | | |
+--+ | |
| | | |
+--+ +--+
| | | |
+--+ | |
| | | |
+--+ +--+
| |
| |
| |
+--+
| |
| |
| |
+--+
Resource grows from 4G to 8G. But for larger number of VFs, and larger
BAR sizes, finding MMIO space to accomodate may end up being tricky on
some platforms.
IOV (limited to 2 VFs):
1G 2G
+--+ +--+
| | | |
+--+ | |
| | | |
+--+ +--+
| | | |
+--+ | |
| | | |
+--+ +--+
No changes in resource size, we started with 4G and we end up with 4G
after resize (but those 2 VFs can now use 2G BAR).
Does that make sense?
Thanks
-Michał
>
> Regards,
> Christian.
>
>>
>> Signed-off-by: Michał Winiarski <[email protected]>
>> ---
>> drivers/pci/setup-res.c | 2 +-
>> 1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c
>> index 1946e52e7678a..5de5129055e0a 100644
>> --- a/drivers/pci/setup-res.c
>> +++ b/drivers/pci/setup-res.c
>> @@ -484,7 +484,7 @@ int pci_resize_resource(struct pci_dev *dev, int
>> resno, int size)
>> /* Check if the new config works by trying to assign everything. */
>> if (dev->bus->self) {
>> ret = pci_reassign_bridge_resources(dev->bus->self,
>> res->flags);
>> - if (ret)
>> + if (ret && ret != -ENOENT)
>> goto error_resize;
>> }
>> return 0;
>
On 16.12.2021 01:21, Krzysztof Wilczyński wrote:
> Hi Michał,
>
> [...]
>> +static int pci_memory_decoding(struct pci_dev *dev)
>> +{
>> + u16 cmd;
>> +
>> + pci_read_config_word(dev, PCI_COMMAND, &cmd);
>> + if (cmd & PCI_COMMAND_MEMORY)
>> + return -EBUSY;
>> +
>> + return 0;
>> +}
>> +
>> +#ifdef CONFIG_PCI_IOV
>> +static int pci_vf_memory_decoding(struct pci_dev *dev)
>> +{
>> + u16 cmd;
>> +
>> + pci_read_config_word(dev, dev->sriov->pos + PCI_SRIOV_CTRL, &cmd);
>> + if (cmd & PCI_SRIOV_CTRL_MSE)
>> + return -EBUSY;
>> +
>> + return 0;
>> +}
>> +#endif
>> +
>> int pci_resize_resource(struct pci_dev *dev, int resno, int size)
>> {
>> struct resource *res = dev->resource + resno;
>> struct pci_host_bridge *host;
>> int old, ret;
>> u32 sizes;
>> - u16 cmd;
>>
>> /* Check if we must preserve the firmware's resource assignment */
>> host = pci_find_host_bridge(dev->bus);
>> @@ -424,9 +447,14 @@ int pci_resize_resource(struct pci_dev *dev, int resno, int size)
>> if (!(res->flags & IORESOURCE_UNSET))
>> return -EBUSY;
>>
>> - pci_read_config_word(dev, PCI_COMMAND, &cmd);
>> - if (cmd & PCI_COMMAND_MEMORY)
>> - return -EBUSY;
>> +#ifdef CONFIG_PCI_IOV
>> + if (resno >= PCI_IOV_RESOURCES)
>> + ret = pci_vf_memory_decoding(dev);
>> + else
>> +#endif
>> + ret = pci_memory_decoding(dev);
>> + if (ret)
>> + return ret;
>
> The above else works, however, it does trip our static analysis tools, and
> lack of the indentation makes it slightly confusing to read, at least at
> the first glance. Thus, I wonder whether it would be possible to combine
> the pci_vf_memory_decoding() and pci_memory_decoding() somehow neatly into
> a private helper that takes a boolean to indicate whether it's a VF or not.
> Then, we could drop the if-statement, since some of the logic could live
> within the helper.
>
> What do you think?
Sure - implementing a helper in a way that Christian suggested (the
helper still takes resno though) should also take care of that.
Thanks
-Michał
>
> Krzysztof
Am 17.12.21 um 12:23 schrieb Michał Winiarski:
> On 16.12.2021 08:12, Christian König wrote:
>> Am 15.12.21 um 15:16 schrieb Michał Winiarski:
>>> When pci_reassign_bridge_resources returns -ENOENT, it means that no
>>> resources needed to be "moved". This can happen when the resource was
>>> resized to be smaller, and it's completely fine - there's no need to
>>> treat
>>> this as an error and go back to the original BAR size.
>>
>> Well that doesn't make much sense as far as I can see.
>>
>> Drivers mandatory need to call pci_release_resource() on all
>> resources which might need to move for a resize, including the one
>> which is about to be resized.
>
> Since IOV BARs have their own memory-decoding enabled bit, which is
> usually tied to the lifetime of virtual functions, the PF driver could
> do IOV BAR resize during its lifetime (without releasing its own
> resources).
I know, but that is totally irrelevant. See below.
>> When you get -ENOENT from pci_reassign_bridge_resources() it just
>> means that the function was not able to do it's work because the
>> driver failed to release it's resources before the resize.
>>
>> Technically we could indeed skip this step if the new size is smaller
>> than the old size, but then the question is why would somebody resize
>> in the first place? The freed up address space is not usable if you
>> don't do this.
>
> With regular BAR, the size of MMIO resource is equal to bar_size.
> With IOV BAR, the size of MMIO resource is equal to iov_bar_size *
> total_vfs.
>
> It means that the driver could use the pci_resize_resource in two
> ways, it could just call it like for the native BAR - overall MMIO
> resource is going to grow, or it could limit its total_vfs (overall
> MMIO resource is going to shrink, but from VF perspective, its
> individual BAR is going to be larger).
> [SNIP]
> No changes in resource size, we started with 4G and we end up with 4G
> after resize (but those 2 VFs can now use 2G BAR).
>
> Does that make sense?
Well no, I already had a good understanding of what you are doing here.
But that still doesn't really fit what the function is supposed to be doing.
See even when you reduce the number of virtual functions and increase
the BAR of the remaining functions you previously *must* manually
release some of the BARs. And even if it's just the VF BAR.
So there is either something wrong in your driver code using this or we
have an implementation error in the handling of VF BARs in the resize
function (which is certainly possible).
Regards,
Christian.
>
> Thanks
> -Michał
>
>>
>> Regards,
>> Christian.
>>
>>>
>>> Signed-off-by: Michał Winiarski <[email protected]>
>>> ---
>>> drivers/pci/setup-res.c | 2 +-
>>> 1 file changed, 1 insertion(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c
>>> index 1946e52e7678a..5de5129055e0a 100644
>>> --- a/drivers/pci/setup-res.c
>>> +++ b/drivers/pci/setup-res.c
>>> @@ -484,7 +484,7 @@ int pci_resize_resource(struct pci_dev *dev, int
>>> resno, int size)
>>> /* Check if the new config works by trying to assign
>>> everything. */
>>> if (dev->bus->self) {
>>> ret = pci_reassign_bridge_resources(dev->bus->self,
>>> res->flags);
>>> - if (ret)
>>> + if (ret && ret != -ENOENT)
>>> goto error_resize;
>>> }
>>> return 0;
>>
>