2014-07-11 19:07:21

by Konrad Rzeszutek Wilk

[permalink] [raw]
Subject: [PATCH v4] PCI back fixes for 3.17.

Please see this set of patches which are fixes to Xen pciback
for 3.17. They are also located at:

git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen.git devel/pciback-3.17.v4

These patches do not include the PCI bus reset/slot code as we are still
discussing that.

Konrad Rzeszutek Wilk (5):
xen-pciback: Document the various parameters and attributes in SysFS
xen/pciback: Don't deadlock when unbinding.
xen/pciback: Include the domain id if removing the device whilst still in use
xen/pciback: Print out the domain owning the device.
xen/pciback: Remove tons of dereferences

Documentation/ABI/testing/sysfs-driver-pciback | 25 +++++++++++++++
drivers/xen/xen-pciback/passthrough.c | 9 ++++-
drivers/xen/xen-pciback/pci_stub.c | 41 +++++++++++++------------
drivers/xen/xen-pciback/pciback.h | 7 ++--
drivers/xen/xen-pciback/vpci.c | 9 ++++-
drivers/xen/xen-pciback/xenbus.c | 4 +-
6 files changed, 67 insertions(+), 28 deletions(-)


2014-07-11 19:07:24

by Konrad Rzeszutek Wilk

[permalink] [raw]
Subject: [PATCH v4 2/5] xen/pciback: Don't deadlock when unbinding.

From: Konrad Rzeszutek Wilk <[email protected]>

As commit 0a9fd0152929db372ff61b0d6c280fdd34ae8bdb
'xen/pciback: Document the entry points for 'pcistub_put_pci_dev''
explained there are four entry points in this function.
Two of them are when the user fiddles in the SysFS to
unbind a device which might be in use by a guest or not.

Both 'unbind' states will cause a deadlock as the the PCI lock has
already been taken, which then pci_device_reset tries to take.

We can simplify this by requiring that all callers of
pcistub_put_pci_dev MUST hold the device lock. And then
we can just call the lockless version of pci_device_reset.

To make it even simpler we will modify xen_pcibk_release_pci_dev
to quality whether it should take a lock or not - as it ends
up calling xen_pcibk_release_pci_dev and needs to hold the lock.

Signed-off-by: Konrad Rzeszutek Wilk <[email protected]>
---
[v2: Per David Vrabel's suggestion - use lockless version of reset]
---
drivers/xen/xen-pciback/passthrough.c | 9 +++++++--
drivers/xen/xen-pciback/pci_stub.c | 11 +++++------
drivers/xen/xen-pciback/pciback.h | 7 ++++---
drivers/xen/xen-pciback/vpci.c | 9 +++++++--
drivers/xen/xen-pciback/xenbus.c | 2 +-
5 files changed, 24 insertions(+), 14 deletions(-)

diff --git a/drivers/xen/xen-pciback/passthrough.c b/drivers/xen/xen-pciback/passthrough.c
index 828dddc..d0c3fb4 100644
--- a/drivers/xen/xen-pciback/passthrough.c
+++ b/drivers/xen/xen-pciback/passthrough.c
@@ -69,7 +69,7 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
}

static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
- struct pci_dev *dev)
+ struct pci_dev *dev, bool lock)
{
struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
struct pci_dev_entry *dev_entry, *t;
@@ -87,8 +87,13 @@ static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,

mutex_unlock(&dev_data->lock);

- if (found_dev)
+ if (found_dev) {
+ if (lock)
+ device_lock(&found_dev->dev);
pcistub_put_pci_dev(found_dev);
+ if (lock)
+ device_unlock(&found_dev->dev);
+ }
}

static int __xen_pcibk_init_devices(struct xen_pcibk_device *pdev)
diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c
index d57a173..eebbb41 100644
--- a/drivers/xen/xen-pciback/pci_stub.c
+++ b/drivers/xen/xen-pciback/pci_stub.c
@@ -250,6 +250,8 @@ struct pci_dev *pcistub_get_pci_dev(struct xen_pcibk_device *pdev,
* - 'echo BDF > unbind' with a guest still using it. See pcistub_remove
*
* As such we have to be careful.
+ *
+ * To make this easier, the caller has to hold the device lock.
*/
void pcistub_put_pci_dev(struct pci_dev *dev)
{
@@ -276,11 +278,7 @@ void pcistub_put_pci_dev(struct pci_dev *dev)
/* Cleanup our device
* (so it's ready for the next domain)
*/
-
- /* This is OK - we are running from workqueue context
- * and want to inhibit the user from fiddling with 'reset'
- */
- pci_reset_function(dev);
+ __pci_reset_function_locked(dev);
pci_restore_state(dev);

/* This disables the device. */
@@ -567,7 +565,8 @@ static void pcistub_remove(struct pci_dev *dev)
/* N.B. This ends up calling pcistub_put_pci_dev which ends up
* doing the FLR. */
xen_pcibk_release_pci_dev(found_psdev->pdev,
- found_psdev->dev);
+ found_psdev->dev,
+ false /* caller holds the lock. */);
}

spin_lock_irqsave(&pcistub_devices_lock, flags);
diff --git a/drivers/xen/xen-pciback/pciback.h b/drivers/xen/xen-pciback/pciback.h
index f72af87..58e38d5 100644
--- a/drivers/xen/xen-pciback/pciback.h
+++ b/drivers/xen/xen-pciback/pciback.h
@@ -99,7 +99,8 @@ struct xen_pcibk_backend {
unsigned int *domain, unsigned int *bus,
unsigned int *devfn);
int (*publish)(struct xen_pcibk_device *pdev, publish_pci_root_cb cb);
- void (*release)(struct xen_pcibk_device *pdev, struct pci_dev *dev);
+ void (*release)(struct xen_pcibk_device *pdev, struct pci_dev *dev,
+ bool lock);
int (*add)(struct xen_pcibk_device *pdev, struct pci_dev *dev,
int devid, publish_pci_dev_cb publish_cb);
struct pci_dev *(*get)(struct xen_pcibk_device *pdev,
@@ -122,10 +123,10 @@ static inline int xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
}

static inline void xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
- struct pci_dev *dev)
+ struct pci_dev *dev, bool lock)
{
if (xen_pcibk_backend && xen_pcibk_backend->release)
- return xen_pcibk_backend->release(pdev, dev);
+ return xen_pcibk_backend->release(pdev, dev, lock);
}

static inline struct pci_dev *
diff --git a/drivers/xen/xen-pciback/vpci.c b/drivers/xen/xen-pciback/vpci.c
index 51afff9..2fdfcb9 100644
--- a/drivers/xen/xen-pciback/vpci.c
+++ b/drivers/xen/xen-pciback/vpci.c
@@ -145,7 +145,7 @@ out:
}

static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
- struct pci_dev *dev)
+ struct pci_dev *dev, bool lock)
{
int slot;
struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
@@ -169,8 +169,13 @@ static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
out:
mutex_unlock(&vpci_dev->lock);

- if (found_dev)
+ if (found_dev) {
+ if (lock)
+ device_lock(&found_dev->dev);
pcistub_put_pci_dev(found_dev);
+ if (lock)
+ device_unlock(&found_dev->dev);
+ }
}

static int __xen_pcibk_init_devices(struct xen_pcibk_device *pdev)
diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c
index 4a7e6e0..065aae8 100644
--- a/drivers/xen/xen-pciback/xenbus.c
+++ b/drivers/xen/xen-pciback/xenbus.c
@@ -290,7 +290,7 @@ static int xen_pcibk_remove_device(struct xen_pcibk_device *pdev,

/* N.B. This ends up calling pcistub_put_pci_dev which ends up
* doing the FLR. */
- xen_pcibk_release_pci_dev(pdev, dev);
+ xen_pcibk_release_pci_dev(pdev, dev, true /* use the lock */);

out:
return err;
--
1.7.7.6

2014-07-11 19:07:38

by Konrad Rzeszutek Wilk

[permalink] [raw]
Subject: [PATCH v4 5/5] xen/pciback: Remove tons of dereferences

From: Konrad Rzeszutek Wilk <[email protected]>

A little cleanup. No functional difference.

Signed-off-by: Konrad Rzeszutek Wilk <[email protected]>
---
drivers/xen/xen-pciback/pci_stub.c | 20 +++++++++++---------
1 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c
index efcb73b..cf0b3c1 100644
--- a/drivers/xen/xen-pciback/pci_stub.c
+++ b/drivers/xen/xen-pciback/pci_stub.c
@@ -630,10 +630,12 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev,
{
pci_ers_result_t res = result;
struct xen_pcie_aer_op *aer_op;
+ struct xen_pcibk_device *pdev = psdev->pdev;
+ struct xen_pci_sharedinfo *sh_info = pdev->sh_info;
int ret;

/*with PV AER drivers*/
- aer_op = &(psdev->pdev->sh_info->aer_op);
+ aer_op = &(sh_info->aer_op);
aer_op->cmd = aer_cmd ;
/*useful for error_detected callback*/
aer_op->err = state;
@@ -654,36 +656,36 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev,
* this flag to judge whether we need to check pci-front give aer
* service ack signal
*/
- set_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags);
+ set_bit(_PCIB_op_pending, (unsigned long *)&pdev->flags);

/*It is possible that a pcifront conf_read_write ops request invokes
* the callback which cause the spurious execution of wake_up.
* Yet it is harmless and better than a spinlock here
*/
set_bit(_XEN_PCIB_active,
- (unsigned long *)&psdev->pdev->sh_info->flags);
+ (unsigned long *)&sh_info->flags);
wmb();
- notify_remote_via_irq(psdev->pdev->evtchn_irq);
+ notify_remote_via_irq(pdev->evtchn_irq);

ret = wait_event_timeout(xen_pcibk_aer_wait_queue,
!(test_bit(_XEN_PCIB_active, (unsigned long *)
- &psdev->pdev->sh_info->flags)), 300*HZ);
+ &sh_info->flags)), 300*HZ);

if (!ret) {
if (test_bit(_XEN_PCIB_active,
- (unsigned long *)&psdev->pdev->sh_info->flags)) {
+ (unsigned long *)&sh_info->flags)) {
dev_err(&psdev->dev->dev,
"pcifront aer process not responding!\n");
clear_bit(_XEN_PCIB_active,
- (unsigned long *)&psdev->pdev->sh_info->flags);
+ (unsigned long *)&sh_info->flags);
aer_op->err = PCI_ERS_RESULT_NONE;
return res;
}
}
- clear_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags);
+ clear_bit(_PCIB_op_pending, (unsigned long *)&pdev->flags);

if (test_bit(_XEN_PCIF_active,
- (unsigned long *)&psdev->pdev->sh_info->flags)) {
+ (unsigned long *)&sh_info->flags)) {
dev_dbg(&psdev->dev->dev,
"schedule pci_conf service in " DRV_NAME "\n");
xen_pcibk_test_and_schedule_op(psdev->pdev);
--
1.7.7.6

2014-07-11 19:07:27

by Konrad Rzeszutek Wilk

[permalink] [raw]
Subject: [PATCH v4 4/5] xen/pciback: Print out the domain owning the device.

From: Konrad Rzeszutek Wilk <[email protected]>

We had been printing it only if the device was built with
debug enabled. But this information is useful in the field
to troubleshoot.

Reviewed-by: David Vrabel <[email protected]>
Signed-off-by: Konrad Rzeszutek Wilk <[email protected]>
---
drivers/xen/xen-pciback/xenbus.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c
index 065aae8..0f80760 100644
--- a/drivers/xen/xen-pciback/xenbus.c
+++ b/drivers/xen/xen-pciback/xenbus.c
@@ -246,7 +246,7 @@ static int xen_pcibk_export_device(struct xen_pcibk_device *pdev,
if (err)
goto out;

- dev_dbg(&dev->dev, "registering for %d\n", pdev->xdev->otherend_id);
+ dev_info(&dev->dev, "registering for %d\n", pdev->xdev->otherend_id);
if (xen_register_device_domain_owner(dev,
pdev->xdev->otherend_id) != 0) {
dev_err(&dev->dev, "Stealing ownership from dom%d.\n",
--
1.7.7.6

2014-07-11 19:10:21

by Konrad Rzeszutek Wilk

[permalink] [raw]
Subject: [PATCH v4 3/5] xen/pciback: Include the domain id if removing the device whilst still in use

From: Konrad Rzeszutek Wilk <[email protected]>

Cleanup the function a bit - also include the id of the
domain that is using the device.

Reviewed-by: David Vrabel <[email protected]>
Signed-off-by: Konrad Rzeszutek Wilk <[email protected]>
---
drivers/xen/xen-pciback/pci_stub.c | 10 ++++++----
1 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c
index eebbb41..efcb73b 100644
--- a/drivers/xen/xen-pciback/pci_stub.c
+++ b/drivers/xen/xen-pciback/pci_stub.c
@@ -552,12 +552,14 @@ static void pcistub_remove(struct pci_dev *dev)
spin_unlock_irqrestore(&pcistub_devices_lock, flags);

if (found_psdev) {
- dev_dbg(&dev->dev, "found device to remove - in use? %p\n",
- found_psdev->pdev);
+ dev_dbg(&dev->dev, "found device to remove %s\n",
+ found_psdev->pdev ? "- in-use" : "");

if (found_psdev->pdev) {
- pr_warn("****** removing device %s while still in-use! ******\n",
- pci_name(found_psdev->dev));
+ int domid = xen_find_device_domain_owner(dev);
+
+ pr_warn("****** removing device %s while still in-use by domain %d! ******\n",
+ pci_name(found_psdev->dev), domid);
pr_warn("****** driver domain may still access this device's i/o resources!\n");
pr_warn("****** shutdown driver domain before binding device\n");
pr_warn("****** to other drivers or domains\n");
--
1.7.7.6

2014-07-11 19:10:53

by Konrad Rzeszutek Wilk

[permalink] [raw]
Subject: [PATCH v4 1/5] xen-pciback: Document the various parameters and attributes in SysFS

From: Konrad Rzeszutek Wilk <[email protected]>

Which hadn't been done with the initial commit.

Signed-off-by: Konrad Rzeszutek Wilk <[email protected]>
---
v2: Dropped the parameters and one that is unlikeable.
---
Documentation/ABI/testing/sysfs-driver-pciback | 25 ++++++++++++++++++++++++
1 files changed, 25 insertions(+), 0 deletions(-)
create mode 100644 Documentation/ABI/testing/sysfs-driver-pciback

diff --git a/Documentation/ABI/testing/sysfs-driver-pciback b/Documentation/ABI/testing/sysfs-driver-pciback
new file mode 100644
index 0000000..e482240
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-pciback
@@ -0,0 +1,25 @@
+What: /sys/bus/pci/drivers/pciback/quirks
+Date: Oct 2011
+KernelVersion: 3.1
+Contact: [email protected]
+Description:
+ If the permissive attribute is set, then writing a string in
+ the format of DDDD:BB:DD.F-REG:SIZE:MASK will allow the guest
+ to write and read from the PCI device. That is Domain:Bus:
+ Device.Function-Register:Size:Mask (Domain is optional).
+ For example:
+ #echo 00:19.0-E0:2:FF > /sys/bus/pci/drivers/pciback/quirks
+ will allow the guest to read and write to the configuration
+ register 0x0E.
+
+What: /sys/bus/pci/drivers/pciback/irq_handlers
+Date: Oct 2011
+KernelVersion: 3.1
+Contact: [email protected]
+Description:
+ A list of all of the PCI devices owned by Xen PCI back and
+ whether Xen PCI backend will acknowledge the interrupts received
+ and the amount of interrupts received. Xen PCI back acknowledges
+ said interrupts only when they are level, shared with another
+ guest, and enabled by the guest.
+
--
1.7.7.6

2014-07-11 20:44:23

by Boris Ostrovsky

[permalink] [raw]
Subject: Re: [PATCH v4 1/5] xen-pciback: Document the various parameters and attributes in SysFS

On 07/11/2014 04:08 PM, [email protected] wrote:
> From: Konrad Rzeszutek Wilk <[email protected]>
>
> Which hadn't been done with the initial commit.
>
> Signed-off-by: Konrad Rzeszutek Wilk <[email protected]>
> ---
> v2: Dropped the parameters and one that is unlikeable.
> ---
> Documentation/ABI/testing/sysfs-driver-pciback | 25 ++++++++++++++++++++++++
> 1 files changed, 25 insertions(+), 0 deletions(-)
> create mode 100644 Documentation/ABI/testing/sysfs-driver-pciback
>
> diff --git a/Documentation/ABI/testing/sysfs-driver-pciback b/Documentation/ABI/testing/sysfs-driver-pciback
> new file mode 100644
> index 0000000..e482240
> --- /dev/null
> +++ b/Documentation/ABI/testing/sysfs-driver-pciback
> @@ -0,0 +1,25 @@
> +What: /sys/bus/pci/drivers/pciback/quirks
> +Date: Oct 2011
> +KernelVersion: 3.1
> +Contact: [email protected]
> +Description:
> + If the permissive attribute is set, then writing a string in
> + the format of DDDD:BB:DD.F-REG:SIZE:MASK will allow the guest
> + to write and read from the PCI device. That is Domain:Bus:
> + Device.Function-Register:Size:Mask (Domain is optional).
> + For example:
> + #echo 00:19.0-E0:2:FF > /sys/bus/pci/drivers/pciback/quirks
> + will allow the guest to read and write to the configuration
> + register 0x0E.
> +
> +What: /sys/bus/pci/drivers/pciback/irq_handlers
> +Date: Oct 2011
> +KernelVersion: 3.1
> +Contact: [email protected]
> +Description:
> + A list of all of the PCI devices owned by Xen PCI back and
> + whether Xen PCI backend will acknowledge the interrupts received
> + and the amount of interrupts received. Xen PCI back acknowledges
> + said interrupts only when they are level, shared with another
> + guest, and enabled by the guest.

There are 4 fields per device. This description misses isr_on.

-boris

2014-07-11 20:45:51

by Boris Ostrovsky

[permalink] [raw]
Subject: Re: [PATCH v4 2/5] xen/pciback: Don't deadlock when unbinding.

On 07/11/2014 04:08 PM, [email protected] wrote:
> From: Konrad Rzeszutek Wilk <[email protected]>
>
> As commit 0a9fd0152929db372ff61b0d6c280fdd34ae8bdb
> 'xen/pciback: Document the entry points for 'pcistub_put_pci_dev''
> explained there are four entry points in this function.
> Two of them are when the user fiddles in the SysFS to
> unbind a device which might be in use by a guest or not.
>
> Both 'unbind' states will cause a deadlock as the the PCI lock has
> already been taken, which then pci_device_reset tries to take.
>
> We can simplify this by requiring that all callers of
> pcistub_put_pci_dev MUST hold the device lock. And then
> we can just call the lockless version of pci_device_reset.
>
> To make it even simpler we will modify xen_pcibk_release_pci_dev
> to quality whether it should take a lock or not - as it ends
> up calling xen_pcibk_release_pci_dev and needs to hold the lock.
>
> Signed-off-by: Konrad Rzeszutek Wilk <[email protected]>
> ---
> [v2: Per David Vrabel's suggestion - use lockless version of reset]
> ---
> drivers/xen/xen-pciback/passthrough.c | 9 +++++++--
> drivers/xen/xen-pciback/pci_stub.c | 11 +++++------
> drivers/xen/xen-pciback/pciback.h | 7 ++++---
> drivers/xen/xen-pciback/vpci.c | 9 +++++++--
> drivers/xen/xen-pciback/xenbus.c | 2 +-
> 5 files changed, 24 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/xen/xen-pciback/passthrough.c b/drivers/xen/xen-pciback/passthrough.c
> index 828dddc..d0c3fb4 100644
> --- a/drivers/xen/xen-pciback/passthrough.c
> +++ b/drivers/xen/xen-pciback/passthrough.c
> @@ -69,7 +69,7 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
> }
>
> static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
> - struct pci_dev *dev)
> + struct pci_dev *dev, bool lock)
> {
> struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
> struct pci_dev_entry *dev_entry, *t;
> @@ -87,8 +87,13 @@ static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
>
> mutex_unlock(&dev_data->lock);
>
> - if (found_dev)
> + if (found_dev) {
> + if (lock)
> + device_lock(&found_dev->dev);
> pcistub_put_pci_dev(found_dev);
> + if (lock)
> + device_unlock(&found_dev->dev);
> + }
> }
>
> static int __xen_pcibk_init_devices(struct xen_pcibk_device *pdev)
> diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c
> index d57a173..eebbb41 100644
> --- a/drivers/xen/xen-pciback/pci_stub.c
> +++ b/drivers/xen/xen-pciback/pci_stub.c
> @@ -250,6 +250,8 @@ struct pci_dev *pcistub_get_pci_dev(struct xen_pcibk_device *pdev,
> * - 'echo BDF > unbind' with a guest still using it. See pcistub_remove
> *
> * As such we have to be careful.
> + *
> + * To make this easier, the caller has to hold the device lock.

Should we assert that the lock is being held?

-boris

> */
> void pcistub_put_pci_dev(struct pci_dev *dev)
> {
> @@ -276,11 +278,7 @@ void pcistub_put_pci_dev(struct pci_dev *dev)
> /* Cleanup our device
> * (so it's ready for the next domain)
> */
> -
> - /* This is OK - we are running from workqueue context
> - * and want to inhibit the user from fiddling with 'reset'
> - */
> - pci_reset_function(dev);
> + __pci_reset_function_locked(dev);
> pci_restore_state(dev);
>
> /* This disables the device. */
> @@ -567,7 +565,8 @@ static void pcistub_remove(struct pci_dev *dev)
> /* N.B. This ends up calling pcistub_put_pci_dev which ends up
> * doing the FLR. */
> xen_pcibk_release_pci_dev(found_psdev->pdev,
> - found_psdev->dev);
> + found_psdev->dev,
> + false /* caller holds the lock. */);
> }
>
> spin_lock_irqsave(&pcistub_devices_lock, flags);
> diff --git a/drivers/xen/xen-pciback/pciback.h b/drivers/xen/xen-pciback/pciback.h
> index f72af87..58e38d5 100644
> --- a/drivers/xen/xen-pciback/pciback.h
> +++ b/drivers/xen/xen-pciback/pciback.h
> @@ -99,7 +99,8 @@ struct xen_pcibk_backend {
> unsigned int *domain, unsigned int *bus,
> unsigned int *devfn);
> int (*publish)(struct xen_pcibk_device *pdev, publish_pci_root_cb cb);
> - void (*release)(struct xen_pcibk_device *pdev, struct pci_dev *dev);
> + void (*release)(struct xen_pcibk_device *pdev, struct pci_dev *dev,
> + bool lock);
> int (*add)(struct xen_pcibk_device *pdev, struct pci_dev *dev,
> int devid, publish_pci_dev_cb publish_cb);
> struct pci_dev *(*get)(struct xen_pcibk_device *pdev,
> @@ -122,10 +123,10 @@ static inline int xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
> }
>
> static inline void xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
> - struct pci_dev *dev)
> + struct pci_dev *dev, bool lock)
> {
> if (xen_pcibk_backend && xen_pcibk_backend->release)
> - return xen_pcibk_backend->release(pdev, dev);
> + return xen_pcibk_backend->release(pdev, dev, lock);
> }
>
> static inline struct pci_dev *
> diff --git a/drivers/xen/xen-pciback/vpci.c b/drivers/xen/xen-pciback/vpci.c
> index 51afff9..2fdfcb9 100644
> --- a/drivers/xen/xen-pciback/vpci.c
> +++ b/drivers/xen/xen-pciback/vpci.c
> @@ -145,7 +145,7 @@ out:
> }
>
> static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
> - struct pci_dev *dev)
> + struct pci_dev *dev, bool lock)
> {
> int slot;
> struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
> @@ -169,8 +169,13 @@ static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
> out:
> mutex_unlock(&vpci_dev->lock);
>
> - if (found_dev)
> + if (found_dev) {
> + if (lock)
> + device_lock(&found_dev->dev);
> pcistub_put_pci_dev(found_dev);
> + if (lock)
> + device_unlock(&found_dev->dev);
> + }
> }
>
> static int __xen_pcibk_init_devices(struct xen_pcibk_device *pdev)
> diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c
> index 4a7e6e0..065aae8 100644
> --- a/drivers/xen/xen-pciback/xenbus.c
> +++ b/drivers/xen/xen-pciback/xenbus.c
> @@ -290,7 +290,7 @@ static int xen_pcibk_remove_device(struct xen_pcibk_device *pdev,
>
> /* N.B. This ends up calling pcistub_put_pci_dev which ends up
> * doing the FLR. */
> - xen_pcibk_release_pci_dev(pdev, dev);
> + xen_pcibk_release_pci_dev(pdev, dev, true /* use the lock */);
>
> out:
> return err;

2014-07-11 20:52:08

by Boris Ostrovsky

[permalink] [raw]
Subject: Re: [PATCH v4 5/5] xen/pciback: Remove tons of dereferences

On 07/11/2014 04:08 PM, [email protected] wrote:
> From: Konrad Rzeszutek Wilk <[email protected]>
>
> A little cleanup. No functional difference.

Reviewed-by: Boris Ostrovsky <[email protected]>

> Signed-off-by: Konrad Rzeszutek Wilk <[email protected]>
> ---
> drivers/xen/xen-pciback/pci_stub.c | 20 +++++++++++---------
> 1 files changed, 11 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c
> index efcb73b..cf0b3c1 100644
> --- a/drivers/xen/xen-pciback/pci_stub.c
> +++ b/drivers/xen/xen-pciback/pci_stub.c
> @@ -630,10 +630,12 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev,
> {
> pci_ers_result_t res = result;
> struct xen_pcie_aer_op *aer_op;
> + struct xen_pcibk_device *pdev = psdev->pdev;
> + struct xen_pci_sharedinfo *sh_info = pdev->sh_info;
> int ret;
>
> /*with PV AER drivers*/
> - aer_op = &(psdev->pdev->sh_info->aer_op);
> + aer_op = &(sh_info->aer_op);
> aer_op->cmd = aer_cmd ;
> /*useful for error_detected callback*/
> aer_op->err = state;
> @@ -654,36 +656,36 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev,
> * this flag to judge whether we need to check pci-front give aer
> * service ack signal
> */
> - set_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags);
> + set_bit(_PCIB_op_pending, (unsigned long *)&pdev->flags);
>
> /*It is possible that a pcifront conf_read_write ops request invokes
> * the callback which cause the spurious execution of wake_up.
> * Yet it is harmless and better than a spinlock here
> */
> set_bit(_XEN_PCIB_active,
> - (unsigned long *)&psdev->pdev->sh_info->flags);
> + (unsigned long *)&sh_info->flags);
> wmb();
> - notify_remote_via_irq(psdev->pdev->evtchn_irq);
> + notify_remote_via_irq(pdev->evtchn_irq);
>
> ret = wait_event_timeout(xen_pcibk_aer_wait_queue,
> !(test_bit(_XEN_PCIB_active, (unsigned long *)
> - &psdev->pdev->sh_info->flags)), 300*HZ);
> + &sh_info->flags)), 300*HZ);
>
> if (!ret) {
> if (test_bit(_XEN_PCIB_active,
> - (unsigned long *)&psdev->pdev->sh_info->flags)) {
> + (unsigned long *)&sh_info->flags)) {
> dev_err(&psdev->dev->dev,
> "pcifront aer process not responding!\n");
> clear_bit(_XEN_PCIB_active,
> - (unsigned long *)&psdev->pdev->sh_info->flags);
> + (unsigned long *)&sh_info->flags);
> aer_op->err = PCI_ERS_RESULT_NONE;
> return res;
> }
> }
> - clear_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags);
> + clear_bit(_PCIB_op_pending, (unsigned long *)&pdev->flags);
>
> if (test_bit(_XEN_PCIF_active,
> - (unsigned long *)&psdev->pdev->sh_info->flags)) {
> + (unsigned long *)&sh_info->flags)) {
> dev_dbg(&psdev->dev->dev,
> "schedule pci_conf service in " DRV_NAME "\n");
> xen_pcibk_test_and_schedule_op(psdev->pdev);

2014-07-11 21:02:14

by Konrad Rzeszutek Wilk

[permalink] [raw]
Subject: Re: [PATCH v4 2/5] xen/pciback: Don't deadlock when unbinding.

> >--- a/drivers/xen/xen-pciback/pci_stub.c
> >+++ b/drivers/xen/xen-pciback/pci_stub.c
> >@@ -250,6 +250,8 @@ struct pci_dev *pcistub_get_pci_dev(struct xen_pcibk_device *pdev,
> > * - 'echo BDF > unbind' with a guest still using it. See pcistub_remove
> > *
> > * As such we have to be careful.
> >+ *
> >+ * To make this easier, the caller has to hold the device lock.
>
> Should we assert that the lock is being held?

Yes of course we should. Thank you!

2014-07-14 14:13:48

by Konrad Rzeszutek Wilk

[permalink] [raw]
Subject: Re: [PATCH v4 2/5] xen/pciback: Don't deadlock when unbinding.

On Fri, Jul 11, 2014 at 05:02:01PM -0400, Konrad Rzeszutek Wilk wrote:
> > >--- a/drivers/xen/xen-pciback/pci_stub.c
> > >+++ b/drivers/xen/xen-pciback/pci_stub.c
> > >@@ -250,6 +250,8 @@ struct pci_dev *pcistub_get_pci_dev(struct xen_pcibk_device *pdev,
> > > * - 'echo BDF > unbind' with a guest still using it. See pcistub_remove
> > > *
> > > * As such we have to be careful.
> > >+ *
> > >+ * To make this easier, the caller has to hold the device lock.
> >
> > Should we assert that the lock is being held?
>
> Yes of course we should. Thank you!

How about this:

>From 388a03c598218dac8bfeb6c5bf3992e0d1e37d1e Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <[email protected]>
Date: Tue, 8 Jul 2014 11:12:02 -0400
Subject: [PATCH] xen/pciback: Don't deadlock when unbinding.

As commit 0a9fd0152929db372ff61b0d6c280fdd34ae8bdb
'xen/pciback: Document the entry points for 'pcistub_put_pci_dev''
explained there are four entry points in this function.
Two of them are when the user fiddles in the SysFS to
unbind a device which might be in use by a guest or not.

Both 'unbind' states will cause a deadlock as the the PCI lock has
already been taken, which then pci_device_reset tries to take.

We can simplify this by requiring that all callers of
pcistub_put_pci_dev MUST hold the device lock. And then
we can just call the lockless version of pci_device_reset.

To make it even simpler we will modify xen_pcibk_release_pci_dev
to quality whether it should take a lock or not - as it ends
up calling xen_pcibk_release_pci_dev and needs to hold the lock.

Signed-off-by: Konrad Rzeszutek Wilk <[email protected]>
---
[v2: Per David Vrabel's suggestion - use lockless version of reset]
[v3: Per Boris suggestion add assertion mechanism]
---
drivers/xen/xen-pciback/passthrough.c | 9 +++++++--
drivers/xen/xen-pciback/pci_stub.c | 12 ++++++------
drivers/xen/xen-pciback/pciback.h | 7 ++++---
drivers/xen/xen-pciback/vpci.c | 9 +++++++--
drivers/xen/xen-pciback/xenbus.c | 2 +-
5 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/drivers/xen/xen-pciback/passthrough.c b/drivers/xen/xen-pciback/passthrough.c
index 828dddc..d0c3fb4 100644
--- a/drivers/xen/xen-pciback/passthrough.c
+++ b/drivers/xen/xen-pciback/passthrough.c
@@ -69,7 +69,7 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
}

static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
- struct pci_dev *dev)
+ struct pci_dev *dev, bool lock)
{
struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
struct pci_dev_entry *dev_entry, *t;
@@ -87,8 +87,13 @@ static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,

mutex_unlock(&dev_data->lock);

- if (found_dev)
+ if (found_dev) {
+ if (lock)
+ device_lock(&found_dev->dev);
pcistub_put_pci_dev(found_dev);
+ if (lock)
+ device_unlock(&found_dev->dev);
+ }
}

static int __xen_pcibk_init_devices(struct xen_pcibk_device *pdev)
diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c
index d57a173..8293fbb 100644
--- a/drivers/xen/xen-pciback/pci_stub.c
+++ b/drivers/xen/xen-pciback/pci_stub.c
@@ -250,6 +250,8 @@ struct pci_dev *pcistub_get_pci_dev(struct xen_pcibk_device *pdev,
* - 'echo BDF > unbind' with a guest still using it. See pcistub_remove
*
* As such we have to be careful.
+ *
+ * To make this easier, the caller has to hold the device lock.
*/
void pcistub_put_pci_dev(struct pci_dev *dev)
{
@@ -276,11 +278,8 @@ void pcistub_put_pci_dev(struct pci_dev *dev)
/* Cleanup our device
* (so it's ready for the next domain)
*/
-
- /* This is OK - we are running from workqueue context
- * and want to inhibit the user from fiddling with 'reset'
- */
- pci_reset_function(dev);
+ lockdep_assert_held(&dev->dev.mutex);
+ __pci_reset_function_locked(dev);
pci_restore_state(dev);

/* This disables the device. */
@@ -567,7 +566,8 @@ static void pcistub_remove(struct pci_dev *dev)
/* N.B. This ends up calling pcistub_put_pci_dev which ends up
* doing the FLR. */
xen_pcibk_release_pci_dev(found_psdev->pdev,
- found_psdev->dev);
+ found_psdev->dev,
+ false /* caller holds the lock. */);
}

spin_lock_irqsave(&pcistub_devices_lock, flags);
diff --git a/drivers/xen/xen-pciback/pciback.h b/drivers/xen/xen-pciback/pciback.h
index f72af87..58e38d5 100644
--- a/drivers/xen/xen-pciback/pciback.h
+++ b/drivers/xen/xen-pciback/pciback.h
@@ -99,7 +99,8 @@ struct xen_pcibk_backend {
unsigned int *domain, unsigned int *bus,
unsigned int *devfn);
int (*publish)(struct xen_pcibk_device *pdev, publish_pci_root_cb cb);
- void (*release)(struct xen_pcibk_device *pdev, struct pci_dev *dev);
+ void (*release)(struct xen_pcibk_device *pdev, struct pci_dev *dev,
+ bool lock);
int (*add)(struct xen_pcibk_device *pdev, struct pci_dev *dev,
int devid, publish_pci_dev_cb publish_cb);
struct pci_dev *(*get)(struct xen_pcibk_device *pdev,
@@ -122,10 +123,10 @@ static inline int xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
}

static inline void xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
- struct pci_dev *dev)
+ struct pci_dev *dev, bool lock)
{
if (xen_pcibk_backend && xen_pcibk_backend->release)
- return xen_pcibk_backend->release(pdev, dev);
+ return xen_pcibk_backend->release(pdev, dev, lock);
}

static inline struct pci_dev *
diff --git a/drivers/xen/xen-pciback/vpci.c b/drivers/xen/xen-pciback/vpci.c
index 51afff9..2fdfcb9 100644
--- a/drivers/xen/xen-pciback/vpci.c
+++ b/drivers/xen/xen-pciback/vpci.c
@@ -145,7 +145,7 @@ out:
}

static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
- struct pci_dev *dev)
+ struct pci_dev *dev, bool lock)
{
int slot;
struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
@@ -169,8 +169,13 @@ static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
out:
mutex_unlock(&vpci_dev->lock);

- if (found_dev)
+ if (found_dev) {
+ if (lock)
+ device_lock(&found_dev->dev);
pcistub_put_pci_dev(found_dev);
+ if (lock)
+ device_unlock(&found_dev->dev);
+ }
}

static int __xen_pcibk_init_devices(struct xen_pcibk_device *pdev)
diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c
index 4a7e6e0..b3318fd 100644
--- a/drivers/xen/xen-pciback/xenbus.c
+++ b/drivers/xen/xen-pciback/xenbus.c
@@ -290,7 +290,7 @@ static int xen_pcibk_remove_device(struct xen_pcibk_device *pdev,

/* N.B. This ends up calling pcistub_put_pci_dev which ends up
* doing the FLR. */
- xen_pcibk_release_pci_dev(pdev, dev);
+ xen_pcibk_release_pci_dev(pdev, dev, true /* use the lock. */);

out:
return err;
--
1.9.3

2014-07-14 14:28:13

by Boris Ostrovsky

[permalink] [raw]
Subject: Re: [PATCH v4 2/5] xen/pciback: Don't deadlock when unbinding.

On 07/14/2014 10:13 AM, Konrad Rzeszutek Wilk wrote:
> On Fri, Jul 11, 2014 at 05:02:01PM -0400, Konrad Rzeszutek Wilk wrote:
>>>> --- a/drivers/xen/xen-pciback/pci_stub.c
>>>> +++ b/drivers/xen/xen-pciback/pci_stub.c
>>>> @@ -250,6 +250,8 @@ struct pci_dev *pcistub_get_pci_dev(struct xen_pcibk_device *pdev,
>>>> * - 'echo BDF > unbind' with a guest still using it. See pcistub_remove
>>>> *
>>>> * As such we have to be careful.
>>>> + *
>>>> + * To make this easier, the caller has to hold the device lock.
>>> Should we assert that the lock is being held?
>> Yes of course we should. Thank you!
> How about this:
>
> From 388a03c598218dac8bfeb6c5bf3992e0d1e37d1e Mon Sep 17 00:00:00 2001
> From: Konrad Rzeszutek Wilk <[email protected]>
> Date: Tue, 8 Jul 2014 11:12:02 -0400
> Subject: [PATCH] xen/pciback: Don't deadlock when unbinding.
>
> As commit 0a9fd0152929db372ff61b0d6c280fdd34ae8bdb
> 'xen/pciback: Document the entry points for 'pcistub_put_pci_dev''
> explained there are four entry points in this function.
> Two of them are when the user fiddles in the SysFS to
> unbind a device which might be in use by a guest or not.
>
> Both 'unbind' states will cause a deadlock as the the PCI lock has
> already been taken, which then pci_device_reset tries to take.
>
> We can simplify this by requiring that all callers of
> pcistub_put_pci_dev MUST hold the device lock. And then
> we can just call the lockless version of pci_device_reset.
>
> To make it even simpler we will modify xen_pcibk_release_pci_dev
> to quality whether it should take a lock or not - as it ends
> up calling xen_pcibk_release_pci_dev and needs to hold the lock.
>
> Signed-off-by: Konrad Rzeszutek Wilk <[email protected]>
> ---
> [v2: Per David Vrabel's suggestion - use lockless version of reset]
> [v3: Per Boris suggestion add assertion mechanism]
> ---
> drivers/xen/xen-pciback/passthrough.c | 9 +++++++--
> drivers/xen/xen-pciback/pci_stub.c | 12 ++++++------
> drivers/xen/xen-pciback/pciback.h | 7 ++++---
> drivers/xen/xen-pciback/vpci.c | 9 +++++++--
> drivers/xen/xen-pciback/xenbus.c | 2 +-
> 5 files changed, 25 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/xen/xen-pciback/passthrough.c b/drivers/xen/xen-pciback/passthrough.c
> index 828dddc..d0c3fb4 100644
> --- a/drivers/xen/xen-pciback/passthrough.c
> +++ b/drivers/xen/xen-pciback/passthrough.c
> @@ -69,7 +69,7 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
> }
>
> static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
> - struct pci_dev *dev)
> + struct pci_dev *dev, bool lock)
> {
> struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
> struct pci_dev_entry *dev_entry, *t;
> @@ -87,8 +87,13 @@ static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
>
> mutex_unlock(&dev_data->lock);
>
> - if (found_dev)
> + if (found_dev) {
> + if (lock)
> + device_lock(&found_dev->dev);
> pcistub_put_pci_dev(found_dev);
> + if (lock)
> + device_unlock(&found_dev->dev);
> + }
> }
>
> static int __xen_pcibk_init_devices(struct xen_pcibk_device *pdev)
> diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c
> index d57a173..8293fbb 100644
> --- a/drivers/xen/xen-pciback/pci_stub.c
> +++ b/drivers/xen/xen-pciback/pci_stub.c
> @@ -250,6 +250,8 @@ struct pci_dev *pcistub_get_pci_dev(struct xen_pcibk_device *pdev,
> * - 'echo BDF > unbind' with a guest still using it. See pcistub_remove
> *
> * As such we have to be careful.
> + *
> + * To make this easier, the caller has to hold the device lock.
> */
> void pcistub_put_pci_dev(struct pci_dev *dev)
> {
> @@ -276,11 +278,8 @@ void pcistub_put_pci_dev(struct pci_dev *dev)
> /* Cleanup our device
> * (so it's ready for the next domain)
> */
> -
> - /* This is OK - we are running from workqueue context
> - * and want to inhibit the user from fiddling with 'reset'
> - */
> - pci_reset_function(dev);
> + lockdep_assert_held(&dev->dev.mutex);
> + __pci_reset_function_locked(dev);
> pci_restore_state(dev);

Reviewed-by: Boris Ostrovsky <[email protected]>

(Although I wonder about the fact that we are exposing the mutex which
is typically hidden by device_lock()/unlock() inlines. Have you
considered adding something like is_device_locked() to device.h?)

-boris

2014-07-14 15:43:06

by Konrad Rzeszutek Wilk

[permalink] [raw]
Subject: Re: [PATCH v4 2/5] xen/pciback: Don't deadlock when unbinding.

On Mon, Jul 14, 2014 at 10:30:13AM -0400, Boris Ostrovsky wrote:
> On 07/14/2014 10:13 AM, Konrad Rzeszutek Wilk wrote:
> >On Fri, Jul 11, 2014 at 05:02:01PM -0400, Konrad Rzeszutek Wilk wrote:
> >>>>--- a/drivers/xen/xen-pciback/pci_stub.c
> >>>>+++ b/drivers/xen/xen-pciback/pci_stub.c
> >>>>@@ -250,6 +250,8 @@ struct pci_dev *pcistub_get_pci_dev(struct xen_pcibk_device *pdev,
> >>>> * - 'echo BDF > unbind' with a guest still using it. See pcistub_remove
> >>>> *
> >>>> * As such we have to be careful.
> >>>>+ *
> >>>>+ * To make this easier, the caller has to hold the device lock.
> >>>Should we assert that the lock is being held?
> >>Yes of course we should. Thank you!
> >How about this:
> >
> > From 388a03c598218dac8bfeb6c5bf3992e0d1e37d1e Mon Sep 17 00:00:00 2001
> >From: Konrad Rzeszutek Wilk <[email protected]>
> >Date: Tue, 8 Jul 2014 11:12:02 -0400
> >Subject: [PATCH] xen/pciback: Don't deadlock when unbinding.
> >
> >As commit 0a9fd0152929db372ff61b0d6c280fdd34ae8bdb
> >'xen/pciback: Document the entry points for 'pcistub_put_pci_dev''
> >explained there are four entry points in this function.
> >Two of them are when the user fiddles in the SysFS to
> >unbind a device which might be in use by a guest or not.
> >
> >Both 'unbind' states will cause a deadlock as the the PCI lock has
> >already been taken, which then pci_device_reset tries to take.
> >
> >We can simplify this by requiring that all callers of
> >pcistub_put_pci_dev MUST hold the device lock. And then
> >we can just call the lockless version of pci_device_reset.
> >
> >To make it even simpler we will modify xen_pcibk_release_pci_dev
> >to quality whether it should take a lock or not - as it ends
> >up calling xen_pcibk_release_pci_dev and needs to hold the lock.
> >
> >Signed-off-by: Konrad Rzeszutek Wilk <[email protected]>
> >---
> >[v2: Per David Vrabel's suggestion - use lockless version of reset]
> >[v3: Per Boris suggestion add assertion mechanism]
> >---
> > drivers/xen/xen-pciback/passthrough.c | 9 +++++++--
> > drivers/xen/xen-pciback/pci_stub.c | 12 ++++++------
> > drivers/xen/xen-pciback/pciback.h | 7 ++++---
> > drivers/xen/xen-pciback/vpci.c | 9 +++++++--
> > drivers/xen/xen-pciback/xenbus.c | 2 +-
> > 5 files changed, 25 insertions(+), 14 deletions(-)
> >
> >diff --git a/drivers/xen/xen-pciback/passthrough.c b/drivers/xen/xen-pciback/passthrough.c
> >index 828dddc..d0c3fb4 100644
> >--- a/drivers/xen/xen-pciback/passthrough.c
> >+++ b/drivers/xen/xen-pciback/passthrough.c
> >@@ -69,7 +69,7 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
> > }
> > static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
> >- struct pci_dev *dev)
> >+ struct pci_dev *dev, bool lock)
> > {
> > struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
> > struct pci_dev_entry *dev_entry, *t;
> >@@ -87,8 +87,13 @@ static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
> > mutex_unlock(&dev_data->lock);
> >- if (found_dev)
> >+ if (found_dev) {
> >+ if (lock)
> >+ device_lock(&found_dev->dev);
> > pcistub_put_pci_dev(found_dev);
> >+ if (lock)
> >+ device_unlock(&found_dev->dev);
> >+ }
> > }
> > static int __xen_pcibk_init_devices(struct xen_pcibk_device *pdev)
> >diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c
> >index d57a173..8293fbb 100644
> >--- a/drivers/xen/xen-pciback/pci_stub.c
> >+++ b/drivers/xen/xen-pciback/pci_stub.c
> >@@ -250,6 +250,8 @@ struct pci_dev *pcistub_get_pci_dev(struct xen_pcibk_device *pdev,
> > * - 'echo BDF > unbind' with a guest still using it. See pcistub_remove
> > *
> > * As such we have to be careful.
> >+ *
> >+ * To make this easier, the caller has to hold the device lock.
> > */
> > void pcistub_put_pci_dev(struct pci_dev *dev)
> > {
> >@@ -276,11 +278,8 @@ void pcistub_put_pci_dev(struct pci_dev *dev)
> > /* Cleanup our device
> > * (so it's ready for the next domain)
> > */
> >-
> >- /* This is OK - we are running from workqueue context
> >- * and want to inhibit the user from fiddling with 'reset'
> >- */
> >- pci_reset_function(dev);
> >+ lockdep_assert_held(&dev->dev.mutex);
> >+ __pci_reset_function_locked(dev);
> > pci_restore_state(dev);
>
> Reviewed-by: Boris Ostrovsky <[email protected]>
>
> (Although I wonder about the fact that we are exposing the mutex which is
> typically hidden by device_lock()/unlock() inlines. Have you considered
> adding something like is_device_locked() to device.h?)

I did, but this is a bug-fix (which can be backported to stable) so I thought
it would not be nice - as that is more of an API change.

Instead I split it up and there is another patch that makes it
an 'device_lock_assert' function.

And thanks to your idea - I did find two instances where we did
call without a mutex held.

Reposting shortly (will retain your Reviewed-by - please scream if you
prefer that I drop it).

2014-07-14 16:29:11

by Konrad Rzeszutek Wilk

[permalink] [raw]
Subject: Re: [PATCH v4 1/5] xen-pciback: Document the various parameters and attributes in SysFS

On Fri, Jul 11, 2014 at 04:46:31PM -0400, Boris Ostrovsky wrote:
> On 07/11/2014 04:08 PM, [email protected] wrote:
> >From: Konrad Rzeszutek Wilk <[email protected]>
> >
> >Which hadn't been done with the initial commit.
> >
> >Signed-off-by: Konrad Rzeszutek Wilk <[email protected]>
> >---
> >v2: Dropped the parameters and one that is unlikeable.
> >---
> > Documentation/ABI/testing/sysfs-driver-pciback | 25 ++++++++++++++++++++++++
> > 1 files changed, 25 insertions(+), 0 deletions(-)
> > create mode 100644 Documentation/ABI/testing/sysfs-driver-pciback
> >
> >diff --git a/Documentation/ABI/testing/sysfs-driver-pciback b/Documentation/ABI/testing/sysfs-driver-pciback
> >new file mode 100644
> >index 0000000..e482240
> >--- /dev/null
> >+++ b/Documentation/ABI/testing/sysfs-driver-pciback
> >@@ -0,0 +1,25 @@
> >+What: /sys/bus/pci/drivers/pciback/quirks
> >+Date: Oct 2011
> >+KernelVersion: 3.1
> >+Contact: [email protected]
> >+Description:
> >+ If the permissive attribute is set, then writing a string in
> >+ the format of DDDD:BB:DD.F-REG:SIZE:MASK will allow the guest
> >+ to write and read from the PCI device. That is Domain:Bus:
> >+ Device.Function-Register:Size:Mask (Domain is optional).
> >+ For example:
> >+ #echo 00:19.0-E0:2:FF > /sys/bus/pci/drivers/pciback/quirks
> >+ will allow the guest to read and write to the configuration
> >+ register 0x0E.
> >+
> >+What: /sys/bus/pci/drivers/pciback/irq_handlers
> >+Date: Oct 2011
> >+KernelVersion: 3.1
> >+Contact: [email protected]
> >+Description:
> >+ A list of all of the PCI devices owned by Xen PCI back and
> >+ whether Xen PCI backend will acknowledge the interrupts received
> >+ and the amount of interrupts received. Xen PCI back acknowledges
> >+ said interrupts only when they are level, shared with another
> >+ guest, and enabled by the guest.
>
> There are 4 fields per device. This description misses isr_on.

Grrr. I missed this in my repost. Will fix it to say on the last line:
'guest, whether it will acknowledge interrupts, and enabled by the guest'.

>
> -boris
>

2014-07-14 16:38:09

by Sander Eikelenboom

[permalink] [raw]
Subject: Re: [Xen-devel] [PATCH v4] PCI back fixes for 3.17.



Friday, July 11, 2014, 10:08:47 PM, you wrote:

> Please see this set of patches which are fixes to Xen pciback
> for 3.17. They are also located at:

> git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen.git devel/pciback-3.17.v4

> These patches do not include the PCI bus reset/slot code as we are still
> discussing that.

> Konrad Rzeszutek Wilk (5):
> xen-pciback: Document the various parameters and attributes in SysFS
> xen/pciback: Don't deadlock when unbinding.
> xen/pciback: Include the domain id if removing the device whilst still in use
> xen/pciback: Print out the domain owning the device.
> xen/pciback: Remove tons of dereferences

> Documentation/ABI/testing/sysfs-driver-pciback | 25 +++++++++++++++
> drivers/xen/xen-pciback/passthrough.c | 9 ++++-
> drivers/xen/xen-pciback/pci_stub.c | 41 +++++++++++++------------
> drivers/xen/xen-pciback/pciback.h | 7 ++--
> drivers/xen/xen-pciback/vpci.c | 9 ++++-
> drivers/xen/xen-pciback/xenbus.c | 4 +-
> 6 files changed, 67 insertions(+), 28 deletions(-)

Hi Konrad / David,

Thanks for the fixes in this series, i just tested this series and noticed (as
somewhat expected :-) ) it's still lacking a fix for the bug that pciback
doesn't properly free / disown a device from a HVM guest.

This only happens when using "xl pci-detach domain BDF"
AND
when the guest has more than one pci device attached and you remove only ONE of
them.

The pci-detach doesn't show an error, the device is removed from the guest, but
it seems it is not internally freed in pciback.

Below is the sequence and outcome (with some added printk's) for two situations:
A) Guest has only one pci-device passed through which is then removed, as you can see
it's freed in pciback as well.

B) Guest has two pci-devices passed through from which one is then removed.
After that the second is removed as well.

>From what i recall Konrad thought it could be due to the guest being a HVM and thus the signaling is different from a
PV-Guest (no pci-front confirming the removal via xenbus). But since it does get freed when it is the last device attached to the guest
i don't know if that's completely true.
It's now semi-fixed when doing the 'xl pci-assignable-remove', that seems to force unregistring the device owner, however something still doesn't seem
right see the second "xl pci-detach" removing the last device from (A), it takes about a minute instead of not more than a second.

Also note that when on calling "xl pci-assignable-remove" for the last (or only) passed through device for a guest, you don't get the warning messages about
the device being 'in-use'

Completely below an diff of the added printk's.

--

Sander


Ad A)

root@dom0:~# xl pci-list router
Vdev Device
05.0 0000:02:00.0
06.0 0000:00:1b.0

root@dom0:~# xl pci-assignable-list

root@dom0:~# xl pci-detach router 00:1b.0

dmesg shows:
[ 434.839156] pciback 0000:00:1b.0: restoring config space at offset 0x10 (was 0x4, writing 0xf7d30004)
[ 434.841745] pciback 0000:00:1b.0: restoring config space at offset 0xc (was 0x0, writing 0x10)
[ 434.844205] pciback 0000:00:1b.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100006)

xl dmesg shows:
(XEN) [2014-07-14 16:02:27] memory_map:remove: dom1 gfn=f3070 mfn=f7d30 nr=4
(XEN) [2014-07-14 16:02:27] io.c:322: d1: unbind: m_gsi=22 g_gsi=40 dev=00:00.6 intx=0
(XEN) [2014-07-14 16:02:27] io.c:390: d1 unmap: m_irq=22 dev=00:00.6 intx=0
(XEN) [2014-07-14 16:02:27] [VT-D]iommu.c:1579: d1:PCIe: unmap 0000:00:1b.0
(XEN) [2014-07-14 16:02:27] [VT-D]iommu.c:1440: d0:PCIe: map 0000:00:1b.0

root@dom0:~# xl pci-assignable-list
0000:00:1b.0

root@dom0:~# xl pci-assignable-remove 00:1b.0
dmesg shows:
[ 609.246406] xen_pciback: ****** removing device 0000:00:1b.0 while still in-use by domain 1! ******
[ 609.248985] xen_pciback: ****** driver domain may still access this device's i/o resources!
[ 609.251344] xen_pciback: ****** shutdown driver domain before binding device
[ 609.253291] xen_pciback: ****** to other drivers or domains
[ 609.355083] xen: xen_unregister_device_domain_owner
[ 609.356448] xen: xen_unregister_device_domain_owner
[ 609.357789] xen: xen_unregister_device_domain_owner: ENODEV
[ 609.463125] pciback 0000:00:1b.0: restoring config space at offset 0x10 (was 0x4, writing 0xf7d30004)
[ 609.465692] pciback 0000:00:1b.0: restoring config space at offset 0xc (was 0x0, writing 0x10)
[ 609.468137] pciback 0000:00:1b.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100006)

root@dom0:~# xl pci-list router
Vdev Device
05.0 0000:02:00.0

root@dom0:~# xl pci-detach router 02:00.0
dmesg shows:
[ 930.571300] pciback 0000:02:00.0: restoring config space at offset 0x3c (was 0x100, writing 0x104)
[ 930.574140] pciback 0000:02:00.0: restoring config space at offset 0x10 (was 0x4, writing 0xf7c00004)
[ 930.576859] pciback 0000:02:00.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100006)
[ 930.614554] xen-pciback pci-1-0: xen_pcibk_xenbus_remove freeing pdev @ 0xffff880058fcb480
[ 930.615606] xen-pciback pci-1-0: xen_pcibk_disconnect pdev @ 0xffff880058fcb480
[ 930.719110] xen: xen_unregister_device_domain_owner

xl dmesg shows:
(XEN) [2014-07-14 16:10:43] [VT-D]iommu.c:1579: d1:PCIe: unmap 0000:02:00.0
(XEN) [2014-07-14 16:10:43] [VT-D]iommu.c:1440: d0:PCIe: map 0000:02:00.0
(XEN) [2014-07-14 16:11:46] memory_map:remove: dom1 gfn=f3074 mfn=f7c00 nr=2
(XEN) [2014-07-14 16:11:46] io.c:322: d1: unbind: m_gsi=18 g_gsi=36 dev=00:00.5 intx=0
(XEN) [2014-07-14 16:11:46] io.c:390: d1 unmap: m_irq=18 dev=00:00.5 intx=0

root@dom0:~# xl pci-assignable-list
0000:02:00.0

root@dom0:~# xl pci-assignable-remove 02:00.0
dmesg shows:
[ 1468.561071] xen: xen_unregister_device_domain_owner
[ 1468.562431] xen: xen_unregister_device_domain_owner: ENODEV
[ 1468.667271] pciback 0000:02:00.0: restoring config space at offset 0x3c (was 0x100, writing 0x104)
[ 1468.670119] pciback 0000:02:00.0: restoring config space at offset 0x10 (was 0x4, writing 0xf7c00004)
[ 1468.672857] pciback 0000:02:00.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100006)


Ad B)

root@dom0:~# xl pci-list router
Vdev Device
05.0 0000:00:1b.0

root@dom0:~# xl pci-assignable-list
0000:02:00.0

root@dom0:~# xl pci-detach router 00:1b.0
dmesg shows:
[ 199.742668] pciback 0000:00:1b.0: restoring config space at offset 0x10 (was 0x4, writing 0xf7d30004)
[ 199.743527] pciback 0000:00:1b.0: restoring config space at offset 0xc (was 0x0, writing 0x10)
[ 199.744321] pciback 0000:00:1b.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100006)
[ 199.757184] xen-pciback pci-1-0: xen_pcibk_xenbus_remove freeing pdev @ 0xffff8800589fce40
[ 199.758139] xen-pciback pci-1-0: xen_pcibk_disconnect pdev @ 0xffff8800589fce40
[ 199.862595] xen: xen_unregister_device_domain_owner

xl dmesg shows:
(XEN) [2014-07-14 16:28:29] memory_map:remove: dom1 gfn=f3070 mfn=f7d30 nr=4
(XEN) [2014-07-14 16:28:29] io.c:322: d1: unbind: m_gsi=22 g_gsi=36 dev=00:00.5 intx=0
(XEN) [2014-07-14 16:28:29] io.c:390: d1 unmap: m_irq=22 dev=00:00.5 intx=0
(XEN) [2014-07-14 16:28:29] [VT-D]iommu.c:1579: d1:PCIe: unmap 0000:00:1b.0
(XEN) [2014-07-14 16:28:29] [VT-D]iommu.c:1440: d0:PCIe: map 0000:00:1b.0

root@dom0:~# xl pci-list router
root@dom0:~# xl pci-assignable-list
0000:00:1b.0
0000:02:00.0

root@dom0:~# xl pci-assignable-remove 00:1b.0
dmesg shows:
[ 318.827415] xen: xen_unregister_device_domain_owner
[ 318.828771] xen: xen_unregister_device_domain_owner: ENODEV
[ 318.930869] pciback 0000:00:1b.0: restoring config space at offset 0x10 (was 0x4, writing 0xf7d30004)
[ 318.933435] pciback 0000:00:1b.0: restoring config space at offset 0xc (was 0x0, writing 0x10)
[ 318.935877] pciback 0000:00:1b.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100006)

root@dom0:~# xl pci-list router
root@dom0:~# xl pci-assignable-list
0000:02:00.0



diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 905956f..8145e93 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -587,17 +587,22 @@ EXPORT_SYMBOL_GPL(xen_register_device_domain_owner);
int xen_unregister_device_domain_owner(struct pci_dev *dev)
{
struct xen_device_domain_owner *owner;
+ printk(KERN_WARNING "xen: xen_unregister_device_domain_owner\n");
+

spin_lock(&dev_domain_list_spinlock);
owner = find_device(dev);
if (!owner) {
spin_unlock(&dev_domain_list_spinlock);
+ printk(KERN_WARNING "xen: xen_unregister_device_domain_owner: ENODEV \n");
+
return -ENODEV;
}
list_del(&owner->list);
spin_unlock(&dev_domain_list_spinlock);
kfree(owner);
return 0;
+
}
EXPORT_SYMBOL_GPL(xen_unregister_device_domain_owner);
#endif
diff --git a/drivers/xen/xen-pciback/pciback.h b/drivers/xen/xen-pciback/pciback.h
index 58e38d5..ef38f5b 100644
--- a/drivers/xen/xen-pciback/pciback.h
+++ b/drivers/xen/xen-pciback/pciback.h
@@ -17,6 +17,8 @@

#define DRV_NAME "xen-pciback"

+#define DEBUG
+
struct pci_dev_entry {
struct list_head list;
struct pci_dev *dev;
diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c
index 0f80760..127725e 100644
--- a/drivers/xen/xen-pciback/xenbus.c
+++ b/drivers/xen/xen-pciback/xenbus.c
@@ -16,6 +16,8 @@
#include <asm/xen/pci.h>
#include "pciback.h"

+#define DEBUG
+
#define INVALID_EVTCHN_IRQ (-1)
struct workqueue_struct *xen_pcibk_wq;

@@ -64,6 +66,8 @@ out:

static void xen_pcibk_disconnect(struct xen_pcibk_device *pdev)
{
+ dev_info(&pdev->xdev->dev, "xen_pcibk_disconnect pdev @ 0x%p\n", pdev);
+
mutex_lock(&pdev->dev_lock);
/* Ensure the guest can't trigger our handler before removing devices */
if (pdev->evtchn_irq != INVALID_EVTCHN_IRQ) {
@@ -273,19 +277,19 @@ static int xen_pcibk_remove_device(struct xen_pcibk_device *pdev,
int err = 0;
struct pci_dev *dev;

- dev_dbg(&pdev->xdev->dev, "removing dom %x bus %x slot %x func %x\n",
+ dev_info(&pdev->xdev->dev, "removing dom %x bus %x slot %x func %x\n",
domain, bus, slot, func);

dev = xen_pcibk_get_pci_dev(pdev, domain, bus, PCI_DEVFN(slot, func));
if (!dev) {
err = -EINVAL;
- dev_dbg(&pdev->xdev->dev, "Couldn't locate PCI device "
+ dev_info(&pdev->xdev->dev, "Couldn't locate PCI device "
"(%04x:%02x:%02x.%d)! not owned by this domain\n",
domain, bus, slot, func);
goto out;
}

- dev_dbg(&dev->dev, "unregistering for %d\n", pdev->xdev->otherend_id);
+ dev_info(&dev->dev, "unregistering for %d\n", pdev->xdev->otherend_id);
xen_unregister_device_domain_owner(dev);

/* N.B. This ends up calling pcistub_put_pci_dev which ends up
@@ -707,9 +711,13 @@ static int xen_pcibk_xenbus_remove(struct xenbus_device *dev)
{
struct xen_pcibk_device *pdev = dev_get_drvdata(&dev->dev);

- if (pdev != NULL)
+ if (pdev != NULL){
+ dev_info(&dev->dev, "xen_pcibk_xenbus_remove freeing pdev @ 0x%p\n", pdev);
free_pdev(pdev);
+ } else {
+ dev_info(&dev->dev, "xen_pcibk_xenbus_remove trying to free pdev==NULL\n");

+ }
return 0;
}



2014-07-14 17:22:45

by Konrad Rzeszutek Wilk

[permalink] [raw]
Subject: Re: [Xen-devel] [PATCH v4] PCI back fixes for 3.17.

On Mon, Jul 14, 2014 at 06:37:55PM +0200, Sander Eikelenboom wrote:
>
> Friday, July 11, 2014, 10:08:47 PM, you wrote:
>
> > Please see this set of patches which are fixes to Xen pciback
> > for 3.17. They are also located at:
>
> > git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen.git devel/pciback-3.17.v4
>
> > These patches do not include the PCI bus reset/slot code as we are still
> > discussing that.
>
> > Konrad Rzeszutek Wilk (5):
> > xen-pciback: Document the various parameters and attributes in SysFS
> > xen/pciback: Don't deadlock when unbinding.
> > xen/pciback: Include the domain id if removing the device whilst still in use
> > xen/pciback: Print out the domain owning the device.
> > xen/pciback: Remove tons of dereferences
>
> > Documentation/ABI/testing/sysfs-driver-pciback | 25 +++++++++++++++
> > drivers/xen/xen-pciback/passthrough.c | 9 ++++-
> > drivers/xen/xen-pciback/pci_stub.c | 41 +++++++++++++------------
> > drivers/xen/xen-pciback/pciback.h | 7 ++--
> > drivers/xen/xen-pciback/vpci.c | 9 ++++-
> > drivers/xen/xen-pciback/xenbus.c | 4 +-
> > 6 files changed, 67 insertions(+), 28 deletions(-)
>
> Hi Konrad / David,
>
> Thanks for the fixes in this series, i just tested this series and noticed (as
> somewhat expected :-) ) it's still lacking a fix for the bug that pciback
> doesn't properly free / disown a device from a HVM guest.
>
> This only happens when using "xl pci-detach domain BDF"
> AND
> when the guest has more than one pci device attached and you remove only ONE of
> them.
>
> The pci-detach doesn't show an error, the device is removed from the guest, but
> it seems it is not internally freed in pciback.
>
> Below is the sequence and outcome (with some added printk's) for two situations:
> A) Guest has only one pci-device passed through which is then removed, as you can see
> it's freed in pciback as well.
>
> B) Guest has two pci-devices passed through from which one is then removed.
> After that the second is removed as well.
>
> >From what i recall Konrad thought it could be due to the guest being a HVM and thus the signaling is different from a
> PV-Guest (no pci-front confirming the removal via xenbus). But since it does get freed when it is the last device attached to the guest
> i don't know if that's completely true.
> It's now semi-fixed when doing the 'xl pci-assignable-remove', that seems to force unregistring the device owner, however something still doesn't seem
> right see the second "xl pci-detach" removing the last device from (A), it takes about a minute instead of not more than a second.
>
> Also note that when on calling "xl pci-assignable-remove" for the last (or only) passed through device for a guest, you don't get the warning messages about
> the device being 'in-use'
>
> Completely below an diff of the added printk's.
>
> --
>
> Sander
>
>
> Ad A)
>
> root@dom0:~# xl pci-list router
> Vdev Device
> 05.0 0000:02:00.0
> 06.0 0000:00:1b.0
>
> root@dom0:~# xl pci-assignable-list
>
> root@dom0:~# xl pci-detach router 00:1b.0
>
> dmesg shows:
> [ 434.839156] pciback 0000:00:1b.0: restoring config space at offset 0x10 (was 0x4, writing 0xf7d30004)
> [ 434.841745] pciback 0000:00:1b.0: restoring config space at offset 0xc (was 0x0, writing 0x10)
> [ 434.844205] pciback 0000:00:1b.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100006)
>
> xl dmesg shows:
> (XEN) [2014-07-14 16:02:27] memory_map:remove: dom1 gfn=f3070 mfn=f7d30 nr=4
> (XEN) [2014-07-14 16:02:27] io.c:322: d1: unbind: m_gsi=22 g_gsi=40 dev=00:00.6 intx=0
> (XEN) [2014-07-14 16:02:27] io.c:390: d1 unmap: m_irq=22 dev=00:00.6 intx=0
> (XEN) [2014-07-14 16:02:27] [VT-D]iommu.c:1579: d1:PCIe: unmap 0000:00:1b.0
> (XEN) [2014-07-14 16:02:27] [VT-D]iommu.c:1440: d0:PCIe: map 0000:00:1b.0
>
> root@dom0:~# xl pci-assignable-list
> 0000:00:1b.0
>
> root@dom0:~# xl pci-assignable-remove 00:1b.0
> dmesg shows:
> [ 609.246406] xen_pciback: ****** removing device 0000:00:1b.0 while still in-use by domain 1! ******
> [ 609.248985] xen_pciback: ****** driver domain may still access this device's i/o resources!
> [ 609.251344] xen_pciback: ****** shutdown driver domain before binding device
> [ 609.253291] xen_pciback: ****** to other drivers or domains
> [ 609.355083] xen: xen_unregister_device_domain_owner
> [ 609.356448] xen: xen_unregister_device_domain_owner
> [ 609.357789] xen: xen_unregister_device_domain_owner: ENODEV
> [ 609.463125] pciback 0000:00:1b.0: restoring config space at offset 0x10 (was 0x4, writing 0xf7d30004)
> [ 609.465692] pciback 0000:00:1b.0: restoring config space at offset 0xc (was 0x0, writing 0x10)
> [ 609.468137] pciback 0000:00:1b.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100006)
>
> root@dom0:~# xl pci-list router
> Vdev Device
> 05.0 0000:02:00.0
>
> root@dom0:~# xl pci-detach router 02:00.0
> dmesg shows:
> [ 930.571300] pciback 0000:02:00.0: restoring config space at offset 0x3c (was 0x100, writing 0x104)
> [ 930.574140] pciback 0000:02:00.0: restoring config space at offset 0x10 (was 0x4, writing 0xf7c00004)
> [ 930.576859] pciback 0000:02:00.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100006)
> [ 930.614554] xen-pciback pci-1-0: xen_pcibk_xenbus_remove freeing pdev @ 0xffff880058fcb480
> [ 930.615606] xen-pciback pci-1-0: xen_pcibk_disconnect pdev @ 0xffff880058fcb480
> [ 930.719110] xen: xen_unregister_device_domain_owner
>
> xl dmesg shows:
> (XEN) [2014-07-14 16:10:43] [VT-D]iommu.c:1579: d1:PCIe: unmap 0000:02:00.0
> (XEN) [2014-07-14 16:10:43] [VT-D]iommu.c:1440: d0:PCIe: map 0000:02:00.0
> (XEN) [2014-07-14 16:11:46] memory_map:remove: dom1 gfn=f3074 mfn=f7c00 nr=2
> (XEN) [2014-07-14 16:11:46] io.c:322: d1: unbind: m_gsi=18 g_gsi=36 dev=00:00.5 intx=0
> (XEN) [2014-07-14 16:11:46] io.c:390: d1 unmap: m_irq=18 dev=00:00.5 intx=0
>
> root@dom0:~# xl pci-assignable-list
> 0000:02:00.0
>
> root@dom0:~# xl pci-assignable-remove 02:00.0
> dmesg shows:
> [ 1468.561071] xen: xen_unregister_device_domain_owner
> [ 1468.562431] xen: xen_unregister_device_domain_owner: ENODEV
> [ 1468.667271] pciback 0000:02:00.0: restoring config space at offset 0x3c (was 0x100, writing 0x104)
> [ 1468.670119] pciback 0000:02:00.0: restoring config space at offset 0x10 (was 0x4, writing 0xf7c00004)
> [ 1468.672857] pciback 0000:02:00.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100006)
>
>

At this point if you do:


#xl pci-assignable-list

I presume you don't see anything

> Ad B)
>
> root@dom0:~# xl pci-list router
> Vdev Device
> 05.0 0000:00:1b.0
>
> root@dom0:~# xl pci-assignable-list
> 0000:02:00.0
>
> root@dom0:~# xl pci-detach router 00:1b.0
> dmesg shows:
> [ 199.742668] pciback 0000:00:1b.0: restoring config space at offset 0x10 (was 0x4, writing 0xf7d30004)
> [ 199.743527] pciback 0000:00:1b.0: restoring config space at offset 0xc (was 0x0, writing 0x10)
> [ 199.744321] pciback 0000:00:1b.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100006)
> [ 199.757184] xen-pciback pci-1-0: xen_pcibk_xenbus_remove freeing pdev @ 0xffff8800589fce40
> [ 199.758139] xen-pciback pci-1-0: xen_pcibk_disconnect pdev @ 0xffff8800589fce40
> [ 199.862595] xen: xen_unregister_device_domain_owner
>
> xl dmesg shows:
> (XEN) [2014-07-14 16:28:29] memory_map:remove: dom1 gfn=f3070 mfn=f7d30 nr=4
> (XEN) [2014-07-14 16:28:29] io.c:322: d1: unbind: m_gsi=22 g_gsi=36 dev=00:00.5 intx=0
> (XEN) [2014-07-14 16:28:29] io.c:390: d1 unmap: m_irq=22 dev=00:00.5 intx=0
> (XEN) [2014-07-14 16:28:29] [VT-D]iommu.c:1579: d1:PCIe: unmap 0000:00:1b.0
> (XEN) [2014-07-14 16:28:29] [VT-D]iommu.c:1440: d0:PCIe: map 0000:00:1b.0
>
> root@dom0:~# xl pci-list router
> root@dom0:~# xl pci-assignable-list
> 0000:00:1b.0
> 0000:02:00.0
>
> root@dom0:~# xl pci-assignable-remove 00:1b.0
> dmesg shows:
> [ 318.827415] xen: xen_unregister_device_domain_owner
> [ 318.828771] xen: xen_unregister_device_domain_owner: ENODEV
> [ 318.930869] pciback 0000:00:1b.0: restoring config space at offset 0x10 (was 0x4, writing 0xf7d30004)
> [ 318.933435] pciback 0000:00:1b.0: restoring config space at offset 0xc (was 0x0, writing 0x10)
> [ 318.935877] pciback 0000:00:1b.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100006)
>
> root@dom0:~# xl pci-list router
> root@dom0:~# xl pci-assignable-list
> 0000:02:00.0
>
>

And if you do:

# xl pci-detach router 02:00.0

Do you see it being cleared from pciback? And what do you
see in /sys/bus/pci/drivers/pciback ?


Thanks!

2014-07-14 17:29:43

by Sander Eikelenboom

[permalink] [raw]
Subject: Re: [Xen-devel] [PATCH v4] PCI back fixes for 3.17.


Monday, July 14, 2014, 7:22:25 PM, you wrote:

> On Mon, Jul 14, 2014 at 06:37:55PM +0200, Sander Eikelenboom wrote:
>>
>> Friday, July 11, 2014, 10:08:47 PM, you wrote:
>>
>> > Please see this set of patches which are fixes to Xen pciback
>> > for 3.17. They are also located at:
>>
>> > git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen.git devel/pciback-3.17.v4
>>
>> > These patches do not include the PCI bus reset/slot code as we are still
>> > discussing that.
>>
>> > Konrad Rzeszutek Wilk (5):
>> > xen-pciback: Document the various parameters and attributes in SysFS
>> > xen/pciback: Don't deadlock when unbinding.
>> > xen/pciback: Include the domain id if removing the device whilst still in use
>> > xen/pciback: Print out the domain owning the device.
>> > xen/pciback: Remove tons of dereferences
>>
>> > Documentation/ABI/testing/sysfs-driver-pciback | 25 +++++++++++++++
>> > drivers/xen/xen-pciback/passthrough.c | 9 ++++-
>> > drivers/xen/xen-pciback/pci_stub.c | 41 +++++++++++++------------
>> > drivers/xen/xen-pciback/pciback.h | 7 ++--
>> > drivers/xen/xen-pciback/vpci.c | 9 ++++-
>> > drivers/xen/xen-pciback/xenbus.c | 4 +-
>> > 6 files changed, 67 insertions(+), 28 deletions(-)
>>
>> Hi Konrad / David,
>>
>> Thanks for the fixes in this series, i just tested this series and noticed (as
>> somewhat expected :-) ) it's still lacking a fix for the bug that pciback
>> doesn't properly free / disown a device from a HVM guest.
>>
>> This only happens when using "xl pci-detach domain BDF"
>> AND
>> when the guest has more than one pci device attached and you remove only ONE of
>> them.
>>
>> The pci-detach doesn't show an error, the device is removed from the guest, but
>> it seems it is not internally freed in pciback.
>>
>> Below is the sequence and outcome (with some added printk's) for two situations:
>> A) Guest has only one pci-device passed through which is then removed, as you can see
>> it's freed in pciback as well.
>>
>> B) Guest has two pci-devices passed through from which one is then removed.
>> After that the second is removed as well.
>>
>> >From what i recall Konrad thought it could be due to the guest being a HVM and thus the signaling is different from a
>> PV-Guest (no pci-front confirming the removal via xenbus). But since it does get freed when it is the last device attached to the guest
>> i don't know if that's completely true.
>> It's now semi-fixed when doing the 'xl pci-assignable-remove', that seems to force unregistring the device owner, however something still doesn't seem
>> right see the second "xl pci-detach" removing the last device from (A), it takes about a minute instead of not more than a second.
>>
>> Also note that when on calling "xl pci-assignable-remove" for the last (or only) passed through device for a guest, you don't get the warning messages about
>> the device being 'in-use'
>>
>> Completely below an diff of the added printk's.
>>
>> --
>>
>> Sander
>>
>>
>> Ad A)
>>
>> root@dom0:~# xl pci-list router
>> Vdev Device
>> 05.0 0000:02:00.0
>> 06.0 0000:00:1b.0
>>
>> root@dom0:~# xl pci-assignable-list
>>
>> root@dom0:~# xl pci-detach router 00:1b.0
>>
>> dmesg shows:
>> [ 434.839156] pciback 0000:00:1b.0: restoring config space at offset 0x10 (was 0x4, writing 0xf7d30004)
>> [ 434.841745] pciback 0000:00:1b.0: restoring config space at offset 0xc (was 0x0, writing 0x10)
>> [ 434.844205] pciback 0000:00:1b.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100006)
>>
>> xl dmesg shows:
>> (XEN) [2014-07-14 16:02:27] memory_map:remove: dom1 gfn=f3070 mfn=f7d30 nr=4
>> (XEN) [2014-07-14 16:02:27] io.c:322: d1: unbind: m_gsi=22 g_gsi=40 dev=00:00.6 intx=0
>> (XEN) [2014-07-14 16:02:27] io.c:390: d1 unmap: m_irq=22 dev=00:00.6 intx=0
>> (XEN) [2014-07-14 16:02:27] [VT-D]iommu.c:1579: d1:PCIe: unmap 0000:00:1b.0
>> (XEN) [2014-07-14 16:02:27] [VT-D]iommu.c:1440: d0:PCIe: map 0000:00:1b.0
>>
>> root@dom0:~# xl pci-assignable-list
>> 0000:00:1b.0
>>
>> root@dom0:~# xl pci-assignable-remove 00:1b.0
>> dmesg shows:
>> [ 609.246406] xen_pciback: ****** removing device 0000:00:1b.0 while still in-use by domain 1! ******
>> [ 609.248985] xen_pciback: ****** driver domain may still access this device's i/o resources!
>> [ 609.251344] xen_pciback: ****** shutdown driver domain before binding device
>> [ 609.253291] xen_pciback: ****** to other drivers or domains
>> [ 609.355083] xen: xen_unregister_device_domain_owner
>> [ 609.356448] xen: xen_unregister_device_domain_owner
>> [ 609.357789] xen: xen_unregister_device_domain_owner: ENODEV
>> [ 609.463125] pciback 0000:00:1b.0: restoring config space at offset 0x10 (was 0x4, writing 0xf7d30004)
>> [ 609.465692] pciback 0000:00:1b.0: restoring config space at offset 0xc (was 0x0, writing 0x10)
>> [ 609.468137] pciback 0000:00:1b.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100006)
>>
>> root@dom0:~# xl pci-list router
>> Vdev Device
>> 05.0 0000:02:00.0
>>
>> root@dom0:~# xl pci-detach router 02:00.0
>> dmesg shows:
>> [ 930.571300] pciback 0000:02:00.0: restoring config space at offset 0x3c (was 0x100, writing 0x104)
>> [ 930.574140] pciback 0000:02:00.0: restoring config space at offset 0x10 (was 0x4, writing 0xf7c00004)
>> [ 930.576859] pciback 0000:02:00.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100006)
>> [ 930.614554] xen-pciback pci-1-0: xen_pcibk_xenbus_remove freeing pdev @ 0xffff880058fcb480
>> [ 930.615606] xen-pciback pci-1-0: xen_pcibk_disconnect pdev @ 0xffff880058fcb480
>> [ 930.719110] xen: xen_unregister_device_domain_owner
>>
>> xl dmesg shows:
>> (XEN) [2014-07-14 16:10:43] [VT-D]iommu.c:1579: d1:PCIe: unmap 0000:02:00.0
>> (XEN) [2014-07-14 16:10:43] [VT-D]iommu.c:1440: d0:PCIe: map 0000:02:00.0
>> (XEN) [2014-07-14 16:11:46] memory_map:remove: dom1 gfn=f3074 mfn=f7c00 nr=2
>> (XEN) [2014-07-14 16:11:46] io.c:322: d1: unbind: m_gsi=18 g_gsi=36 dev=00:00.5 intx=0
>> (XEN) [2014-07-14 16:11:46] io.c:390: d1 unmap: m_irq=18 dev=00:00.5 intx=0
>>
>> root@dom0:~# xl pci-assignable-list
>> 0000:02:00.0
>>
>> root@dom0:~# xl pci-assignable-remove 02:00.0
>> dmesg shows:
>> [ 1468.561071] xen: xen_unregister_device_domain_owner
>> [ 1468.562431] xen: xen_unregister_device_domain_owner: ENODEV
>> [ 1468.667271] pciback 0000:02:00.0: restoring config space at offset 0x3c (was 0x100, writing 0x104)
>> [ 1468.670119] pciback 0000:02:00.0: restoring config space at offset 0x10 (was 0x4, writing 0xf7c00004)
>> [ 1468.672857] pciback 0000:02:00.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100006)
>>
>>

> At this point if you do:


> #xl pci-assignable-list

> I presume you don't see anything

(nods)

>> Ad B)
>>
>> root@dom0:~# xl pci-list router
>> Vdev Device
>> 05.0 0000:00:1b.0
>>
>> root@dom0:~# xl pci-assignable-list
>> 0000:02:00.0
>>
>> root@dom0:~# xl pci-detach router 00:1b.0
>> dmesg shows:
>> [ 199.742668] pciback 0000:00:1b.0: restoring config space at offset 0x10 (was 0x4, writing 0xf7d30004)
>> [ 199.743527] pciback 0000:00:1b.0: restoring config space at offset 0xc (was 0x0, writing 0x10)
>> [ 199.744321] pciback 0000:00:1b.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100006)
>> [ 199.757184] xen-pciback pci-1-0: xen_pcibk_xenbus_remove freeing pdev @ 0xffff8800589fce40
>> [ 199.758139] xen-pciback pci-1-0: xen_pcibk_disconnect pdev @ 0xffff8800589fce40
>> [ 199.862595] xen: xen_unregister_device_domain_owner
>>
>> xl dmesg shows:
>> (XEN) [2014-07-14 16:28:29] memory_map:remove: dom1 gfn=f3070 mfn=f7d30 nr=4
>> (XEN) [2014-07-14 16:28:29] io.c:322: d1: unbind: m_gsi=22 g_gsi=36 dev=00:00.5 intx=0
>> (XEN) [2014-07-14 16:28:29] io.c:390: d1 unmap: m_irq=22 dev=00:00.5 intx=0
>> (XEN) [2014-07-14 16:28:29] [VT-D]iommu.c:1579: d1:PCIe: unmap 0000:00:1b.0
>> (XEN) [2014-07-14 16:28:29] [VT-D]iommu.c:1440: d0:PCIe: map 0000:00:1b.0
>>
>> root@dom0:~# xl pci-list router
>> root@dom0:~# xl pci-assignable-list
>> 0000:00:1b.0
>> 0000:02:00.0
>>
>> root@dom0:~# xl pci-assignable-remove 00:1b.0
>> dmesg shows:
>> [ 318.827415] xen: xen_unregister_device_domain_owner
>> [ 318.828771] xen: xen_unregister_device_domain_owner: ENODEV
>> [ 318.930869] pciback 0000:00:1b.0: restoring config space at offset 0x10 (was 0x4, writing 0xf7d30004)
>> [ 318.933435] pciback 0000:00:1b.0: restoring config space at offset 0xc (was 0x0, writing 0x10)
>> [ 318.935877] pciback 0000:00:1b.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100006)
>>
>> root@dom0:~# xl pci-list router
>> root@dom0:~# xl pci-assignable-list
>> 0000:02:00.0
>>
>>

> And if you do:

> # xl pci-detach router 02:00.0

> Do you see it being cleared from pciback? And what do you
> see in /sys/bus/pci/drivers/pciback ?

Hmm good point .. i also had the plan to look into xenstore what was in there ..
but forgot .. will post both right away :-)

> Thanks!

2014-07-14 17:38:17

by Konrad Rzeszutek Wilk

[permalink] [raw]
Subject: Re: [Xen-devel] [PATCH v4] PCI back fixes for 3.17.

> >> Ad B)
> >>
> >> root@dom0:~# xl pci-list router
> >> Vdev Device
> >> 05.0 0000:00:1b.0
> >>
> >> root@dom0:~# xl pci-assignable-list
> >> 0000:02:00.0
> >>
> >> root@dom0:~# xl pci-detach router 00:1b.0
> >> dmesg shows:
> >> [ 199.742668] pciback 0000:00:1b.0: restoring config space at offset 0x10 (was 0x4, writing 0xf7d30004)
> >> [ 199.743527] pciback 0000:00:1b.0: restoring config space at offset 0xc (was 0x0, writing 0x10)
> >> [ 199.744321] pciback 0000:00:1b.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100006)
> >> [ 199.757184] xen-pciback pci-1-0: xen_pcibk_xenbus_remove freeing pdev @ 0xffff8800589fce40
> >> [ 199.758139] xen-pciback pci-1-0: xen_pcibk_disconnect pdev @ 0xffff8800589fce40
> >> [ 199.862595] xen: xen_unregister_device_domain_owner
> >>
> >> xl dmesg shows:
> >> (XEN) [2014-07-14 16:28:29] memory_map:remove: dom1 gfn=f3070 mfn=f7d30 nr=4
> >> (XEN) [2014-07-14 16:28:29] io.c:322: d1: unbind: m_gsi=22 g_gsi=36 dev=00:00.5 intx=0
> >> (XEN) [2014-07-14 16:28:29] io.c:390: d1 unmap: m_irq=22 dev=00:00.5 intx=0
> >> (XEN) [2014-07-14 16:28:29] [VT-D]iommu.c:1579: d1:PCIe: unmap 0000:00:1b.0
> >> (XEN) [2014-07-14 16:28:29] [VT-D]iommu.c:1440: d0:PCIe: map 0000:00:1b.0
> >>
> >> root@dom0:~# xl pci-list router
> >> root@dom0:~# xl pci-assignable-list
> >> 0000:00:1b.0
> >> 0000:02:00.0
> >>
> >> root@dom0:~# xl pci-assignable-remove 00:1b.0
> >> dmesg shows:
> >> [ 318.827415] xen: xen_unregister_device_domain_owner
> >> [ 318.828771] xen: xen_unregister_device_domain_owner: ENODEV
> >> [ 318.930869] pciback 0000:00:1b.0: restoring config space at offset 0x10 (was 0x4, writing 0xf7d30004)
> >> [ 318.933435] pciback 0000:00:1b.0: restoring config space at offset 0xc (was 0x0, writing 0x10)
> >> [ 318.935877] pciback 0000:00:1b.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100006)
> >>
> >> root@dom0:~# xl pci-list router
> >> root@dom0:~# xl pci-assignable-list
> >> 0000:02:00.0
> >>
> >>
>
> > And if you do:
>
> > # xl pci-detach router 02:00.0
>

Err, I meant
# xl pci-assignable-remove 02:00.0

> > Do you see it being cleared from pciback? And what do you
> > see in /sys/bus/pci/drivers/pciback ?
>
> Hmm good point .. i also had the plan to look into xenstore what was in there ..
> but forgot .. will post both right away :-)

Thank you!
>
> > Thanks!
>
>

2014-07-14 17:43:17

by Sander Eikelenboom

[permalink] [raw]
Subject: Re: [Xen-devel] [PATCH v4] PCI back fixes for 3.17.


Monday, July 14, 2014, 7:37:53 PM, you wrote:

>> >> Ad B)
>> >>
>> >> root@dom0:~# xl pci-list router
>> >> Vdev Device
>> >> 05.0 0000:00:1b.0
>> >>
>> >> root@dom0:~# xl pci-assignable-list
>> >> 0000:02:00.0
>> >>
>> >> root@dom0:~# xl pci-detach router 00:1b.0
>> >> dmesg shows:
>> >> [ 199.742668] pciback 0000:00:1b.0: restoring config space at offset 0x10 (was 0x4, writing 0xf7d30004)
>> >> [ 199.743527] pciback 0000:00:1b.0: restoring config space at offset 0xc (was 0x0, writing 0x10)
>> >> [ 199.744321] pciback 0000:00:1b.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100006)
>> >> [ 199.757184] xen-pciback pci-1-0: xen_pcibk_xenbus_remove freeing pdev @ 0xffff8800589fce40
>> >> [ 199.758139] xen-pciback pci-1-0: xen_pcibk_disconnect pdev @ 0xffff8800589fce40
>> >> [ 199.862595] xen: xen_unregister_device_domain_owner
>> >>
>> >> xl dmesg shows:
>> >> (XEN) [2014-07-14 16:28:29] memory_map:remove: dom1 gfn=f3070 mfn=f7d30 nr=4
>> >> (XEN) [2014-07-14 16:28:29] io.c:322: d1: unbind: m_gsi=22 g_gsi=36 dev=00:00.5 intx=0
>> >> (XEN) [2014-07-14 16:28:29] io.c:390: d1 unmap: m_irq=22 dev=00:00.5 intx=0
>> >> (XEN) [2014-07-14 16:28:29] [VT-D]iommu.c:1579: d1:PCIe: unmap 0000:00:1b.0
>> >> (XEN) [2014-07-14 16:28:29] [VT-D]iommu.c:1440: d0:PCIe: map 0000:00:1b.0
>> >>
>> >> root@dom0:~# xl pci-list router
>> >> root@dom0:~# xl pci-assignable-list
>> >> 0000:00:1b.0
>> >> 0000:02:00.0
>> >>
>> >> root@dom0:~# xl pci-assignable-remove 00:1b.0
>> >> dmesg shows:
>> >> [ 318.827415] xen: xen_unregister_device_domain_owner
>> >> [ 318.828771] xen: xen_unregister_device_domain_owner: ENODEV
>> >> [ 318.930869] pciback 0000:00:1b.0: restoring config space at offset 0x10 (was 0x4, writing 0xf7d30004)
>> >> [ 318.933435] pciback 0000:00:1b.0: restoring config space at offset 0xc (was 0x0, writing 0x10)
>> >> [ 318.935877] pciback 0000:00:1b.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100006)
>> >>
>> >> root@dom0:~# xl pci-list router
>> >> root@dom0:~# xl pci-assignable-list
>> >> 0000:02:00.0
>> >>
>> >>
>>
>> > And if you do:
>>
>> > # xl pci-detach router 02:00.0
>>

> Err, I meant
> # xl pci-assignable-remove 02:00.0

Ah ok .. so that is:
remove a device from pciback that has never been assigned to any guest ..

will also give that a go .. although that probably won't be a problem.

>> > Do you see it being cleared from pciback? And what do you
>> > see in /sys/bus/pci/drivers/pciback ?
>>
>> Hmm good point .. i also had the plan to look into xenstore what was in there ..
>> but forgot .. will post both right away :-)

> Thank you!
>>
>> > Thanks!
>>
>>

2014-07-14 17:45:55

by Konrad Rzeszutek Wilk

[permalink] [raw]
Subject: Re: [Xen-devel] [PATCH v4] PCI back fixes for 3.17.

On Mon, Jul 14, 2014 at 07:43:04PM +0200, Sander Eikelenboom wrote:
>
> Monday, July 14, 2014, 7:37:53 PM, you wrote:
>
> >> >> Ad B)
> >> >>
> >> >> root@dom0:~# xl pci-list router
> >> >> Vdev Device
> >> >> 05.0 0000:00:1b.0
> >> >>
> >> >> root@dom0:~# xl pci-assignable-list
> >> >> 0000:02:00.0
> >> >>
> >> >> root@dom0:~# xl pci-detach router 00:1b.0
> >> >> dmesg shows:
> >> >> [ 199.742668] pciback 0000:00:1b.0: restoring config space at offset 0x10 (was 0x4, writing 0xf7d30004)
> >> >> [ 199.743527] pciback 0000:00:1b.0: restoring config space at offset 0xc (was 0x0, writing 0x10)
> >> >> [ 199.744321] pciback 0000:00:1b.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100006)
> >> >> [ 199.757184] xen-pciback pci-1-0: xen_pcibk_xenbus_remove freeing pdev @ 0xffff8800589fce40
> >> >> [ 199.758139] xen-pciback pci-1-0: xen_pcibk_disconnect pdev @ 0xffff8800589fce40
> >> >> [ 199.862595] xen: xen_unregister_device_domain_owner
> >> >>
> >> >> xl dmesg shows:
> >> >> (XEN) [2014-07-14 16:28:29] memory_map:remove: dom1 gfn=f3070 mfn=f7d30 nr=4
> >> >> (XEN) [2014-07-14 16:28:29] io.c:322: d1: unbind: m_gsi=22 g_gsi=36 dev=00:00.5 intx=0
> >> >> (XEN) [2014-07-14 16:28:29] io.c:390: d1 unmap: m_irq=22 dev=00:00.5 intx=0
> >> >> (XEN) [2014-07-14 16:28:29] [VT-D]iommu.c:1579: d1:PCIe: unmap 0000:00:1b.0
> >> >> (XEN) [2014-07-14 16:28:29] [VT-D]iommu.c:1440: d0:PCIe: map 0000:00:1b.0
> >> >>
> >> >> root@dom0:~# xl pci-list router
> >> >> root@dom0:~# xl pci-assignable-list
> >> >> 0000:00:1b.0
> >> >> 0000:02:00.0
> >> >>
> >> >> root@dom0:~# xl pci-assignable-remove 00:1b.0
> >> >> dmesg shows:
> >> >> [ 318.827415] xen: xen_unregister_device_domain_owner
> >> >> [ 318.828771] xen: xen_unregister_device_domain_owner: ENODEV
> >> >> [ 318.930869] pciback 0000:00:1b.0: restoring config space at offset 0x10 (was 0x4, writing 0xf7d30004)
> >> >> [ 318.933435] pciback 0000:00:1b.0: restoring config space at offset 0xc (was 0x0, writing 0x10)
> >> >> [ 318.935877] pciback 0000:00:1b.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100006)
> >> >>
> >> >> root@dom0:~# xl pci-list router
> >> >> root@dom0:~# xl pci-assignable-list
> >> >> 0000:02:00.0
> >> >>
> >> >>
> >>
> >> > And if you do:
> >>
> >> > # xl pci-detach router 02:00.0
> >>
>
> > Err, I meant
> > # xl pci-assignable-remove 02:00.0
>
> Ah ok .. so that is:
> remove a device from pciback that has never been assigned to any guest ..
>
> will also give that a go .. although that probably won't be a problem.

Right. So I think it all works as expected? That is if you have
two PCI devices assigned to a guest and want to re-use them you
have to do the 'pci-detach' twice and then follow that with
'pci-assignable-remove' twice as well?

2014-07-14 18:24:45

by Sander Eikelenboom

[permalink] [raw]
Subject: Re: [Xen-devel] [PATCH v4] PCI back fixes for 3.17.


Monday, July 14, 2014, 7:45:25 PM, you wrote:

> On Mon, Jul 14, 2014 at 07:43:04PM +0200, Sander Eikelenboom wrote:
>>
>> Monday, July 14, 2014, 7:37:53 PM, you wrote:
>>
>> >> >> Ad B)
>> >> >>
>> >> >> root@dom0:~# xl pci-list router
>> >> >> Vdev Device
>> >> >> 05.0 0000:00:1b.0
>> >> >>
>> >> >> root@dom0:~# xl pci-assignable-list
>> >> >> 0000:02:00.0
>> >> >>
>> >> >> root@dom0:~# xl pci-detach router 00:1b.0
>> >> >> dmesg shows:
>> >> >> [ 199.742668] pciback 0000:00:1b.0: restoring config space at offset 0x10 (was 0x4, writing 0xf7d30004)
>> >> >> [ 199.743527] pciback 0000:00:1b.0: restoring config space at offset 0xc (was 0x0, writing 0x10)
>> >> >> [ 199.744321] pciback 0000:00:1b.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100006)
>> >> >> [ 199.757184] xen-pciback pci-1-0: xen_pcibk_xenbus_remove freeing pdev @ 0xffff8800589fce40
>> >> >> [ 199.758139] xen-pciback pci-1-0: xen_pcibk_disconnect pdev @ 0xffff8800589fce40
>> >> >> [ 199.862595] xen: xen_unregister_device_domain_owner
>> >> >>
>> >> >> xl dmesg shows:
>> >> >> (XEN) [2014-07-14 16:28:29] memory_map:remove: dom1 gfn=f3070 mfn=f7d30 nr=4
>> >> >> (XEN) [2014-07-14 16:28:29] io.c:322: d1: unbind: m_gsi=22 g_gsi=36 dev=00:00.5 intx=0
>> >> >> (XEN) [2014-07-14 16:28:29] io.c:390: d1 unmap: m_irq=22 dev=00:00.5 intx=0
>> >> >> (XEN) [2014-07-14 16:28:29] [VT-D]iommu.c:1579: d1:PCIe: unmap 0000:00:1b.0
>> >> >> (XEN) [2014-07-14 16:28:29] [VT-D]iommu.c:1440: d0:PCIe: map 0000:00:1b.0
>> >> >>
>> >> >> root@dom0:~# xl pci-list router
>> >> >> root@dom0:~# xl pci-assignable-list
>> >> >> 0000:00:1b.0
>> >> >> 0000:02:00.0
>> >> >>
>> >> >> root@dom0:~# xl pci-assignable-remove 00:1b.0
>> >> >> dmesg shows:
>> >> >> [ 318.827415] xen: xen_unregister_device_domain_owner
>> >> >> [ 318.828771] xen: xen_unregister_device_domain_owner: ENODEV
>> >> >> [ 318.930869] pciback 0000:00:1b.0: restoring config space at offset 0x10 (was 0x4, writing 0xf7d30004)
>> >> >> [ 318.933435] pciback 0000:00:1b.0: restoring config space at offset 0xc (was 0x0, writing 0x10)
>> >> >> [ 318.935877] pciback 0000:00:1b.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100006)
>> >> >>
>> >> >> root@dom0:~# xl pci-list router
>> >> >> root@dom0:~# xl pci-assignable-list
>> >> >> 0000:02:00.0
>> >> >>
>> >> >>
>> >>
>> >> > And if you do:
>> >>
>> >> > # xl pci-detach router 02:00.0
>> >>
>>
>> > Err, I meant
>> > # xl pci-assignable-remove 02:00.0
>>
>> Ah ok .. so that is:
>> remove a device from pciback that has never been assigned to any guest ..
>>
>> will also give that a go .. although that probably won't be a problem.

> Right. So I think it all works as expected? That is if you have
> two PCI devices assigned to a guest and want to re-use them you
> have to do the 'pci-detach' twice and then follow that with
> 'pci-assignable-remove' twice as well?

Well except that's not what i want :-) .. that's about the same as shutting down
the guest .. and indeed that works :-)

What i want to do is to remove just *one* device and leave the other one in the guest.

The one that i remove .. i would like to be able to remove it from being
assignable .. (rebind it in dom0) and/or assign it to another guest.

And that fails because pciback still thinks the guest owns the device ..
while libxl thinks it doesn't (outcome of xl pci-assignable-list).

When doing the:

# xl pci-assignable-list
0000:02:00.0
# xl pci-assignable-remove 02:00.0
dmesg shows:
[ 443.292951] xen: xen_unregister_device_domain_owner
[ 443.294308] xen: xen_unregister_device_domain_owner: ENODEV
[ 443.398827] pciback 0000:02:00.0: restoring config space at offset 0x3c (was 0x100, writing 0x104)
[ 443.400797] pciback 0000:02:00.0: restoring config space at offset 0x10 (was 0x4, writing 0xf7c00004)
[ 443.401713] pciback 0000:02:00.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100006)

which i think is what is expected ..


Hmm /sys/bus/pci/drivers/pciback doesn't shed much light, because in the state
just after a xl pci-detach, the device is still registred by pciback (from dom0's
view) which is correct, what is wrong is that pciback still seems to think that
it is in turn still owned by the guest. That doesn't get reflected in the info
available in the /sys dir.

Ok over to xen-store ...

first a diff between the booted guest with two devices passed through and the
state after the "xl pci-detach router 00:1b.0"

root@creanuc:~# diff -U10 xs-before-detach-first.txt xs-after-detach-first.txt
--- xs-before-detach-first.txt 2014-07-14 19:56:23.233576000 +0200
+++ xs-after-detach-first.txt 2014-07-14 20:00:00.053576000 +0200
@@ -185,35 +185,29 @@
feature-rx-flip = "0"
feature-split-event-channels = "1"
multi-queue-max-queues = "4"
hotplug-status = "connected"
pci = ""
1 = ""
0 = ""
frontend = "/local/domain/1/device/pci/0"
frontend-id = "1"
online = "1"
- state = "3"
+ state = "7"
domain = "router"
key-0 = "0000:02:00.0"
dev-0 = "0000:02:00.0"
vdevfn-0 = "28"
opts-0 = "msitranslate=0,power_mgmt=0,permissive=0"
state-0 = "3"
- key-1 = "0000:00:1b.0"
- dev-1 = "0000:00:1b.0"
- vdevfn-1 = "30"
- opts-1 = "msitranslate=0,power_mgmt=0,permissive=0"
- state-1 = "3"
- num_devs = "2"
+ num_devs = "1"
vdev-0 = "0000:00:00.00"
- vdev-1 = "0000:00:01.00"
root-0 = "0000:00"
root_num = "1"
1 = ""
vm = "/vm/a102d97e-8388-414e-97e5-cc394656c47d"
name = "router"
cpu = ""
0 = ""
availability = "online"
1 = ""
availability = "online"

It seems ok, perhaps apart from the general pci state going from 3 to 7 ?
Haven't looked up the exact states, but since only one domain is removes i would
still expect it to be 3 ?


Now a diff before and after the "xl pci-assignable-remove 00:1b.0"
diff xs-after-detach-first.txt xs-after-pci-assignable-remove-first.txt is
empty, as expected since this is a pciback only thing now involving any guest.


And now the last diff, between the state before and after the "xl pci-detach
02:00.0" removing the last pci device from the guest.

root@creanuc:~# diff -U5 xs-after-pci-assignable-remove-first.txt xs-after-pci-detach-second.txt
--- xs-after-pci-assignable-remove-first.txt 2014-07-14 20:00:45.161576000 +0200
+++ xs-after-pci-detach-second.txt 2014-07-14 20:01:20.177576000 +0200
@@ -184,27 +184,10 @@
feature-rx-copy = "1"
feature-rx-flip = "0"
feature-split-event-channels = "1"
multi-queue-max-queues = "4"
hotplug-status = "connected"
- pci = ""
- 1 = ""
- 0 = ""
- frontend = "/local/domain/1/device/pci/0"
- frontend-id = "1"
- online = "1"
- state = "7"
- domain = "router"
- key-0 = "0000:02:00.0"
- dev-0 = "0000:02:00.0"
- vdevfn-0 = "28"
- opts-0 = "msitranslate=0,power_mgmt=0,permissive=0"
- state-0 = "3"
- num_devs = "1"
- vdev-0 = "0000:00:00.00"
- root-0 = "0000:00"
- root_num = "1"
1 = ""
vm = "/vm/a102d97e-8388-414e-97e5-cc394656c47d"
name = "router"
cpu = ""
0 = ""
@@ -249,15 +232,10 @@
event-channel-rx = "25"
request-rx-copy = "1"
feature-rx-notify = "1"
feature-sg = "1"
feature-gso-tcpv4 = "1"
- pci = ""
- 0 = ""
- backend = "/local/domain/0/backend/pci/1/0"
- backend-id = "0"
- state = "1"
control = ""
shutdown = ""
platform-feature-multiprocessor-suspend = "1"
platform-feature-xs_reset_watches = "1"
hvmloader = ""

That seems ok.

So apart from that perhaps incorrect general pci state change, the info in
xenstore seems to be OK.

So it does seem to be a lack of pciback receiving a confirm from libxl / qemu that the
device is indeed removed from the guest for the final deregistring of the
ownership.

Because after a pci-detach the pci device is:
- removed from xen-store as attached to the guest
- the iommu has assigned/mapped it back to dom0
- xl pci-assignable-list shows it as assignable again

So only pciback is the only one left out of the party :-)
Couldn't it be somehow tied to the iommu events, since it should be in sync with that i suppose ?

--
Sander

BTW, i don't see the "do_flr" anywhere in /sys/*/pciback/ ?

2014-07-14 18:46:18

by Konrad Rzeszutek Wilk

[permalink] [raw]
Subject: Re: [Xen-devel] [PATCH v4] PCI back fixes for 3.17.

On Mon, Jul 14, 2014 at 08:24:33PM +0200, Sander Eikelenboom wrote:
>
> Monday, July 14, 2014, 7:45:25 PM, you wrote:
>
> > On Mon, Jul 14, 2014 at 07:43:04PM +0200, Sander Eikelenboom wrote:
> >>
> >> Monday, July 14, 2014, 7:37:53 PM, you wrote:
> >>
> >> >> >> Ad B)
> >> >> >>
> >> >> >> root@dom0:~# xl pci-list router
> >> >> >> Vdev Device
> >> >> >> 05.0 0000:00:1b.0
> >> >> >>
> >> >> >> root@dom0:~# xl pci-assignable-list
> >> >> >> 0000:02:00.0
> >> >> >>
> >> >> >> root@dom0:~# xl pci-detach router 00:1b.0
> >> >> >> dmesg shows:
> >> >> >> [ 199.742668] pciback 0000:00:1b.0: restoring config space at offset 0x10 (was 0x4, writing 0xf7d30004)
> >> >> >> [ 199.743527] pciback 0000:00:1b.0: restoring config space at offset 0xc (was 0x0, writing 0x10)
> >> >> >> [ 199.744321] pciback 0000:00:1b.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100006)
> >> >> >> [ 199.757184] xen-pciback pci-1-0: xen_pcibk_xenbus_remove freeing pdev @ 0xffff8800589fce40
> >> >> >> [ 199.758139] xen-pciback pci-1-0: xen_pcibk_disconnect pdev @ 0xffff8800589fce40
> >> >> >> [ 199.862595] xen: xen_unregister_device_domain_owner
> >> >> >>
> >> >> >> xl dmesg shows:
> >> >> >> (XEN) [2014-07-14 16:28:29] memory_map:remove: dom1 gfn=f3070 mfn=f7d30 nr=4
> >> >> >> (XEN) [2014-07-14 16:28:29] io.c:322: d1: unbind: m_gsi=22 g_gsi=36 dev=00:00.5 intx=0
> >> >> >> (XEN) [2014-07-14 16:28:29] io.c:390: d1 unmap: m_irq=22 dev=00:00.5 intx=0
> >> >> >> (XEN) [2014-07-14 16:28:29] [VT-D]iommu.c:1579: d1:PCIe: unmap 0000:00:1b.0
> >> >> >> (XEN) [2014-07-14 16:28:29] [VT-D]iommu.c:1440: d0:PCIe: map 0000:00:1b.0
> >> >> >>
> >> >> >> root@dom0:~# xl pci-list router
> >> >> >> root@dom0:~# xl pci-assignable-list
> >> >> >> 0000:00:1b.0
> >> >> >> 0000:02:00.0
> >> >> >>
> >> >> >> root@dom0:~# xl pci-assignable-remove 00:1b.0
> >> >> >> dmesg shows:
> >> >> >> [ 318.827415] xen: xen_unregister_device_domain_owner
> >> >> >> [ 318.828771] xen: xen_unregister_device_domain_owner: ENODEV
> >> >> >> [ 318.930869] pciback 0000:00:1b.0: restoring config space at offset 0x10 (was 0x4, writing 0xf7d30004)
> >> >> >> [ 318.933435] pciback 0000:00:1b.0: restoring config space at offset 0xc (was 0x0, writing 0x10)
> >> >> >> [ 318.935877] pciback 0000:00:1b.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100006)
> >> >> >>
> >> >> >> root@dom0:~# xl pci-list router
> >> >> >> root@dom0:~# xl pci-assignable-list
> >> >> >> 0000:02:00.0
> >> >> >>
> >> >> >>
> >> >>
> >> >> > And if you do:
> >> >>
> >> >> > # xl pci-detach router 02:00.0
> >> >>
> >>
> >> > Err, I meant
> >> > # xl pci-assignable-remove 02:00.0
> >>
> >> Ah ok .. so that is:
> >> remove a device from pciback that has never been assigned to any guest ..
> >>
> >> will also give that a go .. although that probably won't be a problem.
>
> > Right. So I think it all works as expected? That is if you have
> > two PCI devices assigned to a guest and want to re-use them you
> > have to do the 'pci-detach' twice and then follow that with
> > 'pci-assignable-remove' twice as well?
>
> Well except that's not what i want :-) .. that's about the same as shutting down
> the guest .. and indeed that works :-)
>
> What i want to do is to remove just *one* device and leave the other one in the guest.
>
> The one that i remove .. i would like to be able to remove it from being
> assignable .. (rebind it in dom0) and/or assign it to another guest.

Aha
>
> And that fails because pciback still thinks the guest owns the device ..
> while libxl thinks it doesn't (outcome of xl pci-assignable-list).

Oh
>
> When doing the:
>
> # xl pci-assignable-list
> 0000:02:00.0
> # xl pci-assignable-remove 02:00.0
> dmesg shows:
> [ 443.292951] xen: xen_unregister_device_domain_owner
> [ 443.294308] xen: xen_unregister_device_domain_owner: ENODEV
> [ 443.398827] pciback 0000:02:00.0: restoring config space at offset 0x3c (was 0x100, writing 0x104)
> [ 443.400797] pciback 0000:02:00.0: restoring config space at offset 0x10 (was 0x4, writing 0xf7c00004)
> [ 443.401713] pciback 0000:02:00.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100006)
>
> which i think is what is expected ..

Right.

>
>
> Hmm /sys/bus/pci/drivers/pciback doesn't shed much light, because in the state
> just after a xl pci-detach, the device is still registred by pciback (from dom0's
> view) which is correct, what is wrong is that pciback still seems to think that
> it is in turn still owned by the guest. That doesn't get reflected in the info
> available in the /sys dir.

Oh
>
> Ok over to xen-store ...

I think I had a patch for that in libxl. It was the order of operations it was
doing - it did the 'unbind' first and then it did the XenBus operations.

>
> first a diff between the booted guest with two devices passed through and the
> state after the "xl pci-detach router 00:1b.0"
>
> root@creanuc:~# diff -U10 xs-before-detach-first.txt xs-after-detach-first.txt
> --- xs-before-detach-first.txt 2014-07-14 19:56:23.233576000 +0200
> +++ xs-after-detach-first.txt 2014-07-14 20:00:00.053576000 +0200
> @@ -185,35 +185,29 @@
> feature-rx-flip = "0"
> feature-split-event-channels = "1"
> multi-queue-max-queues = "4"
> hotplug-status = "connected"
> pci = ""
> 1 = ""
> 0 = ""
> frontend = "/local/domain/1/device/pci/0"
> frontend-id = "1"
> online = "1"
> - state = "3"
> + state = "7"
> domain = "router"
> key-0 = "0000:02:00.0"
> dev-0 = "0000:02:00.0"
> vdevfn-0 = "28"
> opts-0 = "msitranslate=0,power_mgmt=0,permissive=0"
> state-0 = "3"
> - key-1 = "0000:00:1b.0"
> - dev-1 = "0000:00:1b.0"
> - vdevfn-1 = "30"
> - opts-1 = "msitranslate=0,power_mgmt=0,permissive=0"
> - state-1 = "3"
> - num_devs = "2"
> + num_devs = "1"
> vdev-0 = "0000:00:00.00"
> - vdev-1 = "0000:00:01.00"
> root-0 = "0000:00"
> root_num = "1"
> 1 = ""
> vm = "/vm/a102d97e-8388-414e-97e5-cc394656c47d"
> name = "router"
> cpu = ""
> 0 = ""
> availability = "online"
> 1 = ""
> availability = "online"
>
> It seems ok, perhaps apart from the general pci state going from 3 to 7 ?
> Haven't looked up the exact states, but since only one domain is removes i would
> still expect it to be 3 ?

7 is Reconfiguring which is what it should move to (to update the XenBus).
>
>
> Now a diff before and after the "xl pci-assignable-remove 00:1b.0"
> diff xs-after-detach-first.txt xs-after-pci-assignable-remove-first.txt is
> empty, as expected since this is a pciback only thing now involving any guest.
>
>
> And now the last diff, between the state before and after the "xl pci-detach
> 02:00.0" removing the last pci device from the guest.
>
> root@creanuc:~# diff -U5 xs-after-pci-assignable-remove-first.txt xs-after-pci-detach-second.txt
> --- xs-after-pci-assignable-remove-first.txt 2014-07-14 20:00:45.161576000 +0200
> +++ xs-after-pci-detach-second.txt 2014-07-14 20:01:20.177576000 +0200
> @@ -184,27 +184,10 @@
> feature-rx-copy = "1"
> feature-rx-flip = "0"
> feature-split-event-channels = "1"
> multi-queue-max-queues = "4"
> hotplug-status = "connected"
> - pci = ""
> - 1 = ""
> - 0 = ""
> - frontend = "/local/domain/1/device/pci/0"
> - frontend-id = "1"
> - online = "1"
> - state = "7"
> - domain = "router"
> - key-0 = "0000:02:00.0"
> - dev-0 = "0000:02:00.0"
> - vdevfn-0 = "28"
> - opts-0 = "msitranslate=0,power_mgmt=0,permissive=0"
> - state-0 = "3"
> - num_devs = "1"
> - vdev-0 = "0000:00:00.00"
> - root-0 = "0000:00"
> - root_num = "1"
> 1 = ""
> vm = "/vm/a102d97e-8388-414e-97e5-cc394656c47d"
> name = "router"
> cpu = ""
> 0 = ""
> @@ -249,15 +232,10 @@
> event-channel-rx = "25"
> request-rx-copy = "1"
> feature-rx-notify = "1"
> feature-sg = "1"
> feature-gso-tcpv4 = "1"
> - pci = ""
> - 0 = ""
> - backend = "/local/domain/0/backend/pci/1/0"
> - backend-id = "0"
> - state = "1"
> control = ""
> shutdown = ""
> platform-feature-multiprocessor-suspend = "1"
> platform-feature-xs_reset_watches = "1"
> hvmloader = ""
>
> That seems ok.
>
> So apart from that perhaps incorrect general pci state change, the info in
> xenstore seems to be OK.
>
> So it does seem to be a lack of pciback receiving a confirm from libxl / qemu that the
> device is indeed removed from the guest for the final deregistring of the
> ownership.
>
> Because after a pci-detach the pci device is:
> - removed from xen-store as attached to the guest
> - the iommu has assigned/mapped it back to dom0
> - xl pci-assignable-list shows it as assignable again
>
> So only pciback is the only one left out of the party :-)
> Couldn't it be somehow tied to the iommu events, since it should be in sync with that i suppose ?


Let me find out what is up with this. Once the device
is de-assigned from a guest (even if forcefully) it should
be possible to assign it to another.

>
> --
> Sander
>
> BTW, i don't see the "do_flr" anywhere in /sys/*/pciback/ ?

We been having a lengthy discussion about this and David is out
on vacation so will have to wait until he is back.
>


Attachments:
(No filename) (9.33 kB)
libxl_pci.patch (1.25 kB)
Download all attachments

2014-07-14 19:01:41

by Sander Eikelenboom

[permalink] [raw]
Subject: Re: [Xen-devel] [PATCH v4] PCI back fixes for 3.17.


Monday, July 14, 2014, 8:45:47 PM, you wrote:

> On Mon, Jul 14, 2014 at 08:24:33PM +0200, Sander Eikelenboom wrote:
>>
>> Monday, July 14, 2014, 7:45:25 PM, you wrote:
>>
>> > On Mon, Jul 14, 2014 at 07:43:04PM +0200, Sander Eikelenboom wrote:
>> >>
>> >> Monday, July 14, 2014, 7:37:53 PM, you wrote:
>> >>
>> >> >> >> Ad B)
>> >> >> >>
>> >> >> >> root@dom0:~# xl pci-list router
>> >> >> >> Vdev Device
>> >> >> >> 05.0 0000:00:1b.0
>> >> >> >>
>> >> >> >> root@dom0:~# xl pci-assignable-list
>> >> >> >> 0000:02:00.0
>> >> >> >>
>> >> >> >> root@dom0:~# xl pci-detach router 00:1b.0
>> >> >> >> dmesg shows:
>> >> >> >> [ 199.742668] pciback 0000:00:1b.0: restoring config space at offset 0x10 (was 0x4, writing 0xf7d30004)
>> >> >> >> [ 199.743527] pciback 0000:00:1b.0: restoring config space at offset 0xc (was 0x0, writing 0x10)
>> >> >> >> [ 199.744321] pciback 0000:00:1b.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100006)
>> >> >> >> [ 199.757184] xen-pciback pci-1-0: xen_pcibk_xenbus_remove freeing pdev @ 0xffff8800589fce40
>> >> >> >> [ 199.758139] xen-pciback pci-1-0: xen_pcibk_disconnect pdev @ 0xffff8800589fce40
>> >> >> >> [ 199.862595] xen: xen_unregister_device_domain_owner
>> >> >> >>
>> >> >> >> xl dmesg shows:
>> >> >> >> (XEN) [2014-07-14 16:28:29] memory_map:remove: dom1 gfn=f3070 mfn=f7d30 nr=4
>> >> >> >> (XEN) [2014-07-14 16:28:29] io.c:322: d1: unbind: m_gsi=22 g_gsi=36 dev=00:00.5 intx=0
>> >> >> >> (XEN) [2014-07-14 16:28:29] io.c:390: d1 unmap: m_irq=22 dev=00:00.5 intx=0
>> >> >> >> (XEN) [2014-07-14 16:28:29] [VT-D]iommu.c:1579: d1:PCIe: unmap 0000:00:1b.0
>> >> >> >> (XEN) [2014-07-14 16:28:29] [VT-D]iommu.c:1440: d0:PCIe: map 0000:00:1b.0
>> >> >> >>
>> >> >> >> root@dom0:~# xl pci-list router
>> >> >> >> root@dom0:~# xl pci-assignable-list
>> >> >> >> 0000:00:1b.0
>> >> >> >> 0000:02:00.0
>> >> >> >>
>> >> >> >> root@dom0:~# xl pci-assignable-remove 00:1b.0
>> >> >> >> dmesg shows:
>> >> >> >> [ 318.827415] xen: xen_unregister_device_domain_owner
>> >> >> >> [ 318.828771] xen: xen_unregister_device_domain_owner: ENODEV
>> >> >> >> [ 318.930869] pciback 0000:00:1b.0: restoring config space at offset 0x10 (was 0x4, writing 0xf7d30004)
>> >> >> >> [ 318.933435] pciback 0000:00:1b.0: restoring config space at offset 0xc (was 0x0, writing 0x10)
>> >> >> >> [ 318.935877] pciback 0000:00:1b.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100006)
>> >> >> >>
>> >> >> >> root@dom0:~# xl pci-list router
>> >> >> >> root@dom0:~# xl pci-assignable-list
>> >> >> >> 0000:02:00.0
>> >> >> >>
>> >> >> >>
>> >> >>
>> >> >> > And if you do:
>> >> >>
>> >> >> > # xl pci-detach router 02:00.0
>> >> >>
>> >>
>> >> > Err, I meant
>> >> > # xl pci-assignable-remove 02:00.0
>> >>
>> >> Ah ok .. so that is:
>> >> remove a device from pciback that has never been assigned to any guest ..
>> >>
>> >> will also give that a go .. although that probably won't be a problem.
>>
>> > Right. So I think it all works as expected? That is if you have
>> > two PCI devices assigned to a guest and want to re-use them you
>> > have to do the 'pci-detach' twice and then follow that with
>> > 'pci-assignable-remove' twice as well?
>>
>> Well except that's not what i want :-) .. that's about the same as shutting down
>> the guest .. and indeed that works :-)
>>
>> What i want to do is to remove just *one* device and leave the other one in the guest.
>>
>> The one that i remove .. i would like to be able to remove it from being
>> assignable .. (rebind it in dom0) and/or assign it to another guest.

> Aha
>>
>> And that fails because pciback still thinks the guest owns the device ..
>> while libxl thinks it doesn't (outcome of xl pci-assignable-list).

> Oh
>>
>> When doing the:
>>
>> # xl pci-assignable-list
>> 0000:02:00.0
>> # xl pci-assignable-remove 02:00.0
>> dmesg shows:
>> [ 443.292951] xen: xen_unregister_device_domain_owner
>> [ 443.294308] xen: xen_unregister_device_domain_owner: ENODEV
>> [ 443.398827] pciback 0000:02:00.0: restoring config space at offset 0x3c (was 0x100, writing 0x104)
>> [ 443.400797] pciback 0000:02:00.0: restoring config space at offset 0x10 (was 0x4, writing 0xf7c00004)
>> [ 443.401713] pciback 0000:02:00.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100006)
>>
>> which i think is what is expected ..

> Right.

>>
>>
>> Hmm /sys/bus/pci/drivers/pciback doesn't shed much light, because in the state
>> just after a xl pci-detach, the device is still registred by pciback (from dom0's
>> view) which is correct, what is wrong is that pciback still seems to think that
>> it is in turn still owned by the guest. That doesn't get reflected in the info
>> available in the /sys dir.

> Oh
>>
>> Ok over to xen-store ...

> I think I had a patch for that in libxl. It was the order of operations it was
> doing - it did the 'unbind' first and then it did the XenBus operations.

>>
>> first a diff between the booted guest with two devices passed through and the
>> state after the "xl pci-detach router 00:1b.0"
>>
>> root@creanuc:~# diff -U10 xs-before-detach-first.txt xs-after-detach-first.txt
>> --- xs-before-detach-first.txt 2014-07-14 19:56:23.233576000 +0200
>> +++ xs-after-detach-first.txt 2014-07-14 20:00:00.053576000 +0200
>> @@ -185,35 +185,29 @@
>> feature-rx-flip = "0"
>> feature-split-event-channels = "1"
>> multi-queue-max-queues = "4"
>> hotplug-status = "connected"
>> pci = ""
>> 1 = ""
>> 0 = ""
>> frontend = "/local/domain/1/device/pci/0"
>> frontend-id = "1"
>> online = "1"
>> - state = "3"
>> + state = "7"
>> domain = "router"
>> key-0 = "0000:02:00.0"
>> dev-0 = "0000:02:00.0"
>> vdevfn-0 = "28"
>> opts-0 = "msitranslate=0,power_mgmt=0,permissive=0"
>> state-0 = "3"
>> - key-1 = "0000:00:1b.0"
>> - dev-1 = "0000:00:1b.0"
>> - vdevfn-1 = "30"
>> - opts-1 = "msitranslate=0,power_mgmt=0,permissive=0"
>> - state-1 = "3"
>> - num_devs = "2"
>> + num_devs = "1"
>> vdev-0 = "0000:00:00.00"
>> - vdev-1 = "0000:00:01.00"
>> root-0 = "0000:00"
>> root_num = "1"
>> 1 = ""
>> vm = "/vm/a102d97e-8388-414e-97e5-cc394656c47d"
>> name = "router"
>> cpu = ""
>> 0 = ""
>> availability = "online"
>> 1 = ""
>> availability = "online"
>>
>> It seems ok, perhaps apart from the general pci state going from 3 to 7 ?
>> Haven't looked up the exact states, but since only one domain is removes i would
>> still expect it to be 3 ?

> 7 is Reconfiguring which is what it should move to (to update the XenBus).

Ok, but that should than be acknowledged somewhere by someone i suppose .. and
not stay in that state ?

>>
>>
>> Now a diff before and after the "xl pci-assignable-remove 00:1b.0"
>> diff xs-after-detach-first.txt xs-after-pci-assignable-remove-first.txt is
>> empty, as expected since this is a pciback only thing now involving any guest.
>>
>>
>> And now the last diff, between the state before and after the "xl pci-detach
>> 02:00.0" removing the last pci device from the guest.
>>
>> root@creanuc:~# diff -U5 xs-after-pci-assignable-remove-first.txt xs-after-pci-detach-second.txt
>> --- xs-after-pci-assignable-remove-first.txt 2014-07-14 20:00:45.161576000 +0200
>> +++ xs-after-pci-detach-second.txt 2014-07-14 20:01:20.177576000 +0200
>> @@ -184,27 +184,10 @@
>> feature-rx-copy = "1"
>> feature-rx-flip = "0"
>> feature-split-event-channels = "1"
>> multi-queue-max-queues = "4"
>> hotplug-status = "connected"
>> - pci = ""
>> - 1 = ""
>> - 0 = ""
>> - frontend = "/local/domain/1/device/pci/0"
>> - frontend-id = "1"
>> - online = "1"
>> - state = "7"
>> - domain = "router"
>> - key-0 = "0000:02:00.0"
>> - dev-0 = "0000:02:00.0"
>> - vdevfn-0 = "28"
>> - opts-0 = "msitranslate=0,power_mgmt=0,permissive=0"
>> - state-0 = "3"
>> - num_devs = "1"
>> - vdev-0 = "0000:00:00.00"
>> - root-0 = "0000:00"
>> - root_num = "1"
>> 1 = ""
>> vm = "/vm/a102d97e-8388-414e-97e5-cc394656c47d"
>> name = "router"
>> cpu = ""
>> 0 = ""
>> @@ -249,15 +232,10 @@
>> event-channel-rx = "25"
>> request-rx-copy = "1"
>> feature-rx-notify = "1"
>> feature-sg = "1"
>> feature-gso-tcpv4 = "1"
>> - pci = ""
>> - 0 = ""
>> - backend = "/local/domain/0/backend/pci/1/0"
>> - backend-id = "0"
>> - state = "1"
>> control = ""
>> shutdown = ""
>> platform-feature-multiprocessor-suspend = "1"
>> platform-feature-xs_reset_watches = "1"
>> hvmloader = ""
>>
>> That seems ok.
>>
>> So apart from that perhaps incorrect general pci state change, the info in
>> xenstore seems to be OK.
>>
>> So it does seem to be a lack of pciback receiving a confirm from libxl / qemu that the
>> device is indeed removed from the guest for the final deregistring of the
>> ownership.
>>
>> Because after a pci-detach the pci device is:
>> - removed from xen-store as attached to the guest
>> - the iommu has assigned/mapped it back to dom0
>> - xl pci-assignable-list shows it as assignable again
>>
>> So only pciback is the only one left out of the party :-)
>> Couldn't it be somehow tied to the iommu events, since it should be in sync with that i suppose ?


> Let me find out what is up with this. Once the device
> is de-assigned from a guest (even if forcefully) it should
> be possible to assign it to another.

Yes .. at least for the rebinding to dom0 case, because the
pci-assignable-remove seems to do it forcefully now. But i haven't tried what it
does when reassigning to another guest (if it forcefully swaps ownership then,
since the pci-assignable-remove isn't involved then).

Will give the xen patch a try.

>>
>> --
>> Sander
>>
>> BTW, i don't see the "do_flr" anywhere in /sys/*/pciback/ ?

> We been having a lengthy discussion about this and David is out
> on vacation so will have to wait until he is back.

>>

2014-07-14 19:50:58

by Sander Eikelenboom

[permalink] [raw]
Subject: Re: [Xen-devel] [PATCH v4] PCI back fixes for 3.17.


Monday, July 14, 2014, 9:01:29 PM, you wrote:


> Monday, July 14, 2014, 8:45:47 PM, you wrote:

>> On Mon, Jul 14, 2014 at 08:24:33PM +0200, Sander Eikelenboom wrote:
>>>
>>> Monday, July 14, 2014, 7:45:25 PM, you wrote:
>>>
>>> > On Mon, Jul 14, 2014 at 07:43:04PM +0200, Sander Eikelenboom wrote:
>>> >>
>>> >> Monday, July 14, 2014, 7:37:53 PM, you wrote:
>>> >>
>>> >> >> >> Ad B)
>>> >> >> >>
>>> >> >> >> root@dom0:~# xl pci-list router
>>> >> >> >> Vdev Device
>>> >> >> >> 05.0 0000:00:1b.0
>>> >> >> >>
>>> >> >> >> root@dom0:~# xl pci-assignable-list
>>> >> >> >> 0000:02:00.0
>>> >> >> >>
>>> >> >> >> root@dom0:~# xl pci-detach router 00:1b.0
>>> >> >> >> dmesg shows:
>>> >> >> >> [ 199.742668] pciback 0000:00:1b.0: restoring config space at offset 0x10 (was 0x4, writing 0xf7d30004)
>>> >> >> >> [ 199.743527] pciback 0000:00:1b.0: restoring config space at offset 0xc (was 0x0, writing 0x10)
>>> >> >> >> [ 199.744321] pciback 0000:00:1b.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100006)
>>> >> >> >> [ 199.757184] xen-pciback pci-1-0: xen_pcibk_xenbus_remove freeing pdev @ 0xffff8800589fce40
>>> >> >> >> [ 199.758139] xen-pciback pci-1-0: xen_pcibk_disconnect pdev @ 0xffff8800589fce40
>>> >> >> >> [ 199.862595] xen: xen_unregister_device_domain_owner
>>> >> >> >>
>>> >> >> >> xl dmesg shows:
>>> >> >> >> (XEN) [2014-07-14 16:28:29] memory_map:remove: dom1 gfn=f3070 mfn=f7d30 nr=4
>>> >> >> >> (XEN) [2014-07-14 16:28:29] io.c:322: d1: unbind: m_gsi=22 g_gsi=36 dev=00:00.5 intx=0
>>> >> >> >> (XEN) [2014-07-14 16:28:29] io.c:390: d1 unmap: m_irq=22 dev=00:00.5 intx=0
>>> >> >> >> (XEN) [2014-07-14 16:28:29] [VT-D]iommu.c:1579: d1:PCIe: unmap 0000:00:1b.0
>>> >> >> >> (XEN) [2014-07-14 16:28:29] [VT-D]iommu.c:1440: d0:PCIe: map 0000:00:1b.0
>>> >> >> >>
>>> >> >> >> root@dom0:~# xl pci-list router
>>> >> >> >> root@dom0:~# xl pci-assignable-list
>>> >> >> >> 0000:00:1b.0
>>> >> >> >> 0000:02:00.0
>>> >> >> >>
>>> >> >> >> root@dom0:~# xl pci-assignable-remove 00:1b.0
>>> >> >> >> dmesg shows:
>>> >> >> >> [ 318.827415] xen: xen_unregister_device_domain_owner
>>> >> >> >> [ 318.828771] xen: xen_unregister_device_domain_owner: ENODEV
>>> >> >> >> [ 318.930869] pciback 0000:00:1b.0: restoring config space at offset 0x10 (was 0x4, writing 0xf7d30004)
>>> >> >> >> [ 318.933435] pciback 0000:00:1b.0: restoring config space at offset 0xc (was 0x0, writing 0x10)
>>> >> >> >> [ 318.935877] pciback 0000:00:1b.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100006)
>>> >> >> >>
>>> >> >> >> root@dom0:~# xl pci-list router
>>> >> >> >> root@dom0:~# xl pci-assignable-list
>>> >> >> >> 0000:02:00.0
>>> >> >> >>
>>> >> >> >>
>>> >> >>
>>> >> >> > And if you do:
>>> >> >>
>>> >> >> > # xl pci-detach router 02:00.0
>>> >> >>
>>> >>
>>> >> > Err, I meant
>>> >> > # xl pci-assignable-remove 02:00.0
>>> >>
>>> >> Ah ok .. so that is:
>>> >> remove a device from pciback that has never been assigned to any guest ..
>>> >>
>>> >> will also give that a go .. although that probably won't be a problem.
>>>
>>> > Right. So I think it all works as expected? That is if you have
>>> > two PCI devices assigned to a guest and want to re-use them you
>>> > have to do the 'pci-detach' twice and then follow that with
>>> > 'pci-assignable-remove' twice as well?
>>>
>>> Well except that's not what i want :-) .. that's about the same as shutting down
>>> the guest .. and indeed that works :-)
>>>
>>> What i want to do is to remove just *one* device and leave the other one in the guest.
>>>
>>> The one that i remove .. i would like to be able to remove it from being
>>> assignable .. (rebind it in dom0) and/or assign it to another guest.

>> Aha
>>>
>>> And that fails because pciback still thinks the guest owns the device ..
>>> while libxl thinks it doesn't (outcome of xl pci-assignable-list).

>> Oh
>>>
>>> When doing the:
>>>
>>> # xl pci-assignable-list
>>> 0000:02:00.0
>>> # xl pci-assignable-remove 02:00.0
>>> dmesg shows:
>>> [ 443.292951] xen: xen_unregister_device_domain_owner
>>> [ 443.294308] xen: xen_unregister_device_domain_owner: ENODEV
>>> [ 443.398827] pciback 0000:02:00.0: restoring config space at offset 0x3c (was 0x100, writing 0x104)
>>> [ 443.400797] pciback 0000:02:00.0: restoring config space at offset 0x10 (was 0x4, writing 0xf7c00004)
>>> [ 443.401713] pciback 0000:02:00.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100006)
>>>
>>> which i think is what is expected ..

>> Right.

>>>
>>>
>>> Hmm /sys/bus/pci/drivers/pciback doesn't shed much light, because in the state
>>> just after a xl pci-detach, the device is still registred by pciback (from dom0's
>>> view) which is correct, what is wrong is that pciback still seems to think that
>>> it is in turn still owned by the guest. That doesn't get reflected in the info
>>> available in the /sys dir.

>> Oh
>>>
>>> Ok over to xen-store ...

>> I think I had a patch for that in libxl. It was the order of operations it was
>> doing - it did the 'unbind' first and then it did the XenBus operations.

>>>
>>> first a diff between the booted guest with two devices passed through and the
>>> state after the "xl pci-detach router 00:1b.0"
>>>
>>> root@creanuc:~# diff -U10 xs-before-detach-first.txt xs-after-detach-first.txt
>>> --- xs-before-detach-first.txt 2014-07-14 19:56:23.233576000 +0200
>>> +++ xs-after-detach-first.txt 2014-07-14 20:00:00.053576000 +0200
>>> @@ -185,35 +185,29 @@
>>> feature-rx-flip = "0"
>>> feature-split-event-channels = "1"
>>> multi-queue-max-queues = "4"
>>> hotplug-status = "connected"
>>> pci = ""
>>> 1 = ""
>>> 0 = ""
>>> frontend = "/local/domain/1/device/pci/0"
>>> frontend-id = "1"
>>> online = "1"
>>> - state = "3"
>>> + state = "7"
>>> domain = "router"
>>> key-0 = "0000:02:00.0"
>>> dev-0 = "0000:02:00.0"
>>> vdevfn-0 = "28"
>>> opts-0 = "msitranslate=0,power_mgmt=0,permissive=0"
>>> state-0 = "3"
>>> - key-1 = "0000:00:1b.0"
>>> - dev-1 = "0000:00:1b.0"
>>> - vdevfn-1 = "30"
>>> - opts-1 = "msitranslate=0,power_mgmt=0,permissive=0"
>>> - state-1 = "3"
>>> - num_devs = "2"
>>> + num_devs = "1"
>>> vdev-0 = "0000:00:00.00"
>>> - vdev-1 = "0000:00:01.00"
>>> root-0 = "0000:00"
>>> root_num = "1"
>>> 1 = ""
>>> vm = "/vm/a102d97e-8388-414e-97e5-cc394656c47d"
>>> name = "router"
>>> cpu = ""
>>> 0 = ""
>>> availability = "online"
>>> 1 = ""
>>> availability = "online"
>>>
>>> It seems ok, perhaps apart from the general pci state going from 3 to 7 ?
>>> Haven't looked up the exact states, but since only one domain is removes i would
>>> still expect it to be 3 ?

>> 7 is Reconfiguring which is what it should move to (to update the XenBus).

> Ok, but that should than be acknowledged somewhere by someone i suppose .. and
> not stay in that state ?

>>>
>>>
>>> Now a diff before and after the "xl pci-assignable-remove 00:1b.0"
>>> diff xs-after-detach-first.txt xs-after-pci-assignable-remove-first.txt is
>>> empty, as expected since this is a pciback only thing now involving any guest.
>>>
>>>
>>> And now the last diff, between the state before and after the "xl pci-detach
>>> 02:00.0" removing the last pci device from the guest.
>>>
>>> root@creanuc:~# diff -U5 xs-after-pci-assignable-remove-first.txt xs-after-pci-detach-second.txt
>>> --- xs-after-pci-assignable-remove-first.txt 2014-07-14 20:00:45.161576000 +0200
>>> +++ xs-after-pci-detach-second.txt 2014-07-14 20:01:20.177576000 +0200
>>> @@ -184,27 +184,10 @@
>>> feature-rx-copy = "1"
>>> feature-rx-flip = "0"
>>> feature-split-event-channels = "1"
>>> multi-queue-max-queues = "4"
>>> hotplug-status = "connected"
>>> - pci = ""
>>> - 1 = ""
>>> - 0 = ""
>>> - frontend = "/local/domain/1/device/pci/0"
>>> - frontend-id = "1"
>>> - online = "1"
>>> - state = "7"
>>> - domain = "router"
>>> - key-0 = "0000:02:00.0"
>>> - dev-0 = "0000:02:00.0"
>>> - vdevfn-0 = "28"
>>> - opts-0 = "msitranslate=0,power_mgmt=0,permissive=0"
>>> - state-0 = "3"
>>> - num_devs = "1"
>>> - vdev-0 = "0000:00:00.00"
>>> - root-0 = "0000:00"
>>> - root_num = "1"
>>> 1 = ""
>>> vm = "/vm/a102d97e-8388-414e-97e5-cc394656c47d"
>>> name = "router"
>>> cpu = ""
>>> 0 = ""
>>> @@ -249,15 +232,10 @@
>>> event-channel-rx = "25"
>>> request-rx-copy = "1"
>>> feature-rx-notify = "1"
>>> feature-sg = "1"
>>> feature-gso-tcpv4 = "1"
>>> - pci = ""
>>> - 0 = ""
>>> - backend = "/local/domain/0/backend/pci/1/0"
>>> - backend-id = "0"
>>> - state = "1"
>>> control = ""
>>> shutdown = ""
>>> platform-feature-multiprocessor-suspend = "1"
>>> platform-feature-xs_reset_watches = "1"
>>> hvmloader = ""
>>>
>>> That seems ok.
>>>
>>> So apart from that perhaps incorrect general pci state change, the info in
>>> xenstore seems to be OK.
>>>
>>> So it does seem to be a lack of pciback receiving a confirm from libxl / qemu that the
>>> device is indeed removed from the guest for the final deregistring of the
>>> ownership.
>>>
>>> Because after a pci-detach the pci device is:
>>> - removed from xen-store as attached to the guest
>>> - the iommu has assigned/mapped it back to dom0
>>> - xl pci-assignable-list shows it as assignable again
>>>
>>> So only pciback is the only one left out of the party :-)
>>> Couldn't it be somehow tied to the iommu events, since it should be in sync with that i suppose ?


>> Let me find out what is up with this. Once the device
>> is de-assigned from a guest (even if forcefully) it should
>> be possible to assign it to another.

> Yes .. at least for the rebinding to dom0 case, because the
> pci-assignable-remove seems to do it forcefully now. But i haven't tried what it
> does when reassigning to another guest (if it forcefully swaps ownership then,
> since the pci-assignable-remove isn't involved then).

> Will give the xen patch a try.

The patch doesn't make a difference.

>>>
>>> --
>>> Sander
>>>
>>> BTW, i don't see the "do_flr" anywhere in /sys/*/pciback/ ?

>> We been having a lengthy discussion about this and David is out
>> on vacation so will have to wait until he is back.

>>>



2014-07-14 19:54:19

by Konrad Rzeszutek Wilk

[permalink] [raw]
Subject: Re: [Xen-devel] [PATCH v4] PCI back fixes for 3.17.

On Mon, Jul 14, 2014 at 09:01:29PM +0200, Sander Eikelenboom wrote:
>
> Monday, July 14, 2014, 8:45:47 PM, you wrote:
>
> > On Mon, Jul 14, 2014 at 08:24:33PM +0200, Sander Eikelenboom wrote:
> >>
> >> Monday, July 14, 2014, 7:45:25 PM, you wrote:
> >>
> >> > On Mon, Jul 14, 2014 at 07:43:04PM +0200, Sander Eikelenboom wrote:
> >> >>
> >> >> Monday, July 14, 2014, 7:37:53 PM, you wrote:
> >> >>
> >> >> >> >> Ad B)
> >> >> >> >>
> >> >> >> >> root@dom0:~# xl pci-list router
> >> >> >> >> Vdev Device
> >> >> >> >> 05.0 0000:00:1b.0
> >> >> >> >>
> >> >> >> >> root@dom0:~# xl pci-assignable-list
> >> >> >> >> 0000:02:00.0
> >> >> >> >>
> >> >> >> >> root@dom0:~# xl pci-detach router 00:1b.0
> >> >> >> >> dmesg shows:
> >> >> >> >> [ 199.742668] pciback 0000:00:1b.0: restoring config space at offset 0x10 (was 0x4, writing 0xf7d30004)
> >> >> >> >> [ 199.743527] pciback 0000:00:1b.0: restoring config space at offset 0xc (was 0x0, writing 0x10)
> >> >> >> >> [ 199.744321] pciback 0000:00:1b.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100006)
> >> >> >> >> [ 199.757184] xen-pciback pci-1-0: xen_pcibk_xenbus_remove freeing pdev @ 0xffff8800589fce40
> >> >> >> >> [ 199.758139] xen-pciback pci-1-0: xen_pcibk_disconnect pdev @ 0xffff8800589fce40
> >> >> >> >> [ 199.862595] xen: xen_unregister_device_domain_owner
> >> >> >> >>
> >> >> >> >> xl dmesg shows:
> >> >> >> >> (XEN) [2014-07-14 16:28:29] memory_map:remove: dom1 gfn=f3070 mfn=f7d30 nr=4
> >> >> >> >> (XEN) [2014-07-14 16:28:29] io.c:322: d1: unbind: m_gsi=22 g_gsi=36 dev=00:00.5 intx=0
> >> >> >> >> (XEN) [2014-07-14 16:28:29] io.c:390: d1 unmap: m_irq=22 dev=00:00.5 intx=0
> >> >> >> >> (XEN) [2014-07-14 16:28:29] [VT-D]iommu.c:1579: d1:PCIe: unmap 0000:00:1b.0
> >> >> >> >> (XEN) [2014-07-14 16:28:29] [VT-D]iommu.c:1440: d0:PCIe: map 0000:00:1b.0
> >> >> >> >>
> >> >> >> >> root@dom0:~# xl pci-list router
> >> >> >> >> root@dom0:~# xl pci-assignable-list
> >> >> >> >> 0000:00:1b.0
> >> >> >> >> 0000:02:00.0
> >> >> >> >>
> >> >> >> >> root@dom0:~# xl pci-assignable-remove 00:1b.0
> >> >> >> >> dmesg shows:
> >> >> >> >> [ 318.827415] xen: xen_unregister_device_domain_owner
> >> >> >> >> [ 318.828771] xen: xen_unregister_device_domain_owner: ENODEV
> >> >> >> >> [ 318.930869] pciback 0000:00:1b.0: restoring config space at offset 0x10 (was 0x4, writing 0xf7d30004)
> >> >> >> >> [ 318.933435] pciback 0000:00:1b.0: restoring config space at offset 0xc (was 0x0, writing 0x10)
> >> >> >> >> [ 318.935877] pciback 0000:00:1b.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100006)
> >> >> >> >>
> >> >> >> >> root@dom0:~# xl pci-list router
> >> >> >> >> root@dom0:~# xl pci-assignable-list
> >> >> >> >> 0000:02:00.0
> >> >> >> >>
> >> >> >> >>
> >> >> >>
> >> >> >> > And if you do:
> >> >> >>
> >> >> >> > # xl pci-detach router 02:00.0
> >> >> >>
> >> >>
> >> >> > Err, I meant
> >> >> > # xl pci-assignable-remove 02:00.0
> >> >>
> >> >> Ah ok .. so that is:
> >> >> remove a device from pciback that has never been assigned to any guest ..
> >> >>
> >> >> will also give that a go .. although that probably won't be a problem.
> >>
> >> > Right. So I think it all works as expected? That is if you have
> >> > two PCI devices assigned to a guest and want to re-use them you
> >> > have to do the 'pci-detach' twice and then follow that with
> >> > 'pci-assignable-remove' twice as well?
> >>
> >> Well except that's not what i want :-) .. that's about the same as shutting down
> >> the guest .. and indeed that works :-)
> >>
> >> What i want to do is to remove just *one* device and leave the other one in the guest.
> >>
> >> The one that i remove .. i would like to be able to remove it from being
> >> assignable .. (rebind it in dom0) and/or assign it to another guest.
>
> > Aha
> >>
> >> And that fails because pciback still thinks the guest owns the device ..
> >> while libxl thinks it doesn't (outcome of xl pci-assignable-list).
>
> > Oh
> >>
> >> When doing the:
> >>
> >> # xl pci-assignable-list
> >> 0000:02:00.0
> >> # xl pci-assignable-remove 02:00.0
> >> dmesg shows:
> >> [ 443.292951] xen: xen_unregister_device_domain_owner
> >> [ 443.294308] xen: xen_unregister_device_domain_owner: ENODEV
> >> [ 443.398827] pciback 0000:02:00.0: restoring config space at offset 0x3c (was 0x100, writing 0x104)
> >> [ 443.400797] pciback 0000:02:00.0: restoring config space at offset 0x10 (was 0x4, writing 0xf7c00004)
> >> [ 443.401713] pciback 0000:02:00.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100006)
> >>
> >> which i think is what is expected ..
>
> > Right.
>
> >>
> >>
> >> Hmm /sys/bus/pci/drivers/pciback doesn't shed much light, because in the state
> >> just after a xl pci-detach, the device is still registred by pciback (from dom0's
> >> view) which is correct, what is wrong is that pciback still seems to think that
> >> it is in turn still owned by the guest. That doesn't get reflected in the info
> >> available in the /sys dir.
>
> > Oh
> >>
> >> Ok over to xen-store ...
>
> > I think I had a patch for that in libxl. It was the order of operations it was
> > doing - it did the 'unbind' first and then it did the XenBus operations.
>
> >>
> >> first a diff between the booted guest with two devices passed through and the
> >> state after the "xl pci-detach router 00:1b.0"
> >>
> >> root@creanuc:~# diff -U10 xs-before-detach-first.txt xs-after-detach-first.txt
> >> --- xs-before-detach-first.txt 2014-07-14 19:56:23.233576000 +0200
> >> +++ xs-after-detach-first.txt 2014-07-14 20:00:00.053576000 +0200
> >> @@ -185,35 +185,29 @@
> >> feature-rx-flip = "0"
> >> feature-split-event-channels = "1"
> >> multi-queue-max-queues = "4"
> >> hotplug-status = "connected"
> >> pci = ""
> >> 1 = ""
> >> 0 = ""
> >> frontend = "/local/domain/1/device/pci/0"
> >> frontend-id = "1"
> >> online = "1"
> >> - state = "3"
> >> + state = "7"
> >> domain = "router"
> >> key-0 = "0000:02:00.0"
> >> dev-0 = "0000:02:00.0"
> >> vdevfn-0 = "28"
> >> opts-0 = "msitranslate=0,power_mgmt=0,permissive=0"
> >> state-0 = "3"
> >> - key-1 = "0000:00:1b.0"
> >> - dev-1 = "0000:00:1b.0"
> >> - vdevfn-1 = "30"
> >> - opts-1 = "msitranslate=0,power_mgmt=0,permissive=0"
> >> - state-1 = "3"
> >> - num_devs = "2"
> >> + num_devs = "1"
> >> vdev-0 = "0000:00:00.00"
> >> - vdev-1 = "0000:00:01.00"
> >> root-0 = "0000:00"
> >> root_num = "1"
> >> 1 = ""
> >> vm = "/vm/a102d97e-8388-414e-97e5-cc394656c47d"
> >> name = "router"
> >> cpu = ""
> >> 0 = ""
> >> availability = "online"
> >> 1 = ""
> >> availability = "online"
> >>
> >> It seems ok, perhaps apart from the general pci state going from 3 to 7 ?
> >> Haven't looked up the exact states, but since only one domain is removes i would
> >> still expect it to be 3 ?
>
> > 7 is Reconfiguring which is what it should move to (to update the XenBus).
>
> Ok, but that should than be acknowledged somewhere by someone i suppose .. and
> not stay in that state ?
>
> >>
> >>
> >> Now a diff before and after the "xl pci-assignable-remove 00:1b.0"
> >> diff xs-after-detach-first.txt xs-after-pci-assignable-remove-first.txt is
> >> empty, as expected since this is a pciback only thing now involving any guest.
> >>
> >>
> >> And now the last diff, between the state before and after the "xl pci-detach
> >> 02:00.0" removing the last pci device from the guest.
> >>
> >> root@creanuc:~# diff -U5 xs-after-pci-assignable-remove-first.txt xs-after-pci-detach-second.txt
> >> --- xs-after-pci-assignable-remove-first.txt 2014-07-14 20:00:45.161576000 +0200
> >> +++ xs-after-pci-detach-second.txt 2014-07-14 20:01:20.177576000 +0200
> >> @@ -184,27 +184,10 @@
> >> feature-rx-copy = "1"
> >> feature-rx-flip = "0"
> >> feature-split-event-channels = "1"
> >> multi-queue-max-queues = "4"
> >> hotplug-status = "connected"
> >> - pci = ""
> >> - 1 = ""
> >> - 0 = ""
> >> - frontend = "/local/domain/1/device/pci/0"
> >> - frontend-id = "1"
> >> - online = "1"
> >> - state = "7"
> >> - domain = "router"
> >> - key-0 = "0000:02:00.0"
> >> - dev-0 = "0000:02:00.0"
> >> - vdevfn-0 = "28"
> >> - opts-0 = "msitranslate=0,power_mgmt=0,permissive=0"
> >> - state-0 = "3"
> >> - num_devs = "1"
> >> - vdev-0 = "0000:00:00.00"
> >> - root-0 = "0000:00"
> >> - root_num = "1"
> >> 1 = ""
> >> vm = "/vm/a102d97e-8388-414e-97e5-cc394656c47d"
> >> name = "router"
> >> cpu = ""
> >> 0 = ""
> >> @@ -249,15 +232,10 @@
> >> event-channel-rx = "25"
> >> request-rx-copy = "1"
> >> feature-rx-notify = "1"
> >> feature-sg = "1"
> >> feature-gso-tcpv4 = "1"
> >> - pci = ""
> >> - 0 = ""
> >> - backend = "/local/domain/0/backend/pci/1/0"
> >> - backend-id = "0"
> >> - state = "1"
> >> control = ""
> >> shutdown = ""
> >> platform-feature-multiprocessor-suspend = "1"
> >> platform-feature-xs_reset_watches = "1"
> >> hvmloader = ""
> >>
> >> That seems ok.
> >>
> >> So apart from that perhaps incorrect general pci state change, the info in
> >> xenstore seems to be OK.
> >>
> >> So it does seem to be a lack of pciback receiving a confirm from libxl / qemu that the
> >> device is indeed removed from the guest for the final deregistring of the
> >> ownership.
> >>
> >> Because after a pci-detach the pci device is:
> >> - removed from xen-store as attached to the guest
> >> - the iommu has assigned/mapped it back to dom0
> >> - xl pci-assignable-list shows it as assignable again
> >>
> >> So only pciback is the only one left out of the party :-)
> >> Couldn't it be somehow tied to the iommu events, since it should be in sync with that i suppose ?
>
>
> > Let me find out what is up with this. Once the device
> > is de-assigned from a guest (even if forcefully) it should
> > be possible to assign it to another.
>
> Yes .. at least for the rebinding to dom0 case, because the
> pci-assignable-remove seems to do it forcefully now. But i haven't tried what it
> does when reassigning to another guest (if it forcefully swaps ownership then,
> since the pci-assignable-remove isn't involved then).
>
> Will give the xen patch a try.

I found out what the problem is. It has been in the code for eons and
I think we never tested for it because the older toolstack (xend) would
do things differently.

The issue here is:

1) xl pci-detach <dom-id> <BDF>

'xl' removes the whole device from the XenStore, so from:
domain = "USB"

vdevfn-0 = "28"
opts-0 = "msitranslate=0,power_mgmt=0,permissive=0"
state-0 = "3"
vdev-0 = "0000:00:00.00"
root-0 = "0000:00"
key-0 = "0000:07:00.0"
dev-0 = "0000:07:00.0"

key-1 = "0000:04:00.0"
dev-1 = "0000:04:00.0"
vdevfn-1 = "30"
opts-1 = "msitranslate=0,power_mgmt=0,permissive=0"
state-1 = "3"
num_devs = "2"
vdev-1 = "0000:00:01.00"
root_num = "1"

it just yanks out the '*-1' values, so we have:

domain = "USB"

vdevfn-0 = "28"
opts-0 = "msitranslate=0,power_mgmt=0,permissive=0"
state-0 = "3"
vdev-0 = "0000:00:00.00"
root-0 = "0000:00"
key-0 = "0000:07:00.0"
dev-0 = "0000:07:00.0"

num_devs = "1"

While if the frontend was PV (so xen-pcifront) it would first but
the state-1 to 5 (Closed) which would let the Xen pciback deallocate this.

Anyhow, Xen pciback should be able to deal with this, but the patch
is going to take a bit of time to figure out.

Thank you for reporting it and helping me with the XenStore output to
narrow down the culprit!

2014-07-14 20:16:26

by Sander Eikelenboom

[permalink] [raw]
Subject: Re: [Xen-devel] [PATCH v4] PCI back fixes for 3.17.


Monday, July 14, 2014, 9:54:05 PM, you wrote:

> On Mon, Jul 14, 2014 at 09:01:29PM +0200, Sander Eikelenboom wrote:
>>
>> Monday, July 14, 2014, 8:45:47 PM, you wrote:
>>
>> > On Mon, Jul 14, 2014 at 08:24:33PM +0200, Sander Eikelenboom wrote:
>> >>
>> >> Monday, July 14, 2014, 7:45:25 PM, you wrote:
>> >>
>> >> > On Mon, Jul 14, 2014 at 07:43:04PM +0200, Sander Eikelenboom wrote:
>> >> >>
>> >> >> Monday, July 14, 2014, 7:37:53 PM, you wrote:
>> >> >>
>> >> >> >> >> Ad B)
>> >> >> >> >>
>> >> >> >> >> root@dom0:~# xl pci-list router
>> >> >> >> >> Vdev Device
>> >> >> >> >> 05.0 0000:00:1b.0
>> >> >> >> >>
>> >> >> >> >> root@dom0:~# xl pci-assignable-list
>> >> >> >> >> 0000:02:00.0
>> >> >> >> >>
>> >> >> >> >> root@dom0:~# xl pci-detach router 00:1b.0
>> >> >> >> >> dmesg shows:
>> >> >> >> >> [ 199.742668] pciback 0000:00:1b.0: restoring config space at offset 0x10 (was 0x4, writing 0xf7d30004)
>> >> >> >> >> [ 199.743527] pciback 0000:00:1b.0: restoring config space at offset 0xc (was 0x0, writing 0x10)
>> >> >> >> >> [ 199.744321] pciback 0000:00:1b.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100006)
>> >> >> >> >> [ 199.757184] xen-pciback pci-1-0: xen_pcibk_xenbus_remove freeing pdev @ 0xffff8800589fce40
>> >> >> >> >> [ 199.758139] xen-pciback pci-1-0: xen_pcibk_disconnect pdev @ 0xffff8800589fce40
>> >> >> >> >> [ 199.862595] xen: xen_unregister_device_domain_owner
>> >> >> >> >>
>> >> >> >> >> xl dmesg shows:
>> >> >> >> >> (XEN) [2014-07-14 16:28:29] memory_map:remove: dom1 gfn=f3070 mfn=f7d30 nr=4
>> >> >> >> >> (XEN) [2014-07-14 16:28:29] io.c:322: d1: unbind: m_gsi=22 g_gsi=36 dev=00:00.5 intx=0
>> >> >> >> >> (XEN) [2014-07-14 16:28:29] io.c:390: d1 unmap: m_irq=22 dev=00:00.5 intx=0
>> >> >> >> >> (XEN) [2014-07-14 16:28:29] [VT-D]iommu.c:1579: d1:PCIe: unmap 0000:00:1b.0
>> >> >> >> >> (XEN) [2014-07-14 16:28:29] [VT-D]iommu.c:1440: d0:PCIe: map 0000:00:1b.0
>> >> >> >> >>
>> >> >> >> >> root@dom0:~# xl pci-list router
>> >> >> >> >> root@dom0:~# xl pci-assignable-list
>> >> >> >> >> 0000:00:1b.0
>> >> >> >> >> 0000:02:00.0
>> >> >> >> >>
>> >> >> >> >> root@dom0:~# xl pci-assignable-remove 00:1b.0
>> >> >> >> >> dmesg shows:
>> >> >> >> >> [ 318.827415] xen: xen_unregister_device_domain_owner
>> >> >> >> >> [ 318.828771] xen: xen_unregister_device_domain_owner: ENODEV
>> >> >> >> >> [ 318.930869] pciback 0000:00:1b.0: restoring config space at offset 0x10 (was 0x4, writing 0xf7d30004)
>> >> >> >> >> [ 318.933435] pciback 0000:00:1b.0: restoring config space at offset 0xc (was 0x0, writing 0x10)
>> >> >> >> >> [ 318.935877] pciback 0000:00:1b.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100006)
>> >> >> >> >>
>> >> >> >> >> root@dom0:~# xl pci-list router
>> >> >> >> >> root@dom0:~# xl pci-assignable-list
>> >> >> >> >> 0000:02:00.0
>> >> >> >> >>
>> >> >> >> >>
>> >> >> >>
>> >> >> >> > And if you do:
>> >> >> >>
>> >> >> >> > # xl pci-detach router 02:00.0
>> >> >> >>
>> >> >>
>> >> >> > Err, I meant
>> >> >> > # xl pci-assignable-remove 02:00.0
>> >> >>
>> >> >> Ah ok .. so that is:
>> >> >> remove a device from pciback that has never been assigned to any guest ..
>> >> >>
>> >> >> will also give that a go .. although that probably won't be a problem.
>> >>
>> >> > Right. So I think it all works as expected? That is if you have
>> >> > two PCI devices assigned to a guest and want to re-use them you
>> >> > have to do the 'pci-detach' twice and then follow that with
>> >> > 'pci-assignable-remove' twice as well?
>> >>
>> >> Well except that's not what i want :-) .. that's about the same as shutting down
>> >> the guest .. and indeed that works :-)
>> >>
>> >> What i want to do is to remove just *one* device and leave the other one in the guest.
>> >>
>> >> The one that i remove .. i would like to be able to remove it from being
>> >> assignable .. (rebind it in dom0) and/or assign it to another guest.
>>
>> > Aha
>> >>
>> >> And that fails because pciback still thinks the guest owns the device ..
>> >> while libxl thinks it doesn't (outcome of xl pci-assignable-list).
>>
>> > Oh
>> >>
>> >> When doing the:
>> >>
>> >> # xl pci-assignable-list
>> >> 0000:02:00.0
>> >> # xl pci-assignable-remove 02:00.0
>> >> dmesg shows:
>> >> [ 443.292951] xen: xen_unregister_device_domain_owner
>> >> [ 443.294308] xen: xen_unregister_device_domain_owner: ENODEV
>> >> [ 443.398827] pciback 0000:02:00.0: restoring config space at offset 0x3c (was 0x100, writing 0x104)
>> >> [ 443.400797] pciback 0000:02:00.0: restoring config space at offset 0x10 (was 0x4, writing 0xf7c00004)
>> >> [ 443.401713] pciback 0000:02:00.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100006)
>> >>
>> >> which i think is what is expected ..
>>
>> > Right.
>>
>> >>
>> >>
>> >> Hmm /sys/bus/pci/drivers/pciback doesn't shed much light, because in the state
>> >> just after a xl pci-detach, the device is still registred by pciback (from dom0's
>> >> view) which is correct, what is wrong is that pciback still seems to think that
>> >> it is in turn still owned by the guest. That doesn't get reflected in the info
>> >> available in the /sys dir.
>>
>> > Oh
>> >>
>> >> Ok over to xen-store ...
>>
>> > I think I had a patch for that in libxl. It was the order of operations it was
>> > doing - it did the 'unbind' first and then it did the XenBus operations.
>>
>> >>
>> >> first a diff between the booted guest with two devices passed through and the
>> >> state after the "xl pci-detach router 00:1b.0"
>> >>
>> >> root@creanuc:~# diff -U10 xs-before-detach-first.txt xs-after-detach-first.txt
>> >> --- xs-before-detach-first.txt 2014-07-14 19:56:23.233576000 +0200
>> >> +++ xs-after-detach-first.txt 2014-07-14 20:00:00.053576000 +0200
>> >> @@ -185,35 +185,29 @@
>> >> feature-rx-flip = "0"
>> >> feature-split-event-channels = "1"
>> >> multi-queue-max-queues = "4"
>> >> hotplug-status = "connected"
>> >> pci = ""
>> >> 1 = ""
>> >> 0 = ""
>> >> frontend = "/local/domain/1/device/pci/0"
>> >> frontend-id = "1"
>> >> online = "1"
>> >> - state = "3"
>> >> + state = "7"
>> >> domain = "router"
>> >> key-0 = "0000:02:00.0"
>> >> dev-0 = "0000:02:00.0"
>> >> vdevfn-0 = "28"
>> >> opts-0 = "msitranslate=0,power_mgmt=0,permissive=0"
>> >> state-0 = "3"
>> >> - key-1 = "0000:00:1b.0"
>> >> - dev-1 = "0000:00:1b.0"
>> >> - vdevfn-1 = "30"
>> >> - opts-1 = "msitranslate=0,power_mgmt=0,permissive=0"
>> >> - state-1 = "3"
>> >> - num_devs = "2"
>> >> + num_devs = "1"
>> >> vdev-0 = "0000:00:00.00"
>> >> - vdev-1 = "0000:00:01.00"
>> >> root-0 = "0000:00"
>> >> root_num = "1"
>> >> 1 = ""
>> >> vm = "/vm/a102d97e-8388-414e-97e5-cc394656c47d"
>> >> name = "router"
>> >> cpu = ""
>> >> 0 = ""
>> >> availability = "online"
>> >> 1 = ""
>> >> availability = "online"
>> >>
>> >> It seems ok, perhaps apart from the general pci state going from 3 to 7 ?
>> >> Haven't looked up the exact states, but since only one domain is removes i would
>> >> still expect it to be 3 ?
>>
>> > 7 is Reconfiguring which is what it should move to (to update the XenBus).
>>
>> Ok, but that should than be acknowledged somewhere by someone i suppose .. and
>> not stay in that state ?
>>
>> >>
>> >>
>> >> Now a diff before and after the "xl pci-assignable-remove 00:1b.0"
>> >> diff xs-after-detach-first.txt xs-after-pci-assignable-remove-first.txt is
>> >> empty, as expected since this is a pciback only thing now involving any guest.
>> >>
>> >>
>> >> And now the last diff, between the state before and after the "xl pci-detach
>> >> 02:00.0" removing the last pci device from the guest.
>> >>
>> >> root@creanuc:~# diff -U5 xs-after-pci-assignable-remove-first.txt xs-after-pci-detach-second.txt
>> >> --- xs-after-pci-assignable-remove-first.txt 2014-07-14 20:00:45.161576000 +0200
>> >> +++ xs-after-pci-detach-second.txt 2014-07-14 20:01:20.177576000 +0200
>> >> @@ -184,27 +184,10 @@
>> >> feature-rx-copy = "1"
>> >> feature-rx-flip = "0"
>> >> feature-split-event-channels = "1"
>> >> multi-queue-max-queues = "4"
>> >> hotplug-status = "connected"
>> >> - pci = ""
>> >> - 1 = ""
>> >> - 0 = ""
>> >> - frontend = "/local/domain/1/device/pci/0"
>> >> - frontend-id = "1"
>> >> - online = "1"
>> >> - state = "7"
>> >> - domain = "router"
>> >> - key-0 = "0000:02:00.0"
>> >> - dev-0 = "0000:02:00.0"
>> >> - vdevfn-0 = "28"
>> >> - opts-0 = "msitranslate=0,power_mgmt=0,permissive=0"
>> >> - state-0 = "3"
>> >> - num_devs = "1"
>> >> - vdev-0 = "0000:00:00.00"
>> >> - root-0 = "0000:00"
>> >> - root_num = "1"
>> >> 1 = ""
>> >> vm = "/vm/a102d97e-8388-414e-97e5-cc394656c47d"
>> >> name = "router"
>> >> cpu = ""
>> >> 0 = ""
>> >> @@ -249,15 +232,10 @@
>> >> event-channel-rx = "25"
>> >> request-rx-copy = "1"
>> >> feature-rx-notify = "1"
>> >> feature-sg = "1"
>> >> feature-gso-tcpv4 = "1"
>> >> - pci = ""
>> >> - 0 = ""
>> >> - backend = "/local/domain/0/backend/pci/1/0"
>> >> - backend-id = "0"
>> >> - state = "1"
>> >> control = ""
>> >> shutdown = ""
>> >> platform-feature-multiprocessor-suspend = "1"
>> >> platform-feature-xs_reset_watches = "1"
>> >> hvmloader = ""
>> >>
>> >> That seems ok.
>> >>
>> >> So apart from that perhaps incorrect general pci state change, the info in
>> >> xenstore seems to be OK.
>> >>
>> >> So it does seem to be a lack of pciback receiving a confirm from libxl / qemu that the
>> >> device is indeed removed from the guest for the final deregistring of the
>> >> ownership.
>> >>
>> >> Because after a pci-detach the pci device is:
>> >> - removed from xen-store as attached to the guest
>> >> - the iommu has assigned/mapped it back to dom0
>> >> - xl pci-assignable-list shows it as assignable again
>> >>
>> >> So only pciback is the only one left out of the party :-)
>> >> Couldn't it be somehow tied to the iommu events, since it should be in sync with that i suppose ?
>>
>>
>> > Let me find out what is up with this. Once the device
>> > is de-assigned from a guest (even if forcefully) it should
>> > be possible to assign it to another.
>>
>> Yes .. at least for the rebinding to dom0 case, because the
>> pci-assignable-remove seems to do it forcefully now. But i haven't tried what it
>> does when reassigning to another guest (if it forcefully swaps ownership then,
>> since the pci-assignable-remove isn't involved then).
>>
>> Will give the xen patch a try.

> I found out what the problem is. It has been in the code for eons and
> I think we never tested for it because the older toolstack (xend) would
> do things differently.

That .. and probably not many people testing ..
Until now i used the automatic assign and deassign on guest creation
and shutdown which doesn't exhibit this problem.

> The issue here is:

> 1) xl pci-detach <dom-id> <BDF>

> 'xl' removes the whole device from the XenStore, so from:
> domain = "USB"

> vdevfn-0 = "28"
> opts-0 = "msitranslate=0,power_mgmt=0,permissive=0"
> state-0 = "3"
> vdev-0 = "0000:00:00.00"
> root-0 = "0000:00"
> key-0 = "0000:07:00.0"
> dev-0 = "0000:07:00.0"

> key-1 = "0000:04:00.0"
> dev-1 = "0000:04:00.0"
> vdevfn-1 = "30"
> opts-1 = "msitranslate=0,power_mgmt=0,permissive=0"
> state-1 = "3"
> num_devs = "2"
> vdev-1 = "0000:00:01.00"
> root_num = "1"

> it just yanks out the '*-1' values, so we have:

> domain = "USB"

> vdevfn-0 = "28"
> opts-0 = "msitranslate=0,power_mgmt=0,permissive=0"
> state-0 = "3"
> vdev-0 = "0000:00:00.00"
> root-0 = "0000:00"
> key-0 = "0000:07:00.0"
> dev-0 = "0000:07:00.0"

> num_devs = "1"

> While if the frontend was PV (so xen-pcifront) it would first but
> the state-1 to 5 (Closed) which would let the Xen pciback deallocate this.

> Anyhow, Xen pciback should be able to deal with this, but the patch
> is going to take a bit of time to figure out.

> Thank you for reporting it and helping me with the XenStore output to
> narrow down the culprit!

The thing i don't understand then is:
How is it different for the last (or all ) device(s) ?
(since the last device (or removing all devices) before doing the
assignable-remove doesn't trigger the warning)

Or does the complete removal of the pci-part of the tree in xenstore trigger something ?

You know if you got a patch cooked up, i'm more than happy to test !

--
Sander

2014-07-14 20:18:31

by Konrad Rzeszutek Wilk

[permalink] [raw]
Subject: Re: [Xen-devel] [PATCH v4] PCI back fixes for 3.17.

On Mon, Jul 14, 2014 at 10:16:18PM +0200, Sander Eikelenboom wrote:
>
> Monday, July 14, 2014, 9:54:05 PM, you wrote:
>
> > On Mon, Jul 14, 2014 at 09:01:29PM +0200, Sander Eikelenboom wrote:
> >>
> >> Monday, July 14, 2014, 8:45:47 PM, you wrote:
> >>
> >> > On Mon, Jul 14, 2014 at 08:24:33PM +0200, Sander Eikelenboom wrote:
> >> >>
> >> >> Monday, July 14, 2014, 7:45:25 PM, you wrote:
> >> >>
> >> >> > On Mon, Jul 14, 2014 at 07:43:04PM +0200, Sander Eikelenboom wrote:
> >> >> >>
> >> >> >> Monday, July 14, 2014, 7:37:53 PM, you wrote:
> >> >> >>
> >> >> >> >> >> Ad B)
> >> >> >> >> >>
> >> >> >> >> >> root@dom0:~# xl pci-list router
> >> >> >> >> >> Vdev Device
> >> >> >> >> >> 05.0 0000:00:1b.0
> >> >> >> >> >>
> >> >> >> >> >> root@dom0:~# xl pci-assignable-list
> >> >> >> >> >> 0000:02:00.0
> >> >> >> >> >>
> >> >> >> >> >> root@dom0:~# xl pci-detach router 00:1b.0
> >> >> >> >> >> dmesg shows:
> >> >> >> >> >> [ 199.742668] pciback 0000:00:1b.0: restoring config space at offset 0x10 (was 0x4, writing 0xf7d30004)
> >> >> >> >> >> [ 199.743527] pciback 0000:00:1b.0: restoring config space at offset 0xc (was 0x0, writing 0x10)
> >> >> >> >> >> [ 199.744321] pciback 0000:00:1b.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100006)
> >> >> >> >> >> [ 199.757184] xen-pciback pci-1-0: xen_pcibk_xenbus_remove freeing pdev @ 0xffff8800589fce40
> >> >> >> >> >> [ 199.758139] xen-pciback pci-1-0: xen_pcibk_disconnect pdev @ 0xffff8800589fce40
> >> >> >> >> >> [ 199.862595] xen: xen_unregister_device_domain_owner
> >> >> >> >> >>
> >> >> >> >> >> xl dmesg shows:
> >> >> >> >> >> (XEN) [2014-07-14 16:28:29] memory_map:remove: dom1 gfn=f3070 mfn=f7d30 nr=4
> >> >> >> >> >> (XEN) [2014-07-14 16:28:29] io.c:322: d1: unbind: m_gsi=22 g_gsi=36 dev=00:00.5 intx=0
> >> >> >> >> >> (XEN) [2014-07-14 16:28:29] io.c:390: d1 unmap: m_irq=22 dev=00:00.5 intx=0
> >> >> >> >> >> (XEN) [2014-07-14 16:28:29] [VT-D]iommu.c:1579: d1:PCIe: unmap 0000:00:1b.0
> >> >> >> >> >> (XEN) [2014-07-14 16:28:29] [VT-D]iommu.c:1440: d0:PCIe: map 0000:00:1b.0
> >> >> >> >> >>
> >> >> >> >> >> root@dom0:~# xl pci-list router
> >> >> >> >> >> root@dom0:~# xl pci-assignable-list
> >> >> >> >> >> 0000:00:1b.0
> >> >> >> >> >> 0000:02:00.0
> >> >> >> >> >>
> >> >> >> >> >> root@dom0:~# xl pci-assignable-remove 00:1b.0
> >> >> >> >> >> dmesg shows:
> >> >> >> >> >> [ 318.827415] xen: xen_unregister_device_domain_owner
> >> >> >> >> >> [ 318.828771] xen: xen_unregister_device_domain_owner: ENODEV
> >> >> >> >> >> [ 318.930869] pciback 0000:00:1b.0: restoring config space at offset 0x10 (was 0x4, writing 0xf7d30004)
> >> >> >> >> >> [ 318.933435] pciback 0000:00:1b.0: restoring config space at offset 0xc (was 0x0, writing 0x10)
> >> >> >> >> >> [ 318.935877] pciback 0000:00:1b.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100006)
> >> >> >> >> >>
> >> >> >> >> >> root@dom0:~# xl pci-list router
> >> >> >> >> >> root@dom0:~# xl pci-assignable-list
> >> >> >> >> >> 0000:02:00.0
> >> >> >> >> >>
> >> >> >> >> >>
> >> >> >> >>
> >> >> >> >> > And if you do:
> >> >> >> >>
> >> >> >> >> > # xl pci-detach router 02:00.0
> >> >> >> >>
> >> >> >>
> >> >> >> > Err, I meant
> >> >> >> > # xl pci-assignable-remove 02:00.0
> >> >> >>
> >> >> >> Ah ok .. so that is:
> >> >> >> remove a device from pciback that has never been assigned to any guest ..
> >> >> >>
> >> >> >> will also give that a go .. although that probably won't be a problem.
> >> >>
> >> >> > Right. So I think it all works as expected? That is if you have
> >> >> > two PCI devices assigned to a guest and want to re-use them you
> >> >> > have to do the 'pci-detach' twice and then follow that with
> >> >> > 'pci-assignable-remove' twice as well?
> >> >>
> >> >> Well except that's not what i want :-) .. that's about the same as shutting down
> >> >> the guest .. and indeed that works :-)
> >> >>
> >> >> What i want to do is to remove just *one* device and leave the other one in the guest.
> >> >>
> >> >> The one that i remove .. i would like to be able to remove it from being
> >> >> assignable .. (rebind it in dom0) and/or assign it to another guest.
> >>
> >> > Aha
> >> >>
> >> >> And that fails because pciback still thinks the guest owns the device ..
> >> >> while libxl thinks it doesn't (outcome of xl pci-assignable-list).
> >>
> >> > Oh
> >> >>
> >> >> When doing the:
> >> >>
> >> >> # xl pci-assignable-list
> >> >> 0000:02:00.0
> >> >> # xl pci-assignable-remove 02:00.0
> >> >> dmesg shows:
> >> >> [ 443.292951] xen: xen_unregister_device_domain_owner
> >> >> [ 443.294308] xen: xen_unregister_device_domain_owner: ENODEV
> >> >> [ 443.398827] pciback 0000:02:00.0: restoring config space at offset 0x3c (was 0x100, writing 0x104)
> >> >> [ 443.400797] pciback 0000:02:00.0: restoring config space at offset 0x10 (was 0x4, writing 0xf7c00004)
> >> >> [ 443.401713] pciback 0000:02:00.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100006)
> >> >>
> >> >> which i think is what is expected ..
> >>
> >> > Right.
> >>
> >> >>
> >> >>
> >> >> Hmm /sys/bus/pci/drivers/pciback doesn't shed much light, because in the state
> >> >> just after a xl pci-detach, the device is still registred by pciback (from dom0's
> >> >> view) which is correct, what is wrong is that pciback still seems to think that
> >> >> it is in turn still owned by the guest. That doesn't get reflected in the info
> >> >> available in the /sys dir.
> >>
> >> > Oh
> >> >>
> >> >> Ok over to xen-store ...
> >>
> >> > I think I had a patch for that in libxl. It was the order of operations it was
> >> > doing - it did the 'unbind' first and then it did the XenBus operations.
> >>
> >> >>
> >> >> first a diff between the booted guest with two devices passed through and the
> >> >> state after the "xl pci-detach router 00:1b.0"
> >> >>
> >> >> root@creanuc:~# diff -U10 xs-before-detach-first.txt xs-after-detach-first.txt
> >> >> --- xs-before-detach-first.txt 2014-07-14 19:56:23.233576000 +0200
> >> >> +++ xs-after-detach-first.txt 2014-07-14 20:00:00.053576000 +0200
> >> >> @@ -185,35 +185,29 @@
> >> >> feature-rx-flip = "0"
> >> >> feature-split-event-channels = "1"
> >> >> multi-queue-max-queues = "4"
> >> >> hotplug-status = "connected"
> >> >> pci = ""
> >> >> 1 = ""
> >> >> 0 = ""
> >> >> frontend = "/local/domain/1/device/pci/0"
> >> >> frontend-id = "1"
> >> >> online = "1"
> >> >> - state = "3"
> >> >> + state = "7"
> >> >> domain = "router"
> >> >> key-0 = "0000:02:00.0"
> >> >> dev-0 = "0000:02:00.0"
> >> >> vdevfn-0 = "28"
> >> >> opts-0 = "msitranslate=0,power_mgmt=0,permissive=0"
> >> >> state-0 = "3"
> >> >> - key-1 = "0000:00:1b.0"
> >> >> - dev-1 = "0000:00:1b.0"
> >> >> - vdevfn-1 = "30"
> >> >> - opts-1 = "msitranslate=0,power_mgmt=0,permissive=0"
> >> >> - state-1 = "3"
> >> >> - num_devs = "2"
> >> >> + num_devs = "1"
> >> >> vdev-0 = "0000:00:00.00"
> >> >> - vdev-1 = "0000:00:01.00"
> >> >> root-0 = "0000:00"
> >> >> root_num = "1"
> >> >> 1 = ""
> >> >> vm = "/vm/a102d97e-8388-414e-97e5-cc394656c47d"
> >> >> name = "router"
> >> >> cpu = ""
> >> >> 0 = ""
> >> >> availability = "online"
> >> >> 1 = ""
> >> >> availability = "online"
> >> >>
> >> >> It seems ok, perhaps apart from the general pci state going from 3 to 7 ?
> >> >> Haven't looked up the exact states, but since only one domain is removes i would
> >> >> still expect it to be 3 ?
> >>
> >> > 7 is Reconfiguring which is what it should move to (to update the XenBus).
> >>
> >> Ok, but that should than be acknowledged somewhere by someone i suppose .. and
> >> not stay in that state ?
> >>
> >> >>
> >> >>
> >> >> Now a diff before and after the "xl pci-assignable-remove 00:1b.0"
> >> >> diff xs-after-detach-first.txt xs-after-pci-assignable-remove-first.txt is
> >> >> empty, as expected since this is a pciback only thing now involving any guest.
> >> >>
> >> >>
> >> >> And now the last diff, between the state before and after the "xl pci-detach
> >> >> 02:00.0" removing the last pci device from the guest.
> >> >>
> >> >> root@creanuc:~# diff -U5 xs-after-pci-assignable-remove-first.txt xs-after-pci-detach-second.txt
> >> >> --- xs-after-pci-assignable-remove-first.txt 2014-07-14 20:00:45.161576000 +0200
> >> >> +++ xs-after-pci-detach-second.txt 2014-07-14 20:01:20.177576000 +0200
> >> >> @@ -184,27 +184,10 @@
> >> >> feature-rx-copy = "1"
> >> >> feature-rx-flip = "0"
> >> >> feature-split-event-channels = "1"
> >> >> multi-queue-max-queues = "4"
> >> >> hotplug-status = "connected"
> >> >> - pci = ""
> >> >> - 1 = ""
> >> >> - 0 = ""
> >> >> - frontend = "/local/domain/1/device/pci/0"
> >> >> - frontend-id = "1"
> >> >> - online = "1"
> >> >> - state = "7"
> >> >> - domain = "router"
> >> >> - key-0 = "0000:02:00.0"
> >> >> - dev-0 = "0000:02:00.0"
> >> >> - vdevfn-0 = "28"
> >> >> - opts-0 = "msitranslate=0,power_mgmt=0,permissive=0"
> >> >> - state-0 = "3"
> >> >> - num_devs = "1"
> >> >> - vdev-0 = "0000:00:00.00"
> >> >> - root-0 = "0000:00"
> >> >> - root_num = "1"
> >> >> 1 = ""
> >> >> vm = "/vm/a102d97e-8388-414e-97e5-cc394656c47d"
> >> >> name = "router"
> >> >> cpu = ""
> >> >> 0 = ""
> >> >> @@ -249,15 +232,10 @@
> >> >> event-channel-rx = "25"
> >> >> request-rx-copy = "1"
> >> >> feature-rx-notify = "1"
> >> >> feature-sg = "1"
> >> >> feature-gso-tcpv4 = "1"
> >> >> - pci = ""
> >> >> - 0 = ""
> >> >> - backend = "/local/domain/0/backend/pci/1/0"
> >> >> - backend-id = "0"
> >> >> - state = "1"
> >> >> control = ""
> >> >> shutdown = ""
> >> >> platform-feature-multiprocessor-suspend = "1"
> >> >> platform-feature-xs_reset_watches = "1"
> >> >> hvmloader = ""
> >> >>
> >> >> That seems ok.
> >> >>
> >> >> So apart from that perhaps incorrect general pci state change, the info in
> >> >> xenstore seems to be OK.
> >> >>
> >> >> So it does seem to be a lack of pciback receiving a confirm from libxl / qemu that the
> >> >> device is indeed removed from the guest for the final deregistring of the
> >> >> ownership.
> >> >>
> >> >> Because after a pci-detach the pci device is:
> >> >> - removed from xen-store as attached to the guest
> >> >> - the iommu has assigned/mapped it back to dom0
> >> >> - xl pci-assignable-list shows it as assignable again
> >> >>
> >> >> So only pciback is the only one left out of the party :-)
> >> >> Couldn't it be somehow tied to the iommu events, since it should be in sync with that i suppose ?
> >>
> >>
> >> > Let me find out what is up with this. Once the device
> >> > is de-assigned from a guest (even if forcefully) it should
> >> > be possible to assign it to another.
> >>
> >> Yes .. at least for the rebinding to dom0 case, because the
> >> pci-assignable-remove seems to do it forcefully now. But i haven't tried what it
> >> does when reassigning to another guest (if it forcefully swaps ownership then,
> >> since the pci-assignable-remove isn't involved then).
> >>
> >> Will give the xen patch a try.
>
> > I found out what the problem is. It has been in the code for eons and
> > I think we never tested for it because the older toolstack (xend) would
> > do things differently.
>
> That .. and probably not many people testing ..
> Until now i used the automatic assign and deassign on guest creation
> and shutdown which doesn't exhibit this problem.
>
> > The issue here is:
>
> > 1) xl pci-detach <dom-id> <BDF>
>
> > 'xl' removes the whole device from the XenStore, so from:
> > domain = "USB"
>
> > vdevfn-0 = "28"
> > opts-0 = "msitranslate=0,power_mgmt=0,permissive=0"
> > state-0 = "3"
> > vdev-0 = "0000:00:00.00"
> > root-0 = "0000:00"
> > key-0 = "0000:07:00.0"
> > dev-0 = "0000:07:00.0"
>
> > key-1 = "0000:04:00.0"
> > dev-1 = "0000:04:00.0"
> > vdevfn-1 = "30"
> > opts-1 = "msitranslate=0,power_mgmt=0,permissive=0"
> > state-1 = "3"
> > num_devs = "2"
> > vdev-1 = "0000:00:01.00"
> > root_num = "1"
>
> > it just yanks out the '*-1' values, so we have:
>
> > domain = "USB"
>
> > vdevfn-0 = "28"
> > opts-0 = "msitranslate=0,power_mgmt=0,permissive=0"
> > state-0 = "3"
> > vdev-0 = "0000:00:00.00"
> > root-0 = "0000:00"
> > key-0 = "0000:07:00.0"
> > dev-0 = "0000:07:00.0"
>
> > num_devs = "1"
>
> > While if the frontend was PV (so xen-pcifront) it would first but
> > the state-1 to 5 (Closed) which would let the Xen pciback deallocate this.
>
> > Anyhow, Xen pciback should be able to deal with this, but the patch
> > is going to take a bit of time to figure out.
>
> > Thank you for reporting it and helping me with the XenStore output to
> > narrow down the culprit!
>
> The thing i don't understand then is:
> How is it different for the last (or all ) device(s) ?
> (since the last device (or removing all devices) before doing the
> assignable-remove doesn't trigger the warning)
>
> Or does the complete removal of the pci-part of the tree in xenstore trigger something ?

Yup. It removes the whole 'pci' directory in XenStore which triggers
in Xen pciback the code to deal with that.

>
> You know if you got a patch cooked up, i'm more than happy to test !

Hehe!
>
> --
> Sander
>

2014-07-14 20:21:59

by Sander Eikelenboom

[permalink] [raw]
Subject: Re: [Xen-devel] [PATCH v4] PCI back fixes for 3.17.


Monday, July 14, 2014, 10:18:17 PM, you wrote:

> On Mon, Jul 14, 2014 at 10:16:18PM +0200, Sander Eikelenboom wrote:
>>
>> Monday, July 14, 2014, 9:54:05 PM, you wrote:
>>
>> > On Mon, Jul 14, 2014 at 09:01:29PM +0200, Sander Eikelenboom wrote:
>> >>
>> >> Monday, July 14, 2014, 8:45:47 PM, you wrote:
>> >>
>> >> > On Mon, Jul 14, 2014 at 08:24:33PM +0200, Sander Eikelenboom wrote:
>> >> >>
>> >> >> Monday, July 14, 2014, 7:45:25 PM, you wrote:
>> >> >>
>> >> >> > On Mon, Jul 14, 2014 at 07:43:04PM +0200, Sander Eikelenboom wrote:
>> >> >> >>
>> >> >> >> Monday, July 14, 2014, 7:37:53 PM, you wrote:
>> >> >> >>
>> >> >> >> >> >> Ad B)
>> >> >> >> >> >>
>> >> >> >> >> >> root@dom0:~# xl pci-list router
>> >> >> >> >> >> Vdev Device
>> >> >> >> >> >> 05.0 0000:00:1b.0
>> >> >> >> >> >>
>> >> >> >> >> >> root@dom0:~# xl pci-assignable-list
>> >> >> >> >> >> 0000:02:00.0
>> >> >> >> >> >>
>> >> >> >> >> >> root@dom0:~# xl pci-detach router 00:1b.0
>> >> >> >> >> >> dmesg shows:
>> >> >> >> >> >> [ 199.742668] pciback 0000:00:1b.0: restoring config space at offset 0x10 (was 0x4, writing 0xf7d30004)
>> >> >> >> >> >> [ 199.743527] pciback 0000:00:1b.0: restoring config space at offset 0xc (was 0x0, writing 0x10)
>> >> >> >> >> >> [ 199.744321] pciback 0000:00:1b.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100006)
>> >> >> >> >> >> [ 199.757184] xen-pciback pci-1-0: xen_pcibk_xenbus_remove freeing pdev @ 0xffff8800589fce40
>> >> >> >> >> >> [ 199.758139] xen-pciback pci-1-0: xen_pcibk_disconnect pdev @ 0xffff8800589fce40
>> >> >> >> >> >> [ 199.862595] xen: xen_unregister_device_domain_owner
>> >> >> >> >> >>
>> >> >> >> >> >> xl dmesg shows:
>> >> >> >> >> >> (XEN) [2014-07-14 16:28:29] memory_map:remove: dom1 gfn=f3070 mfn=f7d30 nr=4
>> >> >> >> >> >> (XEN) [2014-07-14 16:28:29] io.c:322: d1: unbind: m_gsi=22 g_gsi=36 dev=00:00.5 intx=0
>> >> >> >> >> >> (XEN) [2014-07-14 16:28:29] io.c:390: d1 unmap: m_irq=22 dev=00:00.5 intx=0
>> >> >> >> >> >> (XEN) [2014-07-14 16:28:29] [VT-D]iommu.c:1579: d1:PCIe: unmap 0000:00:1b.0
>> >> >> >> >> >> (XEN) [2014-07-14 16:28:29] [VT-D]iommu.c:1440: d0:PCIe: map 0000:00:1b.0
>> >> >> >> >> >>
>> >> >> >> >> >> root@dom0:~# xl pci-list router
>> >> >> >> >> >> root@dom0:~# xl pci-assignable-list
>> >> >> >> >> >> 0000:00:1b.0
>> >> >> >> >> >> 0000:02:00.0
>> >> >> >> >> >>
>> >> >> >> >> >> root@dom0:~# xl pci-assignable-remove 00:1b.0
>> >> >> >> >> >> dmesg shows:
>> >> >> >> >> >> [ 318.827415] xen: xen_unregister_device_domain_owner
>> >> >> >> >> >> [ 318.828771] xen: xen_unregister_device_domain_owner: ENODEV
>> >> >> >> >> >> [ 318.930869] pciback 0000:00:1b.0: restoring config space at offset 0x10 (was 0x4, writing 0xf7d30004)
>> >> >> >> >> >> [ 318.933435] pciback 0000:00:1b.0: restoring config space at offset 0xc (was 0x0, writing 0x10)
>> >> >> >> >> >> [ 318.935877] pciback 0000:00:1b.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100006)
>> >> >> >> >> >>
>> >> >> >> >> >> root@dom0:~# xl pci-list router
>> >> >> >> >> >> root@dom0:~# xl pci-assignable-list
>> >> >> >> >> >> 0000:02:00.0
>> >> >> >> >> >>
>> >> >> >> >> >>
>> >> >> >> >>
>> >> >> >> >> > And if you do:
>> >> >> >> >>
>> >> >> >> >> > # xl pci-detach router 02:00.0
>> >> >> >> >>
>> >> >> >>
>> >> >> >> > Err, I meant
>> >> >> >> > # xl pci-assignable-remove 02:00.0
>> >> >> >>
>> >> >> >> Ah ok .. so that is:
>> >> >> >> remove a device from pciback that has never been assigned to any guest ..
>> >> >> >>
>> >> >> >> will also give that a go .. although that probably won't be a problem.
>> >> >>
>> >> >> > Right. So I think it all works as expected? That is if you have
>> >> >> > two PCI devices assigned to a guest and want to re-use them you
>> >> >> > have to do the 'pci-detach' twice and then follow that with
>> >> >> > 'pci-assignable-remove' twice as well?
>> >> >>
>> >> >> Well except that's not what i want :-) .. that's about the same as shutting down
>> >> >> the guest .. and indeed that works :-)
>> >> >>
>> >> >> What i want to do is to remove just *one* device and leave the other one in the guest.
>> >> >>
>> >> >> The one that i remove .. i would like to be able to remove it from being
>> >> >> assignable .. (rebind it in dom0) and/or assign it to another guest.
>> >>
>> >> > Aha
>> >> >>
>> >> >> And that fails because pciback still thinks the guest owns the device ..
>> >> >> while libxl thinks it doesn't (outcome of xl pci-assignable-list).
>> >>
>> >> > Oh
>> >> >>
>> >> >> When doing the:
>> >> >>
>> >> >> # xl pci-assignable-list
>> >> >> 0000:02:00.0
>> >> >> # xl pci-assignable-remove 02:00.0
>> >> >> dmesg shows:
>> >> >> [ 443.292951] xen: xen_unregister_device_domain_owner
>> >> >> [ 443.294308] xen: xen_unregister_device_domain_owner: ENODEV
>> >> >> [ 443.398827] pciback 0000:02:00.0: restoring config space at offset 0x3c (was 0x100, writing 0x104)
>> >> >> [ 443.400797] pciback 0000:02:00.0: restoring config space at offset 0x10 (was 0x4, writing 0xf7c00004)
>> >> >> [ 443.401713] pciback 0000:02:00.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100006)
>> >> >>
>> >> >> which i think is what is expected ..
>> >>
>> >> > Right.
>> >>
>> >> >>
>> >> >>
>> >> >> Hmm /sys/bus/pci/drivers/pciback doesn't shed much light, because in the state
>> >> >> just after a xl pci-detach, the device is still registred by pciback (from dom0's
>> >> >> view) which is correct, what is wrong is that pciback still seems to think that
>> >> >> it is in turn still owned by the guest. That doesn't get reflected in the info
>> >> >> available in the /sys dir.
>> >>
>> >> > Oh
>> >> >>
>> >> >> Ok over to xen-store ...
>> >>
>> >> > I think I had a patch for that in libxl. It was the order of operations it was
>> >> > doing - it did the 'unbind' first and then it did the XenBus operations.
>> >>
>> >> >>
>> >> >> first a diff between the booted guest with two devices passed through and the
>> >> >> state after the "xl pci-detach router 00:1b.0"
>> >> >>
>> >> >> root@creanuc:~# diff -U10 xs-before-detach-first.txt xs-after-detach-first.txt
>> >> >> --- xs-before-detach-first.txt 2014-07-14 19:56:23.233576000 +0200
>> >> >> +++ xs-after-detach-first.txt 2014-07-14 20:00:00.053576000 +0200
>> >> >> @@ -185,35 +185,29 @@
>> >> >> feature-rx-flip = "0"
>> >> >> feature-split-event-channels = "1"
>> >> >> multi-queue-max-queues = "4"
>> >> >> hotplug-status = "connected"
>> >> >> pci = ""
>> >> >> 1 = ""
>> >> >> 0 = ""
>> >> >> frontend = "/local/domain/1/device/pci/0"
>> >> >> frontend-id = "1"
>> >> >> online = "1"
>> >> >> - state = "3"
>> >> >> + state = "7"
>> >> >> domain = "router"
>> >> >> key-0 = "0000:02:00.0"
>> >> >> dev-0 = "0000:02:00.0"
>> >> >> vdevfn-0 = "28"
>> >> >> opts-0 = "msitranslate=0,power_mgmt=0,permissive=0"
>> >> >> state-0 = "3"
>> >> >> - key-1 = "0000:00:1b.0"
>> >> >> - dev-1 = "0000:00:1b.0"
>> >> >> - vdevfn-1 = "30"
>> >> >> - opts-1 = "msitranslate=0,power_mgmt=0,permissive=0"
>> >> >> - state-1 = "3"
>> >> >> - num_devs = "2"
>> >> >> + num_devs = "1"
>> >> >> vdev-0 = "0000:00:00.00"
>> >> >> - vdev-1 = "0000:00:01.00"
>> >> >> root-0 = "0000:00"
>> >> >> root_num = "1"
>> >> >> 1 = ""
>> >> >> vm = "/vm/a102d97e-8388-414e-97e5-cc394656c47d"
>> >> >> name = "router"
>> >> >> cpu = ""
>> >> >> 0 = ""
>> >> >> availability = "online"
>> >> >> 1 = ""
>> >> >> availability = "online"
>> >> >>
>> >> >> It seems ok, perhaps apart from the general pci state going from 3 to 7 ?
>> >> >> Haven't looked up the exact states, but since only one domain is removes i would
>> >> >> still expect it to be 3 ?
>> >>
>> >> > 7 is Reconfiguring which is what it should move to (to update the XenBus).
>> >>
>> >> Ok, but that should than be acknowledged somewhere by someone i suppose .. and
>> >> not stay in that state ?
>> >>
>> >> >>
>> >> >>
>> >> >> Now a diff before and after the "xl pci-assignable-remove 00:1b.0"
>> >> >> diff xs-after-detach-first.txt xs-after-pci-assignable-remove-first.txt is
>> >> >> empty, as expected since this is a pciback only thing now involving any guest.
>> >> >>
>> >> >>
>> >> >> And now the last diff, between the state before and after the "xl pci-detach
>> >> >> 02:00.0" removing the last pci device from the guest.
>> >> >>
>> >> >> root@creanuc:~# diff -U5 xs-after-pci-assignable-remove-first.txt xs-after-pci-detach-second.txt
>> >> >> --- xs-after-pci-assignable-remove-first.txt 2014-07-14 20:00:45.161576000 +0200
>> >> >> +++ xs-after-pci-detach-second.txt 2014-07-14 20:01:20.177576000 +0200
>> >> >> @@ -184,27 +184,10 @@
>> >> >> feature-rx-copy = "1"
>> >> >> feature-rx-flip = "0"
>> >> >> feature-split-event-channels = "1"
>> >> >> multi-queue-max-queues = "4"
>> >> >> hotplug-status = "connected"
>> >> >> - pci = ""
>> >> >> - 1 = ""
>> >> >> - 0 = ""
>> >> >> - frontend = "/local/domain/1/device/pci/0"
>> >> >> - frontend-id = "1"
>> >> >> - online = "1"
>> >> >> - state = "7"
>> >> >> - domain = "router"
>> >> >> - key-0 = "0000:02:00.0"
>> >> >> - dev-0 = "0000:02:00.0"
>> >> >> - vdevfn-0 = "28"
>> >> >> - opts-0 = "msitranslate=0,power_mgmt=0,permissive=0"
>> >> >> - state-0 = "3"
>> >> >> - num_devs = "1"
>> >> >> - vdev-0 = "0000:00:00.00"
>> >> >> - root-0 = "0000:00"
>> >> >> - root_num = "1"
>> >> >> 1 = ""
>> >> >> vm = "/vm/a102d97e-8388-414e-97e5-cc394656c47d"
>> >> >> name = "router"
>> >> >> cpu = ""
>> >> >> 0 = ""
>> >> >> @@ -249,15 +232,10 @@
>> >> >> event-channel-rx = "25"
>> >> >> request-rx-copy = "1"
>> >> >> feature-rx-notify = "1"
>> >> >> feature-sg = "1"
>> >> >> feature-gso-tcpv4 = "1"
>> >> >> - pci = ""
>> >> >> - 0 = ""
>> >> >> - backend = "/local/domain/0/backend/pci/1/0"
>> >> >> - backend-id = "0"
>> >> >> - state = "1"
>> >> >> control = ""
>> >> >> shutdown = ""
>> >> >> platform-feature-multiprocessor-suspend = "1"
>> >> >> platform-feature-xs_reset_watches = "1"
>> >> >> hvmloader = ""
>> >> >>
>> >> >> That seems ok.
>> >> >>
>> >> >> So apart from that perhaps incorrect general pci state change, the info in
>> >> >> xenstore seems to be OK.
>> >> >>
>> >> >> So it does seem to be a lack of pciback receiving a confirm from libxl / qemu that the
>> >> >> device is indeed removed from the guest for the final deregistring of the
>> >> >> ownership.
>> >> >>
>> >> >> Because after a pci-detach the pci device is:
>> >> >> - removed from xen-store as attached to the guest
>> >> >> - the iommu has assigned/mapped it back to dom0
>> >> >> - xl pci-assignable-list shows it as assignable again
>> >> >>
>> >> >> So only pciback is the only one left out of the party :-)
>> >> >> Couldn't it be somehow tied to the iommu events, since it should be in sync with that i suppose ?
>> >>
>> >>
>> >> > Let me find out what is up with this. Once the device
>> >> > is de-assigned from a guest (even if forcefully) it should
>> >> > be possible to assign it to another.
>> >>
>> >> Yes .. at least for the rebinding to dom0 case, because the
>> >> pci-assignable-remove seems to do it forcefully now. But i haven't tried what it
>> >> does when reassigning to another guest (if it forcefully swaps ownership then,
>> >> since the pci-assignable-remove isn't involved then).
>> >>
>> >> Will give the xen patch a try.
>>
>> > I found out what the problem is. It has been in the code for eons and
>> > I think we never tested for it because the older toolstack (xend) would
>> > do things differently.
>>
>> That .. and probably not many people testing ..
>> Until now i used the automatic assign and deassign on guest creation
>> and shutdown which doesn't exhibit this problem.
>>
>> > The issue here is:
>>
>> > 1) xl pci-detach <dom-id> <BDF>
>>
>> > 'xl' removes the whole device from the XenStore, so from:
>> > domain = "USB"
>>
>> > vdevfn-0 = "28"
>> > opts-0 = "msitranslate=0,power_mgmt=0,permissive=0"
>> > state-0 = "3"
>> > vdev-0 = "0000:00:00.00"
>> > root-0 = "0000:00"
>> > key-0 = "0000:07:00.0"
>> > dev-0 = "0000:07:00.0"
>>
>> > key-1 = "0000:04:00.0"
>> > dev-1 = "0000:04:00.0"
>> > vdevfn-1 = "30"
>> > opts-1 = "msitranslate=0,power_mgmt=0,permissive=0"
>> > state-1 = "3"
>> > num_devs = "2"
>> > vdev-1 = "0000:00:01.00"
>> > root_num = "1"
>>
>> > it just yanks out the '*-1' values, so we have:
>>
>> > domain = "USB"
>>
>> > vdevfn-0 = "28"
>> > opts-0 = "msitranslate=0,power_mgmt=0,permissive=0"
>> > state-0 = "3"
>> > vdev-0 = "0000:00:00.00"
>> > root-0 = "0000:00"
>> > key-0 = "0000:07:00.0"
>> > dev-0 = "0000:07:00.0"
>>
>> > num_devs = "1"
>>
>> > While if the frontend was PV (so xen-pcifront) it would first but
>> > the state-1 to 5 (Closed) which would let the Xen pciback deallocate this.
>>
>> > Anyhow, Xen pciback should be able to deal with this, but the patch
>> > is going to take a bit of time to figure out.
>>
>> > Thank you for reporting it and helping me with the XenStore output to
>> > narrow down the culprit!
>>
>> The thing i don't understand then is:
>> How is it different for the last (or all ) device(s) ?
>> (since the last device (or removing all devices) before doing the
>> assignable-remove doesn't trigger the warning)
>>
>> Or does the complete removal of the pci-part of the tree in xenstore trigger something ?

> Yup. It removes the whole 'pci' directory in XenStore which triggers
> in Xen pciback the code to deal with that.
So that essentially needs to be changed to watch the individual devices in the
xenstore tree instead of the whole pci stuff.
Then it all makes sense :-)

>> You know if you got a patch cooked up, i'm more than happy to test !

> Hehe!
>>
>> --
>> Sander
>>

2014-07-14 20:25:43

by Konrad Rzeszutek Wilk

[permalink] [raw]
Subject: Re: [Xen-devel] [PATCH v4] PCI back fixes for 3.17.

> >> > Thank you for reporting it and helping me with the XenStore output to
> >> > narrow down the culprit!
> >>
> >> The thing i don't understand then is:
> >> How is it different for the last (or all ) device(s) ?
> >> (since the last device (or removing all devices) before doing the
> >> assignable-remove doesn't trigger the warning)
> >>
> >> Or does the complete removal of the pci-part of the tree in xenstore trigger something ?
>
> > Yup. It removes the whole 'pci' directory in XenStore which triggers
> > in Xen pciback the code to deal with that.
> So that essentially needs to be changed to watch the individual devices in the
> xenstore tree instead of the whole pci stuff.

Correct. Thought instead of watching it will just compare with what it
had assigned to a guest and what it currently has exposed in XenStore. And
then whatever is not on the list (exposed in XenStore) is going to be reaped.

> Then it all makes sense :-)

Until you find another bug :-)
>
> >> You know if you got a patch cooked up, i'm more than happy to test !
>
> > Hehe!
> >>
> >> --
> >> Sander
> >>
>
>