v1 -> v2:
Make the pointer passed around less opaque for type safety.
Bug https://bugs.launchpad.net/qemu/+bug/754591 is caused because
the KVM module attempts to do a pci_save_state() before assigning
the device to a VM, expecting that the saved state will remain
valid until we release the device. This is in conflict with our
need to reset devices using PCI sysfs during a VM reset to
quiesce the device. Any calls to pci_reset_function() will
overwrite the device saved state prior to reset, and reload and
invalidate the state after. KVM then ends up trying to restore
the state, but it's already invalid, so the device ends up with
reset values.
This series adds a mechanism to pull the saved state off the
struct pci_dev and reload it later. Thanks,
Alex
---
Alex Williamson (2):
KVM: Use pci_store/load_saved_state() around VM device usage
PCI: Add interfaces to store and load the device saved state
drivers/pci/pci.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++
include/linux/kvm_host.h | 1
include/linux/pci.h | 4 ++
virt/kvm/assigned-dev.c | 8 ++--
4 files changed, 104 insertions(+), 3 deletions(-)
Store the device saved state so that we can reload the device back
to the original state when it's unassigned. This has the benefit
that the state survives across pci_reset_function() calls via
the PCI sysfs reset interface while the VM is using the device.
Signed-off-by: Alex Williamson <[email protected]>
---
include/linux/kvm_host.h | 1 +
virt/kvm/assigned-dev.c | 8 +++++---
2 files changed, 6 insertions(+), 3 deletions(-)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ab42855..7099b67 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -513,6 +513,7 @@ struct kvm_assigned_dev_kernel {
struct kvm *kvm;
spinlock_t intx_lock;
char irq_name[32];
+ struct pci_state *pci_saved_state;
};
struct kvm_irq_mask_notifier {
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
index ae72ae6..66c6ccd 100644
--- a/virt/kvm/assigned-dev.c
+++ b/virt/kvm/assigned-dev.c
@@ -197,7 +197,9 @@ static void kvm_free_assigned_device(struct kvm *kvm,
{
kvm_free_assigned_irq(kvm, assigned_dev);
- __pci_reset_function(assigned_dev->dev);
+ pci_reset_function(assigned_dev->dev);
+ pci_load_and_free_saved_state(assigned_dev->dev,
+ &assigned_dev->pci_saved_state);
pci_restore_state(assigned_dev->dev);
pci_release_regions(assigned_dev->dev);
@@ -516,7 +518,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
pci_reset_function(dev);
pci_save_state(dev);
-
+ match->pci_saved_state = pci_store_saved_state(dev);
match->assigned_dev_id = assigned_dev->assigned_dev_id;
match->host_segnr = assigned_dev->segnr;
match->host_busnr = assigned_dev->busnr;
@@ -546,7 +548,7 @@ out:
mutex_unlock(&kvm->lock);
return r;
out_list_del:
- pci_restore_state(dev);
+ pci_load_and_free_saved_state(dev, &match->pci_saved_state);
list_del(&match->list);
pci_release_regions(dev);
out_disable:
For KVM device assignment, we'd like to save off the state of a device
prior to passing it to the guest and restore it later. We also want
to allow pci_reset_function() to be called while the device is owned
by the guest. This however overwrites and invalidates the struct pci_dev
buffers, so we can't just manually call save and restore. Add generic
interfaces for the saved state to be stored and reloaded back into
struct pci_dev at a later time.
Signed-off-by: Alex Williamson <[email protected]>
---
drivers/pci/pci.c | 94 +++++++++++++++++++++++++++++++++++++++++++++++++++
include/linux/pci.h | 4 ++
2 files changed, 98 insertions(+), 0 deletions(-)
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 2472e71..30e2ebd 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -975,6 +975,100 @@ void pci_restore_state(struct pci_dev *dev)
dev->state_saved = false;
}
+struct pci_state {
+ u32 config_space[16];
+ u16 pcie_state[PCI_EXP_SAVE_REGS];
+ u16 pcix_state[1];
+};
+
+/**
+ * pci_store_saved_state - Allocate and return an opaque struct containing
+ * the device saved state.
+ * @dev: PCI device that we're dealing with
+ *
+ * Returns NULL if no state or error.
+ */
+struct pci_state *pci_store_saved_state(struct pci_dev *dev)
+{
+ struct pci_state *state;
+ struct pci_cap_saved_state *cap_state;
+ int pos;
+
+ if (!dev->state_saved)
+ return NULL;
+
+ state = kzalloc(sizeof(*state), GFP_KERNEL);
+ if (!state)
+ return NULL;
+
+ memcpy(state->config_space, dev->saved_config_space,
+ sizeof(state->config_space));
+
+ pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
+ cap_state = pci_find_saved_cap(dev, PCI_CAP_ID_EXP);
+ if (cap_state && pos)
+ memcpy(state->pcie_state, cap_state->data,
+ sizeof(state->pcie_state));
+
+ pos = pci_find_capability(dev, PCI_CAP_ID_PCIX);
+ cap_state = pci_find_saved_cap(dev, PCI_CAP_ID_PCIX);
+ if (cap_state && pos)
+ memcpy(state->pcix_state, cap_state->data,
+ sizeof(state->pcix_state));
+
+ return state;
+}
+EXPORT_SYMBOL_GPL(pci_store_saved_state);
+
+/**
+ * pci_load_saved_state - Reload the provided save state into struct pci_dev.
+ * @dev: PCI device that we're dealing with
+ * @state: Saved state returned from pci_store_saved_state()
+ */
+void pci_load_saved_state(struct pci_dev *dev, struct pci_state *state)
+{
+ struct pci_cap_saved_state *cap_state;
+ int pos;
+
+ if (!state) {
+ dev->state_saved = false;
+ return;
+ }
+
+ memcpy(dev->saved_config_space, state->config_space,
+ sizeof(state->config_space));
+
+ pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
+ cap_state = pci_find_saved_cap(dev, PCI_CAP_ID_EXP);
+ if (cap_state && pos)
+ memcpy(cap_state->data, state->pcie_state,
+ sizeof(state->pcie_state));
+
+ pos = pci_find_capability(dev, PCI_CAP_ID_PCIX);
+ cap_state = pci_find_saved_cap(dev, PCI_CAP_ID_PCIX);
+ if (cap_state && pos)
+ memcpy(cap_state->data, state->pcix_state,
+ sizeof(state->pcix_state));
+
+ dev->state_saved = true;
+}
+EXPORT_SYMBOL_GPL(pci_load_saved_state);
+
+/**
+ * pci_load_and_free_saved_state - Reload the save state pointed to by state,
+ * and free the memory allocated for it.
+ * @dev: PCI device that we're dealing with
+ * @state: Pointer to saved state returned from pci_store_saved_state()
+ */
+void pci_load_and_free_saved_state(struct pci_dev *dev,
+ struct pci_state **state)
+{
+ pci_load_saved_state(dev, *state);
+ kfree(*state);
+ *state = NULL;
+}
+EXPORT_SYMBOL_GPL(pci_load_and_free_saved_state);
+
static int do_pci_enable_device(struct pci_dev *dev, int bars)
{
int err;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 96f70d7..b7c6c1a 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -807,6 +807,10 @@ size_t pci_get_rom_size(struct pci_dev *pdev, void __iomem *rom, size_t size);
/* Power management related routines */
int pci_save_state(struct pci_dev *dev);
void pci_restore_state(struct pci_dev *dev);
+struct pci_state *pci_store_saved_state(struct pci_dev *dev);
+void pci_load_saved_state(struct pci_dev *dev, struct pci_state *state);
+void pci_load_and_free_saved_state(struct pci_dev *dev,
+ struct pci_state **state);
int __pci_complete_power_transition(struct pci_dev *dev, pci_power_t state);
int pci_set_power_state(struct pci_dev *dev, pci_power_t state);
pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state);
On 04/19/2011 11:12 PM, Alex Williamson wrote:
> v1 -> v2:
> Make the pointer passed around less opaque for type safety.
>
> Bug https://bugs.launchpad.net/qemu/+bug/754591 is caused because
> the KVM module attempts to do a pci_save_state() before assigning
> the device to a VM, expecting that the saved state will remain
> valid until we release the device. This is in conflict with our
> need to reset devices using PCI sysfs during a VM reset to
> quiesce the device. Any calls to pci_reset_function() will
> overwrite the device saved stated prior to reset, and reload and
> invalidate the state after. KVM then ends up trying to restore
> the state, but it's already invalid, so the device ends up with
> reset values.
>
> This series adds a mechanism to pull the saved state off the
> struct pci_dev and reload it later. Thanks,
Based on the sizes of the patches, this should go in via the pci tree.
--
error compiling committee.c: too many arguments to function