For now, please consider these patches for review and suggest if these can be merged to mainline kernel v4.9.
These patches add support for vPCI protocol version 1.2, by baqkpotring from v4.14 to v4.9. Individual patches are summarised below:
Patch 1: PCI: hv: Allocate physically contiguous hypercall params buffer
Backported as is.
Patch 2: PCI: hv: Add vPCI version protocol negotiation
Backported as is.
Patch 3: PCI: hv: Use vPCI protocol version 1.2 for v4.9
Change: Replaced hv_tmp_cpu_nr_to_vp_nr() with vmbus_cpu_number_to_vp_number()
to make this patch compatible for linux v4.9.
drivers/pci/host/pci-hyperv.c | 387 +++++++++++++++++++++++++++++++++---------
1 file changed, 311 insertions(+), 76 deletions(-)
--
2.7.4
Hyper-V vPCI offers different protocol versions. Add the infra for
negotiating the one to use.
Signed-off-by: Jork Loeser <[email protected]>
Signed-off-by: Bjorn Helgaas <[email protected]>
Reviewed-by: K. Y. Srinivasan <[email protected]>
Acked-by: K. Y. Srinivasan <[email protected]>
Signed-off-by: Ajay Kaher <[email protected]>
---
drivers/pci/host/pci-hyperv.c | 72 +++++++++++++++++++++++++++++++------------
1 file changed, 53 insertions(+), 19 deletions(-)
diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
index 9e44adf..6eed85b 100644
--- a/drivers/pci/host/pci-hyperv.c
+++ b/drivers/pci/host/pci-hyperv.c
@@ -65,22 +65,37 @@
* major version.
*/
-#define PCI_MAKE_VERSION(major, minor) ((u32)(((major) << 16) | (major)))
+#define PCI_MAKE_VERSION(major, minor) ((u32)(((major) << 16) | (minor)))
#define PCI_MAJOR_VERSION(version) ((u32)(version) >> 16)
#define PCI_MINOR_VERSION(version) ((u32)(version) & 0xff)
-enum {
- PCI_PROTOCOL_VERSION_1_1 = PCI_MAKE_VERSION(1, 1),
- PCI_PROTOCOL_VERSION_CURRENT = PCI_PROTOCOL_VERSION_1_1
+enum pci_protocol_version_t {
+ PCI_PROTOCOL_VERSION_1_1 = PCI_MAKE_VERSION(1, 1), /* Win10 */
};
#define CPU_AFFINITY_ALL -1ULL
+
+/*
+ * Supported protocol versions in the order of probing - highest go
+ * first.
+ */
+static enum pci_protocol_version_t pci_protocol_versions[] = {
+ PCI_PROTOCOL_VERSION_1_1,
+};
+
+/*
+ * Protocol version negotiated by hv_pci_protocol_negotiation().
+ */
+static enum pci_protocol_version_t pci_protocol_version;
+
#define PCI_CONFIG_MMIO_LENGTH 0x2000
#define CFG_PAGE_OFFSET 0x1000
#define CFG_PAGE_SIZE (PCI_CONFIG_MMIO_LENGTH - CFG_PAGE_OFFSET)
#define MAX_SUPPORTED_MSI_MESSAGES 0x400
+#define STATUS_REVISION_MISMATCH 0xC0000059
+
/*
* Message Types
*/
@@ -1789,6 +1804,7 @@ static int hv_pci_protocol_negotiation(struct hv_device *hdev)
struct hv_pci_compl comp_pkt;
struct pci_packet *pkt;
int ret;
+ int i;
/*
* Initiate the handshake with the host and negotiate
@@ -1805,26 +1821,44 @@ static int hv_pci_protocol_negotiation(struct hv_device *hdev)
pkt->compl_ctxt = &comp_pkt;
version_req = (struct pci_version_request *)&pkt->message;
version_req->message_type.type = PCI_QUERY_PROTOCOL_VERSION;
- version_req->protocol_version = PCI_PROTOCOL_VERSION_CURRENT;
- ret = vmbus_sendpacket(hdev->channel, version_req,
- sizeof(struct pci_version_request),
- (unsigned long)pkt, VM_PKT_DATA_INBAND,
- VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
- if (ret)
- goto exit;
+ for (i = 0; i < ARRAY_SIZE(pci_protocol_versions); i++) {
+ version_req->protocol_version = pci_protocol_versions[i];
+ ret = vmbus_sendpacket(hdev->channel, version_req,
+ sizeof(struct pci_version_request),
+ (unsigned long)pkt, VM_PKT_DATA_INBAND,
+ VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+ if (ret) {
+ dev_err(&hdev->device,
+ "PCI Pass-through VSP failed sending version reqquest: %#x",
+ ret);
+ goto exit;
+ }
- wait_for_completion(&comp_pkt.host_event);
+ wait_for_completion(&comp_pkt.host_event);
- if (comp_pkt.completion_status < 0) {
- dev_err(&hdev->device,
- "PCI Pass-through VSP failed version request %x\n",
- comp_pkt.completion_status);
- ret = -EPROTO;
- goto exit;
+ if (comp_pkt.completion_status >= 0) {
+ pci_protocol_version = pci_protocol_versions[i];
+ dev_info(&hdev->device,
+ "PCI VMBus probing: Using version %#x\n",
+ pci_protocol_version);
+ goto exit;
+ }
+
+ if (comp_pkt.completion_status != STATUS_REVISION_MISMATCH) {
+ dev_err(&hdev->device,
+ "PCI Pass-through VSP failed version request: %#x",
+ comp_pkt.completion_status);
+ ret = -EPROTO;
+ goto exit;
+ }
+
+ reinit_completion(&comp_pkt.host_event);
}
- ret = 0;
+ dev_err(&hdev->device,
+ "PCI pass-through VSP failed to find supported version");
+ ret = -EPROTO;
exit:
kfree(pkt);
--
2.7.4
hv_do_hypercall() assumes that we pass a segment from a physically
contiguous buffer. A buffer allocated on the stack may not work if
CONFIG_VMAP_STACK=y is set.
Use kmalloc() to allocate this buffer.
Reported-by: Haiyang Zhang <[email protected]>
Signed-off-by: Long Li <[email protected]>
Signed-off-by: Bjorn Helgaas <[email protected]>
Acked-by: K. Y. Srinivasan <[email protected]>
Signed-off-by: Ajay Kaher <[email protected]>
---
drivers/pci/host/pci-hyperv.c | 29 +++++++++++++++++++----------
1 file changed, 19 insertions(+), 10 deletions(-)
diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
index b4d8ccf..9e44adf 100644
--- a/drivers/pci/host/pci-hyperv.c
+++ b/drivers/pci/host/pci-hyperv.c
@@ -383,6 +383,8 @@ struct hv_pcibus_device {
struct msi_domain_info msi_info;
struct msi_controller msi_chip;
struct irq_domain *irq_domain;
+ struct retarget_msi_interrupt retarget_msi_interrupt_params;
+ spinlock_t retarget_msi_interrupt_lock;
};
/*
@@ -780,34 +782,40 @@ void hv_irq_unmask(struct irq_data *data)
{
struct msi_desc *msi_desc = irq_data_get_msi_desc(data);
struct irq_cfg *cfg = irqd_cfg(data);
- struct retarget_msi_interrupt params;
+ struct retarget_msi_interrupt *params;
struct hv_pcibus_device *hbus;
struct cpumask *dest;
struct pci_bus *pbus;
struct pci_dev *pdev;
int cpu;
+ unsigned long flags;
dest = irq_data_get_affinity_mask(data);
pdev = msi_desc_to_pci_dev(msi_desc);
pbus = pdev->bus;
hbus = container_of(pbus->sysdata, struct hv_pcibus_device, sysdata);
- memset(¶ms, 0, sizeof(params));
- params.partition_id = HV_PARTITION_ID_SELF;
- params.source = 1; /* MSI(-X) */
- params.address = msi_desc->msg.address_lo;
- params.data = msi_desc->msg.data;
- params.device_id = (hbus->hdev->dev_instance.b[5] << 24) |
+ spin_lock_irqsave(&hbus->retarget_msi_interrupt_lock, flags);
+
+ params = &hbus->retarget_msi_interrupt_params;
+ memset(params, 0, sizeof(*params));
+ params->partition_id = HV_PARTITION_ID_SELF;
+ params->source = 1; /* MSI(-X) */
+ params->address = msi_desc->msg.address_lo;
+ params->data = msi_desc->msg.data;
+ params->device_id = (hbus->hdev->dev_instance.b[5] << 24) |
(hbus->hdev->dev_instance.b[4] << 16) |
(hbus->hdev->dev_instance.b[7] << 8) |
(hbus->hdev->dev_instance.b[6] & 0xf8) |
PCI_FUNC(pdev->devfn);
- params.vector = cfg->vector;
+ params->vector = cfg->vector;
for_each_cpu_and(cpu, dest, cpu_online_mask)
- params.vp_mask |= (1ULL << vmbus_cpu_number_to_vp_number(cpu));
+ params->vp_mask |= (1ULL << vmbus_cpu_number_to_vp_number(cpu));
+
+ hv_do_hypercall(HVCALL_RETARGET_INTERRUPT, params, NULL);
- hv_do_hypercall(HVCALL_RETARGET_INTERRUPT, ¶ms, NULL);
+ spin_unlock_irqrestore(&hbus->retarget_msi_interrupt_lock, flags);
pci_msi_unmask_irq(data);
}
@@ -2212,6 +2220,7 @@ static int hv_pci_probe(struct hv_device *hdev,
INIT_LIST_HEAD(&hbus->resources_for_children);
spin_lock_init(&hbus->config_lock);
spin_lock_init(&hbus->device_list_lock);
+ spin_lock_init(&hbus->retarget_msi_interrupt_lock);
sema_init(&hbus->enum_sem, 1);
init_completion(&hbus->remove_event);
--
2.7.4
On Fri, Jan 18, 2019 at 02:17:15AM +0530, Ajay Kaher wrote:
> For now, please consider these patches for review and suggest if these can be merged to mainline kernel v4.9.
>
> These patches add support for vPCI protocol version 1.2, by baqkpotring from v4.14 to v4.9. Individual patches are summarised below:
<formletter>
This is not the correct way to submit patches for inclusion in the
stable kernel tree. Please read:
https://www.kernel.org/doc/html/latest/process/stable-kernel-rules.html
for how to do this properly.
</formletter>
Update the Hyper-V vPCI driver to use the Server-2016 version of the vPCI
protocol, fixing MSI creation and retargeting issues.
Replaced hv_tmp_cpu_nr_to_vp_nr() with vmbus_cpu_number_to_vp_number()
to make this patch compatibale for linux v4.9.
Signed-off-by: Jork Loeser <[email protected]>
Signed-off-by: Bjorn Helgaas <[email protected]>
Reviewed-by: K. Y. Srinivasan <[email protected]>
Acked-by: K. Y. Srinivasan <[email protected]>
Signed-off-by: Ajay Kaher <[email protected]>
---
drivers/pci/host/pci-hyperv.c | 300 ++++++++++++++++++++++++++++++++++--------
1 file changed, 246 insertions(+), 54 deletions(-)
diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
index 6eed85b..37d70b1 100644
--- a/drivers/pci/host/pci-hyperv.c
+++ b/drivers/pci/host/pci-hyperv.c
@@ -71,6 +71,7 @@
enum pci_protocol_version_t {
PCI_PROTOCOL_VERSION_1_1 = PCI_MAKE_VERSION(1, 1), /* Win10 */
+ PCI_PROTOCOL_VERSION_1_2 = PCI_MAKE_VERSION(1, 2), /* RS1 */
};
#define CPU_AFFINITY_ALL -1ULL
@@ -80,6 +81,7 @@ enum pci_protocol_version_t {
* first.
*/
static enum pci_protocol_version_t pci_protocol_versions[] = {
+ PCI_PROTOCOL_VERSION_1_2,
PCI_PROTOCOL_VERSION_1_1,
};
@@ -125,6 +127,9 @@ enum pci_message_type {
PCI_QUERY_PROTOCOL_VERSION = PCI_MESSAGE_BASE + 0x13,
PCI_CREATE_INTERRUPT_MESSAGE = PCI_MESSAGE_BASE + 0x14,
PCI_DELETE_INTERRUPT_MESSAGE = PCI_MESSAGE_BASE + 0x15,
+ PCI_RESOURCES_ASSIGNED2 = PCI_MESSAGE_BASE + 0x16,
+ PCI_CREATE_INTERRUPT_MESSAGE2 = PCI_MESSAGE_BASE + 0x17,
+ PCI_DELETE_INTERRUPT_MESSAGE2 = PCI_MESSAGE_BASE + 0x18, /* unused */
PCI_MESSAGE_MAXIMUM
};
@@ -195,6 +200,30 @@ struct hv_msi_desc {
} __packed;
/**
+ * struct hv_msi_desc2 - 1.2 version of hv_msi_desc
+ * @vector: IDT entry
+ * @delivery_mode: As defined in Intel's Programmer's
+ * Reference Manual, Volume 3, Chapter 8.
+ * @vector_count: Number of contiguous entries in the
+ * Interrupt Descriptor Table that are
+ * occupied by this Message-Signaled
+ * Interrupt. For "MSI", as first defined
+ * in PCI 2.2, this can be between 1 and
+ * 32. For "MSI-X," as first defined in PCI
+ * 3.0, this must be 1, as each MSI-X table
+ * entry would have its own descriptor.
+ * @processor_count: number of bits enabled in array.
+ * @processor_array: All the target virtual processors.
+ */
+struct hv_msi_desc2 {
+ u8 vector;
+ u8 delivery_mode;
+ u16 vector_count;
+ u16 processor_count;
+ u16 processor_array[32];
+} __packed;
+
+/**
* struct tran_int_desc
* @reserved: unused, padding
* @vector_count: same as in hv_msi_desc
@@ -310,6 +339,14 @@ struct pci_resources_assigned {
u32 reserved[4];
} __packed;
+struct pci_resources_assigned2 {
+ struct pci_message message_type;
+ union win_slot_encoding wslot;
+ u8 memory_range[0x14][6]; /* not used here */
+ u32 msi_descriptor_count;
+ u8 reserved[70];
+} __packed;
+
struct pci_create_interrupt {
struct pci_message message_type;
union win_slot_encoding wslot;
@@ -322,6 +359,12 @@ struct pci_create_int_response {
struct tran_int_desc int_desc;
} __packed;
+struct pci_create_interrupt2 {
+ struct pci_message message_type;
+ union win_slot_encoding wslot;
+ struct hv_msi_desc2 int_desc;
+} __packed;
+
struct pci_delete_interrupt {
struct pci_message message_type;
union win_slot_encoding wslot;
@@ -347,17 +390,42 @@ static int pci_ring_size = (4 * PAGE_SIZE);
#define HV_PARTITION_ID_SELF ((u64)-1)
#define HVCALL_RETARGET_INTERRUPT 0x7e
-struct retarget_msi_interrupt {
- u64 partition_id; /* use "self" */
- u64 device_id;
+struct hv_interrupt_entry {
u32 source; /* 1 for MSI(-X) */
u32 reserved1;
u32 address;
u32 data;
- u64 reserved2;
+};
+
+#define HV_VP_SET_BANK_COUNT_MAX 5 /* current implementation limit */
+
+struct hv_vp_set {
+ u64 format; /* 0 (HvGenericSetSparse4k) */
+ u64 valid_banks;
+ u64 masks[HV_VP_SET_BANK_COUNT_MAX];
+};
+
+/*
+ * flags for hv_device_interrupt_target.flags
+ */
+#define HV_DEVICE_INTERRUPT_TARGET_MULTICAST 1
+#define HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET 2
+
+struct hv_device_interrupt_target {
u32 vector;
u32 flags;
- u64 vp_mask;
+ union {
+ u64 vp_mask;
+ struct hv_vp_set vp_set;
+ };
+};
+
+struct retarget_msi_interrupt {
+ u64 partition_id; /* use "self" */
+ u64 device_id;
+ struct hv_interrupt_entry int_entry;
+ u64 reserved2;
+ struct hv_device_interrupt_target int_target;
} __packed;
/*
@@ -802,8 +870,11 @@ void hv_irq_unmask(struct irq_data *data)
struct cpumask *dest;
struct pci_bus *pbus;
struct pci_dev *pdev;
- int cpu;
unsigned long flags;
+ u32 var_size = 0;
+ int cpu_vmbus;
+ int cpu;
+ u64 res;
dest = irq_data_get_affinity_mask(data);
pdev = msi_desc_to_pci_dev(msi_desc);
@@ -815,23 +886,74 @@ void hv_irq_unmask(struct irq_data *data)
params = &hbus->retarget_msi_interrupt_params;
memset(params, 0, sizeof(*params));
params->partition_id = HV_PARTITION_ID_SELF;
- params->source = 1; /* MSI(-X) */
- params->address = msi_desc->msg.address_lo;
- params->data = msi_desc->msg.data;
+ params->int_entry.source = 1; /* MSI(-X) */
+ params->int_entry.address = msi_desc->msg.address_lo;
+ params->int_entry.data = msi_desc->msg.data;
params->device_id = (hbus->hdev->dev_instance.b[5] << 24) |
(hbus->hdev->dev_instance.b[4] << 16) |
(hbus->hdev->dev_instance.b[7] << 8) |
(hbus->hdev->dev_instance.b[6] & 0xf8) |
PCI_FUNC(pdev->devfn);
- params->vector = cfg->vector;
+ params->int_target.vector = cfg->vector;
+
+ /*
+ * Honoring apic->irq_delivery_mode set to dest_Fixed by
+ * setting the HV_DEVICE_INTERRUPT_TARGET_MULTICAST flag results in a
+ * spurious interrupt storm. Not doing so does not seem to have a
+ * negative effect (yet?).
+ */
+
+ if (pci_protocol_version >= PCI_PROTOCOL_VERSION_1_2) {
+ /*
+ * PCI_PROTOCOL_VERSION_1_2 supports the VP_SET version of the
+ * HVCALL_RETARGET_INTERRUPT hypercall, which also coincides
+ * with >64 VP support.
+ * ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED
+ * is not sufficient for this hypercall.
+ */
+ params->int_target.flags |=
+ HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET;
+ params->int_target.vp_set.valid_banks =
+ (1ull << HV_VP_SET_BANK_COUNT_MAX) - 1;
- for_each_cpu_and(cpu, dest, cpu_online_mask)
- params->vp_mask |= (1ULL << vmbus_cpu_number_to_vp_number(cpu));
+ /*
+ * var-sized hypercall, var-size starts after vp_mask (thus
+ * vp_set.format does not count, but vp_set.valid_banks does).
+ */
+ var_size = 1 + HV_VP_SET_BANK_COUNT_MAX;
- hv_do_hypercall(HVCALL_RETARGET_INTERRUPT, params, NULL);
+ for_each_cpu_and(cpu, dest, cpu_online_mask) {
+ cpu_vmbus = vmbus_cpu_number_to_vp_number(cpu);
+
+ if (cpu_vmbus >= HV_VP_SET_BANK_COUNT_MAX * 64) {
+ dev_err(&hbus->hdev->device,
+ "too high CPU %d", cpu_vmbus);
+ res = 1;
+ goto exit_unlock;
+ }
+ params->int_target.vp_set.masks[cpu_vmbus / 64] |=
+ (1ULL << (cpu_vmbus & 63));
+ }
+ } else {
+ for_each_cpu_and(cpu, dest, cpu_online_mask) {
+ params->int_target.vp_mask |=
+ (1ULL << vmbus_cpu_number_to_vp_number(cpu));
+ }
+ }
+
+ res = hv_do_hypercall(HVCALL_RETARGET_INTERRUPT | (var_size << 17),
+ params, NULL);
+
+exit_unlock:
spin_unlock_irqrestore(&hbus->retarget_msi_interrupt_lock, flags);
+ if (res) {
+ dev_err(&hbus->hdev->device,
+ "%s() failed: %#llx", __func__, res);
+ return;
+ }
+
pci_msi_unmask_irq(data);
}
@@ -852,6 +974,53 @@ static void hv_pci_compose_compl(void *context, struct pci_response *resp,
complete(&comp_pkt->comp_pkt.host_event);
}
+static u32 hv_compose_msi_req_v1(
+ struct pci_create_interrupt *int_pkt, struct cpumask *affinity,
+ u32 slot, u8 vector)
+{
+ int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE;
+ int_pkt->wslot.slot = slot;
+ int_pkt->int_desc.vector = vector;
+ int_pkt->int_desc.vector_count = 1;
+ int_pkt->int_desc.delivery_mode =
+ (apic->irq_delivery_mode == dest_LowestPrio) ?
+ dest_LowestPrio : dest_Fixed;
+
+ /*
+ * Create MSI w/ dummy vCPU set, overwritten by subsequent retarget in
+ * hv_irq_unmask().
+ */
+ int_pkt->int_desc.cpu_mask = CPU_AFFINITY_ALL;
+
+ return sizeof(*int_pkt);
+}
+
+static u32 hv_compose_msi_req_v2(
+ struct pci_create_interrupt2 *int_pkt, struct cpumask *affinity,
+ u32 slot, u8 vector)
+{
+ int cpu;
+
+ int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE2;
+ int_pkt->wslot.slot = slot;
+ int_pkt->int_desc.vector = vector;
+ int_pkt->int_desc.vector_count = 1;
+ int_pkt->int_desc.delivery_mode =
+ (apic->irq_delivery_mode == dest_LowestPrio) ?
+ dest_LowestPrio : dest_Fixed;
+
+ /*
+ * Create MSI w/ dummy vCPU set targeting just one vCPU, overwritten
+ * by subsequent retarget in hv_irq_unmask().
+ */
+ cpu = cpumask_first_and(affinity, cpu_online_mask);
+ int_pkt->int_desc.processor_array[0] =
+ vmbus_cpu_number_to_vp_number(cpu);
+ int_pkt->int_desc.processor_count = 1;
+
+ return sizeof(*int_pkt);
+}
+
/**
* hv_compose_msi_msg() - Supplies a valid MSI address/data
* @data: Everything about this MSI
@@ -870,15 +1039,17 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
struct hv_pci_dev *hpdev;
struct pci_bus *pbus;
struct pci_dev *pdev;
- struct pci_create_interrupt *int_pkt;
struct compose_comp_ctxt comp;
struct tran_int_desc *int_desc;
- struct cpumask *affinity;
struct {
- struct pci_packet pkt;
- u8 buffer[sizeof(struct pci_create_interrupt)];
- } ctxt;
- int cpu;
+ struct pci_packet pci_pkt;
+ union {
+ struct pci_create_interrupt v1;
+ struct pci_create_interrupt2 v2;
+ } int_pkts;
+ } __packed ctxt;
+
+ u32 size;
int ret;
pdev = msi_desc_to_pci_dev(irq_data_get_msi_desc(data));
@@ -901,36 +1072,44 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
memset(&ctxt, 0, sizeof(ctxt));
init_completion(&comp.comp_pkt.host_event);
- ctxt.pkt.completion_func = hv_pci_compose_compl;
- ctxt.pkt.compl_ctxt = ∁
- int_pkt = (struct pci_create_interrupt *)&ctxt.pkt.message;
- int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE;
- int_pkt->wslot.slot = hpdev->desc.win_slot.slot;
- int_pkt->int_desc.vector = cfg->vector;
- int_pkt->int_desc.vector_count = 1;
- int_pkt->int_desc.delivery_mode =
- (apic->irq_delivery_mode == dest_LowestPrio) ? 1 : 0;
+ ctxt.pci_pkt.completion_func = hv_pci_compose_compl;
+ ctxt.pci_pkt.compl_ctxt = ∁
+
+ switch (pci_protocol_version) {
+ case PCI_PROTOCOL_VERSION_1_1:
+ size = hv_compose_msi_req_v1(&ctxt.int_pkts.v1,
+ irq_data_get_affinity_mask(data),
+ hpdev->desc.win_slot.slot,
+ cfg->vector);
+ break;
- /*
- * This bit doesn't have to work on machines with more than 64
- * processors because Hyper-V only supports 64 in a guest.
- */
- affinity = irq_data_get_affinity_mask(data);
- if (cpumask_weight(affinity) >= 32) {
- int_pkt->int_desc.cpu_mask = CPU_AFFINITY_ALL;
- } else {
- for_each_cpu_and(cpu, affinity, cpu_online_mask) {
- int_pkt->int_desc.cpu_mask |=
- (1ULL << vmbus_cpu_number_to_vp_number(cpu));
- }
+ case PCI_PROTOCOL_VERSION_1_2:
+ size = hv_compose_msi_req_v2(&ctxt.int_pkts.v2,
+ irq_data_get_affinity_mask(data),
+ hpdev->desc.win_slot.slot,
+ cfg->vector);
+ break;
+
+ default:
+ /* As we only negotiate protocol versions known to this driver,
+ * this path should never hit. However, this is it not a hot
+ * path so we print a message to aid future updates.
+ */
+ dev_err(&hbus->hdev->device,
+ "Unexpected vPCI protocol, update driver.");
+ goto free_int_desc;
}
- ret = vmbus_sendpacket(hpdev->hbus->hdev->channel, int_pkt,
- sizeof(*int_pkt), (unsigned long)&ctxt.pkt,
+ ret = vmbus_sendpacket(hpdev->hbus->hdev->channel, &ctxt.int_pkts,
+ size, (unsigned long)&ctxt.pci_pkt,
VM_PKT_DATA_INBAND,
VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
- if (ret)
+ if (ret) {
+ dev_err(&hbus->hdev->device,
+ "Sending request for interrupt failed: 0x%x",
+ comp.comp_pkt.completion_status);
goto free_int_desc;
+ }
wait_for_completion(&comp.comp_pkt.host_event);
@@ -2117,13 +2296,18 @@ static int hv_send_resources_allocated(struct hv_device *hdev)
{
struct hv_pcibus_device *hbus = hv_get_drvdata(hdev);
struct pci_resources_assigned *res_assigned;
+ struct pci_resources_assigned2 *res_assigned2;
struct hv_pci_compl comp_pkt;
struct hv_pci_dev *hpdev;
struct pci_packet *pkt;
+ size_t size_res;
u32 wslot;
int ret;
- pkt = kmalloc(sizeof(*pkt) + sizeof(*res_assigned), GFP_KERNEL);
+ size_res = (pci_protocol_version < PCI_PROTOCOL_VERSION_1_2)
+ ? sizeof(*res_assigned) : sizeof(*res_assigned2);
+
+ pkt = kmalloc(sizeof(*pkt) + size_res, GFP_KERNEL);
if (!pkt)
return -ENOMEM;
@@ -2134,22 +2318,30 @@ static int hv_send_resources_allocated(struct hv_device *hdev)
if (!hpdev)
continue;
- memset(pkt, 0, sizeof(*pkt) + sizeof(*res_assigned));
+ memset(pkt, 0, sizeof(*pkt) + size_res);
init_completion(&comp_pkt.host_event);
pkt->completion_func = hv_pci_generic_compl;
pkt->compl_ctxt = &comp_pkt;
- res_assigned = (struct pci_resources_assigned *)&pkt->message;
- res_assigned->message_type.type = PCI_RESOURCES_ASSIGNED;
- res_assigned->wslot.slot = hpdev->desc.win_slot.slot;
+ if (pci_protocol_version < PCI_PROTOCOL_VERSION_1_2) {
+ res_assigned =
+ (struct pci_resources_assigned *)&pkt->message;
+ res_assigned->message_type.type =
+ PCI_RESOURCES_ASSIGNED;
+ res_assigned->wslot.slot = hpdev->desc.win_slot.slot;
+ } else {
+ res_assigned2 =
+ (struct pci_resources_assigned2 *)&pkt->message;
+ res_assigned2->message_type.type =
+ PCI_RESOURCES_ASSIGNED2;
+ res_assigned2->wslot.slot = hpdev->desc.win_slot.slot;
+ }
put_pcichild(hpdev, hv_pcidev_ref_by_slot);
- ret = vmbus_sendpacket(
- hdev->channel, &pkt->message,
- sizeof(*res_assigned),
- (unsigned long)pkt,
- VM_PKT_DATA_INBAND,
- VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+ ret = vmbus_sendpacket(hdev->channel, &pkt->message,
+ size_res, (unsigned long)pkt,
+ VM_PKT_DATA_INBAND,
+ VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
if (ret)
break;
--
2.7.4
On Fri, Jan 18, 2019 at 02:17:18AM +0530, Ajay Kaher wrote:
> Update the Hyper-V vPCI driver to use the Server-2016 version of the vPCI
> protocol, fixing MSI creation and retargeting issues.
>
> Replaced hv_tmp_cpu_nr_to_vp_nr() with vmbus_cpu_number_to_vp_number()
> to make this patch compatibale for linux v4.9.
s/compatibale for/compatible with/
This change (to make it compatible with v4.9) sounds like it should be
in its own separate patch. I don't see any use of
hv_tmp_cpu_nr_to_vp_nr() being removed, so the changelog doesn't quite
make sense.
> Signed-off-by: Jork Loeser <[email protected]>
> Signed-off-by: Bjorn Helgaas <[email protected]>
I did not sign off on this; please remove.
> Reviewed-by: K. Y. Srinivasan <[email protected]>
> Acked-by: K. Y. Srinivasan <[email protected]>
> Signed-off-by: Ajay Kaher <[email protected]>
> + * struct hv_msi_desc2 - 1.2 version of hv_msi_desc
> + * @vector: IDT entry
> + * @delivery_mode: As defined in Intel's Programmer's
> + * Reference Manual, Volume 3, Chapter 8.
> + * @vector_count: Number of contiguous entries in the
> + * Interrupt Descriptor Table that are
> + * occupied by this Message-Signaled
> + * Interrupt. For "MSI", as first defined
> + * in PCI 2.2, this can be between 1 and
> + * 32. For "MSI-X," as first defined in PCI
> + * 3.0, this must be 1, as each MSI-X table
> + * entry would have its own descriptor.
Please reflow these descriptions to take advantage of an 80 column
width. They are currently wrapped to fit in 50 columns, which is
unnecessarily short.
> + default:
> + /* As we only negotiate protocol versions known to this driver,
> + * this path should never hit. However, this is it not a hot
> + * path so we print a message to aid future updates.
> + */
> + dev_err(&hbus->hdev->device,
> + "Unexpected vPCI protocol, update driver.");
Include the actual protocol version in the message?
On Fri, Jan 18, 2019 at 02:17:16AM +0530, Ajay Kaher wrote:
> hv_do_hypercall() assumes that we pass a segment from a physically
> contiguous buffer. A buffer allocated on the stack may not work if
> CONFIG_VMAP_STACK=y is set.
>
> Use kmalloc() to allocate this buffer.
>
> Reported-by: Haiyang Zhang <[email protected]>
> Signed-off-by: Long Li <[email protected]>
> Signed-off-by: Bjorn Helgaas <[email protected]>
I did not sign off on this; please remove this. Signed-off-by should
only be added by the person mentioned.
> Acked-by: K. Y. Srinivasan <[email protected]>
> Signed-off-by: Ajay Kaher <[email protected]>
> ---
> drivers/pci/host/pci-hyperv.c | 29 +++++++++++++++++++----------
> 1 file changed, 19 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
> index b4d8ccf..9e44adf 100644
> --- a/drivers/pci/host/pci-hyperv.c
> +++ b/drivers/pci/host/pci-hyperv.c
> @@ -383,6 +383,8 @@ struct hv_pcibus_device {
> struct msi_domain_info msi_info;
> struct msi_controller msi_chip;
> struct irq_domain *irq_domain;
> + struct retarget_msi_interrupt retarget_msi_interrupt_params;
> + spinlock_t retarget_msi_interrupt_lock;
> };
>
> /*
> @@ -780,34 +782,40 @@ void hv_irq_unmask(struct irq_data *data)
> {
> struct msi_desc *msi_desc = irq_data_get_msi_desc(data);
> struct irq_cfg *cfg = irqd_cfg(data);
> - struct retarget_msi_interrupt params;
> + struct retarget_msi_interrupt *params;
> struct hv_pcibus_device *hbus;
> struct cpumask *dest;
> struct pci_bus *pbus;
> struct pci_dev *pdev;
> int cpu;
> + unsigned long flags;
>
> dest = irq_data_get_affinity_mask(data);
> pdev = msi_desc_to_pci_dev(msi_desc);
> pbus = pdev->bus;
> hbus = container_of(pbus->sysdata, struct hv_pcibus_device, sysdata);
>
> - memset(¶ms, 0, sizeof(params));
> - params.partition_id = HV_PARTITION_ID_SELF;
> - params.source = 1; /* MSI(-X) */
> - params.address = msi_desc->msg.address_lo;
> - params.data = msi_desc->msg.data;
> - params.device_id = (hbus->hdev->dev_instance.b[5] << 24) |
> + spin_lock_irqsave(&hbus->retarget_msi_interrupt_lock, flags);
> +
> + params = &hbus->retarget_msi_interrupt_params;
> + memset(params, 0, sizeof(*params));
> + params->partition_id = HV_PARTITION_ID_SELF;
> + params->source = 1; /* MSI(-X) */
> + params->address = msi_desc->msg.address_lo;
> + params->data = msi_desc->msg.data;
> + params->device_id = (hbus->hdev->dev_instance.b[5] << 24) |
> (hbus->hdev->dev_instance.b[4] << 16) |
> (hbus->hdev->dev_instance.b[7] << 8) |
> (hbus->hdev->dev_instance.b[6] & 0xf8) |
> PCI_FUNC(pdev->devfn);
> - params.vector = cfg->vector;
> + params->vector = cfg->vector;
>
> for_each_cpu_and(cpu, dest, cpu_online_mask)
> - params.vp_mask |= (1ULL << vmbus_cpu_number_to_vp_number(cpu));
> + params->vp_mask |= (1ULL << vmbus_cpu_number_to_vp_number(cpu));
> +
> + hv_do_hypercall(HVCALL_RETARGET_INTERRUPT, params, NULL);
>
> - hv_do_hypercall(HVCALL_RETARGET_INTERRUPT, ¶ms, NULL);
> + spin_unlock_irqrestore(&hbus->retarget_msi_interrupt_lock, flags);
>
> pci_msi_unmask_irq(data);
> }
> @@ -2212,6 +2220,7 @@ static int hv_pci_probe(struct hv_device *hdev,
> INIT_LIST_HEAD(&hbus->resources_for_children);
> spin_lock_init(&hbus->config_lock);
> spin_lock_init(&hbus->device_list_lock);
> + spin_lock_init(&hbus->retarget_msi_interrupt_lock);
> sema_init(&hbus->enum_sem, 1);
> init_completion(&hbus->remove_event);
>
> --
> 2.7.4
>
On Fri, Jan 18, 2019 at 02:17:17AM +0530, Ajay Kaher wrote:
> Hyper-V vPCI offers different protocol versions. Add the infra for
> negotiating the one to use.
>
> Signed-off-by: Jork Loeser <[email protected]>
> Signed-off-by: Bjorn Helgaas <[email protected]>
I did not sign off on this, please remove.
> Reviewed-by: K. Y. Srinivasan <[email protected]>
> Acked-by: K. Y. Srinivasan <[email protected]>
> Signed-off-by: Ajay Kaher <[email protected]>
> ---
> drivers/pci/host/pci-hyperv.c | 72 +++++++++++++++++++++++++++++++------------
> 1 file changed, 53 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
> index 9e44adf..6eed85b 100644
> --- a/drivers/pci/host/pci-hyperv.c
> +++ b/drivers/pci/host/pci-hyperv.c
> @@ -65,22 +65,37 @@
> * major version.
> */
>
> -#define PCI_MAKE_VERSION(major, minor) ((u32)(((major) << 16) | (major)))
> +#define PCI_MAKE_VERSION(major, minor) ((u32)(((major) << 16) | (minor)))
This looks like a bug fix that should be in its own patch.
> On 17/01/19, 8:37 PM, "Bjorn Helgaas" <[email protected]> wrote:
>
> > On Fri, Jan 18, 2019 at 02:17:18AM +0530, Ajay Kaher wrote:
> > Update the Hyper-V vPCI driver to use the Server-2016 version of the vPCI
> > protocol, fixing MSI creation and retargeting issues.
> >
> > Replaced hv_tmp_cpu_nr_to_vp_nr() with vmbus_cpu_number_to_vp_number()
> > to make this patch compatibale for linux v4.9.
>
> s/compatibale for/compatible with/
>
> This change (to make it compatible with v4.9) sounds like it should be
> in its own separate patch. I don't see any use of
> hv_tmp_cpu_nr_to_vp_nr() being removed, so the changelog doesn't quite
> make sense.
hv_tmp_cpu_nr_to_vp_nr() is not removed, v4.9 doesn't have this API,
and because of this failed to apply the original patch on v4.9.
That's the reason to use vmbus_cpu_number_to_vp_number()
instead of hv_tmp_cpu_nr_to_vp_nr().
I will change the explanation in changelog to:
Because v4.9 doesn't have hv_tmp_cpu_nr_to_vp_nr(),
so original patch from v4.14 fails to apply on v4.9.
To solve this, using vmbus_cpu_number_to_vp_number()
instead of hv_tmp_cpu_nr_to_vp_nr() in this patch.
Here, I would like maintainer of HyperV to review this change and
provide their feedback. Tested these patches on Azure Server with
VM of size L16s_v2 and it's working fine.
> > Signed-off-by: Jork Loeser <[email protected]>
> > Signed-off-by: Bjorn Helgaas <[email protected]>
>
> I did not sign off on this; please remove.
Sorry, I kept as it is from original patch. I will change this in next version of this patch.
> > Reviewed-by: K. Y. Srinivasan <[email protected]>
> > Acked-by: K. Y. Srinivasan <[email protected]>
> > Signed-off-by: Ajay Kaher <[email protected]>
>
> > + * struct hv_msi_desc2 - 1.2 version of hv_msi_desc
> > + * @vector: IDT entry
> > + * @delivery_mode: As defined in Intel's Programmer's
> > + * Reference Manual, Volume 3, Chapter 8.
> > + * @vector_count: Number of contiguous entries in the
> > + * Interrupt Descriptor Table that are
> > + * occupied by this Message-Signaled
> > + * Interrupt. For "MSI", as first defined
> > + * in PCI 2.2, this can be between 1 and
> > + * 32. For "MSI-X," as first defined in PCI
> > + * 3.0, this must be 1, as each MSI-X table
> > + * entry would have its own descriptor.
>
> Please reflow these descriptions to take advantage of an 80 column
> width. They are currently wrapped to fit in 50 columns, which is
> unnecessarily short.
Thanks, I will change this in next version of this patch.
> > + default:
> > + /* As we only negotiate protocol versions known to this driver,
> > + * this path should never hit. However, this is it not a hot
> > + * path so we print a message to aid future updates.
> > + */
> > + dev_err(&hbus->hdev->device,
> > + "Unexpected vPCI protocol, update driver.");
> Include the actual protocol version in the message?
Thanks, I will try to change this in next version.
On Fri, Jan 18, 2019 at 02:17:16AM +0530, Ajay Kaher wrote:
> hv_do_hypercall() assumes that we pass a segment from a physically
> contiguous buffer. A buffer allocated on the stack may not work if
> CONFIG_VMAP_STACK=y is set.
>
> Use kmalloc() to allocate this buffer.
>
Since you're going to need to resend this anyway, can you add in the
commit message what this looks like from a user perspective? Presumably
it's an occasional crash?
regards,
dan carpenter
On Mon, Jan 21, 2019 at 04:19:42PM +0300, Dan Carpenter wrote:
> On Fri, Jan 18, 2019 at 02:17:16AM +0530, Ajay Kaher wrote:
> > hv_do_hypercall() assumes that we pass a segment from a physically
> > contiguous buffer. A buffer allocated on the stack may not work if
> > CONFIG_VMAP_STACK=y is set.
> >
> > Use kmalloc() to allocate this buffer.
> >
>
> Since you're going to need to resend this anyway, can you add in the
> commit message what this looks like from a user perspective? Presumably
> it's an occasional crash?
>
Never mind. I didn't realize this was a two year old patch.
regards,
dan carpenter