Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752101AbbHECbt (ORCPT ); Tue, 4 Aug 2015 22:31:49 -0400 Received: from mgwkm04.jp.fujitsu.com ([202.219.69.171]:47365 "EHLO mgwkm04.jp.fujitsu.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751527AbbHECbs (ORCPT ); Tue, 4 Aug 2015 22:31:48 -0400 From: Taku Izumi To: linux-kernel@vger.kernel.org, a.p.zijlstra@chello.nl, mingo@redhat.com, acme@kernel.org Cc: Taku Izumi Subject: [PATCH v2] perf: Fix multi-segment problem of perf_event_intel_uncore Date: Wed, 5 Aug 2015 20:34:11 +0900 Message-Id: <1438774451-4274-1-git-send-email-izumi.taku@jp.fujitsu.com> X-Mailer: git-send-email 1.8.3.1 X-TM-AS-MML: disable Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 8192 Lines: 251 In multi-segment system, uncore devices may belong to buses whose segment number is other than 0. .... 0000:ff:10.5 System peripheral: Intel Corporation Xeon E5 v3/Core i7 Scratchpad & Semaphore Registers (rev 03) ... 0001:7f:10.5 System peripheral: Intel Corporation Xeon E5 v3/Core i7 Scratchpad & Semaphore Registers (rev 03) ... 0001:bf:10.5 System peripheral: Intel Corporation Xeon E5 v3/Core i7 Scratchpad & Semaphore Registers (rev 03) ... 0001:ff:10.5 System peripheral: Intel Corporation Xeon E5 v3/Core i7 Scratchpad & Semaphore Registers (rev 03 ... In that case relation of bus number and physical id may be broken because "uncore_pcibus_to_physid" doesn't take account of PCI segment. For example, bus 0000:ff and 0001:ff uses the same entry of "uncore_pcibus_to_physid" array. This patch fixes ths problem by introducing segment-aware pci2phy_map instead. v1->v2: - Extract method named uncore_pcibus_to_physid to avoid repetetion of retrieving phys_id code Signed-off-by: Taku Izumi --- arch/x86/kernel/cpu/perf_event_intel_uncore.c | 25 ++++++++-- arch/x86/kernel/cpu/perf_event_intel_uncore.h | 11 ++++- arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c | 23 +++++++++- .../x86/kernel/cpu/perf_event_intel_uncore_snbep.c | 53 ++++++++++++++++------ 4 files changed, 94 insertions(+), 18 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 21b5e38..0ed6f2b 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -7,7 +7,8 @@ struct intel_uncore_type **uncore_pci_uncores = empty_uncore; static bool pcidrv_registered; struct pci_driver *uncore_pci_driver; /* pci bus to socket mapping */ -int uncore_pcibus_to_physid[256] = { [0 ... 255] = -1, }; +DEFINE_RAW_SPINLOCK(pci2phy_map_lock); +struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head); struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX]; static DEFINE_RAW_SPINLOCK(uncore_box_lock); @@ -20,6 +21,23 @@ static struct event_constraint uncore_constraint_fixed = struct event_constraint uncore_constraint_empty = EVENT_CONSTRAINT(0, 0, 0); +int uncore_pcibus_to_physid(struct pci_bus *bus) +{ + int phys_id = -1; + struct pci2phy_map *map; + + raw_spin_lock(&pci2phy_map_lock); + list_for_each_entry(map, &pci2phy_map_head, list) { + if (map->segment == pci_domain_nr(bus)) { + phys_id = map->pbus_to_physid[bus->number]; + break; + } + } + raw_spin_unlock(&pci2phy_map_lock); + + return phys_id; +} + ssize_t uncore_event_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { @@ -809,7 +827,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id int phys_id; bool first_box = false; - phys_id = uncore_pcibus_to_physid[pdev->bus->number]; + phys_id = uncore_pcibus_to_physid(pdev->bus); if (phys_id < 0) return -ENODEV; @@ -856,9 +874,10 @@ static void uncore_pci_remove(struct pci_dev *pdev) { struct intel_uncore_box *box = pci_get_drvdata(pdev); struct intel_uncore_pmu *pmu; - int i, cpu, phys_id = uncore_pcibus_to_physid[pdev->bus->number]; + int i, cpu, phys_id; bool last_box = false; + phys_id = uncore_pcibus_to_physid(pdev->bus); box = pci_get_drvdata(pdev); if (!box) { for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) { diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h index 0f77f0a..6c96ee9 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h @@ -117,6 +117,14 @@ struct uncore_event_desc { const char *config; }; +struct pci2phy_map { + struct list_head list; + int segment; + int pbus_to_physid[256]; +}; + +int uncore_pcibus_to_physid(struct pci_bus *bus); + ssize_t uncore_event_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf); @@ -317,7 +325,8 @@ u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx); extern struct intel_uncore_type **uncore_msr_uncores; extern struct intel_uncore_type **uncore_pci_uncores; extern struct pci_driver *uncore_pci_driver; -extern int uncore_pcibus_to_physid[256]; +extern raw_spinlock_t pci2phy_map_lock; +extern struct list_head pci2phy_map_head; extern struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX]; extern struct event_constraint uncore_constraint_empty; diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c index b005a78..ccbc817 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c @@ -402,14 +402,35 @@ static int snb_pci2phy_map_init(int devid) { struct pci_dev *dev = NULL; int bus; + int segment; + struct pci2phy_map *map; + bool found = false; dev = pci_get_device(PCI_VENDOR_ID_INTEL, devid, dev); if (!dev) return -ENOTTY; bus = dev->bus->number; + segment = pci_domain_nr(dev->bus); - uncore_pcibus_to_physid[bus] = 0; + raw_spin_lock(&pci2phy_map_lock); + list_for_each_entry(map, &pci2phy_map_head, list) { + if (map->segment == segment) { + found = true; + break; + } + } + if (!found) { + map = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL); + if (map) { + map->segment = segment; + map->pbus_to_physid[bus] = 0; + list_add_tail(&map->list, &pci2phy_map_head); + } + } else { + map->pbus_to_physid[bus] = 0; + } + raw_spin_unlock(&pci2phy_map_lock); pci_dev_put(dev); diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c index 6d6e85d..1becd8b 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c @@ -1090,6 +1090,9 @@ static int snbep_pci2phy_map_init(int devid) int i, bus, nodeid; int err = 0; u32 config = 0; + struct pci2phy_map *map; + int segment; + bool found = false; while (1) { /* find the UBOX device */ @@ -1106,16 +1109,36 @@ static int snbep_pci2phy_map_init(int devid) err = pci_read_config_dword(ubox_dev, 0x54, &config); if (err) break; - /* - * every three bits in the Node ID mapping register maps - * to a particular node. - */ - for (i = 0; i < 8; i++) { - if (nodeid == ((config >> (3 * i)) & 0x7)) { - uncore_pcibus_to_physid[bus] = i; + + segment = pci_domain_nr(ubox_dev->bus); + raw_spin_lock(&pci2phy_map_lock); + list_for_each_entry(map, &pci2phy_map_head, list) { + if (map->segment == segment) { + found = true; break; } } + if (!found) { + map = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL); + if (map) { + map->segment = segment; + list_add_tail(&map->list, &pci2phy_map_head); + } + } + if (map) { + /* + * every three bits in the Node ID mapping register + * maps to a particular node. + */ + for (i = 0; i < 8; i++) { + if (nodeid == ((config >> (3 * i)) & 0x7)) { + map->pbus_to_physid[bus] = i; + break; + } + } + + } + raw_spin_unlock(&pci2phy_map_lock); } if (!err) { @@ -1123,13 +1146,17 @@ static int snbep_pci2phy_map_init(int devid) * For PCI bus with no UBOX device, find the next bus * that has UBOX device and use its mapping. */ - i = -1; - for (bus = 255; bus >= 0; bus--) { - if (uncore_pcibus_to_physid[bus] >= 0) - i = uncore_pcibus_to_physid[bus]; - else - uncore_pcibus_to_physid[bus] = i; + raw_spin_lock(&pci2phy_map_lock); + list_for_each_entry(map, &pci2phy_map_head, list) { + i = -1; + for (bus = 255; bus >= 0; bus--) { + if (map->pbus_to_physid[bus] >= 0) + i = map->pbus_to_physid[bus]; + else + map->pbus_to_physid[bus] = i; + } } + raw_spin_unlock(&pci2phy_map_lock); } pci_dev_put(ubox_dev); -- 1.8.3.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/