Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751564AbaBJG1v (ORCPT ); Mon, 10 Feb 2014 01:27:51 -0500 Received: from mga09.intel.com ([134.134.136.24]:1086 "EHLO mga09.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1750795AbaBJG1u (ORCPT ); Mon, 10 Feb 2014 01:27:50 -0500 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="4.95,816,1384329600"; d="scan'208";a="472574817" Message-ID: <52F87164.8030208@intel.com> Date: Mon, 10 Feb 2014 14:27:48 +0800 From: "Yan, Zheng" User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:24.0) Gecko/20100101 Thunderbird/24.3.0 MIME-Version: 1.0 To: Stephane Eranian , linux-kernel@vger.kernel.org CC: peterz@infradead.org, mingo@elte.hu, acme@redhat.com, ak@linux.intel.com Subject: Re: [PATCH v1 08/10] perf/x86/uncore: add SNB/IVB/HSW client uncore memory controller support References: <1391432142-18723-1-git-send-email-eranian@google.com> <1391432142-18723-9-git-send-email-eranian@google.com> In-Reply-To: <1391432142-18723-9-git-send-email-eranian@google.com> Content-Type: text/plain; charset=ISO-8859-1 Content-Transfer-Encoding: 7bit Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org On 02/03/2014 08:55 PM, Stephane Eranian wrote: > This patch adds a new uncore PMU for Intel SNB/IVB/HSW client > CPUs. It adds the Integrated Memory Controller (IMC) PMU. This > new PMU provides a set of events to measure memory bandwidth utilization. > > The IMC on those processor is PCI-space based. 
This patch > exposes a new uncore PMU on those processor: uncore_imc > > Two new events are defined: > - name: data_reads > - code: 0x1 > - unit: 64 bytes > - number of full cacheline read requests to the IMC > > - name: data_writes > - code: 0x2 > - unit: 64 bytes > - number of full cacheline write requests to the IMC > > Documentation available at: > http://software.intel.com/en-us/articles/monitoring-integrated-memory-controller-requests-in-the-2nd-3rd-and-4th-generation-intel > > Signed-off-by: Stephane Eranian > --- > arch/x86/kernel/cpu/perf_event_intel_uncore.c | 370 +++++++++++++++++++++++++ > arch/x86/kernel/cpu/perf_event_intel_uncore.h | 1 + > 2 files changed, 371 insertions(+) > > diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c > index 69a4ad0..8b1f81f 100644 > --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c > +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c > @@ -66,6 +66,12 @@ DEFINE_UNCORE_FORMAT_ATTR(mask_vnw, mask_vnw, "config2:3-4"); > DEFINE_UNCORE_FORMAT_ATTR(mask0, mask0, "config2:0-31"); > DEFINE_UNCORE_FORMAT_ATTR(mask1, mask1, "config2:32-63"); > > +static void uncore_pmu_start_hrtimer(struct intel_uncore_box *box); > +static void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box); > +static void uncore_perf_event_update(struct intel_uncore_box *box, > + struct perf_event *event); > +static void uncore_pmu_event_read(struct perf_event *event); > + > static struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event) > { > return container_of(event->pmu, struct intel_uncore_pmu, pmu); > @@ -1668,6 +1674,348 @@ static struct intel_uncore_type *snb_msr_uncores[] = { > &snb_uncore_cbox, > NULL, > }; > + > +enum { > + SNB_PCI_UNCORE_IMC, > +}; > + > +static struct uncore_event_desc snb_uncore_imc_events[] = { > + INTEL_UNCORE_EVENT_DESC(data_reads, "event=0x01"), > + INTEL_UNCORE_EVENT_DESC(data_reads.scale, "64"), > + INTEL_UNCORE_EVENT_DESC(data_reads.unit, 
"bytes"), > + > + INTEL_UNCORE_EVENT_DESC(data_writes, "event=0x02"), > + INTEL_UNCORE_EVENT_DESC(data_writes.scale, "64"), > + INTEL_UNCORE_EVENT_DESC(data_writes.unit, "bytes"), > + > + { /* end: all zeroes */ }, > +}; > + > +#define SNB_UNCORE_PCI_IMC_EVENT_MASK 0xff > +#define SNB_UNCORE_PCI_IMC_BAR_OFFSET 0x48 > + > +/* page size multiple covering all config regs */ > +#define SNB_UNCORE_PCI_IMC_MAP_SIZE 0x6000 > + > +#define SNB_UNCORE_PCI_IMC_DATA_READS 0x1 > +#define SNB_UNCORE_PCI_IMC_DATA_READS_BASE 0x5050 > +#define SNB_UNCORE_PCI_IMC_DATA_WRITES 0x2 > +#define SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE 0x5054 > +#define SNB_UNCORE_PCI_IMC_CTR_BASE 0x5050 > + > +static struct attribute *snb_uncore_imc_formats_attr[] = { > + &format_attr_event.attr, > + NULL, > +}; > + > +static struct attribute_group snb_uncore_imc_format_group = { > + .name = "format", > + .attrs = snb_uncore_imc_formats_attr, > +}; > + > +static void snb_uncore_imc_init_box(struct intel_uncore_box *box) > +{ > + struct pci_dev *pdev = box->pci_dev; > + u32 addr_lo, addr_hi; > + resource_size_t addr; > + > + pci_read_config_dword(pdev, SNB_UNCORE_PCI_IMC_BAR_OFFSET, &addr_lo); > + addr = addr_lo; > + > +#ifdef CONFIG_PHYS_ADDR_T_64BIT > + pci_read_config_dword(pdev, SNB_UNCORE_PCI_IMC_BAR_OFFSET+4, &addr_hi); > + addr = ((resource_size_t)addr_hi << 32) | addr_lo; > +#endif > + > + addr &= ~(PAGE_SIZE - 1); > + > + box->io_addr = ioremap(addr, SNB_UNCORE_PCI_IMC_MAP_SIZE); > +} > + > +static void snb_uncore_imc_enable_box(struct intel_uncore_box *box) > +{} > + > +static void snb_uncore_imc_disable_box(struct intel_uncore_box *box) > +{} > + > +static void snb_uncore_imc_enable_event(struct intel_uncore_box *box, > + struct perf_event *event) > +{} > + > +static void snb_uncore_imc_disable_event(struct intel_uncore_box *box, > + struct perf_event *event) > +{} > + > +static u64 snb_uncore_imc_read_counter(struct intel_uncore_box *box, > + struct perf_event *event) > +{ > + struct hw_perf_event 
*hwc = &event->hw; > + > + return (u64)*(unsigned int *)(box->io_addr + hwc->event_base); > +} > + > +/* > + * custom event_init() function because we define our own fixed, free > + * running counters, so we do not want to conflict with generic uncore > + * logic. Also simplifies processing > + */ > +static int snb_uncore_imc_event_init(struct perf_event *event) > +{ > + struct intel_uncore_pmu *pmu; > + struct intel_uncore_box *box; > + struct hw_perf_event *hwc = &event->hw; > + u64 cfg = event->attr.config & SNB_UNCORE_PCI_IMC_EVENT_MASK; > + int idx, base; > + > + if (event->attr.type != event->pmu->type) > + return -ENOENT; > + > + pmu = uncore_event_to_pmu(event); > + /* no device found for this pmu */ > + if (pmu->func_id < 0) > + return -ENOENT; > + > + /* Sampling not supported yet */ > + if (hwc->sample_period) > + return -EINVAL; > + > + /* unsupported modes and filters */ > + if (event->attr.exclude_user || > + event->attr.exclude_kernel || > + event->attr.exclude_hv || > + event->attr.exclude_idle || > + event->attr.exclude_host || > + event->attr.exclude_guest || > + event->attr.sample_period) /* no sampling */ > + return -EINVAL; > + > + /* > + * Place all uncore events for a particular physical package > + * onto a single cpu > + */ > + if (event->cpu < 0) > + return -EINVAL; > + > + /* check only supported bits are set */ > + if (event->attr.config & ~SNB_UNCORE_PCI_IMC_EVENT_MASK) > + return -EINVAL; > + > + box = uncore_pmu_to_box(pmu, event->cpu); > + if (!box || box->cpu < 0) > + return -EINVAL; > + > + event->cpu = box->cpu; > + > + event->hw.idx = -1; > + event->hw.last_tag = ~0ULL; > + event->hw.extra_reg.idx = EXTRA_REG_NONE; > + event->hw.branch_reg.idx = EXTRA_REG_NONE; > + /* > + * check event is known (whitelist, determines counter) > + */ > + switch (cfg) { > + case SNB_UNCORE_PCI_IMC_DATA_READS: > + base = SNB_UNCORE_PCI_IMC_DATA_READS_BASE; > + idx = UNCORE_PMC_IDX_FIXED; > + break; > + case SNB_UNCORE_PCI_IMC_DATA_WRITES: > + base = 
SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE; > + idx = UNCORE_PMC_IDX_FIXED + 1; > + break; > + default: > + return -EINVAL; > + } > + > + /* must be done before validate_group */ > + event->hw.event_base = base; > + event->hw.config = cfg; > + event->hw.idx = idx; > + > + /* no group validation needed, we have free running counters */ > + > + return 0; > +} > + > +static int snb_uncore_imc_hw_config(struct intel_uncore_box *box, > + struct perf_event *event) > +{ > + return 0; > +} > + > +static void snb_uncore_imc_event_start(struct perf_event *event, int flags) > +{ > + struct intel_uncore_box *box = uncore_event_to_box(event); > + u64 count; > + > + if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) > + return; > + > + event->hw.state = 0; > + box->n_active++; > + > + list_add_tail(&event->active_entry, &box->active_list); > + > + count = snb_uncore_imc_read_counter(box, event); > + local64_set(&event->hw.prev_count, count); > + > + if (box->n_active == 1) > + uncore_pmu_start_hrtimer(box); > +} > + > +static void snb_uncore_imc_event_stop(struct perf_event *event, int flags) > +{ > + struct intel_uncore_box *box = uncore_event_to_box(event); > + struct hw_perf_event *hwc = &event->hw; > + > + if (!(hwc->state & PERF_HES_STOPPED)) { > + box->n_active--; > + > + WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); > + hwc->state |= PERF_HES_STOPPED; > + > + list_del(&event->active_entry); > + > + if (box->n_active == 0) > + uncore_pmu_cancel_hrtimer(box); > + } > + > + if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { > + /* > + * Drain the remaining delta count out of a event > + * that we are disabling: > + */ > + uncore_perf_event_update(box, event); > + hwc->state |= PERF_HES_UPTODATE; > + } > +} > + > +static int snb_uncore_imc_event_add(struct perf_event *event, int flags) > +{ > + struct intel_uncore_box *box = uncore_event_to_box(event); > + struct hw_perf_event *hwc = &event->hw; > + > + if (!box) > + return -ENODEV; > + > + hwc->state = 
PERF_HES_UPTODATE | PERF_HES_STOPPED; > + if (!(flags & PERF_EF_START)) > + hwc->state |= PERF_HES_ARCH; > + > + snb_uncore_imc_event_start(event, 0); > + > + box->n_events++; > + > + return 0; > +} > + > +static void snb_uncore_imc_event_del(struct perf_event *event, int flags) > +{ > + struct intel_uncore_box *box = uncore_event_to_box(event); > + int i; > + > + snb_uncore_imc_event_stop(event, PERF_EF_UPDATE); > + > + for (i = 0; i < box->n_events; i++) { > + if (event == box->event_list[i]) { > + --box->n_events; > + break; > + } > + } > +} There is no need to update n_events and event_list; they are not used. The rest of the patch looks good. Regards Yan, Zheng > + > +static int snb_pci2phy_map_init(int devid) > +{ > + struct pci_dev *dev = NULL; > + int bus; > + > + dev = pci_get_device(PCI_VENDOR_ID_INTEL, devid, dev); > + if (!dev) > + return -ENOTTY; > + > + bus = dev->bus->number; > + > + pcibus_to_physid[bus] = 0; > + > + pci_dev_put(dev); > + > + return 0; > +} > + > +static struct pmu snb_uncore_imc_pmu = { > + .task_ctx_nr = perf_invalid_context, > + .event_init = snb_uncore_imc_event_init, > + .add = snb_uncore_imc_event_add, > + .del = snb_uncore_imc_event_del, > + .start = snb_uncore_imc_event_start, > + .stop = snb_uncore_imc_event_stop, > + .read = uncore_pmu_event_read, > +}; > + > +static struct intel_uncore_ops snb_uncore_imc_ops = { > + .init_box = snb_uncore_imc_init_box, > + .enable_box = snb_uncore_imc_enable_box, > + .disable_box = snb_uncore_imc_disable_box, > + .disable_event = snb_uncore_imc_disable_event, > + .enable_event = snb_uncore_imc_enable_event, > + .hw_config = snb_uncore_imc_hw_config, > + .read_counter = snb_uncore_imc_read_counter, > +}; > + > +static struct intel_uncore_type snb_uncore_imc = { > + .name = "imc", > + .num_counters = 2, > + .num_boxes = 1, > + .fixed_ctr_bits = 32, > + .fixed_ctr = SNB_UNCORE_PCI_IMC_CTR_BASE, > + .event_descs = snb_uncore_imc_events, > + .format_group = &snb_uncore_imc_format_group, > + .perf_ctr = 
SNB_UNCORE_PCI_IMC_DATA_READS_BASE, > + .event_mask = SNB_UNCORE_PCI_IMC_EVENT_MASK, > + .ops = &snb_uncore_imc_ops, > + .pmu = &snb_uncore_imc_pmu, > +}; > + > +static struct intel_uncore_type *snb_pci_uncores[] = { > + [SNB_PCI_UNCORE_IMC] = &snb_uncore_imc, > + NULL, > +}; > + > +static DEFINE_PCI_DEVICE_TABLE(snb_uncore_pci_ids) = { > + { /* IMC */ > + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SNB_IMC), > + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), > + }, > +}; > + > +static DEFINE_PCI_DEVICE_TABLE(ivb_uncore_pci_ids) = { > + { /* IMC */ > + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_IMC), > + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), > + }, > +}; > + > +static DEFINE_PCI_DEVICE_TABLE(hsw_uncore_pci_ids) = { > + { /* IMC */ > + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC), > + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), > + }, > +}; > + > +static struct pci_driver snb_uncore_pci_driver = { > + .name = "snb_uncore", > + .id_table = snb_uncore_pci_ids, > +}; > + > +static struct pci_driver ivb_uncore_pci_driver = { > + .name = "ivb_uncore", > + .id_table = ivb_uncore_pci_ids, > +}; > + > +static struct pci_driver hsw_uncore_pci_driver = { > + .name = "hsw_uncore", > + .id_table = hsw_uncore_pci_ids, > +}; > + > /* end of Sandy Bridge uncore support */ > > /* Nehalem uncore support */ > @@ -3502,6 +3850,28 @@ static int __init uncore_pci_init(void) > pci_uncores = ivt_pci_uncores; > uncore_pci_driver = &ivt_uncore_pci_driver; > break; > + case 42: /* Sandy Bridge */ > + ret = snb_pci2phy_map_init(PCI_DEVICE_ID_INTEL_SNB_IMC); > + if (ret) > + return ret; > + pci_uncores = snb_pci_uncores; > + uncore_pci_driver = &snb_uncore_pci_driver; > + break; > + case 60: /* Haswell */ > + case 69: /* Haswell Celeron */ > + ret = snb_pci2phy_map_init(PCI_DEVICE_ID_INTEL_HSW_IMC); > + if (ret) > + return ret; > + pci_uncores = snb_pci_uncores; > + uncore_pci_driver = &hsw_uncore_pci_driver; 
> + break; > + case 58: /* Ivy Bridge */ > + ret = snb_pci2phy_map_init(PCI_DEVICE_ID_INTEL_IVB_IMC); > + if (ret) > + return ret; > + pci_uncores = snb_pci_uncores; > + uncore_pci_driver = &ivb_uncore_pci_driver; > + break; > default: > return 0; > } > diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h > index c63a3ff..0770da2 100644 > --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h > +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h > @@ -492,6 +492,7 @@ struct intel_uncore_box { > u64 hrtimer_duration; /* hrtimer timeout for this box */ > struct hrtimer hrtimer; > struct list_head list; > + void *io_addr; > struct intel_uncore_extra_reg shared_regs[0]; > }; > > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/