Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751652AbbBRJBU (ORCPT ); Wed, 18 Feb 2015 04:01:20 -0500 Received: from mail.skyhub.de ([78.46.96.112]:47175 "EHLO mail.skyhub.de" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1750753AbbBRJBS (ORCPT ); Wed, 18 Feb 2015 04:01:18 -0500 Date: Wed, 18 Feb 2015 10:00:27 +0100 From: Borislav Petkov To: Daniel J Blueman Cc: Doug Thompson , Mauro Carvalho Chehab , linux-edac@vger.kernel.org, linux-kernel@vger.kernel.org, Steffen Persvold Subject: Re: [PATCH v2] x86: Prevent oops with >16 memory controllers Message-ID: <20150218090027.GE3211@pd.tnic> References: <1424144078-24589-1-git-send-email-daniel@numascale.com> MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Disposition: inline In-Reply-To: <1424144078-24589-1-git-send-email-daniel@numascale.com> User-Agent: Mutt/1.5.23 (2014-03-12) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 4013 Lines: 159 On Tue, Feb 17, 2015 at 11:34:38AM +0800, Daniel J Blueman wrote: > When ECC interrupts occur on memory controllers after EDAC_MAX_MCS (16), the > kernel fatally dereferences unallocated structures [1]; this occurs on at > least NumaConnect systems. > > Fix by checking if a memory controller info structure was found; candidate for > stable. > > v1->2: Use edac_mc_find() as per Boris's suggestion > > Signed-off-by: Daniel J Blueman Applied and queued for stable, thanks. I went and killed that mcis array too, on top. --- From: Borislav Petkov Subject: [PATCH] EDAC, amd64_edac: Get rid of per-node driver instances ... and do the proper thing using EDAC core facilities. 
Cc: Daniel J Blueman Signed-off-by: Borislav Petkov --- drivers/edac/amd64_edac.c | 33 +++++++++++++-------------------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 3d6a511a9025..92772fffc52f 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -20,8 +20,7 @@ static struct msr __percpu *msrs; */ static atomic_t drv_instances = ATOMIC_INIT(0); -/* Per-node driver instances */ -static struct mem_ctl_info **mcis; +/* Per-node stuff */ static struct ecc_settings **ecc_stngs; /* @@ -903,9 +902,17 @@ static int k8_early_channel_count(struct amd64_pvt *pvt) /* On F10h and later ErrAddr is MC4_ADDR[47:1] */ static u64 get_error_address(struct amd64_pvt *pvt, struct mce *m) { - u64 addr; + u16 mce_nid = amd_get_nb_id(m->extcpu); + struct mem_ctl_info *mci; u8 start_bit = 1; u8 end_bit = 47; + u64 addr; + + mci = edac_mc_find(mce_nid); + if (!mci) + return 0; + + pvt = mci->pvt_info; if (pvt->fam == 0xf) { start_bit = 3; @@ -918,17 +925,13 @@ static u64 get_error_address(struct amd64_pvt *pvt, struct mce *m) * Erratum 637 workaround */ if (pvt->fam == 0x15) { - struct amd64_pvt *pvt; u64 cc6_base, tmp_addr; u32 tmp; - u16 mce_nid; u8 intlv_en; if ((addr & GENMASK_ULL(47, 24)) >> 24 != 0x00fdf7) return addr; - mce_nid = amd_get_nb_id(m->extcpu); - pvt = mcis[mce_nid]->pvt_info; amd64_read_pci_cfg(pvt->F1, DRAM_LOCAL_NODE_LIM, &tmp); intlv_en = tmp >> 21 & 0x7; @@ -1511,7 +1514,7 @@ static int f1x_lookup_addr_in_dct(u64 in_addr, u8 nid, u8 dct) int cs_found = -EINVAL; int csrow; - mci = mcis[nid]; + mci = edac_mc_find(nid); if (!mci) return cs_found; @@ -2837,8 +2840,6 @@ static int init_one_instance(struct pci_dev *F2) amd_register_ecc_decoder(decode_bus_error); - mcis[nid] = mci; - atomic_inc(&drv_instances); return 0; @@ -2936,7 +2937,6 @@ static void remove_one_instance(struct pci_dev *pdev) /* Free the EDAC CORE resources */ mci->pvt_info = NULL; - mcis[nid] = NULL; 
kfree(pvt); edac_mc_free(mci); @@ -2974,7 +2974,7 @@ static void setup_pci_device(void) if (pci_ctl) return; - mci = mcis[0]; + mci = edac_mc_find(0); if (!mci) return; @@ -2998,9 +2998,8 @@ static int __init amd64_edac_init(void) goto err_ret; err = -ENOMEM; - mcis = kzalloc(amd_nb_num() * sizeof(mcis[0]), GFP_KERNEL); ecc_stngs = kzalloc(amd_nb_num() * sizeof(ecc_stngs[0]), GFP_KERNEL); - if (!(mcis && ecc_stngs)) + if (!ecc_stngs) goto err_free; msrs = msrs_alloc(); @@ -3031,9 +3030,6 @@ err_pci: msrs = NULL; err_free: - kfree(mcis); - mcis = NULL; - kfree(ecc_stngs); ecc_stngs = NULL; @@ -3051,9 +3047,6 @@ static void __exit amd64_edac_exit(void) kfree(ecc_stngs); ecc_stngs = NULL; - kfree(mcis); - mcis = NULL; - msrs_free(msrs); msrs = NULL; } -- 2.2.0.33.gc18b867 -- Regards/Gruss, Boris. ECO tip #101: Trim your mails when you reply. -- -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/