Received: by 2002:a05:6a10:22f:0:0:0:0 with SMTP id 15csp2702400pxk; Sun, 27 Sep 2020 19:06:34 -0700 (PDT) X-Google-Smtp-Source: ABdhPJwJzZuhOEAhw1xm7NH+SveYz1GFeB3fcEUbEShXRrrzPy5bn5ay5xy0vyIhwNUTNMDllSHH X-Received: by 2002:a17:906:c015:: with SMTP id e21mr12696804ejz.432.1601258794546; Sun, 27 Sep 2020 19:06:34 -0700 (PDT) ARC-Seal: i=1; a=rsa-sha256; t=1601258794; cv=none; d=google.com; s=arc-20160816; b=ib3+d2JvqVAMO+6WXcZXRttzqChDA3MRCJ1gybI7nhpGkmoSmoq0CtAG100PNAxrHz B8008dQK6FaCst/iOKP8kbanAeVHyNuevwBxJwfUdN1uBOn38XHAVOl0zwtR+3O9UhKx 8FAIZnsbsCOVIv2YXN2uye4tiCe2dlDTlqDIfLVIDgEbIhtIcAE7wwkQ0QMda31CcIqt Tl3Io775MJHKImWGksiaSGRe0lP/cEgTY+sGDTTlPYTr2ghaY+xDJSKDYJdWnPd+lqnj gotWqZ/CbIQtNIPypf7SomyohMaTl2uAVd1qxzYrGxVpvIk++93EBea3kp4U5JvtVDkt x3TQ== ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=arc-20160816; h=list-id:precedence:mime-version:message-id:date:subject:cc:to:from; bh=ZvD6d/MU46KH+kEKAARDAOvyTl8FjQt0zObbxU2NdEY=; b=BsQIcNwE2BQZ7gn4HQ1Lmu0j37YNGEPg1SwhNH7IpjZw3zCso1vVFHzhJEFjxUJ6ba T9HNXYcJ5vF093ENFX5qtAMveOWbR1nfd90gWQONFa2Rt4vMqp42FUp2mAbCysmsdkPs 9bXzrruxG/+rTw+fZIghlnxaX8qYk1xVmhjWiDvioN76iSeedQd+DZVKMyRwQAmp2Qs7 PKUg+2kOxQlSxzt6VUKZXr2thNAn8TtJcr7ulwONaB2YONMx2a2sv4XoPF5IbM+PZ066 LjjPpwoLaY0UZoempuw46jwd+/yIIvj/0WvEwA1gxa86C6Qwripxw3ntUfrBf/SLbkSj iTtA== ARC-Authentication-Results: i=1; mx.google.com; spf=pass (google.com: domain of linux-kernel-owner@vger.kernel.org designates 23.128.96.18 as permitted sender) smtp.mailfrom=linux-kernel-owner@vger.kernel.org Return-Path: Received: from vger.kernel.org (vger.kernel.org. 
[23.128.96.18]) by mx.google.com with ESMTP id yd22si8035391ejb.546.2020.09.27.19.06.00; Sun, 27 Sep 2020 19:06:34 -0700 (PDT) Received-SPF: pass (google.com: domain of linux-kernel-owner@vger.kernel.org designates 23.128.96.18 as permitted sender) client-ip=23.128.96.18; Authentication-Results: mx.google.com; spf=pass (google.com: domain of linux-kernel-owner@vger.kernel.org designates 23.128.96.18 as permitted sender) smtp.mailfrom=linux-kernel-owner@vger.kernel.org Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726476AbgI1CEN (ORCPT + 99 others); Sun, 27 Sep 2020 22:04:13 -0400 Received: from szxga06-in.huawei.com ([45.249.212.32]:37056 "EHLO huawei.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1726409AbgI1CEN (ORCPT ); Sun, 27 Sep 2020 22:04:13 -0400 Received: from DGGEMS408-HUB.china.huawei.com (unknown [172.30.72.58]) by Forcepoint Email with ESMTP id 389ABC2CD5F936A6489C; Mon, 28 Sep 2020 10:04:09 +0800 (CST) Received: from localhost.localdomain (10.67.165.24) by DGGEMS408-HUB.china.huawei.com (10.3.19.208) with Microsoft SMTP Server id 14.3.487.0; Mon, 28 Sep 2020 10:04:02 +0800 From: Xiaofei Tan To: , , , , , , , , CC: , , , Xiaofei Tan Subject: [PATCH v3] ACPI / APEI: do memory failure on the physical address reported by ARM processor error section Date: Mon, 28 Sep 2020 10:02:40 +0800 Message-ID: <1601258560-6658-1-git-send-email-tanxiaofei@huawei.com> X-Mailer: git-send-email 2.8.1 MIME-Version: 1.0 Content-Type: text/plain X-Originating-IP: [10.67.165.24] X-CFilter-Loop: Reflected Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org After commit 8fcc4ae6faf8 ("arm64: acpi: Make apei_claim_sea() synchronise with APEI's irq work") was applied, do_sea() returns directly for user-mode if apei_claim_sea() handled any error record. Therefore, each error record reported by the user-mode SEA must be effectively processed in the APEI GHES driver.
Currently, the GHES driver only processes the Memory Error Section (ignoring the PCIe Error Section, as it has nothing to do with SEA). That is not enough, because the ARM Processor Error Section could also be used for SEA on some hardware platforms, such as the Kunpeng9xx series. We can't ask them to switch to the Memory Error Section for two reasons: 1) The servers have already been delivered to customers, so switching would introduce compatibility issues. 2) It makes sense to use the ARM Processor Error Section, because either cache or memory errors could generate an SEA when consumed by a processor. Do memory failure handling for the ARM Processor Error Section just like for the Memory Error Section. Signed-off-by: Xiaofei Tan --- Changes since v2: - Updated commit log --- drivers/acpi/apei/ghes.c | 70 ++++++++++++++++++++++++++++++++++++------------ 1 file changed, 53 insertions(+), 17 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 99df00f..ca0aa97 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -441,28 +441,35 @@ static void ghes_kick_task_work(struct callback_head *head) gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node, node_len); } -static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, - int sev) +static bool ghes_do_memory_failure(u64 physical_addr, int flags) { unsigned long pfn; - int flags = -1; - int sec_sev = ghes_severity(gdata->error_severity); - struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata); if (!IS_ENABLED(CONFIG_ACPI_APEI_MEMORY_FAILURE)) return false; - if (!(mem_err->validation_bits & CPER_MEM_VALID_PA)) - return false; - - pfn = mem_err->physical_addr >> PAGE_SHIFT; + pfn = PHYS_PFN(physical_addr); if (!pfn_valid(pfn)) { pr_warn_ratelimited(FW_WARN GHES_PFX "Invalid address in generic error data: %#llx\n", - mem_err->physical_addr); + physical_addr); return false; } + memory_failure_queue(pfn, flags); + return true; +} + +static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, + int
sev) +{ + int flags = -1; + int sec_sev = ghes_severity(gdata->error_severity); + struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata); + + if (!(mem_err->validation_bits & CPER_MEM_VALID_PA)) + return false; + /* iff following two events can be handled properly by now */ if (sec_sev == GHES_SEV_CORRECTED && (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED)) @@ -470,14 +477,45 @@ static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE) flags = 0; - if (flags != -1) { - memory_failure_queue(pfn, flags); - return true; - } + if (flags != -1) + return ghes_do_memory_failure(mem_err->physical_addr, flags); return false; } +static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata, int sev) +{ + struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata); + struct cper_arm_err_info *err_info; + bool queued = false; + int sec_sev, i; + + log_arm_hw_error(err); + + sec_sev = ghes_severity(gdata->error_severity); + if (sev != GHES_SEV_RECOVERABLE || sec_sev != GHES_SEV_RECOVERABLE) + return false; + + err_info = (struct cper_arm_err_info *) (err + 1); + for (i = 0; i < err->err_info_num; i++, err_info++) { + if (!(err_info->validation_bits & CPER_ARM_INFO_VALID_PHYSICAL_ADDR)) + continue; + + if (err_info->type != CPER_ARM_CACHE_ERROR) { + pr_warn_ratelimited(FW_WARN GHES_PFX + "Physical address should be invalid for %s\n", + err_info->type < ARRAY_SIZE(cper_proc_error_type_strs) ? + cper_proc_error_type_strs[err_info->type] : "unknown error type"); + continue; + } + + if (ghes_do_memory_failure(err_info->physical_fault_addr, 0)) + queued = true; + } + + return queued; +} + /* * PCIe AER errors need to be sent to the AER driver for reporting and * recovery. 
The GHES severities map to the following AER severities and @@ -605,9 +643,7 @@ static bool ghes_do_proc(struct ghes *ghes, ghes_handle_aer(gdata); } else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) { - struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata); - - log_arm_hw_error(err); + queued = ghes_handle_arm_hw_error(gdata, sev); } else { void *err = acpi_hest_get_payload(gdata); -- 2.8.1