Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S933960AbcJLSRd (ORCPT ); Wed, 12 Oct 2016 14:17:33 -0400 Received: from fw-tnat.cambridge.arm.com ([217.140.96.140]:55797 "EHLO cam-smtp0.cambridge.arm.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S933373AbcJLSRS (ORCPT ); Wed, 12 Oct 2016 14:17:18 -0400 From: Punit Agrawal To: Tyler Baicar Cc: christoffer.dall@linaro.org, marc.zyngier@arm.com, pbonzini@redhat.com, rkrcmar@redhat.com, linux@armlinux.org.uk, catalin.marinas@arm.com, will.deacon@arm.com, rjw@rjwysocki.net, lenb@kernel.org, matt@codeblueprint.co.uk, robert.moore@intel.com, lv.zheng@intel.com, mark.rutland@arm.com, james.morse@arm.com, akpm@linux-foundation.org, sandeepa.s.prabhu@gmail.com, shijie.huang@arm.com, paul.gortmaker@windriver.com, tomasz.nowicki@linaro.org, fu.wei@linaro.org, rostedt@goodmis.org, bristot@redhat.com, linux-arm-kernel@lists.infradead.org, kvmarm@lists.cs.columbia.edu, Dkvm@vger.kernel.org, linux-kernel@vger.kernel.org, linux-acpi@vger.kernel.org, linux-efi@vger.kernel.org, devel@acpica.org, "Jonathan \(Zhixiong\) Zhang" , Richard Ruigrok , Naveen Kaje Subject: Re: [PATCH V3 01/10] acpi: apei: read ack upon ghes record consumption References: <1475875882-2604-1-git-send-email-tbaicar@codeaurora.org> <1475875882-2604-2-git-send-email-tbaicar@codeaurora.org> Date: Wed, 12 Oct 2016 16:39:25 +0100 In-Reply-To: <1475875882-2604-2-git-send-email-tbaicar@codeaurora.org> (Tyler Baicar's message of "Fri, 7 Oct 2016 15:31:13 -0600") Message-ID: <8760oxtw42.fsf@e105922-lin.cambridge.arm.com> User-Agent: Gnus/5.13 (Gnus v5.13) Emacs/24.5 (gnu/linux) MIME-Version: 1.0 Content-Type: text/plain Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 5942 Lines: 181 Hi Tyler, A few comments below. Tyler Baicar writes: > A RAS (Reliability, Availability, Serviceability) controller > may be a separate processor running in parallel with OS > execution, and may generate error records for consumption by > the OS. If the RAS controller produces multiple error records, > then they may be overwritten before the OS has consumed them. > > The Generic Hardware Error Source (GHES) v2 structure > introduces the capability for the OS to acknowledge the > consumption of the error record generated by the RAS > controller. A RAS controller supporting GHESv2 shall wait for > the acknowledgment before writing a new error record, thus > eliminating the race condition. > > Signed-off-by: Jonathan (Zhixiong) Zhang > Signed-off-by: Richard Ruigrok > Signed-off-by: Tyler Baicar > Signed-off-by: Naveen Kaje > --- > drivers/acpi/apei/ghes.c | 41 +++++++++++++++++++++++++++++++++++++++++ > drivers/acpi/apei/hest.c | 7 +++++-- > include/acpi/ghes.h | 1 + > 3 files changed, 47 insertions(+), 2 deletions(-) > > diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c > index 60746ef..3021f0e 100644 > --- a/drivers/acpi/apei/ghes.c > +++ b/drivers/acpi/apei/ghes.c > @@ -45,6 +45,7 @@ > #include > #include > > +#include > #include > #include > #include > @@ -244,10 +245,22 @@ static struct ghes *ghes_new(struct acpi_hest_generic *generic) > struct ghes *ghes; > unsigned int error_block_length; > int rc; > + struct acpi_hest_header *hest_hdr; > > ghes = kzalloc(sizeof(*ghes), GFP_KERNEL); > if (!ghes) > return ERR_PTR(-ENOMEM); > + > + hest_hdr = (struct acpi_hest_header *)generic; > + if (hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR_V2) { > + ghes->generic_v2 = (struct acpi_hest_generic_v2 *)generic; > + rc = apei_map_generic_address( > + &ghes->generic_v2->read_ack_register); > + if (rc) > + goto err_unmap; > + } else > + ghes->generic_v2 = NULL; Since you kzalloc ghes, shouldn't ghes->generic_v2 be NULL already? > + > ghes->generic = generic; > rc = apei_map_generic_address(&generic->error_status_address); > if (rc) > @@ -270,6 +283,9 @@ static struct ghes *ghes_new(struct acpi_hest_generic *generic) > > err_unmap: > apei_unmap_generic_address(&generic->error_status_address); > + if (ghes->generic_v2) > + apei_unmap_generic_address( > + &ghes->generic_v2->read_ack_register); > err_free: > kfree(ghes); > return ERR_PTR(rc); > @@ -279,6 +295,9 @@ static void ghes_fini(struct ghes *ghes) > { > kfree(ghes->estatus); > apei_unmap_generic_address(&ghes->generic->error_status_address); > + if (ghes->generic_v2) > + apei_unmap_generic_address( > + &ghes->generic_v2->read_ack_register); > } > > static inline int ghes_severity(int severity) > @@ -648,6 +667,22 @@ static void ghes_estatus_cache_add( > rcu_read_unlock(); > } > > +static int ghes_do_read_ack(struct acpi_hest_generic_v2 *generic_v2) > +{ > + int rc; > + u64 val = 0; > + > + rc = apei_read(&val, &generic_v2->read_ack_register); > + if (rc) > + return rc; > + val &= generic_v2->read_ack_preserve << > + generic_v2->read_ack_register.bit_offset; > + val |= generic_v2->read_ack_write; Reading the spec, it is not clear whether you need the left shift above. Having said that, if you do it for read_ack_preserve, do you also need to left shift read_ack_write by read_ack_register.bit_offset? > + rc = apei_write(val, &generic_v2->read_ack_register); > + > + return rc; > +} > + > static int ghes_proc(struct ghes *ghes) > { > int rc; > @@ -660,6 +695,12 @@ static int ghes_proc(struct ghes *ghes) > ghes_estatus_cache_add(ghes->generic, ghes->estatus); > } > ghes_do_proc(ghes, ghes->estatus); > + > + if (ghes->generic_v2) { > + rc = ghes_do_read_ack(ghes->generic_v2); > + if (rc) > + return rc; > + } > out: > ghes_clear_estatus(ghes); > return 0; > diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c > index 792a0d9..ef725a9 100644 > --- a/drivers/acpi/apei/hest.c > +++ b/drivers/acpi/apei/hest.c > @@ -52,6 +52,7 @@ static const int hest_esrc_len_tab[ACPI_HEST_TYPE_RESERVED] = { > [ACPI_HEST_TYPE_AER_ENDPOINT] = sizeof(struct acpi_hest_aer), > [ACPI_HEST_TYPE_AER_BRIDGE] = sizeof(struct acpi_hest_aer_bridge), > [ACPI_HEST_TYPE_GENERIC_ERROR] = sizeof(struct acpi_hest_generic), > + [ACPI_HEST_TYPE_GENERIC_ERROR_V2] = sizeof(struct acpi_hest_generic_v2), > }; > > static int hest_esrc_len(struct acpi_hest_header *hest_hdr) > @@ -146,7 +147,8 @@ static int __init hest_parse_ghes_count(struct acpi_hest_header *hest_hdr, void > { > int *count = data; > > - if (hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR) > + if (hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR || > + hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR_V2) > (*count)++; > return 0; > } > @@ -157,7 +159,8 @@ static int __init hest_parse_ghes(struct acpi_hest_header *hest_hdr, void *data) > struct ghes_arr *ghes_arr = data; > int rc, i; > > - if (hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR) > + if (hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR && > + hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR_V2) > return 0; > > if (!((struct acpi_hest_generic *)hest_hdr)->enabled) > diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h > index 720446c..d0108b6 100644 > --- a/include/acpi/ghes.h > +++ b/include/acpi/ghes.h > @@ -14,6 +14,7 @@ > > struct ghes { > struct acpi_hest_generic *generic; > + struct acpi_hest_generic_v2 *generic_v2; You either have a GHES or a GHESv2 structure. Instead of duplication, could this be represented as a union? Thanks, Punit > struct acpi_hest_generic_status *estatus; > u64 buffer_paddr; > unsigned long flags;