Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S966306AbcJZM3K (ORCPT ); Wed, 26 Oct 2016 08:29:10 -0400 Received: from mail.linuxfoundation.org ([140.211.169.12]:55430 "EHLO mail.linuxfoundation.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S941731AbcJZM3B (ORCPT ); Wed, 26 Oct 2016 08:29:01 -0400 From: Greg Kroah-Hartman To: linux-kernel@vger.kernel.org Cc: Greg Kroah-Hartman , stable@vger.kernel.org, Frederic Barrat , Andrew Donnellan , Vaibhav Jain , Michael Ellerman Subject: [PATCH 4.8 113/140] cxl: Prevent adapter reset if an active context exists Date: Wed, 26 Oct 2016 14:22:53 +0200 Message-Id: <20161026122225.216433334@linuxfoundation.org> X-Mailer: git-send-email 2.10.1 In-Reply-To: <20161026122220.384323763@linuxfoundation.org> References: <20161026122220.384323763@linuxfoundation.org> User-Agent: quilt/0.64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 9281 Lines: 284 4.8-stable review patch. If anyone has any objections, please let me know. ------------------ From: Vaibhav Jain commit 70b565bbdb911023373e035225ab10077e4ab937 upstream. This patch prevents resetting the cxl adapter via sysfs in presence of one or more active cxl_context on it. This protects against an unrecoverable error caused by PSL owning a dirty cache line even after reset and host tries to touch the same cache line. In case a force reset of the card is required irrespective of any active contexts, the int value -1 can be stored in the 'reset' sysfs attribute of the card. The patch introduces a new atomic_t member named contexts_num inside struct cxl that holds the number of active context attached to the card , which is checked against '0' before proceeding with the reset. To prevent against a race condition where a context is activated just after reset check is performed, the contexts_num is atomically set to '-1' after reset-check to indicate that no more contexts can be activated on the card anymore. Before activating a context we atomically test if contexts_num is non-negative and if so, increment its value by one. In case the value of contexts_num is negative then it indicates that the card is about to be reset and context activation is error-ed out at that point. Fixes: 62fa19d4b4fd ("cxl: Add ability to reset the card") Acked-by: Frederic Barrat Reviewed-by: Andrew Donnellan Signed-off-by: Vaibhav Jain Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-class-cxl | 7 +++-- drivers/misc/cxl/api.c | 9 ++++++ drivers/misc/cxl/context.c | 3 ++ drivers/misc/cxl/cxl.h | 24 +++++++++++++++++ drivers/misc/cxl/file.c | 11 +++++++ drivers/misc/cxl/guest.c | 3 ++ drivers/misc/cxl/main.c | 42 +++++++++++++++++++++++++++++- drivers/misc/cxl/pci.c | 2 + drivers/misc/cxl/sysfs.c | 27 ++++++++++++++++--- 9 files changed, 121 insertions(+), 7 deletions(-) --- a/Documentation/ABI/testing/sysfs-class-cxl +++ b/Documentation/ABI/testing/sysfs-class-cxl @@ -220,8 +220,11 @@ What: /sys/class/cxl//re Date: October 2014 Contact: linuxppc-dev@lists.ozlabs.org Description: write only - Writing 1 will issue a PERST to card which may cause the card - to reload the FPGA depending on load_image_on_perst. + Writing 1 will issue a PERST to card provided there are no + contexts active on any one of the card AFUs. This may cause + the card to reload the FPGA depending on load_image_on_perst. + Writing -1 will do a force PERST irrespective of any active + contexts on the card AFUs. Users: https://github.com/ibm-capi/libcxl What: /sys/class/cxl//perst_reloads_same_image (not in a guest) --- a/drivers/misc/cxl/api.c +++ b/drivers/misc/cxl/api.c @@ -229,6 +229,14 @@ int cxl_start_context(struct cxl_context if (ctx->status == STARTED) goto out; /* already started */ + /* + * Increment the mapped context count for adapter. This also checks + * if adapter_context_lock is taken. + */ + rc = cxl_adapter_context_get(ctx->afu->adapter); + if (rc) + goto out; + if (task) { ctx->pid = get_task_pid(task, PIDTYPE_PID); ctx->glpid = get_task_pid(task->group_leader, PIDTYPE_PID); @@ -240,6 +248,7 @@ int cxl_start_context(struct cxl_context if ((rc = cxl_ops->attach_process(ctx, kernel, wed, 0))) { put_pid(ctx->pid); + cxl_adapter_context_put(ctx->afu->adapter); cxl_ctx_put(); goto out; } --- a/drivers/misc/cxl/context.c +++ b/drivers/misc/cxl/context.c @@ -238,6 +238,9 @@ int __detach_context(struct cxl_context put_pid(ctx->glpid); cxl_ctx_put(); + + /* Decrease the attached context count on the adapter */ + cxl_adapter_context_put(ctx->afu->adapter); return 0; } --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -615,6 +615,14 @@ struct cxl { bool perst_select_user; bool perst_same_image; bool psl_timebase_synced; + + /* + * number of contexts mapped on to this card. Possible values are: + * >0: Number of contexts mapped and new one can be mapped. + * 0: No active contexts and new ones can be mapped. + * -1: No contexts mapped and new ones cannot be mapped. + */ + atomic_t contexts_num; }; int cxl_pci_alloc_one_irq(struct cxl *adapter); @@ -940,4 +948,20 @@ bool cxl_pci_is_vphb_device(struct pci_d /* decode AFU error bits in the PSL register PSL_SERR_An */ void cxl_afu_decode_psl_serr(struct cxl_afu *afu, u64 serr); + +/* + * Increments the number of attached contexts on an adapter. + * In case an adapter_context_lock is taken the return -EBUSY. + */ +int cxl_adapter_context_get(struct cxl *adapter); + +/* Decrements the number of attached contexts on an adapter */ +void cxl_adapter_context_put(struct cxl *adapter); + +/* If no active contexts then prevents contexts from being attached */ +int cxl_adapter_context_lock(struct cxl *adapter); + +/* Unlock the contexts-lock if taken. Warn and force unlock otherwise */ +void cxl_adapter_context_unlock(struct cxl *adapter); + #endif --- a/drivers/misc/cxl/file.c +++ b/drivers/misc/cxl/file.c @@ -205,11 +205,22 @@ static long afu_ioctl_start_work(struct ctx->pid = get_task_pid(current, PIDTYPE_PID); ctx->glpid = get_task_pid(current->group_leader, PIDTYPE_PID); + /* + * Increment the mapped context count for adapter. This also checks + * if adapter_context_lock is taken. + */ + rc = cxl_adapter_context_get(ctx->afu->adapter); + if (rc) { + afu_release_irqs(ctx, ctx); + goto out; + } + trace_cxl_attach(ctx, work.work_element_descriptor, work.num_interrupts, amr); if ((rc = cxl_ops->attach_process(ctx, false, work.work_element_descriptor, amr))) { afu_release_irqs(ctx, ctx); + cxl_adapter_context_put(ctx->afu->adapter); goto out; } --- a/drivers/misc/cxl/guest.c +++ b/drivers/misc/cxl/guest.c @@ -1152,6 +1152,9 @@ struct cxl *cxl_guest_init_adapter(struc if ((rc = cxl_sysfs_adapter_add(adapter))) goto err_put1; + /* release the context lock as the adapter is configured */ + cxl_adapter_context_unlock(adapter); + return adapter; err_put1: --- a/drivers/misc/cxl/main.c +++ b/drivers/misc/cxl/main.c @@ -243,8 +243,10 @@ struct cxl *cxl_alloc_adapter(void) if (dev_set_name(&adapter->dev, "card%i", adapter->adapter_num)) goto err2; - return adapter; + /* start with context lock taken */ + atomic_set(&adapter->contexts_num, -1); + return adapter; err2: cxl_remove_adapter_nr(adapter); err1: @@ -286,6 +288,44 @@ int cxl_afu_select_best_mode(struct cxl_ return 0; } +int cxl_adapter_context_get(struct cxl *adapter) +{ + int rc; + + rc = atomic_inc_unless_negative(&adapter->contexts_num); + return rc >= 0 ? 0 : -EBUSY; +} + +void cxl_adapter_context_put(struct cxl *adapter) +{ + atomic_dec_if_positive(&adapter->contexts_num); +} + +int cxl_adapter_context_lock(struct cxl *adapter) +{ + int rc; + /* no active contexts -> contexts_num == 0 */ + rc = atomic_cmpxchg(&adapter->contexts_num, 0, -1); + return rc ? -EBUSY : 0; +} + +void cxl_adapter_context_unlock(struct cxl *adapter) +{ + int val = atomic_cmpxchg(&adapter->contexts_num, -1, 0); + + /* + * contexts lock taken -> contexts_num == -1 + * If not true then show a warning and force reset the lock. + * This will happen when context_unlock was requested without + * doing a context_lock. + */ + if (val != -1) { + atomic_set(&adapter->contexts_num, 0); + WARN(1, "Adapter context unlocked with %d active contexts", + val); + } +} + static int __init init_cxl(void) { int rc = 0; --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -1484,6 +1484,8 @@ static int cxl_configure_adapter(struct if ((rc = cxl_native_register_psl_err_irq(adapter))) goto err; + /* Release the context lock as adapter is configured */ + cxl_adapter_context_unlock(adapter); return 0; err: --- a/drivers/misc/cxl/sysfs.c +++ b/drivers/misc/cxl/sysfs.c @@ -75,12 +75,31 @@ static ssize_t reset_adapter_store(struc int val; rc = sscanf(buf, "%i", &val); - if ((rc != 1) || (val != 1)) + if ((rc != 1) || (val != 1 && val != -1)) return -EINVAL; - if ((rc = cxl_ops->adapter_reset(adapter))) - return rc; - return count; + /* + * See if we can lock the context mapping that's only allowed + * when there are no contexts attached to the adapter. Once + * taken this will also prevent any context from getting activated. + */ + if (val == 1) { + rc = cxl_adapter_context_lock(adapter); + if (rc) + goto out; + + rc = cxl_ops->adapter_reset(adapter); + /* In case reset failed release context lock */ + if (rc) + cxl_adapter_context_unlock(adapter); + + } else if (val == -1) { + /* Perform a forced adapter reset */ + rc = cxl_ops->adapter_reset(adapter); + } + +out: + return rc ? rc : count; } static ssize_t load_image_on_perst_show(struct device *device,