Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1754235AbdHXVZF (ORCPT ); Thu, 24 Aug 2017 17:25:05 -0400 Received: from mail-wm0-f50.google.com ([74.125.82.50]:37411 "EHLO mail-wm0-f50.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753888AbdHXVXp (ORCPT ); Thu, 24 Aug 2017 17:23:45 -0400 From: Philipp Reisner To: Jens Axboe , linux-kernel@vger.kernel.org Cc: drbd-dev@lists.linbit.com Subject: [PATCH 12/17] drbd: fix potential deadlock when trying to detach during handshake Date: Thu, 24 Aug 2017 23:23:09 +0200 Message-Id: <1503609794-13233-13-git-send-email-philipp.reisner@linbit.com> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1503609794-13233-1-git-send-email-philipp.reisner@linbit.com> References: <1503609794-13233-1-git-send-email-philipp.reisner@linbit.com> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 6033 Lines: 168 From: Lars Ellenberg When requesting a detach, we first suspend IO, and also inhibit meta-data IO by means of drbd_md_get_buffer(), because we don't want to "fail" the disk while there is IO in-flight: the transition into D_FAILED for detach purposes may get misinterpreted as actual IO error in a confused endio function. We wrap it all into wait_event(), to retry in case the drbd_req_state() returns SS_IN_TRANSIENT_STATE, as it does for example during an ongoing connection handshake. In that example, the receiver thread may need to grab drbd_md_get_buffer() during the handshake to make progress. To avoid potential deadlock with detach, detach needs to grab and release the meta data buffer inside of that wait_event retry loop. To avoid lock inversion between mutex_lock(&device->state_mutex) and drbd_md_get_buffer(device), introduce a new enum chg_state_flag CS_INHIBIT_MD_IO, and move the call to drbd_md_get_buffer() inside the state_mutex grabbed in drbd_req_state(). Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index c383b6c..6bb58a6 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2149,34 +2149,13 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) static int adm_detach(struct drbd_device *device, int force) { - enum drbd_state_rv retcode; - void *buffer; - int ret; - if (force) { set_bit(FORCE_DETACH, &device->flags); drbd_force_state(device, NS(disk, D_FAILED)); - retcode = SS_SUCCESS; - goto out; + return SS_SUCCESS; } - drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */ - buffer = drbd_md_get_buffer(device, __func__); /* make sure there is no in-flight meta-data IO */ - if (buffer) { - retcode = drbd_request_state(device, NS(disk, D_FAILED)); - drbd_md_put_buffer(device); - } else /* already <= D_FAILED */ - retcode = SS_NOTHING_TO_DO; - /* D_FAILED will transition to DISKLESS. */ - drbd_resume_io(device); - ret = wait_event_interruptible(device->misc_wait, - device->state.disk != D_FAILED); - if ((int)retcode == (int)SS_IS_DISKLESS) - retcode = SS_NOTHING_TO_DO; - if (ret) - retcode = ERR_INTR; -out: - return retcode; + return drbd_request_detach_interruptible(device); } /* Detaching the disk is a process in multiple stages. First we need to lock diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 306f116..0813c65 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -579,11 +579,14 @@ drbd_req_state(struct drbd_device *device, union drbd_state mask, unsigned long flags; union drbd_state os, ns; enum drbd_state_rv rv; + void *buffer = NULL; init_completion(&done); if (f & CS_SERIALIZE) mutex_lock(device->state_mutex); + if (f & CS_INHIBIT_MD_IO) + buffer = drbd_md_get_buffer(device, __func__); spin_lock_irqsave(&device->resource->req_lock, flags); os = drbd_read_state(device); @@ -636,6 +639,8 @@ drbd_req_state(struct drbd_device *device, union drbd_state mask, } abort: + if (buffer) + drbd_md_put_buffer(device); if (f & CS_SERIALIZE) mutex_unlock(device->state_mutex); @@ -664,6 +669,47 @@ _drbd_request_state(struct drbd_device *device, union drbd_state mask, return rv; } +/* + * We grab drbd_md_get_buffer(), because we don't want to "fail" the disk while + * there is IO in-flight: the transition into D_FAILED for detach purposes + * may get misinterpreted as actual IO error in a confused endio function. + * + * We wrap it all into wait_event(), to retry in case the drbd_req_state() + * returns SS_IN_TRANSIENT_STATE. + * + * To avoid potential deadlock with e.g. the receiver thread trying to grab + * drbd_md_get_buffer() while trying to get out of the "transient state", we + * need to grab and release the meta data buffer inside of that wait_event loop. + */ +static enum drbd_state_rv +request_detach(struct drbd_device *device) +{ + return drbd_req_state(device, NS(disk, D_FAILED), + CS_VERBOSE | CS_ORDERED | CS_INHIBIT_MD_IO); +} + +enum drbd_state_rv +drbd_request_detach_interruptible(struct drbd_device *device) +{ + enum drbd_state_rv rv; + int ret; + + drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */ + wait_event_interruptible(device->state_wait, + (rv = request_detach(device)) != SS_IN_TRANSIENT_STATE); + drbd_resume_io(device); + + ret = wait_event_interruptible(device->misc_wait, + device->state.disk != D_FAILED); + + if (rv == SS_IS_DISKLESS) + rv = SS_NOTHING_TO_DO; + if (ret) + rv = ERR_INTR; + + return rv; +} + enum drbd_state_rv _drbd_request_state_holding_state_mutex(struct drbd_device *device, union drbd_state mask, union drbd_state val, enum chg_state_flags f) diff --git a/drivers/block/drbd/drbd_state.h b/drivers/block/drbd/drbd_state.h index 6c9d5d4..0276c98 100644 --- a/drivers/block/drbd/drbd_state.h +++ b/drivers/block/drbd/drbd_state.h @@ -71,6 +71,10 @@ enum chg_state_flags { CS_DC_SUSP = 1 << 10, CS_DC_MASK = CS_DC_ROLE + CS_DC_PEER + CS_DC_CONN + CS_DC_DISK + CS_DC_PDSK, CS_IGN_OUTD_FAIL = 1 << 11, + + /* Make sure no meta data IO is in flight, by calling + * drbd_md_get_buffer(). Used for graceful detach. */ + CS_INHIBIT_MD_IO = 1 << 12, }; /* drbd_dev_state and drbd_state are different types. This is to stress the @@ -156,6 +160,10 @@ static inline int drbd_request_state(struct drbd_device *device, return _drbd_request_state(device, mask, val, CS_VERBOSE + CS_ORDERED); } +/* for use in adm_detach() (drbd_adm_detach(), drbd_adm_down()) */ +enum drbd_state_rv +drbd_request_detach_interruptible(struct drbd_device *device); + enum drbd_role conn_highest_role(struct drbd_connection *connection); enum drbd_role conn_highest_peer(struct drbd_connection *connection); enum drbd_disk_state conn_highest_disk(struct drbd_connection *connection); -- 2.7.4