Return-Path: Received: from daytona.panasas.com ([67.152.220.89]:8055 "EHLO daytona.panasas.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1754767Ab1BCI7B (ORCPT ); Thu, 3 Feb 2011 03:59:01 -0500 Message-ID: <4D4A6E52.1020108@panasas.com> Date: Thu, 03 Feb 2011 10:58:58 +0200 From: Benny Halevy To: Fred Isaman CC: linux-nfs@vger.kernel.org, Trond Myklebust Subject: Re: [PATCH 3/3] pnfs: fix pnfs lock inversion of i_lock and cl_lock References: <1296487633-21057-1-git-send-email-iisaman@netapp.com> <1296487633-21057-4-git-send-email-iisaman@netapp.com> In-Reply-To: <1296487633-21057-4-git-send-email-iisaman@netapp.com> Content-Type: text/plain; charset=ISO-8859-1 Sender: linux-nfs-owner@vger.kernel.org List-ID: MIME-Version: 1.0 On 2011-01-31 17:27, Fred Isaman wrote: > The pnfs code was using throughout the lock order i_lock, cl_lock. > This conflicts with the nfs delegation code. Rework the pnfs code > to avoid taking both locks simultaneously. > > Currently the code takes the double lock to add/remove the layout to a > nfs_client list, while atomically checking that the list of lsegs is > empty. To avoid this, we rely on existing serializations. When a > layout is initialized with lseg count equal zero, LAYOUTGET's > openstateid serialization is in effect, making it safe to assume it > stays zero unless we change it. And once a layout's lseg count drops > to zero, it is set as DESTROYED and so will stay at zero. > > Signed-off-by: Fred Isaman > --- > fs/nfs/callback_proc.c | 2 +- > fs/nfs/pnfs.c | 50 +++++++++++++++++++++++++++-------------------- > 2 files changed, 30 insertions(+), 22 deletions(-) > > diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c > index 8958757..2f41dcce 100644 > --- a/fs/nfs/callback_proc.c > +++ b/fs/nfs/callback_proc.c > @@ -188,10 +188,10 @@ static u32 initiate_bulk_draining(struct nfs_client *clp, > rv = NFS4ERR_DELAY; > list_del_init(&lo->plh_bulk_recall); > spin_unlock(&ino->i_lock); > + pnfs_free_lseg_list(&free_me_list); > put_layout_hdr(lo); > iput(ino); > } > - pnfs_free_lseg_list(&free_me_list); > return rv; > } > > diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c > index f89c3bb..ee6c69a 100644 > --- a/fs/nfs/pnfs.c > +++ b/fs/nfs/pnfs.c > @@ -247,13 +247,6 @@ put_lseg_locked(struct pnfs_layout_segment *lseg, > BUG_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); > list_del(&lseg->pls_list); > if (list_empty(&lseg->pls_layout->plh_segs)) { > - struct nfs_client *clp; > - > - clp = NFS_SERVER(ino)->nfs_client; > - spin_lock(&clp->cl_lock); > - /* List does not take a reference, so no need for put here */ > - list_del_init(&lseg->pls_layout->plh_layouts); > - spin_unlock(&clp->cl_lock); > set_bit(NFS_LAYOUT_DESTROYED, &lseg->pls_layout->plh_flags); > /* Matched by initial refcount set in alloc_init_layout_hdr */ > put_layout_hdr_locked(lseg->pls_layout); > @@ -319,14 +312,30 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, > return invalid - removed; > } > > +/* note free_me must contain lsegs from a single layout_hdr */ > void > pnfs_free_lseg_list(struct list_head *free_me) > { > - struct pnfs_layout_segment *lseg, *tmp; > + if (!list_empty(free_me)) { Since this puts everything in this function under the condition why not define an inline wrapper in pnfs.h that checks that and calls the real function only when the list's not empty or, if this is assumed to be a rare case, just begin the function with if (list_empty(free_me)) return; Benny > + struct pnfs_layout_segment *lseg, *tmp; > + struct pnfs_layout_hdr *lo; > > - list_for_each_entry_safe(lseg, tmp, free_me, pls_list) { > - list_del(&lseg->pls_list); > - free_lseg(lseg); > + lo = list_first_entry(free_me, > + struct pnfs_layout_segment, > + pls_list)->pls_layout; > + > + if (test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags)) { > + struct nfs_client *clp; > + > + clp = NFS_SERVER(lo->plh_inode)->nfs_client; > + spin_lock(&clp->cl_lock); > + list_del_init(&lo->plh_layouts); > + spin_unlock(&clp->cl_lock); > + } > + list_for_each_entry_safe(lseg, tmp, free_me, pls_list) { > + list_del(&lseg->pls_list); > + free_lseg(lseg); > + } > } > } > > @@ -704,6 +713,7 @@ pnfs_update_layout(struct inode *ino, > struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; > struct pnfs_layout_hdr *lo; > struct pnfs_layout_segment *lseg = NULL; > + bool first = false; > > if (!pnfs_enabled_sb(NFS_SERVER(ino))) > return NULL; > @@ -734,7 +744,10 @@ pnfs_update_layout(struct inode *ino, > atomic_inc(&lo->plh_outstanding); > > get_layout_hdr(lo); > - if (list_empty(&lo->plh_segs)) { > + if (list_empty(&lo->plh_segs)) > + first = true; > + spin_unlock(&ino->i_lock); > + if (first) { > /* The lo must be on the clp list if there is any > * chance of a CB_LAYOUTRECALL(FILE) coming in. > */ > @@ -743,17 +756,12 @@ pnfs_update_layout(struct inode *ino, > list_add_tail(&lo->plh_layouts, &clp->cl_layouts); > spin_unlock(&clp->cl_lock); > } > - spin_unlock(&ino->i_lock); > > lseg = send_layoutget(lo, ctx, iomode); > - if (!lseg) { > - spin_lock(&ino->i_lock); > - if (list_empty(&lo->plh_segs)) { > - spin_lock(&clp->cl_lock); > - list_del_init(&lo->plh_layouts); > - spin_unlock(&clp->cl_lock); > - } > - spin_unlock(&ino->i_lock); > + if (!lseg && first) { > + spin_lock(&clp->cl_lock); > + list_del_init(&lo->plh_layouts); > + spin_unlock(&clp->cl_lock); > } > atomic_dec(&lo->plh_outstanding); > put_layout_hdr(lo);