page_lock_anon_vma() uses spin_lock() to block RCU. This doesn't work with
PREEMPT_RCU, we have to do rcu_read_lock() explicitly. Otherwise, it is
theoretically possible that slab returns anon_vma's memory to the system
before we do spin_unlock(&anon_vma->lock).
Signed-off-by: Oleg Nesterov <[email protected]>
--- WQ/mm/rmap.c~ 2007-02-18 22:56:49.000000000 +0300
+++ WQ/mm/rmap.c 2007-02-25 22:43:00.000000000 +0300
@@ -183,7 +183,7 @@ void __init anon_vma_init(void)
*/
static struct anon_vma *page_lock_anon_vma(struct page *page)
{
- struct anon_vma *anon_vma = NULL;
+ struct anon_vma *anon_vma;
unsigned long anon_mapping;
rcu_read_lock();
@@ -195,9 +195,16 @@ static struct anon_vma *page_lock_anon_v
anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
spin_lock(&anon_vma->lock);
+ return anon_vma;
out:
rcu_read_unlock();
- return anon_vma;
+ return NULL;
+}
+
+static void page_unlock_anon_vma(struct anon_vma *anon_vma)
+{
+ spin_unlock(&anon_vma->lock);
+ rcu_read_unlock();
}
/*
@@ -333,7 +340,8 @@ static int page_referenced_anon(struct p
if (!mapcount)
break;
}
- spin_unlock(&anon_vma->lock);
+
+ page_unlock_anon_vma(anon_vma);
return referenced;
}
@@ -809,7 +817,8 @@ static int try_to_unmap_anon(struct page
!page_mapped(page))
break;
}
- spin_unlock(&anon_vma->lock);
+
+ page_unlock_anon_vma(anon_vma);
return ret;
}
> On Sun, 25 Feb 2007 23:06:21 +0300 Oleg Nesterov <[email protected]> wrote:
> page_lock_anon_vma() uses spin_lock() to block RCU. This doesn't work with
> PREEMPT_RCU, we have to do rcu_read_lock() explicitely. Otherwise, it is
> theoretically possible that slab returns anon_vma's memory to the system
> before we do spin_unlock(&anon_vma->lock).
>
> ...
>
> +static void page_unlock_anon_vma(struct anon_vma *anon_vma)
> +{
> + spin_unlock(&anon_vma->lock);
> + rcu_read_unlock();
> }
It's a bit sad doing a double preempt_disable() for non-PREEMPT_RCU builds.
Perhaps we would benefit from a new rcu_read_lock_preempt_rcu() which is a
no-op if !PREEMPT_RCU.
On 02/27, Andrew Morton wrote:
>
> > On Sun, 25 Feb 2007 23:06:21 +0300 Oleg Nesterov <[email protected]> wrote:
> > page_lock_anon_vma() uses spin_lock() to block RCU. This doesn't work with
> > PREEMPT_RCU, we have to do rcu_read_lock() explicitely. Otherwise, it is
> > theoretically possible that slab returns anon_vma's memory to the system
> > before we do spin_unlock(&anon_vma->lock).
> >
> > ...
> >
> > +static void page_unlock_anon_vma(struct anon_vma *anon_vma)
> > +{
> > + spin_unlock(&anon_vma->lock);
> > + rcu_read_unlock();
> > }
>
> It's a bit sad doing a double preempt_disable() for non-PREEMPT_RCU builds.
Actually, we don't in this case. This patch in essence moves "preempt_enable"
from "lock" to "unlock" side. Zero impact for non-PREEMPT_RCU builds, except
.text grows a bit.
Before this patch, page_lock_anon_vma() does preempt_enable() before return,
but this can't help because ->preempt_count was incremented by spin_lock().
> Perhaps we would benefit from a new rcu_read_lock_preempt_rcu() which is a
> no-op if !PREEMPT_RCU.
I also thought about things like
rcu_read_lock_when_we_know_that_preemption_disabled()
rcu_read_lock_when_we_know_that_irqs_disabled()
which are noops when !PREEMPT_RCU.
Oleg.
On Tue, Feb 27, 2007 at 12:25:17PM -0800, Andrew Morton wrote:
> > On Sun, 25 Feb 2007 23:06:21 +0300 Oleg Nesterov <[email protected]> wrote:
> > page_lock_anon_vma() uses spin_lock() to block RCU. This doesn't work with
> > PREEMPT_RCU, we have to do rcu_read_lock() explicitely. Otherwise, it is
> > theoretically possible that slab returns anon_vma's memory to the system
> > before we do spin_unlock(&anon_vma->lock).
> >
> > ...
> >
> > +static void page_unlock_anon_vma(struct anon_vma *anon_vma)
> > +{
> > + spin_unlock(&anon_vma->lock);
> > + rcu_read_unlock();
> > }
>
> It's a bit sad doing a double preempt_disable() for non-PREEMPT_RCU builds.
>
> Perhaps we would benefit from a new rcu_read_lock_preempt_rcu() which is a
> no-op if !PREEMPT_RCU.
We were doing double preempt_disable() before as well. The only
difference is that we moved RCU preempt_enable() (it used to be inside the
critical section, and now it is after the corresponding spin_unlock()).
I hope to keep RCU API proliferation down to a dull roar, but if we
need more APIs, we need more APIs. This example does not demonstrate
that need to me, however.
Thanx, Paul
On Sun, 25 Feb 2007, Oleg Nesterov wrote:
> page_lock_anon_vma() uses spin_lock() to block RCU. This doesn't work with
> PREEMPT_RCU, we have to do rcu_read_lock() explicitely. Otherwise, it is
> theoretically possible that slab returns anon_vma's memory to the system
> before we do spin_unlock(&anon_vma->lock).
>
> Signed-off-by: Oleg Nesterov <[email protected]>
Acked-by: Hugh Dickins <[email protected]>
Thanks for doing this, and sorry for my delay.
Hugh
>
> --- WQ/mm/rmap.c~ 2007-02-18 22:56:49.000000000 +0300
> +++ WQ/mm/rmap.c 2007-02-25 22:43:00.000000000 +0300
> @@ -183,7 +183,7 @@ void __init anon_vma_init(void)
> */
> static struct anon_vma *page_lock_anon_vma(struct page *page)
> {
> - struct anon_vma *anon_vma = NULL;
> + struct anon_vma *anon_vma;
> unsigned long anon_mapping;
>
> rcu_read_lock();
> @@ -195,9 +195,16 @@ static struct anon_vma *page_lock_anon_v
>
> anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
> spin_lock(&anon_vma->lock);
> + return anon_vma;
> out:
> rcu_read_unlock();
> - return anon_vma;
> + return NULL;
> +}
> +
> +static void page_unlock_anon_vma(struct anon_vma *anon_vma)
> +{
> + spin_unlock(&anon_vma->lock);
> + rcu_read_unlock();
> }
>
> /*
> @@ -333,7 +340,8 @@ static int page_referenced_anon(struct p
> if (!mapcount)
> break;
> }
> - spin_unlock(&anon_vma->lock);
> +
> + page_unlock_anon_vma(anon_vma);
> return referenced;
> }
>
> @@ -809,7 +817,8 @@ static int try_to_unmap_anon(struct page
> !page_mapped(page))
> break;
> }
> - spin_unlock(&anon_vma->lock);
> +
> + page_unlock_anon_vma(anon_vma);
> return ret;
> }
>
>