In preparation for making get/set mempolicy possible from outside the
context of the task being changed, we will need to take a reference
count on the task mempolicy in do_get_mempolicy.
do_get_mempolicy, operations on one of three policies
1) when MPOL_F_ADDR is set, it operates on a vma mempolicy
2) if the task does not have a mempolicy, default_policy is used
3) otherwise the task mempolicy is operated on
When the policy is from a vma, and that vma is a shared memory region,
the __get_vma_policy stack will take an additional reference
Change the behavior of do_get_mempolicy to unconditionally reference
whichever policy is operated on so that the cleanup logic can mpol_put
unconditionally, and mpol_cond_put is only called when a vma policy is
used.
Signed-off-by: Gregory Price <[email protected]>
---
mm/mempolicy.c | 35 ++++++++++++++++++++++-------------
1 file changed, 22 insertions(+), 13 deletions(-)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 410754d56e46..37da712259d7 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -900,9 +900,9 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
unsigned long addr, unsigned long flags)
{
int err;
- struct mm_struct *mm = current->mm;
+ struct mm_struct *mm;
struct vm_area_struct *vma = NULL;
- struct mempolicy *pol = current->mempolicy, *pol_refcount = NULL;
+ struct mempolicy *pol = NULL, *pol_refcount = NULL;
if (flags &
~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR|MPOL_F_MEMS_ALLOWED))
@@ -925,29 +925,38 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
* vma/shared policy at addr is NULL. We
* want to return MPOL_DEFAULT in this case.
*/
+ mm = current->mm;
mmap_read_lock(mm);
vma = vma_lookup(mm, addr);
if (!vma) {
mmap_read_unlock(mm);
return -EFAULT;
}
- pol = __get_vma_policy(vma, addr, &ilx);
+ /*
+ * __get_vma_policy can refcount if a shared policy is
+ * referenced. We'll need to do a cond_put on the way
+ * out, but we need to reference this policy either way
+ * because we may drop the mmap read lock.
+ */
+ pol = pol_refcount = __get_vma_policy(vma, addr, &ilx);
+ mpol_get(pol);
} else if (addr)
return -EINVAL;
+ else {
+ /* take a reference of the task policy now */
+ pol = current->mempolicy;
+ mpol_get(pol);
+ }
- if (!pol)
+ if (!pol) {
pol = &default_policy; /* indicates default behavior */
+ mpol_get(pol);
+ }
+ /* we now have at least one reference on the policy */
if (flags & MPOL_F_NODE) {
if (flags & MPOL_F_ADDR) {
- /*
- * Take a refcount on the mpol, because we are about to
- * drop the mmap_lock, after which only "pol" remains
- * valid, "vma" is stale.
- */
- pol_refcount = pol;
vma = NULL;
- mpol_get(pol);
mmap_read_unlock(mm);
err = lookup_node(mm, addr);
if (err < 0)
@@ -982,11 +991,11 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
}
out:
- mpol_cond_put(pol);
+ mpol_put(pol);
if (vma)
mmap_read_unlock(mm);
if (pol_refcount)
- mpol_put(pol_refcount);
+ mpol_cond_put(pol_refcount);
return err;
}
--
2.39.1
[restoring the CC list, I supect you didn't want this to be a private
discussion]
On Tue 28-11-23 09:10:18, Gregory Price wrote:
> On Tue, Nov 28, 2023 at 03:07:10PM +0100, Michal Hocko wrote:
> > On Wed 22-11-23 16:11:51, Gregory Price wrote:
> > [...]
> > > @@ -982,11 +991,11 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
> > > }
> > >
> > > out:
> > > - mpol_cond_put(pol);
> > > + mpol_put(pol);
> > > if (vma)
> > > mmap_read_unlock(mm);
> > > if (pol_refcount)
> > > - mpol_put(pol_refcount);
> > > + mpol_cond_put(pol_refcount);
> >
> > Maybe I am just misreading the patch but pol_refcount should be always
> > NULL with this patch
> >
>
> earlier:
>
> + pol = pol_refcount = __get_vma_policy(vma, addr, &ilx);
>
> i can split this into two lines if preferred.
>
> If addr is not set, then yes pol_refcount is always null.
My bad, missed that. Making that two lines would be easier to read but
nothing I would insist on of course.
--
Michal Hocko
SUSE Labs