Linus asked for more strict maple tree lockdep checking [1] and for the
patches to resume the normal path through Andrew's tree.
This series of patches adds checks to ensure the lock is held in write
mode during the write path of the maple tree instead of checking if it's
held at all.
It also reduces the validate_mm() calls by consolidating into commonly
used functions (patch 0001), and removes the necessity of holding the
lock on the detached tree during munmap() operations.
Changes since v1:
- Moved the relaxing of the lockdep check for on-stack trees to its own
patch.
- Moved the on-stack tree destruction to after the lock is dropped in
patch 0003.
[1] https://lore.kernel.org/linux-mm/CAHk-=wjUp5+tcsHG89ieuwa0wUtSWWBWRt8xOsoZ1nskZbbk-g@mail.gmail.com/
v1: https://lore.kernel.org/linux-mm/CAHk-=wjUp5+tcsHG89ieuwa0wUtSWWBWRt8xOsoZ1nskZbbk-g@mail.gmail.com/
v1 part 2: https://lore.kernel.org/linux-mm/20230705204629.clctvnx4qdqoexyp@revolver/
Liam R. Howlett (4):
mm/mmap: Clean up validate_mm() calls
maple_tree: Relax lockdep checks for on-stack trees
mm/mmap: Change detached vma locking scheme
maple_tree: Be more strict about locking
include/linux/maple_tree.h | 11 +++++++++--
lib/maple_tree.c | 10 ++++++++--
mm/mmap.c | 28 ++++++----------------------
3 files changed, 23 insertions(+), 26 deletions(-)
--
2.39.2
To support early release of the maple tree locks, do not lockdep check
the lock if it is set to NULL. This is intended for the special case
on-stack use of tracking entries and not for general use.
Cc: Linus Torvalds <[email protected]>
Signed-off-by: Liam R. Howlett <[email protected]>
---
include/linux/maple_tree.h | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h
index 7769270b85e8..6618c1512886 100644
--- a/include/linux/maple_tree.h
+++ b/include/linux/maple_tree.h
@@ -182,7 +182,9 @@ enum maple_type {
#ifdef CONFIG_LOCKDEP
typedef struct lockdep_map *lockdep_map_p;
-#define mt_lock_is_held(mt) lock_is_held(mt->ma_external_lock)
+#define mt_lock_is_held(mt) \
+ (!(mt)->ma_external_lock || lock_is_held((mt)->ma_external_lock))
+
#define mt_set_external_lock(mt, lock) \
(mt)->ma_external_lock = &(lock)->dep_map
#else
--
2.39.2
validate_mm() calls are too spread out and duplicated in numerous
locations. Also, now that the stack write is done under the write lock,
it is not necessary to validate the mm prior to write operations.
Add a validate_mm() to the stack expansions, and to vma_complete() so
that numerous others may be dropped.
Note that vma_link() (and also insert_vm_struct() by call path) already
call validate_mm().
vma_merge() also had an unnecessary call to vma_iter_free() since the
logic change to abort earlier if no merging is necessary.
Drop extra validate_mm() calls at the start of functions and error paths
which won't write to the tree.
Relocate the validate_mm() call in do_brk_flags() to avoid
re-running the same test when vma_complete() is used.
The call within the error path of mmap_region() is left intentionally
because of the complexity of the function and the potential of drivers
modifying the tree.
Cc: Linus Torvalds <[email protected]>
Cc: Oliver Sang <[email protected]>
Signed-off-by: Liam R. Howlett <[email protected]>
---
mm/mmap.c | 24 ++++--------------------
1 file changed, 4 insertions(+), 20 deletions(-)
diff --git a/mm/mmap.c b/mm/mmap.c
index ce31aec82e82..7b70379a8b3e 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -594,6 +594,7 @@ static inline void vma_complete(struct vma_prepare *vp,
}
if (vp->insert && vp->file)
uprobe_mmap(vp->insert);
+ validate_mm(mm);
}
/*
@@ -675,7 +676,6 @@ int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma,
vma_iter_store(vmi, vma);
vma_complete(&vp, vmi, vma->vm_mm);
- validate_mm(vma->vm_mm);
return 0;
nomem:
@@ -715,7 +715,6 @@ int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma,
vma->vm_end = end;
vma->vm_pgoff = pgoff;
vma_complete(&vp, vmi, vma->vm_mm);
- validate_mm(vma->vm_mm);
return 0;
}
@@ -888,7 +887,6 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
long adj_start = 0;
- validate_mm(mm);
/*
* We later require that vma->vm_flags == vm_flags,
* so this tests vma->vm_flags & VM_SPECIAL, too.
@@ -1015,10 +1013,7 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
}
vma_complete(&vp, vmi, mm);
- vma_iter_free(vmi);
- validate_mm(mm);
khugepaged_enter_vma(res, vm_flags);
-
return res;
}
@@ -1193,7 +1188,6 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
vm_flags_t vm_flags;
int pkey = 0;
- validate_mm(mm);
*populate = 0;
if (!len)
@@ -2022,6 +2016,7 @@ static int expand_upwards(struct vm_area_struct *vma, unsigned long address)
anon_vma_unlock_write(vma->anon_vma);
khugepaged_enter_vma(vma, vma->vm_flags);
mas_destroy(&mas);
+ validate_mm(mm);
return error;
}
#endif /* CONFIG_STACK_GROWSUP || CONFIG_IA64 */
@@ -2112,6 +2107,7 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address)
anon_vma_unlock_write(vma->anon_vma);
khugepaged_enter_vma(vma, vma->vm_flags);
mas_destroy(&mas);
+ validate_mm(mm);
return error;
}
@@ -2289,7 +2285,6 @@ static inline void remove_mt(struct mm_struct *mm, struct ma_state *mas)
remove_vma(vma, false);
}
vm_unacct_memory(nr_accounted);
- validate_mm(mm);
}
/*
@@ -2326,8 +2321,6 @@ int __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
struct vm_area_struct *new;
int err;
- validate_mm(vma->vm_mm);
-
WARN_ON(vma->vm_start >= addr);
WARN_ON(vma->vm_end <= addr);
@@ -2384,7 +2377,6 @@ int __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
/* Success. */
if (new_below)
vma_next(vmi);
- validate_mm(vma->vm_mm);
return 0;
out_free_mpol:
@@ -2393,7 +2385,6 @@ int __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
vma_iter_free(vmi);
out_free_vma:
vm_area_free(new);
- validate_mm(vma->vm_mm);
return err;
}
@@ -3044,7 +3035,6 @@ static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
struct mm_struct *mm = current->mm;
struct vma_prepare vp;
- validate_mm(mm);
/*
* Check against address space limits by the changed size
* Note: This happens *after* clearing old mappings in some code paths.
@@ -3096,6 +3086,7 @@ static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
goto mas_store_fail;
mm->map_count++;
+ validate_mm(mm);
ksm_add_vma(vma);
out:
perf_event_mmap(vma);
@@ -3104,7 +3095,6 @@ static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
if (flags & VM_LOCKED)
mm->locked_vm += (len >> PAGE_SHIFT);
vm_flags_set(vma, VM_SOFTDIRTY);
- validate_mm(mm);
return 0;
mas_store_fail:
@@ -3285,7 +3275,6 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
bool faulted_in_anon_vma = true;
VMA_ITERATOR(vmi, mm, addr);
- validate_mm(mm);
/*
* If anonymous vma has not yet been faulted, update new pgoff
* to match new location, to increase its chance of merging.
@@ -3344,7 +3333,6 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
goto out_vma_link;
*need_rmap_locks = false;
}
- validate_mm(mm);
return new_vma;
out_vma_link:
@@ -3360,7 +3348,6 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
out_free_vma:
vm_area_free(new_vma);
out:
- validate_mm(mm);
return NULL;
}
@@ -3497,7 +3484,6 @@ static struct vm_area_struct *__install_special_mapping(
int ret;
struct vm_area_struct *vma;
- validate_mm(mm);
vma = vm_area_alloc(mm);
if (unlikely(vma == NULL))
return ERR_PTR(-ENOMEM);
@@ -3520,12 +3506,10 @@ static struct vm_area_struct *__install_special_mapping(
perf_event_mmap(vma);
- validate_mm(mm);
return vma;
out:
vm_area_free(vma);
- validate_mm(mm);
return ERR_PTR(ret);
}
--
2.39.2
On Fri, 14 Jul 2023 at 12:56, Liam R. Howlett <[email protected]> wrote:
>
> This series of patches adds checks to ensure the lock is held in write
> mode during the write path of the maple tree instead of checking if it's
> held at all.
Ack, LGTM. I assume that there were no lockdep errors found by all this..
Linus
Don't set the lock to the mm lock so that the detached VMA tree does not
complain about being unlocked when the mmap_lock is dropped prior to
freeing the tree.
Move the destruction of the detached tree outside the mmap lock
altogether.
Cc: Linus Torvalds <[email protected]>
Signed-off-by: Liam R. Howlett <[email protected]>
---
mm/mmap.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/mm/mmap.c b/mm/mmap.c
index 7b70379a8b3e..ab6cb00d377a 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2427,7 +2427,7 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
unsigned long locked_vm = 0;
MA_STATE(mas_detach, &mt_detach, 0, 0);
mt_init_flags(&mt_detach, vmi->mas.tree->ma_flags & MT_FLAGS_LOCK_MASK);
- mt_set_external_lock(&mt_detach, &mm->mmap_lock);
+ mt_detach.ma_external_lock = NULL;
/*
* If we need to split any vma, do it now to save pain later.
@@ -2545,11 +2545,11 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
/* Statistics and freeing VMAs */
mas_set(&mas_detach, start);
remove_mt(mm, &mas_detach);
- __mt_destroy(&mt_detach);
validate_mm(mm);
if (unlock)
mmap_read_unlock(mm);
+ __mt_destroy(&mt_detach);
return 0;
clear_tree_failed:
--
2.39.2
Use lockdep to check that the write path in the maple tree holds the lock
in write mode.
Introduce mt_write_lock_is_held() to check if the lock is held for
writing. Update the necessary checks for rcu_dereference_protected() to
use the new write lock check.
Cc: Linus Torvalds <[email protected]>
Signed-off-by: Liam R. Howlett <[email protected]>
---
include/linux/maple_tree.h | 7 ++++++-
lib/maple_tree.c | 10 ++++++++--
2 files changed, 14 insertions(+), 3 deletions(-)
diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h
index 6618c1512886..43f18230cfa4 100644
--- a/include/linux/maple_tree.h
+++ b/include/linux/maple_tree.h
@@ -185,11 +185,16 @@ typedef struct lockdep_map *lockdep_map_p;
#define mt_lock_is_held(mt) \
(!(mt)->ma_external_lock || lock_is_held((mt)->ma_external_lock))
+#define mt_write_lock_is_held(mt) \
+ (!(mt)->ma_external_lock || \
+ lock_is_held_type((mt)->ma_external_lock, 0))
+
#define mt_set_external_lock(mt, lock) \
(mt)->ma_external_lock = &(lock)->dep_map
#else
typedef struct { /* nothing */ } lockdep_map_p;
-#define mt_lock_is_held(mt) 1
+#define mt_lock_is_held(mt) 1
+#define mt_write_lock_is_held(mt) 1
#define mt_set_external_lock(mt, lock) do { } while (0)
#endif
diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index cef47ce8eddd..722c78077b25 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -777,6 +777,12 @@ static inline void __rcu **ma_slots(struct maple_node *mn, enum maple_type mt)
}
}
+static inline bool mt_write_locked(const struct maple_tree *mt)
+{
+ return mt_external_lock(mt) ? mt_write_lock_is_held(mt) :
+ lockdep_is_held(&mt->ma_lock);
+}
+
static inline bool mt_locked(const struct maple_tree *mt)
{
return mt_external_lock(mt) ? mt_lock_is_held(mt) :
@@ -792,7 +798,7 @@ static inline void *mt_slot(const struct maple_tree *mt,
static inline void *mt_slot_locked(struct maple_tree *mt, void __rcu **slots,
unsigned char offset)
{
- return rcu_dereference_protected(slots[offset], mt_locked(mt));
+ return rcu_dereference_protected(slots[offset], mt_write_locked(mt));
}
/*
* mas_slot_locked() - Get the slot value when holding the maple tree lock.
@@ -835,7 +841,7 @@ static inline void *mas_root(struct ma_state *mas)
static inline void *mt_root_locked(struct maple_tree *mt)
{
- return rcu_dereference_protected(mt->ma_root, mt_locked(mt));
+ return rcu_dereference_protected(mt->ma_root, mt_write_locked(mt));
}
/*
--
2.39.2
* Linus Torvalds <[email protected]> [230714 16:16]:
> On Fri, 14 Jul 2023 at 12:56, Liam R. Howlett <[email protected]> wrote:
> >
> > This series of patches adds checks to ensure the lock is held in write
> > mode during the write path of the maple tree instead of checking if it's
> > held at all.
>
> Ack, LGTM. I assume that there were no lockdep errors found by all this..
>
No lockdep errors.  My config has the additional CONFIG_PROVE_RCU=y
this time, which I missed during the previous locking changes.
Thanks,
Liam
Hello Andrew,
Please replace v2 with the attached v3 of this patch to address the
issue with building ARCH=um [1].
[1] https://lore.kernel.org/linux-mm/[email protected]/T/
Thanks,
Liam
* Liam R. Howlett <[email protected]> [230714 15:56]:
> Don't set the lock to the mm lock so that the detached VMA tree does not
> complain about being unlocked when the mmap_lock is dropped prior to
> freeing the tree.
>
> Move the destroying of the detached tree outside the mmap lock all
> together.
>
> Cc: Linus Torvalds <[email protected]>
> Signed-off-by: Liam R. Howlett <[email protected]>
> ---
> mm/mmap.c | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/mm/mmap.c b/mm/mmap.c
> index 7b70379a8b3e..ab6cb00d377a 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -2427,7 +2427,7 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
> unsigned long locked_vm = 0;
> MA_STATE(mas_detach, &mt_detach, 0, 0);
> mt_init_flags(&mt_detach, vmi->mas.tree->ma_flags & MT_FLAGS_LOCK_MASK);
> - mt_set_external_lock(&mt_detach, &mm->mmap_lock);
> + mt_detach.ma_external_lock = NULL;
>
> /*
> * If we need to split any vma, do it now to save pain later.
> @@ -2545,11 +2545,11 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
> /* Statistics and freeing VMAs */
> mas_set(&mas_detach, start);
> remove_mt(mm, &mas_detach);
> - __mt_destroy(&mt_detach);
> validate_mm(mm);
> if (unlock)
> mmap_read_unlock(mm);
>
> + __mt_destroy(&mt_detach);
> return 0;
>
> clear_tree_failed:
> --
> 2.39.2
>