Create mmap trace file and add trace point of vm_unmapped_area.
To include mmap trace, remove inline of vm_unmapped_area and move code
to mmap.c. There is no logical change.
Jaewon Kim (2):
mmap: remove inline of vm_unmapped_area
mm: mmap: add trace point of vm_unmapped_area
include/linux/mm.h | 21 +-------------------
include/trace/events/mmap.h | 48 +++++++++++++++++++++++++++++++++++++++++++++
mm/mmap.c | 28 ++++++++++++++++++++++++--
3 files changed, 75 insertions(+), 22 deletions(-)
create mode 100644 include/trace/events/mmap.h
--
2.13.7
In preparation for the next patch, remove the inline of vm_unmapped_area and
move the code to mmap.c. There is no logical change.
Also remove unmapped_area[_topdown] from mm.h, as there is no code
calling them.
Signed-off-by: Jaewon Kim <[email protected]>
Reviewed-by: Vlastimil Babka <[email protected]>
---
v3: add static keyword
v2: remove inline and move code to mmap.c
---
include/linux/mm.h | 21 +--------------------
mm/mmap.c | 20 ++++++++++++++++++--
2 files changed, 19 insertions(+), 22 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index c54fb96cb1e6..f4263993d053 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2364,26 +2364,7 @@ struct vm_unmapped_area_info {
unsigned long align_offset;
};
-extern unsigned long unmapped_area(struct vm_unmapped_area_info *info);
-extern unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info);
-
-/*
- * Search for an unmapped address range.
- *
- * We are looking for a range that:
- * - does not intersect with any VMA;
- * - is contained within the [low_limit, high_limit) interval;
- * - is at least the desired size.
- * - satisfies (begin_addr & align_mask) == (align_offset & align_mask)
- */
-static inline unsigned long
-vm_unmapped_area(struct vm_unmapped_area_info *info)
-{
- if (info->flags & VM_UNMAPPED_AREA_TOPDOWN)
- return unmapped_area_topdown(info);
- else
- return unmapped_area(info);
-}
+extern unsigned long vm_unmapped_area(struct vm_unmapped_area_info *info);
/* truncate.c */
extern void truncate_inode_pages(struct address_space *, loff_t);
diff --git a/mm/mmap.c b/mm/mmap.c
index d681a20eb4ea..ba990c20ecc2 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1848,7 +1848,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
return error;
}
-unsigned long unmapped_area(struct vm_unmapped_area_info *info)
+static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
{
/*
* We implement the search by looking for an rbtree node that
@@ -1951,7 +1951,7 @@ unsigned long unmapped_area(struct vm_unmapped_area_info *info)
return gap_start;
}
-unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
+static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
@@ -2050,6 +2050,22 @@ unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
return gap_end;
}
+/*
+ * Search for an unmapped address range.
+ *
+ * We are looking for a range that:
+ * - does not intersect with any VMA;
+ * - is contained within the [low_limit, high_limit) interval;
+ * - is at least the desired size.
+ * - satisfies (begin_addr & align_mask) == (align_offset & align_mask)
+ */
+unsigned long vm_unmapped_area(struct vm_unmapped_area_info *info)
+{
+ if (info->flags & VM_UNMAPPED_AREA_TOPDOWN)
+ return unmapped_area_topdown(info);
+ else
+ return unmapped_area(info);
+}
#ifndef arch_get_mmap_end
#define arch_get_mmap_end(addr) (TASK_SIZE)
--
2.13.7
Even on a 64-bit kernel, an mmap failure can happen for a 32-bit task.
A shortage of a task's virtual memory space on mmap is reported to userspace
as -ENOMEM. It can be confused with a physical memory shortage of the overall
system.
vm_unmapped_area can be called by some drivers or by other core kernel
subsystems such as filesystems. On my platform, the GPU driver calls
vm_unmapped_area, and the driver also returns -ENOMEM on a GPU-side
shortage. It can be hard to distinguish which code layer returned the
-ENOMEM.
Create mmap trace file and add trace point of vm_unmapped_area.
i.e.)
277.156599: vm_unmapped_area: addr=77e0d03000 err=0 total_vm=0x17014b flags=0x1 len=0x400000 lo=0x8000 hi=0x7878c27000 mask=0x0 ofs=0x1
342.838740: vm_unmapped_area: addr=0 err=-12 total_vm=0xffb08 flags=0x0 len=0x100000 lo=0x40000000 hi=0xfffff000 mask=0x0 ofs=0x22
Signed-off-by: Jaewon Kim <[email protected]>
---
v3: reduce fast_assign and print format
v2: use trace point rather than printk with ratelimit
v1: use printk with ratelimit
---
include/trace/events/mmap.h | 48 +++++++++++++++++++++++++++++++++++++++++++++
mm/mmap.c | 12 ++++++++++--
2 files changed, 58 insertions(+), 2 deletions(-)
create mode 100644 include/trace/events/mmap.h
diff --git a/include/trace/events/mmap.h b/include/trace/events/mmap.h
new file mode 100644
index 000000000000..986a41b6cfa9
--- /dev/null
+++ b/include/trace/events/mmap.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mmap
+
+#if !defined(_TRACE_MMAP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MMAP_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(vm_unmapped_area,
+
+ TP_PROTO(unsigned long addr, struct vm_unmapped_area_info *info),
+
+ TP_ARGS(addr, info),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, addr)
+ __field(unsigned long, total_vm)
+ __field(unsigned long, flags)
+ __field(unsigned long, length)
+ __field(unsigned long, low_limit)
+ __field(unsigned long, high_limit)
+ __field(unsigned long, align_mask)
+ __field(unsigned long, align_offset)
+ ),
+
+ TP_fast_assign(
+ __entry->addr = addr;
+ __entry->total_vm = current->mm->total_vm;
+ __entry->flags = info->flags;
+ __entry->length = info->length;
+ __entry->low_limit = info->low_limit;
+ __entry->high_limit = info->high_limit;
+ __entry->align_mask = info->align_mask;
+ __entry->align_offset = info->align_offset;
+ ),
+
+ TP_printk("addr=%lx err=%ld total_vm=0x%lx flags=0x%lx len=0x%lx lo=0x%lx hi=0x%lx mask=0x%lx ofs=0x%lx\n",
+ IS_ERR_VALUE(__entry->addr) ? 0 : __entry->addr,
+ IS_ERR_VALUE(__entry->addr) ? __entry->addr : 0,
+ __entry->total_vm, __entry->flags, __entry->length,
+ __entry->low_limit, __entry->high_limit, __entry->align_mask,
+ __entry->align_offset)
+);
+#endif
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/mm/mmap.c b/mm/mmap.c
index ba990c20ecc2..94ae18398c59 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -53,6 +53,9 @@
#include <asm/tlb.h>
#include <asm/mmu_context.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/mmap.h>
+
#include "internal.h"
#ifndef arch_mmap_check
@@ -2061,10 +2064,15 @@ static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
*/
unsigned long vm_unmapped_area(struct vm_unmapped_area_info *info)
{
+ unsigned long addr;
+
if (info->flags & VM_UNMAPPED_AREA_TOPDOWN)
- return unmapped_area_topdown(info);
+ addr = unmapped_area_topdown(info);
else
- return unmapped_area(info);
+ addr = unmapped_area(info);
+
+ trace_vm_unmapped_area(addr, info);
+ return addr;
}
#ifndef arch_get_mmap_end
--
2.13.7
On Fri, Mar 20, 2020 at 02:58:23PM +0900, Jaewon Kim wrote:
> + TP_printk("addr=%lx err=%ld total_vm=0x%lx flags=0x%lx len=0x%lx lo=0x%lx hi=0x%lx mask=0x%lx ofs=0x%lx\n",
Shouldn't addr be printed as 0x%lx? I think it's arguable whether to print
len as %ld or 0x%lx.
On Fri, Mar 20, 2020 at 02:58:23PM +0900, Jaewon Kim wrote:
> + TP_printk("addr=%lx err=%ld total_vm=0x%lx flags=0x%lx len=0x%lx lo=0x%lx hi=0x%lx mask=0x%lx ofs=0x%lx\n",
> + IS_ERR_VALUE(__entry->addr) ? 0 : __entry->addr,
> + IS_ERR_VALUE(__entry->addr) ? __entry->addr : 0,
I didn't see the IS_ERR_VALUE problem that Vlastimil mentioned get resolved?
I might suggest ...
+++ b/include/linux/err.h
@@ -19,7 +19,8 @@
#ifndef __ASSEMBLY__
-#define IS_ERR_VALUE(x) unlikely((unsigned long)(void *)(x) >= (unsigned long)-MAX_ERRNO)
+#define __IS_ERR_VALUE(x) ((unsigned long)(void *)(x) >= (unsigned long)-MAX_ERRNO)
+#define IS_ERR_VALUE(x) unlikely(__IS_ERR_VALUE(x))
static inline void * __must_check ERR_PTR(long error)
{
and then you can use __IS_ERR_VALUE() which removes the unlikely() problem.
On 2020년 03월 30일 01:14, Matthew Wilcox wrote:
> On Fri, Mar 20, 2020 at 02:58:23PM +0900, Jaewon Kim wrote:
>> + TP_printk("addr=%lx err=%ld total_vm=0x%lx flags=0x%lx len=0x%lx lo=0x%lx hi=0x%lx mask=0x%lx ofs=0x%lx\n",
>> + IS_ERR_VALUE(__entry->addr) ? 0 : __entry->addr,
>> + IS_ERR_VALUE(__entry->addr) ? __entry->addr : 0,
> I didn't see the IS_ERR_VALUE problem that Vlastimil mentioned get resolved?
Sorry I missed the problem. And thank you for your comment and suggestion.
I still do not understand why unlikely() causes the [FAILED TO PARSE] problem in trace-cmd.
Does this mean that trace output should not use unlikely()?
I may need to resubmit a new patch set with your suggestion.
Thank you
>
> I might suggest ...
>
> +++ b/include/linux/err.h
> @@ -19,7 +19,8 @@
>
> #ifndef __ASSEMBLY__
>
> -#define IS_ERR_VALUE(x) unlikely((unsigned long)(void *)(x) >= (unsigned long)-MAX_ERRNO)
> +#define __IS_ERR_VALUE(x) ((unsigned long)(void *)(x) >= (unsigned long)-MAX_ERRNO)
> +#define IS_ERR_VALUE(x) unlikely(__IS_ERR_VALUE(x))
>
> static inline void * __must_check ERR_PTR(long error)
> {
>
> and then you can use __IS_ERR_VALUE() which removes the unlikely() problem.
>
>
On 3/29/20 6:14 PM, Matthew Wilcox wrote:
> On Fri, Mar 20, 2020 at 02:58:23PM +0900, Jaewon Kim wrote:
>> + TP_printk("addr=%lx err=%ld total_vm=0x%lx flags=0x%lx len=0x%lx lo=0x%lx hi=0x%lx mask=0x%lx ofs=0x%lx\n",
>> + IS_ERR_VALUE(__entry->addr) ? 0 : __entry->addr,
>> + IS_ERR_VALUE(__entry->addr) ? __entry->addr : 0,
>
> I didn't see the IS_ERR_VALUE problem that Vlastimil mentioned get resolved?
Steven is fixing it in trace-cmd:
https://lore.kernel.org/r/[email protected]
> I might suggest ...
>
> +++ b/include/linux/err.h
> @@ -19,7 +19,8 @@
>
> #ifndef __ASSEMBLY__
>
> -#define IS_ERR_VALUE(x) unlikely((unsigned long)(void *)(x) >= (unsigned long)-MAX_ERRNO)
> +#define __IS_ERR_VALUE(x) ((unsigned long)(void *)(x) >= (unsigned long)-MAX_ERRNO)
> +#define IS_ERR_VALUE(x) unlikely(__IS_ERR_VALUE(x))
So this shouldn't be needed, as we are adding a new tracepoint, not "breaking"
an existing one?
> static inline void * __must_check ERR_PTR(long error)
> {
>
> and then you can use __IS_ERR_VALUE() which removes the unlikely() problem.
>
On 2020년 03월 30일 18:56, Vlastimil Babka wrote:
> On 3/29/20 6:14 PM, Matthew Wilcox wrote:
>> On Fri, Mar 20, 2020 at 02:58:23PM +0900, Jaewon Kim wrote:
>>> + TP_printk("addr=%lx err=%ld total_vm=0x%lx flags=0x%lx len=0x%lx lo=0x%lx hi=0x%lx mask=0x%lx ofs=0x%lx\n",
>>> + IS_ERR_VALUE(__entry->addr) ? 0 : __entry->addr,
>>> + IS_ERR_VALUE(__entry->addr) ? __entry->addr : 0,
>> I didn't see the IS_ERR_VALUE problem that Vlastimil mentioned get resolved?
> Steven is fixing it in trace-cmd:
> https://lore.kernel.org/r/[email protected]
Good news for me.
Thank you
>
>> I might suggest ...
>>
>> +++ b/include/linux/err.h
>> @@ -19,7 +19,8 @@
>>
>> #ifndef __ASSEMBLY__
>>
>> -#define IS_ERR_VALUE(x) unlikely((unsigned long)(void *)(x) >= (unsigned long)-MAX_ERRNO)
>> +#define __IS_ERR_VALUE(x) ((unsigned long)(void *)(x) >= (unsigned long)-MAX_ERRNO)
>> +#define IS_ERR_VALUE(x) unlikely(__IS_ERR_VALUE(x))
> So this shouldn't be needed, as we are adding a new tracepoint, not "breaking"
> an existing one?
>
>> static inline void * __must_check ERR_PTR(long error)
>> {
>>
>> and then you can use __IS_ERR_VALUE() which removes the unlikely() problem.
>>
>
>
On Sun, Mar 29, 2020 at 09:08:58AM -0700, Matthew Wilcox wrote:
> On Fri, Mar 20, 2020 at 02:58:23PM +0900, Jaewon Kim wrote:
> > + TP_printk("addr=%lx err=%ld total_vm=0x%lx flags=0x%lx len=0x%lx lo=0x%lx hi=0x%lx mask=0x%lx ofs=0x%lx\n",
>
> Shouldn't addr be printed as 0x%lx?
%#lx should do the trick.
--
Kirill A. Shutemov