/proc/vmallocinfo does not show areas allocated by vm_map_ram, which
can cause confusion when debugging. Add a vm_struct for these areas and
show them in /proc/vmallocinfo.
Signed-off-by: Zhaoyang Huang <[email protected]>
---
mm/vmalloc.c | 27 ++++++++++++++++++++++++++-
1 file changed, 26 insertions(+), 1 deletion(-)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 34a1c3e..4a2e93c 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -46,6 +46,9 @@ struct vfree_deferred {
static void __vunmap(const void *, int);
+static void setup_vmap_ram_vm(struct vm_struct *vm, struct vmap_area *va,
+ unsigned long flags, const void *caller);
+
static void free_work(struct work_struct *w)
{
struct vfree_deferred *p = container_of(w, struct vfree_deferred, wq);
@@ -315,6 +318,7 @@ unsigned long vmalloc_to_pfn(const void *vmalloc_addr)
/*** Global kva allocator ***/
#define VM_VM_AREA 0x04
+#define VM_VM_RAM 0x08
static DEFINE_SPINLOCK(vmap_area_lock);
/* Export for kexec only */
@@ -1141,6 +1145,7 @@ void vm_unmap_ram(const void *mem, unsigned int count)
va = find_vmap_area(addr);
BUG_ON(!va);
+ kfree(va->vm);
free_unmap_vmap_area(va);
}
EXPORT_SYMBOL(vm_unmap_ram);
@@ -1173,6 +1178,12 @@ void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t pro
addr = (unsigned long)mem;
} else {
struct vmap_area *va;
+ struct vm_struct *area;
+
+ area = kzalloc_node(sizeof(*area), GFP_KERNEL, node);
+ if (unlikely(!area))
+ return NULL;
+
va = alloc_vmap_area(size, PAGE_SIZE,
VMALLOC_START, VMALLOC_END, node, GFP_KERNEL);
if (IS_ERR(va))
@@ -1180,6 +1191,7 @@ void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t pro
addr = va->va_start;
mem = (void *)addr;
+ setup_vmap_ram_vm(area, va, 0, __builtin_return_address(0));
}
if (vmap_page_range(addr, addr + size, prot, pages) < 0) {
vm_unmap_ram(mem, count);
@@ -1362,6 +1374,19 @@ static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
spin_unlock(&vmap_area_lock);
}
+static void setup_vmap_ram_vm(struct vm_struct *vm, struct vmap_area *va,
+ unsigned long flags, const void *caller)
+{
+ spin_lock(&vmap_area_lock);
+ vm->flags = flags;
+ vm->addr = (void *)va->va_start;
+ vm->size = va->va_end - va->va_start;
+ vm->caller = caller;
+ va->vm = vm;
+ va->flags |= VM_VM_RAM;
+ spin_unlock(&vmap_area_lock);
+}
+
static void clear_vm_uninitialized_flag(struct vm_struct *vm)
{
/*
@@ -2698,7 +2723,7 @@ static int s_show(struct seq_file *m, void *p)
* s_show can encounter race with remove_vm_area, !VM_VM_AREA on
* behalf of vmap area is being tear down or vm_map_ram allocation.
*/
- if (!(va->flags & VM_VM_AREA))
+ if (!(va->flags & (VM_VM_AREA | VM_VM_RAM)))
return 0;
v = va->vm;
--
1.9.1
On Wed, 19 Jul 2017 18:44:03 +0800 Zhaoyang Huang <[email protected]> wrote:
> /proc/vmallocinfo will not show the area allocated by vm_map_ram, which
> will make confusion when debug. Add vm_struct for them and show them in
> proc.
>
Please provide sample /proc/vmallocinfo so we can better understand the
proposal. Is there a means by which people can determine that a
particular area is from vm_map_ram()? I don't think so. Should there
be?
>
> ...
>
> @@ -1173,6 +1178,12 @@ void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t pro
> addr = (unsigned long)mem;
> } else {
> struct vmap_area *va;
> + struct vm_struct *area;
> +
> + area = kzalloc_node(sizeof(*area), GFP_KERNEL, node);
> + if (unlikely(!area))
> + return NULL;
Allocating a vm_struct for each vm_map_ram area is a cost. And we're
doing this purely for /proc/vmallocinfo. I think I'll need more
persuading to convince me that this is a good tradeoff, given that
*every* user will incur this cost, and approximately 0% of them will
ever use /proc/vmallocinfo.
So... do we *really* need this? If so, why?
On Thu, Jul 20, 2017 at 4:50 AM, Andrew Morton
<[email protected]> wrote:
> On Wed, 19 Jul 2017 18:44:03 +0800 Zhaoyang Huang <[email protected]> wrote:
>
>> /proc/vmallocinfo will not show the area allocated by vm_map_ram, which
>> will make confusion when debug. Add vm_struct for them and show them in
>> proc.
>>
>
> Please provide sample /proc/vmallocinfo so we can better understand the
> proposal. Is there a means by which people can determine that a
> particular area is from vm_map_ram()? I don't think so. Should there
> be?
Here is part of the vmallocinfo output; the lines starting with '>' are
the ones allocated by vm_map_ram.
xxxx:/ # cat /proc/vmallocinfo
0xffffff8000a5f000-0xffffff8000abb000 376832
load_module+0x1004/0x1e98 pages=91 vmalloc
0xffffff8000ac6000-0xffffff8000ad2000 49152
load_module+0x1004/0x1e98 pages=11 vmalloc
0xffffff8000ad8000-0xffffff8000ade000 24576
load_module+0x1004/0x1e98 pages=5 vmalloc
0xffffff8008000000-0xffffff8008002000 8192 of_iomap+0x4c/0x68
phys=12001000 ioremap
0xffffff8008002000-0xffffff8008004000 8192 of_iomap+0x4c/0x68
phys=40356000 ioremap
0xffffff8008004000-0xffffff8008007000 12288 of_iomap+0x4c/0x68
phys=12002000 ioremap
0xffffff8008008000-0xffffff800800d000 20480
of_sprd_gates_clk_setup_with_ops+0x88/0x2a8 phys=402b0000 ioremap
0xffffff800800e000-0xffffff8008010000 8192 of_iomap+0x4c/0x68
phys=40356000 ioremap
...
>0xffffff800c5a3000-0xffffff800c5ec000 299008 shmem_ram_vmap+0xe8/0x1a0
0xffffff800c5fe000-0xffffff800c600000 8192
kbasep_js_policy_ctx_has_priority+0x254/0xdb0 [mali_kbase] pages=1
vmalloc
0xffffff800c600000-0xffffff800c701000 1052672 of_iomap+0x4c/0x68
phys=60d00000 ioremap
>0xffffff800c701000-0xffffff800c742000 266240 shmem_ram_vmap+0xe8/0x1a0
0xffffff800c74e000-0xffffff800c750000 8192
kbasep_js_policy_ctx_has_priority+0x2cc/0xdb0 [mali_kbase] pages=1
vmalloc
...
>
>>
>> ...
>>
>> @@ -1173,6 +1178,12 @@ void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t pro
>> addr = (unsigned long)mem;
>> } else {
>> struct vmap_area *va;
>> + struct vm_struct *area;
>> +
>> + area = kzalloc_node(sizeof(*area), GFP_KERNEL, node);
>> + if (unlikely(!area))
>> + return NULL;
>
> Allocating a vm_struct for each vm_map_ram area is a cost. And we're
> doing this purely for /proc/vmallocinfo. I think I'll need more
> persuading to convince me that this is a good tradeoff, given that
> *every* user will incur this cost, and approximately 0% of them will
> ever use /proc/vmallocinfo.
>
> So... do we *really* need this? If so, why?
The motivation for this commit comes from a practical debugging case:
vmalloc failed because one driver had allocated a huge area via
vm_map_ram, which could not be traced with cat /proc/vmallocinfo. We had
to add a lot of printk and dump_stack calls to get more information.
I don't think the vm_struct costs too much memory; just imagine that if
the area were obtained by vmalloc or ioremap instead, you would have to
pay for it as well.
>
Please update the comment below as 's/by one driver's allocating/because
one driver has allocated/'; sorry for the confusion.
On Thu, Jul 20, 2017 at 9:15 AM, Zhaoyang Huang <[email protected]> wrote:
> On Thu, Jul 20, 2017 at 4:50 AM, Andrew Morton
> <[email protected]> wrote:
>> On Wed, 19 Jul 2017 18:44:03 +0800 Zhaoyang Huang <[email protected]> wrote:
>>
>>> /proc/vmallocinfo will not show the area allocated by vm_map_ram, which
>>> will make confusion when debug. Add vm_struct for them and show them in
>>> proc.
>>>
>>
>> Please provide sample /proc/vmallocinfo so we can better understand the
>> proposal. Is there a means by which people can determine that a
>> particular area is from vm_map_ram()? I don't think so. Should there
>> be?
> Here is the part of vmallocinfo, the line start with '>' are the ones
> allocated by vm_map_ram.
> xxxx:/ # cat /proc/vmallocinfo
> 0xffffff8000a5f000-0xffffff8000abb000 376832
> load_module+0x1004/0x1e98 pages=91 vmalloc
> 0xffffff8000ac6000-0xffffff8000ad2000 49152
> load_module+0x1004/0x1e98 pages=11 vmalloc
> 0xffffff8000ad8000-0xffffff8000ade000 24576
> load_module+0x1004/0x1e98 pages=5 vmalloc
> 0xffffff8008000000-0xffffff8008002000 8192 of_iomap+0x4c/0x68
> phys=12001000 ioremap
> 0xffffff8008002000-0xffffff8008004000 8192 of_iomap+0x4c/0x68
> phys=40356000 ioremap
> 0xffffff8008004000-0xffffff8008007000 12288 of_iomap+0x4c/0x68
> phys=12002000 ioremap
> 0xffffff8008008000-0xffffff800800d000 20480
> of_sprd_gates_clk_setup_with_ops+0x88/0x2a8 phys=402b0000 ioremap
> 0xffffff800800e000-0xffffff8008010000 8192 of_iomap+0x4c/0x68
> phys=40356000 ioremap
> ...
>>0xffffff800c5a3000-0xffffff800c5ec000 299008 shmem_ram_vmap+0xe8/0x1a0
> 0xffffff800c5fe000-0xffffff800c600000 8192
> kbasep_js_policy_ctx_has_priority+0x254/0xdb0 [mali_kbase] pages=1
> vmalloc
> 0xffffff800c600000-0xffffff800c701000 1052672 of_iomap+0x4c/0x68
> phys=60d00000 ioremap
>>0xffffff800c701000-0xffffff800c742000 266240 shmem_ram_vmap+0xe8/0x1a0
> 0xffffff800c74e000-0xffffff800c750000 8192
> kbasep_js_policy_ctx_has_priority+0x2cc/0xdb0 [mali_kbase] pages=1
> vmalloc
> ...
>>
>>>
>>> ...
>>>
>>> @@ -1173,6 +1178,12 @@ void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t pro
>>> addr = (unsigned long)mem;
>>> } else {
>>> struct vmap_area *va;
>>> + struct vm_struct *area;
>>> +
>>> + area = kzalloc_node(sizeof(*area), GFP_KERNEL, node);
>>> + if (unlikely(!area))
>>> + return NULL;
>>
>> Allocating a vm_struct for each vm_map_ram area is a cost. And we're
>> doing this purely for /proc/vmallocinfo. I think I'll need more
>> persuading to convince me that this is a good tradeoff, given that
>> *every* user will incur this cost, and approximately 0% of them will
>> ever use /proc/vmallocinfo.
>>
>> So... do we *really* need this? If so, why?
> The motivation of this commit comes from one practical debug, that is,
> vmalloc failed 's/by one driver's allocating/because one driver has allocated/' a
> huge area by vm_map_ram, which can not be traced by cat
> /proc/vmallocinfo. We have to add a lot of printk and
> dump_stack to get more information.
> I don't think the vm_struct cost too much memory, just imagine that
> the area got by vmalloc or ioremap instead, you have
> to pay for it as well.
>>
On 07/19/2017 06:44 PM, Zhaoyang Huang wrote:
> /proc/vmallocinfo will not show the area allocated by vm_map_ram, which
> will make confusion when debug. Add vm_struct for them and show them in
> proc.
>
> Signed-off-by: Zhaoyang Huang <[email protected]>
> ---
another patch, titled "vmalloc: show lazy-purged vma info in vmallocinfo", has been merged into the linux-next branch
to resolve this problem.
> mm/vmalloc.c | 27 ++++++++++++++++++++++++++-
> 1 file changed, 26 insertions(+), 1 deletion(-)
>
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index 34a1c3e..4a2e93c 100644
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -46,6 +46,9 @@ struct vfree_deferred {
>
> static void __vunmap(const void *, int);
>
> +static void setup_vmap_ram_vm(struct vm_struct *vm, struct vmap_area *va,
> + unsigned long flags, const void *caller);
> +
> static void free_work(struct work_struct *w)
> {
> struct vfree_deferred *p = container_of(w, struct vfree_deferred, wq);
> @@ -315,6 +318,7 @@ unsigned long vmalloc_to_pfn(const void *vmalloc_addr)
> /*** Global kva allocator ***/
>
> #define VM_VM_AREA 0x04
> +#define VM_VM_RAM 0x08
>
> static DEFINE_SPINLOCK(vmap_area_lock);
> /* Export for kexec only */
> @@ -1141,6 +1145,7 @@ void vm_unmap_ram(const void *mem, unsigned int count)
>
> va = find_vmap_area(addr);
> BUG_ON(!va);
> + kfree(va->vm);
> free_unmap_vmap_area(va);
> }
> EXPORT_SYMBOL(vm_unmap_ram);
> @@ -1173,6 +1178,12 @@ void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t pro
> addr = (unsigned long)mem;
> } else {
> struct vmap_area *va;
> + struct vm_struct *area;
> +
> + area = kzalloc_node(sizeof(*area), GFP_KERNEL, node);
> + if (unlikely(!area))
> + return NULL;
> +
> va = alloc_vmap_area(size, PAGE_SIZE,
> VMALLOC_START, VMALLOC_END, node, GFP_KERNEL);
> if (IS_ERR(va))
> @@ -1180,6 +1191,7 @@ void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t pro
>
> addr = va->va_start;
> mem = (void *)addr;
> + setup_vmap_ram_vm(area, va, 0, __builtin_return_address(0));
> }
> if (vmap_page_range(addr, addr + size, prot, pages) < 0) {
> vm_unmap_ram(mem, count);
> @@ -1362,6 +1374,19 @@ static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
> spin_unlock(&vmap_area_lock);
> }
>
> +static void setup_vmap_ram_vm(struct vm_struct *vm, struct vmap_area *va,
> + unsigned long flags, const void *caller)
> +{
> + spin_lock(&vmap_area_lock);
> + vm->flags = flags;
> + vm->addr = (void *)va->va_start;
> + vm->size = va->va_end - va->va_start;
> + vm->caller = caller;
> + va->vm = vm;
> + va->flags |= VM_VM_RAM;
> + spin_unlock(&vmap_area_lock);
> +}
> +
> static void clear_vm_uninitialized_flag(struct vm_struct *vm)
> {
> /*
> @@ -2698,7 +2723,7 @@ static int s_show(struct seq_file *m, void *p)
> * s_show can encounter race with remove_vm_area, !VM_VM_AREA on
> * behalf of vmap area is being tear down or vm_map_ram allocation.
> */
> - if (!(va->flags & VM_VM_AREA))
> + if (!(va->flags & (VM_VM_AREA | VM_VM_RAM)))
> return 0;
>
> v = va->vm;
>