2007-08-16 22:10:32

by Wu Fengguang

[permalink] [raw]
Subject: [PATCH 2/4] maps: address based vma walking

Split large vmas into page groups of proc_maps_private.batch_size bytes, and
iterate over them one by one for seqfile->show. This allows us to export
large-scale process address space information via the seqfile interface. The
old behavior of walking one vma at a time can be achieved by setting the batch
size to ~0UL.

Cc: Matt Mackall <[email protected]>
Cc: Al Viro <[email protected]>
Signed-off-by: Fengguang Wu <[email protected]>
---
fs/proc/task_mmu.c | 105 ++++++++++++--------------------------
include/linux/proc_fs.h | 6 +-
mm/mempolicy.c | 2
3 files changed, 38 insertions(+), 75 deletions(-)

--- linux-2.6.23-rc2-mm2.orig/include/linux/proc_fs.h
+++ linux-2.6.23-rc2-mm2/include/linux/proc_fs.h
@@ -283,9 +283,9 @@ static inline struct proc_dir_entry *PDE
struct proc_maps_private {
struct pid *pid;
struct task_struct *task;
-#ifdef CONFIG_MMU
- struct vm_area_struct *tail_vma;
-#endif
+ struct mm_struct *mm;
+ /* walk min(batch_size, remaining_size_of(vma)) bytes at a time */
+ unsigned long batch_size;
};

#endif /* _LINUX_PROC_FS_H */
--- linux-2.6.23-rc2-mm2.orig/mm/mempolicy.c
+++ linux-2.6.23-rc2-mm2/mm/mempolicy.c
@@ -1937,7 +1937,5 @@ out:
seq_putc(m, '\n');
kfree(md);

- if (m->count < m->size)
- m->version = (vma != priv->tail_vma) ? vma->vm_start : 0;
return 0;
}
--- linux-2.6.23-rc2-mm2.orig/fs/proc/task_mmu.c
+++ linux-2.6.23-rc2-mm2/fs/proc/task_mmu.c
@@ -115,99 +115,65 @@ static void pad_len_spaces(struct seq_fi
seq_printf(m, "%*c", len, ' ');
}

-static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma)
+static void *seek_vma_addr(struct seq_file *m,
+ struct vm_area_struct *vma, loff_t *pos)
{
- if (vma && vma != priv->tail_vma) {
- struct mm_struct *mm = vma->vm_mm;
- up_read(&mm->mmap_sem);
- mmput(mm);
- }
+ struct proc_maps_private *priv = m->private;
+ unsigned long addr = *pos;
+
+ if (addr & ~PAGE_MASK) { /* time for next batch */
+ if (vma->vm_end - addr < priv->batch_size) {
+ vma = vma->vm_next;
+ if (!vma || vma == get_gate_vma(priv->task))
+ goto done;
+ } else
+ addr = (addr + priv->batch_size) & PAGE_MASK;
+ }
+ if (addr < vma->vm_start)
+ addr = vma->vm_start;
+
+ m->version = *pos = addr;
+ return vma;
+done:
+ return NULL;
}

static void *m_start(struct seq_file *m, loff_t *pos)
{
struct proc_maps_private *priv = m->private;
- unsigned long last_addr = m->version;
- struct mm_struct *mm;
- struct vm_area_struct *vma, *tail_vma = NULL;
- loff_t l = *pos;
-
- /* Clear the per syscall fields in priv */
- priv->task = NULL;
- priv->tail_vma = NULL;
-
- /*
- * We remember last_addr rather than next_addr to hit with
- * mmap_cache most of the time. We have zero last_addr at
- * the beginning and also after lseek. We will have -1 last_addr
- * after the end of the vmas.
- */
-
- if (last_addr == -1UL)
- return NULL;
+ struct vm_area_struct *vma;

+ priv->mm = NULL;
priv->task = get_pid_task(priv->pid, PIDTYPE_PID);
if (!priv->task)
return NULL;

- mm = get_task_mm(priv->task);
- if (!mm)
+ priv->mm = get_task_mm(priv->task);
+ if (!priv->mm)
return NULL;

- priv->tail_vma = tail_vma = get_gate_vma(priv->task);
- down_read(&mm->mmap_sem);
+ down_read(&priv->mm->mmap_sem);

- /* Start with last addr hint */
- if (last_addr && (vma = find_vma(mm, last_addr))) {
- vma = vma->vm_next;
- goto out;
- }
-
- /*
- * Check the vma index is within the range and do
- * sequential scan until m_index.
- */
- vma = NULL;
- if ((unsigned long)l < mm->map_count) {
- vma = mm->mmap;
- while (l-- && vma)
- vma = vma->vm_next;
- goto out;
- }
-
- if (l != mm->map_count)
- tail_vma = NULL; /* After gate vma */
-
-out:
- if (vma)
- return vma;
+ vma = find_vma(priv->mm, *pos);
+ if (!vma || vma == get_gate_vma(priv->task))
+ return NULL;

- /* End of vmas has been reached */
- m->version = (tail_vma != NULL)? 0: -1UL;
- up_read(&mm->mmap_sem);
- mmput(mm);
- return tail_vma;
+ return seek_vma_addr(m, vma, pos);
}

static void *m_next(struct seq_file *m, void *v, loff_t *pos)
{
- struct proc_maps_private *priv = m->private;
- struct vm_area_struct *vma = v;
- struct vm_area_struct *tail_vma = priv->tail_vma;
-
(*pos)++;
- if (vma && (vma != tail_vma) && vma->vm_next)
- return vma->vm_next;
- vma_stop(priv, vma);
- return (vma != tail_vma)? tail_vma: NULL;
+ return seek_vma_addr(m, v, pos);
}

static void m_stop(struct seq_file *m, void *v)
{
struct proc_maps_private *priv = m->private;
- struct vm_area_struct *vma = v;
-
- vma_stop(priv, vma);
+ if (priv->mm) {
+ up_read(&priv->mm->mmap_sem);
+ mmput(priv->mm);
+ }
if (priv->task)
put_task_struct(priv->task);
}
@@ -220,6 +186,7 @@ static int do_maps_open(struct inode *in
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
if (priv) {
priv->pid = proc_pid(inode);
+ priv->batch_size = ~0;
ret = seq_open(file, ops);
if (!ret) {
struct seq_file *m = file->private_data;
@@ -291,8 +258,6 @@ static int show_map(struct seq_file *m,
}
seq_putc(m, '\n');

- if (m->count < m->size) /* vma is copied successfully */
- m->version = (vma != get_gate_vma(task))? vma->vm_start: 0;
return 0;
}


--


2007-08-17 02:15:35

by Matt Mackall

[permalink] [raw]
Subject: Re: [PATCH 2/4] maps: address based vma walking

On Fri, Aug 17, 2007 at 06:05:18AM +0800, Fengguang Wu wrote:
> Split large vmas into page groups of proc_maps_private.batch_size bytes, and
> iterate them one by one for seqfile->show. This allows us to export large scale
> process address space information via the seqfile interface. The old behavior
> of walking one vma at a time can be achieved by setting the batching size to
> ~0UL.
>
> Cc: Matt Mackall <[email protected]>
> Cc: Al Viro <[email protected]>
> Signed-off-by: Fengguang Wu <[email protected]>
> ---
> fs/proc/task_mmu.c | 105 ++++++++++++--------------------------
> include/linux/proc_fs.h | 6 +-
> mm/mempolicy.c | 2
> 3 files changed, 38 insertions(+), 75 deletions(-)
>
> --- linux-2.6.23-rc2-mm2.orig/include/linux/proc_fs.h
> +++ linux-2.6.23-rc2-mm2/include/linux/proc_fs.h
> @@ -283,9 +283,9 @@ static inline struct proc_dir_entry *PDE
> struct proc_maps_private {
> struct pid *pid;
> struct task_struct *task;
> -#ifdef CONFIG_MMU
> - struct vm_area_struct *tail_vma;
> -#endif
> + struct mm_struct *mm;
> + /* walk min(batch_size, remaining_size_of(vma)) bytes at a time */
> + unsigned long batch_size;
> };
>
> #endif /* _LINUX_PROC_FS_H */
> --- linux-2.6.23-rc2-mm2.orig/mm/mempolicy.c
> +++ linux-2.6.23-rc2-mm2/mm/mempolicy.c
> @@ -1937,7 +1937,5 @@ out:
> seq_putc(m, '\n');
> kfree(md);
>
> - if (m->count < m->size)
> - m->version = (vma != priv->tail_vma) ? vma->vm_start : 0;
> return 0;
> }

What's this bit for?

> --- linux-2.6.23-rc2-mm2.orig/fs/proc/task_mmu.c
> +++ linux-2.6.23-rc2-mm2/fs/proc/task_mmu.c
> @@ -115,99 +115,65 @@ static void pad_len_spaces(struct seq_fi
> seq_printf(m, "%*c", len, ' ');
> }
>
> -static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma)
> +static void *seek_vma_addr(struct seq_file *m,
> + struct vm_area_struct *vma, loff_t *pos)
> {
> - if (vma && vma != priv->tail_vma) {
> - struct mm_struct *mm = vma->vm_mm;
> - up_read(&mm->mmap_sem);
> - mmput(mm);
> - }
> + struct proc_maps_private *priv = m->private;
> + unsigned long addr = *pos;
> +
> + if (addr & ~PAGE_MASK) { /* time for next batch */
> + if (vma->vm_end - addr < priv->batch_size) {
> + vma = vma->vm_next;
> + if (!vma || vma == get_gate_vma(priv->task))
> + goto done;
> + } else
> + addr = (addr + priv->batch_size) & PAGE_MASK;
> + }
> + if (addr < vma->vm_start)
> + addr = vma->vm_start;
> +
> + m->version = *pos = addr;
> + return vma;
> +done:
> + return NULL;
> }
>
> static void *m_start(struct seq_file *m, loff_t *pos)
> {
> struct proc_maps_private *priv = m->private;
> - unsigned long last_addr = m->version;
> - struct mm_struct *mm;
> - struct vm_area_struct *vma, *tail_vma = NULL;
> - loff_t l = *pos;
> -
> - /* Clear the per syscall fields in priv */
> - priv->task = NULL;
> - priv->tail_vma = NULL;
> -
> - /*
> - * We remember last_addr rather than next_addr to hit with
> - * mmap_cache most of the time. We have zero last_addr at
> - * the beginning and also after lseek. We will have -1 last_addr
> - * after the end of the vmas.
> - */
> -
> - if (last_addr == -1UL)
> - return NULL;
> + struct vm_area_struct *vma;
>
> + priv->mm = NULL;
> priv->task = get_pid_task(priv->pid, PIDTYPE_PID);
> if (!priv->task)
> return NULL;
>
> - mm = get_task_mm(priv->task);
> - if (!mm)
> + priv->mm = get_task_mm(priv->task);
> + if (!priv->mm)
> return NULL;
>
> - priv->tail_vma = tail_vma = get_gate_vma(priv->task);
> - down_read(&mm->mmap_sem);
> + down_read(&priv->mm->mmap_sem);
>
> - /* Start with last addr hint */
> - if (last_addr && (vma = find_vma(mm, last_addr))) {
> - vma = vma->vm_next;
> - goto out;
> - }
> -
> - /*
> - * Check the vma index is within the range and do
> - * sequential scan until m_index.
> - */
> - vma = NULL;
> - if ((unsigned long)l < mm->map_count) {
> - vma = mm->mmap;
> - while (l-- && vma)
> - vma = vma->vm_next;
> - goto out;
> - }
> -
> - if (l != mm->map_count)
> - tail_vma = NULL; /* After gate vma */
> -
> -out:
> - if (vma)
> - return vma;
> + vma = find_vma(priv->mm, *pos);
> + if (!vma || vma == get_gate_vma(priv->task))
> + return NULL;
>
> - /* End of vmas has been reached */
> - m->version = (tail_vma != NULL)? 0: -1UL;
> - up_read(&mm->mmap_sem);
> - mmput(mm);
> - return tail_vma;
> + return seek_vma_addr(m, vma, pos);
> }
>
> static void *m_next(struct seq_file *m, void *v, loff_t *pos)
> {
> - struct proc_maps_private *priv = m->private;
> - struct vm_area_struct *vma = v;
> - struct vm_area_struct *tail_vma = priv->tail_vma;
> -
> (*pos)++;
> - if (vma && (vma != tail_vma) && vma->vm_next)
> - return vma->vm_next;
> - vma_stop(priv, vma);
> - return (vma != tail_vma)? tail_vma: NULL;
> + return seek_vma_addr(m, v, pos);
> }
>
> static void m_stop(struct seq_file *m, void *v)
> {
> struct proc_maps_private *priv = m->private;
> - struct vm_area_struct *vma = v;
> -
> - vma_stop(priv, vma);
> + if (priv->mm) {
> + up_read(&priv->mm->mmap_sem);
> + mmput(priv->mm);
> + }
> if (priv->task)
> put_task_struct(priv->task);
> }
> @@ -220,6 +186,7 @@ static int do_maps_open(struct inode *in
> priv = kzalloc(sizeof(*priv), GFP_KERNEL);
> if (priv) {
> priv->pid = proc_pid(inode);
> + priv->batch_size = ~0;
> ret = seq_open(file, ops);
> if (!ret) {
> struct seq_file *m = file->private_data;
> @@ -291,8 +258,6 @@ static int show_map(struct seq_file *m,
> }
> seq_putc(m, '\n');
>
> - if (m->count < m->size) /* vma is copied successfully */
> - m->version = (vma != get_gate_vma(task))? vma->vm_start: 0;
> return 0;
> }

--
Mathematics is the supreme nostalgia of our time.

2007-08-17 02:55:11

by Wu Fengguang

[permalink] [raw]
Subject: Re: [PATCH 2/4] maps: address based vma walking

On Thu, Aug 16, 2007 at 09:16:17PM -0500, Matt Mackall wrote:
> On Fri, Aug 17, 2007 at 06:05:18AM +0800, Fengguang Wu wrote:
> > Split large vmas into page groups of proc_maps_private.batch_size bytes, and
> > iterate them one by one for seqfile->show. This allows us to export large scale
> > process address space information via the seqfile interface. The old behavior
> > of walking one vma at a time can be achieved by setting the batching size to
> > ~0UL.
> >
> > Cc: Matt Mackall <[email protected]>
> > Cc: Al Viro <[email protected]>
> > Signed-off-by: Fengguang Wu <[email protected]>
> > ---
> > fs/proc/task_mmu.c | 105 ++++++++++++--------------------------
> > include/linux/proc_fs.h | 6 +-
> > mm/mempolicy.c | 2
> > 3 files changed, 38 insertions(+), 75 deletions(-)
> >
> > --- linux-2.6.23-rc2-mm2.orig/include/linux/proc_fs.h
> > +++ linux-2.6.23-rc2-mm2/include/linux/proc_fs.h
> > @@ -283,9 +283,9 @@ static inline struct proc_dir_entry *PDE
> > struct proc_maps_private {
> > struct pid *pid;
> > struct task_struct *task;
> > -#ifdef CONFIG_MMU
> > - struct vm_area_struct *tail_vma;
> > -#endif
> > + struct mm_struct *mm;
> > + /* walk min(batch_size, remaining_size_of(vma)) bytes at a time */
> > + unsigned long batch_size;
> > };
> >
> > #endif /* _LINUX_PROC_FS_H */
> > --- linux-2.6.23-rc2-mm2.orig/mm/mempolicy.c
> > +++ linux-2.6.23-rc2-mm2/mm/mempolicy.c
> > @@ -1937,7 +1937,5 @@ out:
> > seq_putc(m, '\n');
> > kfree(md);
> >
> > - if (m->count < m->size)
> > - m->version = (vma != priv->tail_vma) ? vma->vm_start : 0;
> > return 0;
> > }
>
> What's this bit for?

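This function is called by show_numa_map_checked(), which also relies on
m_start/m_next/m_stop. m->version used to store the start address of a vma,
but it may now point into the middle of a vma as well. It is therefore set
by seek_vma_addr(), and the show functions must no longer overwrite it with
vma->vm_start -- hence the removal here and in show_map().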