This patchstack introduces a new "memtrack" module for tracking and accounting
memory exported to userspace as shared buffers, like dma-buf fds or GEM handles.
Any process holding a reference to these buffers will keep the kernel from
reclaiming its backing pages. mm counters don't provide a complete picture of
these allocations, since they only account for pages that are mapped into a
process's address space. This problem is especially bad for systems like
Android that use dma-buf fds to share graphics and multimedia buffers between
processes: these allocations are often large, have complex sharing patterns,
and are rarely mapped into every process that holds a reference to them.
memtrack maintains a per-process list of shared buffer references, which is
exported to userspace as /proc/[pid]/memtrack. Buffers can be optionally
"tagged" with a short string: for example, Android userspace would use this
tag to identify whether buffers were allocated on behalf of the camera stack,
GL, etc. memtrack also exports the VMAs associated with these buffers so
that pages already included in the process's mm counters aren't double-counted.
Shared-buffer allocators can hook into memtrack by embedding
struct memtrack_buffer in their buffer metadata, calling
memtrack_buffer_{init,remove} at buffer allocation and free time, and
memtrack_buffer_{install,uninstall} when a userspace process takes or
drops a reference to the buffer. For fd-backed buffers like dma-bufs, hooks in
fdtable.c and fork.c automatically notify memtrack when references are added or
removed from a process's fd table.
This patchstack adds memtrack hooks into dma-buf and ion. If there's upstream
interest in memtrack, it can be extended to other memory allocators as well,
such as GEM implementations.
Greg Hackmann (1):
drivers: staging: ion: add ION_IOC_TAG ioctl
Ruchi Kandoi (5):
fs: add installed and uninstalled file_operations
drivers: misc: add memtrack
dma-buf: add memtrack support
memtrack: Adds the accounting to keep track of all mmaped/unmapped
pages.
memtrack: Add memtrack accounting for forked processes.
drivers/android/binder.c | 4 +-
drivers/dma-buf/dma-buf.c | 37 +++
drivers/misc/Kconfig | 16 +
drivers/misc/Makefile | 1 +
drivers/misc/memtrack.c | 516 ++++++++++++++++++++++++++++++++
drivers/staging/android/ion/ion-ioctl.c | 17 ++
drivers/staging/android/ion/ion.c | 60 +++-
drivers/staging/android/ion/ion_priv.h | 2 +
drivers/staging/android/uapi/ion.h | 25 ++
fs/file.c | 38 ++-
fs/open.c | 2 +-
fs/proc/base.c | 4 +
include/linux/dma-buf.h | 5 +
include/linux/fdtable.h | 4 +-
include/linux/fs.h | 2 +
include/linux/memtrack.h | 130 ++++++++
include/linux/mm.h | 3 +
include/linux/sched.h | 3 +
kernel/fork.c | 23 +-
19 files changed, 875 insertions(+), 17 deletions(-)
create mode 100644 drivers/misc/memtrack.c
create mode 100644 include/linux/memtrack.h
--
2.8.0.rc3.226.g39d4020
These optional file_operations notify a file implementation when it is
installed or uninstalled from a task's fd table. This can be used for
accounting of file-backed shared resources like dma-buf.
This involves some changes to the __fd_install() and __close_fd() APIs
to actually pass along the responsible task_struct. These are low-level
APIs with only two in-tree callers, both adjusted in this patch.
Signed-off-by: Greg Hackmann <[email protected]>
Signed-off-by: Ruchi Kandoi <[email protected]>
---
drivers/android/binder.c | 4 ++--
fs/file.c | 38 +++++++++++++++++++++++++++++---------
fs/open.c | 2 +-
include/linux/fdtable.h | 4 ++--
include/linux/fs.h | 2 ++
5 files changed, 36 insertions(+), 14 deletions(-)
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 562af94..0bb174e 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -398,7 +398,7 @@ static void task_fd_install(
struct binder_proc *proc, unsigned int fd, struct file *file)
{
if (proc->files)
- __fd_install(proc->files, fd, file);
+ __fd_install(proc->tsk, fd, file);
}
/*
@@ -411,7 +411,7 @@ static long task_close_fd(struct binder_proc *proc, unsigned int fd)
if (proc->files == NULL)
return -ESRCH;
- retval = __close_fd(proc->files, fd);
+ retval = __close_fd(proc->tsk, fd);
/* can't restart close syscall because file table entry was cleared */
if (unlikely(retval == -ERESTARTSYS ||
retval == -ERESTARTNOINTR ||
diff --git a/fs/file.c b/fs/file.c
index 69d6990..19c5fad 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -282,6 +282,24 @@ static unsigned int count_open_files(struct fdtable *fdt)
return i;
}
+static inline void fdt_install(struct fdtable *fdt, int fd, struct file *file,
+ struct task_struct *task)
+{
+ if (file->f_op->installed)
+ file->f_op->installed(file, task);
+ rcu_assign_pointer(fdt->fd[fd], file);
+}
+
+static inline void fdt_uninstall(struct fdtable *fdt, int fd,
+ struct task_struct *task)
+{
+ struct file *old_file = fdt->fd[fd];
+
+ if (old_file->f_op->uninstalled)
+ old_file->f_op->uninstalled(old_file, task);
+ rcu_assign_pointer(fdt->fd[fd], NULL);
+}
+
/*
* Allocate a new files structure and copy contents from the
* passed in files structure.
@@ -543,7 +561,7 @@ int __alloc_fd(struct files_struct *files,
/* Sanity check */
if (rcu_access_pointer(fdt->fd[fd]) != NULL) {
printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd);
- rcu_assign_pointer(fdt->fd[fd], NULL);
+ fdt_uninstall(fdt, fd, current);
}
#endif
@@ -601,10 +619,11 @@ EXPORT_SYMBOL(put_unused_fd);
* fd_install() instead.
*/
-void __fd_install(struct files_struct *files, unsigned int fd,
+void __fd_install(struct task_struct *task, unsigned int fd,
struct file *file)
{
struct fdtable *fdt;
+ struct files_struct *files = task->files;
might_sleep();
rcu_read_lock_sched();
@@ -618,13 +637,13 @@ void __fd_install(struct files_struct *files, unsigned int fd,
smp_rmb();
fdt = rcu_dereference_sched(files->fdt);
BUG_ON(fdt->fd[fd] != NULL);
- rcu_assign_pointer(fdt->fd[fd], file);
+ fdt_install(fdt, fd, file, task);
rcu_read_unlock_sched();
}
void fd_install(unsigned int fd, struct file *file)
{
- __fd_install(current->files, fd, file);
+ __fd_install(current, fd, file);
}
EXPORT_SYMBOL(fd_install);
@@ -632,10 +651,11 @@ EXPORT_SYMBOL(fd_install);
/*
* The same warnings as for __alloc_fd()/__fd_install() apply here...
*/
-int __close_fd(struct files_struct *files, unsigned fd)
+int __close_fd(struct task_struct *task, unsigned fd)
{
struct file *file;
struct fdtable *fdt;
+ struct files_struct *files = task->files;
spin_lock(&files->file_lock);
fdt = files_fdtable(files);
@@ -644,7 +664,7 @@ int __close_fd(struct files_struct *files, unsigned fd)
file = fdt->fd[fd];
if (!file)
goto out_unlock;
- rcu_assign_pointer(fdt->fd[fd], NULL);
+ fdt_uninstall(fdt, fd, task);
__clear_close_on_exec(fd, fdt);
__put_unused_fd(files, fd);
spin_unlock(&files->file_lock);
@@ -679,7 +699,7 @@ void do_close_on_exec(struct files_struct *files)
file = fdt->fd[fd];
if (!file)
continue;
- rcu_assign_pointer(fdt->fd[fd], NULL);
+ fdt_uninstall(fdt, fd, current);
__put_unused_fd(files, fd);
spin_unlock(&files->file_lock);
filp_close(file, files);
@@ -846,7 +866,7 @@ __releases(&files->file_lock)
if (!tofree && fd_is_open(fd, fdt))
goto Ebusy;
get_file(file);
- rcu_assign_pointer(fdt->fd[fd], file);
+ fdt_install(fdt, fd, file, current);
__set_open_fd(fd, fdt);
if (flags & O_CLOEXEC)
__set_close_on_exec(fd, fdt);
@@ -870,7 +890,7 @@ int replace_fd(unsigned fd, struct file *file, unsigned flags)
struct files_struct *files = current->files;
if (!file)
- return __close_fd(files, fd);
+ return __close_fd(current, fd);
if (fd >= rlimit(RLIMIT_NOFILE))
return -EBADF;
diff --git a/fs/open.c b/fs/open.c
index 8aeb08b..0f1db76 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1120,7 +1120,7 @@ EXPORT_SYMBOL(filp_close);
*/
SYSCALL_DEFINE1(close, unsigned int, fd)
{
- int retval = __close_fd(current->files, fd);
+ int retval = __close_fd(current, fd);
/* can't restart close syscall because file table entry was cleared */
if (unlikely(retval == -ERESTARTSYS ||
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index aca2a6a..a45fce3 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -113,9 +113,9 @@ int iterate_fd(struct files_struct *, unsigned,
extern int __alloc_fd(struct files_struct *files,
unsigned start, unsigned end, unsigned flags);
-extern void __fd_install(struct files_struct *files,
+extern void __fd_install(struct task_struct *task,
unsigned int fd, struct file *file);
-extern int __close_fd(struct files_struct *files,
+extern int __close_fd(struct task_struct *task,
unsigned int fd);
extern struct kmem_cache *files_cachep;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c145219..d62bce8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1730,6 +1730,8 @@ struct file_operations {
u64);
ssize_t (*dedupe_file_range)(struct file *, u64, u64, struct file *,
u64);
+ void (*installed)(struct file *file, struct task_struct *task);
+ void (*uninstalled)(struct file *file, struct task_struct *task);
};
struct inode_operations {
--
2.8.0.rc3.226.g39d4020
From: Greg Hackmann <[email protected]>
ION_IOC_TAG provides a userspace interface for tagging buffers with
their memtrack usage after allocation.
Signed-off-by: Ruchi Kandoi <[email protected]>
---
drivers/staging/android/ion/ion-ioctl.c | 17 +++++++++++++++++
drivers/staging/android/uapi/ion.h | 25 +++++++++++++++++++++++++
2 files changed, 42 insertions(+)
diff --git a/drivers/staging/android/ion/ion-ioctl.c b/drivers/staging/android/ion/ion-ioctl.c
index 7e7431d..8745a85 100644
--- a/drivers/staging/android/ion/ion-ioctl.c
+++ b/drivers/staging/android/ion/ion-ioctl.c
@@ -28,6 +28,7 @@ union ion_ioctl_arg {
struct ion_handle_data handle;
struct ion_custom_data custom;
struct ion_heap_query query;
+ struct ion_tag_data tag;
};
static int validate_ioctl_arg(unsigned int cmd, union ion_ioctl_arg *arg)
@@ -162,6 +163,22 @@ long ion_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
case ION_IOC_HEAP_QUERY:
ret = ion_query_heaps(client, &data.query);
break;
+ case ION_IOC_TAG:
+ {
+#ifdef CONFIG_MEMTRACK
+ struct ion_handle *handle;
+
+ handle = ion_handle_get_by_id(client, data.tag.handle);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+ data.tag.tag[sizeof(data.tag.tag) - 1] = 0;
+ memtrack_buffer_set_tag(&handle->buffer->memtrack_buffer,
+ data.tag.tag);
+#else
+ ret = -ENOTTY;
+#endif
+ break;
+ }
default:
return -ENOTTY;
}
diff --git a/drivers/staging/android/uapi/ion.h b/drivers/staging/android/uapi/ion.h
index 14cd873..4c26196 100644
--- a/drivers/staging/android/uapi/ion.h
+++ b/drivers/staging/android/uapi/ion.h
@@ -115,6 +115,22 @@ struct ion_handle_data {
ion_user_handle_t handle;
};
+#define ION_MAX_TAG_LEN 32
+
+/**
+ * struct ion_fd_data - metadata passed from userspace for a handle
+ * @handle: a handle
+ * @tag: a string describing the buffer
+ *
+ * For ION_IOC_TAG userspace populates the handle field with
+ * the handle returned from ion alloc and type contains the memtrack_type which
+ * accurately describes the usage for the memory.
+ */
+struct ion_tag_data {
+ ion_user_handle_t handle;
+ char tag[ION_MAX_TAG_LEN];
+};
+
/**
* struct ion_custom_data - metadata passed to/from userspace for a custom ioctl
* @cmd: the custom ioctl function to call
@@ -217,6 +233,15 @@ struct ion_heap_query {
#define ION_IOC_SYNC _IOWR(ION_IOC_MAGIC, 7, struct ion_fd_data)
/**
+ * DOC: ION_IOC_TAG - adds a memtrack descriptor tag to memory
+ *
+ * Takes an ion_tag_data struct with the type field populated with a
+ * memtrack_type and handle populated with a valid opaque handle. The
+ * memtrack_type should accurately define the usage for the memory.
+ */
+#define ION_IOC_TAG _IOWR(ION_IOC_MAGIC, 8, struct ion_tag_data)
+
+/**
* DOC: ION_IOC_CUSTOM - call architecture specific ion ioctl
*
* Takes the argument of the architecture specific ioctl to call and
--
2.8.0.rc3.226.g39d4020
When a process is forked, all the buffers are shared with the forked
process too. Adds the functionality to add memtrack accounting for the
forked processes.
Forked process gets a copy of the mapped pages of the parent process.
This patch makes sure that the new mapped pages are attributed to the
child process instead of the parent.
Signed-off-by: Ruchi Kandoi <[email protected]>
---
drivers/misc/memtrack.c | 45 +++++++++++++++++++++++++++++++++++----
drivers/staging/android/ion/ion.c | 45 +++++++++++++++++++++++++++++++++++++--
include/linux/memtrack.h | 19 +++++++++++------
include/linux/mm.h | 3 +++
kernel/fork.c | 19 +++++++++++++++--
5 files changed, 117 insertions(+), 14 deletions(-)
diff --git a/drivers/misc/memtrack.c b/drivers/misc/memtrack.c
index 4b2d17f..fa2601a 100644
--- a/drivers/misc/memtrack.c
+++ b/drivers/misc/memtrack.c
@@ -204,12 +204,13 @@ EXPORT_SYMBOL(memtrack_buffer_uninstall);
* @buffer: the buffer's memtrack entry
*
* @vma: vma being opened
+ * @task: task which mapped the pages
*/
void memtrack_buffer_vm_open(struct memtrack_buffer *buffer,
- const struct vm_area_struct *vma)
+ const struct vm_area_struct *vma, struct task_struct *task)
{
unsigned long flags;
- struct task_struct *leader = current->group_leader;
+ struct task_struct *leader = task->group_leader;
struct memtrack_vma_list *vma_list;
vma_list = kmalloc(sizeof(*vma_list), GFP_KERNEL);
@@ -228,12 +229,13 @@ EXPORT_SYMBOL(memtrack_buffer_vm_open);
*
* @buffer: the buffer's memtrack entry
* @vma: the vma being closed
+ * @task: task that mmaped the pages
*/
void memtrack_buffer_vm_close(struct memtrack_buffer *buffer,
- const struct vm_area_struct *vma)
+ const struct vm_area_struct *vma, struct task_struct *task)
{
unsigned long flags;
- struct task_struct *leader = current->group_leader;
+ struct task_struct *leader = task->group_leader;
write_lock_irqsave(&leader->memtrack_lock, flags);
memtrack_buffer_vm_close_locked(&leader->memtrack_rb, buffer, vma);
@@ -241,6 +243,41 @@ void memtrack_buffer_vm_close(struct memtrack_buffer *buffer,
}
EXPORT_SYMBOL(memtrack_buffer_vm_close);
+/**
+ * memtrack_buffer_install_fork - Install all parent's handles into
+ * child.
+ *
+ * @parent: parent task
+ * @child: child task
+ */
+void memtrack_buffer_install_fork(struct task_struct *parent,
+ struct task_struct *child)
+{
+ struct task_struct *leader, *leader_child;
+ struct rb_root *root;
+ struct rb_node *node;
+ unsigned long flags;
+
+ if (!child || !parent)
+ return;
+
+ leader = parent->group_leader;
+ leader_child = child->group_leader;
+ write_lock_irqsave(&leader->memtrack_lock, flags);
+ root = &leader->memtrack_rb;
+ node = rb_first(root);
+ while (node) {
+ struct memtrack_handle *handle;
+
+ handle = rb_entry(node, struct memtrack_handle, node);
+ memtrack_buffer_install_locked(&leader_child->memtrack_rb,
+ handle->buffer);
+ node = rb_next(node);
+ }
+ write_unlock_irqrestore(&leader->memtrack_lock, flags);
+}
+EXPORT_SYMBOL(memtrack_buffer_install_fork);
+
static int memtrack_id_alloc(struct memtrack_buffer *buffer)
{
int ret;
diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c
index c32d520..451aa0f 100644
--- a/drivers/staging/android/ion/ion.c
+++ b/drivers/staging/android/ion/ion.c
@@ -906,7 +906,7 @@ static void ion_vm_open(struct vm_area_struct *vma)
list_add(&vma_list->list, &buffer->vmas);
mutex_unlock(&buffer->lock);
pr_debug("%s: adding %p\n", __func__, vma);
- memtrack_buffer_vm_open(&buffer->memtrack_buffer, vma);
+ memtrack_buffer_vm_open(&buffer->memtrack_buffer, vma, current);
}
static void ion_vm_close(struct vm_area_struct *vma)
@@ -925,13 +925,51 @@ static void ion_vm_close(struct vm_area_struct *vma)
break;
}
mutex_unlock(&buffer->lock);
- memtrack_buffer_vm_close(&buffer->memtrack_buffer, vma);
+ memtrack_buffer_vm_close(&buffer->memtrack_buffer, vma, current);
+}
+
+void vm_track(struct vm_area_struct *vma, struct task_struct *task)
+{
+ struct ion_buffer *buffer = vma->vm_private_data;
+
+ memtrack_buffer_vm_open(&buffer->memtrack_buffer, vma, task);
+}
+
+void vm_untrack(struct vm_area_struct *vma, struct task_struct *task)
+{
+ struct ion_buffer *buffer = vma->vm_private_data;
+
+ memtrack_buffer_vm_close(&buffer->memtrack_buffer, vma, task);
}
static const struct vm_operations_struct ion_vma_ops = {
.open = ion_vm_open,
.close = ion_vm_close,
.fault = ion_vm_fault,
+ .track = vm_track,
+ .untrack = vm_untrack,
+};
+
+static void memtrack_vm_close(struct vm_area_struct *vma)
+{
+ struct ion_buffer *buffer = vma->vm_private_data;
+
+ memtrack_buffer_vm_close(&buffer->memtrack_buffer, vma, current);
+}
+
+static void memtrack_vm_open(struct vm_area_struct *vma)
+{
+ struct ion_buffer *buffer = vma->vm_private_data;
+
+ memtrack_buffer_vm_open(&buffer->memtrack_buffer, vma, current);
+}
+
+static struct vm_operations_struct memtrack_vma_ops = {
+ .open = memtrack_vm_open,
+ .close = memtrack_vm_close,
+ .fault = NULL,
+ .track = vm_track,
+ .untrack = vm_untrack,
};
static int ion_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
@@ -952,6 +990,9 @@ static int ion_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
vma->vm_ops = &ion_vma_ops;
ion_vm_open(vma);
return 0;
+ } else {
+ vma->vm_private_data = buffer;
+ vma->vm_ops = &memtrack_vma_ops;
}
if (!(buffer->flags & ION_FLAG_CACHED))
diff --git a/include/linux/memtrack.h b/include/linux/memtrack.h
index 5a4c7ea..4595fb0 100644
--- a/include/linux/memtrack.h
+++ b/include/linux/memtrack.h
@@ -41,10 +41,12 @@ void memtrack_buffer_install(struct memtrack_buffer *buffer,
struct task_struct *tsk);
void memtrack_buffer_uninstall(struct memtrack_buffer *buffer,
struct task_struct *tsk);
+void memtrack_buffer_install_fork(struct task_struct *parent,
+ struct task_struct *child);
void memtrack_buffer_vm_open(struct memtrack_buffer *buffer,
- const struct vm_area_struct *vma);
+ const struct vm_area_struct *vma, struct task_struct *task);
void memtrack_buffer_vm_close(struct memtrack_buffer *buffer,
- const struct vm_area_struct *vma);
+ const struct vm_area_struct *vma, struct task_struct *task);
/**
* memtrack_buffer_set_tag - add a descriptive tag to a memtrack entry
@@ -90,6 +92,11 @@ static inline void memtrack_buffer_uninstall(struct memtrack_buffer *buffer,
{
}
+static inline void memtrack_buffer_install_fork(struct task_struct *parent,
+ struct task_struct *child)
+{
+}
+
static inline int memtrack_buffer_set_tag(struct memtrack_buffer *buffer,
const char *tag)
{
@@ -97,12 +104,12 @@ static inline int memtrack_buffer_set_tag(struct memtrack_buffer *buffer,
}
static inline void memtrack_buffer_vm_open(struct memtrack_buffer *buffer,
- const struct vm_area_struct *vma)
+ const struct vm_area_struct *vma, struct task_struct *task)
{
}
static inline void memtrack_buffer_vm_close(struct memtrack_buffer *buffer,
- const struct vm_area_struct *vma)
+ const struct vm_area_struct *vma, struct task_struct *task)
{
}
#endif /* CONFIG_MEMTRACK */
@@ -115,9 +122,9 @@ static inline void memtrack_buffer_vm_close(struct memtrack_buffer *buffer,
* @vma: the vma passed to mmap()
*/
static inline void memtrack_buffer_mmap(struct memtrack_buffer *buffer,
- const struct vm_area_struct *vma)
+ struct vm_area_struct *vma)
{
- memtrack_buffer_vm_open(buffer, vma);
+ memtrack_buffer_vm_open(buffer, vma, current);
}
#endif /* _MEMTRACK_ */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index e9caec6..619ea7f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -402,6 +402,9 @@ struct vm_operations_struct {
*/
struct page *(*find_special_page)(struct vm_area_struct *vma,
unsigned long addr);
+
+ void (*track)(struct vm_area_struct *vma, struct task_struct *task);
+ void (*untrack)(struct vm_area_struct *vma, struct task_struct *task);
};
struct mmu_gather;
diff --git a/kernel/fork.c b/kernel/fork.c
index da8537a..43a2e73 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -76,6 +76,7 @@
#include <linux/compiler.h>
#include <linux/sysctl.h>
#include <linux/kcov.h>
+#include <linux/memtrack.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -547,7 +548,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
}
#ifdef CONFIG_MMU
-static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
+static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm,
+ struct task_struct *tsk)
{
struct vm_area_struct *mpnt, *tmp, *prev, **pprev;
struct rb_node **rb_link, *rb_parent;
@@ -660,6 +662,11 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
if (tmp->vm_ops && tmp->vm_ops->open)
tmp->vm_ops->open(tmp);
+ if (tmp->vm_ops && tmp->vm_ops->track && tmp->vm_ops->untrack) {
+ tmp->vm_ops->untrack(tmp, current);
+ tmp->vm_ops->track(tmp, tsk);
+ }
+
if (retval)
goto out;
}
@@ -1125,7 +1132,7 @@ static struct mm_struct *dup_mm(struct task_struct *tsk)
if (!mm_init(mm, tsk))
goto fail_nomem;
- err = dup_mmap(mm, oldmm);
+ err = dup_mmap(mm, oldmm, tsk);
if (err)
goto free_pt;
@@ -1235,6 +1242,12 @@ static int copy_files(unsigned long clone_flags, struct task_struct *tsk)
tsk->files = newf;
error = 0;
+#ifdef CONFIG_MEMTRACK
+ if (!(clone_flags & CLONE_THREAD)) {
+ tsk->group_leader = tsk;
+ memtrack_buffer_install_fork(current, tsk);
+ }
+#endif
out:
return error;
}
@@ -2153,6 +2166,8 @@ static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp
*new_fdp = dup_fd(fd, &error);
if (!*new_fdp)
return error;
+
+ memtrack_buffer_install_fork(current->parent, current);
}
return 0;
--
2.8.0.rc3.226.g39d4020
Shared-buffer allocators like ion or GEM traditionally call into CMA or
alloc_pages() to get backing memory, meaning these allocations will not
show up in any process's mm counters. But since these allocations are
often used for things like graphics buffers that can be extremely large,
the user just sees a bunch of pages vanishing from the system without an
explanation.
CONFIG_MEMTRACK adds infrastructure for "blaming" these allocations back
to the processes currently holding a reference to the shared buffer.
This information is exposed to userspace through /proc/[pid]/memtrack.
To use memtrack, the shared memory allocator should:
(1) Embed a struct memtrack_buffer somewhere in the underlying buffer's
metadata, and initialize it with memtrack_buffer_init()
(3) Call memtrack_buffer_{install,uninstall} each time a task takes or
drops a reference to the shared buffer
(3) Call memtrack_buffer_remove() before destroying a tracked buffer
CONFIG_MEMTRACK_DEBUG adds a global list of all buffers tracked by
memtrack, accessible through /sys/kernel/debug/memtrack. This involves
maintaining a global idr of buffers. Due to the extra overhead,
CONFIG_MEMTRACK_DEBUG is intended for debugging memory leaks rather than
production use.
Signed-off-by: Greg Hackmann <[email protected]>
Signed-off-by: Ruchi Kandoi <[email protected]>
---
drivers/misc/Kconfig | 16 +++
drivers/misc/Makefile | 1 +
drivers/misc/memtrack.c | 360 +++++++++++++++++++++++++++++++++++++++++++++++
fs/proc/base.c | 4 +
include/linux/memtrack.h | 94 +++++++++++++
include/linux/sched.h | 3 +
kernel/fork.c | 4 +
7 files changed, 482 insertions(+)
create mode 100644 drivers/misc/memtrack.c
create mode 100644 include/linux/memtrack.h
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 64971ba..7557fb1 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -766,6 +766,22 @@ config PANEL_BOOT_MESSAGE
An empty message will only clear the display at driver init time. Any other
printf()-formatted message is valid with newline and escape codes.
+config MEMTRACK
+ tristate "Per-pid memory statistics"
+ default n
+ ---help---
+ Keeps track of shared buffers allocated by the process and
+ exports them via /proc/<pid>/memtrack.
+
+config MEMTRACK_DEBUG
+ tristate "Per-pid memory statistics debug option"
+ depends on MEMTRACK && DEBUG_FS
+ default n
+ ---help---
+ Keeps track of all shared buffers allocated and exports the list
+ via /sys/kernel/debug/memtrack.
+
+ source "drivers/misc/c2port/Kconfig"
source "drivers/misc/c2port/Kconfig"
source "drivers/misc/eeprom/Kconfig"
source "drivers/misc/cb710/Kconfig"
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index 3198336..1fbb084 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -68,3 +68,4 @@ OBJCOPYFLAGS_lkdtm_rodata_objcopy.o := \
targets += lkdtm_rodata.o lkdtm_rodata_objcopy.o
$(obj)/lkdtm_rodata_objcopy.o: $(obj)/lkdtm_rodata.o FORCE
$(call if_changed,objcopy)
+obj-$(CONFIG_MEMTRACK) += memtrack.o
diff --git a/drivers/misc/memtrack.c b/drivers/misc/memtrack.c
new file mode 100644
index 0000000..e5c7e03
--- /dev/null
+++ b/drivers/misc/memtrack.c
@@ -0,0 +1,360 @@
+/* drivers/misc/memtrack.c
+ *
+ * Copyright (C) 2016 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/debugfs.h>
+#include <linux/device.h>
+#include <linux/fs.h>
+#include <linux/idr.h>
+#include <linux/init.h>
+#include <linux/memtrack.h>
+#include <linux/profile.h>
+#include <linux/rbtree.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+
+struct memtrack_handle {
+ struct memtrack_buffer *buffer;
+ struct rb_node node;
+ struct rb_root *root;
+ struct kref refcount;
+};
+
+static struct kmem_cache *memtrack_handle_cache;
+
+static DEFINE_MUTEX(memtrack_id_lock);
+#if IS_ENABLED(CONFIG_MEMTRACK_DEBUG)
+static struct dentry *debugfs_file;
+static DEFINE_IDR(mem_idr);
+#else
+static DEFINE_IDA(mem_ida);
+#endif
+
+static void memtrack_buffer_install_locked(struct rb_root *root,
+ struct memtrack_buffer *buffer)
+{
+ struct rb_node **new = &root->rb_node, *parent = NULL;
+ struct memtrack_handle *handle;
+
+ while (*new) {
+ struct rb_node *node = *new;
+
+ handle = rb_entry(node, struct memtrack_handle, node);
+ parent = node;
+ if (handle->buffer->id > buffer->id) {
+ new = &node->rb_left;
+ } else if (handle->buffer->id < buffer->id) {
+ new = &node->rb_right;
+ } else {
+ kref_get(&handle->refcount);
+ return;
+ }
+ }
+
+ handle = kmem_cache_alloc(memtrack_handle_cache, GFP_KERNEL);
+ if (!handle)
+ return;
+
+ handle->buffer = buffer;
+ handle->root = root;
+ kref_init(&handle->refcount);
+
+ rb_link_node(&handle->node, parent, new);
+ rb_insert_color(&handle->node, root);
+ atomic_inc(&handle->buffer->userspace_handles);
+}
+
+/**
+ * memtrack_buffer_install - add a userspace reference to a shared buffer
+ *
+ * @buffer: the buffer's memtrack entry
+ * @tsk: the userspace task that took the reference
+ *
+ * This is normally called while creating a userspace handle (fd, etc.) to
+ * @buffer.
+ */
+void memtrack_buffer_install(struct memtrack_buffer *buffer,
+ struct task_struct *tsk)
+{
+ struct task_struct *leader;
+ unsigned long flags;
+
+ if (!buffer || !tsk)
+ return;
+
+ leader = tsk->group_leader;
+ write_lock_irqsave(&leader->memtrack_lock, flags);
+ memtrack_buffer_install_locked(&leader->memtrack_rb, buffer);
+ write_unlock_irqrestore(&leader->memtrack_lock, flags);
+}
+EXPORT_SYMBOL(memtrack_buffer_install);
+
+static void memtrack_handle_destroy(struct kref *ref)
+{
+ struct memtrack_handle *handle;
+
+ handle = container_of(ref, struct memtrack_handle, refcount);
+ rb_erase(&handle->node, handle->root);
+ atomic_dec(&handle->buffer->userspace_handles);
+ kmem_cache_free(memtrack_handle_cache, handle);
+}
+
+static void memtrack_buffer_uninstall_locked(struct rb_root *root,
+ struct memtrack_buffer *buffer)
+{
+ struct rb_node *node = root->rb_node;
+
+ while (node) {
+ struct memtrack_handle *handle = rb_entry(node,
+ struct memtrack_handle, node);
+
+ if (handle->buffer->id > buffer->id) {
+ node = node->rb_left;
+ } else if (handle->buffer->id < buffer->id) {
+ node = node->rb_right;
+ } else {
+ kref_put(&handle->refcount, memtrack_handle_destroy);
+ return;
+ }
+ }
+}
+
+/**
+ * memtrack_buffer_uninstall - drop a userspace reference to a shared buffer
+ *
+ * @buffer: the buffer's memtrack entry
+ * @tsk: the userspace task that dropped the reference
+ *
+ * This is normally called while tearing down a userspace handle to @buffer.
+ */
+void memtrack_buffer_uninstall(struct memtrack_buffer *buffer,
+ struct task_struct *tsk)
+{
+ struct task_struct *leader;
+ unsigned long flags;
+
+ if (!buffer || !tsk)
+ return;
+
+ leader = tsk->group_leader;
+ write_lock_irqsave(&leader->memtrack_lock, flags);
+ memtrack_buffer_uninstall_locked(&leader->memtrack_rb, buffer);
+ write_unlock_irqrestore(&leader->memtrack_lock, flags);
+}
+EXPORT_SYMBOL(memtrack_buffer_uninstall);
+
+static int memtrack_id_alloc(struct memtrack_buffer *buffer)
+{
+ int ret;
+
+ mutex_lock(&memtrack_id_lock);
+#if IS_ENABLED(CONFIG_MEMTRACK_DEBUG)
+ ret = idr_alloc(&mem_idr, buffer, 0, 0, GFP_KERNEL);
+#else
+ ret = ida_simple_get(&mem_ida, 0, 0, GFP_KERNEL);
+#endif
+ mutex_unlock(&memtrack_id_lock);
+
+ return ret;
+}
+
+static void memtrack_id_free(struct memtrack_buffer *buffer)
+{
+ mutex_lock(&memtrack_id_lock);
+#if IS_ENABLED(CONFIG_MEMTRACK_DEBUG)
+ idr_remove(&mem_idr, buffer->id);
+#else
+ ida_simple_remove(&mem_ida, buffer->id);
+#endif
+ mutex_unlock(&memtrack_id_lock);
+}
+
+/**
+ * memtrack_buffer_remove - deinitialize a memtrack entry
+ *
+ * @buffer: the memtrack entry to deinitialize
+ *
+ * This is normally called just before freeing the pages backing @buffer.
+ */
+void memtrack_buffer_remove(struct memtrack_buffer *buffer)
+{
+ if (!buffer)
+ return;
+
+ if (WARN_ON(atomic_read(&buffer->userspace_handles)))
+ return;
+
+ kfree(buffer->tag);
+ memtrack_id_free(buffer);
+}
+EXPORT_SYMBOL(memtrack_buffer_remove);
+
+/**
+ * memtrack_buffer_init - initialize a memtrack entry for a shared buffer
+ *
+ * @buffer: the memtrack entry to initialize
+ * @size: the size of the shared buffer
+ *
+ * This is normally called just after allocating the buffer's backing pages.
+ *
+ * There must be a 1-to-1 mapping between buffers and
+ * struct memtrack_buffers. That is, memtrack_buffer_init() should be called
+ * only *once* for a given buffer, even if it's exported to
+ * userspace in multiple forms (e.g., simultaneously as a dma-buf fd and a
+ * GEM handle).
+ *
+ * Return 0 on success or a negative error code on failure.
+ */
+int memtrack_buffer_init(struct memtrack_buffer *buffer, size_t size)
+{
+ if (!buffer)
+ return -EINVAL;
+
+ memset(buffer, 0, sizeof(*buffer));
+
+ buffer->id = memtrack_id_alloc(buffer);
+ if (buffer->id < 0) {
+ pr_err("%s: Error allocating unique identifier\n", __func__);
+ return buffer->id;
+ }
+
+ buffer->size = size;
+ atomic_set(&buffer->userspace_handles, 0);
+#if IS_ENABLED(CONFIG_MEMTRACK_DEBUG)
+ buffer->pid = current->group_leader->pid;
+#endif
+ return 0;
+}
+EXPORT_SYMBOL(memtrack_buffer_init);
+
+static int process_notifier(struct notifier_block *self,
+ unsigned long cmd, void *v)
+{
+ struct task_struct *task = v, *leader;
+ struct rb_root *root;
+ struct rb_node *node;
+ unsigned long flags;
+
+ if (!task)
+ return NOTIFY_OK;
+
+ leader = task->group_leader;
+ write_lock_irqsave(&leader->memtrack_lock, flags);
+ root = &leader->memtrack_rb;
+ node = rb_first(root);
+ while (node) {
+ struct memtrack_handle *handle;
+
+ handle = rb_entry(node, struct memtrack_handle, node);
+ rb_erase(&handle->node, handle->root);
+ atomic_dec(&handle->buffer->userspace_handles);
+ kmem_cache_free(memtrack_handle_cache, handle);
+
+ node = rb_next(node);
+ }
+ write_unlock_irqrestore(&leader->memtrack_lock, flags);
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block process_notifier_block = {
+ .notifier_call = process_notifier,
+};
+
+int proc_memtrack(struct seq_file *m, struct pid_namespace *ns, struct pid *pid,
+ struct task_struct *task)
+{
+ struct rb_node *node;
+ unsigned long flags;
+
+ read_lock_irqsave(&task->memtrack_lock, flags);
+ if (RB_EMPTY_ROOT(&task->memtrack_rb))
+ goto done;
+
+ seq_printf(m, "%10.10s: %16.16s: %12.12s: %3.3s: pid:%d\n",
+ "ref_count", "Identifier", "size", "tag", task->pid);
+
+ for (node = rb_first(&task->memtrack_rb); node; node = rb_next(node)) {
+ struct memtrack_handle *handle = rb_entry(node,
+ struct memtrack_handle, node);
+ struct memtrack_buffer *buffer = handle->buffer;
+
+ seq_printf(m, "%10d %16d %12zu %s\n",
+ atomic_read(&buffer->userspace_handles),
+ buffer->id, buffer->size,
+ buffer->tag ? buffer->tag : "");
+ }
+
+done:
+ read_unlock_irqrestore(&task->memtrack_lock, flags);
+ return 0;
+}
+
+#if IS_ENABLED(CONFIG_MEMTRACK_DEBUG)
+static int memtrack_show(struct seq_file *m, void *v)
+{
+ struct memtrack_buffer *buffer;
+ int i;
+
+ seq_printf(m, "%4.4s %12.12s %10s %12.12s %3.3s\n", "pid",
+ "buffer_size", "ref", "Identifier", "tag");
+
+ rcu_read_lock();
+ idr_for_each_entry(&mem_idr, buffer, i)
+ seq_printf(m, "%4d %12zu %10d %12d %s\n", buffer->pid,
+ buffer->size,
+ atomic_read(&buffer->userspace_handles),
+ buffer->id, buffer->tag ? buffer->tag : "");
+ rcu_read_unlock();
+ return 0;
+}
+
+static int memtrack_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, memtrack_show, inode->i_private);
+}
+
+static const struct file_operations memtrack_fops = {
+ .open = memtrack_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+#endif
+
+
+static int __init memtrack_init(void)
+{
+ memtrack_handle_cache = KMEM_CACHE(memtrack_handle, SLAB_HWCACHE_ALIGN);
+ if (!memtrack_handle_cache)
+ return -ENOMEM;
+
+#if IS_ENABLED(CONFIG_MEMTRACK_DEBUG)
+ debugfs_file = debugfs_create_file("memtrack", S_IRUGO, NULL, NULL,
+ &memtrack_fops);
+#endif
+
+ profile_event_register(PROFILE_TASK_EXIT, &process_notifier_block);
+ return 0;
+}
+late_initcall(memtrack_init);
+
+static void __exit memtrack_exit(void)
+{
+ kmem_cache_destroy(memtrack_handle_cache);
+#if IS_ENABLED(CONFIG_MEMTRACK_DEBUG)
+ debugfs_remove(debugfs_file);
+#endif
+ profile_event_unregister(PROFILE_TASK_EXIT, &process_notifier_block);
+}
+__exitcall(memtrack_exit);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index c2964d8..5ed9d90 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -87,6 +87,7 @@
#include <linux/slab.h>
#include <linux/flex_array.h>
#include <linux/posix-timers.h>
+#include <linux/memtrack.h>
#ifdef CONFIG_HARDWALL
#include <asm/hardwall.h>
#endif
@@ -2932,6 +2933,9 @@ static const struct pid_entry tgid_base_stuff[] = {
REG("timers", S_IRUGO, proc_timers_operations),
#endif
REG("timerslack_ns", S_IRUGO|S_IWUGO, proc_pid_set_timerslack_ns_operations),
+#ifdef CONFIG_MEMTRACK
+ ONE("memtrack", S_IRUGO, proc_memtrack),
+#endif
};
static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
diff --git a/include/linux/memtrack.h b/include/linux/memtrack.h
new file mode 100644
index 0000000..f73be07
--- /dev/null
+++ b/include/linux/memtrack.h
@@ -0,0 +1,94 @@
+/* include/linux/memtrack.h
+ *
+ * Copyright (C) 2016 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _MEMTRACK_
+#define _MEMTRACK_
+
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#ifdef CONFIG_MEMTRACK
+struct memtrack_buffer {
+ size_t size;
+ atomic_t userspace_handles;
+ int id;
+ const char *tag;
+#ifdef CONFIG_MEMTRACK_DEBUG
+ pid_t pid;
+#endif
+};
+
+int proc_memtrack(struct seq_file *m, struct pid_namespace *ns, struct pid *pid,
+ struct task_struct *task);
+int memtrack_buffer_init(struct memtrack_buffer *buffer, size_t size);
+void memtrack_buffer_remove(struct memtrack_buffer *buffer);
+void memtrack_buffer_install(struct memtrack_buffer *buffer,
+ struct task_struct *tsk);
+void memtrack_buffer_uninstall(struct memtrack_buffer *buffer,
+ struct task_struct *tsk);
+
+/**
+ * memtrack_buffer_set_tag - add a descriptive tag to a memtrack entry
+ *
+ * @buffer: the memtrack entry to tag
+ * @tag: a string describing the buffer
+ *
+ * The tag is optional and provided only as information to userspace. It has
+ * no special meaning in the kernel.
+ */
+static inline int memtrack_buffer_set_tag(struct memtrack_buffer *buffer,
+ const char *tag)
+{
+ const char *d = kstrdup(tag, GFP_KERNEL);
+
+ if (!d)
+ return -ENOMEM;
+
+ kfree(buffer->tag);
+ buffer->tag = d;
+ return 0;
+}
+#else
+struct memtrack_buffer { };
+
+static inline int memtrack_buffer_init(struct memtrack_buffer *buffer,
+ size_t size)
+{
+ return -ENOENT;
+}
+
+static inline void memtrack_buffer_remove(struct memtrack_buffer *buffer)
+{
+}
+
+static inline void memtrack_buffer_install(struct memtrack_buffer *buffer,
+ struct task_struct *tsk)
+{
+}
+
+static inline void memtrack_buffer_uninstall(struct memtrack_buffer *buffer,
+ struct task_struct *tsk)
+{
+}
+
+static inline int memtrack_buffer_set_tag(struct memtrack_buffer *buffer,
+ const char *tag)
+{
+ return -ENOENT;
+}
+
+#endif /* CONFIG_MEMTRACK */
+#endif /* _MEMTRACK_ */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 348f51b..995a94d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1954,6 +1954,9 @@ struct task_struct {
#ifdef CONFIG_THREAD_INFO_IN_TASK
/* A live task holds one reference. */
atomic_t stack_refcount;
+#ifdef CONFIG_MEMTRACK
+ struct rb_root memtrack_rb;
+ rwlock_t memtrack_lock;
#endif
/* CPU-specific state of this task */
struct thread_struct thread;
diff --git a/kernel/fork.c b/kernel/fork.c
index 6d42242..da8537a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1615,6 +1615,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p->sequential_io = 0;
p->sequential_io_avg = 0;
#endif
+#ifdef CONFIG_MEMTRACK
+ p->memtrack_rb = RB_ROOT;
+ rwlock_init(&p->memtrack_lock);
+#endif
/* Perform scheduler related setup. Assign this task to a CPU. */
retval = sched_fork(clone_flags, p);
--
2.8.0.rc3.226.g39d4020
Signed-off-by: Greg Hackmann <[email protected]>
Signed-off-by: Ruchi Kandoi <[email protected]>
---
drivers/dma-buf/dma-buf.c | 37 ++++++++++++++++++++++++++++++++++
drivers/staging/android/ion/ion.c | 14 +++++++++++++
drivers/staging/android/ion/ion_priv.h | 2 ++
include/linux/dma-buf.h | 5 +++++
4 files changed, 58 insertions(+)
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index ddaee60..f632c2b 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -297,12 +297,32 @@ static long dma_buf_ioctl(struct file *file,
}
}
+static void dma_buf_installed(struct file *file, struct task_struct *task)
+{
+ struct memtrack_buffer *memtrack =
+ dma_buf_memtrack_buffer(file->private_data);
+
+ if (memtrack)
+ memtrack_buffer_install(memtrack, task);
+}
+
+static void dma_buf_uninstalled(struct file *file, struct task_struct *task)
+{
+ struct memtrack_buffer *memtrack =
+ dma_buf_memtrack_buffer(file->private_data);
+
+ if (memtrack)
+ memtrack_buffer_uninstall(memtrack, task);
+}
+
static const struct file_operations dma_buf_fops = {
.release = dma_buf_release,
.mmap = dma_buf_mmap_internal,
.llseek = dma_buf_llseek,
.poll = dma_buf_poll,
.unlocked_ioctl = dma_buf_ioctl,
+ .installed = dma_buf_installed,
+ .uninstalled = dma_buf_uninstalled,
};
/*
@@ -830,6 +850,23 @@ void dma_buf_vunmap(struct dma_buf *dmabuf, void *vaddr)
}
EXPORT_SYMBOL_GPL(dma_buf_vunmap);
+/**
+ * dma_buf_memtrack_buffer - returns a memtrack entry associated with dma_buf
+ *
+ * @dmabuf: [in] pointer to dma_buf
+ *
+ * Returns the struct memtrack_buffer associated with this dma_buf's
+ * backing pages. If memtrack isn't enabled in the kernel, or the dma_buf
+ * exporter doesn't have memtrack support, returns NULL.
+ */
+struct memtrack_buffer *dma_buf_memtrack_buffer(struct dma_buf *dmabuf)
+{
+ if (!dmabuf->ops->memtrack_buffer)
+ return NULL;
+ return dmabuf->ops->memtrack_buffer(dmabuf);
+}
+EXPORT_SYMBOL_GPL(dma_buf_memtrack_buffer);
+
#ifdef CONFIG_DEBUG_FS
static int dma_buf_debug_show(struct seq_file *s, void *unused)
{
diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c
index 396ded5..1c2df54 100644
--- a/drivers/staging/android/ion/ion.c
+++ b/drivers/staging/android/ion/ion.c
@@ -196,6 +196,7 @@ void ion_buffer_destroy(struct ion_buffer *buffer)
buffer->heap->ops->unmap_kernel(buffer->heap, buffer);
buffer->heap->ops->free(buffer);
vfree(buffer->pages);
+ memtrack_buffer_remove(&buffer->memtrack_buffer);
kfree(buffer);
}
@@ -458,6 +459,8 @@ struct ion_handle *ion_alloc(struct ion_client *client, size_t len,
handle = ERR_PTR(ret);
}
+ memtrack_buffer_init(&buffer->memtrack_buffer, len);
+
return handle;
}
EXPORT_SYMBOL(ion_alloc);
@@ -1013,6 +1016,16 @@ static int ion_dma_buf_end_cpu_access(struct dma_buf *dmabuf,
return 0;
}
+static struct memtrack_buffer *ion_memtrack_buffer(struct dma_buf *buffer)
+{
+ if (IS_ENABLED(CONFIG_MEMTRACK) && buffer && buffer->priv) {
+ struct ion_buffer *ion_buffer = buffer->priv;
+
+ return &ion_buffer->memtrack_buffer;
+ }
+ return NULL;
+}
+
static struct dma_buf_ops dma_buf_ops = {
.map_dma_buf = ion_map_dma_buf,
.unmap_dma_buf = ion_unmap_dma_buf,
@@ -1024,6 +1037,7 @@ static struct dma_buf_ops dma_buf_ops = {
.kunmap_atomic = ion_dma_buf_kunmap,
.kmap = ion_dma_buf_kmap,
.kunmap = ion_dma_buf_kunmap,
+ .memtrack_buffer = ion_memtrack_buffer,
};
struct dma_buf *ion_share_dma_buf(struct ion_client *client,
diff --git a/drivers/staging/android/ion/ion_priv.h b/drivers/staging/android/ion/ion_priv.h
index 3c3b324..74c38eb 100644
--- a/drivers/staging/android/ion/ion_priv.h
+++ b/drivers/staging/android/ion/ion_priv.h
@@ -27,6 +27,7 @@
#include <linux/shrinker.h>
#include <linux/types.h>
#include <linux/miscdevice.h>
+#include <linux/memtrack.h>
#include "ion.h"
@@ -78,6 +79,7 @@ struct ion_buffer {
int handle_count;
char task_comm[TASK_COMM_LEN];
pid_t pid;
+ struct memtrack_buffer memtrack_buffer;
};
void ion_buffer_destroy(struct ion_buffer *buffer);
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index e0b0741..dfcc2d0 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -32,6 +32,7 @@
#include <linux/fs.h>
#include <linux/fence.h>
#include <linux/wait.h>
+#include <linux/memtrack.h>
struct device;
struct dma_buf;
@@ -70,6 +71,8 @@ struct dma_buf_attachment;
* @vmap: [optional] creates a virtual mapping for the buffer into kernel
* address space. Same restrictions as for vmap and friends apply.
* @vunmap: [optional] unmaps a vmap from the buffer
+ * @memtrack_buffer: [optional] returns the memtrack entry for this buffer's
+ * backing pages
*/
struct dma_buf_ops {
int (*attach)(struct dma_buf *, struct device *,
@@ -104,6 +107,7 @@ struct dma_buf_ops {
void *(*vmap)(struct dma_buf *);
void (*vunmap)(struct dma_buf *, void *vaddr);
+ struct memtrack_buffer *(*memtrack_buffer)(struct dma_buf *);
};
/**
@@ -242,4 +246,5 @@ int dma_buf_mmap(struct dma_buf *, struct vm_area_struct *,
unsigned long);
void *dma_buf_vmap(struct dma_buf *);
void dma_buf_vunmap(struct dma_buf *, void *vaddr);
+struct memtrack_buffer *dma_buf_memtrack_buffer(struct dma_buf *dmabuf);
#endif /* __DMA_BUF_H__ */
--
2.8.0.rc3.226.g39d4020
Since mmaped pages will be accounted by the PSS, memtrack needs a way
to differentiate the total memory that hasn't been accounted for.
Signed-off-by: Ruchi Kandoi <[email protected]>
Signed-off-by: Greg Hackmann <[email protected]>
---
drivers/misc/memtrack.c | 175 ++++++++++++++++++++++++++++++++------
drivers/staging/android/ion/ion.c | 5 +-
include/linux/memtrack.h | 29 +++++++
3 files changed, 180 insertions(+), 29 deletions(-)
diff --git a/drivers/misc/memtrack.c b/drivers/misc/memtrack.c
index e5c7e03..4b2d17f 100644
--- a/drivers/misc/memtrack.c
+++ b/drivers/misc/memtrack.c
@@ -22,12 +22,19 @@
#include <linux/rbtree.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
+#include <linux/mm.h>
+
+struct memtrack_vma_list {
+ struct hlist_node node;
+ const struct vm_area_struct *vma;
+};
struct memtrack_handle {
struct memtrack_buffer *buffer;
struct rb_node node;
struct rb_root *root;
struct kref refcount;
+ struct hlist_head vma_list;
};
static struct kmem_cache *memtrack_handle_cache;
@@ -40,8 +47,8 @@ static DEFINE_IDR(mem_idr);
static DEFINE_IDA(mem_ida);
#endif
-static void memtrack_buffer_install_locked(struct rb_root *root,
- struct memtrack_buffer *buffer)
+static struct memtrack_handle *memtrack_handle_find_locked(struct rb_root *root,
+ struct memtrack_buffer *buffer, bool alloc)
{
struct rb_node **new = &root->rb_node, *parent = NULL;
struct memtrack_handle *handle;
@@ -56,22 +63,38 @@ static void memtrack_buffer_install_locked(struct rb_root *root,
} else if (handle->buffer->id < buffer->id) {
new = &node->rb_right;
} else {
- kref_get(&handle->refcount);
- return;
+ return handle;
}
}
- handle = kmem_cache_alloc(memtrack_handle_cache, GFP_KERNEL);
- if (!handle)
- return;
+ if (alloc) {
+ handle = kmem_cache_alloc(memtrack_handle_cache, GFP_KERNEL);
+ if (!handle)
+ return NULL;
- handle->buffer = buffer;
- handle->root = root;
- kref_init(&handle->refcount);
+ handle->buffer = buffer;
+ handle->root = root;
+ kref_init(&handle->refcount);
+ INIT_HLIST_HEAD(&handle->vma_list);
- rb_link_node(&handle->node, parent, new);
- rb_insert_color(&handle->node, root);
- atomic_inc(&handle->buffer->userspace_handles);
+ rb_link_node(&handle->node, parent, new);
+ rb_insert_color(&handle->node, root);
+ atomic_inc(&handle->buffer->userspace_handles);
+ }
+
+ return NULL;
+}
+
+static void memtrack_buffer_install_locked(struct rb_root *root,
+ struct memtrack_buffer *buffer)
+{
+ struct memtrack_handle *handle;
+
+ handle = memtrack_handle_find_locked(root, buffer, true);
+ if (handle) {
+ kref_get(&handle->refcount);
+ return;
+ }
}
/**
@@ -112,19 +135,41 @@ static void memtrack_handle_destroy(struct kref *ref)
static void memtrack_buffer_uninstall_locked(struct rb_root *root,
struct memtrack_buffer *buffer)
{
- struct rb_node *node = root->rb_node;
+ struct memtrack_handle *handle;
- while (node) {
- struct memtrack_handle *handle = rb_entry(node,
- struct memtrack_handle, node);
+ handle = memtrack_handle_find_locked(root, buffer, false);
- if (handle->buffer->id > buffer->id) {
- node = node->rb_left;
- } else if (handle->buffer->id < buffer->id) {
- node = node->rb_right;
- } else {
- kref_put(&handle->refcount, memtrack_handle_destroy);
- return;
+ if (handle)
+ kref_put(&handle->refcount, memtrack_handle_destroy);
+}
+
+static void memtrack_buffer_vm_open_locked(struct rb_root *root,
+ struct memtrack_buffer *buffer,
+ struct memtrack_vma_list *vma_list)
+{
+ struct memtrack_handle *handle;
+
+ handle = memtrack_handle_find_locked(root, buffer, false);
+ if (handle)
+ hlist_add_head(&vma_list->node, &handle->vma_list);
+}
+
+static void memtrack_buffer_vm_close_locked(struct rb_root *root,
+ struct memtrack_buffer *buffer,
+ const struct vm_area_struct *vma)
+{
+ struct memtrack_handle *handle;
+
+ handle = memtrack_handle_find_locked(root, buffer, false);
+ if (handle) {
+ struct memtrack_vma_list *vma_list;
+
+ hlist_for_each_entry(vma_list, &handle->vma_list, node) {
+ if (vma_list->vma == vma) {
+ hlist_del(&vma_list->node);
+ kfree(vma_list);
+ return;
+ }
}
}
}
@@ -153,6 +198,49 @@ void memtrack_buffer_uninstall(struct memtrack_buffer *buffer,
}
EXPORT_SYMBOL(memtrack_buffer_uninstall);
+/**
+ * memtrack_buffer_vm_open - account for pages mapped during vm open
+ *
+ * @buffer: the buffer's memtrack entry
+ *
+ * @vma: vma being opened
+ */
+void memtrack_buffer_vm_open(struct memtrack_buffer *buffer,
+ const struct vm_area_struct *vma)
+{
+ unsigned long flags;
+ struct task_struct *leader = current->group_leader;
+ struct memtrack_vma_list *vma_list;
+
+ vma_list = kmalloc(sizeof(*vma_list), GFP_KERNEL);
+ if (WARN_ON(!vma_list))
+ return;
+ vma_list->vma = vma;
+
+ write_lock_irqsave(&leader->memtrack_lock, flags);
+ memtrack_buffer_vm_open_locked(&leader->memtrack_rb, buffer, vma_list);
+ write_unlock_irqrestore(&leader->memtrack_lock, flags);
+}
+EXPORT_SYMBOL(memtrack_buffer_vm_open);
+
+/**
+ * memtrack_buffer_vm_close - account for pages unmapped during vm close
+ *
+ * @buffer: the buffer's memtrack entry
+ * @vma: the vma being closed
+ */
+void memtrack_buffer_vm_close(struct memtrack_buffer *buffer,
+ const struct vm_area_struct *vma)
+{
+ unsigned long flags;
+ struct task_struct *leader = current->group_leader;
+
+ write_lock_irqsave(&leader->memtrack_lock, flags);
+ memtrack_buffer_vm_close_locked(&leader->memtrack_rb, buffer, vma);
+ write_unlock_irqrestore(&leader->memtrack_lock, flags);
+}
+EXPORT_SYMBOL(memtrack_buffer_vm_close);
+
static int memtrack_id_alloc(struct memtrack_buffer *buffer)
{
int ret;
@@ -271,6 +359,33 @@ static struct notifier_block process_notifier_block = {
.notifier_call = process_notifier,
};
+static void show_memtrack_vma(struct seq_file *m,
+ const struct vm_area_struct *vma,
+ const struct memtrack_buffer *buf)
+{
+ unsigned long start = vma->vm_start;
+ unsigned long end = vma->vm_end;
+ unsigned long long pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT;
+ vm_flags_t flags = vma->vm_flags;
+ vm_flags_t remap_flag = VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
+
+ seq_setwidth(m, 50);
+ seq_printf(m, "%68lx-%08lx %c%c%c%c%c %08llx",
+ start,
+ end,
+ flags & VM_READ ? 'r' : '-',
+ flags & VM_WRITE ? 'w' : '-',
+ flags & VM_EXEC ? 'x' : '-',
+ flags & VM_MAYSHARE ? 's' : 'p',
+ flags & remap_flag ? '#' : '-',
+ pgoff);
+ if (buf->tag) {
+ seq_pad(m, ' ');
+ seq_puts(m, buf->tag);
+ }
+ seq_putc(m, '\n');
+}
+
int proc_memtrack(struct seq_file *m, struct pid_namespace *ns, struct pid *pid,
struct task_struct *task)
{
@@ -281,18 +396,23 @@ int proc_memtrack(struct seq_file *m, struct pid_namespace *ns, struct pid *pid,
if (RB_EMPTY_ROOT(&task->memtrack_rb))
goto done;
- seq_printf(m, "%10.10s: %16.16s: %12.12s: %3.3s: pid:%d\n",
- "ref_count", "Identifier", "size", "tag", task->pid);
+ seq_printf(m, "%10.10s: %16.16s: %12.12s: %12.12s: %20s: %5s: %8s: pid:%d\n",
+ "ref_count", "Identifier", "size", "tag",
+ "startAddr-endAddr", "Flags", "pgOff", task->pid);
for (node = rb_first(&task->memtrack_rb); node; node = rb_next(node)) {
struct memtrack_handle *handle = rb_entry(node,
struct memtrack_handle, node);
struct memtrack_buffer *buffer = handle->buffer;
+ struct memtrack_vma_list *vma;
- seq_printf(m, "%10d %16d %12zu %s\n",
+ seq_printf(m, "%10d %16d %12zu %12s\n",
atomic_read(&buffer->userspace_handles),
buffer->id, buffer->size,
buffer->tag ? buffer->tag : "");
+
+ hlist_for_each_entry(vma, &handle->vma_list, node)
+ show_memtrack_vma(m, vma->vma, handle->buffer);
}
done:
@@ -308,7 +428,6 @@ static int memtrack_show(struct seq_file *m, void *v)
seq_printf(m, "%4.4s %12.12s %10s %12.12s %3.3s\n", "pid",
"buffer_size", "ref", "Identifier", "tag");
-
rcu_read_lock();
idr_for_each_entry(&mem_idr, buffer, i)
seq_printf(m, "%4d %12zu %10d %12d %s\n", buffer->pid,
diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c
index 1c2df54..c32d520 100644
--- a/drivers/staging/android/ion/ion.c
+++ b/drivers/staging/android/ion/ion.c
@@ -906,6 +906,7 @@ static void ion_vm_open(struct vm_area_struct *vma)
list_add(&vma_list->list, &buffer->vmas);
mutex_unlock(&buffer->lock);
pr_debug("%s: adding %p\n", __func__, vma);
+ memtrack_buffer_vm_open(&buffer->memtrack_buffer, vma);
}
static void ion_vm_close(struct vm_area_struct *vma)
@@ -924,6 +925,7 @@ static void ion_vm_close(struct vm_area_struct *vma)
break;
}
mutex_unlock(&buffer->lock);
+ memtrack_buffer_vm_close(&buffer->memtrack_buffer, vma);
}
static const struct vm_operations_struct ion_vma_ops = {
@@ -963,7 +965,8 @@ static int ion_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
if (ret)
pr_err("%s: failure mapping buffer to userspace\n",
__func__);
-
+ else
+ memtrack_buffer_mmap(dma_buf_memtrack_buffer(dmabuf), vma);
return ret;
}
diff --git a/include/linux/memtrack.h b/include/linux/memtrack.h
index f73be07..5a4c7ea 100644
--- a/include/linux/memtrack.h
+++ b/include/linux/memtrack.h
@@ -33,12 +33,18 @@ struct memtrack_buffer {
int proc_memtrack(struct seq_file *m, struct pid_namespace *ns, struct pid *pid,
struct task_struct *task);
+int proc_memtrack_maps(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *task);
int memtrack_buffer_init(struct memtrack_buffer *buffer, size_t size);
void memtrack_buffer_remove(struct memtrack_buffer *buffer);
void memtrack_buffer_install(struct memtrack_buffer *buffer,
struct task_struct *tsk);
void memtrack_buffer_uninstall(struct memtrack_buffer *buffer,
struct task_struct *tsk);
+void memtrack_buffer_vm_open(struct memtrack_buffer *buffer,
+ const struct vm_area_struct *vma);
+void memtrack_buffer_vm_close(struct memtrack_buffer *buffer,
+ const struct vm_area_struct *vma);
/**
* memtrack_buffer_set_tag - add a descriptive tag to a memtrack entry
@@ -90,5 +96,28 @@ static inline int memtrack_buffer_set_tag(struct memtrack_buffer *buffer,
return -ENOENT;
}
+static inline void memtrack_buffer_vm_open(struct memtrack_buffer *buffer,
+ const struct vm_area_struct *vma)
+{
+}
+
+static inline void memtrack_buffer_vm_close(struct memtrack_buffer *buffer,
+ const struct vm_area_struct *vma)
+{
+}
#endif /* CONFIG_MEMTRACK */
+
+
+/**
+ * memtrack_buffer_vm_mmap - account for pages mapped to userspace during mmap
+ *
+ * @buffer: the buffer's memtrack entry
+ * @vma: the vma passed to mmap()
+ */
+static inline void memtrack_buffer_mmap(struct memtrack_buffer *buffer,
+ const struct vm_area_struct *vma)
+{
+ memtrack_buffer_vm_open(buffer, vma);
+}
+
#endif /* _MEMTRACK_ */
--
2.8.0.rc3.226.g39d4020
On Tue, Oct 11, 2016 at 04:50:04PM -0700, Ruchi Kandoi wrote:
> memtrack maintains a per-process list of shared buffer references, which is
> exported to userspace as /proc/[pid]/memtrack. Buffers can be optionally
> "tagged" with a short string: for example, Android userspace would use this
> tag to identify whether buffers were allocated on behalf of the camera stack,
> GL, etc. memtrack also exports the VMAs associated with these buffers so
> that pages already included in the process's mm counters aren't double-counted.
>
> Shared-buffer allocators can hook into memtrack by embedding
> struct memtrack_buffer in their buffer metadata, calling
> memtrack_buffer_{init,remove} at buffer allocation and free time, and
> memtrack_buffer_{install,uninstall} when a userspace process takes or
> drops a reference to the buffer. For fd-backed buffers like dma-bufs, hooks in
> fdtable.c and fork.c automatically notify memtrack when references are added or
> removed from a process's fd table.
>
> This patchstack adds memtrack hooks into dma-buf and ion. If there's upstream
> interest in memtrack, it can be extended to other memory allocators as well,
> such as GEM implementations.
No, with a side of Hell, No. Not to mention anything else,
* descriptor tables do not belong to any specific task_struct and
actions done by one show up in all who share that thing.
* shared descriptor table does not imply belonging to the same
group.
* shared descriptor table can become unshared at any point, invisibly
for that Fine Piece Of Software.
* while we are at it, blocking allocation under several spinlocks
(and with interrupts disabled, for good measure) is generally considered
a bloody bad idea.
That - just from the quick look through that patchset. Bringing task_struct
into the API is already sufficient for a NAK.
On Tue, Oct 11, 2016 at 7:50 PM, Ruchi Kandoi <[email protected]> wrote:
> This patchstack introduces a new "memtrack" module for tracking and accounting
> memory exported to userspace as shared buffers, like dma-buf fds or GEM handles.
btw, I wouldn't care much about the non-dmabuf case.. dri2/flink is
kind of legacy and the sharing patterns there are not so complex that
we have found the need for any more elaborate debug infrastructure
than what we already have.
Between existing dmabuf debugfs, and /proc/*/maps (and /proc/*/fd?), I
wonder what is missing? Maybe there is a less intrusive way to get at
the debugging info you want?
BR,
-R
> Any process holding a reference to these buffers will keep the kernel from
> reclaiming its backing pages. mm counters don't provide a complete picture of
> these allocations, since they only account for pages that are mapped into a
> process's address space. This problem is especially bad for systems like
> Android that use dma-buf fds to share graphics and multimedia buffers between
> processes: these allocations are often large, have complex sharing patterns,
> and are rarely mapped into every process that holds a reference to them.
>
> memtrack maintains a per-process list of shared buffer references, which is
> exported to userspace as /proc/[pid]/memtrack. Buffers can be optionally
> "tagged" with a short string: for example, Android userspace would use this
> tag to identify whether buffers were allocated on behalf of the camera stack,
> GL, etc. memtrack also exports the VMAs associated with these buffers so
> that pages already included in the process's mm counters aren't double-counted.
>
> Shared-buffer allocators can hook into memtrack by embedding
> struct memtrack_buffer in their buffer metadata, calling
> memtrack_buffer_{init,remove} at buffer allocation and free time, and
> memtrack_buffer_{install,uninstall} when a userspace process takes or
> drops a reference to the buffer. For fd-backed buffers like dma-bufs, hooks in
> fdtable.c and fork.c automatically notify memtrack when references are added or
> removed from a process's fd table.
>
> This patchstack adds memtrack hooks into dma-buf and ion. If there's upstream
> interest in memtrack, it can be extended to other memory allocators as well,
> such as GEM implementations.
>
> Greg Hackmann (1):
> drivers: staging: ion: add ION_IOC_TAG ioctl
>
> Ruchi Kandoi (5):
> fs: add installed and uninstalled file_operations
> drivers: misc: add memtrack
> dma-buf: add memtrack support
> memtrack: Adds the accounting to keep track of all mmaped/unmapped
> pages.
> memtrack: Add memtrack accounting for forked processes.
>
> drivers/android/binder.c | 4 +-
> drivers/dma-buf/dma-buf.c | 37 +++
> drivers/misc/Kconfig | 16 +
> drivers/misc/Makefile | 1 +
> drivers/misc/memtrack.c | 516 ++++++++++++++++++++++++++++++++
> drivers/staging/android/ion/ion-ioctl.c | 17 ++
> drivers/staging/android/ion/ion.c | 60 +++-
> drivers/staging/android/ion/ion_priv.h | 2 +
> drivers/staging/android/uapi/ion.h | 25 ++
> fs/file.c | 38 ++-
> fs/open.c | 2 +-
> fs/proc/base.c | 4 +
> include/linux/dma-buf.h | 5 +
> include/linux/fdtable.h | 4 +-
> include/linux/fs.h | 2 +
> include/linux/memtrack.h | 130 ++++++++
> include/linux/mm.h | 3 +
> include/linux/sched.h | 3 +
> kernel/fork.c | 23 +-
> 19 files changed, 875 insertions(+), 17 deletions(-)
> create mode 100644 drivers/misc/memtrack.c
> create mode 100644 include/linux/memtrack.h
>
> --
> 2.8.0.rc3.226.g39d4020
>
On Wednesday, October 12, 2016 7:50 AM Ruchi Kandoi wrote:
> +/**
> + * struct ion_fd_data - metadata passed from userspace for a handle
s/fd/tag/ ?
> + * @handle: a handle
> + * @tag: a string describing the buffer
> + *
> + * For ION_IOC_TAG userspace populates the handle field with
> + * the handle returned from ion alloc and type contains the memtrack_type which
> + * accurately describes the usage for the memory.
> + */
> +struct ion_tag_data {
> + ion_user_handle_t handle;
> + char tag[ION_MAX_TAG_LEN];
> +};
> +
Am 12.10.2016 um 01:50 schrieb Ruchi Kandoi:
> This patchstack adds memtrack hooks into dma-buf and ion. If there's upstream
> interest in memtrack, it can be extended to other memory allocators as well,
> such as GEM implementations.
We have run into similar problems before. Because of this I already
proposed a solution for this quite a while ago, but never pushed on
upstreaming this since it was only done for a special use case.
Instead of keeping track of how much memory a process has bound (which
is very fragile) my solution only added some more debugging info on a
per fd basis (e.g. how much memory is bound to this fd).
This information was then used by the OOM killer (for example) to make a
better decision on which process to reap.
Shouldn't be to hard to expose this through debugfs or maybe a new fcntl
to userspace for debugging.
I haven't looked at the code in detail, but messing with the per process
memory accounting like you did in this proposal is clearly not a good
idea if you ask me.
Regards,
Christian.
On 10/11/2016 04:50 PM, Ruchi Kandoi wrote:
> Any process holding a reference to these buffers will keep the kernel from
> reclaiming its backing pages. mm counters don't provide a complete picture of
> these allocations, since they only account for pages that are mapped into a
> process's address space. This problem is especially bad for systems like
> Android that use dma-buf fds to share graphics and multimedia buffers between
> processes: these allocations are often large, have complex sharing patterns,
> and are rarely mapped into every process that holds a reference to them.
What do you end up _doing_ with all this new information that you have
here? You know which processes have "pinned" these shared buffers, and
exported that information in /proc. But, then what?