2010-05-13 09:51:25

by Changli Gao

[permalink] [raw]
Subject: [PATCH 1/9] mm: add generic adaptive large memory allocation APIs

generic adaptive large memory allocation APIs

kv*alloc are used to allocate large contiguous memory and the users don't mind
whether the memory is physically or virtually contiguous. The allocator always
tries its best to allocate physically contiguous memory first.

In this patch set, some APIs are introduced: kvmalloc(), kvzalloc(), kvcalloc(),
kvrealloc(), kvfree() and kvfree_inatomic().

Signed-off-by: Changli Gao <[email protected]>
----
include/linux/mm.h | 31 ++++++++++++++
include/linux/vmalloc.h | 1
mm/nommu.c | 6 ++
mm/util.c | 104 ++++++++++++++++++++++++++++++++++++++++++++++++
mm/vmalloc.c | 14 ++++++
5 files changed, 156 insertions(+)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 462acaf..0ece978 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1467,5 +1467,36 @@ extern int soft_offline_page(struct page *page, int flags);

extern void dump_page(struct page *page);

+void *__kvmalloc(size_t size, gfp_t flags);
+
+static inline void *kvmalloc(size_t size)
+{
+ return __kvmalloc(size, 0);
+}
+
+static inline void *kvzalloc(size_t size)
+{
+ return __kvmalloc(size, __GFP_ZERO);
+}
+
+static inline void *kvcalloc(size_t n, size_t size)
+{
+ return __kvmalloc(n * size, __GFP_ZERO);
+}
+
+void __kvfree(void *ptr, bool inatomic);
+
+static inline void kvfree(void *ptr)
+{
+ __kvfree(ptr, false);
+}
+
+static inline void kvfree_inatomic(void *ptr)
+{
+ __kvfree(ptr, true);
+}
+
+void *kvrealloc(void *ptr, size_t newsize);
+
#endif /* __KERNEL__ */
#endif /* _LINUX_MM_H */
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 227c2a5..33ec828 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -60,6 +60,7 @@ extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot);
extern void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask,
pgprot_t prot);
extern void vfree(const void *addr);
+extern unsigned long vsize(const void *addr);

extern void *vmap(struct page **pages, unsigned int count,
unsigned long flags, pgprot_t prot);
diff --git a/mm/nommu.c b/mm/nommu.c
index 63fa17d..1ddf3fe 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -223,6 +223,12 @@ void vfree(const void *addr)
}
EXPORT_SYMBOL(vfree);

+unsigned long vsize(const void *addr)
+{
+ return ksize(addr);
+}
+EXPORT_SYMBOL(vsize);
+
void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
{
/*
diff --git a/mm/util.c b/mm/util.c
index f5712e8..7cc364a 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -5,6 +5,7 @@
#include <linux/err.h>
#include <linux/sched.h>
#include <asm/uaccess.h>
+#include <linux/vmalloc.h>

#define CREATE_TRACE_POINTS
#include <trace/events/kmem.h>
@@ -289,6 +290,109 @@ int __attribute__((weak)) get_user_pages_fast(unsigned long start,
}
EXPORT_SYMBOL_GPL(get_user_pages_fast);

+void *__kvmalloc(size_t size, gfp_t flags)
+{
+ void *ptr;
+
+ if (size < PAGE_SIZE)
+ return kmalloc(size, GFP_KERNEL | flags);
+ size = PAGE_ALIGN(size);
+ if (is_power_of_2(size))
+ ptr = (void *)__get_free_pages(GFP_KERNEL | flags |
+ __GFP_NOWARN, get_order(size));
+ else
+ ptr = alloc_pages_exact(size, GFP_KERNEL | flags |
+ __GFP_NOWARN);
+ if (ptr != NULL) {
+ virt_to_head_page(ptr)->private = size;
+ return ptr;
+ }
+
+ ptr = vmalloc(size);
+ if (ptr != NULL && (flags & __GFP_ZERO))
+ memset(ptr, 0, size);
+
+ return ptr;
+}
+EXPORT_SYMBOL(__kvmalloc);
+
+static void kvfree_work(struct work_struct *work)
+{
+ vfree(work);
+}
+
+void __kvfree(void *ptr, bool inatomic)
+{
+ if (unlikely(ZERO_OR_NULL_PTR(ptr)))
+ return;
+ if (is_vmalloc_addr(ptr)) {
+ if (inatomic) {
+ struct work_struct *work;
+
+ work = ptr;
+ BUILD_BUG_ON(sizeof(struct work_struct) > PAGE_SIZE);
+ INIT_WORK(work, kvfree_work);
+ schedule_work(work);
+ } else {
+ vfree(ptr);
+ }
+ } else {
+ struct page *page;
+
+ page = virt_to_head_page(ptr);
+ if (PageSlab(page) || PageCompound(page))
+ kfree(ptr);
+ else if (is_power_of_2(page->private))
+ free_pages((unsigned long)ptr,
+ get_order(page->private));
+ else
+ free_pages_exact(ptr, page->private);
+ }
+}
+EXPORT_SYMBOL(__kvfree);
+
+void *kvrealloc(void *ptr, size_t newsize)
+{
+ void *nptr;
+ size_t oldsize;
+
+ if (unlikely(!newsize)) {
+ kvfree(ptr);
+ return ZERO_SIZE_PTR;
+ }
+
+ if (unlikely(ZERO_OR_NULL_PTR(ptr)))
+ return kvmalloc(newsize);
+
+ if (is_vmalloc_addr(ptr)) {
+ oldsize = vsize(ptr);
+ if (newsize <= oldsize)
+ return ptr;
+ } else {
+ struct page *page;
+
+ page = virt_to_head_page(ptr);
+ if (PageSlab(page) || PageCompound(page)) {
+ if (newsize < PAGE_SIZE)
+ return krealloc(ptr, newsize, GFP_KERNEL);
+ oldsize = ksize(ptr);
+ } else {
+ oldsize = page->private;
+ if (newsize <= oldsize)
+ return ptr;
+ }
+ }
+
+ nptr = kvmalloc(newsize);
+ if (nptr != NULL) {
+ memcpy(nptr, ptr, oldsize);
+ kvfree(ptr);
+ }
+
+ return nptr;
+}
+EXPORT_SYMBOL(kvrealloc);
+
/* Tracepoints definitions. */
EXPORT_TRACEPOINT_SYMBOL(kmalloc);
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index ae00746..93552a8 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1413,6 +1413,20 @@ void vfree(const void *addr)
EXPORT_SYMBOL(vfree);

/**
+ * vsize - get the actual amount of memory allocated by vmalloc()
+ * @addr: memory base address
+ */
+unsigned long vsize(const void *addr)
+{
+ struct vmap_area *va;
+
+ va = find_vmap_area((unsigned long)addr);
+
+ return va->va_end - va->va_start - PAGE_SIZE;
+}
+EXPORT_SYMBOL(vsize);
+
+/**
* vunmap - release virtual mapping obtained by vmap()
* @addr: memory base address
*

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to [email protected]. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"[email protected]"> [email protected] </a>


2010-05-13 13:20:16

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 1/9] mm: add generic adaptive large memory allocation APIs

On Thu, 2010-05-13 at 17:51 +0800, Changli Gao wrote:
> +void *__kvmalloc(size_t size, gfp_t flags)
> +{
> + void *ptr;
> +
> + if (size < PAGE_SIZE)
> + return kmalloc(size, GFP_KERNEL | flags);
> + size = PAGE_ALIGN(size);
> + if (is_power_of_2(size))
> + ptr = (void *)__get_free_pages(GFP_KERNEL | flags |
> + __GFP_NOWARN, get_order(size));
> + else
> + ptr = alloc_pages_exact(size, GFP_KERNEL | flags |
> + __GFP_NOWARN);
> + if (ptr != NULL) {
> + virt_to_head_page(ptr)->private = size;
> + return ptr;
> + }
> +
> + ptr = vmalloc(size);
> + if (ptr != NULL && (flags & __GFP_ZERO))
> + memset(ptr, 0, size);
> +
> + return ptr;
> +}
> +EXPORT_SYMBOL(__kvmalloc);

So if I do kvmalloc(size, GFP_ATOMIC) I get GFP_KERNEL|GFP_ATOMIC, which
is not a recommended variation because one should not mix __GFP_WAIT and
__GFP_HIGH.

So I would simply drop the gfp argument to avoid confusion.

> +void __kvfree(void *ptr, bool inatomic)
> +{
> + if (unlikely(ZERO_OR_NULL_PTR(ptr)))
> + return;
> + if (is_vmalloc_addr(ptr)) {
> + if (inatomic) {
> + struct work_struct *work;
> +
> + work = ptr;
> + BUILD_BUG_ON(sizeof(struct work_struct) > PAGE_SIZE);
> + INIT_WORK(work, kvfree_work);
> + schedule_work(work);
> + } else {
> + vfree(ptr);
> + }
> + } else {
> + struct page *page;
> +
> + page = virt_to_head_page(ptr);
> + if (PageSlab(page) || PageCompound(page))
> + kfree(ptr);
> + else if (is_power_of_2(page->private))
> + free_pages((unsigned long)ptr,
> + get_order(page->private));
> + else
> + free_pages_exact(ptr, page->private);
> + }
> +}
> +EXPORT_SYMBOL(__kvfree);

NAK, I really utterly dislike that inatomic argument. The alloc side
doesn't function in atomic context either. Please keep the thing
symmetric in that regards.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to [email protected]. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"[email protected]"> [email protected] </a>

2010-05-13 13:36:55

by Tetsuo Handa

[permalink] [raw]
Subject: Re: [PATCH 1/9] mm: add generic adaptive large memory allocation APIs

Peter Zijlstra wrote:
> NAK, I really utterly dislike that inatomic argument. The alloc side
> doesn't function in atomic context either. Please keep the thing
> symmetric in that regards.

Excuse me. kmalloc(GFP_KERNEL) may sleep (and therefore cannot be used in
atomic context). However, kfree() for memory allocated with kmalloc(GFP_KERNEL)
never sleeps (and therefore can be used in atomic context).
Why are kmalloc() and kfree() NOT kept symmetric?

2010-05-13 14:08:26

by Changli Gao

[permalink] [raw]
Subject: Re: [PATCH 1/9] mm: add generic adaptive large memory allocation APIs

On Thu, May 13, 2010 at 9:20 PM, Peter Zijlstra <[email protected]> wrote:
> On Thu, 2010-05-13 at 17:51 +0800, Changli Gao wrote:
>> +void *__kvmalloc(size_t size, gfp_t flags)
>> +{
>> +       void *ptr;
>> +
>> +       if (size < PAGE_SIZE)
>> +               return kmalloc(size, GFP_KERNEL | flags);
>> +       size = PAGE_ALIGN(size);
>> +       if (is_power_of_2(size))
>> +               ptr = (void *)__get_free_pages(GFP_KERNEL | flags |
>> +                                              __GFP_NOWARN, get_order(size));
>> +       else
>> +               ptr = alloc_pages_exact(size, GFP_KERNEL | flags |
>> +                                             __GFP_NOWARN);
>> +       if (ptr != NULL) {
>> +               virt_to_head_page(ptr)->private = size;
>> +               return ptr;
>> +       }
>> +
>> +       ptr = vmalloc(size);
>> +       if (ptr != NULL && (flags & __GFP_ZERO))
>> +               memset(ptr, 0, size);
>> +
>> +       return ptr;
>> +}
>> +EXPORT_SYMBOL(__kvmalloc);
>
> So if I do kvmalloc(size, GFP_ATOMIC) I get GFP_KERNEL|GFP_ATOMIC, which
> is not a recommended variation because one should not mix __GFP_WAIT and
> __GFP_HIGH.

__kvmalloc() is only for internal use(kvmalloc, kvcalloc, and
kvzalloc), and the only value of flags is __GFP_ZERO. How about
replacing flags with a bool variable zero?

void *__kvmalloc(size_t size, bool zero);

Or check the value of flags in the front of __kvmalloc().

BUG_ON((flags & (~__GFP_ZERO)) != 0);

>
> So I would simply drop the gfp argument to avoid confusion.
>
>> +void __kvfree(void *ptr, bool inatomic)
>> +{
>> +       if (unlikely(ZERO_OR_NULL_PTR(ptr)))
>> +               return;
>> +       if (is_vmalloc_addr(ptr)) {
>> +               if (inatomic) {
>> +                       struct work_struct *work;
>> +
>> +                       work = ptr;
>> +                       BUILD_BUG_ON(sizeof(struct work_struct) > PAGE_SIZE);
>> +                       INIT_WORK(work, kvfree_work);
>> +                       schedule_work(work);
>> +               } else {
>> +                       vfree(ptr);
>> +               }
>> +       } else {
>> +               struct page *page;
>> +
>> +               page = virt_to_head_page(ptr);
>> +               if (PageSlab(page) || PageCompound(page))
>> +                       kfree(ptr);
>> +               else if (is_power_of_2(page->private))
>> +                       free_pages((unsigned long)ptr,
>> +                                  get_order(page->private));
>> +               else
>> +                       free_pages_exact(ptr, page->private);
>> +       }
>> +}
>> +EXPORT_SYMBOL(__kvfree);
>
> NAK, I really utterly dislike that inatomic argument. The alloc side
> doesn't function in atomic context either. Please keep the thing
> symmetric in that regards.
>

There are some users, who release memory in atomic context. for
example: fs/file.c: fdmem.

--
Regards,
Changli Gao([email protected])

2010-05-13 14:39:36

by Milton Miller

[permalink] [raw]
Subject: [PATCH 1/9] mm: add generic adaptive large memory allocation APIs

On Thu, 13 May 2010 at 17:51:25 +0800, Changli Gao wrote:

> +static inline void *kvcalloc(size_t n, size_t size)
> +{
> + return __kvmalloc(n * size, __GFP_ZERO);
>

This needs multiply overflow checking like kcalloc.

milton

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to [email protected]. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"[email protected]"> [email protected] </a>

2010-05-13 14:49:47

by Changli Gao

[permalink] [raw]
Subject: Re: [PATCH 1/9] mm: add generic adaptive large memory allocation APIs

On Thu, May 13, 2010 at 10:39 PM, Milton Miller <[email protected]> wrote:
> On Thu, 13 May 2010 at 17:51:25 +0800, Changli Gao wrote:
>
>> +static inline void *kvcalloc(size_t n, size_t size)
>> +{
>> +     return __kvmalloc(n * size, __GFP_ZERO);
>>
>
> This needs multiply overflow checking like kcalloc.
>

Thanks.

--
Regards,
Changli Gao([email protected])

2010-05-14 08:03:34

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 1/9] mm: add generic adaptive large memory allocation APIs

On Thu, 2010-05-13 at 22:08 +0800, Changli Gao wrote:
> > NAK, I really utterly dislike that inatomic argument. The alloc side
> > doesn't function in atomic context either. Please keep the thing
> > symmetric in that regards.
> >
>
> There are some users, who release memory in atomic context. for
> example: fs/file.c: fdmem.

urgh, but yeah, aside from not using vmalloc to allocate fd tables one
needs to deal with this.

But if that is the only one, I'd let them do the workqueue thing that's
already there. If there really are more people wanting to do this, then
maybe add: kvfree_atomic().

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to [email protected]. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"[email protected]"> [email protected] </a>

2010-05-14 08:12:41

by Changli Gao

[permalink] [raw]
Subject: Re: [PATCH 1/9] mm: add generic adaptive large memory allocation APIs

On Fri, May 14, 2010 at 4:03 PM, Peter Zijlstra <[email protected]> wrote:
> On Thu, 2010-05-13 at 22:08 +0800, Changli Gao wrote:
>> > NAK, I really utterly dislike that inatomic argument. The alloc side
>> > doesn't function in atomic context either. Please keep the thing
>> > symmetric in that regards.
>> >
>>
>> There are some users, who release memory in atomic context. for
>> example: fs/file.c: fdmem.
>
> urgh, but yeah, aside from not using vmalloc to allocate fd tables one
> needs to deal with this.
>
> But if that is the only one, I'd let them do the workqueue thing that's
> already there. If there really are more people wanting to do this, then
> maybe add: kvfree_atomic().
>

Tetsuo has pointed another one in apparmor.
http://kernel.ubuntu.com/git?p=jj/ubuntu-lucid.git;a=blobdiff;f=security/apparmor/match.c;h=d2cd55419acfcae85cb748c8f837a4384a3a0d29;hp=afc2dd2260edffcf88521ae86458ad03aa8ea12c;hb=f5eba4b0a01cc671affa429ba1512b6de7caeb5b;hpb=abdff9ddaf2644d0f9962490f73e030806ba90d3
, though apparmor hasn't been merged into mainline.

--
Regards,
Changli Gao([email protected])

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to [email protected]. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"[email protected]"> [email protected] </a>

2010-05-17 01:34:36

by KOSAKI Motohiro

[permalink] [raw]
Subject: Re: [PATCH 1/9] mm: add generic adaptive large memory allocation APIs

> Peter Zijlstra wrote:
> > NAK, I really utterly dislike that inatomic argument. The alloc side
> > doesn't function in atomic context either. Please keep the thing
> > symmetric in that regards.
>
> Excuse me. kmalloc(GFP_KERNEL) may sleep (and therefore cannot be used in
> atomic context). However, kfree() for memory allocated with kmalloc(GFP_KERNEL)
> never sleep (and therefore can be used in atomic context).
> Why kmalloc() and kfree() are NOT kept symmetric?

In the kmalloc case, we need to consider both the kmalloc(GFP_KERNEL)/kfree() pair
and the kmalloc(GFP_ATOMIC)/kfree() pair. The latter is mainly used in atomic context.
Making kfree() atomic helps to keep the implementation simple.
But kvmalloc doesn't have the GFP_ATOMIC feature. That's the big difference.
But kvmalloc don't have GFP_ATOMIC feautre. that's big difference.