2017-04-30 11:32:09

by Wei Yang

Subject: [PATCH 0/3] try to save some memory for kmem_cache in some cases

struct kmem_cache is a frequently used data structure in the kernel. While
reading the code, I found that we could save some space in a few cases:

1. On a 64-bit arch, a lone int occupies a whole word (8 bytes) unless it
   is packed next to another int.
2. cpu_slab->partial is only used when CONFIG_SLUB_CPU_PARTIAL is set.
3. kmem_cache->cpu_partial is also only used when CONFIG_SLUB_CPU_PARTIAL
   is set, though wrapping it only saves space on a 32-bit arch.

Wei Yang (3):
mm/slub: pack red_left_pad with another int to save a word
mm/slub: wrap cpu_slab->partial in CONFIG_SLUB_CPU_PARTIAL
mm/slub: wrap kmem_cache->cpu_partial in config
CONFIG_SLUB_CPU_PARTIAL

include/linux/slub_def.h | 6 +++-
mm/slub.c | 88 ++++++++++++++++++++++++++++++++----------------
2 files changed, 64 insertions(+), 30 deletions(-)

--
2.11.0


2017-04-30 11:32:21

by Wei Yang

Subject: [PATCH 1/3] mm/slub: pack red_left_pad with another int to save a word

On a 64-bit arch, struct kmem_cache is 8-byte aligned, so a lone int
field occupies a whole word if it does not sit next to another int.

This patch packs red_left_pad next to reserved, which saves 8 bytes in
struct kmem_cache on a 64-bit arch.
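
As a toy illustration of the alignment effect (these are simplified
structs picked only for the example, not the real struct kmem_cache
layout):

	struct toy_unpacked {
		int reserved;		/* 4 bytes + 4 bytes of padding */
		const char *name;	/* 8 bytes */
		int red_left_pad;	/* 4 bytes + 4 bytes of tail padding */
	};				/* sizeof() == 24 on a 64-bit arch */

	struct toy_packed {
		int reserved;		/* the two ints share one word */
		int red_left_pad;
		const char *name;
	};				/* sizeof() == 16 on a 64-bit arch */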

Signed-off-by: Wei Yang <[email protected]>
---
include/linux/slub_def.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 07ef550c6627..ec13aab32647 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -79,9 +79,9 @@ struct kmem_cache {
int inuse; /* Offset to metadata */
int align; /* Alignment */
int reserved; /* Reserved bytes at the end of slabs */
+ int red_left_pad; /* Left redzone padding size */
const char *name; /* Name (only for display!) */
struct list_head list; /* List of slab caches */
- int red_left_pad; /* Left redzone padding size */
#ifdef CONFIG_SYSFS
struct kobject kobj; /* For sysfs */
#endif
--
2.11.0

2017-04-30 11:32:31

by Wei Yang

Subject: [PATCH 2/3] mm/slub: wrap cpu_slab->partial in CONFIG_SLUB_CPU_PARTIAL

cpu_slab's partial field is only used when CONFIG_SLUB_CPU_PARTIAL is set,
so wrapping it means we save one pointer per cpu for every slab cache when
that option is disabled.

This patch wraps cpu_slab->partial in CONFIG_SLUB_CPU_PARTIAL and wraps
its sysfs attribute as well.
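
As a rough, back-of-the-envelope estimate (the cpu and cache counts here
are assumptions made only for illustration): with 64 possible cpus and
~150 slab caches, dropping the 8-byte pointer from every struct
kmem_cache_cpu saves about 8 * 64 * 150 = 76800 bytes (~75 KiB) of
per-cpu memory on a 64-bit kernel built without CONFIG_SLUB_CPU_PARTIAL.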

Signed-off-by: Wei Yang <[email protected]>
---
include/linux/slub_def.h | 2 ++
mm/slub.c | 16 +++++++++++++++-
2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index ec13aab32647..0debd8df1a7d 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -41,7 +41,9 @@ struct kmem_cache_cpu {
void **freelist; /* Pointer to next available object */
unsigned long tid; /* Globally unique transaction id */
struct page *page; /* The slab from which we are allocating */
+#ifdef CONFIG_SLUB_CPU_PARTIAL
struct page *partial; /* Partially allocated frozen slabs */
+#endif
#ifdef CONFIG_SLUB_STATS
unsigned stat[NR_SLUB_STAT_ITEMS];
#endif
diff --git a/mm/slub.c b/mm/slub.c
index 7f4bc7027ed5..fde499b6dad8 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2302,7 +2302,11 @@ static bool has_cpu_slab(int cpu, void *info)
struct kmem_cache *s = info;
struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);

- return c->page || c->partial;
+ return c->page
+#ifdef CONFIG_SLUB_CPU_PARTIAL
+ || c->partial
+#endif
+ ;
}

static void flush_all(struct kmem_cache *s)
@@ -2511,7 +2515,9 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
page = c->page;
if (!page)
goto new_slab;
+#ifdef CONFIG_SLUB_CPU_PARTIAL
redo:
+#endif

if (unlikely(!node_match(page, node))) {
int searchnode = node;
@@ -2568,6 +2574,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,

new_slab:

+#ifdef CONFIG_SLUB_CPU_PARTIAL
if (c->partial) {
page = c->page = c->partial;
c->partial = page->next;
@@ -2575,6 +2582,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
c->freelist = NULL;
goto redo;
}
+#endif

freelist = new_slab_objects(s, gfpflags, node, &c);

@@ -4760,6 +4768,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
total += x;
nodes[node] += x;

+#ifdef CONFIG_SLUB_CPU_PARTIAL
page = READ_ONCE(c->partial);
if (page) {
node = page_to_nid(page);
@@ -4772,6 +4781,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
total += x;
nodes[node] += x;
}
+#endif
}
}

@@ -4980,6 +4990,7 @@ static ssize_t objects_partial_show(struct kmem_cache *s, char *buf)
}
SLAB_ATTR_RO(objects_partial);

+#ifdef CONFIG_SLUB_CPU_PARTIAL
static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
{
int objects = 0;
@@ -5010,6 +5021,7 @@ static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
return len + sprintf(buf + len, "\n");
}
SLAB_ATTR_RO(slabs_cpu_partial);
+#endif

static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
{
@@ -5364,7 +5376,9 @@ static struct attribute *slab_attrs[] = {
&destroy_by_rcu_attr.attr,
&shrink_attr.attr,
&reserved_attr.attr,
+#ifdef CONFIG_SLUB_CPU_PARTIAL
&slabs_cpu_partial_attr.attr,
+#endif
#ifdef CONFIG_SLUB_DEBUG
&total_objects_attr.attr,
&slabs_attr.attr,
--
2.11.0

2017-04-30 11:32:41

by Wei Yang

Subject: [PATCH 3/3] mm/slub: wrap kmem_cache->cpu_partial in config CONFIG_SLUB_CPU_PARTIAL

kmem_cache->cpu_partial is only used when CONFIG_SLUB_CPU_PARTIAL is set,
so wrapping it in that config option saves some space on a 32-bit arch.

This patch wraps kmem_cache->cpu_partial in CONFIG_SLUB_CPU_PARTIAL and
wraps its sysfs attribute as well. To keep the #ifdef in one place, the
cpu_partial setup in kmem_cache_open() is also factored out into a new
helper, set_cpu_partial().
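
As a toy illustration of why the saving only shows up on 32-bit (again,
simplified structs picked for the example, not the real struct kmem_cache
layout; assuming the usual ILP32/LP64 ABIs):

	struct toy_with {
		void *cpu_slab;
		int size, object_size, offset, cpu_partial;
		unsigned long oo;
	};

	struct toy_without {
		void *cpu_slab;
		int size, object_size, offset;
		unsigned long oo;
	};

	/* 64-bit: both are 32 bytes, the removed int just turns into padding */
	/* 32-bit: 24 vs 20 bytes, the 4 bytes are really saved */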

Signed-off-by: Wei Yang <[email protected]>
---
include/linux/slub_def.h | 2 ++
mm/slub.c | 72 +++++++++++++++++++++++++++++-------------------
2 files changed, 46 insertions(+), 28 deletions(-)

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 0debd8df1a7d..477ab99800ed 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -69,7 +69,9 @@ struct kmem_cache {
int size; /* The size of an object including meta data */
int object_size; /* The size of an object without meta data */
int offset; /* Free pointer offset. */
+#ifdef CONFIG_SLUB_CPU_PARTIAL
int cpu_partial; /* Number of per cpu partial objects to keep around */
+#endif
struct kmem_cache_order_objects oo;

/* Allocation and freeing of slabs */
diff --git a/mm/slub.c b/mm/slub.c
index fde499b6dad8..94978f27882a 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1829,7 +1829,10 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
stat(s, CPU_PARTIAL_NODE);
}
if (!kmem_cache_has_cpu_partial(s)
- || available > s->cpu_partial / 2)
+#ifdef CONFIG_SLUB_CPU_PARTIAL
+ || available > s->cpu_partial / 2
+#endif
+ )
break;

}
@@ -3418,6 +3421,39 @@ static void set_min_partial(struct kmem_cache *s, unsigned long min)
s->min_partial = min;
}

+static void set_cpu_partial(struct kmem_cache *s)
+{
+#ifdef CONFIG_SLUB_CPU_PARTIAL
+ /*
+ * cpu_partial determined the maximum number of objects kept in the
+ * per cpu partial lists of a processor.
+ *
+ * Per cpu partial lists mainly contain slabs that just have one
+ * object freed. If they are used for allocation then they can be
+ * filled up again with minimal effort. The slab will never hit the
+ * per node partial lists and therefore no locking will be required.
+ *
+ * This setting also determines
+ *
+ * A) The number of objects from per cpu partial slabs dumped to the
+ * per node list when we reach the limit.
+ * B) The number of objects in cpu partial slabs to extract from the
+ * per node list when we run out of per cpu objects. We only fetch
+ * 50% to keep some capacity around for frees.
+ */
+ if (!kmem_cache_has_cpu_partial(s))
+ s->cpu_partial = 0;
+ else if (s->size >= PAGE_SIZE)
+ s->cpu_partial = 2;
+ else if (s->size >= 1024)
+ s->cpu_partial = 6;
+ else if (s->size >= 256)
+ s->cpu_partial = 13;
+ else
+ s->cpu_partial = 30;
+#endif
+}
+
/*
* calculate_sizes() determines the order and the distribution of data within
* a slab object.
@@ -3576,33 +3612,7 @@ static int kmem_cache_open(struct kmem_cache *s, unsigned long flags)
*/
set_min_partial(s, ilog2(s->size) / 2);

- /*
- * cpu_partial determined the maximum number of objects kept in the
- * per cpu partial lists of a processor.
- *
- * Per cpu partial lists mainly contain slabs that just have one
- * object freed. If they are used for allocation then they can be
- * filled up again with minimal effort. The slab will never hit the
- * per node partial lists and therefore no locking will be required.
- *
- * This setting also determines
- *
- * A) The number of objects from per cpu partial slabs dumped to the
- * per node list when we reach the limit.
- * B) The number of objects in cpu partial slabs to extract from the
- * per node list when we run out of per cpu objects. We only fetch
- * 50% to keep some capacity around for frees.
- */
- if (!kmem_cache_has_cpu_partial(s))
- s->cpu_partial = 0;
- else if (s->size >= PAGE_SIZE)
- s->cpu_partial = 2;
- else if (s->size >= 1024)
- s->cpu_partial = 6;
- else if (s->size >= 256)
- s->cpu_partial = 13;
- else
- s->cpu_partial = 30;
+ set_cpu_partial(s);

#ifdef CONFIG_NUMA
s->remote_node_defrag_ratio = 1000;
@@ -3989,7 +3999,9 @@ void __kmemcg_cache_deactivate(struct kmem_cache *s)
* Disable empty slabs caching. Used to avoid pinning offline
* memory cgroups by kmem pages that can be freed.
*/
+#ifdef CONFIG_SLUB_CPU_PARTIAL
s->cpu_partial = 0;
+#endif
s->min_partial = 0;

/*
@@ -4929,6 +4941,7 @@ static ssize_t min_partial_store(struct kmem_cache *s, const char *buf,
}
SLAB_ATTR(min_partial);

+#ifdef CONFIG_SLUB_CPU_PARTIAL
static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf)
{
return sprintf(buf, "%u\n", s->cpu_partial);
@@ -4951,6 +4964,7 @@ static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
return length;
}
SLAB_ATTR(cpu_partial);
+#endif

static ssize_t ctor_show(struct kmem_cache *s, char *buf)
{
@@ -5363,7 +5377,9 @@ static struct attribute *slab_attrs[] = {
&objs_per_slab_attr.attr,
&order_attr.attr,
&min_partial_attr.attr,
+#ifdef CONFIG_SLUB_CPU_PARTIAL
&cpu_partial_attr.attr,
+#endif
&objects_attr.attr,
&objects_partial_attr.attr,
&partial_attr.attr,
--
2.11.0

Subject: Re: [PATCH 0/3] try to save some memory for kmem_cache in some cases

On Sun, 30 Apr 2017, Wei Yang wrote:

> struct kmem_cache is a frequently used data structure in the kernel. While
> reading the code, I found that we could save some space in a few cases:
>
> 1. On a 64-bit arch, a lone int occupies a whole word (8 bytes) unless it
>    is packed next to another int.
> 2. cpu_slab->partial is only used when CONFIG_SLUB_CPU_PARTIAL is set.
> 3. kmem_cache->cpu_partial is also only used when CONFIG_SLUB_CPU_PARTIAL
>    is set, though wrapping it only saves space on a 32-bit arch.

This looks fine. But do we really want to add that amount of ifdeffery?
How much memory does this save?

2017-05-01 02:41:16

by Matthew Wilcox

Subject: Re: [PATCH 2/3] mm/slub: wrap cpu_slab->partial in CONFIG_SLUB_CPU_PARTIAL

On Sun, Apr 30, 2017 at 07:31:51PM +0800, Wei Yang wrote:
> @@ -2302,7 +2302,11 @@ static bool has_cpu_slab(int cpu, void *info)
> struct kmem_cache *s = info;
> struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
>
> - return c->page || c->partial;
> + return c->page
> +#ifdef CONFIG_SLUB_CPU_PARTIAL
> + || c->partial
> +#endif
> + ;
> }

No. No way. This is disgusting.

The right way to do this is to create an accessor like this:

#ifdef CONFIG_SLUB_CPU_PARTIAL
#define slub_cpu_partial(c) ((c)->partial)
#else
#define slub_cpu_partial(c) 0
#endif

And then the above becomes:

- return c->page || c->partial;
+ return c->page || slub_cpu_partial(c);

All the other ifdefs go away, apart from these two:

> @@ -4980,6 +4990,7 @@ static ssize_t objects_partial_show(struct kmem_cache *s, char *buf)
> }
> SLAB_ATTR_RO(objects_partial);
>
> +#ifdef CONFIG_SLUB_CPU_PARTIAL
> static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
> {
> int objects = 0;
> @@ -5010,6 +5021,7 @@ static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
> return len + sprintf(buf + len, "\n");
> }
> SLAB_ATTR_RO(slabs_cpu_partial);
> +#endif
>
> static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
> {
> @@ -5364,7 +5376,9 @@ static struct attribute *slab_attrs[] = {
> &destroy_by_rcu_attr.attr,
> &shrink_attr.attr,
> &reserved_attr.attr,
> +#ifdef CONFIG_SLUB_CPU_PARTIAL
> &slabs_cpu_partial_attr.attr,
> +#endif
> #ifdef CONFIG_SLUB_DEBUG
> &total_objects_attr.attr,
> &slabs_attr.attr,