kmem_cache is a frequently used data structure in the kernel. While reading
the code, I found that we could save some space in a few cases:
1. On a 64-bit arch, an int field occupies a whole word if it is not packed
   next to another int.
2. cpu_slab->partial is only used when CONFIG_SLUB_CPU_PARTIAL is set.
3. cpu_partial is only used when CONFIG_SLUB_CPU_PARTIAL is set, though
   wrapping it only saves space on a 32-bit arch.
Wei Yang (3):
mm/slub: pack red_left_pad with another int to save a word
mm/slub: wrap cpu_slab->partial in CONFIG_SLUB_CPU_PARTIAL
mm/slub: wrap kmem_cache->cpu_partial in config CONFIG_SLUB_CPU_PARTIAL
include/linux/slub_def.h | 6 +++-
mm/slub.c | 88 ++++++++++++++++++++++++++++++++----------------
2 files changed, 64 insertions(+), 30 deletions(-)
--
2.11.0
On a 64-bit arch, struct kmem_cache is 8-byte aligned, so a lone int field
occupies a whole word if it does not sit next to another int.
This patch packs red_left_pad next to reserved, saving 8 bytes in struct
kmem_cache on a 64-bit arch.
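The effect can be reproduced with a minimal userspace sketch (plain C, not
the real struct kmem_cache; the field names are borrowed only for
illustration):

#include <stdio.h>

/* Field order before the patch: the 8-byte-aligned pointer after a lone
 * int forces 4 bytes of padding, and the trailing int adds 4 more. */
struct before {
	int reserved;
	const char *name;
	int red_left_pad;
};

/* Field order after the patch: the two ints share one word. */
struct after {
	int reserved;
	int red_left_pad;
	const char *name;
};

int main(void)
{
	/* Typically prints "before: 24, after: 16" on x86_64. */
	printf("before: %zu, after: %zu\n",
	       sizeof(struct before), sizeof(struct after));
	return 0;
}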
Signed-off-by: Wei Yang <[email protected]>
---
include/linux/slub_def.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 07ef550c6627..ec13aab32647 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -79,9 +79,9 @@ struct kmem_cache {
int inuse; /* Offset to metadata */
int align; /* Alignment */
int reserved; /* Reserved bytes at the end of slabs */
+ int red_left_pad; /* Left redzone padding size */
const char *name; /* Name (only for display!) */
struct list_head list; /* List of slab caches */
- int red_left_pad; /* Left redzone padding size */
#ifdef CONFIG_SYSFS
struct kobject kobj; /* For sysfs */
#endif
--
2.11.0
cpu_slab's field partial is only used when CONFIG_SLUB_CPU_PARTIAL is set,
which means that when it is not set we can save a pointer's worth of space
per cpu for every slab cache.
This patch wraps cpu_slab->partial in CONFIG_SLUB_CPU_PARTIAL and wraps its
sysfs attribute too.
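As a rough, back-of-the-envelope illustration of the saving (the CPU and
cache counts below are made-up example values, not measurements):

#include <stdio.h>

int main(void)
{
	size_t ptr_size = sizeof(void *);	/* 8 bytes on a 64-bit arch */
	unsigned int nr_cpus = 64;		/* hypothetical CPU count */
	unsigned int nr_caches = 150;		/* hypothetical number of slab caches */

	/* One struct page pointer per kmem_cache_cpu instance. */
	printf("saved: %zu bytes\n", ptr_size * nr_cpus * nr_caches);
	return 0;
}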
Signed-off-by: Wei Yang <[email protected]>
---
include/linux/slub_def.h | 2 ++
mm/slub.c | 16 +++++++++++++++-
2 files changed, 17 insertions(+), 1 deletion(-)
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index ec13aab32647..0debd8df1a7d 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -41,7 +41,9 @@ struct kmem_cache_cpu {
void **freelist; /* Pointer to next available object */
unsigned long tid; /* Globally unique transaction id */
struct page *page; /* The slab from which we are allocating */
+#ifdef CONFIG_SLUB_CPU_PARTIAL
struct page *partial; /* Partially allocated frozen slabs */
+#endif
#ifdef CONFIG_SLUB_STATS
unsigned stat[NR_SLUB_STAT_ITEMS];
#endif
diff --git a/mm/slub.c b/mm/slub.c
index 7f4bc7027ed5..fde499b6dad8 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2302,7 +2302,11 @@ static bool has_cpu_slab(int cpu, void *info)
struct kmem_cache *s = info;
struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
- return c->page || c->partial;
+ return c->page
+#ifdef CONFIG_SLUB_CPU_PARTIAL
+ || c->partial
+#endif
+ ;
}
static void flush_all(struct kmem_cache *s)
@@ -2511,7 +2515,9 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
page = c->page;
if (!page)
goto new_slab;
+#ifdef CONFIG_SLUB_CPU_PARTIAL
redo:
+#endif
if (unlikely(!node_match(page, node))) {
int searchnode = node;
@@ -2568,6 +2574,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
new_slab:
+#ifdef CONFIG_SLUB_CPU_PARTIAL
if (c->partial) {
page = c->page = c->partial;
c->partial = page->next;
@@ -2575,6 +2582,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
c->freelist = NULL;
goto redo;
}
+#endif
freelist = new_slab_objects(s, gfpflags, node, &c);
@@ -4760,6 +4768,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
total += x;
nodes[node] += x;
+#ifdef CONFIG_SLUB_CPU_PARTIAL
page = READ_ONCE(c->partial);
if (page) {
node = page_to_nid(page);
@@ -4772,6 +4781,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
total += x;
nodes[node] += x;
}
+#endif
}
}
@@ -4980,6 +4990,7 @@ static ssize_t objects_partial_show(struct kmem_cache *s, char *buf)
}
SLAB_ATTR_RO(objects_partial);
+#ifdef CONFIG_SLUB_CPU_PARTIAL
static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
{
int objects = 0;
@@ -5010,6 +5021,7 @@ static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
return len + sprintf(buf + len, "\n");
}
SLAB_ATTR_RO(slabs_cpu_partial);
+#endif
static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
{
@@ -5364,7 +5376,9 @@ static struct attribute *slab_attrs[] = {
&destroy_by_rcu_attr.attr,
&shrink_attr.attr,
&reserved_attr.attr,
+#ifdef CONFIG_SLUB_CPU_PARTIAL
&slabs_cpu_partial_attr.attr,
+#endif
#ifdef CONFIG_SLUB_DEBUG
&total_objects_attr.attr,
&slabs_attr.attr,
--
2.11.0
kmem_cache->cpu_partial is only used when CONFIG_SLUB_CPU_PARTIAL is set,
so wrapping it in CONFIG_SLUB_CPU_PARTIAL saves some space, though only on
a 32-bit arch.
This patch wraps kmem_cache->cpu_partial in CONFIG_SLUB_CPU_PARTIAL and
wraps its sysfs attribute too.
Signed-off-by: Wei Yang <[email protected]>
---
include/linux/slub_def.h | 2 ++
mm/slub.c | 72 +++++++++++++++++++++++++++++-------------------
2 files changed, 46 insertions(+), 28 deletions(-)
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 0debd8df1a7d..477ab99800ed 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -69,7 +69,9 @@ struct kmem_cache {
int size; /* The size of an object including meta data */
int object_size; /* The size of an object without meta data */
int offset; /* Free pointer offset. */
+#ifdef CONFIG_SLUB_CPU_PARTIAL
int cpu_partial; /* Number of per cpu partial objects to keep around */
+#endif
struct kmem_cache_order_objects oo;
/* Allocation and freeing of slabs */
diff --git a/mm/slub.c b/mm/slub.c
index fde499b6dad8..94978f27882a 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1829,7 +1829,10 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
stat(s, CPU_PARTIAL_NODE);
}
if (!kmem_cache_has_cpu_partial(s)
- || available > s->cpu_partial / 2)
+#ifdef CONFIG_SLUB_CPU_PARTIAL
+ || available > s->cpu_partial / 2
+#endif
+ )
break;
}
@@ -3418,6 +3421,39 @@ static void set_min_partial(struct kmem_cache *s, unsigned long min)
s->min_partial = min;
}
+static void set_cpu_partial(struct kmem_cache *s)
+{
+#ifdef CONFIG_SLUB_CPU_PARTIAL
+ /*
+ * cpu_partial determined the maximum number of objects kept in the
+ * per cpu partial lists of a processor.
+ *
+ * Per cpu partial lists mainly contain slabs that just have one
+ * object freed. If they are used for allocation then they can be
+ * filled up again with minimal effort. The slab will never hit the
+ * per node partial lists and therefore no locking will be required.
+ *
+ * This setting also determines
+ *
+ * A) The number of objects from per cpu partial slabs dumped to the
+ * per node list when we reach the limit.
+ * B) The number of objects in cpu partial slabs to extract from the
+ * per node list when we run out of per cpu objects. We only fetch
+ * 50% to keep some capacity around for frees.
+ */
+ if (!kmem_cache_has_cpu_partial(s))
+ s->cpu_partial = 0;
+ else if (s->size >= PAGE_SIZE)
+ s->cpu_partial = 2;
+ else if (s->size >= 1024)
+ s->cpu_partial = 6;
+ else if (s->size >= 256)
+ s->cpu_partial = 13;
+ else
+ s->cpu_partial = 30;
+#endif
+}
+
/*
* calculate_sizes() determines the order and the distribution of data within
* a slab object.
@@ -3576,33 +3612,7 @@ static int kmem_cache_open(struct kmem_cache *s, unsigned long flags)
*/
set_min_partial(s, ilog2(s->size) / 2);
- /*
- * cpu_partial determined the maximum number of objects kept in the
- * per cpu partial lists of a processor.
- *
- * Per cpu partial lists mainly contain slabs that just have one
- * object freed. If they are used for allocation then they can be
- * filled up again with minimal effort. The slab will never hit the
- * per node partial lists and therefore no locking will be required.
- *
- * This setting also determines
- *
- * A) The number of objects from per cpu partial slabs dumped to the
- * per node list when we reach the limit.
- * B) The number of objects in cpu partial slabs to extract from the
- * per node list when we run out of per cpu objects. We only fetch
- * 50% to keep some capacity around for frees.
- */
- if (!kmem_cache_has_cpu_partial(s))
- s->cpu_partial = 0;
- else if (s->size >= PAGE_SIZE)
- s->cpu_partial = 2;
- else if (s->size >= 1024)
- s->cpu_partial = 6;
- else if (s->size >= 256)
- s->cpu_partial = 13;
- else
- s->cpu_partial = 30;
+ set_cpu_partial(s);
#ifdef CONFIG_NUMA
s->remote_node_defrag_ratio = 1000;
@@ -3989,7 +3999,9 @@ void __kmemcg_cache_deactivate(struct kmem_cache *s)
* Disable empty slabs caching. Used to avoid pinning offline
* memory cgroups by kmem pages that can be freed.
*/
+#ifdef CONFIG_SLUB_CPU_PARTIAL
s->cpu_partial = 0;
+#endif
s->min_partial = 0;
/*
@@ -4929,6 +4941,7 @@ static ssize_t min_partial_store(struct kmem_cache *s, const char *buf,
}
SLAB_ATTR(min_partial);
+#ifdef CONFIG_SLUB_CPU_PARTIAL
static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf)
{
return sprintf(buf, "%u\n", s->cpu_partial);
@@ -4951,6 +4964,7 @@ static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
return length;
}
SLAB_ATTR(cpu_partial);
+#endif
static ssize_t ctor_show(struct kmem_cache *s, char *buf)
{
@@ -5363,7 +5377,9 @@ static struct attribute *slab_attrs[] = {
&objs_per_slab_attr.attr,
&order_attr.attr,
&min_partial_attr.attr,
+#ifdef CONFIG_SLUB_CPU_PARTIAL
&cpu_partial_attr.attr,
+#endif
&objects_attr.attr,
&objects_partial_attr.attr,
&partial_attr.attr,
--
2.11.0
On Sun, 30 Apr 2017, Wei Yang wrote:
> kmem_cache is a frequently used data structure in the kernel. While reading
> the code, I found that we could save some space in a few cases:
>
> 1. On a 64-bit arch, an int field occupies a whole word if it is not packed
>    next to another int.
> 2. cpu_slab->partial is only used when CONFIG_SLUB_CPU_PARTIAL is set.
> 3. cpu_partial is only used when CONFIG_SLUB_CPU_PARTIAL is set, though
>    wrapping it only saves space on a 32-bit arch.
This looks fine. But do we really want to add that amount of ifdeffery?
How much memory does this save?
On Sun, Apr 30, 2017 at 07:31:51PM +0800, Wei Yang wrote:
> @@ -2302,7 +2302,11 @@ static bool has_cpu_slab(int cpu, void *info)
> struct kmem_cache *s = info;
> struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
>
> - return c->page || c->partial;
> + return c->page
> +#ifdef CONFIG_SLUB_CPU_PARTIAL
> + || c->partial
> +#endif
> + ;
> }
No. No way. This is disgusting.
The right way to do this is to create an accessor like this:
#ifdef CONFIG_SLUB_CPU_PARTIAL
#define slub_cpu_partial(c) ((c)->partial)
#else
#define slub_cpu_partial(c) 0
#endif
And then the above becomes:
- return c->page || c->partial;
+ return c->page || slub_cpu_partial(c);
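For illustration only (the setter below is a sketch, not something I am
formally proposing; only slub_cpu_partial() is), a companion helper would
let the new_slab path in ___slab_alloc() lose its #ifdefs as well:

#ifdef CONFIG_SLUB_CPU_PARTIAL
#define slub_cpu_partial(c)		((c)->partial)
#define slub_set_cpu_partial(c, p)	((c)->partial = (p))
#else
#define slub_cpu_partial(c)		0
#define slub_set_cpu_partial(c, p)	do { } while (0)
#endif

	/* new_slab: path in ___slab_alloc(), no #ifdef needed */
	if (slub_cpu_partial(c)) {
		page = c->page = slub_cpu_partial(c);
		slub_set_cpu_partial(c, page->next);
		c->freelist = NULL;
		goto redo;
	}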
All the other ifdefs go away, apart from these two:
> @@ -4980,6 +4990,7 @@ static ssize_t objects_partial_show(struct kmem_cache *s, char *buf)
> }
> SLAB_ATTR_RO(objects_partial);
>
> +#ifdef CONFIG_SLUB_CPU_PARTIAL
> static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
> {
> int objects = 0;
> @@ -5010,6 +5021,7 @@ static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
> return len + sprintf(buf + len, "\n");
> }
> SLAB_ATTR_RO(slabs_cpu_partial);
> +#endif
>
> static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
> {
> @@ -5364,7 +5376,9 @@ static struct attribute *slab_attrs[] = {
> &destroy_by_rcu_attr.attr,
> &shrink_attr.attr,
> &reserved_attr.attr,
> +#ifdef CONFIG_SLUB_CPU_PARTIAL
> &slabs_cpu_partial_attr.attr,
> +#endif
> #ifdef CONFIG_SLUB_DEBUG
> &total_objects_attr.attr,
> &slabs_attr.attr,