Hi,
These patches add support for exporting percpu_ida allocation/free
information via sysfs, so that percpu_ida performance can be monitored.
There are at least two use cases:
- performance is very sensitive to some parameters (such as
percpu_max_size) chosen by its users
- the data is helpful for verifying patches which try to improve
percpu_ida
Thanks,
--
Ming Lei
This allows us to export some allocation/free information
for monitoring percpu_ida performance.
Signed-off-by: Ming Lei <[email protected]>
---
include/linux/percpu_ida.h | 16 ++++++++++++++++
lib/percpu_ida.c | 21 ++++++++++++++++++---
2 files changed, 34 insertions(+), 3 deletions(-)
diff --git a/include/linux/percpu_ida.h b/include/linux/percpu_ida.h
index f5cfdd6..463e3b3 100644
--- a/include/linux/percpu_ida.h
+++ b/include/linux/percpu_ida.h
@@ -8,6 +8,7 @@
#include <linux/spinlock_types.h>
#include <linux/wait.h>
#include <linux/cpumask.h>
+#include <linux/kobject.h>
struct percpu_ida_cpu;
@@ -52,6 +53,8 @@ struct percpu_ida {
unsigned nr_free;
unsigned *freelist;
} ____cacheline_aligned_in_smp;
+
+ struct kobject kobj;
};
/*
@@ -79,4 +82,17 @@ int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn,
void *data);
unsigned percpu_ida_free_tags(struct percpu_ida *pool, int cpu);
+
+static inline int percpu_ida_kobject_add(struct percpu_ida *pool,
+ struct kobject *parent, const char *name)
+{
+ if (pool->kobj.state_initialized)
+ return kobject_add(&pool->kobj, parent, name);
+ return 0;
+}
+static inline void percpu_ida_kobject_del(struct percpu_ida *pool)
+{
+ if (pool->kobj.state_in_sysfs)
+ kobject_del(&pool->kobj);
+}
#endif /* __PERCPU_IDA_H__ */
diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c
index 93d145e..56ae350 100644
--- a/lib/percpu_ida.c
+++ b/lib/percpu_ida.c
@@ -260,6 +260,20 @@ void percpu_ida_free(struct percpu_ida *pool, unsigned tag)
}
EXPORT_SYMBOL_GPL(percpu_ida_free);
+static void percpu_ida_release(struct kobject *kobj)
+{
+ struct percpu_ida *pool = container_of(kobj,
+ struct percpu_ida, kobj);
+
+ free_percpu(pool->tag_cpu);
+ free_pages((unsigned long) pool->freelist,
+ get_order(pool->nr_tags * sizeof(unsigned)));
+}
+
+static struct kobj_type percpu_ida_ktype = {
+ .release = percpu_ida_release,
+};
+
/**
* percpu_ida_destroy - release a tag pool's resources
* @pool: pool to free
@@ -268,9 +282,8 @@ EXPORT_SYMBOL_GPL(percpu_ida_free);
*/
void percpu_ida_destroy(struct percpu_ida *pool)
{
- free_percpu(pool->tag_cpu);
- free_pages((unsigned long) pool->freelist,
- get_order(pool->nr_tags * sizeof(unsigned)));
+ if (pool->kobj.state_initialized)
+ kobject_put(&pool->kobj);
}
EXPORT_SYMBOL_GPL(percpu_ida_destroy);
@@ -324,6 +337,8 @@ int __percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags,
for_each_possible_cpu(cpu)
spin_lock_init(&per_cpu_ptr(pool->tag_cpu, cpu)->lock);
+ kobject_init(&pool->kobj, &percpu_ida_ktype);
+
return 0;
err:
percpu_ida_destroy(pool);
--
1.7.9.5
With this information, it is easy to monitor percpu_ida
performance.
Signed-off-by: Ming Lei <[email protected]>
---
include/linux/percpu_ida.h | 24 ++++++++
lib/Kconfig | 7 +++
lib/percpu_ida.c | 130 +++++++++++++++++++++++++++++++++++++++++++-
3 files changed, 159 insertions(+), 2 deletions(-)
diff --git a/include/linux/percpu_ida.h b/include/linux/percpu_ida.h
index 463e3b3..be1036d 100644
--- a/include/linux/percpu_ida.h
+++ b/include/linux/percpu_ida.h
@@ -12,6 +12,27 @@
struct percpu_ida_cpu;
+#ifdef CONFIG_PERCPU_IDA_STATS
+struct percpu_ida_stats {
+ u64 alloc_tags;
+ u64 alloc_in_fastpath;
+ u64 alloc_from_global_pool;
+ u64 alloc_by_stealing;
+ u64 alloc_after_sched;
+
+ u64 freed_tags;
+ u64 freed_empty;
+ u64 freed_full;
+};
+
+#define percpu_ida_inc(pool, ptr) \
+do { \
+ __this_cpu_inc(pool->stats->ptr); \
+} while (0)
+#else
+#define percpu_ida_inc(pool, ptr) do {} while (0)
+#endif
+
struct percpu_ida {
/*
* number of tags available to be allocated, as passed to
@@ -55,6 +76,9 @@ struct percpu_ida {
} ____cacheline_aligned_in_smp;
struct kobject kobj;
+#ifdef CONFIG_PERCPU_IDA_STATS
+ struct percpu_ida_stats __percpu *stats;
+#endif
};
/*
diff --git a/lib/Kconfig b/lib/Kconfig
index 325a8d4..d47a1cf 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -476,6 +476,13 @@ config OID_REGISTRY
help
Enable fast lookup object identifier registry.
+config PERCPU_IDA_STATS
+ bool "Export percpu_ida statistics via sysfs"
+ default n
+ help
+ Export percpu_ida allocation/free information so
+ the performance can be monitored.
+
config UCS2_STRING
tristate
diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c
index 56ae350..6f6c68d 100644
--- a/lib/percpu_ida.c
+++ b/lib/percpu_ida.c
@@ -42,6 +42,105 @@ struct percpu_ida_cpu {
unsigned freelist[];
};
+#ifdef CONFIG_PERCPU_IDA_STATS
+struct pcpu_ida_sysfs_entry {
+ struct attribute attr;
+ ssize_t (*show)(struct percpu_ida *, char *);
+};
+
+#define pcpu_ida_show(field, fmt) \
+static ssize_t field##_show(struct percpu_ida *pool, char *buf) \
+{ \
+ u64 val = 0; \
+ ssize_t rc; \
+ unsigned cpu; \
+ \
+ for_each_possible_cpu(cpu) \
+ val += per_cpu_ptr(pool->stats, cpu)->field; \
+ \
+ rc = sprintf(buf, fmt, val); \
+ return rc; \
+}
+
+#define PERCPU_IDA_ATTR_RO(_name) \
+ struct pcpu_ida_sysfs_entry pcpu_ida_attr_##_name = __ATTR_RO(_name)
+
+#define pcpu_ida_attr_ro(field, fmt) \
+ pcpu_ida_show(field, fmt) \
+ static PERCPU_IDA_ATTR_RO(field)
+
+pcpu_ida_attr_ro(alloc_tags, "%lld\n");
+pcpu_ida_attr_ro(alloc_in_fastpath, "%lld\n");
+pcpu_ida_attr_ro(alloc_from_global_pool, "%lld\n");
+pcpu_ida_attr_ro(alloc_by_stealing, "%lld\n");
+pcpu_ida_attr_ro(alloc_after_sched, "%lld\n");
+pcpu_ida_attr_ro(freed_tags, "%lld\n");
+pcpu_ida_attr_ro(freed_empty, "%lld\n");
+pcpu_ida_attr_ro(freed_full, "%lld\n");
+
+ssize_t pcpu_ida_sysfs_max_size_show(struct percpu_ida *pool, char *page)
+{
+ ssize_t rc = sprintf(page, "%u\n", pool->percpu_max_size);
+ return rc;
+}
+
+static struct pcpu_ida_sysfs_entry pcpu_ida_attr_max_size = {
+ .attr = {.name = "percpu_max_size", .mode = S_IRUGO},
+ .show = pcpu_ida_sysfs_max_size_show,
+};
+
+ssize_t pcpu_ida_sysfs_batch_size_show(struct percpu_ida *pool, char *page)
+{
+ ssize_t rc = sprintf(page, "%u\n", pool->percpu_batch_size);
+ return rc;
+}
+
+static struct pcpu_ida_sysfs_entry pcpu_ida_attr_batch_size = {
+ .attr = {.name = "percpu_batch_size", .mode = S_IRUGO},
+ .show = pcpu_ida_sysfs_batch_size_show,
+};
+
+static ssize_t percpu_ida_sysfs_show(struct kobject *kobj,
+ struct attribute *attr, char *page)
+{
+ struct pcpu_ida_sysfs_entry *entry;
+ struct percpu_ida *pool;
+ ssize_t res = -EIO;
+
+ entry = container_of(attr, struct pcpu_ida_sysfs_entry, attr);
+ pool = container_of(kobj, struct percpu_ida, kobj);
+
+ if (!entry->show)
+ return res;
+ res = entry->show(pool, page);
+ return res;
+}
+
+static struct attribute *percpu_ida_def_attrs[] = {
+ &pcpu_ida_attr_alloc_tags.attr,
+ &pcpu_ida_attr_alloc_in_fastpath.attr,
+ &pcpu_ida_attr_alloc_from_global_pool.attr,
+ &pcpu_ida_attr_alloc_by_stealing.attr,
+ &pcpu_ida_attr_alloc_after_sched.attr,
+ &pcpu_ida_attr_freed_tags.attr,
+ &pcpu_ida_attr_freed_empty.attr,
+ &pcpu_ida_attr_freed_full.attr,
+ &pcpu_ida_attr_max_size.attr,
+ &pcpu_ida_attr_batch_size.attr,
+ NULL,
+};
+#else
+static ssize_t percpu_ida_sysfs_show(struct kobject *kobj,
+ struct attribute *attr, char *page)
+{
+ return -EIO;
+}
+
+static struct attribute *percpu_ida_def_attrs[] = {
+ NULL,
+};
+#endif
+
static inline void move_tags(unsigned *dst, unsigned *dst_nr,
unsigned *src, unsigned *src_nr,
unsigned nr)
@@ -154,9 +253,12 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state)
local_irq_save(flags);
tags = this_cpu_ptr(pool->tag_cpu);
+ percpu_ida_inc(pool, alloc_tags);
+
/* Fastpath */
tag = alloc_local_tag(tags);
if (likely(tag >= 0)) {
+ percpu_ida_inc(pool, alloc_in_fastpath);
local_irq_restore(flags);
return tag;
}
@@ -174,10 +276,15 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state)
if (state != TASK_RUNNING)
prepare_to_wait(&pool->wait, &wait, state);
- if (!tags->nr_free)
+ if (!tags->nr_free) {
+ percpu_ida_inc(pool, alloc_from_global_pool);
alloc_global_tags(pool, tags);
- if (!tags->nr_free)
+ }
+
+ if (!tags->nr_free) {
steal_tags(pool, tags);
+ percpu_ida_inc(pool, alloc_by_stealing);
+ }
if (tags->nr_free) {
tag = tags->freelist[--tags->nr_free];
@@ -197,6 +304,7 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state)
break;
}
+ percpu_ida_inc(pool, alloc_after_sched);
schedule();
local_irq_save(flags);
@@ -227,6 +335,8 @@ void percpu_ida_free(struct percpu_ida *pool, unsigned tag)
local_irq_save(flags);
tags = this_cpu_ptr(pool->tag_cpu);
+ percpu_ida_inc(pool, freed_tags);
+
spin_lock(&tags->lock);
tags->freelist[tags->nr_free++] = tag;
@@ -234,12 +344,14 @@ void percpu_ida_free(struct percpu_ida *pool, unsigned tag)
spin_unlock(&tags->lock);
if (nr_free == 1) {
+ percpu_ida_inc(pool, freed_empty);
cpumask_set_cpu(smp_processor_id(),
&pool->cpus_have_tags);
wake_up(&pool->wait);
}
if (nr_free == pool->percpu_max_size) {
+ percpu_ida_inc(pool, freed_full);
spin_lock(&pool->lock);
/*
@@ -265,13 +377,22 @@ static void percpu_ida_release(struct kobject *kobj)
struct percpu_ida *pool = container_of(kobj,
struct percpu_ida, kobj);
+#ifdef CONFIG_PERCPU_IDA_STATS
+ free_percpu(pool->stats);
+#endif
free_percpu(pool->tag_cpu);
free_pages((unsigned long) pool->freelist,
get_order(pool->nr_tags * sizeof(unsigned)));
}
+static struct sysfs_ops percpu_ida_sysfs_ops = {
+ .show = percpu_ida_sysfs_show,
+};
+
static struct kobj_type percpu_ida_ktype = {
+ .sysfs_ops = &percpu_ida_sysfs_ops,
.release = percpu_ida_release,
+ .default_attrs = percpu_ida_def_attrs,
};
/**
@@ -339,6 +460,11 @@ int __percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags,
kobject_init(&pool->kobj, &percpu_ida_ktype);
+#ifdef CONFIG_PERCPU_IDA_STATS
+ pool->stats = alloc_percpu(struct percpu_ida_stats);
+ if (!pool->stats)
+ goto err;
+#endif
return 0;
err:
percpu_ida_destroy(pool);
--
1.7.9.5
So that the percpu_ida performance can be monitored.
Signed-off-by: Ming Lei <[email protected]>
---
block/blk-mq-sysfs.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index 8145b5b..4171ae2 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -329,6 +329,8 @@ void blk_mq_unregister_disk(struct gendisk *disk)
kobject_del(&ctx->kobj);
kobject_put(&ctx->kobj);
}
+ percpu_ida_kobject_del(&hctx->tags->free_tags);
+ percpu_ida_kobject_del(&hctx->tags->reserved_tags);
kobject_del(&hctx->kobj);
kobject_put(&hctx->kobj);
}
@@ -362,6 +364,11 @@ int blk_mq_register_disk(struct gendisk *disk)
if (ret)
break;
+ percpu_ida_kobject_add(&hctx->tags->free_tags,
+ &hctx->kobj, "free_tags");
+ percpu_ida_kobject_add(&hctx->tags->reserved_tags,
+ &hctx->kobj, "reserved_tags");
+
if (!hctx->nr_ctx)
continue;
--
1.7.9.5