2020-05-27 09:58:14

by Christoph Hellwig

[permalink] [raw]
Subject: block I/O accounting improvements v2

Hi Jens,

they series contains various improvement for block I/O accounting. The
first bunch of patches switch the bio based drivers to better accounting
helpers compared to the current mess. The end contains a fix and various
performanc improvements. Most of this comes from a series Konstantin
sent a few weeks ago, rebased on changes that landed in your tree since
and my change to always use the percpu version of the disk stats.


Changes since v1:
- add an ifdef CONFIG_BLOCK to work around the sad state of our headers
- add reviewed-by tags to all patches


2020-05-27 10:18:54

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 11/16] block: always use a percpu variable for disk stats

percpu variables have a perfectly fine working stub implementation
for UP kernels, so use that.

Signed-off-by: Christoph Hellwig <[email protected]>
Reviewed-by: Konstantin Khlebnikov <[email protected]>
---
block/blk.h | 2 +-
block/genhd.c | 12 +++------
block/partitions/core.c | 5 ++--
include/linux/genhd.h | 13 ---------
include/linux/part_stat.h | 55 ++++++++-------------------------------
5 files changed, 18 insertions(+), 69 deletions(-)

diff --git a/block/blk.h b/block/blk.h
index bdf5e94467aa2..0ecba2ab383d6 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -378,7 +378,7 @@ static inline void hd_struct_put(struct hd_struct *part)

static inline void hd_free_part(struct hd_struct *part)
{
- free_part_stats(part);
+ free_percpu(part->dkstats);
kfree(part->info);
percpu_ref_exit(&part->ref);
}
diff --git a/block/genhd.c b/block/genhd.c
index 094ed90964964..3e7df0a3e6bb0 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -92,7 +92,6 @@ const char *bdevname(struct block_device *bdev, char *buf)
}
EXPORT_SYMBOL(bdevname);

-#ifdef CONFIG_SMP
static void part_stat_read_all(struct hd_struct *part, struct disk_stats *stat)
{
int cpu;
@@ -112,12 +111,6 @@ static void part_stat_read_all(struct hd_struct *part, struct disk_stats *stat)
stat->io_ticks += ptr->io_ticks;
}
}
-#else /* CONFIG_SMP */
-static void part_stat_read_all(struct hd_struct *part, struct disk_stats *stat)
-{
- memcpy(stat, &part->dkstats, sizeof(struct disk_stats));
-}
-#endif /* CONFIG_SMP */

static unsigned int part_in_flight(struct request_queue *q,
struct hd_struct *part)
@@ -1688,14 +1681,15 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)

disk = kzalloc_node(sizeof(struct gendisk), GFP_KERNEL, node_id);
if (disk) {
- if (!init_part_stats(&disk->part0)) {
+ disk->part0.dkstats = alloc_percpu(struct disk_stats);
+ if (!disk->part0.dkstats) {
kfree(disk);
return NULL;
}
init_rwsem(&disk->lookup_sem);
disk->node_id = node_id;
if (disk_expand_part_tbl(disk, 0)) {
- free_part_stats(&disk->part0);
+ free_percpu(disk->part0.dkstats);
kfree(disk);
return NULL;
}
diff --git a/block/partitions/core.c b/block/partitions/core.c
index 297004fd22648..78951e33b2d7c 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -387,7 +387,8 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno,
if (!p)
return ERR_PTR(-EBUSY);

- if (!init_part_stats(p)) {
+ p->dkstats = alloc_percpu(struct disk_stats);
+ if (!p->dkstats) {
err = -ENOMEM;
goto out_free;
}
@@ -468,7 +469,7 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno,
out_free_info:
kfree(p->info);
out_free_stats:
- free_part_stats(p);
+ free_percpu(p->dkstats);
out_free:
kfree(p);
return ERR_PTR(err);
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index a9384449465a3..f0d6d77309a54 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -39,15 +39,6 @@ extern struct class block_class;
#include <linux/fs.h>
#include <linux/workqueue.h>

-struct disk_stats {
- u64 nsecs[NR_STAT_GROUPS];
- unsigned long sectors[NR_STAT_GROUPS];
- unsigned long ios[NR_STAT_GROUPS];
- unsigned long merges[NR_STAT_GROUPS];
- unsigned long io_ticks;
- local_t in_flight[2];
-};
-
#define PARTITION_META_INFO_VOLNAMELTH 64
/*
* Enough for the string representation of any kind of UUID plus NULL.
@@ -72,11 +63,7 @@ struct hd_struct {
seqcount_t nr_sects_seq;
#endif
unsigned long stamp;
-#ifdef CONFIG_SMP
struct disk_stats __percpu *dkstats;
-#else
- struct disk_stats dkstats;
-#endif
struct percpu_ref ref;

sector_t alignment_offset;
diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h
index ece607607a864..6644197980b92 100644
--- a/include/linux/part_stat.h
+++ b/include/linux/part_stat.h
@@ -4,19 +4,23 @@

#include <linux/genhd.h>

+struct disk_stats {
+ u64 nsecs[NR_STAT_GROUPS];
+ unsigned long sectors[NR_STAT_GROUPS];
+ unsigned long ios[NR_STAT_GROUPS];
+ unsigned long merges[NR_STAT_GROUPS];
+ unsigned long io_ticks;
+ local_t in_flight[2];
+};
+
/*
* Macros to operate on percpu disk statistics:
*
- * {disk|part|all}_stat_{add|sub|inc|dec}() modify the stat counters
- * and should be called between disk_stat_lock() and
- * disk_stat_unlock().
+ * {disk|part|all}_stat_{add|sub|inc|dec}() modify the stat counters and should
+ * be called between disk_stat_lock() and disk_stat_unlock().
*
* part_stat_read() can be called at any time.
- *
- * part_stat_{add|set_all}() and {init|free}_part_stats are for
- * internal use only.
*/
-#ifdef CONFIG_SMP
#define part_stat_lock() ({ rcu_read_lock(); get_cpu(); })
#define part_stat_unlock() do { put_cpu(); rcu_read_unlock(); } while (0)

@@ -44,43 +48,6 @@ static inline void part_stat_set_all(struct hd_struct *part, int value)
sizeof(struct disk_stats));
}

-static inline int init_part_stats(struct hd_struct *part)
-{
- part->dkstats = alloc_percpu(struct disk_stats);
- if (!part->dkstats)
- return 0;
- return 1;
-}
-
-static inline void free_part_stats(struct hd_struct *part)
-{
- free_percpu(part->dkstats);
-}
-
-#else /* !CONFIG_SMP */
-#define part_stat_lock() ({ rcu_read_lock(); 0; })
-#define part_stat_unlock() rcu_read_unlock()
-
-#define part_stat_get(part, field) ((part)->dkstats.field)
-#define part_stat_get_cpu(part, field, cpu) part_stat_get(part, field)
-#define part_stat_read(part, field) part_stat_get(part, field)
-
-static inline void part_stat_set_all(struct hd_struct *part, int value)
-{
- memset(&part->dkstats, value, sizeof(struct disk_stats));
-}
-
-static inline int init_part_stats(struct hd_struct *part)
-{
- return 1;
-}
-
-static inline void free_part_stats(struct hd_struct *part)
-{
-}
-
-#endif /* CONFIG_SMP */
-
#define part_stat_read_accum(part, field) \
(part_stat_read(part, field[STAT_READ]) + \
part_stat_read(part, field[STAT_WRITE]) + \
--
2.26.2

2020-05-27 10:24:53

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 10/16] block: move update_io_ticks to blk-core.c

All callers are in blk-core.c, so move update_io_ticks over.

Signed-off-by: Christoph Hellwig <[email protected]>
Reviewed-by: Konstantin Khlebnikov <[email protected]>
---
block/bio.c | 16 ----------------
block/blk-core.c | 15 +++++++++++++++
block/blk.h | 1 -
3 files changed, 15 insertions(+), 17 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index 3e89c7b37855a..5235da6434aab 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1376,22 +1376,6 @@ void bio_check_pages_dirty(struct bio *bio)
schedule_work(&bio_dirty_work);
}

-void update_io_ticks(struct hd_struct *part, unsigned long now, bool end)
-{
- unsigned long stamp;
-again:
- stamp = READ_ONCE(part->stamp);
- if (unlikely(stamp != now)) {
- if (likely(cmpxchg(&part->stamp, stamp, now) == stamp)) {
- __part_stat_add(part, io_ticks, end ? now - stamp : 1);
- }
- }
- if (part->partno) {
- part = &part_to_disk(part)->part0;
- goto again;
- }
-}
-
static inline bool bio_remaining_done(struct bio *bio)
{
/*
diff --git a/block/blk-core.c b/block/blk-core.c
index 8973104f88d90..c1675d43c2da0 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1381,6 +1381,21 @@ unsigned int blk_rq_err_bytes(const struct request *rq)
}
EXPORT_SYMBOL_GPL(blk_rq_err_bytes);

+static void update_io_ticks(struct hd_struct *part, unsigned long now, bool end)
+{
+ unsigned long stamp;
+again:
+ stamp = READ_ONCE(part->stamp);
+ if (unlikely(stamp != now)) {
+ if (likely(cmpxchg(&part->stamp, stamp, now) == stamp))
+ __part_stat_add(part, io_ticks, end ? now - stamp : 1);
+ }
+ if (part->partno) {
+ part = &part_to_disk(part)->part0;
+ goto again;
+ }
+}
+
static void blk_account_io_completion(struct request *req, unsigned int bytes)
{
if (req->part && blk_do_io_stat(req)) {
diff --git a/block/blk.h b/block/blk.h
index 5db4ec1e85f7b..bdf5e94467aa2 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -344,7 +344,6 @@ void blk_queue_free_zone_bitmaps(struct request_queue *q);
static inline void blk_queue_free_zone_bitmaps(struct request_queue *q) {}
#endif

-void update_io_ticks(struct hd_struct *part, unsigned long now, bool end);
struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector);

int blk_alloc_devt(struct hd_struct *part, dev_t *devt);
--
2.26.2

2020-05-27 15:43:23

by Jens Axboe

[permalink] [raw]
Subject: Re: block I/O accounting improvements v2

On 5/26/20 11:24 PM, Christoph Hellwig wrote:
> Hi Jens,
>
> they series contains various improvement for block I/O accounting. The
> first bunch of patches switch the bio based drivers to better accounting
> helpers compared to the current mess. The end contains a fix and various
> performanc improvements. Most of this comes from a series Konstantin
> sent a few weeks ago, rebased on changes that landed in your tree since
> and my change to always use the percpu version of the disk stats.

Applied, thanks.

--
Jens Axboe