2021-12-10 04:46:14

by Li Jinlin

[permalink] [raw]
Subject: [PATCH 0/3] Fix undefined behaviour during device synchronization

md/drbd drivers use 'signed int' variable to track sync vs non-sync IO,
and judge whether sync IO needs to be throttled by signed comparison.
If the value of the variable is greater than INT_MAX or close to
INT_MAX, some undefined behavior may occur.

Fix by using 64bit signed integer type.

Changes in the v2 "md: Fix undefined behaviour in is_mddev_idle" patch
compared to v1:
- add ubsan info in message
- use 64bit signed integer type instead of long type;
- move sync_io variable from struct gendisk to struct md_rdev, and
modify md_sync_acct() and md_sync_acct_bio() to fit for this change.

Li Jinlin (3):
md: Fix undefined behaviour in is_mddev_idle
drbd: Fix undefined behaviour in drbd_rs_c_min_rate_throttle
drbd: Remove useless variable in struct drbd_device

drivers/block/drbd/drbd_bitmap.c | 2 +-
drivers/block/drbd/drbd_int.h | 5 ++---
drivers/block/drbd/drbd_main.c | 3 +--
drivers/block/drbd/drbd_receiver.c | 12 ++++++------
drivers/block/drbd/drbd_state.c | 1 -
drivers/block/drbd/drbd_worker.c | 5 ++---
drivers/md/md.c | 6 +++---
drivers/md/md.h | 13 +++++++++----
drivers/md/raid1.c | 4 ++--
drivers/md/raid10.c | 24 ++++++++++++------------
drivers/md/raid5.c | 4 ++--
include/linux/genhd.h | 1 -
12 files changed, 40 insertions(+), 40 deletions(-)

--
2.27.0



2021-12-10 04:46:17

by Li Jinlin

[permalink] [raw]
Subject: [PATCH 2/3] drbd: Fix undefined behaviour in drbd_rs_c_min_rate_throttle

When the stat accum of the disk is greater than INT_MAX, its
value becomes negative after casting to 'int', which may lead
to overflow after subtracting a positive number. In the same
way, when the value of rs_sect_ev is greater than INT_MAX,
overflow may also occur. These situations will lead to
undefined behavior.

In addition, if the stat accum of the disk is close to INT_MAX
when creating md, the initial value of rs_last_events would be
set close to INT_MAX when mddev initializes IO event counters.
'curr_events - device->rs_last_events > 64' will then always be
false during synchronization, which may cause resync not to be
throttled even if the lower device is busy.

Fix by using atomic64_t type for rs_sect_ev, and using s64 type
for curr_events/last_events.

Signed-off-by: Li Jinlin <[email protected]>
---
drivers/block/drbd/drbd_bitmap.c | 2 +-
drivers/block/drbd/drbd_int.h | 4 ++--
drivers/block/drbd/drbd_main.c | 2 +-
drivers/block/drbd/drbd_receiver.c | 12 ++++++------
drivers/block/drbd/drbd_worker.c | 4 ++--
5 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index c1f816f896a8..d580f4071622 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -1021,7 +1021,7 @@ static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_ho
submit_bio(bio);
/* this should not count as user activity and cause the
* resync to throttle -- see drbd_rs_should_slow_down(). */
- atomic_add(len >> 9, &device->rs_sect_ev);
+ atomic64_add(len >> 9, &device->rs_sect_ev);
}
}

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index f27d5b0f9a0b..1b71adc07e83 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -954,9 +954,9 @@ struct drbd_device {
struct mutex *state_mutex; /* either own_state_mutex or first_peer_device(device)->connection->cstate_mutex */
char congestion_reason; /* Why we where congested... */
atomic_t rs_sect_in; /* for incoming resync data rate, SyncTarget */
- atomic_t rs_sect_ev; /* for submitted resync data rate, both */
+ atomic64_t rs_sect_ev; /* for submitted resync data rate, both */
int rs_last_sect_ev; /* counter to compare with */
- int rs_last_events; /* counter of read or write "events" (unit sectors)
+ s64 rs_last_events; /* counter of read or write "events" (unit sectors)
* on the lower level device when we last looked. */
int c_sync_rate; /* current resync rate after syncer throttle magic */
struct fifo_buffer *rs_plan_s; /* correction values of resync planer (RCU, connection->conn_update) */
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 53ba2dddba6e..ea057bd60541 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1974,7 +1974,7 @@ void drbd_init_set_defaults(struct drbd_device *device)
atomic_set(&device->local_cnt, 0);
atomic_set(&device->pp_in_use_by_net, 0);
atomic_set(&device->rs_sect_in, 0);
- atomic_set(&device->rs_sect_ev, 0);
+ atomic64_set(&device->rs_sect_ev, 0);
atomic_set(&device->ap_in_flight, 0);
atomic_set(&device->md_io.in_use, 0);

diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 1f740e42e457..4b75ad3dd0cd 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -2106,7 +2106,7 @@ static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t secto
list_add_tail(&peer_req->w.list, &device->sync_ee);
spin_unlock_irq(&device->resource->req_lock);

- atomic_add(pi->size >> 9, &device->rs_sect_ev);
+ atomic64_add(pi->size >> 9, &device->rs_sect_ev);
if (drbd_submit_peer_request(device, peer_req, REQ_OP_WRITE, 0,
DRBD_FAULT_RS_WR) == 0)
return 0;
@@ -2792,7 +2792,7 @@ bool drbd_rs_c_min_rate_throttle(struct drbd_device *device)
struct gendisk *disk = device->ldev->backing_bdev->bd_disk;
unsigned long db, dt, dbdt;
unsigned int c_min_rate;
- int curr_events;
+ s64 curr_events;

rcu_read_lock();
c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate;
@@ -2802,8 +2802,8 @@ bool drbd_rs_c_min_rate_throttle(struct drbd_device *device)
if (c_min_rate == 0)
return false;

- curr_events = (int)part_stat_read_accum(disk->part0, sectors) -
- atomic_read(&device->rs_sect_ev);
+ curr_events = (s64)part_stat_read_accum(disk->part0, sectors) -
+ atomic64_read(&device->rs_sect_ev);

if (atomic_read(&device->ap_actlog_cnt)
|| curr_events - device->rs_last_events > 64) {
@@ -3023,7 +3023,7 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet
goto out_free_e;

submit_for_resync:
- atomic_add(size >> 9, &device->rs_sect_ev);
+ atomic64_add(size >> 9, &device->rs_sect_ev);

submit:
update_receiver_timing_details(connection, drbd_submit_peer_request);
@@ -5019,7 +5019,7 @@ static int receive_rs_deallocated(struct drbd_connection *connection, struct pac
list_add_tail(&peer_req->w.list, &device->sync_ee);
spin_unlock_irq(&device->resource->req_lock);

- atomic_add(pi->size >> 9, &device->rs_sect_ev);
+ atomic64_add(pi->size >> 9, &device->rs_sect_ev);
err = drbd_submit_peer_request(device, peer_req, op, 0, DRBD_FAULT_RS_WR);

if (err) {
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 64563bfdf0da..a4edd0a9c875 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -409,7 +409,7 @@ static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector,
list_add_tail(&peer_req->w.list, &device->read_ee);
spin_unlock_irq(&device->resource->req_lock);

- atomic_add(size >> 9, &device->rs_sect_ev);
+ atomic64_add(size >> 9, &device->rs_sect_ev);
if (drbd_submit_peer_request(device, peer_req, REQ_OP_READ, 0,
DRBD_FAULT_RS_RD) == 0)
return 0;
@@ -1679,7 +1679,7 @@ void drbd_rs_controller_reset(struct drbd_device *device)
struct fifo_buffer *plan;

atomic_set(&device->rs_sect_in, 0);
- atomic_set(&device->rs_sect_ev, 0);
+ atomic64_set(&device->rs_sect_ev, 0);
device->rs_in_flight = 0;
device->rs_last_events =
(int)part_stat_read_accum(disk->part0, sectors);
--
2.27.0


2021-12-10 04:46:21

by Li Jinlin

[permalink] [raw]
Subject: [PATCH v2 1/3] md: Fix undefined behaviour in is_mddev_idle

UBSAN reports this problem:

[ 5984.281385] UBSAN: Undefined behaviour in drivers/md/md.c:8175:15
[ 5984.281390] signed integer overflow:
[ 5984.281393] -2147483291 - 2072033152 cannot be represented in type 'int'
[ 5984.281400] CPU: 25 PID: 1854 Comm: md101_resync Kdump: loaded Not tainted 4.19.90
[ 5984.281404] Hardware name: Huawei TaiShan 200 (Model 5280)/BC82AMDDA
[ 5984.281406] Call trace:
[ 5984.281415] dump_backtrace+0x0/0x310
[ 5984.281418] show_stack+0x28/0x38
[ 5984.281425] dump_stack+0xec/0x15c
[ 5984.281430] ubsan_epilogue+0x18/0x84
[ 5984.281434] handle_overflow+0x14c/0x19c
[ 5984.281439] __ubsan_handle_sub_overflow+0x34/0x44
[ 5984.281445] is_mddev_idle+0x338/0x3d8
[ 5984.281449] md_do_sync+0x1bb8/0x1cf8
[ 5984.281452] md_thread+0x220/0x288
[ 5984.281457] kthread+0x1d8/0x1e0
[ 5984.281461] ret_from_fork+0x10/0x18

When the stat accum of the disk is greater than INT_MAX, its
value becomes negative after casting to 'int', which may lead
to overflow after subtracting a positive number. In the same
way, when the value of sync_io is greater than INT_MAX,
overflow may also occur. These situations will lead to
undefined behavior.

In addition, if the stat accum of the disk is close to INT_MAX
when creating raid arrays, the initial value of last_events
would be set close to INT_MAX when mddev initializes IO
event counters. 'curr_events - rdev->last_events > 64' will
then always be false during synchronization. If all the disks
of mddev are in this case, is_mddev_idle() will always return 1,
which may cause non-sync IO to be very slow.

To address these problems, we need to use a 64bit signed integer
type for sync_io, last_events, and curr_events.

In all the drivers that come with the kernel, the sync_io
variable in struct gendisk is only used for the md driver
currently. It should be more suitable in struct md_rdev, so
add the sync_io variable in struct md_rdev, and use it to
replace. We modify md_sync_acct() and md_sync_acct_bio()
to fit for this change as well. md_sync_acct_bio() needs
access to the rdev, so we set bio->bi_bdev to rdev before
calling it, and reset bio->bi_bdev to bdev in this function.

Signed-off-by: Li Jinlin <[email protected]>
---
drivers/md/md.c | 6 +++---
drivers/md/md.h | 13 +++++++++----
drivers/md/raid1.c | 4 ++--
drivers/md/raid10.c | 24 ++++++++++++------------
drivers/md/raid5.c | 4 ++--
include/linux/genhd.h | 1 -
6 files changed, 28 insertions(+), 24 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 5111ed966947..f1b71a92801e 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -8429,14 +8429,14 @@ static int is_mddev_idle(struct mddev *mddev, int init)
{
struct md_rdev *rdev;
int idle;
- int curr_events;
+ s64 curr_events;

idle = 1;
rcu_read_lock();
rdev_for_each_rcu(rdev, mddev) {
struct gendisk *disk = rdev->bdev->bd_disk;
- curr_events = (int)part_stat_read_accum(disk->part0, sectors) -
- atomic_read(&disk->sync_io);
+ curr_events = (s64)part_stat_read_accum(disk->part0, sectors) -
+ atomic64_read(&rdev->sync_io);
/* sync IO will cause sync_io to increase before the disk_stats
* as sync_io is counted when a request starts, and
* disk_stats is counted when it completes.
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 53ea7a6961de..584e357e0940 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -50,7 +50,7 @@ struct md_rdev {

sector_t sectors; /* Device size (in 512bytes sectors) */
struct mddev *mddev; /* RAID array if running */
- int last_events; /* IO event timestamp */
+ s64 last_events; /* IO event timestamp */

/*
* If meta_bdev is non-NULL, it means that a separate device is
@@ -138,6 +138,8 @@ struct md_rdev {
unsigned int size; /* Size in sectors of the PPL space */
sector_t sector; /* First sector of the PPL space */
} ppl;
+
+ atomic64_t sync_io; /* counter of sync IO (unit sectors) */
};
enum flag_bits {
Faulty, /* device is known to have a fault */
@@ -549,14 +551,17 @@ static inline int mddev_trylock(struct mddev *mddev)
}
extern void mddev_unlock(struct mddev *mddev);

-static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sectors)
+static inline void md_sync_acct(struct md_rdev *rdev, unsigned long nr_sectors)
{
- atomic_add(nr_sectors, &bdev->bd_disk->sync_io);
+ atomic64_add(nr_sectors, &rdev->sync_io);
}

static inline void md_sync_acct_bio(struct bio *bio, unsigned long nr_sectors)
{
- md_sync_acct(bio->bi_bdev, nr_sectors);
+ struct md_rdev *rdev = (void *)bio->bi_bdev;
+
+ bio_set_dev(bio, rdev->bdev);
+ md_sync_acct(rdev, nr_sectors);
}

struct md_personality
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 7dc8026cf6ee..74c42dabe57c 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -2232,7 +2232,7 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio)

wbio->bi_end_io = end_sync_write;
atomic_inc(&r1_bio->remaining);
- md_sync_acct(conf->mirrors[i].rdev->bdev, bio_sectors(wbio));
+ md_sync_acct(conf->mirrors[i].rdev, bio_sectors(wbio));

submit_bio_noacct(wbio);
}
@@ -2791,7 +2791,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
if (rdev && bio->bi_end_io) {
atomic_inc(&rdev->nr_pending);
bio->bi_iter.bi_sector = sector_nr + rdev->data_offset;
- bio_set_dev(bio, rdev->bdev);
+ bio->bi_bdev = (void *)rdev;
if (test_bit(FailFast, &rdev->flags))
bio->bi_opf |= MD_FAILFAST;
}
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index dde98f65bd04..fc1e6c0996de 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -2407,7 +2407,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)

atomic_inc(&conf->mirrors[d].rdev->nr_pending);
atomic_inc(&r10_bio->remaining);
- md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(tbio));
+ md_sync_acct(conf->mirrors[d].rdev, bio_sectors(tbio));

if (test_bit(FailFast, &conf->mirrors[d].rdev->flags))
tbio->bi_opf |= MD_FAILFAST;
@@ -2430,7 +2430,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
bio_copy_data(tbio, fbio);
d = r10_bio->devs[i].devnum;
atomic_inc(&r10_bio->remaining);
- md_sync_acct(conf->mirrors[d].replacement->bdev,
+ md_sync_acct(conf->mirrors[d].replacement,
bio_sectors(tbio));
submit_bio_noacct(tbio);
}
@@ -2562,12 +2562,12 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
wbio2 = NULL;
if (wbio->bi_end_io) {
atomic_inc(&conf->mirrors[d].rdev->nr_pending);
- md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(wbio));
+ md_sync_acct(conf->mirrors[d].rdev, bio_sectors(wbio));
submit_bio_noacct(wbio);
}
if (wbio2) {
atomic_inc(&conf->mirrors[d].replacement->nr_pending);
- md_sync_acct(conf->mirrors[d].replacement->bdev,
+ md_sync_acct(conf->mirrors[d].replacement,
bio_sectors(wbio2));
submit_bio_noacct(wbio2);
}
@@ -3486,7 +3486,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
from_addr = r10_bio->devs[j].addr;
bio->bi_iter.bi_sector = from_addr +
rdev->data_offset;
- bio_set_dev(bio, rdev->bdev);
+ bio->bi_bdev = (void *)rdev;
atomic_inc(&rdev->nr_pending);
/* and we write to 'i' (if not in_sync) */

@@ -3508,7 +3508,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
bio->bi_iter.bi_sector = to_addr
+ mrdev->data_offset;
- bio_set_dev(bio, mrdev->bdev);
+ bio->bi_bdev = (void *)mrdev;
atomic_inc(&r10_bio->remaining);
} else
r10_bio->devs[1].bio->bi_end_io = NULL;
@@ -3529,7 +3529,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
bio->bi_iter.bi_sector = to_addr +
mreplace->data_offset;
- bio_set_dev(bio, mreplace->bdev);
+ bio->bi_bdev = (void *)mreplace;
atomic_inc(&r10_bio->remaining);
break;
}
@@ -3684,7 +3684,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
if (test_bit(FailFast, &rdev->flags))
bio->bi_opf |= MD_FAILFAST;
bio->bi_iter.bi_sector = sector + rdev->data_offset;
- bio_set_dev(bio, rdev->bdev);
+ bio->bi_bdev = (void *)rdev;
count++;

rdev = rcu_dereference(conf->mirrors[d].replacement);
@@ -3706,7 +3706,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
if (test_bit(FailFast, &rdev->flags))
bio->bi_opf |= MD_FAILFAST;
bio->bi_iter.bi_sector = sector + rdev->data_offset;
- bio_set_dev(bio, rdev->bdev);
+ bio->bi_bdev = (void *)rdev;
count++;
rcu_read_unlock();
}
@@ -4865,7 +4865,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,

read_bio = bio_alloc_bioset(GFP_KERNEL, RESYNC_PAGES, &mddev->bio_set);

- bio_set_dev(read_bio, rdev->bdev);
+ read_bio->bi_bdev = (void *)rdev;
read_bio->bi_iter.bi_sector = (r10_bio->devs[r10_bio->read_slot].addr
+ rdev->data_offset);
read_bio->bi_private = r10_bio;
@@ -4921,7 +4921,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
if (!rdev2 || test_bit(Faulty, &rdev2->flags))
continue;

- bio_set_dev(b, rdev2->bdev);
+ b->bi_bdev = (void *)rdev2;
b->bi_iter.bi_sector = r10_bio->devs[s/2].addr +
rdev2->new_data_offset;
b->bi_end_io = end_reshape_write;
@@ -5016,7 +5016,7 @@ static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio)
}
atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
- md_sync_acct_bio(b, r10_bio->sectors);
+ md_sync_acct(rdev, r10_bio->sectors);
atomic_inc(&r10_bio->remaining);
b->bi_next = NULL;
submit_bio_noacct(b);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 9c1a5877cf9f..b932282ff50a 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1167,7 +1167,7 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
if (rdev) {
if (s->syncing || s->expanding || s->expanded
|| s->replacing)
- md_sync_acct(rdev->bdev, RAID5_STRIPE_SECTORS(conf));
+ md_sync_acct(rdev, RAID5_STRIPE_SECTORS(conf));

set_bit(STRIPE_IO_STARTED, &sh->state);

@@ -1234,7 +1234,7 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
if (rrdev) {
if (s->syncing || s->expanding || s->expanded
|| s->replacing)
- md_sync_acct(rrdev->bdev, RAID5_STRIPE_SECTORS(conf));
+ md_sync_acct(rrdev, RAID5_STRIPE_SECTORS(conf));

set_bit(STRIPE_IO_STARTED, &sh->state);

diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 74c410263113..6b84444111e4 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -150,7 +150,6 @@ struct gendisk {
struct list_head slave_bdevs;
#endif
struct timer_rand_state *random;
- atomic_t sync_io; /* RAID */
struct disk_events *ev;
#ifdef CONFIG_BLK_DEV_INTEGRITY
struct kobject integrity_kobj;
--
2.27.0


2021-12-10 04:46:22

by Li Jinlin

[permalink] [raw]
Subject: [PATCH 3/3] drbd: Remove useless variable in struct drbd_device

rs_last_sect_ev is unused since added in 1d7734a0df02, so just remove it.

Signed-off-by: Li Jinlin <[email protected]>
---
drivers/block/drbd/drbd_int.h | 1 -
drivers/block/drbd/drbd_main.c | 1 -
drivers/block/drbd/drbd_state.c | 1 -
drivers/block/drbd/drbd_worker.c | 1 -
4 files changed, 4 deletions(-)

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 1b71adc07e83..a163141aff1b 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -955,7 +955,6 @@ struct drbd_device {
char congestion_reason; /* Why we where congested... */
atomic_t rs_sect_in; /* for incoming resync data rate, SyncTarget */
atomic64_t rs_sect_ev; /* for submitted resync data rate, both */
- int rs_last_sect_ev; /* counter to compare with */
s64 rs_last_events; /* counter of read or write "events" (unit sectors)
* on the lower level device when we last looked. */
int c_sync_rate; /* current resync rate after syncer throttle magic */
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index ea057bd60541..f1fa03c69809 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2046,7 +2046,6 @@ void drbd_device_cleanup(struct drbd_device *device)
device->rs_total =
device->rs_failed = 0;
device->rs_last_events = 0;
- device->rs_last_sect_ev = 0;
for (i = 0; i < DRBD_SYNC_MARKS; i++) {
device->rs_mark_left[i] = 0;
device->rs_mark_time[i] = 0;
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index b8a27818ab3f..4a6c69133c62 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -1389,7 +1389,6 @@ _drbd_set_state(struct drbd_device *device, union drbd_state ns,

set_ov_position(device, ns.conn);
device->rs_start = now;
- device->rs_last_sect_ev = 0;
device->ov_last_oos_size = 0;
device->ov_last_oos_start = 0;

diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index a4edd0a9c875..45ae4abd355a 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -1829,7 +1829,6 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
device->rs_failed = 0;
device->rs_paused = 0;
device->rs_same_csum = 0;
- device->rs_last_sect_ev = 0;
device->rs_total = tw;
device->rs_start = now;
for (i = 0; i < DRBD_SYNC_MARKS; i++) {
--
2.27.0


2021-12-10 06:45:57

by Hannes Reinecke

[permalink] [raw]
Subject: Re: [PATCH v2 1/3] md: Fix undefined behaviour in is_mddev_idle

On 12/10/21 6:17 AM, Li Jinlin wrote:
> UBSAN reports this problem:
>
> [ 5984.281385] UBSAN: Undefined behaviour in drivers/md/md.c:8175:15
> [ 5984.281390] signed integer overflow:
> [ 5984.281393] -2147483291 - 2072033152 cannot be represented in type 'int'
> [ 5984.281400] CPU: 25 PID: 1854 Comm: md101_resync Kdump: loaded Not tainted 4.19.90
> [ 5984.281404] Hardware name: Huawei TaiShan 200 (Model 5280)/BC82AMDDA
> [ 5984.281406] Call trace:
> [ 5984.281415] dump_backtrace+0x0/0x310
> [ 5984.281418] show_stack+0x28/0x38
> [ 5984.281425] dump_stack+0xec/0x15c
> [ 5984.281430] ubsan_epilogue+0x18/0x84
> [ 5984.281434] handle_overflow+0x14c/0x19c
> [ 5984.281439] __ubsan_handle_sub_overflow+0x34/0x44
> [ 5984.281445] is_mddev_idle+0x338/0x3d8
> [ 5984.281449] md_do_sync+0x1bb8/0x1cf8
> [ 5984.281452] md_thread+0x220/0x288
> [ 5984.281457] kthread+0x1d8/0x1e0
> [ 5984.281461] ret_from_fork+0x10/0x18
>
> When the stat aacum of the disk is greater than INT_MAX, its
> value becomes negative after casting to 'int', which may lead
> to overflow after subtracting a positive number. In the same
> way, when the value of sync_io is greater than INT_MAX,
> overflow may also occur. These situations will lead to
> undefined behavior.
>
> Otherwise, if the stat accum of the disk is close to INT_MAX
> when creating raid arrays, the initial value of last_events
> would be set close to INT_MAX when mddev initializes IO
> event counters. 'curr_events - rdev->last_events > 64' will
> always false during synchronization. If all the disks of mddev
> are in this case, is_mddev_idle() will always return 1, which
> may cause non-sync IO is very slow.
>
> To address these problems, need to use 64bit signed integer
> type for sync_io, last_events, and curr_events.
>
> In all the drivers that come with the kernel, the sync_io
> variable in struct gendisk is only used for the md driver
> currently. It should be more suitable in struct md_rdev, so
> add the sync_io variable in struct md_rdev, and use it to
> replace. We modify md_sync_acct() and md_sync_acct_bio()
> to fit for this change as well. md_sync_acct_bio() need
> access to the rdev, so we set bio->bi_bdev to rdev before
> calling it, and reset bio->bi_bdev to bdev in this function.
>
Please make that two patches, one for moving sync_io and one for
fixing the undefined behaviour.

> Signed-off-by: Li Jinlin <[email protected]>
> ---
> drivers/md/md.c | 6 +++---
> drivers/md/md.h | 13 +++++++++----
> drivers/md/raid1.c | 4 ++--
> drivers/md/raid10.c | 24 ++++++++++++------------
> drivers/md/raid5.c | 4 ++--
> include/linux/genhd.h | 1 -
> 6 files changed, 28 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 5111ed966947..f1b71a92801e 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -8429,14 +8429,14 @@ static int is_mddev_idle(struct mddev *mddev, int init)
> {
> struct md_rdev *rdev;
> int idle;
> - int curr_events;
> + s64 curr_events;
>
> idle = 1;
> rcu_read_lock();
> rdev_for_each_rcu(rdev, mddev) {
> struct gendisk *disk = rdev->bdev->bd_disk;
> - curr_events = (int)part_stat_read_accum(disk->part0, sectors) -
> - atomic_read(&disk->sync_io);
> + curr_events = (s64)part_stat_read_accum(disk->part0, sectors) -
> + atomic64_read(&rdev->sync_io);

So you are replacing a 'signed integer' (ie 32bit) calculation with a
'signed 64-bit integer' calculation.
IE you just shifted the overflow from INT_MAX to LONG_MAX, without
actually fixing it, or?

> /* sync IO will cause sync_io to increase before the disk_stats
> * as sync_io is counted when a request starts, and
> * disk_stats is counted when it completes.
> diff --git a/drivers/md/md.h b/drivers/md/md.h
> index 53ea7a6961de..584e357e0940 100644
> --- a/drivers/md/md.h
> +++ b/drivers/md/md.h
> @@ -50,7 +50,7 @@ struct md_rdev {
>
> sector_t sectors; /* Device size (in 512bytes sectors) */
> struct mddev *mddev; /* RAID array if running */
> - int last_events; /* IO event timestamp */
> + s64 last_events; /* IO event timestamp */
>
> /*
> * If meta_bdev is non-NULL, it means that a separate device is
> @@ -138,6 +138,8 @@ struct md_rdev {
> unsigned int size; /* Size in sectors of the PPL space */
> sector_t sector; /* First sector of the PPL space */
> } ppl;
> +
> + atomic64_t sync_io; /* counter of sync IO (unit sectors) */
> };
> enum flag_bits {
> Faulty, /* device is known to have a fault */
> @@ -549,14 +551,17 @@ static inline int mddev_trylock(struct mddev *mddev)
> }
> extern void mddev_unlock(struct mddev *mddev);
>
> -static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sectors)
> +static inline void md_sync_acct(struct md_rdev *rdev, unsigned long nr_sectors)
> {
> - atomic_add(nr_sectors, &bdev->bd_disk->sync_io);
> + atomic64_add(nr_sectors, &rdev->sync_io);
> }
>
> static inline void md_sync_acct_bio(struct bio *bio, unsigned long nr_sectors)
> {
> - md_sync_acct(bio->bi_bdev, nr_sectors);
> + struct md_rdev *rdev = (void *)bio->bi_bdev;

That look weird. bio->bi_bdev should be a 'struct gendisk', not a MD
internal data structure.

> +
> + bio_set_dev(bio, rdev->bdev);
> + md_sync_acct(rdev, nr_sectors);
> }
>
> struct md_personality
> diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
> index 7dc8026cf6ee..74c42dabe57c 100644
> --- a/drivers/md/raid1.c
> +++ b/drivers/md/raid1.c
> @@ -2232,7 +2232,7 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio)
>
> wbio->bi_end_io = end_sync_write;
> atomic_inc(&r1_bio->remaining);
> - md_sync_acct(conf->mirrors[i].rdev->bdev, bio_sectors(wbio));
> + md_sync_acct(conf->mirrors[i].rdev, bio_sectors(wbio));
>
> submit_bio_noacct(wbio);
> }
> @@ -2791,7 +2791,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
> if (rdev && bio->bi_end_io) {
> atomic_inc(&rdev->nr_pending);
> bio->bi_iter.bi_sector = sector_nr + rdev->data_offset;
> - bio_set_dev(bio, rdev->bdev);
> + bio->bi_bdev = (void *)rdev;
> if (test_bit(FailFast, &rdev->flags))
> bio->bi_opf |= MD_FAILFAST;
> }
> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
> index dde98f65bd04..fc1e6c0996de 100644
> --- a/drivers/md/raid10.c
> +++ b/drivers/md/raid10.c
> @@ -2407,7 +2407,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
>
> atomic_inc(&conf->mirrors[d].rdev->nr_pending);
> atomic_inc(&r10_bio->remaining);
> - md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(tbio));
> + md_sync_acct(conf->mirrors[d].rdev, bio_sectors(tbio));
>
> if (test_bit(FailFast, &conf->mirrors[d].rdev->flags))
> tbio->bi_opf |= MD_FAILFAST;
> @@ -2430,7 +2430,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
> bio_copy_data(tbio, fbio);
> d = r10_bio->devs[i].devnum;
> atomic_inc(&r10_bio->remaining);
> - md_sync_acct(conf->mirrors[d].replacement->bdev,
> + md_sync_acct(conf->mirrors[d].replacement,
> bio_sectors(tbio));
> submit_bio_noacct(tbio);
> }
> @@ -2562,12 +2562,12 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
> wbio2 = NULL;
> if (wbio->bi_end_io) {
> atomic_inc(&conf->mirrors[d].rdev->nr_pending);
> - md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(wbio));
> + md_sync_acct(conf->mirrors[d].rdev, bio_sectors(wbio));
> submit_bio_noacct(wbio);
> }
> if (wbio2) {
> atomic_inc(&conf->mirrors[d].replacement->nr_pending);
> - md_sync_acct(conf->mirrors[d].replacement->bdev,
> + md_sync_acct(conf->mirrors[d].replacement,
> bio_sectors(wbio2));
> submit_bio_noacct(wbio2);
> }
> @@ -3486,7 +3486,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
> from_addr = r10_bio->devs[j].addr;
> bio->bi_iter.bi_sector = from_addr +
> rdev->data_offset;
> - bio_set_dev(bio, rdev->bdev);
> + bio->bi_bdev = (void *)rdev;
> atomic_inc(&rdev->nr_pending);
> /* and we write to 'i' (if not in_sync) */
>
> @@ -3508,7 +3508,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
> bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
> bio->bi_iter.bi_sector = to_addr
> + mrdev->data_offset;
> - bio_set_dev(bio, mrdev->bdev);
> + bio->bi_bdev = (void *)mrdev;
> atomic_inc(&r10_bio->remaining);
> } else
> r10_bio->devs[1].bio->bi_end_io = NULL;
> @@ -3529,7 +3529,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
> bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
> bio->bi_iter.bi_sector = to_addr +
> mreplace->data_offset;
> - bio_set_dev(bio, mreplace->bdev);
> + bio->bi_bdev = (void *)mreplace;
> atomic_inc(&r10_bio->remaining);
> break;
> }
> @@ -3684,7 +3684,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
> if (test_bit(FailFast, &rdev->flags))
> bio->bi_opf |= MD_FAILFAST;
> bio->bi_iter.bi_sector = sector + rdev->data_offset;
> - bio_set_dev(bio, rdev->bdev);
> + bio->bi_bdev = (void *)rdev;
> count++;
>
> rdev = rcu_dereference(conf->mirrors[d].replacement);
> @@ -3706,7 +3706,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
> if (test_bit(FailFast, &rdev->flags))
> bio->bi_opf |= MD_FAILFAST;
> bio->bi_iter.bi_sector = sector + rdev->data_offset;
> - bio_set_dev(bio, rdev->bdev);
> + bio->bi_bdev = (void *)rdev;
> count++;
> rcu_read_unlock();
> }
> @@ -4865,7 +4865,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
>
> read_bio = bio_alloc_bioset(GFP_KERNEL, RESYNC_PAGES, &mddev->bio_set);
>
> - bio_set_dev(read_bio, rdev->bdev);
> + read_bio->bi_bdev = (void *)rdev;
> read_bio->bi_iter.bi_sector = (r10_bio->devs[r10_bio->read_slot].addr
> + rdev->data_offset);
> read_bio->bi_private = r10_bio;
> @@ -4921,7 +4921,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
> if (!rdev2 || test_bit(Faulty, &rdev2->flags))
> continue;
>
> - bio_set_dev(b, rdev2->bdev);
> + b->bi_bdev = (void *)rdev2;
> b->bi_iter.bi_sector = r10_bio->devs[s/2].addr +
> rdev2->new_data_offset;
> b->bi_end_io = end_reshape_write;
> @@ -5016,7 +5016,7 @@ static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio)
> }
> atomic_inc(&rdev->nr_pending);
> rcu_read_unlock();
> - md_sync_acct_bio(b, r10_bio->sectors);
> + md_sync_acct(rdev, r10_bio->sectors);
> atomic_inc(&r10_bio->remaining);
> b->bi_next = NULL;
> submit_bio_noacct(b);
> diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
> index 9c1a5877cf9f..b932282ff50a 100644
> --- a/drivers/md/raid5.c
> +++ b/drivers/md/raid5.c
> @@ -1167,7 +1167,7 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
> if (rdev) {
> if (s->syncing || s->expanding || s->expanded
> || s->replacing)
> - md_sync_acct(rdev->bdev, RAID5_STRIPE_SECTORS(conf));
> + md_sync_acct(rdev, RAID5_STRIPE_SECTORS(conf));
>
> set_bit(STRIPE_IO_STARTED, &sh->state);
>
> @@ -1234,7 +1234,7 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
> if (rrdev) {
> if (s->syncing || s->expanding || s->expanded
> || s->replacing)
> - md_sync_acct(rrdev->bdev, RAID5_STRIPE_SECTORS(conf));
> + md_sync_acct(rrdev, RAID5_STRIPE_SECTORS(conf));
>
> set_bit(STRIPE_IO_STARTED, &sh->state);
>
> diff --git a/include/linux/genhd.h b/include/linux/genhd.h
> index 74c410263113..6b84444111e4 100644
> --- a/include/linux/genhd.h
> +++ b/include/linux/genhd.h
> @@ -150,7 +150,6 @@ struct gendisk {
> struct list_head slave_bdevs;
> #endif
> struct timer_rand_state *random;
> - atomic_t sync_io; /* RAID */
> struct disk_events *ev;
> #ifdef CONFIG_BLK_DEV_INTEGRITY
> struct kobject integrity_kobj;
>

Cheers,

Hannes
--
Dr. Hannes Reinecke Kernel Storage Architect
[email protected] +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Felix Imendörffer

2021-12-10 07:17:19

by Damien Le Moal

[permalink] [raw]
Subject: Re: [PATCH v2 1/3] md: Fix undefined behaviour in is_mddev_idle

On 2021/12/10 15:45, Hannes Reinecke wrote:
> On 12/10/21 6:17 AM, Li Jinlin wrote:
>> UBSAN reports this problem:
>>
>> [ 5984.281385] UBSAN: Undefined behaviour in drivers/md/md.c:8175:15
>> [ 5984.281390] signed integer overflow:
>> [ 5984.281393] -2147483291 - 2072033152 cannot be represented in type 'int'
>> [ 5984.281400] CPU: 25 PID: 1854 Comm: md101_resync Kdump: loaded Not tainted 4.19.90
>> [ 5984.281404] Hardware name: Huawei TaiShan 200 (Model 5280)/BC82AMDDA
>> [ 5984.281406] Call trace:
>> [ 5984.281415] dump_backtrace+0x0/0x310
>> [ 5984.281418] show_stack+0x28/0x38
>> [ 5984.281425] dump_stack+0xec/0x15c
>> [ 5984.281430] ubsan_epilogue+0x18/0x84
>> [ 5984.281434] handle_overflow+0x14c/0x19c
>> [ 5984.281439] __ubsan_handle_sub_overflow+0x34/0x44
>> [ 5984.281445] is_mddev_idle+0x338/0x3d8
>> [ 5984.281449] md_do_sync+0x1bb8/0x1cf8
>> [ 5984.281452] md_thread+0x220/0x288
>> [ 5984.281457] kthread+0x1d8/0x1e0
>> [ 5984.281461] ret_from_fork+0x10/0x18
>>
>> When the stat accum of the disk is greater than INT_MAX, its
>> value becomes negative after casting to 'int', which may lead
>> to overflow after subtracting a positive number. In the same
>> way, when the value of sync_io is greater than INT_MAX,
>> overflow may also occur. These situations will lead to
>> undefined behavior.
>>
>> Otherwise, if the stat accum of the disk is close to INT_MAX
>> when creating raid arrays, the initial value of last_events
>> would be set close to INT_MAX when mddev initializes IO
>> event counters. 'curr_events - rdev->last_events > 64' will
>> always be false during synchronization. If all the disks of mddev
>> are in this case, is_mddev_idle() will always return 1, which
>> may cause non-sync IO to be very slow.
>>
>> To address these problems, need to use 64bit signed integer
>> type for sync_io, last_events, and curr_events.
>>
>> In all the drivers that come with the kernel, the sync_io
>> variable in struct gendisk is only used for the md driver
>> currently. It should be more suitable in struct md_rdev, so
>> add the sync_io variable in struct md_rdev, and use it to
>> replace. We modify md_sync_acct() and md_sync_acct_bio()
>> to fit for this change as well. md_sync_acct_bio() needs
>> access to the rdev, so we set bio->bi_bdev to rdev before
>> calling it, and reset bio->bi_bdev to bdev in this function.
>>
> Please make that two patches, one for moving sync_io and one for
> fixing the undefined behaviour.
>
>> Signed-off-by: Li Jinlin <[email protected]>
>> ---
>> drivers/md/md.c | 6 +++---
>> drivers/md/md.h | 13 +++++++++----
>> drivers/md/raid1.c | 4 ++--
>> drivers/md/raid10.c | 24 ++++++++++++------------
>> drivers/md/raid5.c | 4 ++--
>> include/linux/genhd.h | 1 -
>> 6 files changed, 28 insertions(+), 24 deletions(-)
>>
>> diff --git a/drivers/md/md.c b/drivers/md/md.c
>> index 5111ed966947..f1b71a92801e 100644
>> --- a/drivers/md/md.c
>> +++ b/drivers/md/md.c
>> @@ -8429,14 +8429,14 @@ static int is_mddev_idle(struct mddev *mddev, int init)
>> {
>> struct md_rdev *rdev;
>> int idle;
>> - int curr_events;
>> + s64 curr_events;
>>
>> idle = 1;
>> rcu_read_lock();
>> rdev_for_each_rcu(rdev, mddev) {
>> struct gendisk *disk = rdev->bdev->bd_disk;
>> - curr_events = (int)part_stat_read_accum(disk->part0, sectors) -
>> - atomic_read(&disk->sync_io);
>> + curr_events = (s64)part_stat_read_accum(disk->part0, sectors) -
>> + atomic64_read(&rdev->sync_io);
>
> So you are replacing a 'signed integer' (ie 32bit) calculation with a
> 'signed 64-bit integer' calculation.
> IE you just shifted the overflow from INT_MAX to LONG_MAX, without
> actually fixing it, or?
>
>> /* sync IO will cause sync_io to increase before the disk_stats
>> * as sync_io is counted when a request starts, and
>> * disk_stats is counted when it completes.
>> diff --git a/drivers/md/md.h b/drivers/md/md.h
>> index 53ea7a6961de..584e357e0940 100644
>> --- a/drivers/md/md.h
>> +++ b/drivers/md/md.h
>> @@ -50,7 +50,7 @@ struct md_rdev {
>>
>> sector_t sectors; /* Device size (in 512bytes sectors) */
>> struct mddev *mddev; /* RAID array if running */
>> - int last_events; /* IO event timestamp */
>> + s64 last_events; /* IO event timestamp */
>>
>> /*
>> * If meta_bdev is non-NULL, it means that a separate device is
>> @@ -138,6 +138,8 @@ struct md_rdev {
>> unsigned int size; /* Size in sectors of the PPL space */
>> sector_t sector; /* First sector of the PPL space */
>> } ppl;
>> +
>> + atomic64_t sync_io; /* counter of sync IO (unit sectors) */
>> };
>> enum flag_bits {
>> Faulty, /* device is known to have a fault */
>> @@ -549,14 +551,17 @@ static inline int mddev_trylock(struct mddev *mddev)
>> }
>> extern void mddev_unlock(struct mddev *mddev);
>>
>> -static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sectors)
>> +static inline void md_sync_acct(struct md_rdev *rdev, unsigned long nr_sectors)
>> {
>> - atomic_add(nr_sectors, &bdev->bd_disk->sync_io);
>> + atomic64_add(nr_sectors, &rdev->sync_io);
>> }
>>
>> static inline void md_sync_acct_bio(struct bio *bio, unsigned long nr_sectors)
>> {
>> - md_sync_acct(bio->bi_bdev, nr_sectors);
>> + struct md_rdev *rdev = (void *)bio->bi_bdev;
>
> That look weird. bio->bi_bdev should be a 'struct gendisk', not a MD
> internal data structure.

You mean a "struct block_device". right ? :)

>
>> +
>> + bio_set_dev(bio, rdev->bdev);
>> + md_sync_acct(rdev, nr_sectors);
>> }
>>
>> struct md_personality
>> diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
>> index 7dc8026cf6ee..74c42dabe57c 100644
>> --- a/drivers/md/raid1.c
>> +++ b/drivers/md/raid1.c
>> @@ -2232,7 +2232,7 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio)
>>
>> wbio->bi_end_io = end_sync_write;
>> atomic_inc(&r1_bio->remaining);
>> - md_sync_acct(conf->mirrors[i].rdev->bdev, bio_sectors(wbio));
>> + md_sync_acct(conf->mirrors[i].rdev, bio_sectors(wbio));
>>
>> submit_bio_noacct(wbio);
>> }
>> @@ -2791,7 +2791,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
>> if (rdev && bio->bi_end_io) {
>> atomic_inc(&rdev->nr_pending);
>> bio->bi_iter.bi_sector = sector_nr + rdev->data_offset;
>> - bio_set_dev(bio, rdev->bdev);
>> + bio->bi_bdev = (void *)rdev;
>> if (test_bit(FailFast, &rdev->flags))
>> bio->bi_opf |= MD_FAILFAST;
>> }
>> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
>> index dde98f65bd04..fc1e6c0996de 100644
>> --- a/drivers/md/raid10.c
>> +++ b/drivers/md/raid10.c
>> @@ -2407,7 +2407,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
>>
>> atomic_inc(&conf->mirrors[d].rdev->nr_pending);
>> atomic_inc(&r10_bio->remaining);
>> - md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(tbio));
>> + md_sync_acct(conf->mirrors[d].rdev, bio_sectors(tbio));
>>
>> if (test_bit(FailFast, &conf->mirrors[d].rdev->flags))
>> tbio->bi_opf |= MD_FAILFAST;
>> @@ -2430,7 +2430,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
>> bio_copy_data(tbio, fbio);
>> d = r10_bio->devs[i].devnum;
>> atomic_inc(&r10_bio->remaining);
>> - md_sync_acct(conf->mirrors[d].replacement->bdev,
>> + md_sync_acct(conf->mirrors[d].replacement,
>> bio_sectors(tbio));
>> submit_bio_noacct(tbio);
>> }
>> @@ -2562,12 +2562,12 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
>> wbio2 = NULL;
>> if (wbio->bi_end_io) {
>> atomic_inc(&conf->mirrors[d].rdev->nr_pending);
>> - md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(wbio));
>> + md_sync_acct(conf->mirrors[d].rdev, bio_sectors(wbio));
>> submit_bio_noacct(wbio);
>> }
>> if (wbio2) {
>> atomic_inc(&conf->mirrors[d].replacement->nr_pending);
>> - md_sync_acct(conf->mirrors[d].replacement->bdev,
>> + md_sync_acct(conf->mirrors[d].replacement,
>> bio_sectors(wbio2));
>> submit_bio_noacct(wbio2);
>> }
>> @@ -3486,7 +3486,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
>> from_addr = r10_bio->devs[j].addr;
>> bio->bi_iter.bi_sector = from_addr +
>> rdev->data_offset;
>> - bio_set_dev(bio, rdev->bdev);
>> + bio->bi_bdev = (void *)rdev;
>> atomic_inc(&rdev->nr_pending);
>> /* and we write to 'i' (if not in_sync) */
>>
>> @@ -3508,7 +3508,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
>> bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
>> bio->bi_iter.bi_sector = to_addr
>> + mrdev->data_offset;
>> - bio_set_dev(bio, mrdev->bdev);
>> + bio->bi_bdev = (void *)mrdev;
>> atomic_inc(&r10_bio->remaining);
>> } else
>> r10_bio->devs[1].bio->bi_end_io = NULL;
>> @@ -3529,7 +3529,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
>> bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
>> bio->bi_iter.bi_sector = to_addr +
>> mreplace->data_offset;
>> - bio_set_dev(bio, mreplace->bdev);
>> + bio->bi_bdev = (void *)mreplace;
>> atomic_inc(&r10_bio->remaining);
>> break;
>> }
>> @@ -3684,7 +3684,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
>> if (test_bit(FailFast, &rdev->flags))
>> bio->bi_opf |= MD_FAILFAST;
>> bio->bi_iter.bi_sector = sector + rdev->data_offset;
>> - bio_set_dev(bio, rdev->bdev);
>> + bio->bi_bdev = (void *)rdev;
>> count++;
>>
>> rdev = rcu_dereference(conf->mirrors[d].replacement);
>> @@ -3706,7 +3706,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
>> if (test_bit(FailFast, &rdev->flags))
>> bio->bi_opf |= MD_FAILFAST;
>> bio->bi_iter.bi_sector = sector + rdev->data_offset;
>> - bio_set_dev(bio, rdev->bdev);
>> + bio->bi_bdev = (void *)rdev;
>> count++;
>> rcu_read_unlock();
>> }
>> @@ -4865,7 +4865,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
>>
>> read_bio = bio_alloc_bioset(GFP_KERNEL, RESYNC_PAGES, &mddev->bio_set);
>>
>> - bio_set_dev(read_bio, rdev->bdev);
>> + read_bio->bi_bdev = (void *)rdev;
>> read_bio->bi_iter.bi_sector = (r10_bio->devs[r10_bio->read_slot].addr
>> + rdev->data_offset);
>> read_bio->bi_private = r10_bio;
>> @@ -4921,7 +4921,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
>> if (!rdev2 || test_bit(Faulty, &rdev2->flags))
>> continue;
>>
>> - bio_set_dev(b, rdev2->bdev);
>> + b->bi_bdev = (void *)rdev2;
>> b->bi_iter.bi_sector = r10_bio->devs[s/2].addr +
>> rdev2->new_data_offset;
>> b->bi_end_io = end_reshape_write;
>> @@ -5016,7 +5016,7 @@ static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio)
>> }
>> atomic_inc(&rdev->nr_pending);
>> rcu_read_unlock();
>> - md_sync_acct_bio(b, r10_bio->sectors);
>> + md_sync_acct(rdev, r10_bio->sectors);
>> atomic_inc(&r10_bio->remaining);
>> b->bi_next = NULL;
>> submit_bio_noacct(b);
>> diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
>> index 9c1a5877cf9f..b932282ff50a 100644
>> --- a/drivers/md/raid5.c
>> +++ b/drivers/md/raid5.c
>> @@ -1167,7 +1167,7 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
>> if (rdev) {
>> if (s->syncing || s->expanding || s->expanded
>> || s->replacing)
>> - md_sync_acct(rdev->bdev, RAID5_STRIPE_SECTORS(conf));
>> + md_sync_acct(rdev, RAID5_STRIPE_SECTORS(conf));
>>
>> set_bit(STRIPE_IO_STARTED, &sh->state);
>>
>> @@ -1234,7 +1234,7 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
>> if (rrdev) {
>> if (s->syncing || s->expanding || s->expanded
>> || s->replacing)
>> - md_sync_acct(rrdev->bdev, RAID5_STRIPE_SECTORS(conf));
>> + md_sync_acct(rrdev, RAID5_STRIPE_SECTORS(conf));
>>
>> set_bit(STRIPE_IO_STARTED, &sh->state);
>>
>> diff --git a/include/linux/genhd.h b/include/linux/genhd.h
>> index 74c410263113..6b84444111e4 100644
>> --- a/include/linux/genhd.h
>> +++ b/include/linux/genhd.h
>> @@ -150,7 +150,6 @@ struct gendisk {
>> struct list_head slave_bdevs;
>> #endif
>> struct timer_rand_state *random;
>> - atomic_t sync_io; /* RAID */
>> struct disk_events *ev;
>> #ifdef CONFIG_BLK_DEV_INTEGRITY
>> struct kobject integrity_kobj;
>>
>
> Cheers,
>
> Hannes


--
Damien Le Moal
Western Digital Research

2021-12-10 08:46:39

by Li Jinlin

[permalink] [raw]
Subject: Re: [PATCH v2 1/3] md: Fix undefined behaviour in is_mddev_idle



On 12/10/2021 3:17 PM, Damien Le Moal wrote:
> On 2021/12/10 15:45, Hannes Reinecke wrote:
>> On 12/10/21 6:17 AM, Li Jinlin wrote:
>>> UBSAN reports this problem:
>>>
>>> [ 5984.281385] UBSAN: Undefined behaviour in drivers/md/md.c:8175:15
>>> [ 5984.281390] signed integer overflow:
>>> [ 5984.281393] -2147483291 - 2072033152 cannot be represented in type 'int'
>>> [ 5984.281400] CPU: 25 PID: 1854 Comm: md101_resync Kdump: loaded Not tainted 4.19.90
>>> [ 5984.281404] Hardware name: Huawei TaiShan 200 (Model 5280)/BC82AMDDA
>>> [ 5984.281406] Call trace:
>>> [ 5984.281415] dump_backtrace+0x0/0x310
>>> [ 5984.281418] show_stack+0x28/0x38
>>> [ 5984.281425] dump_stack+0xec/0x15c
>>> [ 5984.281430] ubsan_epilogue+0x18/0x84
>>> [ 5984.281434] handle_overflow+0x14c/0x19c
>>> [ 5984.281439] __ubsan_handle_sub_overflow+0x34/0x44
>>> [ 5984.281445] is_mddev_idle+0x338/0x3d8
>>> [ 5984.281449] md_do_sync+0x1bb8/0x1cf8
>>> [ 5984.281452] md_thread+0x220/0x288
>>> [ 5984.281457] kthread+0x1d8/0x1e0
>>> [ 5984.281461] ret_from_fork+0x10/0x18
>>>
>>> When the stat accum of the disk is greater than INT_MAX, its
>>> value becomes negative after casting to 'int', which may lead
>>> to overflow after subtracting a positive number. In the same
>>> way, when the value of sync_io is greater than INT_MAX,
>>> overflow may also occur. These situations will lead to
>>> undefined behavior.
>>>
>>> Otherwise, if the stat accum of the disk is close to INT_MAX
>>> when creating raid arrays, the initial value of last_events
>>> would be set close to INT_MAX when mddev initializes IO
>>> event counters. 'curr_events - rdev->last_events > 64' will
>>> always be false during synchronization. If all the disks of mddev
>>> are in this case, is_mddev_idle() will always return 1, which
>>> may cause non-sync IO to be very slow.
>>>
>>> To address these problems, need to use 64bit signed integer
>>> type for sync_io, last_events, and curr_events.
>>>
>>> In all the drivers that come with the kernel, the sync_io
>>> variable in struct gendisk is only used for the md driver
>>> currently. It should be more suitable in struct md_rdev, so
>>> add the sync_io variable in struct md_rdev, and use it to
>>> replace. We modify md_sync_acct() and md_sync_acct_bio()
>>> to fit for this change as well. md_sync_acct_bio() needs
>>> access to the rdev, so we set bio->bi_bdev to rdev before
>>> calling it, and reset bio->bi_bdev to bdev in this function.
>>>
>> Please make that two patches, one for moving sync_io and one for
>> fixing the undefined behaviour.

ok.

>>
>>> Signed-off-by: Li Jinlin <[email protected]>
>>> ---
>>> drivers/md/md.c | 6 +++---
>>> drivers/md/md.h | 13 +++++++++----
>>> drivers/md/raid1.c | 4 ++--
>>> drivers/md/raid10.c | 24 ++++++++++++------------
>>> drivers/md/raid5.c | 4 ++--
>>> include/linux/genhd.h | 1 -
>>> 6 files changed, 28 insertions(+), 24 deletions(-)
>>>
>>> diff --git a/drivers/md/md.c b/drivers/md/md.c
>>> index 5111ed966947..f1b71a92801e 100644
>>> --- a/drivers/md/md.c
>>> +++ b/drivers/md/md.c
>>> @@ -8429,14 +8429,14 @@ static int is_mddev_idle(struct mddev *mddev, int init)
>>> {
>>> struct md_rdev *rdev;
>>> int idle;
>>> - int curr_events;
>>> + s64 curr_events;
>>>
>>> idle = 1;
>>> rcu_read_lock();
>>> rdev_for_each_rcu(rdev, mddev) {
>>> struct gendisk *disk = rdev->bdev->bd_disk;
>>> - curr_events = (int)part_stat_read_accum(disk->part0, sectors) -
>>> - atomic_read(&disk->sync_io);
>>> + curr_events = (s64)part_stat_read_accum(disk->part0, sectors) -
>>> + atomic64_read(&rdev->sync_io);
>>
>> So you are replacing a 'signed integer' (ie 32bit) calculation with a
>> 'signed 64-bit integer' calculation.
>> IE you just shifted the overflow from INT_MAX to LONG_MAX, without
>> actually fixing it, or?

Yes, for the disk with 512-byte sector sizes, the data size of INT_MAX
sectors is 1T. LONG_MAX is enough to use until the server restarts,
and overflow will not occur anymore.

>>
>>> /* sync IO will cause sync_io to increase before the disk_stats
>>> * as sync_io is counted when a request starts, and
>>> * disk_stats is counted when it completes.
>>> diff --git a/drivers/md/md.h b/drivers/md/md.h
>>> index 53ea7a6961de..584e357e0940 100644
>>> --- a/drivers/md/md.h
>>> +++ b/drivers/md/md.h
>>> @@ -50,7 +50,7 @@ struct md_rdev {
>>>
>>> sector_t sectors; /* Device size (in 512bytes sectors) */
>>> struct mddev *mddev; /* RAID array if running */
>>> - int last_events; /* IO event timestamp */
>>> + s64 last_events; /* IO event timestamp */
>>>
>>> /*
>>> * If meta_bdev is non-NULL, it means that a separate device is
>>> @@ -138,6 +138,8 @@ struct md_rdev {
>>> unsigned int size; /* Size in sectors of the PPL space */
>>> sector_t sector; /* First sector of the PPL space */
>>> } ppl;
>>> +
>>> + atomic64_t sync_io; /* counter of sync IO (unit sectors) */
>>> };
>>> enum flag_bits {
>>> Faulty, /* device is known to have a fault */
>>> @@ -549,14 +551,17 @@ static inline int mddev_trylock(struct mddev *mddev)
>>> }
>>> extern void mddev_unlock(struct mddev *mddev);
>>>
>>> -static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sectors)
>>> +static inline void md_sync_acct(struct md_rdev *rdev, unsigned long nr_sectors)
>>> {
>>> - atomic_add(nr_sectors, &bdev->bd_disk->sync_io);
>>> + atomic64_add(nr_sectors, &rdev->sync_io);
>>> }
>>>
>>> static inline void md_sync_acct_bio(struct bio *bio, unsigned long nr_sectors)
>>> {
>>> - md_sync_acct(bio->bi_bdev, nr_sectors);
>>> + struct md_rdev *rdev = (void *)bio->bi_bdev;
>>
>> That look weird. bio->bi_bdev should be a 'struct gendisk', not a MD
>> internal data structure.
>
> You mean a "struct block_device". right ? :)
>

Here is a difficult point for moving sync_io. We need a good way
to solve it or not move sync_io.

>>
>>> +
>>> + bio_set_dev(bio, rdev->bdev);
>>> + md_sync_acct(rdev, nr_sectors);
>>> }
>>>
>>> struct md_personality
>>> diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
>>> index 7dc8026cf6ee..74c42dabe57c 100644
>>> --- a/drivers/md/raid1.c
>>> +++ b/drivers/md/raid1.c
>>> @@ -2232,7 +2232,7 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio)
>>>
>>> wbio->bi_end_io = end_sync_write;
>>> atomic_inc(&r1_bio->remaining);
>>> - md_sync_acct(conf->mirrors[i].rdev->bdev, bio_sectors(wbio));
>>> + md_sync_acct(conf->mirrors[i].rdev, bio_sectors(wbio));
>>>
>>> submit_bio_noacct(wbio);
>>> }
>>> @@ -2791,7 +2791,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
>>> if (rdev && bio->bi_end_io) {
>>> atomic_inc(&rdev->nr_pending);
>>> bio->bi_iter.bi_sector = sector_nr + rdev->data_offset;
>>> - bio_set_dev(bio, rdev->bdev);
>>> + bio->bi_bdev = (void *)rdev;
>>> if (test_bit(FailFast, &rdev->flags))
>>> bio->bi_opf |= MD_FAILFAST;
>>> }
>>> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
>>> index dde98f65bd04..fc1e6c0996de 100644
>>> --- a/drivers/md/raid10.c
>>> +++ b/drivers/md/raid10.c
>>> @@ -2407,7 +2407,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
>>>
>>> atomic_inc(&conf->mirrors[d].rdev->nr_pending);
>>> atomic_inc(&r10_bio->remaining);
>>> - md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(tbio));
>>> + md_sync_acct(conf->mirrors[d].rdev, bio_sectors(tbio));
>>>
>>> if (test_bit(FailFast, &conf->mirrors[d].rdev->flags))
>>> tbio->bi_opf |= MD_FAILFAST;
>>> @@ -2430,7 +2430,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
>>> bio_copy_data(tbio, fbio);
>>> d = r10_bio->devs[i].devnum;
>>> atomic_inc(&r10_bio->remaining);
>>> - md_sync_acct(conf->mirrors[d].replacement->bdev,
>>> + md_sync_acct(conf->mirrors[d].replacement,
>>> bio_sectors(tbio));
>>> submit_bio_noacct(tbio);
>>> }
>>> @@ -2562,12 +2562,12 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
>>> wbio2 = NULL;
>>> if (wbio->bi_end_io) {
>>> atomic_inc(&conf->mirrors[d].rdev->nr_pending);
>>> - md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(wbio));
>>> + md_sync_acct(conf->mirrors[d].rdev, bio_sectors(wbio));
>>> submit_bio_noacct(wbio);
>>> }
>>> if (wbio2) {
>>> atomic_inc(&conf->mirrors[d].replacement->nr_pending);
>>> - md_sync_acct(conf->mirrors[d].replacement->bdev,
>>> + md_sync_acct(conf->mirrors[d].replacement,
>>> bio_sectors(wbio2));
>>> submit_bio_noacct(wbio2);
>>> }
>>> @@ -3486,7 +3486,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
>>> from_addr = r10_bio->devs[j].addr;
>>> bio->bi_iter.bi_sector = from_addr +
>>> rdev->data_offset;
>>> - bio_set_dev(bio, rdev->bdev);
>>> + bio->bi_bdev = (void *)rdev;
>>> atomic_inc(&rdev->nr_pending);
>>> /* and we write to 'i' (if not in_sync) */
>>>
>>> @@ -3508,7 +3508,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
>>> bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
>>> bio->bi_iter.bi_sector = to_addr
>>> + mrdev->data_offset;
>>> - bio_set_dev(bio, mrdev->bdev);
>>> + bio->bi_bdev = (void *)mrdev;
>>> atomic_inc(&r10_bio->remaining);
>>> } else
>>> r10_bio->devs[1].bio->bi_end_io = NULL;
>>> @@ -3529,7 +3529,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
>>> bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
>>> bio->bi_iter.bi_sector = to_addr +
>>> mreplace->data_offset;
>>> - bio_set_dev(bio, mreplace->bdev);
>>> + bio->bi_bdev = (void *)mreplace;
>>> atomic_inc(&r10_bio->remaining);
>>> break;
>>> }
>>> @@ -3684,7 +3684,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
>>> if (test_bit(FailFast, &rdev->flags))
>>> bio->bi_opf |= MD_FAILFAST;
>>> bio->bi_iter.bi_sector = sector + rdev->data_offset;
>>> - bio_set_dev(bio, rdev->bdev);
>>> + bio->bi_bdev = (void *)rdev;
>>> count++;
>>>
>>> rdev = rcu_dereference(conf->mirrors[d].replacement);
>>> @@ -3706,7 +3706,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
>>> if (test_bit(FailFast, &rdev->flags))
>>> bio->bi_opf |= MD_FAILFAST;
>>> bio->bi_iter.bi_sector = sector + rdev->data_offset;
>>> - bio_set_dev(bio, rdev->bdev);
>>> + bio->bi_bdev = (void *)rdev;
>>> count++;
>>> rcu_read_unlock();
>>> }
>>> @@ -4865,7 +4865,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
>>>
>>> read_bio = bio_alloc_bioset(GFP_KERNEL, RESYNC_PAGES, &mddev->bio_set);
>>>
>>> - bio_set_dev(read_bio, rdev->bdev);
>>> + read_bio->bi_bdev = (void *)rdev;
>>> read_bio->bi_iter.bi_sector = (r10_bio->devs[r10_bio->read_slot].addr
>>> + rdev->data_offset);
>>> read_bio->bi_private = r10_bio;
>>> @@ -4921,7 +4921,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
>>> if (!rdev2 || test_bit(Faulty, &rdev2->flags))
>>> continue;
>>>
>>> - bio_set_dev(b, rdev2->bdev);
>>> + b->bi_bdev = (void *)rdev2;
>>> b->bi_iter.bi_sector = r10_bio->devs[s/2].addr +
>>> rdev2->new_data_offset;
>>> b->bi_end_io = end_reshape_write;
>>> @@ -5016,7 +5016,7 @@ static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio)
>>> }
>>> atomic_inc(&rdev->nr_pending);
>>> rcu_read_unlock();
>>> - md_sync_acct_bio(b, r10_bio->sectors);
>>> + md_sync_acct(rdev, r10_bio->sectors);
>>> atomic_inc(&r10_bio->remaining);
>>> b->bi_next = NULL;
>>> submit_bio_noacct(b);
>>> diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
>>> index 9c1a5877cf9f..b932282ff50a 100644
>>> --- a/drivers/md/raid5.c
>>> +++ b/drivers/md/raid5.c
>>> @@ -1167,7 +1167,7 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
>>> if (rdev) {
>>> if (s->syncing || s->expanding || s->expanded
>>> || s->replacing)
>>> - md_sync_acct(rdev->bdev, RAID5_STRIPE_SECTORS(conf));
>>> + md_sync_acct(rdev, RAID5_STRIPE_SECTORS(conf));
>>>
>>> set_bit(STRIPE_IO_STARTED, &sh->state);
>>>
>>> @@ -1234,7 +1234,7 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
>>> if (rrdev) {
>>> if (s->syncing || s->expanding || s->expanded
>>> || s->replacing)
>>> - md_sync_acct(rrdev->bdev, RAID5_STRIPE_SECTORS(conf));
>>> + md_sync_acct(rrdev, RAID5_STRIPE_SECTORS(conf));
>>>
>>> set_bit(STRIPE_IO_STARTED, &sh->state);
>>>
>>> diff --git a/include/linux/genhd.h b/include/linux/genhd.h
>>> index 74c410263113..6b84444111e4 100644
>>> --- a/include/linux/genhd.h
>>> +++ b/include/linux/genhd.h
>>> @@ -150,7 +150,6 @@ struct gendisk {
>>> struct list_head slave_bdevs;
>>> #endif
>>> struct timer_rand_state *random;
>>> - atomic_t sync_io; /* RAID */
>>> struct disk_events *ev;
>>> #ifdef CONFIG_BLK_DEV_INTEGRITY
>>> struct kobject integrity_kobj;
>>>
>>
>> Cheers,
>>
>> Hannes
>
>
Thanks,
JinLin