2005-10-05 16:57:51

by Suzanne Wood

[permalink] [raw]
Subject: [RFC][PATCH] identify raid rcu-protected pointer

Please consider the following patch submittal
to insert rcu_dereference() to make explicit
the object of the rcu read-side critical sections.
Thank you.

drivers/md/multipath.c | 6 +++---
drivers/md/raid10.c | 6 +++---
drivers/md/raid1.c | 8 ++++----
drivers/md/raid5.c | 6 +++---
drivers/md/raid6main.c | 6 +++---
5 files changed, 16 insertions(+), 16 deletions(-)

---------------------------------------------------

diff -urpNa -X dontdiff a/linux-2.6.14-rc3/drivers/md/multipath.c b/linux-2.6.14-rc3/drivers/md/multipath.c
--- a/linux-2.6.14-rc3/drivers/md/multipath.c 2005-09-30 14:17:34.000000000 -0700
+++ b/linux-2.6.14-rc3/drivers/md/multipath.c 2005-10-04 21:12:36.000000000 -0700
@@ -63,7 +63,7 @@ static int multipath_map (multipath_conf

rcu_read_lock();
for (i = 0; i < disks; i++) {
- mdk_rdev_t *rdev = conf->multipaths[i].rdev;
+ mdk_rdev_t *rdev = rcu_deference(conf->multipaths[i].rdev);
if (rdev && rdev->in_sync) {
atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
@@ -139,7 +139,7 @@ static void unplug_slaves(mddev_t *mddev

rcu_read_lock();
for (i=0; i<mddev->raid_disks; i++) {
- mdk_rdev_t *rdev = conf->multipaths[i].rdev;
+ mdk_rdev_t *rdev = rcu_deference(conf->multipaths[i].rdev);
if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
request_queue_t *r_queue = bdev_get_queue(rdev->bdev);

@@ -228,7 +228,7 @@ static int multipath_issue_flush(request

rcu_read_lock();
for (i=0; i<mddev->raid_disks && ret == 0; i++) {
- mdk_rdev_t *rdev = conf->multipaths[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev);
if (rdev && !rdev->faulty) {
struct block_device *bdev = rdev->bdev;
request_queue_t *r_queue = bdev_get_queue(bdev);
diff -urpNa -X dontdiff a/linux-2.6.14-rc3/drivers/md/raid10.c b/linux-2.6.14-rc3/drivers/md/raid10.c
--- a/linux-2.6.14-rc3/drivers/md/raid10.c 2005-09-30 14:17:34.000000000 -0700
+++ b/linux-2.6.14-rc3/drivers/md/raid10.c 2005-10-04 21:24:33.000000000 -0700
@@ -583,7 +583,7 @@ static void unplug_slaves(mddev_t *mddev

rcu_read_lock();
for (i=0; i<mddev->raid_disks; i++) {
- mdk_rdev_t *rdev = conf->mirrors[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
request_queue_t *r_queue = bdev_get_queue(rdev->bdev);

@@ -614,7 +614,7 @@ static int raid10_issue_flush(request_qu

rcu_read_lock();
for (i=0; i<mddev->raid_disks && ret == 0; i++) {
- mdk_rdev_t *rdev = conf->mirrors[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
if (rdev && !rdev->faulty) {
struct block_device *bdev = rdev->bdev;
request_queue_t *r_queue = bdev_get_queue(bdev);
@@ -772,7 +772,7 @@ static int make_request(request_queue_t
rcu_read_lock();
for (i = 0; i < conf->copies; i++) {
int d = r10_bio->devs[i].devnum;
- if (conf->mirrors[d].rdev &&
+ if (rcu_dereference(conf->mirrors[d].rdev) &&
!conf->mirrors[d].rdev->faulty) {
atomic_inc(&conf->mirrors[d].rdev->nr_pending);
r10_bio->devs[i].bio = bio;
diff -urpNa -X dontdiff a/linux-2.6.14-rc3/drivers/md/raid1.c b/linux-2.6.14-rc3/drivers/md/raid1.c
--- a/linux-2.6.14-rc3/drivers/md/raid1.c 2005-09-30 14:17:34.000000000 -0700
+++ b/linux-2.6.14-rc3/drivers/md/raid1.c 2005-10-04 21:16:00.000000000 -0700
@@ -416,7 +416,7 @@ static int read_balance(conf_t *conf, r1
/* Choose the first operation device, for consistancy */
new_disk = 0;

- for (rdev = conf->mirrors[new_disk].rdev;
+ for (rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
!rdev || !rdev->in_sync
|| test_bit(WriteMostly, &rdev->flags);
rdev = conf->mirrors[++new_disk].rdev) {
@@ -522,7 +522,7 @@ static void unplug_slaves(mddev_t *mddev

rcu_read_lock();
for (i=0; i<mddev->raid_disks; i++) {
- mdk_rdev_t *rdev = conf->mirrors[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdevi);
if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
request_queue_t *r_queue = bdev_get_queue(rdev->bdev);

@@ -556,7 +556,7 @@ static int raid1_issue_flush(request_que

rcu_read_lock();
for (i=0; i<mddev->raid_disks && ret == 0; i++) {
- mdk_rdev_t *rdev = conf->mirrors[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
if (rdev && !rdev->faulty) {
struct block_device *bdev = rdev->bdev;
request_queue_t *r_queue = bdev_get_queue(bdev);
@@ -732,7 +732,7 @@ static int make_request(request_queue_t
#endif
rcu_read_lock();
for (i = 0; i < disks; i++) {
- if ((rdev=conf->mirrors[i].rdev) != NULL &&
+ if ((rdev=rcu_dereference(conf->mirrors[i].rdev)) != NULL &&
!rdev->faulty) {
atomic_inc(&rdev->nr_pending);
if (rdev->faulty) {
diff -urpNa -X dontdiff a/linux-2.6.14-rc3/drivers/md/raid5.c b/linux-2.6.14-rc3/drivers/md/raid5.c
--- a/linux-2.6.14-rc3/drivers/md/raid5.c 2005-09-30 14:17:34.000000000 -0700
+++ b/linux-2.6.14-rc3/drivers/md/raid5.c 2005-10-04 21:26:07.000000000 -0700
@@ -1305,7 +1305,7 @@ static void handle_stripe(struct stripe_
bi->bi_end_io = raid5_end_read_request;

rcu_read_lock();
- rdev = conf->disks[i].rdev;
+ rdev = rcu_dereference(conf->disks[i].rdev);
if (rdev && rdev->faulty)
rdev = NULL;
if (rdev)
@@ -1379,7 +1379,7 @@ static void unplug_slaves(mddev_t *mddev

rcu_read_lock();
for (i=0; i<mddev->raid_disks; i++) {
- mdk_rdev_t *rdev = conf->disks[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
request_queue_t *r_queue = bdev_get_queue(rdev->bdev);

@@ -1424,7 +1424,7 @@ static int raid5_issue_flush(request_que

rcu_read_lock();
for (i=0; i<mddev->raid_disks && ret == 0; i++) {
- mdk_rdev_t *rdev = conf->disks[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
if (rdev && !rdev->faulty) {
struct block_device *bdev = rdev->bdev;
request_queue_t *r_queue = bdev_get_queue(bdev);
diff -urpNa -X dontdiff a/linux-2.6.14-rc3/drivers/md/raid6main.c b/linux-2.6.14-rc3/drivers/md/raid6main.c
--- a/linux-2.6.14-rc3/drivers/md/raid6main.c 2005-09-30 14:17:34.000000000 -0700
+++ b/linux-2.6.14-rc3/drivers/md/raid6main.c 2005-10-04 21:29:19.000000000 -0700
@@ -1464,7 +1464,7 @@ static void handle_stripe(struct stripe_
bi->bi_end_io = raid6_end_read_request;

rcu_read_lock();
- rdev = conf->disks[i].rdev;
+ rdev = rcu_dereference(conf->disks[i].rdev);
if (rdev && rdev->faulty)
rdev = NULL;
if (rdev)
@@ -1538,7 +1538,7 @@ static void unplug_slaves(mddev_t *mddev

rcu_read_lock();
for (i=0; i<mddev->raid_disks; i++) {
- mdk_rdev_t *rdev = conf->disks[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
request_queue_t *r_queue = bdev_get_queue(rdev->bdev);

@@ -1583,7 +1583,7 @@ static int raid6_issue_flush(request_que

rcu_read_lock();
for (i=0; i<mddev->raid_disks && ret == 0; i++) {
- mdk_rdev_t *rdev = conf->disks[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
if (rdev && !rdev->faulty) {
struct block_device *bdev = rdev->bdev;
request_queue_t *r_queue = bdev_get_queue(bdev);


2005-10-11 21:49:41

by Suzanne Wood

[permalink] [raw]
Subject: Re: [RFC][PATCH] identify raid rcu-protected pointer

Elements of this patch were submitted Oct 5-6, 2005, but
are being resent with some explanation of reasoning employed
and additions. Insertions of rcu_dereference()
are done in response to previously marked rcu readside
critical sections with corresponding rcu_assign_pointer().

Because synchronize_rcu() occurs after p->rdev = NULL;
in the five files (multipath.c, raid10.c, raid1.c,
raid5.c, and raid6main.c) within drivers/md, it is
thought that the rcu_dereference() protects the later
dereference of that pointer which is type mdk_rdev_t
defined in md_k.h for the struct mdk_rdev_s for the
extended device.

In a case like the following from raid10.c:
while (!conf->mirrors[disk].rdev ||
!conf->mirrors[disk].rdev->in_sync) {

with no assignment of the rdev, the thought is that
an rcu-dereference() of the rdev protects the access
of, e.g, in_sync. While in_sync is an integer (flag),
it's validity for the current object would be protected.
More likely, a temporary variable will be used in
read_balance() as seen in raid1.c, e.g.:
rdev = conf->mirrors[new_disk].rdev)

In read_balance() of raid1.c, it was assumed that the
extent of the rcu readside critical section was due to
the "retry" label and the possibility of desiring to be
external to the loop, but the "goto retry" is nested
in 2 levels of conditionals. This may indicate
a reconsideration of the placement of rcu_read_lock()/
unlock(). raid10.c read_balance() may also merit
reevaluation.

The rcu_assign_pointer(rdev->mddev, mddev) is inserted
to make the mddev object globally visible because it
is the structure referenced by the rcu protected rdev
pointer. Other assignments to rdev fields in md.c
appear to be in regard to initialization, but the
developer will want to consider this.

Thank you.

----------------------------------------------------

md.c | 2 +-
multipath.c | 6 +++---
raid1.c | 18 +++++++++---------
raid10.c | 14 +++++++-------
raid5.c | 8 ++++----
raid6main.c | 6 +++---
6 files changed, 28 insertions(+), 27 deletions(-)

----------------------------------------------------

diff -urpNa -X dontdiff linux-2.6.14-rc4/drivers/md/md.c linux-2.6.14-rc4_patch/drivers/md/md.c
--- linux-2.6.14-rc4/drivers/md/md.c 2005-10-10 18:19:19.000000000 -0700
+++ linux-2.6.14-rc4_patch/drivers/md/md.c 2005-10-11 13:30:22.000000000 -0700
@@ -1145,7 +1145,7 @@ static int bind_rdev_to_array(mdk_rdev_t
}

list_add(&rdev->same_set, &mddev->disks);
- rdev->mddev = mddev;
+ rcu_assign_pointer(rdev->mddev, mddev);
printk(KERN_INFO "md: bind<%s>\n", bdevname(rdev->bdev,b));
return 0;
}
diff -urpNa -X dontdiff linux-2.6.14-rc4/drivers/md/multipath.c linux-2.6.14-rc4_patch/drivers/md/multipath.c
--- linux-2.6.14-rc4/drivers/md/multipath.c 2005-10-10 18:19:19.000000000 -0700
+++ linux-2.6.14-rc4_patch/drivers/md/multipath.c 2005-10-11 10:28:33.000000000 -0700
@@ -63,7 +63,7 @@ static int multipath_map (multipath_conf

rcu_read_lock();
for (i = 0; i < disks; i++) {
- mdk_rdev_t *rdev = conf->multipaths[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev);
if (rdev && rdev->in_sync) {
atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
@@ -139,7 +139,7 @@ static void unplug_slaves(mddev_t *mddev

rcu_read_lock();
for (i=0; i<mddev->raid_disks; i++) {
- mdk_rdev_t *rdev = conf->multipaths[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev);
if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
request_queue_t *r_queue = bdev_get_queue(rdev->bdev);

@@ -228,7 +228,7 @@ static int multipath_issue_flush(request

rcu_read_lock();
for (i=0; i<mddev->raid_disks && ret == 0; i++) {
- mdk_rdev_t *rdev = conf->multipaths[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev);
if (rdev && !rdev->faulty) {
struct block_device *bdev = rdev->bdev;
request_queue_t *r_queue = bdev_get_queue(bdev);
diff -urpNa -X dontdiff linux-2.6.14-rc4/drivers/md/raid10.c linux-2.6.14-rc4_patch/drivers/md/raid10.c
--- linux-2.6.14-rc4/drivers/md/raid10.c 2005-10-10 18:19:19.000000000 -0700
+++ linux-2.6.14-rc4_patch/drivers/md/raid10.c 2005-10-11 14:09:20.000000000 -0700
@@ -510,7 +510,7 @@ static int read_balance(conf_t *conf, r1
slot = 0;
disk = r10_bio->devs[slot].devnum;

- while (!conf->mirrors[disk].rdev ||
+ while (!rcu_dereference(conf->mirrors[disk].rdev) ||
!conf->mirrors[disk].rdev->in_sync) {
slot++;
if (slot == conf->copies) {
@@ -527,7 +527,7 @@ static int read_balance(conf_t *conf, r1
/* make sure the disk is operational */
slot = 0;
disk = r10_bio->devs[slot].devnum;
- while (!conf->mirrors[disk].rdev ||
+ while (!rcu_dereference(conf->mirrors[disk].rdev) ||
!conf->mirrors[disk].rdev->in_sync) {
slot ++;
if (slot == conf->copies) {
@@ -547,7 +547,7 @@ static int read_balance(conf_t *conf, r1
int ndisk = r10_bio->devs[nslot].devnum;


- if (!conf->mirrors[ndisk].rdev ||
+ if (!rcu_dereference(conf->mirrors[ndisk].rdev) ||
!conf->mirrors[ndisk].rdev->in_sync)
continue;

@@ -569,7 +569,7 @@ rb_out:
r10_bio->read_slot = slot;
/* conf->next_seq_sect = this_sector + sectors;*/

- if (disk >= 0 && conf->mirrors[disk].rdev)
+ if (disk >= 0 && rcu_dereference(conf->mirrors[disk].rdev))
atomic_inc(&conf->mirrors[disk].rdev->nr_pending);
rcu_read_unlock();

@@ -583,7 +583,7 @@ static void unplug_slaves(mddev_t *mddev

rcu_read_lock();
for (i=0; i<mddev->raid_disks; i++) {
- mdk_rdev_t *rdev = conf->mirrors[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
request_queue_t *r_queue = bdev_get_queue(rdev->bdev);

@@ -614,7 +614,7 @@ static int raid10_issue_flush(request_qu

rcu_read_lock();
for (i=0; i<mddev->raid_disks && ret == 0; i++) {
- mdk_rdev_t *rdev = conf->mirrors[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
if (rdev && !rdev->faulty) {
struct block_device *bdev = rdev->bdev;
request_queue_t *r_queue = bdev_get_queue(bdev);
@@ -772,7 +772,7 @@ static int make_request(request_queue_t
rcu_read_lock();
for (i = 0; i < conf->copies; i++) {
int d = r10_bio->devs[i].devnum;
- if (conf->mirrors[d].rdev &&
+ if (rcu_dereference(conf->mirrors[d].rdev) &&
!conf->mirrors[d].rdev->faulty) {
atomic_inc(&conf->mirrors[d].rdev->nr_pending);
r10_bio->devs[i].bio = bio;
diff -urpNa -X dontdiff linux-2.6.14-rc4/drivers/md/raid1.c linux-2.6.14-rc4_patch/drivers/md/raid1.c
--- linux-2.6.14-rc4/drivers/md/raid1.c 2005-10-10 18:19:19.000000000 -0700
+++ linux-2.6.14-rc4_patch/drivers/md/raid1.c 2005-10-11 13:23:23.000000000 -0700
@@ -416,10 +416,10 @@ static int read_balance(conf_t *conf, r1
/* Choose the first operation device, for consistancy */
new_disk = 0;

- for (rdev = conf->mirrors[new_disk].rdev;
+ for (rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
!rdev || !rdev->in_sync
|| test_bit(WriteMostly, &rdev->flags);
- rdev = conf->mirrors[++new_disk].rdev) {
+ rdev = rcu_dereference(conf->mirrors[++new_disk].rdev)) {

if (rdev && rdev->in_sync)
wonly_disk = new_disk;
@@ -434,10 +434,10 @@ static int read_balance(conf_t *conf, r1


/* make sure the disk is operational */
- for (rdev = conf->mirrors[new_disk].rdev;
+ for (rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
!rdev || !rdev->in_sync ||
test_bit(WriteMostly, &rdev->flags);
- rdev = conf->mirrors[new_disk].rdev) {
+ rdev = rcu_dereference(conf->mirrors[new_disk].rdev)) { // increment new_disk?

if (rdev && rdev->in_sync)
wonly_disk = new_disk;
@@ -474,7 +474,7 @@ static int read_balance(conf_t *conf, r1
disk = conf->raid_disks;
disk--;

- rdev = conf->mirrors[disk].rdev;
+ rdev = rcu_dereference(conf->mirrors[disk].rdev);

if (!rdev ||
!rdev->in_sync ||
@@ -496,7 +496,7 @@ static int read_balance(conf_t *conf, r1


if (new_disk >= 0) {
- rdev = conf->mirrors[new_disk].rdev;
+ rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
if (!rdev)
goto retry;
atomic_inc(&rdev->nr_pending);
@@ -522,7 +522,7 @@ static void unplug_slaves(mddev_t *mddev

rcu_read_lock();
for (i=0; i<mddev->raid_disks; i++) {
- mdk_rdev_t *rdev = conf->mirrors[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
request_queue_t *r_queue = bdev_get_queue(rdev->bdev);

@@ -547,6 +547,7 @@ static void raid1_unplug(request_queue_t
md_wakeup_thread(mddev->thread);
}

static int raid1_issue_flush(request_queue_t *q, struct gendisk *disk,
sector_t *error_sector)
{
@@ -556,7 +557,7 @@ static int raid1_issue_flush(request_que

rcu_read_lock();
for (i=0; i<mddev->raid_disks && ret == 0; i++) {
- mdk_rdev_t *rdev = conf->mirrors[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
if (rdev && !rdev->faulty) {
struct block_device *bdev = rdev->bdev;
request_queue_t *r_queue = bdev_get_queue(bdev);
@@ -732,7 +733,7 @@ static int make_request(request_queue_t
#endif
rcu_read_lock();
for (i = 0; i < disks; i++) {
- if ((rdev=conf->mirrors[i].rdev) != NULL &&
+ if ((rdev=rcu_dereference(conf->mirrors[i].rdev)) != NULL &&
!rdev->faulty) {
atomic_inc(&rdev->nr_pending);
if (rdev->faulty) {
diff -urpNa -X dontdiff linux-2.6.14-rc4/drivers/md/raid5.c linux-2.6.14-rc4_patch/drivers/md/raid5.c
--- linux-2.6.14-rc4/drivers/md/raid5.c 2005-10-10 18:19:19.000000000 -0700
+++ linux-2.6.14-rc4_patch/drivers/md/raid5.c 2005-10-11 13:27:27.000000000 -0700
@@ -1305,12 +1305,11 @@ static void handle_stripe(struct stripe_
bi->bi_end_io = raid5_end_read_request;

rcu_read_lock();
- rdev = conf->disks[i].rdev;
+ rdev = rcu_dereference(conf->disks[i].rdev);
if (rdev && rdev->faulty)
rdev = NULL;
if (rdev)
atomic_inc(&rdev->nr_pending);
- rcu_read_unlock();

if (rdev) {
if (test_bit(R5_Syncio, &sh->dev[i].flags))
@@ -1339,6 +1338,7 @@ static void handle_stripe(struct stripe_
clear_bit(R5_LOCKED, &sh->dev[i].flags);
set_bit(STRIPE_HANDLE, &sh->state);
}
+ rcu_read_unlock();
}
}

@@ -1379,7 +1379,7 @@ static void unplug_slaves(mddev_t *mddev

rcu_read_lock();
for (i=0; i<mddev->raid_disks; i++) {
- mdk_rdev_t *rdev = conf->disks[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
request_queue_t *r_queue = bdev_get_queue(rdev->bdev);

@@ -1424,7 +1424,7 @@ static int raid5_issue_flush(request_que

rcu_read_lock();
for (i=0; i<mddev->raid_disks && ret == 0; i++) {
- mdk_rdev_t *rdev = conf->disks[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
if (rdev && !rdev->faulty) {
struct block_device *bdev = rdev->bdev;
request_queue_t *r_queue = bdev_get_queue(bdev);
diff -urpNa -X dontdiff linux-2.6.14-rc4/drivers/md/raid6main.c linux-2.6.14-rc4_patch/drivers/md/raid6main.c
--- linux-2.6.14-rc4/drivers/md/raid6main.c 2005-10-10 18:19:19.000000000 -0700
+++ linux-2.6.14-rc4_patch/drivers/md/raid6main.c 2005-10-11 13:29:08.000000000 -0700
@@ -1464,7 +1464,7 @@ static void handle_stripe(struct stripe_
bi->bi_end_io = raid6_end_read_request;

rcu_read_lock();
- rdev = conf->disks[i].rdev;
+ rdev = rcu_dereference(conf->disks[i].rdev);
if (rdev && rdev->faulty)
rdev = NULL;
if (rdev)
@@ -1538,7 +1538,7 @@ static void unplug_slaves(mddev_t *mddev

rcu_read_lock();
for (i=0; i<mddev->raid_disks; i++) {
- mdk_rdev_t *rdev = conf->disks[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
request_queue_t *r_queue = bdev_get_queue(rdev->bdev);

@@ -1583,7 +1583,7 @@ static int raid6_issue_flush(request_que

rcu_read_lock();
for (i=0; i<mddev->raid_disks && ret == 0; i++) {
- mdk_rdev_t *rdev = conf->disks[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
if (rdev && !rdev->faulty) {
struct block_device *bdev = rdev->bdev;
request_queue_t *r_queue = bdev_get_queue(bdev);
~p

2005-10-11 22:34:58

by Suzanne Wood

[permalink] [raw]
Subject: Re: [RFC][PATCH] identify raid rcu-protected pointer

To provide further background to the recently submitted patch,
please let me note the following in regard to the commented
question in read_balance() of raid1.c

diff linux-2.6.13-rc6/drivers/md/raid1.c linux-2.6.14-rc4/drivers/md/raid1.c

changed to 379,383c419,428
< while ((new_rdev=conf->mirrors[new_disk].rdev) == NULL ||
< !new_rdev->in_sync) {
< new_disk++;
< if (new_disk == conf->raid_disks) {
< new_disk = -1;
---
> for (rdev = conf->mirrors[new_disk].rdev;
> !rdev || !rdev->in_sync
> || test_bit(WriteMostly, &rdev->flags);
> rdev = conf->mirrors[++new_disk].rdev) {
>
> if (rdev && rdev->in_sync)
> wonly_disk = new_disk;
>
> if (new_disk == conf->raid_disks - 1) {
> new_disk = wonly_disk;
392,393c437,444
< while ((new_rdev=conf->mirrors[new_disk].rdev) == NULL ||
< !new_rdev->in_sync) {
---
> for (rdev = conf->mirrors[new_disk].rdev;
> !rdev || !rdev->in_sync ||
> test_bit(WriteMostly, &rdev->flags);
> rdev = conf->mirrors[new_disk].rdev) {
>

On the second revision section, one would consider rcu_dereference()
on both "rdev =" occurrences, but expr3 is not apparently changing, so
comparing it to the earlier "for loop" elicits my question.
Thank you.
Suzanne

2005-10-11 23:01:33

by Paul E. McKenney

[permalink] [raw]
Subject: Re: [RFC][PATCH] identify raid rcu-protected pointer

On Tue, Oct 11, 2005 at 02:48:37PM -0700, Suzanne Wood wrote:

Acked-by: <[email protected]>

Thanx, Paul

> Elements of this patch were submitted Oct 5-6, 2005, but
> are being resent with some explanation of reasoning employed
> and additions. Insertions of rcu_dereference()
> are done in response to previously marked rcu readside
> critical sections with corresponding rcu_assign_pointer().
>
> Because synchronize_rcu() occurs after p->rdev = NULL;
> in the five files (multipath.c, raid10.c, raid1.c,
> raid5.c, and raid6main.c) within drivers/md, it is
> thought that the rcu_dereference() protects the later
> dereference of that pointer which is type mdk_rdev_t
> defined in md_k.h for the struct mdk_rdev_s for the
> extended device.
>
> In a case like the following from raid10.c:
> while (!conf->mirrors[disk].rdev ||
> !conf->mirrors[disk].rdev->in_sync) {
>
> with no assignment of the rdev, the thought is that
> an rcu-dereference() of the rdev protects the access
> of, e.g, in_sync. While in_sync is an integer (flag),
> it's validity for the current object would be protected.
> More likely, a temporary variable will be used in
> read_balance() as seen in raid1.c, e.g.:
> rdev = conf->mirrors[new_disk].rdev)
>
> In read_balance() of raid1.c, it was assumed that the
> extent of the rcu readside critical section was due to
> the "retry" label and the possibility of desiring to be
> external to the loop, but the "goto retry" is nested
> in 2 levels of conditionals. This may indicate
> a reconsideration of the placement of rcu_read_lock()/
> unlock(). raid10.c read_balance() may also merit
> reevaluation.
>
> The rcu_assign_pointer(rdev->mddev, mddev) is inserted
> to make the mddev object globally visible because it
> is the structure referenced by the rcu protected rdev
> pointer. Other assignments to rdev fields in md.c
> appear to be in regard to initialization, but the
> developer will want to consider this.
>
> Thank you.
>
> ----------------------------------------------------
>
> md.c | 2 +-
> multipath.c | 6 +++---
> raid1.c | 18 +++++++++---------
> raid10.c | 14 +++++++-------
> raid5.c | 8 ++++----
> raid6main.c | 6 +++---
> 6 files changed, 28 insertions(+), 27 deletions(-)
>
> ----------------------------------------------------
>
> diff -urpNa -X dontdiff linux-2.6.14-rc4/drivers/md/md.c linux-2.6.14-rc4_patch/drivers/md/md.c
> --- linux-2.6.14-rc4/drivers/md/md.c 2005-10-10 18:19:19.000000000 -0700
> +++ linux-2.6.14-rc4_patch/drivers/md/md.c 2005-10-11 13:30:22.000000000 -0700
> @@ -1145,7 +1145,7 @@ static int bind_rdev_to_array(mdk_rdev_t
> }
>
> list_add(&rdev->same_set, &mddev->disks);
> - rdev->mddev = mddev;
> + rcu_assign_pointer(rdev->mddev, mddev);
> printk(KERN_INFO "md: bind<%s>\n", bdevname(rdev->bdev,b));
> return 0;
> }
> diff -urpNa -X dontdiff linux-2.6.14-rc4/drivers/md/multipath.c linux-2.6.14-rc4_patch/drivers/md/multipath.c
> --- linux-2.6.14-rc4/drivers/md/multipath.c 2005-10-10 18:19:19.000000000 -0700
> +++ linux-2.6.14-rc4_patch/drivers/md/multipath.c 2005-10-11 10:28:33.000000000 -0700
> @@ -63,7 +63,7 @@ static int multipath_map (multipath_conf
>
> rcu_read_lock();
> for (i = 0; i < disks; i++) {
> - mdk_rdev_t *rdev = conf->multipaths[i].rdev;
> + mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev);
> if (rdev && rdev->in_sync) {
> atomic_inc(&rdev->nr_pending);
> rcu_read_unlock();
> @@ -139,7 +139,7 @@ static void unplug_slaves(mddev_t *mddev
>
> rcu_read_lock();
> for (i=0; i<mddev->raid_disks; i++) {
> - mdk_rdev_t *rdev = conf->multipaths[i].rdev;
> + mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev);
> if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
> request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
>
> @@ -228,7 +228,7 @@ static int multipath_issue_flush(request
>
> rcu_read_lock();
> for (i=0; i<mddev->raid_disks && ret == 0; i++) {
> - mdk_rdev_t *rdev = conf->multipaths[i].rdev;
> + mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev);
> if (rdev && !rdev->faulty) {
> struct block_device *bdev = rdev->bdev;
> request_queue_t *r_queue = bdev_get_queue(bdev);
> diff -urpNa -X dontdiff linux-2.6.14-rc4/drivers/md/raid10.c linux-2.6.14-rc4_patch/drivers/md/raid10.c
> --- linux-2.6.14-rc4/drivers/md/raid10.c 2005-10-10 18:19:19.000000000 -0700
> +++ linux-2.6.14-rc4_patch/drivers/md/raid10.c 2005-10-11 14:09:20.000000000 -0700
> @@ -510,7 +510,7 @@ static int read_balance(conf_t *conf, r1
> slot = 0;
> disk = r10_bio->devs[slot].devnum;
>
> - while (!conf->mirrors[disk].rdev ||
> + while (!rcu_dereference(conf->mirrors[disk].rdev) ||
> !conf->mirrors[disk].rdev->in_sync) {
> slot++;
> if (slot == conf->copies) {
> @@ -527,7 +527,7 @@ static int read_balance(conf_t *conf, r1
> /* make sure the disk is operational */
> slot = 0;
> disk = r10_bio->devs[slot].devnum;
> - while (!conf->mirrors[disk].rdev ||
> + while (!rcu_dereference(conf->mirrors[disk].rdev) ||
> !conf->mirrors[disk].rdev->in_sync) {
> slot ++;
> if (slot == conf->copies) {
> @@ -547,7 +547,7 @@ static int read_balance(conf_t *conf, r1
> int ndisk = r10_bio->devs[nslot].devnum;
>
>
> - if (!conf->mirrors[ndisk].rdev ||
> + if (!rcu_dereference(conf->mirrors[ndisk].rdev) ||
> !conf->mirrors[ndisk].rdev->in_sync)
> continue;
>
> @@ -569,7 +569,7 @@ rb_out:
> r10_bio->read_slot = slot;
> /* conf->next_seq_sect = this_sector + sectors;*/
>
> - if (disk >= 0 && conf->mirrors[disk].rdev)
> + if (disk >= 0 && rcu_dereference(conf->mirrors[disk].rdev))
> atomic_inc(&conf->mirrors[disk].rdev->nr_pending);
> rcu_read_unlock();
>
> @@ -583,7 +583,7 @@ static void unplug_slaves(mddev_t *mddev
>
> rcu_read_lock();
> for (i=0; i<mddev->raid_disks; i++) {
> - mdk_rdev_t *rdev = conf->mirrors[i].rdev;
> + mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
> if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
> request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
>
> @@ -614,7 +614,7 @@ static int raid10_issue_flush(request_qu
>
> rcu_read_lock();
> for (i=0; i<mddev->raid_disks && ret == 0; i++) {
> - mdk_rdev_t *rdev = conf->mirrors[i].rdev;
> + mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
> if (rdev && !rdev->faulty) {
> struct block_device *bdev = rdev->bdev;
> request_queue_t *r_queue = bdev_get_queue(bdev);
> @@ -772,7 +772,7 @@ static int make_request(request_queue_t
> rcu_read_lock();
> for (i = 0; i < conf->copies; i++) {
> int d = r10_bio->devs[i].devnum;
> - if (conf->mirrors[d].rdev &&
> + if (rcu_dereference(conf->mirrors[d].rdev) &&
> !conf->mirrors[d].rdev->faulty) {
> atomic_inc(&conf->mirrors[d].rdev->nr_pending);
> r10_bio->devs[i].bio = bio;
> diff -urpNa -X dontdiff linux-2.6.14-rc4/drivers/md/raid1.c linux-2.6.14-rc4_patch/drivers/md/raid1.c
> --- linux-2.6.14-rc4/drivers/md/raid1.c 2005-10-10 18:19:19.000000000 -0700
> +++ linux-2.6.14-rc4_patch/drivers/md/raid1.c 2005-10-11 13:23:23.000000000 -0700
> @@ -416,10 +416,10 @@ static int read_balance(conf_t *conf, r1
> /* Choose the first operation device, for consistancy */
> new_disk = 0;
>
> - for (rdev = conf->mirrors[new_disk].rdev;
> + for (rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
> !rdev || !rdev->in_sync
> || test_bit(WriteMostly, &rdev->flags);
> - rdev = conf->mirrors[++new_disk].rdev) {
> + rdev = rcu_dereference(conf->mirrors[++new_disk].rdev)) {
>
> if (rdev && rdev->in_sync)
> wonly_disk = new_disk;
> @@ -434,10 +434,10 @@ static int read_balance(conf_t *conf, r1
>
>
> /* make sure the disk is operational */
> - for (rdev = conf->mirrors[new_disk].rdev;
> + for (rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
> !rdev || !rdev->in_sync ||
> test_bit(WriteMostly, &rdev->flags);
> - rdev = conf->mirrors[new_disk].rdev) {
> + rdev = rcu_dereference(conf->mirrors[new_disk].rdev)) { // increment new_disk?
>
> if (rdev && rdev->in_sync)
> wonly_disk = new_disk;
> @@ -474,7 +474,7 @@ static int read_balance(conf_t *conf, r1
> disk = conf->raid_disks;
> disk--;
>
> - rdev = conf->mirrors[disk].rdev;
> + rdev = rcu_dereference(conf->mirrors[disk].rdev);
>
> if (!rdev ||
> !rdev->in_sync ||
> @@ -496,7 +496,7 @@ static int read_balance(conf_t *conf, r1
>
>
> if (new_disk >= 0) {
> - rdev = conf->mirrors[new_disk].rdev;
> + rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
> if (!rdev)
> goto retry;
> atomic_inc(&rdev->nr_pending);
> @@ -522,7 +522,7 @@ static void unplug_slaves(mddev_t *mddev
>
> rcu_read_lock();
> for (i=0; i<mddev->raid_disks; i++) {
> - mdk_rdev_t *rdev = conf->mirrors[i].rdev;
> + mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
> if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
> request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
>
> @@ -547,6 +547,7 @@ static void raid1_unplug(request_queue_t
> md_wakeup_thread(mddev->thread);
> }
>
> static int raid1_issue_flush(request_queue_t *q, struct gendisk *disk,
> sector_t *error_sector)
> {
> @@ -556,7 +557,7 @@ static int raid1_issue_flush(request_que
>
> rcu_read_lock();
> for (i=0; i<mddev->raid_disks && ret == 0; i++) {
> - mdk_rdev_t *rdev = conf->mirrors[i].rdev;
> + mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
> if (rdev && !rdev->faulty) {
> struct block_device *bdev = rdev->bdev;
> request_queue_t *r_queue = bdev_get_queue(bdev);
> @@ -732,7 +733,7 @@ static int make_request(request_queue_t
> #endif
> rcu_read_lock();
> for (i = 0; i < disks; i++) {
> - if ((rdev=conf->mirrors[i].rdev) != NULL &&
> + if ((rdev=rcu_dereference(conf->mirrors[i].rdev)) != NULL &&
> !rdev->faulty) {
> atomic_inc(&rdev->nr_pending);
> if (rdev->faulty) {
> diff -urpNa -X dontdiff linux-2.6.14-rc4/drivers/md/raid5.c linux-2.6.14-rc4_patch/drivers/md/raid5.c
> --- linux-2.6.14-rc4/drivers/md/raid5.c 2005-10-10 18:19:19.000000000 -0700
> +++ linux-2.6.14-rc4_patch/drivers/md/raid5.c 2005-10-11 13:27:27.000000000 -0700
> @@ -1305,12 +1305,11 @@ static void handle_stripe(struct stripe_
> bi->bi_end_io = raid5_end_read_request;
>
> rcu_read_lock();
> - rdev = conf->disks[i].rdev;
> + rdev = rcu_dereference(conf->disks[i].rdev);
> if (rdev && rdev->faulty)
> rdev = NULL;
> if (rdev)
> atomic_inc(&rdev->nr_pending);
> - rcu_read_unlock();
>
> if (rdev) {
> if (test_bit(R5_Syncio, &sh->dev[i].flags))
> @@ -1339,6 +1338,7 @@ static void handle_stripe(struct stripe_
> clear_bit(R5_LOCKED, &sh->dev[i].flags);
> set_bit(STRIPE_HANDLE, &sh->state);
> }
> + rcu_read_unlock();
> }
> }
>
> @@ -1379,7 +1379,7 @@ static void unplug_slaves(mddev_t *mddev
>
> rcu_read_lock();
> for (i=0; i<mddev->raid_disks; i++) {
> - mdk_rdev_t *rdev = conf->disks[i].rdev;
> + mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
> if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
> request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
>
> @@ -1424,7 +1424,7 @@ static int raid5_issue_flush(request_que
>
> rcu_read_lock();
> for (i=0; i<mddev->raid_disks && ret == 0; i++) {
> - mdk_rdev_t *rdev = conf->disks[i].rdev;
> + mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
> if (rdev && !rdev->faulty) {
> struct block_device *bdev = rdev->bdev;
> request_queue_t *r_queue = bdev_get_queue(bdev);
> diff -urpNa -X dontdiff linux-2.6.14-rc4/drivers/md/raid6main.c linux-2.6.14-rc4_patch/drivers/md/raid6main.c
> --- linux-2.6.14-rc4/drivers/md/raid6main.c 2005-10-10 18:19:19.000000000 -0700
> +++ linux-2.6.14-rc4_patch/drivers/md/raid6main.c 2005-10-11 13:29:08.000000000 -0700
> @@ -1464,7 +1464,7 @@ static void handle_stripe(struct stripe_
> bi->bi_end_io = raid6_end_read_request;
>
> rcu_read_lock();
> - rdev = conf->disks[i].rdev;
> + rdev = rcu_dereference(conf->disks[i].rdev);
> if (rdev && rdev->faulty)
> rdev = NULL;
> if (rdev)
> @@ -1538,7 +1538,7 @@ static void unplug_slaves(mddev_t *mddev
>
> rcu_read_lock();
> for (i=0; i<mddev->raid_disks; i++) {
> - mdk_rdev_t *rdev = conf->disks[i].rdev;
> + mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
> if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
> request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
>
> @@ -1583,7 +1583,7 @@ static int raid6_issue_flush(request_que
>
> rcu_read_lock();
> for (i=0; i<mddev->raid_disks && ret == 0; i++) {
> - mdk_rdev_t *rdev = conf->disks[i].rdev;
> + mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
> if (rdev && !rdev->faulty) {
> struct block_device *bdev = rdev->bdev;
> request_queue_t *r_queue = bdev_get_queue(bdev);
> ~p
>
>

2005-10-12 06:37:24

by Suzanne Wood

[permalink] [raw]
Subject: Re: [RFC][PATCH] identify raid rcu-protected pointer

>From "Paul E. McKenney" October 11, 2005 4:02 PM

> On Tue, Oct 11, 2005 at 02:48:37PM -0700, Suzanne Wood wrote:
>
> Acked-by: <[email protected]>
>
> Thanx, Paul

Thank you. Recognizing that the comment in raid1.c will be
addressed and/or removed, please consider this.

Signed-off-by: <[email protected]>

>
>> Elements of this patch were submitted Oct 5-6, 2005, but
>> are being resent with some explanation of reasoning employed
>> and additions. Insertions of rcu_dereference()
>> are done in response to previously marked rcu readside
>> critical sections with corresponding rcu_assign_pointer().
>>
>> Because synchronize_rcu() occurs after p->rdev = NULL;
>> in the five files (multipath.c, raid10.c, raid1.c,
>> raid5.c, and raid6main.c) within drivers/md, it is
>> thought that the rcu_dereference() protects the later
>> dereference of that pointer which is type mdk_rdev_t
>> defined in md_k.h for the struct mdk_rdev_s for the
>> extended device.
>>
>> In a case like the following from raid10.c:
>> while (!conf->mirrors[disk].rdev ||
>> !conf->mirrors[disk].rdev->in_sync) {
>>
>> with no assignment of the rdev, the thought is that
>> an rcu-dereference() of the rdev protects the access
>> of, e.g, in_sync. While in_sync is an integer (flag),
>> it's validity for the current object would be protected.
>> More likely, a temporary variable will be used in
>> read_balance() as seen in raid1.c, e.g.:
>> rdev = conf->mirrors[new_disk].rdev)
>>
>> In read_balance() of raid1.c, it was assumed that the
>> extent of the rcu readside critical section was due to
>> the "retry" label and the possibility of desiring to be
>> external to the loop, but the "goto retry" is nested
>> in 2 levels of conditionals. This may indicate
>> a reconsideration of the placement of rcu_read_lock()/
>> unlock(). raid10.c read_balance() may also merit
>> reevaluation.
>>
>> The rcu_assign_pointer(rdev->mddev, mddev) is inserted
>> to make the mddev object globally visible because it
>> is the structure referenced by the rcu protected rdev
>> pointer. Other assignments to rdev fields in md.c
>> appear to be in regard to initialization, but the
>> developer will want to consider this.
>>
>> Thank you.
>>
>> ----------------------------------------------------
>>
>> md.c | 2 +-
>> multipath.c | 6 +++---
>> raid1.c | 18 +++++++++---------
>> raid10.c | 14 +++++++-------
>> raid5.c | 8 ++++----
>> raid6main.c | 6 +++---
>> 6 files changed, 27 insertions(+), 27 deletions(-)
>>
>> ----------------------------------------------------
>>
>> diff -urpNa -X dontdiff linux-2.6.14-rc4/drivers/md/md.c linux-2.6.14-rc4_patch/drivers/md/md.c
>> --- linux-2.6.14-rc4/drivers/md/md.c 2005-10-10 18:19:19.000000000 -0700
>> +++ linux-2.6.14-rc4_patch/drivers/md/md.c 2005-10-11 13:30:22.000000000 -0700
>> @@ -1145,7 +1145,7 @@ static int bind_rdev_to_array(mdk_rdev_t
>> }
>>
>> list_add(&rdev->same_set, &mddev->disks);
>> - rdev->mddev = mddev;
>> + rcu_assign_pointer(rdev->mddev, mddev);
>> printk(KERN_INFO "md: bind<%s>\n", bdevname(rdev->bdev,b));
>> return 0;
>> }
>> diff -urpNa -X dontdiff linux-2.6.14-rc4/drivers/md/multipath.c linux-2.6.14-rc4_patch/drivers/md/multipath.c
>> --- linux-2.6.14-rc4/drivers/md/multipath.c 2005-10-10 18:19:19.000000000 -0700
>> +++ linux-2.6.14-rc4_patch/drivers/md/multipath.c 2005-10-11 10:28:33.000000000 -0700
>> @@ -63,7 +63,7 @@ static int multipath_map (multipath_conf
>>
>> rcu_read_lock();
>> for (i = 0; i < disks; i++) {
>> - mdk_rdev_t *rdev = conf->multipaths[i].rdev;
>> + mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev);
>> if (rdev && rdev->in_sync) {
>> atomic_inc(&rdev->nr_pending);
>> rcu_read_unlock();
>> @@ -139,7 +139,7 @@ static void unplug_slaves(mddev_t *mddev
>>
>> rcu_read_lock();
>> for (i=0; i<mddev->raid_disks; i++) {
>> - mdk_rdev_t *rdev = conf->multipaths[i].rdev;
>> + mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev);
>> if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
>> request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
>>
>> @@ -228,7 +228,7 @@ static int multipath_issue_flush(request
>>
>> rcu_read_lock();
>> for (i=0; i<mddev->raid_disks && ret == 0; i++) {
>> - mdk_rdev_t *rdev = conf->multipaths[i].rdev;
>> + mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev);
>> if (rdev && !rdev->faulty) {
>> struct block_device *bdev = rdev->bdev;
>> request_queue_t *r_queue = bdev_get_queue(bdev);
>> diff -urpNa -X dontdiff linux-2.6.14-rc4/drivers/md/raid10.c linux-2.6.14-rc4_patch/drivers/md/raid10.c
>> --- linux-2.6.14-rc4/drivers/md/raid10.c 2005-10-10 18:19:19.000000000 -0700
>> +++ linux-2.6.14-rc4_patch/drivers/md/raid10.c 2005-10-11 14:09:20.000000000 -0700
>> @@ -510,7 +510,7 @@ static int read_balance(conf_t *conf, r1
>> slot = 0;
>> disk = r10_bio->devs[slot].devnum;
>>
>> - while (!conf->mirrors[disk].rdev ||
>> + while (!rcu_dereference(conf->mirrors[disk].rdev) ||
>> !conf->mirrors[disk].rdev->in_sync) {
>> slot++;
>> if (slot == conf->copies) {
>> @@ -527,7 +527,7 @@ static int read_balance(conf_t *conf, r1
>> /* make sure the disk is operational */
>> slot = 0;
>> disk = r10_bio->devs[slot].devnum;
>> - while (!conf->mirrors[disk].rdev ||
>> + while (!rcu_dereference(conf->mirrors[disk].rdev) ||
>> !conf->mirrors[disk].rdev->in_sync) {
>> slot ++;
>> if (slot == conf->copies) {
>> @@ -547,7 +547,7 @@ static int read_balance(conf_t *conf, r1
>> int ndisk = r10_bio->devs[nslot].devnum;
>>
>>
>> - if (!conf->mirrors[ndisk].rdev ||
>> + if (!rcu_dereference(conf->mirrors[ndisk].rdev) ||
>> !conf->mirrors[ndisk].rdev->in_sync)
>> continue;
>>
>> @@ -569,7 +569,7 @@ rb_out:
>> r10_bio->read_slot = slot;
>> /* conf->next_seq_sect = this_sector + sectors;*/
>>
>> - if (disk >= 0 && conf->mirrors[disk].rdev)
>> + if (disk >= 0 && rcu_dereference(conf->mirrors[disk].rdev))
>> atomic_inc(&conf->mirrors[disk].rdev->nr_pending);
>> rcu_read_unlock();
>>
>> @@ -583,7 +583,7 @@ static void unplug_slaves(mddev_t *mddev
>>
>> rcu_read_lock();
>> for (i=0; i<mddev->raid_disks; i++) {
>> - mdk_rdev_t *rdev = conf->mirrors[i].rdev;
>> + mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
>> if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
>> request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
>>
>> @@ -614,7 +614,7 @@ static int raid10_issue_flush(request_qu
>>
>> rcu_read_lock();
>> for (i=0; i<mddev->raid_disks && ret == 0; i++) {
>> - mdk_rdev_t *rdev = conf->mirrors[i].rdev;
>> + mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
>> if (rdev && !rdev->faulty) {
>> struct block_device *bdev = rdev->bdev;
>> request_queue_t *r_queue = bdev_get_queue(bdev);
>> @@ -772,7 +772,7 @@ static int make_request(request_queue_t
>> rcu_read_lock();
>> for (i = 0; i < conf->copies; i++) {
>> int d = r10_bio->devs[i].devnum;
>> - if (conf->mirrors[d].rdev &&
>> + if (rcu_dereference(conf->mirrors[d].rdev) &&
>> !conf->mirrors[d].rdev->faulty) {
>> atomic_inc(&conf->mirrors[d].rdev->nr_pending);
>> r10_bio->devs[i].bio = bio;
>> diff -urpNa -X dontdiff linux-2.6.14-rc4/drivers/md/raid1.c linux-2.6.14-rc4_patch/drivers/md/raid1.c
>> --- linux-2.6.14-rc4/drivers/md/raid1.c 2005-10-10 18:19:19.000000000 -0700
>> +++ linux-2.6.14-rc4_patch/drivers/md/raid1.c 2005-10-11 13:23:23.000000000 -0700
>> @@ -416,10 +416,10 @@ static int read_balance(conf_t *conf, r1
>> /* Choose the first operation device, for consistancy */
>> new_disk = 0;
>>
>> - for (rdev = conf->mirrors[new_disk].rdev;
>> + for (rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
>> !rdev || !rdev->in_sync
>> || test_bit(WriteMostly, &rdev->flags);
>> - rdev = conf->mirrors[++new_disk].rdev) {
>> + rdev = rcu_dereference(conf->mirrors[++new_disk].rdev)) {
>>
>> if (rdev && rdev->in_sync)
>> wonly_disk = new_disk;
>> @@ -434,10 +434,10 @@ static int read_balance(conf_t *conf, r1
>>
>>
>> /* make sure the disk is operational */
>> - for (rdev = conf->mirrors[new_disk].rdev;
>> + for (rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
>> !rdev || !rdev->in_sync ||
>> test_bit(WriteMostly, &rdev->flags);
>> - rdev = conf->mirrors[new_disk].rdev) {
>> + rdev = rcu_dereference(conf->mirrors[new_disk].rdev)) { // increment new_disk?
>>
>> if (rdev && rdev->in_sync)
>> wonly_disk = new_disk;
>> @@ -474,7 +474,7 @@ static int read_balance(conf_t *conf, r1
>> disk = conf->raid_disks;
>> disk--;
>>
>> - rdev = conf->mirrors[disk].rdev;
>> + rdev = rcu_dereference(conf->mirrors[disk].rdev);
>>
>> if (!rdev ||
>> !rdev->in_sync ||
>> @@ -496,7 +496,7 @@ static int read_balance(conf_t *conf, r1
>>
>>
>> if (new_disk >= 0) {
>> - rdev = conf->mirrors[new_disk].rdev;
>> + rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
>> if (!rdev)
>> goto retry;
>> atomic_inc(&rdev->nr_pending);
>> @@ -522,7 +522,7 @@ static void unplug_slaves(mddev_t *mddev
>>
>> rcu_read_lock();
>> for (i=0; i<mddev->raid_disks; i++) {
>> - mdk_rdev_t *rdev = conf->mirrors[i].rdev;
>> + mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
>> if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
>> request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
>>
>> @@ -547,6 +547,7 @@ static void raid1_unplug(request_queue_t
>> md_wakeup_thread(mddev->thread);
>> }
>>
>> static int raid1_issue_flush(request_queue_t *q, struct gendisk *disk,
>> sector_t *error_sector)
>> {
>> @@ -556,7 +557,7 @@ static int raid1_issue_flush(request_que
>>
>> rcu_read_lock();
>> for (i=0; i<mddev->raid_disks && ret == 0; i++) {
>> - mdk_rdev_t *rdev = conf->mirrors[i].rdev;
>> + mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
>> if (rdev && !rdev->faulty) {
>> struct block_device *bdev = rdev->bdev;
>> request_queue_t *r_queue = bdev_get_queue(bdev);
>> @@ -732,7 +733,7 @@ static int make_request(request_queue_t
>> #endif
>> rcu_read_lock();
>> for (i = 0; i < disks; i++) {
>> - if ((rdev=conf->mirrors[i].rdev) != NULL &&
>> + if ((rdev=rcu_dereference(conf->mirrors[i].rdev)) != NULL &&
>> !rdev->faulty) {
>> atomic_inc(&rdev->nr_pending);
>> if (rdev->faulty) {
>> diff -urpNa -X dontdiff linux-2.6.14-rc4/drivers/md/raid5.c linux-2.6.14-rc4_patch/drivers/md/raid5.c
>> --- linux-2.6.14-rc4/drivers/md/raid5.c 2005-10-10 18:19:19.000000000 -0700
>> +++ linux-2.6.14-rc4_patch/drivers/md/raid5.c 2005-10-11 13:27:27.000000000 -0700
>> @@ -1305,12 +1305,11 @@ static void handle_stripe(struct stripe_
>> bi->bi_end_io = raid5_end_read_request;
>>
>> rcu_read_lock();
>> - rdev = conf->disks[i].rdev;
>> + rdev = rcu_dereference(conf->disks[i].rdev);
>> if (rdev && rdev->faulty)
>> rdev = NULL;
>> if (rdev)
>> atomic_inc(&rdev->nr_pending);
>> - rcu_read_unlock();
>>
>> if (rdev) {
>> if (test_bit(R5_Syncio, &sh->dev[i].flags))
>> @@ -1339,6 +1338,7 @@ static void handle_stripe(struct stripe_
>> clear_bit(R5_LOCKED, &sh->dev[i].flags);
>> set_bit(STRIPE_HANDLE, &sh->state);
>> }
>> + rcu_read_unlock();
>> }
>> }
>>
>> @@ -1379,7 +1379,7 @@ static void unplug_slaves(mddev_t *mddev
>>
>> rcu_read_lock();
>> for (i=0; i<mddev->raid_disks; i++) {
>> - mdk_rdev_t *rdev = conf->disks[i].rdev;
>> + mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
>> if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
>> request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
>>
>> @@ -1424,7 +1424,7 @@ static int raid5_issue_flush(request_que
>>
>> rcu_read_lock();
>> for (i=0; i<mddev->raid_disks && ret == 0; i++) {
>> - mdk_rdev_t *rdev = conf->disks[i].rdev;
>> + mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
>> if (rdev && !rdev->faulty) {
>> struct block_device *bdev = rdev->bdev;
>> request_queue_t *r_queue = bdev_get_queue(bdev);
>> diff -urpNa -X dontdiff linux-2.6.14-rc4/drivers/md/raid6main.c linux-2.6.14-rc4_patch/drivers/md/raid6main.c
>> --- linux-2.6.14-rc4/drivers/md/raid6main.c 2005-10-10 18:19:19.000000000 -0700
>> +++ linux-2.6.14-rc4_patch/drivers/md/raid6main.c 2005-10-11 13:29:08.000000000 -0700
>> @@ -1464,7 +1464,7 @@ static void handle_stripe(struct stripe_
>> bi->bi_end_io = raid6_end_read_request;
>>
>> rcu_read_lock();
>> - rdev = conf->disks[i].rdev;
>> + rdev = rcu_dereference(conf->disks[i].rdev);
>> if (rdev && rdev->faulty)
>> rdev = NULL;
>> if (rdev)
>> @@ -1538,7 +1538,7 @@ static void unplug_slaves(mddev_t *mddev
>>
>> rcu_read_lock();
>> for (i=0; i<mddev->raid_disks; i++) {
>> - mdk_rdev_t *rdev = conf->disks[i].rdev;
>> + mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
>> if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
>> request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
>>
>> @@ -1583,7 +1583,7 @@ static int raid6_issue_flush(request_que
>>
>> rcu_read_lock();
>> for (i=0; i<mddev->raid_disks && ret == 0; i++) {
>> - mdk_rdev_t *rdev = conf->disks[i].rdev;
>> + mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
>> if (rdev && !rdev->faulty) {
>> struct block_device *bdev = rdev->bdev;
>> request_queue_t *r_queue = bdev_get_queue(bdev);

2005-10-13 04:14:38

by NeilBrown

[permalink] [raw]
Subject: Re: [RFC][PATCH] identify raid rcu-protected pointer

On Wednesday October 5, [email protected] wrote:
> Please consider the following patch submittal
> to insert rcu_dereference() to make explicit
> the object of the rcu read-side critical sections.
> Thank you.

Thank for the patch and detailed explanations.

I agree with all the places where you add rcu_dereference.

I do not agree with the place you added rcu_assign_pointer.
rdev->mddev is of no relevance to the rcu usage. However I think it
is appropriate to use rcu_assign_pointer when setting the pointer that
rcu_dereference dereferences. I had made appropriate changes.

The place where you put the comment about incrementing new_disk does
*not* require an increment. That loop actually runs backwards, and
new_disk is decremented in the body of the loop.

You moved 'rcu_read_unlock' down several lines in raid5.c, but didn't
make an equivalent change in similar code in raid6main.c. I'm
confident that this change isn't needed. After nr_pending has been
incremented, there is no longer a need to hold the rcu read lock.

I have improved the code in raid10.c so that the rcu_dereference'd
pointer is held in a temporary variable, which is much cleaner than
the existing code.


My revised patch follows. I will submit it to Andrew if I hear
nothing negative.

Thanks,
NeilBrown

-----
Provide proper rcu_dereference / rcu_assign_pointer annotations in md


Acked-by: <[email protected]>
Signed-off-by: <[email protected]>
Signed-off-by: Neil Brown <[email protected]>

### Diffstat output
./drivers/md/multipath.c | 8 ++++----
./drivers/md/raid1.c | 20 ++++++++++----------
./drivers/md/raid10.c | 30 ++++++++++++++++--------------
./drivers/md/raid5.c | 8 ++++----
./drivers/md/raid6main.c | 8 ++++----
5 files changed, 38 insertions(+), 36 deletions(-)

diff ./drivers/md/multipath.c~current~ ./drivers/md/multipath.c
--- ./drivers/md/multipath.c~current~ 2005-10-13 13:42:05.000000000 +1000
+++ ./drivers/md/multipath.c 2005-10-13 13:49:39.000000000 +1000
@@ -63,7 +63,7 @@ static int multipath_map (multipath_conf

rcu_read_lock();
for (i = 0; i < disks; i++) {
- mdk_rdev_t *rdev = conf->multipaths[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev);
if (rdev && rdev->in_sync) {
atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
@@ -139,7 +139,7 @@ static void unplug_slaves(mddev_t *mddev

rcu_read_lock();
for (i=0; i<mddev->raid_disks; i++) {
- mdk_rdev_t *rdev = conf->multipaths[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev);
if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
request_queue_t *r_queue = bdev_get_queue(rdev->bdev);

@@ -228,7 +228,7 @@ static int multipath_issue_flush(request

rcu_read_lock();
for (i=0; i<mddev->raid_disks && ret == 0; i++) {
- mdk_rdev_t *rdev = conf->multipaths[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev);
if (rdev && !rdev->faulty) {
struct block_device *bdev = rdev->bdev;
request_queue_t *r_queue = bdev_get_queue(bdev);
@@ -335,7 +335,7 @@ static int multipath_add_disk(mddev_t *m
conf->working_disks++;
rdev->raid_disk = path;
rdev->in_sync = 1;
- p->rdev = rdev;
+ rcu_assign_pointer(p->rdev, rdev);
found = 1;
}


diff ./drivers/md/raid1.c~current~ ./drivers/md/raid1.c
--- ./drivers/md/raid1.c~current~ 2005-10-13 13:42:05.000000000 +1000
+++ ./drivers/md/raid1.c 2005-10-13 13:50:32.000000000 +1000
@@ -416,10 +416,10 @@ static int read_balance(conf_t *conf, r1
/* Choose the first operation device, for consistancy */
new_disk = 0;

- for (rdev = conf->mirrors[new_disk].rdev;
+ for (rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
!rdev || !rdev->in_sync
|| test_bit(WriteMostly, &rdev->flags);
- rdev = conf->mirrors[++new_disk].rdev) {
+ rdev = rcu_dereference(conf->mirrors[++new_disk].rdev)) {

if (rdev && rdev->in_sync)
wonly_disk = new_disk;
@@ -434,10 +434,10 @@ static int read_balance(conf_t *conf, r1


/* make sure the disk is operational */
- for (rdev = conf->mirrors[new_disk].rdev;
+ for (rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
!rdev || !rdev->in_sync ||
test_bit(WriteMostly, &rdev->flags);
- rdev = conf->mirrors[new_disk].rdev) {
+ rdev = rcu_dereference(conf->mirrors[new_disk].rdev)) {

if (rdev && rdev->in_sync)
wonly_disk = new_disk;
@@ -474,7 +474,7 @@ static int read_balance(conf_t *conf, r1
disk = conf->raid_disks;
disk--;

- rdev = conf->mirrors[disk].rdev;
+ rdev = rcu_dereference(conf->mirrors[disk].rdev);

if (!rdev ||
!rdev->in_sync ||
@@ -496,7 +496,7 @@ static int read_balance(conf_t *conf, r1


if (new_disk >= 0) {
- rdev = conf->mirrors[new_disk].rdev;
+ rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
if (!rdev)
goto retry;
atomic_inc(&rdev->nr_pending);
@@ -522,7 +522,7 @@ static void unplug_slaves(mddev_t *mddev

rcu_read_lock();
for (i=0; i<mddev->raid_disks; i++) {
- mdk_rdev_t *rdev = conf->mirrors[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
request_queue_t *r_queue = bdev_get_queue(rdev->bdev);

@@ -556,7 +556,7 @@ static int raid1_issue_flush(request_que

rcu_read_lock();
for (i=0; i<mddev->raid_disks && ret == 0; i++) {
- mdk_rdev_t *rdev = conf->mirrors[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
if (rdev && !rdev->faulty) {
struct block_device *bdev = rdev->bdev;
request_queue_t *r_queue = bdev_get_queue(bdev);
@@ -732,7 +732,7 @@ static int make_request(request_queue_t
#endif
rcu_read_lock();
for (i = 0; i < disks; i++) {
- if ((rdev=conf->mirrors[i].rdev) != NULL &&
+ if ((rdev=rcu_dereference(conf->mirrors[i].rdev)) != NULL &&
!rdev->faulty) {
atomic_inc(&rdev->nr_pending);
if (rdev->faulty) {
@@ -958,7 +958,7 @@ static int raid1_add_disk(mddev_t *mddev
found = 1;
if (rdev->saved_raid_disk != mirror)
conf->fullsync = 1;
- p->rdev = rdev;
+ rcu_assign_pointer(p->rdev, rdev);
break;
}


diff ./drivers/md/raid10.c~current~ ./drivers/md/raid10.c
--- ./drivers/md/raid10.c~current~ 2005-10-13 13:42:05.000000000 +1000
+++ ./drivers/md/raid10.c 2005-10-13 14:09:29.000000000 +1000
@@ -496,6 +496,7 @@ static int read_balance(conf_t *conf, r1
int disk, slot, nslot;
const int sectors = r10_bio->sectors;
sector_t new_distance, current_distance;
+ mdk_rdev_t *rdev;

raid10_find_phys(conf, r10_bio);
rcu_read_lock();
@@ -510,8 +511,8 @@ static int read_balance(conf_t *conf, r1
slot = 0;
disk = r10_bio->devs[slot].devnum;

- while (!conf->mirrors[disk].rdev ||
- !conf->mirrors[disk].rdev->in_sync) {
+ while ((rdev = rcu_dereference(conf->mirrors[disk].rdev)) == NULL ||
+ !rdev->in_sync) {
slot++;
if (slot == conf->copies) {
slot = 0;
@@ -527,8 +528,8 @@ static int read_balance(conf_t *conf, r1
/* make sure the disk is operational */
slot = 0;
disk = r10_bio->devs[slot].devnum;
- while (!conf->mirrors[disk].rdev ||
- !conf->mirrors[disk].rdev->in_sync) {
+ while ((rdev=rcu_dereference(conf->mirrors[disk].rdev)) == NULL ||
+ !rdev->in_sync) {
slot ++;
if (slot == conf->copies) {
disk = -1;
@@ -547,11 +548,11 @@ static int read_balance(conf_t *conf, r1
int ndisk = r10_bio->devs[nslot].devnum;


- if (!conf->mirrors[ndisk].rdev ||
- !conf->mirrors[ndisk].rdev->in_sync)
+ if ((rdev=rcu_dereference(conf->mirrors[ndisk].rdev)) == NULL ||
+ !rdev->in_sync)
continue;

- if (!atomic_read(&conf->mirrors[ndisk].rdev->nr_pending)) {
+ if (!atomic_read(&rdev->nr_pending)) {
disk = ndisk;
slot = nslot;
break;
@@ -569,7 +570,7 @@ rb_out:
r10_bio->read_slot = slot;
/* conf->next_seq_sect = this_sector + sectors;*/

- if (disk >= 0 && conf->mirrors[disk].rdev)
+ if (disk >= 0 && (rdev=rcu_dereference(conf->mirrors[disk].rdev))!= NULL)
atomic_inc(&conf->mirrors[disk].rdev->nr_pending);
rcu_read_unlock();

@@ -583,7 +584,7 @@ static void unplug_slaves(mddev_t *mddev

rcu_read_lock();
for (i=0; i<mddev->raid_disks; i++) {
- mdk_rdev_t *rdev = conf->mirrors[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
request_queue_t *r_queue = bdev_get_queue(rdev->bdev);

@@ -614,7 +615,7 @@ static int raid10_issue_flush(request_qu

rcu_read_lock();
for (i=0; i<mddev->raid_disks && ret == 0; i++) {
- mdk_rdev_t *rdev = conf->mirrors[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
if (rdev && !rdev->faulty) {
struct block_device *bdev = rdev->bdev;
request_queue_t *r_queue = bdev_get_queue(bdev);
@@ -772,9 +773,10 @@ static int make_request(request_queue_t
rcu_read_lock();
for (i = 0; i < conf->copies; i++) {
int d = r10_bio->devs[i].devnum;
- if (conf->mirrors[d].rdev &&
- !conf->mirrors[d].rdev->faulty) {
- atomic_inc(&conf->mirrors[d].rdev->nr_pending);
+ mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[d].rdev);
+ if (rdev &&
+ !rdev->faulty) {
+ atomic_inc(&rdev->nr_pending);
r10_bio->devs[i].bio = bio;
} else
r10_bio->devs[i].bio = NULL;
@@ -984,7 +986,7 @@ static int raid10_add_disk(mddev_t *mdde
p->head_position = 0;
rdev->raid_disk = mirror;
found = 1;
- p->rdev = rdev;
+ rcu_assign_pointer(p->rdev, rdev);
break;
}


diff ./drivers/md/raid5.c~current~ ./drivers/md/raid5.c
--- ./drivers/md/raid5.c~current~ 2005-10-13 13:42:05.000000000 +1000
+++ ./drivers/md/raid5.c 2005-10-13 14:00:38.000000000 +1000
@@ -1383,7 +1383,7 @@ static void handle_stripe(struct stripe_
bi->bi_end_io = raid5_end_read_request;

rcu_read_lock();
- rdev = conf->disks[i].rdev;
+ rdev = rcu_dereference(conf->disks[i].rdev);
if (rdev && rdev->faulty)
rdev = NULL;
if (rdev)
@@ -1457,7 +1457,7 @@ static void unplug_slaves(mddev_t *mddev

rcu_read_lock();
for (i=0; i<mddev->raid_disks; i++) {
- mdk_rdev_t *rdev = conf->disks[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
request_queue_t *r_queue = bdev_get_queue(rdev->bdev);

@@ -1502,7 +1502,7 @@ static int raid5_issue_flush(request_que

rcu_read_lock();
for (i=0; i<mddev->raid_disks && ret == 0; i++) {
- mdk_rdev_t *rdev = conf->disks[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
if (rdev && !rdev->faulty) {
struct block_device *bdev = rdev->bdev;
request_queue_t *r_queue = bdev_get_queue(bdev);
@@ -2178,7 +2178,7 @@ static int raid5_add_disk(mddev_t *mddev
found = 1;
if (rdev->saved_raid_disk != disk)
conf->fullsync = 1;
- p->rdev = rdev;
+ rcu_assign_pointer(p->rdev, rdev);
break;
}
print_raid5_conf(conf);

diff ./drivers/md/raid6main.c~current~ ./drivers/md/raid6main.c
--- ./drivers/md/raid6main.c~current~ 2005-10-13 13:42:05.000000000 +1000
+++ ./drivers/md/raid6main.c 2005-10-13 13:57:08.000000000 +1000
@@ -1464,7 +1464,7 @@ static void handle_stripe(struct stripe_
bi->bi_end_io = raid6_end_read_request;

rcu_read_lock();
- rdev = conf->disks[i].rdev;
+ rdev = rcu_dereference(conf->disks[i].rdev);
if (rdev && rdev->faulty)
rdev = NULL;
if (rdev)
@@ -1538,7 +1538,7 @@ static void unplug_slaves(mddev_t *mddev

rcu_read_lock();
for (i=0; i<mddev->raid_disks; i++) {
- mdk_rdev_t *rdev = conf->disks[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
request_queue_t *r_queue = bdev_get_queue(rdev->bdev);

@@ -1583,7 +1583,7 @@ static int raid6_issue_flush(request_que

rcu_read_lock();
for (i=0; i<mddev->raid_disks && ret == 0; i++) {
- mdk_rdev_t *rdev = conf->disks[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
if (rdev && !rdev->faulty) {
struct block_device *bdev = rdev->bdev;
request_queue_t *r_queue = bdev_get_queue(bdev);
@@ -2158,7 +2158,7 @@ static int raid6_add_disk(mddev_t *mddev
found = 1;
if (rdev->saved_raid_disk != disk)
conf->fullsync = 1;
- p->rdev = rdev;
+ rcu_assign_pointer(p->rdev, rdev);
break;
}
print_raid6_conf(conf);

2005-10-13 06:47:35

by Suzanne Wood

[permalink] [raw]
Subject: Re: [RFC][PATCH] identify raid rcu-protected pointer


On Wednesday, October 12, 2005, Neil Brown [email protected] wrote:
>
> On Wednesday October 5, [email protected] wrote:
>> Please consider the following patch submittal
>> to insert rcu_dereference() to make explicit
>> the object of the rcu read-side critical sections.
>> Thank you.
>
> Thank for the patch and detailed explanations.
>
> I agree with all the places where you add rcu_dereference.
>
> I do not agree with the place you added rcu_assign_pointer.
> rdev->mddev is of no relevance to the rcu usage. However I think it
> is appropriate to use rcu_assign_pointer when setting the pointer that
> rcu_dereference dereferences. I had made appropriate changes.
>
> The place where you put the comment about incrementing new_disk does
> *not* require an increment. That loop actually runs backwards, and
> new_disk is decremented in the body of the loop.
>
> You moved 'rcu_read_unlock' down several lines in raid5.c, but didn't
> make an equivalent change in similar code in raid6main.c. I'm
> confident that this change isn't needed. After nr_pending has been
> incremented, there is no longer a need to hold the rcu read lock.
>
> I have improved the code in raid10.c so that the rcu_dereference'd
> pointer is held in a temporary variable, which is much cleaner than
> the existing code.
>
>
> My revised patch follows. I will submit it to Andrew if I hear
> nothing negative.
>
> Thanks,
> NeilBrown

This looks very good. Thank you for clarifying the implication of the
increment of nr_pending and for the rcu_assign_pointer() corrections.

Thank you.
Suzanne Wood

>
> -----
> Provide proper rcu_dereference / rcu_assign_pointer annotations in md
>
>
> Acked-by: <[email protected]>
> Signed-off-by: <[email protected]>
> Signed-off-by: Neil Brown <[email protected]>
>
> ### Diffstat output
> ./drivers/md/multipath.c | 8 ++++----
> ./drivers/md/raid1.c | 20 ++++++++++----------
> ./drivers/md/raid10.c | 30 ++++++++++++++++--------------
> ./drivers/md/raid5.c | 8 ++++----
> ./drivers/md/raid6main.c | 8 ++++----
> 5 files changed, 38 insertions(+), 36 deletions(-)
>
> diff ./drivers/md/multipath.c~current~ ./drivers/md/multipath.c
> --- ./drivers/md/multipath.c~current~ 2005-10-13 13:42:05.000000000 +1000
> +++ ./drivers/md/multipath.c 2005-10-13 13:49:39.000000000 +1000
> @@ -63,7 +63,7 @@ static int multipath_map (multipath_conf
>
> rcu_read_lock();
> for (i = 0; i < disks; i++) {
> - mdk_rdev_t *rdev = conf->multipaths[i].rdev;
> + mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev);
> if (rdev && rdev->in_sync) {
> atomic_inc(&rdev->nr_pending);
> rcu_read_unlock();
> @@ -139,7 +139,7 @@ static void unplug_slaves(mddev_t *mddev
>
> rcu_read_lock();
> for (i=0; i<mddev->raid_disks; i++) {
> - mdk_rdev_t *rdev = conf->multipaths[i].rdev;
> + mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev);
> if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
> request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
>
> @@ -228,7 +228,7 @@ static int multipath_issue_flush(request
>
> rcu_read_lock();
> for (i=0; i<mddev->raid_disks && ret == 0; i++) {
> - mdk_rdev_t *rdev = conf->multipaths[i].rdev;
> + mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev);
> if (rdev && !rdev->faulty) {
> struct block_device *bdev = rdev->bdev;
> request_queue_t *r_queue = bdev_get_queue(bdev);
> @@ -335,7 +335,7 @@ static int multipath_add_disk(mddev_t *m
> conf->working_disks++;
> rdev->raid_disk = path;
> rdev->in_sync = 1;
> - p->rdev = rdev;
> + rcu_assign_pointer(p->rdev, rdev);
> found = 1;
> }
>
>
> diff ./drivers/md/raid1.c~current~ ./drivers/md/raid1.c
> --- ./drivers/md/raid1.c~current~ 2005-10-13 13:42:05.000000000 +1000
> +++ ./drivers/md/raid1.c 2005-10-13 13:50:32.000000000 +1000
> @@ -416,10 +416,10 @@ static int read_balance(conf_t *conf, r1
> /* Choose the first operation device, for consistancy */
> new_disk = 0;
>
> - for (rdev = conf->mirrors[new_disk].rdev;
> + for (rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
> !rdev || !rdev->in_sync
> || test_bit(WriteMostly, &rdev->flags);
> - rdev = conf->mirrors[++new_disk].rdev) {
> + rdev = rcu_dereference(conf->mirrors[++new_disk].rdev)) {
>
> if (rdev && rdev->in_sync)
> wonly_disk = new_disk;
> @@ -434,10 +434,10 @@ static int read_balance(conf_t *conf, r1
>
>
> /* make sure the disk is operational */
> - for (rdev = conf->mirrors[new_disk].rdev;
> + for (rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
> !rdev || !rdev->in_sync ||
> test_bit(WriteMostly, &rdev->flags);
> - rdev = conf->mirrors[new_disk].rdev) {
> + rdev = rcu_dereference(conf->mirrors[new_disk].rdev)) {
>
> if (rdev && rdev->in_sync)
> wonly_disk = new_disk;
> @@ -474,7 +474,7 @@ static int read_balance(conf_t *conf, r1
> disk = conf->raid_disks;
> disk--;
>
> - rdev = conf->mirrors[disk].rdev;
> + rdev = rcu_dereference(conf->mirrors[disk].rdev);
>
> if (!rdev ||
> !rdev->in_sync ||
> @@ -496,7 +496,7 @@ static int read_balance(conf_t *conf, r1
>
>
> if (new_disk >= 0) {
> - rdev = conf->mirrors[new_disk].rdev;
> + rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
> if (!rdev)
> goto retry;
> atomic_inc(&rdev->nr_pending);
> @@ -522,7 +522,7 @@ static void unplug_slaves(mddev_t *mddev
>
> rcu_read_lock();
> for (i=0; i<mddev->raid_disks; i++) {
> - mdk_rdev_t *rdev = conf->mirrors[i].rdev;
> + mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
> if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
> request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
>
> @@ -556,7 +556,7 @@ static int raid1_issue_flush(request_que
>
> rcu_read_lock();
> for (i=0; i<mddev->raid_disks && ret == 0; i++) {
> - mdk_rdev_t *rdev = conf->mirrors[i].rdev;
> + mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
> if (rdev && !rdev->faulty) {
> struct block_device *bdev = rdev->bdev;
> request_queue_t *r_queue = bdev_get_queue(bdev);
> @@ -732,7 +732,7 @@ static int make_request(request_queue_t
> #endif
> rcu_read_lock();
> for (i = 0; i < disks; i++) {
> - if ((rdev=conf->mirrors[i].rdev) != NULL &&
> + if ((rdev=rcu_dereference(conf->mirrors[i].rdev)) != NULL &&
> !rdev->faulty) {
> atomic_inc(&rdev->nr_pending);
> if (rdev->faulty) {
> @@ -958,7 +958,7 @@ static int raid1_add_disk(mddev_t *mddev
> found = 1;
> if (rdev->saved_raid_disk != mirror)
> conf->fullsync = 1;
> - p->rdev = rdev;
> + rcu_assign_pointer(p->rdev, rdev);
> break;
> }
>
>
> diff ./drivers/md/raid10.c~current~ ./drivers/md/raid10.c
> --- ./drivers/md/raid10.c~current~ 2005-10-13 13:42:05.000000000 +1000
> +++ ./drivers/md/raid10.c 2005-10-13 14:09:29.000000000 +1000
> @@ -496,6 +496,7 @@ static int read_balance(conf_t *conf, r1
> int disk, slot, nslot;
> const int sectors = r10_bio->sectors;
> sector_t new_distance, current_distance;
> + mdk_rdev_t *rdev;
>
> raid10_find_phys(conf, r10_bio);
> rcu_read_lock();
> @@ -510,8 +511,8 @@ static int read_balance(conf_t *conf, r1
> slot = 0;
> disk = r10_bio->devs[slot].devnum;
>
> - while (!conf->mirrors[disk].rdev ||
> - !conf->mirrors[disk].rdev->in_sync) {
> + while ((rdev = rcu_dereference(conf->mirrors[disk].rdev)) == NULL ||
> + !rdev->in_sync) {
> slot++;
> if (slot == conf->copies) {
> slot = 0;
> @@ -527,8 +528,8 @@ static int read_balance(conf_t *conf, r1
> /* make sure the disk is operational */
> slot = 0;
> disk = r10_bio->devs[slot].devnum;
> - while (!conf->mirrors[disk].rdev ||
> - !conf->mirrors[disk].rdev->in_sync) {
> + while ((rdev=rcu_dereference(conf->mirrors[disk].rdev)) == NULL ||
> + !rdev->in_sync) {
> slot ++;
> if (slot == conf->copies) {
> disk = -1;
> @@ -547,11 +548,11 @@ static int read_balance(conf_t *conf, r1
> int ndisk = r10_bio->devs[nslot].devnum;
>
>
> - if (!conf->mirrors[ndisk].rdev ||
> - !conf->mirrors[ndisk].rdev->in_sync)
> + if ((rdev=rcu_dereference(conf->mirrors[ndisk].rdev)) == NULL ||
> + !rdev->in_sync)
> continue;
>
> - if (!atomic_read(&conf->mirrors[ndisk].rdev->nr_pending)) {
> + if (!atomic_read(&rdev->nr_pending)) {
> disk = ndisk;
> slot = nslot;
> break;
> @@ -569,7 +570,7 @@ rb_out:
> r10_bio->read_slot = slot;
> /* conf->next_seq_sect = this_sector + sectors;*/
>
> - if (disk >= 0 && conf->mirrors[disk].rdev)
> + if (disk >= 0 && (rdev=rcu_dereference(conf->mirrors[disk].rdev))!= NULL)
> atomic_inc(&conf->mirrors[disk].rdev->nr_pending);
> rcu_read_unlock();
>
> @@ -583,7 +584,7 @@ static void unplug_slaves(mddev_t *mddev
>
> rcu_read_lock();
> for (i=0; i<mddev->raid_disks; i++) {
> - mdk_rdev_t *rdev = conf->mirrors[i].rdev;
> + mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
> if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
> request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
>
> @@ -614,7 +615,7 @@ static int raid10_issue_flush(request_qu
>
> rcu_read_lock();
> for (i=0; i<mddev->raid_disks && ret == 0; i++) {
> - mdk_rdev_t *rdev = conf->mirrors[i].rdev;
> + mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
> if (rdev && !rdev->faulty) {
> struct block_device *bdev = rdev->bdev;
> request_queue_t *r_queue = bdev_get_queue(bdev);
> @@ -772,9 +773,10 @@ static int make_request(request_queue_t
> rcu_read_lock();
> for (i = 0; i < conf->copies; i++) {
> int d = r10_bio->devs[i].devnum;
> - if (conf->mirrors[d].rdev &&
> - !conf->mirrors[d].rdev->faulty) {
> - atomic_inc(&conf->mirrors[d].rdev->nr_pending);
> + mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[d].rdev);
> + if (rdev &&
> + !rdev->faulty) {
> + atomic_inc(&rdev->nr_pending);
> r10_bio->devs[i].bio = bio;
> } else
> r10_bio->devs[i].bio = NULL;
> @@ -984,7 +986,7 @@ static int raid10_add_disk(mddev_t *mdde
> p->head_position = 0;
> rdev->raid_disk = mirror;
> found = 1;
> - p->rdev = rdev;
> + rcu_assign_pointer(p->rdev, rdev);
> break;
> }
>
>
> diff ./drivers/md/raid5.c~current~ ./drivers/md/raid5.c
> --- ./drivers/md/raid5.c~current~ 2005-10-13 13:42:05.000000000 +1000
> +++ ./drivers/md/raid5.c 2005-10-13 14:00:38.000000000 +1000
> @@ -1383,7 +1383,7 @@ static void handle_stripe(struct stripe_
> bi->bi_end_io = raid5_end_read_request;
>
> rcu_read_lock();
> - rdev = conf->disks[i].rdev;
> + rdev = rcu_dereference(conf->disks[i].rdev);
> if (rdev && rdev->faulty)
> rdev = NULL;
> if (rdev)
> @@ -1457,7 +1457,7 @@ static void unplug_slaves(mddev_t *mddev
>
> rcu_read_lock();
> for (i=0; i<mddev->raid_disks; i++) {
> - mdk_rdev_t *rdev = conf->disks[i].rdev;
> + mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
> if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
> request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
>
> @@ -1502,7 +1502,7 @@ static int raid5_issue_flush(request_que
>
> rcu_read_lock();
> for (i=0; i<mddev->raid_disks && ret == 0; i++) {
> - mdk_rdev_t *rdev = conf->disks[i].rdev;
> + mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
> if (rdev && !rdev->faulty) {
> struct block_device *bdev = rdev->bdev;
> request_queue_t *r_queue = bdev_get_queue(bdev);
> @@ -2178,7 +2178,7 @@ static int raid5_add_disk(mddev_t *mddev
> found = 1;
> if (rdev->saved_raid_disk != disk)
> conf->fullsync = 1;
> - p->rdev = rdev;
> + rcu_assign_pointer(p->rdev, rdev);
> break;
> }
> print_raid5_conf(conf);
>
> diff ./drivers/md/raid6main.c~current~ ./drivers/md/raid6main.c
> --- ./drivers/md/raid6main.c~current~ 2005-10-13 13:42:05.000000000 +1000
> +++ ./drivers/md/raid6main.c 2005-10-13 13:57:08.000000000 +1000
> @@ -1464,7 +1464,7 @@ static void handle_stripe(struct stripe_
> bi->bi_end_io = raid6_end_read_request;
>
> rcu_read_lock();
> - rdev = conf->disks[i].rdev;
> + rdev = rcu_dereference(conf->disks[i].rdev);
> if (rdev && rdev->faulty)
> rdev = NULL;
> if (rdev)
> @@ -1538,7 +1538,7 @@ static void unplug_slaves(mddev_t *mddev
>
> rcu_read_lock();
> for (i=0; i<mddev->raid_disks; i++) {
> - mdk_rdev_t *rdev = conf->disks[i].rdev;
> + mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
> if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
> request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
>
> @@ -1583,7 +1583,7 @@ static int raid6_issue_flush(request_que
>
> rcu_read_lock();
> for (i=0; i<mddev->raid_disks && ret == 0; i++) {
> - mdk_rdev_t *rdev = conf->disks[i].rdev;
> + mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
> if (rdev && !rdev->faulty) {
> struct block_device *bdev = rdev->bdev;
> request_queue_t *r_queue = bdev_get_queue(bdev);
> @@ -2158,7 +2158,7 @@ static int raid6_add_disk(mddev_t *mddev
> found = 1;
> if (rdev->saved_raid_disk != disk)
> conf->fullsync = 1;
> - p->rdev = rdev;
> + rcu_assign_pointer(p->rdev, rdev);
> break;
> }
> print_raid6_conf(conf);
>

2005-10-13 23:34:14

by Paul E. McKenney

[permalink] [raw]
Subject: Re: [RFC][PATCH] identify raid rcu-protected pointer

On Thu, Oct 13, 2005 at 02:14:17PM +1000, Neil Brown wrote:
> On Wednesday October 5, [email protected] wrote:
> > Please consider the following patch submittal
> > to insert rcu_dereference() to make explicit
> > the object of the rcu read-side critical sections.
> > Thank you.
>
> Thank for the patch and detailed explanations.
>
> I agree with all the places where you add rcu_dereference.
>
> I do not agree with the place you added rcu_assign_pointer.
> rdev->mddev is of no relevance to the rcu usage. However I think it
> is appropriate to use rcu_assign_pointer when setting the pointer that
> rcu_dereference dereferences. I had made appropriate changes.
>
> The place where you put the comment about incrementing new_disk does
> *not* require an increment. That loop actually runs backwards, and
> new_disk is decremented in the body of the loop.
>
> You moved 'rcu_read_unlock' down several lines in raid5.c, but didn't
> make an equivalent change in similar code in raid6main.c. I'm
> confident that this change isn't needed. After nr_pending has been
> incremented, there is no longer a need to hold the rcu read lock.
>
> I have improved the code in raid10.c so that the rcu_dereference'd
> pointer is held in a temporary variable, which is much cleaner than
> the existing code.
>
>
> My revised patch follows. I will submit it to Andrew if I hear
> nothing negative.

Looks good to me!

Thanx, Paul

> Thanks,
> NeilBrown
>
> -----
> Provide proper rcu_dereference / rcu_assign_pointer annotations in md
>
>
> Acked-by: <[email protected]>
> Signed-off-by: <[email protected]>
> Signed-off-by: Neil Brown <[email protected]>
>
> ### Diffstat output
> ./drivers/md/multipath.c | 8 ++++----
> ./drivers/md/raid1.c | 20 ++++++++++----------
> ./drivers/md/raid10.c | 30 ++++++++++++++++--------------
> ./drivers/md/raid5.c | 8 ++++----
> ./drivers/md/raid6main.c | 8 ++++----
> 5 files changed, 38 insertions(+), 36 deletions(-)
>
> diff ./drivers/md/multipath.c~current~ ./drivers/md/multipath.c
> --- ./drivers/md/multipath.c~current~ 2005-10-13 13:42:05.000000000 +1000
> +++ ./drivers/md/multipath.c 2005-10-13 13:49:39.000000000 +1000
> @@ -63,7 +63,7 @@ static int multipath_map (multipath_conf
>
> rcu_read_lock();
> for (i = 0; i < disks; i++) {
> - mdk_rdev_t *rdev = conf->multipaths[i].rdev;
> + mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev);
> if (rdev && rdev->in_sync) {
> atomic_inc(&rdev->nr_pending);
> rcu_read_unlock();
> @@ -139,7 +139,7 @@ static void unplug_slaves(mddev_t *mddev
>
> rcu_read_lock();
> for (i=0; i<mddev->raid_disks; i++) {
> - mdk_rdev_t *rdev = conf->multipaths[i].rdev;
> + mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev);
> if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
> request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
>
> @@ -228,7 +228,7 @@ static int multipath_issue_flush(request
>
> rcu_read_lock();
> for (i=0; i<mddev->raid_disks && ret == 0; i++) {
> - mdk_rdev_t *rdev = conf->multipaths[i].rdev;
> + mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev);
> if (rdev && !rdev->faulty) {
> struct block_device *bdev = rdev->bdev;
> request_queue_t *r_queue = bdev_get_queue(bdev);
> @@ -335,7 +335,7 @@ static int multipath_add_disk(mddev_t *m
> conf->working_disks++;
> rdev->raid_disk = path;
> rdev->in_sync = 1;
> - p->rdev = rdev;
> + rcu_assign_pointer(p->rdev, rdev);
> found = 1;
> }
>
>
> diff ./drivers/md/raid1.c~current~ ./drivers/md/raid1.c
> --- ./drivers/md/raid1.c~current~ 2005-10-13 13:42:05.000000000 +1000
> +++ ./drivers/md/raid1.c 2005-10-13 13:50:32.000000000 +1000
> @@ -416,10 +416,10 @@ static int read_balance(conf_t *conf, r1
> /* Choose the first operation device, for consistancy */
> new_disk = 0;
>
> - for (rdev = conf->mirrors[new_disk].rdev;
> + for (rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
> !rdev || !rdev->in_sync
> || test_bit(WriteMostly, &rdev->flags);
> - rdev = conf->mirrors[++new_disk].rdev) {
> + rdev = rcu_dereference(conf->mirrors[++new_disk].rdev)) {
>
> if (rdev && rdev->in_sync)
> wonly_disk = new_disk;
> @@ -434,10 +434,10 @@ static int read_balance(conf_t *conf, r1
>
>
> /* make sure the disk is operational */
> - for (rdev = conf->mirrors[new_disk].rdev;
> + for (rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
> !rdev || !rdev->in_sync ||
> test_bit(WriteMostly, &rdev->flags);
> - rdev = conf->mirrors[new_disk].rdev) {
> + rdev = rcu_dereference(conf->mirrors[new_disk].rdev)) {
>
> if (rdev && rdev->in_sync)
> wonly_disk = new_disk;
> @@ -474,7 +474,7 @@ static int read_balance(conf_t *conf, r1
> disk = conf->raid_disks;
> disk--;
>
> - rdev = conf->mirrors[disk].rdev;
> + rdev = rcu_dereference(conf->mirrors[disk].rdev);
>
> if (!rdev ||
> !rdev->in_sync ||
> @@ -496,7 +496,7 @@ static int read_balance(conf_t *conf, r1
>
>
> if (new_disk >= 0) {
> - rdev = conf->mirrors[new_disk].rdev;
> + rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
> if (!rdev)
> goto retry;
> atomic_inc(&rdev->nr_pending);
> @@ -522,7 +522,7 @@ static void unplug_slaves(mddev_t *mddev
>
> rcu_read_lock();
> for (i=0; i<mddev->raid_disks; i++) {
> - mdk_rdev_t *rdev = conf->mirrors[i].rdev;
> + mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
> if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
> request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
>
> @@ -556,7 +556,7 @@ static int raid1_issue_flush(request_que
>
> rcu_read_lock();
> for (i=0; i<mddev->raid_disks && ret == 0; i++) {
> - mdk_rdev_t *rdev = conf->mirrors[i].rdev;
> + mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
> if (rdev && !rdev->faulty) {
> struct block_device *bdev = rdev->bdev;
> request_queue_t *r_queue = bdev_get_queue(bdev);
> @@ -732,7 +732,7 @@ static int make_request(request_queue_t
> #endif
> rcu_read_lock();
> for (i = 0; i < disks; i++) {
> - if ((rdev=conf->mirrors[i].rdev) != NULL &&
> + if ((rdev=rcu_dereference(conf->mirrors[i].rdev)) != NULL &&
> !rdev->faulty) {
> atomic_inc(&rdev->nr_pending);
> if (rdev->faulty) {
> @@ -958,7 +958,7 @@ static int raid1_add_disk(mddev_t *mddev
> found = 1;
> if (rdev->saved_raid_disk != mirror)
> conf->fullsync = 1;
> - p->rdev = rdev;
> + rcu_assign_pointer(p->rdev, rdev);
> break;
> }
>
>
> diff ./drivers/md/raid10.c~current~ ./drivers/md/raid10.c
> --- ./drivers/md/raid10.c~current~ 2005-10-13 13:42:05.000000000 +1000
> +++ ./drivers/md/raid10.c 2005-10-13 14:09:29.000000000 +1000
> @@ -496,6 +496,7 @@ static int read_balance(conf_t *conf, r1
> int disk, slot, nslot;
> const int sectors = r10_bio->sectors;
> sector_t new_distance, current_distance;
> + mdk_rdev_t *rdev;
>
> raid10_find_phys(conf, r10_bio);
> rcu_read_lock();
> @@ -510,8 +511,8 @@ static int read_balance(conf_t *conf, r1
> slot = 0;
> disk = r10_bio->devs[slot].devnum;
>
> - while (!conf->mirrors[disk].rdev ||
> - !conf->mirrors[disk].rdev->in_sync) {
> + while ((rdev = rcu_dereference(conf->mirrors[disk].rdev)) == NULL ||
> + !rdev->in_sync) {
> slot++;
> if (slot == conf->copies) {
> slot = 0;
> @@ -527,8 +528,8 @@ static int read_balance(conf_t *conf, r1
> /* make sure the disk is operational */
> slot = 0;
> disk = r10_bio->devs[slot].devnum;
> - while (!conf->mirrors[disk].rdev ||
> - !conf->mirrors[disk].rdev->in_sync) {
> + while ((rdev=rcu_dereference(conf->mirrors[disk].rdev)) == NULL ||
> + !rdev->in_sync) {
> slot ++;
> if (slot == conf->copies) {
> disk = -1;
> @@ -547,11 +548,11 @@ static int read_balance(conf_t *conf, r1
> int ndisk = r10_bio->devs[nslot].devnum;
>
>
> - if (!conf->mirrors[ndisk].rdev ||
> - !conf->mirrors[ndisk].rdev->in_sync)
> + if ((rdev=rcu_dereference(conf->mirrors[ndisk].rdev)) == NULL ||
> + !rdev->in_sync)
> continue;
>
> - if (!atomic_read(&conf->mirrors[ndisk].rdev->nr_pending)) {
> + if (!atomic_read(&rdev->nr_pending)) {
> disk = ndisk;
> slot = nslot;
> break;
> @@ -569,7 +570,7 @@ rb_out:
> r10_bio->read_slot = slot;
> /* conf->next_seq_sect = this_sector + sectors;*/
>
> - if (disk >= 0 && conf->mirrors[disk].rdev)
> + if (disk >= 0 && (rdev=rcu_dereference(conf->mirrors[disk].rdev))!= NULL)
> atomic_inc(&conf->mirrors[disk].rdev->nr_pending);
> rcu_read_unlock();
>
> @@ -583,7 +584,7 @@ static void unplug_slaves(mddev_t *mddev
>
> rcu_read_lock();
> for (i=0; i<mddev->raid_disks; i++) {
> - mdk_rdev_t *rdev = conf->mirrors[i].rdev;
> + mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
> if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
> request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
>
> @@ -614,7 +615,7 @@ static int raid10_issue_flush(request_qu
>
> rcu_read_lock();
> for (i=0; i<mddev->raid_disks && ret == 0; i++) {
> - mdk_rdev_t *rdev = conf->mirrors[i].rdev;
> + mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
> if (rdev && !rdev->faulty) {
> struct block_device *bdev = rdev->bdev;
> request_queue_t *r_queue = bdev_get_queue(bdev);
> @@ -772,9 +773,10 @@ static int make_request(request_queue_t
> rcu_read_lock();
> for (i = 0; i < conf->copies; i++) {
> int d = r10_bio->devs[i].devnum;
> - if (conf->mirrors[d].rdev &&
> - !conf->mirrors[d].rdev->faulty) {
> - atomic_inc(&conf->mirrors[d].rdev->nr_pending);
> + mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[d].rdev);
> + if (rdev &&
> + !rdev->faulty) {
> + atomic_inc(&rdev->nr_pending);
> r10_bio->devs[i].bio = bio;
> } else
> r10_bio->devs[i].bio = NULL;
> @@ -984,7 +986,7 @@ static int raid10_add_disk(mddev_t *mdde
> p->head_position = 0;
> rdev->raid_disk = mirror;
> found = 1;
> - p->rdev = rdev;
> + rcu_assign_pointer(p->rdev, rdev);
> break;
> }
>
>
> diff ./drivers/md/raid5.c~current~ ./drivers/md/raid5.c
> --- ./drivers/md/raid5.c~current~ 2005-10-13 13:42:05.000000000 +1000
> +++ ./drivers/md/raid5.c 2005-10-13 14:00:38.000000000 +1000
> @@ -1383,7 +1383,7 @@ static void handle_stripe(struct stripe_
> bi->bi_end_io = raid5_end_read_request;
>
> rcu_read_lock();
> - rdev = conf->disks[i].rdev;
> + rdev = rcu_dereference(conf->disks[i].rdev);
> if (rdev && rdev->faulty)
> rdev = NULL;
> if (rdev)
> @@ -1457,7 +1457,7 @@ static void unplug_slaves(mddev_t *mddev
>
> rcu_read_lock();
> for (i=0; i<mddev->raid_disks; i++) {
> - mdk_rdev_t *rdev = conf->disks[i].rdev;
> + mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
> if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
> request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
>
> @@ -1502,7 +1502,7 @@ static int raid5_issue_flush(request_que
>
> rcu_read_lock();
> for (i=0; i<mddev->raid_disks && ret == 0; i++) {
> - mdk_rdev_t *rdev = conf->disks[i].rdev;
> + mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
> if (rdev && !rdev->faulty) {
> struct block_device *bdev = rdev->bdev;
> request_queue_t *r_queue = bdev_get_queue(bdev);
> @@ -2178,7 +2178,7 @@ static int raid5_add_disk(mddev_t *mddev
> found = 1;
> if (rdev->saved_raid_disk != disk)
> conf->fullsync = 1;
> - p->rdev = rdev;
> + rcu_assign_pointer(p->rdev, rdev);
> break;
> }
> print_raid5_conf(conf);
>
> diff ./drivers/md/raid6main.c~current~ ./drivers/md/raid6main.c
> --- ./drivers/md/raid6main.c~current~ 2005-10-13 13:42:05.000000000 +1000
> +++ ./drivers/md/raid6main.c 2005-10-13 13:57:08.000000000 +1000
> @@ -1464,7 +1464,7 @@ static void handle_stripe(struct stripe_
> bi->bi_end_io = raid6_end_read_request;
>
> rcu_read_lock();
> - rdev = conf->disks[i].rdev;
> + rdev = rcu_dereference(conf->disks[i].rdev);
> if (rdev && rdev->faulty)
> rdev = NULL;
> if (rdev)
> @@ -1538,7 +1538,7 @@ static void unplug_slaves(mddev_t *mddev
>
> rcu_read_lock();
> for (i=0; i<mddev->raid_disks; i++) {
> - mdk_rdev_t *rdev = conf->disks[i].rdev;
> + mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
> if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
> request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
>
> @@ -1583,7 +1583,7 @@ static int raid6_issue_flush(request_que
>
> rcu_read_lock();
> for (i=0; i<mddev->raid_disks && ret == 0; i++) {
> - mdk_rdev_t *rdev = conf->disks[i].rdev;
> + mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
> if (rdev && !rdev->faulty) {
> struct block_device *bdev = rdev->bdev;
> request_queue_t *r_queue = bdev_get_queue(bdev);
> @@ -2158,7 +2158,7 @@ static int raid6_add_disk(mddev_t *mddev
> found = 1;
> if (rdev->saved_raid_disk != disk)
> conf->fullsync = 1;
> - p->rdev = rdev;
> + rcu_assign_pointer(p->rdev, rdev);
> break;
> }
> print_raid6_conf(conf);
>