From: Mike Christie
Subject: Re: [PATCH 1/1] mm/block: convert rw_page users to bio op use
Date: Fri, 5 Aug 2016 13:05:03 -0500
Message-ID: <57A4D54F.4080307@redhat.com>
References: <1470331056-796-1-git-send-email-mchristi@redhat.com>
 <20160805075756.GA28577@infradead.org>
 <10142950-64f9-4054-55c7-82f0758d2099@kernel.dk>
Mime-Version: 1.0
Content-Type: text/plain; charset=windows-1252
Content-Transfer-Encoding: 7bit
Cc: ross.zwisler@linux.intel.com, linux-ext4@vger.kernel.org,
 linux-block@vger.kernel.org, linux-fsdevel@vger.kernel.org,
 david@fromorbit.com, minchan@kernel.org, ngupta@vflare.org,
 vishal.l.verma@intel.com
To: Jens Axboe, Christoph Hellwig
Sender: linux-fsdevel-owner@vger.kernel.org
List-Id: linux-ext4.vger.kernel.org

On 08/05/2016 09:09 AM, Jens Axboe wrote:
> On 08/05/2016 07:52 AM, Jens Axboe wrote:
>> On 08/05/2016 01:57 AM, Christoph Hellwig wrote:
>>>>
>>>> The rw_page users were not converted to use bio/req ops. As a result
>>>> bdev_write_page is not passing down REQ_OP_WRITE and the IOs will
>>>> be sent down as reads.
>>>
>>> Can we just get rid of REQ_OP_WRITE enum for the ->rw_page interface
>>> and pass a 'bool write'? If not I'd prefer to avoid using op_is_write
>>> as much as possible - it's a confusing interface if we really want
>>> to do a switch on read vs write vs invalid.
>>
>> That's a really good point, especially since the op_flags was dropped as
>> well. I'll cook that up.
>
> How about something like this? We don't bleed into the non-core bits
> anymore, so we can move those back under CONFIG_BLOCK too.
>
> diff --git a/drivers/block/brd.c b/drivers/block/brd.c
> index 3439b28cce8b..0c76d4016eeb 100644
> --- a/drivers/block/brd.c
> +++ b/drivers/block/brd.c
> @@ -300,20 +300,20 @@ static void copy_from_brd(void *dst, struct brd_device *brd,
>   * Process a single bvec of a bio.
>   */
>  static int brd_do_bvec(struct brd_device *brd, struct page *page,
> -                        unsigned int len, unsigned int off, int op,
> +                        unsigned int len, unsigned int off, bool is_write,
>                          sector_t sector)
>  {
>          void *mem;
>          int err = 0;
> 
> -        if (op_is_write(op)) {
> +        if (is_write) {
>                  err = copy_to_brd_setup(brd, sector, len);
>                  if (err)
>                          goto out;
>          }
> 
>          mem = kmap_atomic(page);
> -        if (!op_is_write(op)) {
> +        if (!is_write) {
>                  copy_from_brd(mem + off, brd, sector, len);
>                  flush_dcache_page(page);
>          } else {
> @@ -350,8 +350,8 @@ static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio)
>                  unsigned int len = bvec.bv_len;
>                  int err;
> 
> -                err = brd_do_bvec(brd, bvec.bv_page, len,
> -                        bvec.bv_offset, bio_op(bio), sector);
> +                err = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset,
> +                        op_is_write(bio_op(bio)), sector);
>                  if (err)
>                          goto io_error;
>                  sector += len >> SECTOR_SHIFT;
> @@ -366,11 +366,11 @@ io_error:
>  }
> 
>  static int brd_rw_page(struct block_device *bdev, sector_t sector,
> -                       struct page *page, int op)
> +                       struct page *page, bool is_write)
>  {
>          struct brd_device *brd = bdev->bd_disk->private_data;
> -        int err = brd_do_bvec(brd, page, PAGE_SIZE, 0, op, sector);
> -        page_endio(page, op, err);
> +        int err = brd_do_bvec(brd, page, PAGE_SIZE, 0, is_write, sector);
> +        page_endio(page, is_write, err);
>          return err;
>  }
> 
> diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
> index ca29649c4b08..04365b17ee67 100644
> --- a/drivers/block/zram/zram_drv.c
> +++ b/drivers/block/zram/zram_drv.c
> @@ -843,15 +843,16 @@ static void zram_bio_discard(struct zram *zram, u32 index,
>  }
> 
>  static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
> -                        int offset, int op)
> +                        int offset, bool is_write)
>  {
>          unsigned long start_time = jiffies;
> +        int rw_acct = is_write ? REQ_OP_WRITE : REQ_OP_READ;
>          int ret;
> 
> -        generic_start_io_acct(op, bvec->bv_len >> SECTOR_SHIFT,
> +        generic_start_io_acct(rw_acct, bvec->bv_len >> SECTOR_SHIFT,
>                          &zram->disk->part0);
> 
> -        if (!op_is_write(op)) {
> +        if (!is_write) {
>                  atomic64_inc(&zram->stats.num_reads);
>                  ret = zram_bvec_read(zram, bvec, index, offset);
>          } else {
> @@ -859,10 +860,10 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
>                  ret = zram_bvec_write(zram, bvec, index, offset);
>          }
> 
> -        generic_end_io_acct(op, &zram->disk->part0, start_time);
> +        generic_end_io_acct(rw_acct, &zram->disk->part0, start_time);
> 
>          if (unlikely(ret)) {
> -                if (!op_is_write(op))
> +                if (!is_write)
>                          atomic64_inc(&zram->stats.failed_reads);
>                  else
>                          atomic64_inc(&zram->stats.failed_writes);
> @@ -903,17 +904,17 @@ static void __zram_make_request(struct zram *zram, struct bio *bio)
>                          bv.bv_offset = bvec.bv_offset;
> 
>                          if (zram_bvec_rw(zram, &bv, index, offset,
> -                                        bio_op(bio)) < 0)
> +                                        op_is_write(bio_op(bio))) < 0)
>                                  goto out;
> 
>                          bv.bv_len = bvec.bv_len - max_transfer_size;
>                          bv.bv_offset += max_transfer_size;
>                          if (zram_bvec_rw(zram, &bv, index + 1, 0,
> -                                        bio_op(bio)) < 0)
> +                                        op_is_write(bio_op(bio))) < 0)
>                                  goto out;
>                  } else
>                          if (zram_bvec_rw(zram, &bvec, index, offset,
> -                                        bio_op(bio)) < 0)
> +                                        op_is_write(bio_op(bio))) < 0)
>                                  goto out;
> 
>                  update_position(&index, &offset, &bvec);
> @@ -970,7 +971,7 @@ static void zram_slot_free_notify(struct block_device *bdev,
>  }
> 
>  static int zram_rw_page(struct block_device *bdev, sector_t sector,
> -                       struct page *page, int op)
> +                       struct page *page, bool is_write)
>  {
>          int offset, err = -EIO;
>          u32 index;
> @@ -994,7 +995,7 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector,
>          bv.bv_len = PAGE_SIZE;
>          bv.bv_offset = 0;
> 
> -        err = zram_bvec_rw(zram, &bv, index, offset, op);
> +        err = zram_bvec_rw(zram, &bv, index, offset, is_write);
>  put_zram:
>          zram_meta_put(zram);
>  out:
> @@ -1007,7 +1008,7 @@ out:
>           * (e.g., SetPageError, set_page_dirty and extra works).
>           */
>          if (err == 0)
> -                page_endio(page, op, 0);
> +                page_endio(page, is_write, 0);
>          return err;
>  }
> 
> diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
> index 7cf3bdfaf809..88e91666f145 100644
> --- a/drivers/nvdimm/btt.c
> +++ b/drivers/nvdimm/btt.c
> @@ -1133,11 +1133,11 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
> 
>  static int btt_do_bvec(struct btt *btt, struct bio_integrity_payload *bip,
>                          struct page *page, unsigned int len, unsigned int off,
> -                        int op, sector_t sector)
> +                        bool is_write, sector_t sector)
>  {
>          int ret;
> 
> -        if (!op_is_write(op)) {
> +        if (!is_write) {
>                  ret = btt_read_pg(btt, bip, page, off, sector, len);
>                  flush_dcache_page(page);
>          } else {
> @@ -1180,7 +1180,7 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
>                  BUG_ON(len % btt->sector_size);
> 
>                  err = btt_do_bvec(btt, bip, bvec.bv_page, len, bvec.bv_offset,
> -                                bio_op(bio), iter.bi_sector);
> +                                op_is_write(bio_op(bio)), iter.bi_sector);
>                  if (err) {
>                          dev_info(&btt->nd_btt->dev,
>                                          "io error in %s sector %lld, len %d,\n",
> @@ -1200,12 +1200,12 @@ out:
>  }
> 
>  static int btt_rw_page(struct block_device *bdev, sector_t sector,
> -                struct page *page, int op)
> +                struct page *page, bool is_write)
>  {
>          struct btt *btt = bdev->bd_disk->private_data;
> 
> -        btt_do_bvec(btt, NULL, page, PAGE_SIZE, 0, op, sector);
> -        page_endio(page, op, 0);
> +        btt_do_bvec(btt, NULL, page, PAGE_SIZE, 0, is_write, sector);
> +        page_endio(page, is_write, 0);
>          return 0;
>  }
> 
> diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
> index d64d92481c1d..20bae50c231d 100644
> --- a/drivers/nvdimm/pmem.c
> +++ b/drivers/nvdimm/pmem.c
> @@ -67,7 +67,7 @@ static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset,
>  }
> 
>  static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
> -                        unsigned int len, unsigned int off, int op,
> +                        unsigned int len, unsigned int off, bool is_write,
>                          sector_t sector)
>  {
>          int rc = 0;
> @@ -79,7 +79,7 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
>          if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
>                  bad_pmem = true;
> 
> -        if (!op_is_write(op)) {
> +        if (!is_write) {
>                  if (unlikely(bad_pmem))
>                          rc = -EIO;
>                  else {
> @@ -134,7 +134,7 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
>          do_acct = nd_iostat_start(bio, &start);
>          bio_for_each_segment(bvec, bio, iter) {
>                  rc = pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len,
> -                                bvec.bv_offset, bio_op(bio),
> +                                bvec.bv_offset, op_is_write(bio_op(bio)),
>                                  iter.bi_sector);
>                  if (rc) {
>                          bio->bi_error = rc;
> @@ -152,12 +152,12 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
>  }
> 
>  static int pmem_rw_page(struct block_device *bdev, sector_t sector,
> -                       struct page *page, int op)
> +                       struct page *page, bool is_write)
>  {
>          struct pmem_device *pmem = bdev->bd_queue->queuedata;
>          int rc;
> 
> -        rc = pmem_do_bvec(pmem, page, PAGE_SIZE, 0, op, sector);
> +        rc = pmem_do_bvec(pmem, page, PAGE_SIZE, 0, is_write, sector);
> 
>          /*
>           * The ->rw_page interface is subtle and tricky. The core
> @@ -166,7 +166,7 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
>           * caused by double completion.
>           */
>          if (rc == 0)
> -                page_endio(page, op, 0);
> +                page_endio(page, is_write, 0);
> 
>          return rc;
>  }
> diff --git a/fs/block_dev.c b/fs/block_dev.c
> index d402899ba135..c3cdde87cc8c 100644
> --- a/fs/block_dev.c
> +++ b/fs/block_dev.c
> @@ -416,8 +416,7 @@ int bdev_read_page(struct block_device *bdev, sector_t sector,
>          result = blk_queue_enter(bdev->bd_queue, false);
>          if (result)
>                  return result;
> -        result = ops->rw_page(bdev, sector + get_start_sect(bdev), page,
> -                        REQ_OP_READ);
> +        result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, false);
>          blk_queue_exit(bdev->bd_queue);
>          return result;
>  }
> @@ -455,8 +454,7 @@ int bdev_write_page(struct block_device *bdev, sector_t sector,
>                  return result;
> 
>          set_page_writeback(page);
> -        result = ops->rw_page(bdev, sector + get_start_sect(bdev), page,
> -                        REQ_OP_WRITE);
> +        result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, true);
>          if (result)
>                  end_page_writeback(page);
>          else
> diff --git a/fs/mpage.c b/fs/mpage.c
> index 7a09c55b4bd0..d2413af0823a 100644
> --- a/fs/mpage.c
> +++ b/fs/mpage.c
> @@ -50,7 +50,7 @@ static void mpage_end_io(struct bio *bio)
> 
>          bio_for_each_segment_all(bv, bio, i) {
>                  struct page *page = bv->bv_page;
> -                page_endio(page, bio_op(bio), bio->bi_error);
> +                page_endio(page, op_is_write(bio_op(bio)), bio->bi_error);
>          }
> 
>          bio_put(bio);
> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
> index 14b28ff2caf8..f254eb264924 100644
> --- a/include/linux/blk_types.h
> +++ b/include/linux/blk_types.h
> @@ -18,17 +18,6 @@ struct cgroup_subsys_state;
>  typedef void (bio_end_io_t) (struct bio *);
>  typedef void (bio_destructor_t) (struct bio *);
> 
> -enum req_op {
> -        REQ_OP_READ,
> -        REQ_OP_WRITE,
> -        REQ_OP_DISCARD,         /* request to discard sectors */
> -        REQ_OP_SECURE_ERASE,    /* request to securely erase sectors */
> -        REQ_OP_WRITE_SAME,      /* write same block many times */
> -        REQ_OP_FLUSH,           /* request for cache flush */
> -};
> -
> -#define REQ_OP_BITS 3
> -
>  #ifdef CONFIG_BLOCK
>  /*
>   * main unit of I/O for the block layer and lower layers (ie drivers and
> @@ -239,6 +228,17 @@ enum rq_flag_bits {
>  #define REQ_HASHED              (1ULL << __REQ_HASHED)
>  #define REQ_MQ_INFLIGHT         (1ULL << __REQ_MQ_INFLIGHT)
> 
> +enum req_op {
> +        REQ_OP_READ,
> +        REQ_OP_WRITE,
> +        REQ_OP_DISCARD,         /* request to discard sectors */
> +        REQ_OP_SECURE_ERASE,    /* request to securely erase sectors */
> +        REQ_OP_WRITE_SAME,      /* write same block many times */
> +        REQ_OP_FLUSH,           /* request for cache flush */
> +};
> +
> +#define REQ_OP_BITS 3
> +
>  typedef unsigned int blk_qc_t;
>  #define BLK_QC_T_NONE           -1U
>  #define BLK_QC_T_SHIFT          16
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index ccd68c0d01de..2c210b6a7bcf 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -1672,7 +1672,7 @@ struct blk_dax_ctl {
>  struct block_device_operations {
>          int (*open) (struct block_device *, fmode_t);
>          void (*release) (struct gendisk *, fmode_t);
> -        int (*rw_page)(struct block_device *, sector_t, struct page *, int op);
> +        int (*rw_page)(struct block_device *, sector_t, struct page *, bool);
>          int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
>          int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
>          long (*direct_access)(struct block_device *, sector_t, void **, pfn_t *,
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 498255e6914e..f3f0b4c8e8ac 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -2480,13 +2480,12 @@ extern void init_special_inode(struct inode *, umode_t, dev_t);
>  extern void make_bad_inode(struct inode *);
>  extern bool is_bad_inode(struct inode *);
> 
> +#ifdef CONFIG_BLOCK
>  static inline bool op_is_write(unsigned int op)
>  {
>          return op == REQ_OP_READ ? false : true;
>  }
> 
> -#ifdef CONFIG_BLOCK
> -
>  /*
>   * return data direction, READ or WRITE
>   */
> diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
> index 45786374abbd..66a1260b33de 100644
> --- a/include/linux/pagemap.h
> +++ b/include/linux/pagemap.h
> @@ -510,7 +510,7 @@ static inline void wait_on_page_writeback(struct page *page)
>  extern void end_page_writeback(struct page *page);
>  void wait_for_stable_page(struct page *page);
> 
> -void page_endio(struct page *page, int op, int err);
> +void page_endio(struct page *page, bool is_write, int err);
> 
>  /*
>   * Add an arbitrary waiter to a page's wait queue
> diff --git a/mm/filemap.c b/mm/filemap.c
> index daef091d4c50..8a287dfc5372 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -887,9 +887,9 @@ EXPORT_SYMBOL(end_page_writeback);
>   * After completing I/O on a page, call this routine to update the page
>   * flags appropriately
>   */
> -void page_endio(struct page *page, int op, int err)
> +void page_endio(struct page *page, bool is_write, int err)
>  {
> -        if (!op_is_write(op)) {
> +        if (!is_write) {
>                  if (!err) {
>                          SetPageUptodate(page);
>                  } else {
> 

Looks ok to me. Also tested all the patches in the linus2 branch and
verified it fixed the rw_page related hang for me.

Reviewed-by: Mike Christie
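[Editor's illustration, not part of the original thread: a minimal, self-contained sketch of the calling convention the patch converges on. The stand-in enum, demo_rw_page() and main() are invented for illustration; only the body of op_is_write() mirrors the helper shown in the diff, where anything other than REQ_OP_READ counts as a write. Callers decide read vs. write once and the ->rw_page-style callback only ever sees a bool.]

    #include <stdbool.h>
    #include <stdio.h>

    /* Stand-in for the kernel's req_op values; REQ_OP_READ is 0 as in blk_types.h. */
    enum req_op { REQ_OP_READ, REQ_OP_WRITE, REQ_OP_DISCARD };

    /* Mirrors the helper moved in the patch: anything that is not a read is a write. */
    static inline bool op_is_write(unsigned int op)
    {
            return op == REQ_OP_READ ? false : true;
    }

    /* Toy ->rw_page-style callback: after the patch it takes a bool, not an op. */
    static int demo_rw_page(unsigned long sector, bool is_write)
    {
            printf("sector %lu: %s\n", sector, is_write ? "write" : "read");
            return 0;
    }

    int main(void)
    {
            /* Fixed callers pass the literal, as bdev_read_page()/bdev_write_page() do. */
            demo_rw_page(0, false);                       /* read path  */
            demo_rw_page(8, true);                        /* write path */
            /* A bio-driven caller converts the op first, as brd/zram/btt/pmem now do. */
            demo_rw_page(16, op_is_write(REQ_OP_WRITE));
            return 0;
    }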