direct IO is useful in certain scenarios for uncompressed files.
For example, it can avoid double pagecache when use the uncompressed
file to mount upper layer filesystem.
In addition, another patch adds direct IO test for the stress tool
which was mentioned here:
https://lore.kernel.org/linux-erofs/[email protected]/
Signed-off-by: Huang Jianan <[email protected]>
Signed-off-by: Guo Weichao <[email protected]>
---
fs/erofs/data.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 57 insertions(+)
diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index ea4f693bee22..3067aa3defff 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -6,6 +6,8 @@
*/
#include "internal.h"
#include <linux/prefetch.h>
+#include <linux/uio.h>
+#include <linux/blkdev.h>
#include <trace/events/erofs.h>
@@ -312,6 +314,60 @@ static void erofs_raw_access_readahead(struct readahead_control *rac)
submit_bio(bio);
}
+static int erofs_get_block(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh, int create)
+{
+ struct erofs_map_blocks map = {
+ .m_la = blknr_to_addr(iblock),
+ };
+ int err;
+
+ err = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW);
+ if (err)
+ return err;
+
+ if (map.m_flags & EROFS_MAP_MAPPED)
+ map_bh(bh, inode->i_sb, erofs_blknr(map.m_pa));
+
+ return err;
+}
+
+static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
+ loff_t offset)
+{
+ unsigned i_blkbits = READ_ONCE(inode->i_blkbits);
+ unsigned blkbits = i_blkbits;
+ unsigned blocksize_mask = (1 << blkbits) - 1;
+ unsigned long align = offset | iov_iter_alignment(iter);
+ struct block_device *bdev = inode->i_sb->s_bdev;
+
+ if (align & blocksize_mask) {
+ if (bdev)
+ blkbits = blksize_bits(bdev_logical_block_size(bdev));
+ blocksize_mask = (1 << blkbits) - 1;
+ if (align & blocksize_mask)
+ return -EINVAL;
+ return 1;
+ }
+ return 0;
+}
+
+static ssize_t erofs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
+{
+ struct address_space *mapping = iocb->ki_filp->f_mapping;
+ struct inode *inode = mapping->host;
+ loff_t offset = iocb->ki_pos;
+ int err;
+
+ err = check_direct_IO(inode, iter, offset);
+ if (err)
+ return err < 0 ? err : 0;
+
+ return __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter,
+ erofs_get_block, NULL, NULL,
+ DIO_LOCKING | DIO_SKIP_HOLES);
+}
+
static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
{
struct inode *inode = mapping->host;
@@ -336,6 +392,7 @@ static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
const struct address_space_operations erofs_raw_access_aops = {
.readpage = erofs_raw_access_readpage,
.readahead = erofs_raw_access_readahead,
+ .direct_IO = erofs_direct_IO,
.bmap = erofs_bmap,
};
--
2.25.1
Hi Jianan,
On Mon, Dec 14, 2020 at 10:04:27PM +0800, Huang Jianan wrote:
> direct IO is useful in certain scenarios for uncompressed files.
> For example, it can avoid double pagecache when use the uncompressed
> file to mount upper layer filesystem.
>
> In addition, another patch adds direct IO test for the stress tool
> which was mentioned here:
> https://lore.kernel.org/linux-erofs/[email protected]/
>
> Signed-off-by: Huang Jianan <[email protected]>
> Signed-off-by: Guo Weichao <[email protected]>
> ---
> fs/erofs/data.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 57 insertions(+)
>
> diff --git a/fs/erofs/data.c b/fs/erofs/data.c
> index ea4f693bee22..3067aa3defff 100644
> --- a/fs/erofs/data.c
> +++ b/fs/erofs/data.c
> @@ -6,6 +6,8 @@
> */
> #include "internal.h"
> #include <linux/prefetch.h>
> +#include <linux/uio.h>
> +#include <linux/blkdev.h>
>
> #include <trace/events/erofs.h>
>
> @@ -312,6 +314,60 @@ static void erofs_raw_access_readahead(struct readahead_control *rac)
> submit_bio(bio);
> }
>
> +static int erofs_get_block(struct inode *inode, sector_t iblock,
> + struct buffer_head *bh, int create)
> +{
> + struct erofs_map_blocks map = {
> + .m_la = blknr_to_addr(iblock),
> + };
> + int err;
> +
> + err = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW);
> + if (err)
> + return err;
> +
> + if (map.m_flags & EROFS_MAP_MAPPED)
> + map_bh(bh, inode->i_sb, erofs_blknr(map.m_pa));
> +
> + return err;
> +}
> +
> +static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
> + loff_t offset)
> +{
> + unsigned i_blkbits = READ_ONCE(inode->i_blkbits);
It would be better to fold in check_direct_IO, also the READ_ONCE above
is somewhat weird...
No rush here, since 5.11-rc1 haven't be out yet, we have >= 2 months to
work it out.
Thanks,
Gao Xiang
> + unsigned blkbits = i_blkbits;
> + unsigned blocksize_mask = (1 << blkbits) - 1;
> + unsigned long align = offset | iov_iter_alignment(iter);
> + struct block_device *bdev = inode->i_sb->s_bdev;
> +
> + if (align & blocksize_mask) {
> + if (bdev)
> + blkbits = blksize_bits(bdev_logical_block_size(bdev));
> + blocksize_mask = (1 << blkbits) - 1;
> + if (align & blocksize_mask)
> + return -EINVAL;
> + return 1;
> + }
> + return 0;
> +}
> +
> +static ssize_t erofs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
> +{
> + struct address_space *mapping = iocb->ki_filp->f_mapping;
> + struct inode *inode = mapping->host;
> + loff_t offset = iocb->ki_pos;
> + int err;
> +
> + err = check_direct_IO(inode, iter, offset);
> + if (err)
> + return err < 0 ? err : 0;
> +
> + return __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter,
> + erofs_get_block, NULL, NULL,
> + DIO_LOCKING | DIO_SKIP_HOLES);
> +}
> +
> static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
> {
> struct inode *inode = mapping->host;
> @@ -336,6 +392,7 @@ static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
> const struct address_space_operations erofs_raw_access_aops = {
> .readpage = erofs_raw_access_readpage,
> .readahead = erofs_raw_access_readahead,
> + .direct_IO = erofs_direct_IO,
> .bmap = erofs_bmap,
> };
>
> --
> 2.25.1
>
Please do not add new callers of __blockdev_direct_IO and use the modern
iomap variant instead.
Hi Christoph,
On Tue, Dec 22, 2020 at 02:22:34PM +0000, Christoph Hellwig wrote:
> Please do not add new callers of __blockdev_direct_IO and use the modern
> iomap variant instead.
We've talked about this topic before. The current status is that iomap
doesn't support tail-packing inline data yet (Chao once sent out a version),
and erofs only cares about read intrastructure for now (So we don't think
more about how to deal with tail-packing inline write path). Plus, the
original patch was once lack of inline data regression test from gfs2 folks.
The main use case I know so far is to enable direct I/O and leave loop images
uncompressed for loop devices. And making the content of the loop images
compressed to avoid double caching.
Personally, I'd like to convert it to iomap infrastructure as well. So if it
has some concern for __blockdev_direct_IO as an interim solution, hope that
Jianan could pick this work up. That would be better.
Thanks,
Gao Xiang
>
On Wed, Dec 23, 2020 at 03:39:01AM +0800, Gao Xiang wrote:
> Hi Christoph,
>
> On Tue, Dec 22, 2020 at 02:22:34PM +0000, Christoph Hellwig wrote:
> > Please do not add new callers of __blockdev_direct_IO and use the modern
> > iomap variant instead.
>
> We've talked about this topic before. The current status is that iomap
> doesn't support tail-packing inline data yet (Chao once sent out a version),
> and erofs only cares about read intrastructure for now (So we don't think
> more about how to deal with tail-packing inline write path). Plus, the
> original patch was once lack of inline data regression test from gfs2 folks.
So resend Chaos prep patch as part of the series switching parts of
erofs to iomap. We need to move things off the old infrastructure instead
of adding more users and everyone needs to help a little.
Hi Christoph,
The reason we use dio is because we need to deploy the patch on some
early kernel versions, and we don't pay much attention to the change of
iomap. Anyway, I will study the problem mentioned by Gao Xiang and try
to convert the current patch to iomap.
Thanks,
Jianan
> On Wed, Dec 23, 2020 at 03:39:01AM +0800, Gao Xiang wrote:
>> Hi Christoph,
>>
>> On Tue, Dec 22, 2020 at 02:22:34PM +0000, Christoph Hellwig wrote:
>>> Please do not add new callers of __blockdev_direct_IO and use the modern
>>> iomap variant instead.
>> We've talked about this topic before. The current status is that iomap
>> doesn't support tail-packing inline data yet (Chao once sent out a version),
>> and erofs only cares about read intrastructure for now (So we don't think
>> more about how to deal with tail-packing inline write path). Plus, the
>> original patch was once lack of inline data regression test from gfs2 folks.
> So resend Chaos prep patch as part of the series switching parts of
> erofs to iomap. We need to move things off the old infrastructure instead
> of adding more users and everyone needs to help a little.
On Wed, Dec 23, 2020 at 04:48:20PM +0800, Huang Jianan wrote:
> Hi Christoph,
>
> The reason we use dio is because we need to deploy the patch on some early
> kernel versions, and we don't pay much attention to the change of iomap.
No, that is never an excuse for upstream development.
On Wed, Dec 23, 2020 at 08:54:01AM +0000, Christoph Hellwig wrote:
> On Wed, Dec 23, 2020 at 04:48:20PM +0800, Huang Jianan wrote:
> > Hi Christoph,
> >
> > The reason we use dio is because we need to deploy the patch on some early
> > kernel versions, and we don't pay much attention to the change of iomap.
>
> No, that is never an excuse for upstream development.
Ok, personally I also agree this, let's go further in this way.
Thanks,
Gao Xiang
>