2010-04-23 14:44:32

by Josef Bacik

[permalink] [raw]
Subject: [PATCH] cleanup block based fiemap

Hello,

I'm resending this patch again since it doesn't seem to have made it in yet.
The generic block fiemap stuff doesn't use the right typing and has a problem
with not setting the last extent flag properly. Also there is an issue with
GFS2 where it doesn't like non-block aligned requests, so this fixes all these
issues. Thanks,

Signed-off-by: Josef Bacik <[email protected]>
---
fs/ioctl.c | 85 +++++++++++++++++++++++++++++----------------------
include/linux/fs.h | 5 ++-
2 files changed, 51 insertions(+), 39 deletions(-)

diff --git a/fs/ioctl.c b/fs/ioctl.c
index 6c75110..f330fec 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -228,14 +228,23 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)

#ifdef CONFIG_BLOCK

-#define blk_to_logical(inode, blk) (blk << (inode)->i_blkbits)
-#define logical_to_blk(inode, offset) (offset >> (inode)->i_blkbits);
+static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
+{
+ return (offset >> inode->i_blkbits);
+}
+
+static inline loff_t blk_to_logical(struct inode *inode, sector_t blk)
+{
+ return (blk << inode->i_blkbits);
+}

/**
* __generic_block_fiemap - FIEMAP for block based inodes (no locking)
- * @inode - the inode to map
- * @arg - the pointer to userspace where we copy everything to
- * @get_block - the fs's get_block function
+ * @inode: the inode to map
+ * @fieinfo: the fiemap info struct that will be passed back to userspace
+ * @start: where to start mapping in the inode
+ * @len: how much space to map
+ * @get_block: the fs's get_block function
*
* This does FIEMAP for block based inodes. Basically it will just loop
* through get_block until we hit the number of extents we want to map, or we
@@ -250,46 +259,46 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
*/

int __generic_block_fiemap(struct inode *inode,
- struct fiemap_extent_info *fieinfo, u64 start,
- u64 len, get_block_t *get_block)
+ struct fiemap_extent_info *fieinfo, loff_t start,
+ loff_t len, get_block_t *get_block)
{
- struct buffer_head tmp;
- unsigned long long start_blk;
- long long length = 0, map_len = 0;
+ struct buffer_head map_bh;
+ sector_t start_blk, last_blk;
u64 logical = 0, phys = 0, size = 0;
u32 flags = FIEMAP_EXTENT_MERGED;
- int ret = 0, past_eof = 0, whole_file = 0;
+ bool past_eof = false, whole_file = false;
+ int ret = 0;

- if ((ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC)))
+ ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
+ if (ret)
return ret;

- start_blk = logical_to_blk(inode, start);
-
- length = (long long)min_t(u64, len, i_size_read(inode));
- if (length < len)
- whole_file = 1;
+ if (len >= i_size_read(inode)) {
+ whole_file = true;
+ len = i_size_read(inode);
+ }

- map_len = length;
+ start_blk = logical_to_blk(inode, start);
+ last_blk = logical_to_blk(inode, start + len - 1);

do {
/*
* we set b_size to the total size we want so it will map as
* many contiguous blocks as possible at once
*/
- memset(&tmp, 0, sizeof(struct buffer_head));
- tmp.b_size = map_len;
+ memset(&map_bh, 0, sizeof(struct buffer_head));
+ map_bh.b_size = len;

- ret = get_block(inode, start_blk, &tmp, 0);
+ ret = get_block(inode, start_blk, &map_bh, 0);
if (ret)
break;

/* HOLE */
- if (!buffer_mapped(&tmp)) {
- length -= blk_to_logical(inode, 1);
+ if (!buffer_mapped(&map_bh)) {
start_blk++;

/*
- * we want to handle the case where there is an
+ * We want to handle the case where there is an
* allocated block at the front of the file, and then
* nothing but holes up to the end of the file properly,
* to make sure that extent at the front gets properly
@@ -297,11 +306,11 @@ int __generic_block_fiemap(struct inode *inode,
*/
if (!past_eof &&
blk_to_logical(inode, start_blk) >=
- blk_to_logical(inode, 0)+i_size_read(inode))
+ blk_to_logical(inode, 0) + i_size_read(inode))
past_eof = 1;

/*
- * first hole after going past the EOF, this is our
+ * First hole after going past the EOF, this is our
* last extent
*/
if (past_eof && size) {
@@ -309,15 +318,18 @@ int __generic_block_fiemap(struct inode *inode,
ret = fiemap_fill_next_extent(fieinfo, logical,
phys, size,
flags);
- break;
+ } else if (size) {
+ ret = fiemap_fill_next_extent(fieinfo, logical,
+ phys, size, flags);
+ size = 0;
}

/* if we have holes up to/past EOF then we're done */
- if (length <= 0 || past_eof)
+ if (start_blk > last_blk || past_eof || ret)
break;
} else {
/*
- * we have gone over the length of what we wanted to
+ * We have gone over the length of what we wanted to
* map, and it wasn't the entire file, so add the extent
* we got last time and exit.
*
@@ -331,7 +343,7 @@ int __generic_block_fiemap(struct inode *inode,
* are good to go, just add the extent to the fieinfo
* and break
*/
- if (length <= 0 && !whole_file) {
+ if (start_blk > last_blk && !whole_file) {
ret = fiemap_fill_next_extent(fieinfo, logical,
phys, size,
flags);
@@ -351,11 +363,10 @@ int __generic_block_fiemap(struct inode *inode,
}

logical = blk_to_logical(inode, start_blk);
- phys = blk_to_logical(inode, tmp.b_blocknr);
- size = tmp.b_size;
+ phys = blk_to_logical(inode, map_bh.b_blocknr);
+ size = map_bh.b_size;
flags = FIEMAP_EXTENT_MERGED;

- length -= tmp.b_size;
start_blk += logical_to_blk(inode, size);

/*
@@ -364,14 +375,14 @@ int __generic_block_fiemap(struct inode *inode,
* is marked with FIEMAP_EXTENT_LAST
*/
if (!past_eof &&
- logical+size >=
- blk_to_logical(inode, 0)+i_size_read(inode))
- past_eof = 1;
+ logical + size >=
+ blk_to_logical(inode, 0) + i_size_read(inode))
+ past_eof = true;
}
cond_resched();
} while (1);

- /* if ret is 1 then we just hit the end of the extent array */
+ /* If ret is 1 then we just hit the end of the extent array */
if (ret == 1)
ret = 0;

diff --git a/include/linux/fs.h b/include/linux/fs.h
index ebb1cd5..7e6e47f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2313,8 +2313,9 @@ extern int vfs_fstatat(int , char __user *, struct kstat *, int);
extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
unsigned long arg);
extern int __generic_block_fiemap(struct inode *inode,
- struct fiemap_extent_info *fieinfo, u64 start,
- u64 len, get_block_t *get_block);
+ struct fiemap_extent_info *fieinfo,
+ loff_t start, loff_t len,
+ get_block_t *get_block);
extern int generic_block_fiemap(struct inode *inode,
struct fiemap_extent_info *fieinfo, u64 start,
u64 len, get_block_t *get_block);
--
1.6.5.rc2


2010-04-23 15:02:52

by Linus Torvalds

[permalink] [raw]
Subject: Re: [PATCH] cleanup block based fiemap



On Fri, 23 Apr 2010, Josef Bacik wrote:
>
> I'm resending this patch again since it doesn't seem to have made it in yet.
> The generic block fiemap stuff doesn't use the right typing and has a problem
> with not setting the last extent flag properly. Also there is an issue with
> GFS2 where it doesn't like non-block aligned requests, so this fixes all these
> issues. Thanks,

I'd really like the patch to clean up the crazy stuff too.

As-is, there's at least two remaining issues I see from just reading the
patch:

> + if (len >= i_size_read(inode)) {
> + whole_file = true;
> + len = i_size_read(inode);
> + }
...
> if (!past_eof &&
> blk_to_logical(inode, start_blk) >=
> - blk_to_logical(inode, 0)+i_size_read(inode))
> + blk_to_logical(inode, 0) + i_size_read(inode))
> past_eof = 1;

Issue #1: it does that i_size_read() several times. What happens if the
file grows? Maybe we hold the i_mutex already, although I don't see it.
Regardless, it seems bogus to read the size several times.

Issue #2: "blk_to_logical(inode, 0)"? WTF? Since when has shifting zero
ever resulted in anything interesting or relevant? There's at least two of
those things.

Linus

2010-04-23 15:19:13

by Josef Bacik

[permalink] [raw]
Subject: Re: [PATCH] cleanup block based fiemap

On Fri, Apr 23, 2010 at 08:00:35AM -0700, Linus Torvalds wrote:
>
>
> On Fri, 23 Apr 2010, Josef Bacik wrote:
> >
> > I'm resending this patch again since it doesn't seem to have made it in yet.
> > The generic block fiemap stuff doesn't use the right typing and has a problem
> > with not setting the last extent flag properly. Also there is an issue with
> > GFS2 where it doesn't like non-block aligned requests, so this fixes all these
> > issues. Thanks,
>
> I'd really like the patch to clean up the crazy stuff too.
>
> As-is, there's at least two remaining issues I see from just reading the
> patch:
>
> > + if (len >= i_size_read(inode)) {
> > + whole_file = true;
> > + len = i_size_read(inode);
> > + }
> ...
> > if (!past_eof &&
> > blk_to_logical(inode, start_blk) >=
> > - blk_to_logical(inode, 0)+i_size_read(inode))
> > + blk_to_logical(inode, 0) + i_size_read(inode))
> > past_eof = 1;
>
> Issue #1: it does that i_size_read() several times. What happens if the
> file grows? Maybe we hold the i_mutex already, although I don't see it.
> Regardless, it seems bogus to read the size several times.
>

__generic_block_fiemap is called by generic_block_fiemap which takes the
i_mutex. The only reason we have __generic_block_fiemap is because gfs2 needs
to do its own locking magic before we go calling get_block. The idea is that
the file size doesn't change while we're doing this.

As for reading the size several times, I can read it once and store it in a
local variable if you prefer, but theres no way to know if len is smaller than
the size or not, which is why I'm constantly doing i_size_read(). If thats what
you would prefer I can do that, just let me know.

> Issue #2: "blk_to_logical(inode, 0)"? WTF? Since when has shifting zero
> ever resulted in anything interesting or relevant? There's at least two of
> those things.
>

Umm, yeah I'm sorry? I have no idea why I did that. I think its because I was
getting the logical offset of the first block + size, which is just stupid
because the logical offset is 0, so all I can say is I'm sorry that me a year
ago was alot dumber than me now :). Thanks,

Josef

2010-04-23 15:28:48

by Linus Torvalds

[permalink] [raw]
Subject: Re: [PATCH] cleanup block based fiemap



On Fri, 23 Apr 2010, Josef Bacik wrote:
>
> __generic_block_fiemap is called by generic_block_fiemap which takes the
> i_mutex. The only reason we have __generic_block_fiemap is because gfs2 needs
> to do its own locking magic before we go calling get_block. The idea is that
> the file size doesn't change while we're doing this.

Ok, maybe just a comment then..

> As for reading the size several times, I can read it once and store it in a
> local variable if you prefer, but theres no way to know if len is smaller than
> the size or not, which is why I'm constantly doing i_size_read(). If thats what
> you would prefer I can do that, just let me know.

.. or a comment _and_ a "read size once into a variable". Not a big deal,
it just wasn't entirely clear to me and I hadn't checked the callchain.

> > Issue #2: "blk_to_logical(inode, 0)"? WTF? Since when has shifting zero
> > ever resulted in anything interesting or relevant? There's at least two of
> > those things.
> >
>
> Umm, yeah I'm sorry? I have no idea why I did that. I think its because I was
> getting the logical offset of the first block + size, which is just stupid
> because the logical offset is 0, so all I can say is I'm sorry that me a year
> ago was alot dumber than me now :). Thanks,

Ok, fix that, and I think the patch will be a clear improvement.

Linus

2010-04-23 15:48:09

by Josef Bacik

[permalink] [raw]
Subject: Re: [PATCH] cleanup block based fiemap

On Fri, Apr 23, 2010 at 08:27:02AM -0700, Linus Torvalds wrote:
>
>
> On Fri, 23 Apr 2010, Josef Bacik wrote:
> >
> > __generic_block_fiemap is called by generic_block_fiemap which takes the
> > i_mutex. The only reason we have __generic_block_fiemap is because gfs2 needs
> > to do its own locking magic before we go calling get_block. The idea is that
> > the file size doesn't change while we're doing this.
>
> Ok, maybe just a comment then..
>
> > As for reading the size several times, I can read it once and store it in a
> > local variable if you prefer, but theres no way to know if len is smaller than
> > the size or not, which is why I'm constantly doing i_size_read(). If thats what
> > you would prefer I can do that, just let me know.
>
> .. or a comment _and_ a "read size once into a variable". Not a big deal,
> it just wasn't entirely clear to me and I hadn't checked the callchain.
>
> > > Issue #2: "blk_to_logical(inode, 0)"? WTF? Since when has shifting zero
> > > ever resulted in anything interesting or relevant? There's at least two of
> > > those things.
> > >
> >
> > Umm, yeah I'm sorry? I have no idea why I did that. I think its because I was
> > getting the logical offset of the first block + size, which is just stupid
> > because the logical offset is 0, so all I can say is I'm sorry that me a year
> > ago was alot dumber than me now :). Thanks,
>
> Ok, fix that, and I think the patch will be a clear improvement.
>

Great, here is the updated patch. Thanks for the review!

Josef

diff --git a/fs/ioctl.c b/fs/ioctl.c
index 6c75110..7faefb4 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -228,14 +228,23 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)

#ifdef CONFIG_BLOCK

-#define blk_to_logical(inode, blk) (blk << (inode)->i_blkbits)
-#define logical_to_blk(inode, offset) (offset >> (inode)->i_blkbits);
+static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
+{
+ return (offset >> inode->i_blkbits);
+}
+
+static inline loff_t blk_to_logical(struct inode *inode, sector_t blk)
+{
+ return (blk << inode->i_blkbits);
+}

/**
* __generic_block_fiemap - FIEMAP for block based inodes (no locking)
- * @inode - the inode to map
- * @arg - the pointer to userspace where we copy everything to
- * @get_block - the fs's get_block function
+ * @inode: the inode to map
+ * @fieinfo: the fiemap info struct that will be passed back to userspace
+ * @start: where to start mapping in the inode
+ * @len: how much space to map
+ * @get_block: the fs's get_block function
*
* This does FIEMAP for block based inodes. Basically it will just loop
* through get_block until we hit the number of extents we want to map, or we
@@ -250,58 +259,63 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
*/

int __generic_block_fiemap(struct inode *inode,
- struct fiemap_extent_info *fieinfo, u64 start,
- u64 len, get_block_t *get_block)
+ struct fiemap_extent_info *fieinfo, loff_t start,
+ loff_t len, get_block_t *get_block)
{
- struct buffer_head tmp;
- unsigned long long start_blk;
- long long length = 0, map_len = 0;
+ struct buffer_head map_bh;
+ sector_t start_blk, last_blk;
+ loff_t isize = i_size_read(inode);
u64 logical = 0, phys = 0, size = 0;
u32 flags = FIEMAP_EXTENT_MERGED;
- int ret = 0, past_eof = 0, whole_file = 0;
+ bool past_eof = false, whole_file = false;
+ int ret = 0;

- if ((ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC)))
+ ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
+ if (ret)
return ret;

- start_blk = logical_to_blk(inode, start);
-
- length = (long long)min_t(u64, len, i_size_read(inode));
- if (length < len)
- whole_file = 1;
+ /*
+ * Either the i_mutex or other appropriate locking needs to be held
+ * since we expect isize to not change at all through the duration of
+ * this call.
+ */
+ if (len >= isize) {
+ whole_file = true;
+ len = isize;
+ }

- map_len = length;
+ start_blk = logical_to_blk(inode, start);
+ last_blk = logical_to_blk(inode, start + len - 1);

do {
/*
* we set b_size to the total size we want so it will map as
* many contiguous blocks as possible at once
*/
- memset(&tmp, 0, sizeof(struct buffer_head));
- tmp.b_size = map_len;
+ memset(&map_bh, 0, sizeof(struct buffer_head));
+ map_bh.b_size = len;

- ret = get_block(inode, start_blk, &tmp, 0);
+ ret = get_block(inode, start_blk, &map_bh, 0);
if (ret)
break;

/* HOLE */
- if (!buffer_mapped(&tmp)) {
- length -= blk_to_logical(inode, 1);
+ if (!buffer_mapped(&map_bh)) {
start_blk++;

/*
- * we want to handle the case where there is an
+ * We want to handle the case where there is an
* allocated block at the front of the file, and then
* nothing but holes up to the end of the file properly,
* to make sure that extent at the front gets properly
* marked with FIEMAP_EXTENT_LAST
*/
if (!past_eof &&
- blk_to_logical(inode, start_blk) >=
- blk_to_logical(inode, 0)+i_size_read(inode))
+ blk_to_logical(inode, start_blk) >= isize)
past_eof = 1;

/*
- * first hole after going past the EOF, this is our
+ * First hole after going past the EOF, this is our
* last extent
*/
if (past_eof && size) {
@@ -309,15 +323,18 @@ int __generic_block_fiemap(struct inode *inode,
ret = fiemap_fill_next_extent(fieinfo, logical,
phys, size,
flags);
- break;
+ } else if (size) {
+ ret = fiemap_fill_next_extent(fieinfo, logical,
+ phys, size, flags);
+ size = 0;
}

/* if we have holes up to/past EOF then we're done */
- if (length <= 0 || past_eof)
+ if (start_blk > last_blk || past_eof || ret)
break;
} else {
/*
- * we have gone over the length of what we wanted to
+ * We have gone over the length of what we wanted to
* map, and it wasn't the entire file, so add the extent
* we got last time and exit.
*
@@ -331,7 +348,7 @@ int __generic_block_fiemap(struct inode *inode,
* are good to go, just add the extent to the fieinfo
* and break
*/
- if (length <= 0 && !whole_file) {
+ if (start_blk > last_blk && !whole_file) {
ret = fiemap_fill_next_extent(fieinfo, logical,
phys, size,
flags);
@@ -351,11 +368,10 @@ int __generic_block_fiemap(struct inode *inode,
}

logical = blk_to_logical(inode, start_blk);
- phys = blk_to_logical(inode, tmp.b_blocknr);
- size = tmp.b_size;
+ phys = blk_to_logical(inode, map_bh.b_blocknr);
+ size = map_bh.b_size;
flags = FIEMAP_EXTENT_MERGED;

- length -= tmp.b_size;
start_blk += logical_to_blk(inode, size);

/*
@@ -363,15 +379,13 @@ int __generic_block_fiemap(struct inode *inode,
* soon as we find a hole that the last extent we found
* is marked with FIEMAP_EXTENT_LAST
*/
- if (!past_eof &&
- logical+size >=
- blk_to_logical(inode, 0)+i_size_read(inode))
- past_eof = 1;
+ if (!past_eof && logical + size >= isize)
+ past_eof = true;
}
cond_resched();
} while (1);

- /* if ret is 1 then we just hit the end of the extent array */
+ /* If ret is 1 then we just hit the end of the extent array */
if (ret == 1)
ret = 0;

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 39d57bc..44f35ae 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2315,8 +2315,9 @@ extern int vfs_fstatat(int , char __user *, struct kstat *, int);
extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
unsigned long arg);
extern int __generic_block_fiemap(struct inode *inode,
- struct fiemap_extent_info *fieinfo, u64 start,
- u64 len, get_block_t *get_block);
+ struct fiemap_extent_info *fieinfo,
+ loff_t start, loff_t len,
+ get_block_t *get_block);
extern int generic_block_fiemap(struct inode *inode,
struct fiemap_extent_info *fieinfo, u64 start,
u64 len, get_block_t *get_block);

2010-04-23 15:58:13

by Linus Torvalds

[permalink] [raw]
Subject: Re: [PATCH] cleanup block based fiemap



On Fri, 23 Apr 2010, Josef Bacik wrote:
>
> Great, here is the updated patch. Thanks for the review!

Can you repeat the changelog and sign-off too, and make this a proper
submission?

Linus