LinuxLists.cc - [PATCH v3 0/3] ocfs2: add nowait aio support

2017-12-28 10:07:55

Subject: [PATCH v3 0/3] ocfs2: add nowait aio support

As you know, the VFS layer has introduced the non-blocking AIO
flag IOCB_NOWAIT, which tells the kernel to bail out
if an AIO request would block for reasons such as file
allocation, a triggered writeback, or blocking
while allocating requests during direct I/O.
Subsequently, pwritev2/preadv2 can also leverage this
part of the kernel code.
So far, ext4/xfs/btrfs support this feature;
I'd like to add the related code for the ocfs2 file system.

Compared with v2, the ocfs2_overwrite_io() function has been
modified to handle the OCFS2_INLINE_DATA_FL case.
Compared with v1, the changes are as follows:
- use the osb pointer in the ocfs2_try_rw_lock() function;
- modify the ocfs2_overwrite_io() function so that all error
values are returned to the calling code;
- move the call to ocfs2_overwrite_io() from
ocfs2_file_write_iter() to ocfs2_prepare_inode_for_write(),
so that acquiring the related locks can be combined.

Gang He (3):
ocfs2: add ocfs2_try_rw_lock and ocfs2_try_inode_lock
ocfs2: add ocfs2_overwrite_io function
ocfs2: nowait aio support

fs/ocfs2/dir.c | 2 +-
fs/ocfs2/dlmglue.c | 41 +++++++++++++++++++---
fs/ocfs2/dlmglue.h | 6 +++-
fs/ocfs2/extent_map.c | 45 ++++++++++++++++++++++++
fs/ocfs2/extent_map.h | 3 ++
fs/ocfs2/file.c | 95 +++++++++++++++++++++++++++++++++++++++-----------
fs/ocfs2/mmap.c | 2 +-
fs/ocfs2/ocfs2_trace.h | 10 +++---
8 files changed, 172 insertions(+), 32 deletions(-)

--
1.8.5.6

2017-12-28 10:07:56

by Gang He

[permalink] [raw]

Subject: [PATCH v3 1/3] ocfs2: add ocfs2_try_rw_lock and ocfs2_try_inode_lock

Add the ocfs2_try_rw_lock and ocfs2_try_inode_lock functions, which
will be used in non-blocking I/O scenarios.

Signed-off-by: Gang He <[email protected]>
---
fs/ocfs2/dlmglue.c | 21 +++++++++++++++++++++
fs/ocfs2/dlmglue.h | 4 ++++
2 files changed, 25 insertions(+)

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 4689940..a68efa3 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -1742,6 +1742,27 @@ int ocfs2_rw_lock(struct inode *inode, int write)
return status;
}

+int ocfs2_try_rw_lock(struct inode *inode, int write)
+{
+ int status, level;
+ struct ocfs2_lock_res *lockres;
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+
+ mlog(0, "inode %llu try to take %s RW lock\n",
+ (unsigned long long)OCFS2_I(inode)->ip_blkno,
+ write ? "EXMODE" : "PRMODE");
+
+ if (ocfs2_mount_local(osb))
+ return 0;
+
+ lockres = &OCFS2_I(inode)->ip_rw_lockres;
+
+ level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
+
+ status = ocfs2_cluster_lock(osb, lockres, level, DLM_LKF_NOQUEUE, 0);
+ return status;
+}
+
void ocfs2_rw_unlock(struct inode *inode, int write)
{
int level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index a7fc18b..05910fc 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -116,6 +116,7 @@ void ocfs2_refcount_lock_res_init(struct ocfs2_lock_res *lockres,
int ocfs2_create_new_inode_locks(struct inode *inode);
int ocfs2_drop_inode_locks(struct inode *inode);
int ocfs2_rw_lock(struct inode *inode, int write);
+int ocfs2_try_rw_lock(struct inode *inode, int write);
void ocfs2_rw_unlock(struct inode *inode, int write);
int ocfs2_open_lock(struct inode *inode);
int ocfs2_try_open_lock(struct inode *inode, int write);
@@ -140,6 +141,9 @@ int ocfs2_inode_lock_with_page(struct inode *inode,
/* 99% of the time we don't want to supply any additional flags --
* those are for very specific cases only. */
#define ocfs2_inode_lock(i, b, e) ocfs2_inode_lock_full_nested(i, b, e, 0, OI_LS_NORMAL)
+#define ocfs2_try_inode_lock(i, b, e)\
+ ocfs2_inode_lock_full_nested(i, b, e, OCFS2_META_LOCK_NOQUEUE,\
+ OI_LS_NORMAL)
void ocfs2_inode_unlock(struct inode *inode,
int ex);
int ocfs2_super_lock(struct ocfs2_super *osb,
--
1.8.5.6

2017-12-28 10:08:11

by Gang He

[permalink] [raw]

Subject: [PATCH v3 3/3] ocfs2: nowait aio support

Return -EAGAIN for direct I/O with the nowait flag if any of
the following conditions holds:
- the related locks cannot be acquired immediately;
- blocks are not allocated at the write location, so the write would
trigger block allocation, which would block the I/O operation.

Signed-off-by: Gang He <[email protected]>
---
fs/ocfs2/dir.c | 2 +-
fs/ocfs2/dlmglue.c | 20 ++++++++---
fs/ocfs2/dlmglue.h | 2 +-
fs/ocfs2/file.c | 95 +++++++++++++++++++++++++++++++++++++++-----------
fs/ocfs2/mmap.c | 2 +-
fs/ocfs2/ocfs2_trace.h | 10 +++---
6 files changed, 99 insertions(+), 32 deletions(-)

diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index febe631..ea50901 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -1957,7 +1957,7 @@ int ocfs2_readdir(struct file *file, struct dir_context *ctx)

trace_ocfs2_readdir((unsigned long long)OCFS2_I(inode)->ip_blkno);

- error = ocfs2_inode_lock_atime(inode, file->f_path.mnt, &lock_level);
+ error = ocfs2_inode_lock_atime(inode, file->f_path.mnt, &lock_level, 1);
if (lock_level && error >= 0) {
/* We release EX lock which used to update atime
* and get PR lock again to reduce contention
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index a68efa3..07e169f 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2515,13 +2515,18 @@ int ocfs2_inode_lock_with_page(struct inode *inode,

int ocfs2_inode_lock_atime(struct inode *inode,
struct vfsmount *vfsmnt,
- int *level)
+ int *level, int wait)
{
int ret;

- ret = ocfs2_inode_lock(inode, NULL, 0);
+ if (wait)
+ ret = ocfs2_inode_lock(inode, NULL, 0);
+ else
+ ret = ocfs2_try_inode_lock(inode, NULL, 0);
+
if (ret < 0) {
- mlog_errno(ret);
+ if (ret != -EAGAIN)
+ mlog_errno(ret);
return ret;
}

@@ -2533,9 +2538,14 @@ int ocfs2_inode_lock_atime(struct inode *inode,
struct buffer_head *bh = NULL;

ocfs2_inode_unlock(inode, 0);
- ret = ocfs2_inode_lock(inode, &bh, 1);
+ if (wait)
+ ret = ocfs2_inode_lock(inode, &bh, 1);
+ else
+ ret = ocfs2_try_inode_lock(inode, &bh, 1);
+
if (ret < 0) {
- mlog_errno(ret);
+ if (ret != -EAGAIN)
+ mlog_errno(ret);
return ret;
}
*level = 1;
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index 05910fc..c83dbb5 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -123,7 +123,7 @@ void ocfs2_refcount_lock_res_init(struct ocfs2_lock_res *lockres,
void ocfs2_open_unlock(struct inode *inode);
int ocfs2_inode_lock_atime(struct inode *inode,
struct vfsmount *vfsmnt,
- int *level);
+ int *level, int wait);
int ocfs2_inode_lock_full_nested(struct inode *inode,
struct buffer_head **ret_bh,
int ex,
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index a1d0510..caef9b1 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -140,6 +140,8 @@ static int ocfs2_file_open(struct inode *inode, struct file *file)
spin_unlock(&oi->ip_lock);
}

+ file->f_mode |= FMODE_NOWAIT;
+
leave:
return status;
}
@@ -2132,12 +2134,12 @@ static int ocfs2_prepare_inode_for_refcount(struct inode *inode,
}

static int ocfs2_prepare_inode_for_write(struct file *file,
- loff_t pos,
- size_t count)
+ loff_t pos, size_t count, int wait)
{
- int ret = 0, meta_level = 0;
+ int ret = 0, meta_level = 0, overwrite_io = 0;
struct dentry *dentry = file->f_path.dentry;
struct inode *inode = d_inode(dentry);
+ struct buffer_head *di_bh = NULL;
loff_t end;

/*
@@ -2145,13 +2147,40 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
* if we need to make modifications here.
*/
for(;;) {
- ret = ocfs2_inode_lock(inode, NULL, meta_level);
+ if (wait)
+ ret = ocfs2_inode_lock(inode, NULL, meta_level);
+ else
+ ret = ocfs2_try_inode_lock(inode,
+ overwrite_io ? NULL : &di_bh, meta_level);
if (ret < 0) {
meta_level = -1;
- mlog_errno(ret);
+ if (ret != -EAGAIN)
+ mlog_errno(ret);
goto out;
}

+ /*
+ * Check if IO will overwrite allocated blocks in case
+ * IOCB_NOWAIT flag is set.
+ */
+ if (!wait && !overwrite_io) {
+ overwrite_io = 1;
+ if (!down_read_trylock(&OCFS2_I(inode)->ip_alloc_sem)) {
+ ret = -EAGAIN;
+ goto out_unlock;
+ }
+
+ ret = ocfs2_overwrite_io(inode, di_bh, pos, count);
+ brelse(di_bh);
+ di_bh = NULL;
+ up_read(&OCFS2_I(inode)->ip_alloc_sem);
+ if (ret < 0) {
+ if (ret != -EAGAIN)
+ mlog_errno(ret);
+ goto out_unlock;
+ }
+ }
+
/* Clear suid / sgid if necessary. We do this here
* instead of later in the write path because
* remove_suid() calls ->setattr without any hint that
@@ -2199,7 +2228,9 @@ static int ocfs2_prepare_inode_for_write(struct file *file,

out_unlock:
trace_ocfs2_prepare_inode_for_write(OCFS2_I(inode)->ip_blkno,
- pos, count);
+ pos, count, wait);
+
+ brelse(di_bh);

if (meta_level >= 0)
ocfs2_inode_unlock(inode, meta_level);
@@ -2211,7 +2242,7 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
struct iov_iter *from)
{
- int direct_io, rw_level;
+ int rw_level;
ssize_t written = 0;
ssize_t ret;
size_t count = iov_iter_count(from);
@@ -2223,6 +2254,8 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
void *saved_ki_complete = NULL;
int append_write = ((iocb->ki_pos + count) >=
i_size_read(inode) ? 1 : 0);
+ int direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0;
+ int nowait = iocb->ki_flags & IOCB_NOWAIT ? 1 : 0;

trace_ocfs2_file_aio_write(inode, file, file->f_path.dentry,
(unsigned long long)OCFS2_I(inode)->ip_blkno,
@@ -2230,12 +2263,17 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
file->f_path.dentry->d_name.name,
(unsigned int)from->nr_segs); /* GRRRRR */

+ if (!direct_io && nowait)
+ return -EOPNOTSUPP;
+
if (count == 0)
return 0;

- direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0;
-
- inode_lock(inode);
+ if (direct_io && nowait) {
+ if (!inode_trylock(inode))
+ return -EAGAIN;
+ } else
+ inode_lock(inode);

/*
* Concurrent O_DIRECT writes are allowed with
@@ -2244,9 +2282,13 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
*/
rw_level = (!direct_io || full_coherency || append_write);

- ret = ocfs2_rw_lock(inode, rw_level);
+ if (direct_io && nowait)
+ ret = ocfs2_try_rw_lock(inode, rw_level);
+ else
+ ret = ocfs2_rw_lock(inode, rw_level);
if (ret < 0) {
- mlog_errno(ret);
+ if (ret != -EAGAIN)
+ mlog_errno(ret);
goto out_mutex;
}

@@ -2260,9 +2302,13 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
* other nodes to drop their caches. Buffered I/O
* already does this in write_begin().
*/
- ret = ocfs2_inode_lock(inode, NULL, 1);
+ if (nowait)
+ ret = ocfs2_try_inode_lock(inode, NULL, 1);
+ else
+ ret = ocfs2_inode_lock(inode, NULL, 1);
if (ret < 0) {
- mlog_errno(ret);
+ if (ret != -EAGAIN)
+ mlog_errno(ret);
goto out;
}

@@ -2277,9 +2323,10 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
}
count = ret;

- ret = ocfs2_prepare_inode_for_write(file, iocb->ki_pos, count);
+ ret = ocfs2_prepare_inode_for_write(file, iocb->ki_pos, count, !nowait);
if (ret < 0) {
- mlog_errno(ret);
+ if (ret != -EAGAIN)
+ mlog_errno(ret);
goto out;
}

@@ -2355,6 +2402,7 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
int ret = 0, rw_level = -1, lock_level = 0;
struct file *filp = iocb->ki_filp;
struct inode *inode = file_inode(filp);
+ int nowait = iocb->ki_flags & IOCB_NOWAIT ? 1 : 0;

trace_ocfs2_file_aio_read(inode, filp, filp->f_path.dentry,
(unsigned long long)OCFS2_I(inode)->ip_blkno,
@@ -2374,9 +2422,14 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
* need locks to protect pending reads from racing with truncate.
*/
if (iocb->ki_flags & IOCB_DIRECT) {
- ret = ocfs2_rw_lock(inode, 0);
+ if (nowait)
+ ret = ocfs2_try_rw_lock(inode, 0);
+ else
+ ret = ocfs2_rw_lock(inode, 0);
+
if (ret < 0) {
- mlog_errno(ret);
+ if (ret != -EAGAIN)
+ mlog_errno(ret);
goto bail;
}
rw_level = 0;
@@ -2393,9 +2446,11 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
* like i_size. This allows the checks down below
* generic_file_aio_read() a chance of actually working.
*/
- ret = ocfs2_inode_lock_atime(inode, filp->f_path.mnt, &lock_level);
+ ret = ocfs2_inode_lock_atime(inode, filp->f_path.mnt, &lock_level,
+ !nowait);
if (ret < 0) {
- mlog_errno(ret);
+ if (ret != -EAGAIN)
+ mlog_errno(ret);
goto bail;
}
ocfs2_inode_unlock(inode, lock_level);
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index 098f5c7..fb9a20e 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -184,7 +184,7 @@ int ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
int ret = 0, lock_level = 0;

ret = ocfs2_inode_lock_atime(file_inode(file),
- file->f_path.mnt, &lock_level);
+ file->f_path.mnt, &lock_level, 1);
if (ret < 0) {
mlog_errno(ret);
goto out;
diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h
index a0b5d00..e2a11aa 100644
--- a/fs/ocfs2/ocfs2_trace.h
+++ b/fs/ocfs2/ocfs2_trace.h
@@ -1449,20 +1449,22 @@

TRACE_EVENT(ocfs2_prepare_inode_for_write,
TP_PROTO(unsigned long long ino, unsigned long long saved_pos,
- unsigned long count),
- TP_ARGS(ino, saved_pos, count),
+ unsigned long count, int wait),
+ TP_ARGS(ino, saved_pos, count, wait),
TP_STRUCT__entry(
__field(unsigned long long, ino)
__field(unsigned long long, saved_pos)
__field(unsigned long, count)
+ __field(int, wait)
),
TP_fast_assign(
__entry->ino = ino;
__entry->saved_pos = saved_pos;
__entry->count = count;
+ __entry->wait = wait;
),
- TP_printk("%llu %llu %lu", __entry->ino,
- __entry->saved_pos, __entry->count)
+ TP_printk("%llu %llu %lu %d", __entry->ino,
+ __entry->saved_pos, __entry->count, __entry->wait)
);

DEFINE_OCFS2_INT_EVENT(generic_file_aio_read_ret);
--
1.8.5.6

2017-12-28 10:08:31

by Gang He

[permalink] [raw]

Subject: [PATCH v3 2/3] ocfs2: add ocfs2_overwrite_io function

Add the ocfs2_overwrite_io function, which checks whether a write
only overwrites already-allocated blocks; otherwise, the write would
incur extra block allocation overhead.

Signed-off-by: Gang He <[email protected]>
---
fs/ocfs2/extent_map.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
fs/ocfs2/extent_map.h | 3 +++
2 files changed, 48 insertions(+)

diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index e4719e0..06cb964 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -38,6 +38,7 @@
#include "inode.h"
#include "super.h"
#include "symlink.h"
+#include "aops.h"
#include "ocfs2_trace.h"

#include "buffer_head_io.h"
@@ -832,6 +833,50 @@ int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
return ret;
}

+/* Is IO overwriting allocated blocks? */
+int ocfs2_overwrite_io(struct inode *inode, struct buffer_head *di_bh,
+ u64 map_start, u64 map_len)
+{
+ int ret = 0, is_last;
+ u32 mapping_end, cpos;
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ struct ocfs2_extent_rec rec;
+
+ if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
+ if (ocfs2_size_fits_inline_data(di_bh, map_start + map_len))
+ return ret;
+ else
+ return -EAGAIN;
+ }
+
+ cpos = map_start >> osb->s_clustersize_bits;
+ mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
+ map_start + map_len);
+ is_last = 0;
+ while (cpos < mapping_end && !is_last) {
+ ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos,
+ NULL, &rec, &is_last);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ if (rec.e_blkno == 0ULL)
+ break;
+
+ if (rec.e_flags & OCFS2_EXT_REFCOUNTED)
+ break;
+
+ cpos = le32_to_cpu(rec.e_cpos) +
+ le16_to_cpu(rec.e_leaf_clusters);
+ }
+
+ if (cpos < mapping_end)
+ ret = -EAGAIN;
+out:
+ return ret;
+}
+
int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int whence)
{
struct inode *inode = file->f_mapping->host;
diff --git a/fs/ocfs2/extent_map.h b/fs/ocfs2/extent_map.h
index 67ea57d..1057586 100644
--- a/fs/ocfs2/extent_map.h
+++ b/fs/ocfs2/extent_map.h
@@ -53,6 +53,9 @@ int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno,
int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 map_start, u64 map_len);

+int ocfs2_overwrite_io(struct inode *inode, struct buffer_head *di_bh,
+ u64 map_start, u64 map_len);
+
int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int origin);

int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
--
1.8.5.6

2018-01-02 11:01:20

by alex chen

[permalink] [raw]

Subject: Re: [Ocfs2-devel] [PATCH v3 2/3] ocfs2: add ocfs2_overwrite_io function

Hi Gang,

On 2017/12/28 18:07, Gang He wrote:
> Add ocfs2_overwrite_io function, which is used to judge if
> overwrite allocated blocks, otherwise, the write will bring extra
> block allocation overhead.
>
> Signed-off-by: Gang He <[email protected]>
> ---
> fs/ocfs2/extent_map.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
> fs/ocfs2/extent_map.h | 3 +++
> 2 files changed, 48 insertions(+)
>
> diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
> index e4719e0..06cb964 100644
> --- a/fs/ocfs2/extent_map.c
> +++ b/fs/ocfs2/extent_map.c
> @@ -38,6 +38,7 @@
> #include "inode.h"
> #include "super.h"
> #include "symlink.h"
> +#include "aops.h"
> #include "ocfs2_trace.h"
>
> #include "buffer_head_io.h"
> @@ -832,6 +833,50 @@ int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
> return ret;
> }
>
> +/* Is IO overwriting allocated blocks? */
> +int ocfs2_overwrite_io(struct inode *inode, struct buffer_head *di_bh,
> + u64 map_start, u64 map_len)
Could the type of 'map_start' be loff_t and the type of 'map_len' be size_t here?

Thanks,
Alex
> +{
> + int ret = 0, is_last;
> + u32 mapping_end, cpos;
> + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
> + struct ocfs2_extent_rec rec;
> +
> + if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
> + if (ocfs2_size_fits_inline_data(di_bh, map_start + map_len))
> + return ret;
> + else
> + return -EAGAIN;
> + }
> +
> + cpos = map_start >> osb->s_clustersize_bits;
> + mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
> + map_start + map_len);
> + is_last = 0;
> + while (cpos < mapping_end && !is_last) {
> + ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos,
> + NULL, &rec, &is_last);
> + if (ret) {
> + mlog_errno(ret);
> + goto out;
> + }
> +
> + if (rec.e_blkno == 0ULL)
> + break;
> +
> + if (rec.e_flags & OCFS2_EXT_REFCOUNTED)
> + break;
> +
> + cpos = le32_to_cpu(rec.e_cpos) +
> + le16_to_cpu(rec.e_leaf_clusters);
> + }
> +
> + if (cpos < mapping_end)
> + ret = -EAGAIN;
> +out:
> + return ret;
> +}
> +
> int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int whence)
> {
> struct inode *inode = file->f_mapping->host;
> diff --git a/fs/ocfs2/extent_map.h b/fs/ocfs2/extent_map.h
> index 67ea57d..1057586 100644
> --- a/fs/ocfs2/extent_map.h
> +++ b/fs/ocfs2/extent_map.h
> @@ -53,6 +53,9 @@ int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno,
> int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
> u64 map_start, u64 map_len);
>
> +int ocfs2_overwrite_io(struct inode *inode, struct buffer_head *di_bh,
> + u64 map_start, u64 map_len);
> +
> int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int origin);
>
> int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
>

2018-01-03 05:14:05

by Gang He

[permalink] [raw]

Subject: Re: [Ocfs2-devel] [PATCH v3 2/3] ocfs2: add ocfs2_overwrite_io function

Hi Alex,

>>>
> Hi Gang,
>
> On 2017/12/28 18:07, Gang He wrote:
>> Add ocfs2_overwrite_io function, which is used to judge if
>> overwrite allocated blocks, otherwise, the write will bring extra
>> block allocation overhead.
>>
>> Signed-off-by: Gang He <[email protected]>
>> ---
>> fs/ocfs2/extent_map.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
>> fs/ocfs2/extent_map.h | 3 +++
>> 2 files changed, 48 insertions(+)
>>
>> diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
>> index e4719e0..06cb964 100644
>> --- a/fs/ocfs2/extent_map.c
>> +++ b/fs/ocfs2/extent_map.c
>> @@ -38,6 +38,7 @@
>> #include "inode.h"
>> #include "super.h"
>> #include "symlink.h"
>> +#include "aops.h"
>> #include "ocfs2_trace.h"
>>
>> #include "buffer_head_io.h"
>> @@ -832,6 +833,50 @@ int ocfs2_fiemap(struct inode *inode, struct
> fiemap_extent_info *fieinfo,
>> return ret;
>> }
>>
>> +/* Is IO overwriting allocated blocks? */
>> +int ocfs2_overwrite_io(struct inode *inode, struct buffer_head *di_bh,
>> + u64 map_start, u64 map_len)
> Here can the type of 'map_start' is struct loff_t and map_len is struct
> size_t?
I prefer to use explicit (fixed-width) types for the file start offset and length in the ocfs2_overwrite_io() function declaration,
so there will be an implicit type conversion here (loff_t -> u64, size_t -> u64); I think this conversion should be considered expected.
Since OCFS2 is a 64-bit file system, its own data types do not change, whereas the loff_t and size_t types can change across architectures (e.g. x86_32, x86_64, etc.).

Thanks
Gang

>
> Thanks,
> Alex
>> +{
>> + int ret = 0, is_last;
>> + u32 mapping_end, cpos;
>> + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
>> + struct ocfs2_extent_rec rec;
>> +
>> + if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
>> + if (ocfs2_size_fits_inline_data(di_bh, map_start + map_len))
>> + return ret;
>> + else
>> + return -EAGAIN;
>> + }
>> +
>> + cpos = map_start >> osb->s_clustersize_bits;
>> + mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
>> + map_start + map_len);
>> + is_last = 0;
>> + while (cpos < mapping_end && !is_last) {
>> + ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos,
>> + NULL, &rec, &is_last);
>> + if (ret) {
>> + mlog_errno(ret);
>> + goto out;
>> + }
>> +
>> + if (rec.e_blkno == 0ULL)
>> + break;
>> +
>> + if (rec.e_flags & OCFS2_EXT_REFCOUNTED)
>> + break;
>> +
>> + cpos = le32_to_cpu(rec.e_cpos) +
>> + le16_to_cpu(rec.e_leaf_clusters);
>> + }
>> +
>> + if (cpos < mapping_end)
>> + ret = -EAGAIN;
>> +out:
>> + return ret;
>> +}
>> +
>> int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int
> whence)
>> {
>> struct inode *inode = file->f_mapping->host;
>> diff --git a/fs/ocfs2/extent_map.h b/fs/ocfs2/extent_map.h
>> index 67ea57d..1057586 100644
>> --- a/fs/ocfs2/extent_map.h
>> +++ b/fs/ocfs2/extent_map.h
>> @@ -53,6 +53,9 @@ int ocfs2_extent_map_get_blocks(struct inode *inode, u64
> v_blkno, u64 *p_blkno,
>> int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
>> u64 map_start, u64 map_len);
>>
>> +int ocfs2_overwrite_io(struct inode *inode, struct buffer_head *di_bh,
>> + u64 map_start, u64 map_len);
>> +
>> int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int
> origin);
>>
>> int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
>>

2018-01-04 01:10:29

by alex chen

[permalink] [raw]

Subject: Re: [Ocfs2-devel] [PATCH v3 2/3] ocfs2: add ocfs2_overwrite_io function

Hi Gang,

On 2018/1/3 13:14, Gang He wrote:
> Hi Alex,
>
>
>>>>
>> Hi Gang,
>>
>> On 2017/12/28 18:07, Gang He wrote:
>>> Add ocfs2_overwrite_io function, which is used to judge if
>>> overwrite allocated blocks, otherwise, the write will bring extra
>>> block allocation overhead.
>>>
>>> Signed-off-by: Gang He <[email protected]>
>>> ---
>>> fs/ocfs2/extent_map.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
>>> fs/ocfs2/extent_map.h | 3 +++
>>> 2 files changed, 48 insertions(+)
>>>
>>> diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
>>> index e4719e0..06cb964 100644
>>> --- a/fs/ocfs2/extent_map.c
>>> +++ b/fs/ocfs2/extent_map.c
>>> @@ -38,6 +38,7 @@
>>> #include "inode.h"
>>> #include "super.h"
>>> #include "symlink.h"
>>> +#include "aops.h"
>>> #include "ocfs2_trace.h"
>>>
>>> #include "buffer_head_io.h"
>>> @@ -832,6 +833,50 @@ int ocfs2_fiemap(struct inode *inode, struct
>> fiemap_extent_info *fieinfo,
>>> return ret;
>>> }
>>>
>>> +/* Is IO overwriting allocated blocks? */
>>> +int ocfs2_overwrite_io(struct inode *inode, struct buffer_head *di_bh,
>>> + u64 map_start, u64 map_len)
>> Here can the type of 'map_start' is struct loff_t and map_len is struct
>> size_t?
> I prefer to use the detailed types for file start address and length in ocfs2_overwrite_io() function declaration,
> then here will be a potential type conversion (loff_t -> u64, size_t -> u64), I think this conversion should be considered as expectation.
> Since our OCFS2 is a 64 bit file system, the related data types do not change, but loff_t and size_t type can change under different architectures (e.g. x86_32, x86_64, etc.).
>
The type conversion (loff_t -> u64, size_t -> u64) has already been made by the time the function ocfs2_overwrite_io() is called.
So it doesn't matter which types we use for the file start offset and length in ocfs2_overwrite_io(), right?
To be consistent with the surrounding code, wouldn't it be better to use loff_t for 'map_start' and size_t for 'map_len'?

Thanks,
Alex

> Thanks
> Gang
>
>>
>> Thanks,
>> Alex
>>> +{
>>> + int ret = 0, is_last;
>>> + u32 mapping_end, cpos;
>>> + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
>>> + struct ocfs2_extent_rec rec;
>>> +
>>> + if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
>>> + if (ocfs2_size_fits_inline_data(di_bh, map_start + map_len))
>>> + return ret;
>>> + else
>>> + return -EAGAIN;
>>> + }
>>> +
>>> + cpos = map_start >> osb->s_clustersize_bits;
>>> + mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
>>> + map_start + map_len);
>>> + is_last = 0;
>>> + while (cpos < mapping_end && !is_last) {
>>> + ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos,
>>> + NULL, &rec, &is_last);
>>> + if (ret) {
>>> + mlog_errno(ret);
>>> + goto out;
>>> + }
>>> +
>>> + if (rec.e_blkno == 0ULL)
>>> + break;
>>> +
>>> + if (rec.e_flags & OCFS2_EXT_REFCOUNTED)
>>> + break;
>>> +
>>> + cpos = le32_to_cpu(rec.e_cpos) +
>>> + le16_to_cpu(rec.e_leaf_clusters);
>>> + }
>>> +
>>> + if (cpos < mapping_end)
>>> + ret = -EAGAIN;
>>> +out:
>>> + return ret;
>>> +}
>>> +
>>> int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int
>> whence)
>>> {
>>> struct inode *inode = file->f_mapping->host;
>>> diff --git a/fs/ocfs2/extent_map.h b/fs/ocfs2/extent_map.h
>>> index 67ea57d..1057586 100644
>>> --- a/fs/ocfs2/extent_map.h
>>> +++ b/fs/ocfs2/extent_map.h
>>> @@ -53,6 +53,9 @@ int ocfs2_extent_map_get_blocks(struct inode *inode, u64
>> v_blkno, u64 *p_blkno,
>>> int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
>>> u64 map_start, u64 map_len);
>>>
>>> +int ocfs2_overwrite_io(struct inode *inode, struct buffer_head *di_bh,
>>> + u64 map_start, u64 map_len);
>>> +
>>> int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int
>> origin);
>>>
>>> int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
>>>
>
>
> .
>

2018-01-04 03:32:18

by Gang He

[permalink] [raw]

Subject: Re: [Ocfs2-devel] [PATCH v3 2/3] ocfs2: add ocfs2_overwrite_io function

Hi Alex,

>>>
> Hi Gang,
>
> On 2018/1/3 13:14, Gang He wrote:
>> Hi Alex,
>>
>>
>>>>>
>>> Hi Gang,
>>>
>>> On 2017/12/28 18:07, Gang He wrote:
>>>> Add ocfs2_overwrite_io function, which is used to judge if
>>>> overwrite allocated blocks, otherwise, the write will bring extra
>>>> block allocation overhead.
>>>>
>>>> Signed-off-by: Gang He <[email protected]>
>>>> ---
>>>> fs/ocfs2/extent_map.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
>>>> fs/ocfs2/extent_map.h | 3 +++
>>>> 2 files changed, 48 insertions(+)
>>>>
>>>> diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
>>>> index e4719e0..06cb964 100644
>>>> --- a/fs/ocfs2/extent_map.c
>>>> +++ b/fs/ocfs2/extent_map.c
>>>> @@ -38,6 +38,7 @@
>>>> #include "inode.h"
>>>> #include "super.h"
>>>> #include "symlink.h"
>>>> +#include "aops.h"
>>>> #include "ocfs2_trace.h"
>>>>
>>>> #include "buffer_head_io.h"
>>>> @@ -832,6 +833,50 @@ int ocfs2_fiemap(struct inode *inode, struct
>>> fiemap_extent_info *fieinfo,
>>>> return ret;
>>>> }
>>>>
>>>> +/* Is IO overwriting allocated blocks? */
>>>> +int ocfs2_overwrite_io(struct inode *inode, struct buffer_head *di_bh,
>>>> + u64 map_start, u64 map_len)
>>> Here can the type of 'map_start' is struct loff_t and map_len is struct
>>> size_t?
>> I prefer to use the detailed types for file start address and length in
> ocfs2_overwrite_io() function declaration,
>> then here will be a potential type conversion (loff_t -> u64, size_t -> u64), I
> think this conversion should be considered as expectation.
>> Since our OCFS2 is a 64 bit file system, the related data types do not
> change, but loff_t and size_t type can change under different architectures
> (e.g. x86_32, x86_64, etc.).
>>
> The type conversion (loff_t -> u64, size_t -> u64) has been made before calling
> the function ocfs2_overwrite_io().
> So it doesn't matter which type we use for file start address and length in
> ocfs2_overwrite_io(), Right?
> To be consistent with the context, is it better to use struct loff_t for
> 'map_start' and struct size_t for 'map_len'?
I am not sure whether I described my thinking clearly.
In the VFS layer, loff_t, size_t and other related data types are used on all architectures, which means the sizes of these data types
can change depending on the CPU word size.
But for a specific file system the bit width is fixed; e.g. ocfs2 is a 64-bit file system, and this width is determined by the file system layout (not by the CPU word size).
So in this layer we should use fixed-length (or common) data types in the code, and the VFS-layer data types should be implicitly converted into our data types (except for pointer types).

Thanks
Gang

>
> Thanks,
> Alex
>
>> Thanks
>> Gang
>>
>>>
>>> Thanks,
>>> Alex
>>>> +{
>>>> + int ret = 0, is_last;
>>>> + u32 mapping_end, cpos;
>>>> + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
>>>> + struct ocfs2_extent_rec rec;
>>>> +
>>>> + if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
>>>> + if (ocfs2_size_fits_inline_data(di_bh, map_start + map_len))
>>>> + return ret;
>>>> + else
>>>> + return -EAGAIN;
>>>> + }
>>>> +
>>>> + cpos = map_start >> osb->s_clustersize_bits;
>>>> + mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
>>>> + map_start + map_len);
>>>> + is_last = 0;
>>>> + while (cpos < mapping_end && !is_last) {
>>>> + ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos,
>>>> + NULL, &rec, &is_last);
>>>> + if (ret) {
>>>> + mlog_errno(ret);
>>>> + goto out;
>>>> + }
>>>> +
>>>> + if (rec.e_blkno == 0ULL)
>>>> + break;
>>>> +
>>>> + if (rec.e_flags & OCFS2_EXT_REFCOUNTED)
>>>> + break;
>>>> +
>>>> + cpos = le32_to_cpu(rec.e_cpos) +
>>>> + le16_to_cpu(rec.e_leaf_clusters);
>>>> + }
>>>> +
>>>> + if (cpos < mapping_end)
>>>> + ret = -EAGAIN;
>>>> +out:
>>>> + return ret;
>>>> +}
>>>> +
>>>> int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int
>>> whence)
>>>> {
>>>> struct inode *inode = file->f_mapping->host;
>>>> diff --git a/fs/ocfs2/extent_map.h b/fs/ocfs2/extent_map.h
>>>> index 67ea57d..1057586 100644
>>>> --- a/fs/ocfs2/extent_map.h
>>>> +++ b/fs/ocfs2/extent_map.h
>>>> @@ -53,6 +53,9 @@ int ocfs2_extent_map_get_blocks(struct inode *inode, u64
>>> v_blkno, u64 *p_blkno,
>>>> int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
>>>> u64 map_start, u64 map_len);
>>>>
>>>> +int ocfs2_overwrite_io(struct inode *inode, struct buffer_head *di_bh,
>>>> + u64 map_start, u64 map_len);
>>>> +
>>>> int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int
>>> origin);
>>>>
>>>> int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
>>>>
>>
>>
>> .
>>

2018-01-11 02:00:04

by alex chen

[permalink] [raw]

Subject: Re: [Ocfs2-devel] [PATCH v3 2/3] ocfs2: add ocfs2_overwrite_io function

Hi Gang,

On 2018/1/4 11:32, Gang He wrote:
> Hi Alex,
>
>
>>>>
>> Hi Gang,
>>
>> On 2018/1/3 13:14, Gang He wrote:
>>> Hi Alex,
>>>
>>>
>>>>>>
>>>> Hi Gang,
>>>>
>>>> On 2017/12/28 18:07, Gang He wrote:
>>>>> Add ocfs2_overwrite_io function, which is used to judge if
>>>>> overwrite allocated blocks, otherwise, the write will bring extra
>>>>> block allocation overhead.
>>>>>
>>>>> Signed-off-by: Gang He <[email protected]>
>>>>> ---
>>>>> fs/ocfs2/extent_map.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
>>>>> fs/ocfs2/extent_map.h | 3 +++
>>>>> 2 files changed, 48 insertions(+)
>>>>>
>>>>> diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
>>>>> index e4719e0..06cb964 100644
>>>>> --- a/fs/ocfs2/extent_map.c
>>>>> +++ b/fs/ocfs2/extent_map.c
>>>>> @@ -38,6 +38,7 @@
>>>>> #include "inode.h"
>>>>> #include "super.h"
>>>>> #include "symlink.h"
>>>>> +#include "aops.h"
>>>>> #include "ocfs2_trace.h"
>>>>>
>>>>> #include "buffer_head_io.h"
>>>>> @@ -832,6 +833,50 @@ int ocfs2_fiemap(struct inode *inode, struct
>>>> fiemap_extent_info *fieinfo,
>>>>> return ret;
>>>>> }
>>>>>
>>>>> +/* Is IO overwriting allocated blocks? */
>>>>> +int ocfs2_overwrite_io(struct inode *inode, struct buffer_head *di_bh,
>>>>> + u64 map_start, u64 map_len)
>>>> Here can the type of 'map_start' is struct loff_t and map_len is struct
>>>> size_t?
>>> I prefer to use the detailed types for file start address and length in
>> ocfs2_overwrite_io() function declaration,
>>> then here will be a potential type conversion (loff_t -> u64, size_t -> u64), I
>> think this conversion should be considered as expectation.
>>> Since our OCFS2 is a 64 bit file system, the related data types do not
>> change, but loff_t and size_t type can change under different architectures
>> (e.g. x86_32, x86_64, etc.).
>>>
>> The type conversion (loff_t -> u64, size_t -> u64) has been made before calling
>> the function ocfs2_overwrite_io().
>> So it doesn't matter which type we use for file start address and length in
>> ocfs2_overwrite_io(), Right?
>> To be consistent with the context, is it better to use struct loff_t for
>> 'map_start' and struct size_t for 'map_len'?
> I am not sure if I describe my thought clearly.
> In VFS layer, loff_t, size_t and other related data types are used for all architectures, that means these kinds of data type's lengths
> will change based on different CPU bits.
> But, for a specific file system, the file system bit is fixed, e.g. ocfs2 is a 64 bits file system, this bit length is determined by file system layout (not CPU bits).
> Then, in this layer we should use fixed-length (or common) data type in the code, the VFS layer data types should be converted into our data types potentially (but except pointer type).
>
I agree with you about using fixed-length data types in the OCFS2 layer, but we already use loff_t for pos in ocfs2_prepare_inode_for_write(), so I don't
think it makes a difference whichever data type we use here. Right?
Anyway, I accept it if you think it is better to use a fixed-length type here.

Reviewed-by: Alex Chen <[email protected]>

Thanks,
Alex
>
> Thanks
> Gang
>
>>
>> Thanks,
>> Alex
>>
>>> Thanks
>>> Gang
>>>
>>>>
>>>> Thanks,
>>>> Alex
>>>>> +{
>>>>> + int ret = 0, is_last;
>>>>> + u32 mapping_end, cpos;
>>>>> + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
>>>>> + struct ocfs2_extent_rec rec;
>>>>> +
>>>>> + if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
>>>>> + if (ocfs2_size_fits_inline_data(di_bh, map_start + map_len))
>>>>> + return ret;
>>>>> + else
>>>>> + return -EAGAIN;
>>>>> + }
>>>>> +
>>>>> + cpos = map_start >> osb->s_clustersize_bits;
>>>>> + mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
>>>>> + map_start + map_len);
>>>>> + is_last = 0;
>>>>> + while (cpos < mapping_end && !is_last) {
>>>>> + ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos,
>>>>> + NULL, &rec, &is_last);
>>>>> + if (ret) {
>>>>> + mlog_errno(ret);
>>>>> + goto out;
>>>>> + }
>>>>> +
>>>>> + if (rec.e_blkno == 0ULL)
>>>>> + break;
>>>>> +
>>>>> + if (rec.e_flags & OCFS2_EXT_REFCOUNTED)
>>>>> + break;
>>>>> +
>>>>> + cpos = le32_to_cpu(rec.e_cpos) +
>>>>> + le16_to_cpu(rec.e_leaf_clusters);
>>>>> + }
>>>>> +
>>>>> + if (cpos < mapping_end)
>>>>> + ret = -EAGAIN;
>>>>> +out:
>>>>> + return ret;
>>>>> +}
>>>>> +
>>>>> int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int
>>>> whence)
>>>>> {
>>>>> struct inode *inode = file->f_mapping->host;
>>>>> diff --git a/fs/ocfs2/extent_map.h b/fs/ocfs2/extent_map.h
>>>>> index 67ea57d..1057586 100644
>>>>> --- a/fs/ocfs2/extent_map.h
>>>>> +++ b/fs/ocfs2/extent_map.h
>>>>> @@ -53,6 +53,9 @@ int ocfs2_extent_map_get_blocks(struct inode *inode, u64
>>>> v_blkno, u64 *p_blkno,
>>>>> int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
>>>>> u64 map_start, u64 map_len);
>>>>>
>>>>> +int ocfs2_overwrite_io(struct inode *inode, struct buffer_head *di_bh,
>>>>> + u64 map_start, u64 map_len);
>>>>> +
>>>>> int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int
>>>> origin);
>>>>>
>>>>> int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
>>>>>
>>>
>>>
>>> .
>>>
>
> .
>

2018-01-11 02:19:20

by alex chen

[permalink] [raw]

Subject: Re: [Ocfs2-devel] [PATCH v3 3/3] ocfs2: nowait aio support

Hi Gang,

On 2017/12/28 18:07, Gang He wrote:
> Return -EAGAIN if any of the following checks fail for
> direct I/O with nowait flag:
> Can not get the related locks immediately,
> Blocks are not allocated at the write location, it will trigger
> block allocation, this will block IO operations.
>
> Signed-off-by: Gang He <[email protected]>
> ---
> fs/ocfs2/dir.c | 2 +-
> fs/ocfs2/dlmglue.c | 20 ++++++++---
> fs/ocfs2/dlmglue.h | 2 +-
> fs/ocfs2/file.c | 95 +++++++++++++++++++++++++++++++++++++++-----------
> fs/ocfs2/mmap.c | 2 +-
> fs/ocfs2/ocfs2_trace.h | 10 +++---
> 6 files changed, 99 insertions(+), 32 deletions(-)
>
> diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
> index febe631..ea50901 100644
> --- a/fs/ocfs2/dir.c
> +++ b/fs/ocfs2/dir.c
> @@ -1957,7 +1957,7 @@ int ocfs2_readdir(struct file *file, struct dir_context *ctx)
>
> trace_ocfs2_readdir((unsigned long long)OCFS2_I(inode)->ip_blkno);
>
> - error = ocfs2_inode_lock_atime(inode, file->f_path.mnt, &lock_level);
> + error = ocfs2_inode_lock_atime(inode, file->f_path.mnt, &lock_level, 1);
> if (lock_level && error >= 0) {
> /* We release EX lock which used to update atime
> * and get PR lock again to reduce contention
> diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
> index a68efa3..07e169f 100644
> --- a/fs/ocfs2/dlmglue.c
> +++ b/fs/ocfs2/dlmglue.c
> @@ -2515,13 +2515,18 @@ int ocfs2_inode_lock_with_page(struct inode *inode,
>
> int ocfs2_inode_lock_atime(struct inode *inode,
> struct vfsmount *vfsmnt,
> - int *level)
> + int *level, int wait)
> {
> int ret;
>
> - ret = ocfs2_inode_lock(inode, NULL, 0);
> + if (wait)
> + ret = ocfs2_inode_lock(inode, NULL, 0);
> + else
> + ret = ocfs2_try_inode_lock(inode, NULL, 0);
> +
> if (ret < 0) {
> - mlog_errno(ret);
> + if (ret != -EAGAIN)
> + mlog_errno(ret);
> return ret;
> }
>
> @@ -2533,9 +2538,14 @@ int ocfs2_inode_lock_atime(struct inode *inode,
> struct buffer_head *bh = NULL;
>
> ocfs2_inode_unlock(inode, 0);
> - ret = ocfs2_inode_lock(inode, &bh, 1);
> + if (wait)
> + ret = ocfs2_inode_lock(inode, &bh, 1);
> + else
> + ret = ocfs2_try_inode_lock(inode, &bh, 1);
> +
> if (ret < 0) {
> - mlog_errno(ret);
> + if (ret != -EAGAIN)
> + mlog_errno(ret);
> return ret;
> }
> *level = 1;
> diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
> index 05910fc..c83dbb5 100644
> --- a/fs/ocfs2/dlmglue.h
> +++ b/fs/ocfs2/dlmglue.h
> @@ -123,7 +123,7 @@ void ocfs2_refcount_lock_res_init(struct ocfs2_lock_res *lockres,
> void ocfs2_open_unlock(struct inode *inode);
> int ocfs2_inode_lock_atime(struct inode *inode,
> struct vfsmount *vfsmnt,
> - int *level);
> + int *level, int wait);
> int ocfs2_inode_lock_full_nested(struct inode *inode,
> struct buffer_head **ret_bh,
> int ex,
> diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
> index a1d0510..caef9b1 100644
> --- a/fs/ocfs2/file.c
> +++ b/fs/ocfs2/file.c
> @@ -140,6 +140,8 @@ static int ocfs2_file_open(struct inode *inode, struct file *file)
> spin_unlock(&oi->ip_lock);
> }
>
> + file->f_mode |= FMODE_NOWAIT;
> +
> leave:
> return status;
> }
> @@ -2132,12 +2134,12 @@ static int ocfs2_prepare_inode_for_refcount(struct inode *inode,
> }
>
> static int ocfs2_prepare_inode_for_write(struct file *file,
> - loff_t pos,
> - size_t count)
> + loff_t pos, size_t count, int wait)
> {
> - int ret = 0, meta_level = 0;
> + int ret = 0, meta_level = 0, overwrite_io = 0;
> struct dentry *dentry = file->f_path.dentry;
> struct inode *inode = d_inode(dentry);
> + struct buffer_head *di_bh = NULL;
> loff_t end;
>
> /*
> @@ -2145,13 +2147,40 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
> * if we need to make modifications here.
> */
> for(;;) {
> - ret = ocfs2_inode_lock(inode, NULL, meta_level);
> + if (wait)
> + ret = ocfs2_inode_lock(inode, NULL, meta_level);
> + else
> + ret = ocfs2_try_inode_lock(inode,
> + overwrite_io ? NULL : &di_bh, meta_level);
> if (ret < 0) {
> meta_level = -1;
> - mlog_errno(ret);
> + if (ret != -EAGAIN)
> + mlog_errno(ret);
> goto out;
> }
>
> + /*
> + * Check if IO will overwrite allocated blocks in case
> + * IOCB_NOWAIT flag is set.
> + */
> + if (!wait && !overwrite_io) {
> + overwrite_io = 1;
> + if (!down_read_trylock(&OCFS2_I(inode)->ip_alloc_sem)) {
> + ret = -EAGAIN;
> + goto out_unlock;
> + }
> +
> + ret = ocfs2_overwrite_io(inode, di_bh, pos, count);
> + brelse(di_bh);
> + di_bh = NULL;
> + up_read(&OCFS2_I(inode)->ip_alloc_sem);
> + if (ret < 0) {
> + if (ret != -EAGAIN)
> + mlog_errno(ret);
> + goto out_unlock;
> + }
> + }
> +
> /* Clear suid / sgid if necessary. We do this here
> * instead of later in the write path because
> * remove_suid() calls ->setattr without any hint that
> @@ -2199,7 +2228,9 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
>
> out_unlock:
> trace_ocfs2_prepare_inode_for_write(OCFS2_I(inode)->ip_blkno,
> - pos, count);
> + pos, count, wait);
> +
> + brelse(di_bh);
>
> if (meta_level >= 0)
> ocfs2_inode_unlock(inode, meta_level);
> @@ -2211,7 +2242,7 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
> static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
> struct iov_iter *from)
> {
> - int direct_io, rw_level;
> + int rw_level;
> ssize_t written = 0;
> ssize_t ret;
> size_t count = iov_iter_count(from);
> @@ -2223,6 +2254,8 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
> void *saved_ki_complete = NULL;
> int append_write = ((iocb->ki_pos + count) >=
> i_size_read(inode) ? 1 : 0);
> + int direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0;
> + int nowait = iocb->ki_flags & IOCB_NOWAIT ? 1 : 0;
>
> trace_ocfs2_file_aio_write(inode, file, file->f_path.dentry,
> (unsigned long long)OCFS2_I(inode)->ip_blkno,
> @@ -2230,12 +2263,17 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
> file->f_path.dentry->d_name.name,
> (unsigned int)from->nr_segs); /* GRRRRR */
>
> + if (!direct_io && nowait)
> + return -EOPNOTSUPP;
> +
> if (count == 0)
> return 0;
>
> - direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0;
> -
> - inode_lock(inode);
I think we only need to check nowait here, because the check at the start of this function
already rejects 'IOCB_NOWAIT' when 'IOCB_DIRECT' is not set, so nowait implies direct_io.

> + if (direct_io && nowait) {
> + if (!inode_trylock(inode))
> + return -EAGAIN;
> + } else
> + inode_lock(inode);
>
> /*
> * Concurrent O_DIRECT writes are allowed with
> @@ -2244,9 +2282,13 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
> */
> rw_level = (!direct_io || full_coherency || append_write);
>
> - ret = ocfs2_rw_lock(inode, rw_level);
> + if (direct_io && nowait)
> + ret = ocfs2_try_rw_lock(inode, rw_level);
> + else
> + ret = ocfs2_rw_lock(inode, rw_level);
> if (ret < 0) {
> - mlog_errno(ret);
> + if (ret != -EAGAIN)
> + mlog_errno(ret);
> goto out_mutex;
> }
>
> @@ -2260,9 +2302,13 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
> * other nodes to drop their caches. Buffered I/O
> * already does this in write_begin().
> */
> - ret = ocfs2_inode_lock(inode, NULL, 1);
> + if (nowait)
> + ret = ocfs2_try_inode_lock(inode, NULL, 1);
> + else
> + ret = ocfs2_inode_lock(inode, NULL, 1);
> if (ret < 0) {
> - mlog_errno(ret);
> + if (ret != -EAGAIN)
> + mlog_errno(ret);
> goto out;
> }
>
> @@ -2277,9 +2323,10 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
> }
> count = ret;
>
> - ret = ocfs2_prepare_inode_for_write(file, iocb->ki_pos, count);
> + ret = ocfs2_prepare_inode_for_write(file, iocb->ki_pos, count, !nowait);
> if (ret < 0) {
> - mlog_errno(ret);
> + if (ret != -EAGAIN)
> + mlog_errno(ret);
> goto out;
> }
>
> @@ -2355,6 +2402,7 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
> int ret = 0, rw_level = -1, lock_level = 0;
> struct file *filp = iocb->ki_filp;
> struct inode *inode = file_inode(filp);
> + int nowait = iocb->ki_flags & IOCB_NOWAIT ? 1 : 0;
>
Should we check here whether the 'IOCB_DIRECT' flag and the 'IOCB_NOWAIT' flag are both set?

Thanks,
Alex

> trace_ocfs2_file_aio_read(inode, filp, filp->f_path.dentry,
> (unsigned long long)OCFS2_I(inode)->ip_blkno,
> @@ -2374,9 +2422,14 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
> * need locks to protect pending reads from racing with truncate.
> */
> if (iocb->ki_flags & IOCB_DIRECT) {
> - ret = ocfs2_rw_lock(inode, 0);
> + if (nowait)
> + ret = ocfs2_try_rw_lock(inode, 0);
> + else
> + ret = ocfs2_rw_lock(inode, 0);
> +
> if (ret < 0) {
> - mlog_errno(ret);
> + if (ret != -EAGAIN)
> + mlog_errno(ret);
> goto bail;
> }
> rw_level = 0;
> @@ -2393,9 +2446,11 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
> * like i_size. This allows the checks down below
> * generic_file_aio_read() a chance of actually working.
> */
> - ret = ocfs2_inode_lock_atime(inode, filp->f_path.mnt, &lock_level);
> + ret = ocfs2_inode_lock_atime(inode, filp->f_path.mnt, &lock_level,
> + !nowait);
> if (ret < 0) {
> - mlog_errno(ret);
> + if (ret != -EAGAIN)
> + mlog_errno(ret);
> goto bail;
> }
> ocfs2_inode_unlock(inode, lock_level);
> diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
> index 098f5c7..fb9a20e 100644
> --- a/fs/ocfs2/mmap.c
> +++ b/fs/ocfs2/mmap.c
> @@ -184,7 +184,7 @@ int ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
> int ret = 0, lock_level = 0;
>
> ret = ocfs2_inode_lock_atime(file_inode(file),
> - file->f_path.mnt, &lock_level);
> + file->f_path.mnt, &lock_level, 1);
> if (ret < 0) {
> mlog_errno(ret);
> goto out;
> diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h
> index a0b5d00..e2a11aa 100644
> --- a/fs/ocfs2/ocfs2_trace.h
> +++ b/fs/ocfs2/ocfs2_trace.h
> @@ -1449,20 +1449,22 @@
>
> TRACE_EVENT(ocfs2_prepare_inode_for_write,
> TP_PROTO(unsigned long long ino, unsigned long long saved_pos,
> - unsigned long count),
> - TP_ARGS(ino, saved_pos, count),
> + unsigned long count, int wait),
> + TP_ARGS(ino, saved_pos, count, wait),
> TP_STRUCT__entry(
> __field(unsigned long long, ino)
> __field(unsigned long long, saved_pos)
> __field(unsigned long, count)
> + __field(int, wait)
> ),
> TP_fast_assign(
> __entry->ino = ino;
> __entry->saved_pos = saved_pos;
> __entry->count = count;
> + __entry->wait = wait;
> ),
> - TP_printk("%llu %llu %lu", __entry->ino,
> - __entry->saved_pos, __entry->count)
> + TP_printk("%llu %llu %lu %d", __entry->ino,
> + __entry->saved_pos, __entry->count, __entry->wait)
> );
>
> DEFINE_OCFS2_INT_EVENT(generic_file_aio_read_ret);
>

2018-01-11 03:23:58

by Gang He

[permalink] [raw]

Subject: Re: [Ocfs2-devel] [PATCH v3 3/3] ocfs2: nowait aio support

Hi Alex,

>>>
> Hi Gang,
>
> On 2017/12/28 18:07, Gang He wrote:
>> Return -EAGAIN if any of the following checks fail for
>> direct I/O with nowait flag:
>> Can not get the related locks immediately,
>> Blocks are not allocated at the write location, it will trigger
>> block allocation, this will block IO operations.
>>
>> Signed-off-by: Gang He <[email protected]>
>> ---
>> fs/ocfs2/dir.c | 2 +-
>> fs/ocfs2/dlmglue.c | 20 ++++++++---
>> fs/ocfs2/dlmglue.h | 2 +-
>> fs/ocfs2/file.c | 95 +++++++++++++++++++++++++++++++++++++++-----------
>> fs/ocfs2/mmap.c | 2 +-
>> fs/ocfs2/ocfs2_trace.h | 10 +++---
>> 6 files changed, 99 insertions(+), 32 deletions(-)
>>
>> diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
>> index febe631..ea50901 100644
>> --- a/fs/ocfs2/dir.c
>> +++ b/fs/ocfs2/dir.c
>> @@ -1957,7 +1957,7 @@ int ocfs2_readdir(struct file *file, struct dir_context
> *ctx)
>>
>> trace_ocfs2_readdir((unsigned long long)OCFS2_I(inode)->ip_blkno);
>>
>> - error = ocfs2_inode_lock_atime(inode, file->f_path.mnt, &lock_level);
>> + error = ocfs2_inode_lock_atime(inode, file->f_path.mnt, &lock_level, 1);
>> if (lock_level && error >= 0) {
>> /* We release EX lock which used to update atime
>> * and get PR lock again to reduce contention
>> diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
>> index a68efa3..07e169f 100644
>> --- a/fs/ocfs2/dlmglue.c
>> +++ b/fs/ocfs2/dlmglue.c
>> @@ -2515,13 +2515,18 @@ int ocfs2_inode_lock_with_page(struct inode *inode,
>>
>> int ocfs2_inode_lock_atime(struct inode *inode,
>> struct vfsmount *vfsmnt,
>> - int *level)
>> + int *level, int wait)
>> {
>> int ret;
>>
>> - ret = ocfs2_inode_lock(inode, NULL, 0);
>> + if (wait)
>> + ret = ocfs2_inode_lock(inode, NULL, 0);
>> + else
>> + ret = ocfs2_try_inode_lock(inode, NULL, 0);
>> +
>> if (ret < 0) {
>> - mlog_errno(ret);
>> + if (ret != -EAGAIN)
>> + mlog_errno(ret);
>> return ret;
>> }
>>
>> @@ -2533,9 +2538,14 @@ int ocfs2_inode_lock_atime(struct inode *inode,
>> struct buffer_head *bh = NULL;
>>
>> ocfs2_inode_unlock(inode, 0);
>> - ret = ocfs2_inode_lock(inode, &bh, 1);
>> + if (wait)
>> + ret = ocfs2_inode_lock(inode, &bh, 1);
>> + else
>> + ret = ocfs2_try_inode_lock(inode, &bh, 1);
>> +
>> if (ret < 0) {
>> - mlog_errno(ret);
>> + if (ret != -EAGAIN)
>> + mlog_errno(ret);
>> return ret;
>> }
>> *level = 1;
>> diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
>> index 05910fc..c83dbb5 100644
>> --- a/fs/ocfs2/dlmglue.h
>> +++ b/fs/ocfs2/dlmglue.h
>> @@ -123,7 +123,7 @@ void ocfs2_refcount_lock_res_init(struct ocfs2_lock_res
> *lockres,
>> void ocfs2_open_unlock(struct inode *inode);
>> int ocfs2_inode_lock_atime(struct inode *inode,
>> struct vfsmount *vfsmnt,
>> - int *level);
>> + int *level, int wait);
>> int ocfs2_inode_lock_full_nested(struct inode *inode,
>> struct buffer_head **ret_bh,
>> int ex,
>> diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
>> index a1d0510..caef9b1 100644
>> --- a/fs/ocfs2/file.c
>> +++ b/fs/ocfs2/file.c
>> @@ -140,6 +140,8 @@ static int ocfs2_file_open(struct inode *inode, struct
> file *file)
>> spin_unlock(&oi->ip_lock);
>> }
>>
>> + file->f_mode |= FMODE_NOWAIT;
>> +
>> leave:
>> return status;
>> }
>> @@ -2132,12 +2134,12 @@ static int ocfs2_prepare_inode_for_refcount(struct
> inode *inode,
>> }
>>
>> static int ocfs2_prepare_inode_for_write(struct file *file,
>> - loff_t pos,
>> - size_t count)
>> + loff_t pos, size_t count, int wait)
>> {
>> - int ret = 0, meta_level = 0;
>> + int ret = 0, meta_level = 0, overwrite_io = 0;
>> struct dentry *dentry = file->f_path.dentry;
>> struct inode *inode = d_inode(dentry);
>> + struct buffer_head *di_bh = NULL;
>> loff_t end;
>>
>> /*
>> @@ -2145,13 +2147,40 @@ static int ocfs2_prepare_inode_for_write(struct file
> *file,
>> * if we need to make modifications here.
>> */
>> for(;;) {
>> - ret = ocfs2_inode_lock(inode, NULL, meta_level);
>> + if (wait)
>> + ret = ocfs2_inode_lock(inode, NULL, meta_level);
>> + else
>> + ret = ocfs2_try_inode_lock(inode,
>> + overwrite_io ? NULL : &di_bh, meta_level);
>> if (ret < 0) {
>> meta_level = -1;
>> - mlog_errno(ret);
>> + if (ret != -EAGAIN)
>> + mlog_errno(ret);
>> goto out;
>> }
>>
>> + /*
>> + * Check if IO will overwrite allocated blocks in case
>> + * IOCB_NOWAIT flag is set.
>> + */
>> + if (!wait && !overwrite_io) {
>> + overwrite_io = 1;
>> + if (!down_read_trylock(&OCFS2_I(inode)->ip_alloc_sem)) {
>> + ret = -EAGAIN;
>> + goto out_unlock;
>> + }
>> +
>> + ret = ocfs2_overwrite_io(inode, di_bh, pos, count);
>> + brelse(di_bh);
>> + di_bh = NULL;
>> + up_read(&OCFS2_I(inode)->ip_alloc_sem);
>> + if (ret < 0) {
>> + if (ret != -EAGAIN)
>> + mlog_errno(ret);
>> + goto out_unlock;
>> + }
>> + }
>> +
>> /* Clear suid / sgid if necessary. We do this here
>> * instead of later in the write path because
>> * remove_suid() calls ->setattr without any hint that
>> @@ -2199,7 +2228,9 @@ static int ocfs2_prepare_inode_for_write(struct file
> *file,
>>
>> out_unlock:
>> trace_ocfs2_prepare_inode_for_write(OCFS2_I(inode)->ip_blkno,
>> - pos, count);
>> + pos, count, wait);
>> +
>> + brelse(di_bh);
>>
>> if (meta_level >= 0)
>> ocfs2_inode_unlock(inode, meta_level);
>> @@ -2211,7 +2242,7 @@ static int ocfs2_prepare_inode_for_write(struct file
> *file,
>> static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
>> struct iov_iter *from)
>> {
>> - int direct_io, rw_level;
>> + int rw_level;
>> ssize_t written = 0;
>> ssize_t ret;
>> size_t count = iov_iter_count(from);
>> @@ -2223,6 +2254,8 @@ static ssize_t ocfs2_file_write_iter(struct kiocb
> *iocb,
>> void *saved_ki_complete = NULL;
>> int append_write = ((iocb->ki_pos + count) >=
>> i_size_read(inode) ? 1 : 0);
>> + int direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0;
>> + int nowait = iocb->ki_flags & IOCB_NOWAIT ? 1 : 0;
>>
>> trace_ocfs2_file_aio_write(inode, file, file->f_path.dentry,
>> (unsigned long long)OCFS2_I(inode)->ip_blkno,
>> @@ -2230,12 +2263,17 @@ static ssize_t ocfs2_file_write_iter(struct kiocb
> *iocb,
>> file->f_path.dentry->d_name.name,
>> (unsigned int)from->nr_segs); /* GRRRRR */
>>
>> + if (!direct_io && nowait)
>> + return -EOPNOTSUPP;
>> +
>> if (count == 0)
>> return 0;
>>
>> - direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0;
>> -
>> - inode_lock(inode);
> I think we only need check the nowait here because we already check if the
> 'IOCB_DIRECT'
> flag and the 'IOCB_NOWAIT' flag are both set in the front of this function.
Yes.

>
>> + if (direct_io && nowait) {
>> + if (!inode_trylock(inode))
>> + return -EAGAIN;
>> + } else
>> + inode_lock(inode);
>>
>> /*
>> * Concurrent O_DIRECT writes are allowed with
>> @@ -2244,9 +2282,13 @@ static ssize_t ocfs2_file_write_iter(struct kiocb
> *iocb,
>> */
>> rw_level = (!direct_io || full_coherency || append_write);
>>
>> - ret = ocfs2_rw_lock(inode, rw_level);
>> + if (direct_io && nowait)
>> + ret = ocfs2_try_rw_lock(inode, rw_level);
>> + else
>> + ret = ocfs2_rw_lock(inode, rw_level);
>> if (ret < 0) {
>> - mlog_errno(ret);
>> + if (ret != -EAGAIN)
>> + mlog_errno(ret);
>> goto out_mutex;
>> }
>>
>> @@ -2260,9 +2302,13 @@ static ssize_t ocfs2_file_write_iter(struct kiocb
> *iocb,
>> * other nodes to drop their caches. Buffered I/O
>> * already does this in write_begin().
>> */
>> - ret = ocfs2_inode_lock(inode, NULL, 1);
>> + if (nowait)
>> + ret = ocfs2_try_inode_lock(inode, NULL, 1);
>> + else
>> + ret = ocfs2_inode_lock(inode, NULL, 1);
>> if (ret < 0) {
>> - mlog_errno(ret);
>> + if (ret != -EAGAIN)
>> + mlog_errno(ret);
>> goto out;
>> }
>>
>> @@ -2277,9 +2323,10 @@ static ssize_t ocfs2_file_write_iter(struct kiocb
> *iocb,
>> }
>> count = ret;
>>
>> - ret = ocfs2_prepare_inode_for_write(file, iocb->ki_pos, count);
>> + ret = ocfs2_prepare_inode_for_write(file, iocb->ki_pos, count, !nowait);
>> if (ret < 0) {
>> - mlog_errno(ret);
>> + if (ret != -EAGAIN)
>> + mlog_errno(ret);
>> goto out;
>> }
>>
>> @@ -2355,6 +2402,7 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
>> int ret = 0, rw_level = -1, lock_level = 0;
>> struct file *filp = iocb->ki_filp;
>> struct inode *inode = file_inode(filp);
>> + int nowait = iocb->ki_flags & IOCB_NOWAIT ? 1 : 0;
>>
> Here should we check if the 'IOCB_DIRECT' flag and the 'IOCB_NOWAIT' flag
> are both set?
OK, I will look at the code again and make the code style consistent.

Thanks
Gang

>
> Thanks,
> Alex
>
>> trace_ocfs2_file_aio_read(inode, filp, filp->f_path.dentry,
>> (unsigned long long)OCFS2_I(inode)->ip_blkno,
>> @@ -2374,9 +2422,14 @@ static ssize_t ocfs2_file_read_iter(struct kiocb
> *iocb,
>> * need locks to protect pending reads from racing with truncate.
>> */
>> if (iocb->ki_flags & IOCB_DIRECT) {
>> - ret = ocfs2_rw_lock(inode, 0);
>> + if (nowait)
>> + ret = ocfs2_try_rw_lock(inode, 0);
>> + else
>> + ret = ocfs2_rw_lock(inode, 0);
>> +
>> if (ret < 0) {
>> - mlog_errno(ret);
>> + if (ret != -EAGAIN)
>> + mlog_errno(ret);
>> goto bail;
>> }
>> rw_level = 0;
>> @@ -2393,9 +2446,11 @@ static ssize_t ocfs2_file_read_iter(struct kiocb
> *iocb,
>> * like i_size. This allows the checks down below
>> * generic_file_aio_read() a chance of actually working.
>> */
>> - ret = ocfs2_inode_lock_atime(inode, filp->f_path.mnt, &lock_level);
>> + ret = ocfs2_inode_lock_atime(inode, filp->f_path.mnt, &lock_level,
>> + !nowait);
>> if (ret < 0) {
>> - mlog_errno(ret);
>> + if (ret != -EAGAIN)
>> + mlog_errno(ret);
>> goto bail;
>> }
>> ocfs2_inode_unlock(inode, lock_level);
>> diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
>> index 098f5c7..fb9a20e 100644
>> --- a/fs/ocfs2/mmap.c
>> +++ b/fs/ocfs2/mmap.c
>> @@ -184,7 +184,7 @@ int ocfs2_mmap(struct file *file, struct vm_area_struct
> *vma)
>> int ret = 0, lock_level = 0;
>>
>> ret = ocfs2_inode_lock_atime(file_inode(file),
>> - file->f_path.mnt, &lock_level);
>> + file->f_path.mnt, &lock_level, 1);
>> if (ret < 0) {
>> mlog_errno(ret);
>> goto out;
>> diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h
>> index a0b5d00..e2a11aa 100644
>> --- a/fs/ocfs2/ocfs2_trace.h
>> +++ b/fs/ocfs2/ocfs2_trace.h
>> @@ -1449,20 +1449,22 @@
>>
>> TRACE_EVENT(ocfs2_prepare_inode_for_write,
>> TP_PROTO(unsigned long long ino, unsigned long long saved_pos,
>> - unsigned long count),
>> - TP_ARGS(ino, saved_pos, count),
>> + unsigned long count, int wait),
>> + TP_ARGS(ino, saved_pos, count, wait),
>> TP_STRUCT__entry(
>> __field(unsigned long long, ino)
>> __field(unsigned long long, saved_pos)
>> __field(unsigned long, count)
>> + __field(int, wait)
>> ),
>> TP_fast_assign(
>> __entry->ino = ino;
>> __entry->saved_pos = saved_pos;
>> __entry->count = count;
>> + __entry->wait = wait;
>> ),
>> - TP_printk("%llu %llu %lu", __entry->ino,
>> - __entry->saved_pos, __entry->count)
>> + TP_printk("%llu %llu %lu %d", __entry->ino,
>> + __entry->saved_pos, __entry->count, __entry->wait)
>> );
>>
>> DEFINE_OCFS2_INT_EVENT(generic_file_aio_read_ret);
>>