2014-12-21 16:35:31

by Jaegeuk Kim

[permalink] [raw]
Subject: [PATCH 1/6] f2fs: use f2fs_io_info to clean up messy parameters during IO path

This patch cleans up parameters on IO paths.
The key idea is to use f2fs_io_info adding a parameter, block address, and then
use this structure as parameters.

Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/checkpoint.c | 24 ++++++++++---------
fs/f2fs/data.c | 66 +++++++++++++++++++++++++++++++---------------------
fs/f2fs/f2fs.h | 14 ++++++-----
fs/f2fs/inline.c | 7 +++---
fs/f2fs/node.c | 14 +++++++----
fs/f2fs/segment.c | 28 +++++++++++-----------
6 files changed, 87 insertions(+), 66 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 825158e..0ac7c39 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -50,6 +50,11 @@ struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
struct address_space *mapping = META_MAPPING(sbi);
struct page *page;
+ struct f2fs_io_info fio = {
+ .type = META,
+ .rw = READ_SYNC | REQ_META | REQ_PRIO,
+ .blk_addr = index,
+ };
repeat:
page = grab_cache_page(mapping, index);
if (!page) {
@@ -59,8 +64,7 @@ repeat:
if (PageUptodate(page))
goto out;

- if (f2fs_submit_page_bio(sbi, page, index,
- READ_SYNC | REQ_META | REQ_PRIO))
+ if (f2fs_submit_page_bio(sbi, page, &fio))
goto repeat;

lock_page(page);
@@ -112,14 +116,12 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type
block_t prev_blk_addr = 0;
struct page *page;
block_t blkno = start;
-
struct f2fs_io_info fio = {
.type = META,
.rw = READ_SYNC | REQ_META | REQ_PRIO
};

for (; nrpages-- > 0; blkno++) {
- block_t blk_addr;

if (!is_valid_blkaddr(sbi, blkno, type))
goto out;
@@ -130,27 +132,27 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type
NAT_BLOCK_OFFSET(NM_I(sbi)->max_nid)))
blkno = 0;
/* get nat block addr */
- blk_addr = current_nat_addr(sbi,
+ fio.blk_addr = current_nat_addr(sbi,
blkno * NAT_ENTRY_PER_BLOCK);
break;
case META_SIT:
/* get sit block addr */
- blk_addr = current_sit_addr(sbi,
+ fio.blk_addr = current_sit_addr(sbi,
blkno * SIT_ENTRY_PER_BLOCK);
- if (blkno != start && prev_blk_addr + 1 != blk_addr)
+ if (blkno != start && prev_blk_addr + 1 != fio.blk_addr)
goto out;
- prev_blk_addr = blk_addr;
+ prev_blk_addr = fio.blk_addr;
break;
case META_SSA:
case META_CP:
case META_POR:
- blk_addr = blkno;
+ fio.blk_addr = blkno;
break;
default:
BUG();
}

- page = grab_cache_page(META_MAPPING(sbi), blk_addr);
+ page = grab_cache_page(META_MAPPING(sbi), fio.blk_addr);
if (!page)
continue;
if (PageUptodate(page)) {
@@ -158,7 +160,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type
continue;
}

- f2fs_submit_page_mbio(sbi, page, blk_addr, &fio);
+ f2fs_submit_page_mbio(sbi, page, &fio);
f2fs_put_page(page, 0);
}
out:
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index caa08e4..d86f8b1 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -132,14 +132,14 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
* Return unlocked page.
*/
int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page,
- block_t blk_addr, int rw)
+ struct f2fs_io_info *fio)
{
struct bio *bio;

- trace_f2fs_submit_page_bio(page, blk_addr, rw);
+ trace_f2fs_submit_page_bio(page, fio->blk_addr, fio->rw);

/* Allocate a new bio */
- bio = __bio_alloc(sbi, blk_addr, 1, is_read_io(rw));
+ bio = __bio_alloc(sbi, fio->blk_addr, 1, is_read_io(fio->rw));

if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) {
bio_put(bio);
@@ -147,12 +147,12 @@ int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page,
return -EFAULT;
}

- submit_bio(rw, bio);
+ submit_bio(fio->rw, bio);
return 0;
}

void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page,
- block_t blk_addr, struct f2fs_io_info *fio)
+ struct f2fs_io_info *fio)
{
enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
struct f2fs_bio_info *io;
@@ -160,21 +160,21 @@ void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page,

io = is_read ? &sbi->read_io : &sbi->write_io[btype];

- verify_block_addr(sbi, blk_addr);
+ verify_block_addr(sbi, fio->blk_addr);

down_write(&io->io_rwsem);

if (!is_read)
inc_page_count(sbi, F2FS_WRITEBACK);

- if (io->bio && (io->last_block_in_bio != blk_addr - 1 ||
+ if (io->bio && (io->last_block_in_bio != fio->blk_addr - 1 ||
io->fio.rw != fio->rw))
__submit_merged_bio(io);
alloc_new:
if (io->bio == NULL) {
int bio_blocks = MAX_BIO_BLOCKS(sbi);

- io->bio = __bio_alloc(sbi, blk_addr, bio_blocks, is_read);
+ io->bio = __bio_alloc(sbi, fio->blk_addr, bio_blocks, is_read);
io->fio = *fio;
}

@@ -184,10 +184,10 @@ alloc_new:
goto alloc_new;
}

- io->last_block_in_bio = blk_addr;
+ io->last_block_in_bio = fio->blk_addr;

up_write(&io->io_rwsem);
- trace_f2fs_submit_page_mbio(page, fio->rw, fio->type, blk_addr);
+ trace_f2fs_submit_page_mbio(page, fio->rw, fio->type, fio->blk_addr);
}

/*
@@ -376,6 +376,10 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
struct dnode_of_data dn;
struct page *page;
int err;
+ struct f2fs_io_info fio = {
+ .type = DATA,
+ .rw = sync ? READ_SYNC : READA,
+ };

page = find_get_page(mapping, index);
if (page && PageUptodate(page))
@@ -404,8 +408,8 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
return page;
}

- err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, dn.data_blkaddr,
- sync ? READ_SYNC : READA);
+ fio.blk_addr = dn.data_blkaddr;
+ err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, &fio);
if (err)
return ERR_PTR(err);

@@ -430,7 +434,10 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index)
struct dnode_of_data dn;
struct page *page;
int err;
-
+ struct f2fs_io_info fio = {
+ .type = DATA,
+ .rw = READ_SYNC,
+ };
repeat:
page = grab_cache_page(mapping, index);
if (!page)
@@ -464,8 +471,8 @@ repeat:
return page;
}

- err = f2fs_submit_page_bio(F2FS_I_SB(inode), page,
- dn.data_blkaddr, READ_SYNC);
+ fio.blk_addr = dn.data_blkaddr;
+ err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, &fio);
if (err)
return ERR_PTR(err);

@@ -515,8 +522,12 @@ repeat:
zero_user_segment(page, 0, PAGE_CACHE_SIZE);
SetPageUptodate(page);
} else {
- err = f2fs_submit_page_bio(F2FS_I_SB(inode), page,
- dn.data_blkaddr, READ_SYNC);
+ struct f2fs_io_info fio = {
+ .type = DATA,
+ .rw = READ_SYNC,
+ .blk_addr = dn.data_blkaddr,
+ };
+ err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, &fio);
if (err)
goto put_err;

@@ -745,7 +756,6 @@ static int f2fs_read_data_pages(struct file *file,
int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
{
struct inode *inode = page->mapping->host;
- block_t old_blkaddr, new_blkaddr;
struct dnode_of_data dn;
int err = 0;

@@ -754,10 +764,10 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
if (err)
return err;

- old_blkaddr = dn.data_blkaddr;
+ fio->blk_addr = dn.data_blkaddr;

/* This page is already truncated */
- if (old_blkaddr == NULL_ADDR)
+ if (fio->blk_addr == NULL_ADDR)
goto out_writepage;

set_page_writeback(page);
@@ -766,14 +776,14 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
* If current allocation needs SSR,
* it had better in-place writes for updated data.
*/
- if (unlikely(old_blkaddr != NEW_ADDR &&
+ if (unlikely(fio->blk_addr != NEW_ADDR &&
!is_cold_data(page) &&
need_inplace_update(inode))) {
- rewrite_data_page(page, old_blkaddr, fio);
+ rewrite_data_page(page, fio);
set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE);
} else {
- write_data_page(page, &dn, &new_blkaddr, fio);
- update_extent_cache(new_blkaddr, &dn);
+ write_data_page(page, &dn, fio);
+ update_extent_cache(fio->blk_addr, &dn);
set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
}
out_writepage:
@@ -1007,8 +1017,12 @@ put_next:
if (dn.data_blkaddr == NEW_ADDR) {
zero_user_segment(page, 0, PAGE_CACHE_SIZE);
} else {
- err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
- READ_SYNC);
+ struct f2fs_io_info fio = {
+ .type = DATA,
+ .rw = READ_SYNC,
+ .blk_addr = dn.data_blkaddr,
+ };
+ err = f2fs_submit_page_bio(sbi, page, &fio);
if (err)
goto fail;

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index a8ccbce..3f07b50 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -491,6 +491,7 @@ enum page_type {
struct f2fs_io_info {
enum page_type type; /* contains DATA/NODE/META/META_FLUSH */
int rw; /* contains R/RS/W/WS with REQ_META/REQ_PRIO */
+ block_t blk_addr; /* block address to be written */
};

#define is_read_io(rw) (((rw) & 1) == READ)
@@ -1414,10 +1415,10 @@ int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *);
struct page *get_sum_page(struct f2fs_sb_info *, unsigned int);
void write_meta_page(struct f2fs_sb_info *, struct page *);
void write_node_page(struct f2fs_sb_info *, struct page *,
- struct f2fs_io_info *, unsigned int, block_t, block_t *);
-void write_data_page(struct page *, struct dnode_of_data *, block_t *,
- struct f2fs_io_info *);
-void rewrite_data_page(struct page *, block_t, struct f2fs_io_info *);
+ unsigned int, struct f2fs_io_info *);
+void write_data_page(struct page *, struct dnode_of_data *,
+ struct f2fs_io_info *);
+void rewrite_data_page(struct page *, struct f2fs_io_info *);
void recover_data_page(struct f2fs_sb_info *, struct page *,
struct f2fs_summary *, block_t, block_t);
void allocate_data_block(struct f2fs_sb_info *, struct page *,
@@ -1464,8 +1465,9 @@ void destroy_checkpoint_caches(void);
* data.c
*/
void f2fs_submit_merged_bio(struct f2fs_sb_info *, enum page_type, int);
-int f2fs_submit_page_bio(struct f2fs_sb_info *, struct page *, block_t, int);
-void f2fs_submit_page_mbio(struct f2fs_sb_info *, struct page *, block_t,
+int f2fs_submit_page_bio(struct f2fs_sb_info *, struct page *,
+ struct f2fs_io_info *);
+void f2fs_submit_page_mbio(struct f2fs_sb_info *, struct page *,
struct f2fs_io_info *);
int reserve_new_block(struct dnode_of_data *);
int f2fs_reserve_block(struct dnode_of_data *, pgoff_t);
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index f2d3c58..0c3f3f9 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -79,7 +79,6 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page)
int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
{
void *src_addr, *dst_addr;
- block_t new_blk_addr;
struct f2fs_io_info fio = {
.type = DATA,
.rw = WRITE_SYNC | REQ_PRIO,
@@ -115,9 +114,9 @@ no_update:

/* write data page to try to make data consistent */
set_page_writeback(page);
-
- write_data_page(page, dn, &new_blk_addr, &fio);
- update_extent_cache(new_blk_addr, dn);
+ fio.blk_addr = dn->data_blkaddr;
+ write_data_page(page, dn, &fio);
+ update_extent_cache(fio.blk_addr, dn);
f2fs_wait_on_page_writeback(page, DATA);
if (dirty)
inode_dec_dirty_pages(dn->inode);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index bcfd67c..adc35c9 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -977,6 +977,10 @@ static int read_node_page(struct page *page, int rw)
{
struct f2fs_sb_info *sbi = F2FS_P_SB(page);
struct node_info ni;
+ struct f2fs_io_info fio = {
+ .type = NODE,
+ .rw = rw,
+ };

get_node_info(sbi, page->index, &ni);

@@ -988,7 +992,8 @@ static int read_node_page(struct page *page, int rw)
if (PageUptodate(page))
return LOCKED_PAGE;

- return f2fs_submit_page_bio(sbi, page, ni.blk_addr, rw);
+ fio.blk_addr = ni.blk_addr;
+ return f2fs_submit_page_bio(sbi, page, &fio);
}

/*
@@ -1269,7 +1274,6 @@ static int f2fs_write_node_page(struct page *page,
{
struct f2fs_sb_info *sbi = F2FS_P_SB(page);
nid_t nid;
- block_t new_addr;
struct node_info ni;
struct f2fs_io_info fio = {
.type = NODE,
@@ -1304,9 +1308,11 @@ static int f2fs_write_node_page(struct page *page,
} else {
down_read(&sbi->node_write);
}
+
set_page_writeback(page);
- write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr);
- set_node_addr(sbi, &ni, new_addr, is_fsync_dnode(page));
+ fio.blk_addr = ni.blk_addr;
+ write_node_page(sbi, page, nid, &fio);
+ set_node_addr(sbi, &ni, fio.blk_addr, is_fsync_dnode(page));
dec_page_count(sbi, F2FS_DIRTY_NODES);
up_read(&sbi->node_write);
unlock_page(page);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index c950c93..c726f86 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1182,39 +1182,39 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
}

static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
- block_t old_blkaddr, block_t *new_blkaddr,
- struct f2fs_summary *sum, struct f2fs_io_info *fio)
+ struct f2fs_summary *sum,
+ struct f2fs_io_info *fio)
{
int type = __get_segment_type(page, fio->type);

- allocate_data_block(sbi, page, old_blkaddr, new_blkaddr, sum, type);
+ allocate_data_block(sbi, page, fio->blk_addr, &fio->blk_addr, sum, type);

/* writeout dirty page into bdev */
- f2fs_submit_page_mbio(sbi, page, *new_blkaddr, fio);
+ f2fs_submit_page_mbio(sbi, page, fio);
}

void write_meta_page(struct f2fs_sb_info *sbi, struct page *page)
{
struct f2fs_io_info fio = {
.type = META,
- .rw = WRITE_SYNC | REQ_META | REQ_PRIO
+ .rw = WRITE_SYNC | REQ_META | REQ_PRIO,
+ .blk_addr = page->index,
};

set_page_writeback(page);
- f2fs_submit_page_mbio(sbi, page, page->index, &fio);
+ f2fs_submit_page_mbio(sbi, page, &fio);
}

void write_node_page(struct f2fs_sb_info *sbi, struct page *page,
- struct f2fs_io_info *fio,
- unsigned int nid, block_t old_blkaddr, block_t *new_blkaddr)
+ unsigned int nid, struct f2fs_io_info *fio)
{
struct f2fs_summary sum;
set_summary(&sum, nid, 0, 0);
- do_write_page(sbi, page, old_blkaddr, new_blkaddr, &sum, fio);
+ do_write_page(sbi, page, &sum, fio);
}

void write_data_page(struct page *page, struct dnode_of_data *dn,
- block_t *new_blkaddr, struct f2fs_io_info *fio)
+ struct f2fs_io_info *fio)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
struct f2fs_summary sum;
@@ -1223,14 +1223,12 @@ void write_data_page(struct page *page, struct dnode_of_data *dn,
f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
get_node_info(sbi, dn->nid, &ni);
set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
-
- do_write_page(sbi, page, dn->data_blkaddr, new_blkaddr, &sum, fio);
+ do_write_page(sbi, page, &sum, fio);
}

-void rewrite_data_page(struct page *page, block_t old_blkaddr,
- struct f2fs_io_info *fio)
+void rewrite_data_page(struct page *page, struct f2fs_io_info *fio)
{
- f2fs_submit_page_mbio(F2FS_P_SB(page), page, old_blkaddr, fio);
+ f2fs_submit_page_mbio(F2FS_P_SB(page), page, fio);
}

void recover_data_page(struct f2fs_sb_info *sbi,
--
2.1.1


2014-12-21 16:35:35

by Jaegeuk Kim

[permalink] [raw]
Subject: [PATCH 3/6] f2fs: add key functions for f2fs_io_tracer

This patch adds two key functions to trace process ids and IOs.
The basic idea is to
1. remain process ids, pids, in page->private.
2. show pids in IO traces.

So, later we can retrieve process information according to IO traces.

Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/trace.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
fs/f2fs/trace.h | 18 ++++++++++++
2 files changed, 104 insertions(+)

diff --git a/fs/f2fs/trace.c b/fs/f2fs/trace.c
index 4e4a069..19f5216 100644
--- a/fs/f2fs/trace.c
+++ b/fs/f2fs/trace.c
@@ -15,10 +15,96 @@
#include "f2fs.h"
#include "trace.h"

+RADIX_TREE(pids, GFP_NOIO);
+struct last_io_info last_io;
+
+static inline void __print_last_io(void)
+{
+ if (!last_io.len)
+ return;
+
+ trace_printk("%3x:%3x %4x %-16s %2x %5x %12x %4x\n",
+ last_io.major, last_io.minor,
+ last_io.pid, "----------------",
+ last_io.type,
+ last_io.fio.rw, last_io.fio.blk_addr,
+ last_io.len);
+ memset(&last_io, 0, sizeof(last_io));
+}
+
+static int __file_type(struct inode *inode, pid_t pid)
+{
+ if (f2fs_is_atomic_file(inode))
+ return __ATOMIC_FILE;
+ else if (f2fs_is_volatile_file(inode))
+ return __VOLATILE_FILE;
+ else if (S_ISDIR(inode->i_mode))
+ return __DIR_FILE;
+ else if (inode->i_ino == F2FS_NODE_INO(F2FS_I_SB(inode)))
+ return __NODE_FILE;
+ else if (inode->i_ino == F2FS_META_INO(F2FS_I_SB(inode)))
+ return __META_FILE;
+ else if (pid)
+ return __NORMAL_FILE;
+ else
+ return __MISC_FILE;
+}
+
void f2fs_trace_pid(struct page *page)
{
+ struct inode *inode = page->mapping->host;
+ pid_t pid = task_pid_nr(current);
+ void *p;
+
+ page->private = pid;
+
+ p = radix_tree_lookup(&pids, pid);
+ if (p == current)
+ return;
+ if (p)
+ radix_tree_delete(&pids, pid);
+
+ f2fs_radix_tree_insert(&pids, pid, current);
+
+ trace_printk("%3x:%3x %4x %-16s\n",
+ MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev),
+ pid, current->comm);
+
}

void f2fs_trace_ios(struct page *page, struct f2fs_io_info *fio, int flush)
{
+ struct inode *inode;
+ pid_t pid;
+ int major, minor;
+
+ if (flush) {
+ __print_last_io();
+ return;
+ }
+
+ inode = page->mapping->host;
+ pid = page_private(page);
+
+ major = MAJOR(inode->i_sb->s_dev);
+ minor = MINOR(inode->i_sb->s_dev);
+
+ if (last_io.major == major && last_io.minor == minor &&
+ last_io.pid == pid &&
+ last_io.type == __file_type(inode, pid) &&
+ last_io.fio.rw == fio->rw &&
+ last_io.fio.blk_addr + last_io.len == fio->blk_addr) {
+ last_io.len++;
+ return;
+ }
+
+ __print_last_io();
+
+ last_io.major = major;
+ last_io.minor = minor;
+ last_io.pid = pid;
+ last_io.type = __file_type(inode, pid);
+ last_io.fio = *fio;
+ last_io.len = 1;
+ return;
}
diff --git a/fs/f2fs/trace.h b/fs/f2fs/trace.h
index 08856a9..aa6663b 100644
--- a/fs/f2fs/trace.h
+++ b/fs/f2fs/trace.h
@@ -14,6 +14,24 @@
#ifdef CONFIG_F2FS_IO_TRACE
#include <trace/events/f2fs.h>

+enum file_type {
+ __NORMAL_FILE,
+ __DIR_FILE,
+ __NODE_FILE,
+ __META_FILE,
+ __ATOMIC_FILE,
+ __VOLATILE_FILE,
+ __MISC_FILE,
+};
+
+struct last_io_info {
+ int major, minor;
+ pid_t pid;
+ enum file_type type;
+ struct f2fs_io_info fio;
+ block_t len;
+};
+
extern void f2fs_trace_pid(struct page *);
extern void f2fs_trace_ios(struct page *, struct f2fs_io_info *, int);
#else
--
2.1.1

2014-12-21 16:35:39

by Jaegeuk Kim

[permalink] [raw]
Subject: [PATCH 6/6] f2fs: avoid double lock for cp_rwsem

The __f2fs_add_link is covered by cp_rwsem all the time.
This calls init_inode_metadata, which conducts some acl operations including
memory allocation with GFP_KERNEL previously.
But, under memory pressure, f2fs_write_data_page can be called, which also
grabs cp_mutex too.
Basically, it's safe since down_read was used in both of cases, but it'd
better avoid this situation in advance.

Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/acl.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index 1ccb26b..7f12d28 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -62,7 +62,7 @@ static struct posix_acl *f2fs_acl_from_disk(const char *value, size_t size)
if (count == 0)
return NULL;

- acl = posix_acl_alloc(count, GFP_KERNEL);
+ acl = posix_acl_alloc(count, GFP_NOFS);
if (!acl)
return ERR_PTR(-ENOMEM);

@@ -116,7 +116,7 @@ static void *f2fs_acl_to_disk(const struct posix_acl *acl, size_t *size)
int i;

f2fs_acl = kmalloc(sizeof(struct f2fs_acl_header) + acl->a_count *
- sizeof(struct f2fs_acl_entry), GFP_KERNEL);
+ sizeof(struct f2fs_acl_entry), GFP_NOFS);
if (!f2fs_acl)
return ERR_PTR(-ENOMEM);

--
2.1.1

2014-12-21 16:36:04

by Jaegeuk Kim

[permalink] [raw]
Subject: [PATCH 5/6] f2fs: activate f2fs_trace_ios

This patch activates f2fs_trace_ios.

Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/data.c | 3 +++
fs/f2fs/file.c | 2 ++
fs/f2fs/super.c | 2 ++
3 files changed, 7 insertions(+)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index d86f8b1..20aa3c3 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -22,6 +22,7 @@
#include "f2fs.h"
#include "node.h"
#include "segment.h"
+#include "trace.h"
#include <trace/events/f2fs.h>

static void f2fs_read_end_io(struct bio *bio, int err)
@@ -137,6 +138,7 @@ int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page,
struct bio *bio;

trace_f2fs_submit_page_bio(page, fio->blk_addr, fio->rw);
+ f2fs_trace_ios(page, fio, 0);

/* Allocate a new bio */
bio = __bio_alloc(sbi, fio->blk_addr, 1, is_read_io(fio->rw));
@@ -185,6 +187,7 @@ alloc_new:
}

io->last_block_in_bio = fio->blk_addr;
+ f2fs_trace_ios(page, fio, 0);

up_write(&io->io_rwsem);
trace_f2fs_submit_page_mbio(page, fio->rw, fio->type, fio->blk_addr);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 5139f90..f172ddc4 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -26,6 +26,7 @@
#include "segment.h"
#include "xattr.h"
#include "acl.h"
+#include "trace.h"
#include <trace/events/f2fs.h>

static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
@@ -265,6 +266,7 @@ flush_out:
ret = f2fs_issue_flush(sbi);
out:
trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
+ f2fs_trace_ios(NULL, NULL, 1);
return ret;
}

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index f71421d..cf68e44 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -30,6 +30,7 @@
#include "segment.h"
#include "xattr.h"
#include "gc.h"
+#include "trace.h"

#define CREATE_TRACE_POINTS
#include <trace/events/f2fs.h>
@@ -493,6 +494,7 @@ int f2fs_sync_fs(struct super_block *sb, int sync)
} else {
f2fs_balance_fs(sbi);
}
+ f2fs_trace_ios(NULL, NULL, 1);

return 0;
}
--
2.1.1

2014-12-21 16:36:29

by Jaegeuk Kim

[permalink] [raw]
Subject: [PATCH 2/6] f2fs: add f2fs_io_tracer support

This patch adds:
o initial trace.c and trace.h with skeleton functions
o Kconfig and Makefile to activate this feature

Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/Kconfig | 10 ++++++++++
fs/f2fs/Makefile | 1 +
fs/f2fs/trace.c | 24 ++++++++++++++++++++++++
fs/f2fs/trace.h | 24 ++++++++++++++++++++++++
4 files changed, 59 insertions(+)
create mode 100644 fs/f2fs/trace.c
create mode 100644 fs/f2fs/trace.h

diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig
index 736a348..94e2d2f 100644
--- a/fs/f2fs/Kconfig
+++ b/fs/f2fs/Kconfig
@@ -71,3 +71,13 @@ config F2FS_CHECK_FS
Enables BUG_ONs which check the filesystem consistency in runtime.

If you want to improve the performance, say N.
+
+config F2FS_IO_TRACE
+ bool "F2FS IO tracer"
+ depends on F2FS_FS
+ depends on FUNCTION_TRACER
+ help
+ F2FS IO trace is based on a function trace, which gathers process
+ information and block IO patterns in the filesystem level.
+
+ If unsure, say N.
diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile
index 2e35da1..d923977 100644
--- a/fs/f2fs/Makefile
+++ b/fs/f2fs/Makefile
@@ -5,3 +5,4 @@ f2fs-y += checkpoint.o gc.o data.o node.o segment.o recovery.o
f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o
f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o
f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o
+f2fs-$(CONFIG_F2FS_IO_TRACE) += trace.o
diff --git a/fs/f2fs/trace.c b/fs/f2fs/trace.c
new file mode 100644
index 0000000..4e4a069
--- /dev/null
+++ b/fs/f2fs/trace.c
@@ -0,0 +1,24 @@
+/*
+ * f2fs IO tracer
+ *
+ * Copyright (c) 2014 Motorola Mobility
+ * Copyright (c) 2014 Jaegeuk Kim <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/fs.h>
+#include <linux/f2fs_fs.h>
+#include <linux/sched.h>
+
+#include "f2fs.h"
+#include "trace.h"
+
+void f2fs_trace_pid(struct page *page)
+{
+}
+
+void f2fs_trace_ios(struct page *page, struct f2fs_io_info *fio, int flush)
+{
+}
diff --git a/fs/f2fs/trace.h b/fs/f2fs/trace.h
new file mode 100644
index 0000000..08856a9
--- /dev/null
+++ b/fs/f2fs/trace.h
@@ -0,0 +1,24 @@
+/*
+ * f2fs IO tracer
+ *
+ * Copyright (c) 2014 Motorola Mobility
+ * Copyright (c) 2014 Jaegeuk Kim <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __F2FS_TRACE_H__
+#define __F2FS_TRACE_H__
+
+#ifdef CONFIG_F2FS_IO_TRACE
+#include <trace/events/f2fs.h>
+
+extern void f2fs_trace_pid(struct page *);
+extern void f2fs_trace_ios(struct page *, struct f2fs_io_info *, int);
+#else
+#define f2fs_trace_pid(p)
+#define f2fs_trace_ios(p, i, n)
+
+#endif
+#endif /* __F2FS_TRACE_H__ */
--
2.1.1

2014-12-21 16:36:28

by Jaegeuk Kim

[permalink] [raw]
Subject: [PATCH 4/6] f2fs: activate f2fs_trace_pid

This patch activates f2fs_trace_pid.

Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/checkpoint.c | 3 +++
fs/f2fs/node.c | 2 ++
fs/f2fs/segment.c | 2 ++
3 files changed, 7 insertions(+)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 0ac7c39..3999933 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -20,6 +20,7 @@
#include "f2fs.h"
#include "node.h"
#include "segment.h"
+#include "trace.h"
#include <trace/events/f2fs.h>

static struct kmem_cache *ino_entry_slab;
@@ -301,6 +302,7 @@ static int f2fs_set_meta_page_dirty(struct page *page)
if (!PageDirty(page)) {
__set_page_dirty_nobuffers(page);
inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META);
+ f2fs_trace_pid(page);
return 1;
}
return 0;
@@ -712,6 +714,7 @@ void update_dirty_page(struct inode *inode, struct page *page)
kmem_cache_free(inode_entry_slab, new);
out:
SetPagePrivate(page);
+ f2fs_trace_pid(page);
}

void add_dirty_dir_inode(struct inode *inode)
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index adc35c9..a7cb0db 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -19,6 +19,7 @@
#include "f2fs.h"
#include "node.h"
#include "segment.h"
+#include "trace.h"
#include <trace/events/f2fs.h>

#define on_build_free_nids(nmi) mutex_is_locked(&nm_i->build_lock)
@@ -1362,6 +1363,7 @@ static int f2fs_set_node_page_dirty(struct page *page)
__set_page_dirty_nobuffers(page);
inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES);
SetPagePrivate(page);
+ f2fs_trace_pid(page);
return 1;
}
return 0;
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index c726f86..b688991 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -20,6 +20,7 @@
#include "f2fs.h"
#include "segment.h"
#include "node.h"
+#include "trace.h"
#include <trace/events/f2fs.h>

#define __reverse_ffz(x) __reverse_ffs(~(x))
@@ -181,6 +182,7 @@ void register_inmem_page(struct inode *inode, struct page *page)
int err;

SetPagePrivate(page);
+ f2fs_trace_pid(page);

new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);

--
2.1.1

2014-12-23 07:01:16

by Chao Yu

[permalink] [raw]
Subject: RE: [f2fs-dev] [PATCH 1/6] f2fs: use f2fs_io_info to clean up messy parameters during IO path

> -----Original Message-----
> From: Jaegeuk Kim [mailto:[email protected]]
> Sent: Monday, December 22, 2014 12:35 AM
> To: [email protected]; [email protected];
> [email protected]
> Cc: Jaegeuk Kim
> Subject: [f2fs-dev] [PATCH 1/6] f2fs: use f2fs_io_info to clean up messy parameters during IO
> path
>
> This patch cleans up parameters on IO paths.
> The key idea is to use f2fs_io_info adding a parameter, block address, and then
> use this structure as parameters.
>

Nice clean work.

> Signed-off-by: Jaegeuk Kim <[email protected]>

Reviewed-by: Chao Yu <[email protected]>

2014-12-23 07:02:52

by Chao Yu

[permalink] [raw]
Subject: RE: [f2fs-dev] [PATCH 6/6] f2fs: avoid double lock for cp_rwsem

Hi Jaegeuk,

> -----Original Message-----
> From: Jaegeuk Kim [mailto:[email protected]]
> Sent: Monday, December 22, 2014 12:35 AM
> To: [email protected]; [email protected];
> [email protected]
> Cc: Jaegeuk Kim
> Subject: [f2fs-dev] [PATCH 6/6] f2fs: avoid double lock for cp_rwsem
>
> The __f2fs_add_link is covered by cp_rwsem all the time.
> This calls init_inode_metadata, which conducts some acl operations including
> memory allocation with GFP_KERNEL previously.
> But, under memory pressure, f2fs_write_data_page can be called, which also
> grabs cp_mutex too.

grabs cp_rwsem.

> Basically, it's safe since down_read was used in both of cases, but it'd
> better avoid this situation in advance.

If checkpoint intend to hold down_write in the middle of the two down_read
caller, it will cause a deadlock, so it's not safe.

Could you update the comments?

>
> Signed-off-by: Jaegeuk Kim <[email protected]>

Anyway, Nice catch and please add:

Reviewed-by: Chao Yu <[email protected]>

> ---
> fs/f2fs/acl.c | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
> index 1ccb26b..7f12d28 100644
> --- a/fs/f2fs/acl.c
> +++ b/fs/f2fs/acl.c
> @@ -62,7 +62,7 @@ static struct posix_acl *f2fs_acl_from_disk(const char *value, size_t size)
> if (count == 0)
> return NULL;
>
> - acl = posix_acl_alloc(count, GFP_KERNEL);
> + acl = posix_acl_alloc(count, GFP_NOFS);
> if (!acl)
> return ERR_PTR(-ENOMEM);
>
> @@ -116,7 +116,7 @@ static void *f2fs_acl_to_disk(const struct posix_acl *acl, size_t *size)
> int i;
>
> f2fs_acl = kmalloc(sizeof(struct f2fs_acl_header) + acl->a_count *
> - sizeof(struct f2fs_acl_entry), GFP_KERNEL);
> + sizeof(struct f2fs_acl_entry), GFP_NOFS);
> if (!f2fs_acl)
> return ERR_PTR(-ENOMEM);
>
> --
> 2.1.1
>
>
> ------------------------------------------------------------------------------
> Download BIRT iHub F-Type - The Free Enterprise-Grade BIRT Server
> from Actuate! Instantly Supercharge Your Business Reports and Dashboards
> with Interactivity, Sharing, Native Excel Exports, App Integration & more
> Get technology previously reserved for billion-dollar corporations, FREE
> http://pubads.g.doubleclick.net/gampad/clk?id=164703151&iu=/4140/ostg.clktrk
> _______________________________________________
> Linux-f2fs-devel mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

2014-12-23 07:40:45

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 6/6] f2fs: avoid double lock for cp_rwsem

Hi,

On Tue, Dec 23, 2014 at 03:01:36PM +0800, Chao Yu wrote:
> Hi Jaegeuk,
>
> > -----Original Message-----
> > From: Jaegeuk Kim [mailto:[email protected]]
> > Sent: Monday, December 22, 2014 12:35 AM
> > To: [email protected]; [email protected];
> > [email protected]
> > Cc: Jaegeuk Kim
> > Subject: [f2fs-dev] [PATCH 6/6] f2fs: avoid double lock for cp_rwsem
> >
> > The __f2fs_add_link is covered by cp_rwsem all the time.
> > This calls init_inode_metadata, which conducts some acl operations including
> > memory allocation with GFP_KERNEL previously.
> > But, under memory pressure, f2fs_write_data_page can be called, which also
> > grabs cp_mutex too.
>
> grabs cp_rwsem.

Got it.

>
> > Basically, it's safe since down_read was used in both of cases, but it'd
> > better avoid this situation in advance.
>
> If checkpoint intend to hold down_write in the middle of the two down_read
> caller, it will cause a deadlock, so it's not safe.

What I meant was like this.
- down_read
- down_read
- up_read
- up_read

This should be safe, right?
Thanks,

>
> Could you update the comments?
>
> >
> > Signed-off-by: Jaegeuk Kim <[email protected]>
>
> Anyway, Nice catch and please add:
>
> Reviewed-by: Chao Yu <[email protected]>
>
> > ---
> > fs/f2fs/acl.c | 4 ++--
> > 1 file changed, 2 insertions(+), 2 deletions(-)
> >
> > diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
> > index 1ccb26b..7f12d28 100644
> > --- a/fs/f2fs/acl.c
> > +++ b/fs/f2fs/acl.c
> > @@ -62,7 +62,7 @@ static struct posix_acl *f2fs_acl_from_disk(const char *value, size_t size)
> > if (count == 0)
> > return NULL;
> >
> > - acl = posix_acl_alloc(count, GFP_KERNEL);
> > + acl = posix_acl_alloc(count, GFP_NOFS);
> > if (!acl)
> > return ERR_PTR(-ENOMEM);
> >
> > @@ -116,7 +116,7 @@ static void *f2fs_acl_to_disk(const struct posix_acl *acl, size_t *size)
> > int i;
> >
> > f2fs_acl = kmalloc(sizeof(struct f2fs_acl_header) + acl->a_count *
> > - sizeof(struct f2fs_acl_entry), GFP_KERNEL);
> > + sizeof(struct f2fs_acl_entry), GFP_NOFS);
> > if (!f2fs_acl)
> > return ERR_PTR(-ENOMEM);
> >
> > --
> > 2.1.1
> >
> >
> > ------------------------------------------------------------------------------
> > Download BIRT iHub F-Type - The Free Enterprise-Grade BIRT Server
> > from Actuate! Instantly Supercharge Your Business Reports and Dashboards
> > with Interactivity, Sharing, Native Excel Exports, App Integration & more
> > Get technology previously reserved for billion-dollar corporations, FREE
> > http://pubads.g.doubleclick.net/gampad/clk?id=164703151&iu=/4140/ostg.clktrk
> > _______________________________________________
> > Linux-f2fs-devel mailing list
> > [email protected]
> > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

2014-12-23 08:59:41

by Chao Yu

[permalink] [raw]
Subject: RE: [f2fs-dev] [PATCH 6/6] f2fs: avoid double lock for cp_rwsem

Hi Jaegeuk,

> -----Original Message-----
> From: Jaegeuk Kim [mailto:[email protected]]
> Sent: Tuesday, December 23, 2014 3:41 PM
> To: Chao Yu
> Cc: [email protected]; [email protected];
> [email protected]
> Subject: Re: [f2fs-dev] [PATCH 6/6] f2fs: avoid double lock for cp_rwsem
>
> Hi,
>
> On Tue, Dec 23, 2014 at 03:01:36PM +0800, Chao Yu wrote:
> > Hi Jaegeuk,
> >
> > > -----Original Message-----
> > > From: Jaegeuk Kim [mailto:[email protected]]
> > > Sent: Monday, December 22, 2014 12:35 AM
> > > To: [email protected]; [email protected];
> > > [email protected]
> > > Cc: Jaegeuk Kim
> > > Subject: [f2fs-dev] [PATCH 6/6] f2fs: avoid double lock for cp_rwsem
> > >
> > > The __f2fs_add_link is covered by cp_rwsem all the time.
> > > This calls init_inode_metadata, which conducts some acl operations including
> > > memory allocation with GFP_KERNEL previously.
> > > But, under memory pressure, f2fs_write_data_page can be called, which also
> > > grabs cp_mutex too.
> >
> > grabs cp_rwsem.
>
> Got it.
>
> >
> > > Basically, it's safe since down_read was used in both of cases, but it'd
> > > better avoid this situation in advance.
> >
> > If checkpoint intend to hold down_write in the middle of the two down_read
> > caller, it will cause a deadlock, so it's not safe.
>
> What I meant was like this.
> - down_read

If someone else intends to hold down_write (in checkpoint()) here,
we will encounter deadlock.

Because this writer will add itself to inner waiter list of spinlock as there
is an exist reader had held this lock, then the following reader will start to
spin as it detect that waiter list is not empty. Then deadlock occurred.

I guess this design can effectively avoid starve for the writer when encounters
a large number of readers.

Thanks

> - down_read
> - up_read
> - up_read
>
> This should be safe, right?
> Thanks,
>
> >
> > Could you update the comments?
> >
> > >
> > > Signed-off-by: Jaegeuk Kim <[email protected]>
> >
> > Anyway, Nice catch and please add:
> >
> > Reviewed-by: Chao Yu <[email protected]>
> >
> > > ---
> > > fs/f2fs/acl.c | 4 ++--
> > > 1 file changed, 2 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
> > > index 1ccb26b..7f12d28 100644
> > > --- a/fs/f2fs/acl.c
> > > +++ b/fs/f2fs/acl.c
> > > @@ -62,7 +62,7 @@ static struct posix_acl *f2fs_acl_from_disk(const char *value, size_t
> size)
> > > if (count == 0)
> > > return NULL;
> > >
> > > - acl = posix_acl_alloc(count, GFP_KERNEL);
> > > + acl = posix_acl_alloc(count, GFP_NOFS);
> > > if (!acl)
> > > return ERR_PTR(-ENOMEM);
> > >
> > > @@ -116,7 +116,7 @@ static void *f2fs_acl_to_disk(const struct posix_acl *acl, size_t *size)
> > > int i;
> > >
> > > f2fs_acl = kmalloc(sizeof(struct f2fs_acl_header) + acl->a_count *
> > > - sizeof(struct f2fs_acl_entry), GFP_KERNEL);
> > > + sizeof(struct f2fs_acl_entry), GFP_NOFS);
> > > if (!f2fs_acl)
> > > return ERR_PTR(-ENOMEM);
> > >
> > > --
> > > 2.1.1
> > >
> > >
> > > ------------------------------------------------------------------------------
> > > Download BIRT iHub F-Type - The Free Enterprise-Grade BIRT Server
> > > from Actuate! Instantly Supercharge Your Business Reports and Dashboards
> > > with Interactivity, Sharing, Native Excel Exports, App Integration & more
> > > Get technology previously reserved for billion-dollar corporations, FREE
> > > http://pubads.g.doubleclick.net/gampad/clk?id=164703151&iu=/4140/ostg.clktrk
> > > _______________________________________________
> > > Linux-f2fs-devel mailing list
> > > [email protected]
> > > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

2014-12-23 19:05:49

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 6/6] f2fs: avoid double lock for cp_rwsem

On Tue, Dec 23, 2014 at 04:58:46PM +0800, Chao Yu wrote:
> Hi Jaegeuk,
>
> > -----Original Message-----
> > From: Jaegeuk Kim [mailto:[email protected]]
> > Sent: Tuesday, December 23, 2014 3:41 PM
> > To: Chao Yu
> > Cc: [email protected]; [email protected];
> > [email protected]
> > Subject: Re: [f2fs-dev] [PATCH 6/6] f2fs: avoid double lock for cp_rwsem
> >
> > Hi,
> >
> > On Tue, Dec 23, 2014 at 03:01:36PM +0800, Chao Yu wrote:
> > > Hi Jaegeuk,
> > >
> > > > -----Original Message-----
> > > > From: Jaegeuk Kim [mailto:[email protected]]
> > > > Sent: Monday, December 22, 2014 12:35 AM
> > > > To: [email protected]; [email protected];
> > > > [email protected]
> > > > Cc: Jaegeuk Kim
> > > > Subject: [f2fs-dev] [PATCH 6/6] f2fs: avoid double lock for cp_rwsem
> > > >
> > > > The __f2fs_add_link is covered by cp_rwsem all the time.
> > > > This calls init_inode_metadata, which conducts some acl operations including
> > > > memory allocation with GFP_KERNEL previously.
> > > > But, under memory pressure, f2fs_write_data_page can be called, which also
> > > > grabs cp_mutex too.
> > >
> > > grabs cp_rwsem.
> >
> > Got it.
> >
> > >
> > > > Basically, it's safe since down_read was used in both of cases, but it'd
> > > > better avoid this situation in advance.
> > >
> > > If checkpoint intend to hold down_write in the middle of the two down_read
> > > caller, it will cause a deadlock, so it's not safe.
> >
> > What I meant was like this.
> > - down_read
>
> If someone else intends to hold down_write (in checkpoint()) here,
> we will encounter deadlock.
>
> Because this writer will add itself to inner waiter list of spinlock as there
> is an exist reader had held this lock, then the following reader will start to
> spin as it detect that waiter list is not empty. Then deadlock occurred.

Ah ha. I see. :)
I'll change the description.

Thanks,

>
> I guess this design can effectively avoid starve for the writer when encounters
> a large number of readers.
>
> Thanks
>
> > - down_read
> > - up_read
> > - up_read
> >
> > This should be safe, right?
> > Thanks,
> >
> > >
> > > Could you update the comments?
> > >
> > > >
> > > > Signed-off-by: Jaegeuk Kim <[email protected]>
> > >
> > > Anyway, Nice catch and please add:
> > >
> > > Reviewed-by: Chao Yu <[email protected]>
> > >
> > > > ---
> > > > fs/f2fs/acl.c | 4 ++--
> > > > 1 file changed, 2 insertions(+), 2 deletions(-)
> > > >
> > > > diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
> > > > index 1ccb26b..7f12d28 100644
> > > > --- a/fs/f2fs/acl.c
> > > > +++ b/fs/f2fs/acl.c
> > > > @@ -62,7 +62,7 @@ static struct posix_acl *f2fs_acl_from_disk(const char *value, size_t
> > size)
> > > > if (count == 0)
> > > > return NULL;
> > > >
> > > > - acl = posix_acl_alloc(count, GFP_KERNEL);
> > > > + acl = posix_acl_alloc(count, GFP_NOFS);
> > > > if (!acl)
> > > > return ERR_PTR(-ENOMEM);
> > > >
> > > > @@ -116,7 +116,7 @@ static void *f2fs_acl_to_disk(const struct posix_acl *acl, size_t *size)
> > > > int i;
> > > >
> > > > f2fs_acl = kmalloc(sizeof(struct f2fs_acl_header) + acl->a_count *
> > > > - sizeof(struct f2fs_acl_entry), GFP_KERNEL);
> > > > + sizeof(struct f2fs_acl_entry), GFP_NOFS);
> > > > if (!f2fs_acl)
> > > > return ERR_PTR(-ENOMEM);
> > > >
> > > > --
> > > > 2.1.1
> > > >
> > > >
> > > > ------------------------------------------------------------------------------
> > > > Download BIRT iHub F-Type - The Free Enterprise-Grade BIRT Server
> > > > from Actuate! Instantly Supercharge Your Business Reports and Dashboards
> > > > with Interactivity, Sharing, Native Excel Exports, App Integration & more
> > > > Get technology previously reserved for billion-dollar corporations, FREE
> > > > http://pubads.g.doubleclick.net/gampad/clk?id=164703151&iu=/4140/ostg.clktrk
> > > > _______________________________________________
> > > > Linux-f2fs-devel mailing list
> > > > [email protected]
> > > > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

2014-12-23 19:08:49

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 6/6 v2] f2fs: avoid double lock for cp_rwsem

Change log from v1:
o fix description

>From fc897d61c01acc19dba434cd4aa29baeb1537b60 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <[email protected]>
Date: Thu, 18 Dec 2014 19:32:36 -0800
Subject: [PATCH] f2fs: avoid double lock for cp_rwsem

The __f2fs_add_link is covered by cp_rwsem all the time.
This calls init_inode_metadata, which conducts some acl operations including
memory allocation with GFP_KERNEL previously.
But, under memory pressure, f2fs_write_data_page can be called, which also
grabs cp_rwsem too.

In this case, this incurs a deadlock pointed by Chao.
Thread #1 Thread #2
down_read
down_write
down_read
-> here down_read should wait forever.

Reviewed-by: Chao Yu <[email protected]>
Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/acl.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index 1ccb26b..7f12d28 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -62,7 +62,7 @@ static struct posix_acl *f2fs_acl_from_disk(const char *value, size_t size)
if (count == 0)
return NULL;

- acl = posix_acl_alloc(count, GFP_KERNEL);
+ acl = posix_acl_alloc(count, GFP_NOFS);
if (!acl)
return ERR_PTR(-ENOMEM);

@@ -116,7 +116,7 @@ static void *f2fs_acl_to_disk(const struct posix_acl *acl, size_t *size)
int i;

f2fs_acl = kmalloc(sizeof(struct f2fs_acl_header) + acl->a_count *
- sizeof(struct f2fs_acl_entry), GFP_KERNEL);
+ sizeof(struct f2fs_acl_entry), GFP_NOFS);
if (!f2fs_acl)
return ERR_PTR(-ENOMEM);

--
2.1.1