2013-04-26 09:45:44

by Steven Whitehouse

[permalink] [raw]
Subject: GFS2: Pre-pull patch posting (merge window)

Hi,

Since the merge window is coming up soon, I'm posting the content of
the GFS2 -nmw tree as usual. There is not a whole lot of change this
time - there are some further changes which are in the works, but those
will be held over until next time.

Here there are some clean ups to inode creation, the addition of an
origin (local or remote) indicator to glock demote requests, removal
of one of the remaining GFP_NOFAIL allocations during log flushes,
one minor clean up, and a one liner bug fix,

Steve.


2013-04-26 09:45:48

by Steven Whitehouse

[permalink] [raw]
Subject: [PATCH 2/8] GFS2: Remove gfs2_refresh_inode from inode creation path

The original method for creating inodes used in GFS2 was to fill
out a buffer, with all the information, and then to read that
buffer into the in-core inode, using gfs2_refresh_inode()

The problem with this approach is that all the inode's fields
need to be calculated ahead of time, and were stored in various
variables making the code rather complicated.

The new approach is simply to allocate the in-core inode earlier
and fill in as many fields as possible ahead of time. These can
then be used to initilise the on disk representation. The
code has been working towards the point where it is possible
to remove gfs2_refresh_inode() because all the fields are
correctly initialised ahead of time. We've now reached that
milestone, and have reversed the order of setting up the in
core and on disk inodes.

Signed-off-by: Steven Whitehouse <[email protected]>

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index cf35155..6e30fd1 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1279,19 +1279,6 @@ void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs)
gfs2_glock_dq(&ghs[num_gh]);
}

-/**
- * gfs2_glock_dq_uninit_m - release multiple glocks
- * @num_gh: the number of structures
- * @ghs: an array of struct gfs2_holder structures
- *
- */
-
-void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs)
-{
- while (num_gh--)
- gfs2_glock_dq_uninit(&ghs[num_gh]);
-}
-
void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
{
unsigned long delay = 0;
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index fd580b7..69f66e3 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -201,7 +201,6 @@ extern int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number,
struct gfs2_holder *gh);
extern int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs);
extern void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs);
-extern void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs);
extern int gfs2_dump_glock(struct seq_file *seq, const struct gfs2_glock *gl);
#define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { gfs2_dump_glock(NULL, gl); BUG(); } } while(0)
extern __printf(2, 3)
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index df51557..cb53b4f 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -448,7 +448,6 @@ static void gfs2_init_dir(struct buffer_head *dibh,
static void init_dinode(struct gfs2_inode *dip, struct gfs2_inode *ip,
const char *symname, struct buffer_head **bhp)
{
- struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
struct gfs2_dinode *di;
struct buffer_head *dibh;

@@ -465,7 +464,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_inode *ip,
di->di_gid = cpu_to_be32(i_gid_read(&ip->i_inode));
di->di_nlink = 0;
di->di_size = cpu_to_be64(ip->i_inode.i_size);
- di->di_blocks = cpu_to_be64(1);
+ di->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
di->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
di->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec);
di->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec);
@@ -473,33 +472,23 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_inode *ip,
di->di_minor = cpu_to_be32(MINOR(ip->i_inode.i_rdev));
di->di_goal_meta = di->di_goal_data = cpu_to_be64(ip->i_no_addr);
di->di_generation = cpu_to_be64(ip->i_generation);
- di->di_flags = 0;
+ di->di_flags = cpu_to_be32(ip->i_diskflags);
di->__pad1 = 0;
di->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) ? GFS2_FORMAT_DE : 0);
- di->di_height = 0;
+ di->di_height = cpu_to_be16(ip->i_height);
di->__pad2 = 0;
di->__pad3 = 0;
- di->di_depth = 0;
- di->di_entries = 0;
+ di->di_depth = cpu_to_be16(ip->i_depth);
+ di->di_entries = cpu_to_be32(ip->i_entries);
memset(&di->__pad4, 0, sizeof(di->__pad4));
- di->di_eattr = 0;
+ di->di_eattr = cpu_to_be64(ip->i_eattr);
di->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec);
di->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec);
di->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec);
memset(&di->di_reserved, 0, sizeof(di->di_reserved));

switch(ip->i_inode.i_mode & S_IFMT) {
- case S_IFREG:
- if ((dip->i_diskflags & GFS2_DIF_INHERIT_JDATA) ||
- gfs2_tune_get(sdp, gt_new_files_jdata))
- di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA);
- break;
case S_IFDIR:
- di->di_flags |= cpu_to_be32(dip->i_diskflags &
- GFS2_DIF_INHERIT_JDATA);
- di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA);
- di->di_size = cpu_to_be64(sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode));
- di->di_entries = cpu_to_be32(2);
gfs2_init_dir(dibh, dip);
break;
case S_IFLNK:
@@ -516,15 +505,10 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
struct gfs2_inode *ip, int arq)
{
struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
- struct buffer_head *dibh;
int error;

- error = gfs2_quota_lock(dip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
- if (error)
- return error;
-
if (arq) {
- error = gfs2_quota_check(dip, dip->i_inode.i_uid, dip->i_inode.i_gid);
+ error = gfs2_quota_lock_check(dip);
if (error)
goto fail_quota_locks;

@@ -548,14 +532,8 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
if (error)
goto fail_end_trans;

- error = gfs2_meta_inode_buffer(ip, &dibh);
- if (error)
- goto fail_end_trans;
set_nlink(&ip->i_inode, S_ISDIR(ip->i_inode.i_mode) ? 2 : 1);
- gfs2_trans_add_meta(ip->i_gl, dibh);
- gfs2_dinode_out(ip, dibh->b_data);
- brelse(dibh);
- return 0;
+ mark_inode_dirty(&ip->i_inode);

fail_end_trans:
gfs2_trans_end(sdp);
@@ -655,13 +633,33 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
if (error)
goto fail_free_inode;

- set_bit(GIF_INVALID, &ip->i_flags);
inode->i_mode = mode;
inode->i_rdev = dev;
inode->i_size = size;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ gfs2_set_inode_blocks(inode, 1);
munge_mode_uid_gid(dip, inode);
ip->i_goal = dip->i_goal;
+ ip->i_diskflags = 0;
+ ip->i_eattr = 0;
+ ip->i_height = 0;
+ ip->i_depth = 0;
+ ip->i_entries = 0;
+
+ switch(mode & S_IFMT) {
+ case S_IFREG:
+ if ((dip->i_diskflags & GFS2_DIF_INHERIT_JDATA) ||
+ gfs2_tune_get(sdp, gt_new_files_jdata))
+ ip->i_diskflags |= GFS2_DIF_JDATA;
+ gfs2_set_aops(inode);
+ break;
+ case S_IFDIR:
+ ip->i_diskflags |= (dip->i_diskflags & GFS2_DIF_INHERIT_JDATA);
+ ip->i_diskflags |= GFS2_DIF_JDATA;
+ ip->i_entries = 2;
+ break;
+ }
+ gfs2_set_inode_flags(inode);

if ((GFS2_I(sdp->sd_root_dir->d_inode) == dip) ||
(dip->i_diskflags & GFS2_DIF_TOPDIR))
@@ -700,10 +698,6 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
gfs2_set_iop(inode);
insert_inode_hash(inode);

- error = gfs2_inode_refresh(ip);
- if (error)
- goto fail_gunlock3;
-
error = gfs2_acl_create(dip, inode);
if (error)
goto fail_gunlock3;
@@ -716,14 +710,9 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
if (error)
goto fail_gunlock3;

- if (bh)
- brelse(bh);
-
- gfs2_trans_end(sdp);
- gfs2_inplace_release(dip);
- gfs2_quota_unlock(dip);
- mark_inode_dirty(inode);
- gfs2_glock_dq_uninit_m(2, ghs);
+ brelse(bh);
+ gfs2_glock_dq_uninit(ghs);
+ gfs2_glock_dq_uninit(ghs + 1);
d_instantiate(dentry, inode);
return 0;

@@ -1126,7 +1115,9 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry,

static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
- return gfs2_create_inode(dir, dentry, S_IFDIR | mode, 0, NULL, 0, 0);
+ struct gfs2_sbd *sdp = GFS2_SB(dir);
+ unsigned dsize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode);
+ return gfs2_create_inode(dir, dentry, S_IFDIR | mode, 0, NULL, dsize, 0);
}

/**
--
1.7.4

2013-04-26 09:45:55

by Steven Whitehouse

[permalink] [raw]
Subject: [PATCH 4/8] GFS2: Remove vestigial parameter ip from function rs_deltree

From: Bob Peterson <[email protected]>

The functions that delete block reservations from the rgrp block
reservations rbtree no longer use the ip parameter. This patch
eliminates the parameter.

Signed-off-by: Bob Peterson <[email protected]>
Signed-off-by: Steven Whitehouse <[email protected]>

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 5e83657..1dc9a13 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -787,7 +787,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
goto out_rlist;

if (gfs2_rs_active(ip->i_res)) /* needs to be done with the rgrp glock held */
- gfs2_rs_deltree(ip, ip->i_res);
+ gfs2_rs_deltree(ip->i_res);

error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE +
RES_INDIRECT + RES_STATFS + RES_QUOTA,
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 71c7fc5..0c5a575 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -592,7 +592,7 @@ static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs)
* @rs: The reservation to remove
*
*/
-static void __rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs)
+static void __rs_deltree(struct gfs2_blkreserv *rs)
{
struct gfs2_rgrpd *rgd;

@@ -605,7 +605,7 @@ static void __rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs)
RB_CLEAR_NODE(&rs->rs_node);

if (rs->rs_free) {
- /* return reserved blocks to the rgrp and the ip */
+ /* return reserved blocks to the rgrp */
BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free);
rs->rs_rbm.rgd->rd_reserved -= rs->rs_free;
rs->rs_free = 0;
@@ -619,14 +619,14 @@ static void __rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs)
* @rs: The reservation to remove
*
*/
-void gfs2_rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs)
+void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
{
struct gfs2_rgrpd *rgd;

rgd = rs->rs_rbm.rgd;
if (rgd) {
spin_lock(&rgd->rd_rsspin);
- __rs_deltree(ip, rs);
+ __rs_deltree(rs);
spin_unlock(&rgd->rd_rsspin);
}
}
@@ -640,7 +640,7 @@ void gfs2_rs_delete(struct gfs2_inode *ip)
{
down_write(&ip->i_rw_mutex);
if (ip->i_res) {
- gfs2_rs_deltree(ip, ip->i_res);
+ gfs2_rs_deltree(ip->i_res);
BUG_ON(ip->i_res->rs_free);
kmem_cache_free(gfs2_rsrv_cachep, ip->i_res);
ip->i_res = NULL;
@@ -664,7 +664,7 @@ static void return_all_reservations(struct gfs2_rgrpd *rgd)
spin_lock(&rgd->rd_rsspin);
while ((n = rb_first(&rgd->rd_rstree))) {
rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
- __rs_deltree(NULL, rs);
+ __rs_deltree(rs);
}
spin_unlock(&rgd->rd_rsspin);
}
@@ -1874,7 +1874,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested, u32 aflags)

/* Drop reservation, if we couldn't use reserved rgrp */
if (gfs2_rs_active(rs))
- gfs2_rs_deltree(ip, rs);
+ gfs2_rs_deltree(rs);
check_rgrp:
/* Check for unlinked inodes which can be reclaimed */
if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK)
@@ -2087,7 +2087,7 @@ static void gfs2_adjust_reservation(struct gfs2_inode *ip,
if (rs->rs_free && !ret)
goto out;
}
- __rs_deltree(ip, rs);
+ __rs_deltree(rs);
}
out:
spin_unlock(&rgd->rd_rsspin);
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index 8421858..5b3f4a8 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -47,7 +47,7 @@ extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
bool dinode, u64 *generation);

extern int gfs2_rs_alloc(struct gfs2_inode *ip);
-extern void gfs2_rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs);
+extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs);
extern void gfs2_rs_delete(struct gfs2_inode *ip);
extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta);
extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index cab77b8..917c8e1 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1512,7 +1512,7 @@ out_truncate:
out_unlock:
/* Error path for case 1 */
if (gfs2_rs_active(ip->i_res))
- gfs2_rs_deltree(ip, ip->i_res);
+ gfs2_rs_deltree(ip->i_res);

if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags))
gfs2_glock_dq(&ip->i_iopen_gh);
--
1.7.4

2013-04-26 09:45:58

by Steven Whitehouse

[permalink] [raw]
Subject: [PATCH 8/8] GFS2: Flush work queue before clearing glock hash tables

From: Bob Peterson <[email protected]>

There was a timing window when a GFS2 file system was unmounted
that caused GFS2 to call BUG() and panic the kernel. The call
to BUG() is meant to ensure that the glock reference count,
gl_ref, never gets down to zero and bounce back up again. What was
happening during umount is that function gfs2_put_super was dequeing
its glocks for well-known files. In particular, we saw it on the
journal glock, sd_jinode_gh. The dequeue caused delayed work to be
queued for the glock state machine, to transition the lock to an
"unlocked" state. While the work was still queued, gfs2_put_super
called gfs2_gl_hash_clear to clear out the glock hash tables.
If the timing was just so, the glock work function would drop the
reference count at the time when it was being checked for zero,
and that caused BUG() to be called. This patch calls
flush_workqueue before clearing the glock hash tables, thereby
ensuring that the delayed work is executed before the hash tables
are cleared, and therefore the reference count never goes to zero
until the glock is cleared.

Signed-off-by: Bob Peterson <[email protected]>
Signed-off-by: Steven Whitehouse <[email protected]>

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 3b9e178..b777691 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1577,6 +1577,7 @@ static void dump_glock_func(struct gfs2_glock *gl)
void gfs2_gl_hash_clear(struct gfs2_sbd *sdp)
{
set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags);
+ flush_workqueue(glock_workqueue);
glock_hash_walk(clear_glock, sdp);
flush_workqueue(glock_workqueue);
wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0);
--
1.7.4

2013-04-26 09:46:29

by Steven Whitehouse

[permalink] [raw]
Subject: [PATCH 7/8] GFS2: Add origin indicator to glock demote tracing

This adds the origin indicator to the trace point for glock
demotion, so that it is possible to see where demote requests
have come from.

Note that requests generated from the demote_rq sysfs interface
will show as remote, since they are intended to replicate
exactly the effect of a demote reuqest from a remote node. It
is still possible to tell these apart by looking at the process
which initiated the demote request.

Signed-off-by: Steven Whitehouse <[email protected]>

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 77d7927..3b9e178 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -926,7 +926,7 @@ static void handle_callback(struct gfs2_glock *gl, unsigned int state,
}
if (gl->gl_ops->go_callback)
gl->gl_ops->go_callback(gl, remote);
- trace_gfs2_demote_rq(gl);
+ trace_gfs2_demote_rq(gl, remote);
}

void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...)
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index 2ee13e8..20c007d 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -159,9 +159,9 @@ TRACE_EVENT(gfs2_glock_put,
/* Callback (local or remote) requesting lock demotion */
TRACE_EVENT(gfs2_demote_rq,

- TP_PROTO(const struct gfs2_glock *gl),
+ TP_PROTO(const struct gfs2_glock *gl, bool remote),

- TP_ARGS(gl),
+ TP_ARGS(gl, remote),

TP_STRUCT__entry(
__field( dev_t, dev )
@@ -170,6 +170,7 @@ TRACE_EVENT(gfs2_demote_rq,
__field( u8, cur_state )
__field( u8, dmt_state )
__field( unsigned long, flags )
+ __field( bool, remote )
),

TP_fast_assign(
@@ -179,14 +180,16 @@ TRACE_EVENT(gfs2_demote_rq,
__entry->cur_state = glock_trace_state(gl->gl_state);
__entry->dmt_state = glock_trace_state(gl->gl_demote_state);
__entry->flags = gl->gl_flags | (gl->gl_object ? (1UL<<GLF_OBJECT) : 0);
+ __entry->remote = remote;
),

- TP_printk("%u,%u glock %d:%lld demote %s to %s flags:%s",
+ TP_printk("%u,%u glock %d:%lld demote %s to %s flags:%s %s",
MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype,
(unsigned long long)__entry->glnum,
glock_trace_name(__entry->cur_state),
glock_trace_name(__entry->dmt_state),
- show_glock_flags(__entry->flags))
+ show_glock_flags(__entry->flags),
+ __entry->remote ? "remote" : "local")

);

--
1.7.4

2013-04-26 09:45:54

by Steven Whitehouse

[permalink] [raw]
Subject: [PATCH 5/8] GFS2: replace gfs2_ail structure with gfs2_trans

From: Benjamin Marzinski <[email protected]>

In order to allow transactions and log flushes to happen at the same
time, gfs2 needs to move the transaction accounting and active items
list code into the gfs2_trans structure. As a first step toward this,
this patch removes the gfs2_ail structure, and handles the active items
list in the gfs_trans structure. This keeps gfs2 from allocating an ail
structure on log flushes, and gives us a struture that can later be used
to store the transaction accounting outside of the gfs2 superblock
structure.

With this patch, at the end of a transaction, gfs2 will add the
gfs2_trans structure to the superblock if there is not one already.
This structure now has the active items fields that were previously in
gfs2_ail. This is not necessary in the case where the transaction was
simply used to add revokes, since these are never written outside of the
journal, and thus, don't need an active items list.

Also, in order to make sure that the transaction structure is not
removed while it's still in use by gfs2_trans_end, unlocking the
sd_log_flush_lock has to happen slightly later in ending the
transaction.

Signed-off-by: Benjamin Marzinski <[email protected]>
Signed-off-by: Steven Whitehouse <[email protected]>

diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 24f414f..9883694 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -1055,7 +1055,7 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
if (atomic_read(&bh->b_count))
goto cannot_release;
bd = bh->b_private;
- if (bd && bd->bd_ail)
+ if (bd && bd->bd_tr)
goto cannot_release;
if (buffer_pinned(bh) || buffer_dirty(bh))
goto not_possible;
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 5c29216..2532f7e 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -31,7 +31,6 @@ struct gfs2_holder;
struct gfs2_glock;
struct gfs2_quota_data;
struct gfs2_trans;
-struct gfs2_ail;
struct gfs2_jdesc;
struct gfs2_sbd;
struct lm_lockops;
@@ -53,7 +52,7 @@ struct gfs2_log_header_host {

struct gfs2_log_operations {
void (*lo_before_commit) (struct gfs2_sbd *sdp);
- void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_ail *ai);
+ void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_trans *tr);
void (*lo_before_scan) (struct gfs2_jdesc *jd,
struct gfs2_log_header_host *head, int pass);
int (*lo_scan_elements) (struct gfs2_jdesc *jd, unsigned int start,
@@ -139,7 +138,7 @@ struct gfs2_bufdata {
struct list_head bd_list;
const struct gfs2_log_operations *bd_ops;

- struct gfs2_ail *bd_ail;
+ struct gfs2_trans *bd_tr;
struct list_head bd_ail_st_list;
struct list_head bd_ail_gl_list;
};
@@ -433,6 +432,7 @@ struct gfs2_trans {
struct gfs2_holder tr_t_gh;

int tr_touched;
+ int tr_attached;

unsigned int tr_num_buf_new;
unsigned int tr_num_databuf_new;
@@ -440,14 +440,12 @@ struct gfs2_trans {
unsigned int tr_num_databuf_rm;
unsigned int tr_num_revoke;
unsigned int tr_num_revoke_rm;
-};

-struct gfs2_ail {
- struct list_head ai_list;
+ struct list_head tr_list;

- unsigned int ai_first;
- struct list_head ai_ail1_list;
- struct list_head ai_ail2_list;
+ unsigned int tr_first;
+ struct list_head tr_ail1_list;
+ struct list_head tr_ail2_list;
};

struct gfs2_journal_extent {
@@ -710,6 +708,7 @@ struct gfs2_sbd {

spinlock_t sd_log_lock;

+ struct gfs2_trans *sd_log_tr;
unsigned int sd_log_blks_reserved;
unsigned int sd_log_commited_buf;
unsigned int sd_log_commited_databuf;
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 9a2ca8b..b404f48 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -73,7 +73,7 @@ unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,

void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
{
- bd->bd_ail = NULL;
+ bd->bd_tr = NULL;
list_del_init(&bd->bd_ail_st_list);
list_del_init(&bd->bd_ail_gl_list);
atomic_dec(&bd->bd_gl->gl_ail_count);
@@ -90,7 +90,7 @@ void gfs2_remove_from_ail(struct gfs2_bufdata *bd)

static int gfs2_ail1_start_one(struct gfs2_sbd *sdp,
struct writeback_control *wbc,
- struct gfs2_ail *ai)
+ struct gfs2_trans *tr)
__releases(&sdp->sd_ail_lock)
__acquires(&sdp->sd_ail_lock)
{
@@ -99,15 +99,15 @@ __acquires(&sdp->sd_ail_lock)
struct gfs2_bufdata *bd, *s;
struct buffer_head *bh;

- list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list, bd_ail_st_list) {
+ list_for_each_entry_safe_reverse(bd, s, &tr->tr_ail1_list, bd_ail_st_list) {
bh = bd->bd_bh;

- gfs2_assert(sdp, bd->bd_ail == ai);
+ gfs2_assert(sdp, bd->bd_tr == tr);

if (!buffer_busy(bh)) {
if (!buffer_uptodate(bh))
gfs2_io_error_bh(sdp, bh);
- list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
+ list_move(&bd->bd_ail_st_list, &tr->tr_ail2_list);
continue;
}

@@ -116,7 +116,7 @@ __acquires(&sdp->sd_ail_lock)
if (gl == bd->bd_gl)
continue;
gl = bd->bd_gl;
- list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list);
+ list_move(&bd->bd_ail_st_list, &tr->tr_ail1_list);
mapping = bh->b_page->mapping;
if (!mapping)
continue;
@@ -144,15 +144,15 @@ __acquires(&sdp->sd_ail_lock)
void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc)
{
struct list_head *head = &sdp->sd_ail1_list;
- struct gfs2_ail *ai;
+ struct gfs2_trans *tr;

trace_gfs2_ail_flush(sdp, wbc, 1);
spin_lock(&sdp->sd_ail_lock);
restart:
- list_for_each_entry_reverse(ai, head, ai_list) {
+ list_for_each_entry_reverse(tr, head, tr_list) {
if (wbc->nr_to_write <= 0)
break;
- if (gfs2_ail1_start_one(sdp, wbc, ai))
+ if (gfs2_ail1_start_one(sdp, wbc, tr))
goto restart;
}
spin_unlock(&sdp->sd_ail_lock);
@@ -183,20 +183,20 @@ static void gfs2_ail1_start(struct gfs2_sbd *sdp)
*
*/

-static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
+static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
struct gfs2_bufdata *bd, *s;
struct buffer_head *bh;

- list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list,
+ list_for_each_entry_safe_reverse(bd, s, &tr->tr_ail1_list,
bd_ail_st_list) {
bh = bd->bd_bh;
- gfs2_assert(sdp, bd->bd_ail == ai);
+ gfs2_assert(sdp, bd->bd_tr == tr);
if (buffer_busy(bh))
continue;
if (!buffer_uptodate(bh))
gfs2_io_error_bh(sdp, bh);
- list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
+ list_move(&bd->bd_ail_st_list, &tr->tr_ail2_list);
}

}
@@ -210,14 +210,14 @@ static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)

static int gfs2_ail1_empty(struct gfs2_sbd *sdp)
{
- struct gfs2_ail *ai, *s;
+ struct gfs2_trans *tr, *s;
int ret;

spin_lock(&sdp->sd_ail_lock);
- list_for_each_entry_safe_reverse(ai, s, &sdp->sd_ail1_list, ai_list) {
- gfs2_ail1_empty_one(sdp, ai);
- if (list_empty(&ai->ai_ail1_list))
- list_move(&ai->ai_list, &sdp->sd_ail2_list);
+ list_for_each_entry_safe_reverse(tr, s, &sdp->sd_ail1_list, tr_list) {
+ gfs2_ail1_empty_one(sdp, tr);
+ if (list_empty(&tr->tr_ail1_list))
+ list_move(&tr->tr_list, &sdp->sd_ail2_list);
else
break;
}
@@ -229,13 +229,13 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp)

static void gfs2_ail1_wait(struct gfs2_sbd *sdp)
{
- struct gfs2_ail *ai;
+ struct gfs2_trans *tr;
struct gfs2_bufdata *bd;
struct buffer_head *bh;

spin_lock(&sdp->sd_ail_lock);
- list_for_each_entry_reverse(ai, &sdp->sd_ail1_list, ai_list) {
- list_for_each_entry(bd, &ai->ai_ail1_list, bd_ail_st_list) {
+ list_for_each_entry_reverse(tr, &sdp->sd_ail1_list, tr_list) {
+ list_for_each_entry(bd, &tr->tr_ail1_list, bd_ail_st_list) {
bh = bd->bd_bh;
if (!buffer_locked(bh))
continue;
@@ -256,40 +256,40 @@ static void gfs2_ail1_wait(struct gfs2_sbd *sdp)
*
*/

-static void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
+static void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
- struct list_head *head = &ai->ai_ail2_list;
+ struct list_head *head = &tr->tr_ail2_list;
struct gfs2_bufdata *bd;

while (!list_empty(head)) {
bd = list_entry(head->prev, struct gfs2_bufdata,
bd_ail_st_list);
- gfs2_assert(sdp, bd->bd_ail == ai);
+ gfs2_assert(sdp, bd->bd_tr == tr);
gfs2_remove_from_ail(bd);
}
}

static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
{
- struct gfs2_ail *ai, *safe;
+ struct gfs2_trans *tr, *safe;
unsigned int old_tail = sdp->sd_log_tail;
int wrap = (new_tail < old_tail);
int a, b, rm;

spin_lock(&sdp->sd_ail_lock);

- list_for_each_entry_safe(ai, safe, &sdp->sd_ail2_list, ai_list) {
- a = (old_tail <= ai->ai_first);
- b = (ai->ai_first < new_tail);
+ list_for_each_entry_safe(tr, safe, &sdp->sd_ail2_list, tr_list) {
+ a = (old_tail <= tr->tr_first);
+ b = (tr->tr_first < new_tail);
rm = (wrap) ? (a || b) : (a && b);
if (!rm)
continue;

- gfs2_ail2_empty_one(sdp, ai);
- list_del(&ai->ai_list);
- gfs2_assert_warn(sdp, list_empty(&ai->ai_ail1_list));
- gfs2_assert_warn(sdp, list_empty(&ai->ai_ail2_list));
- kfree(ai);
+ gfs2_ail2_empty_one(sdp, tr);
+ list_del(&tr->tr_list);
+ gfs2_assert_warn(sdp, list_empty(&tr->tr_ail1_list));
+ gfs2_assert_warn(sdp, list_empty(&tr->tr_ail2_list));
+ kfree(tr);
}

spin_unlock(&sdp->sd_ail_lock);
@@ -435,7 +435,7 @@ static unsigned int calc_reserved(struct gfs2_sbd *sdp)

static unsigned int current_tail(struct gfs2_sbd *sdp)
{
- struct gfs2_ail *ai;
+ struct gfs2_trans *tr;
unsigned int tail;

spin_lock(&sdp->sd_ail_lock);
@@ -443,8 +443,9 @@ static unsigned int current_tail(struct gfs2_sbd *sdp)
if (list_empty(&sdp->sd_ail1_list)) {
tail = sdp->sd_log_head;
} else {
- ai = list_entry(sdp->sd_ail1_list.prev, struct gfs2_ail, ai_list);
- tail = ai->ai_first;
+ tr = list_entry(sdp->sd_ail1_list.prev, struct gfs2_trans,
+ tr_list);
+ tail = tr->tr_first;
}

spin_unlock(&sdp->sd_ail_lock);
@@ -600,7 +601,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags)

void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
{
- struct gfs2_ail *ai;
+ struct gfs2_trans *tr;

down_write(&sdp->sd_log_flush_lock);

@@ -611,9 +612,12 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
}
trace_gfs2_log_flush(sdp, 1);

- ai = kzalloc(sizeof(struct gfs2_ail), GFP_NOFS | __GFP_NOFAIL);
- INIT_LIST_HEAD(&ai->ai_ail1_list);
- INIT_LIST_HEAD(&ai->ai_ail2_list);
+ tr = sdp->sd_log_tr;
+ if (tr) {
+ sdp->sd_log_tr = NULL;
+ INIT_LIST_HEAD(&tr->tr_ail1_list);
+ INIT_LIST_HEAD(&tr->tr_ail2_list);
+ }

if (sdp->sd_log_num_buf != sdp->sd_log_commited_buf) {
printk(KERN_INFO "GFS2: log buf %u %u\n", sdp->sd_log_num_buf,
@@ -630,7 +634,8 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)

sdp->sd_log_flush_head = sdp->sd_log_head;
sdp->sd_log_flush_wrapped = 0;
- ai->ai_first = sdp->sd_log_flush_head;
+ if (tr)
+ tr->tr_first = sdp->sd_log_flush_head;

gfs2_ordered_write(sdp);
lops_before_commit(sdp);
@@ -643,7 +648,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
trace_gfs2_log_blocks(sdp, -1);
log_write_header(sdp, 0);
}
- lops_after_commit(sdp, ai);
+ lops_after_commit(sdp, tr);

gfs2_log_lock(sdp);
sdp->sd_log_head = sdp->sd_log_flush_head;
@@ -653,16 +658,16 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
sdp->sd_log_commited_revoke = 0;

spin_lock(&sdp->sd_ail_lock);
- if (!list_empty(&ai->ai_ail1_list)) {
- list_add(&ai->ai_list, &sdp->sd_ail1_list);
- ai = NULL;
+ if (tr && !list_empty(&tr->tr_ail1_list)) {
+ list_add(&tr->tr_list, &sdp->sd_ail1_list);
+ tr = NULL;
}
spin_unlock(&sdp->sd_ail_lock);
gfs2_log_unlock(sdp);
trace_gfs2_log_flush(sdp, 0);
up_write(&sdp->sd_log_flush_lock);

- kfree(ai);
+ kfree(tr);
}

static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
@@ -687,6 +692,12 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
sdp->sd_jdesc->jd_blocks);
sdp->sd_log_blks_reserved = reserved;

+ if (sdp->sd_log_tr == NULL &&
+ (tr->tr_num_buf_new || tr->tr_num_databuf_new)) {
+ gfs2_assert_withdraw(sdp, tr->tr_t_gh.gh_gl);
+ sdp->sd_log_tr = tr;
+ tr->tr_attached = 1;
+ }
gfs2_log_unlock(sdp);
}

@@ -708,7 +719,6 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
log_refund(sdp, tr);
- up_read(&sdp->sd_log_flush_lock);

if (atomic_read(&sdp->sd_log_pinned) > atomic_read(&sdp->sd_log_thresh1) ||
((sdp->sd_jdesc->jd_blocks - atomic_read(&sdp->sd_log_blks_free)) >
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index a505597..7318abf 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -53,8 +53,8 @@ void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
* to in-place disk block, remove it from the AIL.
*/
spin_lock(&sdp->sd_ail_lock);
- if (bd->bd_ail)
- list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list);
+ if (bd->bd_tr)
+ list_move(&bd->bd_ail_st_list, &bd->bd_tr->tr_ail2_list);
spin_unlock(&sdp->sd_ail_lock);
get_bh(bh);
atomic_inc(&sdp->sd_log_pinned);
@@ -94,7 +94,7 @@ static void maybe_release_space(struct gfs2_bufdata *bd)
*/

static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
- struct gfs2_ail *ai)
+ struct gfs2_trans *tr)
{
struct gfs2_bufdata *bd = bh->b_private;

@@ -109,7 +109,7 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
maybe_release_space(bd);

spin_lock(&sdp->sd_ail_lock);
- if (bd->bd_ail) {
+ if (bd->bd_tr) {
list_del(&bd->bd_ail_st_list);
brelse(bh);
} else {
@@ -117,8 +117,8 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
list_add(&bd->bd_ail_gl_list, &gl->gl_ail_list);
atomic_inc(&gl->gl_ail_count);
}
- bd->bd_ail = ai;
- list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
+ bd->bd_tr = tr;
+ list_add(&bd->bd_ail_st_list, &tr->tr_ail1_list);
spin_unlock(&sdp->sd_ail_lock);

clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
@@ -480,17 +480,22 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp)
&sdp->sd_log_le_buf, 0);
}

-static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
+static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
struct list_head *head = &sdp->sd_log_le_buf;
struct gfs2_bufdata *bd;

+ if (tr == NULL) {
+ gfs2_assert(sdp, list_empty(head));
+ return;
+ }
+
while (!list_empty(head)) {
bd = list_entry(head->next, struct gfs2_bufdata, bd_list);
list_del_init(&bd->bd_list);
sdp->sd_log_num_buf--;

- gfs2_unpin(sdp, bd->bd_bh, ai);
+ gfs2_unpin(sdp, bd->bd_bh, tr);
}
gfs2_assert_warn(sdp, !sdp->sd_log_num_buf);
}
@@ -613,7 +618,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
gfs2_log_write_page(sdp, page);
}

-static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
+static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
struct list_head *head = &sdp->sd_log_le_revoke;
struct gfs2_bufdata *bd;
@@ -791,16 +796,21 @@ static void databuf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks);
}

-static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
+static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
struct list_head *head = &sdp->sd_log_le_databuf;
struct gfs2_bufdata *bd;

+ if (tr == NULL) {
+ gfs2_assert(sdp, list_empty(head));
+ return;
+ }
+
while (!list_empty(head)) {
bd = list_entry(head->next, struct gfs2_bufdata, bd_list);
list_del_init(&bd->bd_list);
sdp->sd_log_num_databuf--;
- gfs2_unpin(sdp, bd->bd_bh, ai);
+ gfs2_unpin(sdp, bd->bd_bh, tr);
}
gfs2_assert_warn(sdp, !sdp->sd_log_num_databuf);
}
diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h
index ba77b7d..87e062e 100644
--- a/fs/gfs2/lops.h
+++ b/fs/gfs2/lops.h
@@ -55,12 +55,13 @@ static inline void lops_before_commit(struct gfs2_sbd *sdp)
gfs2_log_ops[x]->lo_before_commit(sdp);
}

-static inline void lops_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
+static inline void lops_after_commit(struct gfs2_sbd *sdp,
+ struct gfs2_trans *tr)
{
int x;
for (x = 0; gfs2_log_ops[x]; x++)
if (gfs2_log_ops[x]->lo_after_commit)
- gfs2_log_ops[x]->lo_after_commit(sdp, ai);
+ gfs2_log_ops[x]->lo_after_commit(sdp, tr);
}

static inline void lops_before_scan(struct gfs2_jdesc *jd,
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index b059bbb..1a89afb 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -295,7 +295,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int
}
if (bd) {
spin_lock(&sdp->sd_ail_lock);
- if (bd->bd_ail) {
+ if (bd->bd_tr) {
gfs2_remove_from_ail(bd);
bh->b_private = NULL;
bd->bd_bh = NULL;
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index 88162fa..3bb4ac7 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -135,8 +135,10 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
if (tr->tr_t_gh.gh_gl) {
gfs2_glock_dq(&tr->tr_t_gh);
gfs2_holder_uninit(&tr->tr_t_gh);
- kfree(tr);
+ if (!tr->tr_attached)
+ kfree(tr);
}
+ up_read(&sdp->sd_log_flush_lock);

if (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS)
gfs2_log_flush(sdp, NULL);
--
1.7.4

2013-04-26 09:46:57

by Steven Whitehouse

[permalink] [raw]
Subject: [PATCH 6/8] GFS2: Add origin indicator to glock callbacks

This patch adds a bool indicating whether the demote
request was originated locally or remotely. This is then
used by the iopen ->go_callback() to make 100% sure that
it will only respond to remote callbacks.

Since ->evict_inode() uses GL_NOCACHE when it attempts to
get an exclusive lock on the iopen lock, this may result
in extra scheduling of the workqueue in case that the
exclusive promotion request failed. This patch prevents
that from happening.

Signed-off-by: Steven Whitehouse <[email protected]>

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 6e30fd1..77d7927 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -912,7 +912,7 @@ int gfs2_glock_wait(struct gfs2_holder *gh)
*/

static void handle_callback(struct gfs2_glock *gl, unsigned int state,
- unsigned long delay)
+ unsigned long delay, bool remote)
{
int bit = delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE;

@@ -925,7 +925,7 @@ static void handle_callback(struct gfs2_glock *gl, unsigned int state,
gl->gl_demote_state = LM_ST_UNLOCKED;
}
if (gl->gl_ops->go_callback)
- gl->gl_ops->go_callback(gl);
+ gl->gl_ops->go_callback(gl, remote);
trace_gfs2_demote_rq(gl);
}

@@ -1091,7 +1091,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh)

spin_lock(&gl->gl_spin);
if (gh->gh_flags & GL_NOCACHE)
- handle_callback(gl, LM_ST_UNLOCKED, 0);
+ handle_callback(gl, LM_ST_UNLOCKED, 0, false);

list_del_init(&gh->gh_list);
if (find_first_holder(gl) == NULL) {
@@ -1296,7 +1296,7 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
}

spin_lock(&gl->gl_spin);
- handle_callback(gl, state, delay);
+ handle_callback(gl, state, delay, true);
spin_unlock(&gl->gl_spin);
if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
gfs2_glock_put(gl);
@@ -1409,7 +1409,7 @@ __acquires(&lru_lock)
spin_unlock(&lru_lock);
spin_lock(&gl->gl_spin);
if (demote_ok(gl))
- handle_callback(gl, LM_ST_UNLOCKED, 0);
+ handle_callback(gl, LM_ST_UNLOCKED, 0, false);
WARN_ON(!test_and_clear_bit(GLF_LOCK, &gl->gl_flags));
smp_mb__after_clear_bit();
if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
@@ -1534,7 +1534,7 @@ static void clear_glock(struct gfs2_glock *gl)

spin_lock(&gl->gl_spin);
if (gl->gl_state != LM_ST_UNLOCKED)
- handle_callback(gl, LM_ST_UNLOCKED, 0);
+ handle_callback(gl, LM_ST_UNLOCKED, 0, false);
spin_unlock(&gl->gl_spin);
gfs2_glock_hold(gl);
if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 444b650..c66e99c 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -515,12 +515,12 @@ static int trans_go_demote_ok(const struct gfs2_glock *gl)
*
* gl_spin lock is held while calling this
*/
-static void iopen_go_callback(struct gfs2_glock *gl)
+static void iopen_go_callback(struct gfs2_glock *gl, bool remote)
{
struct gfs2_inode *ip = (struct gfs2_inode *)gl->gl_object;
struct gfs2_sbd *sdp = gl->gl_sbd;

- if (sdp->sd_vfs->s_flags & MS_RDONLY)
+ if (!remote || (sdp->sd_vfs->s_flags & MS_RDONLY))
return;

if (gl->gl_demote_state == LM_ST_UNLOCKED &&
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 2532f7e..26aabd7 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -210,7 +210,7 @@ struct gfs2_glock_operations {
int (*go_lock) (struct gfs2_holder *gh);
void (*go_unlock) (struct gfs2_holder *gh);
int (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl);
- void (*go_callback) (struct gfs2_glock *gl);
+ void (*go_callback)(struct gfs2_glock *gl, bool remote);
const int go_type;
const unsigned long go_flags;
#define GLOF_ASPACE 1
--
1.7.4

2013-04-26 09:47:21

by Steven Whitehouse

[permalink] [raw]
Subject: [PATCH 3/8] GFS2: Use gfs2_dinode_out() in the inode create path

Over the previous two patches relating to inode creation, the
content of init_dinode() has been looking more and more like
gfs2_dinode_out(). This is not an accident! This patch replaces
the parts of init_dinode() which are duplicated in gfs2_dinode_out()
with a call to that function.

Mostly that is straightforward, but there is one issue which needed
to be resolved relating to the link count. The link count has to be
set to zero in a certain error handling code path, which lands up
calling iput(). This is now done specifically in that code path
allowing the link count to be set earlier and written into the
on disk inode by gfs2_dinode_put() in the normal way.

Signed-off-by: Steven Whitehouse <[email protected]>

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index cb53b4f..8833a4f 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -446,46 +446,24 @@ static void gfs2_init_dir(struct buffer_head *dibh,
*/

static void init_dinode(struct gfs2_inode *dip, struct gfs2_inode *ip,
- const char *symname, struct buffer_head **bhp)
+ const char *symname)
{
struct gfs2_dinode *di;
struct buffer_head *dibh;

dibh = gfs2_meta_new(ip->i_gl, ip->i_no_addr);
gfs2_trans_add_meta(ip->i_gl, dibh);
- gfs2_metatype_set(dibh, GFS2_METATYPE_DI, GFS2_FORMAT_DI);
- gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
di = (struct gfs2_dinode *)dibh->b_data;
+ gfs2_dinode_out(ip, di);

- di->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
- di->di_num.no_addr = cpu_to_be64(ip->i_no_addr);
- di->di_mode = cpu_to_be32(ip->i_inode.i_mode);
- di->di_uid = cpu_to_be32(i_uid_read(&ip->i_inode));
- di->di_gid = cpu_to_be32(i_gid_read(&ip->i_inode));
- di->di_nlink = 0;
- di->di_size = cpu_to_be64(ip->i_inode.i_size);
- di->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
- di->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
- di->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec);
- di->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec);
di->di_major = cpu_to_be32(MAJOR(ip->i_inode.i_rdev));
di->di_minor = cpu_to_be32(MINOR(ip->i_inode.i_rdev));
- di->di_goal_meta = di->di_goal_data = cpu_to_be64(ip->i_no_addr);
- di->di_generation = cpu_to_be64(ip->i_generation);
- di->di_flags = cpu_to_be32(ip->i_diskflags);
di->__pad1 = 0;
- di->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) ? GFS2_FORMAT_DE : 0);
- di->di_height = cpu_to_be16(ip->i_height);
di->__pad2 = 0;
di->__pad3 = 0;
- di->di_depth = cpu_to_be16(ip->i_depth);
- di->di_entries = cpu_to_be32(ip->i_entries);
memset(&di->__pad4, 0, sizeof(di->__pad4));
- di->di_eattr = cpu_to_be64(ip->i_eattr);
- di->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec);
- di->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec);
- di->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec);
memset(&di->di_reserved, 0, sizeof(di->di_reserved));
+ gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));

switch(ip->i_inode.i_mode & S_IFMT) {
case S_IFDIR:
@@ -497,8 +475,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_inode *ip,
}

set_buffer_uptodate(dibh);
-
- *bhp = dibh;
+ brelse(dibh);
}

static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
@@ -532,9 +509,6 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
if (error)
goto fail_end_trans;

- set_nlink(&ip->i_inode, S_ISDIR(ip->i_inode.i_mode) ? 2 : 1);
- mark_inode_dirty(&ip->i_inode);
-
fail_end_trans:
gfs2_trans_end(sdp);
fail_ipreserv:
@@ -590,7 +564,6 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
struct gfs2_glock *io_gl;
int error;
- struct buffer_head *bh = NULL;
u32 aflags = 0;
int arq;

@@ -634,6 +607,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
goto fail_free_inode;

inode->i_mode = mode;
+ set_nlink(inode, S_ISDIR(mode) ? 2 : 1);
inode->i_rdev = dev;
inode->i_size = size;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
@@ -682,7 +656,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
if (error)
goto fail_gunlock2;

- init_dinode(dip, ip, symname, &bh);
+ init_dinode(dip, ip, symname);
gfs2_trans_end(sdp);

error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
@@ -710,7 +684,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
if (error)
goto fail_gunlock3;

- brelse(bh);
+ mark_inode_dirty(inode);
gfs2_glock_dq_uninit(ghs);
gfs2_glock_dq_uninit(ghs + 1);
d_instantiate(dentry, inode);
@@ -733,12 +707,12 @@ fail_free_inode:
fail_gunlock:
gfs2_glock_dq_uninit(ghs);
if (inode && !IS_ERR(inode)) {
+ clear_nlink(inode);
+ mark_inode_dirty(inode);
set_bit(GIF_ALLOC_FAILED, &GFS2_I(inode)->i_flags);
iput(inode);
}
fail:
- if (bh)
- brelse(bh);
return error;
}

--
1.7.4

2013-04-26 09:45:46

by Steven Whitehouse

[permalink] [raw]
Subject: [PATCH 1/8] GFS2: Clean up inode creation path

This patch cleans up the inode creation code path in GFS2. After the
Orlov allocator was merged, a number of potential improvements are
now possible, and this is a first set of these.

The quota handling is now updated so that it matches the point in
the code where the allocation takes place. This means that the one
exception in gfs2_alloc_blocks relating to quota is now no longer
required, and we can use the generic code everywhere.

In addition the call to figure out whether we need to allocate any
extra blocks in order to add a directory entry is moved higher up
gfs2_create_inode. This means that if it returns an error, we
can deal with that at a stage where it is easier to handle that case.
The returned status cannot change during the function since we hold
an exclusive lock on the directory.

Two calls to gfs2_rindex_update have been changed to one, again at
the top of gfs2_create_inode to simplify error handling.

The time stamps are also now initialised earlier in the creation
process, this is gradually moving towards being able to remove the
call to gfs2_refresh_inode in gfs2_inode_create once we have all the
fields covered.

Signed-off-by: Steven Whitehouse <[email protected]>

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index cc00bd1..df51557 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -392,11 +392,15 @@ static int alloc_dinode(struct gfs2_inode *ip, u32 flags)
int error;
int dblocks = 1;

- error = gfs2_inplace_reserve(ip, RES_DINODE, flags);
+ error = gfs2_quota_lock_check(ip);
if (error)
goto out;

- error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS, 0);
+ error = gfs2_inplace_reserve(ip, RES_DINODE, flags);
+ if (error)
+ goto out_quota;
+
+ error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA, 0);
if (error)
goto out_ipreserv;

@@ -409,6 +413,8 @@ static int alloc_dinode(struct gfs2_inode *ip, u32 flags)

out_ipreserv:
gfs2_inplace_release(ip);
+out_quota:
+ gfs2_quota_unlock(ip);
out:
return error;
}
@@ -445,7 +451,6 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_inode *ip,
struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
struct gfs2_dinode *di;
struct buffer_head *dibh;
- struct timespec tv = CURRENT_TIME;

dibh = gfs2_meta_new(ip->i_gl, ip->i_no_addr);
gfs2_trans_add_meta(ip->i_gl, dibh);
@@ -461,7 +466,9 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_inode *ip,
di->di_nlink = 0;
di->di_size = cpu_to_be64(ip->i_inode.i_size);
di->di_blocks = cpu_to_be64(1);
- di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(tv.tv_sec);
+ di->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
+ di->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec);
+ di->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec);
di->di_major = cpu_to_be32(MAJOR(ip->i_inode.i_rdev));
di->di_minor = cpu_to_be32(MINOR(ip->i_inode.i_rdev));
di->di_goal_meta = di->di_goal_data = cpu_to_be64(ip->i_no_addr);
@@ -476,9 +483,9 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_inode *ip,
di->di_entries = 0;
memset(&di->__pad4, 0, sizeof(di->__pad4));
di->di_eattr = 0;
- di->di_atime_nsec = cpu_to_be32(tv.tv_nsec);
- di->di_mtime_nsec = cpu_to_be32(tv.tv_nsec);
- di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec);
+ di->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec);
+ di->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec);
+ di->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec);
memset(&di->di_reserved, 0, sizeof(di->di_reserved));

switch(ip->i_inode.i_mode & S_IFMT) {
@@ -505,58 +512,18 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_inode *ip,
*bhp = dibh;
}

-static int make_dinode(struct gfs2_inode *dip, struct gfs2_inode *ip,
- const char *symname, struct buffer_head **bhp)
-{
- struct inode *inode = &ip->i_inode;
- struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
- int error;
-
- error = gfs2_rindex_update(sdp);
- if (error)
- return error;
-
- error = gfs2_quota_lock(dip, inode->i_uid, inode->i_gid);
- if (error)
- return error;
-
- error = gfs2_quota_check(dip, inode->i_uid, inode->i_gid);
- if (error)
- goto out_quota;
-
- error = gfs2_trans_begin(sdp, RES_DINODE + RES_QUOTA, 0);
- if (error)
- goto out_quota;
-
- init_dinode(dip, ip, symname, bhp);
- gfs2_quota_change(dip, +1, inode->i_uid, inode->i_gid);
- gfs2_trans_end(sdp);
-
-out_quota:
- gfs2_quota_unlock(dip);
- return error;
-}
-
static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
- struct gfs2_inode *ip)
+ struct gfs2_inode *ip, int arq)
{
struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
- int alloc_required;
struct buffer_head *dibh;
int error;

- error = gfs2_rindex_update(sdp);
- if (error)
- return error;
-
error = gfs2_quota_lock(dip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
if (error)
- goto fail;
+ return error;

- error = alloc_required = gfs2_diradd_alloc_required(&dip->i_inode, name);
- if (alloc_required < 0)
- goto fail_quota_locks;
- if (alloc_required) {
+ if (arq) {
error = gfs2_quota_check(dip, dip->i_inode.i_uid, dip->i_inode.i_gid);
if (error)
goto fail_quota_locks;
@@ -592,15 +559,10 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,

fail_end_trans:
gfs2_trans_end(sdp);
-
fail_ipreserv:
- if (alloc_required)
- gfs2_inplace_release(dip);
-
+ gfs2_inplace_release(dip);
fail_quota_locks:
gfs2_quota_unlock(dip);
-
-fail:
return error;
}

@@ -652,6 +614,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
int error;
struct buffer_head *bh = NULL;
u32 aflags = 0;
+ int arq;

if (!name->len || name->len > GFS2_FNAMESIZE)
return -ENAMETOOLONG;
@@ -660,6 +623,10 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
if (error)
return error;

+ error = gfs2_rindex_update(sdp);
+ if (error)
+ return error;
+
error = gfs2_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
if (error)
goto fail;
@@ -674,11 +641,15 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
if (error)
goto fail_gunlock;

+ arq = error = gfs2_diradd_alloc_required(dir, name);
+ if (error < 0)
+ goto fail_gunlock;
+
inode = new_inode(sdp->sd_vfs);
- if (!inode) {
- gfs2_glock_dq_uninit(ghs);
- return -ENOMEM;
- }
+ error = -ENOMEM;
+ if (!inode)
+ goto fail_gunlock;
+
ip = GFS2_I(inode);
error = gfs2_rs_alloc(ip);
if (error)
@@ -688,6 +659,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
inode->i_mode = mode;
inode->i_rdev = dev;
inode->i_size = size;
+ inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
munge_mode_uid_gid(dip, inode);
ip->i_goal = dip->i_goal;

@@ -708,10 +680,13 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
if (error)
goto fail_free_inode;

- error = make_dinode(dip, ip, symname, &bh);
+ error = gfs2_trans_begin(sdp, RES_DINODE, 0);
if (error)
goto fail_gunlock2;

+ init_dinode(dip, ip, symname, &bh);
+ gfs2_trans_end(sdp);
+
error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
if (error)
goto fail_gunlock2;
@@ -737,7 +712,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
if (error)
goto fail_gunlock3;

- error = link_dinode(dip, name, ip);
+ error = link_dinode(dip, name, ip, arq);
if (error)
goto fail_gunlock3;

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 5a51265..71c7fc5 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -2180,13 +2180,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
if (dinode)
gfs2_trans_add_unrevoke(sdp, block, 1);

- /*
- * This needs reviewing to see why we cannot do the quota change
- * at this point in the dinode case.
- */
- if (ndata)
- gfs2_quota_change(ip, ndata, ip->i_inode.i_uid,
- ip->i_inode.i_gid);
+ gfs2_quota_change(ip, *nblocks, ip->i_inode.i_uid, ip->i_inode.i_gid);

rbm.rgd->rd_free_clone -= *nblocks;
trace_gfs2_block_alloc(ip, rbm.rgd, block, *nblocks,
--
1.7.4