2022-01-24 11:20:16

by Ritesh Harjani

[permalink] [raw]
Subject: [PATCHv1 1/2] jbd2: Kill t_handle_lock transaction spinlock

This patch kills t_handle_lock transaction spinlock completely from
jbd2.
To explain the reasoning, currently there were three sites at which this
spinlock was used.
1. jbd2_journal_wait_updates()
a. Based on careful code review it can be seen that, we don't need this
lock here. This is since we wait for any currently ongoing updates
based on a atomic variable t_updates. And we anyway don't take any
t_handle_lock while in stop_this_handle().
i.e.

write_lock(&journal->j_state_lock()
jbd2_journal_wait_updates() stop_this_handle()
while (atomic_read(txn->t_updates) { |
DEFINE_WAIT(wait); |
prepare_to_wait(); |
if (atomic_read(txn->t_updates) if (atomic_dec_and_test(txn->t_updates))
write_unlock(&journal->j_state_lock);
schedule(); wake_up()
write_lock(&journal->j_state_lock);
finish_wait();
}
txn->t_state = T_COMMIT
write_unlock(&journal->j_state_lock);

b. Also note that between atomic_inc(&txn->t_updates) in
start_this_handle() and jbd2_journal_wait_updates(), the
synchronization happens via read_lock(journal->j_state_lock) in
start_this_handle();

2. jbd2_journal_extend()
a. jbd2_journal_extend() is called with the handle of each process from
task_struct. So no lock required in updating member fields of handle_t

b. For member fields of h_transaction, all updates happens only via
atomic APIs (which is also within read_lock()).
So, no need of this transaction spinlock.

3. update_t_max_wait()
Based on Jan suggestion, this can be carefully removed using atomic
cmpxchg API.
Note that there can be several processes which are waiting for a new
transaction to be allocated and started. For doing this only one
process will succeed in taking write_lock() and allocating a new txn.
After that all of the process will be updating the t_max_wait (max
transaction wait time). This can be done via below method w/o taking
any locks using atomic cmpxchg.
For more details refer [1]

new = get_new_val();
old = READ_ONCE(ptr->max_val);
while (old < new)
old = cmpxchg(&ptr->max_val, old, new);

[1]: https://lwn.net/Articles/849237/
Suggested-by: Jan Kara <[email protected]>
Signed-off-by: Ritesh Harjani <[email protected]>
---
fs/jbd2/transaction.c | 29 +++++++++--------------------
include/linux/jbd2.h | 3 ---
2 files changed, 9 insertions(+), 23 deletions(-)

diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 8e2f8275a253..68dd7de49aff 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -107,7 +107,6 @@ static void jbd2_get_transaction(journal_t *journal,
transaction->t_start_time = ktime_get();
transaction->t_tid = journal->j_transaction_sequence++;
transaction->t_expires = jiffies + journal->j_commit_interval;
- spin_lock_init(&transaction->t_handle_lock);
atomic_set(&transaction->t_updates, 0);
atomic_set(&transaction->t_outstanding_credits,
jbd2_descriptor_blocks_per_trans(journal) +
@@ -139,24 +138,21 @@ static void jbd2_get_transaction(journal_t *journal,
/*
* Update transaction's maximum wait time, if debugging is enabled.
*
- * In order for t_max_wait to be reliable, it must be protected by a
- * lock. But doing so will mean that start_this_handle() can not be
- * run in parallel on SMP systems, which limits our scalability. So
- * unless debugging is enabled, we no longer update t_max_wait, which
- * means that maximum wait time reported by the jbd2_run_stats
- * tracepoint will always be zero.
+ * t_max_wait is carefully updated here with use of atomic compare exchange.
+ * Note that there could be multiplre threads trying to do this simultaneously
+ * hence using cmpxchg to avoid any use of locks in this case.
*/
static inline void update_t_max_wait(transaction_t *transaction,
unsigned long ts)
{
#ifdef CONFIG_JBD2_DEBUG
+ unsigned long oldts, newts;
if (jbd2_journal_enable_debug &&
time_after(transaction->t_start, ts)) {
- ts = jbd2_time_diff(ts, transaction->t_start);
- spin_lock(&transaction->t_handle_lock);
- if (ts > transaction->t_max_wait)
- transaction->t_max_wait = ts;
- spin_unlock(&transaction->t_handle_lock);
+ newts = jbd2_time_diff(ts, transaction->t_start);
+ oldts = READ_ONCE(transaction->t_max_wait);
+ while (oldts < newts)
+ oldts = cmpxchg(&transaction->t_max_wait, oldts, newts);
}
#endif
}
@@ -690,7 +686,6 @@ int jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records)
DIV_ROUND_UP(
handle->h_revoke_credits_requested,
journal->j_revoke_records_per_block);
- spin_lock(&transaction->t_handle_lock);
wanted = atomic_add_return(nblocks,
&transaction->t_outstanding_credits);

@@ -698,7 +693,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records)
jbd_debug(3, "denied handle %p %d blocks: "
"transaction too large\n", handle, nblocks);
atomic_sub(nblocks, &transaction->t_outstanding_credits);
- goto unlock;
+ goto error_out;
}

trace_jbd2_handle_extend(journal->j_fs_dev->bd_dev,
@@ -714,8 +709,6 @@ int jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records)
result = 0;

jbd_debug(3, "extended handle %p by %d\n", handle, nblocks);
-unlock:
- spin_unlock(&transaction->t_handle_lock);
error_out:
read_unlock(&journal->j_state_lock);
return result;
@@ -847,22 +840,18 @@ void jbd2_journal_wait_updates(journal_t *journal)
if (!commit_transaction)
return;

- spin_lock(&commit_transaction->t_handle_lock);
while (atomic_read(&commit_transaction->t_updates)) {
DEFINE_WAIT(wait);

prepare_to_wait(&journal->j_wait_updates, &wait,
TASK_UNINTERRUPTIBLE);
if (atomic_read(&commit_transaction->t_updates)) {
- spin_unlock(&commit_transaction->t_handle_lock);
write_unlock(&journal->j_state_lock);
schedule();
write_lock(&journal->j_state_lock);
- spin_lock(&commit_transaction->t_handle_lock);
}
finish_wait(&journal->j_wait_updates, &wait);
}
- spin_unlock(&commit_transaction->t_handle_lock);
}

/**
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 575c3057a98a..500a95f8c914 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -554,9 +554,6 @@ struct transaction_chp_stats_s {
* ->j_list_lock
*
* j_state_lock
- * ->t_handle_lock
- *
- * j_state_lock
* ->j_list_lock (journal_unmap_buffer)
*
*/
--
2.31.1


2022-01-24 18:56:26

by Jan Kara

[permalink] [raw]
Subject: Re: [PATCHv1 1/2] jbd2: Kill t_handle_lock transaction spinlock

On Sun 23-01-22 22:53:27, Ritesh Harjani wrote:
> This patch kills t_handle_lock transaction spinlock completely from
> jbd2.
> To explain the reasoning, currently there were three sites at which this
> spinlock was used.
> 1. jbd2_journal_wait_updates()
> a. Based on careful code review it can be seen that, we don't need this
> lock here. This is since we wait for any currently ongoing updates
> based on a atomic variable t_updates. And we anyway don't take any
> t_handle_lock while in stop_this_handle().
> i.e.
>
> write_lock(&journal->j_state_lock()
> jbd2_journal_wait_updates() stop_this_handle()
> while (atomic_read(txn->t_updates) { |
> DEFINE_WAIT(wait); |
> prepare_to_wait(); |
> if (atomic_read(txn->t_updates) if (atomic_dec_and_test(txn->t_updates))
> write_unlock(&journal->j_state_lock);
> schedule(); wake_up()
> write_lock(&journal->j_state_lock);
> finish_wait();
> }
> txn->t_state = T_COMMIT
> write_unlock(&journal->j_state_lock);
>
> b. Also note that between atomic_inc(&txn->t_updates) in
> start_this_handle() and jbd2_journal_wait_updates(), the
> synchronization happens via read_lock(journal->j_state_lock) in
> start_this_handle();
>
> 2. jbd2_journal_extend()
> a. jbd2_journal_extend() is called with the handle of each process from
> task_struct. So no lock required in updating member fields of handle_t
>
> b. For member fields of h_transaction, all updates happens only via
> atomic APIs (which is also within read_lock()).
> So, no need of this transaction spinlock.
>
> 3. update_t_max_wait()
> Based on Jan suggestion, this can be carefully removed using atomic
> cmpxchg API.
> Note that there can be several processes which are waiting for a new
> transaction to be allocated and started. For doing this only one
> process will succeed in taking write_lock() and allocating a new txn.
> After that all of the process will be updating the t_max_wait (max
> transaction wait time). This can be done via below method w/o taking
> any locks using atomic cmpxchg.
> For more details refer [1]
>
> new = get_new_val();
> old = READ_ONCE(ptr->max_val);
> while (old < new)
> old = cmpxchg(&ptr->max_val, old, new);
>
> [1]: https://lwn.net/Articles/849237/
> Suggested-by: Jan Kara <[email protected]>
> Signed-off-by: Ritesh Harjani <[email protected]>

Looks good. Feel free to add:

Reviewed-by: Jan Kara <[email protected]>

Honza

> ---
> fs/jbd2/transaction.c | 29 +++++++++--------------------
> include/linux/jbd2.h | 3 ---
> 2 files changed, 9 insertions(+), 23 deletions(-)
>
> diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
> index 8e2f8275a253..68dd7de49aff 100644
> --- a/fs/jbd2/transaction.c
> +++ b/fs/jbd2/transaction.c
> @@ -107,7 +107,6 @@ static void jbd2_get_transaction(journal_t *journal,
> transaction->t_start_time = ktime_get();
> transaction->t_tid = journal->j_transaction_sequence++;
> transaction->t_expires = jiffies + journal->j_commit_interval;
> - spin_lock_init(&transaction->t_handle_lock);
> atomic_set(&transaction->t_updates, 0);
> atomic_set(&transaction->t_outstanding_credits,
> jbd2_descriptor_blocks_per_trans(journal) +
> @@ -139,24 +138,21 @@ static void jbd2_get_transaction(journal_t *journal,
> /*
> * Update transaction's maximum wait time, if debugging is enabled.
> *
> - * In order for t_max_wait to be reliable, it must be protected by a
> - * lock. But doing so will mean that start_this_handle() can not be
> - * run in parallel on SMP systems, which limits our scalability. So
> - * unless debugging is enabled, we no longer update t_max_wait, which
> - * means that maximum wait time reported by the jbd2_run_stats
> - * tracepoint will always be zero.
> + * t_max_wait is carefully updated here with use of atomic compare exchange.
> + * Note that there could be multiplre threads trying to do this simultaneously
> + * hence using cmpxchg to avoid any use of locks in this case.
> */
> static inline void update_t_max_wait(transaction_t *transaction,
> unsigned long ts)
> {
> #ifdef CONFIG_JBD2_DEBUG
> + unsigned long oldts, newts;
> if (jbd2_journal_enable_debug &&
> time_after(transaction->t_start, ts)) {
> - ts = jbd2_time_diff(ts, transaction->t_start);
> - spin_lock(&transaction->t_handle_lock);
> - if (ts > transaction->t_max_wait)
> - transaction->t_max_wait = ts;
> - spin_unlock(&transaction->t_handle_lock);
> + newts = jbd2_time_diff(ts, transaction->t_start);
> + oldts = READ_ONCE(transaction->t_max_wait);
> + while (oldts < newts)
> + oldts = cmpxchg(&transaction->t_max_wait, oldts, newts);
> }
> #endif
> }
> @@ -690,7 +686,6 @@ int jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records)
> DIV_ROUND_UP(
> handle->h_revoke_credits_requested,
> journal->j_revoke_records_per_block);
> - spin_lock(&transaction->t_handle_lock);
> wanted = atomic_add_return(nblocks,
> &transaction->t_outstanding_credits);
>
> @@ -698,7 +693,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records)
> jbd_debug(3, "denied handle %p %d blocks: "
> "transaction too large\n", handle, nblocks);
> atomic_sub(nblocks, &transaction->t_outstanding_credits);
> - goto unlock;
> + goto error_out;
> }
>
> trace_jbd2_handle_extend(journal->j_fs_dev->bd_dev,
> @@ -714,8 +709,6 @@ int jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records)
> result = 0;
>
> jbd_debug(3, "extended handle %p by %d\n", handle, nblocks);
> -unlock:
> - spin_unlock(&transaction->t_handle_lock);
> error_out:
> read_unlock(&journal->j_state_lock);
> return result;
> @@ -847,22 +840,18 @@ void jbd2_journal_wait_updates(journal_t *journal)
> if (!commit_transaction)
> return;
>
> - spin_lock(&commit_transaction->t_handle_lock);
> while (atomic_read(&commit_transaction->t_updates)) {
> DEFINE_WAIT(wait);
>
> prepare_to_wait(&journal->j_wait_updates, &wait,
> TASK_UNINTERRUPTIBLE);
> if (atomic_read(&commit_transaction->t_updates)) {
> - spin_unlock(&commit_transaction->t_handle_lock);
> write_unlock(&journal->j_state_lock);
> schedule();
> write_lock(&journal->j_state_lock);
> - spin_lock(&commit_transaction->t_handle_lock);
> }
> finish_wait(&journal->j_wait_updates, &wait);
> }
> - spin_unlock(&commit_transaction->t_handle_lock);
> }
>
> /**
> diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
> index 575c3057a98a..500a95f8c914 100644
> --- a/include/linux/jbd2.h
> +++ b/include/linux/jbd2.h
> @@ -554,9 +554,6 @@ struct transaction_chp_stats_s {
> * ->j_list_lock
> *
> * j_state_lock
> - * ->t_handle_lock
> - *
> - * j_state_lock
> * ->j_list_lock (journal_unmap_buffer)
> *
> */
> --
> 2.31.1
>
--
Jan Kara <[email protected]>
SUSE Labs, CR