dm-snapshot doesn't work with realtime kernels since commit f79ae415b64c.
hlist_bl is implemented as a raw spinlock, and the code takes two non-raw
spinlocks while holding hlist_bl (non-raw spinlocks are blocking mutexes
in the realtime kernel).
We can't change hlist_bl to use non-raw spinlocks, because that triggers
warnings in the dentry lookup code, which uses hlist_bl while holding a
seqlock.
This patch fixes the problem by using a non-raw spinlock,
exception_table_lock, instead of the hlist_bl lock when
CONFIG_PREEMPT_RT_BASE is set.
Signed-off-by: Mikulas Patocka <[email protected]>
Fixes: f79ae415b64c ("dm snapshot: Make exception tables scalable")
---
drivers/md/dm-snap.c | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+)
Index: linux-2.6/drivers/md/dm-snap.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-snap.c 2019-11-12 16:44:36.000000000 +0100
+++ linux-2.6/drivers/md/dm-snap.c 2019-11-12 17:01:46.000000000 +0100
@@ -141,6 +141,10 @@ struct dm_snapshot {
* for them to be committed.
*/
struct bio_list bios_queued_during_merge;
+
+#ifdef CONFIG_PREEMPT_RT_BASE
+ spinlock_t exception_table_lock;
+#endif
};
/*
@@ -625,30 +629,46 @@ static uint32_t exception_hash(struct dm
/* Lock to protect access to the completed and pending exception hash tables. */
struct dm_exception_table_lock {
+#ifndef CONFIG_PREEMPT_RT_BASE
struct hlist_bl_head *complete_slot;
struct hlist_bl_head *pending_slot;
+#else
+ spinlock_t *lock;
+#endif
};
static void dm_exception_table_lock_init(struct dm_snapshot *s, chunk_t chunk,
struct dm_exception_table_lock *lock)
{
+#ifndef CONFIG_PREEMPT_RT_BASE
struct dm_exception_table *complete = &s->complete;
struct dm_exception_table *pending = &s->pending;
lock->complete_slot = &complete->table[exception_hash(complete, chunk)];
lock->pending_slot = &pending->table[exception_hash(pending, chunk)];
+#else
+ lock->lock = &s->exception_table_lock;
+#endif
}
static void dm_exception_table_lock(struct dm_exception_table_lock *lock)
{
+#ifndef CONFIG_PREEMPT_RT_BASE
hlist_bl_lock(lock->complete_slot);
hlist_bl_lock(lock->pending_slot);
+#else
+ spin_lock(lock->lock);
+#endif
}
static void dm_exception_table_unlock(struct dm_exception_table_lock *lock)
{
+#ifndef CONFIG_PREEMPT_RT_BASE
hlist_bl_unlock(lock->pending_slot);
hlist_bl_unlock(lock->complete_slot);
+#else
+ spin_unlock(lock->lock);
+#endif
}
static int dm_exception_table_init(struct dm_exception_table *et,
@@ -1318,6 +1338,9 @@ static int snapshot_ctr(struct dm_target
s->first_merging_chunk = 0;
s->num_merging_chunks = 0;
bio_list_init(&s->bios_queued_during_merge);
+#ifdef CONFIG_PREEMPT_RT_BASE
+ spin_lock_init(&s->exception_table_lock);
+#endif
/* Allocate hash table for COW data */
if (init_hash_tables(s)) {
On 11/12/19 6:09 PM, Mikulas Patocka wrote:
> Snapshot doesn't work with realtime kernels since the commit f79ae415b64c.
> hlist_bl is implemented as a raw spinlock and the code takes two non-raw
> spinlocks while holding hlist_bl (non-raw spinlocks are blocking mutexes
> in the realtime kernel).
>
> We can't change hlist_bl to use non-raw spinlocks, this triggers warnings
> in dentry lookup code, because the dentry lookup code uses hlist_bl while
> holding a seqlock.
>
> This patch fixes the problem by using non-raw spinlock
> exception_table_lock instead of the hlist_bl lock.
>
> Signed-off-by: Mikulas Patocka <[email protected]>
> Fixes: f79ae415b64c ("dm snapshot: Make exception tables scalable")
>
Reviewed-by: Nikos Tsironis <[email protected]>
> ---
> drivers/md/dm-snap.c | 23 +++++++++++++++++++++++
> 1 file changed, 23 insertions(+)
>
> Index: linux-2.6/drivers/md/dm-snap.c
> ===================================================================
> --- linux-2.6.orig/drivers/md/dm-snap.c 2019-11-12 16:44:36.000000000 +0100
> +++ linux-2.6/drivers/md/dm-snap.c 2019-11-12 17:01:46.000000000 +0100
> @@ -141,6 +141,10 @@ struct dm_snapshot {
> * for them to be committed.
> */
> struct bio_list bios_queued_during_merge;
> +
> +#ifdef CONFIG_PREEMPT_RT_BASE
> + spinlock_t exception_table_lock;
> +#endif
> };
>
> /*
> @@ -625,30 +629,46 @@ static uint32_t exception_hash(struct dm
>
> /* Lock to protect access to the completed and pending exception hash tables. */
> struct dm_exception_table_lock {
> +#ifndef CONFIG_PREEMPT_RT_BASE
> struct hlist_bl_head *complete_slot;
> struct hlist_bl_head *pending_slot;
> +#else
> + spinlock_t *lock;
> +#endif
> };
>
> static void dm_exception_table_lock_init(struct dm_snapshot *s, chunk_t chunk,
> struct dm_exception_table_lock *lock)
> {
> +#ifndef CONFIG_PREEMPT_RT_BASE
> struct dm_exception_table *complete = &s->complete;
> struct dm_exception_table *pending = &s->pending;
>
> lock->complete_slot = &complete->table[exception_hash(complete, chunk)];
> lock->pending_slot = &pending->table[exception_hash(pending, chunk)];
> +#else
> + lock->lock = &s->exception_table_lock;
> +#endif
> }
>
> static void dm_exception_table_lock(struct dm_exception_table_lock *lock)
> {
> +#ifndef CONFIG_PREEMPT_RT_BASE
> hlist_bl_lock(lock->complete_slot);
> hlist_bl_lock(lock->pending_slot);
> +#else
> + spin_lock(lock->lock);
> +#endif
> }
>
> static void dm_exception_table_unlock(struct dm_exception_table_lock *lock)
> {
> +#ifndef CONFIG_PREEMPT_RT_BASE
> hlist_bl_unlock(lock->pending_slot);
> hlist_bl_unlock(lock->complete_slot);
> +#else
> + spin_unlock(lock->lock);
> +#endif
> }
>
> static int dm_exception_table_init(struct dm_exception_table *et,
> @@ -1318,6 +1338,9 @@ static int snapshot_ctr(struct dm_target
> s->first_merging_chunk = 0;
> s->num_merging_chunks = 0;
> bio_list_init(&s->bios_queued_during_merge);
> +#ifdef CONFIG_PREEMPT_RT_BASE
> + spin_lock_init(&s->exception_table_lock);
> +#endif
>
> /* Allocate hash table for COW data */
> if (init_hash_tables(s)) {
>
On 2019-11-12 11:09:51 [-0500], Mikulas Patocka wrote:
> Snapshot doesn't work with realtime kernels since the commit f79ae415b64c.
> hlist_bl is implemented as a raw spinlock and the code takes two non-raw
> spinlocks while holding hlist_bl (non-raw spinlocks are blocking mutexes
> in the realtime kernel).
this series is still on the list of things for me to look at…
Sebastian
On 2019-11-12 11:09:51 [-0500], Mikulas Patocka wrote:
> ===================================================================
> --- linux-2.6.orig/drivers/md/dm-snap.c 2019-11-12 16:44:36.000000000 +0100
> +++ linux-2.6/drivers/md/dm-snap.c 2019-11-12 17:01:46.000000000 +0100
…
> static void dm_exception_table_lock(struct dm_exception_table_lock *lock)
> {
> +#ifndef CONFIG_PREEMPT_RT_BASE
> hlist_bl_lock(lock->complete_slot);
> hlist_bl_lock(lock->pending_slot);
> +#else
> + spin_lock(lock->lock);
If you also set the lowest bit for complete_slot and pending_slot, then
patch 2 of this mini-series wouldn't be required. That means we could
keep the debug code on -RT. Or am I missing something?
> +#endif
> }
Sebastian