From: Rao Shoaib <[email protected]>
This patch moves kfree_call_rcu() and the related macros out of the RCU code. A
new function, __call_rcu_lazy(), is created for calling __call_rcu() with the
lazy flag. Moving the macros also generated the following checkpatch noise; I do
not know how to silence checkpatch, as there is nothing wrong.
CHECK: Macro argument reuse 'offset' - possible side-effects?
#91: FILE: include/linux/slab.h:348:
+#define __kfree_rcu(head, offset) \
+ do { \
+ BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); \
+ kfree_call_rcu(head, (rcu_callback_t)(unsigned long)(offset)); \
+ } while (0)
CHECK: Macro argument reuse 'ptr' - possible side-effects?
#123: FILE: include/linux/slab.h:380:
+#define kfree_rcu(ptr, rcu_head) \
+ __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head))
CHECK: Macro argument reuse 'rcu_head' - possible side-effects?
#123: FILE: include/linux/slab.h:380:
+#define kfree_rcu(ptr, rcu_head) \
+ __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head))
total: 0 errors, 0 warnings, 3 checks, 156 lines checked
Signed-off-by: Rao Shoaib <[email protected]>
---
include/linux/rcupdate.h | 41 ++---------------------------------------
include/linux/rcutree.h | 2 --
include/linux/slab.h | 37 +++++++++++++++++++++++++++++++++++++
kernel/rcu/tree.c | 24 ++++++++++--------------
mm/slab_common.c | 10 ++++++++++
5 files changed, 59 insertions(+), 55 deletions(-)
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index a6ddc42..d2c25d8 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -55,6 +55,8 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func);
#define call_rcu call_rcu_sched
#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
+void __call_rcu_lazy(struct rcu_head *head, rcu_callback_t func);
+
void call_rcu_bh(struct rcu_head *head, rcu_callback_t func);
void call_rcu_sched(struct rcu_head *head, rcu_callback_t func);
void synchronize_sched(void);
@@ -838,45 +840,6 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
#define __is_kfree_rcu_offset(offset) ((offset) < 4096)
/*
- * Helper macro for kfree_rcu() to prevent argument-expansion eyestrain.
- */
-#define __kfree_rcu(head, offset) \
- do { \
- BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); \
- kfree_call_rcu(head, (rcu_callback_t)(unsigned long)(offset)); \
- } while (0)
-
-/**
- * kfree_rcu() - kfree an object after a grace period.
- * @ptr: pointer to kfree
- * @rcu_head: the name of the struct rcu_head within the type of @ptr.
- *
- * Many rcu callbacks functions just call kfree() on the base structure.
- * These functions are trivial, but their size adds up, and furthermore
- * when they are used in a kernel module, that module must invoke the
- * high-latency rcu_barrier() function at module-unload time.
- *
- * The kfree_rcu() function handles this issue. Rather than encoding a
- * function address in the embedded rcu_head structure, kfree_rcu() instead
- * encodes the offset of the rcu_head structure within the base structure.
- * Because the functions are not allowed in the low-order 4096 bytes of
- * kernel virtual memory, offsets up to 4095 bytes can be accommodated.
- * If the offset is larger than 4095 bytes, a compile-time error will
- * be generated in __kfree_rcu(). If this error is triggered, you can
- * either fall back to use of call_rcu() or rearrange the structure to
- * position the rcu_head structure into the first 4096 bytes.
- *
- * Note that the allowable offset might decrease in the future, for example,
- * to allow something like kmem_cache_free_rcu().
- *
- * The BUILD_BUG_ON check must not involve any function calls, hence the
- * checks are done in macros here.
- */
-#define kfree_rcu(ptr, rcu_head) \
- __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head))
-
-
-/*
* Place this after a lock-acquisition primitive to guarantee that
* an UNLOCK+LOCK pair acts as a full barrier. This guarantee applies
* if the UNLOCK and LOCK are executed by the same CPU or if the
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 37d6fd3..7746b19 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -48,8 +48,6 @@ void synchronize_rcu_bh(void);
void synchronize_sched_expedited(void);
void synchronize_rcu_expedited(void);
-void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func);
-
/**
* synchronize_rcu_bh_expedited - Brute-force RCU-bh grace period
*
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 50697a1..36d6431 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -342,6 +342,43 @@ void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __malloc;
void *kmem_cache_alloc(struct kmem_cache *, gfp_t flags) __assume_slab_alignment __malloc;
void kmem_cache_free(struct kmem_cache *, void *);
+void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func);
+
+/* Helper macro for kfree_rcu() to prevent argument-expansion eyestrain. */
+#define __kfree_rcu(head, offset) \
+ do { \
+ BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); \
+ kfree_call_rcu(head, (rcu_callback_t)(unsigned long)(offset)); \
+ } while (0)
+
+/**
+ * kfree_rcu() - kfree an object after a grace period.
+ * @ptr: pointer to kfree
+ * @rcu_head: the name of the struct rcu_head within the type of @ptr.
+ *
+ * Many rcu callbacks functions just call kfree() on the base structure.
+ * These functions are trivial, but their size adds up, and furthermore
+ * when they are used in a kernel module, that module must invoke the
+ * high-latency rcu_barrier() function at module-unload time.
+ *
+ * The kfree_rcu() function handles this issue. Rather than encoding a
+ * function address in the embedded rcu_head structure, kfree_rcu() instead
+ * encodes the offset of the rcu_head structure within the base structure.
+ * Because the functions are not allowed in the low-order 4096 bytes of
+ * kernel virtual memory, offsets up to 4095 bytes can be accommodated.
+ * If the offset is larger than 4095 bytes, a compile-time error will
+ * be generated in __kfree_rcu(). If this error is triggered, you can
+ * either fall back to use of call_rcu() or rearrange the structure to
+ * position the rcu_head structure into the first 4096 bytes.
+ *
+ * Note that the allowable offset might decrease in the future, for example,
+ * to allow something like kmem_cache_free_rcu().
+ *
+ * The BUILD_BUG_ON check must not involve any function calls, hence the
+ * checks are done in macros here.
+ */
+#define kfree_rcu(ptr, rcu_head) \
+ __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head))
/*
* Bulk allocation and freeing operations. These are accelerated in an
* allocator specific way to avoid taking locks repeatedly or building
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index f9c0ca2..943137d 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3180,6 +3180,16 @@ void call_rcu_sched(struct rcu_head *head, rcu_callback_t func)
}
EXPORT_SYMBOL_GPL(call_rcu_sched);
+/* Queue an RCU callback for lazy invocation after a grace period.
+ * Currently there is no way of tagging the lazy RCU callbacks in the
+ * list of pending callbacks. Until then, this function may only be
+ * called from kfree_call_rcu().
+ */
+void __call_rcu_lazy(struct rcu_head *head, rcu_callback_t func)
+{
+ __call_rcu(head, func, &rcu_sched_state, -1, 1);
+}
+
/**
* call_rcu_bh() - Queue an RCU for invocation after a quicker grace period.
* @head: structure to be used for queueing the RCU updates.
@@ -3209,20 +3219,6 @@ void call_rcu_bh(struct rcu_head *head, rcu_callback_t func)
EXPORT_SYMBOL_GPL(call_rcu_bh);
/*
- * Queue an RCU callback for lazy invocation after a grace period.
- * This will likely be later named something like "call_rcu_lazy()",
- * but this change will require some way of tagging the lazy RCU
- * callbacks in the list of pending callbacks. Until then, this
- * function may only be called from __kfree_rcu().
- */
-void kfree_call_rcu(struct rcu_head *head,
- rcu_callback_t func)
-{
- __call_rcu(head, func, rcu_state_p, -1, 1);
-}
-EXPORT_SYMBOL_GPL(kfree_call_rcu);
-
-/*
* Because a context switch is a grace period for RCU-sched and RCU-bh,
* any blocking grace-period wait automatically implies a grace period
* if there is only one CPU online at any point time during execution
diff --git a/mm/slab_common.c b/mm/slab_common.c
index c8cb367..0bb8a75 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -1483,6 +1483,16 @@ void kzfree(const void *p)
}
EXPORT_SYMBOL(kzfree);
+/*
+ * Queue Memory to be freed by RCU after a grace period.
+ */
+void kfree_call_rcu(struct rcu_head *head,
+ rcu_callback_t func)
+{
+ __call_rcu_lazy(head, func);
+}
+EXPORT_SYMBOL_GPL(kfree_call_rcu);
+
/* Tracepoints definitions. */
EXPORT_TRACEPOINT_SYMBOL(kmalloc);
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
--
2.7.4
On Thu, Dec 21, 2017 at 12:19:47AM -0800, [email protected] wrote:
> This patch moves kfree_call_rcu() and related macros out of rcu code. A new
> function __call_rcu_lazy() is created for calling __call_rcu() with the lazy
> flag.
Something you probably didn't know ... there are two RCU implementations
in the kernel: Tree and Tiny. It looks like you've only added
__call_rcu_lazy() to Tree and you'll also need to add it to Tiny.
> Also moving macros generated following checkpatch noise. I do not know
> how to silence checkpatch as there is nothing wrong.
>
> CHECK: Macro argument reuse 'offset' - possible side-effects?
> #91: FILE: include/linux/slab.h:348:
> +#define __kfree_rcu(head, offset) \
> + do { \
> + BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); \
> + kfree_call_rcu(head, (rcu_callback_t)(unsigned long)(offset)); \
> + } while (0)
What checkpatch is warning you about here is that somebody might call
__kfree_rcu(p, a++);
and this would expand into
	do { \
		BUILD_BUG_ON(!__is_kfree_rcu_offset(a++)); \
		kfree_call_rcu(p, (rcu_callback_t)(unsigned long)(a++)); \
	} while (0)
which would increment 'a' twice, and cause pain and suffering.
That's pretty unlikely usage of __kfree_rcu(), but I suppose it's not
impossible. We have various hacks to get around this kind of thing;
for example I might do this as:

#define __kfree_rcu(head, offset) \
	do { \
		unsigned long __o = offset; \
		BUILD_BUG_ON(!__is_kfree_rcu_offset(__o)); \
		kfree_call_rcu(head, (rcu_callback_t)(unsigned long)(__o)); \
	} while (0)
Now offset is only evaluated once per invocation of the macro. The other
two warnings are the same problem.
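For completeness: of the other two, 'rcu_head' is a member name and so
cannot be captured in a temporary, but 'ptr' could be evaluated once in
the same style. A rough, untested sketch -- the __p temporary is my
invention, not anything in the tree:

#define kfree_rcu(ptr, rcu_head)					\
	do {								\
		typeof(ptr) __p = (ptr);				\
		__kfree_rcu(&(__p->rcu_head),				\
			    offsetof(typeof(*__p), rcu_head));		\
	} while (0)

In practice it matters less there, since offsetof(typeof(*(ptr)), ...)
does not evaluate 'ptr', so the existing macro only evaluates it once
anyway.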
On Thu, Dec 21, 2017 at 12:19:47AM -0800, [email protected] wrote:
> From: Rao Shoaib <[email protected]>
>
> This patch moves kfree_call_rcu() and related macros out of rcu code.
I do very much like this idea!
> A new
> function __call_rcu_lazy() is created for calling __call_rcu() with the lazy
> flag. Also moving macros generated following checkpatch noise. I do not know
> how to silence checkpatch as there is nothing wrong.
>
> CHECK: Macro argument reuse 'offset' - possible side-effects?
> #91: FILE: include/linux/slab.h:348:
> +#define __kfree_rcu(head, offset) \
> + do { \
> + BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); \
> + kfree_call_rcu(head, (rcu_callback_t)(unsigned long)(offset)); \
> + } while (0)
>
> CHECK: Macro argument reuse 'ptr' - possible side-effects?
> #123: FILE: include/linux/slab.h:380:
> +#define kfree_rcu(ptr, rcu_head) \
> + __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head))
>
> CHECK: Macro argument reuse 'rcu_head' - possible side-effects?
> #123: FILE: include/linux/slab.h:380:
> +#define kfree_rcu(ptr, rcu_head) \
> + __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head))
Matthew Wilcox already covered these.
Please see below.
> total: 0 errors, 0 warnings, 3 checks, 156 lines checked
>
>
> Signed-off-by: Rao Shoaib <[email protected]>
> ---
> include/linux/rcupdate.h | 41 ++---------------------------------------
> include/linux/rcutree.h | 2 --
> include/linux/slab.h | 37 +++++++++++++++++++++++++++++++++++++
> kernel/rcu/tree.c | 24 ++++++++++--------------
> mm/slab_common.c | 10 ++++++++++
> 5 files changed, 59 insertions(+), 55 deletions(-)
>
> diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
> index a6ddc42..d2c25d8 100644
> --- a/include/linux/rcupdate.h
> +++ b/include/linux/rcupdate.h
> @@ -55,6 +55,8 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func);
> #define call_rcu call_rcu_sched
> #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
>
> +void __call_rcu_lazy(struct rcu_head *head, rcu_callback_t func);
We don't really need the "__" prefix here.
> void call_rcu_bh(struct rcu_head *head, rcu_callback_t func);
> void call_rcu_sched(struct rcu_head *head, rcu_callback_t func);
> void synchronize_sched(void);
> @@ -838,45 +840,6 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
> #define __is_kfree_rcu_offset(offset) ((offset) < 4096)
>
> /*
> - * Helper macro for kfree_rcu() to prevent argument-expansion eyestrain.
> - */
> -#define __kfree_rcu(head, offset) \
> - do { \
> - BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); \
> - kfree_call_rcu(head, (rcu_callback_t)(unsigned long)(offset)); \
> - } while (0)
> -
> -/**
> - * kfree_rcu() - kfree an object after a grace period.
> - * @ptr: pointer to kfree
> - * @rcu_head: the name of the struct rcu_head within the type of @ptr.
> - *
> - * Many rcu callbacks functions just call kfree() on the base structure.
> - * These functions are trivial, but their size adds up, and furthermore
> - * when they are used in a kernel module, that module must invoke the
> - * high-latency rcu_barrier() function at module-unload time.
> - *
> - * The kfree_rcu() function handles this issue. Rather than encoding a
> - * function address in the embedded rcu_head structure, kfree_rcu() instead
> - * encodes the offset of the rcu_head structure within the base structure.
> - * Because the functions are not allowed in the low-order 4096 bytes of
> - * kernel virtual memory, offsets up to 4095 bytes can be accommodated.
> - * If the offset is larger than 4095 bytes, a compile-time error will
> - * be generated in __kfree_rcu(). If this error is triggered, you can
> - * either fall back to use of call_rcu() or rearrange the structure to
> - * position the rcu_head structure into the first 4096 bytes.
> - *
> - * Note that the allowable offset might decrease in the future, for example,
> - * to allow something like kmem_cache_free_rcu().
> - *
> - * The BUILD_BUG_ON check must not involve any function calls, hence the
> - * checks are done in macros here.
> - */
> -#define kfree_rcu(ptr, rcu_head) \
> - __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head))
> -
> -
> -/*
> * Place this after a lock-acquisition primitive to guarantee that
> * an UNLOCK+LOCK pair acts as a full barrier. This guarantee applies
> * if the UNLOCK and LOCK are executed by the same CPU or if the
> diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
> index 37d6fd3..7746b19 100644
> --- a/include/linux/rcutree.h
> +++ b/include/linux/rcutree.h
> @@ -48,8 +48,6 @@ void synchronize_rcu_bh(void);
> void synchronize_sched_expedited(void);
> void synchronize_rcu_expedited(void);
>
> -void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func);
> -
> /**
> * synchronize_rcu_bh_expedited - Brute-force RCU-bh grace period
> *
> diff --git a/include/linux/slab.h b/include/linux/slab.h
> index 50697a1..36d6431 100644
> --- a/include/linux/slab.h
> +++ b/include/linux/slab.h
> @@ -342,6 +342,43 @@ void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __malloc;
> void *kmem_cache_alloc(struct kmem_cache *, gfp_t flags) __assume_slab_alignment __malloc;
> void kmem_cache_free(struct kmem_cache *, void *);
>
> +void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func);
> +
> +/* Helper macro for kfree_rcu() to prevent argument-expansion eyestrain. */
> +#define __kfree_rcu(head, offset) \
> + do { \
> + BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); \
> + kfree_call_rcu(head, (rcu_callback_t)(unsigned long)(offset)); \
> + } while (0)
> +
> +/**
> + * kfree_rcu() - kfree an object after a grace period.
> + * @ptr: pointer to kfree
> + * @rcu_head: the name of the struct rcu_head within the type of @ptr.
> + *
> + * Many rcu callbacks functions just call kfree() on the base structure.
> + * These functions are trivial, but their size adds up, and furthermore
> + * when they are used in a kernel module, that module must invoke the
> + * high-latency rcu_barrier() function at module-unload time.
> + *
> + * The kfree_rcu() function handles this issue. Rather than encoding a
> + * function address in the embedded rcu_head structure, kfree_rcu() instead
> + * encodes the offset of the rcu_head structure within the base structure.
> + * Because the functions are not allowed in the low-order 4096 bytes of
> + * kernel virtual memory, offsets up to 4095 bytes can be accommodated.
> + * If the offset is larger than 4095 bytes, a compile-time error will
> + * be generated in __kfree_rcu(). If this error is triggered, you can
> + * either fall back to use of call_rcu() or rearrange the structure to
> + * position the rcu_head structure into the first 4096 bytes.
> + *
> + * Note that the allowable offset might decrease in the future, for example,
> + * to allow something like kmem_cache_free_rcu().
> + *
> + * The BUILD_BUG_ON check must not involve any function calls, hence the
> + * checks are done in macros here.
> + */
> +#define kfree_rcu(ptr, rcu_head) \
> + __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head))
> /*
> * Bulk allocation and freeing operations. These are accelerated in an
> * allocator specific way to avoid taking locks repeatedly or building
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index f9c0ca2..943137d 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -3180,6 +3180,16 @@ void call_rcu_sched(struct rcu_head *head, rcu_callback_t func)
> }
> EXPORT_SYMBOL_GPL(call_rcu_sched);
>
> +/* Queue an RCU callback for lazy invocation after a grace period.
> + * Currently there is no way of tagging the lazy RCU callbacks in the
> + * list of pending callbacks. Until then, this function may only be
> + * called from kfree_call_rcu().
But now we might have a way.
If the value in ->func is too small to be a valid function, RCU invokes
a fixed function name. This function can then look at ->func and do
whatever it wants, for example, maintaining an array indexed by the
->func value that says what function to call and what else to pass it,
including for example the slab pointer and offset.
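Purely to illustrate the idea (none of these names exist today, and the
limit is made up), the demultiplexing could be as simple as:

#define RCU_NR_LAZY_TOKENS	16

struct rcu_lazy_handler {
	void (*func)(struct rcu_head *head, unsigned long token);
};

static struct rcu_lazy_handler rcu_lazy_handlers[RCU_NR_LAZY_TOKENS];

/* What RCU would invoke when (unsigned long)head->func is a small token. */
static void rcu_lazy_demux(struct rcu_head *head)
{
	unsigned long token = (unsigned long)head->func;

	rcu_lazy_handlers[token].func(head, token);
}

with kfree_call_rcu() and friends claiming a slot in that array and
recovering whatever they need (slab pointer, offset, ...) from the
rcu_head and the token.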
Thoughts?
Thanx, Paul
> + */
> +void __call_rcu_lazy(struct rcu_head *head, rcu_callback_t func)
> +{
> + __call_rcu(head, func, &rcu_sched_state, -1, 1);
> +}
> +
> /**
> * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period.
> * @head: structure to be used for queueing the RCU updates.
> @@ -3209,20 +3219,6 @@ void call_rcu_bh(struct rcu_head *head, rcu_callback_t func)
> EXPORT_SYMBOL_GPL(call_rcu_bh);
>
> /*
> - * Queue an RCU callback for lazy invocation after a grace period.
> - * This will likely be later named something like "call_rcu_lazy()",
> - * but this change will require some way of tagging the lazy RCU
> - * callbacks in the list of pending callbacks. Until then, this
> - * function may only be called from __kfree_rcu().
> - */
> -void kfree_call_rcu(struct rcu_head *head,
> - rcu_callback_t func)
> -{
> - __call_rcu(head, func, rcu_state_p, -1, 1);
> -}
> -EXPORT_SYMBOL_GPL(kfree_call_rcu);
> -
> -/*
> * Because a context switch is a grace period for RCU-sched and RCU-bh,
> * any blocking grace-period wait automatically implies a grace period
> * if there is only one CPU online at any point time during execution
> diff --git a/mm/slab_common.c b/mm/slab_common.c
> index c8cb367..0bb8a75 100644
> --- a/mm/slab_common.c
> +++ b/mm/slab_common.c
> @@ -1483,6 +1483,16 @@ void kzfree(const void *p)
> }
> EXPORT_SYMBOL(kzfree);
>
> +/*
> + * Queue Memory to be freed by RCU after a grace period.
> + */
> +void kfree_call_rcu(struct rcu_head *head,
> + rcu_callback_t func)
> +{
> + __call_rcu_lazy(head, func);
> +}
> +EXPORT_SYMBOL_GPL(kfree_call_rcu);
> +
> /* Tracepoints definitions. */
> EXPORT_TRACEPOINT_SYMBOL(kmalloc);
> EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
> --
> 2.7.4
>
On Thu, Dec 21, 2017 at 07:54:34AM -0800, Paul E. McKenney wrote:
> > +/* Queue an RCU callback for lazy invocation after a grace period.
> > + * Currently there is no way of tagging the lazy RCU callbacks in the
> > + * list of pending callbacks. Until then, this function may only be
> > + * called from kfree_call_rcu().
>
> But now we might have a way.
>
> If the value in ->func is too small to be a valid function, RCU invokes
> a fixed function name. This function can then look at ->func and do
> whatever it wants, for example, maintaining an array indexed by the
> ->func value that says what function to call and what else to pass it,
> including for example the slab pointer and offset.
>
> Thoughts?
Thought 1 is that we can force functions to be quad-byte aligned on all
architectures (gcc option -falign-functions=...), so we can have more
than the 4096 different values we currently use. We can get 63.5 bits of
information into that ->func argument if we align functions to at least
4 bytes, or 63 if we only force alignment to a 2-byte boundary. I'm not
sure if we support any architecture other than x86 with byte-aligned
instructions. (I'm assuming that function descriptors as used on POWER
and ia64 will also be sensibly aligned).
Thought 2 is that the slab is quite capable of getting the slab pointer
from the address of the object -- virt_to_head_page(p)->slab_cache
So sorting objects by address is as good as storing their slab caches
and offsets.
Thought 3 is that we probably don't want to overengineer this.
Just allocating a 14-entry buffer (along with an RCU head) is probably
enough to give us at least 90% of the wins that a more complex solution
would give.
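By a "14-entry buffer" I mean something of roughly this shape (names
invented, untested):

struct kfree_rcu_batch {
	struct rcu_head rcu;
	unsigned int nr;
	void *objs[14];
};

static void kfree_rcu_batch_free(struct rcu_head *rcu)
{
	struct kfree_rcu_batch *b =
		container_of(rcu, struct kfree_rcu_batch, rcu);
	unsigned int i;

	for (i = 0; i < b->nr; i++)
		kfree(b->objs[i]);
	kfree(b);
}

i.e. gather pointers until the buffer fills (or a timer expires), hand
the whole batch to call_rcu(), and free everything in one pass after the
grace period.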
On 12/21/2017 04:36 AM, Matthew Wilcox wrote:
> On Thu, Dec 21, 2017 at 12:19:47AM -0800, [email protected] wrote:
>> This patch moves kfree_call_rcu() and related macros out of rcu code. A new
>> function __call_rcu_lazy() is created for calling __call_rcu() with the lazy
>> flag.
> Something you probably didn't know ... there are two RCU implementations
> in the kernel; Tree and Tiny. It looks like you've only added
> __call_rcu_lazy() to Tree and you'll also need to add it to Tiny.
I left it out on purpose because the call in tiny is a little different
rcutiny.h:
static inline void kfree_call_rcu(struct rcu_head *head,
void (*func)(struct rcu_head *rcu))
{
call_rcu(head, func);
}
tree.c:
void kfree_call_rcu(struct rcu_head *head,
void (*func)(struct rcu_head *rcu))
{
__call_rcu(head, func, rcu_state_p, -1, 1);
}
EXPORT_SYMBOL_GPL(kfree_call_rcu);
If we want the code to be exactly the same, I can create a lazy version
for tiny as well. However, I do not know where to move kfree_call_rcu()
from its current home in rcutiny.h. Any thoughts?
>
>> Also moving macros generated following checkpatch noise. I do not know
>> how to silence checkpatch as there is nothing wrong.
>>
>> CHECK: Macro argument reuse 'offset' - possible side-effects?
>> #91: FILE: include/linux/slab.h:348:
>> +#define __kfree_rcu(head, offset) \
>> + do { \
>> + BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); \
>> + kfree_call_rcu(head, (rcu_callback_t)(unsigned long)(offset)); \
>> + } while (0)
> What checkpatch is warning you about here is that somebody might call
>
> __kfree_rcu(p, a++);
>
> and this would expand into
>
> do { \
> BUILD_BUG_ON(!__is_kfree_rcu_offset(a++)); \
> kfree_call_rcu(p, (rcu_callback_t)(unsigned long)(a++)); \
> } while (0)
>
> which would increment 'a' twice, and cause pain and suffering.
>
> That's pretty unlikely usage of __kfree_rcu(), but I suppose it's not
> impossible. We have various hacks to get around this kind of thing;
> for example I might do this as::
>
> #define __kfree_rcu(head, offset) \
> do { \
> unsigned long __o = offset;
> BUILD_BUG_ON(!__is_kfree_rcu_offset(__o)); \
> kfree_call_rcu(head, (rcu_callback_t)(unsigned long)(__o)); \
> } while (0)
>
> Now offset is only evaluated once per invocation of the macro. The other
> two warnings are the same problem.
>
Thanks. I was not sure whether I was required to fix the noise or whether,
based on inspection, it could be ignored. I will make the change and resubmit.
Shoaib
On Thu, Dec 21, 2017 at 09:06:28AM -0800, Matthew Wilcox wrote:
> On Thu, Dec 21, 2017 at 07:54:34AM -0800, Paul E. McKenney wrote:
> > > +/* Queue an RCU callback for lazy invocation after a grace period.
> > > + * Currently there is no way of tagging the lazy RCU callbacks in the
> > > + * list of pending callbacks. Until then, this function may only be
> > > + * called from kfree_call_rcu().
> >
> > But now we might have a way.
> >
> > If the value in ->func is too small to be a valid function, RCU invokes
> > a fixed function name. This function can then look at ->func and do
> > whatever it wants, for example, maintaining an array indexed by the
> > ->func value that says what function to call and what else to pass it,
> > including for example the slab pointer and offset.
> >
> > Thoughts?
>
> Thought 1 is that we can force functions to be quad-byte aligned on all
> architectures (gcc option -falign-functions=...), so we can have more
> than the 4096 different values we currently use. We can get 63.5 bits of
> information into that ->func argument if we align functions to at least
> 4 bytes, or 63 if we only force alignment to a 2-byte boundary. I'm not
> sure if we support any architecture other than x86 with byte-aligned
> instructions. (I'm assuming that function descriptors as used on POWER
> and ia64 will also be sensibly aligned).
I do like this approach, especially should some additional subsystems
need this sort of special handling from RCU. It is also much faster
to demultiplex than alternative schemes based on address ranges and
the like.
How many bits are required by slab? Would ~56 bits (less the bottom
bit pattern reserved for function pointers) suffice on 64-bit systems
and ~24 bits on 32-bit systems? That would allow up to 256 specially
handled situations, which should be enough. (Famous last words!)
> Thought 2 is that the slab is quite capable of getting the slab pointer
> from the address of the object -- virt_to_head_page(p)->slab_cache
> So sorting objects by address is as good as storing their slab caches
> and offsets.
Different slabs can in some cases interleave their slabs of objects,
right? It might well be that grouping together different slabs from
the same slab cache doesn't help, but seems worth my asking the question.
> Thought 3 is that we probably don't want to overengineer this.
> Just allocating a 14-entry buffer (along with an RCU head) is probably
> enough to give us at least 90% of the wins that a more complex solution
> would give.
Can we benchmark this? After all, memory allocation can sometimes
counter one's intuition.
One alternative approach would be to allocate such a buffer per
slab cache, and run each slab cache through RCU independently.
Seems like this should allow some savings. Might not be worthwhile,
but again seemed worth asking the question.
Thanx, Paul
On Thu, Dec 21, 2017 at 05:27:41PM -0800, Paul E. McKenney wrote:
> On Thu, Dec 21, 2017 at 09:06:28AM -0800, Matthew Wilcox wrote:
> > On Thu, Dec 21, 2017 at 07:54:34AM -0800, Paul E. McKenney wrote:
> > > > +/* Queue an RCU callback for lazy invocation after a grace period.
> > > > + * Currently there is no way of tagging the lazy RCU callbacks in the
> > > > + * list of pending callbacks. Until then, this function may only be
> > > > + * called from kfree_call_rcu().
> > >
> > > But now we might have a way.
> > >
> > > If the value in ->func is too small to be a valid function, RCU invokes
> > > a fixed function name. This function can then look at ->func and do
> > > whatever it wants, for example, maintaining an array indexed by the
> > > ->func value that says what function to call and what else to pass it,
> > > including for example the slab pointer and offset.
> > >
> > > Thoughts?
> >
> > Thought 1 is that we can force functions to be quad-byte aligned on all
> > architectures (gcc option -falign-functions=...), so we can have more
> > than the 4096 different values we currently use. We can get 63.5 bits of
> > information into that ->func argument if we align functions to at least
> > 4 bytes, or 63 if we only force alignment to a 2-byte boundary. I'm not
> > sure if we support any architecture other than x86 with byte-aligned
> > instructions. (I'm assuming that function descriptors as used on POWER
> > and ia64 will also be sensibly aligned).
>
> I do like this approach, especially should some additional subsystems
> need this sort of special handling from RCU. It is also much faster
> to demultiplex than alternative schemes based on address ranges and
> the like.
Oh, and having four-byte alignment would allow making laziness orthogonal
to special handling, which should improve energy efficiency of callback
handling by allowing normal call_rcu() callbacks to invoke laziness.
(And would require renaming the call_rcu_lazy() API yet again, sorry Rao!)
Thanx, Paul
> How many bits are required by slab? Would ~56 bits (less the bottom
> bit pattern reserved for function pointers) suffice on 64-bit systems
> and ~24 bits on 32-bit systems? That would allow up to 256 specially
> handled situations, which should be enough. (Famous last words!)
>
> > Thought 2 is that the slab is quite capable of getting the slab pointer
> > from the address of the object -- virt_to_head_page(p)->slab_cache
> > So sorting objects by address is as good as storing their slab caches
> > and offsets.
>
> Different slabs can in some cases interleave their slabs of objects,
> right? It might well be that grouping together different slabs from
> the same slab cache doesn't help, but seems worth my asking the question.
>
> > Thought 3 is that we probably don't want to overengineer this.
> > Just allocating a 14-entry buffer (along with an RCU head) is probably
> > enough to give us at least 90% of the wins that a more complex solution
> > would give.
>
> Can we benchmark this? After all, memory allocation can sometimes
> counter one's intuition.
>
> One alternative approach would be to allocate such a buffer per
> slab cache, and run each slab caches through RCU independently.
> Seems like this should allow some savings. Might not be worthwhile,
> but again seemed worth asking the question.
>
> Thanx, Paul
On Thu, Dec 21, 2017 at 09:31:23AM -0800, Rao Shoaib wrote:
>
>
> On 12/21/2017 04:36 AM, Matthew Wilcox wrote:
> >On Thu, Dec 21, 2017 at 12:19:47AM -0800, [email protected] wrote:
> >>This patch moves kfree_call_rcu() and related macros out of rcu code. A new
> >>function __call_rcu_lazy() is created for calling __call_rcu() with the lazy
> >>flag.
> >Something you probably didn't know ... there are two RCU implementations
> >in the kernel; Tree and Tiny. It looks like you've only added
> >__call_rcu_lazy() to Tree and you'll also need to add it to Tiny.
> I left it out on purpose because the call in tiny is a little different
>
> rcutiny.h:
>
> static inline void kfree_call_rcu(struct rcu_head *head,
> 				  void (*func)(struct rcu_head *rcu))
> {
> 	call_rcu(head, func);
> }
>
> tree.c:
>
> void kfree_call_rcu(struct rcu_head *head,
> 		    void (*func)(struct rcu_head *rcu))
> {
> 	__call_rcu(head, func, rcu_state_p, -1, 1);
> }
> EXPORT_SYMBOL_GPL(kfree_call_rcu);
>
> If we want the code to be exactly the same, I can create a lazy version
> for tiny as well. However, I do not know where to move kfree_call_rcu()
> from its current home in rcutiny.h. Any thoughts?
I might be missing something subtle here, but in case I am not, my
suggestion is to simply rename rcutiny.h's kfree_call_rcu() and otherwise
leave it as is. If you want to update the type of the second argument,
which got missed back in the day, there is always this:
static inline void call_rcu_lazy(struct rcu_head *head, rcu_callback_t func)
{
call_rcu(head, func);
}
The reason that Tiny RCU doesn't handle laziness specially is because
Tree RCU's handling of laziness is a big no-op on the single CPU systems
on which Tiny RCU runs. So Tiny RCU need do nothing special to support
laziness.
Thanx, Paul
> >>Also moving macros generated following checkpatch noise. I do not know
> >>how to silence checkpatch as there is nothing wrong.
> >>
> >>CHECK: Macro argument reuse 'offset' - possible side-effects?
> >>#91: FILE: include/linux/slab.h:348:
> >>+#define __kfree_rcu(head, offset) \
> >>+ do { \
> >>+ BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); \
> >>+ kfree_call_rcu(head, (rcu_callback_t)(unsigned long)(offset)); \
> >>+ } while (0)
> >What checkpatch is warning you about here is that somebody might call
> >
> >__kfree_rcu(p, a++);
> >
> >and this would expand into
> >
> > do { \
> > BUILD_BUG_ON(!__is_kfree_rcu_offset(a++)); \
> > kfree_call_rcu(p, (rcu_callback_t)(unsigned long)(a++)); \
> > } while (0)
> >
> >which would increment 'a' twice, and cause pain and suffering.
> >
> >That's pretty unlikely usage of __kfree_rcu(), but I suppose it's not
> >impossible. We have various hacks to get around this kind of thing;
> >for example I might do this as::
> >
> >#define __kfree_rcu(head, offset) \
> > do { \
> > unsigned long __o = offset;
> > BUILD_BUG_ON(!__is_kfree_rcu_offset(__o)); \
> > kfree_call_rcu(head, (rcu_callback_t)(unsigned long)(__o)); \
> > } while (0)
> >
> >Now offset is only evaluated once per invocation of the macro. The other
> >two warnings are the same problem.
> >
> Thanks. I was not sure whether I was required to fix the noise or
> whether, based on inspection, it could be ignored. I will make the
> change and resubmit.
>
> Shoaib
>
On Thu, Dec 21, 2017 at 07:17:35PM -0800, Rao Shoaib wrote:
>
>
> On 12/21/2017 05:39 PM, Paul E. McKenney wrote:
> >>I left it out on purpose because the call in tiny is a little different
> >>
> >>rcutiny.h:
> >>
> >>static inline void kfree_call_rcu(struct rcu_head *head,
> >>				  void (*func)(struct rcu_head *rcu))
> >>{
> >>	call_rcu(head, func);
> >>}
> >>
> >>tree.c:
> >>
> >>void kfree_call_rcu(struct rcu_head *head,
> >>		    void (*func)(struct rcu_head *rcu))
> >>{
> >>	__call_rcu(head, func, rcu_state_p, -1, 1);
> >>}
> >>EXPORT_SYMBOL_GPL(kfree_call_rcu);
> >>
> >>If we want the code to be exactly the same, I can create a lazy version
> >>for tiny as well. However, I do not know where to move kfree_call_rcu()
> >>from its current home in rcutiny.h. Any thoughts?
> >I might be missing something subtle here, but in case I am not, my
> >suggestion is to simply rename rcutiny.h's kfree_call_rcu() and otherwise
> >leave it as is. If you want to update the type of the second argument,
> >which got missed back in the day, there is always this:
> >
> >static inline void call_rcu_lazy(struct rcu_head *head, rcu_callback_t func)
> >{
> > call_rcu(head, func);
> >}
> >
> >The reason that Tiny RCU doesn't handle laziness specially is because
> >Tree RCU's handling of laziness is a big no-op on the single CPU systems
> >on which Tiny RCU runs. So Tiny RCU need do nothing special to support
> >laziness.
> >
> > Thanx, Paul
> >
> Hi Paul,
>
> I cannot just change the name, as the __kfree_rcu() macro calls
> kfree_call_rcu(). I have made the tiny version of kfree_call_rcu() call
> call_rcu_lazy(), which calls call_rcu(). As far as the type is
> concerned, my bad, I cut and pasted from an older release. The latest
> code is already using the typedef.
Hello, Rao,
Perhaps it would be best if you simply reposted the latest patch. ;-)
Thanx, Paul