On 2024/1/19 06:14, Christoph Lameter (Ampere) wrote:
> On Thu, 18 Jan 2024, Chengming Zhou wrote:
>
>> So get_freelist() has two cases to handle: cpu slab and cpu partial list slab.
>> The latter is NOT frozen, so need to remove "VM_BUG_ON(!new.frozen)" from it.
>
> Right so keep the check if it is the former?
>
Ok, I get it. Maybe like this:
diff --git a/mm/slub.c b/mm/slub.c
index 2ef88bbf56a3..7fa9dbc2e938 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3313,7 +3313,7 @@ __update_cpu_freelist_fast(struct kmem_cache *s,
*
* If this function returns NULL then the slab has been unfrozen.
*/
-static inline void *get_freelist(struct kmem_cache *s, struct slab *slab)
+static inline void *get_freelist(struct kmem_cache *s, struct slab *slab, int frozen)
{
struct slab new;
unsigned long counters;
@@ -3326,7 +3326,7 @@ static inline void *get_freelist(struct kmem_cache *s, struct slab *slab)
counters = slab->counters;
new.counters = counters;
- VM_BUG_ON(!new.frozen);
+ VM_BUG_ON(frozen && !new.frozen);
new.inuse = slab->objects;
new.frozen = freelist != NULL;
@@ -3440,7 +3440,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
if (freelist)
goto load_freelist;
- freelist = get_freelist(s, slab);
+ freelist = get_freelist(s, slab, 1);
if (!freelist) {
c->slab = NULL;
@@ -3498,18 +3498,19 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
slab = slub_percpu_partial(c);
slub_set_percpu_partial(c, slab);
- local_unlock_irqrestore(&s->cpu_slab->lock, flags);
- stat(s, CPU_PARTIAL_ALLOC);
- if (unlikely(!node_match(slab, node) ||
- !pfmemalloc_match(slab, gfpflags))) {
- slab->next = NULL;
- __put_partials(s, slab);
- continue;
+ if (likely(node_match(slab, node) &&
+ pfmemalloc_match(slab, gfpflags))) {
+ c->slab = slab;
+ freelist = get_freelist(s, slab, 0);
+ stat(s, CPU_PARTIAL_ALLOC);
+ goto load_freelist;
}
- freelist = freeze_slab(s, slab);
- goto retry_load_slab;
+ local_unlock_irqrestore(&s->cpu_slab->lock, flags);
+
+ slab->next = NULL;
+ __put_partials(s, slab);
}
#endif
On 1/19/24 04:53, Chengming Zhou wrote:
> On 2024/1/19 06:14, Christoph Lameter (Ampere) wrote:
>> On Thu, 18 Jan 2024, Chengming Zhou wrote:
>>
>>> So get_freelist() has two cases to handle: cpu slab and cpu partial list slab.
>>> The latter is NOT frozen, so need to remove "VM_BUG_ON(!new.frozen)" from it.
>>
>> Right so keep the check if it is the former?
>>
>
> Ok, I get it. Maybe like this:
I think that's just too ugly for a VM_BUG_ON(). I'd just remove the check
and be done with that.
I have a somewhat different point. You reused get_freelist() but in fact
it's more like freeze_slab(), but that one uses slab_update_freelist() and
we are under the local_lock so we want the cheaper __slab_update_freelist(),
which get_freelist() has and I guess that's why you reused that one.
However get_freelist() also assumes it can return NULL if the freelist is
empty. If that's possible to happen on the percpu partial list, we should
not "goto load_freelist;" but rather create a new label above that, above
the "if (!freelist) {" block that handles the case.
If that cannot happen (needs a careful audit) and we have a guarantee
that slabs on the percpu partial list must have a non-empty freelist, then
we probably instead want a new __freeze_slab() variant that is like
freeze_slab(), but uses __slab_update_freelist() and probably also has
VM_BUG_ON(!freelist) before returning it?
>
> diff --git a/mm/slub.c b/mm/slub.c
> index 2ef88bbf56a3..7fa9dbc2e938 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -3313,7 +3313,7 @@ __update_cpu_freelist_fast(struct kmem_cache *s,
> *
> * If this function returns NULL then the slab has been unfrozen.
> */
> -static inline void *get_freelist(struct kmem_cache *s, struct slab *slab)
> +static inline void *get_freelist(struct kmem_cache *s, struct slab *slab, int frozen)
> {
> struct slab new;
> unsigned long counters;
> @@ -3326,7 +3326,7 @@ static inline void *get_freelist(struct kmem_cache *s, struct slab *slab)
> counters = slab->counters;
>
> new.counters = counters;
> - VM_BUG_ON(!new.frozen);
> + VM_BUG_ON(frozen && !new.frozen);
>
> new.inuse = slab->objects;
> new.frozen = freelist != NULL;
> @@ -3440,7 +3440,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
> if (freelist)
> goto load_freelist;
>
> - freelist = get_freelist(s, slab);
> + freelist = get_freelist(s, slab, 1);
>
> if (!freelist) {
> c->slab = NULL;
> @@ -3498,18 +3498,19 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
>
> slab = slub_percpu_partial(c);
> slub_set_percpu_partial(c, slab);
> - local_unlock_irqrestore(&s->cpu_slab->lock, flags);
> - stat(s, CPU_PARTIAL_ALLOC);
>
> - if (unlikely(!node_match(slab, node) ||
> - !pfmemalloc_match(slab, gfpflags))) {
> - slab->next = NULL;
> - __put_partials(s, slab);
> - continue;
> + if (likely(node_match(slab, node) &&
> + pfmemalloc_match(slab, gfpflags))) {
> + c->slab = slab;
> + freelist = get_freelist(s, slab, 0);
> + stat(s, CPU_PARTIAL_ALLOC);
> + goto load_freelist;
> }
>
> - freelist = freeze_slab(s, slab);
> - goto retry_load_slab;
> + local_unlock_irqrestore(&s->cpu_slab->lock, flags);
> +
> + slab->next = NULL;
> + __put_partials(s, slab);
> }
> #endif
On 2024/1/23 01:13, Vlastimil Babka wrote:
> On 1/19/24 04:53, Chengming Zhou wrote:
>> On 2024/1/19 06:14, Christoph Lameter (Ampere) wrote:
>>> On Thu, 18 Jan 2024, Chengming Zhou wrote:
>>>
>>>> So get_freelist() has two cases to handle: cpu slab and cpu partial list slab.
>>>> The latter is NOT frozen, so need to remove "VM_BUG_ON(!new.frozen)" from it.
>>>
>>> Right so keep the check if it is the former?
>>>
>>
>> Ok, I get it. Maybe like this:
>
> I think that's just too ugly for a VM_BUG_ON(). I'd just remove the check
> and be done with that.
Ok with me.
>
> I have a somewhat different point. You reused get_freelist() but in fact
> it's more like freeze_slab(), but that one uses slab_update_freelist() and
> we are under the local_lock so we want the cheaper __slab_update_freelist(),
> which get_freelist() has and I guess that's why you reused that one.
Right, we already hold the local_lock, so I reused get_freelist().
>
> However get_freelist() also assumes it can return NULL if the freelist is
> empty. If that's possible to happen on the percpu partial list, we should
> not "goto load_freelist;" but rather create a new label above that, above
> the "if (!freelist) {" block that handles the case.
>
> If that's not possible to happen (needs careful audit) and we have guarantee
Yes, it's not possible for now.
> that slabs on percpu partial list must have non-empty freelist, then we
> probably instead want a new __freeze_slab() variant that is like
> freeze_slab(), but uses __slab_update_freelist() and probably also has
> VM_BUG_ON(!freelist) before returning it?
>
Instead of introducing another new function, how about still reusing get_freelist()
and adding a VM_BUG_ON(!freelist) after calling it? I feel this is simpler.
Thanks!
On 2024/1/23 10:51, Chengming Zhou wrote:
> On 2024/1/23 01:13, Vlastimil Babka wrote:
>> On 1/19/24 04:53, Chengming Zhou wrote:
>>> On 2024/1/19 06:14, Christoph Lameter (Ampere) wrote:
>>>> On Thu, 18 Jan 2024, Chengming Zhou wrote:
>>>>
>>>>> So get_freelist() has two cases to handle: cpu slab and cpu partial list slab.
>>>>> The latter is NOT frozen, so need to remove "VM_BUG_ON(!new.frozen)" from it.
>>>>
>>>> Right so keep the check if it is the former?
>>>>
>>>
>>> Ok, I get it. Maybe like this:
>>
>> I think that's just too ugly for a VM_BUG_ON(). I'd just remove the check
>> and be done with that.
>
> Ok with me.
>
>>
>> I have a somewhat different point. You reused get_freelist() but in fact
>> it's more like freeze_slab(), but that one uses slab_update_freelist() and
>> we are under the local_lock so we want the cheaper __slab_update_freelist(),
>> which get_freelist() has and I guess that's why you reused that one.
>
> Right, we already have the lock_lock, so reuse get_freelist().
>
>>
>> However get_freelist() also assumes it can return NULL if the freelist is
>> empty. If that's possible to happen on the percpu partial list, we should
>> not "goto load_freelist;" but rather create a new label above that, above
>> the "if (!freelist) {" block that handles the case.
>>
>> If that's not possible to happen (needs careful audit) and we have guarantee
>
> Yes, it's not possible for now.
>
>> that slabs on percpu partial list must have non-empty freelist, then we
>> probably instead want a new __freeze_slab() variant that is like
>> freeze_slab(), but uses __slab_update_freelist() and probably also has
>> VM_BUG_ON(!freelist) before returning it?
>>
>
> Instead of introducing another new function, how about still reusing get_freelist()
> and VM_BUG_ON(!freelist) after calling it? I feel this is simpler.
>
> Thanks!
Does this look fine?
diff --git a/mm/slub.c b/mm/slub.c
index 2ef88bbf56a3..fda402b2d649 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3326,7 +3326,6 @@ static inline void *get_freelist(struct kmem_cache *s, struct slab *slab)
counters = slab->counters;
new.counters = counters;
- VM_BUG_ON(!new.frozen);
new.inuse = slab->objects;
new.frozen = freelist != NULL;
@@ -3498,18 +3497,20 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
slab = slub_percpu_partial(c);
slub_set_percpu_partial(c, slab);
- local_unlock_irqrestore(&s->cpu_slab->lock, flags);
- stat(s, CPU_PARTIAL_ALLOC);
- if (unlikely(!node_match(slab, node) ||
- !pfmemalloc_match(slab, gfpflags))) {
- slab->next = NULL;
- __put_partials(s, slab);
- continue;
+ if (likely(node_match(slab, node) &&
+ pfmemalloc_match(slab, gfpflags))) {
+ c->slab = slab;
+ freelist = get_freelist(s, slab);
+ VM_BUG_ON(!freelist);
+ stat(s, CPU_PARTIAL_ALLOC);
+ goto load_freelist;
}
- freelist = freeze_slab(s, slab);
- goto retry_load_slab;
+ local_unlock_irqrestore(&s->cpu_slab->lock, flags);
+
+ slab->next = NULL;
+ __put_partials(s, slab);
}
#endif
On 1/23/24 03:51, Chengming Zhou wrote:
> On 2024/1/23 01:13, Vlastimil Babka wrote:
>> On 1/19/24 04:53, Chengming Zhou wrote:
>>> On 2024/1/19 06:14, Christoph Lameter (Ampere) wrote:
>>>> On Thu, 18 Jan 2024, Chengming Zhou wrote:
>>>>
>>>>> So get_freelist() has two cases to handle: cpu slab and cpu partial list slab.
>>>>> The latter is NOT frozen, so need to remove "VM_BUG_ON(!new.frozen)" from it.
>>>>
>>>> Right so keep the check if it is the former?
>>>>
>>>
>>> Ok, I get it. Maybe like this:
>>
>> I think that's just too ugly for a VM_BUG_ON(). I'd just remove the check
>> and be done with that.
>
> Ok with me.
>
>>
>> I have a somewhat different point. You reused get_freelist() but in fact
>> it's more like freeze_slab(), but that one uses slab_update_freelist() and
>> we are under the local_lock so we want the cheaper __slab_update_freelist(),
>> which get_freelist() has and I guess that's why you reused that one.
>
> Right, we already have the lock_lock, so reuse get_freelist().
>
>>
>> However get_freelist() also assumes it can return NULL if the freelist is
>> empty. If that's possible to happen on the percpu partial list, we should
>> not "goto load_freelist;" but rather create a new label above that, above
>> the "if (!freelist) {" block that handles the case.
>>
>> If that's not possible to happen (needs careful audit) and we have guarantee
>
> Yes, it's not possible for now.
>
>> that slabs on percpu partial list must have non-empty freelist, then we
>> probably instead want a new __freeze_slab() variant that is like
>> freeze_slab(), but uses __slab_update_freelist() and probably also has
>> VM_BUG_ON(!freelist) before returning it?
>>
>
> Instead of introducing another new function, how about still reusing get_freelist()
> and VM_BUG_ON(!freelist) after calling it? I feel this is simpler.
Could you measure whether introducing a new function that sets new.frozen = 1
has any performance benefit? If not, we can reuse get_freelist() as you say.
Thanks!
> Thanks!
On 2024/1/23 16:24, Vlastimil Babka wrote:
> On 1/23/24 03:51, Chengming Zhou wrote:
>> On 2024/1/23 01:13, Vlastimil Babka wrote:
>>> On 1/19/24 04:53, Chengming Zhou wrote:
>>>> On 2024/1/19 06:14, Christoph Lameter (Ampere) wrote:
>>>>> On Thu, 18 Jan 2024, Chengming Zhou wrote:
>>>>>
>>>>>> So get_freelist() has two cases to handle: cpu slab and cpu partial list slab.
>>>>>> The latter is NOT frozen, so need to remove "VM_BUG_ON(!new.frozen)" from it.
>>>>>
>>>>> Right so keep the check if it is the former?
>>>>>
>>>>
>>>> Ok, I get it. Maybe like this:
>>>
>>> I think that's just too ugly for a VM_BUG_ON(). I'd just remove the check
>>> and be done with that.
>>
>> Ok with me.
>>
>>>
>>> I have a somewhat different point. You reused get_freelist() but in fact
>>> it's more like freeze_slab(), but that one uses slab_update_freelist() and
>>> we are under the local_lock so we want the cheaper __slab_update_freelist(),
>>> which get_freelist() has and I guess that's why you reused that one.
>>
>> Right, we already have the lock_lock, so reuse get_freelist().
>>
>>>
>>> However get_freelist() also assumes it can return NULL if the freelist is
>>> empty. If that's possible to happen on the percpu partial list, we should
>>> not "goto load_freelist;" but rather create a new label above that, above
>>> the "if (!freelist) {" block that handles the case.
>>>
>>> If that's not possible to happen (needs careful audit) and we have guarantee
>>
>> Yes, it's not possible for now.
>>
>>> that slabs on percpu partial list must have non-empty freelist, then we
>>> probably instead want a new __freeze_slab() variant that is like
>>> freeze_slab(), but uses __slab_update_freelist() and probably also has
>>> VM_BUG_ON(!freelist) before returning it?
>>>
>>
>> Instead of introducing another new function, how about still reusing get_freelist()
>> and VM_BUG_ON(!freelist) after calling it? I feel this is simpler.
>
> Could you measure if introducing new function that sets new.frozen = 1; has
> any performance benefit? If not, we can reuse get_freelist() as you say.
> Thanks!
>
I just tested a new function, __freeze_slab(), that uses __slab_update_freelist()
and sets new.frozen = 1, but found the performance is slightly worse than reusing
get_freelist().
I think the reason may be a larger code memory footprint, but I haven't looked deeply into that.
Anyway, it looks better to reuse get_freelist(); I will send an updated version later.
Thanks!