The lowest bit in page->memcg_data is used to distinguish between
a struct mem_cgroup pointer and a pointer to an objcgs array.
All checks and modifications of this bit are open-coded.
Let's formalize it using page memcg flags, defined in the
page_memcg_flags enum, and replace all open-coded accesses with
test_bit()/__set_bit(). A few additional flags might be added later.
Flags are intended to be mutually exclusive.
Signed-off-by: Roman Gushchin <[email protected]>
---
include/linux/memcontrol.h | 29 +++++++++++++++++++----------
1 file changed, 19 insertions(+), 10 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index ab3ea3e90583..9a49f1e1c0c7 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -343,6 +343,11 @@ struct mem_cgroup {
extern struct mem_cgroup *root_mem_cgroup;
+enum page_memcg_flags {
+ /* page->memcg_data is a pointer to an objcgs vector */
+ PG_MEMCG_OBJ_CGROUPS,
+};
+
/*
* page_mem_cgroup - get the memory cgroup associated with a page
* @page: a pointer to the page struct
@@ -371,13 +376,7 @@ static inline struct mem_cgroup *page_mem_cgroup_check(struct page *page)
{
unsigned long memcg_data = page->memcg_data;
- /*
- * The lowest bit set means that memcg isn't a valid
- * memcg pointer, but a obj_cgroups pointer.
- * In this case the page is shared and doesn't belong
- * to any specific memory cgroup.
- */
- if (memcg_data & 0x1UL)
+ if (test_bit(PG_MEMCG_OBJ_CGROUPS, &memcg_data))
return NULL;
return (struct mem_cgroup *)memcg_data;
@@ -422,7 +421,13 @@ static inline void clear_page_mem_cgroup(struct page *page)
*/
static inline struct obj_cgroup **page_obj_cgroups(struct page *page)
{
- return (struct obj_cgroup **)(page->memcg_data & ~0x1UL);
+ unsigned long memcg_data = page->memcg_data;
+
+ VM_BUG_ON_PAGE(memcg_data && !test_bit(PG_MEMCG_OBJ_CGROUPS,
+ &memcg_data), page);
+ __clear_bit(PG_MEMCG_OBJ_CGROUPS, &memcg_data);
+
+ return (struct obj_cgroup **)memcg_data;
}
/*
@@ -437,7 +442,7 @@ static inline struct obj_cgroup **page_obj_cgroups_check(struct page *page)
{
unsigned long memcg_data = page->memcg_data;
- if (memcg_data && (memcg_data & 0x1UL))
+ if (memcg_data && test_bit(PG_MEMCG_OBJ_CGROUPS, &memcg_data))
return (struct obj_cgroup **)memcg_data;
return NULL;
@@ -453,7 +458,11 @@ static inline struct obj_cgroup **page_obj_cgroups_check(struct page *page)
static inline bool set_page_obj_cgroups(struct page *page,
struct obj_cgroup **objcgs)
{
- return !cmpxchg(&page->memcg_data, 0, (unsigned long)objcgs | 0x1UL);
+ unsigned long memcg_data = (unsigned long)objcgs;
+
+ __set_bit(PG_MEMCG_OBJ_CGROUPS, &memcg_data);
+
+ return !cmpxchg(&page->memcg_data, 0, memcg_data);
}
/*
--
2.26.2
On Tue, Sep 22, 2020 at 1:38 PM Roman Gushchin <[email protected]> wrote:
>
> The lowest bit in page->memcg_data is used to distinguish between
> struct memory_cgroup pointer and a pointer to a objcgs array.
> All checks and modifications of this bit are open-coded.
>
> Let's formalize it using page memcg flags, defined in page_memcg_flags
> enum and replace all open-coded accesses with test_bit()/__set_bit().
>
> Few additional flags might be added later. Flags are intended to be
> mutually exclusive.
Why mutually exclusive? I understand mutual exclusion between non-slab
kernel memory and the objcgs vector, but a future feature might not
need to be mutually exclusive.
One use-case I am thinking of is actually using a couple of bits here
to store more idle (or hot) age information in a future extension of
DAMON. That would be for user memory (anon or file, and not slab or
kmem), but multiple bits can be set.
>
> Signed-off-by: Roman Gushchin <[email protected]>
> ---
> include/linux/memcontrol.h | 29 +++++++++++++++++++----------
> 1 file changed, 19 insertions(+), 10 deletions(-)
>
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index ab3ea3e90583..9a49f1e1c0c7 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -343,6 +343,11 @@ struct mem_cgroup {
>
> extern struct mem_cgroup *root_mem_cgroup;
>
> +enum page_memcg_flags {
> + /* page->memcg_data is a pointer to an objcgs vector */
> + PG_MEMCG_OBJ_CGROUPS,
> +};
If you agree with my next comment then I think PG_MEMCG_LAST_FLAG and
MEMCG_FLAGS_MASK should be introduced in this patch instead of the
next one.
> +
> /*
> * page_mem_cgroup - get the memory cgroup associated with a page
> * @page: a pointer to the page struct
> @@ -371,13 +376,7 @@ static inline struct mem_cgroup *page_mem_cgroup_check(struct page *page)
> {
> unsigned long memcg_data = page->memcg_data;
>
> - /*
> - * The lowest bit set means that memcg isn't a valid
> - * memcg pointer, but a obj_cgroups pointer.
> - * In this case the page is shared and doesn't belong
> - * to any specific memory cgroup.
> - */
> - if (memcg_data & 0x1UL)
> + if (test_bit(PG_MEMCG_OBJ_CGROUPS, &memcg_data))
> return NULL;
>
> return (struct mem_cgroup *)memcg_data;
> @@ -422,7 +421,13 @@ static inline void clear_page_mem_cgroup(struct page *page)
> */
> static inline struct obj_cgroup **page_obj_cgroups(struct page *page)
> {
> - return (struct obj_cgroup **)(page->memcg_data & ~0x1UL);
> + unsigned long memcg_data = page->memcg_data;
> +
> + VM_BUG_ON_PAGE(memcg_data && !test_bit(PG_MEMCG_OBJ_CGROUPS,
> + &memcg_data), page);
> + __clear_bit(PG_MEMCG_OBJ_CGROUPS, &memcg_data);
> +
> + return (struct obj_cgroup **)memcg_data;
Wouldn't the following be more future proof?
return (struct obj_cgroup **)(memcg_data & ~MEMCG_FLAGS_MASK);
> }
>
> /*
> @@ -437,7 +442,7 @@ static inline struct obj_cgroup **page_obj_cgroups_check(struct page *page)
> {
> unsigned long memcg_data = page->memcg_data;
>
> - if (memcg_data && (memcg_data & 0x1UL))
> + if (memcg_data && test_bit(PG_MEMCG_OBJ_CGROUPS, &memcg_data))
> return (struct obj_cgroup **)memcg_data;
>
> return NULL;
> @@ -453,7 +458,11 @@ static inline struct obj_cgroup **page_obj_cgroups_check(struct page *page)
> static inline bool set_page_obj_cgroups(struct page *page,
> struct obj_cgroup **objcgs)
> {
> - return !cmpxchg(&page->memcg_data, 0, (unsigned long)objcgs | 0x1UL);
> + unsigned long memcg_data = (unsigned long)objcgs;
> +
> + __set_bit(PG_MEMCG_OBJ_CGROUPS, &memcg_data);
> +
> + return !cmpxchg(&page->memcg_data, 0, memcg_data);
> }
>
> /*
> --
> 2.26.2
>
On Thu, Sep 24, 2020 at 12:03:35AM -0700, Shakeel Butt wrote:
> On Tue, Sep 22, 2020 at 1:38 PM Roman Gushchin <[email protected]> wrote:
> >
> > The lowest bit in page->memcg_data is used to distinguish between
> > struct memory_cgroup pointer and a pointer to a objcgs array.
> > All checks and modifications of this bit are open-coded.
> >
> > Let's formalize it using page memcg flags, defined in page_memcg_flags
> > enum and replace all open-coded accesses with test_bit()/__set_bit().
> >
> > Few additional flags might be added later. Flags are intended to be
> > mutually exclusive.
>
> Why mutually exclusive? I understand mutual exclusion between non-slab
> kernel memory and objcgs vector but future feature might not need to
> be mutually exclusive.
>
> One use-case I am thinking of is actually using a couple of bits here
> to store more idle (or hot) age by future extension of DAMON. That
> would be for user memory (anon or file and not slab or kmem) but
> multiple bits can set.
Yeah, I agree. There is no reason to require mutual exclusion.
I'll drop it.
>
> >
> > Signed-off-by: Roman Gushchin <[email protected]>
> > ---
> > include/linux/memcontrol.h | 29 +++++++++++++++++++----------
> > 1 file changed, 19 insertions(+), 10 deletions(-)
> >
> > diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> > index ab3ea3e90583..9a49f1e1c0c7 100644
> > --- a/include/linux/memcontrol.h
> > +++ b/include/linux/memcontrol.h
> > @@ -343,6 +343,11 @@ struct mem_cgroup {
> >
> > extern struct mem_cgroup *root_mem_cgroup;
> >
> > +enum page_memcg_flags {
> > + /* page->memcg_data is a pointer to an objcgs vector */
> > + PG_MEMCG_OBJ_CGROUPS,
> > +};
>
> If you agree with my next comment then I think PG_MEMCG_LAST_FLAG and
> MEMCG_FLAGS_MASK should be introduced in this patch instead of the
> next one.
Ok, agree.
>
> > +
> > /*
> > * page_mem_cgroup - get the memory cgroup associated with a page
> > * @page: a pointer to the page struct
> > @@ -371,13 +376,7 @@ static inline struct mem_cgroup *page_mem_cgroup_check(struct page *page)
> > {
> > unsigned long memcg_data = page->memcg_data;
> >
> > - /*
> > - * The lowest bit set means that memcg isn't a valid
> > - * memcg pointer, but a obj_cgroups pointer.
> > - * In this case the page is shared and doesn't belong
> > - * to any specific memory cgroup.
> > - */
> > - if (memcg_data & 0x1UL)
> > + if (test_bit(PG_MEMCG_OBJ_CGROUPS, &memcg_data))
> > return NULL;
> >
> > return (struct mem_cgroup *)memcg_data;
> > @@ -422,7 +421,13 @@ static inline void clear_page_mem_cgroup(struct page *page)
> > */
> > static inline struct obj_cgroup **page_obj_cgroups(struct page *page)
> > {
> > - return (struct obj_cgroup **)(page->memcg_data & ~0x1UL);
> > + unsigned long memcg_data = page->memcg_data;
> > +
> > + VM_BUG_ON_PAGE(memcg_data && !test_bit(PG_MEMCG_OBJ_CGROUPS,
> > + &memcg_data), page);
> > + __clear_bit(PG_MEMCG_OBJ_CGROUPS, &memcg_data);
> > +
> > + return (struct obj_cgroup **)memcg_data;
>
> Wouldn't the following be more future proof?
>
> return (struct obj_cgroup **)(memcg_data & ~MEMCG_FLAGS_MASK);
Agree. I'll send an updated version soon.
Thank you for looking into the patchset!
Roman
On Tue, Sep 22, 2020 at 01:36:59PM -0700, Roman Gushchin wrote:
> The lowest bit in page->memcg_data is used to distinguish between
> struct memory_cgroup pointer and a pointer to a objcgs array.
> All checks and modifications of this bit are open-coded.
>
> Let's formalize it using page memcg flags, defined in page_memcg_flags
> enum and replace all open-coded accesses with test_bit()/__set_bit().
>
> Few additional flags might be added later. Flags are intended to be
> mutually exclusive.
>
> Signed-off-by: Roman Gushchin <[email protected]>
> ---
> include/linux/memcontrol.h | 29 +++++++++++++++++++----------
> 1 file changed, 19 insertions(+), 10 deletions(-)
>
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index ab3ea3e90583..9a49f1e1c0c7 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -343,6 +343,11 @@ struct mem_cgroup {
>
> extern struct mem_cgroup *root_mem_cgroup;
>
> +enum page_memcg_flags {
> + /* page->memcg_data is a pointer to an objcgs vector */
> + PG_MEMCG_OBJ_CGROUPS,
How about enum memcg_data_flags and PGMEMCG_OBJCG?
> @@ -371,13 +376,7 @@ static inline struct mem_cgroup *page_mem_cgroup_check(struct page *page)
> {
> unsigned long memcg_data = page->memcg_data;
>
> - /*
> - * The lowest bit set means that memcg isn't a valid
> - * memcg pointer, but a obj_cgroups pointer.
> - * In this case the page is shared and doesn't belong
> - * to any specific memory cgroup.
> - */
> - if (memcg_data & 0x1UL)
> + if (test_bit(PG_MEMCG_OBJ_CGROUPS, &memcg_data))
> return NULL;
>
> return (struct mem_cgroup *)memcg_data;
> @@ -422,7 +421,13 @@ static inline void clear_page_mem_cgroup(struct page *page)
> */
> static inline struct obj_cgroup **page_obj_cgroups(struct page *page)
> {
> - return (struct obj_cgroup **)(page->memcg_data & ~0x1UL);
> + unsigned long memcg_data = page->memcg_data;
> +
> + VM_BUG_ON_PAGE(memcg_data && !test_bit(PG_MEMCG_OBJ_CGROUPS,
> + &memcg_data), page);
> + __clear_bit(PG_MEMCG_OBJ_CGROUPS, &memcg_data);
The flag names make sense to me, but this shouldn't be using test_bit,
__clear_bit, __set_bit etc. on local variables. It suggests that it's
modifying some shared/global state, when it's just masking out a bit
during a read. We usually just open-code the bitwise ops for that.
On Tue, Sep 22, 2020 at 01:36:59PM -0700, Roman Gushchin wrote:
> @@ -422,7 +421,13 @@ static inline void clear_page_mem_cgroup(struct page *page)
> */
> static inline struct obj_cgroup **page_obj_cgroups(struct page *page)
> {
> - return (struct obj_cgroup **)(page->memcg_data & ~0x1UL);
> + unsigned long memcg_data = page->memcg_data;
> +
> + VM_BUG_ON_PAGE(memcg_data && !test_bit(PG_MEMCG_OBJ_CGROUPS,
> + &memcg_data), page);
> + __clear_bit(PG_MEMCG_OBJ_CGROUPS, &memcg_data);
> +
> + return (struct obj_cgroup **)memcg_data;
Slab allocations set up page->memcg_data locklessly, right? AFAICS,
the page_objcg lookup functions all need READ_ONCE() loads.
On Thu, Sep 24, 2020 at 04:01:22PM -0400, Johannes Weiner wrote:
> On Tue, Sep 22, 2020 at 01:36:59PM -0700, Roman Gushchin wrote:
> > The lowest bit in page->memcg_data is used to distinguish between
> > struct memory_cgroup pointer and a pointer to a objcgs array.
> > All checks and modifications of this bit are open-coded.
> >
> > Let's formalize it using page memcg flags, defined in page_memcg_flags
> > enum and replace all open-coded accesses with test_bit()/__set_bit().
> >
> > Few additional flags might be added later. Flags are intended to be
> > mutually exclusive.
> >
> > Signed-off-by: Roman Gushchin <[email protected]>
> > ---
> > include/linux/memcontrol.h | 29 +++++++++++++++++++----------
> > 1 file changed, 19 insertions(+), 10 deletions(-)
> >
> > diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> > index ab3ea3e90583..9a49f1e1c0c7 100644
> > --- a/include/linux/memcontrol.h
> > +++ b/include/linux/memcontrol.h
> > @@ -343,6 +343,11 @@ struct mem_cgroup {
> >
> > extern struct mem_cgroup *root_mem_cgroup;
> >
> > +enum page_memcg_flags {
> > + /* page->memcg_data is a pointer to an objcgs vector */
> > + PG_MEMCG_OBJ_CGROUPS,
>
> How about enum memcg_data_flags and PGMEMCG_OBJCG?
Honestly, I prefer the original names. I'm ok with enum memcg_data_flags,
if you prefer it. PGMEMCG_OBJCG looks bulky, with too many letters
without a separator. Also, we use object cgroups (plural) everywhere,
i.e. OBJCGS rather than OBJCG. PG_MEMCG_OBJCGS works for me.
>
> > @@ -371,13 +376,7 @@ static inline struct mem_cgroup *page_mem_cgroup_check(struct page *page)
> > {
> > unsigned long memcg_data = page->memcg_data;
> >
> > - /*
> > - * The lowest bit set means that memcg isn't a valid
> > - * memcg pointer, but a obj_cgroups pointer.
> > - * In this case the page is shared and doesn't belong
> > - * to any specific memory cgroup.
> > - */
> > - if (memcg_data & 0x1UL)
> > + if (test_bit(PG_MEMCG_OBJ_CGROUPS, &memcg_data))
> > return NULL;
> >
> > return (struct mem_cgroup *)memcg_data;
> > @@ -422,7 +421,13 @@ static inline void clear_page_mem_cgroup(struct page *page)
> > */
> > static inline struct obj_cgroup **page_obj_cgroups(struct page *page)
> > {
> > - return (struct obj_cgroup **)(page->memcg_data & ~0x1UL);
> > + unsigned long memcg_data = page->memcg_data;
> > +
> > + VM_BUG_ON_PAGE(memcg_data && !test_bit(PG_MEMCG_OBJ_CGROUPS,
> > + &memcg_data), page);
> > + __clear_bit(PG_MEMCG_OBJ_CGROUPS, &memcg_data);
>
> The flag names make sense to me, but this shouldn't be using test_bit,
> __clear_bit, __set_bit etc. on local variables. It suggests that it's
> modifying some shared/global state, when it's just masking out a bit
> during a read. We usually just open-code the bitwise ops for that.
It will be way more bulky otherwise, all those memcg_data & (1UL << PG_MEMCG_OBJ_CGROUPS) etc.
I don't see why these bitops helpers can't be used on local variables.
Is the preference to not use them this way documented anywhere?
Thanks!
On Thu, Sep 24, 2020 at 01:39:05PM -0700, Roman Gushchin wrote:
> On Thu, Sep 24, 2020 at 04:01:22PM -0400, Johannes Weiner wrote:
> > On Tue, Sep 22, 2020 at 01:36:59PM -0700, Roman Gushchin wrote:
> > > The lowest bit in page->memcg_data is used to distinguish between
> > > struct memory_cgroup pointer and a pointer to a objcgs array.
> > > All checks and modifications of this bit are open-coded.
> > >
> > > Let's formalize it using page memcg flags, defined in page_memcg_flags
> > > enum and replace all open-coded accesses with test_bit()/__set_bit().
> > >
> > > Few additional flags might be added later. Flags are intended to be
> > > mutually exclusive.
> > >
> > > Signed-off-by: Roman Gushchin <[email protected]>
> > > ---
> > > include/linux/memcontrol.h | 29 +++++++++++++++++++----------
> > > 1 file changed, 19 insertions(+), 10 deletions(-)
> > >
> > > diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> > > index ab3ea3e90583..9a49f1e1c0c7 100644
> > > --- a/include/linux/memcontrol.h
> > > +++ b/include/linux/memcontrol.h
> > > @@ -343,6 +343,11 @@ struct mem_cgroup {
> > >
> > > extern struct mem_cgroup *root_mem_cgroup;
> > >
> > > +enum page_memcg_flags {
> > > + /* page->memcg_data is a pointer to an objcgs vector */
> > > + PG_MEMCG_OBJ_CGROUPS,
> >
> > How about enum memcg_data_flags and PGMEMCG_OBJCG?
>
> Honestly I prefer the original names. I'm ok with enum memcg_data_flags,
> if you prefer it. PGMEMCG_OBJCG looks bulky with too many letters
> without a separator, also we use object cgroups (plural) everywhere,
> like OBJCGS vs OBJCG. PG_MEMCG_OBJCGS works for me.
Fair enough, it's a bit dense.
MEMCG_DATA_OBJCGS could work too. It wouldn't introduce a new prefix
and would relate to the field those flags belong to.
> > > @@ -371,13 +376,7 @@ static inline struct mem_cgroup *page_mem_cgroup_check(struct page *page)
> > > {
> > > unsigned long memcg_data = page->memcg_data;
> > >
> > > - /*
> > > - * The lowest bit set means that memcg isn't a valid
> > > - * memcg pointer, but a obj_cgroups pointer.
> > > - * In this case the page is shared and doesn't belong
> > > - * to any specific memory cgroup.
> > > - */
> > > - if (memcg_data & 0x1UL)
> > > + if (test_bit(PG_MEMCG_OBJ_CGROUPS, &memcg_data))
> > > return NULL;
> > >
> > > return (struct mem_cgroup *)memcg_data;
> > > @@ -422,7 +421,13 @@ static inline void clear_page_mem_cgroup(struct page *page)
> > > */
> > > static inline struct obj_cgroup **page_obj_cgroups(struct page *page)
> > > {
> > > - return (struct obj_cgroup **)(page->memcg_data & ~0x1UL);
> > > + unsigned long memcg_data = page->memcg_data;
> > > +
> > > + VM_BUG_ON_PAGE(memcg_data && !test_bit(PG_MEMCG_OBJ_CGROUPS,
> > > + &memcg_data), page);
> > > + __clear_bit(PG_MEMCG_OBJ_CGROUPS, &memcg_data);
> >
> > The flag names make sense to me, but this shouldn't be using test_bit,
> > __clear_bit, __set_bit etc. on local variables. It suggests that it's
> > modifying some shared/global state, when it's just masking out a bit
> > during a read. We usually just open-code the bitwise ops for that.
>
> It will be way more bulky otherwise, all those memcg_data & (1UL << PG_MEMCG_OBJ_CGROUPS) etc.
Does anybody need the bit numbers? You can make them masks directly:
enum memcg_data_flags {
MEMCG_DATA_OBJCGS = (1 << 0),
...
}
and do memcg_data | MEMCG_DATA_OBJCGS.
cgroup-defs.h alone has 3 examples of this. It's very common.
> I don't see why these bitops helpers can't be used on local variables.
> Is the preference to not use them this way documented anywhere?
The bitops are for shared state, that's why set_bit(), clear_bit(),
test_bit() provide atomicity, and the __ versions of them usually
indicate that outside locking is provided.
Grep for __clear_bit() and most of the time it's on a shared data
structure and surrounded by some sort of lock or atomic context.
Why would you want to replace a single | expression with an RMW
transaction involving three statements and a function call to
__set_bit()?