2022-11-02 02:12:43

by Leonardo Bras

[permalink] [raw]
Subject: [PATCH v1 2/3] mm/memcontrol: Change stock_lock type from local_lock_t to spinlock_t

In this context, since it's using per-cpu variables, changing from
local_lock to spinlock should not deal much impact in performance and can
allow operations such as stock draining to happen in remote cpus.

Why performance would probably not get impacted:
1 - Since the lock is in the same cache line as the information that is
used next, there is no extra memory access for caching the lock.
2 - Since it's a percpu struct, there should be no other cpu sharing this
cacheline, so there is no need for cacheline invalidation, and writing
to the lock should be as fast as the next struct members.
3 - Even the write in (2) could be pipelined and batched with following
writes to the cacheline (such as nr_pages member), further decreasing
the impact of this change.

Suggested-by: Marcelo Tosatti <[email protected]>
Signed-off-by: Leonardo Bras <[email protected]>
---
mm/memcontrol.c | 38 ++++++++++++++++++++------------------
1 file changed, 20 insertions(+), 18 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2d8549ae1b300..add46da2e6df1 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2167,7 +2167,7 @@ void unlock_page_memcg(struct page *page)
}

struct memcg_stock_pcp {
- local_lock_t stock_lock;
+ spinlock_t stock_lock; /* Protects the percpu struct */
struct mem_cgroup *cached; /* this never be root cgroup */
unsigned int nr_pages;

@@ -2184,7 +2184,7 @@ struct memcg_stock_pcp {
#define FLUSHING_CACHED_CHARGE 0
};
static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock) = {
- .stock_lock = INIT_LOCAL_LOCK(stock_lock),
+ .stock_lock = __SPIN_LOCK_UNLOCKED(stock_lock),
};
static DEFINE_MUTEX(percpu_charge_mutex);

@@ -2229,15 +2229,15 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
if (nr_pages > MEMCG_CHARGE_BATCH)
return ret;

- local_lock_irqsave(&memcg_stock.stock_lock, flags);
-
stock = this_cpu_ptr(&memcg_stock);
+ spin_lock_irqsave(&stock->stock_lock, flags);
+
if (memcg == stock->cached && stock->nr_pages >= nr_pages) {
stock->nr_pages -= nr_pages;
ret = true;
}

- local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+ spin_unlock_irqrestore(&stock->stock_lock, flags);

return ret;
}
@@ -2274,14 +2274,14 @@ static void drain_local_stock(struct work_struct *dummy)
* drain_stock races is that we always operate on local CPU stock
* here with IRQ disabled
*/
- local_lock_irqsave(&memcg_stock.stock_lock, flags);
-
stock = this_cpu_ptr(&memcg_stock);
+ spin_lock_irqsave(&stock->stock_lock, flags);
+
old = drain_obj_stock(stock);
drain_stock(stock);
clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);

- local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+ spin_unlock_irqrestore(&stock->stock_lock, flags);
if (old)
obj_cgroup_put(old);
}
@@ -2309,10 +2309,12 @@ static void __refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
{
unsigned long flags;
+ struct memcg_stock_pcp *stock;

- local_lock_irqsave(&memcg_stock.stock_lock, flags);
+ stock = this_cpu_ptr(&memcg_stock);
+ spin_lock_irqsave(&stock->stock_lock, flags);
__refill_stock(memcg, nr_pages);
- local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+ spin_unlock_irqrestore(&stock->stock_lock, flags);
}

/*
@@ -3157,8 +3159,8 @@ void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
unsigned long flags;
int *bytes;

- local_lock_irqsave(&memcg_stock.stock_lock, flags);
stock = this_cpu_ptr(&memcg_stock);
+ spin_lock_irqsave(&stock->stock_lock, flags);

/*
* Save vmstat data in stock and skip vmstat array update unless
@@ -3210,7 +3212,7 @@ void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
if (nr)
mod_objcg_mlstate(objcg, pgdat, idx, nr);

- local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+ spin_unlock_irqrestore(&stock->stock_lock, flags);
if (old)
obj_cgroup_put(old);
}
@@ -3221,15 +3223,15 @@ static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
unsigned long flags;
bool ret = false;

- local_lock_irqsave(&memcg_stock.stock_lock, flags);
-
stock = this_cpu_ptr(&memcg_stock);
+ spin_lock_irqsave(&stock->stock_lock, flags);
+
if (objcg == stock->cached_objcg && stock->nr_bytes >= nr_bytes) {
stock->nr_bytes -= nr_bytes;
ret = true;
}

- local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+ spin_unlock_irqrestore(&stock->stock_lock, flags);

return ret;
}
@@ -3319,9 +3321,9 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
unsigned long flags;
unsigned int nr_pages = 0;

- local_lock_irqsave(&memcg_stock.stock_lock, flags);
-
stock = this_cpu_ptr(&memcg_stock);
+ spin_lock_irqsave(&stock->stock_lock, flags);
+
if (stock->cached_objcg != objcg) { /* reset if necessary */
old = drain_obj_stock(stock);
obj_cgroup_get(objcg);
@@ -3337,7 +3339,7 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
stock->nr_bytes &= (PAGE_SIZE - 1);
}

- local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+ spin_unlock_irqrestore(&stock->stock_lock, flags);
if (old)
obj_cgroup_put(old);

--
2.38.1