On Tue, May 30, 2017 at 02:17:24PM -0400, Johannes Weiner wrote:
> Josef's redesign of the balancing between slab caches and the page
> cache requires slab cache statistics at the lruvec level.
>
> Signed-off-by: Johannes Weiner <[email protected]>
> Acked-by: Vladimir Davydov <[email protected]>
Presumably this is already known, but a remarkable number of crashes
in next-20170605 bisect to this patch.
Guenter
---
Qemu test results:
total: 122 pass: 51 fail: 71
Failed tests:
arm:vexpress-a9:vexpress_defconfig:vexpress-v2p-ca9
arm:vexpress-a15:vexpress_defconfig:vexpress-v2p-ca15-tc1
arm:kzm:imx_v6_v7_defconfig
arm:sabrelite:imx_v6_v7_defconfig:imx6dl-sabrelite
arm:beagle:multi_v7_defconfig:omap3-beagle
arm:beaglexm:multi_v7_defconfig:omap3-beagle-xm
arm:overo:multi_v7_defconfig:omap3-overo-tobi
arm:sabrelite:multi_v7_defconfig:imx6dl-sabrelite
arm:vexpress-a9:multi_v7_defconfig:vexpress-v2p-ca9
arm:vexpress-a15:multi_v7_defconfig:vexpress-v2p-ca15-tc1
arm:vexpress-a15-a7:multi_v7_defconfig:vexpress-v2p-ca15_a7
arm:xilinx-zynq-a9:multi_v7_defconfig:zynq-zc702
arm:xilinx-zynq-a9:multi_v7_defconfig:zynq-zc706
arm:xilinx-zynq-a9:multi_v7_defconfig:zynq-zed
arm:midway:multi_v7_defconfig:ecx-2000
arm:smdkc210:multi_v7_defconfig:exynos4210-smdkv310
arm:smdkc210:exynos_defconfig:exynos4210-smdkv310
arm:beagle:omap2plus_defconfig:omap3-beagle
arm:beaglexm:omap2plus_defconfig:omap3-beagle-xm
arm:overo:omap2plus_defconfig:omap3-overo-tobi
arm:realview-pb-a8:realview_defconfig:arm-realview-pba8
arm:realview-pbx-a9:realview_defconfig:arm-realview-pbx-a9
arm:realview-eb:realview_defconfig:arm-realview-eb
arm:realview-eb-mpcore:realview_defconfig:arm-realview-eb-11mp-ctrevb
arm64:virt:smp:defconfig
arm64:xlnx-ep108:smp:defconfig:zynqmp-ep108
arm64:virt:nosmp:defconfig
arm64:xlnx-ep108:nosmp:defconfig:zynqmp-ep108
mips:malta_defconfig:smp
mipsel:24Kf:malta_defconfig:smp
powerpc:mac99:nosmp:ppc_book3s_defconfig
powerpc:g3beige:nosmp:ppc_book3s_defconfig
powerpc:mac99:smp:ppc_book3s_defconfig
powerpc:mpc8548cds:smpdev:85xx/mpc85xx_cds_defconfig
powerpc:mac99:ppc64_book3s_defconfig:nosmp
powerpc:mac99:ppc64_book3s_defconfig:smp4
powerpc:pseries:pseries_defconfig
powerpc:mpc8544ds:ppc64_e5500_defconfig:smp
sparc32:SPARCClassic:smp:sparc32_defconfig
sparc32:SPARCbook:smp:sparc32_defconfig
sparc32:SS-4:smp:sparc32_defconfig
sparc32:SS-5:smp:sparc32_defconfig
sparc32:SS-10:smp:sparc32_defconfig
sparc32:SS-20:smp:sparc32_defconfig
sparc32:SS-600MP:smp:sparc32_defconfig
sparc32:LX:smp:sparc32_defconfig
sparc32:Voyager:smp:sparc32_defconfig
x86:Broadwell:q35:x86_pc_defconfig
x86:Skylake-Client:q35:x86_pc_defconfig
x86:SandyBridge:q35:x86_pc_defconfig
x86:Haswell:pc:x86_pc_defconfig
x86:Nehalem:q35:x86_pc_defconfig
x86:phenom:pc:x86_pc_defconfig
x86:core2duo:q35:x86_pc_nosmp_defconfig
x86:Conroe:isapc:x86_pc_nosmp_defconfig
x86:Opteron_G1:pc:x86_pc_nosmp_defconfig
x86:n270:isapc:x86_pc_nosmp_defconfig
x86_64:q35:Broadwell-noTSX:x86_64_pc_defconfig
x86_64:q35:IvyBridge:x86_64_pc_defconfig
x86_64:q35:SandyBridge:x86_64_pc_defconfig
x86_64:q35:Haswell:x86_64_pc_defconfig
x86_64:pc:core2duo:x86_64_pc_defconfig
x86_64:q35:Nehalem:x86_64_pc_defconfig
x86_64:pc:phenom:x86_64_pc_defconfig
x86_64:q35:Opteron_G1:x86_64_pc_defconfig
x86_64:pc:Opteron_G4:x86_64_pc_nosmp_defconfig
x86_64:q35:IvyBridge:x86_64_pc_nosmp_defconfig
xtensa:dc232b:lx60:generic_kc705_defconfig
xtensa:dc232b:kc705:generic_kc705_defconfig
xtensa:dc233c:ml605:generic_kc705_defconfig
xtensa:dc233c:kc705:generic_kc705_defconfig
On Mon, Jun 05, 2017 at 09:52:03AM -0700, Guenter Roeck wrote:
> On Tue, May 30, 2017 at 02:17:24PM -0400, Johannes Weiner wrote:
> > Josef's redesign of the balancing between slab caches and the page
> > cache requires slab cache statistics at the lruvec level.
> >
> > Signed-off-by: Johannes Weiner <[email protected]>
> > Acked-by: Vladimir Davydov <[email protected]>
>
> Presumably this is already known, but a remarkable number of crashes
> in next-20170605 bisect to this patch.
Thanks Guenter.
Can you test if the fix below resolves the problem?
---
From 47007dfcd7873cb93d11466a93b1f41f6a7a434f Mon Sep 17 00:00:00 2001
From: Johannes Weiner <[email protected]>
Date: Sun, 4 Jun 2017 07:02:44 -0400
Subject: [PATCH] mm: memcontrol: per-lruvec stats infrastructure fix 2
Even with the previous fix routing !page->mem_cgroup stats to the root
cgroup, we still see crashes in certain configurations, as the root is
not initialized for the earliest possible accounting sites.
Don't track uncharged pages at all, not even in the root. This takes
care of early accounting as well as special pages that aren't tracked.
Because we still need to account at the pgdat level, we can no longer
implement the lruvec_page_state functions on top of the lruvec_state
ones. But that's okay. It was a little silly to look up the nodeinfo
and descend to the lruvec, only to container_of() back to the nodeinfo
where the lruvec_stat structure is sitting.
Signed-off-by: Johannes Weiner <[email protected]>
---
include/linux/memcontrol.h | 28 ++++++++++++++--------------
1 file changed, 14 insertions(+), 14 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index bea6f08e9e16..da9360885260 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -585,27 +585,27 @@ static inline void mod_lruvec_state(struct lruvec *lruvec,
static inline void __mod_lruvec_page_state(struct page *page,
enum node_stat_item idx, int val)
{
- struct mem_cgroup *memcg;
- struct lruvec *lruvec;
-
- /* Special pages in the VM aren't charged, use root */
- memcg = page->mem_cgroup ? : root_mem_cgroup;
+ struct mem_cgroup_per_node *pn;
- lruvec = mem_cgroup_lruvec(page_pgdat(page), memcg);
- __mod_lruvec_state(lruvec, idx, val);
+ __mod_node_page_state(page_pgdat(page), idx, val);
+ if (mem_cgroup_disabled() || !page->mem_cgroup)
+ return;
+ __mod_memcg_state(page->mem_cgroup, idx, val);
+ pn = page->mem_cgroup->nodeinfo[page_to_nid(page)];
+ __this_cpu_add(pn->lruvec_stat->count[idx], val);
}
static inline void mod_lruvec_page_state(struct page *page,
enum node_stat_item idx, int val)
{
- struct mem_cgroup *memcg;
- struct lruvec *lruvec;
-
- /* Special pages in the VM aren't charged, use root */
- memcg = page->mem_cgroup ? : root_mem_cgroup;
+ struct mem_cgroup_per_node *pn;
- lruvec = mem_cgroup_lruvec(page_pgdat(page), memcg);
- mod_lruvec_state(lruvec, idx, val);
+ mod_node_page_state(page_pgdat(page), idx, val);
+ if (mem_cgroup_disabled() || !page->mem_cgroup)
+ return;
+ mod_memcg_state(page->mem_cgroup, idx, val);
+ pn = page->mem_cgroup->nodeinfo[page_to_nid(page)];
+ this_cpu_add(pn->lruvec_stat->count[idx], val);
}
unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
--
2.13.0