Due to the semantics of memory.high enforcement i.e. throttle the
workload without oom-kill, we are trying to use it for right sizing the
workloads in our production environment. However we observed the
mechanism fails for some specific applications which does bug chunck of
allocations in a single syscall. The reason behind this failure is due
to the limitation of the memory.high enforcement's current
implementation. This patch series solves this issue by enforcing the
memory.high synchronously and making it more robust.
Shakeel Butt (4):
memcg: refactor mem_cgroup_oom
memcg: unify force charging conditions
selftests: memcg: test high limit for single entry allocation
memcg: synchronously enforce memory.high
include/linux/page_counter.h | 10 +
mm/memcontrol.c | 175 ++++++++++--------
mm/page_counter.c | 59 ++++--
tools/testing/selftests/cgroup/cgroup_util.c | 15 +-
tools/testing/selftests/cgroup/cgroup_util.h | 1 +
.../selftests/cgroup/test_memcontrol.c | 78 ++++++++
6 files changed, 240 insertions(+), 98 deletions(-)
--
2.35.1.265.g69c8d7142f-goog
Test the enforcement of memory.high limit for large amount of memory
allocation within a single kernel entry. There are valid use-cases
where the application can trigger large amount of memory allocation
within a single syscall e.g. mlock() or mmap(MAP_POPULATE). Make sure
memory.high limit enforcement works for such use-cases.
Signed-off-by: Shakeel Butt <[email protected]>
---
tools/testing/selftests/cgroup/cgroup_util.c | 15 +++-
tools/testing/selftests/cgroup/cgroup_util.h | 1 +
.../selftests/cgroup/test_memcontrol.c | 78 +++++++++++++++++++
3 files changed, 91 insertions(+), 3 deletions(-)
diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c
index 0cf7e90c0052..dbaa7aabbb4a 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.c
+++ b/tools/testing/selftests/cgroup/cgroup_util.c
@@ -583,7 +583,7 @@ int clone_into_cgroup_run_wait(const char *cgroup)
return 0;
}
-int cg_prepare_for_wait(const char *cgroup)
+static int __prepare_for_wait(const char *cgroup, const char *filename)
{
int fd, ret = -1;
@@ -591,8 +591,7 @@ int cg_prepare_for_wait(const char *cgroup)
if (fd == -1)
return fd;
- ret = inotify_add_watch(fd, cg_control(cgroup, "cgroup.events"),
- IN_MODIFY);
+ ret = inotify_add_watch(fd, cg_control(cgroup, filename), IN_MODIFY);
if (ret == -1) {
close(fd);
fd = -1;
@@ -601,6 +600,16 @@ int cg_prepare_for_wait(const char *cgroup)
return fd;
}
+int cg_prepare_for_wait(const char *cgroup)
+{
+ return __prepare_for_wait(cgroup, "cgroup.events");
+}
+
+int memcg_prepare_for_wait(const char *cgroup)
+{
+ return __prepare_for_wait(cgroup, "memory.events");
+}
+
int cg_wait_for(int fd)
{
int ret = -1;
diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h
index 4f66d10626d2..628738532ac9 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.h
+++ b/tools/testing/selftests/cgroup/cgroup_util.h
@@ -55,4 +55,5 @@ extern int clone_reap(pid_t pid, int options);
extern int clone_into_cgroup_run_wait(const char *cgroup);
extern int dirfd_open_opath(const char *dir);
extern int cg_prepare_for_wait(const char *cgroup);
+extern int memcg_prepare_for_wait(const char *cgroup);
extern int cg_wait_for(int fd);
diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c
index c19a97dd02d4..36ccf2322e21 100644
--- a/tools/testing/selftests/cgroup/test_memcontrol.c
+++ b/tools/testing/selftests/cgroup/test_memcontrol.c
@@ -16,6 +16,7 @@
#include <netinet/in.h>
#include <netdb.h>
#include <errno.h>
+#include <sys/mman.h>
#include "../kselftest.h"
#include "cgroup_util.h"
@@ -628,6 +629,82 @@ static int test_memcg_high(const char *root)
return ret;
}
+static int alloc_anon_mlock(const char *cgroup, void *arg)
+{
+ size_t size = (size_t)arg;
+ void *buf;
+
+ buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
+ 0, 0);
+ if (buf == MAP_FAILED)
+ return -1;
+
+ mlock(buf, size);
+ munmap(buf, size);
+ return 0;
+}
+
+/*
+ * This test checks that memory.high is able to throttle big single shot
+ * allocation i.e. large allocation within one kernel entry.
+ */
+static int test_memcg_high_sync(const char *root)
+{
+ int ret = KSFT_FAIL, pid, fd = -1;
+ char *memcg;
+ long pre_high, pre_max;
+ long post_high, post_max;
+
+ memcg = cg_name(root, "memcg_test");
+ if (!memcg)
+ goto cleanup;
+
+ if (cg_create(memcg))
+ goto cleanup;
+
+ pre_high = cg_read_key_long(memcg, "memory.events", "high ");
+ pre_max = cg_read_key_long(memcg, "memory.events", "max ");
+ if (pre_high < 0 || pre_max < 0)
+ goto cleanup;
+
+ if (cg_write(memcg, "memory.swap.max", "0"))
+ goto cleanup;
+
+ if (cg_write(memcg, "memory.high", "30M"))
+ goto cleanup;
+
+ if (cg_write(memcg, "memory.max", "140M"))
+ goto cleanup;
+
+ fd = memcg_prepare_for_wait(memcg);
+ if (fd < 0)
+ goto cleanup;
+
+ pid = cg_run_nowait(memcg, alloc_anon_mlock, (void *)MB(200));
+ if (pid < 0)
+ goto cleanup;
+
+ cg_wait_for(fd);
+
+ post_high = cg_read_key_long(memcg, "memory.events", "high ");
+ post_max = cg_read_key_long(memcg, "memory.events", "max ");
+ if (post_high < 0 || post_max < 0)
+ goto cleanup;
+
+ if (pre_high == post_high || pre_max != post_max)
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (fd >= 0)
+ close(fd);
+ cg_destroy(memcg);
+ free(memcg);
+
+ return ret;
+}
+
/*
* This test checks that memory.max limits the amount of
* memory which can be consumed by either anonymous memory
@@ -1180,6 +1257,7 @@ struct memcg_test {
T(test_memcg_min),
T(test_memcg_low),
T(test_memcg_high),
+ T(test_memcg_high_sync),
T(test_memcg_max),
T(test_memcg_oom_events),
T(test_memcg_swap_max),
--
2.35.1.265.g69c8d7142f-goog