Hi,
v2 -> v3:
- A few coding style change in
'[PATCH v3 1/4] virtio_balloon: separate vm events into a function'
v1 -> v2:
- Add a new patch 'virtio_balloon: separate vm events into a function'
to avoid any compiler warnings(unused stack variable on
CONFIG_VM_EVENT_COUNTERS=n)
- Suggested by David, use a loop 'for (zid = 0; zid < MAX_NR_ZONES; zid++)'
to obtain all the stall events.
RFC -> v1:
- several text changes: oom-kill -> oom-kills, SCAN_ASYNC -> ASYN_SCAN.
- move vm events codes into '#ifdef CONFIG_VM_EVENT_COUNTERS'
RFC version:
Link: https://lore.kernel.org/lkml/[email protected]/T/#m1898963b3c27a989b1123db475135c3ca687ca84
zhenwei pi (4):
virtio_balloon: separate vm events into a function
virtio_balloon: introduce oom-kill invocations
virtio_balloon: introduce memory allocation stall counter
virtio_balloon: introduce memory scan/reclaim info
drivers/virtio/virtio_balloon.c | 61 ++++++++++++++++++++++-------
include/uapi/linux/virtio_balloon.h | 16 +++++++-
2 files changed, 61 insertions(+), 16 deletions(-)
--
2.34.1
All the VM events related statistics have dependence on
'CONFIG_VM_EVENT_COUNTERS', separate these events into a function to
make code clean. Then we can remove 'CONFIG_VM_EVENT_COUNTERS' from
'update_balloon_stats'.
Signed-off-by: zhenwei pi <[email protected]>
---
drivers/virtio/virtio_balloon.c | 43 ++++++++++++++++++++++-----------
1 file changed, 29 insertions(+), 14 deletions(-)
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 1f5b3dd31fcf..1710e3098ecd 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -316,34 +316,49 @@ static inline void update_stat(struct virtio_balloon *vb, int idx,
#define pages_to_bytes(x) ((u64)(x) << PAGE_SHIFT)
-static unsigned int update_balloon_stats(struct virtio_balloon *vb)
+#ifdef CONFIG_VM_EVENT_COUNTERS
+/* Return the number of entries filled by vm events */
+static inline unsigned int update_balloon_vm_stats(struct virtio_balloon *vb)
{
unsigned long events[NR_VM_EVENT_ITEMS];
- struct sysinfo i;
unsigned int idx = 0;
- long available;
- unsigned long caches;
all_vm_events(events);
- si_meminfo(&i);
-
- available = si_mem_available();
- caches = global_node_page_state(NR_FILE_PAGES);
-
-#ifdef CONFIG_VM_EVENT_COUNTERS
update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_IN,
- pages_to_bytes(events[PSWPIN]));
+ pages_to_bytes(events[PSWPIN]));
update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_OUT,
- pages_to_bytes(events[PSWPOUT]));
+ pages_to_bytes(events[PSWPOUT]));
update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, events[PGMAJFAULT]);
update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]);
+
#ifdef CONFIG_HUGETLB_PAGE
update_stat(vb, idx++, VIRTIO_BALLOON_S_HTLB_PGALLOC,
events[HTLB_BUDDY_PGALLOC]);
update_stat(vb, idx++, VIRTIO_BALLOON_S_HTLB_PGFAIL,
events[HTLB_BUDDY_PGALLOC_FAIL]);
-#endif
-#endif
+#endif /* CONFIG_HUGETLB_PAGE */
+
+ return idx;
+}
+#else /* CONFIG_VM_EVENT_COUNTERS */
+static inline unsigned int update_balloon_vm_stats(struct virtio_balloon *vb)
+{
+ return 0;
+}
+#endif /* CONFIG_VM_EVENT_COUNTERS */
+
+static unsigned int update_balloon_stats(struct virtio_balloon *vb)
+{
+ struct sysinfo i;
+ unsigned int idx;
+ long available;
+ unsigned long caches;
+
+ idx = update_balloon_vm_stats(vb);
+
+ si_meminfo(&i);
+ available = si_mem_available();
+ caches = global_node_page_state(NR_FILE_PAGES);
update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMFREE,
pages_to_bytes(i.freeram));
update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMTOT,
--
2.34.1
Memory allocation stall counter represents the performance/latency of
memory allocation, expose this counter to the host side by virtio
balloon device via out-of-bound way.
Acked-by: David Hildenbrand <[email protected]>
Signed-off-by: zhenwei pi <[email protected]>
---
drivers/virtio/virtio_balloon.c | 8 ++++++++
include/uapi/linux/virtio_balloon.h | 6 ++++--
2 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index f7a47eaa0936..e6229e548832 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -322,6 +322,8 @@ static inline unsigned int update_balloon_vm_stats(struct virtio_balloon *vb)
{
unsigned long events[NR_VM_EVENT_ITEMS];
unsigned int idx = 0;
+ unsigned int zid;
+ unsigned long stall = 0;
all_vm_events(events);
update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_IN,
@@ -332,6 +334,12 @@ static inline unsigned int update_balloon_vm_stats(struct virtio_balloon *vb)
update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]);
update_stat(vb, idx++, VIRTIO_BALLOON_S_OOM_KILL, events[OOM_KILL]);
+ /* sum all the stall events */
+ for (zid = 0; zid < MAX_NR_ZONES; zid++)
+ stall += events[ALLOCSTALL_NORMAL - ZONE_NORMAL + zid];
+
+ update_stat(vb, idx++, VIRTIO_BALLOON_S_ALLOC_STALL, stall);
+
#ifdef CONFIG_HUGETLB_PAGE
update_stat(vb, idx++, VIRTIO_BALLOON_S_HTLB_PGALLOC,
events[HTLB_BUDDY_PGALLOC]);
diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
index b17bbe033697..487b893a160e 100644
--- a/include/uapi/linux/virtio_balloon.h
+++ b/include/uapi/linux/virtio_balloon.h
@@ -72,7 +72,8 @@ struct virtio_balloon_config {
#define VIRTIO_BALLOON_S_HTLB_PGALLOC 8 /* Hugetlb page allocations */
#define VIRTIO_BALLOON_S_HTLB_PGFAIL 9 /* Hugetlb page allocation failures */
#define VIRTIO_BALLOON_S_OOM_KILL 10 /* OOM killer invocations */
-#define VIRTIO_BALLOON_S_NR 11
+#define VIRTIO_BALLOON_S_ALLOC_STALL 11 /* Stall count of memory allocatoin */
+#define VIRTIO_BALLOON_S_NR 12
#define VIRTIO_BALLOON_S_NAMES_WITH_PREFIX(VIRTIO_BALLOON_S_NAMES_prefix) { \
VIRTIO_BALLOON_S_NAMES_prefix "swap-in", \
@@ -85,7 +86,8 @@ struct virtio_balloon_config {
VIRTIO_BALLOON_S_NAMES_prefix "disk-caches", \
VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-allocations", \
VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-failures", \
- VIRTIO_BALLOON_S_NAMES_prefix "oom-kills" \
+ VIRTIO_BALLOON_S_NAMES_prefix "oom-kills", \
+ VIRTIO_BALLOON_S_NAMES_prefix "alloc-stalls" \
}
#define VIRTIO_BALLOON_S_NAMES VIRTIO_BALLOON_S_NAMES_WITH_PREFIX("")
--
2.34.1
When the guest OS runs under critical memory pressure, the guest
starts to kill processes. A guest monitor agent may scan 'oom_kill'
from /proc/vmstat, and reports the OOM KILL event. However, the agent
may be killed and we will loss this critical event(and the later
events).
For now we can also grep for magic words in guest kernel log from host
side. Rather than this unstable way, virtio balloon reports OOM-KILL
invocations instead.
Acked-by: David Hildenbrand <[email protected]>
Signed-off-by: zhenwei pi <[email protected]>
---
drivers/virtio/virtio_balloon.c | 1 +
include/uapi/linux/virtio_balloon.h | 6 ++++--
2 files changed, 5 insertions(+), 2 deletions(-)
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 1710e3098ecd..f7a47eaa0936 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -330,6 +330,7 @@ static inline unsigned int update_balloon_vm_stats(struct virtio_balloon *vb)
pages_to_bytes(events[PSWPOUT]));
update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, events[PGMAJFAULT]);
update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]);
+ update_stat(vb, idx++, VIRTIO_BALLOON_S_OOM_KILL, events[OOM_KILL]);
#ifdef CONFIG_HUGETLB_PAGE
update_stat(vb, idx++, VIRTIO_BALLOON_S_HTLB_PGALLOC,
diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
index ddaa45e723c4..b17bbe033697 100644
--- a/include/uapi/linux/virtio_balloon.h
+++ b/include/uapi/linux/virtio_balloon.h
@@ -71,7 +71,8 @@ struct virtio_balloon_config {
#define VIRTIO_BALLOON_S_CACHES 7 /* Disk caches */
#define VIRTIO_BALLOON_S_HTLB_PGALLOC 8 /* Hugetlb page allocations */
#define VIRTIO_BALLOON_S_HTLB_PGFAIL 9 /* Hugetlb page allocation failures */
-#define VIRTIO_BALLOON_S_NR 10
+#define VIRTIO_BALLOON_S_OOM_KILL 10 /* OOM killer invocations */
+#define VIRTIO_BALLOON_S_NR 11
#define VIRTIO_BALLOON_S_NAMES_WITH_PREFIX(VIRTIO_BALLOON_S_NAMES_prefix) { \
VIRTIO_BALLOON_S_NAMES_prefix "swap-in", \
@@ -83,7 +84,8 @@ struct virtio_balloon_config {
VIRTIO_BALLOON_S_NAMES_prefix "available-memory", \
VIRTIO_BALLOON_S_NAMES_prefix "disk-caches", \
VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-allocations", \
- VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-failures" \
+ VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-failures", \
+ VIRTIO_BALLOON_S_NAMES_prefix "oom-kills" \
}
#define VIRTIO_BALLOON_S_NAMES VIRTIO_BALLOON_S_NAMES_WITH_PREFIX("")
--
2.34.1
Expose memory scan/reclaim information to the host side via virtio
balloon device.
Now we have a metric to analyze the memory performance:
y: counter increases
n: counter does not changes
h: the rate of counter change is high
l: the rate of counter change is low
OOM: VIRTIO_BALLOON_S_OOM_KILL
STALL: VIRTIO_BALLOON_S_ALLOC_STALL
ASCAN: VIRTIO_BALLOON_S_SCAN_ASYNC
DSCAN: VIRTIO_BALLOON_S_SCAN_DIRECT
ARCLM: VIRTIO_BALLOON_S_RECLAIM_ASYNC
DRCLM: VIRTIO_BALLOON_S_RECLAIM_DIRECT
- OOM[y], STALL[*], ASCAN[*], DSCAN[*], ARCLM[*], DRCLM[*]:
the guest runs under really critial memory pressure
- OOM[n], STALL[h], ASCAN[*], DSCAN[l], ARCLM[*], DRCLM[l]:
the memory allocation stalls due to cgroup, not the global memory
pressure.
- OOM[n], STALL[h], ASCAN[*], DSCAN[h], ARCLM[*], DRCLM[h]:
the memory allocation stalls due to global memory pressure. The
performance gets hurt a lot. A high ratio between DRCLM/DSCAN shows
quite effective memory reclaiming.
- OOM[n], STALL[h], ASCAN[*], DSCAN[h], ARCLM[*], DRCLM[l]:
the memory allocation stalls due to global memory pressure.
the ratio between DRCLM/DSCAN gets low, the guest OS is thrashing
heavily, the serious case leads poor performance and difficult
trouble shooting. Ex, sshd may block on memory allocation when
accepting new connections, a user can't login a VM by ssh command.
- OOM[n], STALL[n], ASCAN[h], DSCAN[n], ARCLM[l], DRCLM[n]:
the low ratio between ARCLM/ASCAN shows that the guest tries to
reclaim more memory, but it can't. Once more memory is required in
future, it will struggle to reclaim memory.
Acked-by: David Hildenbrand <[email protected]>
Signed-off-by: zhenwei pi <[email protected]>
---
drivers/virtio/virtio_balloon.c | 9 +++++++++
include/uapi/linux/virtio_balloon.h | 12 ++++++++++--
2 files changed, 19 insertions(+), 2 deletions(-)
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index e6229e548832..225662358221 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -340,6 +340,15 @@ static inline unsigned int update_balloon_vm_stats(struct virtio_balloon *vb)
update_stat(vb, idx++, VIRTIO_BALLOON_S_ALLOC_STALL, stall);
+ update_stat(vb, idx++, VIRTIO_BALLOON_S_ASYNC_SCAN,
+ pages_to_bytes(events[PGSCAN_KSWAPD]));
+ update_stat(vb, idx++, VIRTIO_BALLOON_S_DIRECT_SCAN,
+ pages_to_bytes(events[PGSCAN_DIRECT]));
+ update_stat(vb, idx++, VIRTIO_BALLOON_S_ASYNC_RECLAIM,
+ pages_to_bytes(events[PGSTEAL_KSWAPD]));
+ update_stat(vb, idx++, VIRTIO_BALLOON_S_DIRECT_RECLAIM,
+ pages_to_bytes(events[PGSTEAL_DIRECT]));
+
#ifdef CONFIG_HUGETLB_PAGE
update_stat(vb, idx++, VIRTIO_BALLOON_S_HTLB_PGALLOC,
events[HTLB_BUDDY_PGALLOC]);
diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
index 487b893a160e..ee35a372805d 100644
--- a/include/uapi/linux/virtio_balloon.h
+++ b/include/uapi/linux/virtio_balloon.h
@@ -73,7 +73,11 @@ struct virtio_balloon_config {
#define VIRTIO_BALLOON_S_HTLB_PGFAIL 9 /* Hugetlb page allocation failures */
#define VIRTIO_BALLOON_S_OOM_KILL 10 /* OOM killer invocations */
#define VIRTIO_BALLOON_S_ALLOC_STALL 11 /* Stall count of memory allocatoin */
-#define VIRTIO_BALLOON_S_NR 12
+#define VIRTIO_BALLOON_S_ASYNC_SCAN 12 /* Amount of memory scanned asynchronously */
+#define VIRTIO_BALLOON_S_DIRECT_SCAN 13 /* Amount of memory scanned directly */
+#define VIRTIO_BALLOON_S_ASYNC_RECLAIM 14 /* Amount of memory reclaimed asynchronously */
+#define VIRTIO_BALLOON_S_DIRECT_RECLAIM 15 /* Amount of memory reclaimed directly */
+#define VIRTIO_BALLOON_S_NR 16
#define VIRTIO_BALLOON_S_NAMES_WITH_PREFIX(VIRTIO_BALLOON_S_NAMES_prefix) { \
VIRTIO_BALLOON_S_NAMES_prefix "swap-in", \
@@ -87,7 +91,11 @@ struct virtio_balloon_config {
VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-allocations", \
VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-failures", \
VIRTIO_BALLOON_S_NAMES_prefix "oom-kills", \
- VIRTIO_BALLOON_S_NAMES_prefix "alloc-stalls" \
+ VIRTIO_BALLOON_S_NAMES_prefix "alloc-stalls", \
+ VIRTIO_BALLOON_S_NAMES_prefix "async-scans", \
+ VIRTIO_BALLOON_S_NAMES_prefix "direct-scans", \
+ VIRTIO_BALLOON_S_NAMES_prefix "async-reclaims", \
+ VIRTIO_BALLOON_S_NAMES_prefix "direct-reclaims" \
}
#define VIRTIO_BALLOON_S_NAMES VIRTIO_BALLOON_S_NAMES_WITH_PREFIX("")
--
2.34.1
On Tue, Apr 23, 2024 at 11:41:07AM +0800, zhenwei pi wrote:
> When the guest OS runs under critical memory pressure, the guest
> starts to kill processes. A guest monitor agent may scan 'oom_kill'
> from /proc/vmstat, and reports the OOM KILL event. However, the agent
> may be killed and we will loss this critical event(and the later
> events).
>
> For now we can also grep for magic words in guest kernel log from host
> side. Rather than this unstable way, virtio balloon reports OOM-KILL
> invocations instead.
>
> Acked-by: David Hildenbrand <[email protected]>
> Signed-off-by: zhenwei pi <[email protected]>
> ---
> drivers/virtio/virtio_balloon.c | 1 +
> include/uapi/linux/virtio_balloon.h | 6 ++++--
> 2 files changed, 5 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> index 1710e3098ecd..f7a47eaa0936 100644
> --- a/drivers/virtio/virtio_balloon.c
> +++ b/drivers/virtio/virtio_balloon.c
> @@ -330,6 +330,7 @@ static inline unsigned int update_balloon_vm_stats(struct virtio_balloon *vb)
> pages_to_bytes(events[PSWPOUT]));
> update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, events[PGMAJFAULT]);
> update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]);
> + update_stat(vb, idx++, VIRTIO_BALLOON_S_OOM_KILL, events[OOM_KILL]);
>
> #ifdef CONFIG_HUGETLB_PAGE
> update_stat(vb, idx++, VIRTIO_BALLOON_S_HTLB_PGALLOC,
> diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
> index ddaa45e723c4..b17bbe033697 100644
> --- a/include/uapi/linux/virtio_balloon.h
> +++ b/include/uapi/linux/virtio_balloon.h
> @@ -71,7 +71,8 @@ struct virtio_balloon_config {
> #define VIRTIO_BALLOON_S_CACHES 7 /* Disk caches */
> #define VIRTIO_BALLOON_S_HTLB_PGALLOC 8 /* Hugetlb page allocations */
> #define VIRTIO_BALLOON_S_HTLB_PGFAIL 9 /* Hugetlb page allocation failures */
> -#define VIRTIO_BALLOON_S_NR 10
> +#define VIRTIO_BALLOON_S_OOM_KILL 10 /* OOM killer invocations */
> +#define VIRTIO_BALLOON_S_NR 11
>
> #define VIRTIO_BALLOON_S_NAMES_WITH_PREFIX(VIRTIO_BALLOON_S_NAMES_prefix) { \
> VIRTIO_BALLOON_S_NAMES_prefix "swap-in", \
Looks like a useful extension. But
any UAPI extension has to go to virtio spec first.
> @@ -83,7 +84,8 @@ struct virtio_balloon_config {
> VIRTIO_BALLOON_S_NAMES_prefix "available-memory", \
> VIRTIO_BALLOON_S_NAMES_prefix "disk-caches", \
> VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-allocations", \
> - VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-failures" \
> + VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-failures", \
> + VIRTIO_BALLOON_S_NAMES_prefix "oom-kills" \
> }
>
> #define VIRTIO_BALLOON_S_NAMES VIRTIO_BALLOON_S_NAMES_WITH_PREFIX("")
> --
> 2.34.1
On 4/23/24 17:13, Michael S. Tsirkin wrote:
> On Tue, Apr 23, 2024 at 11:41:07AM +0800, zhenwei pi wrote:
[snip]
>> #define VIRTIO_BALLOON_S_NAMES_WITH_PREFIX(VIRTIO_BALLOON_S_NAMES_prefix) { \
>> VIRTIO_BALLOON_S_NAMES_prefix "swap-in", \
>
> Looks like a useful extension. But
> any UAPI extension has to go to virtio spec first.
>
Sure, I'll send related virtio spec changes once virtio comment mail
list gets ready.
>> @@ -83,7 +84,8 @@ struct virtio_balloon_config {
>> VIRTIO_BALLOON_S_NAMES_prefix "available-memory", \
>> VIRTIO_BALLOON_S_NAMES_prefix "disk-caches", \
>> VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-allocations", \
>> - VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-failures" \
>> + VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-failures", \
>> + VIRTIO_BALLOON_S_NAMES_prefix "oom-kills" \
>> }
>>
>> #define VIRTIO_BALLOON_S_NAMES VIRTIO_BALLOON_S_NAMES_WITH_PREFIX("")
>> --
>> 2.34.1
>
--
zhenwei pi
On 23.04.24 05:41, zhenwei pi wrote:
> All the VM events related statistics have dependence on
> 'CONFIG_VM_EVENT_COUNTERS', separate these events into a function to
> make code clean. Then we can remove 'CONFIG_VM_EVENT_COUNTERS' from
> 'update_balloon_stats'.
>
> Signed-off-by: zhenwei pi <[email protected]>
> ---
Reviewed-by: David Hildenbrand <[email protected]>
--
Cheers,
David / dhildenb