Allow the guest to know how much it is ballooned by the host.
It is useful when debugging out of memory conditions.
When host gets back memory from the guest it is accounted
as used memory in the guest but the guest have no way to know
how much it is actually ballooned.
Signed-off-by: Alexander Atanasov <[email protected]>
---
drivers/virtio/virtio_balloon.c | 77 +++++++++++++++++++++++++++++
include/uapi/linux/virtio_balloon.h | 1 +
2 files changed, 78 insertions(+)
V2:
- fixed coding style
- removed pretty print
V3:
- removed dublicate of features
- comment about balooned_pages more clear
- convert host pages to balloon pages
V4:
- added a define for BALLOON_PAGE_SIZE to make things clear
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index b9737da6c4dd..dc4ad584b947 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -10,6 +10,7 @@
#include <linux/virtio_balloon.h>
#include <linux/swap.h>
#include <linux/workqueue.h>
+#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/module.h>
@@ -731,6 +732,77 @@ static void report_free_page_func(struct work_struct *work)
}
}
+/*
+ * DEBUGFS Interface
+ */
+#ifdef CONFIG_DEBUG_FS
+
+#define guest_to_balloon_pages(i) ((i)*VIRTIO_BALLOON_PAGES_PER_PAGE)
+/**
+ * virtio_balloon_debug_show - shows statistics of balloon operations.
+ * @f: pointer to the &struct seq_file.
+ * @offset: ignored.
+ *
+ * Provides the statistics that can be accessed in virtio-balloon in the debugfs.
+ *
+ * Return: zero on success or an error code.
+ */
+
+static int virtio_balloon_debug_show(struct seq_file *f, void *offset)
+{
+ struct virtio_balloon *b = f->private;
+ u32 num_pages;
+ struct sysinfo i;
+
+ si_meminfo(&i);
+
+ seq_printf(f, "%-22s: %d\n", "page_size", VIRTIO_BALLOON_PAGE_SIZE);
+
+ virtio_cread_le(b->vdev, struct virtio_balloon_config, actual,
+ &num_pages);
+ /*
+ * Pages allocated by host from the guest memory.
+ * Host inflates the balloon to get more memory.
+ * Guest needs to deflate the balloon to get more memory.
+ */
+ seq_printf(f, "%-22s: %u\n", "ballooned_pages", num_pages);
+
+ /* Total Memory for the guest from host */
+ seq_printf(f, "%-22s: %lu\n", "total_pages",
+ guest_to_balloon_pages(i.totalram));
+
+ /* Current memory for the guest */
+ seq_printf(f, "%-22s: %lu\n", "current_pages",
+ guest_to_balloon_pages(i.totalram) - num_pages);
+
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(virtio_balloon_debug);
+
+static void virtio_balloon_debugfs_init(struct virtio_balloon *b)
+{
+ debugfs_create_file("virtio-balloon", 0444, NULL, b,
+ &virtio_balloon_debug_fops);
+}
+
+static void virtio_balloon_debugfs_exit(struct virtio_balloon *b)
+{
+ debugfs_remove(debugfs_lookup("virtio-balloon", NULL));
+}
+
+#else
+
+static inline void virtio_balloon_debugfs_init(struct virtio_balloon *b)
+{
+}
+
+static inline void virtio_balloon_debugfs_exit(struct virtio_balloon *b)
+{
+}
+
+#endif /* CONFIG_DEBUG_FS */
+
#ifdef CONFIG_BALLOON_COMPACTION
/*
* virtballoon_migratepage - perform the balloon page migration on behalf of
@@ -1019,6 +1091,9 @@ static int virtballoon_probe(struct virtio_device *vdev)
if (towards_target(vb))
virtballoon_changed(vdev);
+
+ virtio_balloon_debugfs_init(vb);
+
return 0;
out_unregister_oom:
@@ -1065,6 +1140,8 @@ static void virtballoon_remove(struct virtio_device *vdev)
{
struct virtio_balloon *vb = vdev->priv;
+ virtio_balloon_debugfs_exit(vb);
+
if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_REPORTING))
page_reporting_unregister(&vb->pr_dev_info);
if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
index ddaa45e723c4..f3ff7c4e5884 100644
--- a/include/uapi/linux/virtio_balloon.h
+++ b/include/uapi/linux/virtio_balloon.h
@@ -40,6 +40,7 @@
/* Size of a PFN in the balloon interface. */
#define VIRTIO_BALLOON_PFN_SHIFT 12
+#define VIRTIO_BALLOON_PAGE_SIZE (1<<VIRTIO_BALLOON_PFN_SHIFT)
#define VIRTIO_BALLOON_CMD_ID_STOP 0
#define VIRTIO_BALLOON_CMD_ID_DONE 1
--
2.25.1
On Tue, Jul 05, 2022 at 08:36:37AM +0000, Alexander Atanasov wrote:
> Allow the guest to know how much it is ballooned by the host.
> It is useful when debugging out of memory conditions.
>
> When host gets back memory from the guest it is accounted
> as used memory in the guest but the guest have no way to know
> how much it is actually ballooned.
>
> Signed-off-by: Alexander Atanasov <[email protected]>
> ---
> drivers/virtio/virtio_balloon.c | 77 +++++++++++++++++++++++++++++
> include/uapi/linux/virtio_balloon.h | 1 +
> 2 files changed, 78 insertions(+)
>
> V2:
> - fixed coding style
> - removed pretty print
> V3:
> - removed dublicate of features
> - comment about balooned_pages more clear
> - convert host pages to balloon pages
> V4:
> - added a define for BALLOON_PAGE_SIZE to make things clear
>
> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> index b9737da6c4dd..dc4ad584b947 100644
> --- a/drivers/virtio/virtio_balloon.c
> +++ b/drivers/virtio/virtio_balloon.c
> @@ -10,6 +10,7 @@
> #include <linux/virtio_balloon.h>
> #include <linux/swap.h>
> #include <linux/workqueue.h>
> +#include <linux/debugfs.h>
> #include <linux/delay.h>
> #include <linux/slab.h>
> #include <linux/module.h>
> @@ -731,6 +732,77 @@ static void report_free_page_func(struct work_struct *work)
> }
> }
>
> +/*
> + * DEBUGFS Interface
> + */
> +#ifdef CONFIG_DEBUG_FS
> +
> +#define guest_to_balloon_pages(i) ((i)*VIRTIO_BALLOON_PAGES_PER_PAGE)
> +/**
> + * virtio_balloon_debug_show - shows statistics of balloon operations.
> + * @f: pointer to the &struct seq_file.
> + * @offset: ignored.
> + *
> + * Provides the statistics that can be accessed in virtio-balloon in the debugfs.
> + *
> + * Return: zero on success or an error code.
> + */
> +
> +static int virtio_balloon_debug_show(struct seq_file *f, void *offset)
> +{
> + struct virtio_balloon *b = f->private;
> + u32 num_pages;
> + struct sysinfo i;
> +
> + si_meminfo(&i);
> +
> + seq_printf(f, "%-22s: %d\n", "page_size", VIRTIO_BALLOON_PAGE_SIZE);
> +
> + virtio_cread_le(b->vdev, struct virtio_balloon_config, actual,
> + &num_pages);
> + /*
> + * Pages allocated by host from the guest memory.
> + * Host inflates the balloon to get more memory.
> + * Guest needs to deflate the balloon to get more memory.
> + */
> + seq_printf(f, "%-22s: %u\n", "ballooned_pages", num_pages);
> +
> + /* Total Memory for the guest from host */
> + seq_printf(f, "%-22s: %lu\n", "total_pages",
> + guest_to_balloon_pages(i.totalram));
> +
> + /* Current memory for the guest */
> + seq_printf(f, "%-22s: %lu\n", "current_pages",
> + guest_to_balloon_pages(i.totalram) - num_pages);
> +
> + return 0;
> +}
> +
> +DEFINE_SHOW_ATTRIBUTE(virtio_balloon_debug);
> +
> +static void virtio_balloon_debugfs_init(struct virtio_balloon *b)
> +{
> + debugfs_create_file("virtio-balloon", 0444, NULL, b,
> + &virtio_balloon_debug_fops);
> +}
> +
> +static void virtio_balloon_debugfs_exit(struct virtio_balloon *b)
> +{
> + debugfs_remove(debugfs_lookup("virtio-balloon", NULL));
> +}
> +
> +#else
> +
> +static inline void virtio_balloon_debugfs_init(struct virtio_balloon *b)
> +{
> +}
> +
> +static inline void virtio_balloon_debugfs_exit(struct virtio_balloon *b)
> +{
> +}
> +
> +#endif /* CONFIG_DEBUG_FS */
> +
> #ifdef CONFIG_BALLOON_COMPACTION
> /*
> * virtballoon_migratepage - perform the balloon page migration on behalf of
> @@ -1019,6 +1091,9 @@ static int virtballoon_probe(struct virtio_device *vdev)
>
> if (towards_target(vb))
> virtballoon_changed(vdev);
> +
> + virtio_balloon_debugfs_init(vb);
> +
> return 0;
>
> out_unregister_oom:
> @@ -1065,6 +1140,8 @@ static void virtballoon_remove(struct virtio_device *vdev)
> {
> struct virtio_balloon *vb = vdev->priv;
>
> + virtio_balloon_debugfs_exit(vb);
> +
> if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_REPORTING))
> page_reporting_unregister(&vb->pr_dev_info);
> if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
> diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
> index ddaa45e723c4..f3ff7c4e5884 100644
> --- a/include/uapi/linux/virtio_balloon.h
> +++ b/include/uapi/linux/virtio_balloon.h
> @@ -40,6 +40,7 @@
>
> /* Size of a PFN in the balloon interface. */
> #define VIRTIO_BALLOON_PFN_SHIFT 12
> +#define VIRTIO_BALLOON_PAGE_SIZE (1<<VIRTIO_BALLOON_PFN_SHIFT)
> #define VIRTIO_BALLOON_CMD_ID_STOP 0
> #define VIRTIO_BALLOON_CMD_ID_DONE 1
Did you run checkpatch on this?
> --
> 2.25.1
Hello,
On 05/07/2022 11:59, Michael S. Tsirkin wrote:
> On Tue, Jul 05, 2022 at 08:36:37AM +0000, Alexander Atanasov wrote:
>> Allow the guest to know how much it is ballooned by the host.
>> It is useful when debugging out of memory conditions.
>>
>> When host gets back memory from the guest it is accounted
>> as used memory in the guest but the guest have no way to know
>> how much it is actually ballooned.
>>
>> Signed-off-by: Alexander Atanasov <[email protected]>
>> ---
>> drivers/virtio/virtio_balloon.c | 77 +++++++++++++++++++++++++++++
>> include/uapi/linux/virtio_balloon.h | 1 +
>> 2 files changed, 78 insertions(+)
>>
>> V2:
>> - fixed coding style
>> - removed pretty print
>> V3:
>> - removed dublicate of features
>> - comment about balooned_pages more clear
>> - convert host pages to balloon pages
>> V4:
>> - added a define for BALLOON_PAGE_SIZE to make things clear
>>
>> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
>> index b9737da6c4dd..dc4ad584b947 100644
>> --- a/drivers/virtio/virtio_balloon.c
>> +++ b/drivers/virtio/virtio_balloon.c
>> @@ -10,6 +10,7 @@
>> #include <linux/virtio_balloon.h>
>> #include <linux/swap.h>
>> #include <linux/workqueue.h>
>> +#include <linux/debugfs.h>
>> #include <linux/delay.h>
>> #include <linux/slab.h>
>> #include <linux/module.h>
>> @@ -731,6 +732,77 @@ static void report_free_page_func(struct work_struct *work)
>> }
>> }
>>
>> +/*
>> + * DEBUGFS Interface
>> + */
>> +#ifdef CONFIG_DEBUG_FS
>> +
>> +#define guest_to_balloon_pages(i) ((i)*VIRTIO_BALLOON_PAGES_PER_PAGE)
>> +/**
>> + * virtio_balloon_debug_show - shows statistics of balloon operations.
>> + * @f: pointer to the &struct seq_file.
>> + * @offset: ignored.
>> + *
>> + * Provides the statistics that can be accessed in virtio-balloon in the debugfs.
>> + *
>> + * Return: zero on success or an error code.
>> + */
>> +
>> +static int virtio_balloon_debug_show(struct seq_file *f, void *offset)
>> +{
>> + struct virtio_balloon *b = f->private;
>> + u32 num_pages;
>> + struct sysinfo i;
>> +
>> + si_meminfo(&i);
>> +
>> + seq_printf(f, "%-22s: %d\n", "page_size", VIRTIO_BALLOON_PAGE_SIZE);
>> +
>> + virtio_cread_le(b->vdev, struct virtio_balloon_config, actual,
>> + &num_pages);
>> + /*
>> + * Pages allocated by host from the guest memory.
>> + * Host inflates the balloon to get more memory.
>> + * Guest needs to deflate the balloon to get more memory.
>> + */
>> + seq_printf(f, "%-22s: %u\n", "ballooned_pages", num_pages);
>> +
>> + /* Total Memory for the guest from host */
>> + seq_printf(f, "%-22s: %lu\n", "total_pages",
>> + guest_to_balloon_pages(i.totalram));
>> +
>> + /* Current memory for the guest */
>> + seq_printf(f, "%-22s: %lu\n", "current_pages",
>> + guest_to_balloon_pages(i.totalram) - num_pages);
>> +
>> + return 0;
>> +}
>> +
>> +DEFINE_SHOW_ATTRIBUTE(virtio_balloon_debug);
>> +
>> +static void virtio_balloon_debugfs_init(struct virtio_balloon *b)
>> +{
>> + debugfs_create_file("virtio-balloon", 0444, NULL, b,
>> + &virtio_balloon_debug_fops);
>> +}
>> +
>> +static void virtio_balloon_debugfs_exit(struct virtio_balloon *b)
>> +{
>> + debugfs_remove(debugfs_lookup("virtio-balloon", NULL));
>> +}
>> +
>> +#else
>> +
>> +static inline void virtio_balloon_debugfs_init(struct virtio_balloon *b)
>> +{
>> +}
>> +
>> +static inline void virtio_balloon_debugfs_exit(struct virtio_balloon *b)
>> +{
>> +}
>> +
>> +#endif /* CONFIG_DEBUG_FS */
>> +
>> #ifdef CONFIG_BALLOON_COMPACTION
>> /*
>> * virtballoon_migratepage - perform the balloon page migration on behalf of
>> @@ -1019,6 +1091,9 @@ static int virtballoon_probe(struct virtio_device *vdev)
>>
>> if (towards_target(vb))
>> virtballoon_changed(vdev);
>> +
>> + virtio_balloon_debugfs_init(vb);
>> +
>> return 0;
>>
>> out_unregister_oom:
>> @@ -1065,6 +1140,8 @@ static void virtballoon_remove(struct virtio_device *vdev)
>> {
>> struct virtio_balloon *vb = vdev->priv;
>>
>> + virtio_balloon_debugfs_exit(vb);
>> +
>> if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_REPORTING))
>> page_reporting_unregister(&vb->pr_dev_info);
>> if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
>> diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
>> index ddaa45e723c4..f3ff7c4e5884 100644
>> --- a/include/uapi/linux/virtio_balloon.h
>> +++ b/include/uapi/linux/virtio_balloon.h
>> @@ -40,6 +40,7 @@
>>
>> /* Size of a PFN in the balloon interface. */
>> #define VIRTIO_BALLOON_PFN_SHIFT 12
>> +#define VIRTIO_BALLOON_PAGE_SIZE (1<<VIRTIO_BALLOON_PFN_SHIFT)
>> #define VIRTIO_BALLOON_CMD_ID_STOP 0
>> #define VIRTIO_BALLOON_CMD_ID_DONE 1
> Did you run checkpatch on this?
Sure, i did:
scripts/checkpatch.pl
../outgoing/v4-0001-Create-debugfs-file-with-virtio-balloon-usage-inf.patch
total: 0 errors, 0 warnings, 108 lines checked
../outgoing/v4-0001-Create-debugfs-file-with-virtio-balloon-usage-inf.patch
has no obvious style problems and is ready for submission.
--
Regards,
Alexander Atanasov
Hi,
On 05/07/2022 11:59, Michael S. Tsirkin wrote:
> On Tue, Jul 05, 2022 at 08:36:37AM +0000, Alexander Atanasov wrote:
>> Allow the guest to know how much it is ballooned by the host.
>> It is useful when debugging out of memory conditions.
>>
>> When host gets back memory from the guest it is accounted
>> as used memory in the guest but the guest have no way to know
>> how much it is actually ballooned.
>>
>> Signed-off-by: Alexander Atanasov <[email protected]>
>> ---
>> drivers/virtio/virtio_balloon.c | 77 +++++++++++++++++++++++++++++
>> include/uapi/linux/virtio_balloon.h | 1 +
>> 2 files changed, 78 insertions(+)
>>
>> V2:
>> - fixed coding style
>> - removed pretty print
>> V3:
>> - removed dublicate of features
>> - comment about balooned_pages more clear
>> - convert host pages to balloon pages
>> V4:
>> - added a define for BALLOON_PAGE_SIZE to make things clear
....
>> /* Size of a PFN in the balloon interface. */
>> #define VIRTIO_BALLOON_PFN_SHIFT 12
>> +#define VIRTIO_BALLOON_PAGE_SIZE (1<<VIRTIO_BALLOON_PFN_SHIFT)
>> #define VIRTIO_BALLOON_CMD_ID_STOP 0
>> #define VIRTIO_BALLOON_CMD_ID_DONE 1
> Did you run checkpatch on this?
Yes, i did.
More than a week passed and i haven't got any feedback. Have i missed
something ?
Same goes for the patch that restores semantics of vq->broken which
Jason Wang ack-ed.
--
Regards,
Alexander Atanasov
On 05.07.22 10:36, Alexander Atanasov wrote:
> Allow the guest to know how much it is ballooned by the host.
> It is useful when debugging out of memory conditions.
>
> When host gets back memory from the guest it is accounted
> as used memory in the guest but the guest have no way to know
> how much it is actually ballooned.
>
> Signed-off-by: Alexander Atanasov <[email protected]>
> ---
> drivers/virtio/virtio_balloon.c | 77 +++++++++++++++++++++++++++++
> include/uapi/linux/virtio_balloon.h | 1 +
> 2 files changed, 78 insertions(+)
>
> V2:
> - fixed coding style
> - removed pretty print
> V3:
> - removed dublicate of features
> - comment about balooned_pages more clear
> - convert host pages to balloon pages
> V4:
> - added a define for BALLOON_PAGE_SIZE to make things clear
>
> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> index b9737da6c4dd..dc4ad584b947 100644
> --- a/drivers/virtio/virtio_balloon.c
> +++ b/drivers/virtio/virtio_balloon.c
> @@ -10,6 +10,7 @@
> #include <linux/virtio_balloon.h>
> #include <linux/swap.h>
> #include <linux/workqueue.h>
> +#include <linux/debugfs.h>
> #include <linux/delay.h>
> #include <linux/slab.h>
> #include <linux/module.h>
> @@ -731,6 +732,77 @@ static void report_free_page_func(struct work_struct *work)
> }
> }
>
> +/*
> + * DEBUGFS Interface
> + */
> +#ifdef CONFIG_DEBUG_FS
> +
> +#define guest_to_balloon_pages(i) ((i)*VIRTIO_BALLOON_PAGES_PER_PAGE)
> +/**
> + * virtio_balloon_debug_show - shows statistics of balloon operations.
> + * @f: pointer to the &struct seq_file.
> + * @offset: ignored.
> + *
> + * Provides the statistics that can be accessed in virtio-balloon in the debugfs.
> + *
> + * Return: zero on success or an error code.
> + */
> +
> +static int virtio_balloon_debug_show(struct seq_file *f, void *offset)
> +{
> + struct virtio_balloon *b = f->private;
> + u32 num_pages;
> + struct sysinfo i;
> +
> + si_meminfo(&i);
> +
> + seq_printf(f, "%-22s: %d\n", "page_size", VIRTIO_BALLOON_PAGE_SIZE);
> +
> + virtio_cread_le(b->vdev, struct virtio_balloon_config, actual,
> + &num_pages);
> + /*
> + * Pages allocated by host from the guest memory.
> + * Host inflates the balloon to get more memory.
> + * Guest needs to deflate the balloon to get more memory.
> + */
Please drop that comment. This is basic virtio-balloon operation that
must not be explained at this point.
> + seq_printf(f, "%-22s: %u\n", "ballooned_pages", num_pages);
> +
> + /* Total Memory for the guest from host */
> + seq_printf(f, "%-22s: %lu\n", "total_pages",
> + guest_to_balloon_pages(i.totalram));
totalram is calculated from totalram_pages().
When we inflate/deflate, we adjust totalram as well via
adjust_managed_page_count().
Consequently, this doesn't calculate what you actually want?
Total memory would be totalram+inflated, current would be totalram.
But, TBH, only export num_pages. User space can just lookup the other
information (totalram) via /proc/meminfo.
--
Thanks,
David / dhildenb
Hello,
On 14/07/2022 14:35, David Hildenbrand wrote:
> On 05.07.22 10:36, Alexander Atanasov wrote:
>> Allow the guest to know how much it is ballooned by the host.
>> It is useful when debugging out of memory conditions.
>>
>> When host gets back memory from the guest it is accounted
>> as used memory in the guest but the guest have no way to know
>> how much it is actually ballooned.
>>
>> Signed-off-by: Alexander Atanasov <[email protected]>
>> ---
>> drivers/virtio/virtio_balloon.c | 77 +++++++++++++++++++++++++++++
>> include/uapi/linux/virtio_balloon.h | 1 +
>> 2 files changed, 78 insertions(+)
>>
>> V2:
>> - fixed coding style
>> - removed pretty print
>> V3:
>> - removed dublicate of features
>> - comment about balooned_pages more clear
>> - convert host pages to balloon pages
>> V4:
>> - added a define for BALLOON_PAGE_SIZE to make things clear
>>
>> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
>> index b9737da6c4dd..dc4ad584b947 100644
>> --- a/drivers/virtio/virtio_balloon.c
>> +++ b/drivers/virtio/virtio_balloon.c
>> @@ -10,6 +10,7 @@
>> #include <linux/virtio_balloon.h>
>> #include <linux/swap.h>
>> #include <linux/workqueue.h>
>> +#include <linux/debugfs.h>
>> #include <linux/delay.h>
>> #include <linux/slab.h>
>> #include <linux/module.h>
>> @@ -731,6 +732,77 @@ static void report_free_page_func(struct work_struct *work)
>> }
>> }
>>
>> +/*
>> + * DEBUGFS Interface
>> + */
>> +#ifdef CONFIG_DEBUG_FS
>> +
>> +#define guest_to_balloon_pages(i) ((i)*VIRTIO_BALLOON_PAGES_PER_PAGE)
>> +/**
>> + * virtio_balloon_debug_show - shows statistics of balloon operations.
>> + * @f: pointer to the &struct seq_file.
>> + * @offset: ignored.
>> + *
>> + * Provides the statistics that can be accessed in virtio-balloon in the debugfs.
>> + *
>> + * Return: zero on success or an error code.
>> + */
>> +
>> +static int virtio_balloon_debug_show(struct seq_file *f, void *offset)
>> +{
>> + struct virtio_balloon *b = f->private;
>> + u32 num_pages;
>> + struct sysinfo i;
>> +
>> + si_meminfo(&i);
>> +
>> + seq_printf(f, "%-22s: %d\n", "page_size", VIRTIO_BALLOON_PAGE_SIZE);
>> +
>> + virtio_cread_le(b->vdev, struct virtio_balloon_config, actual,
>> + &num_pages);
>> + /*
>> + * Pages allocated by host from the guest memory.
>> + * Host inflates the balloon to get more memory.
>> + * Guest needs to deflate the balloon to get more memory.
>> + */
> Please drop that comment. This is basic virtio-balloon operation that
> must not be explained at this point.
Ok
>> + seq_printf(f, "%-22s: %u\n", "ballooned_pages", num_pages);
>> +
>> + /* Total Memory for the guest from host */
>> + seq_printf(f, "%-22s: %lu\n", "total_pages",
>> + guest_to_balloon_pages(i.totalram));
> totalram is calculated from totalram_pages().
>
> When we inflate/deflate, we adjust totalram as well via
> adjust_managed_page_count().
That is true only when not using DEFLATE_ON_OOM.
Otherwise inflated memory is accounted as used and total ram stays the same.
> Consequently, this doesn't calculate what you actually want?
> Total memory would be totalram+inflated, current would be totalram.
My calculations are correct for the case deflate_on_oom is enabled.
> But, TBH, only export num_pages. User space can just lookup the other
> information (totalram) via /proc/meminfo.
I have missed that the memory accounting is made differently depending
on a flag.
Since the calculations are different i'd prefer to have the values
calculate and printed there.
Where no matter what the flags is:
- ballooned_pages -> how much is balloon inflated
- total_pages -> total ram the guest can get
- current_pages -> current ram the guest have
Patch to unify both cases with or without deflate on oom coming next.
--
Regards,
Alexander Atanasov
On 14.07.22 15:20, Alexander Atanasov wrote:
> Hello,
>
> On 14/07/2022 14:35, David Hildenbrand wrote:
>> On 05.07.22 10:36, Alexander Atanasov wrote:
>>> Allow the guest to know how much it is ballooned by the host.
>>> It is useful when debugging out of memory conditions.
>>>
>>> When host gets back memory from the guest it is accounted
>>> as used memory in the guest but the guest have no way to know
>>> how much it is actually ballooned.
>>>
>>> Signed-off-by: Alexander Atanasov <[email protected]>
>>> ---
>>> drivers/virtio/virtio_balloon.c | 77 +++++++++++++++++++++++++++++
>>> include/uapi/linux/virtio_balloon.h | 1 +
>>> 2 files changed, 78 insertions(+)
>>>
>>> V2:
>>> - fixed coding style
>>> - removed pretty print
>>> V3:
>>> - removed dublicate of features
>>> - comment about balooned_pages more clear
>>> - convert host pages to balloon pages
>>> V4:
>>> - added a define for BALLOON_PAGE_SIZE to make things clear
>>>
>>> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
>>> index b9737da6c4dd..dc4ad584b947 100644
>>> --- a/drivers/virtio/virtio_balloon.c
>>> +++ b/drivers/virtio/virtio_balloon.c
>>> @@ -10,6 +10,7 @@
>>> #include <linux/virtio_balloon.h>
>>> #include <linux/swap.h>
>>> #include <linux/workqueue.h>
>>> +#include <linux/debugfs.h>
>>> #include <linux/delay.h>
>>> #include <linux/slab.h>
>>> #include <linux/module.h>
>>> @@ -731,6 +732,77 @@ static void report_free_page_func(struct work_struct *work)
>>> }
>>> }
>>>
>>> +/*
>>> + * DEBUGFS Interface
>>> + */
>>> +#ifdef CONFIG_DEBUG_FS
>>> +
>>> +#define guest_to_balloon_pages(i) ((i)*VIRTIO_BALLOON_PAGES_PER_PAGE)
>>> +/**
>>> + * virtio_balloon_debug_show - shows statistics of balloon operations.
>>> + * @f: pointer to the &struct seq_file.
>>> + * @offset: ignored.
>>> + *
>>> + * Provides the statistics that can be accessed in virtio-balloon in the debugfs.
>>> + *
>>> + * Return: zero on success or an error code.
>>> + */
>>> +
>>> +static int virtio_balloon_debug_show(struct seq_file *f, void *offset)
>>> +{
>>> + struct virtio_balloon *b = f->private;
>>> + u32 num_pages;
>>> + struct sysinfo i;
>>> +
>>> + si_meminfo(&i);
>>> +
>>> + seq_printf(f, "%-22s: %d\n", "page_size", VIRTIO_BALLOON_PAGE_SIZE);
>>> +
>>> + virtio_cread_le(b->vdev, struct virtio_balloon_config, actual,
>>> + &num_pages);
>>> + /*
>>> + * Pages allocated by host from the guest memory.
>>> + * Host inflates the balloon to get more memory.
>>> + * Guest needs to deflate the balloon to get more memory.
>>> + */
>> Please drop that comment. This is basic virtio-balloon operation that
>> must not be explained at this point.
>
> Ok
>
>>> + seq_printf(f, "%-22s: %u\n", "ballooned_pages", num_pages);
>>> +
>>> + /* Total Memory for the guest from host */
>>> + seq_printf(f, "%-22s: %lu\n", "total_pages",
>>> + guest_to_balloon_pages(i.totalram));
>> totalram is calculated from totalram_pages().
>>
>> When we inflate/deflate, we adjust totalram as well via
>> adjust_managed_page_count().
>
> That is true only when not using DEFLATE_ON_OOM.
>
> Otherwise inflated memory is accounted as used and total ram stays the same.
>> Consequently, this doesn't calculate what you actually want?
>> Total memory would be totalram+inflated, current would be totalram.
>
> My calculations are correct for the case deflate_on_oom is enabled.
>
Which is the corner cases. You'd have to special case on DEFLATE_ON_OOM
availability.
>> But, TBH, only export num_pages. User space can just lookup the other
>> information (totalram) via /proc/meminfo.
>
> I have missed that the memory accounting is made differently depending
> on a flag.
>
> Since the calculations are different i'd prefer to have the values
> calculate and printed there.
What about an indication instead, whether or not inflated pages are
accounted into total or not? That would be slightly cleaner IMHO.
--
Thanks,
David / dhildenb
Allow the guest to know how much it is ballooned by the host.
It is useful when debugging out of memory conditions.
When host gets back memory from the guest it is accounted
as used memory in the guest but the guest have no way to know
how much it is actually ballooned.
Signed-off-by: Alexander Atanasov <[email protected]>
---
drivers/virtio/virtio_balloon.c | 79 +++++++++++++++++++++++++++++
include/uapi/linux/virtio_balloon.h | 1 +
2 files changed, 80 insertions(+)
V2:
- fixed coding style
- removed pretty print
V3:
- removed dublicate of features
- comment about balooned_pages more clear
- convert host pages to balloon pages
V4:
- added a define for BALLOON_PAGE_SIZE to make things clear
V5:
- Made the calculatons work properly for both ways of memory accounting
with or without deflate_on_oom
- dropped comment
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index b9737da6c4dd..e17f8cc71ba4 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -10,6 +10,7 @@
#include <linux/virtio_balloon.h>
#include <linux/swap.h>
#include <linux/workqueue.h>
+#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/module.h>
@@ -731,6 +732,79 @@ static void report_free_page_func(struct work_struct *work)
}
}
+/*
+ * DEBUGFS Interface
+ */
+#ifdef CONFIG_DEBUG_FS
+
+#define guest_to_balloon_pages(i) ((i)*VIRTIO_BALLOON_PAGES_PER_PAGE)
+/**
+ * virtio_balloon_debug_show - shows statistics of balloon operations.
+ * @f: pointer to the &struct seq_file.
+ * @offset: ignored.
+ *
+ * Provides the statistics that can be accessed in virtio-balloon in the debugfs.
+ *
+ * Return: zero on success or an error code.
+ */
+
+static int virtio_balloon_debug_show(struct seq_file *f, void *offset)
+{
+ struct virtio_balloon *b = f->private;
+ u32 num_pages, total_pages, current_pages;
+ struct sysinfo i;
+
+ si_meminfo(&i);
+
+ seq_printf(f, "%-22s: %d\n", "page_size", VIRTIO_BALLOON_PAGE_SIZE);
+
+ virtio_cread_le(b->vdev, struct virtio_balloon_config, actual,
+ &num_pages);
+
+ seq_printf(f, "%-22s: %u\n", "ballooned_pages", num_pages);
+
+ if (virtio_has_feature(b->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) {
+ total_pages = guest_to_balloon_pages(i.totalram);
+ current_pages = guest_to_balloon_pages(i.totalram) - num_pages;
+ } else {
+ total_pages = guest_to_balloon_pages(i.totalram) + num_pages;
+ current_pages = guest_to_balloon_pages(i.totalram);
+ }
+
+ /* Total Memory for the guest from host */
+ seq_printf(f, "%-22s: %u\n", "total_pages", total_pages);
+
+ /* Current memory for the guest */
+ seq_printf(f, "%-22s: %u\n", "current_pages", current_pages);
+
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(virtio_balloon_debug);
+
+static void virtio_balloon_debugfs_init(struct virtio_balloon *b)
+{
+ debugfs_create_file("virtio-balloon", 0444, NULL, b,
+ &virtio_balloon_debug_fops);
+}
+
+static void virtio_balloon_debugfs_exit(struct virtio_balloon *b)
+{
+ debugfs_remove(debugfs_lookup("virtio-balloon", NULL));
+}
+
+#else
+
+static inline void virtio_balloon_debugfs_init(struct virtio_balloon *b)
+{
+}
+
+static inline void virtio_balloon_debugfs_exit(struct virtio_balloon *b)
+{
+}
+
+#endif /* CONFIG_DEBUG_FS */
+
#ifdef CONFIG_BALLOON_COMPACTION
/*
* virtballoon_migratepage - perform the balloon page migration on behalf of
@@ -1019,6 +1093,9 @@ static int virtballoon_probe(struct virtio_device *vdev)
if (towards_target(vb))
virtballoon_changed(vdev);
+
+ virtio_balloon_debugfs_init(vb);
+
return 0;
out_unregister_oom:
@@ -1065,6 +1142,8 @@ static void virtballoon_remove(struct virtio_device *vdev)
{
struct virtio_balloon *vb = vdev->priv;
+ virtio_balloon_debugfs_exit(vb);
+
if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_REPORTING))
page_reporting_unregister(&vb->pr_dev_info);
if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
index ddaa45e723c4..f3ff7c4e5884 100644
--- a/include/uapi/linux/virtio_balloon.h
+++ b/include/uapi/linux/virtio_balloon.h
@@ -40,6 +40,7 @@
/* Size of a PFN in the balloon interface. */
#define VIRTIO_BALLOON_PFN_SHIFT 12
+#define VIRTIO_BALLOON_PAGE_SIZE (1<<VIRTIO_BALLOON_PFN_SHIFT)
#define VIRTIO_BALLOON_CMD_ID_STOP 0
#define VIRTIO_BALLOON_CMD_ID_DONE 1
--
2.25.1
Hello,
On 14/07/2022 16:24, David Hildenbrand wrote:
> On 14.07.22 15:20, Alexander Atanasov wrote:
>> Hello,
>>
>> On 14/07/2022 14:35, David Hildenbrand wrote:
>>> On 05.07.22 10:36, Alexander Atanasov wrote:
>>>> Allow the guest to know how much it is ballooned by the host.
>>>> It is useful when debugging out of memory conditions.
>>>>
>>>> When host gets back memory from the guest it is accounted
>>>> as used memory in the guest but the guest have no way to know
>>>> how much it is actually ballooned.
>>>>
>>>> Signed-off-by: Alexander Atanasov <[email protected]>
>>>> ---
>>>> drivers/virtio/virtio_balloon.c | 77 +++++++++++++++++++++++++++++
>>>> include/uapi/linux/virtio_balloon.h | 1 +
>>>> 2 files changed, 78 insertions(+)
>>>>
>>>> V2:
>>>> - fixed coding style
>>>> - removed pretty print
>>>> V3:
>>>> - removed dublicate of features
>>>> - comment about balooned_pages more clear
>>>> - convert host pages to balloon pages
>>>> V4:
>>>> - added a define for BALLOON_PAGE_SIZE to make things clear
>>>>
>>>>
>>>>
>>>> + seq_printf(f, "%-22s: %u\n", "ballooned_pages", num_pages);
>>>> +
>>>> + /* Total Memory for the guest from host */
>>>> + seq_printf(f, "%-22s: %lu\n", "total_pages",
>>>> + guest_to_balloon_pages(i.totalram));
>>> totalram is calculated from totalram_pages().
>>>
>>> When we inflate/deflate, we adjust totalram as well via
>>> adjust_managed_page_count().
>> That is true only when not using DEFLATE_ON_OOM.
>>
>> Otherwise inflated memory is accounted as used and total ram stays the same.
>>> Consequently, this doesn't calculate what you actually want?
>>> Total memory would be totalram+inflated, current would be totalram.
>> My calculations are correct for the case deflate_on_oom is enabled.
>>
> Which is the corner cases. You'd have to special case on DEFLATE_ON_OOM
> availability.
Besides checking the features?
>
>>> But, TBH, only export num_pages. User space can just lookup the other
>>> information (totalram) via /proc/meminfo.
>> I have missed that the memory accounting is made differently depending
>> on a flag.
>>
>> Since the calculations are different i'd prefer to have the values
>> calculate and printed there.
> What about an indication instead, whether or not inflated pages are
> accounted into total or not? That would be slightly cleaner IMHO.
If you care if they are included or not you can check the features and
see if the flag is enabled or not. Going vice versa is not so easy to calculate
the final values. Please, see the v5.
--
Regards,
Alexander Atanasov
On 14.07.22 15:20, Alexander Atanasov wrote:
> Allow the guest to know how much it is ballooned by the host.
> It is useful when debugging out of memory conditions.
>
> When host gets back memory from the guest it is accounted
> as used memory in the guest but the guest have no way to know
> how much it is actually ballooned.
>
> Signed-off-by: Alexander Atanasov <[email protected]>
> ---
> drivers/virtio/virtio_balloon.c | 79 +++++++++++++++++++++++++++++
> include/uapi/linux/virtio_balloon.h | 1 +
> 2 files changed, 80 insertions(+)
>
> V2:
> - fixed coding style
> - removed pretty print
> V3:
> - removed dublicate of features
> - comment about balooned_pages more clear
> - convert host pages to balloon pages
> V4:
> - added a define for BALLOON_PAGE_SIZE to make things clear
> V5:
> - Made the calculatons work properly for both ways of memory accounting
> with or without deflate_on_oom
> - dropped comment
>
> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> index b9737da6c4dd..e17f8cc71ba4 100644
> --- a/drivers/virtio/virtio_balloon.c
> +++ b/drivers/virtio/virtio_balloon.c
> @@ -10,6 +10,7 @@
> #include <linux/virtio_balloon.h>
> #include <linux/swap.h>
> #include <linux/workqueue.h>
> +#include <linux/debugfs.h>
> #include <linux/delay.h>
> #include <linux/slab.h>
> #include <linux/module.h>
> @@ -731,6 +732,79 @@ static void report_free_page_func(struct work_struct *work)
> }
> }
>
> +/*
> + * DEBUGFS Interface
> + */
> +#ifdef CONFIG_DEBUG_FS
> +
> +#define guest_to_balloon_pages(i) ((i)*VIRTIO_BALLOON_PAGES_PER_PAGE)
> +/**
> + * virtio_balloon_debug_show - shows statistics of balloon operations.
> + * @f: pointer to the &struct seq_file.
> + * @offset: ignored.
> + *
> + * Provides the statistics that can be accessed in virtio-balloon in the debugfs.
> + *
> + * Return: zero on success or an error code.
> + */
> +
> +static int virtio_balloon_debug_show(struct seq_file *f, void *offset)
> +{
> + struct virtio_balloon *b = f->private;
Most other functions call this "vb".
> + u32 num_pages, total_pages, current_pages;
> + struct sysinfo i;
> +
> + si_meminfo(&i);
> +
> + seq_printf(f, "%-22s: %d\n", "page_size", VIRTIO_BALLOON_PAGE_SIZE);
Why? Either export all in ordinary page size or in kB. No need to
over-complicate the interface with a different page size that users
don't actually care about.
I'd just stick to /proc/meminfo and use kB.
> +
> + virtio_cread_le(b->vdev, struct virtio_balloon_config, actual,
> + &num_pages);
What's wrong with vb->num_pages? I'd prefer not doing config access, if
it can be avoided.
> +
> + seq_printf(f, "%-22s: %u\n", "ballooned_pages", num_pages);
> +
> + if (virtio_has_feature(b->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) {
> + total_pages = guest_to_balloon_pages(i.totalram);
> + current_pages = guest_to_balloon_pages(i.totalram) - num_pages;
> + } else {
> + total_pages = guest_to_balloon_pages(i.totalram) + num_pages;
> + current_pages = guest_to_balloon_pages(i.totalram);
> + }
> +
> + /* Total Memory for the guest from host */
> + seq_printf(f, "%-22s: %u\n", "total_pages", total_pages);
> +
> + /* Current memory for the guest */
> + seq_printf(f, "%-22s: %u\n", "current_pages", current_pages);
The think I detest about that interface (total/current) is that it's
simply not correct -- because i.totalram for example doesn't include
things like (similar to MemTotal in /proc/meminfo)
a) crashkernel
b) early boot allocations (e.g., memmap)
c) any kind of possible memory (un)hotplug in the future
I'd really suggest to just KIS and not mess with i.totalram.
Exposing how much memory is inflated and some way to identify how that
memory is accounted in /proc/meminfo should be good enough.
E.g., the output here could simply be
"Inflated: 1024 kB"
"MemTotalReduced: 1024 kB"
That would even allow in the future for flexibility when it comes to how
much/what was actually subtracted from MemTotal.
> +
> + return 0;
> +}
> +
> +DEFINE_SHOW_ATTRIBUTE(virtio_balloon_debug);
> +
> +static void virtio_balloon_debugfs_init(struct virtio_balloon *b)
> +{
> + debugfs_create_file("virtio-balloon", 0444, NULL, b,
> + &virtio_balloon_debug_fops);
> +}
> +
> +static void virtio_balloon_debugfs_exit(struct virtio_balloon *b)
> +{
> + debugfs_remove(debugfs_lookup("virtio-balloon", NULL));
> +}
> +
> +#else
> +
> +static inline void virtio_balloon_debugfs_init(struct virtio_balloon *b)
> +{
> +}
> +
> +static inline void virtio_balloon_debugfs_exit(struct virtio_balloon *b)
> +{
> +}
> +
> +#endif /* CONFIG_DEBUG_FS */
[...]
> diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
> index ddaa45e723c4..f3ff7c4e5884 100644
> --- a/include/uapi/linux/virtio_balloon.h
> +++ b/include/uapi/linux/virtio_balloon.h
> @@ -40,6 +40,7 @@
>
> /* Size of a PFN in the balloon interface. */
> #define VIRTIO_BALLOON_PFN_SHIFT 12
> +#define VIRTIO_BALLOON_PAGE_SIZE (1<<VIRTIO_BALLOON_PFN_SHIFT)
We prefer extra spacing
(1 << VIRTIO_BALLOON_PFN_SHIFT)
--
Thanks,
David / dhildenb
Hi,
On 18/07/2022 14:35, David Hildenbrand wrote:
> On 14.07.22 15:20, Alexander Atanasov wrote:
>> Allow the guest to know how much it is ballooned by the host.
>> It is useful when debugging out of memory conditions.
>>
>> When host gets back memory from the guest it is accounted
>> as used memory in the guest but the guest have no way to know
>> how much it is actually ballooned.
>>
>> Signed-off-by: Alexander Atanasov <[email protected]>
>> ---
>> drivers/virtio/virtio_balloon.c | 79 +++++++++++++++++++++++++++++
>> include/uapi/linux/virtio_balloon.h | 1 +
>> 2 files changed, 80 insertions(+)
>>
>> V2:
>> - fixed coding style
>> - removed pretty print
>> V3:
>> - removed dublicate of features
>> - comment about balooned_pages more clear
>> - convert host pages to balloon pages
>> V4:
>> - added a define for BALLOON_PAGE_SIZE to make things clear
>> V5:
>> - Made the calculatons work properly for both ways of memory accounting
>> with or without deflate_on_oom
>> - dropped comment
>>
[....]
>> + u32 num_pages, total_pages, current_pages;
>> + struct sysinfo i;
>> +
>> + si_meminfo(&i);
>> +
>> + seq_printf(f, "%-22s: %d\n", "page_size", VIRTIO_BALLOON_PAGE_SIZE);
> Why? Either export all in ordinary page size or in kB. No need to
> over-complicate the interface with a different page size that users
> don't actually care about.
>
> I'd just stick to /proc/meminfo and use kB.
The point is to expose some internal balloon data. Balloon works with
pages not with kB and users can easily calculate kB.
>> +
>> + seq_printf(f, "%-22s: %u\n", "ballooned_pages", num_pages);
>> +
>> + if (virtio_has_feature(b->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) {
>> + total_pages = guest_to_balloon_pages(i.totalram);
>> + current_pages = guest_to_balloon_pages(i.totalram) - num_pages;
>> + } else {
>> + total_pages = guest_to_balloon_pages(i.totalram) + num_pages;
>> + current_pages = guest_to_balloon_pages(i.totalram);
>> + }
>> +
>> + /* Total Memory for the guest from host */
>> + seq_printf(f, "%-22s: %u\n", "total_pages", total_pages);
>> +
>> + /* Current memory for the guest */
>> + seq_printf(f, "%-22s: %u\n", "current_pages", current_pages);
> The think I detest about that interface (total/current) is that it's
> simply not correct -- because i.totalram for example doesn't include
> things like (similar to MemTotal in /proc/meminfo)
>
> a) crashkernel
> b) early boot allocations (e.g., memmap)
> c) any kind of possible memory (un)hotplug in the future
>
> I'd really suggest to just KIS and not mess with i.totalram.
>
> Exposing how much memory is inflated and some way to identify how that
> memory is accounted in /proc/meminfo should be good enough.
>
> E.g., the output here could simply be
>
> "Inflated: 1024 kB"
> "MemTotalReduced: 1024 kB"
>
> That would even allow in the future for flexibility when it comes to how
> much/what was actually subtracted from MemTotal.
The point of the debug interface is to expose some of the balloon driver
internals to the guest.
The users of this are user space processes that try to work according to
the memory pressure and nested virtualization.
I haven't seen one userspace software that expects total ram to change,
it should be constant with one exception hotplug. But if you do hotplug
RAM you know that and you can restart what you need to restart.
So instead of messing with totalram with adding or removing pages /it
would even be optimization since now it is done page by page/ i suggest
to just account the inflated memory as used as in the deflate_on_oom
case now. It is confusing and inconsistent as it is now. How to explain
to a vps user why his RAM is constantly changing?
And the file can go just to
inflated: <pages>
ballon_page_size: 4096
or
inflated: kB
I prefer pages because on theory guest and host can different size and
if at some point guest starts to make requests to the host for pages it
must somehow know what the host/balloon page is. Since you have all the
information at one place it is easy to calculate kB. But you can not
calculate pages from only kB.
Later on when hotplug comes it can add it's own data to the file so it
can be used to see the amount that is added to the total ram.
It can add
hotplugged: <pages>
What do you think?
>> diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
>> index ddaa45e723c4..f3ff7c4e5884 100644
>> --- a/include/uapi/linux/virtio_balloon.h
>> +++ b/include/uapi/linux/virtio_balloon.h
>> @@ -40,6 +40,7 @@
>>
>> /* Size of a PFN in the balloon interface. */
>> #define VIRTIO_BALLOON_PFN_SHIFT 12
>> +#define VIRTIO_BALLOON_PAGE_SIZE (1<<VIRTIO_BALLOON_PFN_SHIFT)
> We prefer extra spacing
>
> (1 << VIRTIO_BALLOON_PFN_SHIFT)
Ok, i am working on my bad habits - thanks! I'll address the style
issues and variable naming in the next version along with the future
feedback.
--
Regards,
Alexander Atanasov
On 25.07.22 13:27, Alexander Atanasov wrote:
> Hi,
>
> On 18/07/2022 14:35, David Hildenbrand wrote:
>> On 14.07.22 15:20, Alexander Atanasov wrote:
>>> Allow the guest to know how much it is ballooned by the host.
>>> It is useful when debugging out of memory conditions.
>>>
>>> When host gets back memory from the guest it is accounted
>>> as used memory in the guest but the guest have no way to know
>>> how much it is actually ballooned.
>>>
>>> Signed-off-by: Alexander Atanasov <[email protected]>
>>> ---
>>> drivers/virtio/virtio_balloon.c | 79 +++++++++++++++++++++++++++++
>>> include/uapi/linux/virtio_balloon.h | 1 +
>>> 2 files changed, 80 insertions(+)
>>>
>>> V2:
>>> - fixed coding style
>>> - removed pretty print
>>> V3:
>>> - removed dublicate of features
>>> - comment about balooned_pages more clear
>>> - convert host pages to balloon pages
>>> V4:
>>> - added a define for BALLOON_PAGE_SIZE to make things clear
>>> V5:
>>> - Made the calculatons work properly for both ways of memory accounting
>>> with or without deflate_on_oom
>>> - dropped comment
>>>
> [....]
>>> + u32 num_pages, total_pages, current_pages;
>>> + struct sysinfo i;
>>> +
>>> + si_meminfo(&i);
>>> +
>>> + seq_printf(f, "%-22s: %d\n", "page_size", VIRTIO_BALLOON_PAGE_SIZE);
>> Why? Either export all in ordinary page size or in kB. No need to
>> over-complicate the interface with a different page size that users
>> don't actually care about.
>>
>> I'd just stick to /proc/meminfo and use kB.
>
> The point is to expose some internal balloon data. Balloon works with
> pages not with kB and users can easily calculate kB.
Pages translate to KB. kB are easy to consume by humans instead of pages
with variable apge sizes
>
>>> +
>>> + seq_printf(f, "%-22s: %u\n", "ballooned_pages", num_pages);
>>> +
>>> + if (virtio_has_feature(b->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) {
>>> + total_pages = guest_to_balloon_pages(i.totalram);
>>> + current_pages = guest_to_balloon_pages(i.totalram) - num_pages;
>>> + } else {
>>> + total_pages = guest_to_balloon_pages(i.totalram) + num_pages;
>>> + current_pages = guest_to_balloon_pages(i.totalram);
>>> + }
>>> +
>>> + /* Total Memory for the guest from host */
>>> + seq_printf(f, "%-22s: %u\n", "total_pages", total_pages);
>>> +
>>> + /* Current memory for the guest */
>>> + seq_printf(f, "%-22s: %u\n", "current_pages", current_pages);
>> The think I detest about that interface (total/current) is that it's
>> simply not correct -- because i.totalram for example doesn't include
>> things like (similar to MemTotal in /proc/meminfo)
>>
>> a) crashkernel
>> b) early boot allocations (e.g., memmap)
>> c) any kind of possible memory (un)hotplug in the future
>>
>> I'd really suggest to just KIS and not mess with i.totalram.
>>
>> Exposing how much memory is inflated and some way to identify how that
>> memory is accounted in /proc/meminfo should be good enough.
>>
>> E.g., the output here could simply be
>>
>> "Inflated: 1024 kB"
>> "MemTotalReduced: 1024 kB"
>>
>> That would even allow in the future for flexibility when it comes to how
>> much/what was actually subtracted from MemTotal.
>
>
> The point of the debug interface is to expose some of the balloon driver
> internals to the guest.
>
> The users of this are user space processes that try to work according to
> the memory pressure and nested virtualization.
>
> I haven't seen one userspace software that expects total ram to change,
> it should be constant with one exception hotplug. But if you do hotplug
> RAM you know that and you can restart what you need to restart.
>
> So instead of messing with totalram with adding or removing pages /it
> would even be optimization since now it is done page by page/ i suggest
> to just account the inflated memory as used as in the deflate_on_oom
> case now. It is confusing and inconsistent as it is now. How to explain
> to a vps user why his RAM is constantly changing?
>
> And the file can go just to
>
> inflated: <pages>
>
> ballon_page_size: 4096
>
> or
>
> inflated: kB
>
> I prefer pages because on theory guest and host can different size and
> if at some point guest starts to make requests to the host for pages it
> must somehow know what the host/balloon page is. Since you have all the
> information at one place it is easy to calculate kB. But you can not
> calculate pages from only kB.
The host can only inflate in guest-page base sizes. How that translates
to host-page base sizes is absolutely irrelevant.
Who should care about pages? Absolutely irrelevant.
Guest pages: kB / getpagesize()
Logical balloon pages: kB / 4k
Host pages: ???
>
> Later on when hotplug comes it can add it's own data to the file so it
> can be used to see the amount that is added to the total ram.
>
> It can add
>
> hotplugged: <pages>
>
>
> What do you think?
Let's keep this interface simple, please.
--
Thanks,
David / dhildenb
Allow the guest to know how much it is ballooned by the host
and how that memory is accounted.
It is useful when debugging out of memory conditions,
as well for userspace processes that monitor the memory pressure
and for nested virtualization.
Depending on the deflate on oom flag memory is accounted in two ways.
If the flag is set the inflated pages are accounted as used memory.
If the flag is not set the inflated pages are substracted from totalram.
To make clear how are inflated pages accounted sign prefix the value.
Where negative means substracted from totalram and positive means
they are accounted as used.
Signed-off-by: Alexander Atanasov <[email protected]>
---
drivers/virtio/virtio_balloon.c | 59 +++++++++++++++++++++++++++++++++
1 file changed, 59 insertions(+)
V2:
- fixed coding style
- removed pretty print
V3:
- removed dublicate of features
- comment about balooned_pages more clear
- convert host pages to balloon pages
V4:
- added a define for BALLOON_PAGE_SIZE to make things clear
V5:
- Made the calculatons work properly for both ways of memory accounting
with or without deflate_on_oom
- dropped comment
V6:
- reduced the file to minimum
- unify accounting
I didn't understand if you agree to change the accounting to be the same
so following part 2 is about it.
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index f4c34a2a6b8e..97d3b29cb9f1 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -10,6 +10,7 @@
#include <linux/virtio_balloon.h>
#include <linux/swap.h>
#include <linux/workqueue.h>
+#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/module.h>
@@ -731,6 +732,59 @@ static void report_free_page_func(struct work_struct *work)
}
}
+/*
+ * DEBUGFS Interface
+ */
+#ifdef CONFIG_DEBUG_FS
+
+/**
+ * virtio_balloon_debug_show - shows statistics of balloon operations.
+ * @f: pointer to the &struct seq_file.
+ * @offset: ignored.
+ *
+ * Provides the statistics that can be accessed in virtio-balloon in the debugfs.
+ *
+ * Return: zero on success or an error code.
+ */
+
+static int virtio_balloon_debug_show(struct seq_file *f, void *offset)
+{
+ struct virtio_balloon *vb = f->private;
+ s64 num_pages = vb->num_pages << (VIRTIO_BALLOON_PFN_SHIFT - 10);
+
+ if (!virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
+ num_pages = -num_pages;
+
+ seq_printf(f, "inflated: %lld kB\n", num_pages);
+
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(virtio_balloon_debug);
+
+static void virtio_balloon_debugfs_init(struct virtio_balloon *b)
+{
+ debugfs_create_file("virtio-balloon", 0444, NULL, b,
+ &virtio_balloon_debug_fops);
+}
+
+static void virtio_balloon_debugfs_exit(struct virtio_balloon *b)
+{
+ debugfs_remove(debugfs_lookup("virtio-balloon", NULL));
+}
+
+#else
+
+static inline void virtio_balloon_debugfs_init(struct virtio_balloon *b)
+{
+}
+
+static inline void virtio_balloon_debugfs_exit(struct virtio_balloon *b)
+{
+}
+
+#endif /* CONFIG_DEBUG_FS */
+
#ifdef CONFIG_BALLOON_COMPACTION
/*
* virtballoon_migratepage - perform the balloon page migration on behalf of
@@ -1019,6 +1073,9 @@ static int virtballoon_probe(struct virtio_device *vdev)
if (towards_target(vb))
virtballoon_changed(vdev);
+
+ virtio_balloon_debugfs_init(vb);
+
return 0;
out_unregister_oom:
@@ -1065,6 +1122,8 @@ static void virtballoon_remove(struct virtio_device *vdev)
{
struct virtio_balloon *vb = vdev->priv;
+ virtio_balloon_debugfs_exit(vb);
+
if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_REPORTING))
page_reporting_unregister(&vb->pr_dev_info);
if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
--
2.25.1
On 26.07.22 16:08, Alexander Atanasov wrote:
> Allow the guest to know how much it is ballooned by the host
> and how that memory is accounted.
>
> It is useful when debugging out of memory conditions,
> as well for userspace processes that monitor the memory pressure
> and for nested virtualization.
>
> Depending on the deflate on oom flag memory is accounted in two ways.
> If the flag is set the inflated pages are accounted as used memory.
> If the flag is not set the inflated pages are substracted from totalram.
> To make clear how are inflated pages accounted sign prefix the value.
> Where negative means substracted from totalram and positive means
> they are accounted as used.
>
> Signed-off-by: Alexander Atanasov <[email protected]>
> ---
> drivers/virtio/virtio_balloon.c | 59 +++++++++++++++++++++++++++++++++
> 1 file changed, 59 insertions(+)
>
> V2:
> - fixed coding style
> - removed pretty print
> V3:
> - removed dublicate of features
> - comment about balooned_pages more clear
> - convert host pages to balloon pages
> V4:
> - added a define for BALLOON_PAGE_SIZE to make things clear
> V5:
> - Made the calculatons work properly for both ways of memory accounting
> with or without deflate_on_oom
> - dropped comment
> V6:
> - reduced the file to minimum
> - unify accounting
>
> I didn't understand if you agree to change the accounting to be the same
> so following part 2 is about it.
>
>
> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> index f4c34a2a6b8e..97d3b29cb9f1 100644
> --- a/drivers/virtio/virtio_balloon.c
> +++ b/drivers/virtio/virtio_balloon.c
> @@ -10,6 +10,7 @@
> #include <linux/virtio_balloon.h>
> #include <linux/swap.h>
> #include <linux/workqueue.h>
> +#include <linux/debugfs.h>
> #include <linux/delay.h>
> #include <linux/slab.h>
> #include <linux/module.h>
> @@ -731,6 +732,59 @@ static void report_free_page_func(struct work_struct *work)
> }
> }
>
> +/*
> + * DEBUGFS Interface
> + */
> +#ifdef CONFIG_DEBUG_FS
> +
> +/**
> + * virtio_balloon_debug_show - shows statistics of balloon operations.
> + * @f: pointer to the &struct seq_file.
> + * @offset: ignored.
> + *
> + * Provides the statistics that can be accessed in virtio-balloon in the debugfs.
> + *
> + * Return: zero on success or an error code.
> + */
> +
Superfluous empty line. Personally, I'd just get rid of this
(comparatively obvious) documentation completely.
> +static int virtio_balloon_debug_show(struct seq_file *f, void *offset)
> +{
> + struct virtio_balloon *vb = f->private;
> + s64 num_pages = vb->num_pages << (VIRTIO_BALLOON_PFN_SHIFT - 10);
Rather call this "inflated_kb" then, it's no longer "pages".
> +
> + if (!virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
> + num_pages = -num_pages;
With VIRTIO_BALLOON_F_DEFLATE_ON_OOM this will now always report "0".
which would be the same as "num_pages = 0;" and would deserve a comment
explaining why we don't indicate that as "inflated: ".
Thanks.
--
Thanks,
David / dhildenb
On 01/08/2022 18:18, David Hildenbrand wrote:
> On 26.07.22 16:08, Alexander Atanasov wrote:
>> Allow the guest to know how much it is ballooned by the host
>> and how that memory is accounted.
>>
>> It is useful when debugging out of memory conditions,
>> as well for userspace processes that monitor the memory pressure
>> and for nested virtualization.
>>
>> Depending on the deflate on oom flag memory is accounted in two ways.
>> If the flag is set the inflated pages are accounted as used memory.
>> If the flag is not set the inflated pages are substracted from totalram.
>> To make clear how are inflated pages accounted sign prefix the value.
>> Where negative means substracted from totalram and positive means
>> they are accounted as used.
>>
>> Signed-off-by: Alexander Atanasov <[email protected]>
>> ---
>> drivers/virtio/virtio_balloon.c | 59 +++++++++++++++++++++++++++++++++
>> 1 file changed, 59 insertions(+)
>>
>> V2:
>> - fixed coding style
>> - removed pretty print
>> V3:
>> - removed dublicate of features
>> - comment about balooned_pages more clear
>> - convert host pages to balloon pages
>> V4:
>> - added a define for BALLOON_PAGE_SIZE to make things clear
>> V5:
>> - Made the calculatons work properly for both ways of memory accounting
>> with or without deflate_on_oom
>> - dropped comment
>> V6:
>> - reduced the file to minimum
>> - unify accounting
>>
>> I didn't understand if you agree to change the accounting to be the same
>> so following part 2 is about it.
>>
>>
>> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
>> index f4c34a2a6b8e..97d3b29cb9f1 100644
>> --- a/drivers/virtio/virtio_balloon.c
>> +++ b/drivers/virtio/virtio_balloon.c
>> @@ -10,6 +10,7 @@
>> #include <linux/virtio_balloon.h>
>> #include <linux/swap.h>
>> #include <linux/workqueue.h>
>> +#include <linux/debugfs.h>
>> #include <linux/delay.h>
>> #include <linux/slab.h>
>> #include <linux/module.h>
>> @@ -731,6 +732,59 @@ static void report_free_page_func(struct work_struct *work)
>> }
>> }
>>
>> +/*
>> + * DEBUGFS Interface
>> + */
>> +#ifdef CONFIG_DEBUG_FS
>> +
>> +/**
>> + * virtio_balloon_debug_show - shows statistics of balloon operations.
>> + * @f: pointer to the &struct seq_file.
>> + * @offset: ignored.
>> + *
>> + * Provides the statistics that can be accessed in virtio-balloon in the debugfs.
>> + *
>> + * Return: zero on success or an error code.
>> + */
>> +
> Superfluous empty line. Personally, I'd just get rid of this
> (comparatively obvious) documentation completely.
Ok.
>
>> +static int virtio_balloon_debug_show(struct seq_file *f, void *offset)
>> +{
>> + struct virtio_balloon *vb = f->private;
>> + s64 num_pages = vb->num_pages << (VIRTIO_BALLOON_PFN_SHIFT - 10);
> Rather call this "inflated_kb" then, it's no longer "pages".
Ok.
>
>> +
>> + if (!virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
>> + num_pages = -num_pages;
> With VIRTIO_BALLOON_F_DEFLATE_ON_OOM this will now always report "0".
>
> which would be the same as "num_pages = 0;" and would deserve a comment
> explaining why we don't indicate that as "inflated: ".
>
> Thanks.
Except if "now" refers to some commit that i am missing it does not report 0.
with qemu-system-x86_64 -enable-kvm -device virtio-balloon,id=balloon0,deflate-on-oom=on,notify_on_empty=on,page-per-vq=on -m 3G ....
/ # cat /sys/kernel/debug/virtio-balloon
inflated: 0 kB
/ # QEMU 4.2.1 monitor - type 'help' for more information
(qemu) balloon 1024
(qemu)
/ # cat /sys/kernel/debug/virtio-balloon
inflated: 2097152 kB
/ #
with qemu-system-x86_64 -enable-kvm -device
virtio-balloon,id=balloon0,deflate-on-oom=off,notify_on_empty=on,page-per-vq=on
-m 3G ...
/ # cat /sys/kernel/debug/virtio-balloon
inflated: 0 kB
/ # QEMU 4.2.1 monitor - type 'help' for more information
(qemu) balloon 1024
(qemu)
/ # cat /sys/kernel/debug/virtio-balloon
inflated: -2097152 kB
To join the threads:
>> Always account inflated memory as used for both cases - with and
>> without deflate on oom. Do not change total ram which can confuse
>> userspace and users.
> Sorry, but NAK.
Ok.
> This would affect existing users / user space / balloon stats. For example
> HV just recently switch to properly using adjust_managed_page_count()
I am wondering what's the rationale behind this i have never seen such users
that expect it to work like this. Do you have any pointers to such users, so
i can understood why they do so ?
--
Regards,
Alexander Atanasov
>>> +
>>> + if (!virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
>>> + num_pages = -num_pages;
>> With VIRTIO_BALLOON_F_DEFLATE_ON_OOM this will now always report "0".
>>
>> which would be the same as "num_pages = 0;" and would deserve a comment
>> explaining why we don't indicate that as "inflated: ".
>>
>> Thanks.
>
> Except if "now" refers to some commit that i am missing it does not report 0.
/me facepalm
I read "-= num_pages"
>
>
> with qemu-system-x86_64 -enable-kvm -device virtio-balloon,id=balloon0,deflate-on-oom=on,notify_on_empty=on,page-per-vq=on -m 3G ....
>
> / # cat /sys/kernel/debug/virtio-balloon
> inflated: 0 kB
> / # QEMU 4.2.1 monitor - type 'help' for more information
> (qemu) balloon 1024
> (qemu)
>
> / # cat /sys/kernel/debug/virtio-balloon
> inflated: 2097152 kB
> / #
>
> with qemu-system-x86_64 -enable-kvm -device
> virtio-balloon,id=balloon0,deflate-on-oom=off,notify_on_empty=on,page-per-vq=on
> -m 3G ...
>
> / # cat /sys/kernel/debug/virtio-balloon
> inflated: 0 kB
> / # QEMU 4.2.1 monitor - type 'help' for more information
> (qemu) balloon 1024
> (qemu)
> / # cat /sys/kernel/debug/virtio-balloon
> inflated: -2097152 kB
What's the rationale of making it negative?
>
> To join the threads:
>
>>> Always account inflated memory as used for both cases - with and
>>> without deflate on oom. Do not change total ram which can confuse
>>> userspace and users.
>
>> Sorry, but NAK.
>
> Ok.
>
>> This would affect existing users / user space / balloon stats. For example
>> HV just recently switch to properly using adjust_managed_page_count()
>
>
> I am wondering what's the rationale behind this i have never seen such users
> that expect it to work like this. Do you have any pointers to such users, so
> i can understood why they do so ?
We adjust total pages and managed pages to simulate what memory is
actually available to the system (just like during memory hot(un)plug).
Even though the pages are "allocated" by the driver, they are actually
unusable for the system, just as if they would have been offlined.
Strictly speaking, the guest OS can kill as many processes as it wants,
it cannot reclaim that memory, as it's logically no longer available.
There is nothing (valid, well, except driver unloading) the guest can do
to reuse these pages. The hypervisor has to get involved first to grant
access to some of these pages again (deflate the balloon).
It's different with deflate-on-oom: the guest will *itself* decide to
reuse inflated pages to deflate them. So the allocated pages can become
back usable easily. There was a recent discussion for virtio-balloon to
change that behavior when it's known that the hypervisor essentially
implements "deflate-on-oom" by looking at guest memory stats and
adjusting the balloon size accordingly; however, as long as we don't
know what the hypervisor does or doesn't do, we have to keep the
existing behavior.
Note that most balloon drivers under Linux share that behavior.
In case of Hyper-V I remember a customer BUG report that requested that
exact behavior, however, I'm not able to locate the BZ quickly.
[1]
https://lists.linuxfoundation.org/pipermail/virtualization/2021-November/057767.html
(note that I can't easily find the original mail in the archives)
Note: I suggested under [1] to expose inflated pages via /proc/meminfo
directly. We could do that consistently over all balloon drivers ...
doesn't sound too crazy.
--
Thanks,
David / dhildenb
Hi,
I put some more people on the CC, questions for you at the end , TIA.
On 01/08/2022 23:12, David Hildenbrand wrote:
>> / # cat /sys/kernel/debug/virtio-balloon
>> inflated: -2097152 kB
> What's the rationale of making it negative?
As suggested earlier indicate how the memory is accounted in the two
different cases. Negative means it is subtracted from MemTotal .
Positive means it is accounted as used .
>> To join the threads:
>>
>>>> Always account inflated memory as used for both cases - with and
>>>> without deflate on oom. Do not change total ram which can confuse
>>>> userspace and users.
>>> Sorry, but NAK.
>> Ok.
>>
>>> This would affect existing users / user space / balloon stats. For example
>>> HV just recently switch to properly using adjust_managed_page_count()
>>
>> I am wondering what's the rationale behind this i have never seen such users
>> that expect it to work like this. Do you have any pointers to such users, so
>> i can understood why they do so ?
> We adjust total pages and managed pages to simulate what memory is
> actually available to the system (just like during memory hot(un)plug).
> Even though the pages are "allocated" by the driver, they are actually
> unusable for the system, just as if they would have been offlined.
> Strictly speaking, the guest OS can kill as many processes as it wants,
> it cannot reclaim that memory, as it's logically no longer available.
>
> There is nothing (valid, well, except driver unloading) the guest can do
> to reuse these pages. The hypervisor has to get involved first to grant
> access to some of these pages again (deflate the balloon).
>
> It's different with deflate-on-oom: the guest will *itself* decide to
> reuse inflated pages to deflate them. So the allocated pages can become
> back usable easily. There was a recent discussion for virtio-balloon to
> change that behavior when it's known that the hypervisor essentially
> implements "deflate-on-oom" by looking at guest memory stats and
> adjusting the balloon size accordingly; however, as long as we don't
> know what the hypervisor does or doesn't do, we have to keep the
> existing behavior.
>
> Note that most balloon drivers under Linux share that behavior.
>
> In case of Hyper-V I remember a customer BUG report that requested that
> exact behavior, however, I'm not able to locate the BZ quickly.
> [1] https://lists.linuxfoundation.org/pipermail/virtualization/2021-November/057767.html
> (note that I can't easily find the original mail in the archives)
VMWare does not, Xen do, HV do (but it didn't) - Virtio does both.
For me the confusion comes from mixing ballooning and hot plug.
Ballooning is like a heap inside the guest from which the host can
allocate/deallocate pages, if there is a mechanism for the guest to ask
the host for more/to free/ pages or the host have a heuristic to monitor
the guest and inflate/deflate the guest it is a matter of implementation.
Hot plug is adding to MemTotal and it is not a random event either in
real or virtual environment - so you can act upon it. MemTotal goes
down on hot unplug and if pages get marked as faulty RAM.
Historically MemTotal is a stable value ( i agree with most of David
Stevens points) and user space is expecting it to be stable ,
initialized at startup and it does not expect it to change.
Used is what changes and that is what user space expects to change.
Delfate on oom might have been a mistake but it is there and if anything
depends on changing MemTotal it will be broken by that option. How
that can be fixed?
I agree that the host can not reclaim what is marked as used but should
it be able to ? May be it will be good to teach oom killer that there
can be such ram that can not be reclaimed.
> Note: I suggested under [1] to expose inflated pages via /proc/meminfo
> directly. We could do that consistently over all balloon drivers ...
> doesn't sound too crazy.
Initally i wanted to do exactly this BUT:
- some drivers prefer to expose some more internal information in the file.
- a lot of user space is using meminfo so better keep it as is to avoid breaking something, ballooning is not very frequently used.
Please, share your view on how the ballooned memory should be accounted by the drivers inside the guests so we can work towards consistent behaviour:
Should the inflated memory be accounted as Used or MemTotal be adjusted?
Should the inflated memory be added to /proc/meminfo ?
--
Regards,
Alexander Atanasov
>>
>> In case of Hyper-V I remember a customer BUG report that requested that
>> exact behavior, however, I'm not able to locate the BZ quickly.
>> [1] https://lists.linuxfoundation.org/pipermail/virtualization/2021-November/057767.html
>> (note that I can't easily find the original mail in the archives)
>
> VMWare does not, Xen do, HV do (but it didn't) - Virtio does both.
>
> For me the confusion comes from mixing ballooning and hot plug.
For example, QEMU (and even libvirt) doesn't even have built in support
for any kind of automatic balloon resizing on guest memory pressure (and
I'm happy that we don't implement any such heuristics). As a user/admin,
all you can do is manually adjust the logical VM size by requesting to
inflate/deflate the balloon. For virtio-balloon, we cannot derive what
the hypervisor/admin might or might not do -- and whether the admin
intends to use memory ballooning for memory hotunplug or for optimizing
memory overcommit.
As another example, HV dynamic memory actually combines memory hotplug
with memory ballooning: use memory hotplug to add more memory on demand
and use memory ballooning to logically unplug memory again.
The VMWare balloon is a bit special, because it actually usually
implements deflate-on-oom semantics in the hypervisor. IIRC, the
hypervisor will actually adjust the balloon size based on guest memory
stats, and there isn't really an interface to manually set the balloon
size for an admin. But I might be wrong regarding the latter.
>
> Ballooning is like a heap inside the guest from which the host can
> allocate/deallocate pages, if there is a mechanism for the guest to ask
> the host for more/to free/ pages or the host have a heuristic to monitor
> the guest and inflate/deflate the guest it is a matter of implementation.
Please don't assume that the use case for memory ballooning is only
optimizing memory overcommit in the hypervisor under memory pressure.
>
> Hot plug is adding to MemTotal and it is not a random event either in
> real or virtual environment - so you can act upon it. MemTotal goes
> down on hot unplug and if pages get marked as faulty RAM.
"not a random event either" -- sure, with ppc dlpar, xen balloon, hv
balloon or virtio-mem ... which all are able to hotplug memory fairly
randomly based on hypervisor decisions.
In physical environments, it's not really a random event, I agree.
>
> Historically MemTotal is a stable value ( i agree with most of David
> Stevens points) and user space is expecting it to be stable ,
> initialized at startup and it does not expect it to change.
Just like some apps are not prepared for memory hot(un)plug. Some apps
just don't work in environments with variable physical memory sizes:
examples include databases, where memory ballooning might essentially be
completely useless (there is a paper about application-aware memory
ballooning for that exact use case).
>
> Used is what changes and that is what user space expects to change.
>
> Delfate on oom might have been a mistake but it is there and if anything
> depends on changing MemTotal it will be broken by that option. How
> that can be fixed?
I didn't quite get your concern here. Deflate-on-oom in virtio-balloon
won't adjust MemTotal, so under which condition would be something broken?
>
> I agree that the host can not reclaim what is marked as used but should
> it be able to ? May be it will be good to teach oom killer that there
> can be such ram that can not be reclaimed.
>
>> Note: I suggested under [1] to expose inflated pages via /proc/meminfo
>> directly. We could do that consistently over all balloon drivers ...
>> doesn't sound too crazy.
>
> Initally i wanted to do exactly this BUT:
> - some drivers prefer to expose some more internal information in the file.
They always can have an extended debugfs interface in addition.
> - a lot of user space is using meminfo so better keep it as is to avoid breaking something, ballooning is not very frequently used.
We can always extend. Just recently, we exposed Zswap data:
commit f6498b776d280b30a4614d8261840961e993c2c8
Author: Johannes Weiner <[email protected]>
Date: Thu May 19 14:08:53 2022 -0700
mm: zswap: add basic meminfo and vmstat coverage
Exposing information about inflated pages in a generic way doesn't sound
completely wrong to me, but there might be people that object.
>
>
> Please, share your view on how the ballooned memory should be accounted by the drivers inside the guests so we can work towards consistent behaviour:
>
> Should the inflated memory be accounted as Used or MemTotal be adjusted?
I hope I was able to make it clear that it completely depends on how
memory ballooning is actually intended to be used. It's not uncommon to
use it as form of fake memory hotunplug, where that memory is actually
gone for good and won't simply come back when under memory pressure.
>
> Should the inflated memory be added to /proc/meminfo ?
My gut feeling is yes. The interesting question remains, how to
distinguish the two use cases (inflated memory subtracted from MemTotal
or subtracted from MemFree).
I'm not sure if we even want to unify balloon handling reagrding
adjusting managed pages. IMHO, there are good reasons to do it either way.
--
Thanks,
David / dhildenb
Hello,
On 2.08.22 16:48, David Hildenbrand wrote:
>>>
>>> In case of Hyper-V I remember a customer BUG report that requested that
>>> exact behavior, however, I'm not able to locate the BZ quickly.
>>> [1] https://lists.linuxfoundation.org/pipermail/virtualization/2021-November/057767.html
>>> (note that I can't easily find the original mail in the archives)
>>
>> VMWare does not, Xen do, HV do (but it didn't) - Virtio does both.
>>
>> For me the confusion comes from mixing ballooning and hot plug.
>
> For example, QEMU (and even libvirt) doesn't even have built in support
> for any kind of automatic balloon resizing on guest memory pressure (and
> I'm happy that we don't implement any such heuristics). As a user/admin,
> all you can do is manually adjust the logical VM size by requesting to
> inflate/deflate the balloon. For virtio-balloon, we cannot derive what
> the hypervisor/admin might or might not do -- and whether the admin
> intends to use memory ballooning for memory hotunplug or for optimizing > memory overcommit.
Is the lack of proper hotplug/unplug leading the admins to use it like
this? I really don't understand why you don't like automatic resizing -
both HyperV and VMWare do it ?
> As another example, HV dynamic memory actually combines memory hotplug
> with memory ballooning: use memory hotplug to add more memory on demand
> and use memory ballooning to logically unplug memory again.
Have both as an options - min/max memory , percentage free to keep as
minimum, fixed size and have hot add - kind of hot plug to go above
initial max - tries to manage it in dynamic way.
> The VMWare balloon is a bit special, because it actually usually
> implements deflate-on-oom semantics in the hypervisor. IIRC, the
> hypervisor will actually adjust the balloon size based on guest memory
> stats, and there isn't really an interface to manually set the balloon
> size for an admin. But I might be wrong regarding the latter.
For me this is what makes sense - you have a limited amount of
physical RAM that you want to be used optimally by the guests.
Waiting for the admin to adjust the balloon would not give very
optimal results.
>
>>
>> Ballooning is like a heap inside the guest from which the host can
>> allocate/deallocate pages, if there is a mechanism for the guest to ask
>> the host for more/to free/ pages or the host have a heuristic to monitor
>> the guest and inflate/deflate the guest it is a matter of implementation.
>
> Please don't assume that the use case for memory ballooning is only
> optimizing memory overcommit in the hypervisor under memory pressure.
I understood that - currently it is used and as a way to do
hotplug/unplug. The question is if that is the right way to use it.
>>
>> Hot plug is adding to MemTotal and it is not a random event either in
>> real or virtual environment - so you can act upon it. MemTotal goes
>> down on hot unplug and if pages get marked as faulty RAM.
>
> "not a random event either" -- sure, with ppc dlpar, xen balloon, hv
> balloon or virtio-mem ... which all are able to hotplug memory fairly
> randomly based on hypervisor decisions.
>
> In physical environments, it's not really a random event, I agree.
Even if it is not manual - if they do hotplug it is ok - you can track
hotplug events. But you can not track balloon events.
>
>>
>> Historically MemTotal is a stable value ( i agree with most of David
>> Stevens points) and user space is expecting it to be stable ,
>> initialized at startup and it does not expect it to change.
>
> Just like some apps are not prepared for memory hot(un)plug. Some apps
> just don't work in environments with variable physical memory sizes:
> examples include databases, where memory ballooning might essentially be
> completely useless (there is a paper about application-aware memory > ballooning for that exact use case).
I would say that even the kernel is not prepared to work with changing
MemTotal - there are caches that are sized according to it -
While with hotplug there is a notifier and who is interested can use it.
With balloon inflate/deflate there is no way to do so , implementing
a clone of hotplug_memory_notifier doesn't sound right for me.
Same for the userspace - on a hotplug/unplug event you can restart your
database or any other process sensitive to the value of MemTotal.
>>
>> Used is what changes and that is what user space expects to change.
>>
>> Delfate on oom might have been a mistake but it is there and if anything
>> depends on changing MemTotal it will be broken by that option. How
>> that can be fixed?
>
> I didn't quite get your concern here. Deflate-on-oom in virtio-balloon > won't adjust MemTotal, so under which condition would be something
broken?
I mean the two ways of accounting - if a process depends on either
used or total to change it will break depending on the option .
It can of course parse features of the virtio and see what's the option
but that doesn't make it harder to break - just adds more ifs.
>>
>> I agree that the host can not reclaim what is marked as used but should
>> it be able to ? May be it will be good to teach oom killer that there
>> can be such ram that can not be reclaimed.
>>
>>> Note: I suggested under [1] to expose inflated pages via /proc/meminfo
>>> directly. We could do that consistently over all balloon drivers ...
>>> doesn't sound too crazy.
>>
>> Initally i wanted to do exactly this BUT:
>> - some drivers prefer to expose some more internal information in the file.
>
> They always can have an extended debugfs interface in addition.
>
>> - a lot of user space is using meminfo so better keep it as is to avoid breaking something, ballooning is not very frequently used.
>
> We can always extend. Just recently, we exposed Zswap data:
>
> commit f6498b776d280b30a4614d8261840961e993c2c8
> Author: Johannes Weiner <[email protected]>
> Date: Thu May 19 14:08:53 2022 -0700
>
> mm: zswap: add basic meminfo and vmstat coverage
>
>
> Exposing information about inflated pages in a generic way doesn't sound
> completely wrong to me, but there might be people that object.
>
Patch for /proc/meminfo coming next.
>>
>>
>> Please, share your view on how the ballooned memory should be accounted by the drivers inside the guests so we can work towards consistent behaviour:
>>
>> Should the inflated memory be accounted as Used or MemTotal be adjusted?
>
> I hope I was able to make it clear that it completely depends on how
> memory ballooning is actually intended to be used. It's not uncommon to
> use it as form of fake memory hotunplug, where that memory is actually
> gone for good and won't simply come back when under memory pressure.
>
>>
>> Should the inflated memory be added to /proc/meminfo ?
>
> My gut feeling is yes. The interesting question remains, how to
> distinguish the two use cases (inflated memory subtracted from MemTotal > or subtracted from MemFree).
There are currently two options:
=========== RAM ===================|
|_Used Marker |_ Total Marker
You either move Used marker or move Total to adjust.
For simplicity sign bit can be used. If an other way appears
the bit next to the sign bit can be used.
>
> I'm not sure if we even want to unify balloon handling reagrding
> adjusting managed pages. IMHO, there are good reasons to do it either way.
I think there is a need of clear rules based on what is correct and what
not. It seems that currently every hypervisor/driver is going the easy
way with hot plug/hot unplug vs inflate/deflate vs hot-add/hot-remove.
Now if i try to make my app "smart" about memory pressure i need to know
way too much about each current and future hypervisor.
--
Regards,
Alexander Atanasov
Display reported in /proc/meminfo as:
Inflated(total) or Inflated(free)
depending on the driver.
Drivers use the sign bit to indicate where they do account
the inflated memory.
Amount of inflated memory can be used by:
- as a hint for the oom a killer
- user space software that monitors memory pressure
Cc: David Hildenbrand <[email protected]>
Cc: Wei Liu <[email protected]>
Cc: Nadav Amit <[email protected]>
Signed-off-by: Alexander Atanasov <[email protected]>
---
Documentation/filesystems/proc.rst | 5 +++++
fs/proc/meminfo.c | 11 +++++++++++
include/linux/mm.h | 4 ++++
mm/page_alloc.c | 4 ++++
4 files changed, 24 insertions(+)
diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst
index 1bc91fb8c321..064b5b3d5bd8 100644
--- a/Documentation/filesystems/proc.rst
+++ b/Documentation/filesystems/proc.rst
@@ -986,6 +986,7 @@ Example output. You may not have all of these fields.
VmallocUsed: 40444 kB
VmallocChunk: 0 kB
Percpu: 29312 kB
+ Inflated(total): 2097152 kB
HardwareCorrupted: 0 kB
AnonHugePages: 4149248 kB
ShmemHugePages: 0 kB
@@ -1133,6 +1134,10 @@ VmallocChunk
Percpu
Memory allocated to the percpu allocator used to back percpu
allocations. This stat excludes the cost of metadata.
+Inflated(total) or Inflated(free)
+ Amount of memory that is inflated by the balloon driver.
+ Due to differences among balloon drivers inflated memory
+ is either subtracted from TotalRam or from MemFree.
HardwareCorrupted
The amount of RAM/memory in KB, the kernel identifies as
corrupted.
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 6e89f0e2fd20..ebbe52ccbb93 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -38,6 +38,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
unsigned long pages[NR_LRU_LISTS];
unsigned long sreclaimable, sunreclaim;
int lru;
+#ifdef CONFIG_MEMORY_BALLOON
+ long inflated_kb;
+#endif
si_meminfo(&i);
si_swapinfo(&i);
@@ -153,6 +156,14 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
global_zone_page_state(NR_FREE_CMA_PAGES));
#endif
+#ifdef CONFIG_MEMORY_BALLOON
+ inflated_kb = atomic_long_read(&mem_balloon_inflated_kb);
+ if (inflated_kb >= 0)
+ seq_printf(m, "Inflated(total): %8ld kB\n", inflated_kb);
+ else
+ seq_printf(m, "Inflated(free): %8ld kB\n", -inflated_kb);
+#endif
+
hugetlb_report_meminfo(m);
arch_report_meminfo(m);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7898e29bcfb5..b190811dc16e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2582,6 +2582,10 @@ extern int watermark_boost_factor;
extern int watermark_scale_factor;
extern bool arch_has_descending_max_zone_pfns(void);
+#ifdef CONFIG_MEMORY_BALLOON
+extern atomic_long_t mem_balloon_inflated_kb;
+#endif
+
/* nommu.c */
extern atomic_long_t mmap_pages_allocated;
extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index b0bcab50f0a3..12359179a3a2 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -194,6 +194,10 @@ EXPORT_SYMBOL(init_on_alloc);
DEFINE_STATIC_KEY_MAYBE(CONFIG_INIT_ON_FREE_DEFAULT_ON, init_on_free);
EXPORT_SYMBOL(init_on_free);
+#ifdef CONFIG_MEMORY_BALLOON
+atomic_long_t mem_balloon_inflated_kb = ATOMIC_LONG_INIT(0);
+#endif
+
static bool _init_on_alloc_enabled_early __read_mostly
= IS_ENABLED(CONFIG_INIT_ON_ALLOC_DEFAULT_ON);
static int __init early_init_on_alloc(char *buf)
--
2.31.1
On 09.08.22 11:36, Alexander Atanasov wrote:
> Hello,
>
> On 2.08.22 16:48, David Hildenbrand wrote:
>>>>
>>>> In case of Hyper-V I remember a customer BUG report that requested that
>>>> exact behavior, however, I'm not able to locate the BZ quickly.
>>>> [1] https://lists.linuxfoundation.org/pipermail/virtualization/2021-November/057767.html
>>>> (note that I can't easily find the original mail in the archives)
>>>
>>> VMWare does not, Xen do, HV do (but it didn't) - Virtio does both.
>>>
>>> For me the confusion comes from mixing ballooning and hot plug.
>>
>> For example, QEMU (and even libvirt) doesn't even have built in support
>> for any kind of automatic balloon resizing on guest memory pressure (and
>> I'm happy that we don't implement any such heuristics). As a user/admin,
>> all you can do is manually adjust the logical VM size by requesting to
>> inflate/deflate the balloon. For virtio-balloon, we cannot derive what
>> the hypervisor/admin might or might not do -- and whether the admin
>> intends to use memory ballooning for memory hotunplug or for optimizing > memory overcommit.
>
> Is the lack of proper hotplug/unplug leading the admins to use it like
> this?
Yes. Especially unplug is tricky and hard to get working reliably in
practice because of unmovable pages. Ballooning is an easy hack to get
unplug somewhat working.
For reference: under Windows ballooning was (and maybe still mostly is)
the only way to unplug memory again. Unplug of DIMMs is not supported.
> I really don't understand why you don't like automatic resizing -
> both HyperV and VMWare do it ?
You need heuristics to guess what the guest might be doing next and
deflate fast enough to avoid any kind of OOMs in the guest. I pretty
much dislike that concept, because it just screams to be fragile.
In short: I don't like ballooning, to me it's a technology from ancient
times before we were able to do any better. In comparison, I do like
free page reporting for optimizing memory overcommit, but it still has
some drawbacks (caches consuming too much memory), that people are
working on to improve. So we're stuck with ballooning for now for some
use cases.
Personally, I consider any balloon extensions (inflate/deflate, not
things like free page reporting) a waste of time and effort, but that's
just my humble opinion. So I keep reviewing and maintaining them ;)
>
>> As another example, HV dynamic memory actually combines memory hotplug
>> with memory ballooning: use memory hotplug to add more memory on demand
>> and use memory ballooning to logically unplug memory again.
>
> Have both as an options - min/max memory , percentage free to keep as
> minimum, fixed size and have hot add - kind of hot plug to go above
> initial max - tries to manage it in dynamic way.
>
>> The VMWare balloon is a bit special, because it actually usually
>> implements deflate-on-oom semantics in the hypervisor. IIRC, the
>> hypervisor will actually adjust the balloon size based on guest memory
>> stats, and there isn't really an interface to manually set the balloon
>> size for an admin. But I might be wrong regarding the latter.
>
> For me this is what makes sense - you have a limited amount of
> physical RAM that you want to be used optimally by the guests.
> Waiting for the admin to adjust the balloon would not give very
> optimal results.
Exactly. For the use case of optimizing memory overcommit in the
hypervisor, you want deflate-on-oom semantics if you go with balloon
inflation/deflation.
>
>>
>>>
>>> Ballooning is like a heap inside the guest from which the host can
>>> allocate/deallocate pages, if there is a mechanism for the guest to ask
>>> the host for more/to free/ pages or the host have a heuristic to monitor
>>> the guest and inflate/deflate the guest it is a matter of implementation.
>>
>> Please don't assume that the use case for memory ballooning is only
>> optimizing memory overcommit in the hypervisor under memory pressure.
>
> I understood that - currently it is used and as a way to do
> hotplug/unplug. The question is if that is the right way to use it.
People use it like that, and we have no control over that. I've heard of
people using hotplug of DIMMs to increase VM memory and balloon
inflation to hotunplug memory, to decrease VM memory.
>
>>>
>>> Hot plug is adding to MemTotal and it is not a random event either in
>>> real or virtual environment - so you can act upon it. MemTotal goes
>>> down on hot unplug and if pages get marked as faulty RAM.
>>
>> "not a random event either" -- sure, with ppc dlpar, xen balloon, hv
>> balloon or virtio-mem ... which all are able to hotplug memory fairly
>> randomly based on hypervisor decisions.
>>
>> In physical environments, it's not really a random event, I agree.
>
> Even if it is not manual - if they do hotplug it is ok - you can track
> hotplug events. But you can not track balloon events.
I was already asking myself in the past if there should be notifiers
when we inflate/deflate -- when we adjust MemTotal essentially. But I
think there is a more fundamental problem: some things are just
incompatible to any of that.
>
>>
>>>
>>> Historically MemTotal is a stable value ( i agree with most of David
>>> Stevens points) and user space is expecting it to be stable ,
>>> initialized at startup and it does not expect it to change.
>>
>> Just like some apps are not prepared for memory hot(un)plug. Some apps
>> just don't work in environments with variable physical memory sizes:
>> examples include databases, where memory ballooning might essentially be
>> completely useless (there is a paper about application-aware memory > ballooning for that exact use case).
>
> I would say that even the kernel is not prepared to work with changing
> MemTotal - there are caches that are sized according to it -
> While with hotplug there is a notifier and who is interested can use it.
> With balloon inflate/deflate there is no way to do so , implementing
> a clone of hotplug_memory_notifier doesn't sound right for me.
Again, it completely depends on the use case.
As a reference, we used to adjust MemTotal ever since virtio-balloon was
introduce in the kernel (2003 !), which was almost 20 (!) years ago. I
am not aware of many (any) complains. It's just what people actually do
expect. Changing that suddenly is not ok.
>
> Same for the userspace - on a hotplug/unplug event you can restart your
> database or any other process sensitive to the value of MemTotal.
IMHO databases and any form of MemTotal changes are incomaptible,
because databases are simply extreme memhogs.
>
>>>
>>> Used is what changes and that is what user space expects to change.
>>>
>>> Delfate on oom might have been a mistake but it is there and if anything
>>> depends on changing MemTotal it will be broken by that option. How
>>> that can be fixed?
>>
>> I didn't quite get your concern here. Deflate-on-oom in virtio-balloon > won't adjust MemTotal, so under which condition would be something
> broken?
>
> I mean the two ways of accounting - if a process depends on either
> used or total to change it will break depending on the option .
... and I would argue that such applications are not designed for
physical memory changes in any form. And not even for running
concurrently with other applications.
Yes, they might be compatible with deflate-on-oom.
[...]
>> Exposing information about inflated pages in a generic way doesn't sound
>> completely wrong to me, but there might be people that object.
>>
>
> Patch for /proc/meminfo coming next.
Good!
>
>>>
>>>
>>> Please, share your view on how the ballooned memory should be accounted by the drivers inside the guests so we can work towards consistent behaviour:
>>>
>>> Should the inflated memory be accounted as Used or MemTotal be adjusted?
>>
>> I hope I was able to make it clear that it completely depends on how
>> memory ballooning is actually intended to be used. It's not uncommon to
>> use it as form of fake memory hotunplug, where that memory is actually
>> gone for good and won't simply come back when under memory pressure.
>>
>>>
>>> Should the inflated memory be added to /proc/meminfo ?
>>
>> My gut feeling is yes. The interesting question remains, how to
>> distinguish the two use cases (inflated memory subtracted from MemTotal > or subtracted from MemFree).
>
> There are currently two options:
> =========== RAM ===================|
> |_Used Marker |_ Total Marker
>
> You either move Used marker or move Total to adjust.
> For simplicity sign bit can be used. If an other way appears
> the bit next to the sign bit can be used.
>
>>
>> I'm not sure if we even want to unify balloon handling reagrding
>> adjusting managed pages. IMHO, there are good reasons to do it either way.
>
> I think there is a need of clear rules based on what is correct and what
> not. It seems that currently every hypervisor/driver is going the easy
> way with hot plug/hot unplug vs inflate/deflate vs hot-add/hot-remove.
> Now if i try to make my app "smart" about memory pressure i need to know
> way too much about each current and future hypervisor.
Yeah, I raised in the past that, for example for virtio-balloon, we'd
need information (e.g., feature flag) from the hypervisor what it is
actually going to do: whether it implements some form of deflate-on-oom
such that you can allocate huge portions of memory and immediately get
that memory freed up instead of running into OOMs and triggering
application/kernel crashes.
--
Thanks,
David / dhildenb
On Tue, Aug 09, 2022 at 12:49:32PM +0300, Alexander Atanasov wrote:
> Display reported in /proc/meminfo as:
>
> Inflated(total) or Inflated(free)
>
> depending on the driver.
>
> Drivers use the sign bit to indicate where they do account
> the inflated memory.
>
> Amount of inflated memory can be used by:
> - as a hint for the oom a killer
> - user space software that monitors memory pressure
>
> Cc: David Hildenbrand <[email protected]>
> Cc: Wei Liu <[email protected]>
> Cc: Nadav Amit <[email protected]>
>
> Signed-off-by: Alexander Atanasov <[email protected]>
> ---
> Documentation/filesystems/proc.rst | 5 +++++
> fs/proc/meminfo.c | 11 +++++++++++
> include/linux/mm.h | 4 ++++
> mm/page_alloc.c | 4 ++++
> 4 files changed, 24 insertions(+)
>
> diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst
> index 1bc91fb8c321..064b5b3d5bd8 100644
> --- a/Documentation/filesystems/proc.rst
> +++ b/Documentation/filesystems/proc.rst
> @@ -986,6 +986,7 @@ Example output. You may not have all of these fields.
> VmallocUsed: 40444 kB
> VmallocChunk: 0 kB
> Percpu: 29312 kB
> + Inflated(total): 2097152 kB
> HardwareCorrupted: 0 kB
> AnonHugePages: 4149248 kB
> ShmemHugePages: 0 kB
> @@ -1133,6 +1134,10 @@ VmallocChunk
> Percpu
> Memory allocated to the percpu allocator used to back percpu
> allocations. This stat excludes the cost of metadata.
> +Inflated(total) or Inflated(free)
> + Amount of memory that is inflated by the balloon driver.
> + Due to differences among balloon drivers inflated memory
> + is either subtracted from TotalRam or from MemFree.
> HardwareCorrupted
> The amount of RAM/memory in KB, the kernel identifies as
> corrupted.
> diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
> index 6e89f0e2fd20..ebbe52ccbb93 100644
> --- a/fs/proc/meminfo.c
> +++ b/fs/proc/meminfo.c
> @@ -38,6 +38,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
> unsigned long pages[NR_LRU_LISTS];
> unsigned long sreclaimable, sunreclaim;
> int lru;
> +#ifdef CONFIG_MEMORY_BALLOON
> + long inflated_kb;
> +#endif
>
> si_meminfo(&i);
> si_swapinfo(&i);
> @@ -153,6 +156,14 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
> global_zone_page_state(NR_FREE_CMA_PAGES));
> #endif
>
> +#ifdef CONFIG_MEMORY_BALLOON
> + inflated_kb = atomic_long_read(&mem_balloon_inflated_kb);
> + if (inflated_kb >= 0)
> + seq_printf(m, "Inflated(total): %8ld kB\n", inflated_kb);
> + else
> + seq_printf(m, "Inflated(free): %8ld kB\n", -inflated_kb);
> +#endif
> +
> hugetlb_report_meminfo(m);
>
> arch_report_meminfo(m);
This seems too baroque for my taste.
Why not just have two counters for the two pruposes?
And is there any value in having this atomic?
We want a consistent value but just READ_ONCE seems sufficient ...
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 7898e29bcfb5..b190811dc16e 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -2582,6 +2582,10 @@ extern int watermark_boost_factor;
> extern int watermark_scale_factor;
> extern bool arch_has_descending_max_zone_pfns(void);
>
> +#ifdef CONFIG_MEMORY_BALLOON
> +extern atomic_long_t mem_balloon_inflated_kb;
> +#endif
> +
> /* nommu.c */
> extern atomic_long_t mmap_pages_allocated;
> extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t);
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index b0bcab50f0a3..12359179a3a2 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -194,6 +194,10 @@ EXPORT_SYMBOL(init_on_alloc);
> DEFINE_STATIC_KEY_MAYBE(CONFIG_INIT_ON_FREE_DEFAULT_ON, init_on_free);
> EXPORT_SYMBOL(init_on_free);
>
> +#ifdef CONFIG_MEMORY_BALLOON
> +atomic_long_t mem_balloon_inflated_kb = ATOMIC_LONG_INIT(0);
> +#endif
> +
> static bool _init_on_alloc_enabled_early __read_mostly
> = IS_ENABLED(CONFIG_INIT_ON_ALLOC_DEFAULT_ON);
> static int __init early_init_on_alloc(char *buf)
> --
> 2.31.1
>
>
Update the value in page_alloc on balloon fill/leak.
Cc: David Hildenbrand <[email protected]>
Cc: Wei Liu <[email protected]>
Cc: Nadav Amit <[email protected]>
Signed-off-by: Alexander Atanasov <[email protected]>
---
drivers/virtio/virtio_balloon.c | 13 +++++++++++++
1 file changed, 13 insertions(+)
Firts user, other balloons i will do if it is accepted to avoid too much emails.
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index b9737da6c4dd..e2693ffbd48b 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -208,6 +208,16 @@ static void set_page_pfns(struct virtio_balloon *vb,
page_to_balloon_pfn(page) + i);
}
+static void update_meminfo(struct virtio_balloon *vb)
+{
+ long inflated_kb = vb->num_pages << (VIRTIO_BALLOON_PFN_SHIFT - 10);
+
+ if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
+ inflated_kb = -inflated_kb;
+
+ atomic_long_set(&mem_balloon_inflated_kb, inflated_kb);
+}
+
static unsigned int fill_balloon(struct virtio_balloon *vb, size_t num)
{
unsigned int num_allocated_pages;
@@ -250,6 +260,7 @@ static unsigned int fill_balloon(struct virtio_balloon *vb, size_t num)
}
num_allocated_pages = vb->num_pfns;
+ update_meminfo(vb);
/* Did we get any? */
if (vb->num_pfns != 0)
tell_host(vb, vb->inflate_vq);
@@ -296,6 +307,7 @@ static unsigned int leak_balloon(struct virtio_balloon *vb, size_t num)
}
num_freed_pages = vb->num_pfns;
+ update_meminfo(vb);
/*
* Note that if
* virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST);
@@ -1089,6 +1101,7 @@ static void virtballoon_remove(struct virtio_device *vdev)
kern_unmount(balloon_mnt);
#endif
+ update_meminfo(0);
kfree(vb);
}
--
2.31.1
On Tue, Jul 05, 2022 at 12:01:58PM +0300, Alexander Atanasov wrote:
> > > diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
> > > index ddaa45e723c4..f3ff7c4e5884 100644
> > > --- a/include/uapi/linux/virtio_balloon.h
> > > +++ b/include/uapi/linux/virtio_balloon.h
> > > @@ -40,6 +40,7 @@
> > > /* Size of a PFN in the balloon interface. */
> > > #define VIRTIO_BALLOON_PFN_SHIFT 12
> > > +#define VIRTIO_BALLOON_PAGE_SIZE (1<<VIRTIO_BALLOON_PFN_SHIFT)
> > > #define VIRTIO_BALLOON_CMD_ID_STOP 0
> > > #define VIRTIO_BALLOON_CMD_ID_DONE 1
> > Did you run checkpatch on this?
>
>
> Sure, i did:
>
> scripts/checkpatch.pl
> ../outgoing/v4-0001-Create-debugfs-file-with-virtio-balloon-usage-inf.patch
> total: 0 errors, 0 warnings, 108 lines checked
>
> ../outgoing/v4-0001-Create-debugfs-file-with-virtio-balloon-usage-inf.patch
> has no obvious style problems and is ready for submission.
Weird. There should be spaces around << I think.
--
MST
On Tue, Jul 26, 2022 at 02:08:27PM +0000, Alexander Atanasov wrote:
> Allow the guest to know how much it is ballooned by the host
> and how that memory is accounted.
>
> It is useful when debugging out of memory conditions,
> as well for userspace processes that monitor the memory pressure
> and for nested virtualization.
Hmm. debugging is ok. monitoring/nested use-cases probably
call for sysfs/procfs.
> Depending on the deflate on oom flag memory is accounted in two ways.
> If the flag is set the inflated pages are accounted as used memory.
> If the flag is not set the inflated pages are substracted from totalram.
> To make clear how are inflated pages accounted sign prefix the value.
> Where negative means substracted from totalram and positive means
> they are accounted as used.
>
> Signed-off-by: Alexander Atanasov <[email protected]>
> ---
> drivers/virtio/virtio_balloon.c | 59 +++++++++++++++++++++++++++++++++
> 1 file changed, 59 insertions(+)
>
> V2:
> - fixed coding style
> - removed pretty print
> V3:
> - removed dublicate of features
> - comment about balooned_pages more clear
> - convert host pages to balloon pages
> V4:
> - added a define for BALLOON_PAGE_SIZE to make things clear
> V5:
> - Made the calculatons work properly for both ways of memory accounting
> with or without deflate_on_oom
> - dropped comment
> V6:
> - reduced the file to minimum
> - unify accounting
>
> I didn't understand if you agree to change the accounting to be the same
> so following part 2 is about it.
>
>
> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> index f4c34a2a6b8e..97d3b29cb9f1 100644
> --- a/drivers/virtio/virtio_balloon.c
> +++ b/drivers/virtio/virtio_balloon.c
> @@ -10,6 +10,7 @@
> #include <linux/virtio_balloon.h>
> #include <linux/swap.h>
> #include <linux/workqueue.h>
> +#include <linux/debugfs.h>
> #include <linux/delay.h>
> #include <linux/slab.h>
> #include <linux/module.h>
> @@ -731,6 +732,59 @@ static void report_free_page_func(struct work_struct *work)
> }
> }
>
> +/*
> + * DEBUGFS Interface
> + */
> +#ifdef CONFIG_DEBUG_FS
> +
> +/**
> + * virtio_balloon_debug_show - shows statistics of balloon operations.
> + * @f: pointer to the &struct seq_file.
> + * @offset: ignored.
> + *
> + * Provides the statistics that can be accessed in virtio-balloon in the debugfs.
> + *
> + * Return: zero on success or an error code.
> + */
> +
> +static int virtio_balloon_debug_show(struct seq_file *f, void *offset)
> +{
> + struct virtio_balloon *vb = f->private;
> + s64 num_pages = vb->num_pages << (VIRTIO_BALLOON_PFN_SHIFT - 10);
> +
> + if (!virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
> + num_pages = -num_pages;
> +
> + seq_printf(f, "inflated: %lld kB\n", num_pages);
> +
> + return 0;
> +}
Can't we just have two attributes, without hacks like this one?
> +
> +DEFINE_SHOW_ATTRIBUTE(virtio_balloon_debug);
> +
> +static void virtio_balloon_debugfs_init(struct virtio_balloon *b)
> +{
> + debugfs_create_file("virtio-balloon", 0444, NULL, b,
> + &virtio_balloon_debug_fops);
> +}
> +
> +static void virtio_balloon_debugfs_exit(struct virtio_balloon *b)
> +{
> + debugfs_remove(debugfs_lookup("virtio-balloon", NULL));
> +}
> +
> +#else
> +
> +static inline void virtio_balloon_debugfs_init(struct virtio_balloon *b)
> +{
> +}
> +
> +static inline void virtio_balloon_debugfs_exit(struct virtio_balloon *b)
> +{
> +}
> +
> +#endif /* CONFIG_DEBUG_FS */
> +
> #ifdef CONFIG_BALLOON_COMPACTION
> /*
> * virtballoon_migratepage - perform the balloon page migration on behalf of
> @@ -1019,6 +1073,9 @@ static int virtballoon_probe(struct virtio_device *vdev)
>
> if (towards_target(vb))
> virtballoon_changed(vdev);
> +
> + virtio_balloon_debugfs_init(vb);
> +
> return 0;
>
> out_unregister_oom:
> @@ -1065,6 +1122,8 @@ static void virtballoon_remove(struct virtio_device *vdev)
> {
> struct virtio_balloon *vb = vdev->priv;
>
> + virtio_balloon_debugfs_exit(vb);
> +
> if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_REPORTING))
> page_reporting_unregister(&vb->pr_dev_info);
> if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
> --
> 2.25.1
>
>
Hello,
On 9.08.22 13:35, Michael S. Tsirkin wrote:
> Weird. There should be spaces around << I think.
>
Yes, i understood, bad habits, David pointed it out to me. I will avoid
same thread email in the future. Please, check the hardening thread too.
https://lore.kernel.org/lkml/[email protected]/
I will get back to your other comments too, i need to think a bit.
--
Regards,
Alexander Atanasov
On Aug 9, 2022, at 2:53 AM, Alexander Atanasov <[email protected]> wrote:
> Update the value in page_alloc on balloon fill/leak.
Some general comments if this patch goes forward.
Please cc [email protected] in the future.
>
> Cc: David Hildenbrand <[email protected]>
> Cc: Wei Liu <[email protected]>
> Cc: Nadav Amit <[email protected]>
>
> Signed-off-by: Alexander Atanasov <[email protected]>
> ---
> drivers/virtio/virtio_balloon.c | 13 +++++++++++++
> 1 file changed, 13 insertions(+)
>
> Firts user, other balloons i will do if it is accepted to avoid too much emails.
>
>
> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> index b9737da6c4dd..e2693ffbd48b 100644
> --- a/drivers/virtio/virtio_balloon.c
> +++ b/drivers/virtio/virtio_balloon.c
> @@ -208,6 +208,16 @@ static void set_page_pfns(struct virtio_balloon *vb,
> page_to_balloon_pfn(page) + i);
> }
>
> +static void update_meminfo(struct virtio_balloon *vb)
Putting aside the less-than-optimal function name, I would like to ask that
any new generic balloon logic would go into balloon_compaction.[hc] as much
as possible. I made the effort to reuse this infrastructure (which is now
used by both VMware and virtio), and would prefer to share as much code as
possible.
For instance, I would appreciate if the update upon inflate would go into
balloon_page_list_enqueue() and balloon_page_enqueue(). VMware's 2MB pages
logic is not shared, so it would require a change that is specific for
VMware code.
> On Aug 9, 2022, at 17:49, Alexander Atanasov <[email protected]> wrote:
>
> Display reported in /proc/meminfo as:
Hi,
I am not sure if this is a right place (meminfo) to put this statistic in since
it is the accounting from a specific driver. IIUC, this driver is only installed
in a VM, then this accounting will always be zero if this driver is not installed.
Is this possible to put it in a driver-specific sysfs file (maybe it is better)?
Just some thoughts from me.
Muchun,
Thanks.
>
> Inflated(total) or Inflated(free)
>
> depending on the driver.
>
> Drivers use the sign bit to indicate where they do account
> the inflated memory.
>
> Amount of inflated memory can be used by:
> - as a hint for the oom a killer
> - user space software that monitors memory pressure
>
> Cc: David Hildenbrand <[email protected]>
> Cc: Wei Liu <[email protected]>
> Cc: Nadav Amit <[email protected]>
>
> Signed-off-by: Alexander Atanasov <[email protected]>
> ---
> Documentation/filesystems/proc.rst | 5 +++++
> fs/proc/meminfo.c | 11 +++++++++++
> include/linux/mm.h | 4 ++++
> mm/page_alloc.c | 4 ++++
> 4 files changed, 24 insertions(+)
>
> diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst
> index 1bc91fb8c321..064b5b3d5bd8 100644
> --- a/Documentation/filesystems/proc.rst
> +++ b/Documentation/filesystems/proc.rst
> @@ -986,6 +986,7 @@ Example output. You may not have all of these fields.
> VmallocUsed: 40444 kB
> VmallocChunk: 0 kB
> Percpu: 29312 kB
> + Inflated(total): 2097152 kB
> HardwareCorrupted: 0 kB
> AnonHugePages: 4149248 kB
> ShmemHugePages: 0 kB
> @@ -1133,6 +1134,10 @@ VmallocChunk
> Percpu
> Memory allocated to the percpu allocator used to back percpu
> allocations. This stat excludes the cost of metadata.
> +Inflated(total) or Inflated(free)
> + Amount of memory that is inflated by the balloon driver.
> + Due to differences among balloon drivers inflated memory
> + is either subtracted from TotalRam or from MemFree.
> HardwareCorrupted
> The amount of RAM/memory in KB, the kernel identifies as
> corrupted.
> diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
> index 6e89f0e2fd20..ebbe52ccbb93 100644
> --- a/fs/proc/meminfo.c
> +++ b/fs/proc/meminfo.c
> @@ -38,6 +38,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
> unsigned long pages[NR_LRU_LISTS];
> unsigned long sreclaimable, sunreclaim;
> int lru;
> +#ifdef CONFIG_MEMORY_BALLOON
> + long inflated_kb;
> +#endif
>
> si_meminfo(&i);
> si_swapinfo(&i);
> @@ -153,6 +156,14 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
> global_zone_page_state(NR_FREE_CMA_PAGES));
> #endif
>
> +#ifdef CONFIG_MEMORY_BALLOON
> + inflated_kb = atomic_long_read(&mem_balloon_inflated_kb);
> + if (inflated_kb >= 0)
> + seq_printf(m, "Inflated(total): %8ld kB\n", inflated_kb);
> + else
> + seq_printf(m, "Inflated(free): %8ld kB\n", -inflated_kb);
> +#endif
> +
> hugetlb_report_meminfo(m);
>
> arch_report_meminfo(m);
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 7898e29bcfb5..b190811dc16e 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -2582,6 +2582,10 @@ extern int watermark_boost_factor;
> extern int watermark_scale_factor;
> extern bool arch_has_descending_max_zone_pfns(void);
>
> +#ifdef CONFIG_MEMORY_BALLOON
> +extern atomic_long_t mem_balloon_inflated_kb;
> +#endif
> +
> /* nommu.c */
> extern atomic_long_t mmap_pages_allocated;
> extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t);
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index b0bcab50f0a3..12359179a3a2 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -194,6 +194,10 @@ EXPORT_SYMBOL(init_on_alloc);
> DEFINE_STATIC_KEY_MAYBE(CONFIG_INIT_ON_FREE_DEFAULT_ON, init_on_free);
> EXPORT_SYMBOL(init_on_free);
>
> +#ifdef CONFIG_MEMORY_BALLOON
> +atomic_long_t mem_balloon_inflated_kb = ATOMIC_LONG_INIT(0);
> +#endif
> +
> static bool _init_on_alloc_enabled_early __read_mostly
> = IS_ENABLED(CONFIG_INIT_ON_ALLOC_DEFAULT_ON);
> static int __init early_init_on_alloc(char *buf)
> --
> 2.31.1
>
>
Hello,
On 10.08.22 6:05, Muchun Song wrote:
>
>
>> On Aug 9, 2022, at 17:49, Alexander Atanasov <[email protected]> wrote:
>>
>> Display reported in /proc/meminfo as:
>
> Hi,
>
> I am not sure if this is a right place (meminfo) to put this statistic in since
> it is the accounting from a specific driver. IIUC, this driver is only installed
> in a VM, then this accounting will always be zero if this driver is not installed.
> Is this possible to put it in a driver-specific sysfs file (maybe it is better)?
> Just some thoughts from me.
Yes, it is only used if run under hypervisor but it is under a config
option for that reason.
There are several balloon drivers that will use it, not only one driver
- virtio, VMWare, HyperV, XEN and possibly others. I made one as an
example. Initially i worked on a patches for debugfs but discussion lead
to put it into a centralized place.
--
Regards,
Alexander Atanasov
On 9.08.22 13:32, Michael S. Tsirkin wrote:
> On Tue, Aug 09, 2022 at 12:49:32PM +0300, Alexander Atanasov wrote:
>> @@ -153,6 +156,14 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
>> global_zone_page_state(NR_FREE_CMA_PAGES));
>> #endif
>>
>> +#ifdef CONFIG_MEMORY_BALLOON
>> + inflated_kb = atomic_long_read(&mem_balloon_inflated_kb);
>> + if (inflated_kb >= 0)
>> + seq_printf(m, "Inflated(total): %8ld kB\n", inflated_kb);
>> + else
>> + seq_printf(m, "Inflated(free): %8ld kB\n", -inflated_kb);
>> +#endif
>> +
>> hugetlb_report_meminfo(m);
>>
>> arch_report_meminfo(m);
>
>
> This seems too baroque for my taste.
> Why not just have two counters for the two pruposes?
I agree it is not good but it reflects the current situation.
Dirvers account in only one way - either used or total - which i don't
like. So to save space and to avoid the possibility that some driver
starts to use both at the same time. I suggest to be only one value.
> And is there any value in having this atomic?
> We want a consistent value but just READ_ONCE seems sufficient ...
I do not see this as only a value that is going to be displayed.
I tried to be defensive here and to avoid premature optimization.
One possible scenario is OOM killer(using the value) vs balloon deflate
on oom will need it. But any other user of that value will likely need
it atomic too. Drivers use spin_locks for calculations they might find a
way to reduce the spin lock usage and use the atomic.
While making it a long could only bring bugs without benefits.
It is not on a fast path too so i prefer to be safe.
--
Regards,
Alexander Atanasov
On Wed, Aug 10, 2022 at 08:54:52AM +0300, Alexander Atanasov wrote:
> On 9.08.22 13:32, Michael S. Tsirkin wrote:
> > On Tue, Aug 09, 2022 at 12:49:32PM +0300, Alexander Atanasov wrote:
> > > @@ -153,6 +156,14 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
> > > global_zone_page_state(NR_FREE_CMA_PAGES));
> > > #endif
> > > +#ifdef CONFIG_MEMORY_BALLOON
> > > + inflated_kb = atomic_long_read(&mem_balloon_inflated_kb);
> > > + if (inflated_kb >= 0)
> > > + seq_printf(m, "Inflated(total): %8ld kB\n", inflated_kb);
> > > + else
> > > + seq_printf(m, "Inflated(free): %8ld kB\n", -inflated_kb);
> > > +#endif
> > > +
> > > hugetlb_report_meminfo(m);
> > > arch_report_meminfo(m);
> >
> >
> > This seems too baroque for my taste.
> > Why not just have two counters for the two pruposes?
>
> I agree it is not good but it reflects the current situation.
> Dirvers account in only one way - either used or total - which i don't like.
> So to save space and to avoid the possibility that some driver starts to use
> both at the same time. I suggest to be only one value.
I don't see what would be wrong if some driver used both
at some point.
>
> > And is there any value in having this atomic?
> > We want a consistent value but just READ_ONCE seems sufficient ...
>
> I do not see this as only a value that is going to be displayed.
> I tried to be defensive here and to avoid premature optimization.
> One possible scenario is OOM killer(using the value) vs balloon deflate on
> oom will need it. But any other user of that value will likely need it
> atomic too. Drivers use spin_locks for calculations they might find a way to
> reduce the spin lock usage and use the atomic.
> While making it a long could only bring bugs without benefits.
> It is not on a fast path too so i prefer to be safe.
Well we do not normally spread atomics around just because we
can, it does not magically make the code safe.
If this needs atomics we need to document why.
> --
> Regards,
> Alexander Atanasov
Hello,
On 10.08.22 9:05, Michael S. Tsirkin wrote:
> On Wed, Aug 10, 2022 at 08:54:52AM +0300, Alexander Atanasov wrote:
>> On 9.08.22 13:32, Michael S. Tsirkin wrote:
>>> On Tue, Aug 09, 2022 at 12:49:32PM +0300, Alexander Atanasov wrote:
>>>> @@ -153,6 +156,14 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
>>>> global_zone_page_state(NR_FREE_CMA_PAGES));
>>>> #endif
>>>> +#ifdef CONFIG_MEMORY_BALLOON
>>>> + inflated_kb = atomic_long_read(&mem_balloon_inflated_kb);
>>>> + if (inflated_kb >= 0)
>>>> + seq_printf(m, "Inflated(total): %8ld kB\n", inflated_kb);
>>>> + else
>>>> + seq_printf(m, "Inflated(free): %8ld kB\n", -inflated_kb);
>>>> +#endif
>>>> +
>>>> hugetlb_report_meminfo(m);
>>>> arch_report_meminfo(m);
>>>
>>>
>>> This seems too baroque for my taste.
>>> Why not just have two counters for the two pruposes?
>>
>> I agree it is not good but it reflects the current situation.
>> Dirvers account in only one way - either used or total - which i don't like.
>> So to save space and to avoid the possibility that some driver starts to use
>> both at the same time. I suggest to be only one value.
>
> I don't see what would be wrong if some driver used both
> at some point.
If you don't see what's wrong with using both, i might as well add
Cached and Buffers - next hypervisor might want to use them or any other
by its discretion leaving the fun to figure it out to the userspace?
Single definitive value is much better and clear from user prespective
and meminfo is exactly for the users.
If a driver for some wierd reason needs to do both it is a whole new
topic that i don't like to go into. Good news is that currently no such
driver exists.
>
>>
>>> And is there any value in having this atomic?
>>> We want a consistent value but just READ_ONCE seems sufficient ...
>>
>> I do not see this as only a value that is going to be displayed.
>> I tried to be defensive here and to avoid premature optimization.
>> One possible scenario is OOM killer(using the value) vs balloon deflate on
>> oom will need it. But any other user of that value will likely need it
>> atomic too. Drivers use spin_locks for calculations they might find a way to
>> reduce the spin lock usage and use the atomic.
>> While making it a long could only bring bugs without benefits.
>> It is not on a fast path too so i prefer to be safe.
>
> Well we do not normally spread atomics around just because we
> can, it does not magically make the code safe.
> If this needs atomics we need to document why.
Of course it does not. In one of your comments to my other patches you
said you do not like patches that add one line then remove it in the
next patch. To avoid that i put an atomic - if at one point it is clear
it is not required i would be happy to change it but it is more likely
to be need than not. So i will probably have to document it instead.
At this point the decision if it should be or should not be in the
meminfo is more important - if general opinion is positive i will
address the technical details.
--
Regards,
Alexander Atanasov
On Wed, Aug 10, 2022 at 10:50:10AM +0300, Alexander Atanasov wrote:
> Hello,
>
> On 10.08.22 9:05, Michael S. Tsirkin wrote:
> > On Wed, Aug 10, 2022 at 08:54:52AM +0300, Alexander Atanasov wrote:
> > > On 9.08.22 13:32, Michael S. Tsirkin wrote:
> > > > On Tue, Aug 09, 2022 at 12:49:32PM +0300, Alexander Atanasov wrote:
> > > > > @@ -153,6 +156,14 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
> > > > > global_zone_page_state(NR_FREE_CMA_PAGES));
> > > > > #endif
> > > > > +#ifdef CONFIG_MEMORY_BALLOON
> > > > > + inflated_kb = atomic_long_read(&mem_balloon_inflated_kb);
> > > > > + if (inflated_kb >= 0)
> > > > > + seq_printf(m, "Inflated(total): %8ld kB\n", inflated_kb);
> > > > > + else
> > > > > + seq_printf(m, "Inflated(free): %8ld kB\n", -inflated_kb);
> > > > > +#endif
> > > > > +
> > > > > hugetlb_report_meminfo(m);
> > > > > arch_report_meminfo(m);
> > > >
> > > >
> > > > This seems too baroque for my taste.
> > > > Why not just have two counters for the two pruposes?
> > >
> > > I agree it is not good but it reflects the current situation.
> > > Dirvers account in only one way - either used or total - which i don't like.
> > > So to save space and to avoid the possibility that some driver starts to use
> > > both at the same time. I suggest to be only one value.
> >
> > I don't see what would be wrong if some driver used both
> > at some point.
>
> If you don't see what's wrong with using both, i might as well add
> Cached and Buffers - next hypervisor might want to use them or any other by
> its discretion leaving the fun to figure it out to the userspace?
Assuming you document what these mean, sure.
> Single definitive value is much better and clear from user prespective and
> meminfo is exactly for the users.
Not really, the negative value trick is anything but clear.
> If a driver for some wierd reason needs to do both it is a whole new topic
> that i don't like to go into. Good news is that currently no such driver
> exists.
>
>
> >
> > >
> > > > And is there any value in having this atomic?
> > > > We want a consistent value but just READ_ONCE seems sufficient ...
> > >
> > > I do not see this as only a value that is going to be displayed.
> > > I tried to be defensive here and to avoid premature optimization.
> > > One possible scenario is OOM killer(using the value) vs balloon deflate on
> > > oom will need it. But any other user of that value will likely need it
> > > atomic too. Drivers use spin_locks for calculations they might find a way to
> > > reduce the spin lock usage and use the atomic.
> > > While making it a long could only bring bugs without benefits.
> > > It is not on a fast path too so i prefer to be safe.
> >
> > Well we do not normally spread atomics around just because we
> > can, it does not magically make the code safe.
> > If this needs atomics we need to document why.
>
> Of course it does not. In one of your comments to my other patches you said
> you do not like patches that add one line then remove it in the next patch.
> To avoid that i put an atomic - if at one point it is clear it is not
> required i would be happy to change it but it is more likely to be need than
> not. So i will probably have to document it instead.
>
> At this point the decision if it should be or should not be in the meminfo
> is more important - if general opinion is positive i will address the
> technical details.
Not up to me, you need ack from linux-mm guys for that.
> --
> Regards,
> Alexander Atanasov
Hi,
On 9.08.22 20:44, Nadav Amit wrote:
>
> On Aug 9, 2022, at 2:53 AM, Alexander Atanasov <[email protected]> wrote:
>
>> Update the value in page_alloc on balloon fill/leak.
>
> Some general comments if this patch goes forward.
>
> Please cc [email protected] in the future.
Ok.
>
>>
>> Cc: David Hildenbrand <[email protected]>
>> Cc: Wei Liu <[email protected]>
>> Cc: Nadav Amit <[email protected]>
>>
>> Signed-off-by: Alexander Atanasov <[email protected]>
>> ---
>> drivers/virtio/virtio_balloon.c | 13 +++++++++++++
>> 1 file changed, 13 insertions(+)
>>
>> Firts user, other balloons i will do if it is accepted to avoid too much emails.
>>
>>
>> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
>> index b9737da6c4dd..e2693ffbd48b 100644
>> --- a/drivers/virtio/virtio_balloon.c
>> +++ b/drivers/virtio/virtio_balloon.c
>> @@ -208,6 +208,16 @@ static void set_page_pfns(struct virtio_balloon *vb,
>> page_to_balloon_pfn(page) + i);
>> }
>>
>> +static void update_meminfo(struct virtio_balloon *vb)
>
> Putting aside the less-than-optimal function name, I would like to ask that
Right, i will think of a better one.
> any new generic balloon logic would go into balloon_compaction.[hc] as much
If it is going to be a place for generic logic may be it should be
renamed to balloon_common.[ch] ?
> as possible. I made the effort to reuse this infrastructure (which is now
> used by both VMware and virtio), and would prefer to share as much code as
> possible.
>
> For instance, I would appreciate if the update upon inflate would go into
> balloon_page_list_enqueue() and balloon_page_enqueue(). VMware's 2MB pages
> logic is not shared, so it would require a change that is specific for
> VMware code.
I looked at the code and i do not see how i can reuse it since
BALLOON_COMPACTION can be disabled and as you say even for VMWare it
would require updates on other places. Looks like if i do so i would
have to handle update from each driver for both cases. I think it is
better to clearly mark the spots when drivers do their internal
recalculations and report to the core. I see only VMWare is using
balloon_page_list_enqueue , virtio balloon is using only migration and
i don't see how to hook it there - i haven't checked the rest of the
balloons but i guess it would be similiar . I agree it is a good to have
a common place for such logic but it might be better of for a separate
work in the future.
--
Regards,
Alexander Atanasov
On Aug 15, 2022, at 5:52 AM, Alexander Atanasov <[email protected]> wrote:
> ⚠ External Email
>
> Hi,
>
> On 9.08.22 20:44, Nadav Amit wrote:
>> On Aug 9, 2022, at 2:53 AM, Alexander Atanasov <[email protected]> wrote:
>>
>>> Update the value in page_alloc on balloon fill/leak.
>>
>> Some general comments if this patch goes forward.
>>
>> Please cc [email protected] in the future.
>
> Ok.
>
>>> Cc: David Hildenbrand <[email protected]>
>>> Cc: Wei Liu <[email protected]>
>>> Cc: Nadav Amit <[email protected]>
>>>
>>> Signed-off-by: Alexander Atanasov <[email protected]>
>>> ---
>>> drivers/virtio/virtio_balloon.c | 13 +++++++++++++
>>> 1 file changed, 13 insertions(+)
>>>
>>> Firts user, other balloons i will do if it is accepted to avoid too much emails.
>>>
>>>
>>> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
>>> index b9737da6c4dd..e2693ffbd48b 100644
>>> --- a/drivers/virtio/virtio_balloon.c
>>> +++ b/drivers/virtio/virtio_balloon.c
>>> @@ -208,6 +208,16 @@ static void set_page_pfns(struct virtio_balloon *vb,
>>> page_to_balloon_pfn(page) + i);
>>> }
>>>
>>> +static void update_meminfo(struct virtio_balloon *vb)
>>
>> Putting aside the less-than-optimal function name, I would like to ask that
>
> Right, i will think of a better one.
>
>> any new generic balloon logic would go into balloon_compaction.[hc] as much
>
> If it is going to be a place for generic logic may be it should be
> renamed to balloon_common.[ch] ?
>
>> as possible. I made the effort to reuse this infrastructure (which is now
>> used by both VMware and virtio), and would prefer to share as much code as
>> possible.
>>
>> For instance, I would appreciate if the update upon inflate would go into
>> balloon_page_list_enqueue() and balloon_page_enqueue(). VMware's 2MB pages
>> logic is not shared, so it would require a change that is specific for
>> VMware code.
>
> I looked at the code and i do not see how i can reuse it since
> BALLOON_COMPACTION can be disabled and as you say even for VMWare it
> would require updates on other places. Looks like if i do so i would
> have to handle update from each driver for both cases. I think it is
> better to clearly mark the spots when drivers do their internal
> recalculations and report to the core. I see only VMWare is using
> balloon_page_list_enqueue , virtio balloon is using only migration and
> i don't see how to hook it there - i haven't checked the rest of the
> balloons but i guess it would be similiar . I agree it is a good to have
> a common place for such logic but it might be better of for a separate
> work in the future.
Fine. I would live with whatever you do and if needed change it later.
Thanks for considering.
Regards,
Nadav
On 15.08.22 19:05, Nadav Amit wrote:
> On Aug 15, 2022, at 5:52 AM, Alexander Atanasov <[email protected]> wrote:
>
>> If it is going to be a place for generic logic may be it should be
>> renamed to balloon_common.[ch] ?
>>
>>> as possible. I made the effort to reuse this infrastructure (which is now
>>> used by both VMware and virtio), and would prefer to share as much code as
>>> possible.
>>>
>>> For instance, I would appreciate if the update upon inflate would go into
>>> balloon_page_list_enqueue() and balloon_page_enqueue(). VMware's 2MB pages
>>> logic is not shared, so it would require a change that is specific for
>>> VMware code.
>>
>> I looked at the code and i do not see how i can reuse it since
>> BALLOON_COMPACTION can be disabled and as you say even for VMWare it
>> would require updates on other places. Looks like if i do so i would
>> have to handle update from each driver for both cases. I think it is
>> better to clearly mark the spots when drivers do their internal
>> recalculations and report to the core. I see only VMWare is using
>> balloon_page_list_enqueue , virtio balloon is using only migration and
>> i don't see how to hook it there - i haven't checked the rest of the
>> balloons but i guess it would be similiar . I agree it is a good to have
>> a common place for such logic but it might be better of for a separate
>> work in the future.
>
> Fine. I would live with whatever you do and if needed change it later.
- balloon_page_list_enqueue and balloon_page_enqueue are called under locks.
- they work page by page
- different page sizes different hypervisors
- pages to bytes conversion again different page sizes
I will put the new code in balloon_common but i can not reuse what you
propose for the reasons above.
--
Regards,
Alexander Atanasov