Hi,
This series is a respin of automatic ballooning support I started
working on last year. Patch 2/2 contains all relevant technical
details and performance measurement results.
This is in RFC state because it's a work in progress.
Here's some information if you want to try automatic ballooning:
1. You'll need 3.9+ for the host kernel
2. Apply this series for the guest kernel
3. Grab the QEMU bits from:
git://repo.or.cz/qemu/qmp-unstable.git balloon/auto-ballooning/memcg/rfc
4. Enable the balloon device in qemu with:
-device virtio-balloon-pci,auto-balloon=true
5. Balloon the guest memory down, say from 1G to 256MB
6. Generate some pressure in the guest, say a kernel build with -j16
Any feedback is appreciated!
Luiz Capitulino (2):
virtio_balloon: move balloon_lock mutex to callers
virtio_balloon: auto-ballooning support
drivers/virtio/virtio_balloon.c | 63 ++++++++++++++++++++++++++++++++++---
include/uapi/linux/virtio_balloon.h | 1 +
2 files changed, 60 insertions(+), 4 deletions(-)
--
1.8.1.4
Automatic ballooning consists of dynamically adjusting the guest's
balloon according to memory pressure in the host and in the guest.
This commit implements the guest side of automatic ballooning, which
basically consists of registering a shrinker callback with the kernel,
which will try to deflate the guest's balloon by the amount of pages
being requested. The shrinker callback is only registered if the host
supports the VIRTIO_BALLOON_F_AUTO_BALLOON feature bit.
Automatic inflate is performed by the host.
Here are some numbers. The test-case is to run 35 VMs (1G of RAM each)
in parallel doing a kernel build. Host has 32GB of RAM and 16GB of swap.
SWAP IN and SWAP OUT correspond to the number of pages swapped in and
swapped out, respectively.
Auto-ballooning disabled:
RUN TIME(s) SWAP IN SWAP OUT
1 634 930980 1588522
2 610 627422 1362174
3 649 1079847 1616367
4 543 953289 1635379
5 642 913237 1514000
Auto-ballooning enabled:
RUN TIME(s) SWAP IN SWAP OUT
1 629 901 12537
2 624 981 18506
3 626 573 9085
4 631 2250 42534
5 627 1610 20808
Signed-off-by: Luiz Capitulino <[email protected]>
---
drivers/virtio/virtio_balloon.c | 55 +++++++++++++++++++++++++++++++++++++
include/uapi/linux/virtio_balloon.h | 1 +
2 files changed, 56 insertions(+)
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 9d5fe2b..f9dcae8 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -71,6 +71,9 @@ struct virtio_balloon
/* Memory statistics */
int need_stats_update;
struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR];
+
+ /* Memory shrinker */
+ struct shrinker shrinker;
};
static struct virtio_device_id id_table[] = {
@@ -126,6 +129,7 @@ static void set_page_pfns(u32 pfns[], struct page *page)
pfns[i] = page_to_balloon_pfn(page) + i;
}
+/* This function should be called with vb->balloon_mutex held */
static void fill_balloon(struct virtio_balloon *vb, size_t num)
{
struct balloon_dev_info *vb_dev_info = vb->vb_dev_info;
@@ -166,6 +170,7 @@ static void release_pages_by_pfn(const u32 pfns[], unsigned int num)
}
}
+/* This function should be called with vb->balloon_mutex held */
static void leak_balloon(struct virtio_balloon *vb, size_t num)
{
struct page *page;
@@ -285,6 +290,45 @@ static void update_balloon_size(struct virtio_balloon *vb)
&actual, sizeof(actual));
}
+static unsigned long balloon_get_nr_pages(const struct virtio_balloon *vb)
+{
+ return vb->num_pages / VIRTIO_BALLOON_PAGES_PER_PAGE;
+}
+
+static int balloon_shrinker(struct shrinker *shrinker,struct shrink_control *sc)
+{
+ unsigned int nr_pages, new_target;
+ struct virtio_balloon *vb;
+
+ vb = container_of(shrinker, struct virtio_balloon, shrinker);
+ if (!mutex_trylock(&vb->balloon_lock)) {
+ return -1;
+ }
+
+ nr_pages = balloon_get_nr_pages(vb);
+ if (!sc->nr_to_scan || !nr_pages) {
+ goto out;
+ }
+
+ /*
+ * If the current balloon size is greater than the number of
+ * pages being reclaimed by the kernel, deflate only the needed
+ * amount. Otherwise deflate everything we have.
+ */
+ new_target = 0;
+ if (nr_pages > sc->nr_to_scan) {
+ new_target = nr_pages - sc->nr_to_scan;
+ }
+
+ leak_balloon(vb, new_target);
+ update_balloon_size(vb);
+ nr_pages = balloon_get_nr_pages(vb);
+
+out:
+ mutex_unlock(&vb->balloon_lock);
+ return nr_pages;
+}
+
static int balloon(void *_vballoon)
{
struct virtio_balloon *vb = _vballoon;
@@ -471,6 +515,13 @@ static int virtballoon_probe(struct virtio_device *vdev)
goto out_del_vqs;
}
+ memset(&vb->shrinker, 0, sizeof(vb->shrinker));
+ if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_AUTO_BALLOON)) {
+ vb->shrinker.shrink = balloon_shrinker;
+ vb->shrinker.seeks = DEFAULT_SEEKS;
+ register_shrinker(&vb->shrinker);
+ }
+
return 0;
out_del_vqs:
@@ -487,6 +538,9 @@ out:
static void remove_common(struct virtio_balloon *vb)
{
+ if (vb->shrinker.shrink)
+ unregister_shrinker(&vb->shrinker);
+
/* There might be pages left in the balloon: free them. */
mutex_lock(&vb->balloon_lock);
while (vb->num_pages)
@@ -543,6 +597,7 @@ static int virtballoon_restore(struct virtio_device *vdev)
static unsigned int features[] = {
VIRTIO_BALLOON_F_MUST_TELL_HOST,
VIRTIO_BALLOON_F_STATS_VQ,
+ VIRTIO_BALLOON_F_AUTO_BALLOON,
};
static struct virtio_driver virtio_balloon_driver = {
diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
index 5e26f61..bd378a4 100644
--- a/include/uapi/linux/virtio_balloon.h
+++ b/include/uapi/linux/virtio_balloon.h
@@ -31,6 +31,7 @@
/* The feature bitmap for virtio balloon */
#define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */
#define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */
+#define VIRTIO_BALLOON_F_AUTO_BALLOON 2 /* Automatic ballooning */
/* Size of a PFN in the balloon interface. */
#define VIRTIO_BALLOON_PFN_SHIFT 12
--
1.8.1.4
This commit moves the balloon_lock mutex out of the fill_balloon()
and leak_balloon() functions to their callers.
The reason for this change is that the next commit will introduce
a shrinker callback for the balloon driver, which will also call
leak_balloon() but will require different locking semantics.
Signed-off-by: Luiz Capitulino <[email protected]>
---
drivers/virtio/virtio_balloon.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index bd3ae32..9d5fe2b 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -133,7 +133,6 @@ static void fill_balloon(struct virtio_balloon *vb, size_t num)
/* We can only do one array worth at a time. */
num = min(num, ARRAY_SIZE(vb->pfns));
- mutex_lock(&vb->balloon_lock);
for (vb->num_pfns = 0; vb->num_pfns < num;
vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) {
struct page *page = balloon_page_enqueue(vb_dev_info);
@@ -154,7 +153,6 @@ static void fill_balloon(struct virtio_balloon *vb, size_t num)
/* Did we get any? */
if (vb->num_pfns != 0)
tell_host(vb, vb->inflate_vq);
- mutex_unlock(&vb->balloon_lock);
}
static void release_pages_by_pfn(const u32 pfns[], unsigned int num)
@@ -176,7 +174,6 @@ static void leak_balloon(struct virtio_balloon *vb, size_t num)
/* We can only do one array worth at a time. */
num = min(num, ARRAY_SIZE(vb->pfns));
- mutex_lock(&vb->balloon_lock);
for (vb->num_pfns = 0; vb->num_pfns < num;
vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) {
page = balloon_page_dequeue(vb_dev_info);
@@ -192,7 +189,6 @@ static void leak_balloon(struct virtio_balloon *vb, size_t num)
* is true, we *have* to do it in this order
*/
tell_host(vb, vb->deflate_vq);
- mutex_unlock(&vb->balloon_lock);
release_pages_by_pfn(vb->pfns, vb->num_pfns);
}
@@ -305,11 +301,13 @@ static int balloon(void *_vballoon)
|| freezing(current));
if (vb->need_stats_update)
stats_handle_request(vb);
+ mutex_lock(&vb->balloon_lock);
if (diff > 0)
fill_balloon(vb, diff);
else if (diff < 0)
leak_balloon(vb, -diff);
update_balloon_size(vb);
+ mutex_unlock(&vb->balloon_lock);
}
return 0;
}
@@ -490,9 +488,11 @@ out:
static void remove_common(struct virtio_balloon *vb)
{
/* There might be pages left in the balloon: free them. */
+ mutex_lock(&vb->balloon_lock);
while (vb->num_pages)
leak_balloon(vb, vb->num_pages);
update_balloon_size(vb);
+ mutex_unlock(&vb->balloon_lock);
/* Now we reset the device so we can clean up the queues. */
vb->vdev->config->reset(vb->vdev);
--
1.8.1.4
On Thu, May 09, 2013 at 10:53:48AM -0400, Luiz Capitulino wrote:
> This commit moves the balloon_lock mutex out of the fill_balloon()
> and leak_balloon() functions to their callers.
>
> The reason for this change is that the next commit will introduce
> a shrinker callback for the balloon driver, which will also call
> leak_balloon() but will require different locking semantics.
>
> Signed-off-by: Luiz Capitulino <[email protected]>
> ---
> drivers/virtio/virtio_balloon.c | 8 ++++----
> 1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> index bd3ae32..9d5fe2b 100644
> --- a/drivers/virtio/virtio_balloon.c
> +++ b/drivers/virtio/virtio_balloon.c
> @@ -133,7 +133,6 @@ static void fill_balloon(struct virtio_balloon *vb, size_t num)
> /* We can only do one array worth at a time. */
> num = min(num, ARRAY_SIZE(vb->pfns));
>
> - mutex_lock(&vb->balloon_lock);
> for (vb->num_pfns = 0; vb->num_pfns < num;
> vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) {
> struct page *page = balloon_page_enqueue(vb_dev_info);
> @@ -154,7 +153,6 @@ static void fill_balloon(struct virtio_balloon *vb, size_t num)
> /* Did we get any? */
> if (vb->num_pfns != 0)
> tell_host(vb, vb->inflate_vq);
> - mutex_unlock(&vb->balloon_lock);
> }
>
> static void release_pages_by_pfn(const u32 pfns[], unsigned int num)
> @@ -176,7 +174,6 @@ static void leak_balloon(struct virtio_balloon *vb, size_t num)
> /* We can only do one array worth at a time. */
> num = min(num, ARRAY_SIZE(vb->pfns));
>
> - mutex_lock(&vb->balloon_lock);
> for (vb->num_pfns = 0; vb->num_pfns < num;
> vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) {
> page = balloon_page_dequeue(vb_dev_info);
> @@ -192,7 +189,6 @@ static void leak_balloon(struct virtio_balloon *vb, size_t num)
> * is true, we *have* to do it in this order
> */
> tell_host(vb, vb->deflate_vq);
> - mutex_unlock(&vb->balloon_lock);
> release_pages_by_pfn(vb->pfns, vb->num_pfns);
> }
>
> @@ -305,11 +301,13 @@ static int balloon(void *_vballoon)
> || freezing(current));
> if (vb->need_stats_update)
> stats_handle_request(vb);
> + mutex_lock(&vb->balloon_lock);
> if (diff > 0)
> fill_balloon(vb, diff);
> else if (diff < 0)
> leak_balloon(vb, -diff);
> update_balloon_size(vb);
> + mutex_unlock(&vb->balloon_lock);
> }
> return 0;
> }
> @@ -490,9 +488,11 @@ out:
> static void remove_common(struct virtio_balloon *vb)
> {
> /* There might be pages left in the balloon: free them. */
> + mutex_lock(&vb->balloon_lock);
> while (vb->num_pages)
> leak_balloon(vb, vb->num_pages);
> update_balloon_size(vb);
> + mutex_unlock(&vb->balloon_lock);
I think you will need to introduce the same change as above to virtballoon_restore()
On Thu, May 09, 2013 at 10:53:49AM -0400, Luiz Capitulino wrote:
> Automatic ballooning consists of dynamically adjusting the guest's
> balloon according to memory pressure in the host and in the guest.
>
> This commit implements the guest side of automatic balloning, which
> basically consists of registering a shrinker callback with the kernel,
> which will try to deflate the guest's balloon by the amount of pages
> being requested. The shrinker callback is only registered if the host
> supports the VIRTIO_BALLOON_F_AUTO_BALLOON feature bit.
>
> Automatic inflate is performed by the host.
>
> Here are some numbers. The test-case is to run 35 VMs (1G of RAM each)
> in parallel doing a kernel build. Host has 32GB of RAM and 16GB of swap.
> SWAP IN and SWAP OUT correspond to the number of pages swapped in and
> swapped out, respectively.
>
> Auto-ballooning disabled:
>
> RUN TIME(s) SWAP IN SWAP OUT
>
> 1 634 930980 1588522
> 2 610 627422 1362174
> 3 649 1079847 1616367
> 4 543 953289 1635379
> 5 642 913237 1514000
>
> Auto-ballooning enabled:
>
> RUN TIME(s) SWAP IN SWAP OUT
>
> 1 629 901 12537
> 2 624 981 18506
> 3 626 573 9085
> 4 631 2250 42534
> 5 627 1610 20808
>
> Signed-off-by: Luiz Capitulino <[email protected]>
> ---
Nice work Luiz! Just allow me a silly question, though. Since your shrinker
doesn't change the balloon target size, as soon as the shrink round finishes the
balloon will re-inflate again, won't it? Doesn't this cause a sort of "balloon
thrashing" scenario, if both guest and host are suffering from memory pressure?
The rest I have for the moment, are only nitpicks :)
> drivers/virtio/virtio_balloon.c | 55 +++++++++++++++++++++++++++++++++++++
> include/uapi/linux/virtio_balloon.h | 1 +
> 2 files changed, 56 insertions(+)
>
> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> index 9d5fe2b..f9dcae8 100644
> --- a/drivers/virtio/virtio_balloon.c
> +++ b/drivers/virtio/virtio_balloon.c
> @@ -71,6 +71,9 @@ struct virtio_balloon
> /* Memory statistics */
> int need_stats_update;
> struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR];
> +
> + /* Memory shrinker */
> + struct shrinker shrinker;
> };
>
> static struct virtio_device_id id_table[] = {
> @@ -126,6 +129,7 @@ static void set_page_pfns(u32 pfns[], struct page *page)
> pfns[i] = page_to_balloon_pfn(page) + i;
> }
>
> +/* This function should be called with vb->balloon_mutex held */
> static void fill_balloon(struct virtio_balloon *vb, size_t num)
> {
> struct balloon_dev_info *vb_dev_info = vb->vb_dev_info;
> @@ -166,6 +170,7 @@ static void release_pages_by_pfn(const u32 pfns[], unsigned int num)
> }
> }
>
> +/* This function should be called with vb->balloon_mutex held */
> static void leak_balloon(struct virtio_balloon *vb, size_t num)
> {
> struct page *page;
> @@ -285,6 +290,45 @@ static void update_balloon_size(struct virtio_balloon *vb)
> &actual, sizeof(actual));
> }
>
> +static unsigned long balloon_get_nr_pages(const struct virtio_balloon *vb)
> +{
> + return vb->num_pages / VIRTIO_BALLOON_PAGES_PER_PAGE;
> +}
> +
> +static int balloon_shrinker(struct shrinker *shrinker,struct shrink_control *sc)
> +{
> + unsigned int nr_pages, new_target;
> + struct virtio_balloon *vb;
> +
> + vb = container_of(shrinker, struct virtio_balloon, shrinker);
> + if (!mutex_trylock(&vb->balloon_lock)) {
> + return -1;
> + }
> +
> + nr_pages = balloon_get_nr_pages(vb);
> + if (!sc->nr_to_scan || !nr_pages) {
> + goto out;
> + }
> +
> + /*
> + * If the current balloon size is greater than the number of
> + * pages being reclaimed by the kernel, deflate only the needed
> + * amount. Otherwise deflate everything we have.
> + */
> + new_target = 0;
> + if (nr_pages > sc->nr_to_scan) {
> + new_target = nr_pages - sc->nr_to_scan;
> + }
> +
CodingStyle: you don't need the curly-braces for all these single statements above
> + leak_balloon(vb, new_target);
> + update_balloon_size(vb);
> + nr_pages = balloon_get_nr_pages(vb);
> +
> +out:
> + mutex_unlock(&vb->balloon_lock);
> + return nr_pages;
> +}
> +
> static int balloon(void *_vballoon)
> {
> struct virtio_balloon *vb = _vballoon;
> @@ -471,6 +515,13 @@ static int virtballoon_probe(struct virtio_device *vdev)
> goto out_del_vqs;
> }
>
> + memset(&vb->shrinker, 0, sizeof(vb->shrinker));
> + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_AUTO_BALLOON)) {
> + vb->shrinker.shrink = balloon_shrinker;
> + vb->shrinker.seeks = DEFAULT_SEEKS;
> + register_shrinker(&vb->shrinker);
> + }
> +
> return 0;
>
> out_del_vqs:
> @@ -487,6 +538,9 @@ out:
>
> static void remove_common(struct virtio_balloon *vb)
> {
> + if (vb->shrinker.shrink)
> + unregister_shrinker(&vb->shrinker);
> +
> /* There might be pages left in the balloon: free them. */
> mutex_lock(&vb->balloon_lock);
> while (vb->num_pages)
> @@ -543,6 +597,7 @@ static int virtballoon_restore(struct virtio_device *vdev)
> static unsigned int features[] = {
> VIRTIO_BALLOON_F_MUST_TELL_HOST,
> VIRTIO_BALLOON_F_STATS_VQ,
> + VIRTIO_BALLOON_F_AUTO_BALLOON,
> };
>
> static struct virtio_driver virtio_balloon_driver = {
> diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
> index 5e26f61..bd378a4 100644
> --- a/include/uapi/linux/virtio_balloon.h
> +++ b/include/uapi/linux/virtio_balloon.h
> @@ -31,6 +31,7 @@
> /* The feature bitmap for virtio balloon */
> #define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */
> #define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */
> +#define VIRTIO_BALLOON_F_AUTO_BALLOON 2 /* Automatic ballooning */
>
> /* Size of a PFN in the balloon interface. */
> #define VIRTIO_BALLOON_PFN_SHIFT 12
> --
> 1.8.1.4
>
On Thu, 9 May 2013 18:03:09 -0300
Rafael Aquini <[email protected]> wrote:
> On Thu, May 09, 2013 at 10:53:48AM -0400, Luiz Capitulino wrote:
> > This commit moves the balloon_lock mutex out of the fill_balloon()
> > and leak_balloon() functions to their callers.
> >
> > The reason for this change is that the next commit will introduce
> > a shrinker callback for the balloon driver, which will also call
> > leak_balloon() but will require different locking semantics.
> >
> > Signed-off-by: Luiz Capitulino <[email protected]>
> > ---
> > drivers/virtio/virtio_balloon.c | 8 ++++----
> > 1 file changed, 4 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> > index bd3ae32..9d5fe2b 100644
> > --- a/drivers/virtio/virtio_balloon.c
> > +++ b/drivers/virtio/virtio_balloon.c
> > @@ -133,7 +133,6 @@ static void fill_balloon(struct virtio_balloon *vb, size_t num)
> > /* We can only do one array worth at a time. */
> > num = min(num, ARRAY_SIZE(vb->pfns));
> >
> > - mutex_lock(&vb->balloon_lock);
> > for (vb->num_pfns = 0; vb->num_pfns < num;
> > vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) {
> > struct page *page = balloon_page_enqueue(vb_dev_info);
> > @@ -154,7 +153,6 @@ static void fill_balloon(struct virtio_balloon *vb, size_t num)
> > /* Did we get any? */
> > if (vb->num_pfns != 0)
> > tell_host(vb, vb->inflate_vq);
> > - mutex_unlock(&vb->balloon_lock);
> > }
> >
> > static void release_pages_by_pfn(const u32 pfns[], unsigned int num)
> > @@ -176,7 +174,6 @@ static void leak_balloon(struct virtio_balloon *vb, size_t num)
> > /* We can only do one array worth at a time. */
> > num = min(num, ARRAY_SIZE(vb->pfns));
> >
> > - mutex_lock(&vb->balloon_lock);
> > for (vb->num_pfns = 0; vb->num_pfns < num;
> > vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) {
> > page = balloon_page_dequeue(vb_dev_info);
> > @@ -192,7 +189,6 @@ static void leak_balloon(struct virtio_balloon *vb, size_t num)
> > * is true, we *have* to do it in this order
> > */
> > tell_host(vb, vb->deflate_vq);
> > - mutex_unlock(&vb->balloon_lock);
> > release_pages_by_pfn(vb->pfns, vb->num_pfns);
> > }
> >
> > @@ -305,11 +301,13 @@ static int balloon(void *_vballoon)
> > || freezing(current));
> > if (vb->need_stats_update)
> > stats_handle_request(vb);
> > + mutex_lock(&vb->balloon_lock);
> > if (diff > 0)
> > fill_balloon(vb, diff);
> > else if (diff < 0)
> > leak_balloon(vb, -diff);
> > update_balloon_size(vb);
> > + mutex_unlock(&vb->balloon_lock);
> > }
> > return 0;
> > }
> > @@ -490,9 +488,11 @@ out:
> > static void remove_common(struct virtio_balloon *vb)
> > {
> > /* There might be pages left in the balloon: free them. */
> > + mutex_lock(&vb->balloon_lock);
> > while (vb->num_pages)
> > leak_balloon(vb, vb->num_pages);
> > update_balloon_size(vb);
> > + mutex_unlock(&vb->balloon_lock);
>
> I think you will need to introduce the same change as above to virtballoon_restore()
Thanks Rafael, I've fixed it in my tree.
On Thu, 9 May 2013 18:15:19 -0300
Rafael Aquini <[email protected]> wrote:
> On Thu, May 09, 2013 at 10:53:49AM -0400, Luiz Capitulino wrote:
> > Automatic ballooning consists of dynamically adjusting the guest's
> > balloon according to memory pressure in the host and in the guest.
> >
> > This commit implements the guest side of automatic balloning, which
> > basically consists of registering a shrinker callback with the kernel,
> > which will try to deflate the guest's balloon by the amount of pages
> > being requested. The shrinker callback is only registered if the host
> > supports the VIRTIO_BALLOON_F_AUTO_BALLOON feature bit.
> >
> > Automatic inflate is performed by the host.
> >
> > Here are some numbers. The test-case is to run 35 VMs (1G of RAM each)
> > in parallel doing a kernel build. Host has 32GB of RAM and 16GB of swap.
> > SWAP IN and SWAP OUT correspond to the number of pages swapped in and
> > swapped out, respectively.
> >
> > Auto-ballooning disabled:
> >
> > RUN TIME(s) SWAP IN SWAP OUT
> >
> > 1 634 930980 1588522
> > 2 610 627422 1362174
> > 3 649 1079847 1616367
> > 4 543 953289 1635379
> > 5 642 913237 1514000
> >
> > Auto-ballooning enabled:
> >
> > RUN TIME(s) SWAP IN SWAP OUT
> >
> > 1 629 901 12537
> > 2 624 981 18506
> > 3 626 573 9085
> > 4 631 2250 42534
> > 5 627 1610 20808
> >
> > Signed-off-by: Luiz Capitulino <[email protected]>
> > ---
>
> Nice work Luiz! Just allow me a silly question, though.
I have 100% more chances of committing sillynesses than you, so please
go ahead.
> Since your shrinker
> doesn't change the balloon target size,
Which target size are you referring to? The one in the host (member num_pages
of VirtIOBalloon in QEMU)?
If it is the one in the host, then my understanding is that that member is only
used to communicate the new balloon target to the guest. The guest driver
will only read it when told (by the host) to do so, and when it does the
target value will be correct.
Am I right?
> as soon as the shrink round finishes the
> balloon will re-inflate again, won't it? Doesn't this cause a sort of "balloon
> thrashing" scenario, if both guest and host are suffering from memory pressure?
>
>
> The rest I have for the moment, are only nitpicks :)
>
>
> > drivers/virtio/virtio_balloon.c | 55 +++++++++++++++++++++++++++++++++++++
> > include/uapi/linux/virtio_balloon.h | 1 +
> > 2 files changed, 56 insertions(+)
> >
> > diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> > index 9d5fe2b..f9dcae8 100644
> > --- a/drivers/virtio/virtio_balloon.c
> > +++ b/drivers/virtio/virtio_balloon.c
> > @@ -71,6 +71,9 @@ struct virtio_balloon
> > /* Memory statistics */
> > int need_stats_update;
> > struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR];
> > +
> > + /* Memory shrinker */
> > + struct shrinker shrinker;
> > };
> >
> > static struct virtio_device_id id_table[] = {
> > @@ -126,6 +129,7 @@ static void set_page_pfns(u32 pfns[], struct page *page)
> > pfns[i] = page_to_balloon_pfn(page) + i;
> > }
> >
> > +/* This function should be called with vb->balloon_mutex held */
> > static void fill_balloon(struct virtio_balloon *vb, size_t num)
> > {
> > struct balloon_dev_info *vb_dev_info = vb->vb_dev_info;
> > @@ -166,6 +170,7 @@ static void release_pages_by_pfn(const u32 pfns[], unsigned int num)
> > }
> > }
> >
> > +/* This function should be called with vb->balloon_mutex held */
> > static void leak_balloon(struct virtio_balloon *vb, size_t num)
> > {
> > struct page *page;
> > @@ -285,6 +290,45 @@ static void update_balloon_size(struct virtio_balloon *vb)
> > &actual, sizeof(actual));
> > }
> >
> > +static unsigned long balloon_get_nr_pages(const struct virtio_balloon *vb)
> > +{
> > + return vb->num_pages / VIRTIO_BALLOON_PAGES_PER_PAGE;
> > +}
> > +
> > +static int balloon_shrinker(struct shrinker *shrinker,struct shrink_control *sc)
> > +{
> > + unsigned int nr_pages, new_target;
> > + struct virtio_balloon *vb;
> > +
> > + vb = container_of(shrinker, struct virtio_balloon, shrinker);
> > + if (!mutex_trylock(&vb->balloon_lock)) {
> > + return -1;
> > + }
> > +
> > + nr_pages = balloon_get_nr_pages(vb);
> > + if (!sc->nr_to_scan || !nr_pages) {
> > + goto out;
> > + }
> > +
> > + /*
> > + * If the current balloon size is greater than the number of
> > + * pages being reclaimed by the kernel, deflate only the needed
> > + * amount. Otherwise deflate everything we have.
> > + */
> > + new_target = 0;
> > + if (nr_pages > sc->nr_to_scan) {
> > + new_target = nr_pages - sc->nr_to_scan;
> > + }
> > +
>
> CodingStyle: you don't need the curly-braces for all these single staments above
Oh, this comes from QEMU coding style. Fixed.
> > + leak_balloon(vb, new_target);
> > + update_balloon_size(vb);
> > + nr_pages = balloon_get_nr_pages(vb);
> > +
> > +out:
> > + mutex_unlock(&vb->balloon_lock);
> > + return nr_pages;
> > +}
> > +
> > static int balloon(void *_vballoon)
> > {
> > struct virtio_balloon *vb = _vballoon;
> > @@ -471,6 +515,13 @@ static int virtballoon_probe(struct virtio_device *vdev)
> > goto out_del_vqs;
> > }
> >
> > + memset(&vb->shrinker, 0, sizeof(vb->shrinker));
> > + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_AUTO_BALLOON)) {
> > + vb->shrinker.shrink = balloon_shrinker;
> > + vb->shrinker.seeks = DEFAULT_SEEKS;
> > + register_shrinker(&vb->shrinker);
> > + }
> > +
> > return 0;
> >
> > out_del_vqs:
> > @@ -487,6 +538,9 @@ out:
> >
> > static void remove_common(struct virtio_balloon *vb)
> > {
> > + if (vb->shrinker.shrink)
> > + unregister_shrinker(&vb->shrinker);
> > +
> > /* There might be pages left in the balloon: free them. */
> > mutex_lock(&vb->balloon_lock);
> > while (vb->num_pages)
> > @@ -543,6 +597,7 @@ static int virtballoon_restore(struct virtio_device *vdev)
> > static unsigned int features[] = {
> > VIRTIO_BALLOON_F_MUST_TELL_HOST,
> > VIRTIO_BALLOON_F_STATS_VQ,
> > + VIRTIO_BALLOON_F_AUTO_BALLOON,
> > };
> >
> > static struct virtio_driver virtio_balloon_driver = {
> > diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
> > index 5e26f61..bd378a4 100644
> > --- a/include/uapi/linux/virtio_balloon.h
> > +++ b/include/uapi/linux/virtio_balloon.h
> > @@ -31,6 +31,7 @@
> > /* The feature bitmap for virtio balloon */
> > #define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */
> > #define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */
> > +#define VIRTIO_BALLOON_F_AUTO_BALLOON 2 /* Automatic ballooning */
> >
> > /* Size of a PFN in the balloon interface. */
> > #define VIRTIO_BALLOON_PFN_SHIFT 12
> > --
> > 1.8.1.4
> >
>
On Fri, 10 May 2013 09:20:46 -0400
Luiz Capitulino <[email protected]> wrote:
> On Thu, 9 May 2013 18:15:19 -0300
> Rafael Aquini <[email protected]> wrote:
>
> > On Thu, May 09, 2013 at 10:53:49AM -0400, Luiz Capitulino wrote:
> > > Automatic ballooning consists of dynamically adjusting the guest's
> > > balloon according to memory pressure in the host and in the guest.
> > >
> > > This commit implements the guest side of automatic balloning, which
> > > basically consists of registering a shrinker callback with the kernel,
> > > which will try to deflate the guest's balloon by the amount of pages
> > > being requested. The shrinker callback is only registered if the host
> > > supports the VIRTIO_BALLOON_F_AUTO_BALLOON feature bit.
> > >
> > > Automatic inflate is performed by the host.
> > >
> > > Here are some numbers. The test-case is to run 35 VMs (1G of RAM each)
> > > in parallel doing a kernel build. Host has 32GB of RAM and 16GB of swap.
> > > SWAP IN and SWAP OUT correspond to the number of pages swapped in and
> > > swapped out, respectively.
> > >
> > > Auto-ballooning disabled:
> > >
> > > RUN TIME(s) SWAP IN SWAP OUT
> > >
> > > 1 634 930980 1588522
> > > 2 610 627422 1362174
> > > 3 649 1079847 1616367
> > > 4 543 953289 1635379
> > > 5 642 913237 1514000
> > >
> > > Auto-ballooning enabled:
> > >
> > > RUN TIME(s) SWAP IN SWAP OUT
> > >
> > > 1 629 901 12537
> > > 2 624 981 18506
> > > 3 626 573 9085
> > > 4 631 2250 42534
> > > 5 627 1610 20808
> > >
> > > Signed-off-by: Luiz Capitulino <[email protected]>
> > > ---
> >
> > Nice work Luiz! Just allow me a silly question, though.
>
> I have 100% more chances of committing sillynesses than you, so please
> go ahead.
>
> > Since your shrinker
> > doesn't change the balloon target size,
>
> Which target size are you referring to? The one in the host (member num_pages
> of VirtIOBalloon in QEMU)?
>
> If it the one in the host, then my understanding is that that member is only
> used to communicate the new balloon target to the guest. The guest driver
> will only read it when told (by the host) to do so, and when it does the
> target value will be correct.
>
> Am I right?
>
> > as soon as the shrink round finishes the
> > balloon will re-inflate again, won't it? Doesn't this cause a sort of "balloon
> > thrashing" scenario, if both guest and host are suffering from memory pressure?
Forgot to say that I didn't observe this in my testing. But I'll try harder
as soon as we clarify which target size we're talking about.
On Thu, May 09, 2013 at 10:53:49AM -0400, Luiz Capitulino wrote:
> Automatic ballooning consists of dynamically adjusting the guest's
> balloon according to memory pressure in the host and in the guest.
>
> This commit implements the guest side of automatic balloning, which
> basically consists of registering a shrinker callback with the kernel,
> which will try to deflate the guest's balloon by the amount of pages
> being requested. The shrinker callback is only registered if the host
> supports the VIRTIO_BALLOON_F_AUTO_BALLOON feature bit.
OK so we have a new feature bit, such that:
- if AUTO_BALLOON is set in host, guest can leak a
page from a balloon at any time
questions left unanswered
- what meaning does num_pages have now?
- when will the balloon be re-inflated?
I'd like to see a spec patch addressing these questions.
Would we ever want to mix the two types of ballooning?
If yes possibly when we put a page in the balloon we
might want to give host a hint "this page might be
leaked again soon".
> Automatic inflate is performed by the host.
>
> Here are some numbers. The test-case is to run 35 VMs (1G of RAM each)
> in parallel doing a kernel build. Host has 32GB of RAM and 16GB of swap.
> SWAP IN and SWAP OUT correspond to the number of pages swapped in and
> swapped out, respectively.
>
> Auto-ballooning disabled:
>
> RUN TIME(s) SWAP IN SWAP OUT
>
> 1 634 930980 1588522
> 2 610 627422 1362174
> 3 649 1079847 1616367
> 4 543 953289 1635379
> 5 642 913237 1514000
>
> Auto-ballooning enabled:
>
> RUN TIME(s) SWAP IN SWAP OUT
>
> 1 629 901 12537
> 2 624 981 18506
> 3 626 573 9085
> 4 631 2250 42534
> 5 627 1610 20808
So what exactly happens here?
Much less swap in/out activity, but no gain
in total runtime. Doesn't this show there's
a bottleneck somewhere? Could be a problem in
the implementation?
Also, what happened with the balloon?
Did we end up with balloon completely inflated? deflated?
One question to consider: possibly if we are
going to reuse the page in the balloon soon,
we might want to bypass notify before use for it?
Maybe that will help speed things up.
> Signed-off-by: Luiz Capitulino <[email protected]>
> ---
> drivers/virtio/virtio_balloon.c | 55 +++++++++++++++++++++++++++++++++++++
> include/uapi/linux/virtio_balloon.h | 1 +
> 2 files changed, 56 insertions(+)
>
> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> index 9d5fe2b..f9dcae8 100644
> --- a/drivers/virtio/virtio_balloon.c
> +++ b/drivers/virtio/virtio_balloon.c
> @@ -71,6 +71,9 @@ struct virtio_balloon
> /* Memory statistics */
> int need_stats_update;
> struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR];
> +
> + /* Memory shrinker */
> + struct shrinker shrinker;
> };
>
> static struct virtio_device_id id_table[] = {
> @@ -126,6 +129,7 @@ static void set_page_pfns(u32 pfns[], struct page *page)
> pfns[i] = page_to_balloon_pfn(page) + i;
> }
>
> +/* This function should be called with vb->balloon_mutex held */
> static void fill_balloon(struct virtio_balloon *vb, size_t num)
> {
> struct balloon_dev_info *vb_dev_info = vb->vb_dev_info;
> @@ -166,6 +170,7 @@ static void release_pages_by_pfn(const u32 pfns[], unsigned int num)
> }
> }
>
> +/* This function should be called with vb->balloon_mutex held */
> static void leak_balloon(struct virtio_balloon *vb, size_t num)
> {
> struct page *page;
> @@ -285,6 +290,45 @@ static void update_balloon_size(struct virtio_balloon *vb)
> &actual, sizeof(actual));
> }
>
> +static unsigned long balloon_get_nr_pages(const struct virtio_balloon *vb)
> +{
> + return vb->num_pages / VIRTIO_BALLOON_PAGES_PER_PAGE;
> +}
> +
> +static int balloon_shrinker(struct shrinker *shrinker,struct shrink_control *sc)
> +{
> + unsigned int nr_pages, new_target;
> + struct virtio_balloon *vb;
> +
> + vb = container_of(shrinker, struct virtio_balloon, shrinker);
> + if (!mutex_trylock(&vb->balloon_lock)) {
> + return -1;
> + }
> +
> + nr_pages = balloon_get_nr_pages(vb);
> + if (!sc->nr_to_scan || !nr_pages) {
> + goto out;
> + }
> +
> + /*
> + * If the current balloon size is greater than the number of
> + * pages being reclaimed by the kernel, deflate only the needed
> + * amount. Otherwise deflate everything we have.
> + */
> + new_target = 0;
> + if (nr_pages > sc->nr_to_scan) {
> + new_target = nr_pages - sc->nr_to_scan;
> + }
> +
> + leak_balloon(vb, new_target);
> + update_balloon_size(vb);
> + nr_pages = balloon_get_nr_pages(vb);
> +
> +out:
> + mutex_unlock(&vb->balloon_lock);
> + return nr_pages;
> +}
> +
> static int balloon(void *_vballoon)
> {
> struct virtio_balloon *vb = _vballoon;
> @@ -471,6 +515,13 @@ static int virtballoon_probe(struct virtio_device *vdev)
> goto out_del_vqs;
> }
>
> + memset(&vb->shrinker, 0, sizeof(vb->shrinker));
> + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_AUTO_BALLOON)) {
> + vb->shrinker.shrink = balloon_shrinker;
> + vb->shrinker.seeks = DEFAULT_SEEKS;
> + register_shrinker(&vb->shrinker);
> + }
> +
> return 0;
>
> out_del_vqs:
> @@ -487,6 +538,9 @@ out:
>
> static void remove_common(struct virtio_balloon *vb)
> {
> + if (vb->shrinker.shrink)
> + unregister_shrinker(&vb->shrinker);
> +
> /* There might be pages left in the balloon: free them. */
> mutex_lock(&vb->balloon_lock);
> while (vb->num_pages)
> @@ -543,6 +597,7 @@ static int virtballoon_restore(struct virtio_device *vdev)
> static unsigned int features[] = {
> VIRTIO_BALLOON_F_MUST_TELL_HOST,
> VIRTIO_BALLOON_F_STATS_VQ,
> + VIRTIO_BALLOON_F_AUTO_BALLOON,
> };
>
> static struct virtio_driver virtio_balloon_driver = {
> diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
> index 5e26f61..bd378a4 100644
> --- a/include/uapi/linux/virtio_balloon.h
> +++ b/include/uapi/linux/virtio_balloon.h
> @@ -31,6 +31,7 @@
> /* The feature bitmap for virtio balloon */
> #define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */
> #define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */
> +#define VIRTIO_BALLOON_F_AUTO_BALLOON 2 /* Automatic ballooning */
>
> /* Size of a PFN in the balloon interface. */
> #define VIRTIO_BALLOON_PFN_SHIFT 12
> --
> 1.8.1.4
On 05/12/2013 10:30 AM, Michael S. Tsirkin wrote:
> On Thu, May 09, 2013 at 10:53:49AM -0400, Luiz Capitulino wrote:
>> Automatic ballooning consists of dynamically adjusting the guest's
>> balloon according to memory pressure in the host and in the guest.
>>
>> This commit implements the guest side of automatic balloning, which
>> basically consists of registering a shrinker callback with the kernel,
>> which will try to deflate the guest's balloon by the amount of pages
>> being requested. The shrinker callback is only registered if the host
>> supports the VIRTIO_BALLOON_F_AUTO_BALLOON feature bit.
>
> OK so we have a new feature bit, such that:
> - if AUTO_BALLOON is set in host, guest can leak a
> page from a balloon at any time
>
> questions left unanswered
> - what meaning does num_pages have now?
"as large as we could go"
> - when will the balloon be re-inflated?
I believe the qemu changes Luiz wrote address that side,
with qemu-kvm getting notifications from the host kernel
when there is memory pressure, and shrinking the guest
in reaction to that notification.
> I'd like to see a spec patch addressing these questions.
>
> Would we ever want to mix the two types of ballooning?
> If yes possibly when we put a page in the balloon we
> might want to give host a hint "this page might be
> leaked again soon".
It might not be the same page, and the host really does
not care which page it is.
The automatic inflation happens when the host needs to
free up memory.
>> Automatic inflate is performed by the host.
>>
>> Here are some numbers. The test-case is to run 35 VMs (1G of RAM each)
>> in parallel doing a kernel build. Host has 32GB of RAM and 16GB of swap.
>> SWAP IN and SWAP OUT correspond to the number of pages swapped in and
>> swapped out, respectively.
>>
>> Auto-ballooning disabled:
>>
>> RUN TIME(s) SWAP IN SWAP OUT
>>
>> 1 634 930980 1588522
>> 2 610 627422 1362174
>> 3 649 1079847 1616367
>> 4 543 953289 1635379
>> 5 642 913237 1514000
>>
>> Auto-ballooning enabled:
>>
>> RUN TIME(s) SWAP IN SWAP OUT
>>
>> 1 629 901 12537
>> 2 624 981 18506
>> 3 626 573 9085
>> 4 631 2250 42534
>> 5 627 1610 20808
>
> So what exactly happens here?
> Much less swap in/out activity, but no gain
> in total runtime. Doesn't this show there's
> a bottleneck somewhere? Could be a problem in
> the implementation?
It could also be an issue with the benchmark chosen,
which may not have swap as its bottleneck at any point.
However, the reduced swapping is still very promising!
> Also, what happened with the balloon?
> Did we end up with balloon completely inflated? deflated?
>
> One question to consider: possibly if we are
> going to reuse the page in the balloon soon,
> we might want to bypass notify before use for it?
> Maybe that will help speed things up.
>
>
>> Signed-off-by: Luiz Capitulino <[email protected]>
>> ---
>> drivers/virtio/virtio_balloon.c | 55 +++++++++++++++++++++++++++++++++++++
>> include/uapi/linux/virtio_balloon.h | 1 +
>> 2 files changed, 56 insertions(+)
>>
>> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
>> index 9d5fe2b..f9dcae8 100644
>> --- a/drivers/virtio/virtio_balloon.c
>> +++ b/drivers/virtio/virtio_balloon.c
>> @@ -71,6 +71,9 @@ struct virtio_balloon
>> /* Memory statistics */
>> int need_stats_update;
>> struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR];
>> +
>> + /* Memory shrinker */
>> + struct shrinker shrinker;
>> };
>>
>> static struct virtio_device_id id_table[] = {
>> @@ -126,6 +129,7 @@ static void set_page_pfns(u32 pfns[], struct page *page)
>> pfns[i] = page_to_balloon_pfn(page) + i;
>> }
>>
>> +/* This function should be called with vb->balloon_mutex held */
>> static void fill_balloon(struct virtio_balloon *vb, size_t num)
>> {
>> struct balloon_dev_info *vb_dev_info = vb->vb_dev_info;
>> @@ -166,6 +170,7 @@ static void release_pages_by_pfn(const u32 pfns[], unsigned int num)
>> }
>> }
>>
>> +/* This function should be called with vb->balloon_mutex held */
>> static void leak_balloon(struct virtio_balloon *vb, size_t num)
>> {
>> struct page *page;
>> @@ -285,6 +290,45 @@ static void update_balloon_size(struct virtio_balloon *vb)
>> &actual, sizeof(actual));
>> }
>>
>> +static unsigned long balloon_get_nr_pages(const struct virtio_balloon *vb)
>> +{
>> + return vb->num_pages / VIRTIO_BALLOON_PAGES_PER_PAGE;
>> +}
>> +
>> +static int balloon_shrinker(struct shrinker *shrinker,struct shrink_control *sc)
>> +{
>> + unsigned int nr_pages, new_target;
>> + struct virtio_balloon *vb;
>> +
>> + vb = container_of(shrinker, struct virtio_balloon, shrinker);
>> + if (!mutex_trylock(&vb->balloon_lock)) {
>> + return -1;
>> + }
>> +
>> + nr_pages = balloon_get_nr_pages(vb);
>> + if (!sc->nr_to_scan || !nr_pages) {
>> + goto out;
>> + }
>> +
>> + /*
>> + * If the current balloon size is greater than the number of
>> + * pages being reclaimed by the kernel, deflate only the needed
>> + * amount. Otherwise deflate everything we have.
>> + */
>> + new_target = 0;
>> + if (nr_pages > sc->nr_to_scan) {
>> + new_target = nr_pages - sc->nr_to_scan;
>> + }
>> +
>> + leak_balloon(vb, new_target);
>> + update_balloon_size(vb);
>> + nr_pages = balloon_get_nr_pages(vb);
>> +
>> +out:
>> + mutex_unlock(&vb->balloon_lock);
>> + return nr_pages;
>> +}
>> +
>> static int balloon(void *_vballoon)
>> {
>> struct virtio_balloon *vb = _vballoon;
>> @@ -471,6 +515,13 @@ static int virtballoon_probe(struct virtio_device *vdev)
>> goto out_del_vqs;
>> }
>>
>> + memset(&vb->shrinker, 0, sizeof(vb->shrinker));
>> + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_AUTO_BALLOON)) {
>> + vb->shrinker.shrink = balloon_shrinker;
>> + vb->shrinker.seeks = DEFAULT_SEEKS;
>> + register_shrinker(&vb->shrinker);
>> + }
>> +
>> return 0;
>>
>> out_del_vqs:
>> @@ -487,6 +538,9 @@ out:
>>
>> static void remove_common(struct virtio_balloon *vb)
>> {
>> + if (vb->shrinker.shrink)
>> + unregister_shrinker(&vb->shrinker);
>> +
>> /* There might be pages left in the balloon: free them. */
>> mutex_lock(&vb->balloon_lock);
>> while (vb->num_pages)
>> @@ -543,6 +597,7 @@ static int virtballoon_restore(struct virtio_device *vdev)
>> static unsigned int features[] = {
>> VIRTIO_BALLOON_F_MUST_TELL_HOST,
>> VIRTIO_BALLOON_F_STATS_VQ,
>> + VIRTIO_BALLOON_F_AUTO_BALLOON,
>> };
>>
>> static struct virtio_driver virtio_balloon_driver = {
>> diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
>> index 5e26f61..bd378a4 100644
>> --- a/include/uapi/linux/virtio_balloon.h
>> +++ b/include/uapi/linux/virtio_balloon.h
>> @@ -31,6 +31,7 @@
>> /* The feature bitmap for virtio balloon */
>> #define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */
>> #define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */
>> +#define VIRTIO_BALLOON_F_AUTO_BALLOON 2 /* Automatic ballooning */
>>
>> /* Size of a PFN in the balloon interface. */
>> #define VIRTIO_BALLOON_PFN_SHIFT 12
>> --
>> 1.8.1.4
--
All rights reversed
On Sun, May 12, 2013 at 12:36:09PM -0400, Rik van Riel wrote:
> On 05/12/2013 10:30 AM, Michael S. Tsirkin wrote:
> >On Thu, May 09, 2013 at 10:53:49AM -0400, Luiz Capitulino wrote:
> >>Automatic ballooning consists of dynamically adjusting the guest's
> >>balloon according to memory pressure in the host and in the guest.
> >>
> >>This commit implements the guest side of automatic balloning, which
> >>basically consists of registering a shrinker callback with the kernel,
> >>which will try to deflate the guest's balloon by the amount of pages
> >>being requested. The shrinker callback is only registered if the host
> >>supports the VIRTIO_BALLOON_F_AUTO_BALLOON feature bit.
> >
> >OK so we have a new feature bit, such that:
> >- if AUTO_BALLOON is set in host, guest can leak a
> > page from a balloon at any time
> >
> >questions left unanswered
> >- what meaning does num_pages have now?
>
> "as large as we could go"
I see. This is the reverse of its current meaning.
I would suggest a new field instead of overriding
the existing one.
> >- when will the balloon be re-inflated?
>
> I believe the qemu changes Luiz wrote address that side,
> with qemu-kvm getting notifications from the host kernel
> when there is memory pressure, and shrinking the guest
> in reaction to that notification.
But it's the guest memory pressure we care about:
- host asks balloon to inflate
later
- guest asks balloon to deflate
with this patch guest takes priority,
balloon deflates. So we should only inflate
if guest does not need the memory.
> >I'd like to see a spec patch addressing these questions.
> >
> >Would we ever want to mix the two types of ballooning?
> >If yes possibly when we put a page in the balloon we
> >might want to give host a hint "this page might be
> >leaked again soon".
>
> It might not be the same page, and the host really does
> not care which page it is.
Whether we care depends on what we do with the page.
For example, in the future we might care which NUMA node is
used.
> The automatic inflation happens when the host needs to
> free up memory.
This can be done today by management, with no need to
change qemu. So automatic inflate, IMHO, does not need
a feature flag. It's the automatic deflate in the guest that's new.
> >>Automatic inflate is performed by the host.
> >>
> >>Here are some numbers. The test-case is to run 35 VMs (1G of RAM each)
> >>in parallel doing a kernel build. Host has 32GB of RAM and 16GB of swap.
> >>SWAP IN and SWAP OUT correspond to the number of pages swapped in and
> >>swapped out, respectively.
> >>
> >>Auto-ballooning disabled:
> >>
> >>RUN TIME(s) SWAP IN SWAP OUT
> >>
> >>1 634 930980 1588522
> >>2 610 627422 1362174
> >>3 649 1079847 1616367
> >>4 543 953289 1635379
> >>5 642 913237 1514000
> >>
> >>Auto-ballooning enabled:
> >>
> >>RUN TIME(s) SWAP IN SWAP OUT
> >>
> >>1 629 901 12537
> >>2 624 981 18506
> >>3 626 573 9085
> >>4 631 2250 42534
> >>5 627 1610 20808
> >
> >So what exactly happens here?
> >Much less swap in/out activity, but no gain
> >in total runtime. Doesn't this show there's
> >a bottleneck somewhere? Could be a problem in
> >the implementation?
>
> It could also be an issue with the benchmark chosen,
> which may not have swap as its bottleneck at any point.
>
> However, the reduced swapping is still very promising!
Isn't this a direct result of inflating the balloon?
E.g. just inflating the balloon without
the shrinker will make us avoid swap in host.
What we would want to check is whether shrinking works as
expected, and whether we need to speed up shrinking.
As I say above, inflating the balloon is easy.
A good benchmark would show how we can
deflate and re-inflate it efficiently with
demand.
> >Also, what happened with the balloon?
> >Did we end up with balloon completely inflated? deflated?
> >
> >One question to consider: possibly if we are
> >going to reuse the page in the balloon soon,
> >we might want to bypass notify before use for it?
> >Maybe that will help speed things up.
> >
> >
> >>Signed-off-by: Luiz Capitulino <[email protected]>
> >>---
> >> drivers/virtio/virtio_balloon.c | 55 +++++++++++++++++++++++++++++++++++++
> >> include/uapi/linux/virtio_balloon.h | 1 +
> >> 2 files changed, 56 insertions(+)
> >>
> >>diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> >>index 9d5fe2b..f9dcae8 100644
> >>--- a/drivers/virtio/virtio_balloon.c
> >>+++ b/drivers/virtio/virtio_balloon.c
> >>@@ -71,6 +71,9 @@ struct virtio_balloon
> >> /* Memory statistics */
> >> int need_stats_update;
> >> struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR];
> >>+
> >>+ /* Memory shrinker */
> >>+ struct shrinker shrinker;
> >> };
> >>
> >> static struct virtio_device_id id_table[] = {
> >>@@ -126,6 +129,7 @@ static void set_page_pfns(u32 pfns[], struct page *page)
> >> pfns[i] = page_to_balloon_pfn(page) + i;
> >> }
> >>
> >>+/* This function should be called with vb->balloon_mutex held */
> >> static void fill_balloon(struct virtio_balloon *vb, size_t num)
> >> {
> >> struct balloon_dev_info *vb_dev_info = vb->vb_dev_info;
> >>@@ -166,6 +170,7 @@ static void release_pages_by_pfn(const u32 pfns[], unsigned int num)
> >> }
> >> }
> >>
> >>+/* This function should be called with vb->balloon_mutex held */
> >> static void leak_balloon(struct virtio_balloon *vb, size_t num)
> >> {
> >> struct page *page;
> >>@@ -285,6 +290,45 @@ static void update_balloon_size(struct virtio_balloon *vb)
> >> &actual, sizeof(actual));
> >> }
> >>
> >>+static unsigned long balloon_get_nr_pages(const struct virtio_balloon *vb)
> >>+{
> >>+ return vb->num_pages / VIRTIO_BALLOON_PAGES_PER_PAGE;
> >>+}
> >>+
> >>+static int balloon_shrinker(struct shrinker *shrinker,struct shrink_control *sc)
> >>+{
> >>+ unsigned int nr_pages, new_target;
> >>+ struct virtio_balloon *vb;
> >>+
> >>+ vb = container_of(shrinker, struct virtio_balloon, shrinker);
> >>+ if (!mutex_trylock(&vb->balloon_lock)) {
> >>+ return -1;
> >>+ }
> >>+
> >>+ nr_pages = balloon_get_nr_pages(vb);
> >>+ if (!sc->nr_to_scan || !nr_pages) {
> >>+ goto out;
> >>+ }
> >>+
> >>+ /*
> >>+ * If the current balloon size is greater than the number of
> >>+ * pages being reclaimed by the kernel, deflate only the needed
> >>+ * amount. Otherwise deflate everything we have.
> >>+ */
> >>+ new_target = 0;
> >>+ if (nr_pages > sc->nr_to_scan) {
> >>+ new_target = nr_pages - sc->nr_to_scan;
> >>+ }
> >>+
> >>+ leak_balloon(vb, new_target);
> >>+ update_balloon_size(vb);
> >>+ nr_pages = balloon_get_nr_pages(vb);
> >>+
> >>+out:
> >>+ mutex_unlock(&vb->balloon_lock);
> >>+ return nr_pages;
> >>+}
> >>+
> >> static int balloon(void *_vballoon)
> >> {
> >> struct virtio_balloon *vb = _vballoon;
> >>@@ -471,6 +515,13 @@ static int virtballoon_probe(struct virtio_device *vdev)
> >> goto out_del_vqs;
> >> }
> >>
> >>+ memset(&vb->shrinker, 0, sizeof(vb->shrinker));
> >>+ if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_AUTO_BALLOON)) {
> >>+ vb->shrinker.shrink = balloon_shrinker;
> >>+ vb->shrinker.seeks = DEFAULT_SEEKS;
> >>+ register_shrinker(&vb->shrinker);
> >>+ }
> >>+
> >> return 0;
> >>
> >> out_del_vqs:
> >>@@ -487,6 +538,9 @@ out:
> >>
> >> static void remove_common(struct virtio_balloon *vb)
> >> {
> >>+ if (vb->shrinker.shrink)
> >>+ unregister_shrinker(&vb->shrinker);
> >>+
> >> /* There might be pages left in the balloon: free them. */
> >> mutex_lock(&vb->balloon_lock);
> >> while (vb->num_pages)
> >>@@ -543,6 +597,7 @@ static int virtballoon_restore(struct virtio_device *vdev)
> >> static unsigned int features[] = {
> >> VIRTIO_BALLOON_F_MUST_TELL_HOST,
> >> VIRTIO_BALLOON_F_STATS_VQ,
> >>+ VIRTIO_BALLOON_F_AUTO_BALLOON,
> >> };
> >>
> >> static struct virtio_driver virtio_balloon_driver = {
> >>diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
> >>index 5e26f61..bd378a4 100644
> >>--- a/include/uapi/linux/virtio_balloon.h
> >>+++ b/include/uapi/linux/virtio_balloon.h
> >>@@ -31,6 +31,7 @@
> >> /* The feature bitmap for virtio balloon */
> >> #define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */
> >> #define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */
> >>+#define VIRTIO_BALLOON_F_AUTO_BALLOON 2 /* Automatic ballooning */
> >>
> >> /* Size of a PFN in the balloon interface. */
> >> #define VIRTIO_BALLOON_PFN_SHIFT 12
> >>--
> >>1.8.1.4
>
>
> --
> All rights reversed
On Fri, May 10, 2013 at 09:20:46AM -0400, Luiz Capitulino wrote:
> On Thu, 9 May 2013 18:15:19 -0300
> Rafael Aquini <[email protected]> wrote:
>
> > Since your shrinker
> > doesn't change the balloon target size,
>
> Which target size are you referring to? The one in the host (member num_pages
> of VirtIOBalloon in QEMU)?
>
Yes, I'm referring to the struct virtio_balloon_config->num_pages element,
which basically is the balloon size target. Guest's struct
virtio_balloon->num_pages is just a book keeping element to allow the guest
balloon thread to track how far it is from achieving the target set by the host,
IIUC.
> If it the one in the host, then my understanding is that that member is only
> used to communicate the new balloon target to the guest. The guest driver
> will only read it when told (by the host) to do so, and when it does the
> target value will be correct.
>
> Am I right?
>
You're right, and the host's member is used to communicate the configured size
to the guest's balloon device. However, not changing it when the shrinker causes
the balloon to deflate will make the balloon thread be woken up again
to chase the balloon size target, won't it? Check
towards_target() and balloon() and you will see where my concern arises.
Cheers!
-- Rafael
> > as soon as the shrink round finishes the
> > balloon will re-inflate again, won't it? Doesn't this cause a sort of "balloon
> > thrashing" scenario, if both guest and host are suffering from memory pressure?
> >
> >
> > The rest I have for the moment, are only nitpicks :)
> >
> >
> > > drivers/virtio/virtio_balloon.c | 55 +++++++++++++++++++++++++++++++++++++
> > > include/uapi/linux/virtio_balloon.h | 1 +
> > > 2 files changed, 56 insertions(+)
> > >
> > > diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> > > index 9d5fe2b..f9dcae8 100644
> > > --- a/drivers/virtio/virtio_balloon.c
> > > +++ b/drivers/virtio/virtio_balloon.c
> > > @@ -71,6 +71,9 @@ struct virtio_balloon
> > > /* Memory statistics */
> > > int need_stats_update;
> > > struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR];
> > > +
> > > + /* Memory shrinker */
> > > + struct shrinker shrinker;
> > > };
> > >
> > > static struct virtio_device_id id_table[] = {
> > > @@ -126,6 +129,7 @@ static void set_page_pfns(u32 pfns[], struct page *page)
> > > pfns[i] = page_to_balloon_pfn(page) + i;
> > > }
> > >
> > > +/* This function should be called with vb->balloon_mutex held */
> > > static void fill_balloon(struct virtio_balloon *vb, size_t num)
> > > {
> > > struct balloon_dev_info *vb_dev_info = vb->vb_dev_info;
> > > @@ -166,6 +170,7 @@ static void release_pages_by_pfn(const u32 pfns[], unsigned int num)
> > > }
> > > }
> > >
> > > +/* This function should be called with vb->balloon_mutex held */
> > > static void leak_balloon(struct virtio_balloon *vb, size_t num)
> > > {
> > > struct page *page;
> > > @@ -285,6 +290,45 @@ static void update_balloon_size(struct virtio_balloon *vb)
> > > &actual, sizeof(actual));
> > > }
> > >
> > > +static unsigned long balloon_get_nr_pages(const struct virtio_balloon *vb)
> > > +{
> > > + return vb->num_pages / VIRTIO_BALLOON_PAGES_PER_PAGE;
> > > +}
> > > +
> > > +static int balloon_shrinker(struct shrinker *shrinker,struct shrink_control *sc)
> > > +{
> > > + unsigned int nr_pages, new_target;
> > > + struct virtio_balloon *vb;
> > > +
> > > + vb = container_of(shrinker, struct virtio_balloon, shrinker);
> > > + if (!mutex_trylock(&vb->balloon_lock)) {
> > > + return -1;
> > > + }
> > > +
> > > + nr_pages = balloon_get_nr_pages(vb);
> > > + if (!sc->nr_to_scan || !nr_pages) {
> > > + goto out;
> > > + }
> > > +
> > > + /*
> > > + * If the current balloon size is greater than the number of
> > > + * pages being reclaimed by the kernel, deflate only the needed
> > > + * amount. Otherwise deflate everything we have.
> > > + */
> > > + new_target = 0;
> > > + if (nr_pages > sc->nr_to_scan) {
> > > + new_target = nr_pages - sc->nr_to_scan;
> > > + }
> > > +
> >
> > CodingStyle: you don't need the curly-braces for all these single staments above
>
> Oh, this comes from QEMU coding style. Fixed.
>
> > > + leak_balloon(vb, new_target);
> > > + update_balloon_size(vb);
> > > + nr_pages = balloon_get_nr_pages(vb);
> > > +
> > > +out:
> > > + mutex_unlock(&vb->balloon_lock);
> > > + return nr_pages;
> > > +}
> > > +
> > > static int balloon(void *_vballoon)
> > > {
> > > struct virtio_balloon *vb = _vballoon;
> > > @@ -471,6 +515,13 @@ static int virtballoon_probe(struct virtio_device *vdev)
> > > goto out_del_vqs;
> > > }
> > >
> > > + memset(&vb->shrinker, 0, sizeof(vb->shrinker));
> > > + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_AUTO_BALLOON)) {
> > > + vb->shrinker.shrink = balloon_shrinker;
> > > + vb->shrinker.seeks = DEFAULT_SEEKS;
> > > + register_shrinker(&vb->shrinker);
> > > + }
> > > +
> > > return 0;
> > >
> > > out_del_vqs:
> > > @@ -487,6 +538,9 @@ out:
> > >
> > > static void remove_common(struct virtio_balloon *vb)
> > > {
> > > + if (vb->shrinker.shrink)
> > > + unregister_shrinker(&vb->shrinker);
> > > +
> > > /* There might be pages left in the balloon: free them. */
> > > mutex_lock(&vb->balloon_lock);
> > > while (vb->num_pages)
> > > @@ -543,6 +597,7 @@ static int virtballoon_restore(struct virtio_device *vdev)
> > > static unsigned int features[] = {
> > > VIRTIO_BALLOON_F_MUST_TELL_HOST,
> > > VIRTIO_BALLOON_F_STATS_VQ,
> > > + VIRTIO_BALLOON_F_AUTO_BALLOON,
> > > };
> > >
> > > static struct virtio_driver virtio_balloon_driver = {
> > > diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
> > > index 5e26f61..bd378a4 100644
> > > --- a/include/uapi/linux/virtio_balloon.h
> > > +++ b/include/uapi/linux/virtio_balloon.h
> > > @@ -31,6 +31,7 @@
> > > /* The feature bitmap for virtio balloon */
> > > #define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */
> > > #define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */
> > > +#define VIRTIO_BALLOON_F_AUTO_BALLOON 2 /* Automatic ballooning */
> > >
> > > /* Size of a PFN in the balloon interface. */
> > > #define VIRTIO_BALLOON_PFN_SHIFT 12
> > > --
> > > 1.8.1.4
> > >
> >
>
On Sun, 12 May 2013 21:49:34 +0300
"Michael S. Tsirkin" <[email protected]> wrote:
> On Sun, May 12, 2013 at 12:36:09PM -0400, Rik van Riel wrote:
> > On 05/12/2013 10:30 AM, Michael S. Tsirkin wrote:
> > >On Thu, May 09, 2013 at 10:53:49AM -0400, Luiz Capitulino wrote:
> > >>Automatic ballooning consists of dynamically adjusting the guest's
> > >>balloon according to memory pressure in the host and in the guest.
> > >>
> > >>This commit implements the guest side of automatic balloning, which
> > >>basically consists of registering a shrinker callback with the kernel,
> > >>which will try to deflate the guest's balloon by the amount of pages
> > >>being requested. The shrinker callback is only registered if the host
> > >>supports the VIRTIO_BALLOON_F_AUTO_BALLOON feature bit.
> > >
> > >OK so we have a new feature bit, such that:
> > >- if AUTO_BALLOON is set in host, guest can leak a
> > > page from a balloon at any time
> > >
> > >questions left unanswered
> > >- what meaning does num_pages have now?
> >
> > "as large as we could go"
>
> I see. This is the reverse of it's current meaning.
> I would suggest a new field instead of overriding
> the existing one.
I'll write a spec patch as you suggested on irc and will decide what
to do from there.
> > >- when will the balloon be re-inflated?
> >
> > I believe the qemu changes Luiz wrote address that side,
> > with qemu-kvm getting notifications from the host kernel
> > when there is memory pressure, and shrinking the guest
> > in reaction to that notification.
>
> But it's the guest memory pressure we care about:
>
> - host asks balloon to inflate
> later
> - guest asks balloon to deflate
>
> with this patch guest takes priority,
> balloon deflates. So we should only inflate
> if guest does not need the memory.
Inflate will actually fail if the guest doesn't have memory to fill
the balloon. But in any case, and as you said elsewhere in this
thread, inflate is not new and could even be done by management. So I don't
think this should be changed in this patch.
> > >I'd like to see a spec patch addressing these questions.
> > >
> > >Would we ever want to mix the two types of ballooning?
> > >If yes possibly when we put a page in the balloon we
> > >might want to give host a hint "this page might be
> > >leaked again soon".
> >
> > It might not be the same page, and the host really does
> > not care which page it is.
>
> Whether we care depends on what we do with the page.
> For example, in the future we might care which numa node is
> used.
>
> > The automatic inflation happens when the host needs to
> > free up memory.
>
> This can be done today by management, with no need to
> change qemu. So automatic inflate, IMHO does not need
> a feature flag. It's the automatic deflate in guest that's new.
Makes sense.
> > >>Automatic inflate is performed by the host.
> > >>
> > >>Here are some numbers. The test-case is to run 35 VMs (1G of RAM each)
> > >>in parallel doing a kernel build. Host has 32GB of RAM and 16GB of swap.
> > >>SWAP IN and SWAP OUT correspond to the number of pages swapped in and
> > >>swapped out, respectively.
> > >>
> > >>Auto-ballooning disabled:
> > >>
> > >>RUN TIME(s) SWAP IN SWAP OUT
> > >>
> > >>1 634 930980 1588522
> > >>2 610 627422 1362174
> > >>3 649 1079847 1616367
> > >>4 543 953289 1635379
> > >>5 642 913237 1514000
> > >>
> > >>Auto-ballooning enabled:
> > >>
> > >>RUN TIME(s) SWAP IN SWAP OUT
> > >>
> > >>1 629 901 12537
> > >>2 624 981 18506
> > >>3 626 573 9085
> > >>4 631 2250 42534
> > >>5 627 1610 20808
> > >
> > >So what exactly happens here?
> > >Much less swap in/out activity, but no gain
> > >in total runtime. Doesn't this show there's
> > >a bottleneck somewhere? Could be a problem in
> > >the implementation?
> >
> > It could also be an issue with the benchmark chosen,
> > which may not have swap as its bottleneck at any point.
> >
> > However, the reduced swapping is still very promising!
>
> Isn't this a direct result of inflating the balloon?
> E.g. just inflating the balloon without
> the shrinker will make us avoid swap in host.
> What we would want to check is whether shrinking works as
> expected, and whether we need to speed up shrinking.
>
> As I say above, inflating the balloon is easy.
> A good benchmark would show how we can
> deflate and re-inflate it efficiently with
> demand.
I'm going to measure this.
> > >Also, what happened with the balloon?
> > >Did we end up with balloon completely inflated? deflated?
In my test-case VMs are started with 1G. After the test, almost all
of them have between 200-300MB.
> > >
> > >One question to consider: possibly if we are
> > >going to reuse the page in the balloon soon,
> > >we might want to bypass notify before use for it?
> > >Maybe that will help speed things up.
> > >
> > >
> > >>Signed-off-by: Luiz Capitulino <[email protected]>
> > >>---
> > >> drivers/virtio/virtio_balloon.c | 55 +++++++++++++++++++++++++++++++++++++
> > >> include/uapi/linux/virtio_balloon.h | 1 +
> > >> 2 files changed, 56 insertions(+)
> > >>
> > >>diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> > >>index 9d5fe2b..f9dcae8 100644
> > >>--- a/drivers/virtio/virtio_balloon.c
> > >>+++ b/drivers/virtio/virtio_balloon.c
> > >>@@ -71,6 +71,9 @@ struct virtio_balloon
> > >> /* Memory statistics */
> > >> int need_stats_update;
> > >> struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR];
> > >>+
> > >>+ /* Memory shrinker */
> > >>+ struct shrinker shrinker;
> > >> };
> > >>
> > >> static struct virtio_device_id id_table[] = {
> > >>@@ -126,6 +129,7 @@ static void set_page_pfns(u32 pfns[], struct page *page)
> > >> pfns[i] = page_to_balloon_pfn(page) + i;
> > >> }
> > >>
> > >>+/* This function should be called with vb->balloon_mutex held */
> > >> static void fill_balloon(struct virtio_balloon *vb, size_t num)
> > >> {
> > >> struct balloon_dev_info *vb_dev_info = vb->vb_dev_info;
> > >>@@ -166,6 +170,7 @@ static void release_pages_by_pfn(const u32 pfns[], unsigned int num)
> > >> }
> > >> }
> > >>
> > >>+/* This function should be called with vb->balloon_mutex held */
> > >> static void leak_balloon(struct virtio_balloon *vb, size_t num)
> > >> {
> > >> struct page *page;
> > >>@@ -285,6 +290,45 @@ static void update_balloon_size(struct virtio_balloon *vb)
> > >> &actual, sizeof(actual));
> > >> }
> > >>
> > >>+static unsigned long balloon_get_nr_pages(const struct virtio_balloon *vb)
> > >>+{
> > >>+ return vb->num_pages / VIRTIO_BALLOON_PAGES_PER_PAGE;
> > >>+}
> > >>+
> > >>+static int balloon_shrinker(struct shrinker *shrinker,struct shrink_control *sc)
> > >>+{
> > >>+ unsigned int nr_pages, new_target;
> > >>+ struct virtio_balloon *vb;
> > >>+
> > >>+ vb = container_of(shrinker, struct virtio_balloon, shrinker);
> > >>+ if (!mutex_trylock(&vb->balloon_lock)) {
> > >>+ return -1;
> > >>+ }
> > >>+
> > >>+ nr_pages = balloon_get_nr_pages(vb);
> > >>+ if (!sc->nr_to_scan || !nr_pages) {
> > >>+ goto out;
> > >>+ }
> > >>+
> > >>+ /*
> > >>+ * If the current balloon size is greater than the number of
> > >>+ * pages being reclaimed by the kernel, deflate only the needed
> > >>+ * amount. Otherwise deflate everything we have.
> > >>+ */
> > >>+ new_target = 0;
> > >>+ if (nr_pages > sc->nr_to_scan) {
> > >>+ new_target = nr_pages - sc->nr_to_scan;
> > >>+ }
> > >>+
> > >>+ leak_balloon(vb, new_target);
> > >>+ update_balloon_size(vb);
> > >>+ nr_pages = balloon_get_nr_pages(vb);
> > >>+
> > >>+out:
> > >>+ mutex_unlock(&vb->balloon_lock);
> > >>+ return nr_pages;
> > >>+}
> > >>+
> > >> static int balloon(void *_vballoon)
> > >> {
> > >> struct virtio_balloon *vb = _vballoon;
> > >>@@ -471,6 +515,13 @@ static int virtballoon_probe(struct virtio_device *vdev)
> > >> goto out_del_vqs;
> > >> }
> > >>
> > >>+ memset(&vb->shrinker, 0, sizeof(vb->shrinker));
> > >>+ if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_AUTO_BALLOON)) {
> > >>+ vb->shrinker.shrink = balloon_shrinker;
> > >>+ vb->shrinker.seeks = DEFAULT_SEEKS;
> > >>+ register_shrinker(&vb->shrinker);
> > >>+ }
> > >>+
> > >> return 0;
> > >>
> > >> out_del_vqs:
> > >>@@ -487,6 +538,9 @@ out:
> > >>
> > >> static void remove_common(struct virtio_balloon *vb)
> > >> {
> > >>+ if (vb->shrinker.shrink)
> > >>+ unregister_shrinker(&vb->shrinker);
> > >>+
> > >> /* There might be pages left in the balloon: free them. */
> > >> mutex_lock(&vb->balloon_lock);
> > >> while (vb->num_pages)
> > >>@@ -543,6 +597,7 @@ static int virtballoon_restore(struct virtio_device *vdev)
> > >> static unsigned int features[] = {
> > >> VIRTIO_BALLOON_F_MUST_TELL_HOST,
> > >> VIRTIO_BALLOON_F_STATS_VQ,
> > >>+ VIRTIO_BALLOON_F_AUTO_BALLOON,
> > >> };
> > >>
> > >> static struct virtio_driver virtio_balloon_driver = {
> > >>diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
> > >>index 5e26f61..bd378a4 100644
> > >>--- a/include/uapi/linux/virtio_balloon.h
> > >>+++ b/include/uapi/linux/virtio_balloon.h
> > >>@@ -31,6 +31,7 @@
> > >> /* The feature bitmap for virtio balloon */
> > >> #define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */
> > >> #define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */
> > >>+#define VIRTIO_BALLOON_F_AUTO_BALLOON 2 /* Automatic ballooning */
> > >>
> > >> /* Size of a PFN in the balloon interface. */
> > >> #define VIRTIO_BALLOON_PFN_SHIFT 12
> > >>--
> > >>1.8.1.4
> >
> >
> > --
> > All rights reversed
>
On Mon, May 13, 2013 at 11:03:03AM -0400, Luiz Capitulino wrote:
> On Sun, 12 May 2013 21:49:34 +0300
> "Michael S. Tsirkin" <[email protected]> wrote:
>
> > On Sun, May 12, 2013 at 12:36:09PM -0400, Rik van Riel wrote:
> > > On 05/12/2013 10:30 AM, Michael S. Tsirkin wrote:
> > > >On Thu, May 09, 2013 at 10:53:49AM -0400, Luiz Capitulino wrote:
> > > >>Automatic ballooning consists of dynamically adjusting the guest's
> > > >>balloon according to memory pressure in the host and in the guest.
> > > >>
> > > >>This commit implements the guest side of automatic ballooning, which
> > > >>basically consists of registering a shrinker callback with the kernel,
> > > >>which will try to deflate the guest's balloon by the amount of pages
> > > >>being requested. The shrinker callback is only registered if the host
> > > >>supports the VIRTIO_BALLOON_F_AUTO_BALLOON feature bit.
> > > >
> > > >OK so we have a new feature bit, such that:
> > > >- if AUTO_BALLOON is set in host, guest can leak a
> > > > page from a balloon at any time
> > > >
> > > >questions left unanswered
> > > >- what meaning does num_pages have now?
> > >
> > > "as large as we could go"
> >
> > I see. This is the reverse of its current meaning.
> > I would suggest a new field instead of overriding
> > the existing one.
>
> I'll write a spec patch as you suggested on irc and will decide what
> to do from there.
>
> > > >- when will the balloon be re-inflated?
> > >
> > > I believe the qemu changes Luiz wrote address that side,
> > > with qemu-kvm getting notifications from the host kernel
> > > when there is memory pressure, and shrinking the guest
> > > in reaction to that notification.
> >
> > But it's the guest memory pressure we care about:
> >
> > - host asks balloon to inflate
> > later
> > - guest asks balloon to deflate
> >
> > with this patch guest takes priority,
> > balloon deflates. So we should only inflate
> > if guest does not need the memory.
>
> Inflate will actually fail if the guest doesn't have memory to fill
> the balloon. But in any case, and as you said elsewhere in this
> thread, inflate is not new and could be even done by mngt. So I don't
> think this should be changed in this patch.
>
> > > >I'd like to see a spec patch addressing these questions.
> > > >
> > > >Would we ever want to mix the two types of ballooning?
> > > >If yes possibly when we put a page in the balloon we
> > > >might want to give host a hint "this page might be
> > > >leaked again soon".
> > >
> > > It might not be the same page, and the host really does
> > > not care which page it is.
> >
> > Whether we care depends on what we do with the page.
> > For example, in the future we might care which numa node is
> > used.
> >
> > > The automatic inflation happens when the host needs to
> > > free up memory.
> >
> > This can be done today by management, with no need to
> > change qemu. So automatic inflate, IMHO does not need
> > a feature flag. It's the automatic deflate in guest that's new.
>
> Makes sense.
However, there's a big question mark: host specifies
inflate, guest says deflate, who wins?
At some point Google sent patches that gave guest
complete control over the balloon.
This has the advantage that management isn't involved.
And at some level it seems to make sense: why set
an upper limit on size of the balloon?
The bigger it is, the better.
> > > >>Automatic inflate is performed by the host.
> > > >>
> > > >>Here are some numbers. The test-case is to run 35 VMs (1G of RAM each)
> > > >>in parallel doing a kernel build. Host has 32GB of RAM and 16GB of swap.
> > > >>SWAP IN and SWAP OUT correspond to the number of pages swapped in and
> > > >>swapped out, respectively.
> > > >>
> > > >>Auto-ballooning disabled:
> > > >>
> > > >>RUN TIME(s) SWAP IN SWAP OUT
> > > >>
> > > >>1 634 930980 1588522
> > > >>2 610 627422 1362174
> > > >>3 649 1079847 1616367
> > > >>4 543 953289 1635379
> > > >>5 642 913237 1514000
> > > >>
> > > >>Auto-ballooning enabled:
> > > >>
> > > >>RUN TIME(s) SWAP IN SWAP OUT
> > > >>
> > > >>1 629 901 12537
> > > >>2 624 981 18506
> > > >>3 626 573 9085
> > > >>4 631 2250 42534
> > > >>5 627 1610 20808
> > > >
> > > >So what exactly happens here?
> > > >Much less swap in/out activity, but no gain
> > > >in total runtime. Doesn't this show there's
> > > >a bottleneck somewhere? Could be a problem in
> > > >the implementation?
> > >
> > > It could also be an issue with the benchmark chosen,
> > > which may not have swap as its bottleneck at any point.
> > >
> > > However, the reduced swapping is still very promising!
> >
> > Isn't this a direct result of inflating the balloon?
> > E.g. just inflating the balloon without
> > the shrinker will make us avoid swap in host.
> > What we would want to check is whether shrinking works as
> > expected, and whether we need to speed up shrinking.
> >
> > As I say above, inflating the balloon is easy.
> > A good benchmark would show how we can
> > deflate and re-inflate it efficiently with
> > demand.
>
> I'm going to measure this.
>
> > > >Also, what happened with the balloon?
> > > >Did we end up with balloon completely inflated? deflated?
>
> In my test-case VMs are started with 1G. After the test, almost all
> of them have between 200-300MB.
>
And what was the max balloon size that you specified? 1G?
> > > >
> > > >One question to consider: possibly if we are
> > > >going to reuse the page in the balloon soon,
> > > >we might want to bypass notify before use for it?
> > > >Maybe that will help speed things up.
> > > >
> > > >
> > > >>Signed-off-by: Luiz Capitulino <[email protected]>
> > > >>---
> > > >> drivers/virtio/virtio_balloon.c | 55 +++++++++++++++++++++++++++++++++++++
> > > >> include/uapi/linux/virtio_balloon.h | 1 +
> > > >> 2 files changed, 56 insertions(+)
> > > >>
> > > >>diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> > > >>index 9d5fe2b..f9dcae8 100644
> > > >>--- a/drivers/virtio/virtio_balloon.c
> > > >>+++ b/drivers/virtio/virtio_balloon.c
> > > >>@@ -71,6 +71,9 @@ struct virtio_balloon
> > > >> /* Memory statistics */
> > > >> int need_stats_update;
> > > >> struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR];
> > > >>+
> > > >>+ /* Memory shrinker */
> > > >>+ struct shrinker shrinker;
> > > >> };
> > > >>
> > > >> static struct virtio_device_id id_table[] = {
> > > >>@@ -126,6 +129,7 @@ static void set_page_pfns(u32 pfns[], struct page *page)
> > > >> pfns[i] = page_to_balloon_pfn(page) + i;
> > > >> }
> > > >>
> > > >>+/* This function should be called with vb->balloon_mutex held */
> > > >> static void fill_balloon(struct virtio_balloon *vb, size_t num)
> > > >> {
> > > >> struct balloon_dev_info *vb_dev_info = vb->vb_dev_info;
> > > >>@@ -166,6 +170,7 @@ static void release_pages_by_pfn(const u32 pfns[], unsigned int num)
> > > >> }
> > > >> }
> > > >>
> > > >>+/* This function should be called with vb->balloon_mutex held */
> > > >> static void leak_balloon(struct virtio_balloon *vb, size_t num)
> > > >> {
> > > >> struct page *page;
> > > >>@@ -285,6 +290,45 @@ static void update_balloon_size(struct virtio_balloon *vb)
> > > >> &actual, sizeof(actual));
> > > >> }
> > > >>
> > > >>+static unsigned long balloon_get_nr_pages(const struct virtio_balloon *vb)
> > > >>+{
> > > >>+ return vb->num_pages / VIRTIO_BALLOON_PAGES_PER_PAGE;
> > > >>+}
> > > >>+
> > > >>+static int balloon_shrinker(struct shrinker *shrinker,struct shrink_control *sc)
> > > >>+{
> > > >>+ unsigned int nr_pages, new_target;
> > > >>+ struct virtio_balloon *vb;
> > > >>+
> > > >>+ vb = container_of(shrinker, struct virtio_balloon, shrinker);
> > > >>+ if (!mutex_trylock(&vb->balloon_lock)) {
> > > >>+ return -1;
> > > >>+ }
> > > >>+
> > > >>+ nr_pages = balloon_get_nr_pages(vb);
> > > >>+ if (!sc->nr_to_scan || !nr_pages) {
> > > >>+ goto out;
> > > >>+ }
> > > >>+
> > > >>+ /*
> > > >>+ * If the current balloon size is greater than the number of
> > > >>+ * pages being reclaimed by the kernel, deflate only the needed
> > > >>+ * amount. Otherwise deflate everything we have.
> > > >>+ */
> > > >>+ new_target = 0;
> > > >>+ if (nr_pages > sc->nr_to_scan) {
> > > >>+ new_target = nr_pages - sc->nr_to_scan;
> > > >>+ }
> > > >>+
> > > >>+ leak_balloon(vb, new_target);
> > > >>+ update_balloon_size(vb);
> > > >>+ nr_pages = balloon_get_nr_pages(vb);
> > > >>+
> > > >>+out:
> > > >>+ mutex_unlock(&vb->balloon_lock);
> > > >>+ return nr_pages;
> > > >>+}
> > > >>+
> > > >> static int balloon(void *_vballoon)
> > > >> {
> > > >> struct virtio_balloon *vb = _vballoon;
> > > >>@@ -471,6 +515,13 @@ static int virtballoon_probe(struct virtio_device *vdev)
> > > >> goto out_del_vqs;
> > > >> }
> > > >>
> > > >>+ memset(&vb->shrinker, 0, sizeof(vb->shrinker));
> > > >>+ if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_AUTO_BALLOON)) {
> > > >>+ vb->shrinker.shrink = balloon_shrinker;
> > > >>+ vb->shrinker.seeks = DEFAULT_SEEKS;
> > > >>+ register_shrinker(&vb->shrinker);
> > > >>+ }
> > > >>+
> > > >> return 0;
> > > >>
> > > >> out_del_vqs:
> > > >>@@ -487,6 +538,9 @@ out:
> > > >>
> > > >> static void remove_common(struct virtio_balloon *vb)
> > > >> {
> > > >>+ if (vb->shrinker.shrink)
> > > >>+ unregister_shrinker(&vb->shrinker);
> > > >>+
> > > >> /* There might be pages left in the balloon: free them. */
> > > >> mutex_lock(&vb->balloon_lock);
> > > >> while (vb->num_pages)
> > > >>@@ -543,6 +597,7 @@ static int virtballoon_restore(struct virtio_device *vdev)
> > > >> static unsigned int features[] = {
> > > >> VIRTIO_BALLOON_F_MUST_TELL_HOST,
> > > >> VIRTIO_BALLOON_F_STATS_VQ,
> > > >>+ VIRTIO_BALLOON_F_AUTO_BALLOON,
> > > >> };
> > > >>
> > > >> static struct virtio_driver virtio_balloon_driver = {
> > > >>diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
> > > >>index 5e26f61..bd378a4 100644
> > > >>--- a/include/uapi/linux/virtio_balloon.h
> > > >>+++ b/include/uapi/linux/virtio_balloon.h
> > > >>@@ -31,6 +31,7 @@
> > > >> /* The feature bitmap for virtio balloon */
> > > >> #define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */
> > > >> #define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */
> > > >>+#define VIRTIO_BALLOON_F_AUTO_BALLOON 2 /* Automatic ballooning */
> > > >>
> > > >> /* Size of a PFN in the balloon interface. */
> > > >> #define VIRTIO_BALLOON_PFN_SHIFT 12
> > > >>--
> > > >>1.8.1.4
> > >
> > >
> > > --
> > > All rights reversed
> >
On 05/13/2013 11:16 AM, Michael S. Tsirkin wrote:
> However, there's a big question mark: host specifies
> inflate, guest says deflate, who wins?
If we're dealing with a NUMA guest, they could both win :)
The host could see reduced memory use of the guest in one
place, while the guest could see increased memory availability
in another place...
I also suspect that having some "churn" could help sort out
exactly what the working set is.
> At some point Google sent patches that gave guest
> complete control over the balloon.
> This has the advantage that management isn't involved.
I believe the Google patches still included some way for the
host to initiate balloon inflation on the guest side, because
the guest internal state alone is not enough to see when the
host is under memory pressure.
I discussed the project with the Google developers in question
a little over a year ago, but I do not remember whether their
pressure notification went through qemu, or directly from the
host kernel to the guest kernel...
> And at some level it seems to make sense: why set
> an upper limit on size of the balloon?
> The bigger it is, the better.
Response time.
If too much of a guest's memory has been removed, it can take
too long for the guest to react to user requests, be it over
the web or ssh or something else...
--
All rights reversed
On Mon, May 13, 2013 at 11:22:58AM -0400, Rik van Riel wrote:
> On 05/13/2013 11:16 AM, Michael S. Tsirkin wrote:
>
> >However, there's a big question mark: host specifies
> >inflate, guest says deflate, who wins?
>
> If we're dealing with a NUMA guest, they could both win :)
>
> The host could see reduced memory use of the guest in one
> place, while the guest could see increased memory availability
> in another place...
>
> I also suspect that having some "churn" could help sort out
> exactly what the working set is.
>
> >At some point Google sent patches that gave guest
> >complete control over the balloon.
> >This has the advantage that management isn't involved.
>
> I believe the Google patches still included some way for the
> host to initiate balloon inflation on the guest side, because
> the guest internal state alone is not enough to see when the
> host is under memory pressure.
>
> I discussed the project with the Google developers in question
> a little over a year ago, but I do not remember whether their
> pressure notification went through qemu, or directly from the
> host kernel to the guest kernel...
So increasing the max number of pages in balloon
indicates host memory pressure to the guest?
Fair enough but I wonder whether there's a way to
make it more explicit in the interface, somehow.
> >And at some level it seems to make sense: why set
> >an upper limit on size of the balloon?
> >The bigger it is, the better.
>
> Response time.
>
> If too much of a guest's memory has been removed, it can take
> too long for the guest to react to user requests, be it over
> the web or ssh or something else...
Absolutely. But it's a Guest issue. Host does not care.
If Guest wants to shoot itself in the foot it has
many other ways to do this.
> --
> All rights reversed
On Mon, 13 May 2013 11:34:41 -0300
Rafael Aquini <[email protected]> wrote:
> You're right, and the host's member is used to communicate the configured size
> to guest's balloon device, however, by not changing it when the shrinker causes
> the balloon to deflate will make the balloon thread be woken up again
> in order to chase the balloon size target again, won't it? Check
I don't see the balloon thread waking up after the shrinker executes in my
testing. Maybe this is so because it will only wake up when QEMU notifies
a config change.
But anyway, I'll think how to improve this as suggested by Michael too, as
I seem to be changing num_pages' semantics according to the virtio spec.
On Mon, May 13, 2013 at 02:25:11PM -0400, Luiz Capitulino wrote:
> On Mon, 13 May 2013 11:34:41 -0300
> Rafael Aquini <[email protected]> wrote:
>
> > You're right, and the host's member is used to communicate the configured size
> > to guest's balloon device, however, by not changing it when the shrinker causes
> > the balloon to deflate will make the balloon thread to be woken up again
> > in order to chase the balloon size target again, won't it? Check
>
> I don't see the balloon thread waking up after the shrinker executes in my
> testing. Maybe this is so because it will only wake up when QEMU notifies
> a config change.
Well that's also a problem.
Need some mechanism to re-inflate balloon
when guest memory pressure is down.
virtio fs mechanism worth a look?
> But anyway, I'll think how to improve this as suggested by Michael too, as
> I seem to be changing num_pages' semantics according to the virtio spec.
On 05/13/2013 11:35 AM, Michael S. Tsirkin wrote:
> On Mon, May 13, 2013 at 11:22:58AM -0400, Rik van Riel wrote:
>> I believe the Google patches still included some way for the
>> host to initiate balloon inflation on the guest side, because
>> the guest internal state alone is not enough to see when the
>> host is under memory pressure.
>>
>> I discussed the project with the Google developers in question
>> a little over a year ago, but I do not remember whether their
>> pressure notification went through qemu, or directly from the
>> host kernel to the guest kernel...
>
> So increasing the max number of pages in balloon
> indicates host memory pressure to the guest?
> Fair enough but I wonder whether there's a way to
> make it more explicit in the interface, somehow.
There may be a better way to do this, but I am really not sure
what that would be. What properties would you like to see in
the interface? What kind of behaviour are you looking for?
--
All rights reversed
On Mon, May 13, 2013 at 03:10:19PM -0400, Rik van Riel wrote:
> On 05/13/2013 11:35 AM, Michael S. Tsirkin wrote:
> >On Mon, May 13, 2013 at 11:22:58AM -0400, Rik van Riel wrote:
>
> >>I believe the Google patches still included some way for the
> >>host to initiate balloon inflation on the guest side, because
> >>the guest internal state alone is not enough to see when the
> >>host is under memory pressure.
> >>
> >>I discussed the project with the Google developers in question
> >>a little over a year ago, but I do not remember whether their
> >>pressure notification went through qemu, or directly from the
> >>host kernel to the guest kernel...
> >
> >So increasing the max number of pages in balloon
> >indicates host memory pressure to the guest?
> >Fair enough but I wonder whether there's a way to
> >make it more explicit in the interface, somehow.
>
> There may be a better way to do this, but I am really not sure
> what that would be. What properties would you like to see in
> the interface? What kind of behaviour are you looking for?
I'd like to propagate what we know to the guest and
not require things we don't know.
Well for once, all we know is host is under memory pressure.
We don't really know how much memory should be freed.
So maybe we should just have a binary "host under memory
pressure" and have guest free what it can, e.g. have it
drop caches more aggressively.
> --
> All rights reversed
On Mon, 13 May 2013 22:02:50 +0300
"Michael S. Tsirkin" <[email protected]> wrote:
> On Mon, May 13, 2013 at 02:25:11PM -0400, Luiz Capitulino wrote:
> > On Mon, 13 May 2013 11:34:41 -0300
> > Rafael Aquini <[email protected]> wrote:
> >
> > > You're right, and the host's member is used to communicate the configured size
> > > to guest's balloon device, however, by not changing it when the shrinker causes
> > > the balloon to deflate will make the balloon thread to be woken up again
> > > in order to chase the balloon size target again, won't it? Check
> >
> > I don't see the balloon thread waking up after the shrinker executes in my
> > testing. Maybe this is so because it will only wake up when QEMU notifies
> > a config change.
>
> Well that's also a problem.
> Need some mechanism to re-inflate balloon
In this implementation this is done by QEMU, have you looked at the QEMU
patch yet?
https://lists.gnu.org/archive/html/qemu-devel/2013-05/msg01295.html
> when guest memory pressure is down.
In this implementation we do this when there's pressure in the host. I expect
things to balance over time, and this seems to be what I'm observing in my
testing, but of course we need more testing.
> virtio fs mechanism worth a look?
Can you elaborate?
On 05/09/2013 10:53 AM, Luiz Capitulino wrote:
> Hi,
>
> This series is a respin of automatic ballooning support I started
> working on last year. Patch 2/2 contains all relevant technical
> details and performance measurements results.
>
> This is in RFC state because it's a work in progress.
Hi Luiz,
Is there a virtio spec patch I could use to get it implemented on
kvmtool?
Thanks,
Sasha
On Thu, 16 May 2013 16:56:34 -0400
Sasha Levin <[email protected]> wrote:
> On 05/09/2013 10:53 AM, Luiz Capitulino wrote:
> > Hi,
> >
> > This series is a respin of automatic ballooning support I started
> > working on last year. Patch 2/2 contains all relevant technical
> > details and performance measurements results.
> >
> > This is in RFC state because it's a work in progress.
>
> Hi Luiz,
>
> Is there a virtio spec patch I could use to get it implemented on
> kvmtool?
Not yet, this will come with v1. But I got some homework to do before
posting it (more perf tests).