This patch set enhances the balloon driver to add support for memory
"hot-add". Windows hosts use a combination of ballooning and
hot-add to dynamically balance the available memory across competing virtual
machines. With this, Linux guests can fully participate in the Windows
Dynamic Memory protocol.
I have also fixed an issue in the vmbus driver's handling of the offer
rescind message.
K. Y. Srinivasan (6):
Drivers: hv: balloon: Do not request completion notification
Drivers: hv: balloon: Execute balloon inflation in a separate context
Drivers: hv: balloon: Execute hot-add code in a separate context
Drivers: hv: balloon: Make the balloon driver not unloadable
Drivers: hv: balloon: Implement hot-add functionality
Drivers: hv: vmbus: Handle channel rescind message correctly
drivers/hv/Kconfig | 2 +-
drivers/hv/channel_mgmt.c | 11 +
drivers/hv/hv_balloon.c | 467 +++++++++++++++++++++++++++++++++++++++-----
3 files changed, 426 insertions(+), 54 deletions(-)
--
1.7.4.1
There is no need to request completion notification; get rid of it.
Signed-off-by: K. Y. Srinivasan <[email protected]>
---
drivers/hv/hv_balloon.c | 6 ++----
1 files changed, 2 insertions(+), 4 deletions(-)
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index 3787321..7fb72dd 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -962,8 +962,7 @@ static int balloon_probe(struct hv_device *dev,
ret = vmbus_sendpacket(dev->channel, &version_req,
sizeof(struct dm_version_request),
(unsigned long)NULL,
- VM_PKT_DATA_INBAND,
- VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+ VM_PKT_DATA_INBAND, 0);
if (ret)
goto probe_error2;
@@ -1009,8 +1008,7 @@ static int balloon_probe(struct hv_device *dev,
ret = vmbus_sendpacket(dev->channel, &cap_msg,
sizeof(struct dm_capabilities),
(unsigned long)NULL,
- VM_PKT_DATA_INBAND,
- VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+ VM_PKT_DATA_INBAND, 0);
if (ret)
goto probe_error2;
--
1.7.4.1
Implement the memory hot-add functionality. With this, Linux guests can fully
participate in the Dynamic Memory protocol implemented in the Windows hosts.
Signed-off-by: K. Y. Srinivasan <[email protected]>
Reviewed-by: Haiyang Zhang <[email protected]>
---
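Note (illustration only, not part of the patch): hot-add is performed in
HA_CHUNK granularity (32 * 1024 pfns, i.e. 128MB with 4K pages). When the
host does not specify a hot-add region, hot_add_req() below widens the
requested pfn range to a chunk-aligned region; the covered_* fields in
struct hv_hotadd_state then track which of those pfns are actually backed
and may be brought online. A stand-alone sketch of that rounding (the
helper names here are made up for illustration):

#define HA_CHUNK	(32 * 1024)	/* pfns per 128MB chunk (4K pages) */

/* Round a page count up to whole HA_CHUNKs. */
static unsigned long ha_region_size(unsigned long pfn_cnt)
{
	unsigned long size = (pfn_cnt / HA_CHUNK) * HA_CHUNK;

	if (pfn_cnt % HA_CHUNK)
		size += HA_CHUNK;
	return size;
}

/* Align the start pfn down to an HA_CHUNK boundary. */
static unsigned long ha_region_start(unsigned long pg_start)
{
	return (pg_start / HA_CHUNK) * HA_CHUNK;
}

For example, a request for 1000 pfns starting at pfn 0x50000 becomes one
32768-pfn region starting at pfn 0x50000; only the first 1000 pfns of it
are "covered" until the host populates more of the region.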
drivers/hv/Kconfig | 2 +-
drivers/hv/hv_balloon.c | 393 ++++++++++++++++++++++++++++++++++++++++++++---
2 files changed, 373 insertions(+), 22 deletions(-)
diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig
index 64630f1..5415fac 100644
--- a/drivers/hv/Kconfig
+++ b/drivers/hv/Kconfig
@@ -15,7 +15,7 @@ config HYPERV_UTILS
config HYPERV_BALLOON
tristate "Microsoft Hyper-V Balloon driver"
- depends on HYPERV
+ depends on HYPERV && MEMORY_HOTPLUG
help
Select this option to enable Hyper-V Balloon driver.
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index 4743db9..2cdf3d7 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -412,6 +412,27 @@ struct dm_info_msg {
* End protocol definitions.
*/
+/*
+ * State to manage hot adding memory into the guest.
+ * The range start_pfn : end_pfn specifies the range
+ * that the host has asked us to hot add. The range
+ * start_pfn : ha_end_pfn specifies the range that we have
+ * currently hot added. We hot add in multiples of 128M
+ * chunks; it is possible that we may not be able to bring
+ * online all the pages in the region. The range
+ * covered_start_pfn : covered_end_pfn defines the pages that can
+ * be brought online.
+ */
+
+struct hv_hotadd_state {
+ struct list_head list;
+ unsigned long start_pfn;
+ unsigned long covered_start_pfn;
+ unsigned long covered_end_pfn;
+ unsigned long ha_end_pfn;
+ unsigned long end_pfn;
+};
+
struct balloon_state {
__u32 num_pages;
struct work_struct wrk;
@@ -419,16 +440,17 @@ struct balloon_state {
struct hot_add_wrk {
union dm_mem_page_range ha_page_range;
+ union dm_mem_page_range ha_region_range;
struct work_struct wrk;
};
-static bool hot_add;
+static bool hot_add = true;
static bool do_hot_add;
/*
* Delay reporting memory pressure by
* the specified number of seconds.
*/
-static uint pressure_report_delay = 30;
+static uint pressure_report_delay = 45;
module_param(hot_add, bool, (S_IRUGO | S_IWUSR));
MODULE_PARM_DESC(hot_add, "If set attempt memory hot_add");
@@ -456,6 +478,7 @@ enum hv_dm_state {
static __u8 recv_buffer[PAGE_SIZE];
static __u8 *send_buffer;
#define PAGES_IN_2M 512
+#define HA_CHUNK (32 * 1024)
struct hv_dynmem_device {
struct hv_device *dev;
@@ -479,6 +502,17 @@ struct hv_dynmem_device {
struct hot_add_wrk ha_wrk;
/*
+ * This state tracks if the host has specified a hot-add
+ * region.
+ */
+ bool host_specified_ha_region;
+
+ /*
+ * State to synchronize hot-add.
+ */
+ struct completion ol_waitevent;
+ bool ha_waiting;
+ /*
* This thread handles hot-add
* requests from the host as well as notifying
* the host with regards to memory pressure in
@@ -487,6 +521,11 @@ struct hv_dynmem_device {
struct task_struct *thread;
/*
+ * A list of hot-add regions.
+ */
+ struct list_head ha_region_list;
+
+ /*
* We start with the highest version we can support
* and downgrade based on the host; we save here the
* next version to try.
@@ -496,35 +535,321 @@ struct hv_dynmem_device {
static struct hv_dynmem_device dm_device;
-static void hot_add_req(struct work_struct *dummy)
+void hv_bring_pgs_online(unsigned long start_pfn, unsigned long size)
{
+ int i;
- struct dm_hot_add_response resp;
+ for (i = 0; i < size; i++) {
+ struct page *pg;
+ pg = pfn_to_page(start_pfn + i);
+ __online_page_set_limits(pg);
+ __online_page_increment_counters(pg);
+ __online_page_free(pg);
+ }
+}
+
+static void hv_mem_hot_add(unsigned long start, unsigned long size,
+ unsigned long pfn_count,
+ struct hv_hotadd_state *has)
+{
+ int ret = 0;
+ int i, nid, t;
+ unsigned long start_pfn;
+ unsigned long processed_pfn;
+ unsigned long total_pfn = pfn_count;
+
+ for (i = 0; i < (size/HA_CHUNK); i++) {
+ start_pfn = start + (i * HA_CHUNK);
+ has->ha_end_pfn += HA_CHUNK;
+
+ if (total_pfn > HA_CHUNK) {
+ processed_pfn = HA_CHUNK;
+ total_pfn -= HA_CHUNK;
+ } else {
+ processed_pfn = total_pfn;
+ total_pfn = 0;
+ }
+
+ has->covered_end_pfn += processed_pfn;
+
+ init_completion(&dm_device.ol_waitevent);
+ dm_device.ha_waiting = true;
+
+ nid = memory_add_physaddr_to_nid(PFN_PHYS(start_pfn));
+ ret = add_memory(nid, PFN_PHYS((start_pfn)),
+ (HA_CHUNK << PAGE_SHIFT));
+
+ if (ret) {
+ pr_info("hot_add memory failed error is %d\n", ret);
+ has->ha_end_pfn -= HA_CHUNK;
+ has->covered_end_pfn -= processed_pfn;
+ break;
+ }
+
+ /*
+ * Wait for the memory block to be onlined.
+ */
+ t = wait_for_completion_timeout(&dm_device.ol_waitevent, 5*HZ);
+ if (t == 0) {
+ pr_info("hot_add memory timedout\n");
+ has->ha_end_pfn -= HA_CHUNK;
+ has->covered_end_pfn -= processed_pfn;
+ break;
+ }
+
+ }
+
+ return;
+}
+
+static void hv_online_page(struct page *pg)
+{
+ struct list_head *cur;
+ struct hv_hotadd_state *has;
+ unsigned long cur_start_pgp;
+ unsigned long cur_end_pgp;
+
+ if (dm_device.ha_waiting) {
+ dm_device.ha_waiting = false;
+ complete(&dm_device.ol_waitevent);
+ }
+
+ list_for_each(cur, &dm_device.ha_region_list) {
+ has = list_entry(cur, struct hv_hotadd_state, list);
+ cur_start_pgp = (unsigned long)
+ pfn_to_page(has->covered_start_pfn);
+ cur_end_pgp = (unsigned long)pfn_to_page(has->covered_end_pfn);
+
+ if (((unsigned long)pg >= cur_start_pgp) &&
+ ((unsigned long)pg < cur_end_pgp)) {
+ /*
+ * This frame is currently backed; online the
+ * page.
+ */
+ __online_page_set_limits(pg);
+ __online_page_increment_counters(pg);
+ __online_page_free(pg);
+ has->covered_start_pfn++;
+ }
+ }
+}
- if (do_hot_add) {
+static bool pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt)
+{
+ struct list_head *cur;
+ struct hv_hotadd_state *has;
+ unsigned long residual, new_inc;
+
+ if (list_empty(&dm_device.ha_region_list))
+ return false;
+
+ list_for_each(cur, &dm_device.ha_region_list) {
+ has = list_entry(cur, struct hv_hotadd_state, list);
+
+ /*
+ * If the pfn range we are dealing with is not in the current
+ * "hot add block", move on.
+ */
+ if ((start_pfn >= has->end_pfn))
+ continue;
+ /*
+ * If the current hot add-request extends beyond
+ * our current limit; extend it.
+ */
+ if ((start_pfn + pfn_cnt) > has->end_pfn) {
+ residual = (start_pfn + pfn_cnt - has->end_pfn);
+ /*
+ * Extend the region by multiples of HA_CHUNK.
+ */
+ new_inc = (residual / HA_CHUNK) * HA_CHUNK;
+ if (residual % HA_CHUNK)
+ new_inc += HA_CHUNK;
- pr_info("Memory hot add not supported\n");
+ has->end_pfn += new_inc;
+ }
/*
- * Currently we do not support hot add.
- * Just fail the request.
+ * If the current start pfn is not where the covered_end
+ * is, update it.
*/
+
+ if (has->covered_end_pfn != start_pfn) {
+ has->covered_end_pfn = start_pfn;
+ has->covered_start_pfn = start_pfn;
+ }
+ return true;
+
}
+ return false;
+}
+
+static unsigned long handle_pg_range(unsigned long pg_start,
+ unsigned long pg_count)
+{
+ unsigned long start_pfn = pg_start;
+ unsigned long pfn_cnt = pg_count;
+ unsigned long size;
+ struct list_head *cur;
+ struct hv_hotadd_state *has;
+ unsigned long pgs_ol = 0;
+ unsigned long old_covered_state;
+
+ if (list_empty(&dm_device.ha_region_list))
+ return 0;
+
+ list_for_each(cur, &dm_device.ha_region_list) {
+ has = list_entry(cur, struct hv_hotadd_state, list);
+
+ /*
+ * If the pfn range we are dealing with is not in the current
+ * "hot add block", move on.
+ */
+ if ((start_pfn >= has->end_pfn))
+ continue;
+
+ old_covered_state = has->covered_end_pfn;
+
+ if (start_pfn < has->ha_end_pfn) {
+ /*
+ * This is the case where we are backing pages
+ * in an already hot added region. Bring
+ * these pages online first.
+ */
+ pgs_ol = has->ha_end_pfn - start_pfn;
+ if (pgs_ol > pfn_cnt)
+ pgs_ol = pfn_cnt;
+ hv_bring_pgs_online(start_pfn, pgs_ol);
+ has->covered_end_pfn += pgs_ol;
+ has->covered_start_pfn += pgs_ol;
+ pfn_cnt -= pgs_ol;
+ }
+
+ if ((has->ha_end_pfn < has->end_pfn) && (pfn_cnt > 0)) {
+ /*
+ * We have some residual hot add range
+ * that needs to be hot added; hot add
+ * it now. Hot add a multiple of
+ * HA_CHUNK that fully covers the pages
+ * we have.
+ */
+ size = (has->end_pfn - has->ha_end_pfn);
+ if (pfn_cnt <= size) {
+ size = ((pfn_cnt / HA_CHUNK) * HA_CHUNK);
+ if (pfn_cnt % HA_CHUNK)
+ size += HA_CHUNK;
+ } else {
+ pfn_cnt = size;
+ }
+ hv_mem_hot_add(has->ha_end_pfn, size, pfn_cnt, has);
+ }
+ /*
+ * If we managed to online any pages that were given to us,
+ * we declare success.
+ */
+ return has->covered_end_pfn - old_covered_state;
+
+ }
+
+ return 0;
+}
+
+static unsigned long process_hot_add(unsigned long pg_start,
+ unsigned long pfn_cnt,
+ unsigned long rg_start,
+ unsigned long rg_size)
+{
+ struct hv_hotadd_state *ha_region = NULL;
+
+ if (pfn_cnt == 0)
+ return 0;
+
+ if (!dm_device.host_specified_ha_region)
+ if (pfn_covered(pg_start, pfn_cnt))
+ goto do_pg_range;
+
+ /*
+ * If the host has specified a hot-add range; deal with it first.
+ */
+
+ if ((rg_size != 0) && (!dm_device.host_specified_ha_region)) {
+ ha_region = kzalloc(sizeof(struct hv_hotadd_state), GFP_KERNEL);
+ if (!ha_region)
+ return 0;
+
+ INIT_LIST_HEAD(&ha_region->list);
+
+ list_add_tail(&ha_region->list, &dm_device.ha_region_list);
+ ha_region->start_pfn = rg_start;
+ ha_region->ha_end_pfn = rg_start;
+ ha_region->covered_start_pfn = pg_start;
+ ha_region->covered_end_pfn = pg_start;
+ ha_region->end_pfn = rg_start + rg_size;
+ }
+
+do_pg_range:
+ /*
+ * Process the page range specified; bringing them
+ * online if possible.
+ */
+ return handle_pg_range(pg_start, pfn_cnt);
+}
+
+static void hot_add_req(struct work_struct *dummy)
+{
+ struct dm_hot_add_response resp;
+ unsigned long pg_start, pfn_cnt;
+ unsigned long rg_start, rg_sz;
+ struct hv_dynmem_device *dm = &dm_device;
+
memset(&resp, 0, sizeof(struct dm_hot_add_response));
resp.hdr.type = DM_MEM_HOT_ADD_RESPONSE;
resp.hdr.size = sizeof(struct dm_hot_add_response);
resp.hdr.trans_id = atomic_inc_return(&trans_id);
- resp.page_count = 0;
- resp.result = 0;
+ pg_start = dm->ha_wrk.ha_page_range.finfo.start_page;
+ pfn_cnt = dm->ha_wrk.ha_page_range.finfo.page_cnt;
- dm_device.state = DM_INITIALIZED;
- vmbus_sendpacket(dm_device.dev->channel, &resp,
+ rg_start = dm->ha_wrk.ha_region_range.finfo.start_page;
+ rg_sz = dm->ha_wrk.ha_region_range.finfo.page_cnt;
+
+ if ((rg_start == 0) && (!dm->host_specified_ha_region)) {
+ unsigned long region_size;
+ unsigned long region_start;
+
+ /*
+ * The host has not specified the hot-add region.
+ * Based on the hot-add page range being specified,
+ * compute a hot-add region that can cover the pages
+ * that need to be hot-added while ensuring the alignment
+ * and size requirements of Linux as it relates to hot-add.
+ */
+ region_start = pg_start;
+ region_size = (pfn_cnt / HA_CHUNK) * HA_CHUNK;
+ if (pfn_cnt % HA_CHUNK)
+ region_size += HA_CHUNK;
+
+ region_start = (pg_start / HA_CHUNK) * HA_CHUNK;
+
+ rg_start = region_start;
+ rg_sz = region_size;
+ }
+
+ resp.page_count = process_hot_add(pg_start, pfn_cnt,
+ rg_start, rg_sz);
+ if (resp.page_count > 0)
+ resp.result = 1;
+ else
+ resp.result = 0;
+
+ if (!do_hot_add || (resp.page_count == 0))
+ pr_info("Memory hot add failed\n");
+
+ dm->state = DM_INITIALIZED;
+ vmbus_sendpacket(dm->dev->channel, &resp,
sizeof(struct dm_hot_add_response),
(unsigned long)NULL,
VM_PKT_DATA_INBAND, 0);
-
}
static void process_info(struct hv_dynmem_device *dm, struct dm_info_msg *msg)
@@ -867,6 +1192,7 @@ static void balloon_onchannelcallback(void *context)
struct dm_balloon *bal_msg;
struct dm_hot_add *ha_msg;
union dm_mem_page_range *ha_pg_range;
+ union dm_mem_page_range *ha_region;
memset(recv_buffer, 0, sizeof(recv_buffer));
vmbus_recvpacket(dev->channel, recv_buffer,
@@ -907,8 +1233,26 @@ static void balloon_onchannelcallback(void *context)
pr_warn("Currently hot-adding\n");
dm->state = DM_HOT_ADD;
ha_msg = (struct dm_hot_add *)recv_buffer;
- ha_pg_range = &ha_msg->range;
- dm_device.ha_wrk.ha_page_range = *ha_pg_range;
+ if (ha_msg->hdr.size == sizeof(struct dm_hot_add)) {
+ /*
+ * This is a normal hot-add request specifying
+ * hot-add memory.
+ */
+ ha_pg_range = &ha_msg->range;
+ dm->ha_wrk.ha_page_range = *ha_pg_range;
+ dm->ha_wrk.ha_region_range.page_range = 0;
+ } else {
+ /*
+ * Host is specifying that we first hot-add
+ * a region and then partially populate this
+ * region.
+ */
+ dm->host_specified_ha_region = true;
+ ha_pg_range = &ha_msg->range;
+ ha_region = &ha_pg_range[1];
+ dm->ha_wrk.ha_page_range = *ha_pg_range;
+ dm->ha_wrk.ha_region_range = *ha_region;
+ }
schedule_work(&dm_device.ha_wrk.wrk);
break;
@@ -952,8 +1296,10 @@ static int balloon_probe(struct hv_device *dev,
dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN7;
init_completion(&dm_device.host_event);
init_completion(&dm_device.config_event);
+ INIT_LIST_HEAD(&dm_device.ha_region_list);
INIT_WORK(&dm_device.balloon_wrk.wrk, balloon_up);
INIT_WORK(&dm_device.ha_wrk.wrk, hot_add_req);
+ dm_device.host_specified_ha_region = false;
dm_device.thread =
kthread_run(dm_thread_func, &dm_device, "hv_balloon");
@@ -962,6 +1308,8 @@ static int balloon_probe(struct hv_device *dev,
goto probe_error1;
}
+ set_online_page_callback(&hv_online_page);
+
hv_set_drvdata(dev, &dm_device);
/*
* Initiate the hand shake with the host and negotiate
@@ -1006,12 +1354,6 @@ static int balloon_probe(struct hv_device *dev,
cap_msg.hdr.trans_id = atomic_inc_return(&trans_id);
cap_msg.caps.cap_bits.balloon = 1;
- /*
- * While we currently don't support hot-add,
- * we still advertise this capability since the
- * host requires that guests partcipating in the
- * dynamic memory protocol support hot add.
- */
cap_msg.caps.cap_bits.hot_add = 1;
/*
@@ -1061,15 +1403,24 @@ probe_error0:
static int balloon_remove(struct hv_device *dev)
{
struct hv_dynmem_device *dm = hv_get_drvdata(dev);
+ struct list_head *cur, *tmp;
+ struct hv_hotadd_state *has;
if (dm->num_pages_ballooned != 0)
pr_warn("Ballooned pages: %d\n", dm->num_pages_ballooned);
cancel_work_sync(&dm->balloon_wrk.wrk);
cancel_work_sync(&dm->ha_wrk.wrk);
+
vmbus_close(dev->channel);
kthread_stop(dm->thread);
kfree(send_buffer);
+ restore_online_page_callback(&hv_online_page);
+ list_for_each_safe(cur, tmp, &dm->ha_region_list) {
+ has = list_entry(cur, struct hv_hotadd_state, list);
+ list_del(&has->list);
+ kfree(has);
+ }
return 0;
}
--
1.7.4.1
Execute the hot-add operation in a separate work context.
This allows us to decouple the pressure reporting activity from the
"hot-add" activity. Testing has shown that this makes the guest more
responsive to hot add requests.
Signed-off-by: K. Y. Srinivasan <[email protected]>
Reviewed-by: Haiyang Zhang <[email protected]>
---
drivers/hv/hv_balloon.c | 41 +++++++++++++++++++++++------------------
1 files changed, 23 insertions(+), 18 deletions(-)
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index 8dc406c..13fda38 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -417,6 +417,11 @@ struct balloon_state {
struct work_struct wrk;
};
+struct hot_add_wrk {
+ union dm_mem_page_range ha_page_range;
+ struct work_struct wrk;
+};
+
static bool hot_add;
static bool do_hot_add;
/*
@@ -469,6 +474,11 @@ struct hv_dynmem_device {
struct balloon_state balloon_wrk;
/*
+ * State to execute the "hot-add" operation.
+ */
+ struct hot_add_wrk ha_wrk;
+
+ /*
* This thread handles hot-add
* requests from the host as well as notifying
* the host with regards to memory pressure in
@@ -486,7 +496,7 @@ struct hv_dynmem_device {
static struct hv_dynmem_device dm_device;
-static void hot_add_req(struct hv_dynmem_device *dm, struct dm_hot_add *msg)
+static void hot_add_req(struct work_struct *dummy)
{
struct dm_hot_add_response resp;
@@ -509,8 +519,8 @@ static void hot_add_req(struct hv_dynmem_device *dm, struct dm_hot_add *msg)
resp.page_count = 0;
resp.result = 0;
- dm->state = DM_INITIALIZED;
- vmbus_sendpacket(dm->dev->channel, &resp,
+ dm_device.state = DM_INITIALIZED;
+ vmbus_sendpacket(dm_device.dev->channel, &resp,
sizeof(struct dm_hot_add_response),
(unsigned long)NULL,
VM_PKT_DATA_INBAND, 0);
@@ -771,7 +781,6 @@ static int dm_thread_func(void *dm_dev)
{
struct hv_dynmem_device *dm = dm_dev;
int t;
- unsigned long scan_start;
while (!kthread_should_stop()) {
t = wait_for_completion_timeout(&dm_device.config_event, 1*HZ);
@@ -783,19 +792,6 @@ static int dm_thread_func(void *dm_dev)
if (t == 0)
post_status(dm);
- scan_start = jiffies;
- switch (dm->state) {
-
- case DM_HOT_ADD:
- hot_add_req(dm, (struct dm_hot_add *)recv_buffer);
- break;
- default:
- break;
- }
-
- if (!time_in_range(jiffies, scan_start, scan_start + HZ))
- post_status(dm);
-
}
return 0;
@@ -869,6 +865,8 @@ static void balloon_onchannelcallback(void *context)
struct dm_header *dm_hdr;
struct hv_dynmem_device *dm = hv_get_drvdata(dev);
struct dm_balloon *bal_msg;
+ struct dm_hot_add *ha_msg;
+ union dm_mem_page_range *ha_pg_range;
memset(recv_buffer, 0, sizeof(recv_buffer));
vmbus_recvpacket(dev->channel, recv_buffer,
@@ -905,8 +903,13 @@ static void balloon_onchannelcallback(void *context)
break;
case DM_MEM_HOT_ADD_REQUEST:
+ if (dm->state == DM_HOT_ADD)
+ pr_warn("Currently hot-adding\n");
dm->state = DM_HOT_ADD;
- complete(&dm->config_event);
+ ha_msg = (struct dm_hot_add *)recv_buffer;
+ ha_pg_range = &ha_msg->range;
+ dm_device.ha_wrk.ha_page_range = *ha_pg_range;
+ schedule_work(&dm_device.ha_wrk.wrk);
break;
case DM_INFO_MESSAGE:
@@ -950,6 +953,7 @@ static int balloon_probe(struct hv_device *dev,
init_completion(&dm_device.host_event);
init_completion(&dm_device.config_event);
INIT_WORK(&dm_device.balloon_wrk.wrk, balloon_up);
+ INIT_WORK(&dm_device.ha_wrk.wrk, hot_add_req);
dm_device.thread =
kthread_run(dm_thread_func, &dm_device, "hv_balloon");
@@ -1062,6 +1066,7 @@ static int balloon_remove(struct hv_device *dev)
pr_warn("Ballooned pages: %d\n", dm->num_pages_ballooned);
cancel_work_sync(&dm->balloon_wrk.wrk);
+ cancel_work_sync(&dm->ha_wrk.wrk);
vmbus_close(dev->channel);
kthread_stop(dm->thread);
kfree(send_buffer);
--
1.7.4.1
Execute the balloon inflation operation in a separate work context.
This allows us to decouple the pressure reporting activity from the
ballooning activity. Testing has shown that this decoupling makes the
guest more responsive.
Signed-off-by: K. Y. Srinivasan <[email protected]>
Reviewed-by: Haiyang Zhang <[email protected]>
---
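Note (illustration only, not part of the patch): this is the standard
deferred-work pattern. The channel callback should not block, so it only
records the request and schedules work; the work handler then performs the
potentially slow page allocations in process context. A minimal sketch
(on_balloon_request() is a made-up stand-in for the DM_BALLOON_REQUEST
case in balloon_onchannelcallback()):

#include <linux/types.h>
#include <linux/workqueue.h>

struct balloon_state {
	__u32 num_pages;
	struct work_struct wrk;
};

static struct balloon_state balloon_wrk;

/* Runs in process context; may sleep while allocating pages. */
static void balloon_up(struct work_struct *dummy)
{
	__u32 num_pages = balloon_wrk.num_pages;

	/* ... allocate num_pages and report the ranges to the host ... */
}

/* Called from the channel callback: capture the request and defer. */
static void on_balloon_request(__u32 num_pages)
{
	balloon_wrk.num_pages = num_pages;
	schedule_work(&balloon_wrk.wrk);
}

/* During probe: INIT_WORK(&balloon_wrk.wrk, balloon_up); */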
drivers/hv/hv_balloon.c | 34 ++++++++++++++++++++++++----------
1 files changed, 24 insertions(+), 10 deletions(-)
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index 7fb72dd..8dc406c 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -412,6 +412,11 @@ struct dm_info_msg {
* End protocol definitions.
*/
+struct balloon_state {
+ __u32 num_pages;
+ struct work_struct wrk;
+};
+
static bool hot_add;
static bool do_hot_add;
/*
@@ -459,7 +464,12 @@ struct hv_dynmem_device {
unsigned int num_pages_ballooned;
/*
- * This thread handles both balloon/hot-add
+ * State to manage the ballooning (up) operation.
+ */
+ struct balloon_state balloon_wrk;
+
+ /*
+ * This thread handles hot-add
* requests from the host as well as notifying
* the host with regards to memory pressure in
* the guest.
@@ -657,9 +667,9 @@ static int alloc_balloon_pages(struct hv_dynmem_device *dm, int num_pages,
-static void balloon_up(struct hv_dynmem_device *dm, struct dm_balloon *req)
+static void balloon_up(struct work_struct *dummy)
{
- int num_pages = req->num_pages;
+ int num_pages = dm_device.balloon_wrk.num_pages;
int num_ballooned = 0;
struct dm_balloon_response *bl_resp;
int alloc_unit;
@@ -684,14 +694,14 @@ static void balloon_up(struct hv_dynmem_device *dm, struct dm_balloon *req)
num_pages -= num_ballooned;
- num_ballooned = alloc_balloon_pages(dm, num_pages,
+ num_ballooned = alloc_balloon_pages(&dm_device, num_pages,
bl_resp, alloc_unit,
&alloc_error);
if ((alloc_error) || (num_ballooned == num_pages)) {
bl_resp->more_pages = 0;
done = true;
- dm->state = DM_INITIALIZED;
+ dm_device.state = DM_INITIALIZED;
}
/*
@@ -719,7 +729,7 @@ static void balloon_up(struct hv_dynmem_device *dm, struct dm_balloon *req)
pr_info("Balloon response failed\n");
for (i = 0; i < bl_resp->range_count; i++)
- free_balloon_pages(dm,
+ free_balloon_pages(&dm_device,
&bl_resp->range_array[i]);
done = true;
@@ -775,9 +785,6 @@ static int dm_thread_func(void *dm_dev)
scan_start = jiffies;
switch (dm->state) {
- case DM_BALLOON_UP:
- balloon_up(dm, (struct dm_balloon *)recv_buffer);
- break;
case DM_HOT_ADD:
hot_add_req(dm, (struct dm_hot_add *)recv_buffer);
@@ -861,6 +868,7 @@ static void balloon_onchannelcallback(void *context)
struct dm_message *dm_msg;
struct dm_header *dm_hdr;
struct hv_dynmem_device *dm = hv_get_drvdata(dev);
+ struct dm_balloon *bal_msg;
memset(recv_buffer, 0, sizeof(recv_buffer));
vmbus_recvpacket(dev->channel, recv_buffer,
@@ -882,8 +890,12 @@ static void balloon_onchannelcallback(void *context)
break;
case DM_BALLOON_REQUEST:
+ if (dm->state == DM_BALLOON_UP)
+ pr_warn("Currently ballooning\n");
+ bal_msg = (struct dm_balloon *)recv_buffer;
dm->state = DM_BALLOON_UP;
- complete(&dm->config_event);
+ dm_device.balloon_wrk.num_pages = bal_msg->num_pages;
+ schedule_work(&dm_device.balloon_wrk.wrk);
break;
case DM_UNBALLOON_REQUEST:
@@ -937,6 +949,7 @@ static int balloon_probe(struct hv_device *dev,
dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN7;
init_completion(&dm_device.host_event);
init_completion(&dm_device.config_event);
+ INIT_WORK(&dm_device.balloon_wrk.wrk, balloon_up);
dm_device.thread =
kthread_run(dm_thread_func, &dm_device, "hv_balloon");
@@ -1048,6 +1061,7 @@ static int balloon_remove(struct hv_device *dev)
if (dm->num_pages_ballooned != 0)
pr_warn("Ballooned pages: %d\n", dm->num_pages_ballooned);
+ cancel_work_sync(&dm->balloon_wrk.wrk);
vmbus_close(dev->channel);
kthread_stop(dm->thread);
kfree(send_buffer);
--
1.7.4.1
The balloon driver is stateful. For instance, it needs to keep track of pages
that have been ballooned out to properly post pressure reports. This state
cannot be re-constructed if the driver were to be unloaded and subsequently
reloaded. Furthermore, with memory hot-add now supported by this driver, the
driver becomes even more stateful, and that state cannot be re-created either.
Make the balloon driver non-unloadable to deal with this issue.
Signed-off-by: K. Y. Srinivasan <[email protected]>
Reviewed-by: Haiyang Zhang <[email protected]>
---
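Note (illustration only): providing module_init() without a matching
module_exit() is the usual way to make a module non-unloadable; the module
loader then refuses rmmod for such a module (short of forced unload on
kernels built with CONFIG_MODULE_FORCE_UNLOAD). After this patch the
registration boils down to:

static int __init init_balloon_drv(void)
{
	return vmbus_driver_register(&balloon_drv);
}

module_init(init_balloon_drv);
/* Intentionally no module_exit(): the driver cannot be unloaded. */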
drivers/hv/hv_balloon.c | 7 -------
1 files changed, 0 insertions(+), 7 deletions(-)
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index 13fda38..4743db9 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -1096,14 +1096,7 @@ static int __init init_balloon_drv(void)
return vmbus_driver_register(&balloon_drv);
}
-static void exit_balloon_drv(void)
-{
-
- vmbus_driver_unregister(&balloon_drv);
-}
-
module_init(init_balloon_drv);
-module_exit(exit_balloon_drv);
MODULE_DESCRIPTION("Hyper-V Balloon");
MODULE_VERSION(HV_DRV_VERSION);
--
1.7.4.1
Properly clean up the channel state on receipt of the "offer rescind" message.
Starting with ws2012, the host requires that the channel "relid" be properly
cleaned up when the offer is rescinded.
Signed-off-by: K. Y. Srinivasan <[email protected]>
Reviewed-by: Haiyang Zhang <[email protected]>
---
drivers/hv/channel_mgmt.c | 11 +++++++++++
1 files changed, 11 insertions(+), 0 deletions(-)
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index ff1be16..bad8128 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -165,8 +165,19 @@ static void vmbus_process_rescind_offer(struct work_struct *work)
struct vmbus_channel *channel = container_of(work,
struct vmbus_channel,
work);
+ unsigned long flags;
+ struct vmbus_channel_relid_released msg;
vmbus_device_unregister(channel->device_obj);
+ memset(&msg, 0, sizeof(struct vmbus_channel_relid_released));
+ msg.child_relid = channel->offermsg.child_relid;
+ msg.header.msgtype = CHANNELMSG_RELID_RELEASED;
+ vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released));
+
+ spin_lock_irqsave(&vmbus_connection.channel_lock, flags);
+ list_del(&channel->listentry);
+ spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);
+ free_channel(channel);
}
void vmbus_free_channels(void)
--
1.7.4.1
On Tue, Mar 12, K. Y. Srinivasan wrote:
> Implement the memory hot-add functionality. With this, Linux guests can fully
> participate in the Dynamic Memory protocol implemented in the Windows hosts.
> +++ b/drivers/hv/Kconfig
> @@ -15,7 +15,7 @@ config HYPERV_UTILS
>
> config HYPERV_BALLOON
> tristate "Microsoft Hyper-V Balloon driver"
> - depends on HYPERV
> + depends on HYPERV && MEMORY_HOTPLUG
> help
> Select this option to enable Hyper-V Balloon driver.
This driver now depends on CONFIG_MEMORY_HOTPLUG, which is disabled in
openSuSE i386 kernel. As a result, the hv_balloon driver disappears for
32bit kernels.
Any chance that the Kconfig dependency can be moved inside the code so
that a 32bit guest can still do balloon operations?
Olaf
> -----Original Message-----
> From: Olaf Hering [mailto:[email protected]]
> Sent: Wednesday, March 13, 2013 12:50 PM
> To: KY Srinivasan
> Cc: [email protected]; [email protected];
> [email protected]; [email protected]; [email protected]
> Subject: Re: [PATCH RESEND 5/6] Drivers: hv: balloon: Implement hot-add
> functionality
>
> On Tue, Mar 12, K. Y. Srinivasan wrote:
>
> > Implement the memory hot-add functionality. With this, Linux guests can fully
> > participate in the Dynamic Memory protocol implemented in the Windows
> hosts.
>
> > +++ b/drivers/hv/Kconfig
> > @@ -15,7 +15,7 @@ config HYPERV_UTILS
> >
> > config HYPERV_BALLOON
> > tristate "Microsoft Hyper-V Balloon driver"
> > - depends on HYPERV
> > + depends on HYPERV && MEMORY_HOTPLUG
> > help
> > Select this option to enable Hyper-V Balloon driver.
>
> This driver now depends on CONFIG_MEMORY_HOTPLUG, which is disabled in
> openSuSE i386 kernel. As a result, the hv_balloon driver disappears for
> 32bit kernels.
>
> Any chance that the Kconfig dependency can be moved inside the code so
> that a 32bit guest can still do balloon operations?
I can preserve the old behavior by getting rid of the config dependency and
adding the MEMORY_HOTPLUG dependency around the affected code. Greg, shall I
just re-send this one patch?
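Roughly along these lines (untested sketch; the
set_online_page_callback()/restore_online_page_callback() calls in
probe/remove would need the same treatment, and Kconfig would go back to
depending only on HYPERV):

#ifdef CONFIG_MEMORY_HOTPLUG
/*
 * Existing hot-add implementation: hv_mem_hot_add(), hv_online_page(),
 * process_hot_add(), ...
 */
#else
static unsigned long process_hot_add(unsigned long pg_start,
				     unsigned long pfn_cnt,
				     unsigned long rg_start,
				     unsigned long rg_size)
{
	/*
	 * Memory hotplug is not configured in this kernel; report that no
	 * pages were added so the host falls back to ballooning only.
	 */
	return 0;
}
#endif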
Regards,
K. Y
>
>
> Olaf
>