From: "K. Y. Srinivasan" <[email protected]>
Miscellaneous fixes.
V2: Only the patch "Drivers: hv: vmbus: Implement Direct Mode for stimer0"
has been modified to address review comments from Dan Carpenter.
Dexuan Cui (1):
tools: hv: fix compiler warnings about major/target_fname
Haiyang Zhang (2):
tools/hv: Fix IP reporting by KVP daemon with SRIOV
hv_vmbus: Correct the stale comments regarding cpu affinity
Jia-Ju Bai (1):
hyper-v: use GFP_KERNEL for hv_context.hv_numa_map
Joe Perches (1):
hv: Synthetic typo correction
Michael Kelley (1):
Drivers: hv: vmbus: Implement Direct Mode for stimer0
Olaf Hering (1):
tools: hv: include string.h in hv_fcopy_daemon
Vitaly Kuznetsov (4):
hv_balloon: fix printk loglevel
hv_balloon: simplify hv_online_page()/hv_page_online_one()
hv_balloon: fix bugs in num_pages_onlined accounting
hv_balloon: trace post_status
[email protected] (1):
vmbus/ring_buffer: remove some redundant helper function.
arch/x86/entry/entry_32.S | 3 +
arch/x86/entry/entry_64.S | 3 +
arch/x86/include/asm/hardirq.h | 1 +
arch/x86/include/asm/irq_vectors.h | 3 +-
arch/x86/include/asm/mshyperv.h | 13 ++++
arch/x86/include/uapi/asm/hyperv.h | 3 +
arch/x86/kernel/cpu/mshyperv.c | 40 +++++++++++
arch/x86/kernel/irq.c | 7 ++
drivers/hv/Makefile | 1 +
drivers/hv/channel_mgmt.c | 6 +-
drivers/hv/hv.c | 63 +++++++++++++++--
drivers/hv/hv_balloon.c | 121 ++++++++++++++++++++++----------
drivers/hv/hv_trace_balloon.h | 48 +++++++++++++
drivers/hv/hyperv_vmbus.h | 4 +-
drivers/hv/ring_buffer.c | 49 ++-----------
include/linux/hyperv.h | 2 +-
tools/hv/hv_fcopy_daemon.c | 4 +-
tools/hv/hv_kvp_daemon.c | 138 +++++++++++++++++--------------------
tools/hv/hv_vss_daemon.c | 1 +
19 files changed, 341 insertions(+), 169 deletions(-)
create mode 100644 drivers/hv/hv_trace_balloon.h
--
2.15.1
From: Vitaly Kuznetsov <[email protected]>
We have a mix of different ideas of which loglevel should be used. Unify
on the following:
- pr_info() for normal operation
- pr_warn() for 'strange' host behavior
- pr_err() for all errors.
Signed-off-by: Vitaly Kuznetsov <[email protected]>
Signed-off-by: K. Y. Srinivasan <[email protected]>
---
drivers/hv/hv_balloon.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index db0e6652d7ef..1aece72da9ba 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -691,7 +691,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
(HA_CHUNK << PAGE_SHIFT));
if (ret) {
- pr_warn("hot_add memory failed error is %d\n", ret);
+ pr_err("hot_add memory failed error is %d\n", ret);
if (ret == -EEXIST) {
/*
* This error indicates that the error
@@ -1014,7 +1014,7 @@ static void hot_add_req(struct work_struct *dummy)
resp.result = 0;
if (!do_hot_add || (resp.page_count == 0))
- pr_info("Memory hot add failed\n");
+ pr_err("Memory hot add failed\n");
dm->state = DM_INITIALIZED;
resp.hdr.trans_id = atomic_inc_return(&trans_id);
@@ -1041,7 +1041,7 @@ static void process_info(struct hv_dynmem_device *dm, struct dm_info_msg *msg)
break;
default:
- pr_info("Received Unknown type: %d\n", info_hdr->type);
+ pr_warn("Received Unknown type: %d\n", info_hdr->type);
}
}
@@ -1290,7 +1290,7 @@ static void balloon_up(struct work_struct *dummy)
/*
* Free up the memory we allocatted.
*/
- pr_info("Balloon response failed\n");
+ pr_err("Balloon response failed\n");
for (i = 0; i < bl_resp->range_count; i++)
free_balloon_pages(&dm_device,
@@ -1421,7 +1421,7 @@ static void cap_resp(struct hv_dynmem_device *dm,
struct dm_capabilities_resp_msg *cap_resp)
{
if (!cap_resp->is_accepted) {
- pr_info("Capabilities not accepted by host\n");
+ pr_err("Capabilities not accepted by host\n");
dm->state = DM_INIT_ERROR;
}
complete(&dm->host_event);
@@ -1508,7 +1508,7 @@ static void balloon_onchannelcallback(void *context)
break;
default:
- pr_err("Unhandled message: type: %d\n", dm_hdr->type);
+ pr_warn("Unhandled message: type: %d\n", dm_hdr->type);
}
}
--
2.15.1
From: Haiyang Zhang <[email protected]>
On Hyper-V the VF NIC has the same MAC as the related synthetic NIC.
VF NIC can work under the synthetic NIC transparently, without its
own IP address. The existing KVP daemon only gets IP from the first
NIC matching a MAC address, and may not be able to find the IP in
this case.
This patch fixes the problem by searching the NIC matching the MAC,
and having an IP address. So, the IP address will be found and
reported to the host successfully.
Signed-off-by: Haiyang Zhang <[email protected]>
Signed-off-by: K. Y. Srinivasan <[email protected]>
---
tools/hv/hv_kvp_daemon.c | 138 ++++++++++++++++++++++-------------------------
1 file changed, 65 insertions(+), 73 deletions(-)
diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c
index 4c99c57736ce..dbf6e8bd98ba 100644
--- a/tools/hv/hv_kvp_daemon.c
+++ b/tools/hv/hv_kvp_daemon.c
@@ -634,64 +634,6 @@ static char *kvp_if_name_to_mac(char *if_name)
return mac_addr;
}
-
-/*
- * Retrieve the interface name given tha MAC address.
- */
-
-static char *kvp_mac_to_if_name(char *mac)
-{
- DIR *dir;
- struct dirent *entry;
- FILE *file;
- char *p, *x;
- char *if_name = NULL;
- char buf[256];
- char dev_id[PATH_MAX];
- unsigned int i;
-
- dir = opendir(KVP_NET_DIR);
- if (dir == NULL)
- return NULL;
-
- while ((entry = readdir(dir)) != NULL) {
- /*
- * Set the state for the next pass.
- */
- snprintf(dev_id, sizeof(dev_id), "%s%s/address", KVP_NET_DIR,
- entry->d_name);
-
- file = fopen(dev_id, "r");
- if (file == NULL)
- continue;
-
- p = fgets(buf, sizeof(buf), file);
- if (p) {
- x = strchr(p, '\n');
- if (x)
- *x = '\0';
-
- for (i = 0; i < strlen(p); i++)
- p[i] = toupper(p[i]);
-
- if (!strcmp(p, mac)) {
- /*
- * Found the MAC match; return the interface
- * name. The caller will free the memory.
- */
- if_name = strdup(entry->d_name);
- fclose(file);
- break;
- }
- }
- fclose(file);
- }
-
- closedir(dir);
- return if_name;
-}
-
-
static void kvp_process_ipconfig_file(char *cmd,
char *config_buf, unsigned int len,
int element_size, int offset)
@@ -997,6 +939,70 @@ kvp_get_ip_info(int family, char *if_name, int op,
return error;
}
+/*
+ * Retrieve the IP given the MAC address.
+ */
+static int kvp_mac_to_ip(struct hv_kvp_ipaddr_value *kvp_ip_val)
+{
+ char *mac = (char *)kvp_ip_val->adapter_id;
+ DIR *dir;
+ struct dirent *entry;
+ FILE *file;
+ char *p, *x;
+ char *if_name = NULL;
+ char buf[256];
+ char dev_id[PATH_MAX];
+ unsigned int i;
+ int error = HV_E_FAIL;
+
+ dir = opendir(KVP_NET_DIR);
+ if (dir == NULL)
+ return HV_E_FAIL;
+
+ while ((entry = readdir(dir)) != NULL) {
+ /*
+ * Set the state for the next pass.
+ */
+ snprintf(dev_id, sizeof(dev_id), "%s%s/address", KVP_NET_DIR,
+ entry->d_name);
+
+ file = fopen(dev_id, "r");
+ if (file == NULL)
+ continue;
+
+ p = fgets(buf, sizeof(buf), file);
+ fclose(file);
+ if (!p)
+ continue;
+
+ x = strchr(p, '\n');
+ if (x)
+ *x = '\0';
+
+ for (i = 0; i < strlen(p); i++)
+ p[i] = toupper(p[i]);
+
+ if (strcmp(p, mac))
+ continue;
+
+ /*
+ * Found the MAC match.
+ * A NIC (e.g. VF) matching the MAC, but without IP, is skipped.
+ */
+ if_name = entry->d_name;
+ if (!if_name)
+ continue;
+
+ error = kvp_get_ip_info(0, if_name, KVP_OP_GET_IP_INFO,
+ kvp_ip_val, MAX_IP_ADDR_SIZE * 2);
+
+ if (!error && strlen((char *)kvp_ip_val->ip_addr))
+ break;
+ }
+
+ closedir(dir);
+ return error;
+}
static int expand_ipv6(char *addr, int type)
{
@@ -1472,26 +1478,12 @@ int main(int argc, char *argv[])
switch (op) {
case KVP_OP_GET_IP_INFO:
kvp_ip_val = &hv_msg->body.kvp_ip_val;
- if_name =
- kvp_mac_to_if_name((char *)kvp_ip_val->adapter_id);
- if (if_name == NULL) {
- /*
- * We could not map the mac address to an
- * interface name; return error.
- */
- hv_msg->error = HV_E_FAIL;
- break;
- }
- error = kvp_get_ip_info(
- 0, if_name, KVP_OP_GET_IP_INFO,
- kvp_ip_val,
- (MAX_IP_ADDR_SIZE * 2));
+ error = kvp_mac_to_ip(kvp_ip_val);
if (error)
hv_msg->error = error;
- free(if_name);
break;
case KVP_OP_SET_IP_INFO:
--
2.15.1
From: "[email protected]" <[email protected]>
Some hv_get/set** helper functions in ring_buffer code are
only called once or not used. This patch is to clear up these codes.
Signed-off-by: Tianyu Lan <[email protected]>
Signed-off-by: K. Y. Srinivasan <[email protected]>
---
drivers/hv/ring_buffer.c | 49 ++++--------------------------------------------
1 file changed, 4 insertions(+), 45 deletions(-)
diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index 50e071444a5c..1aa17795727b 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -78,46 +78,6 @@ static void hv_signal_on_write(u32 old_write, struct vmbus_channel *channel)
vmbus_setevent(channel);
}
-/* Get the next write location for the specified ring buffer. */
-static inline u32
-hv_get_next_write_location(struct hv_ring_buffer_info *ring_info)
-{
- u32 next = ring_info->ring_buffer->write_index;
-
- return next;
-}
-
-/* Set the next write location for the specified ring buffer. */
-static inline void
-hv_set_next_write_location(struct hv_ring_buffer_info *ring_info,
- u32 next_write_location)
-{
- ring_info->ring_buffer->write_index = next_write_location;
-}
-
-/* Set the next read location for the specified ring buffer. */
-static inline void
-hv_set_next_read_location(struct hv_ring_buffer_info *ring_info,
- u32 next_read_location)
-{
- ring_info->ring_buffer->read_index = next_read_location;
- ring_info->priv_read_index = next_read_location;
-}
-
-/* Get the size of the ring buffer. */
-static inline u32
-hv_get_ring_buffersize(const struct hv_ring_buffer_info *ring_info)
-{
- return ring_info->ring_datasize;
-}
-
-/* Get the read and write indices as u64 of the specified ring buffer. */
-static inline u64
-hv_get_ring_bufferindices(struct hv_ring_buffer_info *ring_info)
-{
- return (u64)ring_info->ring_buffer->write_index << 32;
-}
-
/*
* Helper routine to copy from source to ring buffer.
* Assume there is enough room. Handles wrap-around in dest case only!!
@@ -129,7 +89,7 @@ static u32 hv_copyto_ringbuffer(
u32 srclen)
{
void *ring_buffer = hv_get_ring_buffer(ring_info);
- u32 ring_buffer_size = hv_get_ring_buffersize(ring_info);
+ u32 ring_buffer_size = ring_info->ring_datasize;
memcpy(ring_buffer + start_write_offset, src, srclen);
@@ -275,8 +235,7 @@ int hv_ringbuffer_write(struct vmbus_channel *channel,
}
/* Write to the ring buffer */
- next_write_location = hv_get_next_write_location(outring_info);
-
+ next_write_location = outring_info->ring_buffer->write_index;
old_write = next_write_location;
for (i = 0; i < kv_count; i++) {
@@ -287,7 +246,7 @@ int hv_ringbuffer_write(struct vmbus_channel *channel,
}
/* Set previous packet start */
- prev_indices = hv_get_ring_bufferindices(outring_info);
+ prev_indices = (u64)outring_info->ring_buffer->write_index << 32;
next_write_location = hv_copyto_ringbuffer(outring_info,
next_write_location,
@@ -298,7 +257,7 @@ int hv_ringbuffer_write(struct vmbus_channel *channel,
virt_mb();
/* Now, update the write location */
- hv_set_next_write_location(outring_info, next_write_location);
+ outring_info->ring_buffer->write_index = next_write_location;
spin_unlock_irqrestore(&outring_info->ring_lock, flags);
--
2.15.1
From: Joe Perches <[email protected]>
Just a trivial tyop fix.
Signed-off-by: Joe Perches <[email protected]>
Signed-off-by: K. Y. Srinivasan <[email protected]>
---
drivers/hv/hv.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index b6cacc4cccf2..31142b72f1b9 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -217,7 +217,7 @@ void hv_synic_free(void)
}
/*
- * hv_synic_init - Initialize the Synthethic Interrupt Controller.
+ * hv_synic_init - Initialize the Synthetic Interrupt Controller.
*
* If it is already initialized by another entity (ie x2v shim), we need to
* retrieve the initialized message and event pages. Otherwise, we create and
--
2.15.1
From: Vitaly Kuznetsov <[email protected]>
Instead of doing pfn_to_page() and continuosly casting page to unsigned
long just cache the pfn of the page with page_to_pfn().
Signed-off-by: Vitaly Kuznetsov <[email protected]>
Signed-off-by: K. Y. Srinivasan <[email protected]>
---
drivers/hv/hv_balloon.c | 27 +++++----------------------
1 file changed, 5 insertions(+), 22 deletions(-)
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index 1aece72da9ba..5b8e1ad1bcfe 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -612,28 +612,17 @@ static struct notifier_block hv_memory_nb = {
/* Check if the particular page is backed and can be onlined and online it. */
static void hv_page_online_one(struct hv_hotadd_state *has, struct page *pg)
{
- unsigned long cur_start_pgp;
- unsigned long cur_end_pgp;
struct hv_hotadd_gap *gap;
-
- cur_start_pgp = (unsigned long)pfn_to_page(has->covered_start_pfn);
- cur_end_pgp = (unsigned long)pfn_to_page(has->covered_end_pfn);
+ unsigned long pfn = page_to_pfn(pg);
/* The page is not backed. */
- if (((unsigned long)pg < cur_start_pgp) ||
- ((unsigned long)pg >= cur_end_pgp))
+ if ((pfn < has->covered_start_pfn) || (pfn >= has->covered_end_pfn))
return;
/* Check for gaps. */
list_for_each_entry(gap, &has->gap_list, list) {
- cur_start_pgp = (unsigned long)
- pfn_to_page(gap->start_pfn);
- cur_end_pgp = (unsigned long)
- pfn_to_page(gap->end_pfn);
- if (((unsigned long)pg >= cur_start_pgp) &&
- ((unsigned long)pg < cur_end_pgp)) {
+ if ((pfn >= gap->start_pfn) && (pfn < gap->end_pfn))
return;
- }
}
/* This frame is currently backed; online the page. */
@@ -726,19 +715,13 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
static void hv_online_page(struct page *pg)
{
struct hv_hotadd_state *has;
- unsigned long cur_start_pgp;
- unsigned long cur_end_pgp;
unsigned long flags;
+ unsigned long pfn = page_to_pfn(pg);
spin_lock_irqsave(&dm_device.ha_lock, flags);
list_for_each_entry(has, &dm_device.ha_region_list, list) {
- cur_start_pgp = (unsigned long)
- pfn_to_page(has->start_pfn);
- cur_end_pgp = (unsigned long)pfn_to_page(has->end_pfn);
-
/* The page belongs to a different HAS. */
- if (((unsigned long)pg < cur_start_pgp) ||
- ((unsigned long)pg >= cur_end_pgp))
+ if ((pfn < has->start_pfn) || (pfn >= has->end_pfn))
continue;
hv_page_online_one(has, pg);
--
2.15.1
From: Dexuan Cui <[email protected]>
This patch fixes the below warnings with new glibc and gcc:
hv_vss_daemon.c:100:13: warning: In the GNU C Library, "major" is defined
by <sys/sysmacros.h>. For historical compatibility, it is currently
defined by <sys/types.h> as well, but we plan to remove this soon.
To use "major", include <sys/sysmacros.h> directly.
hv_fcopy_daemon.c:42:2: note: 'snprintf' output between 2 and 1040
bytes into a destination of size 260
Signed-off-by: Dexuan Cui <[email protected]>
Cc: Stephen Hemminger <[email protected]>
Cc: K. Y. Srinivasan <[email protected]>
Signed-off-by: K. Y. Srinivasan <[email protected]>
---
tools/hv/hv_fcopy_daemon.c | 3 ++-
tools/hv/hv_vss_daemon.c | 1 +
2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/tools/hv/hv_fcopy_daemon.c b/tools/hv/hv_fcopy_daemon.c
index 457a1521f32f..785f4e95148c 100644
--- a/tools/hv/hv_fcopy_daemon.c
+++ b/tools/hv/hv_fcopy_daemon.c
@@ -23,13 +23,14 @@
#include <unistd.h>
#include <errno.h>
#include <linux/hyperv.h>
+#include <linux/limits.h>
#include <syslog.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <getopt.h>
static int target_fd;
-static char target_fname[W_MAX_PATH];
+static char target_fname[PATH_MAX];
static unsigned long long filesize;
static int hv_start_fcopy(struct hv_start_fcopy *smsg)
diff --git a/tools/hv/hv_vss_daemon.c b/tools/hv/hv_vss_daemon.c
index b2b4ebffab8c..34031a297f02 100644
--- a/tools/hv/hv_vss_daemon.c
+++ b/tools/hv/hv_vss_daemon.c
@@ -22,6 +22,7 @@
#include <sys/poll.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
+#include <sys/sysmacros.h>
#include <fcntl.h>
#include <stdio.h>
#include <mntent.h>
--
2.15.1
From: Vitaly Kuznetsov <[email protected]>
Our num_pages_onlined accounting is buggy:
1) In case we're offlining a memory block which was present at boot (e.g.
when there was no hotplug at all) we subtract 32k from 0 and as
num_pages_onlined is unsigned get a very big positive number.
2) Commit 6df8d9aaf3af ("Drivers: hv: balloon: Correctly update onlined
page count") made num_pages_onlined counter accurate on onlining but
totally incorrect on offlining for partly populated regions: no matter
how many pages were onlined and what was actually added to
num_pages_onlined counter we always subtract the full region (32k) so
again, num_pages_onlined can wrap around zero. By onlining/offlining
the same partly populated region multiple times we can make the
situation worse.
Solve these issues by doing accurate accounting on offlining: walk HAS
list, check for covered range and gaps.
Fixes: 6df8d9aaf3af ("Drivers: hv: balloon: Correctly update onlined page count")
Signed-off-by: Vitaly Kuznetsov <[email protected]>
Signed-off-by: K. Y. Srinivasan <[email protected]>
---
drivers/hv/hv_balloon.c | 82 +++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 69 insertions(+), 13 deletions(-)
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index 5b8e1ad1bcfe..7514a32d0de4 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -576,11 +576,65 @@ static struct hv_dynmem_device dm_device;
static void post_status(struct hv_dynmem_device *dm);
#ifdef CONFIG_MEMORY_HOTPLUG
+static inline bool has_pfn_is_backed(struct hv_hotadd_state *has,
+ unsigned long pfn)
+{
+ struct hv_hotadd_gap *gap;
+
+ /* The page is not backed. */
+ if ((pfn < has->covered_start_pfn) || (pfn >= has->covered_end_pfn))
+ return false;
+
+ /* Check for gaps. */
+ list_for_each_entry(gap, &has->gap_list, list) {
+ if ((pfn >= gap->start_pfn) && (pfn < gap->end_pfn))
+ return false;
+ }
+
+ return true;
+}
+
+static unsigned long hv_page_offline_check(unsigned long start_pfn,
+ unsigned long nr_pages)
+{
+ unsigned long pfn = start_pfn, count = 0;
+ struct hv_hotadd_state *has;
+ bool found;
+
+ while (pfn < start_pfn + nr_pages) {
+ /*
+ * Search for HAS which covers the pfn and when we find one
+ * count how many consequitive PFNs are covered.
+ */
+ found = false;
+ list_for_each_entry(has, &dm_device.ha_region_list, list) {
+ while ((pfn >= has->start_pfn) &&
+ (pfn < has->end_pfn) &&
+ (pfn < start_pfn + nr_pages)) {
+ found = true;
+ if (has_pfn_is_backed(has, pfn))
+ count++;
+ pfn++;
+ }
+ }
+
+ /*
+ * This PFN is not in any HAS (e.g. we're offlining a region
+ * which was present at boot), no need to account for it. Go
+ * to the next one.
+ */
+ if (!found)
+ pfn++;
+ }
+
+ return count;
+}
+
static int hv_memory_notifier(struct notifier_block *nb, unsigned long val,
void *v)
{
struct memory_notify *mem = (struct memory_notify *)v;
- unsigned long flags;
+ unsigned long flags, pfn_count;
switch (val) {
case MEM_ONLINE:
@@ -593,7 +647,19 @@ static int hv_memory_notifier(struct notifier_block *nb, unsigned long val,
case MEM_OFFLINE:
spin_lock_irqsave(&dm_device.ha_lock, flags);
- dm_device.num_pages_onlined -= mem->nr_pages;
+ pfn_count = hv_page_offline_check(mem->start_pfn,
+ mem->nr_pages);
+ if (pfn_count <= dm_device.num_pages_onlined) {
+ dm_device.num_pages_onlined -= pfn_count;
+ } else {
+ /*
+ * We're offlining more pages than we managed to online.
+ * This is unexpected. In any case don't let
+ * num_pages_onlined wrap around zero.
+ */
+ WARN_ON_ONCE(1);
+ dm_device.num_pages_onlined = 0;
+ }
spin_unlock_irqrestore(&dm_device.ha_lock, flags);
break;
case MEM_GOING_ONLINE:
@@ -612,19 +678,9 @@ static struct notifier_block hv_memory_nb = {
/* Check if the particular page is backed and can be onlined and online it. */
static void hv_page_online_one(struct hv_hotadd_state *has, struct page *pg)
{
- struct hv_hotadd_gap *gap;
- unsigned long pfn = page_to_pfn(pg);
-
- /* The page is not backed. */
- if ((pfn < has->covered_start_pfn) || (pfn >= has->covered_end_pfn))
+ if (!has_pfn_is_backed(has, page_to_pfn(pg)))
return;
- /* Check for gaps. */
- list_for_each_entry(gap, &has->gap_list, list) {
- if ((pfn >= gap->start_pfn) && (pfn < gap->end_pfn))
- return;
- }
-
/* This frame is currently backed; online the page. */
__online_page_set_limits(pg);
__online_page_increment_counters(pg);
--
2.15.1
From: Olaf Hering <[email protected]>
The usage of strchr requires inclusion of string.h.
Fixes: 0c38cda64aec ("tools: hv: remove unnecessary header files and netlink related code")
Signed-off-by: Olaf Hering <[email protected]>
Signed-off-by: K. Y. Srinivasan <[email protected]>
---
tools/hv/hv_fcopy_daemon.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/tools/hv/hv_fcopy_daemon.c b/tools/hv/hv_fcopy_daemon.c
index 785f4e95148c..d78aed86af09 100644
--- a/tools/hv/hv_fcopy_daemon.c
+++ b/tools/hv/hv_fcopy_daemon.c
@@ -21,6 +21,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
+#include <string.h>
#include <errno.h>
#include <linux/hyperv.h>
#include <linux/limits.h>
--
2.15.1
From: Jia-Ju Bai <[email protected]>
The kzalloc function is called with GFP_ATOMIC.
But according to driver call graph, it is not in atomic context,
namely no spinlock is held nor in an interrupt handler.
This GFP_ATOMIC is unnecessary, and replace with GFP_KERNEL.
Signed-off-by: Jia-Ju Bai <[email protected]>
Reviewed-by: Vitaly Kuznetsov <[email protected]>
Signed-off-by: K. Y. Srinivasan <[email protected]>
---
drivers/hv/hv.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index fe96aab9e794..b6cacc4cccf2 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -147,7 +147,7 @@ int hv_synic_alloc(void)
int cpu;
hv_context.hv_numa_map = kzalloc(sizeof(struct cpumask) * nr_node_ids,
- GFP_ATOMIC);
+ GFP_KERNEL);
if (hv_context.hv_numa_map == NULL) {
pr_err("Unable to allocate NUMA map\n");
goto err;
--
2.15.1
From: Haiyang Zhang <[email protected]>
The comments doesn't match what the current code does, also have a
typo. This patch corrects them.
Signed-off-by: Haiyang Zhang <[email protected]>
Signed-off-by: K. Y. Srinivasan <[email protected]>
---
drivers/hv/channel_mgmt.c | 6 ++----
include/linux/hyperv.h | 2 +-
2 files changed, 3 insertions(+), 5 deletions(-)
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index c21020b69114..c6d9d19bc04e 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -596,10 +596,8 @@ static int next_numa_node_id;
/*
* Starting with Win8, we can statically distribute the incoming
* channel interrupt load by binding a channel to VCPU.
- * We do this in a hierarchical fashion:
- * First distribute the primary channels across available NUMA nodes
- * and then distribute the subchannels amongst the CPUs in the NUMA
- * node assigned to the primary channel.
+ * We distribute the interrupt loads to one or more NUMA nodes based on
+ * the channel's affinity_policy.
*
* For pre-win8 hosts or non-performance critical channels we assign the
* first CPU in the first NUMA node.
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 93bd6fcd6e62..2048f3c3b68a 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -844,7 +844,7 @@ struct vmbus_channel {
/*
* NUMA distribution policy:
- * We support teo policies:
+ * We support two policies:
* 1) Balanced: Here all performance critical channels are
* distributed evenly amongst all the NUMA nodes.
* This policy will be the default policy.
--
2.15.1
From: Vitaly Kuznetsov <[email protected]>
Hyper-V balloon driver makes non-trivial calculations to convert Linux's
representation of free/used memory to what Hyper-V host expects to see. Add
a tracepoint to see what's being sent and where the data comes from.
Signed-off-by: Vitaly Kuznetsov <[email protected]>
Signed-off-by: K. Y. Srinivasan <[email protected]>
---
drivers/hv/Makefile | 1 +
drivers/hv/hv_balloon.c | 6 ++++++
drivers/hv/hv_trace_balloon.h | 48 +++++++++++++++++++++++++++++++++++++++++++
3 files changed, 55 insertions(+)
create mode 100644 drivers/hv/hv_trace_balloon.h
diff --git a/drivers/hv/Makefile b/drivers/hv/Makefile
index 14c22786b519..a1eec7177c2d 100644
--- a/drivers/hv/Makefile
+++ b/drivers/hv/Makefile
@@ -4,6 +4,7 @@ obj-$(CONFIG_HYPERV_UTILS) += hv_utils.o
obj-$(CONFIG_HYPERV_BALLOON) += hv_balloon.o
CFLAGS_hv_trace.o = -I$(src)
+CFLAGS_hv_balloon.o = -I$(src)
hv_vmbus-y := vmbus_drv.o \
hv.o connection.o channel.o \
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index 7514a32d0de4..b3e9f13f8bc3 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -34,6 +34,9 @@
#include <linux/hyperv.h>
+#define CREATE_TRACE_POINTS
+#include "hv_trace_balloon.h"
+
/*
* We begin with definitions supporting the Dynamic Memory protocol
* with the host.
@@ -1159,6 +1162,9 @@ static void post_status(struct hv_dynmem_device *dm)
dm->num_pages_added - dm->num_pages_onlined : 0) +
compute_balloon_floor();
+ trace_balloon_status(status.num_avail, status.num_committed,
+ vm_memory_committed(), dm->num_pages_ballooned,
+ dm->num_pages_added, dm->num_pages_onlined);
/*
* If our transaction ID is no longer current, just don't
* send the status. This can happen if we were interrupted
diff --git a/drivers/hv/hv_trace_balloon.h b/drivers/hv/hv_trace_balloon.h
new file mode 100644
index 000000000000..93082888aec3
--- /dev/null
+++ b/drivers/hv/hv_trace_balloon.h
@@ -0,0 +1,48 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM hyperv
+
+#if !defined(_HV_TRACE_BALLOON_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _HV_TRACE_BALLOON_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(balloon_status,
+ TP_PROTO(u64 available, u64 committed,
+ unsigned long vm_memory_committed,
+ unsigned long pages_ballooned,
+ unsigned long pages_added,
+ unsigned long pages_onlined),
+ TP_ARGS(available, committed, vm_memory_committed,
+ pages_ballooned, pages_added, pages_onlined),
+ TP_STRUCT__entry(
+ __field(u64, available)
+ __field(u64, committed)
+ __field(unsigned long, vm_memory_committed)
+ __field(unsigned long, pages_ballooned)
+ __field(unsigned long, pages_added)
+ __field(unsigned long, pages_onlined)
+ ),
+ TP_fast_assign(
+ __entry->available = available;
+ __entry->committed = committed;
+ __entry->vm_memory_committed = vm_memory_committed;
+ __entry->pages_ballooned = pages_ballooned;
+ __entry->pages_added = pages_added;
+ __entry->pages_onlined = pages_onlined;
+ ),
+ TP_printk("available %lld, committed %lld; vm_memory_committed %ld;"
+ " pages_ballooned %ld, pages_added %ld, pages_onlined %ld",
+ __entry->available, __entry->committed,
+ __entry->vm_memory_committed, __entry->pages_ballooned,
+ __entry->pages_added, __entry->pages_onlined
+ )
+ );
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE hv_trace_balloon
+#endif /* _HV_TRACE_BALLOON_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
--
2.15.1
From: Michael Kelley <[email protected]>
The 2016 version of Hyper-V offers the option to operate the guest VM
per-vcpu stimer's in Direct Mode, which means the timer interupts on its
own vector rather than queueing a VMbus message. Direct Mode reduces
timer processing overhead in both the hypervisor and the guest, and
avoids having timer interrupts pollute the VMbus interrupt stream for
the synthetic NIC and storage. This patch enables Direct Mode by
default on stimer0 when running on a version of Hyper-V that supports
it.
In prep for coming support of Hyper-V on ARM64, the arch independent
portion of the code contains calls to routines that will be populated
on ARM64 but are not needed and do nothing on x86.
Signed-off-by: Michael Kelley <[email protected]>
Signed-off-by: K. Y. Srinivasan <[email protected]>
---
arch/x86/entry/entry_32.S | 3 ++
arch/x86/entry/entry_64.S | 3 ++
arch/x86/include/asm/hardirq.h | 1 +
arch/x86/include/asm/irq_vectors.h | 3 +-
arch/x86/include/asm/mshyperv.h | 13 +++++++++
arch/x86/include/uapi/asm/hyperv.h | 3 ++
arch/x86/kernel/cpu/mshyperv.c | 40 ++++++++++++++++++++++++++
arch/x86/kernel/irq.c | 7 +++++
drivers/hv/hv.c | 59 ++++++++++++++++++++++++++++++++++++--
drivers/hv/hyperv_vmbus.h | 4 ++-
10 files changed, 131 insertions(+), 5 deletions(-)
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 16c2c022540d..e12d60873376 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -903,6 +903,9 @@ BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
BUILD_INTERRUPT3(hyperv_reenlightenment_vector, HYPERV_REENLIGHTENMENT_VECTOR,
hyperv_reenlightenment_intr)
+BUILD_INTERRUPT3(hv_stimer0_callback_vector, HYPERV_STIMER0_VECTOR,
+ hv_stimer0_vector_handler)
+
#endif /* CONFIG_HYPERV */
ENTRY(page_fault)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 8971bd64d515..606118459133 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1106,6 +1106,9 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
apicinterrupt3 HYPERV_REENLIGHTENMENT_VECTOR \
hyperv_reenlightenment_vector hyperv_reenlightenment_intr
+
+apicinterrupt3 HYPERV_STIMER0_VECTOR \
+ hv_stimer0_callback_vector hv_stimer0_vector_handler
#endif /* CONFIG_HYPERV */
idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 7c341a74ec8c..5ea2afd4c871 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -40,6 +40,7 @@ typedef struct {
#endif
#if IS_ENABLED(CONFIG_HYPERV)
unsigned int irq_hv_reenlightenment_count;
+ unsigned int hyperv_stimer0_count;
#endif
} ____cacheline_aligned irq_cpustat_t;
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index e71c1120426b..404c5fdff859 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -106,9 +106,10 @@
#if IS_ENABLED(CONFIG_HYPERV)
#define HYPERV_REENLIGHTENMENT_VECTOR 0xee
+#define HYPERV_STIMER0_VECTOR 0xed
#endif
-#define LOCAL_TIMER_VECTOR 0xed
+#define LOCAL_TIMER_VECTOR 0xec
#define NR_VECTORS 256
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 25283f7eb299..e73c4d0c06ad 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -173,6 +173,19 @@ void hv_remove_kexec_handler(void);
void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs));
void hv_remove_crash_handler(void);
+/*
+ * Routines for stimer0 Direct Mode handling.
+ * On x86/x64, there are no percpu actions to take.
+ */
+void hv_stimer0_vector_handler(struct pt_regs *regs);
+void hv_stimer0_callback_vector(void);
+int hv_setup_stimer0_irq(int *irq, int *vector, void (*handler)(void));
+void hv_remove_stimer0_irq(int irq);
+
+static inline void hv_enable_stimer0_percpu_irq(int irq) {}
+static inline void hv_disable_stimer0_percpu_irq(int irq) {}
+
+
#if IS_ENABLED(CONFIG_HYPERV)
extern struct clocksource *hyperv_cs;
extern void *hv_hypercall_pg;
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
index 099414345865..6c0c3a3b631c 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -77,6 +77,9 @@
/* Crash MSR available */
#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE (1 << 10)
+/* stimer Direct Mode is available */
+#define HV_X64_STIMER_DIRECT_MODE_AVAILABLE (1 << 19)
+
/*
* Feature identification: EBX indicates which flags were specified at
* partition creation. The format is the same as the partition creation
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 9340f41ce8d3..4488cf0dd499 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -37,6 +37,7 @@ EXPORT_SYMBOL_GPL(ms_hyperv);
#if IS_ENABLED(CONFIG_HYPERV)
static void (*vmbus_handler)(void);
+static void (*hv_stimer0_handler)(void);
static void (*hv_kexec_handler)(void);
static void (*hv_crash_handler)(struct pt_regs *regs);
@@ -69,6 +70,41 @@ void hv_remove_vmbus_irq(void)
EXPORT_SYMBOL_GPL(hv_setup_vmbus_irq);
EXPORT_SYMBOL_GPL(hv_remove_vmbus_irq);
+/*
+ * Routines to do per-architecture handling of stimer0
+ * interrupts when in Direct Mode
+ */
+
+__visible void __irq_entry hv_stimer0_vector_handler(struct pt_regs *regs)
+{
+ struct pt_regs *old_regs = set_irq_regs(regs);
+
+ entering_irq();
+ inc_irq_stat(hyperv_stimer0_count);
+ if (hv_stimer0_handler)
+ hv_stimer0_handler();
+ ack_APIC_irq();
+
+ exiting_irq();
+ set_irq_regs(old_regs);
+}
+
+int hv_setup_stimer0_irq(int *irq, int *vector, void (*handler)(void))
+{
+ *vector = HYPERV_STIMER0_VECTOR;
+ *irq = 0; /* Unused on x86/x64 */
+ hv_stimer0_handler = handler;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(hv_setup_stimer0_irq);
+
+void hv_remove_stimer0_irq(int irq)
+{
+ /* We have no way to deallocate the interrupt gate */
+ hv_stimer0_handler = NULL;
+}
+EXPORT_SYMBOL_GPL(hv_remove_stimer0_irq);
+
void hv_setup_kexec_handler(void (*handler)(void))
{
hv_kexec_handler = handler;
@@ -257,6 +293,10 @@ static void __init ms_hyperv_init_platform(void)
alloc_intr_gate(HYPERV_REENLIGHTENMENT_VECTOR,
hyperv_reenlightenment_vector);
+ /* Setup the IDT for stimer0 */
+ if (ms_hyperv.misc_features & HV_X64_STIMER_DIRECT_MODE_AVAILABLE)
+ alloc_intr_gate(HYPERV_STIMER0_VECTOR,
+ hv_stimer0_callback_vector);
#endif
}
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 45fb4d2565f8..328d027d829d 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -150,6 +150,13 @@ int arch_show_interrupts(struct seq_file *p, int prec)
irq_stats(j)->irq_hv_reenlightenment_count);
seq_puts(p, " Hyper-V reenlightenment interrupts\n");
}
+ if (test_bit(HYPERV_STIMER0_VECTOR, system_vectors)) {
+ seq_printf(p, "%*s: ", prec, "HVS");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ",
+ irq_stats(j)->hyperv_stimer0_count);
+ seq_puts(p, " Hyper-V stimer0 interrupts\n");
+ }
#endif
seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
#if defined(CONFIG_X86_IO_APIC)
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index 31142b72f1b9..b1f6793acf4c 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -27,7 +27,7 @@
#include <linux/vmalloc.h>
#include <linux/hyperv.h>
#include <linux/version.h>
-#include <linux/interrupt.h>
+#include <linux/random.h>
#include <linux/clockchips.h>
#include <asm/hyperv.h>
#include <asm/mshyperv.h>
@@ -38,6 +38,17 @@ struct hv_context hv_context = {
.synic_initialized = false,
};
+/*
+ * If false, we're using the old mechanism for stimer0 interrupts
+ * where it sends a VMbus message when it expires. The old
+ * mechanism is used when running on older versions of Hyper-V
+ * that don't support Direct Mode. While Hyper-V provides
+ * four stimer's per CPU, Linux uses only stimer0.
+ */
+static bool direct_mode_enabled;
+static int stimer0_irq;
+static int stimer0_vector;
+
#define HV_TIMER_FREQUENCY (10 * 1000 * 1000) /* 100ns period */
#define HV_MAX_MAX_DELTA_TICKS 0xffffffff
#define HV_MIN_DELTA_TICKS 1
@@ -53,6 +64,8 @@ int hv_init(void)
if (!hv_context.cpu_context)
return -ENOMEM;
+ direct_mode_enabled = ms_hyperv.misc_features &
+ HV_X64_STIMER_DIRECT_MODE_AVAILABLE;
return 0;
}
@@ -91,6 +104,21 @@ int hv_post_message(union hv_connection_id connection_id,
return status & 0xFFFF;
}
+/*
+ * ISR for when stimer0 is operating in Direct Mode. Direct Mode
+ * does not use VMbus or any VMbus messages, so process here and not
+ * in the VMbus driver code.
+ */
+
+static void hv_stimer0_isr(void)
+{
+ struct hv_per_cpu_context *hv_cpu;
+
+ hv_cpu = this_cpu_ptr(hv_context.cpu_context);
+ hv_cpu->clk_evt->event_handler(hv_cpu->clk_evt);
+ add_interrupt_randomness(stimer0_vector, 0);
+}
+
static int hv_ce_set_next_event(unsigned long delta,
struct clock_event_device *evt)
{
@@ -108,6 +136,8 @@ static int hv_ce_shutdown(struct clock_event_device *evt)
{
hv_init_timer(HV_X64_MSR_STIMER0_COUNT, 0);
hv_init_timer_config(HV_X64_MSR_STIMER0_CONFIG, 0);
+ if (direct_mode_enabled)
+ hv_disable_stimer0_percpu_irq(stimer0_irq);
return 0;
}
@@ -116,11 +146,26 @@ static int hv_ce_set_oneshot(struct clock_event_device *evt)
{
union hv_timer_config timer_cfg;
+ timer_cfg.as_uint64 = 0;
timer_cfg.enable = 1;
timer_cfg.auto_enable = 1;
- timer_cfg.sintx = VMBUS_MESSAGE_SINT;
+ if (direct_mode_enabled) {
+ /*
+ * When it expires, the timer will directly interrupt
+ * on the specified hardware vector/IRQ.
+ */
+ timer_cfg.direct_mode = 1;
+ timer_cfg.apic_vector = stimer0_vector;
+ hv_enable_stimer0_percpu_irq(stimer0_irq);
+ } else {
+ /*
+ * When it expires, the timer will generate a VMbus message,
+ * to be handled by the normal VMbus interrupt handler.
+ */
+ timer_cfg.direct_mode = 0;
+ timer_cfg.sintx = VMBUS_MESSAGE_SINT;
+ }
hv_init_timer_config(HV_X64_MSR_STIMER0_CONFIG, timer_cfg.as_uint64);
-
return 0;
}
@@ -191,6 +236,11 @@ int hv_synic_alloc(void)
INIT_LIST_HEAD(&hv_cpu->chan_list);
}
+ if (direct_mode_enabled &&
+ hv_setup_stimer0_irq(&stimer0_irq, &stimer0_vector,
+ hv_stimer0_isr))
+ goto err;
+
return 0;
err:
return -ENOMEM;
@@ -292,6 +342,9 @@ void hv_synic_clockevents_cleanup(void)
if (!(ms_hyperv.features & HV_X64_MSR_SYNTIMER_AVAILABLE))
return;
+ if (direct_mode_enabled)
+ hv_remove_stimer0_irq(stimer0_irq);
+
for_each_present_cpu(cpu) {
struct hv_per_cpu_context *hv_cpu
= per_cpu_ptr(hv_context.cpu_context, cpu);
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 22300ec7b556..36d34fe3ccb3 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -57,7 +57,9 @@ union hv_timer_config {
u64 periodic:1;
u64 lazy:1;
u64 auto_enable:1;
- u64 reserved_z0:12;
+ u64 apic_vector:8;
+ u64 direct_mode:1;
+ u64 reserved_z0:3;
u64 sintx:4;
u64 reserved_z1:44;
};
--
2.15.1
On Sun, Mar 04, 2018 at 10:17:16PM -0700, [email protected] wrote:
> From: "[email protected]" <[email protected]>
That's not a "name" :(
Please fix and resend.
thanks,
greg k-h