In order to support Linux to run as a pv guest on machines with huge
memory (>16TB) the kernel has to support grant table interface v2, as
v1 is limited to 32 bit frame numbers.
This series re-adds that support (it has been removed in 2015) and
restricts usage of v2 to the features of v1 in order to support
migration to hosts which only support v1.
V2 is selected only in case the host has memory frames populated
above the 16TB border.
Juergen Gross (4):
xen: re-introduce support for grant v2 interface
xen: limit grant v2 interface to the v1 functionality
xen: add grant interface version dependent constants to gnttab_ops
xen: select grant interface version
arch/arm/xen/grant-table.c | 9 +-
arch/x86/xen/grant-table.c | 60 +++++++++++-
drivers/xen/grant-table.c | 234 ++++++++++++++++++++++++++++++++++++++++-----
include/xen/grant_table.h | 5 +-
4 files changed, 276 insertions(+), 32 deletions(-)
--
2.12.3
The grant v2 support was removed from the kernel with
commit 438b33c7145ca8a5131a30c36d8f59bce119a19a ("xen/grant-table:
remove support for V2 tables") as the higher memory footprint of v2
grants resulted in less grants being possible for a kernel compared
to the v1 grant interface.
As machines with more than 16TB of memory are expected to be more
common in the near future support of grant v2 is mandatory in order
to be able to run a Xen pv domain at any memory location.
So re-add grant v2 support basically by reverting above commit.
Signed-off-by: Juergen Gross <[email protected]>
---
arch/arm/xen/grant-table.c | 9 +-
arch/x86/xen/grant-table.c | 60 ++++++++-
drivers/xen/grant-table.c | 312 ++++++++++++++++++++++++++++++++++++++++++++-
include/xen/grant_table.h | 30 ++++-
4 files changed, 398 insertions(+), 13 deletions(-)
diff --git a/arch/arm/xen/grant-table.c b/arch/arm/xen/grant-table.c
index e43791829ace..91cf08ba1e95 100644
--- a/arch/arm/xen/grant-table.c
+++ b/arch/arm/xen/grant-table.c
@@ -45,7 +45,14 @@ void arch_gnttab_unmap(void *shared, unsigned long nr_gframes)
return;
}
-int arch_gnttab_init(unsigned long nr_shared)
+int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes,
+ unsigned long max_nr_gframes,
+ grant_status_t **__shared)
+{
+ return -ENOSYS;
+}
+
+int arch_gnttab_init(unsigned long nr_shared, unsigned long nr_status)
{
return 0;
}
diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c
index 809b6c812654..92ccc718152d 100644
--- a/arch/x86/xen/grant-table.c
+++ b/arch/x86/xen/grant-table.c
@@ -49,7 +49,7 @@
static struct gnttab_vm_area {
struct vm_struct *area;
pte_t **ptes;
-} gnttab_shared_vm_area;
+} gnttab_shared_vm_area, gnttab_status_vm_area;
int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
unsigned long max_nr_gframes,
@@ -73,16 +73,43 @@ int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
return 0;
}
+int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes,
+ unsigned long max_nr_gframes,
+ grant_status_t **__shared)
+{
+ grant_status_t *shared = *__shared;
+ unsigned long addr;
+ unsigned long i;
+
+ if (shared == NULL)
+ *__shared = shared = gnttab_status_vm_area.area->addr;
+
+ addr = (unsigned long)shared;
+
+ for (i = 0; i < nr_gframes; i++) {
+ set_pte_at(&init_mm, addr, gnttab_status_vm_area.ptes[i],
+ mfn_pte(frames[i], PAGE_KERNEL));
+ addr += PAGE_SIZE;
+ }
+
+ return 0;
+}
+
void arch_gnttab_unmap(void *shared, unsigned long nr_gframes)
{
+ pte_t **ptes;
unsigned long addr;
unsigned long i;
+ if (shared == gnttab_status_vm_area.area->addr)
+ ptes = gnttab_status_vm_area.ptes;
+ else
+ ptes = gnttab_shared_vm_area.ptes;
+
addr = (unsigned long)shared;
for (i = 0; i < nr_gframes; i++) {
- set_pte_at(&init_mm, addr, gnttab_shared_vm_area.ptes[i],
- __pte(0));
+ set_pte_at(&init_mm, addr, ptes[i], __pte(0));
addr += PAGE_SIZE;
}
}
@@ -102,12 +129,35 @@ static int arch_gnttab_valloc(struct gnttab_vm_area *area, unsigned nr_frames)
return 0;
}
-int arch_gnttab_init(unsigned long nr_shared)
+static void arch_gnttab_vfree(struct gnttab_vm_area *area)
{
+ free_vm_area(area->area);
+ kfree(area->ptes);
+}
+
+int arch_gnttab_init(unsigned long nr_shared, unsigned long nr_status)
+{
+ int ret;
+
if (!xen_pv_domain())
return 0;
- return arch_gnttab_valloc(&gnttab_shared_vm_area, nr_shared);
+ ret = arch_gnttab_valloc(&gnttab_shared_vm_area, nr_shared);
+ if (ret < 0)
+ return ret;
+
+ /*
+ * Always allocate the space for the status frames in case
+ * we're migrated to a host with V2 support.
+ */
+ ret = arch_gnttab_valloc(&gnttab_status_vm_area, nr_status);
+ if (ret < 0)
+ goto err;
+
+ return 0;
+err:
+ arch_gnttab_vfree(&gnttab_shared_vm_area);
+ return -ENOMEM;
}
#ifdef CONFIG_XEN_PVH
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 2c6a9114d332..65c4bdb0b463 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -71,6 +71,7 @@ struct grant_frames xen_auto_xlat_grant_frames;
static union {
struct grant_entry_v1 *v1;
+ union grant_entry_v2 *v2;
void *addr;
} gnttab_shared;
@@ -121,6 +122,29 @@ struct gnttab_ops {
* by bit operations.
*/
int (*query_foreign_access)(grant_ref_t ref);
+ /*
+ * Grant a domain to access a range of bytes within the page referred by
+ * an available grant entry. Ref parameter is reference of a grant entry
+ * which will be sub-page accessed, domid is id of grantee domain, frame
+ * is frame address of subpage grant, flags is grant type and flag
+ * information, page_off is offset of the range of bytes, and length is
+ * length of bytes to be accessed.
+ */
+ void (*update_subpage_entry)(grant_ref_t ref, domid_t domid,
+ unsigned long frame, int flags,
+ unsigned page_off, unsigned length);
+ /*
+ * Redirect an available grant entry on domain A to another grant
+ * reference of domain B, then allow domain C to use grant reference
+ * of domain B transitively. Ref parameter is an available grant entry
+ * reference on domain A, domid is id of domain C which accesses grant
+ * entry transitively, flags is grant type and flag information,
+ * trans_domid is id of domain B whose grant entry is finally accessed
+ * transitively, trans_gref is grant entry transitive reference of
+ * domain B.
+ */
+ void (*update_trans_entry)(grant_ref_t ref, domid_t domid, int flags,
+ domid_t trans_domid, grant_ref_t trans_gref);
};
struct unmap_refs_callback_data {
@@ -130,6 +154,9 @@ struct unmap_refs_callback_data {
static const struct gnttab_ops *gnttab_interface;
+/* This reflects status of grant entries, so act as a global value. */
+static grant_status_t *grstatus;
+
static int grant_table_version;
static int grefs_per_grant_frame;
@@ -138,6 +165,7 @@ static struct gnttab_free_callback *gnttab_free_callback_list;
static int gnttab_expand(unsigned int req_entries);
#define RPP (PAGE_SIZE / sizeof(grant_ref_t))
+#define SPP (PAGE_SIZE / sizeof(grant_status_t))
static inline grant_ref_t *__gnttab_entry(grant_ref_t entry)
{
@@ -210,7 +238,7 @@ static void put_free_entry(grant_ref_t ref)
}
/*
- * Following applies to gnttab_update_entry_v1.
+ * Following applies to gnttab_update_entry_v1 and gnttab_update_entry_v2.
* Introducing a valid entry into the grant table:
* 1. Write ent->domid.
* 2. Write ent->frame:
@@ -229,6 +257,15 @@ static void gnttab_update_entry_v1(grant_ref_t ref, domid_t domid,
gnttab_shared.v1[ref].flags = flags;
}
+static void gnttab_update_entry_v2(grant_ref_t ref, domid_t domid,
+ unsigned long frame, unsigned int flags)
+{
+ gnttab_shared.v2[ref].hdr.domid = domid;
+ gnttab_shared.v2[ref].full_page.frame = frame;
+ wmb(); /* Hypervisor concurrent accesses. */
+ gnttab_shared.v2[ref].hdr.flags = GTF_permit_access | flags;
+}
+
/*
* Public grant-issuing interface functions
*/
@@ -255,11 +292,132 @@ int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
}
EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access);
+static void gnttab_update_subpage_entry_v2(grant_ref_t ref, domid_t domid,
+ unsigned long frame, int flags,
+ unsigned page_off, unsigned length)
+{
+ gnttab_shared.v2[ref].sub_page.frame = frame;
+ gnttab_shared.v2[ref].sub_page.page_off = page_off;
+ gnttab_shared.v2[ref].sub_page.length = length;
+ gnttab_shared.v2[ref].hdr.domid = domid;
+ wmb();
+ gnttab_shared.v2[ref].hdr.flags =
+ GTF_permit_access | GTF_sub_page | flags;
+}
+
+int gnttab_grant_foreign_access_subpage_ref(grant_ref_t ref, domid_t domid,
+ unsigned long frame, int flags,
+ unsigned page_off,
+ unsigned length)
+{
+ if (flags & (GTF_accept_transfer | GTF_reading |
+ GTF_writing | GTF_transitive))
+ return -EPERM;
+
+ if (gnttab_interface->update_subpage_entry == NULL)
+ return -ENOSYS;
+
+ gnttab_interface->update_subpage_entry(ref, domid, frame, flags,
+ page_off, length);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_subpage_ref);
+
+int gnttab_grant_foreign_access_subpage(domid_t domid, unsigned long frame,
+ int flags, unsigned page_off,
+ unsigned length)
+{
+ int ref, rc;
+
+ ref = get_free_entries(1);
+ if (unlikely(ref < 0))
+ return -ENOSPC;
+
+ rc = gnttab_grant_foreign_access_subpage_ref(ref, domid, frame, flags,
+ page_off, length);
+ if (rc < 0) {
+ put_free_entry(ref);
+ return rc;
+ }
+
+ return ref;
+}
+EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_subpage);
+
+bool gnttab_subpage_grants_available(void)
+{
+ return gnttab_interface->update_subpage_entry != NULL;
+}
+EXPORT_SYMBOL_GPL(gnttab_subpage_grants_available);
+
+static void gnttab_update_trans_entry_v2(grant_ref_t ref, domid_t domid,
+ int flags, domid_t trans_domid,
+ grant_ref_t trans_gref)
+{
+ gnttab_shared.v2[ref].transitive.trans_domid = trans_domid;
+ gnttab_shared.v2[ref].transitive.gref = trans_gref;
+ gnttab_shared.v2[ref].hdr.domid = domid;
+ wmb();
+ gnttab_shared.v2[ref].hdr.flags =
+ GTF_permit_access | GTF_transitive | flags;
+}
+
+int gnttab_grant_foreign_access_trans_ref(grant_ref_t ref, domid_t domid,
+ int flags, domid_t trans_domid,
+ grant_ref_t trans_gref)
+{
+ if (flags & (GTF_accept_transfer | GTF_reading |
+ GTF_writing | GTF_sub_page))
+ return -EPERM;
+
+ if (gnttab_interface->update_trans_entry == NULL)
+ return -ENOSYS;
+
+ gnttab_interface->update_trans_entry(ref, domid, flags, trans_domid,
+ trans_gref);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_trans_ref);
+
+int gnttab_grant_foreign_access_trans(domid_t domid, int flags,
+ domid_t trans_domid,
+ grant_ref_t trans_gref)
+{
+ int ref, rc;
+
+ ref = get_free_entries(1);
+ if (unlikely(ref < 0))
+ return -ENOSPC;
+
+ rc = gnttab_grant_foreign_access_trans_ref(ref, domid, flags,
+ trans_domid, trans_gref);
+ if (rc < 0) {
+ put_free_entry(ref);
+ return rc;
+ }
+
+ return ref;
+}
+EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_trans);
+
+bool gnttab_trans_grants_available(void)
+{
+ return gnttab_interface->update_trans_entry != NULL;
+}
+EXPORT_SYMBOL_GPL(gnttab_trans_grants_available);
+
static int gnttab_query_foreign_access_v1(grant_ref_t ref)
{
return gnttab_shared.v1[ref].flags & (GTF_reading|GTF_writing);
}
+static int gnttab_query_foreign_access_v2(grant_ref_t ref)
+{
+ return grstatus[ref] & (GTF_reading|GTF_writing);
+}
+
int gnttab_query_foreign_access(grant_ref_t ref)
{
return gnttab_interface->query_foreign_access(ref);
@@ -282,6 +440,29 @@ static int gnttab_end_foreign_access_ref_v1(grant_ref_t ref, int readonly)
return 1;
}
+static int gnttab_end_foreign_access_ref_v2(grant_ref_t ref, int readonly)
+{
+ gnttab_shared.v2[ref].hdr.flags = 0;
+ mb(); /* Concurrent access by hypervisor. */
+ if (grstatus[ref] & (GTF_reading|GTF_writing)) {
+ return 0;
+ } else {
+ /*
+ * The read of grstatus needs to have acquire semantics.
+ * On x86, reads already have that, and we just need to
+ * protect against compiler reorderings.
+ * On other architectures we may need a full barrier.
+ */
+#ifdef CONFIG_X86
+ barrier();
+#else
+ mb();
+#endif
+ }
+
+ return 1;
+}
+
static inline int _gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly)
{
return gnttab_interface->end_foreign_access_ref(ref, readonly);
@@ -442,6 +623,37 @@ static unsigned long gnttab_end_foreign_transfer_ref_v1(grant_ref_t ref)
return frame;
}
+static unsigned long gnttab_end_foreign_transfer_ref_v2(grant_ref_t ref)
+{
+ unsigned long frame;
+ u16 flags;
+ u16 *pflags;
+
+ pflags = &gnttab_shared.v2[ref].hdr.flags;
+
+ /*
+ * If a transfer is not even yet started, try to reclaim the grant
+ * reference and return failure (== 0).
+ */
+ while (!((flags = *pflags) & GTF_transfer_committed)) {
+ if (sync_cmpxchg(pflags, flags, 0) == flags)
+ return 0;
+ cpu_relax();
+ }
+
+ /* If a transfer is in progress then wait until it is completed. */
+ while (!(flags & GTF_transfer_completed)) {
+ flags = *pflags;
+ cpu_relax();
+ }
+
+ rmb(); /* Read the frame number /after/ reading completion status. */
+ frame = gnttab_shared.v2[ref].full_page.frame;
+ BUG_ON(frame == 0);
+
+ return frame;
+}
+
unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref)
{
return gnttab_interface->end_foreign_transfer_ref(ref);
@@ -938,6 +1150,12 @@ int gnttab_unmap_refs_sync(struct gntab_unmap_queue_data *item)
}
EXPORT_SYMBOL_GPL(gnttab_unmap_refs_sync);
+static unsigned int nr_status_frames(unsigned int nr_grant_frames)
+{
+ BUG_ON(grefs_per_grant_frame == 0);
+ return (nr_grant_frames * grefs_per_grant_frame + SPP - 1) / SPP;
+}
+
static int gnttab_map_frames_v1(xen_pfn_t *frames, unsigned int nr_gframes)
{
int rc;
@@ -955,6 +1173,55 @@ static void gnttab_unmap_frames_v1(void)
arch_gnttab_unmap(gnttab_shared.addr, nr_grant_frames);
}
+static int gnttab_map_frames_v2(xen_pfn_t *frames, unsigned int nr_gframes)
+{
+ uint64_t *sframes;
+ unsigned int nr_sframes;
+ struct gnttab_get_status_frames getframes;
+ int rc;
+
+ nr_sframes = nr_status_frames(nr_gframes);
+
+ /* No need for kzalloc as it is initialized in following hypercall
+ * GNTTABOP_get_status_frames.
+ */
+ sframes = kmalloc_array(nr_sframes, sizeof(uint64_t), GFP_ATOMIC);
+ if (!sframes)
+ return -ENOMEM;
+
+ getframes.dom = DOMID_SELF;
+ getframes.nr_frames = nr_sframes;
+ set_xen_guest_handle(getframes.frame_list, sframes);
+
+ rc = HYPERVISOR_grant_table_op(GNTTABOP_get_status_frames,
+ &getframes, 1);
+ if (rc == -ENOSYS) {
+ kfree(sframes);
+ return -ENOSYS;
+ }
+
+ BUG_ON(rc || getframes.status);
+
+ rc = arch_gnttab_map_status(sframes, nr_sframes,
+ nr_status_frames(gnttab_max_grant_frames()),
+ &grstatus);
+ BUG_ON(rc);
+ kfree(sframes);
+
+ rc = arch_gnttab_map_shared(frames, nr_gframes,
+ gnttab_max_grant_frames(),
+ &gnttab_shared.addr);
+ BUG_ON(rc);
+
+ return 0;
+}
+
+static void gnttab_unmap_frames_v2(void)
+{
+ arch_gnttab_unmap(gnttab_shared.addr, nr_grant_frames);
+ arch_gnttab_unmap(grstatus, nr_status_frames(nr_grant_frames));
+}
+
static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
{
struct gnttab_setup_table setup;
@@ -1022,13 +1289,45 @@ static const struct gnttab_ops gnttab_v1_ops = {
.query_foreign_access = gnttab_query_foreign_access_v1,
};
+static const struct gnttab_ops gnttab_v2_ops = {
+ .map_frames = gnttab_map_frames_v2,
+ .unmap_frames = gnttab_unmap_frames_v2,
+ .update_entry = gnttab_update_entry_v2,
+ .end_foreign_access_ref = gnttab_end_foreign_access_ref_v2,
+ .end_foreign_transfer_ref = gnttab_end_foreign_transfer_ref_v2,
+ .query_foreign_access = gnttab_query_foreign_access_v2,
+ .update_subpage_entry = gnttab_update_subpage_entry_v2,
+ .update_trans_entry = gnttab_update_trans_entry_v2,
+};
+
static void gnttab_request_version(void)
{
- /* Only version 1 is used, which will always be available. */
- grant_table_version = 1;
- grefs_per_grant_frame = XEN_PAGE_SIZE / sizeof(struct grant_entry_v1);
- gnttab_interface = &gnttab_v1_ops;
+ int rc;
+ struct gnttab_set_version gsv;
+ gsv.version = 1;
+
+ rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gsv, 1);
+ if (rc == 0 && gsv.version == 2) {
+ grant_table_version = 2;
+ grefs_per_grant_frame = XEN_PAGE_SIZE /
+ sizeof(union grant_entry_v2);
+ gnttab_interface = &gnttab_v2_ops;
+ } else if (grant_table_version == 2) {
+ /*
+ * If we've already used version 2 features,
+ * but then suddenly discover that they're not
+ * available (e.g. migrating to an older
+ * version of Xen), almost unbounded badness
+ * can happen.
+ */
+ panic("we need grant tables version 2, but only version 1 is available");
+ } else {
+ grant_table_version = 1;
+ grefs_per_grant_frame = XEN_PAGE_SIZE /
+ sizeof(struct grant_entry_v1);
+ gnttab_interface = &gnttab_v1_ops;
+ }
pr_info("Grant tables using version %d layout\n", grant_table_version);
}
@@ -1122,7 +1421,8 @@ int gnttab_init(void)
}
}
- ret = arch_gnttab_init(max_nr_grant_frames);
+ ret = arch_gnttab_init(max_nr_grant_frames,
+ nr_status_frames(max_nr_grant_frames));
if (ret < 0)
goto ini_nomem;
diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h
index 34b1379f9777..dd6c7a32ee32 100644
--- a/include/xen/grant_table.h
+++ b/include/xen/grant_table.h
@@ -84,6 +84,24 @@ int gnttab_resume(void);
int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
int readonly);
+int gnttab_grant_foreign_access_subpage(domid_t domid, unsigned long frame,
+ int flags, unsigned page_off,
+ unsigned length);
+int gnttab_grant_foreign_access_trans(domid_t domid, int flags,
+ domid_t trans_domid,
+ grant_ref_t trans_gref);
+
+/*
+ * Are sub-page grants available on this version of Xen? Returns true if they
+ * are, and false if they're not.
+ */
+bool gnttab_subpage_grants_available(void);
+
+/*
+ * Are transitive grants available on this version of Xen? Returns true if they
+ * are, and false if they're not.
+ */
+bool gnttab_trans_grants_available(void);
/*
* End access through the given grant reference, iff the grant entry is no
@@ -130,6 +148,13 @@ void gnttab_cancel_free_callback(struct gnttab_free_callback *callback);
void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
unsigned long frame, int readonly);
+int gnttab_grant_foreign_access_subpage_ref(grant_ref_t ref, domid_t domid,
+ unsigned long frame, int flags,
+ unsigned page_off,
+ unsigned length);
+int gnttab_grant_foreign_access_trans_ref(grant_ref_t ref, domid_t domid,
+ int flags, domid_t trans_domid,
+ grant_ref_t trans_gref);
/* Give access to the first 4K of the page */
static inline void gnttab_page_grant_foreign_access_ref_one(
@@ -174,10 +199,13 @@ gnttab_set_unmap_op(struct gnttab_unmap_grant_ref *unmap, phys_addr_t addr,
unmap->dev_bus_addr = 0;
}
-int arch_gnttab_init(unsigned long nr_shared);
+int arch_gnttab_init(unsigned long nr_shared, unsigned long nr_status);
int arch_gnttab_map_shared(xen_pfn_t *frames, unsigned long nr_gframes,
unsigned long max_nr_gframes,
void **__shared);
+int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes,
+ unsigned long max_nr_gframes,
+ grant_status_t **__shared);
void arch_gnttab_unmap(void *shared, unsigned long nr_gframes);
struct grant_frames {
--
2.12.3
Based on the maximum page number of the host select either grant v1 or
grant v2.
For testing purposes add a way to specify the grant interface version
via a boot parameter.
Signed-off-by: Juergen Gross <[email protected]>
---
drivers/xen/grant-table.c | 13 +++++++++++--
1 file changed, 11 insertions(+), 2 deletions(-)
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 573af785c425..c8479cb4c0dc 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -43,6 +43,7 @@
#include <linux/hardirq.h>
#include <linux/workqueue.h>
#include <linux/ratelimit.h>
+#include <linux/moduleparam.h>
#include <xen/xen.h>
#include <xen/interface/xen.h>
@@ -68,6 +69,8 @@ static int gnttab_free_count;
static grant_ref_t gnttab_free_head;
static DEFINE_SPINLOCK(gnttab_list_lock);
struct grant_frames xen_auto_xlat_grant_frames;
+static unsigned int xen_gnttab_version;
+module_param_named(version, xen_gnttab_version, uint, 0);
static union {
struct grant_entry_v1 *v1;
@@ -1191,10 +1194,16 @@ static const struct gnttab_ops gnttab_v2_ops = {
static void gnttab_request_version(void)
{
- int rc;
+ long rc;
struct gnttab_set_version gsv;
- gsv.version = 1;
+ rc = HYPERVISOR_memory_op(XENMEM_maximum_ram_page, NULL);
+ if (rc < 0 || !(rc >> 32))
+ gsv.version = 1;
+ else
+ gsv.version = 2;
+ if (xen_gnttab_version >= 1 && xen_gnttab_version <= 2)
+ gsv.version = xen_gnttab_version;
rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gsv, 1);
if (rc == 0 && gsv.version == 2)
--
2.12.3
As there is currently no user for sub-page grants or transient grants
remove that functionality. This at once makes it possible to switch
from grant v2 to grant v1 without restrictions, as there is no loss of
functionality other than the limited frame number width related to
the switch.
Signed-off-by: Juergen Gross <[email protected]>
---
drivers/xen/grant-table.c | 138 ----------------------------------------------
include/xen/grant_table.h | 25 ---------
2 files changed, 163 deletions(-)
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 65c4bdb0b463..02451a696cc5 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -123,17 +123,6 @@ struct gnttab_ops {
*/
int (*query_foreign_access)(grant_ref_t ref);
/*
- * Grant a domain to access a range of bytes within the page referred by
- * an available grant entry. Ref parameter is reference of a grant entry
- * which will be sub-page accessed, domid is id of grantee domain, frame
- * is frame address of subpage grant, flags is grant type and flag
- * information, page_off is offset of the range of bytes, and length is
- * length of bytes to be accessed.
- */
- void (*update_subpage_entry)(grant_ref_t ref, domid_t domid,
- unsigned long frame, int flags,
- unsigned page_off, unsigned length);
- /*
* Redirect an available grant entry on domain A to another grant
* reference of domain B, then allow domain C to use grant reference
* of domain B transitively. Ref parameter is an available grant entry
@@ -292,122 +281,6 @@ int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
}
EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access);
-static void gnttab_update_subpage_entry_v2(grant_ref_t ref, domid_t domid,
- unsigned long frame, int flags,
- unsigned page_off, unsigned length)
-{
- gnttab_shared.v2[ref].sub_page.frame = frame;
- gnttab_shared.v2[ref].sub_page.page_off = page_off;
- gnttab_shared.v2[ref].sub_page.length = length;
- gnttab_shared.v2[ref].hdr.domid = domid;
- wmb();
- gnttab_shared.v2[ref].hdr.flags =
- GTF_permit_access | GTF_sub_page | flags;
-}
-
-int gnttab_grant_foreign_access_subpage_ref(grant_ref_t ref, domid_t domid,
- unsigned long frame, int flags,
- unsigned page_off,
- unsigned length)
-{
- if (flags & (GTF_accept_transfer | GTF_reading |
- GTF_writing | GTF_transitive))
- return -EPERM;
-
- if (gnttab_interface->update_subpage_entry == NULL)
- return -ENOSYS;
-
- gnttab_interface->update_subpage_entry(ref, domid, frame, flags,
- page_off, length);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_subpage_ref);
-
-int gnttab_grant_foreign_access_subpage(domid_t domid, unsigned long frame,
- int flags, unsigned page_off,
- unsigned length)
-{
- int ref, rc;
-
- ref = get_free_entries(1);
- if (unlikely(ref < 0))
- return -ENOSPC;
-
- rc = gnttab_grant_foreign_access_subpage_ref(ref, domid, frame, flags,
- page_off, length);
- if (rc < 0) {
- put_free_entry(ref);
- return rc;
- }
-
- return ref;
-}
-EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_subpage);
-
-bool gnttab_subpage_grants_available(void)
-{
- return gnttab_interface->update_subpage_entry != NULL;
-}
-EXPORT_SYMBOL_GPL(gnttab_subpage_grants_available);
-
-static void gnttab_update_trans_entry_v2(grant_ref_t ref, domid_t domid,
- int flags, domid_t trans_domid,
- grant_ref_t trans_gref)
-{
- gnttab_shared.v2[ref].transitive.trans_domid = trans_domid;
- gnttab_shared.v2[ref].transitive.gref = trans_gref;
- gnttab_shared.v2[ref].hdr.domid = domid;
- wmb();
- gnttab_shared.v2[ref].hdr.flags =
- GTF_permit_access | GTF_transitive | flags;
-}
-
-int gnttab_grant_foreign_access_trans_ref(grant_ref_t ref, domid_t domid,
- int flags, domid_t trans_domid,
- grant_ref_t trans_gref)
-{
- if (flags & (GTF_accept_transfer | GTF_reading |
- GTF_writing | GTF_sub_page))
- return -EPERM;
-
- if (gnttab_interface->update_trans_entry == NULL)
- return -ENOSYS;
-
- gnttab_interface->update_trans_entry(ref, domid, flags, trans_domid,
- trans_gref);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_trans_ref);
-
-int gnttab_grant_foreign_access_trans(domid_t domid, int flags,
- domid_t trans_domid,
- grant_ref_t trans_gref)
-{
- int ref, rc;
-
- ref = get_free_entries(1);
- if (unlikely(ref < 0))
- return -ENOSPC;
-
- rc = gnttab_grant_foreign_access_trans_ref(ref, domid, flags,
- trans_domid, trans_gref);
- if (rc < 0) {
- put_free_entry(ref);
- return rc;
- }
-
- return ref;
-}
-EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_trans);
-
-bool gnttab_trans_grants_available(void)
-{
- return gnttab_interface->update_trans_entry != NULL;
-}
-EXPORT_SYMBOL_GPL(gnttab_trans_grants_available);
-
static int gnttab_query_foreign_access_v1(grant_ref_t ref)
{
return gnttab_shared.v1[ref].flags & (GTF_reading|GTF_writing);
@@ -1296,8 +1169,6 @@ static const struct gnttab_ops gnttab_v2_ops = {
.end_foreign_access_ref = gnttab_end_foreign_access_ref_v2,
.end_foreign_transfer_ref = gnttab_end_foreign_transfer_ref_v2,
.query_foreign_access = gnttab_query_foreign_access_v2,
- .update_subpage_entry = gnttab_update_subpage_entry_v2,
- .update_trans_entry = gnttab_update_trans_entry_v2,
};
static void gnttab_request_version(void)
@@ -1313,15 +1184,6 @@ static void gnttab_request_version(void)
grefs_per_grant_frame = XEN_PAGE_SIZE /
sizeof(union grant_entry_v2);
gnttab_interface = &gnttab_v2_ops;
- } else if (grant_table_version == 2) {
- /*
- * If we've already used version 2 features,
- * but then suddenly discover that they're not
- * available (e.g. migrating to an older
- * version of Xen), almost unbounded badness
- * can happen.
- */
- panic("we need grant tables version 2, but only version 1 is available");
} else {
grant_table_version = 1;
grefs_per_grant_frame = XEN_PAGE_SIZE /
diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h
index dd6c7a32ee32..2e37741f6b8d 100644
--- a/include/xen/grant_table.h
+++ b/include/xen/grant_table.h
@@ -84,24 +84,6 @@ int gnttab_resume(void);
int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
int readonly);
-int gnttab_grant_foreign_access_subpage(domid_t domid, unsigned long frame,
- int flags, unsigned page_off,
- unsigned length);
-int gnttab_grant_foreign_access_trans(domid_t domid, int flags,
- domid_t trans_domid,
- grant_ref_t trans_gref);
-
-/*
- * Are sub-page grants available on this version of Xen? Returns true if they
- * are, and false if they're not.
- */
-bool gnttab_subpage_grants_available(void);
-
-/*
- * Are transitive grants available on this version of Xen? Returns true if they
- * are, and false if they're not.
- */
-bool gnttab_trans_grants_available(void);
/*
* End access through the given grant reference, iff the grant entry is no
@@ -148,13 +130,6 @@ void gnttab_cancel_free_callback(struct gnttab_free_callback *callback);
void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
unsigned long frame, int readonly);
-int gnttab_grant_foreign_access_subpage_ref(grant_ref_t ref, domid_t domid,
- unsigned long frame, int flags,
- unsigned page_off,
- unsigned length);
-int gnttab_grant_foreign_access_trans_ref(grant_ref_t ref, domid_t domid,
- int flags, domid_t trans_domid,
- grant_ref_t trans_gref);
/* Give access to the first 4K of the page */
static inline void gnttab_page_grant_foreign_access_ref_one(
--
2.12.3
Instead of having multiple variables with constants like
grant_table_version or grefs_per_grant_frame add those to struct
gnttab_ops and access them just via the gnttab_interface pointer.
Signed-off-by: Juergen Gross <[email protected]>
---
drivers/xen/grant-table.c | 73 ++++++++++++++++++++++++++++-------------------
1 file changed, 43 insertions(+), 30 deletions(-)
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 02451a696cc5..573af785c425 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -78,6 +78,14 @@ static union {
/*This is a structure of function pointers for grant table*/
struct gnttab_ops {
/*
+ * Version of the grant interface.
+ */
+ unsigned int version;
+ /*
+ * Grant refs per grant frame.
+ */
+ unsigned int grefs_per_grant_frame;
+ /*
* Mapping a list of frames for storing grant entries. Frames parameter
* is used to store grant table address when grant table being setup,
* nr_gframes is the number of frames to map grant table. Returning
@@ -146,9 +154,6 @@ static const struct gnttab_ops *gnttab_interface;
/* This reflects status of grant entries, so act as a global value. */
static grant_status_t *grstatus;
-static int grant_table_version;
-static int grefs_per_grant_frame;
-
static struct gnttab_free_callback *gnttab_free_callback_list;
static int gnttab_expand(unsigned int req_entries);
@@ -648,19 +653,26 @@ void gnttab_cancel_free_callback(struct gnttab_free_callback *callback)
}
EXPORT_SYMBOL_GPL(gnttab_cancel_free_callback);
+static unsigned int gnttab_frames(unsigned int frames, unsigned int align)
+{
+ return (frames * gnttab_interface->grefs_per_grant_frame + align - 1) /
+ align;
+}
+
static int grow_gnttab_list(unsigned int more_frames)
{
unsigned int new_nr_grant_frames, extra_entries, i;
unsigned int nr_glist_frames, new_nr_glist_frames;
+ unsigned int grefs_per_frame;
- BUG_ON(grefs_per_grant_frame == 0);
+ BUG_ON(gnttab_interface == NULL);
+ grefs_per_frame = gnttab_interface->grefs_per_grant_frame;
new_nr_grant_frames = nr_grant_frames + more_frames;
- extra_entries = more_frames * grefs_per_grant_frame;
+ extra_entries = more_frames * grefs_per_frame;
- nr_glist_frames = (nr_grant_frames * grefs_per_grant_frame + RPP - 1) / RPP;
- new_nr_glist_frames =
- (new_nr_grant_frames * grefs_per_grant_frame + RPP - 1) / RPP;
+ nr_glist_frames = gnttab_frames(nr_grant_frames, RPP);
+ new_nr_glist_frames = gnttab_frames(new_nr_grant_frames, RPP);
for (i = nr_glist_frames; i < new_nr_glist_frames; i++) {
gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_ATOMIC);
if (!gnttab_list[i])
@@ -668,12 +680,12 @@ static int grow_gnttab_list(unsigned int more_frames)
}
- for (i = grefs_per_grant_frame * nr_grant_frames;
- i < grefs_per_grant_frame * new_nr_grant_frames - 1; i++)
+ for (i = grefs_per_frame * nr_grant_frames;
+ i < grefs_per_frame * new_nr_grant_frames - 1; i++)
gnttab_entry(i) = i + 1;
gnttab_entry(i) = gnttab_free_head;
- gnttab_free_head = grefs_per_grant_frame * nr_grant_frames;
+ gnttab_free_head = grefs_per_frame * nr_grant_frames;
gnttab_free_count += extra_entries;
nr_grant_frames = new_nr_grant_frames;
@@ -1025,8 +1037,8 @@ EXPORT_SYMBOL_GPL(gnttab_unmap_refs_sync);
static unsigned int nr_status_frames(unsigned int nr_grant_frames)
{
- BUG_ON(grefs_per_grant_frame == 0);
- return (nr_grant_frames * grefs_per_grant_frame + SPP - 1) / SPP;
+ BUG_ON(gnttab_interface == NULL);
+ return gnttab_frames(nr_grant_frames, SPP);
}
static int gnttab_map_frames_v1(xen_pfn_t *frames, unsigned int nr_gframes)
@@ -1154,6 +1166,9 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
}
static const struct gnttab_ops gnttab_v1_ops = {
+ .version = 1,
+ .grefs_per_grant_frame = XEN_PAGE_SIZE /
+ sizeof(struct grant_entry_v1),
.map_frames = gnttab_map_frames_v1,
.unmap_frames = gnttab_unmap_frames_v1,
.update_entry = gnttab_update_entry_v1,
@@ -1163,6 +1178,9 @@ static const struct gnttab_ops gnttab_v1_ops = {
};
static const struct gnttab_ops gnttab_v2_ops = {
+ .version = 2,
+ .grefs_per_grant_frame = XEN_PAGE_SIZE /
+ sizeof(union grant_entry_v2),
.map_frames = gnttab_map_frames_v2,
.unmap_frames = gnttab_unmap_frames_v2,
.update_entry = gnttab_update_entry_v2,
@@ -1179,18 +1197,12 @@ static void gnttab_request_version(void)
gsv.version = 1;
rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gsv, 1);
- if (rc == 0 && gsv.version == 2) {
- grant_table_version = 2;
- grefs_per_grant_frame = XEN_PAGE_SIZE /
- sizeof(union grant_entry_v2);
+ if (rc == 0 && gsv.version == 2)
gnttab_interface = &gnttab_v2_ops;
- } else {
- grant_table_version = 1;
- grefs_per_grant_frame = XEN_PAGE_SIZE /
- sizeof(struct grant_entry_v1);
+ else
gnttab_interface = &gnttab_v1_ops;
- }
- pr_info("Grant tables using version %d layout\n", grant_table_version);
+ pr_info("Grant tables using version %d layout\n",
+ gnttab_interface->version);
}
static int gnttab_setup(void)
@@ -1230,10 +1242,10 @@ static int gnttab_expand(unsigned int req_entries)
int rc;
unsigned int cur, extra;
- BUG_ON(grefs_per_grant_frame == 0);
+ BUG_ON(gnttab_interface == NULL);
cur = nr_grant_frames;
- extra = ((req_entries + (grefs_per_grant_frame-1)) /
- grefs_per_grant_frame);
+ extra = ((req_entries + gnttab_interface->grefs_per_grant_frame - 1) /
+ gnttab_interface->grefs_per_grant_frame);
if (cur + extra > gnttab_max_grant_frames()) {
pr_warn_ratelimited("xen/grant-table: max_grant_frames reached"
" cur=%u extra=%u limit=%u"
@@ -1265,16 +1277,16 @@ int gnttab_init(void)
/* Determine the maximum number of frames required for the
* grant reference free list on the current hypervisor.
*/
- BUG_ON(grefs_per_grant_frame == 0);
+ BUG_ON(gnttab_interface == NULL);
max_nr_glist_frames = (max_nr_grant_frames *
- grefs_per_grant_frame / RPP);
+ gnttab_interface->grefs_per_grant_frame / RPP);
gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *),
GFP_KERNEL);
if (gnttab_list == NULL)
return -ENOMEM;
- nr_glist_frames = (nr_grant_frames * grefs_per_grant_frame + RPP - 1) / RPP;
+ nr_glist_frames = gnttab_frames(nr_grant_frames, RPP);
for (i = 0; i < nr_glist_frames; i++) {
gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_KERNEL);
if (gnttab_list[i] == NULL) {
@@ -1293,7 +1305,8 @@ int gnttab_init(void)
goto ini_nomem;
}
- nr_init_grefs = nr_grant_frames * grefs_per_grant_frame;
+ nr_init_grefs = nr_grant_frames *
+ gnttab_interface->grefs_per_grant_frame;
for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++)
gnttab_entry(i) = i + 1;
--
2.12.3
> +int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes,
> + unsigned long max_nr_gframes,
> + grant_status_t **__shared)
> +{
> + grant_status_t *shared = *__shared;
> + unsigned long addr;
> + unsigned long i;
> +
> + if (shared == NULL)
> + *__shared = shared = gnttab_status_vm_area.area->addr;
> +
> + addr = (unsigned long)shared;
> +
> + for (i = 0; i < nr_gframes; i++) {
> + set_pte_at(&init_mm, addr, gnttab_status_vm_area.ptes[i],
> + mfn_pte(frames[i], PAGE_KERNEL));
> + addr += PAGE_SIZE;
> + }
> +
> + return 0;
> +}
This looks pretty much identical to arch_gnttab_map_shared() except for
gnttab_shared_vm_area vs. gnttab_status_vm_area,which can be passed in
as a parameter.
> +
> void arch_gnttab_unmap(void *shared, unsigned long nr_gframes)
> {
> + pte_t **ptes;
> unsigned long addr;
> unsigned long i;
>
> + if (shared == gnttab_status_vm_area.area->addr)
> + ptes = gnttab_status_vm_area.ptes;
> + else
> + ptes = gnttab_shared_vm_area.ptes;
> +
> addr = (unsigned long)shared;
>
> for (i = 0; i < nr_gframes; i++) {
> - set_pte_at(&init_mm, addr, gnttab_shared_vm_area.ptes[i],
> - __pte(0));
> + set_pte_at(&init_mm, addr, ptes[i], __pte(0));
> addr += PAGE_SIZE;
> }
And this too looks like can be factored out (to something like
arch_update_gnttab()). But perhaps not in this patch.
> }
> @@ -102,12 +129,35 @@ static int arch_gnttab_valloc(struct gnttab_vm_area *area, unsigned nr_frames)
> return 0;
> }
>
> -int arch_gnttab_init(unsigned long nr_shared)
> +static void arch_gnttab_vfree(struct gnttab_vm_area *area)
> {
> + free_vm_area(area->area);
> + kfree(area->ptes);
> +}
Not sure there is need for this routine. It is used only once.
Also, as an overall comment -- It feels like there may be too many
BUG_ON()s.
-boris
On 12/09/17 17:41, Boris Ostrovsky wrote:
>
>> +int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes,
>> + unsigned long max_nr_gframes,
>> + grant_status_t **__shared)
>> +{
>> + grant_status_t *shared = *__shared;
>> + unsigned long addr;
>> + unsigned long i;
>> +
>> + if (shared == NULL)
>> + *__shared = shared = gnttab_status_vm_area.area->addr;
>> +
>> + addr = (unsigned long)shared;
>> +
>> + for (i = 0; i < nr_gframes; i++) {
>> + set_pte_at(&init_mm, addr, gnttab_status_vm_area.ptes[i],
>> + mfn_pte(frames[i], PAGE_KERNEL));
>> + addr += PAGE_SIZE;
>> + }
>> +
>> + return 0;
>> +}
>
> This looks pretty much identical to arch_gnttab_map_shared() except for
> gnttab_shared_vm_area vs. gnttab_status_vm_area,which can be passed in
> as a parameter.
>
>> +
>> void arch_gnttab_unmap(void *shared, unsigned long nr_gframes)
>> {
>> + pte_t **ptes;
>> unsigned long addr;
>> unsigned long i;
>>
>> + if (shared == gnttab_status_vm_area.area->addr)
>> + ptes = gnttab_status_vm_area.ptes;
>> + else
>> + ptes = gnttab_shared_vm_area.ptes;
>> +
>> addr = (unsigned long)shared;
>>
>> for (i = 0; i < nr_gframes; i++) {
>> - set_pte_at(&init_mm, addr, gnttab_shared_vm_area.ptes[i],
>> - __pte(0));
>> + set_pte_at(&init_mm, addr, ptes[i], __pte(0));
>> addr += PAGE_SIZE;
>> }
>
> And this too looks like can be factored out (to something like
> arch_update_gnttab()). But perhaps not in this patch.
>
>> }
>> @@ -102,12 +129,35 @@ static int arch_gnttab_valloc(struct gnttab_vm_area *area, unsigned nr_frames)
>> return 0;
>> }
>>
>> -int arch_gnttab_init(unsigned long nr_shared)
>> +static void arch_gnttab_vfree(struct gnttab_vm_area *area)
>> {
>> + free_vm_area(area->area);
>> + kfree(area->ptes);
>> +}
>
> Not sure there is need for this routine. It is used only once.
>
> Also, as an overall comment -- It feels like there may be too many
> BUG_ON()s.
This patch is mostly a revert of David's patch removing grant v2 support
with just very few adaptions. I wanted to keep it as similar to the
former grant v2 support as possible.
Juergen
On 09/08/2017 10:48 AM, Juergen Gross wrote:
> As there is currently no user for sub-page grants or transient grants
> remove that functionality. This at once makes it possible to switch
> from grant v2 to grant v1 without restrictions, as there is no loss of
> functionality other than the limited frame number width related to
> the switch.
But isn't that ABI violation? v2 is expected to support this (XSAs
notwithstanding)
-boris
On 12/09/17 17:44, Boris Ostrovsky wrote:
> On 09/08/2017 10:48 AM, Juergen Gross wrote:
>> As there is currently no user for sub-page grants or transient grants
>> remove that functionality. This at once makes it possible to switch
>> from grant v2 to grant v1 without restrictions, as there is no loss of
>> functionality other than the limited frame number width related to
>> the switch.
>
> But isn't that ABI violation? v2 is expected to support this (XSAs
> notwithstanding)
No, I don't think so.
The hypervisor still supports it, but the domU (or dom0) isn't required
to make use of all the features IMHO. Or are you aware of any backend
querying the grant version of a frontend and acting in another way if v2
is detected?
Juergen
On 09/08/2017 10:48 AM, Juergen Gross wrote:
> Instead of having multiple variables with constants like
> grant_table_version or grefs_per_grant_frame add those to struct
> gnttab_ops and access them just via the gnttab_interface pointer.
>
> Signed-off-by: Juergen Gross <[email protected]>
One possible suggestion --- define gnttab_sframes() inline and get rid
of RPP/SPP.
But either way:
Reviewed-by: Boris Ostrovsky <[email protected]>
-boris
On 09/12/2017 11:50 AM, Juergen Gross wrote:
> On 12/09/17 17:44, Boris Ostrovsky wrote:
>> On 09/08/2017 10:48 AM, Juergen Gross wrote:
>>> As there is currently no user for sub-page grants or transient grants
>>> remove that functionality. This at once makes it possible to switch
>>> from grant v2 to grant v1 without restrictions, as there is no loss of
>>> functionality other than the limited frame number width related to
>>> the switch.
>> But isn't that ABI violation? v2 is expected to support this (XSAs
>> notwithstanding)
> No, I don't think so.
>
> The hypervisor still supports it, but the domU (or dom0) isn't required
> to make use of all the features IMHO. Or are you aware of any backend
> querying the grant version of a frontend and acting in another way if v2
> is detected?
I am not aware of any but that doesn't mean that they don't (or won't)
exist.
-boris
On 12/09/17 18:05, Boris Ostrovsky wrote:
> On 09/12/2017 11:50 AM, Juergen Gross wrote:
>> On 12/09/17 17:44, Boris Ostrovsky wrote:
>>> On 09/08/2017 10:48 AM, Juergen Gross wrote:
>>>> As there is currently no user for sub-page grants or transient grants
>>>> remove that functionality. This at once makes it possible to switch
>>>> from grant v2 to grant v1 without restrictions, as there is no loss of
>>>> functionality other than the limited frame number width related to
>>>> the switch.
>>> But isn't that ABI violation? v2 is expected to support this (XSAs
>>> notwithstanding)
>> No, I don't think so.
>>
>> The hypervisor still supports it, but the domU (or dom0) isn't required
>> to make use of all the features IMHO. Or are you aware of any backend
>> querying the grant version of a frontend and acting in another way if v2
>> is detected?
>
> I am not aware of any but that doesn't mean that they don't (or won't)
> exist.
But isn't the frontend the one which is defining what is granted in
which way? How should there be an ABI breakage when the frontend just
isn't using sub-page or transitive grants?
Juergen
On 09/08/2017 10:48 AM, Juergen Gross wrote:
> Based on the maximum page number of the host select either grant v1 or
> grant v2.
>
> For testing purposes add a way to specify the grant interface version
> via a boot parameter.
>
> Signed-off-by: Juergen Gross <[email protected]>
Reviewed-by: Boris Ostrovsky <[email protected]>
On 09/12/2017 12:09 PM, Juergen Gross wrote:
> On 12/09/17 18:05, Boris Ostrovsky wrote:
>> On 09/12/2017 11:50 AM, Juergen Gross wrote:
>>> On 12/09/17 17:44, Boris Ostrovsky wrote:
>>>> On 09/08/2017 10:48 AM, Juergen Gross wrote:
>>>>> As there is currently no user for sub-page grants or transient grants
>>>>> remove that functionality. This at once makes it possible to switch
>>>>> from grant v2 to grant v1 without restrictions, as there is no loss of
>>>>> functionality other than the limited frame number width related to
>>>>> the switch.
>>>> But isn't that ABI violation? v2 is expected to support this (XSAs
>>>> notwithstanding)
>>> No, I don't think so.
>>>
>>> The hypervisor still supports it, but the domU (or dom0) isn't required
>>> to make use of all the features IMHO. Or are you aware of any backend
>>> querying the grant version of a frontend and acting in another way if v2
>>> is detected?
>> I am not aware of any but that doesn't mean that they don't (or won't)
>> exist.
> But isn't the frontend the one which is defining what is granted in
> which way? How should there be an ABI breakage when the frontend just
> isn't using sub-page or transitive grants?
People may provide both front and backend drivers and frontends, knowing
that v2 is available, could decide to use those features.
-boris
On 12/09/17 18:21, Boris Ostrovsky wrote:
> On 09/12/2017 12:09 PM, Juergen Gross wrote:
>> On 12/09/17 18:05, Boris Ostrovsky wrote:
>>> On 09/12/2017 11:50 AM, Juergen Gross wrote:
>>>> On 12/09/17 17:44, Boris Ostrovsky wrote:
>>>>> On 09/08/2017 10:48 AM, Juergen Gross wrote:
>>>>>> As there is currently no user for sub-page grants or transient grants
>>>>>> remove that functionality. This at once makes it possible to switch
>>>>>> from grant v2 to grant v1 without restrictions, as there is no loss of
>>>>>> functionality other than the limited frame number width related to
>>>>>> the switch.
>>>>> But isn't that ABI violation? v2 is expected to support this (XSAs
>>>>> notwithstanding)
>>>> No, I don't think so.
>>>>
>>>> The hypervisor still supports it, but the domU (or dom0) isn't required
>>>> to make use of all the features IMHO. Or are you aware of any backend
>>>> querying the grant version of a frontend and acting in another way if v2
>>>> is detected?
>>> I am not aware of any but that doesn't mean that they don't (or won't)
>>> exist.
>> But isn't the frontend the one which is defining what is granted in
>> which way? How should there be an ABI breakage when the frontend just
>> isn't using sub-page or transitive grants?
>
> People may provide both front and backend drivers and frontends, knowing
> that v2 is available, could decide to use those features.
No, without the functions to use them it will be impossible. So it won't
hit them on a random system by not working, but they would not be able
to load such a driver (same as today without V2 support).
In case they really want it they can send patches for support of subpage
or transient grants. Like they would have to do for complete V2 support
today.
Juergen
On 08/09/17 15:48, Juergen Gross wrote:
> static void gnttab_request_version(void)
> {
> - int rc;
> + long rc;
> struct gnttab_set_version gsv;
>
> - gsv.version = 1;
> + rc = HYPERVISOR_memory_op(XENMEM_maximum_ram_page, NULL);
This hypercall is information leak and layering violation. Please can
we avoid adding more dependence on its presence? (I'm got a
proto-series which strips various corners off the hypervisor for attack
surface reduction purposes, and this hypercall is one victim which is
restricted to privileged domains only.)
For translated guests, it is definitely not the right number to check.
What matters is the maximum frame inside the translated guest, not on
the host.
For PV guests, I'm not sure what to suggest, but the result of
XENMEM_maximum_ram_page isn't applicable. Xen's max_page can increase
at runtime through memory hotplug, after which ballooning operations can
leave Linux with a frame it wishes to grant which exceeds the limit
calculated here.
The more I look into this, the more of a mess it turns out to be.
~Andrew
> + if (rc < 0 || !(rc >> 32))
> + gsv.version = 1;
> + else
> + gsv.version = 2;
> + if (xen_gnttab_version >= 1 && xen_gnttab_version <= 2)
> + gsv.version = xen_gnttab_version;
>
> rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gsv, 1);
> if (rc == 0 && gsv.version == 2)
On 12/09/17 20:54, Andrew Cooper wrote:
> On 08/09/17 15:48, Juergen Gross wrote:
>> static void gnttab_request_version(void)
>> {
>> - int rc;
>> + long rc;
>> struct gnttab_set_version gsv;
>>
>> - gsv.version = 1;
>> + rc = HYPERVISOR_memory_op(XENMEM_maximum_ram_page, NULL);
>
> This hypercall is information leak and layering violation. Please can
> we avoid adding more dependence on its presence? (I'm got a
> proto-series which strips various corners off the hypervisor for attack
> surface reduction purposes, and this hypercall is one victim which is
> restricted to privileged domains only.)
>
> For translated guests, it is definitely not the right number to check.
> What matters is the maximum frame inside the translated guest, not on
> the host.
Oh, right.
> For PV guests, I'm not sure what to suggest, but the result of
> XENMEM_maximum_ram_page isn't applicable. Xen's max_page can increase
> at runtime through memory hotplug, after which ballooning operations can
> leave Linux with a frame it wishes to grant which exceeds the limit
> calculated here.
We need a way to decide whether V2 is to be selected.
Is there a way to determine which is the highest physical address being
available for memory hotplug on a system? Something in ACPI tables
perhaps?
Juergen
On 09/12/2017 02:18 PM, Juergen Gross wrote:
> On 12/09/17 18:21, Boris Ostrovsky wrote:
>> On 09/12/2017 12:09 PM, Juergen Gross wrote:
>>> On 12/09/17 18:05, Boris Ostrovsky wrote:
>>>> On 09/12/2017 11:50 AM, Juergen Gross wrote:
>>>>> On 12/09/17 17:44, Boris Ostrovsky wrote:
>>>>>> On 09/08/2017 10:48 AM, Juergen Gross wrote:
>>>>>>> As there is currently no user for sub-page grants or transient grants
>>>>>>> remove that functionality. This at once makes it possible to switch
>>>>>>> from grant v2 to grant v1 without restrictions, as there is no loss of
>>>>>>> functionality other than the limited frame number width related to
>>>>>>> the switch.
>>>>>> But isn't that ABI violation? v2 is expected to support this (XSAs
>>>>>> notwithstanding)
>>>>> No, I don't think so.
>>>>>
>>>>> The hypervisor still supports it, but the domU (or dom0) isn't required
>>>>> to make use of all the features IMHO. Or are you aware of any backend
>>>>> querying the grant version of a frontend and acting in another way if v2
>>>>> is detected?
>>>> I am not aware of any but that doesn't mean that they don't (or won't)
>>>> exist.
>>> But isn't the frontend the one which is defining what is granted in
>>> which way? How should there be an ABI breakage when the frontend just
>>> isn't using sub-page or transitive grants?
>> People may provide both front and backend drivers and frontends, knowing
>> that v2 is available, could decide to use those features.
> No, without the functions to use them it will be impossible.
I don't follow this. Which functions? The ones this patch is removing?
-boris
> So it won't
> hit them on a random system by not working, but they would not be able
> to load such a driver (same as today without V2 support).
>
> In case they really want it they can send patches for support of subpage
> or transient grants. Like they would have to do for complete V2 support
> today.
>
>
> Juergen
>
On 13/09/17 15:22, Boris Ostrovsky wrote:
> On 09/12/2017 02:18 PM, Juergen Gross wrote:
>> On 12/09/17 18:21, Boris Ostrovsky wrote:
>>> On 09/12/2017 12:09 PM, Juergen Gross wrote:
>>>> On 12/09/17 18:05, Boris Ostrovsky wrote:
>>>>> On 09/12/2017 11:50 AM, Juergen Gross wrote:
>>>>>> On 12/09/17 17:44, Boris Ostrovsky wrote:
>>>>>>> On 09/08/2017 10:48 AM, Juergen Gross wrote:
>>>>>>>> As there is currently no user for sub-page grants or transient grants
>>>>>>>> remove that functionality. This at once makes it possible to switch
>>>>>>>> from grant v2 to grant v1 without restrictions, as there is no loss of
>>>>>>>> functionality other than the limited frame number width related to
>>>>>>>> the switch.
>>>>>>> But isn't that ABI violation? v2 is expected to support this (XSAs
>>>>>>> notwithstanding)
>>>>>> No, I don't think so.
>>>>>>
>>>>>> The hypervisor still supports it, but the domU (or dom0) isn't required
>>>>>> to make use of all the features IMHO. Or are you aware of any backend
>>>>>> querying the grant version of a frontend and acting in another way if v2
>>>>>> is detected?
>>>>> I am not aware of any but that doesn't mean that they don't (or won't)
>>>>> exist.
>>>> But isn't the frontend the one which is defining what is granted in
>>>> which way? How should there be an ABI breakage when the frontend just
>>>> isn't using sub-page or transitive grants?
>>> People may provide both front and backend drivers and frontends, knowing
>>> that v2 is available, could decide to use those features.
>> No, without the functions to use them it will be impossible.
>
> I don't follow this. Which functions? The ones this patch is removing?
Yes, just after having been added one patch earlier.
Right now the Linux kernel doesn't support grant V2 at all. So there
surely is no driver making use of V2 features right now.
Ican merge patches 1 and 2 in case you want. I just thought a pure
revert of the former V2 remove patch would be easier to review,
taking into account that the former V2 support was working in
production environments (and even back then there was no user of
sub-page or transient grants).
Juergen
On 09/13/2017 09:38 AM, Juergen Gross wrote:
> On 13/09/17 15:22, Boris Ostrovsky wrote:
>> On 09/12/2017 02:18 PM, Juergen Gross wrote:
>>> On 12/09/17 18:21, Boris Ostrovsky wrote:
>>>> On 09/12/2017 12:09 PM, Juergen Gross wrote:
>>>>> On 12/09/17 18:05, Boris Ostrovsky wrote:
>>>>>> On 09/12/2017 11:50 AM, Juergen Gross wrote:
>>>>>>> On 12/09/17 17:44, Boris Ostrovsky wrote:
>>>>>>>> On 09/08/2017 10:48 AM, Juergen Gross wrote:
>>>>>>>>> As there is currently no user for sub-page grants or transient grants
>>>>>>>>> remove that functionality. This at once makes it possible to switch
>>>>>>>>> from grant v2 to grant v1 without restrictions, as there is no loss of
>>>>>>>>> functionality other than the limited frame number width related to
>>>>>>>>> the switch.
>>>>>>>> But isn't that ABI violation? v2 is expected to support this (XSAs
>>>>>>>> notwithstanding)
>>>>>>> No, I don't think so.
>>>>>>>
>>>>>>> The hypervisor still supports it, but the domU (or dom0) isn't required
>>>>>>> to make use of all the features IMHO. Or are you aware of any backend
>>>>>>> querying the grant version of a frontend and acting in another way if v2
>>>>>>> is detected?
>>>>>> I am not aware of any but that doesn't mean that they don't (or won't)
>>>>>> exist.
>>>>> But isn't the frontend the one which is defining what is granted in
>>>>> which way? How should there be an ABI breakage when the frontend just
>>>>> isn't using sub-page or transitive grants?
>>>> People may provide both front and backend drivers and frontends, knowing
>>>> that v2 is available, could decide to use those features.
>>> No, without the functions to use them it will be impossible.
>> I don't follow this. Which functions? The ones this patch is removing?
> Yes, just after having been added one patch earlier.
>
> Right now the Linux kernel doesn't support grant V2 at all. So there
> surely is no driver making use of V2 features right now.
>
> Ican merge patches 1 and 2 in case you want. I just thought a pure
> revert of the former V2 remove patch would be easier to review,
> taking into account that the former V2 support was working in
> production environments (and even back then there was no user of
> sub-page or transient grants).
No, I don't have problems with *how* you are doing this (revert fully
first and then remove).
I am just not sure that removing these functions is the way to go
because we are ending up with partial implementation of v2. The fact
that noone is/has been using these features is IMO not particularly
relevant.
If these two were optional features then it would have been reasonable
to drop them.
-boris
On 13/09/17 15:50, Boris Ostrovsky wrote:
> On 09/13/2017 09:38 AM, Juergen Gross wrote:
>> On 13/09/17 15:22, Boris Ostrovsky wrote:
>>> On 09/12/2017 02:18 PM, Juergen Gross wrote:
>>>> On 12/09/17 18:21, Boris Ostrovsky wrote:
>>>>> On 09/12/2017 12:09 PM, Juergen Gross wrote:
>>>>>> On 12/09/17 18:05, Boris Ostrovsky wrote:
>>>>>>> On 09/12/2017 11:50 AM, Juergen Gross wrote:
>>>>>>>> On 12/09/17 17:44, Boris Ostrovsky wrote:
>>>>>>>>> On 09/08/2017 10:48 AM, Juergen Gross wrote:
>>>>>>>>>> As there is currently no user for sub-page grants or transient grants
>>>>>>>>>> remove that functionality. This at once makes it possible to switch
>>>>>>>>>> from grant v2 to grant v1 without restrictions, as there is no loss of
>>>>>>>>>> functionality other than the limited frame number width related to
>>>>>>>>>> the switch.
>>>>>>>>> But isn't that ABI violation? v2 is expected to support this (XSAs
>>>>>>>>> notwithstanding)
>>>>>>>> No, I don't think so.
>>>>>>>>
>>>>>>>> The hypervisor still supports it, but the domU (or dom0) isn't required
>>>>>>>> to make use of all the features IMHO. Or are you aware of any backend
>>>>>>>> querying the grant version of a frontend and acting in another way if v2
>>>>>>>> is detected?
>>>>>>> I am not aware of any but that doesn't mean that they don't (or won't)
>>>>>>> exist.
>>>>>> But isn't the frontend the one which is defining what is granted in
>>>>>> which way? How should there be an ABI breakage when the frontend just
>>>>>> isn't using sub-page or transitive grants?
>>>>> People may provide both front and backend drivers and frontends, knowing
>>>>> that v2 is available, could decide to use those features.
>>>> No, without the functions to use them it will be impossible.
>>> I don't follow this. Which functions? The ones this patch is removing?
>> Yes, just after having been added one patch earlier.
>>
>> Right now the Linux kernel doesn't support grant V2 at all. So there
>> surely is no driver making use of V2 features right now.
>>
>> Ican merge patches 1 and 2 in case you want. I just thought a pure
>> revert of the former V2 remove patch would be easier to review,
>> taking into account that the former V2 support was working in
>> production environments (and even back then there was no user of
>> sub-page or transient grants).
>
> No, I don't have problems with *how* you are doing this (revert fully
> first and then remove).
>
> I am just not sure that removing these functions is the way to go
> because we are ending up with partial implementation of v2. The fact
> that noone is/has been using these features is IMO not particularly
> relevant.
>
> If these two were optional features then it would have been reasonable
> to drop them.
Why does the kernel need to support all features of an interface?
I'm quite sure there are lots of interfaces supported only partially in
the kernel.
Having support for functionality in the kernel not being used at all is
just adding dead code with a high potential to bitrot. I can't even test
this functionality right now, as there are no users of it. So I'd risk
adding something which is broken from the beginning. So currently my V2
support is regarding the exported kernel interfaces the same as V1, but
without the limitation to 32 bit frame numbers.
And looking at
https://lists.xen.org/archives/html/xen-devel/2017-08/msg03194.html
I believe my partial V2 support isn't the worst idea.
Juergen
On 09/13/2017 10:45 AM, Juergen Gross wrote:
> On 13/09/17 15:50, Boris Ostrovsky wrote:
>> On 09/13/2017 09:38 AM, Juergen Gross wrote:
>>> On 13/09/17 15:22, Boris Ostrovsky wrote:
>>>> On 09/12/2017 02:18 PM, Juergen Gross wrote:
>>>>> On 12/09/17 18:21, Boris Ostrovsky wrote:
>>>>>> On 09/12/2017 12:09 PM, Juergen Gross wrote:
>>>>>>> On 12/09/17 18:05, Boris Ostrovsky wrote:
>>>>>>>> On 09/12/2017 11:50 AM, Juergen Gross wrote:
>>>>>>>>> On 12/09/17 17:44, Boris Ostrovsky wrote:
>>>>>>>>>> On 09/08/2017 10:48 AM, Juergen Gross wrote:
>>>>>>>>>>> As there is currently no user for sub-page grants or transient grants
>>>>>>>>>>> remove that functionality. This at once makes it possible to switch
>>>>>>>>>>> from grant v2 to grant v1 without restrictions, as there is no loss of
>>>>>>>>>>> functionality other than the limited frame number width related to
>>>>>>>>>>> the switch.
>>>>>>>>>> But isn't that ABI violation? v2 is expected to support this (XSAs
>>>>>>>>>> notwithstanding)
>>>>>>>>> No, I don't think so.
>>>>>>>>>
>>>>>>>>> The hypervisor still supports it, but the domU (or dom0) isn't required
>>>>>>>>> to make use of all the features IMHO. Or are you aware of any backend
>>>>>>>>> querying the grant version of a frontend and acting in another way if v2
>>>>>>>>> is detected?
>>>>>>>> I am not aware of any but that doesn't mean that they don't (or won't)
>>>>>>>> exist.
>>>>>>> But isn't the frontend the one which is defining what is granted in
>>>>>>> which way? How should there be an ABI breakage when the frontend just
>>>>>>> isn't using sub-page or transitive grants?
>>>>>> People may provide both front and backend drivers and frontends, knowing
>>>>>> that v2 is available, could decide to use those features.
>>>>> No, without the functions to use them it will be impossible.
>>>> I don't follow this. Which functions? The ones this patch is removing?
>>> Yes, just after having been added one patch earlier.
>>>
>>> Right now the Linux kernel doesn't support grant V2 at all. So there
>>> surely is no driver making use of V2 features right now.
>>>
>>> Ican merge patches 1 and 2 in case you want. I just thought a pure
>>> revert of the former V2 remove patch would be easier to review,
>>> taking into account that the former V2 support was working in
>>> production environments (and even back then there was no user of
>>> sub-page or transient grants).
>> No, I don't have problems with *how* you are doing this (revert fully
>> first and then remove).
>>
>> I am just not sure that removing these functions is the way to go
>> because we are ending up with partial implementation of v2. The fact
>> that noone is/has been using these features is IMO not particularly
>> relevant.
>>
>> If these two were optional features then it would have been reasonable
>> to drop them.
> Why does the kernel need to support all features of an interface?
>
> I'm quite sure there are lots of interfaces supported only partially in
> the kernel.
I don't think partially supported interface is a supported interface.
It's just something that has been working until now.
> Having support for functionality in the kernel not being used at all is
> just adding dead code with a high potential to bitrot. I can't even test
> this functionality right now, as there are no users of it. So I'd risk
> adding something which is broken from the beginning.
OK. That I haven't considered that.
BTW, why are you not removing (*update_trans_entry) definition from
gnttab_ops? You are taking (*update_subpage_entry) out.
> So currently my V2
> support is regarding the exported kernel interfaces the same as V1, but
> without the limitation to 32 bit frame numbers.
>
> And looking at
>
> https://lists.xen.org/archives/html/xen-devel/2017-08/msg03194.html
>
> I believe my partial V2 support isn't the worst idea.
I never said that ;-)
-boris
On 13/09/17 18:20, Boris Ostrovsky wrote:
> On 09/13/2017 10:45 AM, Juergen Gross wrote:
>> On 13/09/17 15:50, Boris Ostrovsky wrote:
>>> On 09/13/2017 09:38 AM, Juergen Gross wrote:
>>>> On 13/09/17 15:22, Boris Ostrovsky wrote:
>>>>> On 09/12/2017 02:18 PM, Juergen Gross wrote:
>>>>>> On 12/09/17 18:21, Boris Ostrovsky wrote:
>>>>>>> On 09/12/2017 12:09 PM, Juergen Gross wrote:
>>>>>>>> On 12/09/17 18:05, Boris Ostrovsky wrote:
>>>>>>>>> On 09/12/2017 11:50 AM, Juergen Gross wrote:
>>>>>>>>>> On 12/09/17 17:44, Boris Ostrovsky wrote:
>>>>>>>>>>> On 09/08/2017 10:48 AM, Juergen Gross wrote:
>>>>>>>>>>>> As there is currently no user for sub-page grants or transient grants
>>>>>>>>>>>> remove that functionality. This at once makes it possible to switch
>>>>>>>>>>>> from grant v2 to grant v1 without restrictions, as there is no loss of
>>>>>>>>>>>> functionality other than the limited frame number width related to
>>>>>>>>>>>> the switch.
>>>>>>>>>>> But isn't that ABI violation? v2 is expected to support this (XSAs
>>>>>>>>>>> notwithstanding)
>>>>>>>>>> No, I don't think so.
>>>>>>>>>>
>>>>>>>>>> The hypervisor still supports it, but the domU (or dom0) isn't required
>>>>>>>>>> to make use of all the features IMHO. Or are you aware of any backend
>>>>>>>>>> querying the grant version of a frontend and acting in another way if v2
>>>>>>>>>> is detected?
>>>>>>>>> I am not aware of any but that doesn't mean that they don't (or won't)
>>>>>>>>> exist.
>>>>>>>> But isn't the frontend the one which is defining what is granted in
>>>>>>>> which way? How should there be an ABI breakage when the frontend just
>>>>>>>> isn't using sub-page or transitive grants?
>>>>>>> People may provide both front and backend drivers and frontends, knowing
>>>>>>> that v2 is available, could decide to use those features.
>>>>>> No, without the functions to use them it will be impossible.
>>>>> I don't follow this. Which functions? The ones this patch is removing?
>>>> Yes, just after having been added one patch earlier.
>>>>
>>>> Right now the Linux kernel doesn't support grant V2 at all. So there
>>>> surely is no driver making use of V2 features right now.
>>>>
>>>> Ican merge patches 1 and 2 in case you want. I just thought a pure
>>>> revert of the former V2 remove patch would be easier to review,
>>>> taking into account that the former V2 support was working in
>>>> production environments (and even back then there was no user of
>>>> sub-page or transient grants).
>>> No, I don't have problems with *how* you are doing this (revert fully
>>> first and then remove).
>>>
>>> I am just not sure that removing these functions is the way to go
>>> because we are ending up with partial implementation of v2. The fact
>>> that noone is/has been using these features is IMO not particularly
>>> relevant.
>>>
>>> If these two were optional features then it would have been reasonable
>>> to drop them.
>> Why does the kernel need to support all features of an interface?
>>
>> I'm quite sure there are lots of interfaces supported only partially in
>> the kernel.
>
> I don't think partially supported interface is a supported interface.
> It's just something that has been working until now.
>
>> Having support for functionality in the kernel not being used at all is
>> just adding dead code with a high potential to bitrot. I can't even test
>> this functionality right now, as there are no users of it. So I'd risk
>> adding something which is broken from the beginning.
>
> OK. That I haven't considered that.
>
> BTW, why are you not removing (*update_trans_entry) definition from
> gnttab_ops? You are taking (*update_subpage_entry) out.
Just for having a reason to send V2. ;-)
Thanks for catching it.
Juergen
On 13/09/17 11:23, Juergen Gross wrote:
> On 12/09/17 20:54, Andrew Cooper wrote:
>> On 08/09/17 15:48, Juergen Gross wrote:
>>> static void gnttab_request_version(void)
>>> {
>>> - int rc;
>>> + long rc;
>>> struct gnttab_set_version gsv;
>>>
>>> - gsv.version = 1;
>>> + rc = HYPERVISOR_memory_op(XENMEM_maximum_ram_page, NULL);
>>
>> This hypercall is information leak and layering violation. Please can
>> we avoid adding more dependence on its presence? (I'm got a
>> proto-series which strips various corners off the hypervisor for attack
>> surface reduction purposes, and this hypercall is one victim which is
>> restricted to privileged domains only.)
>>
>> For translated guests, it is definitely not the right number to check.
>> What matters is the maximum frame inside the translated guest, not on
>> the host.
>
> Oh, right.
>
>> For PV guests, I'm not sure what to suggest, but the result of
>> XENMEM_maximum_ram_page isn't applicable. Xen's max_page can increase
>> at runtime through memory hotplug, after which ballooning operations can
>> leave Linux with a frame it wishes to grant which exceeds the limit
>> calculated here.
>
> We need a way to decide whether V2 is to be selected.
>
> Is there a way to determine which is the highest physical address being
> available for memory hotplug on a system? Something in ACPI tables
> perhaps?
So I've found the data I've searched in the hypervisor. The maximum
frame number to expect can be calculated from max_page, mem_hotplug
and the maximum physical address from cpuid node 0x80000008. If
CONFIG_BIGMEM isn't defined in Xen it is 16TB max.
The question is how to present this value to a guest. IMHO something
like the maximum address width similar to cpuid node 0x80000008
would be fine. It could be above width for pv guests and the max.
memory address of the guest for HVM guests (adding a cap for those
wouldn't be the worst idea, I guess).
What about a new subop of the xen_version hypercall?
Juergen
>>> On 15.09.17 at 15:00, <[email protected]> wrote:
> So I've found the data I've searched in the hypervisor. The maximum
> frame number to expect can be calculated from max_page, mem_hotplug
> and the maximum physical address from cpuid node 0x80000008. If
> CONFIG_BIGMEM isn't defined in Xen it is 16TB max.
>
> The question is how to present this value to a guest. IMHO something
> like the maximum address width similar to cpuid node 0x80000008
> would be fine. It could be above width for pv guests and the max.
> memory address of the guest for HVM guests (adding a cap for those
> wouldn't be the worst idea, I guess).
>
> What about a new subop of the xen_version hypercall?
I don't see how that would be a good fit; instead, with the CPUID
similarity you mention, why not provide the information in one of
Xen's CPUID leaves? Otoh I wonder whether returning max_page
from XENMEM_maximum_ram_page is really a good idea, if later
on that value may increase, so perhaps that op should take
mem_hotplug into account.
Jan