When launching a virtual machine, Gunyah userspace allocates memory for
the guest and informs Gunyah about these memory regions through
SET_USER_MEMORY_REGION ioctl.
Co-developed-by: Prakruthi Deepak Heragu <[email protected]>
Signed-off-by: Prakruthi Deepak Heragu <[email protected]>
Signed-off-by: Elliot Berman <[email protected]>
---
drivers/virt/gunyah/Makefile | 2 +-
drivers/virt/gunyah/vm_mgr.c | 46 +++++++
drivers/virt/gunyah/vm_mgr.h | 28 +++-
drivers/virt/gunyah/vm_mgr_mm.c | 223 ++++++++++++++++++++++++++++++++
include/uapi/linux/gunyah.h | 22 ++++
5 files changed, 319 insertions(+), 2 deletions(-)
create mode 100644 drivers/virt/gunyah/vm_mgr_mm.c
diff --git a/drivers/virt/gunyah/Makefile b/drivers/virt/gunyah/Makefile
index 03951cf82023..ff8bc4925392 100644
--- a/drivers/virt/gunyah/Makefile
+++ b/drivers/virt/gunyah/Makefile
@@ -2,5 +2,5 @@
obj-$(CONFIG_GUNYAH) += gunyah.o
-gunyah_rsc_mgr-y += rsc_mgr.o rsc_mgr_rpc.o vm_mgr.o
+gunyah_rsc_mgr-y += rsc_mgr.o rsc_mgr_rpc.o vm_mgr.o vm_mgr_mm.o
obj-$(CONFIG_GUNYAH) += gunyah_rsc_mgr.o
diff --git a/drivers/virt/gunyah/vm_mgr.c b/drivers/virt/gunyah/vm_mgr.c
index 0864dbd77e28..b847fde63333 100644
--- a/drivers/virt/gunyah/vm_mgr.c
+++ b/drivers/virt/gunyah/vm_mgr.c
@@ -35,14 +35,55 @@ static __must_check struct gunyah_vm *gunyah_vm_alloc(struct gh_rm_rpc *rm)
ghvm->vmid = vmid;
ghvm->rm = rm;
+ mutex_init(&ghvm->mm_lock);
+ INIT_LIST_HEAD(&ghvm->memory_mappings);
+
return ghvm;
}
static long gh_vm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
+ struct gunyah_vm *ghvm = filp->private_data;
+ void __user *argp = (void __user *)arg;
long r;
switch (cmd) {
+ case GH_VM_SET_USER_MEM_REGION: {
+ struct gunyah_vm_memory_mapping *mapping;
+ struct gh_userspace_memory_region region;
+
+ r = -EFAULT;
+ if (copy_from_user(®ion, argp, sizeof(region)))
+ break;
+
+ r = -EINVAL;
+ /* All other flag bits are reserved for future use */
+ if (region.flags & ~(GH_MEM_ALLOW_READ | GH_MEM_ALLOW_WRITE | GH_MEM_ALLOW_EXEC |
+ GH_MEM_LENT))
+ break;
+
+
+ if (region.memory_size) {
+ r = 0;
+ mapping = gh_vm_mem_mapping_alloc(ghvm, ®ion);
+ if (IS_ERR(mapping)) {
+ r = PTR_ERR(mapping);
+ break;
+ }
+ } else {
+ mapping = gh_vm_mem_mapping_find(ghvm, region.label);
+ if (IS_ERR(mapping)) {
+ r = PTR_ERR(mapping);
+ break;
+ }
+ r = 0;
+ if (!mapping)
+ break;
+ gh_vm_mem_mapping_reclaim(ghvm, mapping);
+ kfree(mapping);
+ }
+ break;
+ }
default:
r = -ENOTTY;
break;
@@ -54,7 +95,12 @@ static long gh_vm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
static int gh_vm_release(struct inode *inode, struct file *filp)
{
struct gunyah_vm *ghvm = filp->private_data;
+ struct gunyah_vm_memory_mapping *mapping, *tmp;
+ list_for_each_entry_safe(mapping, tmp, &ghvm->memory_mappings, list) {
+ gh_vm_mem_mapping_reclaim(ghvm, mapping);
+ kfree(mapping);
+ }
put_gh_rm(ghvm->rm);
kfree(ghvm);
return 0;
diff --git a/drivers/virt/gunyah/vm_mgr.h b/drivers/virt/gunyah/vm_mgr.h
index e47f34de7f9e..6b38bf780f76 100644
--- a/drivers/virt/gunyah/vm_mgr.h
+++ b/drivers/virt/gunyah/vm_mgr.h
@@ -7,14 +7,40 @@
#define _GH_PRIV_VM_MGR_H
#include <linux/gunyah_rsc_mgr.h>
+#include <linux/list.h>
+#include <linux/miscdevice.h>
+#include <linux/mutex.h>
#include <uapi/linux/gunyah.h>
long gh_dev_vm_mgr_ioctl(struct gh_rm *rm, unsigned int cmd, unsigned long arg);
+enum gunyah_vm_mem_share_type {
+ VM_MEM_SHARE,
+ VM_MEM_LEND,
+};
+
+struct gunyah_vm_memory_mapping {
+ struct list_head list;
+ enum gunyah_vm_mem_share_type share_type;
+ struct gh_rm_mem_parcel parcel;
+
+ __u64 guest_phys_addr;
+ struct page **pages;
+ unsigned long npages;
+};
+
struct gunyah_vm {
u16 vmid;
- struct gh_rm_rpc *rm;
+ struct gh_rm *rm;
+
+ struct mutex mm_lock;
+ struct list_head memory_mappings;
};
+struct gunyah_vm_memory_mapping *gh_vm_mem_mapping_alloc(struct gunyah_vm *ghvm,
+ struct gh_userspace_memory_region *region);
+void gh_vm_mem_mapping_reclaim(struct gunyah_vm *ghvm, struct gunyah_vm_memory_mapping *mapping);
+struct gunyah_vm_memory_mapping *gh_vm_mem_mapping_find(struct gunyah_vm *ghvm, u32 label);
+
#endif
diff --git a/drivers/virt/gunyah/vm_mgr_mm.c b/drivers/virt/gunyah/vm_mgr_mm.c
new file mode 100644
index 000000000000..f2dbdb4ee8ab
--- /dev/null
+++ b/drivers/virt/gunyah/vm_mgr_mm.c
@@ -0,0 +1,223 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#define pr_fmt(fmt) "gh_vm_mgr: " fmt
+
+#include <linux/gunyah_rsc_mgr.h>
+#include <linux/mm.h>
+
+#include <uapi/linux/gunyah.h>
+
+#include "vm_mgr.h"
+
+static inline bool page_contiguous(phys_addr_t p, phys_addr_t t)
+{
+ return t - p == PAGE_SIZE;
+}
+
+static struct gunyah_vm_memory_mapping *__gh_vm_mem_mapping_find(struct gunyah_vm *ghvm, u32 label)
+{
+ struct gunyah_vm_memory_mapping *mapping;
+
+
+ list_for_each_entry(mapping, &ghvm->memory_mappings, list)
+ if (mapping->parcel.label == label)
+ return mapping;
+
+ return NULL;
+}
+
+void gh_vm_mem_mapping_reclaim(struct gunyah_vm *ghvm, struct gunyah_vm_memory_mapping *mapping)
+{
+ int i, ret = 0;
+
+ if (mapping->parcel.mem_handle != GH_MEM_HANDLE_INVAL) {
+ ret = gh_rm_mem_reclaim(ghvm->rm, &mapping->parcel);
+ if (ret)
+ pr_warn("Failed to reclaim memory parcel for label %d: %d\n",
+ mapping->parcel.label, ret);
+ }
+
+ if (!ret)
+ for (i = 0; i < mapping->npages; i++)
+ unpin_user_page(mapping->pages[i]);
+
+ kfree(mapping->pages);
+ kfree(mapping->parcel.acl_entries);
+ kfree(mapping->parcel.mem_entries);
+
+ mutex_lock(&ghvm->mm_lock);
+ list_del(&mapping->list);
+ mutex_unlock(&ghvm->mm_lock);
+}
+
+struct gunyah_vm_memory_mapping *gh_vm_mem_mapping_find(struct gunyah_vm *ghvm, u32 label)
+{
+ struct gunyah_vm_memory_mapping *mapping;
+ int ret;
+
+ ret = mutex_lock_interruptible(&ghvm->mm_lock);
+ if (ret)
+ return ERR_PTR(ret);
+ mapping = __gh_vm_mem_mapping_find(ghvm, label);
+ mutex_unlock(&ghvm->mm_lock);
+ return mapping ? : ERR_PTR(-ENODEV);
+}
+
+struct gunyah_vm_memory_mapping *gh_vm_mem_mapping_alloc(struct gunyah_vm *ghvm,
+ struct gh_userspace_memory_region *region)
+{
+ phys_addr_t curr_page, prev_page;
+ struct gunyah_vm_memory_mapping *mapping, *tmp_mapping;
+ struct gh_rm_mem_entry *mem_entries;
+ int i, j, pinned, ret = 0;
+ struct gh_rm_mem_parcel *parcel;
+ size_t entry_size;
+ u16 vmid;
+
+ if (!region->memory_size || !PAGE_ALIGNED(region->memory_size) ||
+ !PAGE_ALIGNED(region->userspace_addr))
+ return ERR_PTR(-EINVAL);
+
+ if (!gh_api_has_feature(GH_API_FEATURE_MEMEXTENT))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ ret = mutex_lock_interruptible(&ghvm->mm_lock);
+ if (ret)
+ return ERR_PTR(ret);
+ mapping = __gh_vm_mem_mapping_find(ghvm, region->label);
+ if (mapping) {
+ mutex_unlock(&ghvm->mm_lock);
+ return ERR_PTR(-EEXIST);
+ }
+
+ mapping = kzalloc(sizeof(*mapping), GFP_KERNEL);
+ if (!mapping) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+
+ mapping->parcel.label = region->label;
+ mapping->guest_phys_addr = region->guest_phys_addr;
+ mapping->npages = region->memory_size >> PAGE_SHIFT;
+ parcel = &mapping->parcel;
+ parcel->mem_handle = GH_MEM_HANDLE_INVAL; /* to be filled later by mem_share/mem_lend */
+ parcel->mem_type = GH_RM_MEM_TYPE_NORMAL;
+
+ /* Check for overlap */
+ list_for_each_entry(tmp_mapping, &ghvm->memory_mappings, list) {
+ if (!((mapping->guest_phys_addr + (mapping->npages << PAGE_SHIFT) <=
+ tmp_mapping->guest_phys_addr) ||
+ (mapping->guest_phys_addr >=
+ tmp_mapping->guest_phys_addr + (tmp_mapping->npages << PAGE_SHIFT)))) {
+ ret = -EEXIST;
+ goto unlock;
+ }
+ }
+
+ list_add(&mapping->list, &ghvm->memory_mappings);
+unlock:
+ mutex_unlock(&ghvm->mm_lock);
+ if (ret)
+ goto free_mapping;
+
+ mapping->pages = kcalloc(mapping->npages, sizeof(*mapping->pages), GFP_KERNEL);
+ if (!mapping->pages) {
+ ret = -ENOMEM;
+ goto reclaim;
+ }
+
+ pinned = pin_user_pages_fast(region->userspace_addr, mapping->npages,
+ FOLL_WRITE | FOLL_LONGTERM, mapping->pages);
+ if (pinned < 0) {
+ ret = pinned;
+ goto reclaim;
+ } else if (pinned != mapping->npages) {
+ ret = -EFAULT;
+ mapping->npages = pinned; /* update npages for reclaim */
+ goto reclaim;
+ }
+
+ if (region->flags & GH_MEM_LENT) {
+ parcel->n_acl_entries = 1;
+ mapping->share_type = VM_MEM_LEND;
+ } else {
+ parcel->n_acl_entries = 2;
+ mapping->share_type = VM_MEM_SHARE;
+ }
+ parcel->acl_entries = kcalloc(parcel->n_acl_entries,
+ sizeof(*parcel->acl_entries),
+ GFP_KERNEL);
+ if (!parcel->acl_entries) {
+ ret = -ENOMEM;
+ goto reclaim;
+ }
+
+ parcel->acl_entries[0].vmid = cpu_to_le16(ghvm->vmid);
+ if (region->flags & GH_MEM_ALLOW_READ)
+ parcel->acl_entries[0].perms |= GH_RM_ACL_R;
+ if (region->flags & GH_MEM_ALLOW_WRITE)
+ parcel->acl_entries[0].perms |= GH_RM_ACL_W;
+ if (region->flags & GH_MEM_ALLOW_EXEC)
+ parcel->acl_entries[0].perms |= GH_RM_ACL_X;
+
+ if (mapping->share_type == VM_MEM_SHARE) {
+ ret = gh_rm_get_vmid(ghvm->rm, &vmid);
+ if (ret) {
+ if (ret > 0) {
+ pr_warn("RM failed to get this VM's VMID: %d", ret);
+ ret = -EINVAL;
+ }
+ goto reclaim;
+ }
+
+ parcel->acl_entries[1].vmid = cpu_to_le16(vmid);
+ /* Host assumed to have all these permissions. Gunyah will not
+ * grant new permissions if host actually had less than RWX
+ */
+ parcel->acl_entries[1].perms |= GH_RM_ACL_R | GH_RM_ACL_W | GH_RM_ACL_X;
+ }
+
+ mem_entries = kcalloc(mapping->npages, sizeof(*mem_entries), GFP_KERNEL);
+ if (!mem_entries) {
+ ret = -ENOMEM;
+ goto reclaim;
+ }
+
+ /* reduce number of entries by combining contiguous pages into single memory entry */
+ prev_page = page_to_phys(mapping->pages[0]);
+ mem_entries[0].ipa_base = cpu_to_le64(prev_page);
+ entry_size = PAGE_SIZE;
+ for (i = 1, j = 0; i < mapping->npages; i++) {
+ curr_page = page_to_phys(mapping->pages[i]);
+ if (page_contiguous(prev_page, curr_page)) {
+ entry_size += PAGE_SIZE;
+ } else {
+ mem_entries[j].size = cpu_to_le64(entry_size);
+ j++;
+ mem_entries[j].ipa_base = cpu_to_le64(curr_page);
+ entry_size = PAGE_SIZE;
+ }
+
+ prev_page = curr_page;
+ }
+ mem_entries[j].size = cpu_to_le64(entry_size);
+
+ parcel->n_mem_entries = j + 1;
+ parcel->mem_entries = kmemdup(mem_entries, sizeof(*mem_entries) * parcel->n_mem_entries,
+ GFP_KERNEL);
+ kfree(mem_entries);
+ if (!parcel->mem_entries) {
+ ret = -ENOMEM;
+ goto reclaim;
+ }
+
+ return mapping;
+reclaim:
+ gh_vm_mem_mapping_reclaim(ghvm, mapping);
+free_mapping:
+ kfree(mapping);
+ return ERR_PTR(ret);
+}
diff --git a/include/uapi/linux/gunyah.h b/include/uapi/linux/gunyah.h
index 88a40d6e0b96..574f33b198d0 100644
--- a/include/uapi/linux/gunyah.h
+++ b/include/uapi/linux/gunyah.h
@@ -20,4 +20,26 @@
*/
#define GH_CREATE_VM _IO(GH_IOCTL_TYPE, 0x0) /* Returns a Gunyah VM fd */
+/*
+ * ioctls for VM fds
+ */
+struct gh_userspace_memory_region {
+ __u32 label;
+#define GH_MEM_ALLOW_READ (1UL << 0)
+#define GH_MEM_ALLOW_WRITE (1UL << 1)
+#define GH_MEM_ALLOW_EXEC (1UL << 2)
+/*
+ * The guest will be lent the memory instead of shared.
+ * In other words, the guest has exclusive access to the memory region and the host loses access.
+ */
+#define GH_MEM_LENT (1UL << 3)
+ __u32 flags;
+ __u64 guest_phys_addr;
+ __u64 memory_size;
+ __u64 userspace_addr;
+};
+
+#define GH_VM_SET_USER_MEM_REGION _IOW(GH_IOCTL_TYPE, 0x1, \
+ struct gh_userspace_memory_region)
+
#endif
--
2.39.0
* Elliot Berman <[email protected]> [2023-01-20 14:46:12]:
> + /* Check for overlap */
> + list_for_each_entry(tmp_mapping, &ghvm->memory_mappings, list) {
> + if (!((mapping->guest_phys_addr + (mapping->npages << PAGE_SHIFT) <=
> + tmp_mapping->guest_phys_addr) ||
> + (mapping->guest_phys_addr >=
> + tmp_mapping->guest_phys_addr + (tmp_mapping->npages << PAGE_SHIFT)))) {
> + ret = -EEXIST;
> + goto unlock;
> + }
> + }
> +
> + list_add(&mapping->list, &ghvm->memory_mappings);
I think the potential race condition described last time is still possible. Pls
check.
> +unlock:
> + mutex_unlock(&ghvm->mm_lock);
> + if (ret)
> + goto free_mapping;
> +
> + mapping->pages = kcalloc(mapping->npages, sizeof(*mapping->pages), GFP_KERNEL);
> + if (!mapping->pages) {
> + ret = -ENOMEM;
> + goto reclaim;
Same comment as last time. Can you check this error path? We seem to call
unpin_user_page() in this path, which is not correct.
> + }
> +
> + pinned = pin_user_pages_fast(region->userspace_addr, mapping->npages,
> + FOLL_WRITE | FOLL_LONGTERM, mapping->pages);
> + if (pinned < 0) {
> + ret = pinned;
> + goto reclaim;
Same comment as above.
On 1/25/2023 5:34 AM, Srivatsa Vaddagiri wrote:
> * Elliot Berman <[email protected]> [2023-01-20 14:46:12]:
>
>> + /* Check for overlap */
>> + list_for_each_entry(tmp_mapping, &ghvm->memory_mappings, list) {
>> + if (!((mapping->guest_phys_addr + (mapping->npages << PAGE_SHIFT) <=
>> + tmp_mapping->guest_phys_addr) ||
>> + (mapping->guest_phys_addr >=
>> + tmp_mapping->guest_phys_addr + (tmp_mapping->npages << PAGE_SHIFT)))) {
>> + ret = -EEXIST;
>> + goto unlock;
>> + }
>> + }
>> +
>> + list_add(&mapping->list, &ghvm->memory_mappings);
>
> I think the potential race condition described last time is still possible. Pls
> check.
>
>> +unlock:
>> + mutex_unlock(&ghvm->mm_lock);
>> + if (ret)
>> + goto free_mapping;
>> +
>> + mapping->pages = kcalloc(mapping->npages, sizeof(*mapping->pages), GFP_KERNEL);
>> + if (!mapping->pages) {
>> + ret = -ENOMEM;
>> + goto reclaim;
>
> Same comment as last time. Can you check this error path? We seem to call
> unpin_user_page() in this path, which is not correct.
>
>> + }
>> +
>> + pinned = pin_user_pages_fast(region->userspace_addr, mapping->npages,
>> + FOLL_WRITE | FOLL_LONGTERM, mapping->pages);
>> + if (pinned < 0) {
>> + ret = pinned;
>> + goto reclaim;
>
> Same comment as above.
>
Caught the race condition you are describing between adding and removing
the memory parcels. Should be fixed for v10.
- Elliot
On 20/01/2023 22:46, Elliot Berman wrote:
> When launching a virtual machine, Gunyah userspace allocates memory for
> the guest and informs Gunyah about these memory regions through
> SET_USER_MEMORY_REGION ioctl.
>
> Co-developed-by: Prakruthi Deepak Heragu <[email protected]>
> Signed-off-by: Prakruthi Deepak Heragu <[email protected]>
> Signed-off-by: Elliot Berman <[email protected]>
> ---
> drivers/virt/gunyah/Makefile | 2 +-
> drivers/virt/gunyah/vm_mgr.c | 46 +++++++
> drivers/virt/gunyah/vm_mgr.h | 28 +++-
> drivers/virt/gunyah/vm_mgr_mm.c | 223 ++++++++++++++++++++++++++++++++
> include/uapi/linux/gunyah.h | 22 ++++
> 5 files changed, 319 insertions(+), 2 deletions(-)
> create mode 100644 drivers/virt/gunyah/vm_mgr_mm.c
>
> diff --git a/drivers/virt/gunyah/Makefile b/drivers/virt/gunyah/Makefile
> index 03951cf82023..ff8bc4925392 100644
> --- a/drivers/virt/gunyah/Makefile
> +++ b/drivers/virt/gunyah/Makefile
> @@ -2,5 +2,5 @@
>
> obj-$(CONFIG_GUNYAH) += gunyah.o
>
> -gunyah_rsc_mgr-y += rsc_mgr.o rsc_mgr_rpc.o vm_mgr.o
> +gunyah_rsc_mgr-y += rsc_mgr.o rsc_mgr_rpc.o vm_mgr.o vm_mgr_mm.o
> obj-$(CONFIG_GUNYAH) += gunyah_rsc_mgr.o
> diff --git a/drivers/virt/gunyah/vm_mgr.c b/drivers/virt/gunyah/vm_mgr.c
> index 0864dbd77e28..b847fde63333 100644
> --- a/drivers/virt/gunyah/vm_mgr.c
> +++ b/drivers/virt/gunyah/vm_mgr.c
> @@ -35,14 +35,55 @@ static __must_check struct gunyah_vm *gunyah_vm_alloc(struct gh_rm_rpc *rm)
> ghvm->vmid = vmid;
> ghvm->rm = rm;
>
> + mutex_init(&ghvm->mm_lock);
> + INIT_LIST_HEAD(&ghvm->memory_mappings);
> +
> return ghvm;
> }
>
> static long gh_vm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
> {
> + struct gunyah_vm *ghvm = filp->private_data;
> + void __user *argp = (void __user *)arg;
> long r;
>
> switch (cmd) {
> + case GH_VM_SET_USER_MEM_REGION: {
> + struct gunyah_vm_memory_mapping *mapping;
> + struct gh_userspace_memory_region region;
> +
> + r = -EFAULT;
> + if (copy_from_user(®ion, argp, sizeof(region)))
> + break;
Why not be explict about the error codes, do something like.
if (copy_from_user(®ion, argp, sizeof(region)))
return -EFAULT;
setting r value everytime before starting any code is making the code
more reader unfriendly.
> +
> + r = -EINVAL;
> + /* All other flag bits are reserved for future use */
> + if (region.flags & ~(GH_MEM_ALLOW_READ | GH_MEM_ALLOW_WRITE | GH_MEM_ALLOW_EXEC |
> + GH_MEM_LENT))
> + break;
> +
> +
> + if (region.memory_size) {
This behaviour allocating memory in presense of valid memory_size and
finding memory in cases of zero size needs to be described properly in
the uapi so that the users are aware of this.
> + r = 0;
> + mapping = gh_vm_mem_mapping_alloc(ghvm, ®ion);
> + if (IS_ERR(mapping)) {
> + r = PTR_ERR(mapping);
> + break;
> + }
> + } else {
> + mapping = gh_vm_mem_mapping_find(ghvm, region.label);
> + if (IS_ERR(mapping)) {
> + r = PTR_ERR(mapping);
> + break;
> + }
> + r = 0;
> + if (!mapping)
> + break;
> + gh_vm_mem_mapping_reclaim(ghvm, mapping);
> + kfree(mapping);
> + }
> + break;
> + }
> default:
> r = -ENOTTY;
> break;
> @@ -54,7 +95,12 @@ static long gh_vm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
> static int gh_vm_release(struct inode *inode, struct file *filp)
> {
> struct gunyah_vm *ghvm = filp->private_data;
> + struct gunyah_vm_memory_mapping *mapping, *tmp;
>
Locking?
> + list_for_each_entry_safe(mapping, tmp, &ghvm->memory_mappings, list) {
> + gh_vm_mem_mapping_reclaim(ghvm, mapping);
> + kfree(mapping);
> + }
> put_gh_rm(ghvm->rm);
> kfree(ghvm);
> return 0;
> diff --git a/drivers/virt/gunyah/vm_mgr.h b/drivers/virt/gunyah/vm_mgr.h
> index e47f34de7f9e..6b38bf780f76 100644
> --- a/drivers/virt/gunyah/vm_mgr.h
> +++ b/drivers/virt/gunyah/vm_mgr.h
> @@ -7,14 +7,40 @@
> #define _GH_PRIV_VM_MGR_H
>
> #include <linux/gunyah_rsc_mgr.h>
> +#include <linux/list.h>
> +#include <linux/miscdevice.h>
> +#include <linux/mutex.h>
>
> #include <uapi/linux/gunyah.h>
>
> long gh_dev_vm_mgr_ioctl(struct gh_rm *rm, unsigned int cmd, unsigned long arg);
>
> +enum gunyah_vm_mem_share_type {
> + VM_MEM_SHARE,
> + VM_MEM_LEND,
> +};
> +
> +struct gunyah_vm_memory_mapping {
> + struct list_head list;
> + enum gunyah_vm_mem_share_type share_type;
> + struct gh_rm_mem_parcel parcel;
> +
> + __u64 guest_phys_addr;
> + struct page **pages;
> + unsigned long npages;
> +};
> +
> struct gunyah_vm {
> u16 vmid;
> - struct gh_rm_rpc *rm;
> + struct gh_rm *rm;
> +
> + struct mutex mm_lock;
> + struct list_head memory_mappings;
> };
>
> +struct gunyah_vm_memory_mapping *gh_vm_mem_mapping_alloc(struct gunyah_vm *ghvm,
> + struct gh_userspace_memory_region *region);
> +void gh_vm_mem_mapping_reclaim(struct gunyah_vm *ghvm, struct gunyah_vm_memory_mapping *mapping);
> +struct gunyah_vm_memory_mapping *gh_vm_mem_mapping_find(struct gunyah_vm *ghvm, u32 label);
> +
> #endif
> diff --git a/drivers/virt/gunyah/vm_mgr_mm.c b/drivers/virt/gunyah/vm_mgr_mm.c
> new file mode 100644
> index 000000000000..f2dbdb4ee8ab
> --- /dev/null
> +++ b/drivers/virt/gunyah/vm_mgr_mm.c
> @@ -0,0 +1,223 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
> + */
> +
> +#define pr_fmt(fmt) "gh_vm_mgr: " fmt
> +
> +#include <linux/gunyah_rsc_mgr.h>
> +#include <linux/mm.h>
> +
> +#include <uapi/linux/gunyah.h>
> +
> +#include "vm_mgr.h"
> +
> +static inline bool page_contiguous(phys_addr_t p, phys_addr_t t)
> +{
> + return t - p == PAGE_SIZE;
> +}
> +
> +static struct gunyah_vm_memory_mapping *__gh_vm_mem_mapping_find(struct gunyah_vm *ghvm, u32 label)
> +{
> + struct gunyah_vm_memory_mapping *mapping;
> +
> +
only one line.
> + list_for_each_entry(mapping, &ghvm->memory_mappings, list)
> + if (mapping->parcel.label == label)
> + return mapping;
> +
> + return NULL;
> +}
> +
> +void gh_vm_mem_mapping_reclaim(struct gunyah_vm *ghvm, struct gunyah_vm_memory_mapping *mapping)
> +{
> + int i, ret = 0;
> +
> + if (mapping->parcel.mem_handle != GH_MEM_HANDLE_INVAL) {
> + ret = gh_rm_mem_reclaim(ghvm->rm, &mapping->parcel);
> + if (ret)
> + pr_warn("Failed to reclaim memory parcel for label %d: %d\n",
> + mapping->parcel.label, ret);
> + }
> +
> + if (!ret)
> + for (i = 0; i < mapping->npages; i++)
> + unpin_user_page(mapping->pages[i]);
> +
> + kfree(mapping->pages);
> + kfree(mapping->parcel.acl_entries);
> + kfree(mapping->parcel.mem_entries);
> +
> + mutex_lock(&ghvm->mm_lock);
> + list_del(&mapping->list);
> + mutex_unlock(&ghvm->mm_lock);
> +}
> +
> +struct gunyah_vm_memory_mapping *gh_vm_mem_mapping_find(struct gunyah_vm *ghvm, u32 label)
> +{
> + struct gunyah_vm_memory_mapping *mapping;
> + int ret;
> +
> + ret = mutex_lock_interruptible(&ghvm->mm_lock);
> + if (ret)
> + return ERR_PTR(ret);
> + mapping = __gh_vm_mem_mapping_find(ghvm, label);
> + mutex_unlock(&ghvm->mm_lock);
> + return mapping ? : ERR_PTR(-ENODEV);
> +}
> +
> +struct gunyah_vm_memory_mapping *gh_vm_mem_mapping_alloc(struct gunyah_vm *ghvm,
> + struct gh_userspace_memory_region *region)
> +{
Is this a static functoin or an exported symbol?
> + phys_addr_t curr_page, prev_page;
> + struct gunyah_vm_memory_mapping *mapping, *tmp_mapping;
> + struct gh_rm_mem_entry *mem_entries;
> + int i, j, pinned, ret = 0;
> + struct gh_rm_mem_parcel *parcel;
> + size_t entry_size;
> + u16 vmid; > +
Reverse christmas tree to sor local variables would be nice.
> + if (!region->memory_size || !PAGE_ALIGNED(region->memory_size) ||
> + !PAGE_ALIGNED(region->userspace_addr))
Even this alignment needs some documentation.
Or why not just let the user only pass number of pages instead of size?
> + return ERR_PTR(-EINVAL);
> +
> + if (!gh_api_has_feature(GH_API_FEATURE_MEMEXTENT))
> + return ERR_PTR(-EOPNOTSUPP);
We should proabably move this as very first check while handling this IOCTL.
> +
> + ret = mutex_lock_interruptible(&ghvm->mm_lock);
> + if (ret)
> + return ERR_PTR(ret);
> + mapping = __gh_vm_mem_mapping_find(ghvm, region->label);
so label is unique and userspace proabably aware of this?
Can we have more than one userspace doing this? and if so how can it
ensure that each label is unique?
> + if (mapping) {
> + mutex_unlock(&ghvm->mm_lock);
> + return ERR_PTR(-EEXIST);
> + }
> +
> + mapping = kzalloc(sizeof(*mapping), GFP_KERNEL);
> + if (!mapping) {
> + ret = -ENOMEM;
> + goto unlock;
> + }
> +
> + mapping->parcel.label = region->label;
> + mapping->guest_phys_addr = region->guest_phys_addr;
> + mapping->npages = region->memory_size >> PAGE_SHIFT;
> + parcel = &mapping->parcel;
> + parcel->mem_handle = GH_MEM_HANDLE_INVAL; /* to be filled later by mem_share/mem_lend */
> + parcel->mem_type = GH_RM_MEM_TYPE_NORMAL;
> +
> + /* Check for overlap */
> + list_for_each_entry(tmp_mapping, &ghvm->memory_mappings, list) {
> + if (!((mapping->guest_phys_addr + (mapping->npages << PAGE_SHIFT) <=
> + tmp_mapping->guest_phys_addr) ||
> + (mapping->guest_phys_addr >=
> + tmp_mapping->guest_phys_addr + (tmp_mapping->npages << PAGE_SHIFT)))) {
> + ret = -EEXIST;
> + goto unlock;
> + }
> + }
This looks like we will loop every mappign for each allocation giving us
an O(n), How frequent and how many max mappings can be there in the system?
> +
> + list_add(&mapping->list, &ghvm->memory_mappings);
> +unlock:
> + mutex_unlock(&ghvm->mm_lock);
> + if (ret)
> + goto free_mapping;
> +
> + mapping->pages = kcalloc(mapping->npages, sizeof(*mapping->pages), GFP_KERNEL);
> + if (!mapping->pages) {
> + ret = -ENOMEM;
> + goto reclaim;
> + }
> +
> + pinned = pin_user_pages_fast(region->userspace_addr, mapping->npages,
> + FOLL_WRITE | FOLL_LONGTERM, mapping->pages);
> + if (pinned < 0) {
> + ret = pinned;
> + goto reclaim;
> + } else if (pinned != mapping->npages) {
> + ret = -EFAULT;
> + mapping->npages = pinned; /* update npages for reclaim */
> + goto reclaim;
> + }
> +
> + if (region->flags & GH_MEM_LENT) {
> + parcel->n_acl_entries = 1;
> + mapping->share_type = VM_MEM_LEND;
> + } else {
> + parcel->n_acl_entries = 2;
> + mapping->share_type = VM_MEM_SHARE;
> + }
> + parcel->acl_entries = kcalloc(parcel->n_acl_entries,
> + sizeof(*parcel->acl_entries),
> + GFP_KERNEL);
> + if (!parcel->acl_entries) {
> + ret = -ENOMEM;
> + goto reclaim;
> + }
> +
> + parcel->acl_entries[0].vmid = cpu_to_le16(ghvm->vmid);
> + if (region->flags & GH_MEM_ALLOW_READ)
> + parcel->acl_entries[0].perms |= GH_RM_ACL_R;
> + if (region->flags & GH_MEM_ALLOW_WRITE)
> + parcel->acl_entries[0].perms |= GH_RM_ACL_W;
> + if (region->flags & GH_MEM_ALLOW_EXEC)
> + parcel->acl_entries[0].perms |= GH_RM_ACL_X;
> +
> + if (mapping->share_type == VM_MEM_SHARE) {
> + ret = gh_rm_get_vmid(ghvm->rm, &vmid);
> + if (ret) {
> + if (ret > 0) {
> + pr_warn("RM failed to get this VM's VMID: %d", ret);
> + ret = -EINVAL;
> + }
> + goto reclaim;
> + }
> +
> + parcel->acl_entries[1].vmid = cpu_to_le16(vmid);
> + /* Host assumed to have all these permissions. Gunyah will not
> + * grant new permissions if host actually had less than RWX
> + */
> + parcel->acl_entries[1].perms |= GH_RM_ACL_R | GH_RM_ACL_W | GH_RM_ACL_X;
> + }
> +
> + mem_entries = kcalloc(mapping->npages, sizeof(*mem_entries), GFP_KERNEL);
> + if (!mem_entries) {
> + ret = -ENOMEM;
> + goto reclaim;
> + }
> +
> + /* reduce number of entries by combining contiguous pages into single memory entry */
> + prev_page = page_to_phys(mapping->pages[0]);
> + mem_entries[0].ipa_base = cpu_to_le64(prev_page);
> + entry_size = PAGE_SIZE;
> + for (i = 1, j = 0; i < mapping->npages; i++) {
> + curr_page = page_to_phys(mapping->pages[i]);
> + if (page_contiguous(prev_page, curr_page)) {
> + entry_size += PAGE_SIZE;
> + } else {
> + mem_entries[j].size = cpu_to_le64(entry_size);
> + j++;
> + mem_entries[j].ipa_base = cpu_to_le64(curr_page);
> + entry_size = PAGE_SIZE;
> + }
> +
> + prev_page = curr_page;
> + }
> + mem_entries[j].size = cpu_to_le64(entry_size);
> +
> + parcel->n_mem_entries = j + 1;
> + parcel->mem_entries = kmemdup(mem_entries, sizeof(*mem_entries) * parcel->n_mem_entries,
> + GFP_KERNEL);
> + kfree(mem_entries);
> + if (!parcel->mem_entries) {
> + ret = -ENOMEM;
> + goto reclaim;
> + }
> +
> + return mapping;
> +reclaim:
> + gh_vm_mem_mapping_reclaim(ghvm, mapping);
> +free_mapping:
> + kfree(mapping);
> + return ERR_PTR(ret);
> +}
> diff --git a/include/uapi/linux/gunyah.h b/include/uapi/linux/gunyah.h
> index 88a40d6e0b96..574f33b198d0 100644
> --- a/include/uapi/linux/gunyah.h
> +++ b/include/uapi/linux/gunyah.h
> @@ -20,4 +20,26 @@
> */
> #define GH_CREATE_VM _IO(GH_IOCTL_TYPE, 0x0) /* Returns a Gunyah VM fd */
>
> +/*
> + * ioctls for VM fds
> + */
> +struct gh_userspace_memory_region {
This struct needs some kernedoc.
> + __u32 label;
> +#define GH_MEM_ALLOW_READ (1UL << 0)
> +#define GH_MEM_ALLOW_WRITE (1UL << 1)
> +#define GH_MEM_ALLOW_EXEC (1UL << 2)
> +/*
> + * The guest will be lent the memory instead of shared.
> + * In other words, the guest has exclusive access to the memory region and the host loses access.
> + */
> +#define GH_MEM_LENT (1UL << 3)
> + __u32 flags;
> + __u64 guest_phys_addr;
> + __u64 memory_size;
if we are only expecting pages, this should probably be make explict by
using nr_pages instead of size
> + __u64 userspace_addr;
> +};
> +
> +#define GH_VM_SET_USER_MEM_REGION _IOW(GH_IOCTL_TYPE, 0x1, \
> + struct gh_userspace_memory_region)
> +
> #endif
On 2/6/2023 8:12 AM, Srinivas Kandagatla wrote:
>
>
> On 20/01/2023 22:46, Elliot Berman wrote:
>> When launching a virtual machine, Gunyah userspace allocates memory for
>> the guest and informs Gunyah about these memory regions through
>> SET_USER_MEMORY_REGION ioctl.
>>
>> Co-developed-by: Prakruthi Deepak Heragu <[email protected]>
>> Signed-off-by: Prakruthi Deepak Heragu <[email protected]>
>> Signed-off-by: Elliot Berman <[email protected]>
>> ---
>> drivers/virt/gunyah/Makefile | 2 +-
>> drivers/virt/gunyah/vm_mgr.c | 46 +++++++
>> drivers/virt/gunyah/vm_mgr.h | 28 +++-
>> drivers/virt/gunyah/vm_mgr_mm.c | 223 ++++++++++++++++++++++++++++++++
>> include/uapi/linux/gunyah.h | 22 ++++
>> 5 files changed, 319 insertions(+), 2 deletions(-)
>> create mode 100644 drivers/virt/gunyah/vm_mgr_mm.c
>>
>> diff --git a/drivers/virt/gunyah/Makefile b/drivers/virt/gunyah/Makefile
>> index 03951cf82023..ff8bc4925392 100644
>> --- a/drivers/virt/gunyah/Makefile
>> +++ b/drivers/virt/gunyah/Makefile
>> @@ -2,5 +2,5 @@
>> obj-$(CONFIG_GUNYAH) += gunyah.o
>> -gunyah_rsc_mgr-y += rsc_mgr.o rsc_mgr_rpc.o vm_mgr.o
>> +gunyah_rsc_mgr-y += rsc_mgr.o rsc_mgr_rpc.o vm_mgr.o vm_mgr_mm.o
>> obj-$(CONFIG_GUNYAH) += gunyah_rsc_mgr.o
>> diff --git a/drivers/virt/gunyah/vm_mgr.c b/drivers/virt/gunyah/vm_mgr.c
>> index 0864dbd77e28..b847fde63333 100644
>> --- a/drivers/virt/gunyah/vm_mgr.c
>> +++ b/drivers/virt/gunyah/vm_mgr.c
>> @@ -35,14 +35,55 @@ static __must_check struct gunyah_vm
>> *gunyah_vm_alloc(struct gh_rm_rpc *rm)
>> ghvm->vmid = vmid;
>> ghvm->rm = rm;
>> + mutex_init(&ghvm->mm_lock);
>> + INIT_LIST_HEAD(&ghvm->memory_mappings);
>> +
>> return ghvm;
>> }
>> static long gh_vm_ioctl(struct file *filp, unsigned int cmd,
>> unsigned long arg)
>> {
>> + struct gunyah_vm *ghvm = filp->private_data;
>> + void __user *argp = (void __user *)arg;
>> long r;
>> switch (cmd) {
>> + case GH_VM_SET_USER_MEM_REGION: {
>> + struct gunyah_vm_memory_mapping *mapping;
>> + struct gh_userspace_memory_region region;
>> +
>> + r = -EFAULT;
>> + if (copy_from_user(®ion, argp, sizeof(region)))
>> + break;
> Why not be explict about the error codes, do something like.
>
> if (copy_from_user(®ion, argp, sizeof(region)))
> return -EFAULT;
>
>
> setting r value everytime before starting any code is making the code
> more reader unfriendly.
>
>
Done.
>> +
>> + r = -EINVAL;
>> + /* All other flag bits are reserved for future use */
>> + if (region.flags & ~(GH_MEM_ALLOW_READ | GH_MEM_ALLOW_WRITE |
>> GH_MEM_ALLOW_EXEC |
>> + GH_MEM_LENT))
>> + break;
>> +
>> +
>> + if (region.memory_size) {
>
> This behaviour allocating memory in presense of valid memory_size and
> finding memory in cases of zero size needs to be described properly in
> the uapi so that the users are aware of this.
>
This behavior is described in "Document Gunyah VM Manager":
https://lore.kernel.org/all/[email protected]/
>> + r = 0;
>> + mapping = gh_vm_mem_mapping_alloc(ghvm, ®ion);
>> + if (IS_ERR(mapping)) {
>> + r = PTR_ERR(mapping);
>> + break;
>> + }
>> + } else {
>> + mapping = gh_vm_mem_mapping_find(ghvm, region.label);
>> + if (IS_ERR(mapping)) {
>> + r = PTR_ERR(mapping);
>> + break;
>> + }
>> + r = 0;
>> + if (!mapping)
>> + break;
>> + gh_vm_mem_mapping_reclaim(ghvm, mapping);
>> + kfree(mapping);
>> + }
>> + break;
>> + }
>> default:
>> r = -ENOTTY;
>> break;
>> @@ -54,7 +95,12 @@ static long gh_vm_ioctl(struct file *filp, unsigned
>> int cmd, unsigned long arg)
>> static int gh_vm_release(struct inode *inode, struct file *filp)
>> {
>> struct gunyah_vm *ghvm = filp->private_data;
>> + struct gunyah_vm_memory_mapping *mapping, *tmp;
> Locking?
>> + list_for_each_entry_safe(mapping, tmp, &ghvm->memory_mappings,
>> list) {
>> + gh_vm_mem_mapping_reclaim(ghvm, mapping);
>> + kfree(mapping);
>> + }
>> put_gh_rm(ghvm->rm);
>> kfree(ghvm);
>> return 0;
>> diff --git a/drivers/virt/gunyah/vm_mgr.h b/drivers/virt/gunyah/vm_mgr.h
>> index e47f34de7f9e..6b38bf780f76 100644
>> --- a/drivers/virt/gunyah/vm_mgr.h
>> +++ b/drivers/virt/gunyah/vm_mgr.h
>> @@ -7,14 +7,40 @@
>> #define _GH_PRIV_VM_MGR_H
>> #include <linux/gunyah_rsc_mgr.h>
>> +#include <linux/list.h>
>> +#include <linux/miscdevice.h>
>> +#include <linux/mutex.h>
>> #include <uapi/linux/gunyah.h>
>> long gh_dev_vm_mgr_ioctl(struct gh_rm *rm, unsigned int cmd,
>> unsigned long arg);
>> +enum gunyah_vm_mem_share_type {
>> + VM_MEM_SHARE,
>> + VM_MEM_LEND,
>> +};
>> +
>> +struct gunyah_vm_memory_mapping {
>> + struct list_head list;
>> + enum gunyah_vm_mem_share_type share_type;
>> + struct gh_rm_mem_parcel parcel;
>> +
>> + __u64 guest_phys_addr;
>> + struct page **pages;
>> + unsigned long npages;
>> +};
>> +
>> struct gunyah_vm {
>> u16 vmid;
>> - struct gh_rm_rpc *rm;
>> + struct gh_rm *rm;
>> +
>> + struct mutex mm_lock;
>> + struct list_head memory_mappings;
>> };
>> +struct gunyah_vm_memory_mapping *gh_vm_mem_mapping_alloc(struct
>> gunyah_vm *ghvm,
>> + struct gh_userspace_memory_region *region);
>> +void gh_vm_mem_mapping_reclaim(struct gunyah_vm *ghvm, struct
>> gunyah_vm_memory_mapping *mapping);
>> +struct gunyah_vm_memory_mapping *gh_vm_mem_mapping_find(struct
>> gunyah_vm *ghvm, u32 label);
>> +
>> #endif
>> diff --git a/drivers/virt/gunyah/vm_mgr_mm.c
>> b/drivers/virt/gunyah/vm_mgr_mm.c
>> new file mode 100644
>> index 000000000000..f2dbdb4ee8ab
>> --- /dev/null
>> +++ b/drivers/virt/gunyah/vm_mgr_mm.c
>> @@ -0,0 +1,223 @@
>> +// SPDX-License-Identifier: GPL-2.0-only
>> +/*
>> + * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All
>> rights reserved.
>> + */
>> +
>> +#define pr_fmt(fmt) "gh_vm_mgr: " fmt
>> +
>> +#include <linux/gunyah_rsc_mgr.h>
>> +#include <linux/mm.h>
>> +
>> +#include <uapi/linux/gunyah.h>
>> +
>> +#include "vm_mgr.h"
>> +
>> +static inline bool page_contiguous(phys_addr_t p, phys_addr_t t)
>> +{
>> + return t - p == PAGE_SIZE;
>> +}
>> +
>> +static struct gunyah_vm_memory_mapping
>> *__gh_vm_mem_mapping_find(struct gunyah_vm *ghvm, u32 label)
>> +{
>> + struct gunyah_vm_memory_mapping *mapping;
>> +
>> +
> only one line.
Done.
>> + list_for_each_entry(mapping, &ghvm->memory_mappings, list)
>> + if (mapping->parcel.label == label)
>> + return mapping;
>> +
>> + return NULL;
>> +}
>> +
>> +void gh_vm_mem_mapping_reclaim(struct gunyah_vm *ghvm, struct
>> gunyah_vm_memory_mapping *mapping)
>> +{
>> + int i, ret = 0;
>> +
>> + if (mapping->parcel.mem_handle != GH_MEM_HANDLE_INVAL) {
>> + ret = gh_rm_mem_reclaim(ghvm->rm, &mapping->parcel);
>> + if (ret)
>> + pr_warn("Failed to reclaim memory parcel for label %d:
>> %d\n",
>> + mapping->parcel.label, ret);
>> + }
>> +
>> + if (!ret)
>> + for (i = 0; i < mapping->npages; i++)
>> + unpin_user_page(mapping->pages[i]);
>> +
>> + kfree(mapping->pages);
>> + kfree(mapping->parcel.acl_entries);
>> + kfree(mapping->parcel.mem_entries);
>> +
>> + mutex_lock(&ghvm->mm_lock);
>> + list_del(&mapping->list);
>> + mutex_unlock(&ghvm->mm_lock);
>> +}
>> +
>> +struct gunyah_vm_memory_mapping *gh_vm_mem_mapping_find(struct
>> gunyah_vm *ghvm, u32 label)
>> +{
>> + struct gunyah_vm_memory_mapping *mapping;
>> + int ret;
>> +
>> + ret = mutex_lock_interruptible(&ghvm->mm_lock);
>> + if (ret)
>> + return ERR_PTR(ret);
>> + mapping = __gh_vm_mem_mapping_find(ghvm, label);
>> + mutex_unlock(&ghvm->mm_lock);
>> + return mapping ? : ERR_PTR(-ENODEV);
>> +}
>> +
>> +struct gunyah_vm_memory_mapping *gh_vm_mem_mapping_alloc(struct
>> gunyah_vm *ghvm,
>> + struct gh_userspace_memory_region *region)
>> +{
> Is this a static functoin or an exported symbol?
>
Neither, it's used in vm_mgr.c.
>> + phys_addr_t curr_page, prev_page;
>> + struct gunyah_vm_memory_mapping *mapping, *tmp_mapping;
>> + struct gh_rm_mem_entry *mem_entries;
>> + int i, j, pinned, ret = 0;
>> + struct gh_rm_mem_parcel *parcel;
>> + size_t entry_size;
>> + u16 vmid; > +
> Reverse christmas tree to sor local variables would be nice.
>
Done, and checked throughout the series as well.
>> + if (!region->memory_size || !PAGE_ALIGNED(region->memory_size) ||
>> + !PAGE_ALIGNED(region->userspace_addr))
> Even this alignment needs some documentation.
>
Documented now.
> Or why not just let the user only pass number of pages instead of size?
>
>
KVM, Nitro enclaves, and ACRN all give size directly and not as # of pages.
>> + return ERR_PTR(-EINVAL);
>> +
>> + if (!gh_api_has_feature(GH_API_FEATURE_MEMEXTENT))
>> + return ERR_PTR(-EOPNOTSUPP);
>
> We should proabably move this as very first check while handling this
> IOCTL.
>
Done.
>
>> +
>> + ret = mutex_lock_interruptible(&ghvm->mm_lock);
>> + if (ret)
>> + return ERR_PTR(ret);
>> + mapping = __gh_vm_mem_mapping_find(ghvm, region->label);
>
> so label is unique and userspace proabably aware of this?
> Can we have more than one userspace doing this? and if so how can it
> ensure that each label is unique?
>
The label is unique and userspace is aware of this. One userspace owns
the VM FD.
>
>> + if (mapping) {
>> + mutex_unlock(&ghvm->mm_lock);
>> + return ERR_PTR(-EEXIST);
>> + }
>> +
>> + mapping = kzalloc(sizeof(*mapping), GFP_KERNEL);
>> + if (!mapping) {
>> + ret = -ENOMEM;
>> + goto unlock;
>> + }
>> +
>> + mapping->parcel.label = region->label;
>
>> + mapping->guest_phys_addr = region->guest_phys_addr;
>> + mapping->npages = region->memory_size >> PAGE_SHIFT;
>> + parcel = &mapping->parcel;
>> + parcel->mem_handle = GH_MEM_HANDLE_INVAL; /* to be filled later
>> by mem_share/mem_lend */
>> + parcel->mem_type = GH_RM_MEM_TYPE_NORMAL;
>> +
>> + /* Check for overlap */
>> + list_for_each_entry(tmp_mapping, &ghvm->memory_mappings, list) {
>> + if (!((mapping->guest_phys_addr + (mapping->npages <<
>> PAGE_SHIFT) <=
>> + tmp_mapping->guest_phys_addr) ||
>> + (mapping->guest_phys_addr >=
>> + tmp_mapping->guest_phys_addr + (tmp_mapping->npages <<
>> PAGE_SHIFT)))) {
>> + ret = -EEXIST;
>> + goto unlock;
>> + }
>> + }
> This looks like we will loop every mappign for each allocation giving us
> an O(n), How frequent and how many max mappings can be there in the system?
>
In all our use cases so far, only a few (max 3) mappings are used.
Gunyah and Linux would be similarly bounded by heap-based memory
constraints when adding more memory mappings if you don't hit U32_MAX first.
This could be reworked into a rb tree, but I thought it better to use a
simpler list-based implementation since it was easier to review and
benefits are not seen when only a few mappings used.
[snip]
>> --- a/include/uapi/linux/gunyah.h
>> +++ b/include/uapi/linux/gunyah.h
>> @@ -20,4 +20,26 @@
>> */
>> #define GH_CREATE_VM _IO(GH_IOCTL_TYPE, 0x0) /* Returns a
>> Gunyah VM fd */
>> +/*
>> + * ioctls for VM fds
>> + */
>> +struct gh_userspace_memory_region {
>
> This struct needs some kernedoc.
>
Done, as well for the other UAPI structs added later in the series.
>> + __u32 label;
>
>> +#define GH_MEM_ALLOW_READ (1UL << 0)
>> +#define GH_MEM_ALLOW_WRITE (1UL << 1)
>> +#define GH_MEM_ALLOW_EXEC (1UL << 2)
>> +/*
>> + * The guest will be lent the memory instead of shared.
>> + * In other words, the guest has exclusive access to the memory
>> region and the host loses access.
>> + */
>> +#define GH_MEM_LENT (1UL << 3)
>> + __u32 flags;
>> + __u64 guest_phys_addr;
>> + __u64 memory_size;
> if we are only expecting pages, this should probably be make explict by
> using nr_pages instead of size
>
>> + __u64 userspace_addr;
>> +};
>> +
>> +#define GH_VM_SET_USER_MEM_REGION _IOW(GH_IOCTL_TYPE, 0x1, \
>> + struct gh_userspace_memory_region)
>> +
>> #endif