Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752413AbdLFQKk (ORCPT ); Wed, 6 Dec 2017 11:10:40 -0500 Received: from szxga04-in.huawei.com ([45.249.212.190]:11914 "EHLO szxga04-in.huawei.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751815AbdLFQKi (ORCPT ); Wed, 6 Dec 2017 11:10:38 -0500 From: Shameer Kolothum To: , , CC: , , , Shameer Kolothum Subject: [RFC] vfio/type1: Add IOVA_RANGE capability support Date: Wed, 6 Dec 2017 16:07:36 +0000 Message-ID: <20171206160736.77704-1-shameerali.kolothum.thodi@huawei.com> X-Mailer: git-send-email 2.12.0.windows.1 MIME-Version: 1.0 Content-Type: text/plain X-Originating-IP: [10.202.227.237] X-CFilter-Loop: Reflected X-Mirapoint-Virus-RAPID-Raw: score=unknown(0), refid=str=0001.0A020203.5A28167A.026B,ss=1,re=0.000,recu=0.000,reip=0.000,cl=1,cld=1,fgs=0, ip=0.0.0.0, so=2014-11-16 11:51:01, dmn=2013-03-21 17:37:32 X-Mirapoint-Loop-Id: c1a585e15280aed903112af46643196e Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 7214 Lines: 262 This patch allows the user-space to retrieve the supported IOVA range(s), excluding any reserved regions. The implementation is based on capability chains, added to the VFIO_IOMMU_GET_INFO ioctl. This is following the discussions here[1] and is based on the RFC patch[2]. ToDo: - This currently derives the default supported iova range from the first iommu domain. This needs to be changed to go through the domain_list instead. - Sync with Pierre's patch[3]. 
1.https://lists.gnu.org/archive/html/qemu-devel/2017-11/msg03651.html 2.https://lists.linuxfoundation.org/pipermail/iommu/2016-November/019002.html 3.https://patchwork.kernel.org/patch/10084655/ Signed-off-by: Shameer Kolothum --- drivers/vfio/vfio_iommu_type1.c | 172 +++++++++++++++++++++++++++++++++++++++- include/uapi/linux/vfio.h | 13 +++ 2 files changed, 184 insertions(+), 1 deletion(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index e30e29a..72ca78a 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -92,6 +93,12 @@ struct vfio_group { struct list_head next; }; +struct vfio_iommu_iova { + struct list_head list; + phys_addr_t start; + phys_addr_t end; +}; + /* * Guest RAM pinning working set or DMA target */ @@ -1537,6 +1544,144 @@ static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu) return ret; } +static int vfio_add_iova_cap(struct vfio_info_cap *caps, u64 start, u64 end) +{ + struct vfio_iommu_type1_info_cap_iova_range *cap; + struct vfio_info_cap_header *header; + + header = vfio_info_cap_add(caps, sizeof(*cap), + VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE, 1); + if (IS_ERR(header)) + return PTR_ERR(header); + + cap = container_of(header, + struct vfio_iommu_type1_info_cap_iova_range, + header); + + cap->start = start; + cap->end = end; + + return 0; +} + +static int vfio_insert_iova(phys_addr_t start, phys_addr_t end, + struct list_head *head) +{ + struct vfio_iommu_iova *region; + + region = kzalloc(sizeof(*region), GFP_KERNEL); + if (!region) + return -ENOMEM; + + INIT_LIST_HEAD(&region->list); + region->start = start; + region->end = end; + + list_add_tail(&region->list, head); + return 0; +} + +/* + * Check and update iova region list in case a reserved region + * overlaps the iommu iova range.
+ */ +static int vfio_update_iommu_iova_range(phys_addr_t start, phys_addr_t end, + struct list_head *iova) +{ + struct vfio_iommu_iova *node; + phys_addr_t a, b; + int ret = 0; + + if (list_empty(iova)) + return -ENODEV; + + node = list_last_entry(iova, struct vfio_iommu_iova, list); + a = node->start; + b = node->end; + + /* No overlap */ + if ((start > b) || (end < a)) + return 0; + + if (start > a) + ret = vfio_insert_iova(a, start - 1, &node->list); + if (ret) + goto done; + if (end < b) + ret = vfio_insert_iova(end + 1, b, &node->list); + +done: + list_del(&node->list); + kfree(node); + + return ret; +} + +static int vfio_resv_cmp(void *priv, struct list_head *a, struct list_head *b) +{ + struct iommu_resv_region *ra, *rb; + + ra = container_of(a, struct iommu_resv_region, list); + rb = container_of(b, struct iommu_resv_region, list); + + if (ra->start < rb->start) + return -1; + if (ra->start > rb->start) + return 1; + return 0; +} + +static int vfio_build_iommu_iova_caps(struct vfio_iommu *iommu, + struct vfio_info_cap *caps) +{ + struct iommu_resv_region *resv, *resv_next; + struct vfio_iommu_iova *iova, *iova_next; + struct list_head group_resv_regions, vfio_iova_regions; + struct vfio_domain *domain; + struct vfio_group *g; + phys_addr_t start, end; + int ret = 0; + + domain = list_first_entry(&iommu->domain_list, + struct vfio_domain, next); + /* Get the default iova range supported */ + start = domain->domain->geometry.aperture_start; + end = domain->domain->geometry.aperture_end; + INIT_LIST_HEAD(&vfio_iova_regions); + vfio_insert_iova(start, end, &vfio_iova_regions); + + /* Get reserved regions if any */ + INIT_LIST_HEAD(&group_resv_regions); + list_for_each_entry(g, &domain->group_list, next) + iommu_get_group_resv_regions(g->iommu_group, + &group_resv_regions); + list_sort(NULL, &group_resv_regions, vfio_resv_cmp); + + /* Update iova range excluding reserved regions */ + list_for_each_entry(resv, &group_resv_regions, list) { + ret = 
vfio_update_iommu_iova_range(resv->start, + resv->start + resv->length - 1, + &vfio_iova_regions); + if (ret) + goto done; + } + + list_for_each_entry(iova, &vfio_iova_regions, list) { + ret = vfio_add_iova_cap(caps, iova->start, iova->end); + if (ret) + goto done; + } + +done: + list_for_each_entry_safe(resv, resv_next, &group_resv_regions, list) + kfree(resv); + + list_for_each_entry_safe(iova, iova_next, &vfio_iova_regions, list) + kfree(iova); + + return ret; +} + static long vfio_iommu_type1_ioctl(void *iommu_data, unsigned int cmd, unsigned long arg) { @@ -1558,8 +1703,10 @@ static long vfio_iommu_type1_ioctl(void *iommu_data, } } else if (cmd == VFIO_IOMMU_GET_INFO) { struct vfio_iommu_type1_info info; + struct vfio_info_cap caps = { .buf = NULL, .size = 0 }; + int ret; - minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes); + minsz = offsetofend(struct vfio_iommu_type1_info, cap_offset); if (copy_from_user(&info, (void __user *)arg, minsz)) return -EFAULT; @@ -1571,6 +1718,29 @@ static long vfio_iommu_type1_ioctl(void *iommu_data, info.iova_pgsizes = vfio_pgsize_bitmap(iommu); + ret = vfio_build_iommu_iova_caps(iommu, &caps); + if (ret) + return ret; + + if (caps.size) { + info.flags |= VFIO_IOMMU_INFO_CAPS; + if (info.argsz < sizeof(info) + caps.size) { + info.argsz = sizeof(info) + caps.size; + info.cap_offset = 0; + } else { + vfio_info_cap_shift(&caps, sizeof(info)); + if (copy_to_user((void __user *)arg + + sizeof(info), caps.buf, + caps.size)) { + kfree(caps.buf); + return -EFAULT; + } + info.cap_offset = sizeof(info); + } + + kfree(caps.buf); + } + return copy_to_user((void __user *)arg, &info, minsz) ? 
-EFAULT : 0; diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index e3301db..c4e338b 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -517,7 +517,20 @@ struct vfio_iommu_type1_info { __u32 argsz; __u32 flags; #define VFIO_IOMMU_INFO_PGSIZES (1 << 0) /* supported page sizes info */ +#define VFIO_IOMMU_INFO_CAPS (1 << 1) /* Info supports caps */ __u64 iova_pgsizes; /* Bitmap of supported page sizes */ + __u32 cap_offset; /* Offset within info struct of first cap */ + __u32 __resv; +}; + +/* + * The IOVA_RANGE capability allows reporting the supported IOVA range(s). + */ +#define VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE 1 +struct vfio_iommu_type1_info_cap_iova_range { + struct vfio_info_cap_header header; + __u64 start; + __u64 end; }; #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) -- 1.9.1