Currently, kernel buffers allocated via vmalloc cannot be registered
with TEE.
Use iov_iter and its associated helper functions to manage the page
registration for all types of memory.
Suggested-by: Christoph Hellwig <[email protected]>
Signed-off-by: Arnaud Pouliquen <[email protected]>
---
Update from V3 to V4:
- improve commit message,
- use import_ubuf() instead of iov_iter_init(),
- move shm_get_kernel_pages in register_shm_helper,
- put back untagged_addr in register_shm_helper(),
- move the comment related to pin pages from shm_get_kernel_pages()
to register_shm_helper().
Update from V2 to V3:
- break lines longer than 80 columns.
Update from V1 to V2:
- replace ITER_SOURCE by ITER_DEST flag in tee_shm_register_user_buf(),
- replace IS_ERR_OR NULL(shm) by IS_ERR(shm) in tee_shm_register_user_buf().
V1:
Support for registering buffers allocated with vmalloc has not been
available since commit c83900393aa1 ("tee: Remove vmalloc page support").
This patch is an alternative to a revert and resulted from a discussion
with Christoph Hellwig [1].
This patch has been tested using the xtest tool in the OP-TEE QEMU
environment [2] and with the remoteproc TEE series that should be
proposed soon [3].
References:
[1] https://lore.kernel.org/linux-arm-kernel/[email protected]/T/#m8ec683c44fcd9b69c2aee42eaed0793afac9dd18in
[2] https://optee.readthedocs.io/en/latest/building/devices/qemu.html#build-instructions
[3] https://lore.kernel.org/linux-arm-kernel/[email protected]/T/#maca0a1fc897aadd54c7deac432e11473fe970d1d
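For illustration only (not part of this patch): a minimal usage sketch of
registering a vmalloc'ed buffer once this change is applied. The tee_context
"ctx" and the buffer size are placeholders.
        void *buf = vmalloc(SZ_64K);
        struct tee_shm *shm;
        if (!buf)
                return -ENOMEM;
        /* With this change, vmalloc memory is accepted here. */
        shm = tee_shm_register_kernel_buf(ctx, buf, SZ_64K);
        if (IS_ERR(shm)) {
                vfree(buf);
                return PTR_ERR(shm);
        }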
---
drivers/tee/tee_shm.c | 83 ++++++++++++++++++++++++-------------------
1 file changed, 46 insertions(+), 37 deletions(-)
diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c
index 673cf0359494..ac73e8143233 100644
--- a/drivers/tee/tee_shm.c
+++ b/drivers/tee/tee_shm.c
@@ -22,23 +22,12 @@ static void shm_put_kernel_pages(struct page **pages, size_t page_count)
put_page(pages[n]);
}
-static int shm_get_kernel_pages(unsigned long start, size_t page_count,
- struct page **pages)
+static void shm_get_kernel_pages(struct page **pages, size_t page_count)
{
- struct page *page;
size_t n;
- if (WARN_ON_ONCE(is_vmalloc_addr((void *)start) ||
- is_kmap_addr((void *)start)))
- return -EINVAL;
-
- page = virt_to_page((void *)start);
- for (n = 0; n < page_count; n++) {
- pages[n] = page + n;
+ for (n = 0; n < page_count; n++)
get_page(pages[n]);
- }
-
- return page_count;
}
static void release_registered_pages(struct tee_shm *shm)
@@ -214,13 +203,14 @@ struct tee_shm *tee_shm_alloc_priv_buf(struct tee_context *ctx, size_t size)
EXPORT_SYMBOL_GPL(tee_shm_alloc_priv_buf);
static struct tee_shm *
-register_shm_helper(struct tee_context *ctx, unsigned long addr,
- size_t length, u32 flags, int id)
+register_shm_helper(struct tee_context *ctx, struct iov_iter *iter, u32 flags,
+ int id)
{
struct tee_device *teedev = ctx->teedev;
struct tee_shm *shm;
- unsigned long start;
- size_t num_pages;
+ unsigned long start, addr;
+ size_t num_pages, off;
+ ssize_t len;
void *ret;
int rc;
@@ -245,31 +235,38 @@ register_shm_helper(struct tee_context *ctx, unsigned long addr,
shm->flags = flags;
shm->ctx = ctx;
shm->id = id;
- addr = untagged_addr(addr);
+ addr = untagged_addr((unsigned long)iter_iov_addr(iter));
start = rounddown(addr, PAGE_SIZE);
- shm->offset = addr - start;
- shm->size = length;
- num_pages = (roundup(addr + length, PAGE_SIZE) - start) / PAGE_SIZE;
+ num_pages = iov_iter_npages(iter, INT_MAX);
+ if (!num_pages) {
+ ret = ERR_PTR(-ENOMEM);
+ goto err_ctx_put;
+ }
+
shm->pages = kcalloc(num_pages, sizeof(*shm->pages), GFP_KERNEL);
if (!shm->pages) {
ret = ERR_PTR(-ENOMEM);
goto err_free_shm;
}
- if (flags & TEE_SHM_USER_MAPPED)
- rc = pin_user_pages_fast(start, num_pages, FOLL_WRITE,
- shm->pages);
- else
- rc = shm_get_kernel_pages(start, num_pages, shm->pages);
- if (rc > 0)
- shm->num_pages = rc;
- if (rc != num_pages) {
- if (rc >= 0)
- rc = -ENOMEM;
- ret = ERR_PTR(rc);
- goto err_put_shm_pages;
+ len = iov_iter_extract_pages(iter, &shm->pages, LONG_MAX, num_pages, 0,
+ &off);
+ if (unlikely(len <= 0)) {
+ ret = len ? ERR_PTR(len) : ERR_PTR(-ENOMEM);
+ goto err_free_shm_pages;
}
+ /*
+ * iov_iter_extract_kvec_pages does not get reference on the pages,
+ * get a pin on them.
+ */
+ if (iov_iter_is_kvec(iter))
+ shm_get_kernel_pages(shm->pages, num_pages);
+
+ shm->offset = off;
+ shm->size = len;
+ shm->num_pages = num_pages;
+
rc = teedev->desc->ops->shm_register(ctx, shm, shm->pages,
shm->num_pages, start);
if (rc) {
@@ -279,10 +276,11 @@ register_shm_helper(struct tee_context *ctx, unsigned long addr,
return shm;
err_put_shm_pages:
- if (flags & TEE_SHM_USER_MAPPED)
+ if (!iov_iter_is_kvec(iter))
unpin_user_pages(shm->pages, shm->num_pages);
else
shm_put_kernel_pages(shm->pages, shm->num_pages);
+err_free_shm_pages:
kfree(shm->pages);
err_free_shm:
kfree(shm);
@@ -307,8 +305,9 @@ struct tee_shm *tee_shm_register_user_buf(struct tee_context *ctx,
u32 flags = TEE_SHM_USER_MAPPED | TEE_SHM_DYNAMIC;
struct tee_device *teedev = ctx->teedev;
struct tee_shm *shm;
+ struct iov_iter iter;
void *ret;
- int id;
+ int id, err;
if (!access_ok((void __user *)addr, length))
return ERR_PTR(-EFAULT);
@@ -319,7 +318,11 @@ struct tee_shm *tee_shm_register_user_buf(struct tee_context *ctx,
if (id < 0)
return ERR_PTR(id);
- shm = register_shm_helper(ctx, addr, length, flags, id);
+ err = import_ubuf(ITER_DEST, (void __user *)addr, length, &iter);
+ if (err)
+ return ERR_PTR(err);
+
+ shm = register_shm_helper(ctx, &iter, flags, id);
if (IS_ERR(shm)) {
mutex_lock(&teedev->mutex);
idr_remove(&teedev->idr, id);
@@ -352,8 +355,14 @@ struct tee_shm *tee_shm_register_kernel_buf(struct tee_context *ctx,
void *addr, size_t length)
{
u32 flags = TEE_SHM_DYNAMIC;
+ struct kvec kvec;
+ struct iov_iter iter;
+
+ kvec.iov_base = addr;
+ kvec.iov_len = length;
+ iov_iter_kvec(&iter, ITER_DEST, &kvec, 1, length);
- return register_shm_helper(ctx, (unsigned long)addr, length, flags, -1);
+ return register_shm_helper(ctx, &iter, flags, -1);
}
EXPORT_SYMBOL_GPL(tee_shm_register_kernel_buf);
--
2.25.1
On Wed, 29 Nov 2023 at 22:15, Arnaud Pouliquen
<[email protected]> wrote:
>
<snip>
> + /*
> + * iov_iter_extract_kvec_pages does not get reference on the pages,
> + * get a pin on them.
I think you meant: "get a reference on them". But I don't see the
value of this comment since iov_iter_extract_kvec_pages() already has
been commented properly as follows:
/*
* Extract a list of virtually contiguous pages from an ITER_KVEC iterator.
* This does not get references on the pages, nor does it get a pin on them.
*/
> + */
> + if (iov_iter_is_kvec(iter))
> + shm_get_kernel_pages(shm->pages, num_pages);
> +
> + shm->offset = off;
> + shm->size = len;
> + shm->num_pages = num_pages;
> +
> rc = teedev->desc->ops->shm_register(ctx, shm, shm->pages,
> shm->num_pages, start);
> if (rc) {
> @@ -279,10 +276,11 @@ register_shm_helper(struct tee_context *ctx, unsigned long addr,
>
> return shm;
> err_put_shm_pages:
> - if (flags & TEE_SHM_USER_MAPPED)
> + if (!iov_iter_is_kvec(iter))
> unpin_user_pages(shm->pages, shm->num_pages);
> else
> shm_put_kernel_pages(shm->pages, shm->num_pages);
> +err_free_shm_pages:
> kfree(shm->pages);
> err_free_shm:
> kfree(shm);
> @@ -307,8 +305,9 @@ struct tee_shm *tee_shm_register_user_buf(struct tee_context *ctx,
> u32 flags = TEE_SHM_USER_MAPPED | TEE_SHM_DYNAMIC;
> struct tee_device *teedev = ctx->teedev;
> struct tee_shm *shm;
> + struct iov_iter iter;
> void *ret;
> - int id;
> + int id, err;
>
> if (!access_ok((void __user *)addr, length))
> return ERR_PTR(-EFAULT);
> @@ -319,7 +318,11 @@ struct tee_shm *tee_shm_register_user_buf(struct tee_context *ctx,
> if (id < 0)
> return ERR_PTR(id);
>
> - shm = register_shm_helper(ctx, addr, length, flags, id);
> + err = import_ubuf(ITER_DEST, (void __user *)addr, length, &iter);
As I mentioned in a previous review, import_ubuf() already does the
access_ok() check, so we don't need the extra access_ok() check above.
Also, you should move import_ubuf() to be the first invocation within
this API.
-Sumit
On 11/30/23 08:54, Sumit Garg wrote:
> On Wed, 29 Nov 2023 at 22:15, Arnaud Pouliquen
> <[email protected]> wrote:
<snip>
>> + /*
>> + * iov_iter_extract_kvec_pages does not get reference on the pages,
>> + * get a pin on them.
>
> I think you meant: "get a reference on them". But I don't see the
> value of this comment since iov_iter_extract_kvec_pages() already has
> been commented properly as follows:
>
> /*
> * Extract a list of virtually contiguous pages from an ITER_KVEC iterator.
> * This does not get references on the pages, nor does it get a pin on them.
> */
>
I spent some time debugging this part. Since we use the same API for both user
and kernel buffers, we wouldn't expect to have any specific actions to take.
Therefore, I thought it would be helpful to add a comment explaining the reason
for this specific code, rather than go deeper into iov_iter to understand it.
But if you don't see the value, I can remove the comment.
<snip>
>> if (!access_ok((void __user *)addr, length))
>> return ERR_PTR(-EFAULT);
>> @@ -319,7 +318,11 @@ struct tee_shm *tee_shm_register_user_buf(struct tee_context *ctx,
>> if (id < 0)
>> return ERR_PTR(id);
>>
>> - shm = register_shm_helper(ctx, addr, length, flags, id);
>> + err = import_ubuf(ITER_DEST, (void __user *)addr, length, &iter);
>
> As I mentioned in a previous review, import_ubuf() already does the
> access_ok() check, so we don't need the extra access_ok() check above.
> Also, you should move import_ubuf() to be the first invocation within
> this API.
My apologies, I re-added import_ubuf() during testing to debug an issue
and forgot to remove it afterwards.
Thanks and regards,
Arnaud
On Thu, 30 Nov 2023 at 14:38, Arnaud POULIQUEN
<[email protected]> wrote:
>
>
>
> On 11/30/23 08:54, Sumit Garg wrote:
> > On Wed, 29 Nov 2023 at 22:15, Arnaud Pouliquen
> > <[email protected]> wrote:
<snip>
> >> + /*
> >> + * iov_iter_extract_kvec_pages does not get reference on the pages,
> >> + * get a pin on them.
> >
> > I think you meant: "get a reference on them". But I don't see the
> > value of this comment since iov_iter_extract_kvec_pages() already has
> > been commented properly as follows:
> >
> > /*
> > * Extract a list of virtually contiguous pages from an ITER_KVEC iterator.
> > * This does not get references on the pages, nor does it get a pin on them.
> > */
> >
>
> I spent some time debugging this part. Since we use the same API for both user
> and kernel buffers, we wouldn't expect to have any specific actions to take.
> Therefore, I thought it would be helpful to add a comment explaining the reason
> for this specific code, rather than go deeper into iov_iter to understand it.
>
Fair enough, let's keep it with s/pin/reference/.
> But if you don't see the value, I can remove the comment.
>
<snip>
> >> if (!access_ok((void __user *)addr, length))
> >> return ERR_PTR(-EFAULT);
> >> @@ -319,7 +318,11 @@ struct tee_shm *tee_shm_register_user_buf(struct tee_context *ctx,
> >> if (id < 0)
> >> return ERR_PTR(id);
> >>
> >> - shm = register_shm_helper(ctx, addr, length, flags, id);
> >> + err = import_ubuf(ITER_DEST, (void __user *)addr, length, &iter);
> >
> > As I mentioned in a previous review, import_ubuf() already does the
> > access_ok() check, so we don't need the extra access_ok() check above.
> > Also, you should move import_ubuf() to be the first invocation within
> > this API.
>
> My apologies, I re-added import_ubuf() during testing to debug an issue and
I suppose you intended to mention access_ok() here, BTW, no worries :).
-Sumit
> forgot to
> remove it afterwards.
On 11/30/23 13:00, Sumit Garg wrote:
> On Thu, 30 Nov 2023 at 14:38, Arnaud POULIQUEN
> <[email protected]> wrote:
>>
>>
>>
>> On 11/30/23 08:54, Sumit Garg wrote:
>>> On Wed, 29 Nov 2023 at 22:15, Arnaud Pouliquen
>>> <[email protected]> wrote:
<snip>
>>>> if (!access_ok((void __user *)addr, length))
>>>> return ERR_PTR(-EFAULT);
>>>> @@ -319,7 +318,11 @@ struct tee_shm *tee_shm_register_user_buf(struct tee_context *ctx,
>>>> if (id < 0)
>>>> return ERR_PTR(id);
>>>>
>>>> - shm = register_shm_helper(ctx, addr, length, flags, id);
>>>> + err = import_ubuf(ITER_DEST, (void __user *)addr, length, &iter);
>>>
>>> As I mentioned in a previous review, import_ubuf() already does the
>>> access_ok() check, so we don't need the extra access_ok() check above.
>>> Also, you should move import_ubuf() to be the first invocation within
>>> this API.
>>
>> My apologies, I re-added import_ubuf() during testing to debug an issue and
>
> I suppose you intended to mention access_ok() here, BTW, no worries :).
Re-running xtest after removing the access_ok() I have a crash in
regression_5006.3 Allocate_out_of_memory
[ 89.258100] ------------[ cut here ]------------
[ 89.258377] WARNING: CPU: 1 PID: 134 at mm/page_alloc.c:4402
__alloc_pages+0x674/0xd14
[ 89.258988] Modules linked in:
[ 89.259554] CPU: 1 PID: 134 Comm: xtest Not tainted 6.6.0-g1ebcc18a80d7-dirty #69
[ 89.259763] Hardware name: linux,dummy-virt (DT)
[ 89.259977] pstate: 21400005 (nzCv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--)
[ 89.260143] pc : __alloc_pages+0x674/0xd14
[ 89.260252] lr : alloc_pages+0xac/0x160
[ 89.260364] sp : ffff8000803f3a30
[ 89.260437] x29: ffff8000803f3a30 x28: ffff8000803f3d70 x27: 0000000000000000
[ 89.260705] x26: 000000000000000b x25: 0000000000000000 x24: ffff000000178000
[ 89.260847] x23: ffff00003f52b720 x22: ffff000001442720 x21: 0000000000000000
[ 89.261022] x20: 000000000000000b x19: 0000000000000000 x18: ffff8000803f3b78
[ 89.261162] x17: 0000000000000000 x16: ffffa7a428b67660 x15: 0000ffff63ffffff
[ 89.261348] x14: 0000000000000008 x13: 1fffe00000a4c421 x12: ffff8000803f3b78
[ 89.261528] x11: ffff000005262100 x10: ffff000005262108 x9 : ffff00000526210c
[ 89.261739] x8 : 0000aaab0a97c000 x7 : 0000aaab0a90e000 x6 : ffff000005262150
[ 89.261920] x5 : 0000000000000000 x4 : ffff000000976740 x3 : 0000000000000000
[ 89.262098] x2 : 0000000000000000 x1 : 0000000000000001 x0 : ffffa7a429daf000
[ 89.262340] Call trace:
[ 89.262921] __alloc_pages+0x674/0xd14
[ 89.263262] alloc_pages+0xac/0x160
[ 89.263373] alloc_pages_exact+0x48/0x94
[ 89.263464] optee_shm_register+0xa8/0x1f4
[ 89.263591] register_shm_helper+0x1bc/0x28c
[ 89.263687] tee_shm_register_user_buf+0xb8/0x128
[ 89.263816] tee_ioctl+0xbc/0xfa0
[ 89.263915] __arm64_sys_ioctl+0xa8/0xec
[ 89.264053] invoke_syscall+0x48/0x114
[ 89.264173] el0_svc_common.constprop.0+0x40/0xe8
[ 89.264321] do_el0_svc+0x20/0x2c
[ 89.264488] el0_svc+0x40/0xf4
[ 89.264578] el0t_64_sync_handler+0x13c/0x158
[ 89.264714] el0t_64_sync+0x190/0x194
[ 89.265003] ---[ end trace 0000000000000000 ]---
The issue is that import_ubuf() truncates the length [1], so its internal
access_ok() check succeeds and the problem only shows up later, in the
page allocation path.
To fix it, I propose adding a test in tee_shm_register_user_buf() after
calling import_ubuf():
if (length != iter_iov_len(&iter))
        return ERR_PTR(-EFAULT);
Would that be OK for you? I'm afraid of side effects if I update
import_ubuf()...
[1] https://elixir.bootlin.com/linux/latest/source/lib/iov_iter.c#L1553
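For clarity, here is where that check would sit in tee_shm_register_user_buf()
(illustrative sketch only, based on the snippet above):
        err = import_ubuf(ITER_DEST, (void __user *)addr, length, &iter);
        if (err)
                return ERR_PTR(err);
        /* Reject requests whose length import_ubuf() had to truncate. */
        if (length != iter_iov_len(&iter))
                return ERR_PTR(-EFAULT);
        shm = register_shm_helper(ctx, &iter, flags, id);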
Regards,
Arnaud
+ Jens A., Al Viro
<snip>
> >>>> + */
> >>>> + if (iov_iter_is_kvec(iter))
> >>>> + shm_get_kernel_pages(shm->pages, num_pages);
> >>>> +
> >>>> + shm->offset = off;
> >>>> + shm->size = len;
> >>>> + shm->num_pages = num_pages;
> >>>> +
> >>>> rc = teedev->desc->ops->shm_register(ctx, shm, shm->pages,
> >>>> shm->num_pages, start);
> >>>> if (rc) {
> >>>> @@ -279,10 +276,11 @@ register_shm_helper(struct tee_context *ctx, unsigned long addr,
> >>>>
> >>>> return shm;
> >>>> err_put_shm_pages:
> >>>> - if (flags & TEE_SHM_USER_MAPPED)
> >>>> + if (!iov_iter_is_kvec(iter))
> >>>> unpin_user_pages(shm->pages, shm->num_pages);
> >>>> else
> >>>> shm_put_kernel_pages(shm->pages, shm->num_pages);
> >>>> +err_free_shm_pages:
> >>>> kfree(shm->pages);
> >>>> err_free_shm:
> >>>> kfree(shm);
> >>>> @@ -307,8 +305,9 @@ struct tee_shm *tee_shm_register_user_buf(struct tee_context *ctx,
> >>>> u32 flags = TEE_SHM_USER_MAPPED | TEE_SHM_DYNAMIC;
> >>>> struct tee_device *teedev = ctx->teedev;
> >>>> struct tee_shm *shm;
> >>>> + struct iov_iter iter;
> >>>> void *ret;
> >>>> - int id;
> >>>> + int id, err;
> >>>>
> >>>> if (!access_ok((void __user *)addr, length))
> >>>> return ERR_PTR(-EFAULT);
> >>>> @@ -319,7 +318,11 @@ struct tee_shm *tee_shm_register_user_buf(struct tee_context *ctx,
> >>>> if (id < 0)
> >>>> return ERR_PTR(id);
> >>>>
> >>>> - shm = register_shm_helper(ctx, addr, length, flags, id);
> >>>> + err = import_ubuf(ITER_DEST, (void __user *)addr, length, &iter);
> >>>
> >>> As I mentioned in a previous review, import_ubuf() already does the
> >>> access_ok() check, so we don't need the extra access_ok() check above.
> >>> Also, you should move import_ubuf() to be the first invocation within
> >>> this API.
> >>
> >> My apologies, I re-added import_ubuf() during testing to debug an issue and
> >
> > I suppose you intended to mention access_ok() here, BTW, no worries :).
>
> Re-running xtest after removing the access_ok() I have a crash in
> regression_5006.3 Allocate_out_of_memory
>
> [ 89.258100] ------------[ cut here ]------------
> [ 89.258377] WARNING: CPU: 1 PID: 134 at mm/page_alloc.c:4402
> __alloc_pages+0x674/0xd14
> [ 89.258988] Modules linked in:
> [ 89.259554] CPU: 1 PID: 134 Comm: xtest Not tainted 6.6.0-g1ebcc18a80d7-dirty #69
> [ 89.259763] Hardware name: linux,dummy-virt (DT)
> [ 89.259977] pstate: 21400005 (nzCv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--)
> [ 89.260143] pc : __alloc_pages+0x674/0xd14
> [ 89.260252] lr : alloc_pages+0xac/0x160
> [ 89.260364] sp : ffff8000803f3a30
> [ 89.260437] x29: ffff8000803f3a30 x28: ffff8000803f3d70 x27: 0000000000000000
> [ 89.260705] x26: 000000000000000b x25: 0000000000000000 x24: ffff000000178000
> [ 89.260847] x23: ffff00003f52b720 x22: ffff000001442720 x21: 0000000000000000
> [ 89.261022] x20: 000000000000000b x19: 0000000000000000 x18: ffff8000803f3b78
> [ 89.261162] x17: 0000000000000000 x16: ffffa7a428b67660 x15: 0000ffff63ffffff
> [ 89.261348] x14: 0000000000000008 x13: 1fffe00000a4c421 x12: ffff8000803f3b78
> [ 89.261528] x11: ffff000005262100 x10: ffff000005262108 x9 : ffff00000526210c
> [ 89.261739] x8 : 0000aaab0a97c000 x7 : 0000aaab0a90e000 x6 : ffff000005262150
> [ 89.261920] x5 : 0000000000000000 x4 : ffff000000976740 x3 : 0000000000000000
> [ 89.262098] x2 : 0000000000000000 x1 : 0000000000000001 x0 : ffffa7a429daf000
> [ 89.262340] Call trace:
> [ 89.262921] __alloc_pages+0x674/0xd14
> [ 89.263262] alloc_pages+0xac/0x160
> [ 89.263373] alloc_pages_exact+0x48/0x94
> [ 89.263464] optee_shm_register+0xa8/0x1f4
> [ 89.263591] register_shm_helper+0x1bc/0x28c
> [ 89.263687] tee_shm_register_user_buf+0xb8/0x128
> [ 89.263816] tee_ioctl+0xbc/0xfa0
> [ 89.263915] __arm64_sys_ioctl+0xa8/0xec
> [ 89.264053] invoke_syscall+0x48/0x114
> [ 89.264173] el0_svc_common.constprop.0+0x40/0xe8
> [ 89.264321] do_el0_svc+0x20/0x2c
> [ 89.264488] el0_svc+0x40/0xf4
> [ 89.264578] el0t_64_sync_handler+0x13c/0x158
> [ 89.264714] el0t_64_sync+0x190/0x194
> [ 89.265003] ---[ end trace 0000000000000000 ]---
>
>
> The issue is that import_ubuf() truncates the length [1], so its internal
> access_ok() check succeeds and the problem only shows up later, in the
> page allocation path.
>
> To fix it, I propose adding a test in tee_shm_register_user_buf() after
> calling import_ubuf():
>
> if (length != iter_iov_len(&iter))
>         return ERR_PTR(-EFAULT);
>
> Would that be OK for you? I'm afraid of side effects if I update
> import_ubuf()...
IMO, access_ok() should be the first thing that import_ubuf() or
import_single_range() should do, something as follows:
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 8ff6824a1005..4aee0371824c 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1384,10 +1384,10 @@ EXPORT_SYMBOL(import_single_range);
int import_ubuf(int rw, void __user *buf, size_t len, struct iov_iter *i)
{
- if (len > MAX_RW_COUNT)
- len = MAX_RW_COUNT;
if (unlikely(!access_ok(buf, len)))
return -EFAULT;
+ if (len > MAX_RW_COUNT)
+ len = MAX_RW_COUNT;
iov_iter_ubuf(i, rw, buf, len);
return 0;
Jens A., Al Viro,
Was there any particular reason which I am unaware of to perform
access_ok() check on modified input length?
-Sumit
>
> [1] https://elixir.bootlin.com/linux/latest/source/lib/iov_iter.c#L1553
>
On 12/4/23 5:42 AM, Sumit Garg wrote:
> IMO, access_ok() should be the first thing that import_ubuf() or
> import_single_range() should do, something as follows:
>
> diff --git a/lib/iov_iter.c b/lib/iov_iter.c
> index 8ff6824a1005..4aee0371824c 100644
> --- a/lib/iov_iter.c
> +++ b/lib/iov_iter.c
> @@ -1384,10 +1384,10 @@ EXPORT_SYMBOL(import_single_range);
>
> int import_ubuf(int rw, void __user *buf, size_t len, struct iov_iter *i)
> {
> - if (len > MAX_RW_COUNT)
> - len = MAX_RW_COUNT;
> if (unlikely(!access_ok(buf, len)))
> return -EFAULT;
> + if (len > MAX_RW_COUNT)
> + len = MAX_RW_COUNT;
>
> iov_iter_ubuf(i, rw, buf, len);
> return 0;
>
> Jens A., Al Viro,
>
> Was there any particular reason which I am unaware of to perform
> access_ok() check on modified input length?
This change makes sense to me, and seems consistent with what is done
elsewhere too.
--
Jens Axboe
On 12/4/23 9:36 AM, Jens Axboe wrote:
> On 12/4/23 5:42 AM, Sumit Garg wrote:
>> IMO, access_ok() should be the first thing that import_ubuf() or
>> import_single_range() should do, something as follows:
>>
>> diff --git a/lib/iov_iter.c b/lib/iov_iter.c
>> index 8ff6824a1005..4aee0371824c 100644
>> --- a/lib/iov_iter.c
>> +++ b/lib/iov_iter.c
>> @@ -1384,10 +1384,10 @@ EXPORT_SYMBOL(import_single_range);
>>
>> int import_ubuf(int rw, void __user *buf, size_t len, struct iov_iter *i)
>> {
>> - if (len > MAX_RW_COUNT)
>> - len = MAX_RW_COUNT;
>> if (unlikely(!access_ok(buf, len)))
>> return -EFAULT;
>> + if (len > MAX_RW_COUNT)
>> + len = MAX_RW_COUNT;
>>
>> iov_iter_ubuf(i, rw, buf, len);
>> return 0;
>>
>> Jens A., Al Viro,
>>
>> Was there any particular reason which I am unaware of to perform
>> access_ok() check on modified input length?
>
> This change makes sense to me, and seems consistent with what is done
> elsewhere too.
For some reason I missed import_single_range(), which does it the same
way as import_ubuf() currently does - cap the range before the
access_ok() check. The vec variants sum as they go, but access_ok()
before the range.
I think part of the issue here is that the single range imports return 0
for success and -ERROR otherwise. This means that the caller does not
know if the full range was imported or not. OTOH, we always cap any data
transfer at MAX_RW_COUNT, so may make more sense to fix up the caller
here.
--
Jens Axboe
Hi,
On 12/4/23 17:40, Jens Axboe wrote:
> On 12/4/23 9:36 AM, Jens Axboe wrote:
>> On 12/4/23 5:42 AM, Sumit Garg wrote:
>>> IMO, access_ok() should be the first thing that import_ubuf() or
>>> import_single_range() should do, something as follows:
>>>
>>> diff --git a/lib/iov_iter.c b/lib/iov_iter.c
>>> index 8ff6824a1005..4aee0371824c 100644
>>> --- a/lib/iov_iter.c
>>> +++ b/lib/iov_iter.c
>>> @@ -1384,10 +1384,10 @@ EXPORT_SYMBOL(import_single_range);
>>>
>>> int import_ubuf(int rw, void __user *buf, size_t len, struct iov_iter *i)
>>> {
>>> - if (len > MAX_RW_COUNT)
>>> - len = MAX_RW_COUNT;
>>> if (unlikely(!access_ok(buf, len)))
>>> return -EFAULT;
>>> + if (len > MAX_RW_COUNT)
>>> + len = MAX_RW_COUNT;
>>>
>>> iov_iter_ubuf(i, rw, buf, len);
>>> return 0;
>>>
>>> Jens A., Al Viro,
>>>
>>> Was there any particular reason which I am unaware of to perform
>>> access_ok() check on modified input length?
>>
>> This change makes sense to me, and seems consistent with what is done
>> elsewhere too.
>
> For some reason I missed import_single_range(), which does it the same
> way as import_ubuf() currently does - cap the range before the
> access_ok() check. The vec variants sum as they go, but access_ok()
> before the range.
>
> I think part of the issue here is that the single range imports return 0
> for success and -ERROR otherwise. This means that the caller does not
> know if the full range was imported or not. OTOH, we always cap any data
> transfer at MAX_RW_COUNT, so may make more sense to fix up the caller
> here.
>
Should we limit to MAX_RW_COUNT or return an error?
Seems to me that limiting could generate side effect later that could be not
simple to debug.
>>> int import_ubuf(int rw, void __user *buf, size_t len, struct iov_iter *i)
>>> {
>>> - if (len > MAX_RW_COUNT)
>>> + return -EFAULT;
>>> if (unlikely(!access_ok(buf, len)))
>>> return -EFAULT;
>>>
>>> iov_iter_ubuf(i, rw, buf, len);
>>> return 0;
or perhaps just remove the test as __access_ok() already tests that the
size < TASK_SIZE
https://elixir.bootlin.com/linux/v6.7-rc4/source/include/asm-generic/access_ok.h#L31
Thanks,
Arnaud
On 12/4/23 10:02 AM, Arnaud POULIQUEN wrote:
> Hi,
>
> On 12/4/23 17:40, Jens Axboe wrote:
>> On 12/4/23 9:36 AM, Jens Axboe wrote:
>>> On 12/4/23 5:42 AM, Sumit Garg wrote:
>>>> IMO, access_ok() should be the first thing that import_ubuf() or
>>>> import_single_range() should do, something as follows:
>>>>
>>>> diff --git a/lib/iov_iter.c b/lib/iov_iter.c
>>>> index 8ff6824a1005..4aee0371824c 100644
>>>> --- a/lib/iov_iter.c
>>>> +++ b/lib/iov_iter.c
>>>> @@ -1384,10 +1384,10 @@ EXPORT_SYMBOL(import_single_range);
>>>>
>>>> int import_ubuf(int rw, void __user *buf, size_t len, struct iov_iter *i)
>>>> {
>>>> - if (len > MAX_RW_COUNT)
>>>> - len = MAX_RW_COUNT;
>>>> if (unlikely(!access_ok(buf, len)))
>>>> return -EFAULT;
>>>> + if (len > MAX_RW_COUNT)
>>>> + len = MAX_RW_COUNT;
>>>>
>>>> iov_iter_ubuf(i, rw, buf, len);
>>>> return 0;
>>>>
>>>> Jens A., Al Viro,
>>>>
>>>> Was there any particular reason which I am unaware of to perform
>>>> access_ok() check on modified input length?
>>>
>>> This change makes sense to me, and seems consistent with what is done
>>> elsewhere too.
>>
>> For some reason I missed import_single_range(), which does it the same
>> way as import_ubuf() currently does - cap the range before the
>> access_ok() check. The vec variants sum as they go, but access_ok()
>> before the range.
>>
>> I think part of the issue here is that the single range imports return 0
>> for success and -ERROR otherwise. This means that the caller does not
>> know if the full range was imported or not. OTOH, we always cap any data
>> transfer at MAX_RW_COUNT, so may make more sense to fix up the caller
>> here.
>>
>
> Should we limit to MAX_RW_COUNT or return an error? Seems to me that
> limiting could generate side effect later that could be not simple to
> debug.
We've traditionally just truncated the length, so principle of least
surprise says we should continue doing that.
--
Jens Axboe
Hi Arnaud,
On Mon, 4 Dec 2023 at 22:32, Arnaud POULIQUEN
<[email protected]> wrote:
>
> Hi,
>
> On 12/4/23 17:40, Jens Axboe wrote:
> > On 12/4/23 9:36 AM, Jens Axboe wrote:
> >> On 12/4/23 5:42 AM, Sumit Garg wrote:
> >>> IMO, access_ok() should be the first thing that import_ubuf() or
> >>> import_single_range() should do, something as follows:
> >>>
> >>> diff --git a/lib/iov_iter.c b/lib/iov_iter.c
> >>> index 8ff6824a1005..4aee0371824c 100644
> >>> --- a/lib/iov_iter.c
> >>> +++ b/lib/iov_iter.c
> >>> @@ -1384,10 +1384,10 @@ EXPORT_SYMBOL(import_single_range);
> >>>
> >>> int import_ubuf(int rw, void __user *buf, size_t len, struct iov_iter *i)
> >>> {
> >>> - if (len > MAX_RW_COUNT)
> >>> - len = MAX_RW_COUNT;
> >>> if (unlikely(!access_ok(buf, len)))
> >>> return -EFAULT;
> >>> + if (len > MAX_RW_COUNT)
> >>> + len = MAX_RW_COUNT;
> >>>
> >>> iov_iter_ubuf(i, rw, buf, len);
> >>> return 0;
> >>>
> >>> Jens A., Al Viro,
> >>>
> >>> Was there any particular reason which I am unaware of to perform
> >>> access_ok() check on modified input length?
> >>
> >> This change makes sense to me, and seems consistent with what is done
> >> elsewhere too.
> >
> > For some reason I missed import_single_range(), which does it the same
> > way as import_ubuf() currently does - cap the range before the
> > access_ok() check. The vec variants sum as they go, but access_ok()
> > before the range.
> >
> > I think part of the issue here is that the single range imports return 0
> > for success and -ERROR otherwise. This means that the caller does not
> > know if the full range was imported or not. OTOH, we always cap any data
> > transfer at MAX_RW_COUNT, so may make more sense to fix up the caller
> > here.
> >
>
> Should we limit to MAX_RW_COUNT or return an error?
> Seems to me that limiting could generate side effect later that could be not
> simple to debug.
>
>
> >>> int import_ubuf(int rw, void __user *buf, size_t len, struct iov_iter *i)
> >>> {
> >>> - if (len > MAX_RW_COUNT)
> >>> + return -EFAULT;
> >>> if (unlikely(!access_ok(buf, len)))
> >>> return -EFAULT;
> >>>
> >>> iov_iter_ubuf(i, rw, buf, len);
> >>> return 0;
>
> or perhaps just remove the test as __access_ok() already tests that the
> size < TASK_SIZE
>
> https://elixir.bootlin.com/linux/v6.7-rc4/source/include/asm-generic/access_ok.h#L31
>
It looks like there are predefined constraints for using import_ubuf()
which doesn't properly match our needs. So let's directly use:
iov_iter_ubuf() instead.
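Roughly something like this on the TEE side (untested sketch, just to
illustrate the idea):
        /* Keep the driver's own access_ok() check ... */
        if (!access_ok((void __user *)addr, length))
                return ERR_PTR(-EFAULT);
        /*
         * ... and build the iterator directly with iov_iter_ubuf(), so the
         * requested length is never silently truncated behind our back.
         */
        iov_iter_ubuf(&iter, ITER_DEST, (void __user *)addr, length);
        shm = register_shm_helper(ctx, &iter, flags, id);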
-Sumit
>
> Thanks,
> Arnaud
>
Hi Sumit,
On 12/5/23 13:07, Sumit Garg wrote:
> Hi Arnaud,
>
> On Mon, 4 Dec 2023 at 22:32, Arnaud POULIQUEN
> <[email protected]> wrote:
>>
>> Hi,
>>
>> On 12/4/23 17:40, Jens Axboe wrote:
>>> On 12/4/23 9:36 AM, Jens Axboe wrote:
>>>> On 12/4/23 5:42 AM, Sumit Garg wrote:
>>>>> IMO, access_ok() should be the first thing that import_ubuf() or
>>>>> import_single_range() should do, something as follows:
>>>>>
>>>>> diff --git a/lib/iov_iter.c b/lib/iov_iter.c
>>>>> index 8ff6824a1005..4aee0371824c 100644
>>>>> --- a/lib/iov_iter.c
>>>>> +++ b/lib/iov_iter.c
>>>>> @@ -1384,10 +1384,10 @@ EXPORT_SYMBOL(import_single_range);
>>>>>
>>>>> int import_ubuf(int rw, void __user *buf, size_t len, struct iov_iter *i)
>>>>> {
>>>>> - if (len > MAX_RW_COUNT)
>>>>> - len = MAX_RW_COUNT;
>>>>> if (unlikely(!access_ok(buf, len)))
>>>>> return -EFAULT;
>>>>> + if (len > MAX_RW_COUNT)
>>>>> + len = MAX_RW_COUNT;
>>>>>
>>>>> iov_iter_ubuf(i, rw, buf, len);
>>>>> return 0;
>>>>>
>>>>> Jens A., Al Viro,
>>>>>
>>>>> Was there any particular reason which I am unaware of to perform
>>>>> access_ok() check on modified input length?
>>>>
>>>> This change makes sense to me, and seems consistent with what is done
>>>> elsewhere too.
>>>
>>> For some reason I missed import_single_range(), which does it the same
>>> way as import_ubuf() currently does - cap the range before the
>>> access_ok() check. The vec variants sum as they go, but access_ok()
>>> before the range.
>>>
>>> I think part of the issue here is that the single range imports return 0
>>> for success and -ERROR otherwise. This means that the caller does not
>>> know if the full range was imported or not. OTOH, we always cap any data
>>> transfer at MAX_RW_COUNT, so may make more sense to fix up the caller
>>> here.
>>>
>>
>> Should we limit to MAX_RW_COUNT or return an error?
>> Seems to me that limiting could generate side effect later that could be not
>> simple to debug.
>>
>>
>>>>> int import_ubuf(int rw, void __user *buf, size_t len, struct iov_iter *i)
>>>>> {
>>>>> - if (len > MAX_RW_COUNT)
>>>>> + return -EFAULT;
>>>>> if (unlikely(!access_ok(buf, len)))
>>>>> return -EFAULT;
>>>>>
>>>>> iov_iter_ubuf(i, rw, buf, len);
>>>>> return 0;
>>
>> or perhaps just remove the test as __access_ok() already tests that the
>> size < TASK_SIZE
>>
>> https://elixir.bootlin.com/linux/v6.7-rc4/source/include/asm-generic/access_ok.h#L31
>>
>
> It looks like there are predefined constraints for using import_ubuf()
> which doesn't properly match our needs. So let's directly use:
> iov_iter_ubuf() instead.
Yes, this seems a safer alternative. I will send a new version based on it.
Thanks,
Arnaud
>
> -Sumit
>
>>
>> Thanks,
>> Arnaud
>>
hi Jens Axboe, Al Viro,
On 12/4/23 18:13, Jens Axboe wrote:
> On 12/4/23 10:02 AM, Arnaud POULIQUEN wrote:
>> Hi,
>>
>> On 12/4/23 17:40, Jens Axboe wrote:
>>> On 12/4/23 9:36 AM, Jens Axboe wrote:
>>>> On 12/4/23 5:42 AM, Sumit Garg wrote:
>>>>> IMO, access_ok() should be the first thing that import_ubuf() or
>>>>> import_single_range() should do, something as follows:
>>>>>
>>>>> diff --git a/lib/iov_iter.c b/lib/iov_iter.c
>>>>> index 8ff6824a1005..4aee0371824c 100644
>>>>> --- a/lib/iov_iter.c
>>>>> +++ b/lib/iov_iter.c
>>>>> @@ -1384,10 +1384,10 @@ EXPORT_SYMBOL(import_single_range);
>>>>>
>>>>> int import_ubuf(int rw, void __user *buf, size_t len, struct iov_iter *i)
>>>>> {
>>>>> - if (len > MAX_RW_COUNT)
>>>>> - len = MAX_RW_COUNT;
>>>>> if (unlikely(!access_ok(buf, len)))
>>>>> return -EFAULT;
>>>>> + if (len > MAX_RW_COUNT)
>>>>> + len = MAX_RW_COUNT;
>>>>>
>>>>> iov_iter_ubuf(i, rw, buf, len);
>>>>> return 0;
>>>>>
>>>>> Jens A., Al Viro,
>>>>>
>>>>> Was there any particular reason which I am unaware of to perform
>>>>> access_ok() check on modified input length?
>>>>
>>>> This change makes sense to me, and seems consistent with what is done
>>>> elsewhere too.
>>>
>>> For some reason I missed import_single_range(), which does it the same
>>> way as import_ubuf() currently does - cap the range before the
>>> access_ok() check. The vec variants sum as they go, but access_ok()
>>> before the range.
>>>
>>> I think part of the issue here is that the single range imports return 0
>>> for success and -ERROR otherwise. This means that the caller does not
>>> know if the full range was imported or not. OTOH, we always cap any data
>>> transfer at MAX_RW_COUNT, so may make more sense to fix up the caller
>>> here.
>>>
>>
>> Should we limit to MAX_RW_COUNT or return an error? Seems to me that
>> limiting could generate side effect later that could be not simple to
>> debug.
>
> We've traditionally just truncated the length, so principle of least
> surprise says we should continue doing that.
>
As Jens Wiklander has proposed using iov_iter_ubuf() instead of import_ubuf(),
should I propose a patch updating import_ubuf() and import_single_range()?
Or would you prefer that we keep the functions unchanged for the time being?
Regards,
Arnaud
On 12/5/23 9:55 AM, Arnaud POULIQUEN wrote:
> hi Jens Axboe, Al Viro,
>
> On 12/4/23 18:13, Jens Axboe wrote:
>> On 12/4/23 10:02 AM, Arnaud POULIQUEN wrote:
>>> Hi,
>>>
>>> On 12/4/23 17:40, Jens Axboe wrote:
>>>> On 12/4/23 9:36 AM, Jens Axboe wrote:
>>>>> On 12/4/23 5:42 AM, Sumit Garg wrote:
>>>>>> IMO, access_ok() should be the first thing that import_ubuf() or
>>>>>> import_single_range() should do, something as follows:
>>>>>>
>>>>>> diff --git a/lib/iov_iter.c b/lib/iov_iter.c
>>>>>> index 8ff6824a1005..4aee0371824c 100644
>>>>>> --- a/lib/iov_iter.c
>>>>>> +++ b/lib/iov_iter.c
>>>>>> @@ -1384,10 +1384,10 @@ EXPORT_SYMBOL(import_single_range);
>>>>>>
>>>>>> int import_ubuf(int rw, void __user *buf, size_t len, struct iov_iter *i)
>>>>>> {
>>>>>> - if (len > MAX_RW_COUNT)
>>>>>> - len = MAX_RW_COUNT;
>>>>>> if (unlikely(!access_ok(buf, len)))
>>>>>> return -EFAULT;
>>>>>> + if (len > MAX_RW_COUNT)
>>>>>> + len = MAX_RW_COUNT;
>>>>>>
>>>>>> iov_iter_ubuf(i, rw, buf, len);
>>>>>> return 0;
>>>>>>
>>>>>> Jens A., Al Viro,
>>>>>>
>>>>>> Was there any particular reason which I am unaware of to perform
>>>>>> access_ok() check on modified input length?
>>>>>
>>>>> This change makes sense to me, and seems consistent with what is done
>>>>> elsewhere too.
>>>>
>>>> For some reason I missed import_single_range(), which does it the same
>>>> way as import_ubuf() currently does - cap the range before the
>>>> access_ok() check. The vec variants sum as they go, but access_ok()
>>>> before the range.
>>>>
>>>> I think part of the issue here is that the single range imports return 0
>>>> for success and -ERROR otherwise. This means that the caller does not
>>>> know if the full range was imported or not. OTOH, we always cap any data
>>>> transfer at MAX_RW_COUNT, so may make more sense to fix up the caller
>>>> here.
>>>>
>>>
>>> Should we limit to MAX_RW_COUNT or return an error? Seems to me that
>>> limiting could generate side effect later that could be not simple to
>>> debug.
>>
>> We've traditionally just truncated the length, so principle of least
>> surprise says we should continue doing that.
>>
>
>
> As Jens Wiklander has proposed using iov_iter_ubuf() instead of
> import_ubuf(), should I propose a patch updating import_ubuf() and
> import_single_range()? Or would you prefer that we keep the functions
> unchanged for the time being?
Arguably it should be consistent with iovec imports, which return the
length (or error). But it might be safer to just check access_ok()
first and then truncate at least, vs what is there now.
Note that for 6.8 import_single_range() is gone as it was really just
doing the same thing that import_ubuf() is. Any further changes in this
area should CC Christian Brauner as well, as he has that staged in his
tree.
--
Jens Axboe
> > As Jens Wiklander has proposed using iov_iter_ubuf() instead of
> > import_ubuf(), should I propose a patch updating import_ubuf() and
> > import_single_range()? Or would you prefer that we keep the functions
> > unchanged for the time being?
>
> Arguably it should be consistent with iovec imports, which return the
> length (or error). But it might be safer to just check access_ok()
> first and then truncate at least, vs what is there now.
Is the access_ok() check even needed when setting up an iov_iter?
It is always checked again when the actual copy is done.
I looked at this a while back and couldn't see any code paths that
relied on the early access_ok() check.
Maybe it is historic?
David