2023-10-25 06:58:07

by NK, JESHWANTHKUMAR

[permalink] [raw]
Subject: [PATCH 0/3] Introduce DMA APIs to allocate and free TEE shared memory

From: Jeshwanth Kumar N K <[email protected]>

At present, the shared memory for TEE ring buffer, command buffer and
data buffer is allocated using get_free_pages(). The driver shares the
physical address of these buffers with PSP so that it can be mapped by
the Trusted OS.

In this patch series we have replaced get_free_pages() with
dma_alloc_coherent() to allocate shared memory to cleanup the existing
allocation method.

Rijo Thomas (3):
crypto: ccp - Add function to allocate and free memory using DMA APIs
crypto: ccp - Use psp_tee_alloc_buffer() and psp_tee_free_buffer()
tee: amdtee: Use psp_tee_alloc_buffer() and psp_tee_free_buffer()

drivers/crypto/ccp/psp-dev.c | 3 +
drivers/crypto/ccp/tee-dev.c | 119 ++++++++++++++++++----------
drivers/crypto/ccp/tee-dev.h | 11 +--
drivers/tee/amdtee/amdtee_private.h | 18 ++---
drivers/tee/amdtee/call.c | 74 ++++++++---------
drivers/tee/amdtee/core.c | 72 ++++++++++-------
drivers/tee/amdtee/shm_pool.c | 21 ++---
include/linux/psp-tee.h | 47 +++++++++++
8 files changed, 221 insertions(+), 144 deletions(-)

--
2.25.1


2023-10-25 06:58:38

by NK, JESHWANTHKUMAR

[permalink] [raw]
Subject: [PATCH 1/3] crypto: ccp - Add function to allocate and free memory using DMA APIs

From: Rijo Thomas <[email protected]>

As part of cleanup, memory allocation using get_free_pages() is replaced
with DMA APIs.

psp_tee_alloc_buffer() and psp_tee_free_buffer() has been introduced, so
that DMA address can be shared with PSP Trusted OS for buffer mapping.
In the presence of IOMMU, the address will be an IOVA. So, it must be
converted into a physical address before sharing it with PSP Trusted OS.

Signed-off-by: Rijo Thomas <[email protected]>
Signed-off-by: Jeshwanth Kumar <[email protected]>
---
drivers/crypto/ccp/psp-dev.c | 3 +++
drivers/crypto/ccp/tee-dev.c | 51 ++++++++++++++++++++++++++++++++++++
include/linux/psp-tee.h | 47 +++++++++++++++++++++++++++++++++
3 files changed, 101 insertions(+)

diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c
index d42d7bc62352..049954d9984b 100644
--- a/drivers/crypto/ccp/psp-dev.c
+++ b/drivers/crypto/ccp/psp-dev.c
@@ -163,6 +163,9 @@ int psp_dev_init(struct sp_device *sp)
goto e_err;
}

+ if (sp->set_psp_master_device)
+ sp->set_psp_master_device(sp);
+
psp->io_regs = sp->io_map;

ret = psp_get_capability(psp);
diff --git a/drivers/crypto/ccp/tee-dev.c b/drivers/crypto/ccp/tee-dev.c
index 5560bf8329a1..fa6f89572613 100644
--- a/drivers/crypto/ccp/tee-dev.c
+++ b/drivers/crypto/ccp/tee-dev.c
@@ -13,6 +13,8 @@
#include <linux/mutex.h>
#include <linux/delay.h>
#include <linux/slab.h>
+#include <linux/dma-direct.h>
+#include <linux/iommu.h>
#include <linux/gfp.h>
#include <linux/psp.h>
#include <linux/psp-tee.h>
@@ -22,6 +24,55 @@

static bool psp_dead;

+struct psp_tee_buffer *psp_tee_alloc_buffer(unsigned long size, gfp_t gfp)
+{
+ struct psp_device *psp = psp_get_master_device();
+ struct psp_tee_buffer *tee_buf;
+ struct iommu_domain *dom;
+
+ if (!psp || !size)
+ return NULL;
+
+ tee_buf = kzalloc(sizeof(*tee_buf), GFP_KERNEL);
+ if (!tee_buf)
+ return NULL;
+
+ tee_buf->vaddr = dma_alloc_coherent(psp->dev, size, &tee_buf->dma, gfp);
+ if (!tee_buf->vaddr || !tee_buf->dma) {
+ kfree(tee_buf);
+ return NULL;
+ }
+
+ tee_buf->size = size;
+
+ /* Check whether IOMMU is present. If present, translate IOVA
+ * to physical address, else the dma handle is the physical
+ * address.
+ */
+ dom = iommu_get_domain_for_dev(psp->dev);
+ if (dom)
+ tee_buf->paddr = iommu_iova_to_phys(dom, tee_buf->dma);
+ else
+ tee_buf->paddr = tee_buf->dma;
+
+ return tee_buf;
+}
+EXPORT_SYMBOL(psp_tee_alloc_buffer);
+
+void psp_tee_free_buffer(struct psp_tee_buffer *tee_buf)
+{
+ struct psp_device *psp = psp_get_master_device();
+
+ if (!psp || !tee_buf)
+ return;
+
+ dma_free_coherent(psp->dev, tee_buf->size,
+ tee_buf->vaddr, tee_buf->dma);
+
+ kfree(tee_buf);
+}
+EXPORT_SYMBOL(psp_tee_free_buffer);
+
static int tee_alloc_ring(struct psp_tee_device *tee, int ring_size)
{
struct ring_buf_manager *rb_mgr = &tee->rb_mgr;
diff --git a/include/linux/psp-tee.h b/include/linux/psp-tee.h
index cb0c95d6d76b..c3db3f33d069 100644
--- a/include/linux/psp-tee.h
+++ b/include/linux/psp-tee.h
@@ -40,6 +40,20 @@ enum tee_cmd_id {
TEE_CMD_ID_UNMAP_SHARED_MEM,
};

+/**
+ * struct psp_tee_buffer - Structure of a TEE buffer shared with PSP.
+ * @dma: DMA buffer address
+ * @paddr: Physical address of DMA buffer
+ * @vaddr: CPU virtual address of DMA buffer
+ * @size: Size of DMA buffer in bytes
+ */
+struct psp_tee_buffer {
+ dma_addr_t dma;
+ phys_addr_t paddr;
+ void *vaddr;
+ unsigned long size;
+};
+
#ifdef CONFIG_CRYPTO_DEV_SP_PSP
/**
* psp_tee_process_cmd() - Process command in Trusted Execution Environment
@@ -75,6 +89,28 @@ int psp_tee_process_cmd(enum tee_cmd_id cmd_id, void *buf, size_t len,
*/
int psp_check_tee_status(void);

+/**
+ * psp_tee_alloc_buffer() - Allocates memory of requested size and flags using
+ * dma_alloc_coherent() API.
+ *
+ * This function can be used to allocate a shared memory region between the
+ * host and PSP TEE.
+ *
+ * Returns:
+ * non-NULL a valid pointer to struct psp_tee_buffer
+ * NULL on failure
+ */
+struct psp_tee_buffer *psp_tee_alloc_buffer(unsigned long size, gfp_t gfp);
+
+/**
+ * psp_tee_free_buffer() - Deallocates memory using dma_free_coherent() API.
+ *
+ * This function can be used to release shared memory region between host
+ * and PSP TEE.
+ *
+ */
+void psp_tee_free_buffer(struct psp_tee_buffer *tee_buffer);
+
#else /* !CONFIG_CRYPTO_DEV_SP_PSP */

static inline int psp_tee_process_cmd(enum tee_cmd_id cmd_id, void *buf,
@@ -87,5 +123,16 @@ static inline int psp_check_tee_status(void)
{
return -ENODEV;
}
+
+static inline
+struct psp_tee_buffer *psp_tee_alloc_buffer(unsigned long size, gfp_t gfp)
+{
+ return NULL;
+}
+
+static inline void psp_tee_free_buffer(struct psp_tee_buffer *tee_buffer)
+{
+}
+
#endif /* CONFIG_CRYPTO_DEV_SP_PSP */
#endif /* __PSP_TEE_H_ */
--
2.25.1

2023-10-25 13:32:41

by Sumit Garg

[permalink] [raw]
Subject: Re: [PATCH 0/3] Introduce DMA APIs to allocate and free TEE shared memory

Hi Jeshwank,

On Wed, 25 Oct 2023 at 12:27, jeshwank <[email protected]> wrote:
>
> From: Jeshwanth Kumar N K <[email protected]>
>
> At present, the shared memory for TEE ring buffer, command buffer and
> data buffer is allocated using get_free_pages(). The driver shares the
> physical address of these buffers with PSP so that it can be mapped by
> the Trusted OS.
>
> In this patch series we have replaced get_free_pages() with
> dma_alloc_coherent() to allocate shared memory to cleanup the existing
> allocation method.

Thanks for putting this together but I can't find the reasoning behind
this change neither in this commit message and nor in the patch
descriptions. Care to explain why?

-Sumit

>
> Rijo Thomas (3):
> crypto: ccp - Add function to allocate and free memory using DMA APIs
> crypto: ccp - Use psp_tee_alloc_buffer() and psp_tee_free_buffer()
> tee: amdtee: Use psp_tee_alloc_buffer() and psp_tee_free_buffer()
>
> drivers/crypto/ccp/psp-dev.c | 3 +
> drivers/crypto/ccp/tee-dev.c | 119 ++++++++++++++++++----------
> drivers/crypto/ccp/tee-dev.h | 11 +--
> drivers/tee/amdtee/amdtee_private.h | 18 ++---
> drivers/tee/amdtee/call.c | 74 ++++++++---------
> drivers/tee/amdtee/core.c | 72 ++++++++++-------
> drivers/tee/amdtee/shm_pool.c | 21 ++---
> include/linux/psp-tee.h | 47 +++++++++++
> 8 files changed, 221 insertions(+), 144 deletions(-)
>
> --
> 2.25.1
>

2023-10-26 14:54:13

by Tom Lendacky

[permalink] [raw]
Subject: Re: [PATCH 0/3] Introduce DMA APIs to allocate and free TEE shared memory

On 10/26/23 05:30, NK, JESHWANTHKUMAR wrote:
>
> On 25-Oct-23 7:01 PM, Sumit Garg wrote:
>> Hi Jeshwank,
>>
>> On Wed, 25 Oct 2023 at 12:27, jeshwank <[email protected]> wrote:
>>> From: Jeshwanth Kumar N K <[email protected]>
>>>
>>> At present, the shared memory for TEE ring buffer, command buffer and
>>> data buffer is allocated using get_free_pages(). The driver shares the
>>> physical address of these buffers with PSP so that it can be mapped by
>>> the Trusted OS.
>>>
>>> In this patch series we have replaced get_free_pages() with
>>> dma_alloc_coherent() to allocate shared memory to cleanup the existing
>>> allocation method.
>> Thanks for putting this together but I can't find the reasoning behind
>> this change neither in this commit message and nor in the patch
>> descriptions. Care to explain why?
>>
>> -Sumit
> Hi Sumit,
>
> We see that there is an advantage in using dma_alloc_coherent() over
> get_free_pages(). The dma-ops associated with PSP PCIe device can be
> overridden. This capability will be helpful when we enable virtualization
> support. We plan to post a virtualization related patch in future.

To be specific, you are referring to Xen virtualization support, correct?
Because I don't see how this works in a Qemu/KVM environment where you
would get a GPA and not an SPA.

If that is the case, you should clearly specify that. Also, this looks
like it should be introduced with the virtualization support that you
submit in the future and not before.

Thanks,
Tom

>
> Regards,
>
> Jeshwanth
>
>>
>>> Rijo Thomas (3):
>>>    crypto: ccp - Add function to allocate and free memory using DMA APIs
>>>    crypto: ccp - Use psp_tee_alloc_buffer() and psp_tee_free_buffer()
>>>    tee: amdtee: Use psp_tee_alloc_buffer() and psp_tee_free_buffer()
>>>
>>>   drivers/crypto/ccp/psp-dev.c        |   3 +
>>>   drivers/crypto/ccp/tee-dev.c        | 119 ++++++++++++++++++----------
>>>   drivers/crypto/ccp/tee-dev.h        |  11 +--
>>>   drivers/tee/amdtee/amdtee_private.h |  18 ++---
>>>   drivers/tee/amdtee/call.c           |  74 ++++++++---------
>>>   drivers/tee/amdtee/core.c           |  72 ++++++++++-------
>>>   drivers/tee/amdtee/shm_pool.c       |  21 ++---
>>>   include/linux/psp-tee.h             |  47 +++++++++++
>>>   8 files changed, 221 insertions(+), 144 deletions(-)
>>>
>>> --
>>> 2.25.1
>>>

2023-10-27 05:24:24

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 1/3] crypto: ccp - Add function to allocate and free memory using DMA APIs

On Wed, Oct 25, 2023 at 12:26:58PM +0530, jeshwank wrote:
> + tee_buf->vaddr = dma_alloc_coherent(psp->dev, size, &tee_buf->dma, gfp);
> + if (!tee_buf->vaddr || !tee_buf->dma) {
> + kfree(tee_buf);
> + return NULL;
> + }
> +
> + tee_buf->size = size;
> +
> + /* Check whether IOMMU is present. If present, translate IOVA
> + * to physical address, else the dma handle is the physical
> + * address.
> + */
> + dom = iommu_get_domain_for_dev(psp->dev);
> + if (dom)
> + tee_buf->paddr = iommu_iova_to_phys(dom, tee_buf->dma);
> + else

No, you can't mix the DMA API and iommu API. You need to stick to one
or the other.

2023-10-27 13:36:46

by NK, JESHWANTHKUMAR

[permalink] [raw]
Subject: Re: [PATCH 1/3] crypto: ccp - Add function to allocate and free memory using DMA APIs


On 27-Oct-23 10:54 AM, Christoph Hellwig wrote:
> On Wed, Oct 25, 2023 at 12:26:58PM +0530, jeshwank wrote:
>> + tee_buf->vaddr = dma_alloc_coherent(psp->dev, size, &tee_buf->dma, gfp);
>> + if (!tee_buf->vaddr || !tee_buf->dma) {
>> + kfree(tee_buf);
>> + return NULL;
>> + }
>> +
>> + tee_buf->size = size;
>> +
>> + /* Check whether IOMMU is present. If present, translate IOVA
>> + * to physical address, else the dma handle is the physical
>> + * address.
>> + */
>> + dom = iommu_get_domain_for_dev(psp->dev);
>> + if (dom)
>> + tee_buf->paddr = iommu_iova_to_phys(dom, tee_buf->dma);
>> + else
> No, you can't mix the DMA API and iommu API. You need to stick to one
> or the other.

Can you please elaborate a bit more? Is it because in the presence of IOMMU,
a contiguous DMA or bus-address-space "buffer" may be mapped non-contiguously
into the physical address space? As a result, for buffers larger than a page
size, when PSP tries to map the physical address into it's address space, the
PSP Trusted OS will not be able to read back the entire buffer data.

CPU CPU Bus
Virtual Physical Address
Address Address Space
Space Space

+-------+ +------+ +------+
| | |MMIO | Offset | |
| | Virtual |Space | applied | |
C +-------+ --------> B +------+ ----------> +------+ A
| | mapping | | by host | |
+-----+ | | | | bridge | | +--------+
| | | | +------+ | | | |
| CPU | | | | RAM | | | | Device |
| | | | | | | | | |
+-----+ +-------+ +------+ +------+ +--------+
| | Virtual |Buffer| Mapping | |
X +-------+ --------> Y +------+ <---------- +------+ Z
| | mapping | RAM | by IOMMU
| | | |
| | | |
+-------+ +------+

Reference diagram from:
https://www.kernel.org/doc/Documentation/DMA-API-HOWTO.txt


Regards,

Jeshwanth

>

2023-10-30 06:06:20

by NK, JESHWANTHKUMAR

[permalink] [raw]
Subject: Re: [PATCH 1/3] crypto: ccp - Add function to allocate and free memory using DMA APIs

Hi Christoph,

On 27-Oct-23 10:54 AM, Christoph Hellwig wrote:
> On Wed, Oct 25, 2023 at 12:26:58PM +0530, jeshwank wrote:
>> + tee_buf->vaddr = dma_alloc_coherent(psp->dev, size, &tee_buf->dma, gfp);
>> + if (!tee_buf->vaddr || !tee_buf->dma) {
>> + kfree(tee_buf);
>> + return NULL;
>> + }
>> +
>> + tee_buf->size = size;
>> +
>> + /* Check whether IOMMU is present. If present, translate IOVA
>> + * to physical address, else the dma handle is the physical
>> + * address.
>> + */
>> + dom = iommu_get_domain_for_dev(psp->dev);
>> + if (dom)
>> + tee_buf->paddr = iommu_iova_to_phys(dom, tee_buf->dma);
>> + else
> No, you can't mix the DMA API and iommu API. You need to stick to one
> or the other.
Can you please elaborate?

2023-10-30 13:33:33

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 1/3] crypto: ccp - Add function to allocate and free memory using DMA APIs

On Fri, Oct 27, 2023 at 07:05:17PM +0530, NK, JESHWANTHKUMAR wrote:
> Can you please elaborate a bit more?

Becasue the DMA API is a blackbox. You can pass the dma_addr_t to
hardware, and you can use the kernel virtual address. That it can
sometimes be implemented using the IMMU API is an implementation
detail. Similarly you can only feeds iovas generated by the IOMMU
API into the IOMMU API, not any random other scalar value, which
is what you can get from the DMA API.