2023-10-05 17:25:45

by Arvind Yadav

[permalink] [raw]
Subject: [PATCH v4 0/1] drm/amdkfd: Fix unaligned doorbell absolute offset for gfx8

On older chips, the absolute doorbell offset within
the doorbell page is based on the queue ID.
KFD is using queue ID and doorbell size to get an
absolute doorbell offset in userspace.

Here, adding db_size in byte to find the doorbell's
absolute offset for both 32-bit and 64-bit doorbell sizes.
So that doorbell offset will be aligned based on the doorbell
size.

v2:
- Addressed the review comment from Felix.

v3:
- Adding doorbell_size as parameter to get db absolute offset.

v4:
Squash the two patches into one.

Arvind Yadav (1):
drm/amdkfd: get doorbell's absolute offset based on the db_size

drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h | 5 +++--
drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c | 13 +++++++++----
.../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 3 ++-
drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 10 ++++++++--
.../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 3 ++-
5 files changed, 24 insertions(+), 10 deletions(-)

--
2.34.1


2023-10-05 17:27:15

by Arvind Yadav

[permalink] [raw]
Subject: [PATCH v4 1/1] drm/amdkfd: get doorbell's absolute offset based on the db_size

Here, Adding db_size in byte to find the doorbell's
absolute offset for both 32-bit and 64-bit doorbell sizes.
So that doorbell offset will be aligned based on the doorbell
size.

v2:
- Addressed the review comment from Felix.
v3:
- Adding doorbell_size as parameter to get db absolute offset.
v4:
Squash the two patches into one.

Cc: Christian Koenig <[email protected]>
Cc: Alex Deucher <[email protected]>
Signed-off-by: Shashank Sharma <[email protected]>
Signed-off-by: Arvind Yadav <[email protected]>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h | 5 +++--
drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c | 13 +++++++++----
.../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 3 ++-
drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 10 ++++++++--
.../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 3 ++-
5 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
index 09f6727e7c73..4a8b33f55f6b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
@@ -357,8 +357,9 @@ int amdgpu_doorbell_init(struct amdgpu_device *adev);
void amdgpu_doorbell_fini(struct amdgpu_device *adev);
int amdgpu_doorbell_create_kernel_doorbells(struct amdgpu_device *adev);
uint32_t amdgpu_doorbell_index_on_bar(struct amdgpu_device *adev,
- struct amdgpu_bo *db_bo,
- uint32_t doorbell_index);
+ struct amdgpu_bo *db_bo,
+ uint32_t doorbell_index,
+ uint32_t db_size);

#define RDOORBELL32(index) amdgpu_mm_rdoorbell(adev, (index))
#define WDOORBELL32(index, v) amdgpu_mm_wdoorbell(adev, (index), (v))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c
index da4be0bbb446..6690f5a72f4d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c
@@ -114,19 +114,24 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
* @adev: amdgpu_device pointer
* @db_bo: doorbell object's bo
* @db_index: doorbell relative index in this doorbell object
+ * @db_size: doorbell size is in byte
*
* returns doorbell's absolute index in BAR
*/
uint32_t amdgpu_doorbell_index_on_bar(struct amdgpu_device *adev,
- struct amdgpu_bo *db_bo,
- uint32_t doorbell_index)
+ struct amdgpu_bo *db_bo,
+ uint32_t doorbell_index,
+ uint32_t db_size)
{
int db_bo_offset;

db_bo_offset = amdgpu_bo_gpu_offset_no_check(db_bo);

- /* doorbell index is 32 bit but doorbell's size is 64-bit, so *2 */
- return db_bo_offset / sizeof(u32) + doorbell_index * 2;
+ /* doorbell index is 32 bit but doorbell's size can be 32 bit
+ * or 64 bit, so *db_size(in byte)/4 for alignment.
+ */
+ return db_bo_offset / sizeof(u32) + doorbell_index *
+ DIV_ROUND_UP(db_size, 4);
}

/**
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 0d3d538b64eb..e07652e72496 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -407,7 +407,8 @@ static int allocate_doorbell(struct qcm_process_device *qpd,

q->properties.doorbell_off = amdgpu_doorbell_index_on_bar(dev->adev,
qpd->proc_doorbells,
- q->doorbell_id);
+ q->doorbell_id,
+ dev->kfd->device_info.doorbell_size);
return 0;
}

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
index 7b38537c7c99..05c74887fd6f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
@@ -161,7 +161,10 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
return NULL;

- *doorbell_off = amdgpu_doorbell_index_on_bar(kfd->adev, kfd->doorbells, inx);
+ *doorbell_off = amdgpu_doorbell_index_on_bar(kfd->adev,
+ kfd->doorbells,
+ inx,
+ kfd->device_info.doorbell_size);
inx *= 2;

pr_debug("Get kernel queue doorbell\n"
@@ -240,7 +243,10 @@ phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd)
return 0;
}

- first_db_index = amdgpu_doorbell_index_on_bar(adev, pdd->qpd.proc_doorbells, 0);
+ first_db_index = amdgpu_doorbell_index_on_bar(adev,
+ pdd->qpd.proc_doorbells,
+ 0,
+ pdd->dev->kfd->device_info.doorbell_size);
return adev->doorbell.base + first_db_index * sizeof(uint32_t);
}

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index adb5e4bdc0b2..77649392e233 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -377,7 +377,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
*/
uint32_t first_db_index = amdgpu_doorbell_index_on_bar(pdd->dev->adev,
pdd->qpd.proc_doorbells,
- 0);
+ 0,
+ pdd->dev->kfd->device_info.doorbell_size);

*p_doorbell_offset_in_process = (q->properties.doorbell_off
- first_db_index) * sizeof(uint32_t);
--
2.34.1

2023-10-05 19:48:30

by Felix Kuehling

[permalink] [raw]
Subject: Re: [PATCH v4 1/1] drm/amdkfd: get doorbell's absolute offset based on the db_size

On 2023-10-05 13:20, Arvind Yadav wrote:
> Here, Adding db_size in byte to find the doorbell's
> absolute offset for both 32-bit and 64-bit doorbell sizes.
> So that doorbell offset will be aligned based on the doorbell
> size.
>
> v2:
> - Addressed the review comment from Felix.
> v3:
> - Adding doorbell_size as parameter to get db absolute offset.
> v4:
> Squash the two patches into one.
>
> Cc: Christian Koenig <[email protected]>
> Cc: Alex Deucher <[email protected]>
> Signed-off-by: Shashank Sharma <[email protected]>
> Signed-off-by: Arvind Yadav <[email protected]>

Reviewed-by: Felix Kuehling <[email protected]>


> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h | 5 +++--
> drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c | 13 +++++++++----
> .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 3 ++-
> drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 10 ++++++++--
> .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 3 ++-
> 5 files changed, 24 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
> index 09f6727e7c73..4a8b33f55f6b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
> @@ -357,8 +357,9 @@ int amdgpu_doorbell_init(struct amdgpu_device *adev);
> void amdgpu_doorbell_fini(struct amdgpu_device *adev);
> int amdgpu_doorbell_create_kernel_doorbells(struct amdgpu_device *adev);
> uint32_t amdgpu_doorbell_index_on_bar(struct amdgpu_device *adev,
> - struct amdgpu_bo *db_bo,
> - uint32_t doorbell_index);
> + struct amdgpu_bo *db_bo,
> + uint32_t doorbell_index,
> + uint32_t db_size);
>
> #define RDOORBELL32(index) amdgpu_mm_rdoorbell(adev, (index))
> #define WDOORBELL32(index, v) amdgpu_mm_wdoorbell(adev, (index), (v))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c
> index da4be0bbb446..6690f5a72f4d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c
> @@ -114,19 +114,24 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
> * @adev: amdgpu_device pointer
> * @db_bo: doorbell object's bo
> * @db_index: doorbell relative index in this doorbell object
> + * @db_size: doorbell size is in byte
> *
> * returns doorbell's absolute index in BAR
> */
> uint32_t amdgpu_doorbell_index_on_bar(struct amdgpu_device *adev,
> - struct amdgpu_bo *db_bo,
> - uint32_t doorbell_index)
> + struct amdgpu_bo *db_bo,
> + uint32_t doorbell_index,
> + uint32_t db_size)
> {
> int db_bo_offset;
>
> db_bo_offset = amdgpu_bo_gpu_offset_no_check(db_bo);
>
> - /* doorbell index is 32 bit but doorbell's size is 64-bit, so *2 */
> - return db_bo_offset / sizeof(u32) + doorbell_index * 2;
> + /* doorbell index is 32 bit but doorbell's size can be 32 bit
> + * or 64 bit, so *db_size(in byte)/4 for alignment.
> + */
> + return db_bo_offset / sizeof(u32) + doorbell_index *
> + DIV_ROUND_UP(db_size, 4);
> }
>
> /**
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index 0d3d538b64eb..e07652e72496 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -407,7 +407,8 @@ static int allocate_doorbell(struct qcm_process_device *qpd,
>
> q->properties.doorbell_off = amdgpu_doorbell_index_on_bar(dev->adev,
> qpd->proc_doorbells,
> - q->doorbell_id);
> + q->doorbell_id,
> + dev->kfd->device_info.doorbell_size);
> return 0;
> }
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
> index 7b38537c7c99..05c74887fd6f 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
> @@ -161,7 +161,10 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
> if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
> return NULL;
>
> - *doorbell_off = amdgpu_doorbell_index_on_bar(kfd->adev, kfd->doorbells, inx);
> + *doorbell_off = amdgpu_doorbell_index_on_bar(kfd->adev,
> + kfd->doorbells,
> + inx,
> + kfd->device_info.doorbell_size);
> inx *= 2;
>
> pr_debug("Get kernel queue doorbell\n"
> @@ -240,7 +243,10 @@ phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd)
> return 0;
> }
>
> - first_db_index = amdgpu_doorbell_index_on_bar(adev, pdd->qpd.proc_doorbells, 0);
> + first_db_index = amdgpu_doorbell_index_on_bar(adev,
> + pdd->qpd.proc_doorbells,
> + 0,
> + pdd->dev->kfd->device_info.doorbell_size);
> return adev->doorbell.base + first_db_index * sizeof(uint32_t);
> }
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> index adb5e4bdc0b2..77649392e233 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> @@ -377,7 +377,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
> */
> uint32_t first_db_index = amdgpu_doorbell_index_on_bar(pdd->dev->adev,
> pdd->qpd.proc_doorbells,
> - 0);
> + 0,
> + pdd->dev->kfd->device_info.doorbell_size);
>
> *p_doorbell_offset_in_process = (q->properties.doorbell_off
> - first_db_index) * sizeof(uint32_t);