Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/hsa/radeon/kfd_sched_cik_static.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/hsa/radeon/kfd_sched_cik_static.c b/drivers/gpu/hsa/radeon/kfd_sched_cik_static.c
index 3c3e7d6..5bfde5c 100644
--- a/drivers/gpu/hsa/radeon/kfd_sched_cik_static.c
+++ b/drivers/gpu/hsa/radeon/kfd_sched_cik_static.c
@@ -433,7 +433,7 @@ static int cik_static_create(struct kfd_dev *dev, struct kfd_scheduler **schedul
* are no active queues.
*/
err = radeon_kfd_vidmem_alloc(dev,
- CIK_HPD_SIZE * priv->num_pipes * 2,
+ CIK_HPD_SIZE * priv->num_pipes,
PAGE_SIZE,
KFD_MEMPOOL_SYSTEM_WRITECOMBINE,
&priv->hpd_mem);
--
1.9.1
From: Evgeny Pinchuk <[email protected]>
Fixed wrong reporting of timestamps in kfd_ioctl_get_clock_counters.
Signed-off-by: Evgeny Pinchuk <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/hsa/radeon/kfd_chardev.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/hsa/radeon/kfd_chardev.c b/drivers/gpu/hsa/radeon/kfd_chardev.c
index dba6084..75fe11f 100644
--- a/drivers/gpu/hsa/radeon/kfd_chardev.c
+++ b/drivers/gpu/hsa/radeon/kfd_chardev.c
@@ -304,10 +304,10 @@ kfd_ioctl_get_clock_counters(struct file *filep, struct kfd_process *p, void __u
/* No access to rdtsc. Using raw monotonic time */
getrawmonotonic(&time);
- args.cpu_clock_counter = time.tv_nsec;
+ args.cpu_clock_counter = (uint64_t)timespec_to_ns(&time);
get_monotonic_boottime(&time);
- args.system_clock_counter = time.tv_nsec;
+ args.system_clock_counter = (uint64_t)timespec_to_ns(&time);
/* Since the counter is in nano-seconds we use 1GHz frequency */
args.system_clock_freq = 1000000000;
--
1.9.1
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/hsa/radeon/kfd_sched_cik_static.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/hsa/radeon/kfd_sched_cik_static.c b/drivers/gpu/hsa/radeon/kfd_sched_cik_static.c
index 5bfde5c..7573d25 100644
--- a/drivers/gpu/hsa/radeon/kfd_sched_cik_static.c
+++ b/drivers/gpu/hsa/radeon/kfd_sched_cik_static.c
@@ -899,7 +899,7 @@ cik_static_interrupt_isr(struct kfd_scheduler *scheduler, const void *ih_ring_en
if (!int_compute_pipe(priv, ihre, &pipe_id))
return false;
- dev_info(radeon_kfd_chardev(), "INT(ISR): src=%02x, data=0x%x, pipe=%u, vmid=%u, pasid=%u\n",
+ dev_dbg(radeon_kfd_chardev(), "INT(ISR): src=%02x, data=0x%x, pipe=%u, vmid=%u, pasid=%u\n",
ihre->source_id, ihre->data, pipe_id, ihre->vmid, ihre->pasid);
switch (source_id) {
--
1.9.1
From: Evgeny Pinchuk <[email protected]>
Add a new function to the interface used by kfd.
The new function retrieves the maximum engine clock speed.
Signed-off-by: Evgeny Pinchuk <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/drm/radeon/radeon_kfd.c | 11 +++++++++++
include/linux/radeon_kfd.h | 2 ++
2 files changed, 13 insertions(+)
diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c
index 6dba170..8b6d497 100644
--- a/drivers/gpu/drm/radeon/radeon_kfd.c
+++ b/drivers/gpu/drm/radeon/radeon_kfd.c
@@ -50,6 +50,8 @@ static void unlock_srbm_gfx_cntl(struct kgd_dev *kgd);
static void lock_grbm_gfx_idx(struct kgd_dev *kgd);
static void unlock_grbm_gfx_idx(struct kgd_dev *kgd);
+static uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd);
+
static const struct kfd2kgd_calls kfd2kgd = {
.allocate_mem = allocate_mem,
@@ -64,6 +66,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
.unlock_srbm_gfx_cntl = unlock_srbm_gfx_cntl,
.lock_grbm_gfx_idx = lock_grbm_gfx_idx,
.unlock_grbm_gfx_idx = unlock_grbm_gfx_idx,
+ .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
};
static const struct kgd2kfd_calls *kgd2kfd;
@@ -307,3 +310,11 @@ static uint64_t get_gpu_clock_counter(struct kgd_dev *kgd)
return rdev->asic->get_gpu_clock_counter(rdev);
}
+
+static uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
+{
+ struct radeon_device *rdev = (struct radeon_device *)kgd;
+
+ /* The sclk is in quantas of 10kHz */
+ return rdev->pm.power_state->clock_info->sclk / 100;
+}
diff --git a/include/linux/radeon_kfd.h b/include/linux/radeon_kfd.h
index 4c7e923..4114c8e 100644
--- a/include/linux/radeon_kfd.h
+++ b/include/linux/radeon_kfd.h
@@ -93,6 +93,8 @@ struct kfd2kgd_calls {
/* GRBM_GFX_INDEX mutex */
void (*lock_grbm_gfx_idx)(struct kgd_dev *kgd);
void (*unlock_grbm_gfx_idx)(struct kgd_dev *kgd);
+
+ uint32_t (*get_max_engine_clock_in_mhz)(struct kgd_dev *kgd);
};
bool kgd2kfd_init(unsigned interface_version,
--
1.9.1
From: Alexey Skidanov <[email protected]>
Signed-off-by: Alexey Skidanov <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/hsa/radeon/kfd_sched_cik_static.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/hsa/radeon/kfd_sched_cik_static.c b/drivers/gpu/hsa/radeon/kfd_sched_cik_static.c
index 7573d25..7ee8125 100644
--- a/drivers/gpu/hsa/radeon/kfd_sched_cik_static.c
+++ b/drivers/gpu/hsa/radeon/kfd_sched_cik_static.c
@@ -627,8 +627,10 @@ static void cik_static_deregister_process(struct kfd_scheduler *scheduler,
struct cik_static_private *priv = kfd_scheduler_to_private(scheduler);
struct cik_static_process *pp = kfd_process_to_private(scheduler_process);
- release_vmid(priv, pp->vmid);
- kfree(pp);
+ if (priv && pp) {
+ release_vmid(priv, pp->vmid);
+ kfree(pp);
+ }
}
static bool allocate_hqd(struct cik_static_private *priv, unsigned int *queue)
--
1.9.1
From: Michael Varga <[email protected]>
Added debug print statements so critical errors during init may be debugged more easily.
Signed-off-by: Michael Varga <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/hsa/radeon/kfd_device.c | 18 +++++++++++++++---
1 file changed, 15 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/hsa/radeon/kfd_device.c b/drivers/gpu/hsa/radeon/kfd_device.c
index 2e7d50d..82febf4 100644
--- a/drivers/gpu/hsa/radeon/kfd_device.c
+++ b/drivers/gpu/hsa/radeon/kfd_device.c
@@ -107,20 +107,30 @@ device_iommu_pasid_init(struct kfd_dev *kfd)
int err;
err = amd_iommu_device_info(kfd->pdev, &iommu_info);
- if (err < 0)
+ if (err < 0) {
+ dev_err(kfd_device, "error getting iommu info. is the iommu enabled?\n");
return false;
+ }
- if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags)
+ if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags) {
+ dev_err(kfd_device, "error required iommu flags ats(%i), pri(%i), pasid(%i)\n",
+ (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP) != 0,
+ (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) != 0,
+ (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP) != 0);
return false;
+ }
pasid_limit = min_t(pasid_t, (pasid_t)1 << kfd->device_info->max_pasid_bits, iommu_info.max_pasids);
pasid_limit = min_t(pasid_t, pasid_limit, kfd->doorbell_process_limit);
err = amd_iommu_init_device(kfd->pdev, pasid_limit);
- if (err < 0)
+ if (err < 0) {
+ dev_err(kfd_device, "error initializing iommu device\n");
return false;
+ }
if (!radeon_kfd_set_pasid_limit(pasid_limit)) {
+ dev_err(kfd_device, "error setting pasid limit\n");
amd_iommu_free_device(kfd->pdev);
return false;
}
@@ -166,6 +176,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
kfd->device_info->scheduler_class->start(kfd->scheduler);
kfd->init_complete = true;
+ dev_info(kfd_device, "added device (%x:%x)\n", kfd->pdev->vendor,
+ kfd->pdev->device);
return true;
}
--
1.9.1
From: Ben Goz <[email protected]>
This patch adds infrastructure to allocate doorbells which are not exposed to
user space.
Signed-off-by: Ben Goz <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/hsa/radeon/kfd_doorbell.c | 76 ++++++++++++++++++++++++++++++++++-
drivers/gpu/hsa/radeon/kfd_priv.h | 5 +++
2 files changed, 80 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/hsa/radeon/kfd_doorbell.c b/drivers/gpu/hsa/radeon/kfd_doorbell.c
index 3de8a02..abf4cb0 100644
--- a/drivers/gpu/hsa/radeon/kfd_doorbell.c
+++ b/drivers/gpu/hsa/radeon/kfd_doorbell.c
@@ -23,6 +23,16 @@
#include "kfd_priv.h"
#include <linux/mm.h>
#include <linux/mman.h>
+#include <linux/slab.h>
+
+/*
+ * This extension supports kernel-level doorbell management for kernel queues.
+ * Basically, the last doorbell page is devoted to kernel queues, which ensures
+ * that no user process can get access to the kernel doorbell page.
+ */
+static DEFINE_MUTEX(doorbell_mutex);
+static unsigned long doorbell_available_index[DIV_ROUND_UP(MAX_PROCESS_QUEUES, BITS_PER_LONG)] = { 0 };
+#define KERNEL_DOORBELL_PASID 1
/*
* Each device exposes a doorbell aperture, a PCI MMIO aperture that
@@ -67,7 +77,22 @@ void radeon_kfd_doorbell_init(struct kfd_dev *kfd)
kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address + doorbell_start_offset;
kfd->doorbell_id_offset = doorbell_start_offset / sizeof(doorbell_t);
- kfd->doorbell_process_limit = doorbell_process_limit;
+ kfd->doorbell_process_limit = doorbell_process_limit - 1;
+
+ kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base, doorbell_process_allocation());
+ BUG_ON(!kfd->doorbell_kernel_ptr);
+
+ pr_debug("kfd: doorbell initialization\n"
+ " doorbell base == 0x%08lX\n"
+ " doorbell_id_offset == 0x%08lu\n"
+ " doorbell_process_limit == 0x%08lu\n"
+ " doorbell_kernel_offset == 0x%08lX\n"
+ " doorbell aperture size == 0x%08lX\n"
+ " doorbell kernel address == 0x%08lX\n",
+ (uintptr_t)kfd->doorbell_base, kfd->doorbell_id_offset, doorbell_process_limit,
+ (uintptr_t)kfd->doorbell_base, kfd->shared_resources.doorbell_aperture_size,
+ (uintptr_t)kfd->doorbell_kernel_ptr);
+
}
/* This is the /dev/kfd mmap (for doorbell) implementation. We intend that this is only called through map_doorbells,
@@ -136,6 +161,53 @@ map_doorbells(struct file *devkfd, struct kfd_process *process, struct kfd_dev *
return 0;
}
+/* get kernel iomem pointer for a doorbell */
+u32 __iomem *radeon_kfd_get_kernel_doorbell(struct kfd_dev *kfd, unsigned int *doorbell_off)
+{
+ u32 inx;
+
+ BUG_ON(!kfd || !doorbell_off);
+
+ mutex_lock(&doorbell_mutex);
+ inx = find_first_zero_bit(doorbell_available_index, MAX_PROCESS_QUEUES);
+ __set_bit(inx, doorbell_available_index);
+ mutex_unlock(&doorbell_mutex);
+
+ if (inx >= MAX_PROCESS_QUEUES)
+ return NULL;
+
+ /* calculating the kernel doorbell offset using a "fake" kernel pasid that is allocated for kernel queues only */
+ *doorbell_off = KERNEL_DOORBELL_PASID * (doorbell_process_allocation()/sizeof(doorbell_t)) + inx;
+
+ pr_debug("kfd: get kernel queue doorbell\n"
+ " doorbell offset == 0x%08d\n"
+ " kernel address == 0x%08lX\n",
+ *doorbell_off, (uintptr_t)(kfd->doorbell_kernel_ptr + inx));
+
+ return kfd->doorbell_kernel_ptr + inx;
+}
+
+void radeon_kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
+{
+ unsigned int inx;
+
+ BUG_ON(!kfd || !db_addr);
+
+ inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr);
+
+ mutex_lock(&doorbell_mutex);
+ __clear_bit(inx, doorbell_available_index);
+ mutex_unlock(&doorbell_mutex);
+}
+
+inline void write_kernel_doorbell(u32 __iomem *db, u32 value)
+{
+ if (db) {
+ writel(value, db);
+ pr_debug("writing %d to doorbell address 0x%p\n", value, db);
+ }
+}
+
/* Get the user-mode address of a doorbell. Assumes that the process mutex is being held. */
doorbell_t __user *radeon_kfd_get_doorbell(struct file *devkfd, struct kfd_process *process, struct kfd_dev *dev,
unsigned int doorbell_index)
@@ -152,6 +224,8 @@ doorbell_t __user *radeon_kfd_get_doorbell(struct file *devkfd, struct kfd_proce
pdd = radeon_kfd_get_process_device_data(dev, process);
BUG_ON(pdd == NULL); /* map_doorbells would have failed otherwise */
+ pr_debug("doorbell value on creation 0x%x\n", pdd->doorbell_mapping[doorbell_index]);
+
return &pdd->doorbell_mapping[doorbell_index];
}
diff --git a/drivers/gpu/hsa/radeon/kfd_priv.h b/drivers/gpu/hsa/radeon/kfd_priv.h
index 14a3f9b..df17387 100644
--- a/drivers/gpu/hsa/radeon/kfd_priv.h
+++ b/drivers/gpu/hsa/radeon/kfd_priv.h
@@ -95,6 +95,7 @@ struct kfd_dev {
* at the start)
*/
size_t doorbell_process_limit; /* Number of processes we have doorbell space for. */
+ u32 __iomem *doorbell_kernel_ptr; /* this is a pointer for a doorbells page used by kernel queue */
struct kgd2kfd_shared_resources shared_resources;
@@ -288,6 +289,10 @@ void radeon_kfd_doorbell_init(struct kfd_dev *kfd);
int radeon_kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma);
doorbell_t __user *radeon_kfd_get_doorbell(struct file *devkfd, struct kfd_process *process, struct kfd_dev *dev,
unsigned int doorbell_index);
+u32 __iomem *radeon_kfd_get_kernel_doorbell(struct kfd_dev *kfd, unsigned int *doorbell_off);
+void radeon_kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr);
+u32 read_kernel_doorbell(u32 __iomem *db);
+void write_kernel_doorbell(u32 __iomem *db, u32 value);
unsigned int radeon_kfd_queue_id_to_doorbell(struct kfd_dev *kfd, struct kfd_process *process, unsigned int queue_id);
extern struct device *kfd_device;
--
1.9.1
From: Ben Goz <[email protected]>
The packet manager module builds PM4 packets for the sole use of the CP
scheduler. Those packets are used by the HIQ to submit runlists to the CP.
Signed-off-by: Ben Goz <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/hsa/radeon/Makefile | 2 +-
drivers/gpu/hsa/radeon/kfd_packet_manager.c | 473 ++++++++++++++++++++++++++++
2 files changed, 474 insertions(+), 1 deletion(-)
create mode 100644 drivers/gpu/hsa/radeon/kfd_packet_manager.c
diff --git a/drivers/gpu/hsa/radeon/Makefile b/drivers/gpu/hsa/radeon/Makefile
index f06d925..4978915 100644
--- a/drivers/gpu/hsa/radeon/Makefile
+++ b/drivers/gpu/hsa/radeon/Makefile
@@ -7,6 +7,6 @@ radeon_kfd-y := kfd_module.o kfd_device.o kfd_chardev.o \
kfd_doorbell.o kfd_sched_cik_static.o kfd_registers.o \
kfd_vidmem.o kfd_interrupt.o kfd_aperture.o \
kfd_queue.o kfd_hw_pointer_store.o kfd_mqd_manager.o \
- kfd_kernel_queue.o
+ kfd_kernel_queue.o kfd_packet_manager.o
obj-$(CONFIG_HSA_RADEON) += radeon_kfd.o
diff --git a/drivers/gpu/hsa/radeon/kfd_packet_manager.c b/drivers/gpu/hsa/radeon/kfd_packet_manager.c
new file mode 100644
index 0000000..4967b7c
--- /dev/null
+++ b/drivers/gpu/hsa/radeon/kfd_packet_manager.c
@@ -0,0 +1,473 @@
+/*
+ * packet_manager.c
+ *
+ * Created on: Mar 16, 2014
+ * Author: ben
+ */
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include "kfd_device_queue_manager.h"
+#include "kfd_kernel_queue.h"
+#include "kfd_priv.h"
+#include "kfd_pm4_headers.h"
+#include "kfd_pm4_opcodes.h"
+#include "cik_mqds.h"
+
+static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes, unsigned int buffer_size_bytes)
+{
+ unsigned int temp = *wptr + increment_bytes / sizeof(uint32_t);
+
+ BUG_ON((temp * sizeof(uint32_t)) > buffer_size_bytes);
+ *wptr = temp;
+}
+
+static unsigned int build_pm4_header(unsigned int opcode, size_t packet_size)
+{
+ PM4_TYPE_3_HEADER header;
+
+ header.u32all = 0;
+ header.opcode = opcode;
+ header.count = packet_size/sizeof(uint32_t) - 2;
+ header.type = PM4_TYPE_3;
+
+ return header.u32all;
+}
+
+static void pm_calc_rlib_size(struct packet_manager *pm, unsigned int *rlib_size, bool *over_subscription)
+{
+ unsigned int process_count, queue_count;
+
+ BUG_ON(!pm || !rlib_size || !over_subscription);
+
+ process_count = pm->dqm->processes_count;
+ queue_count = pm->dqm->queue_count;
+
+ /* check if there is over subscription */
+ *over_subscription = false;
+ if ((process_count >= VMID_PER_DEVICE) ||
+ queue_count >= PIPE_PER_ME_CP_SCHEDULING * QUEUES_PER_PIPE) {
+ *over_subscription = true;
+ pr_debug("kfd: over subscribed runlist\n");
+ }
+
+ /* calculate run list ib allocation size */
+ *rlib_size = process_count * sizeof(struct pm4_map_process) +
+ queue_count * sizeof(struct pm4_map_queues);
+
+ /* increase the allocation size in case we need a chained run list when over subscription */
+ if (*over_subscription)
+ *rlib_size += sizeof(struct pm4_runlist);
+
+ pr_debug("kfd: runlist ib size %d\n", *rlib_size);
+}
+
+static int pm_allocate_runlist_ib(struct packet_manager *pm, unsigned int **rl_buffer, uint64_t *rl_gpu_buffer,
+ unsigned int *rl_buffer_size, bool *is_over_subscription)
+{
+ int retval;
+
+ BUG_ON(!pm);
+ BUG_ON(pm->allocated == true);
+
+ pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription);
+ if (is_over_subscription &&
+ sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION)
+ return -EFAULT;
+
+ retval = radeon_kfd_vidmem_alloc_map(pm->dqm->dev, &pm->ib_buffer_obj, (void **)rl_buffer,
+ rl_gpu_buffer, ALIGN(*rl_buffer_size, PAGE_SIZE));
+ if (retval != 0) {
+ pr_err("kfd: failed to allocate runlist IB\n");
+ return retval;
+ }
+
+ memset(*rl_buffer, 0, *rl_buffer_size);
+ pm->allocated = true;
+ return retval;
+}
+
+static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer,
+ uint64_t ib, size_t ib_size_in_dwords, bool chain)
+{
+ struct pm4_runlist *packet;
+
+ BUG_ON(!pm || !buffer || !ib);
+
+ packet = (struct pm4_runlist *)buffer;
+
+ memset(buffer, 0, sizeof(struct pm4_runlist));
+ packet->header.u32all = build_pm4_header(IT_RUN_LIST, sizeof(struct pm4_runlist));
+
+ packet->bitfields4.ib_size = ib_size_in_dwords;
+ packet->bitfields4.chain = chain ? 1 : 0;
+ packet->bitfields4.offload_polling = 0;
+ packet->bitfields4.valid = 1;
+ packet->bitfields4.vmid = 0;
+ packet->ordinal2 = lower_32(ib);
+ packet->bitfields3.ib_base_hi = upper_32(ib);
+
+ return 0;
+}
+
+static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer, struct qcm_process_device *qpd)
+{
+ struct pm4_map_process *packet;
+
+ BUG_ON(!pm || !buffer || !qpd);
+
+ packet = (struct pm4_map_process *)buffer;
+
+ pr_debug("kfd: In func %s\n", __func__);
+
+ memset(buffer, 0, sizeof(struct pm4_map_process));
+
+ packet->header.u32all = build_pm4_header(IT_MAP_PROCESS, sizeof(struct pm4_map_process));
+ packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
+ packet->bitfields2.pasid = qpd->pqm->process->pasid;
+ packet->bitfields3.page_table_base = qpd->page_table_base;
+ packet->bitfields4.gds_size = qpd->gds_size;
+ packet->bitfields4.num_gws = qpd->num_gws;
+ packet->bitfields4.num_oac = qpd->num_oac;
+
+ packet->sh_mem_config = qpd->sh_mem_config;
+ packet->sh_mem_bases = qpd->sh_mem_bases;
+ packet->sh_mem_ape1_base = qpd->sh_mem_ape1_base;
+ packet->sh_mem_ape1_limit = qpd->sh_mem_ape1_limit;
+
+ packet->gds_addr_lo = lower_32(qpd->gds_context_area);
+ packet->gds_addr_hi = upper_32(qpd->gds_context_area);
+
+ return 0;
+}
+
+static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer, struct queue *q)
+{
+ struct pm4_map_queues *packet;
+
+ BUG_ON(!pm || !buffer || !q);
+
+ pr_debug("kfd: In func %s\n", __func__);
+
+ packet = (struct pm4_map_queues *)buffer;
+ memset(buffer, 0, sizeof(struct pm4_map_queues));
+
+ packet->header.u32all = build_pm4_header(IT_MAP_QUEUES, sizeof(struct pm4_map_queues));
+ packet->bitfields2.alloc_format = alloc_format___map_queues__one_per_pipe;
+ packet->bitfields2.num_queues = 1;
+ packet->bitfields2.queue_sel = queue_sel___map_queues__map_to_hws_determined_queue_slots;
+ packet->bitfields2.vidmem = (q->properties.is_interop) ? vidmem___map_queues__uses_video_memory :
+ vidmem___map_queues__uses_no_video_memory;
+
+ switch (q->properties.type) {
+ case KFD_QUEUE_TYPE_COMPUTE:
+ case KFD_QUEUE_TYPE_DIQ:
+ packet->bitfields2.engine_sel = engine_sel___map_queues__compute;
+ break;
+ case KFD_QUEUE_TYPE_SDMA:
+ packet->bitfields2.engine_sel = engine_sel___map_queues__sdma0_queue;
+ break;
+ default:
+ BUG();
+ break;
+ }
+
+ packet->_map_queues_ordinals[0].bitfields3.doorbell_offset = q->properties.doorbell_off;
+ packet->_map_queues_ordinals[0].mqd_addr_lo = lower_32(q->gart_mqd_addr);
+ packet->_map_queues_ordinals[0].mqd_addr_hi = upper_32(q->gart_mqd_addr);
+ packet->_map_queues_ordinals[0].wptr_addr_lo = lower_32((uint64_t)q->properties.write_ptr);
+ packet->_map_queues_ordinals[0].wptr_addr_hi = upper_32((uint64_t)q->properties.write_ptr);
+
+ return 0;
+}
+
+static int pm_create_runlist_ib(struct packet_manager *pm, struct list_head *queues,
+ uint64_t *rl_gpu_addr, size_t *rl_size_bytes)
+{
+ unsigned int alloc_size_bytes;
+ unsigned int *rl_buffer, rl_wptr, i;
+ int retval, proccesses_mapped;
+ struct device_process_node *cur;
+ struct qcm_process_device *qpd;
+ struct queue *q;
+ struct kernel_queue *kq;
+ bool is_over_subscription;
+
+ BUG_ON(!pm || !queues || !rl_size_bytes || !rl_gpu_addr);
+
+ rl_wptr = retval = proccesses_mapped = 0;
+
+ retval = pm_allocate_runlist_ib(pm, &rl_buffer, rl_gpu_addr, &alloc_size_bytes, &is_over_subscription);
+ if (retval != 0)
+ return retval;
+
+ *rl_size_bytes = alloc_size_bytes;
+
+ pr_debug("kfd: In func %s\n", __func__);
+ pr_debug("kfd: building runlist ib process count: %d queues count %d\n", pm->dqm->processes_count,
+ pm->dqm->queue_count);
+
+ /* build the run list ib packet */
+ list_for_each_entry(cur, queues, list) {
+ qpd = cur->qpd;
+ /* build map process packet */
+ if (proccesses_mapped >= pm->dqm->processes_count) {
+ pr_debug("kfd: not enough space left in runlist IB\n");
+ pm_release_ib(pm);
+ return -ENOMEM;
+ }
+ retval = pm_create_map_process(pm, &rl_buffer[rl_wptr], qpd);
+ if (retval != 0)
+ return retval;
+ proccesses_mapped++;
+ inc_wptr(&rl_wptr, sizeof(struct pm4_map_process), alloc_size_bytes);
+ list_for_each_entry(kq, &qpd->priv_queue_list, list) {
+ if (kq->queue->properties.is_active != true)
+ continue;
+ retval = pm_create_map_queue(pm, &rl_buffer[rl_wptr], kq->queue);
+ if (retval != 0)
+ return retval;
+ inc_wptr(&rl_wptr, sizeof(struct pm4_map_queues), alloc_size_bytes);
+ }
+
+ list_for_each_entry(q, &qpd->queues_list, list) {
+ if (q->properties.is_active != true)
+ continue;
+ retval = pm_create_map_queue(pm, &rl_buffer[rl_wptr], q);
+ if (retval != 0)
+ return retval;
+ inc_wptr(&rl_wptr, sizeof(struct pm4_map_queues), alloc_size_bytes);
+ }
+ }
+
+ pr_debug("kfd: finished map process and queues to runlist\n");
+
+ if (is_over_subscription)
+ pm_create_runlist(pm, &rl_buffer[rl_wptr], *rl_gpu_addr, alloc_size_bytes / sizeof(uint32_t), true);
+
+ for (i = 0; i < alloc_size_bytes / sizeof(uint32_t); i++)
+ pr_debug("0x%2X ", rl_buffer[i]);
+ pr_debug("\n");
+
+ return 0;
+}
+
+int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
+{
+ BUG_ON(!dqm);
+
+ pm->dqm = dqm;
+ mutex_init(&pm->lock);
+ pm->priv_queue = kernel_queue_init(dqm->dev, KFD_QUEUE_TYPE_HIQ);
+ if (pm->priv_queue == NULL) {
+ mutex_destroy(&pm->lock);
+ return -ENOMEM;
+ }
+ pm->allocated = false;
+
+ return 0;
+}
+
+void pm_uninit(struct packet_manager *pm)
+{
+ BUG_ON(!pm);
+
+ mutex_destroy(&pm->lock);
+ kernel_queue_uninit(pm->priv_queue);
+}
+
+int pm_send_set_resources(struct packet_manager *pm, struct scheduling_resources *res)
+{
+ struct pm4_set_resources *packet;
+
+ BUG_ON(!pm || !res);
+
+ pr_debug("kfd: In func %s\n", __func__);
+
+ mutex_lock(&pm->lock);
+ pm->priv_queue->acquire_packet_buffer(pm->priv_queue, sizeof(*packet) / sizeof(uint32_t),
+ (unsigned int **)&packet);
+ if (packet == NULL) {
+ mutex_unlock(&pm->lock);
+ pr_err("kfd: failed to allocate buffer on kernel queue\n");
+ return -ENOMEM;
+ }
+
+ memset(packet, 0, sizeof(struct pm4_set_resources));
+ packet->header.u32all = build_pm4_header(IT_SET_RESOURCES, sizeof(struct pm4_set_resources));
+
+ packet->bitfields2.queue_type = queue_type___set_resources__hsa_interface_queue_hiq;
+ packet->bitfields2.vmid_mask = res->vmid_mask;
+ packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY;
+ packet->bitfields3.oac_mask = res->oac_mask;
+ packet->bitfields4.gds_heap_base = res->gds_heap_base;
+ packet->bitfields4.gds_heap_size = res->gds_heap_size;
+
+ packet->gws_mask_lo = lower_32(res->gws_mask);
+ packet->gws_mask_hi = upper_32(res->gws_mask);
+
+ packet->queue_mask_lo = lower_32(res->queue_mask);
+ packet->queue_mask_hi = upper_32(res->queue_mask);
+
+ pm->priv_queue->submit_packet(pm->priv_queue);
+ pm->priv_queue->sync_with_hw(pm->priv_queue, KFD_HIQ_TIMEOUT);
+
+ mutex_unlock(&pm->lock);
+
+ return 0;
+}
+
+int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
+{
+ uint64_t rl_gpu_ib_addr;
+ uint32_t *rl_buffer;
+ size_t rl_ib_size, packet_size_dwords;
+ int retval;
+
+ BUG_ON(!pm || !dqm_queues);
+
+ retval = pm_create_runlist_ib(pm, dqm_queues, &rl_gpu_ib_addr, &rl_ib_size);
+ if (retval != 0)
+ goto fail_create_runlist_ib;
+
+ pr_debug("kfd: runlist IB address: 0x%llX\n", rl_gpu_ib_addr);
+
+ packet_size_dwords = sizeof(struct pm4_runlist) / sizeof(uint32_t);
+ mutex_lock(&pm->lock);
+
+ retval = pm->priv_queue->acquire_packet_buffer(pm->priv_queue, packet_size_dwords, &rl_buffer);
+ if (retval != 0)
+ goto fail_acquire_packet_buffer;
+
+ retval = pm_create_runlist(pm, rl_buffer, rl_gpu_ib_addr, rl_ib_size / sizeof(uint32_t), false);
+ if (retval != 0)
+ goto fail_create_runlist;
+
+ pm->priv_queue->submit_packet(pm->priv_queue);
+ pm->priv_queue->sync_with_hw(pm->priv_queue, KFD_HIQ_TIMEOUT);
+
+ mutex_unlock(&pm->lock);
+
+ return retval;
+
+fail_create_runlist:
+ pm->priv_queue->rollback_packet(pm->priv_queue);
+fail_acquire_packet_buffer:
+ mutex_unlock(&pm->lock);
+fail_create_runlist_ib:
+ if (pm->allocated == true)
+ pm_release_ib(pm);
+ return retval;
+}
+
+int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address, uint32_t fence_value)
+{
+ int retval;
+ struct pm4_query_status *packet;
+
+ BUG_ON(!pm || !fence_address);
+
+ mutex_lock(&pm->lock);
+ retval = pm->priv_queue->acquire_packet_buffer(pm->priv_queue,
+ sizeof(struct pm4_query_status) / sizeof(uint32_t), (unsigned int **)&packet);
+ if (retval != 0)
+ goto fail_acquire_packet_buffer;
+
+ packet->header.u32all = build_pm4_header(IT_QUERY_STATUS, sizeof(struct pm4_query_status));
+
+ packet->bitfields2.context_id = 0;
+ packet->bitfields2.interrupt_sel = interrupt_sel___query_status__completion_status;
+ packet->bitfields2.command = command___query_status__fence_only_after_write_ack;
+
+ packet->addr_hi = upper_32((uint64_t)fence_address);
+ packet->addr_lo = lower_32((uint64_t)fence_address);
+ packet->data_hi = upper_32((uint64_t)fence_value);
+ packet->data_lo = lower_32((uint64_t)fence_value);
+
+ pm->priv_queue->submit_packet(pm->priv_queue);
+ pm->priv_queue->sync_with_hw(pm->priv_queue, KFD_HIQ_TIMEOUT);
+ mutex_unlock(&pm->lock);
+
+ return 0;
+
+fail_acquire_packet_buffer:
+ mutex_unlock(&pm->lock);
+ return retval;
+}
+
+int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
+ enum kfd_preempt_type_filter mode, uint32_t filter_param, bool reset)
+{
+ int retval;
+ uint32_t *buffer;
+ struct pm4_unmap_queues *packet;
+
+ BUG_ON(!pm);
+
+ mutex_lock(&pm->lock);
+ retval = pm->priv_queue->acquire_packet_buffer(pm->priv_queue,
+ sizeof(struct pm4_unmap_queues) / sizeof(uint32_t), &buffer);
+ if (retval != 0)
+ goto err_acquire_packet_buffer;
+
+ packet = (struct pm4_unmap_queues *)buffer;
+ memset(buffer, 0, sizeof(struct pm4_unmap_queues));
+
+ packet->header.u32all = build_pm4_header(IT_UNMAP_QUEUES, sizeof(struct pm4_unmap_queues));
+ switch (type) {
+ case KFD_QUEUE_TYPE_COMPUTE:
+ case KFD_QUEUE_TYPE_DIQ:
+ packet->bitfields2.engine_sel = engine_sel___unmap_queues__compute;
+ break;
+ case KFD_QUEUE_TYPE_SDMA:
+ packet->bitfields2.engine_sel = engine_sel___unmap_queues__sdma0;
+ break;
+ default:
+ BUG();
+ break;
+ }
+
+ if (reset)
+ packet->bitfields2.action = action___unmap_queues__reset_queues;
+ else
+ packet->bitfields2.action = action___unmap_queues__preempt_queues;
+
+ switch (mode) {
+ case KFD_PREEMPT_TYPE_FILTER_SINGLE_QUEUE:
+ packet->bitfields2.queue_sel = queue_sel___unmap_queues__perform_request_on_specified_queues;
+ packet->bitfields2.num_queues = 1;
+ packet->bitfields4.doorbell_offset0 = filter_param;
+ break;
+ case KFD_PRERMPT_TYPE_FILTER_BY_PASID:
+ packet->bitfields2.queue_sel = queue_sel___unmap_queues__perform_request_on_pasid_queues;
+ packet->bitfields3.pasid = filter_param;
+ break;
+ case KFD_PRERMPT_TYPE_FILTER_ALL_QUEUES:
+ packet->bitfields2.queue_sel = queue_sel___unmap_queues__perform_request_on_all_active_queues;
+ break;
+ default:
+ BUG();
+ break;
+ };
+
+ pm->priv_queue->submit_packet(pm->priv_queue);
+ pm->priv_queue->sync_with_hw(pm->priv_queue, KFD_HIQ_TIMEOUT);
+
+ mutex_unlock(&pm->lock);
+ return 0;
+
+err_acquire_packet_buffer:
+ mutex_unlock(&pm->lock);
+ return retval;
+}
+
+void pm_release_ib(struct packet_manager *pm)
+{
+ BUG_ON(!pm);
+
+ mutex_lock(&pm->lock);
+ if (pm->allocated) {
+ radeon_kfd_vidmem_free_unmap(pm->dqm->dev, pm->ib_buffer_obj);
+ pm->allocated = false;
+ }
+ mutex_unlock(&pm->lock);
+}
--
1.9.1
From: Ben Goz <[email protected]>
The queue scheduler is divided into two sections: one section is process-bound
and the other is device-bound.
The process-bound section is handled by this module. The PQM handles HSA queue setup, updates and tear-down.
Signed-off-by: Ben Goz <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/hsa/radeon/Makefile | 3 +-
drivers/gpu/hsa/radeon/kfd_priv.h | 29 ++
drivers/gpu/hsa/radeon/kfd_process_queue_manager.c | 370 +++++++++++++++++++++
3 files changed, 401 insertions(+), 1 deletion(-)
create mode 100644 drivers/gpu/hsa/radeon/kfd_process_queue_manager.c
diff --git a/drivers/gpu/hsa/radeon/Makefile b/drivers/gpu/hsa/radeon/Makefile
index 4978915..341fa67 100644
--- a/drivers/gpu/hsa/radeon/Makefile
+++ b/drivers/gpu/hsa/radeon/Makefile
@@ -7,6 +7,7 @@ radeon_kfd-y := kfd_module.o kfd_device.o kfd_chardev.o \
kfd_doorbell.o kfd_sched_cik_static.o kfd_registers.o \
kfd_vidmem.o kfd_interrupt.o kfd_aperture.o \
kfd_queue.o kfd_hw_pointer_store.o kfd_mqd_manager.o \
- kfd_kernel_queue.o kfd_packet_manager.o
+ kfd_kernel_queue.o kfd_packet_manager.o \
+ kfd_process_queue_manager.o
obj-$(CONFIG_HSA_RADEON) += radeon_kfd.o
diff --git a/drivers/gpu/hsa/radeon/kfd_priv.h b/drivers/gpu/hsa/radeon/kfd_priv.h
index b3889aa..e716745 100644
--- a/drivers/gpu/hsa/radeon/kfd_priv.h
+++ b/drivers/gpu/hsa/radeon/kfd_priv.h
@@ -311,6 +311,9 @@ struct kfd_process_device {
/* Scheduler process data for this device. */
struct kfd_scheduler_process *scheduler_process;
+ /* per-process-per device QCM data structure */
+ struct qcm_process_device qpd;
+
/* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */
bool bound;
@@ -342,6 +345,11 @@ struct kfd_process {
/* List of kfd_process_device structures, one for each device the process is using. */
struct list_head per_device_data;
+ struct hw_pointer_store_properties write_ptr;
+ struct hw_pointer_store_properties read_ptr;
+
+ struct process_queue_manager pqm;
+
/* The process's queues. */
size_t queue_array_size;
struct kfd_queue **queues; /* Size is queue_array_size, up to MAX_PROCESS_QUEUES. */
@@ -431,6 +439,27 @@ struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, struct kfd_dev *dev
struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, enum kfd_queue_type type);
void kernel_queue_uninit(struct kernel_queue *kq);
+/* Process Queue Manager */
+struct process_queue_node {
+ struct queue *q;
+ struct kernel_queue *kq;
+ struct list_head process_queue_list;
+};
+
+int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p);
+void pqm_uninit(struct process_queue_manager *pqm);
+int pqm_create_queue(struct process_queue_manager *pqm,
+ struct kfd_dev *dev,
+ struct file *f,
+ struct queue_properties *properties,
+ unsigned int flags,
+ enum kfd_queue_type type,
+ unsigned int *qid);
+int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid);
+int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid, struct queue_properties *p);
+struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm, unsigned int qid);
+void test_diq(struct kfd_dev *dev, struct process_queue_manager *pqm);
+
/* Packet Manager */
#define KFD_HIQ_TIMEOUT (500)
diff --git a/drivers/gpu/hsa/radeon/kfd_process_queue_manager.c b/drivers/gpu/hsa/radeon/kfd_process_queue_manager.c
new file mode 100644
index 0000000..6e38ca4
--- /dev/null
+++ b/drivers/gpu/hsa/radeon/kfd_process_queue_manager.c
@@ -0,0 +1,370 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Ben Goz
+ */
+
+#include <linux/slab.h>
+#include <linux/list.h>
+#include "kfd_device_queue_manager.h"
+#include "kfd_priv.h"
+#include "kfd_hw_pointer_store.h"
+#include "kfd_kernel_queue.h"
+
+static inline struct process_queue_node *get_queue_by_qid(struct process_queue_manager *pqm, unsigned int qid)
+{
+ struct process_queue_node *pqn;
+
+ BUG_ON(!pqm);
+
+ list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
+ if (pqn->q && pqn->q->properties.queue_id == qid)
+ return pqn;
+ if (pqn->kq && pqn->kq->queue->properties.queue_id == qid)
+ return pqn;
+ }
+
+ return NULL;
+}
+
+static int allocate_hw_pointers(struct process_queue_manager *pqm,
+ struct queue_properties *q_properties,
+ struct file *f, struct kfd_dev *dev,
+ unsigned int qid)
+{
+ int retval;
+
+ BUG_ON(!pqm || !q_properties);
+
+ retval = 0;
+
+ pr_debug("kfd: In func %s\n", __func__);
+
+ /* allocates r/w pointers in lazy mode */
+ if (pqm->process->read_ptr.page_mapping == NULL)
+ if (hw_pointer_store_init(&pqm->process->read_ptr, KFD_HW_POINTER_STORE_TYPE_RPTR) != 0)
+ return -EBUSY;
+ if (pqm->process->write_ptr.page_mapping == NULL)
+ if (hw_pointer_store_init(&pqm->process->write_ptr, KFD_HW_POINTER_STORE_TYPE_WPTR) != 0) {
+ hw_pointer_store_destroy(&pqm->process->read_ptr);
+ return -EBUSY;
+ }
+
+ q_properties->read_ptr = hw_pointer_store_create_queue(&pqm->process->read_ptr, qid, f);
+ if (!q_properties->read_ptr)
+ return -ENOMEM;
+
+ q_properties->write_ptr = hw_pointer_store_create_queue(&pqm->process->write_ptr, qid, f);
+ if (!q_properties->write_ptr)
+ return -ENOMEM;
+
+ q_properties->doorbell_ptr = radeon_kfd_get_doorbell(f, pqm->process, dev, qid);
+ if (!q_properties->doorbell_ptr)
+ return -ENOMEM;
+
+ q_properties->doorbell_off = radeon_kfd_queue_id_to_doorbell(dev, pqm->process, qid);
+
+ return retval;
+}
+
+static int find_available_queue_slot(struct process_queue_manager *pqm, unsigned int *qid)
+{
+ unsigned long found;
+
+ BUG_ON(!pqm || !qid);
+
+ pr_debug("kfd: in %s\n", __func__);
+
+ found = find_first_zero_bit(pqm->queue_slot_bitmap, MAX_PROCESS_QUEUES);
+
+ pr_debug("kfd: the new slot id %lu\n", found);
+
+ if (found >= MAX_PROCESS_QUEUES)
+ return -ENOMEM;
+
+ set_bit(found, pqm->queue_slot_bitmap);
+ *qid = found;
+
+ return 0;
+}
+
+int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p)
+{
+ BUG_ON(!pqm);
+
+ INIT_LIST_HEAD(&pqm->queues);
+ pqm->queue_slot_bitmap = kzalloc(DIV_ROUND_UP(MAX_PROCESS_QUEUES, BITS_PER_BYTE), GFP_KERNEL);
+ if (pqm->queue_slot_bitmap == NULL)
+ return -ENOMEM;
+ pqm->process = p;
+
+ return 0;
+}
+
+void pqm_uninit(struct process_queue_manager *pqm)
+{
+ int retval;
+ struct process_queue_node *pqn, *next;
+
+ BUG_ON(!pqm);
+
+ pr_debug("In func %s\n", __func__);
+
+ list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) {
+ retval = pqm_destroy_queue(
+ pqm,
+ (pqn->q != NULL) ?
+ pqn->q->properties.queue_id :
+ pqn->kq->queue->properties.queue_id);
+ if (retval != 0)
+ return;
+ }
+ kfree(pqm->queue_slot_bitmap);
+
+ if (pqm->process->read_ptr.page_mapping)
+ hw_pointer_store_destroy(&pqm->process->read_ptr);
+ if (pqm->process->write_ptr.page_mapping)
+ hw_pointer_store_destroy(&pqm->process->write_ptr);
+}
+
+static int create_cp_queue(struct process_queue_manager *pqm, struct kfd_dev *dev, struct queue **q,
+ struct queue_properties *q_properties, struct file *f, unsigned int qid)
+{
+ int retval;
+
+ retval = 0;
+
+ /* allocate hw pointers */
+ if (allocate_hw_pointers(pqm, q_properties, f, dev, qid) != 0) {
+ retval = -ENOMEM;
+ goto err_allocate_hw_pointers;
+ }
+
+ /* let DQM handle it*/
+ q_properties->vmid = 0;
+ q_properties->queue_id = qid;
+ q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
+
+ retval = init_queue(q, *q_properties);
+ if (retval != 0)
+ goto err_init_queue;
+
+ (*q)->device = dev;
+ (*q)->process = pqm->process;
+
+ pr_debug("kfd: PQM After init queue");
+
+ return retval;
+
+err_init_queue:
+err_allocate_hw_pointers:
+ return retval;
+}
+
+int pqm_create_queue(struct process_queue_manager *pqm,
+ struct kfd_dev *dev,
+ struct file *f,
+ struct queue_properties *properties,
+ unsigned int flags,
+ enum kfd_queue_type type,
+ unsigned int *qid)
+{
+ int retval;
+ struct kfd_process_device *pdd;
+ struct queue_properties q_properties;
+ struct queue *q;
+ struct process_queue_node *pqn;
+ struct kernel_queue *kq;
+
+ BUG_ON(!pqm || !dev || !properties || !qid);
+
+ memset(&q_properties, 0, sizeof(struct queue_properties));
+ memcpy(&q_properties, properties, sizeof(struct queue_properties));
+
+ pdd = radeon_kfd_get_process_device_data(dev, pqm->process);
+ BUG_ON(!pdd);
+
+ retval = find_available_queue_slot(pqm, qid);
+ if (retval != 0)
+ return retval;
+
+ if (list_empty(&pqm->queues)) {
+ pdd->qpd.pqm = pqm;
+ dev->dqm->register_process(dev->dqm, &pdd->qpd);
+ }
+
+ pqn = kzalloc(sizeof(struct process_queue_node), GFP_KERNEL);
+ if (!pqn) {
+ retval = -ENOMEM;
+ goto err_allocate_pqn;
+ }
+
+ switch (type) {
+ case KFD_QUEUE_TYPE_COMPUTE:
+ retval = create_cp_queue(pqm, dev, &q, &q_properties, f, *qid);
+ if (retval != 0)
+ goto err_create_queue;
+ pqn->q = q;
+ pqn->kq = NULL;
+ retval = dev->dqm->create_queue(dev->dqm, q, &pdd->qpd, &q->properties.vmid);
+ print_queue(q);
+ break;
+ case KFD_QUEUE_TYPE_DIQ:
+ kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_DIQ);
+ if (kq == NULL) {
+ kernel_queue_uninit(kq);
+ goto err_create_queue;
+ }
+ kq->queue->properties.queue_id = *qid;
+ pqn->kq = kq;
+ pqn->q = NULL;
+ retval = dev->dqm->create_kernel_queue(dev->dqm, kq, &pdd->qpd);
+ break;
+ default:
+ BUG();
+ break;
+ }
+
+ if (retval != 0) {
+ pr_err("kfd: error dqm create queue\n");
+ goto err_create_queue;
+ }
+
+ pr_debug("kfd: PQM After DQM create queue\n");
+
+ list_add(&pqn->process_queue_list, &pqm->queues);
+
+ retval = dev->dqm->execute_queues(dev->dqm);
+ if (retval != 0) {
+ if (pqn->kq)
+ dev->dqm->destroy_kernel_queue(dev->dqm, pqn->kq, &pdd->qpd);
+ if (pqn->q)
+ dev->dqm->destroy_queue(dev->dqm, &pdd->qpd, pqn->q);
+
+ goto err_execute_runlist;
+ }
+
+ *properties = q->properties;
+ pr_debug("kfd: PQM done creating queue\n");
+ print_queue_properties(properties);
+
+ return retval;
+
+err_execute_runlist:
+ list_del(&pqn->process_queue_list);
+err_create_queue:
+ kfree(pqn);
+err_allocate_pqn:
+ clear_bit(*qid, pqm->queue_slot_bitmap);
+ return retval;
+}
+
+int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
+{
+ struct process_queue_node *pqn;
+ struct kfd_process_device *pdd;
+ struct device_queue_manager *dqm;
+ struct kfd_dev *dev;
+ int retval;
+
+ dqm = NULL;
+
+ BUG_ON(!pqm);
+ retval = 0;
+
+ pr_debug("kfd: In Func %s\n", __func__);
+
+ pqn = get_queue_by_qid(pqm, qid);
+ BUG_ON(!pqn);
+
+ dev = NULL;
+ if (pqn->kq)
+ dev = pqn->kq->dev;
+ if (pqn->q)
+ dev = pqn->q->device;
+ BUG_ON(!dev);
+
+ pdd = radeon_kfd_get_process_device_data(dev, pqm->process);
+ BUG_ON(!pdd);
+
+ if (pqn->kq) {
+ /* destroy kernel queue (DIQ) */
+ dqm = pqn->kq->dev->dqm;
+ dqm->destroy_kernel_queue(dqm, pqn->kq, &pdd->qpd);
+ kernel_queue_uninit(pqn->kq);
+ }
+
+ if (pqn->q) {
+ dqm = pqn->q->device->dqm;
+ retval = dqm->destroy_queue(dqm, &pdd->qpd, pqn->q);
+ if (retval != 0)
+ return retval;
+
+ list_del(&pqn->process_queue_list);
+ uninit_queue(pqn->q);
+ }
+
+ kfree(pqn);
+ clear_bit(qid, pqm->queue_slot_bitmap);
+
+ if (list_empty(&pqm->queues))
+ dqm->unregister_process(dqm, &pdd->qpd);
+
+ retval = dqm->execute_queues(dqm);
+
+ return retval;
+}
+
+int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid, struct queue_properties *p)
+{
+ struct process_queue_node *pqn;
+
+ BUG_ON(!pqm);
+
+ pqn = get_queue_by_qid(pqm, qid);
+ BUG_ON(!pqn);
+
+ pqn->q->properties.queue_address = p->queue_address;
+ pqn->q->properties.queue_size = p->queue_size;
+ pqn->q->properties.queue_percent = p->queue_percent;
+ pqn->q->properties.priority = p->priority;
+
+ pqn->q->device->dqm->destroy_queues(pqn->q->device->dqm);
+ pqn->q->device->dqm->update_queue(pqn->q->device->dqm, pqn->q);
+ pqn->q->device->dqm->execute_queues(pqn->q->device->dqm);
+
+ return 0;
+}
+
+struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm, unsigned int qid)
+{
+ struct process_queue_node *pqn;
+
+ BUG_ON(!pqm);
+
+ pqn = get_queue_by_qid(pqm, qid);
+ if (pqn && pqn->kq)
+ return pqn->kq;
+
+ return NULL;
+}
+
+
--
1.9.1
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/hsa/radeon/kfd_chardev.c | 6 +++---
drivers/gpu/hsa/radeon/kfd_kernel_queue.c | 4 ++--
drivers/gpu/hsa/radeon/kfd_queue.c | 2 +-
3 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/hsa/radeon/kfd_chardev.c b/drivers/gpu/hsa/radeon/kfd_chardev.c
index 9a77332..80b702e 100644
--- a/drivers/gpu/hsa/radeon/kfd_chardev.c
+++ b/drivers/gpu/hsa/radeon/kfd_chardev.c
@@ -114,8 +114,8 @@ kfd_open(struct inode *inode, struct file *filep)
process->is_32bit_user_mode = is_compat_task();
- dev_info(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
- process->pasid, process->is_32bit_user_mode);
+ dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
+ process->pasid, process->is_32bit_user_mode);
kfd_init_apertures(process);
@@ -149,7 +149,7 @@ kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, void __user *a
pr_debug("%s Arguments: Queue Percentage (%d, %d)\n"
"Queue Priority (%d, %d)\n"
"Queue Address (0x%llX, 0x%llX)\n"
- "Queue Size (%u64, %ll)\n",
+ "Queue Size (%llX, %u)\n",
__func__,
q_properties.queue_percent, args.queue_percentage,
q_properties.priority, args.queue_priority,
diff --git a/drivers/gpu/hsa/radeon/kfd_kernel_queue.c b/drivers/gpu/hsa/radeon/kfd_kernel_queue.c
index aa64693e..25528b3 100644
--- a/drivers/gpu/hsa/radeon/kfd_kernel_queue.c
+++ b/drivers/gpu/hsa/radeon/kfd_kernel_queue.c
@@ -89,8 +89,8 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
prop.type = type;
prop.vmid = 0;
prop.queue_address = kq->pq_gpu_addr;
- prop.read_ptr = kq->rptr_gpu_addr;
- prop.write_ptr = kq->wptr_gpu_addr;
+ prop.read_ptr = (qptr_t *) kq->rptr_gpu_addr;
+ prop.write_ptr = (qptr_t *) kq->wptr_gpu_addr;
if (init_queue(&kq->queue, prop) != 0)
goto err_init_queue;
diff --git a/drivers/gpu/hsa/radeon/kfd_queue.c b/drivers/gpu/hsa/radeon/kfd_queue.c
index 2d22cc1..646b6d1 100644
--- a/drivers/gpu/hsa/radeon/kfd_queue.c
+++ b/drivers/gpu/hsa/radeon/kfd_queue.c
@@ -67,7 +67,7 @@ void print_queue(struct queue *q)
"Queue Doorbell Pointer: 0x%p\n"
"Queue Doorbell Offset: %u\n"
"Queue MQD Address: 0x%p\n"
- "Queue MQD Gart: 0x%p\n"
+ "Queue MQD Gart: 0x%llX\n"
"Queue Process Address: 0x%p\n"
"Queue Device Address: 0x%p\n",
q->properties.type,
--
1.9.1
From: Evgeny Pinchuk <[email protected]>
Introduce an IOCTL implementation for controlling exclusive access to performance counters.
The exclusive access is per GPU device.
Signed-off-by: Evgeny Pinchuk <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/hsa/radeon/kfd_chardev.c | 61 ++++++++++++++++++++++++++++++++++++
drivers/gpu/hsa/radeon/kfd_device.c | 2 ++
drivers/gpu/hsa/radeon/kfd_priv.h | 5 +++
drivers/gpu/hsa/radeon/kfd_process.c | 8 +++--
include/uapi/linux/kfd_ioctl.h | 12 +++++++
5 files changed, 86 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/hsa/radeon/kfd_chardev.c b/drivers/gpu/hsa/radeon/kfd_chardev.c
index 80b702e..b39df68 100644
--- a/drivers/gpu/hsa/radeon/kfd_chardev.c
+++ b/drivers/gpu/hsa/radeon/kfd_chardev.c
@@ -387,6 +387,59 @@ static int kfd_ioctl_get_process_apertures(struct file *filp, struct kfd_process
return 0;
}
+static long
+kfd_ioctl_pmc_acquire_access(struct file *filp, struct kfd_process *p, void __user *arg)
+{
+ struct kfd_ioctl_pmc_acquire_access_args args;
+ struct kfd_dev *dev;
+ int err = -EBUSY;
+
+ if (copy_from_user(&args, arg, sizeof(args)))
+ return -EFAULT;
+
+ dev = radeon_kfd_device_by_id(args.gpu_id);
+ if (dev == NULL)
+ return -EINVAL;
+
+ spin_lock(&dev->pmc_access_lock);
+ if (dev->pmc_locking_process == NULL) {
+ dev->pmc_locking_process = p;
+ dev->pmc_locking_trace = args.trace_id;
+ err = 0;
+ } else if (dev->pmc_locking_process == p && dev->pmc_locking_trace == args.trace_id) {
+ /* Same trace already has an access. Returning success */
+ err = 0;
+ }
+
+ spin_unlock(&dev->pmc_access_lock);
+
+ return err;
+}
+
+static long
+kfd_ioctl_pmc_release_access(struct file *filp, struct kfd_process *p, void __user *arg)
+{
+ struct kfd_ioctl_pmc_release_access_args args;
+ struct kfd_dev *dev;
+ int err = -EINVAL;
+
+ if (copy_from_user(&args, arg, sizeof(args)))
+ return -EFAULT;
+
+ dev = radeon_kfd_device_by_id(args.gpu_id);
+ if (dev == NULL)
+ return -EINVAL;
+
+ spin_lock(&dev->pmc_access_lock);
+ if (dev->pmc_locking_process == p && dev->pmc_locking_trace == args.trace_id) {
+ dev->pmc_locking_process = NULL;
+ dev->pmc_locking_trace = 0;
+ err = 0;
+ }
+ spin_unlock(&dev->pmc_access_lock);
+
+ return err;
+}
static long
kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
@@ -427,6 +480,14 @@ kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
err = kfd_ioctl_update_queue(filep, process, (void __user *)arg);
break;
+ case KFD_IOC_PMC_ACQUIRE_ACCESS:
+ err = kfd_ioctl_pmc_acquire_access(filep, process, (void __user *) arg);
+ break;
+
+ case KFD_IOC_PMC_RELEASE_ACCESS:
+ err = kfd_ioctl_pmc_release_access(filep, process, (void __user *) arg);
+ break;
+
default:
dev_err(kfd_device,
"unknown ioctl cmd 0x%x, arg 0x%lx)\n",
diff --git a/drivers/gpu/hsa/radeon/kfd_device.c b/drivers/gpu/hsa/radeon/kfd_device.c
index c602e16..9af812b 100644
--- a/drivers/gpu/hsa/radeon/kfd_device.c
+++ b/drivers/gpu/hsa/radeon/kfd_device.c
@@ -185,6 +185,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
return false;
}
+ spin_lock_init(&kfd->pmc_access_lock);
+
kfd->init_complete = true;
dev_info(kfd_device, "added device (%x:%x)\n", kfd->pdev->vendor,
kfd->pdev->device);
diff --git a/drivers/gpu/hsa/radeon/kfd_priv.h b/drivers/gpu/hsa/radeon/kfd_priv.h
index 049671b..e6d4993 100644
--- a/drivers/gpu/hsa/radeon/kfd_priv.h
+++ b/drivers/gpu/hsa/radeon/kfd_priv.h
@@ -135,6 +135,11 @@ struct kfd_dev {
/* QCM Device instance */
struct device_queue_manager *dqm;
+
+ /* Performance counters exclusivity lock */
+ spinlock_t pmc_access_lock;
+ struct kfd_process *pmc_locking_process;
+ uint64_t pmc_locking_trace;
};
/* KGD2KFD callbacks */
diff --git a/drivers/gpu/hsa/radeon/kfd_process.c b/drivers/gpu/hsa/radeon/kfd_process.c
index f967c15..9bb5cab 100644
--- a/drivers/gpu/hsa/radeon/kfd_process.c
+++ b/drivers/gpu/hsa/radeon/kfd_process.c
@@ -96,9 +96,13 @@ static void free_process(struct kfd_process *p)
BUG_ON(p == NULL);
- /* doorbell mappings: automatic */
-
list_for_each_entry_safe(pdd, temp, &p->per_device_data, per_device_list) {
+ spin_lock(&pdd->dev->pmc_access_lock);
+ if (pdd->dev->pmc_locking_process == p) {
+ pdd->dev->pmc_locking_process = NULL;
+ pdd->dev->pmc_locking_trace = 0;
+ }
+ spin_unlock(&pdd->dev->pmc_access_lock);
amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid);
list_del(&pdd->per_device_list);
kfree(pdd);
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index d58231d..509c4a0 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -103,6 +103,16 @@ struct kfd_ioctl_get_process_apertures_args {
uint8_t num_of_nodes; /* from KFD, should be in the range [1 - NUM_OF_SUPPORTED_GPUS]*/
};
+struct kfd_ioctl_pmc_acquire_access_args {
+ uint32_t gpu_id; /* to KFD */
+ uint64_t trace_id; /* to KFD */
+};
+
+struct kfd_ioctl_pmc_release_access_args {
+ uint32_t gpu_id; /* to KFD */
+ uint64_t trace_id; /* to KFD */
+};
+
#define KFD_IOC_MAGIC 'K'
#define KFD_IOC_GET_VERSION _IOR(KFD_IOC_MAGIC, 1, struct kfd_ioctl_get_version_args)
@@ -112,6 +122,8 @@ struct kfd_ioctl_get_process_apertures_args {
#define KFD_IOC_GET_CLOCK_COUNTERS _IOWR(KFD_IOC_MAGIC, 5, struct kfd_ioctl_get_clock_counters_args)
#define KFD_IOC_GET_PROCESS_APERTURES _IOR(KFD_IOC_MAGIC, 6, struct kfd_ioctl_get_process_apertures_args)
#define KFD_IOC_UPDATE_QUEUE _IOW(KFD_IOC_MAGIC, 7, struct kfd_ioctl_update_queue_args)
+#define KFD_IOC_PMC_ACQUIRE_ACCESS _IOW(KFD_IOC_MAGIC, 12, struct kfd_ioctl_pmc_acquire_access_args)
+#define KFD_IOC_PMC_RELEASE_ACCESS _IOW(KFD_IOC_MAGIC, 13, struct kfd_ioctl_pmc_release_access_args)
#pragma pack(pop)
--
1.9.1
This patch fixes a bug in the timeout calculation done in sync_with_hw
functions. The original code wrongly assumed that jiffies is incremented in milliseconds.
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/hsa/radeon/kfd_kernel_queue.c | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/hsa/radeon/kfd_kernel_queue.c b/drivers/gpu/hsa/radeon/kfd_kernel_queue.c
index 25528b3..ce3261b 100644
--- a/drivers/gpu/hsa/radeon/kfd_kernel_queue.c
+++ b/drivers/gpu/hsa/radeon/kfd_kernel_queue.c
@@ -222,12 +222,18 @@ static void submit_packet(struct kernel_queue *kq)
static int sync_with_hw(struct kernel_queue *kq, unsigned long timeout_ms)
{
+ unsigned long org_timeout_ms;
+
BUG_ON(!kq);
- timeout_ms += jiffies;
+
+ org_timeout_ms = timeout_ms;
+ timeout_ms += jiffies * 1000 / HZ;
while (*kq->wptr_kernel != *kq->rptr_kernel) {
- if (time_after(jiffies, timeout_ms)) {
- pr_err("kfd: kernel_queue %s timeout expired %lu\n", __func__, timeout_ms);
- pr_err("kfd: wptr: %d rptr: %d\n", *kq->wptr_kernel, *kq->rptr_kernel);
+ if (time_after(jiffies * 1000 / HZ, timeout_ms)) {
+ pr_err("kfd: kernel_queue %s timeout expired %lu\n",
+ __func__, org_timeout_ms);
+ pr_err("kfd: wptr: %d rptr: %d\n",
+ *kq->wptr_kernel, *kq->rptr_kernel);
return -ETIME;
}
cpu_relax();
--
1.9.1
From: Ben Goz <[email protected]>
This commit adds fault handling to the process queue manager's update queue function.
Signed-off-by: Ben Goz <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/hsa/radeon/kfd_process_queue_manager.c | 15 ++++++++++++---
1 file changed, 12 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/hsa/radeon/kfd_process_queue_manager.c b/drivers/gpu/hsa/radeon/kfd_process_queue_manager.c
index fe74dd7..2034d2b 100644
--- a/drivers/gpu/hsa/radeon/kfd_process_queue_manager.c
+++ b/drivers/gpu/hsa/radeon/kfd_process_queue_manager.c
@@ -334,6 +334,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid, struct queue_properties *p)
{
+ int retval;
struct process_queue_node *pqn;
BUG_ON(!pqm);
@@ -346,9 +347,17 @@ int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid, struct
pqn->q->properties.queue_percent = p->queue_percent;
pqn->q->properties.priority = p->priority;
- pqn->q->device->dqm->destroy_queues(pqn->q->device->dqm);
- pqn->q->device->dqm->update_queue(pqn->q->device->dqm, pqn->q);
- pqn->q->device->dqm->execute_queues(pqn->q->device->dqm);
+ retval = pqn->q->device->dqm->destroy_queues(pqn->q->device->dqm);
+ if (retval != 0)
+ return retval;
+
+ retval = pqn->q->device->dqm->update_queue(pqn->q->device->dqm, pqn->q);
+ if (retval != 0)
+ return retval;
+
+ retval = pqn->q->device->dqm->execute_queues(pqn->q->device->dqm);
+ if (retval != 0)
+ return retval;
return 0;
}
--
1.9.1
From: Ben Goz <[email protected]>
This commit fixes a bug in 32-bit HSA process support.
Signed-off-by: Ben Goz <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/hsa/radeon/cik_regs.h | 1 +
drivers/gpu/hsa/radeon/kfd_device_queue_manager.c | 8 +++++---
2 files changed, 6 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/hsa/radeon/cik_regs.h b/drivers/gpu/hsa/radeon/cik_regs.h
index fa5ec01..a6404e3 100644
--- a/drivers/gpu/hsa/radeon/cik_regs.h
+++ b/drivers/gpu/hsa/radeon/cik_regs.h
@@ -45,6 +45,7 @@
/* if PTR32, this is the upper limit of GPUVM */
#define SH_MEM_CONFIG 0x8C34
#define PTR32 (1 << 0)
+#define PRIVATE_ATC (1 << 1)
#define ALIGNMENT_MODE(x) ((x) << 2)
#define SH_MEM_ALIGNMENT_MODE_DWORD 0
#define SH_MEM_ALIGNMENT_MODE_DWORD_STRICT 1
diff --git a/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c b/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c
index 01573b1..3e1def1 100644
--- a/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c
+++ b/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c
@@ -90,15 +90,17 @@ static void init_process_memory(struct device_queue_manager *dqm, struct qcm_pro
if (qpd->pqm->process->is_32bit_user_mode) {
temp = get_sh_mem_bases_32(qpd->pqm->process, dqm->dev);
qpd->sh_mem_bases = SHARED_BASE(temp);
+ qpd->sh_mem_config = PTR32;
} else {
temp = get_sh_mem_bases_nybble_64(qpd->pqm->process, dqm->dev);
qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
+ qpd->sh_mem_config = 0;
}
- qpd->sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
+ qpd->sh_mem_config |= ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
qpd->sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);
qpd->sh_mem_ape1_limit = 0;
- qpd->sh_mem_ape1_base = 1;
+ qpd->sh_mem_ape1_base = 0;
pr_debug("kfd: is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases);
@@ -854,7 +856,7 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm)
}
if (dqm->queue_count <= 0 || dqm->processes_count <= 0)
- return 0;
+ return 0;
mutex_lock(&dqm->lock);
if (dqm->active_runlist) {
--
1.9.1
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/hsa/radeon/kfd_module.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/hsa/radeon/kfd_module.c b/drivers/gpu/hsa/radeon/kfd_module.c
index fbfcce6..33cee3c 100644
--- a/drivers/gpu/hsa/radeon/kfd_module.c
+++ b/drivers/gpu/hsa/radeon/kfd_module.c
@@ -32,7 +32,7 @@
#define KFD_DRIVER_DESC "Standalone HSA driver for AMD's GPUs"
#define KFD_DRIVER_DATE "20140424"
#define KFD_DRIVER_MAJOR 0
-#define KFD_DRIVER_MINOR 5
+#define KFD_DRIVER_MINOR 6
#define KFD_DRIVER_PATCHLEVEL 0
const struct kfd2kgd_calls *kfd2kgd;
--
1.9.1
From: Yair Shachar <[email protected]>
Waiting on a fence now returns a status code instead of void.
Signed-off-by: Yair Shachar <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/hsa/radeon/kfd_device_queue_manager.c | 6 ++++--
drivers/gpu/hsa/radeon/kfd_priv.h | 2 ++
2 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c b/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c
index 4931f8a..4c53e57 100644
--- a/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c
+++ b/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c
@@ -800,7 +800,7 @@ out:
return retval;
}
-static void fence_wait_timeout(unsigned int *fence_addr, unsigned int fence_value, unsigned long timeout)
+int fence_wait_timeout(unsigned int *fence_addr, unsigned int fence_value, unsigned long timeout)
{
BUG_ON(!fence_addr);
timeout += jiffies;
@@ -808,10 +808,12 @@ static void fence_wait_timeout(unsigned int *fence_addr, unsigned int fence_valu
while (*fence_addr != fence_value) {
if (time_after(jiffies, timeout)) {
pr_err("kfd: qcm fence wait loop timeout expired\n");
- break;
+ return -ETIME;
}
cpu_relax();
}
+
+ return 0;
}
static int destroy_queues_cpsch(struct device_queue_manager *dqm)
diff --git a/drivers/gpu/hsa/radeon/kfd_priv.h b/drivers/gpu/hsa/radeon/kfd_priv.h
index 97bf58a..b61187a 100644
--- a/drivers/gpu/hsa/radeon/kfd_priv.h
+++ b/drivers/gpu/hsa/radeon/kfd_priv.h
@@ -463,6 +463,8 @@ int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid, struct
struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm, unsigned int qid);
void test_diq(struct kfd_dev *dev, struct process_queue_manager *pqm);
+int fence_wait_timeout(unsigned int *fence_addr, unsigned int fence_value, unsigned long timeout);
+
/* Packet Manager */
#define KFD_HIQ_TIMEOUT (500)
--
1.9.1
This patch fixes a bug when using the mode of CP hardware
scheduling without oversubscription.
The bug was that the oversubscription check was performed
_after_ the current runlist was destroyed, which caused
the current HSA application to stop working.
This patch moves the oversubscription check before the call
to destroy the current runlist. If there is oversubscription,
the function prints an error to dmesg and simply exits.
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/hsa/radeon/kfd_packet_manager.c | 3 ---
drivers/gpu/hsa/radeon/kfd_process_queue_manager.c | 9 +++++++++
2 files changed, 9 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/hsa/radeon/kfd_packet_manager.c b/drivers/gpu/hsa/radeon/kfd_packet_manager.c
index 5cd23b0..0aef907 100644
--- a/drivers/gpu/hsa/radeon/kfd_packet_manager.c
+++ b/drivers/gpu/hsa/radeon/kfd_packet_manager.c
@@ -88,9 +88,6 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm, unsigned int **rl_b
BUG_ON(is_over_subscription == NULL);
pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription);
- if (*is_over_subscription &&
- sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION)
- return -EFAULT;
retval = radeon_kfd_vidmem_alloc_map(pm->dqm->dev, &pm->ib_buffer_obj, (void **)rl_buffer,
rl_gpu_buffer, ALIGN(*rl_buffer_size, PAGE_SIZE));
diff --git a/drivers/gpu/hsa/radeon/kfd_process_queue_manager.c b/drivers/gpu/hsa/radeon/kfd_process_queue_manager.c
index 5d7c46d..97b3cc6 100644
--- a/drivers/gpu/hsa/radeon/kfd_process_queue_manager.c
+++ b/drivers/gpu/hsa/radeon/kfd_process_queue_manager.c
@@ -174,6 +174,15 @@ int pqm_create_queue(struct process_queue_manager *pqm,
switch (type) {
case KFD_QUEUE_TYPE_COMPUTE:
+ /* check if there is over subscription */
+ if ((sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&
+ ((dev->dqm->processes_count >= VMID_PER_DEVICE) ||
+ (dev->dqm->queue_count >= PIPE_PER_ME_CP_SCHEDULING * QUEUES_PER_PIPE))) {
+ pr_err("kfd: over-subscription is not allowed in radeon_kfd.sched_policy == 1\n");
+ retval = -EPERM;
+ goto err_create_queue;
+ }
+
retval = create_cp_queue(pqm, dev, &q, &q_properties, f, *qid);
if (retval != 0)
goto err_create_queue;
--
1.9.1
This version is intended for upstreaming to the Linux kernel 3.17
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/hsa/radeon/kfd_module.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/hsa/radeon/kfd_module.c b/drivers/gpu/hsa/radeon/kfd_module.c
index c706236..c783eeb 100644
--- a/drivers/gpu/hsa/radeon/kfd_module.c
+++ b/drivers/gpu/hsa/radeon/kfd_module.c
@@ -30,10 +30,10 @@
#define KFD_DRIVER_AUTHOR "AMD Inc. and others"
#define KFD_DRIVER_DESC "Standalone HSA driver for AMD's GPUs"
-#define KFD_DRIVER_DATE "20140623"
+#define KFD_DRIVER_DATE "20140710"
#define KFD_DRIVER_MAJOR 0
#define KFD_DRIVER_MINOR 6
-#define KFD_DRIVER_PATCHLEVEL 1
+#define KFD_DRIVER_PATCHLEVEL 2
const struct kfd2kgd_calls *kfd2kgd;
static const struct kgd2kfd_calls kgd2kfd = {
--
1.9.1
From: Ben Goz <[email protected]>
Signed-off-by: Ben Goz <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/drm/radeon/radeon_kfd.c | 44 -------------------------------------
include/linux/radeon_kfd.h | 10 ---------
2 files changed, 54 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c
index 738c2b3..7e8e041 100644
--- a/drivers/gpu/drm/radeon/radeon_kfd.c
+++ b/drivers/gpu/drm/radeon/radeon_kfd.c
@@ -115,12 +115,6 @@ static void unkmap_mem(struct kgd_dev *kgd, struct kgd_mem *mem);
static uint64_t get_vmem_size(struct kgd_dev *kgd);
static uint64_t get_gpu_clock_counter(struct kgd_dev *kgd);
-static void lock_srbm_gfx_cntl(struct kgd_dev *kgd);
-static void unlock_srbm_gfx_cntl(struct kgd_dev *kgd);
-
-static void lock_grbm_gfx_idx(struct kgd_dev *kgd);
-static void unlock_grbm_gfx_idx(struct kgd_dev *kgd);
-
static uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd);
/*
@@ -146,10 +140,6 @@ static const struct kfd2kgd_calls kfd2kgd = {
.unkmap_mem = unkmap_mem,
.get_vmem_size = get_vmem_size,
.get_gpu_clock_counter = get_gpu_clock_counter,
- .lock_srbm_gfx_cntl = lock_srbm_gfx_cntl,
- .unlock_srbm_gfx_cntl = unlock_srbm_gfx_cntl,
- .lock_grbm_gfx_idx = lock_grbm_gfx_idx,
- .unlock_grbm_gfx_idx = unlock_grbm_gfx_idx,
.get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
.program_sh_mem_settings = kgd_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
@@ -200,8 +190,6 @@ void radeon_kfd_device_init(struct radeon_device *rdev)
{
if (rdev->kfd) {
struct kgd2kfd_shared_resources gpu_resources = {
- .mmio_registers = rdev->rmmio,
-
.compute_vmid_bitmap = 0xFF00,
.first_compute_pipe = 1,
@@ -363,38 +351,6 @@ static uint64_t get_vmem_size(struct kgd_dev *kgd)
return rdev->mc.real_vram_size;
}
-static void lock_srbm_gfx_cntl(struct kgd_dev *kgd)
-{
- struct radeon_device *rdev = (struct radeon_device *)kgd;
-
- mutex_lock(&rdev->srbm_mutex);
-}
-
-static void unlock_srbm_gfx_cntl(struct kgd_dev *kgd)
-{
- struct radeon_device *rdev = (struct radeon_device *)kgd;
-
- mutex_unlock(&rdev->srbm_mutex);
-}
-
-static void lock_grbm_gfx_idx(struct kgd_dev *kgd)
-{
- struct radeon_device *rdev = (struct radeon_device *)kgd;
-
- BUG_ON(kgd == NULL);
-
- mutex_lock(&rdev->grbm_idx_mutex);
-}
-
-static void unlock_grbm_gfx_idx(struct kgd_dev *kgd)
-{
- struct radeon_device *rdev = (struct radeon_device *)kgd;
-
- BUG_ON(kgd == NULL);
-
- mutex_unlock(&rdev->grbm_idx_mutex);
-}
-
static uint64_t get_gpu_clock_counter(struct kgd_dev *kgd)
{
struct radeon_device *rdev = (struct radeon_device *)kgd;
diff --git a/include/linux/radeon_kfd.h b/include/linux/radeon_kfd.h
index aa021fb..2fffe32 100644
--- a/include/linux/radeon_kfd.h
+++ b/include/linux/radeon_kfd.h
@@ -45,8 +45,6 @@ enum kgd_memory_pool {
};
struct kgd2kfd_shared_resources {
- void __iomem *mmio_registers; /* Mapped pointer to GFX MMIO registers. */
-
unsigned int compute_vmid_bitmap; /* Bit n == 1 means VMID n is available for KFD. */
unsigned int first_compute_pipe; /* Compute pipes are counted starting from MEC0/pipe0 as 0. */
@@ -86,14 +84,6 @@ struct kfd2kgd_calls {
uint64_t (*get_vmem_size)(struct kgd_dev *kgd);
uint64_t (*get_gpu_clock_counter)(struct kgd_dev *kgd);
- /* SRBM_GFX_CNTL mutex */
- void (*lock_srbm_gfx_cntl)(struct kgd_dev *kgd);
- void (*unlock_srbm_gfx_cntl)(struct kgd_dev *kgd);
-
- /* GRBM_GFX_INDEX mutex */
- void (*lock_grbm_gfx_idx)(struct kgd_dev *kgd);
- void (*unlock_grbm_gfx_idx)(struct kgd_dev *kgd);
-
uint32_t (*get_max_engine_clock_in_mhz)(struct kgd_dev *kgd);
/* Register access functions */
--
1.9.1
From: Ben Goz <[email protected]>
This patch eliminates all direct register accesses from KFD
and eliminates the use of shared locks between KFD and radeon.
The single exception is the doorbells that are used in
both of the drivers. However, because they are located
in separate pci bar pages, the danger of sharing registers
between the drivers is minimal.
Having said that, we are planning to move the doorbells
to radeon as well.
Signed-off-by: Ben Goz <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/hsa/radeon/Makefile | 2 +-
drivers/gpu/hsa/radeon/kfd_device.c | 2 -
drivers/gpu/hsa/radeon/kfd_device_queue_manager.c | 113 +++-----------
drivers/gpu/hsa/radeon/kfd_kernel_queue.c | 12 +-
drivers/gpu/hsa/radeon/kfd_mqd_manager.c | 175 +---------------------
drivers/gpu/hsa/radeon/kfd_mqd_manager.h | 37 +++--
drivers/gpu/hsa/radeon/kfd_priv.h | 18 ---
drivers/gpu/hsa/radeon/kfd_registers.c | 50 -------
8 files changed, 54 insertions(+), 355 deletions(-)
delete mode 100644 drivers/gpu/hsa/radeon/kfd_registers.c
diff --git a/drivers/gpu/hsa/radeon/Makefile b/drivers/gpu/hsa/radeon/Makefile
index b5f05b4..d838bce 100644
--- a/drivers/gpu/hsa/radeon/Makefile
+++ b/drivers/gpu/hsa/radeon/Makefile
@@ -4,7 +4,7 @@
radeon_kfd-y := kfd_module.o kfd_device.o kfd_chardev.o \
kfd_pasid.o kfd_topology.o kfd_process.o \
- kfd_doorbell.o kfd_registers.o kfd_vidmem.o \
+ kfd_doorbell.o kfd_vidmem.o \
kfd_interrupt.o kfd_aperture.o kfd_queue.o kfd_mqd_manager.o \
kfd_kernel_queue.o kfd_packet_manager.o \
kfd_process_queue_manager.o kfd_device_queue_manager.o
diff --git a/drivers/gpu/hsa/radeon/kfd_device.c b/drivers/gpu/hsa/radeon/kfd_device.c
index 30558c9..0ff2241 100644
--- a/drivers/gpu/hsa/radeon/kfd_device.c
+++ b/drivers/gpu/hsa/radeon/kfd_device.c
@@ -157,8 +157,6 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
{
kfd->shared_resources = *gpu_resources;
- kfd->regs = gpu_resources->mmio_registers;
-
radeon_kfd_doorbell_init(kfd);
if (radeon_kfd_interrupt_init(kfd))
diff --git a/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c b/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c
index 12b8b33..3eb5db3 100644
--- a/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c
+++ b/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c
@@ -112,30 +112,15 @@ static void init_process_memory(struct device_queue_manager *dqm, struct qcm_pro
static void program_sh_mem_settings(struct device_queue_manager *dqm, struct qcm_process_device *qpd)
{
- struct mqd_manager *mqd;
-
- BUG_ON(qpd->vmid < KFD_VMID_START_OFFSET);
-
- mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
- if (mqd == NULL)
- return;
-
- mqd->acquire_hqd(mqd, 0, 0, qpd->vmid);
-
- WRITE_REG(dqm->dev, SH_MEM_CONFIG, qpd->sh_mem_config);
-
- WRITE_REG(dqm->dev, SH_MEM_APE1_BASE, qpd->sh_mem_ape1_base);
- WRITE_REG(dqm->dev, SH_MEM_APE1_LIMIT, qpd->sh_mem_ape1_limit);
- WRITE_REG(dqm->dev, SH_MEM_BASES, qpd->sh_mem_bases);
-
- mqd->release_hqd(mqd);
+ return kfd2kgd->program_sh_mem_settings(dqm->dev->kgd, qpd->vmid, qpd->sh_mem_config,
+ qpd->sh_mem_ape1_base, qpd->sh_mem_ape1_limit, qpd->sh_mem_bases);
}
static int create_queue_nocpsch(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd, int *allocate_vmid)
{
bool set, is_new_vmid;
- int bit, retval, pipe;
+ int bit, retval, pipe, i;
struct mqd_manager *mqd;
BUG_ON(!dqm || !q || !qpd || !allocate_vmid);
@@ -171,8 +156,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, struct queue *
q->properties.vmid = qpd->vmid;
set = false;
- for (pipe = dqm->next_pipe_to_allocate; pipe < get_pipes_num(dqm);
- pipe = (pipe + 1) % get_pipes_num(dqm)) {
+ for (i = 0, pipe = dqm->next_pipe_to_allocate; i < get_pipes_num(dqm);
+ pipe = (pipe + i++) % get_pipes_num(dqm)) {
if (dqm->allocated_queues[pipe] != 0) {
bit = find_first_bit((unsigned long *)&dqm->allocated_queues[pipe], QUEUES_PER_PIPE);
clear_bit(bit, (unsigned long *)&dqm->allocated_queues[pipe]);
@@ -238,9 +223,7 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm, struct qcm_pr
retval = -ENOMEM;
goto out;
}
- mqd->acquire_hqd(mqd, q->pipe, q->queue, 0);
- retval = mqd->destroy_mqd(mqd, q->mqd, KFD_PREEMPT_TYPE_WAVEFRONT, QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
- mqd->release_hqd(mqd);
+ retval = mqd->destroy_mqd(mqd, false, QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS, q->pipe, q->queue);
if (retval != 0)
goto out;
@@ -299,12 +282,7 @@ static int destroy_queues_nocpsch(struct device_queue_manager *dqm)
list_for_each_entry(cur, &dqm->queues, list) {
list_for_each_entry(q, &cur->qpd->queues_list, list) {
-
-
- mqd->acquire_hqd(mqd, q->pipe, q->queue, 0);
- mqd->destroy_mqd(mqd, q->mqd, KFD_PREEMPT_TYPE_WAVEFRONT,
- QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
- mqd->release_hqd(mqd);
+ mqd->destroy_mqd(mqd, false, QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS, q->pipe, q->queue);
}
}
@@ -352,10 +330,9 @@ static int execute_queues_nocpsch(struct device_queue_manager *dqm)
qpd = node->qpd;
list_for_each_entry(q, &qpd->queues_list, list) {
pr_debug("kfd: executing queue (%d, %d)\n", q->pipe, q->queue);
- mqd->acquire_hqd(mqd, q->pipe, q->queue, 0);
- if (mqd->is_occupied(mqd, q->mqd, &q->properties) == false)
- mqd->load_mqd(mqd, q->mqd);
- mqd->release_hqd(mqd);
+ if (mqd->is_occupied(mqd, q->properties.queue_address, q->pipe, q->queue) == false &&
+ q->properties.is_active == true)
+ mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue, q->properties.write_ptr);
}
}
@@ -420,25 +397,9 @@ out:
static int
set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid, unsigned int vmid)
{
- /* We have to assume that there is no outstanding mapping.
- * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because a mapping
- * is in progress or because a mapping finished and the SW cleared it.
- * So the protocol is to always wait & clear.
- */
uint32_t pasid_mapping;
-
- BUG_ON(!dqm);
-
- pr_debug("kfd: In %s set pasid: %d to vmid: %d\n", __func__, pasid, vmid);
pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid | ATC_VMID_PASID_MAPPING_VALID;
-
- WRITE_REG(dqm->dev, ATC_VMID0_PASID_MAPPING + vmid*sizeof(uint32_t), pasid_mapping);
-
- while (!(READ_REG(dqm->dev, ATC_VMID_PASID_MAPPING_UPDATE_STATUS) & (1U << vmid)))
- cpu_relax();
- WRITE_REG(dqm->dev, ATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid);
-
- return 0;
+ return kfd2kgd->set_pasid_vmid_mapping(dqm->dev->kgd, pasid_mapping, vmid);
}
static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
@@ -463,46 +424,18 @@ static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
static int init_memory(struct device_queue_manager *dqm)
{
- int i;
- struct mqd_manager *mqd;
-
- BUG_ON(!dqm);
-
- pr_debug("kfd: In func %s\n", __func__);
- mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
- if (mqd == NULL)
- return -ENOMEM;
- for (i = 0; i < 16; i++) {
- uint32_t sh_mem_config;
+ int i, retval;
- mqd->acquire_hqd(mqd, 0, 0, i);
+ for (i = 8; i < 16; i++)
set_pasid_vmid_mapping(dqm, 0, i);
- sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
- sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);
-
- WRITE_REG(dqm->dev, SH_MEM_CONFIG, sh_mem_config);
-
- /* Configure apertures:
- * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
- * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
- * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
- */
- WRITE_REG(dqm->dev, SH_MEM_BASES, compute_sh_mem_bases_64bit(6));
-
- /* Scratch aperture is not supported for now. */
- WRITE_REG(dqm->dev, SH_STATIC_MEM_CONFIG, 0);
-
- /* APE1 disabled for now. */
- WRITE_REG(dqm->dev, SH_MEM_APE1_BASE, 1);
- WRITE_REG(dqm->dev, SH_MEM_APE1_LIMIT, 0);
-
- mqd->release_hqd(mqd);
- }
- is_mem_initialized = true;
- return 0;
+ retval = kfd2kgd->init_memory(dqm->dev->kgd);
+ if (retval == 0)
+ is_mem_initialized = true;
+ return retval;
}
+
static int init_pipelines(struct device_queue_manager *dqm, unsigned int pipes_num, unsigned int first_pipe)
{
void *hpdptr;
@@ -552,13 +485,7 @@ static int init_pipelines(struct device_queue_manager *dqm, unsigned int pipes_n
inx = i + first_pipe;
pipe_hpd_addr = dqm->pipelines_addr + i * CIK_HPD_SIZE;
pr_debug("kfd: pipeline address %llX\n", pipe_hpd_addr);
-
- mqd->acquire_hqd(mqd, inx, 0, 0);
- WRITE_REG(dqm->dev, CP_HPD_EOP_BASE_ADDR, lower_32(pipe_hpd_addr >> 8));
- WRITE_REG(dqm->dev, CP_HPD_EOP_BASE_ADDR_HI, upper_32(pipe_hpd_addr >> 8));
- WRITE_REG(dqm->dev, CP_HPD_EOP_VMID, 0);
- WRITE_REG(dqm->dev, CP_HPD_EOP_CONTROL, CIK_HPD_SIZE_LOG2 - 1);
- mqd->release_hqd(mqd);
+ kfd2kgd->init_pipeline(dqm->dev->kgd, i, CIK_HPD_SIZE_LOG2, pipe_hpd_addr);
}
return 0;
@@ -925,7 +852,6 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
uint32_t ape1_mtype;
pr_debug("kfd: In func %s\n", __func__);
-
mutex_lock(&dqm->lock);
if (alternate_aperture_size == 0) {
@@ -954,6 +880,7 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
qpd->sh_mem_ape1_base = base >> 16;
qpd->sh_mem_ape1_limit = limit >> 16;
+
}
default_mtype = (default_policy == cache_policy_coherent) ?
diff --git a/drivers/gpu/hsa/radeon/kfd_kernel_queue.c b/drivers/gpu/hsa/radeon/kfd_kernel_queue.c
index ce3261b..315d48e 100644
--- a/drivers/gpu/hsa/radeon/kfd_kernel_queue.c
+++ b/drivers/gpu/hsa/radeon/kfd_kernel_queue.c
@@ -108,10 +108,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
pr_debug("assigning hiq to hqd\n");
kq->queue->pipe = KFD_CIK_HIQ_PIPE;
kq->queue->queue = KFD_CIK_HIQ_QUEUE;
-
- kq->mqd->acquire_hqd(kq->mqd, kq->queue->pipe, kq->queue->queue, 0);
- kq->mqd->load_mqd(kq->mqd, kq->queue->mqd);
- kq->mqd->release_hqd(kq->mqd);
+ kq->mqd->load_mqd(kq->mqd, kq->queue->mqd, kq->queue->pipe, kq->queue->queue, NULL);
} else {
/* allocate fence for DIQ */
retval = radeon_kfd_vidmem_alloc_map(
@@ -152,9 +149,10 @@ static void uninitialize(struct kernel_queue *kq)
if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ)
kq->mqd->destroy_mqd(kq->mqd,
- kq->queue->mqd,
- KFD_PREEMPT_TYPE_WAVEFRONT,
- QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
+ false,
+ QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS,
+ kq->queue->pipe,
+ kq->queue->queue);
radeon_kfd_vidmem_free_unmap(kq->dev, kq->rptr_mem);
radeon_kfd_vidmem_free_unmap(kq->dev, kq->wptr_mem);
diff --git a/drivers/gpu/hsa/radeon/kfd_mqd_manager.c b/drivers/gpu/hsa/radeon/kfd_mqd_manager.c
index 8c1192e..45e3fd0 100644
--- a/drivers/gpu/hsa/radeon/kfd_mqd_manager.c
+++ b/drivers/gpu/hsa/radeon/kfd_mqd_manager.c
@@ -108,59 +108,10 @@ static void uninit_mqd(struct mqd_manager *mm, void *mqd, kfd_mem_obj mqd_mem_ob
radeon_kfd_vidmem_free_unmap(mm->dev, mqd_mem_obj);
}
-static int load_mqd(struct mqd_manager *mm, void *mqd)
+static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr)
{
- struct cik_mqd *m;
-
- BUG_ON(!mm || !mqd);
-
- m = get_mqd(mqd);
-
- WRITE_REG(mm->dev, CP_MQD_BASE_ADDR, m->queue_state.cp_mqd_base_addr);
- WRITE_REG(mm->dev, CP_MQD_BASE_ADDR_HI, m->queue_state.cp_mqd_base_addr_hi);
- WRITE_REG(mm->dev, CP_MQD_CONTROL, m->queue_state.cp_mqd_control);
-
- WRITE_REG(mm->dev, CP_HQD_PQ_BASE, m->queue_state.cp_hqd_pq_base);
- WRITE_REG(mm->dev, CP_HQD_PQ_BASE_HI, m->queue_state.cp_hqd_pq_base_hi);
- WRITE_REG(mm->dev, CP_HQD_PQ_CONTROL, m->queue_state.cp_hqd_pq_control);
-
- WRITE_REG(mm->dev, CP_HQD_IB_CONTROL, m->queue_state.cp_hqd_ib_control);
- WRITE_REG(mm->dev, CP_HQD_IB_BASE_ADDR, m->queue_state.cp_hqd_ib_base_addr);
- WRITE_REG(mm->dev, CP_HQD_IB_BASE_ADDR_HI, m->queue_state.cp_hqd_ib_base_addr_hi);
-
- WRITE_REG(mm->dev, CP_HQD_IB_RPTR, m->queue_state.cp_hqd_ib_rptr);
-
- WRITE_REG(mm->dev, CP_HQD_PERSISTENT_STATE, m->queue_state.cp_hqd_persistent_state);
- WRITE_REG(mm->dev, CP_HQD_SEMA_CMD, m->queue_state.cp_hqd_sema_cmd);
- WRITE_REG(mm->dev, CP_HQD_MSG_TYPE, m->queue_state.cp_hqd_msg_type);
-
- WRITE_REG(mm->dev, CP_HQD_ATOMIC0_PREOP_LO, m->queue_state.cp_hqd_atomic0_preop_lo);
- WRITE_REG(mm->dev, CP_HQD_ATOMIC0_PREOP_HI, m->queue_state.cp_hqd_atomic0_preop_hi);
- WRITE_REG(mm->dev, CP_HQD_ATOMIC1_PREOP_LO, m->queue_state.cp_hqd_atomic1_preop_lo);
- WRITE_REG(mm->dev, CP_HQD_ATOMIC1_PREOP_HI, m->queue_state.cp_hqd_atomic1_preop_hi);
-
- WRITE_REG(mm->dev, CP_HQD_PQ_RPTR_REPORT_ADDR, m->queue_state.cp_hqd_pq_rptr_report_addr);
- WRITE_REG(mm->dev, CP_HQD_PQ_RPTR_REPORT_ADDR_HI, m->queue_state.cp_hqd_pq_rptr_report_addr_hi);
- WRITE_REG(mm->dev, CP_HQD_PQ_RPTR, m->queue_state.cp_hqd_pq_rptr);
-
- WRITE_REG(mm->dev, CP_HQD_PQ_WPTR_POLL_ADDR, m->queue_state.cp_hqd_pq_wptr_poll_addr);
- WRITE_REG(mm->dev, CP_HQD_PQ_WPTR_POLL_ADDR_HI, m->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
-
- WRITE_REG(mm->dev, CP_HQD_PQ_DOORBELL_CONTROL, m->queue_state.cp_hqd_pq_doorbell_control);
-
- WRITE_REG(mm->dev, CP_HQD_VMID, m->queue_state.cp_hqd_vmid);
+ return kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, wptr);
- WRITE_REG(mm->dev, CP_HQD_QUANTUM, m->queue_state.cp_hqd_quantum);
-
- WRITE_REG(mm->dev, CP_HQD_PIPE_PRIORITY, m->queue_state.cp_hqd_pipe_priority);
- WRITE_REG(mm->dev, CP_HQD_QUEUE_PRIORITY, m->queue_state.cp_hqd_queue_priority);
-
- WRITE_REG(mm->dev, CP_HQD_HQ_SCHEDULER0, m->queue_state.cp_hqd_hq_scheduler0);
- WRITE_REG(mm->dev, CP_HQD_HQ_SCHEDULER1, m->queue_state.cp_hqd_hq_scheduler1);
-
- WRITE_REG(mm->dev, CP_HQD_ACTIVE, m->queue_state.cp_hqd_active);
-
- return 0;
}
static int update_mqd(struct mqd_manager *mm, void *mqd, struct queue_properties *q)
@@ -195,117 +146,16 @@ static int update_mqd(struct mqd_manager *mm, void *mqd, struct queue_properties
return 0;
}
-static int destroy_mqd(struct mqd_manager *mm, void *mqd, enum kfd_preempt_type type, unsigned int timeout)
-{
- int status;
- uint32_t temp;
- bool sync;
-
- status = 0;
- BUG_ON(!mm || !mqd);
-
- pr_debug("kfd: In func %s\n", __func__);
-
- WRITE_REG(mm->dev, CP_HQD_PQ_DOORBELL_CONTROL, 0);
-
- if (type == KFD_PREEMPT_TYPE_WAVEFRONT_RESET)
- WRITE_REG(mm->dev, CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQUEST_RESET);
- else
- WRITE_REG(mm->dev, CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQUEST_DRAIN);
-
- sync = (timeout > 0);
- temp = timeout;
-
- while (READ_REG(mm->dev, CP_HQD_ACTIVE) != 0) {
- if (sync && timeout <= 0) {
- status = -EBUSY;
- pr_err("kfd: cp queue preemption time out (%dms)\n", temp);
- break;
- }
- busy_wait(1000);
- if (sync)
- timeout--;
- }
-
- return status;
-}
-
-static inline uint32_t make_srbm_gfx_cntl_mpqv(unsigned int me,
- unsigned int pipe,
- unsigned int queue,
- unsigned int vmid)
+static int destroy_mqd(struct mqd_manager *mm, bool is_reset, unsigned int timeout, uint32_t pipe_id, uint32_t queue_id)
{
- return QUEUEID(queue) | VMID(vmid) | MEID(me) | PIPEID(pipe);
+ return kfd2kgd->hqd_destroy(mm->dev->kgd, is_reset, timeout, pipe_id, queue_id);
}
-static inline uint32_t get_first_pipe_offset(struct mqd_manager *mm)
+bool is_occupied(struct mqd_manager *mm, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id)
{
- BUG_ON(!mm);
- return mm->dev->shared_resources.first_compute_pipe;
-}
-
-static void acquire_hqd(struct mqd_manager *mm, unsigned int pipe, unsigned int queue, unsigned int vmid)
-{
- unsigned int mec, pipe_in_mec;
-
- BUG_ON(!mm);
-
- radeon_kfd_lock_srbm_index(mm->dev);
-
- pipe_in_mec = (pipe + get_first_pipe_offset(mm)) % 4;
- mec = (pipe + get_first_pipe_offset(mm)) / 4;
- mec++;
-
- pr_debug("kfd: acquire mec: %d pipe: %d queue: %d vmid: %d\n",
- mec,
- pipe_in_mec,
- queue,
- vmid);
-
- WRITE_REG(mm->dev, SRBM_GFX_CNTL, make_srbm_gfx_cntl_mpqv(mec,
- pipe_in_mec, queue, vmid));
-}
-
-static void release_hqd(struct mqd_manager *mm)
-{
- BUG_ON(!mm);
- /* Be nice to KGD, reset indexed CP registers to the GFX pipe. */
- WRITE_REG(mm->dev, SRBM_GFX_CNTL, 0);
- radeon_kfd_unlock_srbm_index(mm->dev);
-}
-
-bool is_occupied(struct mqd_manager *mm, void *mqd, struct queue_properties *q)
-{
- int act;
- struct cik_mqd *m;
- uint32_t low, high;
- BUG_ON(!mm || !mqd || !q);
+ return kfd2kgd->hqd_is_occupies(mm->dev->kgd, queue_address, pipe_id, queue_id);
- m = get_mqd(mqd);
-
- act = READ_REG(mm->dev, CP_HQD_ACTIVE);
- if (act) {
- low = lower_32((uint64_t)q->queue_address >> 8);
- high = upper_32((uint64_t)q->queue_address >> 8);
-
- if (low == READ_REG(mm->dev, CP_HQD_PQ_BASE) &&
- high == READ_REG(mm->dev, CP_HQD_PQ_BASE_HI))
- return true;
- }
-
- return false;
-}
-
-static int initialize(struct mqd_manager *mm)
-{
- BUG_ON(!mm);
- return 0;
-}
-
-static void uninitialize(struct mqd_manager *mm)
-{
- BUG_ON(!mm);
}
/*
@@ -418,11 +268,7 @@ struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, struct kfd_dev *dev
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd;
mqd->destroy_mqd = destroy_mqd;
- mqd->acquire_hqd = acquire_hqd;
- mqd->release_hqd = release_hqd;
mqd->is_occupied = is_occupied;
- mqd->initialize = initialize;
- mqd->uninitialize = uninitialize;
break;
case KFD_MQD_TYPE_CIK_HIQ:
mqd->init_mqd = init_mqd_hiq;
@@ -430,11 +276,7 @@ struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, struct kfd_dev *dev
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd_hiq;
mqd->destroy_mqd = destroy_mqd;
- mqd->acquire_hqd = acquire_hqd;
- mqd->release_hqd = release_hqd;
mqd->is_occupied = is_occupied;
- mqd->initialize = initialize;
- mqd->uninitialize = uninitialize;
break;
default:
kfree(mqd);
@@ -442,11 +284,6 @@ struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, struct kfd_dev *dev
break;
}
- if (mqd->initialize(mqd) != 0) {
- pr_err("kfd: mqd manager initialization failed\n");
- kfree(mqd);
- return NULL;
- }
return mqd;
}
diff --git a/drivers/gpu/hsa/radeon/kfd_mqd_manager.h b/drivers/gpu/hsa/radeon/kfd_mqd_manager.h
index 314d490..a6b0007 100644
--- a/drivers/gpu/hsa/radeon/kfd_mqd_manager.h
+++ b/drivers/gpu/hsa/radeon/kfd_mqd_manager.h
@@ -27,21 +27,28 @@
#include "kfd_priv.h"
struct mqd_manager {
- int (*init_mqd)(struct mqd_manager *mm, void **mqd, kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
- struct queue_properties *q);
- int (*load_mqd)(struct mqd_manager *mm, void *mqd);
- int (*update_mqd)(struct mqd_manager *mm, void *mqd, struct queue_properties *q);
- int (*destroy_mqd)(struct mqd_manager *mm, void *mqd, enum kfd_preempt_type type, unsigned int timeout);
- void (*uninit_mqd)(struct mqd_manager *mm, void *mqd, kfd_mem_obj mqd_mem_obj);
- void (*acquire_hqd)(struct mqd_manager *mm, unsigned int pipe, unsigned int queue, unsigned int vmid);
- void (*release_hqd)(struct mqd_manager *mm);
- bool (*is_occupied)(struct mqd_manager *mm, void *mqd, struct queue_properties *q);
- int (*initialize)(struct mqd_manager *mm);
- void (*uninitialize)(struct mqd_manager *mm);
-
- struct mutex mqd_mutex;
- struct kfd_dev *dev;
-};
+ int (*init_mqd)(struct mqd_manager *mm, void **mqd,
+ kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *q);
+
+ int (*load_mqd)(struct mqd_manager *mm, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t __user *wptr);
+
+ int (*update_mqd)(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q);
+ int (*destroy_mqd)(struct mqd_manager *mm, bool is_reset,
+ unsigned int timeout, uint32_t pipe_id,
+ uint32_t queue_id);
+
+ void (*uninit_mqd)(struct mqd_manager *mm, void *mqd,
+ kfd_mem_obj mqd_mem_obj);
+ bool (*is_occupied)(struct mqd_manager *mm, uint64_t queue_address,
+ uint32_t pipe_id, uint32_t queue_id);
+
+ struct mutex mqd_mutex;
+ struct kfd_dev *dev;
+};
#endif /* KFD_MQD_MANAGER_H_ */
diff --git a/drivers/gpu/hsa/radeon/kfd_priv.h b/drivers/gpu/hsa/radeon/kfd_priv.h
index b61187a..be77ca8 100644
--- a/drivers/gpu/hsa/radeon/kfd_priv.h
+++ b/drivers/gpu/hsa/radeon/kfd_priv.h
@@ -100,8 +100,6 @@ struct kfd_dev {
const struct kfd_device_info *device_info;
struct pci_dev *pdev;
- void __iomem *regs;
-
bool init_complete;
unsigned int id; /* topology stub index */
@@ -148,11 +146,6 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd);
extern const struct kfd2kgd_calls *kfd2kgd;
-
-/* KFD2KGD callback wrappers */
-void radeon_kfd_lock_srbm_index(struct kfd_dev *kfd);
-void radeon_kfd_unlock_srbm_index(struct kfd_dev *kfd);
-
enum kfd_mempool {
KFD_MEMPOOL_SYSTEM_CACHEABLE = 1,
KFD_MEMPOOL_SYSTEM_WRITECOMBINE = 2,
@@ -198,11 +191,6 @@ enum kfd_preempt_type_filter {
KFD_PRERMPT_TYPE_FILTER_BY_PASID
};
-enum kfd_preempt_type {
- KFD_PREEMPT_TYPE_WAVEFRONT,
- KFD_PREEMPT_TYPE_WAVEFRONT_RESET
-};
-
enum kfd_queue_type {
KFD_QUEUE_TYPE_COMPUTE,
KFD_QUEUE_TYPE_SDMA,
@@ -406,12 +394,6 @@ struct kfd_dev *radeon_kfd_device_by_id(uint32_t gpu_id);
struct kfd_dev *radeon_kfd_device_by_pci_dev(const struct pci_dev *pdev);
struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx);
-/* MMIO registers */
-#define WRITE_REG(dev, reg, value) radeon_kfd_write_reg((dev), (reg), (value))
-#define READ_REG(dev, reg) radeon_kfd_read_reg((dev), (reg))
-void radeon_kfd_write_reg(struct kfd_dev *dev, uint32_t reg, uint32_t value);
-uint32_t radeon_kfd_read_reg(struct kfd_dev *dev, uint32_t reg);
-
/* Interrupts */
int radeon_kfd_interrupt_init(struct kfd_dev *dev);
void radeon_kfd_interrupt_exit(struct kfd_dev *dev);
diff --git a/drivers/gpu/hsa/radeon/kfd_registers.c b/drivers/gpu/hsa/radeon/kfd_registers.c
deleted file mode 100644
index 223debd..0000000
--- a/drivers/gpu/hsa/radeon/kfd_registers.c
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright 2014 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include <linux/io.h>
-#include "kfd_priv.h"
-
-/* In KFD, "reg" is the byte offset of the register. */
-static void __iomem *reg_address(struct kfd_dev *dev, uint32_t reg)
-{
- return dev->regs + reg;
-}
-
-void radeon_kfd_write_reg(struct kfd_dev *dev, uint32_t reg, uint32_t value)
-{
- writel(value, reg_address(dev, reg));
-}
-
-uint32_t radeon_kfd_read_reg(struct kfd_dev *dev, uint32_t reg)
-{
- return readl(reg_address(dev, reg));
-}
-
-void radeon_kfd_lock_srbm_index(struct kfd_dev *dev)
-{
- kfd2kgd->lock_srbm_gfx_cntl(dev->kgd);
-}
-
-void radeon_kfd_unlock_srbm_index(struct kfd_dev *dev)
-{
- kfd2kgd->unlock_srbm_gfx_cntl(dev->kgd);
-}
--
1.9.1
From: Ben Goz <[email protected]>
This patch extends the kfd2kgd interface by adding functions
that perform direct register access.
These functions can be called from kfd and will make it possible to
eliminate all direct register accesses from within kfd.
Signed-off-by: Ben Goz <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/drm/radeon/cikd.h | 51 +++++-
drivers/gpu/drm/radeon/radeon_kfd.c | 354 ++++++++++++++++++++++++++++++++++++
include/linux/radeon_kfd.h | 11 ++
3 files changed, 415 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h
index 0c6e1b5..0a2a403 100644
--- a/drivers/gpu/drm/radeon/cikd.h
+++ b/drivers/gpu/drm/radeon/cikd.h
@@ -1137,6 +1137,9 @@
#define SH_MEM_ALIGNMENT_MODE_UNALIGNED 3
#define DEFAULT_MTYPE(x) ((x) << 4)
#define APE1_MTYPE(x) ((x) << 7)
+/* valid for both DEFAULT_MTYPE and APE1_MTYPE */
+#define MTYPE_CACHED 0
+#define MTYPE_NONCACHED 3
#define SX_DEBUG_1 0x9060
@@ -1447,6 +1450,16 @@
#define CP_HQD_ACTIVE 0xC91C
#define CP_HQD_VMID 0xC920
+#define CP_HQD_PERSISTENT_STATE 0xC924u
+#define DEFAULT_CP_HQD_PERSISTENT_STATE (0x33U << 8)
+
+#define CP_HQD_PIPE_PRIORITY 0xC928u
+#define CP_HQD_QUEUE_PRIORITY 0xC92Cu
+#define CP_HQD_QUANTUM 0xC930u
+#define QUANTUM_EN 1U
+#define QUANTUM_SCALE_1MS (1U << 4)
+#define QUANTUM_DURATION(x) ((x) << 8)
+
#define CP_HQD_PQ_BASE 0xC934
#define CP_HQD_PQ_BASE_HI 0xC938
#define CP_HQD_PQ_RPTR 0xC93C
@@ -1474,12 +1487,32 @@
#define PRIV_STATE (1 << 30)
#define KMD_QUEUE (1 << 31)
-#define CP_HQD_DEQUEUE_REQUEST 0xC974
+#define CP_HQD_IB_BASE_ADDR 0xC95Cu
+#define CP_HQD_IB_BASE_ADDR_HI 0xC960u
+#define CP_HQD_IB_RPTR 0xC964u
+#define CP_HQD_IB_CONTROL 0xC968u
+#define IB_ATC_EN (1U << 23)
+#define DEFAULT_MIN_IB_AVAIL_SIZE (3U << 20)
+
+#define CP_HQD_DEQUEUE_REQUEST 0xC974
+#define DEQUEUE_REQUEST_DRAIN 1
+#define DEQUEUE_REQUEST_RESET 2
#define CP_MQD_CONTROL 0xC99C
#define MQD_VMID(x) ((x) << 0)
#define MQD_VMID_MASK (0xf << 0)
+#define CP_HQD_SEMA_CMD 0xC97Cu
+#define CP_HQD_MSG_TYPE 0xC980u
+#define CP_HQD_ATOMIC0_PREOP_LO 0xC984u
+#define CP_HQD_ATOMIC0_PREOP_HI 0xC988u
+#define CP_HQD_ATOMIC1_PREOP_LO 0xC98Cu
+#define CP_HQD_ATOMIC1_PREOP_HI 0xC990u
+#define CP_HQD_HQ_SCHEDULER0 0xC994u
+#define CP_HQD_HQ_SCHEDULER1 0xC998u
+
+#define SH_STATIC_MEM_CONFIG 0x9604u
+
#define DB_RENDER_CONTROL 0x28000
#define PA_SC_RASTER_CONFIG 0x28350
@@ -2069,4 +2102,20 @@
#define VCE_CMD_IB_AUTO 0x00000005
#define VCE_CMD_SEMAPHORE 0x00000006
+#define ATC_VMID0_PASID_MAPPING 0x339Cu
+#define ATC_VMID_PASID_MAPPING_UPDATE_STATUS 0x3398u
+#define ATC_VMID_PASID_MAPPING_VALID (1U << 31)
+
+#define ATC_VM_APERTURE0_CNTL 0x3310u
+#define ATS_ACCESS_MODE_NEVER 0
+#define ATS_ACCESS_MODE_ALWAYS 1
+
+#define ATC_VM_APERTURE0_CNTL2 0x3318u
+#define ATC_VM_APERTURE0_HIGH_ADDR 0x3308u
+#define ATC_VM_APERTURE0_LOW_ADDR 0x3300u
+#define ATC_VM_APERTURE1_CNTL 0x3314u
+#define ATC_VM_APERTURE1_CNTL2 0x331Cu
+#define ATC_VM_APERTURE1_HIGH_ADDR 0x330Cu
+#define ATC_VM_APERTURE1_LOW_ADDR 0x3304u
+
#endif
diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c
index a28cf6b..738c2b3 100644
--- a/drivers/gpu/drm/radeon/radeon_kfd.c
+++ b/drivers/gpu/drm/radeon/radeon_kfd.c
@@ -24,10 +24,81 @@
#include <linux/radeon_kfd.h>
#include <drm/drmP.h>
#include "radeon.h"
+#include "cikd.h"
+#include <linux/fdtable.h>
+#include <linux/uaccess.h>
+
+#define CIK_PIPE_PER_MEC (4)
struct kgd_mem {
struct radeon_bo *bo;
u32 domain;
+ struct radeon_bo_va *bo_va;
+};
+
+struct cik_hqd_registers {
+ u32 cp_mqd_base_addr;
+ u32 cp_mqd_base_addr_hi;
+ u32 cp_hqd_active;
+ u32 cp_hqd_vmid;
+ u32 cp_hqd_persistent_state;
+ u32 cp_hqd_pipe_priority;
+ u32 cp_hqd_queue_priority;
+ u32 cp_hqd_quantum;
+ u32 cp_hqd_pq_base;
+ u32 cp_hqd_pq_base_hi;
+ u32 cp_hqd_pq_rptr;
+ u32 cp_hqd_pq_rptr_report_addr;
+ u32 cp_hqd_pq_rptr_report_addr_hi;
+ u32 cp_hqd_pq_wptr_poll_addr;
+ u32 cp_hqd_pq_wptr_poll_addr_hi;
+ u32 cp_hqd_pq_doorbell_control;
+ u32 cp_hqd_pq_wptr;
+ u32 cp_hqd_pq_control;
+ u32 cp_hqd_ib_base_addr;
+ u32 cp_hqd_ib_base_addr_hi;
+ u32 cp_hqd_ib_rptr;
+ u32 cp_hqd_ib_control;
+ u32 cp_hqd_iq_timer;
+ u32 cp_hqd_iq_rptr;
+ u32 cp_hqd_dequeue_request;
+ u32 cp_hqd_dma_offload;
+ u32 cp_hqd_sema_cmd;
+ u32 cp_hqd_msg_type;
+ u32 cp_hqd_atomic0_preop_lo;
+ u32 cp_hqd_atomic0_preop_hi;
+ u32 cp_hqd_atomic1_preop_lo;
+ u32 cp_hqd_atomic1_preop_hi;
+ u32 cp_hqd_hq_scheduler0;
+ u32 cp_hqd_hq_scheduler1;
+ u32 cp_mqd_control;
+};
+
+struct cik_mqd {
+ u32 header;
+ u32 dispatch_initiator;
+ u32 dimensions[3];
+ u32 start_idx[3];
+ u32 num_threads[3];
+ u32 pipeline_stat_enable;
+ u32 perf_counter_enable;
+ u32 pgm[2];
+ u32 tba[2];
+ u32 tma[2];
+ u32 pgm_rsrc[2];
+ u32 vmid;
+ u32 resource_limits;
+ u32 static_thread_mgmt01[2];
+ u32 tmp_ring_size;
+ u32 static_thread_mgmt23[2];
+ u32 restart[3];
+ u32 thread_trace_enable;
+ u32 reserved1;
+ u32 user_data[16];
+ u32 vgtcs_invoke_count[2];
+ struct cik_hqd_registers queue_state;
+ u32 dequeue_cntr;
+ u32 interrupt_queue[64];
};
static int allocate_mem(struct kgd_dev *kgd, size_t size, size_t alignment,
@@ -52,6 +123,19 @@ static void unlock_grbm_gfx_idx(struct kgd_dev *kgd);
static uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd);
+/*
+ * Register access functions
+ */
+
+static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, uint32_t sh_mem_config,
+ uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases);
+static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, unsigned int vmid);
+static int kgd_init_memory(struct kgd_dev *kgd);
+static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t hpd_size, uint64_t hpd_gpu_addr);
+static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr);
+static bool kgd_hqd_is_occupies(struct kgd_dev *kgd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id);
+static int kgd_hqd_destroy(struct kgd_dev *kgd, bool is_reset, unsigned int timeout,
+ uint32_t pipe_id, uint32_t queue_id);
static const struct kfd2kgd_calls kfd2kgd = {
.allocate_mem = allocate_mem,
@@ -67,6 +151,13 @@ static const struct kfd2kgd_calls kfd2kgd = {
.lock_grbm_gfx_idx = lock_grbm_gfx_idx,
.unlock_grbm_gfx_idx = unlock_grbm_gfx_idx,
.get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
+ .program_sh_mem_settings = kgd_program_sh_mem_settings,
+ .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
+ .init_memory = kgd_init_memory,
+ .init_pipeline = kgd_init_pipeline,
+ .hqd_load = kgd_hqd_load,
+ .hqd_is_occupies = kgd_hqd_is_occupies,
+ .hqd_destroy = kgd_hqd_destroy,
};
static const struct kgd2kfd_calls *kgd2kfd;
@@ -318,3 +409,266 @@ static uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
/* The sclk is in quantas of 10kHz */
return rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100;
}
+
+/*
+ * kfd/radeon registers access interface
+ */
+
+inline uint32_t lower_32(uint64_t x)
+{
+ return (uint32_t)x;
+}
+
+inline uint32_t upper_32(uint64_t x)
+{
+ return (uint32_t)(x >> 32);
+}
+
+static inline struct radeon_device *get_radeon_device(struct kgd_dev *kgd)
+{
+ return (struct radeon_device *)kgd;
+}
+
+static void write_register(struct kgd_dev *kgd, uint32_t offset, uint32_t value)
+{
+ struct radeon_device *rdev = get_radeon_device(kgd);
+
+ writel(value, (void __iomem *)(rdev->rmmio + offset));
+}
+
+static uint32_t read_register(struct kgd_dev *kgd, uint32_t offset)
+{
+ struct radeon_device *rdev = get_radeon_device(kgd);
+
+ return readl((void __iomem *)(rdev->rmmio + offset));
+}
+
+static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe, uint32_t queue, uint32_t vmid)
+{
+ struct radeon_device *rdev = get_radeon_device(kgd);
+ uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue);
+
+ mutex_lock(&rdev->srbm_mutex);
+ write_register(kgd, SRBM_GFX_CNTL, value);
+}
+
+static void unlock_srbm(struct kgd_dev *kgd)
+{
+ struct radeon_device *rdev = get_radeon_device(kgd);
+
+ write_register(kgd, SRBM_GFX_CNTL, 0);
+ mutex_unlock(&rdev->srbm_mutex);
+}
+
+static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t queue_id)
+{
+ uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1;
+ uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC);
+
+ lock_srbm(kgd, mec, pipe, queue_id, 0);
+}
+
+static void release_queue(struct kgd_dev *kgd)
+{
+ unlock_srbm(kgd);
+}
+
+static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, uint32_t sh_mem_config,
+ uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases)
+{
+ lock_srbm(kgd, 0, 0, 0, vmid);
+
+ write_register(kgd, SH_MEM_CONFIG, sh_mem_config);
+ write_register(kgd, SH_MEM_APE1_BASE, sh_mem_ape1_base);
+ write_register(kgd, SH_MEM_APE1_LIMIT, sh_mem_ape1_limit);
+ write_register(kgd, SH_MEM_BASES, sh_mem_bases);
+
+ unlock_srbm(kgd);
+}
+
+static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, unsigned int vmid)
+{
+ /* We have to assume that there is no outstanding mapping.
+ * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because a mapping
+ * is in progress or because a mapping finished and the SW cleared it.
+ * So the protocol is to always wait & clear.
+ */
+ uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid | ATC_VMID_PASID_MAPPING_VALID;
+
+ write_register(kgd, ATC_VMID0_PASID_MAPPING + vmid*sizeof(uint32_t), pasid_mapping);
+
+ while (!(read_register(kgd, ATC_VMID_PASID_MAPPING_UPDATE_STATUS) & (1U << vmid)))
+ cpu_relax();
+ write_register(kgd, ATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid);
+
+ return 0;
+}
+
+static int kgd_init_memory(struct kgd_dev *kgd)
+{
+ /* Configure apertures:
+ * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
+ * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
+ * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
+ */
+ int i;
+ uint32_t sh_mem_bases = PRIVATE_BASE(0x6000) | SHARED_BASE(0x6000);
+
+ for (i = 8; i < 16; i++) {
+ uint32_t sh_mem_config;
+
+ lock_srbm(kgd, 0, 0, 0, i);
+
+ sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
+ sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);
+
+ write_register(kgd, SH_MEM_CONFIG, sh_mem_config);
+
+ write_register(kgd, SH_MEM_BASES, sh_mem_bases);
+
+ /* Scratch aperture is not supported for now. */
+ write_register(kgd, SH_STATIC_MEM_CONFIG, 0);
+
+ /* APE1 disabled for now. */
+ write_register(kgd, SH_MEM_APE1_BASE, 1);
+ write_register(kgd, SH_MEM_APE1_LIMIT, 0);
+
+ unlock_srbm(kgd);
+ }
+
+ return 0;
+}
+
+static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t hpd_size, uint64_t hpd_gpu_addr)
+{
+ uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1;
+ uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC);
+
+ lock_srbm(kgd, mec, pipe, 0, 0);
+ write_register(kgd, CP_HPD_EOP_BASE_ADDR, lower_32(hpd_gpu_addr >> 8));
+ write_register(kgd, CP_HPD_EOP_BASE_ADDR_HI, upper_32(hpd_gpu_addr >> 8));
+ write_register(kgd, CP_HPD_EOP_VMID, 0);
+ write_register(kgd, CP_HPD_EOP_CONTROL, hpd_size);
+ unlock_srbm(kgd);
+
+ return 0;
+}
+
+static inline struct cik_mqd *get_mqd(void *mqd)
+{
+ return (struct cik_mqd *)mqd;
+}
+
+static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr)
+{
+ uint32_t wptr_shadow, is_wptr_shadow_valid;
+ struct cik_mqd *m;
+
+ m = get_mqd(mqd);
+
+ is_wptr_shadow_valid = !get_user(wptr_shadow, wptr);
+
+ acquire_queue(kgd, pipe_id, queue_id);
+ write_register(kgd, CP_MQD_BASE_ADDR, m->queue_state.cp_mqd_base_addr);
+ write_register(kgd, CP_MQD_BASE_ADDR_HI, m->queue_state.cp_mqd_base_addr_hi);
+ write_register(kgd, CP_MQD_CONTROL, m->queue_state.cp_mqd_control);
+
+ write_register(kgd, CP_HQD_PQ_BASE, m->queue_state.cp_hqd_pq_base);
+ write_register(kgd, CP_HQD_PQ_BASE_HI, m->queue_state.cp_hqd_pq_base_hi);
+ write_register(kgd, CP_HQD_PQ_CONTROL, m->queue_state.cp_hqd_pq_control);
+
+ write_register(kgd, CP_HQD_IB_CONTROL, m->queue_state.cp_hqd_ib_control);
+ write_register(kgd, CP_HQD_IB_BASE_ADDR, m->queue_state.cp_hqd_ib_base_addr);
+ write_register(kgd, CP_HQD_IB_BASE_ADDR_HI, m->queue_state.cp_hqd_ib_base_addr_hi);
+
+ write_register(kgd, CP_HQD_IB_RPTR, m->queue_state.cp_hqd_ib_rptr);
+
+ write_register(kgd, CP_HQD_PERSISTENT_STATE, m->queue_state.cp_hqd_persistent_state);
+ write_register(kgd, CP_HQD_SEMA_CMD, m->queue_state.cp_hqd_sema_cmd);
+ write_register(kgd, CP_HQD_MSG_TYPE, m->queue_state.cp_hqd_msg_type);
+
+ write_register(kgd, CP_HQD_ATOMIC0_PREOP_LO, m->queue_state.cp_hqd_atomic0_preop_lo);
+ write_register(kgd, CP_HQD_ATOMIC0_PREOP_HI, m->queue_state.cp_hqd_atomic0_preop_hi);
+ write_register(kgd, CP_HQD_ATOMIC1_PREOP_LO, m->queue_state.cp_hqd_atomic1_preop_lo);
+ write_register(kgd, CP_HQD_ATOMIC1_PREOP_HI, m->queue_state.cp_hqd_atomic1_preop_hi);
+
+ write_register(kgd, CP_HQD_PQ_RPTR_REPORT_ADDR, m->queue_state.cp_hqd_pq_rptr_report_addr);
+ write_register(kgd, CP_HQD_PQ_RPTR_REPORT_ADDR_HI, m->queue_state.cp_hqd_pq_rptr_report_addr_hi);
+ write_register(kgd, CP_HQD_PQ_RPTR, m->queue_state.cp_hqd_pq_rptr);
+
+ write_register(kgd, CP_HQD_PQ_WPTR_POLL_ADDR, m->queue_state.cp_hqd_pq_wptr_poll_addr);
+ write_register(kgd, CP_HQD_PQ_WPTR_POLL_ADDR_HI, m->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
+
+ write_register(kgd, CP_HQD_PQ_DOORBELL_CONTROL, m->queue_state.cp_hqd_pq_doorbell_control);
+
+ write_register(kgd, CP_HQD_VMID, m->queue_state.cp_hqd_vmid);
+
+ write_register(kgd, CP_HQD_QUANTUM, m->queue_state.cp_hqd_quantum);
+
+ write_register(kgd, CP_HQD_PIPE_PRIORITY, m->queue_state.cp_hqd_pipe_priority);
+ write_register(kgd, CP_HQD_QUEUE_PRIORITY, m->queue_state.cp_hqd_queue_priority);
+
+ write_register(kgd, CP_HQD_HQ_SCHEDULER0, m->queue_state.cp_hqd_hq_scheduler0);
+ write_register(kgd, CP_HQD_HQ_SCHEDULER1, m->queue_state.cp_hqd_hq_scheduler1);
+
+ if (is_wptr_shadow_valid)
+ write_register(kgd, CP_HQD_PQ_WPTR, wptr_shadow);
+
+ write_register(kgd, CP_HQD_ACTIVE, m->queue_state.cp_hqd_active);
+ release_queue(kgd);
+
+ return 0;
+}
+
+static bool kgd_hqd_is_occupies(struct kgd_dev *kgd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id)
+{
+ uint32_t act;
+ bool retval = false;
+ uint32_t low, high;
+
+ acquire_queue(kgd, pipe_id, queue_id);
+ act = read_register(kgd, CP_HQD_ACTIVE);
+ if (act) {
+ low = lower_32(queue_address >> 8);
+ high = upper_32(queue_address >> 8);
+
+ if (low == read_register(kgd, CP_HQD_PQ_BASE) &&
+ high == read_register(kgd, CP_HQD_PQ_BASE_HI))
+ retval = true;
+ }
+ release_queue(kgd);
+ return retval;
+}
+
+static int kgd_hqd_destroy(struct kgd_dev *kgd, bool is_reset,
+ unsigned int timeout, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ int status = 0;
+ bool sync = (timeout > 0) ? true : false;
+
+ acquire_queue(kgd, pipe_id, queue_id);
+ write_register(kgd, CP_HQD_PQ_DOORBELL_CONTROL, 0);
+
+ if (is_reset)
+ write_register(kgd, CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQUEST_RESET);
+ else
+ write_register(kgd, CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQUEST_DRAIN);
+
+
+ while (read_register(kgd, CP_HQD_ACTIVE) != 0) {
+ if (sync && timeout <= 0) {
+ status = -EBUSY;
+ break;
+ }
+ msleep(20);
+ if (sync) {
+ if (timeout >= 20)
+ timeout -= 20;
+ else
+ timeout = 0;
+ }
+ }
+ release_queue(kgd);
+ return status;
+}
diff --git a/include/linux/radeon_kfd.h b/include/linux/radeon_kfd.h
index 4114c8e..aa021fb 100644
--- a/include/linux/radeon_kfd.h
+++ b/include/linux/radeon_kfd.h
@@ -95,6 +95,17 @@ struct kfd2kgd_calls {
void (*unlock_grbm_gfx_idx)(struct kgd_dev *kgd);
uint32_t (*get_max_engine_clock_in_mhz)(struct kgd_dev *kgd);
+
+ /* Register access functions */
+ void (*program_sh_mem_settings)(struct kgd_dev *kgd, uint32_t vmid, uint32_t sh_mem_config,
+ uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases);
+ int (*set_pasid_vmid_mapping)(struct kgd_dev *kgd, unsigned int pasid, unsigned int vmid);
+ int (*init_memory)(struct kgd_dev *kgd);
+ int (*init_pipeline)(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t hpd_size, uint64_t hpd_gpu_addr);
+ int (*hqd_load)(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr);
+ bool (*hqd_is_occupies)(struct kgd_dev *kgd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id);
+ int (*hqd_destroy)(struct kgd_dev *kgd, bool is_reset, unsigned int timeout,
+ uint32_t pipe_id, uint32_t queue_id);
};
bool kgd2kfd_init(unsigned interface_version,
--
1.9.1
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/hsa/radeon/kfd_module.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/hsa/radeon/kfd_module.c b/drivers/gpu/hsa/radeon/kfd_module.c
index 33cee3c..c706236 100644
--- a/drivers/gpu/hsa/radeon/kfd_module.c
+++ b/drivers/gpu/hsa/radeon/kfd_module.c
@@ -30,10 +30,10 @@
#define KFD_DRIVER_AUTHOR "AMD Inc. and others"
#define KFD_DRIVER_DESC "Standalone HSA driver for AMD's GPUs"
-#define KFD_DRIVER_DATE "20140424"
+#define KFD_DRIVER_DATE "20140623"
#define KFD_DRIVER_MAJOR 0
#define KFD_DRIVER_MINOR 6
-#define KFD_DRIVER_PATCHLEVEL 0
+#define KFD_DRIVER_PATCHLEVEL 1
const struct kfd2kgd_calls *kfd2kgd;
static const struct kgd2kfd_calls kgd2kfd = {
--
1.9.1
This patch removes the checksum verification done when
parsing a CRAT table. The verification was both erroneous
(the result was compared with ==, so a *matching* checksum was
reported as bad) and redundant, as the check is already performed
by another piece of kernel code.
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/hsa/radeon/kfd_topology.c | 29 ++---------------------------
1 file changed, 2 insertions(+), 27 deletions(-)
diff --git a/drivers/gpu/hsa/radeon/kfd_topology.c b/drivers/gpu/hsa/radeon/kfd_topology.c
index d3aaad1..b686b7e 100644
--- a/drivers/gpu/hsa/radeon/kfd_topology.c
+++ b/drivers/gpu/hsa/radeon/kfd_topology.c
@@ -38,21 +38,6 @@ static struct kfd_system_properties sys_props;
static DECLARE_RWSEM(topology_lock);
-
-static uint8_t checksum_image(const void *buf, size_t len)
-{
- uint8_t *p = (uint8_t *)buf;
- uint8_t sum = 0;
-
- if (!buf)
- return 0;
-
- while (len-- > 0)
- sum += *p++;
-
- return sum;
- }
-
struct kfd_dev *radeon_kfd_device_by_id(uint32_t gpu_id)
{
struct kfd_topology_device *top_dev;
@@ -97,9 +82,9 @@ static int kfd_topology_get_crat_acpi(void *crat_image, size_t *size)
if (!size)
return -EINVAL;
-/*
+ /*
* Fetch the CRAT table from ACPI
- */
+ */
status = acpi_get_table(CRAT_SIGNATURE, 0, &crat_table);
if (status == AE_NOT_FOUND) {
pr_warn("CRAT table not found\n");
@@ -111,16 +96,6 @@ static int kfd_topology_get_crat_acpi(void *crat_image, size_t *size)
return -EINVAL;
}
- /*
- * The checksum of the table should be verified
- */
- if (checksum_image(crat_table, crat_table->length) ==
- crat_table->checksum) {
- pr_err("Bad checksum for the CRAT table\n");
- return -EINVAL;
-}
-
-
if (*size >= crat_table->length && crat_image != 0)
memcpy(crat_image, crat_table, crat_table->length);
--
1.9.1
From: Alexey Skidanov <[email protected]>
Signed-off-by: Alexey Skidanov <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/hsa/radeon/kfd_topology.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/gpu/hsa/radeon/kfd_topology.c b/drivers/gpu/hsa/radeon/kfd_topology.c
index 059b7db..d3aaad1 100644
--- a/drivers/gpu/hsa/radeon/kfd_topology.c
+++ b/drivers/gpu/hsa/radeon/kfd_topology.c
@@ -715,6 +715,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
sysfs_show_32bit_prop(buffer, "max_engine_clk_fcompute",
kfd2kgd->get_max_engine_clock_in_mhz(
dev->gpu->kgd));
+ sysfs_show_64bit_prop(buffer, "local_mem_size",
+ kfd2kgd->get_vmem_size(dev->gpu->kgd));
ret = sysfs_show_32bit_prop(buffer, "max_engine_clk_ccompute",
cpufreq_quick_get_max(0)/1000);
}
--
1.9.1
From: Ben Goz <[email protected]>
Signed-off-by: Ben Goz <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/hsa/radeon/kfd_chardev.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/hsa/radeon/kfd_chardev.c b/drivers/gpu/hsa/radeon/kfd_chardev.c
index 09c9a61..be89d26 100644
--- a/drivers/gpu/hsa/radeon/kfd_chardev.c
+++ b/drivers/gpu/hsa/radeon/kfd_chardev.c
@@ -137,11 +137,15 @@ kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, void __user *a
if (copy_from_user(&args, arg, sizeof(args)))
return -EFAULT;
- if (!access_ok(VERIFY_WRITE, args.read_pointer_address, sizeof(qptr_t)))
+ if (!access_ok(VERIFY_WRITE, args.read_pointer_address, sizeof(qptr_t))) {
+ pr_err("kfd: can't access read pointer");
return -EFAULT;
+ }
- if (!access_ok(VERIFY_WRITE, args.write_pointer_address, sizeof(qptr_t)))
+ if (!access_ok(VERIFY_WRITE, args.write_pointer_address, sizeof(qptr_t))) {
+ pr_err("kfd: can't access write pointer");
return -EFAULT;
+ }
q_properties.is_interop = false;
q_properties.queue_percent = args.queue_percentage;
--
1.9.1
From: Ben Goz <[email protected]>
* re-execute the runlist on kernel queue destruction.
* delete kernel queues from pqm's queue list on pqm uninit.
Signed-off-by: Ben Goz <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/hsa/radeon/kfd_device_queue_manager.c | 4 ++++
drivers/gpu/hsa/radeon/kfd_process_queue_manager.c | 2 +-
2 files changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c b/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c
index 4c53e57..12b8b33 100644
--- a/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c
+++ b/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c
@@ -759,6 +759,10 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
{
BUG_ON(!dqm || !kq);
+ pr_debug("kfd: In %s\n", __func__);
+
+ dqm->destroy_queues(dqm);
+
mutex_lock(&dqm->lock);
list_del(&kq->list);
dqm->queue_count--;
diff --git a/drivers/gpu/hsa/radeon/kfd_process_queue_manager.c b/drivers/gpu/hsa/radeon/kfd_process_queue_manager.c
index 89461ab..5d7c46d 100644
--- a/drivers/gpu/hsa/radeon/kfd_process_queue_manager.c
+++ b/drivers/gpu/hsa/radeon/kfd_process_queue_manager.c
@@ -273,10 +273,10 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
if (retval != 0)
return retval;
- list_del(&pqn->process_queue_list);
uninit_queue(pqn->q);
}
+ list_del(&pqn->process_queue_list);
kfree(pqn);
clear_bit(qid, pqm->queue_slot_bitmap);
--
1.9.1
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/hsa/radeon/Makefile | 5 +-
drivers/gpu/hsa/radeon/kfd_sched_cik_static.c | 987 --------------------------
2 files changed, 2 insertions(+), 990 deletions(-)
delete mode 100644 drivers/gpu/hsa/radeon/kfd_sched_cik_static.c
diff --git a/drivers/gpu/hsa/radeon/Makefile b/drivers/gpu/hsa/radeon/Makefile
index 26ce0ae..b5f05b4 100644
--- a/drivers/gpu/hsa/radeon/Makefile
+++ b/drivers/gpu/hsa/radeon/Makefile
@@ -4,9 +4,8 @@
radeon_kfd-y := kfd_module.o kfd_device.o kfd_chardev.o \
kfd_pasid.o kfd_topology.o kfd_process.o \
- kfd_doorbell.o kfd_sched_cik_static.o kfd_registers.o \
- kfd_vidmem.o kfd_interrupt.o kfd_aperture.o \
- kfd_queue.o kfd_mqd_manager.o \
+ kfd_doorbell.o kfd_registers.o kfd_vidmem.o \
+ kfd_interrupt.o kfd_aperture.o kfd_queue.o kfd_mqd_manager.o \
kfd_kernel_queue.o kfd_packet_manager.o \
kfd_process_queue_manager.o kfd_device_queue_manager.o
diff --git a/drivers/gpu/hsa/radeon/kfd_sched_cik_static.c b/drivers/gpu/hsa/radeon/kfd_sched_cik_static.c
deleted file mode 100644
index d576d95..0000000
--- a/drivers/gpu/hsa/radeon/kfd_sched_cik_static.c
+++ /dev/null
@@ -1,987 +0,0 @@
-/*
- * Copyright 2014 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include <linux/log2.h>
-#include <linux/mutex.h>
-#include <linux/slab.h>
-#include <linux/types.h>
-#include <linux/uaccess.h>
-#include <linux/device.h>
-#include <linux/sched.h>
-#include "kfd_priv.h"
-#include "kfd_scheduler.h"
-#include "cik_regs.h"
-#include "cik_int.h"
-
-/* CIK CP hardware is arranged with 8 queues per pipe and 8 pipes per MEC (microengine for compute).
- * The first MEC is ME 1 with the GFX ME as ME 0.
- * We split the CP with the KGD, they take the first N pipes and we take the rest.
- */
-#define CIK_QUEUES_PER_PIPE 8
-#define CIK_PIPES_PER_MEC 4
-
-#define CIK_MAX_PIPES (2 * CIK_PIPES_PER_MEC)
-
-#define CIK_NUM_VMID 16
-
-#define CIK_HPD_SIZE_LOG2 11
-#define CIK_HPD_SIZE (1U << CIK_HPD_SIZE_LOG2)
-#define CIK_HPD_ALIGNMENT 256
-#define CIK_MQD_ALIGNMENT 4
-
-#pragma pack(push, 4)
-
-struct cik_hqd_registers {
- u32 cp_mqd_base_addr;
- u32 cp_mqd_base_addr_hi;
- u32 cp_hqd_active;
- u32 cp_hqd_vmid;
- u32 cp_hqd_persistent_state;
- u32 cp_hqd_pipe_priority;
- u32 cp_hqd_queue_priority;
- u32 cp_hqd_quantum;
- u32 cp_hqd_pq_base;
- u32 cp_hqd_pq_base_hi;
- u32 cp_hqd_pq_rptr;
- u32 cp_hqd_pq_rptr_report_addr;
- u32 cp_hqd_pq_rptr_report_addr_hi;
- u32 cp_hqd_pq_wptr_poll_addr;
- u32 cp_hqd_pq_wptr_poll_addr_hi;
- u32 cp_hqd_pq_doorbell_control;
- u32 cp_hqd_pq_wptr;
- u32 cp_hqd_pq_control;
- u32 cp_hqd_ib_base_addr;
- u32 cp_hqd_ib_base_addr_hi;
- u32 cp_hqd_ib_rptr;
- u32 cp_hqd_ib_control;
- u32 cp_hqd_iq_timer;
- u32 cp_hqd_iq_rptr;
- u32 cp_hqd_dequeue_request;
- u32 cp_hqd_dma_offload;
- u32 cp_hqd_sema_cmd;
- u32 cp_hqd_msg_type;
- u32 cp_hqd_atomic0_preop_lo;
- u32 cp_hqd_atomic0_preop_hi;
- u32 cp_hqd_atomic1_preop_lo;
- u32 cp_hqd_atomic1_preop_hi;
- u32 cp_hqd_hq_scheduler0;
- u32 cp_hqd_hq_scheduler1;
- u32 cp_mqd_control;
-};
-
-struct cik_mqd {
- u32 header;
- u32 dispatch_initiator;
- u32 dimensions[3];
- u32 start_idx[3];
- u32 num_threads[3];
- u32 pipeline_stat_enable;
- u32 perf_counter_enable;
- u32 pgm[2];
- u32 tba[2];
- u32 tma[2];
- u32 pgm_rsrc[2];
- u32 vmid;
- u32 resource_limits;
- u32 static_thread_mgmt01[2];
- u32 tmp_ring_size;
- u32 static_thread_mgmt23[2];
- u32 restart[3];
- u32 thread_trace_enable;
- u32 reserved1;
- u32 user_data[16];
- u32 vgtcs_invoke_count[2];
- struct cik_hqd_registers queue_state;
- u32 dequeue_cntr;
- u32 interrupt_queue[64];
-};
-
-struct cik_mqd_padded {
- struct cik_mqd mqd;
- u8 padding[1024 - sizeof(struct cik_mqd)]; /* Pad MQD out to 1KB. (HW requires 4-byte alignment.) */
-};
-
-#pragma pack(pop)
-
-struct cik_static_private {
- struct kfd_dev *dev;
-
- struct mutex mutex;
-
- unsigned int first_pipe;
- unsigned int num_pipes;
-
- unsigned long free_vmid_mask; /* unsigned long to make set/clear_bit happy */
-
- /* Everything below here is offset by first_pipe. E.g. bit 0 in
- * free_queues is queue 0 in pipe first_pipe
- */
-
- /* Queue q on pipe p is at bit QUEUES_PER_PIPE * p + q. */
- unsigned long free_queues[DIV_ROUND_UP(CIK_MAX_PIPES * CIK_QUEUES_PER_PIPE, BITS_PER_LONG)];
-
- /*
- * Dequeue waits for waves to finish so it could take a long time. We
- * defer through an interrupt. dequeue_wait is woken when a dequeue-
- * complete interrupt comes for that pipe.
- */
- wait_queue_head_t dequeue_wait[CIK_MAX_PIPES];
-
- kfd_mem_obj hpd_mem; /* Single allocation for HPDs for all KFD pipes. */
- kfd_mem_obj mqd_mem; /* Single allocation for all MQDs for all KFD
- * pipes. This is actually struct cik_mqd_padded. */
- uint64_t hpd_addr; /* GPU address for hpd_mem. */
- uint64_t mqd_addr; /* GPU address for mqd_mem. */
- /*
- * Pointer for mqd_mem.
- * We keep this mapped because multiple processes may need to access it
- * in parallel and this is simpler than controlling concurrent kmaps
- */
- struct cik_mqd_padded *mqds;
-};
-
-struct cik_static_process {
- unsigned int vmid;
- pasid_t pasid;
-
- uint32_t sh_mem_config;
- uint32_t ape1_base;
- uint32_t ape1_limit;
-};
-
-struct cik_static_queue {
- unsigned int queue; /* + first_pipe * QUEUES_PER_PIPE */
-
- uint64_t mqd_addr;
- struct cik_mqd *mqd;
-
- void __user *pq_addr;
- void __user *rptr_address;
- doorbell_t __user *wptr_address;
- uint32_t doorbell_index;
-
- uint32_t queue_size_encoded; /* CP_HQD_PQ_CONTROL.QUEUE_SIZE takes the queue size as log2(size) - 3. */
-};
-
-/* SRBM_GFX_CNTL provides the MEC/pipe/queue and vmid for many registers that are
- * In particular, CP_HQD_* and CP_MQD_* are instanced for each queue. CP_HPD_* are instanced for each pipe.
- * SH_MEM_* are instanced per-VMID.
- *
- * We provide queue_select, pipe_select and vmid_select helpers that should be used before accessing
- * registers from those groups. Note that these overwrite each other, e.g. after vmid_select the current
- * selected MEC/pipe/queue is undefined.
- *
- * SRBM_GFX_CNTL and the registers it indexes are shared with KGD. You must be holding the srbm_gfx_cntl
- * lock via lock_srbm_index before setting SRBM_GFX_CNTL or accessing any of the instanced registers.
- */
-static uint32_t make_srbm_gfx_cntl_mpqv(unsigned int me, unsigned int pipe, unsigned int queue, unsigned int vmid)
-{
- return QUEUEID(queue) | VMID(vmid) | MEID(me) | PIPEID(pipe);
-}
-
-static void pipe_select(struct cik_static_private *priv, unsigned int pipe)
-{
- unsigned int pipe_in_mec = (pipe + priv->first_pipe) % CIK_PIPES_PER_MEC;
- unsigned int mec = (pipe + priv->first_pipe) / CIK_PIPES_PER_MEC;
-
- WRITE_REG(priv->dev, SRBM_GFX_CNTL, make_srbm_gfx_cntl_mpqv(mec+1, pipe_in_mec, 0, 0));
-}
-
-static void queue_select(struct cik_static_private *priv, unsigned int queue)
-{
- unsigned int queue_in_pipe = queue % CIK_QUEUES_PER_PIPE;
- unsigned int pipe = queue / CIK_QUEUES_PER_PIPE + priv->first_pipe;
- unsigned int pipe_in_mec = pipe % CIK_PIPES_PER_MEC;
- unsigned int mec = pipe / CIK_PIPES_PER_MEC;
-
-#if 0
- dev_err(radeon_kfd_chardev(), "queue select %d = %u/%u/%u = 0x%08x\n", queue, mec+1, pipe_in_mec, queue_in_pipe,
- make_srbm_gfx_cntl_mpqv(mec+1, pipe_in_mec, queue_in_pipe, 0));
-#endif
-
- WRITE_REG(priv->dev, SRBM_GFX_CNTL, make_srbm_gfx_cntl_mpqv(mec+1, pipe_in_mec, queue_in_pipe, 0));
-}
-
-static void vmid_select(struct cik_static_private *priv, unsigned int vmid)
-{
- WRITE_REG(priv->dev, SRBM_GFX_CNTL, make_srbm_gfx_cntl_mpqv(0, 0, 0, vmid));
-}
-
-static void lock_srbm_index(struct cik_static_private *priv)
-{
- radeon_kfd_lock_srbm_index(priv->dev);
-}
-
-static void unlock_srbm_index(struct cik_static_private *priv)
-{
- WRITE_REG(priv->dev, SRBM_GFX_CNTL, 0); /* Be nice to KGD, reset indexed CP registers to the GFX pipe. */
- radeon_kfd_unlock_srbm_index(priv->dev);
-}
-
-/* One-time setup for all compute pipes. They need to be programmed with the address & size of the HPD EOP buffer. */
-static void init_pipes(struct cik_static_private *priv)
-{
- unsigned int i;
-
- lock_srbm_index(priv);
-
- for (i = 0; i < priv->num_pipes; i++) {
- uint64_t pipe_hpd_addr = priv->hpd_addr + i * CIK_HPD_SIZE;
-
- pipe_select(priv, i);
-
- WRITE_REG(priv->dev, CP_HPD_EOP_BASE_ADDR, lower_32(pipe_hpd_addr >> 8));
- WRITE_REG(priv->dev, CP_HPD_EOP_BASE_ADDR_HI, upper_32(pipe_hpd_addr >> 8));
- WRITE_REG(priv->dev, CP_HPD_EOP_VMID, 0);
- WRITE_REG(priv->dev, CP_HPD_EOP_CONTROL, CIK_HPD_SIZE_LOG2 - 1);
- }
-
- unlock_srbm_index(priv);
-}
-
-/* Program the VMID -> PASID mapping for one VMID.
- * PASID 0 is special: it means to associate no PASID with that VMID.
- * This function waits for the VMID/PASID mapping to complete.
- */
-static void set_vmid_pasid_mapping(struct cik_static_private *priv, unsigned int vmid, pasid_t pasid)
-{
- /* We have to assume that there is no outstanding mapping.
- * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because a mapping
- * is in progress or because a mapping finished and the SW cleared it.
- * So the protocol is to always wait & clear.
- */
-
- uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid | ATC_VMID_PASID_MAPPING_VALID;
-
- WRITE_REG(priv->dev, ATC_VMID0_PASID_MAPPING + vmid*sizeof(uint32_t), pasid_mapping);
-
- while (!(READ_REG(priv->dev, ATC_VMID_PASID_MAPPING_UPDATE_STATUS) & (1U << vmid)))
- cpu_relax();
- WRITE_REG(priv->dev, ATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid);
-
- WRITE_REG(priv->dev, IH_VMID_0_LUT + vmid*sizeof(uint32_t), pasid);
-}
-
-static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
-{
- /* In 64-bit mode, we can only control the top 3 bits of the LDS, scratch and GPUVM apertures.
- * The hardware fills in the remaining 59 bits according to the following pattern:
- * LDS: X0000000'00000000 - X0000001'00000000 (4GB)
- * Scratch: X0000001'00000000 - X0000002'00000000 (4GB)
- * GPUVM: Y0010000'00000000 - Y0020000'00000000 (1TB)
- *
- * (where X/Y is the configurable nybble with the low-bit 0)
- *
- * LDS and scratch will have the same top nybble programmed in the top 3 bits of SH_MEM_BASES.PRIVATE_BASE.
- * GPUVM can have a different top nybble programmed in the top 3 bits of SH_MEM_BASES.SHARED_BASE.
- * We don't bother to support different top nybbles for LDS/Scratch and GPUVM.
- */
-
- BUG_ON((top_address_nybble & 1) || top_address_nybble > 0xE);
-
- return PRIVATE_BASE(top_address_nybble << 12) | SHARED_BASE(top_address_nybble << 12);
-}
-
-/* Initial programming for all ATS registers.
- * - enable ATS for all compute VMIDs
- * - clear the VMID/PASID mapping for all compute VMIDS
- * - program the shader core flat address settings:
- * -- 64-bit mode
- * -- unaligned access allowed
- * -- noncached (this is the only CPU-coherent mode in CIK)
- * -- APE 1 disabled
- */
-static void init_ats(struct cik_static_private *priv)
-{
- unsigned int i;
-
- /* Enable self-ringing doorbell recognition and direct the BIF to send
- * untranslated writes to the IOMMU before comparing to the aperture.*/
- WRITE_REG(priv->dev, BIF_DOORBELL_CNTL, 0);
-
- WRITE_REG(priv->dev, ATC_VM_APERTURE0_CNTL, ATS_ACCESS_MODE_ALWAYS);
- WRITE_REG(priv->dev, ATC_VM_APERTURE0_CNTL2, priv->free_vmid_mask);
- WRITE_REG(priv->dev, ATC_VM_APERTURE0_LOW_ADDR, 0);
- WRITE_REG(priv->dev, ATC_VM_APERTURE0_HIGH_ADDR, 0);
-
- WRITE_REG(priv->dev, ATC_VM_APERTURE1_CNTL, 0);
- WRITE_REG(priv->dev, ATC_VM_APERTURE1_CNTL2, 0);
- WRITE_REG(priv->dev, ATC_VM_APERTURE1_LOW_ADDR, 0);
- WRITE_REG(priv->dev, ATC_VM_APERTURE1_HIGH_ADDR, 0);
-
- lock_srbm_index(priv);
-
- for (i = 0; i < CIK_NUM_VMID; i++) {
- if (priv->free_vmid_mask & (1U << i)) {
- uint32_t sh_mem_config;
-
- set_vmid_pasid_mapping(priv, i, 0);
-
- vmid_select(priv, i);
-
- sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
- sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);
- sh_mem_config |= APE1_MTYPE(MTYPE_NONCACHED);
-
- WRITE_REG(priv->dev, SH_MEM_CONFIG, sh_mem_config);
-
- /* Configure apertures:
- * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
- * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
- * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
- */
- WRITE_REG(priv->dev, SH_MEM_BASES, compute_sh_mem_bases_64bit(6));
-
- /* Scratch aperture is not supported for now. */
- WRITE_REG(priv->dev, SH_STATIC_MEM_CONFIG, 0);
-
- /* APE1 disabled for now. */
- WRITE_REG(priv->dev, SH_MEM_APE1_BASE, 1);
- WRITE_REG(priv->dev, SH_MEM_APE1_LIMIT, 0);
- }
- }
-
- unlock_srbm_index(priv);
-}
-
-static void exit_ats(struct cik_static_private *priv)
-{
- unsigned int i;
-
- for (i = 0; i < CIK_NUM_VMID; i++)
- if (priv->free_vmid_mask & (1U << i))
- set_vmid_pasid_mapping(priv, i, 0);
-
- WRITE_REG(priv->dev, ATC_VM_APERTURE0_CNTL, ATS_ACCESS_MODE_NEVER);
- WRITE_REG(priv->dev, ATC_VM_APERTURE0_CNTL2, 0);
-}
-
-static struct cik_static_private *kfd_scheduler_to_private(struct kfd_scheduler *scheduler)
-{
- return (struct cik_static_private *)scheduler;
-}
-
-static struct cik_static_process *kfd_process_to_private(struct kfd_scheduler_process *process)
-{
- return (struct cik_static_process *)process;
-}
-
-static struct cik_static_queue *kfd_queue_to_private(struct kfd_scheduler_queue *queue)
-{
- return (struct cik_static_queue *)queue;
-}
-
-static int cik_static_create(struct kfd_dev *dev, struct kfd_scheduler **scheduler)
-{
- struct cik_static_private *priv;
- unsigned int i;
- int err;
- void *hpdptr;
-
- priv = kmalloc(sizeof(*priv), GFP_KERNEL);
- if (priv == NULL)
- return -ENOMEM;
-
- mutex_init(&priv->mutex);
-
- priv->dev = dev;
-
- priv->first_pipe = dev->shared_resources.first_compute_pipe;
- priv->num_pipes = dev->shared_resources.compute_pipe_count;
-
- for (i = 0; i < priv->num_pipes * CIK_QUEUES_PER_PIPE; i++)
- __set_bit(i, priv->free_queues);
-
- priv->free_vmid_mask = dev->shared_resources.compute_vmid_bitmap;
-
- for (i = 0; i < priv->num_pipes; i++)
- init_waitqueue_head(&priv->dequeue_wait[i]);
-
- /*
- * Allocate memory for the HPDs. This is hardware-owned per-pipe data.
- * The driver never accesses this memory after zeroing it. It doesn't even have
- * to be saved/restored on suspend/resume because it contains no data when there
- * are no active queues.
- */
- err = radeon_kfd_vidmem_alloc(dev,
- CIK_HPD_SIZE * priv->num_pipes,
- PAGE_SIZE,
- KFD_MEMPOOL_SYSTEM_WRITECOMBINE,
- &priv->hpd_mem);
- if (err)
- goto err_hpd_alloc;
-
- err = radeon_kfd_vidmem_kmap(dev, priv->hpd_mem, &hpdptr);
- if (err)
- goto err_hpd_kmap;
- memset(hpdptr, 0, CIK_HPD_SIZE * priv->num_pipes);
- radeon_kfd_vidmem_unkmap(dev, priv->hpd_mem);
-
- /*
- * Allocate memory for all the MQDs.
- * These are per-queue data that is hardware owned but with driver init.
- * The driver has to copy this data into HQD registers when a
- * pipe is (re)activated.
- */
- err = radeon_kfd_vidmem_alloc(dev,
- sizeof(struct cik_mqd_padded) * priv->num_pipes * CIK_QUEUES_PER_PIPE,
- PAGE_SIZE,
- KFD_MEMPOOL_SYSTEM_CACHEABLE,
- &priv->mqd_mem);
- if (err)
- goto err_mqd_alloc;
- radeon_kfd_vidmem_kmap(dev, priv->mqd_mem, (void **)&priv->mqds);
- if (err)
- goto err_mqd_kmap;
-
- *scheduler = (struct kfd_scheduler *)priv;
-
- return 0;
-
-err_mqd_kmap:
- radeon_kfd_vidmem_free(dev, priv->mqd_mem);
-err_mqd_alloc:
-err_hpd_kmap:
- radeon_kfd_vidmem_free(dev, priv->hpd_mem);
-err_hpd_alloc:
- mutex_destroy(&priv->mutex);
- kfree(priv);
- return err;
-}
-
-static void cik_static_destroy(struct kfd_scheduler *scheduler)
-{
- struct cik_static_private *priv = kfd_scheduler_to_private(scheduler);
-
- radeon_kfd_vidmem_unkmap(priv->dev, priv->mqd_mem);
- radeon_kfd_vidmem_free(priv->dev, priv->mqd_mem);
- radeon_kfd_vidmem_free(priv->dev, priv->hpd_mem);
-
- mutex_destroy(&priv->mutex);
-
- kfree(priv);
-}
-
-static void
-enable_interrupts(struct cik_static_private *priv)
-{
- unsigned int i;
-
- lock_srbm_index(priv);
- for (i = 0; i < priv->num_pipes; i++) {
- pipe_select(priv, i);
- WRITE_REG(priv->dev, CPC_INT_CNTL, DEQUEUE_REQUEST_INT_ENABLE);
- }
- unlock_srbm_index(priv);
-}
-
-static void
-disable_interrupts(struct cik_static_private *priv)
-{
- unsigned int i;
-
- lock_srbm_index(priv);
- for (i = 0; i < priv->num_pipes; i++) {
- pipe_select(priv, i);
- WRITE_REG(priv->dev, CPC_INT_CNTL, 0);
- }
- unlock_srbm_index(priv);
-}
-
-static void cik_static_start(struct kfd_scheduler *scheduler)
-{
- struct cik_static_private *priv = kfd_scheduler_to_private(scheduler);
-
- radeon_kfd_vidmem_gpumap(priv->dev, priv->hpd_mem, &priv->hpd_addr);
- radeon_kfd_vidmem_gpumap(priv->dev, priv->mqd_mem, &priv->mqd_addr);
-
- init_pipes(priv);
- init_ats(priv);
- enable_interrupts(priv);
-}
-
-static void cik_static_stop(struct kfd_scheduler *scheduler)
-{
- struct cik_static_private *priv = kfd_scheduler_to_private(scheduler);
-
- exit_ats(priv);
- disable_interrupts(priv);
-
- radeon_kfd_vidmem_ungpumap(priv->dev, priv->hpd_mem);
- radeon_kfd_vidmem_ungpumap(priv->dev, priv->mqd_mem);
-}
-
-static bool allocate_vmid(struct cik_static_private *priv, unsigned int *vmid)
-{
- bool ok = false;
-
- mutex_lock(&priv->mutex);
-
- if (priv->free_vmid_mask != 0) {
- unsigned int v = __ffs64(priv->free_vmid_mask);
-
- clear_bit(v, &priv->free_vmid_mask);
- *vmid = v;
-
- ok = true;
- }
-
- mutex_unlock(&priv->mutex);
-
- return ok;
-}
-
-static void release_vmid(struct cik_static_private *priv, unsigned int vmid)
-{
- /* It's okay to race against allocate_vmid because this only adds bits to free_vmid_mask.
- * And set_bit/clear_bit are atomic wrt each other. */
- set_bit(vmid, &priv->free_vmid_mask);
-}
-
-static void program_sh_mem_settings(struct cik_static_private *sched,
- struct cik_static_process *proc)
-{
- lock_srbm_index(sched);
-
- vmid_select(sched, proc->vmid);
-
- WRITE_REG(sched->dev, SH_MEM_CONFIG, proc->sh_mem_config);
-
- WRITE_REG(sched->dev, SH_MEM_APE1_BASE, proc->ape1_base);
- WRITE_REG(sched->dev, SH_MEM_APE1_LIMIT, proc->ape1_limit);
-
- unlock_srbm_index(sched);
-}
-
-static void setup_vmid_for_process(struct cik_static_private *priv, struct cik_static_process *p)
-{
- set_vmid_pasid_mapping(priv, p->vmid, p->pasid);
-
- program_sh_mem_settings(priv, p);
-}
-
-static int
-cik_static_register_process(struct kfd_scheduler *scheduler, struct kfd_process *process,
- struct kfd_scheduler_process **scheduler_process)
-{
- struct cik_static_private *priv = kfd_scheduler_to_private(scheduler);
-
- struct cik_static_process *hwp;
-
- hwp = kmalloc(sizeof(*hwp), GFP_KERNEL);
- if (hwp == NULL)
- return -ENOMEM;
-
- if (!allocate_vmid(priv, &hwp->vmid)) {
- kfree(hwp);
- return -ENOMEM;
- }
-
- hwp->pasid = process->pasid;
-
- hwp->sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
- | DEFAULT_MTYPE(MTYPE_NONCACHED)
- | APE1_MTYPE(MTYPE_NONCACHED);
- hwp->ape1_base = 1;
- hwp->ape1_limit = 0;
-
- setup_vmid_for_process(priv, hwp);
-
- *scheduler_process = (struct kfd_scheduler_process *)hwp;
-
- return 0;
-}
-
-static void cik_static_deregister_process(struct kfd_scheduler *scheduler,
- struct kfd_scheduler_process *scheduler_process)
-{
- struct cik_static_private *priv = kfd_scheduler_to_private(scheduler);
- struct cik_static_process *pp = kfd_process_to_private(scheduler_process);
-
-
- if (priv && pp) {
- release_vmid(priv, pp->vmid);
- kfree(pp);
- }
-}
-
-static bool allocate_hqd(struct cik_static_private *priv, unsigned int *queue)
-{
- bool ok = false;
- unsigned int q;
-
- mutex_lock(&priv->mutex);
-
- q = find_first_bit(priv->free_queues, priv->num_pipes * CIK_QUEUES_PER_PIPE);
-
- if (q != priv->num_pipes * CIK_QUEUES_PER_PIPE) {
- clear_bit(q, priv->free_queues);
- *queue = q;
-
- ok = true;
- }
-
- mutex_unlock(&priv->mutex);
-
- return ok;
-}
-
-static void release_hqd(struct cik_static_private *priv, unsigned int queue)
-{
- /* It's okay to race against allocate_hqd because this only adds bits to free_queues.
- * And set_bit/clear_bit are atomic wrt each other. */
- set_bit(queue, priv->free_queues);
-}
-
-static void init_mqd(const struct cik_static_queue *queue, const struct cik_static_process *process)
-{
- struct cik_mqd *mqd = queue->mqd;
-
- memset(mqd, 0, sizeof(*mqd));
-
- mqd->header = 0xC0310800;
- mqd->pipeline_stat_enable = 1;
- mqd->static_thread_mgmt01[0] = 0xffffffff;
- mqd->static_thread_mgmt01[1] = 0xffffffff;
- mqd->static_thread_mgmt23[0] = 0xffffffff;
- mqd->static_thread_mgmt23[1] = 0xffffffff;
-
- mqd->queue_state.cp_mqd_base_addr = lower_32(queue->mqd_addr);
- mqd->queue_state.cp_mqd_base_addr_hi = upper_32(queue->mqd_addr);
- mqd->queue_state.cp_mqd_control = MQD_CONTROL_PRIV_STATE_EN;
-
- mqd->queue_state.cp_hqd_pq_base = lower_32((uintptr_t)queue->pq_addr >> 8);
- mqd->queue_state.cp_hqd_pq_base_hi = upper_32((uintptr_t)queue->pq_addr >> 8);
- mqd->queue_state.cp_hqd_pq_control = QUEUE_SIZE(queue->queue_size_encoded) | DEFAULT_RPTR_BLOCK_SIZE
- | DEFAULT_MIN_AVAIL_SIZE | PQ_ATC_EN;
- mqd->queue_state.cp_hqd_pq_rptr_report_addr = lower_32((uintptr_t)queue->rptr_address);
- mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi = upper_32((uintptr_t)queue->rptr_address);
- mqd->queue_state.cp_hqd_pq_doorbell_control = DOORBELL_OFFSET(queue->doorbell_index) | DOORBELL_EN;
- mqd->queue_state.cp_hqd_vmid = process->vmid;
- mqd->queue_state.cp_hqd_active = 1;
-
- mqd->queue_state.cp_hqd_persistent_state = DEFAULT_CP_HQD_PERSISTENT_STATE;
-
- /* The values for these 3 are from WinKFD. */
- mqd->queue_state.cp_hqd_quantum = QUANTUM_EN | QUANTUM_SCALE_1MS | QUANTUM_DURATION(10);
- mqd->queue_state.cp_hqd_pipe_priority = 1;
- mqd->queue_state.cp_hqd_queue_priority = 15;
-
- mqd->queue_state.cp_hqd_ib_control = IB_ATC_EN | DEFAULT_MIN_IB_AVAIL_SIZE;
-}
-
-/* Write the HQD registers and activate the queue.
- * Requires that SRBM_GFX_CNTL has already been programmed for the queue.
- */
-static void load_hqd(struct cik_static_private *priv, struct cik_static_queue *queue)
-{
- struct kfd_dev *dev = priv->dev;
- const struct cik_hqd_registers *qs = &queue->mqd->queue_state;
-
- WRITE_REG(dev, CP_MQD_BASE_ADDR, qs->cp_mqd_base_addr);
- WRITE_REG(dev, CP_MQD_BASE_ADDR_HI, qs->cp_mqd_base_addr_hi);
- WRITE_REG(dev, CP_MQD_CONTROL, qs->cp_mqd_control);
-
- WRITE_REG(dev, CP_HQD_PQ_BASE, qs->cp_hqd_pq_base);
- WRITE_REG(dev, CP_HQD_PQ_BASE_HI, qs->cp_hqd_pq_base_hi);
- WRITE_REG(dev, CP_HQD_PQ_CONTROL, qs->cp_hqd_pq_control);
- /* DOORBELL_CONTROL before WPTR because WPTR writes are dropped if DOORBELL_HIT is set. */
- WRITE_REG(dev, CP_HQD_PQ_DOORBELL_CONTROL, qs->cp_hqd_pq_doorbell_control);
- WRITE_REG(dev, CP_HQD_PQ_WPTR, qs->cp_hqd_pq_wptr);
- WRITE_REG(dev, CP_HQD_PQ_RPTR, qs->cp_hqd_pq_rptr);
- WRITE_REG(dev, CP_HQD_PQ_RPTR_REPORT_ADDR, qs->cp_hqd_pq_rptr_report_addr);
- WRITE_REG(dev, CP_HQD_PQ_RPTR_REPORT_ADDR_HI, qs->cp_hqd_pq_rptr_report_addr_hi);
-
- WRITE_REG(dev, CP_HQD_VMID, qs->cp_hqd_vmid);
- WRITE_REG(dev, CP_HQD_PERSISTENT_STATE, qs->cp_hqd_persistent_state);
- WRITE_REG(dev, CP_HQD_QUANTUM, qs->cp_hqd_quantum);
- WRITE_REG(dev, CP_HQD_PIPE_PRIORITY, qs->cp_hqd_pipe_priority);
- WRITE_REG(dev, CP_HQD_QUEUE_PRIORITY, qs->cp_hqd_queue_priority);
-
- WRITE_REG(dev, CP_HQD_IB_CONTROL, qs->cp_hqd_ib_control);
- WRITE_REG(dev, CP_HQD_IB_BASE_ADDR, qs->cp_hqd_ib_base_addr);
- WRITE_REG(dev, CP_HQD_IB_BASE_ADDR_HI, qs->cp_hqd_ib_base_addr_hi);
- WRITE_REG(dev, CP_HQD_IB_RPTR, qs->cp_hqd_ib_rptr);
- WRITE_REG(dev, CP_HQD_SEMA_CMD, qs->cp_hqd_sema_cmd);
- WRITE_REG(dev, CP_HQD_MSG_TYPE, qs->cp_hqd_msg_type);
- WRITE_REG(dev, CP_HQD_ATOMIC0_PREOP_LO, qs->cp_hqd_atomic0_preop_lo);
- WRITE_REG(dev, CP_HQD_ATOMIC0_PREOP_HI, qs->cp_hqd_atomic0_preop_hi);
- WRITE_REG(dev, CP_HQD_ATOMIC1_PREOP_LO, qs->cp_hqd_atomic1_preop_lo);
- WRITE_REG(dev, CP_HQD_ATOMIC1_PREOP_HI, qs->cp_hqd_atomic1_preop_hi);
- WRITE_REG(dev, CP_HQD_HQ_SCHEDULER0, qs->cp_hqd_hq_scheduler0);
- WRITE_REG(dev, CP_HQD_HQ_SCHEDULER1, qs->cp_hqd_hq_scheduler1);
-
- WRITE_REG(dev, CP_HQD_ACTIVE, 1);
-}
-
-static void activate_queue(struct cik_static_private *priv, struct cik_static_queue *queue)
-{
- bool wptr_shadow_valid;
- doorbell_t wptr_shadow;
-
- /* Avoid sleeping while holding the SRBM lock. */
- wptr_shadow_valid = !get_user(wptr_shadow, queue->wptr_address);
-
- lock_srbm_index(priv);
- queue_select(priv, queue->queue);
-
- load_hqd(priv, queue);
-
- /* Doorbell and wptr are special because there is a race when reactivating a queue.
- * Since doorbell writes to deactivated queues are ignored by hardware, the application
- * shadows the doorbell into memory at queue->wptr_address.
- *
- * We want the queue to automatically resume processing as if it were always active,
- * so we want to copy from queue->wptr_address into the wptr/doorbell.
- *
- * The race is that the app could write a new wptr into the doorbell before we
- * write the shadowed wptr, resulting in an old wptr written later.
- *
- * The hardware solves this ignoring CP_HQD_WPTR writes after a doorbell write.
- * So the KFD can activate the doorbell then write the shadow wptr to CP_HQD_WPTR
- * knowing it will be ignored if the user has written a more-recent doorbell.
- */
- if (wptr_shadow_valid)
- WRITE_REG(priv->dev, CP_HQD_PQ_WPTR, wptr_shadow);
-
- unlock_srbm_index(priv);
-}
-
-static bool queue_inactive(struct cik_static_private *priv, struct cik_static_queue *queue)
-{
- bool inactive;
-
- lock_srbm_index(priv);
- queue_select(priv, queue->queue);
-
- inactive = (READ_REG(priv->dev, CP_HQD_ACTIVE) == 0);
-
- unlock_srbm_index(priv);
-
- return inactive;
-}
-
-static void deactivate_queue(struct cik_static_private *priv, struct cik_static_queue *queue)
-{
- lock_srbm_index(priv);
- queue_select(priv, queue->queue);
-
- WRITE_REG(priv->dev, CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQUEST_DRAIN | DEQUEUE_INT);
-
- unlock_srbm_index(priv);
-
- wait_event(priv->dequeue_wait[queue->queue/CIK_QUEUES_PER_PIPE],
- queue_inactive(priv, queue));
-}
-
-#define BIT_MASK_64(high, low) (((1ULL << (high)) - 1) & ~((1ULL << (low)) - 1))
-#define RING_ADDRESS_BAD_BIT_MASK (~BIT_MASK_64(48, 8))
-#define RWPTR_ADDRESS_BAD_BIT_MASK (~BIT_MASK_64(48, 2))
-
-#define MAX_QUEUE_SIZE (1ULL << 32)
-#define MIN_QUEUE_SIZE (1ULL << 10)
-
-static int
-cik_static_create_queue(struct kfd_scheduler *scheduler,
- struct kfd_scheduler_process *process,
- struct kfd_scheduler_queue *queue,
- void __user *ring_address,
- uint64_t ring_size,
- void __user *rptr_address,
- void __user *wptr_address,
- unsigned int doorbell)
-{
- struct cik_static_private *priv = kfd_scheduler_to_private(scheduler);
- struct cik_static_process *hwp = kfd_process_to_private(process);
- struct cik_static_queue *hwq = kfd_queue_to_private(queue);
-
- if ((uint64_t)ring_address & RING_ADDRESS_BAD_BIT_MASK
- || (uint64_t)rptr_address & RWPTR_ADDRESS_BAD_BIT_MASK
- || (uint64_t)wptr_address & RWPTR_ADDRESS_BAD_BIT_MASK)
- return -EINVAL;
-
- if (ring_size > MAX_QUEUE_SIZE || ring_size < MIN_QUEUE_SIZE || !is_power_of_2(ring_size))
- return -EINVAL;
-
- if (!allocate_hqd(priv, &hwq->queue))
- return -ENOMEM;
-
- hwq->mqd_addr = priv->mqd_addr + sizeof(struct cik_mqd_padded) * hwq->queue;
- hwq->mqd = &priv->mqds[hwq->queue].mqd;
- hwq->pq_addr = ring_address;
- hwq->rptr_address = rptr_address;
- hwq->wptr_address = wptr_address;
- hwq->doorbell_index = doorbell;
- hwq->queue_size_encoded = ilog2(ring_size) - 3;
-
- init_mqd(hwq, hwp);
- activate_queue(priv, hwq);
-
- return 0;
-}
-
-static void
-cik_static_destroy_queue(struct kfd_scheduler *scheduler, struct kfd_scheduler_queue *queue)
-{
- struct cik_static_private *priv = kfd_scheduler_to_private(scheduler);
- struct cik_static_queue *hwq = kfd_queue_to_private(queue);
-
- deactivate_queue(priv, hwq);
-
- release_hqd(priv, hwq->queue);
-}
-
-static void
-dequeue_int_received(struct cik_static_private *priv, uint32_t pipe_id)
-{
- /* The waiting threads will check CP_HQD_ACTIVE to see whether their
- * queue completed. */
- wake_up_all(&priv->dequeue_wait[pipe_id]);
-}
-
-/* Figure out the KFD compute pipe ID for an interrupt ring entry.
- * Returns true if it's a KFD compute pipe, false otherwise. */
-static bool int_compute_pipe(const struct cik_static_private *priv,
- const struct cik_ih_ring_entry *ih_ring_entry,
- uint32_t *kfd_pipe)
-{
- uint32_t pipe_id;
-
- if (ih_ring_entry->meid == 0) /* Ignore graphics interrupts - compute only. */
- return false;
-
- pipe_id = (ih_ring_entry->meid - 1) * CIK_PIPES_PER_MEC + ih_ring_entry->pipeid;
- if (pipe_id < priv->first_pipe)
- return false;
-
- pipe_id -= priv->first_pipe;
-
- *kfd_pipe = pipe_id;
-
- return true;
-}
-
-static bool
-cik_static_interrupt_isr(struct kfd_scheduler *scheduler, const void *ih_ring_entry)
-{
- struct cik_static_private *priv = kfd_scheduler_to_private(scheduler);
- const struct cik_ih_ring_entry *ihre = ih_ring_entry;
- uint32_t source_id = ihre->source_id;
- uint32_t pipe_id;
-
- /* We only care about CP interrupts here, they all come with a pipe. */
- if (!int_compute_pipe(priv, ihre, &pipe_id))
- return false;
-
- dev_dbg(radeon_kfd_chardev(), "INT(ISR): src=%02x, data=0x%x, pipe=%u, vmid=%u, pasid=%u\n",
- ihre->source_id, ihre->data, pipe_id, ihre->vmid, ihre->pasid);
-
- switch (source_id) {
- case CIK_INTSRC_DEQUEUE_COMPLETE:
- dequeue_int_received(priv, pipe_id);
- return false; /* Already handled. */
-
- default:
- return false; /* Not interested. */
- }
-}
-
-static void
-cik_static_interrupt_wq(struct kfd_scheduler *scheduler, const void *ih_ring_entry)
-{
-}
-
-/* Low bits must be 0000/FFFF as required by HW, high bits must be 0 to stay in user mode. */
-#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
-#define APE1_LIMIT_ALIGNMENT 0xFFFF /* APE1 limit is inclusive and 64K aligned. */
-
-static bool cik_static_set_cache_policy(struct kfd_scheduler *scheduler,
- struct kfd_scheduler_process *process,
- enum cache_policy default_policy,
- enum cache_policy alternate_policy,
- void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size)
-{
- struct cik_static_private *sched = kfd_scheduler_to_private(scheduler);
- struct cik_static_process *proc = kfd_process_to_private(process);
-
- uint32_t default_mtype;
- uint32_t ape1_mtype;
-
- if (alternate_aperture_size == 0) {
- /* base > limit disables APE1 */
- proc->ape1_base = 1;
- proc->ape1_limit = 0;
- } else {
- /*
- * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]}, SH_MEM_APE1_BASE[31:0], 0x0000 }
- * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]}, SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
- * Verify that the base and size parameters can be represented in this format
- * and convert them. Additionally restrict APE1 to user-mode addresses.
- */
-
- uint64_t base = (uintptr_t)alternate_aperture_base;
- uint64_t limit = base + alternate_aperture_size - 1;
-
- if (limit <= base)
- return false;
-
- if ((base & APE1_FIXED_BITS_MASK) != 0)
- return false;
-
- if ((limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT)
- return false;
-
- proc->ape1_base = base >> 16;
- proc->ape1_limit = limit >> 16;
- }
-
- default_mtype = (default_policy == cache_policy_coherent) ? MTYPE_NONCACHED : MTYPE_CACHED;
- ape1_mtype = (alternate_policy == cache_policy_coherent) ? MTYPE_NONCACHED : MTYPE_CACHED;
-
- proc->sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
- | DEFAULT_MTYPE(default_mtype)
- | APE1_MTYPE(ape1_mtype);
-
- program_sh_mem_settings(sched, proc);
-
- return true;
-}
-
-
-const struct kfd_scheduler_class radeon_kfd_cik_static_scheduler_class = {
- .name = "CIK static scheduler",
- .create = cik_static_create,
- .destroy = cik_static_destroy,
- .start = cik_static_start,
- .stop = cik_static_stop,
- .register_process = cik_static_register_process,
- .deregister_process = cik_static_deregister_process,
- .queue_size = sizeof(struct cik_static_queue),
- .create_queue = cik_static_create_queue,
- .destroy_queue = cik_static_destroy_queue,
-
- .interrupt_isr = cik_static_interrupt_isr,
- .interrupt_wq = cik_static_interrupt_wq,
-
- .set_cache_policy = cik_static_set_cache_policy,
-};
--
1.9.1
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/hsa/radeon/kfd_device.c | 3 +++
drivers/gpu/hsa/radeon/kfd_device_queue_manager.c | 2 +-
drivers/gpu/hsa/radeon/kfd_mqd_manager.c | 1 +
drivers/gpu/hsa/radeon/kfd_packet_manager.c | 3 ++-
drivers/gpu/hsa/radeon/kfd_process.c | 10 ++++++----
5 files changed, 13 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/hsa/radeon/kfd_device.c b/drivers/gpu/hsa/radeon/kfd_device.c
index 9af812b..30558c9 100644
--- a/drivers/gpu/hsa/radeon/kfd_device.c
+++ b/drivers/gpu/hsa/radeon/kfd_device.c
@@ -88,6 +88,9 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev)
return NULL;
kfd = kzalloc(sizeof(*kfd), GFP_KERNEL);
+ if (!kfd)
+ return NULL;
+
kfd->kgd = kgd;
kfd->device_info = device_info;
kfd->pdev = pdev;
diff --git a/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c b/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c
index 56875f9..4931f8a 100644
--- a/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c
+++ b/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c
@@ -317,7 +317,7 @@ static struct mqd_manager *get_mqd_manager_nocpsch(struct device_queue_manager *
{
struct mqd_manager *mqd;
- BUG_ON(!dqm || type > KFD_MQD_TYPE_MAX);
+ BUG_ON(!dqm || type >= KFD_MQD_TYPE_MAX);
pr_debug("kfd: In func %s mqd type %d\n", __func__, type);
diff --git a/drivers/gpu/hsa/radeon/kfd_mqd_manager.c b/drivers/gpu/hsa/radeon/kfd_mqd_manager.c
index a3e9f7c..8c1192e 100644
--- a/drivers/gpu/hsa/radeon/kfd_mqd_manager.c
+++ b/drivers/gpu/hsa/radeon/kfd_mqd_manager.c
@@ -437,6 +437,7 @@ struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, struct kfd_dev *dev
mqd->uninitialize = uninitialize;
break;
default:
+ kfree(mqd);
return NULL;
break;
}
diff --git a/drivers/gpu/hsa/radeon/kfd_packet_manager.c b/drivers/gpu/hsa/radeon/kfd_packet_manager.c
index 621a720..5cd23b0 100644
--- a/drivers/gpu/hsa/radeon/kfd_packet_manager.c
+++ b/drivers/gpu/hsa/radeon/kfd_packet_manager.c
@@ -85,9 +85,10 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm, unsigned int **rl_b
BUG_ON(!pm);
BUG_ON(pm->allocated == true);
+ BUG_ON(is_over_subscription == NULL);
pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription);
- if (is_over_subscription &&
+ if (*is_over_subscription &&
sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION)
return -EFAULT;
diff --git a/drivers/gpu/hsa/radeon/kfd_process.c b/drivers/gpu/hsa/radeon/kfd_process.c
index eb30cb3..aacc7ef 100644
--- a/drivers/gpu/hsa/radeon/kfd_process.c
+++ b/drivers/gpu/hsa/radeon/kfd_process.c
@@ -146,15 +146,15 @@ static struct kfd_process *create_process(const struct task_struct *thread)
process = kzalloc(sizeof(*process), GFP_KERNEL);
if (!process)
- goto err_alloc;
+ goto err_alloc_process;
process->queues = kmalloc_array(INITIAL_QUEUE_ARRAY_SIZE, sizeof(process->queues[0]), GFP_KERNEL);
if (!process->queues)
- goto err_alloc;
+ goto err_alloc_queues;
process->pasid = radeon_kfd_pasid_alloc();
if (process->pasid == 0)
- goto err_alloc;
+ goto err_alloc_pasid;
mutex_init(&process->mutex);
@@ -178,9 +178,11 @@ err_process_pqm_init:
radeon_kfd_pasid_free(process->pasid);
list_del(&process->processes_list);
thread->mm->kfd_process = NULL;
-err_alloc:
+err_alloc_pasid:
kfree(process->queues);
+err_alloc_queues:
kfree(process);
+err_alloc_process:
return ERR_PTR(err);
}
--
1.9.1
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/hsa/radeon/kfd_chardev.c | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/hsa/radeon/kfd_chardev.c b/drivers/gpu/hsa/radeon/kfd_chardev.c
index 51f790f..09c9a61 100644
--- a/drivers/gpu/hsa/radeon/kfd_chardev.c
+++ b/drivers/gpu/hsa/radeon/kfd_chardev.c
@@ -148,21 +148,22 @@ kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, void __user *a
q_properties.priority = args.queue_priority;
q_properties.queue_address = args.ring_base_address;
q_properties.queue_size = args.ring_size;
- q_properties.read_ptr = args.read_pointer_address;
- q_properties.write_ptr = args.write_pointer_address;
+ q_properties.read_ptr = (qptr_t *) args.read_pointer_address;
+ q_properties.write_ptr = (qptr_t *) args.write_pointer_address;
pr_debug("%s Arguments: Queue Percentage (%d, %d)\n"
"Queue Priority (%d, %d)\n"
"Queue Address (0x%llX, 0x%llX)\n"
- "Queue Size (%llX, %u)\n",
- "Queue r/w Pointers (%llX, %llX)\n",
+ "Queue Size (0x%llX, %u)\n"
+ "Queue r/w Pointers (0x%llX, 0x%llX)\n",
__func__,
q_properties.queue_percent, args.queue_percentage,
q_properties.priority, args.queue_priority,
q_properties.queue_address, args.ring_base_address,
q_properties.queue_size, args.ring_size,
- q_properties.read_ptr, q_properties.write_ptr);
+ (uint64_t) q_properties.read_ptr,
+ (uint64_t) q_properties.write_ptr);
dev = radeon_kfd_device_by_id(args.gpu_id);
if (dev == NULL)
--
1.9.1
This patch fixes a bug in the code flow that caused an override of the sh_mem
registers.
The bug resulted in the sh_mem registers not being initialized properly, and
in the sh_mem registers for vmid 0 (the vmid of non-HSA processes) being overwritten.
Reviewed-by: Ben Goz <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/hsa/radeon/kfd_device_queue_manager.c | 48 ++++++++++++-----------
1 file changed, 26 insertions(+), 22 deletions(-)
diff --git a/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c b/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c
index 5ec8da7..56875f9 100644
--- a/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c
+++ b/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c
@@ -87,21 +87,25 @@ static void init_process_memory(struct device_queue_manager *dqm, struct qcm_pro
unsigned int temp;
BUG_ON(!dqm || !qpd);
+ /* check if sh_mem_config register already configured */
+ if (qpd->sh_mem_config == 0) {
+ qpd->sh_mem_config =
+ ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) |
+ DEFAULT_MTYPE(MTYPE_NONCACHED) |
+ APE1_MTYPE(MTYPE_NONCACHED);
+ qpd->sh_mem_ape1_limit = 0;
+ qpd->sh_mem_ape1_base = 0;
+ }
+
if (qpd->pqm->process->is_32bit_user_mode) {
temp = get_sh_mem_bases_32(qpd->pqm->process, dqm->dev);
qpd->sh_mem_bases = SHARED_BASE(temp);
- qpd->sh_mem_config = PTR32;
+ qpd->sh_mem_config |= PTR32;
} else {
temp = get_sh_mem_bases_nybble_64(qpd->pqm->process, dqm->dev);
qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
- qpd->sh_mem_config = 0;
}
- qpd->sh_mem_config |= ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
- qpd->sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);
- qpd->sh_mem_ape1_limit = 0;
- qpd->sh_mem_ape1_base = 0;
-
pr_debug("kfd: is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases);
}
@@ -110,6 +114,8 @@ static void program_sh_mem_settings(struct device_queue_manager *dqm, struct qcm
{
struct mqd_manager *mqd;
+ BUG_ON(qpd->vmid < KFD_VMID_START_OFFSET);
+
mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
if (mqd == NULL)
return;
@@ -139,12 +145,6 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, struct queue *
print_queue(q);
mutex_lock(&dqm->lock);
- /* later memory apertures should be initialized in lazy mode */
- if (!is_mem_initialized)
- if (init_memory(dqm) != 0) {
- retval = -ENODATA;
- goto init_memory_failed;
- }
if (dqm->vmid_bitmap == 0 && qpd->vmid == 0) {
retval = -ENOMEM;
@@ -217,7 +217,6 @@ no_hqd:
*allocate_vmid = qpd->vmid = q->properties.vmid = 0;
}
no_vmid:
-init_memory_failed:
mutex_unlock(&dqm->lock);
return retval;
}
@@ -951,20 +950,25 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
qpd->sh_mem_ape1_limit = limit >> 16;
}
- default_mtype = (default_policy == cache_policy_coherent) ? MTYPE_NONCACHED : MTYPE_CACHED;
- ape1_mtype = (alternate_policy == cache_policy_coherent) ? MTYPE_NONCACHED : MTYPE_CACHED;
+ default_mtype = (default_policy == cache_policy_coherent) ?
+ MTYPE_NONCACHED :
+ MTYPE_CACHED;
+
+ ape1_mtype = (alternate_policy == cache_policy_coherent) ?
+ MTYPE_NONCACHED :
+ MTYPE_CACHED;
- qpd->sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
+ qpd->sh_mem_config = (qpd->sh_mem_config & PTR32)
+ | ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
| DEFAULT_MTYPE(default_mtype)
| APE1_MTYPE(ape1_mtype);
-
- if (sched_policy == KFD_SCHED_POLICY_NO_HWS)
+ if ((sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
program_sh_mem_settings(dqm, qpd);
-
- pr_debug("kfd: sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n", qpd->sh_mem_config,
- qpd->sh_mem_ape1_base, qpd->sh_mem_ape1_limit);
+ pr_debug("kfd: sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
+ qpd->sh_mem_config, qpd->sh_mem_ape1_base,
+ qpd->sh_mem_ape1_limit);
mutex_unlock(&dqm->lock);
return true;
--
1.9.1
From: Ben Goz <[email protected]>
This module is unnecessary, as we are allocating the read/write pointers
from the userspace thunk layer.
Signed-off-by: Ben Goz <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/hsa/radeon/Makefile | 2 +-
drivers/gpu/hsa/radeon/kfd_chardev.c | 22 +--
drivers/gpu/hsa/radeon/kfd_hw_pointer_store.c | 149 ---------------------
drivers/gpu/hsa/radeon/kfd_hw_pointer_store.h | 64 ---------
drivers/gpu/hsa/radeon/kfd_priv.h | 10 +-
drivers/gpu/hsa/radeon/kfd_process.c | 1 -
drivers/gpu/hsa/radeon/kfd_process_queue_manager.c | 62 ++-------
7 files changed, 23 insertions(+), 287 deletions(-)
delete mode 100644 drivers/gpu/hsa/radeon/kfd_hw_pointer_store.c
delete mode 100644 drivers/gpu/hsa/radeon/kfd_hw_pointer_store.h
diff --git a/drivers/gpu/hsa/radeon/Makefile b/drivers/gpu/hsa/radeon/Makefile
index 3409203..26ce0ae 100644
--- a/drivers/gpu/hsa/radeon/Makefile
+++ b/drivers/gpu/hsa/radeon/Makefile
@@ -6,7 +6,7 @@ radeon_kfd-y := kfd_module.o kfd_device.o kfd_chardev.o \
kfd_pasid.o kfd_topology.o kfd_process.o \
kfd_doorbell.o kfd_sched_cik_static.o kfd_registers.o \
kfd_vidmem.o kfd_interrupt.o kfd_aperture.o \
- kfd_queue.o kfd_hw_pointer_store.o kfd_mqd_manager.o \
+ kfd_queue.o kfd_mqd_manager.o \
kfd_kernel_queue.o kfd_packet_manager.o \
kfd_process_queue_manager.o kfd_device_queue_manager.o
diff --git a/drivers/gpu/hsa/radeon/kfd_chardev.c b/drivers/gpu/hsa/radeon/kfd_chardev.c
index b39df68..51f790f 100644
--- a/drivers/gpu/hsa/radeon/kfd_chardev.c
+++ b/drivers/gpu/hsa/radeon/kfd_chardev.c
@@ -32,9 +32,9 @@
#include <linux/time.h>
#include "kfd_priv.h"
#include <linux/mm.h>
+#include <linux/uaccess.h>
#include <uapi/asm-generic/mman-common.h>
#include <asm/processor.h>
-#include "kfd_hw_pointer_store.h"
#include "kfd_device_queue_manager.h"
static long kfd_ioctl(struct file *, unsigned int, unsigned long);
@@ -137,24 +137,32 @@ kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, void __user *a
if (copy_from_user(&args, arg, sizeof(args)))
return -EFAULT;
- /* need to validate parameters */
+ if (!access_ok(VERIFY_WRITE, args.read_pointer_address, sizeof(qptr_t)))
+ return -EFAULT;
+
+ if (!access_ok(VERIFY_WRITE, args.write_pointer_address, sizeof(qptr_t)))
+ return -EFAULT;
q_properties.is_interop = false;
q_properties.queue_percent = args.queue_percentage;
q_properties.priority = args.queue_priority;
q_properties.queue_address = args.ring_base_address;
q_properties.queue_size = args.ring_size;
+ q_properties.read_ptr = args.read_pointer_address;
+ q_properties.write_ptr = args.write_pointer_address;
pr_debug("%s Arguments: Queue Percentage (%d, %d)\n"
"Queue Priority (%d, %d)\n"
"Queue Address (0x%llX, 0x%llX)\n"
"Queue Size (%llX, %u)\n",
+ "Queue r/w Pointers (%llX, %llX)\n",
__func__,
q_properties.queue_percent, args.queue_percentage,
q_properties.priority, args.queue_priority,
q_properties.queue_address, args.ring_base_address,
- q_properties.queue_size, args.ring_size);
+ q_properties.queue_size, args.ring_size,
+ q_properties.read_ptr, q_properties.write_ptr);
dev = radeon_kfd_device_by_id(args.gpu_id);
if (dev == NULL)
@@ -177,8 +185,6 @@ kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, void __user *a
goto err_create_queue;
args.queue_id = queue_id;
- args.read_pointer_address = (uint64_t)q_properties.read_ptr;
- args.write_pointer_address = (uint64_t)q_properties.write_ptr;
args.doorbell_address = (uint64_t)q_properties.doorbell_ptr;
if (copy_to_user(arg, &args, sizeof(args))) {
@@ -515,11 +521,5 @@ kfd_mmap(struct file *filp, struct vm_area_struct *vma)
if (pgoff >= KFD_MMAP_DOORBELL_START && pgoff < KFD_MMAP_DOORBELL_END)
return radeon_kfd_doorbell_mmap(process, vma);
- if (pgoff >= KFD_MMAP_RPTR_START && pgoff < KFD_MMAP_RPTR_END)
- return radeon_kfd_hw_pointer_store_mmap(&process->read_ptr, vma);
-
- if (pgoff >= KFD_MMAP_WPTR_START && pgoff < KFD_MMAP_WPTR_END)
- return radeon_kfd_hw_pointer_store_mmap(&process->write_ptr, vma);
-
return -EINVAL;
}
diff --git a/drivers/gpu/hsa/radeon/kfd_hw_pointer_store.c b/drivers/gpu/hsa/radeon/kfd_hw_pointer_store.c
deleted file mode 100644
index 4e71f7d..0000000
--- a/drivers/gpu/hsa/radeon/kfd_hw_pointer_store.c
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
- * Copyright 2014 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#include <linux/types.h>
-#include <linux/version.h>
-#include <linux/kernel.h>
-#include <linux/mutex.h>
-#include <linux/mm.h>
-#include <linux/mman.h>
-#include <linux/slab.h>
-#include <linux/io.h>
-#include "kfd_hw_pointer_store.h"
-#include "kfd_priv.h"
-
-/* do the same trick as in map_doorbells() */
-static int hw_pointer_store_map(struct hw_pointer_store_properties *ptr,
- struct file *devkfd)
-{
- qptr_t __user *user_address;
-
- BUG_ON(!ptr || !devkfd);
-
- if (!ptr->page_mapping) {
- if (!ptr->page_address)
- return -EINVAL;
-
- user_address = (qptr_t __user *)vm_mmap(devkfd, 0, PAGE_SIZE,
- PROT_WRITE | PROT_READ , MAP_SHARED, ptr->offset);
-
- if (IS_ERR(user_address))
- return PTR_ERR(user_address);
-
- ptr->page_mapping = user_address;
- }
-
- return 0;
-}
-
-int hw_pointer_store_init(struct hw_pointer_store_properties *ptr,
- enum hw_pointer_store_type type)
-{
- unsigned long *addr;
-
- BUG_ON(!ptr);
-
- /* using the offset value as a hint for mmap to distinguish between page types */
- if (type == KFD_HW_POINTER_STORE_TYPE_RPTR)
- ptr->offset = KFD_MMAP_RPTR_START << PAGE_SHIFT;
- else if (type == KFD_HW_POINTER_STORE_TYPE_WPTR)
- ptr->offset = KFD_MMAP_WPTR_START << PAGE_SHIFT;
- else
- return -EINVAL;
-
- addr = (unsigned long *)get_zeroed_page(GFP_KERNEL);
- if (!addr) {
- pr_debug("Error allocating page\n");
- return -ENOMEM;
- }
-
- ptr->page_address = addr;
- ptr->page_mapping = NULL;
-
- return 0;
-}
-
-void hw_pointer_store_destroy(struct hw_pointer_store_properties *ptr)
-{
- BUG_ON(!ptr);
- pr_debug("kfd in func: %s\n", __func__);
- if (ptr->page_address)
- free_page((unsigned long)ptr->page_address);
- if (ptr->page_mapping)
- vm_munmap((uintptr_t)ptr->page_mapping, PAGE_SIZE);
- ptr->page_address = NULL;
- ptr->page_mapping = NULL;
-}
-
-qptr_t __user *
-hw_pointer_store_create_queue(struct hw_pointer_store_properties *ptr,
- unsigned int queue_id, struct file *devkfd)
-{
- BUG_ON(!ptr || queue_id >= MAX_PROCESS_QUEUES);
-
- /* mapping value to user space*/
- hw_pointer_store_map(ptr, devkfd);
-
- /* User process address */
- if (!ptr->page_mapping) {
- pr_debug(KERN_ERR "kfd: hw pointer store doesn't mapped to user space\n");
- return NULL;
- }
-
- ptr->page_mapping[queue_id] = 0;
-
- return ptr->page_mapping + queue_id;
-}
-
-unsigned long *hw_pointer_store_get_address
- (struct hw_pointer_store_properties *ptr, unsigned int queue_id)
-{
- return ptr->page_address + queue_id;
-}
-
-int radeon_kfd_hw_pointer_store_mmap(struct hw_pointer_store_properties *ptr,
- struct vm_area_struct *vma)
-{
- BUG_ON(!ptr || !vma);
-
- if (vma->vm_end - vma->vm_start != PAGE_SIZE) {
- pr_debug("start address(0x%lx) - end address(0x%lx) != len(0x%lx)\n",
- vma->vm_end, vma->vm_start, PAGE_SIZE);
- return -EINVAL;
- }
-
- vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE
- | VM_DONTDUMP | VM_PFNMAP;
-
- pr_debug("kfd: mapping hw pointer page in radeon_kfd_hw_pointer_store_mmap\n"
- " target user address == 0x%016llX\n"
- " physical address == 0x%016lX\n"
- " vm_flags == 0x%08lX\n"
- " size == 0x%08lX\n",
- (long long unsigned int) vma->vm_start,
- __pa(ptr->page_address), vma->vm_flags, PAGE_SIZE);
-
- /* mapping the page to user process */
- return remap_pfn_range(vma, vma->vm_start, __pa(ptr->page_address) >> PAGE_SHIFT, PAGE_SIZE, vma->vm_page_prot);
-}
-
diff --git a/drivers/gpu/hsa/radeon/kfd_hw_pointer_store.h b/drivers/gpu/hsa/radeon/kfd_hw_pointer_store.h
deleted file mode 100644
index 642703f..0000000
--- a/drivers/gpu/hsa/radeon/kfd_hw_pointer_store.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright 2014 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#ifndef KFD_HW_POINTER_STORE_H_
-#define KFD_HW_POINTER_STORE_H_
-
-#include <linux/mutex.h>
-
-/* Type that represents a HW doorbell slot. and read/write HW pointers */
-typedef u32 qptr_t;
-
-/* Hw Pointer Store */
-enum hw_pointer_store_type {
- KFD_HW_POINTER_STORE_TYPE_RPTR = 0,
- KFD_HW_POINTER_STORE_TYPE_WPTR
-};
-
-struct hw_pointer_store_properties {
- qptr_t __user *page_mapping;
- unsigned long *page_address;
- unsigned long offset;
-};
-
-int
-hw_pointer_store_init(struct hw_pointer_store_properties *ptr,
- enum hw_pointer_store_type type);
-
-void
-hw_pointer_store_destroy(struct hw_pointer_store_properties *ptr);
-
-qptr_t __user *
-hw_pointer_store_create_queue(struct hw_pointer_store_properties *ptr,
- unsigned int queue_id, struct file *devkfd);
-
-unsigned long *
-hw_pointer_store_get_address(struct hw_pointer_store_properties *ptr,
- unsigned int queue_id);
-
-int
-radeon_kfd_hw_pointer_store_mmap(struct hw_pointer_store_properties *ptr,
- struct vm_area_struct *vma);
-
-
-#endif /* KFD_HW_POINTER_STORE_H_ */
diff --git a/drivers/gpu/hsa/radeon/kfd_priv.h b/drivers/gpu/hsa/radeon/kfd_priv.h
index e6d4993..97bf58a 100644
--- a/drivers/gpu/hsa/radeon/kfd_priv.h
+++ b/drivers/gpu/hsa/radeon/kfd_priv.h
@@ -31,7 +31,6 @@
#include <linux/atomic.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>
-#include "kfd_hw_pointer_store.h"
struct kfd_scheduler_class;
@@ -50,10 +49,6 @@ struct kfd_scheduler_class;
** We figure out what type of memory the caller wanted by comparing the mmap page offset to known ranges. */
#define KFD_MMAP_DOORBELL_START (((1ULL << 32)*1) >> PAGE_SHIFT)
#define KFD_MMAP_DOORBELL_END (((1ULL << 32)*2) >> PAGE_SHIFT)
-#define KFD_MMAP_RPTR_START KFD_MMAP_DOORBELL_END
-#define KFD_MMAP_RPTR_END (((1ULL << 32)*3) >> PAGE_SHIFT)
-#define KFD_MMAP_WPTR_START KFD_MMAP_RPTR_END
-#define KFD_MMAP_WPTR_END (((1ULL << 32)*4) >> PAGE_SHIFT)
/*
* When working with cp scheduler we should assign the HIQ manually or via the radeon driver
@@ -85,6 +80,8 @@ typedef unsigned int pasid_t;
/* Type that represents a HW doorbell slot. */
typedef u32 doorbell_t;
+/* Type that represents queue pointer */
+typedef u32 qptr_t;
enum cache_policy {
cache_policy_coherent,
@@ -350,9 +347,6 @@ struct kfd_process {
/* List of kfd_process_device structures, one for each device the process is using. */
struct list_head per_device_data;
- struct hw_pointer_store_properties write_ptr;
- struct hw_pointer_store_properties read_ptr;
-
struct process_queue_manager pqm;
/* The process's queues. */
diff --git a/drivers/gpu/hsa/radeon/kfd_process.c b/drivers/gpu/hsa/radeon/kfd_process.c
index 9bb5cab..eb30cb3 100644
--- a/drivers/gpu/hsa/radeon/kfd_process.c
+++ b/drivers/gpu/hsa/radeon/kfd_process.c
@@ -168,7 +168,6 @@ static struct kfd_process *create_process(const struct task_struct *thread)
INIT_LIST_HEAD(&process->per_device_data);
- process->read_ptr.page_mapping = process->write_ptr.page_mapping = NULL;
err = pqm_init(&process->pqm, process);
if (err != 0)
goto err_process_pqm_init;
diff --git a/drivers/gpu/hsa/radeon/kfd_process_queue_manager.c b/drivers/gpu/hsa/radeon/kfd_process_queue_manager.c
index 2034d2b..89461ab 100644
--- a/drivers/gpu/hsa/radeon/kfd_process_queue_manager.c
+++ b/drivers/gpu/hsa/radeon/kfd_process_queue_manager.c
@@ -25,7 +25,6 @@
#include <linux/list.h>
#include "kfd_device_queue_manager.h"
#include "kfd_priv.h"
-#include "kfd_hw_pointer_store.h"
#include "kfd_kernel_queue.h"
static inline struct process_queue_node *get_queue_by_qid(struct process_queue_manager *pqm, unsigned int qid)
@@ -44,46 +43,6 @@ static inline struct process_queue_node *get_queue_by_qid(struct process_queue_m
return NULL;
}
-static int allocate_hw_pointers(struct process_queue_manager *pqm,
- struct queue_properties *q_properties,
- struct file *f, struct kfd_dev *dev,
- unsigned int qid)
-{
- int retval;
-
- BUG_ON(!pqm || !q_properties);
-
- retval = 0;
-
- pr_debug("kfd: In func %s\n", __func__);
-
- /* allocates r/w pointers in lazy mode */
- if (pqm->process->read_ptr.page_mapping == NULL)
- if (hw_pointer_store_init(&pqm->process->read_ptr, KFD_HW_POINTER_STORE_TYPE_RPTR) != 0)
- return -EBUSY;
- if (pqm->process->write_ptr.page_mapping == NULL)
- if (hw_pointer_store_init(&pqm->process->write_ptr, KFD_HW_POINTER_STORE_TYPE_WPTR) != 0) {
- hw_pointer_store_destroy(&pqm->process->read_ptr);
- return -EBUSY;
- }
-
- q_properties->read_ptr = hw_pointer_store_create_queue(&pqm->process->read_ptr, qid, f);
- if (!q_properties->read_ptr)
- return -ENOMEM;
-
- q_properties->write_ptr = hw_pointer_store_create_queue(&pqm->process->write_ptr, qid, f);
- if (!q_properties->write_ptr)
- return -ENOMEM;
-
- q_properties->doorbell_ptr = radeon_kfd_get_doorbell(f, pqm->process, dev, qid);
- if (!q_properties->doorbell_ptr)
- return -ENOMEM;
-
- q_properties->doorbell_off = radeon_kfd_queue_id_to_doorbell(dev, pqm->process, qid);
-
- return retval;
-}
-
static int find_available_queue_slot(struct process_queue_manager *pqm, unsigned int *qid)
{
unsigned long found;
@@ -133,15 +92,13 @@ void pqm_uninit(struct process_queue_manager *pqm)
(pqn->q != NULL) ?
pqn->q->properties.queue_id :
pqn->kq->queue->properties.queue_id);
- if (retval != 0)
+
+ if (retval != 0) {
+ pr_err("kfd: failed to destroy queue\n");
return;
+ }
}
kfree(pqm->queue_slot_bitmap);
-
- if (pqm->process->read_ptr.page_mapping)
- hw_pointer_store_destroy(&pqm->process->read_ptr);
- if (pqm->process->write_ptr.page_mapping)
- hw_pointer_store_destroy(&pqm->process->write_ptr);
}
static int create_cp_queue(struct process_queue_manager *pqm, struct kfd_dev *dev, struct queue **q,
@@ -151,11 +108,11 @@ static int create_cp_queue(struct process_queue_manager *pqm, struct kfd_dev *de
retval = 0;
- /* allocate hw pointers */
- if (allocate_hw_pointers(pqm, q_properties, f, dev, qid) != 0) {
- retval = -ENOMEM;
- goto err_allocate_hw_pointers;
- }
+ q_properties->doorbell_ptr = radeon_kfd_get_doorbell(f, pqm->process, dev, qid);
+ if (!q_properties->doorbell_ptr)
+ return -ENOMEM;
+
+ q_properties->doorbell_off = radeon_kfd_queue_id_to_doorbell(dev, pqm->process, qid);
/* let DQM handle it*/
q_properties->vmid = 0;
@@ -174,7 +131,6 @@ static int create_cp_queue(struct process_queue_manager *pqm, struct kfd_dev *de
return retval;
err_init_queue:
-err_allocate_hw_pointers:
return retval;
}
--
1.9.1
From: Ben Goz <[email protected]>
Signed-off-by: Ben Goz <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/hsa/radeon/kfd_device_queue_manager.c | 2 +-
drivers/gpu/hsa/radeon/kfd_device_queue_manager.h | 2 +-
drivers/gpu/hsa/radeon/kfd_packet_manager.c | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c b/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c
index 3e1def1..5ec8da7 100644
--- a/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c
+++ b/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c
@@ -55,7 +55,7 @@ static inline unsigned int get_first_pipe(struct device_queue_manager *dqm)
static inline unsigned int get_pipes_num_cpsch(void)
{
- return PIPE_PER_ME_CP_SCHEDULING - 1;
+ return PIPE_PER_ME_CP_SCHEDULING;
}
static unsigned int get_sh_mem_bases_nybble_64(struct kfd_process *process, struct kfd_dev *dev)
diff --git a/drivers/gpu/hsa/radeon/kfd_device_queue_manager.h b/drivers/gpu/hsa/radeon/kfd_device_queue_manager.h
index 57dc636..037eaf8 100644
--- a/drivers/gpu/hsa/radeon/kfd_device_queue_manager.h
+++ b/drivers/gpu/hsa/radeon/kfd_device_queue_manager.h
@@ -31,7 +31,7 @@
#define QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS (500)
#define QUEUES_PER_PIPE (8)
-#define PIPE_PER_ME_CP_SCHEDULING (4)
+#define PIPE_PER_ME_CP_SCHEDULING (3)
#define CIK_VMID_NUM (8)
#define KFD_VMID_START_OFFSET (8)
#define VMID_PER_DEVICE CIK_VMID_NUM
diff --git a/drivers/gpu/hsa/radeon/kfd_packet_manager.c b/drivers/gpu/hsa/radeon/kfd_packet_manager.c
index 3fc8c34..621a720 100644
--- a/drivers/gpu/hsa/radeon/kfd_packet_manager.c
+++ b/drivers/gpu/hsa/radeon/kfd_packet_manager.c
@@ -62,7 +62,7 @@ static void pm_calc_rlib_size(struct packet_manager *pm, unsigned int *rlib_size
/* check if there is over subscription*/
*over_subscription = false;
if ((process_count >= VMID_PER_DEVICE) ||
- queue_count >= PIPE_PER_ME_CP_SCHEDULING * QUEUES_PER_PIPE) {
+ queue_count > PIPE_PER_ME_CP_SCHEDULING * QUEUES_PER_PIPE) {
*over_subscription = true;
pr_debug("kfd: over subscribed runlist\n");
}
--
1.9.1
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/hsa/radeon/kfd_module.c | 19 ++++++++++++-------
1 file changed, 12 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/hsa/radeon/kfd_module.c b/drivers/gpu/hsa/radeon/kfd_module.c
index 85069c5..fbfcce6 100644
--- a/drivers/gpu/hsa/radeon/kfd_module.c
+++ b/drivers/gpu/hsa/radeon/kfd_module.c
@@ -27,11 +27,13 @@
#include <linux/device.h>
#include "kfd_priv.h"
-#define DRIVER_AUTHOR "Andrew Lewycky, Oded Gabbay, Evgeny Pinchuk, others."
+#define KFD_DRIVER_AUTHOR "AMD Inc. and others"
-#define DRIVER_NAME "kfd"
-#define DRIVER_DESC "AMD HSA Kernel Fusion Driver"
-#define DRIVER_DATE "20140127"
+#define KFD_DRIVER_DESC "Standalone HSA driver for AMD's GPUs"
+#define KFD_DRIVER_DATE "20140424"
+#define KFD_DRIVER_MAJOR 0
+#define KFD_DRIVER_MINOR 5
+#define KFD_DRIVER_PATCHLEVEL 0
const struct kfd2kgd_calls *kfd2kgd;
static const struct kgd2kfd_calls kgd2kfd = {
@@ -120,6 +122,9 @@ static void __exit kfd_module_exit(void)
module_init(kfd_module_init);
module_exit(kfd_module_exit);
-MODULE_AUTHOR(DRIVER_AUTHOR);
-MODULE_DESCRIPTION(DRIVER_DESC);
-MODULE_LICENSE("GPL");
+MODULE_AUTHOR(KFD_DRIVER_AUTHOR);
+MODULE_DESCRIPTION(KFD_DRIVER_DESC);
+MODULE_LICENSE("GPL and additional rights");
+MODULE_VERSION(__stringify(KFD_DRIVER_MAJOR) "."
+ __stringify(KFD_DRIVER_MINOR) "."
+ __stringify(KFD_DRIVER_PATCHLEVEL));
--
1.9.1
This patch rearranges the structures defined in kfd_ioctl.h so that
all the uint64_t variables are located at the start of each structure,
followed by all the uint32_t variables.
Signed-off-by: Oded Gabbay <[email protected]>
---
include/uapi/linux/kfd_ioctl.h | 51 ++++++++++++++++++++++--------------------
1 file changed, 27 insertions(+), 24 deletions(-)
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 509c4a0..3cedd1a 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -42,15 +42,15 @@ struct kfd_ioctl_get_version_args {
struct kfd_ioctl_create_queue_args {
uint64_t ring_base_address; /* to KFD */
+ uint64_t write_pointer_address; /* from KFD */
+ uint64_t read_pointer_address; /* from KFD */
+ uint64_t doorbell_address; /* from KFD */
+
uint32_t ring_size; /* to KFD */
uint32_t gpu_id; /* to KFD */
uint32_t queue_type; /* to KFD */
uint32_t queue_percentage; /* to KFD */
uint32_t queue_priority; /* to KFD */
-
- uint64_t write_pointer_address; /* from KFD */
- uint64_t read_pointer_address; /* from KFD */
- uint64_t doorbell_address; /* from KFD */
uint32_t queue_id; /* from KFD */
};
@@ -59,8 +59,9 @@ struct kfd_ioctl_destroy_queue_args {
};
struct kfd_ioctl_update_queue_args {
- uint32_t queue_id; /* to KFD */
uint64_t ring_base_address; /* to KFD */
+
+ uint32_t queue_id; /* to KFD */
uint32_t ring_size; /* to KFD */
uint32_t queue_percentage; /* to KFD */
uint32_t queue_priority; /* to KFD */
@@ -71,31 +72,33 @@ struct kfd_ioctl_update_queue_args {
#define KFD_IOC_CACHE_POLICY_NONCOHERENT 1
struct kfd_ioctl_set_memory_policy_args {
+ uint64_t alternate_aperture_base; /* to KFD */
+ uint64_t alternate_aperture_size; /* to KFD */
+
uint32_t gpu_id; /* to KFD */
uint32_t default_policy; /* to KFD */
uint32_t alternate_policy; /* to KFD */
- uint64_t alternate_aperture_base; /* to KFD */
- uint64_t alternate_aperture_size; /* to KFD */
};
struct kfd_ioctl_get_clock_counters_args {
- uint32_t gpu_id; /* to KFD */
uint64_t gpu_clock_counter; /* from KFD */
uint64_t cpu_clock_counter; /* from KFD */
uint64_t system_clock_counter; /* from KFD */
uint64_t system_clock_freq; /* from KFD */
+
+ uint32_t gpu_id; /* to KFD */
};
#define NUM_OF_SUPPORTED_GPUS 7
struct kfd_process_device_apertures {
- uint64_t lds_base;/* from KFD */
- uint64_t lds_limit;/* from KFD */
- uint64_t scratch_base;/* from KFD */
- uint64_t scratch_limit;/* from KFD */
- uint64_t gpuvm_base;/* from KFD */
- uint64_t gpuvm_limit;/* from KFD */
- uint32_t gpu_id;/* from KFD */
+ uint64_t lds_base; /* from KFD */
+ uint64_t lds_limit; /* from KFD */
+ uint64_t scratch_base; /* from KFD */
+ uint64_t scratch_limit; /* from KFD */
+ uint64_t gpuvm_base; /* from KFD */
+ uint64_t gpuvm_limit; /* from KFD */
+ uint32_t gpu_id; /* from KFD */
};
struct kfd_ioctl_get_process_apertures_args {
@@ -104,24 +107,24 @@ struct kfd_ioctl_get_process_apertures_args {
};
struct kfd_ioctl_pmc_acquire_access_args {
- uint32_t gpu_id; /* to KFD */
- uint64_t trace_id; /* to KFD */
+ uint64_t trace_id; /* to KFD */
+ uint32_t gpu_id; /* to KFD */
};
struct kfd_ioctl_pmc_release_access_args {
- uint32_t gpu_id; /* to KFD */
- uint64_t trace_id; /* to KFD */
+ uint64_t trace_id; /* to KFD */
+ uint32_t gpu_id; /* to KFD */
};
#define KFD_IOC_MAGIC 'K'
-#define KFD_IOC_GET_VERSION _IOR(KFD_IOC_MAGIC, 1, struct kfd_ioctl_get_version_args)
-#define KFD_IOC_CREATE_QUEUE _IOWR(KFD_IOC_MAGIC, 2, struct kfd_ioctl_create_queue_args)
-#define KFD_IOC_DESTROY_QUEUE _IOWR(KFD_IOC_MAGIC, 3, struct kfd_ioctl_destroy_queue_args)
+#define KFD_IOC_GET_VERSION _IOR(KFD_IOC_MAGIC, 1, struct kfd_ioctl_get_version_args)
+#define KFD_IOC_CREATE_QUEUE _IOWR(KFD_IOC_MAGIC, 2, struct kfd_ioctl_create_queue_args)
+#define KFD_IOC_DESTROY_QUEUE _IOWR(KFD_IOC_MAGIC, 3, struct kfd_ioctl_destroy_queue_args)
#define KFD_IOC_SET_MEMORY_POLICY _IOW(KFD_IOC_MAGIC, 4, struct kfd_ioctl_set_memory_policy_args)
#define KFD_IOC_GET_CLOCK_COUNTERS _IOWR(KFD_IOC_MAGIC, 5, struct kfd_ioctl_get_clock_counters_args)
-#define KFD_IOC_GET_PROCESS_APERTURES _IOR(KFD_IOC_MAGIC, 6, struct kfd_ioctl_get_process_apertures_args)
-#define KFD_IOC_UPDATE_QUEUE _IOW(KFD_IOC_MAGIC, 7, struct kfd_ioctl_update_queue_args)
+#define KFD_IOC_GET_PROCESS_APERTURES _IOR(KFD_IOC_MAGIC, 6, struct kfd_ioctl_get_process_apertures_args)
+#define KFD_IOC_UPDATE_QUEUE _IOW(KFD_IOC_MAGIC, 7, struct kfd_ioctl_update_queue_args)
#define KFD_IOC_PMC_ACQUIRE_ACCESS _IOW(KFD_IOC_MAGIC, 12, struct kfd_ioctl_pmc_acquire_access_args)
#define KFD_IOC_PMC_RELEASE_ACCESS _IOW(KFD_IOC_MAGIC, 13, struct kfd_ioctl_pmc_release_access_args)
--
1.9.1
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/hsa/radeon/kfd_topology.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/hsa/radeon/kfd_topology.c b/drivers/gpu/hsa/radeon/kfd_topology.c
index 213ae7b..059b7db 100644
--- a/drivers/gpu/hsa/radeon/kfd_topology.c
+++ b/drivers/gpu/hsa/radeon/kfd_topology.c
@@ -1121,7 +1121,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
gpu_id = kfd_generate_gpu_id(gpu);
- pr_info("Adding new GPU (ID: 0x%x) to topology\n", gpu_id);
+ pr_debug("kfd: Adding new GPU (ID: 0x%x) to topology\n", gpu_id);
down_write(&topology_lock);
/*
--
1.9.1
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/hsa/radeon/kfd_device_queue_manager.h | 6 +++---
drivers/gpu/hsa/radeon/kfd_hw_pointer_store.h | 6 +++---
drivers/gpu/hsa/radeon/kfd_kernel_queue.h | 6 +++---
drivers/gpu/hsa/radeon/kfd_module.c | 8 ++++----
drivers/gpu/hsa/radeon/kfd_mqd_manager.h | 6 +++---
drivers/gpu/hsa/radeon/kfd_pm4_headers.h | 11 ++++++-----
drivers/gpu/hsa/radeon/kfd_pm4_opcodes.h | 6 +++---
7 files changed, 25 insertions(+), 24 deletions(-)
diff --git a/drivers/gpu/hsa/radeon/kfd_device_queue_manager.h b/drivers/gpu/hsa/radeon/kfd_device_queue_manager.h
index fe9ef10..57dc636 100644
--- a/drivers/gpu/hsa/radeon/kfd_device_queue_manager.h
+++ b/drivers/gpu/hsa/radeon/kfd_device_queue_manager.h
@@ -21,8 +21,8 @@
*
*/
-#ifndef DEVICE_QUEUE_MANAGER_H_
-#define DEVICE_QUEUE_MANAGER_H_
+#ifndef KFD_DEVICE_QUEUE_MANAGER_H_
+#define KFD_DEVICE_QUEUE_MANAGER_H_
#include <linux/rwsem.h>
#include <linux/list.h>
@@ -98,4 +98,4 @@ struct device_queue_manager {
-#endif /* DEVICE_QUEUE_MANAGER_H_ */
+#endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */
diff --git a/drivers/gpu/hsa/radeon/kfd_hw_pointer_store.h b/drivers/gpu/hsa/radeon/kfd_hw_pointer_store.h
index f384b7f..642703f 100644
--- a/drivers/gpu/hsa/radeon/kfd_hw_pointer_store.h
+++ b/drivers/gpu/hsa/radeon/kfd_hw_pointer_store.h
@@ -21,8 +21,8 @@
*
*/
-#ifndef HW_POINTER_STORE_H_
-#define HW_POINTER_STORE_H_
+#ifndef KFD_HW_POINTER_STORE_H_
+#define KFD_HW_POINTER_STORE_H_
#include <linux/mutex.h>
@@ -61,4 +61,4 @@ radeon_kfd_hw_pointer_store_mmap(struct hw_pointer_store_properties *ptr,
struct vm_area_struct *vma);
-#endif /* HW_POINTER_STORE_H_ */
+#endif /* KFD_HW_POINTER_STORE_H_ */
diff --git a/drivers/gpu/hsa/radeon/kfd_kernel_queue.h b/drivers/gpu/hsa/radeon/kfd_kernel_queue.h
index 963e861..abfb9c8 100644
--- a/drivers/gpu/hsa/radeon/kfd_kernel_queue.h
+++ b/drivers/gpu/hsa/radeon/kfd_kernel_queue.h
@@ -21,8 +21,8 @@
*
*/
-#ifndef KERNEL_QUEUE_H_
-#define KERNEL_QUEUE_H_
+#ifndef KFD_KERNEL_QUEUE_H_
+#define KFD_KERNEL_QUEUE_H_
#include <linux/list.h>
#include <linux/types.h>
@@ -63,4 +63,4 @@ struct kernel_queue {
struct list_head list;
};
-#endif /* KERNEL_QUEUE_H_ */
+#endif /* KFD_KERNEL_QUEUE_H_ */
diff --git a/drivers/gpu/hsa/radeon/kfd_module.c b/drivers/gpu/hsa/radeon/kfd_module.c
index e8bb67c..85069c5 100644
--- a/drivers/gpu/hsa/radeon/kfd_module.c
+++ b/drivers/gpu/hsa/radeon/kfd_module.c
@@ -24,7 +24,7 @@
#include <linux/sched.h>
#include <linux/notifier.h>
#include <linux/moduleparam.h>
-
+#include <linux/device.h>
#include "kfd_priv.h"
#define DRIVER_AUTHOR "Andrew Lewycky, Oded Gabbay, Evgeny Pinchuk, others."
@@ -46,7 +46,7 @@ static const struct kgd2kfd_calls kgd2kfd = {
int sched_policy = KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION;
module_param(sched_policy, int, S_IRUSR | S_IWUSR);
-MODULE_PARM_DESC(sched_policy, "Kernel comline parameter define the kfd scheduling policy");
+MODULE_PARM_DESC(sched_policy, "Kernel cmdline parameter define the kfd scheduling policy");
bool kgd2kfd_init(unsigned interface_version,
const struct kfd2kgd_calls *f2g,
@@ -95,7 +95,7 @@ static int __init kfd_module_init(void)
if (err < 0)
goto err_topology;
- pr_info("[hsa] Initialized kfd module");
+ dev_info(kfd_device, "Initialized module\n");
return 0;
err_topology:
@@ -114,7 +114,7 @@ static void __exit kfd_module_exit(void)
mmput_unregister_notifier(&kfd_mmput_nb);
radeon_kfd_chardev_exit();
radeon_kfd_pasid_exit();
- pr_info("[hsa] Removed kfd module");
+ dev_info(kfd_device, "Removed module\n");
}
module_init(kfd_module_init);
diff --git a/drivers/gpu/hsa/radeon/kfd_mqd_manager.h b/drivers/gpu/hsa/radeon/kfd_mqd_manager.h
index 8e7a5fd..314d490 100644
--- a/drivers/gpu/hsa/radeon/kfd_mqd_manager.h
+++ b/drivers/gpu/hsa/radeon/kfd_mqd_manager.h
@@ -21,8 +21,8 @@
*
*/
-#ifndef MQD_MANAGER_H_
-#define MQD_MANAGER_H_
+#ifndef KFD_MQD_MANAGER_H_
+#define KFD_MQD_MANAGER_H_
#include "kfd_priv.h"
@@ -44,4 +44,4 @@ struct mqd_manager {
};
-#endif /* MQD_MANAGER_H_ */
+#endif /* KFD_MQD_MANAGER_H_ */
diff --git a/drivers/gpu/hsa/radeon/kfd_pm4_headers.h b/drivers/gpu/hsa/radeon/kfd_pm4_headers.h
index dae460f..3ffb3f4 100644
--- a/drivers/gpu/hsa/radeon/kfd_pm4_headers.h
+++ b/drivers/gpu/hsa/radeon/kfd_pm4_headers.h
@@ -21,8 +21,8 @@
*
*/
-#ifndef F32_MES_PM4_PACKETS_72_H
-#define F32_MES_PM4_PACKETS_72_H
+#ifndef KFD_PM4_HEADERS_H_
+#define KFD_PM4_HEADERS_H_
#ifndef PM4_HEADER_DEFINED
#define PM4_HEADER_DEFINED
@@ -657,7 +657,7 @@ typedef struct _PM4__SET_SH_REG {
#ifndef _PM4__SET_CONFIG_REG_DEFINED
#define _PM4__SET_CONFIG_REG_DEFINED
-typedef struct _PM4__SET_CONFIG_REG {
+struct pm4__set_config_reg {
union {
PM4_TYPE_3_HEADER header;
unsigned int ordinal1;
@@ -676,6 +676,7 @@ typedef struct _PM4__SET_CONFIG_REG {
unsigned int reg_data[1]; /* 1..N of these fields */
-} PM4_SET_CONFIG_REG, *PPM4_SET_CONFIG_REG;
-#endif
+};
#endif
+
+#endif /* KFD_PM4_HEADERS_H_ */
diff --git a/drivers/gpu/hsa/radeon/kfd_pm4_opcodes.h b/drivers/gpu/hsa/radeon/kfd_pm4_opcodes.h
index c04060c..b72fa3b 100644
--- a/drivers/gpu/hsa/radeon/kfd_pm4_opcodes.h
+++ b/drivers/gpu/hsa/radeon/kfd_pm4_opcodes.h
@@ -22,8 +22,8 @@
*/
-#ifndef PM4_IT_OPCODES_H
-#define PM4_IT_OPCODES_H
+#ifndef KFD_PM4_OPCODES_H
+#define KFD_PM4_OPCODES_H
enum it_opcode_type {
IT_NOP = 0x10,
@@ -103,5 +103,5 @@ enum it_opcode_type {
#define PM4_TYPE_2 2
#define PM4_TYPE_3 3
-#endif /* PM4_IT_OPCODES_H */
+#endif /* KFD_PM4_OPCODES_H */
--
1.9.1
From: Ben Goz <[email protected]>
This patch adds a new IOCTL that enables the user to perform an update to an
existing HSA queue.
Signed-off-by: Ben Goz <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/hsa/radeon/cik_mqds.h | 1 -
drivers/gpu/hsa/radeon/kfd_chardev.c | 29 ++++++++++++++++++++++
drivers/gpu/hsa/radeon/kfd_device_queue_manager.c | 1 -
drivers/gpu/hsa/radeon/kfd_device_queue_manager.h | 1 -
drivers/gpu/hsa/radeon/kfd_hw_pointer_store.c | 1 -
drivers/gpu/hsa/radeon/kfd_hw_pointer_store.h | 1 -
drivers/gpu/hsa/radeon/kfd_kernel_queue.c | 1 -
drivers/gpu/hsa/radeon/kfd_kernel_queue.h | 1 -
drivers/gpu/hsa/radeon/kfd_mqd_manager.c | 1 -
drivers/gpu/hsa/radeon/kfd_mqd_manager.h | 1 -
drivers/gpu/hsa/radeon/kfd_packet_manager.c | 23 ++++++++++++++---
drivers/gpu/hsa/radeon/kfd_process_queue_manager.c | 1 -
drivers/gpu/hsa/radeon/kfd_queue.c | 1 -
include/uapi/linux/kfd_ioctl.h | 9 +++++++
14 files changed, 58 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/hsa/radeon/cik_mqds.h b/drivers/gpu/hsa/radeon/cik_mqds.h
index 58945c8..35a35b4 100644
--- a/drivers/gpu/hsa/radeon/cik_mqds.h
+++ b/drivers/gpu/hsa/radeon/cik_mqds.h
@@ -19,7 +19,6 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
- * Author: Ben Goz
*/
#ifndef CIK_MQDS_H_
diff --git a/drivers/gpu/hsa/radeon/kfd_chardev.c b/drivers/gpu/hsa/radeon/kfd_chardev.c
index bb2ef02..9a77332 100644
--- a/drivers/gpu/hsa/radeon/kfd_chardev.c
+++ b/drivers/gpu/hsa/radeon/kfd_chardev.c
@@ -230,6 +230,31 @@ kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p, void __user *a
return retval;
}
+static int
+kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p, void __user *arg)
+{
+ int retval;
+ struct kfd_ioctl_update_queue_args args;
+ struct queue_properties properties;
+
+ if (copy_from_user(&args, arg, sizeof(args)))
+ return -EFAULT;
+
+ properties.queue_address = args.ring_base_address;
+ properties.queue_size = args.ring_size;
+ properties.queue_percent = args.queue_percentage;
+ properties.priority = args.queue_priority;
+
+ pr_debug("kfd: updating queue id %d for PASID %d\n", args.queue_id, p->pasid);
+
+ mutex_lock(&p->mutex);
+
+ retval = pqm_update_queue(&p->pqm, args.queue_id, &properties);
+
+ mutex_unlock(&p->mutex);
+
+ return retval;
+}
static long
kfd_ioctl_set_memory_policy(struct file *filep, struct kfd_process *p, void __user *arg)
@@ -398,6 +423,10 @@ kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
err = kfd_ioctl_get_process_apertures(filep, process, (void __user *)arg);
break;
+ case KFD_IOC_UPDATE_QUEUE:
+ err = kfd_ioctl_update_queue(filep, process, (void __user *)arg);
+ break;
+
default:
dev_err(kfd_device,
"unknown ioctl cmd 0x%x, arg 0x%lx)\n",
diff --git a/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c b/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c
index 9e21074..c2d91c9 100644
--- a/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c
+++ b/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c
@@ -19,7 +19,6 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
- * Author: Ben Goz
*/
#include <linux/slab.h>
diff --git a/drivers/gpu/hsa/radeon/kfd_device_queue_manager.h b/drivers/gpu/hsa/radeon/kfd_device_queue_manager.h
index 0529a96..fe9ef10 100644
--- a/drivers/gpu/hsa/radeon/kfd_device_queue_manager.h
+++ b/drivers/gpu/hsa/radeon/kfd_device_queue_manager.h
@@ -19,7 +19,6 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
- * Author: Ben Goz
*/
#ifndef DEVICE_QUEUE_MANAGER_H_
diff --git a/drivers/gpu/hsa/radeon/kfd_hw_pointer_store.c b/drivers/gpu/hsa/radeon/kfd_hw_pointer_store.c
index 1372fb2..4e71f7d 100644
--- a/drivers/gpu/hsa/radeon/kfd_hw_pointer_store.c
+++ b/drivers/gpu/hsa/radeon/kfd_hw_pointer_store.c
@@ -19,7 +19,6 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
- * Author: Ben Goz
*/
#include <linux/types.h>
diff --git a/drivers/gpu/hsa/radeon/kfd_hw_pointer_store.h b/drivers/gpu/hsa/radeon/kfd_hw_pointer_store.h
index be1d6cb..f384b7f 100644
--- a/drivers/gpu/hsa/radeon/kfd_hw_pointer_store.h
+++ b/drivers/gpu/hsa/radeon/kfd_hw_pointer_store.h
@@ -19,7 +19,6 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
- * Author: Ben Goz
*/
#ifndef HW_POINTER_STORE_H_
diff --git a/drivers/gpu/hsa/radeon/kfd_kernel_queue.c b/drivers/gpu/hsa/radeon/kfd_kernel_queue.c
index 61f420f..aa64693e 100644
--- a/drivers/gpu/hsa/radeon/kfd_kernel_queue.c
+++ b/drivers/gpu/hsa/radeon/kfd_kernel_queue.c
@@ -19,7 +19,6 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
- * Author: Ben Goz
*/
#include <linux/types.h>
diff --git a/drivers/gpu/hsa/radeon/kfd_kernel_queue.h b/drivers/gpu/hsa/radeon/kfd_kernel_queue.h
index 339376c..963e861 100644
--- a/drivers/gpu/hsa/radeon/kfd_kernel_queue.h
+++ b/drivers/gpu/hsa/radeon/kfd_kernel_queue.h
@@ -19,7 +19,6 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
- * Author: Ben Goz
*/
#ifndef KERNEL_QUEUE_H_
diff --git a/drivers/gpu/hsa/radeon/kfd_mqd_manager.c b/drivers/gpu/hsa/radeon/kfd_mqd_manager.c
index 14b248f..a3e9f7c 100644
--- a/drivers/gpu/hsa/radeon/kfd_mqd_manager.c
+++ b/drivers/gpu/hsa/radeon/kfd_mqd_manager.c
@@ -19,7 +19,6 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
- * Author: Ben Goz
*/
#include <linux/printk.h>
diff --git a/drivers/gpu/hsa/radeon/kfd_mqd_manager.h b/drivers/gpu/hsa/radeon/kfd_mqd_manager.h
index e7b39ee..8e7a5fd 100644
--- a/drivers/gpu/hsa/radeon/kfd_mqd_manager.h
+++ b/drivers/gpu/hsa/radeon/kfd_mqd_manager.h
@@ -19,7 +19,6 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
- * Author: Ben Goz
*/
#ifndef MQD_MANAGER_H_
diff --git a/drivers/gpu/hsa/radeon/kfd_packet_manager.c b/drivers/gpu/hsa/radeon/kfd_packet_manager.c
index 4967b7c..3fc8c34 100644
--- a/drivers/gpu/hsa/radeon/kfd_packet_manager.c
+++ b/drivers/gpu/hsa/radeon/kfd_packet_manager.c
@@ -1,9 +1,26 @@
/*
- * packet_manager.c
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
*
- * Created on: Mar 16, 2014
- * Author: ben
*/
+
#include <linux/slab.h>
#include <linux/mutex.h>
#include "kfd_device_queue_manager.h"
diff --git a/drivers/gpu/hsa/radeon/kfd_process_queue_manager.c b/drivers/gpu/hsa/radeon/kfd_process_queue_manager.c
index 6e38ca4..fe74dd7 100644
--- a/drivers/gpu/hsa/radeon/kfd_process_queue_manager.c
+++ b/drivers/gpu/hsa/radeon/kfd_process_queue_manager.c
@@ -19,7 +19,6 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
- * Author: Ben Goz
*/
#include <linux/slab.h>
diff --git a/drivers/gpu/hsa/radeon/kfd_queue.c b/drivers/gpu/hsa/radeon/kfd_queue.c
index 78fe180..2d22cc1 100644
--- a/drivers/gpu/hsa/radeon/kfd_queue.c
+++ b/drivers/gpu/hsa/radeon/kfd_queue.c
@@ -19,7 +19,6 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
- * Author: Ben Goz
*/
#include <linux/slab.h>
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 5134880..d58231d 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -58,6 +58,14 @@ struct kfd_ioctl_destroy_queue_args {
uint32_t queue_id; /* to KFD */
};
+struct kfd_ioctl_update_queue_args {
+ uint32_t queue_id; /* to KFD */
+ uint64_t ring_base_address; /* to KFD */
+ uint32_t ring_size; /* to KFD */
+ uint32_t queue_percentage; /* to KFD */
+ uint32_t queue_priority; /* to KFD */
+};
+
/* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */
#define KFD_IOC_CACHE_POLICY_COHERENT 0
#define KFD_IOC_CACHE_POLICY_NONCOHERENT 1
@@ -103,6 +111,7 @@ struct kfd_ioctl_get_process_apertures_args {
#define KFD_IOC_SET_MEMORY_POLICY _IOW(KFD_IOC_MAGIC, 4, struct kfd_ioctl_set_memory_policy_args)
#define KFD_IOC_GET_CLOCK_COUNTERS _IOWR(KFD_IOC_MAGIC, 5, struct kfd_ioctl_get_clock_counters_args)
#define KFD_IOC_GET_PROCESS_APERTURES _IOR(KFD_IOC_MAGIC, 6, struct kfd_ioctl_get_process_apertures_args)
+#define KFD_IOC_UPDATE_QUEUE _IOW(KFD_IOC_MAGIC, 7, struct kfd_ioctl_update_queue_args)
#pragma pack(pop)
--
1.9.1
From: Ben Goz <[email protected]>
This patch makes the switch from the old KFD queue scheduler to the new KFD
queue scheduler. The new scheduler supports H/W CP scheduling, over-subscription
of queues and pre-emption of queues.
Signed-off-by: Ben Goz <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/hsa/radeon/kfd_aperture.c | 1 -
drivers/gpu/hsa/radeon/kfd_chardev.c | 107 +++++++++++++++------------------
drivers/gpu/hsa/radeon/kfd_device.c | 31 ++++++----
drivers/gpu/hsa/radeon/kfd_interrupt.c | 4 +-
drivers/gpu/hsa/radeon/kfd_priv.h | 2 +
drivers/gpu/hsa/radeon/kfd_process.c | 56 ++++-------------
include/uapi/linux/kfd_ioctl.h | 4 +-
7 files changed, 88 insertions(+), 117 deletions(-)
diff --git a/drivers/gpu/hsa/radeon/kfd_aperture.c b/drivers/gpu/hsa/radeon/kfd_aperture.c
index 9e2d6da..2c72b21 100644
--- a/drivers/gpu/hsa/radeon/kfd_aperture.c
+++ b/drivers/gpu/hsa/radeon/kfd_aperture.c
@@ -32,7 +32,6 @@
#include <uapi/linux/kfd_ioctl.h>
#include <linux/time.h>
#include "kfd_priv.h"
-#include "kfd_scheduler.h"
#include <linux/mm.h>
#include <uapi/asm-generic/mman-common.h>
#include <asm/processor.h>
diff --git a/drivers/gpu/hsa/radeon/kfd_chardev.c b/drivers/gpu/hsa/radeon/kfd_chardev.c
index 07cac88..bb2ef02 100644
--- a/drivers/gpu/hsa/radeon/kfd_chardev.c
+++ b/drivers/gpu/hsa/radeon/kfd_chardev.c
@@ -31,10 +31,11 @@
#include <uapi/linux/kfd_ioctl.h>
#include <linux/time.h>
#include "kfd_priv.h"
-#include "kfd_scheduler.h"
#include <linux/mm.h>
#include <uapi/asm-generic/mman-common.h>
#include <asm/processor.h>
+#include "kfd_hw_pointer_store.h"
+#include "kfd_device_queue_manager.h"
static long kfd_ioctl(struct file *, unsigned int, unsigned long);
static int kfd_open(struct inode *, struct file *);
@@ -128,24 +129,36 @@ kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, void __user *a
struct kfd_dev *dev;
int err = 0;
unsigned int queue_id;
- struct kfd_queue *queue;
struct kfd_process_device *pdd;
+ struct queue_properties q_properties;
+
+ memset(&q_properties, 0, sizeof(struct queue_properties));
if (copy_from_user(&args, arg, sizeof(args)))
return -EFAULT;
- dev = radeon_kfd_device_by_id(args.gpu_id);
- if (dev == NULL)
- return -EINVAL;
+ /* need to validate parameters */
+
+ q_properties.is_interop = false;
+ q_properties.queue_percent = args.queue_percentage;
+ q_properties.priority = args.queue_priority;
+ q_properties.queue_address = args.ring_base_address;
+ q_properties.queue_size = args.ring_size;
- queue = kzalloc(
- offsetof(struct kfd_queue, scheduler_queue) + dev->device_info->scheduler_class->queue_size,
- GFP_KERNEL);
- if (!queue)
- return -ENOMEM;
+ pr_debug("%s Arguments: Queue Percentage (%d, %d)\n"
+ "Queue Priority (%d, %d)\n"
+ "Queue Address (0x%llX, 0x%llX)\n"
+ "Queue Size (0x%llX, %u)\n",
+ __func__,
+ q_properties.queue_percent, args.queue_percentage,
+ q_properties.priority, args.queue_priority,
+ q_properties.queue_address, args.ring_base_address,
+ q_properties.queue_size, args.ring_size);
- queue->dev = dev;
+ dev = radeon_kfd_device_by_id(args.gpu_id);
+ if (dev == NULL)
+ return -EINVAL;
mutex_lock(&p->mutex);
@@ -159,23 +172,14 @@ kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, void __user *a
p->pasid,
dev->id);
- if (!radeon_kfd_allocate_queue_id(p, &queue_id))
- goto err_allocate_queue_id;
-
- err = dev->device_info->scheduler_class->create_queue(dev->scheduler, pdd->scheduler_process,
- &queue->scheduler_queue,
- (void __user *)args.ring_base_address,
- args.ring_size,
- (void __user *)args.read_pointer_address,
- (void __user *)args.write_pointer_address,
- radeon_kfd_queue_id_to_doorbell(dev, p, queue_id));
- if (err)
+ err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, 0, KFD_QUEUE_TYPE_COMPUTE, &queue_id);
+ if (err != 0)
goto err_create_queue;
- radeon_kfd_install_queue(p, queue_id, queue);
-
args.queue_id = queue_id;
- args.doorbell_address = (uint64_t)(uintptr_t)radeon_kfd_get_doorbell(filep, p, dev, queue_id);
+ args.read_pointer_address = (uint64_t)q_properties.read_ptr;
+ args.write_pointer_address = (uint64_t)q_properties.write_ptr;
+ args.doorbell_address = (uint64_t)q_properties.doorbell_ptr;
if (copy_to_user(arg, &args, sizeof(args))) {
err = -EFAULT;
@@ -198,12 +202,9 @@ kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, void __user *a
return 0;
err_copy_args_out:
- dev->device_info->scheduler_class->destroy_queue(dev->scheduler, &queue->scheduler_queue);
+ pqm_destroy_queue(&p->pqm, queue_id);
err_create_queue:
- radeon_kfd_remove_queue(p, queue_id);
-err_allocate_queue_id:
err_bind_process:
- kfree(queue);
mutex_unlock(&p->mutex);
return err;
}
@@ -211,36 +212,25 @@ err_bind_process:
static int
kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p, void __user *arg)
{
+ int retval;
struct kfd_ioctl_destroy_queue_args args;
- struct kfd_queue *queue;
- struct kfd_dev *dev;
if (copy_from_user(&args, arg, sizeof(args)))
return -EFAULT;
- mutex_lock(&p->mutex);
-
- queue = radeon_kfd_get_queue(p, args.queue_id);
- if (!queue) {
- mutex_unlock(&p->mutex);
- return -EINVAL;
- }
-
- dev = queue->dev;
-
pr_debug("kfd: destroying queue id %d for PASID %d\n",
- args.queue_id,
- p->pasid);
+ args.queue_id,
+ p->pasid);
- radeon_kfd_remove_queue(p, args.queue_id);
- dev->device_info->scheduler_class->destroy_queue(dev->scheduler, &queue->scheduler_queue);
+ mutex_lock(&p->mutex);
- kfree(queue);
+ retval = pqm_destroy_queue(&p->pqm, args.queue_id);
mutex_unlock(&p->mutex);
- return 0;
+ return retval;
}
+
static long
kfd_ioctl_set_memory_policy(struct file *filep, struct kfd_process *p, void __user *arg)
{
@@ -281,12 +271,12 @@ kfd_ioctl_set_memory_policy(struct file *filep, struct kfd_process *p, void __us
alternate_policy = (args.alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
? cache_policy_coherent : cache_policy_noncoherent;
- if (!dev->device_info->scheduler_class->set_cache_policy(dev->scheduler,
- pdd->scheduler_process,
- default_policy,
- alternate_policy,
- (void __user *)args.alternate_aperture_base,
- args.alternate_aperture_size))
+ if (!dev->dqm->set_cache_memory_policy(dev->dqm,
+ &pdd->qpd,
+ default_policy,
+ alternate_policy,
+ (void __user *)args.alternate_aperture_base,
+ args.alternate_aperture_size))
err = -EINVAL;
out:
@@ -432,11 +422,14 @@ kfd_mmap(struct file *filp, struct vm_area_struct *vma)
if (IS_ERR(process))
return PTR_ERR(process);
- if (pgoff < KFD_MMAP_DOORBELL_START)
- return -EINVAL;
-
- if (pgoff < KFD_MMAP_DOORBELL_END)
+ if (pgoff >= KFD_MMAP_DOORBELL_START && pgoff < KFD_MMAP_DOORBELL_END)
return radeon_kfd_doorbell_mmap(process, vma);
+ if (pgoff >= KFD_MMAP_RPTR_START && pgoff < KFD_MMAP_RPTR_END)
+ return radeon_kfd_hw_pointer_store_mmap(&process->read_ptr, vma);
+
+ if (pgoff >= KFD_MMAP_WPTR_START && pgoff < KFD_MMAP_WPTR_END)
+ return radeon_kfd_hw_pointer_store_mmap(&process->write_ptr, vma);
+
return -EINVAL;
}
diff --git a/drivers/gpu/hsa/radeon/kfd_device.c b/drivers/gpu/hsa/radeon/kfd_device.c
index 82febf4..c602e16 100644
--- a/drivers/gpu/hsa/radeon/kfd_device.c
+++ b/drivers/gpu/hsa/radeon/kfd_device.c
@@ -25,10 +25,9 @@
#include <linux/pci.h>
#include <linux/slab.h>
#include "kfd_priv.h"
-#include "kfd_scheduler.h"
+#include "kfd_device_queue_manager.h"
static const struct kfd_device_info kaveri_device_info = {
- .scheduler_class = &radeon_kfd_cik_static_scheduler_class,
.max_pasid_bits = 16,
.ih_ring_entry_size = 4 * sizeof(uint32_t)
};
@@ -121,7 +120,11 @@ device_iommu_pasid_init(struct kfd_dev *kfd)
}
pasid_limit = min_t(pasid_t, (pasid_t)1 << kfd->device_info->max_pasid_bits, iommu_info.max_pasids);
- pasid_limit = min_t(pasid_t, pasid_limit, kfd->doorbell_process_limit);
+ /*
+ * The last PASID is reserved for kernel queue doorbells;
+ * in the future the last pasid might be used for a kernel thread.
+ */
+ pasid_limit = min_t(pasid_t, pasid_limit, kfd->doorbell_process_limit - 1);
err = amd_iommu_init_device(kfd->pdev, pasid_limit);
if (err < 0) {
@@ -168,17 +171,26 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
amd_iommu_set_invalidate_ctx_cb(kfd->pdev, iommu_pasid_shutdown_callback);
- if (kfd->device_info->scheduler_class->create(kfd, &kfd->scheduler)) {
+ kfd->dqm = device_queue_manager_init(kfd);
+ if (!kfd->dqm) {
+ kfd_topology_remove_device(kfd);
amd_iommu_free_device(kfd->pdev);
return false;
}
- kfd->device_info->scheduler_class->start(kfd->scheduler);
+ if (kfd->dqm->start(kfd->dqm) != 0) {
+ device_queue_manager_uninit(kfd->dqm);
+ kfd_topology_remove_device(kfd);
+ amd_iommu_free_device(kfd->pdev);
+ return false;
+ }
kfd->init_complete = true;
dev_info(kfd_device, "added device (%x:%x)\n", kfd->pdev->vendor,
kfd->pdev->device);
+ pr_debug("kfd: Starting kfd with the following scheduling policy %d\n", sched_policy);
+
return true;
}
@@ -188,13 +200,10 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
BUG_ON(err != 0);
- if (kfd->init_complete)
- kfd->device_info->scheduler_class->stop(kfd->scheduler);
-
radeon_kfd_interrupt_exit(kfd);
if (kfd->init_complete) {
- kfd->device_info->scheduler_class->destroy(kfd->scheduler);
+ device_queue_manager_uninit(kfd->dqm);
amd_iommu_free_device(kfd->pdev);
}
@@ -206,7 +215,7 @@ void kgd2kfd_suspend(struct kfd_dev *kfd)
BUG_ON(kfd == NULL);
if (kfd->init_complete) {
- kfd->device_info->scheduler_class->stop(kfd->scheduler);
+ kfd->dqm->stop(kfd->dqm);
amd_iommu_free_device(kfd->pdev);
}
}
@@ -225,7 +234,7 @@ int kgd2kfd_resume(struct kfd_dev *kfd)
if (err < 0)
return -ENXIO;
amd_iommu_set_invalidate_ctx_cb(kfd->pdev, iommu_pasid_shutdown_callback);
- kfd->device_info->scheduler_class->start(kfd->scheduler);
+ kfd->dqm->start(kfd->dqm);
}
return 0;
diff --git a/drivers/gpu/hsa/radeon/kfd_interrupt.c b/drivers/gpu/hsa/radeon/kfd_interrupt.c
index 2179780..1c9ad46 100644
--- a/drivers/gpu/hsa/radeon/kfd_interrupt.c
+++ b/drivers/gpu/hsa/radeon/kfd_interrupt.c
@@ -43,7 +43,6 @@
#include <linux/slab.h>
#include <linux/device.h>
#include "kfd_priv.h"
-#include "kfd_scheduler.h"
#define KFD_INTERRUPT_RING_SIZE 256
@@ -162,7 +161,7 @@ static void interrupt_wq(struct work_struct *work)
uint32_t ih_ring_entry[DIV_ROUND_UP(dev->device_info->ih_ring_entry_size, sizeof(uint32_t))];
while (dequeue_ih_ring_entry(dev, ih_ring_entry))
- dev->device_info->scheduler_class->interrupt_wq(dev->scheduler, ih_ring_entry);
+ ;
}
/* This is called directly from KGD at ISR. */
@@ -171,7 +170,6 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
spin_lock(&kfd->interrupt_lock);
if (kfd->interrupts_active
- && kfd->device_info->scheduler_class->interrupt_isr(kfd->scheduler, ih_ring_entry)
&& enqueue_ih_ring_entry(kfd, ih_ring_entry))
schedule_work(&kfd->interrupt_work);
diff --git a/drivers/gpu/hsa/radeon/kfd_priv.h b/drivers/gpu/hsa/radeon/kfd_priv.h
index 0af4c71..049671b 100644
--- a/drivers/gpu/hsa/radeon/kfd_priv.h
+++ b/drivers/gpu/hsa/radeon/kfd_priv.h
@@ -441,6 +441,8 @@ void device_queue_manager_uninit(struct device_queue_manager *dqm);
struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, enum kfd_queue_type type);
void kernel_queue_uninit(struct kernel_queue *kq);
+int get_vmid_from_pasid(struct kfd_dev *dev, pasid_t pasid, unsigned int *vmid);
+
/* Process Queue Manager */
struct process_queue_node {
struct queue *q;
diff --git a/drivers/gpu/hsa/radeon/kfd_process.c b/drivers/gpu/hsa/radeon/kfd_process.c
index 80136e6..f967c15 100644
--- a/drivers/gpu/hsa/radeon/kfd_process.c
+++ b/drivers/gpu/hsa/radeon/kfd_process.c
@@ -29,7 +29,6 @@
struct mm_struct;
#include "kfd_priv.h"
-#include "kfd_scheduler.h"
/* Initial size for the array of queues.
* The allocated size is doubled each time it is exceeded up to MAX_PROCESS_QUEUES. */
@@ -91,52 +90,15 @@ radeon_kfd_get_process(const struct task_struct *thread)
return process;
}
-/* Assumes that the kfd_process mutex is held.
- * (Or that it doesn't need to be held because the process is exiting.)
- *
- * dev_filter can be set to only destroy queues for one device.
- * Otherwise all queues for the process are destroyed.
- */
-static void
-destroy_queues(struct kfd_process *p, struct kfd_dev *dev_filter)
-{
- unsigned long queue_id;
-
- for_each_set_bit(queue_id, p->allocated_queue_bitmap, MAX_PROCESS_QUEUES) {
-
- struct kfd_queue *queue = radeon_kfd_get_queue(p, queue_id);
- struct kfd_dev *dev;
-
- BUG_ON(queue == NULL);
-
- dev = queue->dev;
-
- if (!dev_filter || dev == dev_filter) {
- struct kfd_process_device *pdd = radeon_kfd_get_process_device_data(dev, p);
-
- BUG_ON(pdd == NULL); /* A queue exists so pdd must. */
-
- radeon_kfd_remove_queue(p, queue_id);
- dev->device_info->scheduler_class->destroy_queue(dev->scheduler, &queue->scheduler_queue);
-
- kfree(queue);
- }
- }
-}
-
static void free_process(struct kfd_process *p)
{
struct kfd_process_device *pdd, *temp;
BUG_ON(p == NULL);
- destroy_queues(p, NULL);
-
/* doorbell mappings: automatic */
list_for_each_entry_safe(pdd, temp, &p->per_device_data, per_device_list) {
- pdd->dev->device_info->scheduler_class->deregister_process(pdd->dev->scheduler, pdd->scheduler_process);
- pdd->scheduler_process = NULL;
amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid);
list_del(&pdd->per_device_list);
kfree(pdd);
@@ -202,8 +164,17 @@ static struct kfd_process *create_process(const struct task_struct *thread)
INIT_LIST_HEAD(&process->per_device_data);
+ process->read_ptr.page_mapping = process->write_ptr.page_mapping = NULL;
+ err = pqm_init(&process->pqm, process);
+ if (err != 0)
+ goto err_process_pqm_init;
+
return process;
+err_process_pqm_init:
+ radeon_kfd_pasid_free(process->pasid);
+ list_del(&process->processes_list);
+ thread->mm->kfd_process = NULL;
err_alloc:
kfree(process->queues);
kfree(process);
@@ -222,6 +193,9 @@ radeon_kfd_get_process_device_data(struct kfd_dev *dev, struct kfd_process *p)
pdd = kzalloc(sizeof(*pdd), GFP_KERNEL);
if (pdd != NULL) {
pdd->dev = dev;
+ INIT_LIST_HEAD(&pdd->qpd.queues_list);
+ INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
+ pdd->qpd.dqm = dev->dqm;
list_add(&pdd->per_device_list, &p->per_device_data);
}
@@ -248,7 +222,6 @@ struct kfd_process_device *radeon_kfd_bind_process_to_device(struct kfd_dev *dev
if (err < 0)
return ERR_PTR(err);
- err = dev->device_info->scheduler_class->register_process(dev->scheduler, p, &pdd->scheduler_process);
if (err < 0) {
amd_iommu_unbind_pasid(dev->pdev, p->pasid);
return ERR_PTR(err);
@@ -282,10 +255,7 @@ void radeon_kfd_unbind_process_from_device(struct kfd_dev *dev, pasid_t pasid)
mutex_lock(&p->mutex);
- destroy_queues(p, dev);
-
- dev->device_info->scheduler_class->deregister_process(dev->scheduler, pdd->scheduler_process);
- pdd->scheduler_process = NULL;
+ pqm_uninit(&p->pqm);
/*
* Just mark pdd as unbound, because we still need it to call
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index e5fcb8b..5134880 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -47,9 +47,9 @@ struct kfd_ioctl_create_queue_args {
uint32_t queue_type; /* to KFD */
uint32_t queue_percentage; /* to KFD */
uint32_t queue_priority; /* to KFD */
- uint64_t write_pointer_address; /* to KFD */
- uint64_t read_pointer_address; /* to KFD */
+ uint64_t write_pointer_address; /* from KFD */
+ uint64_t read_pointer_address; /* from KFD */
uint64_t doorbell_address; /* from KFD */
uint32_t queue_id; /* from KFD */
};
--
1.9.1
From: Ben Goz <[email protected]>
This patch adds support for the LDS aperture for user processes.
Signed-off-by: Ben Goz <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/hsa/radeon/kfd_device_queue_manager.c | 41 +++++++++++++++++++++--
1 file changed, 39 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c b/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c
index c2d91c9..01573b1 100644
--- a/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c
+++ b/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c
@@ -58,16 +58,50 @@ static inline unsigned int get_pipes_num_cpsch(void)
return PIPE_PER_ME_CP_SCHEDULING - 1;
}
+static unsigned int get_sh_mem_bases_nybble_64(struct kfd_process *process, struct kfd_dev *dev)
+{
+ struct kfd_process_device *pdd;
+ uint32_t nybble;
+
+ pdd = radeon_kfd_get_process_device_data(dev, process);
+ nybble = (pdd->lds_base >> 60) & 0x0E;
+
+ return nybble;
+
+}
+
+static unsigned int get_sh_mem_bases_32(struct kfd_process *process, struct kfd_dev *dev)
+{
+ struct kfd_process_device *pdd;
+ unsigned int shared_base;
+
+ pdd = radeon_kfd_get_process_device_data(dev, process);
+ shared_base = (pdd->lds_base >> 16) & 0xFF;
+
+ return shared_base;
+}
+
static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble);
static void init_process_memory(struct device_queue_manager *dqm, struct qcm_process_device *qpd)
{
+ unsigned int temp;
BUG_ON(!dqm || !qpd);
+ if (qpd->pqm->process->is_32bit_user_mode) {
+ temp = get_sh_mem_bases_32(qpd->pqm->process, dqm->dev);
+ qpd->sh_mem_bases = SHARED_BASE(temp);
+ } else {
+ temp = get_sh_mem_bases_nybble_64(qpd->pqm->process, dqm->dev);
+ qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
+ }
+
qpd->sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
qpd->sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);
- qpd->sh_mem_bases = compute_sh_mem_bases_64bit(6);
qpd->sh_mem_ape1_limit = 0;
qpd->sh_mem_ape1_base = 1;
+
+ pr_debug("kfd: is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
+ qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases);
}
static void program_sh_mem_settings(struct device_queue_manager *dqm, struct qcm_process_device *qpd)
@@ -84,6 +118,7 @@ static void program_sh_mem_settings(struct device_queue_manager *dqm, struct qcm
WRITE_REG(dqm->dev, SH_MEM_APE1_BASE, qpd->sh_mem_ape1_base);
WRITE_REG(dqm->dev, SH_MEM_APE1_LIMIT, qpd->sh_mem_ape1_limit);
+ WRITE_REG(dqm->dev, SH_MEM_BASES, qpd->sh_mem_bases);
mqd->release_hqd(mqd);
}
@@ -128,6 +163,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, struct queue *
set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
qpd->vmid = *allocate_vmid;
is_new_vmid = true;
+
+ program_sh_mem_settings(dqm, qpd);
}
q->properties.vmid = qpd->vmid;
@@ -418,7 +455,7 @@ static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
* We don't bother to support different top nybbles for LDS/Scratch and GPUVM.
*/
- BUG_ON((top_address_nybble & 1) || top_address_nybble > 0xE);
+ BUG_ON((top_address_nybble & 1) || top_address_nybble > 0xE || top_address_nybble == 0);
return PRIVATE_BASE(top_address_nybble << 12) | SHARED_BASE(top_address_nybble << 12);
}
--
1.9.1
From: Ben Goz <[email protected]>
The queue scheduler is divided into two sections: one section is process-bound
and the other section is device-bound.
The device bounded section is handled by this module.
The DQM module handles queue setup, update and tear-down from the device side.
It also supports suspend/resume operation.
Signed-off-by: Ben Goz <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/hsa/radeon/Makefile | 2 +-
drivers/gpu/hsa/radeon/kfd_device_queue_manager.c | 1006 +++++++++++++++++++++
drivers/gpu/hsa/radeon/kfd_priv.h | 2 +
3 files changed, 1009 insertions(+), 1 deletion(-)
create mode 100644 drivers/gpu/hsa/radeon/kfd_device_queue_manager.c
diff --git a/drivers/gpu/hsa/radeon/Makefile b/drivers/gpu/hsa/radeon/Makefile
index 341fa67..3409203 100644
--- a/drivers/gpu/hsa/radeon/Makefile
+++ b/drivers/gpu/hsa/radeon/Makefile
@@ -8,6 +8,6 @@ radeon_kfd-y := kfd_module.o kfd_device.o kfd_chardev.o \
kfd_vidmem.o kfd_interrupt.o kfd_aperture.o \
kfd_queue.o kfd_hw_pointer_store.o kfd_mqd_manager.o \
kfd_kernel_queue.o kfd_packet_manager.o \
- kfd_process_queue_manager.o
+ kfd_process_queue_manager.o kfd_device_queue_manager.o
obj-$(CONFIG_HSA_RADEON) += radeon_kfd.o
diff --git a/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c b/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c
new file mode 100644
index 0000000..9e21074
--- /dev/null
+++ b/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c
@@ -0,0 +1,1006 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Ben Goz
+ */
+
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/types.h>
+#include <linux/printk.h>
+#include <linux/bitops.h>
+#include "kfd_priv.h"
+#include "kfd_device_queue_manager.h"
+#include "kfd_mqd_manager.h"
+#include "cik_regs.h"
+#include "kfd_kernel_queue.h"
+
+#define CIK_HPD_SIZE_LOG2 11
+#define CIK_HPD_SIZE (1U << CIK_HPD_SIZE_LOG2)
+
+static bool is_mem_initialized;
+
+static int init_memory(struct device_queue_manager *dqm);
+static int
+set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid, unsigned int vmid);
+
+static inline unsigned int get_pipes_num(struct device_queue_manager *dqm)
+{
+ BUG_ON(!dqm || !dqm->dev);
+ return dqm->dev->shared_resources.compute_pipe_count;
+}
+
+static inline unsigned int get_first_pipe(struct device_queue_manager *dqm)
+{
+ BUG_ON(!dqm);
+ return dqm->dev->shared_resources.first_compute_pipe;
+}
+
+static inline unsigned int get_pipes_num_cpsch(void)
+{
+ return PIPE_PER_ME_CP_SCHEDULING - 1;
+}
+
+static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble);
+static void init_process_memory(struct device_queue_manager *dqm, struct qcm_process_device *qpd)
+{
+ BUG_ON(!dqm || !qpd);
+
+ qpd->sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
+ qpd->sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);
+ qpd->sh_mem_bases = compute_sh_mem_bases_64bit(6);
+ qpd->sh_mem_ape1_limit = 0;
+ qpd->sh_mem_ape1_base = 1;
+}
+
+static void program_sh_mem_settings(struct device_queue_manager *dqm, struct qcm_process_device *qpd)
+{
+ struct mqd_manager *mqd;
+
+ mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
+ if (mqd == NULL)
+ return;
+
+ mqd->acquire_hqd(mqd, 0, 0, qpd->vmid);
+
+ WRITE_REG(dqm->dev, SH_MEM_CONFIG, qpd->sh_mem_config);
+
+ WRITE_REG(dqm->dev, SH_MEM_APE1_BASE, qpd->sh_mem_ape1_base);
+ WRITE_REG(dqm->dev, SH_MEM_APE1_LIMIT, qpd->sh_mem_ape1_limit);
+
+ mqd->release_hqd(mqd);
+}
+
+static int create_queue_nocpsch(struct device_queue_manager *dqm, struct queue *q,
+ struct qcm_process_device *qpd, int *allocate_vmid)
+{
+ bool set, is_new_vmid;
+ int bit, retval, pipe;
+ struct mqd_manager *mqd;
+
+ BUG_ON(!dqm || !q || !qpd || !allocate_vmid);
+ retval = 0;
+
+ pr_debug("kfd: In func %s\n", __func__);
+ print_queue(q);
+
+ mutex_lock(&dqm->lock);
+ /* later memory apertures should be initialized in lazy mode */
+ if (!is_mem_initialized)
+ if (init_memory(dqm) != 0) {
+ retval = -ENODATA;
+ goto init_memory_failed;
+ }
+
+ if (dqm->vmid_bitmap == 0 && qpd->vmid == 0) {
+ retval = -ENOMEM;
+ goto no_vmid;
+ }
+
+ is_new_vmid = false;
+ if (qpd->vmid == 0) {
+ bit = find_first_bit((unsigned long *)&dqm->vmid_bitmap, CIK_VMID_NUM);
+ clear_bit(bit, (unsigned long *)&dqm->vmid_bitmap);
+
+ /* Kaveri KFD VMIDs start from VMID 8 */
+ *allocate_vmid = qpd->vmid = bit + KFD_VMID_START_OFFSET;
+ q->properties.vmid = *allocate_vmid;
+
+
+ pr_debug("kfd: vmid allocation %d\n", *allocate_vmid);
+ set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
+ qpd->vmid = *allocate_vmid;
+ is_new_vmid = true;
+ }
+ q->properties.vmid = qpd->vmid;
+
+ set = false;
+ for (pipe = dqm->next_pipe_to_allocate; pipe < get_pipes_num(dqm);
+ pipe = (pipe + 1) % get_pipes_num(dqm)) {
+ if (dqm->allocated_queues[pipe] != 0) {
+ bit = find_first_bit((unsigned long *)&dqm->allocated_queues[pipe], QUEUES_PER_PIPE);
+ clear_bit(bit, (unsigned long *)&dqm->allocated_queues[pipe]);
+ q->pipe = pipe;
+ q->queue = bit;
+ set = true;
+ break;
+ }
+ }
+
+ if (set == false) {
+ retval = -EBUSY;
+ goto no_hqd;
+ }
+ pr_debug("kfd: DQM %s hqd slot - pipe (%d) queue(%d)\n",
+ __func__, q->pipe, q->queue);
+ dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_num(dqm);
+
+ mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
+ if (mqd == NULL) {
+ retval = -ENOMEM;
+ goto fail_get_mqd_manager;
+ }
+
+ retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, &q->gart_mqd_addr, &q->properties);
+ if (retval != 0) {
+ set_bit(q->queue, (unsigned long *)&dqm->allocated_queues[q->pipe]);
+ goto init_mqd_failed;
+ }
+
+ list_add(&q->list, &qpd->queues_list);
+ dqm->queue_count++;
+
+ mutex_unlock(&dqm->lock);
+ return 0;
+
+init_mqd_failed:
+fail_get_mqd_manager:
+no_hqd:
+ if (is_new_vmid == true) {
+ set_bit(*allocate_vmid - KFD_VMID_START_OFFSET, (unsigned long *)&dqm->vmid_bitmap);
+ *allocate_vmid = qpd->vmid = q->properties.vmid = 0;
+ }
+no_vmid:
+init_memory_failed:
+ mutex_unlock(&dqm->lock);
+ return retval;
+}
+
+static int destroy_queue_nocpsch(struct device_queue_manager *dqm, struct qcm_process_device *qpd, struct queue *q)
+{
+ int retval;
+ struct mqd_manager *mqd;
+
+ BUG_ON(!dqm || !q || !q->mqd || !qpd);
+
+ retval = 0;
+
+ pr_debug("kfd: In Func %s\n", __func__);
+
+ mutex_lock(&dqm->lock);
+ mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
+ if (mqd == NULL) {
+ retval = -ENOMEM;
+ goto out;
+ }
+ mqd->acquire_hqd(mqd, q->pipe, q->queue, 0);
+ retval = mqd->destroy_mqd(mqd, q->mqd, KFD_PREEMPT_TYPE_WAVEFRONT, QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
+ mqd->release_hqd(mqd);
+ if (retval != 0)
+ goto out;
+
+ mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
+
+ set_bit(q->queue, (unsigned long *)&dqm->allocated_queues[q->pipe]);
+ q->queue = q->pipe = 0;
+ list_del(&q->list);
+ if (list_empty(&qpd->queues_list)) {
+ set_bit(qpd->vmid - KFD_VMID_START_OFFSET, (unsigned long *)&dqm->vmid_bitmap);
+ qpd->vmid = 0;
+ }
+ dqm->queue_count--;
+out:
+ mutex_unlock(&dqm->lock);
+ return retval;
+}
+
+static int update_queue_nocpsch(struct device_queue_manager *dqm, struct queue *q)
+{
+ int retval;
+ struct mqd_manager *mqd;
+
+ BUG_ON(!dqm || !q || !q->mqd);
+
+ mutex_lock(&dqm->lock);
+ mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
+ if (mqd == NULL) {
+ mutex_unlock(&dqm->lock);
+ return -ENOMEM;
+ }
+ retval = mqd->update_mqd(mqd, q->mqd, &q->properties);
+ if (q->properties.is_active == true)
+ dqm->queue_count++;
+ else
+ dqm->queue_count--;
+
+ mutex_unlock(&dqm->lock);
+ return retval;
+}
+
+/*
+ * destroy_queues_nocpsch - Preempt every queue of every registered process.
+ * The MQDs are left allocated; only the wavefronts are preempted (destroy_mqd
+ * results are intentionally best-effort here and not propagated).
+ */
+static int destroy_queues_nocpsch(struct device_queue_manager *dqm)
+{
+ struct device_process_node *cur;
+ struct mqd_manager *mqd;
+ struct queue *q;
+
+ BUG_ON(!dqm);
+
+ mutex_lock(&dqm->lock);
+ mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
+ if (mqd == NULL) {
+ mutex_unlock(&dqm->lock);
+ return -ENOMEM;
+ }
+
+ list_for_each_entry(cur, &dqm->queues, list) {
+ list_for_each_entry(q, &cur->qpd->queues_list, list) {
+
+
+ mqd->acquire_hqd(mqd, q->pipe, q->queue, 0);
+ mqd->destroy_mqd(mqd, q->mqd, KFD_PREEMPT_TYPE_WAVEFRONT,
+ QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
+ mqd->release_hqd(mqd);
+ }
+ }
+
+ mutex_unlock(&dqm->lock);
+
+ return 0;
+}
+
+/*
+ * get_mqd_manager_nocpsch - Lazily create and cache the MQD manager for the
+ * requested type. The bounds check uses >= because dqm->mqds has exactly
+ * KFD_MQD_TYPE_MAX slots, so type == KFD_MQD_TYPE_MAX would index out of
+ * bounds (the original's '>' allowed that). Returns NULL on init failure.
+ */
+static struct mqd_manager *get_mqd_manager_nocpsch(struct device_queue_manager *dqm, enum KFD_MQD_TYPE type)
+{
+ struct mqd_manager *mqd;
+
+ BUG_ON(!dqm || type >= KFD_MQD_TYPE_MAX);
+
+ pr_debug("kfd: In func %s mqd type %d\n", __func__, type);
+
+ mqd = dqm->mqds[type];
+ if (!mqd) {
+ mqd = mqd_manager_init(type, dqm->dev);
+ if (mqd == NULL)
+ pr_err("kfd: mqd manager is NULL\n");
+ dqm->mqds[type] = mqd;
+ }
+
+ return mqd;
+}
+
+/*
+ * execute_queues_nocpsch - Walk every registered process and load the MQD of
+ * each of its queues into its HQD slot, unless the slot is already occupied.
+ */
+static int execute_queues_nocpsch(struct device_queue_manager *dqm)
+{
+ struct qcm_process_device *qpd;
+ struct device_process_node *node;
+ struct queue *q;
+ struct mqd_manager *mqd;
+
+ BUG_ON(!dqm);
+
+ mutex_lock(&dqm->lock);
+ mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
+ if (mqd == NULL) {
+ mutex_unlock(&dqm->lock);
+ return -ENOMEM;
+ }
+
+ list_for_each_entry(node, &dqm->queues, list) {
+ qpd = node->qpd;
+ list_for_each_entry(q, &qpd->queues_list, list) {
+ pr_debug("kfd: executing queue (%d, %d)\n", q->pipe, q->queue);
+ mqd->acquire_hqd(mqd, q->pipe, q->queue, 0);
+ /* only load if the HQD is not already running this queue */
+ if (mqd->is_occupied(mqd, q->mqd, &q->properties) == false)
+ mqd->load_mqd(mqd, q->mqd);
+ mqd->release_hqd(mqd);
+ }
+ }
+
+ mutex_unlock(&dqm->lock);
+
+ return 0;
+}
+
+/*
+ * register_process_nocpsch - Add a process' queue-manager data to the DQM's
+ * process list and initialize its per-process memory settings.
+ * Returns 0 on success, -ENOMEM if the list node cannot be allocated.
+ */
+static int register_process_nocpsch(struct device_queue_manager *dqm, struct qcm_process_device *qpd)
+{
+ struct device_process_node *n;
+
+ BUG_ON(!dqm || !qpd);
+
+ pr_debug("kfd: In func %s\n", __func__);
+
+ n = kzalloc(sizeof(struct device_process_node), GFP_KERNEL);
+ if (!n)
+ return -ENOMEM;
+
+ n->qpd = qpd;
+
+ mutex_lock(&dqm->lock);
+ list_add(&n->list, &dqm->queues);
+
+ init_process_memory(dqm, qpd);
+ dqm->processes_count++;
+
+ mutex_unlock(&dqm->lock);
+
+ return 0;
+}
+
+/*
+ * unregister_process_nocpsch - Remove a process from the DQM's process list.
+ * The process must have no remaining queues (asserted below).
+ *
+ * NOTE(review): the list node removed here is not freed, and "not found" is
+ * reported as the positive value 1 rather than a -errno -- callers must
+ * treat any nonzero return as failure; TODO confirm both against callers.
+ */
+static int unregister_process_nocpsch(struct device_queue_manager *dqm, struct qcm_process_device *qpd)
+{
+ int retval;
+ struct device_process_node *cur, *next;
+
+ BUG_ON(!dqm || !qpd);
+
+ BUG_ON(!list_empty(&qpd->queues_list));
+
+ pr_debug("kfd: In func %s\n", __func__);
+
+ retval = 0;
+ mutex_lock(&dqm->lock);
+
+ list_for_each_entry_safe(cur, next, &dqm->queues, list) {
+ if (qpd == cur->qpd) {
+ list_del(&cur->list);
+ dqm->processes_count--;
+ goto out;
+ }
+ }
+ /* qpd not found in dqm list */
+ retval = 1;
+out:
+ mutex_unlock(&dqm->lock);
+ return retval;
+}
+
+/*
+ * set_pasid_vmid_mapping - Program the HW PASID<->VMID mapping register for
+ * one VMID, then busy-wait for the update-status bit and acknowledge it.
+ * A pasid of 0 clears the mapping (no VALID bit set).
+ */
+static int
+set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid, unsigned int vmid)
+{
+ /* We have to assume that there is no outstanding mapping.
+ * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because a mapping
+ * is in progress or because a mapping finished and the SW cleared it.
+ * So the protocol is to always wait & clear.
+ */
+ uint32_t pasid_mapping;
+
+ BUG_ON(!dqm);
+
+ pr_debug("kfd: In %s set pasid: %d to vmid: %d\n", __func__, pasid, vmid);
+ pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid | ATC_VMID_PASID_MAPPING_VALID;
+
+ WRITE_REG(dqm->dev, ATC_VMID0_PASID_MAPPING + vmid*sizeof(uint32_t), pasid_mapping);
+
+ /* wait for the HW to latch the new mapping, then clear the status bit */
+ while (!(READ_REG(dqm->dev, ATC_VMID_PASID_MAPPING_UPDATE_STATUS) & (1U << vmid)))
+ cpu_relax();
+ WRITE_REG(dqm->dev, ATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid);
+
+ return 0;
+}
+
+/*
+ * compute_sh_mem_bases_64bit - Build the SH_MEM_BASES register value that
+ * places the LDS/scratch and GPUVM apertures under the given top nybble.
+ */
+static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
+{
+ /* In 64-bit mode, we can only control the top 3 bits of the LDS, scratch and GPUVM apertures.
+ * The hardware fills in the remaining 59 bits according to the following pattern:
+ * LDS: X0000000'00000000 - X0000001'00000000 (4GB)
+ * Scratch: X0000001'00000000 - X0000002'00000000 (4GB)
+ * GPUVM: Y0010000'00000000 - Y0020000'00000000 (1TB)
+ *
+ * (where X/Y is the configurable nybble with the low-bit 0)
+ *
+ * LDS and scratch will have the same top nybble programmed in the top 3 bits of SH_MEM_BASES.PRIVATE_BASE.
+ * GPUVM can have a different top nybble programmed in the top 3 bits of SH_MEM_BASES.SHARED_BASE.
+ * We don't bother to support different top nybbles for LDS/Scratch and GPUVM.
+ */
+
+ /* low bit of the nybble must be 0 and the value must fit in 3 bits */
+ BUG_ON((top_address_nybble & 1) || top_address_nybble > 0xE);
+
+ return PRIVATE_BASE(top_address_nybble << 12) | SHARED_BASE(top_address_nybble << 12);
+}
+
+/*
+ * init_memory - For each of the 16 VMIDs: clear its PASID mapping and
+ * program default shader-memory configuration and apertures (APE1 disabled
+ * via base > limit; scratch aperture not supported yet).
+ */
+static int init_memory(struct device_queue_manager *dqm)
+{
+ int i;
+ struct mqd_manager *mqd;
+
+ BUG_ON(!dqm);
+
+ pr_debug("kfd: In func %s\n", __func__);
+ mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
+ if (mqd == NULL)
+ return -ENOMEM;
+ for (i = 0; i < 16; i++) {
+ uint32_t sh_mem_config;
+
+ /* acquire with the VMID so the per-VMID registers are selected */
+ mqd->acquire_hqd(mqd, 0, 0, i);
+ set_pasid_vmid_mapping(dqm, 0, i);
+
+ sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
+ sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);
+
+ WRITE_REG(dqm->dev, SH_MEM_CONFIG, sh_mem_config);
+
+ /* Configure apertures:
+ * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
+ * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
+ * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
+ */
+ WRITE_REG(dqm->dev, SH_MEM_BASES, compute_sh_mem_bases_64bit(6));
+
+ /* Scratch aperture is not supported for now. */
+ WRITE_REG(dqm->dev, SH_STATIC_MEM_CONFIG, 0);
+
+ /* APE1 disabled for now. */
+ WRITE_REG(dqm->dev, SH_MEM_APE1_BASE, 1);
+ WRITE_REG(dqm->dev, SH_MEM_APE1_LIMIT, 0);
+
+ mqd->release_hqd(mqd);
+ }
+ is_mem_initialized = true;
+ return 0;
+}
+
+/*
+ * init_pipelines - Allocate, zero and GPU-map one HPD (hardware pipe
+ * descriptor) buffer per pipe, then program each pipe's EOP base/size
+ * registers. Returns 0 on success, -ENOMEM on any allocation failure.
+ */
+static int init_pipelines(struct device_queue_manager *dqm, unsigned int pipes_num, unsigned int first_pipe)
+{
+ void *hpdptr;
+ struct mqd_manager *mqd;
+ unsigned int i, err, inx;
+ uint64_t pipe_hpd_addr;
+
+ BUG_ON(!dqm || !dqm->dev);
+
+ pr_debug("kfd: In func %s\n", __func__);
+
+ /*
+ * Allocate memory for the HPDs. This is hardware-owned per-pipe data.
+ * The driver never accesses this memory after zeroing it. It doesn't even have
+ * to be saved/restored on suspend/resume because it contains no data when there
+ * are no active queues.
+ */
+ err = radeon_kfd_vidmem_alloc(dqm->dev,
+ CIK_HPD_SIZE * pipes_num,
+ PAGE_SIZE,
+ KFD_MEMPOOL_SYSTEM_WRITECOMBINE,
+ &dqm->pipeline_mem);
+ if (err) {
+ pr_err("kfd: error allocate vidmem num pipes: %d\n", pipes_num);
+ return -ENOMEM;
+ }
+
+ err = radeon_kfd_vidmem_kmap(dqm->dev, dqm->pipeline_mem, &hpdptr);
+ if (err) {
+ pr_err("kfd: err kmap vidmem\n");
+ radeon_kfd_vidmem_free(dqm->dev, dqm->pipeline_mem);
+ return -ENOMEM;
+ }
+
+ memset(hpdptr, 0, CIK_HPD_SIZE * pipes_num);
+ radeon_kfd_vidmem_unkmap(dqm->dev, dqm->pipeline_mem);
+
+ /* NOTE(review): gpumap's return value is not checked here -- verify it
+ * cannot fail after a successful alloc/kmap. */
+ radeon_kfd_vidmem_gpumap(dqm->dev, dqm->pipeline_mem, &dqm->pipelines_addr);
+
+ mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
+ if (mqd == NULL) {
+ radeon_kfd_vidmem_free(dqm->dev, dqm->pipeline_mem);
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < pipes_num; i++) {
+ inx = i + first_pipe;
+ pipe_hpd_addr = dqm->pipelines_addr + i * CIK_HPD_SIZE;
+ pr_debug("kfd: pipeline address %llX\n", pipe_hpd_addr);
+
+ /* EOP base is programmed in units of 256 bytes (addr >> 8) */
+ mqd->acquire_hqd(mqd, inx, 0, 0);
+ WRITE_REG(dqm->dev, CP_HPD_EOP_BASE_ADDR, lower_32(pipe_hpd_addr >> 8));
+ WRITE_REG(dqm->dev, CP_HPD_EOP_BASE_ADDR_HI, upper_32(pipe_hpd_addr >> 8));
+ WRITE_REG(dqm->dev, CP_HPD_EOP_VMID, 0);
+ WRITE_REG(dqm->dev, CP_HPD_EOP_CONTROL, CIK_HPD_SIZE_LOG2 - 1);
+ mqd->release_hqd(mqd);
+ }
+
+ return 0;
+}
+
+
+/*
+ * init_scheduler - One-time HW setup for the no-HWS path: program the pipes
+ * starting at KFD_DQM_FIRST_PIPE, then the per-VMID memory configuration.
+ */
+static int init_scheduler(struct device_queue_manager *dqm)
+{
+ int retval;
+
+ BUG_ON(!dqm);
+
+ pr_debug("kfd: In %s\n", __func__);
+
+ retval = init_pipelines(dqm, get_pipes_num(dqm), KFD_DQM_FIRST_PIPE);
+ if (retval != 0)
+ return retval;
+ /* should be later integrated with Evgeny/Alexey memory management code */
+ retval = init_memory(dqm);
+ return retval;
+}
+
+/*
+ * initialize_nocpsch - Set up DQM state for driver-managed (no CP
+ * scheduling) mode: lock, process list, per-pipe HQD free bitmaps and the
+ * VMID bitmap, then run the one-time HW setup. Unlike the original, a
+ * failure of init_scheduler() is propagated (with cleanup) instead of being
+ * silently ignored.
+ */
+static int initialize_nocpsch(struct device_queue_manager *dqm)
+{
+ int i;
+ int retval;
+
+ BUG_ON(!dqm);
+
+ pr_debug("kfd: In func %s num of pipes: %d\n", __func__, get_pipes_num(dqm));
+
+ mutex_init(&dqm->lock);
+ INIT_LIST_HEAD(&dqm->queues);
+ dqm->queue_count = dqm->next_pipe_to_allocate = 0;
+ dqm->allocated_queues = kcalloc(get_pipes_num(dqm), sizeof(unsigned int), GFP_KERNEL);
+ if (!dqm->allocated_queues) {
+ mutex_destroy(&dqm->lock);
+ return -ENOMEM;
+ }
+
+ /* every HQD of every pipe starts out free */
+ for (i = 0; i < get_pipes_num(dqm); i++)
+ dqm->allocated_queues[i] = (1 << QUEUES_PER_PIPE) - 1;
+
+ dqm->vmid_bitmap = (1 << VMID_PER_DEVICE) - 1;
+
+ retval = init_scheduler(dqm);
+ if (retval != 0) {
+ kfree(dqm->allocated_queues);
+ mutex_destroy(&dqm->lock);
+ return retval;
+ }
+ return 0;
+}
+
+/*
+ * uninitialize_nocpsch - Release everything initialize_nocpsch()/
+ * init_pipelines() allocated. All queues and processes must be gone.
+ */
+static void uninitialize_nocpsch(struct device_queue_manager *dqm)
+{
+ BUG_ON(!dqm);
+
+ BUG_ON(dqm->queue_count > 0 || dqm->processes_count > 0);
+
+ kfree(dqm->allocated_queues);
+ mutex_destroy(&dqm->lock);
+ radeon_kfd_vidmem_free(dqm->dev, dqm->pipeline_mem);
+}
+
+/* start/stop are no-ops in no-HWS mode: queues are loaded directly on HQDs */
+static int start_nocpsch(struct device_queue_manager *dqm)
+{
+ return 0;
+}
+
+static int stop_nocpsch(struct device_queue_manager *dqm)
+{
+ return 0;
+}
+
+/*
+ * Device Queue Manager implementation for cp scheduler
+ */
+
+/*
+ * set_sched_resources - Tell the CP scheduler (via the packet manager)
+ * which VMIDs and HQD slots it owns; GWS/OAC/GDS are not handed over.
+ */
+static int set_sched_resources(struct device_queue_manager *dqm)
+{
+ struct scheduling_resources res;
+ unsigned int queue_num, queue_mask;
+
+ BUG_ON(!dqm);
+
+ pr_debug("kfd: In func %s\n", __func__);
+
+ queue_num = get_pipes_num_cpsch() * QUEUES_PER_PIPE;
+ queue_mask = (1 << queue_num) - 1;
+ /* HWS gets the VMIDs above the ones reserved for graphics */
+ res.vmid_mask = (1 << VMID_PER_DEVICE) - 1;
+ res.vmid_mask <<= KFD_VMID_START_OFFSET;
+ res.queue_mask = queue_mask << (get_first_pipe(dqm) * QUEUES_PER_PIPE);
+ res.gws_mask = res.oac_mask = res.gds_heap_base = res.gds_heap_size = 0;
+
+ pr_debug("kfd: scheduling resources:\n"
+ " vmid mask: 0x%8X\n"
+ " queue mask: 0x%8llX\n", res.vmid_mask, res.queue_mask);
+
+ return pm_send_set_resources(&dqm->packets, &res);
+}
+
+/*
+ * initialize_cpsch - Set up DQM state for HW-scheduled (CP) mode and
+ * program the pipelines starting at pipe 0.
+ *
+ * NOTE(review): the debug print uses get_pipes_num_cpsch() while
+ * init_pipelines() is given get_pipes_num(dqm) -- verify which count is
+ * intended here.
+ */
+static int initialize_cpsch(struct device_queue_manager *dqm)
+{
+ int retval;
+
+ BUG_ON(!dqm);
+
+ pr_debug("kfd: In func %s num of pipes: %d\n", __func__, get_pipes_num_cpsch());
+
+ mutex_init(&dqm->lock);
+ INIT_LIST_HEAD(&dqm->queues);
+ dqm->queue_count = dqm->processes_count = 0;
+ dqm->active_runlist = false;
+ retval = init_pipelines(dqm, get_pipes_num(dqm), 0);
+ if (retval != 0)
+ goto fail_init_pipelines;
+
+ return 0;
+
+fail_init_pipelines:
+ mutex_destroy(&dqm->lock);
+ return retval;
+}
+
+/*
+ * start_cpsch - Bring up CP scheduling: init the packet manager, hand the
+ * scheduling resources to the CP, allocate the preemption fence in GART,
+ * re-bind all registered processes and submit the runlist.
+ */
+static int start_cpsch(struct device_queue_manager *dqm)
+{
+ struct device_process_node *node;
+ int retval;
+
+ BUG_ON(!dqm);
+
+ retval = 0;
+
+ retval = pm_init(&dqm->packets, dqm);
+ if (retval != 0)
+ goto fail_packet_manager_init;
+
+ retval = set_sched_resources(dqm);
+ if (retval != 0)
+ goto fail_set_sched_resources;
+
+ pr_debug("kfd: allocating fence memory\n");
+ /* allocate fence memory on the gart */
+ retval = radeon_kfd_vidmem_alloc_map(dqm->dev, &dqm->fence_mem, (void **)&dqm->fence_addr, &dqm->fence_gpu_addr,
+ sizeof(*dqm->fence_addr));
+ if (retval != 0)
+ goto fail_allocate_vidmem;
+
+ /* re-bind processes that were registered while the DQM was stopped */
+ list_for_each_entry(node, &dqm->queues, list) {
+ if (node->qpd->pqm->process && dqm->dev)
+ radeon_kfd_bind_process_to_device(dqm->dev, node->qpd->pqm->process);
+ }
+
+ dqm->execute_queues(dqm);
+
+ return 0;
+fail_allocate_vidmem:
+fail_set_sched_resources:
+ pm_uninit(&dqm->packets);
+fail_packet_manager_init:
+ return retval;
+}
+
+/*
+ * stop_cpsch - Tear down CP scheduling: preempt all queues, mark every
+ * registered process unbound, free the fence memory and the packet manager.
+ * Counterpart of start_cpsch().
+ */
+static int stop_cpsch(struct device_queue_manager *dqm)
+{
+ struct device_process_node *node;
+ struct kfd_process_device *pdd;
+
+ BUG_ON(!dqm);
+
+ dqm->destroy_queues(dqm);
+
+ list_for_each_entry(node, &dqm->queues, list) {
+ pdd = radeon_kfd_get_process_device_data(dqm->dev, node->qpd->pqm->process);
+ pdd->bound = false;
+ }
+ radeon_kfd_vidmem_free_unmap(dqm->dev, dqm->fence_mem);
+ pm_uninit(&dqm->packets);
+
+ return 0;
+}
+
+/*
+ * create_kernel_queue_cpsch - Track a kernel (HIQ/DIQ) queue on the
+ * process' private queue list; its presence marks the process as debugged.
+ */
+static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
+ struct kernel_queue *kq,
+ struct qcm_process_device *qpd)
+{
+ BUG_ON(!dqm || !kq || !qpd);
+
+ pr_debug("kfd: In func %s\n", __func__);
+
+ mutex_lock(&dqm->lock);
+ list_add(&kq->list, &qpd->priv_queue_list);
+ dqm->queue_count++;
+ qpd->is_debug = true;
+ mutex_unlock(&dqm->lock);
+
+ return 0;
+}
+
+/*
+ * destroy_kernel_queue_cpsch - Counterpart of create_kernel_queue_cpsch():
+ * drop the kernel queue from the process list and clear the debug flag.
+ */
+static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
+ struct kernel_queue *kq,
+ struct qcm_process_device *qpd)
+{
+ BUG_ON(!dqm || !kq);
+
+ mutex_lock(&dqm->lock);
+ list_del(&kq->list);
+ dqm->queue_count--;
+ qpd->is_debug = false;
+ mutex_unlock(&dqm->lock);
+}
+
+/*
+ * create_queue_cpsch - Create a CP-scheduled user queue: build its MQD and
+ * add it to the process' queue list. No VMID is allocated here (*allocate_vmid
+ * is forced to 0) because the HWS assigns VMIDs itself.
+ */
+static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
+ struct qcm_process_device *qpd, int *allocate_vmid)
+{
+ int retval;
+ struct mqd_manager *mqd;
+
+ BUG_ON(!dqm || !q || !qpd);
+
+ retval = 0;
+
+ if (allocate_vmid)
+ *allocate_vmid = 0;
+
+ mutex_lock(&dqm->lock);
+
+ mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_CP);
+ if (mqd == NULL) {
+ mutex_unlock(&dqm->lock);
+ return -ENOMEM;
+ }
+
+ retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, &q->gart_mqd_addr, &q->properties);
+ if (retval != 0)
+ goto out;
+
+ list_add(&q->list, &qpd->queues_list);
+ if (q->properties.is_active)
+ dqm->queue_count++;
+
+out:
+ mutex_unlock(&dqm->lock);
+ return retval;
+}
+
+/*
+ * fence_wait_timeout - Busy-wait until *fence_addr reads fence_value or the
+ * timeout (milliseconds) expires. The timeout is converted with
+ * msecs_to_jiffies(); the original added the raw millisecond count to
+ * jiffies, which is only correct when HZ == 1000.
+ */
+static void fence_wait_timeout(unsigned int *fence_addr, unsigned int fence_value, unsigned long timeout)
+{
+ unsigned long end_jiffies;
+
+ BUG_ON(!fence_addr);
+ end_jiffies = jiffies + msecs_to_jiffies(timeout);
+
+ while (*fence_addr != fence_value) {
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("kfd: qcm fence wait loop timeout expired\n");
+ break;
+ }
+ cpu_relax();
+ }
+}
+
+/*
+ * destroy_queues_cpsch - Unmap all compute queues from the CP: send the
+ * unmap packet, then a query-status packet that writes the completion fence,
+ * and wait (bounded) for the fence before releasing the runlist IB.
+ */
+static int destroy_queues_cpsch(struct device_queue_manager *dqm)
+{
+ int retval;
+
+ BUG_ON(!dqm);
+
+ retval = 0;
+
+ mutex_lock(&dqm->lock);
+ /* nothing mapped -> nothing to preempt */
+ if (dqm->active_runlist == false)
+ goto out;
+ retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
+ KFD_PRERMPT_TYPE_FILTER_ALL_QUEUES, 0, false);
+ if (retval != 0)
+ goto out;
+
+ /* NOTE(review): pm_send_query_status's return value is ignored -- if it
+ * fails the fence below can only complete via the timeout. */
+ *dqm->fence_addr = KFD_FENCE_INIT;
+ pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr, KFD_FENCE_COMPLETED);
+ /* should be timed out */
+ fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED, QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
+ pm_release_ib(&dqm->packets);
+ dqm->active_runlist = false;
+
+out:
+ mutex_unlock(&dqm->lock);
+ return retval;
+}
+
+/*
+ * execute_queues_cpsch - Preempt any active runlist, then (if there are
+ * queues and processes to schedule) build and submit a fresh runlist to the
+ * CP. Log-message typos and missing newlines fixed.
+ */
+static int execute_queues_cpsch(struct device_queue_manager *dqm)
+{
+ int retval;
+
+ BUG_ON(!dqm);
+
+ retval = dqm->destroy_queues(dqm);
+ if (retval != 0) {
+ pr_err("kfd: the CP might be in an unrecoverable state due to an unsuccessful queues preemption\n");
+ return retval;
+ }
+
+ /* nothing to schedule */
+ if (dqm->queue_count <= 0 || dqm->processes_count <= 0)
+ return 0;
+
+ mutex_lock(&dqm->lock);
+ if (dqm->active_runlist) {
+ retval = 0;
+ goto out;
+ }
+ retval = pm_send_runlist(&dqm->packets, &dqm->queues);
+ if (retval != 0) {
+ pr_err("kfd: failed to execute runlist\n");
+ goto out;
+ }
+ dqm->active_runlist = true;
+
+out:
+ mutex_unlock(&dqm->lock);
+ return retval;
+}
+
+/*
+ * destroy_queue_cpsch - Remove a CP-scheduled queue: preempt everything
+ * first (the CP must not be running the MQD we are about to free), then
+ * unlink the queue and release its MQD.
+ */
+static int destroy_queue_cpsch(struct device_queue_manager *dqm, struct qcm_process_device *qpd, struct queue *q)
+{
+ int retval;
+ struct mqd_manager *mqd;
+
+ BUG_ON(!dqm || !qpd || !q);
+
+ retval = 0;
+
+ /* preempt queues before delete mqd */
+ dqm->destroy_queues(dqm);
+
+ mutex_lock(&dqm->lock);
+ mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_CP);
+ if (!mqd) {
+ retval = -ENOMEM;
+ goto failed_get_mqd_manager;
+ }
+ list_del(&q->list);
+
+ mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
+ dqm->queue_count--;
+ mutex_unlock(&dqm->lock);
+
+ return 0;
+failed_get_mqd_manager:
+ mutex_unlock(&dqm->lock);
+ return retval;
+}
+
+/* Low bits must be 0000/FFFF as required by HW, high bits must be 0 to stay in user mode. */
+#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
+#define APE1_LIMIT_ALIGNMENT 0xFFFF /* APE1 limit is inclusive and 64K aligned. */
+
+/*
+ * set_cache_memory_policy - Record the process' shader-memory cache policy
+ * (default and APE1-alternate MTYPEs) and the APE1 aperture bounds in qpd;
+ * in no-HWS mode the registers are programmed immediately.
+ * Returns true on success, false if the APE1 range is invalid.
+ */
+static bool set_cache_memory_policy(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ enum cache_policy default_policy,
+ enum cache_policy alternate_policy,
+ void __user *alternate_aperture_base,
+ uint64_t alternate_aperture_size)
+{
+ uint32_t default_mtype;
+ uint32_t ape1_mtype;
+
+ pr_debug("kfd: In func %s\n", __func__);
+
+ mutex_lock(&dqm->lock);
+
+ if (alternate_aperture_size == 0) {
+ /* base > limit disables APE1 */
+ qpd->sh_mem_ape1_base = 1;
+ qpd->sh_mem_ape1_limit = 0;
+ } else {
+ /*
+ * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]}, SH_MEM_APE1_BASE[31:0], 0x0000 }
+ * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]}, SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
+ * Verify that the base and size parameters can be represented in this format
+ * and convert them. Additionally restrict APE1 to user-mode addresses.
+ */
+
+ uint64_t base = (uintptr_t)alternate_aperture_base;
+ uint64_t limit = base + alternate_aperture_size - 1;
+
+ if (limit <= base)
+ goto out;
+
+ if ((base & APE1_FIXED_BITS_MASK) != 0)
+ goto out;
+
+ if ((limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT)
+ goto out;
+
+ /* registers hold bits [47:16] of the 64-bit addresses */
+ qpd->sh_mem_ape1_base = base >> 16;
+ qpd->sh_mem_ape1_limit = limit >> 16;
+ }
+
+ default_mtype = (default_policy == cache_policy_coherent) ? MTYPE_NONCACHED : MTYPE_CACHED;
+ ape1_mtype = (alternate_policy == cache_policy_coherent) ? MTYPE_NONCACHED : MTYPE_CACHED;
+
+ qpd->sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
+ | DEFAULT_MTYPE(default_mtype)
+ | APE1_MTYPE(ape1_mtype);
+
+
+ /* with HWS the CP programs these per-VMID; without it, do so now */
+ if (sched_policy == KFD_SCHED_POLICY_NO_HWS)
+ program_sh_mem_settings(dqm, qpd);
+
+
+ pr_debug("kfd: sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n", qpd->sh_mem_config,
+ qpd->sh_mem_ape1_base, qpd->sh_mem_ape1_limit);
+
+ mutex_unlock(&dqm->lock);
+ return true;
+
+out:
+ mutex_unlock(&dqm->lock);
+ return false;
+}
+
+/*
+ * device_queue_manager_init - Allocate a DQM for the device and wire its
+ * ops table according to the module-wide sched_policy (HWS variants share
+ * the CP-scheduled ops; NO_HWS uses the driver-managed ops), then run the
+ * selected initialize(). Returns NULL on allocation or init failure.
+ */
+struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
+{
+ struct device_queue_manager *dqm;
+
+ BUG_ON(!dev);
+
+ dqm = kzalloc(sizeof(struct device_queue_manager), GFP_KERNEL);
+ if (!dqm)
+ return NULL;
+
+ dqm->dev = dev;
+ switch (sched_policy) {
+ case KFD_SCHED_POLICY_HWS:
+ case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
+ /* initialize dqm for cp scheduling */
+ dqm->create_queue = create_queue_cpsch;
+ dqm->initialize = initialize_cpsch;
+ dqm->start = start_cpsch;
+ dqm->stop = stop_cpsch;
+ dqm->destroy_queues = destroy_queues_cpsch;
+ dqm->execute_queues = execute_queues_cpsch;
+ dqm->destroy_queue = destroy_queue_cpsch;
+ dqm->update_queue = update_queue_nocpsch;
+ dqm->get_mqd_manager = get_mqd_manager_nocpsch;
+ dqm->register_process = register_process_nocpsch;
+ dqm->unregister_process = unregister_process_nocpsch;
+ dqm->uninitialize = uninitialize_nocpsch;
+ dqm->create_kernel_queue = create_kernel_queue_cpsch;
+ dqm->destroy_kernel_queue = destroy_kernel_queue_cpsch;
+ dqm->set_cache_memory_policy = set_cache_memory_policy;
+ break;
+ case KFD_SCHED_POLICY_NO_HWS:
+ /* initialize dqm for no cp scheduling */
+ dqm->start = start_nocpsch;
+ dqm->stop = stop_nocpsch;
+ dqm->create_queue = create_queue_nocpsch;
+ dqm->destroy_queue = destroy_queue_nocpsch;
+ dqm->update_queue = update_queue_nocpsch;
+ dqm->destroy_queues = destroy_queues_nocpsch;
+ dqm->get_mqd_manager = get_mqd_manager_nocpsch;
+ dqm->execute_queues = execute_queues_nocpsch;
+ dqm->register_process = register_process_nocpsch;
+ dqm->unregister_process = unregister_process_nocpsch;
+ dqm->initialize = initialize_nocpsch;
+ dqm->uninitialize = uninitialize_nocpsch;
+ dqm->set_cache_memory_policy = set_cache_memory_policy;
+ break;
+ default:
+ BUG();
+ break;
+ }
+
+ if (dqm->initialize(dqm) != 0) {
+ kfree(dqm);
+ return NULL;
+ }
+
+ return dqm;
+}
+
+/*
+ * device_queue_manager_uninit - Run the policy-specific uninitialize() and
+ * free the DQM. Counterpart of device_queue_manager_init().
+ */
+void device_queue_manager_uninit(struct device_queue_manager *dqm)
+{
+ BUG_ON(!dqm);
+
+ dqm->uninitialize(dqm);
+ kfree(dqm);
+}
+
diff --git a/drivers/gpu/hsa/radeon/kfd_priv.h b/drivers/gpu/hsa/radeon/kfd_priv.h
index e716745..0af4c71 100644
--- a/drivers/gpu/hsa/radeon/kfd_priv.h
+++ b/drivers/gpu/hsa/radeon/kfd_priv.h
@@ -436,6 +436,8 @@ void print_queue_properties(struct queue_properties *q);
void print_queue(struct queue *q);
struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, struct kfd_dev *dev);
+struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev);
+void device_queue_manager_uninit(struct device_queue_manager *dqm);
struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, enum kfd_queue_type type);
void kernel_queue_uninit(struct kernel_queue *kq);
--
1.9.1
From: Ben Goz <[email protected]>
The kernel queue module enables the KFD to establish kernel queues, which are not
exposed to user space. The kernel queues are used for HIQ (HSA Interface Queue)
and DIQ (Debug Interface Queue) operations.
Signed-off-by: Ben Goz <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/hsa/radeon/Makefile | 3 +-
drivers/gpu/hsa/radeon/kfd_device_queue_manager.h | 102 ++++
drivers/gpu/hsa/radeon/kfd_kernel_queue.c | 302 ++++++++++
drivers/gpu/hsa/radeon/kfd_kernel_queue.h | 67 +++
drivers/gpu/hsa/radeon/kfd_pm4_headers.h | 681 ++++++++++++++++++++++
drivers/gpu/hsa/radeon/kfd_pm4_opcodes.h | 107 ++++
drivers/gpu/hsa/radeon/kfd_priv.h | 34 ++
drivers/gpu/hsa/radeon/kfd_scheduler.h | 5 -
8 files changed, 1295 insertions(+), 6 deletions(-)
create mode 100644 drivers/gpu/hsa/radeon/kfd_device_queue_manager.h
create mode 100644 drivers/gpu/hsa/radeon/kfd_kernel_queue.c
create mode 100644 drivers/gpu/hsa/radeon/kfd_kernel_queue.h
create mode 100644 drivers/gpu/hsa/radeon/kfd_pm4_headers.h
create mode 100644 drivers/gpu/hsa/radeon/kfd_pm4_opcodes.h
diff --git a/drivers/gpu/hsa/radeon/Makefile b/drivers/gpu/hsa/radeon/Makefile
index c87b518..f06d925 100644
--- a/drivers/gpu/hsa/radeon/Makefile
+++ b/drivers/gpu/hsa/radeon/Makefile
@@ -6,6 +6,7 @@ radeon_kfd-y := kfd_module.o kfd_device.o kfd_chardev.o \
kfd_pasid.o kfd_topology.o kfd_process.o \
kfd_doorbell.o kfd_sched_cik_static.o kfd_registers.o \
kfd_vidmem.o kfd_interrupt.o kfd_aperture.o \
- kfd_queue.o kfd_hw_pointer_store.o kfd_mqd_manager.o
+ kfd_queue.o kfd_hw_pointer_store.o kfd_mqd_manager.o \
+ kfd_kernel_queue.o
obj-$(CONFIG_HSA_RADEON) += radeon_kfd.o
diff --git a/drivers/gpu/hsa/radeon/kfd_device_queue_manager.h b/drivers/gpu/hsa/radeon/kfd_device_queue_manager.h
new file mode 100644
index 0000000..0529a96
--- /dev/null
+++ b/drivers/gpu/hsa/radeon/kfd_device_queue_manager.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Ben Goz
+ */
+
+#ifndef DEVICE_QUEUE_MANAGER_H_
+#define DEVICE_QUEUE_MANAGER_H_
+
+#include <linux/rwsem.h>
+#include <linux/list.h>
+#include "kfd_priv.h"
+#include "kfd_mqd_manager.h"
+
+#define QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS (500)
+#define QUEUES_PER_PIPE (8)
+#define PIPE_PER_ME_CP_SCHEDULING (4)
+#define CIK_VMID_NUM (8)
+#define KFD_VMID_START_OFFSET (8)
+#define VMID_PER_DEVICE CIK_VMID_NUM
+#define KFD_DQM_FIRST_PIPE (0)
+
+/* One entry in device_queue_manager.queues per registered process. */
+struct device_process_node {
+ struct qcm_process_device *qpd;
+ struct list_head list;
+};
+
+/*
+ * device_queue_manager - Per-device queue scheduling state. The function
+ * pointers form the ops table selected by device_queue_manager_init()
+ * according to sched_policy (CP-scheduled vs driver-managed variants).
+ */
+struct device_queue_manager {
+ int (*create_queue)(struct device_queue_manager *dqm,
+ struct queue *q,
+ struct qcm_process_device *qpd,
+ int *allocate_vmid);
+ int (*destroy_queue)(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ struct queue *q);
+ int (*update_queue)(struct device_queue_manager *dqm,
+ struct queue *q);
+ int (*destroy_queues)(struct device_queue_manager *dqm);
+ struct mqd_manager * (*get_mqd_manager)(struct device_queue_manager *dqm,
+ enum KFD_MQD_TYPE type);
+ int (*execute_queues)(struct device_queue_manager *dqm);
+ int (*register_process)(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd);
+ int (*unregister_process)(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd);
+ int (*initialize)(struct device_queue_manager *dqm);
+ int (*start)(struct device_queue_manager *dqm);
+ int (*stop)(struct device_queue_manager *dqm);
+ void (*uninitialize)(struct device_queue_manager *dqm);
+ int (*create_kernel_queue)(struct device_queue_manager *dqm,
+ struct kernel_queue *kq,
+ struct qcm_process_device *qpd);
+ void (*destroy_kernel_queue)(struct device_queue_manager *dqm,
+ struct kernel_queue *kq,
+ struct qcm_process_device *qpd);
+ bool (*set_cache_memory_policy)(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ enum cache_policy default_policy,
+ enum cache_policy alternate_policy,
+ void __user *alternate_aperture_base,
+ uint64_t alternate_aperture_size);
+
+
+ /* cached MQD managers, lazily created by get_mqd_manager() */
+ struct mqd_manager *mqds[KFD_MQD_TYPE_MAX];
+ struct packet_manager packets;
+ struct kfd_dev *dev;
+ struct mutex lock; /* protects the lists, counters and bitmaps below */
+ struct list_head queues; /* list of device_process_node */
+ unsigned int processes_count;
+ unsigned int queue_count;
+ unsigned int next_pipe_to_allocate;
+ unsigned int *allocated_queues; /* per-pipe bitmap of free HQD slots */
+ unsigned int vmid_bitmap; /* free VMIDs (no-HWS mode) */
+ uint64_t pipelines_addr; /* GPU address of the HPD buffers */
+ kfd_mem_obj pipeline_mem;
+ uint64_t fence_gpu_addr; /* preemption fence (CP-scheduled mode) */
+ unsigned int *fence_addr;
+ kfd_mem_obj fence_mem;
+ bool active_runlist;
+};
+
+
+
+#endif /* DEVICE_QUEUE_MANAGER_H_ */
diff --git a/drivers/gpu/hsa/radeon/kfd_kernel_queue.c b/drivers/gpu/hsa/radeon/kfd_kernel_queue.c
new file mode 100644
index 0000000..61f420f
--- /dev/null
+++ b/drivers/gpu/hsa/radeon/kfd_kernel_queue.c
@@ -0,0 +1,302 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Ben Goz
+ */
+
+#include <linux/types.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/printk.h>
+#include "kfd_kernel_queue.h"
+#include "kfd_priv.h"
+#include "kfd_device_queue_manager.h"
+#include "kfd_pm4_headers.h"
+#include "kfd_pm4_opcodes.h"
+
+#define PM4_COUNT_ZERO (((1 << 15) - 1) << 16)
+
+/*
+ * initialize - Build a kernel (HIQ/DIQ) queue: doorbell, packet buffer,
+ * read/write pointers, MQD; an HIQ is also loaded onto its fixed HQD slot,
+ * a DIQ additionally gets a fence allocation. Returns true on success.
+ */
+static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
+ enum kfd_queue_type type, unsigned int queue_size)
+{
+ struct queue_properties prop;
+ int retval;
+ PM4_TYPE_3_HEADER nop;
+
+ BUG_ON(!kq || !dev);
+ BUG_ON(type != KFD_QUEUE_TYPE_DIQ && type != KFD_QUEUE_TYPE_HIQ);
+
+ pr_debug("kfd: In func %s initializing queue type %d size %d\n", __func__, KFD_QUEUE_TYPE_HIQ, queue_size);
+
+ /* prebuild the NOP packet used to pad the ring on wraparound */
+ nop.opcode = IT_NOP;
+ nop.type = PM4_TYPE_3;
+ nop.u32all |= PM4_COUNT_ZERO;
+
+ kq->dev = dev;
+ kq->nop_packet = nop.u32all;
+ switch (type) {
+ case KFD_QUEUE_TYPE_DIQ:
+ case KFD_QUEUE_TYPE_HIQ:
+ kq->mqd = dev->dqm->get_mqd_manager(dev->dqm, KFD_MQD_TYPE_CIK_HIQ);
+ break;
+ default:
+ BUG();
+ break;
+ }
+
+ if (kq->mqd == NULL)
+ return false;
+
+ prop.doorbell_ptr = (qptr_t *)radeon_kfd_get_kernel_doorbell(dev, &prop.doorbell_off);
+ if (prop.doorbell_ptr == NULL)
+ goto err_get_kernel_doorbell;
+
+ retval = radeon_kfd_vidmem_alloc_map(dev, &kq->pq, (void **)&kq->pq_kernel_addr, &kq->pq_gpu_addr, queue_size);
+ if (retval != 0)
+ goto err_pq_allocate_vidmem;
+
+ retval = radeon_kfd_vidmem_alloc_map(kq->dev, &kq->rptr_mem, (void **)&kq->rptr_kernel, &kq->rptr_gpu_addr,
+ sizeof(*kq->rptr_kernel));
+ if (retval != 0)
+ goto err_rptr_allocate_vidmem;
+
+ /* NOTE(review): wptr allocation sizes with sizeof(*kq->rptr_kernel) --
+ * presumably rptr and wptr are the same type, but verify. */
+ retval = radeon_kfd_vidmem_alloc_map(kq->dev, &kq->wptr_mem, (void **)&kq->wptr_kernel, &kq->wptr_gpu_addr,
+ sizeof(*kq->rptr_kernel));
+ if (retval != 0)
+ goto err_wptr_allocate_vidmem;
+
+ prop.queue_size = queue_size;
+ prop.is_interop = false;
+ prop.priority = 1;
+ prop.queue_percent = 100;
+ prop.type = type;
+ prop.vmid = 0;
+ prop.queue_address = kq->pq_gpu_addr;
+ prop.read_ptr = kq->rptr_gpu_addr;
+ prop.write_ptr = kq->wptr_gpu_addr;
+
+ if (init_queue(&kq->queue, prop) != 0)
+ goto err_init_queue;
+
+ kq->queue->device = dev;
+ kq->queue->process = radeon_kfd_get_process(current);
+
+ retval = kq->mqd->init_mqd(kq->mqd, &kq->queue->mqd, &kq->queue->mqd_mem_obj,
+ &kq->queue->gart_mqd_addr, &kq->queue->properties);
+ if (retval != 0)
+ goto err_init_mqd;
+
+ /* assign HIQ to HQD */
+ if (type == KFD_QUEUE_TYPE_HIQ) {
+ pr_debug("assigning hiq to hqd\n");
+ kq->queue->pipe = KFD_CIK_HIQ_PIPE;
+ kq->queue->queue = KFD_CIK_HIQ_QUEUE;
+
+ kq->mqd->acquire_hqd(kq->mqd, kq->queue->pipe, kq->queue->queue, 0);
+ kq->mqd->load_mqd(kq->mqd, kq->queue->mqd);
+ kq->mqd->release_hqd(kq->mqd);
+ } else {
+ /* allocate fence for DIQ */
+ retval = radeon_kfd_vidmem_alloc_map(
+ dev,
+ &kq->fence_mem_obj,
+ &kq->fence_kernel_address,
+ &kq->fence_gpu_addr,
+ sizeof(uint32_t));
+
+ if (retval != 0)
+ goto err_alloc_fence;
+ }
+
+ print_queue(kq->queue);
+
+ return true;
+/* NOTE(review): the err_alloc_fence path does not uninit the MQD created
+ * just above -- verify uninit_queue() covers it or a leak exists here. */
+err_alloc_fence:
+err_init_mqd:
+ uninit_queue(kq->queue);
+err_init_queue:
+ radeon_kfd_vidmem_free_unmap(kq->dev, kq->wptr_mem);
+err_wptr_allocate_vidmem:
+ radeon_kfd_vidmem_free_unmap(kq->dev, kq->rptr_mem);
+err_rptr_allocate_vidmem:
+ radeon_kfd_vidmem_free_unmap(kq->dev, kq->pq);
+err_pq_allocate_vidmem:
+ pr_err("kfd: error init pq\n");
+ radeon_kfd_release_kernel_doorbell(dev, (u32 *)prop.doorbell_ptr);
+err_get_kernel_doorbell:
+ pr_err("kfd: error init doorbell");
+ return false;
+
+}
+
+/*
+ * uninitialize - Tear down a kernel queue: preempt the HIQ from its HQD
+ * (DIQ was never loaded), then free the ring, pointers, doorbell and queue.
+ */
+static void uninitialize(struct kernel_queue *kq)
+{
+ BUG_ON(!kq);
+
+ if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ)
+ kq->mqd->destroy_mqd(kq->mqd,
+ kq->queue->mqd,
+ KFD_PREEMPT_TYPE_WAVEFRONT,
+ QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
+
+ radeon_kfd_vidmem_free_unmap(kq->dev, kq->rptr_mem);
+ radeon_kfd_vidmem_free_unmap(kq->dev, kq->wptr_mem);
+ radeon_kfd_vidmem_free_unmap(kq->dev, kq->pq);
+ radeon_kfd_release_kernel_doorbell(kq->dev, (u32 *)kq->queue->properties.doorbell_ptr);
+ uninit_queue(kq->queue);
+}
+
+/*
+ * acquire_packet_buffer - Reserve packet_size_in_dwords contiguous dwords in
+ * the kernel queue ring and return a pointer to them; the reservation is
+ * published later by submit_packet() (or dropped by rollback_packet()).
+ * Returns 0 on success, -ENOMEM if the ring has insufficient space.
+ */
+static int acquire_packet_buffer(struct kernel_queue *kq,
+ size_t packet_size_in_dwords, unsigned int **buffer_ptr)
+{
+ size_t available_size;
+ size_t queue_size_dwords;
+ qptr_t wptr, rptr;
+ unsigned int *queue_address;
+
+ BUG_ON(!kq || !buffer_ptr);
+
+ rptr = *kq->rptr_kernel;
+ wptr = *kq->wptr_kernel;
+ queue_address = (unsigned int *)kq->pq_kernel_addr;
+ queue_size_dwords = kq->queue->properties.queue_size / sizeof(uint32_t);
+
+ pr_debug("kfd: In func %s\nrptr: %d\nwptr: %d\nqueue_address 0x%p\n", __func__, rptr, wptr, queue_address);
+
+ /* ring free space, keeping one dword gap so full != empty */
+ available_size = (rptr - 1 - wptr + queue_size_dwords) % queue_size_dwords;
+
+ if (packet_size_in_dwords >= queue_size_dwords ||
+ packet_size_in_dwords >= available_size)
+ return -ENOMEM;
+
+ /* NOTE(review): when the packet would straddle the ring end, the tail is
+ * padded with NOPs and wptr wraps to 0, but available space is not
+ * re-checked after the wrap -- verify the packet cannot overrun rptr. */
+ if (wptr + packet_size_in_dwords > queue_size_dwords) {
+ while (wptr > 0) {
+ queue_address[wptr] = kq->nop_packet;
+ wptr = (wptr + 1) % queue_size_dwords;
+ }
+ }
+
+ *buffer_ptr = &queue_address[wptr];
+ kq->pending_wptr = wptr + packet_size_in_dwords;
+
+ return 0;
+}
+
+/*
+ * submit_packet - Publish the dwords reserved by acquire_packet_buffer():
+ * advance the shared write pointer and ring the queue's doorbell.
+ */
+static void submit_packet(struct kernel_queue *kq)
+{
+#ifdef DEBUG
+ int i;
+#endif
+
+ BUG_ON(!kq);
+
+#ifdef DEBUG
+ /* dump the dwords being submitted */
+ for (i = *kq->wptr_kernel; i < kq->pending_wptr; i++) {
+ pr_debug("0x%2X ", kq->pq_kernel_addr[i]);
+ if (i % 15 == 0)
+ pr_debug("\n");
+ }
+ pr_debug("\n");
+#endif
+
+ *kq->wptr_kernel = kq->pending_wptr;
+ write_kernel_doorbell((u32 *)kq->queue->properties.doorbell_ptr, kq->pending_wptr);
+}
+
+/*
+ * sync_with_hw - Busy-wait until the HW has consumed all submitted packets
+ * (rptr catches up with wptr) or timeout_ms milliseconds elapse. The
+ * timeout is converted with msecs_to_jiffies(); the original added the raw
+ * millisecond count to jiffies (wrong unless HZ == 1000) and then logged
+ * the mutated value. Returns 0 on success, -ETIME on timeout.
+ */
+static int sync_with_hw(struct kernel_queue *kq, unsigned long timeout_ms)
+{
+ unsigned long end_jiffies;
+
+ BUG_ON(!kq);
+
+ end_jiffies = jiffies + msecs_to_jiffies(timeout_ms);
+ while (*kq->wptr_kernel != *kq->rptr_kernel) {
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("kfd: kernel_queue %s timeout expired %lu\n", __func__, timeout_ms);
+ pr_err("kfd: wptr: %d rptr: %d\n", *kq->wptr_kernel, *kq->rptr_kernel);
+ return -ETIME;
+ }
+ cpu_relax();
+ }
+
+ return 0;
+}
+
+/*
+ * rollback_packet - Abandon a reservation made by acquire_packet_buffer()
+ * by resetting the pending write pointer to the last published value.
+ */
+static void rollback_packet(struct kernel_queue *kq)
+{
+ BUG_ON(!kq);
+ kq->pending_wptr = *kq->queue->properties.write_ptr;
+}
+
+/*
+ * kernel_queue_init - Allocate a kernel queue object, wire its ops and
+ * initialize it with a 2048-byte ring. Returns NULL on failure.
+ */
+struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, enum kfd_queue_type type)
+{
+ struct kernel_queue *kq;
+
+ BUG_ON(!dev);
+
+ kq = kzalloc(sizeof(struct kernel_queue), GFP_KERNEL);
+ if (!kq)
+ return NULL;
+
+ kq->initialize = initialize;
+ kq->uninitialize = uninitialize;
+ kq->acquire_packet_buffer = acquire_packet_buffer;
+ kq->submit_packet = submit_packet;
+ kq->sync_with_hw = sync_with_hw;
+ kq->rollback_packet = rollback_packet;
+
+ if (kq->initialize(kq, dev, type, 2048) == false) {
+ pr_err("kfd: failed to init kernel queue\n");
+ kfree(kq);
+ return NULL;
+ }
+ return kq;
+}
+
+/*
+ * kernel_queue_uninit - Tear down and free a queue created by
+ * kernel_queue_init().
+ */
+void kernel_queue_uninit(struct kernel_queue *kq)
+{
+ BUG_ON(!kq);
+
+ kq->uninitialize(kq);
+ kfree(kq);
+}
+
+/*
+ * test_kq - Smoke test: create an HIQ, submit five NOP packets and wait for
+ * the HW to consume them. BUGs on any failure; debug/bring-up aid only.
+ */
+void test_kq(struct kfd_dev *dev)
+{
+ struct kernel_queue *kq;
+ uint32_t *buffer, i;
+ int retval;
+
+ BUG_ON(!dev);
+
+ pr_debug("kfd: starting kernel queue test\n");
+
+ kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_HIQ);
+ BUG_ON(!kq);
+
+ retval = kq->acquire_packet_buffer(kq, 5, &buffer);
+ BUG_ON(retval != 0);
+ for (i = 0; i < 5; i++)
+ buffer[i] = kq->nop_packet;
+ kq->submit_packet(kq);
+ kq->sync_with_hw(kq, 1000);
+
+ pr_debug("kfd: ending kernel queue test\n");
+}
+
diff --git a/drivers/gpu/hsa/radeon/kfd_kernel_queue.h b/drivers/gpu/hsa/radeon/kfd_kernel_queue.h
new file mode 100644
index 0000000..339376c
--- /dev/null
+++ b/drivers/gpu/hsa/radeon/kfd_kernel_queue.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Ben Goz
+ */
+
+#ifndef KERNEL_QUEUE_H_
+#define KERNEL_QUEUE_H_
+
+#include <linux/list.h>
+#include <linux/types.h>
+#include "kfd_priv.h"
+
+struct kernel_queue {
+ /* interface */
+ bool (*initialize)(struct kernel_queue *kq, struct kfd_dev *dev,
+ enum kfd_queue_type type, unsigned int queue_size);
+ void (*uninitialize)(struct kernel_queue *kq);
+ int (*acquire_packet_buffer)(struct kernel_queue *kq,
+ size_t packet_size_in_dwords, unsigned int **buffer_ptr);
+ void (*submit_packet)(struct kernel_queue *kq);
+ int (*sync_with_hw)(struct kernel_queue *kq, unsigned long timeout_ms);
+ void (*rollback_packet)(struct kernel_queue *kq);
+
+ /* data */
+ struct kfd_dev *dev;
+ struct mqd_manager *mqd;
+ struct queue *queue;
+ qptr_t pending_wptr;
+ unsigned int nop_packet;
+
+ kfd_mem_obj rptr_mem;
+ qptr_t *rptr_kernel;
+ uint64_t rptr_gpu_addr;
+ kfd_mem_obj wptr_mem;
+ qptr_t *wptr_kernel;
+ uint64_t wptr_gpu_addr;
+ kfd_mem_obj pq;
+ uint64_t pq_gpu_addr;
+ qptr_t *pq_kernel_addr;
+
+ kfd_mem_obj fence_mem_obj;
+ uint64_t fence_gpu_addr;
+ void *fence_kernel_address;
+
+ struct list_head list;
+};
+
+#endif /* KERNEL_QUEUE_H_ */
diff --git a/drivers/gpu/hsa/radeon/kfd_pm4_headers.h b/drivers/gpu/hsa/radeon/kfd_pm4_headers.h
new file mode 100644
index 0000000..dae460f
--- /dev/null
+++ b/drivers/gpu/hsa/radeon/kfd_pm4_headers.h
@@ -0,0 +1,681 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef F32_MES_PM4_PACKETS_72_H
+#define F32_MES_PM4_PACKETS_72_H
+
+#ifndef PM4_HEADER_DEFINED
+#define PM4_HEADER_DEFINED
+
+typedef union PM4_TYPE_3_HEADER {
+ struct {
+ unsigned int predicate:1; /* < 0 for diq packets */
+ unsigned int shader_type:1; /* < 0 for diq packets */
+ unsigned int reserved1:6; /* < reserved */
+ unsigned int opcode:8; /* < IT opcode */
+ unsigned int count:14; /* < number of DWORDs - 1 in the information body. */
+ unsigned int type:2; /* < packet identifier. It should be 3 for type 3 packets */
+ };
+ unsigned int u32all;
+} PM4_TYPE_3_HEADER;
+#endif
+
+/*
+ * --------------------_MAP_QUEUES--------------------
+ */
+
+#ifndef _PM4__MAP_QUEUES_DEFINED
+#define _PM4__MAP_QUEUES_DEFINED
+enum _map_queues_queue_sel_enum {
+ queue_sel___map_queues__map_to_specified_queue_slots = 0,
+ queue_sel___map_queues__map_to_hws_determined_queue_slots = 1,
+ queue_sel___map_queues__enable_process_queues = 2,
+ queue_sel___map_queues__reserved = 3 };
+
+enum _map_queues_vidmem_enum {
+ vidmem___map_queues__uses_no_video_memory = 0,
+ vidmem___map_queues__uses_video_memory = 1 };
+
+enum _map_queues_alloc_format_enum {
+ alloc_format___map_queues__one_per_pipe = 0,
+ alloc_format___map_queues__all_on_one_pipe = 1 };
+
+enum _map_queues_engine_sel_enum {
+ engine_sel___map_queues__compute = 0,
+ engine_sel___map_queues__sdma0_queue = 2,
+ engine_sel___map_queues__sdma1_queue = 3 };
+
+struct pm4_map_queues {
+ union {
+ PM4_TYPE_3_HEADER header;
+ unsigned int ordinal1;
+ };
+
+ union {
+ struct {
+ unsigned int reserved1:4;
+ enum _map_queues_queue_sel_enum queue_sel:2;
+ unsigned int reserved2:2;
+ unsigned int vmid:4;
+ unsigned int reserved3:4;
+ enum _map_queues_vidmem_enum vidmem:2;
+ unsigned int reserved4:6;
+ enum _map_queues_alloc_format_enum alloc_format:2;
+ enum _map_queues_engine_sel_enum engine_sel:3;
+ unsigned int num_queues:3;
+ } bitfields2;
+ unsigned int ordinal2;
+ };
+
+ struct {
+ union {
+ struct {
+ unsigned int reserved5:2;
+ unsigned int doorbell_offset:21;
+ unsigned int reserved6:3;
+ unsigned int queue:6;
+ } bitfields3;
+ unsigned int ordinal3;
+ };
+
+ unsigned int mqd_addr_lo;
+ unsigned int mqd_addr_hi;
+ unsigned int wptr_addr_lo;
+ unsigned int wptr_addr_hi;
+
+ } _map_queues_ordinals[1]; /* 1..N of these ordinal groups */
+
+};
+#endif
+
+/*
+ * --------------------_QUERY_STATUS--------------------
+ */
+
+#ifndef _PM4__QUERY_STATUS_DEFINED
+#define _PM4__QUERY_STATUS_DEFINED
+enum _query_status_interrupt_sel_enum {
+ interrupt_sel___query_status__completion_status = 0,
+ interrupt_sel___query_status__process_status = 1,
+ interrupt_sel___query_status__queue_status = 2,
+ interrupt_sel___query_status__reserved = 3 };
+
+enum _query_status_command_enum {
+ command___query_status__interrupt_only = 0,
+ command___query_status__fence_only_immediate = 1,
+ command___query_status__fence_only_after_write_ack = 2,
+ command___query_status__fence_wait_for_write_ack_send_interrupt = 3 };
+
+enum _query_status_engine_sel_enum {
+ engine_sel___query_status__compute = 0,
+ engine_sel___query_status__sdma0 = 2,
+ engine_sel___query_status__sdma1 = 3 };
+
+struct pm4_query_status {
+ union {
+ PM4_TYPE_3_HEADER header;
+ unsigned int ordinal1;
+ };
+
+ union {
+ struct {
+ unsigned int context_id:28;
+ enum _query_status_interrupt_sel_enum interrupt_sel:2;
+ enum _query_status_command_enum command:2;
+ } bitfields2;
+ unsigned int ordinal2;
+ };
+
+ union {
+ struct {
+ unsigned int pasid:16;
+ unsigned int reserved1:16;
+ } bitfields3;
+ struct {
+ unsigned int reserved2:2;
+ unsigned int doorbell_offset:21;
+ unsigned int reserved3:3;
+ enum _query_status_engine_sel_enum engine_sel:3;
+ unsigned int reserved4:3;
+ } bitfields4;
+ unsigned int ordinal3;
+ };
+
+ unsigned int addr_lo;
+ unsigned int addr_hi;
+ unsigned int data_lo;
+ unsigned int data_hi;
+
+};
+#endif
+
+/*
+ * --------------------_UNMAP_QUEUES--------------------
+ */
+
+#ifndef _PM4__UNMAP_QUEUES_DEFINED
+#define _PM4__UNMAP_QUEUES_DEFINED
+enum _unmap_queues_action_enum {
+ action___unmap_queues__preempt_queues = 0,
+ action___unmap_queues__reset_queues = 1,
+ action___unmap_queues__disable_process_queues = 2,
+ action___unmap_queues__reserved = 3 };
+
+enum _unmap_queues_queue_sel_enum {
+ queue_sel___unmap_queues__perform_request_on_specified_queues = 0,
+ queue_sel___unmap_queues__perform_request_on_pasid_queues = 1,
+ queue_sel___unmap_queues__perform_request_on_all_active_queues = 2,
+ queue_sel___unmap_queues__reserved = 3 };
+
+enum _unmap_queues_engine_sel_enum {
+ engine_sel___unmap_queues__compute = 0,
+ engine_sel___unmap_queues__sdma0 = 2,
+ engine_sel___unmap_queues__sdma1 = 3 };
+
+struct pm4_unmap_queues {
+ union {
+ PM4_TYPE_3_HEADER header;
+ unsigned int ordinal1;
+ };
+
+ union {
+ struct {
+ enum _unmap_queues_action_enum action:2;
+ unsigned int reserved1:2;
+ enum _unmap_queues_queue_sel_enum queue_sel:2;
+ unsigned int reserved2:20;
+ enum _unmap_queues_engine_sel_enum engine_sel:3;
+ unsigned int num_queues:3;
+ } bitfields2;
+ unsigned int ordinal2;
+ };
+
+ union {
+ struct {
+ unsigned int pasid:16;
+ unsigned int reserved3:16;
+ } bitfields3;
+ struct {
+ unsigned int reserved4:2;
+ unsigned int doorbell_offset0:21;
+ unsigned int reserved5:9;
+ } bitfields4;
+ unsigned int ordinal3;
+ };
+
+ union {
+ struct {
+ unsigned int reserved6:2;
+ unsigned int doorbell_offset1:21;
+ unsigned int reserved7:9;
+ } bitfields5;
+ unsigned int ordinal4;
+ };
+
+ union {
+ struct {
+ unsigned int reserved8:2;
+ unsigned int doorbell_offset2:21;
+ unsigned int reserved9:9;
+ } bitfields6;
+ unsigned int ordinal5;
+ };
+
+ union {
+ struct {
+ unsigned int reserved10:2;
+ unsigned int doorbell_offset3:21;
+ unsigned int reserved11:9;
+ } bitfields7;
+ unsigned int ordinal6;
+ };
+
+};
+#endif
+
+/*
+ * --------------------_SET_RESOURCES--------------------
+ */
+
+#ifndef _PM4__SET_RESOURCES_DEFINED
+#define _PM4__SET_RESOURCES_DEFINED
+enum _set_resources_queue_type_enum {
+ queue_type___set_resources__hsa_interface_queue_hiq = 1,
+ queue_type___set_resources__hsa_debug_interface_queue = 4 };
+
+struct pm4_set_resources {
+ union {
+ PM4_TYPE_3_HEADER header;
+ unsigned int ordinal1;
+ };
+
+ union {
+ struct {
+
+ unsigned int vmid_mask:16;
+ unsigned int unmap_latency:8;
+ unsigned int reserved1:5;
+ enum _set_resources_queue_type_enum queue_type:3;
+ } bitfields2;
+ unsigned int ordinal2;
+ };
+
+ unsigned int queue_mask_lo;
+ unsigned int queue_mask_hi;
+ unsigned int gws_mask_lo;
+ unsigned int gws_mask_hi;
+
+ union {
+ struct {
+ unsigned int oac_mask:16;
+ unsigned int reserved2:16;
+ } bitfields3;
+ unsigned int ordinal7;
+ };
+
+ union {
+ struct {
+ unsigned int gds_heap_base:6;
+ unsigned int reserved3:5;
+ unsigned int gds_heap_size:6;
+ unsigned int reserved4:15;
+ } bitfields4;
+ unsigned int ordinal8;
+ };
+
+};
+#endif
+
+/*
+ * --------------------_RUN_LIST--------------------
+ */
+
+#ifndef _PM4__RUN_LIST_DEFINED
+#define _PM4__RUN_LIST_DEFINED
+
+struct pm4_runlist {
+ union {
+ PM4_TYPE_3_HEADER header;
+ unsigned int ordinal1;
+ };
+
+ union {
+ struct {
+ unsigned int reserved1:2;
+ unsigned int ib_base_lo:30;
+ } bitfields2;
+ unsigned int ordinal2;
+ };
+
+ union {
+ struct {
+ unsigned int ib_base_hi:16;
+ unsigned int reserved2:16;
+ } bitfields3;
+ unsigned int ordinal3;
+ };
+
+ union {
+ struct {
+ unsigned int ib_size:20;
+ unsigned int chain:1;
+ unsigned int offload_polling:1;
+ unsigned int reserved3:1;
+ unsigned int valid:1;
+ unsigned int vmid:4;
+ unsigned int reserved4:4;
+ } bitfields4;
+ unsigned int ordinal4;
+ };
+
+};
+#endif
+
+/*
+ * --------------------_MAP_PROCESS--------------------
+ */
+
+#ifndef _PM4__MAP_PROCESS_DEFINED
+#define _PM4__MAP_PROCESS_DEFINED
+
+struct pm4_map_process {
+ union {
+ PM4_TYPE_3_HEADER header;
+ unsigned int ordinal1;
+ };
+
+ union {
+ struct {
+ unsigned int pasid:16;
+ unsigned int reserved1:8;
+ unsigned int diq_enable:1;
+ unsigned int reserved2:7;
+ } bitfields2;
+ unsigned int ordinal2;
+ };
+
+ union {
+ struct {
+ unsigned int page_table_base:28;
+ unsigned int reserved3:4;
+ } bitfields3;
+ unsigned int ordinal3;
+ };
+
+ unsigned int sh_mem_bases;
+ unsigned int sh_mem_ape1_base;
+ unsigned int sh_mem_ape1_limit;
+ unsigned int sh_mem_config;
+ unsigned int gds_addr_lo;
+ unsigned int gds_addr_hi;
+
+ union {
+ struct {
+ unsigned int num_gws:6;
+ unsigned int reserved4:2;
+ unsigned int num_oac:4;
+ unsigned int reserved5:4;
+ unsigned int gds_size:6;
+ unsigned int reserved6:10;
+ } bitfields4;
+ unsigned int ordinal10;
+ };
+
+};
+#endif
+
+/*--------------------_MAP_QUEUES--------------------*/
+
+#ifndef _PM4__MAP_QUEUES_DEFINED
+#define _PM4__MAP_QUEUES_DEFINED
+enum _MAP_QUEUES_queue_sel_enum {
+ queue_sel___map_queues__map_to_specified_queue_slots = 0,
+ queue_sel___map_queues__map_to_hws_determined_queue_slots = 1,
+ queue_sel___map_queues__enable_process_queues = 2,
+ queue_sel___map_queues__reserved = 3 };
+
+enum _MAP_QUEUES_vidmem_enum {
+ vidmem___map_queues__uses_no_video_memory = 0,
+ vidmem___map_queues__uses_video_memory = 1 };
+
+enum _MAP_QUEUES_alloc_format_enum {
+ alloc_format___map_queues__one_per_pipe = 0,
+ alloc_format___map_queues__all_on_one_pipe = 1 };
+
+enum _MAP_QUEUES_engine_sel_enum {
+ engine_sel___map_queues__compute = 0,
+ engine_sel___map_queues__sdma0_queue = 2,
+ engine_sel___map_queues__sdma1_queue = 3 };
+
+
+typedef struct _PM4__MAP_QUEUES {
+ union {
+ PM4_TYPE_3_HEADER header;
+ unsigned int ordinal1;
+ };
+
+ union {
+ struct {
+ unsigned int reserved1:4;
+ enum _MAP_QUEUES_queue_sel_enum queue_sel:2;
+ unsigned int reserved2:2;
+ unsigned int vmid:4;
+ unsigned int reserved3:4;
+ enum _MAP_QUEUES_vidmem_enum vidmem:2;
+ unsigned int reserved4:6;
+ enum _MAP_QUEUES_alloc_format_enum alloc_format:2;
+ enum _MAP_QUEUES_engine_sel_enum engine_sel:3;
+ unsigned int num_queues:3;
+ } bitfields2;
+ unsigned int ordinal2;
+ };
+
+ struct {
+ union {
+ struct {
+ unsigned int reserved5:2;
+ unsigned int doorbell_offset:21;
+ unsigned int reserved6:3;
+ unsigned int queue:6;
+ } bitfields3;
+ unsigned int ordinal3;
+ };
+
+ unsigned int mqd_addr_lo;
+
+ unsigned int mqd_addr_hi;
+
+ unsigned int wptr_addr_lo;
+
+ unsigned int wptr_addr_hi;
+
+ } _map_queues_ordinals[1]; /* 1..N of these ordinal groups */
+
+} PM4_MAP_QUEUES, *PPM4_MAP_QUEUES;
+#endif
+
+/*--------------------_QUERY_STATUS--------------------*/
+
+#ifndef _PM4__QUERY_STATUS_DEFINED
+#define _PM4__QUERY_STATUS_DEFINED
+enum _QUERY_STATUS_interrupt_sel_enum {
+ interrupt_sel___query_status__completion_status = 0,
+ interrupt_sel___query_status__process_status = 1,
+ interrupt_sel___query_status__queue_status = 2,
+ interrupt_sel___query_status__reserved = 3 };
+
+enum _QUERY_STATUS_command_enum {
+ command___query_status__interrupt_only = 0,
+ command___query_status__fence_only_immediate = 1,
+ command___query_status__fence_only_after_write_ack = 2,
+ command___query_status__fence_wait_for_write_ack_send_interrupt = 3 };
+
+enum _QUERY_STATUS_engine_sel_enum {
+ engine_sel___query_status__compute = 0,
+ engine_sel___query_status__sdma0 = 2,
+ engine_sel___query_status__sdma1 = 3 };
+
+
+typedef struct _PM4__QUERY_STATUS {
+ union {
+ PM4_TYPE_3_HEADER header;
+ unsigned int ordinal1;
+ };
+
+ union {
+ struct {
+ unsigned int context_id:28;
+ enum _QUERY_STATUS_interrupt_sel_enum interrupt_sel:2;
+ enum _QUERY_STATUS_command_enum command:2;
+ } bitfields2;
+ unsigned int ordinal2;
+ };
+
+ union {
+ struct {
+ unsigned int pasid:16;
+ unsigned int reserved1:16;
+ } bitfields3;
+ struct {
+ unsigned int reserved2:2;
+ unsigned int doorbell_offset:21;
+ unsigned int reserved3:3;
+ enum _QUERY_STATUS_engine_sel_enum engine_sel:3;
+ unsigned int reserved4:3;
+ } bitfields4;
+ unsigned int ordinal3;
+ };
+
+ unsigned int addr_lo;
+
+ unsigned int addr_hi;
+
+ unsigned int data_lo;
+
+ unsigned int data_hi;
+
+} PM4_QUERY_STATUS, *PPM4_QUERY_STATUS;
+#endif
+
+/*
+ * --------------------UNMAP_QUEUES--------------------
+ */
+
+#ifndef _PM4__UNMAP_QUEUES_DEFINED
+#define _PM4__UNMAP_QUEUES_DEFINED
+enum _unmap_queues_action_enum {
+ action___unmap_queues__preempt_queues = 0,
+ action___unmap_queues__reset_queues = 1,
+ action___unmap_queues__disable_process_queues = 2,
+ action___unmap_queues__reserved = 3 };
+
+enum _unmap_queues_queue_sel_enum {
+ queue_sel___unmap_queues__perform_request_on_specified_queues = 0,
+ queue_sel___unmap_queues__perform_request_on_pasid_queues = 1,
+ queue_sel___unmap_queues__perform_request_on_all_active_queues = 2,
+ queue_sel___unmap_queues__reserved = 3 };
+
+enum _unmap_queues_engine_sel_enum {
+ engine_sel___unmap_queues__compute = 0,
+ engine_sel___unmap_queues__sdma0 = 2,
+ engine_sel___unmap_queues__sdma1 = 3 };
+
+
+struct pm4_unmap_queues {
+ union {
+ PM4_TYPE_3_HEADER header;
+ unsigned int ordinal1;
+ };
+
+ union {
+ struct {
+ _unmap_queues_action_enum action:2;
+ unsigned int reserved1:2;
+
+ _unmap_queues_queue_sel_enum queue_sel:2;
+ unsigned int reserved2:20;
+
+ _unmap_queues_engine_sel_enum engine_sel:3;
+ unsigned int num_queues:3;
+ } bitfields2;
+ unsigned int ordinal2;
+ };
+
+ union {
+ struct {
+ unsigned int pasid:16;
+ unsigned int reserved3:16;
+ } bitfields3;
+ struct {
+ unsigned int reserved4:2;
+ unsigned int doorbell_offset0:21;
+ unsigned int reserved5:9;
+ } bitfields4;
+ unsigned int ordinal3;
+ };
+
+ union {
+ struct {
+ unsigned int reserved6:2;
+ unsigned int doorbell_offset1:21;
+ unsigned int reserved7:9;
+ } bitfields5;
+ unsigned int ordinal4;
+ };
+
+ union {
+ struct {
+ unsigned int reserved8:2;
+ unsigned int doorbell_offset2:21;
+ unsigned int reserved9:9;
+ } bitfields6;
+ unsigned int ordinal5;
+ };
+
+ union {
+ struct {
+ unsigned int reserved10:2;
+ unsigned int doorbell_offset3:21;
+ unsigned int reserved11:9;
+ } bitfields7;
+ unsigned int ordinal6;
+ };
+
+};
+#endif
+
+/* --------------------_SET_SH_REG--------------------*/
+
+#ifndef _PM4__SET_SH_REG_DEFINED
+#define _PM4__SET_SH_REG_DEFINED
+
+typedef struct _PM4__SET_SH_REG {
+ union {
+ PM4_TYPE_3_HEADER header;
+ unsigned int ordinal1;
+ };
+
+ union {
+ struct {
+ unsigned int reg_offset:16;
+ unsigned int reserved1:8;
+ unsigned int vmid_shift:5;
+ unsigned int insert_vmid:1;
+ unsigned int reserved2:1;
+ unsigned int non_incr_addr:1;
+ } bitfields2;
+ unsigned int ordinal2;
+ };
+
+ unsigned int reg_data[1]; /* 1..N of these fields */
+
+} PM4_SET_SH_REG, *PPM4_SET_SH_REG;
+#endif
+
+/*--------------------_SET_CONFIG_REG--------------------*/
+
+#ifndef _PM4__SET_CONFIG_REG_DEFINED
+#define _PM4__SET_CONFIG_REG_DEFINED
+
+typedef struct _PM4__SET_CONFIG_REG {
+ union {
+ PM4_TYPE_3_HEADER header;
+ unsigned int ordinal1;
+ };
+
+ union {
+ struct {
+ unsigned int reg_offset:16;
+ unsigned int reserved1:8;
+ unsigned int vmid_shift:5;
+ unsigned int insert_vmid:1;
+ unsigned int reserved2:2;
+ } bitfields2;
+ unsigned int ordinal2;
+ };
+
+ unsigned int reg_data[1]; /* 1..N of these fields */
+
+} PM4_SET_CONFIG_REG, *PPM4_SET_CONFIG_REG;
+#endif
+#endif
diff --git a/drivers/gpu/hsa/radeon/kfd_pm4_opcodes.h b/drivers/gpu/hsa/radeon/kfd_pm4_opcodes.h
new file mode 100644
index 0000000..c04060c
--- /dev/null
+++ b/drivers/gpu/hsa/radeon/kfd_pm4_opcodes.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+
+#ifndef PM4_IT_OPCODES_H
+#define PM4_IT_OPCODES_H
+
+enum it_opcode_type {
+ IT_NOP = 0x10,
+ IT_SET_BASE = 0x11,
+ IT_CLEAR_STATE = 0x12,
+ IT_INDEX_BUFFER_SIZE = 0x13,
+ IT_DISPATCH_DIRECT = 0x15,
+ IT_DISPATCH_INDIRECT = 0x16,
+ IT_ATOMIC_GDS = 0x1D,
+ IT_OCCLUSION_QUERY = 0x1F,
+ IT_SET_PREDICATION = 0x20,
+ IT_REG_RMW = 0x21,
+ IT_COND_EXEC = 0x22,
+ IT_PRED_EXEC = 0x23,
+ IT_DRAW_INDIRECT = 0x24,
+ IT_DRAW_INDEX_INDIRECT = 0x25,
+ IT_INDEX_BASE = 0x26,
+ IT_DRAW_INDEX_2 = 0x27,
+ IT_CONTEXT_CONTROL = 0x28,
+ IT_INDEX_TYPE = 0x2A,
+ IT_DRAW_INDIRECT_MULTI = 0x2C,
+ IT_DRAW_INDEX_AUTO = 0x2D,
+ IT_NUM_INSTANCES = 0x2F,
+ IT_DRAW_INDEX_MULTI_AUTO = 0x30,
+ IT_INDIRECT_BUFFER_CNST = 0x33,
+ IT_STRMOUT_BUFFER_UPDATE = 0x34,
+ IT_DRAW_INDEX_OFFSET_2 = 0x35,
+ IT_DRAW_PREAMBLE = 0x36,
+ IT_WRITE_DATA = 0x37,
+ IT_DRAW_INDEX_INDIRECT_MULTI = 0x38,
+ IT_MEM_SEMAPHORE = 0x39,
+ IT_COPY_DW = 0x3B,
+ IT_WAIT_REG_MEM = 0x3C,
+ IT_INDIRECT_BUFFER = 0x3F,
+ IT_COPY_DATA = 0x40,
+ IT_PFP_SYNC_ME = 0x42,
+ IT_SURFACE_SYNC = 0x43,
+ IT_COND_WRITE = 0x45,
+ IT_EVENT_WRITE = 0x46,
+ IT_EVENT_WRITE_EOP = 0x47,
+ IT_EVENT_WRITE_EOS = 0x48,
+ IT_RELEASE_MEM = 0x49,
+ IT_PREAMBLE_CNTL = 0x4A,
+ IT_DMA_DATA = 0x50,
+ IT_ACQUIRE_MEM = 0x58,
+ IT_REWIND = 0x59,
+ IT_LOAD_UCONFIG_REG = 0x5E,
+ IT_LOAD_SH_REG = 0x5F,
+ IT_LOAD_CONFIG_REG = 0x60,
+ IT_LOAD_CONTEXT_REG = 0x61,
+ IT_SET_CONFIG_REG = 0x68,
+ IT_SET_CONTEXT_REG = 0x69,
+ IT_SET_CONTEXT_REG_INDIRECT = 0x73,
+ IT_SET_SH_REG = 0x76,
+ IT_SET_SH_REG_OFFSET = 0x77,
+ IT_SET_QUEUE_REG = 0x78,
+ IT_SET_UCONFIG_REG = 0x79,
+ IT_SCRATCH_RAM_WRITE = 0x7D,
+ IT_SCRATCH_RAM_READ = 0x7E,
+ IT_LOAD_CONST_RAM = 0x80,
+ IT_WRITE_CONST_RAM = 0x81,
+ IT_DUMP_CONST_RAM = 0x83,
+ IT_INCREMENT_CE_COUNTER = 0x84,
+ IT_INCREMENT_DE_COUNTER = 0x85,
+ IT_WAIT_ON_CE_COUNTER = 0x86,
+ IT_WAIT_ON_DE_COUNTER_DIFF = 0x88,
+ IT_SWITCH_BUFFER = 0x8B,
+ IT_SET_RESOURCES = 0xA0,
+ IT_MAP_PROCESS = 0xA1,
+ IT_MAP_QUEUES = 0xA2,
+ IT_UNMAP_QUEUES = 0xA3,
+ IT_QUERY_STATUS = 0xA4,
+ IT_RUN_LIST = 0xA5,
+};
+
+#define PM4_TYPE_0 0
+#define PM4_TYPE_2 2
+#define PM4_TYPE_3 3
+
+#endif /* PM4_IT_OPCODES_H */
+
diff --git a/drivers/gpu/hsa/radeon/kfd_priv.h b/drivers/gpu/hsa/radeon/kfd_priv.h
index cc60b48..3a5cecf 100644
--- a/drivers/gpu/hsa/radeon/kfd_priv.h
+++ b/drivers/gpu/hsa/radeon/kfd_priv.h
@@ -55,6 +55,15 @@ struct kfd_scheduler_class;
#define KFD_MMAP_WPTR_START KFD_MMAP_RPTR_END
#define KFD_MMAP_WPTR_END (((1ULL << 32)*4) >> PAGE_SHIFT)
+/*
+ * When working with the cp scheduler, the HIQ must be assigned manually, or via the radeon
+ * driver, to a fixed hqd slot; these are the fixed HIQ hqd slot definitions for Kaveri.
+ * In Kaveri, only the queues of the first ME participate in cp scheduling; with that in
+ * mind, we set the HIQ slot in the second ME.
+ */
+#define KFD_CIK_HIQ_PIPE 4
+#define KFD_CIK_HIQ_QUEUE 0
+
/* GPU ID hash width in bits */
#define KFD_GPU_ID_HASH_WIDTH 16
@@ -68,6 +77,11 @@ typedef unsigned int pasid_t;
/* Type that represents a HW doorbell slot. */
typedef u32 doorbell_t;
+enum cache_policy {
+ cache_policy_coherent,
+ cache_policy_noncoherent
+};
+
struct kfd_device_info {
const struct kfd_scheduler_class *scheduler_class;
unsigned int max_pasid_bits;
@@ -109,6 +123,9 @@ struct kfd_dev {
atomic_t interrupt_ring_wptr;
struct work_struct interrupt_work;
spinlock_t interrupt_lock;
+
+ /* QCM Device instance */
+ struct device_queue_manager *dqm;
};
/* KGD2KFD callbacks */
@@ -357,4 +374,21 @@ void print_queue_properties(struct queue_properties *q);
void print_queue(struct queue *q);
struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, struct kfd_dev *dev);
+
+/* Packet Manager */
+
+#define KFD_HIQ_TIMEOUT (500)
+
+#define KFD_FENCE_COMPLETED (100)
+#define KFD_FENCE_INIT (10)
+#define KFD_UNMAP_LATENCY (15)
+
+struct packet_manager {
+ struct device_queue_manager *dqm;
+ struct kernel_queue *priv_queue;
+ struct mutex lock;
+ bool allocated;
+ kfd_mem_obj ib_buffer_obj;
+};
+
#endif
diff --git a/drivers/gpu/hsa/radeon/kfd_scheduler.h b/drivers/gpu/hsa/radeon/kfd_scheduler.h
index 9dc2994..d2e74b0 100644
--- a/drivers/gpu/hsa/radeon/kfd_scheduler.h
+++ b/drivers/gpu/hsa/radeon/kfd_scheduler.h
@@ -31,11 +31,6 @@ struct kfd_scheduler;
struct kfd_scheduler_process;
struct kfd_scheduler_queue;
-enum cache_policy {
- cache_policy_coherent,
- cache_policy_noncoherent
-};
-
struct kfd_scheduler_class {
const char *name;
--
1.9.1
From: Ben Goz <[email protected]>
This patch adds a new parameter to the KFD module. This parameter enables the
user to select the scheduling policy of the CP. The choices are:
* CP Scheduling with support for over-subscription
* CP Scheduling without support for over-subscription
* Without CP Scheduling
Signed-off-by: Ben Goz <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/hsa/radeon/kfd_module.c | 5 +++
drivers/gpu/hsa/radeon/kfd_priv.h | 65 +++++++++++++++++++++++++++++++++++++
2 files changed, 70 insertions(+)
diff --git a/drivers/gpu/hsa/radeon/kfd_module.c b/drivers/gpu/hsa/radeon/kfd_module.c
index a03743a..e8bb67c 100644
--- a/drivers/gpu/hsa/radeon/kfd_module.c
+++ b/drivers/gpu/hsa/radeon/kfd_module.c
@@ -23,6 +23,7 @@
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/notifier.h>
+#include <linux/moduleparam.h>
#include "kfd_priv.h"
@@ -43,6 +44,10 @@ static const struct kgd2kfd_calls kgd2kfd = {
.resume = kgd2kfd_resume,
};
+int sched_policy = KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION;
+module_param(sched_policy, int, S_IRUSR | S_IWUSR);
+MODULE_PARM_DESC(sched_policy, "Kernel comline parameter define the kfd scheduling policy");
+
bool kgd2kfd_init(unsigned interface_version,
const struct kfd2kgd_calls *f2g,
const struct kgd2kfd_calls **g2f)
diff --git a/drivers/gpu/hsa/radeon/kfd_priv.h b/drivers/gpu/hsa/radeon/kfd_priv.h
index 3a5cecf..b3889aa 100644
--- a/drivers/gpu/hsa/radeon/kfd_priv.h
+++ b/drivers/gpu/hsa/radeon/kfd_priv.h
@@ -70,6 +70,15 @@ struct kfd_scheduler_class;
/* Macro for allocating structures */
#define kfd_alloc_struct(ptr_to_struct) ((typeof(ptr_to_struct)) kzalloc(sizeof(*ptr_to_struct), GFP_KERNEL))
+/* Kernel module parameter to specify the scheduling policy */
+extern int sched_policy;
+
+enum kfd_sched_policy {
+ KFD_SCHED_POLICY_HWS = 0,
+ KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION,
+ KFD_SCHED_POLICY_NO_HWS
+};
+
/* Large enough to hold the maximum usable pasid + 1.
** It must also be able to store the number of doorbells reported by a KFD device. */
typedef unsigned int pasid_t;
@@ -243,6 +252,51 @@ enum KFD_MQD_TYPE {
KFD_MQD_TYPE_MAX
};
+struct scheduling_resources {
+ unsigned int vmid_mask;
+ enum kfd_queue_type type;
+ uint64_t queue_mask;
+ uint64_t gws_mask;
+ uint32_t oac_mask;
+ uint32_t gds_heap_base;
+ uint32_t gds_heap_size;
+};
+
+struct process_queue_manager {
+ /* data */
+ struct kfd_process *process;
+ unsigned int num_concurrent_processes;
+ struct list_head queues;
+ unsigned long *queue_slot_bitmap;
+};
+
+struct qcm_process_device {
+ /* The Device Queue Manager that owns this data */
+ struct device_queue_manager *dqm;
+ struct process_queue_manager *pqm;
+ /* Device Queue Manager lock */
+ struct mutex *lock;
+ /* Queues list */
+ struct list_head queues_list;
+ struct list_head priv_queue_list;
+
+ unsigned int queue_count;
+ unsigned int vmid;
+ bool is_debug;
+ /*
+ * All the memory management data should be here too
+ */
+ uint64_t gds_context_area;
+ uint32_t sh_mem_config;
+ uint32_t sh_mem_bases;
+ uint32_t sh_mem_ape1_base;
+ uint32_t sh_mem_ape1_limit;
+ uint32_t page_table_base;
+ uint32_t gds_size;
+ uint32_t num_gws;
+ uint32_t num_oac;
+};
+
/* Data that is per-process-per device. */
struct kfd_process_device {
/* List of all per-device data for a process. Starts from kfd_process.per_device_data. */
@@ -374,6 +428,8 @@ void print_queue_properties(struct queue_properties *q);
void print_queue(struct queue *q);
struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, struct kfd_dev *dev);
+struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, enum kfd_queue_type type);
+void kernel_queue_uninit(struct kernel_queue *kq);
/* Packet Manager */
@@ -391,4 +447,13 @@ struct packet_manager {
kfd_mem_obj ib_buffer_obj;
};
+int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm);
+void pm_uninit(struct packet_manager *pm);
+int pm_send_set_resources(struct packet_manager *pm, struct scheduling_resources *res);
+int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues);
+int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address, uint32_t fence_value);
+int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
+ enum kfd_preempt_type_filter mode, uint32_t filter_param, bool reset);
+void pm_release_ib(struct packet_manager *pm);
+
#endif
--
1.9.1
From: Ben Goz <[email protected]>
The mqd_manager module handles MQD data structures. MQD stands for Memory Queue
Descriptor, which is used by the H/W to keep the HSA queue state in memory.
Signed-off-by: Ben Goz <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/hsa/radeon/Makefile | 2 +-
drivers/gpu/hsa/radeon/cik_mqds.h | 251 ++++++++++++++
drivers/gpu/hsa/radeon/cik_regs.h | 1 +
drivers/gpu/hsa/radeon/kfd_mqd_manager.c | 453 ++++++++++++++++++++++++++
drivers/gpu/hsa/radeon/kfd_mqd_manager.h | 48 +++
drivers/gpu/hsa/radeon/kfd_priv.h | 26 ++
drivers/gpu/hsa/radeon/kfd_sched_cik_static.c | 10 -
drivers/gpu/hsa/radeon/kfd_vidmem.c | 36 ++
8 files changed, 816 insertions(+), 11 deletions(-)
create mode 100644 drivers/gpu/hsa/radeon/cik_mqds.h
create mode 100644 drivers/gpu/hsa/radeon/kfd_mqd_manager.c
create mode 100644 drivers/gpu/hsa/radeon/kfd_mqd_manager.h
diff --git a/drivers/gpu/hsa/radeon/Makefile b/drivers/gpu/hsa/radeon/Makefile
index 18e1639..c87b518 100644
--- a/drivers/gpu/hsa/radeon/Makefile
+++ b/drivers/gpu/hsa/radeon/Makefile
@@ -6,6 +6,6 @@ radeon_kfd-y := kfd_module.o kfd_device.o kfd_chardev.o \
kfd_pasid.o kfd_topology.o kfd_process.o \
kfd_doorbell.o kfd_sched_cik_static.o kfd_registers.o \
kfd_vidmem.o kfd_interrupt.o kfd_aperture.o \
- kfd_queue.o kfd_hw_pointer_store.o
+ kfd_queue.o kfd_hw_pointer_store.o kfd_mqd_manager.o
obj-$(CONFIG_HSA_RADEON) += radeon_kfd.o
diff --git a/drivers/gpu/hsa/radeon/cik_mqds.h b/drivers/gpu/hsa/radeon/cik_mqds.h
new file mode 100644
index 0000000..58945c8
--- /dev/null
+++ b/drivers/gpu/hsa/radeon/cik_mqds.h
@@ -0,0 +1,251 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Ben Goz
+ */
+
+#ifndef CIK_MQDS_H_
+#define CIK_MQDS_H_
+
+#pragma pack(push, 4)
+
+struct cik_hpd_registers {
+ u32 cp_hpd_roq_offsets;
+ u32 cp_hpd_eop_base_addr;
+ u32 cp_hpd_eop_base_addr_hi;
+ u32 cp_hpd_eop_vmid;
+ u32 cp_hpd_eop_control;
+};
+
+struct cik_hqd_registers {
+ u32 cp_mqd_base_addr;
+ u32 cp_mqd_base_addr_hi;
+ u32 cp_hqd_active;
+ u32 cp_hqd_vmid;
+ u32 cp_hqd_persistent_state;
+ u32 cp_hqd_pipe_priority;
+ u32 cp_hqd_queue_priority;
+ u32 cp_hqd_quantum;
+ u32 cp_hqd_pq_base;
+ u32 cp_hqd_pq_base_hi;
+ u32 cp_hqd_pq_rptr;
+ u32 cp_hqd_pq_rptr_report_addr;
+ u32 cp_hqd_pq_rptr_report_addr_hi;
+ u32 cp_hqd_pq_wptr_poll_addr;
+ u32 cp_hqd_pq_wptr_poll_addr_hi;
+ u32 cp_hqd_pq_doorbell_control;
+ u32 cp_hqd_pq_wptr;
+ u32 cp_hqd_pq_control;
+ u32 cp_hqd_ib_base_addr;
+ u32 cp_hqd_ib_base_addr_hi;
+ u32 cp_hqd_ib_rptr;
+ u32 cp_hqd_ib_control;
+ u32 cp_hqd_iq_timer;
+ u32 cp_hqd_iq_rptr;
+ u32 cp_hqd_dequeue_request;
+ u32 cp_hqd_dma_offload;
+ u32 cp_hqd_sema_cmd;
+ u32 cp_hqd_msg_type;
+ u32 cp_hqd_atomic0_preop_lo;
+ u32 cp_hqd_atomic0_preop_hi;
+ u32 cp_hqd_atomic1_preop_lo;
+ u32 cp_hqd_atomic1_preop_hi;
+ u32 cp_hqd_hq_scheduler0;
+ u32 cp_hqd_hq_scheduler1;
+ u32 cp_mqd_control;
+};
+
+struct cik_mqd {
+ u32 header;
+ u32 dispatch_initiator;
+ u32 dimensions[3];
+ u32 start_idx[3];
+ u32 num_threads[3];
+ u32 pipeline_stat_enable;
+ u32 perf_counter_enable;
+ u32 pgm[2];
+ u32 tba[2];
+ u32 tma[2];
+ u32 pgm_rsrc[2];
+ u32 vmid;
+ u32 resource_limits;
+ u32 static_thread_mgmt01[2];
+ u32 tmp_ring_size;
+ u32 static_thread_mgmt23[2];
+ u32 restart[3];
+ u32 thread_trace_enable;
+ u32 reserved1;
+ u32 user_data[16];
+ u32 vgtcs_invoke_count[2];
+ struct cik_hqd_registers queue_state;
+ u32 dequeue_cntr;
+ u32 interrupt_queue[64];
+};
+
+/* This structure represents mqd used for cp scheduling queue
+ * taken from Gfx72_cp_program_spec.pdf
+ */
+struct cik_compute_mqd {
+ u32 header;
+ u32 compute_dispatch_initiator;
+ u32 compute_dim_x;
+ u32 compute_dim_y;
+ u32 compute_dim_z;
+ u32 compute_start_x;
+ u32 compute_start_y;
+ u32 compute_start_z;
+ u32 compute_num_thread_x;
+ u32 compute_num_thread_y;
+ u32 compute_num_thread_z;
+ u32 compute_pipelinestat_enable;
+ u32 compute_perfcount_enable;
+ u32 compute_pgm_lo;
+ u32 compute_pgm_hi;
+ u32 compute_tba_lo;
+ u32 compute_tba_hi;
+ u32 compute_tma_lo;
+ u32 compute_tma_hi;
+ u32 compute_pgm_rsrc1;
+ u32 compute_pgm_rsrc2;
+ u32 compute_vmid;
+ u32 compute_resource_limits;
+ u32 compute_static_thread_mgmt_se0;
+ u32 compute_static_thread_mgmt_se1;
+ u32 compute_tmpring_size;
+ u32 compute_static_thread_mgmt_se2;
+ u32 compute_static_thread_mgmt_se3;
+ u32 compute_restart_x;
+ u32 compute_restart_y;
+ u32 compute_restart_z;
+ u32 compute_thread_trace_enable;
+ u32 compute_misc_reserved;
+ u32 compute_user_data[16];
+ u32 vgt_csinvoc_count_lo;
+ u32 vgt_csinvoc_count_hi;
+ u32 cp_mqd_base_addr51;
+ u32 cp_mqd_base_addr_hi;
+ u32 cp_hqd_active;
+ u32 cp_hqd_vmid;
+ u32 cp_hqd_persistent_state;
+ u32 cp_hqd_pipe_priority;
+ u32 cp_hqd_queue_priority;
+ u32 cp_hqd_quantum;
+ u32 cp_hqd_pq_base;
+ u32 cp_hqd_pq_base_hi;
+ u32 cp_hqd_pq_rptr;
+ u32 cp_hqd_pq_rptr_report_addr;
+ u32 cp_hqd_pq_rptr_report_addr_hi;
+ u32 cp_hqd_pq_wptr_poll_addr;
+ u32 cp_hqd_pq_wptr_poll_addr_hi;
+ u32 cp_hqd_pq_doorbell_control;
+ u32 cp_hqd_pq_wptr;
+ u32 cp_hqd_pq_control;
+ u32 cp_hqd_ib_base_addr;
+ u32 cp_hqd_ib_base_addr_hi;
+ u32 cp_hqd_ib_rptr;
+ u32 cp_hqd_ib_control;
+ u32 cp_hqd_iq_timer;
+ u32 cp_hqd_iq_rptr;
+ u32 cp_hqd_dequeue_request;
+ u32 cp_hqd_dma_offload;
+ u32 cp_hqd_sema_cmd;
+ u32 cp_hqd_msg_type;
+ u32 cp_hqd_atomic0_preop_lo;
+ u32 cp_hqd_atomic0_preop_hi;
+ u32 cp_hqd_atomic1_preop_lo;
+ u32 cp_hqd_atomic1_preop_hi;
+ u32 cp_hqd_hq_scheduler0;
+ u32 cp_hqd_hq_scheduler1;
+ u32 cp_mqd_control;
+ u32 reserved1[10];
+ u32 cp_mqd_query_time_lo;
+ u32 cp_mqd_query_time_hi;
+ u32 reserved2[4];
+ u32 cp_mqd_connect_start_time_lo;
+ u32 cp_mqd_connect_start_time_hi;
+ u32 cp_mqd_connect_end_time_lo;
+ u32 cp_mqd_connect_end_time_hi;
+ u32 cp_mqd_connect_end_wf_count;
+ u32 cp_mqd_connect_end_pq_rptr;
+ u32 cp_mqd_connect_end_pq_wptr;
+ u32 cp_mqd_connect_end_ib_rptr;
+ u32 reserved3[18];
+};
+
+/* This structure represents all *IQs
+ * Taken from Gfx73_CPC_Eng_Init_Prog.pdf
+ */
+struct cik_interface_mqd {
+ u32 reserved1[128];
+ u32 cp_mqd_base_addr;
+ u32 cp_mqd_base_addr_hi;
+ u32 cp_hqd_active;
+ u32 cp_hqd_vmid;
+ u32 cp_hqd_persistent_state;
+ u32 cp_hqd_pipe_priority;
+ u32 cp_hqd_queue_priority;
+ u32 cp_hqd_quantum;
+ u32 cp_hqd_pq_base;
+ u32 cp_hqd_pq_base_hi;
+ u32 cp_hqd_pq_rptr;
+ u32 cp_hqd_pq_rptr_report_addr;
+ u32 cp_hqd_pq_rptr_report_addr_hi;
+ u32 cp_hqd_pq_wptr_poll_addr;
+ u32 cp_hqd_pq_wptr_poll_addr_hi;
+ u32 cp_hqd_pq_doorbell_control;
+ u32 cp_hqd_pq_wptr;
+ u32 cp_hqd_pq_control;
+ u32 cp_hqd_ib_base_addr;
+ u32 cp_hqd_ib_base_addr_hi;
+ u32 cp_hqd_ib_rptr;
+ u32 cp_hqd_ib_control;
+ u32 cp_hqd_iq_timer;
+ u32 cp_hqd_iq_rptr;
+ u32 cp_hqd_dequeue_request;
+ u32 cp_hqd_dma_offload;
+ u32 cp_hqd_sema_cmd;
+ u32 cp_hqd_msg_type;
+ u32 cp_hqd_atomic0_preop_lo;
+ u32 cp_hqd_atomic0_preop_hi;
+ u32 cp_hqd_atomic1_preop_lo;
+ u32 cp_hqd_atomic1_preop_hi;
+ u32 cp_hqd_hq_status0;
+ u32 cp_hqd_hq_control0;
+ u32 cp_mqd_control;
+ u32 reserved2[3];
+ u32 cp_hqd_hq_status1;
+ u32 cp_hqd_hq_control1;
+ u32 reserved3[16];
+ u32 cp_hqd_hq_status2;
+ u32 cp_hqd_hq_control2;
+ u32 cp_hqd_hq_status3;
+ u32 cp_hqd_hq_control3;
+ u32 reserved4[2];
+ u32 cp_mqd_query_time_lo;
+ u32 cp_mqd_query_time_hi;
+ u32 reserved5[48];
+ u32 cp_mqd_skip_process[16];
+};
+
+#pragma pack(pop)
+
+
+#endif /* CIK_MQDS_H_ */
diff --git a/drivers/gpu/hsa/radeon/cik_regs.h b/drivers/gpu/hsa/radeon/cik_regs.h
index 93f7b34..fa5ec01 100644
--- a/drivers/gpu/hsa/radeon/cik_regs.h
+++ b/drivers/gpu/hsa/radeon/cik_regs.h
@@ -168,6 +168,7 @@
#define CP_HQD_DEQUEUE_REQUEST 0xC974
#define DEQUEUE_REQUEST_DRAIN 1
+#define DEQUEUE_REQUEST_RESET 2
#define DEQUEUE_INT (1U << 8)
#define CP_HQD_SEMA_CMD 0xC97Cu
diff --git a/drivers/gpu/hsa/radeon/kfd_mqd_manager.c b/drivers/gpu/hsa/radeon/kfd_mqd_manager.c
new file mode 100644
index 0000000..14b248f
--- /dev/null
+++ b/drivers/gpu/hsa/radeon/kfd_mqd_manager.c
@@ -0,0 +1,453 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Ben Goz
+ */
+
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include "kfd_priv.h"
+#include "kfd_mqd_manager.h"
+#include "cik_mqds.h"
+#include "cik_regs.h"
+
+inline uint32_t lower_32(uint64_t x)
+{
+ return (uint32_t)x;
+}
+
+inline uint32_t upper_32(uint64_t x)
+{
+ return (uint32_t)(x >> 32);
+}
+
+inline void busy_wait(unsigned long ms)
+{
+ while (time_before(jiffies, ms))
+ cpu_relax();
+}
+
+static inline struct cik_mqd *get_mqd(void *mqd)
+{
+ return (struct cik_mqd *)mqd;
+}
+
+static int init_mqd(struct mqd_manager *mm, void **mqd, kfd_mem_obj *mqd_mem_obj,
+ uint64_t *gart_addr, struct queue_properties *q)
+{
+ uint64_t addr;
+ struct cik_mqd *m;
+ int retval;
+
+ BUG_ON(!mm || !q || !mqd);
+
+ pr_debug("kfd: In func %s\n", __func__);
+
+ retval = radeon_kfd_vidmem_alloc_map(
+ mm->dev,
+ mqd_mem_obj,
+ (void **)&m,
+ &addr,
+ ALIGN(sizeof(struct cik_mqd), 256));
+
+ if (retval != 0)
+ return -ENOMEM;
+
+ memset(m, 0, sizeof(struct cik_mqd));
+
+ m->header = 0xC0310800;
+ m->pipeline_stat_enable = 1;
+ m->static_thread_mgmt01[0] = 0xFFFFFFFF;
+ m->static_thread_mgmt01[1] = 0xFFFFFFFF;
+ m->static_thread_mgmt23[0] = 0xFFFFFFFF;
+ m->static_thread_mgmt23[1] = 0xFFFFFFFF;
+
+ m->queue_state.cp_hqd_persistent_state = DEFAULT_CP_HQD_PERSISTENT_STATE;
+
+ m->queue_state.cp_mqd_control = MQD_CONTROL_PRIV_STATE_EN;
+ m->queue_state.cp_mqd_base_addr = lower_32(addr);
+ m->queue_state.cp_mqd_base_addr_hi = upper_32(addr);
+
+ m->queue_state.cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE | IB_ATC_EN;
+ /* Although WinKFD writes this, I suspect it should not be necessary. */
+ m->queue_state.cp_hqd_ib_control = IB_ATC_EN | DEFAULT_MIN_IB_AVAIL_SIZE;
+
+ m->queue_state.cp_hqd_quantum = QUANTUM_EN | QUANTUM_SCALE_1MS | QUANTUM_DURATION(10);
+
+ m->queue_state.cp_hqd_pipe_priority = 1;
+ m->queue_state.cp_hqd_queue_priority = 15;
+
+ *mqd = m;
+ if (gart_addr != NULL)
+ *gart_addr = addr;
+ retval = mm->update_mqd(mm, m, q);
+
+ return retval;
+}
+
+static void uninit_mqd(struct mqd_manager *mm, void *mqd, kfd_mem_obj mqd_mem_obj)
+{
+ BUG_ON(!mm || !mqd);
+ radeon_kfd_vidmem_free_unmap(mm->dev, mqd_mem_obj);
+}
+
+static int load_mqd(struct mqd_manager *mm, void *mqd)
+{
+ struct cik_mqd *m;
+
+ BUG_ON(!mm || !mqd);
+
+ m = get_mqd(mqd);
+
+ WRITE_REG(mm->dev, CP_MQD_BASE_ADDR, m->queue_state.cp_mqd_base_addr);
+ WRITE_REG(mm->dev, CP_MQD_BASE_ADDR_HI, m->queue_state.cp_mqd_base_addr_hi);
+ WRITE_REG(mm->dev, CP_MQD_CONTROL, m->queue_state.cp_mqd_control);
+
+ WRITE_REG(mm->dev, CP_HQD_PQ_BASE, m->queue_state.cp_hqd_pq_base);
+ WRITE_REG(mm->dev, CP_HQD_PQ_BASE_HI, m->queue_state.cp_hqd_pq_base_hi);
+ WRITE_REG(mm->dev, CP_HQD_PQ_CONTROL, m->queue_state.cp_hqd_pq_control);
+
+ WRITE_REG(mm->dev, CP_HQD_IB_CONTROL, m->queue_state.cp_hqd_ib_control);
+ WRITE_REG(mm->dev, CP_HQD_IB_BASE_ADDR, m->queue_state.cp_hqd_ib_base_addr);
+ WRITE_REG(mm->dev, CP_HQD_IB_BASE_ADDR_HI, m->queue_state.cp_hqd_ib_base_addr_hi);
+
+ WRITE_REG(mm->dev, CP_HQD_IB_RPTR, m->queue_state.cp_hqd_ib_rptr);
+
+ WRITE_REG(mm->dev, CP_HQD_PERSISTENT_STATE, m->queue_state.cp_hqd_persistent_state);
+ WRITE_REG(mm->dev, CP_HQD_SEMA_CMD, m->queue_state.cp_hqd_sema_cmd);
+ WRITE_REG(mm->dev, CP_HQD_MSG_TYPE, m->queue_state.cp_hqd_msg_type);
+
+ WRITE_REG(mm->dev, CP_HQD_ATOMIC0_PREOP_LO, m->queue_state.cp_hqd_atomic0_preop_lo);
+ WRITE_REG(mm->dev, CP_HQD_ATOMIC0_PREOP_HI, m->queue_state.cp_hqd_atomic0_preop_hi);
+ WRITE_REG(mm->dev, CP_HQD_ATOMIC1_PREOP_LO, m->queue_state.cp_hqd_atomic1_preop_lo);
+ WRITE_REG(mm->dev, CP_HQD_ATOMIC1_PREOP_HI, m->queue_state.cp_hqd_atomic1_preop_hi);
+
+ WRITE_REG(mm->dev, CP_HQD_PQ_RPTR_REPORT_ADDR, m->queue_state.cp_hqd_pq_rptr_report_addr);
+ WRITE_REG(mm->dev, CP_HQD_PQ_RPTR_REPORT_ADDR_HI, m->queue_state.cp_hqd_pq_rptr_report_addr_hi);
+ WRITE_REG(mm->dev, CP_HQD_PQ_RPTR, m->queue_state.cp_hqd_pq_rptr);
+
+ WRITE_REG(mm->dev, CP_HQD_PQ_WPTR_POLL_ADDR, m->queue_state.cp_hqd_pq_wptr_poll_addr);
+ WRITE_REG(mm->dev, CP_HQD_PQ_WPTR_POLL_ADDR_HI, m->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
+
+ WRITE_REG(mm->dev, CP_HQD_PQ_DOORBELL_CONTROL, m->queue_state.cp_hqd_pq_doorbell_control);
+
+ WRITE_REG(mm->dev, CP_HQD_VMID, m->queue_state.cp_hqd_vmid);
+
+ WRITE_REG(mm->dev, CP_HQD_QUANTUM, m->queue_state.cp_hqd_quantum);
+
+ WRITE_REG(mm->dev, CP_HQD_PIPE_PRIORITY, m->queue_state.cp_hqd_pipe_priority);
+ WRITE_REG(mm->dev, CP_HQD_QUEUE_PRIORITY, m->queue_state.cp_hqd_queue_priority);
+
+ WRITE_REG(mm->dev, CP_HQD_HQ_SCHEDULER0, m->queue_state.cp_hqd_hq_scheduler0);
+ WRITE_REG(mm->dev, CP_HQD_HQ_SCHEDULER1, m->queue_state.cp_hqd_hq_scheduler1);
+
+ WRITE_REG(mm->dev, CP_HQD_ACTIVE, m->queue_state.cp_hqd_active);
+
+ return 0;
+}
+
+static int update_mqd(struct mqd_manager *mm, void *mqd, struct queue_properties *q)
+{
+ struct cik_mqd *m;
+
+ BUG_ON(!mm || !q || !mqd);
+
+ pr_debug("kfd: In func %s\n", __func__);
+
+ m = get_mqd(mqd);
+ m->queue_state.cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE | DEFAULT_MIN_AVAIL_SIZE | PQ_ATC_EN;
+ /* calculating queue size which is log base 2 of actual queue size -1 dwords and another -1 for ffs */
+ m->queue_state.cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1;
+ m->queue_state.cp_hqd_pq_base = lower_32((uint64_t)q->queue_address >> 8);
+ m->queue_state.cp_hqd_pq_base_hi = upper_32((uint64_t)q->queue_address >> 8);
+ m->queue_state.cp_hqd_pq_rptr_report_addr = lower_32((uint64_t)q->read_ptr);
+ m->queue_state.cp_hqd_pq_rptr_report_addr_hi = upper_32((uint64_t)q->read_ptr);
+ m->queue_state.cp_hqd_pq_doorbell_control = DOORBELL_EN | DOORBELL_OFFSET(q->doorbell_off);
+
+ m->queue_state.cp_hqd_vmid = q->vmid;
+
+ m->queue_state.cp_hqd_active = 0;
+ q->is_active = false;
+ if (q->queue_size > 0 &&
+ q->queue_address != 0 &&
+ q->queue_percent > 0) {
+ m->queue_state.cp_hqd_active = 1;
+ q->is_active = true;
+ }
+
+ return 0;
+}
+
+static int destroy_mqd(struct mqd_manager *mm, void *mqd, enum kfd_preempt_type type, unsigned int timeout)
+{
+ int status;
+ uint32_t temp;
+ bool sync;
+
+ status = 0;
+ BUG_ON(!mm || !mqd);
+
+ pr_debug("kfd: In func %s\n", __func__);
+
+ WRITE_REG(mm->dev, CP_HQD_PQ_DOORBELL_CONTROL, 0);
+
+ if (type == KFD_PREEMPT_TYPE_WAVEFRONT_RESET)
+ WRITE_REG(mm->dev, CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQUEST_RESET);
+ else
+ WRITE_REG(mm->dev, CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQUEST_DRAIN);
+
+ sync = (timeout > 0);
+ temp = timeout;
+
+ while (READ_REG(mm->dev, CP_HQD_ACTIVE) != 0) {
+ if (sync && timeout <= 0) {
+ status = -EBUSY;
+ pr_err("kfd: cp queue preemption time out (%dms)\n", temp);
+ break;
+ }
+ busy_wait(1000);
+ if (sync)
+ timeout--;
+ }
+
+ return status;
+}
+
+static inline uint32_t make_srbm_gfx_cntl_mpqv(unsigned int me,
+ unsigned int pipe,
+ unsigned int queue,
+ unsigned int vmid)
+{
+ return QUEUEID(queue) | VMID(vmid) | MEID(me) | PIPEID(pipe);
+}
+
+static inline uint32_t get_first_pipe_offset(struct mqd_manager *mm)
+{
+ BUG_ON(!mm);
+ return mm->dev->shared_resources.first_compute_pipe;
+}
+
+static void acquire_hqd(struct mqd_manager *mm, unsigned int pipe, unsigned int queue, unsigned int vmid)
+{
+ unsigned int mec, pipe_in_mec;
+
+ BUG_ON(!mm);
+
+ radeon_kfd_lock_srbm_index(mm->dev);
+
+ pipe_in_mec = (pipe + get_first_pipe_offset(mm)) % 4;
+ mec = (pipe + get_first_pipe_offset(mm)) / 4;
+ mec++;
+
+ pr_debug("kfd: acquire mec: %d pipe: %d queue: %d vmid: %d\n",
+ mec,
+ pipe_in_mec,
+ queue,
+ vmid);
+
+ WRITE_REG(mm->dev, SRBM_GFX_CNTL, make_srbm_gfx_cntl_mpqv(mec,
+ pipe_in_mec, queue, vmid));
+}
+
+static void release_hqd(struct mqd_manager *mm)
+{
+ BUG_ON(!mm);
+ /* Be nice to KGD, reset indexed CP registers to the GFX pipe. */
+ WRITE_REG(mm->dev, SRBM_GFX_CNTL, 0);
+ radeon_kfd_unlock_srbm_index(mm->dev);
+}
+
+bool is_occupied(struct mqd_manager *mm, void *mqd, struct queue_properties *q)
+{
+ int act;
+ struct cik_mqd *m;
+ uint32_t low, high;
+
+ BUG_ON(!mm || !mqd || !q);
+
+ m = get_mqd(mqd);
+
+ act = READ_REG(mm->dev, CP_HQD_ACTIVE);
+ if (act) {
+ low = lower_32((uint64_t)q->queue_address >> 8);
+ high = upper_32((uint64_t)q->queue_address >> 8);
+
+ if (low == READ_REG(mm->dev, CP_HQD_PQ_BASE) &&
+ high == READ_REG(mm->dev, CP_HQD_PQ_BASE_HI))
+ return true;
+ }
+
+ return false;
+}
+
+static int initialize(struct mqd_manager *mm)
+{
+ BUG_ON(!mm);
+ return 0;
+}
+
+static void uninitialize(struct mqd_manager *mm)
+{
+ BUG_ON(!mm);
+}
+
+/*
+ * HIQ MQD Implementation
+ */
+
+static int init_mqd_hiq(struct mqd_manager *mm, void **mqd, kfd_mem_obj *mqd_mem_obj,
+ uint64_t *gart_addr, struct queue_properties *q)
+{
+ uint64_t addr;
+ struct cik_mqd *m;
+ int retval;
+
+ BUG_ON(!mm || !q || !mqd || !mqd_mem_obj);
+
+ pr_debug("kfd: In func %s\n", __func__);
+
+ retval = radeon_kfd_vidmem_alloc_map(
+ mm->dev,
+ mqd_mem_obj,
+ (void **)&m,
+ &addr,
+ ALIGN(sizeof(struct cik_mqd), PAGE_SIZE));
+
+ if (retval != 0)
+ return -ENOMEM;
+
+ memset(m, 0, sizeof(struct cik_mqd));
+
+ m->header = 0xC0310800;
+ m->pipeline_stat_enable = 1;
+ m->static_thread_mgmt01[0] = 0xFFFFFFFF;
+ m->static_thread_mgmt01[1] = 0xFFFFFFFF;
+ m->static_thread_mgmt23[0] = 0xFFFFFFFF;
+ m->static_thread_mgmt23[1] = 0xFFFFFFFF;
+
+ m->queue_state.cp_hqd_persistent_state = DEFAULT_CP_HQD_PERSISTENT_STATE;
+
+ m->queue_state.cp_mqd_control = MQD_CONTROL_PRIV_STATE_EN;
+ m->queue_state.cp_mqd_base_addr = lower_32(addr);
+ m->queue_state.cp_mqd_base_addr_hi = upper_32(addr);
+
+ m->queue_state.cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE;
+
+ m->queue_state.cp_hqd_quantum = QUANTUM_EN | QUANTUM_SCALE_1MS | QUANTUM_DURATION(10);
+
+ m->queue_state.cp_hqd_pipe_priority = 1;
+ m->queue_state.cp_hqd_queue_priority = 15;
+
+ *mqd = m;
+ if (gart_addr)
+ *gart_addr = addr;
+ retval = mm->update_mqd(mm, m, q);
+
+ return retval;
+}
+
+static int update_mqd_hiq(struct mqd_manager *mm, void *mqd, struct queue_properties *q)
+{
+ struct cik_mqd *m;
+
+ BUG_ON(!mm || !q || !mqd);
+
+ pr_debug("kfd: In func %s\n", __func__);
+
+ m = get_mqd(mqd);
+ m->queue_state.cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE | DEFAULT_MIN_AVAIL_SIZE | PRIV_STATE | KMD_QUEUE;
+ /* calculating queue size which is log base 2 of actual queue size -1 dwords */
+ m->queue_state.cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1;
+ m->queue_state.cp_hqd_pq_base = lower_32((uint64_t)q->queue_address >> 8);
+ m->queue_state.cp_hqd_pq_base_hi = upper_32((uint64_t)q->queue_address >> 8);
+ m->queue_state.cp_hqd_pq_rptr_report_addr = lower_32((uint64_t)q->read_ptr);
+ m->queue_state.cp_hqd_pq_rptr_report_addr_hi = upper_32((uint64_t)q->read_ptr);
+ m->queue_state.cp_hqd_pq_doorbell_control = DOORBELL_EN | DOORBELL_OFFSET(q->doorbell_off);
+
+ m->queue_state.cp_hqd_vmid = q->vmid;
+
+ m->queue_state.cp_hqd_active = 0;
+ q->is_active = false;
+ if (q->queue_size > 0 &&
+ q->queue_address != 0 &&
+ q->queue_percent > 0) {
+ m->queue_state.cp_hqd_active = 1;
+ q->is_active = true;
+ }
+
+ return 0;
+}
+
+struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, struct kfd_dev *dev)
+{
+ struct mqd_manager *mqd;
+
+ BUG_ON(!dev);
+ BUG_ON(type >= KFD_MQD_TYPE_MAX);
+
+ pr_debug("kfd: In func %s\n", __func__);
+
+ mqd = kzalloc(sizeof(struct mqd_manager), GFP_KERNEL);
+ if (!mqd)
+ return NULL;
+
+ mqd->dev = dev;
+
+ switch (type) {
+ case KFD_MQD_TYPE_CIK_CP:
+ case KFD_MQD_TYPE_CIK_COMPUTE:
+ mqd->init_mqd = init_mqd;
+ mqd->uninit_mqd = uninit_mqd;
+ mqd->load_mqd = load_mqd;
+ mqd->update_mqd = update_mqd;
+ mqd->destroy_mqd = destroy_mqd;
+ mqd->acquire_hqd = acquire_hqd;
+ mqd->release_hqd = release_hqd;
+ mqd->is_occupied = is_occupied;
+ mqd->initialize = initialize;
+ mqd->uninitialize = uninitialize;
+ break;
+ case KFD_MQD_TYPE_CIK_HIQ:
+ mqd->init_mqd = init_mqd_hiq;
+ mqd->uninit_mqd = uninit_mqd;
+ mqd->load_mqd = load_mqd;
+ mqd->update_mqd = update_mqd_hiq;
+ mqd->destroy_mqd = destroy_mqd;
+ mqd->acquire_hqd = acquire_hqd;
+ mqd->release_hqd = release_hqd;
+ mqd->is_occupied = is_occupied;
+ mqd->initialize = initialize;
+ mqd->uninitialize = uninitialize;
+ break;
+ default:
+ return NULL;
+ break;
+ }
+
+ if (mqd->initialize(mqd) != 0) {
+ pr_err("kfd: mqd manager initialization failed\n");
+ kfree(mqd);
+ return NULL;
+ }
+ return mqd;
+}
+
+/* SDMA queues should be implemented here when the CP supports them */
diff --git a/drivers/gpu/hsa/radeon/kfd_mqd_manager.h b/drivers/gpu/hsa/radeon/kfd_mqd_manager.h
new file mode 100644
index 0000000..e7b39ee
--- /dev/null
+++ b/drivers/gpu/hsa/radeon/kfd_mqd_manager.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Ben Goz
+ */
+
+#ifndef MQD_MANAGER_H_
+#define MQD_MANAGER_H_
+
+#include "kfd_priv.h"
+
+struct mqd_manager {
+ int (*init_mqd)(struct mqd_manager *mm, void **mqd, kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *q);
+ int (*load_mqd)(struct mqd_manager *mm, void *mqd);
+ int (*update_mqd)(struct mqd_manager *mm, void *mqd, struct queue_properties *q);
+ int (*destroy_mqd)(struct mqd_manager *mm, void *mqd, enum kfd_preempt_type type, unsigned int timeout);
+ void (*uninit_mqd)(struct mqd_manager *mm, void *mqd, kfd_mem_obj mqd_mem_obj);
+ void (*acquire_hqd)(struct mqd_manager *mm, unsigned int pipe, unsigned int queue, unsigned int vmid);
+ void (*release_hqd)(struct mqd_manager *mm);
+ bool (*is_occupied)(struct mqd_manager *mm, void *mqd, struct queue_properties *q);
+ int (*initialize)(struct mqd_manager *mm);
+ void (*uninitialize)(struct mqd_manager *mm);
+
+ struct mutex mqd_mutex;
+ struct kfd_dev *dev;
+};
+
+
+#endif /* MQD_MANAGER_H_ */
diff --git a/drivers/gpu/hsa/radeon/kfd_priv.h b/drivers/gpu/hsa/radeon/kfd_priv.h
index df17387..cc60b48 100644
--- a/drivers/gpu/hsa/radeon/kfd_priv.h
+++ b/drivers/gpu/hsa/radeon/kfd_priv.h
@@ -141,6 +141,9 @@ int radeon_kfd_vidmem_gpumap(struct kfd_dev *kfd, kfd_mem_obj mem_obj, uint64_t
void radeon_kfd_vidmem_ungpumap(struct kfd_dev *kfd, kfd_mem_obj mem_obj);
int radeon_kfd_vidmem_kmap(struct kfd_dev *kfd, kfd_mem_obj mem_obj, void **ptr);
void radeon_kfd_vidmem_unkmap(struct kfd_dev *kfd, kfd_mem_obj mem_obj);
+int radeon_kfd_vidmem_alloc_map(struct kfd_dev *kfd, kfd_mem_obj *mem_obj, void **ptr,
+ uint64_t *vmid0_address, size_t size);
+void radeon_kfd_vidmem_free_unmap(struct kfd_dev *kfd, kfd_mem_obj mem_obj);
/* Character device interface */
int radeon_kfd_chardev_init(void);
@@ -161,6 +164,17 @@ struct kfd_queue {
struct kfd_scheduler_queue scheduler_queue;
};
+enum kfd_preempt_type_filter {
+ KFD_PREEMPT_TYPE_FILTER_SINGLE_QUEUE,
+ KFD_PRERMPT_TYPE_FILTER_ALL_QUEUES,
+ KFD_PRERMPT_TYPE_FILTER_BY_PASID
+};
+
+enum kfd_preempt_type {
+ KFD_PREEMPT_TYPE_WAVEFRONT,
+ KFD_PREEMPT_TYPE_WAVEFRONT_RESET
+};
+
enum kfd_queue_type {
KFD_QUEUE_TYPE_COMPUTE,
KFD_QUEUE_TYPE_SDMA,
@@ -204,6 +218,14 @@ struct queue {
struct kfd_dev *device;
};
+enum KFD_MQD_TYPE {
+ KFD_MQD_TYPE_CIK_COMPUTE = 0, /* for no cp scheduling */
+ KFD_MQD_TYPE_CIK_HIQ, /* for hiq */
+ KFD_MQD_TYPE_CIK_CP, /* for cp queues and diq */
+ KFD_MQD_TYPE_CIK_SDMA, /* for sdma queues */
+ KFD_MQD_TYPE_MAX
+};
+
/* Data that is per-process-per device. */
struct kfd_process_device {
/* List of all per-device data for a process. Starts from kfd_process.per_device_data. */
@@ -325,10 +347,14 @@ int kgd2kfd_resume(struct kfd_dev *dev);
int kfd_init_apertures(struct kfd_process *process);
/* Queue Context Management */
+inline uint32_t lower_32(uint64_t x);
+inline uint32_t upper_32(uint64_t x);
+inline void busy_wait(unsigned long ms);
int init_queue(struct queue **q, struct queue_properties properties);
void uninit_queue(struct queue *q);
void print_queue_properties(struct queue_properties *q);
void print_queue(struct queue *q);
+struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, struct kfd_dev *dev);
#endif
diff --git a/drivers/gpu/hsa/radeon/kfd_sched_cik_static.c b/drivers/gpu/hsa/radeon/kfd_sched_cik_static.c
index 30561a6..d576d95 100644
--- a/drivers/gpu/hsa/radeon/kfd_sched_cik_static.c
+++ b/drivers/gpu/hsa/radeon/kfd_sched_cik_static.c
@@ -182,16 +182,6 @@ struct cik_static_queue {
uint32_t queue_size_encoded; /* CP_HQD_PQ_CONTROL.QUEUE_SIZE takes the queue size as log2(size) - 3. */
};
-static uint32_t lower_32(uint64_t x)
-{
- return (uint32_t)x;
-}
-
-static uint32_t upper_32(uint64_t x)
-{
- return (uint32_t)(x >> 32);
-}
-
/* SRBM_GFX_CNTL provides the MEC/pipe/queue and vmid for many registers that are
* In particular, CP_HQD_* and CP_MQD_* are instanced for each queue. CP_HPD_* are instanced for each pipe.
* SH_MEM_* are instanced per-VMID.
diff --git a/drivers/gpu/hsa/radeon/kfd_vidmem.c b/drivers/gpu/hsa/radeon/kfd_vidmem.c
index c8d3770..9713373 100644
--- a/drivers/gpu/hsa/radeon/kfd_vidmem.c
+++ b/drivers/gpu/hsa/radeon/kfd_vidmem.c
@@ -59,3 +59,39 @@ void radeon_kfd_vidmem_unkmap(struct kfd_dev *kfd, kfd_mem_obj mem_obj)
{
kfd2kgd->unkmap_mem(kfd->kgd, (struct kgd_mem *)mem_obj);
}
+
+int radeon_kfd_vidmem_alloc_map(struct kfd_dev *kfd, kfd_mem_obj *mem_obj,
+ void **ptr, uint64_t *vmid0_address,
+ size_t size)
+{
+ int retval;
+
+ retval = radeon_kfd_vidmem_alloc(kfd, size, PAGE_SIZE, KFD_MEMPOOL_SYSTEM_WRITECOMBINE,
+ mem_obj);
+ if (retval != 0)
+ goto fail_vidmem_alloc;
+
+ retval = radeon_kfd_vidmem_kmap(kfd, *mem_obj, ptr);
+ if (retval != 0)
+ goto fail_vidmem_kmap;
+
+ retval = radeon_kfd_vidmem_gpumap(kfd, *mem_obj, vmid0_address);
+ if (retval != 0)
+ goto fail_vidmem_gpumap;
+
+ return 0;
+
+fail_vidmem_gpumap:
+ radeon_kfd_vidmem_unkmap(kfd, *mem_obj);
+fail_vidmem_kmap:
+ radeon_kfd_vidmem_free(kfd, *mem_obj);
+fail_vidmem_alloc:
+ return retval;
+}
+
+void radeon_kfd_vidmem_free_unmap(struct kfd_dev *kfd, kfd_mem_obj mem_obj)
+{
+ radeon_kfd_vidmem_ungpumap(kfd, mem_obj);
+ radeon_kfd_vidmem_unkmap(kfd, mem_obj);
+ radeon_kfd_vidmem_free(kfd, mem_obj);
+}
--
1.9.1
From: Ben Goz <[email protected]>
The queue module enables allocating and initializing queues uniformly.
The hw_pointer_store module handles allocation and assignment of read and write
pointers to user HSA queues.
Signed-off-by: Ben Goz <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/hsa/radeon/Makefile | 3 +-
drivers/gpu/hsa/radeon/kfd_hw_pointer_store.c | 150 ++++++++++++++++++++++++++
drivers/gpu/hsa/radeon/kfd_hw_pointer_store.h | 65 +++++++++++
drivers/gpu/hsa/radeon/kfd_priv.h | 55 ++++++++++
drivers/gpu/hsa/radeon/kfd_queue.c | 110 +++++++++++++++++++
5 files changed, 382 insertions(+), 1 deletion(-)
create mode 100644 drivers/gpu/hsa/radeon/kfd_hw_pointer_store.c
create mode 100644 drivers/gpu/hsa/radeon/kfd_hw_pointer_store.h
create mode 100644 drivers/gpu/hsa/radeon/kfd_queue.c
diff --git a/drivers/gpu/hsa/radeon/Makefile b/drivers/gpu/hsa/radeon/Makefile
index 813b31f..18e1639 100644
--- a/drivers/gpu/hsa/radeon/Makefile
+++ b/drivers/gpu/hsa/radeon/Makefile
@@ -5,6 +5,7 @@
radeon_kfd-y := kfd_module.o kfd_device.o kfd_chardev.o \
kfd_pasid.o kfd_topology.o kfd_process.o \
kfd_doorbell.o kfd_sched_cik_static.o kfd_registers.o \
- kfd_vidmem.o kfd_interrupt.o kfd_aperture.o
+ kfd_vidmem.o kfd_interrupt.o kfd_aperture.o \
+ kfd_queue.o kfd_hw_pointer_store.o
obj-$(CONFIG_HSA_RADEON) += radeon_kfd.o
diff --git a/drivers/gpu/hsa/radeon/kfd_hw_pointer_store.c b/drivers/gpu/hsa/radeon/kfd_hw_pointer_store.c
new file mode 100644
index 0000000..1372fb2
--- /dev/null
+++ b/drivers/gpu/hsa/radeon/kfd_hw_pointer_store.c
@@ -0,0 +1,150 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Ben Goz
+ */
+
+#include <linux/types.h>
+#include <linux/version.h>
+#include <linux/kernel.h>
+#include <linux/mutex.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include "kfd_hw_pointer_store.h"
+#include "kfd_priv.h"
+
+/* do the same trick as in map_doorbells() */
+static int hw_pointer_store_map(struct hw_pointer_store_properties *ptr,
+ struct file *devkfd)
+{
+ qptr_t __user *user_address;
+
+ BUG_ON(!ptr || !devkfd);
+
+ if (!ptr->page_mapping) {
+ if (!ptr->page_address)
+ return -EINVAL;
+
+ user_address = (qptr_t __user *)vm_mmap(devkfd, 0, PAGE_SIZE,
+ PROT_WRITE | PROT_READ , MAP_SHARED, ptr->offset);
+
+ if (IS_ERR(user_address))
+ return PTR_ERR(user_address);
+
+ ptr->page_mapping = user_address;
+ }
+
+ return 0;
+}
+
+int hw_pointer_store_init(struct hw_pointer_store_properties *ptr,
+ enum hw_pointer_store_type type)
+{
+ unsigned long *addr;
+
+ BUG_ON(!ptr);
+
+ /* using the offset value as a hint for mmap to distinguish between page types */
+ if (type == KFD_HW_POINTER_STORE_TYPE_RPTR)
+ ptr->offset = KFD_MMAP_RPTR_START << PAGE_SHIFT;
+ else if (type == KFD_HW_POINTER_STORE_TYPE_WPTR)
+ ptr->offset = KFD_MMAP_WPTR_START << PAGE_SHIFT;
+ else
+ return -EINVAL;
+
+ addr = (unsigned long *)get_zeroed_page(GFP_KERNEL);
+ if (!addr) {
+ pr_debug("Error allocating page\n");
+ return -ENOMEM;
+ }
+
+ ptr->page_address = addr;
+ ptr->page_mapping = NULL;
+
+ return 0;
+}
+
+void hw_pointer_store_destroy(struct hw_pointer_store_properties *ptr)
+{
+ BUG_ON(!ptr);
+ pr_debug("kfd in func: %s\n", __func__);
+ if (ptr->page_address)
+ free_page((unsigned long)ptr->page_address);
+ if (ptr->page_mapping)
+ vm_munmap((uintptr_t)ptr->page_mapping, PAGE_SIZE);
+ ptr->page_address = NULL;
+ ptr->page_mapping = NULL;
+}
+
+qptr_t __user *
+hw_pointer_store_create_queue(struct hw_pointer_store_properties *ptr,
+ unsigned int queue_id, struct file *devkfd)
+{
+ BUG_ON(!ptr || queue_id >= MAX_PROCESS_QUEUES);
+
+ /* map the value to user space */
+ hw_pointer_store_map(ptr, devkfd);
+
+ /* User process address */
+ if (!ptr->page_mapping) {
+ pr_debug(KERN_ERR "kfd: hw pointer store doesn't mapped to user space\n");
+ return NULL;
+ }
+
+ ptr->page_mapping[queue_id] = 0;
+
+ return ptr->page_mapping + queue_id;
+}
+
+unsigned long *hw_pointer_store_get_address
+ (struct hw_pointer_store_properties *ptr, unsigned int queue_id)
+{
+ return ptr->page_address + queue_id;
+}
+
+int radeon_kfd_hw_pointer_store_mmap(struct hw_pointer_store_properties *ptr,
+ struct vm_area_struct *vma)
+{
+ BUG_ON(!ptr || !vma);
+
+ if (vma->vm_end - vma->vm_start != PAGE_SIZE) {
+ pr_debug("start address(0x%lx) - end address(0x%lx) != len(0x%lx)\n",
+ vma->vm_end, vma->vm_start, PAGE_SIZE);
+ return -EINVAL;
+ }
+
+ vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE
+ | VM_DONTDUMP | VM_PFNMAP;
+
+ pr_debug("kfd: mapping hw pointer page in radeon_kfd_hw_pointer_store_mmap\n"
+ " target user address == 0x%016llX\n"
+ " physical address == 0x%016lX\n"
+ " vm_flags == 0x%08lX\n"
+ " size == 0x%08lX\n",
+ (long long unsigned int) vma->vm_start,
+ __pa(ptr->page_address), vma->vm_flags, PAGE_SIZE);
+
+ /* mapping the page to user process */
+ return remap_pfn_range(vma, vma->vm_start, __pa(ptr->page_address) >> PAGE_SHIFT, PAGE_SIZE, vma->vm_page_prot);
+}
+
diff --git a/drivers/gpu/hsa/radeon/kfd_hw_pointer_store.h b/drivers/gpu/hsa/radeon/kfd_hw_pointer_store.h
new file mode 100644
index 0000000..be1d6cb
--- /dev/null
+++ b/drivers/gpu/hsa/radeon/kfd_hw_pointer_store.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Ben Goz
+ */
+
+#ifndef HW_POINTER_STORE_H_
+#define HW_POINTER_STORE_H_
+
+#include <linux/mutex.h>
+
+/* Type that represents a HW doorbell slot and the read/write HW pointers */
+typedef u32 qptr_t;
+
+/* Hw Pointer Store */
+enum hw_pointer_store_type {
+ KFD_HW_POINTER_STORE_TYPE_RPTR = 0,
+ KFD_HW_POINTER_STORE_TYPE_WPTR
+};
+
+struct hw_pointer_store_properties {
+ qptr_t __user *page_mapping;
+ unsigned long *page_address;
+ unsigned long offset;
+};
+
+int
+hw_pointer_store_init(struct hw_pointer_store_properties *ptr,
+ enum hw_pointer_store_type type);
+
+void
+hw_pointer_store_destroy(struct hw_pointer_store_properties *ptr);
+
+qptr_t __user *
+hw_pointer_store_create_queue(struct hw_pointer_store_properties *ptr,
+ unsigned int queue_id, struct file *devkfd);
+
+unsigned long *
+hw_pointer_store_get_address(struct hw_pointer_store_properties *ptr,
+ unsigned int queue_id);
+
+int
+radeon_kfd_hw_pointer_store_mmap(struct hw_pointer_store_properties *ptr,
+ struct vm_area_struct *vma);
+
+
+#endif /* HW_POINTER_STORE_H_ */
diff --git a/drivers/gpu/hsa/radeon/kfd_priv.h b/drivers/gpu/hsa/radeon/kfd_priv.h
index 28155bc..14a3f9b 100644
--- a/drivers/gpu/hsa/radeon/kfd_priv.h
+++ b/drivers/gpu/hsa/radeon/kfd_priv.h
@@ -31,6 +31,7 @@
#include <linux/atomic.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>
+#include "kfd_hw_pointer_store.h"
struct kfd_scheduler_class;
@@ -49,6 +50,10 @@ struct kfd_scheduler_class;
** We figure out what type of memory the caller wanted by comparing the mmap page offset to known ranges. */
#define KFD_MMAP_DOORBELL_START (((1ULL << 32)*1) >> PAGE_SHIFT)
#define KFD_MMAP_DOORBELL_END (((1ULL << 32)*2) >> PAGE_SHIFT)
+#define KFD_MMAP_RPTR_START KFD_MMAP_DOORBELL_END
+#define KFD_MMAP_RPTR_END (((1ULL << 32)*3) >> PAGE_SHIFT)
+#define KFD_MMAP_WPTR_START KFD_MMAP_RPTR_END
+#define KFD_MMAP_WPTR_END (((1ULL << 32)*4) >> PAGE_SHIFT)
/* GPU ID hash width in bits */
#define KFD_GPU_ID_HASH_WIDTH 16
@@ -155,6 +160,49 @@ struct kfd_queue {
struct kfd_scheduler_queue scheduler_queue;
};
+enum kfd_queue_type {
+ KFD_QUEUE_TYPE_COMPUTE,
+ KFD_QUEUE_TYPE_SDMA,
+ KFD_QUEUE_TYPE_HIQ,
+ KFD_QUEUE_TYPE_DIQ
+};
+
+struct queue_properties {
+ enum kfd_queue_type type;
+ unsigned int queue_id;
+ uint64_t queue_address;
+ uint64_t queue_size;
+ uint32_t priority;
+ uint32_t queue_percent;
+ qptr_t *read_ptr;
+ qptr_t *write_ptr;
+ qptr_t *doorbell_ptr;
+ qptr_t doorbell_off;
+ bool is_interop;
+ bool is_active;
+ /* Not relevant for user mode queues in cp scheduling */
+ unsigned int vmid;
+};
+
+struct queue {
+ struct list_head list;
+ void *mqd;
+ /* kfd_mem_obj contains the mqd */
+ kfd_mem_obj mqd_mem_obj;
+ uint64_t gart_mqd_addr; /* needed for cp scheduling */
+ struct queue_properties properties;
+
+ /* Used by the queue device manager to track the hqd slot per queue
+ * when using no cp scheduling
+ */
+ uint32_t mec;
+ uint32_t pipe;
+ uint32_t queue;
+
+ struct kfd_process *process;
+ struct kfd_dev *device;
+};
+
/* Data that is per-process-per device. */
struct kfd_process_device {
/* List of all per-device data for a process. Starts from kfd_process.per_device_data. */
@@ -271,4 +319,11 @@ int kgd2kfd_resume(struct kfd_dev *dev);
/*HSA apertures*/
int kfd_init_apertures(struct kfd_process *process);
+/* Queue Context Management */
+
+int init_queue(struct queue **q, struct queue_properties properties);
+void uninit_queue(struct queue *q);
+void print_queue_properties(struct queue_properties *q);
+void print_queue(struct queue *q);
+
#endif
diff --git a/drivers/gpu/hsa/radeon/kfd_queue.c b/drivers/gpu/hsa/radeon/kfd_queue.c
new file mode 100644
index 0000000..78fe180
--- /dev/null
+++ b/drivers/gpu/hsa/radeon/kfd_queue.c
@@ -0,0 +1,110 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Ben Goz
+ */
+
+#include <linux/slab.h>
+#include "kfd_priv.h"
+
+void print_queue_properties(struct queue_properties *q)
+{
+ if (!q)
+ return;
+
+ pr_debug("Printing queue properties\n"
+ "Queue Type: %u\n"
+ "Queue Size: %llu\n"
+ "Queue percent: %u\n"
+ "Queue Address: 0x%llX\n"
+ "Queue Id: %u\n"
+ "Queue Process Vmid: %u\n"
+ "Queue Read Pointer: 0x%p\n"
+ "Queue Write Pointer: 0x%p\n"
+ "Queue Doorbell Pointer: 0x%p\n"
+ "Queue Doorbell Offset: %u\n", q->type,
+ q->queue_size,
+ q->queue_percent,
+ q->queue_address,
+ q->queue_id,
+ q->vmid,
+ q->read_ptr,
+ q->write_ptr,
+ q->doorbell_ptr,
+ q->doorbell_off);
+}
+
+void print_queue(struct queue *q)
+{
+	if (!q)
+		return;
+	pr_debug("Printing queue\n"
+			"Queue Type: %u\n"
+			"Queue Size: %llu\n"
+			"Queue percent: %u\n"
+			"Queue Address: 0x%llX\n"
+			"Queue Id: %u\n"
+			"Queue Process Vmid: %u\n"
+			"Queue Read Pointer: 0x%p\n"
+			"Queue Write Pointer: 0x%p\n"
+			"Queue Doorbell Pointer: 0x%p\n"
+			"Queue Doorbell Offset: %u\n"
+			"Queue MQD Address: 0x%p\n"
+			"Queue MQD Gart: 0x%llX\n"
+			"Queue Process Address: 0x%p\n"
+			"Queue Device Address: 0x%p\n",
+			q->properties.type,
+			q->properties.queue_size,
+			q->properties.queue_percent,
+			q->properties.queue_address,
+			q->properties.queue_id,
+			q->properties.vmid,
+			q->properties.read_ptr,
+			q->properties.write_ptr,
+			q->properties.doorbell_ptr,
+			q->properties.doorbell_off,
+			q->mqd,
+			q->gart_mqd_addr,
+			q->process,
+			q->device);
+}
+
+int init_queue(struct queue **q, struct queue_properties properties)
+{
+	struct queue *tmp;
+
+	BUG_ON(!q);
+
+	tmp = kzalloc(sizeof(struct queue), GFP_KERNEL);
+	if (!tmp)
+		return -ENOMEM;
+
+	/* tmp is already zeroed by kzalloc; just copy the caller's properties */
+	memcpy(&tmp->properties, &properties, sizeof(struct queue_properties));
+
+	*q = tmp;
+	return 0;
+}
+
+void uninit_queue(struct queue *q)
+{
+ kfree(q);
+}
--
1.9.1
From: Alexey Skidanov <[email protected]>
Added apertures initialization and appropriate ioctl
Signed-off-by: Alexey Skidanov <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/hsa/radeon/Makefile | 2 +-
drivers/gpu/hsa/radeon/kfd_aperture.c | 124 ++++++++++++++++++++++++++
drivers/gpu/hsa/radeon/kfd_chardev.c | 58 +++++++++++-
drivers/gpu/hsa/radeon/kfd_priv.h | 18 ++++
drivers/gpu/hsa/radeon/kfd_process.c | 17 ++++
drivers/gpu/hsa/radeon/kfd_sched_cik_static.c | 3 +-
drivers/gpu/hsa/radeon/kfd_topology.c | 27 ++++++
include/uapi/linux/kfd_ioctl.h | 18 ++++
8 files changed, 264 insertions(+), 3 deletions(-)
create mode 100644 drivers/gpu/hsa/radeon/kfd_aperture.c
diff --git a/drivers/gpu/hsa/radeon/Makefile b/drivers/gpu/hsa/radeon/Makefile
index 5422e6a..813b31f 100644
--- a/drivers/gpu/hsa/radeon/Makefile
+++ b/drivers/gpu/hsa/radeon/Makefile
@@ -5,6 +5,6 @@
radeon_kfd-y := kfd_module.o kfd_device.o kfd_chardev.o \
kfd_pasid.o kfd_topology.o kfd_process.o \
kfd_doorbell.o kfd_sched_cik_static.o kfd_registers.o \
- kfd_vidmem.o kfd_interrupt.o
+ kfd_vidmem.o kfd_interrupt.o kfd_aperture.o
obj-$(CONFIG_HSA_RADEON) += radeon_kfd.o
diff --git a/drivers/gpu/hsa/radeon/kfd_aperture.c b/drivers/gpu/hsa/radeon/kfd_aperture.c
new file mode 100644
index 0000000..9e2d6da
--- /dev/null
+++ b/drivers/gpu/hsa/radeon/kfd_aperture.c
@@ -0,0 +1,124 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/device.h>
+#include <linux/export.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/compat.h>
+#include <uapi/linux/kfd_ioctl.h>
+#include <linux/time.h>
+#include "kfd_priv.h"
+#include "kfd_scheduler.h"
+#include <linux/mm.h>
+#include <uapi/asm-generic/mman-common.h>
+#include <asm/processor.h>
+
+
+#define MAKE_GPUVM_APP_BASE(gpu_num) (((uint64_t)(gpu_num) << 61) + 0x1000000000000)
+#define MAKE_GPUVM_APP_LIMIT(base) (((uint64_t)(base) & 0xFFFFFF0000000000) | 0xFFFFFFFFFF)
+#define MAKE_SCRATCH_APP_BASE(gpu_num) (((uint64_t)(gpu_num) << 61) + 0x100000000)
+#define MAKE_SCRATCH_APP_LIMIT(base) (((uint64_t)base & 0xFFFFFFFF00000000) | 0xFFFFFFFF)
+#define MAKE_LDS_APP_BASE(gpu_num) (((uint64_t)(gpu_num) << 61) + 0x0)
+#define MAKE_LDS_APP_LIMIT(base) (((uint64_t)(base) & 0xFFFFFFFF00000000) | 0xFFFFFFFF)
+
+#define HSA_32BIT_LDS_APP_SIZE 0x10000
+#define HSA_32BIT_LDS_APP_ALIGNMENT 0x10000
+
+static unsigned long kfd_reserve_aperture(struct kfd_process *process, unsigned long len, unsigned long alignment)
+{
+
+ unsigned long addr = 0;
+ unsigned long start_address;
+
+ /*
+ * Go bottom up and find the first available aligned address.
+ * We may narrow space to scan by getting mmap range limits.
+ */
+ for (start_address = alignment; start_address < (TASK_SIZE - alignment); start_address += alignment) {
+ addr = vm_mmap(NULL, start_address, len, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, 0);
+ if (!IS_ERR_VALUE(addr)) {
+ if (addr == start_address)
+ return addr;
+ vm_munmap(addr, len);
+ }
+ }
+ return 0;
+
+}
+
+int kfd_init_apertures(struct kfd_process *process)
+{
+ uint8_t id = 0;
+ struct kfd_dev *dev;
+ struct kfd_process_device *pdd;
+
+ mutex_lock(&process->mutex);
+
+ /*Iterating over all devices*/
+ while ((dev = kfd_topology_enum_kfd_devices(id)) != NULL && id < NUM_OF_SUPPORTED_GPUS) {
+
+ pdd = radeon_kfd_get_process_device_data(dev, process);
+
+ /*for 64 bit process aperture will be statically reserved in the non canonical process address space
+ *for 32 bit process the aperture will be reserved in the process address space
+ */
+ if (process->is_32bit_user_mode) {
+ /*try to reserve aperture. continue on failure, just put the aperture size to be 0*/
+ pdd->lds_base = kfd_reserve_aperture(
+ process,
+ HSA_32BIT_LDS_APP_SIZE,
+ HSA_32BIT_LDS_APP_ALIGNMENT);
+
+ if (pdd->lds_base)
+ pdd->lds_limit = pdd->lds_base + HSA_32BIT_LDS_APP_SIZE - 1;
+ else
+ pdd->lds_limit = 0;
+
+ /*GPUVM and Scratch apertures are not supported*/
+ pdd->gpuvm_base = pdd->gpuvm_limit = pdd->scratch_base = pdd->scratch_limit = 0;
+ } else {
+			/* node id cannot be 0 - the three MSBs of the aperture shouldn't be 0 */
+ pdd->lds_base = MAKE_LDS_APP_BASE(id + 1);
+ pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
+ pdd->gpuvm_base = MAKE_GPUVM_APP_BASE(id + 1);
+ pdd->gpuvm_limit = MAKE_GPUVM_APP_LIMIT(pdd->gpuvm_base);
+ pdd->scratch_base = MAKE_SCRATCH_APP_BASE(id + 1);
+ pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
+ }
+
+ dev_dbg(kfd_device, "node id %u, gpu id %u, lds_base %llX lds_limit %llX gpuvm_base %llX gpuvm_limit %llX scratch_base %llX scratch_limit %llX",
+ id, pdd->dev->id, pdd->lds_base, pdd->lds_limit, pdd->gpuvm_base, pdd->gpuvm_limit, pdd->scratch_base, pdd->scratch_limit);
+
+ id++;
+ }
+
+ mutex_unlock(&process->mutex);
+
+ return 0;
+}
+
+
diff --git a/drivers/gpu/hsa/radeon/kfd_chardev.c b/drivers/gpu/hsa/radeon/kfd_chardev.c
index e95d597..07cac88 100644
--- a/drivers/gpu/hsa/radeon/kfd_chardev.c
+++ b/drivers/gpu/hsa/radeon/kfd_chardev.c
@@ -32,6 +32,9 @@
#include <linux/time.h>
#include "kfd_priv.h"
#include "kfd_scheduler.h"
+#include <linux/mm.h>
+#include <uapi/asm-generic/mman-common.h>
+#include <asm/processor.h>
static long kfd_ioctl(struct file *, unsigned int, unsigned long);
static int kfd_open(struct inode *, struct file *);
@@ -107,9 +110,13 @@ kfd_open(struct inode *inode, struct file *filep)
process = radeon_kfd_create_process(current);
if (IS_ERR(process))
return PTR_ERR(process);
+
process->is_32bit_user_mode = is_compat_task();
+
dev_info(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
- process->pasid, process->is_32bit_user_mode);
+ process->pasid, process->is_32bit_user_mode);
+
+ kfd_init_apertures(process);
return 0;
}
@@ -321,6 +328,51 @@ kfd_ioctl_get_clock_counters(struct file *filep, struct kfd_process *p, void __u
return 0;
}
+
+static int kfd_ioctl_get_process_apertures(struct file *filp, struct kfd_process *p, void __user *arg)
+{
+ struct kfd_ioctl_get_process_apertures_args args;
+ struct kfd_process_device *pdd;
+
+ dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);
+
+ if (copy_from_user(&args, arg, sizeof(args)))
+ return -EFAULT;
+
+ args.num_of_nodes = 0;
+
+ mutex_lock(&p->mutex);
+
+ /*if the process-device list isn't empty*/
+ if (kfd_has_process_device_data(p)) {
+ /* Run over all pdd of the process */
+ pdd = kfd_get_first_process_device_data(p);
+ do {
+
+ args.process_apertures[args.num_of_nodes].gpu_id = pdd->dev->id;
+ args.process_apertures[args.num_of_nodes].lds_base = pdd->lds_base;
+ args.process_apertures[args.num_of_nodes].lds_limit = pdd->lds_limit;
+ args.process_apertures[args.num_of_nodes].gpuvm_base = pdd->gpuvm_base;
+ args.process_apertures[args.num_of_nodes].gpuvm_limit = pdd->gpuvm_limit;
+ args.process_apertures[args.num_of_nodes].scratch_base = pdd->scratch_base;
+ args.process_apertures[args.num_of_nodes].scratch_limit = pdd->scratch_limit;
+
+ dev_dbg(kfd_device, "node id %u, gpu id %u, lds_base %llX lds_limit %llX gpuvm_base %llX gpuvm_limit %llX scratch_base %llX scratch_limit %llX",
+ args.num_of_nodes, pdd->dev->id, pdd->lds_base, pdd->lds_limit, pdd->gpuvm_base, pdd->gpuvm_limit, pdd->scratch_base, pdd->scratch_limit);
+ args.num_of_nodes++;
+ } while ((pdd = kfd_get_next_process_device_data(p, pdd)) != NULL &&
+ (args.num_of_nodes < NUM_OF_SUPPORTED_GPUS));
+ }
+
+ mutex_unlock(&p->mutex);
+
+ if (copy_to_user(arg, &args, sizeof(args)))
+ return -EFAULT;
+
+ return 0;
+}
+
+
static long
kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
{
@@ -352,6 +404,10 @@ kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
err = kfd_ioctl_get_clock_counters(filep, process, (void __user *)arg);
break;
+ case KFD_IOC_GET_PROCESS_APERTURES:
+ err = kfd_ioctl_get_process_apertures(filep, process, (void __user *)arg);
+ break;
+
default:
dev_err(kfd_device,
"unknown ioctl cmd 0x%x, arg 0x%lx)\n",
diff --git a/drivers/gpu/hsa/radeon/kfd_priv.h b/drivers/gpu/hsa/radeon/kfd_priv.h
index 9d3b1fc..28155bc 100644
--- a/drivers/gpu/hsa/radeon/kfd_priv.h
+++ b/drivers/gpu/hsa/radeon/kfd_priv.h
@@ -171,6 +171,16 @@ struct kfd_process_device {
/* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */
bool bound;
+
+ /*Apertures*/
+ uint64_t lds_base;
+ uint64_t lds_limit;
+ uint64_t gpuvm_base;
+ uint64_t gpuvm_limit;
+ uint64_t scratch_base;
+ uint64_t scratch_limit;
+
+
};
/* Process data */
@@ -212,6 +222,10 @@ void radeon_kfd_install_queue(struct kfd_process *p, unsigned int queue_id, stru
void radeon_kfd_remove_queue(struct kfd_process *p, unsigned int queue_id);
struct kfd_queue *radeon_kfd_get_queue(struct kfd_process *p, unsigned int queue_id);
+/* Process device data iterator */
+struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p);
+struct kfd_process_device *kfd_get_next_process_device_data(struct kfd_process *p, struct kfd_process_device *pdd);
+bool kfd_has_process_device_data(struct kfd_process *p);
/* PASIDs */
int radeon_kfd_pasid_init(void);
@@ -237,6 +251,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu);
int kfd_topology_remove_device(struct kfd_dev *gpu);
struct kfd_dev *radeon_kfd_device_by_id(uint32_t gpu_id);
struct kfd_dev *radeon_kfd_device_by_pci_dev(const struct pci_dev *pdev);
+struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx);
/* MMIO registers */
#define WRITE_REG(dev, reg, value) radeon_kfd_write_reg((dev), (reg), (value))
@@ -253,4 +268,7 @@ void kgd2kfd_interrupt(struct kfd_dev *dev, const void *ih_ring_entry);
void kgd2kfd_suspend(struct kfd_dev *dev);
int kgd2kfd_resume(struct kfd_dev *dev);
+/*HSA apertures*/
+int kfd_init_apertures(struct kfd_process *process);
+
#endif
diff --git a/drivers/gpu/hsa/radeon/kfd_process.c b/drivers/gpu/hsa/radeon/kfd_process.c
index f89f855..80136e6 100644
--- a/drivers/gpu/hsa/radeon/kfd_process.c
+++ b/drivers/gpu/hsa/radeon/kfd_process.c
@@ -397,3 +397,20 @@ struct kfd_queue *radeon_kfd_get_queue(struct kfd_process *p, unsigned int queue
test_bit(queue_id, p->allocated_queue_bitmap)) ?
p->queues[queue_id] : NULL;
}
+
+struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p)
+{
+ return list_first_entry(&p->per_device_data, struct kfd_process_device, per_device_list);
+}
+
+struct kfd_process_device *kfd_get_next_process_device_data(struct kfd_process *p, struct kfd_process_device *pdd)
+{
+ if (list_is_last(&pdd->per_device_list, &p->per_device_data))
+ return NULL;
+ return list_next_entry(pdd, per_device_list);
+}
+
+bool kfd_has_process_device_data(struct kfd_process *p)
+{
+ return !(list_empty(&p->per_device_data));
+}
diff --git a/drivers/gpu/hsa/radeon/kfd_sched_cik_static.c b/drivers/gpu/hsa/radeon/kfd_sched_cik_static.c
index 7ee8125..30561a6 100644
--- a/drivers/gpu/hsa/radeon/kfd_sched_cik_static.c
+++ b/drivers/gpu/hsa/radeon/kfd_sched_cik_static.c
@@ -627,7 +627,8 @@ static void cik_static_deregister_process(struct kfd_scheduler *scheduler,
struct cik_static_private *priv = kfd_scheduler_to_private(scheduler);
struct cik_static_process *pp = kfd_process_to_private(scheduler_process);
- if (priv && pp) {
+
+ if (priv && pp) {
release_vmid(priv, pp->vmid);
kfree(pp);
}
diff --git a/drivers/gpu/hsa/radeon/kfd_topology.c b/drivers/gpu/hsa/radeon/kfd_topology.c
index 21bb66e..213ae7b 100644
--- a/drivers/gpu/hsa/radeon/kfd_topology.c
+++ b/drivers/gpu/hsa/radeon/kfd_topology.c
@@ -1201,3 +1201,30 @@ int kfd_topology_remove_device(struct kfd_dev *gpu)
return res;
}
+
+/*
+ * When idx is out of bounds, the function will return NULL
+ */
+struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx)
+{
+
+ struct kfd_topology_device *top_dev;
+ struct kfd_dev *device = NULL;
+ uint8_t device_idx = 0;
+
+ down_read(&topology_lock);
+
+ list_for_each_entry(top_dev, &topology_device_list, list) {
+ if (device_idx == idx) {
+ device = top_dev->gpu;
+ break;
+ }
+
+ device_idx++;
+ }
+
+ up_read(&topology_lock);
+
+ return device;
+
+}
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index a7c3abd..e5fcb8b 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -78,6 +78,23 @@ struct kfd_ioctl_get_clock_counters_args {
uint64_t system_clock_freq; /* from KFD */
};
+#define NUM_OF_SUPPORTED_GPUS 7
+
+struct kfd_process_device_apertures {
+ uint64_t lds_base;/* from KFD */
+ uint64_t lds_limit;/* from KFD */
+ uint64_t scratch_base;/* from KFD */
+ uint64_t scratch_limit;/* from KFD */
+ uint64_t gpuvm_base;/* from KFD */
+ uint64_t gpuvm_limit;/* from KFD */
+ uint32_t gpu_id;/* from KFD */
+};
+
+struct kfd_ioctl_get_process_apertures_args {
+ struct kfd_process_device_apertures process_apertures[NUM_OF_SUPPORTED_GPUS];/* from KFD */
+ uint8_t num_of_nodes; /* from KFD, should be in the range [1 - NUM_OF_SUPPORTED_GPUS]*/
+};
+
#define KFD_IOC_MAGIC 'K'
#define KFD_IOC_GET_VERSION _IOR(KFD_IOC_MAGIC, 1, struct kfd_ioctl_get_version_args)
@@ -85,6 +102,7 @@ struct kfd_ioctl_get_clock_counters_args {
#define KFD_IOC_DESTROY_QUEUE _IOWR(KFD_IOC_MAGIC, 3, struct kfd_ioctl_destroy_queue_args)
#define KFD_IOC_SET_MEMORY_POLICY _IOW(KFD_IOC_MAGIC, 4, struct kfd_ioctl_set_memory_policy_args)
#define KFD_IOC_GET_CLOCK_COUNTERS _IOWR(KFD_IOC_MAGIC, 5, struct kfd_ioctl_get_clock_counters_args)
+#define KFD_IOC_GET_PROCESS_APERTURES _IOR(KFD_IOC_MAGIC, 6, struct kfd_ioctl_get_process_apertures_args)
#pragma pack(pop)
--
1.9.1
From: Alexey Skidanov <[email protected]>
Initializing compat_ioctl properly. All ioctl args are packed.
Signed-off-by: Alexey Skidanov <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/hsa/radeon/kfd_chardev.c | 7 +++++--
drivers/gpu/hsa/radeon/kfd_priv.h | 4 ++++
include/uapi/linux/kfd_ioctl.h | 2 +-
3 files changed, 10 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/hsa/radeon/kfd_chardev.c b/drivers/gpu/hsa/radeon/kfd_chardev.c
index 75fe11f..e95d597 100644
--- a/drivers/gpu/hsa/radeon/kfd_chardev.c
+++ b/drivers/gpu/hsa/radeon/kfd_chardev.c
@@ -27,6 +27,7 @@
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
+#include <linux/compat.h>
#include <uapi/linux/kfd_ioctl.h>
#include <linux/time.h>
#include "kfd_priv.h"
@@ -41,6 +42,7 @@ static const char kfd_dev_name[] = "kfd";
static const struct file_operations kfd_fops = {
.owner = THIS_MODULE,
.unlocked_ioctl = kfd_ioctl,
+ .compat_ioctl = kfd_ioctl,
.open = kfd_open,
.mmap = kfd_mmap,
};
@@ -105,8 +107,9 @@ kfd_open(struct inode *inode, struct file *filep)
process = radeon_kfd_create_process(current);
if (IS_ERR(process))
return PTR_ERR(process);
-
- pr_debug("\nkfd: process %d opened dev/kfd", process->pasid);
+ process->is_32bit_user_mode = is_compat_task();
+ dev_info(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
+ process->pasid, process->is_32bit_user_mode);
return 0;
}
diff --git a/drivers/gpu/hsa/radeon/kfd_priv.h b/drivers/gpu/hsa/radeon/kfd_priv.h
index 8b877ca..9d3b1fc 100644
--- a/drivers/gpu/hsa/radeon/kfd_priv.h
+++ b/drivers/gpu/hsa/radeon/kfd_priv.h
@@ -194,6 +194,10 @@ struct kfd_process {
size_t queue_array_size;
struct kfd_queue **queues; /* Size is queue_array_size, up to MAX_PROCESS_QUEUES. */
unsigned long allocated_queue_bitmap[DIV_ROUND_UP(MAX_PROCESS_QUEUES, BITS_PER_LONG)];
+
+ /*Is the user space process 32 bit?*/
+ bool is_32bit_user_mode;
+
};
struct kfd_process *radeon_kfd_create_process(const struct task_struct *);
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 5b9517e..a7c3abd 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -29,7 +29,7 @@
#define KFD_IOCTL_CURRENT_VERSION 1
/* The 64-bit ABI is the authoritative version. */
-#pragma pack(push, 8)
+#pragma pack(push, 1)
struct kfd_ioctl_get_version_args {
uint32_t min_supported_version; /* from KFD */
--
1.9.1
From: Evgeny Pinchuk <[email protected]>
Changing the source of the max engine clock value.
Signed-off-by: Evgeny Pinchuk <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/drm/radeon/radeon_kfd.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c
index 8b6d497..a28cf6b 100644
--- a/drivers/gpu/drm/radeon/radeon_kfd.c
+++ b/drivers/gpu/drm/radeon/radeon_kfd.c
@@ -316,5 +316,5 @@ static uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
struct radeon_device *rdev = (struct radeon_device *)kgd;
/* The sclk is in quantas of 10kHz */
- return rdev->pm.power_state->clock_info->sclk / 100;
+ return rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100;
}
--
1.9.1
From: Evgeny Pinchuk <[email protected]>
Adding support for CPU and GPU max clock speeds in node properties.
Signed-off-by: Evgeny Pinchuk <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/hsa/radeon/kfd_topology.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/hsa/radeon/kfd_topology.c b/drivers/gpu/hsa/radeon/kfd_topology.c
index 2ee5444..21bb66e 100644
--- a/drivers/gpu/hsa/radeon/kfd_topology.c
+++ b/drivers/gpu/hsa/radeon/kfd_topology.c
@@ -26,6 +26,7 @@
#include <linux/errno.h>
#include <linux/acpi.h>
#include <linux/hash.h>
+#include <linux/cpufreq.h>
#include "kfd_priv.h"
#include "kfd_crat.h"
@@ -712,9 +713,10 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
sysfs_show_32bit_prop(buffer, "location_id",
dev->node_props.location_id);
sysfs_show_32bit_prop(buffer, "max_engine_clk_fcompute",
- dev->node_props.max_engine_clk_fcompute);
+ kfd2kgd->get_max_engine_clock_in_mhz(
+ dev->gpu->kgd));
ret = sysfs_show_32bit_prop(buffer, "max_engine_clk_ccompute",
- dev->node_props.max_engine_clk_ccompute);
+ cpufreq_quick_get_max(0)/1000);
}
return ret;
--
1.9.1
This patch creates a workaround for a bug in amd_iommu driver, where the driver
doesn't save all necessary information when going to suspend.
The workaround removes the device from the IOMMU device list on suspend and registers it again in the IOMMU device list on resume.
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/hsa/radeon/Makefile | 2 +-
drivers/gpu/hsa/radeon/kfd_device.c | 30 ++++++++++++++++++++++++++
drivers/gpu/hsa/radeon/kfd_pasid.c | 5 +++++
drivers/gpu/hsa/radeon/kfd_pm.c | 43 -------------------------------------
drivers/gpu/hsa/radeon/kfd_priv.h | 1 +
5 files changed, 37 insertions(+), 44 deletions(-)
delete mode 100644 drivers/gpu/hsa/radeon/kfd_pm.c
diff --git a/drivers/gpu/hsa/radeon/Makefile b/drivers/gpu/hsa/radeon/Makefile
index 935f9b7..5422e6a 100644
--- a/drivers/gpu/hsa/radeon/Makefile
+++ b/drivers/gpu/hsa/radeon/Makefile
@@ -5,6 +5,6 @@
radeon_kfd-y := kfd_module.o kfd_device.o kfd_chardev.o \
kfd_pasid.o kfd_topology.o kfd_process.o \
kfd_doorbell.o kfd_sched_cik_static.o kfd_registers.o \
- kfd_vidmem.o kfd_interrupt.o kfd_pm.o
+ kfd_vidmem.o kfd_interrupt.o
obj-$(CONFIG_HSA_RADEON) += radeon_kfd.o
diff --git a/drivers/gpu/hsa/radeon/kfd_device.c b/drivers/gpu/hsa/radeon/kfd_device.c
index a21c095..2e7d50d 100644
--- a/drivers/gpu/hsa/radeon/kfd_device.c
+++ b/drivers/gpu/hsa/radeon/kfd_device.c
@@ -188,3 +188,33 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
kfree(kfd);
}
+
+void kgd2kfd_suspend(struct kfd_dev *kfd)
+{
+ BUG_ON(kfd == NULL);
+
+ if (kfd->init_complete) {
+ kfd->device_info->scheduler_class->stop(kfd->scheduler);
+ amd_iommu_free_device(kfd->pdev);
+ }
+}
+
+int kgd2kfd_resume(struct kfd_dev *kfd)
+{
+ pasid_t pasid_limit;
+ int err;
+
+ BUG_ON(kfd == NULL);
+
+ pasid_limit = radeon_kfd_get_pasid_limit();
+
+ if (kfd->init_complete) {
+ err = amd_iommu_init_device(kfd->pdev, pasid_limit);
+ if (err < 0)
+ return -ENXIO;
+ amd_iommu_set_invalidate_ctx_cb(kfd->pdev, iommu_pasid_shutdown_callback);
+ kfd->device_info->scheduler_class->start(kfd->scheduler);
+ }
+
+ return 0;
+}
diff --git a/drivers/gpu/hsa/radeon/kfd_pasid.c b/drivers/gpu/hsa/radeon/kfd_pasid.c
index d78bd00..8bd1562 100644
--- a/drivers/gpu/hsa/radeon/kfd_pasid.c
+++ b/drivers/gpu/hsa/radeon/kfd_pasid.c
@@ -68,6 +68,11 @@ bool radeon_kfd_set_pasid_limit(pasid_t new_limit)
return true;
}
+inline pasid_t radeon_kfd_get_pasid_limit(void)
+{
+ return pasid_limit;
+}
+
pasid_t radeon_kfd_pasid_alloc(void)
{
pasid_t found;
diff --git a/drivers/gpu/hsa/radeon/kfd_pm.c b/drivers/gpu/hsa/radeon/kfd_pm.c
deleted file mode 100644
index 783311f..0000000
--- a/drivers/gpu/hsa/radeon/kfd_pm.c
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright 2014 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Author: Oded Gabbay
- */
-
-#include <linux/device.h>
-#include "kfd_priv.h"
-#include "kfd_scheduler.h"
-
-void kgd2kfd_suspend(struct kfd_dev *kfd)
-{
- BUG_ON(kfd == NULL);
-
- kfd->device_info->scheduler_class->stop(kfd->scheduler);
-}
-
-int kgd2kfd_resume(struct kfd_dev *kfd)
-{
- BUG_ON(kfd == NULL);
-
- kfd->device_info->scheduler_class->start(kfd->scheduler);
-
- return 0;
-}
diff --git a/drivers/gpu/hsa/radeon/kfd_priv.h b/drivers/gpu/hsa/radeon/kfd_priv.h
index bca9cce..8b877ca 100644
--- a/drivers/gpu/hsa/radeon/kfd_priv.h
+++ b/drivers/gpu/hsa/radeon/kfd_priv.h
@@ -213,6 +213,7 @@ struct kfd_queue *radeon_kfd_get_queue(struct kfd_process *p, unsigned int queue
int radeon_kfd_pasid_init(void);
void radeon_kfd_pasid_exit(void);
bool radeon_kfd_set_pasid_limit(pasid_t new_limit);
+pasid_t radeon_kfd_get_pasid_limit(void);
pasid_t radeon_kfd_pasid_alloc(void);
void radeon_kfd_pasid_free(pasid_t pasid);
--
1.9.1
From: Evgeny Pinchuk <[email protected]>
Implementing a lock for selecting and accessing shader engines and arrays.
This lock will make sure that drm/radeon and hsa/radeon are not colliding when
accessing shader engines and arrays with GRBM_GFX_INDEX register.
Signed-off-by: Evgeny Pinchuk <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/drm/radeon/cik.c | 26 ++++++++++++++++++++++++++
drivers/gpu/drm/radeon/radeon.h | 2 ++
drivers/gpu/drm/radeon/radeon_device.c | 1 +
drivers/gpu/drm/radeon/radeon_kfd.c | 23 +++++++++++++++++++++++
include/linux/radeon_kfd.h | 4 ++++
5 files changed, 56 insertions(+)
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index 6f4999a..fc560b0 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -1566,6 +1566,8 @@ static const u32 godavari_golden_registers[] =
static void cik_init_golden_registers(struct radeon_device *rdev)
{
+ /* Some of the registers might be dependant on GRBM_GFX_INDEX */
+ mutex_lock(&rdev->grbm_idx_mutex);
switch (rdev->family) {
case CHIP_BONAIRE:
radeon_program_register_sequence(rdev,
@@ -1640,6 +1642,7 @@ static void cik_init_golden_registers(struct radeon_device *rdev)
default:
break;
}
+ mutex_unlock(&rdev->grbm_idx_mutex);
}
/**
@@ -3421,6 +3424,7 @@ static void cik_setup_rb(struct radeon_device *rdev,
u32 disabled_rbs = 0;
u32 enabled_rbs = 0;
+ mutex_lock(&rdev->grbm_idx_mutex);
for (i = 0; i < se_num; i++) {
for (j = 0; j < sh_per_se; j++) {
cik_select_se_sh(rdev, i, j);
@@ -3432,6 +3436,7 @@ static void cik_setup_rb(struct radeon_device *rdev,
}
}
cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
+ mutex_unlock(&rdev->grbm_idx_mutex);
mask = 1;
for (i = 0; i < max_rb_num_per_se * se_num; i++) {
@@ -3442,6 +3447,7 @@ static void cik_setup_rb(struct radeon_device *rdev,
rdev->config.cik.backend_enable_mask = enabled_rbs;
+ mutex_lock(&rdev->grbm_idx_mutex);
for (i = 0; i < se_num; i++) {
cik_select_se_sh(rdev, i, 0xffffffff);
data = 0;
@@ -3469,6 +3475,7 @@ static void cik_setup_rb(struct radeon_device *rdev,
WREG32(PA_SC_RASTER_CONFIG, data);
}
cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
+ mutex_unlock(&rdev->grbm_idx_mutex);
}
/**
@@ -3686,6 +3693,12 @@ static void cik_gpu_init(struct radeon_device *rdev)
/* set HW defaults for 3D engine */
WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
+ mutex_lock(&rdev->grbm_idx_mutex);
+ /*
+ * making sure that the following register writes will be broadcasted
+ * to all the shaders
+ */
+ cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
WREG32(SX_DEBUG_1, 0x20);
WREG32(TA_CNTL_AUX, 0x00010000);
@@ -3741,6 +3754,7 @@ static void cik_gpu_init(struct radeon_device *rdev)
WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
+ mutex_unlock(&rdev->grbm_idx_mutex);
udelay(50);
}
@@ -6040,6 +6054,7 @@ static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
u32 i, j, k;
u32 mask;
+ mutex_lock(&rdev->grbm_idx_mutex);
for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
cik_select_se_sh(rdev, i, j);
@@ -6051,6 +6066,7 @@ static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
}
}
cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
+ mutex_unlock(&rdev->grbm_idx_mutex);
mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
for (k = 0; k < rdev->usec_timeout; k++) {
@@ -6185,10 +6201,12 @@ static int cik_rlc_resume(struct radeon_device *rdev)
WREG32(RLC_LB_CNTR_INIT, 0);
WREG32(RLC_LB_CNTR_MAX, 0x00008000);
+ mutex_lock(&rdev->grbm_idx_mutex);
cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
WREG32(RLC_LB_PARAMS, 0x00600408);
WREG32(RLC_LB_CNTL, 0x80000004);
+ mutex_unlock(&rdev->grbm_idx_mutex);
WREG32(RLC_MC_CNTL, 0);
WREG32(RLC_UCODE_CNTL, 0);
@@ -6255,11 +6273,13 @@ static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
tmp = cik_halt_rlc(rdev);
+ mutex_lock(&rdev->grbm_idx_mutex);
cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
WREG32(RLC_SERDES_WR_CTRL, tmp2);
+ mutex_unlock(&rdev->grbm_idx_mutex);
cik_update_rlc(rdev, tmp);
@@ -6301,11 +6321,13 @@ static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
tmp = cik_halt_rlc(rdev);
+ mutex_lock(&rdev->grbm_idx_mutex);
cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
WREG32(RLC_SERDES_WR_CTRL, data);
+ mutex_unlock(&rdev->grbm_idx_mutex);
cik_update_rlc(rdev, tmp);
@@ -6349,11 +6371,13 @@ static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
tmp = cik_halt_rlc(rdev);
+ mutex_lock(&rdev->grbm_idx_mutex);
cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
WREG32(RLC_SERDES_WR_CTRL, data);
+ mutex_unlock(&rdev->grbm_idx_mutex);
cik_update_rlc(rdev, tmp);
}
@@ -6786,10 +6810,12 @@ static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
u32 mask = 0, tmp, tmp1;
int i;
+ mutex_lock(&rdev->grbm_idx_mutex);
cik_select_se_sh(rdev, se, sh);
tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
+ mutex_unlock(&rdev->grbm_idx_mutex);
tmp &= 0xffff0000;
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 90f66bb..94b38a7 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -2334,6 +2334,8 @@ struct radeon_device {
struct radeon_atcs atcs;
/* srbm instance registers */
struct mutex srbm_mutex;
+ /* GRBM index mutex. Protects concurrents access to GRBM index */
+ struct mutex grbm_idx_mutex;
/* clock, powergating flags */
u32 cg_flags;
u32 pg_flags;
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 98538d2..1b8b8b7 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1258,6 +1258,7 @@ int radeon_device_init(struct radeon_device *rdev,
mutex_init(&rdev->pm.mutex);
mutex_init(&rdev->gpu_clock_mutex);
mutex_init(&rdev->srbm_mutex);
+ mutex_init(&rdev->grbm_idx_mutex);
init_rwsem(&rdev->pm.mclk_lock);
init_rwsem(&rdev->exclusive_lock);
init_waitqueue_head(&rdev->irq.vblank_queue);
diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c
index 121e67b..6dba170 100644
--- a/drivers/gpu/drm/radeon/radeon_kfd.c
+++ b/drivers/gpu/drm/radeon/radeon_kfd.c
@@ -47,6 +47,9 @@ static uint64_t get_gpu_clock_counter(struct kgd_dev *kgd);
static void lock_srbm_gfx_cntl(struct kgd_dev *kgd);
static void unlock_srbm_gfx_cntl(struct kgd_dev *kgd);
+static void lock_grbm_gfx_idx(struct kgd_dev *kgd);
+static void unlock_grbm_gfx_idx(struct kgd_dev *kgd);
+
static const struct kfd2kgd_calls kfd2kgd = {
.allocate_mem = allocate_mem,
@@ -59,6 +62,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
.get_gpu_clock_counter = get_gpu_clock_counter,
.lock_srbm_gfx_cntl = lock_srbm_gfx_cntl,
.unlock_srbm_gfx_cntl = unlock_srbm_gfx_cntl,
+ .lock_grbm_gfx_idx = lock_grbm_gfx_idx,
+ .unlock_grbm_gfx_idx = unlock_grbm_gfx_idx,
};
static const struct kgd2kfd_calls *kgd2kfd;
@@ -278,6 +283,24 @@ static void unlock_srbm_gfx_cntl(struct kgd_dev *kgd)
mutex_unlock(&rdev->srbm_mutex);
}
+static void lock_grbm_gfx_idx(struct kgd_dev *kgd)
+{
+ struct radeon_device *rdev = (struct radeon_device *)kgd;
+
+ BUG_ON(kgd == NULL);
+
+ mutex_lock(&rdev->grbm_idx_mutex);
+}
+
+static void unlock_grbm_gfx_idx(struct kgd_dev *kgd)
+{
+ struct radeon_device *rdev = (struct radeon_device *)kgd;
+
+ BUG_ON(kgd == NULL);
+
+ mutex_unlock(&rdev->grbm_idx_mutex);
+}
+
static uint64_t get_gpu_clock_counter(struct kgd_dev *kgd)
{
struct radeon_device *rdev = (struct radeon_device *)kgd;
diff --git a/include/linux/radeon_kfd.h b/include/linux/radeon_kfd.h
index fcb6c7a..4c7e923 100644
--- a/include/linux/radeon_kfd.h
+++ b/include/linux/radeon_kfd.h
@@ -89,6 +89,10 @@ struct kfd2kgd_calls {
/* SRBM_GFX_CNTL mutex */
void (*lock_srbm_gfx_cntl)(struct kgd_dev *kgd);
void (*unlock_srbm_gfx_cntl)(struct kgd_dev *kgd);
+
+ /* GRBM_GFX_INDEX mutex */
+ void (*lock_grbm_gfx_idx)(struct kgd_dev *kgd);
+ void (*unlock_grbm_gfx_idx)(struct kgd_dev *kgd);
};
bool kgd2kfd_init(unsigned interface_version,
--
1.9.1
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/hsa/radeon/kfd_chardev.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/hsa/radeon/kfd_chardev.c b/drivers/gpu/hsa/radeon/kfd_chardev.c
index d6fa980..dba6084 100644
--- a/drivers/gpu/hsa/radeon/kfd_chardev.c
+++ b/drivers/gpu/hsa/radeon/kfd_chardev.c
@@ -324,9 +324,9 @@ kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
struct kfd_process *process;
long err = -EINVAL;
- dev_info(kfd_device,
- "ioctl cmd 0x%x (#%d), arg 0x%lx\n",
- cmd, _IOC_NR(cmd), arg);
+ dev_dbg(kfd_device,
+ "ioctl cmd 0x%x (#%d), arg 0x%lx\n",
+ cmd, _IOC_NR(cmd), arg);
process = radeon_kfd_get_process(current);
if (IS_ERR(process))
--
1.9.1
From: Evgeny Pinchuk <[email protected]>
Implemented new IOCTL to query the CPU and GPU clock counters.
Signed-off-by: Evgeny Pinchuk <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/hsa/radeon/kfd_chardev.c | 37 ++++++++++++++++++++++++++++++++++++
include/uapi/linux/kfd_ioctl.h | 9 +++++++++
2 files changed, 46 insertions(+)
diff --git a/drivers/gpu/hsa/radeon/kfd_chardev.c b/drivers/gpu/hsa/radeon/kfd_chardev.c
index ddaf357..d6fa980 100644
--- a/drivers/gpu/hsa/radeon/kfd_chardev.c
+++ b/drivers/gpu/hsa/radeon/kfd_chardev.c
@@ -28,6 +28,7 @@
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <uapi/linux/kfd_ioctl.h>
+#include <linux/time.h>
#include "kfd_priv.h"
#include "kfd_scheduler.h"
@@ -284,6 +285,38 @@ out:
return err;
}
+static long
+kfd_ioctl_get_clock_counters(struct file *filep, struct kfd_process *p, void __user *arg)
+{
+ struct kfd_ioctl_get_clock_counters_args args;
+ struct kfd_dev *dev;
+ struct timespec time;
+
+ if (copy_from_user(&args, arg, sizeof(args)))
+ return -EFAULT;
+
+ dev = radeon_kfd_device_by_id(args.gpu_id);
+ if (dev == NULL)
+ return -EINVAL;
+
+ /* Reading GPU clock counter from KGD */
+ args.gpu_clock_counter = kfd2kgd->get_gpu_clock_counter(dev->kgd);
+
+ /* No access to rdtsc. Using raw monotonic time */
+ getrawmonotonic(&time);
+ args.cpu_clock_counter = time.tv_nsec;
+
+ get_monotonic_boottime(&time);
+ args.system_clock_counter = time.tv_nsec;
+
+ /* Since the counter is in nano-seconds we use 1GHz frequency */
+ args.system_clock_freq = 1000000000;
+
+ if (copy_to_user(arg, &args, sizeof(args)))
+ return -EFAULT;
+
+ return 0;
+}
static long
kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
@@ -312,6 +345,10 @@ kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
err = kfd_ioctl_set_memory_policy(filep, process, (void __user *)arg);
break;
+ case KFD_IOC_GET_CLOCK_COUNTERS:
+ err = kfd_ioctl_get_clock_counters(filep, process, (void __user *)arg);
+ break;
+
default:
dev_err(kfd_device,
"unknown ioctl cmd 0x%x, arg 0x%lx)\n",
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 928e628..5b9517e 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -70,12 +70,21 @@ struct kfd_ioctl_set_memory_policy_args {
uint64_t alternate_aperture_size; /* to KFD */
};
+struct kfd_ioctl_get_clock_counters_args {
+ uint32_t gpu_id; /* to KFD */
+ uint64_t gpu_clock_counter; /* from KFD */
+ uint64_t cpu_clock_counter; /* from KFD */
+ uint64_t system_clock_counter; /* from KFD */
+ uint64_t system_clock_freq; /* from KFD */
+};
+
#define KFD_IOC_MAGIC 'K'
#define KFD_IOC_GET_VERSION _IOR(KFD_IOC_MAGIC, 1, struct kfd_ioctl_get_version_args)
#define KFD_IOC_CREATE_QUEUE _IOWR(KFD_IOC_MAGIC, 2, struct kfd_ioctl_create_queue_args)
#define KFD_IOC_DESTROY_QUEUE _IOWR(KFD_IOC_MAGIC, 3, struct kfd_ioctl_destroy_queue_args)
#define KFD_IOC_SET_MEMORY_POLICY _IOW(KFD_IOC_MAGIC, 4, struct kfd_ioctl_set_memory_policy_args)
+#define KFD_IOC_GET_CLOCK_COUNTERS _IOWR(KFD_IOC_MAGIC, 5, struct kfd_ioctl_get_clock_counters_args)
#pragma pack(pop)
--
1.9.1
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/hsa/radeon/cik_int.h | 20 ++++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/drivers/gpu/hsa/radeon/cik_int.h b/drivers/gpu/hsa/radeon/cik_int.h
index e98551d..350f0c2 100644
--- a/drivers/gpu/hsa/radeon/cik_int.h
+++ b/drivers/gpu/hsa/radeon/cik_int.h
@@ -26,20 +26,20 @@
#include <linux/types.h>
struct cik_ih_ring_entry {
- uint32_t source_id : 8;
- uint32_t reserved1 : 8;
- uint32_t reserved2 : 16;
+ uint32_t source_id:8;
+ uint32_t reserved1:8;
+ uint32_t reserved2:16;
- uint32_t data : 28;
- uint32_t reserved3 : 4;
+ uint32_t data:28;
+ uint32_t reserved3:4;
/* pipeid, meid and unused3 are officially called RINGID,
* but for our purposes, they always decode into pipe and ME. */
- uint32_t pipeid : 2;
- uint32_t meid : 2;
- uint32_t reserved4 : 4;
- uint32_t vmid : 8;
- uint32_t pasid : 16;
+ uint32_t pipeid:2;
+ uint32_t meid:2;
+ uint32_t reserved4:4;
+ uint32_t vmid:8;
+ uint32_t pasid:16;
uint32_t reserved5;
};
--
1.9.1
From: Evgeny Pinchuk <[email protected]>
Adding API for KFD to be able to query the GPU clock counter.
Signed-off-by: Evgeny Pinchuk <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/drm/radeon/radeon_kfd.c | 9 +++++++++
include/linux/radeon_kfd.h | 1 +
2 files changed, 10 insertions(+)
diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c
index f4cc3c5..121e67b 100644
--- a/drivers/gpu/drm/radeon/radeon_kfd.c
+++ b/drivers/gpu/drm/radeon/radeon_kfd.c
@@ -42,6 +42,7 @@ static int kmap_mem(struct kgd_dev *kgd, struct kgd_mem *mem, void **ptr);
static void unkmap_mem(struct kgd_dev *kgd, struct kgd_mem *mem);
static uint64_t get_vmem_size(struct kgd_dev *kgd);
+static uint64_t get_gpu_clock_counter(struct kgd_dev *kgd);
static void lock_srbm_gfx_cntl(struct kgd_dev *kgd);
static void unlock_srbm_gfx_cntl(struct kgd_dev *kgd);
@@ -55,6 +56,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
.kmap_mem = kmap_mem,
.unkmap_mem = unkmap_mem,
.get_vmem_size = get_vmem_size,
+ .get_gpu_clock_counter = get_gpu_clock_counter,
.lock_srbm_gfx_cntl = lock_srbm_gfx_cntl,
.unlock_srbm_gfx_cntl = unlock_srbm_gfx_cntl,
};
@@ -275,3 +277,10 @@ static void unlock_srbm_gfx_cntl(struct kgd_dev *kgd)
mutex_unlock(&rdev->srbm_mutex);
}
+
+static uint64_t get_gpu_clock_counter(struct kgd_dev *kgd)
+{
+ struct radeon_device *rdev = (struct radeon_device *)kgd;
+
+ return rdev->asic->get_gpu_clock_counter(rdev);
+}
diff --git a/include/linux/radeon_kfd.h b/include/linux/radeon_kfd.h
index 63b7bac..fcb6c7a 100644
--- a/include/linux/radeon_kfd.h
+++ b/include/linux/radeon_kfd.h
@@ -84,6 +84,7 @@ struct kfd2kgd_calls {
void (*unkmap_mem)(struct kgd_dev *kgd, struct kgd_mem *mem);
uint64_t (*get_vmem_size)(struct kgd_dev *kgd);
+ uint64_t (*get_gpu_clock_counter)(struct kgd_dev *kgd);
/* SRBM_GFX_CNTL mutex */
void (*lock_srbm_gfx_cntl)(struct kgd_dev *kgd);
--
1.9.1
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/gpu/hsa/radeon/kfd_device.c | 28 +++++++++++++++++++++++-----
1 file changed, 23 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/hsa/radeon/kfd_device.c b/drivers/gpu/hsa/radeon/kfd_device.c
index b627e57..a21c095 100644
--- a/drivers/gpu/hsa/radeon/kfd_device.c
+++ b/drivers/gpu/hsa/radeon/kfd_device.c
@@ -27,7 +27,7 @@
#include "kfd_priv.h"
#include "kfd_scheduler.h"
-static const struct kfd_device_info bonaire_device_info = {
+static const struct kfd_device_info kaveri_device_info = {
.scheduler_class = &radeon_kfd_cik_static_scheduler_class,
.max_pasid_bits = 16,
.ih_ring_entry_size = 4 * sizeof(uint32_t)
@@ -40,10 +40,28 @@ struct kfd_deviceid {
/* Please keep this sorted by increasing device id. */
static const struct kfd_deviceid supported_devices[] = {
- { 0x1305, &bonaire_device_info }, /* Kaveri */
- { 0x1307, &bonaire_device_info }, /* Kaveri */
- { 0x130F, &bonaire_device_info }, /* Kaveri */
- { 0x665C, &bonaire_device_info }, /* Bonaire */
+ { 0x1304, &kaveri_device_info }, /* Kaveri */
+ { 0x1305, &kaveri_device_info }, /* Kaveri */
+ { 0x1306, &kaveri_device_info }, /* Kaveri */
+ { 0x1307, &kaveri_device_info }, /* Kaveri */
+ { 0x1309, &kaveri_device_info }, /* Kaveri */
+ { 0x130A, &kaveri_device_info }, /* Kaveri */
+ { 0x130B, &kaveri_device_info }, /* Kaveri */
+ { 0x130C, &kaveri_device_info }, /* Kaveri */
+ { 0x130D, &kaveri_device_info }, /* Kaveri */
+ { 0x130E, &kaveri_device_info }, /* Kaveri */
+ { 0x130F, &kaveri_device_info }, /* Kaveri */
+ { 0x1310, &kaveri_device_info }, /* Kaveri */
+ { 0x1311, &kaveri_device_info }, /* Kaveri */
+ { 0x1312, &kaveri_device_info }, /* Kaveri */
+ { 0x1313, &kaveri_device_info }, /* Kaveri */
+ { 0x1315, &kaveri_device_info }, /* Kaveri */
+ { 0x1316, &kaveri_device_info }, /* Kaveri */
+ { 0x1317, &kaveri_device_info }, /* Kaveri */
+ { 0x1318, &kaveri_device_info }, /* Kaveri */
+ { 0x131B, &kaveri_device_info }, /* Kaveri */
+ { 0x131C, &kaveri_device_info }, /* Kaveri */
+ { 0x131D, &kaveri_device_info }, /* Kaveri */
};
static const struct kfd_device_info *
--
1.9.1
On Fri, Jul 11, 2014 at 12:53:54AM +0300, Oded Gabbay wrote:
> This patch creates a workaround for a bug in amd_iommu driver, where
> the driver doesn't save all necessary information when going to
> suspend. The workaround removes a device from the IOMMU device list
> on suspend and register a resumed device in the IOMMU device list.
>
> Signed-off-by: Oded Gabbay <[email protected]>
Which bug do you workaround here? It needs to be fixed in the AMD IOMMU
driver instead of being wrapped in KFD.
Joerg
On Fri, Jul 11, 2014 at 12:53:48AM +0300, Oded Gabbay wrote:
> From: Evgeny Pinchuk <[email protected]>
>
> Implemented new IOCTL to query the CPU and GPU clock counters.
>
> Signed-off-by: Evgeny Pinchuk <[email protected]>
> Signed-off-by: Oded Gabbay <[email protected]>
> ---
> drivers/gpu/hsa/radeon/kfd_chardev.c | 37 ++++++++++++++++++++++++++++++++++++
> include/uapi/linux/kfd_ioctl.h | 9 +++++++++
> 2 files changed, 46 insertions(+)
>
> diff --git a/drivers/gpu/hsa/radeon/kfd_chardev.c b/drivers/gpu/hsa/radeon/kfd_chardev.c
> index ddaf357..d6fa980 100644
> --- a/drivers/gpu/hsa/radeon/kfd_chardev.c
> +++ b/drivers/gpu/hsa/radeon/kfd_chardev.c
> @@ -28,6 +28,7 @@
> #include <linux/slab.h>
> #include <linux/uaccess.h>
> #include <uapi/linux/kfd_ioctl.h>
> +#include <linux/time.h>
> #include "kfd_priv.h"
> #include "kfd_scheduler.h"
>
> @@ -284,6 +285,38 @@ out:
> return err;
> }
>
> +static long
> +kfd_ioctl_get_clock_counters(struct file *filep, struct kfd_process *p, void __user *arg)
> +{
> + struct kfd_ioctl_get_clock_counters_args args;
> + struct kfd_dev *dev;
> + struct timespec time;
> +
> + if (copy_from_user(&args, arg, sizeof(args)))
> + return -EFAULT;
> +
> + dev = radeon_kfd_device_by_id(args.gpu_id);
> + if (dev == NULL)
> + return -EINVAL;
> +
> + /* Reading GPU clock counter from KGD */
> + args.gpu_clock_counter = kfd2kgd->get_gpu_clock_counter(dev->kgd);
> +
> + /* No access to rdtsc. Using raw monotonic time */
> + getrawmonotonic(&time);
> + args.cpu_clock_counter = time.tv_nsec;
Is the GPU clock counter monotonic too ? Even after GPU reset (hard reset
included) what could go wrong if it rolls back ?
> +
> + get_monotonic_boottime(&time);
> + args.system_clock_counter = time.tv_nsec;
> +
> + /* Since the counter is in nano-seconds we use 1GHz frequency */
> + args.system_clock_freq = 1000000000;
> +
> + if (copy_to_user(arg, &args, sizeof(args)))
> + return -EFAULT;
> +
> + return 0;
> +}
>
> static long
> kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
> @@ -312,6 +345,10 @@ kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
> err = kfd_ioctl_set_memory_policy(filep, process, (void __user *)arg);
> break;
>
> + case KFD_IOC_GET_CLOCK_COUNTERS:
> + err = kfd_ioctl_get_clock_counters(filep, process, (void __user *)arg);
> + break;
> +
> default:
> dev_err(kfd_device,
> "unknown ioctl cmd 0x%x, arg 0x%lx)\n",
> diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
> index 928e628..5b9517e 100644
> --- a/include/uapi/linux/kfd_ioctl.h
> +++ b/include/uapi/linux/kfd_ioctl.h
> @@ -70,12 +70,21 @@ struct kfd_ioctl_set_memory_policy_args {
> uint64_t alternate_aperture_size; /* to KFD */
> };
>
> +struct kfd_ioctl_get_clock_counters_args {
> + uint32_t gpu_id; /* to KFD */
> + uint64_t gpu_clock_counter; /* from KFD */
> + uint64_t cpu_clock_counter; /* from KFD */
> + uint64_t system_clock_counter; /* from KFD */
> + uint64_t system_clock_freq; /* from KFD */
> +};
> +
> #define KFD_IOC_MAGIC 'K'
>
> #define KFD_IOC_GET_VERSION _IOR(KFD_IOC_MAGIC, 1, struct kfd_ioctl_get_version_args)
> #define KFD_IOC_CREATE_QUEUE _IOWR(KFD_IOC_MAGIC, 2, struct kfd_ioctl_create_queue_args)
> #define KFD_IOC_DESTROY_QUEUE _IOWR(KFD_IOC_MAGIC, 3, struct kfd_ioctl_destroy_queue_args)
> #define KFD_IOC_SET_MEMORY_POLICY _IOW(KFD_IOC_MAGIC, 4, struct kfd_ioctl_set_memory_policy_args)
> +#define KFD_IOC_GET_CLOCK_COUNTERS _IOWR(KFD_IOC_MAGIC, 5, struct kfd_ioctl_get_clock_counters_args)
>
> #pragma pack(pop)
>
> --
> 1.9.1
>
On Fri, Jul 11, 2014 at 12:54:00AM +0300, Oded Gabbay wrote:
> From: Alexey Skidanov <[email protected]>
>
> Added apertures initialization and appropriate ioctl
What is a process aperture and what is it used for? This is a very
cryptic commit message.
Cheers,
Jérôme
>
> Signed-off-by: Alexey Skidanov <[email protected]>
> Signed-off-by: Oded Gabbay <[email protected]>
> ---
> drivers/gpu/hsa/radeon/Makefile | 2 +-
> drivers/gpu/hsa/radeon/kfd_aperture.c | 124 ++++++++++++++++++++++++++
> drivers/gpu/hsa/radeon/kfd_chardev.c | 58 +++++++++++-
> drivers/gpu/hsa/radeon/kfd_priv.h | 18 ++++
> drivers/gpu/hsa/radeon/kfd_process.c | 17 ++++
> drivers/gpu/hsa/radeon/kfd_sched_cik_static.c | 3 +-
> drivers/gpu/hsa/radeon/kfd_topology.c | 27 ++++++
> include/uapi/linux/kfd_ioctl.h | 18 ++++
> 8 files changed, 264 insertions(+), 3 deletions(-)
> create mode 100644 drivers/gpu/hsa/radeon/kfd_aperture.c
>
> diff --git a/drivers/gpu/hsa/radeon/Makefile b/drivers/gpu/hsa/radeon/Makefile
> index 5422e6a..813b31f 100644
> --- a/drivers/gpu/hsa/radeon/Makefile
> +++ b/drivers/gpu/hsa/radeon/Makefile
> @@ -5,6 +5,6 @@
> radeon_kfd-y := kfd_module.o kfd_device.o kfd_chardev.o \
> kfd_pasid.o kfd_topology.o kfd_process.o \
> kfd_doorbell.o kfd_sched_cik_static.o kfd_registers.o \
> - kfd_vidmem.o kfd_interrupt.o
> + kfd_vidmem.o kfd_interrupt.o kfd_aperture.o
>
> obj-$(CONFIG_HSA_RADEON) += radeon_kfd.o
> diff --git a/drivers/gpu/hsa/radeon/kfd_aperture.c b/drivers/gpu/hsa/radeon/kfd_aperture.c
> new file mode 100644
> index 0000000..9e2d6da
> --- /dev/null
> +++ b/drivers/gpu/hsa/radeon/kfd_aperture.c
> @@ -0,0 +1,124 @@
> +/*
> + * Copyright 2014 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + */
> +
> +#include <linux/device.h>
> +#include <linux/export.h>
> +#include <linux/err.h>
> +#include <linux/fs.h>
> +#include <linux/sched.h>
> +#include <linux/slab.h>
> +#include <linux/uaccess.h>
> +#include <linux/compat.h>
> +#include <uapi/linux/kfd_ioctl.h>
> +#include <linux/time.h>
> +#include "kfd_priv.h"
> +#include "kfd_scheduler.h"
> +#include <linux/mm.h>
> +#include <uapi/asm-generic/mman-common.h>
> +#include <asm/processor.h>
> +
> +
> +#define MAKE_GPUVM_APP_BASE(gpu_num) (((uint64_t)(gpu_num) << 61) + 0x1000000000000)
> +#define MAKE_GPUVM_APP_LIMIT(base) (((uint64_t)(base) & 0xFFFFFF0000000000) | 0xFFFFFFFFFF)
> +#define MAKE_SCRATCH_APP_BASE(gpu_num) (((uint64_t)(gpu_num) << 61) + 0x100000000)
> +#define MAKE_SCRATCH_APP_LIMIT(base) (((uint64_t)base & 0xFFFFFFFF00000000) | 0xFFFFFFFF)
> +#define MAKE_LDS_APP_BASE(gpu_num) (((uint64_t)(gpu_num) << 61) + 0x0)
> +#define MAKE_LDS_APP_LIMIT(base) (((uint64_t)(base) & 0xFFFFFFFF00000000) | 0xFFFFFFFF)
> +
> +#define HSA_32BIT_LDS_APP_SIZE 0x10000
> +#define HSA_32BIT_LDS_APP_ALIGNMENT 0x10000
> +
> +static unsigned long kfd_reserve_aperture(struct kfd_process *process, unsigned long len, unsigned long alignment)
> +{
> +
> + unsigned long addr = 0;
> + unsigned long start_address;
> +
> + /*
> + * Go bottom up and find the first available aligned address.
> + * We may narrow space to scan by getting mmap range limits.
> + */
> + for (start_address = alignment; start_address < (TASK_SIZE - alignment); start_address += alignment) {
> + addr = vm_mmap(NULL, start_address, len, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, 0);
> + if (!IS_ERR_VALUE(addr)) {
> + if (addr == start_address)
> + return addr;
> + vm_munmap(addr, len);
> + }
> + }
> + return 0;
> +
> +}
> +
> +int kfd_init_apertures(struct kfd_process *process)
> +{
> + uint8_t id = 0;
> + struct kfd_dev *dev;
> + struct kfd_process_device *pdd;
> +
> + mutex_lock(&process->mutex);
> +
> + /*Iterating over all devices*/
> + while ((dev = kfd_topology_enum_kfd_devices(id)) != NULL && id < NUM_OF_SUPPORTED_GPUS) {
> +
> + pdd = radeon_kfd_get_process_device_data(dev, process);
> +
> + /*for 64 bit process aperture will be statically reserved in the non canonical process address space
> + *for 32 bit process the aperture will be reserved in the process address space
> + */
> + if (process->is_32bit_user_mode) {
> + /*try to reserve aperture. continue on failure, just put the aperture size to be 0*/
> + pdd->lds_base = kfd_reserve_aperture(
> + process,
> + HSA_32BIT_LDS_APP_SIZE,
> + HSA_32BIT_LDS_APP_ALIGNMENT);
> +
> + if (pdd->lds_base)
> + pdd->lds_limit = pdd->lds_base + HSA_32BIT_LDS_APP_SIZE - 1;
> + else
> + pdd->lds_limit = 0;
> +
> + /*GPUVM and Scratch apertures are not supported*/
> + pdd->gpuvm_base = pdd->gpuvm_limit = pdd->scratch_base = pdd->scratch_limit = 0;
> + } else {
> + /*node id couldn't be 0 - the three MSB bits of aperture shoudn't be 0*/
> + pdd->lds_base = MAKE_LDS_APP_BASE(id + 1);
> + pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
> + pdd->gpuvm_base = MAKE_GPUVM_APP_BASE(id + 1);
> + pdd->gpuvm_limit = MAKE_GPUVM_APP_LIMIT(pdd->gpuvm_base);
> + pdd->scratch_base = MAKE_SCRATCH_APP_BASE(id + 1);
> + pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
> + }
> +
> + dev_dbg(kfd_device, "node id %u, gpu id %u, lds_base %llX lds_limit %llX gpuvm_base %llX gpuvm_limit %llX scratch_base %llX scratch_limit %llX",
> + id, pdd->dev->id, pdd->lds_base, pdd->lds_limit, pdd->gpuvm_base, pdd->gpuvm_limit, pdd->scratch_base, pdd->scratch_limit);
> +
> + id++;
> + }
> +
> + mutex_unlock(&process->mutex);
> +
> + return 0;
> +}
> +
> +
> diff --git a/drivers/gpu/hsa/radeon/kfd_chardev.c b/drivers/gpu/hsa/radeon/kfd_chardev.c
> index e95d597..07cac88 100644
> --- a/drivers/gpu/hsa/radeon/kfd_chardev.c
> +++ b/drivers/gpu/hsa/radeon/kfd_chardev.c
> @@ -32,6 +32,9 @@
> #include <linux/time.h>
> #include "kfd_priv.h"
> #include "kfd_scheduler.h"
> +#include <linux/mm.h>
> +#include <uapi/asm-generic/mman-common.h>
> +#include <asm/processor.h>
>
> static long kfd_ioctl(struct file *, unsigned int, unsigned long);
> static int kfd_open(struct inode *, struct file *);
> @@ -107,9 +110,13 @@ kfd_open(struct inode *inode, struct file *filep)
> process = radeon_kfd_create_process(current);
> if (IS_ERR(process))
> return PTR_ERR(process);
> +
> process->is_32bit_user_mode = is_compat_task();
> +
> dev_info(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
> - process->pasid, process->is_32bit_user_mode);
> + process->pasid, process->is_32bit_user_mode);
> +
> + kfd_init_apertures(process);
>
> return 0;
> }
> @@ -321,6 +328,51 @@ kfd_ioctl_get_clock_counters(struct file *filep, struct kfd_process *p, void __u
> return 0;
> }
>
> +
> +static int kfd_ioctl_get_process_apertures(struct file *filp, struct kfd_process *p, void __user *arg)
> +{
> + struct kfd_ioctl_get_process_apertures_args args;
> + struct kfd_process_device *pdd;
> +
> + dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);
> +
> + if (copy_from_user(&args, arg, sizeof(args)))
> + return -EFAULT;
> +
> + args.num_of_nodes = 0;
> +
> + mutex_lock(&p->mutex);
> +
> + /*if the process-device list isn't empty*/
> + if (kfd_has_process_device_data(p)) {
> + /* Run over all pdd of the process */
> + pdd = kfd_get_first_process_device_data(p);
> + do {
> +
> + args.process_apertures[args.num_of_nodes].gpu_id = pdd->dev->id;
> + args.process_apertures[args.num_of_nodes].lds_base = pdd->lds_base;
> + args.process_apertures[args.num_of_nodes].lds_limit = pdd->lds_limit;
> + args.process_apertures[args.num_of_nodes].gpuvm_base = pdd->gpuvm_base;
> + args.process_apertures[args.num_of_nodes].gpuvm_limit = pdd->gpuvm_limit;
> + args.process_apertures[args.num_of_nodes].scratch_base = pdd->scratch_base;
> + args.process_apertures[args.num_of_nodes].scratch_limit = pdd->scratch_limit;
> +
> + dev_dbg(kfd_device, "node id %u, gpu id %u, lds_base %llX lds_limit %llX gpuvm_base %llX gpuvm_limit %llX scratch_base %llX scratch_limit %llX",
> + args.num_of_nodes, pdd->dev->id, pdd->lds_base, pdd->lds_limit, pdd->gpuvm_base, pdd->gpuvm_limit, pdd->scratch_base, pdd->scratch_limit);
> + args.num_of_nodes++;
> + } while ((pdd = kfd_get_next_process_device_data(p, pdd)) != NULL &&
> + (args.num_of_nodes < NUM_OF_SUPPORTED_GPUS));
> + }
> +
> + mutex_unlock(&p->mutex);
> +
> + if (copy_to_user(arg, &args, sizeof(args)))
> + return -EFAULT;
> +
> + return 0;
> +}
> +
> +
> static long
> kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
> {
> @@ -352,6 +404,10 @@ kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
> err = kfd_ioctl_get_clock_counters(filep, process, (void __user *)arg);
> break;
>
> + case KFD_IOC_GET_PROCESS_APERTURES:
> + err = kfd_ioctl_get_process_apertures(filep, process, (void __user *)arg);
> + break;
> +
> default:
> dev_err(kfd_device,
> "unknown ioctl cmd 0x%x, arg 0x%lx)\n",
> diff --git a/drivers/gpu/hsa/radeon/kfd_priv.h b/drivers/gpu/hsa/radeon/kfd_priv.h
> index 9d3b1fc..28155bc 100644
> --- a/drivers/gpu/hsa/radeon/kfd_priv.h
> +++ b/drivers/gpu/hsa/radeon/kfd_priv.h
> @@ -171,6 +171,16 @@ struct kfd_process_device {
>
> /* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */
> bool bound;
> +
> + /*Apertures*/
> + uint64_t lds_base;
> + uint64_t lds_limit;
> + uint64_t gpuvm_base;
> + uint64_t gpuvm_limit;
> + uint64_t scratch_base;
> + uint64_t scratch_limit;
> +
> +
> };
>
> /* Process data */
> @@ -212,6 +222,10 @@ void radeon_kfd_install_queue(struct kfd_process *p, unsigned int queue_id, stru
> void radeon_kfd_remove_queue(struct kfd_process *p, unsigned int queue_id);
> struct kfd_queue *radeon_kfd_get_queue(struct kfd_process *p, unsigned int queue_id);
>
> +/* Process device data iterator */
> +struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p);
> +struct kfd_process_device *kfd_get_next_process_device_data(struct kfd_process *p, struct kfd_process_device *pdd);
> +bool kfd_has_process_device_data(struct kfd_process *p);
>
> /* PASIDs */
> int radeon_kfd_pasid_init(void);
> @@ -237,6 +251,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu);
> int kfd_topology_remove_device(struct kfd_dev *gpu);
> struct kfd_dev *radeon_kfd_device_by_id(uint32_t gpu_id);
> struct kfd_dev *radeon_kfd_device_by_pci_dev(const struct pci_dev *pdev);
> +struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx);
>
> /* MMIO registers */
> #define WRITE_REG(dev, reg, value) radeon_kfd_write_reg((dev), (reg), (value))
> @@ -253,4 +268,7 @@ void kgd2kfd_interrupt(struct kfd_dev *dev, const void *ih_ring_entry);
> void kgd2kfd_suspend(struct kfd_dev *dev);
> int kgd2kfd_resume(struct kfd_dev *dev);
>
> +/* HSA apertures */
> +int kfd_init_apertures(struct kfd_process *process);
> +
> #endif
> diff --git a/drivers/gpu/hsa/radeon/kfd_process.c b/drivers/gpu/hsa/radeon/kfd_process.c
> index f89f855..80136e6 100644
> --- a/drivers/gpu/hsa/radeon/kfd_process.c
> +++ b/drivers/gpu/hsa/radeon/kfd_process.c
> @@ -397,3 +397,20 @@ struct kfd_queue *radeon_kfd_get_queue(struct kfd_process *p, unsigned int queue
> test_bit(queue_id, p->allocated_queue_bitmap)) ?
> p->queues[queue_id] : NULL;
> }
> +
> +struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p)
> +{
> + return list_first_entry(&p->per_device_data, struct kfd_process_device, per_device_list);
> +}
> +
> +struct kfd_process_device *kfd_get_next_process_device_data(struct kfd_process *p, struct kfd_process_device *pdd)
> +{
> + if (list_is_last(&pdd->per_device_list, &p->per_device_data))
> + return NULL;
> + return list_next_entry(pdd, per_device_list);
> +}
> +
> +bool kfd_has_process_device_data(struct kfd_process *p)
> +{
> + return !(list_empty(&p->per_device_data));
> +}
> diff --git a/drivers/gpu/hsa/radeon/kfd_sched_cik_static.c b/drivers/gpu/hsa/radeon/kfd_sched_cik_static.c
> index 7ee8125..30561a6 100644
> --- a/drivers/gpu/hsa/radeon/kfd_sched_cik_static.c
> +++ b/drivers/gpu/hsa/radeon/kfd_sched_cik_static.c
> @@ -627,7 +627,8 @@ static void cik_static_deregister_process(struct kfd_scheduler *scheduler,
> struct cik_static_private *priv = kfd_scheduler_to_private(scheduler);
> struct cik_static_process *pp = kfd_process_to_private(scheduler_process);
>
> - if (priv && pp) {
> +
> + if (priv && pp) {
> release_vmid(priv, pp->vmid);
> kfree(pp);
> }
> diff --git a/drivers/gpu/hsa/radeon/kfd_topology.c b/drivers/gpu/hsa/radeon/kfd_topology.c
> index 21bb66e..213ae7b 100644
> --- a/drivers/gpu/hsa/radeon/kfd_topology.c
> +++ b/drivers/gpu/hsa/radeon/kfd_topology.c
> @@ -1201,3 +1201,30 @@ int kfd_topology_remove_device(struct kfd_dev *gpu)
>
> return res;
> }
> +
> +/*
> + * When idx is out of bounds, the function will return NULL
> + */
> +struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx)
> +{
> +
> + struct kfd_topology_device *top_dev;
> + struct kfd_dev *device = NULL;
> + uint8_t device_idx = 0;
> +
> + down_read(&topology_lock);
> +
> + list_for_each_entry(top_dev, &topology_device_list, list) {
> + if (device_idx == idx) {
> + device = top_dev->gpu;
> + break;
> + }
> +
> + device_idx++;
> + }
> +
> + up_read(&topology_lock);
> +
> + return device;
> +
> +}
> diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
> index a7c3abd..e5fcb8b 100644
> --- a/include/uapi/linux/kfd_ioctl.h
> +++ b/include/uapi/linux/kfd_ioctl.h
> @@ -78,6 +78,23 @@ struct kfd_ioctl_get_clock_counters_args {
> uint64_t system_clock_freq; /* from KFD */
> };
>
> +#define NUM_OF_SUPPORTED_GPUS 7
> +
> +struct kfd_process_device_apertures {
> + uint64_t lds_base; /* from KFD */
> + uint64_t lds_limit; /* from KFD */
> + uint64_t scratch_base; /* from KFD */
> + uint64_t scratch_limit; /* from KFD */
> + uint64_t gpuvm_base; /* from KFD */
> + uint64_t gpuvm_limit; /* from KFD */
> + uint32_t gpu_id; /* from KFD */
> +};
> +
> +struct kfd_ioctl_get_process_apertures_args {
> + struct kfd_process_device_apertures process_apertures[NUM_OF_SUPPORTED_GPUS];/* from KFD */
> + uint8_t num_of_nodes; /* from KFD, should be in the range [1 - NUM_OF_SUPPORTED_GPUS] */
> +};
> +
> #define KFD_IOC_MAGIC 'K'
>
> #define KFD_IOC_GET_VERSION _IOR(KFD_IOC_MAGIC, 1, struct kfd_ioctl_get_version_args)
> @@ -85,6 +102,7 @@ struct kfd_ioctl_get_clock_counters_args {
> #define KFD_IOC_DESTROY_QUEUE _IOWR(KFD_IOC_MAGIC, 3, struct kfd_ioctl_destroy_queue_args)
> #define KFD_IOC_SET_MEMORY_POLICY _IOW(KFD_IOC_MAGIC, 4, struct kfd_ioctl_set_memory_policy_args)
> #define KFD_IOC_GET_CLOCK_COUNTERS _IOWR(KFD_IOC_MAGIC, 5, struct kfd_ioctl_get_clock_counters_args)
> +#define KFD_IOC_GET_PROCESS_APERTURES _IOR(KFD_IOC_MAGIC, 6, struct kfd_ioctl_get_process_apertures_args)
>
> #pragma pack(pop)
>
> --
> 1.9.1
>