2023-01-15 09:48:03

by Oded Gabbay

[permalink] [raw]
Subject: [PATCH 1/5] habanalabs: check pad and reserved fields in ioctls

From: farah kassabri <[email protected]>

Make sure all reserved/pad fields in uapi input structures
are set to 0.

Signed-off-by: farah kassabri <[email protected]>
Reviewed-by: Oded Gabbay <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
.../accel/habanalabs/common/command_submission.c | 15 ++++++++++++++-
.../accel/habanalabs/common/habanalabs_ioctl.c | 6 +++++-
2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/drivers/accel/habanalabs/common/command_submission.c b/drivers/accel/habanalabs/common/command_submission.c
index bb9584d72c32..c54f504383ac 100644
--- a/drivers/accel/habanalabs/common/command_submission.c
+++ b/drivers/accel/habanalabs/common/command_submission.c
@@ -1310,6 +1310,13 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
enum hl_device_status status;
enum hl_cs_type cs_type;
bool is_sync_stream;
+ int i;
+
+ for (i = 0 ; i < sizeof(args->in.pad) ; i++)
+ if (args->in.pad[i]) {
+ dev_dbg(hdev->dev, "Padding bytes must be 0\n");
+ return -EINVAL;
+ }

if (!hl_device_operational(hdev, &status)) {
return -EBUSY;
@@ -2918,7 +2925,13 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
u32 size_to_copy;
u64 *cs_seq_arr;
u8 seq_arr_len;
- int rc;
+ int rc, i;
+
+ for (i = 0 ; i < sizeof(args->in.pad) ; i++)
+ if (args->in.pad[i]) {
+ dev_dbg(hdev->dev, "Padding bytes must be 0\n");
+ return -EINVAL;
+ }

if (!hdev->supports_wait_for_multi_cs) {
dev_err(hdev->dev, "Wait for multi CS is not supported\n");
diff --git a/drivers/accel/habanalabs/common/habanalabs_ioctl.c b/drivers/accel/habanalabs/common/habanalabs_ioctl.c
index 72493bf94ba3..5005e6fca691 100644
--- a/drivers/accel/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/accel/habanalabs/common/habanalabs_ioctl.c
@@ -884,9 +884,13 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
enum hl_device_status status;
struct hl_info_args *args = data;
struct hl_device *hdev = hpriv->hdev;
-
int rc;

+ if (args->pad) {
+ dev_dbg(hdev->dev, "Padding bytes must be 0\n");
+ return -EINVAL;
+ }
+
/*
* Information is returned for the following opcodes even if the device
* is disabled or in reset.
--
2.25.1


2023-01-15 10:13:40

by Oded Gabbay

[permalink] [raw]
Subject: [PATCH 4/5] habanalabs/gaudi2: read mmio razwi information

From: Dani Liberman <[email protected]>

In gaudi2 there night be different routers for low b/w and high b/w
transactions. But in the code that collects razwi information, we used
the same router for high b/w and low b/w.

Fixed it by reading the information also from low b/w routers.

Signed-off-by: Dani Liberman <[email protected]>
Reviewed-by: Oded Gabbay <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/accel/habanalabs/gaudi2/gaudi2.c | 117 ++++++++++++++++-------
1 file changed, 80 insertions(+), 37 deletions(-)

diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c
index 2b5cd058f5ad..32a824766f24 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c
@@ -1568,7 +1568,7 @@ enum rtr_id {
DCORE3_RTR7,
};

-static const u32 gaudi2_tpc_initiator_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = {
+static const u32 gaudi2_tpc_initiator_hbw_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = {
DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR2, DCORE0_RTR3, DCORE0_RTR3,
DCORE1_RTR6, DCORE1_RTR6, DCORE1_RTR5, DCORE1_RTR5, DCORE1_RTR4, DCORE1_RTR4,
DCORE2_RTR3, DCORE2_RTR3, DCORE2_RTR2, DCORE2_RTR2, DCORE2_RTR1, DCORE2_RTR1,
@@ -1576,12 +1576,30 @@ static const u32 gaudi2_tpc_initiator_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORE
DCORE0_RTR0
};

-static const u32 gaudi2_dec_initiator_rtr_id[NUMBER_OF_DEC] = {
+static const u32 gaudi2_tpc_initiator_lbw_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = {
+ DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR2,
+ DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR6, DCORE1_RTR6, DCORE1_RTR5, DCORE1_RTR5,
+ DCORE2_RTR2, DCORE2_RTR2, DCORE2_RTR1, DCORE2_RTR1, DCORE2_RTR0, DCORE2_RTR0,
+ DCORE3_RTR5, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR6, DCORE3_RTR7, DCORE3_RTR7,
+ DCORE0_RTR0
+};
+
+static const u32 gaudi2_dec_initiator_hbw_rtr_id[NUMBER_OF_DEC] = {
DCORE0_RTR0, DCORE0_RTR0, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0, DCORE2_RTR0,
DCORE3_RTR7, DCORE3_RTR7, DCORE0_RTR0, DCORE0_RTR0
};

-static const u32 gaudi2_nic_initiator_rtr_id[NIC_NUMBER_OF_MACROS] = {
+static const u32 gaudi2_dec_initiator_lbw_rtr_id[NUMBER_OF_DEC] = {
+ DCORE0_RTR1, DCORE0_RTR1, DCORE1_RTR6, DCORE1_RTR6, DCORE2_RTR1, DCORE2_RTR1,
+ DCORE3_RTR6, DCORE3_RTR6, DCORE0_RTR0, DCORE0_RTR0
+};
+
+static const u32 gaudi2_nic_initiator_hbw_rtr_id[NIC_NUMBER_OF_MACROS] = {
+ DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0,
+ DCORE2_RTR0, DCORE2_RTR0, DCORE2_RTR0, DCORE3_RTR7, DCORE3_RTR7, DCORE3_RTR7
+};
+
+static const u32 gaudi2_nic_initiator_lbw_rtr_id[NIC_NUMBER_OF_MACROS] = {
DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0,
DCORE2_RTR0, DCORE2_RTR0, DCORE2_RTR0, DCORE3_RTR7, DCORE3_RTR7, DCORE3_RTR7
};
@@ -1595,14 +1613,22 @@ static const struct sft_info gaudi2_edma_initiator_sft_id[NUM_OF_EDMA_PER_DCORE
{0, 0}, {1, 0}, {0, 1}, {1, 1}, {1, 2}, {1, 3}, {0, 2}, {0, 3},
};

-static const u32 gaudi2_pdma_initiator_rtr_id[NUM_OF_PDMA] = {
+static const u32 gaudi2_pdma_initiator_hbw_rtr_id[NUM_OF_PDMA] = {
DCORE0_RTR0, DCORE0_RTR0
};

-static const u32 gaudi2_rot_initiator_rtr_id[NUM_OF_ROT] = {
+static const u32 gaudi2_pdma_initiator_lbw_rtr_id[NUM_OF_PDMA] = {
+ DCORE0_RTR2, DCORE0_RTR2
+};
+
+static const u32 gaudi2_rot_initiator_hbw_rtr_id[NUM_OF_ROT] = {
DCORE2_RTR0, DCORE3_RTR7
};

+static const u32 gaudi2_rot_initiator_lbw_rtr_id[NUM_OF_ROT] = {
+ DCORE2_RTR2, DCORE3_RTR5
+};
+
struct mme_initiators_rtr_id {
u32 wap0;
u32 wap1;
@@ -7185,50 +7211,60 @@ static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev,
u8 module_sub_idx, u64 *event_mask)
{
bool via_sft = false;
- u32 rtr_id, dcore_id, dcore_rtr_id, sft_id, eng_id;
- u64 rtr_mstr_if_base_addr;
+ u32 hbw_rtr_id, lbw_rtr_id, dcore_id, dcore_rtr_id, sft_id, eng_id;
+ u64 hbw_rtr_mstr_if_base_addr, lbw_rtr_mstr_if_base_addr;
u32 hbw_shrd_aw = 0, hbw_shrd_ar = 0;
u32 lbw_shrd_aw = 0, lbw_shrd_ar = 0;
char initiator_name[64];

switch (module) {
case RAZWI_TPC:
- rtr_id = gaudi2_tpc_initiator_rtr_id[module_idx];
+ hbw_rtr_id = gaudi2_tpc_initiator_hbw_rtr_id[module_idx];
+
+ /* TODO : remove this check and depend only on tpc routers table
+ * when SW-118828 is resolved
+ */
+ if (!hdev->asic_prop.fw_security_enabled &&
+ ((module_idx == 0) || (module_idx == 1)))
+ lbw_rtr_id = DCORE0_RTR0;
+ else
+ lbw_rtr_id = gaudi2_tpc_initiator_lbw_rtr_id[module_idx];
sprintf(initiator_name, "TPC_%u", module_idx);
break;
case RAZWI_MME:
sprintf(initiator_name, "MME_%u", module_idx);
switch (module_sub_idx) {
case MME_WAP0:
- rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap0;
+ hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap0;
break;
case MME_WAP1:
- rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap1;
+ hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap1;
break;
case MME_WRITE:
- rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].write;
+ hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].write;
break;
case MME_READ:
- rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].read;
+ hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].read;
break;
case MME_SBTE0:
- rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte0;
+ hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte0;
break;
case MME_SBTE1:
- rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte1;
+ hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte1;
break;
case MME_SBTE2:
- rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte2;
+ hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte2;
break;
case MME_SBTE3:
- rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte3;
+ hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte3;
break;
case MME_SBTE4:
- rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte4;
+ hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte4;
break;
default:
return;
}
+ lbw_rtr_id = hbw_rtr_id;
break;
case RAZWI_EDMA:
sft_id = gaudi2_edma_initiator_sft_id[module_idx].interface_id;
@@ -7237,19 +7273,23 @@ static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev,
sprintf(initiator_name, "EDMA_%u", module_idx);
break;
case RAZWI_PDMA:
- rtr_id = gaudi2_pdma_initiator_rtr_id[module_idx];
+ hbw_rtr_id = gaudi2_pdma_initiator_hbw_rtr_id[module_idx];
+ lbw_rtr_id = gaudi2_pdma_initiator_lbw_rtr_id[module_idx];
sprintf(initiator_name, "PDMA_%u", module_idx);
break;
case RAZWI_NIC:
- rtr_id = gaudi2_nic_initiator_rtr_id[module_idx];
+ hbw_rtr_id = gaudi2_nic_initiator_hbw_rtr_id[module_idx];
+ lbw_rtr_id = gaudi2_nic_initiator_lbw_rtr_id[module_idx];
sprintf(initiator_name, "NIC_%u", module_idx);
break;
case RAZWI_DEC:
- rtr_id = gaudi2_dec_initiator_rtr_id[module_idx];
+ hbw_rtr_id = gaudi2_dec_initiator_hbw_rtr_id[module_idx];
+ lbw_rtr_id = gaudi2_dec_initiator_lbw_rtr_id[module_idx];
sprintf(initiator_name, "DEC_%u", module_idx);
break;
case RAZWI_ROT:
- rtr_id = gaudi2_rot_initiator_rtr_id[module_idx];
+ hbw_rtr_id = gaudi2_rot_initiator_hbw_rtr_id[module_idx];
+ lbw_rtr_id = gaudi2_rot_initiator_lbw_rtr_id[module_idx];
sprintf(initiator_name, "ROT_%u", module_idx);
break;
default:
@@ -7258,22 +7298,25 @@ static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev,

/* Find router mstr_if register base */
if (via_sft) {
- rtr_mstr_if_base_addr = mmSFT0_HBW_RTR_IF0_RTR_CTRL_BASE +
+ hbw_rtr_mstr_if_base_addr = mmSFT0_HBW_RTR_IF0_RTR_CTRL_BASE +
dcore_id * SFT_DCORE_OFFSET +
sft_id * SFT_IF_OFFSET +
RTR_MSTR_IF_OFFSET;
+ lbw_rtr_mstr_if_base_addr = hbw_rtr_mstr_if_base_addr;
} else {
- dcore_id = rtr_id / NUM_OF_RTR_PER_DCORE;
- dcore_rtr_id = rtr_id % NUM_OF_RTR_PER_DCORE;
- rtr_mstr_if_base_addr = mmDCORE0_RTR0_CTRL_BASE +
+ dcore_id = hbw_rtr_id / NUM_OF_RTR_PER_DCORE;
+ dcore_rtr_id = hbw_rtr_id % NUM_OF_RTR_PER_DCORE;
+ hbw_rtr_mstr_if_base_addr = mmDCORE0_RTR0_CTRL_BASE +
dcore_id * DCORE_OFFSET +
dcore_rtr_id * DCORE_RTR_OFFSET +
RTR_MSTR_IF_OFFSET;
+ lbw_rtr_mstr_if_base_addr = hbw_rtr_mstr_if_base_addr +
+ (((s32)lbw_rtr_id - hbw_rtr_id) * DCORE_RTR_OFFSET);
}

/* Find out event cause by reading "RAZWI_HAPPENED" registers */
- hbw_shrd_aw = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED);
- hbw_shrd_ar = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED);
+ hbw_shrd_aw = RREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED);
+ hbw_shrd_ar = RREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED);

if (via_sft) {
/* SFT has separate MSTR_IF for LBW, only there we can
@@ -7287,41 +7330,41 @@ static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev,
lbw_shrd_aw = RREG32(base + RR_SHRD_LBW_AW_RAZWI_HAPPENED);
lbw_shrd_ar = RREG32(base + RR_SHRD_LBW_AR_RAZWI_HAPPENED);
} else {
- lbw_shrd_aw = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED);
- lbw_shrd_ar = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED);
+ lbw_shrd_aw = RREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED);
+ lbw_shrd_ar = RREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED);
}

eng_id = gaudi2_razwi_calc_engine_id(hdev, module, module_idx);
if (hbw_shrd_aw) {
- gaudi2_razwi_rr_hbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, true,
+ gaudi2_razwi_rr_hbw_shared_printf_info(hdev, hbw_rtr_mstr_if_base_addr, true,
initiator_name, eng_id, event_mask);

/* Clear event indication */
- WREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED, hbw_shrd_aw);
+ WREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED, hbw_shrd_aw);
}

if (hbw_shrd_ar) {
- gaudi2_razwi_rr_hbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, false,
+ gaudi2_razwi_rr_hbw_shared_printf_info(hdev, hbw_rtr_mstr_if_base_addr, false,
initiator_name, eng_id, event_mask);

/* Clear event indication */
- WREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED, hbw_shrd_ar);
+ WREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED, hbw_shrd_ar);
}

if (lbw_shrd_aw) {
- gaudi2_razwi_rr_lbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, true,
+ gaudi2_razwi_rr_lbw_shared_printf_info(hdev, lbw_rtr_mstr_if_base_addr, true,
initiator_name, eng_id, event_mask);

/* Clear event indication */
- WREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED, lbw_shrd_aw);
+ WREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED, lbw_shrd_aw);
}

if (lbw_shrd_ar) {
- gaudi2_razwi_rr_lbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, false,
+ gaudi2_razwi_rr_lbw_shared_printf_info(hdev, lbw_rtr_mstr_if_base_addr, false,
initiator_name, eng_id, event_mask);

/* Clear event indication */
- WREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED, lbw_shrd_ar);
+ WREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED, lbw_shrd_ar);
}
}

--
2.25.1

2023-01-15 10:13:41

by Oded Gabbay

[permalink] [raw]
Subject: [PATCH 3/5] habanalabs: fix bug in timestamps registration code

From: farah kassabri <[email protected]>

Protect re-using the same timestamp buffer record before actually
adding it to the to interrupt wait list.
Mark ts buff offset as in use in the spinlock protection area of the
interrupt wait list to avoid getting in the re-use section in
ts_buff_get_kernel_ts_record before adding the node to the list.
this scenario might happen when multiple threads are racing on
same offset and one thread could set data in the ts buff in
ts_buff_get_kernel_ts_record then the other thread takes over
and get to ts_buff_get_kernel_ts_record and we will try
to re-use the same ts buff offset then we will try to
delete a non existing node from the list.

Signed-off-by: farah kassabri <[email protected]>
Reviewed-by: Oded Gabbay <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
.../habanalabs/common/command_submission.c | 33 ++++++++++++-------
1 file changed, 22 insertions(+), 11 deletions(-)

diff --git a/drivers/accel/habanalabs/common/command_submission.c b/drivers/accel/habanalabs/common/command_submission.c
index c54f504383ac..00fedf2d8654 100644
--- a/drivers/accel/habanalabs/common/command_submission.c
+++ b/drivers/accel/habanalabs/common/command_submission.c
@@ -3180,19 +3180,18 @@ static int ts_buff_get_kernel_ts_record(struct hl_mmap_mem_buf *buf,
goto start_over;
}
} else {
+ /* Fill up the new registration node info */
+ requested_offset_record->ts_reg_info.buf = buf;
+ requested_offset_record->ts_reg_info.cq_cb = cq_cb;
+ requested_offset_record->ts_reg_info.timestamp_kernel_addr =
+ (u64 *) ts_buff->user_buff_address + ts_offset;
+ requested_offset_record->cq_kernel_addr =
+ (u64 *) cq_cb->kernel_address + cq_offset;
+ requested_offset_record->cq_target_value = target_value;
+
spin_unlock_irqrestore(wait_list_lock, flags);
}

- /* Fill up the new registration node info */
- requested_offset_record->ts_reg_info.in_use = 1;
- requested_offset_record->ts_reg_info.buf = buf;
- requested_offset_record->ts_reg_info.cq_cb = cq_cb;
- requested_offset_record->ts_reg_info.timestamp_kernel_addr =
- (u64 *) ts_buff->user_buff_address + ts_offset;
- requested_offset_record->cq_kernel_addr =
- (u64 *) cq_cb->kernel_address + cq_offset;
- requested_offset_record->cq_target_value = target_value;
-
*pend = requested_offset_record;

dev_dbg(buf->mmg->dev, "Found available node in TS kernel CB %p\n",
@@ -3240,7 +3239,7 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
goto put_cq_cb;
}

- /* Find first available record */
+ /* get ts buffer record */
rc = ts_buff_get_kernel_ts_record(buf, cq_cb, ts_offset,
cq_counters_offset, target_value,
&interrupt->wait_list_lock, &pend);
@@ -3288,7 +3287,19 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
* Note that we cannot have sorted list by target value,
* in order to shorten the list pass loop, since
* same list could have nodes for different cq counter handle.
+ * Note:
+ * Mark ts buff offset as in use here in the spinlock protection area
+ * to avoid getting in the re-use section in ts_buff_get_kernel_ts_record
+ * before adding the node to the list. this scenario might happen when
+ * multiple threads are racing on same offset and one thread could
+ * set the ts buff in ts_buff_get_kernel_ts_record then the other thread
+ * takes over and get to ts_buff_get_kernel_ts_record and then we will try
+ * to re-use the same ts buff offset, and will try to delete a non existing
+ * node from the list.
*/
+ if (register_ts_record)
+ pend->ts_reg_info.in_use = 1;
+
list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);

--
2.25.1

2023-01-15 10:14:53

by Oded Gabbay

[permalink] [raw]
Subject: [PATCH 5/5] habanalabe/gaudi2: add cfg base when displaying razwi addresses

From: Dani Liberman <[email protected]>

Captured addresses of low b/w razwi information contains only the
offset from the cfg base. To make it more user readable, add the cfg
base to it.

Signed-off-by: Dani Liberman <[email protected]>
Reviewed-by: Oded Gabbay <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/accel/habanalabs/gaudi2/gaudi2.c | 17 +++++++++--------
1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c
index 32a824766f24..8c0cbd3b4a0c 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c
@@ -7137,23 +7137,24 @@ static void gaudi2_razwi_rr_lbw_shared_printf_info(struct hl_device *hdev,
u64 rtr_mstr_if_base_addr, bool is_write, char *name,
enum gaudi2_engine_id id, u64 *event_mask)
{
- u32 razwi_addr, razwi_xy;
+ u64 razwi_addr = CFG_BASE;
+ u32 razwi_xy;
u16 eng_id = id;
u8 rd_wr_flag;

if (is_write) {
- razwi_addr = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI);
+ razwi_addr += RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI);
razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_XY);
rd_wr_flag = HL_RAZWI_WRITE;
} else {
- razwi_addr = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI);
+ razwi_addr += RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI);
razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_XY);
rd_wr_flag = HL_RAZWI_READ;
}

hl_handle_razwi(hdev, razwi_addr, &eng_id, 1, rd_wr_flag | HL_RAZWI_LBW, event_mask);
dev_err_ratelimited(hdev->dev,
- "%s-RAZWI SHARED RR LBW %s error, mstr_if 0x%llx, captured address 0x%x Initiator coordinates 0x%x\n",
+ "%s-RAZWI SHARED RR LBW %s error, mstr_if 0x%llx, captured address 0x%llX Initiator coordinates 0x%x\n",
name, is_write ? "WR" : "RD", rtr_mstr_if_base_addr, razwi_addr,
razwi_xy);
}
@@ -7665,19 +7666,19 @@ static void gaudi2_razwi_unmapped_addr_lbw_printf_info(struct hl_device *hdev, u
u64 *event_mask)
{
u16 engines[HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR], num_of_eng;
- u32 razwi_addr;
+ u64 razwi_addr = CFG_BASE;
u8 rd_wr_flag;

num_of_eng = gaudi2_get_razwi_initiators(rtr_id, &engines[0]);

if (is_write) {
- razwi_addr = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_ADDR);
+ razwi_addr += RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_ADDR);
rd_wr_flag = HL_RAZWI_WRITE;

/* Clear set indication */
WREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_SET, 0x1);
} else {
- razwi_addr = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_ADDR);
+ razwi_addr += RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_ADDR);
rd_wr_flag = HL_RAZWI_READ;

/* Clear set indication */
@@ -7687,7 +7688,7 @@ static void gaudi2_razwi_unmapped_addr_lbw_printf_info(struct hl_device *hdev, u
hl_handle_razwi(hdev, razwi_addr, &engines[0], num_of_eng, rd_wr_flag | HL_RAZWI_LBW,
event_mask);
dev_err_ratelimited(hdev->dev,
- "RAZWI PSOC unmapped LBW %s error, rtr id %u, address %#x\n",
+ "RAZWI PSOC unmapped LBW %s error, rtr id %u, address 0x%llX\n",
is_write ? "WR" : "RD", rtr_id, razwi_addr);

dev_err_ratelimited(hdev->dev,
--
2.25.1

2023-01-15 10:18:59

by Oded Gabbay

[permalink] [raw]
Subject: [PATCH 2/5] habanalabs: bugs fixes in timestamps buff alloc

From: farah kassabri <[email protected]>

use argument instead of fixed GFP value for allocation
in Timestamps buffers alloc function.
change data type of size to size_t.

Fixes: 9158bf69e74f ("habanalabs: Timestamps buffers registration")
Signed-off-by: farah kassabri <[email protected]>
Reviewed-by: Oded Gabbay <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/accel/habanalabs/common/memory.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/accel/habanalabs/common/memory.c b/drivers/accel/habanalabs/common/memory.c
index 56283dd874e1..e6474d38afc4 100644
--- a/drivers/accel/habanalabs/common/memory.c
+++ b/drivers/accel/habanalabs/common/memory.c
@@ -2170,12 +2170,13 @@ static int hl_ts_mmap(struct hl_mmap_mem_buf *buf, struct vm_area_struct *vma, v
static int hl_ts_alloc_buf(struct hl_mmap_mem_buf *buf, gfp_t gfp, void *args)
{
struct hl_ts_buff *ts_buff = NULL;
- u32 size, num_elements;
+ u32 num_elements;
+ size_t size;
void *p;

num_elements = *(u32 *)args;

- ts_buff = kzalloc(sizeof(*ts_buff), GFP_KERNEL);
+ ts_buff = kzalloc(sizeof(*ts_buff), gfp);
if (!ts_buff)
return -ENOMEM;

--
2.25.1