2022-10-27 09:30:29

by Oded Gabbay

[permalink] [raw]
Subject: [PATCH 01/15] habanalabs: fix using freed pointer

From: Ohad Sharabi <[email protected]>

The code uses the pointer for trace purpose (without actually
dereference it) but still get static analysis warning.
This patch eliminate the warning.

Signed-off-by: Ohad Sharabi <[email protected]>
Reviewed-by: Oded Gabbay <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/misc/habanalabs/common/device.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 9b54d1df5302..dd01be5c4ba3 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -135,6 +135,9 @@ static void hl_asic_dma_free_common(struct hl_device *hdev, size_t size, void *c
dma_addr_t dma_handle, enum dma_alloc_type alloc_type,
const char *caller)
{
+ /* this is needed to avoid warning on using freed pointer */
+ u64 store_cpu_addr = (u64) (uintptr_t) cpu_addr;
+
switch (alloc_type) {
case DMA_ALLOC_COHERENT:
hdev->asic_funcs->asic_dma_free_coherent(hdev, size, cpu_addr, dma_handle);
@@ -147,7 +150,7 @@ static void hl_asic_dma_free_common(struct hl_device *hdev, size_t size, void *c
break;
}

- trace_habanalabs_dma_free(hdev->dev, (u64) (uintptr_t) cpu_addr, dma_handle, size, caller);
+ trace_habanalabs_dma_free(hdev->dev, store_cpu_addr, dma_handle, size, caller);
}

void *hl_asic_dma_alloc_coherent_caller(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle,
--
2.25.1



2022-10-27 09:30:30

by Oded Gabbay

[permalink] [raw]
Subject: [PATCH 06/15] habanalabs: add support for graceful hard reset

From: Tomer Tayar <[email protected]>

Calling hl_device_reset() for a hard reset will lead to a quite
immediate device reset and to killing user process.
For resets that follow errors, it disables the option to debug the
errors on both the device side and the user application side.

This patch adds a 'graceful hard reset' option and a new
hl_device_cond_reset() function.
Under some conditions, mainly if there is no user process or if he is
not registered to driver notifications, this function will execute hard
reset as usual.
Otherwise, the reset will be postponed and a notification will be sent
to user, to let him perform post-error actions and then to release the
device, after which reset will take place.

If device is not released by user in some defined time, a watchdog work
will execute the reset in any case.

Signed-off-by: Tomer Tayar <[email protected]>
Reviewed-by: Oded Gabbay <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/misc/habanalabs/common/device.c | 141 ++++++++++++++++++--
drivers/misc/habanalabs/common/habanalabs.h | 14 +-
2 files changed, 140 insertions(+), 15 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index b71303ba11d0..bcd959924971 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -16,7 +16,9 @@

#include <trace/events/habanalabs.h>

-#define HL_RESET_DELAY_USEC 10000 /* 10ms */
+#define HL_RESET_DELAY_USEC 10000 /* 10ms */
+
+#define HL_DEVICE_RELEASE_WATCHDOG_TIMEOUT_SEC 5

enum dma_alloc_type {
DMA_ALLOC_COHERENT,
@@ -387,7 +389,7 @@ bool hl_ctrl_device_operational(struct hl_device *hdev,
static void hpriv_release(struct kref *ref)
{
u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
- bool device_is_idle = true;
+ bool reset_device, device_is_idle = true;
struct hl_fpriv *hpriv;
struct hl_device *hdev;

@@ -404,14 +406,20 @@ static void hpriv_release(struct kref *ref)
mutex_destroy(&hpriv->ctx_lock);
mutex_destroy(&hpriv->restore_phase_mutex);

- /* No need for idle status check if device is going to be reset in any case */
- if (!hdev->reset_upon_device_release && hdev->pdev && !hdev->pldm)
+ /* Device should be reset if reset-upon-device-release is enabled, or if there is a pending
+ * reset that waits for device release.
+ */
+ reset_device = hdev->reset_upon_device_release || hdev->reset_info.watchdog_active;
+
+ /* Unless device is reset in any case, check idle status and reset if device is not idle */
+ if (!reset_device && hdev->pdev && !hdev->pldm)
device_is_idle = hdev->asic_funcs->is_device_idle(hdev, idle_mask,
HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL);
-
- if (!device_is_idle)
+ if (!device_is_idle) {
dev_err(hdev->dev, "device not idle after user context is closed (0x%llx_%llx)\n",
idle_mask[1], idle_mask[0]);
+ reset_device = true;
+ }

/* We need to remove the user from the list to make sure the reset process won't
* try to kill the user process. Because, if we got here, it means there are no
@@ -426,9 +434,10 @@ static void hpriv_release(struct kref *ref)
list_del(&hpriv->dev_node);
mutex_unlock(&hdev->fpriv_list_lock);

- if (!device_is_idle || hdev->reset_upon_device_release) {
+ if (reset_device) {
hl_device_reset(hdev, HL_DRV_RESET_DEV_RELEASE);
} else {
+ /* Scrubbing is handled within hl_device_reset(), so here need to do it directly */
int rc = hdev->asic_funcs->scrub_device_mem(hdev);

if (rc)
@@ -695,6 +704,20 @@ static void device_hard_reset_pending(struct work_struct *work)
}
}

+static void device_release_watchdog_func(struct work_struct *work)
+{
+ struct hl_device_reset_work *device_release_watchdog_work =
+ container_of(work, struct hl_device_reset_work, reset_work.work);
+ struct hl_device *hdev = device_release_watchdog_work->hdev;
+ u32 flags;
+
+ dev_dbg(hdev->dev, "Device wasn't released in time. Initiate device reset.\n");
+
+ flags = device_release_watchdog_work->flags | HL_DRV_RESET_FROM_WD_THR;
+
+ hl_device_reset(hdev, flags);
+}
+
/*
* device_early_init - do some early initialization for the habanalabs device
*
@@ -813,11 +836,14 @@ static int device_early_init(struct hl_device *hdev)
goto free_cb_mgr;
}

- INIT_DELAYED_WORK(&hdev->device_reset_work.reset_work,
- device_hard_reset_pending);
+ INIT_DELAYED_WORK(&hdev->device_reset_work.reset_work, device_hard_reset_pending);
hdev->device_reset_work.hdev = hdev;
hdev->device_fini_pending = 0;

+ INIT_DELAYED_WORK(&hdev->device_release_watchdog_work.reset_work,
+ device_release_watchdog_func);
+ hdev->device_release_watchdog_work.hdev = hdev;
+
mutex_init(&hdev->send_cpu_message_lock);
mutex_init(&hdev->debug_lock);
INIT_LIST_HEAD(&hdev->cs_mirror_list);
@@ -1367,8 +1393,8 @@ static void handle_reset_trigger(struct hl_device *hdev, u32 flags)
int hl_device_reset(struct hl_device *hdev, u32 flags)
{
bool hard_reset, from_hard_reset_thread, fw_reset, hard_instead_soft = false,
- reset_upon_device_release = false, schedule_hard_reset = false,
- skip_wq_flush, delay_reset;
+ reset_upon_device_release = false, schedule_hard_reset = false, delay_reset,
+ from_dev_release, from_watchdog_thread;
u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
struct hl_ctx *ctx;
int i, rc;
@@ -1381,8 +1407,9 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
hard_reset = !!(flags & HL_DRV_RESET_HARD);
from_hard_reset_thread = !!(flags & HL_DRV_RESET_FROM_RESET_THR);
fw_reset = !!(flags & HL_DRV_RESET_BYPASS_REQ_TO_FW);
- skip_wq_flush = !!(flags & HL_DRV_RESET_DEV_RELEASE);
+ from_dev_release = !!(flags & HL_DRV_RESET_DEV_RELEASE);
delay_reset = !!(flags & HL_DRV_RESET_DELAY);
+ from_watchdog_thread = !!(flags & HL_DRV_RESET_FROM_WD_THR);

if (!hard_reset && !hdev->asic_prop.supports_compute_reset) {
hard_instead_soft = true;
@@ -1439,6 +1466,23 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)

spin_unlock(&hdev->reset_info.lock);

+ /* Cancel the device release watchdog work if required.
+ * In case of reset-upon-device-release while the release watchdog work is
+ * scheduled, do hard-reset instead of compute-reset.
+ */
+ if ((hard_reset || from_dev_release) && hdev->reset_info.watchdog_active) {
+ hdev->reset_info.watchdog_active = 0;
+ if (!from_watchdog_thread)
+ cancel_delayed_work_sync(
+ &hdev->device_release_watchdog_work.reset_work);
+
+ if (from_dev_release) {
+ flags |= HL_DRV_RESET_HARD;
+ flags &= ~HL_DRV_RESET_DEV_RELEASE;
+ hard_reset = true;
+ }
+ }
+
if (delay_reset)
usleep_range(HL_RESET_DELAY_USEC, HL_RESET_DELAY_USEC << 1);

@@ -1474,7 +1518,7 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
return 0;
}

- cleanup_resources(hdev, hard_reset, fw_reset, skip_wq_flush);
+ cleanup_resources(hdev, hard_reset, fw_reset, from_dev_release);

kill_processes:
if (hard_reset) {
@@ -1735,6 +1779,73 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
return rc;
}

+/*
+ * hl_device_cond_reset() - conditionally reset the device.
+ * @hdev: pointer to habanalabs device structure.
+ * @reset_flags: reset flags.
+ * @event_mask: events to notify user about.
+ *
+ * Conditionally reset the device, or alternatively schedule a watchdog work to reset the device
+ * unless another reset precedes it.
+ */
+int hl_device_cond_reset(struct hl_device *hdev, u32 flags, u64 event_mask)
+{
+ struct hl_ctx *ctx = NULL;
+
+ /* Device release watchdog is only for hard reset */
+ if (!(flags & HL_DRV_RESET_HARD) && hdev->asic_prop.allow_inference_soft_reset)
+ goto device_reset;
+
+ /* F/W reset cannot be postponed */
+ if (flags & HL_DRV_RESET_BYPASS_REQ_TO_FW)
+ goto device_reset;
+
+ /* Device release watchdog is relevant only if user exists and gets a reset notification */
+ if (!(event_mask & HL_NOTIFIER_EVENT_DEVICE_RESET)) {
+ dev_err(hdev->dev, "Resetting device without a reset indication to user\n");
+ goto device_reset;
+ }
+
+ ctx = hl_get_compute_ctx(hdev);
+ if (!ctx || !ctx->hpriv->notifier_event.eventfd)
+ goto device_reset;
+
+ /* Schedule the device release watchdog work unless reset is already in progress or if the
+ * work is already scheduled.
+ */
+ spin_lock(&hdev->reset_info.lock);
+ if (hdev->reset_info.in_reset) {
+ spin_unlock(&hdev->reset_info.lock);
+ goto device_reset;
+ }
+
+ if (hdev->reset_info.watchdog_active)
+ goto out;
+
+ hdev->device_release_watchdog_work.flags = flags;
+ dev_dbg(hdev->dev, "Device is going to be reset in %u sec unless being released\n",
+ hdev->device_release_watchdog_timeout_sec);
+ schedule_delayed_work(&hdev->device_release_watchdog_work.reset_work,
+ msecs_to_jiffies(hdev->device_release_watchdog_timeout_sec * 1000));
+ hdev->reset_info.watchdog_active = 1;
+out:
+ spin_unlock(&hdev->reset_info.lock);
+
+ hl_notifier_event_send_all(hdev, event_mask);
+
+ hl_ctx_put(ctx);
+
+ return 0;
+
+device_reset:
+ if (event_mask)
+ hl_notifier_event_send_all(hdev, event_mask);
+ if (ctx)
+ hl_ctx_put(ctx);
+
+ return hl_device_reset(hdev, flags);
+}
+
static void hl_notifier_event_send(struct hl_notifier_event *notifier_event, u64 event_mask)
{
mutex_lock(&notifier_event->lock);
@@ -1932,6 +2043,8 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)

hdev->asic_funcs->state_dump_init(hdev);

+ hdev->device_release_watchdog_timeout_sec = HL_DEVICE_RELEASE_WATCHDOG_TIMEOUT_SEC;
+
hdev->memory_scrub_val = MEM_SCRUB_DEFAULT_VAL;
hl_debugfs_add_device(hdev);

@@ -2152,6 +2265,8 @@ void hl_device_fini(struct hl_device *hdev)
}
}

+ cancel_delayed_work_sync(&hdev->device_release_watchdog_work.reset_work);
+
/* Disable PCI access from device F/W so it won't send us additional
* interrupts. We disable MSI/MSI-X at the halt_engines function and we
* can't have the F/W sending us interrupts after that. We need to
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index c8347eac09ed..bfaaa9daa750 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -191,6 +191,9 @@ enum hl_mmu_enablement {
*
* - HL_DRV_RESET_DELAY
* Set if a delay should be added before the reset
+ *
+ * - HL_DRV_RESET_FROM_WD_THR
+ * Set if the caller is the device release watchdog thread
*/

#define HL_DRV_RESET_HARD (1 << 0)
@@ -201,6 +204,7 @@ enum hl_mmu_enablement {
#define HL_DRV_RESET_BYPASS_REQ_TO_FW (1 << 5)
#define HL_DRV_RESET_FW_FATAL_ERR (1 << 6)
#define HL_DRV_RESET_DELAY (1 << 7)
+#define HL_DRV_RESET_FROM_WD_THR (1 << 8)

/*
* Security
@@ -3009,6 +3013,7 @@ struct hl_error_info {
* same cause.
* @skip_reset_on_timeout: Skip device reset if CS has timed out, wait for it to
* complete instead.
+ * @watchdog_active: true if a device release watchdog work is scheduled.
*/
struct hl_reset_info {
spinlock_t lock;
@@ -3019,12 +3024,11 @@ struct hl_reset_info {
u8 in_compute_reset;
u8 needs_reset;
u8 hard_reset_pending;
-
u8 curr_reset_cause;
u8 prev_reset_trigger;
u8 reset_trigger_repeated;
-
u8 skip_reset_on_timeout;
+ u8 watchdog_active;
};

/**
@@ -3040,6 +3044,8 @@ struct hl_reset_info {
* @dev_ctrl: related kernel device structure for the control device
* @work_heartbeat: delayed work for CPU-CP is-alive check.
* @device_reset_work: delayed work which performs hard reset
+ * @device_release_watchdog_work: watchdog work that performs hard reset if user doesn't release
+ * device upon certain error cases.
* @asic_name: ASIC specific name.
* @asic_type: ASIC specific type.
* @completion_queue: array of hl_cq.
@@ -3149,6 +3155,7 @@ struct hl_reset_info {
* indicates which decoder engines are binned-out
* @edma_binning: contains mask of edma engines that is received from the f/w which
* indicates which edma engines are binned-out
+ * @device_release_watchdog_timeout_sec: device release watchdog timeout value in seconds.
* @id: device minor.
* @id_control: minor of the control device.
* @cdev_idx: char device index. Used for setting its name.
@@ -3218,6 +3225,7 @@ struct hl_device {
struct device *dev_ctrl;
struct delayed_work work_heartbeat;
struct hl_device_reset_work device_reset_work;
+ struct hl_device_reset_work device_release_watchdog_work;
char asic_name[HL_STR_MAX];
char status[HL_DEV_STS_MAX][HL_STR_MAX];
enum hl_asic_type asic_type;
@@ -3312,6 +3320,7 @@ struct hl_device {
u32 high_pll;
u32 decoder_binning;
u32 edma_binning;
+ u32 device_release_watchdog_timeout_sec;
u16 id;
u16 id_control;
u16 cdev_idx;
@@ -3551,6 +3560,7 @@ void hl_device_fini(struct hl_device *hdev);
int hl_device_suspend(struct hl_device *hdev);
int hl_device_resume(struct hl_device *hdev);
int hl_device_reset(struct hl_device *hdev, u32 flags);
+int hl_device_cond_reset(struct hl_device *hdev, u32 flags, u64 event_mask);
void hl_hpriv_get(struct hl_fpriv *hpriv);
int hl_hpriv_put(struct hl_fpriv *hpriv);
int hl_device_utilization(struct hl_device *hdev, u32 *utilization);
--
2.25.1


2022-10-27 09:30:51

by Oded Gabbay

[permalink] [raw]
Subject: [PATCH 04/15] habanalabs: fix user mappings calculation in case of page fault

From: Dani Liberman <[email protected]>

As there are 2 types of user mappings, pmmu and hmmu, calculate
only the relevant mappings for the requested type.

Signed-off-by: Dani Liberman <[email protected]>
Reviewed-by: Oded Gabbay <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/misc/habanalabs/common/device.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 0026fe42b3d2..0e88396744a1 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -2305,8 +2305,13 @@ static void hl_capture_user_mappings(struct hl_device *hdev, bool is_pmmu)
}

mutex_lock(&ctx->mem_hash_lock);
- hash_for_each(ctx->mem_hash, i, hnode, node)
- pgf_info->num_of_user_mappings++;
+ hash_for_each(ctx->mem_hash, i, hnode, node) {
+ vm_type = hnode->ptr;
+ if (((*vm_type == VM_TYPE_USERPTR) && is_pmmu) ||
+ ((*vm_type == VM_TYPE_PHYS_PACK) && !is_pmmu))
+ pgf_info->num_of_user_mappings++;
+
+ }

if (!pgf_info->num_of_user_mappings)
goto finish;
--
2.25.1


2022-10-27 09:32:34

by Oded Gabbay

[permalink] [raw]
Subject: [PATCH 10/15] habanalabs: use graceful hard reset for CS timeouts

From: Tomer Tayar <[email protected]>

Use graceful hard reset when detecting a CS timeout that requires a
device reset.

Signed-off-by: Tomer Tayar <[email protected]>
Reviewed-by: Oded Gabbay <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
.../misc/habanalabs/common/command_submission.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index fa05770865c6..f1c69c8ed74a 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -798,7 +798,7 @@ static void cs_do_release(struct kref *ref)
static void cs_timedout(struct work_struct *work)
{
struct hl_device *hdev;
- u64 event_mask;
+ u64 event_mask = 0x0;
int rc;
struct hl_cs *cs = container_of(work, struct hl_cs,
work_tdr.work);
@@ -830,11 +830,7 @@ static void cs_timedout(struct work_struct *work)
if (rc) {
hdev->captured_err_info.cs_timeout.timestamp = ktime_get();
hdev->captured_err_info.cs_timeout.seq = cs->sequence;
-
- event_mask = device_reset ? (HL_NOTIFIER_EVENT_CS_TIMEOUT |
- HL_NOTIFIER_EVENT_DEVICE_RESET) : HL_NOTIFIER_EVENT_CS_TIMEOUT;
-
- hl_notifier_event_send_all(hdev, event_mask);
+ event_mask |= HL_NOTIFIER_EVENT_CS_TIMEOUT;
}

switch (cs->type) {
@@ -869,8 +865,12 @@ static void cs_timedout(struct work_struct *work)

cs_put(cs);

- if (device_reset)
- hl_device_reset(hdev, HL_DRV_RESET_TDR);
+ if (device_reset) {
+ event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
+ hl_device_cond_reset(hdev, HL_DRV_RESET_TDR, event_mask);
+ } else if (event_mask) {
+ hl_notifier_event_send_all(hdev, event_mask);
+ }
}

static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
--
2.25.1


2022-10-27 09:33:07

by Oded Gabbay

[permalink] [raw]
Subject: [PATCH 12/15] habanalabs: zero ts registration buff when allocated

From: farah kassabri <[email protected]>

To avoid memory corruption in kernel memory while using timestamp
registration nodes, zero the kernel buff memory when its allocated.

Signed-off-by: farah kassabri <[email protected]>
Reviewed-by: Oded Gabbay <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/misc/habanalabs/common/memory.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c
index 99b1d6ce26ae..541e1b6a2176 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -2109,7 +2109,7 @@ static int hl_ts_alloc_buf(struct hl_mmap_mem_buf *buf, gfp_t gfp, void *args)

/* Allocate the internal kernel buffer */
size = num_elements * sizeof(struct hl_user_pending_interrupt);
- p = vmalloc(size);
+ p = vzalloc(size);
if (!p)
goto free_user_buff;

--
2.25.1


2022-10-27 09:33:12

by Oded Gabbay

[permalink] [raw]
Subject: [PATCH 09/15] habanalabs/gaudi2: use graceful hard reset for F/W events

From: Tomer Tayar <[email protected]>

Use graceful hard reset for F/W events on Gaudi2 device that require a
device reset.

While at it, do a small refactor of the checks and function calls,
to simplify it and to avoid code duplication.

Signed-off-by: Tomer Tayar <[email protected]>
Reviewed-by: Oded Gabbay <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/misc/habanalabs/gaudi2/gaudi2.c | 27 +++++++++----------------
1 file changed, 10 insertions(+), 17 deletions(-)

diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c
index 9208f69dd7f8..22f5445fe71c 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c
@@ -8768,9 +8768,9 @@ static void hl_arc_event_handle(struct hl_device *hdev,

static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
{
- u32 ctl, reset_flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY;
- struct gaudi2_device *gaudi2 = hdev->asic_specific;
bool reset_required = false, skip_reset = false, is_critical = false;
+ struct gaudi2_device *gaudi2 = hdev->asic_specific;
+ u32 ctl, reset_flags = HL_DRV_RESET_HARD;
int index, sbte_index;
u64 event_mask = 0;
u16 event_type;
@@ -9158,7 +9158,9 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
event_type);
}

- if ((gaudi2_irq_map_table[event_type].reset || reset_required) && !skip_reset)
+ if ((gaudi2_irq_map_table[event_type].reset || reset_required) && !skip_reset &&
+ (hdev->hard_reset_on_fw_events ||
+ (hdev->asic_prop.fw_security_enabled && is_critical)))
goto reset_device;

/* Send unmask irq only for interrupts not classified as MSG */
@@ -9172,22 +9174,13 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent

reset_device:
if (hdev->asic_prop.fw_security_enabled && is_critical) {
- reset_flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW;
-
- /* notify on device unavailable while the reset triggered by fw */
- event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET |
- HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE);
- hl_device_reset(hdev, reset_flags);
- } else if (hdev->hard_reset_on_fw_events) {
- event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
- hl_device_reset(hdev, reset_flags);
+ reset_flags |= HL_DRV_RESET_BYPASS_REQ_TO_FW;
+ event_mask |= HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
} else {
- if (!gaudi2_irq_map_table[event_type].msg)
- hl_fw_unmask_irq(hdev, event_type);
+ reset_flags |= HL_DRV_RESET_DELAY;
}
-
- if (event_mask)
- hl_notifier_event_send_all(hdev, event_mask);
+ event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
+ hl_device_cond_reset(hdev, reset_flags, event_mask);
}

static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val)
--
2.25.1


2022-10-27 09:33:47

by Oded Gabbay

[permalink] [raw]
Subject: [PATCH 05/15] habanalabs: avoid divide by zero in device utilization

From: Ohad Sharabi <[email protected]>

Currently there is no verification whether the divisor is legal.

Signed-off-by: Ohad Sharabi <[email protected]>
Reviewed-by: Oded Gabbay <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/misc/habanalabs/common/device.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 0e88396744a1..b71303ba11d0 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -987,11 +987,16 @@ static void device_late_fini(struct hl_device *hdev)

int hl_device_utilization(struct hl_device *hdev, u32 *utilization)
{
- u64 max_power, curr_power, dc_power, dividend;
+ u64 max_power, curr_power, dc_power, dividend, divisor;
int rc;

max_power = hdev->max_power;
dc_power = hdev->asic_prop.dc_power_default;
+ divisor = max_power - dc_power;
+ if (!divisor) {
+ dev_warn(hdev->dev, "device utilization is not supported\n");
+ return -EOPNOTSUPP;
+ }
rc = hl_fw_cpucp_power_get(hdev, &curr_power);

if (rc)
@@ -1000,7 +1005,7 @@ int hl_device_utilization(struct hl_device *hdev, u32 *utilization)
curr_power = clamp(curr_power, dc_power, max_power);

dividend = (curr_power - dc_power) * 100;
- *utilization = (u32) div_u64(dividend, (max_power - dc_power));
+ *utilization = (u32) div_u64(dividend, divisor);

return 0;
}
--
2.25.1


2022-10-27 09:34:33

by Oded Gabbay

[permalink] [raw]
Subject: [PATCH 14/15] habanalabs: add warning print upon a PCI error

From: Ofir Bitton <[email protected]>

In order to know if driver catches PCI errors correctly, we need to
print a warning per each error.

Signed-off-by: Ofir Bitton <[email protected]>
Reviewed-by: Oded Gabbay <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/misc/habanalabs/common/habanalabs_drv.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c
index 714994725224..e82af8989700 100644
--- a/drivers/misc/habanalabs/common/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/common/habanalabs_drv.c
@@ -595,15 +595,16 @@ hl_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t state)

switch (state) {
case pci_channel_io_normal:
+ dev_warn(hdev->dev, "PCI normal state error detected\n");
return PCI_ERS_RESULT_CAN_RECOVER;

case pci_channel_io_frozen:
- dev_warn(hdev->dev, "frozen state error detected\n");
+ dev_warn(hdev->dev, "PCI frozen state error detected\n");
result = PCI_ERS_RESULT_NEED_RESET;
break;

case pci_channel_io_perm_failure:
- dev_warn(hdev->dev, "failure state error detected\n");
+ dev_warn(hdev->dev, "PCI failure state error detected\n");
result = PCI_ERS_RESULT_DISCONNECT;
break;

@@ -639,6 +640,10 @@ static void hl_pci_err_resume(struct pci_dev *pdev)
*/
static pci_ers_result_t hl_pci_err_slot_reset(struct pci_dev *pdev)
{
+ struct hl_device *hdev = pci_get_drvdata(pdev);
+
+ dev_warn(hdev->dev, "PCI slot reset detected\n");
+
return PCI_ERS_RESULT_RECOVERED;
}

--
2.25.1


2022-10-27 09:36:32

by Oded Gabbay

[permalink] [raw]
Subject: [PATCH 07/15] habanalabs: add an option to control watchdog timeout via debugfs

From: Tomer Tayar <[email protected]>

Add an option to control the timeout value for the driver's watchdog
of the reset process. The timeout represents the amount of the user
has to close his process once he gets a device reset notification from
the driver.

Signed-off-by: Tomer Tayar <[email protected]>
Reviewed-by: Oded Gabbay <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
Documentation/ABI/testing/debugfs-driver-habanalabs | 7 +++++++
drivers/misc/habanalabs/common/debugfs.c | 5 +++++
2 files changed, 12 insertions(+)

diff --git a/Documentation/ABI/testing/debugfs-driver-habanalabs b/Documentation/ABI/testing/debugfs-driver-habanalabs
index c915bf17b293..85f6d04f528b 100644
--- a/Documentation/ABI/testing/debugfs-driver-habanalabs
+++ b/Documentation/ABI/testing/debugfs-driver-habanalabs
@@ -91,6 +91,13 @@ Description: Enables the root user to set the device to specific state.
Valid values are "disable", "enable", "suspend", "resume".
User can read this property to see the valid values

+What: /sys/kernel/debug/habanalabs/hl<n>/device_release_watchdog_timeout
+Date: Oct 2022
+KernelVersion: 6.2
+Contact: [email protected]
+Description: The watchdog timeout value in seconds for a device relese upon
+ certain error cases, after which the device is reset.
+
What: /sys/kernel/debug/habanalabs/hl<n>/dma_size
Date: Apr 2021
KernelVersion: 5.13
diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c
index 48d3ec8b5c82..945c0e6758ca 100644
--- a/drivers/misc/habanalabs/common/debugfs.c
+++ b/drivers/misc/habanalabs/common/debugfs.c
@@ -1769,6 +1769,11 @@ void hl_debugfs_add_device(struct hl_device *hdev)
dev_entry,
&hl_timeout_locked_fops);

+ debugfs_create_u32("device_release_watchdog_timeout",
+ 0644,
+ dev_entry->root,
+ &hdev->device_release_watchdog_timeout_sec);
+
for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) {
debugfs_create_file(hl_debugfs_list[i].name,
0444,
--
2.25.1


2022-10-27 09:55:47

by Oded Gabbay

[permalink] [raw]
Subject: [PATCH 08/15] habanalabs/gaudi: use graceful hard reset for F/W events

From: Tomer Tayar <[email protected]>

Use graceful hard reset for F/W events on Gaudi device that require a
device reset.

Signed-off-by: Tomer Tayar <[email protected]>
Reviewed-by: Oded Gabbay <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/misc/habanalabs/gaudi/gaudi.c | 16 +++++++---------
1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 337123f73501..3dfb9ecf7db3 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -7942,16 +7942,14 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr
reset_required = false;
}

- /* despite reset doesn't execute. a notification on
- * occurred event needs to be sent here
- */
- if (event_mask)
- hl_notifier_event_send_all(hdev, event_mask);
-
- if (reset_required)
- hl_device_reset(hdev, flags);
- else
+ if (reset_required) {
+ hl_device_cond_reset(hdev, flags, event_mask);
+ } else {
hl_fw_unmask_irq(hdev, event_type);
+ /* Notification on occurred event needs to be sent although reset is not executed */
+ if (event_mask)
+ hl_notifier_event_send_all(hdev, event_mask);
+ }
}

static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
--
2.25.1


2022-10-27 09:57:29

by Oded Gabbay

[permalink] [raw]
Subject: [PATCH 11/15] habanalabs: no consecutive err when user context is enabled

From: Tal Cohen <[email protected]>

Consecutive error protects a device reset loop from being triggered
due to h/w issues and enters the device into an unavailable state.
When user may cause the error, an unavailable state
will prevent the user from running its workloads.

The commit prevents entering consecutive state when a user context
is enabled.

Signed-off-by: Tal Cohen <[email protected]>
Reviewed-by: Oded Gabbay <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/misc/habanalabs/common/device.c | 4 ++++
1 file changed, 4 insertions(+)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index bcd959924971..61ddcb1ce508 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -1320,6 +1320,10 @@ static void handle_reset_trigger(struct hl_device *hdev, u32 flags)
{
u32 cur_reset_trigger = HL_RESET_TRIGGER_DEFAULT;

+ /* No consecutive mechanism when user context exists */
+ if (hdev->is_compute_ctx_active)
+ return;
+
/*
* 'reset cause' is being updated here, because getting here
* means that it's the 1st time and the last time we're here
--
2.25.1


2022-10-27 09:57:44

by Oded Gabbay

[permalink] [raw]
Subject: [PATCH 13/15] habanalabs: fix PCIe access to SRAM via debugfs

From: Tomer Tayar <[email protected]>

hl_access_sram_dram_region() uses a region base which is set within the
hl_set_dram_bar() function. However, for SRAM access this function is
not called, and we end up with a wrong value of region base and with a
bad calculated address.
Fix it by initializing the region base value independently of whether
hl_set_dram_bar() is called or not.

Signed-off-by: Tomer Tayar <[email protected]>
Reviewed-by: Oded Gabbay <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/misc/habanalabs/common/device.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 61ddcb1ce508..cb8ecc17bba1 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -69,17 +69,17 @@ int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val,
enum debugfs_access_type acc_type, enum pci_region region_type, bool set_dram_bar)
{
struct pci_mem_region *region = &hdev->pci_mem_region[region_type];
- u64 old_base = 0, rc, new_bar_region_base = 0;
+ u64 old_base = 0, rc, bar_region_base = region->region_base;
void __iomem *acc_addr;

if (set_dram_bar) {
- old_base = hl_set_dram_bar(hdev, addr, region, &new_bar_region_base);
+ old_base = hl_set_dram_bar(hdev, addr, region, &bar_region_base);
if (old_base == U64_MAX)
return -EIO;
}

acc_addr = hdev->pcie_bar[region->bar_id] + region->offset_in_bar +
- (addr - new_bar_region_base);
+ (addr - bar_region_base);

switch (acc_type) {
case DEBUGFS_READ8:
--
2.25.1


2022-10-27 09:58:50

by Oded Gabbay

[permalink] [raw]
Subject: [PATCH 03/15] habanalabs/gaudi2: remove configurations to access the MSI-X doorbell

From: Tomer Tayar <[email protected]>

The virtual MSI-X doorbell is supported now in F/W, so all
configurations to access the PCIE_DBI MSI-X doorbell can be removed.

Signed-off-by: Tomer Tayar <[email protected]>
Reviewed-by: Oded Gabbay <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/misc/habanalabs/gaudi2/gaudi2.c | 34 +++----------------------
1 file changed, 3 insertions(+), 31 deletions(-)

diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c
index a4e3586f1a12..9208f69dd7f8 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c
@@ -4473,23 +4473,9 @@ static void gaudi2_init_sm(struct hl_device *hdev)
reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1);
WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val);

- /* Init CQ0 DB */
- /* Configure the monitor to trigger MSI-X interrupt */
- /* TODO:
- * Remove the if statement when virtual MSI-X doorbell is supported in simulator (SW-93022)
- * and in F/W (SW-93024).
- */
- if (!hdev->pdev || hdev->asic_prop.fw_security_enabled) {
- u64 msix_db_reg = CFG_BASE + mmPCIE_DBI_MSIX_DOORBELL_OFF;
-
- WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0, lower_32_bits(msix_db_reg));
- WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0, upper_32_bits(msix_db_reg));
- } else {
- WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0,
- lower_32_bits(gaudi2->virt_msix_db_dma_addr));
- WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0,
- upper_32_bits(gaudi2->virt_msix_db_dma_addr));
- }
+ /* Init CQ0 DB - configure the monitor to trigger MSI-X interrupt */
+ WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0, lower_32_bits(gaudi2->virt_msix_db_dma_addr));
+ WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0, upper_32_bits(gaudi2->virt_msix_db_dma_addr));
WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0, GAUDI2_IRQ_NUM_COMPLETION);

for (i = 0 ; i < GAUDI2_RESERVED_CQ_NUMBER ; i++) {
@@ -4657,20 +4643,6 @@ static void gaudi2_init_vdec_brdg_ctrl(struct hl_device *hdev, u64 base_addr, u3
{
u32 sob_id;

- /* TODO:
- * Remove when virtual MSI-X doorbell is supported in simulator (SW-93022) and in F/W
- * (SW-93024).
- */
- if (!hdev->pdev || hdev->asic_prop.fw_security_enabled) {
- u32 interrupt_id = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 2 * decoder_id;
-
- WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_AWADDR, mmPCIE_DBI_MSIX_DOORBELL_OFF);
- WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_WDATA, interrupt_id);
- WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_AWADDR, mmPCIE_DBI_MSIX_DOORBELL_OFF);
- WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_WDATA, interrupt_id + 1);
- return;
- }
-
/* VCMD normal interrupt */
sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id;
WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_AWADDR,
--
2.25.1


2022-10-27 09:59:15

by Oded Gabbay

[permalink] [raw]
Subject: [PATCH 02/15] habanalabs: allow setting HBM BAR to other regions

From: Ohad Sharabi <[email protected]>

Up until now the use-case in the driver was that the HBM is accessed
using the HBM BAR, yet the BAR sometimes cannot cover the whole HBM and
so we needed to set the BAR to other HBM offset.
Now we are facing the need to access other PCI memory regions that can
be covered by the HBM BAR.
To answer that we are allowing the caller to determine if the HBM BAR
need to be set or not regardless of the PCI memory region.

Signed-off-by: Ohad Sharabi <[email protected]>
Reviewed-by: Oded Gabbay <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
---
drivers/misc/habanalabs/common/device.c | 29 ++++++++++++---------
drivers/misc/habanalabs/common/habanalabs.h | 2 ++
2 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index dd01be5c4ba3..0026fe42b3d2 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -32,6 +32,7 @@ enum dma_alloc_type {
* @hdev: pointer to habanalabs device structure.
* @addr: the address the caller wants to access.
* @region: the PCI region.
+ * @new_bar_region_base: the new BAR region base address.
*
* @return: the old BAR base address on success, U64_MAX for failure.
* The caller should set it back to the old address after use.
@@ -41,7 +42,8 @@ enum dma_alloc_type {
* This function can be called also if the bar doesn't need to be set,
* in that case it just won't change the base.
*/
-static u64 hl_set_dram_bar(struct hl_device *hdev, u64 addr, struct pci_mem_region *region)
+static u64 hl_set_dram_bar(struct hl_device *hdev, u64 addr, struct pci_mem_region *region,
+ u64 *new_bar_region_base)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
u64 bar_base_addr, old_base;
@@ -55,27 +57,28 @@ static u64 hl_set_dram_bar(struct hl_device *hdev, u64 addr, struct pci_mem_regi
old_base = hdev->asic_funcs->set_dram_bar_base(hdev, bar_base_addr);

/* in case of success we need to update the new BAR base */
- if (old_base != U64_MAX)
- region->region_base = bar_base_addr;
+ if ((old_base != U64_MAX) && new_bar_region_base)
+ *new_bar_region_base = bar_base_addr;

return old_base;
}

-static int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val,
- enum debugfs_access_type acc_type, enum pci_region region_type)
+int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val,
+ enum debugfs_access_type acc_type, enum pci_region region_type, bool set_dram_bar)
{
struct pci_mem_region *region = &hdev->pci_mem_region[region_type];
+ u64 old_base = 0, rc, new_bar_region_base = 0;
void __iomem *acc_addr;
- u64 old_base = 0, rc;

- if (region_type == PCI_REGION_DRAM) {
- old_base = hl_set_dram_bar(hdev, addr, region);
+ if (set_dram_bar) {
+ old_base = hl_set_dram_bar(hdev, addr, region, &new_bar_region_base);
if (old_base == U64_MAX)
return -EIO;
}

- acc_addr = hdev->pcie_bar[region->bar_id] + addr - region->region_base +
- region->offset_in_bar;
+ acc_addr = hdev->pcie_bar[region->bar_id] + region->offset_in_bar +
+ (addr - new_bar_region_base);
+
switch (acc_type) {
case DEBUGFS_READ8:
*val = readb(acc_addr);
@@ -97,8 +100,8 @@ static int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val
break;
}

- if (region_type == PCI_REGION_DRAM) {
- rc = hl_set_dram_bar(hdev, old_base, region);
+ if (set_dram_bar) {
+ rc = hl_set_dram_bar(hdev, old_base, region, NULL);
if (rc == U64_MAX)
return -EIO;
}
@@ -283,7 +286,7 @@ int hl_access_dev_mem(struct hl_device *hdev, enum pci_region region_type,
case PCI_REGION_SRAM:
case PCI_REGION_DRAM:
return hl_access_sram_dram_region(hdev, addr, val, acc_type,
- region_type);
+ region_type, (region_type == PCI_REGION_DRAM));
default:
return -EFAULT;
}
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 4913197c433e..c8347eac09ed 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -3486,6 +3486,8 @@ void hl_asic_dma_pool_free_caller(struct hl_device *hdev, void *vaddr, dma_addr_
int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir);
void hl_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt,
enum dma_data_direction dir);
+int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val,
+ enum debugfs_access_type acc_type, enum pci_region region_type, bool set_dram_bar);
int hl_access_cfg_region(struct hl_device *hdev, u64 addr, u64 *val,
enum debugfs_access_type acc_type);
int hl_access_dev_mem(struct hl_device *hdev, enum pci_region region_type,
--
2.25.1