2019-08-08 12:27:16

by Tomer Tayar

[permalink] [raw]
Subject: [PATCH] habanalabs: Expose devices after initialization is done

The char devices are currently exposed to user before the device and
driver initialization are done.
This patch moves the cdev and device adding to the system to the end of
the initialization sequence, while keeping the creation of the
structures at the beginning to allow the usage of dev_*().

Signed-off-by: Tomer Tayar <[email protected]>
---
drivers/misc/habanalabs/device.c | 163 ++++++++++++++++++---------
drivers/misc/habanalabs/habanalabs.h | 2 +
2 files changed, 111 insertions(+), 54 deletions(-)

diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c
index efde1fe7d286..9a5926888b99 100644
--- a/drivers/misc/habanalabs/device.c
+++ b/drivers/misc/habanalabs/device.c
@@ -151,50 +151,94 @@ static const struct file_operations hl_ctrl_ops = {
.compat_ioctl = hl_ioctl_control
};

+static void device_release_func(struct device *dev)
+{
+ kfree(dev);
+}
+
/*
- * device_setup_cdev - setup cdev and device for habanalabs device
+ * device_init_cdev - Initialize cdev and device for habanalabs device
*
* @hdev: pointer to habanalabs device structure
* @hclass: pointer to the class object of the device
* @minor: minor number of the specific device
* @fpos: file operations to install for this device
* @name: name of the device as it will appear in the filesystem
- * @cdev: pointer to the char device object that will be created
- * @dev: pointer to the device object that will be created
+ * @cdev: pointer to the char device object that will be initialized
+ * @dev: pointer to the device object that will be initialized
*
- * Create a cdev and a Linux device for habanalabs's device. Need to be
- * called at the end of the habanalabs device initialization process,
- * because this function exposes the device to the user
+ * Initialize a cdev and a Linux device for habanalabs's device.
*/
-static int device_setup_cdev(struct hl_device *hdev, struct class *hclass,
+static int device_init_cdev(struct hl_device *hdev, struct class *hclass,
int minor, const struct file_operations *fops,
char *name, struct cdev *cdev,
struct device **dev)
{
- int err, devno = MKDEV(hdev->major, minor);
-
cdev_init(cdev, fops);
cdev->owner = THIS_MODULE;
- err = cdev_add(cdev, devno, 1);
- if (err) {
- pr_err("Failed to add char device %s\n", name);
- return err;
+
+ *dev = kzalloc(sizeof(**dev), GFP_KERNEL);
+ if (!*dev)
+ return -ENOMEM;
+
+ device_initialize(*dev);
+ (*dev)->devt = MKDEV(hdev->major, minor);
+ (*dev)->class = hclass;
+ (*dev)->release = device_release_func;
+ dev_set_drvdata(*dev, hdev);
+ dev_set_name(*dev, "%s", name);
+
+ return 0;
+}
+
+static int device_cdev_sysfs_add(struct hl_device *hdev)
+{
+ int rc;
+
+ rc = cdev_device_add(&hdev->cdev, hdev->dev);
+ if (rc) {
+ dev_err(hdev->dev,
+ "failed to add a char device to the system\n");
+ return rc;
}

- *dev = device_create(hclass, NULL, devno, NULL, "%s", name);
- if (IS_ERR(*dev)) {
- pr_err("Failed to create device %s\n", name);
- err = PTR_ERR(*dev);
- goto err_device_create;
+ rc = cdev_device_add(&hdev->cdev_ctrl, hdev->dev_ctrl);
+ if (rc) {
+ dev_err(hdev->dev,
+ "failed to add a control char device to the system\n");
+ goto delete_cdev_device;
}

- dev_set_drvdata(*dev, hdev);
+ /* hl_sysfs_init() must be done after adding the device to the system */
+ rc = hl_sysfs_init(hdev);
+ if (rc) {
+ dev_err(hdev->dev, "failed to initialize sysfs\n");
+ goto delete_ctrl_cdev_device;
+ }
+
+ hdev->cdev_sysfs_created = true;

return 0;

-err_device_create:
- cdev_del(cdev);
- return err;
+delete_ctrl_cdev_device:
+ cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
+delete_cdev_device:
+ cdev_device_del(&hdev->cdev, hdev->dev);
+ return rc;
+}
+
+static void device_cdev_sysfs_del(struct hl_device *hdev)
+{
+ /* device_release() won't be called so must free devices explicitly */
+ if (!hdev->cdev_sysfs_created) {
+ kfree(hdev->dev_ctrl);
+ kfree(hdev->dev);
+ return;
+ }
+
+ hl_sysfs_fini(hdev);
+ cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
+ cdev_device_del(&hdev->cdev, hdev->dev);
}

/*
@@ -898,13 +942,16 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
{
int i, rc, cq_ready_cnt;
char *name;
+ bool add_cdev_sysfs_on_err = false;

name = kasprintf(GFP_KERNEL, "hl%d", hdev->id / 2);
- if (!name)
- return -ENOMEM;
+ if (!name) {
+ rc = -ENOMEM;
+ goto out_disabled;
+ }

- /* Create device */
- rc = device_setup_cdev(hdev, hclass, hdev->id, &hl_ops, name,
+ /* Initialize cdev and device structures */
+ rc = device_init_cdev(hdev, hclass, hdev->id, &hl_ops, name,
&hdev->cdev, &hdev->dev);

kfree(name);
@@ -915,22 +962,22 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->id / 2);
if (!name) {
rc = -ENOMEM;
- goto release_device;
+ goto free_dev;
}

- /* Create control device */
- rc = device_setup_cdev(hdev, hclass, hdev->id_control, &hl_ctrl_ops,
+ /* Initialize cdev and device structures for control device */
+ rc = device_init_cdev(hdev, hclass, hdev->id_control, &hl_ctrl_ops,
name, &hdev->cdev_ctrl, &hdev->dev_ctrl);

kfree(name);

if (rc)
- goto release_device;
+ goto free_dev;

/* Initialize ASIC function pointers and perform early init */
rc = device_early_init(hdev);
if (rc)
- goto release_control_device;
+ goto free_dev_ctrl;

/*
* Start calling ASIC initialization. First S/W then H/W and finally
@@ -1016,12 +1063,6 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
goto release_ctx;
}

- rc = hl_sysfs_init(hdev);
- if (rc) {
- dev_err(hdev->dev, "failed to initialize sysfs\n");
- goto free_cb_pool;
- }
-
hl_debugfs_add_device(hdev);

if (hdev->asic_funcs->get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
@@ -1030,6 +1071,12 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
hdev->asic_funcs->hw_fini(hdev, true);
}

+ /*
+ * From this point, in case of an error, add char devices and create
+ * sysfs nodes as part of the error flow, to allow debugging.
+ */
+ add_cdev_sysfs_on_err = true;
+
rc = hdev->asic_funcs->hw_init(hdev);
if (rc) {
dev_err(hdev->dev, "failed to initialize the H/W\n");
@@ -1066,9 +1113,24 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
}

/*
- * hl_hwmon_init must be called after device_late_init, because only
+ * Expose devices and sysfs nodes to user.
+ * From here there is no need to add char devices and create sysfs nodes
+ * in case of an error.
+ */
+ add_cdev_sysfs_on_err = false;
+ rc = device_cdev_sysfs_add(hdev);
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to add char devices and sysfs nodes\n");
+ rc = 0;
+ goto out_disabled;
+ }
+
+ /*
+ * hl_hwmon_init() must be called after device_late_init(), because only
* there we get the information from the device about which
- * hwmon-related sensors the device supports
+ * hwmon-related sensors the device supports.
+ * Furthermore, it must be done after adding the device to the system.
*/
rc = hl_hwmon_init(hdev);
if (rc) {
@@ -1084,8 +1146,6 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)

return 0;

-free_cb_pool:
- hl_cb_pool_fini(hdev);
release_ctx:
if (hl_ctx_put(hdev->kernel_ctx) != 1)
dev_err(hdev->dev,
@@ -1106,14 +1166,14 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
hdev->asic_funcs->sw_fini(hdev);
early_fini:
device_early_fini(hdev);
-release_control_device:
- device_destroy(hclass, hdev->dev_ctrl->devt);
- cdev_del(&hdev->cdev_ctrl);
-release_device:
- device_destroy(hclass, hdev->dev->devt);
- cdev_del(&hdev->cdev);
+free_dev_ctrl:
+ kfree(hdev->dev_ctrl);
+free_dev:
+ kfree(hdev->dev);
out_disabled:
hdev->disabled = true;
+ if (add_cdev_sysfs_on_err)
+ device_cdev_sysfs_add(hdev);
if (hdev->pdev)
dev_err(&hdev->pdev->dev,
"Failed to initialize hl%d. Device is NOT usable !\n",
@@ -1179,8 +1239,6 @@ void hl_device_fini(struct hl_device *hdev)

hl_debugfs_remove_device(hdev);

- hl_sysfs_fini(hdev);
-
/*
* Halt the engines and disable interrupts so we won't get any more
* completions from H/W and we won't have any accesses from the
@@ -1223,11 +1281,8 @@ void hl_device_fini(struct hl_device *hdev)

device_early_fini(hdev);

- /* Hide device from user */
- device_destroy(hdev->dev_ctrl->class, hdev->dev_ctrl->devt);
- cdev_del(&hdev->cdev_ctrl);
- device_destroy(hdev->dev->class, hdev->dev->devt);
- cdev_del(&hdev->cdev);
+ /* Hide devices and sysfs nodes from user */
+ device_cdev_sysfs_del(hdev);

pr_info("removed device successfully\n");
}
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index aaaa29d98901..1bb181285589 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -1216,6 +1216,7 @@ struct hl_device_reset_work {
* @dma_mask: the dma mask that was set for this device
* @in_debug: is device under debug. This, together with fpriv_list, enforces
* that only a single user is configuring the debug infrastructure.
+ * @cdev_sysfs_created: were char devices and sysfs nodes created.
*/
struct hl_device {
struct pci_dev *pdev;
@@ -1291,6 +1292,7 @@ struct hl_device {
u8 device_cpu_disabled;
u8 dma_mask;
u8 in_debug;
+ u8 cdev_sysfs_created;

/* Parameters for bring-up */
u8 mmu_enable;
--
2.17.1


2019-08-08 14:13:48

by Oded Gabbay

[permalink] [raw]
Subject: Re: [PATCH] habanalabs: Expose devices after initialization is done

On Thu, Aug 8, 2019 at 3:25 PM Tomer Tayar <[email protected]> wrote:
>
> The char devices are currently exposed to user before the device and
> driver initialization are done.
> This patch moves the cdev and device adding to the system to the end of
> the initialization sequence, while keeping the creation of the
> structures at the beginning to allow the usage of dev_*().
>
> Signed-off-by: Tomer Tayar <[email protected]>
> ---
> drivers/misc/habanalabs/device.c | 163 ++++++++++++++++++---------
> drivers/misc/habanalabs/habanalabs.h | 2 +
> 2 files changed, 111 insertions(+), 54 deletions(-)
>
> diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c
> index efde1fe7d286..9a5926888b99 100644
> --- a/drivers/misc/habanalabs/device.c
> +++ b/drivers/misc/habanalabs/device.c
> @@ -151,50 +151,94 @@ static const struct file_operations hl_ctrl_ops = {
> .compat_ioctl = hl_ioctl_control
> };
>
> +static void device_release_func(struct device *dev)
> +{
> + kfree(dev);
> +}
> +
> /*
> - * device_setup_cdev - setup cdev and device for habanalabs device
> + * device_init_cdev - Initialize cdev and device for habanalabs device
> *
> * @hdev: pointer to habanalabs device structure
> * @hclass: pointer to the class object of the device
> * @minor: minor number of the specific device
> * @fpos: file operations to install for this device
> * @name: name of the device as it will appear in the filesystem
> - * @cdev: pointer to the char device object that will be created
> - * @dev: pointer to the device object that will be created
> + * @cdev: pointer to the char device object that will be initialized
> + * @dev: pointer to the device object that will be initialized
> *
> - * Create a cdev and a Linux device for habanalabs's device. Need to be
> - * called at the end of the habanalabs device initialization process,
> - * because this function exposes the device to the user
> + * Initialize a cdev and a Linux device for habanalabs's device.
> */
> -static int device_setup_cdev(struct hl_device *hdev, struct class *hclass,
> +static int device_init_cdev(struct hl_device *hdev, struct class *hclass,
> int minor, const struct file_operations *fops,
> char *name, struct cdev *cdev,
> struct device **dev)
> {
> - int err, devno = MKDEV(hdev->major, minor);
> -
> cdev_init(cdev, fops);
> cdev->owner = THIS_MODULE;
> - err = cdev_add(cdev, devno, 1);
> - if (err) {
> - pr_err("Failed to add char device %s\n", name);
> - return err;
> +
> + *dev = kzalloc(sizeof(**dev), GFP_KERNEL);
> + if (!*dev)
> + return -ENOMEM;
> +
> + device_initialize(*dev);
> + (*dev)->devt = MKDEV(hdev->major, minor);
> + (*dev)->class = hclass;
> + (*dev)->release = device_release_func;
> + dev_set_drvdata(*dev, hdev);
> + dev_set_name(*dev, "%s", name);
> +
> + return 0;
> +}
> +
> +static int device_cdev_sysfs_add(struct hl_device *hdev)
> +{
> + int rc;
> +
> + rc = cdev_device_add(&hdev->cdev, hdev->dev);
> + if (rc) {
> + dev_err(hdev->dev,
> + "failed to add a char device to the system\n");
> + return rc;
> }
>
> - *dev = device_create(hclass, NULL, devno, NULL, "%s", name);
> - if (IS_ERR(*dev)) {
> - pr_err("Failed to create device %s\n", name);
> - err = PTR_ERR(*dev);
> - goto err_device_create;
> + rc = cdev_device_add(&hdev->cdev_ctrl, hdev->dev_ctrl);
> + if (rc) {
> + dev_err(hdev->dev,
> + "failed to add a control char device to the system\n");
> + goto delete_cdev_device;
> }
>
> - dev_set_drvdata(*dev, hdev);
> + /* hl_sysfs_init() must be done after adding the device to the system */
> + rc = hl_sysfs_init(hdev);
> + if (rc) {
> + dev_err(hdev->dev, "failed to initialize sysfs\n");
> + goto delete_ctrl_cdev_device;
> + }
> +
> + hdev->cdev_sysfs_created = true;
>
> return 0;
>
> -err_device_create:
> - cdev_del(cdev);
> - return err;
> +delete_ctrl_cdev_device:
> + cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
> +delete_cdev_device:
> + cdev_device_del(&hdev->cdev, hdev->dev);
> + return rc;
> +}
> +
> +static void device_cdev_sysfs_del(struct hl_device *hdev)
> +{
> + /* device_release() won't be called so must free devices explicitly */
> + if (!hdev->cdev_sysfs_created) {
> + kfree(hdev->dev_ctrl);
> + kfree(hdev->dev);
> + return;
> + }
> +
> + hl_sysfs_fini(hdev);
> + cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
> + cdev_device_del(&hdev->cdev, hdev->dev);
> }
>
> /*
> @@ -898,13 +942,16 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
> {
> int i, rc, cq_ready_cnt;
> char *name;
> + bool add_cdev_sysfs_on_err = false;
>
> name = kasprintf(GFP_KERNEL, "hl%d", hdev->id / 2);
> - if (!name)
> - return -ENOMEM;
> + if (!name) {
> + rc = -ENOMEM;
> + goto out_disabled;
> + }
>
> - /* Create device */
> - rc = device_setup_cdev(hdev, hclass, hdev->id, &hl_ops, name,
> + /* Initialize cdev and device structures */
> + rc = device_init_cdev(hdev, hclass, hdev->id, &hl_ops, name,
> &hdev->cdev, &hdev->dev);
>
> kfree(name);
> @@ -915,22 +962,22 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
> name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->id / 2);
> if (!name) {
> rc = -ENOMEM;
> - goto release_device;
> + goto free_dev;
> }
>
> - /* Create control device */
> - rc = device_setup_cdev(hdev, hclass, hdev->id_control, &hl_ctrl_ops,
> + /* Initialize cdev and device structures for control device */
> + rc = device_init_cdev(hdev, hclass, hdev->id_control, &hl_ctrl_ops,
> name, &hdev->cdev_ctrl, &hdev->dev_ctrl);
>
> kfree(name);
>
> if (rc)
> - goto release_device;
> + goto free_dev;
>
> /* Initialize ASIC function pointers and perform early init */
> rc = device_early_init(hdev);
> if (rc)
> - goto release_control_device;
> + goto free_dev_ctrl;
>
> /*
> * Start calling ASIC initialization. First S/W then H/W and finally
> @@ -1016,12 +1063,6 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
> goto release_ctx;
> }
>
> - rc = hl_sysfs_init(hdev);
> - if (rc) {
> - dev_err(hdev->dev, "failed to initialize sysfs\n");
> - goto free_cb_pool;
> - }
> -
> hl_debugfs_add_device(hdev);
>
> if (hdev->asic_funcs->get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
> @@ -1030,6 +1071,12 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
> hdev->asic_funcs->hw_fini(hdev, true);
> }
>
> + /*
> + * From this point, in case of an error, add char devices and create
> + * sysfs nodes as part of the error flow, to allow debugging.
> + */
> + add_cdev_sysfs_on_err = true;
> +
> rc = hdev->asic_funcs->hw_init(hdev);
> if (rc) {
> dev_err(hdev->dev, "failed to initialize the H/W\n");
> @@ -1066,9 +1113,24 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
> }
>
> /*
> - * hl_hwmon_init must be called after device_late_init, because only
> + * Expose devices and sysfs nodes to user.
> + * From here there is no need to add char devices and create sysfs nodes
> + * in case of an error.
> + */
> + add_cdev_sysfs_on_err = false;
> + rc = device_cdev_sysfs_add(hdev);
> + if (rc) {
> + dev_err(hdev->dev,
> + "Failed to add char devices and sysfs nodes\n");
> + rc = 0;
> + goto out_disabled;
> + }
> +
> + /*
> + * hl_hwmon_init() must be called after device_late_init(), because only
> * there we get the information from the device about which
> - * hwmon-related sensors the device supports
> + * hwmon-related sensors the device supports.
> + * Furthermore, it must be done after adding the device to the system.
> */
> rc = hl_hwmon_init(hdev);
> if (rc) {
> @@ -1084,8 +1146,6 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
>
> return 0;
>
> -free_cb_pool:
> - hl_cb_pool_fini(hdev);
> release_ctx:
> if (hl_ctx_put(hdev->kernel_ctx) != 1)
> dev_err(hdev->dev,
> @@ -1106,14 +1166,14 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
> hdev->asic_funcs->sw_fini(hdev);
> early_fini:
> device_early_fini(hdev);
> -release_control_device:
> - device_destroy(hclass, hdev->dev_ctrl->devt);
> - cdev_del(&hdev->cdev_ctrl);
> -release_device:
> - device_destroy(hclass, hdev->dev->devt);
> - cdev_del(&hdev->cdev);
> +free_dev_ctrl:
> + kfree(hdev->dev_ctrl);
> +free_dev:
> + kfree(hdev->dev);
> out_disabled:
> hdev->disabled = true;
> + if (add_cdev_sysfs_on_err)
> + device_cdev_sysfs_add(hdev);
> if (hdev->pdev)
> dev_err(&hdev->pdev->dev,
> "Failed to initialize hl%d. Device is NOT usable !\n",
> @@ -1179,8 +1239,6 @@ void hl_device_fini(struct hl_device *hdev)
>
> hl_debugfs_remove_device(hdev);
>
> - hl_sysfs_fini(hdev);
> -
> /*
> * Halt the engines and disable interrupts so we won't get any more
> * completions from H/W and we won't have any accesses from the
> @@ -1223,11 +1281,8 @@ void hl_device_fini(struct hl_device *hdev)
>
> device_early_fini(hdev);
>
> - /* Hide device from user */
> - device_destroy(hdev->dev_ctrl->class, hdev->dev_ctrl->devt);
> - cdev_del(&hdev->cdev_ctrl);
> - device_destroy(hdev->dev->class, hdev->dev->devt);
> - cdev_del(&hdev->cdev);
> + /* Hide devices and sysfs nodes from user */
> + device_cdev_sysfs_del(hdev);
>
> pr_info("removed device successfully\n");
> }
> diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
> index aaaa29d98901..1bb181285589 100644
> --- a/drivers/misc/habanalabs/habanalabs.h
> +++ b/drivers/misc/habanalabs/habanalabs.h
> @@ -1216,6 +1216,7 @@ struct hl_device_reset_work {
> * @dma_mask: the dma mask that was set for this device
> * @in_debug: is device under debug. This, together with fpriv_list, enforces
> * that only a single user is configuring the debug infrastructure.
> + * @cdev_sysfs_created: were char devices and sysfs nodes created.
> */
> struct hl_device {
> struct pci_dev *pdev;
> @@ -1291,6 +1292,7 @@ struct hl_device {
> u8 device_cpu_disabled;
> u8 dma_mask;
> u8 in_debug;
> + u8 cdev_sysfs_created;
>
> /* Parameters for bring-up */
> u8 mmu_enable;
> --
> 2.17.1
>

This patch is:
Reviewed-by: Oded Gabbay <[email protected]>