2019-09-04 06:27:38

by Pi-Hsun Shih

[permalink] [raw]
Subject: [PATCH] platform/chrome: cros_ec_rpmsg: Fix race with host command when probe failed.

Since the rpmsg_endpoint is created before probe is called, it's
possible that a host event is received during cros_ec_register, and
there would be some pending work in the host_event_work workqueue while
cros_ec_register is called.

If cros_ec_register fails, when the leftover work in host_event_work
run, the ec_dev from the drvdata of the rpdev could be already set to
NULL, causing kernel crash when trying to run cros_ec_get_next_event.

Fix this by creating the rpmsg_endpoint by ourself, and when
cros_ec_register fails (or on remove), destroy the endpoint first (to
make sure there's no more new calls to cros_ec_rpmsg_callback), and then
cancel all works in the host_event_work workqueue.

Fixes: 2de89fd98958 ("platform/chrome: cros_ec: Add EC host command support using rpmsg")
Signed-off-by: Pi-Hsun Shih <[email protected]>
---
drivers/platform/chrome/cros_ec_rpmsg.c | 33 +++++++++++++++++++++----
1 file changed, 28 insertions(+), 5 deletions(-)

diff --git a/drivers/platform/chrome/cros_ec_rpmsg.c b/drivers/platform/chrome/cros_ec_rpmsg.c
index 8b6bd775cc9a..0c3738c3244d 100644
--- a/drivers/platform/chrome/cros_ec_rpmsg.c
+++ b/drivers/platform/chrome/cros_ec_rpmsg.c
@@ -41,6 +41,7 @@ struct cros_ec_rpmsg {
struct rpmsg_device *rpdev;
struct completion xfer_ack;
struct work_struct host_event_work;
+ struct rpmsg_endpoint *ept;
};

/**
@@ -72,7 +73,6 @@ static int cros_ec_pkt_xfer_rpmsg(struct cros_ec_device *ec_dev,
struct cros_ec_command *ec_msg)
{
struct cros_ec_rpmsg *ec_rpmsg = ec_dev->priv;
- struct rpmsg_device *rpdev = ec_rpmsg->rpdev;
struct ec_host_response *response;
unsigned long timeout;
int len;
@@ -85,7 +85,7 @@ static int cros_ec_pkt_xfer_rpmsg(struct cros_ec_device *ec_dev,
dev_dbg(ec_dev->dev, "prepared, len=%d\n", len);

reinit_completion(&ec_rpmsg->xfer_ack);
- ret = rpmsg_send(rpdev->ept, ec_dev->dout, len);
+ ret = rpmsg_send(ec_rpmsg->ept, ec_dev->dout, len);
if (ret) {
dev_err(ec_dev->dev, "rpmsg send failed\n");
return ret;
@@ -196,11 +196,24 @@ static int cros_ec_rpmsg_callback(struct rpmsg_device *rpdev, void *data,
return 0;
}

+static struct rpmsg_endpoint *
+cros_ec_rpmsg_create_ept(struct rpmsg_device *rpdev)
+{
+ struct rpmsg_channel_info chinfo = {};
+
+ strscpy(chinfo.name, rpdev->id.name, RPMSG_NAME_SIZE);
+ chinfo.src = rpdev->src;
+ chinfo.dst = RPMSG_ADDR_ANY;
+
+ return rpmsg_create_ept(rpdev, cros_ec_rpmsg_callback, NULL, chinfo);
+}
+
static int cros_ec_rpmsg_probe(struct rpmsg_device *rpdev)
{
struct device *dev = &rpdev->dev;
struct cros_ec_rpmsg *ec_rpmsg;
struct cros_ec_device *ec_dev;
+ int ret;

ec_dev = devm_kzalloc(dev, sizeof(*ec_dev), GFP_KERNEL);
if (!ec_dev)
@@ -225,7 +238,18 @@ static int cros_ec_rpmsg_probe(struct rpmsg_device *rpdev)
INIT_WORK(&ec_rpmsg->host_event_work,
cros_ec_rpmsg_host_event_function);

- return cros_ec_register(ec_dev);
+ ec_rpmsg->ept = cros_ec_rpmsg_create_ept(rpdev);
+ if (!ec_rpmsg->ept)
+ return -ENOMEM;
+
+ ret = cros_ec_register(ec_dev);
+ if (ret < 0) {
+ rpmsg_destroy_ept(ec_rpmsg->ept);
+ cancel_work_sync(&ec_rpmsg->host_event_work);
+ return ret;
+ }
+
+ return 0;
}

static void cros_ec_rpmsg_remove(struct rpmsg_device *rpdev)
@@ -234,7 +258,7 @@ static void cros_ec_rpmsg_remove(struct rpmsg_device *rpdev)
struct cros_ec_rpmsg *ec_rpmsg = ec_dev->priv;

cros_ec_unregister(ec_dev);
-
+ rpmsg_destroy_ept(ec_rpmsg->ept);
cancel_work_sync(&ec_rpmsg->host_event_work);
}

@@ -271,7 +295,6 @@ static struct rpmsg_driver cros_ec_driver_rpmsg = {
},
.probe = cros_ec_rpmsg_probe,
.remove = cros_ec_rpmsg_remove,
- .callback = cros_ec_rpmsg_callback,
};

module_rpmsg_driver(cros_ec_driver_rpmsg);
--
2.23.0.187.g17f5b7556c-goog


2019-09-19 13:57:58

by Enric Balletbo i Serra

[permalink] [raw]
Subject: Re: [PATCH] platform/chrome: cros_ec_rpmsg: Fix race with host command when probe failed.

Hi,

On 4/9/19 8:26, Pi-Hsun Shih wrote:
> Since the rpmsg_endpoint is created before probe is called, it's
> possible that a host event is received during cros_ec_register, and
> there would be some pending work in the host_event_work workqueue while
> cros_ec_register is called.
>
> If cros_ec_register fails, when the leftover work in host_event_work
> run, the ec_dev from the drvdata of the rpdev could be already set to
> NULL, causing kernel crash when trying to run cros_ec_get_next_event.
>
> Fix this by creating the rpmsg_endpoint by ourself, and when
> cros_ec_register fails (or on remove), destroy the endpoint first (to
> make sure there's no more new calls to cros_ec_rpmsg_callback), and then
> cancel all works in the host_event_work workqueue.
>
> Fixes: 2de89fd98958 ("platform/chrome: cros_ec: Add EC host command support using rpmsg")
> Signed-off-by: Pi-Hsun Shih <[email protected]>
> ---

Added the stable tag and applied for 5.4, the patches went to linux-next some
time ago, so sorry for late notice.

Thanks,
Enric


> drivers/platform/chrome/cros_ec_rpmsg.c | 33 +++++++++++++++++++++----
> 1 file changed, 28 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/platform/chrome/cros_ec_rpmsg.c b/drivers/platform/chrome/cros_ec_rpmsg.c
> index 8b6bd775cc9a..0c3738c3244d 100644
> --- a/drivers/platform/chrome/cros_ec_rpmsg.c
> +++ b/drivers/platform/chrome/cros_ec_rpmsg.c
> @@ -41,6 +41,7 @@ struct cros_ec_rpmsg {
> struct rpmsg_device *rpdev;
> struct completion xfer_ack;
> struct work_struct host_event_work;
> + struct rpmsg_endpoint *ept;
> };
>
> /**
> @@ -72,7 +73,6 @@ static int cros_ec_pkt_xfer_rpmsg(struct cros_ec_device *ec_dev,
> struct cros_ec_command *ec_msg)
> {
> struct cros_ec_rpmsg *ec_rpmsg = ec_dev->priv;
> - struct rpmsg_device *rpdev = ec_rpmsg->rpdev;
> struct ec_host_response *response;
> unsigned long timeout;
> int len;
> @@ -85,7 +85,7 @@ static int cros_ec_pkt_xfer_rpmsg(struct cros_ec_device *ec_dev,
> dev_dbg(ec_dev->dev, "prepared, len=%d\n", len);
>
> reinit_completion(&ec_rpmsg->xfer_ack);
> - ret = rpmsg_send(rpdev->ept, ec_dev->dout, len);
> + ret = rpmsg_send(ec_rpmsg->ept, ec_dev->dout, len);
> if (ret) {
> dev_err(ec_dev->dev, "rpmsg send failed\n");
> return ret;
> @@ -196,11 +196,24 @@ static int cros_ec_rpmsg_callback(struct rpmsg_device *rpdev, void *data,
> return 0;
> }
>
> +static struct rpmsg_endpoint *
> +cros_ec_rpmsg_create_ept(struct rpmsg_device *rpdev)
> +{
> + struct rpmsg_channel_info chinfo = {};
> +
> + strscpy(chinfo.name, rpdev->id.name, RPMSG_NAME_SIZE);
> + chinfo.src = rpdev->src;
> + chinfo.dst = RPMSG_ADDR_ANY;
> +
> + return rpmsg_create_ept(rpdev, cros_ec_rpmsg_callback, NULL, chinfo);
> +}
> +
> static int cros_ec_rpmsg_probe(struct rpmsg_device *rpdev)
> {
> struct device *dev = &rpdev->dev;
> struct cros_ec_rpmsg *ec_rpmsg;
> struct cros_ec_device *ec_dev;
> + int ret;
>
> ec_dev = devm_kzalloc(dev, sizeof(*ec_dev), GFP_KERNEL);
> if (!ec_dev)
> @@ -225,7 +238,18 @@ static int cros_ec_rpmsg_probe(struct rpmsg_device *rpdev)
> INIT_WORK(&ec_rpmsg->host_event_work,
> cros_ec_rpmsg_host_event_function);
>
> - return cros_ec_register(ec_dev);
> + ec_rpmsg->ept = cros_ec_rpmsg_create_ept(rpdev);
> + if (!ec_rpmsg->ept)
> + return -ENOMEM;
> +
> + ret = cros_ec_register(ec_dev);
> + if (ret < 0) {
> + rpmsg_destroy_ept(ec_rpmsg->ept);
> + cancel_work_sync(&ec_rpmsg->host_event_work);
> + return ret;
> + }
> +
> + return 0;
> }
>
> static void cros_ec_rpmsg_remove(struct rpmsg_device *rpdev)
> @@ -234,7 +258,7 @@ static void cros_ec_rpmsg_remove(struct rpmsg_device *rpdev)
> struct cros_ec_rpmsg *ec_rpmsg = ec_dev->priv;
>
> cros_ec_unregister(ec_dev);
> -
> + rpmsg_destroy_ept(ec_rpmsg->ept);
> cancel_work_sync(&ec_rpmsg->host_event_work);
> }
>
> @@ -271,7 +295,6 @@ static struct rpmsg_driver cros_ec_driver_rpmsg = {
> },
> .probe = cros_ec_rpmsg_probe,
> .remove = cros_ec_rpmsg_remove,
> - .callback = cros_ec_rpmsg_callback,
> };
>
> module_rpmsg_driver(cros_ec_driver_rpmsg);
>