Try to get the device ID repeatedly during initialization before giving up.
The BMC isn't always responsive, and this allows it to be slightly flaky
during early boot.
Tested: Installed on a system with the BMC software disabled
such that it was non-responsive. The driver correctly detected this
and gave up as expected. Then I re-enabled the BMC software, unloaded
and reloaded the driver, and it was detected properly.
Signed-off-by: Patrick Venture <[email protected]>
---
drivers/char/ipmi/ipmi_si_intf.c | 24 +++++++++++++++++++++++-
1 file changed, 23 insertions(+), 1 deletion(-)
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 90ec010bffbd..a1ca34af7698 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -1918,11 +1918,14 @@ int ipmi_si_add_smi(struct si_sm_io *io)
* held, primarily to keep smi_num consistent, we only one to do these
* one at a time.
*/
+#define GET_DEVICE_ID_ATTEMPTS 5
static int try_smi_init(struct smi_info *new_smi)
{
int rv = 0;
int i;
char *init_name = NULL;
+ bool platform_device_registered = false;
+ unsigned long sleep_rm;
pr_info(PFX "Trying %s-specified %s state machine at %s address 0x%lx, slave address 0x%x, irq %d\n",
ipmi_addr_src_to_str(new_smi->io.addr_source),
@@ -2003,7 +2006,26 @@ static int try_smi_init(struct smi_info *new_smi)
* Attempt a get device id command. If it fails, we probably
* don't have a BMC here.
*/
- rv = try_get_dev_id(new_smi);
+ for (i = 0; i < GET_DEVICE_ID_ATTEMPTS; i++) {
+ pr_info(PFX "Attempting to read BMC device ID\n");
+ rv = try_get_dev_id(new_smi);
+ /* If it succeeded, stop trying */
+ if (!rv)
+ break;
+
+ /* Sleep for ~0.25s before trying again instead of hammering
+ * the BMC.
+ */
+ sleep_rm = msleep_interruptible(250);
+ if (sleep_rm != 0) {
+ pr_info(PFX "Find BMC interrupted\n");
+ rv = -EINTR;
+ goto out_err;
+ }
+ }
+
+ /* If we exited the loop above and rv is non-zero we ran out of tries.
+ */
if (rv) {
if (new_smi->io.addr_source)
dev_err(new_smi->io.dev,
--
2.19.0.rc2.392.g5ba43deb5a-goog
On Tue, Sep 11, 2018 at 3:52 PM Patrick Venture <[email protected]> wrote:
>
> Try to get the device ID repeatedly during initialization before giving up.
> The BMC isn't always responsive, and this allows it to be slightly flaky
> during early boot.
>
> Tested: Installed on a system with the BMC software disabled
> such that it was non-responsive. The driver correctly detected this
> and gave up as expected. Then I re-enabled the BMC software unloaded
> and reloaded the driver and it was detected properly.
>
> Signed-off-by: Patrick Venture <[email protected]>
> ---
> drivers/char/ipmi/ipmi_si_intf.c | 24 +++++++++++++++++++++++-
> 1 file changed, 23 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
> index 90ec010bffbd..a1ca34af7698 100644
> --- a/drivers/char/ipmi/ipmi_si_intf.c
> +++ b/drivers/char/ipmi/ipmi_si_intf.c
> @@ -1918,11 +1918,14 @@ int ipmi_si_add_smi(struct si_sm_io *io)
> * held, primarily to keep smi_num consistent, we only one to do these
> * one at a time.
> */
> +#define GET_DEVICE_ID_ATTEMPTS 5
> static int try_smi_init(struct smi_info *new_smi)
> {
> int rv = 0;
> int i;
> char *init_name = NULL;
> + bool platform_device_registered = false;
platform_device_registered -- looks like I accidentally included a
variable from my branch.
> + unsigned long sleep_rm;
>
> pr_info(PFX "Trying %s-specified %s state machine at %s address 0x%lx, slave address 0x%x, irq %d\n",
> ipmi_addr_src_to_str(new_smi->io.addr_source),
> @@ -2003,7 +2006,26 @@ static int try_smi_init(struct smi_info *new_smi)
> * Attempt a get device id command. If it fails, we probably
> * don't have a BMC here.
> */
> - rv = try_get_dev_id(new_smi);
> + for (i = 0; i < GET_DEVICE_ID_ATTEMPTS; i++) {
> + pr_info(PFX "Attempting to read BMC device ID\n");
> + rv = try_get_dev_id(new_smi);
> + /* If it succeeded, stop trying */
> + if (!rv)
> + break;
> +
> + /* Sleep for ~0.25s before trying again instead of hammering
> + * the BMC.
> + */
> + sleep_rm = msleep_interruptible(250);
> + if (sleep_rm != 0) {
> + pr_info(PFX "Find BMC interrupted\n");
> + rv = -EINTR;
> + goto out_err;
> + }
> + }
> +
> + /* If we exited the loop above and rv is non-zero we ran out of tries.
> + */
> if (rv) {
> if (new_smi->io.addr_source)
> dev_err(new_smi->io.dev,
> --
> 2.19.0.rc2.392.g5ba43deb5a-goog
>