2018-08-23 20:39:01

by Corey Minyard

[permalink] [raw]
Subject: [PATCH 0/2] ipmi: Fix issues in the BT code.

The first change fixes the race condition at startup between
capabilities detection and the startup of the upper layers of
the IPMI driver.

The second is cleanup code that I'm doing as I touch things in the
driver.



2018-08-23 20:38:51

by Corey Minyard

[permalink] [raw]
Subject: [PATCH 1/2] ipmi: Move BT capabilities detection to the detect call

From: Corey Minyard <[email protected]>

The capabilities detection was being done as part of the normal
state machine, but it was possible for it to be running while
the upper layers of the IPMI driver were initializing the
device, resulting in error and failure to initialize.

Move the capabilities detection to the the detect function,
so it's done before anything else runs on the device. This also
simplifies the state machine and removes some code, as a bonus.

Signed-off-by: Corey Minyard <[email protected]>
Reported-by: Andrew Banman <[email protected]>
---
drivers/char/ipmi/ipmi_bt_sm.c | 92 ++++++++++++++++++++++--------------------
1 file changed, 48 insertions(+), 44 deletions(-)

diff --git a/drivers/char/ipmi/ipmi_bt_sm.c b/drivers/char/ipmi/ipmi_bt_sm.c
index cbc6126..b4133832e 100644
--- a/drivers/char/ipmi/ipmi_bt_sm.c
+++ b/drivers/char/ipmi/ipmi_bt_sm.c
@@ -59,8 +59,6 @@ enum bt_states {
BT_STATE_RESET3,
BT_STATE_RESTART,
BT_STATE_PRINTME,
- BT_STATE_CAPABILITIES_BEGIN,
- BT_STATE_CAPABILITIES_END,
BT_STATE_LONG_BUSY /* BT doesn't get hosed :-) */
};

@@ -86,7 +84,6 @@ struct si_sm_data {
int error_retries; /* end of "common" fields */
int nonzero_status; /* hung BMCs stay all 0 */
enum bt_states complete; /* to divert the state machine */
- int BT_CAP_outreqs;
long BT_CAP_req2rsp;
int BT_CAP_retries; /* Recommended retries */
};
@@ -137,8 +134,6 @@ static char *state2txt(unsigned char state)
case BT_STATE_RESET3: return("RESET3");
case BT_STATE_RESTART: return("RESTART");
case BT_STATE_LONG_BUSY: return("LONG_BUSY");
- case BT_STATE_CAPABILITIES_BEGIN: return("CAP_BEGIN");
- case BT_STATE_CAPABILITIES_END: return("CAP_END");
}
return("BAD STATE");
}
@@ -185,7 +180,6 @@ static unsigned int bt_init_data(struct si_sm_data *bt, struct si_sm_io *io)
bt->complete = BT_STATE_IDLE; /* end here */
bt->BT_CAP_req2rsp = BT_NORMAL_TIMEOUT * USEC_PER_SEC;
bt->BT_CAP_retries = BT_NORMAL_RETRY_LIMIT;
- /* BT_CAP_outreqs == zero is a flag to read BT Capabilities */
return 3; /* We claim 3 bytes of space; ought to check SPMI table */
}

@@ -447,7 +441,7 @@ static enum si_sm_result error_recovery(struct si_sm_data *bt,

static enum si_sm_result bt_event(struct si_sm_data *bt, long time)
{
- unsigned char status, BT_CAP[8];
+ unsigned char status;
static enum bt_states last_printed = BT_STATE_PRINTME;
int i;

@@ -500,12 +494,6 @@ static enum si_sm_result bt_event(struct si_sm_data *bt, long time)
if (status & BT_H_BUSY) /* clear a leftover H_BUSY */
BT_CONTROL(BT_H_BUSY);

- bt->timeout = bt->BT_CAP_req2rsp;
-
- /* Read BT capabilities if it hasn't been done yet */
- if (!bt->BT_CAP_outreqs)
- BT_STATE_CHANGE(BT_STATE_CAPABILITIES_BEGIN,
- SI_SM_CALL_WITHOUT_DELAY);
BT_SI_SM_RETURN(SI_SM_IDLE);

case BT_STATE_XACTION_START:
@@ -610,37 +598,6 @@ static enum si_sm_result bt_event(struct si_sm_data *bt, long time)
BT_STATE_CHANGE(BT_STATE_XACTION_START,
SI_SM_CALL_WITH_DELAY);

- /*
- * Get BT Capabilities, using timing of upper level state machine.
- * Set outreqs to prevent infinite loop on timeout.
- */
- case BT_STATE_CAPABILITIES_BEGIN:
- bt->BT_CAP_outreqs = 1;
- {
- unsigned char GetBT_CAP[] = { 0x18, 0x36 };
- bt->state = BT_STATE_IDLE;
- bt_start_transaction(bt, GetBT_CAP, sizeof(GetBT_CAP));
- }
- bt->complete = BT_STATE_CAPABILITIES_END;
- BT_STATE_CHANGE(BT_STATE_XACTION_START,
- SI_SM_CALL_WITH_DELAY);
-
- case BT_STATE_CAPABILITIES_END:
- i = bt_get_result(bt, BT_CAP, sizeof(BT_CAP));
- bt_init_data(bt, bt->io);
- if ((i == 8) && !BT_CAP[2]) {
- bt->BT_CAP_outreqs = BT_CAP[3];
- bt->BT_CAP_req2rsp = BT_CAP[6] * USEC_PER_SEC;
- bt->BT_CAP_retries = BT_CAP[7];
- } else
- pr_warn("IPMI BT: using default values\n");
- if (!bt->BT_CAP_outreqs)
- bt->BT_CAP_outreqs = 1;
- pr_warn("IPMI BT: req2rsp=%ld secs retries=%d\n",
- bt->BT_CAP_req2rsp / USEC_PER_SEC, bt->BT_CAP_retries);
- bt->timeout = bt->BT_CAP_req2rsp;
- return SI_SM_CALL_WITHOUT_DELAY;
-
default: /* should never occur */
return error_recovery(bt,
status,
@@ -651,6 +608,11 @@ static enum si_sm_result bt_event(struct si_sm_data *bt, long time)

static int bt_detect(struct si_sm_data *bt)
{
+ unsigned char GetBT_CAP[] = { 0x18, 0x36 };
+ unsigned char BT_CAP[8];
+ enum si_sm_result smi_result;
+ int rv;
+
/*
* It's impossible for the BT status and interrupt registers to be
* all 1's, (assuming a properly functioning, self-initialized BMC)
@@ -661,6 +623,48 @@ static int bt_detect(struct si_sm_data *bt)
if ((BT_STATUS == 0xFF) && (BT_INTMASK_R == 0xFF))
return 1;
reset_flags(bt);
+
+ /*
+ * Try getting the BT capabilities here.
+ */
+ rv = bt_start_transaction(bt, GetBT_CAP, sizeof(GetBT_CAP));
+ if (rv) {
+ dev_warn(bt->io->dev,
+ "Can't start capabilities transaction: %d\n", rv);
+ goto out_no_bt_cap;
+ }
+
+ smi_result = SI_SM_CALL_WITHOUT_DELAY;
+ for (;;) {
+ if (smi_result == SI_SM_CALL_WITH_DELAY ||
+ smi_result == SI_SM_CALL_WITH_TICK_DELAY) {
+ schedule_timeout_uninterruptible(1);
+ smi_result = bt_event(bt, jiffies_to_usecs(1));
+ } else if (smi_result == SI_SM_CALL_WITHOUT_DELAY) {
+ smi_result = bt_event(bt, 0);
+ } else
+ break;
+ }
+
+ rv = bt_get_result(bt, BT_CAP, sizeof(BT_CAP));
+ bt_init_data(bt, bt->io);
+ if (rv < 8) {
+ dev_warn(bt->io->dev, "bt cap response too short: %d\n", rv);
+ goto out_no_bt_cap;
+ }
+
+ if (BT_CAP[2]) {
+ dev_warn(bt->io->dev, "Error fetching bt cap: %x\n", BT_CAP[2]);
+out_no_bt_cap:
+ dev_warn(bt->io->dev, "using default values\n");
+ } else {
+ bt->BT_CAP_req2rsp = BT_CAP[6] * USEC_PER_SEC;
+ bt->BT_CAP_retries = BT_CAP[7];
+ }
+
+ dev_info(bt->io->dev, "req2rsp=%ld secs retries=%d\n",
+ bt->BT_CAP_req2rsp / USEC_PER_SEC, bt->BT_CAP_retries);
+
return 0;
}

--
2.7.4


2018-08-23 20:39:43

by Corey Minyard

[permalink] [raw]
Subject: [PATCH 2/2] ipmi: Convert pr_xxx() to dev_xxx() in the BT code

From: Corey Minyard <[email protected]>

Cleanups, do the replacement and change the levels to the proper
ones for the function they are serving, as many were wrong.

Signed-off-by: Corey Minyard <[email protected]>
---
drivers/char/ipmi/ipmi_bt_sm.c | 38 ++++++++++++++++++++++----------------
1 file changed, 22 insertions(+), 16 deletions(-)

diff --git a/drivers/char/ipmi/ipmi_bt_sm.c b/drivers/char/ipmi/ipmi_bt_sm.c
index b4133832e..f3f216c 100644
--- a/drivers/char/ipmi/ipmi_bt_sm.c
+++ b/drivers/char/ipmi/ipmi_bt_sm.c
@@ -8,6 +8,8 @@
* Author: Rocky Craig <[email protected]>
*/

+#define DEBUG /* So dev_dbg() is always available. */
+
#include <linux/kernel.h> /* For printk. */
#include <linux/string.h>
#include <linux/module.h>
@@ -215,8 +217,8 @@ static int bt_start_transaction(struct si_sm_data *bt,
return IPMI_NOT_IN_MY_STATE_ERR;

if (bt_debug & BT_DEBUG_MSG) {
- pr_warn("BT: +++++++++++++++++ New command\n");
- pr_warn("BT: NetFn/LUN CMD [%d data]:", size - 2);
+ dev_dbg(bt->io->dev, "+++++++++++++++++ New command\n");
+ dev_dbg(bt->io->dev, "NetFn/LUN CMD [%d data]:", size - 2);
for (i = 0; i < size; i ++)
pr_cont(" %02x", data[i]);
pr_cont("\n");
@@ -260,7 +262,7 @@ static int bt_get_result(struct si_sm_data *bt,
memcpy(data + 2, bt->read_data + 4, msg_len - 2);

if (bt_debug & BT_DEBUG_MSG) {
- pr_warn("BT: result %d bytes:", msg_len);
+ dev_dbg(bt->io->dev, "result %d bytes:", msg_len);
for (i = 0; i < msg_len; i++)
pr_cont(" %02x", data[i]);
pr_cont("\n");
@@ -274,7 +276,7 @@ static int bt_get_result(struct si_sm_data *bt,
static void reset_flags(struct si_sm_data *bt)
{
if (bt_debug)
- pr_warn("IPMI BT: flag reset %s\n", status2txt(BT_STATUS));
+ dev_dbg(bt->io->dev, "flag reset %s\n", status2txt(BT_STATUS));
if (BT_STATUS & BT_H_BUSY)
BT_CONTROL(BT_H_BUSY); /* force clear */
BT_CONTROL(BT_CLR_WR_PTR); /* always reset */
@@ -300,7 +302,8 @@ static void drain_BMC2HOST(struct si_sm_data *bt)
BT_CONTROL(BT_B2H_ATN); /* some BMCs are stubborn */
BT_CONTROL(BT_CLR_RD_PTR); /* always reset */
if (bt_debug)
- pr_warn("IPMI BT: stale response %s; ", status2txt(BT_STATUS));
+ dev_dbg(bt->io->dev, "stale response %s; ",
+ status2txt(BT_STATUS));
size = BMC2HOST;
for (i = 0; i < size ; i++)
BMC2HOST;
@@ -314,7 +317,7 @@ static inline void write_all_bytes(struct si_sm_data *bt)
int i;

if (bt_debug & BT_DEBUG_MSG) {
- pr_warn("BT: write %d bytes seq=0x%02X",
+ dev_dbg(bt->io->dev, "write %d bytes seq=0x%02X",
bt->write_count, bt->seq);
for (i = 0; i < bt->write_count; i++)
pr_cont(" %02x", bt->write_data[i]);
@@ -338,7 +341,8 @@ static inline int read_all_bytes(struct si_sm_data *bt)

if (bt->read_count < 4 || bt->read_count >= IPMI_MAX_MSG_LENGTH) {
if (bt_debug & BT_DEBUG_MSG)
- pr_warn("BT: bad raw rsp len=%d\n", bt->read_count);
+ dev_dbg(bt->io->dev,
+ "bad raw rsp len=%d\n", bt->read_count);
bt->truncated = 1;
return 1; /* let next XACTION START clean it up */
}
@@ -349,7 +353,8 @@ static inline int read_all_bytes(struct si_sm_data *bt)
if (bt_debug & BT_DEBUG_MSG) {
int max = bt->read_count;

- pr_warn("BT: got %d bytes seq=0x%02X", max, bt->read_data[2]);
+ dev_dbg(bt->io->dev,
+ "got %d bytes seq=0x%02X", max, bt->read_data[2]);
if (max > 16)
max = 16;
for (i = 0; i < max; i++)
@@ -364,7 +369,8 @@ static inline int read_all_bytes(struct si_sm_data *bt)
return 1;

if (bt_debug & BT_DEBUG_MSG)
- pr_warn("IPMI BT: bad packet: want 0x(%02X, %02X, %02X) got (%02X, %02X, %02X)\n",
+ dev_dbg(bt->io->dev,
+ "IPMI BT: bad packet: want 0x(%02X, %02X, %02X) got (%02X, %02X, %02X)\n",
bt->write_data[1] | 0x04, bt->write_data[2],
bt->write_data[3],
bt->read_data[1], bt->read_data[2], bt->read_data[3]);
@@ -390,8 +396,8 @@ static enum si_sm_result error_recovery(struct si_sm_data *bt,
break;
}

- pr_warn("IPMI BT: %s in %s %s ", /* open-ended line */
- reason, STATE2TXT, STATUS2TXT);
+ dev_warn(bt->io->dev, "IPMI BT: %s in %s %s ", /* open-ended line */
+ reason, STATE2TXT, STATUS2TXT);

/*
* Per the IPMI spec, retries are based on the sequence number
@@ -405,14 +411,14 @@ static enum si_sm_result error_recovery(struct si_sm_data *bt,
return SI_SM_CALL_WITHOUT_DELAY;
}

- pr_warn("failed %d retries, sending error response\n",
- bt->BT_CAP_retries);
+ dev_warn(bt->io->dev, "failed %d retries, sending error response\n",
+ bt->BT_CAP_retries);
if (!bt->nonzero_status)
- pr_err("IPMI BT: stuck, try power cycle\n");
+ dev_err(bt->io->dev, "stuck, try power cycle\n");

/* this is most likely during insmod */
else if (bt->seq <= (unsigned char)(bt->BT_CAP_retries & 0xFF)) {
- pr_warn("IPMI: BT reset (takes 5 secs)\n");
+ dev_warn(bt->io->dev, "BT reset (takes 5 secs)\n");
bt->state = BT_STATE_RESET1;
return SI_SM_CALL_WITHOUT_DELAY;
}
@@ -448,7 +454,7 @@ static enum si_sm_result bt_event(struct si_sm_data *bt, long time)
status = BT_STATUS;
bt->nonzero_status |= status;
if ((bt_debug & BT_DEBUG_STATES) && (bt->state != last_printed)) {
- pr_warn("BT: %s %s TO=%ld - %ld\n",
+ dev_dbg(bt->io->dev, "BT: %s %s TO=%ld - %ld\n",
STATE2TXT,
STATUS2TXT,
bt->timeout,
--
2.7.4


2018-08-24 04:01:34

by Russ Anderson

[permalink] [raw]
Subject: Re: [PATCH 0/2] ipmi: Fix issues in the BT code.

On Thu, Aug 23, 2018 at 03:37:01PM -0500, [email protected] wrote:
> The first change fixes the race condition at startup between
> capabilities detection and the startup of the upper layers of
> the IPMI driver.
>
> The second is cleanup code that I'm doing as I touch things in the
> driver.

Quick testing looks good. This was with your patch backported to RHEL7.6.

More testing tomorrow but so far so good.


[root@ah-038 ~]# dmesg | grep -i ipmi
[ 76.409474] ipmi message handler version 39.2
[ 76.529702] ipmi device interface
[ 76.773727] IPMI System Interface driver.
[ 76.778277] ipmi_si ipmi_si.0: ipmi_platform: probing via SMBIOS
[ 76.784999] ipmi_si: SMBIOS: mem 0xce4 regsize 1 spacing 1 irq 6
[ 76.806979] ipmi_si: Adding SMBIOS-specified bt state machine
[ 76.813903] ipmi_si IPI0001:00: ipmi_platform: probing via ACPI
[ 76.833722] ipmi_si IPI0001:00: [io 0x0ce4-0x0ce6] regsize 1 spacing 1 irq 6
[ 76.842593] ipmi_si: Adding ACPI-specified bt state machine
[ 76.842980] ipmi_platform: probing via SPMI
[ 76.842982] ipmi_si: SPMI: io 0xce4 regsize 1 spacing 1 irq 6
[ 76.845116] ipmi_si: Trying SMBIOS-specified bt state machine at mem address 0xce4, slave address 0x20, irq 6
[ 76.845123] ipmi_si ipmi_si.0: Could not set up I/O space
[ 76.939459] ipmi_si: Trying ACPI-specified bt state machine at i/o address 0xce4, slave address 0x0, irq 6
[ 76.959709] ipmi_si IPI0001:00: Using irq 6
[ 76.993053] ipmi_si IPI0001:00: Found new BMC (man_id: 0x000000, prod_id: 0x0101, dev_id: 0x20)
[ 77.063491] ipmi_si IPI0001:00: IPMI bt interface initialized

Thanks.
--
Russ Anderson, SuperDome Flex Linux Kernel Group Manager
HPE - Hewlett Packard Enterprise (formerly SGI) [email protected]

2018-08-27 20:18:41

by Andrew Banman

[permalink] [raw]
Subject: RE: [PATCH 1/2] ipmi: Move BT capabilities detection to the detect call

Tested-by: Andrew Banman

Ran through 100+ boots with no error with your 1st patch applied. I don't see any endcases to worry about.

Thanks Corey!

Andrew

> -----Original Message-----
> From: Corey Minyard [mailto:[email protected]] On Behalf Of
> [email protected]
> Sent: Thursday, August 23, 2018 3:37 PM
> To: Banman, Andrew <[email protected]>
> Cc: [email protected]; linux-
> [email protected]; Anderson, Russ <[email protected]>; Ramsay,
> Frank <[email protected]>; Ernst, Justin <[email protected]>;
> Corey Minyard <[email protected]>
> Subject: [PATCH 1/2] ipmi: Move BT capabilities detection to the detect
> call
>
> From: Corey Minyard <[email protected]>
>
> The capabilities detection was being done as part of the normal
> state machine, but it was possible for it to be running while
> the upper layers of the IPMI driver were initializing the
> device, resulting in error and failure to initialize.
>
> Move the capabilities detection to the the detect function,
> so it's done before anything else runs on the device. This also
> simplifies the state machine and removes some code, as a bonus.
>
> Signed-off-by: Corey Minyard <[email protected]>
> Reported-by: Andrew Banman <[email protected]>
> ---
> drivers/char/ipmi/ipmi_bt_sm.c | 92 ++++++++++++++++++++++-------------
> -------
> 1 file changed, 48 insertions(+), 44 deletions(-)
>
> diff --git a/drivers/char/ipmi/ipmi_bt_sm.c
> b/drivers/char/ipmi/ipmi_bt_sm.c
> index cbc6126..b4133832e 100644
> --- a/drivers/char/ipmi/ipmi_bt_sm.c
> +++ b/drivers/char/ipmi/ipmi_bt_sm.c
> @@ -59,8 +59,6 @@ enum bt_states {
> BT_STATE_RESET3,
> BT_STATE_RESTART,
> BT_STATE_PRINTME,
> - BT_STATE_CAPABILITIES_BEGIN,
> - BT_STATE_CAPABILITIES_END,
> BT_STATE_LONG_BUSY /* BT doesn't get hosed :-) */
> };
>
> @@ -86,7 +84,6 @@ struct si_sm_data {
> int error_retries; /* end of "common" fields */
> int nonzero_status; /* hung BMCs stay all 0 */
> enum bt_states complete; /* to divert the state machine */
> - int BT_CAP_outreqs;
> long BT_CAP_req2rsp;
> int BT_CAP_retries; /* Recommended retries */
> };
> @@ -137,8 +134,6 @@ static char *state2txt(unsigned char state)
> case BT_STATE_RESET3: return("RESET3");
> case BT_STATE_RESTART: return("RESTART");
> case BT_STATE_LONG_BUSY: return("LONG_BUSY");
> - case BT_STATE_CAPABILITIES_BEGIN: return("CAP_BEGIN");
> - case BT_STATE_CAPABILITIES_END: return("CAP_END");
> }
> return("BAD STATE");
> }
> @@ -185,7 +180,6 @@ static unsigned int bt_init_data(struct si_sm_data
> *bt, struct si_sm_io *io)
> bt->complete = BT_STATE_IDLE; /* end here */
> bt->BT_CAP_req2rsp = BT_NORMAL_TIMEOUT * USEC_PER_SEC;
> bt->BT_CAP_retries = BT_NORMAL_RETRY_LIMIT;
> - /* BT_CAP_outreqs == zero is a flag to read BT Capabilities */
> return 3; /* We claim 3 bytes of space; ought to check SPMI table
> */
> }
>
> @@ -447,7 +441,7 @@ static enum si_sm_result error_recovery(struct
> si_sm_data *bt,
>
> static enum si_sm_result bt_event(struct si_sm_data *bt, long time)
> {
> - unsigned char status, BT_CAP[8];
> + unsigned char status;
> static enum bt_states last_printed = BT_STATE_PRINTME;
> int i;
>
> @@ -500,12 +494,6 @@ static enum si_sm_result bt_event(struct si_sm_data
> *bt, long time)
> if (status & BT_H_BUSY) /* clear a leftover H_BUSY */
> BT_CONTROL(BT_H_BUSY);
>
> - bt->timeout = bt->BT_CAP_req2rsp;
> -
> - /* Read BT capabilities if it hasn't been done yet */
> - if (!bt->BT_CAP_outreqs)
> - BT_STATE_CHANGE(BT_STATE_CAPABILITIES_BEGIN,
> - SI_SM_CALL_WITHOUT_DELAY);
> BT_SI_SM_RETURN(SI_SM_IDLE);
>
> case BT_STATE_XACTION_START:
> @@ -610,37 +598,6 @@ static enum si_sm_result bt_event(struct si_sm_data
> *bt, long time)
> BT_STATE_CHANGE(BT_STATE_XACTION_START,
> SI_SM_CALL_WITH_DELAY);
>
> - /*
> - * Get BT Capabilities, using timing of upper level state machine.
> - * Set outreqs to prevent infinite loop on timeout.
> - */
> - case BT_STATE_CAPABILITIES_BEGIN:
> - bt->BT_CAP_outreqs = 1;
> - {
> - unsigned char GetBT_CAP[] = { 0x18, 0x36 };
> - bt->state = BT_STATE_IDLE;
> - bt_start_transaction(bt, GetBT_CAP, sizeof(GetBT_CAP));
> - }
> - bt->complete = BT_STATE_CAPABILITIES_END;
> - BT_STATE_CHANGE(BT_STATE_XACTION_START,
> - SI_SM_CALL_WITH_DELAY);
> -
> - case BT_STATE_CAPABILITIES_END:
> - i = bt_get_result(bt, BT_CAP, sizeof(BT_CAP));
> - bt_init_data(bt, bt->io);
> - if ((i == 8) && !BT_CAP[2]) {
> - bt->BT_CAP_outreqs = BT_CAP[3];
> - bt->BT_CAP_req2rsp = BT_CAP[6] * USEC_PER_SEC;
> - bt->BT_CAP_retries = BT_CAP[7];
> - } else
> - pr_warn("IPMI BT: using default values\n");
> - if (!bt->BT_CAP_outreqs)
> - bt->BT_CAP_outreqs = 1;
> - pr_warn("IPMI BT: req2rsp=%ld secs retries=%d\n",
> - bt->BT_CAP_req2rsp / USEC_PER_SEC, bt->BT_CAP_retries);
> - bt->timeout = bt->BT_CAP_req2rsp;
> - return SI_SM_CALL_WITHOUT_DELAY;
> -
> default: /* should never occur */
> return error_recovery(bt,
> status,
> @@ -651,6 +608,11 @@ static enum si_sm_result bt_event(struct si_sm_data
> *bt, long time)
>
> static int bt_detect(struct si_sm_data *bt)
> {
> + unsigned char GetBT_CAP[] = { 0x18, 0x36 };
> + unsigned char BT_CAP[8];
> + enum si_sm_result smi_result;
> + int rv;
> +
> /*
> * It's impossible for the BT status and interrupt registers to be
> * all 1's, (assuming a properly functioning, self-initialized BMC)
> @@ -661,6 +623,48 @@ static int bt_detect(struct si_sm_data *bt)
> if ((BT_STATUS == 0xFF) && (BT_INTMASK_R == 0xFF))
> return 1;
> reset_flags(bt);
> +
> + /*
> + * Try getting the BT capabilities here.
> + */
> + rv = bt_start_transaction(bt, GetBT_CAP, sizeof(GetBT_CAP));
> + if (rv) {
> + dev_warn(bt->io->dev,
> + "Can't start capabilities transaction: %d\n", rv);
> + goto out_no_bt_cap;
> + }
> +
> + smi_result = SI_SM_CALL_WITHOUT_DELAY;
> + for (;;) {
> + if (smi_result == SI_SM_CALL_WITH_DELAY ||
> + smi_result == SI_SM_CALL_WITH_TICK_DELAY) {
> + schedule_timeout_uninterruptible(1);
> + smi_result = bt_event(bt, jiffies_to_usecs(1));
> + } else if (smi_result == SI_SM_CALL_WITHOUT_DELAY) {
> + smi_result = bt_event(bt, 0);
> + } else
> + break;
> + }
> +
> + rv = bt_get_result(bt, BT_CAP, sizeof(BT_CAP));
> + bt_init_data(bt, bt->io);
> + if (rv < 8) {
> + dev_warn(bt->io->dev, "bt cap response too short: %d\n", rv);
> + goto out_no_bt_cap;
> + }
> +
> + if (BT_CAP[2]) {
> + dev_warn(bt->io->dev, "Error fetching bt cap: %x\n",
> BT_CAP[2]);
> +out_no_bt_cap:
> + dev_warn(bt->io->dev, "using default values\n");
> + } else {
> + bt->BT_CAP_req2rsp = BT_CAP[6] * USEC_PER_SEC;
> + bt->BT_CAP_retries = BT_CAP[7];
> + }
> +
> + dev_info(bt->io->dev, "req2rsp=%ld secs retries=%d\n",
> + bt->BT_CAP_req2rsp / USEC_PER_SEC, bt->BT_CAP_retries);
> +
> return 0;
> }
>
> --
> 2.7.4


2018-08-27 20:49:23

by Corey Minyard

[permalink] [raw]
Subject: Re: [PATCH 1/2] ipmi: Move BT capabilities detection to the detect call

On 08/27/2018 03:16 PM, Banman, Andrew wrote:
> Tested-by: Andrew Banman
>
> Ran through 100+ boots with no error with your 1st patch applied. I don't see any endcases to worry about.
>

Thanks for the testing, it's queued for the next release.

-corey

> Thanks Corey!
>
> Andrew
>
>> -----Original Message-----
>> From: Corey Minyard [mailto:[email protected]] On Behalf Of
>> [email protected]
>> Sent: Thursday, August 23, 2018 3:37 PM
>> To: Banman, Andrew <[email protected]>
>> Cc: [email protected]; linux-
>> [email protected]; Anderson, Russ <[email protected]>; Ramsay,
>> Frank <[email protected]>; Ernst, Justin <[email protected]>;
>> Corey Minyard <[email protected]>
>> Subject: [PATCH 1/2] ipmi: Move BT capabilities detection to the detect
>> call
>>
>> From: Corey Minyard <[email protected]>
>>
>> The capabilities detection was being done as part of the normal
>> state machine, but it was possible for it to be running while
>> the upper layers of the IPMI driver were initializing the
>> device, resulting in error and failure to initialize.
>>
>> Move the capabilities detection to the the detect function,
>> so it's done before anything else runs on the device. This also
>> simplifies the state machine and removes some code, as a bonus.
>>
>> Signed-off-by: Corey Minyard <[email protected]>
>> Reported-by: Andrew Banman <[email protected]>
>> ---
>> drivers/char/ipmi/ipmi_bt_sm.c | 92 ++++++++++++++++++++++-------------
>> -------
>> 1 file changed, 48 insertions(+), 44 deletions(-)
>>
>> diff --git a/drivers/char/ipmi/ipmi_bt_sm.c
>> b/drivers/char/ipmi/ipmi_bt_sm.c
>> index cbc6126..b4133832e 100644
>> --- a/drivers/char/ipmi/ipmi_bt_sm.c
>> +++ b/drivers/char/ipmi/ipmi_bt_sm.c
>> @@ -59,8 +59,6 @@ enum bt_states {
>> BT_STATE_RESET3,
>> BT_STATE_RESTART,
>> BT_STATE_PRINTME,
>> - BT_STATE_CAPABILITIES_BEGIN,
>> - BT_STATE_CAPABILITIES_END,
>> BT_STATE_LONG_BUSY /* BT doesn't get hosed :-) */
>> };
>>
>> @@ -86,7 +84,6 @@ struct si_sm_data {
>> int error_retries; /* end of "common" fields */
>> int nonzero_status; /* hung BMCs stay all 0 */
>> enum bt_states complete; /* to divert the state machine */
>> - int BT_CAP_outreqs;
>> long BT_CAP_req2rsp;
>> int BT_CAP_retries; /* Recommended retries */
>> };
>> @@ -137,8 +134,6 @@ static char *state2txt(unsigned char state)
>> case BT_STATE_RESET3: return("RESET3");
>> case BT_STATE_RESTART: return("RESTART");
>> case BT_STATE_LONG_BUSY: return("LONG_BUSY");
>> - case BT_STATE_CAPABILITIES_BEGIN: return("CAP_BEGIN");
>> - case BT_STATE_CAPABILITIES_END: return("CAP_END");
>> }
>> return("BAD STATE");
>> }
>> @@ -185,7 +180,6 @@ static unsigned int bt_init_data(struct si_sm_data
>> *bt, struct si_sm_io *io)
>> bt->complete = BT_STATE_IDLE; /* end here */
>> bt->BT_CAP_req2rsp = BT_NORMAL_TIMEOUT * USEC_PER_SEC;
>> bt->BT_CAP_retries = BT_NORMAL_RETRY_LIMIT;
>> - /* BT_CAP_outreqs == zero is a flag to read BT Capabilities */
>> return 3; /* We claim 3 bytes of space; ought to check SPMI table
>> */
>> }
>>
>> @@ -447,7 +441,7 @@ static enum si_sm_result error_recovery(struct
>> si_sm_data *bt,
>>
>> static enum si_sm_result bt_event(struct si_sm_data *bt, long time)
>> {
>> - unsigned char status, BT_CAP[8];
>> + unsigned char status;
>> static enum bt_states last_printed = BT_STATE_PRINTME;
>> int i;
>>
>> @@ -500,12 +494,6 @@ static enum si_sm_result bt_event(struct si_sm_data
>> *bt, long time)
>> if (status & BT_H_BUSY) /* clear a leftover H_BUSY */
>> BT_CONTROL(BT_H_BUSY);
>>
>> - bt->timeout = bt->BT_CAP_req2rsp;
>> -
>> - /* Read BT capabilities if it hasn't been done yet */
>> - if (!bt->BT_CAP_outreqs)
>> - BT_STATE_CHANGE(BT_STATE_CAPABILITIES_BEGIN,
>> - SI_SM_CALL_WITHOUT_DELAY);
>> BT_SI_SM_RETURN(SI_SM_IDLE);
>>
>> case BT_STATE_XACTION_START:
>> @@ -610,37 +598,6 @@ static enum si_sm_result bt_event(struct si_sm_data
>> *bt, long time)
>> BT_STATE_CHANGE(BT_STATE_XACTION_START,
>> SI_SM_CALL_WITH_DELAY);
>>
>> - /*
>> - * Get BT Capabilities, using timing of upper level state machine.
>> - * Set outreqs to prevent infinite loop on timeout.
>> - */
>> - case BT_STATE_CAPABILITIES_BEGIN:
>> - bt->BT_CAP_outreqs = 1;
>> - {
>> - unsigned char GetBT_CAP[] = { 0x18, 0x36 };
>> - bt->state = BT_STATE_IDLE;
>> - bt_start_transaction(bt, GetBT_CAP, sizeof(GetBT_CAP));
>> - }
>> - bt->complete = BT_STATE_CAPABILITIES_END;
>> - BT_STATE_CHANGE(BT_STATE_XACTION_START,
>> - SI_SM_CALL_WITH_DELAY);
>> -
>> - case BT_STATE_CAPABILITIES_END:
>> - i = bt_get_result(bt, BT_CAP, sizeof(BT_CAP));
>> - bt_init_data(bt, bt->io);
>> - if ((i == 8) && !BT_CAP[2]) {
>> - bt->BT_CAP_outreqs = BT_CAP[3];
>> - bt->BT_CAP_req2rsp = BT_CAP[6] * USEC_PER_SEC;
>> - bt->BT_CAP_retries = BT_CAP[7];
>> - } else
>> - pr_warn("IPMI BT: using default values\n");
>> - if (!bt->BT_CAP_outreqs)
>> - bt->BT_CAP_outreqs = 1;
>> - pr_warn("IPMI BT: req2rsp=%ld secs retries=%d\n",
>> - bt->BT_CAP_req2rsp / USEC_PER_SEC, bt->BT_CAP_retries);
>> - bt->timeout = bt->BT_CAP_req2rsp;
>> - return SI_SM_CALL_WITHOUT_DELAY;
>> -
>> default: /* should never occur */
>> return error_recovery(bt,
>> status,
>> @@ -651,6 +608,11 @@ static enum si_sm_result bt_event(struct si_sm_data
>> *bt, long time)
>>
>> static int bt_detect(struct si_sm_data *bt)
>> {
>> + unsigned char GetBT_CAP[] = { 0x18, 0x36 };
>> + unsigned char BT_CAP[8];
>> + enum si_sm_result smi_result;
>> + int rv;
>> +
>> /*
>> * It's impossible for the BT status and interrupt registers to be
>> * all 1's, (assuming a properly functioning, self-initialized BMC)
>> @@ -661,6 +623,48 @@ static int bt_detect(struct si_sm_data *bt)
>> if ((BT_STATUS == 0xFF) && (BT_INTMASK_R == 0xFF))
>> return 1;
>> reset_flags(bt);
>> +
>> + /*
>> + * Try getting the BT capabilities here.
>> + */
>> + rv = bt_start_transaction(bt, GetBT_CAP, sizeof(GetBT_CAP));
>> + if (rv) {
>> + dev_warn(bt->io->dev,
>> + "Can't start capabilities transaction: %d\n", rv);
>> + goto out_no_bt_cap;
>> + }
>> +
>> + smi_result = SI_SM_CALL_WITHOUT_DELAY;
>> + for (;;) {
>> + if (smi_result == SI_SM_CALL_WITH_DELAY ||
>> + smi_result == SI_SM_CALL_WITH_TICK_DELAY) {
>> + schedule_timeout_uninterruptible(1);
>> + smi_result = bt_event(bt, jiffies_to_usecs(1));
>> + } else if (smi_result == SI_SM_CALL_WITHOUT_DELAY) {
>> + smi_result = bt_event(bt, 0);
>> + } else
>> + break;
>> + }
>> +
>> + rv = bt_get_result(bt, BT_CAP, sizeof(BT_CAP));
>> + bt_init_data(bt, bt->io);
>> + if (rv < 8) {
>> + dev_warn(bt->io->dev, "bt cap response too short: %d\n", rv);
>> + goto out_no_bt_cap;
>> + }
>> +
>> + if (BT_CAP[2]) {
>> + dev_warn(bt->io->dev, "Error fetching bt cap: %x\n",
>> BT_CAP[2]);
>> +out_no_bt_cap:
>> + dev_warn(bt->io->dev, "using default values\n");
>> + } else {
>> + bt->BT_CAP_req2rsp = BT_CAP[6] * USEC_PER_SEC;
>> + bt->BT_CAP_retries = BT_CAP[7];
>> + }
>> +
>> + dev_info(bt->io->dev, "req2rsp=%ld secs retries=%d\n",
>> + bt->BT_CAP_req2rsp / USEC_PER_SEC, bt->BT_CAP_retries);
>> +
>> return 0;
>> }
>>
>> --
>> 2.7.4