2014-07-22 23:02:41

by Ben Greear

[permalink] [raw]
Subject: [PATCH v3 1/5] ath10k: provide firmware crash info via debugfs.

From: Ben Greear <[email protected]>

Store the firmware crash registers and last 128 or so
firmware debug-log ids and present them to user-space
via debugfs.

Should help with figuring out why the firmware crashed.

Signed-off-by: Ben Greear <[email protected]>
---

changes from v3:

* Use 'data_lock' instead of a new lock.
* Move storage to the 'debug' object.
* Add new patch-5 that also dumps BSS regions if FW supports it.
* Fix compile problem when ath10k-debugfs is not enabled.

drivers/net/wireless/ath/ath10k/core.h | 71 ++++++++++++++
drivers/net/wireless/ath/ath10k/debug.c | 166 ++++++++++++++++++++++++++++++++
drivers/net/wireless/ath/ath10k/debug.h | 8 ++
drivers/net/wireless/ath/ath10k/hw.h | 24 +++++
drivers/net/wireless/ath/ath10k/pci.c | 106 +++++++++++++++++++-
drivers/net/wireless/ath/ath10k/pci.h | 3 -
6 files changed, 373 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h
index 83a5fa9..5c0c5bf 100644
--- a/drivers/net/wireless/ath/ath10k/core.h
+++ b/drivers/net/wireless/ath/ath10k/core.h
@@ -276,6 +276,70 @@ struct ath10k_vif_iter {
struct ath10k_vif *arvif;
};

+/**
+ * enum ath10k_fw_error_dump_type - types of data in the dump file
+ * @ATH10K_FW_ERROR_DUMP_DBGLOG: Recent firmware debug log entries
+ * @ATH10K_FW_ERROR_DUMP_REGDUMP: Register crash dump in binary format
+ */
+enum ath10k_fw_error_dump_type {
+ ATH10K_FW_ERROR_DUMP_DBGLOG = 0,
+ ATH10K_FW_ERROR_DUMP_REGDUMP = 1,
+
+ ATH10K_FW_ERROR_DUMP_MAX,
+};
+
+struct ath10k_tlv_dump_data {
+ u32 type; /* see ath10k_fw_error_dump_type above */
+ u32 tlv_len; /* in bytes */
+ u8 tlv_data[]; /* Pad to 32-bit boundaries as needed. */
+} __packed;
+
+struct ath10k_dump_file_data {
+ /* Dump file information */
+ char df_magic[16]; /* ATH10K-FW-DUMP */
+ u32 len;
+ u32 big_endian; /* 0x1 if host is big-endian */
+ u32 version; /* File dump version, 1 for now. */
+
+ /* Some info we can get from ath10k struct that might help. */
+ u32 chip_id;
+ u32 bus_type; /* 0 for now, in place for later hardware */
+ u32 target_version;
+ u32 fw_version_major;
+ u32 fw_version_minor;
+ u32 fw_version_release;
+ u32 fw_version_build;
+ u32 phy_capability;
+ u32 hw_min_tx_power;
+ u32 hw_max_tx_power;
+ u32 ht_cap_info;
+ u32 vht_cap_info;
+ u32 num_rf_chains;
+ char fw_ver[ETHTOOL_FWVERS_LEN]; /* Firmware version string */
+
+ /* Kernel related information */
+ u32 tv_sec_hi; /* time-of-day stamp, high 32-bits for seconds */
+ u32 tv_sec_lo; /* time-of-day stamp, low 32-bits for seconds */
+ u32 tv_nsec_hi; /* time-of-day stamp, nano-seconds, high bits */
+ u32 tv_nsec_lo; /* time-of-day stamp, nano-seconds, low bits */
+ u32 kernel_ver_code; /* LINUX_VERSION_CODE */
+ char kernel_ver[64]; /* VERMAGIC_STRING */
+
+ u8 unused[128]; /* Room for growth w/out changing binary format */
+
+ u8 data[0]; /* struct ath10k_tlv_dump_data + more */
+} __packed;
+
+/* This will store at least the last 128 entries. Each dbglog message
+ * is a max of 7 32-bit integers in length, but the length can be less
+ * than that as well.
+ */
+#define ATH10K_DBGLOG_DATA_LEN (128 * 7 * sizeof(u32))
+struct ath10k_dbglog_entry_storage {
+ u32 next_idx; /* Where to write next chunk of data */
+ u8 data[ATH10K_DBGLOG_DATA_LEN];
+};
+
struct ath10k_debug {
struct dentry *debugfs_phy;

@@ -293,6 +357,13 @@ struct ath10k_debug {

u8 htt_max_amsdu;
u8 htt_max_ampdu;
+
+ /* Used for crash-dump storage */
+ /* Don't over-write dump info until someone reads the data. */
+ /* Protected by data-lock */
+ bool crashed_since_read;
+ struct ath10k_dbglog_entry_storage dbglog_entry_data;
+ u32 reg_dump_values[REG_DUMP_COUNT_QCA988X];
};

enum ath10k_state {
diff --git a/drivers/net/wireless/ath/ath10k/debug.c b/drivers/net/wireless/ath/ath10k/debug.c
index c9e35c87e..b952775 100644
--- a/drivers/net/wireless/ath/ath10k/debug.c
+++ b/drivers/net/wireless/ath/ath10k/debug.c
@@ -17,6 +17,9 @@

#include <linux/module.h>
#include <linux/debugfs.h>
+#include <linux/version.h>
+#include <linux/vermagic.h>
+#include <linux/vmalloc.h>

#include "core.h"
#include "debug.h"
@@ -580,6 +583,166 @@ static const struct file_operations fops_chip_id = {
.llseek = default_llseek,
};

+void ath10k_dbg_save_fw_dbg_buffer(struct ath10k *ar, u8 *buffer, int len)
+{
+ int i;
+ int z = ar->debug.dbglog_entry_data.next_idx;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ar->data_lock, flags);
+
+ /* Don't save any new logs until user-space reads this. */
+ if (ar->debug.crashed_since_read)
+ goto exit;
+
+ for (i = 0; i < len; i++) {
+ ar->debug.dbglog_entry_data.data[z] = buffer[i];
+ z++;
+ if (z >= ATH10K_DBGLOG_DATA_LEN)
+ z = 0;
+ }
+
+ ar->debug.dbglog_entry_data.next_idx = z;
+exit:
+ spin_unlock_irqrestore(&ar->data_lock, flags);
+}
+EXPORT_SYMBOL(ath10k_dbg_save_fw_dbg_buffer);
+
+static struct ath10k_dump_file_data *ath10k_build_dump_file(struct ath10k *ar)
+{
+ unsigned int len;
+ unsigned int sofar = 0;
+ unsigned char *buf;
+ struct ath10k_tlv_dump_data *dump_tlv;
+ struct ath10k_dump_file_data *dump_data;
+ int hdr_len = sizeof(*dump_data);
+ unsigned long flags;
+ struct timespec timestamp;
+
+ len = hdr_len;
+ len += sizeof(*dump_tlv) + sizeof(ar->debug.reg_dump_values);
+ len += sizeof(*dump_tlv) + sizeof(ar->debug.dbglog_entry_data);
+
+ lockdep_assert_held(&ar->conf_mutex);
+
+ sofar += hdr_len;
+
+ /* This is going to get big when we start dumping FW RAM and such,
+ * so go ahead and use vmalloc.
+ */
+ buf = vmalloc(len);
+ if (!buf)
+ return NULL;
+
+ memset(buf, 0, len);
+ dump_data = (struct ath10k_dump_file_data *)(buf);
+ strlcpy(dump_data->df_magic, "ATH10K-FW-DUMP",
+ sizeof(dump_data->df_magic));
+ dump_data->len = len;
+#ifdef __BIG_ENDIAN
+ dump_data->big_endian = 1;
+#else
+ dump_data->big_endian = 0;
+#endif
+ dump_data->version = 1;
+ dump_data->chip_id = ar->chip_id;
+ dump_data->bus_type = 0;
+ dump_data->target_version = ar->target_version;
+ dump_data->fw_version_major = ar->fw_version_major;
+ dump_data->fw_version_minor = ar->fw_version_minor;
+ dump_data->fw_version_release = ar->fw_version_release;
+ dump_data->fw_version_build = ar->fw_version_build;
+ dump_data->phy_capability = ar->phy_capability;
+ dump_data->hw_min_tx_power = ar->hw_min_tx_power;
+ dump_data->hw_max_tx_power = ar->hw_max_tx_power;
+ dump_data->ht_cap_info = ar->ht_cap_info;
+ dump_data->vht_cap_info = ar->vht_cap_info;
+ dump_data->num_rf_chains = ar->num_rf_chains;
+
+ strlcpy(dump_data->fw_ver, ar->hw->wiphy->fw_version,
+ sizeof(dump_data->fw_ver));
+
+ dump_data->kernel_ver_code = LINUX_VERSION_CODE;
+ strlcpy(dump_data->kernel_ver, VERMAGIC_STRING,
+ sizeof(dump_data->kernel_ver));
+
+ getnstimeofday(&timestamp);
+ dump_data->tv_sec_hi = timestamp.tv_sec >> 32;
+ dump_data->tv_sec_lo = timestamp.tv_sec;
+ dump_data->tv_nsec_hi = timestamp.tv_nsec >> 32;
+ dump_data->tv_nsec_lo = timestamp.tv_nsec;
+
+ spin_lock_irqsave(&ar->data_lock, flags);
+
+ /* Gather dbg-log */
+ dump_tlv = (struct ath10k_tlv_dump_data *)(buf + sofar);
+ dump_tlv->type = ATH10K_FW_ERROR_DUMP_DBGLOG;
+ dump_tlv->tlv_len = sizeof(ar->debug.dbglog_entry_data);
+ memcpy(dump_tlv->tlv_data, &ar->debug.dbglog_entry_data, dump_tlv->tlv_len);
+ sofar += sizeof(*dump_tlv) + dump_tlv->tlv_len;
+
+ /* Gather crash-dump */
+ dump_tlv = (struct ath10k_tlv_dump_data *)(buf + sofar);
+ dump_tlv->type = ATH10K_FW_ERROR_DUMP_REGDUMP;
+ dump_tlv->tlv_len = sizeof(ar->debug.reg_dump_values);
+ memcpy(dump_tlv->tlv_data, &ar->debug.reg_dump_values, dump_tlv->tlv_len);
+ sofar += sizeof(*dump_tlv) + dump_tlv->tlv_len;
+
+ spin_unlock_irqrestore(&ar->data_lock, flags);
+
+ return dump_data;
+}
+
+static int ath10k_fw_error_dump_open(struct inode *inode, struct file *file)
+{
+ struct ath10k *ar = inode->i_private;
+ int ret;
+ struct ath10k_dump_file_data *dump;
+
+ mutex_lock(&ar->conf_mutex);
+
+ dump = ath10k_build_dump_file(ar);
+ if (!dump) {
+ ret = -ENODATA;
+ goto out;
+ }
+
+ file->private_data = dump;
+ ar->debug.crashed_since_read = false;
+ ret = 0;
+
+out:
+ mutex_unlock(&ar->conf_mutex);
+ return ret;
+}
+
+static ssize_t ath10k_fw_error_dump_read(struct file *file,
+ char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ struct ath10k_dump_file_data *dump_file = file->private_data;
+
+ return simple_read_from_buffer(user_buf, count, ppos,
+ dump_file,
+ dump_file->len);
+}
+
+static int ath10k_fw_error_dump_release(struct inode *inode,
+ struct file *file)
+{
+ vfree(file->private_data);
+
+ return 0;
+}
+
+static const struct file_operations fops_fw_error_dump = {
+ .open = ath10k_fw_error_dump_open,
+ .read = ath10k_fw_error_dump_read,
+ .release = ath10k_fw_error_dump_release,
+ .owner = THIS_MODULE,
+ .llseek = default_llseek,
+};
+
static int ath10k_debug_htt_stats_req(struct ath10k *ar)
{
u64 cookie;
@@ -933,6 +1096,9 @@ int ath10k_debug_create(struct ath10k *ar)
debugfs_create_file("simulate_fw_crash", S_IRUSR, ar->debug.debugfs_phy,
ar, &fops_simulate_fw_crash);

+ debugfs_create_file("fw_error_dump", S_IRUSR, ar->debug.debugfs_phy,
+ ar, &fops_fw_error_dump);
+
debugfs_create_file("chip_id", S_IRUSR, ar->debug.debugfs_phy,
ar, &fops_chip_id);

diff --git a/drivers/net/wireless/ath/ath10k/debug.h b/drivers/net/wireless/ath/ath10k/debug.h
index a582499..6e8f5f6 100644
--- a/drivers/net/wireless/ath/ath10k/debug.h
+++ b/drivers/net/wireless/ath/ath10k/debug.h
@@ -96,6 +96,8 @@ __printf(2, 3) void ath10k_dbg(enum ath10k_debug_mask mask,
void ath10k_dbg_dump(enum ath10k_debug_mask mask,
const char *msg, const char *prefix,
const void *buf, size_t len);
+void ath10k_dbg_save_fw_dbg_buffer(struct ath10k *ar, u8 *buffer, int len);
+
#else /* CONFIG_ATH10K_DEBUG */

static inline int ath10k_dbg(enum ath10k_debug_mask dbg_mask,
@@ -109,5 +111,11 @@ static inline void ath10k_dbg_dump(enum ath10k_debug_mask mask,
const void *buf, size_t len)
{
}
+
+static inline void ath10k_dbg_save_fw_dbg_buffer(struct ath10k *ar,
+ u8 *buffer, int len)
+{
+}
#endif /* CONFIG_ATH10K_DEBUG */
+
#endif /* _DEBUG_H_ */
diff --git a/drivers/net/wireless/ath/ath10k/hw.h b/drivers/net/wireless/ath/ath10k/hw.h
index 007e855..d4df3f0 100644
--- a/drivers/net/wireless/ath/ath10k/hw.h
+++ b/drivers/net/wireless/ath/ath10k/hw.h
@@ -38,6 +38,8 @@
/* includes also the null byte */
#define ATH10K_FIRMWARE_MAGIC "QCA-ATH10K"

+#define REG_DUMP_COUNT_QCA988X 60
+
struct ath10k_fw_ie {
__le32 id;
__le32 len;
@@ -361,4 +363,26 @@ enum ath10k_mcast2ucast_mode {

#define RTC_STATE_V_GET(x) (((x) & RTC_STATE_V_MASK) >> RTC_STATE_V_LSB)

+
+/* Target debug log related defines and structs */
+
+/* Target is 32-bit CPU, so we just use u32 for
+ * the pointers. The memory space is relative to the
+ * target, not the host.
+ */
+struct ath10k_fw_dbglog_buf {
+ u32 next; /* pointer to dblog_buf_s. */
+ u32 buffer; /* pointer to u8 buffer */
+ u32 bufsize;
+ u32 length;
+ u32 count;
+ u32 free;
+} __packed;
+
+struct ath10k_fw_dbglog_hdr {
+ u32 dbuf; /* pointer to dbglog_buf_s */
+ u32 dropped;
+} __packed;
+
+
#endif /* _HW_H_ */
diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c
index 06840d1..24ddc22 100644
--- a/drivers/net/wireless/ath/ath10k/pci.c
+++ b/drivers/net/wireless/ath/ath10k/pci.c
@@ -840,6 +840,12 @@ static void ath10k_pci_hif_dump_area(struct ath10k *ar)
u32 host_addr;
int ret;
u32 i;
+#ifdef CONFIG_ATH10K_DEBUGFS
+ struct ath10k_fw_dbglog_hdr dbg_hdr;
+ u32 dbufp; /* pointer in target memory space */
+ struct ath10k_fw_dbglog_buf dbuf;
+ u8 *buffer;
+#endif

ath10k_err("firmware crashed!\n");
ath10k_err("hardware name %s version 0x%x\n",
@@ -851,7 +857,7 @@ static void ath10k_pci_hif_dump_area(struct ath10k *ar)
&reg_dump_area, sizeof(u32));
if (ret) {
ath10k_err("failed to read FW dump area address: %d\n", ret);
- return;
+ goto exit;
}

ath10k_err("target register Dump Location: 0x%08X\n", reg_dump_area);
@@ -861,7 +867,7 @@ static void ath10k_pci_hif_dump_area(struct ath10k *ar)
REG_DUMP_COUNT_QCA988X * sizeof(u32));
if (ret != 0) {
ath10k_err("failed to read FW dump area: %d\n", ret);
- return;
+ goto exit;
}

BUILD_BUG_ON(REG_DUMP_COUNT_QCA988X % 4);
@@ -875,6 +881,102 @@ static void ath10k_pci_hif_dump_area(struct ath10k *ar)
reg_dump_values[i + 2],
reg_dump_values[i + 3]);

+#ifdef CONFIG_ATH10K_DEBUGFS
+ /* Dump the debug logs on the target */
+ host_addr = host_interest_item_address(HI_ITEM(hi_dbglog_hdr));
+ ret = ath10k_pci_diag_read_mem(ar, host_addr,
+ &reg_dump_area, sizeof(u32));
+ if (ret != 0) {
+ ath10k_warn("failed to read hi_dbglog_hdr: %d\n", ret);
+ goto exit_save_regs;
+ }
+
+ ret = ath10k_pci_diag_read_mem(ar, reg_dump_area,
+ &dbg_hdr, sizeof(dbg_hdr));
+ if (ret != 0) {
+ ath10k_err("failed to dump debug log area: %d (addr 0x%x)\n",
+ ret, reg_dump_area);
+ goto exit_save_regs;
+ }
+
+ ath10k_dbg(ATH10K_DBG_PCI,
+ "Debug Log Header, dbuf: 0x%x dropped: %i\n",
+ dbg_hdr.dbuf, dbg_hdr.dropped);
+ dbufp = dbg_hdr.dbuf;
+
+ /* i is for logging purposes and sanity check in case firmware buffers
+ * are corrupted and will not properly terminate the list.
+ * In standard firmware, it appears there are no more than 2
+ * buffers, so 10 should be safe upper limit even if firmware
+ * changes quite a bit.
+ */
+ i = 0;
+ while (dbufp && i < 10) {
+ ret = ath10k_pci_diag_read_mem(ar, dbufp,
+ &dbuf, sizeof(dbuf));
+ if (ret != 0) {
+ ath10k_err("failed to read debug log area: %d (addr 0x%x)\n",
+ ret, dbufp);
+ goto exit_save_regs;
+ }
+
+ /* We have a buffer of data */
+ ath10k_dbg(ATH10K_DBG_PCI,
+ "[%i] next: 0x%x buf: 0x%x sz: %i len: %i count: %i free: %i\n",
+ i, dbuf.next, dbuf.buffer, dbuf.bufsize, dbuf.length,
+ dbuf.count, dbuf.free);
+ if (dbuf.buffer == 0 || dbuf.length == 0)
+ goto next;
+
+ /* Pick arbitrary upper bound in case firmware is corrupted for
+ * whatever reason.
+ */
+ if (dbuf.length > 16000) {
+ ath10k_err("debuglog buf length is out of bounds: %d\n",
+ dbuf.length);
+ /* Do not trust the next pointer either... */
+ goto exit_save_regs;
+ }
+
+ buffer = kmalloc(dbuf.length, GFP_ATOMIC);
+
+ if (!buffer)
+ goto next;
+
+ ret = ath10k_pci_diag_read_mem(ar, dbuf.buffer, buffer,
+ dbuf.length);
+ if (ret != 0) {
+ ath10k_err("failed to read debug log buffer: %d (addr 0x%x)\n",
+ ret, dbuf.buffer);
+ kfree(buffer);
+ goto exit_save_regs;
+ }
+
+ ath10k_dbg_save_fw_dbg_buffer(ar, buffer, dbuf.length);
+ kfree(buffer);
+
+next:
+ dbufp = dbuf.next;
+ if (dbufp == dbg_hdr.dbuf) {
+ /* It is a circular buffer it seems, bail if next
+ * is head
+ */
+ break;
+ }
+ i++;
+ } /* While we have a debug buffer to read */
+
+exit_save_regs:
+ spin_lock_bh(&ar->data_lock);
+ if (!ar->debug.crashed_since_read) {
+ ar->debug.crashed_since_read = true;
+ memcpy(ar->debug.reg_dump_values, reg_dump_values,
+ sizeof(ar->debug.reg_dump_values));
+ }
+ spin_unlock_bh(&ar->data_lock);
+#endif /* debugfs */
+
+exit:
queue_work(ar->workqueue, &ar->restart_work);
}

diff --git a/drivers/net/wireless/ath/ath10k/pci.h b/drivers/net/wireless/ath/ath10k/pci.h
index 9401292..f72a7cd 100644
--- a/drivers/net/wireless/ath/ath10k/pci.h
+++ b/drivers/net/wireless/ath/ath10k/pci.h
@@ -23,9 +23,6 @@
#include "hw.h"
#include "ce.h"

-/* FW dump area */
-#define REG_DUMP_COUNT_QCA988X 60
-
/*
* maximum number of bytes that can be handled atomically by DiagRead/DiagWrite
*/
--
1.7.11.7



2014-07-23 12:45:25

by Ben Greear

[permalink] [raw]
Subject: Re: [PATCH v3 1/5] ath10k: provide firmware crash info via debugfs.



On 07/23/2014 01:25 AM, Michal Kazior wrote:
> On 23 July 2014 01:02, <[email protected]> wrote:
>> From: Ben Greear <[email protected]>
>>
>> Store the firmware crash registers and last 128 or so
>> firmware debug-log ids and present them to user-space
>> via debugfs.
>>
>> Should help with figuring out why the firmware crashed.
>>
>> Signed-off-by: Ben Greear <[email protected]>
> [...]
>> +struct ath10k_dump_file_data {
>> + /* Dump file information */
>> + char df_magic[16]; /* ATH10K-FW-DUMP */
>> + u32 len;
>> + u32 big_endian; /* 0x1 if host is big-endian */
>> + u32 version; /* File dump version, 1 for now. */
>> +
>> + /* Some info we can get from ath10k struct that might help. */
>> + u32 chip_id;
>> + u32 bus_type; /* 0 for now, in place for later hardware */
>> + u32 target_version;
>> + u32 fw_version_major;
>> + u32 fw_version_minor;
>> + u32 fw_version_release;
>> + u32 fw_version_build;
>> + u32 phy_capability;
>> + u32 hw_min_tx_power;
>> + u32 hw_max_tx_power;
>> + u32 ht_cap_info;
>> + u32 vht_cap_info;
>> + u32 num_rf_chains;
>> + char fw_ver[ETHTOOL_FWVERS_LEN]; /* Firmware version string */
>> +
>> + /* Kernel related information */
>> + u32 tv_sec_hi; /* time-of-day stamp, high 32-bits for seconds */
>> + u32 tv_sec_lo; /* time-of-day stamp, low 32-bits for seconds */
>> + u32 tv_nsec_hi; /* time-of-day stamp, nano-seconds, high bits */
>> + u32 tv_nsec_lo; /* time-of-day stamp, nano-seconds, low bits */
>> + u32 kernel_ver_code; /* LINUX_VERSION_CODE */
>> + char kernel_ver[64]; /* VERMAGIC_STRING */
>> +
>> + u8 unused[128]; /* Room for growth w/out changing binary format */
>> +
>> + u8 data[0]; /* struct ath10k_tlv_dump_data + more */
>> +} __packed;
>
> I think it would be a lot better if this format had a fixed endianess
> (__le/__be instead of u32). But see below.
>
>
>> +static struct ath10k_dump_file_data *ath10k_build_dump_file(struct ath10k *ar)
> [...]
>> + getnstimeofday(&timestamp);
>> + dump_data->tv_sec_hi = timestamp.tv_sec >> 32;
>> + dump_data->tv_sec_lo = timestamp.tv_sec;
>> + dump_data->tv_nsec_hi = timestamp.tv_nsec >> 32;
>> + dump_data->tv_nsec_lo = timestamp.tv_nsec;
>> +
>> + spin_lock_irqsave(&ar->data_lock, flags);
>
> You can't just use _irqsave with data_lock like that. You have to
> convert *all* data_lock usages from _bh to _irqsave.
>
> Is it really necessary to use the _irqsave here anyway? We can
> probably make sure dump function is never called directly from an
> interrupt. I think that's already the case anyway.

I can change to bh variant and make sure lockdep doesn't complain.

>> +/* Target debug log related defines and structs */
>> +
>> +/* Target is 32-bit CPU, so we just use u32 for
>> + * the pointers. The memory space is relative to the
>> + * target, not the host.
>> + */
>> +struct ath10k_fw_dbglog_buf {
>> + u32 next; /* pointer to dblog_buf_s. */
>> + u32 buffer; /* pointer to u8 buffer */
>> + u32 bufsize;
>> + u32 length;
>> + u32 count;
>> + u32 free;
>> +} __packed;
>> +
>> +struct ath10k_fw_dbglog_hdr {
>> + u32 dbuf; /* pointer to dbglog_buf_s */
>> + u32 dropped;
>> +} __packed;
>
> These structures are target device specific, right? Since the target
> is little-endian I'd think these structures should be defined as such
> (i.e. __le32 instead of u32).
>
> But then ath10k_pci_diag_read/write_mem() do byte-swapping
> automatically (hint: they shouldn't, but since it's my fault I guess I
> should be the one to fix it up.. :).
>
> This would also explain why you defined a big_endian bool in the dump
> structure instead of using __be/__le and explicit conversion. It's all
> good until you start dumping RAM and try to deal with non-word data
> (e.g. mac addresses).

This can all be handled in the decode tool, so I'd like to just keep the
kernel bit as is.

Thanks,
Ben


--
Ben Greear <[email protected]>
Candela Technologies Inc http://www.candelatech.com

2014-07-23 08:25:18

by Michal Kazior

[permalink] [raw]
Subject: Re: [PATCH v3 1/5] ath10k: provide firmware crash info via debugfs.

On 23 July 2014 01:02, <[email protected]> wrote:
> From: Ben Greear <[email protected]>
>
> Store the firmware crash registers and last 128 or so
> firmware debug-log ids and present them to user-space
> via debugfs.
>
> Should help with figuring out why the firmware crashed.
>
> Signed-off-by: Ben Greear <[email protected]>
[...]
> +struct ath10k_dump_file_data {
> + /* Dump file information */
> + char df_magic[16]; /* ATH10K-FW-DUMP */
> + u32 len;
> + u32 big_endian; /* 0x1 if host is big-endian */
> + u32 version; /* File dump version, 1 for now. */
> +
> + /* Some info we can get from ath10k struct that might help. */
> + u32 chip_id;
> + u32 bus_type; /* 0 for now, in place for later hardware */
> + u32 target_version;
> + u32 fw_version_major;
> + u32 fw_version_minor;
> + u32 fw_version_release;
> + u32 fw_version_build;
> + u32 phy_capability;
> + u32 hw_min_tx_power;
> + u32 hw_max_tx_power;
> + u32 ht_cap_info;
> + u32 vht_cap_info;
> + u32 num_rf_chains;
> + char fw_ver[ETHTOOL_FWVERS_LEN]; /* Firmware version string */
> +
> + /* Kernel related information */
> + u32 tv_sec_hi; /* time-of-day stamp, high 32-bits for seconds */
> + u32 tv_sec_lo; /* time-of-day stamp, low 32-bits for seconds */
> + u32 tv_nsec_hi; /* time-of-day stamp, nano-seconds, high bits */
> + u32 tv_nsec_lo; /* time-of-day stamp, nano-seconds, low bits */
> + u32 kernel_ver_code; /* LINUX_VERSION_CODE */
> + char kernel_ver[64]; /* VERMAGIC_STRING */
> +
> + u8 unused[128]; /* Room for growth w/out changing binary format */
> +
> + u8 data[0]; /* struct ath10k_tlv_dump_data + more */
> +} __packed;

I think it would be a lot better if this format had a fixed endianess
(__le/__be instead of u32). But see below.


> +static struct ath10k_dump_file_data *ath10k_build_dump_file(struct ath10k *ar)
[...]
> + getnstimeofday(&timestamp);
> + dump_data->tv_sec_hi = timestamp.tv_sec >> 32;
> + dump_data->tv_sec_lo = timestamp.tv_sec;
> + dump_data->tv_nsec_hi = timestamp.tv_nsec >> 32;
> + dump_data->tv_nsec_lo = timestamp.tv_nsec;
> +
> + spin_lock_irqsave(&ar->data_lock, flags);

You can't just use _irqsave with data_lock like that. You have to
convert *all* data_lock usages from _bh to _irqsave.

Is it really necessary to use the _irqsave here anyway? We can
probably make sure dump function is never called directly from an
interrupt. I think that's already the case anyway.


> +/* Target debug log related defines and structs */
> +
> +/* Target is 32-bit CPU, so we just use u32 for
> + * the pointers. The memory space is relative to the
> + * target, not the host.
> + */
> +struct ath10k_fw_dbglog_buf {
> + u32 next; /* pointer to dblog_buf_s. */
> + u32 buffer; /* pointer to u8 buffer */
> + u32 bufsize;
> + u32 length;
> + u32 count;
> + u32 free;
> +} __packed;
> +
> +struct ath10k_fw_dbglog_hdr {
> + u32 dbuf; /* pointer to dbglog_buf_s */
> + u32 dropped;
> +} __packed;

These structures are target device specific, right? Since the target
is little-endian I'd think these structures should be defined as such
(i.e. __le32 instead of u32).

But then ath10k_pci_diag_read/write_mem() do byte-swapping
automatically (hint: they shouldn't, but since it's my fault I guess I
should be the one to fix it up.. :).

This would also explain why you defined a big_endian bool in the dump
structure instead of using __be/__le and explicit conversion. It's all
good until you start dumping RAM and try to deal with non-word data
(e.g. mac addresses).


MichaƂ

2014-07-22 23:02:45

by Ben Greear

[permalink] [raw]
Subject: [PATCH v3 5/5] ath10k: save firmware RAM and ROM BSS sections on crash.

From: Ben Greear <[email protected]>

This can be used to get a useful back trace out of a firmware
crash that involves an interrupt handler. For instance, a
null-pointer-exception would be this kind of trace. A user-space
tool can read the debugfs file and decode things as wished.

This requires a packaged firmware with a new IE to describe the
BSS section starts and length.

Signed-off-by: Ben Greear <[email protected]>
---
drivers/net/wireless/ath/ath10k/core.c | 62 +++++++++++++++++++++++++++++++++
drivers/net/wireless/ath/ath10k/core.h | 11 ++++++
drivers/net/wireless/ath/ath10k/debug.c | 32 +++++++++++++++++
drivers/net/wireless/ath/ath10k/hw.h | 1 +
drivers/net/wireless/ath/ath10k/pci.c | 37 ++++++++++++++++++++
5 files changed, 143 insertions(+)

diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c
index 93adb8c..08e96e7 100644
--- a/drivers/net/wireless/ath/ath10k/core.c
+++ b/drivers/net/wireless/ath/ath10k/core.c
@@ -365,6 +365,12 @@ static int ath10k_core_fetch_firmware_api_n(struct ath10k *ar, const char *name)
struct ath10k_fw_ie *hdr;
const u8 *data;
__le32 *timestamp;
+#ifdef CONFIG_ATH10K_DEBUGFS
+ u32 old_ram_bss_len = ar->debug.fw_ram_bss_len;
+ u32 old_rom_bss_len = ar->debug.fw_rom_bss_len;
+ unsigned char *t1, *t2;
+ unsigned long flags;
+#endif

/* first fetch the firmware file (firmware-*.bin) */
ar->firmware = ath10k_fetch_fw_file(ar, ar->hw_params.fw.dir, name);
@@ -479,6 +485,38 @@ static int ath10k_core_fetch_firmware_api_n(struct ath10k *ar, const char *name)
ar->otp_len = ie_len;

break;
+ case ATH10K_FW_IE_BSS_INFO:
+#ifdef CONFIG_ATH10K_DEBUGFS
+ spin_lock_irqsave(&ar->data_lock, flags);
+
+ ar->debug.fw_ram_bss_addr = le32_to_cpu(((u32 *)(data))[0]);
+ ar->debug.fw_ram_bss_len = le32_to_cpu(((u32 *)(data))[1]);
+ ar->debug.fw_rom_bss_addr = le32_to_cpu(((u32 *)(data))[2]);
+ ar->debug.fw_rom_bss_len = le32_to_cpu(((u32 *)(data))[3]);
+ t1 = t2 = NULL;
+ if ((old_ram_bss_len < ar->debug.fw_ram_bss_len) &&
+ ar->debug.ram_bss_buf) {
+ t1 = ar->debug.ram_bss_buf;
+ ar->debug.ram_bss_buf = NULL;
+ }
+ if ((old_rom_bss_len < ar->debug.fw_rom_bss_len) &&
+ ar->debug.rom_bss_buf) {
+ t2 = ar->debug.rom_bss_buf;
+ ar->debug.rom_bss_buf = NULL;
+ }
+ spin_unlock_irqrestore(&ar->data_lock, flags);
+ if (t1)
+ vfree(t1);
+ if (t2)
+ vfree(t2);
+ ath10k_dbg(ATH10K_DBG_BOOT,
+ "found FW bss info, RAM: addr 0x%x len 0x%x ROM: addr 0x%x len 0x%x\n",
+ ar->debug.fw_ram_bss_addr,
+ ar->debug.fw_ram_bss_len,
+ ar->debug.fw_rom_bss_addr,
+ ar->debug.fw_rom_bss_len);
+#endif
+ break;
default:
ath10k_warn("Unknown FW IE: %u\n",
le32_to_cpu(hdr->id));
@@ -1094,9 +1132,33 @@ EXPORT_SYMBOL(ath10k_core_create);

void ath10k_core_destroy(struct ath10k *ar)
{
+#ifdef CONFIG_ATH10K_DEBUGFS
+ unsigned long flags;
+ unsigned char *t1 = NULL;
+ unsigned char *t2 = NULL;
+#endif
+
flush_workqueue(ar->workqueue);
destroy_workqueue(ar->workqueue);

+#ifdef CONFIG_ATH10K_DEBUGFS
+ spin_lock_irqsave(&ar->data_lock, flags);
+ if (ar->debug.rom_bss_buf) {
+ t1 = ar->debug.rom_bss_buf;
+ ar->debug.rom_bss_buf = NULL;
+ }
+ if (ar->debug.ram_bss_buf) {
+ t2 = ar->debug.ram_bss_buf;
+ ar->debug.ram_bss_buf = NULL;
+ }
+ spin_unlock_irqrestore(&ar->data_lock, flags);
+
+ if (t1)
+ vfree(t1);
+ if (t2)
+ vfree(t2);
+#endif
+
ath10k_mac_destroy(ar);
}
EXPORT_SYMBOL(ath10k_core_destroy);
diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h
index f122eae..f44957f 100644
--- a/drivers/net/wireless/ath/ath10k/core.h
+++ b/drivers/net/wireless/ath/ath10k/core.h
@@ -282,12 +282,16 @@ struct ath10k_vif_iter {
* @ATH10K_FW_ERROR_DUMP_REGDUMP: Register crash dump in binary format
* @ATH10K_FW_ERROR_DUMP_STACK: Stack memory contents.
* @ATH10K_FW_ERROR_DUMP_EXC_STACK: Exception stack contents
+ * @ATH10K_FW_ERROR_DUMP_RAM_BSS: BSS area for RAM code
+ * @ATH10K_FW_ERROR_DUMP_ROM_BSS: BSS area for ROM code
*/
enum ath10k_fw_error_dump_type {
ATH10K_FW_ERROR_DUMP_DBGLOG = 0,
ATH10K_FW_ERROR_DUMP_REGDUMP = 1,
ATH10K_FW_ERROR_DUMP_STACK = 2,
ATH10K_FW_ERROR_DUMP_EXC_STACK = 3,
+ ATH10K_FW_ERROR_DUMP_RAM_BSS = 4,
+ ATH10K_FW_ERROR_DUMP_ROM_BSS = 5,

ATH10K_FW_ERROR_DUMP_MAX,
};
@@ -370,6 +374,13 @@ struct ath10k_debug {
u32 reg_dump_values[REG_DUMP_COUNT_QCA988X];
unsigned char stack_buf[ATH10K_FW_STACK_SIZE];
unsigned char exc_stack_buf[ATH10K_FW_STACK_SIZE];
+
+ unsigned char *rom_bss_buf;
+ unsigned char *ram_bss_buf;
+ unsigned int fw_ram_bss_addr;
+ unsigned int fw_ram_bss_len;
+ unsigned int fw_rom_bss_addr;
+ unsigned int fw_rom_bss_len;
};

enum ath10k_state {
diff --git a/drivers/net/wireless/ath/ath10k/debug.c b/drivers/net/wireless/ath/ath10k/debug.c
index 1cbc175..450b31d 100644
--- a/drivers/net/wireless/ath/ath10k/debug.c
+++ b/drivers/net/wireless/ath/ath10k/debug.c
@@ -618,6 +618,8 @@ static struct ath10k_dump_file_data *ath10k_build_dump_file(struct ath10k *ar)
int hdr_len = sizeof(*dump_data);
unsigned long flags;
struct timespec timestamp;
+ int rom_bss_len = 0;
+ int ram_bss_len = 0;

len = hdr_len;
len += sizeof(*dump_tlv) + sizeof(ar->debug.reg_dump_values);
@@ -625,6 +627,19 @@ static struct ath10k_dump_file_data *ath10k_build_dump_file(struct ath10k *ar)
len += sizeof(*dump_tlv) + sizeof(ar->debug.stack_buf);
len += sizeof(*dump_tlv) + sizeof(ar->debug.exc_stack_buf);

+ /* Need to store local copies to detect races */
+ spin_lock_irqsave(&ar->data_lock, flags);
+
+ ram_bss_len = ar->debug.fw_ram_bss_len;
+ rom_bss_len = ar->debug.fw_ram_bss_len;
+
+ if (ram_bss_len && ar->debug.ram_bss_buf)
+ len += sizeof(*dump_tlv) + ram_bss_len;
+ if (rom_bss_len && ar->debug.rom_bss_buf)
+ len += sizeof(*dump_tlv) + rom_bss_len;
+
+ spin_unlock_irqrestore(&ar->data_lock, flags);
+
lockdep_assert_held(&ar->conf_mutex);

sofar += hdr_len;
@@ -704,6 +719,23 @@ static struct ath10k_dump_file_data *ath10k_build_dump_file(struct ath10k *ar)
memcpy(dump_tlv->tlv_data, &ar->debug.exc_stack_buf, dump_tlv->tlv_len);
sofar += sizeof(*dump_tlv) + dump_tlv->tlv_len;

+ if (ram_bss_len >= ar->debug.fw_ram_bss_len &&
+ ar->debug.fw_ram_bss_len && ar->debug.ram_bss_buf) {
+ dump_tlv = (struct ath10k_tlv_dump_data *)(buf + sofar);
+ dump_tlv->type = ATH10K_FW_ERROR_DUMP_RAM_BSS;
+ dump_tlv->tlv_len = ar->debug.fw_ram_bss_len;
+ memcpy(dump_tlv->tlv_data, ar->debug.ram_bss_buf, dump_tlv->tlv_len);
+ sofar += sizeof(*dump_tlv) + dump_tlv->tlv_len;
+ }
+ if (rom_bss_len >= ar->debug.fw_rom_bss_len &&
+ ar->debug.fw_rom_bss_len && ar->debug.rom_bss_buf) {
+ dump_tlv = (struct ath10k_tlv_dump_data *)(buf + sofar);
+ dump_tlv->type = ATH10K_FW_ERROR_DUMP_ROM_BSS;
+ dump_tlv->tlv_len = ar->debug.fw_rom_bss_len;
+ memcpy(dump_tlv->tlv_data, ar->debug.rom_bss_buf, dump_tlv->tlv_len);
+ sofar += sizeof(*dump_tlv) + dump_tlv->tlv_len;
+ }
+
spin_unlock_irqrestore(&ar->data_lock, flags);

WARN_ON(sofar != len);
diff --git a/drivers/net/wireless/ath/ath10k/hw.h b/drivers/net/wireless/ath/ath10k/hw.h
index 38a03e4..724bd94 100644
--- a/drivers/net/wireless/ath/ath10k/hw.h
+++ b/drivers/net/wireless/ath/ath10k/hw.h
@@ -53,6 +53,7 @@ enum ath10k_fw_ie_type {
ATH10K_FW_IE_FEATURES = 2,
ATH10K_FW_IE_FW_IMAGE = 3,
ATH10K_FW_IE_OTP_IMAGE = 4,
+ ATH10K_FW_IE_BSS_INFO = 5,
};

/* Known pecularities:
diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c
index d298e14..6d43845 100644
--- a/drivers/net/wireless/ath/ath10k/pci.c
+++ b/drivers/net/wireless/ath/ath10k/pci.c
@@ -872,6 +872,37 @@ static void ath10k_save_firmware_exc_stack(struct ath10k *ar)
ar->debug.exc_stack_buf,
HI_ITEM(hi_err_stack));
}
+
+static void ath10k_check_dump_fw_bss(struct ath10k *ar, u32 addr, u32 len,
+ bool is_rom) {
+ int ret;
+
+ if (!addr)
+ return;
+ if (is_rom) {
+ if (!ar->debug.rom_bss_buf)
+ ar->debug.rom_bss_buf = kmalloc(len, GFP_ATOMIC);
+ if (!ar->debug.rom_bss_buf)
+ return;
+
+ ret = ath10k_pci_diag_read_mem(ar, addr, ar->debug.rom_bss_buf,
+ len);
+ if (ret != 0)
+ ath10k_err("failed to read FW ROM BSS memory: %d (addr %d sz %d)\n",
+ ret, addr, len);
+ } else {
+ if (!ar->debug.ram_bss_buf)
+ ar->debug.ram_bss_buf = kmalloc(len, GFP_ATOMIC);
+ if (!ar->debug.ram_bss_buf)
+ return;
+
+ ret = ath10k_pci_diag_read_mem(ar, addr, ar->debug.ram_bss_buf,
+ len);
+ if (ret != 0)
+ ath10k_err("failed to read FW RAM BSS memory: %d (addr %d sz %d)\n",
+ ret, addr, len);
+ }
+}
#endif

static void ath10k_pci_hif_dump_area(struct ath10k *ar)
@@ -928,6 +959,12 @@ static void ath10k_pci_hif_dump_area(struct ath10k *ar)
if (!ar->debug.crashed_since_read) {
ath10k_save_firmware_stack(ar);
ath10k_save_firmware_exc_stack(ar);
+
+ /* And the RAM BSS region, if we know it. */
+ ath10k_check_dump_fw_bss(ar, ar->debug.fw_ram_bss_addr,
+ ar->debug.fw_ram_bss_len, false);
+ ath10k_check_dump_fw_bss(ar, ar->debug.fw_rom_bss_addr,
+ ar->debug.fw_rom_bss_len, true);
}

spin_unlock_bh(&ar->data_lock);
--
1.7.11.7


2014-07-23 08:34:53

by Emmanuel Grumbach

[permalink] [raw]
Subject: Re: [PATCH v3 1/5] ath10k: provide firmware crash info via debugfs.

On Wed, Jul 23, 2014 at 11:25 AM, Michal Kazior <[email protected]> wrote:
> On 23 July 2014 01:02, <[email protected]> wrote:
>> From: Ben Greear <[email protected]>
>>
>> Store the firmware crash registers and last 128 or so
>> firmware debug-log ids and present them to user-space
>> via debugfs.
>>
>> Should help with figuring out why the firmware crashed.
>>

Do we plan to have a common place for all the drivers?
I'd suggest to have mac80211 / cfg80211 link to a file created by the
driver. This would allow us to have this file even if we haven't
registered to mac80211 yet - although it'd be under a different place.
What do you think?

2014-07-22 23:02:42

by Ben Greear

[permalink] [raw]
Subject: [PATCH v3 2/5] ath10k: save firmware debug log messages.

From: Ben Greear <[email protected]>

They may be dumped through the firmware dump debugfs
file.

Signed-off-by: Ben Greear <[email protected]>
---
drivers/net/wireless/ath/ath10k/wmi.c | 6 ++++++
1 file changed, 6 insertions(+)

diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c
index 6f83cae..df62115 100644
--- a/drivers/net/wireless/ath/ath10k/wmi.c
+++ b/drivers/net/wireless/ath/ath10k/wmi.c
@@ -1089,6 +1089,12 @@ static int ath10k_wmi_event_debug_mesg(struct ath10k *ar, struct sk_buff *skb)

trace_ath10k_wmi_dbglog(skb->data, skb->len);

+ /* First 4 bytes are a messages-dropped-due-to-overflow counter,
+ * and should not be recorded in the dbglog buffer, so we skip
+ * them.
+ */
+ ath10k_dbg_save_fw_dbg_buffer(ar, skb->data + 4, skb->len - 4);
+
return 0;
}

--
1.7.11.7


2014-07-22 23:02:44

by Ben Greear

[permalink] [raw]
Subject: [PATCH v3 4/5] ath10k: Dump exception stack contents on firmware crash.

From: Ben Greear <[email protected]>

Firmware developers can decode this and maybe figure out
why the firmware crashed.

Signed-off-by: Ben Greear <[email protected]>
---
drivers/net/wireless/ath/ath10k/core.h | 3 +++
drivers/net/wireless/ath/ath10k/debug.c | 8 ++++++++
drivers/net/wireless/ath/ath10k/pci.c | 13 +++++++++++--
3 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h
index 2f200f6..f122eae 100644
--- a/drivers/net/wireless/ath/ath10k/core.h
+++ b/drivers/net/wireless/ath/ath10k/core.h
@@ -281,11 +281,13 @@ struct ath10k_vif_iter {
* @ATH10K_FW_ERROR_DUMP_DBGLOG: Recent firmware debug log entries
* @ATH10K_FW_ERROR_DUMP_REGDUMP: Register crash dump in binary format
* @ATH10K_FW_ERROR_DUMP_STACK: Stack memory contents.
+ * @ATH10K_FW_ERROR_DUMP_EXC_STACK: Exception stack contents
*/
enum ath10k_fw_error_dump_type {
ATH10K_FW_ERROR_DUMP_DBGLOG = 0,
ATH10K_FW_ERROR_DUMP_REGDUMP = 1,
ATH10K_FW_ERROR_DUMP_STACK = 2,
+ ATH10K_FW_ERROR_DUMP_EXC_STACK = 3,

ATH10K_FW_ERROR_DUMP_MAX,
};
@@ -367,6 +369,7 @@ struct ath10k_debug {
struct ath10k_dbglog_entry_storage dbglog_entry_data;
u32 reg_dump_values[REG_DUMP_COUNT_QCA988X];
unsigned char stack_buf[ATH10K_FW_STACK_SIZE];
+ unsigned char exc_stack_buf[ATH10K_FW_STACK_SIZE];
};

enum ath10k_state {
diff --git a/drivers/net/wireless/ath/ath10k/debug.c b/drivers/net/wireless/ath/ath10k/debug.c
index befdc60..1cbc175 100644
--- a/drivers/net/wireless/ath/ath10k/debug.c
+++ b/drivers/net/wireless/ath/ath10k/debug.c
@@ -623,6 +623,7 @@ static struct ath10k_dump_file_data *ath10k_build_dump_file(struct ath10k *ar)
len += sizeof(*dump_tlv) + sizeof(ar->debug.reg_dump_values);
len += sizeof(*dump_tlv) + sizeof(ar->debug.dbglog_entry_data);
len += sizeof(*dump_tlv) + sizeof(ar->debug.stack_buf);
+ len += sizeof(*dump_tlv) + sizeof(ar->debug.exc_stack_buf);

lockdep_assert_held(&ar->conf_mutex);

@@ -696,6 +697,13 @@ static struct ath10k_dump_file_data *ath10k_build_dump_file(struct ath10k *ar)
memcpy(dump_tlv->tlv_data, &ar->debug.stack_buf, dump_tlv->tlv_len);
sofar += sizeof(*dump_tlv) + dump_tlv->tlv_len;

+ /* Gather firmware exception (irq) stack dump */
+ dump_tlv = (struct ath10k_tlv_dump_data *)(buf + sofar);
+ dump_tlv->type = ATH10K_FW_ERROR_DUMP_EXC_STACK;
+ dump_tlv->tlv_len = sizeof(ar->debug.exc_stack_buf);
+ memcpy(dump_tlv->tlv_data, &ar->debug.exc_stack_buf, dump_tlv->tlv_len);
+ sofar += sizeof(*dump_tlv) + dump_tlv->tlv_len;
+
spin_unlock_irqrestore(&ar->data_lock, flags);

WARN_ON(sofar != len);
diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c
index b58f1a1..d298e14 100644
--- a/drivers/net/wireless/ath/ath10k/pci.c
+++ b/drivers/net/wireless/ath/ath10k/pci.c
@@ -864,8 +864,15 @@ static void ath10k_save_firmware_stack(struct ath10k *ar)
ath10k_save_firmware_mem_helper(ar, ATH10K_FW_STACK_SIZE,
ar->debug.stack_buf, HI_ITEM(hi_stack));
}
-#endif

+/* Save the firmware exception stack */
+static void ath10k_save_firmware_exc_stack(struct ath10k *ar)
+{
+ ath10k_save_firmware_mem_helper(ar, ATH10K_FW_STACK_SIZE,
+ ar->debug.exc_stack_buf,
+ HI_ITEM(hi_err_stack));
+}
+#endif

static void ath10k_pci_hif_dump_area(struct ath10k *ar)
{
@@ -918,8 +925,10 @@ static void ath10k_pci_hif_dump_area(struct ath10k *ar)
#ifdef CONFIG_ATH10K_DEBUGFS
spin_lock_bh(&ar->data_lock);

- if (!ar->debug.crashed_since_read)
+ if (!ar->debug.crashed_since_read) {
ath10k_save_firmware_stack(ar);
+ ath10k_save_firmware_exc_stack(ar);
+ }

spin_unlock_bh(&ar->data_lock);

--
1.7.11.7


2014-07-22 23:02:43

by Ben Greear

[permalink] [raw]
Subject: [PATCH v3 3/5] ath10k: save firmware stack upon firmware crash.

From: Ben Greear <[email protected]>

Should help debug firmware crashes, and give users a way
to provide some useful debug reports to firmware developers.

Signed-off-by: Ben Greear <[email protected]>
---
drivers/net/wireless/ath/ath10k/core.h | 3 +++
drivers/net/wireless/ath/ath10k/debug.c | 9 ++++++++
drivers/net/wireless/ath/ath10k/hw.h | 1 +
drivers/net/wireless/ath/ath10k/pci.c | 41 +++++++++++++++++++++++++++++++++
4 files changed, 54 insertions(+)

diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h
index 5c0c5bf..2f200f6 100644
--- a/drivers/net/wireless/ath/ath10k/core.h
+++ b/drivers/net/wireless/ath/ath10k/core.h
@@ -280,10 +280,12 @@ struct ath10k_vif_iter {
* enum ath10k_fw_error_dump_type - types of data in the dump file
* @ATH10K_FW_ERROR_DUMP_DBGLOG: Recent firmware debug log entries
* @ATH10K_FW_ERROR_DUMP_REGDUMP: Register crash dump in binary format
+ * @ATH10K_FW_ERROR_DUMP_STACK: Stack memory contents.
*/
enum ath10k_fw_error_dump_type {
ATH10K_FW_ERROR_DUMP_DBGLOG = 0,
ATH10K_FW_ERROR_DUMP_REGDUMP = 1,
+ ATH10K_FW_ERROR_DUMP_STACK = 2,

ATH10K_FW_ERROR_DUMP_MAX,
};
@@ -364,6 +366,7 @@ struct ath10k_debug {
bool crashed_since_read;
struct ath10k_dbglog_entry_storage dbglog_entry_data;
u32 reg_dump_values[REG_DUMP_COUNT_QCA988X];
+ unsigned char stack_buf[ATH10K_FW_STACK_SIZE];
};

enum ath10k_state {
diff --git a/drivers/net/wireless/ath/ath10k/debug.c b/drivers/net/wireless/ath/ath10k/debug.c
index b952775..befdc60 100644
--- a/drivers/net/wireless/ath/ath10k/debug.c
+++ b/drivers/net/wireless/ath/ath10k/debug.c
@@ -622,6 +622,7 @@ static struct ath10k_dump_file_data *ath10k_build_dump_file(struct ath10k *ar)
len = hdr_len;
len += sizeof(*dump_tlv) + sizeof(ar->debug.reg_dump_values);
len += sizeof(*dump_tlv) + sizeof(ar->debug.dbglog_entry_data);
+ len += sizeof(*dump_tlv) + sizeof(ar->debug.stack_buf);

lockdep_assert_held(&ar->conf_mutex);

@@ -688,8 +689,16 @@ static struct ath10k_dump_file_data *ath10k_build_dump_file(struct ath10k *ar)
memcpy(dump_tlv->tlv_data, &ar->debug.reg_dump_values, dump_tlv->tlv_len);
sofar += sizeof(*dump_tlv) + dump_tlv->tlv_len;

+ /* Gather firmware stack dump */
+ dump_tlv = (struct ath10k_tlv_dump_data *)(buf + sofar);
+ dump_tlv->type = ATH10K_FW_ERROR_DUMP_STACK;
+ dump_tlv->tlv_len = sizeof(ar->debug.stack_buf);
+ memcpy(dump_tlv->tlv_data, &ar->debug.stack_buf, dump_tlv->tlv_len);
+ sofar += sizeof(*dump_tlv) + dump_tlv->tlv_len;
+
spin_unlock_irqrestore(&ar->data_lock, flags);

+ WARN_ON(sofar != len);
return dump_data;
}

diff --git a/drivers/net/wireless/ath/ath10k/hw.h b/drivers/net/wireless/ath/ath10k/hw.h
index d4df3f0..38a03e4 100644
--- a/drivers/net/wireless/ath/ath10k/hw.h
+++ b/drivers/net/wireless/ath/ath10k/hw.h
@@ -39,6 +39,7 @@
#define ATH10K_FIRMWARE_MAGIC "QCA-ATH10K"

#define REG_DUMP_COUNT_QCA988X 60
+#define ATH10K_FW_STACK_SIZE 4096

struct ath10k_fw_ie {
__le32 id;
diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c
index 24ddc22..b58f1a1 100644
--- a/drivers/net/wireless/ath/ath10k/pci.c
+++ b/drivers/net/wireless/ath/ath10k/pci.c
@@ -833,6 +833,40 @@ static u16 ath10k_pci_hif_get_free_queue_number(struct ath10k *ar, u8 pipe)
return ath10k_ce_num_free_src_entries(ar_pci->pipe_info[pipe].ce_hdl);
}

+static void ath10k_save_firmware_mem_helper(struct ath10k *ar, u32 sz,
+ unsigned char *to, u32 addr)
+{
+ u32 host_addr;
+ u32 reg_dump_area;
+ int ret;
+
+ host_addr = host_interest_item_address(addr);
+
+ ret = ath10k_pci_diag_read_mem(ar, host_addr, &reg_dump_area,
+ sizeof(u32));
+ if (ret != 0) {
+ ath10k_warn("failed to read FW addr: %d (addr %d)\n",
+ ret, addr);
+ return;
+ }
+
+ ret = ath10k_pci_diag_read_mem(ar, reg_dump_area, to, sz);
+ if (ret != 0)
+ ath10k_err("failed to read FW memory: %d (addr %d sz %d)\n",
+ ret, reg_dump_area, sz);
+}
+
+#ifdef CONFIG_ATH10K_DEBUGFS
+/* Save the main firmware stack */
+static void ath10k_save_firmware_stack(struct ath10k *ar)
+{
+ BUILD_BUG_ON(ATH10K_FW_STACK_SIZE % 4);
+ ath10k_save_firmware_mem_helper(ar, ATH10K_FW_STACK_SIZE,
+ ar->debug.stack_buf, HI_ITEM(hi_stack));
+}
+#endif
+
+
static void ath10k_pci_hif_dump_area(struct ath10k *ar)
{
u32 reg_dump_area = 0;
@@ -882,6 +916,13 @@ static void ath10k_pci_hif_dump_area(struct ath10k *ar)
reg_dump_values[i + 3]);

#ifdef CONFIG_ATH10K_DEBUGFS
+ spin_lock_bh(&ar->data_lock);
+
+ if (!ar->debug.crashed_since_read)
+ ath10k_save_firmware_stack(ar);
+
+ spin_unlock_bh(&ar->data_lock);
+
/* Dump the debug logs on the target */
host_addr = host_interest_item_address(HI_ITEM(hi_dbglog_hdr));
ret = ath10k_pci_diag_read_mem(ar, host_addr,
--
1.7.11.7