2022-03-16 23:29:15

by Ashish Mhetre

[permalink] [raw]
Subject: [Patch v5 2/4] memory: tegra: Add MC error logging on tegra186 onward

Add new function 'get_int_channel' in tegra_mc_soc structure which is
implemented by tegra SOCs which support multiple MC channels. This
function returns the channel which should be used to get the information
of interrupts.
Remove static from tegra30_mc_handle_irq and use it as interrupt handler
for MC interrupts on tegra186, tegra194 and tegra234 to log the errors.
Add error specific MC status and address register bits and use them on
tegra186, tegra194 and tegra234.
Add error logging for generalized carveout interrupt on tegra186, tegra194
and tegra234.
Add error logging for route sanity interrupt on tegra194 and tegra234.
Add register for higher bits of error address which is available on
tegra194 and tegra234.
Add a boolean variable 'has_addr_hi_reg' in tegra_mc_soc structure which
will be true if soc has register for higher bits of memory controller
error address. Set it true for tegra194 and tegra234.

Signed-off-by: Ashish Mhetre <[email protected]>
Reported-by: kernel test robot <[email protected]>
Reported-by: Dan Carpenter <[email protected]>
---
drivers/memory/tegra/mc.c | 108 +++++++++++++++++++++++++++-----
drivers/memory/tegra/mc.h | 37 ++++++++++-
drivers/memory/tegra/tegra186.c | 44 +++++++++++++
drivers/memory/tegra/tegra194.c | 43 +++++++++++++
drivers/memory/tegra/tegra234.c | 58 +++++++++++++++++
include/soc/tegra/mc.h | 4 ++
6 files changed, 278 insertions(+), 16 deletions(-)

diff --git a/drivers/memory/tegra/mc.c b/drivers/memory/tegra/mc.c
index 3cda1d9ad32a..c1c8b5c2ab7a 100644
--- a/drivers/memory/tegra/mc.c
+++ b/drivers/memory/tegra/mc.c
@@ -508,14 +508,34 @@ int tegra30_mc_probe(struct tegra_mc *mc)
return 0;
}

-static irqreturn_t tegra30_mc_handle_irq(int irq, void *data)
+const struct tegra_mc_ops tegra30_mc_ops = {
+ .probe = tegra30_mc_probe,
+ .handle_irq = tegra30_mc_handle_irq,
+};
+#endif
+
+irqreturn_t tegra30_mc_handle_irq(int irq, void *data)
{
struct tegra_mc *mc = data;
unsigned long status;
+ bool mc_has_channels;
unsigned int bit;
+ int channel;
+
+ mc_has_channels = mc->soc->num_channels && mc->soc->get_int_channel;
+ if (mc_has_channels) {
+ int err;
+
+ err = mc->soc->get_int_channel(mc, &channel);
+ if (err < 0)
+ return IRQ_NONE;
+
+ /* mask all interrupts to avoid flooding */
+ status = mc_ch_readl(mc, channel, MC_INTSTATUS) & mc->soc->intmask;
+ } else {
+ status = mc_readl(mc, MC_INTSTATUS) & mc->soc->intmask;
+ }

- /* mask all interrupts to avoid flooding */
- status = mc_readl(mc, MC_INTSTATUS) & mc->soc->intmask;
if (!status)
return IRQ_NONE;

@@ -523,18 +543,70 @@ static irqreturn_t tegra30_mc_handle_irq(int irq, void *data)
const char *error = tegra_mc_status_names[bit] ?: "unknown";
const char *client = "unknown", *desc;
const char *direction, *secure;
+ u32 status_reg, addr_reg;
+ u32 intmask = BIT(bit);
phys_addr_t addr = 0;
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+ u32 addr_hi_reg = 0;
+#endif
unsigned int i;
char perm[7];
u8 id, type;
u32 value;

- value = mc_readl(mc, MC_ERR_STATUS);
+ switch (intmask) {
+ case MC_INT_DECERR_VPR:
+ status_reg = MC_ERR_VPR_STATUS;
+ addr_reg = MC_ERR_VPR_ADR;
+ break;
+
+ case MC_INT_SECERR_SEC:
+ status_reg = MC_ERR_SEC_STATUS;
+ addr_reg = MC_ERR_SEC_ADR;
+ break;
+
+ case MC_INT_DECERR_MTS:
+ status_reg = MC_ERR_MTS_STATUS;
+ addr_reg = MC_ERR_MTS_ADR;
+ break;
+
+ case MC_INT_DECERR_GENERALIZED_CARVEOUT:
+ status_reg = MC_ERR_GENERALIZED_CARVEOUT_STATUS;
+ addr_reg = MC_ERR_GENERALIZED_CARVEOUT_ADR;
+ break;
+
+ case MC_INT_DECERR_ROUTE_SANITY:
+ status_reg = MC_ERR_ROUTE_SANITY_STATUS;
+ addr_reg = MC_ERR_ROUTE_SANITY_ADR;
+ break;
+
+ default:
+ status_reg = MC_ERR_STATUS;
+ addr_reg = MC_ERR_ADR;
+
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+ if (mc->soc->has_addr_hi_reg)
+ addr_hi_reg = MC_ERR_ADR_HI;
+#endif
+ break;
+ }
+
+ if (mc_has_channels)
+ value = mc_ch_readl(mc, channel, status_reg);
+ else
+ value = mc_readl(mc, status_reg);

#ifdef CONFIG_PHYS_ADDR_T_64BIT
if (mc->soc->num_address_bits > 32) {
- addr = ((value >> MC_ERR_STATUS_ADR_HI_SHIFT) &
- MC_ERR_STATUS_ADR_HI_MASK);
+ if (addr_hi_reg) {
+ if (mc_has_channels)
+ addr = mc_ch_readl(mc, channel, addr_hi_reg);
+ else
+ addr = mc_readl(mc, addr_hi_reg);
+ } else {
+ addr = ((value >> MC_ERR_STATUS_ADR_HI_SHIFT) &
+ MC_ERR_STATUS_ADR_HI_MASK);
+ }
addr <<= 32;
}
#endif
@@ -591,7 +663,10 @@ static irqreturn_t tegra30_mc_handle_irq(int irq, void *data)
break;
}

- value = mc_readl(mc, MC_ERR_ADR);
+ if (mc_has_channels)
+ value = mc_ch_readl(mc, channel, addr_reg);
+ else
+ value = mc_readl(mc, addr_reg);
addr |= value;

dev_err_ratelimited(mc->dev, "%s: %s%s @%pa: %s (%s%s)\n",
@@ -600,17 +675,14 @@ static irqreturn_t tegra30_mc_handle_irq(int irq, void *data)
}

/* clear interrupts */
- mc_writel(mc, status, MC_INTSTATUS);
+ if (mc_has_channels)
+ mc_ch_writel(mc, channel, status, MC_INTSTATUS);
+ else
+ mc_writel(mc, status, MC_INTSTATUS);

return IRQ_HANDLED;
}

-const struct tegra_mc_ops tegra30_mc_ops = {
- .probe = tegra30_mc_probe,
- .handle_irq = tegra30_mc_handle_irq,
-};
-#endif
-
const char *const tegra_mc_status_names[32] = {
[ 1] = "External interrupt",
[ 6] = "EMEM address decode error",
@@ -622,6 +694,8 @@ const char *const tegra_mc_status_names[32] = {
[12] = "VPR violation",
[13] = "Secure carveout violation",
[16] = "MTS carveout violation",
+ [17] = "Generalized carveout violation",
+ [20] = "Route Sanity error",
};

const char *const tegra_mc_error_names[8] = {
@@ -770,7 +844,11 @@ static int tegra_mc_probe(struct platform_device *pdev)

WARN(!mc->soc->client_id_mask, "missing client ID mask for this SoC\n");

- mc_writel(mc, mc->soc->intmask, MC_INTMASK);
+ if (mc->soc->num_channels && mc->mcb_regs)
+ mc_ch_writel(mc, MC_BROADCAST_CHANNEL, mc->soc->intmask,
+ MC_INTMASK);
+ else
+ mc_writel(mc, mc->soc->intmask, MC_INTMASK);

err = devm_request_irq(&pdev->dev, mc->irq, mc->soc->ops->handle_irq, 0,
dev_name(&pdev->dev), mc);
diff --git a/drivers/memory/tegra/mc.h b/drivers/memory/tegra/mc.h
index 062886e94c04..3836c35ddd7a 100644
--- a/drivers/memory/tegra/mc.h
+++ b/drivers/memory/tegra/mc.h
@@ -43,7 +43,21 @@
#define MC_EMEM_ARB_OVERRIDE 0xe8
#define MC_TIMING_CONTROL_DBG 0xf8
#define MC_TIMING_CONTROL 0xfc
-
+#define MC_ERR_VPR_STATUS 0x654
+#define MC_ERR_VPR_ADR 0x658
+#define MC_ERR_SEC_STATUS 0x67c
+#define MC_ERR_SEC_ADR 0x680
+#define MC_ERR_MTS_STATUS 0x9b0
+#define MC_ERR_MTS_ADR 0x9b4
+#define MC_ERR_ROUTE_SANITY_STATUS 0x9c0
+#define MC_ERR_ROUTE_SANITY_ADR 0x9c4
+#define MC_ERR_GENERALIZED_CARVEOUT_STATUS 0xc00
+#define MC_ERR_GENERALIZED_CARVEOUT_ADR 0xc04
+#define MC_GLOBAL_INTSTATUS 0xf24
+#define MC_ERR_ADR_HI 0x11fc
+
+#define MC_INT_DECERR_ROUTE_SANITY BIT(20)
+#define MC_INT_DECERR_GENERALIZED_CARVEOUT BIT(17)
#define MC_INT_DECERR_MTS BIT(16)
#define MC_INT_SECERR_SEC BIT(13)
#define MC_INT_DECERR_VPR BIT(12)
@@ -78,6 +92,8 @@

#define MC_TIMING_UPDATE BIT(0)

+#define MC_BROADCAST_CHANNEL ~0
+
static inline u32 tegra_mc_scale_percents(u64 val, unsigned int percents)
{
val = val * percents;
@@ -92,6 +108,24 @@ icc_provider_to_tegra_mc(struct icc_provider *provider)
return container_of(provider, struct tegra_mc, provider);
}

+static inline u32 mc_ch_readl(const struct tegra_mc *mc, int ch,
+ unsigned long offset)
+{
+ if (ch == MC_BROADCAST_CHANNEL)
+ return readl_relaxed(mc->mcb_regs + offset);
+
+ return readl_relaxed(mc->mc_regs[ch] + offset);
+}
+
+static inline void mc_ch_writel(const struct tegra_mc *mc, int ch,
+ u32 value, unsigned long offset)
+{
+ if (ch == MC_BROADCAST_CHANNEL)
+ writel_relaxed(value, mc->mcb_regs + offset);
+ else
+ writel_relaxed(value, mc->mc_regs[ch] + offset);
+}
+
static inline u32 mc_readl(const struct tegra_mc *mc, unsigned long offset)
{
return readl_relaxed(mc->regs + offset);
@@ -156,6 +190,7 @@ extern const struct tegra_mc_ops tegra30_mc_ops;
extern const struct tegra_mc_ops tegra186_mc_ops;
#endif

+irqreturn_t tegra30_mc_handle_irq(int irq, void *data);
extern const char * const tegra_mc_status_names[32];
extern const char * const tegra_mc_error_names[8];

diff --git a/drivers/memory/tegra/tegra186.c b/drivers/memory/tegra/tegra186.c
index a8a45e6ff1f1..1d8a93807f91 100644
--- a/drivers/memory/tegra/tegra186.c
+++ b/drivers/memory/tegra/tegra186.c
@@ -16,6 +16,8 @@
#include <dt-bindings/memory/tegra186-mc.h>
#endif

+#include "mc.h"
+
#define MC_SID_STREAMID_OVERRIDE_MASK GENMASK(7, 0)
#define MC_SID_STREAMID_SECURITY_WRITE_ACCESS_DISABLED BIT(16)
#define MC_SID_STREAMID_SECURITY_OVERRIDE BIT(8)
@@ -195,6 +197,7 @@ const struct tegra_mc_ops tegra186_mc_ops = {
.resume = tegra186_mc_resume,
.probe_device = tegra186_mc_probe_device,
.map_regs = tegra186_mc_map_regs,
+ .handle_irq = tegra30_mc_handle_irq,
};

#if defined(CONFIG_ARCH_TEGRA_186_SOC)
@@ -922,11 +925,52 @@ static const struct tegra_mc_client tegra186_mc_clients[] = {
},
};

+static int tegra186_mc_get_channel(const struct tegra_mc *mc, int *mc_channel)
+{
+ u32 status;
+
+ status = mc_ch_readl(mc, MC_BROADCAST_CHANNEL, MC_GLOBAL_INTSTATUS);
+
+ switch (status & mc->soc->int_channel_mask) {
+ case BIT(0):
+ *mc_channel = 0;
+ break;
+
+ case BIT(1):
+ *mc_channel = 1;
+ break;
+
+ case BIT(2):
+ *mc_channel = 2;
+ break;
+
+ case BIT(3):
+ *mc_channel = 3;
+ break;
+
+ case BIT(24):
+ *mc_channel = MC_BROADCAST_CHANNEL;
+ break;
+
+ default:
+ pr_err("Unknown interrupt source\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
const struct tegra_mc_soc tegra186_mc_soc = {
.num_clients = ARRAY_SIZE(tegra186_mc_clients),
.clients = tegra186_mc_clients,
.num_address_bits = 40,
.num_channels = 4,
+ .client_id_mask = 0xff,
+ .intmask = MC_INT_DECERR_GENERALIZED_CARVEOUT | MC_INT_DECERR_MTS |
+ MC_INT_SECERR_SEC | MC_INT_DECERR_VPR |
+ MC_INT_SECURITY_VIOLATION | MC_INT_DECERR_EMEM,
.ops = &tegra186_mc_ops,
+ .int_channel_mask = 0x100000f,
+ .get_int_channel = tegra186_mc_get_channel,
};
#endif
diff --git a/drivers/memory/tegra/tegra194.c b/drivers/memory/tegra/tegra194.c
index 94001174deaf..499bb71549c6 100644
--- a/drivers/memory/tegra/tegra194.c
+++ b/drivers/memory/tegra/tegra194.c
@@ -1343,10 +1343,53 @@ static const struct tegra_mc_client tegra194_mc_clients[] = {
},
};

+static int tegra194_mc_get_channel(const struct tegra_mc *mc, int *mc_channel)
+{
+ u32 status;
+
+ status = mc_ch_readl(mc, MC_BROADCAST_CHANNEL, MC_GLOBAL_INTSTATUS);
+
+ switch (status & mc->soc->int_channel_mask) {
+ case BIT(8):
+ *mc_channel = 0;
+ break;
+
+ case BIT(9):
+ *mc_channel = 1;
+ break;
+
+ case BIT(10):
+ *mc_channel = 2;
+ break;
+
+ case BIT(11):
+ *mc_channel = 3;
+ break;
+
+ case BIT(25):
+ *mc_channel = MC_BROADCAST_CHANNEL;
+ break;
+
+ default:
+ pr_err("Unknown interrupt source\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
const struct tegra_mc_soc tegra194_mc_soc = {
.num_clients = ARRAY_SIZE(tegra194_mc_clients),
.clients = tegra194_mc_clients,
.num_address_bits = 40,
.num_channels = 16,
+ .client_id_mask = 0xff,
+ .intmask = MC_INT_DECERR_ROUTE_SANITY |
+ MC_INT_DECERR_GENERALIZED_CARVEOUT | MC_INT_DECERR_MTS |
+ MC_INT_SECERR_SEC | MC_INT_DECERR_VPR |
+ MC_INT_SECURITY_VIOLATION | MC_INT_DECERR_EMEM,
+ .has_addr_hi_reg = true,
.ops = &tegra186_mc_ops,
+ .int_channel_mask = 0x2000f00,
+ .get_int_channel = tegra194_mc_get_channel,
};
diff --git a/drivers/memory/tegra/tegra234.c b/drivers/memory/tegra/tegra234.c
index 6335a132be2d..0865dd1b48e9 100644
--- a/drivers/memory/tegra/tegra234.c
+++ b/drivers/memory/tegra/tegra234.c
@@ -93,10 +93,68 @@ static const struct tegra_mc_client tegra234_mc_clients[] = {
},
};

+static int tegra234_mc_get_channel(const struct tegra_mc *mc, int *mc_channel)
+{
+ u32 status;
+
+ status = mc_ch_readl(mc, MC_BROADCAST_CHANNEL, MC_GLOBAL_INTSTATUS);
+
+ switch (status & mc->soc->int_channel_mask) {
+ case BIT(8):
+ *mc_channel = 0;
+ break;
+
+ case BIT(9):
+ *mc_channel = 1;
+ break;
+
+ case BIT(10):
+ *mc_channel = 2;
+ break;
+
+ case BIT(11):
+ *mc_channel = 3;
+ break;
+
+ case BIT(12):
+ *mc_channel = 4;
+ break;
+
+ case BIT(13):
+ *mc_channel = 5;
+ break;
+
+ case BIT(14):
+ *mc_channel = 6;
+ break;
+
+ case BIT(15):
+ *mc_channel = 7;
+ break;
+
+ case BIT(25):
+ *mc_channel = MC_BROADCAST_CHANNEL;
+ break;
+
+ default:
+ pr_err("Unknown interrupt source\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
const struct tegra_mc_soc tegra234_mc_soc = {
.num_clients = ARRAY_SIZE(tegra234_mc_clients),
.clients = tegra234_mc_clients,
.num_address_bits = 40,
.num_channels = 16,
+ .intmask = MC_INT_DECERR_ROUTE_SANITY |
+ MC_INT_DECERR_GENERALIZED_CARVEOUT | MC_INT_DECERR_MTS |
+ MC_INT_SECERR_SEC | MC_INT_DECERR_VPR |
+ MC_INT_SECURITY_VIOLATION | MC_INT_DECERR_EMEM,
+ .has_addr_hi_reg = true,
.ops = &tegra186_mc_ops,
+ .int_channel_mask = 0x200ff00,
+ .get_int_channel = tegra234_mc_get_channel,
};
diff --git a/include/soc/tegra/mc.h b/include/soc/tegra/mc.h
index 92f810c55b43..6f115436e344 100644
--- a/include/soc/tegra/mc.h
+++ b/include/soc/tegra/mc.h
@@ -203,6 +203,8 @@ struct tegra_mc_soc {
const struct tegra_smmu_soc *smmu;

u32 intmask;
+ u32 int_channel_mask;
+ bool has_addr_hi_reg;

const struct tegra_mc_reset_ops *reset_ops;
const struct tegra_mc_reset *resets;
@@ -210,6 +212,8 @@ struct tegra_mc_soc {

const struct tegra_mc_icc_ops *icc_ops;
const struct tegra_mc_ops *ops;
+
+ int (*get_int_channel)(const struct tegra_mc *mc, int *mc_channel);
};

struct tegra_mc {
--
2.17.1


2022-03-20 16:28:43

by Dmitry Osipenko

[permalink] [raw]
Subject: Re: [Patch v5 2/4] memory: tegra: Add MC error logging on tegra186 onward

16.03.2022 12:25, Ashish Mhetre пишет:
> +irqreturn_t tegra30_mc_handle_irq(int irq, void *data)
> {
> struct tegra_mc *mc = data;
> unsigned long status;
> + bool mc_has_channels;
> unsigned int bit;
> + int channel;

unsigned int

> + mc_has_channels = mc->soc->num_channels && mc->soc->get_int_channel;
> + if (mc_has_channels) {
> + int err;
> +
> + err = mc->soc->get_int_channel(mc, &channel);
> + if (err < 0)
> + return IRQ_NONE;
> +
> + /* mask all interrupts to avoid flooding */
> + status = mc_ch_readl(mc, channel, MC_INTSTATUS) & mc->soc->intmask;
> + } else {
> + status = mc_readl(mc, MC_INTSTATUS) & mc->soc->intmask;
> + }

So if mc_has_channels is false when it should be true, you're going
to handle the interrupt incorrectly?

2022-03-21 09:47:06

by Dmitry Osipenko

[permalink] [raw]
Subject: Re: [Patch v5 2/4] memory: tegra: Add MC error logging on tegra186 onward

16.03.2022 12:25, Ashish Mhetre пишет:
> +static int tegra186_mc_get_channel(const struct tegra_mc *mc, int *mc_channel)
> +{
> + u32 status;
> +
> + status = mc_ch_readl(mc, MC_BROADCAST_CHANNEL, MC_GLOBAL_INTSTATUS);
> +
> + switch (status & mc->soc->int_channel_mask) {
> + case BIT(0):
> + *mc_channel = 0;
> + break;
> +
> + case BIT(1):
> + *mc_channel = 1;
> + break;
> +
> + case BIT(2):
> + *mc_channel = 2;
> + break;
> +
> + case BIT(3):
> + *mc_channel = 3;
> + break;

This won't work if multiple bits are set at once.

2022-03-21 14:34:15

by Dmitry Osipenko

[permalink] [raw]
Subject: Re: [Patch v5 2/4] memory: tegra: Add MC error logging on tegra186 onward

16.03.2022 12:25, Ashish Mhetre пишет:
> +static int tegra186_mc_get_channel(const struct tegra_mc *mc, int *mc_channel)
> +{
> + u32 status;
> +
> + status = mc_ch_readl(mc, MC_BROADCAST_CHANNEL, MC_GLOBAL_INTSTATUS);

This mc_ch_readl(MC_GLOBAL_INTSTATUS) is replicated by every
tegraxxx_mc_get_channel(), it should be a part of common interrupt
handler, IMO.

And then I'd rename that callback to global_intstatus_to_channel().

> + switch (status & mc->soc->int_channel_mask) {
> + case BIT(0):
> + *mc_channel = 0;
> + break;
> +
> + case BIT(1):
> + *mc_channel = 1;
> + break;
> +
> + case BIT(2):
> + *mc_channel = 2;
> + break;
> +
> + case BIT(3):
> + *mc_channel = 3;
> + break;
> +
> + case BIT(24):
> + *mc_channel = MC_BROADCAST_CHANNEL;
> + break;
> +
> + default:
> + pr_err("Unknown interrupt source\n");

Use dev_err_ratelimited("unknown interrupt channel 0x%08x\n", status) here,
and move it to the common interrupt handler.

2022-03-21 22:50:16

by Dmitry Osipenko

[permalink] [raw]
Subject: Re: [Patch v5 2/4] memory: tegra: Add MC error logging on tegra186 onward

16.03.2022 12:25, Ashish Mhetre пишет:
> diff --git a/include/soc/tegra/mc.h b/include/soc/tegra/mc.h
> index 92f810c55b43..6f115436e344 100644
> --- a/include/soc/tegra/mc.h
> +++ b/include/soc/tegra/mc.h
> @@ -203,6 +203,8 @@ struct tegra_mc_soc {
> const struct tegra_smmu_soc *smmu;
>
> u32 intmask;
> + u32 int_channel_mask;

ch_intmask

> + bool has_addr_hi_reg;
>
> const struct tegra_mc_reset_ops *reset_ops;
> const struct tegra_mc_reset *resets;
> @@ -210,6 +212,8 @@ struct tegra_mc_soc {
>
> const struct tegra_mc_icc_ops *icc_ops;
> const struct tegra_mc_ops *ops;
> +
> + int (*get_int_channel)(const struct tegra_mc *mc, int *mc_channel);

This should be a part of tegra_mc_ops.

2022-03-21 23:09:39

by Dmitry Osipenko

[permalink] [raw]
Subject: Re: [Patch v5 2/4] memory: tegra: Add MC error logging on tegra186 onward

19.03.2022 18:50, Dmitry Osipenko пишет:
>> + mc_has_channels = mc->soc->num_channels && mc->soc->get_int_channel;
>> + if (mc_has_channels) {
>> + int err;
>> +
>> + err = mc->soc->get_int_channel(mc, &channel);
>> + if (err < 0)
>> + return IRQ_NONE;
>> +
>> + /* mask all interrupts to avoid flooding */
>> + status = mc_ch_readl(mc, channel, MC_INTSTATUS) & mc->soc->intmask;
>> + } else {
>> + status = mc_readl(mc, MC_INTSTATUS) & mc->soc->intmask;
>> + }
> So if mc_has_channels=false, while it should be true, then you're going
> to handle interrupt incorrectly?

I see now that num_channels and get_int_channel are const, so I don't
see why mc_has_channels variable is needed. Use mc->soc->num_channels.

if (mc->soc->num_channels) {

2022-03-22 21:17:10

by Ashish Mhetre

[permalink] [raw]
Subject: Re: [Patch v5 2/4] memory: tegra: Add MC error logging on tegra186 onward



On 3/19/2022 9:20 PM, Dmitry Osipenko wrote:
> External email: Use caution opening links or attachments
>
>
> 16.03.2022 12:25, Ashish Mhetre пишет:
>> +irqreturn_t tegra30_mc_handle_irq(int irq, void *data)
>> {
>> struct tegra_mc *mc = data;
>> unsigned long status;
>> + bool mc_has_channels;
>> unsigned int bit;
>> + int channel;
>
> unsigned int
>
Okay, I will update in next version.

>> + mc_has_channels = mc->soc->num_channels && mc->soc->get_int_channel;
>> + if (mc_has_channels) {
>> + int err;
>> +
>> + err = mc->soc->get_int_channel(mc, &channel);
>> + if (err < 0)
>> + return IRQ_NONE;
>> +
>> + /* mask all interrupts to avoid flooding */
>> + status = mc_ch_readl(mc, channel, MC_INTSTATUS) & mc->soc->intmask;
>> + } else {
>> + status = mc_readl(mc, MC_INTSTATUS) & mc->soc->intmask;
>> + }
>
> So if mc_has_channels=false, while it should be true, then you're going
> to handle interrupt incorrectly?

I am not able to understand the case where this can happen.
num_channels and get_int_channel are both defined on T186
onwards, where mc_has_channels is expected to be true.
Do you mean that we need to add more chip-specific checks
for the case where mc_has_channels is false?

2022-03-22 21:45:33

by Ashish Mhetre

[permalink] [raw]
Subject: Re: [Patch v5 2/4] memory: tegra: Add MC error logging on tegra186 onward



On 3/19/2022 9:29 PM, Dmitry Osipenko wrote:
> External email: Use caution opening links or attachments
>
>
> 16.03.2022 12:25, Ashish Mhetre пишет:
>> diff --git a/include/soc/tegra/mc.h b/include/soc/tegra/mc.h
>> index 92f810c55b43..6f115436e344 100644
>> --- a/include/soc/tegra/mc.h
>> +++ b/include/soc/tegra/mc.h
>> @@ -203,6 +203,8 @@ struct tegra_mc_soc {
>> const struct tegra_smmu_soc *smmu;
>>
>> u32 intmask;
>> + u32 int_channel_mask;
>
> ch_intmask
>
Okay, I will update.

>> + bool has_addr_hi_reg;
>>
>> const struct tegra_mc_reset_ops *reset_ops;
>> const struct tegra_mc_reset *resets;
>> @@ -210,6 +212,8 @@ struct tegra_mc_soc {
>>
>> const struct tegra_mc_icc_ops *icc_ops;
>> const struct tegra_mc_ops *ops;
>> +
>> + int (*get_int_channel)(const struct tegra_mc *mc, int *mc_channel);
>
> This should be a part of tegra_mc_ops.

tegra_mc_ops is common to T186, T194 and T234, i.e. all of them use
tegra186_mc_ops. The get_int_channel function has to be implemented
differently for each of these SoCs, so I put it in tegra_mc_soc.

2022-03-23 13:47:30

by Ashish Mhetre

[permalink] [raw]
Subject: Re: [Patch v5 2/4] memory: tegra: Add MC error logging on tegra186 onward



On 3/19/2022 9:44 PM, Dmitry Osipenko wrote:
> External email: Use caution opening links or attachments
>
>
> 16.03.2022 12:25, Ashish Mhetre пишет:
>> +static int tegra186_mc_get_channel(const struct tegra_mc *mc, int *mc_channel)
>> +{
>> + u32 status;
>> +
>> + status = mc_ch_readl(mc, MC_BROADCAST_CHANNEL, MC_GLOBAL_INTSTATUS);
>
> This mc_ch_readl(MC_GLOBAL_INTSTATUS) is replicated by every
> tegraxxx_mc_get_channel(), it should be a part of common interrupt
> handler, IMO.
>
> And then I'd rename that callback to global_intstatus_to_channel().
>
Okay, I'll do that in next version.

>> + switch (status & mc->soc->int_channel_mask) {
>> + case BIT(0):
>> + *mc_channel = 0;
>> + break;
>> +
>> + case BIT(1):
>> + *mc_channel = 1;
>> + break;
>> +
>> + case BIT(2):
>> + *mc_channel = 2;
>> + break;
>> +
>> + case BIT(3):
>> + *mc_channel = 3;
>> + break;
>> +
>> + case BIT(24):
>> + *mc_channel = MC_BROADCAST_CHANNEL;
>> + break;
>> +
>> + default:
>> + pr_err("Unknown interrupt source\n");
>
> dev_err_ratelimited("unknown interrupt channel 0x%08x\n", status) and
> should be moved to the common interrupt handler.
>
So just return an error from the default case and handle it in the common
interrupt handler with this print, right? I'll update this in the next
version.

2022-03-23 16:47:19

by Ashish Mhetre

[permalink] [raw]
Subject: Re: [Patch v5 2/4] memory: tegra: Add MC error logging on tegra186 onward



On 3/19/2022 9:49 PM, Dmitry Osipenko wrote:
> External email: Use caution opening links or attachments
>
>
> 19.03.2022 18:50, Dmitry Osipenko пишет:
>>> + mc_has_channels = mc->soc->num_channels && mc->soc->get_int_channel;
>>> + if (mc_has_channels) {
>>> + int err;
>>> +
>>> + err = mc->soc->get_int_channel(mc, &channel);
>>> + if (err < 0)
>>> + return IRQ_NONE;
>>> +
>>> + /* mask all interrupts to avoid flooding */
>>> + status = mc_ch_readl(mc, channel, MC_INTSTATUS) & mc->soc->intmask;
>>> + } else {
>>> + status = mc_readl(mc, MC_INTSTATUS) & mc->soc->intmask;
>>> + }
>> So if mc_has_channels=false, while it should be true, then you're going
>> to handle interrupt incorrectly?
>
> I see now that num_channels and get_int_channel are const, so I don't
> see why mc_has_channels variable is needed. Use mc->soc->num_channels.
>
> if (mc->soc->num_channels) {

Okay, I will remove mc_has_channels and replace it with
mc->soc->num_channels.

2022-03-24 02:09:57

by Ashish Mhetre

[permalink] [raw]
Subject: Re: [Patch v5 2/4] memory: tegra: Add MC error logging on tegra186 onward



On 3/20/2022 6:23 PM, Dmitry Osipenko wrote:
> External email: Use caution opening links or attachments
>
>
> 16.03.2022 12:25, Ashish Mhetre пишет:
>> +static int tegra186_mc_get_channel(const struct tegra_mc *mc, int *mc_channel)
>> +{
>> + u32 status;
>> +
>> + status = mc_ch_readl(mc, MC_BROADCAST_CHANNEL, MC_GLOBAL_INTSTATUS);
>> +
>> + switch (status & mc->soc->int_channel_mask) {
>> + case BIT(0):
>> + *mc_channel = 0;
>> + break;
>> +
>> + case BIT(1):
>> + *mc_channel = 1;
>> + break;
>> +
>> + case BIT(2):
>> + *mc_channel = 2;
>> + break;
>> +
>> + case BIT(3):
>> + *mc_channel = 3;
>> + break;
>
> This won't work if multiple bits are set at once.

I talked with our HW team and they said that it is technically possible
for interrupts to arrive on multiple channels at the same time. SW can take
care of this by logging the interrupts of the first channel and then clearing
that channel's bit, then handling the interrupts of the next channel, and so on.
I'll update the patches accordingly in the next version.

2022-03-30 05:21:14

by Dmitry Osipenko

[permalink] [raw]
Subject: Re: [Patch v5 2/4] memory: tegra: Add MC error logging on tegra186 onward

On 3/16/22 12:25, Ashish Mhetre wrote:
> Add new function 'get_int_channel' in tegra_mc_soc structure which is
> implemented by tegra SOCs which support multiple MC channels. This
> function returns the channel which should be used to get the information
> of interrupts.
> Remove static from tegra30_mc_handle_irq and use it as interrupt handler
> for MC interrupts on tegra186, tegra194 and tegra234 to log the errors.
> Add error specific MC status and address register bits and use them on
> tegra186, tegra194 and tegra234.
> Add error logging for generalized carveout interrupt on tegra186, tegra194
> and tegra234.
> Add error logging for route sanity interrupt on tegra194 and tegra234.
> Add register for higher bits of error address which is available on
> tegra194 and tegra234.
> Add a boolean variable 'has_addr_hi_reg' in tegra_mc_soc structure which
> will be true if soc has register for higher bits of memory controller
> error address. Set it true for tegra194 and tegra234.
>
> Signed-off-by: Ashish Mhetre <[email protected]>

> Reported-by: kernel test robot <[email protected]>
> Reported-by: Dan Carpenter <[email protected]>

Reported what? You should add this tag only if the patch addresses a reported
problem. This patch doesn't address anything, hence the tag is
inappropriate; you should remove it.

2022-03-30 11:56:11

by Dmitry Osipenko

[permalink] [raw]
Subject: Re: [Patch v5 2/4] memory: tegra: Add MC error logging on tegra186 onward

On 3/30/22 12:03, Ashish Mhetre wrote:
>
>
> On 3/30/2022 5:36 AM, Dmitry Osipenko wrote:
>> External email: Use caution opening links or attachments
>>
>>
>> On 3/16/22 12:25, Ashish Mhetre wrote:
>>> Add new function 'get_int_channel' in tegra_mc_soc structure which is
>>> implemented by tegra SOCs which support multiple MC channels. This
>>> function returns the channel which should be used to get the information
>>> of interrupts.
>>> Remove static from tegra30_mc_handle_irq and use it as interrupt handler
>>> for MC interrupts on tegra186, tegra194 and tegra234 to log the errors.
>>> Add error specific MC status and address register bits and use them on
>>> tegra186, tegra194 and tegra234.
>>> Add error logging for generalized carveout interrupt on tegra186,
>>> tegra194
>>> and tegra234.
>>> Add error logging for route sanity interrupt on tegra194 and tegra234.
>>> Add register for higher bits of error address which is available on
>>> tegra194 and tegra234.
>>> Add a boolean variable 'has_addr_hi_reg' in tegra_mc_soc structure which
>>> will be true if soc has register for higher bits of memory controller
>>> error address. Set it true for tegra194 and tegra234.
>>>
>>> Signed-off-by: Ashish Mhetre <[email protected]>
>>
>>> Reported-by: kernel test robot <[email protected]>
>>> Reported-by: Dan Carpenter <[email protected]>
>>
>> Reported what? You should add this tag only if patch addresses reported
>> problem. This patch doesn't address anything, hence the tag is
>> inappropriate, you should remove it.
>
> Okay, smatch warning was reported on v4 of this patch which is fixed in
> v5. Then I understand that we don't need to add Reported-by if we fix
> bug in subsequent versions, right?

Right, if the report was made to the in-progress patch, then you
shouldn't add the tag.

If the report was made against a patch that was already merged, then you
should create a new patch that fixes the reported problem and add the
Reported-by tag to that new patch.

2022-03-30 18:25:05

by Ashish Mhetre

[permalink] [raw]
Subject: Re: [Patch v5 2/4] memory: tegra: Add MC error logging on tegra186 onward



On 3/30/2022 3:49 PM, Dmitry Osipenko wrote:
> External email: Use caution opening links or attachments
>
>
> On 3/30/22 12:03, Ashish Mhetre wrote:
>>
>>
>> On 3/30/2022 5:36 AM, Dmitry Osipenko wrote:
>>> External email: Use caution opening links or attachments
>>>
>>>
>>> On 3/16/22 12:25, Ashish Mhetre wrote:
>>>> Add new function 'get_int_channel' in tegra_mc_soc structure which is
>>>> implemented by tegra SOCs which support multiple MC channels. This
>>>> function returns the channel which should be used to get the information
>>>> of interrupts.
>>>> Remove static from tegra30_mc_handle_irq and use it as interrupt handler
>>>> for MC interrupts on tegra186, tegra194 and tegra234 to log the errors.
>>>> Add error specific MC status and address register bits and use them on
>>>> tegra186, tegra194 and tegra234.
>>>> Add error logging for generalized carveout interrupt on tegra186,
>>>> tegra194
>>>> and tegra234.
>>>> Add error logging for route sanity interrupt on tegra194 and tegra234.
>>>> Add register for higher bits of error address which is available on
>>>> tegra194 and tegra234.
>>>> Add a boolean variable 'has_addr_hi_reg' in tegra_mc_soc structure which
>>>> will be true if soc has register for higher bits of memory controller
>>>> error address. Set it true for tegra194 and tegra234.
>>>>
>>>> Signed-off-by: Ashish Mhetre <[email protected]>
>>>
>>>> Reported-by: kernel test robot <[email protected]>
>>>> Reported-by: Dan Carpenter <[email protected]>
>>>
>>> Reported what? You should add this tag only if patch addresses reported
>>> problem. This patch doesn't address anything, hence the tag is
>>> inappropriate, you should remove it.
>>
>> Okay, smatch warning was reported on v4 of this patch which is fixed in
>> v5. Then I understand that we don't need to add Reported-by if we fix
>> bug in subsequent versions, right?
>
> Right, if the report was made to the in-progress patch, then you
> shouldn't add the tag.
>
> If report was made to the patch that was already merged, then you should
> create a new patch that fixes the reported problem and add the
> reported-by to this patch.

Got it, thanks for the explanation. I'll remove tag from next version.

2022-03-30 21:52:51

by Ashish Mhetre

[permalink] [raw]
Subject: Re: [Patch v5 2/4] memory: tegra: Add MC error logging on tegra186 onward



On 3/30/2022 5:21 AM, Dmitry Osipenko wrote:
> External email: Use caution opening links or attachments
>
>
> On 3/22/22 20:23, Ashish Mhetre wrote:
>>
>>
>> On 3/19/2022 9:29 PM, Dmitry Osipenko wrote:
>>> External email: Use caution opening links or attachments
>>>
>>>
>>> 16.03.2022 12:25, Ashish Mhetre пишет:
>>>> diff --git a/include/soc/tegra/mc.h b/include/soc/tegra/mc.h
>>>> index 92f810c55b43..6f115436e344 100644
>>>> --- a/include/soc/tegra/mc.h
>>>> +++ b/include/soc/tegra/mc.h
>>>> @@ -203,6 +203,8 @@ struct tegra_mc_soc {
>>>> const struct tegra_smmu_soc *smmu;
>>>>
>>>> u32 intmask;
>>>> + u32 int_channel_mask;
>>>
>>> ch_intmask
>>>
>> Okay, I will update,
>>
>>>> + bool has_addr_hi_reg;
>>>>
>>>> const struct tegra_mc_reset_ops *reset_ops;
>>>> const struct tegra_mc_reset *resets;
>>>> @@ -210,6 +212,8 @@ struct tegra_mc_soc {
>>>>
>>>> const struct tegra_mc_icc_ops *icc_ops;
>>>> const struct tegra_mc_ops *ops;
>>>> +
>>>> + int (*get_int_channel)(const struct tegra_mc *mc, int
>>>> *mc_channel);
>>>
>>> This should be a part of tegra_mc_ops.
>>
>> tegra_mc_ops is common for T186, T194 and T234 i.e. all of them use
>> tegra186_mc_ops. get_int_channel function has to be differently
>> implemented for all of these SOCs. So I had put it in tegra_mc_soc.
>
> Then tegra_mc_ops shouldn't be common anymore?

Yes, that can be done. But the tegra186_mc_ops functions are common to
Tegra186, Tegra194 and Tegra234.
We can split tegra_mc_ops per SoC and keep the callbacks pointing to the
same tegra186 functions by removing static from them.

2022-03-30 23:59:48

by Ashish Mhetre

[permalink] [raw]
Subject: Re: [Patch v5 2/4] memory: tegra: Add MC error logging on tegra186 onward



On 3/30/2022 5:31 AM, Dmitry Osipenko wrote:
> External email: Use caution opening links or attachments
>
>
> On 3/22/22 20:34, Ashish Mhetre wrote:
>>>> + switch (status & mc->soc->int_channel_mask) {
>>>> + case BIT(0):
>>>> + *mc_channel = 0;
>>>> + break;
>>>> +
>>>> + case BIT(1):
>>>> + *mc_channel = 1;
>>>> + break;
>>>> +
>>>> + case BIT(2):
>>>> + *mc_channel = 2;
>>>> + break;
>>>> +
>>>> + case BIT(3):
>>>> + *mc_channel = 3;
>>>> + break;
>>>> +
>>>> + case BIT(24):
>>>> + *mc_channel = MC_BROADCAST_CHANNEL;
>>>> + break;
>>>> +
>>>> + default:
>>>> + pr_err("Unknown interrupt source\n");
>>>
>>> dev_err_ratelimited("unknown interrupt channel 0x%08x\n", status) and
>>> should be moved to the common interrupt handler.
>>>
>> So return just error from default case and handle error in common
>> interrupt handler with this print, right? I'll update this in next
>> version.
>
> Yes, just move out the common print.
>
> Although, you could parameterize the shift per SoC and then have a
> common helper that does "status >> intmask_chan_shift", couldn't you?

Do you mean shift to get the channel, like
"channel = status >> intmask_chan_shift"?
So we get rid of this callback completely and add a variable in
tegra_mc_soc for intmask_chan_shift, right? Or do we compute the shift in
this callback and use it in the common handler?
If we remove this callback, how do we handle the unknown interrupt
channel error?
Also, we want to handle interrupts on one channel at a time and then
clear that channel's bit in the status register. There can be interrupts
on multiple channels, so multiple bits in status will be set. Hence it
will be hard to parameterize the shift such that it gives the appropriate
channel. So I think the current approach is fine. Please correct me if I
am wrong somewhere.

2022-03-31 03:11:35

by Dmitry Osipenko

[permalink] [raw]
Subject: Re: [Patch v5 2/4] memory: tegra: Add MC error logging on tegra186 onward

On 3/30/22 13:16, Ashish Mhetre wrote:
>
>
> On 3/30/2022 5:31 AM, Dmitry Osipenko wrote:
>> External email: Use caution opening links or attachments
>>
>>
>> On 3/22/22 20:34, Ashish Mhetre wrote:
>>>>> +     switch (status & mc->soc->int_channel_mask) {
>>>>> +     case BIT(0):
>>>>> +             *mc_channel = 0;
>>>>> +             break;
>>>>> +
>>>>> +     case BIT(1):
>>>>> +             *mc_channel = 1;
>>>>> +             break;
>>>>> +
>>>>> +     case BIT(2):
>>>>> +             *mc_channel = 2;
>>>>> +             break;
>>>>> +
>>>>> +     case BIT(3):
>>>>> +             *mc_channel = 3;
>>>>> +             break;
>>>>> +
>>>>> +     case BIT(24):
>>>>> +             *mc_channel = MC_BROADCAST_CHANNEL;
>>>>> +             break;
>>>>> +
>>>>> +     default:
>>>>> +             pr_err("Unknown interrupt source\n");
>>>>
>>>> dev_err_ratelimited("unknown interrupt channel 0x%08x\n", status) and
>>>> should be moved to the common interrupt handler.
>>>>
>>> So return just error from default case and handle error in common
>>> interrupt handler with this print, right? I'll update this in next
>>> version.
>>
>> Yes, just move out the common print.
>>
>> Although, you could parameterize the shift per SoC and then have a
>> common helper that does "status >> intmask_chan_shift", couldn't you?
>
> Do you mean shift to get the channel, like
> "channel = status >> intmask_chan_shift"?
> So to get rid of this callback completely and adding a variable in
> tegra_mc_soc for intmask_chan_shift, right? Or compute shift in this
> callback and use it in common handler?

Add variable to tegra_mc_soc.

The intmask_chan_shift is a misnomer, perhaps something like
status_reg_chan_shift will be a better name.

> If we are to remove this callback then how to handle unknown interrupt
> channel error?

Create a common helper function that returns the ID of the raised channel
or an errno if no bits are set.

> Also we want to handle interrupts on one channel at a time and then
> clear it from status register. There can be interrupts on multiple
> channel. So multiple bits from status will be set. Hence it will be
> hard to parameterize shift such that it gives appropriate channel.
> So I think current approach is fine. Please correct me if I am wrong
> somewhere.

You may do the following:

1. find the first channel bit set in the status reg
2. handle that channel
3. clear only the handled status bit, don't clear the other bits
4. return from interrupt

If there are other bits set, then the interrupt handler will fire again and
the next channel will be handled.

2022-03-31 03:54:48

by Dmitry Osipenko

[permalink] [raw]
Subject: Re: [Patch v5 2/4] memory: tegra: Add MC error logging on tegra186 onward

On 3/22/22 20:23, Ashish Mhetre wrote:
>
>
> On 3/19/2022 9:29 PM, Dmitry Osipenko wrote:
>> External email: Use caution opening links or attachments
>>
>>
>> 16.03.2022 12:25, Ashish Mhetre пишет:
>>> diff --git a/include/soc/tegra/mc.h b/include/soc/tegra/mc.h
>>> index 92f810c55b43..6f115436e344 100644
>>> --- a/include/soc/tegra/mc.h
>>> +++ b/include/soc/tegra/mc.h
>>> @@ -203,6 +203,8 @@ struct tegra_mc_soc {
>>>        const struct tegra_smmu_soc *smmu;
>>>
>>>        u32 intmask;
>>> +     u32 int_channel_mask;
>>
>> ch_intmask
>>
> Okay, I will update,
>
>>> +     bool has_addr_hi_reg;
>>>
>>>        const struct tegra_mc_reset_ops *reset_ops;
>>>        const struct tegra_mc_reset *resets;
>>> @@ -210,6 +212,8 @@ struct tegra_mc_soc {
>>>
>>>        const struct tegra_mc_icc_ops *icc_ops;
>>>        const struct tegra_mc_ops *ops;
>>> +
>>> +     int (*get_int_channel)(const struct tegra_mc *mc, int
>>> *mc_channel);
>>
>> This should be a part of tegra_mc_ops.
>
> tegra_mc_ops is common for T186, T194 and T234 i.e. all of them use
> tegra186_mc_ops. get_int_channel function has to be differently
> implemented for all of these SOCs. So I had put it in tegra_mc_soc.

Then tegra_mc_ops shouldn't be common anymore?

2022-03-31 03:56:06

by Ashish Mhetre

[permalink] [raw]
Subject: Re: [Patch v5 2/4] memory: tegra: Add MC error logging on tegra186 onward



On 3/30/2022 4:06 PM, Dmitry Osipenko wrote:
> External email: Use caution opening links or attachments
>
>
> On 3/30/22 13:16, Ashish Mhetre wrote:
>>
>>
>> On 3/30/2022 5:31 AM, Dmitry Osipenko wrote:
>>> External email: Use caution opening links or attachments
>>>
>>>
>>> On 3/22/22 20:34, Ashish Mhetre wrote:
>>>>>> + switch (status & mc->soc->int_channel_mask) {
>>>>>> + case BIT(0):
>>>>>> + *mc_channel = 0;
>>>>>> + break;
>>>>>> +
>>>>>> + case BIT(1):
>>>>>> + *mc_channel = 1;
>>>>>> + break;
>>>>>> +
>>>>>> + case BIT(2):
>>>>>> + *mc_channel = 2;
>>>>>> + break;
>>>>>> +
>>>>>> + case BIT(3):
>>>>>> + *mc_channel = 3;
>>>>>> + break;
>>>>>> +
>>>>>> + case BIT(24):
>>>>>> + *mc_channel = MC_BROADCAST_CHANNEL;
>>>>>> + break;
>>>>>> +
>>>>>> + default:
>>>>>> + pr_err("Unknown interrupt source\n");
>>>>>
>>>>> dev_err_ratelimited("unknown interrupt channel 0x%08x\n", status) and
>>>>> should be moved to the common interrupt handler.
>>>>>
>>>> So return just error from default case and handle error in common
>>>> interrupt handler with this print, right? I'll update this in next
>>>> version.
>>>
>>> Yes, just move out the common print.
>>>
>>> Although, you could parameterize the shift per SoC and then have a
>>> common helper that does "status >> intmask_chan_shift", couldn't you?
>>
>> Do you mean shift to get the channel, like
>> "channel = status >> intmask_chan_shift"?
>> So to get rid of this callback completely and adding a variable in
>> tegra_mc_soc for intmask_chan_shift, right? Or compute shift in this
>> callback and use it in common handler?
>
> Add variable to tegra_mc_soc.
>
> The intmask_chan_shift is a misnomer, perhaps something like
> status_reg_chan_shift will be a better name.
>
Okay, I will do this.

>> If we are to remove this callback then how to handle unknown interrupt
>> channel error?
>
> Create a common helper function that returns the ID of the raised channel
> or an errno if no bits are set.
>
So something like this:

int status_to_channel(const struct tegra_mc *mc, u32 status,
unsigned int *mc_channel)
{
if ((status & mc->soc->ch_intmask) == 0)
return -EINVAL;

*mc_channel = __ffs((status & mc->soc->ch_intmask) >>
mc->soc->status_reg_chan_shift);

return 0;
}

Correct?

>> Also we want to handle interrupts on one channel at a time and then
>> clear it from status register. There can be interrupts on multiple
>> channel. So multiple bits from status will be set. Hence it will be
>> hard to parameterize shift such that it gives appropriate channel.
>> So I think current approach is fine. Please correct me if I am wrong
>> somewhere.
>
> You may do the following:
>
> 1. find the first channel bit set in the status reg
> 2. handle that channel
> 3. clear only the handled status bit, don't clear the other bits
> 4. return from interrupt
>
> If there are other bits set, then interrupt handler will fire again and
> next channel will be handled.

For clearing the status bit after handling, we can retrieve the channel bit
with something like this:

ch_bit = BIT(*mc_channel) << mc->soc->status_reg_chan_shift;

Correct?

2022-03-31 04:09:12

by Dmitry Osipenko

[permalink] [raw]
Subject: Re: [Patch v5 2/4] memory: tegra: Add MC error logging on tegra186 onward

On 3/22/22 20:34, Ashish Mhetre wrote:
>>> +     switch (status & mc->soc->int_channel_mask) {
>>> +     case BIT(0):
>>> +             *mc_channel = 0;
>>> +             break;
>>> +
>>> +     case BIT(1):
>>> +             *mc_channel = 1;
>>> +             break;
>>> +
>>> +     case BIT(2):
>>> +             *mc_channel = 2;
>>> +             break;
>>> +
>>> +     case BIT(3):
>>> +             *mc_channel = 3;
>>> +             break;
>>> +
>>> +     case BIT(24):
>>> +             *mc_channel = MC_BROADCAST_CHANNEL;
>>> +             break;
>>> +
>>> +     default:
>>> +             pr_err("Unknown interrupt source\n");
>>
>> dev_err_ratelimited("unknown interrupt channel 0x%08x\n", status) and
>> should be moved to the common interrupt handler.
>>
> So return just error from default case and handle error in common
> interrupt handler with this print, right? I'll update this in next
> version.

Yes, just move out the common print.

Although, you could parameterize the shift per SoC and then have a
common helper that does "status >> intmask_chan_shift", couldn't you?

2022-03-31 04:49:07

by Ashish Mhetre

[permalink] [raw]
Subject: Re: [Patch v5 2/4] memory: tegra: Add MC error logging on tegra186 onward



On 3/30/2022 5:36 AM, Dmitry Osipenko wrote:
> External email: Use caution opening links or attachments
>
>
> On 3/16/22 12:25, Ashish Mhetre wrote:
>> Add new function 'get_int_channel' in tegra_mc_soc structure which is
>> implemented by tegra SOCs which support multiple MC channels. This
>> function returns the channel which should be used to get the information
>> of interrupts.
>> Remove static from tegra30_mc_handle_irq and use it as interrupt handler
>> for MC interrupts on tegra186, tegra194 and tegra234 to log the errors.
>> Add error specific MC status and address register bits and use them on
>> tegra186, tegra194 and tegra234.
>> Add error logging for generalized carveout interrupt on tegra186, tegra194
>> and tegra234.
>> Add error logging for route sanity interrupt on tegra194 and tegra234.
>> Add register for higher bits of error address which is available on
>> tegra194 and tegra234.
>> Add a boolean variable 'has_addr_hi_reg' in tegra_mc_soc structure which
>> will be true if soc has register for higher bits of memory controller
>> error address. Set it true for tegra194 and tegra234.
>>
>> Signed-off-by: Ashish Mhetre <[email protected]>
>
>> Reported-by: kernel test robot <[email protected]>
>> Reported-by: Dan Carpenter <[email protected]>
>
> Reported what? You should add this tag only if patch addresses reported
> problem. This patch doesn't address anything, hence the tag is
> inappropriate, you should remove it.

Okay. A smatch warning was reported on v4 of this patch, and it is fixed in
v5. So I understand that we don't need to add Reported-by when we fix a bug
that was reported against an earlier version of the same patch, right?

2022-04-01 12:04:25

by Dmitry Osipenko

[permalink] [raw]
Subject: Re: [Patch v5 2/4] memory: tegra: Add MC error logging on tegra186 onward

On 3/30/22 14:22, Ashish Mhetre wrote:
...
>>> If we are to remove this callback then how to handle unknown interrupt
>>> channel error?
>>
>> Create a common helper function that returns the ID of the raised channel
>> or an errno if no bits are set.
>>
> So something like this:
>
> int status_to_channel(const struct tegra_mc *mc, u32 status,
>               unsigned int *mc_channel)
> {
>     if ((status & mc->soc->ch_intmask) == 0)
>         return -EINVAL;
>
>     *mc_channel = __ffs((status & mc->soc->ch_intmask) >>
>                  mc->soc->status_reg_chan_shift);
>
>     return 0;
> }
>
> Correct?

Yes

>>> Also we want to handle interrupts on one channel at a time and then
>>> clear it from status register. There can be interrupts on multiple
>>> channel. So multiple bits from status will be set. Hence it will be
>>> hard to parameterize shift such that it gives appropriate channel.
>>> So I think current approach is fine. Please correct me if I am wrong
>>> somewhere.
>>
>> You may do the following:
>>
>> 1. find the first channel bit set in the status reg
>> 2. handle that channel
>> 3. clear only the handled status bit, don't clear the other bits
>> 4. return from interrupt
>>
>> If there are other bits set, then interrupt handler will fire again and
>> next channel will be handled.
>
> For clearing status bit after handling, we can retrieve channel bit by
> something like this:
>
> ch_bit = BIT(*mc_channel) << mc->soc->status_reg_chan_shift;
>
> Correct?

Yes

Perhaps using FIELD_PREP() and similar helpers could make it look nicer in
the code.

2022-04-04 22:24:52

by Ashish Mhetre

[permalink] [raw]
Subject: Re: [Patch v5 2/4] memory: tegra: Add MC error logging on tegra186 onward



On 4/1/2022 1:19 AM, Dmitry Osipenko wrote:
> External email: Use caution opening links or attachments
>
>
> On 3/30/22 14:22, Ashish Mhetre wrote:
> ...
>>>> If we are to remove this callback then how to handle unknown interrupt
>>>> channel error?
>>>
>>> Create a common helper function that returns the ID of the raised channel
>>> or an errno if no bits are set.
>>>
>> So something like this:
>>
>> int status_to_channel(const struct tegra_mc *mc, u32 status,
>> unsigned int *mc_channel)
>> {
>> if ((status & mc->soc->ch_intmask) == 0)
>> return -EINVAL;
>>
>> *mc_channel = __ffs((status & mc->soc->ch_intmask) >>
>> mc->soc->status_reg_chan_shift);
>>
>> return 0;
>> }
>>
>> Correct?
>
> Yes
>
>>>> Also we want to handle interrupts on one channel at a time and then
>>>> clear it from status register. There can be interrupts on multiple
>>>> channel. So multiple bits from status will be set. Hence it will be
>>>> hard to parameterize shift such that it gives appropriate channel.
>>>> So I think current approach is fine. Please correct me if I am wrong
>>>> somewhere.
>>>
>>> You may do the following:
>>>
>>> 1. find the first channel bit set in the status reg
>>> 2. handle that channel
>>> 3. clear only the handled status bit, don't clear the other bits
>>> 4. return from interrupt
>>>
>>> If there are other bits set, then interrupt handler will fire again and
>>> next channel will be handled.
>>
>> For clearing status bit after handling, we can retrieve channel bit by
>> something like this:
>>
>> ch_bit = BIT(*mc_channel) << mc->soc->status_reg_chan_shift;
>>
>> Correct?
>
> Yes
>
> Perhaps using FIELD_PREP() and alike helpers could make it look nice in
> the code.

I tried using FIELD_PREP() and FIELD_GET() for our use case, but
compilation fails because these macros require the mask to be a
compile-time constant, and our mask "mc->soc->ch_intmask" does not
qualify as a compile-time constant.