2019-03-11 18:03:26

by Sowjanya Komatineni

[permalink] [raw]
Subject: [PATCH V2 01/10] mmc: tegra: fix ddr signaling for non-ddr modes

ddr_signaling is set to true for DDR50 and DDR52 modes but is
not set back to false for other modes. This programs an incorrect
host clock when the mode changes from DDR52/DDR50 to other
SDR or HS modes, as in the case of mmc_retune, where it switches
from HS400 to HS DDR, then from HS DDR to HS mode, and then
to HS200.

This patch fixes ddr_signaling so that it is set properly for
non-DDR modes.

Tested-by: Jon Hunter <[email protected]>
Acked-by: Adrian Hunter <[email protected]>
Signed-off-by: Sowjanya Komatineni <[email protected]>
---
drivers/mmc/host/sdhci-tegra.c | 1 +
1 file changed, 1 insertion(+)

diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c
index 32e62904c0d3..46086dd43bfb 100644
--- a/drivers/mmc/host/sdhci-tegra.c
+++ b/drivers/mmc/host/sdhci-tegra.c
@@ -779,6 +779,7 @@ static void tegra_sdhci_set_uhs_signaling(struct sdhci_host *host,
bool set_dqs_trim = false;
bool do_hs400_dll_cal = false;

+ tegra_host->ddr_signaling = false;
switch (timing) {
case MMC_TIMING_UHS_SDR50:
case MMC_TIMING_UHS_SDR104:
--
2.7.4



2019-03-11 18:03:40

by Sowjanya Komatineni

[permalink] [raw]
Subject: [PATCH V2 10/10] arm64: tegra: enable command queue for tegra186 sdmmc4

This patch enables command queue support for Tegra186 SDMMC4.

Tested-by: Jon Hunter <[email protected]>
Signed-off-by: Sowjanya Komatineni <[email protected]>
---
arch/arm64/boot/dts/nvidia/tegra186.dtsi | 1 +
1 file changed, 1 insertion(+)

diff --git a/arch/arm64/boot/dts/nvidia/tegra186.dtsi b/arch/arm64/boot/dts/nvidia/tegra186.dtsi
index 472f55fe9488..6e2b6ce99df2 100644
--- a/arch/arm64/boot/dts/nvidia/tegra186.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra186.dtsi
@@ -321,6 +321,7 @@
nvidia,default-trim = <0x5>;
nvidia,dqs-trim = <63>;
mmc-hs400-1_8v;
+ supports-cqe;
status = "disabled";
};

--
2.7.4


2019-03-11 18:03:45

by Sowjanya Komatineni

[permalink] [raw]
Subject: [PATCH V2 09/10] mmc: tegra: fix CQE enable and resume sequences

Tegra CQHCI/SDHCI design prevents write access to SDHCI block size
register when CQE is enabled and unhalted.

CQHCI driver enables CQE prior to invoking sdhci_cqe_enable which
violates this Tegra specific host requirement.

This patch fixes this by configuring sdhci block registers prior
to CQE unhalt.

This patch also adds a retry of the unhalt to work around a known
Tegra-specific CQE resume bug, where the first unhalt might not succeed
when clear all tasks is performed prior to resume, so a second unhalt
is needed.

This patch also includes a CQE enable fix for CMD CRC errors that
happen with a specific SanDisk eMMC device when the status command
is sent during the transfer of the last data block, due to marginal
timing.

Tested-by: Jon Hunter <[email protected]>
Acked-by: Adrian Hunter <[email protected]>
Signed-off-by: Sowjanya Komatineni <[email protected]>
---
drivers/mmc/host/sdhci-tegra.c | 72 ++++++++++++++++++++++++++++++++++++------
1 file changed, 62 insertions(+), 10 deletions(-)

diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c
index 1ac0ca37ce95..a1655990af7a 100644
--- a/drivers/mmc/host/sdhci-tegra.c
+++ b/drivers/mmc/host/sdhci-tegra.c
@@ -1124,6 +1124,43 @@ static void tegra_sdhci_voltage_switch(struct sdhci_host *host)
tegra_host->pad_calib_required = true;
}

+static void tegra_cqhci_writel(struct cqhci_host *cq_host, u32 val, int reg)
+{
+ struct mmc_host *mmc = cq_host->mmc;
+ u8 ctrl;
+ ktime_t timeout;
+ bool timed_out;
+
+ /*
+ * During CQE resume/unhalt, CQHCI driver unhalts CQE prior to
+ * cqhci_host_ops enable where SDHCI DMA and BLOCK_SIZE registers need
+ * to be re-configured.
+ * Tegra CQHCI/SDHCI prevents write access to block size register when
+ * CQE is unhalted. So handling CQE resume sequence here to configure
+ * SDHCI block registers prior to exiting CQE halt state.
+ */
+ if (reg == CQHCI_CTL && !(val & CQHCI_HALT) &&
+ cqhci_readl(cq_host, CQHCI_CTL) & CQHCI_HALT) {
+ sdhci_cqe_enable(mmc);
+ writel(val, cq_host->mmio + reg);
+ timeout = ktime_add_us(ktime_get(), 50);
+ while (1) {
+ timed_out = ktime_compare(ktime_get(), timeout) > 0;
+ ctrl = cqhci_readl(cq_host, CQHCI_CTL);
+ if (!(ctrl & CQHCI_HALT) || timed_out)
+ break;
+ }
+ /*
+ * CQE usually resumes very quick, but incase if Tegra CQE
+ * doesn't resume retry unhalt.
+ */
+ if (timed_out)
+ writel(val, cq_host->mmio + reg);
+ } else {
+ writel(val, cq_host->mmio + reg);
+ }
+}
+
static u8 sdhci_tegra_cqe_dcmd_cmd_timing(struct mmc_host *mmc,
struct mmc_request *mrq)
{
@@ -1142,20 +1179,34 @@ static u8 sdhci_tegra_cqe_dcmd_cmd_timing(struct mmc_host *mmc,
static void sdhci_tegra_cqe_enable(struct mmc_host *mmc)
{
struct cqhci_host *cq_host = mmc->cqe_private;
- u32 cqcfg = 0;
+ u32 val;

/*
- * Tegra SDMMC Controller design prevents write access to BLOCK_COUNT
- * registers when CQE is enabled.
+ * Tegra CQHCI/SDMMC design prevents write access to sdhci block size
+ * register when CQE is enabled and unhalted.
+ * CQHCI driver enables CQE prior to activation, so disable CQE before
+ * programming block size in sdhci controller and enable it back.
*/
- cqcfg = cqhci_readl(cq_host, CQHCI_CFG);
- if (cqcfg & CQHCI_ENABLE)
- cqhci_writel(cq_host, (cqcfg & ~CQHCI_ENABLE), CQHCI_CFG);
-
- sdhci_cqe_enable(mmc);
+ if (!cq_host->activated) {
+ val = cqhci_readl(cq_host, CQHCI_CFG);
+ if (val & CQHCI_ENABLE)
+ cqhci_writel(cq_host, (val & ~CQHCI_ENABLE),
+ CQHCI_CFG);
+ sdhci_cqe_enable(mmc);
+ if (val & CQHCI_ENABLE)
+ cqhci_writel(cq_host, val, CQHCI_CFG);
+ }

- if (cqcfg & CQHCI_ENABLE)
- cqhci_writel(cq_host, cqcfg, CQHCI_CFG);
+ /*
+ * CMD CRC errors are seen sometimes with some eMMC devices when status
+ * command is sent during transfer of last data block which is the
+ * default case as send status command block counter (CBC) is 1.
+ * Recommended fix to set CBC to 0 allowing send status command only
+ * when data lines are idle.
+ */
+ val = cqhci_readl(cq_host, CQHCI_SSC1);
+ val &= ~CQHCI_SSC1_CBC_MASK;
+ cqhci_writel(cq_host, val, CQHCI_SSC1);
}

static void sdhci_tegra_dumpregs(struct mmc_host *mmc)
@@ -1177,6 +1228,7 @@ static u32 sdhci_tegra_cqhci_irq(struct sdhci_host *host, u32 intmask)
}

static const struct cqhci_host_ops sdhci_tegra_cqhci_ops = {
+ .write_l = tegra_cqhci_writel,
.enable = sdhci_tegra_cqe_enable,
.disable = sdhci_cqe_disable,
.dumpregs = sdhci_tegra_dumpregs,
--
2.7.4


2019-03-11 18:03:52

by Sowjanya Komatineni

[permalink] [raw]
Subject: [PATCH V2 07/10] mmc: tegra: add Tegra186 WAR for CQE

Tegra186 CQHCI host has a known bug where the CQHCI controller sets the
DATA_PRESENT_SELECT bit to 1 for DCMDs with R1B response type. Since a
DCMD does not trigger any data transfer, the DCMD task completes while
leaving the DATA FSM of the host controller in a wait state for the
data.

This affects the data transfer tasks issued after DCMDs with the
R1B response type, resulting in timeouts.

The SW workaround (WAR) is to set CMD_TIMING to 1 in the DCMD task
descriptor. This bug and its SW WAR are applicable only to Tegra186
and not to Tegra194.

This patch implements this WAR through NVQUIRK_CQHCI_DCMD_R1B_CMD_TIMING
for Tegra186 and also implements the get_dcmd_cmd_timing cqhci_host_ops
interface to specify the CMD_TIMING bit depending on the NVQUIRK.

Tested-by: Jon Hunter <[email protected]>
Signed-off-by: Sowjanya Komatineni <[email protected]>
---
drivers/mmc/host/sdhci-tegra.c | 20 +++++++++++++++++++-
1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c
index f1aa0591112a..1ac0ca37ce95 100644
--- a/drivers/mmc/host/sdhci-tegra.c
+++ b/drivers/mmc/host/sdhci-tegra.c
@@ -106,6 +106,7 @@
#define NVQUIRK_HAS_PADCALIB BIT(6)
#define NVQUIRK_NEEDS_PAD_CONTROL BIT(7)
#define NVQUIRK_DIS_CARD_CLK_CONFIG_TAP BIT(8)
+#define NVQUIRK_CQHCI_DCMD_R1B_CMD_TIMING BIT(9)

/* SDMMC CQE Base Address for Tegra Host Ver 4.1 and Higher */
#define SDHCI_TEGRA_CQE_BASE_ADDR 0xF000
@@ -1123,6 +1124,21 @@ static void tegra_sdhci_voltage_switch(struct sdhci_host *host)
tegra_host->pad_calib_required = true;
}

+static u8 sdhci_tegra_cqe_dcmd_cmd_timing(struct mmc_host *mmc,
+ struct mmc_request *mrq)
+{
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(mmc_priv(mmc));
+ struct sdhci_tegra *tegra_host = sdhci_pltfm_priv(pltfm_host);
+ const struct sdhci_tegra_soc_data *soc_data = tegra_host->soc_data;
+
+ if (soc_data->nvquirks & NVQUIRK_CQHCI_DCMD_R1B_CMD_TIMING)
+ return 1;
+ else if (mrq->cmd->flags & MMC_RSP_R1B)
+ return 0;
+ else
+ return 1;
+}
+
static void sdhci_tegra_cqe_enable(struct mmc_host *mmc)
{
struct cqhci_host *cq_host = mmc->cqe_private;
@@ -1164,6 +1180,7 @@ static const struct cqhci_host_ops sdhci_tegra_cqhci_ops = {
.enable = sdhci_tegra_cqe_enable,
.disable = sdhci_cqe_disable,
.dumpregs = sdhci_tegra_dumpregs,
+ .get_dcmd_cmd_timing = sdhci_tegra_cqe_dcmd_cmd_timing,
};

static const struct sdhci_ops tegra_sdhci_ops = {
@@ -1345,7 +1362,8 @@ static const struct sdhci_tegra_soc_data soc_data_tegra186 = {
NVQUIRK_HAS_PADCALIB |
NVQUIRK_DIS_CARD_CLK_CONFIG_TAP |
NVQUIRK_ENABLE_SDR50 |
- NVQUIRK_ENABLE_SDR104,
+ NVQUIRK_ENABLE_SDR104 |
+ NVQUIRK_CQHCI_DCMD_R1B_CMD_TIMING,
.min_tap_delay = 84,
.max_tap_delay = 136,
};
--
2.7.4


2019-03-11 18:03:58

by Sowjanya Komatineni

[permalink] [raw]
Subject: [PATCH V2 06/10] mmc: cqhci: allow hosts to specify dcmd cmd timing parameter

This patch adds get_dcmd_cmd_timing interface to cqhci_host_ops to
allow hosts to specify CMD_TIMING bit of the DCMD task descriptor
command parameter.

This helps host driver to control whether the command can be issued
during data transfer or only during idle time.

Tested-by: Jon Hunter <[email protected]>
Signed-off-by: Sowjanya Komatineni <[email protected]>
---
drivers/mmc/host/cqhci.c | 2 ++
drivers/mmc/host/cqhci.h | 2 ++
2 files changed, 4 insertions(+)

diff --git a/drivers/mmc/host/cqhci.c b/drivers/mmc/host/cqhci.c
index a8af682a9182..9a02f9c82aeb 100644
--- a/drivers/mmc/host/cqhci.c
+++ b/drivers/mmc/host/cqhci.c
@@ -528,6 +528,8 @@ static void cqhci_prep_dcmd_desc(struct mmc_host *mmc,
}
}

+ if (cq_host->ops->get_dcmd_cmd_timing)
+ timing = cq_host->ops->get_dcmd_cmd_timing(mmc, mrq);
task_desc = (__le64 __force *)get_desc(cq_host, cq_host->dcmd_slot);
memset(task_desc, 0, cq_host->task_desc_len);
data |= (CQHCI_VALID(1) |
diff --git a/drivers/mmc/host/cqhci.h b/drivers/mmc/host/cqhci.h
index 9e68286a07b4..981158da3326 100644
--- a/drivers/mmc/host/cqhci.h
+++ b/drivers/mmc/host/cqhci.h
@@ -210,6 +210,8 @@ struct cqhci_host_ops {
u32 (*read_l)(struct cqhci_host *host, int reg);
void (*enable)(struct mmc_host *mmc);
void (*disable)(struct mmc_host *mmc, bool recovery);
+ u8 (*get_dcmd_cmd_timing)(struct mmc_host *mmc,
+ struct mmc_request *mrq);
};

static inline void cqhci_writel(struct cqhci_host *host, u32 val, int reg)
--
2.7.4


2019-03-11 18:04:05

by Sowjanya Komatineni

[permalink] [raw]
Subject: [PATCH V2 04/10] dt-bindings: mmc: tegra: document Tegra194 compatible string

SDHCI controller of Tegra194 is similar to SDHCI controller in Tegra186.
This patch documents Tegra194 sdhci compatible string.

Signed-off-by: Sowjanya Komatineni <[email protected]>
---
Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.txt | 1 +
1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.txt b/Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.txt
index 2cecdc71d94c..2cf3affa1be7 100644
--- a/Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.txt
+++ b/Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.txt
@@ -14,6 +14,7 @@ Required properties:
- "nvidia,tegra124-sdhci": for Tegra124 and Tegra132
- "nvidia,tegra210-sdhci": for Tegra210
- "nvidia,tegra186-sdhci": for Tegra186
+ - "nvidia,tegra194-sdhci": for Tegra194
- clocks : Must contain one entry, for the module clock.
See ../clocks/clock-bindings.txt for details.
- resets : Must contain an entry for each entry in reset-names.
--
2.7.4


2019-03-11 18:04:24

by Sowjanya Komatineni

[permalink] [raw]
Subject: [PATCH V2 05/10] arm64: tegra: fix default tap and trim values

Default tap and trim values are incorrect for Tegra186 SDMMC4.
This patch fixes it.

Tested-by: Jon Hunter <[email protected]>
Signed-off-by: Sowjanya Komatineni <[email protected]>
---
arch/arm64/boot/dts/nvidia/tegra186.dtsi | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/boot/dts/nvidia/tegra186.dtsi b/arch/arm64/boot/dts/nvidia/tegra186.dtsi
index 97aeb946ed5e..472f55fe9488 100644
--- a/arch/arm64/boot/dts/nvidia/tegra186.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra186.dtsi
@@ -317,8 +317,8 @@
nvidia,pad-autocal-pull-down-offset-1v8-timeout = <0x0a>;
nvidia,pad-autocal-pull-up-offset-3v3-timeout = <0x0a>;
nvidia,pad-autocal-pull-down-offset-3v3-timeout = <0x0a>;
- nvidia,default-tap = <0x5>;
- nvidia,default-trim = <0x9>;
+ nvidia,default-tap = <0x9>;
+ nvidia,default-trim = <0x5>;
nvidia,dqs-trim = <63>;
mmc-hs400-1_8v;
status = "disabled";
--
2.7.4


2019-03-11 18:04:50

by Sowjanya Komatineni

[permalink] [raw]
Subject: [PATCH V2 02/10] mmc: sdhci: allow host to specify maximum tuning loops

As per the Host Controller Standard Specification Version 4.20, the
limit on the tuning iteration count is removed, because PLL locking
time can be longer than UHS-1 tuning due to larger PVT fluctuation,
which increases the number of tuning iterations needed to complete
the tuning.

This patch creates sdhci_host member tuning_loop_count to allow
hosts to specify maximum tuning iterations and also updates
execute_tuning to use this specified maximum tuning iteration count.

Default tuning_loop_count is set to same as existing loop count of
MAX_TUNING_LOOP which is 40 iterations.

Tested-by: Jon Hunter <[email protected]>
Signed-off-by: Sowjanya Komatineni <[email protected]>
---
drivers/mmc/host/sdhci.c | 5 +++--
drivers/mmc/host/sdhci.h | 1 +
2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index a8141ff9be03..bbc0e0bb7128 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -2369,9 +2369,9 @@ static int __sdhci_execute_tuning(struct sdhci_host *host, u32 opcode)

/*
* Issue opcode repeatedly till Execute Tuning is set to 0 or the number
- * of loops reaches 40 times.
+ * of loops reaches tuning loop count.
*/
- for (i = 0; i < MAX_TUNING_LOOP; i++) {
+ for (i = 0; i < host->tuning_loop_count; i++) {
u16 ctrl;

sdhci_send_tuning(host, opcode);
@@ -3494,6 +3494,7 @@ struct sdhci_host *sdhci_alloc_host(struct device *dev,
host->cqe_err_ier = SDHCI_CQE_INT_ERR_MASK;

host->tuning_delay = -1;
+ host->tuning_loop_count = MAX_TUNING_LOOP;

host->sdma_boundary = SDHCI_DEFAULT_BOUNDARY_ARG;

diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index 01002cba1359..57bb3e3dca89 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -596,6 +596,7 @@ struct sdhci_host {
#define SDHCI_TUNING_MODE_3 2
/* Delay (ms) between tuning commands */
int tuning_delay;
+ int tuning_loop_count;

/* Host SDMA buffer boundary. */
u32 sdma_boundary;
--
2.7.4


2019-03-11 18:05:11

by Sowjanya Komatineni

[permalink] [raw]
Subject: [PATCH V2 08/10] mmc: cqhci: add CQHCI_SSC1 register CBC field mask

This patch adds define for CBC field mask of the register
CQHCI_SSC1.

Tested-by: Jon Hunter <[email protected]>
Acked-by: Adrian Hunter <[email protected]>
Signed-off-by: Sowjanya Komatineni <[email protected]>
---
drivers/mmc/host/cqhci.h | 1 +
1 file changed, 1 insertion(+)

diff --git a/drivers/mmc/host/cqhci.h b/drivers/mmc/host/cqhci.h
index 981158da3326..9fb2bb638884 100644
--- a/drivers/mmc/host/cqhci.h
+++ b/drivers/mmc/host/cqhci.h
@@ -88,6 +88,7 @@

/* send status config 1 */
#define CQHCI_SSC1 0x40
+#define CQHCI_SSC1_CBC_MASK GENMASK(19, 16)

/* send status config 2 */
#define CQHCI_SSC2 0x44
--
2.7.4


2019-03-11 18:05:26

by Sowjanya Komatineni

[permalink] [raw]
Subject: [PATCH V2 03/10] mmc: tegra: update hw tuning process

This patch includes the following HW tuning related fixes:
 - configures tuning parameters as per Tegra TRM
 - WAR fix for manual tap change
 - HW auto-tuning post process

As per Tegra TRM, SDR50 mode tuning execution takes up to a maximum
of 256 tuning iterations and SDR104/HS200/HS400 modes tuning
execution takes up to a maximum of 128 tuning iterations.

This patch programs tuning control register with maximum tuning
iterations needed based on the timing along with the start tap,
multiplier, and step size used by the HW tuning.

Tegra210 has a known issue of glitch on trimmer output when the
tap value is changed with the trimmer input clock running and the
WAR is to disable card clock before sending tuning command and
after sending tuning command wait for 1usec and issue SW reset
followed by enabling card clock.

This WAR is applicable when changing the tap value manually as well.
The Tegra SDHCI driver implements this correctly for manual tap
changes, but the SW reset before enabling the card clock is missing
when sending the tuning command.

Issuing the SW reset during the tuning command is part of the WAR and
is applicable in cases where tuning is performed with a single step
size for more iterations. This patch includes this fix.

HW auto-tuning finds the best (largest) passing window and sets the
tap at the middle of the window. With some devices, like SanDisk
eMMC, driving fast edges, and due to the high tap-to-tap delay in the
Tegra chipset, auto-tuning does not detect the falling tap between the
valid windows, resulting in a partial window or a merged window, and
the best tap is set at the signal transition, which is actually the
worst tap location.

Recommended SW solution is to detect if the best passing window
picked by the HW tuning is a partial or a merged window based on
min and max tap delays found from chip characterization across
PVT and perform tuning correction to pick the best tap.

This patch has implementation of this post HW tuning process for
the tegra hosts that support HW tuning through the callback function
tegra_sdhci_execute_hw_tuning and uses the tuned tap delay.

Tested-by: Jon Hunter <[email protected]>
Signed-off-by: Sowjanya Komatineni <[email protected]>
---
drivers/mmc/host/sdhci-tegra.c | 216 ++++++++++++++++++++++++++++++++++++++++-
1 file changed, 215 insertions(+), 1 deletion(-)

diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c
index 46086dd43bfb..f1aa0591112a 100644
--- a/drivers/mmc/host/sdhci-tegra.c
+++ b/drivers/mmc/host/sdhci-tegra.c
@@ -66,6 +66,22 @@

#define SDHCI_VNDR_TUN_CTRL0_0 0x1c0
#define SDHCI_VNDR_TUN_CTRL0_TUN_HW_TAP 0x20000
+#define SDHCI_VNDR_TUN_CTRL0_START_TAP_VAL_MASK 0x03fc0000
+#define SDHCI_VNDR_TUN_CTRL0_START_TAP_VAL_SHIFT 18
+#define SDHCI_VNDR_TUN_CTRL0_MUL_M_MASK 0x00001fc0
+#define SDHCI_VNDR_TUN_CTRL0_MUL_M_SHIFT 6
+#define SDHCI_VNDR_TUN_CTRL0_TUN_ITER_MASK 0x000e000
+#define SDHCI_VNDR_TUN_CTRL0_TUN_ITER_SHIFT 13
+#define TRIES_128 2
+#define TRIES_256 4
+#define SDHCI_VNDR_TUN_CTRL0_TUN_WORD_SEL_MASK 0x7
+
+#define SDHCI_TEGRA_VNDR_TUN_CTRL1_0 0x1c4
+#define SDHCI_TEGRA_VNDR_TUN_STATUS0 0x1C8
+#define SDHCI_TEGRA_VNDR_TUN_STATUS1 0x1CC
+#define SDHCI_TEGRA_VNDR_TUN_STATUS1_TAP_MASK 0xFF
+#define SDHCI_TEGRA_VNDR_TUN_STATUS1_END_TAP_SHIFT 0x8
+#define TUNING_WORD_BIT_SIZE 32

#define SDHCI_TEGRA_AUTO_CAL_CONFIG 0x1e4
#define SDHCI_AUTO_CAL_START BIT(31)
@@ -97,6 +113,8 @@
struct sdhci_tegra_soc_data {
const struct sdhci_pltfm_data *pdata;
u32 nvquirks;
+ u8 min_tap_delay;
+ u8 max_tap_delay;
};

/* Magic pull up and pull down pad calibration offsets */
@@ -136,6 +154,8 @@ struct sdhci_tegra {
u32 default_trim;
u32 dqs_trim;
bool enable_hwcq;
+ unsigned long curr_clk_rate;
+ u8 tuned_tap_delay;
};

static u16 tegra_sdhci_readw(struct sdhci_host *host, int reg)
@@ -241,6 +261,7 @@ static void tegra210_sdhci_writew(struct sdhci_host *host, u16 val, int reg)

if (is_tuning_cmd) {
udelay(1);
+ sdhci_reset(host, SDHCI_RESET_CMD | SDHCI_RESET_DATA);
tegra_sdhci_configure_card_clk(host, clk_enabled);
}
}
@@ -722,6 +743,7 @@ static void tegra_sdhci_set_clock(struct sdhci_host *host, unsigned int clock)
*/
host_clk = tegra_host->ddr_signaling ? clock * 2 : clock;
clk_set_rate(pltfm_host->clk, host_clk);
+ tegra_host->curr_clk_rate = host_clk;
if (tegra_host->ddr_signaling)
host->max_clk = host_clk;
else
@@ -770,6 +792,159 @@ static void tegra_sdhci_hs400_dll_cal(struct sdhci_host *host)
"HS400 delay line calibration timed out\n");
}

+static void tegra_sdhci_tap_correction(struct sdhci_host *host, u8 thd_up,
+ u8 thd_low, u8 fixed_tap)
+{
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+ struct sdhci_tegra *tegra_host = sdhci_pltfm_priv(pltfm_host);
+ u32 val, tun_status;
+ u8 word, bit, edge1, tap, window;
+ bool tap_result;
+ bool start_fail = false;
+ bool start_pass = false;
+ bool end_pass = false;
+ bool first_fail = false;
+ bool first_pass = false;
+ u8 start_pass_tap = 0;
+ u8 end_pass_tap = 0;
+ u8 first_fail_tap = 0;
+ u8 first_pass_tap = 0;
+ u8 total_tuning_words = host->tuning_loop_count / TUNING_WORD_BIT_SIZE;
+
+ /*
+ * Read auto-tuned results and extract good valid passing window by
+ * filtering out un-wanted bubble/partial/merged windows.
+ */
+ for (word = 0; word < total_tuning_words; word++) {
+ val = sdhci_readl(host, SDHCI_VNDR_TUN_CTRL0_0);
+ val &= ~SDHCI_VNDR_TUN_CTRL0_TUN_WORD_SEL_MASK;
+ val |= word;
+ sdhci_writel(host, val, SDHCI_VNDR_TUN_CTRL0_0);
+ tun_status = sdhci_readl(host, SDHCI_TEGRA_VNDR_TUN_STATUS0);
+ bit = 0;
+ while (bit < TUNING_WORD_BIT_SIZE) {
+ tap = word * TUNING_WORD_BIT_SIZE + bit;
+ tap_result = tun_status & (1 << bit);
+ if (!tap_result && !start_fail) {
+ start_fail = true;
+ if (!first_fail) {
+ first_fail_tap = tap;
+ first_fail = true;
+ }
+
+ } else if (tap_result && start_fail && !start_pass) {
+ start_pass_tap = tap;
+ start_pass = true;
+ if (!first_pass) {
+ first_pass_tap = tap;
+ first_pass = true;
+ }
+
+ } else if (!tap_result && start_fail && start_pass &&
+ !end_pass) {
+ end_pass_tap = tap - 1;
+ end_pass = true;
+ } else if (tap_result && start_pass && start_fail &&
+ end_pass) {
+ window = end_pass_tap - start_pass_tap;
+ /* discard merged window and bubble window */
+ if (window >= thd_up || window < thd_low) {
+ start_pass_tap = tap;
+ end_pass = false;
+ } else {
+ /* set tap at middle of valid window */
+ tap = start_pass_tap + window / 2;
+ tegra_host->tuned_tap_delay = tap;
+ return;
+ }
+ }
+
+ bit++;
+ }
+ }
+
+ if (!first_fail) {
+ WARN_ON("no edge detected, continue with hw tuned delay.\n");
+ } else if (first_pass) {
+ /* set tap location at fixed tap relative to the first edge */
+ edge1 = first_fail_tap + (first_pass_tap - first_fail_tap) / 2;
+ if (edge1 - 1 > fixed_tap)
+ tegra_host->tuned_tap_delay = edge1 - fixed_tap;
+ else
+ tegra_host->tuned_tap_delay = edge1 + fixed_tap;
+ }
+}
+
+static void tegra_sdhci_post_tuning(struct sdhci_host *host)
+{
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+ struct sdhci_tegra *tegra_host = sdhci_pltfm_priv(pltfm_host);
+ const struct sdhci_tegra_soc_data *soc_data = tegra_host->soc_data;
+ u32 avg_tap_dly, val, min_tap_dly, max_tap_dly;
+ u8 fixed_tap, start_tap, end_tap, window_width;
+ u8 thdupper, thdlower;
+ u8 num_iter;
+ u32 clk_rate_mhz, period_ps, bestcase, worstcase;
+
+ /* retain HW tuned tap to use incase if no correction is needed */
+ val = sdhci_readl(host, SDHCI_TEGRA_VENDOR_CLOCK_CTRL);
+ tegra_host->tuned_tap_delay = (val & SDHCI_CLOCK_CTRL_TAP_MASK) >>
+ SDHCI_CLOCK_CTRL_TAP_SHIFT;
+ if (soc_data->min_tap_delay && soc_data->max_tap_delay) {
+ min_tap_dly = soc_data->min_tap_delay;
+ max_tap_dly = soc_data->max_tap_delay;
+ clk_rate_mhz = tegra_host->curr_clk_rate / USEC_PER_SEC;
+ period_ps = USEC_PER_SEC / clk_rate_mhz;
+ bestcase = period_ps / min_tap_dly;
+ worstcase = period_ps / max_tap_dly;
+ /*
+ * Upper and Lower bound thresholds used to detect merged and
+ * bubble windows
+ */
+ thdupper = (2 * worstcase + bestcase) / 2;
+ thdlower = worstcase / 4;
+ /*
+ * fixed tap is used when HW tuning result contains single edge
+ * and tap is set at fixed tap delay relative to the first edge
+ */
+ avg_tap_dly = (period_ps * 2) / (min_tap_dly + max_tap_dly);
+ fixed_tap = avg_tap_dly / 2;
+
+ val = sdhci_readl(host, SDHCI_TEGRA_VNDR_TUN_STATUS1);
+ start_tap = val & SDHCI_TEGRA_VNDR_TUN_STATUS1_TAP_MASK;
+ end_tap = (val >> SDHCI_TEGRA_VNDR_TUN_STATUS1_END_TAP_SHIFT) &
+ SDHCI_TEGRA_VNDR_TUN_STATUS1_TAP_MASK;
+ window_width = end_tap - start_tap;
+ num_iter = host->tuning_loop_count;
+ /*
+ * partial window includes edges of the tuning range.
+ * merged window includes more taps so window width is higher
+ * than upper threshold.
+ */
+ if (start_tap == 0 || (end_tap == (num_iter - 1)) ||
+ (end_tap == num_iter - 2) || window_width >= thdupper) {
+ pr_debug("%s: Apply tuning correction\n",
+ mmc_hostname(host->mmc));
+ tegra_sdhci_tap_correction(host, thdupper, thdlower,
+ fixed_tap);
+ }
+ }
+
+ tegra_sdhci_set_tap(host, tegra_host->tuned_tap_delay);
+}
+
+static int tegra_sdhci_execute_hw_tuning(struct mmc_host *mmc, u32 opcode)
+{
+ struct sdhci_host *host = mmc_priv(mmc);
+ int err;
+
+ err = sdhci_execute_tuning(mmc, opcode);
+ if (!err && !host->tuning_err)
+ tegra_sdhci_post_tuning(host);
+
+ return err;
+}
+
static void tegra_sdhci_set_uhs_signaling(struct sdhci_host *host,
unsigned timing)
{
@@ -778,17 +953,22 @@ static void tegra_sdhci_set_uhs_signaling(struct sdhci_host *host,
bool set_default_tap = false;
bool set_dqs_trim = false;
bool do_hs400_dll_cal = false;
+ u8 iter = TRIES_256;
+ u32 val;

tegra_host->ddr_signaling = false;
switch (timing) {
case MMC_TIMING_UHS_SDR50:
+ break;
case MMC_TIMING_UHS_SDR104:
case MMC_TIMING_MMC_HS200:
/* Don't set default tap on tunable modes. */
+ iter = TRIES_128;
break;
case MMC_TIMING_MMC_HS400:
set_dqs_trim = true;
do_hs400_dll_cal = true;
+ iter = TRIES_128;
break;
case MMC_TIMING_MMC_DDR52:
case MMC_TIMING_UHS_DDR50:
@@ -800,11 +980,25 @@ static void tegra_sdhci_set_uhs_signaling(struct sdhci_host *host,
break;
}

+ val = sdhci_readl(host, SDHCI_VNDR_TUN_CTRL0_0);
+ val &= ~(SDHCI_VNDR_TUN_CTRL0_TUN_ITER_MASK |
+ SDHCI_VNDR_TUN_CTRL0_START_TAP_VAL_MASK |
+ SDHCI_VNDR_TUN_CTRL0_MUL_M_MASK);
+ val |= (iter << SDHCI_VNDR_TUN_CTRL0_TUN_ITER_SHIFT |
+ 0 << SDHCI_VNDR_TUN_CTRL0_START_TAP_VAL_SHIFT |
+ 1 << SDHCI_VNDR_TUN_CTRL0_MUL_M_SHIFT);
+ sdhci_writel(host, val, SDHCI_VNDR_TUN_CTRL0_0);
+ sdhci_writel(host, 0, SDHCI_TEGRA_VNDR_TUN_CTRL1_0);
+
+ host->tuning_loop_count = (iter == TRIES_128) ? 128 : 256;
+
sdhci_set_uhs_signaling(host, timing);

tegra_sdhci_pad_autocalib(host);

- if (set_default_tap)
+ if (tegra_host->tuned_tap_delay && !set_default_tap)
+ tegra_sdhci_set_tap(host, tegra_host->tuned_tap_delay);
+ else
tegra_sdhci_set_tap(host, tegra_host->default_tap);

if (set_dqs_trim)
@@ -1110,6 +1304,8 @@ static const struct sdhci_tegra_soc_data soc_data_tegra210 = {
NVQUIRK_DIS_CARD_CLK_CONFIG_TAP |
NVQUIRK_ENABLE_SDR50 |
NVQUIRK_ENABLE_SDR104,
+ .min_tap_delay = 106,
+ .max_tap_delay = 185,
};

static const struct sdhci_ops tegra186_sdhci_ops = {
@@ -1150,9 +1346,23 @@ static const struct sdhci_tegra_soc_data soc_data_tegra186 = {
NVQUIRK_DIS_CARD_CLK_CONFIG_TAP |
NVQUIRK_ENABLE_SDR50 |
NVQUIRK_ENABLE_SDR104,
+ .min_tap_delay = 84,
+ .max_tap_delay = 136,
+};
+
+static const struct sdhci_tegra_soc_data soc_data_tegra194 = {
+ .pdata = &sdhci_tegra186_pdata,
+ .nvquirks = NVQUIRK_NEEDS_PAD_CONTROL |
+ NVQUIRK_HAS_PADCALIB |
+ NVQUIRK_DIS_CARD_CLK_CONFIG_TAP |
+ NVQUIRK_ENABLE_SDR50 |
+ NVQUIRK_ENABLE_SDR104,
+ .min_tap_delay = 96,
+ .max_tap_delay = 139,
};

static const struct of_device_id sdhci_tegra_dt_match[] = {
+ { .compatible = "nvidia,tegra194-sdhci", .data = &soc_data_tegra194 },
{ .compatible = "nvidia,tegra186-sdhci", .data = &soc_data_tegra186 },
{ .compatible = "nvidia,tegra210-sdhci", .data = &soc_data_tegra210 },
{ .compatible = "nvidia,tegra124-sdhci", .data = &soc_data_tegra124 },
@@ -1251,6 +1461,10 @@ static int sdhci_tegra_probe(struct platform_device *pdev)
host->mmc_host_ops.hs400_enhanced_strobe =
tegra_sdhci_hs400_enhanced_strobe;

+ if (!host->ops->platform_execute_tuning)
+ host->mmc_host_ops.execute_tuning =
+ tegra_sdhci_execute_hw_tuning;
+
rc = mmc_of_parse(host->mmc);
if (rc)
goto err_parse_dt;
--
2.7.4