2020-01-29 20:10:23

by Eddie James

[permalink] [raw]
Subject: [PATCH] spi: Add FSI-attached SPI controller driver

There exists a set of SPI controllers on some POWER processors that may
be accessed through the FSI bus. Add a driver to traverse the FSI CFAM
engine that can access and drive the SPI controllers. This driver would
typically be used by a baseboard management controller (BMC).

Signed-off-by: Eddie James <[email protected]>
---
MAINTAINERS | 6 +
drivers/spi/Kconfig | 7 +
drivers/spi/Makefile | 1 +
drivers/spi/spi-fsi.c | 547 ++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 561 insertions(+)
create mode 100644 drivers/spi/spi-fsi.c

diff --git a/MAINTAINERS b/MAINTAINERS
index fc1e24c..56a50c3 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6862,6 +6862,12 @@ S: Maintained
F: drivers/i2c/busses/i2c-fsi.c
F: Documentation/devicetree/bindings/i2c/i2c-fsi.txt

+FSI-ATTACHED SPI DRIVER
+M: Eddie James <[email protected]>
+L: [email protected]
+S: Maintained
+F: drivers/spi/spi-fsi.c
+
FSNOTIFY: FILESYSTEM NOTIFICATION INFRASTRUCTURE
M: Jan Kara <[email protected]>
R: Amir Goldstein <[email protected]>
diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index d6ed0c3..c6fc9f1 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -264,6 +264,13 @@ config SPI_FALCON
has only been tested with m25p80 type chips. The hardware has no
support for other types of SPI peripherals.

+config SPI_FSI
+ tristate "FSI SPI driver"
+ depends on FSI
+ help
+ This enables support for the driver for FSI bus attached SPI
+ controllers.
+
config SPI_FSL_LPSPI
tristate "Freescale i.MX LPSPI controller"
depends on ARCH_MXC || COMPILE_TEST
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index 9b65ec5..d4ea24e 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -40,6 +40,7 @@ spi-dw-midpci-objs := spi-dw-pci.o spi-dw-mid.o
obj-$(CONFIG_SPI_EFM32) += spi-efm32.o
obj-$(CONFIG_SPI_EP93XX) += spi-ep93xx.o
obj-$(CONFIG_SPI_FALCON) += spi-falcon.o
+obj-$(CONFIG_SPI_FSI) += spi-fsi.o
obj-$(CONFIG_SPI_FSL_CPM) += spi-fsl-cpm.o
obj-$(CONFIG_SPI_FSL_DSPI) += spi-fsl-dspi.o
obj-$(CONFIG_SPI_FSL_LIB) += spi-fsl-lib.o
diff --git a/drivers/spi/spi-fsi.c b/drivers/spi/spi-fsi.c
new file mode 100644
index 0000000..4be0a0f
--- /dev/null
+++ b/drivers/spi/spi-fsi.c
@@ -0,0 +1,547 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) IBM Corporation 2020
+ */
+
+#include <linux/bitfield.h>
+#include <linux/bits.h>
+#include <linux/fsi.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/spi/spi.h>
+
+#define FSI_ENGID_SPI 0x23
+
+#define FSI2SPI_DATA0 0x00
+#define FSI2SPI_DATA1 0x04
+#define FSI2SPI_CMD 0x08
+#define FSI2SPI_CMD_WRITE BIT(31)
+#define FSI2SPI_RESET 0x18
+#define FSI2SPI_STATUS 0x1c
+#define FSI2SPI_STATUS_ANY_ERROR BIT(31)
+#define FSI2SPI_IRQ 0x20
+
+#define SPI_FSI_BASE 0x70000
+#define SPI_FSI_MAX_TRANSFER_SIZE 2048
+
+#define SPI_FSI_ERROR 0x0
+#define SPI_FSI_COUNTER_CFG 0x1
+#define SPI_FSI_COUNTER_CFG_LOOPS(x) (((u64)(x) & 0xffULL) << 32)
+#define SPI_FSI_CFG1 0x2
+#define SPI_FSI_CLOCK_CFG 0x3
+#define SPI_FSI_CLOCK_CFG_MM_ENABLE BIT_ULL(32)
+#define SPI_FSI_CLOCK_CFG_ECC_DISABLE (BIT_ULL(35) | BIT_ULL(33))
+#define SPI_FSI_CLOCK_CFG_RESET1 (BIT_ULL(36) | BIT_ULL(38))
+#define SPI_FSI_CLOCK_CFG_RESET2 (BIT_ULL(37) | BIT_ULL(39))
+#define SPI_FSI_CLOCK_CFG_MODE (BIT_ULL(41) | BIT_ULL(42))
+#define SPI_FSI_CLOCK_CFG_SCK_RECV_DEL GENMASK_ULL(51, 44)
+#define SPI_FSI_CLOCK_CFG_SCK_NO_DEL BIT_ULL(51)
+#define SPI_FSI_CLOCK_CFG_SCK_DIV GENMASK_ULL(63, 52)
+#define SPI_FSI_MMAP 0x4
+#define SPI_FSI_DATA_TX 0x5
+#define SPI_FSI_DATA_RX 0x6
+#define SPI_FSI_SEQUENCE 0x7
+#define SPI_FSI_SEQUENCE_STOP 0x00
+#define SPI_FSI_SEQUENCE_SEL_SLAVE(x) (0x10 | ((x) & 0xf))
+#define SPI_FSI_SEQUENCE_SHIFT_OUT(x) (0x30 | ((x) & 0xf))
+#define SPI_FSI_SEQUENCE_SHIFT_IN(x) (0x40 | ((x) & 0xf))
+#define SPI_FSI_SEQUENCE_COPY_DATA_TX 0xc0
+#define SPI_FSI_SEQUENCE_BRANCH(x) (0xe0 | ((x) & 0xf))
+#define SPI_FSI_STATUS 0x8
+#define SPI_FSI_STATUS_ERROR \
+ (GENMASK_ULL(31, 21) | GENMASK_ULL(15, 12))
+#define SPI_FSI_STATUS_SEQ_STATE GENMASK_ULL(55, 48)
+#define SPI_FSI_STATUS_SEQ_STATE_IDLE BIT_ULL(48)
+#define SPI_FSI_STATUS_TDR_UNDERRUN BIT_ULL(57)
+#define SPI_FSI_STATUS_TDR_OVERRUN BIT_ULL(58)
+#define SPI_FSI_STATUS_TDR_FULL BIT_ULL(59)
+#define SPI_FSI_STATUS_RDR_UNDERRUN BIT_ULL(61)
+#define SPI_FSI_STATUS_RDR_OVERRUN BIT_ULL(62)
+#define SPI_FSI_STATUS_RDR_FULL BIT_ULL(63)
+#define SPI_FSI_STATUS_ANY_ERROR \
+ (SPI_FSI_STATUS_ERROR | SPI_FSI_STATUS_TDR_UNDERRUN | \
+ SPI_FSI_STATUS_TDR_OVERRUN | SPI_FSI_STATUS_RDR_UNDERRUN | \
+ SPI_FSI_STATUS_RDR_OVERRUN)
+#define SPI_FSI_PORT_CTRL 0x9
+
+struct fsi_spi {
+ struct device *dev;
+ struct fsi_device *fsi;
+ u32 base;
+};
+
+struct fsi_spi_sequence {
+ int bit;
+ u64 data;
+};
+
+static int fsi_spi_check_status(struct fsi_spi *ctx)
+{
+ int rc;
+ u32 sts;
+ __be32 sts_be;
+
+ rc = fsi_device_read(ctx->fsi, FSI2SPI_STATUS, &sts_be,
+ sizeof(sts_be));
+ if (rc)
+ return rc;
+
+ sts = be32_to_cpu(sts_be);
+ if (sts & FSI2SPI_STATUS_ANY_ERROR) {
+ dev_err(ctx->dev, "Error with FSI2SPI interface: %08x.\n", sts);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int fsi_spi_read_reg(struct fsi_spi *ctx, u32 offset, u64 *value)
+{
+ int rc;
+ __be32 cmd_be;
+ __be32 data_be;
+
+ *value = 0ULL;
+
+ cmd_be = cpu_to_be32(offset + ctx->base);
+ rc = fsi_device_write(ctx->fsi, FSI2SPI_CMD, &cmd_be, sizeof(cmd_be));
+ if (rc)
+ return rc;
+
+ rc = fsi_spi_check_status(ctx);
+ if (rc)
+ return rc;
+
+ rc = fsi_device_read(ctx->fsi, FSI2SPI_DATA0, &data_be,
+ sizeof(data_be));
+ if (rc)
+ return rc;
+
+ *value |= (u64)be32_to_cpu(data_be) << 32;
+
+ rc = fsi_device_read(ctx->fsi, FSI2SPI_DATA1, &data_be,
+ sizeof(data_be));
+ if (rc)
+ return rc;
+
+ *value |= (u64)be32_to_cpu(data_be);
+ dev_dbg(ctx->dev, "Read %02x[%016llx].\n", offset, *value);
+
+ return 0;
+}
+
+static int fsi_spi_write_reg(struct fsi_spi *ctx, u32 offset, u64 value)
+{
+ int rc;
+ __be32 cmd_be;
+ __be32 data_be;
+
+ dev_dbg(ctx->dev, "Write %02x[%016llx].\n", offset, value);
+
+ data_be = cpu_to_be32((value >> 32) & 0xFFFFFFFF);
+ rc = fsi_device_write(ctx->fsi, FSI2SPI_DATA0, &data_be,
+ sizeof(data_be));
+ if (rc)
+ return rc;
+
+ data_be = cpu_to_be32(value & 0xFFFFFFFF);
+ rc = fsi_device_write(ctx->fsi, FSI2SPI_DATA1, &data_be,
+ sizeof(data_be));
+ if (rc)
+ return rc;
+
+ cmd_be = cpu_to_be32((offset + ctx->base) | FSI2SPI_CMD_WRITE);
+ rc = fsi_device_write(ctx->fsi, FSI2SPI_CMD, &cmd_be, sizeof(cmd_be));
+ if (rc)
+ return rc;
+
+ return fsi_spi_check_status(ctx);
+}
+
+static int fsi_spi_data_in(u64 in, u8 *rx, int len)
+{
+ int i;
+ int num_bytes = len > 8 ? 8 : len;
+
+ for (i = 0; i < num_bytes; ++i)
+ rx[i] = (u8)((in >> (8 * ((num_bytes - 1) - i))) & 0xffULL);
+
+ return num_bytes;
+}
+
+static int fsi_spi_data_out(u64 *out, const u8 *tx, int len)
+{
+ int i;
+ int num_bytes = len > 8 ? 8 : len;
+
+ *out = 0ULL;
+
+ for (i = 0; i < num_bytes; ++i)
+ *out |= (u64)tx[i] << (8 * (8 - (i + 1)));
+
+ return num_bytes;
+}
+
+static int fsi_spi_reset(struct fsi_spi *ctx)
+{
+ int rc;
+
+ dev_info(ctx->dev, "Resetting SPI controller.\n");
+
+ rc = fsi_spi_write_reg(ctx, SPI_FSI_CLOCK_CFG,
+ SPI_FSI_CLOCK_CFG_RESET1);
+ if (rc)
+ return rc;
+
+ rc = fsi_spi_write_reg(ctx, SPI_FSI_CLOCK_CFG,
+ SPI_FSI_CLOCK_CFG_RESET2);
+ return rc;
+}
+
+static int fsi_spi_sequence_add(struct fsi_spi_sequence *seq, u8 val)
+{
+ seq->data |= (u64)val << seq->bit;
+ seq->bit -= 8;
+
+ return ((64 - seq->bit) / 8) - 2;
+}
+
+static void fsi_spi_sequence_init(struct fsi_spi_sequence *seq)
+{
+ seq->bit = 56;
+ seq->data = 0ULL;
+}
+
+static int fsi_spi_sequence_transfer(struct fsi_spi *ctx,
+ struct fsi_spi_sequence *seq,
+ struct spi_transfer *transfer)
+{
+ int loops = 1;
+ int idx = 0;
+ int rc;
+ u8 len;
+ u8 rem = 0;
+
+ if (transfer->len > 8) {
+ loops = transfer->len / 8;
+ rem = transfer->len - (loops * 8);
+ len = 8;
+ } else {
+ len = transfer->len;
+ }
+
+ if (transfer->tx_buf) {
+ idx = fsi_spi_sequence_add(seq,
+ SPI_FSI_SEQUENCE_SHIFT_OUT(len));
+ if (rem)
+ rem = SPI_FSI_SEQUENCE_SHIFT_OUT(rem);
+ } else if (transfer->rx_buf) {
+ idx = fsi_spi_sequence_add(seq,
+ SPI_FSI_SEQUENCE_SHIFT_IN(len));
+ if (rem)
+ rem = SPI_FSI_SEQUENCE_SHIFT_IN(rem);
+ } else {
+ return -EINVAL;
+ }
+
+ if (loops > 1) {
+ fsi_spi_sequence_add(seq, SPI_FSI_SEQUENCE_BRANCH(idx));
+
+ if (rem)
+ fsi_spi_sequence_add(seq, rem);
+
+ rc = fsi_spi_write_reg(ctx, SPI_FSI_COUNTER_CFG,
+ SPI_FSI_COUNTER_CFG_LOOPS(loops - 1));
+ if (rc) {
+ /* Ensure error returns < 0 in this case. */
+ if (rc > 0)
+ rc = -rc;
+
+ return rc;
+ }
+
+ return loops;
+ }
+
+ return 0;
+}
+
+static int fsi_spi_transfer_data(struct fsi_spi *ctx,
+ struct spi_transfer *transfer)
+{
+ int rc = 0;
+ u64 status = 0ULL;
+
+ if (transfer->tx_buf) {
+ int nb;
+ int sent = 0;
+ u64 out = 0ULL;
+ const u8 *tx = transfer->tx_buf;
+
+ while (transfer->len > sent) {
+ nb = fsi_spi_data_out(&out, &tx[sent],
+ (int)transfer->len - sent);
+
+ rc = fsi_spi_write_reg(ctx, SPI_FSI_DATA_TX, out);
+ if (rc)
+ return rc;
+
+ do {
+ rc = fsi_spi_read_reg(ctx, SPI_FSI_STATUS,
+ &status);
+ if (rc)
+ return rc;
+
+ if (status & SPI_FSI_STATUS_ANY_ERROR) {
+ rc = fsi_spi_reset(ctx);
+ if (rc)
+ return rc;
+
+ return -EREMOTEIO;
+ }
+ } while (status & SPI_FSI_STATUS_TDR_FULL);
+
+ sent += nb;
+ }
+ } else if (transfer->rx_buf) {
+ int recv = 0;
+ u64 in = 0ULL;
+ u8 *rx = transfer->rx_buf;
+
+ while (transfer->len > recv) {
+ do {
+ rc = fsi_spi_read_reg(ctx, SPI_FSI_STATUS,
+ &status);
+ if (rc)
+ return rc;
+
+ if (status & SPI_FSI_STATUS_ANY_ERROR) {
+ rc = fsi_spi_reset(ctx);
+ if (rc)
+ return rc;
+
+ return -EREMOTEIO;
+ }
+ } while (!(status & SPI_FSI_STATUS_RDR_FULL));
+
+ rc = fsi_spi_read_reg(ctx, SPI_FSI_DATA_RX, &in);
+ if (rc)
+ return rc;
+
+ recv += fsi_spi_data_in(in, &rx[recv],
+ (int)transfer->len - recv);
+ }
+ }
+
+ return 0;
+}
+
+static int fsi_spi_transfer_init(struct fsi_spi *ctx)
+{
+ int rc;
+ u64 seq_state;
+ u64 clock_cfg = 0ULL;
+ u64 status = 0ULL;
+ u64 wanted_clock_cfg = SPI_FSI_CLOCK_CFG_ECC_DISABLE |
+ SPI_FSI_CLOCK_CFG_SCK_NO_DEL |
+ FIELD_PREP(SPI_FSI_CLOCK_CFG_SCK_DIV, 4);
+
+ do {
+ rc = fsi_spi_read_reg(ctx, SPI_FSI_STATUS, &status);
+ if (rc)
+ return rc;
+
+ if (status & (SPI_FSI_STATUS_ANY_ERROR |
+ SPI_FSI_STATUS_TDR_FULL |
+ SPI_FSI_STATUS_RDR_FULL)) {
+ rc = fsi_spi_reset(ctx);
+ if (rc)
+ return rc;
+
+ continue;
+ }
+
+ seq_state = status & SPI_FSI_STATUS_SEQ_STATE;
+ } while (seq_state && (seq_state != SPI_FSI_STATUS_SEQ_STATE_IDLE));
+
+ rc = fsi_spi_read_reg(ctx, SPI_FSI_CLOCK_CFG, &clock_cfg);
+ if (rc)
+ return rc;
+
+ if ((clock_cfg & (SPI_FSI_CLOCK_CFG_MM_ENABLE |
+ SPI_FSI_CLOCK_CFG_ECC_DISABLE |
+ SPI_FSI_CLOCK_CFG_MODE |
+ SPI_FSI_CLOCK_CFG_SCK_RECV_DEL |
+ SPI_FSI_CLOCK_CFG_SCK_DIV)) != wanted_clock_cfg)
+ rc = fsi_spi_write_reg(ctx, SPI_FSI_CLOCK_CFG,
+ wanted_clock_cfg);
+
+ return rc;
+}
+
+static int fsi_spi_transfer_one_message(struct spi_controller *ctlr,
+ struct spi_message *mesg)
+{
+ int rc = 0;
+ u8 seq_slave = SPI_FSI_SEQUENCE_SEL_SLAVE(mesg->spi->chip_select + 1);
+ struct spi_transfer *transfer;
+ struct fsi_spi *ctx = spi_controller_get_devdata(ctlr);
+
+ list_for_each_entry(transfer, &mesg->transfers, transfer_list) {
+ struct fsi_spi_sequence seq;
+ struct spi_transfer *next = NULL;
+
+ /* Sequencer must do shift out (tx) first. */
+ if (!transfer->tx_buf ||
+ transfer->len > SPI_FSI_MAX_TRANSFER_SIZE) {
+ rc = -EINVAL;
+ goto error;
+ }
+
+ dev_dbg(ctx->dev, "Start tx of %d bytes.\n", transfer->len);
+
+ rc = fsi_spi_transfer_init(ctx);
+ if (rc < 0)
+ goto error;
+
+ fsi_spi_sequence_init(&seq);
+ fsi_spi_sequence_add(&seq, seq_slave);
+
+ rc = fsi_spi_sequence_transfer(ctx, &seq, transfer);
+ if (rc < 0)
+ goto error;
+
+ if (!list_is_last(&transfer->transfer_list,
+ &mesg->transfers)) {
+ next = list_next_entry(transfer, transfer_list);
+
+ /* Sequencer can only do shift in (rx) after tx. */
+ if (next->rx_buf) {
+ if (next->len > SPI_FSI_MAX_TRANSFER_SIZE) {
+ rc = -EINVAL;
+ goto error;
+ }
+
+ dev_dbg(ctx->dev, "Sequence rx of %d bytes.\n",
+ next->len);
+
+ rc = fsi_spi_sequence_transfer(ctx, &seq,
+ next);
+ if (rc < 0)
+ goto error;
+ } else {
+ next = NULL;
+ }
+ }
+
+ fsi_spi_sequence_add(&seq, SPI_FSI_SEQUENCE_SEL_SLAVE(0));
+
+ rc = fsi_spi_write_reg(ctx, SPI_FSI_SEQUENCE, seq.data);
+ if (rc)
+ goto error;
+
+ rc = fsi_spi_transfer_data(ctx, transfer);
+ if (rc)
+ goto error;
+
+ if (next) {
+ rc = fsi_spi_transfer_data(ctx, next);
+ if (rc)
+ goto error;
+
+ transfer = next;
+ }
+ }
+
+error:
+ mesg->status = rc;
+ spi_finalize_current_message(ctlr);
+
+ return rc;
+}
+
+static size_t fsi_spi_max_transfer_size(struct spi_device *spi)
+{
+ return SPI_FSI_MAX_TRANSFER_SIZE;
+}
+
+static int fsi_spi_probe(struct device *dev)
+{
+ int rc;
+ u32 root_ctrl_8;
+ struct device_node *np;
+ int num_controllers_registered = 0;
+ struct fsi_device *fsi = to_fsi_dev(dev);
+
+ rc = fsi_slave_read(fsi->slave, 0x2860, &root_ctrl_8,
+ sizeof(root_ctrl_8));
+ if (rc)
+ return rc;
+
+ if (!root_ctrl_8) {
+ dev_dbg(dev, "SPI MUX not set, aborting probe.\n");
+ return -ENODEV;
+ }
+
+ for_each_available_child_of_node(dev->of_node, np) {
+ u32 base;
+ struct fsi_spi *ctx;
+ struct spi_controller *ctlr;
+
+ if (of_property_read_u32(np, "reg", &base))
+ continue;
+
+ ctlr = spi_alloc_master(dev, sizeof(*ctx));
+ if (!ctlr)
+ break;
+
+ ctlr->dev.of_node = np;
+ ctlr->num_chipselect = of_get_available_child_count(np) ?: 1;
+ ctlr->flags = SPI_CONTROLLER_HALF_DUPLEX;
+ ctlr->max_transfer_size = fsi_spi_max_transfer_size;
+ ctlr->transfer_one_message = fsi_spi_transfer_one_message;
+
+ ctx = spi_controller_get_devdata(ctlr);
+ ctx->dev = &ctlr->dev;
+ ctx->fsi = fsi;
+ ctx->base = base + SPI_FSI_BASE;
+
+ rc = devm_spi_register_controller(dev, ctlr);
+ if (rc)
+ spi_controller_put(ctlr);
+ else
+ num_controllers_registered++;
+ }
+
+ if (!num_controllers_registered)
+ return -ENODEV;
+
+ return 0;
+}
+
+static int fsi_spi_remove(struct device *dev)
+{
+ return 0;
+}
+
+static const struct fsi_device_id fsi_spi_ids[] = {
+ { FSI_ENGID_SPI, FSI_VERSION_ANY },
+ { }
+};
+
+static struct fsi_driver fsi_spi_driver = {
+ .id_table = fsi_spi_ids,
+ .drv = {
+ .name = "spi-fsi",
+ .bus = &fsi_bus_type,
+ .probe = fsi_spi_probe,
+ .remove = fsi_spi_remove,
+ },
+};
+
+module_fsi_driver(fsi_spi_driver);
+
+MODULE_AUTHOR("Eddie James <[email protected]>");
+MODULE_DESCRIPTION("FSI attached SPI controller");
+MODULE_LICENSE("GPL");
--
1.8.3.1


2020-01-30 14:47:36

by Mark Brown

[permalink] [raw]
Subject: Re: [PATCH] spi: Add FSI-attached SPI controller driver

On Wed, Jan 29, 2020 at 02:08:24PM -0600, Eddie James wrote:

Overall this looks good, some comments below but they're all fairly
minor.

> +++ b/drivers/spi/spi-fsi.c
> @@ -0,0 +1,547 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * Copyright (C) IBM Corporation 2020
> + */

Please make the entire comment a C++ one so things look more
intentional.

> +
> +static int fsi_spi_data_in(u64 in, u8 *rx, int len)
> +{
> + int i;
> + int num_bytes = len > 8 ? 8 : len;

Please write normal conditional statements to improve legibility, the
ternery operator isn't really needed here.

> +static int fsi_spi_reset(struct fsi_spi *ctx)
> +{
> + int rc;
> +
> + dev_info(ctx->dev, "Resetting SPI controller.\n");

This should be lowered to dev_dbg() at most, it's not really adding
anything otherwise.

> +static int fsi_spi_remove(struct device *dev)
> +{
> + return 0;
> +}

Remove empty functions, if they can safely be empty then it should be
possible to omit them.

> +static const struct fsi_device_id fsi_spi_ids[] = {
> + { FSI_ENGID_SPI, FSI_VERSION_ANY },
> + { }
> +};

This needs a MODULE_DEVICE_TABLE annotation.


Attachments:
(No filename) (1.15 kB)
signature.asc (499.00 B)
Download all attachments

2020-01-30 15:33:49

by Eddie James

[permalink] [raw]
Subject: Re: [PATCH] spi: Add FSI-attached SPI controller driver


On 1/30/20 8:46 AM, Mark Brown wrote:
> On Wed, Jan 29, 2020 at 02:08:24PM -0600, Eddie James wrote:
>
> Overall this looks good, some comments below but they're all fairly
> minor.


Thanks for the quick review! I'll fix what you've suggested below.

Thanks,

Eddie


>
>> +++ b/drivers/spi/spi-fsi.c
>> @@ -0,0 +1,547 @@
>> +// SPDX-License-Identifier: GPL-2.0-or-later
>> +/*
>> + * Copyright (C) IBM Corporation 2020
>> + */
> Please make the entire comment a C++ one so things look more
> intentional.
>
>> +
>> +static int fsi_spi_data_in(u64 in, u8 *rx, int len)
>> +{
>> + int i;
>> + int num_bytes = len > 8 ? 8 : len;
> Please write normal conditional statements to improve legibility, the
> ternery operator isn't really needed here.
>
>> +static int fsi_spi_reset(struct fsi_spi *ctx)
>> +{
>> + int rc;
>> +
>> + dev_info(ctx->dev, "Resetting SPI controller.\n");
> This should be lowered to dev_dbg() at most, it's not really adding
> anything otherwise.
>
>> +static int fsi_spi_remove(struct device *dev)
>> +{
>> + return 0;
>> +}
> Remove empty functions, if they can safely be empty then it should be
> possible to omit them.
>
>> +static const struct fsi_device_id fsi_spi_ids[] = {
>> + { FSI_ENGID_SPI, FSI_VERSION_ANY },
>> + { }
>> +};
> This needs a MODULE_DEVICE_TABLE annotation.

2020-01-30 16:40:07

by Andy Shevchenko

[permalink] [raw]
Subject: Re: [PATCH] spi: Add FSI-attached SPI controller driver

On Wed, Jan 29, 2020 at 10:09 PM Eddie James <[email protected]> wrote:
>
> There exists a set of SPI controllers on some POWER processors that may
> be accessed through the FSI bus. Add a driver to traverse the FSI CFAM
> engine that can access and drive the SPI controllers. This driver would
> typically be used by a baseboard management controller (BMC).

...

> +#include <linux/bitfield.h>
> +#include <linux/bits.h>

> +#include <linux/of.h>

...

> +struct fsi_spi {
> + struct device *dev;

Isn't fsl->dev the same?
Perhaps kernel doc to explain the difference?

> + struct fsi_device *fsi;

> + u32 base;
> +};

...

> +static int fsi_spi_read_reg(struct fsi_spi *ctx, u32 offset, u64 *value)
> +{
> + int rc;
> + __be32 cmd_be;
> + __be32 data_be;

> + *value = 0ULL;

Usually the pattern is don't pollute output on error condition. Any
reason why you zeroing output beforehand?

> + cmd_be = cpu_to_be32(offset + ctx->base);
> + rc = fsi_device_write(ctx->fsi, FSI2SPI_CMD, &cmd_be, sizeof(cmd_be));
> + if (rc)
> + return rc;

> + return 0;
> +}

...

> + data_be = cpu_to_be32((value >> 32) & 0xFFFFFFFF);

Redundant & 0xff... part.

> + data_be = cpu_to_be32(value & 0xFFFFFFFF);

Ditto.

You may use upper_32_bits() / lower_32_bits() instead.

...

> +static int fsi_spi_data_in(u64 in, u8 *rx, int len)
> +{
> + int i;

> + int num_bytes = len > 8 ? 8 : len;

min(len, 8);

> + for (i = 0; i < num_bytes; ++i)
> + rx[i] = (u8)((in >> (8 * ((num_bytes - 1) - i))) & 0xffULL);

Redundant & 0xffULL part.

Isn't it NIH of get_unalinged_be64 / le64 or something similar?

> + return num_bytes;
> +}

> +static int fsi_spi_data_out(u64 *out, const u8 *tx, int len)
> +{

Ditto as for above function. (put_unaligned ...)

> +}

...

> + dev_info(ctx->dev, "Resetting SPI controller.\n");

info?! Why?

> + rc = fsi_spi_write_reg(ctx, SPI_FSI_CLOCK_CFG,
> + SPI_FSI_CLOCK_CFG_RESET2);
> + return rc;

return fsi_spi_write_reg();

...

> + return ((64 - seq->bit) / 8) - 2;

Too many parentheses.

...

> +static int fsi_spi_sequence_transfer(struct fsi_spi *ctx,
> + struct fsi_spi_sequence *seq,
> + struct spi_transfer *transfer)
> +{

> + int loops = 1;
> + int idx = 0;
> + int rc;
> + u8 len;
> + u8 rem = 0;

> + if (transfer->len > 8) {
> + loops = transfer->len / 8;
> + rem = transfer->len - (loops * 8);
> + len = 8;
> + } else {
> + len = transfer->len;
> + }

len = min(transfer->len, 8);

loops = transfer->len / len;
rem = transfer->len % len;

(I think compiler is clever enough to find out that the division can be avoided)

...and drop assignments in definition block.

I didn't look carefully in the implementation, but I believe there is
still room for improvement / optimization.

> + if (loops > 1) {

> + rc = fsi_spi_write_reg(ctx, SPI_FSI_COUNTER_CFG,
> + SPI_FSI_COUNTER_CFG_LOOPS(loops - 1));
> + if (rc) {

> + /* Ensure error returns < 0 in this case. */

I didn't get why this case is special? Why not to be consistent with
return value?

> + if (rc > 0)
> + rc = -rc;
> +
> + return rc;
> + }

> + return loops;

If we return here the amount of loops...

> + }
> +
> + return 0;

...why here is 0?

I think more consistency is required.

> +}

...

> +static int fsi_spi_transfer_data(struct fsi_spi *ctx,
> + struct spi_transfer *transfer)
> +{

Can you refactor to tx and rx parts?

> + return 0;
> +}

...

> + do {
> + rc = fsi_spi_read_reg(ctx, SPI_FSI_STATUS, &status);
> + if (rc)
> + return rc;
> +
> + if (status & (SPI_FSI_STATUS_ANY_ERROR |
> + SPI_FSI_STATUS_TDR_FULL |
> + SPI_FSI_STATUS_RDR_FULL)) {
> + rc = fsi_spi_reset(ctx);
> + if (rc)
> + return rc;
> +

> + continue;

I forgot if this to be infinite loop or if it's going to check
previous seq_state value. In any case this code is a bit fishy. Needs
comments / refactoring.

> + }
> +
> + seq_state = status & SPI_FSI_STATUS_SEQ_STATE;
> + } while (seq_state && (seq_state != SPI_FSI_STATUS_SEQ_STATE_IDLE));

...

> + if ((clock_cfg & (SPI_FSI_CLOCK_CFG_MM_ENABLE |
> + SPI_FSI_CLOCK_CFG_ECC_DISABLE |
> + SPI_FSI_CLOCK_CFG_MODE |
> + SPI_FSI_CLOCK_CFG_SCK_RECV_DEL |
> + SPI_FSI_CLOCK_CFG_SCK_DIV)) != wanted_clock_cfg)

> + rc = fsi_spi_write_reg(ctx, SPI_FSI_CLOCK_CFG,
> + wanted_clock_cfg);

Missed {} ?

> +
> + return rc;
> +}

...

> + rc = fsi_slave_read(fsi->slave, 0x2860, &root_ctrl_8,

What is this magic for?

> + sizeof(root_ctrl_8));
> + if (rc)
> + return rc;

...

> +static int fsi_spi_remove(struct device *dev)
> +{
> + return 0;
> +}

Why do you need this?

...

> +static struct fsi_driver fsi_spi_driver = {
> + .id_table = fsi_spi_ids,
> + .drv = {
> + .name = "spi-fsi",

> + .bus = &fsi_bus_type,

Why is it not in the module_fsi_driver() macro?

> + .probe = fsi_spi_probe,
> + .remove = fsi_spi_remove,
> + },
> +};
> +
> +module_fsi_driver(fsi_spi_driver);

--
With Best Regards,
Andy Shevchenko

2020-02-03 20:34:28

by Eddie James

[permalink] [raw]
Subject: Re: [PATCH] spi: Add FSI-attached SPI controller driver


On 1/30/20 10:37 AM, Andy Shevchenko wrote:
> On Wed, Jan 29, 2020 at 10:09 PM Eddie James <[email protected]> wrote:
>> There exists a set of SPI controllers on some POWER processors that may
>> be accessed through the FSI bus. Add a driver to traverse the FSI CFAM
>> engine that can access and drive the SPI controllers. This driver would
>> typically be used by a baseboard management controller (BMC).
> ...
>
>> +#include <linux/bitfield.h>
>> +#include <linux/bits.h>
>> +#include <linux/of.h>
> ...
>
>> +struct fsi_spi {
>> + struct device *dev;
> Isn't fsl->dev the same?
> Perhaps kernel doc to explain the difference?


No, it's not the same, as dev here is the SPI controller. I'll add a
comment.


>
>> + struct fsi_device *fsi;
>> + u32 base;
>> +};
> ...
>
>> +static int fsi_spi_read_reg(struct fsi_spi *ctx, u32 offset, u64 *value)
>> +{
>> + int rc;
>> + __be32 cmd_be;
>> + __be32 data_be;
>> + *value = 0ULL;
> Usually the pattern is don't pollute output on error condition. Any
> reason why you zeroing output beforehand?


Well otherwise I have to store another 64 bit int and do another
assignment at the end. This is an internal function and all the users
below know what's happening.


>
>> + cmd_be = cpu_to_be32(offset + ctx->base);
>> + rc = fsi_device_write(ctx->fsi, FSI2SPI_CMD, &cmd_be, sizeof(cmd_be));
>> + if (rc)
>> + return rc;
>> + return 0;
>> +}
> ...
>
>> + data_be = cpu_to_be32((value >> 32) & 0xFFFFFFFF);
> Redundant & 0xff... part.
>
>> + data_be = cpu_to_be32(value & 0xFFFFFFFF);
> Ditto.
>
> You may use upper_32_bits() / lower_32_bits() instead.


OK, thanks.


>
> ...
>
>> +static int fsi_spi_data_in(u64 in, u8 *rx, int len)
>> +{
>> + int i;
>> + int num_bytes = len > 8 ? 8 : len;
> min(len, 8);


Sure.


>
>> + for (i = 0; i < num_bytes; ++i)
>> + rx[i] = (u8)((in >> (8 * ((num_bytes - 1) - i))) & 0xffULL);
> Redundant & 0xffULL part.
>
> Isn't it NIH of get_unalinged_be64 / le64 or something similar?


No, these are shift in/out operations. The read register will also have
previous operations data in them and must be extracted with only the
correct number of bytes.


>
>> + return num_bytes;
>> +}
>> +static int fsi_spi_data_out(u64 *out, const u8 *tx, int len)
>> +{
> Ditto as for above function. (put_unaligned ...)
>
>> +}
> ...
>
>> + dev_info(ctx->dev, "Resetting SPI controller.\n");
> info?! Why?
>
>> + rc = fsi_spi_write_reg(ctx, SPI_FSI_CLOCK_CFG,
>> + SPI_FSI_CLOCK_CFG_RESET2);
>> + return rc;
> return fsi_spi_write_reg();
>
> ...
>
>> + return ((64 - seq->bit) / 8) - 2;
> Too many parentheses.


I prefer using 2 extra characters to make it much clearer at a glance.


>
> ...
>
>> +static int fsi_spi_sequence_transfer(struct fsi_spi *ctx,
>> + struct fsi_spi_sequence *seq,
>> + struct spi_transfer *transfer)
>> +{
>> + int loops = 1;
>> + int idx = 0;
>> + int rc;
>> + u8 len;
>> + u8 rem = 0;
>> + if (transfer->len > 8) {
>> + loops = transfer->len / 8;
>> + rem = transfer->len - (loops * 8);
>> + len = 8;
>> + } else {
>> + len = transfer->len;
>> + }
> len = min(transfer->len, 8);
>
> loops = transfer->len / len;
> rem = transfer->len % len;


Sure.


>
> (I think compiler is clever enough to find out that the division can be avoided)
>
> ...and drop assignments in definition block.
>
> I didn't look carefully in the implementation, but I believe there is
> still room for improvement / optimization.
>
>> + if (loops > 1) {
>> + rc = fsi_spi_write_reg(ctx, SPI_FSI_COUNTER_CFG,
>> + SPI_FSI_COUNTER_CFG_LOOPS(loops - 1));
>> + if (rc) {
>> + /* Ensure error returns < 0 in this case. */
> I didn't get why this case is special? Why not to be consistent with
> return value?


Sure, will fix, this was leftover from some testing.


>
>> + if (rc > 0)
>> + rc = -rc;
>> +
>> + return rc;
>> + }
>> + return loops;
> If we return here the amount of loops...
>
>> + }
>> +
>> + return 0;
> ...why here is 0?
>
> I think more consistency is required.


Will refactor.


>
>> +}
> ...
>
>> +static int fsi_spi_transfer_data(struct fsi_spi *ctx,
>> + struct spi_transfer *transfer)
>> +{
> Can you refactor to tx and rx parts?


Why?


>
>> + return 0;
>> +}
> ...
>
>> + do {
>> + rc = fsi_spi_read_reg(ctx, SPI_FSI_STATUS, &status);
>> + if (rc)
>> + return rc;
>> +
>> + if (status & (SPI_FSI_STATUS_ANY_ERROR |
>> + SPI_FSI_STATUS_TDR_FULL |
>> + SPI_FSI_STATUS_RDR_FULL)) {
>> + rc = fsi_spi_reset(ctx);
>> + if (rc)
>> + return rc;
>> +
>> + continue;
> I forgot if this to be infinite loop or if it's going to check
> previous seq_state value. In any case this code is a bit fishy. Needs
> comments / refactoring.


I'll add a timeout.


>
>> + }
>> +
>> + seq_state = status & SPI_FSI_STATUS_SEQ_STATE;
>> + } while (seq_state && (seq_state != SPI_FSI_STATUS_SEQ_STATE_IDLE));
> ...
>
>> + if ((clock_cfg & (SPI_FSI_CLOCK_CFG_MM_ENABLE |
>> + SPI_FSI_CLOCK_CFG_ECC_DISABLE |
>> + SPI_FSI_CLOCK_CFG_MODE |
>> + SPI_FSI_CLOCK_CFG_SCK_RECV_DEL |
>> + SPI_FSI_CLOCK_CFG_SCK_DIV)) != wanted_clock_cfg)
>> + rc = fsi_spi_write_reg(ctx, SPI_FSI_CLOCK_CFG,
>> + wanted_clock_cfg);
> Missed {} ?


No? It's one line under the if.


>
>> +
>> + return rc;
>> +}
> ...
>
>> + rc = fsi_slave_read(fsi->slave, 0x2860, &root_ctrl_8,
> What is this magic for?


Added comment.


>
>> + sizeof(root_ctrl_8));
>> + if (rc)
>> + return rc;
> ...
>
>> +static int fsi_spi_remove(struct device *dev)
>> +{
>> + return 0;
>> +}
> Why do you need this?


Will drop it.


Thanks for the review!

Eddie


>
> ...
>
>> +static struct fsi_driver fsi_spi_driver = {
>> + .id_table = fsi_spi_ids,
>> + .drv = {
>> + .name = "spi-fsi",
>> + .bus = &fsi_bus_type,
> Why is it not in the module_fsi_driver() macro?
>
>> + .probe = fsi_spi_probe,
>> + .remove = fsi_spi_remove,
>> + },
>> +};
>> +
>> +module_fsi_driver(fsi_spi_driver);

2020-02-04 11:04:04

by Andy Shevchenko

[permalink] [raw]
Subject: Re: [PATCH] spi: Add FSI-attached SPI controller driver

On Mon, Feb 3, 2020 at 10:33 PM Eddie James <[email protected]> wrote:
> On 1/30/20 10:37 AM, Andy Shevchenko wrote:
> > On Wed, Jan 29, 2020 at 10:09 PM Eddie James <[email protected]> wrote:

...

> >> + struct device *dev;
> > Isn't fsl->dev the same?
> > Perhaps kernel doc to explain the difference?
>
>
> No, it's not the same, as dev here is the SPI controller. I'll add a
> comment.

Why to have duplication then?

> >> + struct fsi_device *fsi;

...

> >> + for (i = 0; i < num_bytes; ++i)
> >> + rx[i] = (u8)((in >> (8 * ((num_bytes - 1) - i))) & 0xffULL);
> > Redundant & 0xffULL part.
> >
> > Isn't it NIH of get_unalinged_be64 / le64 or something similar?
>
>
> No, these are shift in/out operations. The read register will also have
> previous operations data in them and must be extracted with only the
> correct number of bytes.

Why not to call put_unaligned() how the tail in this case (it's 0 or
can be easily made to be 0) will affect the result?

> >> + return num_bytes;
> >> +}

> >> +static int fsi_spi_data_out(u64 *out, const u8 *tx, int len)
> >> +{
> > Ditto as for above function. (put_unaligned ...)

Ditto.

> >> +}

...

> >> +static int fsi_spi_transfer_data(struct fsi_spi *ctx,
> >> + struct spi_transfer *transfer)
> >> +{
> > Can you refactor to tx and rx parts?
>
>
> Why?

It's way too long function to read. Indentation level also can improve
readability.
That's basically what refactoring is for.

> >> + return 0;
> >> +}

...

> >> + if ((clock_cfg & (SPI_FSI_CLOCK_CFG_MM_ENABLE |
> >> + SPI_FSI_CLOCK_CFG_ECC_DISABLE |
> >> + SPI_FSI_CLOCK_CFG_MODE |
> >> + SPI_FSI_CLOCK_CFG_SCK_RECV_DEL |
> >> + SPI_FSI_CLOCK_CFG_SCK_DIV)) != wanted_clock_cfg)
> >> + rc = fsi_spi_write_reg(ctx, SPI_FSI_CLOCK_CFG,
> >> + wanted_clock_cfg);
> > Missed {} ?
>
>
> No? It's one line under the if.

One statement, but *two* lines.
What does checkpatch.pl tell you about this?

--
With Best Regards,
Andy Shevchenko

2020-02-04 16:08:07

by Eddie James

[permalink] [raw]
Subject: Re: [PATCH] spi: Add FSI-attached SPI controller driver


On 2/4/20 5:02 AM, Andy Shevchenko wrote:
> On Mon, Feb 3, 2020 at 10:33 PM Eddie James <[email protected]> wrote:
>> On 1/30/20 10:37 AM, Andy Shevchenko wrote:
>>> On Wed, Jan 29, 2020 at 10:09 PM Eddie James <[email protected]> wrote:
> ...
>
>>>> + struct device *dev;
>>> Isn't fsl->dev the same?
>>> Perhaps kernel doc to explain the difference?
>>
>> No, it's not the same, as dev here is the SPI controller. I'll add a
>> comment.
> Why to have duplication then?


Nothing is being duplicated, the two variables are storing entirely
different information, both of which are necessary for each SPI
controller that this driver is driving.


>
>>>> + struct fsi_device *fsi;
> ...
>
>>>> + for (i = 0; i < num_bytes; ++i)
>>>> + rx[i] = (u8)((in >> (8 * ((num_bytes - 1) - i))) & 0xffULL);
>>> Redundant & 0xffULL part.
>>>
>>> Isn't it NIH of get_unalinged_be64 / le64 or something similar?
>>
>> No, these are shift in/out operations. The read register will also have
>> previous operations data in them and must be extracted with only the
>> correct number of bytes.
> Why not to call put_unaligned() how the tail in this case (it's 0 or
> can be easily made to be 0) will affect the result?


The shift-in is not the same as any byte-swap or unaligned operation.
For however many bytes we've read, we start at that many bytes
left-shifted in the register and copy out to our buffer, moving right
for each next byte... I don't think there is an existing function for
this operation.


>
>>>> + return num_bytes;
>>>> +}
>>>> +static int fsi_spi_data_out(u64 *out, const u8 *tx, int len)
>>>> +{
>>> Ditto as for above function. (put_unaligned ...)
> Ditto.


I don't understand how this could work for transfers of less than 8
bytes, any put_unaligned would access memory that it doesn't own.


>
>>>> +}
> ...
>
>>>> +static int fsi_spi_transfer_data(struct fsi_spi *ctx,
>>>> + struct spi_transfer *transfer)
>>>> +{
>>> Can you refactor to tx and rx parts?
>>
>> Why?
> It's way too long function to read. Indentation level also can improve
> readability.
> That's basically what refactoring is for.


The body is 65 lines, I don't think it is too long.

Since the function is used multiple times I think it makes more sense to
encapsulate the check for tx/rx in the function rather than split it and
have to check each time the functions are used.


>
>>>> + return 0;
>>>> +}
> ...
>
>>>> + if ((clock_cfg & (SPI_FSI_CLOCK_CFG_MM_ENABLE |
>>>> + SPI_FSI_CLOCK_CFG_ECC_DISABLE |
>>>> + SPI_FSI_CLOCK_CFG_MODE |
>>>> + SPI_FSI_CLOCK_CFG_SCK_RECV_DEL |
>>>> + SPI_FSI_CLOCK_CFG_SCK_DIV)) != wanted_clock_cfg)
>>>> + rc = fsi_spi_write_reg(ctx, SPI_FSI_CLOCK_CFG,
>>>> + wanted_clock_cfg);
>>> Missed {} ?
>>
>> No? It's one line under the if.
> One statement, but *two* lines.
> What does checkpatch.pl tell you about this?


Right.

checkpatch.pl says nothing about this, I think it meets the coding
standards as is.


Thanks for the review,

Eddie


>

2020-02-05 15:53:08

by Andy Shevchenko

[permalink] [raw]
Subject: Re: [PATCH] spi: Add FSI-attached SPI controller driver

On Tue, Feb 4, 2020 at 6:06 PM Eddie James <[email protected]> wrote:
> On 2/4/20 5:02 AM, Andy Shevchenko wrote:
> > On Mon, Feb 3, 2020 at 10:33 PM Eddie James <[email protected]> wrote:
> >> On 1/30/20 10:37 AM, Andy Shevchenko wrote:
> >>> On Wed, Jan 29, 2020 at 10:09 PM Eddie James <[email protected]> wrote:

...

> >>>> + struct device *dev;
> >>> Isn't fsl->dev the same?
> >>> Perhaps kernel doc to explain the difference?
> >>
> >> No, it's not the same, as dev here is the SPI controller. I'll add a
> >> comment.
> > Why to have duplication then?
>
>
> Nothing is being duplicated, the two variables are storing entirely
> different information, both of which are necessary for each SPI
> controller that this driver is driving.

Oh, I see now, thanks!

...

> >>>> + for (i = 0; i < num_bytes; ++i)
> >>>> + rx[i] = (u8)((in >> (8 * ((num_bytes - 1) - i))) & 0xffULL);
> >>> Redundant & 0xffULL part.
> >>>
> >>> Isn't it NIH of get_unalinged_be64 / le64 or something similar?
> >>
> >> No, these are shift in/out operations. The read register will also have
> >> previous operations data in them and must be extracted with only the
> >> correct number of bytes.
> > Why not to call put_unaligned() how the tail in this case (it's 0 or
> > can be easily made to be 0) will affect the result?
>
>
> The shift-in is not the same as any byte-swap or unaligned operation.
> For however many bytes we've read, we start at that many bytes
> left-shifted in the register and copy out to our buffer, moving right
> for each next byte... I don't think there is an existing function for
> this operation.

For me it looks like

u8 tmp[8];

put_unaligned_be64(in, tmp);
memcpy(rx, tmp, num_bytes);

put_unaligned*() is just a method to unroll the value to the u8 buffer.
See, for example, linux/unaligned/be_byteshift.h implementation.

> >>>> + return num_bytes;
> >>>> +}
> >>>> +static int fsi_spi_data_out(u64 *out, const u8 *tx, int len)
> >>>> +{
> >>> Ditto as for above function. (put_unaligned ...)
> > Ditto.
>
>
> I don't understand how this could work for transfers of less than 8
> bytes, any put_unaligned would access memory that it doesn't own.

Ditto.

> >>>> +}

--
With Best Regards,
Andy Shevchenko

2020-02-07 19:30:52

by Eddie James

[permalink] [raw]
Subject: Re: [PATCH] spi: Add FSI-attached SPI controller driver


On 2/5/20 9:51 AM, Andy Shevchenko wrote:
> On Tue, Feb 4, 2020 at 6:06 PM Eddie James <[email protected]> wrote:
>> On 2/4/20 5:02 AM, Andy Shevchenko wrote:
>>> On Mon, Feb 3, 2020 at 10:33 PM Eddie James <[email protected]> wrote:
>>>> On 1/30/20 10:37 AM, Andy Shevchenko wrote:
>>>>> On Wed, Jan 29, 2020 at 10:09 PM Eddie James <[email protected]> wrote:
> ...
>
>>>>>> + struct device *dev;
>>>>> Isn't fsl->dev the same?
>>>>> Perhaps kernel doc to explain the difference?
>>>> No, it's not the same, as dev here is the SPI controller. I'll add a
>>>> comment.
>>> Why to have duplication then?
>>
>> Nothing is being duplicated, the two variables are storing entirely
>> different information, both of which are necessary for each SPI
>> controller that this driver is driving.
> Oh, I see now, thanks!
>
> ...
>
>>>>>> + for (i = 0; i < num_bytes; ++i)
>>>>>> + rx[i] = (u8)((in >> (8 * ((num_bytes - 1) - i))) & 0xffULL);
>>>>> Redundant & 0xffULL part.
>>>>>
>>>>> Isn't it NIH of get_unalinged_be64 / le64 or something similar?
>>>> No, these are shift in/out operations. The read register will also have
>>>> previous operations data in them and must be extracted with only the
>>>> correct number of bytes.
>>> Why not to call put_unaligned() how the tail in this case (it's 0 or
>>> can be easily made to be 0) will affect the result?
>>
>> The shift-in is not the same as any byte-swap or unaligned operation.
>> For however many bytes we've read, we start at that many bytes
>> left-shifted in the register and copy out to our buffer, moving right
>> for each next byte... I don't think there is an existing function for
>> this operation.
> For me it looks like
>
> u8 tmp[8];
>
> put_unaligned_be64(in, tmp);
> memcpy(rx, tmp, num_bytes);
>
> put_unaligned*() is just a method to unroll the value to the u8 buffer.
> See, for example, linux/unaligned/be_byteshift.h implementation.


Unforunately it is not the same. put_unaligned_be64 will take the
highest 8 bits (0xff00000000000000) and move it into tmp[0]. Then
0x00ff000000000000 into tmp[1], etc. This is only correct for this
driver IF my transfer is 8 bytes. If, for example, I transfer 5 bytes,
then I need 0x000000ff00000000 into tmp[0], 0x00000000ff000000 into
tmp[1], etc. So I think my current implementation is correct.


Thanks,

Eddie


>
>>>>>> + return num_bytes;
>>>>>> +}
>>>>>> +static int fsi_spi_data_out(u64 *out, const u8 *tx, int len)
>>>>>> +{
>>>>> Ditto as for above function. (put_unaligned ...)
>>> Ditto.
>>
>> I don't understand how this could work for transfers of less than 8
>> bytes, any put_unaligned would access memory that it doesn't own.
> Ditto.
>
>>>>>> +}

2020-02-07 19:41:35

by Andy Shevchenko

[permalink] [raw]
Subject: Re: [PATCH] spi: Add FSI-attached SPI controller driver

On Fri, Feb 7, 2020 at 9:28 PM Eddie James <[email protected]> wrote:
> On 2/5/20 9:51 AM, Andy Shevchenko wrote:
> > On Tue, Feb 4, 2020 at 6:06 PM Eddie James <[email protected]> wrote:
> >> On 2/4/20 5:02 AM, Andy Shevchenko wrote:
> >>> On Mon, Feb 3, 2020 at 10:33 PM Eddie James <[email protected]> wrote:
> >>>> On 1/30/20 10:37 AM, Andy Shevchenko wrote:

...

> >>>>>> + for (i = 0; i < num_bytes; ++i)
> >>>>>> + rx[i] = (u8)((in >> (8 * ((num_bytes - 1) - i))) & 0xffULL);
> >>>>> Redundant & 0xffULL part.
> >>>>>
> >>>>> Isn't it NIH of get_unalinged_be64 / le64 or something similar?
> >>>> No, these are shift in/out operations. The read register will also have
> >>>> previous operations data in them and must be extracted with only the
> >>>> correct number of bytes.
> >>> Why not to call put_unaligned() how the tail in this case (it's 0 or
> >>> can be easily made to be 0) will affect the result?
> >>
> >> The shift-in is not the same as any byte-swap or unaligned operation.
> >> For however many bytes we've read, we start at that many bytes
> >> left-shifted in the register and copy out to our buffer, moving right
> >> for each next byte... I don't think there is an existing function for
> >> this operation.
> > For me it looks like
> >
> > u8 tmp[8];
> >
> > put_unaligned_be64(in, tmp);
> > memcpy(rx, tmp, num_bytes);
> >
> > put_unaligned*() is just a method to unroll the value to the u8 buffer.
> > See, for example, linux/unaligned/be_byteshift.h implementation.
>
>
> Unforunately it is not the same. put_unaligned_be64 will take the
> highest 8 bits (0xff00000000000000) and move it into tmp[0]. Then
> 0x00ff000000000000 into tmp[1], etc. This is only correct for this
> driver IF my transfer is 8 bytes. If, for example, I transfer 5 bytes,
> then I need 0x000000ff00000000 into tmp[0], 0x00000000ff000000 into
> tmp[1], etc. So I think my current implementation is correct.

Yes, I missed correction of the start address in memcpy(). Otherwise
it's still the same what I was talking about.

> >>>>>> + return num_bytes;
> >>>>>> +}
> >>>>>> +static int fsi_spi_data_out(u64 *out, const u8 *tx, int len)
> >>>>>> +{
> >>>>> Ditto as for above function. (put_unaligned ...)
> >>> Ditto.
> >>
> >> I don't understand how this could work for transfers of less than 8
> >> bytes, any put_unaligned would access memory that it doesn't own.
> > Ditto.
> >
> >>>>>> +}

--
With Best Regards,
Andy Shevchenko

2020-02-07 20:06:34

by Eddie James

[permalink] [raw]
Subject: Re: [PATCH] spi: Add FSI-attached SPI controller driver


On 2/7/20 1:39 PM, Andy Shevchenko wrote:
> On Fri, Feb 7, 2020 at 9:28 PM Eddie James <[email protected]> wrote:
>> On 2/5/20 9:51 AM, Andy Shevchenko wrote:
>>> On Tue, Feb 4, 2020 at 6:06 PM Eddie James <[email protected]> wrote:
>>>> On 2/4/20 5:02 AM, Andy Shevchenko wrote:
>>>>> On Mon, Feb 3, 2020 at 10:33 PM Eddie James <[email protected]> wrote:
>>>>>> On 1/30/20 10:37 AM, Andy Shevchenko wrote:
> ...
>
>>>>>>>> + for (i = 0; i < num_bytes; ++i)
>>>>>>>> + rx[i] = (u8)((in >> (8 * ((num_bytes - 1) - i))) & 0xffULL);
>>>>>>> Redundant & 0xffULL part.
>>>>>>>
>>>>>>> Isn't it NIH of get_unalinged_be64 / le64 or something similar?
>>>>>> No, these are shift in/out operations. The read register will also have
>>>>>> previous operations data in them and must be extracted with only the
>>>>>> correct number of bytes.
>>>>> Why not to call put_unaligned() how the tail in this case (it's 0 or
>>>>> can be easily made to be 0) will affect the result?
>>>> The shift-in is not the same as any byte-swap or unaligned operation.
>>>> For however many bytes we've read, we start at that many bytes
>>>> left-shifted in the register and copy out to our buffer, moving right
>>>> for each next byte... I don't think there is an existing function for
>>>> this operation.
>>> For me it looks like
>>>
>>> u8 tmp[8];
>>>
>>> put_unaligned_be64(in, tmp);
>>> memcpy(rx, tmp, num_bytes);
>>>
>>> put_unaligned*() is just a method to unroll the value to the u8 buffer.
>>> See, for example, linux/unaligned/be_byteshift.h implementation.
>>
>> Unforunately it is not the same. put_unaligned_be64 will take the
>> highest 8 bits (0xff00000000000000) and move it into tmp[0]. Then
>> 0x00ff000000000000 into tmp[1], etc. This is only correct for this
>> driver IF my transfer is 8 bytes. If, for example, I transfer 5 bytes,
>> then I need 0x000000ff00000000 into tmp[0], 0x00000000ff000000 into
>> tmp[1], etc. So I think my current implementation is correct.
> Yes, I missed correction of the start address in memcpy(). Otherwise
> it's still the same what I was talking about.


I see now, yes, thanks.

Do you think this is worth a v3? Perhaps put_unaligned is slightly more
optimized than the loop but there is more memory copy with that way too.

Eddie


>
>>>>>>>> + return num_bytes;
>>>>>>>> +}
>>>>>>>> +static int fsi_spi_data_out(u64 *out, const u8 *tx, int len)
>>>>>>>> +{
>>>>>>> Ditto as for above function. (put_unaligned ...)
>>>>> Ditto.
>>>> I don't understand how this could work for transfers of less than 8
>>>> bytes, any put_unaligned would access memory that it doesn't own.
>>> Ditto.
>>>
>>>>>>>> +}

2020-02-07 20:36:49

by Andy Shevchenko

[permalink] [raw]
Subject: Re: [PATCH] spi: Add FSI-attached SPI controller driver

On Fri, Feb 7, 2020 at 10:04 PM Eddie James <[email protected]> wrote:
> On 2/7/20 1:39 PM, Andy Shevchenko wrote:
> > On Fri, Feb 7, 2020 at 9:28 PM Eddie James <[email protected]> wrote:
> >> On 2/5/20 9:51 AM, Andy Shevchenko wrote:
> >>> On Tue, Feb 4, 2020 at 6:06 PM Eddie James <[email protected]> wrote:
> >>>> On 2/4/20 5:02 AM, Andy Shevchenko wrote:
> >>>>> On Mon, Feb 3, 2020 at 10:33 PM Eddie James <[email protected]> wrote:
> >>>>>> On 1/30/20 10:37 AM, Andy Shevchenko wrote:


> >>>>>>>> + for (i = 0; i < num_bytes; ++i)
> >>>>>>>> + rx[i] = (u8)((in >> (8 * ((num_bytes - 1) - i))) & 0xffULL);
> >>>>>>> Redundant & 0xffULL part.

> >>> For me it looks like
> >>>
> >>> u8 tmp[8];
> >>>
> >>> put_unaligned_be64(in, tmp);
> >>> memcpy(rx, tmp, num_bytes);
> >>>
> >>> put_unaligned*() is just a method to unroll the value to the u8 buffer.
> >>> See, for example, linux/unaligned/be_byteshift.h implementation.
> >>
> >> Unforunately it is not the same. put_unaligned_be64 will take the
> >> highest 8 bits (0xff00000000000000) and move it into tmp[0]. Then
> >> 0x00ff000000000000 into tmp[1], etc. This is only correct for this
> >> driver IF my transfer is 8 bytes. If, for example, I transfer 5 bytes,
> >> then I need 0x000000ff00000000 into tmp[0], 0x00000000ff000000 into
> >> tmp[1], etc. So I think my current implementation is correct.
> > Yes, I missed correction of the start address in memcpy(). Otherwise
> > it's still the same what I was talking about.
>
>
> I see now, yes, thanks.
>
> Do you think this is worth a v3? Perhaps put_unaligned is slightly more
> optimized than the loop but there is more memory copy with that way too.

I already forgot the entire context when this has been called. Can you
summarize what the sequence(s) of num_bytes are expected usually.

IIUC if packets small, less than 8 bytes, than num_bytes will be that value.
Otherwise it will be something like 8 + 8 + 8 ... + tail. Is it
correct assumption?

> >>>>>>>> + return num_bytes;
> >>>>>>>> +}

--
With Best Regards,
Andy Shevchenko

2020-02-07 21:06:09

by Eddie James

[permalink] [raw]
Subject: Re: [PATCH] spi: Add FSI-attached SPI controller driver


On 2/7/20 2:34 PM, Andy Shevchenko wrote:
> On Fri, Feb 7, 2020 at 10:04 PM Eddie James <[email protected]> wrote:
>> On 2/7/20 1:39 PM, Andy Shevchenko wrote:
>>> On Fri, Feb 7, 2020 at 9:28 PM Eddie James <[email protected]> wrote:
>>>> On 2/5/20 9:51 AM, Andy Shevchenko wrote:
>>>>> On Tue, Feb 4, 2020 at 6:06 PM Eddie James <[email protected]> wrote:
>>>>>> On 2/4/20 5:02 AM, Andy Shevchenko wrote:
>>>>>>> On Mon, Feb 3, 2020 at 10:33 PM Eddie James <[email protected]> wrote:
>>>>>>>> On 1/30/20 10:37 AM, Andy Shevchenko wrote:
>
>>>>>>>>>> + for (i = 0; i < num_bytes; ++i)
>>>>>>>>>> + rx[i] = (u8)((in >> (8 * ((num_bytes - 1) - i))) & 0xffULL);
>>>>>>>>> Redundant & 0xffULL part.
>>>>> For me it looks like
>>>>>
>>>>> u8 tmp[8];
>>>>>
>>>>> put_unaligned_be64(in, tmp);
>>>>> memcpy(rx, tmp, num_bytes);
>>>>>
>>>>> put_unaligned*() is just a method to unroll the value to the u8 buffer.
>>>>> See, for example, linux/unaligned/be_byteshift.h implementation.
>>>> Unforunately it is not the same. put_unaligned_be64 will take the
>>>> highest 8 bits (0xff00000000000000) and move it into tmp[0]. Then
>>>> 0x00ff000000000000 into tmp[1], etc. This is only correct for this
>>>> driver IF my transfer is 8 bytes. If, for example, I transfer 5 bytes,
>>>> then I need 0x000000ff00000000 into tmp[0], 0x00000000ff000000 into
>>>> tmp[1], etc. So I think my current implementation is correct.
>>> Yes, I missed correction of the start address in memcpy(). Otherwise
>>> it's still the same what I was talking about.
>>
>> I see now, yes, thanks.
>>
>> Do you think this is worth a v3? Perhaps put_unaligned is slightly more
>> optimized than the loop but there is more memory copy with that way too.
> I already forgot the entire context when this has been called. Can you
> summarize what the sequence(s) of num_bytes are expected usually.
>
> IIUC if packets small, less than 8 bytes, than num_bytes will be that value.
> Otherwise it will be something like 8 + 8 + 8 ... + tail. Is it
> correct assumption?


Yes, it will typically be 8 + 8 +... remainder. Basically, on any RX,
the driver polls for the rx register full. Once full, it will read
however much data is left to be transferred. Since we use min(len, 8)
then we read 8 usually, until we get to the end.


>
>>>>>>>>>> + return num_bytes;
>>>>>>>>>> +}

2020-02-07 22:06:59

by Andy Shevchenko

[permalink] [raw]
Subject: Re: [PATCH] spi: Add FSI-attached SPI controller driver

On Fri, Feb 7, 2020 at 11:04 PM Eddie James <[email protected]> wrote:
> On 2/7/20 2:34 PM, Andy Shevchenko wrote:
> > On Fri, Feb 7, 2020 at 10:04 PM Eddie James <[email protected]> wrote:
> >> On 2/7/20 1:39 PM, Andy Shevchenko wrote:
> >>> On Fri, Feb 7, 2020 at 9:28 PM Eddie James <[email protected]> wrote:
> >>>> On 2/5/20 9:51 AM, Andy Shevchenko wrote:
> >>>>> On Tue, Feb 4, 2020 at 6:06 PM Eddie James <[email protected]> wrote:
> >>>>>> On 2/4/20 5:02 AM, Andy Shevchenko wrote:
> >>>>>>> On Mon, Feb 3, 2020 at 10:33 PM Eddie James <[email protected]> wrote:
> >>>>>>>> On 1/30/20 10:37 AM, Andy Shevchenko wrote:
> >
> >>>>>>>>>> + for (i = 0; i < num_bytes; ++i)
> >>>>>>>>>> + rx[i] = (u8)((in >> (8 * ((num_bytes - 1) - i))) & 0xffULL);
> >>>>>>>>> Redundant & 0xffULL part.
> >>>>> For me it looks like
> >>>>>
> >>>>> u8 tmp[8];
> >>>>>
> >>>>> put_unaligned_be64(in, tmp);
> >>>>> memcpy(rx, tmp, num_bytes);
> >>>>>
> >>>>> put_unaligned*() is just a method to unroll the value to the u8 buffer.
> >>>>> See, for example, linux/unaligned/be_byteshift.h implementation.
> >>>> Unforunately it is not the same. put_unaligned_be64 will take the
> >>>> highest 8 bits (0xff00000000000000) and move it into tmp[0]. Then
> >>>> 0x00ff000000000000 into tmp[1], etc. This is only correct for this
> >>>> driver IF my transfer is 8 bytes. If, for example, I transfer 5 bytes,
> >>>> then I need 0x000000ff00000000 into tmp[0], 0x00000000ff000000 into
> >>>> tmp[1], etc. So I think my current implementation is correct.
> >>> Yes, I missed correction of the start address in memcpy(). Otherwise
> >>> it's still the same what I was talking about.
> >>
> >> I see now, yes, thanks.
> >>
> >> Do you think this is worth a v3? Perhaps put_unaligned is slightly more
> >> optimized than the loop but there is more memory copy with that way too.
> > I already forgot the entire context when this has been called. Can you
> > summarize what the sequence(s) of num_bytes are expected usually.
> >
> > IIUC if packets small, less than 8 bytes, than num_bytes will be that value.
> > Otherwise it will be something like 8 + 8 + 8 ... + tail. Is it
> > correct assumption?
>
>
> Yes, it will typically be 8 + 8 +... remainder. Basically, on any RX,
> the driver polls for the rx register full. Once full, it will read
> however much data is left to be transferred. Since we use min(len, 8)
> then we read 8 usually, until we get to the end.

I asked that because we might have a better optimization, i.e, call
directly put_unaligned_be64() when we know that length is 8 bytes. For
the rest your approach might be simpler. Similar for the TX case.

> >>>>>>>>>> + return num_bytes;
> >>>>>>>>>> +}

--
With Best Regards,
Andy Shevchenko

2020-02-10 20:05:59

by Eddie James

[permalink] [raw]
Subject: Re: [PATCH] spi: Add FSI-attached SPI controller driver


On 2/7/20 4:04 PM, Andy Shevchenko wrote:
> On Fri, Feb 7, 2020 at 11:04 PM Eddie James <[email protected]> wrote:
>> On 2/7/20 2:34 PM, Andy Shevchenko wrote:
>>> On Fri, Feb 7, 2020 at 10:04 PM Eddie James <[email protected]> wrote:
>>>> On 2/7/20 1:39 PM, Andy Shevchenko wrote:
>>>>> On Fri, Feb 7, 2020 at 9:28 PM Eddie James <[email protected]> wrote:
>>>>>> On 2/5/20 9:51 AM, Andy Shevchenko wrote:
>>>>>>> On Tue, Feb 4, 2020 at 6:06 PM Eddie James <[email protected]> wrote:
>>>>>>>> On 2/4/20 5:02 AM, Andy Shevchenko wrote:
>>>>>>>>> On Mon, Feb 3, 2020 at 10:33 PM Eddie James <[email protected]> wrote:
>>>>>>>>>> On 1/30/20 10:37 AM, Andy Shevchenko wrote:
>>>>>>>>>>>> + for (i = 0; i < num_bytes; ++i)
>>>>>>>>>>>> + rx[i] = (u8)((in >> (8 * ((num_bytes - 1) - i))) & 0xffULL);
>>>>>>>>>>> Redundant & 0xffULL part.
>>>>>>> For me it looks like
>>>>>>>
>>>>>>> u8 tmp[8];
>>>>>>>
>>>>>>> put_unaligned_be64(in, tmp);
>>>>>>> memcpy(rx, tmp, num_bytes);
>>>>>>>
>>>>>>> put_unaligned*() is just a method to unroll the value to the u8 buffer.
>>>>>>> See, for example, linux/unaligned/be_byteshift.h implementation.
>>>>>> Unforunately it is not the same. put_unaligned_be64 will take the
>>>>>> highest 8 bits (0xff00000000000000) and move it into tmp[0]. Then
>>>>>> 0x00ff000000000000 into tmp[1], etc. This is only correct for this
>>>>>> driver IF my transfer is 8 bytes. If, for example, I transfer 5 bytes,
>>>>>> then I need 0x000000ff00000000 into tmp[0], 0x00000000ff000000 into
>>>>>> tmp[1], etc. So I think my current implementation is correct.
>>>>> Yes, I missed correction of the start address in memcpy(). Otherwise
>>>>> it's still the same what I was talking about.
>>>> I see now, yes, thanks.
>>>>
>>>> Do you think this is worth a v3? Perhaps put_unaligned is slightly more
>>>> optimized than the loop but there is more memory copy with that way too.
>>> I already forgot the entire context when this has been called. Can you
>>> summarize what the sequence(s) of num_bytes are expected usually.
>>>
>>> IIUC if packets small, less than 8 bytes, than num_bytes will be that value.
>>> Otherwise it will be something like 8 + 8 + 8 ... + tail. Is it
>>> correct assumption?
>>
>> Yes, it will typically be 8 + 8 +... remainder. Basically, on any RX,
>> the driver polls for the rx register full. Once full, it will read
>> however much data is left to be transferred. Since we use min(len, 8)
>> then we read 8 usually, until we get to the end.
> I asked that because we might have a better optimization, i.e, call
> directly put_unaligned_be64() when we know that length is 8 bytes. For
> the rest your approach might be simpler. Similar for the TX case.


I just tried to implement as you suggested but I realized something: The
value is already swapped from BE to CPU when the register is read in
fsi_spi_read_reg. It happens to work out correctly to use
put_unaligned_be64 on a LE CPU to flip the bytes here. But on a BE CPU,
this wouldn't be correct I think. Now I don't anticipate this driver
running on a BE CPU, but I think it is weird to flip it twice, and
better to do it manually here.

What do you think Andy?

Thanks,

Eddie


>
>>>>>>>>>>>> + return num_bytes;
>>>>>>>>>>>> +}

2020-02-10 20:35:22

by Andy Shevchenko

[permalink] [raw]
Subject: Re: [PATCH] spi: Add FSI-attached SPI controller driver

On Mon, Feb 10, 2020 at 10:05 PM Eddie James <[email protected]> wrote:
> On 2/7/20 4:04 PM, Andy Shevchenko wrote:
> > On Fri, Feb 7, 2020 at 11:04 PM Eddie James <[email protected]> wrote:
> >> On 2/7/20 2:34 PM, Andy Shevchenko wrote:
> >>> On Fri, Feb 7, 2020 at 10:04 PM Eddie James <[email protected]> wrote:
> >>>> On 2/7/20 1:39 PM, Andy Shevchenko wrote:
> >>>>> On Fri, Feb 7, 2020 at 9:28 PM Eddie James <[email protected]> wrote:
> >>>>>> On 2/5/20 9:51 AM, Andy Shevchenko wrote:
> >>>>>>> On Tue, Feb 4, 2020 at 6:06 PM Eddie James <[email protected]> wrote:
> >>>>>>>> On 2/4/20 5:02 AM, Andy Shevchenko wrote:
> >>>>>>>>> On Mon, Feb 3, 2020 at 10:33 PM Eddie James <[email protected]> wrote:
> >>>>>>>>>> On 1/30/20 10:37 AM, Andy Shevchenko wrote:
> >>>>>>>>>>>> + for (i = 0; i < num_bytes; ++i)
> >>>>>>>>>>>> + rx[i] = (u8)((in >> (8 * ((num_bytes - 1) - i))) & 0xffULL);
> >>>>>>>>>>> Redundant & 0xffULL part.
> >>>>>>> For me it looks like
> >>>>>>>
> >>>>>>> u8 tmp[8];
> >>>>>>>
> >>>>>>> put_unaligned_be64(in, tmp);
> >>>>>>> memcpy(rx, tmp, num_bytes);
> >>>>>>>
> >>>>>>> put_unaligned*() is just a method to unroll the value to the u8 buffer.
> >>>>>>> See, for example, linux/unaligned/be_byteshift.h implementation.
> >>>>>> Unforunately it is not the same. put_unaligned_be64 will take the
> >>>>>> highest 8 bits (0xff00000000000000) and move it into tmp[0]. Then
> >>>>>> 0x00ff000000000000 into tmp[1], etc. This is only correct for this
> >>>>>> driver IF my transfer is 8 bytes. If, for example, I transfer 5 bytes,
> >>>>>> then I need 0x000000ff00000000 into tmp[0], 0x00000000ff000000 into
> >>>>>> tmp[1], etc. So I think my current implementation is correct.
> >>>>> Yes, I missed correction of the start address in memcpy(). Otherwise
> >>>>> it's still the same what I was talking about.
> >>>> I see now, yes, thanks.
> >>>>
> >>>> Do you think this is worth a v3? Perhaps put_unaligned is slightly more
> >>>> optimized than the loop but there is more memory copy with that way too.
> >>> I already forgot the entire context when this has been called. Can you
> >>> summarize what the sequence(s) of num_bytes are expected usually.
> >>>
> >>> IIUC if packets small, less than 8 bytes, than num_bytes will be that value.
> >>> Otherwise it will be something like 8 + 8 + 8 ... + tail. Is it
> >>> correct assumption?
> >>
> >> Yes, it will typically be 8 + 8 +... remainder. Basically, on any RX,
> >> the driver polls for the rx register full. Once full, it will read
> >> however much data is left to be transferred. Since we use min(len, 8)
> >> then we read 8 usually, until we get to the end.
> > I asked that because we might have a better optimization, i.e, call
> > directly put_unaligned_be64() when we know that length is 8 bytes. For
> > the rest your approach might be simpler. Similar for the TX case.
>
>
> I just tried to implement as you suggested but I realized something: The
> value is already swapped from BE to CPU when the register is read in
> fsi_spi_read_reg. It happens to work out correctly to use
> put_unaligned_be64 on a LE CPU to flip the bytes here. But on a BE CPU,
> this wouldn't be correct I think.

Hmm... Any BE conversion op on BE architecture is no-op.
Same for LE on LE.

> Now I don't anticipate this driver
> running on a BE CPU, but I think it is weird to flip it twice, and
> better to do it manually here.
>
> What do you think Andy?



> >>>>>>>>>>>> + return num_bytes;
> >>>>>>>>>>>> +}



--
With Best Regards,
Andy Shevchenko

2020-02-10 20:50:47

by Eddie James

[permalink] [raw]
Subject: Re: [PATCH] spi: Add FSI-attached SPI controller driver


On 2/10/20 2:33 PM, Andy Shevchenko wrote:
> On Mon, Feb 10, 2020 at 10:05 PM Eddie James <[email protected]> wrote:
>> On 2/7/20 4:04 PM, Andy Shevchenko wrote:
>>> On Fri, Feb 7, 2020 at 11:04 PM Eddie James <[email protected]> wrote:
>>>> On 2/7/20 2:34 PM, Andy Shevchenko wrote:
>>>>> On Fri, Feb 7, 2020 at 10:04 PM Eddie James <[email protected]> wrote:
>>>>>> On 2/7/20 1:39 PM, Andy Shevchenko wrote:
>>>>>>> On Fri, Feb 7, 2020 at 9:28 PM Eddie James <[email protected]> wrote:
>>>>>>>> On 2/5/20 9:51 AM, Andy Shevchenko wrote:
>>>>>>>>> On Tue, Feb 4, 2020 at 6:06 PM Eddie James <[email protected]> wrote:
>>>>>>>>>> On 2/4/20 5:02 AM, Andy Shevchenko wrote:
>>>>>>>>>>> On Mon, Feb 3, 2020 at 10:33 PM Eddie James <[email protected]> wrote:
>>>>>>>>>>>> On 1/30/20 10:37 AM, Andy Shevchenko wrote:
>>>>>>>>>>>>>> + for (i = 0; i < num_bytes; ++i)
>>>>>>>>>>>>>> + rx[i] = (u8)((in >> (8 * ((num_bytes - 1) - i))) & 0xffULL);
>>>>>>>>>>>>> Redundant & 0xffULL part.
>>>>>>>>> For me it looks like
>>>>>>>>>
>>>>>>>>> u8 tmp[8];
>>>>>>>>>
>>>>>>>>> put_unaligned_be64(in, tmp);
>>>>>>>>> memcpy(rx, tmp, num_bytes);
>>>>>>>>>
>>>>>>>>> put_unaligned*() is just a method to unroll the value to the u8 buffer.
>>>>>>>>> See, for example, linux/unaligned/be_byteshift.h implementation.
>>>>>>>> Unforunately it is not the same. put_unaligned_be64 will take the
>>>>>>>> highest 8 bits (0xff00000000000000) and move it into tmp[0]. Then
>>>>>>>> 0x00ff000000000000 into tmp[1], etc. This is only correct for this
>>>>>>>> driver IF my transfer is 8 bytes. If, for example, I transfer 5 bytes,
>>>>>>>> then I need 0x000000ff00000000 into tmp[0], 0x00000000ff000000 into
>>>>>>>> tmp[1], etc. So I think my current implementation is correct.
>>>>>>> Yes, I missed correction of the start address in memcpy(). Otherwise
>>>>>>> it's still the same what I was talking about.
>>>>>> I see now, yes, thanks.
>>>>>>
>>>>>> Do you think this is worth a v3? Perhaps put_unaligned is slightly more
>>>>>> optimized than the loop but there is more memory copy with that way too.
>>>>> I already forgot the entire context when this has been called. Can you
>>>>> summarize what the sequence(s) of num_bytes are expected usually.
>>>>>
>>>>> IIUC if packets small, less than 8 bytes, than num_bytes will be that value.
>>>>> Otherwise it will be something like 8 + 8 + 8 ... + tail. Is it
>>>>> correct assumption?
>>>> Yes, it will typically be 8 + 8 +... remainder. Basically, on any RX,
>>>> the driver polls for the rx register full. Once full, it will read
>>>> however much data is left to be transferred. Since we use min(len, 8)
>>>> then we read 8 usually, until we get to the end.
>>> I asked that because we might have a better optimization, i.e, call
>>> directly put_unaligned_be64() when we know that length is 8 bytes. For
>>> the rest your approach might be simpler. Similar for the TX case.
>>
>> I just tried to implement as you suggested but I realized something: The
>> value is already swapped from BE to CPU when the register is read in
>> fsi_spi_read_reg. It happens to work out correctly to use
>> put_unaligned_be64 on a LE CPU to flip the bytes here. But on a BE CPU,
>> this wouldn't be correct I think.
> Hmm... Any BE conversion op on BE architecture is no-op.
> Same for LE on LE.


Right. So regardless of architecture, by the time we get to
fsi_spi_data_in, the data is in the correct endianness. But on a BE
architecture, it would still need to get flipped because that's what the
specification indicates. So doing it manually seems correct to me.


>
>> Now I don't anticipate this driver
>> running on a BE CPU, but I think it is weird to flip it twice, and
>> better to do it manually here.
>>
>> What do you think Andy?
>
>
>>>>>>>>>>>>>> + return num_bytes;
>>>>>>>>>>>>>> +}
>
>