2019-01-14 10:01:46

by Christoph Hellwig

[permalink] [raw]
Subject: remove block layer bounce buffering for MMC

Hi everyone,

this series converts the remaining MMC host drivers to properly kmap the
scatterlist entries it does PIO operations on, and then goes on to
remove the usage of block layer bounce buffering (which I plan to remove
eventually) from the MMC layer.

As a bonus I've converted various drivers to the proper scatterlist
helpers so that at least in theory they are ready for chained
scatterlists.

All the changes are compile tested only as I don't have any of the
hardware, so a careful review would be appreciated.


2019-01-14 09:59:35

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 01/11] mmc: davinci: handle highmem pages

Instead of setting up a kernel pointer to track the current PIO address,
track the offset in the current page, and do an atomic kmap for the page
while doing the actual PIO operations.

Signed-off-by: Christoph Hellwig <[email protected]>
---
drivers/mmc/host/davinci_mmc.c | 43 ++++++++++++++++++++--------------
1 file changed, 26 insertions(+), 17 deletions(-)

diff --git a/drivers/mmc/host/davinci_mmc.c b/drivers/mmc/host/davinci_mmc.c
index 9e68c3645e22..9c500c52b63d 100644
--- a/drivers/mmc/host/davinci_mmc.c
+++ b/drivers/mmc/host/davinci_mmc.c
@@ -26,6 +26,7 @@
#include <linux/clk.h>
#include <linux/err.h>
#include <linux/cpufreq.h>
+#include <linux/highmem.h>
#include <linux/mmc/host.h>
#include <linux/io.h>
#include <linux/irq.h>
@@ -194,11 +195,12 @@ struct mmc_davinci_host {
#define DAVINCI_MMC_DATADIR_WRITE 2
unsigned char data_dir;

- /* buffer is used during PIO of one scatterlist segment, and
- * is updated along with buffer_bytes_left. bytes_left applies
- * to all N blocks of the PIO transfer.
+ /*
+ * buffer_offset is used during PIO of one scatterlist segment, and is
+ * updated along with buffer_bytes_left. bytes_left applies to all N
+ * blocks of the PIO transfer.
*/
- u8 *buffer;
+ u32 buffer_offset;
u32 buffer_bytes_left;
u32 bytes_left;

@@ -229,8 +231,8 @@ static irqreturn_t mmc_davinci_irq(int irq, void *dev_id);
/* PIO only */
static void mmc_davinci_sg_to_buf(struct mmc_davinci_host *host)
{
+ host->buffer_offset = host->sg->offset;
host->buffer_bytes_left = sg_dma_len(host->sg);
- host->buffer = sg_virt(host->sg);
if (host->buffer_bytes_left > host->bytes_left)
host->buffer_bytes_left = host->bytes_left;
}
@@ -238,7 +240,7 @@ static void mmc_davinci_sg_to_buf(struct mmc_davinci_host *host)
static void davinci_fifo_data_trans(struct mmc_davinci_host *host,
unsigned int n)
{
- u8 *p;
+ void *p;
unsigned int i;

if (host->buffer_bytes_left == 0) {
@@ -246,7 +248,7 @@ static void davinci_fifo_data_trans(struct mmc_davinci_host *host,
mmc_davinci_sg_to_buf(host);
}

- p = host->buffer;
+ p = kmap_atomic(sg_page(host->sg));
if (n > host->buffer_bytes_left)
n = host->buffer_bytes_left;
host->buffer_bytes_left -= n;
@@ -258,24 +260,31 @@ static void davinci_fifo_data_trans(struct mmc_davinci_host *host,
*/
if (host->data_dir == DAVINCI_MMC_DATADIR_WRITE) {
for (i = 0; i < (n >> 2); i++) {
- writel(*((u32 *)p), host->base + DAVINCI_MMCDXR);
- p = p + 4;
+ u32 *val = p + host->buffer_offset;
+
+ writel(*val, host->base + DAVINCI_MMCDXR);
+ host->buffer_offset += 4;
}
if (n & 3) {
- iowrite8_rep(host->base + DAVINCI_MMCDXR, p, (n & 3));
- p = p + (n & 3);
+ iowrite8_rep(host->base + DAVINCI_MMCDXR,
+ p + host->buffer_offset, n & 3);
+ host->buffer_offset += (n & 3);
}
} else {
for (i = 0; i < (n >> 2); i++) {
- *((u32 *)p) = readl(host->base + DAVINCI_MMCDRR);
- p = p + 4;
+ u32 *val = p + host->buffer_offset;
+
+ *val = readl(host->base + DAVINCI_MMCDRR);
+ host->buffer_offset += 4;
}
if (n & 3) {
- ioread8_rep(host->base + DAVINCI_MMCDRR, p, (n & 3));
- p = p + (n & 3);
+ ioread8_rep(host->base + DAVINCI_MMCDRR,
+ p + host->buffer_offset, n & 3);
+ host->buffer_offset += (n & 3);
}
}
- host->buffer = p;
+
+ kunmap_atomic(p);
}

static void mmc_davinci_start_command(struct mmc_davinci_host *host,
@@ -572,7 +581,7 @@ mmc_davinci_prepare_data(struct mmc_davinci_host *host, struct mmc_request *req)
host->base + DAVINCI_MMCFIFOCTL);
}

- host->buffer = NULL;
+ host->buffer_offset = 0;
host->bytes_left = data->blocks * data->blksz;

/* For now we try to use DMA whenever we won't need partial FIFO
--
2.20.1


2019-01-14 10:00:07

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 05/11] mmc: s3cmci: handle highmem pages

Instead of setting up a kernel pointer to track the current PIO address,
track the offset in the current page, and do an atomic kmap for the page
while doing the actual PIO operations.

Signed-off-by: Christoph Hellwig <[email protected]>
---
drivers/mmc/host/s3cmci.c | 107 +++++++++++++++++++-------------------
drivers/mmc/host/s3cmci.h | 3 +-
2 files changed, 55 insertions(+), 55 deletions(-)

diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c
index 10f5219b3b40..1be84426c817 100644
--- a/drivers/mmc/host/s3cmci.c
+++ b/drivers/mmc/host/s3cmci.c
@@ -15,6 +15,7 @@
#include <linux/dmaengine.h>
#include <linux/dma-mapping.h>
#include <linux/clk.h>
+#include <linux/highmem.h>
#include <linux/mmc/host.h>
#include <linux/platform_device.h>
#include <linux/cpufreq.h>
@@ -317,26 +318,17 @@ static void s3cmci_check_sdio_irq(struct s3cmci_host *host)
}
}

-static inline int get_data_buffer(struct s3cmci_host *host,
- u32 *bytes, u32 **pointer)
+static inline int get_data_buffer(struct s3cmci_host *host)
{
- struct scatterlist *sg;
-
- if (host->pio_active == XFER_NONE)
- return -EINVAL;
-
- if ((!host->mrq) || (!host->mrq->data))
- return -EINVAL;
-
if (host->pio_sgptr >= host->mrq->data->sg_len) {
dbg(host, dbg_debug, "no more buffers (%i/%i)\n",
host->pio_sgptr, host->mrq->data->sg_len);
return -EBUSY;
}
- sg = &host->mrq->data->sg[host->pio_sgptr];
+ host->cur_sg = &host->mrq->data->sg[host->pio_sgptr];

- *bytes = sg->length;
- *pointer = sg_virt(sg);
+ host->pio_bytes = host->cur_sg->length;
+ host->pio_offset = host->cur_sg->offset;

host->pio_sgptr++;

@@ -422,11 +414,16 @@ static void s3cmci_disable_irq(struct s3cmci_host *host, bool transfer)

static void do_pio_read(struct s3cmci_host *host)
{
- int res;
u32 fifo;
u32 *ptr;
u32 fifo_words;
void __iomem *from_ptr;
+ void *buf;
+
+ if (host->pio_active == XFER_NONE)
+ goto done;
+ if (!host->mrq || !host->mrq->data)
+ goto done;

/* write real prescaler to host, it might be set slow to fix */
writel(host->prescaler, host->base + S3C2410_SDIPRE);
@@ -435,20 +432,12 @@ static void do_pio_read(struct s3cmci_host *host)

while ((fifo = fifo_count(host))) {
if (!host->pio_bytes) {
- res = get_data_buffer(host, &host->pio_bytes,
- &host->pio_ptr);
- if (res) {
- host->pio_active = XFER_NONE;
- host->complete_what = COMPLETION_FINALIZE;
-
- dbg(host, dbg_pio, "pio_read(): "
- "complete (no more data).\n");
- return;
- }
+ if (get_data_buffer(host) < 0)
+ goto done;

dbg(host, dbg_pio,
- "pio_read(): new target: [%i]@[%p]\n",
- host->pio_bytes, host->pio_ptr);
+ "pio_read(): new target: [%i]@[%zu]\n",
+ host->pio_bytes, host->pio_offset);
}

dbg(host, dbg_pio,
@@ -470,63 +459,65 @@ static void do_pio_read(struct s3cmci_host *host)
host->pio_count += fifo;

fifo_words = fifo >> 2;
- ptr = host->pio_ptr;
- while (fifo_words--)
+
+ buf = (kmap_atomic(sg_page(host->cur_sg)) + host->pio_offset);
+ ptr = buf;
+ while (fifo_words--) {
*ptr++ = readl(from_ptr);
- host->pio_ptr = ptr;
+ host->pio_offset += 4;
+ }

if (fifo & 3) {
u32 n = fifo & 3;
u32 data = readl(from_ptr);
- u8 *p = (u8 *)host->pio_ptr;
+ u8 *p = (u8 *)ptr;

while (n--) {
*p++ = data;
data >>= 8;
+ host->pio_offset++;
}
}
+ kunmap_atomic(buf);
}

if (!host->pio_bytes) {
- res = get_data_buffer(host, &host->pio_bytes, &host->pio_ptr);
- if (res) {
- dbg(host, dbg_pio,
- "pio_read(): complete (no more buffers).\n");
- host->pio_active = XFER_NONE;
- host->complete_what = COMPLETION_FINALIZE;
-
- return;
- }
+ if (get_data_buffer(host) < 0)
+ goto done;
}

enable_imask(host,
S3C2410_SDIIMSK_RXFIFOHALF | S3C2410_SDIIMSK_RXFIFOLAST);
+ return;
+
+done:
+ host->pio_active = XFER_NONE;
+ host->complete_what = COMPLETION_FINALIZE;
+ dbg(host, dbg_pio, "pio_read(): complete (no more data).\n");
}

static void do_pio_write(struct s3cmci_host *host)
{
void __iomem *to_ptr;
- int res;
+ void *buf;
u32 fifo;
u32 *ptr;

+ if (host->pio_active == XFER_NONE)
+ goto done;
+ if (!host->mrq || !host->mrq->data)
+ goto done;
+
to_ptr = host->base + host->sdidata;

while ((fifo = fifo_free(host)) > 3) {
if (!host->pio_bytes) {
- res = get_data_buffer(host, &host->pio_bytes,
- &host->pio_ptr);
- if (res) {
- dbg(host, dbg_pio,
- "pio_write(): complete (no more data).\n");
- host->pio_active = XFER_NONE;
-
- return;
- }
+ if (get_data_buffer(host) < 0)
+ goto done;

dbg(host, dbg_pio,
- "pio_write(): new source: [%i]@[%p]\n",
- host->pio_bytes, host->pio_ptr);
+ "pio_write(): new source: [%i]@[%zd]\n",
+ host->pio_bytes, host->pio_offset);

}

@@ -543,13 +534,20 @@ static void do_pio_write(struct s3cmci_host *host)
host->pio_count += fifo;

fifo = (fifo + 3) >> 2;
- ptr = host->pio_ptr;
- while (fifo--)
+ buf = (kmap_atomic(sg_page(host->cur_sg)) + host->pio_offset);
+ ptr = buf;
+ while (fifo--) {
writel(*ptr++, to_ptr);
- host->pio_ptr = ptr;
+ host->pio_offset += 4;
+ }
+ kunmap_atomic(buf);
}

enable_imask(host, S3C2410_SDIIMSK_TXFIFOHALF);
+ return;
+done:
+ dbg(host, dbg_pio, "pio_write(): complete (no more data).\n");
+ host->pio_active = XFER_NONE;
}

static void pio_tasklet(unsigned long data)
@@ -1055,6 +1053,7 @@ static int s3cmci_prepare_pio(struct s3cmci_host *host, struct mmc_data *data)
BUG_ON((data->flags & BOTH_DIR) == BOTH_DIR);

host->pio_sgptr = 0;
+ host->cur_sg = &host->mrq->data->sg[host->pio_sgptr];
host->pio_bytes = 0;
host->pio_count = 0;
host->pio_active = rw ? XFER_WRITE : XFER_READ;
diff --git a/drivers/mmc/host/s3cmci.h b/drivers/mmc/host/s3cmci.h
index 30c2c0dd1bc8..4320f7d832dc 100644
--- a/drivers/mmc/host/s3cmci.h
+++ b/drivers/mmc/host/s3cmci.h
@@ -50,10 +50,11 @@ struct s3cmci_host {

int dma_complete;

+ struct scatterlist *cur_sg;
u32 pio_sgptr;
u32 pio_bytes;
u32 pio_count;
- u32 *pio_ptr;
+ u32 pio_offset;
#define XFER_NONE 0
#define XFER_READ 1
#define XFER_WRITE 2
--
2.20.1


2019-01-14 10:00:19

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 07/11] mmc: mvsdio: handle highmem pages

Instead of setting up a kernel pointer to track the current PIO address,
track the offset in the current page, and do an atomic kmap for the page
while doing the actual PIO operations.

Signed-off-by: Christoph Hellwig <[email protected]>
---
drivers/mmc/host/mvsdio.c | 48 +++++++++++++++++++++++----------------
1 file changed, 29 insertions(+), 19 deletions(-)

diff --git a/drivers/mmc/host/mvsdio.c b/drivers/mmc/host/mvsdio.c
index e22bbff89c8d..545e370d6dae 100644
--- a/drivers/mmc/host/mvsdio.c
+++ b/drivers/mmc/host/mvsdio.c
@@ -12,6 +12,7 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/io.h>
+#include <linux/highmem.h>
#include <linux/platform_device.h>
#include <linux/mbus.h>
#include <linux/delay.h>
@@ -42,7 +43,8 @@ struct mvsd_host {
unsigned int intr_en;
unsigned int ctrl;
unsigned int pio_size;
- void *pio_ptr;
+ struct scatterlist *pio_sg;
+ unsigned int pio_offset; /* offset in words into the segment */
unsigned int sg_frags;
unsigned int ns_per_clk;
unsigned int clock;
@@ -96,9 +98,9 @@ static int mvsd_setup_data(struct mvsd_host *host, struct mmc_data *data)
if (tmout_index > MVSD_HOST_CTRL_TMOUT_MAX)
tmout_index = MVSD_HOST_CTRL_TMOUT_MAX;

- dev_dbg(host->dev, "data %s at 0x%08x: blocks=%d blksz=%d tmout=%u (%d)\n",
+ dev_dbg(host->dev, "data %s at 0x%08llx: blocks=%d blksz=%d tmout=%u (%d)\n",
(data->flags & MMC_DATA_READ) ? "read" : "write",
- (u32)sg_virt(data->sg), data->blocks, data->blksz,
+ (u64)sg_phys(data->sg), data->blocks, data->blksz,
tmout, tmout_index);

host->ctrl &= ~MVSD_HOST_CTRL_TMOUT_MASK;
@@ -118,10 +120,11 @@ static int mvsd_setup_data(struct mvsd_host *host, struct mmc_data *data)
* boundary.
*/
host->pio_size = data->blocks * data->blksz;
- host->pio_ptr = sg_virt(data->sg);
+ host->pio_sg = data->sg;
+ host->pio_offset = data->sg->offset / 2;
if (!nodma)
- dev_dbg(host->dev, "fallback to PIO for data at 0x%p size %d\n",
- host->pio_ptr, host->pio_size);
+ dev_dbg(host->dev, "fallback to PIO for data at 0x%x size %d\n",
+ host->pio_offset, host->pio_size);
return 1;
} else {
dma_addr_t phys_addr;
@@ -291,8 +294,9 @@ static u32 mvsd_finish_data(struct mvsd_host *host, struct mmc_data *data,
{
void __iomem *iobase = host->base;

- if (host->pio_ptr) {
- host->pio_ptr = NULL;
+ if (host->pio_sg) {
+ host->pio_sg = NULL;
+ host->pio_offset = 0;
host->pio_size = 0;
} else {
dma_unmap_sg(mmc_dev(host->mmc), data->sg, host->sg_frags,
@@ -376,11 +380,12 @@ static irqreturn_t mvsd_irq(int irq, void *dev)
if (host->pio_size &&
(intr_status & host->intr_en &
(MVSD_NOR_RX_READY | MVSD_NOR_RX_FIFO_8W))) {
- u16 *p = host->pio_ptr;
+ u16 *p = kmap_atomic(sg_page(host->pio_sg));
+ unsigned int o = host->pio_offset;
int s = host->pio_size;
while (s >= 32 && (intr_status & MVSD_NOR_RX_FIFO_8W)) {
- readsw(iobase + MVSD_FIFO, p, 16);
- p += 16;
+ readsw(iobase + MVSD_FIFO, p + o, 16);
+ o += 16;
s -= 32;
intr_status = mvsd_read(MVSD_NOR_INTR_STATUS);
}
@@ -391,8 +396,10 @@ static irqreturn_t mvsd_irq(int irq, void *dev)
*/
if (s <= 32) {
while (s >= 4 && (intr_status & MVSD_NOR_RX_READY)) {
- put_unaligned(mvsd_read(MVSD_FIFO), p++);
- put_unaligned(mvsd_read(MVSD_FIFO), p++);
+ put_unaligned(mvsd_read(MVSD_FIFO), p + o);
+ o++;
+ put_unaligned(mvsd_read(MVSD_FIFO), p + o);
+ o++;
s -= 4;
intr_status = mvsd_read(MVSD_NOR_INTR_STATUS);
}
@@ -400,7 +407,7 @@ static irqreturn_t mvsd_irq(int irq, void *dev)
u16 val[2] = {0, 0};
val[0] = mvsd_read(MVSD_FIFO);
val[1] = mvsd_read(MVSD_FIFO);
- memcpy(p, ((void *)&val) + 4 - s, s);
+ memcpy(p + o, ((void *)&val) + 4 - s, s);
s = 0;
intr_status = mvsd_read(MVSD_NOR_INTR_STATUS);
}
@@ -416,13 +423,14 @@ static irqreturn_t mvsd_irq(int irq, void *dev)
}
dev_dbg(host->dev, "pio %d intr 0x%04x hw_state 0x%04x\n",
s, intr_status, mvsd_read(MVSD_HW_STATE));
- host->pio_ptr = p;
+ host->pio_offset = o;
host->pio_size = s;
irq_handled = 1;
} else if (host->pio_size &&
(intr_status & host->intr_en &
(MVSD_NOR_TX_AVAIL | MVSD_NOR_TX_FIFO_8W))) {
- u16 *p = host->pio_ptr;
+ u16 *p = kmap_atomic(sg_page(host->pio_sg));
+ unsigned int o = host->pio_offset;
int s = host->pio_size;
/*
* The TX_FIFO_8W bit is unreliable. When set, bursting
@@ -431,8 +439,10 @@ static irqreturn_t mvsd_irq(int irq, void *dev)
* TX_FIFO_8W remains set.
*/
while (s >= 4 && (intr_status & MVSD_NOR_TX_AVAIL)) {
- mvsd_write(MVSD_FIFO, get_unaligned(p++));
- mvsd_write(MVSD_FIFO, get_unaligned(p++));
+ mvsd_write(MVSD_FIFO, get_unaligned(p + o));
+ o++;
+ mvsd_write(MVSD_FIFO, get_unaligned(p + o));
+ o++;
s -= 4;
intr_status = mvsd_read(MVSD_NOR_INTR_STATUS);
}
@@ -453,7 +463,7 @@ static irqreturn_t mvsd_irq(int irq, void *dev)
}
dev_dbg(host->dev, "pio %d intr 0x%04x hw_state 0x%04x\n",
s, intr_status, mvsd_read(MVSD_HW_STATE));
- host->pio_ptr = p;
+ host->pio_offset = o;
host->pio_size = s;
irq_handled = 1;
}
--
2.20.1


2019-01-14 10:00:47

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 04/11] mmc: omap: handle chained sglists

Use the proper sg_next() helper to move to the next scatterlist element
to support chained scatterlists.

Signed-off-by: Christoph Hellwig <[email protected]>
---
drivers/mmc/host/omap.c | 16 ++++++----------
1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/drivers/mmc/host/omap.c b/drivers/mmc/host/omap.c
index 0377ac6194a0..248707aba2df 100644
--- a/drivers/mmc/host/omap.c
+++ b/drivers/mmc/host/omap.c
@@ -148,7 +148,7 @@ struct mmc_omap_host {
struct mmc_data *stop_data;

unsigned int sg_len;
- int sg_idx;
+ struct scatterlist *cur_sg;
u32 buffer_offset;
u32 buffer_bytes_left;
u32 total_bytes_left;
@@ -646,11 +646,8 @@ mmc_omap_cmd_timer(struct timer_list *t)
static void
mmc_omap_sg_to_buf(struct mmc_omap_host *host)
{
- struct scatterlist *sg;
-
- sg = host->data->sg + host->sg_idx;
- host->buffer_bytes_left = sg->length;
- host->buffer_offset = sg->offset;
+ host->buffer_bytes_left = host->cur_sg->length;
+ host->buffer_offset = host->cur_sg->offset;
if (host->buffer_bytes_left > host->total_bytes_left)
host->buffer_bytes_left = host->total_bytes_left;
}
@@ -667,13 +664,12 @@ mmc_omap_clk_timer(struct timer_list *t)
static void
mmc_omap_xfer_data(struct mmc_omap_host *host, int write)
{
- struct scatterlist *sg = host->data->sg + host->sg_idx;
+ struct scatterlist *sg = host->cur_sg;
int n, nwords;
void *p;

if (host->buffer_bytes_left == 0) {
- host->sg_idx++;
- BUG_ON(host->sg_idx == host->sg_len);
+ host->cur_sg = sg_next(host->cur_sg);
mmc_omap_sg_to_buf(host);
}
n = 64;
@@ -985,7 +981,7 @@ mmc_omap_prepare_data(struct mmc_omap_host *host, struct mmc_request *req)
}
}

- host->sg_idx = 0;
+ host->cur_sg = host->data->sg;
if (use_dma) {
enum dma_data_direction dma_data_dir;
struct dma_async_tx_descriptor *tx;
--
2.20.1


2019-01-14 10:01:01

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 10/11] mmc: core: don't use block layer bounce buffers

All MMC and SD host drivers are highmem safe now, and bounce buffering
for addressing limitations is handled in the DMA layer now.

Signed-off-by: Christoph Hellwig <[email protected]>
---
drivers/mmc/core/queue.c | 5 -----
1 file changed, 5 deletions(-)

diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index 35cc138b096d..26a18b851397 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -354,17 +354,12 @@ static const struct blk_mq_ops mmc_mq_ops = {
static void mmc_setup_queue(struct mmc_queue *mq, struct mmc_card *card)
{
struct mmc_host *host = card->host;
- u64 limit = BLK_BOUNCE_HIGH;
-
- if (mmc_dev(host)->dma_mask && *mmc_dev(host)->dma_mask)
- limit = (u64)dma_max_pfn(mmc_dev(host)) << PAGE_SHIFT;

blk_queue_flag_set(QUEUE_FLAG_NONROT, mq->queue);
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, mq->queue);
if (mmc_can_erase(card))
mmc_queue_setup_discard(mq->queue, card);

- blk_queue_bounce_limit(mq->queue, limit);
blk_queue_max_hw_sectors(mq->queue,
min(host->max_blk_count, host->max_req_size / 512));
blk_queue_max_segments(mq->queue, host->max_segs);
--
2.20.1


2019-01-14 10:01:09

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 02/11] mmc: moxart: handle highmem pages

Instead of setting up a kernel pointer to track the current PIO address,
track the offset in the current page, and do a kmap for the page while
doing the actual PIO operations.

Signed-off-by: Christoph Hellwig <[email protected]>
---
drivers/mmc/host/moxart-mmc.c | 19 +++++++++++--------
1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/drivers/mmc/host/moxart-mmc.c b/drivers/mmc/host/moxart-mmc.c
index a0670e9cd012..7142bca425c8 100644
--- a/drivers/mmc/host/moxart-mmc.c
+++ b/drivers/mmc/host/moxart-mmc.c
@@ -311,7 +311,7 @@ static void moxart_transfer_pio(struct moxart_host *host)
if (host->data_len == data->bytes_xfered)
return;

- sgp = sg_virt(host->cur_sg);
+ sgp = kmap(sg_page(host->cur_sg)) + host->cur_sg->offset;
remain = host->data_remain;

if (data->flags & MMC_DATA_WRITE) {
@@ -319,8 +319,7 @@ static void moxart_transfer_pio(struct moxart_host *host)
if (moxart_wait_for_status(host, FIFO_URUN, &status)
== -ETIMEDOUT) {
data->error = -ETIMEDOUT;
- complete(&host->pio_complete);
- return;
+ goto done;
}
for (len = 0; len < remain && len < host->fifo_width;) {
iowrite32(*sgp, host->base + REG_DATA_WINDOW);
@@ -335,8 +334,7 @@ static void moxart_transfer_pio(struct moxart_host *host)
if (moxart_wait_for_status(host, FIFO_ORUN, &status)
== -ETIMEDOUT) {
data->error = -ETIMEDOUT;
- complete(&host->pio_complete);
- return;
+ goto done;
}
for (len = 0; len < remain && len < host->fifo_width;) {
/* SCR data must be read in big endian. */
@@ -356,10 +354,15 @@ static void moxart_transfer_pio(struct moxart_host *host)
data->bytes_xfered += host->data_remain - remain;
host->data_remain = remain;

- if (host->data_len != data->bytes_xfered)
+ if (host->data_len != data->bytes_xfered) {
+ kunmap(sg_page(host->cur_sg));
moxart_next_sg(host);
- else
- complete(&host->pio_complete);
+ return;
+ }
+
+done:
+ kunmap(sg_page(host->cur_sg));
+ complete(&host->pio_complete);
}

static void moxart_prepare_data(struct moxart_host *host)
--
2.20.1


2019-01-14 10:01:10

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 03/11] mmc: omap: handle highmem pages

Instead of setting up a kernel pointer to track the current PIO address,
track the offset in the current page, and do an atomic kmap for the page
while doing the actual PIO operations.

Signed-off-by: Christoph Hellwig <[email protected]>
---
drivers/mmc/host/omap.c | 15 ++++++++++-----
1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/mmc/host/omap.c b/drivers/mmc/host/omap.c
index c60a7625b1fa..0377ac6194a0 100644
--- a/drivers/mmc/host/omap.c
+++ b/drivers/mmc/host/omap.c
@@ -13,6 +13,7 @@

#include <linux/module.h>
#include <linux/moduleparam.h>
+#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/platform_device.h>
@@ -148,7 +149,7 @@ struct mmc_omap_host {

unsigned int sg_len;
int sg_idx;
- u16 * buffer;
+ u32 buffer_offset;
u32 buffer_bytes_left;
u32 total_bytes_left;

@@ -649,7 +650,7 @@ mmc_omap_sg_to_buf(struct mmc_omap_host *host)

sg = host->data->sg + host->sg_idx;
host->buffer_bytes_left = sg->length;
- host->buffer = sg_virt(sg);
+ host->buffer_offset = sg->offset;
if (host->buffer_bytes_left > host->total_bytes_left)
host->buffer_bytes_left = host->total_bytes_left;
}
@@ -666,7 +667,9 @@ mmc_omap_clk_timer(struct timer_list *t)
static void
mmc_omap_xfer_data(struct mmc_omap_host *host, int write)
{
+ struct scatterlist *sg = host->data->sg + host->sg_idx;
int n, nwords;
+ void *p;

if (host->buffer_bytes_left == 0) {
host->sg_idx++;
@@ -684,15 +687,17 @@ mmc_omap_xfer_data(struct mmc_omap_host *host, int write)
host->total_bytes_left -= n;
host->data->bytes_xfered += n;

+ p = kmap_atomic(sg_page(sg));
if (write) {
__raw_writesw(host->virt_base + OMAP_MMC_REG(host, DATA),
- host->buffer, nwords);
+ p + host->buffer_offset, nwords);
} else {
__raw_readsw(host->virt_base + OMAP_MMC_REG(host, DATA),
- host->buffer, nwords);
+ p + host->buffer_offset, nwords);
}
+ kunmap_atomic(p);

- host->buffer += nwords;
+ host->buffer_offset += nwords;
}

#ifdef CONFIG_MMC_DEBUG
--
2.20.1


2019-01-14 10:01:12

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 08/11] mmc: sh_mmcif: handle highmem pages

Instead of setting up a kernel pointer to track the current PIO address,
track the offset in the current page, and do an atomic kmap for the page
while doing the actual PIO operations.

Signed-off-by: Christoph Hellwig <[email protected]>
---
drivers/mmc/host/sh_mmcif.c | 58 +++++++++++++++++++++++--------------
1 file changed, 36 insertions(+), 22 deletions(-)

diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c
index 81bd9afb0980..0d2bbb8943f5 100644
--- a/drivers/mmc/host/sh_mmcif.c
+++ b/drivers/mmc/host/sh_mmcif.c
@@ -228,7 +228,7 @@ struct sh_mmcif_host {
bool dying;
long timeout;
void __iomem *addr;
- u32 *pio_ptr;
+ u32 pio_offset;
spinlock_t lock; /* protect sh_mmcif_host::state */
enum sh_mmcif_state state;
enum sh_mmcif_wait_for wait_for;
@@ -595,7 +595,7 @@ static int sh_mmcif_error_manage(struct sh_mmcif_host *host)
return ret;
}

-static bool sh_mmcif_next_block(struct sh_mmcif_host *host, u32 *p)
+static bool sh_mmcif_next_block(struct sh_mmcif_host *host)
{
struct mmc_data *data = host->mrq->data;

@@ -606,10 +606,10 @@ static bool sh_mmcif_next_block(struct sh_mmcif_host *host, u32 *p)

if (host->sg_blkidx == data->sg->length) {
host->sg_blkidx = 0;
- if (++host->sg_idx < data->sg_len)
- host->pio_ptr = sg_virt(++data->sg);
- } else {
- host->pio_ptr = p;
+ if (++host->sg_idx < data->sg_len) {
+ data->sg++;
+ host->pio_offset = data->sg->offset / 4;
+ }
}

return host->sg_idx != data->sg_len;
@@ -631,8 +631,8 @@ static bool sh_mmcif_read_block(struct sh_mmcif_host *host)
{
struct device *dev = sh_mmcif_host_to_dev(host);
struct mmc_data *data = host->mrq->data;
- u32 *p = sg_virt(data->sg);
- int i;
+ u32 *p;
+ int off, i;

if (host->sd_error) {
data->error = sh_mmcif_error_manage(host);
@@ -640,8 +640,11 @@ static bool sh_mmcif_read_block(struct sh_mmcif_host *host)
return false;
}

+ p = kmap_atomic(sg_page(data->sg));
+ off = data->sg->offset / 4;
for (i = 0; i < host->blocksize / 4; i++)
- *p++ = sh_mmcif_readl(host->addr, MMCIF_CE_DATA);
+ p[off++] = sh_mmcif_readl(host->addr, MMCIF_CE_DATA);
+ kunmap_atomic(p);

/* buffer read end */
sh_mmcif_bitset(host, MMCIF_CE_INT_MASK, MASK_MBUFRE);
@@ -664,7 +667,7 @@ static void sh_mmcif_multi_read(struct sh_mmcif_host *host,
host->wait_for = MMCIF_WAIT_FOR_MREAD;
host->sg_idx = 0;
host->sg_blkidx = 0;
- host->pio_ptr = sg_virt(data->sg);
+ host->pio_offset = data->sg->offset / 4;

sh_mmcif_bitset(host, MMCIF_CE_INT_MASK, MASK_MBUFREN);
}
@@ -673,7 +676,7 @@ static bool sh_mmcif_mread_block(struct sh_mmcif_host *host)
{
struct device *dev = sh_mmcif_host_to_dev(host);
struct mmc_data *data = host->mrq->data;
- u32 *p = host->pio_ptr;
+ u32 *p;
int i;

if (host->sd_error) {
@@ -684,10 +687,14 @@ static bool sh_mmcif_mread_block(struct sh_mmcif_host *host)

BUG_ON(!data->sg->length);

- for (i = 0; i < host->blocksize / 4; i++)
- *p++ = sh_mmcif_readl(host->addr, MMCIF_CE_DATA);
+ p = kmap_atomic(sg_page(data->sg));
+ for (i = 0; i < host->blocksize / 4; i++) {
+ p[host->pio_offset++] =
+ sh_mmcif_readl(host->addr, MMCIF_CE_DATA);
+ }
+ kunmap_atomic(p);

- if (!sh_mmcif_next_block(host, p))
+ if (!sh_mmcif_next_block(host))
return false;

sh_mmcif_bitset(host, MMCIF_CE_INT_MASK, MASK_MBUFREN);
@@ -711,8 +718,8 @@ static bool sh_mmcif_write_block(struct sh_mmcif_host *host)
{
struct device *dev = sh_mmcif_host_to_dev(host);
struct mmc_data *data = host->mrq->data;
- u32 *p = sg_virt(data->sg);
- int i;
+ u32 *p;
+ int off, i;

if (host->sd_error) {
data->error = sh_mmcif_error_manage(host);
@@ -720,8 +727,11 @@ static bool sh_mmcif_write_block(struct sh_mmcif_host *host)
return false;
}

+ p = kmap_atomic(sg_page(data->sg));
+ off = data->sg->offset / 4;
for (i = 0; i < host->blocksize / 4; i++)
- sh_mmcif_writel(host->addr, MMCIF_CE_DATA, *p++);
+ sh_mmcif_writel(host->addr, MMCIF_CE_DATA, p[off++]);
+ kunmap_atomic(p);

/* buffer write end */
sh_mmcif_bitset(host, MMCIF_CE_INT_MASK, MASK_MDTRANE);
@@ -744,7 +754,7 @@ static void sh_mmcif_multi_write(struct sh_mmcif_host *host,
host->wait_for = MMCIF_WAIT_FOR_MWRITE;
host->sg_idx = 0;
host->sg_blkidx = 0;
- host->pio_ptr = sg_virt(data->sg);
+ host->pio_offset = data->sg->offset / 4;

sh_mmcif_bitset(host, MMCIF_CE_INT_MASK, MASK_MBUFWEN);
}
@@ -753,7 +763,7 @@ static bool sh_mmcif_mwrite_block(struct sh_mmcif_host *host)
{
struct device *dev = sh_mmcif_host_to_dev(host);
struct mmc_data *data = host->mrq->data;
- u32 *p = host->pio_ptr;
+ u32 *p;
int i;

if (host->sd_error) {
@@ -764,10 +774,14 @@ static bool sh_mmcif_mwrite_block(struct sh_mmcif_host *host)

BUG_ON(!data->sg->length);

- for (i = 0; i < host->blocksize / 4; i++)
- sh_mmcif_writel(host->addr, MMCIF_CE_DATA, *p++);
+ p = kmap_atomic(sg_page(data->sg));
+ for (i = 0; i < host->blocksize / 4; i++) {
+ sh_mmcif_writel(host->addr, MMCIF_CE_DATA,
+ p[host->pio_offset++]);
+ }
+ kunmap_atomic(p);

- if (!sh_mmcif_next_block(host, p))
+ if (!sh_mmcif_next_block(host))
return false;

sh_mmcif_bitset(host, MMCIF_CE_INT_MASK, MASK_MBUFWEN);
--
2.20.1


2019-01-14 10:01:23

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 09/11] mmc: sh_mmcif: handle chained sglists

Use the proper sg_next() helper to move to the next scatterlist element
to support chained scatterlists.

Signed-off-by: Christoph Hellwig <[email protected]>
---
drivers/mmc/host/sh_mmcif.c | 13 +++++--------
1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c
index 0d2bbb8943f5..a4116be5ebc7 100644
--- a/drivers/mmc/host/sh_mmcif.c
+++ b/drivers/mmc/host/sh_mmcif.c
@@ -234,7 +234,6 @@ struct sh_mmcif_host {
enum sh_mmcif_wait_for wait_for;
struct delayed_work timeout_work;
size_t blocksize;
- int sg_idx;
int sg_blkidx;
bool power;
bool ccs_enable; /* Command Completion Signal support */
@@ -606,13 +605,13 @@ static bool sh_mmcif_next_block(struct sh_mmcif_host *host)

if (host->sg_blkidx == data->sg->length) {
host->sg_blkidx = 0;
- if (++host->sg_idx < data->sg_len) {
- data->sg++;
- host->pio_offset = data->sg->offset / 4;
- }
+ data->sg = sg_next(data->sg);
+ if (!data->sg)
+ return false;
+ host->pio_offset = data->sg->offset / 4;
}

- return host->sg_idx != data->sg_len;
+ return true;
}

static void sh_mmcif_single_read(struct sh_mmcif_host *host,
@@ -665,7 +664,6 @@ static void sh_mmcif_multi_read(struct sh_mmcif_host *host,
BLOCK_SIZE_MASK;

host->wait_for = MMCIF_WAIT_FOR_MREAD;
- host->sg_idx = 0;
host->sg_blkidx = 0;
host->pio_offset = data->sg->offset / 4;

@@ -752,7 +750,6 @@ static void sh_mmcif_multi_write(struct sh_mmcif_host *host,
BLOCK_SIZE_MASK;

host->wait_for = MMCIF_WAIT_FOR_MWRITE;
- host->sg_idx = 0;
host->sg_blkidx = 0;
host->pio_offset = data->sg->offset / 4;

--
2.20.1


2019-01-14 10:01:54

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 11/11] dma-mapping: remove dma_max_pfn

These days the DMA mapping code must bounce buffer for any not supported
address, and if they driver needs to optimize for natively supported
ranged it should use dma_get_required_mask.

Signed-off-by: Christoph Hellwig <[email protected]>
---
arch/arm/include/asm/dma-mapping.h | 7 -------
include/linux/dma-mapping.h | 7 -------
2 files changed, 14 deletions(-)

diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
index 31d3b96f0f4b..496b36b9a7ff 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h
@@ -89,13 +89,6 @@ static inline dma_addr_t virt_to_dma(struct device *dev, void *addr)
}
#endif

-/* The ARM override for dma_max_pfn() */
-static inline unsigned long dma_max_pfn(struct device *dev)
-{
- return dma_to_pfn(dev, *dev->dma_mask);
-}
-#define dma_max_pfn(dev) dma_max_pfn(dev)
-
#define arch_setup_dma_ops arch_setup_dma_ops
extern void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
const struct iommu_ops *iommu, bool coherent);
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index f6ded992c183..c6dbc287e466 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -710,13 +710,6 @@ static inline int dma_set_seg_boundary(struct device *dev, unsigned long mask)
return -EIO;
}

-#ifndef dma_max_pfn
-static inline unsigned long dma_max_pfn(struct device *dev)
-{
- return (*dev->dma_mask >> PAGE_SHIFT) + dev->dma_pfn_offset;
-}
-#endif
-
static inline int dma_get_cache_alignment(void)
{
#ifdef ARCH_DMA_MINALIGN
--
2.20.1


2019-01-14 10:02:04

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 06/11] mmc: s3cmci: handle chained sglists

Use the proper sg_next() helper to move to the next scatterlist element
to support chained scatterlists.

Signed-off-by: Christoph Hellwig <[email protected]>
---
drivers/mmc/host/s3cmci.c | 19 +++++++++----------
drivers/mmc/host/s3cmci.h | 2 +-
2 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c
index 1be84426c817..2660fb61d1d0 100644
--- a/drivers/mmc/host/s3cmci.c
+++ b/drivers/mmc/host/s3cmci.c
@@ -320,20 +320,19 @@ static void s3cmci_check_sdio_irq(struct s3cmci_host *host)

static inline int get_data_buffer(struct s3cmci_host *host)
{
- if (host->pio_sgptr >= host->mrq->data->sg_len) {
- dbg(host, dbg_debug, "no more buffers (%i/%i)\n",
- host->pio_sgptr, host->mrq->data->sg_len);
+ if (!host->next_sg) {
+ dbg(host, dbg_debug, "no more buffers (%i)\n",
+ host->mrq->data->sg_len);
return -EBUSY;
}
- host->cur_sg = &host->mrq->data->sg[host->pio_sgptr];
+ host->cur_sg = host->next_sg;
+ host->next_sg = sg_next(host->next_sg);

host->pio_bytes = host->cur_sg->length;
host->pio_offset = host->cur_sg->offset;

- host->pio_sgptr++;
-
- dbg(host, dbg_sg, "new buffer (%i/%i)\n",
- host->pio_sgptr, host->mrq->data->sg_len);
+ dbg(host, dbg_sg, "new buffer (%i)\n",
+ host->mrq->data->sg_len);

return 0;
}
@@ -1052,8 +1051,8 @@ static int s3cmci_prepare_pio(struct s3cmci_host *host, struct mmc_data *data)

BUG_ON((data->flags & BOTH_DIR) == BOTH_DIR);

- host->pio_sgptr = 0;
- host->cur_sg = &host->mrq->data->sg[host->pio_sgptr];
+ host->cur_sg = host->mrq->data->sg;
+ host->next_sg = sg_next(host->cur_sg);
host->pio_bytes = 0;
host->pio_count = 0;
host->pio_active = rw ? XFER_WRITE : XFER_READ;
diff --git a/drivers/mmc/host/s3cmci.h b/drivers/mmc/host/s3cmci.h
index 4320f7d832dc..caf1078d07d1 100644
--- a/drivers/mmc/host/s3cmci.h
+++ b/drivers/mmc/host/s3cmci.h
@@ -51,7 +51,7 @@ struct s3cmci_host {
int dma_complete;

struct scatterlist *cur_sg;
- u32 pio_sgptr;
+ struct scatterlist *next_sg;
u32 pio_bytes;
u32 pio_count;
u32 pio_offset;
--
2.20.1


2019-01-14 10:28:48

by Ulf Hansson

[permalink] [raw]
Subject: Re: remove block layer bounce buffering for MMC

+Linus Walleij (recently made a cleanup of the mmc bounce buffering code).

On Mon, 14 Jan 2019 at 10:58, Christoph Hellwig <[email protected]> wrote:
>
> Hi everyone,
>
> this series converts the remaining MMC host drivers to properly kmap the
> scatterlist entries it does PIO operations on, and then goes on to
> remove the usage of block layer bounce buffering (which I plan to remove
> eventually) from the MMC layer.
>
> As a bonus I've converted various drivers to the proper scatterlist
> helpers so that at least in theory they are ready for chained
> scatterlists.
>
> All the changes are compile tested only as I don't have any of the
> hardware, so a careful review would be appreciated.

Thanks for posting this. I will have a look as soon as I can, but
first needs to catch up with things since the holidays.

Kind regards
Uffe

2019-01-14 16:30:49

by Nicolas Pitre

[permalink] [raw]
Subject: Re: [PATCH 07/11] mmc: mvsdio: handle highmem pages

On Mon, 14 Jan 2019, Christoph Hellwig wrote:

> Instead of setting up a kernel pointer to track the current PIO address,
> track the offset in the current page, and do an atomic kmap for the page
> while doing the actual PIO operations.
>
> Signed-off-by: Christoph Hellwig <[email protected]>

Just a small suggestion below


> ---
> drivers/mmc/host/mvsdio.c | 48 +++++++++++++++++++++++----------------
> 1 file changed, 29 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/mmc/host/mvsdio.c b/drivers/mmc/host/mvsdio.c
> index e22bbff89c8d..545e370d6dae 100644
> --- a/drivers/mmc/host/mvsdio.c
> +++ b/drivers/mmc/host/mvsdio.c
> @@ -12,6 +12,7 @@
> #include <linux/module.h>
> #include <linux/init.h>
> #include <linux/io.h>
> +#include <linux/highmem.h>
> #include <linux/platform_device.h>
> #include <linux/mbus.h>
> #include <linux/delay.h>
> @@ -42,7 +43,8 @@ struct mvsd_host {
> unsigned int intr_en;
> unsigned int ctrl;
> unsigned int pio_size;
> - void *pio_ptr;
> + struct scatterlist *pio_sg;
> + unsigned int pio_offset; /* offset in words into the segment */
> unsigned int sg_frags;
> unsigned int ns_per_clk;
> unsigned int clock;
> @@ -96,9 +98,9 @@ static int mvsd_setup_data(struct mvsd_host *host, struct mmc_data *data)
> if (tmout_index > MVSD_HOST_CTRL_TMOUT_MAX)
> tmout_index = MVSD_HOST_CTRL_TMOUT_MAX;
>
> - dev_dbg(host->dev, "data %s at 0x%08x: blocks=%d blksz=%d tmout=%u (%d)\n",
> + dev_dbg(host->dev, "data %s at 0x%08llx: blocks=%d blksz=%d tmout=%u (%d)\n",
> (data->flags & MMC_DATA_READ) ? "read" : "write",
> - (u32)sg_virt(data->sg), data->blocks, data->blksz,
> + (u64)sg_phys(data->sg), data->blocks, data->blksz,
> tmout, tmout_index);
>
> host->ctrl &= ~MVSD_HOST_CTRL_TMOUT_MASK;
> @@ -118,10 +120,11 @@ static int mvsd_setup_data(struct mvsd_host *host, struct mmc_data *data)
> * boundary.
> */
> host->pio_size = data->blocks * data->blksz;
> - host->pio_ptr = sg_virt(data->sg);
> + host->pio_sg = data->sg;
> + host->pio_offset = data->sg->offset / 2;
> if (!nodma)
> - dev_dbg(host->dev, "fallback to PIO for data at 0x%p size %d\n",
> - host->pio_ptr, host->pio_size);
> + dev_dbg(host->dev, "fallback to PIO for data at 0x%x size %d\n",
> + host->pio_offset, host->pio_size);
> return 1;
> } else {
> dma_addr_t phys_addr;
> @@ -291,8 +294,9 @@ static u32 mvsd_finish_data(struct mvsd_host *host, struct mmc_data *data,
> {
> void __iomem *iobase = host->base;
>
> - if (host->pio_ptr) {
> - host->pio_ptr = NULL;
> + if (host->pio_sg) {
> + host->pio_sg = NULL;
> + host->pio_offset = 0;
> host->pio_size = 0;
> } else {
> dma_unmap_sg(mmc_dev(host->mmc), data->sg, host->sg_frags,
> @@ -376,11 +380,12 @@ static irqreturn_t mvsd_irq(int irq, void *dev)
> if (host->pio_size &&
> (intr_status & host->intr_en &
> (MVSD_NOR_RX_READY | MVSD_NOR_RX_FIFO_8W))) {
> - u16 *p = host->pio_ptr;
> + u16 *p = kmap_atomic(sg_page(host->pio_sg));
> + unsigned int o = host->pio_offset;

To minimize code change, you could do:

u16 *p0 = kmap_atomic(sg_page(host->pio_sg));
u16 *p = p0 + host->pio_offset;

and at the end:

host->pio_offset = p - p0;

leaving the middle code unchanged.

This might also produce slightly better assembly with the post increment
instructions on ARM if the compiler doesn't figure it out on its own
otherwise.


Nicolas

2019-01-14 16:54:45

by Robin Murphy

[permalink] [raw]
Subject: Re: remove block layer bounce buffering for MMC

On 14/01/2019 09:57, Christoph Hellwig wrote:
> Hi everyone,
>
> this series converts the remaining MMC host drivers to properly kmap the
> scatterlist entries it does PIO operations on, and then goes on to
> remove the usage of block layer bounce buffering (which I plan to remove
> eventually) from the MMC layer.
>
> As a bonus I've converted various drivers to the proper scatterlist
> helpers so that at least in theory they are ready for chained
> scatterlists.
>
> All the changes are compile tested only as I don't have any of the
> hardware, so a careful review would be appreciated.

One general point for the kmap() conversions - it's not obvious (to me
at least) whether or how that would work for a segment where sg->length
> PAGE_SIZE. Or is there some cast-iron guarantee from the MMC
mid-layer that it will never let the block layer generate such things in
the first place?

Robin.

2019-01-14 17:00:17

by Christoph Hellwig

[permalink] [raw]
Subject: Re: remove block layer bounce buffering for MMC

On Mon, Jan 14, 2019 at 04:52:40PM +0000, Robin Murphy wrote:
> One general point for the kmap() conversions - it's not obvious (to me at
> least) whether or how that would work for a segment where sg->length >
> PAGE_SIZE. Or is there some cast-iron guarantee from the MMC mid-layer that
> it will never let the block layer generate such things in the first place?

None of this will with such segments. But yes, I guess the old case
could have worked as long as any physical contigous ranges are also
virtually contigous. So we might have to throw in a page size segment
boundary here.

2019-01-16 19:06:53

by Arnd Bergmann

[permalink] [raw]
Subject: Re: remove block layer bounce buffering for MMC

On Mon, Jan 14, 2019 at 11:27 AM Ulf Hansson <[email protected]> wrote:
>
> +Linus Walleij (recently made a cleanup of the mmc bounce buffering code).
>
> On Mon, 14 Jan 2019 at 10:58, Christoph Hellwig <[email protected]> wrote:
> >
> > Hi everyone,
> >
> > this series converts the remaining MMC host drivers to properly kmap the
> > scatterlist entries it does PIO operations on, and then goes on to
> > remove the usage of block layer bounce buffering (which I plan to remove
> > eventually) from the MMC layer.
> >
> > As a bonus I've converted various drivers to the proper scatterlist
> > helpers so that at least in theory they are ready for chained
> > scatterlists.
> >
> > All the changes are compile tested only as I don't have any of the
> > hardware, so a careful review would be appreciated.
>
> Thanks for posting this. I will have a look as soon as I can, but
> first needs to catch up with things since the holidays.

Linus probably knows more here, but I have a vague recollection of
the MMC bounce buffer code being needed mostly for performance
reasons: when the scatterlist is discontiguous, that can result in
a request being split up into separate MMC commands, which due
to the lack of queued commands combined with the need for
garbage collection on sub-page writes results in a huge slowdown
compared to having larger bounce buffers all the time.

We had discussed finding a different way to do this (separate
from the bounce buffering), but I don't know if that ever happened,
or if this is even the code that you are changing here.

Arnd

2019-01-16 22:09:54

by Linus Walleij

[permalink] [raw]
Subject: Re: remove block layer bounce buffering for MMC

On Wed, Jan 16, 2019 at 11:25 AM Arnd Bergmann <[email protected]> wrote:
> On Mon, Jan 14, 2019 at 11:27 AM Ulf Hansson <[email protected]> wrote:
> >
> > +Linus Walleij (recently made a cleanup of the mmc bounce buffering code).

Nah it's not THAT bounce buffer.

> Linus probably knows more here, but I have a vague recollection of
> the MMC bounce buffer code being needed mostly for performance
> reasons: when the scatterlist is discontiguous, that can result in
> a request being split up into separate MMC commands, which due
> to the lack of queued commands combined with the need for
> garbage collection on sub-page writes results in a huge slowdown
> compared to having larger bounce buffers all the time.
>
> We had discussed finding a different way to do this (separate
> from the bounce buffering), but I don't know if that ever happened,
> or if this is even the code that you are changing here.

Nope not the same code.

The term "bounce buffer" is sadly used as ambigously as
__underscores in front of function names.

That other "bounce buffer" was first deleted and then
reimplemented as a local hack in the SDHCI driver core
after it caused performance regressions on the i.MX and
some laptops, see commit:

commit bd9b902798ab14d19ca116b10bde581ddff8f905
mmc: sdhci: Implement an SDHCI-specific bounce buffer

That should be orthogonal to Christoph's changes in this
patch series.

Yours,
Linus Walleij

2019-01-16 22:10:03

by Arnd Bergmann

[permalink] [raw]
Subject: Re: remove block layer bounce buffering for MMC

On Wed, Jan 16, 2019 at 2:51 PM Linus Walleij <[email protected]> wrote:
>
> On Wed, Jan 16, 2019 at 11:25 AM Arnd Bergmann <[email protected]> wrote:
> > On Mon, Jan 14, 2019 at 11:27 AM Ulf Hansson <[email protected]> wrote:
> > >
> > > +Linus Walleij (recently made a cleanup of the mmc bounce buffering code).
>
> Nah it's not THAT bounce buffer.
>
> > Linus probably knows more here, but I have a vague recollection of
> > the MMC bounce buffer code being needed mostly for performance
> > reasons: when the scatterlist is discontiguous, that can result in
> > a request being split up into separate MMC commands, which due
> > to the lack of queued commands combined with the need for
> > garbage collection on sub-page writes results in a huge slowdown
> > compared to having larger bounce buffers all the time.
> >
> > We had discussed finding a different way to do this (separate
> > from the bounce buffering), but I don't know if that ever happened,
> > or if this is even the code that you are changing here.
>
> Nope not the same code.
>
> The term "bounce buffer" is sadly used as ambigously as
> __underscores in front of function names.
>
> That other "bounce buffer" was first deleted and then
> reimplemented as a local hack in the SDHCI driver core
> after it caused performance regressions on the i.MX and
> some laptops, see commit:
>
> commit bd9b902798ab14d19ca116b10bde581ddff8f905
> mmc: sdhci: Implement an SDHCI-specific bounce buffer
>
> That should be orthogonal to Christoph's changes in this
> patch series.

Ok, thanks for the clarification. Please ignore my comments then.

Arnd

2019-01-30 07:56:19

by Ulf Hansson

[permalink] [raw]
Subject: Re: [PATCH 07/11] mmc: mvsdio: handle highmem pages

On Mon, 14 Jan 2019 at 10:58, Christoph Hellwig <[email protected]> wrote:
>
> Instead of setting up a kernel pointer to track the current PIO address,
> track the offset in the current page, and do an atomic kmap for the page
> while doing the actual PIO operations.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> ---
> drivers/mmc/host/mvsdio.c | 48 +++++++++++++++++++++++----------------
> 1 file changed, 29 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/mmc/host/mvsdio.c b/drivers/mmc/host/mvsdio.c
> index e22bbff89c8d..545e370d6dae 100644
> --- a/drivers/mmc/host/mvsdio.c
> +++ b/drivers/mmc/host/mvsdio.c
> @@ -12,6 +12,7 @@
> #include <linux/module.h>
> #include <linux/init.h>
> #include <linux/io.h>
> +#include <linux/highmem.h>
> #include <linux/platform_device.h>
> #include <linux/mbus.h>
> #include <linux/delay.h>
> @@ -42,7 +43,8 @@ struct mvsd_host {
> unsigned int intr_en;
> unsigned int ctrl;
> unsigned int pio_size;
> - void *pio_ptr;
> + struct scatterlist *pio_sg;
> + unsigned int pio_offset; /* offset in words into the segment */
> unsigned int sg_frags;
> unsigned int ns_per_clk;
> unsigned int clock;
> @@ -96,9 +98,9 @@ static int mvsd_setup_data(struct mvsd_host *host, struct mmc_data *data)
> if (tmout_index > MVSD_HOST_CTRL_TMOUT_MAX)
> tmout_index = MVSD_HOST_CTRL_TMOUT_MAX;
>
> - dev_dbg(host->dev, "data %s at 0x%08x: blocks=%d blksz=%d tmout=%u (%d)\n",
> + dev_dbg(host->dev, "data %s at 0x%08llx: blocks=%d blksz=%d tmout=%u (%d)\n",
> (data->flags & MMC_DATA_READ) ? "read" : "write",
> - (u32)sg_virt(data->sg), data->blocks, data->blksz,
> + (u64)sg_phys(data->sg), data->blocks, data->blksz,
> tmout, tmout_index);
>
> host->ctrl &= ~MVSD_HOST_CTRL_TMOUT_MASK;
> @@ -118,10 +120,11 @@ static int mvsd_setup_data(struct mvsd_host *host, struct mmc_data *data)
> * boundary.
> */
> host->pio_size = data->blocks * data->blksz;
> - host->pio_ptr = sg_virt(data->sg);
> + host->pio_sg = data->sg;
> + host->pio_offset = data->sg->offset / 2;
> if (!nodma)
> - dev_dbg(host->dev, "fallback to PIO for data at 0x%p size %d\n",
> - host->pio_ptr, host->pio_size);
> + dev_dbg(host->dev, "fallback to PIO for data at 0x%x size %d\n",
> + host->pio_offset, host->pio_size);
> return 1;
> } else {
> dma_addr_t phys_addr;
> @@ -291,8 +294,9 @@ static u32 mvsd_finish_data(struct mvsd_host *host, struct mmc_data *data,
> {
> void __iomem *iobase = host->base;
>
> - if (host->pio_ptr) {
> - host->pio_ptr = NULL;
> + if (host->pio_sg) {
> + host->pio_sg = NULL;
> + host->pio_offset = 0;
> host->pio_size = 0;
> } else {
> dma_unmap_sg(mmc_dev(host->mmc), data->sg, host->sg_frags,
> @@ -376,11 +380,12 @@ static irqreturn_t mvsd_irq(int irq, void *dev)
> if (host->pio_size &&
> (intr_status & host->intr_en &
> (MVSD_NOR_RX_READY | MVSD_NOR_RX_FIFO_8W))) {
> - u16 *p = host->pio_ptr;
> + u16 *p = kmap_atomic(sg_page(host->pio_sg));

Seems like a corresponding kunmap_atomic() is missing somewhere.

> + unsigned int o = host->pio_offset;
> int s = host->pio_size;
> while (s >= 32 && (intr_status & MVSD_NOR_RX_FIFO_8W)) {
> - readsw(iobase + MVSD_FIFO, p, 16);
> - p += 16;
> + readsw(iobase + MVSD_FIFO, p + o, 16);
> + o += 16;
> s -= 32;
> intr_status = mvsd_read(MVSD_NOR_INTR_STATUS);
> }
> @@ -391,8 +396,10 @@ static irqreturn_t mvsd_irq(int irq, void *dev)
> */
> if (s <= 32) {
> while (s >= 4 && (intr_status & MVSD_NOR_RX_READY)) {
> - put_unaligned(mvsd_read(MVSD_FIFO), p++);
> - put_unaligned(mvsd_read(MVSD_FIFO), p++);
> + put_unaligned(mvsd_read(MVSD_FIFO), p + o);
> + o++;
> + put_unaligned(mvsd_read(MVSD_FIFO), p + o);
> + o++;
> s -= 4;
> intr_status = mvsd_read(MVSD_NOR_INTR_STATUS);
> }
> @@ -400,7 +407,7 @@ static irqreturn_t mvsd_irq(int irq, void *dev)
> u16 val[2] = {0, 0};
> val[0] = mvsd_read(MVSD_FIFO);
> val[1] = mvsd_read(MVSD_FIFO);
> - memcpy(p, ((void *)&val) + 4 - s, s);
> + memcpy(p + o, ((void *)&val) + 4 - s, s);
> s = 0;
> intr_status = mvsd_read(MVSD_NOR_INTR_STATUS);
> }
> @@ -416,13 +423,14 @@ static irqreturn_t mvsd_irq(int irq, void *dev)
> }
> dev_dbg(host->dev, "pio %d intr 0x%04x hw_state 0x%04x\n",
> s, intr_status, mvsd_read(MVSD_HW_STATE));
> - host->pio_ptr = p;
> + host->pio_offset = o;
> host->pio_size = s;
> irq_handled = 1;
> } else if (host->pio_size &&
> (intr_status & host->intr_en &
> (MVSD_NOR_TX_AVAIL | MVSD_NOR_TX_FIFO_8W))) {
> - u16 *p = host->pio_ptr;
> + u16 *p = kmap_atomic(sg_page(host->pio_sg));

Ditto.

> + unsigned int o = host->pio_offset;
> int s = host->pio_size;
> /*
> * The TX_FIFO_8W bit is unreliable. When set, bursting
> @@ -431,8 +439,10 @@ static irqreturn_t mvsd_irq(int irq, void *dev)
> * TX_FIFO_8W remains set.
> */
> while (s >= 4 && (intr_status & MVSD_NOR_TX_AVAIL)) {
> - mvsd_write(MVSD_FIFO, get_unaligned(p++));
> - mvsd_write(MVSD_FIFO, get_unaligned(p++));
> + mvsd_write(MVSD_FIFO, get_unaligned(p + o));
> + o++;
> + mvsd_write(MVSD_FIFO, get_unaligned(p + o));
> + o++;
> s -= 4;
> intr_status = mvsd_read(MVSD_NOR_INTR_STATUS);
> }
> @@ -453,7 +463,7 @@ static irqreturn_t mvsd_irq(int irq, void *dev)
> }
> dev_dbg(host->dev, "pio %d intr 0x%04x hw_state 0x%04x\n",
> s, intr_status, mvsd_read(MVSD_HW_STATE));
> - host->pio_ptr = p;
> + host->pio_offset = o;
> host->pio_size = s;
> irq_handled = 1;
> }
> --
> 2.20.1
>

Kind regards
Uffe

2019-01-30 07:59:20

by Ulf Hansson

[permalink] [raw]
Subject: Re: [PATCH 05/11] mmc: s3cmci: handle highmem pages

On Mon, 14 Jan 2019 at 10:58, Christoph Hellwig <[email protected]> wrote:
>
> Instead of setting up a kernel pointer to track the current PIO address,
> track the offset in the current page, and do an atomic kmap for the page
> while doing the actual PIO operations.
>
> Signed-off-by: Christoph Hellwig <[email protected]>

Nitpick: This one have some trailing whitespace errors, which git
complains about when I apply it.

No need to re-spin due to this, I can fix it up.

Kind regards
Uffe

> ---
> drivers/mmc/host/s3cmci.c | 107 +++++++++++++++++++-------------------
> drivers/mmc/host/s3cmci.h | 3 +-
> 2 files changed, 55 insertions(+), 55 deletions(-)
>
> diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c
> index 10f5219b3b40..1be84426c817 100644
> --- a/drivers/mmc/host/s3cmci.c
> +++ b/drivers/mmc/host/s3cmci.c
> @@ -15,6 +15,7 @@
> #include <linux/dmaengine.h>
> #include <linux/dma-mapping.h>
> #include <linux/clk.h>
> +#include <linux/highmem.h>
> #include <linux/mmc/host.h>
> #include <linux/platform_device.h>
> #include <linux/cpufreq.h>
> @@ -317,26 +318,17 @@ static void s3cmci_check_sdio_irq(struct s3cmci_host *host)
> }
> }
>
> -static inline int get_data_buffer(struct s3cmci_host *host,
> - u32 *bytes, u32 **pointer)
> +static inline int get_data_buffer(struct s3cmci_host *host)
> {
> - struct scatterlist *sg;
> -
> - if (host->pio_active == XFER_NONE)
> - return -EINVAL;
> -
> - if ((!host->mrq) || (!host->mrq->data))
> - return -EINVAL;
> -
> if (host->pio_sgptr >= host->mrq->data->sg_len) {
> dbg(host, dbg_debug, "no more buffers (%i/%i)\n",
> host->pio_sgptr, host->mrq->data->sg_len);
> return -EBUSY;
> }
> - sg = &host->mrq->data->sg[host->pio_sgptr];
> + host->cur_sg = &host->mrq->data->sg[host->pio_sgptr];
>
> - *bytes = sg->length;
> - *pointer = sg_virt(sg);
> + host->pio_bytes = host->cur_sg->length;
> + host->pio_offset = host->cur_sg->offset;
>
> host->pio_sgptr++;
>
> @@ -422,11 +414,16 @@ static void s3cmci_disable_irq(struct s3cmci_host *host, bool transfer)
>
> static void do_pio_read(struct s3cmci_host *host)
> {
> - int res;
> u32 fifo;
> u32 *ptr;
> u32 fifo_words;
> void __iomem *from_ptr;
> + void *buf;
> +
> + if (host->pio_active == XFER_NONE)
> + goto done;
> + if (!host->mrq || !host->mrq->data)
> + goto done;
>
> /* write real prescaler to host, it might be set slow to fix */
> writel(host->prescaler, host->base + S3C2410_SDIPRE);
> @@ -435,20 +432,12 @@ static void do_pio_read(struct s3cmci_host *host)
>
> while ((fifo = fifo_count(host))) {
> if (!host->pio_bytes) {
> - res = get_data_buffer(host, &host->pio_bytes,
> - &host->pio_ptr);
> - if (res) {
> - host->pio_active = XFER_NONE;
> - host->complete_what = COMPLETION_FINALIZE;
> -
> - dbg(host, dbg_pio, "pio_read(): "
> - "complete (no more data).\n");
> - return;
> - }
> + if (get_data_buffer(host) < 0)
> + goto done;
>
> dbg(host, dbg_pio,
> - "pio_read(): new target: [%i]@[%p]\n",
> - host->pio_bytes, host->pio_ptr);
> + "pio_read(): new target: [%i]@[%zu]\n",
> + host->pio_bytes, host->pio_offset);
> }
>
> dbg(host, dbg_pio,
> @@ -470,63 +459,65 @@ static void do_pio_read(struct s3cmci_host *host)
> host->pio_count += fifo;
>
> fifo_words = fifo >> 2;
> - ptr = host->pio_ptr;
> - while (fifo_words--)
> +
> + buf = (kmap_atomic(sg_page(host->cur_sg)) + host->pio_offset);
> + ptr = buf;
> + while (fifo_words--) {
> *ptr++ = readl(from_ptr);
> - host->pio_ptr = ptr;
> + host->pio_offset += 4;
> + }
>
> if (fifo & 3) {
> u32 n = fifo & 3;
> u32 data = readl(from_ptr);
> - u8 *p = (u8 *)host->pio_ptr;
> + u8 *p = (u8 *)ptr;
>
> while (n--) {
> *p++ = data;
> data >>= 8;
> + host->pio_offset++;
> }
> }
> + kunmap_atomic(buf);
> }
>
> if (!host->pio_bytes) {
> - res = get_data_buffer(host, &host->pio_bytes, &host->pio_ptr);
> - if (res) {
> - dbg(host, dbg_pio,
> - "pio_read(): complete (no more buffers).\n");
> - host->pio_active = XFER_NONE;
> - host->complete_what = COMPLETION_FINALIZE;
> -
> - return;
> - }
> + if (get_data_buffer(host) < 0)
> + goto done;
> }
>
> enable_imask(host,
> S3C2410_SDIIMSK_RXFIFOHALF | S3C2410_SDIIMSK_RXFIFOLAST);
> + return;
> +
> +done:
> + host->pio_active = XFER_NONE;
> + host->complete_what = COMPLETION_FINALIZE;
> + dbg(host, dbg_pio, "pio_read(): complete (no more data).\n");
> }
>
> static void do_pio_write(struct s3cmci_host *host)
> {
> void __iomem *to_ptr;
> - int res;
> + void *buf;
> u32 fifo;
> u32 *ptr;
>
> + if (host->pio_active == XFER_NONE)
> + goto done;
> + if (!host->mrq || !host->mrq->data)
> + goto done;
> +
> to_ptr = host->base + host->sdidata;
>
> while ((fifo = fifo_free(host)) > 3) {
> if (!host->pio_bytes) {
> - res = get_data_buffer(host, &host->pio_bytes,
> - &host->pio_ptr);
> - if (res) {
> - dbg(host, dbg_pio,
> - "pio_write(): complete (no more data).\n");
> - host->pio_active = XFER_NONE;
> -
> - return;
> - }
> + if (get_data_buffer(host) < 0)
> + goto done;
>
> dbg(host, dbg_pio,
> - "pio_write(): new source: [%i]@[%p]\n",
> - host->pio_bytes, host->pio_ptr);
> + "pio_write(): new source: [%i]@[%zd]\n",
> + host->pio_bytes, host->pio_offset);
>
> }
>
> @@ -543,13 +534,20 @@ static void do_pio_write(struct s3cmci_host *host)
> host->pio_count += fifo;
>
> fifo = (fifo + 3) >> 2;
> - ptr = host->pio_ptr;
> - while (fifo--)
> + buf = (kmap_atomic(sg_page(host->cur_sg)) + host->pio_offset);
> + ptr = buf;
> + while (fifo--) {
> writel(*ptr++, to_ptr);
> - host->pio_ptr = ptr;
> + host->pio_offset += 4;
> + }
> + kunmap_atomic(buf);
> }
>
> enable_imask(host, S3C2410_SDIIMSK_TXFIFOHALF);
> + return;
> +done:
> + dbg(host, dbg_pio, "pio_write(): complete (no more data).\n");
> + host->pio_active = XFER_NONE;
> }
>
> static void pio_tasklet(unsigned long data)
> @@ -1055,6 +1053,7 @@ static int s3cmci_prepare_pio(struct s3cmci_host *host, struct mmc_data *data)
> BUG_ON((data->flags & BOTH_DIR) == BOTH_DIR);
>
> host->pio_sgptr = 0;
> + host->cur_sg = &host->mrq->data->sg[host->pio_sgptr];
> host->pio_bytes = 0;
> host->pio_count = 0;
> host->pio_active = rw ? XFER_WRITE : XFER_READ;
> diff --git a/drivers/mmc/host/s3cmci.h b/drivers/mmc/host/s3cmci.h
> index 30c2c0dd1bc8..4320f7d832dc 100644
> --- a/drivers/mmc/host/s3cmci.h
> +++ b/drivers/mmc/host/s3cmci.h
> @@ -50,10 +50,11 @@ struct s3cmci_host {
>
> int dma_complete;
>
> + struct scatterlist *cur_sg;
> u32 pio_sgptr;
> u32 pio_bytes;
> u32 pio_count;
> - u32 *pio_ptr;
> + u32 pio_offset;
> #define XFER_NONE 0
> #define XFER_READ 1
> #define XFER_WRITE 2
> --
> 2.20.1
>

2019-01-30 08:04:56

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 05/11] mmc: s3cmci: handle highmem pages

On Wed, Jan 30, 2019 at 08:57:47AM +0100, Ulf Hansson wrote:
> On Mon, 14 Jan 2019 at 10:58, Christoph Hellwig <[email protected]> wrote:
> >
> > Instead of setting up a kernel pointer to track the current PIO address,
> > track the offset in the current page, and do an atomic kmap for the page
> > while doing the actual PIO operations.
> >
> > Signed-off-by: Christoph Hellwig <[email protected]>
>
> Nitpick: This one have some trailing whitespace errors, which git
> complains about when I apply it.
>
> No need to re-spin due to this, I can fix it up.

I'll need to respin to limit the segment size to a page anyway, so I
will take this into account as well for v2.

2019-01-30 08:18:40

by Ulf Hansson

[permalink] [raw]
Subject: Re: [PATCH 05/11] mmc: s3cmci: handle highmem pages

On Wed, 30 Jan 2019 at 09:04, Christoph Hellwig <[email protected]> wrote:
>
> On Wed, Jan 30, 2019 at 08:57:47AM +0100, Ulf Hansson wrote:
> > On Mon, 14 Jan 2019 at 10:58, Christoph Hellwig <[email protected]> wrote:
> > >
> > > Instead of setting up a kernel pointer to track the current PIO address,
> > > track the offset in the current page, and do an atomic kmap for the page
> > > while doing the actual PIO operations.
> > >
> > > Signed-off-by: Christoph Hellwig <[email protected]>
> >
> > Nitpick: This one have some trailing whitespace errors, which git
> > complains about when I apply it.
> >
> > No need to re-spin due to this, I can fix it up.
>
> I'll need to respin to limit the segment size to a page anyway, so I
> will take this into account as well for v2.

Okay!

FYI, I have no further comment on the series, it looks okay to me!

Kind regards
Uffe