2021-08-06 07:34:17

by Xianting Tian

[permalink] [raw]
Subject: [PATCH v4 1/2] tty: hvc: pass DMA capable memory to put_chars()

As is well known, an hvc backend can register its operations with the hvc
framework. The operations include put_chars(), get_chars() and so on.

Some hvc backends may do DMA in their operations, e.g. put_chars() of
virtio-console. But the hvc framework may pass DMA-incapable memory to
put_chars() under a specific configuration, which is explained in
commit c4baad5029 (virtio-console: avoid DMA from stack):
1, c[] is on stack,
hvc_console_print():
char c[N_OUTBUF] __ALIGNED__;
cons_ops[index]->put_chars(vtermnos[index], c, i);
2, ch is on stack,
static void hvc_poll_put_char(,,char ch)
{
struct tty_struct *tty = driver->ttys[0];
struct hvc_struct *hp = tty->driver_data;
int n;

do {
n = hp->ops->put_chars(hp->vtermno, &ch, 1);
} while (n <= 0);
}

Commit c4baad5029 is just a fix to avoid DMA from the stack memory that
the hvc framework passes to virtio-console in the code above. But I think
the fix is aggressive: it unconditionally uses kmemdup() to allocate a new
buffer from the kmalloc area and does a memcpy, whether or not the memory
is already in the kmalloc area. More importantly, this is better fixed in
the hvc framework, by changing it to never pass stack memory to the
put_chars() function in the first place. Otherwise, we will still face the
same issue if a new hvc backend using DMA is added in the future.

We make 'char c[N_OUTBUF]' part of 'struct hvc_struct', so hp->c is no
longer stack memory. We can use it in the above two cases.

Other cleanup is to make 'hp->outbuf' aligned and use struct_size() to
calculate the size of hvc_struct.

With the patch, we can remove the fix c4baad5029.

Signed-off-by: Xianting Tian <[email protected]>
Tested-by: Xianting Tian <[email protected]>
---
drivers/tty/hvc/hvc_console.c | 33 ++++++++++++++++++---------------
drivers/tty/hvc/hvc_console.h | 16 ++++++++++++++--
2 files changed, 32 insertions(+), 17 deletions(-)

diff --git a/drivers/tty/hvc/hvc_console.c b/drivers/tty/hvc/hvc_console.c
index 5bb8c4e44..3afdb169c 100644
--- a/drivers/tty/hvc/hvc_console.c
+++ b/drivers/tty/hvc/hvc_console.c
@@ -41,16 +41,6 @@
*/
#define HVC_CLOSE_WAIT (HZ/100) /* 1/10 of a second */

-/*
- * These sizes are most efficient for vio, because they are the
- * native transfer size. We could make them selectable in the
- * future to better deal with backends that want other buffer sizes.
- */
-#define N_OUTBUF 16
-#define N_INBUF 16
-
-#define __ALIGNED__ __attribute__((__aligned__(sizeof(long))))
-
static struct tty_driver *hvc_driver;
static struct task_struct *hvc_task;

@@ -151,9 +141,11 @@ static uint32_t vtermnos[MAX_NR_HVC_CONSOLES] =
static void hvc_console_print(struct console *co, const char *b,
unsigned count)
{
- char c[N_OUTBUF] __ALIGNED__;
+ char *c;
unsigned i = 0, n = 0;
int r, donecr = 0, index = co->index;
+ unsigned long flags;
+ struct hvc_struct *hp;

/* Console access attempt outside of acceptable console range. */
if (index >= MAX_NR_HVC_CONSOLES)
@@ -163,6 +155,13 @@ static void hvc_console_print(struct console *co, const char *b,
if (vtermnos[index] == -1)
return;

+ list_for_each_entry(hp, &hvc_structs, next)
+ if (hp->vtermno == vtermnos[index])
+ break;
+
+ c = hp->c;
+
+ spin_lock_irqsave(&hp->c_lock, flags);
while (count > 0 || i > 0) {
if (count > 0 && i < sizeof(c)) {
if (b[n] == '\n' && !donecr) {
@@ -191,6 +190,7 @@ static void hvc_console_print(struct console *co, const char *b,
}
}
}
+ spin_unlock_irqrestore(&hp->c_lock, flags);
hvc_console_flush(cons_ops[index], vtermnos[index]);
}

@@ -878,9 +878,13 @@ static void hvc_poll_put_char(struct tty_driver *driver, int line, char ch)
struct tty_struct *tty = driver->ttys[0];
struct hvc_struct *hp = tty->driver_data;
int n;
+ unsigned long flags;

do {
- n = hp->ops->put_chars(hp->vtermno, &ch, 1);
+ spin_lock_irqsave(&hp->c_lock, flags);
+ hp->c[0] = ch;
+ n = hp->ops->put_chars(hp->vtermno, hp->c, 1);
+ spin_unlock_irqrestore(&hp->c_lock, flags);
} while (n <= 0);
}
#endif
@@ -922,8 +926,7 @@ struct hvc_struct *hvc_alloc(uint32_t vtermno, int data,
return ERR_PTR(err);
}

- hp = kzalloc(ALIGN(sizeof(*hp), sizeof(long)) + outbuf_size,
- GFP_KERNEL);
+ hp = kzalloc(struct_size(hp, outbuf, outbuf_size), GFP_KERNEL);
if (!hp)
return ERR_PTR(-ENOMEM);

@@ -931,13 +934,13 @@ struct hvc_struct *hvc_alloc(uint32_t vtermno, int data,
hp->data = data;
hp->ops = ops;
hp->outbuf_size = outbuf_size;
- hp->outbuf = &((char *)hp)[ALIGN(sizeof(*hp), sizeof(long))];

tty_port_init(&hp->port);
hp->port.ops = &hvc_port_ops;

INIT_WORK(&hp->tty_resize, hvc_set_winsz);
spin_lock_init(&hp->lock);
+ spin_lock_init(&hp->c_lock);
mutex_lock(&hvc_structs_mutex);

/*
diff --git a/drivers/tty/hvc/hvc_console.h b/drivers/tty/hvc/hvc_console.h
index 18d005814..52374e2da 100644
--- a/drivers/tty/hvc/hvc_console.h
+++ b/drivers/tty/hvc/hvc_console.h
@@ -32,13 +32,21 @@
*/
#define HVC_ALLOC_TTY_ADAPTERS 8

+/*
+ * These sizes are most efficient for vio, because they are the
+ * native transfer size. We could make them selectable in the
+ * future to better deal with backends that want other buffer sizes.
+ */
+#define N_OUTBUF 16
+#define N_INBUF 16
+
+#define __ALIGNED__ __attribute__((__aligned__(sizeof(long))))
+
struct hvc_struct {
struct tty_port port;
spinlock_t lock;
int index;
int do_wakeup;
- char *outbuf;
- int outbuf_size;
int n_outbuf;
uint32_t vtermno;
const struct hv_ops *ops;
@@ -48,6 +56,10 @@ struct hvc_struct {
struct work_struct tty_resize;
struct list_head next;
unsigned long flags;
+ spinlock_t c_lock;
+ char c[N_OUTBUF] __ALIGNED__;
+ int outbuf_size;
+ char outbuf[0] __ALIGNED__;
};

/* implemented by a low level driver */
--
2.17.1


2021-08-06 22:20:48

by Arnd Bergmann

[permalink] [raw]
Subject: Re: [PATCH v4 1/2] tty: hvc: pass DMA capable memory to put_chars()

On Fri, Aug 6, 2021 at 5:01 AM Xianting Tian
<[email protected]> wrote:
> @@ -163,6 +155,13 @@ static void hvc_console_print(struct console *co, const char *b,
> if (vtermnos[index] == -1)
> return;
>
> + list_for_each_entry(hp, &hvc_structs, next)
> + if (hp->vtermno == vtermnos[index])
> + break;
> +
> + c = hp->c;
> +
> + spin_lock_irqsave(&hp->c_lock, flags);

The loop looks like it might race against changes to the list. It seems strange
that the print function has to actually search for the structure here.

It may be better to have yet another array for the buffer pointers next to
the cons_ops[] and vtermnos[] arrays.

> +/*
> + * These sizes are most efficient for vio, because they are the
> + * native transfer size. We could make them selectable in the
> + * future to better deal with backends that want other buffer sizes.
> + */
> +#define N_OUTBUF 16
> +#define N_INBUF 16
> +
> +#define __ALIGNED__ __attribute__((__aligned__(sizeof(long))))

I think you need a higher alignment for DMA buffers, instead of sizeof(long),
I would suggest ARCH_DMA_MINALIGN.

Arnd

2021-08-07 15:01:26

by Xianting Tian

[permalink] [raw]
Subject: Re: [PATCH v4 1/2] tty: hvc: pass DMA capable memory to put_chars()


在 2021/8/6 下午10:51, Arnd Bergmann 写道:
> On Fri, Aug 6, 2021 at 5:01 AM Xianting Tian
> <[email protected]> wrote:
>> @@ -163,6 +155,13 @@ static void hvc_console_print(struct console *co, const char *b,
>> if (vtermnos[index] == -1)
>> return;
>>
>> + list_for_each_entry(hp, &hvc_structs, next)
>> + if (hp->vtermno == vtermnos[index])
>> + break;
>> +
>> + c = hp->c;
>> +
>> + spin_lock_irqsave(&hp->c_lock, flags);
> The loop looks like it might race against changes to the list. It seems strange
> that the print function has to actually search for the structure here.
>
> It may be better to have yet another array for the buffer pointers next to
> the cons_ops[] and vtermnos[] arrays.
I will make the change in v5, thanks.
>
>> +/*
>> + * These sizes are most efficient for vio, because they are the
>> + * native transfer size. We could make them selectable in the
>> + * future to better deal with backends that want other buffer sizes.
>> + */
>> +#define N_OUTBUF 16
>> +#define N_INBUF 16
>> +
>> +#define __ALIGNED__ __attribute__((__aligned__(sizeof(long))))
> I think you need a higher alignment for DMA buffers, instead of sizeof(long),
> I would suggest ARCH_DMA_MINALIGN.

thanks, I will fix it in v5:

#define __ALIGNED__ __attribute__((__aligned__(ARCH_DMA_MINALIGN)))

>
> Arnd

2021-08-12 08:09:03

by Xianting Tian

[permalink] [raw]
Subject: Re: [PATCH v4 1/2] tty: hvc: pass DMA capable memory to put_chars()


在 2021/8/6 下午10:51, Arnd Bergmann 写道:
> On Fri, Aug 6, 2021 at 5:01 AM Xianting Tian
> <[email protected]> wrote:
>> @@ -163,6 +155,13 @@ static void hvc_console_print(struct console *co, const char *b,
>> if (vtermnos[index] == -1)
>> return;
>>
>> + list_for_each_entry(hp, &hvc_structs, next)
>> + if (hp->vtermno == vtermnos[index])
>> + break;
>> +
>> + c = hp->c;
>> +
>> + spin_lock_irqsave(&hp->c_lock, flags);
> The loop looks like it might race against changes to the list. It seems strange
> that the print function has to actually search for the structure here.
>
> It may be better to have yet another array for the buffer pointers next to
> the cons_ops[] and vtermnos[] arrays.
>
>> +/*
>> + * These sizes are most efficient for vio, because they are the
>> + * native transfer size. We could make them selectable in the
>> + * future to better deal with backends that want other buffer sizes.
>> + */
>> +#define N_OUTBUF 16
>> +#define N_INBUF 16
>> +
>> +#define __ALIGNED__ __attribute__((__aligned__(sizeof(long))))
> I think you need a higher alignment for DMA buffers, instead of sizeof(long),
> I would suggest ARCH_DMA_MINALIGN.

Since some architectures (e.g. x86, riscv) don't define ARCH_DMA_MINALIGN,
I think it's better to leave the code unchanged.

I will send v5 patch soon.

>
> Arnd

2021-08-12 08:56:42

by Arnd Bergmann

[permalink] [raw]
Subject: Re: [PATCH v4 1/2] tty: hvc: pass DMA capable memory to put_chars()

On Thu, Aug 12, 2021 at 10:08 AM Xianting TIan
<[email protected]> wrote:
> 在 2021/8/6 下午10:51, Arnd Bergmann 写道:
> > On Fri, Aug 6, 2021 at 5:01 AM Xianting Tian
> >> +#define __ALIGNED__ __attribute__((__aligned__(sizeof(long))))
> > I think you need a higher alignment for DMA buffers, instead of sizeof(long),
> > I would suggest ARCH_DMA_MINALIGN.
>
> As some ARCH(eg, x86, riscv) doesn't define ARCH_DMA_MINALIG, so i think
> it 's better remain the code unchanged,
>
> I will send v5 patch soon.

I think you could just use "L1_CACHE_BYTES" as the alignment in this case.
This will make the structure slightly larger for architectures that do not have
alignment constraints on DMA buffers, but using a smaller alignment is
clearly wrong. Another option would be to use ARCH_KMALLOC_MINALIGN.

Note that there is a patch to add ARCH_DMA_MINALIGN to riscv already,
as some implementations do not have coherent DMA. I had failed to
realize though that on x86 you do not get an ARCH_DMA_MINALIGN
definition.

Arnd

2021-08-12 10:01:12

by Xianting Tian

[permalink] [raw]
Subject: Re: [PATCH v4 1/2] tty: hvc: pass DMA capable memory to put_chars()


在 2021/8/12 下午4:54, Arnd Bergmann 写道:
> On Thu, Aug 12, 2021 at 10:08 AM Xianting TIan
> <[email protected]> wrote:
>> 在 2021/8/6 下午10:51, Arnd Bergmann 写道:
>>> On Fri, Aug 6, 2021 at 5:01 AM Xianting Tian
>>>> +#define __ALIGNED__ __attribute__((__aligned__(sizeof(long))))
>>> I think you need a higher alignment for DMA buffers, instead of sizeof(long),
>>> I would suggest ARCH_DMA_MINALIGN.
>> As some ARCH(eg, x86, riscv) doesn't define ARCH_DMA_MINALIG, so i think
>> it 's better remain the code unchanged,
>>
>> I will send v5 patch soon.
> I think you could just use "L1_CACHE_BYTES" as the alignment in this case.
> This will make the structure slightly larger for architectures that do not have
> alignment constraints on DMA buffers, but using a smaller alignment is
> clearly wrong. Another option would be to use ARCH_KMALLOC_MINALIGN.
Yes, I understand; the alignment must be at least L1_CACHE_BYTES.
>
> Note that there is a patch to add ARCH_DMA_MINALIGN to riscv already,
Yes, I submitted this patch; it is under discussion, but it seems they
don't want to apply it.
> as some implementations do not have coherent DMA. I had failed to
> realized though that on x86 you do not get an ARCH_DMA_MINALIGN
> definition.
I didn't find the definition in arch/x86/include/asm/cache.h or anywhere
else; x86 is DMA coherent, so it may not need it.
>
> Arnd