2013-07-12 09:24:34

by Sergey Senozhatsky

[permalink] [raw]
Subject: [PATCH] LZ4: compression/decompression signedness mismatch

LZ4 compression and decompression functions require different
in signedness input/output parameters: unsigned char for
compression and signed char for decompression.

Change decompression API to require unsigned char.

Signed-off-by: Sergey Senozhatsky <[email protected]>

---

include/linux/lz4.h | 8 ++++----
lib/lz4/lz4_decompress.c | 8 ++++----
2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/include/linux/lz4.h b/include/linux/lz4.h
index d21c13f..c13f0bc 100644
--- a/include/linux/lz4.h
+++ b/include/linux/lz4.h
@@ -67,8 +67,8 @@ int lz4hc_compress(const unsigned char *src, size_t src_len,
* note : Destination buffer must be already allocated.
* slightly faster than lz4_decompress_unknownoutputsize()
*/
-int lz4_decompress(const char *src, size_t *src_len, char *dest,
- size_t actual_dest_len);
+int lz4_decompress(unsigned const char *src, size_t *src_len,
+ unsigned char *dest, size_t actual_dest_len);

/*
* lz4_decompress_unknownoutputsize()
@@ -82,6 +82,6 @@ int lz4_decompress(const char *src, size_t *src_len, char *dest,
* Error if return (< 0)
* note : Destination buffer must be already allocated.
*/
-int lz4_decompress_unknownoutputsize(const char *src, size_t src_len,
- char *dest, size_t *dest_len);
+int lz4_decompress_unknownoutputsize(unsigned const char *src, size_t src_len,
+ unsigned char *dest, size_t *dest_len);
#endif
diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c
index d3414ea..7ceda1f 100644
--- a/lib/lz4/lz4_decompress.c
+++ b/lib/lz4/lz4_decompress.c
@@ -283,8 +283,8 @@ _output_error:
return (int) (-(((char *) ip) - source));
}

-int lz4_decompress(const char *src, size_t *src_len, char *dest,
- size_t actual_dest_len)
+int lz4_decompress(unsigned const char *src, size_t *src_len,
+ unsigned char *dest, size_t actual_dest_len)
{
int ret = -1;
int input_len = 0;
@@ -302,8 +302,8 @@ exit_0:
EXPORT_SYMBOL_GPL(lz4_decompress);
#endif

-int lz4_decompress_unknownoutputsize(const char *src, size_t src_len,
- char *dest, size_t *dest_len)
+int lz4_decompress_unknownoutputsize(unsigned const char *src, size_t src_len,
+ unsigned char *dest, size_t *dest_len)
{
int ret = -1;
int out_len = 0;


2013-07-12 09:49:01

by Sergey Senozhatsky

[permalink] [raw]
Subject: Re: [PATCH] LZ4: compression/decompression signedness mismatch

On (07/12/13 11:28), Yann Collet wrote:
> The reference implementation, hosted at :�
> [1]https://code.google.com/p/lz4/
> only proposes char* (signed) types as part of the interface contract.
> I would recommend to keep it that way, to remain consistent.
> Regards

Crypto lz4 accepts u8 * for both compression and decompression:

lz4_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
unsigned int slen, u8 *dst, unsigned int *dlen)

lz4_decompress_crypto(struct crypto_tfm *tfm, const u8 *src,
unsigned int slen, u8 *dst, unsigned int *dlen)


Internally LZ4 may cast unsigned char* to signed char*, the same way you
already do with compression:

int lz4_compress(const unsigned char *src, size_t src_len,
unsigned char *dst, size_t *dst_len, void *wrkmem)

calls:
lz4_compressctx(void *ctx,
const char *source, char *dest,
int isize, int maxoutputsize)



At the moment API is a bit misaligned: unsiged char* for compression and signed char* for
decompression.


My 'real word' use case is, suppose:

struct foo {
[..]
int (*compress)(const unsigned char *src, size_t src_len,
unsigned char *dst, size_t *dst_len, void *wrkmem);
int (*decompress)(const unsigned char *src, size_t src_len,
unsigned char *dst, size_t *dst_len);
};


and (for example) module also provides sysfs attribute, so user can switch select
LZO or LZ4 compressions depending of his needs:

->compress = lzo1x_1_compress;
->decompress = lzo1x_decompress_safe;

to
->compress = lz4_compress;
->decompress = lz4_decompress_unknownoutputsize;


the last one produces unneccessary compilation warning.


-ss

> 2013/7/12 Sergey Senozhatsky <[2][email protected]>
>
> LZ4 compression and decompression functions require different
> in signedness input/output parameters: unsigned char for
> compression and signed char for decompression.
>
> Change decompression API to require unsigned char.
>
> Signed-off-by: Sergey Senozhatsky <[3][email protected]>
>
> ---
>
> �include/linux/lz4.h � � �| 8 ++++----
> �lib/lz4/lz4_decompress.c | 8 ++++----
> �2 files changed, 8 insertions(+), 8 deletions(-)
>
> diff --git a/include/linux/lz4.h b/include/linux/lz4.h
> index d21c13f..c13f0bc 100644
> --- a/include/linux/lz4.h
> +++ b/include/linux/lz4.h
> @@ -67,8 +67,8 @@ int lz4hc_compress(const unsigned char *src, size_t
> src_len,
> � * � � note : �Destination buffer must be already allocated.
> � * � � � � � � slightly faster than lz4_decompress_unknownoutputsize()
> � */
> -int lz4_decompress(const char *src, size_t *src_len, char *dest,
> - � � � � � � � size_t actual_dest_len);
> +int lz4_decompress(unsigned const char *src, size_t *src_len,
> + � � � � � � � unsigned char *dest, size_t actual_dest_len);
>
> �/*
> � * lz4_decompress_unknownoutputsize()
> @@ -82,6 +82,6 @@ int lz4_decompress(const char *src, size_t *src_len,
> char *dest,
> � * � � � � � � � Error if return (< 0)
> � * � � note : �Destination buffer must be already allocated.
> � */
> -int lz4_decompress_unknownoutputsize(const char *src, size_t src_len,
> - � � � � � � � char *dest, size_t *dest_len);
> +int lz4_decompress_unknownoutputsize(unsigned const char *src, size_t
> src_len,
> + � � � � � � � unsigned char *dest, size_t *dest_len);
> �#endif
> diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c
> index d3414ea..7ceda1f 100644
> --- a/lib/lz4/lz4_decompress.c
> +++ b/lib/lz4/lz4_decompress.c
> @@ -283,8 +283,8 @@ _output_error:
> � � � � return (int) (-(((char *) ip) - source));
> �}
>
> -int lz4_decompress(const char *src, size_t *src_len, char *dest,
> - � � � � � � � size_t actual_dest_len)
> +int lz4_decompress(unsigned const char *src, size_t *src_len,
> + � � � � � � � unsigned char *dest, size_t actual_dest_len)
> �{
> � � � � int ret = -1;
> � � � � int input_len = 0;
> @@ -302,8 +302,8 @@ exit_0:
> �EXPORT_SYMBOL_GPL(lz4_decompress);
> �#endif
>
> -int lz4_decompress_unknownoutputsize(const char *src, size_t src_len,
> - � � � � � � � char *dest, size_t *dest_len)
> +int lz4_decompress_unknownoutputsize(unsigned const char *src, size_t
> src_len,
> + � � � � � � � unsigned char *dest, size_t *dest_len)
> �{
> � � � � int ret = -1;
> � � � � int out_len = 0;
>
> References
>
> Visible links
> 1. https://code.google.com/p/lz4/
> 2. mailto:[email protected]
> 3. mailto:[email protected]

2013-07-18 21:18:47

by Sergey Senozhatsky

[permalink] [raw]
Subject: Re: [PATCH] LZ4: compression/decompression signedness mismatch

Hello,

On (07/12/13 12:48), Sergey Senozhatsky wrote:
> On (07/12/13 11:28), Yann Collet wrote:
> > The reference implementation, hosted at :�
> > [1]https://code.google.com/p/lz4/
> > only proposes char* (signed) types as part of the interface contract.
> > I would recommend to keep it that way, to remain consistent.
> > Regards
>
> Crypto lz4 accepts u8 * for both compression and decompression:
>
> lz4_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
> unsigned int slen, u8 *dst, unsigned int *dlen)
>
> lz4_decompress_crypto(struct crypto_tfm *tfm, const u8 *src,
> unsigned int slen, u8 *dst, unsigned int *dlen)
>
>
> Internally LZ4 may cast unsigned char* to signed char*, the same way you
> already do with compression:
>
> int lz4_compress(const unsigned char *src, size_t src_len,
> unsigned char *dst, size_t *dst_len, void *wrkmem)
>
> calls:
> lz4_compressctx(void *ctx,
> const char *source, char *dest,
> int isize, int maxoutputsize)
>


+ lib/decompress_unlz4.c
STATIC int INIT decompress(unsigned char *buf, int in_len,
int(*fill)(void*, unsigned int),
int(*flush)(void*, unsigned int),
unsigned char *output,
int *posp,
void(*error)(char *x)

>
> At the moment API is a bit misaligned: unsiged char* for compression and signed char* for
> decompression.
>
>
> My 'real word' use case is, suppose:
>
> struct foo {
> [..]
> int (*compress)(const unsigned char *src, size_t src_len,
> unsigned char *dst, size_t *dst_len, void *wrkmem);
> int (*decompress)(const unsigned char *src, size_t src_len,
> unsigned char *dst, size_t *dst_len);
> };
>
>
> and (for example) module also provides sysfs attribute, so user can switch select
> LZO or LZ4 compressions depending of his needs:
>
> ->compress = lzo1x_1_compress;
> ->decompress = lzo1x_decompress_safe;
>
> to
> ->compress = lz4_compress;
> ->decompress = lz4_decompress_unknownoutputsize;
>
>
> the last one produces unneccessary compilation warning.
>

did you guys have a chance to review the patch? it does not change
implementation/internals, just decompression exported symbols.


-ss

2013-07-18 21:29:59

by Geert Uytterhoeven

[permalink] [raw]
Subject: Re: [PATCH] LZ4: compression/decompression signedness mismatch

On Thu, Jul 18, 2013 at 11:18 PM, Sergey Senozhatsky
<[email protected]> wrote:
> On (07/12/13 12:48), Sergey Senozhatsky wrote:
>> On (07/12/13 11:28), Yann Collet wrote:
>> > The reference implementation, hosted at :�
>> > [1]https://code.google.com/p/lz4/
>> > only proposes char* (signed) types as part of the interface contract.
>> > I would recommend to keep it that way, to remain consistent.
>> > Regards
>>
>> Crypto lz4 accepts u8 * for both compression and decompression:
>>
>> lz4_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
>> unsigned int slen, u8 *dst, unsigned int *dlen)
>>
>> lz4_decompress_crypto(struct crypto_tfm *tfm, const u8 *src,
>> unsigned int slen, u8 *dst, unsigned int *dlen)
>>
>>
>> Internally LZ4 may cast unsigned char* to signed char*, the same way you
>> already do with compression:
>>
>> int lz4_compress(const unsigned char *src, size_t src_len,
>> unsigned char *dst, size_t *dst_len, void *wrkmem)
>>
>> calls:
>> lz4_compressctx(void *ctx,
>> const char *source, char *dest,
>> int isize, int maxoutputsize)
>>
>
>
> + lib/decompress_unlz4.c
> STATIC int INIT decompress(unsigned char *buf, int in_len,
> int(*fill)(void*, unsigned int),
> int(*flush)(void*, unsigned int),
> unsigned char *output,
> int *posp,
> void(*error)(char *x)
>
>>
>> At the moment API is a bit misaligned: unsiged char* for compression and signed char* for
>> decompression.
>>
>>
>> My 'real word' use case is, suppose:
>>
>> struct foo {
>> [..]
>> int (*compress)(const unsigned char *src, size_t src_len,
>> unsigned char *dst, size_t *dst_len, void *wrkmem);
>> int (*decompress)(const unsigned char *src, size_t src_len,
>> unsigned char *dst, size_t *dst_len);
>> };
>>
>>
>> and (for example) module also provides sysfs attribute, so user can switch select
>> LZO or LZ4 compressions depending of his needs:
>>
>> ->compress = lzo1x_1_compress;
>> ->decompress = lzo1x_decompress_safe;
>>
>> to
>> ->compress = lz4_compress;
>> ->decompress = lz4_decompress_unknownoutputsize;
>>
>>
>> the last one produces unneccessary compilation warning.
>>
>
> did you guys have a chance to review the patch? it does not change
> implementation/internals, just decompression exported symbols.

IMHO, all these memory buffers should be of type "(const) void *", cfr.
e.g. read(2) and memcpy(3).

This avoids casts in the callers.

Gr{oetje,eeting}s,

Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- [email protected]

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds

2013-07-19 08:27:10

by Kyungsik Lee

[permalink] [raw]
Subject: Re: [PATCH] LZ4: compression/decompression signedness mismatch

On Thu, Jul 18, 2013 at 11:29:57PM +0200, Geert Uytterhoeven wrote:
> On Thu, Jul 18, 2013 at 11:18 PM, Sergey Senozhatsky
> <[email protected]> wrote:
> > On (07/12/13 12:48), Sergey Senozhatsky wrote:
> >> On (07/12/13 11:28), Yann Collet wrote:
> >> > The reference implementation, hosted at :�
> >> > [1]https://code.google.com/p/lz4/
> >> > only proposes char* (signed) types as part of the interface contract.
> >> > I would recommend to keep it that way, to remain consistent.
> >> > Regards
> >>
> >> Crypto lz4 accepts u8 * for both compression and decompression:
> >>
> >> lz4_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
> >> unsigned int slen, u8 *dst, unsigned int *dlen)
> >>
> >> lz4_decompress_crypto(struct crypto_tfm *tfm, const u8 *src,
> >> unsigned int slen, u8 *dst, unsigned int *dlen)
> >>
> >>
> >> Internally LZ4 may cast unsigned char* to signed char*, the same way you
> >> already do with compression:
> >>
> >> int lz4_compress(const unsigned char *src, size_t src_len,
> >> unsigned char *dst, size_t *dst_len, void *wrkmem)
> >>
> >> calls:
> >> lz4_compressctx(void *ctx,
> >> const char *source, char *dest,
> >> int isize, int maxoutputsize)
> >>
> >
> >
> > + lib/decompress_unlz4.c
> > STATIC int INIT decompress(unsigned char *buf, int in_len,
> > int(*fill)(void*, unsigned int),
> > int(*flush)(void*, unsigned int),
> > unsigned char *output,
> > int *posp,
> > void(*error)(char *x)
> >
> >>
> >> At the moment API is a bit misaligned: unsiged char* for compression and signed char* for
> >> decompression.
> >>
> >>
> >> My 'real word' use case is, suppose:
> >>
> >> struct foo {
> >> [..]
> >> int (*compress)(const unsigned char *src, size_t src_len,
> >> unsigned char *dst, size_t *dst_len, void *wrkmem);
> >> int (*decompress)(const unsigned char *src, size_t src_len,
> >> unsigned char *dst, size_t *dst_len);
> >> };
> >>
> >>
> >> and (for example) module also provides sysfs attribute, so user can switch select
> >> LZO or LZ4 compressions depending of his needs:
> >>
> >> ->compress = lzo1x_1_compress;
> >> ->decompress = lzo1x_decompress_safe;
> >>
> >> to
> >> ->compress = lz4_compress;
> >> ->decompress = lz4_decompress_unknownoutputsize;
> >>
> >>
> >> the last one produces unneccessary compilation warning.
> >>
> >
> > did you guys have a chance to review the patch? it does not change
> > implementation/internals, just decompression exported symbols.
>
> IMHO, all these memory buffers should be of type "(const) void *", cfr.
> e.g. read(2) and memcpy(3).
>
> This avoids casts in the callers.

How about using "const unsigned char *" for the exported symbols in the
patch?
It is OK unless the patch changes the internal implementation.

Thanks,
Kyungsik

2013-07-19 09:05:27

by Sergey Senozhatsky

[permalink] [raw]
Subject: Re: [PATCH] LZ4: compression/decompression signedness mismatch

On (07/19/13 17:27), Kyungsik Lee wrote:
> > >> On (07/12/13 11:28), Yann Collet wrote:
> > >> > The reference implementation, hosted at :�
> > >> > [1]https://code.google.com/p/lz4/
> > >> > only proposes char* (signed) types as part of the interface contract.
> > >> > I would recommend to keep it that way, to remain consistent.
> > >> > Regards
> > >>
> > >> Crypto lz4 accepts u8 * for both compression and decompression:
> > >>
> > >> lz4_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
> > >> unsigned int slen, u8 *dst, unsigned int *dlen)
> > >>
> > >> lz4_decompress_crypto(struct crypto_tfm *tfm, const u8 *src,
> > >> unsigned int slen, u8 *dst, unsigned int *dlen)
> > >>
> > >>
> > >> Internally LZ4 may cast unsigned char* to signed char*, the same way you
> > >> already do with compression:
> > >>
> > >> int lz4_compress(const unsigned char *src, size_t src_len,
> > >> unsigned char *dst, size_t *dst_len, void *wrkmem)
> > >>
> > >> calls:
> > >> lz4_compressctx(void *ctx,
> > >> const char *source, char *dest,
> > >> int isize, int maxoutputsize)
> > >>
> > >
> > >
> > > + lib/decompress_unlz4.c
> > > STATIC int INIT decompress(unsigned char *buf, int in_len,
> > > int(*fill)(void*, unsigned int),
> > > int(*flush)(void*, unsigned int),
> > > unsigned char *output,
> > > int *posp,
> > > void(*error)(char *x)
> > >
> > >>
> > >> At the moment API is a bit misaligned: unsiged char* for compression and signed char* for
> > >> decompression.
> > >>
> > >>
> > >> My 'real word' use case is, suppose:
> > >>
> > >> struct foo {
> > >> [..]
> > >> int (*compress)(const unsigned char *src, size_t src_len,
> > >> unsigned char *dst, size_t *dst_len, void *wrkmem);
> > >> int (*decompress)(const unsigned char *src, size_t src_len,
> > >> unsigned char *dst, size_t *dst_len);
> > >> };
> > >>
> > >>
> > >> and (for example) module also provides sysfs attribute, so user can switch select
> > >> LZO or LZ4 compressions depending of his needs:
> > >>
> > >> ->compress = lzo1x_1_compress;
> > >> ->decompress = lzo1x_decompress_safe;
> > >>
> > >> to
> > >> ->compress = lz4_compress;
> > >> ->decompress = lz4_decompress_unknownoutputsize;
> > >>
> > >>
> > >> the last one produces unneccessary compilation warning.
> > >>
> > >
> > > did you guys have a chance to review the patch? it does not change
> > > implementation/internals, just decompression exported symbols.
> >
> > IMHO, all these memory buffers should be of type "(const) void *", cfr.
> > e.g. read(2) and memcpy(3).
> >
> > This avoids casts in the callers.
>
> How about using "const unsigned char *" for the exported symbols in the
> patch?

will resend shortly.

> It is OK unless the patch changes the internal implementation.
>

it doesn't.

thanks,
-ss

2013-07-19 09:09:11

by Sergey Senozhatsky

[permalink] [raw]
Subject: [PATCH] LZ4: compression/decompression signedness mismatch (v2)

LZ4 compression and decompression functions require different
in signedness input/output parameters: unsigned char for
compression and signed char for decompression.

Change decompression API to require "(const) unsigned char *".

v2: minor coding style fix.

Signed-off-by: Sergey Senozhatsky <[email protected]>

---

include/linux/lz4.h | 8 ++++----
lib/lz4/lz4_decompress.c | 8 ++++----
2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/include/linux/lz4.h b/include/linux/lz4.h
index d21c13f..4356686 100644
--- a/include/linux/lz4.h
+++ b/include/linux/lz4.h
@@ -67,8 +67,8 @@ int lz4hc_compress(const unsigned char *src, size_t src_len,
* note : Destination buffer must be already allocated.
* slightly faster than lz4_decompress_unknownoutputsize()
*/
-int lz4_decompress(const char *src, size_t *src_len, char *dest,
- size_t actual_dest_len);
+int lz4_decompress(const unsigned char *src, size_t *src_len,
+ unsigned char *dest, size_t actual_dest_len);

/*
* lz4_decompress_unknownoutputsize()
@@ -82,6 +82,6 @@ int lz4_decompress(const char *src, size_t *src_len, char *dest,
* Error if return (< 0)
* note : Destination buffer must be already allocated.
*/
-int lz4_decompress_unknownoutputsize(const char *src, size_t src_len,
- char *dest, size_t *dest_len);
+int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len,
+ unsigned char *dest, size_t *dest_len);
#endif
diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c
index d3414ea..4c069b2 100644
--- a/lib/lz4/lz4_decompress.c
+++ b/lib/lz4/lz4_decompress.c
@@ -283,8 +283,8 @@ _output_error:
return (int) (-(((char *) ip) - source));
}

-int lz4_decompress(const char *src, size_t *src_len, char *dest,
- size_t actual_dest_len)
+int lz4_decompress(const unsigned char *src, size_t *src_len,
+ unsigned char *dest, size_t actual_dest_len)
{
int ret = -1;
int input_len = 0;
@@ -302,8 +302,8 @@ exit_0:
EXPORT_SYMBOL_GPL(lz4_decompress);
#endif

-int lz4_decompress_unknownoutputsize(const char *src, size_t src_len,
- char *dest, size_t *dest_len)
+int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len,
+ unsigned char *dest, size_t *dest_len)
{
int ret = -1;
int out_len = 0;