2002-02-19 15:41:22

by Jakob Kemi

[permalink] [raw]
Subject: [PATCH] hex <-> int conversion routines.

Hi,

When grepping through the kernel I noticed that there exists at least 27
(probably more with non-obvious names) different implementations of hex to
int conversion functions. 2/3 of them in arch and the rest in drivers and fs.
All implementations were variations of this function:

int hex_nibble (char x)
{
if (x >= '0' && x <= '9') return x - '0';
if (x >= 'a' && x <= 'f') return x - 'a' + 10;
if (x >= 'A' && x <= 'F') return x - 'A' + 10;
return -1;
}

My orignial plan was to replace some of them with this version, which (at
least on x86) compiles to roughly half the size and runs twice as fast.

int hex_nibble(char x)
{
unsigned int y; /* Unsigned for correct wrapping */

if ((y = x - '0') <= '9'-'0') return y;
if ((y = x - 'A') <= 'F'-'A') return y+10;
if ((y = x - 'a') <= 'f'-'a') return y+10;
return -1;
}

But due to the many duplicated implementations I think it would be good to
have it in a library function.

I also added three other hex-functions that can replace a lot of duplicated code.

int hexint_nibble (char x); // hex digit to int.
int hexint_byte (const char *src); // hex digit-pair to int.
char inthex_nibble (int x); // int to hex digit.
void inthex_byte (int x, char* dest); // int to hex digit pair.

I see no point in modifying any other code to use these functions until they
are approved. If it get approved, however I can feed patches to the relevant
maintainers.

Regards,
Jakob Kemi

diff -urN -X dontdiff linux-2.5.5-pre1-vanilla/include/linux/hexint.h linux-2.5.5-pre1/include/linux/hexint.h
--- linux-2.5.5-pre1-vanilla/include/linux/hexint.h Thu Jan 1 01:00:00 1970
+++ linux-2.5.5-pre1/include/linux/hexint.h Tue Feb 19 16:24:26 2002
@@ -0,0 +1,85 @@
+/*
+ * include/linux/hexint.h - hex<->int conversions.
+ *
+ * Copyleft (C) 2002, Jakob Kemi <[email protected]>
+ */
+
+#ifndef _LINUX_HEXINT_H
+#define _LINUX_HEXINT_H
+
+#ifdef __KERNEL__
+
+/**
+ * hexint_byte - Convert a hex digit pair to an int.
+ * @src: Pointer to source data.
+ *
+ * Return:
+ * converted value (0-255) on success.
+ * -1 on failure.
+ */
+extern int hexint_byte(const char *src);
+
+/**
+ * _hexint_byte - Same as hexint_byte but inlined.
+ */
+static __inline__ int _hexint_byte(const char *src)
+{
+ unsigned int y; /* unsigned for correct wrapping. */
+ int h;
+
+ /* high part. */
+ if ((y = src[0] - '0') <= '9'-'0') h = y;
+ else if ((y = src[0] - 'a') <= 'f'-'a') h = y+10;
+ else if ((y = src[0] - 'A') <= 'F'-'A') h = y+10;
+ else return -1;
+ h = h << 4;
+
+ /* low part. */
+ if ((y = src[1] - '0') <= '9'-'0') return h | y;
+ if ((y = src[1] - 'a') <= 'f'-'a') return h | (y+10);
+ if ((y = src[1] - 'A') <= 'F'-'A') return h | (y+10);
+ return -1;
+}
+
+/**
+ * hexint_nibble - Convert a hex digit to an int.
+ * @x: digit to convert.
+ *
+ * Return:
+ * converted value (0-15) on success.
+ * -1 on failure.
+ */
+static __inline__ int hexint_nibble(char x)
+{
+ unsigned int y; /* unsigned for correct wrapping */
+
+ if ((y = x - '0') <= '9'-'0') return y;
+ if ((y = x - 'a') <= 'f'-'a') return y+10;
+ if ((y = x - 'A') <= 'F'-'A') return y+10;
+ return -1;
+}
+
+/**
+ * inthex_nibble - Convert an int to a hex digit.
+ */
+static __inline__ char inthex_nibble(int x)
+{
+ const char* digits = "0123456789abcdef";
+
+ return digits[x & 0x0f];
+}
+
+/**
+ * inthex_byte - Convert an int to a hex digit pair.
+ */
+static __inline__ void inthex_byte(int x, char* dest)
+{
+ const char* digits = "0123456789abcdef";
+
+ dest[0] = digits[(x & 0xf0) >> 4];
+ dest[1] = digits[x & 0x0f];
+}
+
+#endif /* __KERNEL__ */
+
+#endif
diff -urN -X dontdiff linux-2.5.5-pre1-vanilla/lib/Makefile linux-2.5.5-pre1/lib/Makefile
--- linux-2.5.5-pre1-vanilla/lib/Makefile Sun Feb 3 14:04:47 2002
+++ linux-2.5.5-pre1/lib/Makefile Tue Feb 19 15:55:37 2002
@@ -10,7 +10,7 @@

export-objs := cmdline.o dec_and_lock.o rwsem-spinlock.o rwsem.o crc32.o

-obj-y := errno.o ctype.o string.o vsprintf.o brlock.o cmdline.o bust_spinlocks.o rbtree.o
+obj-y := errno.o ctype.o string.o vsprintf.o brlock.o cmdline.o bust_spinlocks.o rbtree.o hexint.o

obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
diff -urN -X dontdiff linux-2.5.5-pre1-vanilla/lib/hexint.c linux-2.5.5-pre1/lib/hexint.c
--- linux-2.5.5-pre1-vanilla/lib/hexint.c Thu Jan 1 01:00:00 1970
+++ linux-2.5.5-pre1/lib/hexint.c Tue Feb 19 16:26:00 2002
@@ -0,0 +1,33 @@
+/*
+ * linux/lib/hexint.c - hex<->int conversions.
+ *
+ * Copyleft (C) 2002, Jakob Kemi <[email protected]>
+ */
+
+
+/**
+ * hexint_byte - Convert an hex digit pair to an int.
+ * @src: Pointer to source data.
+ *
+ * Return:
+ * converted value (0-255) on success.
+ * -1 on failure.
+ */
+int hexint_byte(const char *src)
+{
+ unsigned int y; /* unsigned for correct wrapping. */
+ int h;
+
+ /* high part. */
+ if ((y = src[0] - '0') <= '9'-'0') h = y;
+ else if ((y = src[0] - 'a') <= 'f'-'a') h = y+10;
+ else if ((y = src[0] - 'A') <= 'F'-'A') h = y+10;
+ else return -1;
+ h = h << 4;
+
+ /* low part. */
+ if ((y = src[1] - '0') <= '9'-'0') return h | y;
+ if ((y = src[1] - 'a') <= 'f'-'a') return h | (y+10);
+ if ((y = src[1] - 'A') <= 'F'-'A') return h | (y+10);
+ return -1;
+}


2002-02-19 15:53:36

by Martin Dalecki

[permalink] [raw]
Subject: Re: [PATCH] hex <-> int conversion routines.

Jakob Kemi wrote:
> Hi,
>
> When grepping through the kernel I noticed that there exists at least 27
> (probably more with non-obvious names) different implementations of hex to
> int conversion functions. 2/3 of them in arch and the rest in drivers and fs.
> All implementations were variations of this function:

Fine...

> +static __inline__ int _hexint_byte(const char *src)
> +{

...

> +}

Not worth inlining. char conversion are seldomly time critical.


> +static __inline__ int hexint_nibble(char x)

Same applies here.

> +static __inline__ char inthex_nibble(int x)
> +{
> + const char* digits = "0123456789abcdef";
> +
> + return digits[x & 0x0f];
> +}

perhaps better do static const char *digits.

> +static __inline__ void inthex_byte(int x, char* dest)
> +{
> + const char* digits = "0123456789abcdef";
> +
> + dest[0] = digits[(x & 0xf0) >> 4];
> + dest[1] = digits[x & 0x0f];
> +}

perhaps better do static const char *digits.

Regards.

2002-02-19 16:49:37

by Richard B. Johnson

[permalink] [raw]
Subject: Re: [PATCH] hex <-> int conversion routines.


You might want this. Code generation is 0x34 bytes for Intel with
whatever compiler I'm using. It's generic. You could do the conversion
with no compares on intel, in 18 bytes using assembly, but that would
not be generic.


/*
* Generic routine which returns the value of an arbitrary string
* of hex digits. It handles 'A' thru 'F' as well as 'a' thru 'f'.
* Courtesy of [email protected]
*/
int hex2bin(const char *hex)
{
int i, j;
j = 0;
while(*hex) {
j <<= 4;
if((i = (int) *hex++ - '0') > 9)
i -= 7;
j |= i & 95;
}
return j;
}


Cheers,
Dick Johnson

Penguin : Linux version 2.4.1 on an i686 machine (797.90 BogoMips).

I was going to compile a list of innovations that could be
attributed to Microsoft. Once I realized that Ctrl-Alt-Del
was handled in the BIOS, I found that there aren't any.


2002-02-19 18:04:30

by Linus Torvalds

[permalink] [raw]
Subject: Re: [PATCH] hex <-> int conversion routines.



On Tue, 19 Feb 2002, Jakob Kemi wrote:
>
> I also added three other hex-functions that can replace a lot of duplicated code.
>
> int hexint_nibble (char x); // hex digit to int.
> int hexint_byte (const char *src); // hex digit-pair to int.
> char inthex_nibble (int x); // int to hex digit.
> void inthex_byte (int x, char* dest); // int to hex digit pair.

Is there any reason to do all of this?

I suspect 99% of all users can (and probably should) be replaced with
"sscanf()" instead. Which does a lot more, of course, and is not the
fastest thing out there due to that, but anybody who does hex->int
conversion inside some critical loop is just crazy.

Linus

2002-02-19 18:51:31

by Jakob Kemi

[permalink] [raw]
Subject: Re: [PATCH] hex <-> int conversion routines.

> > +static __inline__ int _hexint_byte(const char *src)
> > +{
>
> ...
>
> > +}
>
> Not worth inlining. char conversion are seldomly time critical.
ok, I removed the inline version.

> > +static __inline__ int hexint_nibble(char x)
>
> Same applies here.
I added a tiny function, __hexint_nibble() which doesn't do error checking.
it's very small and can be used inline if needed.

> > +static __inline__ char inthex_nibble(int x)
> > +{
> > + const char* digits = "0123456789abcdef";
> > +
> > + return digits[x & 0x0f];
> > +}
>
> perhaps better do static const char *digits.
GCC doesn't copy const strings, as opposed to other const arrays.
So it should be fine as it is. GCC also reuse duplicated strings.

Regards,
Jakob Kemi

2002-02-19 19:03:31

by Bernd Petrovitsch

[permalink] [raw]
Subject: Re: [PATCH] hex <-> int conversion routines.

Jakob Kemi <[email protected]> wrote:
>> > +static __inline__ char inthex_nibble(int x)
>> > +{
>> > + const char* digits = "0123456789abcdef";
>> > +
>> > + return digits[x & 0x0f];
>> > +}
>>
>> perhaps better do static const char *digits.
>GCC doesn't copy const strings, as opposed to other const arrays.
>So it should be fine as it is. GCC also reuse duplicated strings.

You could also do
return "0123456789abcdef"[x & 0x0f];
though some will find it bad, ugly, wrong
or make a file-global
static const char digits[] = "0123456789abcdef";

Bernd
--
Bernd Petrovitsch Email : [email protected]
g.a.m.s gmbh Fax : +43 1 205255-900
Prinz-Eugen-Stra?e 8 A-1040 Vienna/Austria/Europe
LUGA : http://www.luga.at


2002-02-19 19:04:51

by H. Peter Anvin

[permalink] [raw]
Subject: Re: [PATCH] hex <-> int conversion routines.

Followup to: <02021919493204.00447@jakob>
By author: Jakob Kemi <[email protected]>
In newsgroup: linux.dev.kernel
>
> GCC doesn't copy const strings, as opposed to other const arrays.
> So it should be fine as it is. GCC also reuse duplicated strings.
>

It's still manifest once per file, I believe.

I would suggest making the array an extern instead.

-hpa
--
<[email protected]> at work, <[email protected]> in private!
"Unix gives you enough rope to shoot yourself in the foot."
http://www.zytor.com/~hpa/puzzle.txt <[email protected]>

2002-02-19 20:28:15

by H. Peter Anvin

[permalink] [raw]
Subject: Re: [PATCH] hex <-> int conversion routines.

Followup to: <[email protected]>
By author: Bernd Petrovitsch <[email protected]>
In newsgroup: linux.dev.kernel
>
> Jakob Kemi <[email protected]> wrote:
> >> > +static __inline__ char inthex_nibble(int x)
> >> > +{
> >> > + const char* digits = "0123456789abcdef";
> >> > +
> >> > + return digits[x & 0x0f];
> >> > +}
> >>
> >> perhaps better do static const char *digits.
> >GCC doesn't copy const strings, as opposed to other const arrays.
> >So it should be fine as it is. GCC also reuse duplicated strings.
>
> You could also do
> return "0123456789abcdef"[x & 0x0f];
> though some will find it bad, ugly, wrong
> or make a file-global
> static const char digits[] = "0123456789abcdef";
>

Better yet...

extern const char inthex_digits[];
static __inline__ char inthex_nybble(int x)
{
return inthex_digits[x & 15];
}

(Nibble = small amount of food; nybble = 4 bits. It's a pun on
bite/byte.)

-hpa

--
<[email protected]> at work, <[email protected]> in private!
"Unix gives you enough rope to shoot yourself in the foot."
http://www.zytor.com/~hpa/puzzle.txt <[email protected]>

2002-02-19 20:32:17

by George Anzinger

[permalink] [raw]
Subject: Re: [PATCH] hex <-> int conversion routines.

Linus Torvalds wrote:
>
> On Tue, 19 Feb 2002, Jakob Kemi wrote:
> >
> > I also added three other hex-functions that can replace a lot of duplicated code.
> >
> > int hexint_nibble (char x); // hex digit to int.
> > int hexint_byte (const char *src); // hex digit-pair to int.
> > char inthex_nibble (int x); // int to hex digit.
> > void inthex_byte (int x, char* dest); // int to hex digit pair.
>
> Is there any reason to do all of this?
>
> I suspect 99% of all users can (and probably should) be replaced with
> "sscanf()" instead. Which does a lot more, of course, and is not the
> fastest thing out there due to that, but anybody who does hex->int
> conversion inside some critical loop is just crazy.
>
> Linus
>

So maybe a wrapper or two:

#define hexint_nibble(x) {int y; sscanf(&x,"%1xl",&y); y}
#define hexint_byte(x) {int y; sscanf(x,"%2xl",&y); y}

#define inthex_nibble(x) {char y[2]; sprintf(&y,"%1x",x); y[1]}
#define inthex_byte(x,dest) sprintf(dest,"%02x",x)

Untested, of course.
--
George [email protected]
High-res-timers: http://sourceforge.net/projects/high-res-timers/
Real time sched: http://sourceforge.net/projects/rtsched/

2002-02-20 10:26:14

by Martin Dalecki

[permalink] [raw]
Subject: Re: [PATCH] hex <-> int conversion routines.

>>>+static __inline__ char inthex_nibble(int x)
>>>+{
>>>+ const char* digits = "0123456789abcdef";
>>>+
>>>+ return digits[x & 0x0f];
>>>+}
>>>
>>perhaps better do static const char *digits.
>>
> GCC doesn't copy const strings, as opposed to other const arrays.
Currently.

> So it should be fine as it is. GCC also reuse duplicated strings.

2002-02-23 02:25:07

by Ian Molton

[permalink] [raw]
Subject: Re: [PATCH] hex <-> int conversion routines.

On a sunny 19 Feb 2002 12:27:28 -0800 H. Peter Anvin gathered a sheaf of
electrons and etched in their motions the following immortal words:

> extern const char inthex_digits[];
> static __inline__ char inthex_nybble(int x)
> {
> return inthex_digits[x & 15];
> }

What about the following? It maintains the exact behaviour of the original,
but can be smaller if it doesnt have to deal with >15 in the input (then it
wont need the x &= 0x0f).

It would be 3 cycles for <10 and 4 for >=10 on ARM. I'd imagine this would
be a little quicker than a load from memory as in the above example.

plus it doesnt waste 16 bytes of RAM in a lookup table.

static inline char inthex_nybble(int x){
x &= 0x0f;
return x<10?x^48:x+87;
}

Just a thought...