2008-03-13 01:02:26

by Roman Zippel

[permalink] [raw]
Subject: [PATCH 1/4] introduce explicit signed/unsigned 64bit divide

The current do_div doesn't explicitly say that it's unsigned and the
signed counterpart is missing, which is e.g. needed when dealing with
time values.
This introduces 64bit signed/unsigned divide functions which also
attempt to clean up the somewhat awkward calling API, which often
requires the use of temporary variables for the dividend. To avoid the
need for temporary variables everywhere for the remainder, each divide
variant also provides a version which doesn't return the remainder.

Each architecture can now provide optimized versions of these functions,
otherwise generic fallback implementations will be used.

As an example I provided an alternative for the current x86 divide,
which avoids the asm casts and using a union allows gcc to generate
better code. It also avoids the upper divide in a few more cases, where
the result is known (i.e. upper quotient is zero).

Signed-off-by: Roman Zippel <[email protected]>

---
include/asm-x86/div64.h | 20 +++++++++++++
include/linux/math64.h | 72 ++++++++++++++++++++++++++++++++++++++++++++++++
lib/div64.c | 23 ++++++++++++++-
3 files changed, 113 insertions(+), 2 deletions(-)

Index: linux-2.6/include/asm-x86/div64.h
===================================================================
--- linux-2.6.orig/include/asm-x86/div64.h 2008-03-11 17:15:21.000000000 +0100
+++ linux-2.6/include/asm-x86/div64.h 2008-03-12 21:21:26.000000000 +0100
@@ -50,6 +50,26 @@ div_ll_X_l_rem(long long divs, long div,

}

+static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder)
+{
+ union {
+ u64 v64;
+ u32 v32[2];
+ } d = { dividend };
+ u32 upper;
+
+ upper = d.v32[1];
+ d.v32[1] = 0;
+ if (upper >= divisor) {
+ d.v32[1] = upper / divisor;
+ upper %= divisor;
+ }
+ asm ("divl %2" : "=a" (d.v32[0]), "=d" (*remainder) :
+ "rm" (divisor), "0" (d.v32[0]), "1" (upper));
+ return d.v64;
+}
+#define div_u64_rem div_u64_rem
+
extern uint64_t div64_64(uint64_t dividend, uint64_t divisor);

#else
Index: linux-2.6/lib/div64.c
===================================================================
--- linux-2.6.orig/lib/div64.c 2008-03-11 17:15:21.000000000 +0100
+++ linux-2.6/lib/div64.c 2008-03-12 21:21:26.000000000 +0100
@@ -16,9 +16,8 @@
* assembly versions such as arch/ppc/lib/div64.S and arch/sh/lib/div64.S.
*/

-#include <linux/types.h>
#include <linux/module.h>
-#include <asm/div64.h>
+#include <linux/math64.h>

/* Not needed on 64bit architectures */
#if BITS_PER_LONG == 32
@@ -58,6 +57,26 @@ uint32_t __attribute__((weak)) __div64_3

EXPORT_SYMBOL(__div64_32);

+#ifndef div_s64_rem
+s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder)
+{
+ u64 quotient;
+
+ if (dividend < 0) {
+ quotient = div_u64_rem(-dividend, abs(divisor), (u32 *)remainder);
+ *remainder = -*remainder;
+ if (divisor > 0)
+ quotient = -quotient;
+ } else {
+ quotient = div_u64_rem(dividend, abs(divisor), (u32 *)remainder);
+ if (divisor < 0)
+ quotient = -quotient;
+ }
+ return quotient;
+}
+EXPORT_SYMBOL(div_s64_rem);
+#endif
+
/* 64bit divisor, dividend and result. dynamic precision */
uint64_t div64_64(uint64_t dividend, uint64_t divisor)
{
Index: linux-2.6/include/linux/math64.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6/include/linux/math64.h 2008-03-12 21:21:26.000000000 +0100
@@ -0,0 +1,72 @@
+#ifndef _LINUX_MATH64_H
+#define _LINUX_MATH64_H
+
+#include <linux/types.h>
+#include <asm/div64.h>
+
+#if BITS_PER_LONG == 64
+
+/**
+ * div_u64_rem - unsigned 64bit divide with 32bit divisor with remainder
+ *
+ * This is commonly provided by 32bit archs to provide an optimized 64bit
+ * divide.
+ */
+static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder)
+{
+ *remainder = dividend % divisor;
+ return dividend / divisor;
+}
+
+/**
+ * div_s64_rem - signed 64bit divide with 32bit divisor with remainder
+ */
+static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder)
+{
+ *remainder = dividend % divisor;
+ return dividend / divisor;
+}
+
+#elif BITS_PER_LONG == 32
+
+#ifndef div_u64_rem
+static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder)
+{
+ *remainder = do_div(dividend, divisor);
+ return dividend;
+}
+#endif
+
+#ifndef div_s64_rem
+extern s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder);
+#endif
+
+#endif /* BITS_PER_LONG */
+
+/**
+ * div_u64 - unsigned 64bit divide with 32bit divisor
+ *
+ * This is the most common 64bit divide and should be used if possible,
+ * as many 32bit archs can optimize this variant better than a full 64bit
+ * divide.
+ */
+#ifndef div_u64
+static inline u64 div_u64(u64 dividend, u32 divisor)
+{
+ u32 remainder;
+ return div_u64_rem(dividend, divisor, &remainder);
+}
+#endif
+
+/**
+ * div_s64 - signed 64bit divide with 32bit divisor
+ */
+#ifndef div_u64
+static inline s64 div_s64(s64 dividend, s32 divisor)
+{
+ s32 remainder;
+ return div_s64_rem(dividend, divisor, &remainder);
+}
+#endif
+
+#endif /* _LINUX_MATH64_H */

--


2008-03-13 10:15:15

by Geert Uytterhoeven

[permalink] [raw]
Subject: Re: [PATCH 1/4] introduce explicit signed/unsigned 64bit divide

On Thu, 13 Mar 2008, [email protected] wrote:
> --- /dev/null 1970-01-01 00:00:00.000000000 +0000
> +++ linux-2.6/include/linux/math64.h 2008-03-12 21:21:26.000000000 +0100
> @@ -0,0 +1,72 @@
> +#ifndef _LINUX_MATH64_H
> +#define _LINUX_MATH64_H
> +
> +#include <linux/types.h>
> +#include <asm/div64.h>
> +
> +#if BITS_PER_LONG == 64
> +
> +/**
> + * div_u64_rem - unsigned 64bit divide with 32bit divisor with remainder
> + *
> + * This is commonly provided by 32bit archs to provide an optimized 64bit
> + * divide.
> + */
> +static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder)
> +{
> + *remainder = dividend % divisor;
> + return dividend / divisor;
> +}
> +
> +/**
> + * div_s64_rem - signed 64bit divide with 32bit divisor with remainder
> + */
> +static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder)
^ ^ ^ ^ ^
s64 div_s64_rem s64 s32 s32
> +{
> + *remainder = dividend % divisor;
> + return dividend / divisor;
> +}
> +
> +#elif BITS_PER_LONG == 32
> +
> +#ifndef div_u64_rem
> +static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder)
> +{
> + *remainder = do_div(dividend, divisor);
> + return dividend;
> +}
> +#endif
> +
> +#ifndef div_s64_rem
> +extern s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder);
> +#endif

With kind regards,

Geert Uytterhoeven
Software Architect

Sony Network and Software Technology Center Europe
The Corporate Village · Da Vincilaan 7-D1 · B-1935 Zaventem · Belgium

Phone: +32 (0)2 700 8453
Fax: +32 (0)2 700 8622
E-mail: [email protected]
Internet: http://www.sony-europe.com/

Sony Network and Software Technology Center Europe
A division of Sony Service Centre (Europe) N.V.
Registered office: Technologielaan 7 · B-1840 Londerzeel · Belgium
VAT BE 0413.825.160 · RPR Brussels
Fortis Bank Zaventem · Swift GEBABEBB08A · IBAN BE39001382358619

2008-03-13 15:00:09

by Roman Zippel

[permalink] [raw]
Subject: Re: [PATCH 1/4] introduce explicit signed/unsigned 64bit divide

Hi,

On Thu, 13 Mar 2008, Geert Uytterhoeven wrote:

> > +static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder)
> ^ ^ ^ ^ ^
> s64 div_s64_rem s64 s32 s32

Thanks for noticing.
I'm quite sure I fixed this before, as I compiled this also for
64bit, so I'm not sure how it got back. Anyway, new patch below.

bye, Roman



[PATCH 1/4] introduce explicit signed/unsigned 64bit divide

The current do_div doesn't explicitly say that it's unsigned and the
signed counterpart is missing, which is e.g. needed when dealing with
time values.
This introduces 64bit signed/unsigned divide functions which also
attempt to clean up the somewhat awkward calling API, which often
requires the use of temporary variables for the dividend. To avoid the
need for temporary variables everywhere for the remainder, each divide
variant also provides a version which doesn't return the remainder.

Each architecture can now provide optimized versions of these functions,
otherwise generic fallback implementations will be used.

As an example I provided an alternative for the current x86 divide,
which avoids the asm casts and using a union allows gcc to generate
better code. It also avoids the upper divide in a few more cases, where
the result is known (i.e. upper quotient is zero).

Signed-off-by: Roman Zippel <[email protected]>

---
include/asm-x86/div64.h | 20 +++++++++++++
include/linux/math64.h | 72 ++++++++++++++++++++++++++++++++++++++++++++++++
lib/div64.c | 23 ++++++++++++++-
3 files changed, 113 insertions(+), 2 deletions(-)

Index: linux-2.6/include/asm-x86/div64.h
===================================================================
--- linux-2.6.orig/include/asm-x86/div64.h 2008-03-11 17:15:21.000000000 +0100
+++ linux-2.6/include/asm-x86/div64.h 2008-03-12 21:21:26.000000000 +0100
@@ -50,6 +50,26 @@ div_ll_X_l_rem(long long divs, long div,

}

+static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder)
+{
+ union {
+ u64 v64;
+ u32 v32[2];
+ } d = { dividend };
+ u32 upper;
+
+ upper = d.v32[1];
+ d.v32[1] = 0;
+ if (upper >= divisor) {
+ d.v32[1] = upper / divisor;
+ upper %= divisor;
+ }
+ asm ("divl %2" : "=a" (d.v32[0]), "=d" (*remainder) :
+ "rm" (divisor), "0" (d.v32[0]), "1" (upper));
+ return d.v64;
+}
+#define div_u64_rem div_u64_rem
+
extern uint64_t div64_64(uint64_t dividend, uint64_t divisor);

#else
Index: linux-2.6/lib/div64.c
===================================================================
--- linux-2.6.orig/lib/div64.c 2008-03-11 17:15:21.000000000 +0100
+++ linux-2.6/lib/div64.c 2008-03-12 21:21:26.000000000 +0100
@@ -16,9 +16,8 @@
* assembly versions such as arch/ppc/lib/div64.S and arch/sh/lib/div64.S.
*/

-#include <linux/types.h>
#include <linux/module.h>
-#include <asm/div64.h>
+#include <linux/math64.h>

/* Not needed on 64bit architectures */
#if BITS_PER_LONG == 32
@@ -58,6 +57,26 @@ uint32_t __attribute__((weak)) __div64_3

EXPORT_SYMBOL(__div64_32);

+#ifndef div_s64_rem
+s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder)
+{
+ u64 quotient;
+
+ if (dividend < 0) {
+ quotient = div_u64_rem(-dividend, abs(divisor), (u32 *)remainder);
+ *remainder = -*remainder;
+ if (divisor > 0)
+ quotient = -quotient;
+ } else {
+ quotient = div_u64_rem(dividend, abs(divisor), (u32 *)remainder);
+ if (divisor < 0)
+ quotient = -quotient;
+ }
+ return quotient;
+}
+EXPORT_SYMBOL(div_s64_rem);
+#endif
+
/* 64bit divisor, dividend and result. dynamic precision */
uint64_t div64_64(uint64_t dividend, uint64_t divisor)
{
Index: linux-2.6/include/linux/math64.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6/include/linux/math64.h 2008-03-12 21:21:26.000000000 +0100
@@ -0,0 +1,72 @@
+#ifndef _LINUX_MATH64_H
+#define _LINUX_MATH64_H
+
+#include <linux/types.h>
+#include <asm/div64.h>
+
+#if BITS_PER_LONG == 64
+
+/**
+ * div_u64_rem - unsigned 64bit divide with 32bit divisor with remainder
+ *
+ * This is commonly provided by 32bit archs to provide an optimized 64bit
+ * divide.
+ */
+static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder)
+{
+ *remainder = dividend % divisor;
+ return dividend / divisor;
+}
+
+/**
+ * div_s64_rem - signed 64bit divide with 32bit divisor with remainder
+ */
+static inline s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder)
+{
+ *remainder = dividend % divisor;
+ return dividend / divisor;
+}
+
+#elif BITS_PER_LONG == 32
+
+#ifndef div_u64_rem
+static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder)
+{
+ *remainder = do_div(dividend, divisor);
+ return dividend;
+}
+#endif
+
+#ifndef div_s64_rem
+extern s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder);
+#endif
+
+#endif /* BITS_PER_LONG */
+
+/**
+ * div_u64 - unsigned 64bit divide with 32bit divisor
+ *
+ * This is the most common 64bit divide and should be used if possible,
+ * as many 32bit archs can optimize this variant better than a full 64bit
+ * divide.
+ */
+#ifndef div_u64
+static inline u64 div_u64(u64 dividend, u32 divisor)
+{
+ u32 remainder;
+ return div_u64_rem(dividend, divisor, &remainder);
+}
+#endif
+
+/**
+ * div_s64 - signed 64bit divide with 32bit divisor
+ */
+#ifndef div_s64
+static inline s64 div_s64(s64 dividend, s32 divisor)
+{
+ s32 remainder;
+ return div_s64_rem(dividend, divisor, &remainder);
+}
+#endif
+
+#endif /* _LINUX_MATH64_H */

2008-03-13 20:36:41

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH 1/4] introduce explicit signed/unsigned 64bit divide

On Thu, 13 Mar 2008 15:59:27 +0100 (CET)
Roman Zippel <[email protected]> wrote:

> On Thu, 13 Mar 2008, Geert Uytterhoeven wrote:
>
> > > +static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder)
> > ^ ^ ^ ^ ^
> > s64 div_s64_rem s64 s32 s32
>
> Thanks for noticing.
> I'm quite sure I fixed this before, as I compiled this also for
> 64bit, so I'm not sure how it got back. Anyway, new patch below.

I think what happened was that [patch 3/4] fixed this up. Of course,
that patch doesn't apply on this updated [1/4]. I _could_ just take the
old [1/4] (I think), but I don't know if that would be bisection-friendly.

Anyway, please redo&resend? Thanks.



Please have a think about that code in arch/x86/kvm/i8254.c too. It is
painful to see remote subsystems (re)implementing generic infrastructure.
Can KVM use existing code? Should we hoist what KVM has done there into
generic code? Did it have to use a(nother bleeding) macro?

2008-03-14 17:45:44

by Roman Zippel

[permalink] [raw]
Subject: Re: [PATCH 1/4] introduce explicit signed/unsigned 64bit divide

Hi,

On Thu, 13 Mar 2008, Andrew Morton wrote:

> I think what happened was that [patch 3/4] fixed this up. Of course,
> that patch doesn't apply on this updated [1/4]. I _could_ just take the
> old [1/4] (I think), but I don't know if that would be bisection-friendly.
>
> Anyway, please redo&resend? Thanks.

Done.

> Please have a think about that code in arch/x86/kvm/i8254.c too. It is
> painful to see remote subsystems (re)implementing generic infrastructure.
> Can KVM use existing code? Should we hoist what KVM has done there into
> generic code? Did it have to use a(nother bleeding) macro?

Looking closer at it, div64_u64() seems to be a bit overkill, as the
divisor is a 32bit value, so the following should do the same job (only
compile tested):

/*
 * muldiv64 - compute (a * b) / c through 32-bit partial products.
 *
 * The 64-bit value a is split into two 32-bit halves, each half is
 * multiplied by b, and the result is divided by c in two steps (upper
 * part first, then the lower part combined with the upper remainder),
 * i.e. a schoolbook long division with 32-bit digits.
 *
 * The upper quotient is stored in a u32, so a quotient that does not
 * fit in 64 bits is truncated.
 *
 * NOTE(review): the union assumes that `low` aliases the least
 * significant half of `ll`, i.e. a little-endian layout - presumably
 * fine for the x86-only KVM user discussed here; confirm before moving
 * this to generic code.
 */
u64 muldiv64(u64 a, u32 b, u32 c)
{
union {
u64 ll;
struct {
u32 low, high;
};
} u, res, rl, rh;

/* Split a into its two 32-bit halves via the union. */
u.ll = a;
/* Partial products of b with the lower and the upper half of a. */
rl.ll = (u64)b * u.low;
rh.ll = (u64)b * u.high;
/* Carry the upper half of the low product into the high product. */
rh.ll += rl.high;
/*
 * Divide the upper part. The remainder is written over rl.high, so
 * rl.ll now holds (remainder << 32) + the low 32 bits of the low product.
 */
res.high = div_u64_rem(rh.ll, c, &rl.high);
/* Divide the remaining lower part to get the low 32 bits of the result. */
res.low = div_u64(rl.ll, c);
return res.ll;
}

Moving it to a more generic location shouldn't be a big problem.

bye, Roman