2007-02-27 01:35:21

by Stephen Hemminger

[permalink] [raw]
Subject: [PATCH] udivdi3: 64 bit divide

The kernel already has several implmentations and usages of 64 by 64
bit divide.

Although it is significantly slower, there are places that need it so
provide one generic version using scaling, and allow existing platform
versions to continue.

This looks like good -mm material. Tested on x86_64 and i386.

Signed-off-by: Stephen Hemminger <[email protected]>

---
arch/alpha/Kconfig | 4 ++++
arch/arm/Kconfig | 4 ++++
arch/arm26/Kconfig | 4 ++++
arch/avr32/Kconfig | 4 ++++
arch/cris/Kconfig | 4 ++++
arch/frv/Kconfig | 4 ++++
arch/h8300/Kconfig | 4 ++++
arch/i386/Kconfig | 4 ++++
arch/m32r/Kconfig | 4 ++++
arch/m68k/Kconfig | 4 ++++
arch/m68knommu/Kconfig | 4 ++++
arch/mips/Kconfig | 4 ++++
arch/parisc/Kconfig | 4 ++++
arch/powerpc/Kconfig | 4 ++++
arch/ppc/Kconfig | 4 ++++
arch/s390/Kconfig | 4 ++++
arch/sh64/Kconfig | 4 ++++
arch/v850/Kconfig | 3 +++
arch/xtensa/Kconfig | 4 ++++
lib/Makefile | 1 +
lib/udivdi3.c | 37 +++++++++++++++++++++++++++++++++++++
net/ipv4/tcp_cubic.c | 24 +-----------------------
net/netfilter/xt_connbytes.c | 19 +------------------
23 files changed, 115 insertions(+), 41 deletions(-)

--- pktgen.orig/net/ipv4/tcp_cubic.c 2007-02-26 13:40:08.000000000 -0800
+++ pktgen/net/ipv4/tcp_cubic.c 2007-02-26 14:31:41.000000000 -0800
@@ -51,7 +51,6 @@
module_param(tcp_friendliness, int, 0644);
MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness");

-#include <asm/div64.h>

/* BIC TCP Parameters */
struct bictcp {
@@ -93,27 +92,6 @@
tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
}

-/* 64bit divisor, dividend and result. dynamic precision */
-static inline u_int64_t div64_64(u_int64_t dividend, u_int64_t divisor)
-{
- u_int32_t d = divisor;
-
- if (divisor > 0xffffffffULL) {
- unsigned int shift = fls(divisor >> 32);
-
- d = divisor >> shift;
- dividend >>= shift;
- }
-
- /* avoid 64 bit division if possible */
- if (dividend >> 32)
- do_div(dividend, d);
- else
- dividend = (uint32_t) dividend / d;
-
- return dividend;
-}
-
/*
* calculate the cubic root of x using Newton-Raphson
*/
@@ -134,7 +112,7 @@
*/
do {
x1 = x;
- x = (2 * x + (uint32_t) div64_64(a, x*x)) / 3;
+ x = (2 * x + (u32) (a / (x*x))) / 3;
} while (abs(x1 - x) > 1);

return x;
--- pktgen.orig/net/netfilter/xt_connbytes.c 2007-02-26 13:40:08.000000000 -0800
+++ pktgen/net/netfilter/xt_connbytes.c 2007-02-26 14:29:13.000000000 -0800
@@ -16,7 +16,6 @@
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_connbytes.h>

-#include <asm/div64.h>
#include <asm/bitops.h>

MODULE_LICENSE("GPL");
@@ -24,22 +23,6 @@
MODULE_DESCRIPTION("iptables match for matching number of pkts/bytes per connection");
MODULE_ALIAS("ipt_connbytes");

-/* 64bit divisor, dividend and result. dynamic precision */
-static u_int64_t div64_64(u_int64_t dividend, u_int64_t divisor)
-{
- u_int32_t d = divisor;
-
- if (divisor > 0xffffffffULL) {
- unsigned int shift = fls(divisor >> 32);
-
- d = divisor >> shift;
- dividend >>= shift;
- }
-
- do_div(dividend, d);
- return dividend;
-}
-
static int
match(const struct sk_buff *skb,
const struct net_device *in,
@@ -106,7 +89,7 @@
break;
}
if (pkts != 0)
- what = div64_64(bytes, pkts);
+ what = bytes / pkts;
break;
}

--- pktgen.orig/lib/Makefile 2007-02-26 13:40:08.000000000 -0800
+++ pktgen/lib/Makefile 2007-02-26 14:17:31.000000000 -0800
@@ -28,6 +28,7 @@
lib-$(CONFIG_SEMAPHORE_SLEEPERS) += semaphore-sleepers.o
lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o
obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
+obj-$(CONFIG_GENERIC_UDIVDI3) += udivdi3.o
obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o
obj-$(CONFIG_PLIST) += plist.o
obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
--- pktgen.orig/arch/alpha/Kconfig 2007-02-26 13:51:29.000000000 -0800
+++ pktgen/arch/alpha/Kconfig 2007-02-26 13:54:35.000000000 -0800
@@ -33,6 +33,10 @@
bool
default n

+config GENERIC_UDIVDI3
+ bool
+ default y
+
config GENERIC_FIND_NEXT_BIT
bool
default y
--- pktgen.orig/arch/arm/Kconfig 2007-02-26 13:51:29.000000000 -0800
+++ pktgen/arch/arm/Kconfig 2007-02-26 13:54:57.000000000 -0800
@@ -90,6 +90,10 @@
bool
default n

+config GENERIC_UDIVDI3
+ bool
+ default y
+
config GENERIC_HWEIGHT
bool
default y
--- pktgen.orig/arch/arm26/Kconfig 2007-02-26 13:48:46.000000000 -0800
+++ pktgen/arch/arm26/Kconfig 2007-02-26 13:55:24.000000000 -0800
@@ -49,6 +49,10 @@
bool
default n

+config GENERIC_UDIVDI3
+ bool
+ default y
+
config GENERIC_HWEIGHT
bool
default y
--- pktgen.orig/arch/avr32/Kconfig 2007-02-26 13:51:29.000000000 -0800
+++ pktgen/arch/avr32/Kconfig 2007-02-26 13:55:39.000000000 -0800
@@ -53,6 +53,10 @@
bool
default n

+config GENERIC_UDIVDI3
+ bool
+ default y
+
config GENERIC_BUST_SPINLOCK
bool

--- pktgen.orig/arch/cris/Kconfig 2007-02-26 13:51:29.000000000 -0800
+++ pktgen/arch/cris/Kconfig 2007-02-26 13:55:53.000000000 -0800
@@ -28,6 +28,10 @@
bool
default n

+config GENERIC_UDIVDI3
+ bool
+ default y
+
config GENERIC_FIND_NEXT_BIT
bool
default y
--- pktgen.orig/arch/frv/Kconfig 2007-02-26 13:51:29.000000000 -0800
+++ pktgen/arch/frv/Kconfig 2007-02-26 13:56:09.000000000 -0800
@@ -53,6 +53,10 @@
bool
default y

+config GENERIC_UDIVDI3
+ bool
+ default y
+
mainmenu "Fujitsu FR-V Kernel Configuration"

source "init/Kconfig"
--- pktgen.orig/arch/h8300/Kconfig 2007-02-26 13:51:29.000000000 -0800
+++ pktgen/arch/h8300/Kconfig 2007-02-26 13:56:20.000000000 -0800
@@ -41,6 +41,10 @@
bool
default n

+config GENERIC_UDIVDI3
+ bool
+ default y
+
config GENERIC_FIND_NEXT_BIT
bool
default y
--- pktgen.orig/arch/i386/Kconfig 2007-02-26 14:01:59.000000000 -0800
+++ pktgen/arch/i386/Kconfig 2007-02-26 14:02:28.000000000 -0800
@@ -75,6 +75,10 @@
bool
default y

+config GENERIC_UDIVDI3
+ bool
+ default y
+
config ARCH_MAY_HAVE_PC_FDC
bool
default y
--- pktgen.orig/arch/m32r/Kconfig 2007-02-26 13:51:29.000000000 -0800
+++ pktgen/arch/m32r/Kconfig 2007-02-26 13:56:43.000000000 -0800
@@ -229,6 +229,10 @@
bool
default n

+config GENERIC_UDIVDI3
+ bool
+ default y
+
config GENERIC_FIND_NEXT_BIT
bool
default y
--- pktgen.orig/arch/m68k/Kconfig 2007-02-26 13:51:29.000000000 -0800
+++ pktgen/arch/m68k/Kconfig 2007-02-26 13:56:56.000000000 -0800
@@ -25,6 +25,10 @@
bool
default n

+config GENERIC_UDIVDI3
+ bool
+ default y
+
config GENERIC_HWEIGHT
bool
default y
--- pktgen.orig/arch/m68knommu/Kconfig 2007-02-26 13:51:29.000000000 -0800
+++ pktgen/arch/m68knommu/Kconfig 2007-02-26 13:57:05.000000000 -0800
@@ -37,6 +37,10 @@
bool
default n

+config GENERIC_UDIVDI3
+ bool
+ default y
+
config GENERIC_FIND_NEXT_BIT
bool
default y
--- pktgen.orig/arch/mips/Kconfig 2007-02-26 13:51:29.000000000 -0800
+++ pktgen/arch/mips/Kconfig 2007-02-26 13:57:13.000000000 -0800
@@ -843,6 +843,10 @@
bool
default n

+config GENERIC_UDIVDI3
+ bool
+ default y
+
config GENERIC_FIND_NEXT_BIT
bool
default y
--- pktgen.orig/arch/parisc/Kconfig 2007-02-26 13:51:29.000000000 -0800
+++ pktgen/arch/parisc/Kconfig 2007-02-26 13:57:21.000000000 -0800
@@ -33,6 +33,10 @@
bool
default n

+config GENERIC_UDIVDI3
+ bool
+ default y
+
config GENERIC_FIND_NEXT_BIT
bool
default y
--- pktgen.orig/arch/powerpc/Kconfig 2007-02-26 13:51:29.000000000 -0800
+++ pktgen/arch/powerpc/Kconfig 2007-02-26 13:57:31.000000000 -0800
@@ -49,6 +49,10 @@
bool
default y if 64BIT

+config GENERIC_UDIVDI3
+ bool
+ default y
+
config GENERIC_HWEIGHT
bool
default y
--- pktgen.orig/arch/ppc/Kconfig 2007-02-26 13:51:29.000000000 -0800
+++ pktgen/arch/ppc/Kconfig 2007-02-26 13:57:44.000000000 -0800
@@ -27,6 +27,10 @@
bool
default n

+config GENERIC_UDIVDI3
+ bool
+ default y
+
config GENERIC_HWEIGHT
bool
default y
--- pktgen.orig/arch/s390/Kconfig 2007-02-26 13:51:29.000000000 -0800
+++ pktgen/arch/s390/Kconfig 2007-02-26 13:57:53.000000000 -0800
@@ -34,6 +34,10 @@
bool
default n

+config GENERIC_UDIVDI3
+ bool
+ default y
+
config GENERIC_HWEIGHT
bool
default y
--- pktgen.orig/arch/sh64/Kconfig 2007-02-26 13:51:29.000000000 -0800
+++ pktgen/arch/sh64/Kconfig 2007-02-26 14:00:32.000000000 -0800
@@ -21,6 +21,10 @@
bool
default y

+config GENERIC_UDIVDI3
+ bool
+ default y
+
config GENERIC_FIND_NEXT_BIT
bool
default y
--- pktgen.orig/arch/v850/Kconfig 2007-02-26 13:51:29.000000000 -0800
+++ pktgen/arch/v850/Kconfig 2007-02-26 13:59:29.000000000 -0800
@@ -19,6 +19,9 @@
config RWSEM_XCHGADD_ALGORITHM
bool
default n
+config GENERIC_UDIVDI3
+ bool
+ default y
config GENERIC_FIND_NEXT_BIT
bool
default y
--- pktgen.orig/arch/xtensa/Kconfig 2007-02-26 13:51:29.000000000 -0800
+++ pktgen/arch/xtensa/Kconfig 2007-02-26 13:59:45.000000000 -0800
@@ -26,6 +26,10 @@
bool
default y

+config GENERIC_UDIVDI3
+ bool
+ default y
+
config GENERIC_FIND_NEXT_BIT
bool
default y
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ pktgen/lib/udivdi3.c 2007-02-26 14:14:13.000000000 -0800
@@ -0,0 +1,37 @@
+/*
+ * Generic C version of full 64 bit by 64 bit division
+ * Extracted from version used by netfilter connection tracking
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * Code generated for this function might be very inefficient
+ * for some CPUs, can be overridden by linking arch-specific
+ * assembly versions such as arch/sparc/lib/udivdi.S
+ */
+#include <linux/types.h>
+#include <linux/module.h>
+#include <asm/div64.h>
+
+uint64_t __udivdi3(uint64_t dividend, uint64_t divisor)
+{
+ uint32_t d = divisor;
+
+ /* Scale divisor to 32 bits */
+ if (divisor > 0xffffffffULL) {
+ unsigned int shift = fls(divisor >> 32);
+
+ d = divisor >> shift;
+ dividend >>= shift;
+ }
+
+ /* avoid 64 bit division if possible */
+ if (dividend >> 32)
+ do_div(dividend, d);
+ else
+ dividend = (uint32_t) dividend / d;
+
+ return dividend;
+}
+EXPORT_SYMBOL(__udivdi3);


2007-02-27 20:24:44

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH] udivdi3: 64 bit divide

> On Mon, 26 Feb 2007 17:35:17 -0800 Stephen Hemminger <[email protected]> wrote:
> The kernel already has several implmentations and usages of 64 by 64
> bit divide.
>
> Although it is significantly slower, there are places that need it so
> provide one generic version using scaling, and allow existing platform
> versions to continue.

The reason we implement 64/32 via do_div() is, for better or for worse, to
make people think before they use it. And to make it stand out, and so
that we discover places that are using it by accident, where they could use
something cheaper.

However your implementation of the presumably even more expensive 64/64
allows us to do 64/64 with a plain old "/" operator.

If the do_div() philosophy is any good then we should surely repeat it for
64/64, no?

2007-02-27 21:18:42

by Stephen Hemminger

[permalink] [raw]
Subject: Re: [PATCH] udivdi3: 64 bit divide

On Tue, 27 Feb 2007 12:24:37 -0800
Andrew Morton <[email protected]> wrote:

> > On Mon, 26 Feb 2007 17:35:17 -0800 Stephen Hemminger <[email protected]> wrote:
> > The kernel already has several implmentations and usages of 64 by 64
> > bit divide.
> >
> > Although it is significantly slower, there are places that need it so
> > provide one generic version using scaling, and allow existing platform
> > versions to continue.
>
> The reason we implement 64/32 via do_div() is, for better or for worse, to
> make people think before they use it. And to make it stand out, and so
> that we discover places that are using it by accident, where they could use
> something cheaper.
>
> However your implementation of the presumably even more expensive 64/64
> allows us to do 64/64 with a plain old "/" operator.
>
> If the do_div() philosophy is any good then we should surely repeat it for
> 64/64, no?
>

Then we should pull the existing udivdi3 implementations?

2007-02-27 21:37:05

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH] udivdi3: 64 bit divide

> On Tue, 27 Feb 2007 13:18:40 -0800 Stephen Hemminger <[email protected]> wrote:
> On Tue, 27 Feb 2007 12:24:37 -0800
> Andrew Morton <[email protected]> wrote:
>
> > > On Mon, 26 Feb 2007 17:35:17 -0800 Stephen Hemminger <[email protected]> wrote:
> > > The kernel already has several implmentations and usages of 64 by 64
> > > bit divide.
> > >
> > > Although it is significantly slower, there are places that need it so
> > > provide one generic version using scaling, and allow existing platform
> > > versions to continue.
> >
> > The reason we implement 64/32 via do_div() is, for better or for worse, to
> > make people think before they use it. And to make it stand out, and so
> > that we discover places that are using it by accident, where they could use
> > something cheaper.
> >
> > However your implementation of the presumably even more expensive 64/64
> > allows us to do 64/64 with a plain old "/" operator.
> >
> > If the do_div() philosophy is any good then we should surely repeat it for
> > 64/64, no?
> >
>
> Then we should pull the existing udivdi3 implementations?

Not much point really. Some architectures have gone and done that, but x86
has not. x86 has enough coverage for us to pick up most problems, and any
remaining problems are obviously in scruffy architectures which don't care
about performance ;)

2007-02-27 22:24:41

by Russell King

[permalink] [raw]
Subject: Re: [PATCH] udivdi3: 64 bit divide

On Tue, Feb 27, 2007 at 01:36:56PM -0800, Andrew Morton wrote:
> On Tue, 27 Feb 2007 13:18:40 -0800 Stephen Hemminger <[email protected]> wrote:
> > Then we should pull the existing udivdi3 implementations?
>
> Not much point really. Some architectures have gone and done that, but x86
> has not. x86 has enough coverage for us to pick up most problems, and any
> remaining problems are obviously in scruffy architectures which don't care
> about performance ;)

I doubt arm26 uses udivdi3, but that's something Ian would have to
confirm.

--
Russell King
Linux kernel 2.6 ARM Linux - http://www.arm.linux.org.uk/
maintainer of:

2007-02-27 22:47:24

by Ian molton

[permalink] [raw]
Subject: Re: [PATCH] udivdi3: 64 bit divide

Russell King wrote:
> On Tue, Feb 27, 2007 at 01:36:56PM -0800, Andrew Morton wrote:
>> On Tue, 27 Feb 2007 13:18:40 -0800 Stephen Hemminger <[email protected]> wrote:
>>> Then we should pull the existing udivdi3 implementations?
>> Not much point really. Some architectures have gone and done that, but x86
>> has not. x86 has enough coverage for us to pick up most problems, and any
>> remaining problems are obviously in scruffy architectures which don't care
>> about performance ;)
>
> I doubt arm26 uses udivdi3, but that's something Ian would have to
> confirm.

I doubt it is used also, however I am not in a position to test this
until at least after I have moved house. Please leave alone for now.

2007-02-28 22:31:56

by Jan Engelhardt

[permalink] [raw]
Subject: Re: [PATCH] udivdi3: 64 bit divide


On Feb 27 2007 22:39, Ian Molton wrote:
> Russell King wrote:
>> On Tue, Feb 27, 2007 at 01:36:56PM -0800, Andrew Morton wrote:
>> > On Tue, 27 Feb 2007 13:18:40 -0800 Stephen Hemminger
>> > <[email protected]> wrote:
>> > > Then we should pull the existing udivdi3 implementations?
>> > >
>> > Not much point really. Some architectures have gone
>> > and done that, but x86 has not. x86 has enough
>> > coverage for us to pick up most problems, and any
>> > remaining problems are obviously in scruffy
>> > architectures which don't care about performance ;)
>>
>> I doubt arm26 uses udivdi3, but that's something Ian
>> would have to confirm.
>
> I doubt it is used also, however I am not in a position
> to test this until at least after I have moved house.
> Please leave alone for now.

Simple. The non-arch specific code does not use 64/64
divides through the "/" operator (otherwise there would
already have been udivdi3 linking errors). So what
remains to check is arch/arm26. grep -Pr 'int64|\bu64'
returns only a few results to check (kernel/ecard.c,
nwfpe/), so the answer is most likely no.



Jan
--

2007-03-01 01:29:22

by Tim Schmielau

[permalink] [raw]
Subject: Re: [PATCH] udivdi3: 64 bit divide

On Tue, 27 Feb 2007, Andrew Morton wrote:
> > On Mon, 26 Feb 2007 17:35:17 -0800 Stephen Hemminger <[email protected]> wrote:
> > The kernel already has several implmentations and usages of 64 by 64
> > bit divide.
> >
> > Although it is significantly slower, there are places that need it so
> > provide one generic version using scaling, and allow existing platform
> > versions to continue.
>
> The reason we implement 64/32 via do_div() is, for better or for worse, to
> make people think before they use it. And to make it stand out, and so
> that we discover places that are using it by accident, where they could use
> something cheaper.

IMHO it is even more important that the user of your 64/64 div is aware
that it only returns an approximate result.

I certainly don't want to have any code in the kernel that by accident
makes an allocation a few bytes short of the actual size of the object
(just to make up a drastic example).

Tim