Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1755810AbbBHOLr (ORCPT ); Sun, 8 Feb 2015 09:11:47 -0500 Received: from mail-la0-f48.google.com ([209.85.215.48]:34003 "EHLO mail-la0-f48.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1754520AbbBHOLo (ORCPT ); Sun, 8 Feb 2015 09:11:44 -0500 From: Yury Norov To: klimov.linux@gmail.com, davem@davemloft.net, akpm@linux-foundation.org, hannes@stressinduktion.org, dborkman@redhat.com, laijs@cn.fujitsu.com, takahiro.akashi@linaro.org, valentinrothberg@gmail.com, linux@horizon.com, msalter@redhat.com, chris@chris-wilson.co.uk, tgraf@suug.ch Cc: Yury Norov , linux-kernel@vger.kernel.org Subject: [PATCH v3 1/3] lib: find_*_bit reimplementation Date: Sun, 8 Feb 2015 17:10:17 +0300 Message-Id: <1423404619-10653-2-git-send-email-yury.norov@gmail.com> X-Mailer: git-send-email 2.1.0 In-Reply-To: <1423404619-10653-1-git-send-email-yury.norov@gmail.com> References: <1423404619-10653-1-git-send-email-yury.norov@gmail.com> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 11306 Lines: 431 New implementations takes less space in source file (see diffstat) and in object. For me it's 710 vs 453 bytes of text. It also shows better performance. Signed-off-by: Yury Norov --- lib/find_last_bit.c | 30 ++---- lib/find_next_bit.c | 275 ++++++++++++++++------------------------------------ 2 files changed, 92 insertions(+), 213 deletions(-) diff --git a/lib/find_last_bit.c b/lib/find_last_bit.c index 91ca09f..106050f 100644 --- a/lib/find_last_bit.c +++ b/lib/find_last_bit.c @@ -4,6 +4,9 @@ * Written by Rusty Russell * (Inspired by David Howell's find_next_bit implementation) * + * Rewritten by Yury Norov to decrease + * size and improve performance, 2015. + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -12,36 +15,19 @@ #include #include -#include -#include +#include #ifndef find_last_bit unsigned long find_last_bit(const unsigned long *addr, unsigned long size) { - unsigned long words; - unsigned long tmp; - - /* Start at final word. */ - words = size / BITS_PER_LONG; - - /* Partial final word? */ - if (size & (BITS_PER_LONG-1)) { - tmp = (addr[words] & (~0UL >> (BITS_PER_LONG - - (size & (BITS_PER_LONG-1))))); - if (tmp) - goto found; - } + unsigned long idx = DIV_ROUND_UP(size, BITS_PER_LONG); - while (words) { - tmp = addr[--words]; - if (tmp) { -found: - return words * BITS_PER_LONG + __fls(tmp); - } + while (idx--) { + if (addr[idx]) + return min(idx * BITS_PER_LONG + __fls(addr[idx]), size); } - /* Not found */ return size; } EXPORT_SYMBOL(find_last_bit); diff --git a/lib/find_next_bit.c b/lib/find_next_bit.c index 0cbfc0b..71aa497 100644 --- a/lib/find_next_bit.c +++ b/lib/find_next_bit.c @@ -3,6 +3,9 @@ * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * + * Rewritten by Yury Norov to decrease + * size and improve performance, 2015. + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -11,10 +14,48 @@ #include #include -#include -#include +#include + +#define HIGH_BITS_MASK(nr) (ULONG_MAX << (nr)) + +#if !defined(find_next_bit) || !defined(find_next_zero_bit) +static unsigned long _find_next_bit(const unsigned long *addr, + unsigned long nbits, unsigned long start, unsigned long mask) +{ + unsigned long tmp; + + if (start >= nbits) + return nbits; + + tmp = addr[start / BITS_PER_LONG] ^ mask; -#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) + /* Handle 1st word. */ + if (!IS_ALIGNED(start, BITS_PER_LONG)) { + tmp &= HIGH_BITS_MASK(start % BITS_PER_LONG); + start = round_down(start, BITS_PER_LONG); + } + + while (!tmp) { + start += BITS_PER_LONG; + if (start >= nbits) + return nbits; + + /* + * This is an equvalent for: + * + * tmp = find_set ? addr[start / BITS_PER_LONG] + * : ~addr[start / BITS_PER_LONG]; + * + * but saves a branch condition. + * + * Thanks George Spelvin for idea. + */ + tmp = addr[start / BITS_PER_LONG] ^ mask; + } + + return min(start + __ffs(tmp), nbits); +} +#endif #ifndef find_next_bit /* @@ -23,86 +64,16 @@ unsigned long find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { - const unsigned long *p = addr + BITOP_WORD(offset); - unsigned long result = offset & ~(BITS_PER_LONG-1); - unsigned long tmp; - - if (offset >= size) - return size; - size -= result; - offset %= BITS_PER_LONG; - if (offset) { - tmp = *(p++); - tmp &= (~0UL << offset); - if (size < BITS_PER_LONG) - goto found_first; - if (tmp) - goto found_middle; - size -= BITS_PER_LONG; - result += BITS_PER_LONG; - } - while (size & ~(BITS_PER_LONG-1)) { - if ((tmp = *(p++))) - goto found_middle; - result += BITS_PER_LONG; - size -= BITS_PER_LONG; - } - if (!size) - return result; - tmp = *p; - -found_first: - tmp &= (~0UL >> (BITS_PER_LONG - size)); - if (tmp == 0UL) /* Are any bits set? */ - return result + size; /* Nope. */ -found_middle: - return result + __ffs(tmp); + return _find_next_bit(addr, size, offset, 0UL); } EXPORT_SYMBOL(find_next_bit); #endif #ifndef find_next_zero_bit -/* - * This implementation of find_{first,next}_zero_bit was stolen from - * Linus' asm-alpha/bitops.h. - */ unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { - const unsigned long *p = addr + BITOP_WORD(offset); - unsigned long result = offset & ~(BITS_PER_LONG-1); - unsigned long tmp; - - if (offset >= size) - return size; - size -= result; - offset %= BITS_PER_LONG; - if (offset) { - tmp = *(p++); - tmp |= ~0UL >> (BITS_PER_LONG - offset); - if (size < BITS_PER_LONG) - goto found_first; - if (~tmp) - goto found_middle; - size -= BITS_PER_LONG; - result += BITS_PER_LONG; - } - while (size & ~(BITS_PER_LONG-1)) { - if (~(tmp = *(p++))) - goto found_middle; - result += BITS_PER_LONG; - size -= BITS_PER_LONG; - } - if (!size) - return result; - tmp = *p; - -found_first: - tmp |= ~0UL << size; - if (tmp == ~0UL) /* Are any bits zero? */ - return result + size; /* Nope. */ -found_middle: - return result + ffz(tmp); + return _find_next_bit(addr, size, offset, ~0UL); } EXPORT_SYMBOL(find_next_zero_bit); #endif @@ -113,24 +84,14 @@ EXPORT_SYMBOL(find_next_zero_bit); */ unsigned long find_first_bit(const unsigned long *addr, unsigned long size) { - const unsigned long *p = addr; - unsigned long result = 0; - unsigned long tmp; + unsigned long idx; - while (size & ~(BITS_PER_LONG-1)) { - if ((tmp = *(p++))) - goto found; - result += BITS_PER_LONG; - size -= BITS_PER_LONG; + for (idx = 0; idx * BITS_PER_LONG < size; idx++) { + if (addr[idx]) + return min(idx * BITS_PER_LONG + __ffs(addr[idx]), size); } - if (!size) - return result; - tmp = (*p) & (~0UL >> (BITS_PER_LONG - size)); - if (tmp == 0UL) /* Are any bits set? */ - return result + size; /* Nope. */ -found: - return result + __ffs(tmp); + return size; } EXPORT_SYMBOL(find_first_bit); #endif @@ -141,24 +102,14 @@ EXPORT_SYMBOL(find_first_bit); */ unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) { - const unsigned long *p = addr; - unsigned long result = 0; - unsigned long tmp; + unsigned long idx; - while (size & ~(BITS_PER_LONG-1)) { - if (~(tmp = *(p++))) - goto found; - result += BITS_PER_LONG; - size -= BITS_PER_LONG; + for (idx = 0; idx * BITS_PER_LONG < size; idx++) { + if (addr[idx] != ULONG_MAX) + return min(idx * BITS_PER_LONG + ffz(addr[idx]), size); } - if (!size) - return result; - tmp = (*p) | (~0UL << size); - if (tmp == ~0UL) /* Are any bits zero? */ - return result + size; /* Nope. */ -found: - return result + ffz(tmp); + return size; } EXPORT_SYMBOL(find_first_zero_bit); #endif @@ -166,18 +117,6 @@ EXPORT_SYMBOL(find_first_zero_bit); #ifdef __BIG_ENDIAN /* include/linux/byteorder does not support "unsigned long" type */ -static inline unsigned long ext2_swabp(const unsigned long * x) -{ -#if BITS_PER_LONG == 64 - return (unsigned long) __swab64p((u64 *) x); -#elif BITS_PER_LONG == 32 - return (unsigned long) __swab32p((u32 *) x); -#else -#error BITS_PER_LONG not defined -#endif -} - -/* include/linux/byteorder doesn't support "unsigned long" type */ static inline unsigned long ext2_swab(const unsigned long y) { #if BITS_PER_LONG == 64 @@ -189,48 +128,40 @@ static inline unsigned long ext2_swab(const unsigned long y) #endif } -#ifndef find_next_zero_bit_le -unsigned long find_next_zero_bit_le(const void *addr, unsigned - long size, unsigned long offset) +#if !defined(find_next_bit_le) || !defined(find_next_zero_bit_le) +static unsigned long _find_next_bit_le(const unsigned long *addr, + unsigned long nbits, unsigned long start, unsigned long mask) { - const unsigned long *p = addr; - unsigned long result = offset & ~(BITS_PER_LONG - 1); unsigned long tmp; - if (offset >= size) - return size; - p += BITOP_WORD(offset); - size -= result; - offset &= (BITS_PER_LONG - 1UL); - if (offset) { - tmp = ext2_swabp(p++); - tmp |= (~0UL >> (BITS_PER_LONG - offset)); - if (size < BITS_PER_LONG) - goto found_first; - if (~tmp) - goto found_middle; - size -= BITS_PER_LONG; - result += BITS_PER_LONG; + if (start >= nbits) + return nbits; + + tmp = addr[start / BITS_PER_LONG] ^ mask; + + /* Handle 1st word. */ + if (!IS_ALIGNED(start, BITS_PER_LONG)) { + tmp &= ext2_swab(HIGH_BITS_MASK(start % BITS_PER_LONG)); + start = round_down(start, BITS_PER_LONG); } - while (size & ~(BITS_PER_LONG - 1)) { - if (~(tmp = *(p++))) - goto found_middle_swap; - result += BITS_PER_LONG; - size -= BITS_PER_LONG; + while (!tmp) { + start += BITS_PER_LONG; + if (start >= nbits) + return nbits; + + tmp = addr[start / BITS_PER_LONG] ^ mask; } - if (!size) - return result; - tmp = ext2_swabp(p); -found_first: - tmp |= ~0UL << size; - if (tmp == ~0UL) /* Are any bits zero? */ - return result + size; /* Nope. Skip ffz */ -found_middle: - return result + ffz(tmp); -found_middle_swap: - return result + ffz(ext2_swab(tmp)); + return min(start + __ffs(ext2_swab(tmp)), nbits); +} +#endif + +#ifndef find_next_zero_bit_le +unsigned long find_next_zero_bit_le(const void *addr, unsigned + long size, unsigned long offset) +{ + return _find_next_bit_le(addr, size, offset, ~0UL); } EXPORT_SYMBOL(find_next_zero_bit_le); #endif @@ -239,45 +170,7 @@ EXPORT_SYMBOL(find_next_zero_bit_le); unsigned long find_next_bit_le(const void *addr, unsigned long size, unsigned long offset) { - const unsigned long *p = addr; - unsigned long result = offset & ~(BITS_PER_LONG - 1); - unsigned long tmp; - - if (offset >= size) - return size; - p += BITOP_WORD(offset); - size -= result; - offset &= (BITS_PER_LONG - 1UL); - if (offset) { - tmp = ext2_swabp(p++); - tmp &= (~0UL << offset); - if (size < BITS_PER_LONG) - goto found_first; - if (tmp) - goto found_middle; - size -= BITS_PER_LONG; - result += BITS_PER_LONG; - } - - while (size & ~(BITS_PER_LONG - 1)) { - tmp = *(p++); - if (tmp) - goto found_middle_swap; - result += BITS_PER_LONG; - size -= BITS_PER_LONG; - } - if (!size) - return result; - tmp = ext2_swabp(p); -found_first: - tmp &= (~0UL >> (BITS_PER_LONG - size)); - if (tmp == 0UL) /* Are any bits set? */ - return result + size; /* Nope. */ -found_middle: - return result + __ffs(tmp); - -found_middle_swap: - return result + __ffs(ext2_swab(tmp)); + return _find_next_bit_le(addr, size, offset, 0UL); } EXPORT_SYMBOL(find_next_bit_le); #endif -- 2.1.0 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/