Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S932571AbbBQCgi (ORCPT ); Mon, 16 Feb 2015 21:36:38 -0500 Received: from mail-lb0-f179.google.com ([209.85.217.179]:42981 "EHLO mail-lb0-f179.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1754546AbbBQCf7 (ORCPT ); Mon, 16 Feb 2015 21:35:59 -0500 From: Yury Norov To: linux@horizon.com, klimov.linux@gmail.com, linux@rasmusvillemoes.dk Cc: akpm@linux-foundation.org, davem@davemloft.net, dborkman@redhat.com, hannes@stressinduktion.org, laijs@cn.fujitsu.com, msalter@redhat.com, takahiro.akashi@linaro.org, tgraf@suug.ch, valentinrothberg@gmail.com, yury.norov@gmail.com, linux-kernel@vger.kernel.org, chris@chris-wilson.co.uk Subject: [PATCH v4 1/3] lib: find_*_bit reimplementation Date: Tue, 17 Feb 2015 05:35:41 +0300 Message-Id: <1424140543-865-2-git-send-email-yury.norov@gmail.com> X-Mailer: git-send-email 2.1.0 In-Reply-To: <1424140543-865-1-git-send-email-yury.norov@gmail.com> References: <1423404619-10653-1-git-send-email-yury.norov@gmail.com> <1424140543-865-1-git-send-email-yury.norov@gmail.com> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 12647 Lines: 458 The new implementation takes less space in the sources (see diffstat) and in the object. For me the text size is 710 bytes before vs 453 bytes after. It also shows better performance. The find_last_bit description is fixed because of an obvious typo. This patch introduces two macros, {LOW,HIGH}_BITS_MASK, which do almost the same as BITMAP_{FIRST,LAST}_WORD_MASK in include/linux/bitmap.h. But the 'LAST' macro is potentially less efficient, because it issues a conditional branch that can be omitted. If it is replaced one day by a more efficient implementation, {LOW,HIGH}_BITS_MASK can be removed. 
Signed-off-by: Yury Norov --- include/linux/bitops.h | 4 +- lib/find_last_bit.c | 37 +++---- lib/find_next_bit.c | 269 ++++++++++++++----------------------------------- 3 files changed, 94 insertions(+), 216 deletions(-) diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 5d858e0..297f5bd 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -218,9 +218,9 @@ static inline unsigned long __ffs64(u64 word) /** * find_last_bit - find the last set bit in a memory region * @addr: The address to start the search at - * @size: The maximum size to search + * @size: The number of bits to search * - * Returns the bit number of the first set bit, or size. + * Returns the bit number of the last set bit, or size. */ extern unsigned long find_last_bit(const unsigned long *addr, unsigned long size); diff --git a/lib/find_last_bit.c b/lib/find_last_bit.c index 91ca09f..edbb281 100644 --- a/lib/find_last_bit.c +++ b/lib/find_last_bit.c @@ -4,6 +4,9 @@ * Written by Rusty Russell * (Inspired by David Howell's find_next_bit implementation) * + * Rewritten by Yury Norov to decrease + * size and improve performance, 2015. + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -12,36 +15,26 @@ #include #include -#include -#include +#include + +#define LOW_BITS_MASK(nr) (~0UL >> -(nr)) #ifndef find_last_bit unsigned long find_last_bit(const unsigned long *addr, unsigned long size) { - unsigned long words; - unsigned long tmp; + if (size) { + unsigned long val = LOW_BITS_MASK(size % BITS_PER_LONG); + unsigned long idx = (size-1) / BITS_PER_LONG; - /* Start at final word. */ - words = size / BITS_PER_LONG; + do { + val &= addr[idx]; + if (val) + return idx * BITS_PER_LONG + __fls(val); - /* Partial final word? 
*/ - if (size & (BITS_PER_LONG-1)) { - tmp = (addr[words] & (~0UL >> (BITS_PER_LONG - - (size & (BITS_PER_LONG-1))))); - if (tmp) - goto found; + val = ~0ul; + } while (idx--); } - - while (words) { - tmp = addr[--words]; - if (tmp) { -found: - return words * BITS_PER_LONG + __fls(tmp); - } - } - - /* Not found */ return size; } EXPORT_SYMBOL(find_last_bit); diff --git a/lib/find_next_bit.c b/lib/find_next_bit.c index 0cbfc0b..1f5f108 100644 --- a/lib/find_next_bit.c +++ b/lib/find_next_bit.c @@ -3,6 +3,9 @@ * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * + * Rewritten by Yury Norov to decrease + * size and improve performance, 2015. + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -11,98 +14,60 @@ #include #include -#include -#include +#include -#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) +#define HIGH_BITS_MASK(nr) (~0UL << (nr)) + +#if !defined(find_next_bit) || !defined(find_next_zero_bit) -#ifndef find_next_bit /* - * Find the next set bit in a memory region. + * This is a common helper function for find_next_bit and + * find_next_zero_bit. The difference is the "invert" argument, which + * is XORed with each fetched word before searching it for one bits. 
*/ -unsigned long find_next_bit(const unsigned long *addr, unsigned long size, - unsigned long offset) +static unsigned long _find_next_bit(const unsigned long *addr, + unsigned long nbits, unsigned long start, unsigned long invert) { - const unsigned long *p = addr + BITOP_WORD(offset); - unsigned long result = offset & ~(BITS_PER_LONG-1); unsigned long tmp; - if (offset >= size) - return size; - size -= result; - offset %= BITS_PER_LONG; - if (offset) { - tmp = *(p++); - tmp &= (~0UL << offset); - if (size < BITS_PER_LONG) - goto found_first; - if (tmp) - goto found_middle; - size -= BITS_PER_LONG; - result += BITS_PER_LONG; - } - while (size & ~(BITS_PER_LONG-1)) { - if ((tmp = *(p++))) - goto found_middle; - result += BITS_PER_LONG; - size -= BITS_PER_LONG; + if (!nbits || start >= nbits) + return nbits; + + tmp = addr[start / BITS_PER_LONG] ^ invert; + + /* Handle 1st word. */ + tmp &= HIGH_BITS_MASK(start % BITS_PER_LONG); + start = round_down(start, BITS_PER_LONG); + + while (!tmp) { + start += BITS_PER_LONG; + if (start >= nbits) + return nbits; + + tmp = addr[start / BITS_PER_LONG] ^ invert; } - if (!size) - return result; - tmp = *p; -found_first: - tmp &= (~0UL >> (BITS_PER_LONG - size)); - if (tmp == 0UL) /* Are any bits set? */ - return result + size; /* Nope. */ -found_middle: - return result + __ffs(tmp); + return min(start + __ffs(tmp), nbits); } -EXPORT_SYMBOL(find_next_bit); #endif -#ifndef find_next_zero_bit +#ifndef find_next_bit /* - * This implementation of find_{first,next}_zero_bit was stolen from - * Linus' asm-alpha/bitops.h. + * Find the next set bit in a memory region. 
*/ +unsigned long find_next_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) +{ + return _find_next_bit(addr, size, offset, 0UL); +} +EXPORT_SYMBOL(find_next_bit); +#endif + +#ifndef find_next_zero_bit unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { - const unsigned long *p = addr + BITOP_WORD(offset); - unsigned long result = offset & ~(BITS_PER_LONG-1); - unsigned long tmp; - - if (offset >= size) - return size; - size -= result; - offset %= BITS_PER_LONG; - if (offset) { - tmp = *(p++); - tmp |= ~0UL >> (BITS_PER_LONG - offset); - if (size < BITS_PER_LONG) - goto found_first; - if (~tmp) - goto found_middle; - size -= BITS_PER_LONG; - result += BITS_PER_LONG; - } - while (size & ~(BITS_PER_LONG-1)) { - if (~(tmp = *(p++))) - goto found_middle; - result += BITS_PER_LONG; - size -= BITS_PER_LONG; - } - if (!size) - return result; - tmp = *p; - -found_first: - tmp |= ~0UL << size; - if (tmp == ~0UL) /* Are any bits zero? */ - return result + size; /* Nope. */ -found_middle: - return result + ffz(tmp); + return _find_next_bit(addr, size, offset, ~0UL); } EXPORT_SYMBOL(find_next_zero_bit); #endif @@ -113,24 +78,14 @@ EXPORT_SYMBOL(find_next_zero_bit); */ unsigned long find_first_bit(const unsigned long *addr, unsigned long size) { - const unsigned long *p = addr; - unsigned long result = 0; - unsigned long tmp; + unsigned long idx; - while (size & ~(BITS_PER_LONG-1)) { - if ((tmp = *(p++))) - goto found; - result += BITS_PER_LONG; - size -= BITS_PER_LONG; + for (idx = 0; idx * BITS_PER_LONG < size; idx++) { + if (addr[idx]) + return min(idx * BITS_PER_LONG + __ffs(addr[idx]), size); } - if (!size) - return result; - tmp = (*p) & (~0UL >> (BITS_PER_LONG - size)); - if (tmp == 0UL) /* Are any bits set? */ - return result + size; /* Nope. 
*/ -found: - return result + __ffs(tmp); + return size; } EXPORT_SYMBOL(find_first_bit); #endif @@ -141,24 +96,14 @@ EXPORT_SYMBOL(find_first_bit); */ unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) { - const unsigned long *p = addr; - unsigned long result = 0; - unsigned long tmp; + unsigned long idx; - while (size & ~(BITS_PER_LONG-1)) { - if (~(tmp = *(p++))) - goto found; - result += BITS_PER_LONG; - size -= BITS_PER_LONG; + for (idx = 0; idx * BITS_PER_LONG < size; idx++) { + if (addr[idx] != ~0UL) + return min(idx * BITS_PER_LONG + ffz(addr[idx]), size); } - if (!size) - return result; - tmp = (*p) | (~0UL << size); - if (tmp == ~0UL) /* Are any bits zero? */ - return result + size; /* Nope. */ -found: - return result + ffz(tmp); + return size; } EXPORT_SYMBOL(find_first_zero_bit); #endif @@ -166,18 +111,6 @@ EXPORT_SYMBOL(find_first_zero_bit); #ifdef __BIG_ENDIAN /* include/linux/byteorder does not support "unsigned long" type */ -static inline unsigned long ext2_swabp(const unsigned long * x) -{ -#if BITS_PER_LONG == 64 - return (unsigned long) __swab64p((u64 *) x); -#elif BITS_PER_LONG == 32 - return (unsigned long) __swab32p((u32 *) x); -#else -#error BITS_PER_LONG not defined -#endif -} - -/* include/linux/byteorder doesn't support "unsigned long" type */ static inline unsigned long ext2_swab(const unsigned long y) { #if BITS_PER_LONG == 64 @@ -189,48 +122,38 @@ static inline unsigned long ext2_swab(const unsigned long y) #endif } -#ifndef find_next_zero_bit_le -unsigned long find_next_zero_bit_le(const void *addr, unsigned - long size, unsigned long offset) +#if !defined(find_next_bit_le) || !defined(find_next_zero_bit_le) +static unsigned long _find_next_bit_le(const unsigned long *addr, + unsigned long nbits, unsigned long start, unsigned long invert) { - const unsigned long *p = addr; - unsigned long result = offset & ~(BITS_PER_LONG - 1); unsigned long tmp; - if (offset >= size) - return size; - p += 
BITOP_WORD(offset); - size -= result; - offset &= (BITS_PER_LONG - 1UL); - if (offset) { - tmp = ext2_swabp(p++); - tmp |= (~0UL >> (BITS_PER_LONG - offset)); - if (size < BITS_PER_LONG) - goto found_first; - if (~tmp) - goto found_middle; - size -= BITS_PER_LONG; - result += BITS_PER_LONG; - } + if (!nbits || start >= nbits) + return nbits; + + tmp = addr[start / BITS_PER_LONG] ^ invert; + + /* Handle 1st word. */ + tmp &= ext2_swab(HIGH_BITS_MASK(start % BITS_PER_LONG)); + start = round_down(start, BITS_PER_LONG); - while (size & ~(BITS_PER_LONG - 1)) { - if (~(tmp = *(p++))) - goto found_middle_swap; - result += BITS_PER_LONG; - size -= BITS_PER_LONG; + while (!tmp) { + start += BITS_PER_LONG; + if (start >= nbits) + return nbits; + + tmp = addr[start / BITS_PER_LONG] ^ invert; } - if (!size) - return result; - tmp = ext2_swabp(p); -found_first: - tmp |= ~0UL << size; - if (tmp == ~0UL) /* Are any bits zero? */ - return result + size; /* Nope. Skip ffz */ -found_middle: - return result + ffz(tmp); -found_middle_swap: - return result + ffz(ext2_swab(tmp)); + return min(start + __ffs(ext2_swab(tmp)), nbits); +} +#endif + +#ifndef find_next_zero_bit_le +unsigned long find_next_zero_bit_le(const void *addr, unsigned + long size, unsigned long offset) +{ + return _find_next_bit_le(addr, size, offset, ~0UL); } EXPORT_SYMBOL(find_next_zero_bit_le); #endif @@ -239,45 +162,7 @@ EXPORT_SYMBOL(find_next_zero_bit_le); unsigned long find_next_bit_le(const void *addr, unsigned long size, unsigned long offset) { - const unsigned long *p = addr; - unsigned long result = offset & ~(BITS_PER_LONG - 1); - unsigned long tmp; - - if (offset >= size) - return size; - p += BITOP_WORD(offset); - size -= result; - offset &= (BITS_PER_LONG - 1UL); - if (offset) { - tmp = ext2_swabp(p++); - tmp &= (~0UL << offset); - if (size < BITS_PER_LONG) - goto found_first; - if (tmp) - goto found_middle; - size -= BITS_PER_LONG; - result += BITS_PER_LONG; - } - - while (size & ~(BITS_PER_LONG - 1)) 
{ - tmp = *(p++); - if (tmp) - goto found_middle_swap; - result += BITS_PER_LONG; - size -= BITS_PER_LONG; - } - if (!size) - return result; - tmp = ext2_swabp(p); -found_first: - tmp &= (~0UL >> (BITS_PER_LONG - size)); - if (tmp == 0UL) /* Are any bits set? */ - return result + size; /* Nope. */ -found_middle: - return result + __ffs(tmp); - -found_middle_swap: - return result + __ffs(ext2_swab(tmp)); + return _find_next_bit_le(addr, size, offset, 0UL); } EXPORT_SYMBOL(find_next_bit_le); #endif -- 2.1.0 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/