Date: 9 Feb 2015 03:32:11 -0500
Message-ID: <20150209083211.11953.qmail@ns.horizon.com>
From: "George Spelvin"
To: akpm@linux-foundation.org, chris@chris-wilson.co.uk, davem@davemloft.net,
    dborkman@redhat.com, hannes@stressinduktion.org, klimov.linux@gmail.com,
    laijs@cn.fujitsu.com, linux@horizon.com, msalter@redhat.com,
    takahiro.akashi@linaro.org, tgraf@suug.ch, valentinrothberg@gmail.com,
    yury.norov@gmail.com
Cc: linux-kernel@vger.kernel.org
In-Reply-To: <1423404619-10653-2-git-send-email-yury.norov@gmail.com>
Subject: Re: [PATCH v3 1/3] lib: find_*_bit reimplementation

Three more comments on the code.  Two are minor, but one looks like a
bug, so for now it's

Nacked-by: George Spelvin

Specifically, it seems like find_last_bit used to ignore trailing garbage
in the bitmap, but now gives up searching as soon as the last word is
non-zero, even if its only set bits lie beyond "size".

The first minor one is that I don't think the first-word masking needs
to be conditional.  The general code works fine if the start is aligned
(HIGH_BITS_MASK just generates an all-ones mask), is quite quick, and
saves a test & conditional branch.

The second minor one is that the comment above find_last_bit in
include/linux/bitops.h has an obvious typo, and should probably be
fixed, too.

Here's a diff on top of yours with all my suggested changes, both these
two and the ones from my previous e-mail.

diff --git a/find_last_bit.c b/find_last_bit.c
index e67e970..106050f 100644
--- a/find_last_bit.c
+++ b/find_last_bit.c
@@ -13,6 +13,7 @@
  * 2 of the License, or (at your option) any later version.
  */

+#include
 #include
 #include

diff --git a/find_next_bit.c b/find_next_bit.c
index 71aa497..9d01d4a 100644
--- a/find_next_bit.c
+++ b/find_next_bit.c
@@ -16,41 +16,34 @@
 #include
 #include

-#define HIGH_BITS_MASK(nr)		(ULONG_MAX << (nr))
+#define HIGH_BITS_MASK(nr)		(~0UL << (nr))

 #if !defined(find_next_bit) || !defined(find_next_zero_bit)
+
+/*
+ * This is a common helper function for find_next_bit and
+ * find_next_zero_bit. The difference is the "invert" argument, which
+ * is XORed with each fetched word before searching it for one bits.
+ */
 static unsigned long _find_next_bit(const unsigned long *addr,
-		unsigned long nbits, unsigned long start, unsigned long mask)
+		unsigned long nbits, unsigned long start, unsigned long invert)
 {
 	unsigned long tmp;

 	if (start >= nbits)
 		return nbits;

-	tmp = addr[start / BITS_PER_LONG] ^ mask;
+	tmp = addr[start / BITS_PER_LONG] ^ invert;

 	/* Handle 1st word. */
-	if (!IS_ALIGNED(start, BITS_PER_LONG)) {
-		tmp &= HIGH_BITS_MASK(start % BITS_PER_LONG);
-		start = round_down(start, BITS_PER_LONG);
-	}
+	tmp &= HIGH_BITS_MASK(start % BITS_PER_LONG);
+	start = round_down(start, BITS_PER_LONG);

 	while (!tmp) {
 		start += BITS_PER_LONG;
 		if (start >= nbits)
 			return nbits;
-
-		/*
-		 * This is an equvalent for:
-		 *
-		 * tmp = find_set ? addr[start / BITS_PER_LONG]
-		 *		  : ~addr[start / BITS_PER_LONG];
-		 *
-		 * but saves a branch condition.
-		 *
-		 * Thanks George Spelvin for idea.
-		 */
-		tmp = addr[start / BITS_PER_LONG] ^ mask;
+		tmp = addr[start / BITS_PER_LONG] ^ invert;
 	}

 	return min(start + __ffs(tmp), nbits);
@@ -105,7 +98,7 @@ unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
 	unsigned long idx;

 	for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
-		if (addr[idx] != ULONG_MAX)
+		if (~addr[idx])
 			return min(idx * BITS_PER_LONG + ffz(addr[idx]), size);
 	}

@@ -130,14 +123,14 @@ static inline unsigned long ext2_swab(const unsigned long y)

 #if !defined(find_next_bit_le) || !defined(find_next_zero_bit_le)
 static unsigned long _find_next_bit_le(const unsigned long *addr,
-		unsigned long nbits, unsigned long start, unsigned long mask)
+		unsigned long nbits, unsigned long start, unsigned long invert)
 {
 	unsigned long tmp;

 	if (start >= nbits)
 		return nbits;

-	tmp = addr[start / BITS_PER_LONG] ^ mask;
+	tmp = addr[start / BITS_PER_LONG] ^ invert;

 	/* Handle 1st word. */
 	if (!IS_ALIGNED(start, BITS_PER_LONG)) {
@@ -150,7 +143,7 @@ static unsigned long _find_next_bit_le(const unsigned long *addr,
 		if (start >= nbits)
 			return nbits;

-		tmp = addr[start / BITS_PER_LONG] ^ mask;
+		tmp = addr[start / BITS_PER_LONG] ^ invert;
 	}

 	return min(start + __ffs(ext2_swab(tmp)), nbits);
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 5d858e02..4a5e5934 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -218,9 +218,9 @@ static inline unsigned long __ffs64(u64 word)
 /**
  * find_last_bit - find the last set bit in a memory region
  * @addr: The address to start the search at
- * @size: The maximum size to search
+ * @size: The number of bits to search
  *
- * Returns the bit number of the first set bit, or size.
+ * Returns the bit number of the last set bit, or size if no bits are set.
  */
 extern unsigned long find_last_bit(const unsigned long *addr,
				   unsigned long size);

Now for the serious issue.  I see a functional change to find_last_bit,
with no justifying comments:

diff --git a/lib/find_last_bit.c b/lib/find_last_bit.c
index 91ca09f..106050f 100644
--- a/lib/find_last_bit.c
+++ b/lib/find_last_bit.c
@@ -19,29 +21,13 @@
 unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
 {
-	unsigned long words;
-	unsigned long tmp;
-
-	/* Start at final word. */
-	words = size / BITS_PER_LONG;
-
-	/* Partial final word? */
-	if (size & (BITS_PER_LONG-1)) {
-		tmp = (addr[words] & (~0UL >> (BITS_PER_LONG
-					 - (size & (BITS_PER_LONG-1)))));
-		if (tmp)
-			goto found;
-	}
+	unsigned long idx = DIV_ROUND_UP(size, BITS_PER_LONG);

-	while (words) {
-		tmp = addr[--words];
-		if (tmp) {
-found:
-			return words * BITS_PER_LONG + __fls(tmp);
-		}
+	while (idx--) {
+		if (addr[idx])
+			return min(idx * BITS_PER_LONG + __fls(addr[idx]), size);
 	}

-	/* Not found */
 	return size;
 }
 EXPORT_SYMBOL(find_last_bit);

Previously, the last word was masked, so bits beyond "size" were ignored.
With the revised code, something like find_last_bit(array, 96) will
return 96 if array[1] >> 32 is non-zero, even if array[1] & 0xffffffff
is zero.

Looking through the callers, I haven't found a case where this matters
yet, so perhaps it's a safe optimization, but it really needs to be
documented more clearly if it's intentional.
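
To make the difference concrete, here's a quick user-space sketch of the
two versions quoted above (this test program is mine, not part of the
patch; it assumes 64-bit longs, uses __builtin_clzl in place of the
kernel's __fls, and the _old/_new names are just labels I made up):

#include <stdio.h>

#define BITS_PER_LONG	64

/* Index of the most significant set bit; word must be non-zero. */
static unsigned long __fls(unsigned long word)
{
        return BITS_PER_LONG - 1 - __builtin_clzl(word);
}

/* Current mainline logic: the partial last word is masked first. */
static unsigned long find_last_bit_old(const unsigned long *addr,
                                       unsigned long size)
{
        unsigned long words = size / BITS_PER_LONG;
        unsigned long tmp;

        if (size & (BITS_PER_LONG - 1)) {
                tmp = addr[words] & (~0UL >> (BITS_PER_LONG -
                                              (size & (BITS_PER_LONG - 1))));
                if (tmp)
                        goto found;
        }
        while (words) {
                tmp = addr[--words];
                if (tmp) {
found:
                        return words * BITS_PER_LONG + __fls(tmp);
                }
        }
        return size;    /* not found */
}

/* The patch's logic: stop at the first non-zero word, unmasked. */
static unsigned long find_last_bit_new(const unsigned long *addr,
                                       unsigned long size)
{
        unsigned long idx = (size + BITS_PER_LONG - 1) / BITS_PER_LONG;

        while (idx--) {
                if (addr[idx]) {
                        unsigned long bit = idx * BITS_PER_LONG + __fls(addr[idx]);

                        return bit < size ? bit : size;  /* min(bit, size) */
                }
        }
        return size;    /* not found */
}

int main(void)
{
        unsigned long array[2];

        array[0] = 1UL << 10;   /* bit  10: the only set bit below size = 96 */
        array[1] = 1UL << 40;   /* bit 104: trailing garbage beyond size = 96 */

        printf("old: %lu\n", find_last_bit_old(array, 96));  /* prints 10 */
        printf("new: %lu\n", find_last_bit_new(array, 96));  /* prints 96, "none" */
        return 0;
}

The old code masks addr[1] down to zero and keeps searching, finding
bit 10; the new code stops at the garbage word and reports no bit set.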
If no change was desired, I'd think a good way to do this would be:

unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
{
        size_t idx = DIV_ROUND_UP(size, BITS_PER_LONG);
        unsigned long tmp = addr[--idx];

        tmp &= (2UL << ((size - 1) % BITS_PER_LONG)) - 1;  /* Mask last word */

        while (!tmp) {
                if (!idx)
                        return size;
                tmp = addr[--idx];
        }

        return idx * BITS_PER_LONG + __fls(tmp);
}

That's not very different from the old code, just cleaned up.

(Feel free to add my Signed-off-by if you think you need it.)
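
P.S.  A tiny stand-alone check of that last-word mask, in case it helps
review (my own illustration, not part of the patch; last_word_mask is
just a name I made up, and it assumes 64-bit longs and size > 0):

#include <stdio.h>

#define BITS_PER_LONG	64

/* Valid-bit mask for the final word of a "size"-bit bitmap (size > 0). */
static unsigned long last_word_mask(unsigned long size)
{
        return (2UL << ((size - 1) % BITS_PER_LONG)) - 1;
}

int main(void)
{
        printf("%016lx\n", last_word_mask(96));  /* 00000000ffffffff: bits 0..31 */
        printf("%016lx\n", last_word_mask(64));  /* ffffffffffffffff: whole word */
        printf("%016lx\n", last_word_mask(1));   /* 0000000000000001: bit 0 only */
        return 0;
}

The (size - 1) form matters when size is a multiple of BITS_PER_LONG:
2UL << 63 wraps to 0 (unsigned shifts are reduced mod 2^64), so the mask
becomes ~0UL and the whole final word is searched.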