
Clean up the ARM bitops functions. Introduce findbit.S, which provides optimized assembly implementations of the find-first/find-next (zero) bit routines.
Signed-off-by: Nishanth Menon <x0nishan@ti.com>
---
 arch/arm/lib/Makefile    |    1
 arch/arm/lib/findbit.S   |  181 +++++++++++++++++++++++++++++++++++++++++++++++
 include/asm-arm/bitops.h |  133 +++++++++++++++++++++++----------
 3 files changed, 273 insertions(+), 42 deletions(-)
Index: u-boot-v2.git/arch/arm/lib/Makefile =================================================================== --- u-boot-v2.git.orig/arch/arm/lib/Makefile 2008-06-19 00:52:51.000000000 -0500 +++ u-boot-v2.git/arch/arm/lib/Makefile 2008-06-19 00:54:17.000000000 -0500 @@ -8,6 +8,7 @@ obj-y += _udivsi3.o obj-y += _umodsi3.o obj-y += _lshrdi3.o +obj-y += findbit.o obj-y += arm.o obj-$(CONFIG_MODULES) += module.o extra-$(CONFIG_GENERIC_LINKER_SCRIPT) += u-boot.lds Index: u-boot-v2.git/arch/arm/lib/findbit.S =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ u-boot-v2.git/arch/arm/lib/findbit.S 2008-06-19 00:54:17.000000000 -0500 @@ -0,0 +1,181 @@ +/** + * @file + * @brief common bitops + */ +/* + * Originally from Linux kernel + * arch/arm/lib/findbit.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * 16th March 2001 - John Ripley jripley@sonicblue.com + * Fixed so that "size" is an exclusive not an inclusive quantity. + * All users of these functions expect exclusive sizes, and may + * also call with zero size. + * Reworked by rmk. + */ + .text + +/* + * Purpose : Find a 'zero' bit + * Prototype: int find_first_zero_bit(void *addr, unsigned int maxbit); + */ + .globl _find_first_zero_bit_le; +_find_first_zero_bit_le: + teq r1, #0 + beq 3f + mov r2, #0 +1: ldrb r3, [r0, r2, lsr #3] + eors r3, r3, #0xff @ invert bits + bne .L_found @ any now set - found zero bit + add r2, r2, #8 @ next bit pointer +2: cmp r2, r1 @ any more? 
+ blo 1b +3: mov r0, r1 @ no free bits + mov pc, lr + +/* + * Purpose : Find next 'zero' bit + * Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, + * int offset) + */ + .globl _find_next_zero_bit_le; +_find_next_zero_bit_le: + teq r1, #0 + beq 3b + ands ip, r2, #7 + beq 1b @ If new byte, goto old routine + ldrb r3, [r0, r2, lsr #3] + eor r3, r3, #0xff @ now looking for a 1 bit + movs r3, r3, lsr ip @ shift off unused bits + bne .L_found + orr r2, r2, #7 @ if zero, then no bits here + add r2, r2, #1 @ align bit pointer + b 2b @ loop for next bit + +/* + * Purpose : Find a 'one' bit + * Prototype: int find_first_bit(const unsigned long *addr, unsigned int maxbit) + */ + .globl _find_first_bit_le; +_find_first_bit_le: + teq r1, #0 + beq 3f + mov r2, #0 +1: ldrb r3, [r0, r2, lsr #3] + movs r3, r3 + bne .L_found @ any now set - found zero bit + add r2, r2, #8 @ next bit pointer +2: cmp r2, r1 @ any more? + blo 1b +3: mov r0, r1 @ no free bits + mov pc, lr + +/* + * Purpose : Find next 'one' bit + * Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, + * int offset) + */ + .globl _find_next_bit_le; +_find_next_bit_le: + teq r1, #0 + beq 3b + ands ip, r2, #7 + beq 1b @ If new byte, goto old routine + ldrb r3, [r0, r2, lsr #3] + movs r3, r3, lsr ip @ shift off unused bits + bne .L_found + orr r2, r2, #7 @ if zero, then no bits here + add r2, r2, #1 @ align bit pointer + b 2b @ loop for next bit + +#ifdef __ARMEB__ + + .globl _find_first_zero_bit_be; +_find_first_zero_bit_be: + teq r1, #0 + beq 3f + mov r2, #0 +1: eor r3, r2, #0x18 @ big endian byte ordering + ldrb r3, [r0, r3, lsr #3] + eors r3, r3, #0xff @ invert bits + bne .L_found @ any now set - found zero bit + add r2, r2, #8 @ next bit pointer +2: cmp r2, r1 @ any more? 
+ blo 1b +3: mov r0, r1 @ no free bits + mov pc, lr + + .globl _find_next_zero_bit_be; +_find_next_zero_bit_be: + teq r1, #0 + beq 3b + ands ip, r2, #7 + beq 1b @ If new byte, goto old routine + eor r3, r2, #0x18 @ big endian byte ordering + ldrb r3, [r0, r3, lsr #3] + eor r3, r3, #0xff @ now looking for a 1 bit + movs r3, r3, lsr ip @ shift off unused bits + bne .L_found + orr r2, r2, #7 @ if zero, then no bits here + add r2, r2, #1 @ align bit pointer + b 2b @ loop for next bit + + .globl _find_first_bit_be; +_find_first_bit_be: + teq r1, #0 + beq 3f + mov r2, #0 +1: eor r3, r2, #0x18 @ big endian byte ordering + ldrb r3, [r0, r3, lsr #3] + movs r3, r3 + bne .L_found @ any now set - found zero bit + add r2, r2, #8 @ next bit pointer +2: cmp r2, r1 @ any more? + blo 1b +3: mov r0, r1 @ no free bits + mov pc, lr + + .globl _find_next_bit_be; +_find_next_bit_be: + teq r1, #0 + beq 3b + ands ip, r2, #7 + beq 1b @ If new byte, goto old routine + eor r3, r2, #0x18 @ big endian byte ordering + ldrb r3, [r0, r3, lsr #3] + movs r3, r3, lsr ip @ shift off unused bits + bne .L_found + orr r2, r2, #7 @ if zero, then no bits here + add r2, r2, #1 @ align bit pointer + b 2b @ loop for next bit + +#endif + +/* + * One or more bits in the LSB of r3 are assumed to be set. + */ +.L_found: +#if __LINUX_ARM_ARCH__ >= 5 + rsb r1, r3, #0 + and r3, r3, r1 + clz r3, r3 + rsb r3, r3, #31 + add r0, r2, r3 +#else + tst r3, #0x0f + addeq r2, r2, #4 + movne r3, r3, lsl #4 + tst r3, #0x30 + addeq r2, r2, #2 + movne r3, r3, lsl #2 + tst r3, #0x40 + addeq r2, r2, #1 + mov r0, r2 +#endif + mov pc, lr + Index: u-boot-v2.git/include/asm-arm/bitops.h =================================================================== --- u-boot-v2.git.orig/include/asm-arm/bitops.h 2008-06-19 00:52:51.000000000 -0500 +++ u-boot-v2.git/include/asm-arm/bitops.h 2008-06-19 00:54:30.000000000 -0500 @@ -15,32 +15,28 @@ #ifndef __ASM_ARM_BITOPS_H #define __ASM_ARM_BITOPS_H
+#ifndef _LINUX_BITOPS_H +#error only <linux/bitops.h> can be included directly +#endif + /* - * Function prototypes to keep gcc -Wall happy. + * Functions equivalent of ops.h */ -extern void set_bit(int nr, volatile void * addr); - static inline void __set_bit(int nr, volatile void *addr) { ((unsigned char *) addr)[nr >> 3] |= (1U << (nr & 7)); }
-extern void clear_bit(int nr, volatile void * addr); - static inline void __clear_bit(int nr, volatile void *addr) { ((unsigned char *) addr)[nr >> 3] &= ~(1U << (nr & 7)); }
-extern void change_bit(int nr, volatile void * addr); - static inline void __change_bit(int nr, volatile void *addr) { ((unsigned char *) addr)[nr >> 3] ^= (1U << (nr & 7)); }
-extern int test_and_set_bit(int nr, volatile void * addr); - static inline int __test_and_set_bit(int nr, volatile void *addr) { unsigned int mask = 1 << (nr & 7); @@ -51,8 +47,6 @@ return oldval & mask; }
-extern int test_and_clear_bit(int nr, volatile void * addr); - static inline int __test_and_clear_bit(int nr, volatile void *addr) { unsigned int mask = 1 << (nr & 7); @@ -63,8 +57,6 @@ return oldval & mask; }
-extern int test_and_change_bit(int nr, volatile void * addr); - static inline int __test_and_change_bit(int nr, volatile void *addr) { unsigned int mask = 1 << (nr & 7); @@ -75,9 +67,6 @@ return oldval & mask; }
-extern int find_first_zero_bit(void * addr, unsigned size); -extern int find_next_zero_bit(void * addr, int size, int offset); - /* * This routine doesn't need to be atomic. */ @@ -86,6 +75,48 @@ return ((unsigned char *) addr)[nr >> 3] & (1U << (nr & 7)); }
+#define set_bit(x, y) __set_bit(x, y) +#define clear_bit(x, y) __clear_bit(x, y) +#define change_bit(x, y) __change_bit(x, y) +#define test_and_set_bit(x, y) __test_and_set_bit(x, y) +#define test_and_clear_bit(x, y) __test_and_clear_bit(x, y) +#define test_and_change_bit(x, y) __test_and_change_bit(x, y) + +#ifndef __ARMEB__ +/* + * These are the little endian definitions. + */ +extern int _find_first_zero_bit_le(const void *p, unsigned size); +extern int _find_next_zero_bit_le(const void *p, int size, int offset); +extern int _find_first_bit_le(const unsigned long *p, unsigned size); +extern int _find_next_bit_le(const unsigned long *p, int size, int offset); +#define find_first_zero_bit(p, sz) _find_first_zero_bit_le(p, sz) +#define find_next_zero_bit(p, sz, off) _find_next_zero_bit_le(p, sz, off) +#define find_first_bit(p, sz) _find_first_bit_le(p, sz) +#define find_next_bit(p, sz, off) _find_next_bit_le(p, sz, off) + +#define WORD_BITOFF_TO_LE(x) ((x)) + +#else /* ! __ARMEB__ */ + +/* + * These are the big endian definitions. + */ +extern int _find_first_zero_bit_be(const void *p, unsigned size); +extern int _find_next_zero_bit_be(const void *p, int size, int offset); +extern int _find_first_bit_be(const unsigned long *p, unsigned size); +extern int _find_next_bit_be(const unsigned long *p, int size, int offset); +#define find_first_zero_bit(p, sz) _find_first_zero_bit_be(p, sz) +#define find_next_zero_bit(p, sz, off) _find_next_zero_bit_be(p, sz, off) +#define find_first_bit(p, sz) _find_first_bit_be(p, sz) +#define find_next_bit(p, sz, off) _find_next_bit_be(p, sz, off) + +#define WORD_BITOFF_TO_LE(x) ((x) ^ 0x18) + +#endif /* __ARMEB__ */ + +#if defined(__LINUX_ARM_ARCH__) && (__LINUX_ARM_ARCH__ < 5) + /* * ffz = Find First Zero in word. Undefined if no zero exists, * so code should check against ~0UL first.. 
@@ -103,35 +134,53 @@ if (word & 0x40000000) { k -= 1; } return k; } +#include <asm-generic/bitops/__ffs.h> +#include <asm-generic/bitops/ffs.h> +#include <asm-generic/bitops/fls.h> +#else /* ! __ARM__USE_GENERIC_FF */ + +static inline int constant_fls(int x) +{ + int r = 32; + + if (!x) + return 0; + if (!(x & 0xffff0000u)) { + x <<= 16; + r -= 16; + } + if (!(x & 0xff000000u)) { + x <<= 8; + r -= 8; + } + if (!(x & 0xf0000000u)) { + x <<= 4; + r -= 4; + } + if (!(x & 0xc0000000u)) { + x <<= 2; + r -= 2; + } + if (!(x & 0x80000000u)) { + x <<= 1; + r -= 1; + } + return r; +}
/* - * ffs: find first bit set. This is defined the same way as - * the libc and compiler builtin ffs routines, therefore - * differs in spirit from the above ffz (man ffs). - */ - -#define ffs(x) generic_ffs(x) - -/* - * hweightN: returns the hamming weight (i.e. the number - * of bits set) of a N-bit word + * On ARMv5 and above those functions can be implemented around + * the clz instruction for much better code efficiency. */ +#define fls(x) \ + (__builtin_constant_p(x) ? constant_fls(x) : \ + ({ int __r; asm("clz\t%0, %1" : "=r"(__r) : "r"(x) : "cc"); 32-__r; })) +#define ffs(x) ({ unsigned long __t = (x); fls(__t &-__t); }) +#define __ffs(x) (ffs(x) - 1) +#define ffz(x) __ffs(~(x)) +#endif /* __ARM__USE_GENERIC_FF */ +#include <asm-generic/bitops/fls64.h>
-#define hweight32(x) generic_hweight32(x) -#define hweight16(x) generic_hweight16(x) -#define hweight8(x) generic_hweight8(x) - -#define ext2_set_bit test_and_set_bit -#define ext2_clear_bit test_and_clear_bit -#define ext2_test_bit test_bit -#define ext2_find_first_zero_bit find_first_zero_bit -#define ext2_find_next_zero_bit find_next_zero_bit - -/* Bitmap functions for the minix filesystem. */ -#define minix_test_and_set_bit(nr,addr) test_and_set_bit(nr,addr) -#define minix_set_bit(nr,addr) set_bit(nr,addr) -#define minix_test_and_clear_bit(nr,addr) test_and_clear_bit(nr,addr) -#define minix_test_bit(nr,addr) test_bit(nr,addr) -#define minix_find_first_zero_bit(addr,size) find_first_zero_bit(addr,size) +#include <asm-generic/bitops/hweight.h>
#endif /* _ARM_BITOPS_H */