[U-Boot] [PATCH 01/11] arm: config: Introduce CONFIG_SYS_ARM_ARCH

Introduce new helper Kconfig option, which is automatically set to the version of ARM architecture for which the U-Boot is built. This is useful when selecting tuning options in the libgcc imported from Linux kernel.
Signed-off-by: Marek Vasut marex@denx.de Cc: Albert Aribaud albert.u.boot@aribaud.net Cc: Masahiro Yamada yamada.masahiro@socionext.com Cc: Simon Glass sjg@chromium.org Cc: Tom Rini trini@konsulko.com --- arch/arm/Kconfig | 14 ++++++++++++++ 1 file changed, 14 insertions(+)
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 9851065..abbe33e 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -64,6 +64,20 @@ config SYS_CPU default "sa1100" if CPU_SA1100 default "armv8" if ARM64
+config SYS_ARM_ARCH + int + default 4 if CPU_ARM720T + default 4 if CPU_ARM920T + default 5 if CPU_ARM926EJS + default 5 if CPU_ARM946ES + default 6 if CPU_ARM1136 + default 6 if CPU_ARM1176 + default 7 if CPU_V7 + default 7 if CPU_V7M + default 5 if CPU_PXA + default 4 if CPU_SA1100 + default 8 if ARM64 + config SEMIHOSTING bool "support boot from semihosting" help

Import unified.h from Linux kernel 4.4.6 , commit 0d1912303e54ed1b2a371be0bba51c384dd57326 . This header file contains macros used in libgcc functions in Linux kernel on ARM and will be needed for the libgcc sync.
Since unified.h defines the W(instr) macro, we must drop this from the macro from memcpy.S , otherwise this triggers a warning about symbol redefinition. In order to keep the changes to unified.h to the minimum, tweak arch/arm/lib/Makefile such that it defines the CONFIG_ARM_ASM_UNIFIED macro, which places .syntax unified into all of the assembler files. This is mandatory.
Moreover, for Thumb2 build, define CONFIG_THUMB2_KERNEL macro if and only if Thumb2 build is enabled. This macro is checked by unified.h and toggles between ARM and Thumb2 variant of the instructions in the assembler source files.
Finally, this patch defines __LINUX_ARM_ARCH__=N macro based on the new CONFIG_SYS_ARM_ARCH Kconfig option. This macro selects between more optimal and more dense codepaths which work on armv5 and newer and less optimal codepaths which work on armv4 and possible armv3m. Tegra2 needs the same special handling as it does in arch/arm/Makefile to cater for the arm720t boot core.
Signed-off-by: Marek Vasut marex@denx.de Cc: Albert Aribaud albert.u.boot@aribaud.net Cc: Masahiro Yamada yamada.masahiro@socionext.com Cc: Simon Glass sjg@chromium.org Cc: Tom Rini trini@konsulko.com --- arch/arm/include/asm/assembler.h | 1 + arch/arm/include/asm/unified.h | 140 +++++++++++++++++++++++++++++++++++++++ arch/arm/lib/Makefile | 9 ++- arch/arm/lib/memcpy.S | 6 -- 4 files changed, 149 insertions(+), 7 deletions(-) create mode 100644 arch/arm/include/asm/unified.h
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 11b80fb..ae1e42f 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -15,6 +15,7 @@ */
#include <config.h> +#include <asm/unified.h>
/* * Endian independent macros for shifting bytes within registers. diff --git a/arch/arm/include/asm/unified.h b/arch/arm/include/asm/unified.h new file mode 100644 index 0000000..a91ae49 --- /dev/null +++ b/arch/arm/include/asm/unified.h @@ -0,0 +1,140 @@ +/* + * include/asm-arm/unified.h - Unified Assembler Syntax helper macros + * + * Copyright (C) 2008 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __ASM_UNIFIED_H +#define __ASM_UNIFIED_H + +#if defined(__ASSEMBLY__) && defined(CONFIG_ARM_ASM_UNIFIED) + .syntax unified +#endif + +#ifdef CONFIG_CPU_V7M +#define AR_CLASS(x...) +#define M_CLASS(x...) x +#else +#define AR_CLASS(x...) x +#define M_CLASS(x...) +#endif + +#ifdef CONFIG_THUMB2_KERNEL + +#if __GNUC__ < 4 +#error Thumb-2 kernel requires gcc >= 4 +#endif + +/* The CPSR bit describing the instruction set (Thumb) */ +#define PSR_ISETSTATE PSR_T_BIT + +#define ARM(x...) +#define THUMB(x...) x +#ifdef __ASSEMBLY__ +#define W(instr) instr.w +#else +#define WASM(instr) #instr ".w" +#endif + +#else /* !CONFIG_THUMB2_KERNEL */ + +/* The CPSR bit describing the instruction set (ARM) */ +#define PSR_ISETSTATE 0 + +#define ARM(x...) x +#define THUMB(x...) +#ifdef __ASSEMBLY__ +#define W(instr) instr +#else +#define WASM(instr) #instr +#endif + +#endif /* CONFIG_THUMB2_KERNEL */ + +#ifndef CONFIG_ARM_ASM_UNIFIED + +/* + * If the unified assembly syntax isn't used (in ARM mode), these + * macros expand to an empty string + */ +#ifdef __ASSEMBLY__ + .macro it, cond + .endm + .macro itt, cond + .endm + .macro ite, cond + .endm + .macro ittt, cond + .endm + .macro itte, cond + .endm + .macro itet, cond + .endm + .macro itee, cond + .endm + .macro itttt, cond + .endm + .macro ittte, cond + .endm + .macro ittet, cond + .endm + .macro ittee, cond + .endm + .macro itett, cond + .endm + .macro itete, cond + .endm + .macro iteet, cond + .endm + .macro iteee, cond + .endm +#else /* !__ASSEMBLY__ */ +__asm__( +" .macro it, cond\n" +" .endm\n" +" .macro itt, cond\n" +" .endm\n" +" .macro ite, cond\n" +" .endm\n" +" .macro ittt, cond\n" +" .endm\n" +" .macro itte, cond\n" +" .endm\n" +" .macro itet, cond\n" +" .endm\n" +" .macro itee, cond\n" +" .endm\n" +" .macro itttt, cond\n" +" .endm\n" +" .macro ittte, cond\n" +" .endm\n" +" .macro ittet, cond\n" +" .endm\n" +" .macro ittee, cond\n" +" .endm\n" +" .macro itett, cond\n" +" .endm\n" +" .macro itete, cond\n" +" .endm\n" +" .macro iteet, cond\n" +" .endm\n" +" .macro iteee, cond\n" +" .endm\n"); +#endif /* __ASSEMBLY__ */ + +#endif /* CONFIG_ARM_ASM_UNIFIED */ + +#endif /* !__ASM_UNIFIED_H */ diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index 7a0fb58..1112b9e 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -62,9 +62,17 @@ ifneq (,$(findstring -mabi=aapcs-linux,$(PLATFORM_CPPFLAGS))) extra-y += eabi_compat.o endif
+asflags-y += -DCONFIG_ARM_ASM_UNIFIED +ifeq ($(CONFIG_SPL_BUILD)$(CONFIG_TEGRA),yy) +asflags-y += -D__LINUX_ARM_ARCH__=4 +else +asflags-y += -D__LINUX_ARM_ARCH__=$(CONFIG_SYS_ARM_ARCH) +endif + # some files can only build in ARM or THUMB2, not THUMB1
ifdef CONFIG_SYS_THUMB_BUILD +asflags-$(CONFIG_HAS_THUMB2) += -DCONFIG_THUMB2_KERNEL ifndef CONFIG_HAS_THUMB2
# for C files, just apend -marm, which will override previous -mthumb* @@ -82,6 +90,5 @@ AFLAGS_REMOVE_memset.o := -mthumb -mthumb-interwork AFLAGS_REMOVE_memcpy.o := -mthumb -mthumb-interwork AFLAGS_memset.o := -DMEMSET_NO_THUMB_BUILD AFLAGS_memcpy.o := -DMEMCPY_NO_THUMB_BUILD - endif endif diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S index 7d9fc0f..00602e9 100644 --- a/arch/arm/lib/memcpy.S +++ b/arch/arm/lib/memcpy.S @@ -13,12 +13,6 @@ #include <linux/linkage.h> #include <asm/assembler.h>
-#if defined(CONFIG_SYS_THUMB_BUILD) && !defined(MEMCPY_NO_THUMB_BUILD) -#define W(instr) instr.w -#else -#define W(instr) instr -#endif - #define LDR1W_SHIFT 0 #define STR1W_SHIFT 0

Drop the underscore from the filenames of files implementing libgcc routines. There is no functional change. This change is done to make sync with Linux kernel easier.
Signed-off-by: Marek Vasut marex@denx.de Cc: Albert Aribaud albert.u.boot@aribaud.net Cc: Masahiro Yamada yamada.masahiro@socionext.com Cc: Simon Glass sjg@chromium.org Cc: Tom Rini trini@konsulko.com --- arch/arm/lib/Makefile | 6 +++--- arch/arm/lib/{_ashldi3.S => ashldi3.S} | 0 arch/arm/lib/{_ashrdi3.S => ashrdi3.S} | 0 arch/arm/lib/{_divsi3.S => divsi3.S} | 0 arch/arm/lib/{_lshrdi3.S => lshrdi3.S} | 0 arch/arm/lib/{_modsi3.S => modsi3.S} | 0 arch/arm/lib/{_udivsi3.S => udivsi3.S} | 0 arch/arm/lib/{_uldivmod.S => uldivmod.S} | 0 arch/arm/lib/{_umodsi3.S => umodsi3.S} | 0 9 files changed, 3 insertions(+), 3 deletions(-) rename arch/arm/lib/{_ashldi3.S => ashldi3.S} (100%) rename arch/arm/lib/{_ashrdi3.S => ashrdi3.S} (100%) rename arch/arm/lib/{_divsi3.S => divsi3.S} (100%) rename arch/arm/lib/{_lshrdi3.S => lshrdi3.S} (100%) rename arch/arm/lib/{_modsi3.S => modsi3.S} (100%) rename arch/arm/lib/{_udivsi3.S => udivsi3.S} (100%) rename arch/arm/lib/{_uldivmod.S => uldivmod.S} (100%) rename arch/arm/lib/{_umodsi3.S => umodsi3.S} (100%)
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index 1112b9e..cafd94d 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -5,9 +5,9 @@ # SPDX-License-Identifier: GPL-2.0+ #
-lib-$(CONFIG_USE_PRIVATE_LIBGCC) += _ashldi3.o _ashrdi3.o _divsi3.o \ - _lshrdi3.o _modsi3.o _udivsi3.o _umodsi3.o div0.o \ - _uldivmod.o +lib-$(CONFIG_USE_PRIVATE_LIBGCC) += ashldi3.o ashrdi3.o divsi3.o \ + lshrdi3.o modsi3.o udivsi3.o umodsi3.o div0.o \ + uldivmod.o
ifdef CONFIG_CPU_V7M obj-y += vectors_m.o crt0.o diff --git a/arch/arm/lib/_ashldi3.S b/arch/arm/lib/ashldi3.S similarity index 100% rename from arch/arm/lib/_ashldi3.S rename to arch/arm/lib/ashldi3.S diff --git a/arch/arm/lib/_ashrdi3.S b/arch/arm/lib/ashrdi3.S similarity index 100% rename from arch/arm/lib/_ashrdi3.S rename to arch/arm/lib/ashrdi3.S diff --git a/arch/arm/lib/_divsi3.S b/arch/arm/lib/divsi3.S similarity index 100% rename from arch/arm/lib/_divsi3.S rename to arch/arm/lib/divsi3.S diff --git a/arch/arm/lib/_lshrdi3.S b/arch/arm/lib/lshrdi3.S similarity index 100% rename from arch/arm/lib/_lshrdi3.S rename to arch/arm/lib/lshrdi3.S diff --git a/arch/arm/lib/_modsi3.S b/arch/arm/lib/modsi3.S similarity index 100% rename from arch/arm/lib/_modsi3.S rename to arch/arm/lib/modsi3.S diff --git a/arch/arm/lib/_udivsi3.S b/arch/arm/lib/udivsi3.S similarity index 100% rename from arch/arm/lib/_udivsi3.S rename to arch/arm/lib/udivsi3.S diff --git a/arch/arm/lib/_uldivmod.S b/arch/arm/lib/uldivmod.S similarity index 100% rename from arch/arm/lib/_uldivmod.S rename to arch/arm/lib/uldivmod.S diff --git a/arch/arm/lib/_umodsi3.S b/arch/arm/lib/umodsi3.S similarity index 100% rename from arch/arm/lib/_umodsi3.S rename to arch/arm/lib/umodsi3.S

Sync the libgcc shift operations with Linux kernel 4.4.6 , commit 0d1912303e54ed1b2a371be0bba51c384dd57326 . Syncing these three files is easy, as there is almost no change in them, except the addition of Thumb support.
This patch also defines CONFIG_THUMB2_KERNEL and CONFIG_ARM_ASM_UNIFIED which is necessary for correct build of these files both in ARM and Thumb mode, just like Linux does.
Signed-off-by: Marek Vasut marex@denx.de Cc: Albert Aribaud albert.u.boot@aribaud.net Cc: Masahiro Yamada yamada.masahiro@socionext.com Cc: Simon Glass sjg@chromium.org Cc: Tom Rini trini@konsulko.com --- arch/arm/lib/ashldi3.S | 12 ++++++++---- arch/arm/lib/ashrdi3.S | 12 ++++++++---- arch/arm/lib/lshrdi3.S | 12 ++++++++---- 3 files changed, 24 insertions(+), 12 deletions(-)
diff --git a/arch/arm/lib/ashldi3.S b/arch/arm/lib/ashldi3.S index 9c34c21..a4f5ebb 100644 --- a/arch/arm/lib/ashldi3.S +++ b/arch/arm/lib/ashldi3.S @@ -5,6 +5,7 @@ */
#include <linux/linkage.h> +#include <asm/assembler.h>
#ifdef __ARMEB__ #define al r1 @@ -14,15 +15,18 @@ #define ah r1 #endif
-.globl __ashldi3 -__ashldi3: +ENTRY(__ashldi3) ENTRY(__aeabi_llsl)
subs r3, r2, #32 rsb ip, r2, #32 movmi ah, ah, lsl r2 movpl ah, al, lsl r3 - orrmi ah, ah, al, lsr ip + ARM( orrmi ah, ah, al, lsr ip ) + THUMB( lsrmi r3, al, ip ) + THUMB( orrmi ah, ah, r3 ) mov al, al, lsl r2 - mov pc, lr + ret lr + +ENDPROC(__ashldi3) ENDPROC(__aeabi_llsl) diff --git a/arch/arm/lib/ashrdi3.S b/arch/arm/lib/ashrdi3.S index c74fd64..c6e1ed3 100644 --- a/arch/arm/lib/ashrdi3.S +++ b/arch/arm/lib/ashrdi3.S @@ -5,6 +5,7 @@ */
#include <linux/linkage.h> +#include <asm/assembler.h>
#ifdef __ARMEB__ #define al r1 @@ -14,15 +15,18 @@ #define ah r1 #endif
-.globl __ashrdi3 -__ashrdi3: +ENTRY(__ashrdi3) ENTRY(__aeabi_lasr)
subs r3, r2, #32 rsb ip, r2, #32 movmi al, al, lsr r2 movpl al, ah, asr r3 - orrmi al, al, ah, lsl ip + ARM( orrmi al, al, ah, lsl ip ) + THUMB( lslmi r3, ah, ip ) + THUMB( orrmi al, al, r3 ) mov ah, ah, asr r2 - mov pc, lr + ret lr + +ENDPROC(__ashrdi3) ENDPROC(__aeabi_lasr) diff --git a/arch/arm/lib/lshrdi3.S b/arch/arm/lib/lshrdi3.S index 1f9b916..9c51141 100644 --- a/arch/arm/lib/lshrdi3.S +++ b/arch/arm/lib/lshrdi3.S @@ -5,6 +5,7 @@ */
#include <linux/linkage.h> +#include <asm/assembler.h>
#ifdef __ARMEB__ #define al r1 @@ -14,15 +15,18 @@ #define ah r1 #endif
-.globl __lshrdi3 -__lshrdi3: +ENTRY(__lshrdi3) ENTRY(__aeabi_llsr)
subs r3, r2, #32 rsb ip, r2, #32 movmi al, al, lsr r2 movpl al, ah, lsr r3 - orrmi al, al, ah, lsl ip + ARM( orrmi al, al, ah, lsl ip ) + THUMB( lslmi r3, ah, ip ) + THUMB( orrmi al, al, r3 ) mov ah, ah, lsr r2 - mov pc, lr + ret lr + +ENDPROC(__lshrdi3) ENDPROC(__aeabi_llsr)

Sync the libgcc 32bit division and modulo operations with Linux 4.4.6 , commit 0d1912303e54ed1b2a371be0bba51c384dd57326 . The functions in these four files are present in lib1funcs.S in Linux, so replace these files with lib1funcs.S from Linux.
Since we do not support stack unwinding, instead of importing the whole asm/unwind.h and all the baggage, this patch defines empty UNWIND() macro in lib1funcs.S . Moreover, to make all of the functions available, define CONFIG_AEABI , which is safe, because U-Boot is always compiled with ARM EABI.
This patch also defines CONFIG_THUMB2_KERNEL and CONFIG_ARM_ASM_UNIFIED which is necessary for correct build of these files both in ARM and Thumb mode, just like Linux does.
Signed-off-by: Marek Vasut marex@denx.de Cc: Albert Aribaud albert.u.boot@aribaud.net Cc: Masahiro Yamada yamada.masahiro@socionext.com Cc: Simon Glass sjg@chromium.org Cc: Tom Rini trini@konsulko.com --- arch/arm/lib/Makefile | 5 +- arch/arm/lib/divsi3.S | 143 ------------------- arch/arm/lib/lib1funcs.S | 351 +++++++++++++++++++++++++++++++++++++++++++++++ arch/arm/lib/modsi3.S | 99 ------------- arch/arm/lib/udivsi3.S | 95 ------------- arch/arm/lib/umodsi3.S | 90 ------------ 6 files changed, 353 insertions(+), 430 deletions(-) delete mode 100644 arch/arm/lib/divsi3.S create mode 100644 arch/arm/lib/lib1funcs.S delete mode 100644 arch/arm/lib/modsi3.S delete mode 100644 arch/arm/lib/udivsi3.S delete mode 100644 arch/arm/lib/umodsi3.S
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index cafd94d..0b0dbcb 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -5,9 +5,8 @@ # SPDX-License-Identifier: GPL-2.0+ #
-lib-$(CONFIG_USE_PRIVATE_LIBGCC) += ashldi3.o ashrdi3.o divsi3.o \ - lshrdi3.o modsi3.o udivsi3.o umodsi3.o div0.o \ - uldivmod.o +lib-$(CONFIG_USE_PRIVATE_LIBGCC) += ashldi3.o ashrdi3.o lshrdi3.o \ + lib1funcs.o uldivmod.o div0.o
ifdef CONFIG_CPU_V7M obj-y += vectors_m.o crt0.o diff --git a/arch/arm/lib/divsi3.S b/arch/arm/lib/divsi3.S deleted file mode 100644 index c463c68..0000000 --- a/arch/arm/lib/divsi3.S +++ /dev/null @@ -1,143 +0,0 @@ -#include <linux/linkage.h> - -.macro ARM_DIV_BODY dividend, divisor, result, curbit - -#if __LINUX_ARM_ARCH__ >= 5 - - clz \curbit, \divisor - clz \result, \dividend - sub \result, \curbit, \result - mov \curbit, #1 - mov \divisor, \divisor, lsl \result - mov \curbit, \curbit, lsl \result - mov \result, #0 - -#else - - @ Initially shift the divisor left 3 bits if possible, - @ set curbit accordingly. This allows for curbit to be located - @ at the left end of each 4 bit nibbles in the division loop - @ to save one loop in most cases. - tst \divisor, #0xe0000000 - moveq \divisor, \divisor, lsl #3 - moveq \curbit, #8 - movne \curbit, #1 - - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. -1: cmp \divisor, #0x10000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #4 - movlo \curbit, \curbit, lsl #4 - blo 1b - - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. -1: cmp \divisor, #0x80000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #1 - movlo \curbit, \curbit, lsl #1 - blo 1b - - mov \result, #0 - -#endif - - @ Division loop -1: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - orrhs \result, \result, \curbit - cmp \dividend, \divisor, lsr #1 - subhs \dividend, \dividend, \divisor, lsr #1 - orrhs \result, \result, \curbit, lsr #1 - cmp \dividend, \divisor, lsr #2 - subhs \dividend, \dividend, \divisor, lsr #2 - orrhs \result, \result, \curbit, lsr #2 - cmp \dividend, \divisor, lsr #3 - subhs \dividend, \dividend, \divisor, lsr #3 - orrhs \result, \result, \curbit, lsr #3 - cmp \dividend, #0 @ Early termination? - movnes \curbit, \curbit, lsr #4 @ No, any more bits to do? - movne \divisor, \divisor, lsr #4 - bne 1b - -.endm - -.macro ARM_DIV2_ORDER divisor, order - -#if __LINUX_ARM_ARCH__ >= 5 - - clz \order, \divisor - rsb \order, \order, #31 - -#else - - cmp \divisor, #(1 << 16) - movhs \divisor, \divisor, lsr #16 - movhs \order, #16 - movlo \order, #0 - - cmp \divisor, #(1 << 8) - movhs \divisor, \divisor, lsr #8 - addhs \order, \order, #8 - - cmp \divisor, #(1 << 4) - movhs \divisor, \divisor, lsr #4 - addhs \order, \order, #4 - - cmp \divisor, #(1 << 2) - addhi \order, \order, #3 - addls \order, \order, \divisor, lsr #1 - -#endif - -.endm - - .align 5 -.globl __divsi3 -__divsi3: -ENTRY(__aeabi_idiv) - cmp r1, #0 - eor ip, r0, r1 @ save the sign of the result. - beq Ldiv0 - rsbmi r1, r1, #0 @ loops below use unsigned. - subs r2, r1, #1 @ division by 1 or -1 ? - beq 10f - movs r3, r0 - rsbmi r3, r0, #0 @ positive dividend value - cmp r3, r1 - bls 11f - tst r1, r2 @ divisor is power of 2 ? - beq 12f - - ARM_DIV_BODY r3, r1, r0, r2 - - cmp ip, #0 - rsbmi r0, r0, #0 - mov pc, lr - -10: teq ip, r0 @ same sign ? - rsbmi r0, r0, #0 - mov pc, lr - -11: movlo r0, #0 - moveq r0, ip, asr #31 - orreq r0, r0, #1 - mov pc, lr - -12: ARM_DIV2_ORDER r1, r2 - - cmp ip, #0 - mov r0, r3, lsr r2 - rsbmi r0, r0, #0 - mov pc, lr - -Ldiv0: - - str lr, [sp, #-4]! - bl __div0 - mov r0, #0 @ About as wrong as it could be. - ldr pc, [sp], #4 -ENDPROC(__aeabi_idiv) diff --git a/arch/arm/lib/lib1funcs.S b/arch/arm/lib/lib1funcs.S new file mode 100644 index 0000000..5871dbe --- /dev/null +++ b/arch/arm/lib/lib1funcs.S @@ -0,0 +1,351 @@ +/* + * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines + * + * Author: Nicolas Pitre nico@fluxnic.net + * - contributed to gcc-3.4 on Sep 30, 2003 + * - adapted for the Linux kernel on Oct 2, 2003 + */ + +/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc. + + * SPDX-License-Identifier: GPL-2.0+ + */ + + +#include <linux/linkage.h> +#include <asm/assembler.h> + +/* + * U-Boot compatibility bit, define empty UNWIND() macro as, since we + * do not support stack unwinding and define CONFIG_AEABI to make all + * of the functions available without diverging from Linux code. + */ +#ifdef __UBOOT__ +#define UNWIND(x...) +#define CONFIG_AEABI +#endif + +.macro ARM_DIV_BODY dividend, divisor, result, curbit + +#if __LINUX_ARM_ARCH__ >= 5 + + clz \curbit, \divisor + clz \result, \dividend + sub \result, \curbit, \result + mov \curbit, #1 + mov \divisor, \divisor, lsl \result + mov \curbit, \curbit, lsl \result + mov \result, #0 + +#else + + @ Initially shift the divisor left 3 bits if possible, + @ set curbit accordingly. This allows for curbit to be located + @ at the left end of each 4 bit nibbles in the division loop + @ to save one loop in most cases. + tst \divisor, #0xe0000000 + moveq \divisor, \divisor, lsl #3 + moveq \curbit, #8 + movne \curbit, #1 + + @ Unless the divisor is very big, shift it up in multiples of + @ four bits, since this is the amount of unwinding in the main + @ division loop. Continue shifting until the divisor is + @ larger than the dividend. +1: cmp \divisor, #0x10000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #4 + movlo \curbit, \curbit, lsl #4 + blo 1b + + @ For very big divisors, we must shift it a bit at a time, or + @ we will be in danger of overflowing. +1: cmp \divisor, #0x80000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #1 + movlo \curbit, \curbit, lsl #1 + blo 1b + + mov \result, #0 + +#endif + + @ Division loop +1: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + orrhs \result, \result, \curbit + cmp \dividend, \divisor, lsr #1 + subhs \dividend, \dividend, \divisor, lsr #1 + orrhs \result, \result, \curbit, lsr #1 + cmp \dividend, \divisor, lsr #2 + subhs \dividend, \dividend, \divisor, lsr #2 + orrhs \result, \result, \curbit, lsr #2 + cmp \dividend, \divisor, lsr #3 + subhs \dividend, \dividend, \divisor, lsr #3 + orrhs \result, \result, \curbit, lsr #3 + cmp \dividend, #0 @ Early termination? + movnes \curbit, \curbit, lsr #4 @ No, any more bits to do? + movne \divisor, \divisor, lsr #4 + bne 1b + +.endm + + +.macro ARM_DIV2_ORDER divisor, order + +#if __LINUX_ARM_ARCH__ >= 5 + + clz \order, \divisor + rsb \order, \order, #31 + +#else + + cmp \divisor, #(1 << 16) + movhs \divisor, \divisor, lsr #16 + movhs \order, #16 + movlo \order, #0 + + cmp \divisor, #(1 << 8) + movhs \divisor, \divisor, lsr #8 + addhs \order, \order, #8 + + cmp \divisor, #(1 << 4) + movhs \divisor, \divisor, lsr #4 + addhs \order, \order, #4 + + cmp \divisor, #(1 << 2) + addhi \order, \order, #3 + addls \order, \order, \divisor, lsr #1 + +#endif + +.endm + + +.macro ARM_MOD_BODY dividend, divisor, order, spare + +#if __LINUX_ARM_ARCH__ >= 5 + + clz \order, \divisor + clz \spare, \dividend + sub \order, \order, \spare + mov \divisor, \divisor, lsl \order + +#else + + mov \order, #0 + + @ Unless the divisor is very big, shift it up in multiples of + @ four bits, since this is the amount of unwinding in the main + @ division loop. Continue shifting until the divisor is + @ larger than the dividend. +1: cmp \divisor, #0x10000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #4 + addlo \order, \order, #4 + blo 1b + + @ For very big divisors, we must shift it a bit at a time, or + @ we will be in danger of overflowing. +1: cmp \divisor, #0x80000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #1 + addlo \order, \order, #1 + blo 1b + +#endif + + @ Perform all needed subtractions to keep only the reminder. + @ Do comparisons in batch of 4 first. + subs \order, \order, #3 @ yes, 3 is intended here + blt 2f + +1: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + cmp \dividend, \divisor, lsr #1 + subhs \dividend, \dividend, \divisor, lsr #1 + cmp \dividend, \divisor, lsr #2 + subhs \dividend, \dividend, \divisor, lsr #2 + cmp \dividend, \divisor, lsr #3 + subhs \dividend, \dividend, \divisor, lsr #3 + cmp \dividend, #1 + mov \divisor, \divisor, lsr #4 + subges \order, \order, #4 + bge 1b + + tst \order, #3 + teqne \dividend, #0 + beq 5f + + @ Either 1, 2 or 3 comparison/subtractions are left. +2: cmn \order, #2 + blt 4f + beq 3f + cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + mov \divisor, \divisor, lsr #1 +3: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + mov \divisor, \divisor, lsr #1 +4: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor +5: +.endm + + +ENTRY(__udivsi3) +ENTRY(__aeabi_uidiv) +UNWIND(.fnstart) + + subs r2, r1, #1 + reteq lr + bcc Ldiv0 + cmp r0, r1 + bls 11f + tst r1, r2 + beq 12f + + ARM_DIV_BODY r0, r1, r2, r3 + + mov r0, r2 + ret lr + +11: moveq r0, #1 + movne r0, #0 + ret lr + +12: ARM_DIV2_ORDER r1, r2 + + mov r0, r0, lsr r2 + ret lr + +UNWIND(.fnend) +ENDPROC(__udivsi3) +ENDPROC(__aeabi_uidiv) + +ENTRY(__umodsi3) +UNWIND(.fnstart) + + subs r2, r1, #1 @ compare divisor with 1 + bcc Ldiv0 + cmpne r0, r1 @ compare dividend with divisor + moveq r0, #0 + tsthi r1, r2 @ see if divisor is power of 2 + andeq r0, r0, r2 + retls lr + + ARM_MOD_BODY r0, r1, r2, r3 + + ret lr + +UNWIND(.fnend) +ENDPROC(__umodsi3) + +ENTRY(__divsi3) +ENTRY(__aeabi_idiv) +UNWIND(.fnstart) + + cmp r1, #0 + eor ip, r0, r1 @ save the sign of the result. + beq Ldiv0 + rsbmi r1, r1, #0 @ loops below use unsigned. + subs r2, r1, #1 @ division by 1 or -1 ? + beq 10f + movs r3, r0 + rsbmi r3, r0, #0 @ positive dividend value + cmp r3, r1 + bls 11f + tst r1, r2 @ divisor is power of 2 ? + beq 12f + + ARM_DIV_BODY r3, r1, r0, r2 + + cmp ip, #0 + rsbmi r0, r0, #0 + ret lr + +10: teq ip, r0 @ same sign ? + rsbmi r0, r0, #0 + ret lr + +11: movlo r0, #0 + moveq r0, ip, asr #31 + orreq r0, r0, #1 + ret lr + +12: ARM_DIV2_ORDER r1, r2 + + cmp ip, #0 + mov r0, r3, lsr r2 + rsbmi r0, r0, #0 + ret lr + +UNWIND(.fnend) +ENDPROC(__divsi3) +ENDPROC(__aeabi_idiv) + +ENTRY(__modsi3) +UNWIND(.fnstart) + + cmp r1, #0 + beq Ldiv0 + rsbmi r1, r1, #0 @ loops below use unsigned. + movs ip, r0 @ preserve sign of dividend + rsbmi r0, r0, #0 @ if negative make positive + subs r2, r1, #1 @ compare divisor with 1 + cmpne r0, r1 @ compare dividend with divisor + moveq r0, #0 + tsthi r1, r2 @ see if divisor is power of 2 + andeq r0, r0, r2 + bls 10f + + ARM_MOD_BODY r0, r1, r2, r3 + +10: cmp ip, #0 + rsbmi r0, r0, #0 + ret lr + +UNWIND(.fnend) +ENDPROC(__modsi3) + +#ifdef CONFIG_AEABI + +ENTRY(__aeabi_uidivmod) +UNWIND(.fnstart) +UNWIND(.save {r0, r1, ip, lr} ) + + stmfd sp!, {r0, r1, ip, lr} + bl __aeabi_uidiv + ldmfd sp!, {r1, r2, ip, lr} + mul r3, r0, r2 + sub r1, r1, r3 + ret lr + +UNWIND(.fnend) +ENDPROC(__aeabi_uidivmod) + +ENTRY(__aeabi_idivmod) +UNWIND(.fnstart) +UNWIND(.save {r0, r1, ip, lr} ) + stmfd sp!, {r0, r1, ip, lr} + bl __aeabi_idiv + ldmfd sp!, {r1, r2, ip, lr} + mul r3, r0, r2 + sub r1, r1, r3 + ret lr + +UNWIND(.fnend) +ENDPROC(__aeabi_idivmod) + +#endif + +Ldiv0: +UNWIND(.fnstart) +UNWIND(.pad #4) +UNWIND(.save {lr}) + str lr, [sp, #-8]! + bl __div0 + mov r0, #0 @ About as wrong as it could be. + ldr pc, [sp], #8 +UNWIND(.fnend) +ENDPROC(Ldiv0) diff --git a/arch/arm/lib/modsi3.S b/arch/arm/lib/modsi3.S deleted file mode 100644 index c5e1c22..0000000 --- a/arch/arm/lib/modsi3.S +++ /dev/null @@ -1,99 +0,0 @@ -#include <linux/linkage.h> - -.macro ARM_MOD_BODY dividend, divisor, order, spare - -#if __LINUX_ARM_ARCH__ >= 5 - - clz \order, \divisor - clz \spare, \dividend - sub \order, \order, \spare - mov \divisor, \divisor, lsl \order - -#else - - mov \order, #0 - - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. -1: cmp \divisor, #0x10000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #4 - addlo \order, \order, #4 - blo 1b - - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. -1: cmp \divisor, #0x80000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #1 - addlo \order, \order, #1 - blo 1b - -#endif - - @ Perform all needed substractions to keep only the reminder. - @ Do comparisons in batch of 4 first. - subs \order, \order, #3 @ yes, 3 is intended here - blt 2f - -1: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - cmp \dividend, \divisor, lsr #1 - subhs \dividend, \dividend, \divisor, lsr #1 - cmp \dividend, \divisor, lsr #2 - subhs \dividend, \dividend, \divisor, lsr #2 - cmp \dividend, \divisor, lsr #3 - subhs \dividend, \dividend, \divisor, lsr #3 - cmp \dividend, #1 - mov \divisor, \divisor, lsr #4 - subges \order, \order, #4 - bge 1b - - tst \order, #3 - teqne \dividend, #0 - beq 5f - - @ Either 1, 2 or 3 comparison/substractions are left. -2: cmn \order, #2 - blt 4f - beq 3f - cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - mov \divisor, \divisor, lsr #1 -3: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - mov \divisor, \divisor, lsr #1 -4: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor -5: -.endm - - .align 5 -ENTRY(__modsi3) - cmp r1, #0 - beq Ldiv0 - rsbmi r1, r1, #0 @ loops below use unsigned. - movs ip, r0 @ preserve sign of dividend - rsbmi r0, r0, #0 @ if negative make positive - subs r2, r1, #1 @ compare divisor with 1 - cmpne r0, r1 @ compare dividend with divisor - moveq r0, #0 - tsthi r1, r2 @ see if divisor is power of 2 - andeq r0, r0, r2 - bls 10f - - ARM_MOD_BODY r0, r1, r2, r3 - -10: cmp ip, #0 - rsbmi r0, r0, #0 - mov pc, lr -ENDPROC(__modsi3) - -Ldiv0: - - str lr, [sp, #-4]! - bl __div0 - mov r0, #0 @ About as wrong as it could be. - ldr pc, [sp], #4 diff --git a/arch/arm/lib/udivsi3.S b/arch/arm/lib/udivsi3.S deleted file mode 100644 index 3b653be..0000000 --- a/arch/arm/lib/udivsi3.S +++ /dev/null @@ -1,95 +0,0 @@ -#include <linux/linkage.h> - -/* # 1 "libgcc1.S" */ -@ libgcc1 routines for ARM cpu. -@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk) -dividend .req r0 -divisor .req r1 -result .req r2 -curbit .req r3 -/* ip .req r12 */ -/* sp .req r13 */ -/* lr .req r14 */ -/* pc .req r15 */ - .text - .globl __udivsi3 - .type __udivsi3 ,function - .globl __aeabi_uidiv - .type __aeabi_uidiv ,function - .align 0 - __udivsi3: - __aeabi_uidiv: - cmp divisor, #0 - beq Ldiv0 - mov curbit, #1 - mov result, #0 - cmp dividend, divisor - bcc Lgot_result -Loop1: - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. - cmp divisor, #0x10000000 - cmpcc divisor, dividend - movcc divisor, divisor, lsl #4 - movcc curbit, curbit, lsl #4 - bcc Loop1 -Lbignum: - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. - cmp divisor, #0x80000000 - cmpcc divisor, dividend - movcc divisor, divisor, lsl #1 - movcc curbit, curbit, lsl #1 - bcc Lbignum -Loop3: - @ Test for possible subtractions, and note which bits - @ are done in the result. On the final pass, this may subtract - @ too much from the dividend, but the result will be ok, since the - @ "bit" will have been shifted out at the bottom. - cmp dividend, divisor - subcs dividend, dividend, divisor - orrcs result, result, curbit - cmp dividend, divisor, lsr #1 - subcs dividend, dividend, divisor, lsr #1 - orrcs result, result, curbit, lsr #1 - cmp dividend, divisor, lsr #2 - subcs dividend, dividend, divisor, lsr #2 - orrcs result, result, curbit, lsr #2 - cmp dividend, divisor, lsr #3 - subcs dividend, dividend, divisor, lsr #3 - orrcs result, result, curbit, lsr #3 - cmp dividend, #0 @ Early termination? - movnes curbit, curbit, lsr #4 @ No, any more bits to do? - movne divisor, divisor, lsr #4 - bne Loop3 -Lgot_result: - mov r0, result - mov pc, lr -Ldiv0: - str lr, [sp, #-4]! - bl __div0 (PLT) - mov r0, #0 @ about as wrong as it could be - ldmia sp!, {pc} - .size __udivsi3 , . - __udivsi3 - -ENTRY(__aeabi_uidivmod) - - stmfd sp!, {r0, r1, ip, lr} - bl __aeabi_uidiv - ldmfd sp!, {r1, r2, ip, lr} - mul r3, r0, r2 - sub r1, r1, r3 - mov pc, lr -ENDPROC(__aeabi_uidivmod) - -ENTRY(__aeabi_idivmod) - - stmfd sp!, {r0, r1, ip, lr} - bl __aeabi_idiv - ldmfd sp!, {r1, r2, ip, lr} - mul r3, r0, r2 - sub r1, r1, r3 - mov pc, lr -ENDPROC(__aeabi_idivmod) diff --git a/arch/arm/lib/umodsi3.S b/arch/arm/lib/umodsi3.S deleted file mode 100644 index b166737..0000000 --- a/arch/arm/lib/umodsi3.S +++ /dev/null @@ -1,90 +0,0 @@ -#include <linux/linkage.h> - -/* # 1 "libgcc1.S" */ -@ libgcc1 routines for ARM cpu. -@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk) -/* # 145 "libgcc1.S" */ -dividend .req r0 -divisor .req r1 -overdone .req r2 -curbit .req r3 -/* ip .req r12 */ -/* sp .req r13 */ -/* lr .req r14 */ -/* pc .req r15 */ - .text - .type __umodsi3 ,function - .align 0 - ENTRY(__umodsi3) - cmp divisor, #0 - beq Ldiv0 - mov curbit, #1 - cmp dividend, divisor - movcc pc, lr -Loop1: - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. - cmp divisor, #0x10000000 - cmpcc divisor, dividend - movcc divisor, divisor, lsl #4 - movcc curbit, curbit, lsl #4 - bcc Loop1 -Lbignum: - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. - cmp divisor, #0x80000000 - cmpcc divisor, dividend - movcc divisor, divisor, lsl #1 - movcc curbit, curbit, lsl #1 - bcc Lbignum -Loop3: - @ Test for possible subtractions. On the final pass, this may - @ subtract too much from the dividend, so keep track of which - @ subtractions are done, we can fix them up afterwards... - mov overdone, #0 - cmp dividend, divisor - subcs dividend, dividend, divisor - cmp dividend, divisor, lsr #1 - subcs dividend, dividend, divisor, lsr #1 - orrcs overdone, overdone, curbit, ror #1 - cmp dividend, divisor, lsr #2 - subcs dividend, dividend, divisor, lsr #2 - orrcs overdone, overdone, curbit, ror #2 - cmp dividend, divisor, lsr #3 - subcs dividend, dividend, divisor, lsr #3 - orrcs overdone, overdone, curbit, ror #3 - mov ip, curbit - cmp dividend, #0 @ Early termination? - movnes curbit, curbit, lsr #4 @ No, any more bits to do? - movne divisor, divisor, lsr #4 - bne Loop3 - @ Any subtractions that we should not have done will be recorded in - @ the top three bits of "overdone". Exactly which were not needed - @ are governed by the position of the bit, stored in ip. - @ If we terminated early, because dividend became zero, - @ then none of the below will match, since the bit in ip will not be - @ in the bottom nibble. - ands overdone, overdone, #0xe0000000 - moveq pc, lr @ No fixups needed - tst overdone, ip, ror #3 - addne dividend, dividend, divisor, lsr #3 - tst overdone, ip, ror #2 - addne dividend, dividend, divisor, lsr #2 - tst overdone, ip, ror #1 - addne dividend, dividend, divisor, lsr #1 - mov pc, lr -Ldiv0: - str lr, [sp, #-4]! - bl __div0 (PLT) - mov r0, #0 @ about as wrong as it could be - ldmia sp!, {pc} - .size __umodsi3 , . - __umodsi3 -/* # 320 "libgcc1.S" */ -/* # 421 "libgcc1.S" */ -/* # 433 "libgcc1.S" */ -/* # 456 "libgcc1.S" */ -/* # 500 "libgcc1.S" */ -/* # 580 "libgcc1.S" */ -ENDPROC(__umodsi3)

This assembler source won't build in Thumb2 mode, so fix it adding the necessary Thumb2 conditional macros from unified.h .
This patch also defines CONFIG_THUMB2_KERNEL and CONFIG_ARM_ASM_UNIFIED which is necessary for correct build of these files both in ARM and Thumb mode, just like Linux does.
Signed-off-by: Marek Vasut marex@denx.de Cc: Albert Aribaud albert.u.boot@aribaud.net Cc: Masahiro Yamada yamada.masahiro@socionext.com Cc: Simon Glass sjg@chromium.org Cc: Tom Rini trini@konsulko.com --- arch/arm/lib/uldivmod.S | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/arch/arm/lib/uldivmod.S b/arch/arm/lib/uldivmod.S index 426c2f2..2efcd73 100644 --- a/arch/arm/lib/uldivmod.S +++ b/arch/arm/lib/uldivmod.S @@ -9,10 +9,6 @@ #include <linux/linkage.h> #include <asm/assembler.h>
-/* We don't use Thumb instructions for now */ -#define ARM(x...) x -#define THUMB(x...) - /* * A, Q = r0 + (r1 << 32) * B, R = r2 + (r3 << 32) @@ -226,7 +222,9 @@ THUMB( orrpl A_0, A_0, TMP ) @ Shift A to the right by the appropriate amount. rsb D_1, D_0, #32 mov Q_0, A_0, lsr D_0 - orr Q_0, A_1, lsl D_1 + ARM( orr Q_0, Q_0, A_1, lsl D_1 ) + THUMB( lsl A_1, D_1 ) + THUMB( orr Q_0, A_1 ) mov Q_1, A_1, lsr D_0 @ Move C to R mov R_0, C_0

Import __do_div64 from Linux 4.4.6 , commit 0d1912303e54ed1b2a371be0bba51c384dd57326 on arm32. This function is for some toolchains, which generate _udivmoddi4() for 64 bit division.
Since we do not support stack unwinding, instead of importing the whole asm/unwind.h and all the baggage, this patch defines empty UNWIND() macro.
This patch also defines CONFIG_THUMB2_KERNEL and CONFIG_ARM_ASM_UNIFIED which is necessary for correct build of these files both in ARM and Thumb mode, just like Linux does.
Signed-off-by: Marek Vasut marex@denx.de Cc: Albert Aribaud albert.u.boot@aribaud.net Cc: Masahiro Yamada yamada.masahiro@socionext.com Cc: Simon Glass sjg@chromium.org Cc: Tom Rini trini@konsulko.com --- arch/arm/lib/Makefile | 2 +- arch/arm/lib/div64.S | 214 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 215 insertions(+), 1 deletion(-) create mode 100644 arch/arm/lib/div64.S
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index 0b0dbcb..8945915 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -6,7 +6,7 @@ #
lib-$(CONFIG_USE_PRIVATE_LIBGCC) += ashldi3.o ashrdi3.o lshrdi3.o \ - lib1funcs.o uldivmod.o div0.o + lib1funcs.o uldivmod.o div0.o div64.o
ifdef CONFIG_CPU_V7M obj-y += vectors_m.o crt0.o diff --git a/arch/arm/lib/div64.S b/arch/arm/lib/div64.S new file mode 100644 index 0000000..f238bc6 --- /dev/null +++ b/arch/arm/lib/div64.S @@ -0,0 +1,214 @@ +/* + * linux/arch/arm/lib/div64.S + * + * Optimized computation of 64-bit dividend / 32-bit divisor + * + * Author: Nicolas Pitre + * Created: Oct 5, 2003 + * Copyright: Monta Vista Software, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> +#ifdef __UBOOT__ +#define UNWIND(x...) +#endif + +#ifdef __ARMEB__ +#define xh r0 +#define xl r1 +#define yh r2 +#define yl r3 +#else +#define xl r0 +#define xh r1 +#define yl r2 +#define yh r3 +#endif + +/* + * __do_div64: perform a division with 64-bit dividend and 32-bit divisor. + * + * Note: Calling convention is totally non standard for optimal code. + * This is meant to be used by do_div() from include/asm/div64.h only. + * + * Input parameters: + * xh-xl = dividend (clobbered) + * r4 = divisor (preserved) + * + * Output values: + * yh-yl = result + * xh = remainder + * + * Clobbered regs: xl, ip + */ + +ENTRY(__do_div64) +UNWIND(.fnstart) + + @ Test for easy paths first. + subs ip, r4, #1 + bls 9f @ divisor is 0 or 1 + tst ip, r4 + beq 8f @ divisor is power of 2 + + @ See if we need to handle upper 32-bit result. + cmp xh, r4 + mov yh, #0 + blo 3f + + @ Align divisor with upper part of dividend. + @ The aligned divisor is stored in yl preserving the original. + @ The bit position is stored in ip. + +#if __LINUX_ARM_ARCH__ >= 5 + + clz yl, r4 + clz ip, xh + sub yl, yl, ip + mov ip, #1 + mov ip, ip, lsl yl + mov yl, r4, lsl yl + +#else + + mov yl, r4 + mov ip, #1 +1: cmp yl, #0x80000000 + cmpcc yl, xh + movcc yl, yl, lsl #1 + movcc ip, ip, lsl #1 + bcc 1b + +#endif + + @ The division loop for needed upper bit positions. + @ Break out early if dividend reaches 0. +2: cmp xh, yl + orrcs yh, yh, ip + subcss xh, xh, yl + movnes ip, ip, lsr #1 + mov yl, yl, lsr #1 + bne 2b + + @ See if we need to handle lower 32-bit result. +3: cmp xh, #0 + mov yl, #0 + cmpeq xl, r4 + movlo xh, xl + retlo lr + + @ The division loop for lower bit positions. + @ Here we shift remainer bits leftwards rather than moving the + @ divisor for comparisons, considering the carry-out bit as well. + mov ip, #0x80000000 +4: movs xl, xl, lsl #1 + adcs xh, xh, xh + beq 6f + cmpcc xh, r4 +5: orrcs yl, yl, ip + subcs xh, xh, r4 + movs ip, ip, lsr #1 + bne 4b + ret lr + + @ The top part of remainder became zero. If carry is set + @ (the 33th bit) this is a false positive so resume the loop. + @ Otherwise, if lower part is also null then we are done. +6: bcs 5b + cmp xl, #0 + reteq lr + + @ We still have remainer bits in the low part. Bring them up. + +#if __LINUX_ARM_ARCH__ >= 5 + + clz xh, xl @ we know xh is zero here so... + add xh, xh, #1 + mov xl, xl, lsl xh + mov ip, ip, lsr xh + +#else + +7: movs xl, xl, lsl #1 + mov ip, ip, lsr #1 + bcc 7b + +#endif + + @ Current remainder is now 1. It is worthless to compare with + @ divisor at this point since divisor can not be smaller than 3 here. + @ If possible, branch for another shift in the division loop. + @ If no bit position left then we are done. + movs ip, ip, lsr #1 + mov xh, #1 + bne 4b + ret lr + +8: @ Division by a power of 2: determine what that divisor order is + @ then simply shift values around + +#if __LINUX_ARM_ARCH__ >= 5 + + clz ip, r4 + rsb ip, ip, #31 + +#else + + mov yl, r4 + cmp r4, #(1 << 16) + mov ip, #0 + movhs yl, yl, lsr #16 + movhs ip, #16 + + cmp yl, #(1 << 8) + movhs yl, yl, lsr #8 + addhs ip, ip, #8 + + cmp yl, #(1 << 4) + movhs yl, yl, lsr #4 + addhs ip, ip, #4 + + cmp yl, #(1 << 2) + addhi ip, ip, #3 + addls ip, ip, yl, lsr #1 + +#endif + + mov yh, xh, lsr ip + mov yl, xl, lsr ip + rsb ip, ip, #32 + ARM( orr yl, yl, xh, lsl ip ) + THUMB( lsl xh, xh, ip ) + THUMB( orr yl, yl, xh ) + mov xh, xl, lsl ip + mov xh, xh, lsr ip + ret lr + + @ eq -> division by 1: obvious enough... +9: moveq yl, xl + moveq yh, xh + moveq xh, #0 + reteq lr +UNWIND(.fnend) + +UNWIND(.fnstart) +UNWIND(.pad #4) +UNWIND(.save {lr}) +Ldiv0_64: + @ Division by 0: + str lr, [sp, #-8]! + bl __div0 + + @ as wrong as it could be... + mov yl, #0 + mov yh, #0 + mov xh, #0 + ldr pc, [sp], #8 + +UNWIND(.fnend) +ENDPROC(__do_div64)

Fix the following warning when building for thumb2 target by tweaking the instruction syntax:
Warning: conditional infixes are deprecated in unified syntax
Signed-off-by: Marek Vasut marex@denx.de Cc: Albert Aribaud albert.u.boot@aribaud.net Cc: Masahiro Yamada yamada.masahiro@socionext.com Cc: Simon Glass sjg@chromium.org Cc: Tom Rini trini@konsulko.com --- arch/arm/lib/div64.S | 4 ++-- arch/arm/lib/lib1funcs.S | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/arch/arm/lib/div64.S b/arch/arm/lib/div64.S index f238bc6..7956051 100644 --- a/arch/arm/lib/div64.S +++ b/arch/arm/lib/div64.S @@ -90,8 +90,8 @@ UNWIND(.fnstart) @ Break out early if dividend reaches 0. 2: cmp xh, yl orrcs yh, yh, ip - subcss xh, xh, yl - movnes ip, ip, lsr #1 + subscs xh, xh, yl + movsne ip, ip, lsr #1 mov yl, yl, lsr #1 bne 2b
diff --git a/arch/arm/lib/lib1funcs.S b/arch/arm/lib/lib1funcs.S index 5871dbe..c343ea9 100644 --- a/arch/arm/lib/lib1funcs.S +++ b/arch/arm/lib/lib1funcs.S @@ -84,7 +84,7 @@ subhs \dividend, \dividend, \divisor, lsr #3 orrhs \result, \result, \curbit, lsr #3 cmp \dividend, #0 @ Early termination? - movnes \curbit, \curbit, lsr #4 @ No, any more bits to do? + movsne \curbit, \curbit, lsr #4 @ No, any more bits to do? movne \divisor, \divisor, lsr #4 bne 1b
@@ -170,7 +170,7 @@ subhs \dividend, \dividend, \divisor, lsr #3 cmp \dividend, #1 mov \divisor, \divisor, lsr #4 - subges \order, \order, #4 + subsge \order, \order, #4 bge 1b
tst \order, #3

Import muldi3.S from Linux 4.4.6 , commit 0d1912303e54ed1b2a371be0bba51c384dd57326 on arm32. This file implements __aeabi_lmul and it's alias __muldi3, which is needed when doing Thumb1 builds.
This patch also defines CONFIG_THUMB2_KERNEL and CONFIG_ARM_ASM_UNIFIED which is necessary for correct build of these files both in ARM and Thumb mode, just like Linux does.
Signed-off-by: Marek Vasut marex@denx.de Cc: Albert Aribaud albert.u.boot@aribaud.net Cc: Masahiro Yamada yamada.masahiro@socionext.com Cc: Simon Glass sjg@chromium.org Cc: Tom Rini trini@konsulko.com --- arch/arm/lib/Makefile | 3 ++- arch/arm/lib/muldi3.S | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) create mode 100644 arch/arm/lib/muldi3.S
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index 8945915..86489fd 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -6,7 +6,8 @@ #
lib-$(CONFIG_USE_PRIVATE_LIBGCC) += ashldi3.o ashrdi3.o lshrdi3.o \ - lib1funcs.o uldivmod.o div0.o div64.o + lib1funcs.o uldivmod.o div0.o \ + div64.o muldi3.o
ifdef CONFIG_CPU_V7M obj-y += vectors_m.o crt0.o diff --git a/arch/arm/lib/muldi3.S b/arch/arm/lib/muldi3.S new file mode 100644 index 0000000..2043059 --- /dev/null +++ b/arch/arm/lib/muldi3.S @@ -0,0 +1,48 @@ +/* + * linux/arch/arm/lib/muldi3.S + * + * Author: Nicolas Pitre + * Created: Oct 19, 2005 + * Copyright: Monta Vista Software, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> + +#ifdef __ARMEB__ +#define xh r0 +#define xl r1 +#define yh r2 +#define yl r3 +#else +#define xl r0 +#define xh r1 +#define yl r2 +#define yh r3 +#endif + +ENTRY(__muldi3) +ENTRY(__aeabi_lmul) + + mul xh, yl, xh + mla xh, xl, yh, xh + mov ip, xl, lsr #16 + mov yh, yl, lsr #16 + bic xl, xl, ip, lsl #16 + bic yl, yl, yh, lsl #16 + mla xh, yh, ip, xh + mul yh, xl, yh + mul xl, yl, xl + mul ip, yl, ip + adds xl, xl, yh, lsl #16 + adc xh, xh, yh, lsr #16 + adds xl, xl, ip, lsl #16 + adc xh, xh, ip, lsr #16 + ret lr + +ENDPROC(__muldi3) +ENDPROC(__aeabi_lmul)

Import functions into lib1funcs.S which are required for Thumb1 build. These functions come from gcc 5.3.1 release.
Signed-off-by: Marek Vasut marex@denx.de Cc: Albert Aribaud albert.u.boot@aribaud.net Cc: Masahiro Yamada yamada.masahiro@socionext.com Cc: Simon Glass sjg@chromium.org Cc: Tom Rini trini@konsulko.com --- arch/arm/lib/lib1funcs.S | 53 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+)
diff --git a/arch/arm/lib/lib1funcs.S b/arch/arm/lib/lib1funcs.S index c343ea9..408839b 100644 --- a/arch/arm/lib/lib1funcs.S +++ b/arch/arm/lib/lib1funcs.S @@ -349,3 +349,56 @@ UNWIND(.save {lr}) ldr pc, [sp], #8 UNWIND(.fnend) ENDPROC(Ldiv0) + +/* Thumb-1 specialities */ +#if defined(CONFIG_SYS_THUMB_BUILD) && !defined(CONFIG_HAS_THUMB2) +ENTRY(__gnu_thumb1_case_sqi) + push {r1} + mov r1, lr + lsrs r1, r1, #1 + lsls r1, r1, #1 + ldrsb r1, [r1, r0] + lsls r1, r1, #1 + add lr, lr, r1 + pop {r1} + bx lr +ENDPROC(__gnu_thumb1_case_sqi) + +ENTRY(__gnu_thumb1_case_uqi) + push {r1} + mov r1, lr + lsrs r1, r1, #1 + lsls r1, r1, #1 + ldrb r1, [r1, r0] + lsls r1, r1, #1 + add lr, lr, r1 + pop {r1} + bx lr +ENDPROC(__gnu_thumb1_case_uqi) + +ENTRY(__gnu_thumb1_case_shi) + push {r0, r1} + mov r1, lr + lsrs r1, r1, #1 + lsls r0, r0, #1 + lsls r1, r1, #1 + ldrsh r1, [r1, r0] + lsls r1, r1, #1 + add lr, lr, r1 + pop {r0, r1} + bx lr +ENDPROC(__gnu_thumb1_case_shi) + +ENTRY(__gnu_thumb1_case_uhi) + push {r0, r1} + mov r1, lr + lsrs r1, r1, #1 + lsls r0, r0, #1 + lsls r1, r1, #1 + ldrh r1, [r1, r0] + lsls r1, r1, #1 + add lr, lr, r1 + pop {r0, r1} + bx lr +ENDPROC(__gnu_thumb1_case_uhi) +#endif

This patch decouples U-Boot binary from the toolchain on systems where private libgcc is available. Instead of pulling in functions provided by the libgcc from the toolchain, U-Boot will use it's own set of libgcc functions. These functions are usually imported from Linux kernel, which also uses it's own libgcc functions instead of the ones provided by the toolchain.
This patch solves a rather common problem. The toolchain can usually generate code for many variants of target architecture and often even different endianness. The libgcc on the other hand is usually compiled for one particular configuration and the functions provided by it may or may not be suited for use in U-Boot. This can manifest in two ways, either the U-Boot fails to compile altogether and linker will complain or, in the much worse case, the resulting U-Boot will build, but will misbehave in very subtle and hard to debug ways.
Signed-off-by: Marek Vasut marex@denx.de Cc: Albert Aribaud albert.u.boot@aribaud.net Cc: Masahiro Yamada yamada.masahiro@socionext.com Cc: Simon Glass sjg@chromium.org Cc: Tom Rini trini@konsulko.com --- lib/Kconfig | 1 + 1 file changed, 1 insertion(+)
diff --git a/lib/Kconfig b/lib/Kconfig index 7a45336..2b911bc 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -14,6 +14,7 @@ config HAVE_PRIVATE_LIBGCC config USE_PRIVATE_LIBGCC bool "Use private libgcc" depends on HAVE_PRIVATE_LIBGCC + default y if HAVE_PRIVATE_LIBGCC help This option allows you to use the built-in libgcc implementation of U-Boot instead of the one privided by the compiler.
participants (1)
-
Marek Vasut