[U-Boot] [PATCH] Fix board init code to use a valid C runtime environment

board_init_f_mem() alters the C runtime environment's stack it ls actually already using. This is not a valid C runtime environment and may conflict with the C compiler's expectations.
Split board_init_f_mem into C functions which do not alter their own stack and therefore function in a valid C runtime environment.
NOTE: this has not been tested with all architectures.
Signed-off-by: Albert ARIBAUD albert.u.boot@aribaud.net --- arch/arc/lib/start.S | 20 +++++++++++++--- arch/arm/lib/crt0.S | 10 ++++++-- arch/arm/lib/crt0_64.S | 12 +++++++--- arch/microblaze/cpu/start.S | 4 ++-- arch/nios2/cpu/start.S | 17 ++++++++++++-- arch/powerpc/cpu/ppc4xx/start.S | 18 ++++++++++---- arch/x86/cpu/start.S | 10 ++++++-- arch/x86/lib/fsp/fsp_common.c | 2 +- common/init/board_init.c | 33 +++++++++++++++----------- include/common.h | 52 +++++++++++++++++++++++++---------------- 10 files changed, 126 insertions(+), 52 deletions(-)
diff --git a/arch/arc/lib/start.S b/arch/arc/lib/start.S index 26a5934..d24d60b 100644 --- a/arch/arc/lib/start.S +++ b/arch/arc/lib/start.S @@ -54,14 +54,28 @@ ENTRY(_start) mov %sp, CONFIG_SYS_INIT_SP_ADDR mov %fp, %sp
- /* Allocate and zero GD, update SP */ + /* Allocate GD, update SP */ mov %r0, %sp - bl board_init_f_mem + bl board_init_f_gd_size + /* Update stack- and frame-pointers */ + sub %sp, %sp, %r0 + mov %fp, %sp + + /* Zero GD */ + mov %r0, %sp + bl board_init_f_gd
+ /* Allocate malloc, update SP */ + mov %r0, %sp + bl board_init_f_malloc_size /* Update stack- and frame-pointers */ - mov %sp, %r0 + sub %sp, %sp, %r0 mov %fp, %sp
+ /* update GD with malloc base */ + mov %r0, %sp + bl board_init_f_malloc + /* Zero the one and only argument of "board_init_f" */ mov_s %r0, 0 j board_init_f diff --git a/arch/arm/lib/crt0.S b/arch/arm/lib/crt0.S index 80548eb..26abab7 100644 --- a/arch/arm/lib/crt0.S +++ b/arch/arm/lib/crt0.S @@ -82,9 +82,15 @@ ENTRY(_main) #else bic sp, sp, #7 /* 8-byte alignment for ABI compliance */ #endif + bl board_init_f_gd_size + sub sp, r0 mov r0, sp - bl board_init_f_mem - mov sp, r0 + mov r9, r0 + bl board_init_f_gd + bl board_init_f_malloc_size + sub sp, r0 + mov r0, sp + bl board_init_f_malloc
mov r0, #0 bl board_init_f diff --git a/arch/arm/lib/crt0_64.S b/arch/arm/lib/crt0_64.S index cef1c71..e716889 100644 --- a/arch/arm/lib/crt0_64.S +++ b/arch/arm/lib/crt0_64.S @@ -75,9 +75,15 @@ ENTRY(_main) ldr x0, =(CONFIG_SYS_INIT_SP_ADDR) #endif bic sp, x0, #0xf /* 16-byte alignment for ABI compliance */ - bl board_init_f_mem - mov sp, x0 - + bl board_init_f_gd_size + sub sp, x0 + mov x0, sp + bl board_init_f_gd + bl board_init_f_malloc_size + sub sp, x0 + mov x0, sp + bl board_init_f_malloc + mov x0, #0 bl board_init_f
diff --git a/arch/microblaze/cpu/start.S b/arch/microblaze/cpu/start.S index 14f46a8..206be3e 100644 --- a/arch/microblaze/cpu/start.S +++ b/arch/microblaze/cpu/start.S @@ -25,7 +25,7 @@ _start:
addi r8, r0, __end mts rslr, r8 - /* TODO: Redo this code to call board_init_f_mem() */ + /* TODO: Redo this code to call board_init_f_*() */ #if defined(CONFIG_SPL_BUILD) addi r1, r0, CONFIG_SPL_STACK_ADDR mts rshr, r1 @@ -142,7 +142,7 @@ _start: ori r12, r12, 0x1a0 mts rmsr, r12
- /* TODO: Redo this code to call board_init_f_mem() */ + /* TODO: Redo this code to call board_init_f_*() */ clear_bss: /* clear BSS segments */ addi r5, r0, __bss_start diff --git a/arch/nios2/cpu/start.S b/arch/nios2/cpu/start.S index 54787c5..4d5f0b0 100644 --- a/arch/nios2/cpu/start.S +++ b/arch/nios2/cpu/start.S @@ -107,9 +107,22 @@ _reloc: mov fp, sp
/* Allocate and zero GD, update SP */ + movhi r2, %hi(board_init_f_gd_size@h) + ori r2, r2, %lo(board_init_gd_size@h) + callr r2 + sub sp, sp, r4 + mov r4, sp + movhi r2, %hi(board_init_f_gd@h) + ori r2, r2, %lo(board_init_f_gd@h) + callr r2 + /* Allocate malloc arena, update SP */ + movhi r2, %hi(board_init_f_malloc_size@h) + ori r2, r2, %lo(board_init_malloc_size@h) + callr r2 + sub sp, sp, r4 mov r4, sp - movhi r2, %hi(board_init_f_mem@h) - ori r2, r2, %lo(board_init_f_mem@h) + movhi r2, %hi(board_init_f_malloc@h) + ori r2, r2, %lo(board_init_f_malloc@h) callr r2
/* Update stack- and frame-pointers */ diff --git a/arch/powerpc/cpu/ppc4xx/start.S b/arch/powerpc/cpu/ppc4xx/start.S index 77d4040..c827c95 100644 --- a/arch/powerpc/cpu/ppc4xx/start.S +++ b/arch/powerpc/cpu/ppc4xx/start.S @@ -761,9 +761,14 @@ _start:
bl cpu_init_f /* run low-level CPU init code (from Flash) */ #ifdef CONFIG_SYS_GENERIC_BOARD + bl board_init_f_gd_size + sub r1, r1, r3 mr r3, r1 - bl board_init_f_mem - mr r1, r3 + bl board_init_f_gd + bl board_init_f_malloc_size + sub r1, r1, r3 + mr r3, r1 + bl board_init_f_malloc li r0,0 stwu r0, -4(r1) stwu r0, -4(r1) @@ -1037,9 +1042,14 @@ _start:
bl cpu_init_f /* run low-level CPU init code (from Flash) */ #ifdef CONFIG_SYS_GENERIC_BOARD + bl board_init_f_gd_size + sub r1, r1, r3 + mr r3, r1 + bl board_init_f_gd + bl board_init_f_malloc_size + sub r1, r1, r3 mr r3, r1 - bl board_init_f_mem - mr r1, r3 + bl board_init_f_malloc stwu r0, -4(r1) stwu r0, -4(r1) #endif diff --git a/arch/x86/cpu/start.S b/arch/x86/cpu/start.S index 5b4ee79..25ac286 100644 --- a/arch/x86/cpu/start.S +++ b/arch/x86/cpu/start.S @@ -122,9 +122,15 @@ car_init_ret: */ #endif /* Set up global data */ + call board_init_f_gd_size + subl %esp, %eax mov %esp, %eax - call board_init_f_mem - mov %eax, %esp + call board_init_f_gd + /* Set up malloc arena */ + call board_init_f_malloc_size + subl %esp, %eax + mov %esp, %eax + call board_init_f_malloc
#ifdef CONFIG_DEBUG_UART call debug_uart_init diff --git a/arch/x86/lib/fsp/fsp_common.c b/arch/x86/lib/fsp/fsp_common.c index c78df94..cd7fd86 100644 --- a/arch/x86/lib/fsp/fsp_common.c +++ b/arch/x86/lib/fsp/fsp_common.c @@ -95,7 +95,7 @@ int x86_fsp_init(void) /* * The second time we enter here, adjust the size of malloc() * pool before relocation. Given gd->malloc_base was adjusted - * after the call to board_init_f_mem() in arch/x86/cpu/start.S, + * after the call to board_init_f_malloc() in arch/x86/cpu/start.S, * we should fix up gd->malloc_limit here. */ gd->malloc_limit += CONFIG_FSP_SYS_MALLOC_F_LEN; diff --git a/common/init/board_init.c b/common/init/board_init.c index e74b63b..1313138 100644 --- a/common/init/board_init.c +++ b/common/init/board_init.c @@ -29,32 +29,39 @@ __weak void arch_setup_gd(struct global_data *gd_ptr) } #endif /* !CONFIG_X86 */
-ulong board_init_f_mem(ulong top) +ulong board_init_f_gd_size(void) +{ + return roundup(sizeof(struct global_data), 16); +} + +void board_init_f_gd(struct global_data *gd_ptr) { - struct global_data *gd_ptr; #ifndef _USE_MEMCPY int *ptr; #endif
- /* Leave space for the stack we are running with now */ - top -= 0x40; - - top -= sizeof(struct global_data); - top = ALIGN(top, 16); - gd_ptr = (struct global_data *)top; #ifdef _USE_MEMCPY memset(gd_ptr, '\0', sizeof(*gd)); #else for (ptr = (int *)gd_ptr; ptr < (int *)(gd_ptr + 1); ) *ptr++ = 0; #endif - arch_setup_gd(gd_ptr); +}
+ulong board_init_f_malloc_size(void) +{ #if defined(CONFIG_SYS_MALLOC_F) && \ - (!defined(CONFIG_SPL_BUILD) || !defined(CONFIG_SYS_SPL_MALLOC_START)) - top -= CONFIG_SYS_MALLOC_F_LEN; - gd->malloc_base = top; + (!defined(CONFIG_SPL_BUILD) || !defined(CONFIG_SYS_SPL_MALLOC_START)) + return CONFIG_SYS_MALLOC_F_LEN; +#else + return 0; #endif +}
- return top; +void board_init_f_malloc(ulong malloc_base) +{ +#if defined(CONFIG_SYS_MALLOC_F) && \ + (!defined(CONFIG_SPL_BUILD) || !defined(CONFIG_SYS_SPL_MALLOC_START)) + gd->malloc_base = malloc_base; +#endif } diff --git a/include/common.h b/include/common.h index 09a131d..dbc2808 100644 --- a/include/common.h +++ b/include/common.h @@ -225,32 +225,44 @@ void board_init_f(ulong); void board_init_r(gd_t *, ulong) __attribute__ ((noreturn));
/** - * board_init_f_mem() - Allocate global data and set stack position + * board_init_f_gd_size() - Return the size of the global data * * This function is called by each architecture very early in the start-up - * code to set up the environment for board_init_f(). It allocates space for - * global_data (see include/asm-generic/global_data.h) and places the stack - * below this. + * code to allow the C runtime to reserve space for the GD on the stack. + * + * @return: GD size + */ +ulong board_init_f_gd_size(void); + +/** + * board_init_f_gd() - Initialize (zero) the global data * - * This function requires a stack[1] Normally this is at @top. The function - * starts allocating space from 64 bytes below @top. First it creates space - * for global_data. Then it calls arch_setup_gd() which sets gd to point to - * the global_data space and can reserve additional bytes of space if - * required). Finally it allocates early malloc() memory - * (CONFIG_SYS_MALLOC_F_LEN). The new top of the stack is just below this, - * and it returned by this function. + * This function is called once the C runtime has allocated the GD on + * the stack. It must initialize the GD. + * + * @gd_ptr: the pointer to the GD to initialize + */ +void board_init_f_gd(struct global_data *gd_ptr); + +/** + * board_init_f_malloc_size() - Return the size of the malloc arena * - * [1] Strictly speaking it would be possible to implement this function - * in C on many archs such that it does not require a stack. However this - * does not seem hugely important as only 64 byte are wasted. The 64 bytes - * are used to handle the calling standard which generally requires pushing - * addresses or registers onto the stack. We should be able to get away with - * less if this becomes important. + * This function is called once the GD is initialized, to allow the C + * runtime to allocate the malloc arena on the stack. + * + * @return: Malloc arena size + */ +ulong board_init_f_malloc_size(void); + +/** + * board_init_f_malloc() - Mark the malloc arena base in the global data * - * @top: Top of available memory, also normally the top of the stack - * @return: New stack location + * This function is called once the malloc arena is allocated on the + * stack. It marks the malloc arena base in the global data. + * + * @malloc_base: the base of the malloc arena */ -ulong board_init_f_mem(ulong top); +ulong board_init_f_malloc(ulong malloc_base);
/** * arch_setup_gd() - Set up the global_data pointer

Hi Albert,
On 2015年11月10日 08:20, Albert ARIBAUD wrote:
board_init_f_mem() alters the C runtime environment's stack it ls actually already using. This is not a valid C runtime environment and may conflict with the C compiler's expectations.
Split board_init_f_mem into C functions which do not alter their own stack and therefore function in a valid C runtime environment.
NOTE: this has not been tested with all architectures.
Signed-off-by: Albert ARIBAUD albert.u.boot@aribaud.net
arch/arc/lib/start.S | 20 +++++++++++++--- arch/arm/lib/crt0.S | 10 ++++++-- arch/arm/lib/crt0_64.S | 12 +++++++--- arch/microblaze/cpu/start.S | 4 ++-- arch/nios2/cpu/start.S | 17 ++++++++++++-- arch/powerpc/cpu/ppc4xx/start.S | 18 ++++++++++---- arch/x86/cpu/start.S | 10 ++++++-- arch/x86/lib/fsp/fsp_common.c | 2 +- common/init/board_init.c | 33 +++++++++++++++----------- include/common.h | 52 +++++++++++++++++++++++++---------------- 10 files changed, 126 insertions(+), 52 deletions(-)
Apply the patch.
$ git am "/work/tmp/[U-Boot] [PATCH] Fix board init code to use a valid C runtime environment.eml" Applying: Fix board init code to use a valid C runtime environment /work/nios2-linux/u-boot/.git/rebase-apply/patch:91: trailing whitespace. /work/nios2-linux/u-boot/.git/rebase-apply/patch:292: trailing whitespace. * /work/nios2-linux/u-boot/.git/rebase-apply/patch:309: trailing whitespace. * /work/nios2-linux/u-boot/.git/rebase-apply/patch:325: trailing whitespace. * /work/nios2-linux/u-boot/.git/rebase-apply/patch:337: trailing whitespace. * warning: 5 lines add whitespace errors.
Make.
LD arch/nios2/lib/built-in.o CC common/init/board_init.o common/init/board_init.c:61:6: error: conflicting types for 'board_init_f_malloc' void board_init_f_malloc(ulong malloc_base) ^ In file included from common/init/board_init.c:10:0: include/common.h:265:7: note: previous declaration of 'board_init_f_malloc' was here ulong board_init_f_malloc(ulong malloc_base); ^ scripts/Makefile.build:277: recipe for target 'common/init/board_init.o' failed make[2]: *** [common/init/board_init.o] Error 1 scripts/Makefile.build:422: recipe for target 'common/init' failed make[1]: *** [common/init] Error 2 Makefile:1205: recipe for target 'common' failed make: *** [common] Error 2
Fix common.h.
diff --git a/include/common.h b/include/common.h index dbc2808..7eb3ba8 100644 --- a/include/common.h +++ b/include/common.h @@ -262,7 +262,7 @@ ulong board_init_f_malloc_size(void); * * @malloc_base: the base of the malloc arena */ -ulong board_init_f_malloc(ulong malloc_base); +void board_init_f_malloc(ulong malloc_base);
/** * arch_setup_gd() - Set up the global_data pointer
Make again.
LDS u-boot.lds LD u-boot arch/nios2/cpu/start.o: In function `_reloc': /work/nios2-linux/u-boot/arch/nios2/cpu/start.S:111: undefined reference to `board_init_gd_size' /work/nios2-linux/u-boot/arch/nios2/cpu/start.S:120: undefined reference to `board_init_malloc_size' Makefile:1187: recipe for target 'u-boot' failed make: *** [u-boot] Error 1
Fix start.S.
diff --git a/arch/nios2/cpu/start.S b/arch/nios2/cpu/start.S index 4d5f0b0..c163ce1 100644 --- a/arch/nios2/cpu/start.S +++ b/arch/nios2/cpu/start.S @@ -108,7 +108,7 @@ _reloc:
/* Allocate and zero GD, update SP */ movhi r2, %hi(board_init_f_gd_size@h) - ori r2, r2, %lo(board_init_gd_size@h) + ori r2, r2, %lo(board_init_f_gd_size@h) callr r2 sub sp, sp, r4 mov r4, sp @@ -117,7 +117,7 @@ _reloc: callr r2 /* Allocate malloc arena, update SP */ movhi r2, %hi(board_init_f_malloc_size@h) - ori r2, r2, %lo(board_init_malloc_size@h) + ori r2, r2, %lo(board_init_f_malloc_size@h) callr r2 sub sp, sp, r4 mov r4, sp
Make and run, but failed to boot. I will gdb and tell you the trace later.
Best regards, Thomas

Hello Thomas,
On Tue, 10 Nov 2015 09:14:01 +0800, Thomas Chou thomas@wytron.com.tw wrote:
Hi Albert,
On 2015年11月10日 08:20, Albert ARIBAUD wrote:
board_init_f_mem() alters the C runtime environment's stack it ls actually already using. This is not a valid C runtime environment and may conflict with the C compiler's expectations.
Split board_init_f_mem into C functions which do not alter their own stack and therefore function in a valid C runtime environment.
NOTE: this has not been tested with all architectures.
Signed-off-by: Albert ARIBAUD albert.u.boot@aribaud.net
arch/arc/lib/start.S | 20 +++++++++++++--- arch/arm/lib/crt0.S | 10 ++++++-- arch/arm/lib/crt0_64.S | 12 +++++++--- arch/microblaze/cpu/start.S | 4 ++-- arch/nios2/cpu/start.S | 17 ++++++++++++-- arch/powerpc/cpu/ppc4xx/start.S | 18 ++++++++++---- arch/x86/cpu/start.S | 10 ++++++-- arch/x86/lib/fsp/fsp_common.c | 2 +- common/init/board_init.c | 33 +++++++++++++++----------- include/common.h | 52 +++++++++++++++++++++++++---------------- 10 files changed, 126 insertions(+), 52 deletions(-)
Apply the patch.
$ git am "/work/tmp/[U-Boot] [PATCH] Fix board init code to use a valid C runtime environment.eml" Applying: Fix board init code to use a valid C runtime environment /work/nios2-linux/u-boot/.git/rebase-apply/patch:91: trailing whitespace.
/work/nios2-linux/u-boot/.git/rebase-apply/patch:292: trailing whitespace.
/work/nios2-linux/u-boot/.git/rebase-apply/patch:309: trailing whitespace.
/work/nios2-linux/u-boot/.git/rebase-apply/patch:325: trailing whitespace.
/work/nios2-linux/u-boot/.git/rebase-apply/patch:337: trailing whitespace.
warning: 5 lines add whitespace errors.
Make.
LD arch/nios2/lib/built-in.o CC common/init/board_init.o common/init/board_init.c:61:6: error: conflicting types for 'board_init_f_malloc' void board_init_f_malloc(ulong malloc_base) ^ In file included from common/init/board_init.c:10:0: include/common.h:265:7: note: previous declaration of 'board_init_f_malloc' was here ulong board_init_f_malloc(ulong malloc_base); ^ scripts/Makefile.build:277: recipe for target 'common/init/board_init.o' failed make[2]: *** [common/init/board_init.o] Error 1 scripts/Makefile.build:422: recipe for target 'common/init' failed make[1]: *** [common/init] Error 2 Makefile:1205: recipe for target 'common' failed make: *** [common] Error 2
Fix common.h.
diff --git a/include/common.h b/include/common.h index dbc2808..7eb3ba8 100644 --- a/include/common.h +++ b/include/common.h @@ -262,7 +262,7 @@ ulong board_init_f_malloc_size(void);
- @malloc_base: the base of the malloc arena
*/ -ulong board_init_f_malloc(ulong malloc_base); +void board_init_f_malloc(ulong malloc_base);
/**
- arch_setup_gd() - Set up the global_data pointer
Make again.
LDS u-boot.lds LD u-boot arch/nios2/cpu/start.o: In function `_reloc': /work/nios2-linux/u-boot/arch/nios2/cpu/start.S:111: undefined reference to `board_init_gd_size' /work/nios2-linux/u-boot/arch/nios2/cpu/start.S:120: undefined reference to `board_init_malloc_size' Makefile:1187: recipe for target 'u-boot' failed make: *** [u-boot] Error 1
Fix start.S.
diff --git a/arch/nios2/cpu/start.S b/arch/nios2/cpu/start.S index 4d5f0b0..c163ce1 100644 --- a/arch/nios2/cpu/start.S +++ b/arch/nios2/cpu/start.S @@ -108,7 +108,7 @@ _reloc:
/* Allocate and zero GD, update SP */ movhi r2, %hi(board_init_f_gd_size@h)
- ori r2, r2, %lo(board_init_gd_size@h)
- ori r2, r2, %lo(board_init_f_gd_size@h) callr r2 sub sp, sp, r4 mov r4, sp
@@ -117,7 +117,7 @@ _reloc: callr r2 /* Allocate malloc arena, update SP */ movhi r2, %hi(board_init_f_malloc_size@h)
- ori r2, r2, %lo(board_init_malloc_size@h)
- ori r2, r2, %lo(board_init_f_malloc_size@h) callr r2 sub sp, sp, r4 mov r4, sp
Make and run, but failed to boot. I will gdb and tell you the trace later.
I'll fix the general part ASAP -- I did a first run of the patch on ARM then generalized it afterward but I had to rush to remain in the merge window, which explains the header bunder, sorry.
Regarding the NIOS part, I did it by copying the ARM code and guessing the NIOS asm language, but I am no expert (and have no NIOS HW) - so apologies for this, and thanks for your help!
Best regards, Thomas
Amicalement,

Hi Albert,
On Tue, Nov 10, 2015 at 8:20 AM, Albert ARIBAUD albert.u.boot@aribaud.net wrote:
board_init_f_mem() alters the C runtime environment's stack it ls actually already using. This is not a valid C runtime environment and may conflict with the C compiler's expectations.
What C compiler's expectations conflicts with current board_init_f_mem()? Did you see any actual error on any board?
Split board_init_f_mem into C functions which do not alter their own stack and therefore function in a valid C runtime environment.
NOTE: this has not been tested with all architectures.
Signed-off-by: Albert ARIBAUD albert.u.boot@aribaud.net
[snip]
Regards, Bin

Hello Bin,
On Tue, 10 Nov 2015 11:54:08 +0800, Bin Meng bmeng.cn@gmail.com wrote:
Hi Albert,
On Tue, Nov 10, 2015 at 8:20 AM, Albert ARIBAUD albert.u.boot@aribaud.net wrote:
board_init_f_mem() alters the C runtime environment's stack it ls actually already using. This is not a valid C runtime environment and may conflict with the C compiler's expectations.
What C compiler's expectations conflicts with current board_init_f_mem()? Did you see any actual error on any board?
I did not see any error directly caused by board_init_f_mem() manipulating its own stack and approximating its own usage of it in the process --just like, if you'll allow me this analogy, I saw quite a few worn tires but never saw personally a car crashing because of that; I do however change my own car's tires when they're worn. It's not a question of fixing an /failure/, is a question of fixing an /obvious/ /risk/.
Back to board_init_f_mem(): it is a C function, therefore, the calling context provides a working C environment for it and expects it to not abuse that environment; and once the function has returned, the calling context may do whatever it likes with this environment as long as it does not trash any currently running function's stack use and does not lose the function's return value.
This is somewhat akin to 'programming by contract': if any side (calling or callee) breaks the contract, anything could happen. The C environment provides a writable stack hopefully large enough to accommodate the function's local storage needs, but nothing says where that stack comes from or where it will go. The C stack could even be at a different location for every C function and still respect the caller vs callee contract.
Now, board_init_f_mem:
The 64 bytes reserve at the start of board_init_f_mem() does in fact acknowledge a breach of contract: it is an ad hoc fix to the fact that the C stack passed to board_init_f_mem() is actually the one used by its prologue for local storage before the first instruction is hit.
And this value is arbitrary: for now it works, but there is no guarantee that it will work for every architecture and configuration. Worse, add a few local variables and it /will/ not work any more.
And as I hinted, you don't know what the caller of board_init_f_mem might do on the stack when it gets control back, and this may include trashing whatever board_init_f_mem() put there -- Yes, right /now/ we *do* know; but later on we /might not/ necessarily.
IOW, the current "contract" between board_init_f_mem() and its caller is unique, diverges from the usual C contract, and is plain /weird/. which implies that whenever modifying either the caller or callee, one has to keep that divergent and unusual contract in mind, and pray that the compiler's own (standard) view won't interfere (see aside below).
So it's best if we keep our code compliant with the assumptions of the C run-time that the C stack is provided to the /compiler/ of a function for local variable storage and temporary register saves, not to the function itself to work on.
[ASIDE]
Unrelated to this patch (at least unrelated what board_init_f_mem does), here is an example of assuming the compiler will behave one way and being bitten hard when it does not.
in gcc, option -ffixed-reg directs the compiler to leave register 'reg' alone so that it keeps its value and can be used as some sort of system- wide global variable. In ARM, we use -ffixed-r9 (because the ARM EABI says that of all registers, r9 is the one to be used as that system-wide global), and r9 points to our GD. This is efficient, and compliant with the 'contract' which defines how '--fixed-reg' works. As long as the compiler respects it, we're fine.
Previously, r9 was set to point to GD outside the C environment. Then came the arch_setup_gd() function which did it from inside the C env, and that went fine -- after all, that function was the only one which did affected r9 since in all C functions the /compiler/ wad told to leave r9 alone by -ffixed-r9, and the user code only touched it in one place.
Now comes gcc 5.2.1. It does not break the contract: in ARM builds, nothing changes, and in Thumb-1 builds, the only change is that r9 may be saved on entry to the function and then restored on exit. This still leaves r9 alone as far as user code is concerned (I haven't done ad hoc testing, but the test I've done shows so, see below).
But this breaks arch_setup_gd() in Thumb-1 builds, because now, on entry it saves r9, then t sets it to GD, then... it restores it.
Once I 'fixed' this by making arch_setup_gd an empty-body function and setting r9 from crt0.S outside the C environment execution, the Thumb-1 build of U-Boot on Open-RD worked: everywhere GD was accessed by user code, r9 was correct, despite just about any C function pushing and popping r9 all the time.
The break of arch_setup_gd() was because of how strict we and the GCC folks interpret(ed) the 'leave reg alone if -ffixed-reg is given' contract.
With arch_setup_gd(), we went from stricty assuming 'reg is constant from the user code perspective' to less strictly assuming 'reg is constant for both user and compiler-generated code'.
The GCC folks kept strictly assuming the former, and their ARM and Thumb-1 code generations both respect their constant assumption; but only their ARM code generation respects ours in gcc 5.2.1.
Had we always assumed the strictest definition for the -ffixed-reg contract, arch_setup_gd() would not have broken with GCC 5.2.1 Thumb-1 (because it would not have been written in C, due to the fact that by definition a C function cannot alter the fixed register).
[END OF ASIDE]
That is the same with board_init_f_mem() here: we are not in control of how the compiler generates a call sequence or the prologue or epilogue, especially across all architectures and compiler options, and for instance we can't tell if there is a configuration of compiler options which breaks the 64-byte assumption.
We /could/ assume that a C function altering its own stack is OK as long as we are careful; but we've just seen with the r9 aside that "careful" may not always be careful enough.
Or we can strictly assume that a C function altering its own stack is *not* OK, and do all stack alterations outside any C function. We can still leave size computations and data initializations in C functions where they are clearer and easier to maintain within the constraints of the C runtime.
Think of it as discovering some out-of-spec value written to a register's reserved area which apparently cause no harm (and possibly even were copied from code issued by the manufacturer) because the reserved area is not used. Then comes a new version of the silicon where the area is still described as reserved but for which the value has now a functional effect. Boom. And then (or, more probably, after the few hours it will take to finally find out that the cause of the boom was the out-of-spec value) you'll wish that you had fixed that out-of-spec value years before.
Same here: we have an out-of-spec situation; let's make it in-spec.
Amicalement,

Hi Albert,
On 9 November 2015 at 17:20, Albert ARIBAUD albert.u.boot@aribaud.net wrote:
board_init_f_mem() alters the C runtime environment's stack it ls actually already using. This is not a valid C runtime environment and may conflict with the C compiler's expectations.
Split board_init_f_mem into C functions which do not alter their own stack and therefore function in a valid C runtime environment.
NOTE: this has not been tested with all architectures.
Signed-off-by: Albert ARIBAUD albert.u.boot@aribaud.net
arch/arc/lib/start.S | 20 +++++++++++++--- arch/arm/lib/crt0.S | 10 ++++++-- arch/arm/lib/crt0_64.S | 12 +++++++--- arch/microblaze/cpu/start.S | 4 ++-- arch/nios2/cpu/start.S | 17 ++++++++++++-- arch/powerpc/cpu/ppc4xx/start.S | 18 ++++++++++---- arch/x86/cpu/start.S | 10 ++++++-- arch/x86/lib/fsp/fsp_common.c | 2 +- common/init/board_init.c | 33 +++++++++++++++----------- include/common.h | 52 +++++++++++++++++++++++++---------------- 10 files changed, 126 insertions(+), 52 deletions(-)
diff --git a/arch/arc/lib/start.S b/arch/arc/lib/start.S index 26a5934..d24d60b 100644 --- a/arch/arc/lib/start.S +++ b/arch/arc/lib/start.S @@ -54,14 +54,28 @@ ENTRY(_start) mov %sp, CONFIG_SYS_INIT_SP_ADDR mov %fp, %sp
/* Allocate and zero GD, update SP */
/* Allocate GD, update SP */ mov %r0, %sp
bl board_init_f_mem
bl board_init_f_gd_size
/* Update stack- and frame-pointers */
sub %sp, %sp, %r0
mov %fp, %sp
/* Zero GD */
mov %r0, %sp
bl board_init_f_gd
/* Allocate malloc, update SP */
mov %r0, %sp
bl board_init_f_malloc_size /* Update stack- and frame-pointers */
mov %sp, %r0
sub %sp, %sp, %r0 mov %fp, %sp
/* update GD with malloc base */
mov %r0, %sp
bl board_init_f_malloc
I'm hoping we don't have to bring back this assembler. Do we really need to put everything in separate functions?
Perhaps you could add a bit more detail in the commit message as to what problem this solves?
/* Zero the one and only argument of "board_init_f" */ mov_s %r0, 0 j board_init_f
diff --git a/arch/arm/lib/crt0.S b/arch/arm/lib/crt0.S index 80548eb..26abab7 100644 --- a/arch/arm/lib/crt0.S +++ b/arch/arm/lib/crt0.S @@ -82,9 +82,15 @@ ENTRY(_main) #else bic sp, sp, #7 /* 8-byte alignment for ABI compliance */ #endif
bl board_init_f_gd_size
sub sp, r0 mov r0, sp
bl board_init_f_mem
mov sp, r0
mov r9, r0
bl board_init_f_gd
bl board_init_f_malloc_size
sub sp, r0
mov r0, sp
bl board_init_f_malloc mov r0, #0 bl board_init_f
diff --git a/arch/arm/lib/crt0_64.S b/arch/arm/lib/crt0_64.S index cef1c71..e716889 100644 --- a/arch/arm/lib/crt0_64.S +++ b/arch/arm/lib/crt0_64.S @@ -75,9 +75,15 @@ ENTRY(_main) ldr x0, =(CONFIG_SYS_INIT_SP_ADDR) #endif bic sp, x0, #0xf /* 16-byte alignment for ABI compliance */
bl board_init_f_mem
mov sp, x0
bl board_init_f_gd_size
sub sp, x0
mov x0, sp
bl board_init_f_gd
bl board_init_f_malloc_size
sub sp, x0
mov x0, sp
bl board_init_f_malloc
mov x0, #0 bl board_init_f
diff --git a/arch/microblaze/cpu/start.S b/arch/microblaze/cpu/start.S index 14f46a8..206be3e 100644 --- a/arch/microblaze/cpu/start.S +++ b/arch/microblaze/cpu/start.S @@ -25,7 +25,7 @@ _start:
addi r8, r0, __end mts rslr, r8
/* TODO: Redo this code to call board_init_f_mem() */
/* TODO: Redo this code to call board_init_f_*() */
#if defined(CONFIG_SPL_BUILD) addi r1, r0, CONFIG_SPL_STACK_ADDR mts rshr, r1 @@ -142,7 +142,7 @@ _start: ori r12, r12, 0x1a0 mts rmsr, r12
/* TODO: Redo this code to call board_init_f_mem() */
/* TODO: Redo this code to call board_init_f_*() */
clear_bss: /* clear BSS segments */ addi r5, r0, __bss_start diff --git a/arch/nios2/cpu/start.S b/arch/nios2/cpu/start.S index 54787c5..4d5f0b0 100644 --- a/arch/nios2/cpu/start.S +++ b/arch/nios2/cpu/start.S @@ -107,9 +107,22 @@ _reloc: mov fp, sp
/* Allocate and zero GD, update SP */
movhi r2, %hi(board_init_f_gd_size@h)
ori r2, r2, %lo(board_init_gd_size@h)
callr r2
sub sp, sp, r4
mov r4, sp
movhi r2, %hi(board_init_f_gd@h)
ori r2, r2, %lo(board_init_f_gd@h)
callr r2
/* Allocate malloc arena, update SP */
movhi r2, %hi(board_init_f_malloc_size@h)
ori r2, r2, %lo(board_init_malloc_size@h)
callr r2
sub sp, sp, r4 mov r4, sp
movhi r2, %hi(board_init_f_mem@h)
ori r2, r2, %lo(board_init_f_mem@h)
movhi r2, %hi(board_init_f_malloc@h)
ori r2, r2, %lo(board_init_f_malloc@h) callr r2 /* Update stack- and frame-pointers */
diff --git a/arch/powerpc/cpu/ppc4xx/start.S b/arch/powerpc/cpu/ppc4xx/start.S index 77d4040..c827c95 100644 --- a/arch/powerpc/cpu/ppc4xx/start.S +++ b/arch/powerpc/cpu/ppc4xx/start.S @@ -761,9 +761,14 @@ _start:
bl cpu_init_f /* run low-level CPU init code (from Flash) */
#ifdef CONFIG_SYS_GENERIC_BOARD
bl board_init_f_gd_size
sub r1, r1, r3 mr r3, r1
bl board_init_f_mem
mr r1, r3
bl board_init_f_gd
bl board_init_f_malloc_size
sub r1, r1, r3
mr r3, r1
bl board_init_f_malloc li r0,0 stwu r0, -4(r1) stwu r0, -4(r1)
@@ -1037,9 +1042,14 @@ _start:
bl cpu_init_f /* run low-level CPU init code (from Flash) */
#ifdef CONFIG_SYS_GENERIC_BOARD
bl board_init_f_gd_size
sub r1, r1, r3
mr r3, r1
bl board_init_f_gd
bl board_init_f_malloc_size
sub r1, r1, r3 mr r3, r1
bl board_init_f_mem
mr r1, r3
bl board_init_f_malloc stwu r0, -4(r1) stwu r0, -4(r1)
#endif diff --git a/arch/x86/cpu/start.S b/arch/x86/cpu/start.S index 5b4ee79..25ac286 100644 --- a/arch/x86/cpu/start.S +++ b/arch/x86/cpu/start.S @@ -122,9 +122,15 @@ car_init_ret: */ #endif /* Set up global data */
call board_init_f_gd_size
subl %esp, %eax mov %esp, %eax
call board_init_f_mem
mov %eax, %esp
call board_init_f_gd
/* Set up malloc arena */
call board_init_f_malloc_size
subl %esp, %eax
mov %esp, %eax
call board_init_f_malloc
#ifdef CONFIG_DEBUG_UART call debug_uart_init diff --git a/arch/x86/lib/fsp/fsp_common.c b/arch/x86/lib/fsp/fsp_common.c index c78df94..cd7fd86 100644 --- a/arch/x86/lib/fsp/fsp_common.c +++ b/arch/x86/lib/fsp/fsp_common.c @@ -95,7 +95,7 @@ int x86_fsp_init(void) /* * The second time we enter here, adjust the size of malloc() * pool before relocation. Given gd->malloc_base was adjusted
* after the call to board_init_f_mem() in arch/x86/cpu/start.S,
* after the call to board_init_f_malloc() in arch/x86/cpu/start.S, * we should fix up gd->malloc_limit here. */ gd->malloc_limit += CONFIG_FSP_SYS_MALLOC_F_LEN;
diff --git a/common/init/board_init.c b/common/init/board_init.c index e74b63b..1313138 100644 --- a/common/init/board_init.c +++ b/common/init/board_init.c @@ -29,32 +29,39 @@ __weak void arch_setup_gd(struct global_data *gd_ptr) } #endif /* !CONFIG_X86 */
-ulong board_init_f_mem(ulong top) +ulong board_init_f_gd_size(void) +{
return roundup(sizeof(struct global_data), 16);
+}
+void board_init_f_gd(struct global_data *gd_ptr) {
struct global_data *gd_ptr;
#ifndef _USE_MEMCPY int *ptr; #endif
/* Leave space for the stack we are running with now */
top -= 0x40;
top -= sizeof(struct global_data);
top = ALIGN(top, 16);
gd_ptr = (struct global_data *)top;
#ifdef _USE_MEMCPY memset(gd_ptr, '\0', sizeof(*gd)); #else for (ptr = (int *)gd_ptr; ptr < (int *)(gd_ptr + 1); ) *ptr++ = 0; #endif
arch_setup_gd(gd_ptr);
+}
+ulong board_init_f_malloc_size(void) +{ #if defined(CONFIG_SYS_MALLOC_F) && \
(!defined(CONFIG_SPL_BUILD) || !defined(CONFIG_SYS_SPL_MALLOC_START))
top -= CONFIG_SYS_MALLOC_F_LEN;
gd->malloc_base = top;
- (!defined(CONFIG_SPL_BUILD) || !defined(CONFIG_SYS_SPL_MALLOC_START))
return CONFIG_SYS_MALLOC_F_LEN;
+#else
return 0;
#endif +}
return top;
+void board_init_f_malloc(ulong malloc_base) +{ +#if defined(CONFIG_SYS_MALLOC_F) && \
- (!defined(CONFIG_SPL_BUILD) || !defined(CONFIG_SYS_SPL_MALLOC_START))
gd->malloc_base = malloc_base;
+#endif } diff --git a/include/common.h b/include/common.h index 09a131d..dbc2808 100644 --- a/include/common.h +++ b/include/common.h @@ -225,32 +225,44 @@ void board_init_f(ulong); void board_init_r(gd_t *, ulong) __attribute__ ((noreturn));
/**
- board_init_f_mem() - Allocate global data and set stack position
- board_init_f_gd_size() - Return the size of the global data
- This function is called by each architecture very early in the start-up
- code to set up the environment for board_init_f(). It allocates space for
- global_data (see include/asm-generic/global_data.h) and places the stack
- below this.
- code to allow the C runtime to reserve space for the GD on the stack.
- @return: GD size
- */
+ulong board_init_f_gd_size(void);
+/**
- board_init_f_gd() - Initialize (zero) the global data
- This function requires a stack[1] Normally this is at @top. The function
- starts allocating space from 64 bytes below @top. First it creates space
- for global_data. Then it calls arch_setup_gd() which sets gd to point to
- the global_data space and can reserve additional bytes of space if
- required). Finally it allocates early malloc() memory
- (CONFIG_SYS_MALLOC_F_LEN). The new top of the stack is just below this,
- and it returned by this function.
- This function is called once the C runtime has allocated the GD on
- the stack. It must initialize the GD.
- @gd_ptr: the pointer to the GD to initialize
- */
+void board_init_f_gd(struct global_data *gd_ptr);
+/**
- board_init_f_malloc_size() - Return the size of the malloc arena
- [1] Strictly speaking it would be possible to implement this function
- in C on many archs such that it does not require a stack. However this
- does not seem hugely important as only 64 byte are wasted. The 64 bytes
- are used to handle the calling standard which generally requires pushing
- addresses or registers onto the stack. We should be able to get away with
- less if this becomes important.
- This function is called once the GD is initialized, to allow the C
- runtime to allocate the malloc arena on the stack.
- @return: Malloc arena size
- */
+ulong board_init_f_malloc_size(void);
+/**
- board_init_f_malloc() - Mark the malloc arena base in the global data
- @top: Top of available memory, also normally the top of the stack
- @return: New stack location
- This function is called once the malloc arena is allocated on the
- stack. It marks the malloc arena base in the global data.
*/
- @malloc_base: the base of the malloc arena
-ulong board_init_f_mem(ulong top); +ulong board_init_f_malloc(ulong malloc_base);
/**
- arch_setup_gd() - Set up the global_data pointer
-- 2.5.0
U-Boot mailing list U-Boot@lists.denx.de http://lists.denx.de/mailman/listinfo/u-boot
Regards, Simon

Hello Simon,
On Tue, 10 Nov 2015 13:04:50 -0700, Simon Glass sjg@chromium.org wrote:
I'm hoping we don't have to bring back this assembler.
What we need in ASM is just the part that actually alters the stack (i.e., changes the SP of the architecture) so that no C function has to do it.
Do we really need to put everything in separate functions?
We need the (non-C) allocation code to be told the size to allocate and to pass the base address of the allocated space, so that makes two calls per allocated space, and there's GD and malloc, so four calls.
Now, I created one function per call, but there are ways to use less than four functions.
I could have specified the allocations sizes directly in crt0.S, but I agree that all we can do in C should be done in C, so I kept the size calculations in C functions.
We could also reduce the score to one function for GD and one for malloc as follows: instead of calling one ulong(*)(void) to get the size and one void(*)(void*) to initialize the allocated space, we could call one ulong(*)(void*) twice, first with the void* equal to NULL (and the function then returns the size it needs allocated) and second with the void* equal to the base address of the allocated space (and the function can then initialize it).
We could even reduce this to one single ulong(int chunk_id, void*) where chunk_id would be either CHUNK_GD or CHUNK_MALLOC), and the function would return the right size or do the right initializations for the chunk considered. Plus, if a new type of chunk should be allocated in addition to GD and the malloc arena, this could be handled by adding a new chunk_id and adding cases in the alloc function for size computation and chunk initialization.
Perhaps you could add a bit more detail in the commit message as to what problem this solves?
Will do in v3.
Regards, Simon
Amicalement,
participants (4)
-
Albert ARIBAUD
-
Bin Meng
-
Simon Glass
-
Thomas Chou