[U-Boot] [PATCH] ARM: crt0: Pass malloc base address

From: Fabio Estevam fabio.estevam@freescale.com
Commit 5ba534d247d418 ("arm: Switch 32-bit ARM to using generic global_data setup") causes malloc() to fail in SPL.
The reason is that the GD_MALLOC_BASE is not passed anymore.
Restore the code that passes malloc base so that we can have malloc working in SPL code again.
Signed-off-by: Fabio Estevam fabio.estevam@freescale.com --- arch/arm/lib/crt0.S | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/arch/arm/lib/crt0.S b/arch/arm/lib/crt0.S index 80548eb..d620126 100644 --- a/arch/arm/lib/crt0.S +++ b/arch/arm/lib/crt0.S @@ -87,6 +87,10 @@ ENTRY(_main) mov sp, r0
mov r0, #0 +#if defined(CONFIG_SYS_MALLOC_F_LEN) + sub sp, sp, #CONFIG_SYS_MALLOC_F_LEN + str sp, [r9, #GD_MALLOC_BASE] +#endif bl board_init_f
#if ! defined(CONFIG_SPL_BUILD)

Hi Fabio,
On 11 November 2015 at 13:23, Fabio Estevam festevam@gmail.com wrote:
From: Fabio Estevam fabio.estevam@freescale.com
Commit 5ba534d247d418 ("arm: Switch 32-bit ARM to using generic global_data setup") causes malloc() to fail in SPL.
The reason is that the GD_MALLOC_BASE is not passed anymore.
Restore the code that passes malloc base so that we can have malloc working in SPL code again.
Signed-off-by: Fabio Estevam fabio.estevam@freescale.com
arch/arm/lib/crt0.S | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/arch/arm/lib/crt0.S b/arch/arm/lib/crt0.S index 80548eb..d620126 100644 --- a/arch/arm/lib/crt0.S +++ b/arch/arm/lib/crt0.S @@ -87,6 +87,10 @@ ENTRY(_main) mov sp, r0
mov r0, #0
+#if defined(CONFIG_SYS_MALLOC_F_LEN)
sub sp, sp, #CONFIG_SYS_MALLOC_F_LEN
str sp, [r9, #GD_MALLOC_BASE]
+#endif bl board_init_f
#if ! defined(CONFIG_SPL_BUILD)
1.9.1
Thanks for digging into this. But this should be set up in board_init_f_mem():
#if defined(CONFIG_SYS_MALLOC_F) && \ (!defined(CONFIG_SPL_BUILD) || !defined(CONFIG_SYS_SPL_MALLOC_START)) top -= CONFIG_SYS_MALLOC_F_LEN; gd->malloc_base = top; #endif
Is it possible that the #ifdef logic is wrong for your board?
Regardds, Simon

Hi Simon,
On Wed, Nov 11, 2015 at 6:26 PM, Simon Glass sjg@chromium.org wrote:
Thanks for digging into this. But this should be set up in board_init_f_mem():
#if defined(CONFIG_SYS_MALLOC_F) && \ (!defined(CONFIG_SPL_BUILD) || !defined(CONFIG_SYS_SPL_MALLOC_START)) top -= CONFIG_SYS_MALLOC_F_LEN; gd->malloc_base = top; #endif
Is it possible that the #ifdef logic is wrong for your board?
Good point. Looks like this is the problem indeed.
I have manually removed the #ifdef logic just for testing:
--- a/common/init/board_init.c +++ b/common/init/board_init.c @@ -50,11 +50,8 @@ ulong board_init_f_mem(ulong top) #endif arch_setup_gd(gd_ptr);
-#if defined(CONFIG_SYS_MALLOC_F) && \ - (!defined(CONFIG_SPL_BUILD) || !defined(CONFIG_SYS_SPL_MALLOC_START)) top -= CONFIG_SYS_MALLOC_F_LEN; gd->malloc_base = top; -#endif
return top; }
,and then malloc() works fine in SPL.
Doing a make mx6sabresd_spl_defconfig I get: CONFIG_SYS_MALLOC_F=y
For SPL_BUILD I get in menuconfig:
Symbol: SPL_BUILD [=SPL_BUILD] Type : unknown
and CONFIG_SYS_SPL_MALLOC_START does not exist.
Regards,
Fabio Estevam

Hi Simon and Albert,
On Wed, Nov 11, 2015 at 6:41 PM, Fabio Estevam festevam@gmail.com wrote:
Hi Simon,
On Wed, Nov 11, 2015 at 6:26 PM, Simon Glass sjg@chromium.org wrote:
Thanks for digging into this. But this should be set up in board_init_f_mem():
#if defined(CONFIG_SYS_MALLOC_F) && \ (!defined(CONFIG_SPL_BUILD) || !defined(CONFIG_SYS_SPL_MALLOC_START)) top -= CONFIG_SYS_MALLOC_F_LEN; gd->malloc_base = top; #endif
Is it possible that the #ifdef logic is wrong for your board?
Good point. Looks like this is the problem indeed.
I have manually removed the #ifdef logic just for testing:
--- a/common/init/board_init.c +++ b/common/init/board_init.c @@ -50,11 +50,8 @@ ulong board_init_f_mem(ulong top) #endif arch_setup_gd(gd_ptr);
-#if defined(CONFIG_SYS_MALLOC_F) && \
(!defined(CONFIG_SPL_BUILD) || !defined(CONFIG_SYS_SPL_MALLOC_START)) top -= CONFIG_SYS_MALLOC_F_LEN; gd->malloc_base = top;
-#endif
return top;
}
,and then malloc() works fine in SPL.
If I change the logic like this then malloc() works:
--- a/common/init/board_init.c +++ b/common/init/board_init.c @@ -51,7 +51,7 @@ ulong board_init_f_mem(ulong top) arch_setup_gd(gd_ptr);
#if defined(CONFIG_SYS_MALLOC_F) && \ - (!defined(CONFIG_SPL_BUILD) || !defined(CONFIG_SYS_SPL_MALLOC_START)) + (defined(CONFIG_SPL_BUILD) || !defined(CONFIG_SYS_SPL_MALLOC_START)) top -= CONFIG_SYS_MALLOC_F_LEN; gd->malloc_base = top; #endif
Shouldn't we test for defined(CONFIG_SPL_BUILD) instead of !defined(CONFIG_SPL_BUILD)?
Thanks

Hi Fabio,
On 11 November 2015 at 14:00, Fabio Estevam festevam@gmail.com wrote:
Hi Simon and Albert,
On Wed, Nov 11, 2015 at 6:41 PM, Fabio Estevam festevam@gmail.com wrote:
Hi Simon,
On Wed, Nov 11, 2015 at 6:26 PM, Simon Glass sjg@chromium.org wrote:
Thanks for digging into this. But this should be set up in board_init_f_mem():
#if defined(CONFIG_SYS_MALLOC_F) && \ (!defined(CONFIG_SPL_BUILD) || !defined(CONFIG_SYS_SPL_MALLOC_START)) top -= CONFIG_SYS_MALLOC_F_LEN; gd->malloc_base = top; #endif
Is it possible that the #ifdef logic is wrong for your board?
Good point. Looks like this is the problem indeed.
I have manually removed the #ifdef logic just for testing:
--- a/common/init/board_init.c +++ b/common/init/board_init.c @@ -50,11 +50,8 @@ ulong board_init_f_mem(ulong top) #endif arch_setup_gd(gd_ptr);
-#if defined(CONFIG_SYS_MALLOC_F) && \
(!defined(CONFIG_SPL_BUILD) || !defined(CONFIG_SYS_SPL_MALLOC_START)) top -= CONFIG_SYS_MALLOC_F_LEN; gd->malloc_base = top;
-#endif
return top;
}
,and then malloc() works fine in SPL.
If I change the logic like this then malloc() works:
--- a/common/init/board_init.c +++ b/common/init/board_init.c @@ -51,7 +51,7 @@ ulong board_init_f_mem(ulong top) arch_setup_gd(gd_ptr);
#if defined(CONFIG_SYS_MALLOC_F) && \
(!defined(CONFIG_SPL_BUILD) || !defined(CONFIG_SYS_SPL_MALLOC_START))
(defined(CONFIG_SPL_BUILD) || !defined(CONFIG_SYS_SPL_MALLOC_START)) top -= CONFIG_SYS_MALLOC_F_LEN; gd->malloc_base = top;
#endif
Shouldn't we test for defined(CONFIG_SPL_BUILD) instead of !defined(CONFIG_SPL_BUILD)?
That test is intended to avoid setting up simple malloc() if we plan to use full malloc() in SPL. Of course, full malloc() is set up a little later (in spl_init()). But we should not need both - either we use simple malloc() or full malloc().
But for your board I see:
$ grep CONFIG_SYS_SPL_MALLOC_SIZE b/mx6sabresd_spl/u-boot.cfg #define CONFIG_SYS_SPL_MALLOC_SIZE 0x3200000
So you will not be able to use simple malloc(). I'd suggest calling spl_init() from board_init_f() if you need malloc() there. But it presumably needs to be done after you have SDRAM up. So perhaps consider removing some things from board_init_f() and put them in spl_board init()?
Regards, Simon

Hi Simon,
On Wed, Nov 11, 2015 at 7:08 PM, Simon Glass sjg@chromium.org wrote:
That test is intended to avoid setting up simple malloc() if we plan to use full malloc() in SPL. Of course, full malloc() is set up a little later (in spl_init()). But we should not need both - either we use simple malloc() or full malloc().
But for your board I see:
$ grep CONFIG_SYS_SPL_MALLOC_SIZE b/mx6sabresd_spl/u-boot.cfg #define CONFIG_SYS_SPL_MALLOC_SIZE 0x3200000
So you will not be able to use simple malloc(). I'd suggest calling spl_init() from board_init_f() if you need malloc() there. But it
Yes, I do call spl_init() from board_init_f() prior to calling malloc() and this has been working fine prior to 5ba534d247d418.
presumably needs to be done after you have SDRAM up. So perhaps
The trick here is that I need malloc to work prior to have SDRAM configured.
On cgtqmx6eval we need to read SPI NOR to determine what kind of DDR we will need to configure.
Otavio has sent the SPL support for cgtqmx6eval, but it has not reached U-boot mainline yet.
I am reproducing the problem on a mx6sabresd_spl target with the simple example code I sent previously.
Regards,
Fabio Estevam

Hi Fabio,
On 11 November 2015 at 14:24, Fabio Estevam festevam@gmail.com wrote:
Hi Simon,
On Wed, Nov 11, 2015 at 7:08 PM, Simon Glass sjg@chromium.org wrote:
That test is intended to avoid setting up simple malloc() if we plan to use full malloc() in SPL. Of course, full malloc() is set up a little later (in spl_init()). But we should not need both - either we use simple malloc() or full malloc().
But for your board I see:
$ grep CONFIG_SYS_SPL_MALLOC_SIZE b/mx6sabresd_spl/u-boot.cfg #define CONFIG_SYS_SPL_MALLOC_SIZE 0x3200000
So you will not be able to use simple malloc(). I'd suggest calling spl_init() from board_init_f() if you need malloc() there. But it
Yes, I do call spl_init() from board_init_f() prior to calling malloc() and this has been working fine prior to 5ba534d247d418.
presumably needs to be done after you have SDRAM up. So perhaps
The trick here is that I need malloc to work prior to have SDRAM configured.
On cgtqmx6eval we need to read SPI NOR to determine what kind of DDR we will need to configure.
Otavio has sent the SPL support for cgtqmx6eval, but it has not reached U-boot mainline yet.
I am reproducing the problem on a mx6sabresd_spl target with the simple example code I sent previously.
I see. That's not a use case I have seen before.
I'd suggest changing the #ifdef to always set up early malloc() if CONFIG_SYS_MALLOC_F is set. There will of course be a new malloc() once spi_init() is called, but that does not matter.
In your patch, please be careful to add some docs to the README on this point (the early malloc() is only there to permit DRAM init, etc.). It could get quite confusing...
Regards, Simon

Hello Simon,
On Wed, 11 Nov 2015 14:49:05 -0700, Simon Glass sjg@chromium.org wrote:
Hi Fabio,
On 11 November 2015 at 14:24, Fabio Estevam festevam@gmail.com wrote:
Hi Simon,
On Wed, Nov 11, 2015 at 7:08 PM, Simon Glass sjg@chromium.org wrote:
That test is intended to avoid setting up simple malloc() if we plan to use full malloc() in SPL. Of course, full malloc() is set up a little later (in spl_init()). But we should not need both - either we use simple malloc() or full malloc().
But for your board I see:
$ grep CONFIG_SYS_SPL_MALLOC_SIZE b/mx6sabresd_spl/u-boot.cfg #define CONFIG_SYS_SPL_MALLOC_SIZE 0x3200000
So you will not be able to use simple malloc(). I'd suggest calling spl_init() from board_init_f() if you need malloc() there. But it
Yes, I do call spl_init() from board_init_f() prior to calling malloc() and this has been working fine prior to 5ba534d247d418.
presumably needs to be done after you have SDRAM up. So perhaps
The trick here is that I need malloc to work prior to have SDRAM configured.
On cgtqmx6eval we need to read SPI NOR to determine what kind of DDR we will need to configure.
Otavio has sent the SPL support for cgtqmx6eval, but it has not reached U-boot mainline yet.
I am reproducing the problem on a mx6sabresd_spl target with the simple example code I sent previously.
I see. That's not a use case I have seen before.
I'd suggest changing the #ifdef to always set up early malloc() if CONFIG_SYS_MALLOC_F is set. There will of course be a new malloc() once spi_init() is called, but that does not matter.
In your patch, please be careful to add some docs to the README on this point (the early malloc() is only there to permit DRAM init, etc.). It could get quite confusing...
I'm not sure all details are clear to me so let me chime in on this.
IIUC, what Fabio needs is a working C runtime including malloc, based on some IRAM rather than SDRAM.
This means there will be two phases where malloc can be used, the usual one after SDRAM init, and a new one before SDRAM init.
Of course, the amount of memory reserved for the malloc arena cannot be the same in IRAM as it will be in SDRAM, meaning that the routine which reserves the arena must handle both cases, by detecting whether it is running before or after SDRAM init and choosing the right malloc size macro, or by just bein passed that info from the code responsible for maintaining the C runtime environment (arch/arm/lib/crt0.C in the ARM case) -- via the 'chunk_id' method I described earlier in https://www.mail-archive.com/u-boot@lists.denx.de/msg191898.html
There is a *VERY* *BIG* *PROBLEM* to the whole idea of malloc-before-SDRAM:
We could manage to transfer the pre-SDRAM-init maloc arena content to the post-SDRAM-init malloc arena, thus preserving all pre- SDRAM-init mallocs() across the stack switch.
*BUT*: all (variable-stored) pointers to pre-SDRAM-malloc'ed space will become dangling pointers, because we have no easy way of telling where these pointers are and relocating them.
This means that the mallocs() done before SDRAM init are lost and that al pointers to them /will/ become dangling, including any copies and derivatives of these; all code which depend these pointers and their will have to handle the situation, which will prove *quite* complex or, to be blunter, will prove not actually done, and will prove so through weird bugs creeping up that we'll take some time to relate to malloc issues, and to fix.
Of course, we could just decide that any malloc done before SDRAM init is lost and that user code should deal with it. But I fear it will do so just as badly as it would handle the "maloc transfer" alternative I just described. Plus, if the malloc'ed data contains hardware module state or any non-recoverable-again information, then we *cannot* just drop it.
Now, there is a systemic solution: that all code which stores a malloc-arena-based pointer value in memory call a system-provided function which, if running pre-SDRAM-init, would record the location of the pointer in a list; upon C environment switch from pre- to post-SDRAM contexts, the list would be run through and each pointer would be 'relocated' from the old to the new maloc arena. Post-init, the functon would not do any recording. That causes a slight performance hit on mallocs, but I don't think it'll affect boot time that much.
Only code which has to run before SDRAM init would have to use the function.
The benefit of this approach is that maloc'ed space would remain malloc'ed after SDRAM init and declared pointers to it would remain valid. Code which has to malloc before SDRAM init would not have to re-malloc or fix anything, or even realize the stack and malloc arena have moved, as long as it has declared its malloc-related pointer(s).
Of course the list would be limited. But seeing as we'd be running in a tight context and only to get SDRAM running, the list would not extend much.
I imagine a scheme where the list is kept in the malloc arena. The malloc space itself would still grow 'down' from the stack top while the pointer list would grow 'up' from the stack 'bottom' limit.
I could whip up an RFC patch (with ARM support, to be extended to other platforms as my recent experience showed I'm not that good at NIOS2 for instance) if people are interested.
Regards, Simon
Amicalement,

Hi Albert,
On Thu, Nov 12, 2015 at 4:57 AM, Albert ARIBAUD albert.u.boot@aribaud.net wrote:
I could whip up an RFC patch (with ARM support, to be extended to other platforms as my recent experience showed I'm not that good at NIOS2 for instance) if people are interested.
I would be glad to test such patch, thanks.

Hi Albert,
On 11 November 2015 at 23:57, Albert ARIBAUD albert.u.boot@aribaud.net wrote:
Hello Simon,
On Wed, 11 Nov 2015 14:49:05 -0700, Simon Glass sjg@chromium.org wrote:
Hi Fabio,
On 11 November 2015 at 14:24, Fabio Estevam festevam@gmail.com wrote:
Hi Simon,
On Wed, Nov 11, 2015 at 7:08 PM, Simon Glass sjg@chromium.org wrote:
That test is intended to avoid setting up simple malloc() if we plan to use full malloc() in SPL. Of course, full malloc() is set up a little later (in spl_init()). But we should not need both - either we use simple malloc() or full malloc().
But for your board I see:
$ grep CONFIG_SYS_SPL_MALLOC_SIZE b/mx6sabresd_spl/u-boot.cfg #define CONFIG_SYS_SPL_MALLOC_SIZE 0x3200000
So you will not be able to use simple malloc(). I'd suggest calling spl_init() from board_init_f() if you need malloc() there. But it
Yes, I do call spl_init() from board_init_f() prior to calling malloc() and this has been working fine prior to 5ba534d247d418.
presumably needs to be done after you have SDRAM up. So perhaps
The trick here is that I need malloc to work prior to have SDRAM configured.
On cgtqmx6eval we need to read SPI NOR to determine what kind of DDR we will need to configure.
Otavio has sent the SPL support for cgtqmx6eval, but it has not reached U-boot mainline yet.
I am reproducing the problem on a mx6sabresd_spl target with the simple example code I sent previously.
I see. That's not a use case I have seen before.
I'd suggest changing the #ifdef to always set up early malloc() if CONFIG_SYS_MALLOC_F is set. There will of course be a new malloc() once spi_init() is called, but that does not matter.
In your patch, please be careful to add some docs to the README on this point (the early malloc() is only there to permit DRAM init, etc.). It could get quite confusing...
I'm not sure all details are clear to me so let me chime in on this.
IIUC, what Fabio needs is a working C runtime including malloc, based on some IRAM rather than SDRAM.
This means there will be two phases where malloc can be used, the usual one after SDRAM init, and a new one before SDRAM init.
Of course, the amount of memory reserved for the malloc arena cannot be the same in IRAM as it will be in SDRAM, meaning that the routine which reserves the arena must handle both cases, by detecting whether it is running before or after SDRAM init and choosing the right malloc size macro, or by just bein passed that info from the code responsible for maintaining the C runtime environment (arch/arm/lib/crt0.C in the ARM case) -- via the 'chunk_id' method I described earlier in https://www.mail-archive.com/u-boot@lists.denx.de/msg191898.html
There is a *VERY* *BIG* *PROBLEM* to the whole idea of malloc-before-SDRAM:
We could manage to transfer the pre-SDRAM-init maloc arena content to the post-SDRAM-init malloc arena, thus preserving all pre- SDRAM-init mallocs() across the stack switch.
*BUT*: all (variable-stored) pointers to pre-SDRAM-malloc'ed space will become dangling pointers, because we have no easy way of telling where these pointers are and relocating them.
This means that the mallocs() done before SDRAM init are lost and that al pointers to them /will/ become dangling, including any copies and derivatives of these; all code which depend these pointers and their will have to handle the situation, which will prove *quite* complex or, to be blunter, will prove not actually done, and will prove so through weird bugs creeping up that we'll take some time to relate to malloc issues, and to fix.
Of course, we could just decide that any malloc done before SDRAM init is lost and that user code should deal with it. But I fear it will do so just as badly as it would handle the "maloc transfer" alternative I just described. Plus, if the malloc'ed data contains hardware module state or any non-recoverable-again information, then we *cannot* just drop it.
Now, there is a systemic solution: that all code which stores a malloc-arena-based pointer value in memory call a system-provided function which, if running pre-SDRAM-init, would record the location of the pointer in a list; upon C environment switch from pre- to post-SDRAM contexts, the list would be run through and each pointer would be 'relocated' from the old to the new maloc arena. Post-init, the functon would not do any recording. That causes a slight performance hit on mallocs, but I don't think it'll affect boot time that much.
Only code which has to run before SDRAM init would have to use the function.
The benefit of this approach is that maloc'ed space would remain malloc'ed after SDRAM init and declared pointers to it would remain valid. Code which has to malloc before SDRAM init would not have to re-malloc or fix anything, or even realize the stack and malloc arena have moved, as long as it has declared its malloc-related pointer(s).
Of course the list would be limited. But seeing as we'd be running in a tight context and only to get SDRAM running, the list would not extend much.
I imagine a scheme where the list is kept in the malloc arena. The malloc space itself would still grow 'down' from the stack top while the pointer list would grow 'up' from the stack 'bottom' limit.
I could whip up an RFC patch (with ARM support, to be extended to other platforms as my recent experience showed I'm not that good at NIOS2 for instance) if people are interested.
I'm really not sure that this problem is worth solving.
We have quite a hard boundary between board_init_f() and board_init_r() in U-Boot proper. For some platforms the memory available in the former is not available in the latter. People understand (I think) that pre-relocation stuff goes away and exists only to assist with bringing up the new environment.
Driver model is started up in board_init_f() but then the whole thing is thrown away and started again in board_init_r(). So if someone kept a device pointer around for later (e.g. in global_data) it would not work (Don't Do That!). This approach has not caused any reported problems yet.
While you can copy memory and fix up pointers returned by malloc(), pointers to rodata and other things may still exist. It would be tedious or error-prone to relocate these.
My preference would be to keep it simple, and just make it clear that board_init_f() may have an early malloc(), but even if it does it goes away in board_init_r(). That rule can apply to SPL just as easily as U-Boot proper.
Regards, Simon

Hello Simon,
On Thu, 12 Nov 2015 08:59:54 -0700, Simon Glass sjg@chromium.org wrote:
Hi Albert,
On 11 November 2015 at 23:57, Albert ARIBAUD albert.u.boot@aribaud.net wrote:
Hello Simon,
On Wed, 11 Nov 2015 14:49:05 -0700, Simon Glass sjg@chromium.org wrote:
Hi Fabio,
On 11 November 2015 at 14:24, Fabio Estevam festevam@gmail.com wrote:
Hi Simon,
On Wed, Nov 11, 2015 at 7:08 PM, Simon Glass sjg@chromium.org wrote:
That test is intended to avoid setting up simple malloc() if we plan to use full malloc() in SPL. Of course, full malloc() is set up a little later (in spl_init()). But we should not need both - either we use simple malloc() or full malloc().
But for your board I see:
$ grep CONFIG_SYS_SPL_MALLOC_SIZE b/mx6sabresd_spl/u-boot.cfg #define CONFIG_SYS_SPL_MALLOC_SIZE 0x3200000
So you will not be able to use simple malloc(). I'd suggest calling spl_init() from board_init_f() if you need malloc() there. But it
Yes, I do call spl_init() from board_init_f() prior to calling malloc() and this has been working fine prior to 5ba534d247d418.
presumably needs to be done after you have SDRAM up. So perhaps
The trick here is that I need malloc to work prior to have SDRAM configured.
On cgtqmx6eval we need to read SPI NOR to determine what kind of DDR we will need to configure.
Otavio has sent the SPL support for cgtqmx6eval, but it has not reached U-boot mainline yet.
I am reproducing the problem on a mx6sabresd_spl target with the simple example code I sent previously.
I see. That's not a use case I have seen before.
I'd suggest changing the #ifdef to always set up early malloc() if CONFIG_SYS_MALLOC_F is set. There will of course be a new malloc() once spi_init() is called, but that does not matter.
In your patch, please be careful to add some docs to the README on this point (the early malloc() is only there to permit DRAM init, etc.). It could get quite confusing...
I'm not sure all details are clear to me so let me chime in on this.
IIUC, what Fabio needs is a working C runtime including malloc, based on some IRAM rather than SDRAM.
This means there will be two phases where malloc can be used, the usual one after SDRAM init, and a new one before SDRAM init.
Of course, the amount of memory reserved for the malloc arena cannot be the same in IRAM as it will be in SDRAM, meaning that the routine which reserves the arena must handle both cases, by detecting whether it is running before or after SDRAM init and choosing the right malloc size macro, or by just bein passed that info from the code responsible for maintaining the C runtime environment (arch/arm/lib/crt0.C in the ARM case) -- via the 'chunk_id' method I described earlier in https://www.mail-archive.com/u-boot@lists.denx.de/msg191898.html
There is a *VERY* *BIG* *PROBLEM* to the whole idea of malloc-before-SDRAM:
We could manage to transfer the pre-SDRAM-init maloc arena content to the post-SDRAM-init malloc arena, thus preserving all pre- SDRAM-init mallocs() across the stack switch.
*BUT*: all (variable-stored) pointers to pre-SDRAM-malloc'ed space will become dangling pointers, because we have no easy way of telling where these pointers are and relocating them.
This means that the mallocs() done before SDRAM init are lost and that al pointers to them /will/ become dangling, including any copies and derivatives of these; all code which depend these pointers and their will have to handle the situation, which will prove *quite* complex or, to be blunter, will prove not actually done, and will prove so through weird bugs creeping up that we'll take some time to relate to malloc issues, and to fix.
Of course, we could just decide that any malloc done before SDRAM init is lost and that user code should deal with it. But I fear it will do so just as badly as it would handle the "maloc transfer" alternative I just described. Plus, if the malloc'ed data contains hardware module state or any non-recoverable-again information, then we *cannot* just drop it.
Now, there is a systemic solution: that all code which stores a malloc-arena-based pointer value in memory call a system-provided function which, if running pre-SDRAM-init, would record the location of the pointer in a list; upon C environment switch from pre- to post-SDRAM contexts, the list would be run through and each pointer would be 'relocated' from the old to the new maloc arena. Post-init, the functon would not do any recording. That causes a slight performance hit on mallocs, but I don't think it'll affect boot time that much.
Only code which has to run before SDRAM init would have to use the function.
The benefit of this approach is that maloc'ed space would remain malloc'ed after SDRAM init and declared pointers to it would remain valid. Code which has to malloc before SDRAM init would not have to re-malloc or fix anything, or even realize the stack and malloc arena have moved, as long as it has declared its malloc-related pointer(s).
Of course the list would be limited. But seeing as we'd be running in a tight context and only to get SDRAM running, the list would not extend much.
I imagine a scheme where the list is kept in the malloc arena. The malloc space itself would still grow 'down' from the stack top while the pointer list would grow 'up' from the stack 'bottom' limit.
I could whip up an RFC patch (with ARM support, to be extended to other platforms as my recent experience showed I'm not that good at NIOS2 for instance) if people are interested.
I'm really not sure that this problem is worth solving.
We have quite a hard boundary between board_init_f() and board_init_r() in U-Boot proper. For some platforms the memory available in the former is not available in the latter. People understand (I think) that pre-relocation stuff goes away and exists only to assist with bringing up the new environment.
(I fear that half the question is what people understand, and then the other half is what they think they understand and have not -- here, since malloc and DM is available before and after SDRAM init, people might think it is just the *same* DM across the transition -- or, eventually, want it to be.)
Driver model is started up in board_init_f() but then the whole thing is thrown away and started again in board_init_r(). So if someone kept a device pointer around for later (e.g. in global_data) it would not work (Don't Do That!). This approach has not caused any reported problems yet.
Isn't it a potential problem that the driver model is started (and therefore some devices intialized) and then thrown away? Some devices might not like (or even permit) being initialized twice, or that may cause glitches, for instance.
While you can copy memory and fix up pointers returned by malloc(), pointers to rodata and other things may still exist. It would be tedious or error-prone to relocate these.
rodata does not move during SDRAM init (and the stack swap we're considering here). It moves during U-Boot relocation though; and yes, pointers to ro data pre-relocation will become stale after relocation, but this has been the case even way before DM existed.
My preference would be to keep it simple, and just make it clear that board_init_f() may have an early malloc(), but even if it does it goes away in board_init_r(). That rule can apply to SPL just as easily as U-Boot proper.
IMO, keeping it simple does not play well with making DM and malloc available pre-SDRAM init :) --- but don't miscontrue my meaning: I'm fine with DM and malloc pre-SDRAM on platforms which have enough IRAM / SRAM / lockable cache / whatever for it. My point is just that sooner or later, someone will start wanting not to do again after SDRAM init what they've already done before it, because it avoids double HW init issues, becaue it saves time, etc. And that's going to be way sooner than later, IMO.
Regards, Simon
Amicalement,

Hi Albert,
On 12 November 2015 at 09:13, Albert ARIBAUD albert.u.boot@aribaud.net wrote:
Hello Simon,
On Thu, 12 Nov 2015 08:59:54 -0700, Simon Glass sjg@chromium.org wrote:
Hi Albert,
On 11 November 2015 at 23:57, Albert ARIBAUD albert.u.boot@aribaud.net wrote:
Hello Simon,
On Wed, 11 Nov 2015 14:49:05 -0700, Simon Glass sjg@chromium.org wrote:
Hi Fabio,
On 11 November 2015 at 14:24, Fabio Estevam festevam@gmail.com wrote:
Hi Simon,
On Wed, Nov 11, 2015 at 7:08 PM, Simon Glass sjg@chromium.org wrote:
That test is intended to avoid setting up simple malloc() if we plan to use full malloc() in SPL. Of course, full malloc() is set up a little later (in spl_init()). But we should not need both - either we use simple malloc() or full malloc().
But for your board I see:
$ grep CONFIG_SYS_SPL_MALLOC_SIZE b/mx6sabresd_spl/u-boot.cfg #define CONFIG_SYS_SPL_MALLOC_SIZE 0x3200000
So you will not be able to use simple malloc(). I'd suggest calling spl_init() from board_init_f() if you need malloc() there. But it
Yes, I do call spl_init() from board_init_f() prior to calling malloc() and this has been working fine prior to 5ba534d247d418.
presumably needs to be done after you have SDRAM up. So perhaps
The trick here is that I need malloc to work prior to have SDRAM configured.
On cgtqmx6eval we need to read SPI NOR to determine what kind of DDR we will need to configure.
Otavio has sent the SPL support for cgtqmx6eval, but it has not reached U-boot mainline yet.
I am reproducing the problem on a mx6sabresd_spl target with the simple example code I sent previously.
I see. That's not a use case I have seen before.
I'd suggest changing the #ifdef to always set up early malloc() if CONFIG_SYS_MALLOC_F is set. There will of course be a new malloc() once spi_init() is called, but that does not matter.
In your patch, please be careful to add some docs to the README on this point (the early malloc() is only there to permit DRAM init, etc.). It could get quite confusing...
I'm not sure all details are clear to me so let me chime in on this.
IIUC, what Fabio needs is a working C runtime including malloc, based on some IRAM rather than SDRAM.
This means there will be two phases where malloc can be used, the usual one after SDRAM init, and a new one before SDRAM init.
Of course, the amount of memory reserved for the malloc arena cannot be the same in IRAM as it will be in SDRAM, meaning that the routine which reserves the arena must handle both cases, by detecting whether it is running before or after SDRAM init and choosing the right malloc size macro, or by just bein passed that info from the code responsible for maintaining the C runtime environment (arch/arm/lib/crt0.C in the ARM case) -- via the 'chunk_id' method I described earlier in https://www.mail-archive.com/u-boot@lists.denx.de/msg191898.html
There is a *VERY* *BIG* *PROBLEM* to the whole idea of malloc-before-SDRAM:
We could manage to transfer the pre-SDRAM-init maloc arena content to the post-SDRAM-init malloc arena, thus preserving all pre- SDRAM-init mallocs() across the stack switch.
*BUT*: all (variable-stored) pointers to pre-SDRAM-malloc'ed space will become dangling pointers, because we have no easy way of telling where these pointers are and relocating them.
This means that the mallocs() done before SDRAM init are lost and that al pointers to them /will/ become dangling, including any copies and derivatives of these; all code which depend these pointers and their will have to handle the situation, which will prove *quite* complex or, to be blunter, will prove not actually done, and will prove so through weird bugs creeping up that we'll take some time to relate to malloc issues, and to fix.
Of course, we could just decide that any malloc done before SDRAM init is lost and that user code should deal with it. But I fear it will do so just as badly as it would handle the "maloc transfer" alternative I just described. Plus, if the malloc'ed data contains hardware module state or any non-recoverable-again information, then we *cannot* just drop it.
Now, there is a systemic solution: that all code which stores a malloc-arena-based pointer value in memory call a system-provided function which, if running pre-SDRAM-init, would record the location of the pointer in a list; upon C environment switch from pre- to post-SDRAM contexts, the list would be run through and each pointer would be 'relocated' from the old to the new maloc arena. Post-init, the functon would not do any recording. That causes a slight performance hit on mallocs, but I don't think it'll affect boot time that much.
Only code which has to run before SDRAM init would have to use the function.
The benefit of this approach is that maloc'ed space would remain malloc'ed after SDRAM init and declared pointers to it would remain valid. Code which has to malloc before SDRAM init would not have to re-malloc or fix anything, or even realize the stack and malloc arena have moved, as long as it has declared its malloc-related pointer(s).
Of course the list would be limited. But seeing as we'd be running in a tight context and only to get SDRAM running, the list would not extend much.
I imagine a scheme where the list is kept in the malloc arena. The malloc space itself would still grow 'down' from the stack top while the pointer list would grow 'up' from the stack 'bottom' limit.
I could whip up an RFC patch (with ARM support, to be extended to other platforms as my recent experience showed I'm not that good at NIOS2 for instance) if people are interested.
I'm really not sure that this problem is worth solving.
We have quite a hard boundary between board_init_f() and board_init_r() in U-Boot proper. For some platforms the memory available in the former is not available in the latter. People understand (I think) that pre-relocation stuff goes away and exists only to assist with bringing up the new environment.
(I fear that half the question is what people understand, and then the other half is what they think they understand and have not -- here, since malloc and DM is available before and after SDRAM init, people might think it is just the *same* DM across the transition -- or, eventually, want it to be.)
Possibly, but we can add documentation in the code to correct this view.
Driver model is started up in board_init_f() but then the whole thing is thrown away and started again in board_init_r(). So if someone kept a device pointer around for later (e.g. in global_data) it would not work (Don't Do That!). This approach has not caused any reported problems yet.
Isn't it a potential problem that the driver model is started (and therefore some devices intialized) and then thrown away? Some devices might not like (or even permit) being initialized twice, or that may cause glitches, for instance.
Yes. This is called out in the driver model README for example. It hasn't caused any problems yet though.
While you can copy memory and fix up pointers returned by malloc(), pointers to rodata and other things may still exist. It would be tedious or error-prone to relocate these.
rodata does not move during SDRAM init (and the stack swap we're considering here). It moves during U-Boot relocation though; and yes, pointers to ro data pre-relocation will become stale after relocation, but this has been the case even way before DM existed.
Conceptually I am trying to make board_init_f() and board_init_r() mean the same thing in SPL and U-Boot proper. That is, the SDRAM init happens in board_init_f() and there is an early malloc() available then. Once you get to board_init_r() you have a new malloc() which sits in SDRAM. For both SPL and U-Boot proper I don't think the boundary should be when SDRAM is inited, but when board_init_r() is called.
My preference would be to keep it simple, and just make it clear that board_init_f() may have an early malloc(), but even if it does it goes away in board_init_r(). That rule can apply to SPL just as easily as U-Boot proper.
IMO, keeping it simple does not play well with making DM and malloc available pre-SDRAM init :) --- but don't miscontrue my meaning: I'm fine with DM and malloc pre-SDRAM on platforms which have enough IRAM / SRAM / lockable cache / whatever for it. My point is just that sooner or later, someone will start wanting not to do again after SDRAM init what they've already done before it, because it avoids double HW init issues, becaue it saves time, etc. And that's going to be way sooner than later, IMO.
Yes that is quite possibility true. I'm happy to review any patches if it helps. My comment about it possibly not being worthwhile solving is just that. Things have a way of changing, so what looks unnecessary now might look essential tomorrow. In a way, the simple malloc() fits into that category.
Regards, Simon

Hello Simon,
On Fri, 13 Nov 2015 19:12:13 -0700, Simon Glass sjg@chromium.org wrote:
IMO, keeping it simple does not play well with making DM and malloc available pre-SDRAM init :) --- but don't miscontrue my meaning: I'm fine with DM and malloc pre-SDRAM on platforms which have enough IRAM / SRAM / lockable cache / whatever for it. My point is just that sooner or later, someone will start wanting not to do again after SDRAM init what they've already done before it, because it avoids double HW init issues, becaue it saves time, etc. And that's going to be way sooner than later, IMO.
Yes that is quite possibility true. I'm happy to review any patches if it helps. My comment about it possibly not being worthwhile solving is just that. Things have a way of changing, so what looks unnecessary now might look essential tomorrow. In a way, the simple malloc() fits into that category.
And so evolve the uses of DM, of the DT, etc.
Alright, so I will post my RFC and read your comments with much interest!
Regards, Simon
Amicalement,

Hello Fabio,
On Wed, 11 Nov 2015 18:23:17 -0200, Fabio Estevam festevam@gmail.com wrote:
From: Fabio Estevam fabio.estevam@freescale.com
Commit 5ba534d247d418 ("arm: Switch 32-bit ARM to using generic global_data setup") causes malloc() to fail in SPL.
The reason is that the GD_MALLOC_BASE is not passed anymore.
This is the explanation of the SPL fail, but not the /cause/ of it. IOW, why is GD_MALLOC_BASE not passed any more, and what function or routine is it not passed any more to?
Restore the code that passes malloc base so that we can have malloc working in SPL code again.
[...]
+#if defined(CONFIG_SYS_MALLOC_F_LEN)
- sub sp, sp, #CONFIG_SYS_MALLOC_F_LEN
- str sp, [r9, #GD_MALLOC_BASE]
+#endif
NAK, as this only papers over the actual issue. Board_init_f_mem should have set the malloc base in GD. Therefore, rather than doing it again later, we must determine why it was not properly done earlier.11111
Can you give me the toolchain version, board name and commit ID that I could use to reproduce the *faulty* build and check the generated code?
Amicalement,

Hi Albert,
On Wed, Nov 11, 2015 at 6:33 PM, Albert ARIBAUD albert.u.boot@aribaud.net wrote:
+#if defined(CONFIG_SYS_MALLOC_F_LEN)
sub sp, sp, #CONFIG_SYS_MALLOC_F_LEN
str sp, [r9, #GD_MALLOC_BASE]
+#endif
NAK, as this only papers over the actual issue. Board_init_f_mem should have set the malloc base in GD. Therefore, rather than doing it again later, we must determine why it was not properly done earlier.11111
Can you give me the toolchain version, board name and commit ID that I could use to reproduce the *faulty* build and check the generated code?
Sure, I am testing top of head U-boot (cad049907). Target is mx6sabresd_spl_defconfig.
Toolchain is: arm-linux-gnueabi-gcc (Ubuntu/Linaro 4.7.3-12ubuntu1) 4.7.3
In order to reproduce the malloc failure, please apply this patch against mainline:
--- a/board/freescale/mx6sabresd/mx6sabresd.c +++ b/board/freescale/mx6sabresd/mx6sabresd.c @@ -30,6 +30,7 @@ #include "../common/pfuze.h" #include <asm/arch/mx6-ddr.h> #include <usb.h> +#include <malloc.h>
DECLARE_GLOBAL_DATA_PTR;
@@ -833,6 +834,8 @@ static void spl_dram_init(void)
void board_init_f(ulong dummy) { + void __iomem *ptr; + /* setup AIPS and disable watchdog */ arch_cpu_init();
@@ -848,6 +851,12 @@ void board_init_f(ulong dummy) /* UART clocks enabled and gd valid - init serial console */ preloader_console_init();
+ spl_init(); + + ptr = malloc(64); + if (!ptr) + puts("******* malloc returned NULL\n"); + /* DDR initialization */ spl_dram_init();
Also, as I just explained to Simon if I remove the ifdefery like this:
--- a/common/init/board_init.c +++ b/common/init/board_init.c @@ -50,11 +50,8 @@ ulong board_init_f_mem(ulong top) #endif arch_setup_gd(gd_ptr);
-#if defined(CONFIG_SYS_MALLOC_F) && \ - (!defined(CONFIG_SPL_BUILD) || !defined(CONFIG_SYS_SPL_MALLOC_START)) top -= CONFIG_SYS_MALLOC_F_LEN; gd->malloc_base = top; -#endif
return top; }
Then malloc() works fine in SPL.
So it seems I need to find a way to make CONFIG_SPL_BUILD=n or CONFIG_SYS_SPL_MALLOC_START=n.
Thanks,
Fabio Estevam
participants (3)
-
Albert ARIBAUD
-
Fabio Estevam
-
Simon Glass