[U-Boot] [PATCH] futile c relocation attempt

--- arch/arm/cpu/arm926ejs/start.S | 8 ++++- arch/arm/lib/board.c | 57 +++++++++++++++++++++++++++++++++++++++- include/configs/top9000_9xe.h | 1 + 3 files changed, 63 insertions(+), 3 deletions(-)
diff --git a/arch/arm/cpu/arm926ejs/start.S b/arch/arm/cpu/arm926ejs/start.S index 5a7ae7e..b7517db 100644 --- a/arch/arm/cpu/arm926ejs/start.S +++ b/arch/arm/cpu/arm926ejs/start.S @@ -213,7 +213,7 @@ relocate_code: /* Set up the stack */ stack_setup: mov sp, r4 - +#ifndef CONFIG_USE_C_RELOCATION adr r0, _start ldr r2, _TEXT_BASE ldr r3, _bss_start_ofs @@ -266,7 +266,7 @@ fixnext: str r1, [r0] add r2, r2, #8 /* each rel.dyn entry is 8 bytes */ cmp r2, r3 - ble fixloop + blt fixloop #endif #endif /* #ifndef CONFIG_SKIP_RELOCATE_UBOOT */
@@ -288,6 +288,7 @@ clbss_l:str r2, [r0] /* clear loop... */ bl coloured_LED_init bl red_LED_on #endif +#endif /* CONFIG_USE_C_RELOCATION */
/* * We are done. Do not return, instead branch to second part of board @@ -314,10 +315,13 @@ _board_init_r_ofs: .word board_init_r - _start #endif
+.globl _rel_dyn_start_ofs _rel_dyn_start_ofs: .word __rel_dyn_start - _start +.globl _rel_dyn_end_ofs _rel_dyn_end_ofs: .word __rel_dyn_end - _start +.globl _dynsym_start_ofs _dynsym_start_ofs: .word __dynsym_start - _start
diff --git a/arch/arm/lib/board.c b/arch/arm/lib/board.c index e411d93..0b6b3ed 100644 --- a/arch/arm/lib/board.c +++ b/arch/arm/lib/board.c @@ -37,7 +37,7 @@ * IRQ Stack: 00ebff7c * FIQ Stack: 00ebef7c */ - +#define DEBUG #include <common.h> #include <command.h> #include <malloc.h> @@ -50,6 +50,10 @@ #include <onenand_uboot.h> #include <mmc.h>
+#ifdef CONFIG_USE_C_RELOCATION +#include <elf.h> +#endif + #ifdef CONFIG_BITBANGMII #include <miiphy.h> #endif @@ -509,6 +513,15 @@ void board_init_f (ulong bootflag) init_fnc_t **init_fnc_ptr; gd_t *id; ulong addr, addr_sp; +#ifdef CONFIG_USE_C_RELOCATION + extern ulong _dynsym_start_ofs; + extern ulong _rel_dyn_start_ofs; + extern ulong _rel_dyn_end_ofs; + extern ulong _bss_start_ofs; + extern ulong _bss_end_ofs; + Elf32_Rel *rel_dyn_ptr; + ulong *p; +#endif
/* Pointer is writable since we allocated a register for it */ gd = (gd_t *) (CONFIG_SYS_INIT_SP_ADDR); @@ -661,6 +674,48 @@ void board_init_f (ulong bootflag) debug ("relocation Offset is: %08lx\n", gd->reloc_off); memcpy (id, (void *)gd, sizeof (gd_t));
+#ifdef CONFIG_USE_C_RELOCATION + /* TODO: check for identical source and destination */ + /* TODO: check for overlapping */ + /* copy image, including initialized data */ + debug ("memcpy(%08lx,%08lx,%ld)\n", + addr, _TEXT_BASE, _bss_start_ofs); + memcpy ((void *)addr, (void *)_TEXT_BASE, _bss_start_ofs); + /* now fix the code */ + debug ("_dynsym_start_ofs=%08lx _rel_dyn_start_ofs=%08lx _rel_dyn_end_ofs=%08lx\n", + _dynsym_start_ofs, _rel_dyn_start_ofs, _rel_dyn_end_ofs); + for (rel_dyn_ptr = (Elf32_Rel *)(_TEXT_BASE + _rel_dyn_start_ofs); + rel_dyn_ptr < (Elf32_Rel *)(_TEXT_BASE + _rel_dyn_end_ofs); + rel_dyn_ptr++) { + ulong *patchaddr = (ulong *) (rel_dyn_ptr->r_offset + gd->reloc_off); + debug ("patch %08lx : %08lx\n", + (ulong)patchaddr, (ulong)rel_dyn_ptr->r_info); + switch (ELF32_R_TYPE(rel_dyn_ptr->r_info)) { + case 23: /* rel fixup */ + *patchaddr += gd->reloc_off; + break; + case 2: /* abs fixup */ + { + Elf32_Sym *sym = (Elf32_Sym *)(_TEXT_BASE + _dynsym_start_ofs); + sym += ELF32_R_SYM(rel_dyn_ptr->r_info); + *patchaddr = gd->reloc_off + sym->st_value; + } + break; + default: /* unhandled fixup */ + break; + } + } + /* clear BSS */ +# ifndef CONFIG_PRELOADER + debug ("clearing BSS %08lx..%08lx\n", + addr + _bss_start_ofs, addr + _bss_end_ofs); + for (p = (ulong *)(addr + _bss_start_ofs); + p < (ulong *)(addr + _bss_end_ofs); + *p++ = 0) + ; +# endif +#endif + debug ("calling relocate_code\n"); relocate_code (addr_sp, id, addr);
/* NOTREACHED - relocate_code() does not return */ diff --git a/include/configs/top9000_9xe.h b/include/configs/top9000_9xe.h index f7fa198..e4ca026 100644 --- a/include/configs/top9000_9xe.h +++ b/include/configs/top9000_9xe.h @@ -73,6 +73,7 @@ #define CONFIG_SKIP_LOWLEVEL_INIT /*#define CONFIG_SKIP_RELOCATE_UBOOT*/ /*#define CONFIG_SYS_ARM_WITHOUT_RELOC*/ +#define CONFIG_USE_C_RELOCATION #define CONFIG_RELOC_FIXUP_WORKS #define CONFIG_SYS_NO_ICACHE #define CONFIG_SYS_NO_DCACHE

Reinhard Meyer schrieb:
arch/arm/cpu/arm926ejs/start.S | 8 ++++- arch/arm/lib/board.c | 57 +++++++++++++++++++++++++++++++++++++++- include/configs/top9000_9xe.h | 1 + 3 files changed, 63 insertions(+), 3 deletions(-)
PLEASE - this is only for study, not to be applied !
Reinhard

Le 05/10/2010 16:48, Reinhard Meyer a écrit :
include/configs/top9000_9xe.h | 1 +
Can't find this one in my u-boot tree. Did I miss something?
+#ifdef CONFIG_USE_C_RELOCATION
- /* TODO: check for identical source and destination */
- /* TODO: check for overlapping */
- /* copy image, including initialized data */
- debug ("memcpy(%08lx,%08lx,%ld)\n",
addr, _TEXT_BASE, _bss_start_ofs);
- memcpy ((void *)addr, (void *)_TEXT_BASE, _bss_start_ofs);
- /* now fix the code */
- debug ("_dynsym_start_ofs=%08lx _rel_dyn_start_ofs=%08lx _rel_dyn_end_ofs=%08lx\n",
_dynsym_start_ofs, _rel_dyn_start_ofs, _rel_dyn_end_ofs);
- for (rel_dyn_ptr = (Elf32_Rel *)(_TEXT_BASE + _rel_dyn_start_ofs);
rel_dyn_ptr< (Elf32_Rel *)(_TEXT_BASE + _rel_dyn_end_ofs);
rel_dyn_ptr++) {
ulong *patchaddr = (ulong *) (rel_dyn_ptr->r_offset + gd->reloc_off);
debug ("patch %08lx : %08lx\n",
(ulong)patchaddr, (ulong)rel_dyn_ptr->r_info);
switch (ELF32_R_TYPE(rel_dyn_ptr->r_info)) {
case 23: /* rel fixup */
*patchaddr += gd->reloc_off;
break;
case 2: /* abs fixup */
{
Elf32_Sym *sym = (Elf32_Sym *)(_TEXT_BASE + _dynsym_start_ofs);
sym += ELF32_R_SYM(rel_dyn_ptr->r_info);
*patchaddr = gd->reloc_off + sym->st_value;
}
break;
default: /* unhandled fixup */
break;
}
- }
- /* clear BSS */
+# ifndef CONFIG_PRELOADER
- debug ("clearing BSS %08lx..%08lx\n",
addr + _bss_start_ofs, addr + _bss_end_ofs);
- for (p = (ulong *)(addr + _bss_start_ofs);
p< (ulong *)(addr + _bss_end_ofs);
*p++ = 0)
;
+# endif +#endif
All of _bss_start_ofs, _rel_dyn_start_ofs, _rel_dyn_end_ofs, _dynsym_start_ofs and _bss_end_ofs are going to be modified by the relocation fixup loop. The compiler *might* preload them into registers... or not; which could cause issues with ..._end_ofs ones, because it could cause the loops to overflow/underflow.
- debug ("calling relocate_code\n"); relocate_code (addr_sp, id, addr);
Have you not just relocated and fixed up in the C code? If so, relocate_code (which would then become a misnomer) should have its own relocate and fixup code removed (your patch does not), and only perform the C environment (stack, etc) switch.
Side note: if board_init_f handles relocation and fixup and relocate_code just does stack switch between board_init_f and board_init_r, then I'd rather board_init_f *return* to start.S rather than *call* it if that's possible. That would make the control flow less circuitous: begin in start.S, set a minimal stack, call board_init_f, returns to start.S, switch to final stack, calls (branches to) board_init_r.
Amicalement,

Dear Albert ARIBAUD,
include/configs/top9000_9xe.h | 1 +
That's my board. Not in mainstream yet. Just sets the define there.
+#ifdef CONFIG_USE_C_RELOCATION
- /* TODO: check for identical source and destination */
- /* TODO: check for overlapping */
- /* copy image, including initialized data */
- debug ("memcpy(%08lx,%08lx,%ld)\n",
addr, _TEXT_BASE, _bss_start_ofs);
- memcpy ((void *)addr, (void *)_TEXT_BASE, _bss_start_ofs);
- /* now fix the code */
- debug ("_dynsym_start_ofs=%08lx _rel_dyn_start_ofs=%08lx _rel_dyn_end_ofs=%08lx\n",
_dynsym_start_ofs, _rel_dyn_start_ofs, _rel_dyn_end_ofs);
- for (rel_dyn_ptr = (Elf32_Rel *)(_TEXT_BASE + _rel_dyn_start_ofs);
rel_dyn_ptr< (Elf32_Rel *)(_TEXT_BASE + _rel_dyn_end_ofs);
rel_dyn_ptr++) {
ulong *patchaddr = (ulong *) (rel_dyn_ptr->r_offset + gd->reloc_off);
debug ("patch %08lx : %08lx\n",
(ulong)patchaddr, (ulong)rel_dyn_ptr->r_info);
switch (ELF32_R_TYPE(rel_dyn_ptr->r_info)) {
case 23: /* rel fixup */
*patchaddr += gd->reloc_off;
break;
case 2: /* abs fixup */
{
Elf32_Sym *sym = (Elf32_Sym *)(_TEXT_BASE + _dynsym_start_ofs);
sym += ELF32_R_SYM(rel_dyn_ptr->r_info);
*patchaddr = gd->reloc_off + sym->st_value;
}
break;
default: /* unhandled fixup */
break;
}
- }
- /* clear BSS */
+# ifndef CONFIG_PRELOADER
- debug ("clearing BSS %08lx..%08lx\n",
addr + _bss_start_ofs, addr + _bss_end_ofs);
- for (p = (ulong *)(addr + _bss_start_ofs);
p< (ulong *)(addr + _bss_end_ofs);
*p++ = 0)
;
+# endif +#endif
All of _bss_start_ofs, _rel_dyn_start_ofs, _rel_dyn_end_ofs, _dynsym_start_ofs and _bss_end_ofs are going to be modified by the relocation fixup loop. The compiler *might* preload them into registers... or not; which could cause issues with ..._end_ofs ones, because it could cause the loops to overflow/underflow.
They are not going to be modified, they are in the unrelocated location!
- debug ("calling relocate_code\n"); relocate_code (addr_sp, id, addr);
Have you not just relocated and fixed up in the C code? If so, relocate_code (which would then become a misnomer) should have its own relocate and fixup code removed (your patch does not), and only perform the C environment (stack, etc) switch.
Incorrect. I removed the relocation (#ifndef) in start.S leaving only setting of the SP and calling of board_init_r active. True for the misnomer... It was supposed to be a quick hack, beauty comes later ;)
Side note: if board_init_f handles relocation and fixup and relocate_code just does stack switch between board_init_f and board_init_r, then I'd rather board_init_f *return* to start.S rather than *call* it if that's possible. That would make the control flow less circuitous: begin in start.S, set a minimal stack, call board_init_f, returns to start.S, switch to final stack, calls (branches to) board_init_r.
I agree here. I would have board_init_f return the desired SP value, for example, or let it return the gd pointer - something along that line.
Or one completely skips returning back to asm and sets the sp with an asm() statement in "C", obtains a function pointer to board_init_r, adjusts that and calls that function.
Amicalement,
Reinhard

Le 05/10/2010 19:41, Reinhard Meyer a écrit :
Dear Albert ARIBAUD,
include/configs/top9000_9xe.h | 1 +
That's my board. Not in mainstream yet. Just sets the define there.
+#ifdef CONFIG_USE_C_RELOCATION
- /* TODO: check for identical source and destination */
- /* TODO: check for overlapping */
- /* copy image, including initialized data */
- debug ("memcpy(%08lx,%08lx,%ld)\n",
- addr, _TEXT_BASE, _bss_start_ofs);
- memcpy ((void *)addr, (void *)_TEXT_BASE, _bss_start_ofs);
- /* now fix the code */
- debug ("_dynsym_start_ofs=%08lx _rel_dyn_start_ofs=%08lx
_rel_dyn_end_ofs=%08lx\n",
- _dynsym_start_ofs, _rel_dyn_start_ofs, _rel_dyn_end_ofs);
- for (rel_dyn_ptr = (Elf32_Rel *)(_TEXT_BASE + _rel_dyn_start_ofs);
- rel_dyn_ptr< (Elf32_Rel *)(_TEXT_BASE + _rel_dyn_end_ofs);
- rel_dyn_ptr++) {
- ulong *patchaddr = (ulong *) (rel_dyn_ptr->r_offset + gd->reloc_off);
- debug ("patch %08lx : %08lx\n",
- (ulong)patchaddr, (ulong)rel_dyn_ptr->r_info);
- switch (ELF32_R_TYPE(rel_dyn_ptr->r_info)) {
- case 23: /* rel fixup */
- *patchaddr += gd->reloc_off;
- break;
- case 2: /* abs fixup */
- {
- Elf32_Sym *sym = (Elf32_Sym *)(_TEXT_BASE + _dynsym_start_ofs);
- sym += ELF32_R_SYM(rel_dyn_ptr->r_info);
- *patchaddr = gd->reloc_off + sym->st_value;
- }
- break;
- default: /* unhandled fixup */
- break;
- }
- }
- /* clear BSS */
+# ifndef CONFIG_PRELOADER
- debug ("clearing BSS %08lx..%08lx\n",
- addr + _bss_start_ofs, addr + _bss_end_ofs);
- for (p = (ulong *)(addr + _bss_start_ofs);
- p< (ulong *)(addr + _bss_end_ofs);
- *p++ = 0)
- ;
+# endif +#endif
All of _bss_start_ofs, _rel_dyn_start_ofs, _rel_dyn_end_ofs, _dynsym_start_ofs and _bss_end_ofs are going to be modified by the relocation fixup loop. The compiler *might* preload them into registers... or not; which could cause issues with ..._end_ofs ones, because it could cause the loops to overflow/underflow.
They are not going to be modified, they are in the unrelocated location!
Oops. Correct.
- debug ("calling relocate_code\n");
relocate_code (addr_sp, id, addr);
Have you not just relocated and fixed up in the C code? If so, relocate_code (which would then become a misnomer) should have its own relocate and fixup code removed (your patch does not), and only perform the C environment (stack, etc) switch.
Incorrect. I removed the relocation (#ifndef) in start.S leaving only setting of the SP and calling of board_init_r active. True for the misnomer... It was supposed to be a quick hack, beauty comes later ;)
Ok.
Side note: if board_init_f handles relocation and fixup and relocate_code just does stack switch between board_init_f and board_init_r, then I'd rather board_init_f *return* to start.S rather than *call* it if that's possible. That would make the control flow less circuitous: begin in start.S, set a minimal stack, call board_init_f, returns to start.S, switch to final stack, calls (branches to) board_init_r.
I agree here. I would have board_init_f return the desired SP value, for example, or let it return the gd pointer - something along that line.
Or one completely skips returning back to asm and sets the sp with an asm() statement in "C", obtains a function pointer to board_init_r, adjusts that and calls that function.
I'd rather not leave asm statements in board.c as it is supposed to converge between archs IIUC.
Amicalement,
:)
Reinhard
Amicalement,

On 06/10/10 01:48, Reinhard Meyer wrote:
arch/arm/cpu/arm926ejs/start.S | 8 ++++- arch/arm/lib/board.c | 57 +++++++++++++++++++++++++++++++++++++++- include/configs/top9000_9xe.h | 1 + 3 files changed, 63 insertions(+), 3 deletions(-)
I had a quick look at this and nothing is jumping out at me. Of course I am not familiar with ARM asm...
I don't see any reason why this ultimately will not work eventually. You may be having some issues with the transition from asm->C->asm through the relocation - This was an especially painful thing for me involving an intermediate trampoline which I have only recently figured out how to remove.
Maybe some memory barriers are needed to stop the C optimiser mangling things?
I am sure what you have is very close to the real solution :)
I do think the main relocation fixup loop can be moved into a common library in which case we can add additional case statements. The nice thing is that x86 as all Type 8 which is specifically allocated to x86 so my "if
TEXT_BASE" checks can be kept. For size freaks, we could litter the code
with #ifdefs to remove un-needed cases ;)
Interestingly, ARM is adding gd->reloc_off while x86 is subtracting gd->reloc_off. If this is correct, I need to change the calculation of gd_reloc_off to be consistent
Regards,
Graeme

Dear Graeme Russ,
I had a quick look at this and nothing is jumping out at me. Of course I am not familiar with ARM asm...
I don't see any reason why this ultimately will not work eventually. You may be having some issues with the transition from asm->C->asm through the relocation - This was an especially painful thing for me involving an intermediate trampoline which I have only recently figured out how to remove.
I think the relocation itself works fine, when looking at the debug output.
Maybe some memory barriers are needed to stop the C optimiser mangling things?
It can't really optimize across the call to the (now wrongly named) reloc_code in assembly.
Interestingly, ARM is adding gd->reloc_off while x86 is subtracting gd->reloc_off. If this is correct, I need to change the calculation of gd_reloc_off to be consistent
It should always ADD it, when it is calculated as "Final Location" - "Current Location". Note, of course, that the value itself might well be negative.
If, for example, the NOR Flash is at a higher address than RAM...
Best Regards Reinhard

On Wednesday, October 6, 2010, Reinhard Meyer u-boot@emk-elektronik.de wrote:
Dear Graeme Russ,
I had a quick look at this and nothing is jumping out at me. Of course I am not familiar with ARM asm...
I don't see any reason why this ultimately will not work eventually. You may be having some issues with the transition from asm->C->asm through the relocation - This was an especially painful thing for me involving an intermediate trampoline which I have only recently figured out how to remove.
I think the relocation itself works fine, when looking at the debug output.
Time to disassemble using objdump
Maybe some memory barriers are needed to stop the C optimiser mangling things?
It can't really optimize across the call to the (now wrongly named) reloc_code in assembly.
True, objdump will clarify
Interestingly, ARM is adding gd->reloc_off while x86 is subtracting gd->reloc_off. If this is correct, I need to change the calculation of gd_reloc_off to be consistent
It should always ADD it, when it is calculated as "Final Location" - "Current Location".
I have my calculation the wrong way around
Note, of course, that the value itself might well be negative.
If, for example, the NOR Flash is at a higher address than RAM...
Which it is for x86
Regards
Graeme

On 10/6/2010 2:43 AM, Graeme Russ wrote:
On 06/10/10 01:48, Reinhard Meyer wrote:
arch/arm/cpu/arm926ejs/start.S | 8 ++++- arch/arm/lib/board.c | 57 +++++++++++++++++++++++++++++++++++++++- include/configs/top9000_9xe.h | 1 + 3 files changed, 63 insertions(+), 3 deletions(-)
I had a quick look at this and nothing is jumping out at me. Of course I am not familiar with ARM asm...
I don't see any reason why this ultimately will not work eventually. You may be having some issues with the transition from asm->C->asm through the relocation - This was an especially painful thing for me involving an intermediate trampoline which I have only recently figured out how to remove.
Maybe some memory barriers are needed to stop the C optimiser mangling things?
I am sure what you have is very close to the real solution :)
I do think the main relocation fixup loop can be moved into a common library in which case we can add additional case statements. The nice thing is that x86 as all Type 8 which is specifically allocated to x86 so my "if
Hi All, I think that type 8 IS NOT allocated to the 386. For instance, R_PPC_ADDR14_BRTAKEN also has a value of 8. So does R_ARM_ABS8. I think that there will be a lot of #ifdefs just to keep the references symbolic. Many of the different platform relocation types will come out to be the same code in the switch statement, but different symbolic names. There will also be some entries that are processor specific and have no equivalent on other processors. I think it would be a good idea to use the symbolic values of the Relocation types (as opposed to integer constants), as it will make the code clearer. There are sort of two ways to organize the code inside the switch statement. Since the code inside the switch statement is very short, it might be best for each architecture (ELF format) to be bunched together, even at the expense of repeating the same executable statements that some other formats may use, as follows: #ifdef PPC case R_PPC_ADDR32: /* S + A */ <code to do the deed> break;
case R_PPC_RELATIVE: /* B + A */ <code to do the deed> break; #endif #ifdef I386 case R_386_32: /* S + A */ /* I think this is the other location type Graeme used, but I could be wrong */ <code to do the deed> break;
case R_386_RELATIVE: /* B + A */ <code to do the deed> break;
#endif #ifdef ARM case R_ARM_ABS32: /* S + A */ /* I don't remember the ARM relocation types used */ <code to do the deed> break;
case R_ARM_REL32: /* B + A - P */ <code to do the deed> break;
#endif
Or we could group the various Relocation types by what they actually do: #ifdef PPC case R_PPC_ADDR32: /* S + A */ #endif #ifdef I386 case R_386_32: /* S + A */ /* I think this is the other location type Graeme used, but I could be wrong */ #endif #ifdef ARM case R_ARM_ABS32: /* S + A */ /* I don't remember the ARM relocation types used */ #endif <code to do the deed> break;
#ifdef PPC case R_PPC_RELATIVE: /* B + A */ #endif #ifdef I386 case R_386_RELATIVE: /* B + A */ #endif <code to do the deed> break;
#ifdef ARM case R_ARM_REL32: /* B + A - P */ <code to do the deed> break; #endif
Note that the ARM_REL32 is defined differently than the PPC/I386 relative, FWIW. I also don't know what to use for the names of the binary formats. It would be nice if we could use something already in the header files? Thoughts on all this solicited!
Best Regards, Bill Campbell
TEXT_BASE" checks can be kept. For size freaks, we could litter the code
with #ifdefs to remove un-needed cases ;)
Interestingly, ARM is adding gd->reloc_off while x86 is subtracting gd->reloc_off. If this is correct, I need to change the calculation of gd_reloc_off to be consistent
Adding is the way the specifications define it. add B+A.
Best Regards, Bill Campbell
Regards,
Graeme _______________________________________________ U-Boot mailing list U-Boot@lists.denx.de http://lists.denx.de/mailman/listinfo/u-boot
participants (4)
-
Albert ARIBAUD
-
Graeme Russ
-
J. William Campbell
-
Reinhard Meyer