[U-Boot] [PATCH] envcrc: extract default environment from target ELF files

Rather than rely on dirty hacks to compile the environment on the host and extract the CRC from that, have envcrc extract the environment straight from the ELF object that will be linked into u-boot itself. This makes the envcrc code a bit more complicated, but it simplifies the build process and host requirements because we don't have to try and recreate the environment that the target will be seeing on the host. This avoids some issues that crop up from time to time where the preprocessor defines on the host don't expand in the same way as for the target -- in case the target uses those defines to customize the environment, or the host defines conflicts with some of the target values.
Signed-off-by: Mike Frysinger vapier@gentoo.org --- common/Makefile | 4 +- common/env_embedded.c | 30 +--------- tools/Makefile | 3 +- tools/envcrc.c | 156 ++++++++++++++++++++++++++++++++++++++++++-------
diff --git a/common/Makefile b/common/Makefile index c8e5d26..02bc71b 100644 --- a/common/Makefile +++ b/common/Makefile @@ -173,9 +173,9 @@ all: $(LIB) $(AOBJS) $(LIB): $(obj).depend $(OBJS) $(AR) $(ARFLAGS) $@ $(OBJS)
-$(obj)env_embedded.o: $(src)env_embedded.c $(obj)../tools/envcrc +$(obj)env_embedded.o: $(src)env_embedded.c $(obj)env_common.o $(obj)../tools/envcrc $(CC) $(AFLAGS) -Wa,--no-warn \ - -DENV_CRC=$(shell $(obj)../tools/envcrc) \ + -DENV_CRC=$(shell $(obj)../tools/envcrc $(obj)env_common.o) \ -c -o $@ $(src)env_embedded.c
$(obj)../tools/envcrc: diff --git a/common/env_embedded.c b/common/env_embedded.c index ae6cac4..60736c7 100644 --- a/common/env_embedded.c +++ b/common/env_embedded.c @@ -21,26 +21,10 @@ * MA 02111-1307 USA */
-#ifndef __ASSEMBLY__ -#define __ASSEMBLY__ /* Dirty trick to get only #defines */ -#endif -#define __ASM_STUB_PROCESSOR_H__ /* don't include asm/processor. */ #include <config.h> -#undef __ASSEMBLY__ #include <environment.h>
/* - * Handle HOSTS that have prepended - * crap on symbol names, not TARGETS. - */ -#if defined(__APPLE__) -/* Leading underscore on symbols */ -# define SYM_CHAR "_" -#else /* No leading character on symbols */ -# define SYM_CHAR -#endif - -/* * Generate embedded environment table * inside U-Boot image, if needed. */ @@ -75,7 +59,7 @@ #else # define GEN_SET_VALUE(name, value) asm (GEN_SYMNAME(name) " = " GEN_VALUE(value)) #endif -#define GEN_SYMNAME(str) SYM_CHAR #str +#define GEN_SYMNAME(str) #str #define GEN_VALUE(str) #str #define GEN_ABS(name, value) \ asm (".globl " GEN_SYMNAME(name)); \ @@ -197,17 +181,7 @@ env_t redundand_environment __PPCENV__ = { #endif /* CONFIG_ENV_ADDR_REDUND */
/* - * These will end up in the .text section - * if the environment strings are embedded - * in the image. When this is used for - * tools/envcrc, they are placed in the - * .data/.sdata section. - * - */ -unsigned long env_size __PPCTEXT__ = sizeof(env_t); - -/* - * Add in absolutes. + * Add in absolutes. This symbol is only referenced from linker scripts. */ GEN_ABS(env_offset, CONFIG_ENV_OFFSET);
diff --git a/tools/Makefile b/tools/Makefile index 43c284c..982d65a 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -80,7 +80,6 @@ BIN_FILES-$(CONFIG_INCA_IP) += inca-swap-bytes$(SFX) BIN_FILES-$(CONFIG_NETCONSOLE) += ncb$(SFX)
# Source files which exist outside the tools directory -EXT_OBJ_FILES-y += common/env_embedded.o EXT_OBJ_FILES-y += lib_generic/crc32.o EXT_OBJ_FILES-y += lib_generic/md5.o EXT_OBJ_FILES-y += lib_generic/sha1.o @@ -156,7 +155,7 @@ MAKEDEPEND = makedepend
all: $(obj).depend $(BINS) $(LOGO-y) subdirs
-$(obj)envcrc$(SFX): $(obj)envcrc.o $(obj)crc32.o $(obj)env_embedded.o $(obj)sha1.o +$(obj)envcrc$(SFX): $(obj)envcrc.o $(obj)crc32.o $(obj)sha1.o $(CC) $(CFLAGS) -o $@ $^
$(obj)ubsha1$(SFX): $(obj)ubsha1.o $(obj)sha1.o $(obj)os_support.o diff --git a/tools/envcrc.c b/tools/envcrc.c index 5b0f7cd..87453e5 100644 --- a/tools/envcrc.c +++ b/tools/envcrc.c @@ -21,11 +21,15 @@ * MA 02111-1307 USA */
+#include "compiler.h" +#include <errno.h> +#include <stdbool.h> #include <stdio.h> #include <stdint.h> #include <stdlib.h> #include <string.h> #include <unistd.h> +#include "elf.h"
#ifndef __ASSEMBLY__ #define __ASSEMBLY__ /* Dirty trick to get only #defines */ @@ -67,48 +71,156 @@
#define ENV_SIZE (CONFIG_ENV_SIZE - ENV_HEADER_SIZE)
+unsigned char host_endian(void) +{ + struct { + union { + uint16_t sint; + unsigned char str[2]; + } u; + } data; + data.u.sint = 0xBBAA; + return data.u.str[0] == 0xBB ? ELFDATA2MSB : ELFDATA2LSB; +} +bool swapit; +#define EGET(val) ( \ + !swapit ? (val) : \ + sizeof(val) == 1 ? (val) : \ + sizeof(val) == 2 ? uswap_16(val) : \ + sizeof(val) == 4 ? uswap_32(val) : \ + sizeof(val) == 8 ? uswap_64(val) : \ + 1 \ +)
extern uint32_t crc32 (uint32_t, const unsigned char *, unsigned int);
-#ifdef ENV_IS_EMBEDDED -extern unsigned int env_size; -extern unsigned char environment; -#endif /* ENV_IS_EMBEDDED */ +static bool elfread(FILE *fp, long offset, void *ptr, size_t size, size_t nmemb) +{ + if (fseek(fp, offset, SEEK_SET)) + return false; + if (fread(ptr, size, nmemb, fp) != nmemb) + return false; + return true; +} + +/* Avoid using elf.h since not all systems have it */ +unsigned char environment[CONFIG_ENV_SIZE]; +bool read_env_from_elf(const char *elf_file) +{ + const char env_symbol[] = "default_environment"; + char buf[256]; + FILE *fp; + bool ret = false; + int i; + + Elf32_Ehdr ehdr; + Elf32_Shdr shdr, strtab, symtab; + Elf32_Sym sym; + + fp = fopen(elf_file, "r"); + if (!fp) + return false; + + /* Make sure this is a valid ELF */ + if (!elfread(fp, 0, &ehdr, sizeof(ehdr), 1)) + goto done; + if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG)) { + errno = EINVAL; + goto done; + } + if (ehdr.e_ident[EI_CLASS] != ELFCLASS32) { + errno = EINVAL; + goto done; + } + swapit = host_endian() == ehdr.e_ident[EI_DATA] ? false : true; + + /* Find the string & symbol table */ + memset(&strtab, 0, sizeof(strtab)); /* shut gcc the hell up */ + memset(&symtab, 0, sizeof(symtab)); /* shut gcc the hell up */ + strtab.sh_type = SHT_NULL; + symtab.sh_type = SHT_NULL; + for (i = 0; i < EGET(ehdr.e_shnum); ++i) { + long off = EGET(ehdr.e_shoff) + i * EGET(ehdr.e_shentsize); + if (!elfread(fp, off, &shdr, sizeof(shdr), 1)) + goto done; + if (EGET(shdr.sh_type) == SHT_STRTAB) + strtab = shdr; + else if (EGET(shdr.sh_type) == SHT_SYMTAB) + symtab = shdr; + } + if (strtab.sh_type == SHT_NULL || symtab.sh_type == SHT_NULL) { + errno = EINVAL; + goto done; + } + + /* Find the environment symbol */ + for (i = 0; i < EGET(symtab.sh_size) / EGET(symtab.sh_entsize); ++i) { + char *tbuf; + long off = EGET(symtab.sh_offset) + i * sizeof(sym); + if (!elfread(fp, off, &sym, sizeof(sym), 1)) + goto done; + off = EGET(strtab.sh_offset) + EGET(sym.st_name); + tbuf = buf; + if (!elfread(fp, off, tbuf, 1, sizeof(env_symbol))) + goto done; + /* handle ABI prefixed symbols automatically */ + if (tbuf[0] == '_') { + tbuf[sizeof(env_symbol)] = '\0'; + ++tbuf; + } + if (!strcmp(tbuf, env_symbol)) { + off = EGET(ehdr.e_shoff) + EGET(sym.st_shndx) * EGET(ehdr.e_shentsize); + if (!elfread(fp, off, &shdr, sizeof(shdr), 1)) + goto done; + off = EGET(shdr.sh_offset) + EGET(sym.st_value); + if (!elfread(fp, off, environment + ENV_HEADER_SIZE, 1, EGET(sym.st_size))) + goto done; + ret = true; + break; + } + } + + done: + fclose(fp); + return ret; +}
int main (int argc, char **argv) { -#ifdef ENV_IS_EMBEDDED +#ifdef ENV_IS_EMBEDDED unsigned char pad = 0x00; uint32_t crc; - unsigned char *envptr = &environment, + unsigned char *envptr = environment, *dataptr = envptr + ENV_HEADER_SIZE; unsigned int datasize = ENV_SIZE; - unsigned int eoe; + const char *env_file;
- if (argv[1] && !strncmp(argv[1], "--binary", 8)) { + if (argc < 2) { + puts("Usage: envcrc <environment object> [--binary [le]]"); + return 1; + } + env_file = argv[1]; + + if (argv[2] && !strncmp(argv[2], "--binary", 8)) { int ipad = 0xff; - if (argv[1][8] == '=') - sscanf(argv[1] + 9, "%i", &ipad); + if (argv[2][8] == '=') + sscanf(argv[2] + 9, "%i", &ipad); pad = ipad; } + memset(dataptr, pad, datasize);
- if (pad) { - /* find the end of env */ - for (eoe = 0; eoe < datasize - 1; ++eoe) - if (!dataptr[eoe] && !dataptr[eoe+1]) { - eoe += 2; - break; - } - if (eoe < datasize - 1) - memset(dataptr + eoe, pad, datasize - eoe); + if (!read_env_from_elf(env_file)) { + fprintf(stderr, "unable to read environment from %s: %s\n", + env_file, strerror(errno)); + return 1; }
crc = crc32 (0, dataptr, datasize);
/* Check if verbose mode is activated passing a parameter to the program */ - if (argc > 1) { - if (!strncmp(argv[1], "--binary", 8)) { - int le = (argc > 2 ? !strcmp(argv[2], "le") : 1); + if (argc > 2) { + if (!strncmp(argv[2], "--binary", 8)) { + int le = (argc > 3 ? !strcmp(argv[3], "le") : 1); size_t i, start, end, step; if (le) { start = 0;

Dear Mike Frysinger,
In message 1245143108-7334-1-git-send-email-vapier@gentoo.org you wrote:
Rather than rely on dirty hacks to compile the environment on the host and extract the CRC from that, have envcrc extract the environment straight from the ELF object that will be linked into u-boot itself. This makes the envcrc code a bit more complicated, but it simplifies the build process and host requirements because we don't have to try and recreate the environment that the target will be seeing on the host. This avoids some issues that crop up from time to time where the preprocessor defines on the host don't expand in the same way as for the target -- in case the target uses those defines to customize the environment, or the host defines conflicts with some of the target values.
Can you please be a bit more specific about which sort of problems you are talking here? We've been using this code for many years now, but I cannot remember any problems with it.
Signed-off-by: Mike Frysinger vapier@gentoo.org
common/Makefile | 4 +- common/env_embedded.c | 30 +--------- tools/Makefile | 3 +- tools/envcrc.c | 156 ++++++++++++++++++++++++++++++++++++++++++-------
The diffstat indicates that the new solution is more complicated than the old one, so I'd like to understand why this is needed (or if).
Best regards,
Wolfgang Denk

On Tuesday 16 June 2009 14:02:16 Wolfgang Denk wrote:
Mike Frysinger wrote:
Rather than rely on dirty hacks to compile the environment on the host and extract the CRC from that, have envcrc extract the environment straight from the ELF object that will be linked into u-boot itself. This makes the envcrc code a bit more complicated, but it simplifies the build process and host requirements because we don't have to try and recreate the environment that the target will be seeing on the host. This avoids some issues that crop up from time to time where the preprocessor defines on the host don't expand in the same way as for the target -- in case the target uses those defines to customize the environment, or the host defines conflicts with some of the target values.
Can you please be a bit more specific about which sort of problems you are talking here?
sure ... i'll be verbose so hopefully you can see what i see
We've been using this code for many years now, but I cannot remember any problems with it.
that's because the code paths have slowly acquired platform-specific hacks so that it kept working :)
at any rate, here's a longer problem description and surrounding issues ...
if the default environment utilizes defines that come from the toolchain in any level of indirection, the environment that is built into the target u-boot will not match the environment that is compiled on the host for crc generation.
a real world example is the unified ADI config header. it generates a default environment based on functionality actually available. the BF537 family has networking hardware in some parts, but not all (BF534 does not). so we key off of the variant to determine whether to enable networking by default. or we determine that some on-chip rom functions are available for certain variants and enable only those functions.
so the CPP dependency chain looks something like: - Blackfin toolchain sets up variant defines - ADI board headers set up capabilities based on variant defines - common ADI board header enables some commands based on variant defines - common ADI board header sets up default environment based on capabilities and commands available
since the host toolchain does not set up the same CPP dependencies as the Blackfin toolchain, this tree falls apart and the resulting environment string that envcrc operates on differs, so the CRCs differ.
i can understand your position of "setting up board configs this way is wrong", but i've structured it this way because it greatly reduces my maintenance burden while increasing regression capabilities. for example, i have a Blackfin-specific patch to the MAKEALL script that allows me to regression test not only the board-specific configuration, but also uncommon or otherwise untested cpu and boot mode configurations. for example, the bf537-stamp defaults to the BF537 cpu and BYPASS boot mode. but now my regression builds can automatically verify BF534/BF536 cpu code paths as well as PARALLEL/SPI/NAND/UART code paths. previously, some changes would accidentally break these uncommon edge cases and wouldnt be noticed until someone else happened to build. to say that these code paths were horribly broken most of the time is an understatement :).
i have also seen a case or two where the host toolchain inadvertently expanded things that ended up in the environment because they were not declared as strings. for example, many defines are not: #define CONFIG_FOO "foo" but rather they are: #define CONFIG_FOO foo just grep common/env_*.c for MKSTR() to see just how many things are affected in this way. having to worry about values here which may be arbitrarily expanded across host toolchains is a bad design practice imo.
if the environment is only ever compiled by one toolchain, the target one, then it significantly reduces the chance for unexpected and unwelcomed surprises. after all, the only way to notice something has gone wrong is to build the source with a specific host toolchain, burn the image, reset, and see the dreaded "CRC mismatch, using default environment" error from u-boot. unless you're familiar with this problem and/or have seen/root caused this before, attempting to track down the source of this problem can be very time consuming (as it was the first time i hit this problem).
common/Makefile | 4 +- common/env_embedded.c | 30 +--------- tools/Makefile | 3 +- tools/envcrc.c | 156 ++++++++++++++++++++++++++++++++++++++++++-------
The diffstat indicates that the new solution is more complicated than the old one, so I'd like to understand why this is needed (or if).
if LoC are the metric, then yes, but conceptually this change is simpler. the current behavior requires mixing target u-boot environment headers with the host toolchain, and a whole bunch of hacks to make sure things actually compile, produce the same end environment with two vastly different build environments, and end up with the same crc value. my proposed change means we only compile 1 environment object -- the one that is going into the target u- boot binary. we then extract the environment directly from the object which is actually going into u-boot (so we are absolutely assured that the environment we are working with on the host matches the target).
hope this clears things up -mike

Dear Mike Frysinger,
In message 200906190536.13184.vapier@gentoo.org you wrote:
We've been using this code for many years now, but I cannot remember any problems with it.
...
if the default environment utilizes defines that come from the toolchain in any level of indirection, the environment that is built into the target u-boot will not match the environment that is compiled on the host for crc generation.
Hm... Defines that influence the envrionment should never come from the tool chain, but only from U-Boot source files.
so the CPP dependency chain looks something like:
- Blackfin toolchain sets up variant defines
- ADI board headers set up capabilities based on variant defines
- common ADI board header enables some commands based on variant defines
- common ADI board header sets up default environment based on capabilities
and commands available
since the host toolchain does not set up the same CPP dependencies as the Blackfin toolchain, this tree falls apart and the resulting environment string that envcrc operates on differs, so the CRCs differ.
I see what your problem is.
i can understand your position of "setting up board configs this way is wrong", but i've structured it this way because it greatly reduces my maintenance burden while increasing regression capabilities. for example, i
I see. I understand what you have in mind, but I don't think that such a specific hack for a single user justifies such a change.
Instead of setting such defines in the tool chain (whatever this means exactly) it would be probably more appropriate to pass arguments or environment variables to make, I think - and not more difficult, too.
i have also seen a case or two where the host toolchain inadvertently expanded things that ended up in the environment because they were not declared as strings. for example, many defines are not: #define CONFIG_FOO "foo" but rather they are: #define CONFIG_FOO foo just grep common/env_*.c for MKSTR() to see just how many things are affected in this way. having to worry about values here which may be arbitrarily expanded across host toolchains is a bad design practice imo.
Patches for such problems are welcome.
if the environment is only ever compiled by one toolchain, the target one, then it significantly reduces the chance for unexpected and unwelcomed surprises. after all, the only way to notice something has gone wrong is to build the source with a specific host toolchain, burn the image, reset, and see the dreaded "CRC mismatch, using default environment" error from u-boot.
Ummm.. but this is not a real problem, because a simple "saveenv" will fix it, right? All boards that don't embed the environment work like this.
hope this clears things up
I understand your arguments, but I don't come to the same conclusions. Sorry.
Best regards,
Wolfgang Denk

On Friday 10 July 2009 18:07:52 Wolfgang Denk wrote:
Mike Frysinger wrote:
i can understand your position of "setting up board configs this way is wrong", but i've structured it this way because it greatly reduces my maintenance burden while increasing regression capabilities. for example, i
I see. I understand what you have in mind, but I don't think that such a specific hack for a single user justifies such a change.
this isnt a hack. the code is thought out and designed to do one thing properly. if you want to see hacks, look at the nested mess that exists today in the code that is compiled on the host to produce the embedded environment. as for "single user", yes i might be the only arch so far *to complain aloud*, but that doesnt directly imply i'm the only one who would use it.
the current process design for creating the embedded env is fundamentally flawed. i'm not saying it doesnt work as is on most development systems we care about today, but working doesnt translate directly to "designed correctly".
Instead of setting such defines in the tool chain (whatever this means exactly) it would be probably more appropriate to pass arguments or environment variables to make, I think - and not more difficult, too.
i guess i could create a header that enumerates all the possible toolchain defines and translates them to CONFIG_xxx such that they get transferred between build environments, but that sounds pretty fragile and would require constant updates as new processors are supported. i dont think make env vars would work because these are dynamic CPP defines.
i have also seen a case or two where the host toolchain inadvertently expanded things that ended up in the environment because they were not declared as strings. for example, many defines are not: #define CONFIG_FOO "foo" but rather they are: #define CONFIG_FOO foo just grep common/env_*.c for MKSTR() to see just how many things are affected in this way. having to worry about values here which may be arbitrarily expanded across host toolchains is a bad design practice imo.
Patches for such problems are welcome.
i'm not interested in fighting a battle that, by design, can never be won
if the environment is only ever compiled by one toolchain, the target one, then it significantly reduces the chance for unexpected and unwelcomed surprises. after all, the only way to notice something has gone wrong is to build the source with a specific host toolchain, burn the image, reset, and see the dreaded "CRC mismatch, using default environment" error from u-boot.
Ummm.. but this is not a real problem, because a simple "saveenv" will fix it, right? All boards that don't embed the environment work like this.
yes, a saveenv will reset the crc, but again, this is a workaround for a design flaw. i'm interested in fixing these flaws, not telling people to "go run saveenv and pretend there isnt a problem". after all, it makes it pretty hard to take a u-boot binary image, send it to a factory for programming, and then additionally have the board run the native code merely to rebuild the environment crc that should have been correct in the first place. -mike
participants (2)
-
Mike Frysinger
-
Wolfgang Denk