
That's true, but I think the most important case is lcd scrolling, where it's usually a big power of two -- that's where we had the #ifdef, so the problem was known, I suppose.
I think the most important case for *you* is lcd scrolling, but for 99% of everyone else, it isn't at all:)
Well, it's a big memcpy, and it has a direct effect on the user. Every other copy is smaller, or has no interactive value.
memcpy() and memset() are used 100 times more often in non-lcd related code and most boards don't even have LCDs.
That's true. But it's only a boot loader (I just looked at what Nicolas Pitre did in the kernel for ARM strcpy and, well....).
So I took some measurements (it's one of Pike's rules of programming:
* Rule 2. Measure. Don't tune for speed until you've measured, and even then don't unless one part of the code overwhelms the rest.
)
I booted into u-boot, typed "setenv stdout serial" then "boot", which boots over Ethernet. I stopped the system after u-boot handed control over to the kernel. Result: 10412 memcpy calls, distributed as follows (count, length):
3941 4 1583 6 772 20 1 46 1 47 3 60 1024 64 1 815 1 888 770 1148 1543 1480 1 2283 1 3836 770 4096
So I dare say lengths that are not a multiple of 4 are a minority anyway: 1587 calls, 12689 bytes, i.e. 15.2% of the calls and 0.2% of the data.
The data was collected in memory with the patch below, and the log was post-processed with the following command:
od -An -t d4 logfile | awk '{print $4}' | sort -n | uniq -c
diff --git a/include/configs/nhk8815.h b/include/configs/nhk8815.h index edd698e..a390f28 100644 --- a/include/configs/nhk8815.h +++ b/include/configs/nhk8815.h @@ -28,6 +28,8 @@
#include <nomadik.h>
+#define CONFIG_MCLOGSIZE (16*1024) + #define CONFIG_ARM926EJS #define CONFIG_NOMADIK #define CONFIG_NOMADIK_8815 /* cpu variant */ diff --git a/lib_generic/string.c b/lib_generic/string.c index 5f7aff9..5afa11e 100644 --- a/lib_generic/string.c +++ b/lib_generic/string.c @@ -19,6 +19,7 @@ #include <linux/string.h> #include <linux/ctype.h> #include <malloc.h> +#include <common.h>
#if 0 /* not used - was: #ifndef __HAVE_ARCH_STRNICMP */ @@ -461,11 +462,29 @@ char * bcopy(const char * src, char * dest, int count) * You should not use this function to access IO space, use memcpy_toio() * or memcpy_fromio() instead. */ + +#ifndef CONFIG_MCLOGSIZE /* if you want to log the memcpy calls, define it */ +#define CONFIG_MCLOGSIZE 0 +#endif +struct mclog {int idx; void *dst; const void *src; int cnt;}; +static struct mclog mclog[CONFIG_MCLOGSIZE]; + void * memcpy(void *dest, const void *src, size_t count) { char *d8 = (char *)dest, *s8 = (char *)src; unsigned long *dl = (unsigned long *)dest, *sl = (unsigned long *)src;
+ if (CONFIG_MCLOGSIZE) { + static int idx; + struct mclog *p = mclog + (idx % (CONFIG_MCLOGSIZE ?: 1)); + if (!idx) printf("memcpy log at %p, size 0x%x\n", + mclog, sizeof(mclog)); + p->idx = idx++; + p->dst = dest; + p->src = src; + p->cnt = count; + } + /* if all data is aligned (common case), copy a word at a time */ if ( (((int)dest | (int)src | count) & (sizeof(long) - 1)) == 0) { count /= sizeof(unsigned long);