[U-Boot] [PATCH v2 00/10] armv7: cache maintenance operations

With D-cache and MMU enabled for ARM in U-Boot, it becomes imperative to support a minimal set of cache maintenance operations and the necessary initializations before enabling the MMU.
This series of patches attempts to do the following for armv7:
* Necessary initialization sequence before enabling the MMU, including invalidation of the TLBs, data caches, branch predictor array etc.
* Framework for supporting SOC-specific outer caches in a generic manner (inspired by the Linux implementation; v1 used a structure of function pointers, changed to weakly linked functions in v2 - see the change log below)
* Generic armv7 cache maintenance operations for caches known to the CPU
* Support for the ARM PL310 L2 cache controller used in OMAP4
* Cleanup of the cleanup_before_linux() function
* Adapting all armv7 SOCs to use the new framework and removing duplicated code
Testing:
* Extensive testing on OMAP4430SDP and OMAP3430SDP by creating coherency issues and solving them using the maintenance routines, e.g.:
  - memfill a region of memory with a known pattern
  - Invalidate the region
  - Read back and compare the region with the original pattern
  - If the match fails, the invalidate was successful
  - Now add a flush call just before the invalidate
  - If the match now succeeds, the flush was successful
  - (a C sketch of this experiment follows the list)
* Outer caches were tested with experiments involving making the function pointers NULL
* Kernel booting on OMAP4430SDP and OMAP3430SDP

Note: v2 has been tested only on OMAP4430SDP
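A minimal C sketch of the coherency experiment referenced in the list above (the scratch address, size and pattern are placeholders; it uses the flush_dcache_range()/invalidate_dcache_range() interface this series provides):

#include <common.h>

#define TEST_ADDR	0x82000000UL	/* placeholder DRAM scratch address */
#define TEST_SIZE	0x1000UL
#define TEST_PATTERN	0xA5A5A5A5UL

/* returns 0 if the read-back still matches the pattern, 1 otherwise */
static int dcache_coherency_test(int do_flush)
{
	volatile u32 *p = (u32 *)TEST_ADDR;
	u32 i;

	for (i = 0; i < TEST_SIZE / sizeof(*p); i++)
		p[i] = TEST_PATTERN;	/* writes may sit only in the D-cache */

	if (do_flush)	/* clean dirty lines out to memory first */
		flush_dcache_range(TEST_ADDR, TEST_ADDR + TEST_SIZE);

	/* discard cached copies; subsequent reads come from memory */
	invalidate_dcache_range(TEST_ADDR, TEST_ADDR + TEST_SIZE);

	for (i = 0; i < TEST_SIZE / sizeof(*p); i++)
		if (p[i] != TEST_PATTERN)
			return 1;	/* mismatch: invalidate dropped dirty lines */

	return 0;	/* match: the flush pushed the data to memory */
}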
v2:
* Pointer-based callback mechanism for outer cache operations changed to weakly linked functions
* Change -march=armv7-a back to armv5
* Moved utility macros out of armv7.h
* Added documentation for new CONFIG options
* Changed implementation of log2n to not use the CLZ instruction, as armv4 doesn't support this instruction and the newly added Tegra2 uses -march=armv4
* Blank line after local variable declarations - fixed globally
* Explicitly added an empty flush_cache() under #ifdef CONFIG_SYS_NO_DCACHE
* Removed the print inside the weakly linked stub function - __arm_init_before_mmu
* Fixed signature of flush_cache() in cache.c
* More descriptive commit message for the PL310 support patch
* C struct for PL310 register accesses
* Fixed whitespace issues
Aneesh V (10):
  arm: make default implementation of flush_cache() weakly linked
  armv7: add miscellaneous utility macros
  armv7: cache maintenance operations for armv7
  armv7: replace CONFIG_L2_OFF with CONFIG_SYS_NO_L2CACHE
  armv7: integrate cache maintenance support
  arm: minor fixes for cache and mmu handling
  armv7: add PL310 support to u-boot
  armv7: adapt omap4 to the new cache maintenance framework
  armv7: adapt omap3 to the new cache maintenance framework
  armv7: adapt s5pc1xx to the new cache maintenance framework
 README                                        |   11 +
 arch/arm/cpu/armv7/Makefile                   |    2 +-
 arch/arm/cpu/armv7/cache_v7.c                 |  390 +++++++++++++++++++++++++
 arch/arm/cpu/armv7/cpu.c                      |   51 ++--
 arch/arm/cpu/armv7/omap3/Makefile             |    1 -
 arch/arm/cpu/armv7/omap3/board.c              |  138 ++++++++--
 arch/arm/cpu/armv7/omap3/cache.S              |  263 -----------------
 arch/arm/cpu/armv7/omap3/lowlevel_init.S      |   32 ++
 arch/arm/cpu/armv7/omap4/lowlevel_init.S      |   18 ++
 arch/arm/cpu/armv7/s5pc1xx/cache.S            |   88 +-----
 arch/arm/cpu/armv7/start.S                    |   18 +-
 arch/arm/include/asm/arch-omap3/omap3.h       |   20 ++
 arch/arm/include/asm/arch-omap3/sys_proto.h   |   10 +-
 arch/arm/include/asm/arch-omap4/sys_proto.h   |    1 -
 arch/arm/include/asm/arch-s5pc1xx/sys_proto.h |    3 -
 arch/arm/include/asm/armv7.h                  |   68 +++++
 arch/arm/include/asm/pl310.h                  |   74 +++++
 arch/arm/include/asm/utils.h                  |   80 +++++
 arch/arm/lib/Makefile                         |    1 +
 arch/arm/lib/board.c                          |    6 +
 arch/arm/lib/cache-cp15.c                     |   16 +-
 arch/arm/lib/cache-pl310.c                    |  116 ++++++++
 arch/arm/lib/cache.c                          |   20 +-
 include/common.h                              |    5 +-
 include/configs/ca9x4_ct_vxp.h                |    2 +-
 include/configs/efikamx.h                     |    2 +-
 include/configs/mx51evk.h                     |    2 +-
 include/configs/mx53evk.h                     |    2 +-
 include/configs/omap4_panda.h                 |    8 +-
 include/configs/omap4_sdp4430.h               |    8 +-
 include/configs/s5pc210_universal.h           |    2 +-
 include/configs/tegra2-common.h               |    2 +-
 include/configs/vision2.h                     |    2 +-
 33 files changed, 1037 insertions(+), 425 deletions(-)
 create mode 100644 arch/arm/cpu/armv7/cache_v7.c
 delete mode 100644 arch/arm/cpu/armv7/omap3/cache.S
 create mode 100644 arch/arm/include/asm/armv7.h
 create mode 100644 arch/arm/include/asm/pl310.h
 create mode 100644 arch/arm/include/asm/utils.h
 create mode 100644 arch/arm/lib/cache-pl310.c

make the default implementation of flush_cache() weakly linked so that sub-architectures can override it
Signed-off-by: Aneesh V <aneesh@ti.com>
---
 arch/arm/lib/cache.c |    4 +++-
 1 files changed, 3 insertions(+), 1 deletions(-)
diff --git a/arch/arm/lib/cache.c b/arch/arm/lib/cache.c
index 30686fe..27123cd 100644
--- a/arch/arm/lib/cache.c
+++ b/arch/arm/lib/cache.c
@@ -25,7 +25,7 @@
 
 #include <common.h>
 
-void flush_cache (unsigned long dummy1, unsigned long dummy2)
+void __flush_cache(unsigned long start, unsigned long size)
 {
 #if defined(CONFIG_OMAP2420) || defined(CONFIG_ARM1136)
 	void arm1136_cache_flush(void);
@@ -45,3 +45,5 @@ void flush_cache (unsigned long dummy1, unsigned long dummy2)
 #endif
 	return;
 }
+void flush_cache(unsigned long start, unsigned long size)
+	__attribute__((weak, alias("__flush_cache")));
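A sub-architecture can now override the default simply by providing a strong flush_cache(); the linker prefers it over the weak alias. A minimal sketch of such an override (it mirrors what the armv7 code later in this series does; the placement is illustrative):

#include <common.h>

/*
 * Strong definition in SoC/arch code; it replaces the weak
 * flush_cache() from arch/arm/lib/cache.c at link time.
 */
void flush_cache(unsigned long start, unsigned long size)
{
	flush_dcache_range(start, start + size);
}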

add utility macros for:
* bit field operations
* log2n functions
Signed-off-by: Aneesh V <aneesh@ti.com>
---
 arch/arm/include/asm/utils.h |   80 ++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 80 insertions(+), 0 deletions(-)
 create mode 100644 arch/arm/include/asm/utils.h
diff --git a/arch/arm/include/asm/utils.h b/arch/arm/include/asm/utils.h
new file mode 100644
index 0000000..d581539
--- /dev/null
+++ b/arch/arm/include/asm/utils.h
@@ -0,0 +1,80 @@
+/*
+ * (C) Copyright 2010
+ * Texas Instruments, <www.ti.com>
+ *
+ * Aneesh V <aneesh@ti.com>
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+#ifndef _UTILS_H_
+#define _UTILS_H_
+
+/* extract a bit field from a bit vector */
+#define get_bit_field(nr, start, mask)\
+	(((nr) & (mask)) >> (start))
+
+/* Set a field in a bit vector */
+#define set_bit_field(nr, start, mask, val)\
+	do { \
+		(nr) = ((nr) & ~(mask)) | (((val) << (start)) & (mask));\
+	} while (0);
+
+/*
+ * Utility macro for read-modify-write of a hardware register
+ *	addr - address of the register
+ *	shift - starting bit position of the field to be modified
+ *	msk - mask for the field
+ *	val - value to be shifted masked and written to the field
+ */
+#define modify_reg_32(addr, shift, msk, val) \
+	do {\
+		writel(((readl(addr) & ~(msk))|(((val) << (shift)) & (msk))),\
+		       (addr));\
+	} while (0);
+
+static inline s32 log_2_n_round_up(u32 n)
+{
+	s32 log2n = -1;
+	u32 temp = n;
+
+	while (temp) {
+		log2n++;
+		temp >>= 1;
+	}
+
+	if (n & (n - 1))
+		return log2n + 1; /* not power of 2 - round up */
+	else
+		return log2n; /* power of 2 */
+}
+
+static inline s32 log_2_n_round_down(u32 n)
+{
+	s32 log2n = -1;
+	u32 temp = n;
+
+	while (temp) {
+		log2n++;
+		temp >>= 1;
+	}
+
+	return log2n;
+}
+
+#endif /* _UTILS_H_ */
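For illustration, the helpers above might be used like this (the field layout and the values are invented for the example):

#include <common.h>
#include <asm/utils.h>

/* hypothetical 3-bit field occupying bits [6:4] of a register value */
#define FLD_SHIFT	4
#define FLD_MASK	(0x7 << FLD_SHIFT)

void utils_example(void)
{
	u32 val = 0x35;
	u32 field;

	field = get_bit_field(val, FLD_SHIFT, FLD_MASK);	/* 0x3 */
	set_bit_field(val, FLD_SHIFT, FLD_MASK, 0x5);		/* val is now 0x55 */

	printf("field=%u, log2up(6)=%d, log2down(6)=%d\n",
	       field, log_2_n_round_up(6), log_2_n_round_down(6));
}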

- Add a framework for layered cache maintenance
  - separate out SOC-specific outer cache maintenance from maintenance of caches known to the CPU
- Add generic ARMv7 cache maintenance operations that affect all caches known to ARMv7 CPUs. For instance, in Cortex-A8 these operations will affect both the L1 and L2 caches; in Cortex-A9 they will affect only the L1 cache.
- D-cache operations supported:
  - Invalidate entire D-cache
  - Invalidate D-cache range
  - Flush (clean & invalidate) entire D-cache
  - Flush D-cache range
  (a usage sketch of the range operations follows this list)
- I-cache operations supported:
  - Invalidate entire I-cache
- Add maintenance functions for TLB, branch predictor array etc.
- Use CP15-based barrier operations (ISB/DSB/DMB equivalents) so that the code continues to build with -march=armv5 (see the v2 change log above)
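As referenced in the list above, typical use of the new range operations from driver code, e.g. around a DMA transfer (the buffer address and length are placeholders):

#include <common.h>

void dma_buffer_example(unsigned long buf, unsigned long len)
{
	/* make CPU writes visible to the DMA master before starting it */
	flush_dcache_range(buf, buf + len);

	/* ... DMA master reads/writes the buffer here ... */

	/* drop stale cached copies before the CPU reads DMA-written data */
	invalidate_dcache_range(buf, buf + len);
}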
Signed-off-by: Aneesh V <aneesh@ti.com>
---
 README                        |    5 +
 arch/arm/cpu/armv7/Makefile   |    2 +-
 arch/arm/cpu/armv7/cache_v7.c |  390 +++++++++++++++++++++++++++++++++++++++++
 arch/arm/include/asm/armv7.h  |   68 +++++++
 include/common.h              |    5 +-
 5 files changed, 468 insertions(+), 2 deletions(-)
 create mode 100644 arch/arm/cpu/armv7/cache_v7.c
 create mode 100644 arch/arm/include/asm/armv7.h
diff --git a/README b/README
index 21cd71b..ba01c52 100644
--- a/README
+++ b/README
@@ -448,6 +448,11 @@ The following options need to be configured:
 		Note: If a "bootargs" environment is defined, it will overwride the defaults discussed just above.
 
+- Cache Configuration:
+		CONFIG_SYS_NO_ICACHE - Do not enable instruction cache in U-Boot
+		CONFIG_SYS_NO_DCACHE - Do not enable data cache in U-Boot
+		CONFIG_SYS_NO_L2CACHE - Do not enable L2 cache in U-Boot
+
 - Serial Ports:
 		CONFIG_PL010_SERIAL
diff --git a/arch/arm/cpu/armv7/Makefile b/arch/arm/cpu/armv7/Makefile index 8c0e915..299792a 100644 --- a/arch/arm/cpu/armv7/Makefile +++ b/arch/arm/cpu/armv7/Makefile @@ -26,7 +26,7 @@ include $(TOPDIR)/config.mk LIB = $(obj)lib$(CPU).o
START := start.o -COBJS := cpu.o +COBJS := cpu.o cache_v7.o COBJS += syslib.o
SRCS := $(START:.o=.S) $(COBJS:.o=.c) diff --git a/arch/arm/cpu/armv7/cache_v7.c b/arch/arm/cpu/armv7/cache_v7.c new file mode 100644 index 0000000..46d8e09 --- /dev/null +++ b/arch/arm/cpu/armv7/cache_v7.c @@ -0,0 +1,390 @@ +/* + * (C) Copyright 2010 + * Texas Instruments Incorporated - http://www.ti.com/ + * Aneesh V aneesh@ti.com + * + * See file CREDITS for list of people who contributed to this + * project. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + */ +#include <linux/types.h> +#include <common.h> +#include <asm/armv7.h> +#include <asm/utils.h> + +#define ARMV7_DCACHE_INVAL_ALL 1 +#define ARMV7_DCACHE_CLEAN_INVAL_ALL 2 +#define ARMV7_DCACHE_INVAL_RANGE 3 +#define ARMV7_DCACHE_CLEAN_INVAL_RANGE 4 + +#ifndef CONFIG_SYS_NO_DCACHE +/* + * Write the level and type you want to Cache Size Selection Register(CSSELR) + * to get size details from Current Cache Size ID Register(CCSIDR) + */ +static void set_csselr(u32 level, u32 type) +{ u32 csselr = level << 1 | type; + + /* Write to Cache Size Selection Register(CSSELR) */ + asm volatile ("mcr p15, 2, %0, c0, c0, 0" : : "r" (csselr)); +} + +static u32 get_ccsidr(void) +{ + u32 ccsidr; + + /* Read current CP15 Cache Size ID Register */ + asm volatile ("mrc p15, 1, %0, c0, c0, 0" : "=r" (ccsidr)); + return ccsidr; +} + +static u32 get_clidr(void) +{ + u32 clidr; + + /* Read current CP15 Cache Level ID Register */ + asm volatile ("mrc p15,1,%0,c0,c0,1" : "=r" (clidr)); + return clidr; +} + +static void v7_inval_dcache_level_setway(u32 level, u32 num_sets, + u32 num_ways, u32 way_shift, + u32 log2_line_len) +{ + int way, set, setway; + + /* + * For optimal assembly code: + * a. count down + * b. have bigger loop inside + */ + for (way = num_ways - 1; way >= 0 ; way--) + for (set = num_sets - 1; set >= 0; set--) { + setway = (level << 1) | (set << log2_line_len) | + (way << way_shift); + /* Invalidate data/unified cache line by set/way */ + asm volatile (" mcr p15, 0, %0, c7, c6, 2" + : : "r" (setway)); + } + /* DMB to make sure the operation is complete */ + CP15DMB; +} + +static void v7_clean_inval_dcache_level_setway(u32 level, u32 num_sets, + u32 num_ways, u32 way_shift, + u32 log2_line_len) +{ + int way, set, setway; + + /* + * For optimal assembly code: + * a. count down + * b. 
have bigger loop inside + */ + for (way = num_ways - 1; way >= 0 ; way--) + for (set = num_sets - 1; set >= 0; set--) { + setway = (level << 1) | (set << log2_line_len) | + (way << way_shift); + /* + * Clean & Invalidate data/unified + * cache line by set/way + */ + asm volatile (" mcr p15, 0, %0, c7, c14, 2" + : : "r" (setway)); + } + /* DMB to make sure the operation is complete */ + CP15DMB; +} + +static void v7_maint_dcache_level_setway(u32 level, u32 operation) +{ + u32 ccsidr; + u32 num_sets, num_ways, log2_line_len, log2_num_ways; + u32 way_shift; + + set_csselr(level, ARMV7_CSSELR_IND_DATA_UNIFIED); + + ccsidr = get_ccsidr(); + + log2_line_len = get_bit_field(ccsidr, CCSIDR_LINE_SIZE_OFFSET, + CCSIDR_LINE_SIZE_MASK) + 2; + /* Converting from words to bytes */ + log2_line_len += 2; + + num_ways = get_bit_field(ccsidr, CCSIDR_ASSOCIATIVITY_OFFSET, + CCSIDR_ASSOCIATIVITY_MASK) + 1; + num_sets = get_bit_field(ccsidr, CCSIDR_NUM_SETS_OFFSET, + CCSIDR_NUM_SETS_MASK) + 1; + /* + * According to ARMv7 ARM number of sets and number of ways need + * not be a power of 2 + */ + log2_num_ways = log_2_n_round_up(num_ways); + + way_shift = (32 - log2_num_ways); + if (operation == ARMV7_DCACHE_INVAL_ALL) + v7_inval_dcache_level_setway(level, num_sets, num_ways, + way_shift, log2_line_len); + else if (operation == ARMV7_DCACHE_CLEAN_INVAL_ALL) + v7_clean_inval_dcache_level_setway(level, num_sets, num_ways, + way_shift, log2_line_len); +} + +static void v7_maint_dcache_all(u32 operation) +{ + u32 level, cache_type, level_start_bit = 0; + + u32 clidr = get_clidr(); + + for (level = 0; level < 7; level++) { + cache_type = get_bit_field(clidr, level_start_bit, + 0x7 << level_start_bit); + if ((cache_type == ARMV7_CLIDR_CTYPE_DATA_ONLY) || + (cache_type == ARMV7_CLIDR_CTYPE_INSTRUCTION_DATA) || + (cache_type == ARMV7_CLIDR_CTYPE_UNIFIED)) + v7_maint_dcache_level_setway(level, operation); + level_start_bit += 3; + } +} + +static void v7_dcache_clean_inval_range(u32 start, + u32 stop, u32 line_len) +{ + u32 mva; + + /* Align start to cache line boundary */ + start &= ~(line_len - 1); + for (mva = start; mva < stop; mva = mva + line_len) + /* DCCIMVAC - Clean & Invalidate data cache by MVA to PoC */ + asm volatile ("mcr p15, 0, %0, c7, c14, 1" : : "r" (mva)); +} + +static void v7_dcache_inval_range(u32 start, u32 stop, u32 line_len) +{ + u32 mva; + + /* + * If start address is not aligned to cache-line flush the first + * line to prevent affecting somebody else's buffer + */ + if (start & (line_len - 1)) { + v7_dcache_clean_inval_range(start, start + 1, line_len); + /* move to next cache line */ + start = (start + line_len - 1) & ~(line_len - 1); + } + + /* + * If stop address is not aligned to cache-line flush the last + * line to prevent affecting somebody else's buffer + */ + if (stop & (line_len - 1)) { + v7_dcache_clean_inval_range(stop, stop + 1, line_len); + /* align to the beginning of this cache line */ + stop &= ~(line_len - 1); + } + + for (mva = start; mva < stop; mva = mva + line_len) + /* DCIMVAC - Invalidate data cache by MVA to PoC */ + asm volatile ("mcr p15, 0, %0, c7, c6, 1" : : "r" (mva)); +} + +static void v7_dcache_maint_range(u32 start, u32 stop, u32 range_op) +{ + u32 line_len, ccsidr; + + ccsidr = get_ccsidr(); + line_len = get_bit_field(ccsidr, CCSIDR_LINE_SIZE_OFFSET, + CCSIDR_LINE_SIZE_MASK) + 2; + /* Converting from words to bytes */ + line_len += 2; + /* converting from log2(linelen) to linelen */ + line_len = 1 << line_len; + + switch (range_op) { + case 
ARMV7_DCACHE_CLEAN_INVAL_RANGE: + v7_dcache_clean_inval_range(start, stop, line_len); + break; + case ARMV7_DCACHE_INVAL_RANGE: + v7_dcache_inval_range(start, stop, line_len); + break; + } + + /* DMB to make sure the operation is complete */ + CP15DMB; +} + +/* Invalidate TLB */ +static void v7_inval_tlb(void) +{ + /* Invalidate entire unified TLB */ + asm volatile ("mcr p15, 0, %0, c8, c7, 0" : : "r" (0)); + /* Invalidate entire data TLB */ + asm volatile ("mcr p15, 0, %0, c8, c6, 0" : : "r" (0)); + /* Invalidate entire instruction TLB */ + asm volatile ("mcr p15, 0, %0, c8, c5, 0" : : "r" (0)); + /* Full system DSB - make sure that the invalidation is complete */ + CP15DSB; + /* Full system ISB - make sure the instruction stream sees it */ + CP15ISB; +} + +void invalidate_dcache_all(void) +{ + v7_maint_dcache_all(ARMV7_DCACHE_INVAL_ALL); + + v7_outer_cache_inval_all(); +} + +/* + * Performs a clean & invalidation of the entire data cache + * at all levels + */ +void flush_dcache_all(void) +{ + v7_maint_dcache_all(ARMV7_DCACHE_CLEAN_INVAL_ALL); + + v7_outer_cache_flush_all(); +} + +/* + * Invalidates range in all levels of D-cache/unified cache used: + * Affects the range [start, stop - 1] + */ +void invalidate_dcache_range(unsigned long start, unsigned long stop) +{ + + v7_dcache_maint_range(start, stop, ARMV7_DCACHE_INVAL_RANGE); + + v7_outer_cache_inval_range(start, stop); +} + +/* + * Flush range(clean & invalidate) from all levels of D-cache/unified + * cache used: + * Affects the range [start, stop - 1] + */ +void flush_dcache_range(unsigned long start, unsigned long stop) +{ + v7_dcache_maint_range(start, stop, ARMV7_DCACHE_CLEAN_INVAL_RANGE); + + v7_outer_cache_flush_range(start, stop); +} + +void arm_init_before_mmu(void) +{ + v7_outer_cache_enable(); + invalidate_dcache_all(); + v7_inval_tlb(); +} + +/* + * Flush range from all levels of d-cache/unified-cache used: + * Affects the range [start, start + size - 1] + */ +void flush_cache(unsigned long start, unsigned long size) +{ + flush_dcache_range(start, start + size); +} +#else /* #ifndef CONFIG_SYS_NO_DCACHE */ +void invalidate_dcache_all(void) +{ +} + +void flush_dcache_all(void) +{ +} + +void invalidate_dcache_range(unsigned long start, unsigned long stop) +{ +} + +void flush_dcache_range(unsigned long start, unsigned long stop) +{ +} + +void arm_init_before_mmu(void) +{ +} + +void flush_cache(unsigned long start, unsigned long size) +{ +} +#endif /* #ifndef CONFIG_SYS_NO_DCACHE */ + +#ifndef CONFIG_SYS_NO_ICACHE +/* Invalidate entire I-cache and branch predictor array */ +void invalidate_icache_all(void) +{ + /* + * Invalidate all instruction caches to PoU. + * Also flushes branch target cache. 
+ */ + asm volatile ("mcr p15, 0, %0, c7, c5, 0" : : "r" (0)); + + /* Invalidate entire branch predictor array */ + asm volatile ("mcr p15, 0, %0, c7, c5, 6" : : "r" (0)); + + /* Full system DSB - make sure that the invalidation is complete */ + CP15DSB; + + /* ISB - make sure the instruction stream sees it */ + CP15ISB; +} +#else +void invalidate_icache_all(void) +{ +} +#endif + +/* + * Stub implementations for outer cache operations + */ +void __v7_outer_cache_enable(void) +{ +} +void v7_outer_cache_enable(void) + __attribute__((weak, alias("__v7_outer_cache_enable"))); + +void __v7_outer_cache_disable(void) +{ +} +void v7_outer_cache_disable(void) + __attribute__((weak, alias("__v7_outer_cache_disable"))); + +void __v7_outer_cache_flush_all(void) +{ +} +void v7_outer_cache_flush_all(void) + __attribute__((weak, alias("__v7_outer_cache_flush_all"))); + +void __v7_outer_cache_inval_all(void) +{ +} +void v7_outer_cache_inval_all(void) + __attribute__((weak, alias("__v7_outer_cache_inval_all"))); + +void __v7_outer_cache_flush_range(u32 start, u32 end) +{ +} +void v7_outer_cache_flush_range(u32 start, u32 end) + __attribute__((weak, alias("__v7_outer_cache_flush_range"))); + +void __v7_outer_cache_inval_range(u32 start, u32 end) +{ +} +void v7_outer_cache_inval_range(u32 start, u32 end) + __attribute__((weak, alias("__v7_outer_cache_inval_range"))); diff --git a/arch/arm/include/asm/armv7.h b/arch/arm/include/asm/armv7.h new file mode 100644 index 0000000..50cc167 --- /dev/null +++ b/arch/arm/include/asm/armv7.h @@ -0,0 +1,68 @@ +/* + * (C) Copyright 2010 + * Texas Instruments Incorporated - http://www.ti.com/ + * + * Aneesh V aneesh@ti.com + * + * See file CREDITS for list of people who contributed to this + * project. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + */ +#ifndef ARMV7_H +#define ARMV7_H +#include <linux/types.h> + +/* CCSIDR */ +#define CCSIDR_LINE_SIZE_OFFSET 0 +#define CCSIDR_LINE_SIZE_MASK 0x7 +#define CCSIDR_ASSOCIATIVITY_OFFSET 3 +#define CCSIDR_ASSOCIATIVITY_MASK (0x3FF << 3) +#define CCSIDR_NUM_SETS_OFFSET 13 +#define CCSIDR_NUM_SETS_MASK (0x7FFF << 13) + +/* + * Values for InD field in CSSELR + * Selects the type of cache + */ +#define ARMV7_CSSELR_IND_DATA_UNIFIED 0 +#define ARMV7_CSSELR_IND_INSTRUCTION 1 + +/* Values for Ctype fields in CLIDR */ +#define ARMV7_CLIDR_CTYPE_NO_CACHE 0 +#define ARMV7_CLIDR_CTYPE_INSTRUCTION_ONLY 1 +#define ARMV7_CLIDR_CTYPE_DATA_ONLY 2 +#define ARMV7_CLIDR_CTYPE_INSTRUCTION_DATA 3 +#define ARMV7_CLIDR_CTYPE_UNIFIED 4 + +/* + * CP15 Barrier instructions + * Please note that we have separate barrier instructions in ARMv7 + * However, we use the CP15 based instructtions because we use + * -march=armv5 in U-Boot + */ +#define CP15ISB asm volatile ("mcr p15, 0, %0, c7, c5, 4" : : "r" (0)) +#define CP15DSB asm volatile ("mcr p15, 0, %0, c7, c10, 4" : : "r" (0)) +#define CP15DMB asm volatile ("mcr p15, 0, %0, c7, c10, 5" : : "r" (0)) + +void v7_outer_cache_enable(void); +void v7_outer_cache_disable(void); +void v7_outer_cache_flush_all(void); +void v7_outer_cache_inval_all(void); +void v7_outer_cache_flush_range(u32 start, u32 end); +void v7_outer_cache_inval_range(u32 start, u32 end); + +#endif diff --git a/include/common.h b/include/common.h index d8c912d..46fa112 100644 --- a/include/common.h +++ b/include/common.h @@ -411,6 +411,7 @@ void icache_disable(void); int dcache_status (void); void dcache_enable (void); void dcache_disable(void); +void mmu_disable(void); void relocate_code (ulong, gd_t *, ulong) __attribute__ ((noreturn)); ulong get_endaddr (void); void trap_init (ulong); @@ -603,9 +604,11 @@ ulong video_setmem (ulong);
/* arch/$(ARCH)/lib/cache.c */ void flush_cache (unsigned long, unsigned long); +void flush_dcache_all(void); void flush_dcache_range(unsigned long start, unsigned long stop); void invalidate_dcache_range(unsigned long start, unsigned long stop); - +void invalidate_dcache_all(void); +void invalidate_icache_all(void);
/* arch/$(ARCH)/lib/ticks.S */ unsigned long long get_ticks(void);

replace all occurrences of CONFIG_L2_OFF with a more appropriate CONFIG_SYS_NO_L2CACHE
CONFIG_SYS_NO_L2CACHE has been chosen to be in line with CONFIG_SYS_NO_ICACHE and CONFIG_SYS_NO_DCACHE
Signed-off-by: Aneesh V <aneesh@ti.com>
---
 arch/arm/cpu/armv7/cpu.c            |    6 +-----
 include/configs/ca9x4_ct_vxp.h      |    2 +-
 include/configs/efikamx.h           |    2 +-
 include/configs/mx51evk.h           |    2 +-
 include/configs/mx53evk.h           |    2 +-
 include/configs/omap4_panda.h       |    2 +-
 include/configs/omap4_sdp4430.h     |    2 +-
 include/configs/s5pc210_universal.h |    2 +-
 include/configs/tegra2-common.h     |    2 +-
 include/configs/vision2.h           |    2 +-
 10 files changed, 10 insertions(+), 14 deletions(-)
diff --git a/arch/arm/cpu/armv7/cpu.c b/arch/arm/cpu/armv7/cpu.c index a01e0d6..7f28d87 100644 --- a/arch/arm/cpu/armv7/cpu.c +++ b/arch/arm/cpu/armv7/cpu.c @@ -35,11 +35,7 @@ #include <command.h> #include <asm/system.h> #include <asm/cache.h> -#ifndef CONFIG_L2_OFF -#include <asm/arch/sys_proto.h> -#endif - -static void cache_flush(void); +#include <asm/armv7.h>
int cleanup_before_linux(void) { diff --git a/include/configs/ca9x4_ct_vxp.h b/include/configs/ca9x4_ct_vxp.h index 63f003d..bdd4e91 100644 --- a/include/configs/ca9x4_ct_vxp.h +++ b/include/configs/ca9x4_ct_vxp.h @@ -40,7 +40,7 @@
#define CONFIG_CMDLINE_TAG 1 /* enable passing of ATAGs */ #define CONFIG_SETUP_MEMORY_TAGS 1 -#define CONFIG_L2_OFF 1 +#define CONFIG_SYS_NO_L2CACHE 1 #define CONFIG_INITRD_TAG 1
/* Size of malloc() pool */ diff --git a/include/configs/efikamx.h b/include/configs/efikamx.h index 1424347..0b64f54 100644 --- a/include/configs/efikamx.h +++ b/include/configs/efikamx.h @@ -38,7 +38,7 @@ #define CONFIG_DISPLAY_CPUINFO #define CONFIG_DISPLAY_BOARDINFO
-#define CONFIG_L2_OFF +#define CONFIG_SYS_NO_L2CACHE
/* * Bootloader Components Configuration diff --git a/include/configs/mx51evk.h b/include/configs/mx51evk.h index 591d6e1..20c299b 100644 --- a/include/configs/mx51evk.h +++ b/include/configs/mx51evk.h @@ -33,7 +33,7 @@ #define CONFIG_DISPLAY_CPUINFO #define CONFIG_DISPLAY_BOARDINFO
-#define CONFIG_L2_OFF +#define CONFIG_SYS_NO_L2CACHE
#include <asm/arch/imx-regs.h> /* diff --git a/include/configs/mx53evk.h b/include/configs/mx53evk.h index f2a5752..244a1ae 100644 --- a/include/configs/mx53evk.h +++ b/include/configs/mx53evk.h @@ -29,7 +29,7 @@ #define CONFIG_DISPLAY_CPUINFO #define CONFIG_DISPLAY_BOARDINFO
-#define CONFIG_L2_OFF +#define CONFIG_SYS_NO_L2CACHE
#include <asm/arch/imx-regs.h>
diff --git a/include/configs/omap4_panda.h b/include/configs/omap4_panda.h index 2b03b0f..8684798 100644 --- a/include/configs/omap4_panda.h +++ b/include/configs/omap4_panda.h @@ -46,7 +46,7 @@ #define CONFIG_DISPLAY_BOARDINFO 1
/* Keep L2 Cache Disabled */ -#define CONFIG_L2_OFF 1 +#define CONFIG_SYS_NO_L2CACHE 1
/* Clock Defines */ #define V_OSCK 38400000 /* Clock output from T2 */ diff --git a/include/configs/omap4_sdp4430.h b/include/configs/omap4_sdp4430.h index 9a8bb73..7cc0c13 100644 --- a/include/configs/omap4_sdp4430.h +++ b/include/configs/omap4_sdp4430.h @@ -47,7 +47,7 @@ #define CONFIG_DISPLAY_BOARDINFO 1
/* Keep L2 Cache Disabled */ -#define CONFIG_L2_OFF 1 +#define CONFIG_SYS_NO_L2CACHE 1
/* Clock Defines */ #define V_OSCK 38400000 /* Clock output from T2 */ diff --git a/include/configs/s5pc210_universal.h b/include/configs/s5pc210_universal.h index c033a8d..09f5cfc 100644 --- a/include/configs/s5pc210_universal.h +++ b/include/configs/s5pc210_universal.h @@ -43,7 +43,7 @@ #define CONFIG_DISPLAY_BOARDINFO
/* Keep L2 Cache Disabled */ -#define CONFIG_L2_OFF 1 +#define CONFIG_SYS_NO_L2CACHE 1
#define CONFIG_SYS_SDRAM_BASE 0x40000000 #define CONFIG_SYS_TEXT_BASE 0x44800000 diff --git a/include/configs/tegra2-common.h b/include/configs/tegra2-common.h index 4f4374a..27edf68 100644 --- a/include/configs/tegra2-common.h +++ b/include/configs/tegra2-common.h @@ -31,7 +31,7 @@ #define CONFIG_ARMCORTEXA9 /* This is an ARM V7 CPU core */ #define CONFIG_TEGRA2 /* in a NVidia Tegra2 core */ #define CONFIG_MACH_TEGRA_GENERIC /* which is a Tegra generic machine */ -#define CONFIG_L2_OFF /* No L2 cache */ +#define CONFIG_SYS_NO_L2CACHE /* No L2 cache */
#include <asm/arch/tegra2.h> /* get chip and board defs */
diff --git a/include/configs/vision2.h b/include/configs/vision2.h index 4c8e7fa..d6c99de 100644 --- a/include/configs/vision2.h +++ b/include/configs/vision2.h @@ -26,7 +26,7 @@
#define CONFIG_MX51 /* in a mx51 */ -#define CONFIG_L2_OFF +#define CONFIG_SYS_NO_L2CACHE
#include <asm/arch/imx-regs.h>

- Enable I-cache on bootup
- Enable MMU and D-cache immediately after relocation
  - Do necessary initialization before enabling d-cache and MMU
- Changes to cleanup_before_linux()
  - Make changes according to the new framework
(a sketch of the arm_init_before_mmu() hook this relies on follows this list)
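The central hook here is arm_init_before_mmu(): cache-cp15.c calls it (as an empty weak default) at the start of mmu_setup(), and the armv7 code overrides it so that caches and TLBs are clean before the MMU and D-cache come on. A condensed sketch of that override, as referenced above (it mirrors cache_v7.c from the earlier patch):

#include <common.h>
#include <asm/armv7.h>

/* armv7 override of the weak arm_init_before_mmu() stub in cache-cp15.c */
void arm_init_before_mmu(void)
{
	v7_outer_cache_enable();	/* empty weak stub unless an L2 driver overrides it */
	invalidate_dcache_all();	/* no stale or dirty lines when caching starts */
	/* cache_v7.c additionally invalidates the TLBs here (v7_inval_tlb()) */
}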
Signed-off-by: Aneesh V <aneesh@ti.com>
---
 arch/arm/cpu/armv7/cpu.c   |   45 +++++++++++++++++++------------------------
 arch/arm/cpu/armv7/start.S |   18 ++++++++++++++++-
 arch/arm/lib/board.c       |    6 +++++
 arch/arm/lib/cache-cp15.c  |    7 ++++++
 arch/arm/lib/cache.c       |    5 ----
 5 files changed, 50 insertions(+), 31 deletions(-)
diff --git a/arch/arm/cpu/armv7/cpu.c b/arch/arm/cpu/armv7/cpu.c index 7f28d87..5c69d04 100644 --- a/arch/arm/cpu/armv7/cpu.c +++ b/arch/arm/cpu/armv7/cpu.c @@ -34,13 +34,10 @@ #include <common.h> #include <command.h> #include <asm/system.h> -#include <asm/cache.h> #include <asm/armv7.h>
int cleanup_before_linux(void) { - unsigned int i; - /* * this function is called just before we call linux * it prepares the processor for linux @@ -49,31 +46,29 @@ int cleanup_before_linux(void) */ disable_interrupts();
- /* turn off I/D-cache */ + /* + * Turn off I-cache and invalidate it + */ icache_disable(); - dcache_disable(); + invalidate_icache_all();
- /* invalidate I-cache */ - cache_flush(); - -#ifndef CONFIG_L2_OFF - /* turn off L2 cache */ - l2_cache_disable(); - /* invalidate L2 cache also */ - invalidate_dcache(get_device_type()); -#endif - i = 0; - /* mem barrier to sync up things */ - asm("mcr p15, 0, %0, c7, c10, 4": :"r"(i)); + /* + * turn off D-cache + * dcache_disable() in turn flushes the d-cache and disables MMU + */ + dcache_disable();
-#ifndef CONFIG_L2_OFF - l2_cache_enable(); -#endif + /* + * After D-cache is flushed and before it is disabled there may + * be some new valid entries brought into the cache. We are sure + * that these lines are not dirty and will not affect our execution. + * (because unwinding the call-stack and setting a bit in CP15 SCTRL + * is all we did during this. We have not pushed anything on to the + * stack. Neither have we affected any static data) + * So just invalidate the entire d-cache again to avoid coherency + * problems for kernel + */ + invalidate_dcache_all();
return 0; } - -static void cache_flush(void) -{ - asm ("mcr p15, 0, %0, c7, c5, 0": :"r" (0)); -} diff --git a/arch/arm/cpu/armv7/start.S b/arch/arm/cpu/armv7/start.S index cb4f92f..0b54397 100644 --- a/arch/arm/cpu/armv7/start.S +++ b/arch/arm/cpu/armv7/start.S @@ -239,6 +239,14 @@ clbss_l:str r2, [r0] /* clear loop... */ * initialization, now running from RAM. */ jump_2_ram: +/* + * If I-cache is enabled invalidate it + */ +#ifndef CONFIG_SYS_NO_ICACHE + mcr p15, 0, r0, c7, c5, 0 @ invalidate icache + mcr p15, 0, r0, c7, c10, 4 @ DSB + mcr p15, 0, r0, c7, c5, 4 @ ISB +#endif ldr r0, _board_init_r_ofs adr r1, _start add lr, r0, r1 @@ -274,6 +282,9 @@ cpu_init_crit: mov r0, #0 @ set up for MCR mcr p15, 0, r0, c8, c7, 0 @ invalidate TLBs mcr p15, 0, r0, c7, c5, 0 @ invalidate icache + mcr p15, 0, r0, c7, c5, 6 @ invalidate BP array + mcr p15, 0, r0, c7, c10, 4 @ DSB + mcr p15, 0, r0, c7, c5, 4 @ ISB
/* * disable MMU stuff and caches @@ -282,7 +293,12 @@ cpu_init_crit: bic r0, r0, #0x00002000 @ clear bits 13 (--V-) bic r0, r0, #0x00000007 @ clear bits 2:0 (-CAM) orr r0, r0, #0x00000002 @ set bit 1 (--A-) Align - orr r0, r0, #0x00000800 @ set bit 12 (Z---) BTB + orr r0, r0, #0x00000800 @ set bit 11 (Z---) BTB +#ifdef CONFIG_SYS_NO_ICACHE + bic r0, r0, #0x00001000 @ clear bit 12 (I) I-cache +#else + orr r0, r0, #0x00001000 @ set bit 12 (I) I-cache +#endif mcr p15, 0, r0, c1, c0, 0
/* diff --git a/arch/arm/lib/board.c b/arch/arm/lib/board.c index c620d2c..72ee108 100644 --- a/arch/arm/lib/board.c +++ b/arch/arm/lib/board.c @@ -459,6 +459,12 @@ void board_init_r (gd_t *id, ulong dest_addr)
gd->flags |= GD_FLG_RELOC; /* tell others: relocation done */
+ /* + * Enable D$: + * I$, if needed, must be already enabled in start.S + */ + dcache_enable(); + monitor_flash_len = _bss_start_ofs; debug ("monitor flash len: %08lX\n", monitor_flash_len); board_init(); /* Setup chipselects */ diff --git a/arch/arm/lib/cache-cp15.c b/arch/arm/lib/cache-cp15.c index d9175f0..fd97c45 100644 --- a/arch/arm/lib/cache-cp15.c +++ b/arch/arm/lib/cache-cp15.c @@ -34,6 +34,12 @@
DECLARE_GLOBAL_DATA_PTR;
+void __arm_init_before_mmu(void) +{ +} +void arm_init_before_mmu(void) + __attribute__((weak, alias("__arm_init_before_mmu"))); + static void cp_delay (void) { volatile int i; @@ -65,6 +71,7 @@ static inline void mmu_setup(void) int i; u32 reg;
+ arm_init_before_mmu(); /* Set up an identity-mapping for all 4GB, rw for everyone */ for (i = 0; i < 4096; i++) page_table[i] = i << 20 | (3 << 10) | 0x12; diff --git a/arch/arm/lib/cache.c b/arch/arm/lib/cache.c index 27123cd..dc3242c 100644 --- a/arch/arm/lib/cache.c +++ b/arch/arm/lib/cache.c @@ -38,11 +38,6 @@ void __flush_cache(unsigned long start, unsigned long size) /* disable write buffer as well (page 2-22) */ asm("mcr p15, 0, %0, c7, c10, 4" : : "r" (0)); #endif -#ifdef CONFIG_OMAP34XX - void v7_flush_cache_all(void); - - v7_flush_cache_all(); -#endif return; } void flush_cache(unsigned long start, unsigned long size)

1. make sure that page table setup is not done multiple times
2. flush_dcache_all() is more appropriate while disabling the cache than a range flush over the entire memory (flush_cache())
Provide a default implementation for flush_dcache_all() for backward compatibility and to avoid build issues.
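To put the second point in perspective: a range flush over the whole address space issues one clean & invalidate per cache line of the 4 GB space, whereas a set/way flush only visits the lines the cache actually contains. A rough comparison (the cache geometry below is only an example):

#include <common.h>

void flush_cost_example(void)
{
	u32 line_len = 32;			/* bytes per cache line */
	u32 ops_by_mva = 0xFFFFFFFFu / line_len; /* ~134M ops for flush_cache(0, ~0) */
	u32 sets = 256, ways = 4;		/* e.g. a 32 KB, 4-way D-cache */
	u32 ops_by_setway = sets * ways;	/* 1024 ops for flush_dcache_all() */

	printf("range flush: %u ops, set/way flush: %u ops\n",
	       ops_by_mva, ops_by_setway);
}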
Signed-off-by: Aneesh V <aneesh@ti.com>
---
 arch/arm/lib/cache-cp15.c |    9 +++++++--
 arch/arm/lib/cache.c      |   11 +++++++++++
 2 files changed, 18 insertions(+), 2 deletions(-)
diff --git a/arch/arm/lib/cache-cp15.c b/arch/arm/lib/cache-cp15.c index fd97c45..b1ccc3c 100644 --- a/arch/arm/lib/cache-cp15.c +++ b/arch/arm/lib/cache-cp15.c @@ -92,13 +92,18 @@ static inline void mmu_setup(void) set_cr(reg | CR_M); }
+static int mmu_enabled(void) +{ + return get_cr() & CR_M; +} + /* cache_bit must be either CR_I or CR_C */ static void cache_enable(uint32_t cache_bit) { uint32_t reg;
/* The data cache is not active unless the mmu is enabled too */ - if (cache_bit == CR_C) + if ((cache_bit == CR_C) && !mmu_enabled()) mmu_setup(); reg = get_cr(); /* get control reg. */ cp_delay(); @@ -117,7 +122,7 @@ static void cache_disable(uint32_t cache_bit) return; /* if disabling data cache, disable mmu too */ cache_bit |= CR_M; - flush_cache(0, ~0); + flush_dcache_all(); } reg = get_cr(); cp_delay(); diff --git a/arch/arm/lib/cache.c b/arch/arm/lib/cache.c index dc3242c..92b61a2 100644 --- a/arch/arm/lib/cache.c +++ b/arch/arm/lib/cache.c @@ -42,3 +42,14 @@ void __flush_cache(unsigned long start, unsigned long size) } void flush_cache(unsigned long start, unsigned long size) __attribute__((weak, alias("__flush_cache"))); + +/* + * Default implementation: + * do a range flush for the entire range + */ +void __flush_dcache_all(void) +{ + flush_cache(0, ~0); +} +void flush_dcache_all(void) + __attribute__((weak, alias("__flush_dcache_all")));

PL310 is the L2$ controller from ARM used in many SoCs including the Cortex-A9 based OMAP4430
Add support for some of the key PL310 operations:
- Invalidate all
- Invalidate range
- Flush (clean & invalidate) all
- Flush range
(a board-configuration sketch follows this list)
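These routines plug into the framework simply by being strong definitions of the weak v7_outer_cache_*() hooks from cache_v7.c. A board opts in from its config header, roughly as below (the board header is hypothetical; the base address is SoC-specific - the value shown is the OMAP4 one used later in this series):

/* in include/configs/<board>.h */
#define CONFIG_SYS_L2_PL310	1
#define CONFIG_SYS_PL310_BASE	0x48242000	/* PL310 register base for OMAP4 */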
Signed-off-by: Aneesh V <aneesh@ti.com>
---
 README                       |    6 ++
 arch/arm/include/asm/pl310.h |   74 +++++++++++++++++++++++++++
 arch/arm/lib/Makefile        |    1 +
 arch/arm/lib/cache-pl310.c   |  116 ++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 197 insertions(+), 0 deletions(-)
 create mode 100644 arch/arm/include/asm/pl310.h
 create mode 100644 arch/arm/lib/cache-pl310.c
diff --git a/README b/README
index ba01c52..f1547a4 100644
--- a/README
+++ b/README
@@ -453,6 +453,12 @@ The following options need to be configured:
 		CONFIG_SYS_NO_DCACHE - Do not enable data cache in U-Boot
 		CONFIG_SYS_NO_L2CACHE - Do not enable L2 cache in U-Boot
 
+- Cache Configuration for ARM:
+		CONFIG_SYS_L2_PL310 - Enable support for ARM PL310 L2 cache
+		controller
+		CONFIG_SYS_PL310_BASE - Physical base address of PL310
+		controller register space
+
 - Serial Ports:
 		CONFIG_PL010_SERIAL
diff --git a/arch/arm/include/asm/pl310.h b/arch/arm/include/asm/pl310.h new file mode 100644 index 0000000..ffc58e9 --- /dev/null +++ b/arch/arm/include/asm/pl310.h @@ -0,0 +1,74 @@ +/* + * (C) Copyright 2010 + * Texas Instruments Incorporated - http://www.ti.com/ + * + * Aneesh V aneesh@ti.com + * + * See file CREDITS for list of people who contributed to this + * project. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + */ +#ifndef _PL310_H_ +#define _PL310_H_ + +#include <linux/types.h> + +/* Register bit fields */ +#define PL310_AUX_CTRL_ASSOCIATIVITY_MASK (1 << 16) + +struct pl310_regs { + u32 pl310_cache_id; + u32 pl310_cache_type; + u32 pad1[62]; + u32 pl310_ctrl; + u32 pl310_aux_ctrl; + u32 pl310_tag_latency_ctrl; + u32 pl310_data_latency_ctrl; + u32 pad2[60]; + u32 pl310_event_cnt_ctrl; + u32 pl310_event_cnt1_cfg; + u32 pl310_event_cnt0_cfg; + u32 pl310_event_cnt1_val; + u32 pl310_event_cnt0_val; + u32 pl310_intr_mask; + u32 pl310_masked_intr_stat; + u32 pl310_raw_intr_stat; + u32 pl310_intr_clear; + u32 pad3[323]; + u32 pl310_cache_sync; + u32 pad4[15]; + u32 pl310_inv_line_pa; + u32 pad5[2]; + u32 pl310_inv_way; + u32 pad6[12]; + u32 pl310_clean_line_pa; + u32 pad7[1]; + u32 pl310_clean_line_idx; + u32 pl310_clean_way; + u32 pad8[12]; + u32 pl310_clean_inv_line_pa; + u32 pad9[1]; + u32 pl310_clean_inv_line_idx; + u32 pl310_clean_inv_way; +}; + +void pl310_inval_all(void); +void pl310_clean_inval_all(void); +void pl310_inval_range(u32 start, u32 end); +void pl310_clean_inval_range(u32 start, u32 end); + +#endif diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index 454440c..98f32da 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -42,6 +42,7 @@ COBJS-y += cache.o ifndef CONFIG_SYS_NO_CP15_CACHE COBJS-y += cache-cp15.o endif +COBJS-$(CONFIG_SYS_L2_PL310) += cache-pl310.o COBJS-y += interrupts.o COBJS-y += reset.o
diff --git a/arch/arm/lib/cache-pl310.c b/arch/arm/lib/cache-pl310.c new file mode 100644 index 0000000..f55c63a --- /dev/null +++ b/arch/arm/lib/cache-pl310.c @@ -0,0 +1,116 @@ +/* + * (C) Copyright 2010 + * Texas Instruments Incorporated - http://www.ti.com/ + * + * Aneesh V aneesh@ti.com + * + * See file CREDITS for list of people who contributed to this + * project. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + */ +#include <linux/types.h> +#include <asm/io.h> +#include <asm/armv7.h> +#include <asm/pl310.h> +#include <config.h> + +struct pl310_regs *const pl310 = (struct pl310_regs *)CONFIG_SYS_PL310_BASE; + +static void pl310_cache_sync(void) +{ + writel(0, &pl310->pl310_cache_sync); +} + +static void pl310_background_op_all_ways(u32 *op_reg) +{ + u32 assoc_16, associativity, way_mask; + + assoc_16 = readl(&pl310->pl310_aux_ctrl) & + PL310_AUX_CTRL_ASSOCIATIVITY_MASK; + if (assoc_16) + associativity = 16; + else + associativity = 8; + + way_mask = (1 << associativity) - 1; + /* Invalidate all ways */ + writel(way_mask, op_reg); + /* Wait for all ways to be invalidated */ + while (readl(op_reg) && way_mask) + ; + pl310_cache_sync(); +} + +void v7_outer_cache_inval_all(void) +{ + pl310_background_op_all_ways(&pl310->pl310_inv_way); +} + +void v7_outer_cache_flush_all(void) +{ + pl310_background_op_all_ways(&pl310->pl310_clean_inv_way); +} + +/* Flush(clean invalidate) memory from start to stop-1 */ +void v7_outer_cache_flush_range(u32 start, u32 stop) +{ + /* PL310 currently supports only 32 bytes cache line */ + u32 pa, line_size = 32; + + /* + * Align to the beginning of cache-line - this ensures that + * the first 5 bits are 0 as required by PL310 TRM + */ + start &= ~(line_size - 1); + + for (pa = start; pa < stop; pa = pa + line_size) + writel(pa, &pl310->pl310_clean_inv_line_pa); + + pl310_cache_sync(); +} + +/* invalidate memory from start to stop-1 */ +void v7_outer_cache_inval_range(u32 start, u32 stop) +{ + /* PL310 currently supports only 32 bytes cache line */ + u32 pa, line_size = 32; + + /* + * If start address is not aligned to cache-line flush the first + * line to prevent affecting somebody else's buffer + */ + if (start & (line_size - 1)) { + v7_outer_cache_flush_range(start, start + 1); + /* move to next cache line */ + start = (start + line_size - 1) & ~(line_size - 1); + } + + /* + * If stop address is not aligned to cache-line flush the last + * line to prevent affecting somebody else's buffer + */ + if (stop & (line_size - 1)) { + v7_outer_cache_flush_range(stop, stop + 1); + /* align to the beginning of this cache line */ + stop &= ~(line_size - 1); + } + + for (pa = start; pa < stop; pa = pa + line_size) + writel(pa, &pl310->pl310_inv_line_pa); + + pl310_cache_sync(); +}

adapt omap4 to the new layered cache maintenance framework
Signed-off-by: Aneesh V <aneesh@ti.com>
---
 arch/arm/cpu/armv7/omap4/lowlevel_init.S    |   18 ++++++++++++++++++
 arch/arm/include/asm/arch-omap4/sys_proto.h |    1 -
 include/configs/omap4_panda.h               |    8 +++++---
 include/configs/omap4_sdp4430.h             |    8 +++++---
 4 files changed, 28 insertions(+), 7 deletions(-)
diff --git a/arch/arm/cpu/armv7/omap4/lowlevel_init.S b/arch/arm/cpu/armv7/omap4/lowlevel_init.S index 026dfa4..5e6c16f 100644 --- a/arch/arm/cpu/armv7/omap4/lowlevel_init.S +++ b/arch/arm/cpu/armv7/omap4/lowlevel_init.S @@ -45,3 +45,21 @@ lowlevel_init: */ bl s_init pop {ip, pc} + +set_pl310_ctrl_reg: + PUSH {r4-r11, lr} @ save registers - ROM code may pollute + @ our registers + LDR r12, =0x102 @ Set PL310 control register - value in R0 + .word 0xe1600070 @ SMC #0 - hand assembled because -march=armv5 + @ call ROM Code API to set control register + POP {r4-r11, pc} + +.globl v7_outer_cache_enable +v7_outer_cache_enable: + MOV r0, #1 + B set_pl310_ctrl_reg + +.globl v7_outer_cache_disable +v7_outer_cache_disable: + MOV r0, #0 + B set_pl310_ctrl_reg diff --git a/arch/arm/include/asm/arch-omap4/sys_proto.h b/arch/arm/include/asm/arch-omap4/sys_proto.h index 4813e9e..017f216 100644 --- a/arch/arm/include/asm/arch-omap4/sys_proto.h +++ b/arch/arm/include/asm/arch-omap4/sys_proto.h @@ -31,7 +31,6 @@ struct omap_sysinfo { void gpmc_init(void); void watchdog_init(void); u32 get_device_type(void); -void invalidate_dcache(u32); void set_muxconf_regs(void); void sr32(void *, u32, u32, u32); u32 wait_on_value(u32, u32, void *, u32); diff --git a/include/configs/omap4_panda.h b/include/configs/omap4_panda.h index 8684798..d7abf94 100644 --- a/include/configs/omap4_panda.h +++ b/include/configs/omap4_panda.h @@ -45,9 +45,6 @@ #define CONFIG_DISPLAY_CPUINFO 1 #define CONFIG_DISPLAY_BOARDINFO 1
-/* Keep L2 Cache Disabled */ -#define CONFIG_SYS_NO_L2CACHE 1 - /* Clock Defines */ #define V_OSCK 38400000 /* Clock output from T2 */ #define V_SCLK V_OSCK @@ -234,4 +231,9 @@ CONFIG_SYS_INIT_RAM_SIZE - \ GENERATED_GBL_DATA_SIZE)
+#ifndef CONFIG_SYS_NO_L2CACHE +#define CONFIG_SYS_L2_PL310 1 +#define CONFIG_SYS_PL310_BASE 0x48242000 +#endif + #endif /* __CONFIG_H */ diff --git a/include/configs/omap4_sdp4430.h b/include/configs/omap4_sdp4430.h index 7cc0c13..3d5ffe5 100644 --- a/include/configs/omap4_sdp4430.h +++ b/include/configs/omap4_sdp4430.h @@ -46,9 +46,6 @@ #define CONFIG_DISPLAY_CPUINFO 1 #define CONFIG_DISPLAY_BOARDINFO 1
-/* Keep L2 Cache Disabled */ -#define CONFIG_SYS_NO_L2CACHE 1 - /* Clock Defines */ #define V_OSCK 38400000 /* Clock output from T2 */ #define V_SCLK V_OSCK @@ -240,4 +237,9 @@ CONFIG_SYS_INIT_RAM_SIZE - \ GENERATED_GBL_DATA_SIZE)
+#ifndef CONFIG_SYS_NO_L2CACHE +#define CONFIG_SYS_L2_PL310 1 +#define CONFIG_SYS_PL310_BASE 0x48242000 +#endif + #endif /* __CONFIG_H */

adapt omap3 to the new layered cache maintenance framework
Signed-off-by: Aneesh V <aneesh@ti.com>
---
 arch/arm/cpu/armv7/omap3/Makefile           |    1 -
 arch/arm/cpu/armv7/omap3/board.c            |  138 ++++++++++++--
 arch/arm/cpu/armv7/omap3/cache.S            |  263 ---------------------------
 arch/arm/cpu/armv7/omap3/lowlevel_init.S    |   32 ++++
 arch/arm/include/asm/arch-omap3/omap3.h     |   20 ++
 arch/arm/include/asm/arch-omap3/sys_proto.h |   10 +-
 6 files changed, 178 insertions(+), 286 deletions(-)
 delete mode 100644 arch/arm/cpu/armv7/omap3/cache.S
diff --git a/arch/arm/cpu/armv7/omap3/Makefile b/arch/arm/cpu/armv7/omap3/Makefile index 7164d50..522bcd2 100644 --- a/arch/arm/cpu/armv7/omap3/Makefile +++ b/arch/arm/cpu/armv7/omap3/Makefile @@ -26,7 +26,6 @@ include $(TOPDIR)/config.mk LIB = $(obj)lib$(SOC).o
SOBJS := lowlevel_init.o -SOBJS += cache.o
COBJS += board.o COBJS += clock.o diff --git a/arch/arm/cpu/armv7/omap3/board.c b/arch/arm/cpu/armv7/omap3/board.c index 6c2a132..39866e0 100644 --- a/arch/arm/cpu/armv7/omap3/board.c +++ b/arch/arm/cpu/armv7/omap3/board.c @@ -37,8 +37,12 @@ #include <asm/arch/sys_proto.h> #include <asm/arch/mem.h> #include <asm/cache.h> +#include <asm/armv7.h>
+/* Declarations */ extern omap3_sysinfo sysinfo; +static void omap3_setup_aux_cr(void); +static void omap3_invalidate_l2_cache_secure(void);
/****************************************************************************** * Routine: delay @@ -166,27 +170,13 @@ void s_init(void)
try_unlock_memory();
- /* - * Right now flushing at low MPU speed. - * Need to move after clock init - */ - invalidate_dcache(get_device_type()); -#ifndef CONFIG_ICACHE_OFF - icache_enable(); -#endif + /* Errata workarounds */ + omap3_setup_aux_cr();
-#ifdef CONFIG_L2_OFF - l2_cache_disable(); -#else - l2_cache_enable(); +#ifndef CONFIG_SYS_NO_L2CACHE + /* Invalidate L2-cache from secure mode */ + omap3_invalidate_l2_cache_secure(); #endif - /* - * Writing to AuxCR in U-boot using SMI for GP DEV - * Currently SMI in Kernel on ES2 devices seems to have an issue - * Once that is resolved, we can postpone this config to kernel - */ - if (get_device_type() == GP_DEVICE) - setup_auxcr();
set_muxconf_regs(); delay(100); @@ -292,3 +282,113 @@ int checkboard (void) return 0; } #endif /* CONFIG_DISPLAY_BOARDINFO */ + +static void omap3_emu_romcode_call(u32 service_id, u32 *parameters) +{ + u32 i, num_params = *parameters; + u32 *sram_scratch_space = (u32 *)OMAP3_PUBLIC_SRAM_SCRATCH_AREA; + + /* + * copy the parameters to an un-cached area to avoid coherency + * issues + */ + for (i = 0; i < num_params; i++) { + __raw_writel(*parameters, sram_scratch_space); + parameters++; + sram_scratch_space++; + } + + /* Now make the PPA call */ + do_omap3_emu_romcode_call(service_id, OMAP3_PUBLIC_SRAM_SCRATCH_AREA); +} + +static void omap3_update_aux_cr_secure(u32 set_bits, u32 clear_bits) +{ + u32 acr; + + /* Read ACR */ + asm volatile ("mrc p15, 0, %0, c1, c0, 1" : "=r" (acr)); + acr &= ~clear_bits; + acr |= set_bits; + + if (get_device_type() == GP_DEVICE) { + omap3_gp_romcode_call(OMAP3_GP_ROMCODE_API_WRITE_ACR, + acr); + } else { + struct emu_hal_params emu_romcode_params; + emu_romcode_params.num_params = 1; + emu_romcode_params.param1 = acr; + omap3_emu_romcode_call(OMAP3_EMU_HAL_API_WRITE_ACR, + (u32 *)&emu_romcode_params); + } +} + +static void omap3_update_aux_cr(u32 set_bits, u32 clear_bits) +{ + u32 acr; + + /* Read ACR */ + asm volatile ("mrc p15, 0, %0, c1, c0, 1" : "=r" (acr)); + acr &= ~clear_bits; + acr |= set_bits; + + /* Write ACR - affects non-secure banked bits */ + asm volatile ("mcr p15, 0, %0, c1, c0, 1" : : "r" (acr)); +} + +static void omap3_setup_aux_cr(void) +{ + /* Workaround for Cortex-A8 errata: #454179 #430973 + * Set "IBE" bit + * Set "Disable Brach Size Mispredicts" bit + * Workaround for erratum #621766 + * Enable L1NEON bit + * ACR |= (IBE | DBSM | L1NEON) => ACR |= 0xE0 + */ + omap3_update_aux_cr_secure(0xE0, 0); +} + +#if !defined(CONFIG_SYS_NO_DCACHE) && !defined(CONFIG_SYS_NO_L2CACHE) + +/* Invalidate the entire L2 cache from secure mode */ +static void omap3_invalidate_l2_cache_secure(void) +{ + if (get_device_type() == GP_DEVICE) { + omap3_gp_romcode_call(OMAP3_GP_ROMCODE_API_L2_INVAL, + 0); + } else { + struct emu_hal_params emu_romcode_params; + emu_romcode_params.num_params = 1; + emu_romcode_params.param1 = 0; + omap3_emu_romcode_call(OMAP3_EMU_HAL_API_L2_INVAL, + (u32 *)&emu_romcode_params); + } +} + +void v7_outer_cache_enable(void) +{ + /* Set L2EN */ + omap3_update_aux_cr_secure(0x2, 0); + + /* + * On some revisions L2EN bit is banked on some revisions it's not + * No harm in setting both banked bits(in fact this is required + * by an erratum) + */ + omap3_update_aux_cr(0x2, 0); +} + +void v7_outer_cache_disable(void) +{ + /* Clear L2EN */ + omap3_update_aux_cr_secure(0, 0x2); + + /* + * On some revisions L2EN bit is banked on some revisions it's not + * No harm in clearing both banked bits(in fact this is required + * by an erratum) + */ + omap3_update_aux_cr(0, 0x2); +} + +#endif diff --git a/arch/arm/cpu/armv7/omap3/cache.S b/arch/arm/cpu/armv7/omap3/cache.S deleted file mode 100644 index cda87ba..0000000 --- a/arch/arm/cpu/armv7/omap3/cache.S +++ /dev/null @@ -1,263 +0,0 @@ -/* - * Copyright (c) 2009 Wind River Systems, Inc. 
- * Tom Rix Tom.Rix@windriver.com - * - * This file is based on and replaces the existing cache.c file - * The copyrights for the cache.c file are: - * - * (C) Copyright 2008 Texas Insturments - * - * (C) Copyright 2002 - * Sysgo Real-Time Solutions, GmbH <www.elinos.com> - * Marius Groeger mgroeger@sysgo.de - * - * (C) Copyright 2002 - * Gary Jennejohn, DENX Software Engineering, gj@denx.de - * - * See file CREDITS for list of people who contributed to this - * project. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA - */ - -#include <asm/arch/omap3.h> - -/* - * omap3 cache code - */ - -.align 5 -.global invalidate_dcache -.global l2_cache_enable -.global l2_cache_disable -.global setup_auxcr - -/* - * invalidate_dcache() - * - * Invalidate the whole D-cache. - * - * Corrupted registers: r0-r5, r7, r9-r11 - * - * - mm - mm_struct describing address space - */ -invalidate_dcache: - stmfd r13!, {r0 - r5, r7, r9 - r12, r14} - - mov r7, r0 @ take a backup of device type - cmp r0, #0x3 @ check if the device type is - @ GP - moveq r12, #0x1 @ set up to invalide L2 -smi: .word 0x01600070 @ Call SMI monitor (smieq) - cmp r7, #0x3 @ compare again in case its - @ lost - beq finished_inval @ if GP device, inval done - @ above - - mrc p15, 1, r0, c0, c0, 1 @ read clidr - ands r3, r0, #0x7000000 @ extract loc from clidr - mov r3, r3, lsr #23 @ left align loc bit field - beq finished_inval @ if loc is 0, then no need to - @ clean - mov r10, #0 @ start clean at cache level 0 -inval_loop1: - add r2, r10, r10, lsr #1 @ work out 3x current cache - @ level - mov r1, r0, lsr r2 @ extract cache type bits from - @ clidr - and r1, r1, #7 @ mask of the bits for current - @ cache only - cmp r1, #2 @ see what cache we have at - @ this level - blt skip_inval @ skip if no cache, or just - @ i-cache - mcr p15, 2, r10, c0, c0, 0 @ select current cache level - @ in cssr - mov r2, #0 @ operand for mcr SBZ - mcr p15, 0, r2, c7, c5, 4 @ flush prefetch buffer to - @ sych the new cssr&csidr, - @ with armv7 this is 'isb', - @ but we compile with armv5 - mrc p15, 1, r1, c0, c0, 0 @ read the new csidr - and r2, r1, #7 @ extract the length of the - @ cache lines - add r2, r2, #4 @ add 4 (line length offset) - ldr r4, =0x3ff - ands r4, r4, r1, lsr #3 @ find maximum number on the - @ way size - clz r5, r4 @ find bit position of way - @ size increment - ldr r7, =0x7fff - ands r7, r7, r1, lsr #13 @ extract max number of the - @ index size -inval_loop2: - mov r9, r4 @ create working copy of max - @ way size -inval_loop3: - orr r11, r10, r9, lsl r5 @ factor way and cache number - @ into r11 - orr r11, r11, r7, lsl r2 @ factor index number into r11 - mcr p15, 0, r11, c7, c6, 2 @ invalidate by set/way - subs r9, r9, #1 @ decrement the way - bge inval_loop3 - subs r7, r7, #1 @ decrement the index - bge inval_loop2 -skip_inval: - add r10, r10, #2 @ increment cache 
number - cmp r3, r10 - bgt inval_loop1 -finished_inval: - mov r10, #0 @ swith back to cache level 0 - mcr p15, 2, r10, c0, c0, 0 @ select current cache level - @ in cssr - mcr p15, 0, r10, c7, c5, 4 @ flush prefetch buffer, - @ with armv7 this is 'isb', - @ but we compile with armv5 - - ldmfd r13!, {r0 - r5, r7, r9 - r12, pc} - -l2_cache_set: - stmfd r13!, {r4 - r6, lr} - mov r5, r0 - bl get_cpu_rev - mov r4, r0 - bl get_cpu_family - @ ES2 onwards we can disable/enable L2 ourselves - cmp r0, #CPU_OMAP34XX - cmpeq r4, #CPU_3XX_ES10 - mrc 15, 0, r0, cr1, cr0, 1 - bic r0, r0, #2 - orr r0, r0, r5, lsl #1 - mcreq 15, 0, r0, cr1, cr0, 1 - @ GP Device ROM code API usage here - @ r12 = AUXCR Write function and r0 value - mov ip, #3 - @ SMCNE instruction to call ROM Code API - .word 0x11600070 - ldmfd r13!, {r4 - r6, pc} - -l2_cache_enable: - mov r0, #1 - b l2_cache_set - -l2_cache_disable: - mov r0, #0 - b l2_cache_set - -/****************************************************************************** - * Routine: setup_auxcr() - * Description: Write to AuxCR desired value using SMI. - * general use. - *****************************************************************************/ -setup_auxcr: - mrc p15, 0, r0, c0, c0, 0 @ read main ID register - and r2, r0, #0x00f00000 @ variant - and r3, r0, #0x0000000f @ revision - orr r1, r3, r2, lsr #20-4 @ combine variant and revision - mov r12, #0x3 - mrc p15, 0, r0, c1, c0, 1 - orr r0, r0, #0x10 @ Enable ASA - @ Enable L1NEON on pre-r2p1 (erratum 621766 workaround) - cmp r1, #0x21 - orrlt r0, r0, #1 << 5 - .word 0xE1600070 @ SMC - mov r12, #0x2 - mrc p15, 1, r0, c9, c0, 2 - @ Set PLD_FWD bit in L2AUXCR on pre-r2p1 (erratum 725233 workaround) - cmp r1, #0x21 - orrlt r0, r0, #1 << 27 - .word 0xE1600070 @ SMC - bx lr - -.align 5 -.global v7_flush_dcache_all -.global v7_flush_cache_all - -/* - * v7_flush_dcache_all() - * - * Flush the whole D-cache. 
- * - * Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode) - * - * - mm - mm_struct describing address space - */ -v7_flush_dcache_all: -# dmb @ ensure ordering with previous memory accesses - mrc p15, 1, r0, c0, c0, 1 @ read clidr - ands r3, r0, #0x7000000 @ extract loc from clidr - mov r3, r3, lsr #23 @ left align loc bit field - beq finished @ if loc is 0, then no need to clean - mov r10, #0 @ start clean at cache level 0 -loop1: - add r2, r10, r10, lsr #1 @ work out 3x current cache level - mov r1, r0, lsr r2 @ extract cache type bits from clidr - and r1, r1, #7 @ mask of the bits for current cache only - cmp r1, #2 @ see what cache we have at this level - blt skip @ skip if no cache, or just i-cache - mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr - mcr p15, 0, r10, c7, c5, 4 @ flush prefetch buffer, - @ with armv7 this is 'isb', - @ but we compile with armv5 - mrc p15, 1, r1, c0, c0, 0 @ read the new csidr - and r2, r1, #7 @ extract the length of the cache lines - add r2, r2, #4 @ add 4 (line length offset) - ldr r4, =0x3ff - ands r4, r4, r1, lsr #3 @ find maximum number on the way size - clz r5, r4 @ find bit position of way size increment - ldr r7, =0x7fff - ands r7, r7, r1, lsr #13 @ extract max number of the index size -loop2: - mov r9, r4 @ create working copy of max way size -loop3: - orr r11, r10, r9, lsl r5 @ factor way and cache number into r11 - orr r11, r11, r7, lsl r2 @ factor index number into r11 - mcr p15, 0, r11, c7, c14, 2 @ clean & invalidate by set/way - subs r9, r9, #1 @ decrement the way - bge loop3 - subs r7, r7, #1 @ decrement the index - bge loop2 -skip: - add r10, r10, #2 @ increment cache number - cmp r3, r10 - bgt loop1 -finished: - mov r10, #0 @ swith back to cache level 0 - mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr -# dsb - mcr p15, 0, r10, c7, c5, 4 @ flush prefetch buffer, - @ with armv7 this is 'isb', - @ but we compile with armv5 - mov pc, lr - -/* - * v7_flush_cache_all() - * - * Flush the entire cache system. - * The data cache flush is now achieved using atomic clean / invalidates - * working outwards from L1 cache. This is done using Set/Way based cache - * maintainance instructions. - * The instruction cache can still be invalidated back to the point of - * unification in a single instruction. - * - */ -v7_flush_cache_all: - stmfd sp!, {r0-r7, r9-r11, lr} - bl v7_flush_dcache_all - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ I+BTB cache invalidate - ldmfd sp!, {r0-r7, r9-r11, lr} - mov pc, lr diff --git a/arch/arm/cpu/armv7/omap3/lowlevel_init.S b/arch/arm/cpu/armv7/omap3/lowlevel_init.S index 109481e..dfb515b 100644 --- a/arch/arm/cpu/armv7/omap3/lowlevel_init.S +++ b/arch/arm/cpu/armv7/omap3/lowlevel_init.S @@ -35,6 +35,38 @@ _TEXT_BASE: .word CONFIG_SYS_TEXT_BASE /* sdram load addr from config.mk */
+.global omap3_gp_romcode_call +omap3_gp_romcode_call: + PUSH {r4-r12, lr} @ Save all registers from ROM code! + MOV r12, r0 @ Copy the Service ID in R12 + MOV r0, r1 @ Copy parameter to R0 + mcr p15, 0, r0, c7, c10, 4 @ DSB + mcr p15, 0, r0, c7, c10, 5 @ DMB + .word 0xe1600070 @ SMC #0 to enter monitor - hand assembled + @ because we use -march=armv5 + POP {r4-r12, pc} + +/* + * Funtion for making PPA HAL API calls in secure devices + * Input: + * R0 - Service ID + * R1 - paramer list + */ +.global do_omap3_emu_romcode_call +do_omap3_emu_romcode_call: + PUSH {r4-r12, lr} @ Save all registers from ROM code! + MOV r12, r0 @ Copy the Secure Service ID in R12 + MOV r3, r1 @ Copy the pointer to va_list in R3 + MOV r1, #0 @ Process ID - 0 + MOV r2, #OMAP3_EMU_HAL_START_HAL_CRITICAL @ Copy the pointer + @ to va_list in R3 + MOV r6, #0xFF @ Indicate new Task call + mcr p15, 0, r0, c7, c10, 4 @ DSB + mcr p15, 0, r0, c7, c10, 5 @ DMB + .word 0xe1600071 @ SMC #1 to call PPA service - hand assembled + @ because we use -march=armv5 + POP {r4-r12, pc} + #if !defined(CONFIG_SYS_NAND_BOOT) && !defined(CONFIG_SYS_NAND_BOOT) /************************************************************************** * cpy_clk_code: relocates clock code into SRAM where its safer to execute diff --git a/arch/arm/include/asm/arch-omap3/omap3.h b/arch/arm/include/asm/arch-omap3/omap3.h index 3957c79..7b861a3 100644 --- a/arch/arm/include/asm/arch-omap3/omap3.h +++ b/arch/arm/include/asm/arch-omap3/omap3.h @@ -145,8 +145,14 @@ struct gpio { #define SRAM_VECT_CODE (SRAM_OFFSET0 | SRAM_OFFSET1 | \ SRAM_OFFSET2)
+#define OMAP3_PUBLIC_SRAM_BASE 0x40208000 /* Works for GP & EMU */ +#define OMAP3_PUBLIC_SRAM_END 0x40210000 + #define LOW_LEVEL_SRAM_STACK 0x4020FFFC
+/* scratch area - accessible on both EMU and GP */ +#define OMAP3_PUBLIC_SRAM_SCRATCH_AREA OMAP3_PUBLIC_SRAM_BASE + #define DEBUG_LED1 149 /* gpio */ #define DEBUG_LED2 150 /* gpio */
@@ -213,4 +219,18 @@ struct gpio {
#define OMAP3730 0x0c00
+/* + * ROM code API related flags + */ +#define OMAP3_GP_ROMCODE_API_L2_INVAL 1 +#define OMAP3_GP_ROMCODE_API_WRITE_ACR 3 + +/* + * EMU device PPA HAL related flags + */ +#define OMAP3_EMU_HAL_API_L2_INVAL 40 +#define OMAP3_EMU_HAL_API_WRITE_ACR 42 + +#define OMAP3_EMU_HAL_START_HAL_CRITICAL 4 + #endif diff --git a/arch/arm/include/asm/arch-omap3/sys_proto.h b/arch/arm/include/asm/arch-omap3/sys_proto.h index 4a28ba1..995e7cb 100644 --- a/arch/arm/include/asm/arch-omap3/sys_proto.h +++ b/arch/arm/include/asm/arch-omap3/sys_proto.h @@ -27,6 +27,11 @@ typedef struct { char *nand_string; } omap3_sysinfo;
+struct emu_hal_params { + u32 num_params; + u32 param1; +}; + void prcm_init(void); void per_clocks_enable(void);
@@ -53,9 +58,7 @@ u32 is_running_in_sdram(void); u32 is_running_in_sram(void); u32 is_running_in_flash(void); u32 get_device_type(void); -void l2cache_enable(void); void secureworld_exit(void); -void setup_auxcr(void); void try_unlock_memory(void); u32 get_boot_type(void); void invalidate_dcache(u32); @@ -66,5 +69,6 @@ void make_cs1_contiguous(void); void omap_nand_switch_ecc(int); void power_init_r(void); void dieid_num_r(void); - +void do_omap3_emu_romcode_call(u32 service_id, u32 parameters); +void omap3_gp_romcode_call(u32 service_id, u32 parameter); #endif

adapt s5pc1xx to the new layered cache maintenance framework
Signed-off-by: Aneesh V aneesh@ti.com --- arch/arm/cpu/armv7/s5pc1xx/cache.S | 88 ++----------------------- arch/arm/include/asm/arch-s5pc1xx/sys_proto.h | 3 - 2 files changed, 7 insertions(+), 84 deletions(-)
diff --git a/arch/arm/cpu/armv7/s5pc1xx/cache.S b/arch/arm/cpu/armv7/s5pc1xx/cache.S index 7734b32..726afe7 100644 --- a/arch/arm/cpu/armv7/s5pc1xx/cache.S +++ b/arch/arm/cpu/armv7/s5pc1xx/cache.S @@ -23,98 +23,24 @@ * MA 02111-1307 USA */
-#include <asm/arch/cpu.h> - .align 5 -.global invalidate_dcache -.global l2_cache_enable -.global l2_cache_disable - -/* - * invalidate_dcache() - * Invalidate the whole D-cache. - * - * Corrupted registers: r0-r5, r7, r9-r11 - */ -invalidate_dcache: - stmfd r13!, {r0 - r5, r7, r9 - r12, r14} - - cmp r0, #0xC100 @ check if the cpu is s5pc100
- beq finished_inval @ s5pc100 doesn't need this - @ routine - mrc p15, 1, r0, c0, c0, 1 @ read clidr - ands r3, r0, #0x7000000 @ extract loc from clidr - mov r3, r3, lsr #23 @ left align loc bit field - beq finished_inval @ if loc is 0, then no need to - @ clean - mov r10, #0 @ start clean at cache level 0 -inval_loop1: - add r2, r10, r10, lsr #1 @ work out 3x current cache - @ level - mov r1, r0, lsr r2 @ extract cache type bits from - @ clidr - and r1, r1, #7 @ mask of the bits for current - @ cache only - cmp r1, #2 @ see what cache we have at - @ this level - blt skip_inval @ skip if no cache, or just - @ i-cache - mcr p15, 2, r10, c0, c0, 0 @ select current cache level - @ in cssr - mov r2, #0 @ operand for mcr SBZ - mcr p15, 0, r2, c7, c5, 4 @ flush prefetch buffer to - @ sych the new cssr&csidr, - @ with armv7 this is 'isb', - @ but we compile with armv5 - mrc p15, 1, r1, c0, c0, 0 @ read the new csidr - and r2, r1, #7 @ extract the length of the - @ cache lines - add r2, r2, #4 @ add 4 (line length offset) - ldr r4, =0x3ff - ands r4, r4, r1, lsr #3 @ find maximum number on the - @ way size - clz r5, r4 @ find bit position of way - @ size increment - ldr r7, =0x7fff - ands r7, r7, r1, lsr #13 @ extract max number of the - @ index size -inval_loop2: - mov r9, r4 @ create working copy of max - @ way size -inval_loop3: - orr r11, r10, r9, lsl r5 @ factor way and cache number - @ into r11 - orr r11, r11, r7, lsl r2 @ factor index number into r11 - mcr p15, 0, r11, c7, c6, 2 @ invalidate by set/way - subs r9, r9, #1 @ decrement the way - bge inval_loop3 - subs r7, r7, #1 @ decrement the index - bge inval_loop2 -skip_inval: - add r10, r10, #2 @ increment cache number - cmp r3, r10 - bgt inval_loop1 -finished_inval: - mov r10, #0 @ swith back to cache level 0 - mcr p15, 2, r10, c0, c0, 0 @ select current cache level - @ in cssr - mcr p15, 0, r10, c7, c5, 4 @ flush prefetch buffer, - @ with armv7 this is 'isb', - @ but we compile with armv5 +#if !defined(CONFIG_SYS_NO_DCACHE) && !defined(CONFIG_SYS_NO_L2CACHE)
- ldmfd r13!, {r0 - r5, r7, r9 - r12, pc} - -l2_cache_enable: +.global v7_outer_cache_enable +v7_outer_cache_enable: push {r0, r1, r2, lr} mrc 15, 0, r3, cr1, cr0, 1 orr r3, r3, #2 mcr 15, 0, r3, cr1, cr0, 1 pop {r1, r2, r3, pc}
-l2_cache_disable: +.global v7_outer_cache_disable +v7_outer_cache_disable: push {r0, r1, r2, lr} mrc 15, 0, r3, cr1, cr0, 1 bic r3, r3, #2 mcr 15, 0, r3, cr1, cr0, 1 pop {r1, r2, r3, pc} + +#endif diff --git a/arch/arm/include/asm/arch-s5pc1xx/sys_proto.h b/arch/arm/include/asm/arch-s5pc1xx/sys_proto.h index 3078aaf..7b83c5a 100644 --- a/arch/arm/include/asm/arch-s5pc1xx/sys_proto.h +++ b/arch/arm/include/asm/arch-s5pc1xx/sys_proto.h @@ -25,8 +25,5 @@ #define _SYS_PROTO_H_
u32 get_device_type(void); -void invalidate_dcache(u32); -void l2_cache_disable(void); -void l2_cache_enable(void);
#endif

Hi,
Can I inquire please what has happened with this patch series? Will it be applied? I have done some basic testing here on Tegra2 and it appears to work.
Regards, Simon
On Tue, Mar 8, 2011 at 5:07 AM, Aneesh V aneesh@ti.com wrote:
With D-cache and MMU enabled for ARM in u-boot it becomes imperative to support a minimal set of cache maintenance operations and necessary initializations before enabling MMU.
This series of patches attempt to do the following for armv7:
- Necessary initialization sequence before enabling MMU that includes
invalidation of TLB, data caches, branch predictor array etc.
- Framework for supporting SOC specific outer caches in a generic manner
(using a structure of function pointers - inspired by the Linux implementation)
- Generic armv7 cache maintenance operations for caches known to the CPU
- Support for ARM PL310 L2 cache controller used in OMAP4
- Cleanup of the cleanup_before_linux() function
- Adapting all armv7 SOCs to use the new framework and removing
duplicated code
Testing:
- Extensive testing on OMAP4430SDP and OMAP3430SDP by creating coherency
issues and solving them using the maintenance routines - Eg: memfill a region of memory with a known pattern - Invalidate the region - Read back and compare the region with the original pattern - If match fails it means that invalidate is successful - Now add a flush call just before the invalidate - If match succeeds it means that flush was successful - Outer caches were tested with experiments involving making the function pointers NULL
- Kernel booting on OMAP4430SDP and OMAP3430SDP
Note: v2 has been tested only on OMAP4430SDP
v2:
- Pointer based callback mechanism for outer cache operations
changed to a weakly linked functions.
- Change -march=armv7-a back to armv5
- Moved utility macros out of armv7.h
- Added documentation for new CONFIG options.
- Changed implementation of log2n to not use CLZ instruction as armv4
doesn't support this instruction and newly added Tegra2 uses -march=armv4
- Blank line after local variable declarations - fixed globally
- Explicitly added an empty flush_cache() under
#ifdef CONFIG_SYS_NO_DCACHE
- Removed the print inside the weakly linked stub function -
__arm_init_before_mmu
- Fixed signature of flush_cache in cache.c
- More descriptive commit message for the PL310 support patch
- C struct for PL310 register accesses
- Fixed white space issues
Aneesh V (10): arm: make default implementation of cache_flush() weakly linked armv7: add miscellaneous utility macros armv7: cache maintenance operations for armv7 armv7: replace CONFIG_L2_OFF with CONFIG_SYS_NO_L2CACHE armv7: integrate cache maintenance support arm: minor fixes for cache and mmu handling armv7: add PL310 support to u-boot armv7: adapt omap4 to the new cache maintenance framework armv7: adapt omap3 to the new cache maintenance framework armv7: adapt s5pc1xx to the new cache maintenance framework

On Tue, Mar 8, 2011 at 5:07 AM, Aneesh V aneesh@ti.com wrote:
With D-cache and MMU enabled for ARM in u-boot it becomes imperative to support a minimal set of cache maintenance operations and necessary initializations before enabling MMU.
This series of patches attempt to do the following for armv7:
- Necessary initialization sequence before enabling MMU that includes
invalidation of TLB, data caches, branch predictor array etc.
- Framework for supporting SOC specific outer caches in a generic manner
(using a structure of function pointers - inspired by the Linux implementation)
- Generic armv7 cache maintenance operations for caches known to the CPU
- Support for ARM PL310 L2 cache controller used in OMAP4
- Cleanup of the cleanup_before_linux() function
- Adapting all armv7 SOCs to use the new framework and removing
duplicated code
Any thoughts on this? What is the status please? Will this patch be applied, or if not, what is left to be done?
Regards, Simon

Albert, Wolfgang,
Do you plan to take this series or do you expect some more changes?
best regards, Aneesh
On Thursday 05 May 2011 10:18 AM, Simon Glass wrote:
On Tue, Mar 8, 2011 at 5:07 AM, Aneesh V <aneesh@ti.com <mailto:aneesh@ti.com>> wrote: With D-cache and MMU enabled for ARM in u-boot it becomes imperative to support a minimal set of cache maintenance operations and necessary initializations before enabling MMU. This series of patches attempt to do the following for armv7: * Necessary initialization sequence before enabling MMU that includes invalidation of TLB, data caches, branch predictor array etc. * Framework for supporting SOC specific outer caches in a generic manner (using a structure of function pointers - inspired by the Linux implementation) * Generic armv7 cache maintenance operations for caches known to the CPU * Support for ARM PL310 L2 cache controller used in OMAP4 * Cleanup of the cleanup_before_linux() function * Adapting all armv7 SOCs to use the new framework and removing duplicated code
Any thoughts on this? What is the status please? Will this patch be applied, or if not, what is left to be done?
Regards, Simon

With D-cache and MMU enabled for ARM in u-boot it becomes imperative to support a minimal set of cache maintenance operations and necessary initializations before enabling MMU.
This series of patches attempt to do the following for armv7: * Necessary initialization sequence before enabling MMU that includes invalidation of TLB, data caches, branch predictor array etc. * Framework for supporting SOC specific outer caches in a generic manner (using a structure of function pointers - inspired by the Linux implementation) * Generic armv7 cache maintenance operations for caches known to the CPU * Support for ARM PL310 L2 cache controller used in OMAP4 * Cleanup of the cleanup_before_linux() function * Adapting all armv7 SOCs to use the new framework and removing duplicated code
Testing: * Extensive testing on OMAP4430SDP and OMAP3430SDP by creating coherency issues and solving them using the maintenance routines - Eg: memfill a region of memory with a known pattern - Invalidate the region - Read back and compare the region with the original pattern - If match fails it means that invalidate is successful - Now add a flush call just before the invalidate - If match succeeds it means that flush was successful - Outer caches were tested with experiments involving making the function pointers NULL * Kernel booting on OMAP4430SDP and OMAP3430SDP Note: v2 has been tested only on OMAP4430SDP
V2: * Pointer based callback mechanism for outer cache operations changed to a weakly linked functions. * Change -march=armv7-a back to armv5 * Moved utility macros out of armv7.h * Added documentation for new CONFIG options. * Changed implementation of log2n to not use CLZ instruction as armv4 doesn't support this instruction and newly added Tegra2 uses -march=armv4 * Blank line after local variable declarations - fixed globally * Explicitly added an empty flush_cache() under #ifdef CONFIG_SYS_NO_DCACHE * Removed the print inside the weakly linked stub function - __arm_init_before_mmu * Fixed signature of flush_cache in cache.c * More descriptive commit message for the PL310 support patch * C struct for PL310 register accesses * Fixed white space issues
V3: * Rebased to latest HEAD of master * Added comments on changes done in V2 in individual patch headers. This was missing in V2
Aneesh V (10): arm: make default implementation of cache_flush() weakly linked armv7: add miscellaneous utility macros armv7: cache maintenance operations for armv7 armv7: replace CONFIG_L2_OFF with CONFIG_SYS_NO_L2CACHE armv7: integrate cache maintenance support arm: minor fixes for cache and mmu handling armv7: add PL310 support to u-boot armv7: adapt omap4 to the new cache maintenance framework armv7: adapt omap3 to the new cache maintenance framework armv7: adapt s5pc1xx to the new cache maintenance framework
README | 11 + arch/arm/cpu/armv7/Makefile | 2 +- arch/arm/cpu/armv7/cache_v7.c | 390 +++++++++++++++++++++++++ arch/arm/cpu/armv7/cpu.c | 51 ++-- arch/arm/cpu/armv7/omap3/Makefile | 1 - arch/arm/cpu/armv7/omap3/board.c | 138 ++++++++-- arch/arm/cpu/armv7/omap3/cache.S | 263 ----------------- arch/arm/cpu/armv7/omap3/lowlevel_init.S | 32 ++ arch/arm/cpu/armv7/omap4/lowlevel_init.S | 18 ++ arch/arm/cpu/armv7/s5pc1xx/cache.S | 88 +----- arch/arm/cpu/armv7/start.S | 18 +- arch/arm/include/asm/arch-omap3/omap3.h | 20 ++ arch/arm/include/asm/arch-omap3/sys_proto.h | 10 +- arch/arm/include/asm/arch-omap4/sys_proto.h | 1 - arch/arm/include/asm/arch-s5pc1xx/sys_proto.h | 3 - arch/arm/include/asm/armv7.h | 68 +++++ arch/arm/include/asm/pl310.h | 74 +++++ arch/arm/include/asm/utils.h | 80 +++++ arch/arm/lib/Makefile | 1 + arch/arm/lib/board.c | 6 + arch/arm/lib/cache-cp15.c | 16 +- arch/arm/lib/cache-pl310.c | 116 ++++++++ arch/arm/lib/cache.c | 20 +- include/common.h | 5 +- include/configs/ca9x4_ct_vxp.h | 2 +- include/configs/efikamx.h | 2 +- include/configs/mx51evk.h | 2 +- include/configs/mx53evk.h | 2 +- include/configs/omap4_panda.h | 8 +- include/configs/omap4_sdp4430.h | 8 +- include/configs/s5pc210_universal.h | 2 +- include/configs/tegra2-common.h | 2 +- include/configs/vision2.h | 2 +- 33 files changed, 1037 insertions(+), 425 deletions(-) create mode 100644 arch/arm/cpu/armv7/cache_v7.c delete mode 100644 arch/arm/cpu/armv7/omap3/cache.S create mode 100644 arch/arm/include/asm/armv7.h create mode 100644 arch/arm/include/asm/pl310.h create mode 100644 arch/arm/include/asm/utils.h create mode 100644 arch/arm/lib/cache-pl310.c

make default implementation of cache_flush() weakly linked so that sub-architectures can override it
Signed-off-by: Aneesh V aneesh@ti.com --- arch/arm/lib/cache.c | 4 +++- 1 files changed, 3 insertions(+), 1 deletions(-)
diff --git a/arch/arm/lib/cache.c b/arch/arm/lib/cache.c index 30686fe..27123cd 100644 --- a/arch/arm/lib/cache.c +++ b/arch/arm/lib/cache.c @@ -25,7 +25,7 @@
#include <common.h>
-void flush_cache (unsigned long dummy1, unsigned long dummy2) +void __flush_cache(unsigned long start, unsigned long size) { #if defined(CONFIG_OMAP2420) || defined(CONFIG_ARM1136) void arm1136_cache_flush(void); @@ -45,3 +45,5 @@ void flush_cache (unsigned long dummy1, unsigned long dummy2) #endif return; } +void flush_cache(unsigned long start, unsigned long size) + __attribute__((weak, alias("__flush_cache")));

add utility macros for: * bit field operations * log2n functions
Signed-off-by: Aneesh V aneesh@ti.com --- V2: * Newly added in v2 --- arch/arm/include/asm/utils.h | 80 ++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 80 insertions(+), 0 deletions(-) create mode 100644 arch/arm/include/asm/utils.h
diff --git a/arch/arm/include/asm/utils.h b/arch/arm/include/asm/utils.h new file mode 100644 index 0000000..d581539 --- /dev/null +++ b/arch/arm/include/asm/utils.h @@ -0,0 +1,80 @@ +/* + * (C) Copyright 2010 + * Texas Instruments, <www.ti.com> + * + * Aneesh V aneesh@ti.com + * + * See file CREDITS for list of people who contributed to this + * project. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + */ +#ifndef _UTILS_H_ +#define _UTILS_H_ + +/* extract a bit field from a bit vector */ +#define get_bit_field(nr, start, mask)\ + (((nr) & (mask)) >> (start)) + +/* Set a field in a bit vector */ +#define set_bit_field(nr, start, mask, val)\ + do { \ + (nr) = ((nr) & ~(mask)) | (((val) << (start)) & (mask));\ + } while (0); + +/* + * Utility macro for read-modify-write of a hardware register + * addr - address of the register + * shift - starting bit position of the field to be modified + * msk - mask for the field + * val - value to be shifted masked and written to the field + */ +#define modify_reg_32(addr, shift, msk, val) \ + do {\ + writel(((readl(addr) & ~(msk))|(((val) << (shift)) & (msk))),\ + (addr));\ + } while (0); + +static inline s32 log_2_n_round_up(u32 n) +{ + s32 log2n = -1; + u32 temp = n; + + while (temp) { + log2n++; + temp >>= 1; + } + + if (n & (n - 1)) + return log2n + 1; /* not power of 2 - round up */ + else + return log2n; /* power of 2 */ +} + +static inline s32 log_2_n_round_down(u32 n) +{ + s32 log2n = -1; + u32 temp = n; + + while (temp) { + log2n++; + temp >>= 1; + } + + return log2n; +} + +#endif /* _OMAP_COMMON_H_ */
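As a usage sketch (the register and field names below are invented; only the macro signatures come from the patch):

	u32 csidr = readl(SOME_CACHE_REG);	/* hypothetical register */
	u32 assoc = get_bit_field(csidr, ASSOC_START, ASSOC_MASK);

	set_bit_field(csidr, ASSOC_START, ASSOC_MASK, assoc + 1);
	writel(csidr, SOME_CACHE_REG);

	/* log_2_n_round_up(32) == 5, log_2_n_round_up(33) == 6,
	 * log_2_n_round_down(33) == 5 */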

Dear Aneesh V,
In message 1305202276-27784-3-git-send-email-aneesh@ti.com you wrote:
add utility macros for:
- bit field operations
- log2n functions
...
+/* extract a bit field from a bit vector */ +#define get_bit_field(nr, start, mask)\
- (((nr) & (mask)) >> (start))
+/* Set a field in a bit vector */ +#define set_bit_field(nr, start, mask, val)\
- do { \
(nr) = ((nr) & ~(mask)) | (((val) << (start)) & (mask));\
- } while (0);
I really dislike such "useful" helpers, because they make the code unreadable. Being nonstandard, nobody knows what they are doing, so you always will have to look up the implementation before you can continue reading the code. It's a waste of time and resources.
+/*
- Utility macro for read-modify-write of a hardware register
- addr - address of the register
- shift - starting bit position of the field to be modified
- msk - mask for the field
- val - value to be shifted masked and written to the field
- */
+#define modify_reg_32(addr, shift, msk, val) \
- do {\
writel(((readl(addr) & ~(msk))|(((val) << (shift)) & (msk))),\
(addr));\
- } while (0);
NAK again, for the same reasons.
Note that there are some semi-standardized I/O accessor macros available, at least for some architectures, like clrbits_*(), setbits_*(), or clrsetbits_*().
See for example "arch/arm/include/asm/io.h", "arch/powerpc/include/asm/io.h" for reference.
Instead of reinventing the wheel (just differently shaped) we should rather try and use a single, standardized set of such helpers.
So please use the existing macros instead of inventing new, non-standard ones.
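For reference, those accessors are used along the following lines where an architecture's asm/io.h provides them; the register address and field values below are invented purely for illustration:

#include <asm/io.h>

#define CTRL_REG	((u32 *)0x4a002000)	/* hypothetical register address */
#define MODE_MASK	(0x7 << 4)		/* hypothetical 3-bit field, bits 6:4 */
#define MODE_FAST	(0x3 << 4)

	/* read-modify-write: clear the field, then set the new value */
	clrsetbits_le32(CTRL_REG, MODE_MASK, MODE_FAST);
	setbits_le32(CTRL_REG, 1 << 0);		/* set a single enable bit */
	clrbits_le32(CTRL_REG, 1 << 31);	/* clear a single bit */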
Best regards,
Wolfgang Denk

On Sun, May 15, 2011 at 11:44 AM, Wolfgang Denk wd@denx.de wrote:
Dear Aneesh V,
In message 1305202276-27784-3-git-send-email-aneesh@ti.com you wrote:
add utility macros for:
- bit field operations
- log2n functions
...
+/* extract a bit field from a bit vector */ +#define get_bit_field(nr, start, mask)\
(((nr) & (mask)) >> (start))
+/* Set a field in a bit vector */ +#define set_bit_field(nr, start, mask, val)\
do { \
(nr) = ((nr) & ~(mask)) | (((val) << (start)) & (mask));\
} while (0);
I really dislike such "useful" helpers, because they make the code unreadable. Being nonstandard, nobody knows what they are doing, so you always will have to look up the implementation before you can continue reading the code. It's a waste of time and resources.
Hi Wolfgang,
Please consider my thoughts below on this subject.
It would be good to enhance these helpers to address the bitfield disaster which is a modern SOC.
I agree that having standard helpers is useful and in fact I don't believe the existing clrbits and setbits go far enough. I have noticed that various architectures have their own macros for handling bitfields. Since these are all different (thus you need to read each one to understand the code, as you say) and pan-U-Boot solutions appear to be rejected, we are stuck with bloated, error-prone code full of shifts and masks.
I believe that this problem is getting worse - e.g. USB on Tegra2 writes various fields of about 20 registers to get things up and running. I find translating SOC datasheet register definitions into C code with shifts and masks to be slow and error-prone work. Also we do need to maintain this code, and it gets reused for new SOC variants, etc. So it is not as if it is written once and then buried and forgotten. There is also a tendency to use 'magic' constants rather than #define values or something with a sensible name, then hopefully add a half-hearted comment. This requires constant trips back to the datasheet to see which bits were chosen.
Being a boot loader, charged with basic hardware initialisation, I believe bitfield access primitives should be well-supported by U-Boot.
Would you consider an RFC patch to add pan-U-Boot bitfield operations? Failing that, how about just for ARM?
Regards, Simon
+/*
- Utility macro for read-modify-write of a hardware register
- addr - address of the register
- shift - starting bit position of the field to be modified
- msk - mask for the field
- val - value to be shifted masked and written to the field
- */
+#define modify_reg_32(addr, shift, msk, val) \
do {\
writel(((readl(addr) & ~(msk))|(((val) << (shift)) &
(msk))),\
(addr));\
} while (0);
NAK again, for the same reasons.
Note that there are some semi-standardized I/O accessor macros available, at least for some architectures, like clrbits_*(), setbits_*(), or clrsetbits_*().
See for example "arch/arm/include/asm/io.h", "arch/powerpc/include/asm/io.h" for reference.
Instead of reinventing the wheel (just differently shaped) we should rather try and use a single, standardized set of such helpers.
So please use the existing macros instead of inventing new, non-standard ones.
Best regards,
Wolfgang Denk

On Sun, May 15, 2011 at 03:15:46PM -0700, Simon Glass wrote:
I believe that this problem is getting worse - e.g. USB on Tegra2 writes various fields of about 20 registers to get things up and running. I find translating SOC datasheet register definitions into C code with shifts and masks to be slow and error-prone work. Also we do need to maintain this code, and it gets reused for new SOC variants, etc. So it is not as if it is written once and then buried and forgotten. There is also a tendency to use 'magic' constants rather than #define values or something with a sensible name, then hopefully add a half-hearted comment. This requires constant return looks at the datasheet to see what bits were chosen.
I have nothing against helper functions like this (although I think they should be inline functions, not macros, to guard against bugs due to side-effects in arguments).
But wouldn't it be even better to define the registers as C structures containing bitfields, so the C compiler can do the error-prone shifting and masking? Well-commented structure definitions for these registers also eliminate the need to refer back to the datasheet when working on the code.
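What Eric describes might look roughly like this hypothetical register layout, shown only to illustrate the idea (bit-field ordering within a word is implementation-defined, which is part of why the thread below argues against this for device registers):

struct ctrl_reg {			/* hypothetical register layout */
	unsigned int enable:1;		/* bit 0 */
	unsigned int mode:3;		/* bits 3:1 */
	unsigned int reserved:28;	/* bits 31:4 */
};

	volatile struct ctrl_reg *ctrl = (volatile struct ctrl_reg *)0x4a002000;

	ctrl->mode = 3;			/* the compiler performs the shift/mask */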

On Sun, May 15, 2011 at 7:23 PM, Eric Cooper ecc@cmu.edu wrote:
On Sun, May 15, 2011 at 03:15:46PM -0700, Simon Glass wrote:
I believe that this problem is getting worse - e.g. USB on Tegra2 writes various fields of about 20 registers to get things up and running. I find translating SOC datasheet register definitions into C code with shifts
and
masks to be slow and error-prone work. Also we do need to maintain this code, and it gets reused for new SOC variants, etc. So it is not as if it
is
written once and then buried and forgotten. There is also a tendency to
use
'magic' constants rather than #define values or something with a sensible name, then hopefully add a half-hearted comment. This requires constant return looks at the datasheet to see what bits were chosen.
I have nothing against helper functions like this (although I think they should be inline functions, not macros, to guard against bugs due to side-effects in arguments).
This does limit the functionality, unfortunately. With macros it is possible to (for example) derive a shift and a mask from a single base token.
But wouldn't it be even better to define the registers as C structures containing bitfields, so the C compiler can do the error-prone shifting and masking? Well-commented structure definitions for these registers also eliminate the need to refer back to the datasheet when working on the code.
I believe the problem here is the read/modify/write behaviour which is not well-defined in C. One ends up using volatile to ensure the right behaviour, and then this doesn't necessarily work either. Where one write must happen before the next read the compiler must have barriers to indicate this, which would then have to be inserted manually (rather than just in the writel() macro). The opposite problem is that if you update several fields at once the compiler may or may not optimise these into a single access, and this is not under programmer control. So writel() and readl() are preferred.
Perhaps in newer compilers bitfield semantics have been improved?
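To make the contrast concrete (REG_ADDR and regs are hypothetical names): with readl()/writel() the access width and the single read-modify-write are explicit, while with a volatile bitfield struct the number and width of bus accesses is left to the compiler:

	/* explicit accessors: exactly one 32-bit read and one 32-bit write */
	u32 v = readl(REG_ADDR);
	v &= ~(0x7 << 4);		/* clear a 3-bit field */
	v |= 3 << 4;			/* set it to 3 */
	writel(v, REG_ADDR);

	/* volatile bitfield struct: access pattern is up to the compiler */
	regs->mode = 3;
	regs->enable = 1;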

Dear Simon Glass,
In message BANLkTin6o4OEp7aBfCsxpt=z5oZToxVE2g@mail.gmail.com you wrote:
I believe the problem here is the read/modify/write behaviour which is not well-defined in C. One ends up using volatile to ensure the right behaviour, and then this doesn't necessarily work either. Where one write must happen
volatile does not help anything here. You MUST always make sure to use proper I/O accessors. Even in ARM code this has become mandatory by now, even if older ARM cores and compilers have been forgiving enough for a long time.
Perhaps in newer compilers bitfield semantics have been improved?
Bitfields have always been a mess, and should be avoided like hell.
Best regards,
Wolfgang Denk

Dear Simon Glass,
In message BANLkTi=ZL-WHPE9mXDgBZzXoEaZ10pBhsg@mail.gmail.com you wrote:
Being a boot loader, charged with basic hardware initialisation, I believe bitfield access primitives should be well-supported by U-Boot.
I agree, and they are.
Would you consider an RFC patch to add pan-U-Boot bitfield operations? Failing that, how about just for ARM?
I fail to understand what you mean. We already have such implementations in place. I just object to adding new, non-standard ones when the existing ones serve the same purpose.
Best regards,
Wolfgang Denk

On Sun, May 15, 2011 at 10:51 PM, Wolfgang Denk wd@denx.de wrote:
Dear Simon Glass,
In message BANLkTi=ZL-WHPE9mXDgBZzXoEaZ10pBhsg@mail.gmail.com you wrote:
Being a boot loader, charged with basic hardware initialisation, I believe bitfield access primitives should be well-supported by U-Boot.
I agree, and they are.
Hi Wolfgang,
Well there is something there, but we can always do better.
Would you consider an RFC patch to add pan-U-Boot bitfield operations? Failing that, how about just for ARM?
I fail to understand what you mean. We already have such implementations in place. I just object to adding new, non-standard ones when the existing ones serve the same purpose.
There are a few very primitive macros in setbits and clrbits. I would very much like to see at least:
- define a field once in a header in an easy format along with any enums which define allowable values if applicable
- pack and unpack a value into a field: so set a bitfield to 13, for example
- obtain a mask for a field (i.e. with all bits set)
- equivalent of writel and readl for a bitfield (writel in fact being read/modify/write so perhaps a different name)
Some of these exist, some don't.
Any thoughts? If we get something in there I suspect ARM people will use it at least, and that will reduce the number of ways this gets done. At present the local bitfield macros are not accessible even to other SOCs, let alone other architectures. As this thread shows (and some of the code I have been reviewing recently), there is a demand for something more functional.
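One possible shape for such primitives, sketched only to make the wish-list above concrete (none of these names exist in U-Boot or Linux as of this thread):

/* define a field once: a hypothetical <name>_SHIFT / <name>_WIDTH convention */
#define USB_MODE_SHIFT		4
#define USB_MODE_WIDTH		3

#define field_mask(f)		(((1 << f##_WIDTH) - 1) << f##_SHIFT)
#define field_prep(f, v)	(((v) << f##_SHIFT) & field_mask(f))
#define field_get(x, f)		(((x) & field_mask(f)) >> f##_SHIFT)

	u32 v = field_prep(USB_MODE, 3);	/* pack the value 3 into the field */
	u32 mode = field_get(v, USB_MODE);	/* unpack it again: mode == 3 */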
Regards, Simon
Best regards,
Wolfgang Denk

Dear Simon Glass,
In message BANLkTine7BFSYOusgxwg0rg3owNbPN_dOQ@mail.gmail.com you wrote:
There are a few very primitive macros in setbits and clrbits. I would very much like to see at least:
Maybe we can agree to use these existing macros then instead of inventing new ones with the same functionality.
- define a field once in a header in an easy format along with any
enums which define allowable values if applicable
- pack and unpack a value into a field: so set a bitfield to 13, for example
- obtain a mask for a field (i.e. with all bits set)
- equivalent of writel and readl for a bitfield (writel in fact being
read/modify/write so perhaps a different name)
Some of these exist, some don't.
Do you know of examples of such more complex definitions in the Linux kernel code?
Any thoughts? If we get something in there I suspect ARM people will use it at least, and that will reduce the number of ways this gets
I doubt that. You can see that even the existing macros are unknown to many, and they also do not bother to check around or ask. It's always so much easier to invent your own new shiny macro that fits your current purpose perfectly.
done. At present the local bitfield macros are not accessible even to other SOCs, let alone other architectures. As this thread shows (and some of the code I have been reviewing recently), there is a demand for something more functional.
How is this problem being addressed in Linux? At the very minimum we should choose a compatible implementation.
Best regards,
Wolfgang Denk

Hi Wolfgang,
On Tuesday 17 May 2011 10:57 AM, Wolfgang Denk wrote:
Dear Simon Glass,
In message BANLkTine7BFSYOusgxwg0rg3owNbPN_dOQ@mail.gmail.com you wrote:
There are a few very primitive macros in setbits and clrbits. I would very much like to see at least:
Maybe we can agree to use these existing macros then instead of inventing new ones with the same functionality.
- define a field once in a header in an easy format along with any
enums which define allowable values if applicable
- pack and unpack a value into a field: so set a bitfield to 13, for example
- obtain a mask for a field (i.e. with all bits set)
- equivalent of writel and readl for a bitfield (writel in fact being
read/modify/write so perhaps a different name)
Some of these exist, some don't.
Do you know of examples of such more complex definitions in the Linux kernel code?
In fact I had searched for a macro for similar needs as my set_bit_field() is addressing in Linux Kernel too but didn't find any.
Please note that my requirement is not for doing IO but for doing bit field operations on a C integer variable:
1. Prepare a variable field by field to be finally written to a register using writel()
2. Extract a field from a variable that typically holds the value of a register after a readl()
I couldn't find any utility macros/functions for doing something like this.
Please somebody correct me if I am wrong.
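In other words, the intended pattern is one readl()/writel() per register with all field manipulation done on a local variable; roughly (the register and field names here are hypothetical):

	u32 r = readl(L2_CTRL);					/* hypothetical register */
	u32 ways = get_bit_field(r, WAYS_START, WAYS_MASK);	/* use case 2: extract */

	set_bit_field(r, WAYS_START, WAYS_MASK, ways + 1);	/* use case 1: prepare... */
	set_bit_field(r, EN_START, EN_MASK, 1);
	writel(r, L2_CTRL);					/* ...then one IO write */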
best regards, Aneesh

Dear Aneesh V,
In message 4DD23561.70105@ti.com you wrote:
In fact I had searched for a macro for similar needs as my set_bit_field() is addressing in Linux Kernel too but didn't find any.
...
I couldn't find any utility macros/functions for doing something like this.
Please somebody correct me if I am wrong.
I am not aware of such a functionality in the Linux kernel, either.
A quick search did not even show any attempts to introduce such macros to Linux. It appears they don't need such a feature ?
But I may easily be overlooking something.
Best regards,
Wolfgang Denk

Hi Wolfgang,
On Tue, May 17, 2011 at 2:57 PM, Wolfgang Denk wd@denx.de wrote:
Dear Aneesh V,
In message 4DD23561.70105@ti.com you wrote:
In fact I had searched for a macro for similar needs as my set_bit_field() is addressing in Linux Kernel too but didn't find any.
...
I couldn't find any utility macros/functions for doing something like this.
Please somebody correct me if I am wrong.
I am not aware of such a functionality in the Linux kernel, either.
A quick search did not even show any attempts to introduce such macros to Linux. It appears they don't need such a feature?
Wolfgang, what's your final take on this? Can I continue to use the utility macros, given that I don't have any 'standard' alternative? For register accesses I shall move to the clrsetbits*() macros that you had suggested. But for bit operations I do not have any alternatives.
best regards, Aneesh

On Tue, May 31, 2011 at 12:54 AM, V, Aneesh aneesh@ti.com wrote:
Hi Wolfgang,
On Tue, May 17, 2011 at 2:57 PM, Wolfgang Denk wd@denx.de wrote:
Dear Aneesh V,
In message 4DD23561.70105@ti.com you wrote:
In fact I had searched for a macro for similar needs as my set_bit_field() is addressing in Linux Kernel too but didn't find any.
...
I couldn't find any utility macros/functions for doing something like this.
Please somebody correct me if I am wrong.
I am not aware of such a functionality in the Linux kernel, either.
A quick search did not even show any attempts to introduce such macros to Linux. It appears they don't need such a feature?
Wolfgang, what's your final take on this? Can I continue to use the utility macros, given that I don't have any 'standard' alternative? For register accesses I shall move to the clrsetbits*() macros that you had suggested. But for bit operations I do not have any alternatives.
best regards, Aneesh
Hi Aneesh,
If it helps, I will email a cleaned-up bitfield patch to the list tomorrow. I'm hoping that it will get a warm reception :-)
Regards, Simon

Hi Simon,
On Wednesday 01 June 2011 07:43 AM, Simon Glass wrote:
On Tue, May 31, 2011 at 12:54 AM, V, Aneesh aneesh@ti.com wrote:
Hi Wolfgang,
On Tue, May 17, 2011 at 2:57 PM, Wolfgang Denk wd@denx.de wrote:
Dear Aneesh V,
In message 4DD23561.70105@ti.com you wrote:
In fact I had searched for a macro for similar needs as my set_bit_field() is addressing in Linux Kernel too but didn't find any.
...
I couldn't find any utility macros/functions for doing something like this.
Please somebody correct me if I am wrong.
I am not aware of such a functionality in the Linux kernel, either.
A quick search did not even show any attempts to introduce such macros to Linux. It appears they don't need such a feature ?
Wolfgang, what's your final take on this. Can I continue to use the utility macros given that I don't have any 'standard' alternative. For register accesses I shall move to setclrbits*() macros that you had suggested. But for bit operations I do not have any alternatives.
best regards, Aneesh
Hi Aneesh,
If it helps, I will email a cleaned-up bitfield patch to the list tomorrow. I'm hoping that it will get a warm reception :-)
Thanks. That's great news at least for me.
best regards, Aneesh

On Monday 16 May 2011 12:14 AM, Wolfgang Denk wrote:
Dear Aneesh V,
In message 1305202276-27784-3-git-send-email-aneesh@ti.com you wrote:
add utility macros for:
- bit field operations
- log2n functions
...
+/* extract a bit field from a bit vector */ +#define get_bit_field(nr, start, mask)\
- (((nr)& (mask))>> (start))
+/* Set a field in a bit vector */ +#define set_bit_field(nr, start, mask, val)\
- do { \
(nr) = ((nr)& ~(mask)) | (((val)<< (start))& (mask));\
- } while (0);
I really dislike such "useful" helpers, because they make the code unreadable. Being nonstandard, nobody knows what they are doing, so you always will have to look up the implementation before you can continue reading the code. It's a waste of time and resources.
I will be very happy to use a standard one if one exists. I checked in bitops.h but couldn't find something that's equivalent to the above. Can you point me to a standard one that does something equivalent?
Yes, you may have to look up the implementation, but maybe just once. That goes with any API, right?
On the other hand, doing shifting, ORing, ANDing, etc. directly in the code is less readable in my opinion.
+/*
- Utility macro for read-modify-write of a hardware register
- addr - address of the register
- shift - starting bit position of the field to be modified
- msk - mask for the field
- val - value to be shifted masked and written to the field
- */
+#define modify_reg_32(addr, shift, msk, val) \
- do {\
writel(((readl(addr)& ~(msk))|(((val)<< (shift))& (msk))),\
(addr));\
- } while (0);
NAK again, for the same reasons.
Note that there are some semi-standardized I/O accessor macros available, at least for some architectures, like clrbits_*(), setbits_*(), or clrsetbits_*().
See for example "arch/arm/include/asm/io.h", "arch/powerpc/include/asm/io.h" for reference.
Instead of reinventing the wheel (just differently shaped) we should rather try and use a single, standardized set of such helpers.
So please use the existing macros instead of inventing new, non-standard ones.
clrsetbits will work for this need, albeit not as cleanly as the above one. I will use that.
Best regards,
Wolfgang Denk

Dear Wolfgang,
On Monday 16 May 2011 08:37 PM, Aneesh V wrote:
On Monday 16 May 2011 12:14 AM, Wolfgang Denk wrote:
Dear Aneesh V,
In message 1305202276-27784-3-git-send-email-aneesh@ti.com you wrote:
add utility macros for:
- bit field operations
- log2n functions
...
+/* extract a bit field from a bit vector */ +#define get_bit_field(nr, start, mask)\
- (((nr)& (mask))>> (start))
+/* Set a field in a bit vector */ +#define set_bit_field(nr, start, mask, val)\
- do { \
- (nr) = ((nr)& ~(mask)) | (((val)<< (start))& (mask));\
- } while (0);
I really dislike such "useful" helpers, because they make the code unreadable. Being nonstandard, nobody knows what they are doing, so you always will have to look up the implementation before you can continue reading the code. It's a waste of time and resources.
What if I change the above to something like this (please note there isn't any Linux or U-Boot substitute for this):
+/* extract a bit field from a bit vector */
+#define get_bit_field(nr, field)\
+	(((nr) & (field##_MASK)) >> (field##_OFFSET))
+
+/* Set a field in a bit vector */
+#define set_bit_field(nr, field, val)\
+	do { \
+		(nr) = ((nr) & ~(field##_MASK)) | (((val) << (field##_OFFSET))\
+			& (field##_MASK));\
+	} while (0);
+
And use it like this:

	assoc = get_bit_field(ccsidr, CCSIDR_ASSOCIATIVITY);
	set_bit_field(ccsidr, CCSIDR_ASSOCIATIVITY, assoc + 1);
Isn't it more intuitive and almost self-explanatory now?
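For this to work, each field only needs a pair of defines following the <field>_OFFSET / <field>_MASK convention, e.g. for the associativity field of the ARMv7 CCSIDR (bits 12:3):

#define CCSIDR_ASSOCIATIVITY_OFFSET	3
#define CCSIDR_ASSOCIATIVITY_MASK	(0x3ff << CCSIDR_ASSOCIATIVITY_OFFSET)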
If you still don't like these as standard generic macros, how about having these macros just for OMAP with these names and using them only for OMAP code?
omap_get_bit_field(nr, field) omap_set_bit_field(nr, field, val)
I can live without them for the ARM generic code but I use them extensively in my SPL series.
Please note that Simon's recent work for bitfield operations doesn't help me because the field definitions available to me are in shift/mask format and not in the range format that he uses. I will not now be able to generate the range format for the hundreds of registers I use.
best regards, Aneesh

Dear Aneesh V,
In message 4DECF8DA.9030806@ti.com you wrote:
I really dislike such "useful" helpers, because they make the code unreadable. Being nonstandard, nobody knows what they are doing, so you always will have to look up the implementation before you can continue reading the code. It's a waste of time and resources.
What if I change the above to something like this(please note there isn't any Linux, U-Boot substitute for this):
+/* extract a bit field from a bit vector */ +#define get_bit_field(nr, field)\
- (((nr) & (field##_MASK)) >> (field##_OFFSET))
+/* Set a field in a bit vector */ +#define set_bit_field(nr, field, val)\
- do { \
(nr) = ((nr) & ~(field##_MASK)) | (((val) << (field##_OFFSET))\
& (field##_MASK));\
- } while (0);
And use it like this: assoc = get_bit_field(ccsidr, CCSIDR_ASSOCIATIVITY); set_bit_field(ccsidr, CCSIDR_ASSOCIATIVITY, assoc + 1);
Isn't it more intuitive and almost self-explanatory now.
To me it is definitely NOT self-explanatory.
Actually I cannot even understand the argument names, nor how it's supposed to be used. I would interpret "nr" as "number"; in the context of a bit field probably a bit number? Wrong guess...
It's highly cryptic and will only work with very special #defines providing definitions of foo_MASK and foo_OFFSET.
It is not clear that you can use this on plain simple memory stored variables only, and that you must not use this on any device registers; yet your example looks as if this were intended to be used on registers - but then we would need proper I/O accessors.
And there are still many cases where you cannot use this because for example evaluating several bits from the same object will cause repeated accesses, which may have side effects (when accessing device registers).
And, last but not least: they are nonstandard. I still don't understand why you cannot use the standard macros that already exist in Linux and in U-Boot, here for example clrbits*(), setbits*() and clrsetbits*() ?
If you still don't like these as standard generic macros, how about having these macros just for OMAP with these names and use them only for OMAP code?
Would that make it anything better? If it's not good enough for general use, we can still use it for OMAP? Like: oh, it's only OMAP, so code quality does not matter? I think that's not good reasoning.
Please note that Simon's recent work for bitfield operations doesn't help me because the field definitions available to me are in shift/mask format and not in the range format that he uses. I will not be
I don't think I'm going to accept that code either.
able to now generate the range format for the hundreds of registers I use.
Ah! I thought you were talking about registers. See note above about I/O accessors.
Best regards,
Wolfgang Denk

Hi Wolfgang,
On Tuesday 07 June 2011 12:20 AM, Wolfgang Denk wrote:
Dear Aneesh V,
In message 4DECF8DA.9030806@ti.com you wrote:
I really dislike such "useful" helpers, because they make the code unreadable. Being nonstandard, nobody knows what they are doing, so you always will have to look up the implementation before you can continue reading the code. It's a waste of time and resources.
What if I change the above to something like this(please note there isn't any Linux, U-Boot substitute for this):
+/* extract a bit field from a bit vector */ +#define get_bit_field(nr, field)\
- (((nr)& (field##_MASK))>> (field##_OFFSET))
+/* Set a field in a bit vector */ +#define set_bit_field(nr, field, val)\
- do { \
(nr) = ((nr)& ~(field##_MASK)) | (((val)<< (field##_OFFSET))\
& (field##_MASK));\
- } while (0);
And use it like this: assoc = get_bit_field(ccsidr, CCSIDR_ASSOCIATIVITY); set_bit_field(ccsidr, CCSIDR_ASSOCIATIVITY, assoc + 1);
Isn't it more intuitive and almost self-explanatory now.
To me it is definitely NOT self-explanatory.
Actually I cannot even understand the argument names, nor how it's supposed to be used. I would interpret "nr" as "number"; in the context of a bit field probably a bit number? Wrong guess...
It's highly cryptic and will only work with very special #defines providing definitions of foo_MASK and foo_OFFSET.
It is not clear that you can use this on plain simple memory stored variables only, and that you must not use this on any device registers; yet your example looks as if this were intended to be used on registers - but then we would need proper I/O accessors.
No, it's not meant to be used directly on registers. Please see below.
And there are still many cases where you cannot use this because for example evaluating several bits from the same object will cause repeated accesses, which may have side effects (when accessing device registers).
And, last but not least: they are nonstandard. I still don't understand why you cannot use the standard macros that already exist in Linux and in U-Boot, here for example clrbits*(), setbits*() and clrsetbits*() ?
As I had mentioned in a previous mail, please note that the above macros are not for the same use-case as clrsetbits*() or friends (I had one macro that did something similar to clrsetbits*() and I intend to remove that in the next revision)
The above macros are for bit-field manipulation in a C integer variable - nothing more.
However, the typical use of this is for extracting bit-fields from an IO register that is already read into an integer variable (instead of reading the IO register multiple times). And similarly for write.
So, if I have to write 5 different fields in a register I first write them into a variable and finally call writel() instead of making 5 clrsetbits*() calls.
There aren't any standard routines available for this need in Linux or U-Boot. I think you had agreed on this fact sometime back.
If you still don't like these as standard generic macros, how about having these macros just for OMAP with these names and use them only for OMAP code?
Would that make it anything better? If it's not good enough for general use, we can still use it for OMAP? Like: oh, it's only OMAP, so code quality does not matter? I think that's not good reasoning.
No. It was not about code quality. The question was whether these macros were generic enough to be used as the standard U-boot ones. The key question is how do you represent bit fields. There are different alternatives for this.
a. bit range (say 5:3) b. shift(3) and field width(3) c. shift(3) and mask(0x38)
We traditionally use (c) and we have auto-generated defines in this form. So, my macros use this format. I was not sure if other SoCs follow the same approach. That's why I suggested making them OMAP specific if you think (c) is not the standard approach.
best regards, Aneesh

Dear Aneesh V,
In message 4DEDE8D9.7030306@ti.com you wrote:
As I had mentioned in a previous mail, please note that the above macros are not for the same use-case as clrsetbits*() or friends (I had one macro that did something similar to clrsetbits*() and I intend to remove that in the next revision)
The above macros are for bit-field manipulation in a C integer variable
- nothing more.
Why cannot we use the existing macros?
So, if I have to write 5 different fields in a register I first write them into a variable and finally call writel() instead of making 5 clrsetbits*() calls.
It does not make much difference to me if you call one macro or another 5 times.
It does matter to me to have several incompatible implementations doing essentially the same thing.
There aren't any standard routines available for this need in Linux or U-Boot. I think you had agreed on this fact sometime back.
I agree in so far as I am not aware of any such macros in Linux either. But my conclusion is a different one - it boils down to: Linux is way more complex than U-Boot, so if they don't need this, we don't need it either.
No. It was not about code quality. The question was whether these macros were generic enough to be used as the standard U-boot ones. The key question is how do you represent bit fields. There are different alternatives for this.
a. bit range (say 5:3) b. shift(3) and field width(3) c. shift(3) and mask(0x38)
d) Value and mask
We traditionally use (c) and we have auto-generated defines in this form. So, my macros use this format. I was not sure if other SoCs follow the same approach. That's why I suggested making them OMAP specific if you think (c) is not the standard approach.
Actually it does not matter. See my previous message to Simon: you can cover all this with the existing macros, and without adding any significant overhead.
So far, I did not see a single good argument why any new, nonstandard macros would be needed.
Best regards,
Wolfgang Denk

Dear Wolfgang,
On Tuesday 07 June 2011 04:09 PM, Wolfgang Denk wrote:
Dear Aneesh V,
In message 4DEDE8D9.7030306@ti.com you wrote:
As I had mentioned in a previous mail, please note that the above macros are not for the same use-case as clrsetbits*() or friends (I had one macro that did something similar to clrsetbits*() and I intent to remove that in the next revision)
The above macros are for bit-field manipulation in a C integer variable
- nothing more.
Why cannot we use the existing macros?
So, if I have to write 5 different fields in a register I first write them into a variable and finally call writel() instead of making 5 clrsetbits*() calls.
It does not make much difference to me if you call one macro or another 5 times.
No, it makes a difference. It's 5 writes to a variable typically held in an ARM register + 1 IO access vs 5 IO accesses. It's logically not equivalent.
Further, if the 5 values are constants, a smart compiler will fold the five writes into one write to the ARM register + 1 IO access, which won't happen if you used 5 clrsetbits*() calls.
Let me give you a solid example:
Problem: We want to read-modify-write an IO register 'reg' affecting 3 different fields: a, b, and c. The values to be written to the fields are a_val, b_val, and c_val respectively:
Solution 1 - without any macros:
	unsigned int r = readl(reg);

	r = (r & ~a_mask) | ((a_val << a_shift) & a_mask);
	r = (r & ~b_mask) | ((b_val << b_shift) & b_mask);
	r = (r & ~c_mask) | ((c_val << c_shift) & c_mask);
	writel(r, reg);
Solution2 - with my macros:
	unsigned int r = readl(reg);

	set_bit_field(r, a, a_val);
	set_bit_field(r, b, b_val);
	set_bit_field(r, c, c_val);
	writel(r, reg);
Solution3 - with clrsetbits*():
	clrsetbits_le32(reg, a_mask, a_val << a_shift);
	clrsetbits_le32(reg, b_mask, b_val << b_shift);
	clrsetbits_le32(reg, c_mask, c_val << c_shift);
Solution 3 is not acceptable to me because it's clearly not equivalent to what I want to do. Writing the register 3 times instead of once may have undesirable side-effects. Even if it worked, it's clearly not efficient.
If you are forcing me to use solution 1, IMHO you are essentially forcing me not to use a sub-routine for a task that is repeated many times in my code, leaving my code to be more error prone and less readable.
You accuse set_bit_field() of being cryptic. I would say the implementation of clrsetbits_le32() is even more cryptic with so many levels of indirection. I think that goes with any sub-routine/API. You need to read the code/documentation once to know what it does. After that you take its functionality for granted and things become easier for you. If better documentation can improve readability I am happy to do that.
Also, If you don't like it as a generic API I am willing to make it a static inline function in my code. But I need a utility function for this need. If you think the implementation/documentation can be improved I am willing to work on that too. But please suggest a solution for this problem.
It does matter to me to have several incompatible implementations doing essentially the same thing.
They are not doing the same thing as explained above.
There aren't any standard routines available for this need in Linux or U-Boot. I think you had agreed on this fact sometime back.
I agree in so far as I am not aware of any such macros in Linux either. But my conclusion is a different one - it boils down to: Linux is way more complex than U-Boot, so if they don't need this, we don't need it either.
I am surprised that Linux doesn't have a solution for this. Perhaps the reason is the confusion about the representation of a field that we discussed below. I suspect there may be non-standard local implementations in different modules.
Also, as somebody already mentioned, can't we do better than Linux?
No. It was not about code quality. The question was whether these macros were generic enough to be used as the standard U-boot ones. The key question is how do you represent bit fields. There are different alternatives for this.
a. bit range (say 5:3)
b. shift(3) and field width(3)
c. shift(3) and mask(0x38)
d) Value and mask
We traditionally use (c) and we have auto-generated defines in this form. So, my macros use this format. I was not sure if other SoCs follow the same approach. That's why I suggested making them OMAP specific if you think (c) is not the standard approach.
Actually it does not matter. See my previous message to Simon: you can cover all this with the existing macros, and without adding any significant overhead.
So far, I did not see a single good argument why any new, nonstandard macros would be needed.
Please consider the above example and let me know if I missed any solution using the existing standard macros.
best regards, Aneesh

Hi Aneesh.
On Tue, Jun 7, 2011 at 5:14 AM, Aneesh V aneesh@ti.com wrote: [snip]
I am surprised that Linux doesn't have a solution for this. Perhaps the reason is the confusion about the representation of a field that we discussed below. I suspect there may be non-standard local implementations in different modules.
I think there are a few reasons. Linux tends to do a lot more processing in between bitfield accesses, so the problem is less apparent (bitfield access as a proportion of total code size is small). Also very often Linux supports all different values for a field, so computing values to put into fields (based on function parameters) is more common than just setting to a few selected values, which is common in U-Boot.
Also, as somebody already mentioned, can't we do better than Linux?
In this area, yes!
[snip]
Please consider the above example and let me know if I missed any solution using the existing standard macros.
best regards, Aneesh

Dear Aneesh V,
In message 4DEE161B.2050402@ti.com you wrote:
So, if I have to write 5 different fields in a register I first write them into a variable and finally call writel() instead of making 5 clrsetbits*() calls.
It does not make much difference to me if you call one macro or another 5 times.
No it makes a difference. It's 5 writes to a variable typically in an ARM register + 1 IO access vs 5 IO accesses. It's logically not equivalent.
You can use in_le32() to read the register into a variable, a number of macros operating on that variable, and then an out_le32() to write it back.
It's logically equivalent.
Further if the 5 values are constants a smart compiler will fold the five writes into one write to the ARM register + 1 IO access, which won't happen if you used 5 clrsetbits*()
See above.
If you ever want to use your macros on device registers, you MUST use proper memory barriers. This will probably ruin your idea to combine the access into a single write. So you can just work on a variable as suggested before.
Problem: We want to read-modify-write an IO register 'reg' affecting 3 different fields: a, b, and c. The values to be written to the fields are a_val, b_val, and c_val respectively:
Solution 1 - without any macros:
unsigned int r = readl(reg);

r = (r & ~a_mask) | ((a_val << a_shift) & a_mask);
r = (r & ~b_mask) | ((b_val << b_shift) & b_mask);
r = (r & ~c_mask) | ((c_val << c_shift) & c_mask);
writel(r, reg);
Solution 2 - with my macros:
unsigned int r = readl(reg);

set_bit_field(r, a, a_val);
set_bit_field(r, b, b_val);
set_bit_field(r, c, c_val);
writel(r, reg);
Solution 3 - with clrsetbits*():
clrsetbits_le32(reg, a_mask, a_val << a_shift);
clrsetbits_le32(reg, b_mask, b_val << b_shift);
clrsetbits_le32(reg, c_mask, c_val << c_shift);
Solution 4 - with standard macros, and proper defines:
unsigned int r = in_le32(reg); /* or readl() if you like */
clrsetbits_le32(&r, a_mask, a_val);
clrsetbits_le32(&r, b_mask, b_val);
clrsetbits_le32(&r, c_mask, c_val);
out_le32(reg, r);
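For illustration, a minimal compilable sketch of solution 4 for a single field, assuming <asm/io.h> provides in_le32()/out_le32()/clrsetbits_le32() as it does on ARM and PPC; the register and field names here are made up:

#include <linux/types.h>
#include <asm/io.h>

/* hypothetical field: 3 bits wide, starting at bit 3 */
#define A_SHIFT	3
#define A_MASK	(0x7 << A_SHIFT)

static void write_field_a(void *reg, u32 a_val)
{
	u32 r = in_le32(reg);	/* one read of the device register */

	/* clear the field in the local copy, then set the new value */
	clrsetbits_le32(&r, A_MASK, (a_val << A_SHIFT) & A_MASK);

	out_le32(reg, r);	/* one write back to the device register */
}

The intermediate clrsetbits_le32() calls operate only on the stack variable, so the device register still sees a single read and a single write.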
Actually solution 3 looks best to me.
Solution 3 is not acceptable to me because it's clearly not equivalent to what I want to do. Writing the register 3 times instead of once may have undesirable side-effects. Even if it worked, it's clearly not efficient.
In case of side effects you can use solution 4.
We should not bother here about whether this is "efficient" or "not efficient". Probably none of this code is ever time critical, not to the extent of a few additional instructions.
If you are forcing me to use solution 1, IMHO you are essentially forcing me not to use a sub-routine for a task that is repeated many times in my code, leaving my code to be more error prone and less readable.
I agree that 1 is ugly and error prone, but there is no need to use it.
I repeat: we have a set of powerful macros ready available. Just use them.
You accuse set_bit_field() of being cryptic. I would say the implementation of clrsetbits_le32() is even more cryptic with so many levels of indirection. I think that goes with any sub-routine/API.
The implementation is complex, indeed. But the interface is pretty simple, and portable.
You need to read the code/documentation once to know what it does.
Ditto for set_bit_field().
It does matter to me to have several incompatible implementations doing essentially the same thing.
They are not doing the same thing as explained above.
I think they do. Could you please just give it a try with an open mind and not be too focused on your own code.
Best regards,
Wolfgang Denk

Dear Wolfgang,
On Tuesday 07 June 2011 09:10 PM, Wolfgang Denk wrote:
Dear Aneesh V,
In message 4DEE161B.2050402@ti.com you wrote:
So, if I have to write 5 different fields in a register I first write them into a variable and finally call writel() instead of making 5 clrsetbits*() calls.
It does not make much difference to me if you call one macro or another 5 times.
No it makes a difference. It's 5 writes to a variable typically in an ARM register + 1 IO access vs 5 IO accesses. It's logically not equivalent.
You can use in_le32() to read the register into a variable, a number of macros operating on that variable, and then an out_le32() to write it back.
It's logically equivalent.
Further if the 5 values are constants a smart compiler will fold the five writes into one write to the ARM register + 1 IO access, which won't happen if you used 5 clrsetbits*()
See above.
If you ever want to use your macros on device registers, you MUST use proper memory barriers. This will probably ruin your idea to combine the access into a single write. So you can just work on a variable as suggested before.
Problem: We want to read-modify-write an IO register 'reg' affecting 3 different fields: a, b, and c. The values to be written to the fields are a_val, b_val, and c_val respectively:
Solution 1 - without any macros:
unsigned int r = readl(reg);

r = (r & ~a_mask) | ((a_val << a_shift) & a_mask);
r = (r & ~b_mask) | ((b_val << b_shift) & b_mask);
r = (r & ~c_mask) | ((c_val << c_shift) & c_mask);
writel(r, reg);
Solution 2 - with my macros:
unsigned int r = readl(reg);

set_bit_field(r, a, a_val);
set_bit_field(r, b, b_val);
set_bit_field(r, c, c_val);
writel(r, reg);
Solution 3 - with clrsetbits*():
clrsetbits_le32(reg, a_mask, a_val << a_shift);
clrsetbits_le32(reg, b_mask, b_val << b_shift);
clrsetbits_le32(reg, c_mask, c_val << c_shift);
Solution 4 - with standard macros, and proper defines:
unsigned int r = in_le32(reg); /* or readl() if you like */
clrsetbits_le32(&r, a_mask, a_val);
clrsetbits_le32(&r, b_mask, b_val);
clrsetbits_le32(&r, c_mask, c_val);
out_le32(reg, r);
I still don't think this is the 'right' solution for my problem. I don't like the fact that clrsetbits_le32() introduces a lot of unnecessary 'volatile's.
Yes, it's about the 'efficiency'. Maybe it doesn't count in some cases. But maybe it counts in some other cases. Basically, I don't like to sacrifice 'efficiency' unless the cost for achieving it is very high. I don't think having an extra helper function is a big cost. Neither do I believe that readability suffers in this case.
If you still insist, I can use clrsetbits_le32() in the interest of getting this to a closure.
Actually solution 3 looks best to me.
Solution 3 is not acceptable to me because it's clearly not equivalent to what I want to do. Writing the register 3 times instead of once may have undesirable side-effects. Even if it worked, it's clearly not efficient.
In case of side effects you can use solution 4.
We should not bother here about whether this is "efficient" or "not efficient". Probably none of this code is ever time critical, not to the extent of a few additional instructions.
If you are forcing me to use solution 1, IMHO you are essentially forcing me not to use a sub-routine for a task that is repeated many times in my code, leaving my code to be more error prone and less readable.
I agree that 1 is ugly and error prone, but there is no need to use it.
I repeat: we have a set of powerful macros ready available. Just use them.
We have a set of powerful macros designed for bit-field accesses in IO registers.
But what I am looking for is a set of macros for bit-field operations on C integer variables without the unnecessary overhead of IO register accesses. I am looking for missing APIs in bitops.h, not anything from io.h.
best regards, Aneesh

Dear Aneesh V,
In message 4DEF62A6.7060706@ti.com you wrote:
I still don't think this is the 'right' solution for my problem. I don't like the fact that clrsetbits_le32() introduces a lot of unnecessary 'volatile's.
Well, with this argument you would also have to refuse using readl() and writel() and all other I/O accessor macros. The only place where volatile is used is in the __arch_get*() and __arch_put*() macros, and there it is supposed to be ok.
Yes, it's about the 'efficiency'. Maybe it doesn't count in some cases. But maybe it counts in some other cases. Basically, I don't like to sacrifice 'efficiency' unless the cost for achieving it is very
Try and show me a single case where you see a measurable difference in performance.
If you still insist, I can use clrsetbits_le32() in the interest of getting this to a closure.
Please do. Thanks.
We have a set of powerful macros designed for bit-field accesses in IO registers.
But what I am looking for is a set of macros for bit-field operations on C integer variables without the unnecessary overhead of IO register accesses. I am looking for missing APIs in bitops.h, not anything from io.h.
All the overhead we have is a few memory barriers. Can you measure any difference in performance?
Best regards,
Wolfgang Denk

Dear Wolfgang,
On Thursday 09 June 2011 03:11 AM, Wolfgang Denk wrote:
Dear Aneesh V,
In message 4DEF62A6.7060706@ti.com you wrote:
I still don't think this is the 'right' solution for my problem. I don't like the fact that clrsetbits_le32() introduces a lot of unnecessary 'volatile's.
Well, with this argument you would also have to refuse using readl() and writel() and all other I/O accessor macros. The only place where volatile is used is in the __arch_get*() and __arch_put*() macros, and there it is supposed to be ok.
Yes, it's about the 'efficiency'. Maybe it doesn't count in some cases. But maybe it counts in some other cases. Basically, I don't like to sacrifice 'efficiency' unless the cost for achieving it is very
Try and show me a single case where you see a measurable difference in performance.
If you still insist, I can use clrsetbits_le32() in the interest of getting this to a closure.
Please do. Thanks.
As I start re-working on my patches I realize that there is no alternative to get_bit_field(). clrsetbits_le32() works as an alternative for set_bit_field() but I couldn't find anything in io.h that could replace get_bit_field(). The only option I seem to have is to mask and shift directly every time. Is that what you prefer over get_bit_field()?
best regards, Aneesh

Dear Aneesh V,
In message 4DF71FBF.6030408@ti.com you wrote:
As I start re-working on my patches I realize that there is no alternative to get_bit_field(). clrsetbits_le32() works as an alternative for set_bit_field() but I couldn't find anything in io.h that could replace get_bit_field(). The only option I seem to have is to mask and shift directly every time. Is that what you prefer over get_bit_field()?
I don't understand this comment.
You should NOT use clrsetbits_*() instead of set_bit_field() - depending on what you want to do, clrbits_*() or setbits_*() are better choices for simple operations.
Also please notice that none of clrbits_*(), setbits_*() or clrsetbits_*() perform any masking or shifting. If you don't want to do this explicitly when using the macro, you can hide it in respective definitions of the mask values. I showed how this could be done in the examples I posted in the thread with Simon.
Best regards,
Wolfgang Denk

Dear Wolfgang,
On Tuesday 14 June 2011 04:21 PM, Wolfgang Denk wrote:
Dear Aneesh V,
In message 4DF71FBF.6030408@ti.com you wrote:
As I start re-working on my patches I realize that there is no alternative to get_bit_field(). clrsetbits_le32() works as an alternative for set_bit_field() but I couldn't find anything in io.h that could replace get_bit_field(). The only option I seem to have is to mask and shift directly every time. Is that what you prefer over get_bit_field()?
I don't understand this comment.
You should NOT use clrsetbits_*() instead of set_bit_field() - depending on what you want to do, clrbits_*() or setbits_*() are better choices for simple operations.
Yes. I have seen those macros. But more often than not the bit field is more than 1 bit wide and the value to be set is not necessarily all 0's or all 1's. That's why I have to use clrsetbits_*()
Also please notice that none of clrbits_*(), setbits_*() or clrsetbits_*() perform any masking or shifting. If you don't want to do this explicitly when using the macro, you can hide it in respective definitions of the mask values. I showed how this could be done in the examples I posted in the thread with Simon.
The problem I have to deal with is different. get_bit_field() was intended to extract bit fields from an integer. So, the target usage will be something like this (where a, b, and c are bit fields in register my_reg):
u32 my_reg, a_val, b_val, c_val;

my_reg = readl(my_reg_addr);

a_val = get_bit_field(my_reg, a_mask);
b_val = get_bit_field(my_reg, b_mask);
c_val = get_bit_field(my_reg, c_mask);
Do you see an alternative method for doing this using the standard macros?
br, Aneesh

Dear Aneesh V,
In message 4DF7488A.6000909@ti.com you wrote:
Yes. I have seen those macros. But more often than not the bit field is more than 1 bit wide and the value to be set is not necessarily all 0's or all 1's. That's why I have to use clrsetbits_*()
I see. In such a case (and only then) clrsetbits_*() is indeed the right choice.
The problem I have to deal with is different. get_bit_field() was intended to extract bit fields from an integer. So, the target usage will be something like this (where a, b, and c are bit fields in register my_reg):
u32 my_reg, a_val, b_val, c_val;

my_reg = readl(my_reg_addr);

a_val = get_bit_field(my_reg, a_mask);
b_val = get_bit_field(my_reg, b_mask);
c_val = get_bit_field(my_reg, c_mask);
Do you see an alternative method for doing this using the standard macros?
Please see the example given here:
http://article.gmane.org/gmane.comp.boot-loaders.u-boot/101146
Looking closer, the "FIELD_VAL" macro alone will probably not suffice, as you need both shift directions, like that:
#define FIELD_SHIFT	16
#define FIELD_MASK	0xF

#define FIELD_BITS(x)	(x << 16)
#define FIELD_MASK	FIELD_BITS(0xF)
#define FIELD_VAL(x)	((x & FIELD_MASK) >> 16)
The code would then look something like this:
my_reg = readl(my_reg_addr);
a_val = A_VAL(my_reg);
b_val = B_VAL(my_reg);
c_val = C_VAL(my_reg);
...or similar.
Best regards,
Wolfgang Denk

On Tue, Jun 14, 2011 at 6:53 AM, Wolfgang Denk wd@denx.de wrote:
Dear Aneesh V,
In message 4DF7488A.6000909@ti.com you wrote:
Yes. I have seen those macros. But more often than not the bit field is more than 1 bit wide and the value to be set is not necessarily all 0's or all 1's. That's why I have to use clrsetbits_*()
I see. In such a case (and only then) clrsetbits_*() is indeed the right choice.
The problem I have to deal with is different. get_bit_field() was intended to extract bit fields from an integer. So, the target usage will be something like this (where a, b, and c are bit fields in register my_reg):
u32 my_reg, a_val, b_val, c_val;

my_reg = readl(my_reg_addr);

a_val = get_bit_field(my_reg, a_mask);
b_val = get_bit_field(my_reg, b_mask);
c_val = get_bit_field(my_reg, c_mask);
Do you see an alternative method for doing this using the standard macros?
Please see the example given here:
http://article.gmane.org/gmane.comp.boot-loaders.u-boot/101146
Looking closer, the "FIELD_VAL" macro alone will probably not suffice, as you need both shift directions, like that:
#define FIELD_SHIFT	16
#define FIELD_MASK	0xF

#define FIELD_BITS(x)	(x << 16)
#define FIELD_MASK	FIELD_BITS(0xF)
#define FIELD_VAL(x)	((x & FIELD_MASK) >> 16)
Hi Wolfgang,
I think you have FIELD_MASK carrying two meanings: the un-shifted or 'raw' mask, and the shifted mask. So perhaps:
#define FIELD_SHIFT	16
#define FIELD_RAWMASK	0xF

#define FIELD_BITS(x)	(x << 16)
#define FIELD_MASK	FIELD_BITS(FIELD_RAWMASK)
#define FIELD_VAL(x)	((x & FIELD_MASK) >> 16)
or with the 16 factored properly, but even harder to read:
#define FIELD_SHIFT	16
#define FIELD_RAWMASK	0xF

#define FIELD_BITS(x)	(x << FIELD_SHIFT)
#define FIELD_MASK	FIELD_BITS(FIELD_RAWMASK)
#define FIELD_VAL(x)	((x & FIELD_MASK) >> FIELD_SHIFT)
(note that FIELD_BITS should arguably mask after the shift).
When you have a lot of these definitions in a row you have to mentally check the bit width of the mask:
#define FIELD1_RAWMASK	0xF
#define FIELD1_SHIFT	16
#define FIELD2_RAWMASK	0x1F
#define FIELD2_SHIFT	11
#define FIELD3_RAWMASK	0x1F
#define FIELD3_SHIFT	7
#define FIELD4_RAWMASK	0x1F
#define FIELD4_SHIFT	1

#define FIELD1_BITS(x)	(x << FIELD1_SHIFT)
#define FIELD1_MASK	FIELD1_BITS(FIELD1_RAWMASK)
#define FIELD1_VAL(x)	((x & FIELD1_MASK) >> FIELD1_SHIFT)
#define FIELD2_BITS(x)	(x << FIELD2_SHIFT)
#define FIELD2_MASK	FIELD2_BITS(FIELD2_RAWMASK)
#define FIELD2_VAL(x)	((x & FIELD2_MASK) >> FIELD2_SHIFT)
...
Is the above correct, or do fields overlap or not cover fully? (exercise for reader) (see [1] below)
So I think it is better to think of the width rather than the mask.
#define FIELD_WIDTH	4
#define FIELD_SHIFT	16

#define FIELD_RAWMASK	((1U << FIELD_WIDTH) - 1)
#define FIELD_BITS(x)	(x << FIELD_SHIFT)
#define FIELD_MASK	FIELD_BITS(FIELD_RAWMASK)
#define FIELD_VAL(x)	((x & FIELD_MASK) >> FIELD_SHIFT)
because then it is more obvious:
#define FIELD1_WIDTH	4
#define FIELD1_SHIFT	16
#define FIELD2_WIDTH	5
#define FIELD2_SHIFT	11
#define FIELD3_WIDTH	5
#define FIELD3_SHIFT	7
#define FIELD4_WIDTH	5
#define FIELD4_SHIFT	1
And now it is a little easier to see that 11+5 = 16, so FIELD2 is ok; 7+5=12 so FIELD3 overlaps, 1+5=6 so FIELD4 isn't big enough. It's still not as good as just numbering your bits on little-endian archs, but I think we have had that discussion.
I think BITS and VAL are not very descriptive which is why I suggested pack and unpack at the time.
But don't get me started talking about bit fields.
Regards, Simon
[1] This is why macros are so nice:
define once:

#define bf_mask(field)		((field ## _RAWMASK) << field ## _SHIFT)
#define bf_val(field, val)	(((val) & bf_mask(field)) >> field ## _SHIFT)
#define bf_bits(field, val)	(((val) << field ## _SHIFT) & bf_mask(field))

then:

#define FIELD1_BITS(x)	bf_bits(FIELD1, x)
#define FIELD1_MASK	bf_mask(FIELD1)
#define FIELD1_VAL(x)	bf_val(FIELD1, x)
#define FIELD2_BITS(x)	bf_bits(FIELD2, x)
#define FIELD2_MASK	bf_mask(FIELD2)
#define FIELD2_VAL(x)	bf_val(FIELD2, x)
...
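Pulling the above together into a small compilable sketch (the field layout and values are made up for illustration):

#include <stdio.h>

#define bf_mask(field)		((field ## _RAWMASK) << field ## _SHIFT)
#define bf_val(field, val)	(((val) & bf_mask(field)) >> field ## _SHIFT)
#define bf_bits(field, val)	(((val) << field ## _SHIFT) & bf_mask(field))

#define FIELD1_WIDTH	4
#define FIELD1_SHIFT	16
#define FIELD1_RAWMASK	((1U << FIELD1_WIDTH) - 1)

int main(void)
{
	unsigned int reg = 0;

	reg |= bf_bits(FIELD1, 0xA);	/* pack 0xA into bits 19:16 */
	printf("reg = 0x%08x, field1 = 0x%x\n", reg, bf_val(FIELD1, reg));

	return 0;
}

This prints reg = 0x000a0000, field1 = 0xa.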
The code would then look something like this:
my_reg = readl(my_reg_addr);
a_val = A_VAL(my_reg);
b_val = B_VAL(my_reg);
c_val = C_VAL(my_reg);
...or similar.
Best regards,
Wolfgang Denk

Dear Simon Glass,
In message BANLkTingFroRJD3w7RwqZu80KoTfoV0bmQ@mail.gmail.com you wrote:
Looking closer, the "FIELD_VAL" macro alone will probably not suffice, as you need both shift directions, like that:
#define FIELD_SHIFT	16
#define FIELD_MASK	0xF
#define FIELD_BITS(x)	(x << 16)
#define FIELD_MASK	FIELD_BITS(0xF)
#define FIELD_VAL(x)	((x & FIELD_MASK) >> 16)
Hi Wolfgang,
I think you have FIELD_MASK carrying two meanings: the un-shifted or 'raw' mask, and the shifted mask. So perhaps:
My intention was to have only the real mask, i. e. what you need to apply to the register data (in your language the shifted mask).
#define FIELD_SHIFT	16
#define FIELD_RAWMASK	0xF
This is never really needed - you only use it once.
When you have a lot of these definitions in a row you have to mentally check the bit width of the mask:
Yes, this is one of the disadvantages of thinking in bit fields and one reason why I never write code like that.
I mentioned this before - I consider it error prone [like most other "clever" tricks that "make life easy" and allow you to stop thinking].
Is the above correct, or do fields overlap or not cover fully?
All this is possible. There may be overlapping definitions, and there may be reserved bits that need special handling.
There may even be bits that cannot simply be re-written as we read them, but that must always be written as zeroes or ones.
I never claimed that this "bit field" juggling was a clever idea. It works for some cases, and causes major pita for others.
Best regards,
Wolfgang Denk

On Tue, Jun 14, 2011 at 11:54 AM, Wolfgang Denk wd@denx.de wrote:
Dear Simon Glass,
In message BANLkTingFroRJD3w7RwqZu80KoTfoV0bmQ@mail.gmail.com you wrote:
Looking closer, the "FIELD_VAL" macro alone will probably not suffice, as you need both shift directions, like that:
#define FIELD_SHIFT	16
#define FIELD_MASK	0xF

#define FIELD_BITS(x)	(x << 16)
#define FIELD_MASK	FIELD_BITS(0xF)
#define FIELD_VAL(x)	((x & FIELD_MASK) >> 16)
Hi Wolfgang,
I think you have FIELD_MASK carrying two meanings: the un-shifted or 'raw' mask, and the shifted mask. So perhaps:
My intention was to have only the real mask, i. e. what you need to apply to the register data (in your language the shifted mask).
#define FIELD_SHIFT	16
#define FIELD_RAWMASK	0xF
This is never really needed - you only use it once.
When you have a lot of these definitions in a row you have to mentally check the bit width of the mask:
Yes, this is one of the disadvantages of thinking in bit fields and one reason why I never write code like that.
I mentioned this before - I consider it error prone [like most other "clever" tricks that "make life easy" and allow you to stop thinking].
Is the above correct, or do fields overlap or not cover fully?
All this is possible. There may be overlapping definitions, and there may be reserved bits that need special handling.
There may even be bits that cannot simply be re-written as we read them, but that must always be written as zeroes or ones.
I never claimed that this "bit field" juggling was a clever idea. It works for some cases, and causes major pita for others.
Hi Wolfgang,
Yes I agree, but I think you might have missed both my points.
Regards, Simon
Best regards,
Wolfgang Denk

Dear Wolfgang,
On Tuesday 14 June 2011 07:23 PM, Wolfgang Denk wrote:
Dear Aneesh V,
In message 4DF7488A.6000909@ti.com you wrote:
Yes. I have seen those macros. But more often than not the bit field is more than 1 bit wide and the value to be set is not necessarily all 0's or all 1's. That's why I have to use clrsetbits_*()
I see. In such a case (and only then) clrsetbits_*() is indeed the right choice.
The problem I have to deal with is different. get_bit_field() was intended to extract bit fields from an integer. So, the target usage will be something like this (where a, b, and c are bit fields in register my_reg):
u32 my_reg, a_val, b_val, c_val;

my_reg = readl(my_reg_addr);

a_val = get_bit_field(my_reg, a_mask);
b_val = get_bit_field(my_reg, b_mask);
c_val = get_bit_field(my_reg, c_mask);
Do you see an alternative method for doing this using the standard macros?
Please see the example given here:
http://article.gmane.org/gmane.comp.boot-loaders.u-boot/101146
Looking closer, the "FIELD_VAL" macro alone will probably not suffice, as you need both shift directions, like that:
#define FIELD_SHIFT	16
#define FIELD_MASK	0xF
When I said mask I meant the 'shifted mask' like:
#define FIELD_MASK (0xF << 16)
So, the shift information is embedded in this mask and can be extracted by finding the first set bit. But in reality my get_bit_field() function indeed takes both arguments. So it's something like this:
#define get_bit_field(x, shift, mask) \
	(((x) & (mask)) >> (shift))

#define A_SHIFT	16
#define A_MASK	(0xF << 16)
and then use it like:
a_val = get_bit_field(my_reg, A_SHIFT, A_MASK);
#define FIELD_BITS(x)	(x << 16)
#define FIELD_MASK	FIELD_BITS(0xF)
#define FIELD_VAL(x)	((x & FIELD_MASK) >> 16)
The code would then look something like this:
my_reg = readl(my_reg_addr);
a_val = A_VAL(my_reg);
b_val = B_VAL(my_reg);
c_val = C_VAL(my_reg);
If I have to do something like this I will have to now generate macros like A_VAL(x) for hundreds of fields. Is it really necessary when the following will do the job for me with the existing infrastructure I have:
a_val = get_bit_field(my_reg, A_SHIFT, A_MASK);
b_val = get_bit_field(my_reg, B_SHIFT, B_MASK);
Please note that in this case I don't have an alternative with standard macros. If this is not possible I would rather prefer to keep it simple like this:
a_val = (my_reg & A_MASK) >> A_SHIFT;
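As a purely illustrative, self-contained comparison of the two forms for one hypothetical field:

#define get_bit_field(x, shift, mask) \
	(((x) & (mask)) >> (shift))

#define A_SHIFT	16
#define A_MASK	(0xF << 16)

static unsigned int read_field_a(unsigned int my_reg)
{
	/* expands to exactly: (my_reg & A_MASK) >> A_SHIFT */
	return get_bit_field(my_reg, A_SHIFT, A_MASK);
}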
best regards, Aneesh

Dear Aneesh V,
In message 4DF871E3.8080307@ti.com you wrote:
So, the shift information is embedded in this mask and can be extracted by finding the first set bit. But in reality my get_bit_field() function indeed takes both arguments. So it's something like this:
As stated before, I will not accept any bit field macros like get_bit_field().
Best regards,
Wolfgang Denk

Dear Wolfgang,
On Wednesday 15 June 2011 02:50 PM, Wolfgang Denk wrote:
Dear Aneesh V,
In message 4DF871E3.8080307@ti.com you wrote:
So, the shift information is embedded in this mask and can be extracted by finding the first set bit. But in reality my get_bit_field() function indeed takes both arguments. So it's something like this:
As stated before, I will not accept any bit field macros like get_bit_field().
Will you accept something like this?
a_val = (reg & a_mask) >> a_shift;
best regards, Aneesh

Dear Aneesh V,
In message 4DF89102.9040508@ti.com you wrote:
Will you accept something like this?
a_val = (reg & a_mask) >> a_shift;
Yes, of course (that's what seems most natural to me).
Best regards,
Wolfgang Denk

On 15/06/11 22:04, Wolfgang Denk wrote:
Dear Aneesh V,
In message 4DF89102.9040508@ti.com you wrote:
Will you accept something like this?
a_val = (reg & a_mask) >> a_shift;
Yes, of course (that's what seems most natural to me).
Me too - The code is obvious - the desired value is being masked out of a larger composite value and then shifted right to bit 0
And to set the value then you have:
reg &= ~a_mask;				/* Clear a_val */
reg |= (a_val << a_shift) & a_mask;	/* Set new a_val */
AND'ing with a_mask is required to prevent accidental clobbering when a_val is out-of-range. May give undesirable results by setting an illegal a_val, but at least you don't clobber unrelated bit fields
Regards,
Graeme

Dear Graeme Russ,
In message 4DF8A8CF.5000308@gmail.com you wrote:
And to set the value then you have:
reg &= ~a_mask;				/* Clear a_val */
reg |= (a_val << a_shift) & a_mask;	/* Set new a_val */
This could be done using
clrsetbits_le32(&reg, a_mask, a_val << a_shift);
Best regards,
Wolfgang Denk

Hi Wolfgang,
On 15/06/11 22:51, Wolfgang Denk wrote:
Dear Graeme Russ,
In message 4DF8A8CF.5000308@gmail.com you wrote:
And to set the value then you have:
reg &= ~a_mask;				/* Clear a_val */
reg |= (a_val << a_shift) & a_mask;	/* Set new a_val */
This could be done using
clrsetbits_le32(&reg, a_mask, a_val << a_shift);
Well the funny thing is, I find the former more obvious than the later ;) (but that's because I am not familiar with clrsetbits*) - But yes, 'I get it'
Regards,
Graeme

On 15/06/11 22:51, Wolfgang Denk wrote:
Dear Graeme Russ,
In message 4DF8A8CF.5000308@gmail.com you wrote:
And to set the value then you have:
reg &= ~a_mask;				/* Clear a_val */
reg |= (a_val << a_shift) & a_mask;	/* Set new a_val */
This could be done using
clrsetbits_le32(&reg, a_mask, a_val << a_shift);
Not quite:
clrsetbits_le32(&reg, a_mask, (a_val << a_shift) & a_mask);
is equivalent except that, as already pointed out, clrsetbits and friends:
a) Are not portable because only ARM and PPC define them which makes
   them, by definition, non-standard
b) Each invocation results in a read barrier plus a write barrier
c) If the hardware register is sensitive to partial updates (i.e. requires
   all bit-fields to be updated in one operation) this requires a read into
   a local variable, calls to clrsetbits against that variable and finally
   a write-back - Lots of memory barriers
I know I'm going over old ground, and it's not that I am against clrsetbits, it's just good to know the limitations up-front. So remember, clrsetbits is not platform independent, prevents the compiler from optimising and will most likely impose a performance hit
Regards,
Graeme

Dear Graeme Russ,
In message 4DF9E409.1060600@gmail.com you wrote:
is equivalent except that, as already pointed out, clrsetbits and friends:
a) Are not portable because only ARM and PPC define them which makes them, by definition, non-standard
They should be added to _any_ arch/<arch>/include/asm/io.h that doesn't use them yet.
b) Each invocation results in a read barrier plus a write barrier
...which is intentional (actually mandatory) when accessing I/O memory, and negligible overhead on plain memory.
c) If the hardware register is sensitive to partial updates (i.e. requires
   all bit-fields to be updated in one operation) this requires a read into
   a local variable, calls to clrsetbits against that variable and finally
   a write-back - Lots of memory barriers
Wrong. The macro does this automatically. There is only a single read and a single write per call.
Best regards,
Wolfgang Denk

Hi Wolfgang,
On Thu, Jun 16, 2011 at 9:46 PM, Wolfgang Denk wd@denx.de wrote:
Dear Graeme Russ,
In message 4DF9E409.1060600@gmail.com you wrote:
is equivalent except that, as already pointed out, clrsetbits and friends:
a) Are not portable because only ARM and PPC define them which makes
   them, by definition, non-standard
Agreed - As mentioned in another post, I will look to implement clrsetbits and friends in x86 at some point
They should be added to _any_ arch/<arch>/include/asm/io.h that doesn't use them yet.
b) Each invocation results in a read barrier plus a write barrier
...which is intentional (actually mandatory) when accessing I/O memory, and negligible overhead on plain memory.
Agreed - the raw performance overhead of the barriers is most likely negligible, although is there possibly a cache flush/sync operation or a flush of the branch predictor that could have a more significant impact? I don't see this as such a huge problem, since clrsetbits is (usually) used to update hardware registers, so any flush will happen anyway.
c) If the hardware register is sensitive to partial updates (i.e. requires
   all bit-fields to be updated in one operation) this requires a read into
   a local variable, calls to clrsetbits against that variable and finally
   a write-back - Lots of memory barriers
Wrong. The macro does this automatically. There is only a single read and a single write per call.
I mean:

clrsetbits(&foo_reg, foo_val_1_mask, foo_val_1);
clrsetbits(&foo_reg, foo_val_2_mask, foo_val_2);
clrsetbits(&foo_reg, foo_val_3_mask, foo_val_3);
This could cause side-effects if foo_reg is sensitive to the three foo_val's being set independently. To ensure all three values are updated in a single write to foo_reg you would need to do:
foo_tmp = readl(&foo_reg);
clrsetbits(&foo_tmp, foo_val_1_mask, foo_val_1);
clrsetbits(&foo_tmp, foo_val_2_mask, foo_val_2);
clrsetbits(&foo_tmp, foo_val_3_mask, foo_val_3);

writel(foo_tmp, &foo_reg);
Regards,
Graeme

Hi Graeme,
On Wednesday 15 June 2011 06:12 PM, Graeme Russ wrote:
On 15/06/11 22:04, Wolfgang Denk wrote:
Dear Aneesh V,
In message 4DF89102.9040508@ti.com you wrote:
Will you accept something like this?
a_val = (reg & a_mask) >> a_shift;
Yes, of course (that's what seems most natural to me).
Me too - The code is obvious - the desired value is being masked out of a larger composite value and then shifted right to bit 0
And to set the value then you have:
reg &= ~a_mask;				/* Clear a_val */
reg |= (a_val << a_shift) & a_mask;	/* Set new a_val */
AND'ing with a_mask is required to prevent accidental clobbering when a_val is out-of-range. May give undesirable results by setting an illegal a_val, but at least you don't clobber unrelated bit fields
These are exactly what my helper functions were doing. Are you suggesting that doing these directly is better than doing them using helper functions?
best regards, Aneesh

Hi Aneesh,
On Thu, Jun 16, 2011 at 3:39 PM, Aneesh V aneesh@ti.com wrote:
Hi Graeme,
On Wednesday 15 June 2011 06:12 PM, Graeme Russ wrote:
On 15/06/11 22:04, Wolfgang Denk wrote:
Dear Aneesh V,
In message 4DF89102.9040508@ti.com you wrote:
Will you accept something like this?
a_val = (reg & a_mask) >> a_shift;
Yes, of course (that's what seems most natural to me).
Me too - The code is obvious - the desired value is being masked out of a larger composite value and then shifted right to bit 0
And to set the value then you have:
reg &= ~a_mask;				/* Clear a_val */
reg |= (a_val << a_shift) & a_mask;	/* Set new a_val */
AND'ing with a_mask is required to prevent accidental clobbering when a_val is out-of-range. May give undesirable results by setting an illegal a_val, but at least you don't clobber unrelated bit fields
These are exactly what my helper functions were doing. Are you suggesting that doing these directly is better than doing them using helper functions?
I (personally) think that the two lines of memberwise bit mask/shift are more obvious in their intent than even clrsetbits. I understand there are defensive programming issues that can be addressed with macros or helper functions, but then you lose 'obviousness', and sometimes this can make figuring out the problem even harder (one assumes the macro or helper function works properly).
When push comes to shove, the compiler will probably produce identical code anyway.
Sometimes I like seeing the raw elegance of what is going on under the hood :)
Now, that being said, I see no reason not to do the following if I had, for example, multiple serial port configuration registers which are all identical:
/* num data bits is stored in bits 2-4 of the serial config register */
#define DATA_BITS_MASK		0x001c
#define DATA_BITS_OFFSET	2

u32 set_serial_data_bits(u32 ser_cfg, u8 data_bits)
{
	ser_cfg &= ~DATA_BITS_MASK;
	ser_cfg |= ((u32)data_bits << DATA_BITS_OFFSET) & DATA_BITS_MASK;

	return ser_cfg;
}
void serial_init(void)
{
	int i;
	u32 ser_cfg;

	for (i = 0; i < NUM_SERIAL_PORTS; i++) {
		ser_cfg = read_serial_cfg(i);
		ser_cfg = set_serial_data_bits(ser_cfg, 7);
		write_serial_cfg(i, ser_cfg);
	}
}
But that's just me - I tend to avoid #define macros
Regards,
Graeme

Dear Graeme Russ,
In message BANLkTik3cXeMzU0qpxDLSQMtK-AbEoDCPQ@mail.gmail.com you wrote:
Now, that being said, I see no reason not to do the following if I had, for example, multiple serial port configuration registers which are all identical:
/* num data bits is stored in bits 2-4 of the serial config register */
#define DATA_BITS_MASK		0x001c
#define DATA_BITS_OFFSET	2

u32 set_serial_data_bits(u32 ser_cfg, u8 data_bits)
{
	ser_cfg &= ~DATA_BITS_MASK;
	ser_cfg |= ((u32)data_bits << DATA_BITS_OFFSET) & DATA_BITS_MASK;

	return ser_cfg;
}

void serial_init(void)
{
	int i;
	u32 ser_cfg;

	for (i = 0; i < NUM_SERIAL_PORTS; i++) {
		ser_cfg = read_serial_cfg(i);
		ser_cfg = set_serial_data_bits(ser_cfg, 7);
		write_serial_cfg(i, ser_cfg);
	}
}
One reason for not doing this is that we should not reinvent the wheel again and again, and instead use standard APIs.
I cannot find any such code in U-Boot, so I cannot check, but to me it smells a lot as if this code should rather use clrsetbits_*() and other proper I/O accessors.
Best regards,
Wolfgang Denk

On 16/06/11 18:15, Wolfgang Denk wrote:
Dear Graeme Russ,
In message BANLkTik3cXeMzU0qpxDLSQMtK-AbEoDCPQ@mail.gmail.com you wrote:
Now, that being said, I see no reason not to do the following if I had, for example, multiple serial port configuration registers which are all identical:
/* num data bits is stored in bits 2-4 of the serial config register */
#define DATA_BITS_MASK		0x001c
#define DATA_BITS_OFFSET	2

u32 set_serial_data_bits(u32 ser_cfg, u8 data_bits)
{
	ser_cfg &= ~DATA_BITS_MASK;
	ser_cfg |= ((u32)data_bits << DATA_BITS_OFFSET) & DATA_BITS_MASK;

	return ser_cfg;
}

void serial_init(void)
{
	int i;
	u32 ser_cfg;

	for (i = 0; i < NUM_SERIAL_PORTS; i++) {
		ser_cfg = read_serial_cfg(i);
		ser_cfg = set_serial_data_bits(ser_cfg, 7);
		write_serial_cfg(i, ser_cfg);
	}
}
One reason for not doing this is that we should not reinvent the wheel again and again, and instead use standard APIs.
I cannot find any such code in U-Boot, so I cannot check, but to me it smells a lot as if this code should rather use clrsetbits_*() and other proper I/O accessors.
Except nobody outside ARM and PPC knows about clrsetbits and friends, so I would not call them a standard API
I will, however, keep them in mind and implement them for x86 when I have a need for bit-field operations
Regards,
Graeme

- Add a framework for layered cache maintenance
  - separate out SOC specific outer cache maintenance from maintenance
    of caches known to CPU
- Add generic ARMv7 cache maintenance operations that affect all caches known to ARMv7 CPUs. For instance in Cortex-A8 these operations will affect both L1 and L2 caches. In Cortex-A9 these will affect only the L1 cache
- D-cache operations supported:
  - Invalidate entire D-cache
  - Invalidate D-cache range
  - Flush(clean & invalidate) entire D-cache
  - Flush D-cache range
- I-cache operations supported:
  - Invalidate entire I-cache
- Add maintenance functions for TLB, branch predictor array etc.
- Enable -march=armv7-a so that armv7 assembly instructions can be used
Signed-off-by: Aneesh V aneesh@ti.com --- V2: * Blank line after local variable declarations - fixed globally * Change from pointers to weakly linked functions for outer cache operations * Explicitly added a copy of flush_cache() under #ifdefCONFIG_SYS_NO_DCACHE * Change -march=armv7-a back to armv5 * Update documentation with cache related CONFIG options --- README | 5 + arch/arm/cpu/armv7/Makefile | 2 +- arch/arm/cpu/armv7/cache_v7.c | 390 +++++++++++++++++++++++++++++++++++++++++ arch/arm/include/asm/armv7.h | 68 +++++++ include/common.h | 5 +- 5 files changed, 468 insertions(+), 2 deletions(-) create mode 100644 arch/arm/cpu/armv7/cache_v7.c create mode 100644 arch/arm/include/asm/armv7.h
diff --git a/README b/README index 76b1500..c3b6bec 100644 --- a/README +++ b/README @@ -455,6 +455,11 @@ The following options need to be configured: Note: If a "bootargs" environment is defined, it will overwride the defaults discussed just above.
+- Cache Configuration: + CONFIG_SYS_NO_ICACHE - Do not enable instruction cache in U-Boot + CONFIG_SYS_NO_DCACHE - Do not enable data cache in U-Boot + CONFIG_SYS_NO_L2CACHE- Do not enable L2 cache in U-Boot + - Serial Ports: CONFIG_PL010_SERIAL
diff --git a/arch/arm/cpu/armv7/Makefile b/arch/arm/cpu/armv7/Makefile index 8c0e915..299792a 100644 --- a/arch/arm/cpu/armv7/Makefile +++ b/arch/arm/cpu/armv7/Makefile @@ -26,7 +26,7 @@ include $(TOPDIR)/config.mk LIB = $(obj)lib$(CPU).o
START := start.o -COBJS := cpu.o +COBJS := cpu.o cache_v7.o COBJS += syslib.o
SRCS := $(START:.o=.S) $(COBJS:.o=.c) diff --git a/arch/arm/cpu/armv7/cache_v7.c b/arch/arm/cpu/armv7/cache_v7.c new file mode 100644 index 0000000..46d8e09 --- /dev/null +++ b/arch/arm/cpu/armv7/cache_v7.c @@ -0,0 +1,390 @@ +/* + * (C) Copyright 2010 + * Texas Instruments Incorporated - http://www.ti.com/ + * Aneesh V aneesh@ti.com + * + * See file CREDITS for list of people who contributed to this + * project. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + */ +#include <linux/types.h> +#include <common.h> +#include <asm/armv7.h> +#include <asm/utils.h> + +#define ARMV7_DCACHE_INVAL_ALL 1 +#define ARMV7_DCACHE_CLEAN_INVAL_ALL 2 +#define ARMV7_DCACHE_INVAL_RANGE 3 +#define ARMV7_DCACHE_CLEAN_INVAL_RANGE 4 + +#ifndef CONFIG_SYS_NO_DCACHE +/* + * Write the level and type you want to Cache Size Selection Register(CSSELR) + * to get size details from Current Cache Size ID Register(CCSIDR) + */ +static void set_csselr(u32 level, u32 type) +{ u32 csselr = level << 1 | type; + + /* Write to Cache Size Selection Register(CSSELR) */ + asm volatile ("mcr p15, 2, %0, c0, c0, 0" : : "r" (csselr)); +} + +static u32 get_ccsidr(void) +{ + u32 ccsidr; + + /* Read current CP15 Cache Size ID Register */ + asm volatile ("mrc p15, 1, %0, c0, c0, 0" : "=r" (ccsidr)); + return ccsidr; +} + +static u32 get_clidr(void) +{ + u32 clidr; + + /* Read current CP15 Cache Level ID Register */ + asm volatile ("mrc p15,1,%0,c0,c0,1" : "=r" (clidr)); + return clidr; +} + +static void v7_inval_dcache_level_setway(u32 level, u32 num_sets, + u32 num_ways, u32 way_shift, + u32 log2_line_len) +{ + int way, set, setway; + + /* + * For optimal assembly code: + * a. count down + * b. have bigger loop inside + */ + for (way = num_ways - 1; way >= 0 ; way--) + for (set = num_sets - 1; set >= 0; set--) { + setway = (level << 1) | (set << log2_line_len) | + (way << way_shift); + /* Invalidate data/unified cache line by set/way */ + asm volatile (" mcr p15, 0, %0, c7, c6, 2" + : : "r" (setway)); + } + /* DMB to make sure the operation is complete */ + CP15DMB; +} + +static void v7_clean_inval_dcache_level_setway(u32 level, u32 num_sets, + u32 num_ways, u32 way_shift, + u32 log2_line_len) +{ + int way, set, setway; + + /* + * For optimal assembly code: + * a. count down + * b. 
have bigger loop inside + */ + for (way = num_ways - 1; way >= 0 ; way--) + for (set = num_sets - 1; set >= 0; set--) { + setway = (level << 1) | (set << log2_line_len) | + (way << way_shift); + /* + * Clean & Invalidate data/unified + * cache line by set/way + */ + asm volatile (" mcr p15, 0, %0, c7, c14, 2" + : : "r" (setway)); + } + /* DMB to make sure the operation is complete */ + CP15DMB; +} + +static void v7_maint_dcache_level_setway(u32 level, u32 operation) +{ + u32 ccsidr; + u32 num_sets, num_ways, log2_line_len, log2_num_ways; + u32 way_shift; + + set_csselr(level, ARMV7_CSSELR_IND_DATA_UNIFIED); + + ccsidr = get_ccsidr(); + + log2_line_len = get_bit_field(ccsidr, CCSIDR_LINE_SIZE_OFFSET, + CCSIDR_LINE_SIZE_MASK) + 2; + /* Converting from words to bytes */ + log2_line_len += 2; + + num_ways = get_bit_field(ccsidr, CCSIDR_ASSOCIATIVITY_OFFSET, + CCSIDR_ASSOCIATIVITY_MASK) + 1; + num_sets = get_bit_field(ccsidr, CCSIDR_NUM_SETS_OFFSET, + CCSIDR_NUM_SETS_MASK) + 1; + /* + * According to ARMv7 ARM number of sets and number of ways need + * not be a power of 2 + */ + log2_num_ways = log_2_n_round_up(num_ways); + + way_shift = (32 - log2_num_ways); + if (operation == ARMV7_DCACHE_INVAL_ALL) + v7_inval_dcache_level_setway(level, num_sets, num_ways, + way_shift, log2_line_len); + else if (operation == ARMV7_DCACHE_CLEAN_INVAL_ALL) + v7_clean_inval_dcache_level_setway(level, num_sets, num_ways, + way_shift, log2_line_len); +} + +static void v7_maint_dcache_all(u32 operation) +{ + u32 level, cache_type, level_start_bit = 0; + + u32 clidr = get_clidr(); + + for (level = 0; level < 7; level++) { + cache_type = get_bit_field(clidr, level_start_bit, + 0x7 << level_start_bit); + if ((cache_type == ARMV7_CLIDR_CTYPE_DATA_ONLY) || + (cache_type == ARMV7_CLIDR_CTYPE_INSTRUCTION_DATA) || + (cache_type == ARMV7_CLIDR_CTYPE_UNIFIED)) + v7_maint_dcache_level_setway(level, operation); + level_start_bit += 3; + } +} + +static void v7_dcache_clean_inval_range(u32 start, + u32 stop, u32 line_len) +{ + u32 mva; + + /* Align start to cache line boundary */ + start &= ~(line_len - 1); + for (mva = start; mva < stop; mva = mva + line_len) + /* DCCIMVAC - Clean & Invalidate data cache by MVA to PoC */ + asm volatile ("mcr p15, 0, %0, c7, c14, 1" : : "r" (mva)); +} + +static void v7_dcache_inval_range(u32 start, u32 stop, u32 line_len) +{ + u32 mva; + + /* + * If start address is not aligned to cache-line flush the first + * line to prevent affecting somebody else's buffer + */ + if (start & (line_len - 1)) { + v7_dcache_clean_inval_range(start, start + 1, line_len); + /* move to next cache line */ + start = (start + line_len - 1) & ~(line_len - 1); + } + + /* + * If stop address is not aligned to cache-line flush the last + * line to prevent affecting somebody else's buffer + */ + if (stop & (line_len - 1)) { + v7_dcache_clean_inval_range(stop, stop + 1, line_len); + /* align to the beginning of this cache line */ + stop &= ~(line_len - 1); + } + + for (mva = start; mva < stop; mva = mva + line_len) + /* DCIMVAC - Invalidate data cache by MVA to PoC */ + asm volatile ("mcr p15, 0, %0, c7, c6, 1" : : "r" (mva)); +} + +static void v7_dcache_maint_range(u32 start, u32 stop, u32 range_op) +{ + u32 line_len, ccsidr; + + ccsidr = get_ccsidr(); + line_len = get_bit_field(ccsidr, CCSIDR_LINE_SIZE_OFFSET, + CCSIDR_LINE_SIZE_MASK) + 2; + /* Converting from words to bytes */ + line_len += 2; + /* converting from log2(linelen) to linelen */ + line_len = 1 << line_len; + + switch (range_op) { + case 
ARMV7_DCACHE_CLEAN_INVAL_RANGE: + v7_dcache_clean_inval_range(start, stop, line_len); + break; + case ARMV7_DCACHE_INVAL_RANGE: + v7_dcache_inval_range(start, stop, line_len); + break; + } + + /* DMB to make sure the operation is complete */ + CP15DMB; +} + +/* Invalidate TLB */ +static void v7_inval_tlb(void) +{ + /* Invalidate entire unified TLB */ + asm volatile ("mcr p15, 0, %0, c8, c7, 0" : : "r" (0)); + /* Invalidate entire data TLB */ + asm volatile ("mcr p15, 0, %0, c8, c6, 0" : : "r" (0)); + /* Invalidate entire instruction TLB */ + asm volatile ("mcr p15, 0, %0, c8, c5, 0" : : "r" (0)); + /* Full system DSB - make sure that the invalidation is complete */ + CP15DSB; + /* Full system ISB - make sure the instruction stream sees it */ + CP15ISB; +} + +void invalidate_dcache_all(void) +{ + v7_maint_dcache_all(ARMV7_DCACHE_INVAL_ALL); + + v7_outer_cache_inval_all(); +} + +/* + * Performs a clean & invalidation of the entire data cache + * at all levels + */ +void flush_dcache_all(void) +{ + v7_maint_dcache_all(ARMV7_DCACHE_CLEAN_INVAL_ALL); + + v7_outer_cache_flush_all(); +} + +/* + * Invalidates range in all levels of D-cache/unified cache used: + * Affects the range [start, stop - 1] + */ +void invalidate_dcache_range(unsigned long start, unsigned long stop) +{ + + v7_dcache_maint_range(start, stop, ARMV7_DCACHE_INVAL_RANGE); + + v7_outer_cache_inval_range(start, stop); +} + +/* + * Flush range(clean & invalidate) from all levels of D-cache/unified + * cache used: + * Affects the range [start, stop - 1] + */ +void flush_dcache_range(unsigned long start, unsigned long stop) +{ + v7_dcache_maint_range(start, stop, ARMV7_DCACHE_CLEAN_INVAL_RANGE); + + v7_outer_cache_flush_range(start, stop); +} + +void arm_init_before_mmu(void) +{ + v7_outer_cache_enable(); + invalidate_dcache_all(); + v7_inval_tlb(); +} + +/* + * Flush range from all levels of d-cache/unified-cache used: + * Affects the range [start, start + size - 1] + */ +void flush_cache(unsigned long start, unsigned long size) +{ + flush_dcache_range(start, start + size); +} +#else /* #ifndef CONFIG_SYS_NO_DCACHE */ +void invalidate_dcache_all(void) +{ +} + +void flush_dcache_all(void) +{ +} + +void invalidate_dcache_range(unsigned long start, unsigned long stop) +{ +} + +void flush_dcache_range(unsigned long start, unsigned long stop) +{ +} + +void arm_init_before_mmu(void) +{ +} + +void flush_cache(unsigned long start, unsigned long size) +{ +} +#endif /* #ifndef CONFIG_SYS_NO_DCACHE */ + +#ifndef CONFIG_SYS_NO_ICACHE +/* Invalidate entire I-cache and branch predictor array */ +void invalidate_icache_all(void) +{ + /* + * Invalidate all instruction caches to PoU. + * Also flushes branch target cache. 
+ */ + asm volatile ("mcr p15, 0, %0, c7, c5, 0" : : "r" (0)); + + /* Invalidate entire branch predictor array */ + asm volatile ("mcr p15, 0, %0, c7, c5, 6" : : "r" (0)); + + /* Full system DSB - make sure that the invalidation is complete */ + CP15DSB; + + /* ISB - make sure the instruction stream sees it */ + CP15ISB; +} +#else +void invalidate_icache_all(void) +{ +} +#endif + +/* + * Stub implementations for outer cache operations + */ +void __v7_outer_cache_enable(void) +{ +} +void v7_outer_cache_enable(void) + __attribute__((weak, alias("__v7_outer_cache_enable"))); + +void __v7_outer_cache_disable(void) +{ +} +void v7_outer_cache_disable(void) + __attribute__((weak, alias("__v7_outer_cache_disable"))); + +void __v7_outer_cache_flush_all(void) +{ +} +void v7_outer_cache_flush_all(void) + __attribute__((weak, alias("__v7_outer_cache_flush_all"))); + +void __v7_outer_cache_inval_all(void) +{ +} +void v7_outer_cache_inval_all(void) + __attribute__((weak, alias("__v7_outer_cache_inval_all"))); + +void __v7_outer_cache_flush_range(u32 start, u32 end) +{ +} +void v7_outer_cache_flush_range(u32 start, u32 end) + __attribute__((weak, alias("__v7_outer_cache_flush_range"))); + +void __v7_outer_cache_inval_range(u32 start, u32 end) +{ +} +void v7_outer_cache_inval_range(u32 start, u32 end) + __attribute__((weak, alias("__v7_outer_cache_inval_range"))); diff --git a/arch/arm/include/asm/armv7.h b/arch/arm/include/asm/armv7.h new file mode 100644 index 0000000..50cc167 --- /dev/null +++ b/arch/arm/include/asm/armv7.h @@ -0,0 +1,68 @@ +/* + * (C) Copyright 2010 + * Texas Instruments Incorporated - http://www.ti.com/ + * + * Aneesh V aneesh@ti.com + * + * See file CREDITS for list of people who contributed to this + * project. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + */ +#ifndef ARMV7_H +#define ARMV7_H +#include <linux/types.h> + +/* CCSIDR */ +#define CCSIDR_LINE_SIZE_OFFSET 0 +#define CCSIDR_LINE_SIZE_MASK 0x7 +#define CCSIDR_ASSOCIATIVITY_OFFSET 3 +#define CCSIDR_ASSOCIATIVITY_MASK (0x3FF << 3) +#define CCSIDR_NUM_SETS_OFFSET 13 +#define CCSIDR_NUM_SETS_MASK (0x7FFF << 13) + +/* + * Values for InD field in CSSELR + * Selects the type of cache + */ +#define ARMV7_CSSELR_IND_DATA_UNIFIED 0 +#define ARMV7_CSSELR_IND_INSTRUCTION 1 + +/* Values for Ctype fields in CLIDR */ +#define ARMV7_CLIDR_CTYPE_NO_CACHE 0 +#define ARMV7_CLIDR_CTYPE_INSTRUCTION_ONLY 1 +#define ARMV7_CLIDR_CTYPE_DATA_ONLY 2 +#define ARMV7_CLIDR_CTYPE_INSTRUCTION_DATA 3 +#define ARMV7_CLIDR_CTYPE_UNIFIED 4 + +/* + * CP15 Barrier instructions + * Please note that we have separate barrier instructions in ARMv7 + * However, we use the CP15 based instructtions because we use + * -march=armv5 in U-Boot + */ +#define CP15ISB asm volatile ("mcr p15, 0, %0, c7, c5, 4" : : "r" (0)) +#define CP15DSB asm volatile ("mcr p15, 0, %0, c7, c10, 4" : : "r" (0)) +#define CP15DMB asm volatile ("mcr p15, 0, %0, c7, c10, 5" : : "r" (0)) + +void v7_outer_cache_enable(void); +void v7_outer_cache_disable(void); +void v7_outer_cache_flush_all(void); +void v7_outer_cache_inval_all(void); +void v7_outer_cache_flush_range(u32 start, u32 end); +void v7_outer_cache_inval_range(u32 start, u32 end); + +#endif diff --git a/include/common.h b/include/common.h index 00e266e..4f47ef6 100644 --- a/include/common.h +++ b/include/common.h @@ -413,6 +413,7 @@ void icache_disable(void); int dcache_status (void); void dcache_enable (void); void dcache_disable(void); +void mmu_disable(void); void relocate_code (ulong, gd_t *, ulong) __attribute__ ((noreturn)); ulong get_endaddr (void); void trap_init (ulong); @@ -614,9 +615,11 @@ ulong video_setmem (ulong);
/* arch/$(ARCH)/lib/cache.c */ void flush_cache (unsigned long, unsigned long); +void flush_dcache_all(void); void flush_dcache_range(unsigned long start, unsigned long stop); void invalidate_dcache_range(unsigned long start, unsigned long stop); - +void invalidate_dcache_all(void); +void invalidate_icache_all(void);
/* arch/$(ARCH)/lib/ticks.S */ unsigned long long get_ticks(void);

Dear Aneesh V,
In message 1305202276-27784-4-git-send-email-aneesh@ti.com you wrote:
Add a framework for layered cache maintenance
- separate out SOC specific outer cache maintenance from maintenance of caches known to CPU
Add generic ARMv7 cache maintenance operations that affect all caches known to ARMv7 CPUs. For instance in Cortex-A8 these operations will affect both L1 and L2 caches. In Cortex-A9 these will affect only the L1 cache
D-cache operations supported:
- Invalidate entire D-cache
- Invalidate D-cache range
- Flush(clean & invalidate) entire D-cache
- Flush D-cache range
How much of this is actually needed in the context of U-Boot?
...
- for (way = num_ways - 1; way >= 0 ; way--)
for (set = num_sets - 1; set >= 0; set--) {
setway = (level << 1) | (set << log2_line_len) |
(way << way_shift);
/* Invalidate data/unified cache line by set/way */
asm volatile (" mcr p15, 0, %0, c7, c6, 2"
: : "r" (setway));
}
Braces needed for multiline for(). Please fix globally.
...
- if (operation == ARMV7_DCACHE_INVAL_ALL)
v7_inval_dcache_level_setway(level, num_sets, num_ways,
way_shift, log2_line_len);
- else if (operation == ARMV7_DCACHE_CLEAN_INVAL_ALL)
v7_clean_inval_dcache_level_setway(level, num_sets, num_ways,
way_shift, log2_line_len);
Braces needed for multiline statements. Please fix globally.
- for (mva = start; mva < stop; mva = mva + line_len)
/* DCCIMVAC - Clean & Invalidate data cache by MVA to PoC */
asm volatile ("mcr p15, 0, %0, c7, c14, 1" : : "r" (mva));
And again etc. etc.
...
+void invalidate_dcache_all(void) +{ +}
+void flush_dcache_all(void) +{ +}
+void invalidate_dcache_range(unsigned long start, unsigned long stop) +{ +}
+void flush_dcache_range(unsigned long start, unsigned long stop) +{ +}
+void arm_init_before_mmu(void) +{ +}
+void flush_cache(unsigned long start, unsigned long size) +{ +}
Please do not add dead code. I consider it misleading to have functions which promise to perform something, and actually do nothing.
Best regards,
Wolfgang Denk

On Monday 16 May 2011 12:21 AM, Wolfgang Denk wrote:
Dear Aneesh V,
In message 1305202276-27784-4-git-send-email-aneesh@ti.com you wrote:
Add a framework for layered cache maintenance
- separate out SOC specific outer cache maintenance from maintenance of caches known to CPU
Add generic ARMv7 cache maintenance operations that affect all caches known to ARMv7 CPUs. For instance, in Cortex-A8 these operations will affect both L1 and L2 caches; in Cortex-A9 they will affect only the L1 cache.
D-cache operations supported:
- Invalidate entire D-cache
Needed before enabling the caches.
- Invalidate D-cache range
Needed if you are doing DMA.
- Flush(clean & invalidate) entire D-cache
Needed before booting Linux, when disabling the caches.
- Flush D-cache range
Needed in case of DMA out.
How much of this is actually needed in the context of U-Boot?
Please see above. As far as I know OMAP doesn't do DMA in U-Boot.
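For the DMA cases mentioned above, a minimal sketch of how the new range operations would typically be used around a transfer. Only flush_dcache_range() and invalidate_dcache_range() come from this series; the driver calls and buffer names are hypothetical:

/* Hypothetical driver code; buf should be cache-line aligned and len a
 * multiple of the cache line size to avoid corrupting neighbouring data */
static void dma_transfer_example(void *buf, unsigned long len)
{
	/* DMA out: push the buffer contents to memory before the device reads it */
	flush_dcache_range((unsigned long)buf, (unsigned long)buf + len);
	start_dma_to_device(buf, len);		/* hypothetical driver call */

	/* DMA in: discard stale lines before the CPU reads the freshly DMA'd data */
	start_dma_from_device(buf, len);	/* hypothetical driver call */
	wait_for_dma_completion();		/* hypothetical driver call */
	invalidate_dcache_range((unsigned long)buf, (unsigned long)buf + len);
}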
...
- for (way = num_ways - 1; way >= 0 ; way--)
for (set = num_sets - 1; set >= 0; set--) {
setway = (level << 1) | (set << log2_line_len) |
(way << way_shift);
/* Invalidate data/unified cache line by set/way */
asm volatile (" mcr p15, 0, %0, c7, c6, 2"
: : "r" (setway));
}
Braces needed for multiline for(). Please fix globally.
Ok.
...
- if (operation == ARMV7_DCACHE_INVAL_ALL)
v7_inval_dcache_level_setway(level, num_sets, num_ways,
way_shift, log2_line_len);
- else if (operation == ARMV7_DCACHE_CLEAN_INVAL_ALL)
v7_clean_inval_dcache_level_setway(level, num_sets, num_ways,
way_shift, log2_line_len);
Braces needed for multiline statements. Please fix globally.
- for (mva = start; mva < stop; mva = mva + line_len)
/* DCCIMVAC - Clean & Invalidate data cache by MVA to PoC */
asm volatile ("mcr p15, 0, %0, c7, c14, 1" : : "r" (mva));
And again etc. etc.
...
+void invalidate_dcache_all(void) +{ +}
+void flush_dcache_all(void) +{ +}
+void invalidate_dcache_range(unsigned long start, unsigned long stop) +{ +}
+void flush_dcache_range(unsigned long start, unsigned long stop) +{ +}
+void arm_init_before_mmu(void) +{ +}
+void flush_cache(unsigned long start, unsigned long size) +{ +}
Please do not add dead code. I consider it misleading to have functions which promise to perform something, and actually do nothing.
These are empty only in the else part of an #ifdef. When cache support is compiled out these need to be empty.
best regards, Aneesh

Dear Aneesh V,
In message 4DD23D3A.4010001@ti.com you wrote:
How much of this is actually needed in the context of U-Boot?
Please see above. As far as I know OMAP doesn't do DMA in U-Boot.
Devices like USB or Ethernet don't use DMA for data transfers?
Please do not add dead code. I consider it misleading to have functions which promise to perform something, and actually do nothing.
These are empty only in the else part of an #ifdef. When cache support is compiled out these need to be empty.
When cache support is compiled out there should be no callers to these functions, so maybe they can disappear?
Best regards,
Wolfgang Denk

On Tuesday 17 May 2011 03:01 PM, Wolfgang Denk wrote:
Dear Aneesh V,
In message 4DD23D3A.4010001@ti.com you wrote:
How much of this is actually needed in the context of U-Boot?
Please see above. As far as I know OMAP doesn't do DMA in U-Boot.
Devices like USB or Ethernet don't use DMA for data transfers?
Please do not add dead code. I consider it misleading to have functions which promise to perform something, and actually do nothing.
These are empty only in the else part of an #ifdef. When cache support is compiled out these need to be empty.
When cache support is compiled out there should be no callers to these functions, so maybe they can disappear?
No, there may be callers in drivers. It would be ugly to keep all the calls in various drivers under #ifdef, right? Isn't it better to have some empty functions rather than worry about the calls in various places?
best regards, Aneesh
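A sketch of the two alternatives being discussed; the driver call site is illustrative, and the stub form follows the empty functions quoted earlier in this thread:

/* Alternative 1: every driver call site is wrapped in an #ifdef */
#ifndef CONFIG_SYS_NO_DCACHE
	flush_dcache_range(start, stop);
#endif

/* Alternative 2 (this series): the cache code provides empty stubs when
 * cache support is compiled out, so drivers can call unconditionally */
#ifdef CONFIG_SYS_NO_DCACHE
void flush_dcache_range(unsigned long start, unsigned long stop)
{
}
#endif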

Hi Wolfgang,
On Tuesday 17 May 2011 03:01 PM, Wolfgang Denk wrote:
Dear Aneesh V,
In message 4DD23D3A.4010001@ti.com you wrote:
How much of this is actually needed in the context of U-Boot?
Please see above. As far as I know OMAP doesn't do DMA in U-Boot.
Devices like USB or Ethernet don't use DMA for data transfers?
Ethernet support is not upstreamed yet. USB doesn't seem to be using DMA on a quick check.
best regards, Aneesh

Hi,
I am picking up on this old thread. My main point is that I think the armv7 cache implementation is a good start and should go into U-Boot. Aneesh, are you planning another patch set? Also see below:
On Tue, May 17, 2011 at 2:58 AM, Aneesh V aneesh@ti.com wrote:
Hi Wolfgang,
On Tuesday 17 May 2011 03:01 PM, Wolfgang Denk wrote:
Dear Aneesh V,
In message 4DD23D3A.4010001@ti.com you wrote:
How much of this is actually needed in the context of U-Boot?
Please see above. As far as I know OMAP doesn't do DMA in U-Boot.
Devices like USB or Ethernet don't use DMA for data transfers?
Ethernet support is not upstreamed yet. USB doesn't seem to be using DMA on a quick check.
There is DMA on the USB EHCI side, and it also does D-cache range invalidate/flush.
Regards, Simon
best regards, Aneesh

replace all occurrences of CONFIG_L2_OFF with a more appropriate CONFIG_SYS_NO_L2CACHE
CONFIG_SYS_NO_L2CACHE has been chosen to be in line with CONFIG_SYS_NO_ICACHE and CONFIG_SYS_NO_DCACHE
Signed-off-by: Aneesh V aneesh@ti.com --- arch/arm/cpu/armv7/cpu.c | 6 +----- include/configs/ca9x4_ct_vxp.h | 2 +- include/configs/efikamx.h | 2 +- include/configs/mx51evk.h | 2 +- include/configs/mx53evk.h | 2 +- include/configs/omap4_panda.h | 2 +- include/configs/omap4_sdp4430.h | 2 +- include/configs/s5pc210_universal.h | 2 +- include/configs/tegra2-common.h | 2 +- include/configs/vision2.h | 2 +- 10 files changed, 10 insertions(+), 14 deletions(-)
diff --git a/arch/arm/cpu/armv7/cpu.c b/arch/arm/cpu/armv7/cpu.c index a01e0d6..7f28d87 100644 --- a/arch/arm/cpu/armv7/cpu.c +++ b/arch/arm/cpu/armv7/cpu.c @@ -35,11 +35,7 @@ #include <command.h> #include <asm/system.h> #include <asm/cache.h> -#ifndef CONFIG_L2_OFF -#include <asm/arch/sys_proto.h> -#endif - -static void cache_flush(void); +#include <asm/armv7.h>
int cleanup_before_linux(void) { diff --git a/include/configs/ca9x4_ct_vxp.h b/include/configs/ca9x4_ct_vxp.h index 7f83249..2e025f0 100644 --- a/include/configs/ca9x4_ct_vxp.h +++ b/include/configs/ca9x4_ct_vxp.h @@ -41,7 +41,7 @@
#define CONFIG_CMDLINE_TAG 1 /* enable passing of ATAGs */ #define CONFIG_SETUP_MEMORY_TAGS 1 -#define CONFIG_L2_OFF 1 +#define CONFIG_SYS_NO_L2CACHE 1 #define CONFIG_INITRD_TAG 1
#define CONFIG_OF_LIBFDT 1 diff --git a/include/configs/efikamx.h b/include/configs/efikamx.h index 571c3cb..c2ab599 100644 --- a/include/configs/efikamx.h +++ b/include/configs/efikamx.h @@ -38,7 +38,7 @@ #define CONFIG_DISPLAY_CPUINFO #define CONFIG_DISPLAY_BOARDINFO
-#define CONFIG_L2_OFF +#define CONFIG_SYS_NO_L2CACHE
/* * Bootloader Components Configuration diff --git a/include/configs/mx51evk.h b/include/configs/mx51evk.h index 6a785f8..6fc37c7 100644 --- a/include/configs/mx51evk.h +++ b/include/configs/mx51evk.h @@ -35,7 +35,7 @@
#define CONFIG_SYS_TEXT_BASE 0x97800000
-#define CONFIG_L2_OFF +#define CONFIG_SYS_NO_L2CACHE
#include <asm/arch/imx-regs.h> /* diff --git a/include/configs/mx53evk.h b/include/configs/mx53evk.h index 5749a08..a5619a2 100644 --- a/include/configs/mx53evk.h +++ b/include/configs/mx53evk.h @@ -29,7 +29,7 @@ #define CONFIG_DISPLAY_CPUINFO #define CONFIG_DISPLAY_BOARDINFO
-#define CONFIG_L2_OFF +#define CONFIG_SYS_NO_L2CACHE
#include <asm/arch/imx-regs.h>
diff --git a/include/configs/omap4_panda.h b/include/configs/omap4_panda.h index ffcc9aa..12daebc 100644 --- a/include/configs/omap4_panda.h +++ b/include/configs/omap4_panda.h @@ -46,7 +46,7 @@ #define CONFIG_DISPLAY_BOARDINFO 1
/* Keep L2 Cache Disabled */ -#define CONFIG_L2_OFF 1 +#define CONFIG_SYS_NO_L2CACHE 1
/* Clock Defines */ #define V_OSCK 38400000 /* Clock output from T2 */ diff --git a/include/configs/omap4_sdp4430.h b/include/configs/omap4_sdp4430.h index 8d04d07..673cd5c 100644 --- a/include/configs/omap4_sdp4430.h +++ b/include/configs/omap4_sdp4430.h @@ -47,7 +47,7 @@ #define CONFIG_DISPLAY_BOARDINFO 1
/* Keep L2 Cache Disabled */ -#define CONFIG_L2_OFF 1 +#define CONFIG_SYS_NO_L2CACHE 1
/* Clock Defines */ #define V_OSCK 38400000 /* Clock output from T2 */ diff --git a/include/configs/s5pc210_universal.h b/include/configs/s5pc210_universal.h index 5915984..7d3d0b1 100644 --- a/include/configs/s5pc210_universal.h +++ b/include/configs/s5pc210_universal.h @@ -43,7 +43,7 @@ #define CONFIG_DISPLAY_BOARDINFO
/* Keep L2 Cache Disabled */ -#define CONFIG_L2_OFF 1 +#define CONFIG_SYS_NO_L2CACHE 1
#define CONFIG_SYS_SDRAM_BASE 0x40000000 #define CONFIG_SYS_TEXT_BASE 0x44800000 diff --git a/include/configs/tegra2-common.h b/include/configs/tegra2-common.h index febce35..93c7563 100644 --- a/include/configs/tegra2-common.h +++ b/include/configs/tegra2-common.h @@ -31,7 +31,7 @@ #define CONFIG_ARMCORTEXA9 /* This is an ARM V7 CPU core */ #define CONFIG_TEGRA2 /* in a NVidia Tegra2 core */ #define CONFIG_MACH_TEGRA_GENERIC /* which is a Tegra generic machine */ -#define CONFIG_L2_OFF /* No L2 cache */ +#define CONFIG_SYS_NO_L2CACHE /* No L2 cache */
#define CONFIG_ENABLE_CORTEXA9 /* enable CPU (A9 complex) */
diff --git a/include/configs/vision2.h b/include/configs/vision2.h index 4c8e7fa..d6c99de 100644 --- a/include/configs/vision2.h +++ b/include/configs/vision2.h @@ -26,7 +26,7 @@
#define CONFIG_MX51 /* in a mx51 */ -#define CONFIG_L2_OFF +#define CONFIG_SYS_NO_L2CACHE
#include <asm/arch/imx-regs.h>

Dear Aneesh V,
In message 1305202276-27784-5-git-send-email-aneesh@ti.com you wrote:
replace all occurrences of CONFIG_L2_OFF with a more appropriate CONFIG_SYS_NO_L2CACHE
CONFIG_SYS_NO_L2CACHE has been chosen to be in line with CONFIG_SYS_NO_ICACHE and CONFIG_SYS_NO_DCACHE
Sorry, but these are ugly misnomers. The situation is not that there is no cache present (which "NO_*CACHE" suggests), but that the cache is (kept) turned off.
CONFIG_SYS_L2CACHE_OFF is a _much_ better name than CONFIG_SYS_NO_L2CACHE.
Please fix all these names.
Best regards,
Wolfgang Denk

Hi Wolfgang,
On Monday 16 May 2011 12:23 AM, Wolfgang Denk wrote:
Dear Aneesh V,
In message 1305202276-27784-5-git-send-email-aneesh@ti.com you wrote:
replace all occurrences of CONFIG_L2_OFF with a more appropriate CONFIG_SYS_NO_L2CACHE
CONFIG_SYS_NO_L2CACHE has been chosen to be in line with CONFIG_SYS_NO_ICACHE and CONFIG_SYS_NO_DCACHE
Sorry, but these are ugly misnomers. The situation is not that there is no cache present (which "NO_*CACHE" suggests), but that the cache is (kept) turned off.
CONFIG_SYS_L2CACHE_OFF is a _much_ better name than CONFIG_SYS_NO_L2CACHE.
Please fix all these names.
Ok. Including the existing CONFIG_SYS_NO_DCACHE and CONFIG_SYS_NO_ICACHE too, right?
best regards, Aneesh

Dear Aneesh V,
In message 4DD24715.4010105@ti.com you wrote:
Please fix all these names.
Ok. Including the existing CONFIG_SYS_NO_DCACHE and CONFIG_SYS_NO_ICACHE too, right?
yes, please. Thanks a lot!
Best regards,
Wolfgang Denk

Hi Wolfgang,
On Tuesday 17 May 2011 04:39 PM, Wolfgang Denk wrote:
Dear Aneesh V,
In message 4DD24715.4010105@ti.com you wrote:
Please fix all these names.
Ok. Including the existing CONFIG_SYS_NO_DCACHE and CONFIG_SYS_NO_ICACHE too, right?
yes, please. Thanks a lot!
How about the existing flag CONFIG_SYS_NO_CP15_CACHE?
I think this flag should be simply removed. I think CONFIG_SYS_ICACHE_OFF and CONFIG_SYS_DCACHE_OFF together can serve all needs.
best regards, Aneesh

Dear Aneesh V,
In message 4DECBC5C.10002@ti.com you wrote:
Hi Wolfgang,
On Tuesday 17 May 2011 04:39 PM, Wolfgang Denk wrote:
Dear Aneesh V,
In message 4DD24715.4010105@ti.com you wrote:
Please fix all these names.
Ok. Including the existing CONFIG_SYS_NO_DCACHE and CONFIG_SYS_NO_ICACHE too, right?
yes, please. Thanks a lot!
How about the existing flag CONFIG_SYS_NO_CP15_CACHE?
I think this flag should be simply removed. I think CONFIG_SYS_ICACHE_OFF and CONFIG_SYS_DCACHE_OFF together can serve all needs.
Agreed. Thanks.
Best regards,
Wolfgang Denk

- Enable I-cache on bootup
- Enable MMU and D-cache immediately after relocation
- Do necessary initialization before enabling d-cache and MMU
- Changes to cleanup_before_linux()
- Make changes according to the new framework
Signed-off-by: Aneesh V aneesh@ti.com --- V2: * Changes for -march=armv7a -> armv5 change * Removed the print inside the weakly linked stub function - __arm_init_before_mmu --- arch/arm/cpu/armv7/cpu.c | 45 +++++++++++++++++++------------------------ arch/arm/cpu/armv7/start.S | 18 ++++++++++++++++- arch/arm/lib/board.c | 6 +++++ arch/arm/lib/cache-cp15.c | 7 ++++++ arch/arm/lib/cache.c | 5 ---- 5 files changed, 50 insertions(+), 31 deletions(-)
diff --git a/arch/arm/cpu/armv7/cpu.c b/arch/arm/cpu/armv7/cpu.c index 7f28d87..5c69d04 100644 --- a/arch/arm/cpu/armv7/cpu.c +++ b/arch/arm/cpu/armv7/cpu.c @@ -34,13 +34,10 @@ #include <common.h> #include <command.h> #include <asm/system.h> -#include <asm/cache.h> #include <asm/armv7.h>
int cleanup_before_linux(void) { - unsigned int i; - /* * this function is called just before we call linux * it prepares the processor for linux @@ -49,31 +46,29 @@ int cleanup_before_linux(void) */ disable_interrupts();
- /* turn off I/D-cache */ + /* + * Turn off I-cache and invalidate it + */ icache_disable(); - dcache_disable(); + invalidate_icache_all();
- /* invalidate I-cache */ - cache_flush(); - -#ifndef CONFIG_L2_OFF - /* turn off L2 cache */ - l2_cache_disable(); - /* invalidate L2 cache also */ - invalidate_dcache(get_device_type()); -#endif - i = 0; - /* mem barrier to sync up things */ - asm("mcr p15, 0, %0, c7, c10, 4": :"r"(i)); + /* + * turn off D-cache + * dcache_disable() in turn flushes the d-cache and disables MMU + */ + dcache_disable();
-#ifndef CONFIG_L2_OFF - l2_cache_enable(); -#endif + /* + * After D-cache is flushed and before it is disabled there may + * be some new valid entries brought into the cache. We are sure + * that these lines are not dirty and will not affect our execution. + * (because unwinding the call-stack and setting a bit in CP15 SCTRL + * is all we did during this. We have not pushed anything on to the + * stack. Neither have we affected any static data) + * So just invalidate the entire d-cache again to avoid coherency + * problems for kernel + */ + invalidate_dcache_all();
return 0; } - -static void cache_flush(void) -{ - asm ("mcr p15, 0, %0, c7, c5, 0": :"r" (0)); -} diff --git a/arch/arm/cpu/armv7/start.S b/arch/arm/cpu/armv7/start.S index 2929fc7..13e2d44 100644 --- a/arch/arm/cpu/armv7/start.S +++ b/arch/arm/cpu/armv7/start.S @@ -255,6 +255,14 @@ clbss_l:str r2, [r0] /* clear loop... */ * initialization, now running from RAM. */ jump_2_ram: +/* + * If I-cache is enabled invalidate it + */ +#ifndef CONFIG_SYS_NO_ICACHE + mcr p15, 0, r0, c7, c5, 0 @ invalidate icache + mcr p15, 0, r0, c7, c10, 4 @ DSB + mcr p15, 0, r0, c7, c5, 4 @ ISB +#endif ldr r0, _board_init_r_ofs adr r1, _start add lr, r0, r1 @@ -290,6 +298,9 @@ cpu_init_crit: mov r0, #0 @ set up for MCR mcr p15, 0, r0, c8, c7, 0 @ invalidate TLBs mcr p15, 0, r0, c7, c5, 0 @ invalidate icache + mcr p15, 0, r0, c7, c5, 6 @ invalidate BP array + mcr p15, 0, r0, c7, c10, 4 @ DSB + mcr p15, 0, r0, c7, c5, 4 @ ISB
/* * disable MMU stuff and caches @@ -298,7 +309,12 @@ cpu_init_crit: bic r0, r0, #0x00002000 @ clear bits 13 (--V-) bic r0, r0, #0x00000007 @ clear bits 2:0 (-CAM) orr r0, r0, #0x00000002 @ set bit 1 (--A-) Align - orr r0, r0, #0x00000800 @ set bit 12 (Z---) BTB + orr r0, r0, #0x00000800 @ set bit 11 (Z---) BTB +#ifdef CONFIG_SYS_NO_ICACHE + bic r0, r0, #0x00001000 @ clear bit 12 (I) I-cache +#else + orr r0, r0, #0x00001000 @ set bit 12 (I) I-cache +#endif mcr p15, 0, r0, c1, c0, 0
/* diff --git a/arch/arm/lib/board.c b/arch/arm/lib/board.c index 1a784a1..85469ae 100644 --- a/arch/arm/lib/board.c +++ b/arch/arm/lib/board.c @@ -464,6 +464,12 @@ void board_init_r (gd_t *id, ulong dest_addr) gd->flags |= GD_FLG_RELOC; /* tell others: relocation done */
monitor_flash_len = _end_ofs; + /* + * Enable D$: + * I$, if needed, must be already enabled in start.S + */ + dcache_enable(); + debug ("monitor flash len: %08lX\n", monitor_flash_len); board_init(); /* Setup chipselects */
diff --git a/arch/arm/lib/cache-cp15.c b/arch/arm/lib/cache-cp15.c index d9175f0..fd97c45 100644 --- a/arch/arm/lib/cache-cp15.c +++ b/arch/arm/lib/cache-cp15.c @@ -34,6 +34,12 @@
DECLARE_GLOBAL_DATA_PTR;
+void __arm_init_before_mmu(void) +{ +} +void arm_init_before_mmu(void) + __attribute__((weak, alias("__arm_init_before_mmu"))); + static void cp_delay (void) { volatile int i; @@ -65,6 +71,7 @@ static inline void mmu_setup(void) int i; u32 reg;
+ arm_init_before_mmu(); /* Set up an identity-mapping for all 4GB, rw for everyone */ for (i = 0; i < 4096; i++) page_table[i] = i << 20 | (3 << 10) | 0x12; diff --git a/arch/arm/lib/cache.c b/arch/arm/lib/cache.c index 27123cd..dc3242c 100644 --- a/arch/arm/lib/cache.c +++ b/arch/arm/lib/cache.c @@ -38,11 +38,6 @@ void __flush_cache(unsigned long start, unsigned long size) /* disable write buffer as well (page 2-22) */ asm("mcr p15, 0, %0, c7, c10, 4" : : "r" (0)); #endif -#ifdef CONFIG_OMAP34XX - void v7_flush_cache_all(void); - - v7_flush_cache_all(); -#endif return; } void flush_cache(unsigned long start, unsigned long size)

Dear Aneesh V,
In message 1305202276-27784-6-git-send-email-aneesh@ti.com you wrote:
- Enable I-cache on bootup
- Enable MMU and D-cache immediately after relocation
- Do necessary initialization before enabling d-cache and MMU
Would it be possible to do this even _before_ relocation, to speed up memory accesses during relocation? Of course, proper invalidates/flushes will be needed before jumping to the RAM address, but I guess this would save a bit of boot time?
Best regards,
Wolfgang Denk

Hi Wolfgang,
On Monday 16 May 2011 12:25 AM, Wolfgang Denk wrote:
Dear Aneesh V,
In message 1305202276-27784-6-git-send-email-aneesh@ti.com you wrote:
- Enable I-cache on bootup
- Enable MMU and D-cache immediately after relocation
- Do necessary initialization before enabling d-cache and MMU
Would it be possible to do this even _before_ relocation, to speed up memory accesses during relocation? Of course, proper invalidates/flushes will be needed before jumping to the RAM address, but I guess this would save a bit of boot time?
I intentionally kept it after relocation to avoid unnecessary complexities. Relocation is a case of self-modifying code. In Harvard architectures like ARMv7 there will be coherency issues unless we flush the entire D-cache (range-based operations may be equally or more expensive) and invalidate the entire I-cache.
So, in effect, everything has to be flushed to memory before you jump to the new location. There may be a small advantage because flushing from the cache allows for bursting to the DDR, whereas bursting is not possible when the D-cache is disabled. But I think this is not worth the trouble.
best regards, Aneesh
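A sketch of the maintenance that would be required around relocation if the D-cache were already enabled, using the generic calls added in this series; the copy/fixup/jump helpers below are hypothetical placeholders:

static void relocate_with_caches_on(ulong dest_addr, ulong src_addr, ulong image_size)
{
	memcpy((void *)dest_addr, (void *)src_addr, image_size);	/* copy U-Boot to RAM */
	fixup_relocations(dest_addr);		/* hypothetical relocation fixup */

	flush_dcache_all();			/* make sure the copied image has hit memory */
	invalidate_icache_all();		/* drop stale/speculatively fetched I-cache lines */

	jump_to(dest_addr);			/* hypothetical: branch to the relocated copy */
}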

Dear Aneesh V,
In message 4DD24BD6.5060608@ti.com you wrote:
Would it be possible to do this even _before_ relocation, to speed up memory accesses during relocation? Of course, proper invalidates/flushes will be needed before jumping to the RAM address, but I guess this would save a bit of boot time?
I intentionally kept it after relocation to avoid unnecessary complexities.
Yes, I can understand this. Do you plan to extend this to include relocation in a later step?
Relocation is a case of self-modifying code. In Harvard architectures like ARMv7 there will be coherency issues unless we flush the entire D-cache (range-based operations may be equally or more expensive) and invalidate the entire I-cache.
I would not call this self-modifying code.
Regarding the need to flush/invalidate caches when jumping to RAM: yes, we have to flush the D-cache to make sure all data actually has hit the memory. But there should be no need to invalidate the I-cache as we have never been fetching any instructions from this address range, so there cannot be any incorrect entries in cache.
Or am I missing something?
So, in effect, everything has to be flushed to memory before you jump to the new location. There may be a small advantage because flushing from the cache allows for bursting to the DDR, whereas bursting is not possible when the D-cache is disabled. But I think this is not worth the trouble.
Well, all the data copy will also use cached writes, which are much faster. I think this will result in measurable time differences.
Best regards,
Wolfgang Denk

Hi Wolfgang,
On Tuesday 17 May 2011 04:44 PM, Wolfgang Denk wrote:
Dear Aneesh V,
In message 4DD24BD6.5060608@ti.com you wrote:
Would it be possible to do this even _before_ relocation, to speed up memory accesses during relocation? Of course, proper invalidates/flushes will be needed before jumping to the RAM address, but I guess this would save a bit of boot time?
I intentionally kept it after relocation to avoid unnecessary complexities.
Yes, I can understand this. Do you plan to extend this to include relocation in a later step?
Relocation is a case of self-modifying code. In Harvard architectures like ARMv7 there will be coherency issues unless we flush the entire D-cache (range-based operations may be equally or more expensive) and invalidate the entire I-cache.
I would not call this self-modifying code.
In ARM literature this kind of situation is also referred to as "self-modifying". Here is an excerpt from the ARM manual glossary:
Self-modifying code: code that writes one or more instructions to memory and then executes them.
Regarding the need to flush/invalidate caches when jumping to RAM: yes, we have to flush the D-cache to make sure all data actually has hit the memory. But there should be no need to invalidate the I-cache as we have never been fetching any instructions from this address range, so there cannot be any incorrect entries in cache.
Or am I missing something?
This should be largely OK, but not foolproof for all processors. For processors such as Cortex-A9 that have speculative and out-of-order execution, there is no guarantee that any given memory location is not cached at any given point in time as long as the cache is enabled. Here is an excerpt from the ARMv7 manual (section B2.2.2):
"If a memory location is marked as Cacheable there is no mechanism by which it can be guaranteed not to be allocated to an enabled cache at any time. Any application must assume that any Cacheable memory location can be allocated to any enabled cache at any time."
So, in effect, everything has to be flushed to memory before you jump to the new location. There may be a small advantage because flushing from the cache allows for bursting to the DDR, whereas bursting is not possible when the D-cache is disabled. But I think this is not worth the trouble.
Well, all the data copy will also use cached writes, which are much
You mean, you will do a range-flush on only the .text section's area?
faster. I think this will result in measurable time differences.
Relocation itself was not taking a huge amount of time in my measurement sometime back. It was about 16 ms for OMAP4.
best regards, Aneesh

Dear Aneesh V,
In message 4DD264B7.9060304@ti.com you wrote:
In ARM literature this kind of situation is also referred to as "self-modifying". Here is an excerpt from the ARM manual glossary:
I see. Thanks.
Well, all the data copy will also use cached writes, which are much
You mean, you will do a range-flush on only the .text section's area?
That should indeed be sufficient.
faster. I think this will result in measurable time differences.
Relocation itself was not taking a huge amount of time in my measurement sometime back. It was about 16 ms for OMAP4.
I was especially talking about the loading from external storage, not primarily relocation. This will be even more important if we had (much) bigger images - like when loading an OS kernel as a second-stage payload instead of U-Boot.
Best regards,
Wolfgang Denk

Hi Wolfgang,
On Tuesday 17 May 2011 05:58 PM, Wolfgang Denk wrote:
Dear Aneesh V,
In message 4DD264B7.9060304@ti.com you wrote:
In ARM literature this kind of situation is also referred to as "self-modifying". Here is an excerpt from the ARM manual glossary:
I see. Thanks.
Well, all the data copy will also use cached writes, which are much
You mean, you will do a range-flush on only the .text section's area?
That should indeed be sufficient.
faster. I think this will result in measurable time differences.
Relocation itself was not taking a huge amount of time in my measurement sometime back. It was about 16 ms for OMAP4.
I was especially talking about the loading from external storage, not primarily relocation. This will be even more important if we had (much) bigger images - like when loading an OS kernel as a second-stage payload instead of U-Boot.
Are you talking about enabling D-cache in SPL? Technically, this should be possible as soon as DRAM is initialized (because internal RAMs typically do not have enough memory for page tables (16KB) in addition to the SPL itself). But you might want to consider these:
1. We need to set up the page tables, invalidate the cache, enable it and, before jumping to the kernel, flush and disable it again. Is all that worth it just for speeding up the loading part? I think one must consider DMA for loading the image instead.
2. What happens if the payload is U-Boot? You still have the problem of flushing. Besides, after reaching U-Boot, do we set up the MMU again or not? If we decide to retain the page tables set up by SPL, then U-Boot and SPL need to agree on the location of the page tables. This is difficult because U-Boot dynamically determines the memory for page tables, whereas SPL doesn't have such intelligence. If we decide to do the MMU setup again, then it means setting up the page tables twice in the bootloaders!
best regards, Aneesh
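A rough sketch of what point 1 would look like in an SPL built on the hooks from this series; the SPL function names are hypothetical, and the page-table setup happens inside dcache_enable()/mmu_setup():

void spl_load_and_jump(void)		/* hypothetical SPL entry point */
{
	dcache_enable();		/* sets up page tables, enables MMU + D-cache */

	load_image_from_storage();	/* hypothetical NAND/MMC read into DRAM */

	dcache_disable();		/* flushes the D-cache and disables the MMU */
	invalidate_dcache_all();	/* as in cleanup_before_linux() */

	jump_to_payload();		/* hypothetical: U-Boot or kernel entry */
}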

Dear Aneesh V,
In message 4DD27819.7010600@ti.com you wrote:
I was especially talking about the loading from external storage, not primarily relocation. This will be even more important if we had (much) bigger images - like when loading an OS kernel as a second-stage payload instead of U-Boot.
Are you talking about enabling D-cache in SPL? Technically, this should
I + D cache, actually.
be possible as soon as DRAM is initialized (because internal RAMs typically do not have enough memory for page tables (16KB) in addition to the SPL itself). But you might want to consider these:
Ah, I see. Well, you see, I have the big advantage over you that my imagination is not limited by the ugly details of the real ARM hardware in question here ;-)
- We need to set up the page tables, invalidate the cache, enable it
and, before jumping to the kernel, flush and disable it again. Is all that worth it just for speeding up the loading part? I think one must consider DMA for loading the image instead.
If it's available, for sure :-) But then, for example when loading from NAND flash (which is probably the most common use case these days), would DMA help?
- What happens if the payload is U-Boot? You still have the problem of
flushing. Besides, after reaching U-Boot, do we set up the MMU again or not? If we decide to retain the page tables set up by SPL, then U-Boot and SPL need to agree on the location of the page tables. This is difficult because U-Boot dynamically determines the memory for page tables, whereas SPL doesn't have such intelligence. If we decide to do the MMU setup again, then it means setting up the page tables twice in the bootloaders!
I guess in the general case we will have to do this twice. And Linux will do it again. It would be nice if we can make this optional, like we can avoid the copy operation of the relocation part when we know we load to the final address (all this keeping in mind the requirements for ultra-short boot times).
Best regards,
Wolfgang Denk

1. Make sure that page table setup is not done multiple times
2. flush_dcache_all() is more appropriate while disabling the cache than a range flush on the entire memory (flush_cache())
Provide a default implementation for flush_dcache_all() for backward compatibility and to avoid build issues.
Signed-off-by: Aneesh V aneesh@ti.com --- V2: * Fixed signature of flush_cache in cache.c --- arch/arm/lib/cache-cp15.c | 9 +++++++-- arch/arm/lib/cache.c | 11 +++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-)
diff --git a/arch/arm/lib/cache-cp15.c b/arch/arm/lib/cache-cp15.c index fd97c45..b1ccc3c 100644 --- a/arch/arm/lib/cache-cp15.c +++ b/arch/arm/lib/cache-cp15.c @@ -92,13 +92,18 @@ static inline void mmu_setup(void) set_cr(reg | CR_M); }
+static int mmu_enabled(void) +{ + return get_cr() & CR_M; +} + /* cache_bit must be either CR_I or CR_C */ static void cache_enable(uint32_t cache_bit) { uint32_t reg;
/* The data cache is not active unless the mmu is enabled too */ - if (cache_bit == CR_C) + if ((cache_bit == CR_C) && !mmu_enabled()) mmu_setup(); reg = get_cr(); /* get control reg. */ cp_delay(); @@ -117,7 +122,7 @@ static void cache_disable(uint32_t cache_bit) return; /* if disabling data cache, disable mmu too */ cache_bit |= CR_M; - flush_cache(0, ~0); + flush_dcache_all(); } reg = get_cr(); cp_delay(); diff --git a/arch/arm/lib/cache.c b/arch/arm/lib/cache.c index dc3242c..92b61a2 100644 --- a/arch/arm/lib/cache.c +++ b/arch/arm/lib/cache.c @@ -42,3 +42,14 @@ void __flush_cache(unsigned long start, unsigned long size) } void flush_cache(unsigned long start, unsigned long size) __attribute__((weak, alias("__flush_cache"))); + +/* + * Default implementation: + * do a range flush for the entire range + */ +void __flush_dcache_all(void) +{ + flush_cache(0, ~0); +} +void flush_dcache_all(void) + __attribute__((weak, alias("__flush_dcache_all")));

PL310 is the L2$ controller from ARM used in many SoCs, including the Cortex-A9 based OMAP4430.
Add support for some of the key PL310 operations:
- Invalidate all
- Invalidate range
- Flush(clean & invalidate) all
- Flush range
Signed-off-by: Aneesh V aneesh@ti.com --- V2: * More descriptive commit message * Changes for function pointer to weakly linked change * C struct for register accesses --- README | 6 ++ arch/arm/include/asm/pl310.h | 74 +++++++++++++++++++++++++++ arch/arm/lib/Makefile | 1 + arch/arm/lib/cache-pl310.c | 116 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 197 insertions(+), 0 deletions(-) create mode 100644 arch/arm/include/asm/pl310.h create mode 100644 arch/arm/lib/cache-pl310.c
diff --git a/README b/README index c3b6bec..e04fed9 100644 --- a/README +++ b/README @@ -460,6 +460,12 @@ The following options need to be configured: CONFIG_SYS_NO_DCACHE - Do not enable data cache in U-Boot CONFIG_SYS_NO_L2CACHE- Do not enable L2 cache in U-Boot
+- Cache Configuration for ARM: + CONFIG_SYS_L2_PL310 - Enable support for ARM PL310 L2 cache + controller + CONFIG_SYS_PL310_BASE - Physical base address of PL310 + controller register space + - Serial Ports: CONFIG_PL010_SERIAL
diff --git a/arch/arm/include/asm/pl310.h b/arch/arm/include/asm/pl310.h new file mode 100644 index 0000000..ffc58e9 --- /dev/null +++ b/arch/arm/include/asm/pl310.h @@ -0,0 +1,74 @@ +/* + * (C) Copyright 2010 + * Texas Instruments Incorporated - http://www.ti.com/ + * + * Aneesh V aneesh@ti.com + * + * See file CREDITS for list of people who contributed to this + * project. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + */ +#ifndef _PL310_H_ +#define _PL310_H_ + +#include <linux/types.h> + +/* Register bit fields */ +#define PL310_AUX_CTRL_ASSOCIATIVITY_MASK (1 << 16) + +struct pl310_regs { + u32 pl310_cache_id; + u32 pl310_cache_type; + u32 pad1[62]; + u32 pl310_ctrl; + u32 pl310_aux_ctrl; + u32 pl310_tag_latency_ctrl; + u32 pl310_data_latency_ctrl; + u32 pad2[60]; + u32 pl310_event_cnt_ctrl; + u32 pl310_event_cnt1_cfg; + u32 pl310_event_cnt0_cfg; + u32 pl310_event_cnt1_val; + u32 pl310_event_cnt0_val; + u32 pl310_intr_mask; + u32 pl310_masked_intr_stat; + u32 pl310_raw_intr_stat; + u32 pl310_intr_clear; + u32 pad3[323]; + u32 pl310_cache_sync; + u32 pad4[15]; + u32 pl310_inv_line_pa; + u32 pad5[2]; + u32 pl310_inv_way; + u32 pad6[12]; + u32 pl310_clean_line_pa; + u32 pad7[1]; + u32 pl310_clean_line_idx; + u32 pl310_clean_way; + u32 pad8[12]; + u32 pl310_clean_inv_line_pa; + u32 pad9[1]; + u32 pl310_clean_inv_line_idx; + u32 pl310_clean_inv_way; +}; + +void pl310_inval_all(void); +void pl310_clean_inval_all(void); +void pl310_inval_range(u32 start, u32 end); +void pl310_clean_inval_range(u32 start, u32 end); + +#endif diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index 03b1b5e..ce374a5 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -42,6 +42,7 @@ COBJS-y += cache.o ifndef CONFIG_SYS_NO_CP15_CACHE COBJS-y += cache-cp15.o endif +COBJS-$(CONFIG_SYS_L2_PL310) += cache-pl310.o COBJS-y += interrupts.o COBJS-y += reset.o SOBJS-$(CONFIG_USE_ARCH_MEMSET) += memset.o diff --git a/arch/arm/lib/cache-pl310.c b/arch/arm/lib/cache-pl310.c new file mode 100644 index 0000000..f55c63a --- /dev/null +++ b/arch/arm/lib/cache-pl310.c @@ -0,0 +1,116 @@ +/* + * (C) Copyright 2010 + * Texas Instruments Incorporated - http://www.ti.com/ + * + * Aneesh V aneesh@ti.com + * + * See file CREDITS for list of people who contributed to this + * project. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + */ +#include <linux/types.h> +#include <asm/io.h> +#include <asm/armv7.h> +#include <asm/pl310.h> +#include <config.h> + +struct pl310_regs *const pl310 = (struct pl310_regs *)CONFIG_SYS_PL310_BASE; + +static void pl310_cache_sync(void) +{ + writel(0, &pl310->pl310_cache_sync); +} + +static void pl310_background_op_all_ways(u32 *op_reg) +{ + u32 assoc_16, associativity, way_mask; + + assoc_16 = readl(&pl310->pl310_aux_ctrl) & + PL310_AUX_CTRL_ASSOCIATIVITY_MASK; + if (assoc_16) + associativity = 16; + else + associativity = 8; + + way_mask = (1 << associativity) - 1; + /* Invalidate all ways */ + writel(way_mask, op_reg); + /* Wait for all ways to be invalidated */ + while (readl(op_reg) && way_mask) + ; + pl310_cache_sync(); +} + +void v7_outer_cache_inval_all(void) +{ + pl310_background_op_all_ways(&pl310->pl310_inv_way); +} + +void v7_outer_cache_flush_all(void) +{ + pl310_background_op_all_ways(&pl310->pl310_clean_inv_way); +} + +/* Flush(clean invalidate) memory from start to stop-1 */ +void v7_outer_cache_flush_range(u32 start, u32 stop) +{ + /* PL310 currently supports only 32 bytes cache line */ + u32 pa, line_size = 32; + + /* + * Align to the beginning of cache-line - this ensures that + * the first 5 bits are 0 as required by PL310 TRM + */ + start &= ~(line_size - 1); + + for (pa = start; pa < stop; pa = pa + line_size) + writel(pa, &pl310->pl310_clean_inv_line_pa); + + pl310_cache_sync(); +} + +/* invalidate memory from start to stop-1 */ +void v7_outer_cache_inval_range(u32 start, u32 stop) +{ + /* PL310 currently supports only 32 bytes cache line */ + u32 pa, line_size = 32; + + /* + * If start address is not aligned to cache-line flush the first + * line to prevent affecting somebody else's buffer + */ + if (start & (line_size - 1)) { + v7_outer_cache_flush_range(start, start + 1); + /* move to next cache line */ + start = (start + line_size - 1) & ~(line_size - 1); + } + + /* + * If stop address is not aligned to cache-line flush the last + * line to prevent affecting somebody else's buffer + */ + if (stop & (line_size - 1)) { + v7_outer_cache_flush_range(stop, stop + 1); + /* align to the beginning of this cache line */ + stop &= ~(line_size - 1); + } + + for (pa = start; pa < stop; pa = pa + line_size) + writel(pa, &pl310->pl310_inv_line_pa); + + pl310_cache_sync(); +}
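A sketch of the board-config side that the README hunk above documents; the base address shown is the one used for OMAP4 later in this series, and other boards would substitute their own:

/* include/configs/<board>.h */
#ifndef CONFIG_SYS_NO_L2CACHE
#define CONFIG_SYS_L2_PL310	1
#define CONFIG_SYS_PL310_BASE	0x48242000	/* PL310 register base (OMAP4) */
#endif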

adapt omap4 to the new layered cache maintenance framework
Signed-off-by: Aneesh V aneesh@ti.com --- V2: * Changes for the function pointer to weakly linked change --- arch/arm/cpu/armv7/omap4/lowlevel_init.S | 18 ++++++++++++++++++ arch/arm/include/asm/arch-omap4/sys_proto.h | 1 - include/configs/omap4_panda.h | 8 +++++--- include/configs/omap4_sdp4430.h | 8 +++++--- 4 files changed, 28 insertions(+), 7 deletions(-)
diff --git a/arch/arm/cpu/armv7/omap4/lowlevel_init.S b/arch/arm/cpu/armv7/omap4/lowlevel_init.S index 026dfa4..5e6c16f 100644 --- a/arch/arm/cpu/armv7/omap4/lowlevel_init.S +++ b/arch/arm/cpu/armv7/omap4/lowlevel_init.S @@ -45,3 +45,21 @@ lowlevel_init: */ bl s_init pop {ip, pc} + +set_pl310_ctrl_reg: + PUSH {r4-r11, lr} @ save registers - ROM code may pollute + @ our registers + LDR r12, =0x102 @ Set PL310 control register - value in R0 + .word 0xe1600070 @ SMC #0 - hand assembled because -march=armv5 + @ call ROM Code API to set control register + POP {r4-r11, pc} + +.globl v7_outer_cache_enable +v7_outer_cache_enable: + MOV r0, #1 + B set_pl310_ctrl_reg + +.globl v7_outer_cache_disable +v7_outer_cache_disable: + MOV r0, #0 + B set_pl310_ctrl_reg diff --git a/arch/arm/include/asm/arch-omap4/sys_proto.h b/arch/arm/include/asm/arch-omap4/sys_proto.h index 4813e9e..017f216 100644 --- a/arch/arm/include/asm/arch-omap4/sys_proto.h +++ b/arch/arm/include/asm/arch-omap4/sys_proto.h @@ -31,7 +31,6 @@ struct omap_sysinfo { void gpmc_init(void); void watchdog_init(void); u32 get_device_type(void); -void invalidate_dcache(u32); void set_muxconf_regs(void); void sr32(void *, u32, u32, u32); u32 wait_on_value(u32, u32, void *, u32); diff --git a/include/configs/omap4_panda.h b/include/configs/omap4_panda.h index 12daebc..9acd340 100644 --- a/include/configs/omap4_panda.h +++ b/include/configs/omap4_panda.h @@ -45,9 +45,6 @@ #define CONFIG_DISPLAY_CPUINFO 1 #define CONFIG_DISPLAY_BOARDINFO 1
-/* Keep L2 Cache Disabled */ -#define CONFIG_SYS_NO_L2CACHE 1 - /* Clock Defines */ #define V_OSCK 38400000 /* Clock output from T2 */ #define V_SCLK V_OSCK @@ -236,4 +233,9 @@ CONFIG_SYS_INIT_RAM_SIZE - \ GENERATED_GBL_DATA_SIZE)
+#ifndef CONFIG_SYS_NO_L2CACHE +#define CONFIG_SYS_L2_PL310 1 +#define CONFIG_SYS_PL310_BASE 0x48242000 +#endif + #endif /* __CONFIG_H */ diff --git a/include/configs/omap4_sdp4430.h b/include/configs/omap4_sdp4430.h index 673cd5c..0fd08b3 100644 --- a/include/configs/omap4_sdp4430.h +++ b/include/configs/omap4_sdp4430.h @@ -46,9 +46,6 @@ #define CONFIG_DISPLAY_CPUINFO 1 #define CONFIG_DISPLAY_BOARDINFO 1
-/* Keep L2 Cache Disabled */ -#define CONFIG_SYS_NO_L2CACHE 1 - /* Clock Defines */ #define V_OSCK 38400000 /* Clock output from T2 */ #define V_SCLK V_OSCK @@ -242,4 +239,9 @@ CONFIG_SYS_INIT_RAM_SIZE - \ GENERATED_GBL_DATA_SIZE)
+#ifndef CONFIG_SYS_NO_L2CACHE +#define CONFIG_SYS_L2_PL310 1 +#define CONFIG_SYS_PL310_BASE 0x48242000 +#endif + #endif /* __CONFIG_H */

Dear Aneesh V,
In message 1305202276-27784-9-git-send-email-aneesh@ti.com you wrote:
adapt omap4 to the new layered cache maintenance framework
NAK because of the CONFIG_SYS_NO_*CACHE names.
Best regards,
Wolfgang Denk

adapt omap3 to the new layered cache maintenance framework
Signed-off-by: Aneesh V aneesh@ti.com --- V2: * Changes for the function pointer to weakly linked change --- arch/arm/cpu/armv7/omap3/Makefile | 1 - arch/arm/cpu/armv7/omap3/board.c | 138 ++++++++++++-- arch/arm/cpu/armv7/omap3/cache.S | 263 --------------------------- arch/arm/cpu/armv7/omap3/lowlevel_init.S | 32 ++++ arch/arm/include/asm/arch-omap3/omap3.h | 20 ++ arch/arm/include/asm/arch-omap3/sys_proto.h | 10 +- 6 files changed, 178 insertions(+), 286 deletions(-) delete mode 100644 arch/arm/cpu/armv7/omap3/cache.S
diff --git a/arch/arm/cpu/armv7/omap3/Makefile b/arch/arm/cpu/armv7/omap3/Makefile index 7164d50..522bcd2 100644 --- a/arch/arm/cpu/armv7/omap3/Makefile +++ b/arch/arm/cpu/armv7/omap3/Makefile @@ -26,7 +26,6 @@ include $(TOPDIR)/config.mk LIB = $(obj)lib$(SOC).o
SOBJS := lowlevel_init.o -SOBJS += cache.o
COBJS += board.o COBJS += clock.o diff --git a/arch/arm/cpu/armv7/omap3/board.c b/arch/arm/cpu/armv7/omap3/board.c index 6c2a132..39866e0 100644 --- a/arch/arm/cpu/armv7/omap3/board.c +++ b/arch/arm/cpu/armv7/omap3/board.c @@ -37,8 +37,12 @@ #include <asm/arch/sys_proto.h> #include <asm/arch/mem.h> #include <asm/cache.h> +#include <asm/armv7.h>
+/* Declarations */ extern omap3_sysinfo sysinfo; +static void omap3_setup_aux_cr(void); +static void omap3_invalidate_l2_cache_secure(void);
/****************************************************************************** * Routine: delay @@ -166,27 +170,13 @@ void s_init(void)
try_unlock_memory();
- /* - * Right now flushing at low MPU speed. - * Need to move after clock init - */ - invalidate_dcache(get_device_type()); -#ifndef CONFIG_ICACHE_OFF - icache_enable(); -#endif + /* Errata workarounds */ + omap3_setup_aux_cr();
-#ifdef CONFIG_L2_OFF - l2_cache_disable(); -#else - l2_cache_enable(); +#ifndef CONFIG_SYS_NO_L2CACHE + /* Invalidate L2-cache from secure mode */ + omap3_invalidate_l2_cache_secure(); #endif - /* - * Writing to AuxCR in U-boot using SMI for GP DEV - * Currently SMI in Kernel on ES2 devices seems to have an issue - * Once that is resolved, we can postpone this config to kernel - */ - if (get_device_type() == GP_DEVICE) - setup_auxcr();
set_muxconf_regs(); delay(100); @@ -292,3 +282,113 @@ int checkboard (void) return 0; } #endif /* CONFIG_DISPLAY_BOARDINFO */ + +static void omap3_emu_romcode_call(u32 service_id, u32 *parameters) +{ + u32 i, num_params = *parameters; + u32 *sram_scratch_space = (u32 *)OMAP3_PUBLIC_SRAM_SCRATCH_AREA; + + /* + * copy the parameters to an un-cached area to avoid coherency + * issues + */ + for (i = 0; i < num_params; i++) { + __raw_writel(*parameters, sram_scratch_space); + parameters++; + sram_scratch_space++; + } + + /* Now make the PPA call */ + do_omap3_emu_romcode_call(service_id, OMAP3_PUBLIC_SRAM_SCRATCH_AREA); +} + +static void omap3_update_aux_cr_secure(u32 set_bits, u32 clear_bits) +{ + u32 acr; + + /* Read ACR */ + asm volatile ("mrc p15, 0, %0, c1, c0, 1" : "=r" (acr)); + acr &= ~clear_bits; + acr |= set_bits; + + if (get_device_type() == GP_DEVICE) { + omap3_gp_romcode_call(OMAP3_GP_ROMCODE_API_WRITE_ACR, + acr); + } else { + struct emu_hal_params emu_romcode_params; + emu_romcode_params.num_params = 1; + emu_romcode_params.param1 = acr; + omap3_emu_romcode_call(OMAP3_EMU_HAL_API_WRITE_ACR, + (u32 *)&emu_romcode_params); + } +} + +static void omap3_update_aux_cr(u32 set_bits, u32 clear_bits) +{ + u32 acr; + + /* Read ACR */ + asm volatile ("mrc p15, 0, %0, c1, c0, 1" : "=r" (acr)); + acr &= ~clear_bits; + acr |= set_bits; + + /* Write ACR - affects non-secure banked bits */ + asm volatile ("mcr p15, 0, %0, c1, c0, 1" : : "r" (acr)); +} + +static void omap3_setup_aux_cr(void) +{ + /* Workaround for Cortex-A8 errata: #454179 #430973 + * Set "IBE" bit + * Set "Disable Brach Size Mispredicts" bit + * Workaround for erratum #621766 + * Enable L1NEON bit + * ACR |= (IBE | DBSM | L1NEON) => ACR |= 0xE0 + */ + omap3_update_aux_cr_secure(0xE0, 0); +} + +#if !defined(CONFIG_SYS_NO_DCACHE) && !defined(CONFIG_SYS_NO_L2CACHE) + +/* Invalidate the entire L2 cache from secure mode */ +static void omap3_invalidate_l2_cache_secure(void) +{ + if (get_device_type() == GP_DEVICE) { + omap3_gp_romcode_call(OMAP3_GP_ROMCODE_API_L2_INVAL, + 0); + } else { + struct emu_hal_params emu_romcode_params; + emu_romcode_params.num_params = 1; + emu_romcode_params.param1 = 0; + omap3_emu_romcode_call(OMAP3_EMU_HAL_API_L2_INVAL, + (u32 *)&emu_romcode_params); + } +} + +void v7_outer_cache_enable(void) +{ + /* Set L2EN */ + omap3_update_aux_cr_secure(0x2, 0); + + /* + * On some revisions L2EN bit is banked on some revisions it's not + * No harm in setting both banked bits(in fact this is required + * by an erratum) + */ + omap3_update_aux_cr(0x2, 0); +} + +void v7_outer_cache_disable(void) +{ + /* Clear L2EN */ + omap3_update_aux_cr_secure(0, 0x2); + + /* + * On some revisions L2EN bit is banked on some revisions it's not + * No harm in clearing both banked bits(in fact this is required + * by an erratum) + */ + omap3_update_aux_cr(0, 0x2); +} + +#endif diff --git a/arch/arm/cpu/armv7/omap3/cache.S b/arch/arm/cpu/armv7/omap3/cache.S deleted file mode 100644 index cda87ba..0000000 --- a/arch/arm/cpu/armv7/omap3/cache.S +++ /dev/null @@ -1,263 +0,0 @@ -/* - * Copyright (c) 2009 Wind River Systems, Inc. 
- * Tom Rix Tom.Rix@windriver.com - * - * This file is based on and replaces the existing cache.c file - * The copyrights for the cache.c file are: - * - * (C) Copyright 2008 Texas Insturments - * - * (C) Copyright 2002 - * Sysgo Real-Time Solutions, GmbH <www.elinos.com> - * Marius Groeger mgroeger@sysgo.de - * - * (C) Copyright 2002 - * Gary Jennejohn, DENX Software Engineering, gj@denx.de - * - * See file CREDITS for list of people who contributed to this - * project. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA - */ - -#include <asm/arch/omap3.h> - -/* - * omap3 cache code - */ - -.align 5 -.global invalidate_dcache -.global l2_cache_enable -.global l2_cache_disable -.global setup_auxcr - -/* - * invalidate_dcache() - * - * Invalidate the whole D-cache. - * - * Corrupted registers: r0-r5, r7, r9-r11 - * - * - mm - mm_struct describing address space - */ -invalidate_dcache: - stmfd r13!, {r0 - r5, r7, r9 - r12, r14} - - mov r7, r0 @ take a backup of device type - cmp r0, #0x3 @ check if the device type is - @ GP - moveq r12, #0x1 @ set up to invalide L2 -smi: .word 0x01600070 @ Call SMI monitor (smieq) - cmp r7, #0x3 @ compare again in case its - @ lost - beq finished_inval @ if GP device, inval done - @ above - - mrc p15, 1, r0, c0, c0, 1 @ read clidr - ands r3, r0, #0x7000000 @ extract loc from clidr - mov r3, r3, lsr #23 @ left align loc bit field - beq finished_inval @ if loc is 0, then no need to - @ clean - mov r10, #0 @ start clean at cache level 0 -inval_loop1: - add r2, r10, r10, lsr #1 @ work out 3x current cache - @ level - mov r1, r0, lsr r2 @ extract cache type bits from - @ clidr - and r1, r1, #7 @ mask of the bits for current - @ cache only - cmp r1, #2 @ see what cache we have at - @ this level - blt skip_inval @ skip if no cache, or just - @ i-cache - mcr p15, 2, r10, c0, c0, 0 @ select current cache level - @ in cssr - mov r2, #0 @ operand for mcr SBZ - mcr p15, 0, r2, c7, c5, 4 @ flush prefetch buffer to - @ sych the new cssr&csidr, - @ with armv7 this is 'isb', - @ but we compile with armv5 - mrc p15, 1, r1, c0, c0, 0 @ read the new csidr - and r2, r1, #7 @ extract the length of the - @ cache lines - add r2, r2, #4 @ add 4 (line length offset) - ldr r4, =0x3ff - ands r4, r4, r1, lsr #3 @ find maximum number on the - @ way size - clz r5, r4 @ find bit position of way - @ size increment - ldr r7, =0x7fff - ands r7, r7, r1, lsr #13 @ extract max number of the - @ index size -inval_loop2: - mov r9, r4 @ create working copy of max - @ way size -inval_loop3: - orr r11, r10, r9, lsl r5 @ factor way and cache number - @ into r11 - orr r11, r11, r7, lsl r2 @ factor index number into r11 - mcr p15, 0, r11, c7, c6, 2 @ invalidate by set/way - subs r9, r9, #1 @ decrement the way - bge inval_loop3 - subs r7, r7, #1 @ decrement the index - bge inval_loop2 -skip_inval: - add r10, r10, #2 @ increment cache 
number - cmp r3, r10 - bgt inval_loop1 -finished_inval: - mov r10, #0 @ swith back to cache level 0 - mcr p15, 2, r10, c0, c0, 0 @ select current cache level - @ in cssr - mcr p15, 0, r10, c7, c5, 4 @ flush prefetch buffer, - @ with armv7 this is 'isb', - @ but we compile with armv5 - - ldmfd r13!, {r0 - r5, r7, r9 - r12, pc} - -l2_cache_set: - stmfd r13!, {r4 - r6, lr} - mov r5, r0 - bl get_cpu_rev - mov r4, r0 - bl get_cpu_family - @ ES2 onwards we can disable/enable L2 ourselves - cmp r0, #CPU_OMAP34XX - cmpeq r4, #CPU_3XX_ES10 - mrc 15, 0, r0, cr1, cr0, 1 - bic r0, r0, #2 - orr r0, r0, r5, lsl #1 - mcreq 15, 0, r0, cr1, cr0, 1 - @ GP Device ROM code API usage here - @ r12 = AUXCR Write function and r0 value - mov ip, #3 - @ SMCNE instruction to call ROM Code API - .word 0x11600070 - ldmfd r13!, {r4 - r6, pc} - -l2_cache_enable: - mov r0, #1 - b l2_cache_set - -l2_cache_disable: - mov r0, #0 - b l2_cache_set - -/****************************************************************************** - * Routine: setup_auxcr() - * Description: Write to AuxCR desired value using SMI. - * general use. - *****************************************************************************/ -setup_auxcr: - mrc p15, 0, r0, c0, c0, 0 @ read main ID register - and r2, r0, #0x00f00000 @ variant - and r3, r0, #0x0000000f @ revision - orr r1, r3, r2, lsr #20-4 @ combine variant and revision - mov r12, #0x3 - mrc p15, 0, r0, c1, c0, 1 - orr r0, r0, #0x10 @ Enable ASA - @ Enable L1NEON on pre-r2p1 (erratum 621766 workaround) - cmp r1, #0x21 - orrlt r0, r0, #1 << 5 - .word 0xE1600070 @ SMC - mov r12, #0x2 - mrc p15, 1, r0, c9, c0, 2 - @ Set PLD_FWD bit in L2AUXCR on pre-r2p1 (erratum 725233 workaround) - cmp r1, #0x21 - orrlt r0, r0, #1 << 27 - .word 0xE1600070 @ SMC - bx lr - -.align 5 -.global v7_flush_dcache_all -.global v7_flush_cache_all - -/* - * v7_flush_dcache_all() - * - * Flush the whole D-cache. 
- * - * Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode) - * - * - mm - mm_struct describing address space - */ -v7_flush_dcache_all: -# dmb @ ensure ordering with previous memory accesses - mrc p15, 1, r0, c0, c0, 1 @ read clidr - ands r3, r0, #0x7000000 @ extract loc from clidr - mov r3, r3, lsr #23 @ left align loc bit field - beq finished @ if loc is 0, then no need to clean - mov r10, #0 @ start clean at cache level 0 -loop1: - add r2, r10, r10, lsr #1 @ work out 3x current cache level - mov r1, r0, lsr r2 @ extract cache type bits from clidr - and r1, r1, #7 @ mask of the bits for current cache only - cmp r1, #2 @ see what cache we have at this level - blt skip @ skip if no cache, or just i-cache - mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr - mcr p15, 0, r10, c7, c5, 4 @ flush prefetch buffer, - @ with armv7 this is 'isb', - @ but we compile with armv5 - mrc p15, 1, r1, c0, c0, 0 @ read the new csidr - and r2, r1, #7 @ extract the length of the cache lines - add r2, r2, #4 @ add 4 (line length offset) - ldr r4, =0x3ff - ands r4, r4, r1, lsr #3 @ find maximum number on the way size - clz r5, r4 @ find bit position of way size increment - ldr r7, =0x7fff - ands r7, r7, r1, lsr #13 @ extract max number of the index size -loop2: - mov r9, r4 @ create working copy of max way size -loop3: - orr r11, r10, r9, lsl r5 @ factor way and cache number into r11 - orr r11, r11, r7, lsl r2 @ factor index number into r11 - mcr p15, 0, r11, c7, c14, 2 @ clean & invalidate by set/way - subs r9, r9, #1 @ decrement the way - bge loop3 - subs r7, r7, #1 @ decrement the index - bge loop2 -skip: - add r10, r10, #2 @ increment cache number - cmp r3, r10 - bgt loop1 -finished: - mov r10, #0 @ swith back to cache level 0 - mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr -# dsb - mcr p15, 0, r10, c7, c5, 4 @ flush prefetch buffer, - @ with armv7 this is 'isb', - @ but we compile with armv5 - mov pc, lr - -/* - * v7_flush_cache_all() - * - * Flush the entire cache system. - * The data cache flush is now achieved using atomic clean / invalidates - * working outwards from L1 cache. This is done using Set/Way based cache - * maintainance instructions. - * The instruction cache can still be invalidated back to the point of - * unification in a single instruction. - * - */ -v7_flush_cache_all: - stmfd sp!, {r0-r7, r9-r11, lr} - bl v7_flush_dcache_all - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ I+BTB cache invalidate - ldmfd sp!, {r0-r7, r9-r11, lr} - mov pc, lr diff --git a/arch/arm/cpu/armv7/omap3/lowlevel_init.S b/arch/arm/cpu/armv7/omap3/lowlevel_init.S index 1458072..67e8ceb 100644 --- a/arch/arm/cpu/armv7/omap3/lowlevel_init.S +++ b/arch/arm/cpu/armv7/omap3/lowlevel_init.S @@ -35,6 +35,38 @@ _TEXT_BASE: .word CONFIG_SYS_TEXT_BASE /* sdram load addr from config.mk */
+.global omap3_gp_romcode_call +omap3_gp_romcode_call: + PUSH {r4-r12, lr} @ Save all registers from ROM code! + MOV r12, r0 @ Copy the Service ID in R12 + MOV r0, r1 @ Copy parameter to R0 + mcr p15, 0, r0, c7, c10, 4 @ DSB + mcr p15, 0, r0, c7, c10, 5 @ DMB + .word 0xe1600070 @ SMC #0 to enter monitor - hand assembled + @ because we use -march=armv5 + POP {r4-r12, pc} + +/* + * Funtion for making PPA HAL API calls in secure devices + * Input: + * R0 - Service ID + * R1 - paramer list + */ +.global do_omap3_emu_romcode_call +do_omap3_emu_romcode_call: + PUSH {r4-r12, lr} @ Save all registers from ROM code! + MOV r12, r0 @ Copy the Secure Service ID in R12 + MOV r3, r1 @ Copy the pointer to va_list in R3 + MOV r1, #0 @ Process ID - 0 + MOV r2, #OMAP3_EMU_HAL_START_HAL_CRITICAL @ Copy the pointer + @ to va_list in R3 + MOV r6, #0xFF @ Indicate new Task call + mcr p15, 0, r0, c7, c10, 4 @ DSB + mcr p15, 0, r0, c7, c10, 5 @ DMB + .word 0xe1600071 @ SMC #1 to call PPA service - hand assembled + @ because we use -march=armv5 + POP {r4-r12, pc} + #if !defined(CONFIG_SYS_NAND_BOOT) && !defined(CONFIG_SYS_NAND_BOOT) /************************************************************************** * cpy_clk_code: relocates clock code into SRAM where its safer to execute diff --git a/arch/arm/include/asm/arch-omap3/omap3.h b/arch/arm/include/asm/arch-omap3/omap3.h index cc2b541..d9d49da 100644 --- a/arch/arm/include/asm/arch-omap3/omap3.h +++ b/arch/arm/include/asm/arch-omap3/omap3.h @@ -159,8 +159,14 @@ struct gpio { #define SRAM_VECT_CODE (SRAM_OFFSET0 | SRAM_OFFSET1 | \ SRAM_OFFSET2)
+#define OMAP3_PUBLIC_SRAM_BASE 0x40208000 /* Works for GP & EMU */ +#define OMAP3_PUBLIC_SRAM_END 0x40210000 + #define LOW_LEVEL_SRAM_STACK 0x4020FFFC
+/* scratch area - accessible on both EMU and GP */ +#define OMAP3_PUBLIC_SRAM_SCRATCH_AREA OMAP3_PUBLIC_SRAM_BASE + #define DEBUG_LED1 149 /* gpio */ #define DEBUG_LED2 150 /* gpio */
@@ -227,4 +233,18 @@ struct gpio {
#define OMAP3730 0x0c00
+/* + * ROM code API related flags + */ +#define OMAP3_GP_ROMCODE_API_L2_INVAL 1 +#define OMAP3_GP_ROMCODE_API_WRITE_ACR 3 + +/* + * EMU device PPA HAL related flags + */ +#define OMAP3_EMU_HAL_API_L2_INVAL 40 +#define OMAP3_EMU_HAL_API_WRITE_ACR 42 + +#define OMAP3_EMU_HAL_START_HAL_CRITICAL 4 + #endif diff --git a/arch/arm/include/asm/arch-omap3/sys_proto.h b/arch/arm/include/asm/arch-omap3/sys_proto.h index 4a28ba1..995e7cb 100644 --- a/arch/arm/include/asm/arch-omap3/sys_proto.h +++ b/arch/arm/include/asm/arch-omap3/sys_proto.h @@ -27,6 +27,11 @@ typedef struct { char *nand_string; } omap3_sysinfo;
+struct emu_hal_params { + u32 num_params; + u32 param1; +}; + void prcm_init(void); void per_clocks_enable(void);
@@ -53,9 +58,7 @@ u32 is_running_in_sdram(void); u32 is_running_in_sram(void); u32 is_running_in_flash(void); u32 get_device_type(void); -void l2cache_enable(void); void secureworld_exit(void); -void setup_auxcr(void); void try_unlock_memory(void); u32 get_boot_type(void); void invalidate_dcache(u32); @@ -66,5 +69,6 @@ void make_cs1_contiguous(void); void omap_nand_switch_ecc(int); void power_init_r(void); void dieid_num_r(void); - +void do_omap3_emu_romcode_call(u32 service_id, u32 parameters); +void omap3_gp_romcode_call(u32 service_id, u32 parameter); #endif

Dear Aneesh V,
In message 1305202276-27784-10-git-send-email-aneesh@ti.com you wrote:
adapt omap3 to the new layered cache maintenance framework
Signed-off-by: Aneesh V aneesh@ti.com
V2:
- Changes for the function pointer to weakly linked change
arch/arm/cpu/armv7/omap3/Makefile | 1 - arch/arm/cpu/armv7/omap3/board.c | 138 ++++++++++++-- arch/arm/cpu/armv7/omap3/cache.S | 263 --------------------------- arch/arm/cpu/armv7/omap3/lowlevel_init.S | 32 ++++ arch/arm/include/asm/arch-omap3/omap3.h | 20 ++ arch/arm/include/asm/arch-omap3/sys_proto.h | 10 +- 6 files changed, 178 insertions(+), 286 deletions(-) delete mode 100644 arch/arm/cpu/armv7/omap3/cache.S
NAK because of the CONFIG_SYS_NO_*CACHE names.
Best regards,
Wolfgang Denk

adapt s5pc1xx to the new layered cache maintenance framework
Signed-off-by: Aneesh V aneesh@ti.com --- V2: * Changes for the function pointer to weakly linked change --- arch/arm/cpu/armv7/s5pc1xx/cache.S | 88 ++----------------------- arch/arm/include/asm/arch-s5pc1xx/sys_proto.h | 3 - 2 files changed, 7 insertions(+), 84 deletions(-)
diff --git a/arch/arm/cpu/armv7/s5pc1xx/cache.S b/arch/arm/cpu/armv7/s5pc1xx/cache.S index 7734b32..726afe7 100644 --- a/arch/arm/cpu/armv7/s5pc1xx/cache.S +++ b/arch/arm/cpu/armv7/s5pc1xx/cache.S @@ -23,98 +23,24 @@ * MA 02111-1307 USA */
-#include <asm/arch/cpu.h> - .align 5 -.global invalidate_dcache -.global l2_cache_enable -.global l2_cache_disable - -/* - * invalidate_dcache() - * Invalidate the whole D-cache. - * - * Corrupted registers: r0-r5, r7, r9-r11 - */ -invalidate_dcache: - stmfd r13!, {r0 - r5, r7, r9 - r12, r14} - - cmp r0, #0xC100 @ check if the cpu is s5pc100
- beq finished_inval @ s5pc100 doesn't need this - @ routine - mrc p15, 1, r0, c0, c0, 1 @ read clidr - ands r3, r0, #0x7000000 @ extract loc from clidr - mov r3, r3, lsr #23 @ left align loc bit field - beq finished_inval @ if loc is 0, then no need to - @ clean - mov r10, #0 @ start clean at cache level 0 -inval_loop1: - add r2, r10, r10, lsr #1 @ work out 3x current cache - @ level - mov r1, r0, lsr r2 @ extract cache type bits from - @ clidr - and r1, r1, #7 @ mask of the bits for current - @ cache only - cmp r1, #2 @ see what cache we have at - @ this level - blt skip_inval @ skip if no cache, or just - @ i-cache - mcr p15, 2, r10, c0, c0, 0 @ select current cache level - @ in cssr - mov r2, #0 @ operand for mcr SBZ - mcr p15, 0, r2, c7, c5, 4 @ flush prefetch buffer to - @ sych the new cssr&csidr, - @ with armv7 this is 'isb', - @ but we compile with armv5 - mrc p15, 1, r1, c0, c0, 0 @ read the new csidr - and r2, r1, #7 @ extract the length of the - @ cache lines - add r2, r2, #4 @ add 4 (line length offset) - ldr r4, =0x3ff - ands r4, r4, r1, lsr #3 @ find maximum number on the - @ way size - clz r5, r4 @ find bit position of way - @ size increment - ldr r7, =0x7fff - ands r7, r7, r1, lsr #13 @ extract max number of the - @ index size -inval_loop2: - mov r9, r4 @ create working copy of max - @ way size -inval_loop3: - orr r11, r10, r9, lsl r5 @ factor way and cache number - @ into r11 - orr r11, r11, r7, lsl r2 @ factor index number into r11 - mcr p15, 0, r11, c7, c6, 2 @ invalidate by set/way - subs r9, r9, #1 @ decrement the way - bge inval_loop3 - subs r7, r7, #1 @ decrement the index - bge inval_loop2 -skip_inval: - add r10, r10, #2 @ increment cache number - cmp r3, r10 - bgt inval_loop1 -finished_inval: - mov r10, #0 @ swith back to cache level 0 - mcr p15, 2, r10, c0, c0, 0 @ select current cache level - @ in cssr - mcr p15, 0, r10, c7, c5, 4 @ flush prefetch buffer, - @ with armv7 this is 'isb', - @ but we compile with armv5 +#if !defined(CONFIG_SYS_NO_DCACHE) && !defined(CONFIG_SYS_NO_L2CACHE)
- ldmfd r13!, {r0 - r5, r7, r9 - r12, pc} - -l2_cache_enable: +.global v7_outer_cache_enable +v7_outer_cache_enable: push {r0, r1, r2, lr} mrc 15, 0, r3, cr1, cr0, 1 orr r3, r3, #2 mcr 15, 0, r3, cr1, cr0, 1 pop {r1, r2, r3, pc}
-l2_cache_disable: +.global v7_outer_cache_disable +v7_outer_cache_disable: push {r0, r1, r2, lr} mrc 15, 0, r3, cr1, cr0, 1 bic r3, r3, #2 mcr 15, 0, r3, cr1, cr0, 1 pop {r1, r2, r3, pc} + +#endif diff --git a/arch/arm/include/asm/arch-s5pc1xx/sys_proto.h b/arch/arm/include/asm/arch-s5pc1xx/sys_proto.h index 3078aaf..7b83c5a 100644 --- a/arch/arm/include/asm/arch-s5pc1xx/sys_proto.h +++ b/arch/arm/include/asm/arch-s5pc1xx/sys_proto.h @@ -25,8 +25,5 @@ #define _SYS_PROTO_H_
u32 get_device_type(void); -void invalidate_dcache(u32); -void l2_cache_disable(void); -void l2_cache_enable(void);
#endif
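For readers who prefer C, the two strong overrides above simply toggle bit 1 (L2 enable) of the CP15 Auxiliary Control Register. An equivalent sketch, not part of the patch (and note that writing ACTLR generally requires secure privilege):

void v7_outer_cache_enable(void)
{
	u32 actlr;

	asm volatile ("mrc p15, 0, %0, c1, c0, 1" : "=r" (actlr));
	actlr |= (1 << 1);	/* set the L2 enable bit */
	asm volatile ("mcr p15, 0, %0, c1, c0, 1" : : "r" (actlr));
}

void v7_outer_cache_disable(void)
{
	u32 actlr;

	asm volatile ("mrc p15, 0, %0, c1, c0, 1" : "=r" (actlr));
	actlr &= ~(1 << 1);	/* clear the L2 enable bit */
	asm volatile ("mcr p15, 0, %0, c1, c0, 1" : : "r" (actlr));
}

Being non-weak, either version takes precedence over the empty stubs in cache_v7.c.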

Dear Aneesh V,
In message 1305202276-27784-11-git-send-email-aneesh@ti.com you wrote:
adapt s5pc1xx to the new layered cache maintenance framework
Signed-off-by: Aneesh V aneesh@ti.com
V2:
- Changes for the function pointer to weakly linked change
arch/arm/cpu/armv7/s5pc1xx/cache.S | 88 ++----------------------- arch/arm/include/asm/arch-s5pc1xx/sys_proto.h | 3 - 2 files changed, 7 insertions(+), 84 deletions(-)
NAK because of the CONFIG_SYS_NO_*CACHE names.
Best regards,
Wolfgang Denk

With D-cache and MMU enabled for ARM in u-boot it becomes imperative to support a minimal set of cache maintenance operations and necessary initializations before enabling MMU.
This series of patches attempt to do the following for armv7: * Necessary initialization sequence before enabling MMU that includes invalidation of TLB, data caches, branch predictor array etc. * Framework for supporting SOC specific outer caches in a generic manner (using a structure of function pointers - inspired by the Linux implementation) * Generic armv7 cache maintenance operations for caches known to the CPU * Support for ARM PL310 L2 cache controller used in OMAP4 * Cleanup of the cleanup_before_linux() function * Adapting all armv7 SOCs to use the new framework and removing duplicated code
Testing: * Extensive testing on OMAP4430SDP and OMAP3430SDP by creating coherency issues and solving them using the maintenance routines - Eg: memfill a region of memory with a known pattern - Invalidate the region - Read back and compare the region with the original pattern - If match fails it means that invalidate is successful - Now add a flush call just before the invalidate - If match succeeds it means that flush was successful - Outer caches were tested with experiments involving making the function pointers NULL * Kernel booting on OMAP4430SDP and OMAP3430SDP Note: v2 has been tested only on OMAP4430SDP
V2: * Pointer based callback mechanism for outer cache operations changed to a weakly linked functions. * Change -march=armv7-a back to armv5 * Moved utility macros out of armv7.h * Added documentation for new CONFIG options. * Changed implementation of log2n to not use CLZ instruction as armv4 doesn't support this instruction and newly added Tegra2 uses -march=armv4 * Blank line after local variable declarations - fixed globally * Explicitly added an empty flush_cache() under #ifdef CONFIG_SYS_NO_DCACHE * Removed the print inside the weakly linked stub function - __arm_init_before_mmu * Fixed signature of flush_cache in cache.c * More descriptive commit message for the PL310 support patch * C struct for PL310 register accesses * Fixed white space issues
V3: * Rebased to latest HEAD of master * Added comments on changes done in V2 in individual patch headers. This was missing in V2
V4: * Removed bit field manipulation macros * Renamed CONFIG_SYS_NO_*CACHE flags to CONFIG_SYS_*CACHE_OFF globally
Aneesh V (9): arm: make default implementation of cache_flush() weakly linked armv7: cache maintenance operations for armv7 armv7: rename cache related CONFIG flags armv7: integrate cache maintenance support arm: minor fixes for cache and mmu handling armv7: add PL310 support to u-boot armv7: adapt omap4 to the new cache maintenance framework armv7: adapt omap3 to the new cache maintenance framework armv7: adapt s5pc1xx to the new cache maintenance framework
README | 11 + arch/arm/cpu/arm1136/start.S | 4 +- arch/arm/cpu/armv7/Makefile | 2 +- arch/arm/cpu/armv7/cache_v7.c | 394 ++++++++++++++++++++ arch/arm/cpu/armv7/cpu.c | 50 +-- arch/arm/cpu/armv7/omap3/Makefile | 1 - arch/arm/cpu/armv7/omap3/board.c | 136 ++++++- arch/arm/cpu/armv7/omap3/cache.S | 263 ------------- arch/arm/cpu/armv7/omap3/lowlevel_init.S | 32 ++ arch/arm/cpu/armv7/omap4/board.c | 12 + arch/arm/cpu/armv7/omap4/lowlevel_init.S | 9 + arch/arm/cpu/armv7/s5pc1xx/cache.S | 88 +---- arch/arm/cpu/armv7/start.S | 18 +- arch/arm/include/asm/arch-omap3/omap3.h | 20 + arch/arm/include/asm/arch-omap3/sys_proto.h | 10 +- arch/arm/include/asm/arch-omap4/sys_proto.h | 2 +- arch/arm/include/asm/arch-s5pc1xx/sys_proto.h | 3 - arch/arm/include/asm/armv7.h | 67 ++++ arch/arm/include/asm/global_data.h | 2 +- arch/arm/include/asm/pl310.h | 73 ++++ .../omap4/lowlevel_init.S => include/asm/utils.h} | 51 ++- arch/arm/lib/Makefile | 3 +- arch/arm/lib/board.c | 8 +- arch/arm/lib/cache-cp15.c | 22 +- arch/arm/lib/cache-pl310.c | 115 ++++++ arch/arm/lib/cache.c | 20 +- board/armltd/integrator/split_by_variant.sh | 8 +- common/cmd_bdinfo.c | 2 +- include/common.h | 5 +- include/configs/B2.h | 3 +- include/configs/assabet.h | 2 +- include/configs/ca9x4_ct_vxp.h | 2 +- include/configs/cerf250.h | 2 +- include/configs/cradle.h | 2 +- include/configs/csb226.h | 2 +- include/configs/dnp1110.h | 2 +- include/configs/efikamx.h | 2 +- include/configs/evb4510.h | 3 +- include/configs/gcplus.h | 2 +- include/configs/innokom.h | 2 +- include/configs/jornada.h | 2 +- include/configs/lart.h | 2 +- include/configs/lubbock.h | 2 +- include/configs/mx51evk.h | 2 +- include/configs/mx53evk.h | 2 +- include/configs/omap4_panda.h | 8 +- include/configs/omap4_sdp4430.h | 8 +- include/configs/pleb2.h | 2 +- include/configs/pxa255_idp.h | 2 +- include/configs/s5pc210_universal.h | 2 +- include/configs/shannon.h | 2 +- include/configs/tegra2-common.h | 2 +- include/configs/trizepsiv.h | 2 +- include/configs/vision2.h | 2 +- include/configs/xaeniax.h | 2 +- include/configs/xm250.h | 2 +- include/configs/zylonite.h | 2 +- 57 files changed, 1022 insertions(+), 479 deletions(-) create mode 100644 arch/arm/cpu/armv7/cache_v7.c delete mode 100644 arch/arm/cpu/armv7/omap3/cache.S create mode 100644 arch/arm/include/asm/armv7.h create mode 100644 arch/arm/include/asm/pl310.h copy arch/arm/{cpu/armv7/omap4/lowlevel_init.S => include/asm/utils.h} (66%) create mode 100644 arch/arm/lib/cache-pl310.c

Hi All,
On 17/06/2011 11:30, Aneesh V wrote:
With D-cache and MMU enabled for ARM in u-boot it becomes imperative to support a minimal set of cache maintenance operations and necessary initializations before enabling MMU.
This series of patches attempt to do the following for armv7:
- Necessary initialization sequence before enabling MMU that includes invalidation of TLB, data caches, branch predictor array etc.
- Framework for supporting SOC specific outer caches in a generic manner (using a structure of function pointers - inspired by the Linux implementation)
- Generic armv7 cache maintenance operations for caches known to the CPU
- Support for ARM PL310 L2 cache controller used in OMAP4
- Cleanup of the cleanup_before_linux() function
- Adapting all armv7 SOCs to use the new framework and removing duplicated code
Sandeep, Minkyu: is the patch series OK with you? If so I'll pull it all in ARM and issue a last pull request.
Kind regards,

On Wed, Jun 22, 2011 at 11:11 PM, Albert ARIBAUD albert.u.boot@aribaud.net wrote:
Hi All,
On 17/06/2011 11:30, Aneesh V wrote:
With D-cache and MMU enabled for ARM in u-boot it becomes imperative to support a minimal set of cache maintenance operations and necessary initializations before enabling MMU.
This series of patches attempt to do the following for armv7:
- Necessary initialization sequence before enabling MMU that includes
invalidation of TLB, data caches, branch predictor array etc.
- Framework for supporting SOC specific outer caches in a generic manner
(using a structure of function pointers - inspired by the Linux  implementation)
- Generic armv7 cache maintenance operations for caches known to the CPU
- Support for ARM PL310 L2 cache controller used in OMAP4
- Cleanup of the cleanup_before_linux() function
- Adapting all armv7 SOCs to use the new framework and removing
duplicated code
Sandeep, Minkyu: is the patch series OK with you? If so I'll pull it all in ARM and issue a last pull request.
Sandeep, Minkyu,
Would you please let me know if you have any concerns about this series? If not, it would be good to see this going upstream this release. The necessary initializations before enabling the cache are missing today. Also, proper invalidate and flush functions will be missed by any driver that uses DMA.
best regards, Aneesh

Hi All,
On 17/06/2011 11:30, Aneesh V wrote:
With D-cache and MMU enabled for ARM in u-boot it becomes imperative to support a minimal set of cache maintenance operations and necessary initializations before enabling MMU.
This series of patches attempt to do the following for armv7:
- Necessary initialization sequence before enabling MMU that includes invalidation of TLB, data caches, branch predictor array etc.
- Framework for supporting SOC specific outer caches in a generic manner (using a structure of function pointers - inspired by the Linux implementation)
- Generic armv7 cache maintenance operations for caches known to the CPU
- Support for ARM PL310 L2 cache controller used in OMAP4
- Cleanup of the cleanup_before_linux() function
- Adapting all armv7 SOCs to use the new framework and removing duplicated code
Sandeep, Minkyu: is the patch series OK with you? If so I'll pull it all in ARM and issue a last pull request.
Kind regards,
Albert.
I am fine with it.
Acked-by: Sandeep Paulraj <s-paulraj@ti.com>
--Sandeep

On 24 June 2011 04:24, Paulraj, Sandeep s-paulraj@ti.com wrote:
Hi All,
On 17/06/2011 11:30, Aneesh V wrote:
With D-cache and MMU enabled for ARM in u-boot it becomes imperative to support a minimal set of cache maintenance operations and necessary initializations before enabling MMU.
This series of patches attempt to do the following for armv7:
- Necessary initialization sequence before enabling MMU that includes
invalidation of TLB, data caches, branch predictor array etc.
- Framework for supporting SOC specific outer caches in a generic manner
(using a structure of function pointers - inspired by the Linux   implementation)
- Generic armv7 cache maintenance operations for caches known to the CPU
- Support for ARM PL310 L2 cache controller used in OMAP4
- Cleanup of the cleanup_before_linux() function
- Adapting all armv7 SOCs to use the new framework and removing
duplicated code
Sandeep, Minkyu: is the patch series OK with you? If so I'll pull it all in ARM and issue a last pull request.
Kind regards,
Albert.
I am fine with it.
Acked-by: Sandeep Paulraj <s-paulraj@ti.com>
Acked-by: Minkyu Kang mk7.kang@samsung.com
Thanks Minkyu Kang

On 17/06/2011 11:30, Aneesh V wrote:
With D-cache and MMU enabled for ARM in u-boot it becomes imperative to support a minimal set of cache maintenance operations and necessary initializations before enabling MMU.
This series of patches attempt to do the following for armv7:
- Necessary initialization sequence before enabling MMU that includes invalidation of TLB, data caches, branch predictor array etc.
- Framework for supporting SOC specific outer caches in a generic manner (using a structure of function pointers - inspired by the Linux implementation)
- Generic armv7 cache maintenance operations for caches known to the CPU
- Support for ARM PL310 L2 cache controller used in OMAP4
- Cleanup of the cleanup_before_linux() function
- Adapting all armv7 SOCs to use the new framework and removing duplicated code
Testing:
- Extensive testing on OMAP4430SDP and OMAP3430SDP by creating coherency issues and solving them using the maintenance routines
- Eg: memfill a region of memory with a known pattern
- Invalidate the region
- Read back and compare the region with the original pattern
- If match fails it means that invalidate is successful
- Now add a flush call just before the invalidate
- If match succeeds it means that flush was successful
- Outer caches were tested with experiments involving making the function pointers NULL
- Kernel booting on OMAP4430SDP and OMAP3430SDP
Note: v2 has been tested only on OMAP4430SDP
V2:
- Pointer based callback mechanism for outer cache operations changed to weakly linked functions.
- Change -march=armv7-a back to armv5
- Moved utility macros out of armv7.h
- Added documentation for new CONFIG options.
- Changed implementation of log2n to not use CLZ instruction as armv4 doesn't support this instruction and newly added Tegra2 uses -march=armv4
- Blank line after local variable declarations - fixed globally
- Explicitly added an empty flush_cache() under #ifdef CONFIG_SYS_NO_DCACHE
- Removed the print inside the weakly linked stub function - __arm_init_before_mmu
- Fixed signature of flush_cache in cache.c
- More descriptive commit message for the PL310 support patch
- C struct for PL310 register accesses
- Fixed white space issues
V3:
- Rebased to latest HEAD of master
- Added comments on changes done in V2 in individual patch headers. This was missing in V2
V4:
- Removed bit field manipulation macros
- Renamed CONFIG_SYS_NO_*CACHE flags to CONFIG_SYS_*CACHE_OFF globally
Aneesh V (9): arm: make default implementation of cache_flush() weakly linked armv7: cache maintenance operations for armv7 armv7: rename cache related CONFIG flags armv7: integrate cache maintenance support arm: minor fixes for cache and mmu handling armv7: add PL310 support to u-boot armv7: adapt omap4 to the new cache maintenance framework armv7: adapt omap3 to the new cache maintenance framework armv7: adapt s5pc1xx to the new cache maintenance framework
README | 11 + arch/arm/cpu/arm1136/start.S | 4 +- arch/arm/cpu/armv7/Makefile | 2 +- arch/arm/cpu/armv7/cache_v7.c | 394 ++++++++++++++++++++ arch/arm/cpu/armv7/cpu.c | 50 +-- arch/arm/cpu/armv7/omap3/Makefile | 1 - arch/arm/cpu/armv7/omap3/board.c | 136 ++++++- arch/arm/cpu/armv7/omap3/cache.S | 263 ------------- arch/arm/cpu/armv7/omap3/lowlevel_init.S | 32 ++ arch/arm/cpu/armv7/omap4/board.c | 12 + arch/arm/cpu/armv7/omap4/lowlevel_init.S | 9 + arch/arm/cpu/armv7/s5pc1xx/cache.S | 88 +---- arch/arm/cpu/armv7/start.S | 18 +- arch/arm/include/asm/arch-omap3/omap3.h | 20 + arch/arm/include/asm/arch-omap3/sys_proto.h | 10 +- arch/arm/include/asm/arch-omap4/sys_proto.h | 2 +- arch/arm/include/asm/arch-s5pc1xx/sys_proto.h | 3 - arch/arm/include/asm/armv7.h | 67 ++++ arch/arm/include/asm/global_data.h | 2 +- arch/arm/include/asm/pl310.h | 73 ++++ .../omap4/lowlevel_init.S => include/asm/utils.h} | 51 ++- arch/arm/lib/Makefile | 3 +- arch/arm/lib/board.c | 8 +- arch/arm/lib/cache-cp15.c | 22 +- arch/arm/lib/cache-pl310.c | 115 ++++++ arch/arm/lib/cache.c | 20 +- board/armltd/integrator/split_by_variant.sh | 8 +- common/cmd_bdinfo.c | 2 +- include/common.h | 5 +- include/configs/B2.h | 3 +- include/configs/assabet.h | 2 +- include/configs/ca9x4_ct_vxp.h | 2 +- include/configs/cerf250.h | 2 +- include/configs/cradle.h | 2 +- include/configs/csb226.h | 2 +- include/configs/dnp1110.h | 2 +- include/configs/efikamx.h | 2 +- include/configs/evb4510.h | 3 +- include/configs/gcplus.h | 2 +- include/configs/innokom.h | 2 +- include/configs/jornada.h | 2 +- include/configs/lart.h | 2 +- include/configs/lubbock.h | 2 +- include/configs/mx51evk.h | 2 +- include/configs/mx53evk.h | 2 +- include/configs/omap4_panda.h | 8 +- include/configs/omap4_sdp4430.h | 8 +- include/configs/pleb2.h | 2 +- include/configs/pxa255_idp.h | 2 +- include/configs/s5pc210_universal.h | 2 +- include/configs/shannon.h | 2 +- include/configs/tegra2-common.h | 2 +- include/configs/trizepsiv.h | 2 +- include/configs/vision2.h | 2 +- include/configs/xaeniax.h | 2 +- include/configs/xm250.h | 2 +- include/configs/zylonite.h | 2 +- 57 files changed, 1022 insertions(+), 479 deletions(-) create mode 100644 arch/arm/cpu/armv7/cache_v7.c delete mode 100644 arch/arm/cpu/armv7/omap3/cache.S create mode 100644 arch/arm/include/asm/armv7.h create mode 100644 arch/arm/include/asm/pl310.h copy arch/arm/{cpu/armv7/omap4/lowlevel_init.S => include/asm/utils.h} (66%) create mode 100644 arch/arm/lib/cache-pl310.c
Applied to u-boot-arm/master, thanks.
Kind regards,

make default implementation of cache_flush() weakly linked so that sub-architectures can override it
Signed-off-by: Aneesh V aneesh@ti.com --- arch/arm/lib/cache.c | 4 +++- 1 files changed, 3 insertions(+), 1 deletions(-)
diff --git a/arch/arm/lib/cache.c b/arch/arm/lib/cache.c index 30686fe..27123cd 100644 --- a/arch/arm/lib/cache.c +++ b/arch/arm/lib/cache.c @@ -25,7 +25,7 @@
#include <common.h>
-void flush_cache (unsigned long dummy1, unsigned long dummy2) +void __flush_cache(unsigned long start, unsigned long size) { #if defined(CONFIG_OMAP2420) || defined(CONFIG_ARM1136) void arm1136_cache_flush(void); @@ -45,3 +45,5 @@ void flush_cache (unsigned long dummy1, unsigned long dummy2) #endif return; } +void flush_cache(unsigned long start, unsigned long size) + __attribute__((weak, alias("__flush_cache")));
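With the weak alias in place, any sub-architecture can supply its own strong flush_cache() and the linker will pick it over __flush_cache(). A minimal sketch (the file placement is an assumption; the body mirrors what cache_v7.c provides later in this series):

/* hypothetical SoC file, e.g. arch/arm/cpu/armv7/<soc>/cache.c */
#include <common.h>

/* strong definition: replaces the weak flush_cache() from arch/arm/lib/cache.c */
void flush_cache(unsigned long start, unsigned long size)
{
	/* clean + invalidate the range [start, start + size) */
	flush_dcache_range(start, start + size);
}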

- Add a framework for layered cache maintenance - separate out SOC specific outer cache maintenance from maintenance of caches known to CPU
- Add generic ARMv7 cache maintenance operations that affect all caches known to ARMv7 CPUs. For instance, in Cortex-A8 these operations will affect both L1 and L2 caches; in Cortex-A9 they will affect only the L1 cache.
- D-cache operations supported: - Invalidate entire D-cache - Invalidate D-cache range - Flush(clean & invalidate) entire D-cache - Flush D-cache range - I-cache operations supported: - Invalidate entire I-cache
- Add maintenance functions for TLB, branch predictor array etc.
- Enable -march=armv7-a so that armv7 assembly instructions can be used
Signed-off-by: Aneesh V aneesh@ti.com --- V2: * Blank line after local variable declarations - fixed globally * Change from pointers to weakly linked functions for outer cache operations * Explicitly added a copy of flush_cache() under #ifdef CONFIG_SYS_NO_DCACHE * Change -march=armv7-a back to armv5 * Update documentation with cache related CONFIG options --- V4: * Squashed utility macros patch with this patch and removed the bit field operation functions * Removed the usage of get_bit_field() * Replaced CONFIG_SYS_NO_*CACHE flags with CONFIG_SYS_*CACHE_OFF --- README | 5 + arch/arm/cpu/armv7/Makefile | 2 +- arch/arm/cpu/armv7/cache_v7.c | 394 +++++++++++++++++++++++++++++++++++++++++ arch/arm/include/asm/armv7.h | 67 +++++++ arch/arm/include/asm/utils.h | 56 ++++++ include/common.h | 5 +- 6 files changed, 527 insertions(+), 2 deletions(-) create mode 100644 arch/arm/cpu/armv7/cache_v7.c create mode 100644 arch/arm/include/asm/armv7.h create mode 100644 arch/arm/include/asm/utils.h
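Before the diff itself, a short illustration of why the range operations matter: as noted later in the thread, any driver doing DMA needs them around its buffers. The driver functions and the DMA controller below are hypothetical; only the two range calls come from this patch.

#include <common.h>

static void dma_to_device(void *buf, unsigned long len)
{
	/* write dirty lines back so the DMA engine reads up-to-date data */
	flush_dcache_range((unsigned long)buf, (unsigned long)buf + len);
	/* ... program the (hypothetical) DMA controller here ... */
}

static void dma_from_device_done(void *buf, unsigned long len)
{
	/* drop stale lines so the CPU re-reads what the device just wrote */
	invalidate_dcache_range((unsigned long)buf, (unsigned long)buf + len);
	/* ... consume the received data ... */
}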
diff --git a/README b/README index 8bb9c8d..ca1520d 100644 --- a/README +++ b/README @@ -460,6 +460,11 @@ The following options need to be configured: Note: If a "bootargs" environment is defined, it will overwride the defaults discussed just above.
+- Cache Configuration: + CONFIG_SYS_ICACHE_OFF - Do not enable instruction cache in U-Boot + CONFIG_SYS_DCACHE_OFF - Do not enable data cache in U-Boot + CONFIG_SYS_L2CACHE_OFF- Do not enable L2 cache in U-Boot + - Serial Ports: CONFIG_PL010_SERIAL
diff --git a/arch/arm/cpu/armv7/Makefile b/arch/arm/cpu/armv7/Makefile index 8c0e915..299792a 100644 --- a/arch/arm/cpu/armv7/Makefile +++ b/arch/arm/cpu/armv7/Makefile @@ -26,7 +26,7 @@ include $(TOPDIR)/config.mk LIB = $(obj)lib$(CPU).o
START := start.o -COBJS := cpu.o +COBJS := cpu.o cache_v7.o COBJS += syslib.o
SRCS := $(START:.o=.S) $(COBJS:.o=.c) diff --git a/arch/arm/cpu/armv7/cache_v7.c b/arch/arm/cpu/armv7/cache_v7.c new file mode 100644 index 0000000..3e1e1bf --- /dev/null +++ b/arch/arm/cpu/armv7/cache_v7.c @@ -0,0 +1,394 @@ +/* + * (C) Copyright 2010 + * Texas Instruments, <www.ti.com> + * Aneesh V aneesh@ti.com + * + * See file CREDITS for list of people who contributed to this + * project. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + */ +#include <linux/types.h> +#include <common.h> +#include <asm/armv7.h> +#include <asm/utils.h> + +#define ARMV7_DCACHE_INVAL_ALL 1 +#define ARMV7_DCACHE_CLEAN_INVAL_ALL 2 +#define ARMV7_DCACHE_INVAL_RANGE 3 +#define ARMV7_DCACHE_CLEAN_INVAL_RANGE 4 + +#ifndef CONFIG_SYS_DCACHE_OFF +/* + * Write the level and type you want to Cache Size Selection Register(CSSELR) + * to get size details from Current Cache Size ID Register(CCSIDR) + */ +static void set_csselr(u32 level, u32 type) +{ u32 csselr = level << 1 | type; + + /* Write to Cache Size Selection Register(CSSELR) */ + asm volatile ("mcr p15, 2, %0, c0, c0, 0" : : "r" (csselr)); +} + +static u32 get_ccsidr(void) +{ + u32 ccsidr; + + /* Read current CP15 Cache Size ID Register */ + asm volatile ("mrc p15, 1, %0, c0, c0, 0" : "=r" (ccsidr)); + return ccsidr; +} + +static u32 get_clidr(void) +{ + u32 clidr; + + /* Read current CP15 Cache Level ID Register */ + asm volatile ("mrc p15,1,%0,c0,c0,1" : "=r" (clidr)); + return clidr; +} + +static void v7_inval_dcache_level_setway(u32 level, u32 num_sets, + u32 num_ways, u32 way_shift, + u32 log2_line_len) +{ + int way, set, setway; + + /* + * For optimal assembly code: + * a. count down + * b. have bigger loop inside + */ + for (way = num_ways - 1; way >= 0 ; way--) { + for (set = num_sets - 1; set >= 0; set--) { + setway = (level << 1) | (set << log2_line_len) | + (way << way_shift); + /* Invalidate data/unified cache line by set/way */ + asm volatile (" mcr p15, 0, %0, c7, c6, 2" + : : "r" (setway)); + } + } + /* DMB to make sure the operation is complete */ + CP15DMB; +} + +static void v7_clean_inval_dcache_level_setway(u32 level, u32 num_sets, + u32 num_ways, u32 way_shift, + u32 log2_line_len) +{ + int way, set, setway; + + /* + * For optimal assembly code: + * a. count down + * b. 
have bigger loop inside + */ + for (way = num_ways - 1; way >= 0 ; way--) { + for (set = num_sets - 1; set >= 0; set--) { + setway = (level << 1) | (set << log2_line_len) | + (way << way_shift); + /* + * Clean & Invalidate data/unified + * cache line by set/way + */ + asm volatile (" mcr p15, 0, %0, c7, c14, 2" + : : "r" (setway)); + } + } + /* DMB to make sure the operation is complete */ + CP15DMB; +} + +static void v7_maint_dcache_level_setway(u32 level, u32 operation) +{ + u32 ccsidr; + u32 num_sets, num_ways, log2_line_len, log2_num_ways; + u32 way_shift; + + set_csselr(level, ARMV7_CSSELR_IND_DATA_UNIFIED); + + ccsidr = get_ccsidr(); + + log2_line_len = ((ccsidr & CCSIDR_LINE_SIZE_MASK) >> + CCSIDR_LINE_SIZE_OFFSET) + 2; + /* Converting from words to bytes */ + log2_line_len += 2; + + num_ways = ((ccsidr & CCSIDR_ASSOCIATIVITY_MASK) >> + CCSIDR_ASSOCIATIVITY_OFFSET) + 1; + num_sets = ((ccsidr & CCSIDR_NUM_SETS_MASK) >> + CCSIDR_NUM_SETS_OFFSET) + 1; + /* + * According to ARMv7 ARM number of sets and number of ways need + * not be a power of 2 + */ + log2_num_ways = log_2_n_round_up(num_ways); + + way_shift = (32 - log2_num_ways); + if (operation == ARMV7_DCACHE_INVAL_ALL) { + v7_inval_dcache_level_setway(level, num_sets, num_ways, + way_shift, log2_line_len); + } else if (operation == ARMV7_DCACHE_CLEAN_INVAL_ALL) { + v7_clean_inval_dcache_level_setway(level, num_sets, num_ways, + way_shift, log2_line_len); + } +} + +static void v7_maint_dcache_all(u32 operation) +{ + u32 level, cache_type, level_start_bit = 0; + + u32 clidr = get_clidr(); + + for (level = 0; level < 7; level++) { + cache_type = (clidr >> level_start_bit) & 0x7; + if ((cache_type == ARMV7_CLIDR_CTYPE_DATA_ONLY) || + (cache_type == ARMV7_CLIDR_CTYPE_INSTRUCTION_DATA) || + (cache_type == ARMV7_CLIDR_CTYPE_UNIFIED)) + v7_maint_dcache_level_setway(level, operation); + level_start_bit += 3; + } +} + +static void v7_dcache_clean_inval_range(u32 start, + u32 stop, u32 line_len) +{ + u32 mva; + + /* Align start to cache line boundary */ + start &= ~(line_len - 1); + for (mva = start; mva < stop; mva = mva + line_len) { + /* DCCIMVAC - Clean & Invalidate data cache by MVA to PoC */ + asm volatile ("mcr p15, 0, %0, c7, c14, 1" : : "r" (mva)); + } +} + +static void v7_dcache_inval_range(u32 start, u32 stop, u32 line_len) +{ + u32 mva; + + /* + * If start address is not aligned to cache-line flush the first + * line to prevent affecting somebody else's buffer + */ + if (start & (line_len - 1)) { + v7_dcache_clean_inval_range(start, start + 1, line_len); + /* move to next cache line */ + start = (start + line_len - 1) & ~(line_len - 1); + } + + /* + * If stop address is not aligned to cache-line flush the last + * line to prevent affecting somebody else's buffer + */ + if (stop & (line_len - 1)) { + v7_dcache_clean_inval_range(stop, stop + 1, line_len); + /* align to the beginning of this cache line */ + stop &= ~(line_len - 1); + } + + for (mva = start; mva < stop; mva = mva + line_len) { + /* DCIMVAC - Invalidate data cache by MVA to PoC */ + asm volatile ("mcr p15, 0, %0, c7, c6, 1" : : "r" (mva)); + } +} + +static void v7_dcache_maint_range(u32 start, u32 stop, u32 range_op) +{ + u32 line_len, ccsidr; + + ccsidr = get_ccsidr(); + line_len = ((ccsidr & CCSIDR_LINE_SIZE_MASK) >> + CCSIDR_LINE_SIZE_OFFSET) + 2; + /* Converting from words to bytes */ + line_len += 2; + /* converting from log2(linelen) to linelen */ + line_len = 1 << line_len; + + switch (range_op) { + case ARMV7_DCACHE_CLEAN_INVAL_RANGE: + 
v7_dcache_clean_inval_range(start, stop, line_len); + break; + case ARMV7_DCACHE_INVAL_RANGE: + v7_dcache_inval_range(start, stop, line_len); + break; + } + + /* DMB to make sure the operation is complete */ + CP15DMB; +} + +/* Invalidate TLB */ +static void v7_inval_tlb(void) +{ + /* Invalidate entire unified TLB */ + asm volatile ("mcr p15, 0, %0, c8, c7, 0" : : "r" (0)); + /* Invalidate entire data TLB */ + asm volatile ("mcr p15, 0, %0, c8, c6, 0" : : "r" (0)); + /* Invalidate entire instruction TLB */ + asm volatile ("mcr p15, 0, %0, c8, c5, 0" : : "r" (0)); + /* Full system DSB - make sure that the invalidation is complete */ + CP15DSB; + /* Full system ISB - make sure the instruction stream sees it */ + CP15ISB; +} + +void invalidate_dcache_all(void) +{ + v7_maint_dcache_all(ARMV7_DCACHE_INVAL_ALL); + + v7_outer_cache_inval_all(); +} + +/* + * Performs a clean & invalidation of the entire data cache + * at all levels + */ +void flush_dcache_all(void) +{ + v7_maint_dcache_all(ARMV7_DCACHE_CLEAN_INVAL_ALL); + + v7_outer_cache_flush_all(); +} + +/* + * Invalidates range in all levels of D-cache/unified cache used: + * Affects the range [start, stop - 1] + */ +void invalidate_dcache_range(unsigned long start, unsigned long stop) +{ + + v7_dcache_maint_range(start, stop, ARMV7_DCACHE_INVAL_RANGE); + + v7_outer_cache_inval_range(start, stop); +} + +/* + * Flush range(clean & invalidate) from all levels of D-cache/unified + * cache used: + * Affects the range [start, stop - 1] + */ +void flush_dcache_range(unsigned long start, unsigned long stop) +{ + v7_dcache_maint_range(start, stop, ARMV7_DCACHE_CLEAN_INVAL_RANGE); + + v7_outer_cache_flush_range(start, stop); +} + +void arm_init_before_mmu(void) +{ + v7_outer_cache_enable(); + invalidate_dcache_all(); + v7_inval_tlb(); +} + +/* + * Flush range from all levels of d-cache/unified-cache used: + * Affects the range [start, start + size - 1] + */ +void flush_cache(unsigned long start, unsigned long size) +{ + flush_dcache_range(start, start + size); +} +#else /* #ifndef CONFIG_SYS_DCACHE_OFF */ +void invalidate_dcache_all(void) +{ +} + +void flush_dcache_all(void) +{ +} + +void invalidate_dcache_range(unsigned long start, unsigned long stop) +{ +} + +void flush_dcache_range(unsigned long start, unsigned long stop) +{ +} + +void arm_init_before_mmu(void) +{ +} + +void flush_cache(unsigned long start, unsigned long size) +{ +} +#endif /* #ifndef CONFIG_SYS_DCACHE_OFF */ + +#ifndef CONFIG_SYS_ICACHE_OFF +/* Invalidate entire I-cache and branch predictor array */ +void invalidate_icache_all(void) +{ + /* + * Invalidate all instruction caches to PoU. + * Also flushes branch target cache. 
+ */ + asm volatile ("mcr p15, 0, %0, c7, c5, 0" : : "r" (0)); + + /* Invalidate entire branch predictor array */ + asm volatile ("mcr p15, 0, %0, c7, c5, 6" : : "r" (0)); + + /* Full system DSB - make sure that the invalidation is complete */ + CP15DSB; + + /* ISB - make sure the instruction stream sees it */ + CP15ISB; +} +#else +void invalidate_icache_all(void) +{ +} +#endif + +/* + * Stub implementations for outer cache operations + */ +void __v7_outer_cache_enable(void) +{ +} +void v7_outer_cache_enable(void) + __attribute__((weak, alias("__v7_outer_cache_enable"))); + +void __v7_outer_cache_disable(void) +{ +} +void v7_outer_cache_disable(void) + __attribute__((weak, alias("__v7_outer_cache_disable"))); + +void __v7_outer_cache_flush_all(void) +{ +} +void v7_outer_cache_flush_all(void) + __attribute__((weak, alias("__v7_outer_cache_flush_all"))); + +void __v7_outer_cache_inval_all(void) +{ +} +void v7_outer_cache_inval_all(void) + __attribute__((weak, alias("__v7_outer_cache_inval_all"))); + +void __v7_outer_cache_flush_range(u32 start, u32 end) +{ +} +void v7_outer_cache_flush_range(u32 start, u32 end) + __attribute__((weak, alias("__v7_outer_cache_flush_range"))); + +void __v7_outer_cache_inval_range(u32 start, u32 end) +{ +} +void v7_outer_cache_inval_range(u32 start, u32 end) + __attribute__((weak, alias("__v7_outer_cache_inval_range"))); diff --git a/arch/arm/include/asm/armv7.h b/arch/arm/include/asm/armv7.h new file mode 100644 index 0000000..88b9c62 --- /dev/null +++ b/arch/arm/include/asm/armv7.h @@ -0,0 +1,67 @@ +/* + * (C) Copyright 2010 + * Texas Instruments, <www.ti.com> + * Aneesh V aneesh@ti.com + * + * See file CREDITS for list of people who contributed to this + * project. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + */ +#ifndef ARMV7_H +#define ARMV7_H +#include <linux/types.h> + +/* CCSIDR */ +#define CCSIDR_LINE_SIZE_OFFSET 0 +#define CCSIDR_LINE_SIZE_MASK 0x7 +#define CCSIDR_ASSOCIATIVITY_OFFSET 3 +#define CCSIDR_ASSOCIATIVITY_MASK (0x3FF << 3) +#define CCSIDR_NUM_SETS_OFFSET 13 +#define CCSIDR_NUM_SETS_MASK (0x7FFF << 13) + +/* + * Values for InD field in CSSELR + * Selects the type of cache + */ +#define ARMV7_CSSELR_IND_DATA_UNIFIED 0 +#define ARMV7_CSSELR_IND_INSTRUCTION 1 + +/* Values for Ctype fields in CLIDR */ +#define ARMV7_CLIDR_CTYPE_NO_CACHE 0 +#define ARMV7_CLIDR_CTYPE_INSTRUCTION_ONLY 1 +#define ARMV7_CLIDR_CTYPE_DATA_ONLY 2 +#define ARMV7_CLIDR_CTYPE_INSTRUCTION_DATA 3 +#define ARMV7_CLIDR_CTYPE_UNIFIED 4 + +/* + * CP15 Barrier instructions + * Please note that we have separate barrier instructions in ARMv7. + * However, we use the CP15 based instructions because we use + * -march=armv5 in U-Boot + */ +#define CP15ISB asm volatile ("mcr p15, 0, %0, c7, c5, 4" : : "r" (0)) +#define CP15DSB asm volatile ("mcr p15, 0, %0, c7, c10, 4" : : "r" (0)) +#define CP15DMB asm volatile ("mcr p15, 0, %0, c7, c10, 5" : : "r" (0)) + +void v7_outer_cache_enable(void); +void v7_outer_cache_disable(void); +void v7_outer_cache_flush_all(void); +void v7_outer_cache_inval_all(void); +void v7_outer_cache_flush_range(u32 start, u32 end); +void v7_outer_cache_inval_range(u32 start, u32 end); + +#endif diff --git a/arch/arm/include/asm/utils.h b/arch/arm/include/asm/utils.h new file mode 100644 index 0000000..828b86c --- /dev/null +++ b/arch/arm/include/asm/utils.h @@ -0,0 +1,56 @@ +/* + * (C) Copyright 2010 + * Texas Instruments, <www.ti.com> + * Aneesh V aneesh@ti.com + * + * See file CREDITS for list of people who contributed to this + * project. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + */ +#ifndef _UTILS_H_ +#define _UTILS_H_ + +static inline s32 log_2_n_round_up(u32 n) +{ + s32 log2n = -1; + u32 temp = n; + + while (temp) { + log2n++; + temp >>= 1; + } + + if (n & (n - 1)) + return log2n + 1; /* not power of 2 - round up */ + else + return log2n; /* power of 2 */ +} + +static inline s32 log_2_n_round_down(u32 n) +{ + s32 log2n = -1; + u32 temp = n; + + while (temp) { + log2n++; + temp >>= 1; + } + + return log2n; +} + +#endif diff --git a/include/common.h b/include/common.h index 1e4a6a5..d754083 100644 --- a/include/common.h +++ b/include/common.h @@ -413,6 +413,7 @@ void icache_disable(void); int dcache_status (void); void dcache_enable (void); void dcache_disable(void); +void mmu_disable(void); void relocate_code (ulong, gd_t *, ulong) __attribute__ ((noreturn)); ulong get_endaddr (void); void trap_init (ulong); @@ -614,9 +615,11 @@ ulong video_setmem (ulong);
/* arch/$(ARCH)/lib/cache.c */ void flush_cache (unsigned long, unsigned long); +void flush_dcache_all(void); void flush_dcache_range(unsigned long start, unsigned long stop); void invalidate_dcache_range(unsigned long start, unsigned long stop); - +void invalidate_dcache_all(void); +void invalidate_icache_all(void);
/* arch/$(ARCH)/lib/ticks.S */ unsigned long long get_ticks(void);
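To make the CCSIDR arithmetic in cache_v7.c concrete, here is a worked decode for an assumed 32 KB, 4-way cache with 32-byte lines (256 sets). The standalone program below only illustrates the field encodings used by the patch; the cache geometry is an example, not something read from real hardware.

#include <stdio.h>

int main(void)
{
	/* fields are stored as (value - 1); line size as log2(words/line) - 2 */
	unsigned int ccsidr = (255u << 13) | (3u << 3) | 1u;	/* 0x1FE019 */
	unsigned int log2_line_len, num_ways, num_sets;

	log2_line_len = (ccsidr & 0x7) + 2;	/* log2(words per line) = 3 */
	log2_line_len += 2;			/* words -> bytes: log2 = 5 */
	num_ways = ((ccsidr >> 3) & 0x3FF) + 1;		/* 4 ways   */
	num_sets = ((ccsidr >> 13) & 0x7FFF) + 1;	/* 256 sets */

	printf("line = %u bytes, ways = %u, sets = %u\n",
	       1u << log2_line_len, num_ways, num_sets);
	return 0;
}

With these numbers log_2_n_round_up(4) = 2, so the set/way loops in v7_maint_dcache_level_setway() run with way_shift = 32 - 2 = 30.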

Replace the cache-related CONFIG flags with more meaningful names. The changes are as follows:
CONFIG_L2_OFF -> CONFIG_SYS_L2CACHE_OFF
CONFIG_SYS_NO_ICACHE -> CONFIG_SYS_ICACHE_OFF
CONFIG_SYS_NO_DCACHE -> CONFIG_SYS_DCACHE_OFF
Signed-off-by: Aneesh V aneesh@ti.com V2: * Changed CONFIG_L2_OFF -> CONFIG_SYS_NO_L2CACHE V4: * Changed all three flags to the final names suggested as above and accordingly changed the commit message --- arch/arm/cpu/arm1136/start.S | 4 ++-- arch/arm/cpu/armv7/cpu.c | 3 --- arch/arm/include/asm/global_data.h | 2 +- arch/arm/lib/Makefile | 2 -- arch/arm/lib/board.c | 2 +- arch/arm/lib/cache-cp15.c | 6 +++--- board/armltd/integrator/split_by_variant.sh | 8 ++++---- common/cmd_bdinfo.c | 2 +- include/configs/B2.h | 3 ++- include/configs/assabet.h | 2 +- include/configs/ca9x4_ct_vxp.h | 2 +- include/configs/cerf250.h | 2 +- include/configs/cradle.h | 2 +- include/configs/csb226.h | 2 +- include/configs/dnp1110.h | 2 +- include/configs/efikamx.h | 2 +- include/configs/evb4510.h | 3 ++- include/configs/gcplus.h | 2 +- include/configs/innokom.h | 2 +- include/configs/jornada.h | 2 +- include/configs/lart.h | 2 +- include/configs/lubbock.h | 2 +- include/configs/mx51evk.h | 2 +- include/configs/mx53evk.h | 2 +- include/configs/omap4_panda.h | 2 +- include/configs/omap4_sdp4430.h | 2 +- include/configs/pleb2.h | 2 +- include/configs/pxa255_idp.h | 2 +- include/configs/s5pc210_universal.h | 2 +- include/configs/shannon.h | 2 +- include/configs/tegra2-common.h | 2 +- include/configs/trizepsiv.h | 2 +- include/configs/vision2.h | 2 +- include/configs/xaeniax.h | 2 +- include/configs/xm250.h | 2 +- include/configs/zylonite.h | 2 +- 36 files changed, 42 insertions(+), 45 deletions(-)
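For reference, a board that wants to run entirely uncached would now select the renamed flags like this (hypothetical config header, mirroring the mapping above):

/* include/configs/<myboard>.h -- hypothetical */
#define CONFIG_SYS_ICACHE_OFF		/* do not enable the I-cache */
#define CONFIG_SYS_DCACHE_OFF		/* do not enable the D-cache */
#define CONFIG_SYS_L2CACHE_OFF		/* keep the L2/outer cache disabled */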
diff --git a/arch/arm/cpu/arm1136/start.S b/arch/arm/cpu/arm1136/start.S index 3c5f3ef..200a800 100644 --- a/arch/arm/cpu/arm1136/start.S +++ b/arch/arm/cpu/arm1136/start.S @@ -512,10 +512,10 @@ fiq: .align 5 .global arm1136_cache_flush arm1136_cache_flush: -#if !defined(CONFIG_SYS_NO_ICACHE) +#if !defined(CONFIG_SYS_ICACHE_OFF) mcr p15, 0, r1, c7, c5, 0 @ invalidate I cache #endif -#if !defined(CONFIG_SYS_NO_DCACHE) +#if !defined(CONFIG_SYS_DCACHE_OFF) mcr p15, 0, r1, c7, c14, 0 @ invalidate D cache #endif mov pc, lr @ back to caller diff --git a/arch/arm/cpu/armv7/cpu.c b/arch/arm/cpu/armv7/cpu.c index a01e0d6..bc4238f 100644 --- a/arch/arm/cpu/armv7/cpu.c +++ b/arch/arm/cpu/armv7/cpu.c @@ -35,9 +35,6 @@ #include <command.h> #include <asm/system.h> #include <asm/cache.h> -#ifndef CONFIG_L2_OFF -#include <asm/arch/sys_proto.h> -#endif
static void cache_flush(void);
diff --git a/arch/arm/include/asm/global_data.h b/arch/arm/include/asm/global_data.h index 2a84d27..7578d53 100644 --- a/arch/arm/include/asm/global_data.h +++ b/arch/arm/include/asm/global_data.h @@ -70,7 +70,7 @@ typedef struct global_data { unsigned long irq_sp; /* irq stack pointer */ unsigned long start_addr_sp; /* start_addr_stackpointer */ unsigned long reloc_off; -#if !(defined(CONFIG_SYS_NO_ICACHE) && defined(CONFIG_SYS_NO_DCACHE)) +#if !(defined(CONFIG_SYS_ICACHE_OFF) && defined(CONFIG_SYS_DCACHE_OFF)) unsigned long tlb_addr; #endif void **jt; /* jump table */ diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index 03b1b5e..f993d74 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -39,9 +39,7 @@ GLCOBJS += div0.o COBJS-y += board.o COBJS-y += bootm.o COBJS-y += cache.o -ifndef CONFIG_SYS_NO_CP15_CACHE COBJS-y += cache-cp15.o -endif COBJS-y += interrupts.o COBJS-y += reset.o SOBJS-$(CONFIG_USE_ARCH_MEMSET) += memset.o diff --git a/arch/arm/lib/board.c b/arch/arm/lib/board.c index 1a784a1..6342e97 100644 --- a/arch/arm/lib/board.c +++ b/arch/arm/lib/board.c @@ -329,7 +329,7 @@ void board_init_f (ulong bootflag) debug ("Reserving %ldk for protected RAM at %08lx\n", reg, addr); #endif /* CONFIG_PRAM */
-#if !(defined(CONFIG_SYS_NO_ICACHE) && defined(CONFIG_SYS_NO_DCACHE)) +#if !(defined(CONFIG_SYS_ICACHE_OFF) && defined(CONFIG_SYS_DCACHE_OFF)) /* reserve TLB table */ addr -= (4096 * 4);
diff --git a/arch/arm/lib/cache-cp15.c b/arch/arm/lib/cache-cp15.c index d9175f0..ba73fb9 100644 --- a/arch/arm/lib/cache-cp15.c +++ b/arch/arm/lib/cache-cp15.c @@ -24,7 +24,7 @@ #include <common.h> #include <asm/system.h>
-#if !(defined(CONFIG_SYS_NO_ICACHE) && defined(CONFIG_SYS_NO_DCACHE)) +#if !(defined(CONFIG_SYS_ICACHE_OFF) && defined(CONFIG_SYS_DCACHE_OFF))
#if defined(CONFIG_SYS_ARM_CACHE_WRITETHROUGH) #define CACHE_SETUP 0x1a @@ -118,7 +118,7 @@ static void cache_disable(uint32_t cache_bit) } #endif
-#ifdef CONFIG_SYS_NO_ICACHE +#ifdef CONFIG_SYS_ICACHE_OFF void icache_enable (void) { return; @@ -150,7 +150,7 @@ int icache_status(void) } #endif
-#ifdef CONFIG_SYS_NO_DCACHE +#ifdef CONFIG_SYS_DCACHE_OFF void dcache_enable (void) { return; diff --git a/board/armltd/integrator/split_by_variant.sh b/board/armltd/integrator/split_by_variant.sh index d869dd2..19fc832 100755 --- a/board/armltd/integrator/split_by_variant.sh +++ b/board/armltd/integrator/split_by_variant.sh @@ -103,16 +103,16 @@ case "$cpu" in #undef CONFIG_CM_INIT /* CM may not have initialization reg */ #undef CONFIG_CM_TCRAM /* CM may not have TCRAM */ /* May not be processor without cache support */ -#define CONFIG_SYS_NO_ICACHE 1 -#define CONFIG_SYS_NO_DCACHE 1 +#define CONFIG_SYS_ICACHE_OFF 1 +#define CONFIG_SYS_DCACHE_OFF 1 _EOF ;;
arm720t) cat >> ${config_file} << _EOF /* May not be processor without cache support */ -#define CONFIG_SYS_NO_ICACHE 1 -#define CONFIG_SYS_NO_DCACHE 1 +#define CONFIG_SYS_ICACHE_OFF 1 +#define CONFIG_SYS_DCACHE_OFF 1 _EOF ;; esac diff --git a/common/cmd_bdinfo.c b/common/cmd_bdinfo.c index 75924f8..6051120 100644 --- a/common/cmd_bdinfo.c +++ b/common/cmd_bdinfo.c @@ -345,7 +345,7 @@ int do_bdinfo(cmd_tbl_t *cmdtp, int flag, int argc, char * const argv[]) printf("ip_addr = %pI4\n", &bd->bi_ip_addr); #endif printf("baudrate = %d bps\n", bd->bi_baudrate); -#if !(defined(CONFIG_SYS_NO_ICACHE) && defined(CONFIG_SYS_NO_DCACHE)) +#if !(defined(CONFIG_SYS_ICACHE_OFF) && defined(CONFIG_SYS_DCACHE_OFF)) print_num("TLB addr", gd->tlb_addr); #endif print_num("relocaddr", gd->relocaddr); diff --git a/include/configs/B2.h b/include/configs/B2.h index 7846a92..e181fa8 100644 --- a/include/configs/B2.h +++ b/include/configs/B2.h @@ -38,7 +38,8 @@ #define CONFIG_B2 1 /* on an B2 Board */ #define CONFIG_ARM_THUMB 1 /* this is an ARM7TDMI */ #undef CONFIG_ARM7_REVD /* disable ARM720 REV.D Workarounds */ -#define CONFIG_SYS_NO_CP15_CACHE +#define CONFIG_SYS_ICACHE_OFF +#define CONFIG_SYS_DCACHE_OFF #define CONFIG_ARCH_CPU_INIT
#define CONFIG_S3C44B0_CLOCK_SPEED 75 /* we have a 75Mhz S3C44B0*/ diff --git a/include/configs/assabet.h b/include/configs/assabet.h index 5cd1836..dc9bd88 100644 --- a/include/configs/assabet.h +++ b/include/configs/assabet.h @@ -38,7 +38,7 @@
#undef CONFIG_USE_IRQ /* we will never enable dcache, because we have to setup MMU first */ -#define CONFIG_SYS_NO_DCACHE +#define CONFIG_SYS_DCACHE_OFF
#define CONFIG_CMDLINE_TAG 1 /* enable passing of ATAGs */ #define CONFIG_SETUP_MEMORY_TAGS 1 diff --git a/include/configs/ca9x4_ct_vxp.h b/include/configs/ca9x4_ct_vxp.h index 7f83249..f0ac79a 100644 --- a/include/configs/ca9x4_ct_vxp.h +++ b/include/configs/ca9x4_ct_vxp.h @@ -41,7 +41,7 @@
#define CONFIG_CMDLINE_TAG 1 /* enable passing of ATAGs */ #define CONFIG_SETUP_MEMORY_TAGS 1 -#define CONFIG_L2_OFF 1 +#define CONFIG_SYS_L2CACHE_OFF 1 #define CONFIG_INITRD_TAG 1
#define CONFIG_OF_LIBFDT 1 diff --git a/include/configs/cerf250.h b/include/configs/cerf250.h index 9696487..7e179be 100644 --- a/include/configs/cerf250.h +++ b/include/configs/cerf250.h @@ -43,7 +43,7 @@ #undef CONFIG_USE_IRQ /* we don't need IRQ/FIQ stuff */
/* we will never enable dcache, because we have to setup MMU first */ -#define CONFIG_SYS_NO_DCACHE +#define CONFIG_SYS_DCACHE_OFF
/* * Size of malloc() pool diff --git a/include/configs/cradle.h b/include/configs/cradle.h index c21af38..21a8e64 100644 --- a/include/configs/cradle.h +++ b/include/configs/cradle.h @@ -38,7 +38,7 @@ #undef CONFIG_USE_IRQ /* we don't need IRQ/FIQ stuff */
/* we will never enable dcache, because we have to setup MMU first */ -#define CONFIG_SYS_NO_DCACHE +#define CONFIG_SYS_DCACHE_OFF #define CONFIG_SYS_TEXT_BASE 0x0 /* * Size of malloc() pool diff --git a/include/configs/csb226.h b/include/configs/csb226.h index 505740c..dcfbc6e 100644 --- a/include/configs/csb226.h +++ b/include/configs/csb226.h @@ -44,7 +44,7 @@ /* for timer/console/ethernet */
/* we will never enable dcache, because we have to setup MMU first */ -#define CONFIG_SYS_NO_DCACHE +#define CONFIG_SYS_DCACHE_OFF #define CONFIG_SYS_TEXT_BASE 0x0 /* * Hardware drivers diff --git a/include/configs/dnp1110.h b/include/configs/dnp1110.h index 69c6420..7d79d57 100644 --- a/include/configs/dnp1110.h +++ b/include/configs/dnp1110.h @@ -42,7 +42,7 @@
#undef CONFIG_USE_IRQ /* we don't need IRQ/FIQ stuff */ /* we will never enable dcache, because we have to setup MMU first */ -#define CONFIG_SYS_NO_DCACHE +#define CONFIG_SYS_DCACHE_OFF
/* * Size of malloc() pool diff --git a/include/configs/efikamx.h b/include/configs/efikamx.h index 571c3cb..a04ac49 100644 --- a/include/configs/efikamx.h +++ b/include/configs/efikamx.h @@ -38,7 +38,7 @@ #define CONFIG_DISPLAY_CPUINFO #define CONFIG_DISPLAY_BOARDINFO
-#define CONFIG_L2_OFF +#define CONFIG_SYS_L2CACHE_OFF
/* * Bootloader Components Configuration diff --git a/include/configs/evb4510.h b/include/configs/evb4510.h index fb05727..23a2efa 100644 --- a/include/configs/evb4510.h +++ b/include/configs/evb4510.h @@ -43,7 +43,8 @@ #define CONFIG_ARM_THUMB 1 /* this is an ARM7TDMI */ #define CONFIG_S3C4510B 1 /* it's a S3C4510B chip */ #define CONFIG_EVB4510 1 /* on an EVB4510 Board */ -#define CONFIG_SYS_NO_CP15_CACHE +#define CONFIG_SYS_ICACHE_OFF +#define CONFIG_SYS_DCACHE_OFF
#define CONFIG_USE_IRQ #define CONFIG_STACKSIZE_IRQ (4*1024) diff --git a/include/configs/gcplus.h b/include/configs/gcplus.h index fd39ab4..9412daa 100644 --- a/include/configs/gcplus.h +++ b/include/configs/gcplus.h @@ -49,7 +49,7 @@
#undef CONFIG_USE_IRQ /* we don't need IRQ/FIQ stuff */ /* we will never enable dcache, because we have to setup MMU first */ -#define CONFIG_SYS_NO_DCACHE +#define CONFIG_SYS_DCACHE_OFF
#define CONFIG_CMDLINE_TAG 1 /* enable passing of ATAGs */ #define CONFIG_SETUP_MEMORY_TAGS 1 diff --git a/include/configs/innokom.h b/include/configs/innokom.h index 744d65c..2e4b346 100644 --- a/include/configs/innokom.h +++ b/include/configs/innokom.h @@ -43,7 +43,7 @@ #define CONFIG_SYS_TEXT_BASE 0x0
/* we will never enable dcache, because we have to setup MMU first */ -#define CONFIG_SYS_NO_DCACHE +#define CONFIG_SYS_DCACHE_OFF
/* * Hardware drivers diff --git a/include/configs/jornada.h b/include/configs/jornada.h index 41b09aa..84ad2d8 100644 --- a/include/configs/jornada.h +++ b/include/configs/jornada.h @@ -32,7 +32,7 @@ #define CONFIG_SYS_TEXT_BASE 0xC1F00000
/* we will never enable dcache, because we have to setup MMU first */ -#define CONFIG_SYS_NO_DCACHE +#define CONFIG_SYS_DCACHE_OFF #undef CONFIG_USE_IRQ
/* Console setting */ diff --git a/include/configs/lart.h b/include/configs/lart.h index 795cf34..7316b23 100644 --- a/include/configs/lart.h +++ b/include/configs/lart.h @@ -36,7 +36,7 @@
#undef CONFIG_USE_IRQ /* we don't need IRQ/FIQ stuff */ /* we will never enable dcache, because we have to setup MMU first */ -#define CONFIG_SYS_NO_DCACHE +#define CONFIG_SYS_DCACHE_OFF
/* * Size of malloc() pool diff --git a/include/configs/lubbock.h b/include/configs/lubbock.h index b7d53b6..b6ee919 100644 --- a/include/configs/lubbock.h +++ b/include/configs/lubbock.h @@ -47,7 +47,7 @@ #undef CONFIG_USE_IRQ /* we don't need IRQ/FIQ stuff */
/* we will never enable dcache, because we have to setup MMU first */ -#define CONFIG_SYS_NO_DCACHE +#define CONFIG_SYS_DCACHE_OFF
/* * Size of malloc() pool diff --git a/include/configs/mx51evk.h b/include/configs/mx51evk.h index 207b20c..fd42afc 100644 --- a/include/configs/mx51evk.h +++ b/include/configs/mx51evk.h @@ -35,7 +35,7 @@
#define CONFIG_SYS_TEXT_BASE 0x97800000
-#define CONFIG_L2_OFF +#define CONFIG_SYS_L2CACHE_OFF
#include <asm/arch/imx-regs.h> /* diff --git a/include/configs/mx53evk.h b/include/configs/mx53evk.h index 78122a7..5f54035 100644 --- a/include/configs/mx53evk.h +++ b/include/configs/mx53evk.h @@ -29,7 +29,7 @@ #define CONFIG_DISPLAY_CPUINFO #define CONFIG_DISPLAY_BOARDINFO
-#define CONFIG_L2_OFF +#define CONFIG_SYS_L2CACHE_OFF
#include <asm/arch/imx-regs.h>
diff --git a/include/configs/omap4_panda.h b/include/configs/omap4_panda.h index b4e7f41..ab878f9 100644 --- a/include/configs/omap4_panda.h +++ b/include/configs/omap4_panda.h @@ -46,7 +46,7 @@ #define CONFIG_DISPLAY_BOARDINFO 1
/* Keep L2 Cache Disabled */ -#define CONFIG_L2_OFF 1 +#define CONFIG_SYS_L2CACHE_OFF 1
/* Clock Defines */ #define V_OSCK 38400000 /* Clock output from T2 */ diff --git a/include/configs/omap4_sdp4430.h b/include/configs/omap4_sdp4430.h index 584a52b..0ac407a 100644 --- a/include/configs/omap4_sdp4430.h +++ b/include/configs/omap4_sdp4430.h @@ -47,7 +47,7 @@ #define CONFIG_DISPLAY_BOARDINFO 1
/* Keep L2 Cache Disabled */ -#define CONFIG_L2_OFF 1 +#define CONFIG_SYS_L2CACHE_OFF 1
/* Clock Defines */ #define V_OSCK 38400000 /* Clock output from T2 */ diff --git a/include/configs/pleb2.h b/include/configs/pleb2.h index 9dbb406..ad82213 100644 --- a/include/configs/pleb2.h +++ b/include/configs/pleb2.h @@ -44,7 +44,7 @@ #undef CONFIG_USE_IRQ /* we don't need IRQ/FIQ stuff */
/* we will never enable dcache, because we have to setup MMU first */ -#define CONFIG_SYS_NO_DCACHE +#define CONFIG_SYS_DCACHE_OFF
/* * Size of malloc() pool diff --git a/include/configs/pxa255_idp.h b/include/configs/pxa255_idp.h index c1c7f80..e591d87 100644 --- a/include/configs/pxa255_idp.h +++ b/include/configs/pxa255_idp.h @@ -69,7 +69,7 @@ #undef CONFIG_USE_IRQ /* we don't need IRQ/FIQ stuff */
/* we will never enable dcache, because we have to setup MMU first */ -#define CONFIG_SYS_NO_DCACHE +#define CONFIG_SYS_DCACHE_OFF
/* * Size of malloc() pool diff --git a/include/configs/s5pc210_universal.h b/include/configs/s5pc210_universal.h index 5915984..bbe104b 100644 --- a/include/configs/s5pc210_universal.h +++ b/include/configs/s5pc210_universal.h @@ -43,7 +43,7 @@ #define CONFIG_DISPLAY_BOARDINFO
/* Keep L2 Cache Disabled */ -#define CONFIG_L2_OFF 1 +#define CONFIG_SYS_L2CACHE_OFF 1
#define CONFIG_SYS_SDRAM_BASE 0x40000000 #define CONFIG_SYS_TEXT_BASE 0x44800000 diff --git a/include/configs/shannon.h b/include/configs/shannon.h index c0e6643..fce4121 100644 --- a/include/configs/shannon.h +++ b/include/configs/shannon.h @@ -44,7 +44,7 @@
#undef CONFIG_USE_IRQ /* we don't need IRQ/FIQ stuff */ /* we will never enable dcache, because we have to setup MMU first */ -#define CONFIG_SYS_NO_DCACHE +#define CONFIG_SYS_DCACHE_OFF
/* * Size of malloc() pool diff --git a/include/configs/tegra2-common.h b/include/configs/tegra2-common.h index febce35..2d36d23 100644 --- a/include/configs/tegra2-common.h +++ b/include/configs/tegra2-common.h @@ -31,7 +31,7 @@ #define CONFIG_ARMCORTEXA9 /* This is an ARM V7 CPU core */ #define CONFIG_TEGRA2 /* in a NVidia Tegra2 core */ #define CONFIG_MACH_TEGRA_GENERIC /* which is a Tegra generic machine */ -#define CONFIG_L2_OFF /* No L2 cache */ +#define CONFIG_SYS_L2CACHE_OFF /* No L2 cache */
#define CONFIG_ENABLE_CORTEXA9 /* enable CPU (A9 complex) */
diff --git a/include/configs/trizepsiv.h b/include/configs/trizepsiv.h index 2512f93..ec052c4 100644 --- a/include/configs/trizepsiv.h +++ b/include/configs/trizepsiv.h @@ -49,7 +49,7 @@ #undef CONFIG_USE_IRQ /* we don't need IRQ/FIQ stuff */
/* we will never enable dcache, because we have to setup MMU first */ -#define CONFIG_SYS_NO_DCACHE +#define CONFIG_SYS_DCACHE_OFF
#define RTC
diff --git a/include/configs/vision2.h b/include/configs/vision2.h index 66795b5..50c920d 100644 --- a/include/configs/vision2.h +++ b/include/configs/vision2.h @@ -26,7 +26,7 @@
#define CONFIG_MX51 /* in a mx51 */ -#define CONFIG_L2_OFF +#define CONFIG_SYS_L2CACHE_OFF #define CONFIG_SYS_TEXT_BASE 0x97800000
#include <asm/arch/imx-regs.h> diff --git a/include/configs/xaeniax.h b/include/configs/xaeniax.h index a75c426..205e86c 100644 --- a/include/configs/xaeniax.h +++ b/include/configs/xaeniax.h @@ -51,7 +51,7 @@ #undef CONFIG_USE_IRQ /* we don't need IRQ/FIQ stuff */
/* we will never enable dcache, because we have to setup MMU first */ -#define CONFIG_SYS_NO_DCACHE +#define CONFIG_SYS_DCACHE_OFF
/* * select serial console configuration diff --git a/include/configs/xm250.h b/include/configs/xm250.h index 232baf3..39d0da7 100644 --- a/include/configs/xm250.h +++ b/include/configs/xm250.h @@ -38,7 +38,7 @@ #define CONFIG_SYS_TEXT_BASE 0x0
/* we will never enable dcache, because we have to setup MMU first */ -#define CONFIG_SYS_NO_DCACHE +#define CONFIG_SYS_DCACHE_OFF
/* * Size of malloc() pool; this lives below the uppermost 128 KiB which are diff --git a/include/configs/zylonite.h b/include/configs/zylonite.h index 1e03b01..5f153e3 100644 --- a/include/configs/zylonite.h +++ b/include/configs/zylonite.h @@ -48,7 +48,7 @@ #undef CONFIG_USE_IRQ /* we don't need IRQ/FIQ stuff */
/* we will never enable dcache, because we have to setup MMU first */ -#define CONFIG_SYS_NO_DCACHE +#define CONFIG_SYS_DCACHE_OFF
/* * Size of malloc() pool

- Enable I-cache on bootup - Enable MMU and D-cache immediately after relocation - Do necessary initialization before enabling d-cache and MMU - Changes to cleanup_before_linux() - Make changes according to the new framework
Signed-off-by: Aneesh V aneesh@ti.com --- V2: * Changes for -march=armv7a -> armv5 change * Removed the print inside the weakly linked stub function - __arm_init_before_mmu * Replaced CONFIG_SYS_NO_*CACHE with CONFIG_SYS_*CACHE_OFF --- arch/arm/cpu/armv7/cpu.c | 47 +++++++++++++++++++------------------------ arch/arm/cpu/armv7/start.S | 18 +++++++++++++++- arch/arm/lib/board.c | 6 +++++ arch/arm/lib/cache-cp15.c | 7 ++++++ arch/arm/lib/cache.c | 5 ---- 5 files changed, 51 insertions(+), 32 deletions(-)
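Putting the pieces of this patch together, the cache lifetime ends up roughly as sketched below. The wrapper function is invented for illustration, and the assumption that dcache_enable() performs arm_init_before_mmu() and the MMU setup before turning the D-cache on is inferred from the commit message rather than shown in full here.

#include <common.h>

static void cache_lifecycle_sketch(void)
{
	/*
	 * start.S: invalidate TLBs, I-cache and the branch predictor,
	 * then leave the I-cache on unless CONFIG_SYS_ICACHE_OFF is set.
	 */

	/* board_init_r(), right after relocation: */
	dcache_enable();	/* assumed to run arm_init_before_mmu() and
				 * set up the MMU before enabling the D-cache */

	/* ... U-Boot runs with I-cache and D-cache enabled ... */

	/* cleanup_before_linux(), just before jumping to the kernel: */
	disable_interrupts();
	icache_disable();
	invalidate_icache_all();
	dcache_disable();		/* flushes the D-cache, disables the MMU */
	invalidate_dcache_all();	/* drop lines allocated in between */
}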
diff --git a/arch/arm/cpu/armv7/cpu.c b/arch/arm/cpu/armv7/cpu.c index bc4238f..def9ced 100644 --- a/arch/arm/cpu/armv7/cpu.c +++ b/arch/arm/cpu/armv7/cpu.c @@ -35,13 +35,10 @@ #include <command.h> #include <asm/system.h> #include <asm/cache.h> - -static void cache_flush(void); +#include <asm/armv7.h>
int cleanup_before_linux(void) { - unsigned int i; - /* * this function is called just before we call linux * it prepares the processor for linux @@ -50,31 +47,29 @@ int cleanup_before_linux(void) */ disable_interrupts();
- /* turn off I/D-cache */ + /* + * Turn off I-cache and invalidate it + */ icache_disable(); - dcache_disable(); - - /* invalidate I-cache */ - cache_flush(); + invalidate_icache_all();
-#ifndef CONFIG_L2_OFF - /* turn off L2 cache */ - l2_cache_disable(); - /* invalidate L2 cache also */ - invalidate_dcache(get_device_type()); -#endif - i = 0; - /* mem barrier to sync up things */ - asm("mcr p15, 0, %0, c7, c10, 4": :"r"(i)); + /* + * turn off D-cache + * dcache_disable() in turn flushes the d-cache and disables MMU + */ + dcache_disable();
-#ifndef CONFIG_L2_OFF - l2_cache_enable(); -#endif + /* + * After D-cache is flushed and before it is disabled there may + * be some new valid entries brought into the cache. We are sure + * that these lines are not dirty and will not affect our execution. + * (because unwinding the call-stack and setting a bit in CP15 SCTRL + * is all we did during this. We have not pushed anything on to the + * stack. Neither have we affected any static data) + * So just invalidate the entire d-cache again to avoid coherency + * problems for kernel + */ + invalidate_dcache_all();
return 0; } - -static void cache_flush(void) -{ - asm ("mcr p15, 0, %0, c7, c5, 0": :"r" (0)); -} diff --git a/arch/arm/cpu/armv7/start.S b/arch/arm/cpu/armv7/start.S index d91ae12..0e698b6 100644 --- a/arch/arm/cpu/armv7/start.S +++ b/arch/arm/cpu/armv7/start.S @@ -255,6 +255,14 @@ clbss_l:str r2, [r0] /* clear loop... */ * initialization, now running from RAM. */ jump_2_ram: +/* + * If I-cache is enabled invalidate it + */ +#ifndef CONFIG_SYS_ICACHE_OFF + mcr p15, 0, r0, c7, c5, 0 @ invalidate icache + mcr p15, 0, r0, c7, c10, 4 @ DSB + mcr p15, 0, r0, c7, c5, 4 @ ISB +#endif ldr r0, _board_init_r_ofs adr r1, _start add lr, r0, r1 @@ -290,6 +298,9 @@ cpu_init_crit: mov r0, #0 @ set up for MCR mcr p15, 0, r0, c8, c7, 0 @ invalidate TLBs mcr p15, 0, r0, c7, c5, 0 @ invalidate icache + mcr p15, 0, r0, c7, c5, 6 @ invalidate BP array + mcr p15, 0, r0, c7, c10, 4 @ DSB + mcr p15, 0, r0, c7, c5, 4 @ ISB
/* * disable MMU stuff and caches @@ -298,7 +309,12 @@ cpu_init_crit: bic r0, r0, #0x00002000 @ clear bits 13 (--V-) bic r0, r0, #0x00000007 @ clear bits 2:0 (-CAM) orr r0, r0, #0x00000002 @ set bit 1 (--A-) Align - orr r0, r0, #0x00000800 @ set bit 12 (Z---) BTB + orr r0, r0, #0x00000800 @ set bit 11 (Z---) BTB +#ifdef CONFIG_SYS_ICACHE_OFF + bic r0, r0, #0x00001000 @ clear bit 12 (I) I-cache +#else + orr r0, r0, #0x00001000 @ set bit 12 (I) I-cache +#endif mcr p15, 0, r0, c1, c0, 0
/* diff --git a/arch/arm/lib/board.c b/arch/arm/lib/board.c index 6342e97..742cc68 100644 --- a/arch/arm/lib/board.c +++ b/arch/arm/lib/board.c @@ -464,6 +464,12 @@ void board_init_r (gd_t *id, ulong dest_addr) gd->flags |= GD_FLG_RELOC; /* tell others: relocation done */
monitor_flash_len = _end_ofs; + /* + * Enable D$: + * I$, if needed, must be already enabled in start.S + */ + dcache_enable(); + debug ("monitor flash len: %08lX\n", monitor_flash_len); board_init(); /* Setup chipselects */
diff --git a/arch/arm/lib/cache-cp15.c b/arch/arm/lib/cache-cp15.c index ba73fb9..51831a9 100644 --- a/arch/arm/lib/cache-cp15.c +++ b/arch/arm/lib/cache-cp15.c @@ -34,6 +34,12 @@
DECLARE_GLOBAL_DATA_PTR;
+void __arm_init_before_mmu(void) +{ +} +void arm_init_before_mmu(void) + __attribute__((weak, alias("__arm_init_before_mmu"))); + static void cp_delay (void) { volatile int i; @@ -65,6 +71,7 @@ static inline void mmu_setup(void) int i; u32 reg;
+ arm_init_before_mmu(); /* Set up an identity-mapping for all 4GB, rw for everyone */ for (i = 0; i < 4096; i++) page_table[i] = i << 20 | (3 << 10) | 0x12; diff --git a/arch/arm/lib/cache.c b/arch/arm/lib/cache.c index 27123cd..dc3242c 100644 --- a/arch/arm/lib/cache.c +++ b/arch/arm/lib/cache.c @@ -38,11 +38,6 @@ void __flush_cache(unsigned long start, unsigned long size) /* disable write buffer as well (page 2-22) */ asm("mcr p15, 0, %0, c7, c10, 4" : : "r" (0)); #endif -#ifdef CONFIG_OMAP34XX - void v7_flush_cache_all(void); - - v7_flush_cache_all(); -#endif return; } void flush_cache(unsigned long start, unsigned long size)

1. make sure that page table setup is not done multiple times
2. flush_dcache_all() is more appropriate while disabling cache than a range flush on the entire memory (flush_cache())
Provide a default implementation for flush_dcache_all() for backward compatibility and to avoid build issues.
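(Editorial sketch, not part of the patch: because flush_dcache_all() is now only weakly aliased to __flush_dcache_all() in arch/arm/lib/cache.c, a CPU or SoC that has a proper full-cache primitive overrides the range-flush fallback simply by providing a strong definition; the body below is a placeholder:)

    void flush_dcache_all(void)
    {
    	/* SoC/CPU-specific: clean and invalidate the entire D-cache */
    }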
Signed-off-by: Aneesh V aneesh@ti.com --- V2: * Fixed signature of flush_cache in cache.c --- arch/arm/lib/cache-cp15.c | 9 +++++++-- arch/arm/lib/cache.c | 11 +++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-)
diff --git a/arch/arm/lib/cache-cp15.c b/arch/arm/lib/cache-cp15.c index 51831a9..e6c3eae 100644 --- a/arch/arm/lib/cache-cp15.c +++ b/arch/arm/lib/cache-cp15.c @@ -92,13 +92,18 @@ static inline void mmu_setup(void) set_cr(reg | CR_M); }
+static int mmu_enabled(void) +{ + return get_cr() & CR_M; +} + /* cache_bit must be either CR_I or CR_C */ static void cache_enable(uint32_t cache_bit) { uint32_t reg;
/* The data cache is not active unless the mmu is enabled too */ - if (cache_bit == CR_C) + if ((cache_bit == CR_C) && !mmu_enabled()) mmu_setup(); reg = get_cr(); /* get control reg. */ cp_delay(); @@ -117,7 +122,7 @@ static void cache_disable(uint32_t cache_bit) return; /* if disabling data cache, disable mmu too */ cache_bit |= CR_M; - flush_cache(0, ~0); + flush_dcache_all(); } reg = get_cr(); cp_delay(); diff --git a/arch/arm/lib/cache.c b/arch/arm/lib/cache.c index dc3242c..92b61a2 100644 --- a/arch/arm/lib/cache.c +++ b/arch/arm/lib/cache.c @@ -42,3 +42,14 @@ void __flush_cache(unsigned long start, unsigned long size) } void flush_cache(unsigned long start, unsigned long size) __attribute__((weak, alias("__flush_cache"))); + +/* + * Default implementation: + * do a range flush for the entire range + */ +void __flush_dcache_all(void) +{ + flush_cache(0, ~0); +} +void flush_dcache_all(void) + __attribute__((weak, alias("__flush_dcache_all")));

PL310 is the L2$ controller from ARM used in many SoCs, including the Cortex-A9 based OMAP4430.
Add support for some of the key PL310 operations:
- Invalidate all
- Invalidate range
- Flush (clean & invalidate) all
- Flush range
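(For context, a platform pulls this driver in with the two options documented in the README hunk below; the base address in this sketch is the OMAP4 one used later in the series, other SoCs will differ:)

    #ifndef CONFIG_SYS_L2CACHE_OFF
    #define CONFIG_SYS_L2_PL310	1		/* PL310 present; build cache-pl310.c */
    #define CONFIG_SYS_PL310_BASE	0x48242000	/* controller register base (OMAP4) */
    #endif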
Signed-off-by: Aneesh V aneesh@ti.com --- V2: * More descriptive commit message * Changes for function pointer to weakly linked change * C struct for register accesses --- README | 6 ++ arch/arm/include/asm/pl310.h | 73 ++++++++++++++++++++++++++ arch/arm/lib/Makefile | 1 + arch/arm/lib/cache-pl310.c | 115 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 195 insertions(+), 0 deletions(-) create mode 100644 arch/arm/include/asm/pl310.h create mode 100644 arch/arm/lib/cache-pl310.c
diff --git a/README b/README index ca1520d..978e438 100644 --- a/README +++ b/README @@ -465,6 +465,12 @@ The following options need to be configured: CONFIG_SYS_DCACHE_OFF - Do not enable data cache in U-Boot CONFIG_SYS_L2CACHE_OFF- Do not enable L2 cache in U-Boot
+- Cache Configuration for ARM: + CONFIG_SYS_L2_PL310 - Enable support for ARM PL310 L2 cache + controller + CONFIG_SYS_PL310_BASE - Physical base address of PL310 + controller register space + - Serial Ports: CONFIG_PL010_SERIAL
diff --git a/arch/arm/include/asm/pl310.h b/arch/arm/include/asm/pl310.h new file mode 100644 index 0000000..fb506e6 --- /dev/null +++ b/arch/arm/include/asm/pl310.h @@ -0,0 +1,73 @@ +/* + * (C) Copyright 2010 + * Texas Instruments, <www.ti.com> + * Aneesh V aneesh@ti.com + * + * See file CREDITS for list of people who contributed to this + * project. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + */ +#ifndef _PL310_H_ +#define _PL310_H_ + +#include <linux/types.h> + +/* Register bit fields */ +#define PL310_AUX_CTRL_ASSOCIATIVITY_MASK (1 << 16) + +struct pl310_regs { + u32 pl310_cache_id; + u32 pl310_cache_type; + u32 pad1[62]; + u32 pl310_ctrl; + u32 pl310_aux_ctrl; + u32 pl310_tag_latency_ctrl; + u32 pl310_data_latency_ctrl; + u32 pad2[60]; + u32 pl310_event_cnt_ctrl; + u32 pl310_event_cnt1_cfg; + u32 pl310_event_cnt0_cfg; + u32 pl310_event_cnt1_val; + u32 pl310_event_cnt0_val; + u32 pl310_intr_mask; + u32 pl310_masked_intr_stat; + u32 pl310_raw_intr_stat; + u32 pl310_intr_clear; + u32 pad3[323]; + u32 pl310_cache_sync; + u32 pad4[15]; + u32 pl310_inv_line_pa; + u32 pad5[2]; + u32 pl310_inv_way; + u32 pad6[12]; + u32 pl310_clean_line_pa; + u32 pad7[1]; + u32 pl310_clean_line_idx; + u32 pl310_clean_way; + u32 pad8[12]; + u32 pl310_clean_inv_line_pa; + u32 pad9[1]; + u32 pl310_clean_inv_line_idx; + u32 pl310_clean_inv_way; +}; + +void pl310_inval_all(void); +void pl310_clean_inval_all(void); +void pl310_inval_range(u32 start, u32 end); +void pl310_clean_inval_range(u32 start, u32 end); + +#endif diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index f993d74..d31321a 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -40,6 +40,7 @@ COBJS-y += board.o COBJS-y += bootm.o COBJS-y += cache.o COBJS-y += cache-cp15.o +COBJS-$(CONFIG_SYS_L2_PL310) += cache-pl310.o COBJS-y += interrupts.o COBJS-y += reset.o SOBJS-$(CONFIG_USE_ARCH_MEMSET) += memset.o diff --git a/arch/arm/lib/cache-pl310.c b/arch/arm/lib/cache-pl310.c new file mode 100644 index 0000000..36c629c --- /dev/null +++ b/arch/arm/lib/cache-pl310.c @@ -0,0 +1,115 @@ +/* + * (C) Copyright 2010 + * Texas Instruments, <www.ti.com> + * Aneesh V aneesh@ti.com + * + * See file CREDITS for list of people who contributed to this + * project. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + */ +#include <linux/types.h> +#include <asm/io.h> +#include <asm/armv7.h> +#include <asm/pl310.h> +#include <config.h> + +struct pl310_regs *const pl310 = (struct pl310_regs *)CONFIG_SYS_PL310_BASE; + +static void pl310_cache_sync(void) +{ + writel(0, &pl310->pl310_cache_sync); +} + +static void pl310_background_op_all_ways(u32 *op_reg) +{ + u32 assoc_16, associativity, way_mask; + + assoc_16 = readl(&pl310->pl310_aux_ctrl) & + PL310_AUX_CTRL_ASSOCIATIVITY_MASK; + if (assoc_16) + associativity = 16; + else + associativity = 8; + + way_mask = (1 << associativity) - 1; + /* Invalidate all ways */ + writel(way_mask, op_reg); + /* Wait for all ways to be invalidated */ + while (readl(op_reg) && way_mask) + ; + pl310_cache_sync(); +} + +void v7_outer_cache_inval_all(void) +{ + pl310_background_op_all_ways(&pl310->pl310_inv_way); +} + +void v7_outer_cache_flush_all(void) +{ + pl310_background_op_all_ways(&pl310->pl310_clean_inv_way); +} + +/* Flush(clean invalidate) memory from start to stop-1 */ +void v7_outer_cache_flush_range(u32 start, u32 stop) +{ + /* PL310 currently supports only 32 bytes cache line */ + u32 pa, line_size = 32; + + /* + * Align to the beginning of cache-line - this ensures that + * the first 5 bits are 0 as required by PL310 TRM + */ + start &= ~(line_size - 1); + + for (pa = start; pa < stop; pa = pa + line_size) + writel(pa, &pl310->pl310_clean_inv_line_pa); + + pl310_cache_sync(); +} + +/* invalidate memory from start to stop-1 */ +void v7_outer_cache_inval_range(u32 start, u32 stop) +{ + /* PL310 currently supports only 32 bytes cache line */ + u32 pa, line_size = 32; + + /* + * If start address is not aligned to cache-line flush the first + * line to prevent affecting somebody else's buffer + */ + if (start & (line_size - 1)) { + v7_outer_cache_flush_range(start, start + 1); + /* move to next cache line */ + start = (start + line_size - 1) & ~(line_size - 1); + } + + /* + * If stop address is not aligned to cache-line flush the last + * line to prevent affecting somebody else's buffer + */ + if (stop & (line_size - 1)) { + v7_outer_cache_flush_range(stop, stop + 1); + /* align to the beginning of this cache line */ + stop &= ~(line_size - 1); + } + + for (pa = start; pa < stop; pa = pa + line_size) + writel(pa, &pl310->pl310_inv_line_pa); + + pl310_cache_sync(); +}

adapt omap4 to the new layered cache maintenance framework
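(Editorial note: the SoC side of the framework is a pair of weakly linked hooks for which this and the following two patches supply strong definitions; skeleton only, the bodies here are placeholders:)

    #ifndef CONFIG_SYS_L2CACHE_OFF
    void v7_outer_cache_enable(void)
    {
    	/* SoC-specific: turn on the outer (L2) cache, e.g. via the
    	 * PL310 control register or a ROM code SMC call */
    }

    void v7_outer_cache_disable(void)
    {
    	/* SoC-specific: turn off the outer (L2) cache */
    }
    #endif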
Signed-off-by: Aneesh V aneesh@ti.com --- V2: * Changes for the function pointer to weakly linked change V4: * Replaced CONFIG_SYS_NO_*CACHE with CONFIG_SYS_*CACHE_OFF --- arch/arm/cpu/armv7/omap4/board.c | 12 ++++++++++++ arch/arm/cpu/armv7/omap4/lowlevel_init.S | 9 +++++++++ arch/arm/include/asm/arch-omap4/sys_proto.h | 2 +- include/configs/omap4_panda.h | 8 +++++--- include/configs/omap4_sdp4430.h | 8 +++++--- 5 files changed, 32 insertions(+), 7 deletions(-)
diff --git a/arch/arm/cpu/armv7/omap4/board.c b/arch/arm/cpu/armv7/omap4/board.c index fcd29a7..de4cc2a 100644 --- a/arch/arm/cpu/armv7/omap4/board.c +++ b/arch/arm/cpu/armv7/omap4/board.c @@ -127,3 +127,15 @@ int arch_cpu_init(void) set_muxconf_regs(); return 0; } + +#ifndef CONFIG_SYS_L2CACHE_OFF +void v7_outer_cache_enable(void) +{ + set_pl310_ctrl_reg(1); +} + +void v7_outer_cache_disable(void) +{ + set_pl310_ctrl_reg(0); +} +#endif diff --git a/arch/arm/cpu/armv7/omap4/lowlevel_init.S b/arch/arm/cpu/armv7/omap4/lowlevel_init.S index 026dfa4..6abfbba 100644 --- a/arch/arm/cpu/armv7/omap4/lowlevel_init.S +++ b/arch/arm/cpu/armv7/omap4/lowlevel_init.S @@ -45,3 +45,12 @@ lowlevel_init: */ bl s_init pop {ip, pc} + +.globl set_pl310_ctrl_reg +set_pl310_ctrl_reg: + PUSH {r4-r11, lr} @ save registers - ROM code may pollute + @ our registers + LDR r12, =0x102 @ Set PL310 control register - value in R0 + .word 0xe1600070 @ SMC #0 - hand assembled because -march=armv5 + @ call ROM Code API to set control register + POP {r4-r11, pc} diff --git a/arch/arm/include/asm/arch-omap4/sys_proto.h b/arch/arm/include/asm/arch-omap4/sys_proto.h index 4813e9e..4fa4f4b 100644 --- a/arch/arm/include/asm/arch-omap4/sys_proto.h +++ b/arch/arm/include/asm/arch-omap4/sys_proto.h @@ -31,11 +31,11 @@ struct omap_sysinfo { void gpmc_init(void); void watchdog_init(void); u32 get_device_type(void); -void invalidate_dcache(u32); void set_muxconf_regs(void); void sr32(void *, u32, u32, u32); u32 wait_on_value(u32, u32, void *, u32); void sdelay(unsigned long); +void set_pl310_ctrl_reg(u32 val);
extern const struct omap_sysinfo sysinfo;
diff --git a/include/configs/omap4_panda.h b/include/configs/omap4_panda.h index ab878f9..1daffb7 100644 --- a/include/configs/omap4_panda.h +++ b/include/configs/omap4_panda.h @@ -45,9 +45,6 @@ #define CONFIG_DISPLAY_CPUINFO 1 #define CONFIG_DISPLAY_BOARDINFO 1
-/* Keep L2 Cache Disabled */ -#define CONFIG_SYS_L2CACHE_OFF 1 - /* Clock Defines */ #define V_OSCK 38400000 /* Clock output from T2 */ #define V_SCLK V_OSCK @@ -235,4 +232,9 @@ CONFIG_SYS_INIT_RAM_SIZE - \ GENERATED_GBL_DATA_SIZE)
+#ifndef CONFIG_SYS_L2CACHE_OFF +#define CONFIG_SYS_L2_PL310 1 +#define CONFIG_SYS_PL310_BASE 0x48242000 +#endif + #endif /* __CONFIG_H */ diff --git a/include/configs/omap4_sdp4430.h b/include/configs/omap4_sdp4430.h index 0ac407a..68ffa87 100644 --- a/include/configs/omap4_sdp4430.h +++ b/include/configs/omap4_sdp4430.h @@ -46,9 +46,6 @@ #define CONFIG_DISPLAY_CPUINFO 1 #define CONFIG_DISPLAY_BOARDINFO 1
-/* Keep L2 Cache Disabled */ -#define CONFIG_SYS_L2CACHE_OFF 1 - /* Clock Defines */ #define V_OSCK 38400000 /* Clock output from T2 */ #define V_SCLK V_OSCK @@ -241,4 +238,9 @@ CONFIG_SYS_INIT_RAM_SIZE - \ GENERATED_GBL_DATA_SIZE)
+#ifndef CONFIG_SYS_L2CACHE_OFF +#define CONFIG_SYS_L2_PL310 1 +#define CONFIG_SYS_PL310_BASE 0x48242000 +#endif + #endif /* __CONFIG_H */

adapt omap3 to the new layered cache maintenance framework
Signed-off-by: Aneesh V aneesh@ti.com --- V2: * Changes for the function pointer to weakly linked change V4: * Minor change in the conditional compilation of L2 related functions * Replaced CONFIG_SYS_NO_*CACHE with CONFIG_SYS_*CACHE_OFF --- arch/arm/cpu/armv7/omap3/Makefile | 1 - arch/arm/cpu/armv7/omap3/board.c | 136 ++++++++++++-- arch/arm/cpu/armv7/omap3/cache.S | 263 --------------------------- arch/arm/cpu/armv7/omap3/lowlevel_init.S | 32 ++++ arch/arm/include/asm/arch-omap3/omap3.h | 20 ++ arch/arm/include/asm/arch-omap3/sys_proto.h | 10 +- 6 files changed, 176 insertions(+), 286 deletions(-) delete mode 100644 arch/arm/cpu/armv7/omap3/cache.S
diff --git a/arch/arm/cpu/armv7/omap3/Makefile b/arch/arm/cpu/armv7/omap3/Makefile index 7164d50..522bcd2 100644 --- a/arch/arm/cpu/armv7/omap3/Makefile +++ b/arch/arm/cpu/armv7/omap3/Makefile @@ -26,7 +26,6 @@ include $(TOPDIR)/config.mk LIB = $(obj)lib$(SOC).o
SOBJS := lowlevel_init.o -SOBJS += cache.o
COBJS += board.o COBJS += clock.o diff --git a/arch/arm/cpu/armv7/omap3/board.c b/arch/arm/cpu/armv7/omap3/board.c index 6c2a132..98519a9 100644 --- a/arch/arm/cpu/armv7/omap3/board.c +++ b/arch/arm/cpu/armv7/omap3/board.c @@ -37,8 +37,12 @@ #include <asm/arch/sys_proto.h> #include <asm/arch/mem.h> #include <asm/cache.h> +#include <asm/armv7.h>
+/* Declarations */ extern omap3_sysinfo sysinfo; +static void omap3_setup_aux_cr(void); +static void omap3_invalidate_l2_cache_secure(void);
/****************************************************************************** * Routine: delay @@ -166,27 +170,13 @@ void s_init(void)
try_unlock_memory();
- /* - * Right now flushing at low MPU speed. - * Need to move after clock init - */ - invalidate_dcache(get_device_type()); -#ifndef CONFIG_ICACHE_OFF - icache_enable(); -#endif + /* Errata workarounds */ + omap3_setup_aux_cr();
-#ifdef CONFIG_L2_OFF - l2_cache_disable(); -#else - l2_cache_enable(); +#ifndef CONFIG_SYS_L2CACHE_OFF + /* Invalidate L2-cache from secure mode */ + omap3_invalidate_l2_cache_secure(); #endif - /* - * Writing to AuxCR in U-boot using SMI for GP DEV - * Currently SMI in Kernel on ES2 devices seems to have an issue - * Once that is resolved, we can postpone this config to kernel - */ - if (get_device_type() == GP_DEVICE) - setup_auxcr();
set_muxconf_regs(); delay(100); @@ -292,3 +282,111 @@ int checkboard (void) return 0; } #endif /* CONFIG_DISPLAY_BOARDINFO */ + +static void omap3_emu_romcode_call(u32 service_id, u32 *parameters) +{ + u32 i, num_params = *parameters; + u32 *sram_scratch_space = (u32 *)OMAP3_PUBLIC_SRAM_SCRATCH_AREA; + + /* + * copy the parameters to an un-cached area to avoid coherency + * issues + */ + for (i = 0; i < num_params; i++) { + __raw_writel(*parameters, sram_scratch_space); + parameters++; + sram_scratch_space++; + } + + /* Now make the PPA call */ + do_omap3_emu_romcode_call(service_id, OMAP3_PUBLIC_SRAM_SCRATCH_AREA); +} + +static void omap3_update_aux_cr_secure(u32 set_bits, u32 clear_bits) +{ + u32 acr; + + /* Read ACR */ + asm volatile ("mrc p15, 0, %0, c1, c0, 1" : "=r" (acr)); + acr &= ~clear_bits; + acr |= set_bits; + + if (get_device_type() == GP_DEVICE) { + omap3_gp_romcode_call(OMAP3_GP_ROMCODE_API_WRITE_ACR, + acr); + } else { + struct emu_hal_params emu_romcode_params; + emu_romcode_params.num_params = 1; + emu_romcode_params.param1 = acr; + omap3_emu_romcode_call(OMAP3_EMU_HAL_API_WRITE_ACR, + (u32 *)&emu_romcode_params); + } +} + +static void omap3_update_aux_cr(u32 set_bits, u32 clear_bits) +{ + u32 acr; + + /* Read ACR */ + asm volatile ("mrc p15, 0, %0, c1, c0, 1" : "=r" (acr)); + acr &= ~clear_bits; + acr |= set_bits; + + /* Write ACR - affects non-secure banked bits */ + asm volatile ("mcr p15, 0, %0, c1, c0, 1" : : "r" (acr)); +} + +static void omap3_setup_aux_cr(void) +{ + /* Workaround for Cortex-A8 errata: #454179 #430973 + * Set "IBE" bit + * Set "Disable Brach Size Mispredicts" bit + * Workaround for erratum #621766 + * Enable L1NEON bit + * ACR |= (IBE | DBSM | L1NEON) => ACR |= 0xE0 + */ + omap3_update_aux_cr_secure(0xE0, 0); +} + +#ifndef CONFIG_SYS_L2CACHE_OFF +/* Invalidate the entire L2 cache from secure mode */ +static void omap3_invalidate_l2_cache_secure(void) +{ + if (get_device_type() == GP_DEVICE) { + omap3_gp_romcode_call(OMAP3_GP_ROMCODE_API_L2_INVAL, + 0); + } else { + struct emu_hal_params emu_romcode_params; + emu_romcode_params.num_params = 1; + emu_romcode_params.param1 = 0; + omap3_emu_romcode_call(OMAP3_EMU_HAL_API_L2_INVAL, + (u32 *)&emu_romcode_params); + } +} + +void v7_outer_cache_enable(void) +{ + /* Set L2EN */ + omap3_update_aux_cr_secure(0x2, 0); + + /* + * On some revisions L2EN bit is banked on some revisions it's not + * No harm in setting both banked bits(in fact this is required + * by an erratum) + */ + omap3_update_aux_cr(0x2, 0); +} + +void v7_outer_cache_disable(void) +{ + /* Clear L2EN */ + omap3_update_aux_cr_secure(0, 0x2); + + /* + * On some revisions L2EN bit is banked on some revisions it's not + * No harm in clearing both banked bits(in fact this is required + * by an erratum) + */ + omap3_update_aux_cr(0, 0x2); +} +#endif diff --git a/arch/arm/cpu/armv7/omap3/cache.S b/arch/arm/cpu/armv7/omap3/cache.S deleted file mode 100644 index cda87ba..0000000 --- a/arch/arm/cpu/armv7/omap3/cache.S +++ /dev/null @@ -1,263 +0,0 @@ -/* - * Copyright (c) 2009 Wind River Systems, Inc. 
- * Tom Rix Tom.Rix@windriver.com - * - * This file is based on and replaces the existing cache.c file - * The copyrights for the cache.c file are: - * - * (C) Copyright 2008 Texas Insturments - * - * (C) Copyright 2002 - * Sysgo Real-Time Solutions, GmbH <www.elinos.com> - * Marius Groeger mgroeger@sysgo.de - * - * (C) Copyright 2002 - * Gary Jennejohn, DENX Software Engineering, gj@denx.de - * - * See file CREDITS for list of people who contributed to this - * project. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA - */ - -#include <asm/arch/omap3.h> - -/* - * omap3 cache code - */ - -.align 5 -.global invalidate_dcache -.global l2_cache_enable -.global l2_cache_disable -.global setup_auxcr - -/* - * invalidate_dcache() - * - * Invalidate the whole D-cache. - * - * Corrupted registers: r0-r5, r7, r9-r11 - * - * - mm - mm_struct describing address space - */ -invalidate_dcache: - stmfd r13!, {r0 - r5, r7, r9 - r12, r14} - - mov r7, r0 @ take a backup of device type - cmp r0, #0x3 @ check if the device type is - @ GP - moveq r12, #0x1 @ set up to invalide L2 -smi: .word 0x01600070 @ Call SMI monitor (smieq) - cmp r7, #0x3 @ compare again in case its - @ lost - beq finished_inval @ if GP device, inval done - @ above - - mrc p15, 1, r0, c0, c0, 1 @ read clidr - ands r3, r0, #0x7000000 @ extract loc from clidr - mov r3, r3, lsr #23 @ left align loc bit field - beq finished_inval @ if loc is 0, then no need to - @ clean - mov r10, #0 @ start clean at cache level 0 -inval_loop1: - add r2, r10, r10, lsr #1 @ work out 3x current cache - @ level - mov r1, r0, lsr r2 @ extract cache type bits from - @ clidr - and r1, r1, #7 @ mask of the bits for current - @ cache only - cmp r1, #2 @ see what cache we have at - @ this level - blt skip_inval @ skip if no cache, or just - @ i-cache - mcr p15, 2, r10, c0, c0, 0 @ select current cache level - @ in cssr - mov r2, #0 @ operand for mcr SBZ - mcr p15, 0, r2, c7, c5, 4 @ flush prefetch buffer to - @ sych the new cssr&csidr, - @ with armv7 this is 'isb', - @ but we compile with armv5 - mrc p15, 1, r1, c0, c0, 0 @ read the new csidr - and r2, r1, #7 @ extract the length of the - @ cache lines - add r2, r2, #4 @ add 4 (line length offset) - ldr r4, =0x3ff - ands r4, r4, r1, lsr #3 @ find maximum number on the - @ way size - clz r5, r4 @ find bit position of way - @ size increment - ldr r7, =0x7fff - ands r7, r7, r1, lsr #13 @ extract max number of the - @ index size -inval_loop2: - mov r9, r4 @ create working copy of max - @ way size -inval_loop3: - orr r11, r10, r9, lsl r5 @ factor way and cache number - @ into r11 - orr r11, r11, r7, lsl r2 @ factor index number into r11 - mcr p15, 0, r11, c7, c6, 2 @ invalidate by set/way - subs r9, r9, #1 @ decrement the way - bge inval_loop3 - subs r7, r7, #1 @ decrement the index - bge inval_loop2 -skip_inval: - add r10, r10, #2 @ increment cache 
number - cmp r3, r10 - bgt inval_loop1 -finished_inval: - mov r10, #0 @ swith back to cache level 0 - mcr p15, 2, r10, c0, c0, 0 @ select current cache level - @ in cssr - mcr p15, 0, r10, c7, c5, 4 @ flush prefetch buffer, - @ with armv7 this is 'isb', - @ but we compile with armv5 - - ldmfd r13!, {r0 - r5, r7, r9 - r12, pc} - -l2_cache_set: - stmfd r13!, {r4 - r6, lr} - mov r5, r0 - bl get_cpu_rev - mov r4, r0 - bl get_cpu_family - @ ES2 onwards we can disable/enable L2 ourselves - cmp r0, #CPU_OMAP34XX - cmpeq r4, #CPU_3XX_ES10 - mrc 15, 0, r0, cr1, cr0, 1 - bic r0, r0, #2 - orr r0, r0, r5, lsl #1 - mcreq 15, 0, r0, cr1, cr0, 1 - @ GP Device ROM code API usage here - @ r12 = AUXCR Write function and r0 value - mov ip, #3 - @ SMCNE instruction to call ROM Code API - .word 0x11600070 - ldmfd r13!, {r4 - r6, pc} - -l2_cache_enable: - mov r0, #1 - b l2_cache_set - -l2_cache_disable: - mov r0, #0 - b l2_cache_set - -/****************************************************************************** - * Routine: setup_auxcr() - * Description: Write to AuxCR desired value using SMI. - * general use. - *****************************************************************************/ -setup_auxcr: - mrc p15, 0, r0, c0, c0, 0 @ read main ID register - and r2, r0, #0x00f00000 @ variant - and r3, r0, #0x0000000f @ revision - orr r1, r3, r2, lsr #20-4 @ combine variant and revision - mov r12, #0x3 - mrc p15, 0, r0, c1, c0, 1 - orr r0, r0, #0x10 @ Enable ASA - @ Enable L1NEON on pre-r2p1 (erratum 621766 workaround) - cmp r1, #0x21 - orrlt r0, r0, #1 << 5 - .word 0xE1600070 @ SMC - mov r12, #0x2 - mrc p15, 1, r0, c9, c0, 2 - @ Set PLD_FWD bit in L2AUXCR on pre-r2p1 (erratum 725233 workaround) - cmp r1, #0x21 - orrlt r0, r0, #1 << 27 - .word 0xE1600070 @ SMC - bx lr - -.align 5 -.global v7_flush_dcache_all -.global v7_flush_cache_all - -/* - * v7_flush_dcache_all() - * - * Flush the whole D-cache. 
- * - * Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode) - * - * - mm - mm_struct describing address space - */ -v7_flush_dcache_all: -# dmb @ ensure ordering with previous memory accesses - mrc p15, 1, r0, c0, c0, 1 @ read clidr - ands r3, r0, #0x7000000 @ extract loc from clidr - mov r3, r3, lsr #23 @ left align loc bit field - beq finished @ if loc is 0, then no need to clean - mov r10, #0 @ start clean at cache level 0 -loop1: - add r2, r10, r10, lsr #1 @ work out 3x current cache level - mov r1, r0, lsr r2 @ extract cache type bits from clidr - and r1, r1, #7 @ mask of the bits for current cache only - cmp r1, #2 @ see what cache we have at this level - blt skip @ skip if no cache, or just i-cache - mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr - mcr p15, 0, r10, c7, c5, 4 @ flush prefetch buffer, - @ with armv7 this is 'isb', - @ but we compile with armv5 - mrc p15, 1, r1, c0, c0, 0 @ read the new csidr - and r2, r1, #7 @ extract the length of the cache lines - add r2, r2, #4 @ add 4 (line length offset) - ldr r4, =0x3ff - ands r4, r4, r1, lsr #3 @ find maximum number on the way size - clz r5, r4 @ find bit position of way size increment - ldr r7, =0x7fff - ands r7, r7, r1, lsr #13 @ extract max number of the index size -loop2: - mov r9, r4 @ create working copy of max way size -loop3: - orr r11, r10, r9, lsl r5 @ factor way and cache number into r11 - orr r11, r11, r7, lsl r2 @ factor index number into r11 - mcr p15, 0, r11, c7, c14, 2 @ clean & invalidate by set/way - subs r9, r9, #1 @ decrement the way - bge loop3 - subs r7, r7, #1 @ decrement the index - bge loop2 -skip: - add r10, r10, #2 @ increment cache number - cmp r3, r10 - bgt loop1 -finished: - mov r10, #0 @ swith back to cache level 0 - mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr -# dsb - mcr p15, 0, r10, c7, c5, 4 @ flush prefetch buffer, - @ with armv7 this is 'isb', - @ but we compile with armv5 - mov pc, lr - -/* - * v7_flush_cache_all() - * - * Flush the entire cache system. - * The data cache flush is now achieved using atomic clean / invalidates - * working outwards from L1 cache. This is done using Set/Way based cache - * maintainance instructions. - * The instruction cache can still be invalidated back to the point of - * unification in a single instruction. - * - */ -v7_flush_cache_all: - stmfd sp!, {r0-r7, r9-r11, lr} - bl v7_flush_dcache_all - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ I+BTB cache invalidate - ldmfd sp!, {r0-r7, r9-r11, lr} - mov pc, lr diff --git a/arch/arm/cpu/armv7/omap3/lowlevel_init.S b/arch/arm/cpu/armv7/omap3/lowlevel_init.S index 1458072..67e8ceb 100644 --- a/arch/arm/cpu/armv7/omap3/lowlevel_init.S +++ b/arch/arm/cpu/armv7/omap3/lowlevel_init.S @@ -35,6 +35,38 @@ _TEXT_BASE: .word CONFIG_SYS_TEXT_BASE /* sdram load addr from config.mk */
+.global omap3_gp_romcode_call +omap3_gp_romcode_call: + PUSH {r4-r12, lr} @ Save all registers from ROM code! + MOV r12, r0 @ Copy the Service ID in R12 + MOV r0, r1 @ Copy parameter to R0 + mcr p15, 0, r0, c7, c10, 4 @ DSB + mcr p15, 0, r0, c7, c10, 5 @ DMB + .word 0xe1600070 @ SMC #0 to enter monitor - hand assembled + @ because we use -march=armv5 + POP {r4-r12, pc} + +/* + * Funtion for making PPA HAL API calls in secure devices + * Input: + * R0 - Service ID + * R1 - paramer list + */ +.global do_omap3_emu_romcode_call +do_omap3_emu_romcode_call: + PUSH {r4-r12, lr} @ Save all registers from ROM code! + MOV r12, r0 @ Copy the Secure Service ID in R12 + MOV r3, r1 @ Copy the pointer to va_list in R3 + MOV r1, #0 @ Process ID - 0 + MOV r2, #OMAP3_EMU_HAL_START_HAL_CRITICAL @ Copy the pointer + @ to va_list in R3 + MOV r6, #0xFF @ Indicate new Task call + mcr p15, 0, r0, c7, c10, 4 @ DSB + mcr p15, 0, r0, c7, c10, 5 @ DMB + .word 0xe1600071 @ SMC #1 to call PPA service - hand assembled + @ because we use -march=armv5 + POP {r4-r12, pc} + #if !defined(CONFIG_SYS_NAND_BOOT) && !defined(CONFIG_SYS_NAND_BOOT) /************************************************************************** * cpy_clk_code: relocates clock code into SRAM where its safer to execute diff --git a/arch/arm/include/asm/arch-omap3/omap3.h b/arch/arm/include/asm/arch-omap3/omap3.h index cc2b541..d9d49da 100644 --- a/arch/arm/include/asm/arch-omap3/omap3.h +++ b/arch/arm/include/asm/arch-omap3/omap3.h @@ -159,8 +159,14 @@ struct gpio { #define SRAM_VECT_CODE (SRAM_OFFSET0 | SRAM_OFFSET1 | \ SRAM_OFFSET2)
+#define OMAP3_PUBLIC_SRAM_BASE 0x40208000 /* Works for GP & EMU */ +#define OMAP3_PUBLIC_SRAM_END 0x40210000 + #define LOW_LEVEL_SRAM_STACK 0x4020FFFC
+/* scratch area - accessible on both EMU and GP */ +#define OMAP3_PUBLIC_SRAM_SCRATCH_AREA OMAP3_PUBLIC_SRAM_BASE + #define DEBUG_LED1 149 /* gpio */ #define DEBUG_LED2 150 /* gpio */
@@ -227,4 +233,18 @@ struct gpio {
#define OMAP3730 0x0c00
+/* + * ROM code API related flags + */ +#define OMAP3_GP_ROMCODE_API_L2_INVAL 1 +#define OMAP3_GP_ROMCODE_API_WRITE_ACR 3 + +/* + * EMU device PPA HAL related flags + */ +#define OMAP3_EMU_HAL_API_L2_INVAL 40 +#define OMAP3_EMU_HAL_API_WRITE_ACR 42 + +#define OMAP3_EMU_HAL_START_HAL_CRITICAL 4 + #endif diff --git a/arch/arm/include/asm/arch-omap3/sys_proto.h b/arch/arm/include/asm/arch-omap3/sys_proto.h index 4a28ba1..995e7cb 100644 --- a/arch/arm/include/asm/arch-omap3/sys_proto.h +++ b/arch/arm/include/asm/arch-omap3/sys_proto.h @@ -27,6 +27,11 @@ typedef struct { char *nand_string; } omap3_sysinfo;
+struct emu_hal_params { + u32 num_params; + u32 param1; +}; + void prcm_init(void); void per_clocks_enable(void);
@@ -53,9 +58,7 @@ u32 is_running_in_sdram(void); u32 is_running_in_sram(void); u32 is_running_in_flash(void); u32 get_device_type(void); -void l2cache_enable(void); void secureworld_exit(void); -void setup_auxcr(void); void try_unlock_memory(void); u32 get_boot_type(void); void invalidate_dcache(u32); @@ -66,5 +69,6 @@ void make_cs1_contiguous(void); void omap_nand_switch_ecc(int); void power_init_r(void); void dieid_num_r(void); - +void do_omap3_emu_romcode_call(u32 service_id, u32 parameters); +void omap3_gp_romcode_call(u32 service_id, u32 parameter); #endif

adapt s5pc1xx to the new layered cache maintenance framework
Signed-off-by: Aneesh V aneesh@ti.com --- V2: * Changes for the function pointer to weakly linked change V4: * Minor change in the conditional compilation of L2 related code in cache.S * Replaced CONFIG_SYS_NO_*CACHE with CONFIG_SYS_*CACHE_OFF --- arch/arm/cpu/armv7/s5pc1xx/cache.S | 88 ++----------------------- arch/arm/include/asm/arch-s5pc1xx/sys_proto.h | 3 - 2 files changed, 6 insertions(+), 85 deletions(-)
diff --git a/arch/arm/cpu/armv7/s5pc1xx/cache.S b/arch/arm/cpu/armv7/s5pc1xx/cache.S index 7734b32..c7d6221 100644 --- a/arch/arm/cpu/armv7/s5pc1xx/cache.S +++ b/arch/arm/cpu/armv7/s5pc1xx/cache.S @@ -23,98 +23,22 @@ * MA 02111-1307 USA */
-#include <asm/arch/cpu.h> - .align 5 -.global invalidate_dcache -.global l2_cache_enable -.global l2_cache_disable - -/* - * invalidate_dcache() - * Invalidate the whole D-cache. - * - * Corrupted registers: r0-r5, r7, r9-r11 - */ -invalidate_dcache: - stmfd r13!, {r0 - r5, r7, r9 - r12, r14} - - cmp r0, #0xC100 @ check if the cpu is s5pc100 - - beq finished_inval @ s5pc100 doesn't need this - @ routine - mrc p15, 1, r0, c0, c0, 1 @ read clidr - ands r3, r0, #0x7000000 @ extract loc from clidr - mov r3, r3, lsr #23 @ left align loc bit field - beq finished_inval @ if loc is 0, then no need to - @ clean - mov r10, #0 @ start clean at cache level 0 -inval_loop1: - add r2, r10, r10, lsr #1 @ work out 3x current cache - @ level - mov r1, r0, lsr r2 @ extract cache type bits from - @ clidr - and r1, r1, #7 @ mask of the bits for current - @ cache only - cmp r1, #2 @ see what cache we have at - @ this level - blt skip_inval @ skip if no cache, or just - @ i-cache - mcr p15, 2, r10, c0, c0, 0 @ select current cache level - @ in cssr - mov r2, #0 @ operand for mcr SBZ - mcr p15, 0, r2, c7, c5, 4 @ flush prefetch buffer to - @ sych the new cssr&csidr, - @ with armv7 this is 'isb', - @ but we compile with armv5 - mrc p15, 1, r1, c0, c0, 0 @ read the new csidr - and r2, r1, #7 @ extract the length of the - @ cache lines - add r2, r2, #4 @ add 4 (line length offset) - ldr r4, =0x3ff - ands r4, r4, r1, lsr #3 @ find maximum number on the - @ way size - clz r5, r4 @ find bit position of way - @ size increment - ldr r7, =0x7fff - ands r7, r7, r1, lsr #13 @ extract max number of the - @ index size -inval_loop2: - mov r9, r4 @ create working copy of max - @ way size -inval_loop3: - orr r11, r10, r9, lsl r5 @ factor way and cache number - @ into r11 - orr r11, r11, r7, lsl r2 @ factor index number into r11 - mcr p15, 0, r11, c7, c6, 2 @ invalidate by set/way - subs r9, r9, #1 @ decrement the way - bge inval_loop3 - subs r7, r7, #1 @ decrement the index - bge inval_loop2 -skip_inval: - add r10, r10, #2 @ increment cache number - cmp r3, r10 - bgt inval_loop1 -finished_inval: - mov r10, #0 @ swith back to cache level 0 - mcr p15, 2, r10, c0, c0, 0 @ select current cache level - @ in cssr - mcr p15, 0, r10, c7, c5, 4 @ flush prefetch buffer, - @ with armv7 this is 'isb', - @ but we compile with armv5 - - ldmfd r13!, {r0 - r5, r7, r9 - r12, pc}
-l2_cache_enable: +#ifndef CONFIG_SYS_L2CACHE_OFF +.global v7_outer_cache_enable +v7_outer_cache_enable: push {r0, r1, r2, lr} mrc 15, 0, r3, cr1, cr0, 1 orr r3, r3, #2 mcr 15, 0, r3, cr1, cr0, 1 pop {r1, r2, r3, pc}
-l2_cache_disable: +.global v7_outer_cache_disable +v7_outer_cache_disable: push {r0, r1, r2, lr} mrc 15, 0, r3, cr1, cr0, 1 bic r3, r3, #2 mcr 15, 0, r3, cr1, cr0, 1 pop {r1, r2, r3, pc} +#endif diff --git a/arch/arm/include/asm/arch-s5pc1xx/sys_proto.h b/arch/arm/include/asm/arch-s5pc1xx/sys_proto.h index 3078aaf..7b83c5a 100644 --- a/arch/arm/include/asm/arch-s5pc1xx/sys_proto.h +++ b/arch/arm/include/asm/arch-s5pc1xx/sys_proto.h @@ -25,8 +25,5 @@ #define _SYS_PROTO_H_
u32 get_device_type(void); -void invalidate_dcache(u32); -void l2_cache_disable(void); -void l2_cache_enable(void);
#endif