[U-Boot] [Patch v2 1/3] armv8/cache: Consolidate setting for MAIR and TCR

Move setting for MAIR and TCR to cache_v8.c, to avoid conflict with sub-architecture.
Signed-off-by: York Sun yorksun@freescale.com CC: David Feng fenghua@phytium.com.cn --- Changed since v1 v1: Acked-by: David Feng fenghua@phytium.com.cn v2: No change.
arch/arm/cpu/armv8/cache_v8.c | 22 +++++++++++++++++++--- arch/arm/cpu/armv8/start.S | 22 ---------------------- 2 files changed, 19 insertions(+), 25 deletions(-)
diff --git a/arch/arm/cpu/armv8/cache_v8.c b/arch/arm/cpu/armv8/cache_v8.c index 131fdab..7acae1b 100644 --- a/arch/arm/cpu/armv8/cache_v8.c +++ b/arch/arm/cpu/armv8/cache_v8.c @@ -45,15 +45,31 @@ static void mmu_setup(void)
/* load TTBR0 */ el = current_el(); - if (el == 1) + if (el == 1) { asm volatile("msr ttbr0_el1, %0" : : "r" (gd->arch.tlb_addr) : "memory"); - else if (el == 2) + asm volatile("msr tcr_el1, %0" + : : "r" (TCR_FLAGS | TCR_EL1_IPS_BITS) + : "memory"); + asm volatile("msr mair_el1, %0" + : : "r" (MEMORY_ATTRIBUTES) : "memory"); + } else if (el == 2) { asm volatile("msr ttbr0_el2, %0" : : "r" (gd->arch.tlb_addr) : "memory"); - else + asm volatile("msr tcr_el2, %0" + : : "r" (TCR_FLAGS | TCR_EL2_IPS_BITS) + : "memory"); + asm volatile("msr mair_el2, %0" + : : "r" (MEMORY_ATTRIBUTES) : "memory"); + } else { asm volatile("msr ttbr0_el3, %0" : : "r" (gd->arch.tlb_addr) : "memory"); + asm volatile("msr tcr_el3, %0" + : : "r" (TCR_FLAGS | TCR_EL3_IPS_BITS) + : "memory"); + asm volatile("msr mair_el3, %0" + : : "r" (MEMORY_ATTRIBUTES) : "memory"); + }
/* enable the mmu */ set_sctlr(get_sctlr() | CR_M); diff --git a/arch/arm/cpu/armv8/start.S b/arch/arm/cpu/armv8/start.S index bcc2603..90daa4d 100644 --- a/arch/arm/cpu/armv8/start.S +++ b/arch/arm/cpu/armv8/start.S @@ -128,28 +128,6 @@ ENTRY(c_runtime_cpu_setup) isb sy #endif
-#ifndef CONFIG_SYS_DCACHE_OFF - /* - * Setup MAIR and TCR. - */ - ldr x0, =MEMORY_ATTRIBUTES - ldr x1, =TCR_FLAGS - - switch_el x2, 3f, 2f, 1f -3: orr x1, x1, TCR_EL3_IPS_BITS - msr mair_el3, x0 - msr tcr_el3, x1 - b 0f -2: orr x1, x1, TCR_EL2_IPS_BITS - msr mair_el2, x0 - msr tcr_el2, x1 - b 0f -1: orr x1, x1, TCR_EL1_IPS_BITS - msr mair_el1, x0 - msr tcr_el1, x1 -0: -#endif - /* Relocate vBAR */ adr x0, vectors switch_el x1, 3f, 2f, 1f

If D-cache is enabled, we need to flush it, and invalidate i-cache before jumping to the new location. This should be done right after relocation.
Signed-off-by: York Sun yorksun@freescale.com CC: David Feng fenghua@phytium.com.cn --- Changed since v1:
v2: use macro switch_el; invalidate i-cache by detection of SCTLR.IC
arch/arm/cpu/armv8/start.S | 6 ------ arch/arm/lib/relocate_64.S | 19 +++++++++++++++++++ 2 files changed, 19 insertions(+), 6 deletions(-)
diff --git a/arch/arm/cpu/armv8/start.S b/arch/arm/cpu/armv8/start.S index 90daa4d..e70c51d 100644 --- a/arch/arm/cpu/armv8/start.S +++ b/arch/arm/cpu/armv8/start.S @@ -122,12 +122,6 @@ ENDPROC(lowlevel_init) /*-----------------------------------------------------------------------*/
ENTRY(c_runtime_cpu_setup) - /* If I-cache is enabled invalidate it */ -#ifndef CONFIG_SYS_ICACHE_OFF - ic iallu /* I+BTB cache invalidate */ - isb sy -#endif - /* Relocate vBAR */ adr x0, vectors switch_el x1, 3f, 2f, 1f diff --git a/arch/arm/lib/relocate_64.S b/arch/arm/lib/relocate_64.S index 7fba9e2..5c51cae 100644 --- a/arch/arm/lib/relocate_64.S +++ b/arch/arm/lib/relocate_64.S @@ -11,6 +11,7 @@ #include <asm-offsets.h> #include <config.h> #include <linux/linkage.h> +#include <asm/macro.h>
/* * void relocate_code (addr_moni) @@ -19,6 +20,9 @@ * x0 holds the destination address. */ ENTRY(relocate_code) + stp x29, x30, [sp, #-32]! /* create a stack frame */ + mov x29, sp + str x0, [sp, #16] /* * Copy u-boot from flash to RAM */ @@ -32,6 +36,7 @@ copy_loop: stp x10, x11, [x0], #16 /* copy to target address [x0] */ cmp x1, x2 /* until source end address [x2] */ b.lo copy_loop + str x0, [sp, #24]
/* * Fix .rela.dyn relocations @@ -54,5 +59,19 @@ fixnext: b.lo fixloop
relocate_done: + switch_el x1, 3f, 2f, 1f + bl hang +3: mrs x0, sctlr_el3 + b 0f +2: mrs x0, sctlr_el2 + b 0f +1: mrs x0, sctlr_el1 +0: tbz w0, #2, 5f /* skip flushing cache if disabled */ + tbz w0, #12, 4f /* invalidate i-cache if enabled */ + ic iallu /* i-cache invalidate all */ + isb sy +4: ldp x0, x1, [sp, #16] + bl __asm_flush_dcache_range +5: ldp x29, x30, [sp],#32 ret ENDPROC(relocate_code)

When SoC first boots up, we should invalidate the cache but not flush it. We can mostly use the same function for invalidate and flush, with a wrapper.
Invalidating a large cache can be slow on an emulator, so we postpone doing so until I-cache is enabled, and before enabling D-cache.
Signed-off-by: York Sun yorksun@freescale.com CC: David Feng fenghua@phytium.com.cn --- Changed since v1:
v2: Move calling __asm_invalidate_icache_all from start.S to icache_enable() Remove calling __asm_invalidate_tlb_all from start.S
arch/arm/cpu/armv8/cache.S | 53 +++++++++++++++++++++++++++++------------ arch/arm/cpu/armv8/cache_v8.c | 3 ++- arch/arm/cpu/armv8/start.S | 10 ++++---- arch/arm/include/asm/system.h | 1 + 4 files changed, 47 insertions(+), 20 deletions(-)
diff --git a/arch/arm/cpu/armv8/cache.S b/arch/arm/cpu/armv8/cache.S index 546a83e..249799c 100644 --- a/arch/arm/cpu/armv8/cache.S +++ b/arch/arm/cpu/armv8/cache.S @@ -19,11 +19,12 @@ * clean and invalidate one level cache. * * x0: cache level - * x1~x9: clobbered + * x1: 0 flush & invalidate, 1 invalidate only + * x2~x9: clobbered */ ENTRY(__asm_flush_dcache_level) - lsl x1, x0, #1 - msr csselr_el1, x1 /* select cache level */ + lsl x12, x0, #1 + msr csselr_el1, x12 /* select cache level */ isb /* sync change of cssidr_el1 */ mrs x6, ccsidr_el1 /* read the new cssidr_el1 */ and x2, x6, #7 /* x2 <- log2(cache line size)-4 */ @@ -35,7 +36,7 @@ ENTRY(__asm_flush_dcache_level) clz w5, w4 /* bit position of #ways */ mov x4, #0x7fff and x4, x4, x6, lsr #13 /* x4 <- max number of #sets */ - /* x1 <- cache level << 1 */ + /* x12 <- cache level << 1 */ /* x2 <- line length offset */ /* x3 <- number of cache ways - 1 */ /* x4 <- number of cache sets - 1 */ @@ -45,11 +46,14 @@ loop_set: mov x6, x3 /* x6 <- working copy of #ways */ loop_way: lsl x7, x6, x5 - orr x9, x1, x7 /* map way and level to cisw value */ + orr x9, x12, x7 /* map way and level to cisw value */ lsl x7, x4, x2 orr x9, x9, x7 /* map set number to cisw value */ - dc cisw, x9 /* clean & invalidate by set/way */ - subs x6, x6, #1 /* decrement the way */ + tbz w1, #0, 1f + dc isw, x9 + b 2f +1: dc cisw, x9 /* clean & invalidate by set/way */ +2: subs x6, x6, #1 /* decrement the way */ b.ge loop_way subs x4, x4, #1 /* decrement the set */ b.ge loop_set @@ -58,11 +62,14 @@ loop_way: ENDPROC(__asm_flush_dcache_level)
/* - * void __asm_flush_dcache_all(void) + * void __asm_dcache_all(int invalidate_only) + * + * x0: 0 flush & invalidate, 1 invalidate only * * clean and invalidate all data cache by SET/WAY. */ -ENTRY(__asm_flush_dcache_all) +ENTRY(__asm_dcache_all) + mov x1, x0 dsb sy mrs x10, clidr_el1 /* read clidr_el1 */ lsr x11, x10, #24 @@ -76,13 +83,13 @@ ENTRY(__asm_flush_dcache_all) /* x15 <- return address */
loop_level: - lsl x1, x0, #1 - add x1, x1, x0 /* x0 <- tripled cache level */ - lsr x1, x10, x1 - and x1, x1, #7 /* x1 <- cache type */ - cmp x1, #2 + lsl x12, x0, #1 + add x12, x12, x0 /* x12 <- tripled cache level */ + lsr x12, x10, x12 + and x12, x12, #7 /* x12 <- cache type */ + cmp x12, #2 b.lt skip /* skip if no cache or icache */ - bl __asm_flush_dcache_level + bl __asm_flush_dcache_level /* x1 = 0 flush, 1 invalidate */ skip: add x0, x0, #1 /* increment cache level */ cmp x11, x0 @@ -96,8 +103,24 @@ skip:
finished: ret +ENDPROC(__asm_dcache_all) + +ENTRY(__asm_flush_dcache_all) + mov x16, lr + mov x0, #0 + bl __asm_dcache_all + mov lr, x16 + ret ENDPROC(__asm_flush_dcache_all)
+ENTRY(__asm_invalidate_dcache_all) + mov x16, lr + mov x0, #0xffff + bl __asm_dcache_all + mov lr, x16 + ret +ENDPROC(__asm_invalidate_dcache_all) + /* * void __asm_flush_dcache_range(start, end) * diff --git a/arch/arm/cpu/armv8/cache_v8.c b/arch/arm/cpu/armv8/cache_v8.c index 7acae1b..a96ecda 100644 --- a/arch/arm/cpu/armv8/cache_v8.c +++ b/arch/arm/cpu/armv8/cache_v8.c @@ -80,7 +80,7 @@ static void mmu_setup(void) */ void invalidate_dcache_all(void) { - __asm_flush_dcache_all(); + __asm_invalidate_dcache_all(); }
/* @@ -177,6 +177,7 @@ int dcache_status(void)
void icache_enable(void) { + __asm_invalidate_icache_all(); set_sctlr(get_sctlr() | CR_I); }
diff --git a/arch/arm/cpu/armv8/start.S b/arch/arm/cpu/armv8/start.S index e70c51d..4f95289 100644 --- a/arch/arm/cpu/armv8/start.S +++ b/arch/arm/cpu/armv8/start.S @@ -64,10 +64,12 @@ reset: msr cpacr_el1, x0 /* Enable FP/SIMD */ 0:
- /* Cache/BPB/TLB Invalidate */ - bl __asm_flush_dcache_all /* dCache clean&invalidate */ - bl __asm_invalidate_icache_all /* iCache invalidate */ - bl __asm_invalidate_tlb_all /* invalidate TLBs */ + /* + * Cache/BPB/TLB Invalidate + * i-cache is invalidated before enabled in icache_enable() + * tlb is invalidated before mmu is enabled in dcache_enable() + * d-cache is invalidated before enabled in dcache_enable() + */
/* Processor specific initialization */ bl lowlevel_init diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h index 4178f8c..74ee9a4 100644 --- a/arch/arm/include/asm/system.h +++ b/arch/arm/include/asm/system.h @@ -66,6 +66,7 @@ static inline void set_sctlr(unsigned int val) }
void __asm_flush_dcache_all(void); +void __asm_invalidate_dcache_all(void); void __asm_flush_dcache_range(u64 start, u64 end); void __asm_invalidate_tlb_all(void); void __asm_invalidate_icache_all(void);

Hi York,
On Wed, 26 Feb 2014 13:26:02 -0800, York Sun yorksun@freescale.com wrote:
Move setting for MAIR and TCR to cache_v8.c, to avoid conflict with sub-architecture.
Signed-off-by: York Sun yorksun@freescale.com CC: David Feng fenghua@phytium.com.cn
Changed since v1 v1: Acked-by: David Feng fenghua@phytium.com.cn v2: No change.
Didn't a v3 already hit the list, and shouldn't this series be v4?
Amicalement,

On 02/26/2014 01:53 PM, Albert ARIBAUD wrote:
Hi York,
On Wed, 26 Feb 2014 13:26:02 -0800, York Sun yorksun@freescale.com wrote:
Move setting for MAIR and TCR to cache_v8.c, to avoid conflict with sub-architecture.
Signed-off-by: York Sun yorksun@freescale.com CC: David Feng fenghua@phytium.com.cn
Changed since v1 v1: Acked-by: David Feng fenghua@phytium.com.cn v2: No change.
Didn't a v3 already hit the list, and shouldn't this series be v4?
Really? I don't recall. Did I leak an internal review patch to the list? Previous version is http://patchwork.ozlabs.org/patch/319067/
York

Hi York,
On Wed, 26 Feb 2014 13:58:20 -0800, York Sun yorksun@freescale.com wrote:
On 02/26/2014 01:53 PM, Albert ARIBAUD wrote:
Hi York,
On Wed, 26 Feb 2014 13:26:02 -0800, York Sun yorksun@freescale.com wrote:
Move setting for MAIR and TCR to cache_v8.c, to avoid conflict with sub-architecture.
Signed-off-by: York Sun yorksun@freescale.com CC: David Feng fenghua@phytium.com.cn
Changed since v1 v1: Acked-by: David Feng fenghua@phytium.com.cn v2: No change.
Didn't a v3 already hit the list, and shouldn't this series be v4?
Really? I don't recall. Did I leak an internal review patch to the list? Previous version is http://patchwork.ozlabs.org/patch/319067/
My mistake (and second one yesterday evening). V2 is fine.
York
Amicalement,

Hi York,
On Wed, 26 Feb 2014 13:26:02 -0800, York Sun yorksun@freescale.com wrote:
Move setting for MAIR and TCR to cache_v8.c, to avoid conflict with sub-architecture.
Signed-off-by: York Sun yorksun@freescale.com CC: David Feng fenghua@phytium.com.cn
Whole series applied to u-boot-arm/master, thanks!
Amicalement,
participants (2)
-
Albert ARIBAUD
-
York Sun