[U-Boot] [Patch v3 1/4] Added 64-bit MMIO accessors for ARMv8

From: "J. German Rivera" German.Rivera@freescale.com
This is needed for accessing peripherals with 64-bit MMIO registers, from ARMv8 processors.
Signed-off-by: J. German Rivera German.Rivera@freescale.com --- Change log: v3: No change. The blank line mentioned in review is in the other patch.
arch/arm/include/asm/io.h | 8 ++++++++ 1 file changed, 8 insertions(+)
diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h index 6a1f05a..95528dd 100644 --- a/arch/arm/include/asm/io.h +++ b/arch/arm/include/asm/io.h @@ -70,10 +70,12 @@ static inline phys_addr_t virt_to_phys(void * vaddr) #define __arch_getb(a) (*(volatile unsigned char *)(a)) #define __arch_getw(a) (*(volatile unsigned short *)(a)) #define __arch_getl(a) (*(volatile unsigned int *)(a)) +#define __arch_getq(a) (*(volatile unsigned long long *)(a))
#define __arch_putb(v,a) (*(volatile unsigned char *)(a) = (v)) #define __arch_putw(v,a) (*(volatile unsigned short *)(a) = (v)) #define __arch_putl(v,a) (*(volatile unsigned int *)(a) = (v)) +#define __arch_putq(v,a) (*(volatile unsigned long long *)(a) = (v))
extern inline void __raw_writesb(unsigned long addr, const void *data, int bytelen) @@ -123,10 +125,12 @@ extern inline void __raw_readsl(unsigned long addr, void *data, int longlen) #define __raw_writeb(v,a) __arch_putb(v,a) #define __raw_writew(v,a) __arch_putw(v,a) #define __raw_writel(v,a) __arch_putl(v,a) +#define __raw_writeq(v,a) __arch_putq(v,a)
#define __raw_readb(a) __arch_getb(a) #define __raw_readw(a) __arch_getw(a) #define __raw_readl(a) __arch_getl(a) +#define __raw_readq(a) __arch_getq(a)
/* * TODO: The kernel offers some more advanced versions of barriers, it might @@ -139,10 +143,12 @@ extern inline void __raw_readsl(unsigned long addr, void *data, int longlen) #define writeb(v,c) ({ u8 __v = v; __iowmb(); __arch_putb(__v,c); __v; }) #define writew(v,c) ({ u16 __v = v; __iowmb(); __arch_putw(__v,c); __v; }) #define writel(v,c) ({ u32 __v = v; __iowmb(); __arch_putl(__v,c); __v; }) +#define writeq(v,c) ({ u64 __v = v; __iowmb(); __arch_putq(__v,c); __v; })
#define readb(c) ({ u8 __v = __arch_getb(c); __iormb(); __v; }) #define readw(c) ({ u16 __v = __arch_getw(c); __iormb(); __v; }) #define readl(c) ({ u32 __v = __arch_getl(c); __iormb(); __v; }) +#define readq(c) ({ u64 __v = __arch_getq(c); __iormb(); __v; })
/* * The compiler seems to be incapable of optimising constants @@ -168,9 +174,11 @@ extern inline void __raw_readsl(unsigned long addr, void *data, int longlen) #define out_arch(type,endian,a,v) __raw_write##type(cpu_to_##endian(v),a) #define in_arch(type,endian,a) endian##_to_cpu(__raw_read##type(a))
+#define out_le64(a,v) out_arch(q,le64,a,v) #define out_le32(a,v) out_arch(l,le32,a,v) #define out_le16(a,v) out_arch(w,le16,a,v)
+#define in_le64(a) in_arch(q,le64,a) #define in_le32(a) in_arch(l,le32,a) #define in_le16(a) in_arch(w,le16,a)

Freescale LayerScape with Chassis Generation 3 is a set of SoCs with ARMv8 cores and 3rd generation of Chassis. We use a different MMU setup to support the memory map and cache attributes for these SoCs. MMU and cache are enabled very early to boost performance, especially for early development on emulators. After u-boot relocates to DDR, a new MMU table with QBMan cache access is created in DDR. SMMU pagesize is set in SMMU_sACR register. Both DDR3 and DDR4 are supported.
Signed-off-by: York Sun yorksun@freescale.com Signed-off-by: Varun Sethi Varun.Sethi@freescale.com Signed-off-by: Arnab Basu arnab.basu@freescale.com --- Change log: v3: Remove blank lines at the end of files Fix cluster PLL GSR register for accessing beyond array size Update final MMU table to support QBMan memory with cache Set SMMU pagesize in SMMU_sACR register in lowlevel init. Add DDR4 support Remove forcing L3 cache flushing Update GICv3 redistributor base address
Some of these changes are caused by model change.
arch/arm/cpu/armv8/cache_v8.c | 7 +- arch/arm/cpu/armv8/fsl-lsch3/Makefile | 10 + arch/arm/cpu/armv8/fsl-lsch3/README | 10 + arch/arm/cpu/armv8/fsl-lsch3/cpu.c | 474 +++++++++++++++++++++ arch/arm/cpu/armv8/fsl-lsch3/cpu.h | 7 + arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S | 65 +++ arch/arm/cpu/armv8/fsl-lsch3/speed.c | 176 ++++++++ arch/arm/cpu/armv8/fsl-lsch3/speed.h | 7 + arch/arm/cpu/armv8/fsl-lsch3/timer.c | 62 +++ arch/arm/include/asm/arch-fsl-lsch3/clock.h | 23 + arch/arm/include/asm/arch-fsl-lsch3/config.h | 65 +++ arch/arm/include/asm/arch-fsl-lsch3/gpio.h | 9 + arch/arm/include/asm/arch-fsl-lsch3/immap_lsch3.h | 116 +++++ arch/arm/include/asm/arch-fsl-lsch3/imx-regs.h | 13 + arch/arm/include/asm/arch-fsl-lsch3/mmu.h | 10 + arch/arm/include/asm/config.h | 4 + arch/arm/include/asm/system.h | 2 + drivers/i2c/mxc_i2c.c | 5 + include/common.h | 5 +- 19 files changed, 1066 insertions(+), 4 deletions(-) create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/Makefile create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/README create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/cpu.c create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/cpu.h create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/speed.c create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/speed.h create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/timer.c create mode 100644 arch/arm/include/asm/arch-fsl-lsch3/clock.h create mode 100644 arch/arm/include/asm/arch-fsl-lsch3/config.h create mode 100644 arch/arm/include/asm/arch-fsl-lsch3/gpio.h create mode 100644 arch/arm/include/asm/arch-fsl-lsch3/immap_lsch3.h create mode 100644 arch/arm/include/asm/arch-fsl-lsch3/imx-regs.h create mode 100644 arch/arm/include/asm/arch-fsl-lsch3/mmu.h
diff --git a/arch/arm/cpu/armv8/cache_v8.c b/arch/arm/cpu/armv8/cache_v8.c index a96ecda..c47acba 100644 --- a/arch/arm/cpu/armv8/cache_v8.c +++ b/arch/arm/cpu/armv8/cache_v8.c @@ -83,12 +83,17 @@ void invalidate_dcache_all(void) __asm_invalidate_dcache_all(); }
+void __weak flush_l3_cache(void) +{ +} + /* * Performs a clean & invalidation of the entire data cache at all levels */ void flush_dcache_all(void) { __asm_flush_dcache_all(); + flush_l3_cache(); }
/* @@ -221,7 +226,7 @@ void invalidate_icache_all(void) * Enable dCache & iCache, whether cache is actually enabled * depend on CONFIG_SYS_DCACHE_OFF and CONFIG_SYS_ICACHE_OFF */ -void enable_caches(void) +void __weak enable_caches(void) { icache_enable(); dcache_enable(); diff --git a/arch/arm/cpu/armv8/fsl-lsch3/Makefile b/arch/arm/cpu/armv8/fsl-lsch3/Makefile new file mode 100644 index 0000000..4b859cf --- /dev/null +++ b/arch/arm/cpu/armv8/fsl-lsch3/Makefile @@ -0,0 +1,10 @@ +# +# Copyright 2014, Freescale Semiconductor +# +# SPDX-License-Identifier: GPL-2.0+ +# + +obj-y += cpu.o +obj-y += timer.o +obj-y += lowlevel.o +obj-y += speed.o diff --git a/arch/arm/cpu/armv8/fsl-lsch3/README b/arch/arm/cpu/armv8/fsl-lsch3/README new file mode 100644 index 0000000..de34a91 --- /dev/null +++ b/arch/arm/cpu/armv8/fsl-lsch3/README @@ -0,0 +1,10 @@ +# +# Copyright 2014 Freescale Semiconductor +# +# SPDX-License-Identifier: GPL-2.0+ +# + +Freescale LayerScape with Chassis Generation 3 + +This architecture supports Freescale ARMv8 SoCs with Chassis generation 3, +for example LS2100A. diff --git a/arch/arm/cpu/armv8/fsl-lsch3/cpu.c b/arch/arm/cpu/armv8/fsl-lsch3/cpu.c new file mode 100644 index 0000000..2780390 --- /dev/null +++ b/arch/arm/cpu/armv8/fsl-lsch3/cpu.c @@ -0,0 +1,474 @@ +/* + * Copyright 2014 Freescale Semiconductor, Inc. + * + * SPDX-License-Identifier: GPL-2.0+ + */ + +#include <common.h> +#include <asm/io.h> +#include <asm/system.h> +#include <asm/armv8/mmu.h> +#include <asm/io.h> +#include <asm/arch-fsl-lsch3/immap_lsch3.h> +#include "cpu.h" +#include "speed.h" + +DECLARE_GLOBAL_DATA_PTR; + +#ifndef CONFIG_SYS_DCACHE_OFF +/* + * To start MMU before DDR is available, we create MMU table in SRAM. + * The base address of SRAM is CONFIG_SYS_FSL_OCRAM_BASE. We use three + * levels of translation tables here to cover 40-bit address space. 
+ * We use 4KB granule size, with 40 bits physical address, T0SZ=24 + * Level 0 IA[39], table address @0 + * Level 1 IA[31:30], table address @01000, 0x2000 + * Level 2 IA[29:21], table address @0x3000 + */ + +#define EARLY_SECTION_SHIFT_L0 39UL +#define EARLY_SECTION_SHIFT_L1 30UL +#define EARLY_SECTION_SHIFT_L2 21UL +#define EARLY_BLOCK_SIZE_L0 0x8000000000UL +#define EARLY_BLOCK_SIZE_L1 (1 << EARLY_SECTION_SHIFT_L1) +#define EARLY_BLOCK_SIZE_L2 (1 << EARLY_SECTION_SHIFT_L2) +#define CONFIG_SYS_IFC_BASE 0x30000000 +#define CONFIG_SYS_IFC_SIZE 0x10000000 +#define CONFIG_SYS_IFC_BASE2 0x500000000 +#define CONFIG_SYS_IFC_SIZE2 0x100000000 +#define TCR_EL2_PS_40BIT (2 << 16) +#define EARLY_VA_BITS (40) +#define EARLY_TCR (TCR_TG0_4K | \ + TCR_EL2_PS_40BIT | \ + TCR_SHARED_NON | \ + TCR_ORGN_NC | \ + TCR_IRGN_NC | \ + TCR_T0SZ(EARLY_VA_BITS)) + +/* + * Final MMU + * Let's start from the same layout as early MMU and modify as needed. + * IFC regions will be cache-inhibit. + */ +#define FINAL_SECTION_SHIFT_L0 39UL +#define FINAL_SECTION_SHIFT_L1 30UL +#define FINAL_SECTION_SHIFT_L2 21UL +#define FINAL_BLOCK_SIZE_L0 0x8000000000UL +#define FINAL_BLOCK_SIZE_L1 (1 << FINAL_SECTION_SHIFT_L1) +#define FINAL_BLOCK_SIZE_L2 (1 << FINAL_SECTION_SHIFT_L2) +#define FINAL_QBMAN_CACHED_MEM 0x818000000UL +#define FINAL_QBMAN_CACHED_SIZE 0x4000000 +#define TCR_EL2_PS_40BIT (2 << 16) +#define FINAL_VA_BITS (40) +#define FINAL_TCR (TCR_TG0_4K | \ + TCR_EL2_PS_40BIT | \ + TCR_SHARED_NON | \ + TCR_ORGN_NC | \ + TCR_IRGN_NC | \ + TCR_T0SZ(FINAL_VA_BITS)) + + +static void set_pgtable_section(u64 *page_table, u64 index, u64 section, + u8 memory_type) +{ + u64 value; + + value = section | PMD_TYPE_SECT | PMD_SECT_AF; + value |= PMD_ATTRINDX(memory_type); + page_table[index] = value; +} + +static inline void early_mmu_setup(void) +{ + int el; + u64 i; + u64 section_l1t0, section_l1t1, section_l2; + u64 *level0_table = (u64 *)CONFIG_SYS_FSL_OCRAM_BASE; + u64 *level1_table_0 = (u64 
*)(CONFIG_SYS_FSL_OCRAM_BASE + 0x1000); + u64 *level1_table_1 = (u64 *)(CONFIG_SYS_FSL_OCRAM_BASE + 0x2000); + u64 *level2_table = (u64 *)(CONFIG_SYS_FSL_OCRAM_BASE + 0x3000); + + + level0_table[0] = + (u64)level1_table_0 | PMD_TYPE_TABLE; + level0_table[1] = + (u64)level1_table_1 | PMD_TYPE_TABLE; + + /* + * set level 1 table 0 to cache_inhibit, covering 0 to 512GB + * set level 1 table 1 to cache enabled, covering 512GB to 1TB + * set level 2 table to cache-inhibit, covering 0 to 1GB + */ + section_l1t0 = 0; + section_l1t1 = EARLY_BLOCK_SIZE_L0; + section_l2 = 0; + for (i = 0; i < 512; i++) { + set_pgtable_section(level1_table_0, i, section_l1t0, + MT_DEVICE_NGNRNE); + set_pgtable_section(level1_table_1, i, section_l1t1, + MT_NORMAL); + set_pgtable_section(level2_table, i, section_l2, + MT_DEVICE_NGNRNE); + section_l1t0 += EARLY_BLOCK_SIZE_L1; + section_l1t1 += EARLY_BLOCK_SIZE_L1; + section_l2 += EARLY_BLOCK_SIZE_L2; + } + + level1_table_0[0] = + (u64)level2_table | PMD_TYPE_TABLE; + level1_table_0[1] = + 0x40000000 | PMD_SECT_AF | PMD_TYPE_SECT | + PMD_ATTRINDX(MT_DEVICE_NGNRNE); + level1_table_0[2] = + 0x80000000 | PMD_SECT_AF | PMD_TYPE_SECT | + PMD_ATTRINDX(MT_NORMAL); + level1_table_0[3] = + 0xc0000000 | PMD_SECT_AF | PMD_TYPE_SECT | + PMD_ATTRINDX(MT_NORMAL); + + /* Rewrite table to enable cache */ + set_pgtable_section(level2_table, + CONFIG_SYS_FSL_OCRAM_BASE >> EARLY_SECTION_SHIFT_L2, + CONFIG_SYS_FSL_OCRAM_BASE, + MT_NORMAL); + for (i = CONFIG_SYS_IFC_BASE >> EARLY_SECTION_SHIFT_L2; + i < (CONFIG_SYS_IFC_BASE + CONFIG_SYS_IFC_SIZE) + >> EARLY_SECTION_SHIFT_L2; i++) { + section_l2 = i << EARLY_SECTION_SHIFT_L2; + set_pgtable_section(level2_table, i, + section_l2, MT_NORMAL); + } + + el = current_el(); + if (el == 1) { + asm volatile("dsb sy;isb"); + asm volatile("msr ttbr0_el1, %0" + : : "r" ((u64)level0_table) : "memory"); + asm volatile("msr tcr_el1, %0" + : : "r" (EARLY_TCR) : "memory"); + asm volatile("msr mair_el1, %0" + : : "r" (MEMORY_ATTRIBUTES) 
: "memory"); + } else if (el == 2) { + asm volatile("dsb sy;isb"); + asm volatile("msr ttbr0_el2, %0" + : : "r" ((u64)level0_table) : "memory"); + asm volatile("msr tcr_el2, %0" + : : "r" (EARLY_TCR) : "memory"); + asm volatile("msr mair_el2, %0" + : : "r" (MEMORY_ATTRIBUTES) : "memory"); + } else if (el == 3) { + asm volatile("dsb sy;isb"); + asm volatile("msr ttbr0_el3, %0" + : : "r" ((u64)level0_table) : "memory"); + asm volatile("msr tcr_el3, %0" + : : "r" (EARLY_TCR) : "memory"); + asm volatile("msr mair_el3, %0" + : : "r" (MEMORY_ATTRIBUTES) : "memory"); + } else { + hang(); + } + + set_sctlr(get_sctlr() | CR_M); +} + +static inline void final_mmu_setup(void) +{ + int el; + u64 i, tbl_base, tbl_limit, section_base; + u64 section_l1t0, section_l1t1, section_l2; + u64 *level0_table = (u64 *)gd->arch.tlb_addr; + u64 *level1_table_0 = (u64 *)(gd->arch.tlb_addr + 0x1000); + u64 *level1_table_1 = (u64 *)(gd->arch.tlb_addr + 0x2000); + u64 *level2_table_0 = (u64 *)(gd->arch.tlb_addr + 0x3000); + u64 *level2_table_1 = (u64 *)(gd->arch.tlb_addr + 0x4000); + + + level0_table[0] = + (u64)level1_table_0 | PMD_TYPE_TABLE; + level0_table[1] = + (u64)level1_table_1 | PMD_TYPE_TABLE; + + /* + * set level 1 table 0 to cache_inhibit, covering 0 to 512GB + * set level 1 table 1 to cache enabled, covering 512GB to 1TB + * set level 2 table 0 to cache-inhibit, covering 0 to 1GB + */ + section_l1t0 = 0; + section_l1t1 = FINAL_BLOCK_SIZE_L0; + section_l2 = 0; + for (i = 0; i < 512; i++) { + set_pgtable_section(level1_table_0, i, section_l1t0, + MT_DEVICE_NGNRNE); + set_pgtable_section(level1_table_1, i, section_l1t1, + MT_NORMAL); + set_pgtable_section(level2_table_0, i, section_l2, + MT_DEVICE_NGNRNE); + section_l1t0 += FINAL_BLOCK_SIZE_L1; + section_l1t1 += FINAL_BLOCK_SIZE_L1; + section_l2 += FINAL_BLOCK_SIZE_L2; + } + + level1_table_0[0] = + (u64)level2_table_0 | PMD_TYPE_TABLE; + level1_table_0[2] = + 0x80000000 | PMD_SECT_AF | PMD_TYPE_SECT | + PMD_ATTRINDX(MT_NORMAL); + 
level1_table_0[3] = + 0xc0000000 | PMD_SECT_AF | PMD_TYPE_SECT | + PMD_ATTRINDX(MT_NORMAL); + + /* Rewrite table to enable cache */ + set_pgtable_section(level2_table_0, + CONFIG_SYS_FSL_OCRAM_BASE >> FINAL_SECTION_SHIFT_L2, + CONFIG_SYS_FSL_OCRAM_BASE, + MT_NORMAL); + + /* + * Fill in other part of tables if cache is needed + * If finer granularity than 1GB is needed, sub table + * should be created. + */ + section_base = FINAL_QBMAN_CACHED_MEM & ~(FINAL_BLOCK_SIZE_L1 - 1); + i = section_base >> FINAL_SECTION_SHIFT_L1; + level1_table_0[i] = (u64)level2_table_1 | PMD_TYPE_TABLE; + section_l2 = section_base; + for (i = 0; i < 512; i++) { + set_pgtable_section(level2_table_1, i, section_l2, + MT_DEVICE_NGNRNE); + section_l2 += FINAL_BLOCK_SIZE_L2; + } + tbl_base = FINAL_QBMAN_CACHED_MEM & (FINAL_BLOCK_SIZE_L1 - 1); + tbl_limit = (FINAL_QBMAN_CACHED_MEM + FINAL_QBMAN_CACHED_SIZE) & + (FINAL_BLOCK_SIZE_L1 - 1); + for (i = tbl_base >> FINAL_SECTION_SHIFT_L2; + i < tbl_limit >> FINAL_SECTION_SHIFT_L2; i++) { + section_l2 = section_base + (i << FINAL_SECTION_SHIFT_L2); + set_pgtable_section(level2_table_1, i, + section_l2, MT_NORMAL); + } + + el = current_el(); + if (el == 1) { + asm volatile("dsb sy;isb"); + asm volatile("msr ttbr0_el1, %0" + : : "r" ((u64)level0_table) : "memory"); + asm volatile("msr tcr_el1, %0" + : : "r" (FINAL_TCR) : "memory"); + asm volatile("msr mair_el1, %0" + : : "r" (MEMORY_ATTRIBUTES) : "memory"); + } else if (el == 2) { + asm volatile("dsb sy;isb"); + asm volatile("msr ttbr0_el2, %0" + : : "r" ((u64)level0_table) : "memory"); + asm volatile("msr tcr_el2, %0" + : : "r" (FINAL_TCR) : "memory"); + asm volatile("msr mair_el2, %0" + : : "r" (MEMORY_ATTRIBUTES) : "memory"); + } else if (el == 3) { + asm volatile("dsb sy;isb"); + asm volatile("msr ttbr0_el3, %0" + : : "r" ((u64)level0_table) : "memory"); + asm volatile("msr tcr_el3, %0" + : : "r" (FINAL_TCR) : "memory"); + asm volatile("msr mair_el3, %0" + : : "r" (MEMORY_ATTRIBUTES) : "memory"); + 
} else { + hang(); + } + + set_sctlr(get_sctlr() | CR_M); +} + +int arch_cpu_init(void) +{ + icache_enable(); + __asm_invalidate_dcache_all(); + __asm_invalidate_tlb_all(); + early_mmu_setup(); + set_sctlr(get_sctlr() | CR_C); + return 0; +} + +/* + * flush_l3_cache + * Dickens L3 cache can be flushed by transitioning from FAM to SFONLY power + * state, by writing to HP-F P-state request register. + */ +#define HNF0_PSTATE_REQ 0x04200010 +#define HNF1_PSTATE_REQ 0x04210010 +#define HNF2_PSTATE_REQ 0x04220010 +#define HNF3_PSTATE_REQ 0x04230010 +#define HNF4_PSTATE_REQ 0x04240010 +#define HNF5_PSTATE_REQ 0x04250010 +#define HNF6_PSTATE_REQ 0x04260010 +#define HNF7_PSTATE_REQ 0x04270010 +#define HNFPSTAT_MASK (0xFFFFFFFFFFFFFFFC) +#define HNFPSTAT_FAM 0x3 +#define HNFPSTAT_SFONLY 0x01 + +static void hnf_pstate_req(u64 *ptr, u64 state) +{ + int timeout = 1000; + out_le64(ptr, (in_le64(ptr) & HNFPSTAT_MASK) | (state & 0x3)); + ptr++; + /* checking if the transition is completed */ + while (timeout > 0) { + if (((in_le64(ptr) & 0x0c) >> 2) == (state & 0x3)) + break; + udelay(100); + timeout--; + } +} + +void flush_l3_cache(void) +{ + hnf_pstate_req((u64 *)HNF0_PSTATE_REQ, HNFPSTAT_SFONLY); + hnf_pstate_req((u64 *)HNF1_PSTATE_REQ, HNFPSTAT_SFONLY); + hnf_pstate_req((u64 *)HNF2_PSTATE_REQ, HNFPSTAT_SFONLY); + hnf_pstate_req((u64 *)HNF3_PSTATE_REQ, HNFPSTAT_SFONLY); + hnf_pstate_req((u64 *)HNF4_PSTATE_REQ, HNFPSTAT_SFONLY); + hnf_pstate_req((u64 *)HNF5_PSTATE_REQ, HNFPSTAT_SFONLY); + hnf_pstate_req((u64 *)HNF6_PSTATE_REQ, HNFPSTAT_SFONLY); + hnf_pstate_req((u64 *)HNF7_PSTATE_REQ, HNFPSTAT_SFONLY); + hnf_pstate_req((u64 *)HNF0_PSTATE_REQ, HNFPSTAT_FAM); + hnf_pstate_req((u64 *)HNF1_PSTATE_REQ, HNFPSTAT_FAM); + hnf_pstate_req((u64 *)HNF2_PSTATE_REQ, HNFPSTAT_FAM); + hnf_pstate_req((u64 *)HNF3_PSTATE_REQ, HNFPSTAT_FAM); + hnf_pstate_req((u64 *)HNF4_PSTATE_REQ, HNFPSTAT_FAM); + hnf_pstate_req((u64 *)HNF5_PSTATE_REQ, HNFPSTAT_FAM); + hnf_pstate_req((u64 *)HNF6_PSTATE_REQ, 
HNFPSTAT_FAM); + hnf_pstate_req((u64 *)HNF7_PSTATE_REQ, HNFPSTAT_FAM); +} + +/* + * This function is called from lib/board.c. + * It recreates MMU table in main memory. MMU and d-cache are enabled earlier. + * There is no need to disable d-cache for this operation. + */ +void enable_caches(void) +{ + final_mmu_setup(); + flush_dcache_range(gd->arch.tlb_addr, + gd->arch.tlb_addr + gd->arch.tlb_size); + __asm_invalidate_tlb_all(); +} +#endif + +static inline u32 init_type(u32 cluster, int init_id) +{ + struct ccsr_gur *gur = (void *)(CONFIG_SYS_FSL_GUTS_ADDR); + u32 idx = (cluster >> (init_id * 8)) & TP_CLUSTER_INIT_MASK; + u32 type = in_le32(&gur->tp_ityp[idx]); + + if (type & TP_ITYP_AV) + return type; + + return 0; +} + +u32 cpu_mask(void) +{ + struct ccsr_gur __iomem *gur = (void *)(CONFIG_SYS_FSL_GUTS_ADDR); + int i = 0, count = 0; + u32 cluster, type, mask = 0; + + do { + int j; + cluster = in_le32(&gur->tp_cluster[i].lower); + for (j = 0; j < TP_INIT_PER_CLUSTER; j++) { + type = init_type(cluster, j); + if (type) { + if (TP_ITYP_TYPE(type) == TP_ITYP_TYPE_ARM) + mask |= 1 << count; + count++; + } + } + i++; + } while ((cluster & TP_CLUSTER_EOC) != TP_CLUSTER_EOC); + + return mask; +} + +/* + * Return the number of cores on this SOC. 
+ */ +int cpu_numcores(void) +{ + return hweight32(cpu_mask()); +} + +int fsl_qoriq_core_to_cluster(unsigned int core) +{ + struct ccsr_gur __iomem *gur = + (void __iomem *)(CONFIG_SYS_FSL_GUTS_ADDR); + int i = 0, count = 0; + u32 cluster; + + do { + int j; + cluster = in_le32(&gur->tp_cluster[i].lower); + for (j = 0; j < TP_INIT_PER_CLUSTER; j++) { + if (init_type(cluster, j)) { + if (count == core) + return i; + count++; + } + } + i++; + } while ((cluster & TP_CLUSTER_EOC) != TP_CLUSTER_EOC); + + return -1; /* cannot identify the cluster */ +} + +u32 fsl_qoriq_core_to_type(unsigned int core) +{ + struct ccsr_gur __iomem *gur = + (void __iomem *)(CONFIG_SYS_FSL_GUTS_ADDR); + int i = 0, count = 0; + u32 cluster, type; + + do { + int j; + cluster = in_le32(&gur->tp_cluster[i].lower); + for (j = 0; j < TP_INIT_PER_CLUSTER; j++) { + type = init_type(cluster, j); + if (type) { + if (count == core) + return type; + count++; + } + } + i++; + } while ((cluster & TP_CLUSTER_EOC) != TP_CLUSTER_EOC); + + return -1; /* cannot identify the cluster */ +} + +#ifdef CONFIG_DISPLAY_CPUINFO +int print_cpuinfo(void) +{ + struct sys_info sysinfo; + char buf[32]; + unsigned int i, core; + u32 type; + + get_sys_info(&sysinfo); + puts("Clock Configuration:"); + for_each_cpu(i, core, cpu_numcores(), cpu_mask()) { + if (!(i % 3)) + puts("\n "); + type = TP_ITYP_VER(fsl_qoriq_core_to_type(core)); + printf("CPU%d(%s):%-4s MHz ", core, + type == TY_ITYP_VER_A7 ? "A7 " : + (type == TY_ITYP_VER_A53 ? "A53" : + (type == TY_ITYP_VER_A57 ? 
"A57" : " ")), + strmhz(buf, sysinfo.freq_processor[core])); + } + printf("\n Bus: %-4s MHz ", + strmhz(buf, sysinfo.freq_systembus)); + printf("DDR: %-4s MHz", strmhz(buf, sysinfo.freq_ddrbus)); + puts("\n"); + + return 0; +} +#endif diff --git a/arch/arm/cpu/armv8/fsl-lsch3/cpu.h b/arch/arm/cpu/armv8/fsl-lsch3/cpu.h new file mode 100644 index 0000000..28544d7 --- /dev/null +++ b/arch/arm/cpu/armv8/fsl-lsch3/cpu.h @@ -0,0 +1,7 @@ +/* + * Copyright 2014, Freescale Semiconductor + * + * SPDX-License-Identifier: GPL-2.0+ + */ + +int fsl_qoriq_core_to_cluster(unsigned int core); diff --git a/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S b/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S new file mode 100644 index 0000000..087d5d1 --- /dev/null +++ b/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S @@ -0,0 +1,65 @@ +/* + * (C) Copyright 2014 Freescale Semiconductor + * + * SPDX-License-Identifier: GPL-2.0+ + * + * Extracted from armv8/start.S + */ + +#include <config.h> +#include <linux/linkage.h> +#include <asm/macro.h> + +ENTRY(lowlevel_init) + /* Initialize GIC Secure Bank Status */ + mov x29, lr /* Save LR */ + + /* Set the SMMU page size in the sACR register */ + ldr x1, =SMMU_BASE + ldr w0, [x1, #0x10] + orr w0, w0, #1 << 16 /* set sACR.pagesize to indicate 64K page */ + str w0, [x1, #0x10] + +#if defined(CONFIG_GICV2) || defined(CONFIG_GICV3) + branch_if_slave x0, 1f + ldr x0, =GICD_BASE + bl gic_init_secure +1: +#if defined(CONFIG_GICV3) + ldr x0, =GICR_BASE + bl gic_init_secure_percpu +#elif defined(CONFIG_GICV2) + ldr x0, =GICD_BASE + ldr x1, =GICC_BASE + bl gic_init_secure_percpu +#endif +#endif + + branch_if_master x0, x1, 1f + + /* + * Slave should wait for master clearing spin table. + * This sync prevent salves observing incorrect + * value of spin table and jumping to wrong place. 
+ */ +#if defined(CONFIG_GICV2) || defined(CONFIG_GICV3) +#ifdef CONFIG_GICV2 + ldr x0, =GICC_BASE +#endif + bl gic_wait_for_interrupt +#endif + + /* + * All processors will enter EL2 and optionally EL1. + */ + bl armv8_switch_to_el2 +#ifdef CONFIG_ARMV8_SWITCH_TO_EL1 + bl armv8_switch_to_el1 +#endif + b 2f + +1: +2: + mov lr, x29 /* Restore LR */ + ret +ENDPROC(lowlevel_init) diff --git a/arch/arm/cpu/armv8/fsl-lsch3/speed.c b/arch/arm/cpu/armv8/fsl-lsch3/speed.c new file mode 100644 index 0000000..dc4a34b --- /dev/null +++ b/arch/arm/cpu/armv8/fsl-lsch3/speed.c @@ -0,0 +1,176 @@ +/* + * Copyright 2014, Freescale Semiconductor, Inc. + * + * SPDX-License-Identifier: GPL-2.0+ + * + * Derived from arch/power/cpu/mpc85xx/speed.c + */ + +#include <common.h> +#include <linux/compiler.h> +#include <fsl_ifc.h> +#include <asm/processor.h> +#include <asm/io.h> +#include <asm/arch-fsl-lsch3/immap_lsch3.h> +#include <asm/arch/clock.h> +#include "cpu.h" + +DECLARE_GLOBAL_DATA_PTR; + +#ifndef CONFIG_SYS_FSL_NUM_CC_PLLS +#define CONFIG_SYS_FSL_NUM_CC_PLLS 6 +#endif + + +void get_sys_info(struct sys_info *sys_info) +{ + struct ccsr_gur __iomem *gur = (void *)(CONFIG_SYS_FSL_GUTS_ADDR); +#ifdef CONFIG_FSL_IFC + struct fsl_ifc *ifc_regs = (void *)CONFIG_SYS_IFC_ADDR; + u32 ccr; +#endif + struct ccsr_clk_cluster_group __iomem *clk_grp[2] = { + (void *)(CONFIG_SYS_FSL_CH3_CLK_GRPA_ADDR), + (void *)(CONFIG_SYS_FSL_CH3_CLK_GRPB_ADDR) + }; + struct ccsr_clk_ctrl __iomem *clk_ctrl = + (void *)(CONFIG_SYS_FSL_CH3_CLK_CTRL_ADDR); + unsigned int cpu; + const u8 core_cplx_pll[16] = { + [0] = 0, /* CC1 PPL / 1 */ + [1] = 0, /* CC1 PPL / 2 */ + [2] = 0, /* CC1 PPL / 4 */ + [4] = 1, /* CC2 PPL / 1 */ + [5] = 1, /* CC2 PPL / 2 */ + [6] = 1, /* CC2 PPL / 4 */ + [8] = 2, /* CC3 PPL / 1 */ + [9] = 2, /* CC3 PPL / 2 */ + [10] = 2, /* CC3 PPL / 4 */ + [12] = 3, /* CC4 PPL / 1 */ + [13] = 3, /* CC4 PPL / 2 */ + [14] = 3, /* CC4 PPL / 4 */ + }; + + const u8 core_cplx_pll_div[16] = { + [0] = 1, /* CC1 
PPL / 1 */ + [1] = 2, /* CC1 PPL / 2 */ + [2] = 4, /* CC1 PPL / 4 */ + [4] = 1, /* CC2 PPL / 1 */ + [5] = 2, /* CC2 PPL / 2 */ + [6] = 4, /* CC2 PPL / 4 */ + [8] = 1, /* CC3 PPL / 1 */ + [9] = 2, /* CC3 PPL / 2 */ + [10] = 4, /* CC3 PPL / 4 */ + [12] = 1, /* CC4 PPL / 1 */ + [13] = 2, /* CC4 PPL / 2 */ + [14] = 4, /* CC4 PPL / 4 */ + }; + + uint i, cluster; + uint freq_c_pll[CONFIG_SYS_FSL_NUM_CC_PLLS]; + uint ratio[CONFIG_SYS_FSL_NUM_CC_PLLS]; + unsigned long sysclk = CONFIG_SYS_CLK_FREQ; + int cc_group[12] = CONFIG_SYS_FSL_CLUSTER_CLOCKS; + u32 c_pll_sel, cplx_pll; + void *offset; + + sys_info->freq_systembus = sysclk; +#ifdef CONFIG_DDR_CLK_FREQ + sys_info->freq_ddrbus = CONFIG_DDR_CLK_FREQ; +#else + sys_info->freq_ddrbus = sysclk; +#endif + + sys_info->freq_systembus *= (in_le32(&gur->rcwsr[0]) >> + FSL_CHASSIS3_RCWSR0_SYS_PLL_RAT_SHIFT) & + FSL_CHASSIS3_RCWSR0_SYS_PLL_RAT_MASK; + sys_info->freq_ddrbus *= (in_le32(&gur->rcwsr[0]) >> + FSL_CHASSIS3_RCWSR0_MEM_PLL_RAT_SHIFT) & + FSL_CHASSIS3_RCWSR0_MEM_PLL_RAT_MASK; + + for (i = 0; i < CONFIG_SYS_FSL_NUM_CC_PLLS; i++) { + /* + * fixme: prefer to combine the following into one line, but + * cannot pass compiling without warning about in_le32. 
+ */ + offset = (void *)((size_t)clk_grp[i/3] + + offsetof(struct ccsr_clk_cluster_group, + pllngsr[i%3].gsr)); + ratio[i] = (in_le32(offset) >> 1) & 0x3f; + if (ratio[i] > 4) + freq_c_pll[i] = sysclk * ratio[i]; + else + freq_c_pll[i] = sys_info->freq_systembus * ratio[i]; + } + + for_each_cpu(i, cpu, cpu_numcores(), cpu_mask()) { + cluster = fsl_qoriq_core_to_cluster(cpu); + c_pll_sel = (in_le32(&clk_ctrl->clkcncsr[cluster].csr) >> 27) + & 0xf; + cplx_pll = core_cplx_pll[c_pll_sel]; + cplx_pll += cc_group[cluster] - 1; + sys_info->freq_processor[cpu] = + freq_c_pll[cplx_pll] / core_cplx_pll_div[c_pll_sel]; + } + +#if defined(CONFIG_FSL_IFC) + ccr = in_le32(&ifc_regs->ifc_ccr); + ccr = ((ccr & IFC_CCR_CLK_DIV_MASK) >> IFC_CCR_CLK_DIV_SHIFT) + 1; + + sys_info->freq_localbus = sys_info->freq_systembus / ccr; +#endif +} + + +int get_clocks(void) +{ + struct sys_info sys_info; + get_sys_info(&sys_info); + gd->cpu_clk = sys_info.freq_processor[0]; + gd->bus_clk = sys_info.freq_systembus; + gd->mem_clk = sys_info.freq_ddrbus; + +#if defined(CONFIG_FSL_ESDHC) + gd->arch.sdhc_clk = gd->bus_clk / 2; +#endif /* defined(CONFIG_FSL_ESDHC) */ + + if (gd->cpu_clk != 0) + return 0; + else + return 1; +} + +/******************************************** + * get_bus_freq + * return system bus freq in Hz + *********************************************/ +ulong get_bus_freq(ulong dummy) +{ + if (!gd->bus_clk) + get_clocks(); + + return gd->bus_clk; +} + +/******************************************** + * get_ddr_freq + * return ddr bus freq in Hz + *********************************************/ +ulong get_ddr_freq(ulong dummy) +{ + if (!gd->mem_clk) + get_clocks(); + + return gd->mem_clk; +} + +unsigned int mxc_get_clock(enum mxc_clock clk) +{ + switch (clk) { + case MXC_I2C_CLK: + return get_bus_freq(0) / 2; + default: + printf("Unsupported clock\n"); + } + return 0; +} diff --git a/arch/arm/cpu/armv8/fsl-lsch3/speed.h b/arch/arm/cpu/armv8/fsl-lsch3/speed.h new file mode 100644 index 
0000000..15af5b9 --- /dev/null +++ b/arch/arm/cpu/armv8/fsl-lsch3/speed.h @@ -0,0 +1,7 @@ +/* + * Copyright 2014, Freescale Semiconductor, Inc. + * + * SPDX-License-Identifier: GPL-2.0+ + */ + +void get_sys_info(struct sys_info *sys_info); diff --git a/arch/arm/cpu/armv8/fsl-lsch3/timer.c b/arch/arm/cpu/armv8/fsl-lsch3/timer.c new file mode 100644 index 0000000..3adfa41 --- /dev/null +++ b/arch/arm/cpu/armv8/fsl-lsch3/timer.c @@ -0,0 +1,62 @@ +/* + * Copyright 2014, Freescale Semiconductor + * + * SPDX-License-Identifier: GPL-2.0+ + */ + +#include <common.h> +#include <div64.h> +#include <linux/compiler.h> + +static inline u64 get_cntfrq(void) +{ + u64 cntfrq; + asm volatile("mrs %0, cntfrq_el0" : "=r" (cntfrq)); + return cntfrq; +} + +static inline u64 tick_to_time(u64 tick) +{ + tick *= CONFIG_SYS_HZ; + do_div(tick, get_cntfrq()); + return tick; +} + +static inline u64 time_to_tick(u64 time) +{ + time *= get_cntfrq(); + do_div(time, CONFIG_SYS_HZ); + return time; +} + +static inline u64 us_to_tick(unsigned long long usec) +{ + usec = usec * get_cntfrq() + 999999; + do_div(usec, 1000000); + + return usec; +} + +u64 get_ticks(void) +{ + u64 cval; + + asm volatile("isb;mrs %0, cntpct_el0" : "=r" (cval)); + + return cval; +} + +ulong get_timer(ulong base) +{ + return tick_to_time(get_ticks()) - base; +} + +void __udelay(unsigned long usec) +{ + u64 start, tmo; + + start = get_ticks(); + tmo = us_to_tick(usec); + while (get_ticks() < (start + tmo)) + ; +} diff --git a/arch/arm/include/asm/arch-fsl-lsch3/clock.h b/arch/arm/include/asm/arch-fsl-lsch3/clock.h new file mode 100644 index 0000000..831af0b --- /dev/null +++ b/arch/arm/include/asm/arch-fsl-lsch3/clock.h @@ -0,0 +1,23 @@ +/* + * Copyright 2014 Freescale Semiconductor, Inc. 
+ * + * SPDX-License-Identifier: GPL-2.0+ + * + */ + +#ifndef __ASM_ARCH_FSL_LSCH3_CLOCK_H_ +#define __ASM_ARCH_FSL_LSCH3_CLOCK_H_ + +#include <common.h> + +enum mxc_clock { + MXC_ARM_CLK = 0, + MXC_BUS_CLK, + MXC_UART_CLK, + MXC_ESDHC_CLK, + MXC_I2C_CLK, +}; + +unsigned int mxc_get_clock(enum mxc_clock clk); + +#endif /* __ASM_ARCH_FSL_LSCH3_CLOCK_H_ */ diff --git a/arch/arm/include/asm/arch-fsl-lsch3/config.h b/arch/arm/include/asm/arch-fsl-lsch3/config.h new file mode 100644 index 0000000..c987a19 --- /dev/null +++ b/arch/arm/include/asm/arch-fsl-lsch3/config.h @@ -0,0 +1,65 @@ +/* + * Copyright 2014, Freescale Semiconductor + * + * SPDX-License-Identifier: GPL-2.0+ + */ + +#ifndef _ASM_ARMV8_FSL_LSCH3_CONFIG_ +#define _ASM_ARMV8_FSL_LSCH3_CONFIG_ + +#include <fsl_ddrc_version.h> + +#define CONFIG_SYS_FSL_OCRAM_BASE 0x18000000 /* initial RAM */ +/* Link Definitions */ +#define CONFIG_SYS_INIT_SP_ADDR (CONFIG_SYS_FSL_OCRAM_BASE + 0xfff0) + +#define CONFIG_SYS_IMMR 0x01000000 +#define CONFIG_SYS_FSL_DDR_ADDR (CONFIG_SYS_IMMR + 0x00080000) +#define CONFIG_SYS_FSL_DDR2_ADDR (CONFIG_SYS_IMMR + 0x00090000) +#define CONFIG_SYS_FSL_GUTS_ADDR (CONFIG_SYS_IMMR + 0x00E00000) +#define CONFIG_SYS_FSL_PMU_ADDR (CONFIG_SYS_IMMR + 0x00E30000) +#define CONFIG_SYS_FSL_CH3_CLK_GRPA_ADDR (CONFIG_SYS_IMMR + 0x00300000) +#define CONFIG_SYS_FSL_CH3_CLK_GRPB_ADDR (CONFIG_SYS_IMMR + 0x00310000) +#define CONFIG_SYS_FSL_CH3_CLK_CTRL_ADDR (CONFIG_SYS_IMMR + 0x00370000) +#define CONFIG_SYS_IFC_ADDR (CONFIG_SYS_IMMR + 0x01240000) +#define CONFIG_SYS_NS16550_COM1 (CONFIG_SYS_IMMR + 0x011C0500) +#define CONFIG_SYS_NS16550_COM2 (CONFIG_SYS_IMMR + 0x011C0600) +#define CONFIG_SYS_FSL_TIMER_ADDR 0x023d0000 +#define CONFIG_SYS_FSL_PMU_CLTBENR (CONFIG_SYS_FSL_PMU_ADDR + \ + 0x18A0) + +#define I2C1_BASE_ADDR (CONFIG_SYS_IMMR + 0x01000000) +#define I2C2_BASE_ADDR (CONFIG_SYS_IMMR + 0x01010000) +#define I2C3_BASE_ADDR (CONFIG_SYS_IMMR + 0x01020000) +#define I2C4_BASE_ADDR (CONFIG_SYS_IMMR + 0x01030000) 
+ +/* Generic Interrupt Controller Definitions */ +#define GICD_BASE 0x06000000 +#define GICR_BASE 0x06100000 + +/* SMMU Defintions */ +#define SMMU_BASE 0x05000000 /* GR0 Base */ + +/* DDR */ +#define CONFIG_SYS_FSL_DDR_LE +#define CONFIG_VERY_BIG_RAM +#define CONFIG_SYS_FSL_DDRC_ARM_GEN3 /* Enable Freescale ARM DDR3 driver */ +#define CONFIG_SYS_FSL_DDR /* Freescale DDR driver */ +#define CONFIG_SYS_LS2_DDR_BLOCK1_SIZE ((phys_size_t)2 << 30) +#define CONFIG_MAX_MEM_MAPPED CONFIG_SYS_LS2_DDR_BLOCK1_SIZE +#define CONFIG_SYS_FSL_DDR_VER FSL_DDR_VER_5_0 + + +/* IFC */ +#define CONFIG_SYS_FSL_IFC_LE + +#ifdef CONFIG_LS2100A +#define CONFIG_MAX_CPUS 16 +#define CONFIG_SYS_FSL_IFC_BANK_COUNT 8 +#define CONFIG_NUM_DDR_CONTROLLERS 2 +#define CONFIG_SYS_FSL_CLUSTER_CLOCKS { 1, 1, 4, 4 } +#else +#error SoC not defined +#endif + +#endif /* _ASM_ARMV8_FSL_LSCH3_CONFIG_ */ diff --git a/arch/arm/include/asm/arch-fsl-lsch3/gpio.h b/arch/arm/include/asm/arch-fsl-lsch3/gpio.h new file mode 100644 index 0000000..f23a78c --- /dev/null +++ b/arch/arm/include/asm/arch-fsl-lsch3/gpio.h @@ -0,0 +1,9 @@ +/* + * Copyright 2014, Freescale Semiconductor + * + * SPDX-License-Identifier: GPL-2.0+ + */ + +#ifndef _ASM_ARMV8_FSL_LSCH3_GPIO_H_ +#define _ASM_ARMV8_FSL_LSCH3_GPIO_H_ +#endif /* _ASM_ARMV8_FSL_LSCH3_GPIO_H_ */ diff --git a/arch/arm/include/asm/arch-fsl-lsch3/immap_lsch3.h b/arch/arm/include/asm/arch-fsl-lsch3/immap_lsch3.h new file mode 100644 index 0000000..18e66bd --- /dev/null +++ b/arch/arm/include/asm/arch-fsl-lsch3/immap_lsch3.h @@ -0,0 +1,116 @@ +/* + * LayerScape Internal Memory Map + * + * Copyright 2014 Freescale Semiconductor, Inc. 
+ * + * SPDX-License-Identifier: GPL-2.0+ + */ + +#ifndef __ARCH_FSL_LSCH3_IMMAP_H +#define __ARCH_FSL_LSCH3_IMMAP_H_ + +/* This is chassis generation 3 */ + +struct sys_info { + unsigned long freq_processor[CONFIG_MAX_CPUS]; + unsigned long freq_systembus; + unsigned long freq_ddrbus; + unsigned long freq_localbus; + unsigned long freq_qe; +#ifdef CONFIG_SYS_DPAA_FMAN + unsigned long freq_fman[CONFIG_SYS_NUM_FMAN]; +#endif +#ifdef CONFIG_SYS_DPAA_QBMAN + unsigned long freq_qman; +#endif +#ifdef CONFIG_SYS_DPAA_PME + unsigned long freq_pme; +#endif +}; + +/* Global Utilities Block */ +struct ccsr_gur { + u32 porsr1; /* POR status 1 */ + u32 porsr2; /* POR status 2 */ + u8 res_008[0x20-0x8]; + u32 gpporcr1; /* General-purpose POR configuration */ + u32 gpporcr2; /* General-purpose POR configuration 2 */ + u32 dcfg_fusesr; /* Fuse status register */ + u32 gpporcr3; + u32 gpporcr4; + u8 res_034[0x70-0x34]; + u32 devdisr; /* Device disable control */ + u32 devdisr2; /* Device disable control 2 */ + u32 devdisr3; /* Device disable control 3 */ + u32 devdisr4; /* Device disable control 4 */ + u32 devdisr5; /* Device disable control 5 */ + u32 devdisr6; /* Device disable control 6 */ + u32 devdisr7; /* Device disable control 7 */ + u8 res_08c[0x90-0x8c]; + u32 coredisru; /* uppper portion for support of 64 cores */ + u32 coredisrl; /* lower portion for support of 64 cores */ + u8 res_098[0xa0-0x98]; + u32 pvr; /* Processor version */ + u32 svr; /* System version */ + u32 mvr; /* Manufacturing version */ + u8 res_0ac[0x100-0xac]; + u32 rcwsr[32]; /* Reset control word status */ + +#define FSL_CHASSIS3_RCWSR0_SYS_PLL_RAT_SHIFT 2 +#define FSL_CHASSIS3_RCWSR0_SYS_PLL_RAT_MASK 0x1f +#define FSL_CHASSIS3_RCWSR0_MEM_PLL_RAT_SHIFT 10 +#define FSL_CHASSIS3_RCWSR0_MEM_PLL_RAT_MASK 0x3f + u8 res_180[0x200-0x180]; + u32 scratchrw[32]; /* Scratch Read/Write */ + u8 res_280[0x300-0x280]; + u32 scratchw1r[4]; /* Scratch Read (Write once) */ + u8 res_310[0x400-0x310]; + u32 bootlocptrl; 
/* Boot location pointer low-order addr */ + u32 bootlocptrh; /* Boot location pointer high-order addr */ + u8 res_408[0x500-0x408]; + u8 res_500[0x740-0x500]; /* add more registers when needed */ + u32 tp_ityp[64]; /* Topology Initiator Type Register */ + struct { + u32 upper; + u32 lower; + } tp_cluster[3]; /* Core Cluster n Topology Register */ + u8 res_858[0x1000-0x858]; +}; + +#define TP_ITYP_AV 0x00000001 /* Initiator available */ +#define TP_ITYP_TYPE(x) (((x) & 0x6) >> 1) /* Initiator Type */ +#define TP_ITYP_TYPE_ARM 0x0 +#define TP_ITYP_TYPE_PPC 0x1 /* PowerPC */ +#define TP_ITYP_TYPE_OTHER 0x2 /* StarCore DSP */ +#define TP_ITYP_TYPE_HA 0x3 /* HW Accelerator */ +#define TP_ITYP_THDS(x) (((x) & 0x18) >> 3) /* # threads */ +#define TP_ITYP_VER(x) (((x) & 0xe0) >> 5) /* Initiator Version */ +#define TY_ITYP_VER_A7 0x1 +#define TY_ITYP_VER_A53 0x2 +#define TY_ITYP_VER_A57 0x3 + +#define TP_CLUSTER_EOC 0x80000000 /* end of clusters */ +#define TP_CLUSTER_INIT_MASK 0x0000003f /* initiator mask */ +#define TP_INIT_PER_CLUSTER 4 + +struct ccsr_clk_cluster_group { + struct { + u8 res_00[0x10]; + u32 csr; + u8 res_14[0x20-0x14]; + } hwncsr[3]; + u8 res_60[0x80-0x60]; + struct { + u32 gsr; + u8 res_84[0xa0-0x84]; + } pllngsr[3]; + u8 res_e0[0x100-0xe0]; +}; + +struct ccsr_clk_ctrl { + struct { + u32 csr; /* core cluster n clock control status */ + u8 res_04[0x20-0x04]; + } clkcncsr[8]; +}; +#endif /* __ARCH_FSL_LSCH3_IMMAP_H */ diff --git a/arch/arm/include/asm/arch-fsl-lsch3/imx-regs.h b/arch/arm/include/asm/arch-fsl-lsch3/imx-regs.h new file mode 100644 index 0000000..8f00535 --- /dev/null +++ b/arch/arm/include/asm/arch-fsl-lsch3/imx-regs.h @@ -0,0 +1,13 @@ +/* + * Copyright 2014 Freescale Semiconductor, Inc. 
+ * + * SPDX-License-Identifier: GPL-2.0+ + * + */ + +#ifndef __ASM_ARCH_FSL_LSCH3_IMX_REGS_H_ +#define __ASM_ARCH_FSL_LSCH3_IMX_REGS_H_ + +#define I2C_QUIRK_REG /* enable 8-bit driver */ + +#endif /* __ASM_ARCH_FSL_LSCH3_IMX_REGS_H_ */ diff --git a/arch/arm/include/asm/arch-fsl-lsch3/mmu.h b/arch/arm/include/asm/arch-fsl-lsch3/mmu.h new file mode 100644 index 0000000..edbed47 --- /dev/null +++ b/arch/arm/include/asm/arch-fsl-lsch3/mmu.h @@ -0,0 +1,10 @@ +/* + * Copyright 2014, Freescale Semiconductor + * + * SPDX-License-Identifier: GPL-2.0+ + */ + +#ifndef _ASM_ARMV8_FSL_LSCH3_MMU_H_ +#define _ASM_ARMV8_FSL_LSCH3_MMU_H_ +#include <asm/arch-armv8/mmu.h> +#endif /* _ASM_ARMV8_FSL_LSCH3_MMU_H_ */ diff --git a/arch/arm/include/asm/config.h b/arch/arm/include/asm/config.h index 2a20a77..d3433da 100644 --- a/arch/arm/include/asm/config.h +++ b/arch/arm/include/asm/config.h @@ -17,4 +17,8 @@ #define CONFIG_STATIC_RELA #endif
+#ifdef CONFIG_FSL_LSCH3 +#include <asm/arch-fsl-lsch3/config.h> +#endif + #endif diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h index 74ee9a4..d51ba66 100644 --- a/arch/arm/include/asm/system.h +++ b/arch/arm/include/asm/system.h @@ -78,6 +78,8 @@ void gic_send_sgi(unsigned long sgino); void wait_for_wakeup(void); void smp_kick_all_cpus(void);
+void flush_l3_cache(void); + #endif /* __ASSEMBLY__ */
#else /* CONFIG_ARM64 */ diff --git a/drivers/i2c/mxc_i2c.c b/drivers/i2c/mxc_i2c.c index 48468d7..c14797c 100644 --- a/drivers/i2c/mxc_i2c.c +++ b/drivers/i2c/mxc_i2c.c @@ -429,6 +429,11 @@ static void * const i2c_bases[] = { (void *)I2C3_BASE_ADDR #elif defined(CONFIG_VF610) (void *)I2C0_BASE_ADDR +#elif defined(CONFIG_FSL_LSCH3) + (void *)I2C1_BASE_ADDR, + (void *)I2C2_BASE_ADDR, + (void *)I2C3_BASE_ADDR, + (void *)I2C4_BASE_ADDR #else #error "architecture not supported" #endif diff --git a/include/common.h b/include/common.h index 232136c..a42d6d2 100644 --- a/include/common.h +++ b/include/common.h @@ -693,9 +693,6 @@ ulong get_PERCLK3(void); ulong get_bus_freq (ulong); int get_serial_clock(void);
-#if defined(CONFIG_MPC83xx) || defined(CONFIG_MPC85xx) -ulong get_ddr_freq(ulong); -#endif #if defined(CONFIG_MPC85xx) typedef MPC85xx_SYS_INFO sys_info_t; void get_sys_info ( sys_info_t * ); @@ -711,6 +708,8 @@ static inline ulong get_ddr_freq(ulong dummy) { return get_bus_freq(dummy); } +#else +ulong get_ddr_freq(ulong); #endif
#if defined(CONFIG_4xx)

On Wed, May 28, 2014 at 6:46 PM, York Sun yorksun@freescale.com wrote:
Freescale LayerScape with Chassis Generation 3 is a set of SoCs with ARMv8 cores and 3rd generation of Chassis. We use a different MMU setup to support the memory map and cache attributes for these SoCs. MMU and cache are enabled very early to boost performance, especially for early development on emulators. After u-boot relocates to DDR, a new MMU table with QBMan cache access is created in DDR. SMMU pagesize is set in SMMU_sACR register. Both DDR3 and DDR4 are supported.
Signed-off-by: York Sun yorksun@freescale.com Signed-off-by: Varun Sethi Varun.Sethi@freescale.com Signed-off-by: Arnab Basu arnab.basu@freescale.com
Change log: v3: Remove blank lines at the end of files Fix cluster PLL GSR register for accessing beyond array size Update final MMU table to support QBMan memory with cache Set SMMU pagesize in SMMU_sACR register in lowlevel init. Add DDR4 support Remove forcing L3 cache flushing Update GICv3 redistributor base address
Some of these changes are caused by model change.
arch/arm/cpu/armv8/cache_v8.c | 7 +- arch/arm/cpu/armv8/fsl-lsch3/Makefile | 10 + arch/arm/cpu/armv8/fsl-lsch3/README | 10 + arch/arm/cpu/armv8/fsl-lsch3/cpu.c | 474 +++++++++++++++++++++ arch/arm/cpu/armv8/fsl-lsch3/cpu.h | 7 + arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S | 65 +++ arch/arm/cpu/armv8/fsl-lsch3/speed.c | 176 ++++++++ arch/arm/cpu/armv8/fsl-lsch3/speed.h | 7 + arch/arm/cpu/armv8/fsl-lsch3/timer.c | 62 +++ arch/arm/include/asm/arch-fsl-lsch3/clock.h | 23 + arch/arm/include/asm/arch-fsl-lsch3/config.h | 65 +++ arch/arm/include/asm/arch-fsl-lsch3/gpio.h | 9 + arch/arm/include/asm/arch-fsl-lsch3/immap_lsch3.h | 116 +++++ arch/arm/include/asm/arch-fsl-lsch3/imx-regs.h | 13 + arch/arm/include/asm/arch-fsl-lsch3/mmu.h | 10 + arch/arm/include/asm/config.h | 4 + arch/arm/include/asm/system.h | 2 + drivers/i2c/mxc_i2c.c | 5 + include/common.h | 5 +- 19 files changed, 1066 insertions(+), 4 deletions(-) create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/Makefile create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/README create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/cpu.c create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/cpu.h create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/speed.c create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/speed.h create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/timer.c create mode 100644 arch/arm/include/asm/arch-fsl-lsch3/clock.h create mode 100644 arch/arm/include/asm/arch-fsl-lsch3/config.h create mode 100644 arch/arm/include/asm/arch-fsl-lsch3/gpio.h create mode 100644 arch/arm/include/asm/arch-fsl-lsch3/immap_lsch3.h create mode 100644 arch/arm/include/asm/arch-fsl-lsch3/imx-regs.h create mode 100644 arch/arm/include/asm/arch-fsl-lsch3/mmu.h
diff --git a/arch/arm/cpu/armv8/cache_v8.c b/arch/arm/cpu/armv8/cache_v8.c index a96ecda..c47acba 100644 --- a/arch/arm/cpu/armv8/cache_v8.c +++ b/arch/arm/cpu/armv8/cache_v8.c @@ -83,12 +83,17 @@ void invalidate_dcache_all(void) __asm_invalidate_dcache_all(); }
+void __weak flush_l3_cache(void) +{ +}
/*
- Performs a clean & invalidation of the entire data cache at all levels
*/ void flush_dcache_all(void) { __asm_flush_dcache_all();
flush_l3_cache();
}
/* @@ -221,7 +226,7 @@ void invalidate_icache_all(void)
- Enable dCache & iCache, whether cache is actually enabled
- depend on CONFIG_SYS_DCACHE_OFF and CONFIG_SYS_ICACHE_OFF
*/ -void enable_caches(void) +void __weak enable_caches(void) { icache_enable(); dcache_enable(); diff --git a/arch/arm/cpu/armv8/fsl-lsch3/Makefile b/arch/arm/cpu/armv8/fsl-lsch3/Makefile new file mode 100644 index 0000000..4b859cf --- /dev/null +++ b/arch/arm/cpu/armv8/fsl-lsch3/Makefile @@ -0,0 +1,10 @@ +# +# Copyright 2014, Freescale Semiconductor +# +# SPDX-License-Identifier: GPL-2.0+ +#
+obj-y += cpu.o +obj-y += timer.o +obj-y += lowlevel.o +obj-y += speed.o diff --git a/arch/arm/cpu/armv8/fsl-lsch3/README b/arch/arm/cpu/armv8/fsl-lsch3/README new file mode 100644 index 0000000..de34a91 --- /dev/null +++ b/arch/arm/cpu/armv8/fsl-lsch3/README @@ -0,0 +1,10 @@ +# +# Copyright 2014 Freescale Semiconductor +# +# SPDX-License-Identifier: GPL-2.0+ +#
+Freescale LayerScape with Chassis Generation 3
+This architecture supports Freescale ARMv8 SoCs with Chassis generation 3, +for example LS2100A. diff --git a/arch/arm/cpu/armv8/fsl-lsch3/cpu.c b/arch/arm/cpu/armv8/fsl-lsch3/cpu.c new file mode 100644 index 0000000..2780390 --- /dev/null +++ b/arch/arm/cpu/armv8/fsl-lsch3/cpu.c @@ -0,0 +1,474 @@ +/*
- Copyright 2014 Freescale Semiconductor, Inc.
- SPDX-License-Identifier: GPL-2.0+
- */
+#include <common.h> +#include <asm/io.h> +#include <asm/system.h> +#include <asm/armv8/mmu.h> +#include <asm/io.h> +#include <asm/arch-fsl-lsch3/immap_lsch3.h> +#include "cpu.h" +#include "speed.h"
+DECLARE_GLOBAL_DATA_PTR;
+#ifndef CONFIG_SYS_DCACHE_OFF +/*
- To start MMU before DDR is available, we create MMU table in SRAM.
- The base address of SRAM is CONFIG_SYS_FSL_OCRAM_BASE. We use three
- levels of translation tables here to cover 40-bit address space.
- We use 4KB granule size, with 40 bits physical address, T0SZ=24
- Level 0 IA[39], table address @0
- Level 1 IA[31:30], table address @0x1000, 0x2000
- Level 2 IA[29:21], table address @0x3000
- */
+#define EARLY_SECTION_SHIFT_L0 39UL +#define EARLY_SECTION_SHIFT_L1 30UL +#define EARLY_SECTION_SHIFT_L2 21UL +#define EARLY_BLOCK_SIZE_L0 0x8000000000UL +#define EARLY_BLOCK_SIZE_L1 (1 << EARLY_SECTION_SHIFT_L1) +#define EARLY_BLOCK_SIZE_L2 (1 << EARLY_SECTION_SHIFT_L2) +#define CONFIG_SYS_IFC_BASE 0x30000000 +#define CONFIG_SYS_IFC_SIZE 0x10000000 +#define CONFIG_SYS_IFC_BASE2 0x500000000 +#define CONFIG_SYS_IFC_SIZE2 0x100000000 +#define TCR_EL2_PS_40BIT (2 << 16) +#define EARLY_VA_BITS (40) +#define EARLY_TCR (TCR_TG0_4K | \
TCR_EL2_PS_40BIT | \
TCR_SHARED_NON | \
TCR_ORGN_NC | \
TCR_IRGN_NC | \
TCR_T0SZ(EARLY_VA_BITS))
+/*
- Final MMU
- Let's start from the same layout as early MMU and modify as needed.
- IFC regions will be cache-inhibit.
- */
+#define FINAL_SECTION_SHIFT_L0 39UL +#define FINAL_SECTION_SHIFT_L1 30UL +#define FINAL_SECTION_SHIFT_L2 21UL +#define FINAL_BLOCK_SIZE_L0 0x8000000000UL +#define FINAL_BLOCK_SIZE_L1 (1 << FINAL_SECTION_SHIFT_L1) +#define FINAL_BLOCK_SIZE_L2 (1 << FINAL_SECTION_SHIFT_L2) +#define FINAL_QBMAN_CACHED_MEM 0x818000000UL +#define FINAL_QBMAN_CACHED_SIZE 0x4000000 +#define TCR_EL2_PS_40BIT (2 << 16) +#define FINAL_VA_BITS (40) +#define FINAL_TCR (TCR_TG0_4K | \
TCR_EL2_PS_40BIT | \
TCR_SHARED_NON | \
TCR_ORGN_NC | \
TCR_IRGN_NC | \
TCR_T0SZ(FINAL_VA_BITS))
+static void set_pgtable_section(u64 *page_table, u64 index, u64 section,
u8 memory_type)
+{
u64 value;
value = section | PMD_TYPE_SECT | PMD_SECT_AF;
value |= PMD_ATTRINDX(memory_type);
page_table[index] = value;
+}
This function looks like it should be common.
+static inline void early_mmu_setup(void) +{
int el;
u64 i;
u64 section_l1t0, section_l1t1, section_l2;
u64 *level0_table = (u64 *)CONFIG_SYS_FSL_OCRAM_BASE;
u64 *level1_table_0 = (u64 *)(CONFIG_SYS_FSL_OCRAM_BASE + 0x1000);
u64 *level1_table_1 = (u64 *)(CONFIG_SYS_FSL_OCRAM_BASE + 0x2000);
u64 *level2_table = (u64 *)(CONFIG_SYS_FSL_OCRAM_BASE + 0x3000);
level0_table[0] =
(u64)level1_table_0 | PMD_TYPE_TABLE;
level0_table[1] =
(u64)level1_table_1 | PMD_TYPE_TABLE;
/*
* set level 1 table 0 to cache_inhibit, covering 0 to 512GB
* set level 1 table 1 to cache enabled, covering 512GB to 1TB
* set level 2 table to cache-inhibit, covering 0 to 1GB
*/
section_l1t0 = 0;
section_l1t1 = EARLY_BLOCK_SIZE_L0;
section_l2 = 0;
for (i = 0; i < 512; i++) {
set_pgtable_section(level1_table_0, i, section_l1t0,
MT_DEVICE_NGNRNE);
set_pgtable_section(level1_table_1, i, section_l1t1,
MT_NORMAL);
set_pgtable_section(level2_table, i, section_l2,
MT_DEVICE_NGNRNE);
section_l1t0 += EARLY_BLOCK_SIZE_L1;
section_l1t1 += EARLY_BLOCK_SIZE_L1;
section_l2 += EARLY_BLOCK_SIZE_L2;
}
level1_table_0[0] =
(u64)level2_table | PMD_TYPE_TABLE;
level1_table_0[1] =
0x40000000 | PMD_SECT_AF | PMD_TYPE_SECT |
PMD_ATTRINDX(MT_DEVICE_NGNRNE);
level1_table_0[2] =
0x80000000 | PMD_SECT_AF | PMD_TYPE_SECT |
PMD_ATTRINDX(MT_NORMAL);
level1_table_0[3] =
0xc0000000 | PMD_SECT_AF | PMD_TYPE_SECT |
PMD_ATTRINDX(MT_NORMAL);
/* Rewrite table to enable cache */
set_pgtable_section(level2_table,
CONFIG_SYS_FSL_OCRAM_BASE >> EARLY_SECTION_SHIFT_L2,
CONFIG_SYS_FSL_OCRAM_BASE,
MT_NORMAL);
for (i = CONFIG_SYS_IFC_BASE >> EARLY_SECTION_SHIFT_L2;
i < (CONFIG_SYS_IFC_BASE + CONFIG_SYS_IFC_SIZE)
>> EARLY_SECTION_SHIFT_L2; i++) {
section_l2 = i << EARLY_SECTION_SHIFT_L2;
set_pgtable_section(level2_table, i,
section_l2, MT_NORMAL);
}
el = current_el();
We really can't have u-boot running at random ELs in v8 for different platforms. It's a mess on v7. You should never be at EL3. u-boot could be defined to run at EL1, but then you need to be able to go back to EL2 to boot the kernel. So really u-boot should always run at EL2 unless you are running in a VM, but that would be a different platform.
if (el == 1) {
asm volatile("dsb sy;isb");
asm volatile("msr ttbr0_el1, %0"
: : "r" ((u64)level0_table) : "memory");
asm volatile("msr tcr_el1, %0"
: : "r" (EARLY_TCR) : "memory");
asm volatile("msr mair_el1, %0"
: : "r" (MEMORY_ATTRIBUTES) : "memory");
These should all be inline functions or macros.
} else if (el == 2) {
asm volatile("dsb sy;isb");
asm volatile("msr ttbr0_el2, %0"
: : "r" ((u64)level0_table) : "memory");
asm volatile("msr tcr_el2, %0"
: : "r" (EARLY_TCR) : "memory");
asm volatile("msr mair_el2, %0"
: : "r" (MEMORY_ATTRIBUTES) : "memory");
} else if (el == 3) {
asm volatile("dsb sy;isb");
asm volatile("msr ttbr0_el3, %0"
: : "r" ((u64)level0_table) : "memory");
asm volatile("msr tcr_el3, %0"
: : "r" (EARLY_TCR) : "memory");
asm volatile("msr mair_el3, %0"
: : "r" (MEMORY_ATTRIBUTES) : "memory");
} else {
hang();
}
set_sctlr(get_sctlr() | CR_M);
+}
+static inline void final_mmu_setup(void)
Looks like nearly the same code repeated...
+{
int el;
u64 i, tbl_base, tbl_limit, section_base;
u64 section_l1t0, section_l1t1, section_l2;
u64 *level0_table = (u64 *)gd->arch.tlb_addr;
u64 *level1_table_0 = (u64 *)(gd->arch.tlb_addr + 0x1000);
u64 *level1_table_1 = (u64 *)(gd->arch.tlb_addr + 0x2000);
u64 *level2_table_0 = (u64 *)(gd->arch.tlb_addr + 0x3000);
u64 *level2_table_1 = (u64 *)(gd->arch.tlb_addr + 0x4000);
level0_table[0] =
(u64)level1_table_0 | PMD_TYPE_TABLE;
level0_table[1] =
(u64)level1_table_1 | PMD_TYPE_TABLE;
/*
* set level 1 table 0 to cache_inhibit, covering 0 to 512GB
* set level 1 table 1 to cache enabled, covering 512GB to 1TB
* set level 2 table 0 to cache-inhibit, covering 0 to 1GB
*/
section_l1t0 = 0;
section_l1t1 = FINAL_BLOCK_SIZE_L0;
section_l2 = 0;
for (i = 0; i < 512; i++) {
set_pgtable_section(level1_table_0, i, section_l1t0,
MT_DEVICE_NGNRNE);
set_pgtable_section(level1_table_1, i, section_l1t1,
MT_NORMAL);
set_pgtable_section(level2_table_0, i, section_l2,
MT_DEVICE_NGNRNE);
section_l1t0 += FINAL_BLOCK_SIZE_L1;
section_l1t1 += FINAL_BLOCK_SIZE_L1;
section_l2 += FINAL_BLOCK_SIZE_L2;
}
level1_table_0[0] =
(u64)level2_table_0 | PMD_TYPE_TABLE;
level1_table_0[2] =
0x80000000 | PMD_SECT_AF | PMD_TYPE_SECT |
PMD_ATTRINDX(MT_NORMAL);
level1_table_0[3] =
0xc0000000 | PMD_SECT_AF | PMD_TYPE_SECT |
PMD_ATTRINDX(MT_NORMAL);
/* Rewrite table to enable cache */
set_pgtable_section(level2_table_0,
CONFIG_SYS_FSL_OCRAM_BASE >> FINAL_SECTION_SHIFT_L2,
CONFIG_SYS_FSL_OCRAM_BASE,
MT_NORMAL);
/*
* Fill in other part of tables if cache is needed
* If finer granularity than 1GB is needed, sub table
* should be created.
*/
section_base = FINAL_QBMAN_CACHED_MEM & ~(FINAL_BLOCK_SIZE_L1 - 1);
i = section_base >> FINAL_SECTION_SHIFT_L1;
level1_table_0[i] = (u64)level2_table_1 | PMD_TYPE_TABLE;
section_l2 = section_base;
for (i = 0; i < 512; i++) {
set_pgtable_section(level2_table_1, i, section_l2,
MT_DEVICE_NGNRNE);
section_l2 += FINAL_BLOCK_SIZE_L2;
}
tbl_base = FINAL_QBMAN_CACHED_MEM & (FINAL_BLOCK_SIZE_L1 - 1);
tbl_limit = (FINAL_QBMAN_CACHED_MEM + FINAL_QBMAN_CACHED_SIZE) &
(FINAL_BLOCK_SIZE_L1 - 1);
for (i = tbl_base >> FINAL_SECTION_SHIFT_L2;
i < tbl_limit >> FINAL_SECTION_SHIFT_L2; i++) {
section_l2 = section_base + (i << FINAL_SECTION_SHIFT_L2);
set_pgtable_section(level2_table_1, i,
section_l2, MT_NORMAL);
}
el = current_el();
if (el == 1) {
asm volatile("dsb sy;isb");
asm volatile("msr ttbr0_el1, %0"
: : "r" ((u64)level0_table) : "memory");
asm volatile("msr tcr_el1, %0"
: : "r" (FINAL_TCR) : "memory");
asm volatile("msr mair_el1, %0"
: : "r" (MEMORY_ATTRIBUTES) : "memory");
} else if (el == 2) {
asm volatile("dsb sy;isb");
asm volatile("msr ttbr0_el2, %0"
: : "r" ((u64)level0_table) : "memory");
asm volatile("msr tcr_el2, %0"
: : "r" (FINAL_TCR) : "memory");
asm volatile("msr mair_el2, %0"
: : "r" (MEMORY_ATTRIBUTES) : "memory");
} else if (el == 3) {
asm volatile("dsb sy;isb");
asm volatile("msr ttbr0_el3, %0"
: : "r" ((u64)level0_table) : "memory");
asm volatile("msr tcr_el3, %0"
: : "r" (FINAL_TCR) : "memory");
asm volatile("msr mair_el3, %0"
: : "r" (MEMORY_ATTRIBUTES) : "memory");
} else {
hang();
}
set_sctlr(get_sctlr() | CR_M);
+}
+int arch_cpu_init(void) +{
icache_enable();
__asm_invalidate_dcache_all();
__asm_invalidate_tlb_all();
early_mmu_setup();
set_sctlr(get_sctlr() | CR_C);
return 0;
+}
+/*
- flush_l3_cache
- Dickens L3 cache can be flushed by transitioning from FAM to SFONLY power
- state, by writing to HP-F P-state request register.
Other SOCs will have Dickens. Are these registers FSL specific? If not, this should be common.
Also, I believe the proper way to flush Dickens is using the architected cache flushing method where you walk the levels out to level 3.
- */
+#define HNF0_PSTATE_REQ 0x04200010 +#define HNF1_PSTATE_REQ 0x04210010 +#define HNF2_PSTATE_REQ 0x04220010 +#define HNF3_PSTATE_REQ 0x04230010 +#define HNF4_PSTATE_REQ 0x04240010 +#define HNF5_PSTATE_REQ 0x04250010 +#define HNF6_PSTATE_REQ 0x04260010 +#define HNF7_PSTATE_REQ 0x04270010 +#define HNFPSTAT_MASK (0xFFFFFFFFFFFFFFFC) +#define HNFPSTAT_FAM 0x3 +#define HNFPSTAT_SFONLY 0x01
+static void hnf_pstate_req(u64 *ptr, u64 state) +{
int timeout = 1000;
out_le64(ptr, (in_le64(ptr) & HNFPSTAT_MASK) | (state & 0x3));
ptr++;
/* checking if the transition is completed */
while (timeout > 0) {
if (((in_le64(ptr) & 0x0c) >> 2) == (state & 0x3))
break;
udelay(100);
timeout--;
}
+}
+void flush_l3_cache(void) +{
hnf_pstate_req((u64 *)HNF0_PSTATE_REQ, HNFPSTAT_SFONLY);
hnf_pstate_req((u64 *)HNF1_PSTATE_REQ, HNFPSTAT_SFONLY);
hnf_pstate_req((u64 *)HNF2_PSTATE_REQ, HNFPSTAT_SFONLY);
hnf_pstate_req((u64 *)HNF3_PSTATE_REQ, HNFPSTAT_SFONLY);
hnf_pstate_req((u64 *)HNF4_PSTATE_REQ, HNFPSTAT_SFONLY);
hnf_pstate_req((u64 *)HNF5_PSTATE_REQ, HNFPSTAT_SFONLY);
hnf_pstate_req((u64 *)HNF6_PSTATE_REQ, HNFPSTAT_SFONLY);
hnf_pstate_req((u64 *)HNF7_PSTATE_REQ, HNFPSTAT_SFONLY);
hnf_pstate_req((u64 *)HNF0_PSTATE_REQ, HNFPSTAT_FAM);
hnf_pstate_req((u64 *)HNF1_PSTATE_REQ, HNFPSTAT_FAM);
hnf_pstate_req((u64 *)HNF2_PSTATE_REQ, HNFPSTAT_FAM);
hnf_pstate_req((u64 *)HNF3_PSTATE_REQ, HNFPSTAT_FAM);
hnf_pstate_req((u64 *)HNF4_PSTATE_REQ, HNFPSTAT_FAM);
hnf_pstate_req((u64 *)HNF5_PSTATE_REQ, HNFPSTAT_FAM);
hnf_pstate_req((u64 *)HNF6_PSTATE_REQ, HNFPSTAT_FAM);
hnf_pstate_req((u64 *)HNF7_PSTATE_REQ, HNFPSTAT_FAM);
+}
+/*
- This function is called from lib/board.c.
- It recreates MMU table in main memory. MMU and d-cache are enabled earlier.
- There is no need to disable d-cache for this operation.
- */
+void enable_caches(void) +{
final_mmu_setup();
flush_dcache_range(gd->arch.tlb_addr,
gd->arch.tlb_addr + gd->arch.tlb_size);
__asm_invalidate_tlb_all();
+} +#endif
+static inline u32 init_type(u32 cluster, int init_id)
init_type? That's a great name.
+{
struct ccsr_gur *gur = (void *)(CONFIG_SYS_FSL_GUTS_ADDR);
u32 idx = (cluster >> (init_id * 8)) & TP_CLUSTER_INIT_MASK;
u32 type = in_le32(&gur->tp_ityp[idx]);
if (type & TP_ITYP_AV)
return type;
return 0;
+}
+u32 cpu_mask(void) +{
struct ccsr_gur __iomem *gur = (void *)(CONFIG_SYS_FSL_GUTS_ADDR);
int i = 0, count = 0;
u32 cluster, type, mask = 0;
do {
int j;
cluster = in_le32(&gur->tp_cluster[i].lower);
for (j = 0; j < TP_INIT_PER_CLUSTER; j++) {
type = init_type(cluster, j);
if (type) {
if (TP_ITYP_TYPE(type) == TP_ITYP_TYPE_ARM)
mask |= 1 << count;
count++;
}
}
i++;
} while ((cluster & TP_CLUSTER_EOC) != TP_CLUSTER_EOC);
return mask;
+}
+/*
- Return the number of cores on this SOC.
- */
+int cpu_numcores(void) +{
return hweight32(cpu_mask());
+}
+int fsl_qoriq_core_to_cluster(unsigned int core) +{
struct ccsr_gur __iomem *gur =
(void __iomem *)(CONFIG_SYS_FSL_GUTS_ADDR);
int i = 0, count = 0;
u32 cluster;
do {
int j;
cluster = in_le32(&gur->tp_cluster[i].lower);
for (j = 0; j < TP_INIT_PER_CLUSTER; j++) {
if (init_type(cluster, j)) {
if (count == core)
return i;
count++;
}
}
i++;
} while ((cluster & TP_CLUSTER_EOC) != TP_CLUSTER_EOC);
return -1; /* cannot identify the cluster */
+}
+u32 fsl_qoriq_core_to_type(unsigned int core) +{
struct ccsr_gur __iomem *gur =
(void __iomem *)(CONFIG_SYS_FSL_GUTS_ADDR);
int i = 0, count = 0;
u32 cluster, type;
do {
int j;
cluster = in_le32(&gur->tp_cluster[i].lower);
for (j = 0; j < TP_INIT_PER_CLUSTER; j++) {
type = init_type(cluster, j);
if (type) {
if (count == core)
return type;
count++;
}
}
i++;
} while ((cluster & TP_CLUSTER_EOC) != TP_CLUSTER_EOC);
return -1; /* cannot identify the cluster */
+}
Do you plan on supporting PSCI because all this core and cluster stuff belongs there.
+#ifdef CONFIG_DISPLAY_CPUINFO +int print_cpuinfo(void) +{
struct sys_info sysinfo;
char buf[32];
unsigned int i, core;
u32 type;
get_sys_info(&sysinfo);
puts("Clock Configuration:");
for_each_cpu(i, core, cpu_numcores(), cpu_mask()) {
if (!(i % 3))
puts("\n ");
type = TP_ITYP_VER(fsl_qoriq_core_to_type(core));
printf("CPU%d(%s):%-4s MHz ", core,
type == TY_ITYP_VER_A7 ? "A7 " :
(type == TY_ITYP_VER_A53 ? "A53" :
(type == TY_ITYP_VER_A57 ? "A57" : " ")),
strmhz(buf, sysinfo.freq_processor[core]));
}
printf("\n Bus: %-4s MHz ",
strmhz(buf, sysinfo.freq_systembus));
printf("DDR: %-4s MHz", strmhz(buf, sysinfo.freq_ddrbus));
puts("\n");
return 0;
+} +#endif diff --git a/arch/arm/cpu/armv8/fsl-lsch3/cpu.h b/arch/arm/cpu/armv8/fsl-lsch3/cpu.h new file mode 100644 index 0000000..28544d7 --- /dev/null +++ b/arch/arm/cpu/armv8/fsl-lsch3/cpu.h @@ -0,0 +1,7 @@ +/*
- Copyright 2014, Freescale Semiconductor
- SPDX-License-Identifier: GPL-2.0+
- */
+int fsl_qoriq_core_to_cluster(unsigned int core); diff --git a/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S b/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S new file mode 100644 index 0000000..087d5d1 --- /dev/null +++ b/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S @@ -0,0 +1,65 @@ +/*
- (C) Copyright 2014 Freescale Semiconductor
- SPDX-License-Identifier: GPL-2.0+
- Extracted from armv8/start.S
- */
+#include <config.h> +#include <linux/linkage.h> +#include <asm/macro.h>
+ENTRY(lowlevel_init)
/* Initialize GIC Secure Bank Status */
mov x29, lr /* Save LR */
/* Set the SMMU page size in the sACR register */
ldr x1, =SMMU_BASE
ldr w0, [x1, #0x10]
orr w0, w0, #1 << 16 /* set sACR.pagesize to indicate 64K page */
str w0, [x1, #0x10]
+#if defined(CONFIG_GICV2) || defined(CONFIG_GICV3)
You can have either v2 or v3?
branch_if_slave x0, 1f
ldr x0, =GICD_BASE
bl gic_init_secure
+1: +#if defined(CONFIG_GICV3)
ldr x0, =GICR_BASE
bl gic_init_secure_percpu
+#elif defined(CONFIG_GICV2)
ldr x0, =GICD_BASE
ldr x1, =GICC_BASE
bl gic_init_secure_percpu
+#endif +#endif
branch_if_master x0, x1, 1f
/*
* Slave should wait for master clearing spin table.
* This sync prevents slaves observing incorrect
* value of spin table and jumping to wrong place.
*/
+#if defined(CONFIG_GICV2) || defined(CONFIG_GICV3) +#ifdef CONFIG_GICV2
ldr x0, =GICC_BASE
+#endif
bl gic_wait_for_interrupt
+#endif
/*
* All processors will enter EL2 and optionally EL1.
*/
bl armv8_switch_to_el2
+#ifdef CONFIG_ARMV8_SWITCH_TO_EL1
bl armv8_switch_to_el1
+#endif
b 2f
This all looks like cut and paste from existing startup code. Can't you refactor things?
+1: +2:
mov lr, x29 /* Restore LR */
ret
+ENDPROC(lowlevel_init) diff --git a/arch/arm/cpu/armv8/fsl-lsch3/speed.c b/arch/arm/cpu/armv8/fsl-lsch3/speed.c new file mode 100644 index 0000000..dc4a34b --- /dev/null +++ b/arch/arm/cpu/armv8/fsl-lsch3/speed.c @@ -0,0 +1,176 @@ +/*
- Copyright 2014, Freescale Semiconductor, Inc.
- SPDX-License-Identifier: GPL-2.0+
- Derived from arch/powerpc/cpu/mpc85xx/speed.c
- */
+#include <common.h> +#include <linux/compiler.h> +#include <fsl_ifc.h> +#include <asm/processor.h> +#include <asm/io.h> +#include <asm/arch-fsl-lsch3/immap_lsch3.h> +#include <asm/arch/clock.h> +#include "cpu.h"
+DECLARE_GLOBAL_DATA_PTR;
+#ifndef CONFIG_SYS_FSL_NUM_CC_PLLS +#define CONFIG_SYS_FSL_NUM_CC_PLLS 6 +#endif
+void get_sys_info(struct sys_info *sys_info) +{
struct ccsr_gur __iomem *gur = (void *)(CONFIG_SYS_FSL_GUTS_ADDR);
+#ifdef CONFIG_FSL_IFC
struct fsl_ifc *ifc_regs = (void *)CONFIG_SYS_IFC_ADDR;
u32 ccr;
+#endif
struct ccsr_clk_cluster_group __iomem *clk_grp[2] = {
(void *)(CONFIG_SYS_FSL_CH3_CLK_GRPA_ADDR),
(void *)(CONFIG_SYS_FSL_CH3_CLK_GRPB_ADDR)
};
struct ccsr_clk_ctrl __iomem *clk_ctrl =
(void *)(CONFIG_SYS_FSL_CH3_CLK_CTRL_ADDR);
unsigned int cpu;
const u8 core_cplx_pll[16] = {
[0] = 0, /* CC1 PPL / 1 */
[1] = 0, /* CC1 PPL / 2 */
[2] = 0, /* CC1 PPL / 4 */
[4] = 1, /* CC2 PPL / 1 */
[5] = 1, /* CC2 PPL / 2 */
[6] = 1, /* CC2 PPL / 4 */
[8] = 2, /* CC3 PPL / 1 */
[9] = 2, /* CC3 PPL / 2 */
[10] = 2, /* CC3 PPL / 4 */
[12] = 3, /* CC4 PPL / 1 */
[13] = 3, /* CC4 PPL / 2 */
[14] = 3, /* CC4 PPL / 4 */
};
const u8 core_cplx_pll_div[16] = {
[0] = 1, /* CC1 PPL / 1 */
[1] = 2, /* CC1 PPL / 2 */
[2] = 4, /* CC1 PPL / 4 */
[4] = 1, /* CC2 PPL / 1 */
[5] = 2, /* CC2 PPL / 2 */
[6] = 4, /* CC2 PPL / 4 */
[8] = 1, /* CC3 PPL / 1 */
[9] = 2, /* CC3 PPL / 2 */
[10] = 4, /* CC3 PPL / 4 */
[12] = 1, /* CC4 PPL / 1 */
[13] = 2, /* CC4 PPL / 2 */
[14] = 4, /* CC4 PPL / 4 */
};
uint i, cluster;
uint freq_c_pll[CONFIG_SYS_FSL_NUM_CC_PLLS];
uint ratio[CONFIG_SYS_FSL_NUM_CC_PLLS];
unsigned long sysclk = CONFIG_SYS_CLK_FREQ;
int cc_group[12] = CONFIG_SYS_FSL_CLUSTER_CLOCKS;
u32 c_pll_sel, cplx_pll;
void *offset;
sys_info->freq_systembus = sysclk;
+#ifdef CONFIG_DDR_CLK_FREQ
sys_info->freq_ddrbus = CONFIG_DDR_CLK_FREQ;
+#else
sys_info->freq_ddrbus = sysclk;
+#endif
sys_info->freq_systembus *= (in_le32(&gur->rcwsr[0]) >>
FSL_CHASSIS3_RCWSR0_SYS_PLL_RAT_SHIFT) &
FSL_CHASSIS3_RCWSR0_SYS_PLL_RAT_MASK;
sys_info->freq_ddrbus *= (in_le32(&gur->rcwsr[0]) >>
FSL_CHASSIS3_RCWSR0_MEM_PLL_RAT_SHIFT) &
FSL_CHASSIS3_RCWSR0_MEM_PLL_RAT_MASK;
for (i = 0; i < CONFIG_SYS_FSL_NUM_CC_PLLS; i++) {
/*
* fixme: prefer to combine the following into one line, but
* cannot pass compiling without warning about in_le32.
*/
offset = (void *)((size_t)clk_grp[i/3] +
offsetof(struct ccsr_clk_cluster_group,
pllngsr[i%3].gsr));
ratio[i] = (in_le32(offset) >> 1) & 0x3f;
if (ratio[i] > 4)
freq_c_pll[i] = sysclk * ratio[i];
else
freq_c_pll[i] = sys_info->freq_systembus * ratio[i];
}
for_each_cpu(i, cpu, cpu_numcores(), cpu_mask()) {
cluster = fsl_qoriq_core_to_cluster(cpu);
c_pll_sel = (in_le32(&clk_ctrl->clkcncsr[cluster].csr) >> 27)
& 0xf;
cplx_pll = core_cplx_pll[c_pll_sel];
cplx_pll += cc_group[cluster] - 1;
sys_info->freq_processor[cpu] =
freq_c_pll[cplx_pll] / core_cplx_pll_div[c_pll_sel];
}
+#if defined(CONFIG_FSL_IFC)
ccr = in_le32(&ifc_regs->ifc_ccr);
ccr = ((ccr & IFC_CCR_CLK_DIV_MASK) >> IFC_CCR_CLK_DIV_SHIFT) + 1;
sys_info->freq_localbus = sys_info->freq_systembus / ccr;
+#endif +}
+int get_clocks(void) +{
struct sys_info sys_info;
get_sys_info(&sys_info);
gd->cpu_clk = sys_info.freq_processor[0];
gd->bus_clk = sys_info.freq_systembus;
gd->mem_clk = sys_info.freq_ddrbus;
+#if defined(CONFIG_FSL_ESDHC)
gd->arch.sdhc_clk = gd->bus_clk / 2;
+#endif /* defined(CONFIG_FSL_ESDHC) */
if (gd->cpu_clk != 0)
return 0;
else
return 1;
+}
+/********************************************
- get_bus_freq
- return system bus freq in Hz
- *********************************************/
+ulong get_bus_freq(ulong dummy) +{
if (!gd->bus_clk)
get_clocks();
return gd->bus_clk;
+}
+/********************************************
- get_ddr_freq
- return ddr bus freq in Hz
- *********************************************/
+ulong get_ddr_freq(ulong dummy) +{
if (!gd->mem_clk)
get_clocks();
return gd->mem_clk;
+}
+unsigned int mxc_get_clock(enum mxc_clock clk) +{
switch (clk) {
case MXC_I2C_CLK:
return get_bus_freq(0) / 2;
default:
printf("Unsupported clock\n");
}
return 0;
+} diff --git a/arch/arm/cpu/armv8/fsl-lsch3/speed.h b/arch/arm/cpu/armv8/fsl-lsch3/speed.h new file mode 100644 index 0000000..15af5b9 --- /dev/null +++ b/arch/arm/cpu/armv8/fsl-lsch3/speed.h @@ -0,0 +1,7 @@ +/*
- Copyright 2014, Freescale Semiconductor, Inc.
- SPDX-License-Identifier: GPL-2.0+
- */
+void get_sys_info(struct sys_info *sys_info); diff --git a/arch/arm/cpu/armv8/fsl-lsch3/timer.c b/arch/arm/cpu/armv8/fsl-lsch3/timer.c new file mode 100644 index 0000000..3adfa41 --- /dev/null +++ b/arch/arm/cpu/armv8/fsl-lsch3/timer.c @@ -0,0 +1,62 @@ +/*
- Copyright 2014, Freescale Semiconductor
- SPDX-License-Identifier: GPL-2.0+
- */
+#include <common.h> +#include <div64.h> +#include <linux/compiler.h>
+static inline u64 get_cntfrq(void) +{
u64 cntfrq;
asm volatile("mrs %0, cntfrq_el0" : "=r" (cntfrq));
return cntfrq;
+}
+static inline u64 tick_to_time(u64 tick) +{
tick *= CONFIG_SYS_HZ;
do_div(tick, get_cntfrq());
return tick;
+}
+static inline u64 time_to_tick(u64 time) +{
time *= get_cntfrq();
do_div(time, CONFIG_SYS_HZ);
return time;
+}
+static inline u64 us_to_tick(unsigned long long usec) +{
usec = usec * get_cntfrq() + 999999;
do_div(usec, 1000000);
return usec;
+}
+u64 get_ticks(void) +{
u64 cval;
asm volatile("isb;mrs %0, cntpct_el0" : "=r" (cval));
return cval;
+}
+ulong get_timer(ulong base) +{
return tick_to_time(get_ticks()) - base;
+}
+void __udelay(unsigned long usec) +{
u64 start, tmo;
start = get_ticks();
tmo = us_to_tick(usec);
while (get_ticks() < (start + tmo))
;
+}
What's wrong with the existing arch timer code?
Rob

On 05/29/2014 06:19 AM, Rob Herring wrote:
On Wed, May 28, 2014 at 6:46 PM, York Sun yorksun@freescale.com wrote:
<snip>
+static void set_pgtable_section(u64 *page_table, u64 index, u64 section,
u8 memory_type)
+{
u64 value;
value = section | PMD_TYPE_SECT | PMD_SECT_AF;
value |= PMD_ATTRINDX(memory_type);
page_table[index] = value;
+}
This function looks like it should be common.
There is a common version in arch/arm/cpu/armv8/cache_v8.c. This version has more flexibility. I am not sure which one will be used as common.
+static inline void early_mmu_setup(void) +{
int el;
u64 i;
u64 section_l1t0, section_l1t1, section_l2;
u64 *level0_table = (u64 *)CONFIG_SYS_FSL_OCRAM_BASE;
u64 *level1_table_0 = (u64 *)(CONFIG_SYS_FSL_OCRAM_BASE + 0x1000);
u64 *level1_table_1 = (u64 *)(CONFIG_SYS_FSL_OCRAM_BASE + 0x2000);
u64 *level2_table = (u64 *)(CONFIG_SYS_FSL_OCRAM_BASE + 0x3000);
level0_table[0] =
(u64)level1_table_0 | PMD_TYPE_TABLE;
level0_table[1] =
(u64)level1_table_1 | PMD_TYPE_TABLE;
/*
* set level 1 table 0 to cache_inhibit, covering 0 to 512GB
* set level 1 table 1 to cache enabled, covering 512GB to 1TB
* set level 2 table to cache-inhibit, covering 0 to 1GB
*/
section_l1t0 = 0;
section_l1t1 = EARLY_BLOCK_SIZE_L0;
section_l2 = 0;
for (i = 0; i < 512; i++) {
set_pgtable_section(level1_table_0, i, section_l1t0,
MT_DEVICE_NGNRNE);
set_pgtable_section(level1_table_1, i, section_l1t1,
MT_NORMAL);
set_pgtable_section(level2_table, i, section_l2,
MT_DEVICE_NGNRNE);
section_l1t0 += EARLY_BLOCK_SIZE_L1;
section_l1t1 += EARLY_BLOCK_SIZE_L1;
section_l2 += EARLY_BLOCK_SIZE_L2;
}
level1_table_0[0] =
(u64)level2_table | PMD_TYPE_TABLE;
level1_table_0[1] =
0x40000000 | PMD_SECT_AF | PMD_TYPE_SECT |
PMD_ATTRINDX(MT_DEVICE_NGNRNE);
level1_table_0[2] =
0x80000000 | PMD_SECT_AF | PMD_TYPE_SECT |
PMD_ATTRINDX(MT_NORMAL);
level1_table_0[3] =
0xc0000000 | PMD_SECT_AF | PMD_TYPE_SECT |
PMD_ATTRINDX(MT_NORMAL);
/* Rewrite table to enable cache */
set_pgtable_section(level2_table,
CONFIG_SYS_FSL_OCRAM_BASE >> EARLY_SECTION_SHIFT_L2,
CONFIG_SYS_FSL_OCRAM_BASE,
MT_NORMAL);
for (i = CONFIG_SYS_IFC_BASE >> EARLY_SECTION_SHIFT_L2;
i < (CONFIG_SYS_IFC_BASE + CONFIG_SYS_IFC_SIZE)
>> EARLY_SECTION_SHIFT_L2; i++) {
section_l2 = i << EARLY_SECTION_SHIFT_L2;
set_pgtable_section(level2_table, i,
section_l2, MT_NORMAL);
}
el = current_el();
We really can't have u-boot running at random ELs in v8 for different platforms. It's a mess on v7. You should never be at EL3. u-boot could be defined to run at EL1, but then you need to be able to go back to EL2 to boot the kernel. So really u-boot should always run at EL2 unless you are running in a VM, but that would be a different platform.
I have to run u-boot at EL3. Otherwise I can't access Dickens. I vaguely remember other reasons. I will have to dig it out if needed.
if (el == 1) {
asm volatile("dsb sy;isb");
asm volatile("msr ttbr0_el1, %0"
: : "r" ((u64)level0_table) : "memory");
asm volatile("msr tcr_el1, %0"
: : "r" (EARLY_TCR) : "memory");
asm volatile("msr mair_el1, %0"
: : "r" (MEMORY_ATTRIBUTES) : "memory");
These should all be inline functions or macros.
Sure.
} else if (el == 2) {
asm volatile("dsb sy;isb");
asm volatile("msr ttbr0_el2, %0"
: : "r" ((u64)level0_table) : "memory");
asm volatile("msr tcr_el2, %0"
: : "r" (EARLY_TCR) : "memory");
asm volatile("msr mair_el2, %0"
: : "r" (MEMORY_ATTRIBUTES) : "memory");
} else if (el == 3) {
asm volatile("dsb sy;isb");
asm volatile("msr ttbr0_el3, %0"
: : "r" ((u64)level0_table) : "memory");
asm volatile("msr tcr_el3, %0"
: : "r" (EARLY_TCR) : "memory");
asm volatile("msr mair_el3, %0"
: : "r" (MEMORY_ATTRIBUTES) : "memory");
} else {
hang();
}
set_sctlr(get_sctlr() | CR_M);
+}
+static inline void final_mmu_setup(void)
Looks like nearly the same code repeated...
It is similar but different. The final_mmu_setup has different content in the table. I can reorganize to use as much common code as possible.
+{
int el;
u64 i, tbl_base, tbl_limit, section_base;
u64 section_l1t0, section_l1t1, section_l2;
u64 *level0_table = (u64 *)gd->arch.tlb_addr;
u64 *level1_table_0 = (u64 *)(gd->arch.tlb_addr + 0x1000);
u64 *level1_table_1 = (u64 *)(gd->arch.tlb_addr + 0x2000);
u64 *level2_table_0 = (u64 *)(gd->arch.tlb_addr + 0x3000);
u64 *level2_table_1 = (u64 *)(gd->arch.tlb_addr + 0x4000);
level0_table[0] =
(u64)level1_table_0 | PMD_TYPE_TABLE;
level0_table[1] =
(u64)level1_table_1 | PMD_TYPE_TABLE;
/*
* set level 1 table 0 to cache_inhibit, covering 0 to 512GB
* set level 1 table 1 to cache enabled, covering 512GB to 1TB
* set level 2 table 0 to cache-inhibit, covering 0 to 1GB
*/
section_l1t0 = 0;
section_l1t1 = FINAL_BLOCK_SIZE_L0;
section_l2 = 0;
for (i = 0; i < 512; i++) {
set_pgtable_section(level1_table_0, i, section_l1t0,
MT_DEVICE_NGNRNE);
set_pgtable_section(level1_table_1, i, section_l1t1,
MT_NORMAL);
set_pgtable_section(level2_table_0, i, section_l2,
MT_DEVICE_NGNRNE);
section_l1t0 += FINAL_BLOCK_SIZE_L1;
section_l1t1 += FINAL_BLOCK_SIZE_L1;
section_l2 += FINAL_BLOCK_SIZE_L2;
}
level1_table_0[0] =
(u64)level2_table_0 | PMD_TYPE_TABLE;
level1_table_0[2] =
0x80000000 | PMD_SECT_AF | PMD_TYPE_SECT |
PMD_ATTRINDX(MT_NORMAL);
level1_table_0[3] =
0xc0000000 | PMD_SECT_AF | PMD_TYPE_SECT |
PMD_ATTRINDX(MT_NORMAL);
/* Rewrite table to enable cache */
set_pgtable_section(level2_table_0,
CONFIG_SYS_FSL_OCRAM_BASE >> FINAL_SECTION_SHIFT_L2,
CONFIG_SYS_FSL_OCRAM_BASE,
MT_NORMAL);
/*
* Fill in other part of tables if cache is needed
* If finer granularity than 1GB is needed, sub table
* should be created.
*/
section_base = FINAL_QBMAN_CACHED_MEM & ~(FINAL_BLOCK_SIZE_L1 - 1);
i = section_base >> FINAL_SECTION_SHIFT_L1;
level1_table_0[i] = (u64)level2_table_1 | PMD_TYPE_TABLE;
section_l2 = section_base;
for (i = 0; i < 512; i++) {
set_pgtable_section(level2_table_1, i, section_l2,
MT_DEVICE_NGNRNE);
section_l2 += FINAL_BLOCK_SIZE_L2;
}
tbl_base = FINAL_QBMAN_CACHED_MEM & (FINAL_BLOCK_SIZE_L1 - 1);
tbl_limit = (FINAL_QBMAN_CACHED_MEM + FINAL_QBMAN_CACHED_SIZE) &
(FINAL_BLOCK_SIZE_L1 - 1);
for (i = tbl_base >> FINAL_SECTION_SHIFT_L2;
i < tbl_limit >> FINAL_SECTION_SHIFT_L2; i++) {
section_l2 = section_base + (i << FINAL_SECTION_SHIFT_L2);
set_pgtable_section(level2_table_1, i,
section_l2, MT_NORMAL);
}
el = current_el();
if (el == 1) {
asm volatile("dsb sy;isb");
asm volatile("msr ttbr0_el1, %0"
: : "r" ((u64)level0_table) : "memory");
asm volatile("msr tcr_el1, %0"
: : "r" (FINAL_TCR) : "memory");
asm volatile("msr mair_el1, %0"
: : "r" (MEMORY_ATTRIBUTES) : "memory");
} else if (el == 2) {
asm volatile("dsb sy;isb");
asm volatile("msr ttbr0_el2, %0"
: : "r" ((u64)level0_table) : "memory");
asm volatile("msr tcr_el2, %0"
: : "r" (FINAL_TCR) : "memory");
asm volatile("msr mair_el2, %0"
: : "r" (MEMORY_ATTRIBUTES) : "memory");
} else if (el == 3) {
asm volatile("dsb sy;isb");
asm volatile("msr ttbr0_el3, %0"
: : "r" ((u64)level0_table) : "memory");
asm volatile("msr tcr_el3, %0"
: : "r" (FINAL_TCR) : "memory");
asm volatile("msr mair_el3, %0"
: : "r" (MEMORY_ATTRIBUTES) : "memory");
} else {
hang();
}
set_sctlr(get_sctlr() | CR_M);
+}
/*
 * Early CPU setup: enable the instruction cache, invalidate the data cache
 * and TLBs, install the early translation tables, then set CR_C in SCTLR.
 * Always returns 0.
 */
+int arch_cpu_init(void) +{
icache_enable();
/* Invalidate the d-cache and TLBs before the MMU is switched on */
__asm_invalidate_dcache_all();
__asm_invalidate_tlb_all();
/* Builds the early tables and sets CR_M in SCTLR */
early_mmu_setup();
/* Set CR_C only after the MMU has been enabled by early_mmu_setup() */
set_sctlr(get_sctlr() | CR_C);
return 0;
+}
+/*
- flush_l3_cache
- Dickens L3 cache can be flushed by transitioning from FAM to SFONLY power
- state, by writing to HP-F P-state request register.
Other SOCs will have Dickens. Are these registers FSL specific? If not, this should be common.
I don't think they are FSL specific. But I haven't found a proper place to host it. Can you share what other SoCs have Dickens? If they are not supported yet, we can keep the code here until we are clear then move it out.
Also, I believe the proper way to flush Dickens is using the architected cache flushing method where you walk the levels out to level 3.
False. L3 cache gets flushed with instruction DCCIVAC. So if we flush the cache by range, it works OK. But it doesn't work with DCISW or DCCISW. If we flush cache by walking the levels, it doesn't work. We can only walk level 1 and level 2.
- */
+#define HNF0_PSTATE_REQ 0x04200010 +#define HNF1_PSTATE_REQ 0x04210010 +#define HNF2_PSTATE_REQ 0x04220010 +#define HNF3_PSTATE_REQ 0x04230010 +#define HNF4_PSTATE_REQ 0x04240010 +#define HNF5_PSTATE_REQ 0x04250010 +#define HNF6_PSTATE_REQ 0x04260010 +#define HNF7_PSTATE_REQ 0x04270010 +#define HNFPSTAT_MASK (0xFFFFFFFFFFFFFFFC) +#define HNFPSTAT_FAM 0x3 +#define HNFPSTAT_SFONLY 0x01
+static void hnf_pstate_req(u64 *ptr, u64 state) +{
int timeout = 1000;
out_le64(ptr, (in_le64(ptr) & HNFPSTAT_MASK) | (state & 0x3));
ptr++;
/* checking if the transition is completed */
while (timeout > 0) {
if (((in_le64(ptr) & 0x0c) >> 2) == (state & 0x3))
break;
udelay(100);
timeout--;
}
+}
+void flush_l3_cache(void) +{
hnf_pstate_req((u64 *)HNF0_PSTATE_REQ, HNFPSTAT_SFONLY);
hnf_pstate_req((u64 *)HNF1_PSTATE_REQ, HNFPSTAT_SFONLY);
hnf_pstate_req((u64 *)HNF2_PSTATE_REQ, HNFPSTAT_SFONLY);
hnf_pstate_req((u64 *)HNF3_PSTATE_REQ, HNFPSTAT_SFONLY);
hnf_pstate_req((u64 *)HNF4_PSTATE_REQ, HNFPSTAT_SFONLY);
hnf_pstate_req((u64 *)HNF5_PSTATE_REQ, HNFPSTAT_SFONLY);
hnf_pstate_req((u64 *)HNF6_PSTATE_REQ, HNFPSTAT_SFONLY);
hnf_pstate_req((u64 *)HNF7_PSTATE_REQ, HNFPSTAT_SFONLY);
hnf_pstate_req((u64 *)HNF0_PSTATE_REQ, HNFPSTAT_FAM);
hnf_pstate_req((u64 *)HNF1_PSTATE_REQ, HNFPSTAT_FAM);
hnf_pstate_req((u64 *)HNF2_PSTATE_REQ, HNFPSTAT_FAM);
hnf_pstate_req((u64 *)HNF3_PSTATE_REQ, HNFPSTAT_FAM);
hnf_pstate_req((u64 *)HNF4_PSTATE_REQ, HNFPSTAT_FAM);
hnf_pstate_req((u64 *)HNF5_PSTATE_REQ, HNFPSTAT_FAM);
hnf_pstate_req((u64 *)HNF6_PSTATE_REQ, HNFPSTAT_FAM);
hnf_pstate_req((u64 *)HNF7_PSTATE_REQ, HNFPSTAT_FAM);
+}
+/*
- This function is called from lib/board.c.
- It recreates MMU table in main memory. MMU and d-cache are enabled earlier.
- There is no need to disable d-cache for this operation.
- */
/* Rebuild the translation tables at gd->arch.tlb_addr and switch to them. */
+void enable_caches(void) +{
/* Fills the tables at gd->arch.tlb_addr and programs TTBR0/TCR/MAIR */
final_mmu_setup();
/* Flush the table region [tlb_addr, tlb_addr + tlb_size) from the d-cache */
flush_dcache_range(gd->arch.tlb_addr,
gd->arch.tlb_addr + gd->arch.tlb_size);
/* Invalidate all TLB entries after the table switch */
__asm_invalidate_tlb_all();
+} +#endif
+static inline u32 init_type(u32 cluster, int init_id)
init_type? That's a great name.
That is initiator type. It is a funny name used by many FSL SoCs.
+{
struct ccsr_gur *gur = (void *)(CONFIG_SYS_FSL_GUTS_ADDR);
u32 idx = (cluster >> (init_id * 8)) & TP_CLUSTER_INIT_MASK;
u32 type = in_le32(&gur->tp_ityp[idx]);
if (type & TP_ITYP_AV)
return type;
return 0;
+}
+u32 cpu_mask(void) +{
struct ccsr_gur __iomem *gur = (void *)(CONFIG_SYS_FSL_GUTS_ADDR);
int i = 0, count = 0;
u32 cluster, type, mask = 0;
do {
int j;
cluster = in_le32(&gur->tp_cluster[i].lower);
for (j = 0; j < TP_INIT_PER_CLUSTER; j++) {
type = init_type(cluster, j);
if (type) {
if (TP_ITYP_TYPE(type) == TP_ITYP_TYPE_ARM)
mask |= 1 << count;
count++;
}
}
i++;
} while ((cluster & TP_CLUSTER_EOC) != TP_CLUSTER_EOC);
return mask;
+}
+/*
- Return the number of cores on this SOC.
- */
+int cpu_numcores(void) +{
return hweight32(cpu_mask());
+}
+int fsl_qoriq_core_to_cluster(unsigned int core) +{
struct ccsr_gur __iomem *gur =
(void __iomem *)(CONFIG_SYS_FSL_GUTS_ADDR);
int i = 0, count = 0;
u32 cluster;
do {
int j;
cluster = in_le32(&gur->tp_cluster[i].lower);
for (j = 0; j < TP_INIT_PER_CLUSTER; j++) {
if (init_type(cluster, j)) {
if (count == core)
return i;
count++;
}
}
i++;
} while ((cluster & TP_CLUSTER_EOC) != TP_CLUSTER_EOC);
return -1; /* cannot identify the cluster */
+}
+u32 fsl_qoriq_core_to_type(unsigned int core) +{
struct ccsr_gur __iomem *gur =
(void __iomem *)(CONFIG_SYS_FSL_GUTS_ADDR);
int i = 0, count = 0;
u32 cluster, type;
do {
int j;
cluster = in_le32(&gur->tp_cluster[i].lower);
for (j = 0; j < TP_INIT_PER_CLUSTER; j++) {
type = init_type(cluster, j);
if (type) {
if (count == core)
return type;
count++;
}
}
i++;
} while ((cluster & TP_CLUSTER_EOC) != TP_CLUSTER_EOC);
return -1; /* cannot identify the cluster */
+}
Do you plan on supporting PSCI because all this core and cluster stuff belongs there.
What's PSCI? This code is similar to FSL chassis generation 2 code, but it is for chassis generation 3. I plan to move it to a common place once we have another platform using chassis generation 3.
+#ifdef CONFIG_DISPLAY_CPUINFO +int print_cpuinfo(void) +{
struct sys_info sysinfo;
char buf[32];
unsigned int i, core;
u32 type;
get_sys_info(&sysinfo);
puts("Clock Configuration:");
for_each_cpu(i, core, cpu_numcores(), cpu_mask()) {
if (!(i % 3))
puts("\n ");
type = TP_ITYP_VER(fsl_qoriq_core_to_type(core));
printf("CPU%d(%s):%-4s MHz ", core,
type == TY_ITYP_VER_A7 ? "A7 " :
(type == TY_ITYP_VER_A53 ? "A53" :
(type == TY_ITYP_VER_A57 ? "A57" : " ")),
strmhz(buf, sysinfo.freq_processor[core]));
}
printf("\n Bus: %-4s MHz ",
strmhz(buf, sysinfo.freq_systembus));
printf("DDR: %-4s MHz", strmhz(buf, sysinfo.freq_ddrbus));
puts("\n");
return 0;
+} +#endif diff --git a/arch/arm/cpu/armv8/fsl-lsch3/cpu.h b/arch/arm/cpu/armv8/fsl-lsch3/cpu.h new file mode 100644 index 0000000..28544d7 --- /dev/null +++ b/arch/arm/cpu/armv8/fsl-lsch3/cpu.h @@ -0,0 +1,7 @@ +/*
- Copyright 2014, Freescale Semiconductor
- SPDX-License-Identifier: GPL-2.0+
- */
+int fsl_qoriq_core_to_cluster(unsigned int core); diff --git a/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S b/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S new file mode 100644 index 0000000..087d5d1 --- /dev/null +++ b/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S @@ -0,0 +1,65 @@ +/*
- (C) Copyright 2014 Freescale Semiconductor
- SPDX-License-Identifier: GPL-2.0+
- Extracted from armv8/start.S
- */
+#include <config.h> +#include <linux/linkage.h> +#include <asm/macro.h>
+ENTRY(lowlevel_init)
/* Initialize GIC Secure Bank Status */
mov x29, lr /* Save LR */
/* Set the SMMU page size in the sACR register */
ldr x1, =SMMU_BASE
ldr w0, [x1, #0x10]
orr w0, w0, #1 << 16 /* set sACR.pagesize to indicate 64K page */
str w0, [x1, #0x10]
+#if defined(CONFIG_GICV2) || defined(CONFIG_GICV3)
You can have either v2 or v3?
We do in the model. Since we don't have the final SoC yet, and I don't know how many varieties will exist, I am reluctant to remove v2.
branch_if_slave x0, 1f
ldr x0, =GICD_BASE
bl gic_init_secure
+1: +#if defined(CONFIG_GICV3)
ldr x0, =GICR_BASE
bl gic_init_secure_percpu
+#elif defined(CONFIG_GICV2)
ldr x0, =GICD_BASE
ldr x1, =GICC_BASE
bl gic_init_secure_percpu
+#endif +#endif
branch_if_master x0, x1, 1f
/*
* Slave should wait for master clearing spin table.
* This sync prevents slaves from observing an incorrect
* value of spin table and jumping to wrong place.
*/
+#if defined(CONFIG_GICV2) || defined(CONFIG_GICV3) +#ifdef CONFIG_GICV2
ldr x0, =GICC_BASE
+#endif
bl gic_wait_for_interrupt
+#endif
/*
* All processors will enter EL2 and optionally EL1.
*/
bl armv8_switch_to_el2
+#ifdef CONFIG_ARMV8_SWITCH_TO_EL1
bl armv8_switch_to_el1
+#endif
b 2f
This all looks like cut and paste from existing startup code. Can't you refactor things?
Right. We have added some code which only applies to this SoC. That's why the copy-n-paste then modify. I am also holding other patches which add a lot more code into this file.
+1: +2:
mov lr, x29 /* Restore LR */
ret
+ENDPROC(lowlevel_init) diff --git a/arch/arm/cpu/armv8/fsl-lsch3/speed.c b/arch/arm/cpu/armv8/fsl-lsch3/speed.c new file mode 100644 index 0000000..dc4a34b --- /dev/null +++ b/arch/arm/cpu/armv8/fsl-lsch3/speed.c @@ -0,0 +1,176 @@ +/*
- Copyright 2014, Freescale Semiconductor, Inc.
- SPDX-License-Identifier: GPL-2.0+
- Derived from arch/power/cpu/mpc85xx/speed.c
- */
+#include <common.h> +#include <linux/compiler.h> +#include <fsl_ifc.h> +#include <asm/processor.h> +#include <asm/io.h> +#include <asm/arch-fsl-lsch3/immap_lsch3.h> +#include <asm/arch/clock.h> +#include "cpu.h"
+DECLARE_GLOBAL_DATA_PTR;
+#ifndef CONFIG_SYS_FSL_NUM_CC_PLLS +#define CONFIG_SYS_FSL_NUM_CC_PLLS 6 +#endif
+void get_sys_info(struct sys_info *sys_info) +{
struct ccsr_gur __iomem *gur = (void *)(CONFIG_SYS_FSL_GUTS_ADDR);
+#ifdef CONFIG_FSL_IFC
struct fsl_ifc *ifc_regs = (void *)CONFIG_SYS_IFC_ADDR;
u32 ccr;
+#endif
struct ccsr_clk_cluster_group __iomem *clk_grp[2] = {
(void *)(CONFIG_SYS_FSL_CH3_CLK_GRPA_ADDR),
(void *)(CONFIG_SYS_FSL_CH3_CLK_GRPB_ADDR)
};
struct ccsr_clk_ctrl __iomem *clk_ctrl =
(void *)(CONFIG_SYS_FSL_CH3_CLK_CTRL_ADDR);
unsigned int cpu;
const u8 core_cplx_pll[16] = {
[0] = 0, /* CC1 PPL / 1 */
[1] = 0, /* CC1 PPL / 2 */
[2] = 0, /* CC1 PPL / 4 */
[4] = 1, /* CC2 PPL / 1 */
[5] = 1, /* CC2 PPL / 2 */
[6] = 1, /* CC2 PPL / 4 */
[8] = 2, /* CC3 PPL / 1 */
[9] = 2, /* CC3 PPL / 2 */
[10] = 2, /* CC3 PPL / 4 */
[12] = 3, /* CC4 PPL / 1 */
[13] = 3, /* CC4 PPL / 2 */
[14] = 3, /* CC4 PPL / 4 */
};
const u8 core_cplx_pll_div[16] = {
[0] = 1, /* CC1 PPL / 1 */
[1] = 2, /* CC1 PPL / 2 */
[2] = 4, /* CC1 PPL / 4 */
[4] = 1, /* CC2 PPL / 1 */
[5] = 2, /* CC2 PPL / 2 */
[6] = 4, /* CC2 PPL / 4 */
[8] = 1, /* CC3 PPL / 1 */
[9] = 2, /* CC3 PPL / 2 */
[10] = 4, /* CC3 PPL / 4 */
[12] = 1, /* CC4 PPL / 1 */
[13] = 2, /* CC4 PPL / 2 */
[14] = 4, /* CC4 PPL / 4 */
};
uint i, cluster;
uint freq_c_pll[CONFIG_SYS_FSL_NUM_CC_PLLS];
uint ratio[CONFIG_SYS_FSL_NUM_CC_PLLS];
unsigned long sysclk = CONFIG_SYS_CLK_FREQ;
int cc_group[12] = CONFIG_SYS_FSL_CLUSTER_CLOCKS;
u32 c_pll_sel, cplx_pll;
void *offset;
sys_info->freq_systembus = sysclk;
+#ifdef CONFIG_DDR_CLK_FREQ
sys_info->freq_ddrbus = CONFIG_DDR_CLK_FREQ;
+#else
sys_info->freq_ddrbus = sysclk;
+#endif
sys_info->freq_systembus *= (in_le32(&gur->rcwsr[0]) >>
FSL_CHASSIS3_RCWSR0_SYS_PLL_RAT_SHIFT) &
FSL_CHASSIS3_RCWSR0_SYS_PLL_RAT_MASK;
sys_info->freq_ddrbus *= (in_le32(&gur->rcwsr[0]) >>
FSL_CHASSIS3_RCWSR0_MEM_PLL_RAT_SHIFT) &
FSL_CHASSIS3_RCWSR0_MEM_PLL_RAT_MASK;
for (i = 0; i < CONFIG_SYS_FSL_NUM_CC_PLLS; i++) {
/*
* fixme: prefer to combine the following into one line, but
* cannot pass compiling without warning about in_le32.
*/
offset = (void *)((size_t)clk_grp[i/3] +
offsetof(struct ccsr_clk_cluster_group,
pllngsr[i%3].gsr));
ratio[i] = (in_le32(offset) >> 1) & 0x3f;
if (ratio[i] > 4)
freq_c_pll[i] = sysclk * ratio[i];
else
freq_c_pll[i] = sys_info->freq_systembus * ratio[i];
}
for_each_cpu(i, cpu, cpu_numcores(), cpu_mask()) {
cluster = fsl_qoriq_core_to_cluster(cpu);
c_pll_sel = (in_le32(&clk_ctrl->clkcncsr[cluster].csr) >> 27)
& 0xf;
cplx_pll = core_cplx_pll[c_pll_sel];
cplx_pll += cc_group[cluster] - 1;
sys_info->freq_processor[cpu] =
freq_c_pll[cplx_pll] / core_cplx_pll_div[c_pll_sel];
}
+#if defined(CONFIG_FSL_IFC)
ccr = in_le32(&ifc_regs->ifc_ccr);
ccr = ((ccr & IFC_CCR_CLK_DIV_MASK) >> IFC_CCR_CLK_DIV_SHIFT) + 1;
sys_info->freq_localbus = sys_info->freq_systembus / ccr;
+#endif +}
+int get_clocks(void) +{
struct sys_info sys_info;
get_sys_info(&sys_info);
gd->cpu_clk = sys_info.freq_processor[0];
gd->bus_clk = sys_info.freq_systembus;
gd->mem_clk = sys_info.freq_ddrbus;
+#if defined(CONFIG_FSL_ESDHC)
gd->arch.sdhc_clk = gd->bus_clk / 2;
+#endif /* defined(CONFIG_FSL_ESDHC) */
if (gd->cpu_clk != 0)
return 0;
else
return 1;
+}
+/********************************************
- get_bus_freq
- return system bus freq in Hz
- *********************************************/
+ulong get_bus_freq(ulong dummy) +{
if (!gd->bus_clk)
get_clocks();
return gd->bus_clk;
+}
+/********************************************
- get_ddr_freq
- return ddr bus freq in Hz
- *********************************************/
+ulong get_ddr_freq(ulong dummy) +{
if (!gd->mem_clk)
get_clocks();
return gd->mem_clk;
+}
+unsigned int mxc_get_clock(enum mxc_clock clk) +{
switch (clk) {
case MXC_I2C_CLK:
return get_bus_freq(0) / 2;
default:
printf("Unsupported clock\n");
}
return 0;
+} diff --git a/arch/arm/cpu/armv8/fsl-lsch3/speed.h b/arch/arm/cpu/armv8/fsl-lsch3/speed.h new file mode 100644 index 0000000..15af5b9 --- /dev/null +++ b/arch/arm/cpu/armv8/fsl-lsch3/speed.h @@ -0,0 +1,7 @@ +/*
- Copyright 2014, Freescale Semiconductor, Inc.
- SPDX-License-Identifier: GPL-2.0+
- */
+void get_sys_info(struct sys_info *sys_info); diff --git a/arch/arm/cpu/armv8/fsl-lsch3/timer.c b/arch/arm/cpu/armv8/fsl-lsch3/timer.c new file mode 100644 index 0000000..3adfa41 --- /dev/null +++ b/arch/arm/cpu/armv8/fsl-lsch3/timer.c @@ -0,0 +1,62 @@ +/*
- Copyright 2014, Freescale Semiconductor
- SPDX-License-Identifier: GPL-2.0+
- */
+#include <common.h> +#include <div64.h> +#include <linux/compiler.h>
+static inline u64 get_cntfrq(void) +{
u64 cntfrq;
asm volatile("mrs %0, cntfrq_el0" : "=r" (cntfrq));
return cntfrq;
+}
+static inline u64 tick_to_time(u64 tick) +{
tick *= CONFIG_SYS_HZ;
do_div(tick, get_cntfrq());
return tick;
+}
+static inline u64 time_to_tick(u64 time) +{
time *= get_cntfrq();
do_div(time, CONFIG_SYS_HZ);
return time;
+}
+static inline u64 us_to_tick(unsigned long long usec) +{
usec = usec * get_cntfrq() + 999999;
do_div(usec, 1000000);
return usec;
+}
+u64 get_ticks(void) +{
u64 cval;
asm volatile("isb;mrs %0, cntpct_el0" : "=r" (cval));
return cval;
+}
+ulong get_timer(ulong base) +{
return tick_to_time(get_ticks()) - base;
+}
+void __udelay(unsigned long usec) +{
u64 start, tmo;
start = get_ticks();
tmo = us_to_tick(usec);
while (get_ticks() < (start + tmo))
;
+}
What's wrong with the existing arch timer code?
It was created before the common armv8 code was settled. At that time, the generic code didn't work for this SoC. Now things have changed. I need to reexamine this part.
I really appreciate your effort reviewing this patch. A new version will be sent soon. I may have to split this patch to separate the common part.
York

On Thu, May 29, 2014 at 10:19 AM, York Sun yorksun@freescale.com wrote:
On 05/29/2014 06:19 AM, Rob Herring wrote:
On Wed, May 28, 2014 at 6:46 PM, York Sun yorksun@freescale.com wrote:
<snip>
+static void set_pgtable_section(u64 *page_table, u64 index, u64 section,
u8 memory_type)
+{
u64 value;
value = section | PMD_TYPE_SECT | PMD_SECT_AF;
value |= PMD_ATTRINDX(memory_type);
page_table[index] = value;
+}
This function looks like it should be common.
There is a common version in arch/arm/cpu/armv8/cache_v8.c. This version has more flexibility. I am not sure which one will be used as common.
Then add the flexibility to the common one.
el = current_el();
We really can't have u-boot running at random ELs in v8 for different platforms. It's a mess on v7. You should never be at EL3. u-boot could be defined to run at EL1, but then you need to be able to go back to EL2 to boot the kernel. So really u-boot should always run at EL2 unless you are running in a VM, but that would be a different platform.
I have to run u-boot at EL3. Otherwise I can't access Dickens. I vaguely remember other reasons. I will have to dig it out if needed.
You may start in EL3, but then the early init code should drop to EL2. If you need EL3 later on for something PSCI does not address, then you are probably doing things wrong.
- flush_l3_cache
- Dickens L3 cache can be flushed by transitioning from FAM to SFONLY power
- state, by writing to HP-F P-state request register.
Other SOCs will have Dickens. Are these registers FSL specific? If not, this should be common.
I don't think they are FSL specific. But I haven't found a proper place to host it. Can you share what other SoCs have Dickens? If they are not supported yet, we can keep the code here until we are clear then move it out.
It is safe to say most if not all SOCs based on A53 and/or A57 will also be based on Dickens aka CCN-504.
Also, I believe the proper way to flush Dickens is using the architected cache flushing method where you walk the levels out to level 3.
False. L3 cache gets flushed with instruction DCCIVAC. So if we flush the cache by range, it works OK. But it doesn't work with DCISW or DCCISW. If we flush cache by walking the levels, it doesn't work. We can only walk level 1 and level 2.
So the EL3 boot code should do any one-time invalidate all operations and u-boot in EL2 should only use range operations. There are limitations in the by way operations such as they are not SMP safe. If the by way operations are EL3 only, then that's probably a sign you are not doing things as intended. Or it could be an oversight and we need to figure out a common way to handle this across SOCs.
- */
+#define HNF0_PSTATE_REQ 0x04200010 +#define HNF1_PSTATE_REQ 0x04210010 +#define HNF2_PSTATE_REQ 0x04220010 +#define HNF3_PSTATE_REQ 0x04230010 +#define HNF4_PSTATE_REQ 0x04240010 +#define HNF5_PSTATE_REQ 0x04250010 +#define HNF6_PSTATE_REQ 0x04260010 +#define HNF7_PSTATE_REQ 0x04270010 +#define HNFPSTAT_MASK (0xFFFFFFFFFFFFFFFC) +#define HNFPSTAT_FAM 0x3 +#define HNFPSTAT_SFONLY 0x01
+static void hnf_pstate_req(u64 *ptr, u64 state) +{
int timeout = 1000;
out_le64(ptr, (in_le64(ptr) & HNFPSTAT_MASK) | (state & 0x3));
ptr++;
/* checking if the transition is completed */
while (timeout > 0) {
if (((in_le64(ptr) & 0x0c) >> 2) == (state & 0x3))
break;
udelay(100);
timeout--;
}
+}
+void flush_l3_cache(void) +{
hnf_pstate_req((u64 *)HNF0_PSTATE_REQ, HNFPSTAT_SFONLY);
hnf_pstate_req((u64 *)HNF1_PSTATE_REQ, HNFPSTAT_SFONLY);
hnf_pstate_req((u64 *)HNF2_PSTATE_REQ, HNFPSTAT_SFONLY);
hnf_pstate_req((u64 *)HNF3_PSTATE_REQ, HNFPSTAT_SFONLY);
hnf_pstate_req((u64 *)HNF4_PSTATE_REQ, HNFPSTAT_SFONLY);
hnf_pstate_req((u64 *)HNF5_PSTATE_REQ, HNFPSTAT_SFONLY);
hnf_pstate_req((u64 *)HNF6_PSTATE_REQ, HNFPSTAT_SFONLY);
hnf_pstate_req((u64 *)HNF7_PSTATE_REQ, HNFPSTAT_SFONLY);
hnf_pstate_req((u64 *)HNF0_PSTATE_REQ, HNFPSTAT_FAM);
hnf_pstate_req((u64 *)HNF1_PSTATE_REQ, HNFPSTAT_FAM);
hnf_pstate_req((u64 *)HNF2_PSTATE_REQ, HNFPSTAT_FAM);
hnf_pstate_req((u64 *)HNF3_PSTATE_REQ, HNFPSTAT_FAM);
hnf_pstate_req((u64 *)HNF4_PSTATE_REQ, HNFPSTAT_FAM);
hnf_pstate_req((u64 *)HNF5_PSTATE_REQ, HNFPSTAT_FAM);
hnf_pstate_req((u64 *)HNF6_PSTATE_REQ, HNFPSTAT_FAM);
hnf_pstate_req((u64 *)HNF7_PSTATE_REQ, HNFPSTAT_FAM);
+}
+/*
- This function is called from lib/board.c.
- It recreates MMU table in main memory. MMU and d-cache are enabled earlier.
- There is no need to disable d-cache for this operation.
- */
+void enable_caches(void) +{
final_mmu_setup();
flush_dcache_range(gd->arch.tlb_addr,
gd->arch.tlb_addr + gd->arch.tlb_size);
__asm_invalidate_tlb_all();
+} +#endif
+static inline u32 init_type(u32 cluster, int init_id)
init_type? That's a great name.
That is initiator type. It is a funny name used by many FSL SoCs.
+{
struct ccsr_gur *gur = (void *)(CONFIG_SYS_FSL_GUTS_ADDR);
u32 idx = (cluster >> (init_id * 8)) & TP_CLUSTER_INIT_MASK;
u32 type = in_le32(&gur->tp_ityp[idx]);
if (type & TP_ITYP_AV)
return type;
return 0;
+}
+u32 cpu_mask(void) +{
struct ccsr_gur __iomem *gur = (void *)(CONFIG_SYS_FSL_GUTS_ADDR);
int i = 0, count = 0;
u32 cluster, type, mask = 0;
do {
int j;
cluster = in_le32(&gur->tp_cluster[i].lower);
for (j = 0; j < TP_INIT_PER_CLUSTER; j++) {
type = init_type(cluster, j);
if (type) {
if (TP_ITYP_TYPE(type) == TP_ITYP_TYPE_ARM)
mask |= 1 << count;
count++;
}
}
i++;
} while ((cluster & TP_CLUSTER_EOC) != TP_CLUSTER_EOC);
return mask;
+}
+/*
- Return the number of cores on this SOC.
- */
+int cpu_numcores(void) +{
return hweight32(cpu_mask());
+}
+int fsl_qoriq_core_to_cluster(unsigned int core) +{
struct ccsr_gur __iomem *gur =
(void __iomem *)(CONFIG_SYS_FSL_GUTS_ADDR);
int i = 0, count = 0;
u32 cluster;
do {
int j;
cluster = in_le32(&gur->tp_cluster[i].lower);
for (j = 0; j < TP_INIT_PER_CLUSTER; j++) {
if (init_type(cluster, j)) {
if (count == core)
return i;
count++;
}
}
i++;
} while ((cluster & TP_CLUSTER_EOC) != TP_CLUSTER_EOC);
return -1; /* cannot identify the cluster */
+}
+u32 fsl_qoriq_core_to_type(unsigned int core) +{
struct ccsr_gur __iomem *gur =
(void __iomem *)(CONFIG_SYS_FSL_GUTS_ADDR);
int i = 0, count = 0;
u32 cluster, type;
do {
int j;
cluster = in_le32(&gur->tp_cluster[i].lower);
for (j = 0; j < TP_INIT_PER_CLUSTER; j++) {
type = init_type(cluster, j);
if (type) {
if (count == core)
return type;
count++;
}
}
i++;
} while ((cluster & TP_CLUSTER_EOC) != TP_CLUSTER_EOC);
return -1; /* cannot identify the cluster */
+}
Do you plan on supporting PSCI because all this core and cluster stuff belongs there.
What's PSCI?
Power State Coordination Interface, a spec from ARM to do cpu and cluster power/boot control. It's strongly encouraged for v8 and good luck with upstream kernel support without it.
There are patches for v7 to add PSCI implementation to u-boot from Marc Zyngier on the list. It is debatable whether u-boot is the right place for it rather than separate secure firmware. There is also ARM Trusted Firmware which implements PSCI and you should look at.
This code is similar to FSL chassis generation 2 code, but it is for chassis generation 3. I plan to move it to a common place once we have another platform using chassis generation 3.
Gen2 would be PowerPC, right? Not sure how that is relevant.
/*
* Slave should wait for master clearing spin table.
* This sync prevents slaves from observing an incorrect
* value of spin table and jumping to wrong place.
*/
+#if defined(CONFIG_GICV2) || defined(CONFIG_GICV3) +#ifdef CONFIG_GICV2
ldr x0, =GICC_BASE
+#endif
bl gic_wait_for_interrupt
+#endif
/*
* All processors will enter EL2 and optionally EL1.
*/
bl armv8_switch_to_el2
+#ifdef CONFIG_ARMV8_SWITCH_TO_EL1
bl armv8_switch_to_el1
+#endif
b 2f
This all looks like cut and paste from existing startup code. Can't you refactor things?
Right. We have added some code which only applies to this SoC. That's why the copy-n-paste then modify. I am also holding other patches which add a lot more code into this file.
Then add callouts so you can add SOC specific initialization.
Rob

On 05/29/2014 10:37 AM, Rob Herring wrote:
On Thu, May 29, 2014 at 10:19 AM, York Sun yorksun@freescale.com wrote:
On 05/29/2014 06:19 AM, Rob Herring wrote:
On Wed, May 28, 2014 at 6:46 PM, York Sun yorksun@freescale.com wrote:
<snip>
+static void set_pgtable_section(u64 *page_table, u64 index, u64 section,
u8 memory_type)
+{
u64 value;
value = section | PMD_TYPE_SECT | PMD_SECT_AF;
value |= PMD_ATTRINDX(memory_type);
page_table[index] = value;
+}
This function looks like it should be common.
There is a common version in arch/arm/cpu/armv8/cache_v8.c. This version has more flexibility. I am not sure which one will be used as common.
Then add the flexibility to the common one.
Will do.
el = current_el();
We really can't have u-boot running at random ELs in v8 for different platforms. It's a mess on v7. You should never be at EL3. u-boot could be defined to run at EL1, but then you need to be able to go back to EL2 to boot the kernel. So really u-boot should always run at EL2 unless you are running in a VM, but that would be a different platform.
I have to run u-boot at EL3. Otherwise I can't access Dickens. I vaguely remember other reasons. I will have to dig it out if needed.
You may start in EL3, but then the early init code should drop to EL2. If you need EL3 later on for something PSCI does not address, then you are probably doing things wrong.
This part of the code supports all ELs. I think it is not wrong here. There is a plan to have a secure monitor before u-boot and call it when special access is needed. I am going to reorganize the code here to reuse more.
Is the general ARMv8 code already dropping to EL2?
- flush_l3_cache
- Dickens L3 cache can be flushed by transitioning from FAM to SFONLY power
- state, by writing to HP-F P-state request register.
Other SOCs will have Dickens. Are these registers FSL specific? If not, this should be common.
I don't think they are FSL specific. But I haven't found a proper place to host it. Can you share what other SoCs have Dickens? If they are not supported yet, we can keep the code here until we are clear then move it out.
It is safe to say most if not all SOCs based on A53 and/or A57 will also be based on Dickens aka CCN-504.
Great. But in order to access the CCN-504 registers to flush the L3 cache, it needs to be at EL3. At least for the SoC I am debugging. Please suggest a location to host the CCN-504 code. Should it go to drivers/misc?
Also, I believe the proper way to flush Dickens is using the architected cache flushing method where you walk the levels out to level 3.
False. L3 cache gets flushed with instruction DCCIVAC. So if we flush the cache by range, it works OK. But it doesn't work with DCISW or DCCISW. If we flush cache by walking the levels, it doesn't work. We can only walk level 1 and level 2.
So the EL3 boot code should do any one-time invalidate all operations and u-boot in EL2 should only use range operations. There are limitations in the by way operations such as they are not SMP safe. If the by way operations are EL3 only, then that's probably a sign you are not doing things as intended. Or it could be an oversight and we need to figure out a common way to handle this across SOCs.
- */
+#define HNF0_PSTATE_REQ 0x04200010 +#define HNF1_PSTATE_REQ 0x04210010 +#define HNF2_PSTATE_REQ 0x04220010 +#define HNF3_PSTATE_REQ 0x04230010 +#define HNF4_PSTATE_REQ 0x04240010 +#define HNF5_PSTATE_REQ 0x04250010 +#define HNF6_PSTATE_REQ 0x04260010 +#define HNF7_PSTATE_REQ 0x04270010 +#define HNFPSTAT_MASK (0xFFFFFFFFFFFFFFFC) +#define HNFPSTAT_FAM 0x3 +#define HNFPSTAT_SFONLY 0x01
+static void hnf_pstate_req(u64 *ptr, u64 state) +{
int timeout = 1000;
out_le64(ptr, (in_le64(ptr) & HNFPSTAT_MASK) | (state & 0x3));
ptr++;
/* checking if the transition is completed */
while (timeout > 0) {
if (((in_le64(ptr) & 0x0c) >> 2) == (state & 0x3))
break;
udelay(100);
timeout--;
}
+}
+void flush_l3_cache(void) +{
hnf_pstate_req((u64 *)HNF0_PSTATE_REQ, HNFPSTAT_SFONLY);
hnf_pstate_req((u64 *)HNF1_PSTATE_REQ, HNFPSTAT_SFONLY);
hnf_pstate_req((u64 *)HNF2_PSTATE_REQ, HNFPSTAT_SFONLY);
hnf_pstate_req((u64 *)HNF3_PSTATE_REQ, HNFPSTAT_SFONLY);
hnf_pstate_req((u64 *)HNF4_PSTATE_REQ, HNFPSTAT_SFONLY);
hnf_pstate_req((u64 *)HNF5_PSTATE_REQ, HNFPSTAT_SFONLY);
hnf_pstate_req((u64 *)HNF6_PSTATE_REQ, HNFPSTAT_SFONLY);
hnf_pstate_req((u64 *)HNF7_PSTATE_REQ, HNFPSTAT_SFONLY);
hnf_pstate_req((u64 *)HNF0_PSTATE_REQ, HNFPSTAT_FAM);
hnf_pstate_req((u64 *)HNF1_PSTATE_REQ, HNFPSTAT_FAM);
hnf_pstate_req((u64 *)HNF2_PSTATE_REQ, HNFPSTAT_FAM);
hnf_pstate_req((u64 *)HNF3_PSTATE_REQ, HNFPSTAT_FAM);
hnf_pstate_req((u64 *)HNF4_PSTATE_REQ, HNFPSTAT_FAM);
hnf_pstate_req((u64 *)HNF5_PSTATE_REQ, HNFPSTAT_FAM);
hnf_pstate_req((u64 *)HNF6_PSTATE_REQ, HNFPSTAT_FAM);
hnf_pstate_req((u64 *)HNF7_PSTATE_REQ, HNFPSTAT_FAM);
+}
+/*
- This function is called from lib/board.c.
- It recreates MMU table in main memory. MMU and d-cache are enabled earlier.
- There is no need to disable d-cache for this operation.
- */
+void enable_caches(void) +{
final_mmu_setup();
flush_dcache_range(gd->arch.tlb_addr,
gd->arch.tlb_addr + gd->arch.tlb_size);
__asm_invalidate_tlb_all();
+} +#endif
+static inline u32 init_type(u32 cluster, int init_id)
init_type? That's a great name.
That is initiator type. It is a funny name used by many FSL SoCs.
By the way, I am changing the name to initiator_type.
+{
struct ccsr_gur *gur = (void *)(CONFIG_SYS_FSL_GUTS_ADDR);
u32 idx = (cluster >> (init_id * 8)) & TP_CLUSTER_INIT_MASK;
u32 type = in_le32(&gur->tp_ityp[idx]);
if (type & TP_ITYP_AV)
return type;
return 0;
+}
+u32 cpu_mask(void) +{
struct ccsr_gur __iomem *gur = (void *)(CONFIG_SYS_FSL_GUTS_ADDR);
int i = 0, count = 0;
u32 cluster, type, mask = 0;
do {
int j;
cluster = in_le32(&gur->tp_cluster[i].lower);
for (j = 0; j < TP_INIT_PER_CLUSTER; j++) {
type = init_type(cluster, j);
if (type) {
if (TP_ITYP_TYPE(type) == TP_ITYP_TYPE_ARM)
mask |= 1 << count;
count++;
}
}
i++;
} while ((cluster & TP_CLUSTER_EOC) != TP_CLUSTER_EOC);
return mask;
+}
+/*
- Return the number of cores on this SOC.
- */
+int cpu_numcores(void) +{
return hweight32(cpu_mask());
+}
+int fsl_qoriq_core_to_cluster(unsigned int core) +{
struct ccsr_gur __iomem *gur =
(void __iomem *)(CONFIG_SYS_FSL_GUTS_ADDR);
int i = 0, count = 0;
u32 cluster;
do {
int j;
cluster = in_le32(&gur->tp_cluster[i].lower);
for (j = 0; j < TP_INIT_PER_CLUSTER; j++) {
if (init_type(cluster, j)) {
if (count == core)
return i;
count++;
}
}
i++;
} while ((cluster & TP_CLUSTER_EOC) != TP_CLUSTER_EOC);
return -1; /* cannot identify the cluster */
+}
+u32 fsl_qoriq_core_to_type(unsigned int core) +{
struct ccsr_gur __iomem *gur =
(void __iomem *)(CONFIG_SYS_FSL_GUTS_ADDR);
int i = 0, count = 0;
u32 cluster, type;
do {
int j;
cluster = in_le32(&gur->tp_cluster[i].lower);
for (j = 0; j < TP_INIT_PER_CLUSTER; j++) {
type = init_type(cluster, j);
if (type) {
if (count == core)
return type;
count++;
}
}
i++;
} while ((cluster & TP_CLUSTER_EOC) != TP_CLUSTER_EOC);
return -1; /* cannot identify the cluster */
+}
Do you plan on supporting PSCI because all this core and cluster stuff belongs there.
What's PSCI?
Power State Coordination Interface, a spec from ARM to do cpu and cluster power/boot control. It's strongly encouraged for v8 and good luck with upstream kernel support without it.
There are patches for v7 to add a PSCI implementation to u-boot from Marc Zyngier on the list. It is debatable whether u-boot is the right place for it rather than separate secure firmware. There is also ARM Trusted Firmware, which implements PSCI and which you should look at.
Thanks for the explanation. The above code is to probe the number of cores, to support SMP.
This code is similar to FSL chassis generation 2 code, but it is for chassis generation 3. I plan to move it to a common place once we have another platform using chassis generation 3.
Gen2 would be PowerPC, right? Not sure how that is relevant.
It is not relevant to ARM.
/*
* Slave should wait for master clearing spin table.
* This sync prevents slaves from observing an incorrect
* value of spin table and jumping to wrong place.
*/
+#if defined(CONFIG_GICV2) || defined(CONFIG_GICV3) +#ifdef CONFIG_GICV2
ldr x0, =GICC_BASE
+#endif
bl gic_wait_for_interrupt
+#endif
/*
* All processors will enter EL2 and optionally EL1.
*/
bl armv8_switch_to_el2
+#ifdef CONFIG_ARMV8_SWITCH_TO_EL1
bl armv8_switch_to_el1
+#endif
b 2f
This all looks like cut and paste from existing startup code. Can't you refactor things?
Right. We have added some code which only applies to this SoC. That's why the copy-n-paste then modify. I am also holding other patches which add a lot more code into this file.
Then add callouts so you can add SOC specific initialization.
There is a reason to use weak for lowlevel_init. It would be easier to implement real low level init without adding callouts here and there. I am cleaning up the GICv2 macro here so it will look cleaner. If we end up with the same code when we have the real SoC, I will clean up this function. For now, I prefer the separate one.
York

On Thu, May 29, 2014 at 06:37:36PM +0100, Rob Herring wrote:
On Thu, May 29, 2014 at 10:19 AM, York Sun yorksun@freescale.com wrote:
On 05/29/2014 06:19 AM, Rob Herring wrote:
On Wed, May 28, 2014 at 6:46 PM, York Sun yorksun@freescale.com wrote:
<snip>
+static void set_pgtable_section(u64 *page_table, u64 index, u64 section,
u8 memory_type)
+{
u64 value;
value = section | PMD_TYPE_SECT | PMD_SECT_AF;
value |= PMD_ATTRINDX(memory_type);
page_table[index] = value;
+}
This function looks like it should be common.
There is a common version in arch/arm/cpu/armv8/cache_v8.c. This version has more flexibility. I am not sure which one will be used as common.
Then add the flexibility to the common one.
el = current_el();
We really can't have u-boot running at random ELs in v8 for different platforms. It's a mess on v7. You should never be at EL3. u-boot could be defined to run at EL1, but then you need to be able to go back to EL2 to boot the kernel. So really u-boot should always run at EL2 unless you are running in a VM, but that would be a different platform.
I have to run u-boot at EL3. Otherwise I can't access Dickens. I vaguely remember other reasons. I will have to dig it out if needed.
You may start in EL3, but then the early init code should drop to EL2. If you need EL3 later on for something PSCI does not address, then you are probably doing things wrong.
- flush_l3_cache
- Dickens L3 cache can be flushed by transitioning from FAM to SFONLY power
- state, by writing to HP-F P-state request register.
Other SOCs will have Dickens. Are these registers FSL specific? If not, this should be common.
I don't think they are FSL specific. But I haven't found a proper place to host it. Can you share what other SoCs have Dickens? If they are not supported yet, we can keep the code here until we are clear then move it out.
It is safe to say most if not all SOCs based on A53 and/or A57 will also be based on Dickens aka CCN-504.
Also, I believe the proper way to flush Dickens is using the architected cache flushing method where you walk the levels out to level 3.
False. L3 cache gets flushed with instruction DCCIVAC. So if we flush the cache by range, it works OK. But it doesn't work with DCISW or DCCISW. If we flush cache by walking the levels, it doesn't work. We can only walk level 1 and level 2.
So the EL3 boot code should do any one-time invalidate all operations and u-boot in EL2 should only use range operations. There are limitations in the by way operations such as they are not SMP safe. If the by way operations are EL3 only, then that's probably a sign you are not doing things as intended. Or it could be an oversight and we need to figure out a common way to handle this across SOCs.
This is the only sane way of doing things.
The set/way operations can only be used for two things:
* One-off IMPLEMENTATION DEFINED cache initialisation (for power on).
* Emptying of a given CPU's architected cache levels (which is only necessary for power off).
In neither of these cases are the operations used to provide any sort of visibility guarantee (i.e. they do not make uncached data visible to CPUs, or cached data visible to other observers). In both of these cases if you have PSCI this is hidden by the implementation.
It should be entirely possible to flush/clean the data/area you care about by VA.
Given that this L3 cache (and, IIRC, the APM L3) can handle VA operations, I would rather that U-Boot and general-purpose OSs always assume that such a cache is present, outside of our control and already enabled. While this necessitates some cache flushing before turning the MMU and caches on, it will otherwise be transparent.
This is what we expect in Linux since c218bca74eea (arm64: Relax the kernel cache requirements for boot). It makes a single image possible in the presence of system caches, and makes things consistent for KVM, kexec, and chain-loading U-Boot.
Cheers, Mark.

From: "J. German Rivera" German.Rivera@freescale.com
Adding support to load and start the Layerscape Management Complex (MC) firmware. First, the MC GCR register is set to 0 to reset all cores. MC firmware and DPL images are copied from their location in NOR flash to DDR. MC registers are updated with the location of these images. Deasserting the reset bit of MC GCR register releases core 0 to run. Core 1 will be released by MC firmware. Stop bits are not touched for this step. U-boot waits for MC until it boots up. In case of a failure, device tree is updated accordingly. The MC firmware image uses FIT format.
Signed-off-by: J. German Rivera German.Rivera@freescale.com Signed-off-by: York Sun yorksun@freescale.com Signed-off-by: Lijun Pan Lijun.Pan@freescale.com Signed-off-by: Shruti Kanetkar Shruti@Freescale.com --- Change log: v3: Add error detection and update device tree if failure Revise loading address to avoid overlap Use FIT image for the firmware Remove blank lines at the end of files
README | 27 ++++ arch/arm/cpu/armv8/fsl-lsch3/cpu.c | 11 ++ drivers/net/Makefile | 1 + drivers/net/fsl_mc/Makefile | 8 ++ drivers/net/fsl_mc/mc.c | 274 ++++++++++++++++++++++++++++++++++++ include/fdt_support.h | 14 +- include/fsl_mc.h | 59 ++++++++ 7 files changed, 391 insertions(+), 3 deletions(-) create mode 100644 drivers/net/fsl_mc/Makefile create mode 100644 drivers/net/fsl_mc/mc.c create mode 100644 include/fsl_mc.h
diff --git a/README b/README index a280435..95a0282 100644 --- a/README +++ b/README @@ -4640,6 +4640,33 @@ within that device. window->master inbound window->master LAW->the ucode address in master's memory space.
+Freescale Layerscape Management Complex Firmware Support: +--------------------------------------------------------- +The Freescale Layerscape Management Complex (MC) supports the loading of +"firmware". +This firmware often needs to be loaded during U-Boot booting, so macros +are used to identify the storage device (NOR flash, SPI, etc) and the address +within that device. + +- CONFIG_FSL_MC_ENET + Enable the MC driver for Layerscape SoCs. + +- CONFIG_SYS_LS_MC_FW_ADDR + The address in the storage device where the firmware is located. The + meaning of this address depends on which CONFIG_SYS_LS_MC_FW_IN_xxx macro + is also specified. + +- CONFIG_SYS_LS_MC_FW_LENGTH + The maximum possible size of the firmware. The firmware binary format + has a field that specifies the actual size of the firmware, but it + might not be possible to read any part of the firmware unless some + local storage is allocated to hold the entire firmware first. + +- CONFIG_SYS_LS_MC_FW_IN_NOR + Specifies that MC firmware is located in NOR flash, mapped as + normal addressable memory via the LBC. CONFIG_SYS_LS_MC_FW_ADDR is the + virtual address in NOR flash. + Building the Software: ======================
diff --git a/arch/arm/cpu/armv8/fsl-lsch3/cpu.c b/arch/arm/cpu/armv8/fsl-lsch3/cpu.c index 2780390..42fbbdb 100644 --- a/arch/arm/cpu/armv8/fsl-lsch3/cpu.c +++ b/arch/arm/cpu/armv8/fsl-lsch3/cpu.c @@ -12,6 +12,7 @@ #include <asm/arch-fsl-lsch3/immap_lsch3.h> #include "cpu.h" #include "speed.h" +#include <fsl_mc.h>
DECLARE_GLOBAL_DATA_PTR;
@@ -472,3 +473,13 @@ int print_cpuinfo(void) return 0; } #endif + +int cpu_eth_init(bd_t *bis) +{ + int error = 0; + +#ifdef CONFIG_FSL_MC_ENET + error = mc_init(bis); +#endif + return error; +} diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 6005f7e..6226cb2 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -64,3 +64,4 @@ obj-$(CONFIG_XILINX_EMACLITE) += xilinx_emaclite.o obj-$(CONFIG_XILINX_LL_TEMAC) += xilinx_ll_temac.o xilinx_ll_temac_mdio.o \ xilinx_ll_temac_fifo.o xilinx_ll_temac_sdma.o obj-$(CONFIG_ZYNQ_GEM) += zynq_gem.o +obj-$(CONFIG_FSL_MC_ENET) += fsl_mc/ diff --git a/drivers/net/fsl_mc/Makefile b/drivers/net/fsl_mc/Makefile new file mode 100644 index 0000000..4834086 --- /dev/null +++ b/drivers/net/fsl_mc/Makefile @@ -0,0 +1,8 @@ +# +# Copyright 2014 Freescale Semiconductor, Inc. +# +# SPDX-License-Identifier: GPL-2.0+ +# + +# Layerscape MC driver +obj-y += mc.o diff --git a/drivers/net/fsl_mc/mc.c b/drivers/net/fsl_mc/mc.c new file mode 100644 index 0000000..58fd674 --- /dev/null +++ b/drivers/net/fsl_mc/mc.c @@ -0,0 +1,274 @@ +/* + * Copyright (C) 2014 Freescale Semiconductor + * + * SPDX-License-Identifier: GPL-2.0+ + */ +#include <errno.h> +#include <asm/io.h> +#include <fsl_mc.h> + +DECLARE_GLOBAL_DATA_PTR; +static int mc_boot_status; + +/** + * Copying MC firmware or DPL image to DDR + */ +static int mc_copy_image(const char *title, + u64 image_addr, u32 image_size, u64 mc_ram_addr) +{ + debug("%s copied to address %p\n", title, (void *)mc_ram_addr); + memcpy((void *)mc_ram_addr, (void *)image_addr, image_size); + return 0; +} + +/** + * MC firmware FIT image parser checks if the image is in FIT + * format, verifies integrity of the image and calculates + * raw image address and size values. + * Returns 0 if success and 1 if any of the above mentioned + * task fail. 
+ **/ + +int parse_mc_firmware_fit_image(const void **raw_image_addr, + size_t *raw_image_size) +{ + int format; + void *fit_hdr; + int node_offset; + const void *data; + size_t size; + const char *uname = "firmware"; + + /* Check if the image is in NOR flash*/ +#ifdef CONFIG_SYS_LS_MC_FW_IN_NOR + fit_hdr = (void *)CONFIG_SYS_LS_MC_FW_ADDR; +#else +#error "No CONFIG_SYS_LS_MC_FW_IN_xxx defined" +#endif + + /* Check if Image is in FIT format */ + format = genimg_get_format(fit_hdr); + + if (format != IMAGE_FORMAT_FIT) { + debug("Not a FIT image\n"); + return 1; + } + + if (!fit_check_format(fit_hdr)) { + debug("Bad FIT image format\n"); + return 1; + } + + /* Find node offset of MC Firmware image */ + if (uname == NULL) { + debug("FIT subimage unit name not provided"); + return 1; + } + + node_offset = fit_image_get_node(fit_hdr, uname); + + if (node_offset < 0) { + debug("Can not find %s subimage\n", uname); + return 1; + } + + /* Verify MC firmware image */ + if (!(fit_image_verify(fit_hdr, node_offset))) { + debug("Bad MC firmware hash"); + return 1; + } + + /* Get address and size of raw image */ + fit_image_get_data(fit_hdr, node_offset, &data, &size); + + *raw_image_addr = data; + *raw_image_size = size; + + return 0; +} + +int mc_init(bd_t *bis) +{ + int error = 0; + int timeout = 200000; + struct mc_ccsr_registers __iomem *mc_ccsr_regs = MC_CCSR_BASE_ADDR; + u64 mc_ram_addr; + u64 mc_dpl_offset; + u32 reg_gsr; + u32 mc_fw_boot_status; + void *fdt_hdr; + int dpl_size; + + BUILD_BUG_ON(CONFIG_SYS_LS_MC_FW_LENGTH % 4 != 0); + + /* + * The MC private DRAM block was already carved at the end of DRAM + * by board_init_f() using CONFIG_SYS_MEM_TOP_HIDE: + */ + if (gd->bd->bi_dram[1].start) { + mc_ram_addr = + gd->bd->bi_dram[1].start + gd->bd->bi_dram[1].size; + } else { + mc_ram_addr = + gd->bd->bi_dram[0].start + gd->bd->bi_dram[0].size; + } + + /* + * Management Complex cores should be held at reset out of POR. 
+ * U-boot should be the first software to touch MC. To be safe, + * we reset all cores again by setting GCR1 to 0. It doesn't do + * anything if they are held at reset. After we setup the firmware + * we kick off MC by deasserting the reset bit for core 0, and + * deasserting the reset bits for Command Portal Managers. + * The stop bits are not touched here. They are used to stop the + * cores when they are active. Setting stop bits doesn't stop the + * cores from fetching instructions when they are released from + * reset. + */ + out_le32(&mc_ccsr_regs->reg_gcr1, 0); + dmb(); + + /* Call MC FW parser function */ + const void *raw_image_addr; + size_t raw_image_size = 0; + + error = parse_mc_firmware_fit_image(&raw_image_addr, &raw_image_size); + if (error != 0) + goto out; + /* + * Load the MC FW at the beginning of the MC private DRAM block: + */ + mc_copy_image( + "MC Firmware", + (u64)raw_image_addr, + raw_image_size, + mc_ram_addr); + + /* + * Calculate offset in the MC private DRAM block at which the MC DPL + * blob is to be placed: + */ +#ifdef CONFIG_SYS_LS_MC_DRAM_DPL_OFFSET + BUILD_BUG_ON( + (CONFIG_SYS_LS_MC_DRAM_DPL_OFFSET & 0x3) != 0 || + CONFIG_SYS_LS_MC_DRAM_DPL_OFFSET > 0xffffffff); + + mc_dpl_offset = CONFIG_SYS_LS_MC_DRAM_DPL_OFFSET; +#else + mc_dpl_offset = mc_get_dram_block_size() - + roundup(CONFIG_SYS_LS_MC_DPL_LENGTH, 4096); + + if ((mc_dpl_offset & 0x3) != 0 || mc_dpl_offset > 0xffffffff) { + printf("%s: Invalid MC DPL offset: %llu\n", + __func__, mc_dpl_offset); + error = -EINVAL; + goto out; + } +#endif + + /* Check if DPL image is in NOR flash */ +#ifdef CONFIG_SYS_LS_MC_DPL_IN_NOR + fdt_hdr = (void *)CONFIG_SYS_LS_MC_DPL_ADDR; +#else +#error "No CONFIG_SYS_LS_MC_DPL_IN_xxx defined" +#endif + + dpl_size = fdt_totalsize(fdt_hdr); + + /* + * Load the MC DPL blob at the far end of the MC private DRAM block: + */ + mc_copy_image( + "MC DPL blob", + (u64)fdt_hdr, + dpl_size, + mc_ram_addr + mc_dpl_offset); + + debug("mc_ccsr_regs %p\n", 
mc_ccsr_regs); + + /* + * Tell MC where the MC Firmware image was loaded in DDR: + */ + out_le32(&mc_ccsr_regs->reg_mcfbalr, (u32)mc_ram_addr); + out_le32(&mc_ccsr_regs->reg_mcfbahr, (u32)((u64)mc_ram_addr >> 32)); + out_le32(&mc_ccsr_regs->reg_mcfapr, MCFAPR_BYPASS_ICID_MASK); + + /* + * Tell MC where the DPL blob was loaded in DDR, by indicating + * its offset relative to the beginning of the DDR block + * allocated to the MC firmware. The MC firmware is responsible + * for checking that there is no overlap between the DPL blob + * and the runtime heap and stack of the MC firmware itself. + * + * NOTE: bits [31:2] of this offset need to be stored in bits [29:0] of + * the GSR MC CCSR register. So, this offset is assumed to be 4-byte + * aligned. + * Care must be taken not to write 1s into bits 31 and 30 of the GSR in + * this case as the SoC COP or PIC will be signaled. + */ + out_le32(&mc_ccsr_regs->reg_gsr, (u32)(mc_dpl_offset >> 2)); + + /* + * Deassert reset and release MC core 0 to run + */ + out_le32(&mc_ccsr_regs->reg_gcr1, GCR1_P1_DE_RST | GCR1_M_ALL_DE_RST); + dmb(); + debug("Polling mc_ccsr_regs->reg_gsr ...\n"); + + for (;;) { + reg_gsr = in_le32(&mc_ccsr_regs->reg_gsr); + mc_fw_boot_status = (reg_gsr & GSR_FS_MASK); + if (mc_fw_boot_status & 0x1) + break; + + udelay(1000); /* throttle polling */ + if (timeout-- <= 0) + break; + } + + if (timeout <= 0) { + printf("%s: timeout booting management complex firmware\n", + __func__); + + /* TODO: Get an error status from an MC CCSR register */ + error = -ETIMEDOUT; + goto out; + } + + printf("Management complex booted (boot status: %#x)\n", + mc_fw_boot_status); + + if (mc_fw_boot_status != 0x1) { + /* + * TODO: Identify critical errors from the GSR register's FS + * field and for those errors, set error to -ENODEV or other + * appropriate errno, so that the status property is set to + * failure in the fsl,dprc device tree node. 
+ */ + } + +out: + if (error != 0) + mc_boot_status = -error; + else + mc_boot_status = 0; + + return error; +} + +int get_mc_boot_status(void) +{ + return mc_boot_status; +} + +/** + * Return the actual size of the MC private DRAM block. + * + * NOTE: For now this function always returns the minimum required size, + * However, in the future, the actual size may be obtained from an environment + * variable. + */ +unsigned long mc_get_dram_block_size(void) +{ + return CONFIG_SYS_LS_MC_DRAM_BLOCK_MIN_SIZE; +} diff --git a/include/fdt_support.h b/include/fdt_support.h index ae010bb..f928306 100644 --- a/include/fdt_support.h +++ b/include/fdt_support.h @@ -113,17 +113,25 @@ static inline int fdt_status_disabled(void *fdt, int nodeoffset) { return fdt_set_node_status(fdt, nodeoffset, FDT_STATUS_DISABLED, 0); } +static inline int fdt_status_fail(void *fdt, int nodeoffset) +{ + return fdt_set_node_status(fdt, nodeoffset, FDT_STATUS_FAIL, 0); +}
-int fdt_set_status_by_alias(void *fdt, const char* alias, +int fdt_set_status_by_alias(void *fdt, const char *alias, enum fdt_status status, unsigned int error_code); -static inline int fdt_status_okay_by_alias(void *fdt, const char* alias) +static inline int fdt_status_okay_by_alias(void *fdt, const char *alias) { return fdt_set_status_by_alias(fdt, alias, FDT_STATUS_OKAY, 0); } -static inline int fdt_status_disabled_by_alias(void *fdt, const char* alias) +static inline int fdt_status_disabled_by_alias(void *fdt, const char *alias) { return fdt_set_status_by_alias(fdt, alias, FDT_STATUS_DISABLED, 0); } +static inline int fdt_status_fail_by_alias(void *fdt, const char *alias) +{ + return fdt_set_status_by_alias(fdt, alias, FDT_STATUS_FAIL, 0); +}
#endif /* ifdef CONFIG_OF_LIBFDT */
diff --git a/include/fsl_mc.h b/include/fsl_mc.h new file mode 100644 index 0000000..b9f089e --- /dev/null +++ b/include/fsl_mc.h @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2014 Freescale Semiconductor + * + * SPDX-License-Identifier: GPL-2.0+ + */ + +#ifndef __FSL_MC_H__ +#define __FSL_MC_H__ + +#include <common.h> + +#define MC_CCSR_BASE_ADDR \ + ((struct mc_ccsr_registers __iomem *)0x8340000) + +#define BIT(x) (1 << (x)) +#define GCR1_P1_STOP BIT(31) +#define GCR1_P2_STOP BIT(30) +#define GCR1_P1_DE_RST BIT(23) +#define GCR1_P2_DE_RST BIT(22) +#define GCR1_M1_DE_RST BIT(15) +#define GCR1_M2_DE_RST BIT(14) +#define GCR1_M_ALL_DE_RST (GCR1_M1_DE_RST | GCR1_M2_DE_RST) +#define GSR_FS_MASK 0x3fffffff +#define MCFAPR_PL_MASK (0x1 << 18) +#define MCFAPR_BMT_MASK (0x1 << 17) +#define MCFAPR_BYPASS_ICID_MASK \ + (MCFAPR_PL_MASK | MCFAPR_BMT_MASK) + +#define SOC_MC_PORTALS_BASE_ADDR ((void __iomem *)0x00080C000000) +#define SOC_MC_PORTAL_STRIDE 0x10000 + +#define SOC_MC_PORTAL_ADDR(_portal_id) \ + ((void __iomem *)((uintptr_t)SOC_MC_PORTALS_BASE_ADDR + \ + (_portal_id) * SOC_MC_PORTAL_STRIDE)) + +struct mc_ccsr_registers { + u32 reg_gcr1; + u32 reserved1; + u32 reg_gsr; + u32 reserved2; + u32 reg_sicbalr; + u32 reg_sicbahr; + u32 reg_sicapr; + u32 reserved3; + u32 reg_mcfbalr; + u32 reg_mcfbahr; + u32 reg_mcfapr; + u32 reserved4[0x2f1]; + u32 reg_psr; + u32 reserved5; + u32 reg_brr[2]; + u32 reserved6[0x80]; + u32 reg_error[]; +}; + +int mc_init(bd_t *bis); + +int get_mc_boot_status(void); +#endif

LS2100A is an ARMv8 implementation. This adds board support for the emulator and simulator: two DDR controllers; UART2 is used as the console; IFC timing is tightened for speedy booting; DDR3 and DDR4 are supported as separate targets; Management Complex (MC) is enabled.
Signed-off-by: York Sun yorksun@freescale.com Signed-off-by: Arnab Basu arnab.basu@freescale.com Signed-off-by: J. German Rivera German.Rivera@freescale.com Signed-off-by: Bhupesh Sharma bhupesh.sharma@freescale.com --- Change log: v3: Add support for DDR4 target and simulator target; squash Management Complex patch (previous 5/5) into this one; reserve last 512MB of memory for MC use; change MC firmware location in NOR flash; fix UART clock source speed; update IFC address mux; use generic board; disable DDR memory beyond the 39-bit physical address space due to a Linux limitation.
Some of these changes were necessitated by changes in the model.
board/freescale/ls2100a/Makefile | 8 ++ board/freescale/ls2100a/README | 16 +++ board/freescale/ls2100a/ddr.c | 175 ++++++++++++++++++++++++++++ board/freescale/ls2100a/ddr.h | 57 ++++++++++ board/freescale/ls2100a/ls2100a.c | 100 ++++++++++++++++ boards.cfg | 3 + include/configs/ls2100a_common.h | 226 +++++++++++++++++++++++++++++++++++++ include/configs/ls2100a_emu.h | 19 ++++ include/configs/ls2100a_simu.h | 16 +++ 9 files changed, 620 insertions(+) create mode 100644 board/freescale/ls2100a/Makefile create mode 100644 board/freescale/ls2100a/README create mode 100644 board/freescale/ls2100a/ddr.c create mode 100644 board/freescale/ls2100a/ddr.h create mode 100644 board/freescale/ls2100a/ls2100a.c create mode 100644 include/configs/ls2100a_common.h create mode 100644 include/configs/ls2100a_emu.h create mode 100644 include/configs/ls2100a_simu.h
diff --git a/board/freescale/ls2100a/Makefile b/board/freescale/ls2100a/Makefile new file mode 100644 index 0000000..c8da338 --- /dev/null +++ b/board/freescale/ls2100a/Makefile @@ -0,0 +1,8 @@ +# +# Copyright 2014 Freescale Semiconductor +# +# SPDX-License-Identifier: GPL-2.0+ +# + +obj-y += ls2100a.o +obj-y += ddr.o diff --git a/board/freescale/ls2100a/README b/board/freescale/ls2100a/README new file mode 100644 index 0000000..9a8a618 --- /dev/null +++ b/board/freescale/ls2100a/README @@ -0,0 +1,16 @@ +Freescale ls2100a_emu + +This is a emulator target with limited peripherals. + +Memory map from core's view + +0x00_0000_0000 .. 0x00_000F_FFFF Boot Rom +0x00_0100_0000 .. 0x00_0FFF_FFFF CCSR +0x00_1800_0000 .. 0x00_181F_FFFF OCRAM +0x00_3000_0000 .. 0x00_3FFF_FFFF IFC region #1 +0x00_8000_0000 .. 0x00_FFFF_FFFF DDR region #1 +0x05_1000_0000 .. 0x05_FFFF_FFFF IFC region #2 +0x80_8000_0000 .. 0xFF_FFFF_FFFF DDR region #2 + +Other addresses are either reserved, or not used directly by u-boot. +This list should be updated when more addresses are used. diff --git a/board/freescale/ls2100a/ddr.c b/board/freescale/ls2100a/ddr.c new file mode 100644 index 0000000..257bc16 --- /dev/null +++ b/board/freescale/ls2100a/ddr.c @@ -0,0 +1,175 @@ +/* + * Copyright 2014 Freescale Semiconductor, Inc. + * + * SPDX-License-Identifier: GPL-2.0+ + */ + +#include <common.h> +#include <fsl_ddr_sdram.h> +#include <fsl_ddr_dimm_params.h> +#include "ddr.h" + +DECLARE_GLOBAL_DATA_PTR; + +void fsl_ddr_board_options(memctl_options_t *popts, + dimm_params_t *pdimm, + unsigned int ctrl_num) +{ + const struct board_specific_parameters *pbsp, *pbsp_highest = NULL; + ulong ddr_freq; + + if (ctrl_num > 3) { + printf("Not supported controller number %d\n", ctrl_num); + return; + } + if (!pdimm->n_ranks) + return; + + /* + * we use identical timing for all slots. 
If needed, change the code + * to pbsp = rdimms[ctrl_num] or pbsp = udimms[ctrl_num]; + */ + if (popts->registered_dimm_en) + pbsp = rdimms[0]; + else + pbsp = udimms[0]; + + + /* Get clk_adjust, wrlvl_start, wrlvl_ctl, according to the board ddr + * freqency and n_banks specified in board_specific_parameters table. + */ + ddr_freq = get_ddr_freq(0) / 1000000; + while (pbsp->datarate_mhz_high) { + if (pbsp->n_ranks == pdimm->n_ranks && + (pdimm->rank_density >> 30) >= pbsp->rank_gb) { + if (ddr_freq <= pbsp->datarate_mhz_high) { + popts->clk_adjust = pbsp->clk_adjust; + popts->wrlvl_start = pbsp->wrlvl_start; + popts->wrlvl_ctl_2 = pbsp->wrlvl_ctl_2; + popts->wrlvl_ctl_3 = pbsp->wrlvl_ctl_3; + goto found; + } + pbsp_highest = pbsp; + } + pbsp++; + } + + if (pbsp_highest) { + printf("Error: board specific timing not found for data rate %lu MT/s\n" + "Trying to use the highest speed (%u) parameters\n", + ddr_freq, pbsp_highest->datarate_mhz_high); + popts->clk_adjust = pbsp_highest->clk_adjust; + popts->wrlvl_start = pbsp_highest->wrlvl_start; + popts->wrlvl_ctl_2 = pbsp->wrlvl_ctl_2; + popts->wrlvl_ctl_3 = pbsp->wrlvl_ctl_3; + } else { + panic("DIMM is not supported by this board"); + } +found: + debug("Found timing match: n_ranks %d, data rate %d, rank_gb %d\n" + "\tclk_adjust %d, wrlvl_start %d, wrlvl_ctrl_2 0x%x, wrlvl_ctrl_3 0x%x\n", + pbsp->n_ranks, pbsp->datarate_mhz_high, pbsp->rank_gb, + pbsp->clk_adjust, pbsp->wrlvl_start, pbsp->wrlvl_ctl_2, + pbsp->wrlvl_ctl_3); + + /* + * Factors to consider for half-strength driver enable: + * - number of DIMMs installed + */ + popts->half_strength_driver_enable = 1; + /* + * Write leveling override + */ + popts->wrlvl_override = 1; + popts->wrlvl_sample = 0xf; + + /* + * Rtt and Rtt_WR override + */ + popts->rtt_override = 0; + + /* Enable ZQ calibration */ + popts->zq_en = 1; + +#ifdef CONFIG_SYS_FSL_DDR4 + popts->ddr_cdr1 = DDR_CDR1_DHC_EN | DDR_CDR1_ODT(DDR_CDR_ODT_80ohm); + popts->ddr_cdr2 = 
DDR_CDR2_ODT(DDR_CDR_ODT_80ohm) | + DDR_CDR2_VREF_OVRD(70); /* Vref = 70% */ +#else + /* DHC_EN =1, ODT = 75 Ohm */ + popts->ddr_cdr1 = DDR_CDR1_DHC_EN | DDR_CDR1_ODT(DDR_CDR_ODT_75ohm); + popts->ddr_cdr2 = DDR_CDR2_ODT(DDR_CDR_ODT_75ohm); +#endif +} + +#ifdef CONFIG_SYS_DDR_RAW_TIMING +dimm_params_t ddr_raw_timing = { + .n_ranks = 2, + .rank_density = 1073741824u, + .capacity = 2147483648, + .primary_sdram_width = 64, + .ec_sdram_width = 0, + .registered_dimm = 0, + .mirrored_dimm = 0, + .n_row_addr = 14, + .n_col_addr = 10, + .n_banks_per_sdram_device = 8, + .edc_config = 0, + .burst_lengths_bitmask = 0x0c, + + .tckmin_x_ps = 937, + .caslat_x = 0x6FC << 4, /* 14,13,11,10,9,8,7,6 */ + .taa_ps = 13090, + .twr_ps = 15000, + .trcd_ps = 13090, + .trrd_ps = 5000, + .trp_ps = 13090, + .tras_ps = 33000, + .trc_ps = 46090, + .trfc_ps = 160000, + .twtr_ps = 7500, + .trtp_ps = 7500, + .refresh_rate_ps = 7800000, + .tfaw_ps = 25000, +}; + +int fsl_ddr_get_dimm_params(dimm_params_t *pdimm, + unsigned int controller_number, + unsigned int dimm_number) +{ + const char dimm_model[] = "Fixed DDR on board"; + + if (((controller_number == 0) && (dimm_number == 0)) || + ((controller_number == 1) && (dimm_number == 0))) { + memcpy(pdimm, &ddr_raw_timing, sizeof(dimm_params_t)); + memset(pdimm->mpart, 0, sizeof(pdimm->mpart)); + memcpy(pdimm->mpart, dimm_model, sizeof(dimm_model) - 1); + } + + return 0; +} +#endif +phys_size_t initdram(int board_type) +{ + phys_size_t dram_size; + + puts("Initializing DDR...."); + + puts("using SPD\n"); + dram_size = fsl_ddr_sdram(); + + return dram_size; +} + +void dram_init_banksize(void) +{ + gd->bd->bi_dram[0].start = CONFIG_SYS_SDRAM_BASE; + if (gd->ram_size > CONFIG_SYS_LS2_DDR_BLOCK1_SIZE) { + gd->bd->bi_dram[0].size = CONFIG_SYS_LS2_DDR_BLOCK1_SIZE; + gd->bd->bi_dram[1].start = CONFIG_SYS_DDR_BLOCK2_BASE; + gd->bd->bi_dram[1].size = gd->ram_size - + CONFIG_SYS_LS2_DDR_BLOCK1_SIZE; + } else { + gd->bd->bi_dram[0].size = gd->ram_size; + } +} 
diff --git a/board/freescale/ls2100a/ddr.h b/board/freescale/ls2100a/ddr.h new file mode 100644 index 0000000..77f6aaf --- /dev/null +++ b/board/freescale/ls2100a/ddr.h @@ -0,0 +1,57 @@ +/* + * Copyright 2014 Freescale Semiconductor, Inc. + * + * SPDX-License-Identifier: GPL-2.0+ + */ + +#ifndef __DDR_H__ +#define __DDR_H__ +struct board_specific_parameters { + u32 n_ranks; + u32 datarate_mhz_high; + u32 rank_gb; + u32 clk_adjust; + u32 wrlvl_start; + u32 wrlvl_ctl_2; + u32 wrlvl_ctl_3; +}; + +/* + * These tables contain all valid speeds we want to override with board + * specific parameters. datarate_mhz_high values need to be in ascending order + * for each n_ranks group. + */ + +static const struct board_specific_parameters udimm0[] = { + /* + * memory controller 0 + * num| hi| rank| clk| wrlvl | wrlvl | wrlvl + * ranks| mhz| GB |adjst| start | ctl2 | ctl3 + */ + {2, 2140, 0, 4, 4, 0x0, 0x0}, + {1, 2140, 0, 4, 4, 0x0, 0x0}, + {} +}; + +static const struct board_specific_parameters rdimm0[] = { + /* + * memory controller 0 + * num| hi| rank| clk| wrlvl | wrlvl | wrlvl + * ranks| mhz| GB |adjst| start | ctl2 | ctl3 + */ + {4, 2140, 0, 5, 4, 0x0, 0x0}, + {2, 2140, 0, 5, 4, 0x0, 0x0}, + {1, 2140, 0, 4, 4, 0x0, 0x0}, + {} +}; + +static const struct board_specific_parameters *udimms[] = { + udimm0, +}; + +static const struct board_specific_parameters *rdimms[] = { + rdimm0, +}; + + +#endif diff --git a/board/freescale/ls2100a/ls2100a.c b/board/freescale/ls2100a/ls2100a.c new file mode 100644 index 0000000..a18db1d --- /dev/null +++ b/board/freescale/ls2100a/ls2100a.c @@ -0,0 +1,100 @@ +/* + * Copyright 2014 Freescale Semiconductor + * + * SPDX-License-Identifier: GPL-2.0+ + */ +#include <common.h> +#include <malloc.h> +#include <errno.h> +#include <netdev.h> +#include <fsl_ifc.h> +#include <fsl_ddr.h> +#include <asm/io.h> +#include <fdt_support.h> +#include <libfdt.h> +#include <fsl_mc.h> + +DECLARE_GLOBAL_DATA_PTR; + +int board_init(void) +{ + 
init_final_memctl_regs(); + return 0; +} + +int board_early_init_f(void) +{ + init_early_memctl_regs(); /* tighten IFC timing */ + + return 0; +} + +int dram_init(void) +{ + printf("DRAM: "); + gd->ram_size = initdram(0); + + return 0; +} + +int timer_init(void) +{ + u32 __iomem *cntcr = (u32 *)CONFIG_SYS_FSL_TIMER_ADDR; + u32 __iomem *cltbenr = (u32 *)CONFIG_SYS_FSL_PMU_CLTBENR; + + out_le32(cltbenr, 0x1); /* enable cluster0 timebase */ + out_le32(cntcr, 0x1); /* enable clock for timer */ + + return 0; +} + +/* + * Board specific reset that is system reset. + */ +void reset_cpu(ulong addr) +{ +} + +int board_eth_init(bd_t *bis) +{ + int error = 0; + +#ifdef CONFIG_SMC91111 + error = smc91111_initialize(0, CONFIG_SMC91111_BASE); +#endif + +#ifdef CONFIG_FSL_MC_ENET + error = cpu_eth_init(bis); +#endif + return error; +} + +#ifdef CONFIG_FSL_MC_ENET +void fdt_fixup_board_enet(void *fdt) +{ + int offset; + + offset = fdt_path_offset(fdt, "/fsl,dprc@0"); + if (get_mc_boot_status() == 0) + fdt_status_okay(fdt, offset); + else + fdt_status_fail(fdt, offset); +} +#endif + +#ifdef CONFIG_OF_BOARD_SETUP +void ft_board_setup(void *blob, bd_t *bd) +{ + phys_addr_t base; + phys_size_t size; + + /* limit the memory size to bank 1 until Linux can handle 40-bit PA */ + base = getenv_bootm_low(); + size = getenv_bootm_size(); + fdt_fixup_memory(blob, (u64)base, (u64)size); + +#ifdef CONFIG_FSL_MC_ENET + fdt_fixup_board_enet(blob); +#endif +} +#endif diff --git a/boards.cfg b/boards.cfg index 8b1177c..6670a4c 100644 --- a/boards.cfg +++ b/boards.cfg @@ -44,6 +44,9 @@ ###########################################################################################################
Active aarch64 armv8 - armltd vexpress64 vexpress_aemv8a vexpress_aemv8a:ARM64 David Feng fenghua@phytium.com.cn +Active aarch64 armv8 fsl-lsch3 freescale ls2100a ls2100a_emu ls2100a_emu:ARM64,EMU York Sun yorksun@freescale.com +Active aarch64 armv8 fsl-lsch3 freescale ls2100a ls2100a_emu_D4 ls2100a_emu:ARM64,EMU,SYS_FSL_DDR4 York Sun yorksun@freescale.com +Active aarch64 armv8 fsl-lsch3 freescale ls2100a ls2100a_simu ls2100a_simu:ARM64,SIMU York Sun yorksun@freescale.com Active arc arc700 - synopsys - axs101 - Alexey Brodkin abrodkin@synopsys.com Active arc arc700 - synopsys <none> arcangel4 - Alexey Brodkin abrodkin@synopsys.com Active arc arc700 - synopsys <none> arcangel4-be - Alexey Brodkin abrodkin@synopsys.com diff --git a/include/configs/ls2100a_common.h b/include/configs/ls2100a_common.h new file mode 100644 index 0000000..f9f904e --- /dev/null +++ b/include/configs/ls2100a_common.h @@ -0,0 +1,226 @@ +/* + * Copyright (C) 2014 Freescale Semiconductor + * + * SPDX-License-Identifier: GPL-2.0+ + */ + +#ifndef __LS2_COMMON_H +#define __LS2_COMMON_H + +#define CONFIG_SYS_GENERIC_BOARD + +#define CONFIG_REMAKE_ELF +#define CONFIG_FSL_LSCH3 +#define CONFIG_LS2100A +#define CONFIG_GICV3 + +/* Link Definitions */ +#define CONFIG_SYS_TEXT_BASE 0x30000000 + +#define CONFIG_SYS_NO_FLASH + +#define CONFIG_SUPPORT_RAW_INITRD + +#define CONFIG_SKIP_LOWLEVEL_INIT +#define CONFIG_BOARD_EARLY_INIT_F 1 + +#define CONFIG_IDENT_STRING " LS2100A-EMU" +#define CONFIG_BOOTP_VCI_STRING "U-boot.LS2100A-EMU" + +/* Flat Device Tree Definitions */ +#define CONFIG_OF_LIBFDT +#define CONFIG_OF_BOARD_SETUP + +/* new uImage format support */ +#define CONFIG_FIT +#define CONFIG_FIT_VERBOSE /* enable fit_format_{error,warning}() */ + +#define CONFIG_FSL_DDR_INTERACTIVE /* Interactive debugging */ +#ifndef CONFIG_SYS_FSL_DDR4 +#define CONFIG_SYS_FSL_DDR3 /* Use DDR3 memory */ +#define CONFIG_SYS_DDR_RAW_TIMING +#endif +#define CONFIG_DIMM_SLOTS_PER_CTLR 1 +#define 
CONFIG_CHIP_SELECTS_PER_CTRL 4 + +#define CONFIG_SYS_FSL_DDR_INTLV_256B /* force 256 byte interleaving */ + +/* SMP Definitions */ +#define CPU_RELEASE_ADDR CONFIG_SYS_INIT_SP_ADDR + +#define CONFIG_SYS_DDR_SDRAM_BASE 0x80000000UL +#define CONFIG_SYS_FSL_DDR_SDRAM_BASE_PHY 0 +#define CONFIG_SYS_SDRAM_BASE CONFIG_SYS_DDR_SDRAM_BASE +#define CONFIG_SYS_DDR_BLOCK2_BASE 0x8080000000ULL + +/* Generic Timer Definitions */ +#define COUNTER_FREQUENCY 12000000 /* 12MHz */ + +/* Size of malloc() pool */ +#define CONFIG_SYS_MALLOC_LEN (CONFIG_ENV_SIZE + 128 * 1024) + +/* I2C */ +#define CONFIG_CMD_I2C +#define CONFIG_SYS_I2C +#define CONFIG_SYS_I2C_MXC +#define CONFIG_SYS_MXC_I2C1_SPEED 40000000 +#define CONFIG_SYS_MXC_I2C2_SPEED 40000000 + +/* Serial Port */ +#define CONFIG_CONS_INDEX 2 +#define CONFIG_SYS_NS16550 +#define CONFIG_SYS_NS16550_SERIAL +#define CONFIG_SYS_NS16550_REG_SIZE 1 +#define CONFIG_SYS_NS16550_CLK (get_bus_freq(0)/2) + +#define CONFIG_BAUDRATE 115200 +#define CONFIG_SYS_BAUDRATE_TABLE { 9600, 19200, 38400, 57600, 115200 } + +/* IFC */ +#define CONFIG_FSL_IFC +#define CONFIG_SYS_NOR0_CSPR_EXT (0x0) +#define CONFIG_SYS_NOR_AMASK IFC_AMASK(128*1024*1024) +/* + * During booting, CS0 needs to be at the region of 0x30000000, i.e. the IFC + * address 0. But this region is limited to 256MB. To accommodate bigger NOR + * flash and other devices, we will map CS0 to 0x580000000 after relocation. 
+ * CONFIG_SYS_FLASH_BASE has the final address (core view) + * CONFIG_SYS_FLASH_BASE_PHYS has the final address (IFC view) + * CONFIG_SYS_FLASH_BASE_PHYS_EARLY has the temporary IFC address + * CONFIG_SYS_TEXT_BASE is linked to 0x30000000 for booting + */ +#define CONFIG_SYS_FLASH_BASE 0x580000000ULL +#define CONFIG_SYS_FLASH_BASE_PHYS 0x80000000 +#define CONFIG_SYS_FLASH_BASE_PHYS_EARLY 0x00000000 + +/* + * NOR Flash Timing Params + */ +#define CONFIG_SYS_NOR0_CSPR \ + (CSPR_PHYS_ADDR(CONFIG_SYS_FLASH_BASE_PHYS) | \ + CSPR_PORT_SIZE_16 | \ + CSPR_MSEL_NOR | \ + CSPR_V) +#define CONFIG_SYS_NOR0_CSPR_EARLY \ + (CSPR_PHYS_ADDR(CONFIG_SYS_FLASH_BASE_PHYS_EARLY) | \ + CSPR_PORT_SIZE_16 | \ + CSPR_MSEL_NOR | \ + CSPR_V) +#define CONFIG_SYS_NOR_CSOR CSOR_NOR_ADM_SHIFT(12) +#define CONFIG_SYS_NOR_FTIM0 (FTIM0_NOR_TACSE(0x1) | \ + FTIM0_NOR_TEADC(0x1) | \ + FTIM0_NOR_TEAHC(0x1)) +#define CONFIG_SYS_NOR_FTIM1 (FTIM1_NOR_TACO(0x1) | \ + FTIM1_NOR_TRAD_NOR(0x1)) +#define CONFIG_SYS_NOR_FTIM2 (FTIM2_NOR_TCS(0x0) | \ + FTIM2_NOR_TCH(0x0) | \ + FTIM2_NOR_TWP(0x1)) +#define CONFIG_SYS_NOR_FTIM3 0x04000000 +#define CONFIG_SYS_IFC_CCR 0x01000000 + +#define CONFIG_SYS_CSPR0_EXT CONFIG_SYS_NOR0_CSPR_EXT +#define CONFIG_SYS_CSPR0 CONFIG_SYS_NOR0_CSPR_EARLY +#define CONFIG_SYS_CSPR0_FINAL CONFIG_SYS_NOR0_CSPR +#define CONFIG_SYS_AMASK0 CONFIG_SYS_NOR_AMASK +#define CONFIG_SYS_CSOR0 CONFIG_SYS_NOR_CSOR +#define CONFIG_SYS_CS0_FTIM0 CONFIG_SYS_NOR_FTIM0 +#define CONFIG_SYS_CS0_FTIM1 CONFIG_SYS_NOR_FTIM1 +#define CONFIG_SYS_CS0_FTIM2 CONFIG_SYS_NOR_FTIM2 +#define CONFIG_SYS_CS0_FTIM3 CONFIG_SYS_NOR_FTIM3 + +/* MC firmware */ +#define CONFIG_FSL_MC_ENET +#define CONFIG_SYS_LS_MC_DRAM_BLOCK_MIN_SIZE (512UL * 1024 * 1024) +#define CONFIG_SYS_LS_MC_FW_IN_NOR +#define CONFIG_SYS_LS_MC_FW_ADDR 0x580200000ULL +/* TODO Actual FW length needs to be determined at runtime from FW header */ +#define CONFIG_SYS_LS_MC_FW_LENGTH (4U * 1024 * 1024) +#define CONFIG_SYS_LS_MC_DPL_IN_NOR +#define 
CONFIG_SYS_LS_MC_DPL_ADDR 0x5806C0000ULL +/* TODO Actual DPL max length needs to be confirmed with the MC FW team */ +#define CONFIG_SYS_LS_MC_DPL_LENGTH 4096 +#define CONFIG_SYS_LS_MC_DRAM_DPL_OFFSET 0xe00000 + +/* Carve the MC private DRAM block from the end of DRAM */ +#ifdef CONFIG_FSL_MC_ENET +#define CONFIG_SYS_MEM_TOP_HIDE mc_get_dram_block_size() +#endif + +/* Command line configuration */ +#define CONFIG_CMD_CACHE +#define CONFIG_CMD_BDI +#define CONFIG_CMD_DHCP +#define CONFIG_CMD_ENV +#define CONFIG_CMD_FLASH +#define CONFIG_CMD_IMI +#define CONFIG_CMD_MEMORY +#define CONFIG_CMD_MII +#define CONFIG_CMD_NET +#define CONFIG_CMD_PING +#define CONFIG_CMD_SAVEENV +#define CONFIG_CMD_RUN +#define CONFIG_CMD_BOOTD +#define CONFIG_CMD_ECHO +#define CONFIG_CMD_SOURCE +#define CONFIG_CMD_FAT +#define CONFIG_DOS_PARTITION + +/* Miscellaneous configurable options */ +#define CONFIG_SYS_LOAD_ADDR (CONFIG_SYS_DDR_SDRAM_BASE + 0x10000000) + +/* Physical Memory Map */ +/* fixme: these need to be checked against the board */ +#define CONFIG_CHIP_SELECTS_PER_CTRL 4 +#define CONFIG_SYS_CLK_FREQ 133333333 + + +#define CONFIG_NR_DRAM_BANKS 2 + +#define CONFIG_SYS_HZ 1000 + +#define CONFIG_HWCONFIG +#define HWCONFIG_BUFFER_SIZE 128 + +#define CONFIG_DISPLAY_CPUINFO + +/* Initial environment variables */ +#define CONFIG_EXTRA_ENV_SETTINGS \ + "hwconfig=fsl_ddr:bank_intlv=auto\0" \ + "loadaddr=0x80100000\0" \ + "kernel_addr=0x100000\0" \ + "ramdisk_addr=0x800000\0" \ + "ramdisk_size=0x2000000\0" \ + "fdt_high=0xffffffffffffffff\0" \ + "initrd_high=0xffffffffffffffff\0" \ + "kernel_start=0x581200000\0" \ + "kernel_load=0x806f0000\0" \ + "kernel_size=0x1000000\0" \ + "console=ttyAMA0,38400n8\0" + +#define CONFIG_BOOTARGS "console=ttyS1,115200 root=/dev/ram0 " \ + "earlyprintk=uart8250-8bit,0x21c0600" +#define CONFIG_BOOTCOMMAND "cp.b $kernel_start $kernel_load " \ + "$kernel_size && bootm $kernel_load" +#define CONFIG_BOOTDELAY 1 + +/* Store environment at top of flash */ 
+#define CONFIG_ENV_IS_NOWHERE 1 +#define CONFIG_ENV_SIZE 0x1000 + +/* Monitor Command Prompt */ +#define CONFIG_SYS_CBSIZE 512 /* Console I/O Buffer Size */ +#define CONFIG_SYS_PROMPT "> " +#define CONFIG_SYS_PBSIZE (CONFIG_SYS_CBSIZE + \ + sizeof(CONFIG_SYS_PROMPT) + 16) +#define CONFIG_SYS_HUSH_PARSER +#define CONFIG_SYS_PROMPT_HUSH_PS2 "> " +#define CONFIG_SYS_BARGSIZE CONFIG_SYS_CBSIZE /* Boot args buffer */ +#define CONFIG_SYS_LONGHELP +#define CONFIG_CMDLINE_EDITING 1 +#define CONFIG_SYS_MAXARGS 64 /* max command args */ + +#ifndef __ASSEMBLY__ +unsigned long mc_get_dram_block_size(void); +#endif + +#endif /* __LS2_COMMON_H */ diff --git a/include/configs/ls2100a_emu.h b/include/configs/ls2100a_emu.h new file mode 100644 index 0000000..3bcb5a0 --- /dev/null +++ b/include/configs/ls2100a_emu.h @@ -0,0 +1,19 @@ +/* + * Copyright 2014 Freescale Semiconductor + * + * SPDX-License-Identifier: GPL-2.0+ + */ + +#ifndef __LS2_EMU_H +#define __LS2_EMU_H + +#include "ls2100a_common.h" + +#define CONFIG_DDR_SPD +#define CONFIG_SYS_FSL_DDR_EMU /* Support emulator */ +#define SPD_EEPROM_ADDRESS1 0x51 +#define SPD_EEPROM_ADDRESS2 0x52 +#define SPD_EEPROM_ADDRESS SPD_EEPROM_ADDRESS1 +#define CONFIG_SYS_SPD_BUS_NUM 1 /* SPD on I2C bus 1 */ + +#endif /* __LS2_EMU_H */ diff --git a/include/configs/ls2100a_simu.h b/include/configs/ls2100a_simu.h new file mode 100644 index 0000000..53e1982 --- /dev/null +++ b/include/configs/ls2100a_simu.h @@ -0,0 +1,16 @@ +/* + * Copyright (C) 2014 Freescale Semiconductor + * + * SPDX-License-Identifier: GPL-2.0+ + */ + +#ifndef __LS2_SIMU_H +#define __LS2_SIMU_H + +#include "ls2100a_common.h" + +/* SMSC 91C111 ethernet configuration */ +#define CONFIG_SMC91111 +#define CONFIG_SMC91111_BASE (0x2210000) + +#endif /* __LS2_SIMU_H */
participants (3)
-
Mark Rutland
-
Rob Herring
-
York Sun