[U-Boot] [Patch v2 1/5] fdt_support: Move of_read_number to fdt_support.h

From: Arnab Basu arnab.basu@freescale.com
This is being done so that it can be used outside 'fdt_support.c'. Making life more convenient when reading device node properties that can be 32 or 64 bits long.
Signed-off-by: Arnab Basu arnab.basu@freescale.com Cc: Scott Wood scottwood@freescale.com --- Change log v2: new patch
common/fdt_support.c | 9 --------- include/fdt_support.h | 9 +++++++++ 2 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/common/fdt_support.c b/common/fdt_support.c index 784a570..7ec7d4f 100644 --- a/common/fdt_support.c +++ b/common/fdt_support.c @@ -930,15 +930,6 @@ void fdt_del_node_and_alias(void *blob, const char *alias) fdt_delprop(blob, off, alias); }
-/* Helper to read a big number; size is in cells (not bytes) */ -static inline u64 of_read_number(const fdt32_t *cell, int size) -{ - u64 r = 0; - while (size--) - r = (r << 32) | fdt32_to_cpu(*(cell++)); - return r; -} - #define PRu64 "%llx"
/* Max address size we deal with */ diff --git a/include/fdt_support.h b/include/fdt_support.h index 1bda686..649fbd6 100644 --- a/include/fdt_support.h +++ b/include/fdt_support.h @@ -133,6 +133,15 @@ static inline int fdt_status_fail_by_alias(void *fdt, const char *alias) return fdt_set_status_by_alias(fdt, alias, FDT_STATUS_FAIL, 0); }
+/* Helper to read a big number; size is in cells (not bytes) */ +static inline u64 of_read_number(const fdt32_t *cell, int size) +{ + u64 r = 0; + while (size--) + r = (r << 32) | fdt32_to_cpu(*(cell++)); + return r; +} + #endif /* ifdef CONFIG_OF_LIBFDT */
#ifdef USE_HOSTCC

From: Arnab Basu arnab.basu@freescale.com
of_bus_default_count_cells can be used to get the #address-cells and #size-cells defined by the current node's parent node. This is required when using of_read_number to read from FDT nodes that can be 32 or 64 bytes depending on values defined by the parent.
Signed-off-by: Arnab Basu arnab.basu@freescale.com CC: Scott Wood scottwood@freescale.com --- Change log v2: new patch
common/fdt_support.c | 2 +- include/fdt_support.h | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-)
diff --git a/common/fdt_support.c b/common/fdt_support.c index 7ec7d4f..3f64156 100644 --- a/common/fdt_support.c +++ b/common/fdt_support.c @@ -963,7 +963,7 @@ struct of_bus { };
/* Default translator (generic bus) */ -static void of_bus_default_count_cells(void *blob, int parentoffset, +void of_bus_default_count_cells(void *blob, int parentoffset, int *addrc, int *sizec) { const fdt32_t *prop; diff --git a/include/fdt_support.h b/include/fdt_support.h index 649fbd6..c3d1fbc 100644 --- a/include/fdt_support.h +++ b/include/fdt_support.h @@ -142,6 +142,9 @@ static inline u64 of_read_number(const fdt32_t *cell, int size) return r; }
+void of_bus_default_count_cells(void *blob, int parentoffset, + int *addrc, int *sizec); + #endif /* ifdef CONFIG_OF_LIBFDT */
#ifdef USE_HOSTCC

Secondary cores need to be released from holdoff by boot release registers. With GPP bootrom, they can boot from main memory directly. Individual spin table is used for each core. If a single release address is needed, defining macro CONFIG_FSL_SMP_RELEASE_ALL will use the CPU_RELEASE_ADDR. Spin table and the boot page is reserved in device tree so OS won't overwrite.
Signed-off-by: York Sun yorksun@freescale.com Signed-off-by: Arnab Basu arnab.basu@freescale.com --- v2: Removed copying boot page. Use u-boot image as is in memory. Added dealing with different size of addr_cell in device tree. Added dealing with big- and little-endian. Added flushing spin table after cpu_release().
arch/arm/cpu/armv8/fsl-lsch3/Makefile | 2 + arch/arm/cpu/armv8/fsl-lsch3/cpu.c | 13 ++ arch/arm/cpu/armv8/fsl-lsch3/cpu.h | 1 + arch/arm/cpu/armv8/fsl-lsch3/fdt.c | 56 +++++++ arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S | 128 ++++++++++++--- arch/arm/cpu/armv8/fsl-lsch3/mp.c | 172 +++++++++++++++++++++ arch/arm/cpu/armv8/fsl-lsch3/mp.h | 36 +++++ arch/arm/cpu/armv8/transition.S | 63 +------- arch/arm/include/asm/arch-fsl-lsch3/config.h | 3 +- arch/arm/include/asm/arch-fsl-lsch3/immap_lsch3.h | 35 +++++ arch/arm/include/asm/macro.h | 92 +++++++++++ arch/arm/lib/gic_64.S | 10 +- common/board_f.c | 2 +- 13 files changed, 525 insertions(+), 88 deletions(-) create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/fdt.c create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/mp.c create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/mp.h
diff --git a/arch/arm/cpu/armv8/fsl-lsch3/Makefile b/arch/arm/cpu/armv8/fsl-lsch3/Makefile index 9249537..f920eeb 100644 --- a/arch/arm/cpu/armv8/fsl-lsch3/Makefile +++ b/arch/arm/cpu/armv8/fsl-lsch3/Makefile @@ -7,3 +7,5 @@ obj-y += cpu.o obj-y += lowlevel.o obj-y += speed.o +obj-$(CONFIG_MP) += mp.o +obj-$(CONFIG_OF_LIBFDT) += fdt.o diff --git a/arch/arm/cpu/armv8/fsl-lsch3/cpu.c b/arch/arm/cpu/armv8/fsl-lsch3/cpu.c index c129d03..47b947f 100644 --- a/arch/arm/cpu/armv8/fsl-lsch3/cpu.c +++ b/arch/arm/cpu/armv8/fsl-lsch3/cpu.c @@ -11,6 +11,7 @@ #include <asm/io.h> #include <asm/arch-fsl-lsch3/immap_lsch3.h> #include "cpu.h" +#include "mp.h" #include "speed.h" #include <fsl_mc.h>
@@ -434,3 +435,15 @@ int cpu_eth_init(bd_t *bis) #endif return error; } + + +int arch_early_init_r(void) +{ + int rv; + rv = fsl_lsch3_wake_seconday_cores(); + + if (rv) + printf("Did not wake secondary cores\n"); + + return 0; +} diff --git a/arch/arm/cpu/armv8/fsl-lsch3/cpu.h b/arch/arm/cpu/armv8/fsl-lsch3/cpu.h index 28544d7..2e3312b 100644 --- a/arch/arm/cpu/armv8/fsl-lsch3/cpu.h +++ b/arch/arm/cpu/armv8/fsl-lsch3/cpu.h @@ -5,3 +5,4 @@ */
int fsl_qoriq_core_to_cluster(unsigned int core); +u32 cpu_mask(void); diff --git a/arch/arm/cpu/armv8/fsl-lsch3/fdt.c b/arch/arm/cpu/armv8/fsl-lsch3/fdt.c new file mode 100644 index 0000000..2dbcdcb --- /dev/null +++ b/arch/arm/cpu/armv8/fsl-lsch3/fdt.c @@ -0,0 +1,56 @@ +/* + * Copyright 2014 Freescale Semiconductor, Inc. + * + * SPDX-License-Identifier: GPL-2.0+ + */ + +#include <common.h> +#include <libfdt.h> +#include <fdt_support.h> +#include "mp.h" + +#ifdef CONFIG_MP +void ft_fixup_cpu(void *blob) +{ + int off; + __maybe_unused u64 spin_tbl_addr = (u64)get_spin_tbl_addr(); + fdt32_t *reg; + int addr_cells; + u64 val; + size_t *boot_code_size = &(__secondary_boot_code_size); + + off = fdt_node_offset_by_prop_value(blob, -1, "device_type", "cpus", 4); + of_bus_default_count_cells(blob, off, &addr_cells, NULL); + + off = fdt_node_offset_by_prop_value(blob, -1, "device_type", "cpu", 4); + while (off != -FDT_ERR_NOTFOUND) { + reg = (fdt32_t *)fdt_getprop(blob, off, "reg", 0); + if (reg) { + val = spin_tbl_addr; +#ifndef CONFIG_FSL_SMP_RELEASE_ALL + val += id_to_core(of_read_number(reg, addr_cells)) + * SPIN_TABLE_ELEM_SIZE; +#endif + val = cpu_to_fdt64(val); + fdt_setprop_string(blob, off, "enable-method", + "spin-table"); + fdt_setprop(blob, off, "cpu-release-addr", + &val, sizeof(val)); + } else { + puts("cpu NULL\n"); + } + off = fdt_node_offset_by_prop_value(blob, off, "device_type", + "cpu", 4); + } + + fdt_add_mem_rsv(blob, (uintptr_t)&secondary_boot_code, + *boot_code_size); +} +#endif + +void ft_cpu_setup(void *blob, bd_t *bd) +{ +#ifdef CONFIG_MP + ft_fixup_cpu(blob); +#endif +} diff --git a/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S b/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S index ad32b6c..629e6d4 100644 --- a/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S +++ b/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S @@ -8,7 +8,9 @@
#include <config.h> #include <linux/linkage.h> +#include <asm/gic.h> #include <asm/macro.h> +#include "mp.h"
ENTRY(lowlevel_init) mov x29, lr /* Save LR */ @@ -37,29 +39,119 @@ ENTRY(lowlevel_init)
branch_if_master x0, x1, 1f
+ ldr x0, =secondary_boot_func + blr x0 + +1: +2: + mov lr, x29 /* Restore LR */ + ret +ENDPROC(lowlevel_init) + + /* Keep literals not used by the secondary boot code outside it */ + .ltorg + + /* Using 64 bit alignment since the spin table is accessed as data */ + .align 4 + .global secondary_boot_code + /* Secondary Boot Code starts here */ +secondary_boot_code: + .global __spin_table +__spin_table: + .space CONFIG_MAX_CPUS*SPIN_TABLE_ELEM_SIZE + + .align 2 +ENTRY(secondary_boot_func) /* - * Slave should wait for master clearing spin table. - * This sync prevent salves observing incorrect - * value of spin table and jumping to wrong place. + * MPIDR_EL1 Fields: + * MPIDR[1:0] = AFF0_CPUID <- Core ID (0,1) + * MPIDR[7:2] = AFF0_RES + * MPIDR[15:8] = AFF1_CLUSTERID <- Cluster ID (0,1,2,3) + * MPIDR[23:16] = AFF2_CLUSTERID + * MPIDR[24] = MT + * MPIDR[29:25] = RES0 + * MPIDR[30] = U + * MPIDR[31] = ME + * MPIDR[39:32] = AFF3 + * + * Linear Processor ID (LPID) calculation from MPIDR_EL1: + * (We only use AFF0_CPUID and AFF1_CLUSTERID for now + * until AFF2_CLUSTERID and AFF3 have non-zero values) + * + * LPID = MPIDR[15:8] | MPIDR[1:0] */ -#if defined(CONFIG_GICV2) || defined(CONFIG_GICV3) -#ifdef CONFIG_GICV2 - ldr x0, =GICC_BASE + mrs x0, mpidr_el1 + ubfm x1, x0, #8, #15 + ubfm x2, x0, #0, #1 + orr x10, x2, x1, lsl #2 /* x10 has LPID */ + ubfm x9, x0, #0, #15 /* x9 contains MPIDR[15:0] */ + /* + * offset of the spin table element for this core from start of spin + * table (each elem is padded to 64 bytes) + */ + lsl x1, x10, #6 + ldr x0, =__spin_table + /* physical address of this cpus spin table element */ + add x11, x1, x0 + + str x9, [x11, #16] /* LPID */ + mov x4, #1 + str x4, [x11, #8] /* STATUS */ + dsb sy +#if defined(CONFIG_GICV3) + gic_wait_for_interrupt_m x0 #endif - bl gic_wait_for_interrupt + + bl secondary_switch_to_el2 +#ifdef CONFIG_ARMV8_SWITCH_TO_EL1 + secondary_switch_to_el1 #endif
- /* - * All processors will enter EL2 and optionally EL1. +slave_cpu: + wfe +#ifdef CONFIG_FSL_SMP_RELEASE_ALL + /* All cores are released from the address in the 1st spin table + * element */ - bl armv8_switch_to_el2 -#ifdef CONFIG_ARMV8_SWITCH_TO_EL1 - bl armv8_switch_to_el1 + ldr x1, =__spin_table + ldr x0, [x1] +#else + ldr x0, [x11] +#endif + cbz x0, slave_cpu +#ifndef CONFIG_ARMV8_SWITCH_TO_EL1 + mrs x1, sctlr_el2 +#else + mrs x1, sctlr_el1 #endif - b 2f + tbz x1, #25, cpu_is_le + rev x0, x0 /* BE to LE conversion */ +cpu_is_le: + br x0 /* branch to the given address */ +ENDPROC(secondary_boot_func)
-1: -2: - mov lr, x29 /* Restore LR */ - ret -ENDPROC(lowlevel_init) +ENTRY(secondary_switch_to_el2) + switch_el x0, 1f, 0f, 0f +0: ret +1: armv8_switch_to_el2_m x0 +ENDPROC(secondary_switch_to_el2) + +ENTRY(secondary_switch_to_el1) + switch_el x0, 0f, 1f, 0f +0: ret +1: armv8_switch_to_el1_m x0, x1 +ENDPROC(secondary_switch_to_el1) + + /* Ensure that the literals used by the secondary boot code are + * assembled within it (this is required so that we can protect + * this area with a single memreserve region + */ + .ltorg + + /* 64 bit alignment for elements accessed as data */ + .align 4 + .globl __secondary_boot_code_size + .type __secondary_boot_code_size, %object + /* Secondary Boot Code ends here */ +__secondary_boot_code_size: + .quad .-secondary_boot_code diff --git a/arch/arm/cpu/armv8/fsl-lsch3/mp.c b/arch/arm/cpu/armv8/fsl-lsch3/mp.c new file mode 100644 index 0000000..b29eda9 --- /dev/null +++ b/arch/arm/cpu/armv8/fsl-lsch3/mp.c @@ -0,0 +1,172 @@ +/* + * Copyright 2014 Freescale Semiconductor, Inc. + * + * SPDX-License-Identifier: GPL-2.0+ + */ + +#include <common.h> +#include <asm/io.h> +#include <asm/system.h> +#include <asm/io.h> +#include <asm/arch-fsl-lsch3/immap_lsch3.h> +#include "mp.h" + +DECLARE_GLOBAL_DATA_PTR; + +void *get_spin_tbl_addr(void) +{ + void *ptr = (void *)&__spin_table; + + return ptr; +} + +phys_addr_t determine_mp_bootpg(void) +{ + return (phys_addr_t)&secondary_boot_code; +} + +int fsl_lsch3_wake_seconday_cores(void) +{ + struct ccsr_gur __iomem *gur = (void *)(CONFIG_SYS_FSL_GUTS_ADDR); + struct ccsr_reset __iomem *rst = (void *)(CONFIG_SYS_FSL_RST_ADDR); + u32 cores, cpu_up_mask = 1; + int i, timeout = 10; + u64 *table = get_spin_tbl_addr(); + + cores = cpu_mask(); + /* Clear spin table so that secondary processors + * observe the correct value after waking up from wfe. + */ + memset(table, 0, CONFIG_MAX_CPUS*SPIN_TABLE_ELEM_SIZE); + flush_dcache_range((unsigned long)table, + (unsigned long)table + + (CONFIG_MAX_CPUS*SPIN_TABLE_ELEM_SIZE)); + + printf("Waking secondary cores to start from %lx\n", gd->relocaddr); + out_le32(&gur->bootlocptrh, (u32)(gd->relocaddr >> 32)); + out_le32(&gur->bootlocptrl, (u32)gd->relocaddr); + out_le32(&gur->scratchrw[6], 1); + asm volatile("dsb st" : : : "memory"); + rst->brrl = cores; + asm volatile("dsb st" : : : "memory"); + + /* This is needed as a precautionary measure. + * If some code before this has accidentally released the secondary + * cores then the pre-bootloader code will trap them in a "wfe" unless + * the scratchrw[6] is set. In this case we need a sev here to get these + * cores moving again. + */ + asm volatile("sev"); + + while (timeout--) { + flush_dcache_range((unsigned long)table, (unsigned long)table + + CONFIG_MAX_CPUS * 64); + for (i = 1; i < CONFIG_MAX_CPUS; i++) { + if (table[i * WORDS_PER_SPIN_TABLE_ENTRY + + SPIN_TABLE_ELEM_STATUS_IDX]) + cpu_up_mask |= 1 << i; + } + if (hweight32(cpu_up_mask) == hweight32(cores)) + break; + udelay(10); + } + if (timeout <= 0) { + printf("Not all cores (0x%x) are up (0x%x)\n", + cores, cpu_up_mask); + return 1; + } + printf("All (%d) cores are up.\n", hweight32(cores)); + + return 0; +} + +int is_core_valid(unsigned int core) +{ + return !!((1 << core) & cpu_mask()); +} + +int cpu_reset(int nr) +{ + puts("Feature is not implemented.\n"); + + return 0; +} + +int cpu_disable(int nr) +{ + puts("Feature is not implemented.\n"); + + return 0; +} + +int core_to_pos(int nr) +{ + u32 cores = cpu_mask(); + int i, count = 0; + + if (nr == 0) { + return 0; + } else if (nr >= hweight32(cores)) { + puts("Not a valid core number.\n"); + return -1; + } + + for (i = 1; i < 32; i++) { + if (is_core_valid(i)) { + count++; + if (count == nr) + break; + } + } + + return count; +} + +int cpu_status(int nr) +{ + u64 *table; + int pos; + + if (nr == 0) { + table = (u64 *)get_spin_tbl_addr(); + printf("table base @ 0x%p\n", table); + } else { + pos = core_to_pos(nr); + if (pos < 0) + return -1; + table = (u64 *)get_spin_tbl_addr() + pos * + WORDS_PER_SPIN_TABLE_ENTRY; + printf("table @ 0x%p\n", table); + printf(" addr - 0x%016llx\n", + table[SPIN_TABLE_ELEM_ENTRY_ADDR_IDX]); + printf(" status - 0x%016llx\n", + table[SPIN_TABLE_ELEM_STATUS_IDX]); + printf(" lpid - 0x%016llx\n", + table[SPIN_TABLE_ELEM_LPID_IDX]); + } + + return 0; +} + +int cpu_release(int nr, int argc, char * const argv[]) +{ + u64 boot_addr; + u64 *table = (u64 *)get_spin_tbl_addr(); +#ifndef CONFIG_FSL_SMP_RELEASE_ALL + int pos; + + pos = core_to_pos(nr); + if (pos <= 0) + return -1; + + table += pos * WORDS_PER_SPIN_TABLE_ENTRY; +#endif + boot_addr = simple_strtoull(argv[0], NULL, 16); + table[SPIN_TABLE_ELEM_ENTRY_ADDR_IDX] = boot_addr; + flush_dcache_range((unsigned long)table, + (unsigned long)table + SPIN_TABLE_ELEM_SIZE); + asm volatile("dsb st"); + smp_kick_all_cpus(); /* only those with entry addr set will run */ + + return 0; +} diff --git a/arch/arm/cpu/armv8/fsl-lsch3/mp.h b/arch/arm/cpu/armv8/fsl-lsch3/mp.h new file mode 100644 index 0000000..06ac0bc --- /dev/null +++ b/arch/arm/cpu/armv8/fsl-lsch3/mp.h @@ -0,0 +1,36 @@ +/* + * Copyright 2014, Freescale Semiconductor + * + * SPDX-License-Identifier: GPL-2.0+ + */ + +#ifndef _FSL_CH3_MP_H +#define _FSL_CH3_MP_H + +/* +* Each spin table element is defined as +* struct { +* uint64_t entry_addr; +* uint64_t status; +* uint64_t lpid; +* }; +* we pad this struct to 64 bytes so each entry is in its own cacheline +* the actual spin table is an array of these structures +*/ +#define SPIN_TABLE_ELEM_ENTRY_ADDR_IDX 0 +#define SPIN_TABLE_ELEM_STATUS_IDX 1 +#define SPIN_TABLE_ELEM_LPID_IDX 2 +#define WORDS_PER_SPIN_TABLE_ENTRY 8 /* pad to 64 bytes */ +#define SPIN_TABLE_ELEM_SIZE 64 + +#define id_to_core(x) ((x & 3) | (x >> 6)) +#ifndef __ASSEMBLY__ +extern u64 __spin_table[]; +extern u64 *secondary_boot_code; +extern size_t __secondary_boot_code_size; +int fsl_lsch3_wake_seconday_cores(void); +void *get_spin_tbl_addr(void); +phys_addr_t determine_mp_bootpg(void); +void secondary_boot_func(void); +#endif +#endif /* _FSL_CH3_MP_H */ diff --git a/arch/arm/cpu/armv8/transition.S b/arch/arm/cpu/armv8/transition.S index 38dea5c..ade1cde 100644 --- a/arch/arm/cpu/armv8/transition.S +++ b/arch/arm/cpu/armv8/transition.S @@ -14,70 +14,11 @@ ENTRY(armv8_switch_to_el2) switch_el x0, 1f, 0f, 0f 0: ret -1: - mov x0, #0x5b1 /* Non-secure EL0/EL1 | HVC | 64bit EL2 */ - msr scr_el3, x0 - msr cptr_el3, xzr /* Disable coprocessor traps to EL3 */ - mov x0, #0x33ff - msr cptr_el2, x0 /* Disable coprocessor traps to EL2 */ - - /* Initialize SCTLR_EL2 */ - msr sctlr_el2, xzr - - /* Return to the EL2_SP2 mode from EL3 */ - mov x0, sp - msr sp_el2, x0 /* Migrate SP */ - mrs x0, vbar_el3 - msr vbar_el2, x0 /* Migrate VBAR */ - mov x0, #0x3c9 - msr spsr_el3, x0 /* EL2_SP2 | D | A | I | F */ - msr elr_el3, lr - eret +1: armv8_switch_to_el2_m x0 ENDPROC(armv8_switch_to_el2)
ENTRY(armv8_switch_to_el1) switch_el x0, 0f, 1f, 0f 0: ret -1: - /* Initialize Generic Timers */ - mrs x0, cnthctl_el2 - orr x0, x0, #0x3 /* Enable EL1 access to timers */ - msr cnthctl_el2, x0 - msr cntvoff_el2, xzr - mrs x0, cntkctl_el1 - orr x0, x0, #0x3 /* Enable EL0 access to timers */ - msr cntkctl_el1, x0 - - /* Initilize MPID/MPIDR registers */ - mrs x0, midr_el1 - mrs x1, mpidr_el1 - msr vpidr_el2, x0 - msr vmpidr_el2, x1 - - /* Disable coprocessor traps */ - mov x0, #0x33ff - msr cptr_el2, x0 /* Disable coprocessor traps to EL2 */ - msr hstr_el2, xzr /* Disable coprocessor traps to EL2 */ - mov x0, #3 << 20 - msr cpacr_el1, x0 /* Enable FP/SIMD at EL1 */ - - /* Initialize HCR_EL2 */ - mov x0, #(1 << 31) /* 64bit EL1 */ - orr x0, x0, #(1 << 29) /* Disable HVC */ - msr hcr_el2, x0 - - /* SCTLR_EL1 initialization */ - mov x0, #0x0800 - movk x0, #0x30d0, lsl #16 - msr sctlr_el1, x0 - - /* Return to the EL1_SP1 mode from EL2 */ - mov x0, sp - msr sp_el1, x0 /* Migrate SP */ - mrs x0, vbar_el2 - msr vbar_el1, x0 /* Migrate VBAR */ - mov x0, #0x3c5 - msr spsr_el2, x0 /* EL1_SP1 | D | A | I | F */ - msr elr_el2, lr - eret +1: armv8_switch_to_el1_m x0, x1 ENDPROC(armv8_switch_to_el1) diff --git a/arch/arm/include/asm/arch-fsl-lsch3/config.h b/arch/arm/include/asm/arch-fsl-lsch3/config.h index b17410a..3c1ee80 100644 --- a/arch/arm/include/asm/arch-fsl-lsch3/config.h +++ b/arch/arm/include/asm/arch-fsl-lsch3/config.h @@ -8,7 +8,7 @@ #define _ASM_ARMV8_FSL_LSCH3_CONFIG_
#include <fsl_ddrc_version.h> - +#define CONFIG_MP #define CONFIG_SYS_FSL_OCRAM_BASE 0x18000000 /* initial RAM */ /* Link Definitions */ #define CONFIG_SYS_INIT_SP_ADDR (CONFIG_SYS_FSL_OCRAM_BASE + 0xfff0) @@ -18,6 +18,7 @@ #define CONFIG_SYS_FSL_DDR2_ADDR (CONFIG_SYS_IMMR + 0x00090000) #define CONFIG_SYS_FSL_GUTS_ADDR (CONFIG_SYS_IMMR + 0x00E00000) #define CONFIG_SYS_FSL_PMU_ADDR (CONFIG_SYS_IMMR + 0x00E30000) +#define CONFIG_SYS_FSL_RST_ADDR (CONFIG_SYS_IMMR + 0x00E60000) #define CONFIG_SYS_FSL_CH3_CLK_GRPA_ADDR (CONFIG_SYS_IMMR + 0x00300000) #define CONFIG_SYS_FSL_CH3_CLK_GRPB_ADDR (CONFIG_SYS_IMMR + 0x00310000) #define CONFIG_SYS_FSL_CH3_CLK_CTRL_ADDR (CONFIG_SYS_IMMR + 0x00370000) diff --git a/arch/arm/include/asm/arch-fsl-lsch3/immap_lsch3.h b/arch/arm/include/asm/arch-fsl-lsch3/immap_lsch3.h index 18e66bd..ee1d651 100644 --- a/arch/arm/include/asm/arch-fsl-lsch3/immap_lsch3.h +++ b/arch/arm/include/asm/arch-fsl-lsch3/immap_lsch3.h @@ -113,4 +113,39 @@ struct ccsr_clk_ctrl { u8 res_04[0x20-0x04]; } clkcncsr[8]; }; + +struct ccsr_reset { + u32 rstcr; /* 0x000 */ + u32 rstcrsp; /* 0x004 */ + u8 res_008[0x10-0x08]; /* 0x008 */ + u32 rstrqmr1; /* 0x010 */ + u32 rstrqmr2; /* 0x014 */ + u32 rstrqsr1; /* 0x018 */ + u32 rstrqsr2; /* 0x01c */ + u32 rstrqwdtmrl; /* 0x020 */ + u32 rstrqwdtmru; /* 0x024 */ + u8 res_028[0x30-0x28]; /* 0x028 */ + u32 rstrqwdtsrl; /* 0x030 */ + u32 rstrqwdtsru; /* 0x034 */ + u8 res_038[0x60-0x38]; /* 0x038 */ + u32 brrl; /* 0x060 */ + u32 brru; /* 0x064 */ + u8 res_068[0x80-0x68]; /* 0x068 */ + u32 pirset; /* 0x080 */ + u32 pirclr; /* 0x084 */ + u8 res_088[0x90-0x88]; /* 0x088 */ + u32 brcorenbr; /* 0x090 */ + u8 res_094[0x100-0x94]; /* 0x094 */ + u32 rcw_reqr; /* 0x100 */ + u32 rcw_completion; /* 0x104 */ + u8 res_108[0x110-0x108]; /* 0x108 */ + u32 pbi_reqr; /* 0x110 */ + u32 pbi_completion; /* 0x114 */ + u8 res_118[0xa00-0x118]; /* 0x118 */ + u32 qmbm_warmrst; /* 0xa00 */ + u32 soc_warmrst; /* 0xa04 */ + u8 res_a08[0xbf8-0xa08]; /* 0xa08 */ + u32 ip_rev1; /* 0xbf8 */ + u32 ip_rev2; /* 0xbfc */ +}; #endif /* __ARCH_FSL_LSCH3_IMMAP_H */ diff --git a/arch/arm/include/asm/macro.h b/arch/arm/include/asm/macro.h index f77e4b8..0009c28 100644 --- a/arch/arm/include/asm/macro.h +++ b/arch/arm/include/asm/macro.h @@ -105,6 +105,98 @@ lr .req x30 cbz \xreg1, \master_label .endm
+.macro armv8_switch_to_el2_m, xreg1 + mov \xreg1, #0x5b1 /* Non-secure EL0/EL1 | HVC | 64bit EL2 */ + msr scr_el3, \xreg1 + msr cptr_el3, xzr /* Disable coprocessor traps to EL3 */ + mov \xreg1, #0x33ff + msr cptr_el2, \xreg1 /* Disable coprocessor traps to EL2 */ + + /* Initialize SCTLR_EL2 */ + /* + * setting RES1 bits (29,28,23,22,18,16,11,5,4) to 1 + * and RES0 bits (31,30,27,26,24,21,20,17,15-13,10-6) + + * EE,WXN,I,SA,C,A,M to 0 + */ + mov \xreg1, #0x0830 + movk \xreg1, #0x30C5, lsl #16 + msr sctlr_el2, \xreg1 + + /* Return to the EL2_SP2 mode from EL3 */ + mov \xreg1, sp + msr sp_el2, \xreg1 /* Migrate SP */ + mrs \xreg1, vbar_el3 + msr vbar_el2, \xreg1 /* Migrate VBAR */ + mov \xreg1, #0x3c9 + msr spsr_el3, \xreg1 /* EL2_SP2 | D | A | I | F */ + msr elr_el3, lr + eret +.endm + +.macro armv8_switch_to_el1_m, xreg1, xreg2 + /* Initialize Generic Timers */ + mrs \xreg1, cnthctl_el2 + orr \xreg1, \xreg1, #0x3 /* Enable EL1 access to timers */ + msr cnthctl_el2, \xreg1 + msr cntvoff_el2, xzr + + /* Initilize MPID/MPIDR registers */ + mrs \xreg1, midr_el1 + mrs \xreg2, mpidr_el1 + msr vpidr_el2, \xreg1 + msr vmpidr_el2, \xreg2 + + /* Disable coprocessor traps */ + mov \xreg1, #0x33ff + msr cptr_el2, \xreg1 /* Disable coprocessor traps to EL2 */ + msr hstr_el2, xzr /* Disable coprocessor traps to EL2 */ + mov \xreg1, #3 << 20 + msr cpacr_el1, \xreg1 /* Enable FP/SIMD at EL1 */ + + /* Initialize HCR_EL2 */ + mov \xreg1, #(1 << 31) /* 64bit EL1 */ + orr \xreg1, \xreg1, #(1 << 29) /* Disable HVC */ + msr hcr_el2, \xreg1 + + /* SCTLR_EL1 initialization */ + /* + * setting RES1 bits (29,28,23,22,20,11) to 1 + * and RES0 bits (31,30,27,21,17,13,10,6) + + * UCI,EE,EOE,WXN,nTWE,nTWI,UCT,DZE,I,UMA,SED,ITD, + * CP15BEN,SA0,SA,C,A,M to 0 + */ + mov \xreg1, #0x0800 + movk \xreg1, #0x30d0, lsl #16 + msr sctlr_el1, \xreg1 + + /* Return to the EL1_SP1 mode from EL2 */ + mov \xreg1, sp + msr sp_el1, \xreg1 /* Migrate SP */ + mrs \xreg1, vbar_el2 + msr vbar_el1, \xreg1 /* Migrate VBAR */ + mov \xreg1, #0x3c5 + msr spsr_el2, \xreg1 /* EL1_SP1 | D | A | I | F */ + msr elr_el2, lr + eret +.endm + +#if defined(CONFIG_GICV3) +.macro gic_wait_for_interrupt_m xreg1 +0 : wfi + mrs \xreg1, ICC_IAR1_EL1 + msr ICC_EOIR1_EL1, \xreg1 + cbnz \xreg1, 0b +.endm +#elif defined(CONFIG_GICV2) +.macro gic_wait_for_interrupt_m xreg1, wreg2 +0 : wfi + ldr \wreg2, [\xreg1, GICC_AIAR] + str \wreg2, [\xreg1, GICC_AEOIR] + and \wreg2, \wreg2, #3ff + cbnz \wreg2, 0b +.endm +#endif + #endif /* CONFIG_ARM64 */
#endif /* __ASSEMBLY__ */ diff --git a/arch/arm/lib/gic_64.S b/arch/arm/lib/gic_64.S index d56396e..a3e18f7 100644 --- a/arch/arm/lib/gic_64.S +++ b/arch/arm/lib/gic_64.S @@ -10,8 +10,8 @@ #include <asm-offsets.h> #include <config.h> #include <linux/linkage.h> -#include <asm/macro.h> #include <asm/gic.h> +#include <asm/macro.h>
/************************************************************************* @@ -181,14 +181,10 @@ ENDPROC(gic_kick_secondary_cpus) * *************************************************************************/ ENTRY(gic_wait_for_interrupt) -0: wfi #if defined(CONFIG_GICV3) - mrs x9, ICC_IAR1_EL1 - msr ICC_EOIR1_EL1, x9 + gic_wait_for_interrupt_m x9 #elif defined(CONFIG_GICV2) - ldr w9, [x0, GICC_AIAR] - str w9, [x0, GICC_AEOIR] + gic_wait_for_interrupt_m x0, w9 #endif - cbnz w9, 0b ret ENDPROC(gic_wait_for_interrupt) diff --git a/common/board_f.c b/common/board_f.c index d5e7622..79668f4 100644 --- a/common/board_f.c +++ b/common/board_f.c @@ -43,7 +43,7 @@ #include <watchdog.h> #include <asm/errno.h> #include <asm/io.h> -#ifdef CONFIG_MP +#if defined(CONFIG_MP) && (defined(CONFIG_MPC86xx) || defined(CONFIG_E500)) #include <asm/mp.h> #endif #include <asm/sections.h>

Hi York,
I have mostly minor comments this time; this is looking pretty good.
On Tue, Aug 19, 2014 at 09:28:00PM +0100, York Sun wrote:
Secondary cores need to be released from holdoff by boot release registers. With GPP bootrom, they can boot from main memory directly. Individual spin table is used for each core. If a single release address is needed, defining macro CONFIG_FSL_SMP_RELEASE_ALL will use the CPU_RELEASE_ADDR. Spin table and the boot page is reserved in device tree so OS won't overwrite.
Signed-off-by: York Sun yorksun@freescale.com Signed-off-by: Arnab Basu arnab.basu@freescale.com
v2: Removed copying boot page. Use u-boot image as is in memory. Added dealing with different size of addr_cell in device tree. Added dealing with big- and little-endian. Added flushing spin table after cpu_release().
arch/arm/cpu/armv8/fsl-lsch3/Makefile | 2 + arch/arm/cpu/armv8/fsl-lsch3/cpu.c | 13 ++ arch/arm/cpu/armv8/fsl-lsch3/cpu.h | 1 + arch/arm/cpu/armv8/fsl-lsch3/fdt.c | 56 +++++++ arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S | 128 ++++++++++++--- arch/arm/cpu/armv8/fsl-lsch3/mp.c | 172 +++++++++++++++++++++ arch/arm/cpu/armv8/fsl-lsch3/mp.h | 36 +++++ arch/arm/cpu/armv8/transition.S | 63 +------- arch/arm/include/asm/arch-fsl-lsch3/config.h | 3 +- arch/arm/include/asm/arch-fsl-lsch3/immap_lsch3.h | 35 +++++ arch/arm/include/asm/macro.h | 92 +++++++++++ arch/arm/lib/gic_64.S | 10 +- common/board_f.c | 2 +- 13 files changed, 525 insertions(+), 88 deletions(-) create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/fdt.c create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/mp.c create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/mp.h
diff --git a/arch/arm/cpu/armv8/fsl-lsch3/Makefile b/arch/arm/cpu/armv8/fsl-lsch3/Makefile index 9249537..f920eeb 100644 --- a/arch/arm/cpu/armv8/fsl-lsch3/Makefile +++ b/arch/arm/cpu/armv8/fsl-lsch3/Makefile @@ -7,3 +7,5 @@ obj-y += cpu.o obj-y += lowlevel.o obj-y += speed.o +obj-$(CONFIG_MP) += mp.o +obj-$(CONFIG_OF_LIBFDT) += fdt.o diff --git a/arch/arm/cpu/armv8/fsl-lsch3/cpu.c b/arch/arm/cpu/armv8/fsl-lsch3/cpu.c index c129d03..47b947f 100644 --- a/arch/arm/cpu/armv8/fsl-lsch3/cpu.c +++ b/arch/arm/cpu/armv8/fsl-lsch3/cpu.c @@ -11,6 +11,7 @@ #include <asm/io.h> #include <asm/arch-fsl-lsch3/immap_lsch3.h> #include "cpu.h" +#include "mp.h" #include "speed.h" #include <fsl_mc.h>
@@ -434,3 +435,15 @@ int cpu_eth_init(bd_t *bis) #endif return error; }
+int arch_early_init_r(void) +{
int rv;
rv = fsl_lsch3_wake_seconday_cores();
if (rv)
printf("Did not wake secondary cores\n");
return 0;
+} diff --git a/arch/arm/cpu/armv8/fsl-lsch3/cpu.h b/arch/arm/cpu/armv8/fsl-lsch3/cpu.h index 28544d7..2e3312b 100644 --- a/arch/arm/cpu/armv8/fsl-lsch3/cpu.h +++ b/arch/arm/cpu/armv8/fsl-lsch3/cpu.h @@ -5,3 +5,4 @@ */
int fsl_qoriq_core_to_cluster(unsigned int core); +u32 cpu_mask(void); diff --git a/arch/arm/cpu/armv8/fsl-lsch3/fdt.c b/arch/arm/cpu/armv8/fsl-lsch3/fdt.c new file mode 100644 index 0000000..2dbcdcb --- /dev/null +++ b/arch/arm/cpu/armv8/fsl-lsch3/fdt.c @@ -0,0 +1,56 @@ +/*
- Copyright 2014 Freescale Semiconductor, Inc.
- SPDX-License-Identifier: GPL-2.0+
- */
+#include <common.h> +#include <libfdt.h> +#include <fdt_support.h> +#include "mp.h"
+#ifdef CONFIG_MP +void ft_fixup_cpu(void *blob) +{
int off;
__maybe_unused u64 spin_tbl_addr = (u64)get_spin_tbl_addr();
fdt32_t *reg;
int addr_cells;
u64 val;
size_t *boot_code_size = &(__secondary_boot_code_size);
off = fdt_node_offset_by_prop_value(blob, -1, "device_type", "cpus", 4);
I didn't think /cpus had device_type = "cpus". I can't see any instances in any DTs I have to hand. Can we not find /cpus by path?
of_bus_default_count_cells(blob, off, &addr_cells, NULL);
off = fdt_node_offset_by_prop_value(blob, -1, "device_type", "cpu", 4);
while (off != -FDT_ERR_NOTFOUND) {
reg = (fdt32_t *)fdt_getprop(blob, off, "reg", 0);
if (reg) {
val = spin_tbl_addr;
+#ifndef CONFIG_FSL_SMP_RELEASE_ALL
val += id_to_core(of_read_number(reg, addr_cells))
* SPIN_TABLE_ELEM_SIZE;
+#endif
val = cpu_to_fdt64(val);
fdt_setprop_string(blob, off, "enable-method",
"spin-table");
fdt_setprop(blob, off, "cpu-release-addr",
&val, sizeof(val));
} else {
puts("cpu NULL\n");
Could we change that to "Warning: found cpu node without reg property" or something like that which makes the problem clear?
}
off = fdt_node_offset_by_prop_value(blob, off, "device_type",
"cpu", 4);
}
fdt_add_mem_rsv(blob, (uintptr_t)&secondary_boot_code,
*boot_code_size);
+} +#endif
+void ft_cpu_setup(void *blob, bd_t *bd) +{ +#ifdef CONFIG_MP
ft_fixup_cpu(blob);
+#endif +} diff --git a/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S b/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S index ad32b6c..629e6d4 100644 --- a/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S +++ b/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S @@ -8,7 +8,9 @@
#include <config.h> #include <linux/linkage.h> +#include <asm/gic.h> #include <asm/macro.h> +#include "mp.h"
ENTRY(lowlevel_init) mov x29, lr /* Save LR */ @@ -37,29 +39,119 @@ ENTRY(lowlevel_init)
branch_if_master x0, x1, 1f
ldr x0, =secondary_boot_func
blr x0
+1: +2:
Isn't the '2' label redundant?
mov lr, x29 /* Restore LR */
ret
+ENDPROC(lowlevel_init)
/* Keep literals not used by the secondary boot code outside it */
.ltorg
/* Using 64 bit alignment since the spin table is accessed as data */
.align 4
.global secondary_boot_code
/* Secondary Boot Code starts here */
+secondary_boot_code:
.global __spin_table
+__spin_table:
.space CONFIG_MAX_CPUS*SPIN_TABLE_ELEM_SIZE
.align 2
+ENTRY(secondary_boot_func) /*
* Slave should wait for master clearing spin table.
* This sync prevent salves observing incorrect
* value of spin table and jumping to wrong place.
* MPIDR_EL1 Fields:
* MPIDR[1:0] = AFF0_CPUID <- Core ID (0,1)
* MPIDR[7:2] = AFF0_RES
* MPIDR[15:8] = AFF1_CLUSTERID <- Cluster ID (0,1,2,3)
* MPIDR[23:16] = AFF2_CLUSTERID
* MPIDR[24] = MT
* MPIDR[29:25] = RES0
* MPIDR[30] = U
* MPIDR[31] = ME
* MPIDR[39:32] = AFF3
*
* Linear Processor ID (LPID) calculation from MPIDR_EL1:
* (We only use AFF0_CPUID and AFF1_CLUSTERID for now
* until AFF2_CLUSTERID and AFF3 have non-zero values)
*
* LPID = MPIDR[15:8] | MPIDR[1:0] */
-#if defined(CONFIG_GICV2) || defined(CONFIG_GICV3) -#ifdef CONFIG_GICV2
ldr x0, =GICC_BASE
mrs x0, mpidr_el1
ubfm x1, x0, #8, #15
ubfm x2, x0, #0, #1
orr x10, x2, x1, lsl #2 /* x10 has LPID */
ubfm x9, x0, #0, #15 /* x9 contains MPIDR[15:0] */
/*
* offset of the spin table element for this core from start of spin
* table (each elem is padded to 64 bytes)
*/
lsl x1, x10, #6
ldr x0, =__spin_table
/* physical address of this cpus spin table element */
add x11, x1, x0
str x9, [x11, #16] /* LPID */
mov x4, #1
str x4, [x11, #8] /* STATUS */
dsb sy
+#if defined(CONFIG_GICV3)
gic_wait_for_interrupt_m x0
#endif
bl gic_wait_for_interrupt
Why do we only need to wait for GICv3? We previously did so for GICv2.
bl secondary_switch_to_el2
+#ifdef CONFIG_ARMV8_SWITCH_TO_EL1
secondary_switch_to_el1
#endif
Missing bl?
Is there any reason to have the SWITCH_TO_EL1 option other than for debugging?
EL2 is the preferred EL to boot at for Linux and Xen (it gives far more flexibility), and if dropping to EL1 is necessary I think it would make more sense as a run-time option than a compile-time option.
/*
* All processors will enter EL2 and optionally EL1.
+slave_cpu:
wfe
+#ifdef CONFIG_FSL_SMP_RELEASE_ALL
/* All cores are released from the address in the 1st spin table
* element */
bl armv8_switch_to_el2
-#ifdef CONFIG_ARMV8_SWITCH_TO_EL1
bl armv8_switch_to_el1
ldr x1, =__spin_table
ldr x0, [x1]
+#else
ldr x0, [x11]
+#endif
cbz x0, slave_cpu
Similarly is there any reason to have the option of a single release addr if we can support unique addresses?
[...]
diff --git a/arch/arm/cpu/armv8/fsl-lsch3/mp.c b/arch/arm/cpu/armv8/fsl-lsch3/mp.c new file mode 100644 index 0000000..b29eda9 --- /dev/null +++ b/arch/arm/cpu/armv8/fsl-lsch3/mp.c @@ -0,0 +1,172 @@ +/*
- Copyright 2014 Freescale Semiconductor, Inc.
- SPDX-License-Identifier: GPL-2.0+
- */
+#include <common.h> +#include <asm/io.h> +#include <asm/system.h> +#include <asm/io.h> +#include <asm/arch-fsl-lsch3/immap_lsch3.h> +#include "mp.h"
+DECLARE_GLOBAL_DATA_PTR;
+void *get_spin_tbl_addr(void) +{
void *ptr = (void *)&__spin_table;
return ptr;
+}
I don't think the cast is necessary here. As far as I can see this could be just:
void *get_spin_tbl_addr(void) { return &__spin_table; }
[...]
diff --git a/arch/arm/include/asm/macro.h b/arch/arm/include/asm/macro.h index f77e4b8..0009c28 100644 --- a/arch/arm/include/asm/macro.h +++ b/arch/arm/include/asm/macro.h @@ -105,6 +105,98 @@ lr .req x30 cbz \xreg1, \master_label .endm
+.macro armv8_switch_to_el2_m, xreg1
mov \xreg1, #0x5b1 /* Non-secure EL0/EL1 | HVC | 64bit EL2 */
msr scr_el3, \xreg1
When dropping to EL1 from EL2 we disable HVC via HCR_EL2; presumably due to lack of a handler. Would it make sense to do similarly here and disable SMC here until we have a user (e.g. PSCI)?
msr cptr_el3, xzr /* Disable coprocessor traps to EL3 */
mov \xreg1, #0x33ff
msr cptr_el2, \xreg1 /* Disable coprocessor traps to EL2 */
/* Initialize SCTLR_EL2 */
/*
* setting RES1 bits (29,28,23,22,18,16,11,5,4) to 1
* and RES0 bits (31,30,27,26,24,21,20,17,15-13,10-6) +
* EE,WXN,I,SA,C,A,M to 0
*/
mov \xreg1, #0x0830
movk \xreg1, #0x30C5, lsl #16
msr sctlr_el2, \xreg1
/* Return to the EL2_SP2 mode from EL3 */
mov \xreg1, sp
msr sp_el2, \xreg1 /* Migrate SP */
mrs \xreg1, vbar_el3
msr vbar_el2, \xreg1 /* Migrate VBAR */
mov \xreg1, #0x3c9
msr spsr_el3, \xreg1 /* EL2_SP2 | D | A | I | F */
msr elr_el3, lr
eret
+.endm
Otherwise this looks fine to me.
+.macro armv8_switch_to_el1_m, xreg1, xreg2
/* Initialize Generic Timers */
mrs \xreg1, cnthctl_el2
orr \xreg1, \xreg1, #0x3 /* Enable EL1 access to timers */
msr cnthctl_el2, \xreg1
msr cntvoff_el2, xzr
/* Initilize MPID/MPIDR registers */
mrs \xreg1, midr_el1
mrs \xreg2, mpidr_el1
msr vpidr_el2, \xreg1
msr vmpidr_el2, \xreg2
/* Disable coprocessor traps */
mov \xreg1, #0x33ff
msr cptr_el2, \xreg1 /* Disable coprocessor traps to EL2 */
msr hstr_el2, xzr /* Disable coprocessor traps to EL2 */
mov \xreg1, #3 << 20
msr cpacr_el1, \xreg1 /* Enable FP/SIMD at EL1 */
/* Initialize HCR_EL2 */
mov \xreg1, #(1 << 31) /* 64bit EL1 */
orr \xreg1, \xreg1, #(1 << 29) /* Disable HVC */
msr hcr_el2, \xreg1
/* SCTLR_EL1 initialization */
/*
* setting RES1 bits (29,28,23,22,20,11) to 1
* and RES0 bits (31,30,27,21,17,13,10,6) +
* UCI,EE,EOE,WXN,nTWE,nTWI,UCT,DZE,I,UMA,SED,ITD,
* CP15BEN,SA0,SA,C,A,M to 0
*/
mov \xreg1, #0x0800
movk \xreg1, #0x30d0, lsl #16
msr sctlr_el1, \xreg1
/* Return to the EL1_SP1 mode from EL2 */
mov \xreg1, sp
msr sp_el1, \xreg1 /* Migrate SP */
mrs \xreg1, vbar_el2
msr vbar_el1, \xreg1 /* Migrate VBAR */
mov \xreg1, #0x3c5
msr spsr_el2, \xreg1 /* EL1_SP1 | D | A | I | F */
msr elr_el2, lr
eret
+.endm
This looks fine to me.
Thanks, Mark.

On 08/21/2014 06:47 AM, Mark Rutland wrote:
Hi York,
I have mostly minor comments this time; this is looking pretty good.
On Tue, Aug 19, 2014 at 09:28:00PM +0100, York Sun wrote:
Secondary cores need to be released from holdoff by boot release registers. With GPP bootrom, they can boot from main memory directly. Individual spin table is used for each core. If a single release address is needed, defining macro CONFIG_FSL_SMP_RELEASE_ALL will use the CPU_RELEASE_ADDR. Spin table and the boot page is reserved in device tree so OS won't overwrite.
Signed-off-by: York Sun yorksun@freescale.com Signed-off-by: Arnab Basu arnab.basu@freescale.com
v2: Removed copying boot page. Use u-boot image as is in memory. Added dealing with different size of addr_cell in device tree. Added dealing with big- and little-endian. Added flushing spin table after cpu_release().
arch/arm/cpu/armv8/fsl-lsch3/Makefile | 2 + arch/arm/cpu/armv8/fsl-lsch3/cpu.c | 13 ++ arch/arm/cpu/armv8/fsl-lsch3/cpu.h | 1 + arch/arm/cpu/armv8/fsl-lsch3/fdt.c | 56 +++++++ arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S | 128 ++++++++++++--- arch/arm/cpu/armv8/fsl-lsch3/mp.c | 172 +++++++++++++++++++++ arch/arm/cpu/armv8/fsl-lsch3/mp.h | 36 +++++ arch/arm/cpu/armv8/transition.S | 63 +------- arch/arm/include/asm/arch-fsl-lsch3/config.h | 3 +- arch/arm/include/asm/arch-fsl-lsch3/immap_lsch3.h | 35 +++++ arch/arm/include/asm/macro.h | 92 +++++++++++ arch/arm/lib/gic_64.S | 10 +- common/board_f.c | 2 +- 13 files changed, 525 insertions(+), 88 deletions(-) create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/fdt.c create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/mp.c create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/mp.h
diff --git a/arch/arm/cpu/armv8/fsl-lsch3/Makefile b/arch/arm/cpu/armv8/fsl-lsch3/Makefile index 9249537..f920eeb 100644 --- a/arch/arm/cpu/armv8/fsl-lsch3/Makefile +++ b/arch/arm/cpu/armv8/fsl-lsch3/Makefile @@ -7,3 +7,5 @@ obj-y += cpu.o obj-y += lowlevel.o obj-y += speed.o +obj-$(CONFIG_MP) += mp.o +obj-$(CONFIG_OF_LIBFDT) += fdt.o diff --git a/arch/arm/cpu/armv8/fsl-lsch3/cpu.c b/arch/arm/cpu/armv8/fsl-lsch3/cpu.c index c129d03..47b947f 100644 --- a/arch/arm/cpu/armv8/fsl-lsch3/cpu.c +++ b/arch/arm/cpu/armv8/fsl-lsch3/cpu.c @@ -11,6 +11,7 @@ #include <asm/io.h> #include <asm/arch-fsl-lsch3/immap_lsch3.h> #include "cpu.h" +#include "mp.h" #include "speed.h" #include <fsl_mc.h>
@@ -434,3 +435,15 @@ int cpu_eth_init(bd_t *bis) #endif return error; }
+int arch_early_init_r(void) +{
int rv;
rv = fsl_lsch3_wake_seconday_cores();
if (rv)
printf("Did not wake secondary cores\n");
return 0;
+} diff --git a/arch/arm/cpu/armv8/fsl-lsch3/cpu.h b/arch/arm/cpu/armv8/fsl-lsch3/cpu.h index 28544d7..2e3312b 100644 --- a/arch/arm/cpu/armv8/fsl-lsch3/cpu.h +++ b/arch/arm/cpu/armv8/fsl-lsch3/cpu.h @@ -5,3 +5,4 @@ */
int fsl_qoriq_core_to_cluster(unsigned int core); +u32 cpu_mask(void); diff --git a/arch/arm/cpu/armv8/fsl-lsch3/fdt.c b/arch/arm/cpu/armv8/fsl-lsch3/fdt.c new file mode 100644 index 0000000..2dbcdcb --- /dev/null +++ b/arch/arm/cpu/armv8/fsl-lsch3/fdt.c @@ -0,0 +1,56 @@ +/*
- Copyright 2014 Freescale Semiconductor, Inc.
- SPDX-License-Identifier: GPL-2.0+
- */
+#include <common.h> +#include <libfdt.h> +#include <fdt_support.h> +#include "mp.h"
+#ifdef CONFIG_MP +void ft_fixup_cpu(void *blob) +{
int off;
__maybe_unused u64 spin_tbl_addr = (u64)get_spin_tbl_addr();
fdt32_t *reg;
int addr_cells;
u64 val;
size_t *boot_code_size = &(__secondary_boot_code_size);
off = fdt_node_offset_by_prop_value(blob, -1, "device_type", "cpus", 4);
I didn't think /cpus had device_type = "cpus". I can't see any instances in any DTs I have to hand. Can we not find /cpus by path?
I will let Arnab to comment on this. He is coordinating with Linux device tree.
of_bus_default_count_cells(blob, off, &addr_cells, NULL);
off = fdt_node_offset_by_prop_value(blob, -1, "device_type", "cpu", 4);
while (off != -FDT_ERR_NOTFOUND) {
reg = (fdt32_t *)fdt_getprop(blob, off, "reg", 0);
if (reg) {
val = spin_tbl_addr;
+#ifndef CONFIG_FSL_SMP_RELEASE_ALL
val += id_to_core(of_read_number(reg, addr_cells))
* SPIN_TABLE_ELEM_SIZE;
+#endif
val = cpu_to_fdt64(val);
fdt_setprop_string(blob, off, "enable-method",
"spin-table");
fdt_setprop(blob, off, "cpu-release-addr",
&val, sizeof(val));
} else {
puts("cpu NULL\n");
Could we change that to "Warning: found cpu node without reg property" or something like that which makes the problem clear?
Yes, we can.
}
off = fdt_node_offset_by_prop_value(blob, off, "device_type",
"cpu", 4);
}
fdt_add_mem_rsv(blob, (uintptr_t)&secondary_boot_code,
*boot_code_size);
+} +#endif
+void ft_cpu_setup(void *blob, bd_t *bd) +{ +#ifdef CONFIG_MP
ft_fixup_cpu(blob);
+#endif +} diff --git a/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S b/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S index ad32b6c..629e6d4 100644 --- a/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S +++ b/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S @@ -8,7 +8,9 @@
#include <config.h> #include <linux/linkage.h> +#include <asm/gic.h> #include <asm/macro.h> +#include "mp.h"
ENTRY(lowlevel_init) mov x29, lr /* Save LR */ @@ -37,29 +39,119 @@ ENTRY(lowlevel_init)
branch_if_master x0, x1, 1f
ldr x0, =secondary_boot_func
blr x0
+1: +2:
Isn't the '2' label redundant?
We have some internal code dealing with trust zone between the 1 and 2. It is not likely to be used in long term since we are trying to move them into security monitor. I can drop label 2 here.
mov lr, x29 /* Restore LR */
ret
+ENDPROC(lowlevel_init)
/* Keep literals not used by the secondary boot code outside it */
.ltorg
/* Using 64 bit alignment since the spin table is accessed as data */
.align 4
.global secondary_boot_code
/* Secondary Boot Code starts here */
+secondary_boot_code:
.global __spin_table
+__spin_table:
.space CONFIG_MAX_CPUS*SPIN_TABLE_ELEM_SIZE
.align 2
+ENTRY(secondary_boot_func) /*
* Slave should wait for master clearing spin table.
* This sync prevent salves observing incorrect
* value of spin table and jumping to wrong place.
* MPIDR_EL1 Fields:
* MPIDR[1:0] = AFF0_CPUID <- Core ID (0,1)
* MPIDR[7:2] = AFF0_RES
* MPIDR[15:8] = AFF1_CLUSTERID <- Cluster ID (0,1,2,3)
* MPIDR[23:16] = AFF2_CLUSTERID
* MPIDR[24] = MT
* MPIDR[29:25] = RES0
* MPIDR[30] = U
* MPIDR[31] = ME
* MPIDR[39:32] = AFF3
*
* Linear Processor ID (LPID) calculation from MPIDR_EL1:
* (We only use AFF0_CPUID and AFF1_CLUSTERID for now
* until AFF2_CLUSTERID and AFF3 have non-zero values)
*
* LPID = MPIDR[15:8] | MPIDR[1:0] */
-#if defined(CONFIG_GICV2) || defined(CONFIG_GICV3) -#ifdef CONFIG_GICV2
ldr x0, =GICC_BASE
mrs x0, mpidr_el1
ubfm x1, x0, #8, #15
ubfm x2, x0, #0, #1
orr x10, x2, x1, lsl #2 /* x10 has LPID */
ubfm x9, x0, #0, #15 /* x9 contains MPIDR[15:0] */
/*
* offset of the spin table element for this core from start of spin
* table (each elem is padded to 64 bytes)
*/
lsl x1, x10, #6
ldr x0, =__spin_table
/* physical address of this cpus spin table element */
add x11, x1, x0
str x9, [x11, #16] /* LPID */
mov x4, #1
str x4, [x11, #8] /* STATUS */
dsb sy
+#if defined(CONFIG_GICV3)
gic_wait_for_interrupt_m x0
#endif
bl gic_wait_for_interrupt
Why do we only need to wait for GICv3? We previously did so for GICv2.
I think this is leftover from the attempt to boot all cores simultaneously. I will let Arnab to comment.
bl secondary_switch_to_el2
+#ifdef CONFIG_ARMV8_SWITCH_TO_EL1
secondary_switch_to_el1
#endif
Missing bl?
Looks so.
Is there any reason to have the SWITCH_TO_EL1 option other than for debugging?
Good question. I will let Arnab to comment here.
EL2 is the preferred EL to boot at for Linux and Xen (it gives far more flexibility), and if dropping to EL1 is necessary I think it would make more sense as a run-time option than a compile-time option.
/*
* All processors will enter EL2 and optionally EL1.
+slave_cpu:
wfe
+#ifdef CONFIG_FSL_SMP_RELEASE_ALL
/* All cores are released from the address in the 1st spin table
* element */
bl armv8_switch_to_el2
-#ifdef CONFIG_ARMV8_SWITCH_TO_EL1
bl armv8_switch_to_el1
ldr x1, =__spin_table
ldr x0, [x1]
+#else
ldr x0, [x11]
+#endif
cbz x0, slave_cpu
Similarly is there any reason to have the option of a single release addr if we can support unique addresses?
I think it was used by Linux for some ARM parts. I personally not a fun of using single release. But if it makes everyone happy, I can keep it.
[...]
diff --git a/arch/arm/cpu/armv8/fsl-lsch3/mp.c b/arch/arm/cpu/armv8/fsl-lsch3/mp.c new file mode 100644 index 0000000..b29eda9 --- /dev/null +++ b/arch/arm/cpu/armv8/fsl-lsch3/mp.c @@ -0,0 +1,172 @@ +/*
- Copyright 2014 Freescale Semiconductor, Inc.
- SPDX-License-Identifier: GPL-2.0+
- */
+#include <common.h> +#include <asm/io.h> +#include <asm/system.h> +#include <asm/io.h> +#include <asm/arch-fsl-lsch3/immap_lsch3.h> +#include "mp.h"
+DECLARE_GLOBAL_DATA_PTR;
+void *get_spin_tbl_addr(void) +{
void *ptr = (void *)&__spin_table;
return ptr;
+}
I don't think the cast is necessary here. As far as I can see this could be just:
void *get_spin_tbl_addr(void) { return &__spin_table; }
Let me try that.
[...]
diff --git a/arch/arm/include/asm/macro.h b/arch/arm/include/asm/macro.h index f77e4b8..0009c28 100644 --- a/arch/arm/include/asm/macro.h +++ b/arch/arm/include/asm/macro.h @@ -105,6 +105,98 @@ lr .req x30 cbz \xreg1, \master_label .endm
+.macro armv8_switch_to_el2_m, xreg1
mov \xreg1, #0x5b1 /* Non-secure EL0/EL1 | HVC | 64bit EL2 */
msr scr_el3, \xreg1
When dropping to EL1 from EL2 we disable HVC via HCR_EL2; presumably due to lack of a handler. Would it make sense to do similarly here and disable SMC here until we have a user (e.g. PSCI)?
I will let Arnab to comment here.
York

Hi Mark,
-----Original Message----- From: u-boot-bounces@lists.denx.de [mailto:u-boot-bounces@lists.denx.de] On Behalf Of York Sun Sent: Friday, August 22, 2014 12:03 AM To: Mark Rutland; Basu Arnab-B45036 Cc: trini@ti.com; u-boot@lists.denx.de; Wood Scott-B07421 Subject: Re: [U-Boot] [Patch v2 3/5] armv8/fsl-lsch3: Release secondary cores from boot hold off with Boot Page
On 08/21/2014 06:47 AM, Mark Rutland wrote:
Hi York,
I have mostly minor comments this time; this is looking pretty good.
On Tue, Aug 19, 2014 at 09:28:00PM +0100, York Sun wrote:
Secondary cores need to be released from holdoff by boot release registers. With GPP bootrom, they can boot from main memory directly. Individual spin table is used for each core. If a single release address is needed, defining macro CONFIG_FSL_SMP_RELEASE_ALL will use the CPU_RELEASE_ADDR. Spin table and the boot page is reserved in device tree so OS won't overwrite.
Signed-off-by: York Sun yorksun@freescale.com Signed-off-by: Arnab Basu arnab.basu@freescale.com
v2: Removed copying boot page. Use u-boot image as is in memory. Added dealing with different size of addr_cell in device tree. Added dealing with big- and little-endian. Added flushing spin table after cpu_release().
arch/arm/cpu/armv8/fsl-lsch3/Makefile | 2 + arch/arm/cpu/armv8/fsl-lsch3/cpu.c | 13 ++ arch/arm/cpu/armv8/fsl-lsch3/cpu.h | 1 + arch/arm/cpu/armv8/fsl-lsch3/fdt.c | 56 +++++++ arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S | 128 ++++++++++++-
--
arch/arm/cpu/armv8/fsl-lsch3/mp.c | 172
+++++++++++++++++++++
arch/arm/cpu/armv8/fsl-lsch3/mp.h | 36 +++++ arch/arm/cpu/armv8/transition.S | 63 +------- arch/arm/include/asm/arch-fsl-lsch3/config.h | 3 +- arch/arm/include/asm/arch-fsl-lsch3/immap_lsch3.h | 35 +++++ arch/arm/include/asm/macro.h | 92 +++++++++++ arch/arm/lib/gic_64.S | 10 +- common/board_f.c | 2 +- 13 files changed, 525 insertions(+), 88 deletions(-) create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/fdt.c create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/mp.c create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/mp.h
diff --git a/arch/arm/cpu/armv8/fsl-lsch3/Makefile b/arch/arm/cpu/armv8/fsl-lsch3/Makefile index 9249537..f920eeb 100644 --- a/arch/arm/cpu/armv8/fsl-lsch3/Makefile +++ b/arch/arm/cpu/armv8/fsl-lsch3/Makefile @@ -7,3 +7,5 @@ obj-y += cpu.o obj-y += lowlevel.o obj-y += speed.o +obj-$(CONFIG_MP) += mp.o +obj-$(CONFIG_OF_LIBFDT) += fdt.o diff --git a/arch/arm/cpu/armv8/fsl-lsch3/cpu.c b/arch/arm/cpu/armv8/fsl-lsch3/cpu.c index c129d03..47b947f 100644 --- a/arch/arm/cpu/armv8/fsl-lsch3/cpu.c +++ b/arch/arm/cpu/armv8/fsl-lsch3/cpu.c @@ -11,6 +11,7 @@ #include <asm/io.h> #include <asm/arch-fsl-lsch3/immap_lsch3.h> #include "cpu.h" +#include "mp.h" #include "speed.h" #include <fsl_mc.h>
@@ -434,3 +435,15 @@ int cpu_eth_init(bd_t *bis) #endif return error; }
+int arch_early_init_r(void) +{
int rv;
rv = fsl_lsch3_wake_seconday_cores();
if (rv)
printf("Did not wake secondary cores\n");
return 0;
+} diff --git a/arch/arm/cpu/armv8/fsl-lsch3/cpu.h b/arch/arm/cpu/armv8/fsl-lsch3/cpu.h index 28544d7..2e3312b 100644 --- a/arch/arm/cpu/armv8/fsl-lsch3/cpu.h +++ b/arch/arm/cpu/armv8/fsl-lsch3/cpu.h @@ -5,3 +5,4 @@ */
int fsl_qoriq_core_to_cluster(unsigned int core); +u32 cpu_mask(void); diff --git a/arch/arm/cpu/armv8/fsl-lsch3/fdt.c b/arch/arm/cpu/armv8/fsl-lsch3/fdt.c new file mode 100644 index 0000000..2dbcdcb --- /dev/null +++ b/arch/arm/cpu/armv8/fsl-lsch3/fdt.c @@ -0,0 +1,56 @@ +/*
- Copyright 2014 Freescale Semiconductor, Inc.
- SPDX-License-Identifier: GPL-2.0+
- */
+#include <common.h> +#include <libfdt.h> +#include <fdt_support.h> +#include "mp.h"
+#ifdef CONFIG_MP +void ft_fixup_cpu(void *blob) +{
int off;
__maybe_unused u64 spin_tbl_addr = (u64)get_spin_tbl_addr();
fdt32_t *reg;
int addr_cells;
u64 val;
size_t *boot_code_size = &(__secondary_boot_code_size);
off = fdt_node_offset_by_prop_value(blob, -1, "device_type",
- "cpus", 4);
I didn't think /cpus had device_type = "cpus". I can't see any instances in any DTs I have to hand. Can we not find /cpus by path?
I will let Arnab to comment on this. He is coordinating with Linux device tree.
Since I contribute to the DTS for FSL ARMv8 SoC, here is my rationale behind the same. I have used the standard ARM cpu device-tree binding documentation as a reference (see [1]) which defined the device_type which it mentions should be set to cpu.
Please let me know if I am missing something.
[1] https://www.kernel.org/doc/Documentation/devicetree/bindings/arm/cpus.txt
of_bus_default_count_cells(blob, off, &addr_cells, NULL);
off = fdt_node_offset_by_prop_value(blob, -1, "device_type",
"cpu", 4);
while (off != -FDT_ERR_NOTFOUND) {
reg = (fdt32_t *)fdt_getprop(blob, off, "reg", 0);
if (reg) {
val = spin_tbl_addr; #ifndef
+CONFIG_FSL_SMP_RELEASE_ALL
val += id_to_core(of_read_number(reg,
addr_cells))
* SPIN_TABLE_ELEM_SIZE; #endif
val = cpu_to_fdt64(val);
fdt_setprop_string(blob, off, "enable-method",
"spin-table");
fdt_setprop(blob, off, "cpu-release-addr",
&val, sizeof(val));
} else {
puts("cpu NULL\n");
Could we change that to "Warning: found cpu node without reg property" or something like that which makes the problem clear?
Yes, we can.
}
off = fdt_node_offset_by_prop_value(blob, off,
"device_type",
"cpu", 4);
}
fdt_add_mem_rsv(blob, (uintptr_t)&secondary_boot_code,
*boot_code_size); } #endif
+void ft_cpu_setup(void *blob, bd_t *bd) { #ifdef CONFIG_MP
ft_fixup_cpu(blob);
+#endif +} diff --git a/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S b/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S index ad32b6c..629e6d4 100644 --- a/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S +++ b/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S @@ -8,7 +8,9 @@
#include <config.h> #include <linux/linkage.h> +#include <asm/gic.h> #include <asm/macro.h> +#include "mp.h"
ENTRY(lowlevel_init) mov x29, lr /* Save LR */ @@ -37,29 +39,119 @@ ENTRY(lowlevel_init)
branch_if_master x0, x1, 1f
ldr x0, =secondary_boot_func
blr x0
+1: +2:
Isn't the '2' label redundant?
We have some internal code dealing with trust zone between the 1 and 2. It is not likely to be used in long term since we are trying to move them into security monitor. I can drop label 2 here.
U-boot can still be booted in EL3, as it can be well booted w/o a ATF or EL3 capable s/w running before the same. That's why we have CONFIG_EL3 and CONFIG_EL2 code legs in the u-boot ARMv8 code.
mov lr, x29 /* Restore LR */
ret
+ENDPROC(lowlevel_init)
/* Keep literals not used by the secondary boot code outside it
*/
.ltorg
/* Using 64 bit alignment since the spin table is accessed as
data */
.align 4
.global secondary_boot_code
/* Secondary Boot Code starts here */
+secondary_boot_code:
.global __spin_table
+__spin_table:
.space CONFIG_MAX_CPUS*SPIN_TABLE_ELEM_SIZE
.align 2
+ENTRY(secondary_boot_func) /*
* Slave should wait for master clearing spin table.
* This sync prevent salves observing incorrect
* value of spin table and jumping to wrong place.
* MPIDR_EL1 Fields:
* MPIDR[1:0] = AFF0_CPUID <- Core ID (0,1)
* MPIDR[7:2] = AFF0_RES
* MPIDR[15:8] = AFF1_CLUSTERID <- Cluster ID (0,1,2,3)
* MPIDR[23:16] = AFF2_CLUSTERID
* MPIDR[24] = MT
* MPIDR[29:25] = RES0
* MPIDR[30] = U
* MPIDR[31] = ME
* MPIDR[39:32] = AFF3
*
* Linear Processor ID (LPID) calculation from MPIDR_EL1:
* (We only use AFF0_CPUID and AFF1_CLUSTERID for now
* until AFF2_CLUSTERID and AFF3 have non-zero values)
*
* LPID = MPIDR[15:8] | MPIDR[1:0] */
-#if defined(CONFIG_GICV2) || defined(CONFIG_GICV3) -#ifdef CONFIG_GICV2
ldr x0, =GICC_BASE
mrs x0, mpidr_el1
ubfm x1, x0, #8, #15
ubfm x2, x0, #0, #1
orr x10, x2, x1, lsl #2 /* x10 has LPID */
ubfm x9, x0, #0, #15 /* x9 contains MPIDR[15:0] */
/*
* offset of the spin table element for this core from start of
spin
* table (each elem is padded to 64 bytes)
*/
lsl x1, x10, #6
ldr x0, =__spin_table
/* physical address of this cpus spin table element */
add x11, x1, x0
str x9, [x11, #16] /* LPID */
mov x4, #1
str x4, [x11, #8] /* STATUS */
dsb sy
+#if defined(CONFIG_GICV3)
gic_wait_for_interrupt_m x0
#endif
bl gic_wait_for_interrupt
Why do we only need to wait for GICv3? We previously did so for GICv2.
I think this is leftover from the attempt to boot all cores simultaneously. I will let Arnab to comment.
bl secondary_switch_to_el2
+#ifdef CONFIG_ARMV8_SWITCH_TO_EL1
secondary_switch_to_el1
#endif
Missing bl?
Looks so.
Is there any reason to have the SWITCH_TO_EL1 option other than for debugging?
Good question. I will let Arnab to comment here.
EL2 is the preferred EL to boot at for Linux and Xen (it gives far more flexibility), and if dropping to EL1 is necessary I think it would make more sense as a run-time option than a compile-time option.
/*
* All processors will enter EL2 and optionally EL1.
+slave_cpu:
wfe
+#ifdef CONFIG_FSL_SMP_RELEASE_ALL
/* All cores are released from the address in the 1st spin
table
* element */
bl armv8_switch_to_el2
-#ifdef CONFIG_ARMV8_SWITCH_TO_EL1
bl armv8_switch_to_el1
ldr x1, =__spin_table
ldr x0, [x1]
+#else
ldr x0, [x11]
+#endif
cbz x0, slave_cpu
Similarly is there any reason to have the option of a single release addr if we can support unique addresses?
I think it was used by Linux for some ARM parts. I personally not a fun of using single release. But if it makes everyone happy, I can keep it.
We followed the standard ARMv8 foundation model DTS initially which along with others supported a single release address for all the cores. So, we wanted to comply to the same.
[...]
diff --git a/arch/arm/cpu/armv8/fsl-lsch3/mp.c b/arch/arm/cpu/armv8/fsl-lsch3/mp.c new file mode 100644 index 0000000..b29eda9 --- /dev/null +++ b/arch/arm/cpu/armv8/fsl-lsch3/mp.c @@ -0,0 +1,172 @@ +/*
- Copyright 2014 Freescale Semiconductor, Inc.
- SPDX-License-Identifier: GPL-2.0+
- */
+#include <common.h> +#include <asm/io.h> +#include <asm/system.h> +#include <asm/io.h> +#include <asm/arch-fsl-lsch3/immap_lsch3.h> +#include "mp.h"
+DECLARE_GLOBAL_DATA_PTR;
+void *get_spin_tbl_addr(void) +{
void *ptr = (void *)&__spin_table;
return ptr;
+}
I don't think the cast is necessary here. As far as I can see this could be just:
void *get_spin_tbl_addr(void) { return &__spin_table; }
Let me try that.
[...]
diff --git a/arch/arm/include/asm/macro.h b/arch/arm/include/asm/macro.h index f77e4b8..0009c28 100644 --- a/arch/arm/include/asm/macro.h +++ b/arch/arm/include/asm/macro.h @@ -105,6 +105,98 @@ lr .req x30 cbz \xreg1, \master_label .endm
+.macro armv8_switch_to_el2_m, xreg1
mov \xreg1, #0x5b1 /* Non-secure EL0/EL1 | HVC | 64bit EL2
*/
msr scr_el3, \xreg1
When dropping to EL1 from EL2 we disable HVC via HCR_EL2; presumably due to lack of a handler. Would it make sense to do similarly here and disable SMC here until we have a user (e.g. PSCI)?
I will let Arnab to comment here.
In my view u-boot should be similar to UEFI and both should drop only to EL2 before booting either Linux or Hypervisor. UEFI code doesn't switch to EL1 before invoking Linux. Thoughts?
York _______________________________________________ U-Boot mailing list U-Boot@lists.denx.de http://lists.denx.de/mailman/listinfo/u-boot

Hi Mark
On 08/22/2014 12:20 AM, Sharma Bhupesh-B45370 wrote:
Hi Mark,
-----Original Message----- From: u-boot-bounces@lists.denx.de [mailto:u-boot-bounces@lists.denx.de] On Behalf Of York Sun Sent: Friday, August 22, 2014 12:03 AM To: Mark Rutland; Basu Arnab-B45036 Cc: trini@ti.com; u-boot@lists.denx.de; Wood Scott-B07421 Subject: Re: [U-Boot] [Patch v2 3/5] armv8/fsl-lsch3: Release secondary cores from boot hold off with Boot Page
On 08/21/2014 06:47 AM, Mark Rutland wrote:
Hi York,
I have mostly minor comments this time; this is looking pretty good.
Thanks for taking the time to review this.
On Tue, Aug 19, 2014 at 09:28:00PM +0100, York Sun wrote:
Secondary cores need to be released from holdoff by boot release registers. With GPP bootrom, they can boot from main memory directly. Individual spin table is used for each core. If a single release address is needed, defining macro CONFIG_FSL_SMP_RELEASE_ALL will use the CPU_RELEASE_ADDR. Spin table and the boot page is reserved in device tree so OS won't overwrite.
Signed-off-by: York Sun yorksun@freescale.com Signed-off-by: Arnab Basu arnab.basu@freescale.com
v2: Removed copying boot page. Use u-boot image as is in memory. Added dealing with different size of addr_cell in device tree. Added dealing with big- and little-endian. Added flushing spin table after cpu_release().
arch/arm/cpu/armv8/fsl-lsch3/Makefile | 2 + arch/arm/cpu/armv8/fsl-lsch3/cpu.c | 13 ++ arch/arm/cpu/armv8/fsl-lsch3/cpu.h | 1 + arch/arm/cpu/armv8/fsl-lsch3/fdt.c | 56 +++++++ arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S | 128 ++++++++++++-
--
arch/arm/cpu/armv8/fsl-lsch3/mp.c | 172
+++++++++++++++++++++
arch/arm/cpu/armv8/fsl-lsch3/mp.h | 36 +++++ arch/arm/cpu/armv8/transition.S | 63 +------- arch/arm/include/asm/arch-fsl-lsch3/config.h | 3 +- arch/arm/include/asm/arch-fsl-lsch3/immap_lsch3.h | 35 +++++ arch/arm/include/asm/macro.h | 92 +++++++++++ arch/arm/lib/gic_64.S | 10 +- common/board_f.c | 2 +- 13 files changed, 525 insertions(+), 88 deletions(-) create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/fdt.c create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/mp.c create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/mp.h
diff --git a/arch/arm/cpu/armv8/fsl-lsch3/Makefile b/arch/arm/cpu/armv8/fsl-lsch3/Makefile index 9249537..f920eeb 100644 --- a/arch/arm/cpu/armv8/fsl-lsch3/Makefile +++ b/arch/arm/cpu/armv8/fsl-lsch3/Makefile @@ -7,3 +7,5 @@ obj-y += cpu.o obj-y += lowlevel.o obj-y += speed.o +obj-$(CONFIG_MP) += mp.o +obj-$(CONFIG_OF_LIBFDT) += fdt.o diff --git a/arch/arm/cpu/armv8/fsl-lsch3/cpu.c b/arch/arm/cpu/armv8/fsl-lsch3/cpu.c index c129d03..47b947f 100644 --- a/arch/arm/cpu/armv8/fsl-lsch3/cpu.c +++ b/arch/arm/cpu/armv8/fsl-lsch3/cpu.c @@ -11,6 +11,7 @@ #include <asm/io.h> #include <asm/arch-fsl-lsch3/immap_lsch3.h> #include "cpu.h" +#include "mp.h" #include "speed.h" #include <fsl_mc.h>
@@ -434,3 +435,15 @@ int cpu_eth_init(bd_t *bis) #endif return error; }
+int arch_early_init_r(void) +{
int rv;
rv = fsl_lsch3_wake_seconday_cores();
if (rv)
printf("Did not wake secondary cores\n");
return 0;
+} diff --git a/arch/arm/cpu/armv8/fsl-lsch3/cpu.h b/arch/arm/cpu/armv8/fsl-lsch3/cpu.h index 28544d7..2e3312b 100644 --- a/arch/arm/cpu/armv8/fsl-lsch3/cpu.h +++ b/arch/arm/cpu/armv8/fsl-lsch3/cpu.h @@ -5,3 +5,4 @@ */
int fsl_qoriq_core_to_cluster(unsigned int core); +u32 cpu_mask(void); diff --git a/arch/arm/cpu/armv8/fsl-lsch3/fdt.c b/arch/arm/cpu/armv8/fsl-lsch3/fdt.c new file mode 100644 index 0000000..2dbcdcb --- /dev/null +++ b/arch/arm/cpu/armv8/fsl-lsch3/fdt.c @@ -0,0 +1,56 @@ +/*
- Copyright 2014 Freescale Semiconductor, Inc.
- SPDX-License-Identifier: GPL-2.0+
- */
+#include <common.h> +#include <libfdt.h> +#include <fdt_support.h> +#include "mp.h"
+#ifdef CONFIG_MP +void ft_fixup_cpu(void *blob) +{
int off;
__maybe_unused u64 spin_tbl_addr = (u64)get_spin_tbl_addr();
fdt32_t *reg;
int addr_cells;
u64 val;
size_t *boot_code_size = &(__secondary_boot_code_size);
off = fdt_node_offset_by_prop_value(blob, -1, "device_type",
- "cpus", 4);
I didn't think /cpus had device_type = "cpus". I can't see any instances in any DTs I have to hand. Can we not find /cpus by path?
I will let Arnab to comment on this. He is coordinating with Linux device tree.
Since I contribute to the DTS for FSL ARMv8 SoC, here is my rationale behind the same. I have used the standard ARM cpu device-tree binding documentation as a reference (see [1]) which defined the device_type which it mentions should be set to cpu.
Please let me know if I am missing something.
[1] https://www.kernel.org/doc/Documentation/devicetree/bindings/arm/cpus.txt
I see the confusion here, "device_type" is required for the "cpu" node but not its container the "cpus" node.
I will change this.
of_bus_default_count_cells(blob, off, &addr_cells, NULL);
off = fdt_node_offset_by_prop_value(blob, -1, "device_type",
"cpu", 4);
while (off != -FDT_ERR_NOTFOUND) {
reg = (fdt32_t *)fdt_getprop(blob, off, "reg", 0);
if (reg) {
val = spin_tbl_addr; #ifndef
+CONFIG_FSL_SMP_RELEASE_ALL
val += id_to_core(of_read_number(reg,
addr_cells))
* SPIN_TABLE_ELEM_SIZE; #endif
val = cpu_to_fdt64(val);
fdt_setprop_string(blob, off, "enable-method",
"spin-table");
fdt_setprop(blob, off, "cpu-release-addr",
&val, sizeof(val));
} else {
puts("cpu NULL\n");
Could we change that to "Warning: found cpu node without reg property" or something like that which makes the problem clear?
Yes, we can.
}
off = fdt_node_offset_by_prop_value(blob, off,
"device_type",
"cpu", 4);
}
fdt_add_mem_rsv(blob, (uintptr_t)&secondary_boot_code,
*boot_code_size); } #endif
+void ft_cpu_setup(void *blob, bd_t *bd) { #ifdef CONFIG_MP
ft_fixup_cpu(blob);
+#endif +} diff --git a/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S b/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S index ad32b6c..629e6d4 100644 --- a/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S +++ b/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S @@ -8,7 +8,9 @@
#include <config.h> #include <linux/linkage.h> +#include <asm/gic.h> #include <asm/macro.h> +#include "mp.h"
ENTRY(lowlevel_init) mov x29, lr /* Save LR */ @@ -37,29 +39,119 @@ ENTRY(lowlevel_init)
branch_if_master x0, x1, 1f
ldr x0, =secondary_boot_func
blr x0
+1: +2:
Isn't the '2' label redundant?
We have some internal code dealing with trust zone between the 1 and 2. It is not likely to be used in long term since we are trying to move them into security monitor. I can drop label 2 here.
Yes, now that I look at it the labels in this file could do with some cleanup.
Will do.
U-boot can still be booted in EL3, as it can be well booted w/o a ATF or EL3 capable s/w running before the same. That's why we have CONFIG_EL3 and CONFIG_EL2 code legs in the u-boot ARMv8 code.
mov lr, x29 /* Restore LR */
ret
+ENDPROC(lowlevel_init)
/* Keep literals not used by the secondary boot code outside it
*/
.ltorg
/* Using 64 bit alignment since the spin table is accessed as
data */
.align 4
.global secondary_boot_code
/* Secondary Boot Code starts here */
+secondary_boot_code:
.global __spin_table
+__spin_table:
.space CONFIG_MAX_CPUS*SPIN_TABLE_ELEM_SIZE
.align 2
+ENTRY(secondary_boot_func) /*
* Slave should wait for master clearing spin table.
* This sync prevent salves observing incorrect
* value of spin table and jumping to wrong place.
* MPIDR_EL1 Fields:
* MPIDR[1:0] = AFF0_CPUID <- Core ID (0,1)
* MPIDR[7:2] = AFF0_RES
* MPIDR[15:8] = AFF1_CLUSTERID <- Cluster ID (0,1,2,3)
* MPIDR[23:16] = AFF2_CLUSTERID
* MPIDR[24] = MT
* MPIDR[29:25] = RES0
* MPIDR[30] = U
* MPIDR[31] = ME
* MPIDR[39:32] = AFF3
*
* Linear Processor ID (LPID) calculation from MPIDR_EL1:
* (We only use AFF0_CPUID and AFF1_CLUSTERID for now
* until AFF2_CLUSTERID and AFF3 have non-zero values)
*
* LPID = MPIDR[15:8] | MPIDR[1:0] */
-#if defined(CONFIG_GICV2) || defined(CONFIG_GICV3) -#ifdef CONFIG_GICV2
ldr x0, =GICC_BASE
mrs x0, mpidr_el1
ubfm x1, x0, #8, #15
ubfm x2, x0, #0, #1
orr x10, x2, x1, lsl #2 /* x10 has LPID */
ubfm x9, x0, #0, #15 /* x9 contains MPIDR[15:0] */
/*
* offset of the spin table element for this core from start of
spin
* table (each elem is padded to 64 bytes)
*/
lsl x1, x10, #6
ldr x0, =__spin_table
/* physical address of this cpus spin table element */
add x11, x1, x0
str x9, [x11, #16] /* LPID */
mov x4, #1
str x4, [x11, #8] /* STATUS */
dsb sy
+#if defined(CONFIG_GICV3)
#endifgic_wait_for_interrupt_m x0
bl gic_wait_for_interrupt
Why do we only need to wait for GICv3? We previously did so for GICv2.
I think this is leftover from the attempt to boot all cores simultaneously. I will let Arnab to comment.
There is a "#elif" that I'm sure used to be here but seems to have got lost somehow. For CONFIG_GICV2 "gic_wait_for_interrupt_m" takes 2 arguments. We should be waiting in both cases.
Will fix.
bl secondary_switch_to_el2
+#ifdef CONFIG_ARMV8_SWITCH_TO_EL1
#endifsecondary_switch_to_el1
Missing bl?
Looks so.
Is there any reason to have the SWITCH_TO_EL1 option other than for debugging?
Good question. I will let Arnab to comment here.
EL2 is the preferred EL to boot at for Linux and Xen (it gives far more flexibility), and if dropping to EL1 is necessary I think it would make more sense as a run-time option than a compile-time option.
I don't think we plan to boot Linux in EL1. This is primarily here to maintain uniformity with "arch/arm/lib/bootm.c". If I remove it from here and it was ever defined in the config, then the boot core would enter Linux in EL1 while the secondaries entered Linux in EL2. I don't know if that breaks anything...
The run-time option seems interesting and it would definitely work for the primary core which could access the u-boot env variables and such but the secondaries are executing assembly and the communication between cores is fairly primitive (sgi's and sev's etc) so this might require a little bit of work.
If you have any thoughts on how we can go about it, I would be glad to do some research, but that seems to be the topic for a separate patchset I guess.
/*
* All processors will enter EL2 and optionally EL1.
+slave_cpu:
wfe
+#ifdef CONFIG_FSL_SMP_RELEASE_ALL
/* All cores are released from the address in the 1st spin
table
* element */
bl armv8_switch_to_el2
-#ifdef CONFIG_ARMV8_SWITCH_TO_EL1
bl armv8_switch_to_el1
ldr x1, =__spin_table
ldr x0, [x1]
+#else
ldr x0, [x11]
+#endif
cbz x0, slave_cpu
Similarly is there any reason to have the option of a single release addr if we can support unique addresses?
I think it was used by Linux for some ARM parts. I personally not a fun of using single release. But if it makes everyone happy, I can keep it.
We followed the standard ARMv8 foundation model DTS initially which along with others supported a single release address for all the cores. So, we wanted to comply to the same.
Yes this is left over code which should (and will) be cleaned up.
[...]
diff --git a/arch/arm/cpu/armv8/fsl-lsch3/mp.c b/arch/arm/cpu/armv8/fsl-lsch3/mp.c new file mode 100644 index 0000000..b29eda9 --- /dev/null +++ b/arch/arm/cpu/armv8/fsl-lsch3/mp.c @@ -0,0 +1,172 @@ +/*
- Copyright 2014 Freescale Semiconductor, Inc.
- SPDX-License-Identifier: GPL-2.0+
- */
+#include <common.h> +#include <asm/io.h> +#include <asm/system.h> +#include <asm/io.h> +#include <asm/arch-fsl-lsch3/immap_lsch3.h> +#include "mp.h"
+DECLARE_GLOBAL_DATA_PTR;
+void *get_spin_tbl_addr(void) +{
void *ptr = (void *)&__spin_table;
return ptr;
+}
I don't think the cast is necessary here. As far as I can see this could be just:
void *get_spin_tbl_addr(void) { return &__spin_table; }
Let me try that.
[...]
diff --git a/arch/arm/include/asm/macro.h b/arch/arm/include/asm/macro.h index f77e4b8..0009c28 100644 --- a/arch/arm/include/asm/macro.h +++ b/arch/arm/include/asm/macro.h @@ -105,6 +105,98 @@ lr .req x30 cbz \xreg1, \master_label .endm
+.macro armv8_switch_to_el2_m, xreg1
mov \xreg1, #0x5b1 /* Non-secure EL0/EL1 | HVC | 64bit EL2
*/
msr scr_el3, \xreg1
When dropping to EL1 from EL2 we disable HVC via HCR_EL2; presumably due to lack of a handler. Would it make sense to do similarly here and disable SMC here until we have a user (e.g. PSCI)?
I will let Arnab to comment here.
SMC's are disabled (we are setting bit 7, the SMD bit). The comment does not capture this. I'll fix it.
Thanks Arnab
In my view u-boot should be similar to UEFI and both should drop only to EL2 before booting either Linux or Hypervisor. UEFI code doesn't switch to EL1 before invoking Linux. Thoughts?
York _______________________________________________ U-Boot mailing list U-Boot@lists.denx.de http://lists.denx.de/mailman/listinfo/u-boot

Hi Arnab,
[...]
Is there any reason to have the SWITCH_TO_EL1 option other than for debugging?
Good question. I will let Arnab to comment here.
EL2 is the preferred EL to boot at for Linux and Xen (it gives far more flexibility), and if dropping to EL1 is necessary I think it would make more sense as a run-time option than a compile-time option.
I don't think we plan to boot Linux in EL1. This is primarily here to maintain uniformity with "arch/arm/lib/bootm.c". If I remove it from here and it was ever defined in the config, then the boot core would enter Linux in EL1 while the secondaries entered Linux in EL2. I don't know if that breaks anything...
Linux will be very unhappy, we require that CPUs enter the kernel in a consistent mode. So keeping the CPUs in the same mode is the most important thing for now.
I don't see a reason other than debugging to boot any CPU at EL1N if EL2 is present (so I'm not keen on the CONFIG_ARMV8_SWITCH_TO_EL1 option at all). The ideal case would be to always drop to the highest privileged non-secure mode the CPU supports (i.e. EL2 if present, EL1N otherwise).
If there's an OS that requires EL1N rather than EL2 then I think that should be the special case (so users get the option of EL2 features by default, and the OS has more flexibility to fix things up at EL2 if necessary).
The run-time option seems interesting and it would definitely work for the primary core which could access the u-boot env variables and such but the secondaries are executing assembly and the communication between cores is fairly primitive (sgi's and sev's etc) so this might require a little bit of work.
If you have any thoughts on how we can go about it, I would be glad to do some research, but that seems to be the topic for a separate patchset I guess.
I guess if secondaries were first dropped into a spin-table at EL2 you could boot them into a shim that did something like:
- Reset the cpu-release-addr for the CPU - Configure EL1 - Drop to EL1 - Return to the spin-table
So all that would be necessary is the sev and the usual polling to see that the CPU has responded to the cpu-release-addr being written.
As you say, that's a topic for a separate patchset.
/*
* All processors will enter EL2 and optionally EL1.
+slave_cpu:
wfe
+#ifdef CONFIG_FSL_SMP_RELEASE_ALL
/* All cores are released from the address in the 1st spin
table
* element */
bl armv8_switch_to_el2
-#ifdef CONFIG_ARMV8_SWITCH_TO_EL1
bl armv8_switch_to_el1
ldr x1, =__spin_table
ldr x0, [x1]
+#else
ldr x0, [x11]
+#endif
cbz x0, slave_cpu
Similarly is there any reason to have the option of a single release addr if we can support unique addresses?
I think it was used by Linux for some ARM parts. I personally not a fun of using single release. But if it makes everyone happy, I can keep it.
We followed the standard ARMv8 foundation model DTS initially which along with others supported a single release address for all the cores. So, we wanted to comply to the same.
Yes this is left over code which should (and will) be cleaned up.
The foundation model DTS is unfortunately not a good example, and it's too late to change it due to existing users.
Linux has supported unique cpu release addresses since the start of the arm64 port, and it's the preferred implementation. The bootwrapper was a quick hack to get things booting rather than a reference bootloader. It does plenty of things wrong that I would like to fix.
I realise that this is not made clear at the moment. Putting together a document with guidance for bootloaders has been on my TODO list, but unfortunately it is at the far end.
[...]
diff --git a/arch/arm/include/asm/macro.h b/arch/arm/include/asm/macro.h index f77e4b8..0009c28 100644 --- a/arch/arm/include/asm/macro.h +++ b/arch/arm/include/asm/macro.h @@ -105,6 +105,98 @@ lr .req x30 cbz \xreg1, \master_label .endm
+.macro armv8_switch_to_el2_m, xreg1
mov \xreg1, #0x5b1 /* Non-secure EL0/EL1 | HVC | 64bit EL2
*/
msr scr_el3, \xreg1
When dropping to EL1 from EL2 we disable HVC via HCR_EL2; presumably due to lack of a handler. Would it make sense to do similarly here and disable SMC here until we have a user (e.g. PSCI)?
I will let Arnab to comment here.
SMC's are disabled (we are setting bit 7, the SMD bit). The comment does not capture this. I'll fix it.
Ah, sorry. I attempted to review the hex values manually but evidently I got confused here. I've just gone over the value here (0x5b1 for SCR_EL3) with a piece of paper and an ARM ARM to hand, and it looks sane to me.
The comment update would be nice regardless.
Thanks, Mark.

Hi Bhupesh,
[...]
diff --git a/arch/arm/cpu/armv8/fsl-lsch3/fdt.c b/arch/arm/cpu/armv8/fsl-lsch3/fdt.c new file mode 100644 index 0000000..2dbcdcb --- /dev/null +++ b/arch/arm/cpu/armv8/fsl-lsch3/fdt.c @@ -0,0 +1,56 @@ +/*
- Copyright 2014 Freescale Semiconductor, Inc.
- SPDX-License-Identifier: GPL-2.0+
- */
+#include <common.h> +#include <libfdt.h> +#include <fdt_support.h> +#include "mp.h"
+#ifdef CONFIG_MP +void ft_fixup_cpu(void *blob) +{
int off;
__maybe_unused u64 spin_tbl_addr = (u64)get_spin_tbl_addr();
fdt32_t *reg;
int addr_cells;
u64 val;
size_t *boot_code_size = &(__secondary_boot_code_size);
off = fdt_node_offset_by_prop_value(blob, -1, "device_type",
- "cpus", 4);
I didn't think /cpus had device_type = "cpus". I can't see any instances in any DTs I have to hand. Can we not find /cpus by path?
I will let Arnab to comment on this. He is coordinating with Linux device tree.
Since I contribute to the DTS for FSL ARMv8 SoC, here is my rationale behind the same. I have used the standard ARM cpu device-tree binding documentation as a reference (see [1]) which defined the device_type which it mentions should be set to cpu.
Please let me know if I am missing something.
[1] https://www.kernel.org/doc/Documentation/devicetree/bindings/arm/cpus.txt
Hi. As Arnab replied, it's only the CPU nodes themselves (e.g. /cpus/cpu@0) that have device_type = "cpu". The /cpus node does not have a device_type at all.
The /cpus node can always be found by path however, as the name is special.
[...]
--- a/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S +++ b/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S @@ -8,7 +8,9 @@
#include <config.h> #include <linux/linkage.h> +#include <asm/gic.h> #include <asm/macro.h> +#include "mp.h"
ENTRY(lowlevel_init) mov x29, lr /* Save LR */ @@ -37,29 +39,119 @@ ENTRY(lowlevel_init)
branch_if_master x0, x1, 1f
ldr x0, =secondary_boot_func
blr x0
+1: +2:
Isn't the '2' label redundant?
We have some internal code dealing with trust zone between the 1 and 2. It is not likely to be used in long term since we are trying to move them into security monitor. I can drop label 2 here.
U-boot can still be booted in EL3, as it can be well booted w/o a ATF or EL3 capable s/w running before the same. That's why we have CONFIG_EL3 and CONFIG_EL2 code legs in the u-boot ARMv8 code.
Ok. I was only confused by the fact the label didn't seem to be used anywhere, and it sounds like it can be dropped for now?
[...]
/*
* All processors will enter EL2 and optionally EL1.
+slave_cpu:
wfe
+#ifdef CONFIG_FSL_SMP_RELEASE_ALL
/* All cores are released from the address in the 1st spin
table
* element */
bl armv8_switch_to_el2
-#ifdef CONFIG_ARMV8_SWITCH_TO_EL1
bl armv8_switch_to_el1
ldr x1, =__spin_table
ldr x0, [x1]
+#else
ldr x0, [x11]
+#endif
cbz x0, slave_cpu
Similarly is there any reason to have the option of a single release addr if we can support unique addresses?
I think it was used by Linux for some ARM parts. I personally not a fun of using single release. But if it makes everyone happy, I can keep it.
We followed the standard ARMv8 foundation model DTS initially which along with others supported a single release address for all the cores. So, we wanted to comply to the same.
As I mentioned elsewhere, the existing DTS aren't good examples. The fact that isn't clear is a problem on the Linux side, and I'm sorry that they have misled you.
Using unique addresses is preferred. Sharing a single address should be discouraged.
[...]
diff --git a/arch/arm/include/asm/macro.h b/arch/arm/include/asm/macro.h index f77e4b8..0009c28 100644 --- a/arch/arm/include/asm/macro.h +++ b/arch/arm/include/asm/macro.h @@ -105,6 +105,98 @@ lr .req x30 cbz \xreg1, \master_label .endm
+.macro armv8_switch_to_el2_m, xreg1
mov \xreg1, #0x5b1 /* Non-secure EL0/EL1 | HVC | 64bit EL2
*/
msr scr_el3, \xreg1
When dropping to EL1 from EL2 we disable HVC via HCR_EL2; presumably due to lack of a handler. Would it make sense to do similarly here and disable SMC here until we have a user (e.g. PSCI)?
I will let Arnab to comment here.
In my view u-boot should be similar to UEFI and both should drop only to EL2 before booting either Linux or Hypervisor. UEFI code doesn't switch to EL1 before invoking Linux. Thoughts?
That sounds ideal to me.
In general I'd hope any bootloader would hand-off to the OS (Linux, Xen, whatever) at the highest-privileged non-secure mode. So EL2 if present, otherwise EL1N.
We don't have any legacy to worry about with old kernels that cannot boot at EL1N, so I don't see a good reason to drop to EL1N if EL2 is available.
If the option of booting at EL1 becomes necessary for some piece of software, I'd rather that were a run-time option and that piece of software were treated as the special case.
Thanks, Mark.

Hi York,
/*
* All processors will enter EL2 and optionally EL1.
+slave_cpu:
wfe
+#ifdef CONFIG_FSL_SMP_RELEASE_ALL
/* All cores are released from the address in the 1st spin table
* element */
bl armv8_switch_to_el2
-#ifdef CONFIG_ARMV8_SWITCH_TO_EL1
bl armv8_switch_to_el1
ldr x1, =__spin_table
ldr x0, [x1]
+#else
ldr x0, [x11]
+#endif
cbz x0, slave_cpu
Similarly is there any reason to have the option of a single release addr if we can support unique addresses?
I think it was used by Linux for some ARM parts. I personally not a fun of using single release.
That makes two of us. The single release address on those ARM dts is a legacy mistake that we can't fix up without breaking some models. We don't need to propagate that mistake to new platforms.
But if it makes everyone happy, I can keep it.
I'd be happier with CONFIG_FSL_SMP_RELEASE_ALL dropped entirely. Ideally U-Boot would always provide a unique cpu-release-address for each CPU.
Thanks, Mark.

Spin table is at the very beginning of boot code. Each core has an individual release address within the spin table, the ft_cpu_setup fn updates the "cpu-release-addr" property of each cpu node with the corresponding release address.
Also fix CPU_RELEASE_ADDR to point to secondary_boot_func.
Signed-off-by: York Sun yorksun@freescale.com Signed-off-by: Arnab Basu arnab.basu@freescale.com --- Change log v2: Use individual spin table for each core by default
board/freescale/ls2085a/ls2085a.c | 2 ++ include/configs/ls2085a_common.h | 9 ++++++--- 2 files changed, 8 insertions(+), 3 deletions(-)
diff --git a/board/freescale/ls2085a/ls2085a.c b/board/freescale/ls2085a/ls2085a.c index a18db1d..3daa787 100644 --- a/board/freescale/ls2085a/ls2085a.c +++ b/board/freescale/ls2085a/ls2085a.c @@ -88,6 +88,8 @@ void ft_board_setup(void *blob, bd_t *bd) phys_addr_t base; phys_size_t size;
+ ft_cpu_setup(blob, bd); + /* limit the memory size to bank 1 until Linux can handle 40-bit PA */ base = getenv_bootm_low(); size = getenv_bootm_size(); diff --git a/include/configs/ls2085a_common.h b/include/configs/ls2085a_common.h index 2bd5a47..4c258ca 100644 --- a/include/configs/ls2085a_common.h +++ b/include/configs/ls2085a_common.h @@ -45,14 +45,16 @@
#define CONFIG_SYS_FSL_DDR_INTLV_256B /* force 256 byte interleaving */
-/* SMP Definitions */ -#define CPU_RELEASE_ADDR CONFIG_SYS_INIT_SP_ADDR - #define CONFIG_SYS_DDR_SDRAM_BASE 0x80000000UL #define CONFIG_SYS_FSL_DDR_SDRAM_BASE_PHY 0 #define CONFIG_SYS_SDRAM_BASE CONFIG_SYS_DDR_SDRAM_BASE #define CONFIG_SYS_DDR_BLOCK2_BASE 0x8080000000ULL
+/* + * SMP Definitions + */ +#define CPU_RELEASE_ADDR secondary_boot_func + /* Generic Timer Definitions */ #define COUNTER_FREQUENCY 12000000 /* 12MHz */
@@ -167,6 +169,7 @@
/* Miscellaneous configurable options */ #define CONFIG_SYS_LOAD_ADDR (CONFIG_SYS_DDR_SDRAM_BASE + 0x10000000) +#define CONFIG_ARCH_EARLY_INIT_R
/* Physical Memory Map */ /* fixme: these need to be checked against the board */

When booting with SP, RCW resides at the beginning of IFC NOR flash.
Signed-off-by: York Sun yorksun@freescale.com --- Change log v2: no change
include/configs/ls2085a_common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/configs/ls2085a_common.h b/include/configs/ls2085a_common.h index 4c258ca..83c3d62 100644 --- a/include/configs/ls2085a_common.h +++ b/include/configs/ls2085a_common.h @@ -15,7 +15,7 @@ #define CONFIG_GICV3
/* Link Definitions */ -#define CONFIG_SYS_TEXT_BASE 0x30000000 +#define CONFIG_SYS_TEXT_BASE 0x30001000
#define CONFIG_SYS_NO_FLASH
participants (4)
-
Arnab Basu
-
bhupesh.sharmaï¼ freescale.com
-
Mark Rutland
-
York Sun