[U-Boot] [PATCH 0/9] Add PSCI support for Jetson TK1/Tegra124

I've picked up Ian's and Thierry's work on PSCI support for Tegra124, specifically the Jetson TK1 board. This series contains Ian's patches and a rewritten version of the PSCI core as suggested by Thierry. It's working fine on the TK1, allowing Linux to online/offline CPUs 1-3 as it likes (provided CONFIG_CPU_IDLE is off).
Patches are also available here:
https://github.com/siemens/u-boot/tree/jetson-tk1
Looking forward to review.
Jan
CC: Ian Campbell ijc@hellion.org.uk CC: Jan Kiszka jan.kiszka@siemens.com
Ian Campbell (4): tegra124: Add more registers to struct mc_ctlr virt-dt: Allow reservation of the secure region when it is in a RAM carveout. jetson-tk1: Add PSCI configuration options and reserve secure code tegra124: Reserve secure RAM using MC_SECURITY_CFG{0, 1}_0
Jan Kiszka (5): ARM: Factor out reusable psci_cpu_off_common ARM: Factor out reusable psci_cpu_entry tegra: Make tegra_powergate_power_on public tegra: Add ap_pm_init hook tegra124: Add PSCI support for Tegra124
arch/arm/cpu/armv7/Makefile | 1 + arch/arm/cpu/armv7/psci.S | 90 +++++++++++++++++++++++++ arch/arm/cpu/armv7/sunxi/psci.S | 84 +---------------------- arch/arm/cpu/armv7/tegra-common/Makefile | 1 + arch/arm/cpu/armv7/tegra-common/psci.S | 101 ++++++++++++++++++++++++++++ arch/arm/cpu/armv7/tegra124/Kconfig | 2 + arch/arm/cpu/armv7/tegra124/Makefile | 7 ++ arch/arm/cpu/armv7/tegra124/ap.c | 44 ++++++++++++ arch/arm/cpu/armv7/virt-dt.c | 5 ++ arch/arm/cpu/armv7/virt-v7.c | 5 ++ arch/arm/cpu/tegra-common/ap.c | 15 +++++ arch/arm/cpu/tegra-common/powergate.c | 2 +- arch/arm/include/asm/arch-tegra/ap.h | 5 ++ arch/arm/include/asm/arch-tegra/powergate.h | 1 + arch/arm/include/asm/arch-tegra124/flow.h | 5 ++ arch/arm/include/asm/arch-tegra124/mc.h | 35 +++++++++- arch/arm/include/asm/system.h | 1 + board/nvidia/common/board.c | 4 ++ include/configs/jetson-tk1.h | 5 ++ 19 files changed, 329 insertions(+), 84 deletions(-) create mode 100644 arch/arm/cpu/armv7/tegra-common/psci.S create mode 100644 arch/arm/cpu/armv7/tegra124/Makefile create mode 100644 arch/arm/cpu/armv7/tegra124/ap.c

From: Jan Kiszka jan.kiszka@siemens.com
Move parts of sunxi's psci_cpu_off into psci_cpu_off_common, namely cache disabling and flushing, clrex and the disabling of SMP for the dying CPU. These steps are apparently generic for ARMv7 and will be reused for Tegra124 support.
Signed-off-by: Jan Kiszka jan.kiszka@siemens.com --- arch/arm/cpu/armv7/psci.S | 71 +++++++++++++++++++++++++++++++++++++++++ arch/arm/cpu/armv7/sunxi/psci.S | 63 +----------------------------------- 2 files changed, 72 insertions(+), 62 deletions(-)
diff --git a/arch/arm/cpu/armv7/psci.S b/arch/arm/cpu/armv7/psci.S index bf11a34..d688607 100644 --- a/arch/arm/cpu/armv7/psci.S +++ b/arch/arm/cpu/armv7/psci.S @@ -99,4 +99,75 @@ _smc_psci: pop {r4-r7, lr} movs pc, lr @ Return to the kernel
+/* Imported from Linux kernel */ +LENTRY(v7_flush_dcache_all) + dmb @ ensure ordering with previous memory accesses + mrc p15, 1, r0, c0, c0, 1 @ read clidr + ands r3, r0, #0x7000000 @ extract loc from clidr + mov r3, r3, lsr #23 @ left align loc bit field + beq finished @ if loc is 0, then no need to clean + mov r10, #0 @ start clean at cache level 0 +flush_levels: + add r2, r10, r10, lsr #1 @ work out 3x current cache level + mov r1, r0, lsr r2 @ extract cache type bits from clidr + and r1, r1, #7 @ mask of the bits for current cache only + cmp r1, #2 @ see what cache we have at this level + blt skip @ skip if no cache, or just i-cache + mrs r9, cpsr @ make cssr&csidr read atomic + mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr + isb @ isb to sych the new cssr&csidr + mrc p15, 1, r1, c0, c0, 0 @ read the new csidr + msr cpsr_c, r9 + and r2, r1, #7 @ extract the length of the cache lines + add r2, r2, #4 @ add 4 (line length offset) + ldr r4, =0x3ff + ands r4, r4, r1, lsr #3 @ find maximum number on the way size + clz r5, r4 @ find bit position of way size increment + ldr r7, =0x7fff + ands r7, r7, r1, lsr #13 @ extract max number of the index size +loop1: + mov r9, r7 @ create working copy of max index +loop2: + orr r11, r10, r4, lsl r5 @ factor way and cache number into r11 + orr r11, r11, r9, lsl r2 @ factor index number into r11 + mcr p15, 0, r11, c7, c14, 2 @ clean & invalidate by set/way + subs r9, r9, #1 @ decrement the index + bge loop2 + subs r4, r4, #1 @ decrement the way + bge loop1 +skip: + add r10, r10, #2 @ increment cache number + cmp r3, r10 + bgt flush_levels +finished: + mov r10, #0 @ swith back to cache level 0 + mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr + dsb st + isb + bx lr +ENDPROC(v7_flush_dcache_all) + +ENTRY(psci_cpu_off_common) + push {lr} + + mrc p15, 0, r0, c1, c0, 0 @ SCTLR + bic r0, r0, #(1 << 2) @ Clear C bit + mcr p15, 0, r0, c1, c0, 0 @ SCTLR + isb + dsb + + bl v7_flush_dcache_all + + clrex 
@ Why??? + + mrc p15, 0, r0, c1, c0, 1 @ ACTLR + bic r0, r0, #(1 << 6) @ Clear SMP bit + mcr p15, 0, r0, c1, c0, 1 @ ACTLR + isb + dsb + + pop {lr} + bx lr +ENDPROC(psci_cpu_off_common) + .popsection diff --git a/arch/arm/cpu/armv7/sunxi/psci.S b/arch/arm/cpu/armv7/sunxi/psci.S index 5be497b..6785fdd 100644 --- a/arch/arm/cpu/armv7/sunxi/psci.S +++ b/arch/arm/cpu/armv7/sunxi/psci.S @@ -199,53 +199,6 @@ psci_cpu_on: _target_pc: .word 0
-/* Imported from Linux kernel */ -v7_flush_dcache_all: - dmb @ ensure ordering with previous memory accesses - mrc p15, 1, r0, c0, c0, 1 @ read clidr - ands r3, r0, #0x7000000 @ extract loc from clidr - mov r3, r3, lsr #23 @ left align loc bit field - beq finished @ if loc is 0, then no need to clean - mov r10, #0 @ start clean at cache level 0 -flush_levels: - add r2, r10, r10, lsr #1 @ work out 3x current cache level - mov r1, r0, lsr r2 @ extract cache type bits from clidr - and r1, r1, #7 @ mask of the bits for current cache only - cmp r1, #2 @ see what cache we have at this level - blt skip @ skip if no cache, or just i-cache - mrs r9, cpsr @ make cssr&csidr read atomic - mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr - isb @ isb to sych the new cssr&csidr - mrc p15, 1, r1, c0, c0, 0 @ read the new csidr - msr cpsr_c, r9 - and r2, r1, #7 @ extract the length of the cache lines - add r2, r2, #4 @ add 4 (line length offset) - ldr r4, =0x3ff - ands r4, r4, r1, lsr #3 @ find maximum number on the way size - clz r5, r4 @ find bit position of way size increment - ldr r7, =0x7fff - ands r7, r7, r1, lsr #13 @ extract max number of the index size -loop1: - mov r9, r7 @ create working copy of max index -loop2: - orr r11, r10, r4, lsl r5 @ factor way and cache number into r11 - orr r11, r11, r9, lsl r2 @ factor index number into r11 - mcr p15, 0, r11, c7, c14, 2 @ clean & invalidate by set/way - subs r9, r9, #1 @ decrement the index - bge loop2 - subs r4, r4, #1 @ decrement the way - bge loop1 -skip: - add r10, r10, #2 @ increment cache number - cmp r3, r10 - bgt flush_levels -finished: - mov r10, #0 @ swith back to cache level 0 - mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr - dsb st - isb - bx lr - _sunxi_cpu_entry: @ Set SMP bit mrc p15, 0, r0, c1, c0, 1 @@ -262,21 +215,7 @@ _sunxi_cpu_entry:
.globl psci_cpu_off psci_cpu_off: - mrc p15, 0, r0, c1, c0, 0 @ SCTLR - bic r0, r0, #(1 << 2) @ Clear C bit - mcr p15, 0, r0, c1, c0, 0 @ SCTLR - isb - dsb - - bl v7_flush_dcache_all - - clrex @ Why??? - - mrc p15, 0, r0, c1, c0, 1 @ ACTLR - bic r0, r0, #(1 << 6) @ Clear SMP bit - mcr p15, 0, r0, c1, c0, 1 @ ACTLR - isb - dsb + bl psci_cpu_off_common
@ Ask CPU0 to pull the rug... movw r0, #(GICD_BASE & 0xffff)

From: Jan Kiszka jan.kiszka@siemens.com
_sunxi_cpu_entry can be converted completely into a reusable psci_cpu_entry. Tegra124 will use it as well.
Signed-off-by: Jan Kiszka jan.kiszka@siemens.com --- arch/arm/cpu/armv7/psci.S | 19 +++++++++++++++++++ arch/arm/cpu/armv7/sunxi/psci.S | 21 ++------------------- 2 files changed, 21 insertions(+), 19 deletions(-)
diff --git a/arch/arm/cpu/armv7/psci.S b/arch/arm/cpu/armv7/psci.S index d688607..e916d71 100644 --- a/arch/arm/cpu/armv7/psci.S +++ b/arch/arm/cpu/armv7/psci.S @@ -170,4 +170,23 @@ ENTRY(psci_cpu_off_common) bx lr ENDPROC(psci_cpu_off_common)
+ENTRY(psci_cpu_entry) + @ Set SMP bit + mrc p15, 0, r0, c1, c0, 1 @ ACTLR + orr r0, r0, #(1 << 6) @ Set SMP bit + mcr p15, 0, r0, c1, c0, 1 @ ACTLR + isb + + bl _nonsec_init + bl psci_arch_init + + adr r0, _psci_target_pc + ldr r0, [r0] + b _do_nonsec_entry +ENDPROC(psci_cpu_entry) + +.globl _psci_target_pc +_psci_target_pc: + .word 0 + .popsection diff --git a/arch/arm/cpu/armv7/sunxi/psci.S b/arch/arm/cpu/armv7/sunxi/psci.S index 6785fdd..c3a8dc1 100644 --- a/arch/arm/cpu/armv7/sunxi/psci.S +++ b/arch/arm/cpu/armv7/sunxi/psci.S @@ -138,7 +138,7 @@ out: mcr p15, 0, r7, c1, c1, 0 @ r2 = target PC .globl psci_cpu_on psci_cpu_on: - adr r0, _target_pc + ldr r0, =_psci_target_pc str r2, [r0] dsb
@@ -150,7 +150,7 @@ psci_cpu_on: mov r4, #1 lsl r4, r4, r1
- adr r6, _sunxi_cpu_entry + ldr r6, =psci_cpu_entry str r6, [r0, #0x1a4] @ PRIVATE_REG (boot vector)
@ Assert reset on target CPU @@ -196,23 +196,6 @@ psci_cpu_on: mov r0, #ARM_PSCI_RET_SUCCESS @ Return PSCI_RET_SUCCESS mov pc, lr
-_target_pc: - .word 0 - -_sunxi_cpu_entry: - @ Set SMP bit - mrc p15, 0, r0, c1, c0, 1 - orr r0, r0, #0x40 - mcr p15, 0, r0, c1, c0, 1 - isb - - bl _nonsec_init - bl psci_arch_init - - adr r0, _target_pc - ldr r0, [r0] - b _do_nonsec_entry - .globl psci_cpu_off psci_cpu_off: bl psci_cpu_off_common

Hi,
On Sun, Feb 15, 2015 at 5:28 AM, Jan Kiszka jan.kiszka@web.de wrote:
From: Jan Kiszka jan.kiszka@siemens.com
_sunxi_cpu_entry can be converted completely into a reusable psci_cpu_entry. Tegra124 will use it as well.
Signed-off-by: Jan Kiszka jan.kiszka@siemens.com
arch/arm/cpu/armv7/psci.S | 19 +++++++++++++++++++ arch/arm/cpu/armv7/sunxi/psci.S | 21 ++------------------- 2 files changed, 21 insertions(+), 19 deletions(-)
diff --git a/arch/arm/cpu/armv7/psci.S b/arch/arm/cpu/armv7/psci.S index d688607..e916d71 100644 --- a/arch/arm/cpu/armv7/psci.S +++ b/arch/arm/cpu/armv7/psci.S @@ -170,4 +170,23 @@ ENTRY(psci_cpu_off_common) bx lr ENDPROC(psci_cpu_off_common)
+ENTRY(psci_cpu_entry)
@ Set SMP bit
mrc p15, 0, r0, c1, c0, 1 @ ACTLR
orr r0, r0, #(1 << 6) @ Set SMP bit
mcr p15, 0, r0, c1, c0, 1 @ ACTLR
isb
bl _nonsec_init
bl psci_arch_init
adr r0, _psci_target_pc
ldr r0, [r0]
b _do_nonsec_entry
+ENDPROC(psci_cpu_entry)
+.globl _psci_target_pc +_psci_target_pc:
.word 0
The sunxi version didn't have a per-core target_pc variable. It is still the case here. Is this the correct way to implement it? I see per-core storage of this in some of the kernel's smp ops.
On sunxi it works because the only platform using it only has one secondary core.
ChenYu
.popsection
diff --git a/arch/arm/cpu/armv7/sunxi/psci.S b/arch/arm/cpu/armv7/sunxi/psci.S index 6785fdd..c3a8dc1 100644 --- a/arch/arm/cpu/armv7/sunxi/psci.S +++ b/arch/arm/cpu/armv7/sunxi/psci.S @@ -138,7 +138,7 @@ out: mcr p15, 0, r7, c1, c1, 0 @ r2 = target PC .globl psci_cpu_on psci_cpu_on:
adr r0, _target_pc
ldr r0, =_psci_target_pc str r2, [r0] dsb
@@ -150,7 +150,7 @@ psci_cpu_on: mov r4, #1 lsl r4, r4, r1
adr r6, _sunxi_cpu_entry
ldr r6, =psci_cpu_entry str r6, [r0, #0x1a4] @ PRIVATE_REG (boot vector) @ Assert reset on target CPU
@@ -196,23 +196,6 @@ psci_cpu_on: mov r0, #ARM_PSCI_RET_SUCCESS @ Return PSCI_RET_SUCCESS mov pc, lr
-_target_pc:
.word 0
-_sunxi_cpu_entry:
@ Set SMP bit
mrc p15, 0, r0, c1, c0, 1
orr r0, r0, #0x40
mcr p15, 0, r0, c1, c0, 1
isb
bl _nonsec_init
bl psci_arch_init
adr r0, _target_pc
ldr r0, [r0]
b _do_nonsec_entry
.globl psci_cpu_off psci_cpu_off: bl psci_cpu_off_common -- 2.1.4
U-Boot mailing list U-Boot@lists.denx.de http://lists.denx.de/mailman/listinfo/u-boot

On 2015-02-15 03:01, Chen-Yu Tsai wrote:
Hi,
On Sun, Feb 15, 2015 at 5:28 AM, Jan Kiszka jan.kiszka@web.de wrote:
From: Jan Kiszka jan.kiszka@siemens.com
_sunxi_cpu_entry can be converted completely into a reusable psci_cpu_entry. Tegra124 will use it as well.
Signed-off-by: Jan Kiszka jan.kiszka@siemens.com
arch/arm/cpu/armv7/psci.S | 19 +++++++++++++++++++ arch/arm/cpu/armv7/sunxi/psci.S | 21 ++------------------- 2 files changed, 21 insertions(+), 19 deletions(-)
diff --git a/arch/arm/cpu/armv7/psci.S b/arch/arm/cpu/armv7/psci.S index d688607..e916d71 100644 --- a/arch/arm/cpu/armv7/psci.S +++ b/arch/arm/cpu/armv7/psci.S @@ -170,4 +170,23 @@ ENTRY(psci_cpu_off_common) bx lr ENDPROC(psci_cpu_off_common)
+ENTRY(psci_cpu_entry)
@ Set SMP bit
mrc p15, 0, r0, c1, c0, 1 @ ACTLR
orr r0, r0, #(1 << 6) @ Set SMP bit
mcr p15, 0, r0, c1, c0, 1 @ ACTLR
isb
bl _nonsec_init
bl psci_arch_init
adr r0, _psci_target_pc
ldr r0, [r0]
b _do_nonsec_entry
+ENDPROC(psci_cpu_entry)
+.globl _psci_target_pc +_psci_target_pc:
.word 0
The sunxi version didn't have a per-core target_pc variable. It is still the case here. Is this the correct way to implement it? I see per-core storage of this in some of the kernel's smp ops.
On sunxi it works because the only platform using it only has one secondary core.
With homogeneous SMP, it probably works as well because reset vectors may not differ across the cores. But this remains a valid point.
I'm considering pushing this variable to the top of the per-CPU stack. Calculating the stack position is actually another function to factor out.
Thanks, Jan

-----BEGIN PGP SIGNED MESSAGE----- Hash: SHA1
On 2015-02-15 07:29, Jan Kiszka wrote:
On 2015-02-15 03:01, Chen-Yu Tsai wrote:
Hi,
On Sun, Feb 15, 2015 at 5:28 AM, Jan Kiszka jan.kiszka@web.de wrote:
From: Jan Kiszka jan.kiszka@siemens.com
_sunxi_cpu_entry can be converted completely into a reusable psci_cpu_entry. Tegra124 will use it as well.
Signed-off-by: Jan Kiszka jan.kiszka@siemens.com --- arch/arm/cpu/armv7/psci.S | 19 +++++++++++++++++++ arch/arm/cpu/armv7/sunxi/psci.S | 21 ++------------------- 2 files changed, 21 insertions(+), 19 deletions(-)
diff --git a/arch/arm/cpu/armv7/psci.S b/arch/arm/cpu/armv7/psci.S index d688607..e916d71 100644 --- a/arch/arm/cpu/armv7/psci.S +++ b/arch/arm/cpu/armv7/psci.S @@ -170,4 +170,23 @@ ENTRY(psci_cpu_off_common) bx lr ENDPROC(psci_cpu_off_common)
+ENTRY(psci_cpu_entry) + @ Set SMP bit + mrc p15, 0, r0, c1, c0, 1 @ ACTLR + orr r0, r0, #(1 << 6) @ Set SMP bit + mcr p15, 0, r0, c1, c0, 1 @ ACTLR + isb + + bl _nonsec_init + bl psci_arch_init + + adr r0, _psci_target_pc + ldr r0, [r0] + b _do_nonsec_entry +ENDPROC(psci_cpu_entry) + +.globl _psci_target_pc +_psci_target_pc: + .word 0
The sunxi version didn't have a per-core target_pc variable. It is still the case here. Is this the correct way to implement it? I see per-core storage of this in some of the kernel's smp ops.
On sunxi it works because the only platform using it only has one secondary core.
With homogeneous SMP, it probably works as well because reset vectors may not differ across the cores. But this remains a valid point.
I'm considering pushing this variable to the top of the per-CPU stack. Calculating the stack position is actually another function to factor out.
https://github.com/siemens/u-boot/commits/jetson-tk1-v2
works fine on the TK1, but I'd like to give it a try on a Banana Pi as well (currently out of reach) before reposting.
Jan

From: Ian Campbell ijc@hellion.org.uk
I will need mc_security_cfg0/1 in a future patch and I added the rest while debugging, so thought I might as well commit them.
Signed-off-by: Ian Campbell ijc@hellion.org.uk Signed-off-by: Jan Kiszka jan.kiszka@siemens.com --- arch/arm/include/asm/arch-tegra124/mc.h | 35 +++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-)
diff --git a/arch/arm/include/asm/arch-tegra124/mc.h b/arch/arm/include/asm/arch-tegra124/mc.h index d526dfe..5557732 100644 --- a/arch/arm/include/asm/arch-tegra124/mc.h +++ b/arch/arm/include/asm/arch-tegra124/mc.h @@ -35,9 +35,40 @@ struct mc_ctlr { u32 mc_emem_adr_cfg; /* offset 0x54 */ u32 mc_emem_adr_cfg_dev0; /* offset 0x58 */ u32 mc_emem_adr_cfg_dev1; /* offset 0x5C */ - u32 reserved3[12]; /* offset 0x60 - 0x8C */ + u32 reserved3[4]; /* offset 0x60 - 0x6C */ + u32 mc_security_cfg0; /* offset 0x70 */ + u32 mc_security_cfg1; /* offset 0x74 */ + u32 reserved4[6]; /* offset 0x7C - 0x8C */ u32 mc_emem_arb_reserved[28]; /* offset 0x90 - 0xFC */ - u32 reserved4[338]; /* offset 0x100 - 0x644 */ + u32 reserved5[74]; /* offset 0x100 - 0x224 */ + u32 mc_smmu_translation_enable_0; /* offset 0x228 */ + u32 mc_smmu_translation_enable_1; /* offset 0x22C */ + u32 mc_smmu_translation_enable_2; /* offset 0x230 */ + u32 mc_smmu_translation_enable_3; /* offset 0x234 */ + u32 mc_smmu_afi_asid; /* offset 0x238 */ + u32 mc_smmu_avpc_asid; /* offset 0x23C */ + u32 mc_smmu_dc_asid; /* offset 0x240 */ + u32 mc_smmu_dcb_asid; /* offset 0x244 */ + u32 reserved6[2]; /* offset 0x248 - 0x24C */ + u32 mc_smmu_hc_asid; /* offset 0x250 */ + u32 mc_smmu_hda_asid; /* offset 0x254 */ + u32 mc_smmu_isp2_asid; /* offset 0x258 */ + u32 reserved7[2]; /* offset 0x25C - 0x260 */ + u32 mc_smmu_msenc_asid; /* offset 0x264 */ + u32 mc_smmu_nv_asid; /* offset 0x268 */ + u32 mc_smmu_nv2_asid; /* offset 0x26C */ + u32 mc_smmu_ppcs_asid; /* offset 0x270 */ + u32 mc_smmu_sata_asid; /* offset 0x274 */ + u32 reserved8[1]; /* offset 0x278 */ + u32 mc_smmu_vde_asid; /* offset 0x27C */ + u32 mc_smmu_vi_asid; /* offset 0x280 */ + u32 mc_smmu_vic_asid; /* offset 0x284 */ + u32 mc_smmu_xusb_host_asid; /* offset 0x288 */ + u32 mc_smmu_xusb_dev_asid; /* offset 0x28C */ + u32 reserved9[1]; /* offset 0x290 */ + u32 mc_smmu_tsec_asid; /* offset 0x294 */ + u32 mc_smmu_ppcs1_asid; /* offset 0x298 */ + u32 reserved10[235]; 
/* offset 0x29C - 0x644 */ u32 mc_video_protect_bom; /* offset 0x648 */ u32 mc_video_protect_size_mb; /* offset 0x64c */ u32 mc_video_protect_reg_ctrl; /* offset 0x650 */

From: Ian Campbell ijc@hellion.org.uk
In this case the secure code lives in RAM, and hence needs to be reserved, but it has been relocated, so the reservation of __secure_start does not apply.
Add support for setting CONFIG_ARMV7_SECURE_RESERVE_SIZE to reserve such a region.
This will be used in a subsequent patch for Jetson-TK1
Signed-off-by: Ian Campbell ijc@hellion.org.uk Signed-off-by: Jan Kiszka jan.kiszka@siemens.com --- arch/arm/cpu/armv7/virt-dt.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/arch/arm/cpu/armv7/virt-dt.c b/arch/arm/cpu/armv7/virt-dt.c index ad19e4c..eb95031 100644 --- a/arch/arm/cpu/armv7/virt-dt.c +++ b/arch/arm/cpu/armv7/virt-dt.c @@ -96,6 +96,11 @@ int armv7_update_dt(void *fdt) /* secure code lives in RAM, keep it alive */ fdt_add_mem_rsv(fdt, (unsigned long)__secure_start, __secure_end - __secure_start); +#elif defined(CONFIG_ARMV7_SECURE_RESERVE_SIZE) + /* secure code has been relocated into RAM carveout, keep it alive */ + fdt_add_mem_rsv(fdt, + CONFIG_ARMV7_SECURE_BASE, + CONFIG_ARMV7_SECURE_RESERVE_SIZE); #endif
return fdt_psci(fdt);

From: Jan Kiszka jan.kiszka@siemens.com
Will be used for unpowergating CPUs.
Signed-off-by: Jan Kiszka jan.kiszka@siemens.com --- arch/arm/cpu/tegra-common/powergate.c | 2 +- arch/arm/include/asm/arch-tegra/powergate.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/arch/arm/cpu/tegra-common/powergate.c b/arch/arm/cpu/tegra-common/powergate.c index 439cff3..6331cd4 100644 --- a/arch/arm/cpu/tegra-common/powergate.c +++ b/arch/arm/cpu/tegra-common/powergate.c @@ -44,7 +44,7 @@ static int tegra_powergate_set(enum tegra_powergate id, bool state) return -ETIMEDOUT; }
-static int tegra_powergate_power_on(enum tegra_powergate id) +int tegra_powergate_power_on(enum tegra_powergate id) { return tegra_powergate_set(id, true); } diff --git a/arch/arm/include/asm/arch-tegra/powergate.h b/arch/arm/include/asm/arch-tegra/powergate.h index 130b58b..2e491f1 100644 --- a/arch/arm/include/asm/arch-tegra/powergate.h +++ b/arch/arm/include/asm/arch-tegra/powergate.h @@ -33,6 +33,7 @@ enum tegra_powergate {
int tegra_powergate_sequence_power_up(enum tegra_powergate id, enum periph_id periph); +int tegra_powergate_power_on(enum tegra_powergate id); int tegra_powergate_power_off(enum tegra_powergate id);
#endif

From: Jan Kiszka jan.kiszka@siemens.com
This function will be used to initialize CPU power management for Tegra SOCs. For now it does nothing.
Signed-off-by: Jan Kiszka jan.kiszka@siemens.com --- arch/arm/include/asm/arch-tegra/ap.h | 5 +++++ board/nvidia/common/board.c | 4 ++++ 2 files changed, 9 insertions(+)
diff --git a/arch/arm/include/asm/arch-tegra/ap.h b/arch/arm/include/asm/arch-tegra/ap.h index 5c8be94..208db90 100644 --- a/arch/arm/include/asm/arch-tegra/ap.h +++ b/arch/arm/include/asm/arch-tegra/ap.h @@ -63,6 +63,11 @@ int tegra_get_chip(void); */ int tegra_get_sku_info(void);
+/** + * Initialize power management for application processors + */ +void ap_pm_init(void); + /* Do any chip-specific cache config */ void config_cache(void);
diff --git a/board/nvidia/common/board.c b/board/nvidia/common/board.c index 80ef8fd..c62b3da 100644 --- a/board/nvidia/common/board.c +++ b/board/nvidia/common/board.c @@ -21,6 +21,7 @@ #include <asm/arch/pwm.h> #endif #include <asm/arch/tegra.h> +#include <asm/arch-tegra/ap.h> #include <asm/arch-tegra/board.h> #include <asm/arch-tegra/clk_rst.h> #include <asm/arch-tegra/pmc.h> @@ -56,6 +57,7 @@ const struct tegra_sysinfo sysinfo = { CONFIG_TEGRA_BOARD_STRING };
+__weak void ap_pm_init(void) {} __weak void pinmux_init(void) {} __weak void pin_mux_usb(void) {} __weak void pin_mux_spi(void) {} @@ -96,6 +98,8 @@ int board_init(void) clock_init(); clock_verify();
+ ap_pm_init(); + #ifdef CONFIG_TEGRA_SPI pin_mux_spi(); #endif

From: Jan Kiszka jan.kiszka@siemens.com
This is based on Thierry Reding's work and uses Ian Campbell's preparatory patches. It comes with full support for CPU_ON/OFF PSCI services. The algorithm used in this version for turning CPUs on and off was proposed by Thierry Reding in http://thread.gmane.org/gmane.comp.boot-loaders.u-boot/210881. It consists of first enabling CPU1..3 via the PMC, just to powergate them again with the help of the Flow Controller. Once the Flow Controller is in place, we can leave the PMC alone while processing CPU_ON and CPU_OFF PSCI requests.
Signed-off-by: Jan Kiszka jan.kiszka@siemens.com --- arch/arm/cpu/armv7/Makefile | 1 + arch/arm/cpu/armv7/tegra-common/Makefile | 1 + arch/arm/cpu/armv7/tegra-common/psci.S | 101 ++++++++++++++++++++++++++++++ arch/arm/cpu/armv7/tegra124/Makefile | 7 +++ arch/arm/cpu/armv7/tegra124/ap.c | 44 +++++++++++++ arch/arm/include/asm/arch-tegra124/flow.h | 5 ++ 6 files changed, 159 insertions(+) create mode 100644 arch/arm/cpu/armv7/tegra-common/psci.S create mode 100644 arch/arm/cpu/armv7/tegra124/Makefile create mode 100644 arch/arm/cpu/armv7/tegra124/ap.c
diff --git a/arch/arm/cpu/armv7/Makefile b/arch/arm/cpu/armv7/Makefile index 409e6f5..616b6cc 100644 --- a/arch/arm/cpu/armv7/Makefile +++ b/arch/arm/cpu/armv7/Makefile @@ -59,6 +59,7 @@ obj-$(CONFIG_SOCFPGA) += socfpga/ obj-$(if $(filter stv0991,$(SOC)),y) += stv0991/ obj-$(CONFIG_ARCH_SUNXI) += sunxi/ obj-$(CONFIG_TEGRA20) += tegra20/ +obj-$(CONFIG_TEGRA124) += tegra124/ obj-$(CONFIG_U8500) += u8500/ obj-$(CONFIG_ARCH_UNIPHIER) += uniphier/ obj-$(CONFIG_VF610) += vf610/ diff --git a/arch/arm/cpu/armv7/tegra-common/Makefile b/arch/arm/cpu/armv7/tegra-common/Makefile index 463c260..89355ca 100644 --- a/arch/arm/cpu/armv7/tegra-common/Makefile +++ b/arch/arm/cpu/armv7/tegra-common/Makefile @@ -7,4 +7,5 @@ # SPDX-License-Identifier: GPL-2.0+ #
+obj-$(CONFIG_ARMV7_PSCI) += psci.o obj-$(CONFIG_CMD_ENTERRCM) += cmd_enterrcm.o diff --git a/arch/arm/cpu/armv7/tegra-common/psci.S b/arch/arm/cpu/armv7/tegra-common/psci.S new file mode 100644 index 0000000..b63a117 --- /dev/null +++ b/arch/arm/cpu/armv7/tegra-common/psci.S @@ -0,0 +1,101 @@ +/* + * Copyright (C) 2014, NVIDIA + * Copyright (C) 2015, Siemens AG + * + * Authors: + * Thierry Reding treding@nvidia.com + * Jan Kiszka jan.kiszka@siemens.com + * + * SPDX-License-Identifier: GPL-2.0+ + */ + +#include <linux/linkage.h> +#include <asm/psci.h> + + .pushsection ._secure.text, "ax" + .arch_extension sec + +#define TEGRA_SB_CSR_0 0x6000c200 +#define NS_RST_VEC_WR_DIS (1 << 1) + +#define TEGRA_RESET_EXCEPTION_VECTOR 0x6000f100 + +#define TEGRA_FLOW_CTRL_BASE 0x60007000 +#define FLOW_CTRL_CPU_CSR 0x08 +#define CSR_ENABLE (1 << 0) +#define CSR_IMMEDIATE_WAKE (1 << 3) +#define CSR_WAIT_WFI_SHIFT 8 +#define FLOW_CTRL_CPU1_CSR 0x18 + +@ converts CPU ID into FLOW_CTRL_CPUn_CSR offset +.macro get_csr_reg cpu, ofs, tmp + cmp \cpu, #0 @ CPU0? + lsl \tmp, \cpu, #3 @ multiple by 8 (register offset CPU1-3) + moveq \ofs, #FLOW_CTRL_CPU_CSR + addne \ofs, \tmp, #FLOW_CTRL_CPU1_CSR - 8 +.endm + +ENTRY(psci_arch_init) + mrc p15, 0, r5, c1, c1, 0 @ Read SCR + bic r5, r5, #1 @ Secure mode + mcr p15, 0, r5, c1, c1, 0 @ Write SCR + isb + + @ lock reset vector + ldr r6, =TEGRA_SB_CSR_0 + ldr r5, [r6] + orr r5, r5, #NS_RST_VEC_WR_DIS + str r5, [r6] + + mrc p15, 0, r4, c0, c0, 5 @ MPIDR + and r4, r4, #7 @ number of CPUs in cluster + mov r5, #400 @ 1 KiB of stack per CPU + mul r4, r4, r5 + + adr r5, text_end @ end of text + add r5, r5, #0x2000 @ Skip two pages + lsr r5, r5, #12 @ Align to start of page + lsl r5, r5, #12 + sub sp, r5, r4 @ here's our stack! 
+ + bx lr +ENDPROC(psci_arch_init) + +ENTRY(psci_cpu_off) + bl psci_cpu_off_common + + mrc p15, 0, r1, c0, c0, 5 @ MPIDR + and r1, r1, #7 @ number of CPUs in cluster + + get_csr_reg r1, r2, r3 + + ldr r6, =TEGRA_FLOW_CTRL_BASE + mov r5, #(CSR_ENABLE) + add r5, r1, lsl #CSR_WAIT_WFI_SHIFT + str r5, [r6, r2] + +_loop: wfi + b _loop +ENDPROC(psci_cpu_off) + +ENTRY(psci_cpu_on) + ldr r0, =_psci_target_pc + str r2, [r0] + dsb + + ldr r6, =TEGRA_RESET_EXCEPTION_VECTOR + ldr r5, =psci_cpu_entry + str r5, [r6] + + get_csr_reg r1, r2, r3 + + ldr r6, =TEGRA_FLOW_CTRL_BASE + mov r5, #(CSR_IMMEDIATE_WAKE | CSR_ENABLE) + str r5, [r6, r2] + + mov r0, #ARM_PSCI_RET_SUCCESS @ Return PSCI_RET_SUCCESS + mov pc, lr +ENDPROC(psci_cpu_on) + +text_end: + .popsection diff --git a/arch/arm/cpu/armv7/tegra124/Makefile b/arch/arm/cpu/armv7/tegra124/Makefile new file mode 100644 index 0000000..b907277 --- /dev/null +++ b/arch/arm/cpu/armv7/tegra124/Makefile @@ -0,0 +1,7 @@ +# +# (C) Copyright 2015, Siemens AG +# +# SPDX-License-Identifier: GPL-2.0+ +# + +obj-$(CONFIG_ARMV7_PSCI) += ap.o diff --git a/arch/arm/cpu/armv7/tegra124/ap.c b/arch/arm/cpu/armv7/tegra124/ap.c new file mode 100644 index 0000000..eebc0ea --- /dev/null +++ b/arch/arm/cpu/armv7/tegra124/ap.c @@ -0,0 +1,44 @@ +/* + * (C) Copyright 2015, Siemens AG + * Author: Jan Kiszka jan.kiszka@siemens.com + * + * SPDX-License-Identifier: GPL-2.0+ + */ + +#include <common.h> +#include <asm/io.h> +#include <asm/arch/flow.h> +#include <asm/arch/powergate.h> +#include <asm/arch-tegra/ap.h> +#include <asm/arch-tegra/pmc.h> + +static void park_cpu(void) +{ + while (1) + asm volatile("wfi"); +} + +void ap_pm_init(void) +{ + struct flow_ctlr *flow = (struct flow_ctlr *)NV_PA_FLOW_BASE; + struct pmc_ctlr *pmc = (struct pmc_ctlr *)NV_PA_PMC_BASE; + + writel((u32)park_cpu, EXCEP_VECTOR_CPU_RESET_VECTOR); + + tegra_powergate_power_on(TEGRA_POWERGATE_CPU1); + tegra_powergate_power_on(TEGRA_POWERGATE_CPU2); + 
tegra_powergate_power_on(TEGRA_POWERGATE_CPU3); + + writel((2 << CSR_WAIT_WFI_SHIFT) | CSR_ENABLE, &flow->cpu1_csr); + writel((4 << CSR_WAIT_WFI_SHIFT) | CSR_ENABLE, &flow->cpu2_csr); + writel((8 << CSR_WAIT_WFI_SHIFT) | CSR_ENABLE, &flow->cpu3_csr); + + writel(EVENT_MODE_STOP, &flow->halt_cpu1_events); + writel(EVENT_MODE_STOP, &flow->halt_cpu2_events); + writel(EVENT_MODE_STOP, &flow->halt_cpu3_events); + + while (readl(&pmc->pmc_pwrgate_status) & ((1 << TEGRA_POWERGATE_CPU1) | + (1 << TEGRA_POWERGATE_CPU2) | + (1 << TEGRA_POWERGATE_CPU3))) + /* wait */; +} diff --git a/arch/arm/include/asm/arch-tegra124/flow.h b/arch/arm/include/asm/arch-tegra124/flow.h index 0db1881..d5f24a0 100644 --- a/arch/arm/include/asm/arch-tegra124/flow.h +++ b/arch/arm/include/asm/arch-tegra124/flow.h @@ -37,4 +37,9 @@ struct flow_ctlr { /* FLOW_CTLR_CLUSTER_CONTROL_0 0x2c */ #define ACTIVE_LP (1 << 0)
+/* CPUn_CSR_0 */ +#define CSR_ENABLE (1 << 0) +#define CSR_IMMEDIATE_WAKE (1 << 3) +#define CSR_WAIT_WFI_SHIFT 8 + #endif /* _TEGRA124_FLOW_H_ */

From: Ian Campbell ijc@hellion.org.uk
The secure world code is relocated to the megabyte just below the top of 4G. We reserve it in the FDT (by setting CONFIG_ARMV7_SECURE_RESERVE_SIZE), but it is not protected in hardware. See next patch.
Signed-off-by: Ian Campbell ijc@hellion.org.uk Signed-off-by: Jan Kiszka jan.kiszka@siemens.com --- arch/arm/cpu/armv7/tegra124/Kconfig | 2 ++ include/configs/jetson-tk1.h | 5 +++++ 2 files changed, 7 insertions(+)
diff --git a/arch/arm/cpu/armv7/tegra124/Kconfig b/arch/arm/cpu/armv7/tegra124/Kconfig index 88f627c..5114299 100644 --- a/arch/arm/cpu/armv7/tegra124/Kconfig +++ b/arch/arm/cpu/armv7/tegra124/Kconfig @@ -5,6 +5,8 @@ choice
config TARGET_JETSON_TK1 bool "NVIDIA Tegra124 Jetson TK1 board" + select CPU_V7_HAS_NONSEC if !SPL_BUILD + select CPU_V7_HAS_VIRT if !SPL_BUILD
config TARGET_NYAN_BIG bool "Google/NVIDIA Nyan-big Chrombook" diff --git a/include/configs/jetson-tk1.h b/include/configs/jetson-tk1.h index 0a79c7c..80c2952 100644 --- a/include/configs/jetson-tk1.h +++ b/include/configs/jetson-tk1.h @@ -81,4 +81,9 @@ #include "tegra-common-usb-gadget.h" #include "tegra-common-post.h"
+#define CONFIG_ARMV7_PSCI 1 +/* Reserve top 1M for secure RAM */ +#define CONFIG_ARMV7_SECURE_BASE 0xfff00000 +#define CONFIG_ARMV7_SECURE_RESERVE_SIZE 0x00100000 + #endif /* __CONFIG_H */

From: Ian Campbell ijc@hellion.org.uk
These registers can be used to prevent the non-secure world from accessing a megabyte-aligned region of RAM. Use them to protect the U-Boot secure monitor code.
At first I tried to do this from s_init(), however this inexplicably causes u-boot's networking (e.g. DHCP) to fail, while networking under Linux was fine.
So instead I have added a new weak arch function protect_secure_section() called from relocate_secure_section() and reserved the region there. This is better overall since it defers the reservation until after the sec vs. non-sec decision (which can be influenced by an envvar) has been made when booting the os.
Signed-off-by: Ian Campbell ijc@hellion.org.uk Signed-off-by: Jan Kiszka jan.kiszka@siemens.com --- arch/arm/cpu/armv7/virt-v7.c | 5 +++++ arch/arm/cpu/tegra-common/ap.c | 15 +++++++++++++++ arch/arm/include/asm/system.h | 1 + 3 files changed, 21 insertions(+)
diff --git a/arch/arm/cpu/armv7/virt-v7.c b/arch/arm/cpu/armv7/virt-v7.c index b69fd37..eb6195c 100644 --- a/arch/arm/cpu/armv7/virt-v7.c +++ b/arch/arm/cpu/armv7/virt-v7.c @@ -46,6 +46,10 @@ static unsigned long get_gicd_base_address(void) #endif }
+/* Define a specific version of this function to enable any available + * hardware protections for the reserved region */ +void __weak protect_secure_section(void) {} + static void relocate_secure_section(void) { #ifdef CONFIG_ARMV7_SECURE_BASE @@ -54,6 +58,7 @@ static void relocate_secure_section(void) memcpy((void *)CONFIG_ARMV7_SECURE_BASE, __secure_start, sz); flush_dcache_range(CONFIG_ARMV7_SECURE_BASE, CONFIG_ARMV7_SECURE_BASE + sz + 1); + protect_secure_section(); invalidate_icache_all(); #endif } diff --git a/arch/arm/cpu/tegra-common/ap.c b/arch/arm/cpu/tegra-common/ap.c index a17dfd1..f1d3070 100644 --- a/arch/arm/cpu/tegra-common/ap.c +++ b/arch/arm/cpu/tegra-common/ap.c @@ -10,6 +10,7 @@ #include <common.h> #include <asm/io.h> #include <asm/arch/gp_padctrl.h> +#include <asm/arch/mc.h> #include <asm/arch-tegra/ap.h> #include <asm/arch-tegra/clock.h> #include <asm/arch-tegra/fuse.h> @@ -154,6 +155,20 @@ static void init_pmc_scratch(void) writel(odmdata, &pmc->pmc_scratch20); }
+#ifdef CONFIG_ARMV7_SECURE_RESERVE_SIZE +void protect_secure_section(void) +{ + struct mc_ctlr *mc = (struct mc_ctlr *)NV_PA_MC_BASE; + + /* Must be MB aligned */ + BUILD_BUG_ON(CONFIG_ARMV7_SECURE_BASE & 0xFFFFF); + BUILD_BUG_ON(CONFIG_ARMV7_SECURE_RESERVE_SIZE & 0xFFFFF); + + writel(CONFIG_ARMV7_SECURE_BASE, &mc->mc_security_cfg0); + writel(CONFIG_ARMV7_SECURE_RESERVE_SIZE>>20, &mc->mc_security_cfg1); +} +#endif + void s_init(void) { /* Init PMC scratch memory */ diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h index 89f2294..21be69d 100644 --- a/arch/arm/include/asm/system.h +++ b/arch/arm/include/asm/system.h @@ -76,6 +76,7 @@ void armv8_switch_to_el1(void); void gic_init(void); void gic_send_sgi(unsigned long sgino); void wait_for_wakeup(void); +void protect_secure_region(void); void smp_kick_all_cpus(void);
void flush_l3_cache(void);
participants (2)
-
Chen-Yu Tsai
-
Jan Kiszka