[PATCH] armv8: always use current exception level for TCR_ELx access

Currently get_tcr() takes an "el" parameter, to select the proper version of the TCR_ELx system register. This is problematic in case of the Apple M1, since it runs with HCR_EL2.E2H fixed to 1, so TCR_EL2 is actually using the TCR_EL1 layout, and we get the wrong version.
For U-Boot's purposes the only sensible choice here is the current exception level, and indeed most caller treat it like that, so let's remove that parameter and read the current EL inside the function. This allows us to check for the E2H bit, and pretend it's EL1 in this case.
There are two callers which don't care about the EL, and they pass 0, which looks wrong, but is irrelevant in these two cases, since we don't use the return value there. So the change cannot affect those two.
Signed-off-by: Andre Przywara andre.przywara@arm.com --- arch/arm/cpu/armv8/cache_v8.c | 28 +++++++++++++++++++++---- arch/arm/cpu/armv8/fsl-layerscape/cpu.c | 4 ++-- arch/arm/include/asm/armv8/mmu.h | 2 +- 3 files changed, 27 insertions(+), 7 deletions(-)
diff --git a/arch/arm/cpu/armv8/cache_v8.c b/arch/arm/cpu/armv8/cache_v8.c index 3de18c7675..101c521e61 100644 --- a/arch/arm/cpu/armv8/cache_v8.c +++ b/arch/arm/cpu/armv8/cache_v8.c @@ -39,8 +39,28 @@ DECLARE_GLOBAL_DATA_PTR; * off: FFF */
-u64 get_tcr(int el, u64 *pips, u64 *pva_bits) +static int get_effective_el(void) { + int el = current_el(); + + if (el == 2) { + u64 hcr_el2; + + /* + * If we are using the EL2&0 translation regime, the TCR_EL2 + * looks like the EL1 version, even though we are in EL2. + */ + __asm__ ("mrs %0, HCR_EL2\n" : "=r" (hcr_el2)); + if (hcr_el2 & BIT(34)) + return 1; + } + + return el; +} + +u64 get_tcr(u64 *pips, u64 *pva_bits) +{ + int el = get_effective_el(); u64 max_addr = 0; u64 ips, va_bits; u64 tcr; @@ -115,7 +135,7 @@ static u64 *find_pte(u64 addr, int level)
debug("addr=%llx level=%d\n", addr, level);
- get_tcr(0, NULL, &va_bits); + get_tcr(NULL, &va_bits); if (va_bits < 39) start_level = 1;
@@ -343,7 +363,7 @@ __weak u64 get_page_table_size(void) u64 va_bits; int start_level = 0;
- get_tcr(0, NULL, &va_bits); + get_tcr(NULL, &va_bits); if (va_bits < 39) start_level = 1;
@@ -415,7 +435,7 @@ __weak void mmu_setup(void) setup_all_pgtables();
el = current_el(); - set_ttbr_tcr_mair(el, gd->arch.tlb_addr, get_tcr(el, NULL, NULL), + set_ttbr_tcr_mair(el, gd->arch.tlb_addr, get_tcr(NULL, NULL), MEMORY_ATTRIBUTES);
/* enable the mmu */ diff --git a/arch/arm/cpu/armv8/fsl-layerscape/cpu.c b/arch/arm/cpu/armv8/fsl-layerscape/cpu.c index 253008a9c1..c989a43cbe 100644 --- a/arch/arm/cpu/armv8/fsl-layerscape/cpu.c +++ b/arch/arm/cpu/armv8/fsl-layerscape/cpu.c @@ -454,7 +454,7 @@ static inline void early_mmu_setup(void)
/* point TTBR to the new table */ set_ttbr_tcr_mair(el, gd->arch.tlb_addr, - get_tcr(el, NULL, NULL) & + get_tcr(NULL, NULL) & ~(TCR_ORGN_MASK | TCR_IRGN_MASK), MEMORY_ATTRIBUTES);
@@ -609,7 +609,7 @@ static inline void final_mmu_setup(void) invalidate_icache_all();
/* point TTBR to the new table */ - set_ttbr_tcr_mair(el, gd->arch.tlb_addr, get_tcr(el, NULL, NULL), + set_ttbr_tcr_mair(el, gd->arch.tlb_addr, get_tcr(NULL, NULL), MEMORY_ATTRIBUTES);
set_sctlr(get_sctlr() | CR_M); diff --git a/arch/arm/include/asm/armv8/mmu.h b/arch/arm/include/asm/armv8/mmu.h index c36b2cf5a5..98ce521ec5 100644 --- a/arch/arm/include/asm/armv8/mmu.h +++ b/arch/arm/include/asm/armv8/mmu.h @@ -134,7 +134,7 @@ struct mm_region {
extern struct mm_region *mem_map; void setup_pgtables(void); -u64 get_tcr(int el, u64 *pips, u64 *pva_bits); +u64 get_tcr(u64 *pips, u64 *pva_bits); #endif
#endif /* _ASM_ARMV8_MMU_H_ */

From: Andre Przywara andre.przywara@arm.com Date: Tue, 7 Jun 2022 15:08:34 +0100
Currently get_tcr() takes an "el" parameter, to select the proper version of the TCR_ELx system register. This is problematic in case of the Apple M1, since it runs with HCR_EL2.E2H fixed to 1, so TCR_EL2 is actually using the TCR_EL1 layout, and we get the wrong version.
For U-Boot's purposes the only sensible choice here is the current exception level, and indeed most caller treat it like that, so let's remove that parameter and read the current EL inside the function. This allows us to check for the E2H bit, and pretend it's EL1 in this case.
There are two callers which don't care about the EL, and they pass 0, which looks wrong, but is irrelevant in these two cases, since we don't use the return value there. So the change cannot affect those two.
Signed-off-by: Andre Przywara andre.przywara@arm.com
Review-by: Mark Kettenis kettenis@openbsd.org Tested-by: Mark Kettenis kettenis@openbsd.org
One comment below though...
arch/arm/cpu/armv8/cache_v8.c | 28 +++++++++++++++++++++---- arch/arm/cpu/armv8/fsl-layerscape/cpu.c | 4 ++-- arch/arm/include/asm/armv8/mmu.h | 2 +- 3 files changed, 27 insertions(+), 7 deletions(-)
diff --git a/arch/arm/cpu/armv8/cache_v8.c b/arch/arm/cpu/armv8/cache_v8.c index 3de18c7675..101c521e61 100644 --- a/arch/arm/cpu/armv8/cache_v8.c +++ b/arch/arm/cpu/armv8/cache_v8.c @@ -39,8 +39,28 @@ DECLARE_GLOBAL_DATA_PTR;
- off: FFF
*/
-u64 get_tcr(int el, u64 *pips, u64 *pva_bits) +static int get_effective_el(void) {
- int el = current_el();
- if (el == 2) {
u64 hcr_el2;
/*
* If we are using the EL2&0 translation regime, the TCR_EL2
* looks like the EL1 version, even though we are in EL2.
*/
__asm__ ("mrs %0, HCR_EL2\n" : "=r" (hcr_el2));
if (hcr_el2 & BIT(34))
Ideally that would be a #define. But I can see why you ended up doing this. The most logical place for the #define would be in arch/arm/include/asm/system.h, but that header is used in assembly code and the bitshifting stuff in there is problematic as we'd need something like:
#define HCR_EL2_E2H (1UL << 34)
but that isn't legal in assembly code.
return 1;
- }
- return el;
+}
+u64 get_tcr(u64 *pips, u64 *pva_bits) +{
- int el = get_effective_el(); u64 max_addr = 0; u64 ips, va_bits; u64 tcr;
@@ -115,7 +135,7 @@ static u64 *find_pte(u64 addr, int level)
debug("addr=%llx level=%d\n", addr, level);
- get_tcr(0, NULL, &va_bits);
- get_tcr(NULL, &va_bits); if (va_bits < 39) start_level = 1;
@@ -343,7 +363,7 @@ __weak u64 get_page_table_size(void) u64 va_bits; int start_level = 0;
- get_tcr(0, NULL, &va_bits);
- get_tcr(NULL, &va_bits); if (va_bits < 39) start_level = 1;
@@ -415,7 +435,7 @@ __weak void mmu_setup(void) setup_all_pgtables();
el = current_el();
- set_ttbr_tcr_mair(el, gd->arch.tlb_addr, get_tcr(el, NULL, NULL),
set_ttbr_tcr_mair(el, gd->arch.tlb_addr, get_tcr(NULL, NULL), MEMORY_ATTRIBUTES);
/* enable the mmu */
diff --git a/arch/arm/cpu/armv8/fsl-layerscape/cpu.c b/arch/arm/cpu/armv8/fsl-layerscape/cpu.c index 253008a9c1..c989a43cbe 100644 --- a/arch/arm/cpu/armv8/fsl-layerscape/cpu.c +++ b/arch/arm/cpu/armv8/fsl-layerscape/cpu.c @@ -454,7 +454,7 @@ static inline void early_mmu_setup(void)
/* point TTBR to the new table */ set_ttbr_tcr_mair(el, gd->arch.tlb_addr,
get_tcr(el, NULL, NULL) &
get_tcr(NULL, NULL) & ~(TCR_ORGN_MASK | TCR_IRGN_MASK), MEMORY_ATTRIBUTES);
@@ -609,7 +609,7 @@ static inline void final_mmu_setup(void) invalidate_icache_all();
/* point TTBR to the new table */
- set_ttbr_tcr_mair(el, gd->arch.tlb_addr, get_tcr(el, NULL, NULL),
set_ttbr_tcr_mair(el, gd->arch.tlb_addr, get_tcr(NULL, NULL), MEMORY_ATTRIBUTES);
set_sctlr(get_sctlr() | CR_M);
diff --git a/arch/arm/include/asm/armv8/mmu.h b/arch/arm/include/asm/armv8/mmu.h index c36b2cf5a5..98ce521ec5 100644 --- a/arch/arm/include/asm/armv8/mmu.h +++ b/arch/arm/include/asm/armv8/mmu.h @@ -134,7 +134,7 @@ struct mm_region {
extern struct mm_region *mem_map; void setup_pgtables(void); -u64 get_tcr(int el, u64 *pips, u64 *pva_bits); +u64 get_tcr(u64 *pips, u64 *pva_bits); #endif
#endif /* _ASM_ARMV8_MMU_H_ */
2.25.1

On Tue, 7 Jun 2022 19:45:14 +0200 (CEST) Mark Kettenis mark.kettenis@xs4all.nl wrote:
From: Andre Przywara andre.przywara@arm.com Date: Tue, 7 Jun 2022 15:08:34 +0100
Currently get_tcr() takes an "el" parameter, to select the proper version of the TCR_ELx system register. This is problematic in case of the Apple M1, since it runs with HCR_EL2.E2H fixed to 1, so TCR_EL2 is actually using the TCR_EL1 layout, and we get the wrong version.
For U-Boot's purposes the only sensible choice here is the current exception level, and indeed most caller treat it like that, so let's remove that parameter and read the current EL inside the function. This allows us to check for the E2H bit, and pretend it's EL1 in this case.
There are two callers which don't care about the EL, and they pass 0, which looks wrong, but is irrelevant in these two cases, since we don't use the return value there. So the change cannot affect those two.
Signed-off-by: Andre Przywara andre.przywara@arm.com
Review-by: Mark Kettenis kettenis@openbsd.org Tested-by: Mark Kettenis kettenis@openbsd.org
One comment below though...
arch/arm/cpu/armv8/cache_v8.c | 28 +++++++++++++++++++++---- arch/arm/cpu/armv8/fsl-layerscape/cpu.c | 4 ++-- arch/arm/include/asm/armv8/mmu.h | 2 +- 3 files changed, 27 insertions(+), 7 deletions(-)
diff --git a/arch/arm/cpu/armv8/cache_v8.c b/arch/arm/cpu/armv8/cache_v8.c index 3de18c7675..101c521e61 100644 --- a/arch/arm/cpu/armv8/cache_v8.c +++ b/arch/arm/cpu/armv8/cache_v8.c @@ -39,8 +39,28 @@ DECLARE_GLOBAL_DATA_PTR;
- off: FFF
*/
-u64 get_tcr(int el, u64 *pips, u64 *pva_bits) +static int get_effective_el(void) {
- int el = current_el();
- if (el == 2) {
u64 hcr_el2;
/*
* If we are using the EL2&0 translation regime, the TCR_EL2
* looks like the EL1 version, even though we are in EL2.
*/
__asm__ ("mrs %0, HCR_EL2\n" : "=r" (hcr_el2));
if (hcr_el2 & BIT(34))
Ideally that would be a #define. But I can see why you ended up doing
I am afraid you are giving me too much credit here, I just didn't think about this ;-)
this. The most logical place for the #define would be in arch/arm/include/asm/system.h, but that header is used in assembly code and the bitshifting stuff in there is problematic as we'd need something like:
#define HCR_EL2_E2H (1UL << 34)
but that isn't legal in assembly code.
Well, but we could at least define the bit number (just 34), then use that in both start.S and here, inside the BIT() macro. I will send a v2 tomorrow.
Cheers, Andre
return 1;
- }
- return el;
+}
+u64 get_tcr(u64 *pips, u64 *pva_bits) +{
- int el = get_effective_el(); u64 max_addr = 0; u64 ips, va_bits; u64 tcr;
@@ -115,7 +135,7 @@ static u64 *find_pte(u64 addr, int level)
debug("addr=%llx level=%d\n", addr, level);
- get_tcr(0, NULL, &va_bits);
- get_tcr(NULL, &va_bits); if (va_bits < 39) start_level = 1;
@@ -343,7 +363,7 @@ __weak u64 get_page_table_size(void) u64 va_bits; int start_level = 0;
- get_tcr(0, NULL, &va_bits);
- get_tcr(NULL, &va_bits); if (va_bits < 39) start_level = 1;
@@ -415,7 +435,7 @@ __weak void mmu_setup(void) setup_all_pgtables();
el = current_el();
- set_ttbr_tcr_mair(el, gd->arch.tlb_addr, get_tcr(el, NULL, NULL),
set_ttbr_tcr_mair(el, gd->arch.tlb_addr, get_tcr(NULL, NULL), MEMORY_ATTRIBUTES);
/* enable the mmu */
diff --git a/arch/arm/cpu/armv8/fsl-layerscape/cpu.c b/arch/arm/cpu/armv8/fsl-layerscape/cpu.c index 253008a9c1..c989a43cbe 100644 --- a/arch/arm/cpu/armv8/fsl-layerscape/cpu.c +++ b/arch/arm/cpu/armv8/fsl-layerscape/cpu.c @@ -454,7 +454,7 @@ static inline void early_mmu_setup(void)
/* point TTBR to the new table */ set_ttbr_tcr_mair(el, gd->arch.tlb_addr,
get_tcr(el, NULL, NULL) &
get_tcr(NULL, NULL) & ~(TCR_ORGN_MASK | TCR_IRGN_MASK), MEMORY_ATTRIBUTES);
@@ -609,7 +609,7 @@ static inline void final_mmu_setup(void) invalidate_icache_all();
/* point TTBR to the new table */
- set_ttbr_tcr_mair(el, gd->arch.tlb_addr, get_tcr(el, NULL, NULL),
set_ttbr_tcr_mair(el, gd->arch.tlb_addr, get_tcr(NULL, NULL), MEMORY_ATTRIBUTES);
set_sctlr(get_sctlr() | CR_M);
diff --git a/arch/arm/include/asm/armv8/mmu.h b/arch/arm/include/asm/armv8/mmu.h index c36b2cf5a5..98ce521ec5 100644 --- a/arch/arm/include/asm/armv8/mmu.h +++ b/arch/arm/include/asm/armv8/mmu.h @@ -134,7 +134,7 @@ struct mm_region {
extern struct mm_region *mem_map; void setup_pgtables(void); -u64 get_tcr(int el, u64 *pips, u64 *pva_bits); +u64 get_tcr(u64 *pips, u64 *pva_bits); #endif
#endif /* _ASM_ARMV8_MMU_H_ */
2.25.1
participants (2)
-
Andre Przywara
-
Mark Kettenis