[U-Boot] [PATCH 0/6] sh4: fix and simplify cache manipulation

In my tests I experience that sometimes SH4 does not properly fetch instructions after immediate jump to a code loaded by means of rtl8139, the problem is gone if "icache off" is executed after load or if cache invalidation and write-back is used on data load instead of just cache invalidation, which is apparently an improper backbone of flush_dcache_range() function.
The changeset contains a couple of bugfixes and a general simplification of the code related to cache manipulation. Note that while caches are found on SH2 and SH3 for long time they were inactive in U-Boot, because I don't have SH2/SH3 hardware for testing I don't spread SH4 cache fixes to those cpus, however generally it should be the same, in that case arch/sh/cpu/sh4/cache.c file can be moved to arch/sh/lib folder
For 3/6 patch there is a minor patch application dependency on one change https://patchwork.ozlabs.org/patch/656383/ , so formally it might be easier to apply the series after the series, which switches SH architectures to generic board.
Vladimir Zapolskiy (6): sh4: cache: correct dcache flush to invalidate with write-back sh4: cache: correct flush_cache() to writeback and invalidate sh3: remove unused cache.c file from being built sh: cache use jump_to_P2() and back_to_P1() from asm/system.h sh: cache: don't modify CCR from P1 area sh4: cache: move exported cache manipulation functions into cache.c
arch/sh/cpu/sh3/Makefile | 2 +- arch/sh/cpu/sh3/cache.c | 96 --------------------------------------------- arch/sh/cpu/sh4/cache.c | 88 ++++++++++++++++++++--------------------- arch/sh/cpu/sh4/cpu.c | 34 ---------------- arch/sh/include/asm/cache.h | 2 - 5 files changed, 43 insertions(+), 179 deletions(-) delete mode 100644 arch/sh/cpu/sh3/cache.c

In common usecases flush_cache() assumes both cache invalidation and write-back to memory, thus in flush_dcache_range() implementation change SH4 cache write-back only instruction 'ocbwb' with cache purge instruction 'ocbp', according to the User's Manual there should be no performance penalty for that.
Note that under circumstances only cache invalidation is expected from flush_cache() call, in these occasional cases the current version of flush_cache() works, which is a wrapper over invalidate_dcache_range() at the moment, this will be fixed in the following change.
Signed-off-by: Vladimir Zapolskiy vz@mleia.com --- arch/sh/cpu/sh4/cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/sh/cpu/sh4/cache.c b/arch/sh/cpu/sh4/cache.c index e1ee970..b3e5fd5 100644 --- a/arch/sh/cpu/sh4/cache.c +++ b/arch/sh/cpu/sh4/cache.c @@ -97,7 +97,7 @@ void flush_dcache_range(unsigned long start, unsigned long end)
start &= ~(L1_CACHE_BYTES - 1); for (v = start; v < end; v += L1_CACHE_BYTES) { - asm volatile ("ocbwb %0" : /* no output */ + asm volatile ("ocbp %0" : /* no output */ : "m" (__m(v))); } }

In common usecases flush_cache() assumes both cache invalidation and write-back to memory, instead of doing cache invalidation only with the wrapped 'ocbi' instruction pin flush_cache() to cache invalidation with memory write-back done by 'ocbp'.
Signed-off-by: Vladimir Zapolskiy vz@mleia.com --- arch/sh/cpu/sh4/cpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/sh/cpu/sh4/cpu.c b/arch/sh/cpu/sh4/cpu.c index e8ee0a4..e6b5c86 100644 --- a/arch/sh/cpu/sh4/cpu.c +++ b/arch/sh/cpu/sh4/cpu.c @@ -37,7 +37,7 @@ int do_reset (cmd_tbl_t *cmdtp, int flag, int argc, char * const argv[])
void flush_cache (unsigned long addr, unsigned long size) { - invalidate_dcache_range(addr , addr + size); + flush_dcache_range(addr , addr + size); }
void icache_enable (void)

The change is similar to commit 994b56616bae ("sh: delete an unused source file") for SH2, however here the removed cache.c file was built and included into an image as a dead code.
If it is needed in future the contents can be reused from a similar arch/sh/cpu/sh4/cache.c file, which is in turn will be moved to a shared among all core flavours location at arch/sh/lib/cache.c.
Signed-off-by: Vladimir Zapolskiy vz@mleia.com --- arch/sh/cpu/sh3/Makefile | 2 +- arch/sh/cpu/sh3/cache.c | 96 ------------------------------------------------ 2 files changed, 1 insertion(+), 97 deletions(-) delete mode 100644 arch/sh/cpu/sh3/cache.c
diff --git a/arch/sh/cpu/sh3/Makefile b/arch/sh/cpu/sh3/Makefile index 959f551..cddc15b 100644 --- a/arch/sh/cpu/sh3/Makefile +++ b/arch/sh/cpu/sh3/Makefile @@ -11,4 +11,4 @@ # SPDX-License-Identifier: GPL-2.0+ #
-obj-y = cpu.o interrupts.o watchdog.o cache.o +obj-y = cpu.o interrupts.o watchdog.o diff --git a/arch/sh/cpu/sh3/cache.c b/arch/sh/cpu/sh3/cache.c deleted file mode 100644 index 34cbbff..0000000 --- a/arch/sh/cpu/sh3/cache.c +++ /dev/null @@ -1,96 +0,0 @@ -/* - * (C) Copyright 2007 - * Yoshihiro Shimoda shimoda.yoshihiro@renesas.com - * - * (C) Copyright 2007 - * Nobobuhiro Iwamatsu iwamatsu@nigauri.org - * - * SPDX-License-Identifier: GPL-2.0+ - */ - -#include <common.h> -#include <command.h> -#include <asm/processor.h> -#include <asm/io.h> - -/* - * Jump to P2 area. - * When handling TLB or caches, we need to do it from P2 area. - */ -#define jump_to_P2() \ - do { \ - unsigned long __dummy; \ - __asm__ __volatile__( \ - "mov.l 1f, %0\n\t" \ - "or %1, %0\n\t" \ - "jmp @%0\n\t" \ - " nop\n\t" \ - ".balign 4\n" \ - "1: .long 2f\n" \ - "2:" \ - : "=&r" (__dummy) \ - : "r" (0x20000000)); \ - } while (0) - -/* - * Back to P1 area. - */ -#define back_to_P1() \ - do { \ - unsigned long __dummy; \ - __asm__ __volatile__( \ - "nop;nop;nop;nop;nop;nop;nop\n\t" \ - "mov.l 1f, %0\n\t" \ - "jmp @%0\n\t" \ - " nop\n\t" \ - ".balign 4\n" \ - "1: .long 2f\n" \ - "2:" \ - : "=&r" (__dummy)); \ - } while (0) - -#define CACHE_VALID 1 -#define CACHE_UPDATED 2 - -static inline void cache_wback_all(void) -{ - unsigned long addr, data, i, j; - - jump_to_P2(); - for (i = 0; i < CACHE_OC_NUM_ENTRIES; i++) { - for (j = 0; j < CACHE_OC_NUM_WAYS; j++) { - addr = CACHE_OC_ADDRESS_ARRAY - | (j << CACHE_OC_WAY_SHIFT) - | (i << CACHE_OC_ENTRY_SHIFT); - data = inl(addr); - if (data & CACHE_UPDATED) { - data &= ~CACHE_UPDATED; - outl(data, addr); - } - } - } - back_to_P1(); -} - - -#define CACHE_ENABLE 0 -#define CACHE_DISABLE 1 - -int cache_control(unsigned int cmd) -{ - unsigned long ccr; - - jump_to_P2(); - ccr = inl(CCR); - - if (ccr & CCR_CACHE_ENABLE) - cache_wback_all(); - - if (cmd == CACHE_DISABLE) - outl(CCR_CACHE_STOP, CCR); - else - outl(CCR_CACHE_INIT, CCR); - back_to_P1(); - - return 0; -}

Both jump_to_P2() and back_to_P1() functions are found in asm/system.h header file and functionally they are the same, don't redefine them.
Signed-off-by: Vladimir Zapolskiy vz@mleia.com --- arch/sh/cpu/sh4/cache.c | 39 ++------------------------------------- 1 file changed, 2 insertions(+), 37 deletions(-)
diff --git a/arch/sh/cpu/sh4/cache.c b/arch/sh/cpu/sh4/cache.c index b3e5fd5..50695b6 100644 --- a/arch/sh/cpu/sh4/cache.c +++ b/arch/sh/cpu/sh4/cache.c @@ -7,44 +7,9 @@
#include <common.h> #include <command.h> -#include <asm/processor.h> #include <asm/io.h> - -/* - * Jump to P2 area. - * When handling TLB or caches, we need to do it from P2 area. - */ -#define jump_to_P2() \ - do { \ - unsigned long __dummy; \ - __asm__ __volatile__( \ - "mov.l 1f, %0\n\t" \ - "or %1, %0\n\t" \ - "jmp @%0\n\t" \ - " nop\n\t" \ - ".balign 4\n" \ - "1: .long 2f\n" \ - "2:" \ - : "=&r" (__dummy) \ - : "r" (0x20000000)); \ - } while (0) - -/* - * Back to P1 area. - */ -#define back_to_P1() \ - do { \ - unsigned long __dummy; \ - __asm__ __volatile__( \ - "nop;nop;nop;nop;nop;nop;nop\n\t" \ - "mov.l 1f, %0\n\t" \ - "jmp @%0\n\t" \ - " nop\n\t" \ - ".balign 4\n" \ - "1: .long 2f\n" \ - "2:" \ - : "=&r" (__dummy)); \ - } while (0) +#include <asm/processor.h> +#include <asm/system.h>
#define CACHE_VALID 1 #define CACHE_UPDATED 2

cache_wback_all() is a local function and it is called from cache_control() only, which is in turn jumps to P2 area.
The change fixes an issue when cache_wback_all() returns from P2 to P1, however cache_control() continues to manipulate with CCR register, according to the User's Manual this is restricted.
Signed-off-by: Vladimir Zapolskiy vz@mleia.com --- arch/sh/cpu/sh4/cache.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/arch/sh/cpu/sh4/cache.c b/arch/sh/cpu/sh4/cache.c index 50695b6..7750f0f 100644 --- a/arch/sh/cpu/sh4/cache.c +++ b/arch/sh/cpu/sh4/cache.c @@ -18,10 +18,10 @@ static inline void cache_wback_all(void) { unsigned long addr, data, i, j;
- jump_to_P2(); - for (i = 0; i < CACHE_OC_NUM_ENTRIES; i++){ + for (i = 0; i < CACHE_OC_NUM_ENTRIES; i++) { for (j = 0; j < CACHE_OC_NUM_WAYS; j++) { - addr = CACHE_OC_ADDRESS_ARRAY | (j << CACHE_OC_WAY_SHIFT) + addr = CACHE_OC_ADDRESS_ARRAY + | (j << CACHE_OC_WAY_SHIFT) | (i << CACHE_OC_ENTRY_SHIFT); data = inl(addr); if (data & CACHE_UPDATED) { @@ -30,10 +30,8 @@ static inline void cache_wback_all(void) } } } - back_to_P1(); }
- #define CACHE_ENABLE 0 #define CACHE_DISABLE 1

No functional change, moving cache manipulation functions into cache.c allows to collect all of them in a single location and as a pleasant side effect cache_control() function can be unexported now.
Signed-off-by: Vladimir Zapolskiy vz@mleia.com --- arch/sh/cpu/sh4/cache.c | 39 ++++++++++++++++++++++++++++++++++++--- arch/sh/cpu/sh4/cpu.c | 34 ---------------------------------- arch/sh/include/asm/cache.h | 2 -- 3 files changed, 36 insertions(+), 39 deletions(-)
diff --git a/arch/sh/cpu/sh4/cache.c b/arch/sh/cpu/sh4/cache.c index 7750f0f..6175c67 100644 --- a/arch/sh/cpu/sh4/cache.c +++ b/arch/sh/cpu/sh4/cache.c @@ -1,6 +1,6 @@ /* - * (C) Copyright 2007 - * Nobuhiro Iwamatsu iwamatsu@nigauri.org + * (C) Copyright 2016 Vladimir Zapolskiy vz@mleia.com + * (C) Copyright 2007 Nobuhiro Iwamatsu iwamatsu@nigauri.org * * SPDX-License-Identifier: GPL-2.0+ */ @@ -35,7 +35,7 @@ static inline void cache_wback_all(void) #define CACHE_ENABLE 0 #define CACHE_DISABLE 1
-int cache_control(unsigned int cmd) +static int cache_control(unsigned int cmd) { unsigned long ccr;
@@ -75,3 +75,36 @@ void invalidate_dcache_range(unsigned long start, unsigned long end) : "m" (__m(v))); } } + +void flush_cache(unsigned long addr, unsigned long size) +{ + flush_dcache_range(addr , addr + size); +} + +void icache_enable(void) +{ + cache_control(CACHE_ENABLE); +} + +void icache_disable(void) +{ + cache_control(CACHE_DISABLE); +} + +int icache_status(void) +{ + return 0; +} + +void dcache_enable(void) +{ +} + +void dcache_disable(void) +{ +} + +int dcache_status(void) +{ + return 0; +} diff --git a/arch/sh/cpu/sh4/cpu.c b/arch/sh/cpu/sh4/cpu.c index e6b5c86..aa8d4df 100644 --- a/arch/sh/cpu/sh4/cpu.c +++ b/arch/sh/cpu/sh4/cpu.c @@ -9,7 +9,6 @@ #include <command.h> #include <netdev.h> #include <asm/processor.h> -#include <asm/cache.h>
int checkcpu(void) { @@ -35,39 +34,6 @@ int do_reset (cmd_tbl_t *cmdtp, int flag, int argc, char * const argv[]) return 0; }
-void flush_cache (unsigned long addr, unsigned long size) -{ - flush_dcache_range(addr , addr + size); -} - -void icache_enable (void) -{ - cache_control(0); -} - -void icache_disable (void) -{ - cache_control(1); -} - -int icache_status (void) -{ - return 0; -} - -void dcache_enable (void) -{ -} - -void dcache_disable (void) -{ -} - -int dcache_status (void) -{ - return 0; -} - int cpu_eth_init(bd_t *bis) { #ifdef CONFIG_SH_ETHER diff --git a/arch/sh/include/asm/cache.h b/arch/sh/include/asm/cache.h index abaf405..b548a35 100644 --- a/arch/sh/include/asm/cache.h +++ b/arch/sh/include/asm/cache.h @@ -3,8 +3,6 @@
#if defined(CONFIG_CPU_SH4)
-int cache_control(unsigned int cmd); - #define L1_CACHE_BYTES 32
struct __large_struct { unsigned long buf[100]; };
participants (1)
-
Vladimir Zapolskiy