[U-Boot] [PATCH 0/3] arm: Introduce writel/readl_relaxed accessors

Admittedly this is the long way round to solve some nasty SPL code size problem, but it looked beneficial to others as well, so here we go:
arch/arm/include/asm/io.h looks like it's been around since the dawn of time, and was more or less blindly copied from Linux. We don't use and don't need most of the definitions, and mainline Linux got rid of them anyway, so patch 1/3 cleans up this header file to just contain what we need in U-Boot.
Patch 2/3 introduces readl/writel_relaxed accessors, which are cheaper, but more importantly save one (barrier) instruction per accessor. This helps to bring down code size, since especially DRAM controller inits in SPLs tend to do a lot of MMIO.
Consequently patch 3/3 introduces them in the Allwinner H6 DRAM driver, which reduces the SPL size by a whopping 2KB, due to a twist: The AArch64 exception table needs to be 2KB aligned, but we don't do anything special about it the linker script. So depending on where the code before the vectors ends, we have potentially large padding: At the moment this last address is 0x1824 for the H6, so the vectors can only start at 0x2000. By reducing the code size before the vectors by just (at least) 9 instructions, the vectors start at 0x1800 and we save most of the padding.
I understand that the proper solution is to fill the gap before the vectors with code instead of NOPs, but I couldn't find any obvious way doing this in the linker script. If anyone has any idea here, I am all ears.
Cheers, Andre.
Andre Przywara (3): arm: clean up asm/io.h arm: introduce _relaxed MMIO accessors sunxi: H6: use writel_relaxed for DRAM timing register accesses
arch/arm/include/asm/io.h | 164 +++-------------------------------- arch/arm/mach-sunxi/dram_sun50i_h6.c | 79 +++++++++-------- 2 files changed, 54 insertions(+), 189 deletions(-)

asm/io.h is the header file containing the central MMIO accessor macros. Judging by the header and the comments, it was apparently once copied from the Linux kernel, but has deviated since then heavily.
Clean up the definitions by: - removing pointless Linux history - removing commented code - removing outdated comments - removing unused definitions, for instance for mem_isa and mem_pci
This massively improves the readability of the file.
Signed-off-by: Andre Przywara andre.przywara@arm.com --- arch/arm/include/asm/io.h | 154 +--------------------------------------------- 1 file changed, 2 insertions(+), 152 deletions(-)
diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h index 12bc7fbe06..bcbaf0d83c 100644 --- a/arch/arm/include/asm/io.h +++ b/arch/arm/include/asm/io.h @@ -1,44 +1,25 @@ /* - * linux/include/asm-arm/io.h + * I/O device access primitives. Based on early versions from the Linux kernel. * * Copyright (C) 1996-2000 Russell King * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. - * - * Modifications: - * 16-Sep-1996 RMK Inlined the inx/outx functions & optimised for both - * constant addresses and variable addresses. - * 04-Dec-1997 RMK Moved a lot of this stuff to the new architecture - * specific IO header files. - * 27-Mar-1999 PJB Second parameter of memcpy_toio is const.. - * 04-Apr-1999 PJB Added check_signature. - * 12-Dec-1999 RMK More cleanups - * 18-Jun-2000 RMK Removed virt_to_* and friends definitions */ #ifndef __ASM_ARM_IO_H #define __ASM_ARM_IO_H
-#ifdef __KERNEL__ - #include <linux/types.h> #include <asm/byteorder.h> #include <asm/memory.h> #include <asm/barriers.h> -#if 0 /* XXX###XXX */ -#include <asm/arch/hardware.h> -#endif /* XXX###XXX */
static inline void sync(void) { }
-/* - * Generic virtual read/write. Note that we don't support half-word - * read/writes. We define __arch_*[bl] here, and leave __arch_*w - * to the architecture specific code. - */ +/* Generic virtual read/write. */ #define __arch_getb(a) (*(volatile unsigned char *)(a)) #define __arch_getw(a) (*(volatile unsigned short *)(a)) #define __arch_getl(a) (*(volatile unsigned int *)(a)) @@ -205,13 +186,6 @@ static inline void __raw_readsl(unsigned long addr, void *data, int longlen) #define setbits_8(addr, set) setbits(8, addr, set) #define clrsetbits_8(addr, clear, set) clrsetbits(8, addr, clear, set)
-/* - * Now, pick up the machine-defined IO definitions - */ -#if 0 /* XXX###XXX */ -#include <asm/arch/io.h> -#endif /* XXX###XXX */ - /* * IO port access primitives * ------------------------- @@ -275,16 +249,6 @@ static inline void __raw_readsl(unsigned long addr, void *data, int longlen) #define writesb(a, d, s) __raw_writesb((unsigned long)a, d, s) #define readsb(a, d, s) __raw_readsb((unsigned long)a, d, s)
-/* - * DMA-consistent mapping functions. These allocate/free a region of - * uncached, unwrite-buffered mapped memory space for use with DMA - * devices. This is the "generic" version. The PCI specific version - * is in pci.h - */ -extern void *consistent_alloc(int gfp, size_t size, dma_addr_t *handle); -extern void consistent_free(void *vaddr, size_t size, dma_addr_t handle); -extern void consistent_sync(void *vaddr, size_t size, int rw); - /* * String version of IO memory access ops: */ @@ -292,124 +256,10 @@ extern void _memcpy_fromio(void *, unsigned long, size_t); extern void _memcpy_toio(unsigned long, const void *, size_t); extern void _memset_io(unsigned long, int, size_t);
-extern void __readwrite_bug(const char *fn); - -/* - * If this architecture has PCI memory IO, then define the read/write - * macros. These should only be used with the cookie passed from - * ioremap. - */ -#ifdef __mem_pci - -#define readb(c) ({ unsigned int __v = __raw_readb(__mem_pci(c)); __v; }) -#define readw(c) ({ unsigned int __v = le16_to_cpu(__raw_readw(__mem_pci(c))); __v; }) -#define readl(c) ({ unsigned int __v = le32_to_cpu(__raw_readl(__mem_pci(c))); __v; }) - -#define writeb(v,c) __raw_writeb(v,__mem_pci(c)) -#define writew(v,c) __raw_writew(cpu_to_le16(v),__mem_pci(c)) -#define writel(v,c) __raw_writel(cpu_to_le32(v),__mem_pci(c)) - -#define memset_io(c,v,l) _memset_io(__mem_pci(c),(v),(l)) -#define memcpy_fromio(a,c,l) _memcpy_fromio((a),__mem_pci(c),(l)) -#define memcpy_toio(c,a,l) _memcpy_toio(__mem_pci(c),(a),(l)) - -#define eth_io_copy_and_sum(s,c,l,b) \ - eth_copy_and_sum((s),__mem_pci(c),(l),(b)) - -static inline int -check_signature(unsigned long io_addr, const unsigned char *signature, - int length) -{ - int retval = 0; - do { - if (readb(io_addr) != *signature) - goto out; - io_addr++; - signature++; - length--; - } while (length); - retval = 1; -out: - return retval; -} - -#else #define memset_io(a, b, c) memset((void *)(a), (b), (c)) #define memcpy_fromio(a, b, c) memcpy((a), (void *)(b), (c)) #define memcpy_toio(a, b, c) memcpy((void *)(a), (b), (c))
-#if !defined(readb) - -#define readb(addr) (__readwrite_bug("readb"),0) -#define readw(addr) (__readwrite_bug("readw"),0) -#define readl(addr) (__readwrite_bug("readl"),0) -#define writeb(v,addr) __readwrite_bug("writeb") -#define writew(v,addr) __readwrite_bug("writew") -#define writel(v,addr) __readwrite_bug("writel") - -#define eth_io_copy_and_sum(a,b,c,d) __readwrite_bug("eth_io_copy_and_sum") - -#define check_signature(io,sig,len) (0) - -#endif -#endif /* __mem_pci */ - -/* - * If this architecture has ISA IO, then define the isa_read/isa_write - * macros. - */ -#ifdef __mem_isa - -#define isa_readb(addr) __raw_readb(__mem_isa(addr)) -#define isa_readw(addr) __raw_readw(__mem_isa(addr)) -#define isa_readl(addr) __raw_readl(__mem_isa(addr)) -#define isa_writeb(val,addr) __raw_writeb(val,__mem_isa(addr)) -#define isa_writew(val,addr) __raw_writew(val,__mem_isa(addr)) -#define isa_writel(val,addr) __raw_writel(val,__mem_isa(addr)) -#define isa_memset_io(a,b,c) _memset_io(__mem_isa(a),(b),(c)) -#define isa_memcpy_fromio(a,b,c) _memcpy_fromio((a),__mem_isa(b),(c)) -#define isa_memcpy_toio(a,b,c) _memcpy_toio(__mem_isa((a)),(b),(c)) - -#define isa_eth_io_copy_and_sum(a,b,c,d) \ - eth_copy_and_sum((a),__mem_isa(b),(c),(d)) - -static inline int -isa_check_signature(unsigned long io_addr, const unsigned char *signature, - int length) -{ - int retval = 0; - do { - if (isa_readb(io_addr) != *signature) - goto out; - io_addr++; - signature++; - length--; - } while (length); - retval = 1; -out: - return retval; -} - -#else /* __mem_isa */ - -#define isa_readb(addr) (__readwrite_bug("isa_readb"),0) -#define isa_readw(addr) (__readwrite_bug("isa_readw"),0) -#define isa_readl(addr) (__readwrite_bug("isa_readl"),0) -#define isa_writeb(val,addr) __readwrite_bug("isa_writeb") -#define isa_writew(val,addr) __readwrite_bug("isa_writew") -#define isa_writel(val,addr) __readwrite_bug("isa_writel") -#define isa_memset_io(a,b,c) __readwrite_bug("isa_memset_io") -#define isa_memcpy_fromio(a,b,c) __readwrite_bug("isa_memcpy_fromio") -#define isa_memcpy_toio(a,b,c) __readwrite_bug("isa_memcpy_toio") - -#define isa_eth_io_copy_and_sum(a,b,c,d) \ - __readwrite_bug("isa_eth_io_copy_and_sum") - -#define isa_check_signature(io,sig,len) (0) - -#endif /* __mem_isa */ -#endif /* __KERNEL__ */ - #include <asm-generic/io.h> #include <iotrace.h>

On 11.01.2019, at 01:31, Andre Przywara andre.przywara@arm.com wrote:
asm/io.h is the header file containing the central MMIO accessor macros. Judging by the header and the comments, it was apparently once copied from the Linux kernel, but has deviated since then heavily.
Clean up the definitions by:
- removing pointless Linux history
- removing commented code
- removing outdated comments
- removing unused definitions, for instance for mem_isa and mem_pci
This massively improves the readability of the file.
Signed-off-by: Andre Przywara andre.przywara@arm.com
Reviewed-by: Philipp Tomsich philipp.tomsich@theobroma-systems.com

The normal MMIO accessor macros (readX/writeX) guarantee a strong ordering, even with normal memory accesses [1]. For some MMIO operations (framebuffers being a prominent example) this is not needed, and the rather costly barrier inserted on weakly ordered systems like ARM costs some performance. To mitigate this issue, Linux introduced readX_relaxed and writeX_relaxed primitives, which only guarantee ordering between each other, so are typically faster due to the missing barrier.
We probably do not care so much about performance in U-Boot, but want to have those primitives for two other reasons: - Being able to use the _relaxed macros simplifies porting drivers from Linux. - The missing barrier typically allows to generate smaller code, which can relieve some chronically tight SPL builds.
Add those macros definitions by using the __raw versions of the accessors, which matches an earlier (and less complicated) version of the Linux implementation.
[1] https://lwn.net/Articles/698014/ --- arch/arm/include/asm/io.h | 10 ++++++++++ 1 file changed, 10 insertions(+)
diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h index bcbaf0d83c..5b24b88efc 100644 --- a/arch/arm/include/asm/io.h +++ b/arch/arm/include/asm/io.h @@ -98,11 +98,21 @@ static inline void __raw_readsl(unsigned long addr, void *data, int longlen) #define writel(v,c) ({ u32 __v = v; __iowmb(); __arch_putl(__v,c); __v; }) #define writeq(v,c) ({ u64 __v = v; __iowmb(); __arch_putq(__v,c); __v; })
+#define writeb_relaxed(v,c) __raw_writeb(v, c) +#define writew_relaxed(v,c) __raw_writew(v, c) +#define writel_relaxed(v,c) __raw_writel(v, c) +#define writeq_relaxed(v,c) __raw_writeq(v, c) + #define readb(c) ({ u8 __v = __arch_getb(c); __iormb(); __v; }) #define readw(c) ({ u16 __v = __arch_getw(c); __iormb(); __v; }) #define readl(c) ({ u32 __v = __arch_getl(c); __iormb(); __v; }) #define readq(c) ({ u64 __v = __arch_getq(c); __iormb(); __v; })
+#define readb_relaxed(c) __raw_readb(c) +#define readw_relaxed(c) __raw_readw(c) +#define readl_relaxed(c) __raw_readl(c) +#define readq_relaxed(c) __raw_readq(c) + /* * The compiler seems to be incapable of optimising constants * properly. Spell it out to the compiler in some cases.

On 11.01.2019, at 01:31, Andre Przywara andre.przywara@arm.com wrote:
The normal MMIO accessor macros (readX/writeX) guarantee a strong ordering, even with normal memory accesses [1]. For some MMIO operations (framebuffers being a prominent example) this is not needed, and the rather costly barrier inserted on weakly ordered systems like ARM costs some performance. To mitigate this issue, Linux introduced readX_relaxed and writeX_relaxed primitives, which only guarantee ordering between each other, so are typically faster due to the missing barrier.
We probably do not care so much about performance in U-Boot, but want to have those primitives for two other reasons:
- Being able to use the _relaxed macros simplifies porting drivers from
Linux.
- The missing barrier typically allows to generate smaller code, which can
relieve some chronically tight SPL builds.
Add those macros definitions by using the __raw versions of the accessors, which matches an earlier (and less complicated) version of the Linux implementation.
No Signed-off-by?
Reviewed-by: Philipp Tomsich philipp.tomsich@theobroma-systems.com [On my experimental RK3399 after modifying a few drivers:] Tested-by: Philipp Tomsich philipp.tomsich@theobroma-systems.com

On 06/02/2019 12:46, Philipp Tomsich wrote:
On 11.01.2019, at 01:31, Andre Przywara andre.przywara@arm.com wrote:
Hi,
The normal MMIO accessor macros (readX/writeX) guarantee a strong ordering, even with normal memory accesses [1]. For some MMIO operations (framebuffers being a prominent example) this is not needed, and the rather costly barrier inserted on weakly ordered systems like ARM costs some performance. To mitigate this issue, Linux introduced readX_relaxed and writeX_relaxed primitives, which only guarantee ordering between each other, so are typically faster due to the missing barrier.
We probably do not care so much about performance in U-Boot, but want to have those primitives for two other reasons:
- Being able to use the _relaxed macros simplifies porting drivers from
Linux.
- The missing barrier typically allows to generate smaller code, which can
relieve some chronically tight SPL builds.
Add those macros definitions by using the __raw versions of the accessors, which matches an earlier (and less complicated) version of the Linux implementation.
No Signed-off-by?
Doh, indeed. Got so excited about my commit message, that I forgot the obvious (and I only think about -s *after* hitting Enter).
Reviewed-by: Philipp Tomsich philipp.tomsich@theobroma-systems.com [On my experimental RK3399 after modifying a few drivers:] Tested-by: Philipp Tomsich philipp.tomsich@theobroma-systems.com
Cool, many thanks for that! Out of curiosity, did you really need *_relaxed for some reason?
Cheers, Andre.

On 06.02.2019, at 22:53, André Przywara andre.przywara@arm.com wrote:
On 06/02/2019 12:46, Philipp Tomsich wrote:
On 11.01.2019, at 01:31, Andre Przywara <andre.przywara@arm.com mailto:andre.przywara@arm.com> wrote:
Hi,
The normal MMIO accessor macros (readX/writeX) guarantee a strong ordering, even with normal memory accesses [1]. For some MMIO operations (framebuffers being a prominent example) this is not needed, and the rather costly barrier inserted on weakly ordered systems like ARM costs some performance. To mitigate this issue, Linux introduced readX_relaxed and writeX_relaxed primitives, which only guarantee ordering between each other, so are typically faster due to the missing barrier.
We probably do not care so much about performance in U-Boot, but want to have those primitives for two other reasons:
- Being able to use the _relaxed macros simplifies porting drivers from
Linux.
- The missing barrier typically allows to generate smaller code, which can
relieve some chronically tight SPL builds.
Add those macros definitions by using the __raw versions of the accessors, which matches an earlier (and less complicated) version of the Linux implementation.
No Signed-off-by?
Doh, indeed. Got so excited about my commit message, that I forgot the obvious (and I only think about -s *after* hitting Enter).
Reviewed-by: Philipp Tomsich <philipp.tomsich@theobroma-systems.com mailto:philipp.tomsich@theobroma-systems.com> [On my experimental RK3399 after modifying a few drivers:] Tested-by: Philipp Tomsich <philipp.tomsich@theobroma-systems.com mailto:philipp.tomsich@theobroma-systems.com>
Cool, many thanks for that! Out of curiosity, did you really need *_relaxed for some reason?
Not yet, but possibly soon. I have been working on some changes to the way we auto-configure for different DRAM timings (effectively doing something similar to how DIMMs are detected using SPDs)… and have been fighting code-size growth on that front. As I have to set up timings for 3 frequencies on the 3399, the amount of dmb()s add up from the strongly ordered writel and clrsetbits calls.
Cheers, Phil.

The timing registers in the DRAM controller can be programmed in any order, as they will only take effect once the controller is eventually "activated".
Switch the MMIO writes in mctl_set_timing_lpddr3() over to use writel_relaxed(), since we don't need the stronger guarantee of the normal writel(). We satisfy the overall ordering requirement by ending the function with an explicit DMB barrier.
In this case we are not interested in the performance benefit this usually gives, but in the saved instructions, which sum up for the many writes we have in the timing setup. Due to alignment effects this shrinks our chronically tight H6 SPL by a whopping 2KB, which brings it in the same region as for the other AArch64 Allwinner SPL builds.
Signed-off-by: Andre Przywara andre.przywara@arm.com --- arch/arm/mach-sunxi/dram_sun50i_h6.c | 79 +++++++++++++++++++----------------- 1 file changed, 42 insertions(+), 37 deletions(-)
diff --git a/arch/arm/mach-sunxi/dram_sun50i_h6.c b/arch/arm/mach-sunxi/dram_sun50i_h6.c index 5da90a2835..84a33a63d6 100644 --- a/arch/arm/mach-sunxi/dram_sun50i_h6.c +++ b/arch/arm/mach-sunxi/dram_sun50i_h6.c @@ -241,51 +241,55 @@ static void mctl_set_timing_lpddr3(struct dram_para *para) memcpy(mctl_phy->mr, mr_lpddr3, sizeof(mr_lpddr3));
/* set DRAM timing */ - writel((twtp << 24) | (tfaw << 16) | (trasmax << 8) | tras, - &mctl_ctl->dramtmg[0]); - writel((txp << 16) | (trtp << 8) | trc, &mctl_ctl->dramtmg[1]); - writel((tcwl << 24) | (tcl << 16) | (trd2wr << 8) | twr2rd, - &mctl_ctl->dramtmg[2]); - writel((tmrw << 20) | (tmrd << 12) | tmod, &mctl_ctl->dramtmg[3]); - writel((trcd << 24) | (tccd << 16) | (trrd << 8) | trp, - &mctl_ctl->dramtmg[4]); - writel((tcksrx << 24) | (tcksre << 16) | (tckesr << 8) | tcke, - &mctl_ctl->dramtmg[5]); + writel_relaxed((twtp << 24) | (tfaw << 16) | (trasmax << 8) | tras, + &mctl_ctl->dramtmg[0]); + writel_relaxed((txp << 16) | (trtp << 8) | trc, &mctl_ctl->dramtmg[1]); + writel_relaxed((tcwl << 24) | (tcl << 16) | (trd2wr << 8) | twr2rd, + &mctl_ctl->dramtmg[2]); + writel_relaxed((tmrw << 20) | (tmrd << 12) | tmod, + &mctl_ctl->dramtmg[3]); + writel_relaxed((trcd << 24) | (tccd << 16) | (trrd << 8) | trp, + &mctl_ctl->dramtmg[4]); + writel_relaxed((tcksrx << 24) | (tcksre << 16) | (tckesr << 8) | tcke, + &mctl_ctl->dramtmg[5]); /* Value suggested by ZynqMP manual and used by libdram */ - writel((txp + 2) | 0x02020000, &mctl_ctl->dramtmg[6]); - writel((txsfast << 24) | (txsabort << 16) | (txsdll << 8) | txs, - &mctl_ctl->dramtmg[8]); - writel(txsr, &mctl_ctl->dramtmg[14]); + writel_relaxed((txp + 2) | 0x02020000, &mctl_ctl->dramtmg[6]); + writel_relaxed((txsfast << 24) | (txsabort << 16) | (txsdll << 8) | txs, + &mctl_ctl->dramtmg[8]); + writel_relaxed(txsr, &mctl_ctl->dramtmg[14]);
clrsetbits_le32(&mctl_ctl->init[0], (3 << 30), (1 << 30)); - writel(0, &mctl_ctl->dfimisc); + writel_relaxed(0, &mctl_ctl->dfimisc); clrsetbits_le32(&mctl_ctl->rankctl, 0xff0, 0x660);
/* * Set timing registers of the PHY. * Note: the PHY is clocked 2x from the DRAM frequency. */ - writel((trrd << 25) | (tras << 17) | (trp << 9) | (trtp << 1), + writel_relaxed((trrd << 25) | (tras << 17) | (trp << 9) | (trtp << 1), &mctl_phy->dtpr[0]); - writel((tfaw << 17) | 0x28000400 | (tmrd << 1), &mctl_phy->dtpr[1]); - writel(((txs << 6) - 1) | (tcke << 17), &mctl_phy->dtpr[2]); - writel(((txsdll << 22) - (0x1 << 16)) | twtr_sa | (tcksrea << 8), - &mctl_phy->dtpr[3]); - writel((txp << 1) | (trfc << 17) | 0x800, &mctl_phy->dtpr[4]); - writel((trc << 17) | (trcd << 9) | (twtr << 1), &mctl_phy->dtpr[5]); - writel(0x0505, &mctl_phy->dtpr[6]); + writel_relaxed((tfaw << 17) | 0x28000400 | (tmrd << 1), + &mctl_phy->dtpr[1]); + writel_relaxed(((txs << 6) - 1) | (tcke << 17), &mctl_phy->dtpr[2]); + writel_relaxed(((txsdll << 22) - (0x1 << 16)) | twtr_sa | + (tcksrea << 8), &mctl_phy->dtpr[3]); + writel_relaxed((txp << 1) | (trfc << 17) | 0x800, &mctl_phy->dtpr[4]); + writel_relaxed((trc << 17) | (trcd << 9) | (twtr << 1), + &mctl_phy->dtpr[5]); + writel_relaxed(0x0505, &mctl_phy->dtpr[6]);
/* Configure DFI timing */ - writel(tcl | 0x2000200 | (t_rdata_en << 16) | 0x808000, - &mctl_ctl->dfitmg0); - writel(0x040201, &mctl_ctl->dfitmg1); + writel_relaxed(tcl | 0x2000200 | (t_rdata_en << 16) | 0x808000, + &mctl_ctl->dfitmg0); + writel_relaxed(0x040201, &mctl_ctl->dfitmg1);
/* Configure PHY timing */ - writel(tdinit0 | (tdinit1 << 20), &mctl_phy->ptr[3]); - writel(tdinit2 | (tdinit3 << 18), &mctl_phy->ptr[4]); + writel_relaxed(tdinit0 | (tdinit1 << 20), &mctl_phy->ptr[3]); + writel_relaxed(tdinit2 | (tdinit3 << 18), &mctl_phy->ptr[4]);
/* set refresh timing */ - writel((trefi << 16) | trfc, &mctl_ctl->rfshtmg); + writel_relaxed((trefi << 16) | trfc, &mctl_ctl->rfshtmg); + DMB; }
static void mctl_sys_init(struct dram_para *para) @@ -476,17 +480,17 @@ static void mctl_bit_delay_set(struct dram_para *para) val = readl(&mctl_phy->dx[i].bdlr0); for (j = 0; j < 4; j++) val += para->dx_write_delays[i][j] << (j * 8); - writel(val, &mctl_phy->dx[i].bdlr0); + writel_relaxed(val, &mctl_phy->dx[i].bdlr0);
val = readl(&mctl_phy->dx[i].bdlr1); for (j = 0; j < 4; j++) val += para->dx_write_delays[i][j + 4] << (j * 8); - writel(val, &mctl_phy->dx[i].bdlr1); + writel_relaxed(val, &mctl_phy->dx[i].bdlr1);
val = readl(&mctl_phy->dx[i].bdlr2); for (j = 0; j < 4; j++) val += para->dx_write_delays[i][j + 8] << (j * 8); - writel(val, &mctl_phy->dx[i].bdlr2); + writel_relaxed(val, &mctl_phy->dx[i].bdlr2); } clrbits_le32(&mctl_phy->pgcr[0], BIT(26));
@@ -494,22 +498,22 @@ static void mctl_bit_delay_set(struct dram_para *para) val = readl(&mctl_phy->dx[i].bdlr3); for (j = 0; j < 4; j++) val += para->dx_read_delays[i][j] << (j * 8); - writel(val, &mctl_phy->dx[i].bdlr3); + writel_relaxed(val, &mctl_phy->dx[i].bdlr3);
val = readl(&mctl_phy->dx[i].bdlr4); for (j = 0; j < 4; j++) val += para->dx_read_delays[i][j + 4] << (j * 8); - writel(val, &mctl_phy->dx[i].bdlr4); + writel_relaxed(val, &mctl_phy->dx[i].bdlr4);
val = readl(&mctl_phy->dx[i].bdlr5); for (j = 0; j < 4; j++) val += para->dx_read_delays[i][j + 8] << (j * 8); - writel(val, &mctl_phy->dx[i].bdlr5); + writel_relaxed(val, &mctl_phy->dx[i].bdlr5);
val = readl(&mctl_phy->dx[i].bdlr6); val += (para->dx_read_delays[i][12] << 8) | (para->dx_read_delays[i][13] << 16); - writel(val, &mctl_phy->dx[i].bdlr6); + writel_relaxed(val, &mctl_phy->dx[i].bdlr6); } setbits_le32(&mctl_phy->pgcr[0], BIT(26)); udelay(1); @@ -517,8 +521,9 @@ static void mctl_bit_delay_set(struct dram_para *para) for (i = 1; i < 14; i++) { val = readl(&mctl_phy->acbdlr[i]); val += 0x0a0a0a0a; - writel(val, &mctl_phy->acbdlr[i]); + writel_relaxed(val, &mctl_phy->acbdlr[i]); } + DMB; }
static void mctl_channel_init(struct dram_para *para)
participants (3)
-
Andre Przywara
-
André Przywara
-
Philipp Tomsich