[PATCH 0/4] sunxi: DRAM: H6: fixes and size reduction

Hi,
this contains two fixes for the Allwinner H6 DRAM code: patch 1/4 adds a DSB barrier instruction after the actual DRAM register setup, to make sure that subsequent DRAM accesses actually match the just programmed setup. The last patch makes sure the compiler does not optimise away the MMIO writes for the address map setup. The middle two patches help to bring the code size down (which would increase with just patch 4/4): by splitting the parameter struct into a constant and a dynamic part, we help the compiler with its constant propagation optimisation, so it can fold some parameters directly into the code. This helps with the notoriously tight H6 SPL.
Gunjan, please have a look at my version of your original OPi 3 LTS fix patch 1/4: Does that still work reliably? If not, can you add your "udelay(50);" right after this new DSB? And maybe experiment with the duration a bit?
Cheers, Andre
Andre Przywara (3): sunxi: DRAM: H6: const-ify DRAM function parameters sunxi: DRAM: H6: split struct dram_para sunxi: DRAM: H6: use proper MMIO accessors in mctl_set_addrmap()
Gunjan Gupta (1): sunxi: DRAM: H6: add barrier after finishing DRAM setup
.../include/asm/arch-sunxi/dram_sun50i_h6.h | 10 +- arch/arm/mach-sunxi/dram_sun50i_h6.c | 243 ++++++++++-------- .../mach-sunxi/dram_timings/h6_ddr3_1333.c | 2 +- arch/arm/mach-sunxi/dram_timings/h6_lpddr3.c | 2 +- 4 files changed, 138 insertions(+), 119 deletions(-)

From: Gunjan Gupta viraniac@gmail.com
During the DRAM controller setup, we program its registers for certain configurations (multiple times), then try to access the DRAM array, to detect the number of rows and columns in the used DRAM chips. This requires that all MMIO writes have reached the DRAM controller, before we actually access the DRAM. Add a DSB instruction at the end of the controller init function, that ensures that outstanding stores have been completed.
That hopefully fixes occasional DRAM init failures on some H6 boards like the Orange Pi 3 LTS, where this leads to the erroneous detection of 4GB instead of the actual 2GB.
Signed-off-by: Gunjan Gupta viraniac@gmail.com [Andre: move DSB, add comment] Signed-off-by: Andre Przywara andre.przywara@arm.com --- arch/arm/mach-sunxi/dram_sun50i_h6.c | 6 ++++++ 1 file changed, 6 insertions(+)
diff --git a/arch/arm/mach-sunxi/dram_sun50i_h6.c b/arch/arm/mach-sunxi/dram_sun50i_h6.c index bff2e42513c..43a2d19f084 100644 --- a/arch/arm/mach-sunxi/dram_sun50i_h6.c +++ b/arch/arm/mach-sunxi/dram_sun50i_h6.c @@ -554,6 +554,12 @@ static bool mctl_channel_init(struct dram_para *para) writel(0x7ff, &mctl_com->maer1); writel(0xffff, &mctl_com->maer2);
+ /* + * Make sure all MMIO writes are committed to the DRAM controller, + * so that accesses to the DRAM array adhere to the above programming. + */ + dsb(); + return true; }

Tested the patch. With only dsb, it hangs within 5 reboots itself. Adding udelay(50) moved it to once in 20 times. But it still hangs. So this patch does not solve the problem
Thanks & Regards Gunjan Gupta

There are quite some functions in the Allwinner H6 DRAM "driver", some of them actually change the parameters in the structure passed to them, but many are actually not.
To increase the optimisation potential for the code, mark those functions that just read members of the passed dram_para struct as "const". Use the opportunity to avoid the forward declarations by moving the mctl_core_init() function.
This in itself does not decrease the code size, but lays the groundwork for future changes doing so.
Signed-off-by: Andre Przywara andre.przywara@arm.com --- .../include/asm/arch-sunxi/dram_sun50i_h6.h | 6 +-- arch/arm/mach-sunxi/dram_sun50i_h6.c | 48 +++++++++---------- .../mach-sunxi/dram_timings/h6_ddr3_1333.c | 2 +- arch/arm/mach-sunxi/dram_timings/h6_lpddr3.c | 2 +- 4 files changed, 27 insertions(+), 31 deletions(-)
diff --git a/arch/arm/include/asm/arch-sunxi/dram_sun50i_h6.h b/arch/arm/include/asm/arch-sunxi/dram_sun50i_h6.h index be02655cdd5..a7c6435220f 100644 --- a/arch/arm/include/asm/arch-sunxi/dram_sun50i_h6.h +++ b/arch/arm/include/asm/arch-sunxi/dram_sun50i_h6.h @@ -313,8 +313,8 @@ check_member(sunxi_mctl_phy_reg, dx[3].reserved_0xf0, 0xaf0); */ #define RD_LINES_PER_BYTE_LANE (BITS_PER_BYTE + 6) struct dram_para { - u32 clk; - enum sunxi_dram_type type; + const u32 clk; + const enum sunxi_dram_type type; u8 cols; u8 rows; u8 ranks; @@ -331,6 +331,6 @@ static inline int ns_to_t(int nanoseconds) return DIV_ROUND_UP(ctrl_freq * nanoseconds, 1000); }
-void mctl_set_timing_params(struct dram_para *para); +void mctl_set_timing_params(const struct dram_para *para);
#endif /* _SUNXI_DRAM_SUN50I_H6_H */ diff --git a/arch/arm/mach-sunxi/dram_sun50i_h6.c b/arch/arm/mach-sunxi/dram_sun50i_h6.c index 43a2d19f084..1187f1960a0 100644 --- a/arch/arm/mach-sunxi/dram_sun50i_h6.c +++ b/arch/arm/mach-sunxi/dram_sun50i_h6.c @@ -36,25 +36,6 @@ * similar PHY is ZynqMP. */
-static void mctl_sys_init(struct dram_para *para); -static void mctl_com_init(struct dram_para *para); -static bool mctl_channel_init(struct dram_para *para); - -static bool mctl_core_init(struct dram_para *para) -{ - mctl_sys_init(para); - mctl_com_init(para); - switch (para->type) { - case SUNXI_DRAM_TYPE_LPDDR3: - case SUNXI_DRAM_TYPE_DDR3: - mctl_set_timing_params(para); - break; - default: - panic("Unsupported DRAM type!"); - }; - return mctl_channel_init(para); -} - /* PHY initialisation */ static void mctl_phy_pir_init(u32 val) { @@ -152,7 +133,7 @@ static void mctl_set_master_priority(void) MBUS_CONF(HDCP2, true, HIGH, 2, 100, 64, 32); }
-static void mctl_sys_init(struct dram_para *para) +static void mctl_sys_init(u32 clk_rate) { struct sunxi_ccm_reg * const ccm = (struct sunxi_ccm_reg *)SUNXI_CCM_BASE; @@ -173,7 +154,7 @@ static void mctl_sys_init(struct dram_para *para)
/* Set PLL5 rate to doubled DRAM clock rate */ writel(CCM_PLL5_CTRL_EN | CCM_PLL5_LOCK_EN | - CCM_PLL5_CTRL_N(para->clk * 2 / 24), &ccm->pll5_cfg); + CCM_PLL5_CTRL_N(clk_rate * 2 / 24), &ccm->pll5_cfg); mctl_await_completion(&ccm->pll5_cfg, CCM_PLL5_LOCK, CCM_PLL5_LOCK);
/* Configure DRAM mod clock */ @@ -198,7 +179,7 @@ static void mctl_sys_init(struct dram_para *para) writel(0x8000, &mctl_ctl->unk_0x00c); }
-static void mctl_set_addrmap(struct dram_para *para) +static void mctl_set_addrmap(const struct dram_para *para) { struct sunxi_mctl_ctl_reg * const mctl_ctl = (struct sunxi_mctl_ctl_reg *)SUNXI_DRAM_CTL0_BASE; @@ -284,7 +265,7 @@ static void mctl_set_addrmap(struct dram_para *para) mctl_ctl->addrmap[8] = 0x3F3F; }
-static void mctl_com_init(struct dram_para *para) +static void mctl_com_init(const struct dram_para *para) { struct sunxi_mctl_com_reg * const mctl_com = (struct sunxi_mctl_com_reg *)SUNXI_DRAM_COM_BASE; @@ -354,7 +335,7 @@ static void mctl_com_init(struct dram_para *para) } }
-static void mctl_bit_delay_set(struct dram_para *para) +static void mctl_bit_delay_set(const struct dram_para *para) { struct sunxi_mctl_phy_reg * const mctl_phy = (struct sunxi_mctl_phy_reg *)SUNXI_DRAM_PHY0_BASE; @@ -413,7 +394,7 @@ static void mctl_bit_delay_set(struct dram_para *para) } }
-static bool mctl_channel_init(struct dram_para *para) +static bool mctl_channel_init(const struct dram_para *para) { struct sunxi_mctl_com_reg * const mctl_com = (struct sunxi_mctl_com_reg *)SUNXI_DRAM_COM_BASE; @@ -563,6 +544,21 @@ static bool mctl_channel_init(struct dram_para *para) return true; }
+static bool mctl_core_init(const struct dram_para *para) +{ + mctl_sys_init(para->clk); + mctl_com_init(para); + switch (para->type) { + case SUNXI_DRAM_TYPE_LPDDR3: + case SUNXI_DRAM_TYPE_DDR3: + mctl_set_timing_params(para); + break; + default: + panic("Unsupported DRAM type!"); + }; + return mctl_channel_init(para); +} + static void mctl_auto_detect_rank_width(struct dram_para *para) { /* this is minimum size that it's supported */ @@ -637,7 +633,7 @@ static void mctl_auto_detect_dram_size(struct dram_para *para) } }
-unsigned long mctl_calc_size(struct dram_para *para) +unsigned long mctl_calc_size(const struct dram_para *para) { u8 width = para->bus_full_width ? 4 : 2;
diff --git a/arch/arm/mach-sunxi/dram_timings/h6_ddr3_1333.c b/arch/arm/mach-sunxi/dram_timings/h6_ddr3_1333.c index 2136ca3a4cb..6dde6e78717 100644 --- a/arch/arm/mach-sunxi/dram_timings/h6_ddr3_1333.c +++ b/arch/arm/mach-sunxi/dram_timings/h6_ddr3_1333.c @@ -38,7 +38,7 @@ static u32 mr_ddr3[7] = { };
/* TODO: flexible timing */ -void mctl_set_timing_params(struct dram_para *para) +void mctl_set_timing_params(const struct dram_para *para) { struct sunxi_mctl_ctl_reg * const mctl_ctl = (struct sunxi_mctl_ctl_reg *)SUNXI_DRAM_CTL0_BASE; diff --git a/arch/arm/mach-sunxi/dram_timings/h6_lpddr3.c b/arch/arm/mach-sunxi/dram_timings/h6_lpddr3.c index 10008601134..2a95484322c 100644 --- a/arch/arm/mach-sunxi/dram_timings/h6_lpddr3.c +++ b/arch/arm/mach-sunxi/dram_timings/h6_lpddr3.c @@ -17,7 +17,7 @@ static u32 mr_lpddr3[12] = { };
/* TODO: flexible timing */ -void mctl_set_timing_params(struct dram_para *para) +void mctl_set_timing_params(const struct dram_para *para) { struct sunxi_mctl_ctl_reg * const mctl_ctl = (struct sunxi_mctl_ctl_reg *)SUNXI_DRAM_CTL0_BASE;

On Saturday, October 21, 2023 3:10:23 AM CEST Andre Przywara wrote:
There are quite some functions in the Allwinner H6 DRAM "driver", some of them actually change the parameters in the structure passed to them, but many are actually not.
To increase the optimisation potential for the code, mark those functions that just read members of the passed dram_para struct as "const". Use the opportunity to avoid the forward declarations by moving the mctl_core_init() function.
This in itself does not decrease the code size, but lays the groundwork for future changes doing so.
Signed-off-by: Andre Przywara andre.przywara@arm.com
Reviewed-by: Jernej Skrabec jernej.skrabec@gmail.com
Best regards, Jernej
.../include/asm/arch-sunxi/dram_sun50i_h6.h | 6 +-- arch/arm/mach-sunxi/dram_sun50i_h6.c | 48 +++++++++---------- .../mach-sunxi/dram_timings/h6_ddr3_1333.c | 2 +- arch/arm/mach-sunxi/dram_timings/h6_lpddr3.c | 2 +- 4 files changed, 27 insertions(+), 31 deletions(-)
diff --git a/arch/arm/include/asm/arch-sunxi/dram_sun50i_h6.h b/arch/arm/include/asm/arch-sunxi/dram_sun50i_h6.h index be02655cdd5..a7c6435220f 100644 --- a/arch/arm/include/asm/arch-sunxi/dram_sun50i_h6.h +++ b/arch/arm/include/asm/arch-sunxi/dram_sun50i_h6.h @@ -313,8 +313,8 @@ check_member(sunxi_mctl_phy_reg, dx[3].reserved_0xf0, 0xaf0); */ #define RD_LINES_PER_BYTE_LANE (BITS_PER_BYTE + 6) struct dram_para {
- u32 clk;
- enum sunxi_dram_type type;
- const u32 clk;
- const enum sunxi_dram_type type; u8 cols; u8 rows; u8 ranks;
@@ -331,6 +331,6 @@ static inline int ns_to_t(int nanoseconds) return DIV_ROUND_UP(ctrl_freq * nanoseconds, 1000); }
-void mctl_set_timing_params(struct dram_para *para); +void mctl_set_timing_params(const struct dram_para *para);
#endif /* _SUNXI_DRAM_SUN50I_H6_H */ diff --git a/arch/arm/mach-sunxi/dram_sun50i_h6.c b/arch/arm/mach-sunxi/dram_sun50i_h6.c index 43a2d19f084..1187f1960a0 100644 --- a/arch/arm/mach-sunxi/dram_sun50i_h6.c +++ b/arch/arm/mach-sunxi/dram_sun50i_h6.c @@ -36,25 +36,6 @@
- similar PHY is ZynqMP.
*/
-static void mctl_sys_init(struct dram_para *para); -static void mctl_com_init(struct dram_para *para); -static bool mctl_channel_init(struct dram_para *para);
-static bool mctl_core_init(struct dram_para *para) -{
- mctl_sys_init(para);
- mctl_com_init(para);
- switch (para->type) {
- case SUNXI_DRAM_TYPE_LPDDR3:
- case SUNXI_DRAM_TYPE_DDR3:
mctl_set_timing_params(para);
break;
- default:
panic("Unsupported DRAM type!");
- };
- return mctl_channel_init(para);
-}
/* PHY initialisation */ static void mctl_phy_pir_init(u32 val) { @@ -152,7 +133,7 @@ static void mctl_set_master_priority(void) MBUS_CONF(HDCP2, true, HIGH, 2, 100, 64, 32); }
-static void mctl_sys_init(struct dram_para *para) +static void mctl_sys_init(u32 clk_rate) { struct sunxi_ccm_reg * const ccm = (struct sunxi_ccm_reg *)SUNXI_CCM_BASE; @@ -173,7 +154,7 @@ static void mctl_sys_init(struct dram_para *para)
/* Set PLL5 rate to doubled DRAM clock rate */ writel(CCM_PLL5_CTRL_EN | CCM_PLL5_LOCK_EN |
CCM_PLL5_CTRL_N(para->clk * 2 / 24), &ccm->pll5_cfg);
mctl_await_completion(&ccm->pll5_cfg, CCM_PLL5_LOCK,CCM_PLL5_CTRL_N(clk_rate * 2 / 24), &ccm->pll5_cfg);
CCM_PLL5_LOCK);
/* Configure DRAM mod clock */ @@ -198,7 +179,7 @@ static void mctl_sys_init(struct dram_para *para) writel(0x8000, &mctl_ctl->unk_0x00c); }
-static void mctl_set_addrmap(struct dram_para *para) +static void mctl_set_addrmap(const struct dram_para *para) { struct sunxi_mctl_ctl_reg * const mctl_ctl = (struct sunxi_mctl_ctl_reg
*)SUNXI_DRAM_CTL0_BASE;
@@ -284,7 +265,7 @@ static void mctl_set_addrmap(struct dram_para *para) mctl_ctl->addrmap[8] = 0x3F3F; }
-static void mctl_com_init(struct dram_para *para) +static void mctl_com_init(const struct dram_para *para) { struct sunxi_mctl_com_reg * const mctl_com = (struct sunxi_mctl_com_reg
*)SUNXI_DRAM_COM_BASE;
@@ -354,7 +335,7 @@ static void mctl_com_init(struct dram_para *para) } }
-static void mctl_bit_delay_set(struct dram_para *para) +static void mctl_bit_delay_set(const struct dram_para *para) { struct sunxi_mctl_phy_reg * const mctl_phy = (struct sunxi_mctl_phy_reg
*)SUNXI_DRAM_PHY0_BASE;
@@ -413,7 +394,7 @@ static void mctl_bit_delay_set(struct dram_para *para) } }
-static bool mctl_channel_init(struct dram_para *para) +static bool mctl_channel_init(const struct dram_para *para) { struct sunxi_mctl_com_reg * const mctl_com = (struct sunxi_mctl_com_reg
*)SUNXI_DRAM_COM_BASE;
@@ -563,6 +544,21 @@ static bool mctl_channel_init(struct dram_para *para) return true; }
+static bool mctl_core_init(const struct dram_para *para) +{
- mctl_sys_init(para->clk);
- mctl_com_init(para);
- switch (para->type) {
- case SUNXI_DRAM_TYPE_LPDDR3:
- case SUNXI_DRAM_TYPE_DDR3:
mctl_set_timing_params(para);
break;
- default:
panic("Unsupported DRAM type!");
- };
- return mctl_channel_init(para);
+}
static void mctl_auto_detect_rank_width(struct dram_para *para) { /* this is minimum size that it's supported */ @@ -637,7 +633,7 @@ static void mctl_auto_detect_dram_size(struct dram_para *para) } }
-unsigned long mctl_calc_size(struct dram_para *para) +unsigned long mctl_calc_size(const struct dram_para *para) { u8 width = para->bus_full_width ? 4 : 2;
diff --git a/arch/arm/mach-sunxi/dram_timings/h6_ddr3_1333.c b/arch/arm/mach-sunxi/dram_timings/h6_ddr3_1333.c index 2136ca3a4cb..6dde6e78717 100644 --- a/arch/arm/mach-sunxi/dram_timings/h6_ddr3_1333.c +++ b/arch/arm/mach-sunxi/dram_timings/h6_ddr3_1333.c @@ -38,7 +38,7 @@ static u32 mr_ddr3[7] = { };
/* TODO: flexible timing */ -void mctl_set_timing_params(struct dram_para *para) +void mctl_set_timing_params(const struct dram_para *para) { struct sunxi_mctl_ctl_reg * const mctl_ctl = (struct sunxi_mctl_ctl_reg
*)SUNXI_DRAM_CTL0_BASE;
diff --git a/arch/arm/mach-sunxi/dram_timings/h6_lpddr3.c b/arch/arm/mach-sunxi/dram_timings/h6_lpddr3.c index 10008601134..2a95484322c 100644 --- a/arch/arm/mach-sunxi/dram_timings/h6_lpddr3.c +++ b/arch/arm/mach-sunxi/dram_timings/h6_lpddr3.c @@ -17,7 +17,7 @@ static u32 mr_lpddr3[12] = { };
/* TODO: flexible timing */ -void mctl_set_timing_params(struct dram_para *para) +void mctl_set_timing_params(const struct dram_para *para) { struct sunxi_mctl_ctl_reg * const mctl_ctl = (struct sunxi_mctl_ctl_reg
*)SUNXI_DRAM_CTL0_BASE;

Currently there is one DRAM parameter struct for the Allwinner H6 DRAM "driver". It contains many fields that are compile time constants (set by Kconfig variables), though there are also some fields that are probed and changed over the runtime of the DRAM initialisation.
Because of this mixture, the compiler cannot properly optimise the code for size, as it does not consider constant propagation in its full potential.
Help the compiler out by splitting that structure into two: one that only contains values known at compile time, and another one where the values will actually change. The former can then be declared "const", which will let the compiler fold its values directly into the code using it. To facilitate this, the definition of the struct has to become "static const" outside of any functions, so move that part out of sunxi_dram_init().
That results in code savings around 500 bytes, which helps the notorously tight H6 SPL to stay within it current limit.
Signed-off-by: Andre Przywara andre.przywara@arm.com --- .../include/asm/arch-sunxi/dram_sun50i_h6.h | 12 +- arch/arm/mach-sunxi/dram_sun50i_h6.c | 134 +++++++++--------- 2 files changed, 77 insertions(+), 69 deletions(-)
diff --git a/arch/arm/include/asm/arch-sunxi/dram_sun50i_h6.h b/arch/arm/include/asm/arch-sunxi/dram_sun50i_h6.h index a7c6435220f..058f2cabb6e 100644 --- a/arch/arm/include/asm/arch-sunxi/dram_sun50i_h6.h +++ b/arch/arm/include/asm/arch-sunxi/dram_sun50i_h6.h @@ -313,17 +313,19 @@ check_member(sunxi_mctl_phy_reg, dx[3].reserved_0xf0, 0xaf0); */ #define RD_LINES_PER_BYTE_LANE (BITS_PER_BYTE + 6) struct dram_para { - const u32 clk; - const enum sunxi_dram_type type; + u32 clk; + enum sunxi_dram_type type; + u8 dx_read_delays[NR_OF_BYTE_LANES][RD_LINES_PER_BYTE_LANE]; + u8 dx_write_delays[NR_OF_BYTE_LANES][WR_LINES_PER_BYTE_LANE]; +}; + +struct dram_config { u8 cols; u8 rows; u8 ranks; u8 bus_full_width; - const u8 dx_read_delays[NR_OF_BYTE_LANES][RD_LINES_PER_BYTE_LANE]; - const u8 dx_write_delays[NR_OF_BYTE_LANES][WR_LINES_PER_BYTE_LANE]; };
- static inline int ns_to_t(int nanoseconds) { const unsigned int ctrl_freq = CONFIG_DRAM_CLK / 2; diff --git a/arch/arm/mach-sunxi/dram_sun50i_h6.c b/arch/arm/mach-sunxi/dram_sun50i_h6.c index 1187f1960a0..8e959f4c600 100644 --- a/arch/arm/mach-sunxi/dram_sun50i_h6.c +++ b/arch/arm/mach-sunxi/dram_sun50i_h6.c @@ -179,15 +179,15 @@ static void mctl_sys_init(u32 clk_rate) writel(0x8000, &mctl_ctl->unk_0x00c); }
-static void mctl_set_addrmap(const struct dram_para *para) +static void mctl_set_addrmap(const struct dram_config *config) { struct sunxi_mctl_ctl_reg * const mctl_ctl = (struct sunxi_mctl_ctl_reg *)SUNXI_DRAM_CTL0_BASE; - u8 cols = para->cols; - u8 rows = para->rows; - u8 ranks = para->ranks; + u8 cols = config->cols; + u8 rows = config->rows; + u8 ranks = config->ranks;
- if (!para->bus_full_width) + if (!config->bus_full_width) cols -= 1;
/* Ranks */ @@ -265,7 +265,8 @@ static void mctl_set_addrmap(const struct dram_para *para) mctl_ctl->addrmap[8] = 0x3F3F; }
-static void mctl_com_init(const struct dram_para *para) +static void mctl_com_init(const struct dram_para *para, + const struct dram_config *config) { struct sunxi_mctl_com_reg * const mctl_com = (struct sunxi_mctl_com_reg *)SUNXI_DRAM_COM_BASE; @@ -275,7 +276,7 @@ static void mctl_com_init(const struct dram_para *para) (struct sunxi_mctl_phy_reg *)SUNXI_DRAM_PHY0_BASE; u32 reg_val, tmp;
- mctl_set_addrmap(para); + mctl_set_addrmap(config);
setbits_le32(&mctl_com->cr, BIT(31));
@@ -294,12 +295,12 @@ static void mctl_com_init(const struct dram_para *para) clrsetbits_le32(&mctl_com->unk_0x008, 0x3f00, reg_val);
/* TODO: DDR4 */ - reg_val = MSTR_BURST_LENGTH(8) | MSTR_ACTIVE_RANKS(para->ranks); + reg_val = MSTR_BURST_LENGTH(8) | MSTR_ACTIVE_RANKS(config->ranks); if (para->type == SUNXI_DRAM_TYPE_LPDDR3) reg_val |= MSTR_DEVICETYPE_LPDDR3; if (para->type == SUNXI_DRAM_TYPE_DDR3) reg_val |= MSTR_DEVICETYPE_DDR3 | MSTR_2TMODE; - if (para->bus_full_width) + if (config->bus_full_width) reg_val |= MSTR_BUSWIDTH_FULL; else reg_val |= MSTR_BUSWIDTH_HALF; @@ -311,7 +312,7 @@ static void mctl_com_init(const struct dram_para *para) reg_val = DCR_DDR3 | DCR_DDR8BANK | DCR_DDR2T; writel(reg_val | 0x400, &mctl_phy->dcr);
- if (para->ranks == 2) + if (config->ranks == 2) writel(0x0303, &mctl_ctl->odtmap); else writel(0x0201, &mctl_ctl->odtmap); @@ -329,7 +330,7 @@ static void mctl_com_init(const struct dram_para *para) } writel(reg_val, &mctl_ctl->odtcfg);
- if (!para->bus_full_width) { + if (!config->bus_full_width) { writel(0x0, &mctl_phy->dx[2].gcr[0]); writel(0x0, &mctl_phy->dx[3].gcr[0]); } @@ -394,7 +395,8 @@ static void mctl_bit_delay_set(const struct dram_para *para) } }
-static bool mctl_channel_init(const struct dram_para *para) +static bool mctl_channel_init(const struct dram_para *para, + const struct dram_config *config) { struct sunxi_mctl_com_reg * const mctl_com = (struct sunxi_mctl_com_reg *)SUNXI_DRAM_COM_BASE; @@ -429,14 +431,14 @@ static bool mctl_channel_init(const struct dram_para *para)
udelay(100);
- if (para->ranks == 2) + if (config->ranks == 2) setbits_le32(&mctl_phy->dtcr[1], 0x30000); else clrsetbits_le32(&mctl_phy->dtcr[1], 0x30000, 0x10000);
if (sunxi_dram_is_lpddr(para->type)) clrbits_le32(&mctl_phy->dtcr[1], BIT(1)); - if (para->ranks == 2) { + if (config->ranks == 2) { writel(0x00010001, &mctl_phy->rankidr); writel(0x20000, &mctl_phy->odtcr); } else { @@ -544,10 +546,11 @@ static bool mctl_channel_init(const struct dram_para *para) return true; }
-static bool mctl_core_init(const struct dram_para *para) +static bool mctl_core_init(const struct dram_para *para, + const struct dram_config *config) { mctl_sys_init(para->clk); - mctl_com_init(para); + mctl_com_init(para, config); switch (para->type) { case SUNXI_DRAM_TYPE_LPDDR3: case SUNXI_DRAM_TYPE_DDR3: @@ -556,14 +559,15 @@ static bool mctl_core_init(const struct dram_para *para) default: panic("Unsupported DRAM type!"); }; - return mctl_channel_init(para); + return mctl_channel_init(para, config); }
-static void mctl_auto_detect_rank_width(struct dram_para *para) +static void mctl_auto_detect_rank_width(const struct dram_para *para, + struct dram_config *config) { /* this is minimum size that it's supported */ - para->cols = 8; - para->rows = 13; + config->cols = 8; + config->rows = 13;
/* * Previous versions of this driver tried to auto detect the rank @@ -579,68 +583,69 @@ static void mctl_auto_detect_rank_width(struct dram_para *para) */
debug("testing 32-bit width, rank = 2\n"); - para->bus_full_width = 1; - para->ranks = 2; - if (mctl_core_init(para)) + config->bus_full_width = 1; + config->ranks = 2; + if (mctl_core_init(para, config)) return;
debug("testing 32-bit width, rank = 1\n"); - para->bus_full_width = 1; - para->ranks = 1; - if (mctl_core_init(para)) + config->bus_full_width = 1; + config->ranks = 1; + if (mctl_core_init(para, config)) return;
debug("testing 16-bit width, rank = 2\n"); - para->bus_full_width = 0; - para->ranks = 2; - if (mctl_core_init(para)) + config->bus_full_width = 0; + config->ranks = 2; + if (mctl_core_init(para, config)) return;
debug("testing 16-bit width, rank = 1\n"); - para->bus_full_width = 0; - para->ranks = 1; - if (mctl_core_init(para)) + config->bus_full_width = 0; + config->ranks = 1; + if (mctl_core_init(para, config)) return;
panic("This DRAM setup is currently not supported.\n"); }
-static void mctl_auto_detect_dram_size(struct dram_para *para) +static void mctl_auto_detect_dram_size(const struct dram_para *para, + struct dram_config *config) { /* TODO: non-(LP)DDR3 */
/* detect row address bits */ - para->cols = 8; - para->rows = 18; - mctl_core_init(para); + config->cols = 8; + config->rows = 18; + mctl_core_init(para, config);
- for (para->rows = 13; para->rows < 18; para->rows++) { + for (config->rows = 13; config->rows < 18; config->rows++) { /* 8 banks, 8 bit per byte and 16/32 bit width */ - if (mctl_mem_matches((1 << (para->rows + para->cols + - 4 + para->bus_full_width)))) + if (mctl_mem_matches((1 << (config->rows + config->cols + + 4 + config->bus_full_width)))) break; }
/* detect column address bits */ - para->cols = 11; - mctl_core_init(para); + config->cols = 11; + mctl_core_init(para, config);
- for (para->cols = 8; para->cols < 11; para->cols++) { + for (config->cols = 8; config->cols < 11; config->cols++) { /* 8 bits per byte and 16/32 bit width */ - if (mctl_mem_matches(1 << (para->cols + 1 + - para->bus_full_width))) + if (mctl_mem_matches(1 << (config->cols + 1 + + config->bus_full_width))) break; } }
-unsigned long mctl_calc_size(const struct dram_para *para) +unsigned long mctl_calc_size(const struct dram_config *config) { - u8 width = para->bus_full_width ? 4 : 2; + u8 width = config->bus_full_width ? 4 : 2;
/* TODO: non-(LP)DDR3 */
/* 8 banks */ - return (1ULL << (para->cols + para->rows + 3)) * width * para->ranks; + return (1ULL << (config->cols + config->rows + 3)) * width * config->ranks; }
#define SUN50I_H6_LPDDR3_DX_WRITE_DELAYS \ @@ -665,36 +670,37 @@ unsigned long mctl_calc_size(const struct dram_para *para) { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, \ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }}
+static const struct dram_para para = { + .clk = CONFIG_DRAM_CLK, +#ifdef CONFIG_SUNXI_DRAM_H6_LPDDR3 + .type = SUNXI_DRAM_TYPE_LPDDR3, + .dx_read_delays = SUN50I_H6_LPDDR3_DX_READ_DELAYS, + .dx_write_delays = SUN50I_H6_LPDDR3_DX_WRITE_DELAYS, +#elif defined(CONFIG_SUNXI_DRAM_H6_DDR3_1333) + .type = SUNXI_DRAM_TYPE_DDR3, + .dx_read_delays = SUN50I_H6_DDR3_DX_READ_DELAYS, + .dx_write_delays = SUN50I_H6_DDR3_DX_WRITE_DELAYS, +#endif +}; + unsigned long sunxi_dram_init(void) { struct sunxi_mctl_com_reg * const mctl_com = (struct sunxi_mctl_com_reg *)SUNXI_DRAM_COM_BASE; struct sunxi_prcm_reg *const prcm = (struct sunxi_prcm_reg *)SUNXI_PRCM_BASE; - struct dram_para para = { - .clk = CONFIG_DRAM_CLK, -#ifdef CONFIG_SUNXI_DRAM_H6_LPDDR3 - .type = SUNXI_DRAM_TYPE_LPDDR3, - .dx_read_delays = SUN50I_H6_LPDDR3_DX_READ_DELAYS, - .dx_write_delays = SUN50I_H6_LPDDR3_DX_WRITE_DELAYS, -#elif defined(CONFIG_SUNXI_DRAM_H6_DDR3_1333) - .type = SUNXI_DRAM_TYPE_DDR3, - .dx_read_delays = SUN50I_H6_DDR3_DX_READ_DELAYS, - .dx_write_delays = SUN50I_H6_DDR3_DX_WRITE_DELAYS, -#endif - }; - + struct dram_config config; unsigned long size;
setbits_le32(&prcm->res_cal_ctrl, BIT(8)); clrbits_le32(&prcm->ohms240, 0x3f);
- mctl_auto_detect_rank_width(¶); - mctl_auto_detect_dram_size(¶); + mctl_auto_detect_rank_width(¶, &config); + mctl_auto_detect_dram_size(¶, &config);
- mctl_core_init(¶); + mctl_core_init(¶, &config);
- size = mctl_calc_size(¶); + size = mctl_calc_size(&config);
clrsetbits_le32(&mctl_com->cr, 0xf0, (size >> (10 + 10 + 4)) & 0xf0);

On Saturday, October 21, 2023 3:10:24 AM CEST Andre Przywara wrote:
Currently there is one DRAM parameter struct for the Allwinner H6 DRAM "driver". It contains many fields that are compile time constants (set by Kconfig variables), though there are also some fields that are probed and changed over the runtime of the DRAM initialisation.
Because of this mixture, the compiler cannot properly optimise the code for size, as it does not consider constant propagation in its full potential.
Help the compiler out by splitting that structure into two: one that only contains values known at compile time, and another one where the values will actually change. The former can then be declared "const", which will let the compiler fold its values directly into the code using it. To facilitate this, the definition of the struct has to become "static const" outside of any functions, so move that part out of sunxi_dram_init().
That results in code savings around 500 bytes, which helps the notorously tight H6 SPL to stay within it current limit.
Signed-off-by: Andre Przywara andre.przywara@arm.com
Reviewed-by: Jernej Skrabec jernej.skrabec@gmail.com
Best regards, Jernej
.../include/asm/arch-sunxi/dram_sun50i_h6.h | 12 +- arch/arm/mach-sunxi/dram_sun50i_h6.c | 134 +++++++++--------- 2 files changed, 77 insertions(+), 69 deletions(-)
diff --git a/arch/arm/include/asm/arch-sunxi/dram_sun50i_h6.h b/arch/arm/include/asm/arch-sunxi/dram_sun50i_h6.h index a7c6435220f..058f2cabb6e 100644 --- a/arch/arm/include/asm/arch-sunxi/dram_sun50i_h6.h +++ b/arch/arm/include/asm/arch-sunxi/dram_sun50i_h6.h @@ -313,17 +313,19 @@ check_member(sunxi_mctl_phy_reg, dx[3].reserved_0xf0, 0xaf0); */ #define RD_LINES_PER_BYTE_LANE (BITS_PER_BYTE + 6) struct dram_para {
- const u32 clk;
- const enum sunxi_dram_type type;
- u32 clk;
- enum sunxi_dram_type type;
- u8 dx_read_delays[NR_OF_BYTE_LANES][RD_LINES_PER_BYTE_LANE];
- u8 dx_write_delays[NR_OF_BYTE_LANES][WR_LINES_PER_BYTE_LANE];
+};
+struct dram_config { u8 cols; u8 rows; u8 ranks; u8 bus_full_width;
- const u8 dx_read_delays[NR_OF_BYTE_LANES][RD_LINES_PER_BYTE_LANE];
- const u8 dx_write_delays[NR_OF_BYTE_LANES]
[WR_LINES_PER_BYTE_LANE];
};
static inline int ns_to_t(int nanoseconds) { const unsigned int ctrl_freq = CONFIG_DRAM_CLK / 2; diff --git a/arch/arm/mach-sunxi/dram_sun50i_h6.c b/arch/arm/mach-sunxi/dram_sun50i_h6.c index 1187f1960a0..8e959f4c600 100644 --- a/arch/arm/mach-sunxi/dram_sun50i_h6.c +++ b/arch/arm/mach-sunxi/dram_sun50i_h6.c @@ -179,15 +179,15 @@ static void mctl_sys_init(u32 clk_rate) writel(0x8000, &mctl_ctl->unk_0x00c); }
-static void mctl_set_addrmap(const struct dram_para *para) +static void mctl_set_addrmap(const struct dram_config *config) { struct sunxi_mctl_ctl_reg * const mctl_ctl = (struct sunxi_mctl_ctl_reg
*)SUNXI_DRAM_CTL0_BASE;
- u8 cols = para->cols;
- u8 rows = para->rows;
- u8 ranks = para->ranks;
- u8 cols = config->cols;
- u8 rows = config->rows;
- u8 ranks = config->ranks;
- if (!para->bus_full_width)
if (!config->bus_full_width) cols -= 1;
/* Ranks */
@@ -265,7 +265,8 @@ static void mctl_set_addrmap(const struct dram_para *para) mctl_ctl->addrmap[8] = 0x3F3F; }
-static void mctl_com_init(const struct dram_para *para) +static void mctl_com_init(const struct dram_para *para,
const struct dram_config *config)
{ struct sunxi_mctl_com_reg * const mctl_com = (struct sunxi_mctl_com_reg
*)SUNXI_DRAM_COM_BASE;
@@ -275,7 +276,7 @@ static void mctl_com_init(const struct dram_para *para) (struct sunxi_mctl_phy_reg
*)SUNXI_DRAM_PHY0_BASE;
u32 reg_val, tmp;
- mctl_set_addrmap(para);
mctl_set_addrmap(config);
setbits_le32(&mctl_com->cr, BIT(31));
@@ -294,12 +295,12 @@ static void mctl_com_init(const struct dram_para *para) clrsetbits_le32(&mctl_com->unk_0x008, 0x3f00, reg_val);
/* TODO: DDR4 */
- reg_val = MSTR_BURST_LENGTH(8) | MSTR_ACTIVE_RANKS(para->ranks);
- reg_val = MSTR_BURST_LENGTH(8) | MSTR_ACTIVE_RANKS(config->ranks); if (para->type == SUNXI_DRAM_TYPE_LPDDR3) reg_val |= MSTR_DEVICETYPE_LPDDR3; if (para->type == SUNXI_DRAM_TYPE_DDR3) reg_val |= MSTR_DEVICETYPE_DDR3 | MSTR_2TMODE;
- if (para->bus_full_width)
- if (config->bus_full_width) reg_val |= MSTR_BUSWIDTH_FULL; else reg_val |= MSTR_BUSWIDTH_HALF;
@@ -311,7 +312,7 @@ static void mctl_com_init(const struct dram_para *para) reg_val = DCR_DDR3 | DCR_DDR8BANK | DCR_DDR2T; writel(reg_val | 0x400, &mctl_phy->dcr);
- if (para->ranks == 2)
- if (config->ranks == 2) writel(0x0303, &mctl_ctl->odtmap); else writel(0x0201, &mctl_ctl->odtmap);
@@ -329,7 +330,7 @@ static void mctl_com_init(const struct dram_para *para) } writel(reg_val, &mctl_ctl->odtcfg);
- if (!para->bus_full_width) {
- if (!config->bus_full_width) { writel(0x0, &mctl_phy->dx[2].gcr[0]); writel(0x0, &mctl_phy->dx[3].gcr[0]); }
@@ -394,7 +395,8 @@ static void mctl_bit_delay_set(const struct dram_para *para) } }
-static bool mctl_channel_init(const struct dram_para *para) +static bool mctl_channel_init(const struct dram_para *para,
const struct dram_config *config)
{ struct sunxi_mctl_com_reg * const mctl_com = (struct sunxi_mctl_com_reg
*)SUNXI_DRAM_COM_BASE;
@@ -429,14 +431,14 @@ static bool mctl_channel_init(const struct dram_para *para)
udelay(100);
- if (para->ranks == 2)
if (config->ranks == 2) setbits_le32(&mctl_phy->dtcr[1], 0x30000); else clrsetbits_le32(&mctl_phy->dtcr[1], 0x30000, 0x10000);
if (sunxi_dram_is_lpddr(para->type)) clrbits_le32(&mctl_phy->dtcr[1], BIT(1));
- if (para->ranks == 2) {
- if (config->ranks == 2) { writel(0x00010001, &mctl_phy->rankidr); writel(0x20000, &mctl_phy->odtcr); } else {
@@ -544,10 +546,11 @@ static bool mctl_channel_init(const struct dram_para *para) return true; }
-static bool mctl_core_init(const struct dram_para *para) +static bool mctl_core_init(const struct dram_para *para,
const struct dram_config *config)
{ mctl_sys_init(para->clk);
- mctl_com_init(para);
- mctl_com_init(para, config); switch (para->type) { case SUNXI_DRAM_TYPE_LPDDR3: case SUNXI_DRAM_TYPE_DDR3:
@@ -556,14 +559,15 @@ static bool mctl_core_init(const struct dram_para *para) default: panic("Unsupported DRAM type!"); };
- return mctl_channel_init(para);
- return mctl_channel_init(para, config);
}
-static void mctl_auto_detect_rank_width(struct dram_para *para) +static void mctl_auto_detect_rank_width(const struct dram_para *para,
struct dram_config
*config)
{ /* this is minimum size that it's supported */
- para->cols = 8;
- para->rows = 13;
config->cols = 8;
config->rows = 13;
/*
- Previous versions of this driver tried to auto detect the rank
@@ -579,68 +583,69 @@ static void mctl_auto_detect_rank_width(struct dram_para *para) */
debug("testing 32-bit width, rank = 2\n");
- para->bus_full_width = 1;
- para->ranks = 2;
- if (mctl_core_init(para))
config->bus_full_width = 1;
config->ranks = 2;
if (mctl_core_init(para, config)) return;
debug("testing 32-bit width, rank = 1\n");
- para->bus_full_width = 1;
- para->ranks = 1;
- if (mctl_core_init(para))
config->bus_full_width = 1;
config->ranks = 1;
if (mctl_core_init(para, config)) return;
debug("testing 16-bit width, rank = 2\n");
- para->bus_full_width = 0;
- para->ranks = 2;
- if (mctl_core_init(para))
config->bus_full_width = 0;
config->ranks = 2;
if (mctl_core_init(para, config)) return;
debug("testing 16-bit width, rank = 1\n");
- para->bus_full_width = 0;
- para->ranks = 1;
- if (mctl_core_init(para))
config->bus_full_width = 0;
config->ranks = 1;
if (mctl_core_init(para, config)) return;
panic("This DRAM setup is currently not supported.\n");
}
-static void mctl_auto_detect_dram_size(struct dram_para *para) +static void mctl_auto_detect_dram_size(const struct dram_para *para,
struct dram_config *config)
{ /* TODO: non-(LP)DDR3 */
/* detect row address bits */
- para->cols = 8;
- para->rows = 18;
- mctl_core_init(para);
- config->cols = 8;
- config->rows = 18;
- mctl_core_init(para, config);
- for (para->rows = 13; para->rows < 18; para->rows++) {
- for (config->rows = 13; config->rows < 18; config->rows++) { /* 8 banks, 8 bit per byte and 16/32 bit width */
if (mctl_mem_matches((1 << (para->rows + para->cols +
4 + para-
bus_full_width))))
if (mctl_mem_matches((1 << (config->rows + config->cols +
4 + config-
bus_full_width)))) break; }
/* detect column address bits */
- para->cols = 11;
- mctl_core_init(para);
- config->cols = 11;
- mctl_core_init(para, config);
- for (para->cols = 8; para->cols < 11; para->cols++) {
- for (config->cols = 8; config->cols < 11; config->cols++) { /* 8 bits per byte and 16/32 bit width */
if (mctl_mem_matches(1 << (para->cols + 1 +
para-
bus_full_width)))
if (mctl_mem_matches(1 << (config->cols + 1 +
config-
bus_full_width))) break; } }
-unsigned long mctl_calc_size(const struct dram_para *para) +unsigned long mctl_calc_size(const struct dram_config *config) {
- u8 width = para->bus_full_width ? 4 : 2;
u8 width = config->bus_full_width ? 4 : 2;
/* TODO: non-(LP)DDR3 */
/* 8 banks */
- return (1ULL << (para->cols + para->rows + 3)) * width * para-
ranks;
- return (1ULL << (config->cols + config->rows + 3)) * width *
config->ranks; }
#define SUN50I_H6_LPDDR3_DX_WRITE_DELAYS \ @@ -665,36 +670,37 @@ unsigned long mctl_calc_size(const struct dram_para *para) { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, \ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }}
+static const struct dram_para para = {
- .clk = CONFIG_DRAM_CLK,
+#ifdef CONFIG_SUNXI_DRAM_H6_LPDDR3
- .type = SUNXI_DRAM_TYPE_LPDDR3,
- .dx_read_delays = SUN50I_H6_LPDDR3_DX_READ_DELAYS,
- .dx_write_delays = SUN50I_H6_LPDDR3_DX_WRITE_DELAYS,
+#elif defined(CONFIG_SUNXI_DRAM_H6_DDR3_1333)
- .type = SUNXI_DRAM_TYPE_DDR3,
- .dx_read_delays = SUN50I_H6_DDR3_DX_READ_DELAYS,
- .dx_write_delays = SUN50I_H6_DDR3_DX_WRITE_DELAYS,
+#endif +};
unsigned long sunxi_dram_init(void) { struct sunxi_mctl_com_reg * const mctl_com = (struct sunxi_mctl_com_reg
*)SUNXI_DRAM_COM_BASE;
struct sunxi_prcm_reg *const prcm = (struct sunxi_prcm_reg *)SUNXI_PRCM_BASE;
- struct dram_para para = {
.clk = CONFIG_DRAM_CLK,
-#ifdef CONFIG_SUNXI_DRAM_H6_LPDDR3
.type = SUNXI_DRAM_TYPE_LPDDR3,
.dx_read_delays = SUN50I_H6_LPDDR3_DX_READ_DELAYS,
.dx_write_delays = SUN50I_H6_LPDDR3_DX_WRITE_DELAYS,
-#elif defined(CONFIG_SUNXI_DRAM_H6_DDR3_1333)
.type = SUNXI_DRAM_TYPE_DDR3,
.dx_read_delays = SUN50I_H6_DDR3_DX_READ_DELAYS,
.dx_write_delays = SUN50I_H6_DDR3_DX_WRITE_DELAYS,
-#endif
- };
struct dram_config config; unsigned long size;
setbits_le32(&prcm->res_cal_ctrl, BIT(8)); clrbits_le32(&prcm->ohms240, 0x3f);
- mctl_auto_detect_rank_width(¶);
- mctl_auto_detect_dram_size(¶);
- mctl_auto_detect_rank_width(¶, &config);
- mctl_auto_detect_dram_size(¶, &config);
- mctl_core_init(¶);
- mctl_core_init(¶, &config);
- size = mctl_calc_size(¶);
size = mctl_calc_size(&config);
clrsetbits_le32(&mctl_com->cr, 0xf0, (size >> (10 + 10 + 4)) &
0xf0);

For accessing MMIO registers, we must not rely on the compiler to realise every access to a struct which we made point to some MMIO base address. From a compiler's point of view, those writes could be considered pointless, since no code consumes them later on: the compiler would actually be free to optimise them away.
So we need at least the "volatile" attribute in the pointer declaration, but a better fix is of course to use the proper MMIO accessors (writel), as we do everywhere else. Since MMIO writes are already ordered within themselves (courtesy of the "device nGnRnE" memory attribures), and we don't do any DMA operation which would requrire synchronising with normal memory accesses, we can use the cheaper writel_relaxed() accessor, which have the additional advantange of saving one instruction, for each call.
Signed-off-by: Andre Przywara andre.przywara@arm.com --- arch/arm/mach-sunxi/dram_sun50i_h6.c | 69 ++++++++++++++++------------ 1 file changed, 39 insertions(+), 30 deletions(-)
diff --git a/arch/arm/mach-sunxi/dram_sun50i_h6.c b/arch/arm/mach-sunxi/dram_sun50i_h6.c index 8e959f4c600..89e855c1a7d 100644 --- a/arch/arm/mach-sunxi/dram_sun50i_h6.c +++ b/arch/arm/mach-sunxi/dram_sun50i_h6.c @@ -192,77 +192,86 @@ static void mctl_set_addrmap(const struct dram_config *config)
/* Ranks */ if (ranks == 2) - mctl_ctl->addrmap[0] = rows + cols - 3; + writel_relaxed(rows + cols - 3, &mctl_ctl->addrmap[0]); else - mctl_ctl->addrmap[0] = 0x1F; + writel_relaxed(0x1f, &mctl_ctl->addrmap[0]);
/* Banks, hardcoded to 8 banks now */ - mctl_ctl->addrmap[1] = (cols - 2) | (cols - 2) << 8 | (cols - 2) << 16; + writel_relaxed((cols - 2) | (cols - 2) << 8 | (cols - 2) << 16, + &mctl_ctl->addrmap[1]);
/* Columns */ - mctl_ctl->addrmap[2] = 0; + writel_relaxed(0, &mctl_ctl->addrmap[2]); switch (cols) { case 7: - mctl_ctl->addrmap[3] = 0x1F1F1F00; - mctl_ctl->addrmap[4] = 0x1F1F; + writel_relaxed(0x1f1f1f00, &mctl_ctl->addrmap[3]); + writel_relaxed(0x1f1f, &mctl_ctl->addrmap[4]); break; case 8: - mctl_ctl->addrmap[3] = 0x1F1F0000; - mctl_ctl->addrmap[4] = 0x1F1F; + writel_relaxed(0x1f1f0000, &mctl_ctl->addrmap[3]); + writel_relaxed(0x1f1f, &mctl_ctl->addrmap[4]); break; case 9: - mctl_ctl->addrmap[3] = 0x1F000000; - mctl_ctl->addrmap[4] = 0x1F1F; + writel_relaxed(0x1f000000, &mctl_ctl->addrmap[3]); + writel_relaxed(0x1f1f, &mctl_ctl->addrmap[4]); break; case 10: - mctl_ctl->addrmap[3] = 0; - mctl_ctl->addrmap[4] = 0x1F1F; + writel_relaxed(0, &mctl_ctl->addrmap[3]); + writel_relaxed(0x1f1f, &mctl_ctl->addrmap[4]); break; case 11: - mctl_ctl->addrmap[3] = 0; - mctl_ctl->addrmap[4] = 0x1F00; + writel_relaxed(0, &mctl_ctl->addrmap[3]); + writel_relaxed(0x1f00, &mctl_ctl->addrmap[4]); break; case 12: - mctl_ctl->addrmap[3] = 0; - mctl_ctl->addrmap[4] = 0; + writel_relaxed(0, &mctl_ctl->addrmap[3]); + writel_relaxed(0, &mctl_ctl->addrmap[4]); break; default: panic("Unsupported DRAM configuration: column number invalid\n"); }
/* Rows */ - mctl_ctl->addrmap[5] = (cols - 3) | ((cols - 3) << 8) | ((cols - 3) << 16) | ((cols - 3) << 24); + writel_relaxed((cols - 3) | ((cols - 3) << 8) | ((cols - 3) << 16) | ((cols - 3) << 24), + &mctl_ctl->addrmap[5]); switch (rows) { case 13: - mctl_ctl->addrmap[6] = (cols - 3) | 0x0F0F0F00; - mctl_ctl->addrmap[7] = 0x0F0F; + writel_relaxed((cols - 3) | 0x0f0f0f00, &mctl_ctl->addrmap[6]); + writel_relaxed(0x0f0f, &mctl_ctl->addrmap[7]); break; case 14: - mctl_ctl->addrmap[6] = (cols - 3) | ((cols - 3) << 8) | 0x0F0F0000; - mctl_ctl->addrmap[7] = 0x0F0F; + writel_relaxed((cols - 3) | ((cols - 3) << 8) | 0x0f0f0000, + &mctl_ctl->addrmap[6]); + writel_relaxed(0x0f0f, &mctl_ctl->addrmap[7]); break; case 15: - mctl_ctl->addrmap[6] = (cols - 3) | ((cols - 3) << 8) | ((cols - 3) << 16) | 0x0F000000; - mctl_ctl->addrmap[7] = 0x0F0F; + writel_relaxed((cols - 3) | ((cols - 3) << 8) | ((cols - 3) << 16) | 0x0f000000, + &mctl_ctl->addrmap[6]); + writel_relaxed(0x0f0f, &mctl_ctl->addrmap[7]); break; case 16: - mctl_ctl->addrmap[6] = (cols - 3) | ((cols - 3) << 8) | ((cols - 3) << 16) | ((cols - 3) << 24); - mctl_ctl->addrmap[7] = 0x0F0F; + writel_relaxed((cols - 3) | ((cols - 3) << 8) | ((cols - 3) << 16) | ((cols - 3) << 24), + &mctl_ctl->addrmap[6]); + writel_relaxed(0x0f0f, &mctl_ctl->addrmap[7]); break; case 17: - mctl_ctl->addrmap[6] = (cols - 3) | ((cols - 3) << 8) | ((cols - 3) << 16) | ((cols - 3) << 24); - mctl_ctl->addrmap[7] = (cols - 3) | 0x0F00; + writel_relaxed((cols - 3) | ((cols - 3) << 8) | ((cols - 3) << 16) | ((cols - 3) << 24), + &mctl_ctl->addrmap[6]); + writel_relaxed((cols - 3) | 0x0f00, &mctl_ctl->addrmap[7]); break; case 18: - mctl_ctl->addrmap[6] = (cols - 3) | ((cols - 3) << 8) | ((cols - 3) << 16) | ((cols - 3) << 24); - mctl_ctl->addrmap[7] = (cols - 3) | ((cols - 3) << 8); + writel_relaxed((cols - 3) | ((cols - 3) << 8) | ((cols - 3) << 16) | ((cols - 3) << 24), + &mctl_ctl->addrmap[6]); + writel_relaxed((cols - 3) | ((cols - 3) << 8), + &mctl_ctl->addrmap[7]); break; default: panic("Unsupported DRAM configuration: row number invalid\n"); }
/* Bank groups, DDR4 only */ - mctl_ctl->addrmap[8] = 0x3F3F; + writel_relaxed(0x3f3f, &mctl_ctl->addrmap[8]); + dsb(); }
static void mctl_com_init(const struct dram_para *para,

On Saturday, October 21, 2023 3:10:25 AM CEST Andre Przywara wrote:
For accessing MMIO registers, we must not rely on the compiler to realise every access to a struct which we made point to some MMIO base address. From a compiler's point of view, those writes could be considered pointless, since no code consumes them later on: the compiler would actually be free to optimise them away.
So we need at least the "volatile" attribute in the pointer declaration, but a better fix is of course to use the proper MMIO accessors (writel), as we do everywhere else. Since MMIO writes are already ordered within themselves (courtesy of the "device nGnRnE" memory attribures), and we don't do any DMA operation which would requrire synchronising with normal memory accesses, we can use the cheaper writel_relaxed() accessor, which have the additional advantange of saving one instruction, for each call.
Signed-off-by: Andre Przywara andre.przywara@arm.com
Great catch! I wonder if this ever caused any issue.
Reviewed-by: Jernej Skrabec jernej.skrabec@gmail.com
Best regards, Jernej
arch/arm/mach-sunxi/dram_sun50i_h6.c | 69 ++++++++++++++++------------ 1 file changed, 39 insertions(+), 30 deletions(-)
diff --git a/arch/arm/mach-sunxi/dram_sun50i_h6.c b/arch/arm/mach-sunxi/dram_sun50i_h6.c index 8e959f4c600..89e855c1a7d 100644 --- a/arch/arm/mach-sunxi/dram_sun50i_h6.c +++ b/arch/arm/mach-sunxi/dram_sun50i_h6.c @@ -192,77 +192,86 @@ static void mctl_set_addrmap(const struct dram_config *config)
/* Ranks */ if (ranks == 2)
mctl_ctl->addrmap[0] = rows + cols - 3;
elsewritel_relaxed(rows + cols - 3, &mctl_ctl->addrmap[0]);
mctl_ctl->addrmap[0] = 0x1F;
writel_relaxed(0x1f, &mctl_ctl->addrmap[0]);
/* Banks, hardcoded to 8 banks now */
- mctl_ctl->addrmap[1] = (cols - 2) | (cols - 2) << 8 | (cols - 2)
<< 16;
writel_relaxed((cols - 2) | (cols - 2) << 8 | (cols - 2) << 16,
&mctl_ctl->addrmap[1]);
/* Columns */
- mctl_ctl->addrmap[2] = 0;
- writel_relaxed(0, &mctl_ctl->addrmap[2]); switch (cols) { case 7:
mctl_ctl->addrmap[3] = 0x1F1F1F00;
mctl_ctl->addrmap[4] = 0x1F1F;
writel_relaxed(0x1f1f1f00, &mctl_ctl->addrmap[3]);
break; case 8:writel_relaxed(0x1f1f, &mctl_ctl->addrmap[4]);
mctl_ctl->addrmap[3] = 0x1F1F0000;
mctl_ctl->addrmap[4] = 0x1F1F;
writel_relaxed(0x1f1f0000, &mctl_ctl->addrmap[3]);
break; case 9:writel_relaxed(0x1f1f, &mctl_ctl->addrmap[4]);
mctl_ctl->addrmap[3] = 0x1F000000;
mctl_ctl->addrmap[4] = 0x1F1F;
writel_relaxed(0x1f000000, &mctl_ctl->addrmap[3]);
break; case 10:writel_relaxed(0x1f1f, &mctl_ctl->addrmap[4]);
mctl_ctl->addrmap[3] = 0;
mctl_ctl->addrmap[4] = 0x1F1F;
writel_relaxed(0, &mctl_ctl->addrmap[3]);
break; case 11:writel_relaxed(0x1f1f, &mctl_ctl->addrmap[4]);
mctl_ctl->addrmap[3] = 0;
mctl_ctl->addrmap[4] = 0x1F00;
writel_relaxed(0, &mctl_ctl->addrmap[3]);
break; case 12:writel_relaxed(0x1f00, &mctl_ctl->addrmap[4]);
mctl_ctl->addrmap[3] = 0;
mctl_ctl->addrmap[4] = 0;
writel_relaxed(0, &mctl_ctl->addrmap[3]);
break; default: panic("Unsupported DRAM configuration: column numberwritel_relaxed(0, &mctl_ctl->addrmap[4]);
invalid\n");
}
/* Rows */
- mctl_ctl->addrmap[5] = (cols - 3) | ((cols - 3) << 8) | ((cols -
3) << 16)
| ((cols - 3) << 24); + writel_relaxed((cols - 3) | ((cols - 3) << 8) | ((cols - 3) << 16) | ((cols - 3) << 24), + &mctl_ctl- addrmap[5]); switch (rows) { case 13:
mctl_ctl->addrmap[6] = (cols - 3) | 0x0F0F0F00;
mctl_ctl->addrmap[7] = 0x0F0F;
writel_relaxed((cols - 3) | 0x0f0f0f00, &mctl_ctl-
addrmap[6]);
break; case 14:writel_relaxed(0x0f0f, &mctl_ctl->addrmap[7]);
mctl_ctl->addrmap[6] = (cols - 3) | ((cols - 3) << 8) |
0x0F0F0000;
mctl_ctl->addrmap[7] = 0x0F0F;
writel_relaxed((cols - 3) | ((cols - 3) << 8) |
0x0f0f0000,
&mctl_ctl->addrmap[6]);
break; case 15:writel_relaxed(0x0f0f, &mctl_ctl->addrmap[7]);
mctl_ctl->addrmap[6] = (cols - 3) | ((cols - 3) << 8) |
((cols - 3) <<
- | 0x0F000000; - mctl_ctl->addrmap[7] = 0x0F0F;
writel_relaxed((cols - 3) | ((cols - 3) << 8) | ((cols -
3) << 16) |
0x0f000000, + &mctl_ctl- addrmap[6]);
break; case 16:writel_relaxed(0x0f0f, &mctl_ctl->addrmap[7]);
mctl_ctl->addrmap[6] = (cols - 3) | ((cols - 3) << 8) |
((cols - 3) <<
- | ((cols - 3) << 24); - mctl_ctl->addrmap[7] = 0x0F0F;
writel_relaxed((cols - 3) | ((cols - 3) << 8) | ((cols -
3) << 16) |
((cols - 3) << 24), + &mctl_ctl- addrmap[6]);
break; case 17:writel_relaxed(0x0f0f, &mctl_ctl->addrmap[7]);
mctl_ctl->addrmap[6] = (cols - 3) | ((cols - 3) << 8) |
((cols - 3) <<
- | ((cols - 3) << 24); - mctl_ctl->addrmap[7] = (cols - 3) |
0x0F00;
writel_relaxed((cols - 3) | ((cols - 3) << 8) | ((cols -
3) << 16) |
((cols - 3) << 24), + &mctl_ctl- addrmap[6]);
writel_relaxed((cols - 3) | 0x0f00, &mctl_ctl-
addrmap[7]); break; case 18:
mctl_ctl->addrmap[6] = (cols - 3) | ((cols - 3) << 8) |
((cols - 3) <<
- | ((cols - 3) << 24); - mctl_ctl->addrmap[7] = (cols - 3) |
((cols -
- << 8);
writel_relaxed((cols - 3) | ((cols - 3) << 8) | ((cols -
3) << 16) |
((cols - 3) << 24), + &mctl_ctl- addrmap[6]);
writel_relaxed((cols - 3) | ((cols - 3) << 8),
break; default: panic("Unsupported DRAM configuration: row number&mctl_ctl->addrmap[7]);
invalid\n");
}
/* Bank groups, DDR4 only */
- mctl_ctl->addrmap[8] = 0x3F3F;
- writel_relaxed(0x3f3f, &mctl_ctl->addrmap[8]);
- dsb();
}
static void mctl_com_init(const struct dram_para *para,
participants (3)
-
Andre Przywara
-
Gunjan Gupta
-
Jernej Škrabec