[U-Boot] [U-Boot RESEND v2 00/10] Enable edma support for ti-qspi

This patch series enables DMA for QSPI on dra7xx and am43xx.
Resending this series after rebasing on current origin/master.
v2: https://www.mail-archive.com/u-boot@lists.denx.de/msg179404.html
changes in v2: * Move edma related code to edma driver instead of handling it in ti-qspi driver.
v1: https://www.mail-archive.com/u-boot@lists.denx.de/msg176479.html
Kishon Vijay Abraham I (2): ARM: AM43xx: Add support for disabling clocks in uboot ARM: OMAP5: Add support for disabling clocks in uboot
Ravi Babu (2): sf: allocate cache aligned buffers to copy from flash env: use cache line aligned memory for flash read
Tom Rini (1): sf: ops: Add spi_flash_copy_mmap function
Vignesh R (5): ARM: OMAP5: Add functions to enable and disable EDMA3 clocks ARM: AM43XX: Add functions to enable and disable EDMA3 clocks dma: ti-edma3: Add helper function to support edma3 transfer spi: ti_qspi: Use DMA to read from qspi flash ARM: dra7xx_evm: Enable EDMA3 in SPL to support DMA on qspi
arch/arm/cpu/armv7/am33xx/clock.c | 52 +++++++++++++++++ arch/arm/cpu/armv7/am33xx/clock_am43xx.c | 36 ++++++++++++ arch/arm/cpu/armv7/omap-common/clocks-common.c | 53 +++++++++++++++++ arch/arm/cpu/armv7/omap5/hw_data.c | 41 ++++++++++++++ arch/arm/cpu/armv7/omap5/prcm-regs.c | 4 ++ arch/arm/include/asm/arch-am33xx/clock.h | 1 + arch/arm/include/asm/arch-omap5/hardware.h | 5 ++ arch/arm/include/asm/omap_common.h | 11 ++++ arch/arm/include/asm/ti-common/ti-edma3.h | 2 + common/cmd_sf.c | 6 +- common/env_sf.c | 11 ++-- drivers/dma/ti-edma3.c | 78 ++++++++++++++++++++++++++ drivers/mtd/spi/sf_ops.c | 8 ++- drivers/spi/ti_qspi.c | 23 ++++++++ include/configs/dra7xx_evm.h | 2 + include/spi.h | 3 + 16 files changed, 327 insertions(+), 9 deletions(-)

From: Ravi Babu ravibabu@ti.com
Use memalign() with ARCH_DMA_MINALIGN to allocate read buffers. This is required because flash drivers may use DMA for read operations and may have to invalidate the buffer before reading.
Signed-off-by: Ravi Babu ravibabu@ti.com Signed-off-by: Vignesh R vigneshr@ti.com Reviewed-by: Tom Rini trini@konsulko.com --- common/cmd_sf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/common/cmd_sf.c b/common/cmd_sf.c index 3746e0d9644f..ac7f5dfb8181 100644 --- a/common/cmd_sf.c +++ b/common/cmd_sf.c @@ -223,7 +223,7 @@ static int spi_flash_update(struct spi_flash *flash, u32 offset,
if (end - buf >= 200) scale = (end - buf) / 100; - cmp_buf = malloc(flash->sector_size); + cmp_buf = memalign(ARCH_DMA_MINALIGN, flash->sector_size); if (cmp_buf) { ulong last_update = get_timer(0);
@@ -484,12 +484,12 @@ static int do_spi_flash_test(int argc, char * const argv[]) if (*argv[2] == 0 || *endp != 0) return -1;
- vbuf = malloc(len); + vbuf = memalign(ARCH_DMA_MINALIGN, len); if (!vbuf) { printf("Cannot allocate memory (%lu bytes)\n", len); return 1; } - buf = malloc(len); + buf = memalign(ARCH_DMA_MINALIGN, len); if (!buf) { free(vbuf); printf("Cannot allocate memory (%lu bytes)\n", len);

On 17 August 2015 at 13:29, Vignesh R vigneshr@ti.com wrote:
From: Ravi Babu ravibabu@ti.com
Use memalign() with ARCH_DMA_MINALIGN to allocate read buffers. This is required because flash drivers may use DMA for read operations and may have to invalidate the buffer before reading.
Signed-off-by: Ravi Babu ravibabu@ti.com Signed-off-by: Vignesh R vigneshr@ti.com Reviewed-by: Tom Rini trini@konsulko.com
Reviewed-by: Jagan Teki jteki@openedev.com
common/cmd_sf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/common/cmd_sf.c b/common/cmd_sf.c index 3746e0d9644f..ac7f5dfb8181 100644 --- a/common/cmd_sf.c +++ b/common/cmd_sf.c @@ -223,7 +223,7 @@ static int spi_flash_update(struct spi_flash *flash, u32 offset,
if (end - buf >= 200) scale = (end - buf) / 100;
cmp_buf = malloc(flash->sector_size);
cmp_buf = memalign(ARCH_DMA_MINALIGN, flash->sector_size); if (cmp_buf) { ulong last_update = get_timer(0);
@@ -484,12 +484,12 @@ static int do_spi_flash_test(int argc, char * const argv[]) if (*argv[2] == 0 || *endp != 0) return -1;
vbuf = malloc(len);
vbuf = memalign(ARCH_DMA_MINALIGN, len); if (!vbuf) { printf("Cannot allocate memory (%lu bytes)\n", len); return 1; }
buf = malloc(len);
buf = memalign(ARCH_DMA_MINALIGN, len); if (!buf) { free(vbuf); printf("Cannot allocate memory (%lu bytes)\n", len);
-- 2.5.0
thanks!

From: Ravi Babu ravibabu@ti.com
Use memalign() with ARCH_DMA_MINALIGN to allocate read buffers. This is required because flash drivers may use DMA for read operations and may have to invalidate the buffer before reading.
Signed-off-by: Ravi Babu ravibabu@ti.com Signed-off-by: Vignesh R vigneshr@ti.com Reviewed-by: Tom Rini trini@konsulko.com --- common/env_sf.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/common/env_sf.c b/common/env_sf.c index e928f5752cc7..940983124fbb 100644 --- a/common/env_sf.c +++ b/common/env_sf.c @@ -79,7 +79,7 @@ int saveenv(void) if (CONFIG_ENV_SECT_SIZE > CONFIG_ENV_SIZE) { saved_size = CONFIG_ENV_SECT_SIZE - CONFIG_ENV_SIZE; saved_offset = env_new_offset + CONFIG_ENV_SIZE; - saved_buffer = malloc(saved_size); + saved_buffer = memalign(ARCH_DMA_MINALIGN, saved_size); if (!saved_buffer) { ret = 1; goto done; @@ -142,9 +142,10 @@ void env_relocate_spec(void) env_t *tmp_env2 = NULL; env_t *ep = NULL;
- tmp_env1 = (env_t *)malloc(CONFIG_ENV_SIZE); - tmp_env2 = (env_t *)malloc(CONFIG_ENV_SIZE); - + tmp_env1 = (env_t *)memalign(ARCH_DMA_MINALIGN, + CONFIG_ENV_SIZE); + tmp_env2 = (env_t *)memalign(ARCH_DMA_MINALIGN, + CONFIG_ENV_SIZE); if (!tmp_env1 || !tmp_env2) { set_default_env("!malloc() failed"); goto out; @@ -295,7 +296,7 @@ void env_relocate_spec(void) int ret; char *buf = NULL;
- buf = (char *)malloc(CONFIG_ENV_SIZE); + buf = (char *)memalign(ARCH_DMA_MINALIGN, CONFIG_ENV_SIZE); env_flash = spi_flash_probe(CONFIG_ENV_SPI_BUS, CONFIG_ENV_SPI_CS, CONFIG_ENV_SPI_MAX_HZ, CONFIG_ENV_SPI_MODE); if (!env_flash) {

From: Kishon Vijay Abraham I kishon@ti.com
Add do_disable_clocks() to disable clock domains and module clocks. These clocks are enabled using do_enable_clocks().
Signed-off-by: Kishon Vijay Abraham I kishon@ti.com Signed-off-by: Vignesh R vigneshr@ti.com Reviewed-by: Jagan Teki jteki@openedev.com --- arch/arm/cpu/armv7/am33xx/clock.c | 52 ++++++++++++++++++++++++++++++++ arch/arm/include/asm/arch-am33xx/clock.h | 1 + 2 files changed, 53 insertions(+)
diff --git a/arch/arm/cpu/armv7/am33xx/clock.c b/arch/arm/cpu/armv7/am33xx/clock.c index ec7d46838b74..595c951ed245 100644 --- a/arch/arm/cpu/armv7/am33xx/clock.c +++ b/arch/arm/cpu/armv7/am33xx/clock.c @@ -144,6 +144,33 @@ static inline void enable_clock_module(u32 *const clkctrl_addr, u32 enable_mode, wait_for_clk_enable(clkctrl_addr); }
+static inline void wait_for_clk_disable(u32 *clkctrl_addr) +{ + u32 clkctrl, idlest = MODULE_CLKCTRL_IDLEST_FULLY_FUNCTIONAL; + u32 bound = LDELAY; + + while ((idlest != MODULE_CLKCTRL_IDLEST_DISABLED)) { + clkctrl = readl(clkctrl_addr); + idlest = (clkctrl & MODULE_CLKCTRL_IDLEST_MASK) >> + MODULE_CLKCTRL_IDLEST_SHIFT; + if (--bound == 0) { + printf("Clock disable failed for 0x%p idlest 0x%x\n", + clkctrl_addr, clkctrl); + return; + } + } +} +static inline void disable_clock_module(u32 *const clkctrl_addr, + u32 wait_for_disable) +{ + clrsetbits_le32(clkctrl_addr, MODULE_CLKCTRL_MODULEMODE_MASK, + MODULE_CLKCTRL_MODULEMODE_SW_DISABLE << + MODULE_CLKCTRL_MODULEMODE_SHIFT); + debug("Disable clock module - %p\n", clkctrl_addr); + if (wait_for_disable) + wait_for_clk_disable(clkctrl_addr); +} + static inline void enable_clock_domain(u32 *const clkctrl_reg, u32 enable_mode) { clrsetbits_le32(clkctrl_reg, CD_CLKCTRL_CLKTRCTRL_MASK, @@ -151,6 +178,14 @@ static inline void enable_clock_domain(u32 *const clkctrl_reg, u32 enable_mode) debug("Enable clock domain - %p\n", clkctrl_reg); }
+static inline void disable_clock_domain(u32 *const clkctrl_reg) +{ + clrsetbits_le32(clkctrl_reg, CD_CLKCTRL_CLKTRCTRL_MASK, + CD_CLKCTRL_CLKTRCTRL_SW_SLEEP << + CD_CLKCTRL_CLKTRCTRL_SHIFT); + debug("Disable clock domain - %p\n", clkctrl_reg); +} + void do_enable_clocks(u32 *const *clk_domains, u32 *const *clk_modules_explicit_en, u8 wait_for_enable) { @@ -170,6 +205,23 @@ void do_enable_clocks(u32 *const *clk_domains, }; }
+void do_disable_clocks(u32 *const *clk_domains, + u32 *const *clk_modules_disable, + u8 wait_for_disable) +{ + u32 i, max = 100; + + + /* Clock modules that need to be put in SW_DISABLE */ + for (i = 0; (i < max) && clk_modules_disable[i]; i++) + disable_clock_module(clk_modules_disable[i], + wait_for_disable); + + /* Put the clock domains in SW_SLEEP mode */ + for (i = 0; (i < max) && clk_domains[i]; i++) + disable_clock_domain(clk_domains[i]); +} + /* * Before scaling up the clocks we need to have the PMIC scale up the * voltages first. This will be dependent on which PMIC is in use diff --git a/arch/arm/include/asm/arch-am33xx/clock.h b/arch/arm/include/asm/arch-am33xx/clock.h index 4af6b57e42f5..a6d2419fb843 100644 --- a/arch/arm/include/asm/arch-am33xx/clock.h +++ b/arch/arm/include/asm/arch-am33xx/clock.h @@ -112,5 +112,6 @@ void do_setup_dpll(const struct dpll_regs *, const struct dpll_params *); void prcm_init(void); void enable_basic_clocks(void); void do_enable_clocks(u32 *const *, u32 *const *, u8); +void do_disable_clocks(u32 *const *, u32 *const *, u8);
#endif

From: Kishon Vijay Abraham I kishon@ti.com
Add do_disable_clocks() to disable clock domains and module clocks. These clocks are enabled using do_enable_clocks().
Signed-off-by: Kishon Vijay Abraham I kishon@ti.com Signed-off-by: Vignesh R vigneshr@ti.com Reviewed-by: Jagan Teki jteki@openedev.com --- arch/arm/cpu/armv7/omap-common/clocks-common.c | 53 ++++++++++++++++++++++++++ arch/arm/include/asm/omap_common.h | 4 ++ 2 files changed, 57 insertions(+)
diff --git a/arch/arm/cpu/armv7/omap-common/clocks-common.c b/arch/arm/cpu/armv7/omap-common/clocks-common.c index c94a80781931..e28b79568d1d 100644 --- a/arch/arm/cpu/armv7/omap-common/clocks-common.c +++ b/arch/arm/cpu/armv7/omap-common/clocks-common.c @@ -648,6 +648,14 @@ static inline void enable_clock_domain(u32 const clkctrl_reg, u32 enable_mode) debug("Enable clock domain - %x\n", clkctrl_reg); }
+static inline void disable_clock_domain(u32 const clkctrl_reg) +{ + clrsetbits_le32(clkctrl_reg, CD_CLKCTRL_CLKTRCTRL_MASK, + CD_CLKCTRL_CLKTRCTRL_SW_SLEEP << + CD_CLKCTRL_CLKTRCTRL_SHIFT); + debug("Disable clock domain - %x\n", clkctrl_reg); +} + static inline void wait_for_clk_enable(u32 clkctrl_addr) { u32 clkctrl, idlest = MODULE_CLKCTRL_IDLEST_DISABLED; @@ -677,6 +685,34 @@ static inline void enable_clock_module(u32 const clkctrl_addr, u32 enable_mode, wait_for_clk_enable(clkctrl_addr); }
+static inline void wait_for_clk_disable(u32 clkctrl_addr) +{ + u32 clkctrl, idlest = MODULE_CLKCTRL_IDLEST_FULLY_FUNCTIONAL; + u32 bound = LDELAY; + + while ((idlest != MODULE_CLKCTRL_IDLEST_DISABLED)) { + clkctrl = readl(clkctrl_addr); + idlest = (clkctrl & MODULE_CLKCTRL_IDLEST_MASK) >> + MODULE_CLKCTRL_IDLEST_SHIFT; + if (--bound == 0) { + printf("Clock disable failed for 0x%x idlest 0x%x\n", + clkctrl_addr, clkctrl); + return; + } + } +} + +static inline void disable_clock_module(u32 const clkctrl_addr, + u32 wait_for_disable) +{ + clrsetbits_le32(clkctrl_addr, MODULE_CLKCTRL_MODULEMODE_MASK, + MODULE_CLKCTRL_MODULEMODE_SW_DISABLE << + MODULE_CLKCTRL_MODULEMODE_SHIFT); + debug("Disable clock module - %x\n", clkctrl_addr); + if (wait_for_disable) + wait_for_clk_disable(clkctrl_addr); +} + void freq_update_core(void) { u32 freq_config1 = 0; @@ -800,6 +836,23 @@ void do_enable_clocks(u32 const *clk_domains, } }
+void do_disable_clocks(u32 const *clk_domains, + u32 const *clk_modules_disable, + u8 wait_for_disable) +{ + u32 i, max = 100; + + + /* Clock modules that need to be put in SW_DISABLE */ + for (i = 0; (i < max) && clk_modules_disable[i]; i++) + disable_clock_module(clk_modules_disable[i], + wait_for_disable); + + /* Put the clock domains in SW_SLEEP mode */ + for (i = 0; (i < max) && clk_domains[i]; i++) + disable_clock_domain(clk_domains[i]); +} + void prcm_init(void) { switch (omap_hw_init_context()) { diff --git a/arch/arm/include/asm/omap_common.h b/arch/arm/include/asm/omap_common.h index 056affc3fabd..87cdaad1d60f 100644 --- a/arch/arm/include/asm/omap_common.h +++ b/arch/arm/include/asm/omap_common.h @@ -575,6 +575,10 @@ void do_enable_clocks(u32 const *clk_domains, u32 const *clk_modules_explicit_en, u8 wait_for_enable);
+void do_disable_clocks(u32 const *clk_domains, + u32 const *clk_modules_disable, + u8 wait_for_disable); + void setup_post_dividers(u32 const base, const struct dpll_params *params); u32 omap_ddr_clk(void);

Adds functions to enable and disable edma3 clocks which can be invoked by drivers using edma3 to control the clocks.
Signed-off-by: Vignesh R vigneshr@ti.com Reviewed-by: Tom Rini trini@konsulko.com --- * dropped #ifdefs in header file.
arch/arm/cpu/armv7/omap5/hw_data.c | 41 ++++++++++++++++++++++++++++++++++++ arch/arm/cpu/armv7/omap5/prcm-regs.c | 4 ++++ arch/arm/include/asm/omap_common.h | 7 ++++++ 3 files changed, 52 insertions(+)
diff --git a/arch/arm/cpu/armv7/omap5/hw_data.c b/arch/arm/cpu/armv7/omap5/hw_data.c index 3a723cace71a..33f92b7e225d 100644 --- a/arch/arm/cpu/armv7/omap5/hw_data.c +++ b/arch/arm/cpu/armv7/omap5/hw_data.c @@ -565,6 +565,47 @@ void enable_basic_uboot_clocks(void) 1); }
+#ifdef CONFIG_TI_EDMA3 +void enable_edma3_clocks(void) +{ + u32 const clk_domains_edma3[] = { + 0 + }; + + u32 const clk_modules_hw_auto_edma3[] = { + (*prcm)->cm_l3main1_tptc1_clkctrl, + (*prcm)->cm_l3main1_tptc2_clkctrl, + 0 + }; + + u32 const clk_modules_explicit_en_edma3[] = { + 0 + }; + + do_enable_clocks(clk_domains_edma3, + clk_modules_hw_auto_edma3, + clk_modules_explicit_en_edma3, + 1); +} + +void disable_edma3_clocks(void) +{ + u32 const clk_domains_edma3[] = { + 0 + }; + + u32 const clk_modules_disable_edma3[] = { + (*prcm)->cm_l3main1_tptc1_clkctrl, + (*prcm)->cm_l3main1_tptc2_clkctrl, + 0 + }; + + do_disable_clocks(clk_domains_edma3, + clk_modules_disable_edma3, + 1); +} +#endif + const struct ctrl_ioregs ioregs_omap5430 = { .ctrl_ddrch = DDR_IO_I_34OHM_SR_FASTEST_WD_DQ_NO_PULL_DQS_PULL_DOWN, .ctrl_lpddr2ch = DDR_IO_I_34OHM_SR_FASTEST_WD_CK_CKE_NCS_CA_PULL_DOWN, diff --git a/arch/arm/cpu/armv7/omap5/prcm-regs.c b/arch/arm/cpu/armv7/omap5/prcm-regs.c index cd51fe7678be..d01ce88306ee 100644 --- a/arch/arm/cpu/armv7/omap5/prcm-regs.c +++ b/arch/arm/cpu/armv7/omap5/prcm-regs.c @@ -989,4 +989,8 @@ struct prcm_regs const dra7xx_prcm = {
.prm_abbldo_mpu_setup = 0x4AE07DDC, .prm_abbldo_mpu_ctrl = 0x4AE07DE0, + + /*l3main1 edma*/ + .cm_l3main1_tptc1_clkctrl = 0x4a008778, + .cm_l3main1_tptc2_clkctrl = 0x4a008780, }; diff --git a/arch/arm/include/asm/omap_common.h b/arch/arm/include/asm/omap_common.h index 87cdaad1d60f..b67d4b673d99 100644 --- a/arch/arm/include/asm/omap_common.h +++ b/arch/arm/include/asm/omap_common.h @@ -349,6 +349,10 @@ struct prcm_regs { /* IPU */ u32 cm_ipu_clkstctrl; u32 cm_ipu_i2c5_clkctrl; + + /*l3main1 edma*/ + u32 cm_l3main1_tptc1_clkctrl; + u32 cm_l3main1_tptc2_clkctrl; };
struct omap_sys_ctrl_regs { @@ -598,6 +602,9 @@ void recalibrate_iodelay(void);
void omap_smc1(u32 service, u32 val);
+void enable_edma3_clocks(void); +void disable_edma3_clocks(void); + /* ABB */ #define OMAP_ABB_NOMINAL_OPP 0 #define OMAP_ABB_FAST_OPP 1

Adds functions to enable and disable edma3 clocks which can be invoked by drivers using edma3 to control the clocks.
Signed-off-by: Vignesh R vigneshr@ti.com Reviewed-by: Tom Rini trini@konsulko.com Reviewed-by: Jagan Teki jteki@openedev.com --- arch/arm/cpu/armv7/am33xx/clock_am43xx.c | 36 ++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+)
diff --git a/arch/arm/cpu/armv7/am33xx/clock_am43xx.c b/arch/arm/cpu/armv7/am33xx/clock_am43xx.c index b6396942bbff..35c431eb292d 100644 --- a/arch/arm/cpu/armv7/am33xx/clock_am43xx.c +++ b/arch/arm/cpu/armv7/am33xx/clock_am43xx.c @@ -135,3 +135,39 @@ void enable_basic_clocks(void) /* For OPP100 the mac clock should be /5. */ writel(0x4, &cmdpll->clkselmacclk); } + +#ifdef CONFIG_TI_EDMA3 +void enable_edma3_clocks(void) +{ + u32 *const clk_domains_edma3[] = { + 0 + }; + + u32 *const clk_modules_explicit_en_edma3[] = { + &cmper->tpccclkctrl, + &cmper->tptc0clkctrl, + 0 + }; + + do_enable_clocks(clk_domains_edma3, + clk_modules_explicit_en_edma3, + 1); +} + +void disable_edma3_clocks(void) +{ + u32 *const clk_domains_edma3[] = { + 0 + }; + + u32 *const clk_modules_disable_edma3[] = { + &cmper->tpccclkctrl, + &cmper->tptc0clkctrl, + 0 + }; + + do_disable_clocks(clk_domains_edma3, + clk_modules_disable_edma3, + 1); +} +#endif

From: Tom Rini trini@ti.com
When doing a memory mapped copy we may have DMA available and thus need to have this copy abstracted so that the driver can do it, rather than a simple memcpy.
Signed-off-by: Tom Rini trini@ti.com Signed-off-by: Vignesh R vigneshr@ti.com Reviewed-by: Jagan Teki jteki@openedev.com --- drivers/mtd/spi/sf_ops.c | 8 +++++++- include/spi.h | 3 +++ 2 files changed, 10 insertions(+), 1 deletion(-)
diff --git a/drivers/mtd/spi/sf_ops.c b/drivers/mtd/spi/sf_ops.c index 38592f518b72..900ec1f2a9ce 100644 --- a/drivers/mtd/spi/sf_ops.c +++ b/drivers/mtd/spi/sf_ops.c @@ -14,6 +14,7 @@ #include <spi.h> #include <spi_flash.h> #include <watchdog.h> +#include <linux/compiler.h>
#include "sf_internal.h"
@@ -378,6 +379,11 @@ int spi_flash_read_common(struct spi_flash *flash, const u8 *cmd, return ret; }
+void __weak spi_flash_copy_mmap(void *data, void *offset, size_t len) +{ + memcpy(data, offset, len); +} + int spi_flash_cmd_read_ops(struct spi_flash *flash, u32 offset, size_t len, void *data) { @@ -394,7 +400,7 @@ int spi_flash_cmd_read_ops(struct spi_flash *flash, u32 offset, return ret; } spi_xfer(flash->spi, 0, NULL, NULL, SPI_XFER_MMAP); - memcpy(data, flash->memory_map + offset, len); + spi_flash_copy_mmap(data, flash->memory_map + offset, len); spi_xfer(flash->spi, 0, NULL, NULL, SPI_XFER_MMAP_END); spi_release_bus(flash->spi); return 0; diff --git a/include/spi.h b/include/spi.h index 18362364cf0e..51fdfd6d7360 100644 --- a/include/spi.h +++ b/include/spi.h @@ -272,6 +272,9 @@ int spi_set_wordlen(struct spi_slave *slave, unsigned int wordlen); int spi_xfer(struct spi_slave *slave, unsigned int bitlen, const void *dout, void *din, unsigned long flags);
+/* Copy memory mapped data */ +void spi_flash_copy_mmap(void *data, void *offset, size_t len); + /** * Determine if a SPI chipselect is valid. * This function is provided by the board if the low-level SPI driver

Signed-off-by: Vignesh R vigneshr@ti.com Reviewed-by: Jagan Teki jteki@openedev.com --- arch/arm/include/asm/ti-common/ti-edma3.h | 2 + drivers/dma/ti-edma3.c | 78 +++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+)
diff --git a/arch/arm/include/asm/ti-common/ti-edma3.h b/arch/arm/include/asm/ti-common/ti-edma3.h index 5adc1dac0e65..6a7a321c1bdf 100644 --- a/arch/arm/include/asm/ti-common/ti-edma3.h +++ b/arch/arm/include/asm/ti-common/ti-edma3.h @@ -117,5 +117,7 @@ void edma3_set_src_addr(u32 base, int slot, u32 src); void edma3_set_transfer_params(u32 base, int slot, int acnt, int bcnt, int ccnt, u16 bcnt_rld, enum edma3_sync_dimension sync_mode); +void edma3_transfer(unsigned long edma3_base_addr, unsigned int + edma_slot_num, void *dst, void *src, size_t len);
#endif diff --git a/drivers/dma/ti-edma3.c b/drivers/dma/ti-edma3.c index 8184ded9fa81..d6a427f2e21d 100644 --- a/drivers/dma/ti-edma3.c +++ b/drivers/dma/ti-edma3.c @@ -382,3 +382,81 @@ void qedma3_stop(u32 base, struct edma3_channel_config *cfg) /* Clear the channel map */ __raw_writel(0, base + EDMA3_QCHMAP(cfg->chnum)); } + +void edma3_transfer(unsigned long edma3_base_addr, unsigned int + edma_slot_num, void *dst, void *src, size_t len) +{ + struct edma3_slot_config slot; + struct edma3_channel_config edma_channel; + int b_cnt_value = 1; + int rem_bytes = 0; + int a_cnt_value = len; + unsigned int addr = (unsigned int) (dst); + unsigned int max_acnt = 0x7FFFU; + + if (len > max_acnt) { + b_cnt_value = (len / max_acnt); + rem_bytes = (len % max_acnt); + a_cnt_value = max_acnt; + } + + slot.opt = 0; + slot.src = ((unsigned int) src); + slot.acnt = a_cnt_value; + slot.bcnt = b_cnt_value; + slot.ccnt = 1; + slot.src_bidx = a_cnt_value; + slot.dst_bidx = a_cnt_value; + slot.src_cidx = 0; + slot.dst_cidx = 0; + slot.link = EDMA3_PARSET_NULL_LINK; + slot.bcntrld = 0; + slot.opt = EDMA3_SLOPT_TRANS_COMP_INT_ENB | + EDMA3_SLOPT_COMP_CODE(0) | + EDMA3_SLOPT_STATIC | EDMA3_SLOPT_AB_SYNC; + + edma3_slot_configure(edma3_base_addr, edma_slot_num, &slot); + edma_channel.slot = edma_slot_num; + edma_channel.chnum = 0; + edma_channel.complete_code = 0; + /* set event trigger to dst update */ + edma_channel.trigger_slot_word = EDMA3_TWORD(dst); + + qedma3_start(edma3_base_addr, &edma_channel); + edma3_set_dest_addr(edma3_base_addr, edma_channel.slot, addr); + + while (edma3_check_for_transfer(edma3_base_addr, &edma_channel)) + ; + qedma3_stop(edma3_base_addr, &edma_channel); + + if (rem_bytes != 0) { + slot.opt = 0; + slot.src = + (b_cnt_value * max_acnt) + ((unsigned int) src); + slot.acnt = rem_bytes; + slot.bcnt = 1; + slot.ccnt = 1; + slot.src_bidx = rem_bytes; + slot.dst_bidx = rem_bytes; + slot.src_cidx = 0; + slot.dst_cidx = 0; + slot.link = EDMA3_PARSET_NULL_LINK; + 
slot.bcntrld = 0; + slot.opt = EDMA3_SLOPT_TRANS_COMP_INT_ENB | + EDMA3_SLOPT_COMP_CODE(0) | + EDMA3_SLOPT_STATIC | EDMA3_SLOPT_AB_SYNC; + edma3_slot_configure(edma3_base_addr, edma_slot_num, &slot); + edma_channel.slot = edma_slot_num; + edma_channel.chnum = 0; + edma_channel.complete_code = 0; + /* set event trigger to dst update */ + edma_channel.trigger_slot_word = EDMA3_TWORD(dst); + + qedma3_start(edma3_base_addr, &edma_channel); + edma3_set_dest_addr(edma3_base_addr, edma_channel.slot, addr + + (max_acnt * b_cnt_value)); + while (edma3_check_for_transfer(edma3_base_addr, &edma_channel)) + ; + qedma3_stop(edma3_base_addr, &edma_channel); + } +}

ti_qspi uses memory map mode for faster read. Enabling DMA will increase read speed by 3x @48MHz on DRA74 EVM.
Signed-off-by: Vignesh R vigneshr@ti.com Reviewed-by: Jagan Teki jteki@openedev.com --- drivers/spi/ti_qspi.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+)
diff --git a/drivers/spi/ti_qspi.c b/drivers/spi/ti_qspi.c index 3356c0f072e5..753d68980bd6 100644 --- a/drivers/spi/ti_qspi.c +++ b/drivers/spi/ti_qspi.c @@ -13,6 +13,8 @@ #include <spi.h> #include <asm/gpio.h> #include <asm/omap_gpio.h> +#include <asm/omap_common.h> +#include <asm/ti-common/ti-edma3.h>
/* ti qpsi register bit masks */ #define QSPI_TIMEOUT 2000000 @@ -347,3 +349,24 @@ int spi_xfer(struct spi_slave *slave, unsigned int bitlen, const void *dout,
return 0; } +#ifdef CONFIG_TI_EDMA3 +void spi_flash_copy_mmap(void *data, void *offset, size_t len) +{ + unsigned int addr = (unsigned int) (data); + unsigned int edma_slot_num = 1; + + /* Invalidate the area, so no writeback into the RAM races with DMA */ + invalidate_dcache_range(addr, addr + roundup(len, ARCH_DMA_MINALIGN)); + + /* enable edma3 clocks */ + enable_edma3_clocks(); + + /* Call edma3 api to do actual DMA transfer */ + edma3_transfer(EDMA3_BASE, edma_slot_num, data, offset, len); + + /* disable edma3 clocks */ + disable_edma3_clocks(); + + *((unsigned int *)offset) += len; +} +#endif

On 17 August 2015 at 13:29, Vignesh R vigneshr@ti.com wrote:
ti_qspi uses memory map mode for faster read. Enabling DMA will increase read speed by 3x @48MHz on DRA74 EVM.
Signed-off-by: Vignesh R vigneshr@ti.com Reviewed-by: Jagan Teki jteki@openedev.com
drivers/spi/ti_qspi.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+)
diff --git a/drivers/spi/ti_qspi.c b/drivers/spi/ti_qspi.c index 3356c0f072e5..753d68980bd6 100644 --- a/drivers/spi/ti_qspi.c +++ b/drivers/spi/ti_qspi.c @@ -13,6 +13,8 @@ #include <spi.h> #include <asm/gpio.h> #include <asm/omap_gpio.h> +#include <asm/omap_common.h> +#include <asm/ti-common/ti-edma3.h>
/* ti qpsi register bit masks */ #define QSPI_TIMEOUT 2000000 @@ -347,3 +349,24 @@ int spi_xfer(struct spi_slave *slave, unsigned int bitlen, const void *dout,
return 0;
}
Please add the comment below here. I asked for the same on the previous version of this patch; it will help us track the future work.
/* TODO: control from sf layer to here through dm-spi */
+#ifdef CONFIG_TI_EDMA3 +void spi_flash_copy_mmap(void *data, void *offset, size_t len) +{
unsigned int addr = (unsigned int) (data);
unsigned int edma_slot_num = 1;
/* Invalidate the area, so no writeback into the RAM races with DMA */
invalidate_dcache_range(addr, addr + roundup(len, ARCH_DMA_MINALIGN));
/* enable edma3 clocks */
enable_edma3_clocks();
/* Call edma3 api to do actual DMA transfer */
edma3_transfer(EDMA3_BASE, edma_slot_num, data, offset, len);
/* disable edma3 clocks */
disable_edma3_clocks();
*((unsigned int *)offset) += len;
+}
+#endif
2.5.0
thanks!

On 8/17/2015 1:48 PM, Jagan Teki wrote:
On 17 August 2015 at 13:29, Vignesh R vigneshr@ti.com wrote:
ti_qspi uses memory map mode for faster read. Enabling DMA will increase read speed by 3x @48MHz on DRA74 EVM.
Signed-off-by: Vignesh R vigneshr@ti.com Reviewed-by: Jagan Teki jteki@openedev.com
drivers/spi/ti_qspi.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+)
diff --git a/drivers/spi/ti_qspi.c b/drivers/spi/ti_qspi.c index 3356c0f072e5..753d68980bd6 100644 --- a/drivers/spi/ti_qspi.c +++ b/drivers/spi/ti_qspi.c @@ -13,6 +13,8 @@ #include <spi.h> #include <asm/gpio.h> #include <asm/omap_gpio.h> +#include <asm/omap_common.h> +#include <asm/ti-common/ti-edma3.h>
/* ti qpsi register bit masks */ #define QSPI_TIMEOUT 2000000 @@ -347,3 +349,24 @@ int spi_xfer(struct spi_slave *slave, unsigned int bitlen, const void *dout,
return 0;
}
Please add the comment below here. I asked for the same on the previous version of this patch; it will help us track the future work.
/* TODO: control from sf layer to here through dm-spi */
Oops.. Sorry, I overlooked it.. Will add the comment and send it soon. Thanks!
+#ifdef CONFIG_TI_EDMA3 +void spi_flash_copy_mmap(void *data, void *offset, size_t len) +{
unsigned int addr = (unsigned int) (data);
unsigned int edma_slot_num = 1;
/* Invalidate the area, so no writeback into the RAM races with DMA */
invalidate_dcache_range(addr, addr + roundup(len, ARCH_DMA_MINALIGN));
/* enable edma3 clocks */
enable_edma3_clocks();
/* Call edma3 api to do actual DMA transfer */
edma3_transfer(EDMA3_BASE, edma_slot_num, data, offset, len);
/* disable edma3 clocks */
disable_edma3_clocks();
*((unsigned int *)offset) += len;
+}
+#endif
2.5.0
thanks!

ti_qspi uses memory map mode for faster read. Enabling DMA will increase read speed by 3x @48MHz on DRA74 EVM.
Signed-off-by: Vignesh R vigneshr@ti.com Reviewed-by: Jagan Teki jteki@openedev.com --- * Added a TODO comment
drivers/spi/ti_qspi.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+)
diff --git a/drivers/spi/ti_qspi.c b/drivers/spi/ti_qspi.c index 3356c0f072e5..c148d1cadfbd 100644 --- a/drivers/spi/ti_qspi.c +++ b/drivers/spi/ti_qspi.c @@ -13,6 +13,8 @@ #include <spi.h> #include <asm/gpio.h> #include <asm/omap_gpio.h> +#include <asm/omap_common.h> +#include <asm/ti-common/ti-edma3.h>
/* ti qpsi register bit masks */ #define QSPI_TIMEOUT 2000000 @@ -347,3 +349,26 @@ int spi_xfer(struct spi_slave *slave, unsigned int bitlen, const void *dout,
return 0; } + +/* TODO: control from sf layer to here through dm-spi */ +#ifdef CONFIG_TI_EDMA3 +void spi_flash_copy_mmap(void *data, void *offset, size_t len) +{ + unsigned int addr = (unsigned int) (data); + unsigned int edma_slot_num = 1; + + /* Invalidate the area, so no writeback into the RAM races with DMA */ + invalidate_dcache_range(addr, addr + roundup(len, ARCH_DMA_MINALIGN)); + + /* enable edma3 clocks */ + enable_edma3_clocks(); + + /* Call edma3 api to do actual DMA transfer */ + edma3_transfer(EDMA3_BASE, edma_slot_num, data, offset, len); + + /* disable edma3 clocks */ + disable_edma3_clocks(); + + *((unsigned int *)offset) += len; +} +#endif

Enable TI_EDMA3 and SPL_DMA support, so as to reduce boot time. With DMA enabled there is an almost 3x improvement in read performance. This helps in reducing boot time in qspiboot mode.
Also add EDMA3 base address for DRA7XX and AM57XX.
Signed-off-by: Vignesh R vigneshr@ti.com Reviewed-by: Jagan Teki jteki@openedev.com --- arch/arm/include/asm/arch-omap5/hardware.h | 5 +++++ include/configs/dra7xx_evm.h | 2 ++ 2 files changed, 7 insertions(+)
diff --git a/arch/arm/include/asm/arch-omap5/hardware.h b/arch/arm/include/asm/arch-omap5/hardware.h index f7011b4e904a..a5bd60009289 100644 --- a/arch/arm/include/asm/arch-omap5/hardware.h +++ b/arch/arm/include/asm/arch-omap5/hardware.h @@ -23,4 +23,9 @@ /* GPMC Base address */ #define GPMC_BASE 0x50000000
+/* EDMA3 Base address for DRA7XX and AM57XX */ +#if defined(CONFIG_DRA7XX) || defined(CONFIG_AM57XX) +#define EDMA3_BASE 0x43300000 +#endif + #endif diff --git a/include/configs/dra7xx_evm.h b/include/configs/dra7xx_evm.h index 74994479e60b..6e32de854619 100644 --- a/include/configs/dra7xx_evm.h +++ b/include/configs/dra7xx_evm.h @@ -166,6 +166,8 @@
/* SPI SPL */ #define CONFIG_SPL_SPI_SUPPORT +#define CONFIG_SPL_DMA_SUPPORT +#define CONFIG_TI_EDMA3 #define CONFIG_SPL_SPI_LOAD #define CONFIG_SPL_SPI_FLASH_SUPPORT #define CONFIG_SYS_SPI_U_BOOT_OFFS 0x40000

On 17 August 2015 at 13:29, Vignesh R vigneshr@ti.com wrote:
This patch series enables DMA for QSPI on dra7xx and am43xx.
Resending this series after rebasing on current origin/master.
v2: https://www.mail-archive.com/u-boot@lists.denx.de/msg179404.html
changes in v2:
- Move edma related code to edma driver instead of handling it in ti-qspi driver.
v1: https://www.mail-archive.com/u-boot@lists.denx.de/msg176479.html
Kishon Vijay Abraham I (2): ARM: AM43xx: Add support for disabling clocks in uboot ARM: OMAP5: Add support for disabling clocks in uboot
Ravi Babu (2): sf: allocate cache aligned buffers to copy from flash env: use cache line aligned memory for flash read
Tom Rini (1): sf: ops: Add spi_flash_copy_mmap function
Vignesh R (5): ARM: OMAP5: Add functions to enable and disable EDMA3 clocks ARM: AM43XX: Add functions to enable and disable EDMA3 clocks dma: ti-edma3: Add helper function to support edma3 transfer spi: ti_qspi: Use DMA to read from qspi flash ARM: dra7xx_evm: Enable EDMA3 in SPL to support DMA on qspi
arch/arm/cpu/armv7/am33xx/clock.c | 52 +++++++++++++++++ arch/arm/cpu/armv7/am33xx/clock_am43xx.c | 36 ++++++++++++ arch/arm/cpu/armv7/omap-common/clocks-common.c | 53 +++++++++++++++++ arch/arm/cpu/armv7/omap5/hw_data.c | 41 ++++++++++++++ arch/arm/cpu/armv7/omap5/prcm-regs.c | 4 ++ arch/arm/include/asm/arch-am33xx/clock.h | 1 + arch/arm/include/asm/arch-omap5/hardware.h | 5 ++ arch/arm/include/asm/omap_common.h | 11 ++++ arch/arm/include/asm/ti-common/ti-edma3.h | 2 + common/cmd_sf.c | 6 +- common/env_sf.c | 11 ++-- drivers/dma/ti-edma3.c | 78 ++++++++++++++++++++++++++ drivers/mtd/spi/sf_ops.c | 8 ++- drivers/spi/ti_qspi.c | 23 ++++++++ include/configs/dra7xx_evm.h | 2 + include/spi.h | 3 + 16 files changed, 327 insertions(+), 9 deletions(-)
-- 2.5.0
U-Boot mailing list U-Boot@lists.denx.de http://lists.denx.de/mailman/listinfo/u-boot
Applied to u-boot-spi/master
thanks!
participants (3)
-
Jagan Teki
-
R, Vignesh
-
Vignesh R