[U-Boot] [PATCH 00/11] Enable edma support for ti-qspi

This patch series enables DMA for QSPI on dra7xx and am43xx. Patches 1 and 2 make sure the read buffers are aligned to ARCH_DMA_MINALIGN for DMA operation. Patches 3-6 add support for enabling and disabling the edma3 clocks on am43xx and dra7xx. Patches 7-9 add DMA functionality to the ti_qspi driver. Patches 10-11 enable edma3 support on dra7xx and am43xx in their respective config files.
Tested on DRA74 EVM: http://pastebin.ubuntu.com/11813977/ Tested on AM437X EVM SK: http://pastebin.ubuntu.com/11813994/
Kishon Vijay Abraham I (2): ARM: AM43xx: Add support for disabling clocks in uboot ARM: OMAP5: Add support for disabling clocks in uboot
Ravi Babu (2): sf: allocate cache aligned buffers to copy from flash env: use cache line aligned memory for flash read
Tom Rini (1): sf: ops: Add spi_flash_copy_mmap function
Vignesh R (6): ARM: OMAP5: Add functions to enable and disable EDMA3 clocks ARM: AM43XX: Add functions to enable and disable EDMA3 clocks spi: ti_qspi: Use DMA to read from qspi flash dma: ti-edma3: Add BIT(x) macro definition ARM: dra7xx_evm: Enable EDMA3 in SPL to support DMA on qspi ARM: am43xx_evm: Enable EDMA3 support DMA on qspi
arch/arm/cpu/armv7/am33xx/clock.c | 52 ++++++++++++ arch/arm/cpu/armv7/am33xx/clock_am43xx.c | 36 +++++++++ arch/arm/cpu/armv7/omap-common/clocks-common.c | 53 ++++++++++++ arch/arm/cpu/armv7/omap5/hw_data.c | 41 ++++++++++ arch/arm/cpu/armv7/omap5/prcm-regs.c | 4 + arch/arm/include/asm/arch-am33xx/clock.h | 1 + arch/arm/include/asm/arch-am33xx/hardware_am43xx.h | 3 + arch/arm/include/asm/arch-omap5/hardware.h | 5 ++ arch/arm/include/asm/omap_common.h | 13 +++ common/cmd_sf.c | 6 +- common/env_sf.c | 11 +-- drivers/dma/ti-edma3.c | 2 + drivers/mtd/spi/sf_ops.c | 8 +- drivers/spi/ti_qspi.c | 93 ++++++++++++++++++++++ include/configs/am43xx_evm.h | 2 + include/configs/dra7xx_evm.h | 2 + include/spi.h | 3 + 17 files changed, 326 insertions(+), 9 deletions(-)

From: Ravi Babu ravibabu@ti.com
Use memalign() with ARCH_DMA_MINALIGN to allocate read buffers. This is required because flash drivers may use DMA for read operations and may have to invalidate the buffer before the read.
Signed-off-by: Ravi Babu ravibabu@ti.com Signed-off-by: Vignesh R vigneshr@ti.com --- common/cmd_sf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/common/cmd_sf.c b/common/cmd_sf.c index aef8c2a5ea84..db84a78e1698 100644 --- a/common/cmd_sf.c +++ b/common/cmd_sf.c @@ -223,7 +223,7 @@ static int spi_flash_update(struct spi_flash *flash, u32 offset,
if (end - buf >= 200) scale = (end - buf) / 100; - cmp_buf = malloc(flash->sector_size); + cmp_buf = memalign(ARCH_DMA_MINALIGN, flash->sector_size); if (cmp_buf) { ulong last_update = get_timer(0);
@@ -480,12 +480,12 @@ static int do_spi_flash_test(int argc, char * const argv[]) if (*argv[2] == 0 || *endp != 0) return -1;
- vbuf = malloc(len); + vbuf = memalign(ARCH_DMA_MINALIGN, len); if (!vbuf) { printf("Cannot allocate memory (%lu bytes)\n", len); return 1; } - buf = malloc(len); + buf = memalign(ARCH_DMA_MINALIGN, len); if (!buf) { free(vbuf); printf("Cannot allocate memory (%lu bytes)\n", len);

From: Ravi Babu ravibabu@ti.com
Use memalign() with ARCH_DMA_MINALIGN to allocate read buffers. This is required because flash drivers may use DMA for read operations and may have to invalidate the buffer before the read.
Signed-off-by: Ravi Babu ravibabu@ti.com Signed-off-by: Vignesh R vigneshr@ti.com --- common/env_sf.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/common/env_sf.c b/common/env_sf.c index e928f5752cc7..940983124fbb 100644 --- a/common/env_sf.c +++ b/common/env_sf.c @@ -79,7 +79,7 @@ int saveenv(void) if (CONFIG_ENV_SECT_SIZE > CONFIG_ENV_SIZE) { saved_size = CONFIG_ENV_SECT_SIZE - CONFIG_ENV_SIZE; saved_offset = env_new_offset + CONFIG_ENV_SIZE; - saved_buffer = malloc(saved_size); + saved_buffer = memalign(ARCH_DMA_MINALIGN, saved_size); if (!saved_buffer) { ret = 1; goto done; @@ -142,9 +142,10 @@ void env_relocate_spec(void) env_t *tmp_env2 = NULL; env_t *ep = NULL;
- tmp_env1 = (env_t *)malloc(CONFIG_ENV_SIZE); - tmp_env2 = (env_t *)malloc(CONFIG_ENV_SIZE); - + tmp_env1 = (env_t *)memalign(ARCH_DMA_MINALIGN, + CONFIG_ENV_SIZE); + tmp_env2 = (env_t *)memalign(ARCH_DMA_MINALIGN, + CONFIG_ENV_SIZE); if (!tmp_env1 || !tmp_env2) { set_default_env("!malloc() failed"); goto out; @@ -295,7 +296,7 @@ void env_relocate_spec(void) int ret; char *buf = NULL;
- buf = (char *)malloc(CONFIG_ENV_SIZE); + buf = (char *)memalign(ARCH_DMA_MINALIGN, CONFIG_ENV_SIZE); env_flash = spi_flash_probe(CONFIG_ENV_SPI_BUS, CONFIG_ENV_SPI_CS, CONFIG_ENV_SPI_MAX_HZ, CONFIG_ENV_SPI_MODE); if (!env_flash) {

From: Kishon Vijay Abraham I kishon@ti.com
Add do_disable_clocks() to disable clock domains and module clocks. These clocks are enabled using do_enable_clocks().
Signed-off-by: Kishon Vijay Abraham I kishon@ti.com Signed-off-by: Vignesh R vigneshr@ti.com --- arch/arm/cpu/armv7/am33xx/clock.c | 52 ++++++++++++++++++++++++++++++++ arch/arm/include/asm/arch-am33xx/clock.h | 1 + 2 files changed, 53 insertions(+)
diff --git a/arch/arm/cpu/armv7/am33xx/clock.c b/arch/arm/cpu/armv7/am33xx/clock.c index ec7d46838b74..595c951ed245 100644 --- a/arch/arm/cpu/armv7/am33xx/clock.c +++ b/arch/arm/cpu/armv7/am33xx/clock.c @@ -144,6 +144,33 @@ static inline void enable_clock_module(u32 *const clkctrl_addr, u32 enable_mode, wait_for_clk_enable(clkctrl_addr); }
+static inline void wait_for_clk_disable(u32 *clkctrl_addr) +{ + u32 clkctrl, idlest = MODULE_CLKCTRL_IDLEST_FULLY_FUNCTIONAL; + u32 bound = LDELAY; + + while ((idlest != MODULE_CLKCTRL_IDLEST_DISABLED)) { + clkctrl = readl(clkctrl_addr); + idlest = (clkctrl & MODULE_CLKCTRL_IDLEST_MASK) >> + MODULE_CLKCTRL_IDLEST_SHIFT; + if (--bound == 0) { + printf("Clock disable failed for 0x%p idlest 0x%x\n", + clkctrl_addr, clkctrl); + return; + } + } +} +static inline void disable_clock_module(u32 *const clkctrl_addr, + u32 wait_for_disable) +{ + clrsetbits_le32(clkctrl_addr, MODULE_CLKCTRL_MODULEMODE_MASK, + MODULE_CLKCTRL_MODULEMODE_SW_DISABLE << + MODULE_CLKCTRL_MODULEMODE_SHIFT); + debug("Disable clock module - %p\n", clkctrl_addr); + if (wait_for_disable) + wait_for_clk_disable(clkctrl_addr); +} + static inline void enable_clock_domain(u32 *const clkctrl_reg, u32 enable_mode) { clrsetbits_le32(clkctrl_reg, CD_CLKCTRL_CLKTRCTRL_MASK, @@ -151,6 +178,14 @@ static inline void enable_clock_domain(u32 *const clkctrl_reg, u32 enable_mode) debug("Enable clock domain - %p\n", clkctrl_reg); }
+static inline void disable_clock_domain(u32 *const clkctrl_reg) +{ + clrsetbits_le32(clkctrl_reg, CD_CLKCTRL_CLKTRCTRL_MASK, + CD_CLKCTRL_CLKTRCTRL_SW_SLEEP << + CD_CLKCTRL_CLKTRCTRL_SHIFT); + debug("Disable clock domain - %p\n", clkctrl_reg); +} + void do_enable_clocks(u32 *const *clk_domains, u32 *const *clk_modules_explicit_en, u8 wait_for_enable) { @@ -170,6 +205,23 @@ void do_enable_clocks(u32 *const *clk_domains, }; }
+void do_disable_clocks(u32 *const *clk_domains, + u32 *const *clk_modules_disable, + u8 wait_for_disable) +{ + u32 i, max = 100; + + + /* Clock modules that need to be put in SW_DISABLE */ + for (i = 0; (i < max) && clk_modules_disable[i]; i++) + disable_clock_module(clk_modules_disable[i], + wait_for_disable); + + /* Put the clock domains in SW_SLEEP mode */ + for (i = 0; (i < max) && clk_domains[i]; i++) + disable_clock_domain(clk_domains[i]); +} + /* * Before scaling up the clocks we need to have the PMIC scale up the * voltages first. This will be dependent on which PMIC is in use diff --git a/arch/arm/include/asm/arch-am33xx/clock.h b/arch/arm/include/asm/arch-am33xx/clock.h index 4af6b57e42f5..a6d2419fb843 100644 --- a/arch/arm/include/asm/arch-am33xx/clock.h +++ b/arch/arm/include/asm/arch-am33xx/clock.h @@ -112,5 +112,6 @@ void do_setup_dpll(const struct dpll_regs *, const struct dpll_params *); void prcm_init(void); void enable_basic_clocks(void); void do_enable_clocks(u32 *const *, u32 *const *, u8); +void do_disable_clocks(u32 *const *, u32 *const *, u8);
#endif

From: Kishon Vijay Abraham I kishon@ti.com
Add do_disable_clocks() to disable clock domains and module clocks. These clocks are enabled using do_enable_clocks().
Signed-off-by: Kishon Vijay Abraham I kishon@ti.com Signed-off-by: Vignesh R vigneshr@ti.com --- arch/arm/cpu/armv7/omap-common/clocks-common.c | 53 ++++++++++++++++++++++++++ arch/arm/include/asm/omap_common.h | 4 ++ 2 files changed, 57 insertions(+)
diff --git a/arch/arm/cpu/armv7/omap-common/clocks-common.c b/arch/arm/cpu/armv7/omap-common/clocks-common.c index c94a80781931..e28b79568d1d 100644 --- a/arch/arm/cpu/armv7/omap-common/clocks-common.c +++ b/arch/arm/cpu/armv7/omap-common/clocks-common.c @@ -648,6 +648,14 @@ static inline void enable_clock_domain(u32 const clkctrl_reg, u32 enable_mode) debug("Enable clock domain - %x\n", clkctrl_reg); }
+static inline void disable_clock_domain(u32 const clkctrl_reg) +{ + clrsetbits_le32(clkctrl_reg, CD_CLKCTRL_CLKTRCTRL_MASK, + CD_CLKCTRL_CLKTRCTRL_SW_SLEEP << + CD_CLKCTRL_CLKTRCTRL_SHIFT); + debug("Disable clock domain - %x\n", clkctrl_reg); +} + static inline void wait_for_clk_enable(u32 clkctrl_addr) { u32 clkctrl, idlest = MODULE_CLKCTRL_IDLEST_DISABLED; @@ -677,6 +685,34 @@ static inline void enable_clock_module(u32 const clkctrl_addr, u32 enable_mode, wait_for_clk_enable(clkctrl_addr); }
+static inline void wait_for_clk_disable(u32 clkctrl_addr) +{ + u32 clkctrl, idlest = MODULE_CLKCTRL_IDLEST_FULLY_FUNCTIONAL; + u32 bound = LDELAY; + + while ((idlest != MODULE_CLKCTRL_IDLEST_DISABLED)) { + clkctrl = readl(clkctrl_addr); + idlest = (clkctrl & MODULE_CLKCTRL_IDLEST_MASK) >> + MODULE_CLKCTRL_IDLEST_SHIFT; + if (--bound == 0) { + printf("Clock disable failed for 0x%x idlest 0x%x\n", + clkctrl_addr, clkctrl); + return; + } + } +} + +static inline void disable_clock_module(u32 const clkctrl_addr, + u32 wait_for_disable) +{ + clrsetbits_le32(clkctrl_addr, MODULE_CLKCTRL_MODULEMODE_MASK, + MODULE_CLKCTRL_MODULEMODE_SW_DISABLE << + MODULE_CLKCTRL_MODULEMODE_SHIFT); + debug("Disable clock module - %x\n", clkctrl_addr); + if (wait_for_disable) + wait_for_clk_disable(clkctrl_addr); +} + void freq_update_core(void) { u32 freq_config1 = 0; @@ -800,6 +836,23 @@ void do_enable_clocks(u32 const *clk_domains, } }
+void do_disable_clocks(u32 const *clk_domains, + u32 const *clk_modules_disable, + u8 wait_for_disable) +{ + u32 i, max = 100; + + + /* Clock modules that need to be put in SW_DISABLE */ + for (i = 0; (i < max) && clk_modules_disable[i]; i++) + disable_clock_module(clk_modules_disable[i], + wait_for_disable); + + /* Put the clock domains in SW_SLEEP mode */ + for (i = 0; (i < max) && clk_domains[i]; i++) + disable_clock_domain(clk_domains[i]); +} + void prcm_init(void) { switch (omap_hw_init_context()) { diff --git a/arch/arm/include/asm/omap_common.h b/arch/arm/include/asm/omap_common.h index 5469435cc756..30e899ad46e3 100644 --- a/arch/arm/include/asm/omap_common.h +++ b/arch/arm/include/asm/omap_common.h @@ -575,6 +575,10 @@ void do_enable_clocks(u32 const *clk_domains, u32 const *clk_modules_explicit_en, u8 wait_for_enable);
+void do_disable_clocks(u32 const *clk_domains, + u32 const *clk_modules_disable, + u8 wait_for_disable); + void setup_post_dividers(u32 const base, const struct dpll_params *params); u32 omap_ddr_clk(void);

Adds functions to enable and disable edma3 clocks which can be invoked by drivers using edma3 to control the clocks.
Signed-off-by: Vignesh R vigneshr@ti.com --- arch/arm/cpu/armv7/omap5/hw_data.c | 41 ++++++++++++++++++++++++++++++++++++ arch/arm/cpu/armv7/omap5/prcm-regs.c | 4 ++++ arch/arm/include/asm/omap_common.h | 9 ++++++++ 3 files changed, 54 insertions(+)
diff --git a/arch/arm/cpu/armv7/omap5/hw_data.c b/arch/arm/cpu/armv7/omap5/hw_data.c index 3a723cace71a..33f92b7e225d 100644 --- a/arch/arm/cpu/armv7/omap5/hw_data.c +++ b/arch/arm/cpu/armv7/omap5/hw_data.c @@ -565,6 +565,47 @@ void enable_basic_uboot_clocks(void) 1); }
+#ifdef CONFIG_TI_EDMA3 +void enable_edma3_clocks(void) +{ + u32 const clk_domains_edma3[] = { + 0 + }; + + u32 const clk_modules_hw_auto_edma3[] = { + (*prcm)->cm_l3main1_tptc1_clkctrl, + (*prcm)->cm_l3main1_tptc2_clkctrl, + 0 + }; + + u32 const clk_modules_explicit_en_edma3[] = { + 0 + }; + + do_enable_clocks(clk_domains_edma3, + clk_modules_hw_auto_edma3, + clk_modules_explicit_en_edma3, + 1); +} + +void disable_edma3_clocks(void) +{ + u32 const clk_domains_edma3[] = { + 0 + }; + + u32 const clk_modules_disable_edma3[] = { + (*prcm)->cm_l3main1_tptc1_clkctrl, + (*prcm)->cm_l3main1_tptc2_clkctrl, + 0 + }; + + do_disable_clocks(clk_domains_edma3, + clk_modules_disable_edma3, + 1); +} +#endif + const struct ctrl_ioregs ioregs_omap5430 = { .ctrl_ddrch = DDR_IO_I_34OHM_SR_FASTEST_WD_DQ_NO_PULL_DQS_PULL_DOWN, .ctrl_lpddr2ch = DDR_IO_I_34OHM_SR_FASTEST_WD_CK_CKE_NCS_CA_PULL_DOWN, diff --git a/arch/arm/cpu/armv7/omap5/prcm-regs.c b/arch/arm/cpu/armv7/omap5/prcm-regs.c index cd51fe7678be..d01ce88306ee 100644 --- a/arch/arm/cpu/armv7/omap5/prcm-regs.c +++ b/arch/arm/cpu/armv7/omap5/prcm-regs.c @@ -989,4 +989,8 @@ struct prcm_regs const dra7xx_prcm = {
.prm_abbldo_mpu_setup = 0x4AE07DDC, .prm_abbldo_mpu_ctrl = 0x4AE07DE0, + + /*l3main1 edma*/ + .cm_l3main1_tptc1_clkctrl = 0x4a008778, + .cm_l3main1_tptc2_clkctrl = 0x4a008780, }; diff --git a/arch/arm/include/asm/omap_common.h b/arch/arm/include/asm/omap_common.h index 30e899ad46e3..1d758fc9050c 100644 --- a/arch/arm/include/asm/omap_common.h +++ b/arch/arm/include/asm/omap_common.h @@ -349,6 +349,10 @@ struct prcm_regs { /* IPU */ u32 cm_ipu_clkstctrl; u32 cm_ipu_i2c5_clkctrl; + + /*l3main1 edma*/ + u32 cm_l3main1_tptc1_clkctrl; + u32 cm_l3main1_tptc2_clkctrl; };
struct omap_sys_ctrl_regs { @@ -598,6 +602,11 @@ void recalibrate_iodelay(void);
void omap_smc1(u32 service, u32 val);
+#ifdef CONFIG_TI_EDMA3 +void enable_edma3_clocks(void); +void disable_edma3_clocks(void); +#endif + /* ABB */ #define OMAP_ABB_NOMINAL_OPP 0 #define OMAP_ABB_FAST_OPP 1

Adds functions to enable and disable edma3 clocks which can be invoked by drivers using edma3 to control the clocks.
Signed-off-by: Vignesh R vigneshr@ti.com --- arch/arm/cpu/armv7/am33xx/clock_am43xx.c | 36 ++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+)
diff --git a/arch/arm/cpu/armv7/am33xx/clock_am43xx.c b/arch/arm/cpu/armv7/am33xx/clock_am43xx.c index b1c0025eebe8..4a410a21952e 100644 --- a/arch/arm/cpu/armv7/am33xx/clock_am43xx.c +++ b/arch/arm/cpu/armv7/am33xx/clock_am43xx.c @@ -134,3 +134,39 @@ void enable_basic_clocks(void) /* For OPP100 the mac clock should be /5. */ writel(0x4, &cmdpll->clkselmacclk); } + +#ifdef CONFIG_TI_EDMA3 +void enable_edma3_clocks(void) +{ + u32 *const clk_domains_edma3[] = { + 0 + }; + + u32 *const clk_modules_explicit_en_edma3[] = { + &cmper->tpccclkctrl, + &cmper->tptc0clkctrl, + 0 + }; + + do_enable_clocks(clk_domains_edma3, + clk_modules_explicit_en_edma3, + 1); +} + +void disable_edma3_clocks(void) +{ + u32 *const clk_domains_edma3[] = { + 0 + }; + + u32 *const clk_modules_disable_edma3[] = { + &cmper->tpccclkctrl, + &cmper->tptc0clkctrl, + 0 + }; + + do_disable_clocks(clk_domains_edma3, + clk_modules_disable_edma3, + 1); +} +#endif

From: Tom Rini trini@ti.com
When doing a memory mapped copy we may have DMA available and thus need to have this copy abstracted so that the driver can do it, rather than a simple memcpy.
Signed-off-by: Vignesh R vigneshr@ti.com --- drivers/mtd/spi/sf_ops.c | 8 +++++++- include/spi.h | 3 +++ 2 files changed, 10 insertions(+), 1 deletion(-)
diff --git a/drivers/mtd/spi/sf_ops.c b/drivers/mtd/spi/sf_ops.c index 38592f518b72..900ec1f2a9ce 100644 --- a/drivers/mtd/spi/sf_ops.c +++ b/drivers/mtd/spi/sf_ops.c @@ -14,6 +14,7 @@ #include <spi.h> #include <spi_flash.h> #include <watchdog.h> +#include <linux/compiler.h>
#include "sf_internal.h"
@@ -378,6 +379,11 @@ int spi_flash_read_common(struct spi_flash *flash, const u8 *cmd, return ret; }
+void __weak spi_flash_copy_mmap(void *data, void *offset, size_t len) +{ + memcpy(data, offset, len); +} + int spi_flash_cmd_read_ops(struct spi_flash *flash, u32 offset, size_t len, void *data) { @@ -394,7 +400,7 @@ int spi_flash_cmd_read_ops(struct spi_flash *flash, u32 offset, return ret; } spi_xfer(flash->spi, 0, NULL, NULL, SPI_XFER_MMAP); - memcpy(data, flash->memory_map + offset, len); + spi_flash_copy_mmap(data, flash->memory_map + offset, len); spi_xfer(flash->spi, 0, NULL, NULL, SPI_XFER_MMAP_END); spi_release_bus(flash->spi); return 0; diff --git a/include/spi.h b/include/spi.h index 18362364cf0e..51fdfd6d7360 100644 --- a/include/spi.h +++ b/include/spi.h @@ -272,6 +272,9 @@ int spi_set_wordlen(struct spi_slave *slave, unsigned int wordlen); int spi_xfer(struct spi_slave *slave, unsigned int bitlen, const void *dout, void *din, unsigned long flags);
+/* Copy memory mapped data */ +void spi_flash_copy_mmap(void *data, void *offset, size_t len); + /** * Determine if a SPI chipselect is valid. * This function is provided by the board if the low-level SPI driver

ti_qspi uses memory map mode for faster read. Enabling DMA will increase read speed by 3x @48MHz on DRA74 EVM.
Signed-off-by: Vignesh R vigneshr@ti.com --- drivers/spi/ti_qspi.c | 93 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+)
diff --git a/drivers/spi/ti_qspi.c b/drivers/spi/ti_qspi.c index 3356c0f072e5..0c533e295962 100644 --- a/drivers/spi/ti_qspi.c +++ b/drivers/spi/ti_qspi.c @@ -13,6 +13,8 @@ #include <spi.h> #include <asm/gpio.h> #include <asm/omap_gpio.h> +#include <asm/omap_common.h> +#include <asm/ti-common/ti-edma3.h>
/* ti qpsi register bit masks */ #define QSPI_TIMEOUT 2000000 @@ -347,3 +349,94 @@ int spi_xfer(struct spi_slave *slave, unsigned int bitlen, const void *dout,
return 0; } +#ifdef CONFIG_TI_EDMA3 +void spi_flash_copy_mmap(void *data, void *offset, size_t len) +{ + struct edma3_slot_config slot; + struct edma3_channel_config edma_channel; + int b_cnt_value = 1; + int rem_bytes = 0; + int a_cnt_value = len; + unsigned int addr = (unsigned int) (data); + unsigned int max_acnt = 0x7FFFU; + unsigned int edma_slot_num = 1; + + if (len > max_acnt) { + b_cnt_value = (len / max_acnt); + rem_bytes = (len % max_acnt); + a_cnt_value = max_acnt; + } + + /* Invalidate the area, so no writeback into the RAM races with DMA */ + invalidate_dcache_range(addr, addr + roundup(len, ARCH_DMA_MINALIGN)); + + /* enable edma3 clocks */ + enable_edma3_clocks(); + /* Compute QSPI address and size */ + slot.opt = 0; + slot.src = ((unsigned int) offset); + slot.acnt = a_cnt_value; + slot.bcnt = b_cnt_value; + slot.ccnt = 1; + slot.src_bidx = a_cnt_value; + slot.dst_bidx = a_cnt_value; + slot.src_cidx = 0; + slot.dst_cidx = 0; + slot.link = EDMA3_PARSET_NULL_LINK; + slot.bcntrld = 0; + slot.opt = EDMA3_SLOPT_TRANS_COMP_INT_ENB | + EDMA3_SLOPT_COMP_CODE(0) | + EDMA3_SLOPT_STATIC | EDMA3_SLOPT_AB_SYNC; + + edma3_slot_configure(EDMA3_BASE, edma_slot_num, &slot); + edma_channel.slot = edma_slot_num; + edma_channel.chnum = 0; + edma_channel.complete_code = 0; + /* set event trigger to dst update */ + edma_channel.trigger_slot_word = EDMA3_TWORD(dst); + + qedma3_start(EDMA3_BASE, &edma_channel); + edma3_set_dest_addr(EDMA3_BASE, edma_channel.slot, addr); + + while (edma3_check_for_transfer(EDMA3_BASE, &edma_channel)) + ; + qedma3_stop(EDMA3_BASE, &edma_channel); + + if (rem_bytes != 0) { + /* Compute QSPI address and size */ + slot.opt = 0; + slot.src = + (b_cnt_value * max_acnt) + ((unsigned int) offset); + slot.acnt = rem_bytes; + slot.bcnt = 1; + slot.ccnt = 1; + slot.src_bidx = rem_bytes; + slot.dst_bidx = rem_bytes; + slot.src_cidx = 0; + slot.dst_cidx = 0; + slot.link = EDMA3_PARSET_NULL_LINK; + slot.bcntrld = 0; + slot.opt = 
EDMA3_SLOPT_TRANS_COMP_INT_ENB | + EDMA3_SLOPT_COMP_CODE(0) | + EDMA3_SLOPT_STATIC | EDMA3_SLOPT_AB_SYNC; + edma3_slot_configure(EDMA3_BASE, edma_slot_num, &slot); + edma_channel.slot = edma_slot_num; + edma_channel.chnum = 0; + edma_channel.complete_code = 0; + /* set event trigger to dst update */ + edma_channel.trigger_slot_word = EDMA3_TWORD(dst); + + qedma3_start(EDMA3_BASE, &edma_channel); + edma3_set_dest_addr(EDMA3_BASE, edma_channel.slot, addr + + (max_acnt * b_cnt_value)); + + while (edma3_check_for_transfer(EDMA3_BASE, &edma_channel)) + ; + qedma3_stop(EDMA3_BASE, &edma_channel); + } + *((unsigned int *)offset) += len; + + /* disable edma3 clocks */ + disable_edma3_clocks(); +} +#endif

On Fri, Jul 03, 2015 at 04:46:10PM +0530, Vignesh R wrote:
ti_qspi uses memory map mode for faster read. Enabling DMA will increase read speed by 3x @48MHz on DRA74 EVM.
Signed-off-by: Vignesh R vigneshr@ti.com
This ignores the feedback from http://lists.denx.de/pipermail/u-boot/2014-July/183715.html where we need to model the DMA changes on how it's done for mxs_spi.c

On 7/3/2015 5:12 PM, Tom Rini wrote:
On Fri, Jul 03, 2015 at 04:46:10PM +0530, Vignesh R wrote:
ti_qspi uses memory map mode for faster read. Enabling DMA will increase read speed by 3x @48MHz on DRA74 EVM.
Signed-off-by: Vignesh R vigneshr@ti.com
This ignores the feedback from http://lists.denx.de/pipermail/u-boot/2014-July/183715.html where we need to model the DMA changes on how it's done for mxs_spi.c
Sorry.. I didn't look into that before. mxs_spi uses peripheral DMA to read/write flash. But ti_qspi can use DMA to read from flash in mmap mode only. In current u-boot, defining CONFIG_TI_SPI_MMAP will make memory map address available (spi_flash->memory_map) to sf layer and spi_flash_cmd_read_ops() (in sf_ops.c) directly calls memcpy() to read data from flash into buffer. There is no spi_xfer() call to the ti_qspi driver at all.
In order to implement an mxs_spi-like approach for ti_qspi.c, I can delete the mmap handling in sf_ops.c (I don't think any other spi driver uses this part of the code), so that spi_xfer() is always called. Then, in the spi_xfer() implementation of ti_qspi, I can do the DMA transfer similarly to mxs_spi.c. Is this approach ok?
And are you ok with patch 1 and 2 of this series?
Regards Vignesh

On Saturday 04 July 2015 06:23 PM, R, Vignesh wrote:
On 7/3/2015 5:12 PM, Tom Rini wrote:
On Fri, Jul 03, 2015 at 04:46:10PM +0530, Vignesh R wrote:
ti_qspi uses memory map mode for faster read. Enabling DMA will increase read speed by 3x @48MHz on DRA74 EVM.
Signed-off-by: Vignesh R vigneshr@ti.com
This ignores the feedback from http://lists.denx.de/pipermail/u-boot/2014-July/183715.html where we need to model the DMA changes on how it's done for mxs_spi.c
Sorry.. I didn't look into that before. mxs_spi uses peripheral DMA to read/write flash. But ti_qspi can use DMA to read from flash in mmap mode only. In current u-boot, defining CONFIG_TI_SPI_MMAP will make memory map address available (spi_flash->memory_map) to sf layer and spi_flash_cmd_read_ops() (in sf_ops.c) directly calls memcpy() to read data from flash into buffer. There is no spi_xfer() call to the ti_qspi driver at all.
In order to implement mxs_spi like approach for ti_qspi.c, I can delete mmap handling in sf_ops.c( I don't think any other spi driver uses this part of code), so that spi_xfer() is always called. And then, in spi_xfer() implementation of ti_qspi, I can do DMA transfer similar to mxs_spi.c. Is this approach ok?
I think I misinterpreted the thread previously. The suggestion is to move the DMA initialization related code from ti_qspi.c to ti-edma3.c and use spi_flash_copy_mmap() just to pass addresses to the ti-edma3 APIs. Am I correct?

On 07/03/2015 05:12 PM, Tom Rini wrote:
On Fri, Jul 03, 2015 at 04:46:10PM +0530, Vignesh R wrote:
ti_qspi uses memory map mode for faster read. Enabling DMA will increase read speed by 3x @48MHz on DRA74 EVM.
Signed-off-by: Vignesh R vigneshr@ti.com
This ignores the feedback from http://lists.denx.de/pipermail/u-boot/2014-July/183715.html where we need to model the DMA changes on how it's done for mxs_spi.c
Is the following patch an acceptable solution?
8<-------------------------------------------------------------------
Move DMA related initialization code to helper function in ti-edma3 driver. Use this function for scheduling DMA transfer from ti_qspi driver.
diff --git a/arch/arm/include/asm/ti-common/ti-edma3.h b/arch/arm/include/asm/ti-common/ti-edma3.h index 5adc1dac0e65..6a7a321c1bdf 100644 --- a/arch/arm/include/asm/ti-common/ti-edma3.h +++ b/arch/arm/include/asm/ti-common/ti-edma3.h @@ -117,5 +117,7 @@ void edma3_set_src_addr(u32 base, int slot, u32 src); void edma3_set_transfer_params(u32 base, int slot, int acnt, int bcnt, int ccnt, u16 bcnt_rld, enum edma3_sync_dimension sync_mode); +void edma3_transfer(unsigned long edma3_base_addr, unsigned int + edma_slot_num, void *dst, void *src, size_t len);
#endif diff --git a/drivers/dma/ti-edma3.c b/drivers/dma/ti-edma3.c index 8184ded9fa81..d6a427f2e21d 100644 --- a/drivers/dma/ti-edma3.c +++ b/drivers/dma/ti-edma3.c @@ -382,3 +382,81 @@ void qedma3_stop(u32 base, struct edma3_channel_config *cfg) /* Clear the channel map */ __raw_writel(0, base + EDMA3_QCHMAP(cfg->chnum)); } + +void edma3_transfer(unsigned long edma3_base_addr, unsigned int + edma_slot_num, void *dst, void *src, size_t len) +{ + struct edma3_slot_config slot; + struct edma3_channel_config edma_channel; + int b_cnt_value = 1; + int rem_bytes = 0; + int a_cnt_value = len; + unsigned int addr = (unsigned int) (dst); + unsigned int max_acnt = 0x7FFFU; + + if (len > max_acnt) { + b_cnt_value = (len / max_acnt); + rem_bytes = (len % max_acnt); + a_cnt_value = max_acnt; + } + + slot.opt = 0; + slot.src = ((unsigned int) src); + slot.acnt = a_cnt_value; + slot.bcnt = b_cnt_value; + slot.ccnt = 1; + slot.src_bidx = a_cnt_value; + slot.dst_bidx = a_cnt_value; + slot.src_cidx = 0; + slot.dst_cidx = 0; + slot.link = EDMA3_PARSET_NULL_LINK; + slot.bcntrld = 0; + slot.opt = EDMA3_SLOPT_TRANS_COMP_INT_ENB | + EDMA3_SLOPT_COMP_CODE(0) | + EDMA3_SLOPT_STATIC | EDMA3_SLOPT_AB_SYNC; + + edma3_slot_configure(edma3_base_addr, edma_slot_num, &slot); + edma_channel.slot = edma_slot_num; + edma_channel.chnum = 0; + edma_channel.complete_code = 0; + /* set event trigger to dst update */ + edma_channel.trigger_slot_word = EDMA3_TWORD(dst); + + qedma3_start(edma3_base_addr, &edma_channel); + edma3_set_dest_addr(edma3_base_addr, edma_channel.slot, addr); + + while (edma3_check_for_transfer(edma3_base_addr, &edma_channel)) + ; + qedma3_stop(edma3_base_addr, &edma_channel); + + if (rem_bytes != 0) { + slot.opt = 0; + slot.src = + (b_cnt_value * max_acnt) + ((unsigned int) src); + slot.acnt = rem_bytes; + slot.bcnt = 1; + slot.ccnt = 1; + slot.src_bidx = rem_bytes; + slot.dst_bidx = rem_bytes; + slot.src_cidx = 0; + slot.dst_cidx = 0; + slot.link = EDMA3_PARSET_NULL_LINK; + 
slot.bcntrld = 0; + slot.opt = EDMA3_SLOPT_TRANS_COMP_INT_ENB | + EDMA3_SLOPT_COMP_CODE(0) | + EDMA3_SLOPT_STATIC | EDMA3_SLOPT_AB_SYNC; + edma3_slot_configure(edma3_base_addr, edma_slot_num, &slot); + edma_channel.slot = edma_slot_num; + edma_channel.chnum = 0; + edma_channel.complete_code = 0; + /* set event trigger to dst update */ + edma_channel.trigger_slot_word = EDMA3_TWORD(dst); + + qedma3_start(edma3_base_addr, &edma_channel); + edma3_set_dest_addr(edma3_base_addr, edma_channel.slot, addr + + (max_acnt * b_cnt_value)); + while (edma3_check_for_transfer(edma3_base_addr, &edma_channel)) + ; + qedma3_stop(edma3_base_addr, &edma_channel); + } +} diff --git a/drivers/spi/ti_qspi.c b/drivers/spi/ti_qspi.c index 3356c0f072e5..753d68980bd6 100644 --- a/drivers/spi/ti_qspi.c +++ b/drivers/spi/ti_qspi.c @@ -13,6 +13,8 @@ #include <spi.h> #include <asm/gpio.h> #include <asm/omap_gpio.h> +#include <asm/omap_common.h> +#include <asm/ti-common/ti-edma3.h>
/* ti qpsi register bit masks */ #define QSPI_TIMEOUT 2000000 @@ -347,3 +349,24 @@ int spi_xfer(struct spi_slave *slave, unsigned int bitlen, const void *dout,
return 0; } +#ifdef CONFIG_TI_EDMA3 +void spi_flash_copy_mmap(void *data, void *offset, size_t len) +{ + unsigned int addr = (unsigned int) (data); + unsigned int edma_slot_num = 1; + + /* Invalidate the area, so no writeback into the RAM races with DMA */ + invalidate_dcache_range(addr, addr + roundup(len, ARCH_DMA_MINALIGN)); + + /* enable edma3 clocks */ + enable_edma3_clocks(); + + /* Call edma3 api to do actual DMA transfer */ + edma3_transfer(EDMA3_BASE, edma_slot_num, data, offset, len); + + /* disable edma3 clocks */ + disable_edma3_clocks(); + + *((unsigned int *)offset) += len; +} +#endif

On Thu, Jul 09, 2015 at 12:10:03PM +0530, Vignesh R wrote:
On 07/03/2015 05:12 PM, Tom Rini wrote:
On Fri, Jul 03, 2015 at 04:46:10PM +0530, Vignesh R wrote:
ti_qspi uses memory map mode for faster read. Enabling DMA will increase read speed by 3x @48MHz on DRA74 EVM.
Signed-off-by: Vignesh R vigneshr@ti.com
This ignores the feedback from http://lists.denx.de/pipermail/u-boot/2014-July/183715.html where we need to model the DMA changes on how it's done for mxs_spi.c
Is the following patch an acceptable solution?
Jagan, are you OK with the SPI side of this? Thanks!
8<-------------------------------------------------------------------
Move DMA related initialization code to helper function in ti-edma3 driver. Use this function for scheduling DMA transfer from ti_qspi driver.
diff --git a/arch/arm/include/asm/ti-common/ti-edma3.h b/arch/arm/include/asm/ti-common/ti-edma3.h index 5adc1dac0e65..6a7a321c1bdf 100644 --- a/arch/arm/include/asm/ti-common/ti-edma3.h +++ b/arch/arm/include/asm/ti-common/ti-edma3.h @@ -117,5 +117,7 @@ void edma3_set_src_addr(u32 base, int slot, u32 src); void edma3_set_transfer_params(u32 base, int slot, int acnt, int bcnt, int ccnt, u16 bcnt_rld, enum edma3_sync_dimension sync_mode); +void edma3_transfer(unsigned long edma3_base_addr, unsigned int
edma_slot_num, void *dst, void *src, size_t len);
#endif diff --git a/drivers/dma/ti-edma3.c b/drivers/dma/ti-edma3.c index 8184ded9fa81..d6a427f2e21d 100644 --- a/drivers/dma/ti-edma3.c +++ b/drivers/dma/ti-edma3.c @@ -382,3 +382,81 @@ void qedma3_stop(u32 base, struct edma3_channel_config *cfg) /* Clear the channel map */ __raw_writel(0, base + EDMA3_QCHMAP(cfg->chnum)); }
+void edma3_transfer(unsigned long edma3_base_addr, unsigned int
edma_slot_num, void *dst, void *src, size_t len)
+{
- struct edma3_slot_config slot;
- struct edma3_channel_config edma_channel;
- int b_cnt_value = 1;
- int rem_bytes = 0;
- int a_cnt_value = len;
- unsigned int addr = (unsigned int) (dst);
- unsigned int max_acnt = 0x7FFFU;
- if (len > max_acnt) {
b_cnt_value = (len / max_acnt);
rem_bytes = (len % max_acnt);
a_cnt_value = max_acnt;
- }
- slot.opt = 0;
- slot.src = ((unsigned int) src);
- slot.acnt = a_cnt_value;
- slot.bcnt = b_cnt_value;
- slot.ccnt = 1;
- slot.src_bidx = a_cnt_value;
- slot.dst_bidx = a_cnt_value;
- slot.src_cidx = 0;
- slot.dst_cidx = 0;
- slot.link = EDMA3_PARSET_NULL_LINK;
- slot.bcntrld = 0;
- slot.opt = EDMA3_SLOPT_TRANS_COMP_INT_ENB |
EDMA3_SLOPT_COMP_CODE(0) |
EDMA3_SLOPT_STATIC | EDMA3_SLOPT_AB_SYNC;
- edma3_slot_configure(edma3_base_addr, edma_slot_num, &slot);
- edma_channel.slot = edma_slot_num;
- edma_channel.chnum = 0;
- edma_channel.complete_code = 0;
/* set event trigger to dst update */
- edma_channel.trigger_slot_word = EDMA3_TWORD(dst);
- qedma3_start(edma3_base_addr, &edma_channel);
- edma3_set_dest_addr(edma3_base_addr, edma_channel.slot, addr);
- while (edma3_check_for_transfer(edma3_base_addr, &edma_channel))
;
- qedma3_stop(edma3_base_addr, &edma_channel);
- if (rem_bytes != 0) {
slot.opt = 0;
slot.src =
(b_cnt_value * max_acnt) + ((unsigned int) src);
slot.acnt = rem_bytes;
slot.bcnt = 1;
slot.ccnt = 1;
slot.src_bidx = rem_bytes;
slot.dst_bidx = rem_bytes;
slot.src_cidx = 0;
slot.dst_cidx = 0;
slot.link = EDMA3_PARSET_NULL_LINK;
slot.bcntrld = 0;
slot.opt = EDMA3_SLOPT_TRANS_COMP_INT_ENB |
EDMA3_SLOPT_COMP_CODE(0) |
EDMA3_SLOPT_STATIC | EDMA3_SLOPT_AB_SYNC;
edma3_slot_configure(edma3_base_addr, edma_slot_num, &slot);
edma_channel.slot = edma_slot_num;
edma_channel.chnum = 0;
edma_channel.complete_code = 0;
/* set event trigger to dst update */
edma_channel.trigger_slot_word = EDMA3_TWORD(dst);
qedma3_start(edma3_base_addr, &edma_channel);
edma3_set_dest_addr(edma3_base_addr, edma_channel.slot, addr +
(max_acnt * b_cnt_value));
while (edma3_check_for_transfer(edma3_base_addr, &edma_channel))
;
qedma3_stop(edma3_base_addr, &edma_channel);
- }
+} diff --git a/drivers/spi/ti_qspi.c b/drivers/spi/ti_qspi.c index 3356c0f072e5..753d68980bd6 100644 --- a/drivers/spi/ti_qspi.c +++ b/drivers/spi/ti_qspi.c @@ -13,6 +13,8 @@ #include <spi.h> #include <asm/gpio.h> #include <asm/omap_gpio.h> +#include <asm/omap_common.h> +#include <asm/ti-common/ti-edma3.h>
/* ti qpsi register bit masks */ #define QSPI_TIMEOUT 2000000 @@ -347,3 +349,24 @@ int spi_xfer(struct spi_slave *slave, unsigned int bitlen, const void *dout,
return 0; } +#ifdef CONFIG_TI_EDMA3 +void spi_flash_copy_mmap(void *data, void *offset, size_t len) +{
- unsigned int addr = (unsigned int) (data);
- unsigned int edma_slot_num = 1;
- /* Invalidate the area, so no writeback into the RAM races with DMA */
- invalidate_dcache_range(addr, addr + roundup(len, ARCH_DMA_MINALIGN));
- /* enable edma3 clocks */
- enable_edma3_clocks();
- /* Call edma3 api to do actual DMA transfer */
- edma3_transfer(EDMA3_BASE, edma_slot_num, data, offset, len);
- /* disable edma3 clocks */
- disable_edma3_clocks();
- *((unsigned int *)offset) += len;
+} +#endif
-- Regards Vignesh

On 7/15/2015 12:32 AM, Tom Rini wrote:
On Thu, Jul 09, 2015 at 12:10:03PM +0530, Vignesh R wrote:
On 07/03/2015 05:12 PM, Tom Rini wrote:
On Fri, Jul 03, 2015 at 04:46:10PM +0530, Vignesh R wrote:
ti_qspi uses memory map mode for faster read. Enabling DMA will increase read speed by 3x @48MHz on DRA74 EVM.
Signed-off-by: Vignesh R vigneshr@ti.com
This ignores the feedback from http://lists.denx.de/pipermail/u-boot/2014-July/183715.html where we need to model the DMA changes on how it's done for mxs_spi.c
Is the following patch an acceptable solution?
Jagan, are you OK with the SPI side of this? Thanks!
Gentle ping... Any comments? I will send a v2 for this series if the below patch is acceptable.
8<-------------------------------------------------------------------
Move DMA related initialization code to helper function in ti-edma3 driver. Use this function for scheduling DMA transfer from ti_qspi driver.
diff --git a/arch/arm/include/asm/ti-common/ti-edma3.h b/arch/arm/include/asm/ti-common/ti-edma3.h index 5adc1dac0e65..6a7a321c1bdf 100644 --- a/arch/arm/include/asm/ti-common/ti-edma3.h +++ b/arch/arm/include/asm/ti-common/ti-edma3.h @@ -117,5 +117,7 @@ void edma3_set_src_addr(u32 base, int slot, u32 src); void edma3_set_transfer_params(u32 base, int slot, int acnt, int bcnt, int ccnt, u16 bcnt_rld, enum edma3_sync_dimension sync_mode); +void edma3_transfer(unsigned long edma3_base_addr, unsigned int
edma_slot_num, void *dst, void *src, size_t len);
#endif diff --git a/drivers/dma/ti-edma3.c b/drivers/dma/ti-edma3.c index 8184ded9fa81..d6a427f2e21d 100644 --- a/drivers/dma/ti-edma3.c +++ b/drivers/dma/ti-edma3.c @@ -382,3 +382,81 @@ void qedma3_stop(u32 base, struct edma3_channel_config *cfg) /* Clear the channel map */ __raw_writel(0, base + EDMA3_QCHMAP(cfg->chnum)); }
+void edma3_transfer(unsigned long edma3_base_addr, unsigned int
edma_slot_num, void *dst, void *src, size_t len)
+{
- struct edma3_slot_config slot;
- struct edma3_channel_config edma_channel;
- int b_cnt_value = 1;
- int rem_bytes = 0;
- int a_cnt_value = len;
- unsigned int addr = (unsigned int) (dst);
- unsigned int max_acnt = 0x7FFFU;
- if (len > max_acnt) {
b_cnt_value = (len / max_acnt);
rem_bytes = (len % max_acnt);
a_cnt_value = max_acnt;
- }
- slot.opt = 0;
- slot.src = ((unsigned int) src);
- slot.acnt = a_cnt_value;
- slot.bcnt = b_cnt_value;
- slot.ccnt = 1;
- slot.src_bidx = a_cnt_value;
- slot.dst_bidx = a_cnt_value;
- slot.src_cidx = 0;
- slot.dst_cidx = 0;
- slot.link = EDMA3_PARSET_NULL_LINK;
- slot.bcntrld = 0;
- slot.opt = EDMA3_SLOPT_TRANS_COMP_INT_ENB |
EDMA3_SLOPT_COMP_CODE(0) |
EDMA3_SLOPT_STATIC | EDMA3_SLOPT_AB_SYNC;
- edma3_slot_configure(edma3_base_addr, edma_slot_num, &slot);
- edma_channel.slot = edma_slot_num;
- edma_channel.chnum = 0;
- edma_channel.complete_code = 0;
/* set event trigger to dst update */
- edma_channel.trigger_slot_word = EDMA3_TWORD(dst);
- qedma3_start(edma3_base_addr, &edma_channel);
- edma3_set_dest_addr(edma3_base_addr, edma_channel.slot, addr);
- while (edma3_check_for_transfer(edma3_base_addr, &edma_channel))
;
- qedma3_stop(edma3_base_addr, &edma_channel);
- if (rem_bytes != 0) {
slot.opt = 0;
slot.src =
(b_cnt_value * max_acnt) + ((unsigned int) src);
slot.acnt = rem_bytes;
slot.bcnt = 1;
slot.ccnt = 1;
slot.src_bidx = rem_bytes;
slot.dst_bidx = rem_bytes;
slot.src_cidx = 0;
slot.dst_cidx = 0;
slot.link = EDMA3_PARSET_NULL_LINK;
slot.bcntrld = 0;
slot.opt = EDMA3_SLOPT_TRANS_COMP_INT_ENB |
EDMA3_SLOPT_COMP_CODE(0) |
EDMA3_SLOPT_STATIC | EDMA3_SLOPT_AB_SYNC;
edma3_slot_configure(edma3_base_addr, edma_slot_num, &slot);
edma_channel.slot = edma_slot_num;
edma_channel.chnum = 0;
edma_channel.complete_code = 0;
/* set event trigger to dst update */
edma_channel.trigger_slot_word = EDMA3_TWORD(dst);
qedma3_start(edma3_base_addr, &edma_channel);
edma3_set_dest_addr(edma3_base_addr, edma_channel.slot, addr +
(max_acnt * b_cnt_value));
while (edma3_check_for_transfer(edma3_base_addr, &edma_channel))
;
qedma3_stop(edma3_base_addr, &edma_channel);
- }
+} diff --git a/drivers/spi/ti_qspi.c b/drivers/spi/ti_qspi.c index 3356c0f072e5..753d68980bd6 100644 --- a/drivers/spi/ti_qspi.c +++ b/drivers/spi/ti_qspi.c @@ -13,6 +13,8 @@ #include <spi.h> #include <asm/gpio.h> #include <asm/omap_gpio.h> +#include <asm/omap_common.h> +#include <asm/ti-common/ti-edma3.h>
/* ti qpsi register bit masks */ #define QSPI_TIMEOUT 2000000 @@ -347,3 +349,24 @@ int spi_xfer(struct spi_slave *slave, unsigned int bitlen, const void *dout,
return 0; } +#ifdef CONFIG_TI_EDMA3 +void spi_flash_copy_mmap(void *data, void *offset, size_t len) +{
- unsigned int addr = (unsigned int) (data);
- unsigned int edma_slot_num = 1;
- /* Invalidate the area, so no writeback into the RAM races with DMA */
- invalidate_dcache_range(addr, addr + roundup(len, ARCH_DMA_MINALIGN));
- /* enable edma3 clocks */
- enable_edma3_clocks();
- /* Call edma3 api to do actual DMA transfer */
- edma3_transfer(EDMA3_BASE, edma_slot_num, data, offset, len);
- /* disable edma3 clocks */
- disable_edma3_clocks();
- *((unsigned int *)offset) += len;
+} +#endif
-- Regards Vignesh

Add BIT(x) macro definition to edma3 driver file. Fixes following compiler warning when CONFIG_TI_EDMA3 is enabled for am437x and dra7x:
drivers/dma/ti-edma3.c: In function ‘edma3_set_dest’: drivers/dma/ti-edma3.c:92:10: warning: implicit declaration of function ‘BIT’ [-Wimplicit-function-declaration]
Signed-off-by: Vignesh R vigneshr@ti.com --- drivers/dma/ti-edma3.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/drivers/dma/ti-edma3.c b/drivers/dma/ti-edma3.c index 8184ded9fa81..638cf525a278 100644 --- a/drivers/dma/ti-edma3.c +++ b/drivers/dma/ti-edma3.c @@ -13,6 +13,8 @@ #include <common.h> #include <asm/ti-common/ti-edma3.h>
+#define BIT(x) (1 << (x)) + #define EDMA3_SL_BASE(slot) (0x4000 + ((slot) << 5)) #define EDMA3_SL_MAX_NUM 512 #define EDMA3_SLOPT_FIFO_WIDTH_MASK (0x7 << 8)

Vignesh wrote...
[snip]
+#define BIT(x) (1 << (x))
Is this not something that would be better placed in a global header file somewhere, rather than starting a trend of per-driver, per-arch, etc. definitions?
Andy.

Enable TI_EDMA3 and SPL_DMA support, so as to reduce boot time. With DMA enabled there is an almost 3x improvement in read performance. This helps in reducing boot time in qspiboot mode.
Also add EDMA3 base address for DRA7XX and AM57XX.
Signed-off-by: Vignesh R vigneshr@ti.com --- arch/arm/include/asm/arch-omap5/hardware.h | 5 +++++ include/configs/dra7xx_evm.h | 2 ++ 2 files changed, 7 insertions(+)
diff --git a/arch/arm/include/asm/arch-omap5/hardware.h b/arch/arm/include/asm/arch-omap5/hardware.h index f7011b4e904a..a5bd60009289 100644 --- a/arch/arm/include/asm/arch-omap5/hardware.h +++ b/arch/arm/include/asm/arch-omap5/hardware.h @@ -23,4 +23,9 @@ /* GPMC Base address */ #define GPMC_BASE 0x50000000
+/* EDMA3 Base address for DRA7XX and AM57XX */ +#if defined(CONFIG_DRA7XX) || defined(CONFIG_AM57XX) +#define EDMA3_BASE 0x43300000 +#endif + #endif diff --git a/include/configs/dra7xx_evm.h b/include/configs/dra7xx_evm.h index d84427d1d8bc..fe09aac27f71 100644 --- a/include/configs/dra7xx_evm.h +++ b/include/configs/dra7xx_evm.h @@ -165,6 +165,8 @@
/* SPI SPL */ #define CONFIG_SPL_SPI_SUPPORT +#define CONFIG_SPL_DMA_SUPPORT +#define CONFIG_TI_EDMA3 #define CONFIG_SPL_SPI_LOAD #define CONFIG_SPL_SPI_FLASH_SUPPORT #define CONFIG_SYS_SPI_U_BOOT_OFFS 0x40000

Enable CONFIG_TI_EDMA3 for am43xx; this increases read performance by 1.5x.
Also add EDMA3 base address for am43xx.
Signed-off-by: Vignesh R vigneshr@ti.com --- arch/arm/include/asm/arch-am33xx/hardware_am43xx.h | 3 +++ include/configs/am43xx_evm.h | 2 ++ 2 files changed, 5 insertions(+)
diff --git a/arch/arm/include/asm/arch-am33xx/hardware_am43xx.h b/arch/arm/include/asm/arch-am33xx/hardware_am43xx.h index 479893e47ea1..a7da6b5cfde6 100644 --- a/arch/arm/include/asm/arch-am33xx/hardware_am43xx.h +++ b/arch/arm/include/asm/arch-am33xx/hardware_am43xx.h @@ -98,4 +98,7 @@
#define NUM_CRYSTAL_FREQ 0x4
+/* EDMA3 Base Address */ +#define EDMA3_BASE 0x49000000 + #endif /* __AM43XX_HARDWARE_AM43XX_H */ diff --git a/include/configs/am43xx_evm.h b/include/configs/am43xx_evm.h index 33e534a76583..5f733ea9905b 100644 --- a/include/configs/am43xx_evm.h +++ b/include/configs/am43xx_evm.h @@ -208,6 +208,8 @@ #define CONFIG_SF_DEFAULT_SPEED 48000000 #define CONFIG_DEFAULT_SPI_MODE SPI_MODE_3
+#define CONFIG_TI_EDMA3 + /* Enhance our eMMC support / experience. */ #define CONFIG_CMD_GPT #define CONFIG_EFI_PARTITION
participants (4)
-
Andy Pont
-
R, Vignesh
-
Tom Rini
-
Vignesh R