[U-Boot] [PATCH 0/9] iMX/MXS: Get ready for cache

This series of patches prepares i.MX platforms based on MXS (i.MX28, i.MX6Q) for the use of caches. This patchset prepares:

- Ethernet: FEC MXC is ready to use caches
- NAND: NAND driver is ready to use caches
- SPI/MMC: i.MX28 MMC driver is ready to use caches
Eric Nelson (2):
  net: force PKTALIGN to ARCH_DMA_MINALIGN
  net: fec_mxc: allow use with cache enabled

Marek Vasut (7):
  ARM926EJS: Implement cache operations
  i.MX28: Add cache support into the APBH DMA driver
  i.MX28: Add cache support to MXS NAND driver
  MMC: Implement generic bounce buffer
  i.MX28: Do data transfers via DMA in MMC driver
  i.MX28: Make use of the bounce buffer
  i.MX28: Enable caches by default
 arch/arm/cpu/arm926ejs/cache.c     |   66 +++++++--
 arch/arm/cpu/arm926ejs/mx28/mx28.c |   10 ++
 drivers/dma/apbh_dma.c             |   23 +++-
 drivers/mmc/mmc.c                  |  102 +++++++++++++-
 drivers/mmc/mxsmmc.c               |   70 +++++----
 drivers/mtd/nand/mxs_nand.c        |   53 +++++++-
 drivers/net/fec_mxc.c              |  277 +++++++++++++++++++++++++-----------
 drivers/net/fec_mxc.h              |   19 +---
 include/configs/m28evk.h           |    1 +
 include/configs/mx28evk.h          |    1 +
 include/net.h                      |    3 +-
 11 files changed, 471 insertions(+), 154 deletions(-)
Cc: Andy Fleming <afleming@gmail.com>
Cc: Fabio Estevam <festevam@gmail.com>
Cc: Stefano Babic <sbabic@denx.de>
Cc: Eric Nelson <eric.nelson@boundarydevices.com>

Signed-off-by: Marek Vasut <marex@denx.de>
Cc: Stefano Babic <sbabic@denx.de>
---
 arch/arm/cpu/arm926ejs/cache.c |   66 ++++++++++++++++++++++++++++++-------
 1 files changed, 54 insertions(+), 12 deletions(-)
diff --git a/arch/arm/cpu/arm926ejs/cache.c b/arch/arm/cpu/arm926ejs/cache.c
index 504f604..5b23e3a 100644
--- a/arch/arm/cpu/arm926ejs/cache.c
+++ b/arch/arm/cpu/arm926ejs/cache.c
@@ -23,29 +23,71 @@
 #include <common.h>

 #ifndef CONFIG_SYS_DCACHE_OFF
-static inline void dcache_noop(void)
+
+#ifndef CONFIG_SYS_CACHELINE_SIZE
+#define CONFIG_SYS_CACHELINE_SIZE	32
+#endif
+
+void invalidate_dcache_all(void)
 {
-	if (dcache_status()) {
-		puts("WARNING: cache operations are not implemented!\n"
-		     "WARNING: disabling D-Cache now, you can re-enable it"
-		     "later with 'dcache on' command\n");
-		dcache_disable();
-	}
+	asm volatile("mcr p15, 0, %0, c7, c6, 0\n"::"r"(0));
 }

-void invalidate_dcache_all(void)
+void flush_dcache_all(void)
 {
-	dcache_noop();
+	asm volatile(
+		"0:"
+		"mrc p15, 0, r15, c7, c14, 3\n"
+		"bne 0b\n"
+		"mcr p15, 0, %0, c7, c10, 4\n"
+		::"r"(0):"memory"
+	);
+}
+
+static int check_cache_range(unsigned long start, unsigned long stop)
+{
+	int ok = 1;
+
+	if (start & (CONFIG_SYS_CACHELINE_SIZE - 1))
+		ok = 0;
+
+	if (stop & (CONFIG_SYS_CACHELINE_SIZE - 1))
+		ok = 0;
+
+	if (!ok)
+		printf("CACHE: Misaligned operation at range [%08lx, %08lx]\n",
+			start, stop);
+
+	return ok;
 }

 void invalidate_dcache_range(unsigned long start, unsigned long stop)
 {
-	dcache_noop();
+	if (!check_cache_range(start, stop))
+		return;
+
+	while (start < stop) {
+		asm volatile("mcr p15, 0, %0, c7, c6, 1\n"::"r"(start));
+		start += CONFIG_SYS_CACHELINE_SIZE;
+	}
 }

 void flush_dcache_range(unsigned long start, unsigned long stop)
 {
-	dcache_noop();
+	if (!check_cache_range(start, stop))
+		return;
+
+	while (start < stop) {
+		asm volatile("mcr p15, 0, %0, c7, c14, 1\n"::"r"(start));
+		start += CONFIG_SYS_CACHELINE_SIZE;
+	}
+
+	asm("mcr p15, 0, %0, c7, c10, 4\n"::"r"(0));
+}
+
+void flush_cache(unsigned long start, unsigned long size)
+{
+	flush_dcache_range(start, start + size);
 }
 #else /* #ifndef CONFIG_SYS_DCACHE_OFF */
 void invalidate_dcache_all(void)
@@ -64,7 +106,7 @@ void flush_dcache_range(unsigned long start, unsigned long stop)
 {
 }

-void flush_cache(unsigned long start, unsigned long size)
+void flush_cache(unsigned long start, unsigned long size)
 {
 }
 #endif /* #ifndef CONFIG_SYS_DCACHE_OFF */
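These range operations refuse misaligned requests outright, so every caller has to own cache-line-aligned, cache-line-sized buffers. A minimal sketch of a conforming allocation (a hypothetical helper, not part of the patch):

#include <common.h>
#include <malloc.h>

/* Hypothetical helper: allocate a buffer that flush_dcache_range() and
 * invalidate_dcache_range() will accept without complaint. */
static void *cache_safe_alloc(size_t len)
{
	/* Both the start address and the length are padded to whole cache
	 * lines, so no maintenance operation ever touches a neighbour. */
	size_t size = roundup(len, CONFIG_SYS_CACHELINE_SIZE);

	return memalign(CONFIG_SYS_CACHELINE_SIZE, size);
}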

On 16/03/2012 05:33, Marek Vasut wrote:
> Signed-off-by: Marek Vasut <marex@denx.de>
> Cc: Stefano Babic <sbabic@denx.de>
> [...]
Acked-by: Stefano Babic <sbabic@denx.de>

Best regards,
Stefano Babic

mxs_dma_desc_append() now flushes the descriptors into RAM, so the DMA engine never operates on stale descriptor data.
Signed-off-by: Marek Vasut <marex@denx.de>
Cc: Stefano Babic <sbabic@denx.de>
---
 drivers/dma/apbh_dma.c |   23 ++++++++++++++++++++++-
 1 files changed, 22 insertions(+), 1 deletions(-)
diff --git a/drivers/dma/apbh_dma.c b/drivers/dma/apbh_dma.c
index e85f5fe..c086629 100644
--- a/drivers/dma/apbh_dma.c
+++ b/drivers/dma/apbh_dma.c
@@ -93,6 +93,21 @@ static int mxs_dma_read_semaphore(int channel)
 	return tmp;
 }

+#ifndef CONFIG_SYS_DCACHE_OFF
+void mxs_dma_flush_desc(struct mxs_dma_desc *desc)
+{
+	uint32_t addr;
+	uint32_t size;
+
+	addr = (uint32_t)desc;
+	size = roundup(sizeof(struct mxs_dma_desc), MXS_DMA_ALIGNMENT);
+
+	flush_dcache_range(addr, addr + size);
+}
+#else
+inline void mxs_dma_flush_desc(struct mxs_dma_desc *desc) {}
+#endif
+
 /*
  * Enable a DMA channel.
  *
@@ -329,8 +344,10 @@ static int mxs_dma_release(int channel)
 struct mxs_dma_desc *mxs_dma_desc_alloc(void)
 {
 	struct mxs_dma_desc *pdesc;
+	uint32_t size;

-	pdesc = memalign(MXS_DMA_ALIGNMENT, sizeof(struct mxs_dma_desc));
+	size = roundup(sizeof(struct mxs_dma_desc), MXS_DMA_ALIGNMENT);
+	pdesc = memalign(MXS_DMA_ALIGNMENT, size);

 	if (pdesc == NULL)
 		return NULL;
@@ -415,12 +432,16 @@ int mxs_dma_desc_append(int channel, struct mxs_dma_desc *pdesc)

 		last->cmd.next = mxs_dma_cmd_address(pdesc);
 		last->cmd.data |= MXS_DMA_DESC_CHAIN;
+
+		mxs_dma_flush_desc(last);
 	}
 	pdesc->flags |= MXS_DMA_DESC_READY;
 	if (pdesc->flags & MXS_DMA_DESC_FIRST)
 		pchan->pending_num++;
 	list_add_tail(&pdesc->node, &pchan->active);

+	mxs_dma_flush_desc(pdesc);
+
 	return ret;
}
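To illustrate the resulting contract for driver code, here is a condensed, hypothetical read-from-device sequence. The mxs_dma names are the real ones; the wrapper function is invented for illustration:

/* Hypothetical wrapper: receive 'len' bytes from a device into 'buffer'.
 * The descriptor itself no longer needs manual flushing; that is now
 * handled inside mxs_dma_desc_append(). */
static int dma_read_from_device(int channel, void *buffer, uint32_t len)
{
	struct mxs_dma_desc *d = mxs_dma_desc_alloc();

	if (!d)
		return -ENOMEM;

	/* DMA_WRITE = the engine writes to memory (a read from the device) */
	d->cmd.data = MXS_DMA_DESC_COMMAND_DMA_WRITE | MXS_DMA_DESC_IRQ |
		      MXS_DMA_DESC_DEC_SEM |
		      (len << MXS_DMA_DESC_BYTES_OFFSET);
	d->cmd.address = (dma_addr_t)buffer;

	mxs_dma_desc_append(channel, d);	/* flushes 'd' to DRAM */
	return mxs_dma_go(channel);
}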

Signed-off-by: Marek Vasut <marex@denx.de>
Cc: Stefano Babic <sbabic@denx.de>
---
 drivers/mtd/nand/mxs_nand.c |   53 ++++++++++++++++++++++++++++++++++++++---
 1 files changed, 50 insertions(+), 3 deletions(-)
diff --git a/drivers/mtd/nand/mxs_nand.c b/drivers/mtd/nand/mxs_nand.c
index ce2a326..4b1297a 100644
--- a/drivers/mtd/nand/mxs_nand.c
+++ b/drivers/mtd/nand/mxs_nand.c
@@ -50,6 +50,7 @@ struct mxs_nand_info {
 	int		cur_chip;

 	uint32_t	cmd_queue_len;
+	uint32_t	data_buf_size;

 	uint8_t		*cmd_buf;
 	uint8_t		*data_buf;
@@ -73,6 +74,36 @@ struct mxs_nand_info {

 struct nand_ecclayout fake_ecc_layout;

+/*
+ * Cache management functions
+ */
+#ifndef CONFIG_SYS_DCACHE_OFF
+static void mxs_nand_flush_data_buf(struct mxs_nand_info *info)
+{
+	uint32_t addr = (uint32_t)info->data_buf;
+
+	flush_dcache_range(addr, addr + info->data_buf_size);
+}
+
+static void mxs_nand_inval_data_buf(struct mxs_nand_info *info)
+{
+	uint32_t addr = (uint32_t)info->data_buf;
+
+	invalidate_dcache_range(addr, addr + info->data_buf_size);
+}
+
+static void mxs_nand_flush_cmd_buf(struct mxs_nand_info *info)
+{
+	uint32_t addr = (uint32_t)info->cmd_buf;
+
+	flush_dcache_range(addr, addr + MXS_NAND_COMMAND_BUFFER_SIZE);
+}
+#else
+static inline void mxs_nand_flush_data_buf(struct mxs_nand_info *info) {}
+static inline void mxs_nand_inval_data_buf(struct mxs_nand_info *info) {}
+static inline void mxs_nand_flush_cmd_buf(struct mxs_nand_info *info) {}
+#endif
+
 static struct mxs_dma_desc *mxs_nand_get_dma_desc(struct mxs_nand_info *info)
 {
 	struct mxs_dma_desc *desc;
@@ -286,6 +317,9 @@ static void mxs_nand_cmd_ctrl(struct mtd_info *mtd, int data, unsigned int ctrl)

 	mxs_dma_desc_append(channel, d);

+	/* Flush caches */
+	mxs_nand_flush_cmd_buf(nand_info);
+
 	/* Execute the DMA chain. */
 	ret = mxs_dma_go(channel);
 	if (ret)
@@ -435,6 +469,9 @@ static void mxs_nand_read_buf(struct mtd_info *mtd, uint8_t *buf, int length)
 		goto rtn;
 	}

+	/* Invalidate caches */
+	mxs_nand_inval_data_buf(nand_info);
+
 	memcpy(buf, nand_info->data_buf, length);

 rtn:
@@ -484,6 +521,9 @@ static void mxs_nand_write_buf(struct mtd_info *mtd, const uint8_t *buf,

 	mxs_dma_desc_append(channel, d);

+	/* Flush caches */
+	mxs_nand_flush_data_buf(nand_info);
+
 	/* Execute the DMA chain. */
 	ret = mxs_dma_go(channel);
 	if (ret)
@@ -600,6 +640,9 @@ static int mxs_nand_ecc_read_page(struct mtd_info *mtd, struct nand_chip *nand,
 		goto rtn;
 	}

+	/* Invalidate caches */
+	mxs_nand_inval_data_buf(nand_info);
+
 	/* Read DMA completed, now do the mark swapping. */
 	mxs_nand_swap_block_mark(mtd, nand_info->data_buf, nand_info->oob_buf);

@@ -687,6 +730,9 @@ static void mxs_nand_ecc_write_page(struct mtd_info *mtd,

 	mxs_dma_desc_append(channel, d);

+	/* Flush caches */
+	mxs_nand_flush_data_buf(nand_info);
+
 	/* Execute the DMA chain. */
 	ret = mxs_dma_go(channel);
 	if (ret) {
@@ -978,18 +1024,19 @@ int mxs_nand_alloc_buffers(struct mxs_nand_info *nand_info)
 	uint8_t *buf;
 	const int size = NAND_MAX_PAGESIZE + NAND_MAX_OOBSIZE;

+	nand_info->data_buf_size = roundup(size, MXS_DMA_ALIGNMENT);
+
 	/* DMA buffers */
-	buf = memalign(MXS_DMA_ALIGNMENT, size);
+	buf = memalign(MXS_DMA_ALIGNMENT, nand_info->data_buf_size);
 	if (!buf) {
 		printf("MXS NAND: Error allocating DMA buffers\n");
 		return -ENOMEM;
 	}

-	memset(buf, 0, size);
+	memset(buf, 0, nand_info->data_buf_size);

 	nand_info->data_buf = buf;
 	nand_info->oob_buf = buf + NAND_MAX_PAGESIZE;
-
 	/* Command buffers */
 	nand_info->cmd_buf = memalign(MXS_DMA_ALIGNMENT,
 				MXS_NAND_COMMAND_BUFFER_SIZE);
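The pattern used throughout this driver is worth stating on its own: flush before the engine reads from a buffer, invalidate after the engine has written to one. A condensed sketch with hypothetical helper names, assuming the buffers were allocated cache-line aligned as above:

/* CPU filled 'buf' and DMA will read it: push dirty lines to DRAM first */
static void dma_will_read(void *buf, size_t len)
{
	uint32_t addr = (uint32_t)buf;

	flush_dcache_range(addr, addr + roundup(len, MXS_DMA_ALIGNMENT));
}

/* DMA filled 'buf' and CPU will read it: drop the stale cached lines */
static void dma_has_written(void *buf, size_t len)
{
	uint32_t addr = (uint32_t)buf;

	invalidate_dcache_range(addr, addr + roundup(len, MXS_DMA_ALIGNMENT));
}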

This implements a generic bounce buffer at the end of the MMC command submission chain: if unaligned data are passed in, they are copied into an aligned buffer first. This logic should eventually be pushed down into the MMC subsystem, to squash all the places that generate such unaligned data.
Signed-off-by: Marek Vasut <marex@denx.de>
Cc: Andy Fleming <afleming@gmail.com>
---
 drivers/mmc/mmc.c |  102 +++++++++++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 99 insertions(+), 3 deletions(-)
diff --git a/drivers/mmc/mmc.c b/drivers/mmc/mmc.c
index 49c3349..07b6032 100644
--- a/drivers/mmc/mmc.c
+++ b/drivers/mmc/mmc.c
@@ -47,10 +47,105 @@ int __board_mmc_getcd(struct mmc *mmc) {
 int board_mmc_getcd(struct mmc *mmc)__attribute__((weak, alias("__board_mmc_getcd")));

+#ifdef CONFIG_MMC_BOUNCE_BUFFER
+static int mmc_bounce_need_bounce(struct mmc_data *orig)
+{
+	ulong addr, len;
+
+	if (orig->flags & MMC_DATA_READ)
+		addr = (ulong)orig->dest;
+	else
+		addr = (ulong)orig->src;
+
+	if (addr % ARCH_DMA_MINALIGN) {
+		debug("MMC: Unaligned data destination address %08lx!\n", addr);
+		return 1;
+	}
+
+	len = (ulong)(orig->blocksize * orig->blocks);
+	if (len % ARCH_DMA_MINALIGN) {
+		debug("MMC: Unaligned data destination length %08lx!\n", len);
+		return 1;
+	}
+
+	return 0;
+}
+
+static int mmc_bounce_buffer_start(struct mmc_data *backup,
+				   struct mmc_data *orig)
+{
+	ulong origlen, len;
+	void *buffer;
+
+	if (!orig)
+		return 0;
+
+	if (!mmc_bounce_need_bounce(orig))
+		return 0;
+
+	memcpy(backup, orig, sizeof(struct mmc_data));
+
+	origlen = orig->blocksize * orig->blocks;
+	len = roundup(origlen, ARCH_DMA_MINALIGN);
+	buffer = memalign(ARCH_DMA_MINALIGN, len);
+	if (!buffer) {
+		puts("MMC: Error allocating MMC bounce buffer!\n");
+		return 1;
+	}
+
+	if (orig->flags & MMC_DATA_READ) {
+		orig->dest = buffer;
+	} else {
+		memcpy(buffer, orig->src, origlen);
+		orig->src = buffer;
+	}
+
+	return 0;
+}
+
+static void mmc_bounce_buffer_stop(struct mmc_data *backup,
+				   struct mmc_data *orig)
+{
+	ulong len;
+
+	if (!orig)
+		return;
+
+	if (!mmc_bounce_need_bounce(backup))
+		return;
+
+	if (backup->flags & MMC_DATA_READ) {
+		len = backup->blocksize * backup->blocks;
+		memcpy(backup->dest, orig->dest, len);
+		free(orig->dest);
+		orig->dest = backup->dest;
+	} else {
+		free((void *)orig->src);
+		orig->src = backup->src;
+	}
+}
+#else
+static inline int mmc_bounce_buffer_start(struct mmc_data *backup,
+					  struct mmc_data *orig)
+{
+	return 0;
+}
+static inline void mmc_bounce_buffer_stop(struct mmc_data *backup,
+					  struct mmc_data *orig)
+{
+}
+#endif
+
 int mmc_send_cmd(struct mmc *mmc, struct mmc_cmd *cmd, struct mmc_data *data)
 {
-#ifdef CONFIG_MMC_TRACE
+	struct mmc_data backup;
 	int ret;
+
+	memset(&backup, 0, sizeof(backup));
+
+	ret = mmc_bounce_buffer_start(&backup, data);
+	if (ret)
+		return ret;
+
+#ifdef CONFIG_MMC_TRACE
 	int i;
 	u8 *ptr;

@@ -99,10 +194,11 @@ int mmc_send_cmd(struct mmc *mmc, struct mmc_cmd *cmd, struct mmc_data *data)
 		printf("\t\tERROR MMC rsp not supported\n");
 		break;
 	}
-	return ret;
 #else
-	return mmc->send_cmd(mmc, cmd, data);
+	ret = mmc->send_cmd(mmc, cmd, data);
 #endif
+	mmc_bounce_buffer_stop(&backup, data);
+	return ret;
 }

 int mmc_send_status(struct mmc *mmc, int timeout)
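From a caller's point of view, nothing changes except that misaligned buffers now work. A hypothetical single-block read with an arbitrarily aligned destination (the helper and its arguments are invented; the mmc_cmd/mmc_data fields are the standard ones):

/* Hypothetical caller: 'dst' may be misaligned, mmc_send_cmd() bounces it */
static int read_one_block(struct mmc *mmc, ulong blk, char *dst)
{
	struct mmc_cmd cmd;
	struct mmc_data data;

	cmd.cmdidx = MMC_CMD_READ_SINGLE_BLOCK;
	cmd.cmdarg = blk;		/* byte-addressed cards need blk * 512 */
	cmd.resp_type = MMC_RSP_R1;

	data.dest = dst;		/* no alignment requirement any more */
	data.blocks = 1;
	data.blocksize = mmc->read_bl_len;
	data.flags = MMC_DATA_READ;

	return mmc_send_cmd(mmc, &cmd, &data);
}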

This utilizes the newly introduced bounce buffers in the MMC layer.
Signed-off-by: Marek Vasut <marex@denx.de>
Cc: Stefano Babic <sbabic@denx.de>
Cc: Andy Fleming <afleming@gmail.com>
Cc: Fabio Estevam <festevam@gmail.com>
---
 drivers/mmc/mxsmmc.c |   70 ++++++++++++++++++++++++++++---------------------
 1 files changed, 40 insertions(+), 30 deletions(-)
diff --git a/drivers/mmc/mxsmmc.c b/drivers/mmc/mxsmmc.c
index 5f87a1e..e8bad9d 100644
--- a/drivers/mmc/mxsmmc.c
+++ b/drivers/mmc/mxsmmc.c
@@ -41,6 +41,7 @@
 #include <asm/arch/clock.h>
 #include <asm/arch/imx-regs.h>
 #include <asm/arch/sys_proto.h>
+#include <asm/arch/dma.h>

 struct mxsmmc_priv {
 	int	id;
@@ -49,6 +50,7 @@ struct mxsmmc_priv {
 	uint32_t	*clkctrl_ssp;
 	uint32_t	buswidth;
 	int	(*mmc_is_wp)(int);
+	struct mxs_dma_desc *desc;
 };

 #define	MXSMMC_MAX_TIMEOUT	10000
@@ -64,8 +66,7 @@ mxsmmc_send_cmd(struct mmc *mmc, struct mmc_cmd *cmd, struct mmc_data *data)
 	struct mx28_ssp_regs *ssp_regs = priv->regs;
 	uint32_t reg;
 	int timeout;
-	uint32_t data_count;
-	uint32_t *data_ptr;
+	uint32_t data_count, cache_data_count;
 	uint32_t ctrl0;

 	debug("MMC%d: CMD%d\n", mmc->block_dev.dev, cmd->cmdidx);
@@ -183,40 +184,41 @@ mxsmmc_send_cmd(struct mmc *mmc, struct mmc_cmd *cmd, struct mmc_data *data)
 	if (!data)
 		return 0;

-	/* Process the data */
 	data_count = data->blocksize * data->blocks;
-	timeout = MXSMMC_MAX_TIMEOUT;
+
+	if (data_count % ARCH_DMA_MINALIGN)
+		cache_data_count = roundup(data_count, ARCH_DMA_MINALIGN);
+	else
+		cache_data_count = data_count;
+
 	if (data->flags & MMC_DATA_READ) {
-		data_ptr = (uint32_t *)data->dest;
-		while (data_count && --timeout) {
-			reg = readl(&ssp_regs->hw_ssp_status);
-			if (!(reg & SSP_STATUS_FIFO_EMPTY)) {
-				*data_ptr++ = readl(&ssp_regs->hw_ssp_data);
-				data_count -= 4;
-				timeout = MXSMMC_MAX_TIMEOUT;
-			} else
-				udelay(1000);
-		}
+		priv->desc->cmd.data = MXS_DMA_DESC_COMMAND_DMA_WRITE;
+		priv->desc->cmd.address = (dma_addr_t)data->dest;
 	} else {
-		data_ptr = (uint32_t *)data->src;
-		timeout *= 100;
-		while (data_count && --timeout) {
-			reg = readl(&ssp_regs->hw_ssp_status);
-			if (!(reg & SSP_STATUS_FIFO_FULL)) {
-				writel(*data_ptr++, &ssp_regs->hw_ssp_data);
-				data_count -= 4;
-				timeout = MXSMMC_MAX_TIMEOUT;
-			} else
-				udelay(1000);
-		}
+		priv->desc->cmd.data = MXS_DMA_DESC_COMMAND_DMA_READ;
+		priv->desc->cmd.address = (dma_addr_t)data->src;
+
+		/* Flush data to DRAM so DMA can pick them up */
+		flush_dcache_range((uint32_t)priv->desc->cmd.address,
+			(uint32_t)(priv->desc->cmd.address + cache_data_count));
 	}

-	if (!timeout) {
-		printf("MMC%d: Data timeout with command %d (status 0x%08x)!\n",
-			mmc->block_dev.dev, cmd->cmdidx, reg);
+	priv->desc->cmd.data |= MXS_DMA_DESC_IRQ | MXS_DMA_DESC_DEC_SEM |
+				(data_count << MXS_DMA_DESC_BYTES_OFFSET);
+
+	mxs_dma_desc_append(MXS_DMA_CHANNEL_AHB_APBH_SSP0, priv->desc);
+	if (mxs_dma_go(MXS_DMA_CHANNEL_AHB_APBH_SSP0)) {
+		printf("MMC%d: DMA transfer failed\n", mmc->block_dev.dev);
 		return COMM_ERR;
 	}

+	/* The data arrived into DRAM, invalidate cache over them */
+	if (data->flags & MMC_DATA_READ) {
+		invalidate_dcache_range((uint32_t)priv->desc->cmd.address,
+			(uint32_t)(priv->desc->cmd.address + cache_data_count));
+	}
+
 	/* Check data errors */
 	reg = readl(&ssp_regs->hw_ssp_status);
 	if (reg &
@@ -270,7 +272,8 @@ static int mxsmmc_init(struct mmc *mmc)
 	/* 8 bits word length in MMC mode */
 	clrsetbits_le32(&ssp_regs->hw_ssp_ctrl1,
 		SSP_CTRL1_SSP_MODE_MASK | SSP_CTRL1_WORD_LENGTH_MASK,
-		SSP_CTRL1_SSP_MODE_SD_MMC | SSP_CTRL1_WORD_LENGTH_EIGHT_BITS);
+		SSP_CTRL1_SSP_MODE_SD_MMC | SSP_CTRL1_WORD_LENGTH_EIGHT_BITS |
+		SSP_CTRL1_DMA_ENABLE);

 	/* Set initial bit clock 400 KHz */
 	mx28_set_ssp_busclock(priv->id, 400);
@@ -300,6 +303,13 @@ int mxsmmc_initialize(bd_t *bis, int id, int (*wp)(int))
 		return -ENOMEM;
 	}

+	priv->desc = mxs_dma_desc_alloc();
+	if (!priv->desc) {
+		free(priv);
+		free(mmc);
+		return -ENOMEM;
+	}
+
 	priv->mmc_is_wp = wp;
 	priv->id = id;
 	switch (id) {
@@ -345,7 +355,7 @@ int mxsmmc_initialize(bd_t *bis, int id, int (*wp)(int))
 	 */
 	mmc->f_min = 400000;
 	mmc->f_max = mxc_get_clock(MXC_SSP0_CLK + id) * 1000 / 2;
-	mmc->b_max = 0;
+	mmc->b_max = 0x40;

 	mmc_register(mmc);
 	return 0;

This allows the i.MX28 MMC host to fully utilize DMA transfers and caches, greatly improving speed.
Signed-off-by: Marek Vasut <marex@denx.de>
Cc: Fabio Estevam <festevam@gmail.com>
---
 include/configs/m28evk.h  |    1 +
 include/configs/mx28evk.h |    1 +
 2 files changed, 2 insertions(+), 0 deletions(-)
diff --git a/include/configs/m28evk.h b/include/configs/m28evk.h
index 4d59153..dc1acbf 100644
--- a/include/configs/m28evk.h
+++ b/include/configs/m28evk.h
@@ -140,6 +140,7 @@
  */
 #ifdef	CONFIG_CMD_MMC
 #define	CONFIG_MMC
+#define	CONFIG_MMC_BOUNCE_BUFFER
 #define	CONFIG_GENERIC_MMC
 #define	CONFIG_MXS_MMC
 #endif
diff --git a/include/configs/mx28evk.h b/include/configs/mx28evk.h
index 04967d7..33b2f78 100644
--- a/include/configs/mx28evk.h
+++ b/include/configs/mx28evk.h
@@ -138,6 +138,7 @@
 #ifdef	CONFIG_CMD_MMC
 #define	CONFIG_MMC
 #define	CONFIG_GENERIC_MMC
+#define	CONFIG_MMC_BOUNCE_BUFFER
 #define	CONFIG_MXS_MMC
 #endif

Signed-off-by: Marek Vasut <marex@denx.de>
Cc: Stefano Babic <sbabic@denx.de>
---
 arch/arm/cpu/arm926ejs/mx28/mx28.c |   10 ++++++++++
 1 files changed, 10 insertions(+), 0 deletions(-)
diff --git a/arch/arm/cpu/arm926ejs/mx28/mx28.c b/arch/arm/cpu/arm926ejs/mx28/mx28.c
index 9bfd83b..cf6d4e9 100644
--- a/arch/arm/cpu/arm926ejs/mx28/mx28.c
+++ b/arch/arm/cpu/arm926ejs/mx28/mx28.c
@@ -63,6 +63,16 @@ void reset_cpu(ulong ignored)
 		;
 }

+void enable_caches(void)
+{
+#ifndef CONFIG_SYS_ICACHE_OFF
+	icache_enable();
+#endif
+#ifndef CONFIG_SYS_DCACHE_OFF
+	dcache_enable();
+#endif
+}
+
 int mx28_wait_mask_set(struct mx28_register_32 *reg, uint32_t mask, int timeout)
 {
 	while (--timeout) {

From: Eric Nelson <eric.nelson@boundarydevices.com>
This removes the need for bounce buffers on architectures whose DMA alignment is greater than 32 bytes.
Signed-off-by: Eric Nelson <eric.nelson@boundarydevices.com>
---
 include/net.h |    3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)
diff --git a/include/net.h b/include/net.h
index 1707a7f..14a0d33 100644
--- a/include/net.h
+++ b/include/net.h
@@ -16,6 +16,7 @@
 #include <commproc.h>
 #endif	/* CONFIG_8xx */

+#include <asm/cache.h>
 #include <asm/byteorder.h>	/* for nton* / ntoh* stuff */

@@ -31,7 +32,7 @@
 # define PKTBUFSRX	4
 #endif

-#define PKTALIGN	32
+#define PKTALIGN	ARCH_DMA_MINALIGN

 /* IPv4 addresses are always 32 bits in size */
 typedef u32		IPaddr_t;
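The practical effect is that a packet buffer declared with PKTALIGN is now safe for cache maintenance on every architecture. A small hypothetical illustration:

/* Hypothetical receive buffer: PKTALIGN now equals ARCH_DMA_MINALIGN, so
 * invalidating this range can never clip a variable sharing a cache line */
static uchar rx_buffer[PKTSIZE_ALIGN] __attribute__((aligned(PKTALIGN)));

static void rx_complete(int length)
{
	uint32_t addr = (uint32_t)rx_buffer;

	invalidate_dcache_range(addr, addr + roundup(length, PKTALIGN));
}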

From: Eric Nelson <eric.nelson@boundarydevices.com>
Ensure that transmit and receive buffers are cache-line aligned. Invalidate the cache for each packet as it is received, update receive buffer descriptors one cache line at a time, and flush the cache before transmitting.
Original patch by Marek: http://lists.denx.de/pipermail/u-boot/2012-February/117695.html
Signed-off-by: Eric Nelson <eric.nelson@boundarydevices.com>
Acked-by: Marek Vasut <marex@denx.de>
Tested-by: Marek Vasut <marex@denx.de>
---
 drivers/net/fec_mxc.c |  277 ++++++++++++++++++++++++++++++++---------------
 drivers/net/fec_mxc.h |   19 +---
 2 files changed, 192 insertions(+), 104 deletions(-)
diff --git a/drivers/net/fec_mxc.c b/drivers/net/fec_mxc.c
index 1fdd071..d8db9f0 100644
--- a/drivers/net/fec_mxc.c
+++ b/drivers/net/fec_mxc.c
@@ -38,16 +38,28 @@ DECLARE_GLOBAL_DATA_PTR;
 #error "CONFIG_MII has to be defined!"
 #endif

-#ifndef	CONFIG_FEC_XCV_TYPE
-#define	CONFIG_FEC_XCV_TYPE MII100
+#ifndef CONFIG_FEC_XCV_TYPE
+#define CONFIG_FEC_XCV_TYPE MII100
 #endif

 /*
  * The i.MX28 operates with packets in big endian. We need to swap them before
  * sending and after receiving.
  */
-#ifdef	CONFIG_MX28
-#define	CONFIG_FEC_MXC_SWAP_PACKET
+#ifdef CONFIG_MX28
+#define CONFIG_FEC_MXC_SWAP_PACKET
+#endif
+
+#define RXDESC_PER_CACHELINE (ARCH_DMA_MINALIGN/sizeof(struct fec_bd))
+
+/* Check various alignment issues at compile time */
+#if ((ARCH_DMA_MINALIGN < 16) || (ARCH_DMA_MINALIGN % 16 != 0))
+#error "ARCH_DMA_MINALIGN must be multiple of 16!"
+#endif
+
+#if ((PKTALIGN < ARCH_DMA_MINALIGN) || \
+	(PKTALIGN % ARCH_DMA_MINALIGN != 0))
+#error "PKTALIGN must be multiple of ARCH_DMA_MINALIGN!"
 #endif

 #undef DEBUG
@@ -59,7 +71,7 @@ struct nbuf {
 	uint8_t head[16];	/**< MAC header(6 + 6 + 2) + 2(aligned) */
 };

-#ifdef	CONFIG_FEC_MXC_SWAP_PACKET
+#ifdef CONFIG_FEC_MXC_SWAP_PACKET
 static void swap_packet(uint32_t *packet, int length)
 {
 	int i;
@@ -259,43 +271,52 @@ static int fec_tx_task_disable(struct fec_priv *fec)
  * Initialize receive task's buffer descriptors
  * @param[in] fec all we know about the device yet
  * @param[in] count receive buffer count to be allocated
- * @param[in] size size of each receive buffer
+ * @param[in] dsize desired size of each receive buffer
  * @return 0 on success
  *
  * For this task we need additional memory for the data buffers. And each
  * data buffer requires some alignment. Thy must be aligned to a specific
- * boundary each (DB_DATA_ALIGNMENT).
+ * boundary each.
 */
-static int fec_rbd_init(struct fec_priv *fec, int count, int size)
+static int fec_rbd_init(struct fec_priv *fec, int count, int dsize)
 {
-	int ix;
-	uint32_t p = 0;
-
-	/* reserve data memory and consider alignment */
-	if (fec->rdb_ptr == NULL)
-		fec->rdb_ptr = malloc(size * count + DB_DATA_ALIGNMENT);
-	p = (uint32_t)fec->rdb_ptr;
-	if (!p) {
-		puts("fec_mxc: not enough malloc memory\n");
-		return -ENOMEM;
-	}
-	memset((void *)p, 0, size * count + DB_DATA_ALIGNMENT);
-	p += DB_DATA_ALIGNMENT-1;
-	p &= ~(DB_DATA_ALIGNMENT-1);
-
-	for (ix = 0; ix < count; ix++) {
-		writel(p, &fec->rbd_base[ix].data_pointer);
-		p += size;
-		writew(FEC_RBD_EMPTY, &fec->rbd_base[ix].status);
-		writew(0, &fec->rbd_base[ix].data_length);
-	}
+	uint32_t size;
+	int i;
+
 	/*
-	 * mark the last RBD to close the ring
+	 * Allocate memory for the buffers. This allocation respects the
+	 * alignment
 	 */
-	writew(FEC_RBD_WRAP | FEC_RBD_EMPTY, &fec->rbd_base[ix - 1].status);
+	size = roundup(dsize, ARCH_DMA_MINALIGN);
+	for (i = 0; i < count; i++) {
+		uint32_t data_ptr = readl(&fec->rbd_base[i].data_pointer);
+		if (data_ptr == 0) {
+			uint8_t *data = memalign(ARCH_DMA_MINALIGN,
+						 size);
+			if (!data) {
+				printf("%s: error allocating rxbuf %d\n",
+				       __func__, i);
+				goto err;
+			}
+			writel((uint32_t)data, &fec->rbd_base[i].data_pointer);
+		} /* needs allocation */
+		writew(FEC_RBD_EMPTY, &fec->rbd_base[i].status);
+		writew(0, &fec->rbd_base[i].data_length);
+	}
+
+	/* Mark the last RBD to close the ring. */
+	writew(FEC_RBD_WRAP | FEC_RBD_EMPTY, &fec->rbd_base[i - 1].status);
 	fec->rbd_index = 0;

 	return 0;
+
+err:
+	for (; i >= 0; i--) {
+		uint32_t data_ptr = readl(&fec->rbd_base[i].data_pointer);
+		free((void *)data_ptr);
+	}
+
+	return -ENOMEM;
 }

 /**
@@ -312,9 +333,13 @@ static int fec_rbd_init(struct fec_priv *fec, int count, int size)
  */
 static void fec_tbd_init(struct fec_priv *fec)
 {
+	unsigned addr = (unsigned)fec->tbd_base;
+	unsigned size = roundup(2 * sizeof(struct fec_bd),
+				ARCH_DMA_MINALIGN);
 	writew(0x0000, &fec->tbd_base[0].status);
 	writew(FEC_TBD_WRAP, &fec->tbd_base[1].status);
 	fec->tbd_index = 0;
+	flush_dcache_range(addr, addr + size);
 }

 /**
@@ -324,16 +349,10 @@ static void fec_tbd_init(struct fec_priv *fec)
  */
 static void fec_rbd_clean(int last, struct fec_bd *pRbd)
 {
-	/*
-	 * Reset buffer descriptor as empty
-	 */
+	unsigned short flags = FEC_RBD_EMPTY;
 	if (last)
-		writew(FEC_RBD_WRAP | FEC_RBD_EMPTY, &pRbd->status);
-	else
-		writew(FEC_RBD_EMPTY, &pRbd->status);
-	/*
-	 * no data in it
-	 */
+		flags |= FEC_RBD_WRAP;
+	writew(flags, &pRbd->status);
 	writew(0, &pRbd->data_length);
 }

@@ -387,12 +406,25 @@ static int fec_open(struct eth_device *edev)
 {
 	struct fec_priv *fec = (struct fec_priv *)edev->priv;
 	int speed;
+	uint32_t addr, size;
+	int i;

 	debug("fec_open: fec_open(dev)\n");
 	/* full-duplex, heartbeat disabled */
 	writel(1 << 2, &fec->eth->x_cntrl);
 	fec->rbd_index = 0;

+	/* Invalidate all descriptors */
+	for (i = 0; i < FEC_RBD_NUM - 1; i++)
+		fec_rbd_clean(0, &fec->rbd_base[i]);
+	fec_rbd_clean(1, &fec->rbd_base[i]);
+
+	/* Flush the descriptors into RAM */
+	size = roundup(FEC_RBD_NUM * sizeof(struct fec_bd),
+		       ARCH_DMA_MINALIGN);
+	addr = (uint32_t)fec->rbd_base;
+	flush_dcache_range(addr, addr + size);
+
 #ifdef FEC_QUIRK_ENET_MAC
 	/* Enable ENET HW endian SWAP */
 	writel(readl(&fec->eth->ecntrl) | FEC_ECNTRL_DBSWAP,
@@ -478,38 +510,55 @@ static int fec_open(struct eth_device *edev)

 static int fec_init(struct eth_device *dev, bd_t* bd)
 {
-	uint32_t base;
 	struct fec_priv *fec = (struct fec_priv *)dev->priv;
 	uint32_t mib_ptr = (uint32_t)&fec->eth->rmon_t_drop;
 	uint32_t rcntrl;
-	int i;
+	uint32_t size;
+	int i, ret;

 	/* Initialize MAC address */
 	fec_set_hwaddr(dev);
 	/*
-	 * reserve memory for both buffer descriptor chains at once
-	 * Datasheet forces the startaddress of each chain is 16 byte
-	 * aligned
+	 * Allocate transmit descriptors, there are two in total. This
+	 * allocation respects cache alignment.
 	 */
-	if (fec->base_ptr == NULL)
-		fec->base_ptr = malloc((2 + FEC_RBD_NUM) *
-				sizeof(struct fec_bd) + DB_ALIGNMENT);
-	base = (uint32_t)fec->base_ptr;
-	if (!base) {
-		puts("fec_mxc: not enough malloc memory\n");
-		return -ENOMEM;
+	if (!fec->tbd_base) {
+		size = roundup(2 * sizeof(struct fec_bd),
+			       ARCH_DMA_MINALIGN);
+		fec->tbd_base = memalign(ARCH_DMA_MINALIGN, size);
+		if (!fec->tbd_base) {
+			ret = -ENOMEM;
+			goto err1;
+		}
+		memset(fec->tbd_base, 0, size);
+		fec_tbd_init(fec);
+		flush_dcache_range((unsigned)fec->tbd_base,
+				   (unsigned)fec->tbd_base + size);
 	}
-	memset((void *)base, 0, (2 + FEC_RBD_NUM) *
-			sizeof(struct fec_bd) + DB_ALIGNMENT);
-	base += (DB_ALIGNMENT-1);
-	base &= ~(DB_ALIGNMENT-1);
-
-	fec->rbd_base = (struct fec_bd *)base;
-	base += FEC_RBD_NUM * sizeof(struct fec_bd);
-
-	fec->tbd_base = (struct fec_bd *)base;
+	/*
+	 * Allocate receive descriptors. This allocation respects cache
+	 * alignment.
+	 */
+	if (!fec->rbd_base) {
+		size = roundup(FEC_RBD_NUM * sizeof(struct fec_bd),
+			       ARCH_DMA_MINALIGN);
+		fec->rbd_base = memalign(ARCH_DMA_MINALIGN, size);
+		if (!fec->rbd_base) {
+			ret = -ENOMEM;
+			goto err2;
+		}
+		memset(fec->rbd_base, 0, size);
+		/*
+		 * Initialize RxBD ring
+		 */
+		if (fec_rbd_init(fec, FEC_RBD_NUM, FEC_MAX_PKT_SIZE) < 0) {
+			ret = -ENOMEM;
+			goto err3;
+		}
+		flush_dcache_range((unsigned)fec->rbd_base,
+				   (unsigned)fec->rbd_base + size);
+	}

 	/*
 	 * Set interrupt mask register
@@ -566,23 +615,19 @@ static int fec_init(struct eth_device *dev, bd_t* bd)
 	writel((uint32_t)fec->tbd_base, &fec->eth->etdsr);
 	writel((uint32_t)fec->rbd_base, &fec->eth->erdsr);

-	/*
-	 * Initialize RxBD/TxBD rings
-	 */
-	if (fec_rbd_init(fec, FEC_RBD_NUM, FEC_MAX_PKT_SIZE) < 0) {
-		free(fec->base_ptr);
-		fec->base_ptr = NULL;
-		return -ENOMEM;
-	}
-	fec_tbd_init(fec);
-
-
 #ifndef CONFIG_PHYLIB
 	if (fec->xcv_type != SEVENWIRE)
 		miiphy_restart_aneg(dev);
 #endif
 	fec_open(dev);
 	return 0;
+
+err3:
+	free(fec->rbd_base);
+err2:
+	free(fec->tbd_base);
+err1:
+	return ret;
 }

 /**
@@ -631,9 +676,11 @@ static void fec_halt(struct eth_device *dev)
  * @param[in] length Data count in bytes
  * @return 0 on success
  */
-static int fec_send(struct eth_device *dev, volatile void* packet, int length)
+static int fec_send(struct eth_device *dev, volatile void *packet, int length)
 {
 	unsigned int status;
+	uint32_t size;
+	uint32_t addr;

 	/*
 	 * This routine transmits one frame. This routine only accepts
@@ -650,15 +697,21 @@ static int fec_send(struct eth_device *dev, volatile void* packet, int length)
 	}

 	/*
-	 * Setup the transmit buffer
-	 * Note: We are always using the first buffer for transmission,
-	 * the second will be empty and only used to stop the DMA engine
+	 * Setup the transmit buffer. We are always using the first buffer for
+	 * transmission, the second will be empty and only used to stop the DMA
+	 * engine. We also flush the packet to RAM here to avoid cache trouble.
 	 */
-#ifdef	CONFIG_FEC_MXC_SWAP_PACKET
+#ifdef CONFIG_FEC_MXC_SWAP_PACKET
 	swap_packet((uint32_t *)packet, length);
 #endif
+
+	addr = (uint32_t)packet;
+	size = roundup(length, ARCH_DMA_MINALIGN);
+	flush_dcache_range(addr, addr + size);
+
 	writew(length, &fec->tbd_base[fec->tbd_index].data_length);
-	writel((uint32_t)packet, &fec->tbd_base[fec->tbd_index].data_pointer);
+	writel(addr, &fec->tbd_base[fec->tbd_index].data_pointer);
+
 	/*
 	 * update BD's status now
 	 * This block:
@@ -672,16 +725,30 @@ static int fec_send(struct eth_device *dev, volatile void* packet, int length)
 	writew(status, &fec->tbd_base[fec->tbd_index].status);

 	/*
+	 * Flush data cache. This code flushes both TX descriptors to RAM.
+	 * After this code, the descriptors will be safely in RAM and we
+	 * can start DMA.
+	 */
+	size = roundup(2 * sizeof(struct fec_bd), ARCH_DMA_MINALIGN);
+	addr = (uint32_t)fec->tbd_base;
+	flush_dcache_range(addr, addr + size);
+
+	/*
 	 * Enable SmartDMA transmit task
 	 */
 	fec_tx_task_enable(fec);

 	/*
-	 * wait until frame is sent .
+	 * Wait until frame is sent. On each turn of the wait cycle, we must
+	 * invalidate data cache to see what's really in RAM. Also, we need
+	 * barrier here.
 	 */
+	invalidate_dcache_range(addr, addr + size);
 	while (readw(&fec->tbd_base[fec->tbd_index].status) & FEC_TBD_READY) {
 		udelay(1);
+		invalidate_dcache_range(addr, addr + size);
 	}
+
 	debug("fec_send: status 0x%x index %d\n",
 	      readw(&fec->tbd_base[fec->tbd_index].status),
 	      fec->tbd_index);
@@ -707,6 +774,8 @@ static int fec_recv(struct eth_device *dev)
 	int frame_length, len = 0;
 	struct nbuf *frame;
 	uint16_t bd_status;
+	uint32_t addr, size;
+	int i;
 	uchar buff[FEC_MAX_PKT_SIZE];

 	/*
@@ -737,8 +806,23 @@ static int fec_recv(struct eth_device *dev)
 	}

 	/*
-	 * ensure reading the right buffer status
+	 * Read the buffer status. Before the status can be read, the data
+	 * cache must be invalidated, because the data in RAM might have
+	 * been changed by DMA. The descriptors are properly aligned to
+	 * cachelines so there's no need to worry they'd overlap.
+	 *
+	 * WARNING: By invalidating the descriptor here, we also invalidate
+	 * the descriptors surrounding this one. Therefore we can NOT change
+	 * the contents of this descriptor nor the surrounding ones. The
+	 * problem is that in order to mark the descriptor as processed, we
+	 * need to change the descriptor. The solution is to mark the whole
+	 * cache line when all descriptors in the cache line are processed.
 	 */
+	addr = (uint32_t)rbd;
+	addr &= ~(ARCH_DMA_MINALIGN - 1);
+	size = roundup(sizeof(struct fec_bd), ARCH_DMA_MINALIGN);
+	invalidate_dcache_range(addr, addr + size);
+
 	bd_status = readw(&rbd->status);
 	debug("fec_recv: status 0x%x\n", bd_status);

@@ -751,9 +835,16 @@ static int fec_recv(struct eth_device *dev)
 		frame = (struct nbuf *)readl(&rbd->data_pointer);
 		frame_length = readw(&rbd->data_length) - 4;
 		/*
+		 * Invalidate data cache over the buffer
+		 */
+		addr = (uint32_t)frame;
+		size = roundup(frame_length, ARCH_DMA_MINALIGN);
+		invalidate_dcache_range(addr, addr + size);
+
+		/*
 		 * Fill the buffer and pass it to upper layers
 		 */
-#ifdef	CONFIG_FEC_MXC_SWAP_PACKET
+#ifdef CONFIG_FEC_MXC_SWAP_PACKET
 		swap_packet((uint32_t *)frame->data, frame_length);
 #endif
 		memcpy(buff, frame->data, frame_length);
@@ -765,11 +856,25 @@ static int fec_recv(struct eth_device *dev)
 			(ulong)rbd->data_pointer,
 			bd_status);
 	}
+
 	/*
-	 * free the current buffer, restart the engine
-	 * and move forward to the next buffer
+	 * Free the current buffer, restart the engine and move forward
+	 * to the next buffer. Here we check if the whole cacheline of
+	 * descriptors was already processed and if so, we mark it free
+	 * as whole.
 	 */
-	fec_rbd_clean(fec->rbd_index == (FEC_RBD_NUM - 1) ? 1 : 0, rbd);
+	size = RXDESC_PER_CACHELINE - 1;
+	if ((fec->rbd_index & size) == size) {
+		i = fec->rbd_index - size;
+		addr = (uint32_t)&fec->rbd_base[i];
+		for (; i <= fec->rbd_index ; i++) {
+			fec_rbd_clean(i == (FEC_RBD_NUM - 1),
+				      &fec->rbd_base[i]);
+		}
+		flush_dcache_range(addr,
+				   addr + ARCH_DMA_MINALIGN);
+	}
+
 	fec_rx_task_enable(fec);
 	fec->rbd_index = (fec->rbd_index + 1) % FEC_RBD_NUM;
 }
@@ -866,7 +971,7 @@ static int fec_probe(bd_t *bd, int dev_id, int phy_id, uint32_t base_addr)
 	bus->read = fec_phy_read;
 	bus->write = fec_phy_write;
 	sprintf(bus->name, edev->name);
-#ifdef	CONFIG_MX28
+#ifdef CONFIG_MX28
 	/*
 	 * The i.MX28 has two ethernet interfaces, but they are not equal.
 	 * Only the first one can access the MDIO bus.
@@ -901,7 +1006,7 @@ err1:
 	return ret;
 }

-#ifndef	CONFIG_FEC_MXC_MULTI
+#ifndef CONFIG_FEC_MXC_MULTI
 int fecmxc_initialize(bd_t *bd)
 {
 	int lout = 1;
diff --git a/drivers/net/fec_mxc.h b/drivers/net/fec_mxc.h
index 2eb7803..852b2e0 100644
--- a/drivers/net/fec_mxc.h
+++ b/drivers/net/fec_mxc.h
@@ -234,22 +234,6 @@ struct ethernet_regs {
 #endif

 /**
- * @brief Descriptor buffer alignment
- *
- * i.MX27 requires a 16 byte alignment (but for the first element only)
- */
-#define DB_ALIGNMENT		16
-
-/**
- * @brief Data buffer alignment
- *
- * i.MX27 requires a four byte alignment for transmit and 16 bits
- * alignment for receive so take 16
- * Note: Valid for member data_pointer in struct buffer_descriptor
- */
-#define DB_DATA_ALIGNMENT	16
-
-/**
  * @brief Receive & Transmit Buffer Descriptor definitions
  *
  * Note: The first BD must be aligned (see DB_ALIGNMENT)
@@ -282,8 +266,7 @@ struct fec_priv {
 	struct fec_bd	*tbd_base;	/* TBD ring */
 	int		tbd_index;	/* next transmit BD to write */
 	bd_t		*bd;
-	void	*rdb_ptr;
-	void	*base_ptr;
+	uint8_t *tdb_ptr;
 	int		dev_id;
 	int		phy_id;
 	struct mii_dev *bus;
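The subtlest part of this patch is the receive-descriptor handling. Restated on its own (assuming, e.g., a 32-byte ARCH_DMA_MINALIGN and an 8-byte struct fec_bd, i.e. four descriptors per cache line), the cleanup in fec_recv() works like this:

/* Restatement of the fec_recv() cleanup logic, not additional code:
 * descriptors sharing one cache line may only be written back together,
 * once ALL of them have been consumed; flushing earlier could overwrite
 * a neighbouring descriptor that the DMA engine has just updated. */
unsigned int mask = RXDESC_PER_CACHELINE - 1;

if ((fec->rbd_index & mask) == mask) {
	int i = fec->rbd_index - mask;	/* first BD in this cache line */
	uint32_t addr = (uint32_t)&fec->rbd_base[i];

	for (; i <= fec->rbd_index; i++)
		fec_rbd_clean(i == (FEC_RBD_NUM - 1), &fec->rbd_base[i]);

	flush_dcache_range(addr, addr + ARCH_DMA_MINALIGN);
}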

Hi,
This series of patches prepares i.MX platforms based on MXS (i.MX28, i.MX6Q) for the use of caches. This patchset prepares:

- Ethernet: FEC MXC is ready to use caches
- NAND: NAND driver is ready to use caches
- SPI/MMC: i.MX28 MMC driver is ready to use caches
Stefano, I consider these eligible for application. Most of these are resubmissions from the mailing list; I just cobbled them together as I was running tests on them.
Andy, you might want to review the bounce buffer business though. We'll probably need bounce buffers in a lot of places, until we can start slowly squeezing them into the code.
Best regards,
Marek Vasut