[U-Boot] [PATCH V3] net: fec_mxc: allow use with cache enabled

ensure that transmit and receive buffers are cache-line aligned invalidate cache for each packet as received update receive buffer descriptors one cache line at a time flush cache before transmitting Original patch by Marek: http://lists.denx.de/pipermail/u-boot/2012-February/117695.html
Signed-off-by: Eric Nelson eric.nelson@boundarydevices.com
--- V2 addresses some concerns from the ML: - Use readl()/writel() instead of mapped data structure accesses - Wrong comment style - &rbd_base[0] == rbd_base removed 'volatile' from fec_send().
V3 updates from ML (and Marek): consolidated CONFIG_FEC_DATA_ALIGNMENT and CONFIG_FEC_DESC_ALIGNMENT added cache flushes after initialization of TBD/RBD
drivers/net/fec_mxc.c | 265 +++++++++++++++++++++++++++++++++++-------------- drivers/net/fec_mxc.h | 19 +---- 2 files changed, 189 insertions(+), 95 deletions(-)
diff --git a/drivers/net/fec_mxc.c b/drivers/net/fec_mxc.c index 1fdd071..94a3927 100644 --- a/drivers/net/fec_mxc.c +++ b/drivers/net/fec_mxc.c @@ -50,6 +50,24 @@ DECLARE_GLOBAL_DATA_PTR; #define CONFIG_FEC_MXC_SWAP_PACKET #endif
+#if ARCH_DMA_MINALIGN > CONFIG_SYS_CACHELINE_SIZE +#define CONFIG_FEC_ALIGN ARCH_DMA_MINALIGN +#else +#define CONFIG_FEC_ALIGN CONFIG_SYS_CACHELINE_SIZE +#endif + +#define RXDESC_PER_CACHELINE (CONFIG_FEC_ALIGN/sizeof(struct fec_bd)) + +/* Check various alignment issues at compile time */ +#if ((CONFIG_FEC_ALIGN < 16) || (CONFIG_FEC_ALIGN % 16 != 0)) +#error "CONFIG_FEC_ALIGN must be multiple of 16!" +#endif + +#if ((PKTALIGN < CONFIG_FEC_ALIGN) || \ + (PKTALIGN % CONFIG_FEC_ALIGN != 0)) +#error "PKTALIGN must be multiple of CONFIG_FEC_ALIGN!" +#endif + #undef DEBUG
struct nbuf { @@ -259,43 +277,52 @@ static int fec_tx_task_disable(struct fec_priv *fec) * Initialize receive task's buffer descriptors * @param[in] fec all we know about the device yet * @param[in] count receive buffer count to be allocated - * @param[in] size size of each receive buffer + * @param[in] dsize desired size of each receive buffer * @return 0 on success * * For this task we need additional memory for the data buffers. And each * data buffer requires some alignment. Thy must be aligned to a specific - * boundary each (DB_DATA_ALIGNMENT). + * boundary each. */ -static int fec_rbd_init(struct fec_priv *fec, int count, int size) +static int fec_rbd_init(struct fec_priv *fec, int count, int dsize) { - int ix; - uint32_t p = 0; - - /* reserve data memory and consider alignment */ - if (fec->rdb_ptr == NULL) - fec->rdb_ptr = malloc(size * count + DB_DATA_ALIGNMENT); - p = (uint32_t)fec->rdb_ptr; - if (!p) { - puts("fec_mxc: not enough malloc memory\n"); - return -ENOMEM; - } - memset((void *)p, 0, size * count + DB_DATA_ALIGNMENT); - p += DB_DATA_ALIGNMENT-1; - p &= ~(DB_DATA_ALIGNMENT-1); - - for (ix = 0; ix < count; ix++) { - writel(p, &fec->rbd_base[ix].data_pointer); - p += size; - writew(FEC_RBD_EMPTY, &fec->rbd_base[ix].status); - writew(0, &fec->rbd_base[ix].data_length); - } + uint32_t size; + int i; + /* - * mark the last RBD to close the ring + * Allocate memory for the buffers. This allocation respects the + * alignment */ - writew(FEC_RBD_WRAP | FEC_RBD_EMPTY, &fec->rbd_base[ix - 1].status); + size = roundup(dsize, CONFIG_FEC_ALIGN); + for (i = 0; i < count; i++) { + uint32_t data_ptr = readl(&fec->rbd_base[i].data_pointer); + if (data_ptr == 0) { + uint8_t *data = memalign(CONFIG_FEC_ALIGN, + size); + if (!data) { + printf("%s: error allocating rxbuf %d\n", + __func__, i); + goto err; + } + writel((uint32_t)data, &fec->rbd_base[i].data_pointer); + } /* needs allocation */ + writew(FEC_RBD_EMPTY, &fec->rbd_base[i].status); + writew(0, &fec->rbd_base[i].data_length); + } + + /* Mark the last RBD to close the ring. */ + writew(FEC_RBD_WRAP | FEC_RBD_EMPTY, &fec->rbd_base[i - 1].status); fec->rbd_index = 0;
return 0; + +err: + for (; i >= 0; i--) { + uint32_t data_ptr = readl(&fec->rbd_base[i].data_pointer); + free((void *)data_ptr); + } + + return -ENOMEM; }
/** @@ -312,9 +339,13 @@ static int fec_rbd_init(struct fec_priv *fec, int count, int size) */ static void fec_tbd_init(struct fec_priv *fec) { + unsigned addr = (unsigned)fec->tbd_base; + unsigned size = roundup(2 * sizeof(struct fec_bd), + CONFIG_FEC_ALIGN); writew(0x0000, &fec->tbd_base[0].status); writew(FEC_TBD_WRAP, &fec->tbd_base[1].status); fec->tbd_index = 0; + flush_dcache_range(addr, addr+size); }
/** @@ -324,16 +355,10 @@ static void fec_tbd_init(struct fec_priv *fec) */ static void fec_rbd_clean(int last, struct fec_bd *pRbd) { - /* - * Reset buffer descriptor as empty - */ + unsigned short flags = FEC_RBD_EMPTY; if (last) - writew(FEC_RBD_WRAP | FEC_RBD_EMPTY, &pRbd->status); - else - writew(FEC_RBD_EMPTY, &pRbd->status); - /* - * no data in it - */ + flags |= FEC_RBD_WRAP; + writew(flags, &pRbd->status); writew(0, &pRbd->data_length); }
@@ -387,12 +412,25 @@ static int fec_open(struct eth_device *edev) { struct fec_priv *fec = (struct fec_priv *)edev->priv; int speed; + uint32_t addr, size; + int i;
debug("fec_open: fec_open(dev)\n"); /* full-duplex, heartbeat disabled */ writel(1 << 2, &fec->eth->x_cntrl); fec->rbd_index = 0;
+ /* Invalidate all descriptors */ + for (i = 0; i < FEC_RBD_NUM - 1; i++) + fec_rbd_clean(0, &fec->rbd_base[i]); + fec_rbd_clean(1, &fec->rbd_base[i]); + + /* Flush the descriptors into RAM */ + size = roundup(FEC_RBD_NUM * sizeof(struct fec_bd), + CONFIG_FEC_ALIGN); + addr = (uint32_t)fec->rbd_base; + flush_dcache_range(addr, addr + size); + #ifdef FEC_QUIRK_ENET_MAC /* Enable ENET HW endian SWAP */ writel(readl(&fec->eth->ecntrl) | FEC_ECNTRL_DBSWAP, @@ -478,38 +516,55 @@ static int fec_open(struct eth_device *edev)
static int fec_init(struct eth_device *dev, bd_t* bd) { - uint32_t base; struct fec_priv *fec = (struct fec_priv *)dev->priv; uint32_t mib_ptr = (uint32_t)&fec->eth->rmon_t_drop; uint32_t rcntrl; - int i; + uint32_t size; + int i, ret;
/* Initialize MAC address */ fec_set_hwaddr(dev);
/* - * reserve memory for both buffer descriptor chains at once - * Datasheet forces the startaddress of each chain is 16 byte - * aligned + * Allocate transmit descriptors, there are two in total. This + * allocation respects cache alignment. */ - if (fec->base_ptr == NULL) - fec->base_ptr = malloc((2 + FEC_RBD_NUM) * - sizeof(struct fec_bd) + DB_ALIGNMENT); - base = (uint32_t)fec->base_ptr; - if (!base) { - puts("fec_mxc: not enough malloc memory\n"); - return -ENOMEM; + if (!fec->tbd_base) { + size = roundup(2 * sizeof(struct fec_bd), + CONFIG_FEC_ALIGN); + fec->tbd_base = memalign(CONFIG_FEC_ALIGN, size); + if (!fec->tbd_base) { + ret = -ENOMEM; + goto err1; + } + memset(fec->tbd_base, 0, size); + fec_tbd_init(fec); + flush_dcache_range((unsigned)fec->tbd_base, size); } - memset((void *)base, 0, (2 + FEC_RBD_NUM) * - sizeof(struct fec_bd) + DB_ALIGNMENT); - base += (DB_ALIGNMENT-1); - base &= ~(DB_ALIGNMENT-1); - - fec->rbd_base = (struct fec_bd *)base;
- base += FEC_RBD_NUM * sizeof(struct fec_bd); - - fec->tbd_base = (struct fec_bd *)base; + /* + * Allocate receive descriptors. This allocation respects cache + * alignment. + */ + if (!fec->rbd_base) { + size = roundup(FEC_RBD_NUM * sizeof(struct fec_bd), + CONFIG_FEC_ALIGN); + fec->rbd_base = memalign(CONFIG_FEC_ALIGN, size); + if (!fec->rbd_base) { + ret = -ENOMEM; + goto err2; + } + memset(fec->rbd_base, 0, size); + /* + * Initialize RxBD ring + */ + if (fec_rbd_init(fec, FEC_RBD_NUM, FEC_MAX_PKT_SIZE) < 0) { + ret = -ENOMEM; + goto err3; + } + flush_dcache_range((unsigned)fec->rbd_base, + (unsigned)fec->rbd_base + size); + }
/* * Set interrupt mask register @@ -566,23 +621,19 @@ static int fec_init(struct eth_device *dev, bd_t* bd) writel((uint32_t)fec->tbd_base, &fec->eth->etdsr); writel((uint32_t)fec->rbd_base, &fec->eth->erdsr);
- /* - * Initialize RxBD/TxBD rings - */ - if (fec_rbd_init(fec, FEC_RBD_NUM, FEC_MAX_PKT_SIZE) < 0) { - free(fec->base_ptr); - fec->base_ptr = NULL; - return -ENOMEM; - } - fec_tbd_init(fec); - - #ifndef CONFIG_PHYLIB if (fec->xcv_type != SEVENWIRE) miiphy_restart_aneg(dev); #endif fec_open(dev); return 0; + +err3: + free(fec->rbd_base); +err2: + free(fec->tbd_base); +err1: + return ret; }
/** @@ -631,9 +682,11 @@ static void fec_halt(struct eth_device *dev) * @param[in] length Data count in bytes * @return 0 on success */ -static int fec_send(struct eth_device *dev, volatile void* packet, int length) +static int fec_send(struct eth_device *dev, void *packet, int length) { unsigned int status; + uint32_t size; + uint32_t addr;
/* * This routine transmits one frame. This routine only accepts @@ -650,15 +703,21 @@ static int fec_send(struct eth_device *dev, volatile void* packet, int length) }
/* - * Setup the transmit buffer - * Note: We are always using the first buffer for transmission, - * the second will be empty and only used to stop the DMA engine + * Setup the transmit buffer. We are always using the first buffer for + * transmission, the second will be empty and only used to stop the DMA + * engine. We also flush the packet to RAM here to avoid cache trouble. */ #ifdef CONFIG_FEC_MXC_SWAP_PACKET swap_packet((uint32_t *)packet, length); #endif + + addr = (uint32_t)packet; + size = roundup(length, CONFIG_FEC_ALIGN); + flush_dcache_range(addr, addr + size); + writew(length, &fec->tbd_base[fec->tbd_index].data_length); - writel((uint32_t)packet, &fec->tbd_base[fec->tbd_index].data_pointer); + writel(addr, &fec->tbd_base[fec->tbd_index].data_pointer); + /* * update BD's status now * This block: @@ -672,16 +731,30 @@ static int fec_send(struct eth_device *dev, volatile void* packet, int length) writew(status, &fec->tbd_base[fec->tbd_index].status);
/* + * Flush data cache. This code flushes both TX descriptors to RAM. + * After this code, the descriptors will be safely in RAM and we + * can start DMA. + */ + size = roundup(2 * sizeof(struct fec_bd), CONFIG_FEC_ALIGN); + addr = (uint32_t)fec->tbd_base; + flush_dcache_range(addr, addr + size); + + /* * Enable SmartDMA transmit task */ fec_tx_task_enable(fec);
/* - * wait until frame is sent . + * Wait until frame is sent. On each turn of the wait cycle, we must + * invalidate data cache to see what's really in RAM. Also, we need + * barrier here. */ + invalidate_dcache_range(addr, addr + size); while (readw(&fec->tbd_base[fec->tbd_index].status) & FEC_TBD_READY) { udelay(1); + invalidate_dcache_range(addr, addr + size); } + debug("fec_send: status 0x%x index %d\n", readw(&fec->tbd_base[fec->tbd_index].status), fec->tbd_index); @@ -707,6 +780,8 @@ static int fec_recv(struct eth_device *dev) int frame_length, len = 0; struct nbuf *frame; uint16_t bd_status; + uint32_t addr, size; + int i; uchar buff[FEC_MAX_PKT_SIZE];
/* @@ -737,8 +812,23 @@ static int fec_recv(struct eth_device *dev) }
/* - * ensure reading the right buffer status + * Read the buffer status. Before the status can be read, the data cache + * must be invalidated, because the data in RAM might have been changed + * by DMA. The descriptors are properly aligned to cachelines so there's + * no need to worry they'd overlap. + * + * WARNING: By invalidating the descriptor here, we also invalidate + * the descriptors surrounding this one. Therefore we can NOT change the + * contents of this descriptor nor the surrounding ones. The problem is + * that in order to mark the descriptor as processed, we need to change + * the descriptor. The solution is to mark the whole cache line when all + * descriptors in the cache line are processed. */ + addr = (uint32_t)rbd; + addr &= ~(CONFIG_FEC_ALIGN - 1); + size = roundup(sizeof(struct fec_bd), CONFIG_FEC_ALIGN); + invalidate_dcache_range(addr, addr + size); + bd_status = readw(&rbd->status); debug("fec_recv: status 0x%x\n", bd_status);
@@ -751,6 +841,13 @@ static int fec_recv(struct eth_device *dev) frame = (struct nbuf *)readl(&rbd->data_pointer); frame_length = readw(&rbd->data_length) - 4; /* + * Invalidate data cache over the buffer + */ + addr = (uint32_t)frame; + size = roundup(frame_length, CONFIG_FEC_ALIGN); + invalidate_dcache_range(addr, addr + size); + + /* * Fill the buffer and pass it to upper layers */ #ifdef CONFIG_FEC_MXC_SWAP_PACKET @@ -765,11 +862,25 @@ static int fec_recv(struct eth_device *dev) (ulong)rbd->data_pointer, bd_status); } + /* - * free the current buffer, restart the engine - * and move forward to the next buffer + * Free the current buffer, restart the engine and move forward + * to the next buffer. Here we check if the whole cacheline of + * descriptors was already processed and if so, we mark it free + * as whole. */ - fec_rbd_clean(fec->rbd_index == (FEC_RBD_NUM - 1) ? 1 : 0, rbd); + size = RXDESC_PER_CACHELINE - 1; + if ((fec->rbd_index & size) == size) { + i = fec->rbd_index - size; + addr = (uint32_t)&fec->rbd_base[i]; + for (; i <= fec->rbd_index ; i++) { + fec_rbd_clean(i == (FEC_RBD_NUM - 1), + &fec->rbd_base[i]); + } + flush_dcache_range(addr, + addr + CONFIG_FEC_ALIGN); + } + fec_rx_task_enable(fec); fec->rbd_index = (fec->rbd_index + 1) % FEC_RBD_NUM; } diff --git a/drivers/net/fec_mxc.h b/drivers/net/fec_mxc.h index 2eb7803..852b2e0 100644 --- a/drivers/net/fec_mxc.h +++ b/drivers/net/fec_mxc.h @@ -234,22 +234,6 @@ struct ethernet_regs { #endif
/** - * @brief Descriptor buffer alignment - * - * i.MX27 requires a 16 byte alignment (but for the first element only) - */ -#define DB_ALIGNMENT 16 - -/** - * @brief Data buffer alignment - * - * i.MX27 requires a four byte alignment for transmit and 16 bits - * alignment for receive so take 16 - * Note: Valid for member data_pointer in struct buffer_descriptor - */ -#define DB_DATA_ALIGNMENT 16 - -/** * @brief Receive & Transmit Buffer Descriptor definitions * * Note: The first BD must be aligned (see DB_ALIGNMENT) @@ -282,8 +266,7 @@ struct fec_priv { struct fec_bd *tbd_base; /* TBD ring */ int tbd_index; /* next transmit BD to write */ bd_t *bd; - void *rdb_ptr; - void *base_ptr; + uint8_t *tdb_ptr; int dev_id; int phy_id; struct mii_dev *bus;

On Tuesday 13 March 2012 10:04:31 Eric Nelson wrote:
--- a/drivers/net/fec_mxc.c +++ b/drivers/net/fec_mxc.c
+#if ARCH_DMA_MINALIGN > CONFIG_SYS_CACHELINE_SIZE +#define CONFIG_FEC_ALIGN ARCH_DMA_MINALIGN +#else +#define CONFIG_FEC_ALIGN CONFIG_SYS_CACHELINE_SIZE +#endif
i don't think this is something you should be checking. if this is an actual problem (and i don't think it is), it's something we should handle in common code. if you need to dma from memory, then use ARCH_DMA_MINALIGN.
+#error "CONFIG_FEC_ALIGN must be multiple of 16!" +#error "PKTALIGN must be multiple of CONFIG_FEC_ALIGN!"
please don't use tabs after #define/#error/etc... just one space -mike

Thanks Mike,
On 03/13/2012 07:41 AM, Mike Frysinger wrote:
On Tuesday 13 March 2012 10:04:31 Eric Nelson wrote:
--- a/drivers/net/fec_mxc.c +++ b/drivers/net/fec_mxc.c
+#if ARCH_DMA_MINALIGN> CONFIG_SYS_CACHELINE_SIZE +#define CONFIG_FEC_ALIGN ARCH_DMA_MINALIGN +#else +#define CONFIG_FEC_ALIGN CONFIG_SYS_CACHELINE_SIZE +#endif
i don't think this is something you should be checking. if this is an actual problem (and i don't think it is), it's something we should handle in common code. if you need to dma from memory, then use ARCH_DMA_MINALIGN.
Marek, you've mentioned some restrictions for other i.MX devices.
Are you aware of any problem collapsing this?
Note that other CPUs will need to have CONFIG_SYS_CACHELINE_SIZE to prevent the default of 64.
+#error "CONFIG_FEC_ALIGN must be multiple of 16!" +#error "PKTALIGN must be multiple of CONFIG_FEC_ALIGN!"
please don't use tabs after #define/#error/etc... just one space
Ok. I'll address in V4.

On Tuesday 13 March 2012 14:42:29 Eric Nelson wrote:
On 03/13/2012 07:41 AM, Mike Frysinger wrote:
On Tuesday 13 March 2012 10:04:31 Eric Nelson wrote:
--- a/drivers/net/fec_mxc.c +++ b/drivers/net/fec_mxc.c
+#if ARCH_DMA_MINALIGN> CONFIG_SYS_CACHELINE_SIZE +#define CONFIG_FEC_ALIGN ARCH_DMA_MINALIGN +#else +#define CONFIG_FEC_ALIGN CONFIG_SYS_CACHELINE_SIZE +#endif
i don't think this is something you should be checking. if this is an actual problem (and i don't think it is), it's something we should handle in common code. if you need to dma from memory, then use ARCH_DMA_MINALIGN.
Marek, you've mentioned some restrictions for other i.MX devices.
Are you aware of any problem collapsing this?
Note that other CPUs will need to have CONFIG_SYS_CACHELINE_SIZE to prevent the default of 64.
and those cores should be making sure that ARCH_DMA_MINALIGN is sufficient to handle those larger cache lines. this define provides two meanings: minimum alignment for the DMA itself, and for sanely flushing the cache on that dma buffer. so there should be no situation where ARCH_DMA_MINALIGN is smaller than the cacheline size.
the few boards i see defining CONFIG_SYS_CACHELINE_SIZE to 64 are all ARM based, and ARM sets ARCH_DMA_MINALIGN to CONFIG_SYS_CACHELINE_SIZE if it's defined -mike

Dear Mike Frysinger,
On Tuesday 13 March 2012 14:42:29 Eric Nelson wrote:
On 03/13/2012 07:41 AM, Mike Frysinger wrote:
On Tuesday 13 March 2012 10:04:31 Eric Nelson wrote:
--- a/drivers/net/fec_mxc.c +++ b/drivers/net/fec_mxc.c
+#if ARCH_DMA_MINALIGN> CONFIG_SYS_CACHELINE_SIZE +#define CONFIG_FEC_ALIGN ARCH_DMA_MINALIGN +#else +#define CONFIG_FEC_ALIGN CONFIG_SYS_CACHELINE_SIZE +#endif
i don't think this is something you should be checking. if this is an actual problem (and i don't think it is), it's something we should handle in common code. if you need to dma from memory, then use ARCH_DMA_MINALIGN.
Marek, you've mentioned some restrictions for other i.MX devices.
Are you aware of any problem collapsing this?
Note that other CPUs will need to have CONFIG_SYS_CACHELINE_SIZE to prevent the default of 64.
and those cores should be making sure that ARCH_DMA_MINALIGN is sufficient to handle those larger cache lines. this define provides two meanings: minimum alignment for the DMA itself, and for sanely flushing the cache on that dma buffer. so there should be no situation where ARCH_DMA_MINALIGN is smaller than the cacheline size.
the few boards i see defining CONFIG_SYS_CACHELINE_SIZE to 64 are all ARM based, and ARM sets ARCH_DMA_MINALIGN to CONFIG_SYS_CACHELINE_SIZE if it's defined -mike
Eric, can you check also PPC /wrt to this? This driver is also used on PPC.
Best regards, Marek Vasut

On 03/13/2012 06:43 PM, Marek Vasut wrote:
Dear Mike Frysinger,
On Tuesday 13 March 2012 14:42:29 Eric Nelson wrote:
On 03/13/2012 07:41 AM, Mike Frysinger wrote:
On Tuesday 13 March 2012 10:04:31 Eric Nelson wrote:
--- a/drivers/net/fec_mxc.c +++ b/drivers/net/fec_mxc.c
+#if ARCH_DMA_MINALIGN> CONFIG_SYS_CACHELINE_SIZE +#define CONFIG_FEC_ALIGN ARCH_DMA_MINALIGN +#else +#define CONFIG_FEC_ALIGN CONFIG_SYS_CACHELINE_SIZE +#endif
i don't think this is something you should be checking. if this is an actual problem (and i don't think it is), it's something we should handle in common code. if you need to dma from memory, then use ARCH_DMA_MINALIGN.
Marek, you've mentioned some restrictions for other i.MX devices.
Are you aware of any problem collapsing this?
Note that other CPUs will need to have CONFIG_SYS_CACHELINE_SIZE to prevent the default of 64.
and those cores should be making sure that ARCH_DMA_MINALIGN is sufficient to handle those larger cache lines. this define provides two meanings: minimum alignment for the DMA itself, and for sanely flushing the cache on that dma buffer. so there should be no situation where ARCH_DMA_MINALIGN is smaller than the cacheline size.
the few boards i see defining CONFIG_SYS_CACHELINE_SIZE to 64 are all ARM based, and ARM sets ARCH_DMA_MINALIGN to CONFIG_SYS_CACHELINE_SIZE if it's defined -mike
Eric, can you check also PPC /wrt to this? This driver is also used on PPC.
Hi Marek,
I'm not seeing any reference to FEC_MXC on PPC. All boards that use it appear to be i.MX:
~/u-boot-imx6$ grep -w CONFIG_FEC_MXC include/configs/*.h include/configs/flea3.h:#define CONFIG_FEC_MXC include/configs/imx27lite-common.h:#define CONFIG_FEC_MXC include/configs/m28evk.h:#define CONFIG_FEC_MXC include/configs/mx25pdk.h:#define CONFIG_FEC_MXC include/configs/mx28evk.h:#define CONFIG_FEC_MXC include/configs/mx35pdk.h:#define CONFIG_FEC_MXC include/configs/mx51evk.h:#define CONFIG_FEC_MXC include/configs/mx53evk.h:#define CONFIG_FEC_MXC include/configs/mx53loco.h:#define CONFIG_FEC_MXC include/configs/mx53smd.h:#define CONFIG_FEC_MXC include/configs/mx6qarm2.h:#define CONFIG_FEC_MXC include/configs/mx6qsabrelite.h:#define CONFIG_FEC_MXC include/configs/tx25.h:#define CONFIG_FEC_MXC include/configs/vision2.h:#define CONFIG_FEC_MXC include/configs/zmx25.h:#define CONFIG_FEC_MXC
Most of the PPC devices seem to have values of 16 or 32 for ARCH_DMA_MINALIGN, but PPC64BRIDGE and E500MC would have a problem if their drivers don't implement a bounce buffer because PKTALIGN < ARCH_DMA_MINALIGN.
(see arch/powerpc/include/asm/cache.h)
This condition is properly tested for in fec_mxc.c.
Regards,
Eric

On Wednesday 14 March 2012 01:12:38 Eric Nelson wrote:
Most of the PPC devices seem to have values of 16 or 32 for ARCH_DMA_MINALIGN, but PPC64BRIDGE and E500MC would have a problem if their drivers don't implement a bounce buffer because PKTALIGN < ARCH_DMA_MINALIGN.
(see arch/powerpc/include/asm/cache.h)
This condition is properly tested for in fec_mxc.c.
so fix this in common code instead of hacking around it in individual drivers. seems to me that PKTALIGN should be defined to ARCH_DMA_MINALIGN and ultimately removed. -mike

On 03/13/2012 10:41 PM, Mike Frysinger wrote:
On Wednesday 14 March 2012 01:12:38 Eric Nelson wrote:
Most of the PPC devices seem to have values of 16 or 32 for ARCH_DMA_MINALIGN, but PPC64BRIDGE and E500MC would have a problem if their drivers don't implement a bounce buffer because PKTALIGN< ARCH_DMA_MINALIGN.
(see arch/powerpc/include/asm/cache.h)
This condition is properly tested for in fec_mxc.c.
so fix this in common code instead of hacking around it in individual drivers. seems to me that PKTALIGN should be defined to ARCH_DMA_MINALIGN and ultimately removed. -mike
Hi Mike,
I'm not in a position to test against MAKEALL, but it appears that all architectures have cache.h and define ARCH_DMA_MINALIGN, so it should be trivially easy to fix PKTALIGN to be at least ARCH_DMA_MINALIGN as shown below.
PKTSIZE_ALIGN seems safe for all architectures at 1536.
Note that this will reduce the value to 16 for some PPC devices, but I haven't found any place that this would break things.
Is this what you're after?
Are you in a position to run MAKEALL or can you advise about the requirements?
Please advise,
Eric
~/u-boot-imx6$ git diff diff --git a/include/net.h b/include/net.h index e4d42c2..ff428d0 100644 --- a/include/net.h +++ b/include/net.h @@ -16,6 +16,7 @@ #include <commproc.h> #endif /* CONFIG_8xx */
+#include <asm/cache.h> #include <asm/byteorder.h> /* for nton* / ntoh* stuff */
@@ -31,7 +32,7 @@ # define PKTBUFSRX 4 #endif
-#define PKTALIGN 32 +#define PKTALIGN ARCH_DMA_MINALIGN
/* IPv4 addresses are always 32 bits in size */ typedef u32 IPaddr_t;

On Wednesday 14 March 2012 15:12:10 Eric Nelson wrote:
On 03/13/2012 10:41 PM, Mike Frysinger wrote:
On Wednesday 14 March 2012 01:12:38 Eric Nelson wrote:
Most of the PPC devices seem to have values of 16 or 32 for ARCH_DMA_MINALIGN, but PPC64BRIDGE and E500MC would have a problem if their drivers don't implement a bounce buffer because PKTALIGN< ARCH_DMA_MINALIGN.
(see arch/powerpc/include/asm/cache.h)
This condition is properly tested for in fec_mxc.c.
so fix this in common code instead of hacking around it in individual drivers. seems to me that PKTALIGN should be defined to ARCH_DMA_MINALIGN and ultimately removed.
I'm not in a position to test against MAKEALL, but it appears that all architectures have cache.h and define ARCH_DMA_MINALIGN
ARCH_DMA_MINALIGN is required. if an arch/board omits it, they are broken and you need not worry about it. we already have common code requiring int.
--- a/include/net.h +++ b/include/net.h
-#define PKTALIGN 32 +#define PKTALIGN ARCH_DMA_MINALIGN
looks fine to me -mike

On 03/14/2012 01:33 PM, Mike Frysinger wrote:
On Wednesday 14 March 2012 15:12:10 Eric Nelson wrote:
On 03/13/2012 10:41 PM, Mike Frysinger wrote:
On Wednesday 14 March 2012 01:12:38 Eric Nelson wrote:
Most of the PPC devices seem to have values of 16 or 32 for ARCH_DMA_MINALIGN, but PPC64BRIDGE and E500MC would have a problem if their drivers don't implement a bounce buffer because PKTALIGN< ARCH_DMA_MINALIGN.
(see arch/powerpc/include/asm/cache.h)
This condition is properly tested for in fec_mxc.c.
so fix this in common code instead of hacking around it in individual drivers. seems to me that PKTALIGN should be defined to ARCH_DMA_MINALIGN and ultimately removed.
I'm not in a position to test against MAKEALL, but it appears that all architectures have cache.h and define ARCH_DMA_MINALIGN
ARCH_DMA_MINALIGN is required. if an arch/board omits it, they are broken and you need not worry about it. we already have common code requiring int.
Sounds good.
--- a/include/net.h +++ b/include/net.h
-#define PKTALIGN 32 +#define PKTALIGN ARCH_DMA_MINALIGN
looks fine to me -mike
You want I should send a formal patch?
Should I consider "looks fine" to be an ack?
If so, I'll also send an update (V5) to fec_mxc that removes the check on PKTALIGN.
Please advise,
Eric

Dear Eric Nelson,
On 03/13/2012 06:43 PM, Marek Vasut wrote:
Dear Mike Frysinger,
On Tuesday 13 March 2012 14:42:29 Eric Nelson wrote:
On 03/13/2012 07:41 AM, Mike Frysinger wrote:
On Tuesday 13 March 2012 10:04:31 Eric Nelson wrote:
--- a/drivers/net/fec_mxc.c +++ b/drivers/net/fec_mxc.c
+#if ARCH_DMA_MINALIGN> CONFIG_SYS_CACHELINE_SIZE +#define CONFIG_FEC_ALIGN ARCH_DMA_MINALIGN +#else +#define CONFIG_FEC_ALIGN CONFIG_SYS_CACHELINE_SIZE +#endif
i don't think this is something you should be checking. if this is an actual problem (and i don't think it is), it's something we should handle in common code. if you need to dma from memory, then use ARCH_DMA_MINALIGN.
Marek, you've mentioned some restrictions for other i.MX devices.
Are you aware of any problem collapsing this?
Note that other CPUs will need to have CONFIG_SYS_CACHELINE_SIZE to prevent the default of 64.
and those cores should be making sure that ARCH_DMA_MINALIGN is sufficient to handle those larger cache lines. this define provides two meanings: minimum alignment for the DMA itself, and for sanely flushing the cache on that dma buffer. so there should be no situation where ARCH_DMA_MINALIGN is smaller than the cacheline size.
the few boards i see defining CONFIG_SYS_CACHELINE_SIZE to 64 are all ARM based, and ARM sets ARCH_DMA_MINALIGN to CONFIG_SYS_CACHELINE_SIZE if it's defined -mike
Eric, can you check also PPC /wrt to this? This driver is also used on PPC.
Hi Marek,
I'm not seeing any reference to FEC_MXC on PPC. All boards that use it appear to be i.MX:
I see, my mistake. Though the PPC systems should be converted ;-)
~/u-boot-imx6$ grep -w CONFIG_FEC_MXC include/configs/*.h include/configs/flea3.h:#define CONFIG_FEC_MXC include/configs/imx27lite-common.h:#define CONFIG_FEC_MXC include/configs/m28evk.h:#define CONFIG_FEC_MXC include/configs/mx25pdk.h:#define CONFIG_FEC_MXC include/configs/mx28evk.h:#define CONFIG_FEC_MXC include/configs/mx35pdk.h:#define CONFIG_FEC_MXC include/configs/mx51evk.h:#define CONFIG_FEC_MXC include/configs/mx53evk.h:#define CONFIG_FEC_MXC include/configs/mx53loco.h:#define CONFIG_FEC_MXC include/configs/mx53smd.h:#define CONFIG_FEC_MXC include/configs/mx6qarm2.h:#define CONFIG_FEC_MXC include/configs/mx6qsabrelite.h:#define CONFIG_FEC_MXC include/configs/tx25.h:#define CONFIG_FEC_MXC include/configs/vision2.h:#define CONFIG_FEC_MXC include/configs/zmx25.h:#define CONFIG_FEC_MXC
Most of the PPC devices seem to have values of 16 or 32 for ARCH_DMA_MINALIGN, but PPC64BRIDGE and E500MC would have a problem if their drivers don't implement a bounce buffer because PKTALIGN < ARCH_DMA_MINALIGN.
Awww, leave the check there then, those PPC systems should produce warning when they will be flipped so this is not forgotten.
(see arch/powerpc/include/asm/cache.h)
This condition is properly tested for in fec_mxc.c.
Regards,
Eric
Best regards, Marek Vasut

Dear Eric Nelson,
Thanks Mike,
On 03/13/2012 07:41 AM, Mike Frysinger wrote:
On Tuesday 13 March 2012 10:04:31 Eric Nelson wrote:
--- a/drivers/net/fec_mxc.c +++ b/drivers/net/fec_mxc.c
+#if ARCH_DMA_MINALIGN> CONFIG_SYS_CACHELINE_SIZE +#define CONFIG_FEC_ALIGN ARCH_DMA_MINALIGN +#else +#define CONFIG_FEC_ALIGN CONFIG_SYS_CACHELINE_SIZE +#endif
i don't think this is something you should be checking. if this is an actual problem (and i don't think it is), it's something we should handle in common code. if you need to dma from memory, then use ARCH_DMA_MINALIGN.
Marek, you've mentioned some restrictions for other i.MX devices.
Are you aware of any problem collapsing this?
Well yes, there exists a CPU which has 32byte cacheline, but FEC such that needs DMA areas aligned to 16 bytes. That's why you need higher of those two to be safe.
Note that other CPUs will need to have CONFIG_SYS_CACHELINE_SIZE to prevent the default of 64.
Agreed
+#error "CONFIG_FEC_ALIGN must be multiple of 16!" +#error "PKTALIGN must be multiple of CONFIG_FEC_ALIGN!"
please don't use tabs after #define/#error/etc... just one space
Ok. I'll address in V4.
This is actually my work, Mike, I just love it neatly aligned with tabs :-p
Best regards, Marek Vasut

On 03/13/2012 07:04 AM, Eric Nelson wrote:
ensure that transmit and receive buffers are cache-line aligned invalidate cache for each packet as received update receive buffer descriptors one cache line at a time flush cache before transmitting
Original patch by Marek: http://lists.denx.de/pipermail/u-boot/2012-February/117695.html
Signed-off-by: Eric Nelsoneric.nelson@boundarydevices.com
V2 addresses some concerns from the ML:
- Use readl()/writel() instead of mapped data structure accesses
- Wrong comment style
-&rbd_base[0] == rbd_base removed 'volatile' from fec_send().
<snip>
diff --git a/drivers/net/fec_mxc.c b/drivers/net/fec_mxc.c index 1fdd071..94a3927 100644 --- a/drivers/net/fec_mxc.c +++ b/drivers/net/fec_mxc.c
...
@@ -631,9 +682,11 @@ static void fec_halt(struct eth_device *dev)
- @param[in] length Data count in bytes
- @return 0 on success
*/ -static int fec_send(struct eth_device *dev, volatile void* packet, int length) +static int fec_send(struct eth_device *dev, void *packet, int length) { unsigned int status;
I made this change to keep checkpatch happy (it doesn't like volatile), but the declaration of struct eth_device in include/net.h seems to want the volatile:
int (*send) (struct eth_device*, volatile void* packet, int length);
I'd rather have a checkpatch warning than a compiler warning, so I'll fix this in V4...

Dear Eric Nelson,
On 03/13/2012 07:04 AM, Eric Nelson wrote:
ensure that transmit and receive buffers are cache-line aligned
invalidate cache for each packet as received
update receive buffer descriptors one cache line at a time
flush cache before transmitting
Original patch by Marek: http://lists.denx.de/pipermail/u-boot/2012-February/117695.html
Signed-off-by: Eric Nelsoneric.nelson@boundarydevices.com
V2 addresses some concerns from the ML:
Use readl()/writel() instead of mapped data structure
accesses
Wrong comment style
-&rbd_base[0] == rbd_base removed 'volatile' from fec_send().
<snip>
diff --git a/drivers/net/fec_mxc.c b/drivers/net/fec_mxc.c index 1fdd071..94a3927 100644 --- a/drivers/net/fec_mxc.c +++ b/drivers/net/fec_mxc.c
...
@@ -631,9 +682,11 @@ static void fec_halt(struct eth_device *dev)
- @param[in] length Data count in bytes
- @return 0 on success
*/
-static int fec_send(struct eth_device *dev, volatile void* packet, int length) +static int fec_send(struct eth_device *dev, void *packet, int length)
{
unsigned int status;
I made this change to keep checkpatch happy (it doesn't like volatile), but the declaration of struct eth_device in include/net.h seems to want the volatile:
int (*send) (struct eth_device*, volatile void* packet, int length);
I'd rather have a checkpatch warning than a compiler warning, so I'll fix this in V4...
I believe the volatile is there for a reason (not a reason meaningful for this device though). I believe on some boards, this was used to avoid some cache trouble or such.
Best regards, Marek Vasut

Dear Eric Nelson,
ensure that transmit and receive buffers are cache-line aligned invalidate cache for each packet as received update receive buffer descriptors one cache line at a time flush cache before transmitting
Original patch by Marek: http://lists.denx.de/pipermail/u-boot/2012-February/117695.html
Signed-off-by: Eric Nelson eric.nelson@boundarydevices.com
V2 addresses some concerns from the ML:
- Use readl()/writel() instead of mapped data structure accesses
- Wrong comment style
- &rbd_base[0] == rbd_base
removed 'volatile' from fec_send().
V3 updates from ML (and Marek): consolidated CONFIG_FEC_DATA_ALIGNMENT and CONFIG_FEC_DESC_ALIGNMENT added cache flushes after initialization of TBD/RBD
drivers/net/fec_mxc.c | 265 +++++++++++++++++++++++++++++++++++-------------- drivers/net/fec_mxc.h | 19 +---- 2 files changed, 189 insertions(+), 95 deletions(-)
diff --git a/drivers/net/fec_mxc.c b/drivers/net/fec_mxc.c index 1fdd071..94a3927 100644 --- a/drivers/net/fec_mxc.c +++ b/drivers/net/fec_mxc.c @@ -50,6 +50,24 @@ DECLARE_GLOBAL_DATA_PTR; #define CONFIG_FEC_MXC_SWAP_PACKET #endif
+#if ARCH_DMA_MINALIGN > CONFIG_SYS_CACHELINE_SIZE +#define CONFIG_FEC_ALIGN ARCH_DMA_MINALIGN +#else +#define CONFIG_FEC_ALIGN CONFIG_SYS_CACHELINE_SIZE +#endif
Check PPC and let's go with ARCH_DMA_MINALIGN
+#define RXDESC_PER_CACHELINE (CONFIG_FEC_ALIGN/sizeof(struct fec_bd))
+/* Check various alignment issues at compile time */ +#if ((CONFIG_FEC_ALIGN < 16) || (CONFIG_FEC_ALIGN % 16 != 0)) +#error "CONFIG_FEC_ALIGN must be multiple of 16!" +#endif
+#if ((PKTALIGN < CONFIG_FEC_ALIGN) || \
- (PKTALIGN % CONFIG_FEC_ALIGN != 0))
+#error "PKTALIGN must be multiple of CONFIG_FEC_ALIGN!" +#endif
We should keep these checks in case some obscure platform appears.
Otherwise, we're almost there Eric! Great work !!
Best regards, Marek Vasut
participants (3)
-
Eric Nelson
-
Marek Vasut
-
Mike Frysinger