[PATCH 1/5] net: eth-uclass: guard against reentrant eth_init()/eth_halt() calls

With netconsole, any log message can result in an eth_init(), possibly causing an reentrant call into eth_init() if a driver's ops print anything:
eth_init() -> driver.start() -> printf() -> netconsole -> eth_init() eth_halt() -> driver.stop() -> printf() -> netconsole -> eth_init()
Rather than expecting every single Ethernet driver to handle this case, prevent the reentrant calls in eth_init() and eth_halt().
The issue was noticed on an AM62x board, where a bootm after simultaneous netconsole and TFTP would result in a crash.
Signed-off-by: Matthias Schiffer matthias.schiffer@ew.tq-group.com --- net/eth-uclass.c | 40 ++++++++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 8 deletions(-)
diff --git a/net/eth-uclass.c b/net/eth-uclass.c index 3d0ec91dfa4..193218a328b 100644 --- a/net/eth-uclass.c +++ b/net/eth-uclass.c @@ -48,6 +48,8 @@ struct eth_uclass_priv {
/* eth_errno - This stores the most recent failure code from DM functions */ static int eth_errno; +/* Are we currently in eth_init() or eth_halt()? */ +static bool in_init_halt;
/* board-specific Ethernet Interface initializations. */ __weak int board_interface_eth_init(struct udevice *dev, @@ -285,11 +287,19 @@ U_BOOT_ENV_CALLBACK(ethaddr, on_ethaddr);
int eth_init(void) { - char *ethact = env_get("ethact"); - char *ethrotate = env_get("ethrotate"); struct udevice *current = NULL; struct udevice *old_current; int ret = -ENODEV; + char *ethrotate; + char *ethact; + + if (in_init_halt) + return -EBUSY; + + in_init_halt = true; + + ethact = env_get("ethact"); + ethrotate = env_get("ethrotate");
/* * When 'ethrotate' variable is set to 'no' and 'ethact' variable @@ -298,8 +308,10 @@ int eth_init(void) if ((ethrotate != NULL) && (strcmp(ethrotate, "no") == 0)) { if (ethact) { current = eth_get_dev_by_name(ethact); - if (!current) - return -EINVAL; + if (!current) { + ret = -EINVAL; + goto end; + } } }
@@ -307,7 +319,8 @@ int eth_init(void) current = eth_get_dev(); if (!current) { log_err("No ethernet found.\n"); - return -ENODEV; + ret = -ENODEV; + goto end; } }
@@ -324,7 +337,8 @@ int eth_init(void)
priv->state = ETH_STATE_ACTIVE; priv->running = true; - return 0; + ret = 0; + goto end; } } else { ret = eth_errno; @@ -344,6 +358,8 @@ int eth_init(void) current = eth_get_dev(); } while (old_current != current);
+end: + in_init_halt = false; return ret; }
@@ -352,17 +368,25 @@ void eth_halt(void) struct udevice *current; struct eth_device_priv *priv;
+ if (in_init_halt) + return; + + in_init_halt = true; + current = eth_get_dev(); if (!current) - return; + goto end;
priv = dev_get_uclass_priv(current); if (!priv || !priv->running) - return; + goto end;
eth_get_ops(current)->stop(current); priv->state = ETH_STATE_PASSIVE; priv->running = false; + +end: + in_init_halt = false; }
int eth_is_active(struct udevice *dev)

The eth-uclass state machine doesn't prevent imbalanced start()/stop() calls - to the contrary, it even provides eth_init_state_only() and eth_halt_state_only() functions that change the state without any calls into the driver. This means that the driver must be robust against duplicate start() and stop() calls as well as send/recv calls while the interface is down.
We decide not to print error messages but just to return an error in the latter case, as trying to send packets on a disabled interface commonly happens when the netconsole is still active after the Ethernet has been halted during bootm.
Signed-off-by: Matthias Schiffer matthias.schiffer@ew.tq-group.com --- drivers/net/ti/am65-cpsw-nuss.c | 9 +++++++++ 1 file changed, 9 insertions(+)
diff --git a/drivers/net/ti/am65-cpsw-nuss.c b/drivers/net/ti/am65-cpsw-nuss.c index b151e25d6a4..4a57e945a3a 100644 --- a/drivers/net/ti/am65-cpsw-nuss.c +++ b/drivers/net/ti/am65-cpsw-nuss.c @@ -328,6 +328,9 @@ static int am65_cpsw_start(struct udevice *dev) struct ti_udma_drv_chan_cfg_data *dma_rx_cfg_data; int ret, i;
+ if (common->started) + return 0; + ret = power_domain_on(&common->pwrdmn); if (ret) { dev_err(dev, "power_domain_on() failed %d\n", ret); @@ -488,6 +491,9 @@ static int am65_cpsw_send(struct udevice *dev, void *packet, int length) struct ti_udma_drv_packet_data packet_data; int ret;
+ if (!common->started) + return -ENETDOWN; + packet_data.pkt_type = AM65_CPSW_CPPI_PKT_TYPE; packet_data.dest_tag = priv->port_id; ret = dma_send(&common->dma_tx, packet, length, &packet_data); @@ -504,6 +510,9 @@ static int am65_cpsw_recv(struct udevice *dev, int flags, uchar **packetp) struct am65_cpsw_priv *priv = dev_get_priv(dev); struct am65_cpsw_common *common = priv->cpsw_common;
+ if (!common->started) + return -ENETDOWN; + /* try to receive a new packet */ return dma_receive(&common->dma_rx, (void **)packetp, NULL); }

The RX DMA channel has been requested at this point already, so it must be freed.
Signed-off-by: Matthias Schiffer matthias.schiffer@ew.tq-group.com --- drivers/net/ti/am65-cpsw-nuss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ti/am65-cpsw-nuss.c b/drivers/net/ti/am65-cpsw-nuss.c index 4a57e945a3a..65ade1afd05 100644 --- a/drivers/net/ti/am65-cpsw-nuss.c +++ b/drivers/net/ti/am65-cpsw-nuss.c @@ -362,7 +362,7 @@ static int am65_cpsw_start(struct udevice *dev) UDMA_RX_BUF_SIZE); if (ret) { dev_err(dev, "RX dma add buf failed %d\n", ret); - goto err_free_tx; + goto err_free_rx; } }

net.h is needed for PKTBUFSRX. Without this definition, the driver will always use 4 RX buffers, causing am65-cpsw-nuss initialization to fail when a higher number of buffers is requested.
Signed-off-by: Matthias Schiffer matthias.schiffer@ew.tq-group.com --- drivers/dma/ti/k3-udma.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c index ef3074aa13f..4e6f7f570c5 100644 --- a/drivers/dma/ti/k3-udma.c +++ b/drivers/dma/ti/k3-udma.c @@ -12,6 +12,7 @@ #include <asm/io.h> #include <asm/bitops.h> #include <malloc.h> +#include <net.h> #include <linux/bitops.h> #include <linux/dma-mapping.h> #include <linux/sizes.h>

Buffers must not have an unclean cache before being used for DMA - a pending write-back may corrupt the next dev-to-mem transfer otherwise.
This was consistently noticeable during long TFTP transfers, when an ARP request is answered by U-Boot in the middle of the transfer:
As U-Boot's arp_receive() reuses the receive buffer to prepare its reply packet, the beginning of one of the next incoming TFTP packets is overwritten by the ARP reply. The corrupted packet is ignored, but the TFTP transfer stalls for a few seconds until a timeout is detected and a retransmit is triggered.
Signed-off-by: Matthias Schiffer matthias.schiffer@ew.tq-group.com --- drivers/dma/ti/k3-udma.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c index 4e6f7f570c5..8f6d396653e 100644 --- a/drivers/dma/ti/k3-udma.c +++ b/drivers/dma/ti/k3-udma.c @@ -2678,6 +2678,9 @@ int udma_prepare_rcv_buf(struct dma *dma, void *dst, size_t size) cppi5_hdesc_set_pktlen(desc_rx, size); cppi5_hdesc_attach_buf(desc_rx, dma_dst, size, dma_dst, size);
+ invalidate_dcache_range((unsigned long)dma_dst, + (unsigned long)(dma_dst + size)); + flush_dcache_range((unsigned long)desc_rx, ALIGN((unsigned long)desc_rx + uc->config.hdesc_size, ARCH_DMA_MINALIGN));

Hello Matthias,
On Fri, 2024-04-26 at 10:02 +0200, Matthias Schiffer wrote:
Buffers must not have an unclean cache before being used for DMA - a pending write-back may corrupt the next dev-to-mem transfer otherwise.
This was consistently noticeable during long TFTP transfers, when an ARP request is answered by U-Boot in the middle of the transfer:
As U-Boot's arp_receive() reuses the receive buffer to prepare its reply packet, the beginning of one of the next incoming TFTP packets is overwritten by the ARP reply. The corrupted packet is ignored, but the TFTP transfer stalls for a few seconds until a timeout is detected and a retransmit is triggered.
Signed-off-by: Matthias Schiffer matthias.schiffer@ew.tq-group.com
thanks for the series! I've tested it on a custom AM625-based board issuing a a couple of pings:
am65_cpsw_nuss ethernet@8000000: K3 CPSW: nuss_ver: 0x6BA01103 cpsw_ver: 0x6BA81103 ale_ver: 0x00290105 Ports:2 Net: eth0: ethernet@8000000port@1 Autobooting in 3 seconds, press "<Esc><Esc>" to stop U-Boot# ping 192.168.251.10 ti-udma dma-controller@485c0000: k3_dmaring Ring probed rings:150, sci-dev-id:30 ti-udma dma-controller@485c0000: dma-ring-reset-quirk: disabled am65_cpsw_nuss_port ethernet@8000000port@1: K3 CPSW: rflow_id_base: 19 link up on port 1, speed 100, full duplex Using ethernet@8000000port@1 device host 192.168.251.10 is alive U-Boot# ping 192.168.251.10 am65_cpsw_nuss_port ethernet@8000000port@1: K3 CPSW: rflow_id_base: 19 link up on port 1, speed 100, full duplex Using ethernet@8000000port@1 device host 192.168.251.10 is alive
This sequence looks exactly the same with your patches and without. Therefore for the whole series:
Tested-by: Alexander Sverdlin alexander.sverdlin@siemens.com
drivers/dma/ti/k3-udma.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c index 4e6f7f570c5..8f6d396653e 100644 --- a/drivers/dma/ti/k3-udma.c +++ b/drivers/dma/ti/k3-udma.c @@ -2678,6 +2678,9 @@ int udma_prepare_rcv_buf(struct dma *dma, void *dst, size_t size) cppi5_hdesc_set_pktlen(desc_rx, size); cppi5_hdesc_attach_buf(desc_rx, dma_dst, size, dma_dst, size); + invalidate_dcache_range((unsigned long)dma_dst, + (unsigned long)(dma_dst + size));
flush_dcache_range((unsigned long)desc_rx, ALIGN((unsigned long)desc_rx + uc->config.hdesc_size, ARCH_DMA_MINALIGN));

On Fri, Apr 26, 2024 at 10:02:24AM +0200, Matthias Schiffer wrote:
With netconsole, any log message can result in an eth_init(), possibly causing an reentrant call into eth_init() if a driver's ops print anything:
eth_init() -> driver.start() -> printf() -> netconsole -> eth_init() eth_halt() -> driver.stop() -> printf() -> netconsole -> eth_init()
Rather than expecting every single Ethernet driver to handle this case, prevent the reentrant calls in eth_init() and eth_halt().
The issue was noticed on an AM62x board, where a bootm after simultaneous netconsole and TFTP would result in a crash.
Link: https://e2e.ti.com/support/processors-group/processors/f/processors-forum/13...
Signed-off-by: Matthias Schiffer matthias.schiffer@ew.tq-group.com
net/eth-uclass.c | 40 ++++++++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 8 deletions(-)
diff --git a/net/eth-uclass.c b/net/eth-uclass.c index 3d0ec91dfa4..193218a328b 100644 --- a/net/eth-uclass.c +++ b/net/eth-uclass.c @@ -48,6 +48,8 @@ struct eth_uclass_priv {
/* eth_errno - This stores the most recent failure code from DM functions */ static int eth_errno; +/* Are we currently in eth_init() or eth_halt()? */ +static bool in_init_halt;
/* board-specific Ethernet Interface initializations. */ __weak int board_interface_eth_init(struct udevice *dev, @@ -285,11 +287,19 @@ U_BOOT_ENV_CALLBACK(ethaddr, on_ethaddr);
int eth_init(void) {
- char *ethact = env_get("ethact");
- char *ethrotate = env_get("ethrotate"); struct udevice *current = NULL; struct udevice *old_current; int ret = -ENODEV;
char *ethrotate;
char *ethact;
if (in_init_halt)
return -EBUSY;
in_init_halt = true;
ethact = env_get("ethact");
ethrotate = env_get("ethrotate");
/*
- When 'ethrotate' variable is set to 'no' and 'ethact' variable
@@ -298,8 +308,10 @@ int eth_init(void) if ((ethrotate != NULL) && (strcmp(ethrotate, "no") == 0)) { if (ethact) { current = eth_get_dev_by_name(ethact);
if (!current)
return -EINVAL;
if (!current) {
ret = -EINVAL;
goto end;
} }}
@@ -307,7 +319,8 @@ int eth_init(void) current = eth_get_dev(); if (!current) { log_err("No ethernet found.\n");
return -ENODEV;
ret = -ENODEV;
} }goto end;
@@ -324,7 +337,8 @@ int eth_init(void)
priv->state = ETH_STATE_ACTIVE; priv->running = true;
return 0;
ret = 0;
goto end; } } else { ret = eth_errno;
@@ -344,6 +358,8 @@ int eth_init(void) current = eth_get_dev(); } while (old_current != current);
+end:
- in_init_halt = false; return ret;
}
@@ -352,17 +368,25 @@ void eth_halt(void) struct udevice *current; struct eth_device_priv *priv;
- if (in_init_halt)
return;
- in_init_halt = true;
- current = eth_get_dev(); if (!current)
return;
goto end;
priv = dev_get_uclass_priv(current); if (!priv || !priv->running)
return;
goto end;
eth_get_ops(current)->stop(current); priv->state = ETH_STATE_PASSIVE; priv->running = false;
+end:
- in_init_halt = false;
}
int eth_is_active(struct udevice *dev)
TQ-Systems GmbH | Mühlstraße 2, Gut Delling | 82229 Seefeld, Germany Amtsgericht München, HRB 105018 Geschäftsführer: Detlef Schneider, Rüdiger Stahl, Stefan Schneider https://www.tq-group.com/

On Fri, 26 Apr 2024 10:02:24 +0200, Matthias Schiffer wrote:
With netconsole, any log message can result in an eth_init(), possibly causing an reentrant call into eth_init() if a driver's ops print anything:
eth_init() -> driver.start() -> printf() -> netconsole -> eth_init() eth_halt() -> driver.stop() -> printf() -> netconsole -> eth_init()
[...]
Applied to u-boot/next, thanks!
participants (4)
-
Matthias Schiffer
-
Siddharth Vadapalli
-
Sverdlin, Alexander
-
Tom Rini