[U-Boot] [PATCH v3 1/8] ehci: cosmetic: Define used constants

Signed-off-by: Benoît Thébaudeau benoit.thebaudeau@advansee.com Cc: Marek Vasut marex@denx.de Cc: Ilya Yanok ilya.yanok@cogentembedded.com Cc: Stefan Herbrechtsmeier stefan@herbrechtsmeier.net --- Changes for v2: N/A. Changes for v3: - New patch.
.../drivers/usb/host/ehci-hcd.c | 135 +++++++++++--------- .../drivers/usb/host/ehci.h | 75 ++++++++++- 2 files changed, 150 insertions(+), 60 deletions(-)
diff --git u-boot-usb-8d5fb14.orig/drivers/usb/host/ehci-hcd.c u-boot-usb-8d5fb14/drivers/usb/host/ehci-hcd.c index 5b3b906..1977c28 100644 --- u-boot-usb-8d5fb14.orig/drivers/usb/host/ehci-hcd.c +++ u-boot-usb-8d5fb14/drivers/usb/host/ehci-hcd.c @@ -163,7 +163,7 @@ static int ehci_reset(void)
#ifdef CONFIG_USB_EHCI_TXFIFO_THRESH cmd = ehci_readl(&hcor->or_txfilltuning); - cmd &= ~TXFIFO_THRESH(0x3f); + cmd &= ~TXFIFO_THRESH_MASK; cmd |= TXFIFO_THRESH(CONFIG_USB_EHCI_TXFIFO_THRESH); ehci_writel(&hcor->or_txfilltuning, cmd); #endif @@ -186,7 +186,7 @@ static int ehci_td_buffer(struct qTD *td, void *buf, size_t sz) while (idx < QT_BUFFER_CNT) { td->qt_buffer[idx] = cpu_to_hc32(addr); td->qt_buffer_hi[idx] = 0; - next = (addr + 4096) & ~4095; + next = (addr + EHCI_PAGE_SIZE) & ~(EHCI_PAGE_SIZE - 1); delta = next - addr; if (delta >= sz) break; @@ -208,7 +208,8 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, int length, struct devrequest *req) { ALLOC_ALIGN_BUFFER(struct QH, qh, 1, USB_DMA_MINALIGN); - ALLOC_ALIGN_BUFFER(struct qTD, qtd, 3, USB_DMA_MINALIGN); +#define QTD_COUNT 3 + ALLOC_ALIGN_BUFFER(struct qTD, qtd, QTD_COUNT, USB_DMA_MINALIGN); int qtd_counter = 0;
volatile struct qTD *vtd; @@ -230,7 +231,7 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, le16_to_cpu(req->index));
memset(qh, 0, sizeof(struct QH)); - memset(qtd, 0, 3 * sizeof(*qtd)); + memset(qtd, 0, QTD_COUNT * sizeof(*qtd));
toggle = usb_gettoggle(dev, usb_pipeendpoint(pipe), usb_pipeout(pipe));
@@ -246,19 +247,20 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, */ qh->qh_link = cpu_to_hc32((uint32_t)qh_list | QH_LINK_TYPE_QH); c = (usb_pipespeed(pipe) != USB_SPEED_HIGH && - usb_pipeendpoint(pipe) == 0) ? 1 : 0; - endpt = (8 << 28) | - (c << 27) | - (usb_maxpacket(dev, pipe) << 16) | - (0 << 15) | - (1 << 14) | - (usb_pipespeed(pipe) << 12) | - (usb_pipeendpoint(pipe) << 8) | - (0 << 7) | (usb_pipedevice(pipe) << 0); + usb_pipeendpoint(pipe) == 0); + endpt = (8 << QH_ENDPT1_RL) | + (c << QH_ENDPT1_C) | + (usb_maxpacket(dev, pipe) << QH_ENDPT1_MAXPKTLEN) | + (0 << QH_ENDPT1_H) | + (QH_ENDPT1_DTC_DT_FROM_QTD << QH_ENDPT1_DTC) | + (usb_pipespeed(pipe) << QH_ENDPT1_EPS) | + (usb_pipeendpoint(pipe) << QH_ENDPT1_ENDPT) | + (0 << QH_ENDPT1_I) | (usb_pipedevice(pipe) << QH_ENDPT1_DEVADDR); qh->qh_endpt1 = cpu_to_hc32(endpt); - endpt = (1 << 30) | - (dev->portnr << 23) | - (dev->parent->devnum << 16) | (0 << 8) | (0 << 0); + endpt = (1 << QH_ENDPT2_MULT) | + (dev->portnr << QH_ENDPT2_PORTNUM) | + (dev->parent->devnum << QH_ENDPT2_HUBADDR) | + (0 << QH_ENDPT2_UFRAMECMASK) | (0 << QH_ENDPT2_UFRAMESMASK); qh->qh_endpt2 = cpu_to_hc32(endpt); qh->qh_overlay.qt_next = cpu_to_hc32(QT_NEXT_TERMINATE);
@@ -276,9 +278,12 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, */ qtd[qtd_counter].qt_next = cpu_to_hc32(QT_NEXT_TERMINATE); qtd[qtd_counter].qt_altnext = cpu_to_hc32(QT_NEXT_TERMINATE); - token = (0 << 31) | - (sizeof(*req) << 16) | - (0 << 15) | (0 << 12) | (3 << 10) | (2 << 8) | (0x80 << 0); + token = (0 << QT_TOKEN_DT) | + (sizeof(*req) << QT_TOKEN_TOTALBYTES) | + (0 << QT_TOKEN_IOC) | (0 << QT_TOKEN_CPAGE) | + (3 << QT_TOKEN_CERR) | + (QT_TOKEN_PID_SETUP << QT_TOKEN_PID) | + (QT_TOKEN_STATUS_ACTIVE << QT_TOKEN_STATUS); qtd[qtd_counter].qt_token = cpu_to_hc32(token); if (ehci_td_buffer(&qtd[qtd_counter], req, sizeof(*req)) != 0) { printf("unable construct SETUP td\n"); @@ -302,12 +307,14 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, */ qtd[qtd_counter].qt_next = cpu_to_hc32(QT_NEXT_TERMINATE); qtd[qtd_counter].qt_altnext = cpu_to_hc32(QT_NEXT_TERMINATE); - token = (toggle << 31) | - (length << 16) | - ((req == NULL ? 1 : 0) << 15) | - (0 << 12) | - (3 << 10) | - ((usb_pipein(pipe) ? 1 : 0) << 8) | (0x80 << 0); + token = (toggle << QT_TOKEN_DT) | + (length << QT_TOKEN_TOTALBYTES) | + ((req == NULL) << QT_TOKEN_IOC) | + (0 << QT_TOKEN_CPAGE) | + (3 << QT_TOKEN_CERR) | + ((usb_pipein(pipe) ? QT_TOKEN_PID_IN : QT_TOKEN_PID_OUT) << + QT_TOKEN_PID) | + (QT_TOKEN_STATUS_ACTIVE << QT_TOKEN_STATUS); qtd[qtd_counter].qt_token = cpu_to_hc32(token); if (ehci_td_buffer(&qtd[qtd_counter], buffer, length) != 0) { printf("unable construct DATA td\n"); @@ -328,12 +335,14 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, */ qtd[qtd_counter].qt_next = cpu_to_hc32(QT_NEXT_TERMINATE); qtd[qtd_counter].qt_altnext = cpu_to_hc32(QT_NEXT_TERMINATE); - token = (toggle << 31) | - (0 << 16) | - (1 << 15) | - (0 << 12) | - (3 << 10) | - ((usb_pipein(pipe) ? 0 : 1) << 8) | (0x80 << 0); + token = (toggle << QT_TOKEN_DT) | + (0 << QT_TOKEN_TOTALBYTES) | + (1 << QT_TOKEN_IOC) | + (0 << QT_TOKEN_CPAGE) | + (3 << QT_TOKEN_CERR) | + ((usb_pipein(pipe) ? QT_TOKEN_PID_OUT : QT_TOKEN_PID_IN) << + QT_TOKEN_PID) | + (QT_TOKEN_STATUS_ACTIVE << QT_TOKEN_STATUS); qtd[qtd_counter].qt_token = cpu_to_hc32(token); /* Update previous qTD! */ *tdp = cpu_to_hc32((uint32_t)&qtd[qtd_counter]); @@ -346,7 +355,8 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, flush_dcache_range((uint32_t)qh_list, ALIGN_END_ADDR(struct QH, qh_list, 1)); flush_dcache_range((uint32_t)qh, ALIGN_END_ADDR(struct QH, qh, 1)); - flush_dcache_range((uint32_t)qtd, ALIGN_END_ADDR(struct qTD, qtd, 3)); + flush_dcache_range((uint32_t)qtd, + ALIGN_END_ADDR(struct qTD, qtd, QTD_COUNT));
/* Set async. queue head pointer. */ ehci_writel(&hcor->or_asynclistaddr, (uint32_t)qh_list); @@ -359,10 +369,10 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, cmd |= CMD_ASE; ehci_writel(&hcor->or_usbcmd, cmd);
- ret = handshake((uint32_t *)&hcor->or_usbsts, STD_ASS, STD_ASS, + ret = handshake((uint32_t *)&hcor->or_usbsts, STS_ASS, STS_ASS, 100 * 1000); if (ret < 0) { - printf("EHCI fail timeout STD_ASS set\n"); + printf("EHCI fail timeout STS_ASS set\n"); goto fail; }
@@ -377,10 +387,10 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, invalidate_dcache_range((uint32_t)qh, ALIGN_END_ADDR(struct QH, qh, 1)); invalidate_dcache_range((uint32_t)qtd, - ALIGN_END_ADDR(struct qTD, qtd, 3)); + ALIGN_END_ADDR(struct qTD, qtd, QTD_COUNT));
token = hc32_to_cpu(vtd->qt_token); - if (!(token & 0x80)) + if (!(token & (QT_TOKEN_STATUS_ACTIVE << QT_TOKEN_STATUS))) break; WATCHDOG_RESET(); } while (get_timer(ts) < timeout); @@ -398,50 +408,53 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, ALIGN((uint32_t)buffer + length, ARCH_DMA_MINALIGN));
/* Check that the TD processing happened */ - if (token & 0x80) { + if (token & (QT_TOKEN_STATUS_ACTIVE << QT_TOKEN_STATUS)) printf("EHCI timed out on TD - token=%#x\n", token); - }
/* Disable async schedule. */ cmd = ehci_readl(&hcor->or_usbcmd); cmd &= ~CMD_ASE; ehci_writel(&hcor->or_usbcmd, cmd);
- ret = handshake((uint32_t *)&hcor->or_usbsts, STD_ASS, 0, + ret = handshake((uint32_t *)&hcor->or_usbsts, STS_ASS, 0, 100 * 1000); if (ret < 0) { - printf("EHCI fail timeout STD_ASS reset\n"); + printf("EHCI fail timeout STS_ASS reset\n"); goto fail; }
token = hc32_to_cpu(qh->qh_overlay.qt_token); - if (!(token & 0x80)) { + if (!(token & (QT_TOKEN_STATUS_ACTIVE << QT_TOKEN_STATUS))) { debug("TOKEN=%#x\n", token); - switch (token & 0xfc) { + switch ((token >> QT_TOKEN_STATUS) & (QT_TOKEN_STATUS_ACTIVE | + QT_TOKEN_STATUS_HALTED | QT_TOKEN_STATUS_DATBUFERR | + QT_TOKEN_STATUS_BABBLEDET | QT_TOKEN_STATUS_XACTERR | + QT_TOKEN_STATUS_MISSEDUFRAME)) { case 0: - toggle = token >> 31; + toggle = token >> QT_TOKEN_DT; usb_settoggle(dev, usb_pipeendpoint(pipe), usb_pipeout(pipe), toggle); dev->status = 0; break; - case 0x40: + case QT_TOKEN_STATUS_HALTED: dev->status = USB_ST_STALLED; break; - case 0xa0: - case 0x20: + case QT_TOKEN_STATUS_ACTIVE | QT_TOKEN_STATUS_DATBUFERR: + case QT_TOKEN_STATUS_DATBUFERR: dev->status = USB_ST_BUF_ERR; break; - case 0x50: - case 0x10: + case QT_TOKEN_STATUS_HALTED | QT_TOKEN_STATUS_BABBLEDET: + case QT_TOKEN_STATUS_BABBLEDET: dev->status = USB_ST_BABBLE_DET; break; default: dev->status = USB_ST_CRC_ERR; - if ((token & 0x40) == 0x40) + if (token & (QT_TOKEN_STATUS_HALTED << QT_TOKEN_STATUS)) dev->status |= USB_ST_STALLED; break; } - dev->act_len = length - ((token >> 16) & 0x7fff); + dev->act_len = length - ((token & QT_TOKEN_TOTALBYTES_MASK) >> + QT_TOKEN_TOTALBYTES); } else { dev->act_len = 0; debug("dev=%u, usbsts=%#x, p[1]=%#x, p[2]=%#x\n", @@ -499,12 +512,14 @@ ehci_submit_root(struct usb_device *dev, unsigned long pipe, void *buffer, case USB_DT_DEVICE: debug("USB_DT_DEVICE request\n"); srcptr = &descriptor.device; - srclen = 0x12; + srclen = descriptor.device.bLength; break; case USB_DT_CONFIG: debug("USB_DT_CONFIG config\n"); srcptr = &descriptor.config; - srclen = 0x19; + srclen = descriptor.config.bLength + + descriptor.interface.bLength + + descriptor.endpoint.bLength; break; case USB_DT_STRING: debug("USB_DT_STRING config\n"); @@ -539,7 +554,7 @@ ehci_submit_root(struct usb_device *dev, unsigned long pipe, void *buffer, case USB_DT_HUB: debug("USB_DT_HUB config\n"); srcptr = &descriptor.hub; - srclen = 0x8; + srclen = descriptor.hub.bLength; break; default: debug("unknown value %x\n", le16_to_cpu(req->value)); @@ -577,13 +592,13 @@ ehci_submit_root(struct usb_device *dev, unsigned long pipe, void *buffer, tmpbuf[1] |= USB_PORT_STAT_POWER >> 8;
if (ehci_is_TDI()) { - switch ((reg >> 26) & 3) { - case 0: + switch ((reg & PORTSC_PSPD_MASK) >> PORTSC_PSPD_OFFS) { + case PORTSC_PSPD_FS: break; - case 1: + case PORTSC_PSPD_LS: tmpbuf[1] |= USB_PORT_STAT_LOW_SPEED >> 8; break; - case 2: + case PORTSC_PSPD_HS: default: tmpbuf[1] |= USB_PORT_STAT_HIGH_SPEED >> 8; break; @@ -744,11 +759,13 @@ int usb_lowlevel_init(void) /* Set head of reclaim list */ memset(qh_list, 0, sizeof(*qh_list)); qh_list->qh_link = cpu_to_hc32((uint32_t)qh_list | QH_LINK_TYPE_QH); - qh_list->qh_endpt1 = cpu_to_hc32((1 << 15) | (USB_SPEED_HIGH << 12)); + qh_list->qh_endpt1 = cpu_to_hc32((1 << QH_ENDPT1_H) | + (USB_SPEED_HIGH << QH_ENDPT1_EPS)); qh_list->qh_curtd = cpu_to_hc32(QT_NEXT_TERMINATE); qh_list->qh_overlay.qt_next = cpu_to_hc32(QT_NEXT_TERMINATE); qh_list->qh_overlay.qt_altnext = cpu_to_hc32(QT_NEXT_TERMINATE); - qh_list->qh_overlay.qt_token = cpu_to_hc32(0x40); + qh_list->qh_overlay.qt_token = cpu_to_hc32(QT_TOKEN_STATUS_HALTED << + QT_TOKEN_STATUS);
reg = ehci_readl(&hccr->cr_hcsparams); descriptor.hub.bNbrPorts = HCS_N_PORTS(reg); diff --git u-boot-usb-8d5fb14.orig/drivers/usb/host/ehci.h u-boot-usb-8d5fb14/drivers/usb/host/ehci.h index 7992983..c6f1794 100644 --- u-boot-usb-8d5fb14.orig/drivers/usb/host/ehci.h +++ u-boot-usb-8d5fb14/drivers/usb/host/ehci.h @@ -68,7 +68,7 @@ struct ehci_hcor { #define CMD_RESET (1 << 1) /* reset HC not bus */ #define CMD_RUN (1 << 0) /* start/stop HC */ uint32_t or_usbsts; -#define STD_ASS (1 << 15) +#define STS_ASS (1 << 15) #define STS_HALT (1 << 12) uint32_t or_usbintr; #define INTR_UE (1 << 0) /* USB interrupt enable */ @@ -83,11 +83,44 @@ struct ehci_hcor { uint32_t _reserved_0_; uint32_t or_burstsize; uint32_t or_txfilltuning; +#define TXFIFO_THRESH_MASK (0x3f << 16) #define TXFIFO_THRESH(p) ((p & 0x3f) << 16) uint32_t _reserved_1_[6]; uint32_t or_configflag; #define FLAG_CF (1 << 0) /* true: we'll support "high speed" */ uint32_t or_portsc[CONFIG_SYS_USB_EHCI_MAX_ROOT_PORTS]; +#define PORTSC_PTS_OFFS 30 +#define PORTSC_PTS_MASK (0x3 << PORTSC_PTS_OFFS) +#define PORTSC_STS (1 << 29) +#define PORTSC_PTW (1 << 28) +#define PORTSC_PSPD_OFFS 26 +#define PORTSC_PSPD_MASK (0x3 << PORTSC_PSPD_OFFS) +#define PORTSC_PSPD_FS 0x0 +#define PORTSC_PSPD_LS 0x1 +#define PORTSC_PSPD_HS 0x2 +#define PORTSC_PFSC (1 << 24) +#define PORTSC_PHCD (1 << 23) +#define PORTSC_WKOC (1 << 22) +#define PORTSC_WKDS (1 << 21) +#define PORTSC_WKCN (1 << 20) +#define PORTSC_PTC_OFFS 16 +#define PORTSC_PTC_MASK (0xf << PORTSC_PTC_OFFS) +#define PORTSC_PIC_OFFS 14 +#define PORTSC_PIC_MASK (0x3 << PORTSC_PIC_OFFS) +#define PORTSC_PO (1 << 13) +#define PORTSC_PP (1 << 12) +#define PORTSC_LS_OFFS 10 +#define PORTSC_LS_MASK (0x3 << PORTSC_LS_OFFS) +#define PORTSC_HSP (1 << 9) +#define PORTSC_PR (1 << 8) +#define PORTSC_SUSP (1 << 7) +#define PORTSC_FPR (1 << 6) +#define PORTSC_OCC (1 << 5) +#define PORTSC_OCA (1 << 4) +#define PORTSC_PEC (1 << 3) +#define PORTSC_PE (1 << 2) +#define PORTSC_OSC (1 << 1) +#define PORTSC_CCS (1 << 0) uint32_t or_systune; } __attribute__ ((packed, aligned(4)));
@@ -175,6 +208,25 @@ struct qTD { #define QT_NEXT_TERMINATE 1 uint32_t qt_altnext; /* see EHCI 3.5.2 */ uint32_t qt_token; /* see EHCI 3.5.3 */ +#define QT_TOKEN_DT 31 /* Data Toggle */ +#define QT_TOKEN_TOTALBYTES 16 /* Total Bytes to Transfer */ +#define QT_TOKEN_TOTALBYTES_MASK (0x7fff << QT_TOKEN_TOTALBYTES) +#define QT_TOKEN_IOC 15 /* Interrupt On Complete */ +#define QT_TOKEN_CPAGE 12 /* Current Page */ +#define QT_TOKEN_CERR 10 /* Error Counter */ +#define QT_TOKEN_PID 8 /* PID Code */ +#define QT_TOKEN_PID_OUT 0x0 +#define QT_TOKEN_PID_IN 0x1 +#define QT_TOKEN_PID_SETUP 0x2 +#define QT_TOKEN_STATUS 0 /* Status */ +#define QT_TOKEN_STATUS_ACTIVE 0x80 +#define QT_TOKEN_STATUS_HALTED 0x40 +#define QT_TOKEN_STATUS_DATBUFERR 0x20 +#define QT_TOKEN_STATUS_BABBLEDET 0x10 +#define QT_TOKEN_STATUS_XACTERR 0x08 +#define QT_TOKEN_STATUS_MISSEDUFRAME 0x04 +#define QT_TOKEN_STATUS_SPLITXSTATE 0x02 +#define QT_TOKEN_STATUS_PERR 0x01 #define QT_BUFFER_CNT 5 uint32_t qt_buffer[QT_BUFFER_CNT]; /* see EHCI 3.5.4 */ uint32_t qt_buffer_hi[QT_BUFFER_CNT]; /* Appendix B */ @@ -182,6 +234,8 @@ struct qTD { uint32_t unused[3]; };
+#define EHCI_PAGE_SIZE 4096 + /* Queue Head (QH). */ struct QH { uint32_t qh_link; @@ -191,7 +245,26 @@ struct QH { #define QH_LINK_TYPE_SITD 4 #define QH_LINK_TYPE_FSTN 6 uint32_t qh_endpt1; +#define QH_ENDPT1_RL 28 /* NAK Count Reload */ +#define QH_ENDPT1_C 27 /* Control Endpoint Flag */ +#define QH_ENDPT1_MAXPKTLEN 16 /* Maximum Packet Length */ +#define QH_ENDPT1_H 15 /* Head of Reclamation List Flag */ +#define QH_ENDPT1_DTC 14 /* Data Toggle Control */ +#define QH_ENDPT1_DTC_IGNORE_QTD_TD 0x0 +#define QH_ENDPT1_DTC_DT_FROM_QTD 0x1 +#define QH_ENDPT1_EPS 12 /* Endpoint Speed */ +#define QH_ENDPT1_EPS_FS 0x0 +#define QH_ENDPT1_EPS_LS 0x1 +#define QH_ENDPT1_EPS_HS 0x2 +#define QH_ENDPT1_ENDPT 8 /* Endpoint Number */ +#define QH_ENDPT1_I 7 /* Inactivate on Next Transaction */ +#define QH_ENDPT1_DEVADDR 0 /* Device Address */ uint32_t qh_endpt2; +#define QH_ENDPT2_MULT 30 /* High-Bandwidth Pipe Multiplier */ +#define QH_ENDPT2_PORTNUM 23 /* Port Number */ +#define QH_ENDPT2_HUBADDR 16 /* Hub Address */ +#define QH_ENDPT2_UFRAMECMASK 8 /* Split Completion Mask */ +#define QH_ENDPT2_UFRAMESMASK 0 /* Interrupt Schedule Mask */ uint32_t qh_curtd; struct qTD qh_overlay; /*

Dear Benoît Thébaudeau,
Signed-off-by: Benoît Thébaudeau benoit.thebaudeau@advansee.com Cc: Marek Vasut marex@denx.de Cc: Ilya Yanok ilya.yanok@cogentembedded.com Cc: Stefan Herbrechtsmeier stefan@herbrechtsmeier.net
Changes for v2: N/A. Changes for v3:
- New patch.
.../drivers/usb/host/ehci-hcd.c | 135 +++++++++++--------- .../drivers/usb/host/ehci.h | 75 ++++++++++- 2 files changed, 150 insertions(+), 60 deletions(-)
diff --git u-boot-usb-8d5fb14.orig/drivers/usb/host/ehci-hcd.c u-boot-usb-8d5fb14/drivers/usb/host/ehci-hcd.c index 5b3b906..1977c28 100644 --- u-boot-usb-8d5fb14.orig/drivers/usb/host/ehci-hcd.c +++ u-boot-usb-8d5fb14/drivers/usb/host/ehci-hcd.c @@ -163,7 +163,7 @@ static int ehci_reset(void)
#ifdef CONFIG_USB_EHCI_TXFIFO_THRESH cmd = ehci_readl(&hcor->or_txfilltuning);
- cmd &= ~TXFIFO_THRESH(0x3f);
- cmd &= ~TXFIFO_THRESH_MASK; cmd |= TXFIFO_THRESH(CONFIG_USB_EHCI_TXFIFO_THRESH); ehci_writel(&hcor->or_txfilltuning, cmd);
#endif @@ -186,7 +186,7 @@ static int ehci_td_buffer(struct qTD *td, void *buf, size_t sz) while (idx < QT_BUFFER_CNT) { td->qt_buffer[idx] = cpu_to_hc32(addr); td->qt_buffer_hi[idx] = 0;
next = (addr + 4096) & ~4095;
delta = next - addr; if (delta >= sz) break;next = (addr + EHCI_PAGE_SIZE) & ~(EHCI_PAGE_SIZE - 1);
@@ -208,7 +208,8 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, int length, struct devrequest *req) { ALLOC_ALIGN_BUFFER(struct QH, qh, 1, USB_DMA_MINALIGN);
- ALLOC_ALIGN_BUFFER(struct qTD, qtd, 3, USB_DMA_MINALIGN);
+#define QTD_COUNT 3
ALLOC_ALIGN_BUFFER(struct qTD, qtd, QTD_COUNT, USB_DMA_MINALIGN); int qtd_counter = 0;
volatile struct qTD *vtd;
@@ -230,7 +231,7 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, le16_to_cpu(req->index));
memset(qh, 0, sizeof(struct QH));
- memset(qtd, 0, 3 * sizeof(*qtd));
memset(qtd, 0, QTD_COUNT * sizeof(*qtd));
toggle = usb_gettoggle(dev, usb_pipeendpoint(pipe), usb_pipeout(pipe));
@@ -246,19 +247,20 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, */ qh->qh_link = cpu_to_hc32((uint32_t)qh_list | QH_LINK_TYPE_QH); c = (usb_pipespeed(pipe) != USB_SPEED_HIGH &&
usb_pipeendpoint(pipe) == 0) ? 1 : 0;
- endpt = (8 << 28) |
(c << 27) |
(usb_maxpacket(dev, pipe) << 16) |
(0 << 15) |
(1 << 14) |
(usb_pipespeed(pipe) << 12) |
(usb_pipeendpoint(pipe) << 8) |
(0 << 7) | (usb_pipedevice(pipe) << 0);
usb_pipeendpoint(pipe) == 0);
- endpt = (8 << QH_ENDPT1_RL) |
(c << QH_ENDPT1_C) |
Maybe define it as #deifne QH_ENDPT1(x) ((x) << SEOMTHING) ? [...]
@@ -398,50 +408,53 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, ALIGN((uint32_t)buffer + length, ARCH_DMA_MINALIGN));
/* Check that the TD processing happened */
- if (token & 0x80) {
- if (token & (QT_TOKEN_STATUS_ACTIVE << QT_TOKEN_STATUS)) printf("EHCI timed out on TD - token=%#x\n", token);
}
/* Disable async schedule. */ cmd = ehci_readl(&hcor->or_usbcmd); cmd &= ~CMD_ASE; ehci_writel(&hcor->or_usbcmd, cmd);
ret = handshake((uint32_t *)&hcor->or_usbsts, STD_ASS, 0,
- ret = handshake((uint32_t *)&hcor->or_usbsts, STS_ASS, 0,
Ooooh, nice catch :)
[...] The rest is cool.
btw when (I hope you will) resubmitting next time, just submit the whole series under 0/8 patch (or 1/8) of the old one to make it a nice thread.

Dear Marek Vasut,
Dear Benoît Thébaudeau,
Signed-off-by: Benoît Thébaudeau benoit.thebaudeau@advansee.com Cc: Marek Vasut marex@denx.de Cc: Ilya Yanok ilya.yanok@cogentembedded.com Cc: Stefan Herbrechtsmeier stefan@herbrechtsmeier.net
Changes for v2: N/A. Changes for v3:
- New patch.
.../drivers/usb/host/ehci-hcd.c | 135 +++++++++++--------- .../drivers/usb/host/ehci.h | 75 ++++++++++- 2 files changed, 150 insertions(+), 60 deletions(-)
diff --git u-boot-usb-8d5fb14.orig/drivers/usb/host/ehci-hcd.c u-boot-usb-8d5fb14/drivers/usb/host/ehci-hcd.c index 5b3b906..1977c28 100644 --- u-boot-usb-8d5fb14.orig/drivers/usb/host/ehci-hcd.c +++ u-boot-usb-8d5fb14/drivers/usb/host/ehci-hcd.c @@ -163,7 +163,7 @@ static int ehci_reset(void)
#ifdef CONFIG_USB_EHCI_TXFIFO_THRESH cmd = ehci_readl(&hcor->or_txfilltuning);
- cmd &= ~TXFIFO_THRESH(0x3f);
- cmd &= ~TXFIFO_THRESH_MASK; cmd |= TXFIFO_THRESH(CONFIG_USB_EHCI_TXFIFO_THRESH); ehci_writel(&hcor->or_txfilltuning, cmd);
#endif @@ -186,7 +186,7 @@ static int ehci_td_buffer(struct qTD *td, void *buf, size_t sz) while (idx < QT_BUFFER_CNT) { td->qt_buffer[idx] = cpu_to_hc32(addr); td->qt_buffer_hi[idx] = 0;
next = (addr + 4096) & ~4095;
delta = next - addr; if (delta >= sz) break;next = (addr + EHCI_PAGE_SIZE) & ~(EHCI_PAGE_SIZE - 1);
@@ -208,7 +208,8 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, int length, struct devrequest *req) { ALLOC_ALIGN_BUFFER(struct QH, qh, 1, USB_DMA_MINALIGN);
- ALLOC_ALIGN_BUFFER(struct qTD, qtd, 3, USB_DMA_MINALIGN);
+#define QTD_COUNT 3
ALLOC_ALIGN_BUFFER(struct qTD, qtd, QTD_COUNT, USB_DMA_MINALIGN); int qtd_counter = 0;
volatile struct qTD *vtd;
@@ -230,7 +231,7 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, le16_to_cpu(req->index));
memset(qh, 0, sizeof(struct QH));
- memset(qtd, 0, 3 * sizeof(*qtd));
memset(qtd, 0, QTD_COUNT * sizeof(*qtd));
toggle = usb_gettoggle(dev, usb_pipeendpoint(pipe), usb_pipeout(pipe));
@@ -246,19 +247,20 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, */ qh->qh_link = cpu_to_hc32((uint32_t)qh_list | QH_LINK_TYPE_QH); c = (usb_pipespeed(pipe) != USB_SPEED_HIGH &&
usb_pipeendpoint(pipe) == 0) ? 1 : 0;
- endpt = (8 << 28) |
(c << 27) |
(usb_maxpacket(dev, pipe) << 16) |
(0 << 15) |
(1 << 14) |
(usb_pipespeed(pipe) << 12) |
(usb_pipeendpoint(pipe) << 8) |
(0 << 7) | (usb_pipedevice(pipe) << 0);
usb_pipeendpoint(pipe) == 0);
- endpt = (8 << QH_ENDPT1_RL) |
(c << QH_ENDPT1_C) |
Maybe define it as #deifne QH_ENDPT1(x) ((x) << SEOMTHING) ? [...]
For all of these?
@@ -398,50 +408,53 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, ALIGN((uint32_t)buffer + length, ARCH_DMA_MINALIGN));
/* Check that the TD processing happened */
- if (token & 0x80) {
- if (token & (QT_TOKEN_STATUS_ACTIVE << QT_TOKEN_STATUS)) printf("EHCI timed out on TD - token=%#x\n", token);
}
/* Disable async schedule. */ cmd = ehci_readl(&hcor->or_usbcmd); cmd &= ~CMD_ASE; ehci_writel(&hcor->or_usbcmd, cmd);
ret = handshake((uint32_t *)&hcor->or_usbsts, STD_ASS, 0,
- ret = handshake((uint32_t *)&hcor->or_usbsts, STS_ASS, 0,
Ooooh, nice catch :)
[...] The rest is cool.
btw when (I hope you will) resubmitting next time, just submit the whole series under 0/8 patch (or 1/8) of the old one to make it a nice thread.
OK, so with 2/8 removed since you have applied it. What is the rule here? I thought that a new version of a patch should be posted as a reply to the previous version so that patchwork could mark the old version as superseded (even if this feature is not yet available for U-Boot). Does it apply only to single patches while series should be replied to the previous 0/n?
And should 3/n be a reply to 2/n, to 0/n (or to 1/n if no 0/n), or to nothing?
I will wait for your review of 8/8.
Best regards, Benoît

Dear Benoît Thébaudeau,
[...]
@@ -246,19 +247,20 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, */
qh->qh_link = cpu_to_hc32((uint32_t)qh_list | QH_LINK_TYPE_QH); c = (usb_pipespeed(pipe) != USB_SPEED_HIGH &&
usb_pipeendpoint(pipe) == 0) ? 1 : 0;
- endpt = (8 << 28) |
(c << 27) |
(usb_maxpacket(dev, pipe) << 16) |
(0 << 15) |
(1 << 14) |
(usb_pipespeed(pipe) << 12) |
(usb_pipeendpoint(pipe) << 8) |
(0 << 7) | (usb_pipedevice(pipe) << 0);
usb_pipeendpoint(pipe) == 0);
- endpt = (8 << QH_ENDPT1_RL) |
(c << QH_ENDPT1_C) |
Maybe define it as #deifne QH_ENDPT1(x) ((x) << SEOMTHING) ? [...]
For all of these?
Yes, do you not think it's better than define the offsets only?
@@ -398,50 +408,53 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, ALIGN((uint32_t)buffer + length, ARCH_DMA_MINALIGN));
/* Check that the TD processing happened */
- if (token & 0x80) {
if (token & (QT_TOKEN_STATUS_ACTIVE << QT_TOKEN_STATUS))
printf("EHCI timed out on TD - token=%#x\n", token);
}
/* Disable async schedule. */ cmd = ehci_readl(&hcor->or_usbcmd); cmd &= ~CMD_ASE; ehci_writel(&hcor->or_usbcmd, cmd);
ret = handshake((uint32_t *)&hcor->or_usbsts, STD_ASS, 0,
- ret = handshake((uint32_t *)&hcor->or_usbsts, STS_ASS, 0,
Ooooh, nice catch :)
[...] The rest is cool.
btw when (I hope you will) resubmitting next time, just submit the whole series under 0/8 patch (or 1/8) of the old one to make it a nice thread.
OK, so with 2/8 removed since you have applied it.
Yes
What is the rule here? I thought that a new version of a patch should be posted as a reply to the previous version so that patchwork could mark the old version as superseded
Yes, unless the series changed so much (aka. too much reordering etc), that it's easier to repost it in reply to the first patch in the series.
(even if this feature is not yet available for U-Boot). Does it apply only to single patches while series should be replied to the previous 0/n?
I didn't figure out these gems in patchwork myself. I'm not a big fan of PW either.
And should 3/n be a reply to 2/n, to 0/n (or to 1/n if no 0/n), or to nothing?
Just use the standard threading git send-email does.
I will wait for your review of 8/8.
I think it's fine to just resend it so I'll either pick first 7 or all 8. First 7 are certain once we agree on the above stuff :)
Best regards, Benoît
Best regards, Marek Vasut

Dear Marek Vasut,
@@ -246,19 +247,20 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, */
qh->qh_link = cpu_to_hc32((uint32_t)qh_list | QH_LINK_TYPE_QH); c = (usb_pipespeed(pipe) != USB_SPEED_HIGH &&
usb_pipeendpoint(pipe) == 0) ? 1 : 0;
- endpt = (8 << 28) |
(c << 27) |
(usb_maxpacket(dev, pipe) << 16) |
(0 << 15) |
(1 << 14) |
(usb_pipespeed(pipe) << 12) |
(usb_pipeendpoint(pipe) << 8) |
(0 << 7) | (usb_pipedevice(pipe) << 0);
usb_pipeendpoint(pipe) == 0);
- endpt = (8 << QH_ENDPT1_RL) |
(c << QH_ENDPT1_C) |
Maybe define it as #deifne QH_ENDPT1(x) ((x) << SEOMTHING) ? [...]
For all of these?
Yes, do you not think it's better than define the offsets only?
Yes, I hesitated with this solution when writing this code.
Where the offset is needed, I can instead write additional extraction macros.
@@ -398,50 +408,53 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, ALIGN((uint32_t)buffer + length, ARCH_DMA_MINALIGN));
/* Check that the TD processing happened */
- if (token & 0x80) {
if (token & (QT_TOKEN_STATUS_ACTIVE << QT_TOKEN_STATUS))
printf("EHCI timed out on TD - token=%#x\n", token);
}
/* Disable async schedule. */ cmd = ehci_readl(&hcor->or_usbcmd); cmd &= ~CMD_ASE; ehci_writel(&hcor->or_usbcmd, cmd);
ret = handshake((uint32_t *)&hcor->or_usbsts, STD_ASS, 0,
- ret = handshake((uint32_t *)&hcor->or_usbsts, STS_ASS, 0,
Ooooh, nice catch :)
[...] The rest is cool.
btw when (I hope you will) resubmitting next time, just submit the whole series under 0/8 patch (or 1/8) of the old one to make it a nice thread.
OK, so with 2/8 removed since you have applied it.
Yes
What is the rule here? I thought that a new version of a patch should be posted as a reply to the previous version so that patchwork could mark the old version as superseded
Yes, unless the series changed so much (aka. too much reordering etc), that it's easier to repost it in reply to the first patch in the series.
(even if this feature is not yet available for U-Boot). Does it apply only to single patches while series should be replied to the previous 0/n?
I didn't figure out these gems in patchwork myself. I'm not a big fan of PW either.
And should 3/n be a reply to 2/n, to 0/n (or to 1/n if no 0/n), or to nothing?
Just use the standard threading git send-email does.
I will wait for your review of 8/8.
I think it's fine to just resend it so I'll either pick first 7 or all 8. First 7 are certain once we agree on the above stuff :)
OK.
Best regards, Benoît

Dear Benoît Thébaudeau,
Dear Marek Vasut,
@@ -246,19 +247,20 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, */
qh->qh_link = cpu_to_hc32((uint32_t)qh_list | QH_LINK_TYPE_QH); c = (usb_pipespeed(pipe) != USB_SPEED_HIGH &&
usb_pipeendpoint(pipe) == 0) ? 1 : 0;
- endpt = (8 << 28) |
(c << 27) |
(usb_maxpacket(dev, pipe) << 16) |
(0 << 15) |
(1 << 14) |
(usb_pipespeed(pipe) << 12) |
(usb_pipeendpoint(pipe) << 8) |
(0 << 7) | (usb_pipedevice(pipe) << 0);
usb_pipeendpoint(pipe) == 0);
- endpt = (8 << QH_ENDPT1_RL) |
(c << QH_ENDPT1_C) |
Maybe define it as #deifne QH_ENDPT1(x) ((x) << SEOMTHING) ? [...]
For all of these?
Yes, do you not think it's better than define the offsets only?
Yes, I hesitated with this solution when writing this code.
Where the offset is needed, I can instead write additional extraction macros.
Oh, there're places where the offset is needed? What about adding both BIT(x) and BIT_OFFSET in such cases ? And BIT(x) (x << BIT_OFFSET) in that case.
[...]
Best regards, Marek Vasut

Dear Marek Vasut,
@@ -246,19 +247,20 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, */
qh->qh_link = cpu_to_hc32((uint32_t)qh_list | QH_LINK_TYPE_QH); c = (usb_pipespeed(pipe) != USB_SPEED_HIGH &&
usb_pipeendpoint(pipe) == 0) ? 1 : 0;
- endpt = (8 << 28) |
(c << 27) |
(usb_maxpacket(dev, pipe) << 16) |
(0 << 15) |
(1 << 14) |
(usb_pipespeed(pipe) << 12) |
(usb_pipeendpoint(pipe) << 8) |
(0 << 7) | (usb_pipedevice(pipe) << 0);
usb_pipeendpoint(pipe) == 0);
- endpt = (8 << QH_ENDPT1_RL) |
(c << QH_ENDPT1_C) |
Maybe define it as #deifne QH_ENDPT1(x) ((x) << SEOMTHING) ? [...]
For all of these?
Yes, do you not think it's better than define the offsets only?
Yes, I hesitated with this solution when writing this code.
Where the offset is needed, I can instead write additional extraction macros.
Oh, there're places where the offset is needed?
Yes, I think.
What about adding both BIT(x) and BIT_OFFSET in such cases ? And BIT(x) (x << BIT_OFFSET) in that case.
I would prefer: #define FIELD(fldval) (((fldval) & MASK) << OFFSET) #define GET_FIELD(regval) (((regval) >> OFFSET) & MASK)
Are you OK with that?
Best regards, Benoît

Dear Benoît Thébaudeau,
Dear Marek Vasut,
> @@ -246,19 +247,20 @@ ehci_submit_async(struct usb_device > *dev, > unsigned > long pipe, void *buffer, */ > > qh->qh_link = cpu_to_hc32((uint32_t)qh_list | > QH_LINK_TYPE_QH); > c = (usb_pipespeed(pipe) != USB_SPEED_HIGH && > > - usb_pipeendpoint(pipe) == 0) ? 1 : 0; > - endpt = (8 << 28) | > - (c << 27) | > - (usb_maxpacket(dev, pipe) << 16) | > - (0 << 15) | > - (1 << 14) | > - (usb_pipespeed(pipe) << 12) | > - (usb_pipeendpoint(pipe) << 8) | > - (0 << 7) | (usb_pipedevice(pipe) << 0); > + usb_pipeendpoint(pipe) == 0); > + endpt = (8 << QH_ENDPT1_RL) | > + (c << QH_ENDPT1_C) |
Maybe define it as #deifne QH_ENDPT1(x) ((x) << SEOMTHING) ? [...]
For all of these?
Yes, do you not think it's better than define the offsets only?
Yes, I hesitated with this solution when writing this code.
Where the offset is needed, I can instead write additional extraction macros.
Oh, there're places where the offset is needed?
Yes, I think.
What about adding both BIT(x) and BIT_OFFSET in such cases ? And BIT(x) (x << BIT_OFFSET) in that case.
I would prefer: #define FIELD(fldval) (((fldval) & MASK) << OFFSET) #define GET_FIELD(regval) (((regval) >> OFFSET) & MASK)
Yes of course.
Are you OK with that?
Best regards, Benoît
Best regards, Marek Vasut

Hi all,
This series aims at improving EHCI performance. There is also some code cleanup BTW.
Best regards, Benoît

Make some light cosmetic code cleanup by the way.
Signed-off-by: Benoît Thébaudeau benoit.thebaudeau@advansee.com Cc: Marek Vasut marex@denx.de Cc: Ilya Yanok ilya.yanok@cogentembedded.com Cc: Stefan Herbrechtsmeier stefan@herbrechtsmeier.net --- Changes for v2: N/A. Changes for v3: - New patch. Changes for v4: - Use accessor macros rather than offsets and masks. - More code cleanup.
.../drivers/usb/host/ehci-hcd.c | 138 ++++++++++---------- .../drivers/usb/host/ehci.h | 49 ++++++- 2 files changed, 118 insertions(+), 69 deletions(-)
diff --git u-boot-usb-4f8254e.orig/drivers/usb/host/ehci-hcd.c u-boot-usb-4f8254e/drivers/usb/host/ehci-hcd.c index 2a00389..00a155b 100644 --- u-boot-usb-4f8254e.orig/drivers/usb/host/ehci-hcd.c +++ u-boot-usb-4f8254e/drivers/usb/host/ehci-hcd.c @@ -163,7 +163,7 @@ static int ehci_reset(void)
#ifdef CONFIG_USB_EHCI_TXFIFO_THRESH cmd = ehci_readl(&hcor->or_txfilltuning); - cmd &= ~TXFIFO_THRESH(0x3f); + cmd &= ~TXFIFO_THRESH_MASK; cmd |= TXFIFO_THRESH(CONFIG_USB_EHCI_TXFIFO_THRESH); ehci_writel(&hcor->or_txfilltuning, cmd); #endif @@ -186,7 +186,7 @@ static int ehci_td_buffer(struct qTD *td, void *buf, size_t sz) while (idx < QT_BUFFER_CNT) { td->qt_buffer[idx] = cpu_to_hc32(addr); td->qt_buffer_hi[idx] = 0; - next = (addr + 4096) & ~4095; + next = (addr + EHCI_PAGE_SIZE) & ~(EHCI_PAGE_SIZE - 1); delta = next - addr; if (delta >= sz) break; @@ -208,7 +208,8 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, int length, struct devrequest *req) { ALLOC_ALIGN_BUFFER(struct QH, qh, 1, USB_DMA_MINALIGN); - ALLOC_ALIGN_BUFFER(struct qTD, qtd, 3, USB_DMA_MINALIGN); +#define QTD_COUNT 3 + ALLOC_ALIGN_BUFFER(struct qTD, qtd, QTD_COUNT, USB_DMA_MINALIGN); int qtd_counter = 0;
volatile struct qTD *vtd; @@ -230,7 +231,7 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, le16_to_cpu(req->index));
memset(qh, 0, sizeof(struct QH)); - memset(qtd, 0, 3 * sizeof(*qtd)); + memset(qtd, 0, QTD_COUNT * sizeof(*qtd));
toggle = usb_gettoggle(dev, usb_pipeendpoint(pipe), usb_pipeout(pipe));
@@ -245,20 +246,17 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, * - qh_overlay.qt_altnext */ qh->qh_link = cpu_to_hc32((uint32_t)qh_list | QH_LINK_TYPE_QH); - c = (usb_pipespeed(pipe) != USB_SPEED_HIGH && - usb_pipeendpoint(pipe) == 0) ? 1 : 0; - endpt = (8 << 28) | - (c << 27) | - (usb_maxpacket(dev, pipe) << 16) | - (0 << 15) | - (1 << 14) | - (usb_pipespeed(pipe) << 12) | - (usb_pipeendpoint(pipe) << 8) | - (0 << 7) | (usb_pipedevice(pipe) << 0); + c = usb_pipespeed(pipe) != USB_SPEED_HIGH && !usb_pipeendpoint(pipe); + endpt = QH_ENDPT1_RL(8) | QH_ENDPT1_C(c) | + QH_ENDPT1_MAXPKTLEN(usb_maxpacket(dev, pipe)) | QH_ENDPT1_H(0) | + QH_ENDPT1_DTC(QH_ENDPT1_DTC_DT_FROM_QTD) | + QH_ENDPT1_EPS(usb_pipespeed(pipe)) | + QH_ENDPT1_ENDPT(usb_pipeendpoint(pipe)) | QH_ENDPT1_I(0) | + QH_ENDPT1_DEVADDR(usb_pipedevice(pipe)); qh->qh_endpt1 = cpu_to_hc32(endpt); - endpt = (1 << 30) | - (dev->portnr << 23) | - (dev->parent->devnum << 16) | (0 << 8) | (0 << 0); + endpt = QH_ENDPT2_MULT(1) | QH_ENDPT2_PORTNUM(dev->portnr) | + QH_ENDPT2_HUBADDR(dev->parent->devnum) | + QH_ENDPT2_UFCMASK(0) | QH_ENDPT2_UFSMASK(0); qh->qh_endpt2 = cpu_to_hc32(endpt); qh->qh_overlay.qt_next = cpu_to_hc32(QT_NEXT_TERMINATE);
@@ -276,12 +274,13 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, */ qtd[qtd_counter].qt_next = cpu_to_hc32(QT_NEXT_TERMINATE); qtd[qtd_counter].qt_altnext = cpu_to_hc32(QT_NEXT_TERMINATE); - token = (0 << 31) | - (sizeof(*req) << 16) | - (0 << 15) | (0 << 12) | (3 << 10) | (2 << 8) | (0x80 << 0); + token = QT_TOKEN_DT(0) | QT_TOKEN_TOTALBYTES(sizeof(*req)) | + QT_TOKEN_IOC(0) | QT_TOKEN_CPAGE(0) | QT_TOKEN_CERR(3) | + QT_TOKEN_PID(QT_TOKEN_PID_SETUP) | + QT_TOKEN_STATUS(QT_TOKEN_STATUS_ACTIVE); qtd[qtd_counter].qt_token = cpu_to_hc32(token); - if (ehci_td_buffer(&qtd[qtd_counter], req, sizeof(*req)) != 0) { - printf("unable construct SETUP td\n"); + if (ehci_td_buffer(&qtd[qtd_counter], req, sizeof(*req))) { + printf("unable to construct SETUP TD\n"); goto fail; } /* Update previous qTD! */ @@ -302,15 +301,14 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, */ qtd[qtd_counter].qt_next = cpu_to_hc32(QT_NEXT_TERMINATE); qtd[qtd_counter].qt_altnext = cpu_to_hc32(QT_NEXT_TERMINATE); - token = (toggle << 31) | - (length << 16) | - ((req == NULL ? 1 : 0) << 15) | - (0 << 12) | - (3 << 10) | - ((usb_pipein(pipe) ? 1 : 0) << 8) | (0x80 << 0); + token = QT_TOKEN_DT(toggle) | QT_TOKEN_TOTALBYTES(length) | + QT_TOKEN_IOC(req == NULL) | QT_TOKEN_CPAGE(0) | + QT_TOKEN_CERR(3) | QT_TOKEN_PID(usb_pipein(pipe) ? + QT_TOKEN_PID_IN : QT_TOKEN_PID_OUT) | + QT_TOKEN_STATUS(QT_TOKEN_STATUS_ACTIVE); qtd[qtd_counter].qt_token = cpu_to_hc32(token); - if (ehci_td_buffer(&qtd[qtd_counter], buffer, length) != 0) { - printf("unable construct DATA td\n"); + if (ehci_td_buffer(&qtd[qtd_counter], buffer, length)) { + printf("unable to construct DATA TD\n"); goto fail; } /* Update previous qTD! */ @@ -328,12 +326,11 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, */ qtd[qtd_counter].qt_next = cpu_to_hc32(QT_NEXT_TERMINATE); qtd[qtd_counter].qt_altnext = cpu_to_hc32(QT_NEXT_TERMINATE); - token = (toggle << 31) | - (0 << 16) | - (1 << 15) | - (0 << 12) | - (3 << 10) | - ((usb_pipein(pipe) ? 0 : 1) << 8) | (0x80 << 0); + token = QT_TOKEN_DT(toggle) | QT_TOKEN_TOTALBYTES(0) | + QT_TOKEN_IOC(1) | QT_TOKEN_CPAGE(0) | QT_TOKEN_CERR(3) | + QT_TOKEN_PID(usb_pipein(pipe) ? + QT_TOKEN_PID_OUT : QT_TOKEN_PID_IN) | + QT_TOKEN_STATUS(QT_TOKEN_STATUS_ACTIVE); qtd[qtd_counter].qt_token = cpu_to_hc32(token); /* Update previous qTD! */ *tdp = cpu_to_hc32((uint32_t)&qtd[qtd_counter]); @@ -346,7 +343,8 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, flush_dcache_range((uint32_t)qh_list, ALIGN_END_ADDR(struct QH, qh_list, 1)); flush_dcache_range((uint32_t)qh, ALIGN_END_ADDR(struct QH, qh, 1)); - flush_dcache_range((uint32_t)qtd, ALIGN_END_ADDR(struct qTD, qtd, 3)); + flush_dcache_range((uint32_t)qtd, + ALIGN_END_ADDR(struct qTD, qtd, QTD_COUNT));
/* Set async. queue head pointer. */ ehci_writel(&hcor->or_asynclistaddr, (uint32_t)qh_list); @@ -359,10 +357,10 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, cmd |= CMD_ASE; ehci_writel(&hcor->or_usbcmd, cmd);
- ret = handshake((uint32_t *)&hcor->or_usbsts, STD_ASS, STD_ASS, + ret = handshake((uint32_t *)&hcor->or_usbsts, STS_ASS, STS_ASS, 100 * 1000); if (ret < 0) { - printf("EHCI fail timeout STD_ASS set\n"); + printf("EHCI fail timeout STS_ASS set\n"); goto fail; }
@@ -377,10 +375,10 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, invalidate_dcache_range((uint32_t)qh, ALIGN_END_ADDR(struct QH, qh, 1)); invalidate_dcache_range((uint32_t)qtd, - ALIGN_END_ADDR(struct qTD, qtd, 3)); + ALIGN_END_ADDR(struct qTD, qtd, QTD_COUNT));
token = hc32_to_cpu(vtd->qt_token); - if (!(token & 0x80)) + if (!(QT_TOKEN_GET_STATUS(token) & QT_TOKEN_STATUS_ACTIVE)) break; WATCHDOG_RESET(); } while (get_timer(ts) < timeout); @@ -398,50 +396,50 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, ALIGN((uint32_t)buffer + length, ARCH_DMA_MINALIGN));
/* Check that the TD processing happened */ - if (token & 0x80) { + if (QT_TOKEN_GET_STATUS(token) & QT_TOKEN_STATUS_ACTIVE) printf("EHCI timed out on TD - token=%#x\n", token); - }
/* Disable async schedule. */ cmd = ehci_readl(&hcor->or_usbcmd); cmd &= ~CMD_ASE; ehci_writel(&hcor->or_usbcmd, cmd);
- ret = handshake((uint32_t *)&hcor->or_usbsts, STD_ASS, 0, + ret = handshake((uint32_t *)&hcor->or_usbsts, STS_ASS, 0, 100 * 1000); if (ret < 0) { - printf("EHCI fail timeout STD_ASS reset\n"); + printf("EHCI fail timeout STS_ASS reset\n"); goto fail; }
token = hc32_to_cpu(qh->qh_overlay.qt_token); - if (!(token & 0x80)) { + if (!(QT_TOKEN_GET_STATUS(token) & QT_TOKEN_STATUS_ACTIVE)) { debug("TOKEN=%#x\n", token); - switch (token & 0xfc) { + switch (QT_TOKEN_GET_STATUS(token) & + ~(QT_TOKEN_STATUS_SPLITXSTATE | QT_TOKEN_STATUS_PERR)) { case 0: - toggle = token >> 31; + toggle = QT_TOKEN_GET_DT(token); usb_settoggle(dev, usb_pipeendpoint(pipe), usb_pipeout(pipe), toggle); dev->status = 0; break; - case 0x40: + case QT_TOKEN_STATUS_HALTED: dev->status = USB_ST_STALLED; break; - case 0xa0: - case 0x20: + case QT_TOKEN_STATUS_ACTIVE | QT_TOKEN_STATUS_DATBUFERR: + case QT_TOKEN_STATUS_DATBUFERR: dev->status = USB_ST_BUF_ERR; break; - case 0x50: - case 0x10: + case QT_TOKEN_STATUS_HALTED | QT_TOKEN_STATUS_BABBLEDET: + case QT_TOKEN_STATUS_BABBLEDET: dev->status = USB_ST_BABBLE_DET; break; default: dev->status = USB_ST_CRC_ERR; - if ((token & 0x40) == 0x40) + if (QT_TOKEN_GET_STATUS(token) & QT_TOKEN_STATUS_HALTED) dev->status |= USB_ST_STALLED; break; } - dev->act_len = length - ((token >> 16) & 0x7fff); + dev->act_len = length - QT_TOKEN_GET_TOTALBYTES(token); } else { dev->act_len = 0; debug("dev=%u, usbsts=%#x, p[1]=%#x, p[2]=%#x\n", @@ -499,12 +497,14 @@ ehci_submit_root(struct usb_device *dev, unsigned long pipe, void *buffer, case USB_DT_DEVICE: debug("USB_DT_DEVICE request\n"); srcptr = &descriptor.device; - srclen = 0x12; + srclen = descriptor.device.bLength; break; case USB_DT_CONFIG: debug("USB_DT_CONFIG config\n"); srcptr = &descriptor.config; - srclen = 0x19; + srclen = descriptor.config.bLength + + descriptor.interface.bLength + + descriptor.endpoint.bLength; break; case USB_DT_STRING: debug("USB_DT_STRING config\n"); @@ -539,7 +539,7 @@ ehci_submit_root(struct usb_device *dev, unsigned long pipe, void *buffer, case USB_DT_HUB: debug("USB_DT_HUB config\n"); srcptr = &descriptor.hub; - srclen = 0x8; + srclen = descriptor.hub.bLength; break; default: debug("unknown value %x\n", le16_to_cpu(req->value)); @@ -577,13 +577,13 @@ ehci_submit_root(struct usb_device *dev, unsigned long pipe, void *buffer, tmpbuf[1] |= USB_PORT_STAT_POWER >> 8;
if (ehci_is_TDI()) { - switch ((reg >> 26) & 3) { - case 0: + switch (PORTSC_PSPD(reg)) { + case PORTSC_PSPD_FS: break; - case 1: + case PORTSC_PSPD_LS: tmpbuf[1] |= USB_PORT_STAT_LOW_SPEED >> 8; break; - case 2: + case PORTSC_PSPD_HS: default: tmpbuf[1] |= USB_PORT_STAT_HIGH_SPEED >> 8; break; @@ -729,26 +729,28 @@ int usb_lowlevel_init(void) uint32_t reg; uint32_t cmd;
- if (ehci_hcd_init() != 0) + if (ehci_hcd_init()) return -1;
/* EHCI spec section 4.1 */ - if (ehci_reset() != 0) + if (ehci_reset()) return -1;
#if defined(CONFIG_EHCI_HCD_INIT_AFTER_RESET) - if (ehci_hcd_init() != 0) + if (ehci_hcd_init()) return -1; #endif
/* Set head of reclaim list */ memset(qh_list, 0, sizeof(*qh_list)); qh_list->qh_link = cpu_to_hc32((uint32_t)qh_list | QH_LINK_TYPE_QH); - qh_list->qh_endpt1 = cpu_to_hc32((1 << 15) | (USB_SPEED_HIGH << 12)); + qh_list->qh_endpt1 = cpu_to_hc32(QH_ENDPT1_H(1) | + QH_ENDPT1_EPS(USB_SPEED_HIGH)); qh_list->qh_curtd = cpu_to_hc32(QT_NEXT_TERMINATE); qh_list->qh_overlay.qt_next = cpu_to_hc32(QT_NEXT_TERMINATE); qh_list->qh_overlay.qt_altnext = cpu_to_hc32(QT_NEXT_TERMINATE); - qh_list->qh_overlay.qt_token = cpu_to_hc32(0x40); + qh_list->qh_overlay.qt_token = + cpu_to_hc32(QT_TOKEN_STATUS(QT_TOKEN_STATUS_HALTED));
reg = ehci_readl(&hccr->cr_hcsparams); descriptor.hub.bNbrPorts = HCS_N_PORTS(reg); @@ -808,7 +810,7 @@ submit_control_msg(struct usb_device *dev, unsigned long pipe, void *buffer, }
if (usb_pipedevice(pipe) == rootdev) { - if (rootdev == 0) + if (!rootdev) dev->speed = USB_SPEED_HIGH; return ehci_submit_root(dev, pipe, buffer, length, setup); } diff --git u-boot-usb-4f8254e.orig/drivers/usb/host/ehci.h u-boot-usb-4f8254e/drivers/usb/host/ehci.h index 7992983..39acdf9 100644 --- u-boot-usb-4f8254e.orig/drivers/usb/host/ehci.h +++ u-boot-usb-4f8254e/drivers/usb/host/ehci.h @@ -68,7 +68,7 @@ struct ehci_hcor { #define CMD_RESET (1 << 1) /* reset HC not bus */ #define CMD_RUN (1 << 0) /* start/stop HC */ uint32_t or_usbsts; -#define STD_ASS (1 << 15) +#define STS_ASS (1 << 15) #define STS_HALT (1 << 12) uint32_t or_usbintr; #define INTR_UE (1 << 0) /* USB interrupt enable */ @@ -83,11 +83,16 @@ struct ehci_hcor { uint32_t _reserved_0_; uint32_t or_burstsize; uint32_t or_txfilltuning; +#define TXFIFO_THRESH_MASK (0x3f << 16) #define TXFIFO_THRESH(p) ((p & 0x3f) << 16) uint32_t _reserved_1_[6]; uint32_t or_configflag; #define FLAG_CF (1 << 0) /* true: we'll support "high speed" */ uint32_t or_portsc[CONFIG_SYS_USB_EHCI_MAX_ROOT_PORTS]; +#define PORTSC_PSPD(x) (((x) >> 26) & 0x3) +#define PORTSC_PSPD_FS 0x0 +#define PORTSC_PSPD_LS 0x1 +#define PORTSC_PSPD_HS 0x2 uint32_t or_systune; } __attribute__ ((packed, aligned(4)));
@@ -175,6 +180,27 @@ struct qTD { #define QT_NEXT_TERMINATE 1 uint32_t qt_altnext; /* see EHCI 3.5.2 */ uint32_t qt_token; /* see EHCI 3.5.3 */ +#define QT_TOKEN_DT(x) (((x) & 0x1) << 31) /* Data Toggle */ +#define QT_TOKEN_GET_DT(x) (((x) >> 31) & 0x1) +#define QT_TOKEN_TOTALBYTES(x) (((x) & 0x7fff) << 16) /* Total Bytes to Transfer */ +#define QT_TOKEN_GET_TOTALBYTES(x) (((x) >> 16) & 0x7fff) +#define QT_TOKEN_IOC(x) (((x) & 0x1) << 15) /* Interrupt On Complete */ +#define QT_TOKEN_CPAGE(x) (((x) & 0x7) << 12) /* Current Page */ +#define QT_TOKEN_CERR(x) (((x) & 0x3) << 10) /* Error Counter */ +#define QT_TOKEN_PID(x) (((x) & 0x3) << 8) /* PID Code */ +#define QT_TOKEN_PID_OUT 0x0 +#define QT_TOKEN_PID_IN 0x1 +#define QT_TOKEN_PID_SETUP 0x2 +#define QT_TOKEN_STATUS(x) (((x) & 0xff) << 0) /* Status */ +#define QT_TOKEN_GET_STATUS(x) (((x) >> 0) & 0xff) +#define QT_TOKEN_STATUS_ACTIVE 0x80 +#define QT_TOKEN_STATUS_HALTED 0x40 +#define QT_TOKEN_STATUS_DATBUFERR 0x20 +#define QT_TOKEN_STATUS_BABBLEDET 0x10 +#define QT_TOKEN_STATUS_XACTERR 0x08 +#define QT_TOKEN_STATUS_MISSEDUFRAME 0x04 +#define QT_TOKEN_STATUS_SPLITXSTATE 0x02 +#define QT_TOKEN_STATUS_PERR 0x01 #define QT_BUFFER_CNT 5 uint32_t qt_buffer[QT_BUFFER_CNT]; /* see EHCI 3.5.4 */ uint32_t qt_buffer_hi[QT_BUFFER_CNT]; /* Appendix B */ @@ -182,6 +208,8 @@ struct qTD { uint32_t unused[3]; };
+#define EHCI_PAGE_SIZE 4096 + /* Queue Head (QH). */ struct QH { uint32_t qh_link; @@ -191,7 +219,26 @@ struct QH { #define QH_LINK_TYPE_SITD 4 #define QH_LINK_TYPE_FSTN 6 uint32_t qh_endpt1; +#define QH_ENDPT1_RL(x) (((x) & 0xf) << 28) /* NAK Count Reload */ +#define QH_ENDPT1_C(x) (((x) & 0x1) << 27) /* Control Endpoint Flag */ +#define QH_ENDPT1_MAXPKTLEN(x) (((x) & 0x7ff) << 16) /* Maximum Packet Length */ +#define QH_ENDPT1_H(x) (((x) & 0x1) << 15) /* Head of Reclamation List Flag */ +#define QH_ENDPT1_DTC(x) (((x) & 0x1) << 14) /* Data Toggle Control */ +#define QH_ENDPT1_DTC_IGNORE_QTD_TD 0x0 +#define QH_ENDPT1_DTC_DT_FROM_QTD 0x1 +#define QH_ENDPT1_EPS(x) (((x) & 0x3) << 12) /* Endpoint Speed */ +#define QH_ENDPT1_EPS_FS 0x0 +#define QH_ENDPT1_EPS_LS 0x1 +#define QH_ENDPT1_EPS_HS 0x2 +#define QH_ENDPT1_ENDPT(x) (((x) & 0xf) << 8) /* Endpoint Number */ +#define QH_ENDPT1_I(x) (((x) & 0x1) << 7) /* Inactivate on Next Transaction */ +#define QH_ENDPT1_DEVADDR(x) (((x) & 0x7f) << 0) /* Device Address */ uint32_t qh_endpt2; +#define QH_ENDPT2_MULT(x) (((x) & 0x3) << 30) /* High-Bandwidth Pipe Multiplier */ +#define QH_ENDPT2_PORTNUM(x) (((x) & 0x7f) << 23) /* Port Number */ +#define QH_ENDPT2_HUBADDR(x) (((x) & 0x7f) << 16) /* Hub Address */ +#define QH_ENDPT2_UFCMASK(x) (((x) & 0xff) << 8) /* Split Completion Mask */ +#define QH_ENDPT2_UFSMASK(x) (((x) & 0xff) << 0) /* Interrupt Schedule Mask */ uint32_t qh_curtd; struct qTD qh_overlay; /*

This patch takes advantage of the hardware EHCI qTD queuing mechanism to avoid software and transfer splitting overhead so as to make transfers as fast as possible.
The only drawback is a call to memalign. However, this is fast compared to the transfer timings, and the heap size to allocate is small, e.g. 128 kiB in the worst case for a transfer length of 65535 packets of 512 bytes.
Tested on i.MX25, i.MX35 and i.MX51. In my test conditions, the speed gain was very significant (several times faster), which is really appreciable when accessing large files.
Signed-off-by: Benoît Thébaudeau benoit.thebaudeau@advansee.com Cc: Marek Vasut marex@denx.de Cc: Ilya Yanok ilya.yanok@cogentembedded.com Cc: Stefan Herbrechtsmeier stefan@herbrechtsmeier.net --- Changes for v2: - Use DIV_ROUND_UP to make code more readable. Changes for v3: - Remove extra unalignment term in the computation of qtd_count. - Fix ZLP support. - Add #warning if CONFIG_SYS_MALLOC_LEN is likely to be too small. - Make code more readable. - Add comments. Changes for v4: None.
.../drivers/usb/host/ehci-hcd.c | 167 ++++++++++++++++---- 1 file changed, 138 insertions(+), 29 deletions(-)
diff --git u-boot-usb-4f8254e.orig/drivers/usb/host/ehci-hcd.c u-boot-usb-4f8254e/drivers/usb/host/ehci-hcd.c index 00a155b..a0ef5db 100644 --- u-boot-usb-4f8254e.orig/drivers/usb/host/ehci-hcd.c +++ u-boot-usb-4f8254e/drivers/usb/host/ehci-hcd.c @@ -208,8 +208,8 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, int length, struct devrequest *req) { ALLOC_ALIGN_BUFFER(struct QH, qh, 1, USB_DMA_MINALIGN); -#define QTD_COUNT 3 - ALLOC_ALIGN_BUFFER(struct qTD, qtd, QTD_COUNT, USB_DMA_MINALIGN); + struct qTD *qtd; + int qtd_count = 0; int qtd_counter = 0;
volatile struct qTD *vtd; @@ -230,8 +230,74 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, le16_to_cpu(req->value), le16_to_cpu(req->value), le16_to_cpu(req->index));
+ /* + * The USB transfer is split into qTD transfers. Eeach qTD transfer is + * described by a transfer descriptor (the qTD). The qTDs form a linked + * list with a queue head (QH). + * + * Each qTD transfer starts with a new USB packet, i.e. a packet cannot + * have its beginning in a qTD transfer and its end in the following + * one, so the qTD transfer lengths have to be chosen accordingly. + * + * Each qTD transfer uses up to QT_BUFFER_CNT data buffers, mapped to + * single pages. The first data buffer can start at any offset within a + * page (not considering the cache-line alignment issues), while the + * following buffers must be page-aligned. There is no alignment + * constraint on the size of a qTD transfer. + */ + if (req != NULL) + /* 1 qTD will be needed for SETUP, and 1 for ACK. */ + qtd_count += 1 + 1; + if (length > 0 || req == NULL) { + /* + * Determine the qTD transfer size that will be used for the + * data payload (not considering the final qTD transfer, which + * may be shorter). + * + * In order to keep each packet within a qTD transfer, the qTD + * transfer size is aligned to EHCI_PAGE_SIZE, which is a + * multiple of wMaxPacketSize (except in some cases for + * interrupt transfers, see comment in submit_int_msg()). + * + * By default, i.e. if the input buffer is page-aligned, + * QT_BUFFER_CNT full pages will be used. + */ + int xfr_sz = QT_BUFFER_CNT; + /* + * However, if the input buffer is not page-aligned, the qTD + * transfer size will be one page shorter, and the first qTD + * data buffer of each transfer will be page-unaligned. + */ + if ((uint32_t)buffer & (EHCI_PAGE_SIZE - 1)) + xfr_sz--; + /* Convert the qTD transfer size to bytes. */ + xfr_sz *= EHCI_PAGE_SIZE; + /* + * Determine the number of qTDs that will be required for the + * data payload. This value has to be rounded up since the final + * qTD transfer may be shorter than the regular qTD transfer + * size that has just been computed. + */ + qtd_count += DIV_ROUND_UP(length, xfr_sz); + /* ZLPs also need a qTD. */ + if (!qtd_count) + qtd_count++; + } +/* + * Threshold value based on the worst-case total size of the qTDs to allocate + * for a mass-storage transfer of 65535 blocks of 512 bytes. + */ +#if CONFIG_SYS_MALLOC_LEN <= 128 * 1024 +#warning CONFIG_SYS_MALLOC_LEN may be too small for EHCI +#endif + qtd = memalign(USB_DMA_MINALIGN, qtd_count * sizeof(struct qTD)); + if (qtd == NULL) { + printf("unable to allocate TDs\n"); + return -1; + } + memset(qh, 0, sizeof(struct QH)); - memset(qtd, 0, QTD_COUNT * sizeof(*qtd)); + memset(qtd, 0, qtd_count * sizeof(*qtd));
toggle = usb_gettoggle(dev, usb_pipeendpoint(pipe), usb_pipeout(pipe));
@@ -290,30 +356,64 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, }
if (length > 0 || req == NULL) { - /* - * Setup request qTD (3.5 in ehci-r10.pdf) - * - * qt_next ................ 03-00 H - * qt_altnext ............. 07-04 H - * qt_token ............... 0B-08 H - * - * [ buffer, buffer_hi ] loaded with "buffer". - */ - qtd[qtd_counter].qt_next = cpu_to_hc32(QT_NEXT_TERMINATE); - qtd[qtd_counter].qt_altnext = cpu_to_hc32(QT_NEXT_TERMINATE); - token = QT_TOKEN_DT(toggle) | QT_TOKEN_TOTALBYTES(length) | - QT_TOKEN_IOC(req == NULL) | QT_TOKEN_CPAGE(0) | - QT_TOKEN_CERR(3) | QT_TOKEN_PID(usb_pipein(pipe) ? - QT_TOKEN_PID_IN : QT_TOKEN_PID_OUT) | - QT_TOKEN_STATUS(QT_TOKEN_STATUS_ACTIVE); - qtd[qtd_counter].qt_token = cpu_to_hc32(token); - if (ehci_td_buffer(&qtd[qtd_counter], buffer, length)) { - printf("unable to construct DATA TD\n"); - goto fail; - } - /* Update previous qTD! */ - *tdp = cpu_to_hc32((uint32_t)&qtd[qtd_counter]); - tdp = &qtd[qtd_counter++].qt_next; + uint8_t *buf_ptr = buffer; + int left_length = length; + + do { + /* + * Determine the size of this qTD transfer. By default, + * QT_BUFFER_CNT full pages can be used. + */ + int xfr_bytes = QT_BUFFER_CNT * EHCI_PAGE_SIZE; + /* + * However, if the input buffer is not page-aligned, the + * portion of the first page before the buffer start + * offset within that page is unusable. + */ + xfr_bytes -= (uint32_t)buf_ptr & (EHCI_PAGE_SIZE - 1); + /* + * In order to keep each packet within a qTD transfer, + * align the qTD transfer size to EHCI_PAGE_SIZE. + */ + xfr_bytes &= ~(EHCI_PAGE_SIZE - 1); + /* + * This transfer may be shorter than the available qTD + * transfer size that has just been computed. + */ + xfr_bytes = min(xfr_bytes, left_length); + + /* + * Setup request qTD (3.5 in ehci-r10.pdf) + * + * qt_next ................ 03-00 H + * qt_altnext ............. 07-04 H + * qt_token ............... 0B-08 H + * + * [ buffer, buffer_hi ] loaded with "buffer". + */ + qtd[qtd_counter].qt_next = + cpu_to_hc32(QT_NEXT_TERMINATE); + qtd[qtd_counter].qt_altnext = + cpu_to_hc32(QT_NEXT_TERMINATE); + token = QT_TOKEN_DT(toggle) | + QT_TOKEN_TOTALBYTES(xfr_bytes) | + QT_TOKEN_IOC(req == NULL) | QT_TOKEN_CPAGE(0) | + QT_TOKEN_CERR(3) | + QT_TOKEN_PID(usb_pipein(pipe) ? + QT_TOKEN_PID_IN : QT_TOKEN_PID_OUT) | + QT_TOKEN_STATUS(QT_TOKEN_STATUS_ACTIVE); + qtd[qtd_counter].qt_token = cpu_to_hc32(token); + if (ehci_td_buffer(&qtd[qtd_counter], buf_ptr, + xfr_bytes)) { + printf("unable to construct DATA TD\n"); + goto fail; + } + /* Update previous qTD! */ + *tdp = cpu_to_hc32((uint32_t)&qtd[qtd_counter]); + tdp = &qtd[qtd_counter++].qt_next; + buf_ptr += xfr_bytes; + left_length -= xfr_bytes; + } while (left_length > 0); }
if (req != NULL) { @@ -344,7 +444,7 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, ALIGN_END_ADDR(struct QH, qh_list, 1)); flush_dcache_range((uint32_t)qh, ALIGN_END_ADDR(struct QH, qh, 1)); flush_dcache_range((uint32_t)qtd, - ALIGN_END_ADDR(struct qTD, qtd, QTD_COUNT)); + ALIGN_END_ADDR(struct qTD, qtd, qtd_count));
/* Set async. queue head pointer. */ ehci_writel(&hcor->or_asynclistaddr, (uint32_t)qh_list); @@ -375,7 +475,7 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, invalidate_dcache_range((uint32_t)qh, ALIGN_END_ADDR(struct QH, qh, 1)); invalidate_dcache_range((uint32_t)qtd, - ALIGN_END_ADDR(struct qTD, qtd, QTD_COUNT)); + ALIGN_END_ADDR(struct qTD, qtd, qtd_count));
token = hc32_to_cpu(vtd->qt_token); if (!(QT_TOKEN_GET_STATUS(token) & QT_TOKEN_STATUS_ACTIVE)) @@ -448,9 +548,11 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, ehci_readl(&hcor->or_portsc[1])); }
+ free(qtd); return (dev->status != USB_ST_NOT_PROC) ? 0 : -1;
fail: + free(qtd); return -1; }
@@ -827,6 +929,13 @@ submit_int_msg(struct usb_device *dev, unsigned long pipe, void *buffer, /* * Interrupt transfers requiring several transactions are not supported * because bInterval is ignored. + * + * Also, ehci_submit_async() relies on wMaxPacketSize being a power of 2 + * if several qTDs are required, while the USB specification does not + * constrain this for interrupt transfers. That means that + * ehci_submit_async() would support interrupt transfers requiring + * several transactions only as long as the transfer size does not + * require more than a single qTD. */ if (length > usb_maxpacket(dev, pipe)) { printf("%s: Interrupt transfers requiring several transactions "

Benoît Thébaudeau Application Engineer benoit.thebaudeau@advansee.com +33 2 40 50 21 73 ADVANSEE SARL 9, rue Alfred Kastler CS 30750 44307 Nantes CEDEX 3 France
----- Original Message -----
From: "Benoît Thébaudeau" benoit.thebaudeau@advansee.com To: u-boot@lists.denx.de Cc: "Marek Vasut" marex@denx.de, "Ilya Yanok" ilya.yanok@cogentembedded.com, "Stefan Herbrechtsmeier" stefan@herbrechtsmeier.net Sent: Friday, August 10, 2012 6:21:37 PM Subject: [PATCH v4 0/7] ehci: Improve performance
Hi all,
This series aims at improving EHCI performance. There is also some code cleanup BTW.
Best regards, Benoît

Now that the EHCI driver allocates its qTDs from the heap, the MSC driver is only limited by the SCSI commands it uses.
Signed-off-by: Benoît Thébaudeau benoit.thebaudeau@advansee.com Cc: Marek Vasut marex@denx.de Cc: Ilya Yanok ilya.yanok@cogentembedded.com Cc: Stefan Herbrechtsmeier stefan@herbrechtsmeier.net --- Changes for v2: None. Changes for v3: - Patch swapped with the currently preceding one. Changes for v4: None.
.../common/usb_storage.c | 33 +++++++++----------- 1 file changed, 15 insertions(+), 18 deletions(-)
diff --git u-boot-usb-4f8254e.orig/common/usb_storage.c u-boot-usb-4f8254e/common/usb_storage.c index 0cd6399..822bd64 100644 --- u-boot-usb-4f8254e.orig/common/usb_storage.c +++ u-boot-usb-4f8254e/common/usb_storage.c @@ -157,12 +157,13 @@ struct us_data {
#ifdef CONFIG_USB_EHCI /* - * The U-Boot EHCI driver cannot handle more than 5 page aligned buffers - * of 4096 bytes in a transfer without running itself out of qt_buffers + * The U-Boot EHCI driver can handle any transfer length as long as there is + * enough free heap space left, but the SCSI READ(10) and WRITE(10) commands are + * limited to 65535 bytes. */ -#define USB_MAX_XFER_BLK(start, blksz) (((4096 * 5) - (start % 4096)) / blksz) +#define USB_MAX_XFER_BLK 65535 #else -#define USB_MAX_XFER_BLK(start, blksz) 20 +#define USB_MAX_XFER_BLK 20 #endif
static struct us_data usb_stor[USB_MAX_STOR_DEV]; @@ -1050,7 +1051,7 @@ static void usb_bin_fixup(struct usb_device_descriptor descriptor, unsigned long usb_stor_read(int device, unsigned long blknr, unsigned long blkcnt, void *buffer) { - unsigned long start, blks, buf_addr, max_xfer_blk; + unsigned long start, blks, buf_addr; unsigned short smallblks; struct usb_device *dev; struct us_data *ss; @@ -1092,14 +1093,12 @@ unsigned long usb_stor_read(int device, unsigned long blknr, /* XXX need some comment here */ retry = 2; srb->pdata = (unsigned char *)buf_addr; - max_xfer_blk = USB_MAX_XFER_BLK(buf_addr, - usb_dev_desc[device].blksz); - if (blks > max_xfer_blk) - smallblks = (unsigned short) max_xfer_blk; + if (blks > USB_MAX_XFER_BLK) + smallblks = USB_MAX_XFER_BLK; else smallblks = (unsigned short) blks; retry_it: - if (smallblks == max_xfer_blk) + if (smallblks == USB_MAX_XFER_BLK) usb_show_progress(); srb->datalen = usb_dev_desc[device].blksz * smallblks; srb->pdata = (unsigned char *)buf_addr; @@ -1120,7 +1119,7 @@ retry_it: start, smallblks, buf_addr);
usb_disable_asynch(0); /* asynch transfer allowed */ - if (blkcnt >= max_xfer_blk) + if (blkcnt >= USB_MAX_XFER_BLK) debug("\n"); return blkcnt; } @@ -1128,7 +1127,7 @@ retry_it: unsigned long usb_stor_write(int device, unsigned long blknr, unsigned long blkcnt, const void *buffer) { - unsigned long start, blks, buf_addr, max_xfer_blk; + unsigned long start, blks, buf_addr; unsigned short smallblks; struct usb_device *dev; struct us_data *ss; @@ -1173,14 +1172,12 @@ unsigned long usb_stor_write(int device, unsigned long blknr, */ retry = 2; srb->pdata = (unsigned char *)buf_addr; - max_xfer_blk = USB_MAX_XFER_BLK(buf_addr, - usb_dev_desc[device].blksz); - if (blks > max_xfer_blk) - smallblks = (unsigned short) max_xfer_blk; + if (blks > USB_MAX_XFER_BLK) + smallblks = USB_MAX_XFER_BLK; else smallblks = (unsigned short) blks; retry_it: - if (smallblks == max_xfer_blk) + if (smallblks == USB_MAX_XFER_BLK) usb_show_progress(); srb->datalen = usb_dev_desc[device].blksz * smallblks; srb->pdata = (unsigned char *)buf_addr; @@ -1201,7 +1198,7 @@ retry_it: start, smallblks, buf_addr);
usb_disable_asynch(0); /* asynch transfer allowed */ - if (blkcnt >= max_xfer_blk) + if (blkcnt >= USB_MAX_XFER_BLK) debug("\n"); return blkcnt;

Hi Benoit,
On Fri, Aug 10, 2012 at 8:23 PM, Benoît Thébaudeau < benoit.thebaudeau@advansee.com> wrote:
diff --git u-boot-usb-4f8254e.orig/common/usb_storage.c u-boot-usb-4f8254e/common/usb_storage.c index 0cd6399..822bd64 100644 --- u-boot-usb-4f8254e.orig/common/usb_storage.c +++ u-boot-usb-4f8254e/common/usb_storage.c @@ -157,12 +157,13 @@ struct us_data {
#ifdef CONFIG_USB_EHCI /*
- The U-Boot EHCI driver cannot handle more than 5 page aligned buffers
- of 4096 bytes in a transfer without running itself out of qt_buffers
- The U-Boot EHCI driver can handle any transfer length as long as there
is
- enough free heap space left, but the SCSI READ(10) and WRITE(10)
commands are
- limited to 65535 bytes.
bytes?
*/ -#define USB_MAX_XFER_BLK(start, blksz) (((4096 * 5) - (start % 4096)) / blksz) +#define USB_MAX_XFER_BLK 65535
But here you limit it to 65535 _blocks_, right? One of the two should be wrong ;)
Regards, Ilya.

Hi Ilya,
On Fri, Aug 10, 2012 at 8:34:14 PM, Ilya Yanok wrote:
Hi Benoit,
On Fri, Aug 10, 2012 at 8:23 PM, Benoît Thébaudeau < benoit.thebaudeau@advansee.com > wrote:
diff --git u-boot-usb-4f8254e.orig/common/usb_storage.c u-boot-usb-4f8254e/common/usb_storage.c
index 0cd6399..822bd64 100644
--- u-boot-usb-4f8254e.orig/common/usb_storage.c
+++ u-boot-usb-4f8254e/common/usb_storage.c
@@ -157,12 +157,13 @@ struct us_data {
#ifdef CONFIG_USB_EHCI
/*
- The U-Boot EHCI driver cannot handle more than 5 page aligned
buffers
- of 4096 bytes in a transfer without running itself out of
qt_buffers
- The U-Boot EHCI driver can handle any transfer length as long
as there is
- enough free heap space left, but the SCSI READ(10) and
WRITE(10) commands are
- limited to 65535 bytes.
bytes?
*/
-#define USB_MAX_XFER_BLK(start, blksz) (((4096 * 5) - (start % 4096)) / blksz)
+#define USB_MAX_XFER_BLK 65535
But here you limit it to 65535 _blocks_, right? One of the two should be wrong ;)
Argh, it was a typo in the comment. Thanks for catching this. I meant "blocks" of course. Marek, can you fix this comment on-the-fly when applying?
Best regards, Benoît

Dear Benoît Thébaudeau,
Hi Ilya,
On Fri, Aug 10, 2012 at 8:34:14 PM, Ilya Yanok wrote:
Hi Benoit,
On Fri, Aug 10, 2012 at 8:23 PM, Benoît Thébaudeau <
benoit.thebaudeau@advansee.com > wrote:
diff --git u-boot-usb-4f8254e.orig/common/usb_storage.c u-boot-usb-4f8254e/common/usb_storage.c
index 0cd6399..822bd64 100644
--- u-boot-usb-4f8254e.orig/common/usb_storage.c
+++ u-boot-usb-4f8254e/common/usb_storage.c
@@ -157,12 +157,13 @@ struct us_data {
#ifdef CONFIG_USB_EHCI
/*
- The U-Boot EHCI driver cannot handle more than 5 page aligned
buffers
- of 4096 bytes in a transfer without running itself out of
qt_buffers
- The U-Boot EHCI driver can handle any transfer length as long
as there is
- enough free heap space left, but the SCSI READ(10) and
WRITE(10) commands are
- limited to 65535 bytes.
bytes?
*/
-#define USB_MAX_XFER_BLK(start, blksz) (((4096 * 5) - (start % 4096)) / blksz)
+#define USB_MAX_XFER_BLK 65535
But here you limit it to 65535 _blocks_, right? One of the two should be wrong ;)
Argh, it was a typo in the comment. Thanks for catching this. I meant "blocks" of course. Marek, can you fix this comment on-the-fly when applying?
Roger, will do!
Best regards, Benoît
Best regards, Marek Vasut

Adjust time-out value for the new EHCI mechanism.
Signed-off-by: Benoît Thébaudeau benoit.thebaudeau@advansee.com Cc: Marek Vasut marex@denx.de Cc: Ilya Yanok ilya.yanok@cogentembedded.com Cc: Stefan Herbrechtsmeier stefan@herbrechtsmeier.net --- Changes for v2: None. Changes for v3: None. Changes for v4: None.
.../common/usb_storage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git u-boot-usb-4f8254e.orig/common/usb_storage.c u-boot-usb-4f8254e/common/usb_storage.c index 822bd64..f0798b2 100644 --- u-boot-usb-4f8254e.orig/common/usb_storage.c +++ u-boot-usb-4f8254e/common/usb_storage.c @@ -712,7 +712,7 @@ int usb_stor_BBB_transport(ccb *srb, struct us_data *us) else pipe = pipeout; result = usb_bulk_msg(us->pusb_dev, pipe, srb->pdata, srb->datalen, - &data_actlen, USB_CNTL_TIMEOUT * 5); + &data_actlen, USB_CNTL_TIMEOUT * 100); /* special handling of STALL in DATA phase */ if ((result < 0) && (us->pusb_dev->status & USB_ST_STALLED)) { USB_STOR_PRINTF("DATA:stall\n");

Hi Benoit,
On Fri, Aug 10, 2012 at 8:23 PM, Benoît Thébaudeau < benoit.thebaudeau@advansee.com> wrote:
Adjust time-out value for the new EHCI mechanism.
Could you please be a bit more specific? ;)
How this timeout is related to the new mechanism? Is it really EHCI specific? If it is, that's hardcoding of lower layer details again, I think that's undesirable...
But generally this series looks really good. Thanks a lot!
Regards, Ilya.

Hi Ilya,
On Fri, Aug 10, 2012 at 8:03:12 PM, Ilya Yanok wrote:
Hi Benoit,
On Fri, Aug 10, 2012 at 8:23 PM, Benoît Thébaudeau < benoit.thebaudeau@advansee.com > wrote:
Adjust time-out value for the new EHCI mechanism.
Could you please be a bit more specific? ;)
How this timeout is related to the new mechanism? Is it really EHCI specific? If it is, that's hardcoding of lower layer details again, I think that's undesirable...
Well, I did this specific patch a very long time ago, and I don't remember the details. I know that things did not work without it in my test conditions at that time. I've just run again all my tests with the current code on all my platforms without this patch, and everything works fine. So it was perhaps a device-related issue rather than an EHCI-related one. Since the rationale for this patch is no longer clear and things work fine without it, we can probably drop it. I let you and Marek decide.
But generally this series looks really good. Thanks a lot!
Great. You're welcome.
Best regards, Benoît

The commit 5dd95cf made the MSC driver EHCI-specific. This patch restores a basic support of non-EHCI HCDs, like before that commit.
The fallback transfer size is certainly not optimal, but at least it should work like before.
Signed-off-by: Benoît Thébaudeau benoit.thebaudeau@advansee.com Cc: Marek Vasut marex@denx.de Cc: Ilya Yanok ilya.yanok@cogentembedded.com Cc: Stefan Herbrechtsmeier stefan@herbrechtsmeier.net --- Changes for v2: None. Changes for v3: - Patch swapped with the currently following one. Changes for v4: None.
.../common/usb_storage.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git u-boot-usb-4f8254e.orig/common/usb_storage.c u-boot-usb-4f8254e/common/usb_storage.c index bdc306f..0cd6399 100644 --- u-boot-usb-4f8254e.orig/common/usb_storage.c +++ u-boot-usb-4f8254e/common/usb_storage.c @@ -155,11 +155,15 @@ struct us_data { trans_cmnd transport; /* transport routine */ };
+#ifdef CONFIG_USB_EHCI /* * The U-Boot EHCI driver cannot handle more than 5 page aligned buffers * of 4096 bytes in a transfer without running itself out of qt_buffers */ #define USB_MAX_XFER_BLK(start, blksz) (((4096 * 5) - (start % 4096)) / blksz) +#else +#define USB_MAX_XFER_BLK(start, blksz) 20 +#endif
static struct us_data usb_stor[USB_MAX_STOR_DEV];

There is a 5-ms delay in usb_stor_BBB_transport, which occurs every 10 kiB of data for fragmented fatload usb, i.e. roughly 500 ms of delay per MiB. This adds up to quite a bit of delay if you're loading a large ramdisk.
The purpose of this delay should be to debounce the 5-V/100-mA USB power up. This patch skips the delay if the device has already been queried as ready.
Signed-off-by: Jim Shimer mgi2475@motorola.com
Rework following the review: - Rebase against the latest u-boot-usb master. - Replace typedef with #define. - Use the existing flags struct field instead of adding a new field. - Remove the setter function. - Remove the typecasts. Signed-off-by: Benoît Thébaudeau benoit.thebaudeau@advansee.com
Cc: Marek Vasut marex@denx.de Cc: Ilya Yanok ilya.yanok@cogentembedded.com Cc: Stefan Herbrechtsmeier stefan@herbrechtsmeier.net Cc: Jim Shimer mgi2475@motorola.com --- Changes for v2: N/A. Changes for v3: - New patch. Changes for v4: None.
.../common/usb_storage.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-)
diff --git u-boot-usb-4f8254e.orig/common/usb_storage.c u-boot-usb-4f8254e/common/usb_storage.c index f0798b2..b000f09 100644 --- u-boot-usb-4f8254e.orig/common/usb_storage.c +++ u-boot-usb-4f8254e/common/usb_storage.c @@ -136,6 +136,7 @@ struct us_data { struct usb_device *pusb_dev; /* this usb_device */
unsigned int flags; /* from filter initially */ +# define USB_READY (1 << 0) unsigned char ifnum; /* interface number */ unsigned char ep_in; /* in endpoint */ unsigned char ep_out; /* out ....... */ @@ -698,7 +699,8 @@ int usb_stor_BBB_transport(ccb *srb, struct us_data *us) usb_stor_BBB_reset(us); return USB_STOR_TRANSPORT_FAILED; } - mdelay(5); + if (!(us->flags & USB_READY)) + mdelay(5); pipein = usb_rcvbulkpipe(us->pusb_dev, us->ep_in); pipeout = usb_sndbulkpipe(us->pusb_dev, us->ep_out); /* DATA phase + error handling */ @@ -963,8 +965,10 @@ static int usb_test_unit_ready(ccb *srb, struct us_data *ss) srb->cmd[1] = srb->lun << 5; srb->datalen = 0; srb->cmdlen = 12; - if (ss->transport(srb, ss) == USB_STOR_TRANSPORT_GOOD) + if (ss->transport(srb, ss) == USB_STOR_TRANSPORT_GOOD) { + ss->flags |= USB_READY; return 0; + } usb_request_sense(srb, ss); mdelay(100); } while (retries--); @@ -1114,6 +1118,7 @@ retry_it: blks -= smallblks; buf_addr += srb->datalen; } while (blks != 0); + ss->flags &= ~USB_READY;
USB_STOR_PRINTF("usb_read: end startblk %lx, blccnt %x buffer %lx\n", start, smallblks, buf_addr); @@ -1193,6 +1198,7 @@ retry_it: blks -= smallblks; buf_addr += srb->datalen; } while (blks != 0); + ss->flags &= ~USB_READY;
USB_STOR_PRINTF("usb_write: end startblk %lx, blccnt %x buffer %lx\n", start, smallblks, buf_addr); @@ -1404,6 +1410,7 @@ int usb_stor_get_info(struct usb_device *dev, struct us_data *ss, cap[0] = 2880; cap[1] = 0x200; } + ss->flags &= ~USB_READY; USB_STOR_PRINTF("Read Capacity returns: 0x%lx, 0x%lx\n", cap[0], cap[1]); #if 0

Relax the qTD transfer alignment constraints in order to need less qTDs for buffers that are aligned to 512 bytes but not to pages.
Signed-off-by: Benoît Thébaudeau benoit.thebaudeau@advansee.com Cc: Marek Vasut marex@denx.de Cc: Ilya Yanok ilya.yanok@cogentembedded.com Cc: Stefan Herbrechtsmeier stefan@herbrechtsmeier.net --- Changes for v2: N/A. Changes for v3: - New patch. Changes for v4: - Optimize away the qtd_toggle variable.
.../drivers/usb/host/ehci-hcd.c | 67 +++++++++++--------- 1 file changed, 37 insertions(+), 30 deletions(-)
diff --git u-boot-usb-4f8254e.orig/drivers/usb/host/ehci-hcd.c u-boot-usb-4f8254e/drivers/usb/host/ehci-hcd.c index a0ef5db..18b4bc6 100644 --- u-boot-usb-4f8254e.orig/drivers/usb/host/ehci-hcd.c +++ u-boot-usb-4f8254e/drivers/usb/host/ehci-hcd.c @@ -215,7 +215,7 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, volatile struct qTD *vtd; unsigned long ts; uint32_t *tdp; - uint32_t endpt, token, usbsts; + uint32_t endpt, maxpacket, token, usbsts; uint32_t c, toggle; uint32_t cmd; int timeout; @@ -230,6 +230,7 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, le16_to_cpu(req->value), le16_to_cpu(req->value), le16_to_cpu(req->index));
+#define PKT_ALIGN 512 /* * The USB transfer is split into qTD transfers. Eeach qTD transfer is * described by a transfer descriptor (the qTD). The qTDs form a linked @@ -251,43 +252,41 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, if (length > 0 || req == NULL) { /* * Determine the qTD transfer size that will be used for the - * data payload (not considering the final qTD transfer, which - * may be shorter). + * data payload (not considering the first qTD transfer, which + * may be longer or shorter, and the final one, which may be + * shorter). * * In order to keep each packet within a qTD transfer, the qTD - * transfer size is aligned to EHCI_PAGE_SIZE, which is a - * multiple of wMaxPacketSize (except in some cases for - * interrupt transfers, see comment in submit_int_msg()). + * transfer size is aligned to PKT_ALIGN, which is a multiple of + * wMaxPacketSize (except in some cases for interrupt transfers, + * see comment in submit_int_msg()). * - * By default, i.e. if the input buffer is page-aligned, + * By default, i.e. if the input buffer is aligned to PKT_ALIGN, * QT_BUFFER_CNT full pages will be used. */ int xfr_sz = QT_BUFFER_CNT; /* - * However, if the input buffer is not page-aligned, the qTD - * transfer size will be one page shorter, and the first qTD + * However, if the input buffer is not aligned to PKT_ALIGN, the + * qTD transfer size will be one page shorter, and the first qTD * data buffer of each transfer will be page-unaligned. */ - if ((uint32_t)buffer & (EHCI_PAGE_SIZE - 1)) + if ((uint32_t)buffer & (PKT_ALIGN - 1)) xfr_sz--; /* Convert the qTD transfer size to bytes. */ xfr_sz *= EHCI_PAGE_SIZE; /* - * Determine the number of qTDs that will be required for the - * data payload. This value has to be rounded up since the final - * qTD transfer may be shorter than the regular qTD transfer - * size that has just been computed. + * Approximate by excess the number of qTDs that will be + * required for the data payload. The exact formula is way more + * complicated and saves at most 2 qTDs, i.e. a total of 128 + * bytes. */ - qtd_count += DIV_ROUND_UP(length, xfr_sz); - /* ZLPs also need a qTD. */ - if (!qtd_count) - qtd_count++; + qtd_count += 2 + length / xfr_sz; } /* - * Threshold value based on the worst-case total size of the qTDs to allocate - * for a mass-storage transfer of 65535 blocks of 512 bytes. + * Threshold value based on the worst-case total size of the allocated qTDs for + * a mass-storage transfer of 65535 blocks of 512 bytes. */ -#if CONFIG_SYS_MALLOC_LEN <= 128 * 1024 +#if CONFIG_SYS_MALLOC_LEN <= 64 + 128 * 1024 #warning CONFIG_SYS_MALLOC_LEN may be too small for EHCI #endif qtd = memalign(USB_DMA_MINALIGN, qtd_count * sizeof(struct qTD)); @@ -313,8 +312,9 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, */ qh->qh_link = cpu_to_hc32((uint32_t)qh_list | QH_LINK_TYPE_QH); c = usb_pipespeed(pipe) != USB_SPEED_HIGH && !usb_pipeendpoint(pipe); + maxpacket = usb_maxpacket(dev, pipe); endpt = QH_ENDPT1_RL(8) | QH_ENDPT1_C(c) | - QH_ENDPT1_MAXPKTLEN(usb_maxpacket(dev, pipe)) | QH_ENDPT1_H(0) | + QH_ENDPT1_MAXPKTLEN(maxpacket) | QH_ENDPT1_H(0) | QH_ENDPT1_DTC(QH_ENDPT1_DTC_DT_FROM_QTD) | QH_ENDPT1_EPS(usb_pipespeed(pipe)) | QH_ENDPT1_ENDPT(usb_pipeendpoint(pipe)) | QH_ENDPT1_I(0) | @@ -373,9 +373,9 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, xfr_bytes -= (uint32_t)buf_ptr & (EHCI_PAGE_SIZE - 1); /* * In order to keep each packet within a qTD transfer, - * align the qTD transfer size to EHCI_PAGE_SIZE. + * align the qTD transfer size to PKT_ALIGN. */ - xfr_bytes &= ~(EHCI_PAGE_SIZE - 1); + xfr_bytes &= ~(PKT_ALIGN - 1); /* * This transfer may be shorter than the available qTD * transfer size that has just been computed. @@ -411,6 +411,13 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, /* Update previous qTD! */ *tdp = cpu_to_hc32((uint32_t)&qtd[qtd_counter]); tdp = &qtd[qtd_counter++].qt_next; + /* + * Data toggle has to be adjusted since the qTD transfer + * size is not always an even multiple of + * wMaxPacketSize. + */ + if ((xfr_bytes / maxpacket) & 1) + toggle ^= 1; buf_ptr += xfr_bytes; left_length -= xfr_bytes; } while (left_length > 0); @@ -426,7 +433,7 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, */ qtd[qtd_counter].qt_next = cpu_to_hc32(QT_NEXT_TERMINATE); qtd[qtd_counter].qt_altnext = cpu_to_hc32(QT_NEXT_TERMINATE); - token = QT_TOKEN_DT(toggle) | QT_TOKEN_TOTALBYTES(0) | + token = QT_TOKEN_DT(1) | QT_TOKEN_TOTALBYTES(0) | QT_TOKEN_IOC(1) | QT_TOKEN_CPAGE(0) | QT_TOKEN_CERR(3) | QT_TOKEN_PID(usb_pipein(pipe) ? QT_TOKEN_PID_OUT : QT_TOKEN_PID_IN) | @@ -931,11 +938,11 @@ submit_int_msg(struct usb_device *dev, unsigned long pipe, void *buffer, * because bInterval is ignored. * * Also, ehci_submit_async() relies on wMaxPacketSize being a power of 2 - * if several qTDs are required, while the USB specification does not - * constrain this for interrupt transfers. That means that - * ehci_submit_async() would support interrupt transfers requiring - * several transactions only as long as the transfer size does not - * require more than a single qTD. + * <= PKT_ALIGN if several qTDs are required, while the USB + * specification does not constrain this for interrupt transfers. That + * means that ehci_submit_async() would support interrupt transfers + * requiring several transactions only as long as the transfer size does + * not require more than a single qTD. */ if (length > usb_maxpacket(dev, pipe)) { printf("%s: Interrupt transfers requiring several transactions "

Dear Benoît Thébaudeau,
Hi all,
This series aims at improving EHCI performance. There is also some code cleanup BTW.
Best regards, Benoît
Applied all but 5/7 and pushed. Since we all have vacations etc etc, this will appear in the mainline around start of september.
Thanks a lot for this awesome patchset! _Great_ work guys!
Best regards, Marek Vasut

Dear Marek Vasut,
Dear Benoît Thébaudeau,
Hi all,
This series aims at improving EHCI performance. There is also some code cleanup BTW.
Best regards, Benoît
Applied all but 5/7 and pushed. Since we all have vacations etc etc, this will appear in the mainline around start of september.
Thanks. But will it make it for next release since September is after the official end of the current merge window?
I also have series for ehci-mxc and ehci-mx5 that I will try to post next week. They fix many things, add new features and add support for i.MX25 and i.MX35.
Thanks a lot for this awesome patchset! _Great_ work guys!
Thanks.
Best regards, Benoît

Dear Benoît Thébaudeau,
Dear Marek Vasut,
Dear Benoît Thébaudeau,
Hi all,
This series aims at improving EHCI performance. There is also some code cleanup BTW.
Best regards, Benoît
Applied all but 5/7 and pushed. Since we all have vacations etc etc, this will appear in the mainline around start of september.
Thanks. But will it make it for next release since September is after the official end of the current merge window?
If it's properly tested, I'll try to persuade WD to pull it in (and then the same round of horror as during the last release will repeat ;-) )
I also have series for ehci-mxc and ehci-mx5 that I will try to post next week. They fix many things, add new features and add support for i.MX25 and i.MX35.
Cool!
Thanks a lot for this awesome patchset! _Great_ work guys!
Thanks.
Best regards, Benoît
Best regards, Marek Vasut
participants (3)
-
Benoît Thébaudeau
-
Ilya Yanok
-
Marek Vasut