
Dear Marek Vasut,
On Fri, Jul 27, 2012 at 02:54:07 PM, Marek Vasut wrote:
This patch takes advantage of the hardware EHCI qTD queuing mechanism to avoid software overhead and to make transfers as fast as possible.
The only drawback is a call to memalign. However, this is fast compared to the transfer timings, and the heap size to allocate is small, e.g. a little bit more than 100 kB for a transfer length of 65535 packets of 512 bytes.
Tested on i.MX25 and i.MX35. In my test conditions, the speedup was about 15x using page-aligned buffers, which is really appreciable when accessing large files.
Signed-off-by: Benoît Thébaudeau benoit.thebaudeau@advansee.com Cc: Marek Vasut marex@denx.de Cc: Ilya Yanok ilya.yanok@cogentembedded.com Cc: Stefan Herbrechtsmeier stefan@herbrechtsmeier.net
.../drivers/usb/host/ehci-hcd.c | 94 ++++++++++++++------ 1 file changed, 65 insertions(+), 29 deletions(-)
diff --git u-boot-usb-1b4bd0e.orig/drivers/usb/host/ehci-hcd.c u-boot-usb-1b4bd0e/drivers/usb/host/ehci-hcd.c index 5b3b906..b5645fa 100644 --- u-boot-usb-1b4bd0e.orig/drivers/usb/host/ehci-hcd.c +++ u-boot-usb-1b4bd0e/drivers/usb/host/ehci-hcd.c @@ -208,7 +208,8 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, int length, struct devrequest *req) { ALLOC_ALIGN_BUFFER(struct QH, qh, 1, USB_DMA_MINALIGN);
- ALLOC_ALIGN_BUFFER(struct qTD, qtd, 3, USB_DMA_MINALIGN);
struct qTD *qtd;
int qtd_count = 0; int qtd_counter = 0;
volatile struct qTD *vtd;
@@ -229,8 +230,25 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, le16_to_cpu(req->value), le16_to_cpu(req->value), le16_to_cpu(req->index));
- if (req != NULL) /* SETUP + ACK */
qtd_count += 1 + 1;
- if (length > 0 || req == NULL) { /* buffer */
if ((uint32_t)buffer & 4095) /* page-unaligned */
qtd_count += (((uint32_t)buffer & 4095) + length +
(QT_BUFFER_CNT - 1) * 4096 - 1) /
((QT_BUFFER_CNT - 1) * 4096);
Ok, maybe you can please elaborate on this crazy calculation in here? Or somehow clarify it? Also, won't the macros in include/common.h help in a way? (like ROUND() etc).
I have already posted a v2 for this specific patch (only 2/5) that does exactly that. Please review only the latest versions of patches.
I don't really graps what you're trying to calculate here, so maybe even a comment would help.
I'll do that.
else /* page-aligned */
qtd_count += (length + QT_BUFFER_CNT * 4096 - 1) /
(QT_BUFFER_CNT * 4096);
Same here, also please avoid using those 4096 and such constants ... maybe #define them in ehci.h ?
Yes. It was already everywhere, so I went on the same way. Do you think using PAGE_SIZE from <linux/compat.h> would be fine since these 4096 are nothing more than page sizes?
- }
- qtd = memalign(USB_DMA_MINALIGN, qtd_count * sizeof(struct qTD));
So your code can alloc more than 5 qTDs ? How does it chain them then? Into more QHs ?
It's done in exactly the same way as for the original 3 qTDs, only with more qTDs, but still with 5 qt_buffers per qTD.
- if (qtd == NULL) {
printf("unable to allocate TDs\n");
return -1;
- }
- memset(qh, 0, sizeof(struct QH));
- memset(qtd, 0, 3 * sizeof(*qtd));
memset(qtd, 0, qtd_count * sizeof(*qtd));
toggle = usb_gettoggle(dev, usb_pipeendpoint(pipe), usb_pipeout(pipe));
@@ -291,31 +309,46 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, }
if (length > 0 || req == NULL) {
/*
* Setup request qTD (3.5 in ehci-r10.pdf)
*
* qt_next ................ 03-00 H
* qt_altnext ............. 07-04 H
* qt_token ............... 0B-08 H
*
* [ buffer, buffer_hi ] loaded with "buffer".
*/
qtd[qtd_counter].qt_next = cpu_to_hc32(QT_NEXT_TERMINATE);
qtd[qtd_counter].qt_altnext = cpu_to_hc32(QT_NEXT_TERMINATE);
token = (toggle << 31) |
(length << 16) |
((req == NULL ? 1 : 0) << 15) |
(0 << 12) |
(3 << 10) |
((usb_pipein(pipe) ? 1 : 0) << 8) | (0x80 << 0);
qtd[qtd_counter].qt_token = cpu_to_hc32(token);
if (ehci_td_buffer(&qtd[qtd_counter], buffer, length) != 0) {
printf("unable construct DATA td\n");
goto fail;
}
/* Update previous qTD! */
*tdp = cpu_to_hc32((uint32_t)&qtd[qtd_counter]);
tdp = &qtd[qtd_counter++].qt_next;
uint8_t *buf_ptr = buffer;
int left_length = length;
do {
int xfr_bytes = min(left_length,
(QT_BUFFER_CNT * 4096 -
((uint32_t)buf_ptr & 4095)) &
~4095);
Magic formula yet again ... comment would again be welcome please.
OK.
/*
* Setup request qTD (3.5 in ehci-r10.pdf)
*
* qt_next ................ 03-00 H
* qt_altnext ............. 07-04 H
* qt_token ............... 0B-08 H
*
* [ buffer, buffer_hi ] loaded with "buffer".
*/
qtd[qtd_counter].qt_next =
cpu_to_hc32(QT_NEXT_TERMINATE);
qtd[qtd_counter].qt_altnext =
cpu_to_hc32(QT_NEXT_TERMINATE);
token = (toggle << 31) |
(xfr_bytes << 16) |
((req == NULL ? 1 : 0) << 15) |
(0 << 12) |
(3 << 10) |
((usb_pipein(pipe) ? 1 : 0) << 8) | (0x80 << 0);
If you could fix all this magic afterwards (not in these patches), that'd be great.
Do you only mean #defining all those values?
qtd[qtd_counter].qt_token = cpu_to_hc32(token);
if (ehci_td_buffer(&qtd[qtd_counter], buf_ptr,
xfr_bytes) != 0) {
printf("unable construct DATA td\n");
goto fail;
}
/* Update previous qTD! */
*tdp = cpu_to_hc32((uint32_t)&qtd[qtd_counter]);
tdp = &qtd[qtd_counter++].qt_next;
buf_ptr += xfr_bytes;
left_length -= xfr_bytes;
} while (left_length > 0);
}
if (req != NULL) {
@@ -346,7 +379,8 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, flush_dcache_range((uint32_t)qh_list, ALIGN_END_ADDR(struct QH, qh_list, 1)); flush_dcache_range((uint32_t)qh, ALIGN_END_ADDR(struct QH, qh, 1));
- flush_dcache_range((uint32_t)qtd, ALIGN_END_ADDR(struct qTD, qtd,
3));
flush_dcache_range((uint32_t)qtd,
ALIGN_END_ADDR(struct qTD, qtd, qtd_count));
/* Set async. queue head pointer. */ ehci_writel(&hcor->or_asynclistaddr, (uint32_t)qh_list);
@@ -377,7 +411,7 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, invalidate_dcache_range((uint32_t)qh, ALIGN_END_ADDR(struct QH, qh, 1)); invalidate_dcache_range((uint32_t)qtd,
ALIGN_END_ADDR(struct qTD, qtd, 3));
ALIGN_END_ADDR(struct qTD, qtd, qtd_count));
token = hc32_to_cpu(vtd->qt_token); if (!(token & 0x80))
@@ -450,9 +484,11 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, ehci_readl(&hcor->or_portsc[1])); }
- free(qtd); return (dev->status != USB_ST_NOT_PROC) ? 0 : -1;
fail:
- free(qtd); return -1;
}
Best regards, Benoît Thébaudeau