
Dear Ilya Yanok,
From: Tom Rini trini@ti.com
The USB spec says that 32 bytes is the minimum required alignment. However on some platforms we have a larger minimum requirement for cache coherency. In those cases, use that value rather than the USB spec minimum. We add a cpp check to <usb.h> to define USB_DMA_MINALIGN and make use of it in ehci-hcd.c and musb_core.h. We cannot use MAX() here as we are not allowed to have tests inside of align(...).
Cc: Marek Vasut marex@denx.de Signed-off-by: Tom Rini trini@ti.com [ilya.yanok]: fix size alignment, drop (incorrect) rounding when invalidating the buffer. If we got unaligned buffer from the upper layer -- that's definetely a bug so it's good to buzz about it. But we have to align the buffer length -- upper layers should take care to reserve enough space. Signed-off-by: Ilya Yanok ilya.yanok@cogentembedded.com
Changes from Tom's V4:
- Internal buffers should be not only address but also size aligned
- Don't try to fix unalignment of external buffer
- Fix also debug() checks in ehci_td_buffer() (though size check is meaningless: in the current API only size of transfer is passed which is not always the same as size of underlying buffer and can be unaligned.
No bounce-buffering is implemented so unaligned buffers coming from the upper layers will still result in invalidate_dcache_range() vows. But I tested it with unaligned fatload and got strange result: no errors from invalidate_dcache_range, I got "EHCI timed out on TD" errors instead (the same situtation without this patch and cache disabled). Looks like unaligned buffers are problem for EHCI even without cache involved... Aligned fatload works like a charm.
drivers/usb/host/ehci-hcd.c | 89 +++++++++++++++++++++++++----------------- drivers/usb/musb/musb_core.h | 2 +- include/usb.h | 10 +++++ 3 files changed, 65 insertions(+), 36 deletions(-)
diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c index 04300be..74a5c76 100644 --- a/drivers/usb/host/ehci-hcd.c +++ b/drivers/usb/host/ehci-hcd.c @@ -34,7 +34,9 @@ struct ehci_hccr *hccr; /* R/O registers, not need for volatile */ volatile struct ehci_hcor *hcor;
static uint16_t portreset; -static struct QH qh_list __attribute__((aligned(32))); +static char __qh_list[ALIGN(sizeof(struct QH), USB_DMA_MINALIGN)]
__attribute__((aligned(USB_DMA_MINALIGN)));
+static struct QH *qh_list = (struct QH *)__qh_list;
Maybe we should create DEFINE_ALIGNED_VARIABLE as a common.h macro?
static struct descriptor { struct usb_hub_descriptor hub; @@ -172,13 +174,13 @@ static int ehci_td_buffer(struct qTD *td, void *buf, size_t sz) { uint32_t delta, next; uint32_t addr = (uint32_t)buf;
- size_t rsz = roundup(sz, 32);
size_t rsz = roundup(sz, USB_DMA_MINALIGN); int idx;
if (sz != rsz) debug("EHCI-HCD: Misaligned buffer size (%08x)\n", sz);
- if (addr & 31)
- if (addr != ALIGN(addr, USB_DMA_MINALIGN)) debug("EHCI-HCD: Misaligned buffer address (%p)\n", buf);
Good :)
idx = 0; @@ -207,8 +209,12 @@ static int ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, int length, struct devrequest *req) {
- static struct QH qh __attribute__((aligned(32)));
- static struct qTD qtd[3] __attribute__((aligned (32)));
static char *__qh[ALIGN(sizeof(struct QH), USB_DMA_MINALIGN)]
__attribute__((aligned(USB_DMA_MINALIGN)));
struct QH *qh = (struct QH *)__qh;
static char *__qtd[ALIGN(3*sizeof(struct qTD), USB_DMA_MINALIGN)]
__attribute__((aligned(USB_DMA_MINALIGN)));
struct qTD *qtd = (struct qTD *)__qtd; int qtd_counter = 0;
volatile struct qTD *vtd;
@@ -229,8 +235,8 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, le16_to_cpu(req->value), le16_to_cpu(req->value), le16_to_cpu(req->index));
- memset(&qh, 0, sizeof(struct QH));
- memset(qtd, 0, sizeof(qtd));
memset(qh, 0, sizeof(struct QH));
memset(qtd, 0, 3 * sizeof(*qtd));
toggle = usb_gettoggle(dev, usb_pipeendpoint(pipe), usb_pipeout(pipe));
@@ -244,7 +250,7 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, * qh_overlay.qt_next ...... 13-10 H * - qh_overlay.qt_altnext */
- qh.qh_link = cpu_to_hc32((uint32_t)&qh_list | QH_LINK_TYPE_QH);
- qh->qh_link = cpu_to_hc32((uint32_t)qh_list | QH_LINK_TYPE_QH); c = (usb_pipespeed(pipe) != USB_SPEED_HIGH && usb_pipeendpoint(pipe) == 0) ? 1 : 0; endpt = (8 << 28) |
@@ -255,14 +261,14 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, (usb_pipespeed(pipe) << 12) | (usb_pipeendpoint(pipe) << 8) | (0 << 7) | (usb_pipedevice(pipe) << 0);
- qh.qh_endpt1 = cpu_to_hc32(endpt);
- qh->qh_endpt1 = cpu_to_hc32(endpt); endpt = (1 << 30) | (dev->portnr << 23) | (dev->parent->devnum << 16) | (0 << 8) | (0 << 0);
- qh.qh_endpt2 = cpu_to_hc32(endpt);
- qh.qh_overlay.qt_next = cpu_to_hc32(QT_NEXT_TERMINATE);
- qh->qh_endpt2 = cpu_to_hc32(endpt);
- qh->qh_overlay.qt_next = cpu_to_hc32(QT_NEXT_TERMINATE);
- tdp = &qh.qh_overlay.qt_next;
tdp = &qh->qh_overlay.qt_next;
if (req != NULL) { /*
@@ -340,13 +346,15 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, tdp = &qtd[qtd_counter++].qt_next; }
- qh_list.qh_link = cpu_to_hc32((uint32_t)&qh | QH_LINK_TYPE_QH);
qh_list->qh_link = cpu_to_hc32((uint32_t)qh | QH_LINK_TYPE_QH);
/* Flush dcache */
- flush_dcache_range((uint32_t)&qh_list,
(uint32_t)&qh_list + sizeof(struct QH));
- flush_dcache_range((uint32_t)&qh, (uint32_t)&qh + sizeof(struct QH));
- flush_dcache_range((uint32_t)qtd, (uint32_t)qtd + sizeof(qtd));
- flush_dcache_range((uint32_t)qh_list,
(uint32_t)qh_list + ALIGN(sizeof(struct QH), USB_DMA_MINALIGN));
- flush_dcache_range((uint32_t)qh, (uint32_t)qh +
ALIGN(sizeof(struct QH), USB_DMA_MINALIGN));
- flush_dcache_range((uint32_t)qtd, (uint32_t)qtd +
ALIGN(3*sizeof(*qtd), USB_DMA_MINALIGN));
Maybe we should make a macro for those things here to prevent such spaghetti of code ?
[...]
Minor things really, otherwise it's really good :)