[U-Boot] [PATCH] Improve DaVinci SPI speed

Reduce the number of reads per byte transferred on the BUF register from 2 to 1 and take advantage of the TX buffer in the SPI module.
Signed-off-by: Delio Brignoli dbrignoli@audioscience.com --- drivers/spi/davinci_spi.c | 67 +++++++++++++++++++++++--------------------- 1 files changed, 35 insertions(+), 32 deletions(-)
diff --git a/drivers/spi/davinci_spi.c b/drivers/spi/davinci_spi.c index 60ba007..b4a74de 100644 --- a/drivers/spi/davinci_spi.c +++ b/drivers/spi/davinci_spi.c @@ -131,6 +131,7 @@ int spi_xfer(struct spi_slave *slave, unsigned int bitlen, { struct davinci_spi_slave *ds = to_davinci_spi(slave); unsigned int len, data1_reg_val = readl(&ds->regs->dat1); + unsigned int i_cnt = 0, o_cnt = 0, buf_reg_val; int ret, i; const u8 *txp = dout; /* dout can be NULL for read operation */ u8 *rxp = din; /* din can be NULL for write operation */ @@ -159,41 +160,43 @@ int spi_xfer(struct spi_slave *slave, unsigned int bitlen, readl(&ds->regs->buf);
/* keep writing and reading 1 byte until done */ - for (i = 0; i < len; i++) { - /* wait till TXFULL is asserted */ - while (readl(&ds->regs->buf) & SPIBUF_TXFULL_MASK); - - /* write the data */ - data1_reg_val &= ~0xFFFF; - if (txp) { - data1_reg_val |= *txp; - txp++; + while((i_cnt < len) || (o_cnt < len)) { + /* read RX buffer and flags */ + buf_reg_val = readl(&ds->regs->buf); + + /* if data is available */ + if ((i_cnt < len) && (buf_reg_val & SPIBUF_RXEMPTY_MASK) == 0) { + /* if there is no read buffer simply ignore the read character */ + if(rxp) { + *rxp = buf_reg_val & 0xFF; + rxp++; + } + /* increment read words count */ + i_cnt++; }
- /* - * Write to DAT1 is required to keep the serial transfer going. - * We just terminate when we reach the end. - */ - if ((i == (len - 1)) && (flags & SPI_XFER_END)) { - /* clear CS hold */ - writel(data1_reg_val & - ~(1 << SPIDAT1_CSHOLD_SHIFT), &ds->regs->dat1); - } else { - /* enable CS hold */ - data1_reg_val |= ((1 << SPIDAT1_CSHOLD_SHIFT) | + /* if the tx buffer is empty and there is still data to transmit */ + if ((o_cnt < len) && ((buf_reg_val & SPIBUF_TXFULL_MASK) == 0)) { + /* write the data */ + data1_reg_val &= ~0xFFFF; + if(txp) { + data1_reg_val |= *txp; + txp++; + } + /* write to DAT1 is required to keep the serial transfer going */ + /* we just terminate when we reach the end */ + if((o_cnt == (len -1)) && (flags & SPI_XFER_END)) { + /* clear CS hold */ + writel(data1_reg_val & + ~(1 << SPIDAT1_CSHOLD_SHIFT), &ds->regs->dat1); + } else { + /* enable CS hold and write TX register */ + data1_reg_val |= ((1 << SPIDAT1_CSHOLD_SHIFT) | (slave->cs << SPIDAT1_CSNR_SHIFT)); - writel(data1_reg_val, &ds->regs->dat1); - } - - /* read the data - wait for data availability */ - while (readl(&ds->regs->buf) & SPIBUF_RXEMPTY_MASK); - - if (rxp) { - *rxp = readl(&ds->regs->buf) & 0xFF; - rxp++; - } else { - /* simply drop the read character */ - readl(&ds->regs->buf); + writel(data1_reg_val, &ds->regs->dat1); + } + /* increment written words count */ + o_cnt++; } } return 0;

Reduce the number of reads per byte transferred on the BUF register from 2 to 1 and take advantage of the TX buffer in the SPI module.
May I ask which chip device this was tested on.
Signed-off-by: Delio Brignoli dbrignoli@audioscience.com
drivers/spi/davinci_spi.c | 67 +++++++++++++++++++++++-----------------
1 files changed, 35 insertions(+), 32 deletions(-)
diff --git a/drivers/spi/davinci_spi.c b/drivers/spi/davinci_spi.c index 60ba007..b4a74de 100644 --- a/drivers/spi/davinci_spi.c +++ b/drivers/spi/davinci_spi.c @@ -131,6 +131,7 @@ int spi_xfer(struct spi_slave *slave, unsigned int bitlen, { struct davinci_spi_slave *ds = to_davinci_spi(slave); unsigned int len, data1_reg_val = readl(&ds->regs->dat1);
- unsigned int i_cnt = 0, o_cnt = 0, buf_reg_val; int ret, i; const u8 *txp = dout; /* dout can be NULL for read operation */ u8 *rxp = din; /* din can be NULL for write operation */
@@ -159,41 +160,43 @@ int spi_xfer(struct spi_slave *slave, unsigned int bitlen, readl(&ds->regs->buf);
/* keep writing and reading 1 byte until done */
- for (i = 0; i < len; i++) {
/* wait till TXFULL is asserted */
while (readl(&ds->regs->buf) & SPIBUF_TXFULL_MASK);
/* write the data */
data1_reg_val &= ~0xFFFF;
if (txp) {
data1_reg_val |= *txp;
txp++;
- while((i_cnt < len) || (o_cnt < len)) {
/* read RX buffer and flags */
buf_reg_val = readl(&ds->regs->buf);
/* if data is available */
if ((i_cnt < len) && (buf_reg_val & SPIBUF_RXEMPTY_MASK) == 0)
{
/* if there is no read buffer simply ignore the read
character */
if(rxp) {
*rxp = buf_reg_val & 0xFF;
rxp++;
}
/* increment read words count */
}i_cnt++;
/*
* Write to DAT1 is required to keep the serial transfer going.
* We just terminate when we reach the end.
*/
if ((i == (len - 1)) && (flags & SPI_XFER_END)) {
/* clear CS hold */
writel(data1_reg_val &
~(1 << SPIDAT1_CSHOLD_SHIFT), &ds->regs->dat1);
} else {
/* enable CS hold */
data1_reg_val |= ((1 << SPIDAT1_CSHOLD_SHIFT) |
/* if the tx buffer is empty and there is still data to
transmit */
if ((o_cnt < len) && ((buf_reg_val & SPIBUF_TXFULL_MASK) ==
0)) {
/* write the data */
data1_reg_val &= ~0xFFFF;
if(txp) {
data1_reg_val |= *txp;
txp++;
}
/* write to DAT1 is required to keep the serial transfer
going */
/* we just terminate when we reach the end */
if((o_cnt == (len -1)) && (flags & SPI_XFER_END)) {
/* clear CS hold */
writel(data1_reg_val &
~(1 << SPIDAT1_CSHOLD_SHIFT), &ds->regs-
dat1);
} else {
/* enable CS hold and write TX register */
data1_reg_val |= ((1 << SPIDAT1_CSHOLD_SHIFT) | (slave->cs << SPIDAT1_CSNR_SHIFT));
writel(data1_reg_val, &ds->regs->dat1);
}
/* read the data - wait for data availability */
while (readl(&ds->regs->buf) & SPIBUF_RXEMPTY_MASK);
if (rxp) {
*rxp = readl(&ds->regs->buf) & 0xFF;
rxp++;
} else {
/* simply drop the read character */
readl(&ds->regs->buf);
writel(data1_reg_val, &ds->regs->dat1);
}
/* increment written words count */
} } return 0;o_cnt++;
-- 1.6.3.3

Hello Paulraj,
On 13/05/2010, at 17:10, Paulraj, Sandeep wrote:
Reduce the number of reads per byte transferred on the BUF register from 2 to 1 and take advantage of the TX buffer in the SPI module.
May I ask which chip device this was tested on.
Sure, it was tested on a LogicPD Zoomâ„¢ OMAP-L138 eXperimenter Kit (da850_omapl138_evm_config).
-- Delio
Signed-off-by: Delio Brignoli dbrignoli@audioscience.com
drivers/spi/davinci_spi.c | 67 +++++++++++++++++++++++-----------------
1 files changed, 35 insertions(+), 32 deletions(-)
diff --git a/drivers/spi/davinci_spi.c b/drivers/spi/davinci_spi.c index 60ba007..b4a74de 100644 --- a/drivers/spi/davinci_spi.c +++ b/drivers/spi/davinci_spi.c @@ -131,6 +131,7 @@ int spi_xfer(struct spi_slave *slave, unsigned int bitlen, { struct davinci_spi_slave *ds = to_davinci_spi(slave); unsigned int len, data1_reg_val = readl(&ds->regs->dat1);
- unsigned int i_cnt = 0, o_cnt = 0, buf_reg_val; int ret, i; const u8 *txp = dout; /* dout can be NULL for read operation */ u8 *rxp = din; /* din can be NULL for write operation */
@@ -159,41 +160,43 @@ int spi_xfer(struct spi_slave *slave, unsigned int bitlen, readl(&ds->regs->buf);
/* keep writing and reading 1 byte until done */
- for (i = 0; i < len; i++) {
/* wait till TXFULL is asserted */
while (readl(&ds->regs->buf) & SPIBUF_TXFULL_MASK);
/* write the data */
data1_reg_val &= ~0xFFFF;
if (txp) {
data1_reg_val |= *txp;
txp++;
- while((i_cnt < len) || (o_cnt < len)) {
/* read RX buffer and flags */
buf_reg_val = readl(&ds->regs->buf);
/* if data is available */
if ((i_cnt < len) && (buf_reg_val & SPIBUF_RXEMPTY_MASK) == 0)
{
/* if there is no read buffer simply ignore the read
character */
if(rxp) {
*rxp = buf_reg_val & 0xFF;
rxp++;
}
/* increment read words count */
}i_cnt++;
/*
* Write to DAT1 is required to keep the serial transfer going.
* We just terminate when we reach the end.
*/
if ((i == (len - 1)) && (flags & SPI_XFER_END)) {
/* clear CS hold */
writel(data1_reg_val &
~(1 << SPIDAT1_CSHOLD_SHIFT), &ds->regs->dat1);
} else {
/* enable CS hold */
data1_reg_val |= ((1 << SPIDAT1_CSHOLD_SHIFT) |
/* if the tx buffer is empty and there is still data to
transmit */
if ((o_cnt < len) && ((buf_reg_val & SPIBUF_TXFULL_MASK) ==
0)) {
/* write the data */
data1_reg_val &= ~0xFFFF;
if(txp) {
data1_reg_val |= *txp;
txp++;
}
/* write to DAT1 is required to keep the serial transfer
going */
/* we just terminate when we reach the end */
if((o_cnt == (len -1)) && (flags & SPI_XFER_END)) {
/* clear CS hold */
writel(data1_reg_val &
~(1 << SPIDAT1_CSHOLD_SHIFT), &ds->regs-
dat1);
} else {
/* enable CS hold and write TX register */
data1_reg_val |= ((1 << SPIDAT1_CSHOLD_SHIFT) | (slave->cs << SPIDAT1_CSNR_SHIFT));
writel(data1_reg_val, &ds->regs->dat1);
}
/* read the data - wait for data availability */
while (readl(&ds->regs->buf) & SPIBUF_RXEMPTY_MASK);
if (rxp) {
*rxp = readl(&ds->regs->buf) & 0xFF;
rxp++;
} else {
/* simply drop the read character */
readl(&ds->regs->buf);
writel(data1_reg_val, &ds->regs->dat1);
}
/* increment written words count */
} } return 0;o_cnt++;
-- 1.6.3.3

Hello Paulraj,
On 13/05/2010, at 17:10, Paulraj, Sandeep wrote:
Reduce the number of reads per byte transferred on the BUF register
from 2
to 1 and take advantage of the TX buffer in the SPI module.
May I ask which chip device this was tested on.
Sure, it was tested on a LogicPD Zoom(tm) OMAP-L138 eXperimenter Kit (da850_omapl138_evm_config).
--
If nobody has objections then I will apply it this weekend to u-boot-ti
The reason for asking which board/SOC was because I don't have that EVM.

Hi Sandeep,
On Thu, May 13, 2010 at 23:01:31, Paulraj, Sandeep wrote:
Hello Paulraj,
On 13/05/2010, at 17:10, Paulraj, Sandeep wrote:
Reduce the number of reads per byte transferred on the BUF register
from 2
to 1 and take advantage of the TX buffer in the SPI module.
May I ask which chip device this was tested on.
Sure, it was tested on a LogicPD Zoom(tm) OMAP-L138 eXperimenter Kit (da850_omapl138_evm_config).
--
If nobody has objections then I will apply it this weekend to u-boot-ti
I am yet to review this (came in only yesterday evening). Can you please wait till early next week before applying it.
Thanks, Sekhar

Hi Delio,
On Thu, May 13, 2010 at 18:27:51, Delio Brignoli wrote:
Reduce the number of reads per byte transferred on the BUF register from 2 to 1 and take advantage of the TX buffer in the SPI module.
The patch looks good to me.
Can you please publish some sort of numbers in the patch description indicating the performance improvement achieved?
A minor nit below:
Signed-off-by: Delio Brignoli dbrignoli@audioscience.com
drivers/spi/davinci_spi.c | 67 +++++++++++++++++++++++--------------------- 1 files changed, 35 insertions(+), 32 deletions(-)
diff --git a/drivers/spi/davinci_spi.c b/drivers/spi/davinci_spi.c index 60ba007..b4a74de 100644 --- a/drivers/spi/davinci_spi.c +++ b/drivers/spi/davinci_spi.c @@ -131,6 +131,7 @@ int spi_xfer(struct spi_slave *slave, unsigned int bitlen, {
[...]
/* write to DAT1 is required to keep the serial transfer going */
/* we just terminate when we reach the end */
if((o_cnt == (len -1)) && (flags & SPI_XFER_END)) {
Please include a space before the '-'
Thanks, Sekhar

Hello Sekhar,
On 18/05/2010, at 14:25, Nori, Sekhar wrote:
The patch looks good to me.
Can you please publish some sort of numbers in the patch description indicating the performance improvement achieved?
will do.
A minor nit below:
[...]
if((o_cnt == (len -1)) && (flags & SPI_XFER_END)) {
Please include a space before the '-'
Thanks -- Delio
Thanks, Sekhar

Hello Sekhar,
I resubmitted the patch a few days ago (13th of May) with the changes you requested. Please ack it, so it can be applied.
Thanks -- Delio
On 18/05/2010, at 14:25, Nori, Sekhar wrote:
On Thu, May 13, 2010 at 18:27:51, Delio Brignoli wrote:
Reduce the number of reads per byte transferred on the BUF register from 2 to 1 and take advantage of the TX buffer in the SPI module.
The patch looks good to me.
Can you please publish some sort of numbers in the patch description indicating the performance improvement achieved?
A minor nit below:
Signed-off-by: Delio Brignoli dbrignoli@audioscience.com
drivers/spi/davinci_spi.c | 67 +++++++++++++++++++++++--------------------- 1 files changed, 35 insertions(+), 32 deletions(-)
diff --git a/drivers/spi/davinci_spi.c b/drivers/spi/davinci_spi.c index 60ba007..b4a74de 100644 --- a/drivers/spi/davinci_spi.c +++ b/drivers/spi/davinci_spi.c @@ -131,6 +131,7 @@ int spi_xfer(struct spi_slave *slave, unsigned int bitlen, {
[...]
/* write to DAT1 is required to keep the serial transfer going */
/* we just terminate when we reach the end */
if((o_cnt == (len -1)) && (flags & SPI_XFER_END)) {
Please include a space before the '-'
Thanks, Sekhar

Dear Delio Brignoli,
In message 25096244-C685-4D68-BBF9-6DF2F154AFB8@audioscience.com you wrote:
Reduce the number of reads per byte transferred on the BUF register from 2 to 1 and take advantage of the TX buffer in the SPI module.
...
/* if data is available */
if ((i_cnt < len) && (buf_reg_val & SPIBUF_RXEMPTY_MASK) == 0) {
/* if there is no read buffer simply ignore the read character */
if(rxp) {
*rxp = buf_reg_val & 0xFF;
rxp++;
}
Please change into:
if (rxp) *rxp++ = buf_reg_val & 0xFF;
if ((o_cnt < len) && ((buf_reg_val & SPIBUF_TXFULL_MASK) == 0)) {
/* write the data */
data1_reg_val &= ~0xFFFF;
if(txp) {
data1_reg_val |= *txp;
txp++;
}
Same here:
if (txp) data1_reg_val |= *txp++; Please fix all other occurrences of "if(" into "if (" as well.
/* write to DAT1 is required to keep the serial transfer going */
/* we just terminate when we reach the end */
Incorrect multiline comment style.
writel(data1_reg_val &
~(1 << SPIDAT1_CSHOLD_SHIFT), &ds->regs->dat1);
Line too long.
Best regards,
Wolfgang Denk

Hello Wolfgang,
On 21/05/2010, at 15:13, Wolfgang Denk wrote:
*rxp = buf_reg_val & 0xFF;
rxp++;
}
Please change into:
if (rxp) *rxp++ = buf_reg_val & 0xFF;
Are you sure? Is folding that 3 line block into a one liner really worth the loss of readability? I know what that means, so do you and many others on this ML, but that's bound to raise a few eyebrows. A quick google search reveals quite a few articles devoted to clearing up the confusion about the semantics of that pointer dereference and post-increment. Just a few years ago I would have preferred your suggestion, but now I find that readability trumps cleverness, at least in a case like this. In the end it's your decision: if you like to be kinky, it's your prerogative ;-)
if ((o_cnt < len) && ((buf_reg_val & SPIBUF_TXFULL_MASK) == 0)) {
/* write the data */
data1_reg_val &= ~0xFFFF;
if(txp) {
data1_reg_val |= *txp;
txp++;
}
Same here:
if (txp) data1_reg_val |= *txp++;
See my comment above.
Please fix all other occurrences of "if(" into "if (" as well.
OK
/* write to DAT1 is required to keep the serial transfer going */
/* we just terminate when we reach the end */
will fix
writel(data1_reg_val &
~(1 << SPIDAT1_CSHOLD_SHIFT), &ds->regs->dat1);
Line too long.
ditto
Thanks for the feedback -- Delio
Best regards,
Wolfgang Denk
-- DENX Software Engineering GmbH, MD: Wolfgang Denk & Detlev Zundel HRB 165235 Munich, Office: Kirchenstr.5, D-82194 Groebenzell, Germany Phone: (+49)-8142-66989-10 Fax: (+49)-8142-66989-80 Email: wd@denx.de The software required `Windows 95 or better', so I installed Linux.

Dear Delio Brignoli,
In message 15866EAE-624B-4850-B10F-78AB99E3A1A2@audioscience.com you wrote:
On 21/05/2010, at 15:13, Wolfgang Denk wrote:
*rxp = buf_reg_val & 0xFF;
rxp++;
}
Please change into:
if (rxp) *rxp++ = buf_reg_val & 0xFF;
Are you sure? Is folding that 3 line block into a one liner really worth the loss of readability? I know what that means, so do you and many others on this ML, but that's bound to raise a few eyebrows. A quick
Removing the need for braces is a good thing.
This is plain C, without any tricks or loopholes.
in a case like this. In the end it's your decision: if you like to be kinky, it's your prerogative ;-)
Kinky? Funny how you see that.
Best regards,
Wolfgang Denk

Hello Wolfgang,
On 24/05/2010, at 22:48, Wolfgang Denk wrote:
In message 15866EAE-624B-4850-B10F-78AB99E3A1A2@audioscience.com you wrote:
On 21/05/2010, at 15:13, Wolfgang Denk wrote:
*rxp = buf_reg_val & 0xFF;
rxp++;
}
Please change into:
if (rxp) *rxp++ = buf_reg_val & 0xFF;
Are you sure? Is folding that 3 line block into a one liner really worth the loss of readability? I know what that means, so do you and many others on this ML, but that's bound to raise a few eyebrows. A quick
Removing the need for braces is a good thing.
by making the code less intelligible, which is a bad thing.
This is plain C, without any tricks or loopholes.
This is C, the hard-to-read kind, but you refuse to acknowledge that by stating only your side of the argument, while I pointed out the tradeoff involved and my opinion. I have no inclination to argue in this fashion :-)
Kind Regards -- Delio
Best regards,
Wolfgang Denk
-- DENX Software Engineering GmbH, MD: Wolfgang Denk & Detlev Zundel HRB 165235 Munich, Office: Kirchenstr.5, D-82194 Groebenzell, Germany Phone: (+49)-8142-66989-10 Fax: (+49)-8142-66989-80 Email: wd@denx.de Build a system that even a fool can use and only a fool will want to use it.

Hi Delio,
On Sun, May 23, 2010 at 17:11:56, Delio Brignoli wrote:
Hello Wolfgang,
On 21/05/2010, at 15:13, Wolfgang Denk wrote:
*rxp = buf_reg_val & 0xFF;
rxp++;
}
Please change into:
if (rxp) *rxp++ = buf_reg_val & 0xFF;
Are you sure? Is folding that 3 line block into a one liner really worth the loss of readability? I know what that means, so do you and many others on this ML, but that's bound to raise a few eyebrows. A quick google search reveals quite a few articles devoted to clearing up the confusion about the semantics of that pointer dereference and post-increment. Just a few years ago I would have preferred your suggestion, but now I find that readability trumps cleverness, at least in a case like this. In the end it's your decision: if you like to be kinky, it's your prerogative ;-)
Looking for the pointer de-reference and increment expression in Linux kernel, I had ~4500 hits and ~600 hits in U-Boot. So, may be this is not such an uncommon expression after all (at least in the kernel/U-Boot world).
Considering this, can you please accept the change Wolfgang is asking for for so this useful patch can move forward?
After all, code readability is also about pattern matching in the mind and if developers are already used to seeing this expression not many would find it odd.
Thanks, Sekhar

Hello Sekhar,
On 01/06/2010, at 12:18, Nori, Sekhar wrote: [...]
Considering this, can you please accept the change Wolfgang is asking for for so this useful patch can move forward?
Yes, I am fine applying the changes Wolfgang requested. I have been busy working on other stuff, hence the delay. Will be posting the updated patch shortly.
Thanks -- Delio

On Thu, May 13, 2010 at 8:57 AM, Delio Brignoli dbrignoli@audioscience.com wrote:
Reduce the number of reads per byte transferred on the BUF register from 2 to 1 and take advantage of the TX buffer in the SPI module. On LogicPD OMAP-L138 EVM, SPI read throughput goes up from ~0.8Mbyte/s to ~1.3Mbyte/s. Tested with a 2Mbyte image file.
Signed-off-by: Delio Brignoli dbrignoli@audioscience.com
Applies cleanly on u-boot master branch -- 2f05e39 of git://git.denx.de/u-boot.git
Applies cleanly and compiles with da850evm_config on u-boot-omap-l1 master branch -- 5f16b85 of 2f05e39 . Also running the resulting u-boot.bin did show a noticeable improvement in kernel load time.
Tested-by: Ben Gardiner bengardiner@nanometrics.ca

On Fri, May 21, 2010 at 9:15 AM, Ben Gardiner bengardiner@nanometrics.ca wrote:
Applies cleanly and compiles with da850evm_config on u-boot-omap-l1 master branch -- 5f16b85 of 2f05e39 . Also running the resulting
Sorry, that should be 5f16b85 of git://arago-project.org/git/people/sekhar/u-boot-omapl1.git .
participants (5)
-
Ben Gardiner
-
Delio Brignoli
-
Nori, Sekhar
-
Paulraj, Sandeep
-
Wolfgang Denk