[U-Boot] [PATCHv2 1/4] Optimized nand_read_buf for kirkwood (V3)

21 Feb 2013

The basic idea is taken from the Linux kernel, but further optimized.
First read single bytes until the buffer is aligned to 8 bytes, then use
ldrd/strd to read and store 8 bytes at a time, then read the remaining bytes.
Tested using: 'date ; nand read.raw 0xE00000 0x0 0x10000 ; date'.
Without this patch, NAND read of 132MB took 49s (~2.69MB/s). With this
patch in place, reading the same amount of data was done in 27s
(~4.89MB/s). So read performance is increased by ~80%!
Signed-off-by: Nico Erfurth <ne@erfurth.eu>
Tested-by: Phil Sutter <phil.sutter@viprinet.com>
Cc: Prafulla Wadaskar <prafulla@marvell.com>
---
 drivers/mtd/nand/kirkwood_nand.c |   32 ++++++++++++++++++++++++++++++++
 1 files changed, 32 insertions(+), 0 deletions(-)

diff --git a/drivers/mtd/nand/kirkwood_nand.c b/drivers/mtd/nand/kirkwood_nand.c
index bdab5aa..99e5f35 100644
--- a/drivers/mtd/nand/kirkwood_nand.c
+++ b/drivers/mtd/nand/kirkwood_nand.c
@@ -38,6 +38,37 @@ struct kwnandf_registers {
 static struct kwnandf_registers *nf_reg =
    (struct kwnandf_registers *)KW_NANDF_BASE;
+
+/*
+ * Read len bytes from chip->IO_ADDR_R into buf: bytewise head until buf is
+ * 8-byte aligned, 8-byte ldrd/strd rounds, bytewise tail (idea from Linux).
+ */
+static void kw_nand_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
+{
+	struct nand_chip *chip = mtd->priv;
+
+	while (len && (unsigned long)buf & 7) {	/* head: align buf to 8 bytes */
+		*buf++ = readb(chip->IO_ADDR_R);
+		len--;
+	};
+
+	/* Inner loop, optimized a bit more than Linux's: 64bit per round. */
+	asm volatile (
+		"1:\n"
+		"  subs   %0, #8\n"		/* len -= 8, sets the flags */
+		"  ldrpld r2, [%2]\n"		/* if len >= 0: load r2/r3 from IO */
+		"  strpld r2, [%1], #8\n"	/* if len >= 0: store, buf += 8 */
+		"  bhi    1b\n"			/* len still > 0: next round */
+		"  addne  %0, #8\n"		/* len != 0: undo the last subs */
+		: "+&r" (len), "+&r" (buf)	/* %0, %1: updated by the loop */
+		: "r" (chip->IO_ADDR_R)		/* %2: address readb() reads too */
+		: "r2", "r3", "memory", "cc"	/* ldrd/strd use the r2/r3 pair */
+	);
+
+	while (len--)	/* tail: bytes left over by the 8-byte loop */
+		*buf++ = readb(chip->IO_ADDR_R);
+}
+
 /*
  * hardware specific access to control-lines/bits
  */
@@ -76,6 +107,7 @@ int board_nand_init(struct nand_chip *nand)
    nand->options = NAND_COPYBACK | NAND_CACHEPRG | NAND_NO_PADDING;
    nand->ecc.mode = NAND_ECC_SOFT;
    nand->cmd_ctrl = kw_nand_hwcontrol;
+	nand->read_buf = kw_nand_read_buf;
    nand->chip_delay = 40;
    nand->select_chip = kw_nand_select_chip;
    return 0;
-- 
1.7.3.4