[U-Boot] EXT4 slow boot problem

Hi,
On Wed Oct 8 10:11:58 CEST 2014, Blair Pendleton sent following mail to the list: We have discovered a significant performance problem in U-Boot accessing an EXT4 file system. If a large image file happens to be created with more than 4 extents, file read time increases from a few seconds to several minutes. The root cause is the indexed extent block is being re-read tens of thousands of times from the underlying device.
In our case, the image file is ~44MB and the underlying device is a MMC SD card. Software updates are performed in Linux, and depending on a particular instance of EXT4 block allocations, the image file is created with 3, 4 or 5 extents. With 3 or 4 extents (tree depth 0), file reads are fast because the extent records are located in the cached i-node and the entire file is read with 3 or 4 large MMC block reads. With 5 extents (tree depth 1), extent records are in a separate block referenced by an extent index node. The current U-Boot implementation makes no attempt to cache this block and is forced to re-read it for every block of file data (EXT4 block size 1KB). This results in excess of 43000 MMC read operations and boot times so long that the system appears to be hung.
Blair Pendleton Fluke Networks R&D Colorado Springs, CO, USA
I encountered the same problem on my board. My Linux Kernel is in an ext4 partition on eMMC. I usually read it in less than 1s. Trace example: 10870816 bytes read in 622 ms (16.7 MiB/s) When I have the problem (kernel file in more than 4 extents), I read it in 30s to 35s. Trace example: 10876728 bytes read in 34035 ms (311.5 KiB/s)
I don't use an up-to-date U-Boot but I think this is not corrected in latest U-Boot. I made a patch that does not completely solve the problem but should fix performance problem in my case.
It is not good enough for mainline but might help others if they encounter the same problem.
Problems I see in this patch: - there are still a useless zalloc/free in read_allocated_block (called for each block) - I don't know how many blocks should be cached. In my case I need only one block to be cached. - I have not tested this patch with more than one extent index block.
Best Regards,
Christophe
Christophe Ronco (1): ext4: cache first extent index blocks (#06343)
fs/ext4/ext4_common.c | 93 ++++++++++++++++++++++++++++++++++++++++++++++++--- fs/ext4/ext4_common.h | 16 +++++++++ fs/ext4/ext4fs.c | 16 +++++++-- 3 files changed, 117 insertions(+), 8 deletions(-)

Extent index blocks are read many times when reading completely a big file. This is time costly if underlying driver is slow. Caching the first index blocks cost a bit of RAM but speed up file reading a lot. --- fs/ext4/ext4_common.c | 93 ++++++++++++++++++++++++++++++++++++++++++++++++--- fs/ext4/ext4_common.h | 16 +++++++++ fs/ext4/ext4fs.c | 16 +++++++-- 3 files changed, 117 insertions(+), 8 deletions(-)
diff --git a/fs/ext4/ext4_common.c b/fs/ext4/ext4_common.c index cab5465..68ab2e9 100644 --- a/fs/ext4/ext4_common.c +++ b/fs/ext4/ext4_common.c @@ -44,6 +44,7 @@ int ext4fs_indir3_size; int ext4fs_indir3_blkno = -1; struct ext2_inode *g_parent_inode; static int symlinknest; +struct ext4_extent_blocs ext4fs_extents_blocs;
#if defined(CONFIG_EXT4_WRITE) uint32_t ext4fs_div_roundup(uint32_t size, uint32_t n) @@ -1413,6 +1414,7 @@ static struct ext4_extent_header *ext4fs_get_extent_block unsigned long long block; int blksz = EXT2_BLOCK_SIZE(data); int i; + int found;
while (1) { index = (struct ext4_extent_idx *)(ext_block + 1); @@ -1435,11 +1437,24 @@ static struct ext4_extent_header *ext4fs_get_extent_block block = le16_to_cpu(index[i].ei_leaf_hi); block = (block << 32) + le32_to_cpu(index[i].ei_leaf_lo);
- if (ext4fs_devread((lbaint_t)block << log2_blksz, 0, blksz, - buf)) - ext_block = (struct ext4_extent_header *)buf; - else - return 0; + /* look in saved extent blocks */ + found = 0; + for (i = 0; i < ext4fs_extents_blocs.bloc_nb; i++) { + if (ext4fs_extents_blocs.ext_blocs[i].block == block) { + memcpy(buf, ext4fs_extents_blocs.ext_blocs[i].ext_header, + blksz); + ext_block = (struct ext4_extent_header *)buf; + found = 1; + } + } + + if (found == 0) { + if (ext4fs_devread((lbaint_t)block << log2_blksz, 0, + blksz, buf)) + ext_block = (struct ext4_extent_header *)buf; + else + return 0; + } } }
@@ -1465,6 +1480,74 @@ static int ext4fs_blockgroup (char *)blkgrp); }
+void ext4fs_init_extents_blocks_global(void) +{ + memset(&ext4fs_extents_blocs, 0, sizeof(struct ext4_extent_blocs)); +} + +int ext4fs_read_extents_blocks(struct ext2_inode *inode) +{ + int blksz = EXT2_BLOCK_SIZE(ext4fs_root); + int log2_blksz; + struct ext4_extent_header *ext_block; + struct ext4_extent_idx *index; + unsigned long long block; + int entries; + char *buf; + int i; + + ext4fs_extents_blocs.bloc_nb = 0; + log2_blksz = LOG2_BLOCK_SIZE(ext4fs_root) + - get_fs()->dev_desc->log2blksz; + + if ((le32_to_cpu(inode->flags) & EXT4_EXTENTS_FL) == 0) + return 0; + + ext_block = (struct ext4_extent_header *)inode->b.blocks.dir_blocks; + + if (le16_to_cpu(ext_block->eh_magic) != EXT4_EXT_MAGIC) + return 0; + if (ext_block->eh_depth == 0) + return 0; + + entries = le16_to_cpu(ext_block->eh_entries); + index = (struct ext4_extent_idx *)(ext_block + 1); + + for (i = 0; (i < entries) && (i < MAX_EXTENT_BLOCS); i++) { + /* bloc number */ + block = le16_to_cpu(index[i].ei_leaf_hi); + block = (block << 32) + le32_to_cpu(index[i].ei_leaf_lo); + /* bloc data */ + buf = zalloc(blksz); + if (!buf) + break; + if (ext4fs_devread((lbaint_t)block << log2_blksz, 0, blksz, + buf) == 0) { + free(buf); + break; + } + ext4fs_extents_blocs.ext_blocs[i].block = block; + ext4fs_extents_blocs.ext_blocs[i].ext_header = + (struct ext4_extent_header *)buf; + } + ext4fs_extents_blocs.bloc_nb = i; + return i; +} + +void ext4fs_free_extents_blocks(void) +{ + int i; + + for (i = 0; i < ext4fs_extents_blocs.bloc_nb; i++) { + if (ext4fs_extents_blocs.ext_blocs[i].ext_header) { + free(ext4fs_extents_blocs.ext_blocs[i].ext_header); + ext4fs_extents_blocs.ext_blocs[i].ext_header = NULL; + ext4fs_extents_blocs.ext_blocs[i].block = 0; + } + } + ext4fs_extents_blocs.bloc_nb = 0; +} + int ext4fs_read_inode(struct ext2_data *data, int ino, struct ext2_inode *inode) { struct ext2_block_group blkgrp; diff --git a/fs/ext4/ext4_common.h b/fs/ext4/ext4_common.h index 48fd2ac..0f80856 100644 --- a/fs/ext4/ext4_common.h +++ b/fs/ext4/ext4_common.h @@ -41,6 +41,18 @@ #define SUPERBLOCK_SIZE 1024 #define F_FILE 1
+#define MAX_EXTENT_BLOCS 10 + +struct ext4_extent_bloc { + unsigned long long block; + struct ext4_extent_header *ext_header; +}; + +struct ext4_extent_blocs { + int bloc_nb; + struct ext4_extent_bloc ext_blocs[MAX_EXTENT_BLOCS]; +}; + static inline void *zalloc(size_t size) { void *p = memalign(ARCH_DMA_MINALIGN, size); @@ -57,6 +69,10 @@ int ext4fs_find_file(const char *path, struct ext2fs_node *rootnode, int ext4fs_iterate_dir(struct ext2fs_node *dir, char *name, struct ext2fs_node **fnode, int *ftype);
+void ext4fs_init_extents_blocks_global(void); +int ext4fs_read_extents_blocks(struct ext2_inode *inode); +void ext4fs_free_extents_blocks(void); + #if defined(CONFIG_EXT4_WRITE) uint32_t ext4fs_div_roundup(uint32_t size, uint32_t n); int ext4fs_checksum_update(unsigned int i); diff --git a/fs/ext4/ext4fs.c b/fs/ext4/ext4fs.c index 258b937..88db1fb 100644 --- a/fs/ext4/ext4fs.c +++ b/fs/ext4/ext4fs.c @@ -70,6 +70,12 @@ int ext4fs_read_file(struct ext2fs_node *node, loff_t pos,
blockcnt = lldiv(((len + pos) + blocksize - 1), blocksize);
+ ext4fs_init_extents_blocks_global(); + if (blockcnt > 100) { + /* Big reading: read extent blocks tree if any */ + ext4fs_read_extents_blocks(&(node->inode)); + } + for (i = lldiv(pos, blocksize); i < blockcnt; i++) { lbaint_t blknr; int blockoff = pos - (blocksize * i); @@ -77,7 +83,7 @@ int ext4fs_read_file(struct ext2fs_node *node, loff_t pos, int skipfirst = 0; blknr = read_allocated_block(&(node->inode), i); if (blknr < 0) - return -1; + goto error;
blknr = blknr << log2_fs_blocksize;
@@ -134,7 +140,7 @@ int ext4fs_read_file(struct ext2fs_node *node, loff_t pos, delayed_extent, delayed_buf); if (status == 0) - return -1; + goto error; previous_block_number = -1; } memset(buf, 0, blocksize - skipfirst); @@ -147,12 +153,16 @@ int ext4fs_read_file(struct ext2fs_node *node, loff_t pos, delayed_skipfirst, delayed_extent, delayed_buf); if (status == 0) - return -1; + goto error; previous_block_number = -1; }
*actread = len; + ext4fs_free_extents_blocks(); return 0; +error: + ext4fs_free_extents_blocks(); + return -1; }
int ext4fs_ls(const char *dirname)

On Fri, Oct 27, 2017 at 04:18:11PM +0200, Christophe Ronco wrote:
Extent index blocks are read many times when reading completely a big file. This is time costly if underlying driver is slow. Caching the first index blocks cost a bit of RAM but speed up file reading a lot.
fs/ext4/ext4_common.c | 93 ++++++++++++++++++++++++++++++++++++++++++++++++--- fs/ext4/ext4_common.h | 16 +++++++++ fs/ext4/ext4fs.c | 16 +++++++-- 3 files changed, 117 insertions(+), 8 deletions(-)
diff --git a/fs/ext4/ext4_common.c b/fs/ext4/ext4_common.c index cab5465..68ab2e9 100644 --- a/fs/ext4/ext4_common.c +++ b/fs/ext4/ext4_common.c @@ -44,6 +44,7 @@ int ext4fs_indir3_size; int ext4fs_indir3_blkno = -1; struct ext2_inode *g_parent_inode; static int symlinknest; +struct ext4_extent_blocs ext4fs_extents_blocs;
#if defined(CONFIG_EXT4_WRITE) uint32_t ext4fs_div_roundup(uint32_t size, uint32_t n) @@ -1413,6 +1414,7 @@ static struct ext4_extent_header *ext4fs_get_extent_block unsigned long long block; int blksz = EXT2_BLOCK_SIZE(data); int i;
int found;
while (1) { index = (struct ext4_extent_idx *)(ext_block + 1);
@@ -1435,11 +1437,24 @@ static struct ext4_extent_header *ext4fs_get_extent_block block = le16_to_cpu(index[i].ei_leaf_hi); block = (block << 32) + le32_to_cpu(index[i].ei_leaf_lo);
if (ext4fs_devread((lbaint_t)block << log2_blksz, 0, blksz,
buf))
ext_block = (struct ext4_extent_header *)buf;
else
return 0;
/* look in saved extent blocks */
found = 0;
for (i = 0; i < ext4fs_extents_blocs.bloc_nb; i++) {
if (ext4fs_extents_blocs.ext_blocs[i].block == block) {
memcpy(buf, ext4fs_extents_blocs.ext_blocs[i].ext_header,
blksz);
ext_block = (struct ext4_extent_header *)buf;
found = 1;
}
}
if (found == 0) {
if (ext4fs_devread((lbaint_t)block << log2_blksz, 0,
blksz, buf))
ext_block = (struct ext4_extent_header *)buf;
else
return 0;
}}
}
@@ -1465,6 +1480,74 @@ static int ext4fs_blockgroup (char *)blkgrp); }
+void ext4fs_init_extents_blocks_global(void) +{
- memset(&ext4fs_extents_blocs, 0, sizeof(struct ext4_extent_blocs));
+}
+int ext4fs_read_extents_blocks(struct ext2_inode *inode) +{
- int blksz = EXT2_BLOCK_SIZE(ext4fs_root);
- int log2_blksz;
- struct ext4_extent_header *ext_block;
- struct ext4_extent_idx *index;
- unsigned long long block;
- int entries;
- char *buf;
- int i;
- ext4fs_extents_blocs.bloc_nb = 0;
- log2_blksz = LOG2_BLOCK_SIZE(ext4fs_root)
- get_fs()->dev_desc->log2blksz;
- if ((le32_to_cpu(inode->flags) & EXT4_EXTENTS_FL) == 0)
return 0;
- ext_block = (struct ext4_extent_header *)inode->b.blocks.dir_blocks;
- if (le16_to_cpu(ext_block->eh_magic) != EXT4_EXT_MAGIC)
return 0;
- if (ext_block->eh_depth == 0)
return 0;
- entries = le16_to_cpu(ext_block->eh_entries);
- index = (struct ext4_extent_idx *)(ext_block + 1);
- for (i = 0; (i < entries) && (i < MAX_EXTENT_BLOCS); i++) {
/* bloc number */
block = le16_to_cpu(index[i].ei_leaf_hi);
block = (block << 32) + le32_to_cpu(index[i].ei_leaf_lo);
/* bloc data */
buf = zalloc(blksz);
if (!buf)
break;
if (ext4fs_devread((lbaint_t)block << log2_blksz, 0, blksz,
buf) == 0) {
free(buf);
break;
}
ext4fs_extents_blocs.ext_blocs[i].block = block;
ext4fs_extents_blocs.ext_blocs[i].ext_header =
(struct ext4_extent_header *)buf;
- }
- ext4fs_extents_blocs.bloc_nb = i;
- return i;
+}
+void ext4fs_free_extents_blocks(void) +{
- int i;
- for (i = 0; i < ext4fs_extents_blocs.bloc_nb; i++) {
if (ext4fs_extents_blocs.ext_blocs[i].ext_header) {
free(ext4fs_extents_blocs.ext_blocs[i].ext_header);
ext4fs_extents_blocs.ext_blocs[i].ext_header = NULL;
ext4fs_extents_blocs.ext_blocs[i].block = 0;
}
- }
- ext4fs_extents_blocs.bloc_nb = 0;
+}
int ext4fs_read_inode(struct ext2_data *data, int ino, struct ext2_inode *inode) { struct ext2_block_group blkgrp; diff --git a/fs/ext4/ext4_common.h b/fs/ext4/ext4_common.h index 48fd2ac..0f80856 100644 --- a/fs/ext4/ext4_common.h +++ b/fs/ext4/ext4_common.h @@ -41,6 +41,18 @@ #define SUPERBLOCK_SIZE 1024 #define F_FILE 1
+#define MAX_EXTENT_BLOCS 10
+struct ext4_extent_bloc {
- unsigned long long block;
- struct ext4_extent_header *ext_header;
+};
+struct ext4_extent_blocs {
- int bloc_nb;
- struct ext4_extent_bloc ext_blocs[MAX_EXTENT_BLOCS];
+};
static inline void *zalloc(size_t size) { void *p = memalign(ARCH_DMA_MINALIGN, size); @@ -57,6 +69,10 @@ int ext4fs_find_file(const char *path, struct ext2fs_node *rootnode, int ext4fs_iterate_dir(struct ext2fs_node *dir, char *name, struct ext2fs_node **fnode, int *ftype);
+void ext4fs_init_extents_blocks_global(void); +int ext4fs_read_extents_blocks(struct ext2_inode *inode); +void ext4fs_free_extents_blocks(void);
#if defined(CONFIG_EXT4_WRITE) uint32_t ext4fs_div_roundup(uint32_t size, uint32_t n); int ext4fs_checksum_update(unsigned int i); diff --git a/fs/ext4/ext4fs.c b/fs/ext4/ext4fs.c index 258b937..88db1fb 100644 --- a/fs/ext4/ext4fs.c +++ b/fs/ext4/ext4fs.c @@ -70,6 +70,12 @@ int ext4fs_read_file(struct ext2fs_node *node, loff_t pos,
blockcnt = lldiv(((len + pos) + blocksize - 1), blocksize);
- ext4fs_init_extents_blocks_global();
- if (blockcnt > 100) {
/* Big reading: read extent blocks tree if any */
ext4fs_read_extents_blocks(&(node->inode));
- }
- for (i = lldiv(pos, blocksize); i < blockcnt; i++) { lbaint_t blknr; int blockoff = pos - (blocksize * i);
@@ -77,7 +83,7 @@ int ext4fs_read_file(struct ext2fs_node *node, loff_t pos, int skipfirst = 0; blknr = read_allocated_block(&(node->inode), i); if (blknr < 0)
return -1;
goto error;
blknr = blknr << log2_fs_blocksize;
@@ -134,7 +140,7 @@ int ext4fs_read_file(struct ext2fs_node *node, loff_t pos, delayed_extent, delayed_buf); if (status == 0)
return -1;
goto error; previous_block_number = -1; } memset(buf, 0, blocksize - skipfirst);
@@ -147,12 +153,16 @@ int ext4fs_read_file(struct ext2fs_node *node, loff_t pos, delayed_skipfirst, delayed_extent, delayed_buf); if (status == 0)
return -1;
goto error;
previous_block_number = -1; }
*actread = len;
ext4fs_free_extents_blocks(); return 0;
+error:
- ext4fs_free_extents_blocks();
- return -1;
}
int ext4fs_ls(const char *dirname)
Is this still needed with: commit ecdfb4195b20eb2dcde3c4083170016c13c69e8b Author: Ian Ray ian.ray@ge.com Date: Wed Nov 8 15:35:10 2017 +0000
ext4: recover from filesystem corruption when reading
Some fixes when reading EXT files and directory entries were identified after using e2fuzz to corrupt an EXT3 filesystem:
- Stop reading directory entries if the offset becomes badly aligned.
- Avoid overwriting memory by clamping the length used to zero the buffer in ext4fs_read_file. Also sanity check blocksize.
Signed-off-by: Ian Ray ian.ray@ge.com Signed-off-by: Martyn Welch martyn.welch@collabora.co.uk Reviewed-by: Stefano Babic sbabic@denx.de
Applied? Thanks!
participants (2)
-
Christophe Ronco
-
Tom Rini