[U-Boot] [PATCH 1/2] fs: fat: dynamically allocate memory for temporary buffer

From: Stefan Agner stefan.ag...@toradex.com
Drop the statically allocated get_contents_vfatname_block and dynamically allocate a buffer only if required. This saves 64KiB of memory.
Signed-off-by: Stefan Agner stefan.ag...@toradex.com Signed-off-by: Tien Fong Chee tien.fong.chee@intel.com --- fs/fat/fat.c | 19 +++++++++++++------ 1 files changed, 13 insertions(+), 6 deletions(-)
diff --git a/fs/fat/fat.c b/fs/fat/fat.c index 8803fb4..aa4636d 100644 --- a/fs/fat/fat.c +++ b/fs/fat/fat.c @@ -307,9 +307,6 @@ get_cluster(fsdata *mydata, __u32 clustnum, __u8 *buffer, unsigned long size) * into 'buffer'. * Update the number of bytes read in *gotsize or return -1 on fatal errors. */ -__u8 get_contents_vfatname_block[MAX_CLUSTSIZE] - __aligned(ARCH_DMA_MINALIGN); - static int get_contents(fsdata *mydata, dir_entry *dentptr, loff_t pos, __u8 *buffer, loff_t maxsize, loff_t *gotsize) { @@ -320,7 +317,7 @@ static int get_contents(fsdata *mydata, dir_entry *dentptr, loff_t pos, loff_t actsize;
*gotsize = 0; - debug("Filesize: %llu bytes\n", filesize); + debug("Filesize: %llu bytes, starting at pos %llu\n", filesize, pos);
if (pos >= filesize) { debug("Read position past EOF: %llu\n", pos); @@ -352,15 +349,25 @@ static int get_contents(fsdata *mydata, dir_entry *dentptr, loff_t pos,
/* align to beginning of next cluster if any */ if (pos) { + __u8 *tmp_buffer; + + tmp_buffer = malloc_cache_aligned(MAX_CLUSTSIZE); + if (!tmp_buffer) { + debug("Error: allocating buffer\n"); + return -ENOMEM; + } + actsize = min(filesize, (loff_t)bytesperclust); - if (get_cluster(mydata, curclust, get_contents_vfatname_block, + if (get_cluster(mydata, curclust, tmp_buffer, (int)actsize) != 0) { printf("Error reading cluster\n"); + free(tmp_buffer); return -1; } filesize -= actsize; actsize -= pos; - memcpy(buffer, get_contents_vfatname_block + pos, actsize); + memcpy(buffer, tmp_buffer + pos, actsize); + free(tmp_buffer); *gotsize += actsize; if (!filesize) return 0;

From: Tien Fong Chee tien.fong.chee@intel.com
Release cluster block immediately when no longer use would help to reduce 64KiB memory allocated to the memory pool.
Signed-off-by: Tien Fong Chee tien.fong.chee@intel.com --- fs/fat/fat.c | 11 +++++++++-- 1 files changed, 9 insertions(+), 2 deletions(-)
diff --git a/fs/fat/fat.c b/fs/fat/fat.c index aa4636d..5574620 100644 --- a/fs/fat/fat.c +++ b/fs/fat/fat.c @@ -1153,12 +1153,19 @@ int file_fat_read_at(const char *filename, loff_t pos, void *buffer, goto out_free_both;
debug("reading %s at pos %llu\n", filename, pos); - ret = get_contents(&fsdata, itr->dent, pos, buffer, maxsize, actread); + + /* For saving default max clustersize memory allocated to malloc pool */ + dir_entry *dentptr = itr->dent; + + free(itr); + + ret = get_contents(&fsdata, dentptr, pos, buffer, maxsize, actread);
out_free_both: free(fsdata.fatbuf); out_free_itr: - free(itr); + if (!itr) + free(itr); return ret; }

On Thu, 2019-01-17 at 14:52 +0800, tien.fong.chee@intel.com wrote:
From: Tien Fong Chee tien.fong.chee@intel.com
Release cluster block immediately when no longer use would help to reduce 64KiB memory allocated to the memory pool.
Signed-off-by: Tien Fong Chee tien.fong.chee@intel.com
fs/fat/fat.c | 11 +++++++++-- 1 files changed, 9 insertions(+), 2 deletions(-)
diff --git a/fs/fat/fat.c b/fs/fat/fat.c index aa4636d..5574620 100644 --- a/fs/fat/fat.c +++ b/fs/fat/fat.c @@ -1153,12 +1153,19 @@ int file_fat_read_at(const char *filename, loff_t pos, void *buffer, goto out_free_both; debug("reading %s at pos %llu\n", filename, pos);
- ret = get_contents(&fsdata, itr->dent, pos, buffer, maxsize,
actread);
- /* For saving default max clustersize memory allocated to
malloc pool */
- dir_entry *dentptr = itr->dent;
- free(itr);
I noticed msising of assignning NULL to itr after freeing it. Other than this, any comment about this implementation to maximise the reusable on reduce memory pool.
- ret = get_contents(&fsdata, dentptr, pos, buffer, maxsize,
actread); out_free_both: free(fsdata.fatbuf); out_free_itr:
- free(itr);
- if (!itr)
free(itr);
return ret; }

On 1/17/19 7:52 AM, tien.fong.chee@intel.com wrote:
From: Stefan Agner stefan.ag...@toradex.com
Drop the statically allocated get_contents_vfatname_block and dynamically allocate a buffer only if required. This saves 64KiB of memory.
Signed-off-by: Stefan Agner stefan.ag...@toradex.com Signed-off-by: Tien Fong Chee tien.fong.chee@intel.com
fs/fat/fat.c | 19 +++++++++++++------ 1 files changed, 13 insertions(+), 6 deletions(-)
diff --git a/fs/fat/fat.c b/fs/fat/fat.c index 8803fb4..aa4636d 100644 --- a/fs/fat/fat.c +++ b/fs/fat/fat.c @@ -307,9 +307,6 @@ get_cluster(fsdata *mydata, __u32 clustnum, __u8 *buffer, unsigned long size)
- into 'buffer'.
- Update the number of bytes read in *gotsize or return -1 on fatal errors.
*/ -__u8 get_contents_vfatname_block[MAX_CLUSTSIZE]
- __aligned(ARCH_DMA_MINALIGN);
static int get_contents(fsdata *mydata, dir_entry *dentptr, loff_t pos, __u8 *buffer, loff_t maxsize, loff_t *gotsize) { @@ -320,7 +317,7 @@ static int get_contents(fsdata *mydata, dir_entry *dentptr, loff_t pos, loff_t actsize;
*gotsize = 0;
- debug("Filesize: %llu bytes\n", filesize);
- debug("Filesize: %llu bytes, starting at pos %llu\n", filesize, pos);
This looks like a separate change.
if (pos >= filesize) { debug("Read position past EOF: %llu\n", pos); @@ -352,15 +349,25 @@ static int get_contents(fsdata *mydata, dir_entry *dentptr, loff_t pos,
/* align to beginning of next cluster if any */ if (pos) {
__u8 *tmp_buffer;
tmp_buffer = malloc_cache_aligned(MAX_CLUSTSIZE);
Don't you know the cluster size by now ?
if (!tmp_buffer) {
debug("Error: allocating buffer\n");
return -ENOMEM;
}
- actsize = min(filesize, (loff_t)bytesperclust);
if (get_cluster(mydata, curclust, get_contents_vfatname_block,
if (get_cluster(mydata, curclust, tmp_buffer, (int)actsize) != 0) { printf("Error reading cluster\n");
} filesize -= actsize; actsize -= pos;free(tmp_buffer); return -1;
memcpy(buffer, get_contents_vfatname_block + pos, actsize);
memcpy(buffer, tmp_buffer + pos, actsize);
free(tmp_buffer);
How many times is this malloc()/free() called ? I wonder how much this slows things down and how much it fragments the heap. Maybe the amount of calls to this can be reduced somehow.
*gotsize += actsize; if (!filesize) return 0;

On Fri, 2019-01-18 at 07:26 +0100, Marek Vasut wrote:
On 1/17/19 7:52 AM, tien.fong.chee@intel.com wrote:
From: Stefan Agner stefan.ag...@toradex.com
Drop the statically allocated get_contents_vfatname_block and dynamically allocate a buffer only if required. This saves 64KiB of memory.
Signed-off-by: Stefan Agner stefan.ag...@toradex.com Signed-off-by: Tien Fong Chee tien.fong.chee@intel.com
fs/fat/fat.c | 19 +++++++++++++------ 1 files changed, 13 insertions(+), 6 deletions(-)
diff --git a/fs/fat/fat.c b/fs/fat/fat.c index 8803fb4..aa4636d 100644 --- a/fs/fat/fat.c +++ b/fs/fat/fat.c @@ -307,9 +307,6 @@ get_cluster(fsdata *mydata, __u32 clustnum, __u8 *buffer, unsigned long size) * into 'buffer'. * Update the number of bytes read in *gotsize or return -1 on fatal errors. */ -__u8 get_contents_vfatname_block[MAX_CLUSTSIZE]
- __aligned(ARCH_DMA_MINALIGN);
static int get_contents(fsdata *mydata, dir_entry *dentptr, loff_t pos, __u8 *buffer, loff_t maxsize, loff_t *gotsize) { @@ -320,7 +317,7 @@ static int get_contents(fsdata *mydata, dir_entry *dentptr, loff_t pos, loff_t actsize; *gotsize = 0;
- debug("Filesize: %llu bytes\n", filesize);
- debug("Filesize: %llu bytes, starting at pos %llu\n",
filesize, pos);
This looks like a separate change.
Okay.
if (pos >= filesize) { debug("Read position past EOF: %llu\n", pos); @@ -352,15 +349,25 @@ static int get_contents(fsdata *mydata, dir_entry *dentptr, loff_t pos, /* align to beginning of next cluster if any */ if (pos) {
__u8 *tmp_buffer;
tmp_buffer = malloc_cache_aligned(MAX_CLUSTSIZE);
Don't you know the cluster size by now ?
Yeah, i think so, we can run allocate memory based on actsize. Please correct me if i'm wrong.
if (!tmp_buffer) {
debug("Error: allocating buffer\n");
return -ENOMEM;
}
actsize = min(filesize, (loff_t)bytesperclust);
if (get_cluster(mydata, curclust,
get_contents_vfatname_block,
if (get_cluster(mydata, curclust, tmp_buffer,
(int)actsize) != 0) { printf("Error reading cluster\n");
free(tmp_buffer);
return -1; } filesize -= actsize; actsize -= pos;
memcpy(buffer, get_contents_vfatname_block + pos,
actsize);
memcpy(buffer, tmp_buffer + pos, actsize);
free(tmp_buffer);
How many times is this malloc()/free() called ? I wonder how much this slows things down and how much it fragments the heap. Maybe the amount of calls to this can be reduced somehow.
There is performance penalty for when reading file based on offset chunk by chunk at small memory such as OCRAM. However, i believe this doesn't impact in most use cases because most of them running in large DDR size. Most of the time, the reading of the file is starting from offset zero(no memory allocation is required).
I can "feel" that is not much difference actually in performance based on print out responding for my chunk by chunk file reading use case in these changes. But these changes with [patch 2/2] would help to maximize reusable from memory pool, which lead to only 1 block of max cluster is required allocated in memory pool instead of two blocks.
What do you think?
*gotsize += actsize; if (!filesize) return 0;
participants (3)
-
Chee, Tien Fong
-
Marek Vasut
-
tien.fong.chee@intel.com