[PATCH] Revert "lib: sparse: Make CHUNK_TYPE_RAW buffer aligned"

This reverts commit 62649165cb02ab95b57360bb362886935f524f26.
The patch decreased the write performance quite a bit. Here is an example on an i.MX 8M Quad platform. - Before the revert: Sending sparse 'vendor' 1/2 (516436 KB) OKAY [ 5.113s] Writing 'vendor' OKAY [128.335s] Sending sparse 'vendor' 2/2 (76100 KB) OKAY [ 0.802s] Writing 'vendor' OKAY [ 27.902s] - After the revert: Sending sparse 'vendor' 1/2 (516436 KB) OKAY [ 5.310s] Writing 'vendor' OKAY [ 18.041s] Sending sparse 'vendor' 2/2 (76100 KB) OKAY [ 1.244s] Writing 'vendor' OKAY [ 2.663s]
Considering that the patch only moves the buffer around to avoid a warning message about misaligned buffers, let's keep the better performance.
Signed-off-by: Gary Bisson gary.bisson@boundarydevices.com Signed-off-by: Troy Kisky troy.kisky@boundarydevices.com --- lib/image-sparse.c | 69 ++++++---------------------------------------- 1 file changed, 8 insertions(+), 61 deletions(-)
diff --git a/lib/image-sparse.c b/lib/image-sparse.c index 5ec0f94ab3e..d80fdbbf58e 100644 --- a/lib/image-sparse.c +++ b/lib/image-sparse.c @@ -46,66 +46,9 @@ #include <asm/cache.h>
#include <linux/math64.h> -#include <linux/err.h>
static void default_log(const char *ignored, char *response) {}
-static lbaint_t write_sparse_chunk_raw(struct sparse_storage *info, - lbaint_t blk, lbaint_t blkcnt, - void *data, - char *response) -{ - lbaint_t n = blkcnt, write_blks, blks = 0, aligned_buf_blks = 100; - uint32_t *aligned_buf = NULL; - - if (CONFIG_IS_ENABLED(SYS_DCACHE_OFF)) { - write_blks = info->write(info, blk, n, data); - if (write_blks < n) - goto write_fail; - - return write_blks; - } - - aligned_buf = memalign(ARCH_DMA_MINALIGN, info->blksz * aligned_buf_blks); - if (!aligned_buf) { - info->mssg("Malloc failed for: CHUNK_TYPE_RAW", response); - return -ENOMEM; - } - - while (blkcnt > 0) { - n = min(aligned_buf_blks, blkcnt); - memcpy(aligned_buf, data, n * info->blksz); - - /* write_blks might be > n due to NAND bad-blocks */ - write_blks = info->write(info, blk + blks, n, aligned_buf); - if (write_blks < n) { - free(aligned_buf); - goto write_fail; - } - - blks += write_blks; - data += n * info->blksz; - blkcnt -= n; - } - - free(aligned_buf); - return blks; - -write_fail: - if (IS_ERR_VALUE(write_blks)) { - printf("%s: Write failed, block #" LBAFU " [" LBAFU "] (%lld)\n", - __func__, blk + blks, n, (long long)write_blks); - info->mssg("flash write failure", response); - return write_blks; - } - - /* write_blks < n */ - printf("%s: Write failed, block #" LBAFU " [" LBAFU "]\n", - __func__, blk + blks, n); - info->mssg("flash write failure(incomplete)", response); - return -1; -} - int write_sparse_image(struct sparse_storage *info, const char *part_name, void *data, char *response) { @@ -209,11 +152,15 @@ int write_sparse_image(struct sparse_storage *info, return -1; }
- blks = write_sparse_chunk_raw(info, blk, blkcnt, - data, response); - if (blks < 0) + blks = info->write(info, blk, blkcnt, data); + /* blks might be > blkcnt (eg. NAND bad-blocks) */ + if (blks < blkcnt) { + printf("%s: %s" LBAFU " [" LBAFU "]\n", + __func__, "Write failed, block #", + blk, blks); + info->mssg("flash write failure", response); return -1; - + } blk += blks; bytes_written += ((u64)blkcnt) * info->blksz; total_blocks += chunk_header->chunk_sz;

On 11/18/22 07:13, Gary Bisson wrote:
This reverts commit 62649165cb02ab95b57360bb362886935f524f26.
The patch decreased the write performance quite a bit. Here is an example on an i.MX 8M Quad platform.
- Before the revert:
Sending sparse 'vendor' 1/2 (516436 KB) OKAY [ 5.113s] Writing 'vendor' OKAY [128.335s] Sending sparse 'vendor' 2/2 (76100 KB) OKAY [ 0.802s] Writing 'vendor' OKAY [ 27.902s]
- After the revert:
Sending sparse 'vendor' 1/2 (516436 KB) OKAY [ 5.310s] Writing 'vendor' OKAY [ 18.041s] Sending sparse 'vendor' 2/2 (76100 KB) OKAY [ 1.244s] Writing 'vendor' OKAY [ 2.663s]
Considering that the patch only moves buffer around to avoid a warning message about misaligned buffers, let's keep the best performances.
So what is the point of this warning?
--Sean
Signed-off-by: Gary Bisson gary.bisson@boundarydevices.com Signed-off-by: Troy Kisky troy.kisky@boundarydevices.com
lib/image-sparse.c | 69 ++++++---------------------------------------- 1 file changed, 8 insertions(+), 61 deletions(-)
diff --git a/lib/image-sparse.c b/lib/image-sparse.c index 5ec0f94ab3e..d80fdbbf58e 100644 --- a/lib/image-sparse.c +++ b/lib/image-sparse.c @@ -46,66 +46,9 @@ #include <asm/cache.h>
#include <linux/math64.h> -#include <linux/err.h>
static void default_log(const char *ignored, char *response) {}
-static lbaint_t write_sparse_chunk_raw(struct sparse_storage *info,
lbaint_t blk, lbaint_t blkcnt,
void *data,
char *response)
-{
- lbaint_t n = blkcnt, write_blks, blks = 0, aligned_buf_blks = 100;
- uint32_t *aligned_buf = NULL;
- if (CONFIG_IS_ENABLED(SYS_DCACHE_OFF)) {
write_blks = info->write(info, blk, n, data);
if (write_blks < n)
goto write_fail;
return write_blks;
- }
- aligned_buf = memalign(ARCH_DMA_MINALIGN, info->blksz * aligned_buf_blks);
- if (!aligned_buf) {
info->mssg("Malloc failed for: CHUNK_TYPE_RAW", response);
return -ENOMEM;
- }
- while (blkcnt > 0) {
n = min(aligned_buf_blks, blkcnt);
memcpy(aligned_buf, data, n * info->blksz);
/* write_blks might be > n due to NAND bad-blocks */
write_blks = info->write(info, blk + blks, n, aligned_buf);
if (write_blks < n) {
free(aligned_buf);
goto write_fail;
}
blks += write_blks;
data += n * info->blksz;
blkcnt -= n;
- }
- free(aligned_buf);
- return blks;
-write_fail:
- if (IS_ERR_VALUE(write_blks)) {
printf("%s: Write failed, block #" LBAFU " [" LBAFU "] (%lld)\n",
__func__, blk + blks, n, (long long)write_blks);
info->mssg("flash write failure", response);
return write_blks;
- }
- /* write_blks < n */
- printf("%s: Write failed, block #" LBAFU " [" LBAFU "]\n",
__func__, blk + blks, n);
- info->mssg("flash write failure(incomplete)", response);
- return -1;
-}
int write_sparse_image(struct sparse_storage *info, const char *part_name, void *data, char *response) { @@ -209,11 +152,15 @@ int write_sparse_image(struct sparse_storage *info, return -1; }
blks = write_sparse_chunk_raw(info, blk, blkcnt,
data, response);
if (blks < 0)
blks = info->write(info, blk, blkcnt, data);
/* blks might be > blkcnt (eg. NAND bad-blocks) */
if (blks < blkcnt) {
printf("%s: %s" LBAFU " [" LBAFU "]\n",
__func__, "Write failed, block #",
blk, blks);
info->mssg("flash write failure", response); return -1;
} blk += blks; bytes_written += ((u64)blkcnt) * info->blksz; total_blocks += chunk_header->chunk_sz;

Hi,
On Fri, Nov 18, 2022 at 10:36:58AM -0500, Sean Anderson wrote:
On 11/18/22 07:13, Gary Bisson wrote:
This reverts commit 62649165cb02ab95b57360bb362886935f524f26.
The patch decreased the write performance quite a bit. Here is an example on an i.MX 8M Quad platform.
- Before the revert:
Sending sparse 'vendor' 1/2 (516436 KB) OKAY [ 5.113s] Writing 'vendor' OKAY [128.335s] Sending sparse 'vendor' 2/2 (76100 KB) OKAY [ 0.802s] Writing 'vendor' OKAY [ 27.902s]
- After the revert:
Sending sparse 'vendor' 1/2 (516436 KB) OKAY [ 5.310s] Writing 'vendor' OKAY [ 18.041s] Sending sparse 'vendor' 2/2 (76100 KB) OKAY [ 1.244s] Writing 'vendor' OKAY [ 2.663s]
Considering that the patch only moves buffer around to avoid a warning message about misaligned buffers, let's keep the best performances.
So what is the point of this warning?
Well, the warning does say something true: the cache operation is not aligned. Better ask Simon, as he's the one who changed the print from a debug one to a warn_non_spl one: bcc53bf0958 arm: Show cache warnings in U-Boot proper only
BTW, in my case I couldn't see the misaligned messages, yet I saw the performance hit described above.
Regards, Gary

On 11/21/22 09:50, Gary Bisson wrote:
Hi,
On Fri, Nov 18, 2022 at 10:36:58AM -0500, Sean Anderson wrote:
On 11/18/22 07:13, Gary Bisson wrote:
This reverts commit 62649165cb02ab95b57360bb362886935f524f26.
The patch decreased the write performance quite a bit. Here is an example on an i.MX 8M Quad platform.
- Before the revert:
Sending sparse 'vendor' 1/2 (516436 KB) OKAY [ 5.113s] Writing 'vendor' OKAY [128.335s] Sending sparse 'vendor' 2/2 (76100 KB) OKAY [ 0.802s] Writing 'vendor' OKAY [ 27.902s]
- After the revert:
Sending sparse 'vendor' 1/2 (516436 KB) OKAY [ 5.310s] Writing 'vendor' OKAY [ 18.041s] Sending sparse 'vendor' 2/2 (76100 KB) OKAY [ 1.244s] Writing 'vendor' OKAY [ 2.663s]
Considering that the patch only moves buffer around to avoid a warning message about misaligned buffers, let's keep the best performances.
So what is the point of this warning?
Well the warning does say something true that the cache operation is not aligned. Better ask Simon as he's the one who changed the print from a debug to warn_non_spl one: bcc53bf0958 arm: Show cache warnings in U-Boot proper only
BTW, in my case I couldn't see the misaligned messages, yet I saw the performance hit described above.
Maybe it is better to keep this as a Kconfig? Some arches may support unaligned access but others may not. I wonder if we have something like this already.
--Sean

Hi Gary, Sean,
On lun., nov. 21, 2022 at 10:09, Sean Anderson sean.anderson@seco.com wrote:
On 11/21/22 09:50, Gary Bisson wrote:
Hi,
On Fri, Nov 18, 2022 at 10:36:58AM -0500, Sean Anderson wrote:
On 11/18/22 07:13, Gary Bisson wrote:
This reverts commit 62649165cb02ab95b57360bb362886935f524f26.
The patch decreased the write performance quite a bit. Here is an example on an i.MX 8M Quad platform.
- Before the revert:
Sending sparse 'vendor' 1/2 (516436 KB) OKAY [ 5.113s] Writing 'vendor' OKAY [128.335s] Sending sparse 'vendor' 2/2 (76100 KB) OKAY [ 0.802s] Writing 'vendor' OKAY [ 27.902s]
- After the revert:
Sending sparse 'vendor' 1/2 (516436 KB) OKAY [ 5.310s] Writing 'vendor' OKAY [ 18.041s] Sending sparse 'vendor' 2/2 (76100 KB) OKAY [ 1.244s] Writing 'vendor' OKAY [ 2.663s]
Considering that the patch only moves buffer around to avoid a warning message about misaligned buffers, let's keep the best performances.
So what is the point of this warning?
Well the warning does say something true that the cache operation is not aligned. Better ask Simon as he's the one who changed the print from a debug to warn_non_spl one: bcc53bf0958 arm: Show cache warnings in U-Boot proper only
BTW, in my case I couldn't see the misaligned messages, yet I saw the performance hit described above.
I can also reproduce this problem on the AM62x SK EVM.
Before the revert: Sending sparse 'super' 1/2 (768793 KB) OKAY [ 23.954s] Writing 'super' OKAY [ 75.926s] Sending sparse 'super' 2/2 (629819 KB) OKAY [ 19.641s] Writing 'super' OKAY [ 62.849s] Finished. Total time: 182.474s
After the revert: Sending sparse 'super' 1/2 (768793 KB) OKAY [ 23.895s] Writing 'super' OKAY [ 12.961s] Sending sparse 'super' 2/2 (629819 KB) OKAY [ 19.562s] Writing 'super' OKAY [ 12.805s] Finished. Total time: 69.327s
And like Gary, I did not observe the misaligned messages.
Did we come up with a solution for this performance regression?
I will continue looking on my end but please let me know if you already solved this.
Thanks,
Mattijs
Maybe it is better to keep this as a Kconfig? Some arches may support unaligned access but others may not. I wonder if we have something like this already.
--Sean

On ven., juin 16, 2023 at 13:56, Mattijs Korpershoek mkorpershoek@baylibre.com wrote:
Hi Gary, Sean,
On lun., nov. 21, 2022 at 10:09, Sean Anderson sean.anderson@seco.com wrote:
On 11/21/22 09:50, Gary Bisson wrote:
Hi,
On Fri, Nov 18, 2022 at 10:36:58AM -0500, Sean Anderson wrote:
On 11/18/22 07:13, Gary Bisson wrote:
This reverts commit 62649165cb02ab95b57360bb362886935f524f26.
The patch decreased the write performance quite a bit. Here is an example on an i.MX 8M Quad platform.
- Before the revert:
Sending sparse 'vendor' 1/2 (516436 KB) OKAY [ 5.113s] Writing 'vendor' OKAY [128.335s] Sending sparse 'vendor' 2/2 (76100 KB) OKAY [ 0.802s] Writing 'vendor' OKAY [ 27.902s]
- After the revert:
Sending sparse 'vendor' 1/2 (516436 KB) OKAY [ 5.310s] Writing 'vendor' OKAY [ 18.041s] Sending sparse 'vendor' 2/2 (76100 KB) OKAY [ 1.244s] Writing 'vendor' OKAY [ 2.663s]
Considering that the patch only moves buffer around to avoid a warning message about misaligned buffers, let's keep the best performances.
So what is the point of this warning?
Well the warning does say something true that the cache operation is not aligned. Better ask Simon as he's the one who changed the print from a debug to warn_non_spl one: bcc53bf0958 arm: Show cache warnings in U-Boot proper only
BTW, in my case I couldn't see the misaligned messages, yet I saw the performance hit described above.
I also reproduce this problem on AM62x SK EVM.
Before the revert: Sending sparse 'super' 1/2 (768793 KB) OKAY [ 23.954s] Writing 'super' OKAY [ 75.926s] Sending sparse 'super' 2/2 (629819 KB) OKAY [ 19.641s] Writing 'super' OKAY [ 62.849s] Finished. Total time: 182.474s
After the revert: Sending sparse 'super' 1/2 (768793 KB) OKAY [ 23.895s] Writing 'super' OKAY [ 12.961s] Sending sparse 'super' 2/2 (629819 KB) OKAY [ 19.562s] Writing 'super' OKAY [ 12.805s] Finished. Total time: 69.327s
And like Gary, I did not observe the misaligned messages.
Did we come up with a solution for this performance regression?
I will continue looking on my end but please let me know if you already solved this.
Replying to myself here. My attempt at solving this problem has been submitted here:
https://lore.kernel.org/r/20230616-sparse-flash-fix-v1-1-6bafeacc567b@baylib...
Thanks,
Mattijs
Maybe it is better to keep this as a Kconfig? Some arches may support unaligned access but others may not. I wonder if we have something like this already.
--Sean

On Fri, Jun 16, 2023 at 03:50:06PM +0200, Mattijs Korpershoek wrote:
On ven., juin 16, 2023 at 13:56, Mattijs Korpershoek mkorpershoek@baylibre.com wrote:
Hi Gary, Sean,
On lun., nov. 21, 2022 at 10:09, Sean Anderson sean.anderson@seco.com wrote:
On 11/21/22 09:50, Gary Bisson wrote:
Hi,
On Fri, Nov 18, 2022 at 10:36:58AM -0500, Sean Anderson wrote:
On 11/18/22 07:13, Gary Bisson wrote:
This reverts commit 62649165cb02ab95b57360bb362886935f524f26.
The patch decreased the write performance quite a bit. Here is an example on an i.MX 8M Quad platform.
- Before the revert:
Sending sparse 'vendor' 1/2 (516436 KB) OKAY [ 5.113s] Writing 'vendor' OKAY [128.335s] Sending sparse 'vendor' 2/2 (76100 KB) OKAY [ 0.802s] Writing 'vendor' OKAY [ 27.902s]
- After the revert:
Sending sparse 'vendor' 1/2 (516436 KB) OKAY [ 5.310s] Writing 'vendor' OKAY [ 18.041s] Sending sparse 'vendor' 2/2 (76100 KB) OKAY [ 1.244s] Writing 'vendor' OKAY [ 2.663s]
Considering that the patch only moves buffer around to avoid a warning message about misaligned buffers, let's keep the best performances.
So what is the point of this warning?
Well the warning does say something true that the cache operation is not aligned. Better ask Simon as he's the one who changed the print from a debug to warn_non_spl one: bcc53bf0958 arm: Show cache warnings in U-Boot proper only
BTW, in my case I couldn't see the misaligned messages, yet I saw the performance hit described above.
I also reproduce this problem on AM62x SK EVM.
Before the revert: Sending sparse 'super' 1/2 (768793 KB) OKAY [ 23.954s] Writing 'super' OKAY [ 75.926s] Sending sparse 'super' 2/2 (629819 KB) OKAY [ 19.641s] Writing 'super' OKAY [ 62.849s] Finished. Total time: 182.474s
After the revert: Sending sparse 'super' 1/2 (768793 KB) OKAY [ 23.895s] Writing 'super' OKAY [ 12.961s] Sending sparse 'super' 2/2 (629819 KB) OKAY [ 19.562s] Writing 'super' OKAY [ 12.805s] Finished. Total time: 69.327s
And like Gary, I did not observe the misaligned messages.
Did we come up with a solution for this performance regression?
I will continue looking on my end but please let me know if you already solved this.
Answering to myself here. My attempt of solving this problem has been submitted here:
https://lore.kernel.org/r/20230616-sparse-flash-fix-v1-1-6bafeacc567b@baylib...
Thanks for digging in to this!
participants (4)
-
Gary Bisson
-
Mattijs Korpershoek
-
Sean Anderson
-
Tom Rini