[U-Boot] [PATCH 0/3] dfu: Several enhancements for dfu subsystem

This patch series provide following improvements: - Time needed for flashing is reduced by switching CRC32 calculation to be optional. - Access to eMMC device partitions (like boot) is now possible - It is now possible to assign several envs for dfu command
Lukasz Majewski (2): dfu: mmc: Provide support for eMMC boot partition access dfu: Introduction of the "dfu_checksum_method" env variable for checksum method setting
Przemyslaw Marczak (1): dfu: add static alt num count in dfu_config_entities()
drivers/dfu/dfu.c | 40 +++++++++++++++++++++++++++++++++++----- drivers/dfu/dfu_mmc.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++-- include/dfu.h | 10 ++++++++++ 3 files changed, 91 insertions(+), 7 deletions(-)

Before this patch it was only possible to access only the default eMMC partition. By partition selection I mean the access to eMMC via ext_csd[179] register programming.
It sometimes happens that it is necessary to write to other partitions. This patch adds extra attributes to "mmc" sub type of the dfu_alt_info variable (e.g. boot-mmc.bin mmc 0 200 mmcpart 1;)
It saves the original boot value and restores it after storing the file.
Such definition will not impact other definitions of the "mmc" dfu partitions specifier.
Change-Id: I34069510eb27aa80794189d2d13cdb97e54ba83d Signed-off-by: Lukasz Majewski l.majewski@samsung.com --- drivers/dfu/dfu_mmc.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++-- include/dfu.h | 5 +++++ 2 files changed, 51 insertions(+), 2 deletions(-)
diff --git a/drivers/dfu/dfu_mmc.c b/drivers/dfu/dfu_mmc.c index 651cfff..a7224b6 100644 --- a/drivers/dfu/dfu_mmc.c +++ b/drivers/dfu/dfu_mmc.c @@ -18,11 +18,29 @@ static unsigned char __aligned(CONFIG_SYS_CACHELINE_SIZE) dfu_file_buf[CONFIG_SYS_DFU_MAX_FILE_SIZE]; static long dfu_file_buf_len;
+static int mmc_access_part(struct dfu_entity *dfu, struct mmc *mmc, int part) +{ + int ret; + + if (part == mmc->part_num) + return 0; + + ret = mmc_switch_part(dfu->dev_num, part); + if (ret) { + error("Cannot switch to partition %d\n", part); + return ret; + } + mmc->part_num = part; + + return 0; +} + static int mmc_block_op(enum dfu_op op, struct dfu_entity *dfu, u64 offset, void *buf, long *len) { struct mmc *mmc = find_mmc_device(dfu->dev_num); u32 blk_start, blk_count, n = 0; + int ret, part_num_bkp = 0;
/* * We must ensure that we work in lba_blk_size chunks, so ALIGN @@ -39,6 +57,13 @@ static int mmc_block_op(enum dfu_op op, struct dfu_entity *dfu, return -EINVAL; }
+ if (dfu->data.mmc.partition_access != DFU_NOT_SUPPORTED) { + part_num_bkp = mmc->part_num; + ret = mmc_access_part(dfu, mmc, dfu->data.mmc.partition_access); + if (ret) + return ret; + } + debug("%s: %s dev: %d start: %d cnt: %d buf: 0x%p\n", __func__, op == DFU_OP_READ ? "MMC READ" : "MMC WRITE", dfu->dev_num, blk_start, blk_count, buf); @@ -57,9 +82,17 @@ static int mmc_block_op(enum dfu_op op, struct dfu_entity *dfu,
if (n != blk_count) { error("MMC operation failed"); + if (dfu->data.mmc.partition_access != DFU_NOT_SUPPORTED) + mmc_access_part(dfu, mmc, part_num_bkp); return -EIO; }
+ if (dfu->data.mmc.partition_access != DFU_NOT_SUPPORTED) { + ret = mmc_access_part(dfu, mmc, part_num_bkp); + if (ret) + return ret; + } + return 0; }
@@ -193,12 +226,22 @@ int dfu_fill_entity_mmc(struct dfu_entity *dfu, char *s) char *st;
dfu->dev_type = DFU_DEV_MMC; + dfu->data.mmc.partition_access = DFU_NOT_SUPPORTED; + st = strsep(&s, " "); if (!strcmp(st, "mmc")) { dfu->layout = DFU_RAW_ADDR; dfu->data.mmc.lba_start = simple_strtoul(s, &s, 16); - dfu->data.mmc.lba_size = simple_strtoul(++s, &s, 16); + s++; + dfu->data.mmc.lba_size = simple_strtoul(s, &s, 16); dfu->data.mmc.lba_blk_size = get_mmc_blk_size(dfu->dev_num); + if (*s) { + s++; + st = strsep(&s, " "); + if (!strcmp(st, "mmcpart")) + dfu->data.mmc.partition_access = + simple_strtoul(s, &s, 0); + } } else if (!strcmp(st, "fat")) { dfu->layout = DFU_FS_FAT; } else if (!strcmp(st, "ext4")) { @@ -236,7 +279,8 @@ int dfu_fill_entity_mmc(struct dfu_entity *dfu, char *s)
if (dfu->layout == DFU_FS_EXT4 || dfu->layout == DFU_FS_FAT) { dfu->data.mmc.dev = simple_strtoul(s, &s, 10); - dfu->data.mmc.part = simple_strtoul(++s, &s, 10); + s++; + dfu->data.mmc.part = simple_strtoul(s, &s, 10); }
dfu->read_medium = dfu_read_medium_mmc; diff --git a/include/dfu.h b/include/dfu.h index 6c71ecb..751f0fd 100644 --- a/include/dfu.h +++ b/include/dfu.h @@ -37,12 +37,17 @@ enum dfu_op { DFU_OP_WRITE, };
+#define DFU_NOT_SUPPORTED -1 + struct mmc_internal_data { /* RAW programming */ unsigned int lba_start; unsigned int lba_size; unsigned int lba_blk_size;
+ /* Partition access */ + int partition_access; + /* FAT/EXT */ unsigned int dev; unsigned int part;

On Monday, March 31, 2014 at 10:48:47 AM, Lukasz Majewski wrote:
Before this patch it was only possible to access only the default eMMC partition. By partition selection I mean the access to eMMC via ext_csd[179] register programming.
It sometimes happens that it is necessary to write to other partitions. This patch adds extra attributes to "mmc" sub type of the dfu_alt_info variable (e.g. boot-mmc.bin mmc 0 200 mmcpart 1;)
It saves the original boot value and restores it after storing the file.
Such definition will not impact other definitions of the "mmc" dfu partitions specifier.
Change-Id: I34069510eb27aa80794189d2d13cdb97e54ba83d Signed-off-by: Lukasz Majewski l.majewski@samsung.com
Please strip the Change-Id .
Best regards, Marek Vasut

Hi Marek,
On Monday, March 31, 2014 at 10:48:47 AM, Lukasz Majewski wrote:
Before this patch it was only possible to access only the default eMMC partition. By partition selection I mean the access to eMMC via ext_csd[179] register programming.
It sometimes happens that it is necessary to write to other partitions. This patch adds extra attributes to "mmc" sub type of the dfu_alt_info variable (e.g. boot-mmc.bin mmc 0 200 mmcpart 1;)
It saves the original boot value and restores it after storing the file.
Such definition will not impact other definitions of the "mmc" dfu partitions specifier.
Change-Id: I34069510eb27aa80794189d2d13cdb97e54ba83d Signed-off-by: Lukasz Majewski l.majewski@samsung.com
Please strip the Change-Id .
Damn, I always forgot about stripping them out.
Best regards, Marek Vasut

Before this patch it was only possible to access the default eMMC HW partition. By partition selection I mean the access to eMMC via the ext_csd[179] register programming.
It sometimes happens that it is necessary to write to other partitions. This patch adds extra attribute to "raw" sub type of the dfu_alt_info environment variable (e.g. boot-mmc.bin raw 0x0 0x200 mmcpart 1;)
It saves the original boot value and restores it after storing the file.
Signed-off-by: Lukasz Majewski l.majewski@samsung.com --- Changes for v2: - Adjust the code to be applicable on top of newest u-boot-usb branch --- drivers/dfu/dfu_mmc.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ include/dfu.h | 3 +++ 2 files changed, 49 insertions(+)
diff --git a/drivers/dfu/dfu_mmc.c b/drivers/dfu/dfu_mmc.c index 5e10ea7..63cc876 100644 --- a/drivers/dfu/dfu_mmc.c +++ b/drivers/dfu/dfu_mmc.c @@ -18,11 +18,29 @@ static unsigned char __aligned(CONFIG_SYS_CACHELINE_SIZE) dfu_file_buf[CONFIG_SYS_DFU_MAX_FILE_SIZE]; static long dfu_file_buf_len;
+static int mmc_access_part(struct dfu_entity *dfu, struct mmc *mmc, int part) +{ + int ret; + + if (part == mmc->part_num) + return 0; + + ret = mmc_switch_part(dfu->dev_num, part); + if (ret) { + error("Cannot switch to partition %d\n", part); + return ret; + } + mmc->part_num = part; + + return 0; +} + static int mmc_block_op(enum dfu_op op, struct dfu_entity *dfu, u64 offset, void *buf, long *len) { struct mmc *mmc = find_mmc_device(dfu->dev_num); u32 blk_start, blk_count, n = 0; + int ret, part_num_bkp = 0;
/* * We must ensure that we work in lba_blk_size chunks, so ALIGN @@ -39,6 +57,13 @@ static int mmc_block_op(enum dfu_op op, struct dfu_entity *dfu, return -EINVAL; }
+ if (dfu->data.mmc.hw_partition >= 0) { + part_num_bkp = mmc->part_num; + ret = mmc_access_part(dfu, mmc, dfu->data.mmc.hw_partition); + if (ret) + return ret; + } + debug("%s: %s dev: %d start: %d cnt: %d buf: 0x%p\n", __func__, op == DFU_OP_READ ? "MMC READ" : "MMC WRITE", dfu->dev_num, blk_start, blk_count, buf); @@ -57,9 +82,17 @@ static int mmc_block_op(enum dfu_op op, struct dfu_entity *dfu,
if (n != blk_count) { error("MMC operation failed"); + if (dfu->data.mmc.hw_partition >= 0) + mmc_access_part(dfu, mmc, part_num_bkp); return -EIO; }
+ if (dfu->data.mmc.hw_partition >= 0) { + ret = mmc_access_part(dfu, mmc, part_num_bkp); + if (ret) + return ret; + } + return 0; }
@@ -194,6 +227,8 @@ int dfu_read_medium_mmc(struct dfu_entity *dfu, u64 offset, void *buf, * 2nd and 3rd: * lba_start and lba_size, for raw write * mmc_dev and mmc_part, for filesystems and part + * 4th (optional): + * mmcpart <num> (access to HW eMMC partitions) */ int dfu_fill_entity_mmc(struct dfu_entity *dfu, char *s) { @@ -233,11 +268,22 @@ int dfu_fill_entity_mmc(struct dfu_entity *dfu, char *s) return -ENODEV; }
+ dfu->data.mmc.hw_partition = -EINVAL; if (!strcmp(entity_type, "raw")) { dfu->layout = DFU_RAW_ADDR; dfu->data.mmc.lba_start = second_arg; dfu->data.mmc.lba_size = third_arg; dfu->data.mmc.lba_blk_size = mmc->read_bl_len; + + /* + * Check for an extra entry at dfu_alt_info env variable + * specifying the mmc HW defined partition number + */ + if (s) + if (!strcmp(strsep(&s, " "), "mmcpart")) + dfu->data.mmc.hw_partition = + simple_strtoul(s, NULL, 0); + } else if (!strcmp(entity_type, "part")) { disk_partition_t partinfo; block_dev_desc_t *blk_dev = &mmc->block_dev; diff --git a/include/dfu.h b/include/dfu.h index 986598e..26ffbc8 100644 --- a/include/dfu.h +++ b/include/dfu.h @@ -43,6 +43,9 @@ struct mmc_internal_data { unsigned int lba_size; unsigned int lba_blk_size;
+ /* eMMC HW partition access */ + int hw_partition; + /* FAT/EXT */ unsigned int dev; unsigned int part;

On Friday, May 09, 2014 at 04:58:15 PM, Lukasz Majewski wrote:
Before this patch it was only possible to access the default eMMC HW partition. By partition selection I mean the access to eMMC via the ext_csd[179] register programming.
It sometimes happens that it is necessary to write to other partitions. This patch adds extra attribute to "raw" sub type of the dfu_alt_info environment variable (e.g. boot-mmc.bin raw 0x0 0x200 mmcpart 1;)
It saves the original boot value and restores it after storing the file.
Applied, thanks.
Best regards, Marek Vasut

From: Przemyslaw Marczak p.marczak@samsung.com
Thanks to this multiple calls of function dfu_config_entities() give continuous dfu alt numbering until call dfu_free_entities().
This allows to store dfu entities in multiple env variables.
Change-Id: Ibca7e785bfca9f53b64d3dff0490185b06841b54 Signed-off-by: Przemyslaw Marczak p.marczak@samsung.com --- drivers/dfu/dfu.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/dfu/dfu.c b/drivers/dfu/dfu.c index f94c412..e15f959 100644 --- a/drivers/dfu/dfu.c +++ b/drivers/dfu/dfu.c @@ -19,6 +19,7 @@ static bool dfu_reset_request; static LIST_HEAD(dfu_list); static int dfu_alt_num; +static int alt_num_count;
bool dfu_reset(void) { @@ -379,6 +380,8 @@ void dfu_free_entities(void) if (t) free(t); INIT_LIST_HEAD(&dfu_list); + + alt_num_count = 0; }
int dfu_config_entities(char *env, char *interface, int num) @@ -396,11 +399,12 @@ int dfu_config_entities(char *env, char *interface, int num) for (i = 0; i < dfu_alt_num; i++) {
s = strsep(&env, ";"); - ret = dfu_fill_entity(&dfu[i], s, i, interface, num); + ret = dfu_fill_entity(&dfu[i], s, alt_num_count, interface, num); if (ret) return -1;
list_add_tail(&dfu[i].list, &dfu_list); + alt_num_count++; }
return 0;

On Monday, March 31, 2014 at 10:48:48 AM, Lukasz Majewski wrote:
From: Przemyslaw Marczak p.marczak@samsung.com
Thanks to this multiple calls of function dfu_config_entities() give continuous dfu alt numbering until call dfu_free_entities().
This allows to store dfu entities in multiple env variables.
Change-Id: Ibca7e785bfca9f53b64d3dff0490185b06841b54 Signed-off-by: Przemyslaw Marczak p.marczak@samsung.com
drivers/dfu/dfu.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/dfu/dfu.c b/drivers/dfu/dfu.c index f94c412..e15f959 100644 --- a/drivers/dfu/dfu.c +++ b/drivers/dfu/dfu.c @@ -19,6 +19,7 @@ static bool dfu_reset_request; static LIST_HEAD(dfu_list); static int dfu_alt_num; +static int alt_num_count;
Can the variable name here be any less consistent with the rest ... ? ;-)
[...]
Best regards, Marek Vasut

Hi Marek,
On Monday, March 31, 2014 at 10:48:48 AM, Lukasz Majewski wrote:
From: Przemyslaw Marczak p.marczak@samsung.com
Thanks to this multiple calls of function dfu_config_entities() give continuous dfu alt numbering until call dfu_free_entities().
This allows to store dfu entities in multiple env variables.
Change-Id: Ibca7e785bfca9f53b64d3dff0490185b06841b54 Signed-off-by: Przemyslaw Marczak p.marczak@samsung.com
drivers/dfu/dfu.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/dfu/dfu.c b/drivers/dfu/dfu.c index f94c412..e15f959 100644 --- a/drivers/dfu/dfu.c +++ b/drivers/dfu/dfu.c @@ -19,6 +19,7 @@ static bool dfu_reset_request; static LIST_HEAD(dfu_list); static int dfu_alt_num; +static int alt_num_count;
Can the variable name here be any less consistent with the rest ... ? ;-)
I think, that something like dfu_alt_num_cnt would fit better there.
[...]
Best regards, Marek Vasut

Hello, This code was applied to u-boot-samsung few weeks ago.
On 03/31/2014 11:15 AM, Lukasz Majewski wrote:
Hi Marek,
On Monday, March 31, 2014 at 10:48:48 AM, Lukasz Majewski wrote:
From: Przemyslaw Marczak p.marczak@samsung.com
Thanks to this multiple calls of function dfu_config_entities() give continuous dfu alt numbering until call dfu_free_entities().
This allows to store dfu entities in multiple env variables.
Change-Id: Ibca7e785bfca9f53b64d3dff0490185b06841b54 Signed-off-by: Przemyslaw Marczak p.marczak@samsung.com
drivers/dfu/dfu.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/dfu/dfu.c b/drivers/dfu/dfu.c index f94c412..e15f959 100644 --- a/drivers/dfu/dfu.c +++ b/drivers/dfu/dfu.c @@ -19,6 +19,7 @@ static bool dfu_reset_request; static LIST_HEAD(dfu_list); static int dfu_alt_num; +static int alt_num_count;
Can the variable name here be any less consistent with the rest ... ? ;-)
I think, that something like dfu_alt_num_cnt would fit better there.
[...]
Best regards, Marek Vasut
Thanks

On Tuesday, April 01, 2014 at 08:47:05 AM, Przemyslaw Marczak wrote:
Hello, This code was applied to u-boot-samsung few weeks ago.
Please do NOT top-post. Why did it arrive in the ML yesterday ?
On 03/31/2014 11:15 AM, Lukasz Majewski wrote:
Hi Marek,
On Monday, March 31, 2014 at 10:48:48 AM, Lukasz Majewski wrote:
From: Przemyslaw Marczak p.marczak@samsung.com
Thanks to this multiple calls of function dfu_config_entities() give continuous dfu alt numbering until call dfu_free_entities().
This allows to store dfu entities in multiple env variables.
Change-Id: Ibca7e785bfca9f53b64d3dff0490185b06841b54 Signed-off-by: Przemyslaw Marczak p.marczak@samsung.com
drivers/dfu/dfu.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/dfu/dfu.c b/drivers/dfu/dfu.c index f94c412..e15f959 100644 --- a/drivers/dfu/dfu.c +++ b/drivers/dfu/dfu.c @@ -19,6 +19,7 @@
static bool dfu_reset_request; static LIST_HEAD(dfu_list); static int dfu_alt_num;
+static int alt_num_count;
Can the variable name here be any less consistent with the rest ... ? ;-)
I think, that something like dfu_alt_num_cnt would fit better there.
[...]
Best regards, Marek Vasut
Thanks
Best regards, Marek Vasut

Hi Marek,
On Tuesday, April 01, 2014 at 08:47:05 AM, Przemyslaw Marczak wrote:
Hello, This code was applied to u-boot-samsung few weeks ago.
Please do NOT top-post. Why did it arrive in the ML yesterday ?
It was mine over-zeal :-). Sorry for confusion.
On 03/31/2014 11:15 AM, Lukasz Majewski wrote:
Hi Marek,
On Monday, March 31, 2014 at 10:48:48 AM, Lukasz Majewski wrote:
From: Przemyslaw Marczak p.marczak@samsung.com
Thanks to this multiple calls of function dfu_config_entities() give continuous dfu alt numbering until call dfu_free_entities().
This allows to store dfu entities in multiple env variables.
Change-Id: Ibca7e785bfca9f53b64d3dff0490185b06841b54 Signed-off-by: Przemyslaw Marczak p.marczak@samsung.com
drivers/dfu/dfu.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/dfu/dfu.c b/drivers/dfu/dfu.c index f94c412..e15f959 100644 --- a/drivers/dfu/dfu.c +++ b/drivers/dfu/dfu.c @@ -19,6 +19,7 @@
static bool dfu_reset_request; static LIST_HEAD(dfu_list); static int dfu_alt_num;
+static int alt_num_count;
Can the variable name here be any less consistent with the rest ... ? ;-)
I think, that something like dfu_alt_num_cnt would fit better there.
[...]
Best regards, Marek Vasut
Thanks
Best regards, Marek Vasut

Up till now the CRC32 of received data was calculated unconditionally. The standard crc32 implementation causes long delays when large images were uploaded.
The "dfu_checksum_method" environment variable gives the opportunity to enable on demand (when e.g. debugging) the crc32 calculation. It can be done without need to recompile the u-boot binary.
By default the crc32 is not calculated.
Tests results: 400 MiB ums.img file With crc32 calculation: 65 sec [avg 6.29 MB/s] Without crc32 calculation: 25 sec [avg 16.17 MB/s]
Signed-off-by: Lukasz Majewski l.majewski@samsung.com --- drivers/dfu/dfu.c | 34 ++++++++++++++++++++++++++++++---- include/dfu.h | 5 +++++ 2 files changed, 35 insertions(+), 4 deletions(-)
diff --git a/drivers/dfu/dfu.c b/drivers/dfu/dfu.c index e15f959..5d50b47 100644 --- a/drivers/dfu/dfu.c +++ b/drivers/dfu/dfu.c @@ -20,6 +20,7 @@ static bool dfu_reset_request; static LIST_HEAD(dfu_list); static int dfu_alt_num; static int alt_num_count; +static int dfu_checksum_method;
bool dfu_reset(void) { @@ -99,6 +100,23 @@ unsigned char *dfu_get_buf(void) return dfu_buf; }
+static int dfu_get_checksum_method(void) +{ + char *s; + + s = getenv("dfu_checksum_method"); + if (!s) + return DFU_NO_CHECKSUM; + + if (!strcmp(s, "crc32")) { + debug("%s: DFU checksum method: %s\n", __func__, s); + return DFU_CRC32; + } else { + error("DFU checksum method: %s not supported!\n", s); + return -EINVAL; + } +} + static int dfu_write_buffer_drain(struct dfu_entity *dfu) { long w_size; @@ -109,8 +127,8 @@ static int dfu_write_buffer_drain(struct dfu_entity *dfu) if (w_size == 0) return 0;
- /* update CRC32 */ - dfu->crc = crc32(dfu->crc, dfu->i_buf_start, w_size); + if (dfu_checksum_method == DFU_CRC32) + dfu->crc = crc32(dfu->crc, dfu->i_buf_start, w_size);
ret = dfu->write_medium(dfu, dfu->offset, dfu->i_buf_start, &w_size); if (ret) @@ -234,7 +252,8 @@ static int dfu_read_buffer_fill(struct dfu_entity *dfu, void *buf, int size) /* consume */ if (chunk > 0) { memcpy(buf, dfu->i_buf, chunk); - dfu->crc = crc32(dfu->crc, buf, chunk); + if (dfu_checksum_method == DFU_CRC32) + dfu->crc = crc32(dfu->crc, buf, chunk); dfu->i_buf += chunk; dfu->b_left -= chunk; dfu->r_left -= chunk; @@ -318,7 +337,9 @@ int dfu_read(struct dfu_entity *dfu, void *buf, int size, int blk_seq_num) }
if (ret < size) { - debug("%s: %s CRC32: 0x%x\n", __func__, dfu->name, dfu->crc); + if (dfu_checksum_method == DFU_CRC32) + debug("%s: %s CRC32: 0x%x\n", __func__, dfu->name, + dfu->crc); puts("\nUPLOAD ... done\nCtrl+C to exit ...\n");
dfu_free_buf(); @@ -393,6 +414,11 @@ int dfu_config_entities(char *env, char *interface, int num) dfu_alt_num = dfu_find_alt_num(env); debug("%s: dfu_alt_num=%d\n", __func__, dfu_alt_num);
+ ret = dfu_get_checksum_method(); + if (ret < 0) + return ret; + dfu_checksum_method = ret; + dfu = calloc(sizeof(*dfu), dfu_alt_num); if (!dfu) return -1; diff --git a/include/dfu.h b/include/dfu.h index 751f0fd..855d6dc 100644 --- a/include/dfu.h +++ b/include/dfu.h @@ -37,6 +37,11 @@ enum dfu_op { DFU_OP_WRITE, };
+enum dfu_checksum { + DFU_NO_CHECKSUM = 0, + DFU_CRC32, +}; + #define DFU_NOT_SUPPORTED -1
struct mmc_internal_data {

On Monday, March 31, 2014 at 10:48:49 AM, Lukasz Majewski wrote:
Up till now the CRC32 of received data was calculated unconditionally. The standard crc32 implementation causes long delays when large images were uploaded.
You might want to check common/cmd_hash.c and include/hash.h for the hash_command() call. It does the resolution of the hash algorithm from it's name and you can operate also SHA1 and SHA256 with it. It would be nice if you could just extend it a bit and use that instead of adding another ad-hoc mechanism.
Do you think it'd be possible to reuse it please ?
Best regards, Marek Vasut

Hi Marek,
On Monday, March 31, 2014 at 10:48:49 AM, Lukasz Majewski wrote:
Up till now the CRC32 of received data was calculated unconditionally. The standard crc32 implementation causes long delays when large images were uploaded.
You might want to check common/cmd_hash.c and include/hash.h for the hash_command() call. It does the resolution of the hash algorithm from it's name and you can operate also SHA1 and SHA256 with it. It would be nice if you could just extend it a bit and use that instead of adding another ad-hoc mechanism.
Do you think it'd be possible to reuse it please ?
I think, that crc32 shall be calculated when needed. That is why I've added a dfu_ckecksum_method variable.
With its help it is now possible to use different algorithms for checking - not only crc32 (which in u-boot is the default and painfully slow implementation).
In the future the code: if (dfu_checksum_method == DFU_CRC32) crc32 calculation;
will be changed to:
switch (dfu_checksum_method) { case CRC32: crc32 calculation; break; case SHA1: sha1 calculation; break; case MD5: md5 calculation; break; }
Moreover it is possible to dynamically change the checksum method (between invoking dfu command) via adjusting "dfu_checksum_method" variable.
The default approach is to not calculate anything.
Best regards, Marek Vasut

On Monday, March 31, 2014 at 11:24:31 AM, Lukasz Majewski wrote:
Hi Marek,
On Monday, March 31, 2014 at 10:48:49 AM, Lukasz Majewski wrote:
Up till now the CRC32 of received data was calculated unconditionally. The standard crc32 implementation causes long delays when large images were uploaded.
You might want to check common/cmd_hash.c and include/hash.h for the hash_command() call. It does the resolution of the hash algorithm from it's name and you can operate also SHA1 and SHA256 with it. It would be nice if you could just extend it a bit and use that instead of adding another ad-hoc mechanism.
Do you think it'd be possible to reuse it please ?
I think, that crc32 shall be calculated when needed. That is why I've added a dfu_ckecksum_method variable.
With its help it is now possible to use different algorithms for checking - not only crc32 (which in u-boot is the default and painfully slow implementation).
In the future the code: if (dfu_checksum_method == DFU_CRC32) crc32 calculation;
will be changed to:
switch (dfu_checksum_method) { case CRC32: crc32 calculation; break; case SHA1: sha1 calculation; break; case MD5: md5 calculation; break; }
Moreover it is possible to dynamically change the checksum method (between invoking dfu command) via adjusting "dfu_checksum_method" variable.
The default approach is to not calculate anything.
I get it, but the direct calling of crc32() function can be abstracted already with the hash_command() now. You won't need to the above switch in such case. Also, you can implement a NULL hash algo, which would effectivelly model the case where you want to disable hashing.
Best regards, Marek Vasut

Hi Marek,
On Monday, March 31, 2014 at 11:24:31 AM, Lukasz Majewski wrote:
Hi Marek,
On Monday, March 31, 2014 at 10:48:49 AM, Lukasz Majewski wrote:
Up till now the CRC32 of received data was calculated unconditionally. The standard crc32 implementation causes long delays when large images were uploaded.
You might want to check common/cmd_hash.c and include/hash.h for the hash_command() call. It does the resolution of the hash algorithm from it's name and you can operate also SHA1 and SHA256 with it. It would be nice if you could just extend it a bit and use that instead of adding another ad-hoc mechanism.
Do you think it'd be possible to reuse it please ?
I think, that crc32 shall be calculated when needed. That is why I've added a dfu_ckecksum_method variable.
With its help it is now possible to use different algorithms for checking - not only crc32 (which in u-boot is the default and painfully slow implementation).
In the future the code: if (dfu_checksum_method == DFU_CRC32) crc32 calculation;
will be changed to:
switch (dfu_checksum_method) { case CRC32: crc32 calculation; break; case SHA1: sha1 calculation; break; case MD5: md5 calculation; break; }
Moreover it is possible to dynamically change the checksum method (between invoking dfu command) via adjusting "dfu_checksum_method" variable.
The default approach is to not calculate anything.
I get it, but the direct calling of crc32() function can be abstracted already with the hash_command() now. You won't need to the above switch in such case. Also, you can implement a NULL hash algo, which would effectivelly model the case where you want to disable hashing.
I will look closer to the hash_command() - maybe it would be enough to only call it with a proper parameter.
Thanks for tip.
Best regards, Marek Vasut

Hi Lukasz,
On Mar 31, 2014, at 11:48 AM, Lukasz Majewski wrote:
Up till now the CRC32 of received data was calculated unconditionally. The standard crc32 implementation causes long delays when large images were uploaded.
The "dfu_checksum_method" environment variable gives the opportunity to enable on demand (when e.g. debugging) the crc32 calculation. It can be done without need to recompile the u-boot binary.
By default the crc32 is not calculated.
Tests results: 400 MiB ums.img file With crc32 calculation: 65 sec [avg 6.29 MB/s] Without crc32 calculation: 25 sec [avg 16.17 MB/s]
That's interesting; I'm surprised that there's so much difference. Can we get some info about the environment? I.e. board, whether cache is enabled etc.
The crc table is per byte and I guess lookups maybe expensive.
Regards
-- Pantelis
Signed-off-by: Lukasz Majewski l.majewski@samsung.com
drivers/dfu/dfu.c | 34 ++++++++++++++++++++++++++++++---- include/dfu.h | 5 +++++ 2 files changed, 35 insertions(+), 4 deletions(-)
diff --git a/drivers/dfu/dfu.c b/drivers/dfu/dfu.c index e15f959..5d50b47 100644 --- a/drivers/dfu/dfu.c +++ b/drivers/dfu/dfu.c @@ -20,6 +20,7 @@ static bool dfu_reset_request; static LIST_HEAD(dfu_list); static int dfu_alt_num; static int alt_num_count; +static int dfu_checksum_method;
bool dfu_reset(void) { @@ -99,6 +100,23 @@ unsigned char *dfu_get_buf(void) return dfu_buf; }
+static int dfu_get_checksum_method(void) +{
- char *s;
- s = getenv("dfu_checksum_method");
- if (!s)
return DFU_NO_CHECKSUM;
- if (!strcmp(s, "crc32")) {
debug("%s: DFU checksum method: %s\n", __func__, s);
return DFU_CRC32;
- } else {
error("DFU checksum method: %s not supported!\n", s);
return -EINVAL;
- }
+}
static int dfu_write_buffer_drain(struct dfu_entity *dfu) { long w_size; @@ -109,8 +127,8 @@ static int dfu_write_buffer_drain(struct dfu_entity *dfu) if (w_size == 0) return 0;
- /* update CRC32 */
- dfu->crc = crc32(dfu->crc, dfu->i_buf_start, w_size);
if (dfu_checksum_method == DFU_CRC32)
dfu->crc = crc32(dfu->crc, dfu->i_buf_start, w_size);
ret = dfu->write_medium(dfu, dfu->offset, dfu->i_buf_start, &w_size); if (ret)
@@ -234,7 +252,8 @@ static int dfu_read_buffer_fill(struct dfu_entity *dfu, void *buf, int size) /* consume */ if (chunk > 0) { memcpy(buf, dfu->i_buf, chunk);
dfu->crc = crc32(dfu->crc, buf, chunk);
if (dfu_checksum_method == DFU_CRC32)
dfu->crc = crc32(dfu->crc, buf, chunk); dfu->i_buf += chunk; dfu->b_left -= chunk; dfu->r_left -= chunk;
@@ -318,7 +337,9 @@ int dfu_read(struct dfu_entity *dfu, void *buf, int size, int blk_seq_num) }
if (ret < size) {
debug("%s: %s CRC32: 0x%x\n", __func__, dfu->name, dfu->crc);
if (dfu_checksum_method == DFU_CRC32)
debug("%s: %s CRC32: 0x%x\n", __func__, dfu->name,
dfu->crc);
puts("\nUPLOAD ... done\nCtrl+C to exit ...\n");
dfu_free_buf();
@@ -393,6 +414,11 @@ int dfu_config_entities(char *env, char *interface, int num) dfu_alt_num = dfu_find_alt_num(env); debug("%s: dfu_alt_num=%d\n", __func__, dfu_alt_num);
- ret = dfu_get_checksum_method();
- if (ret < 0)
return ret;
- dfu_checksum_method = ret;
- dfu = calloc(sizeof(*dfu), dfu_alt_num); if (!dfu) return -1;
diff --git a/include/dfu.h b/include/dfu.h index 751f0fd..855d6dc 100644 --- a/include/dfu.h +++ b/include/dfu.h @@ -37,6 +37,11 @@ enum dfu_op { DFU_OP_WRITE, };
+enum dfu_checksum {
- DFU_NO_CHECKSUM = 0,
- DFU_CRC32,
+};
#define DFU_NOT_SUPPORTED -1
struct mmc_internal_data {
1.7.10.4

Hi Pantelis,
Hi Lukasz,
On Mar 31, 2014, at 11:48 AM, Lukasz Majewski wrote:
Up till now the CRC32 of received data was calculated unconditionally. The standard crc32 implementation causes long delays when large images were uploaded.
The "dfu_checksum_method" environment variable gives the opportunity to enable on demand (when e.g. debugging) the crc32 calculation. It can be done without need to recompile the u-boot binary.
By default the crc32 is not calculated.
Tests results: 400 MiB ums.img file With crc32 calculation: 65 sec [avg 6.29 MB/s] Without crc32 calculation: 25 sec [avg 16.17 MB/s]
That's interesting; I'm surprised that there's so much difference. Can we get some info about the environment? I.e. board, whether cache is enabled etc.
Board Exynos4412 - Trats2. Cache L1 enabled, cache L2 disabled.
Crc32 is calculated for 32 MiB chunks of data in the received buffer. I'm using the standard software crc32 u-boot's implementation. It is the same as the one for perl-crc32 debian package.
The crc table is per byte and I guess lookups maybe expensive.
It seems so. Moreover the Exynos4412 has HW crypto engine which supports SHA1, MD5 and other algorithms. Unfortunately it doesn't provide speedup for crc32.
Regards
-- Pantelis
Signed-off-by: Lukasz Majewski l.majewski@samsung.com
drivers/dfu/dfu.c | 34 ++++++++++++++++++++++++++++++---- include/dfu.h | 5 +++++ 2 files changed, 35 insertions(+), 4 deletions(-)
diff --git a/drivers/dfu/dfu.c b/drivers/dfu/dfu.c index e15f959..5d50b47 100644 --- a/drivers/dfu/dfu.c +++ b/drivers/dfu/dfu.c @@ -20,6 +20,7 @@ static bool dfu_reset_request; static LIST_HEAD(dfu_list); static int dfu_alt_num; static int alt_num_count; +static int dfu_checksum_method;
bool dfu_reset(void) { @@ -99,6 +100,23 @@ unsigned char *dfu_get_buf(void) return dfu_buf; }
+static int dfu_get_checksum_method(void) +{
- char *s;
- s = getenv("dfu_checksum_method");
- if (!s)
return DFU_NO_CHECKSUM;
- if (!strcmp(s, "crc32")) {
debug("%s: DFU checksum method: %s\n", __func__,
s);
return DFU_CRC32;
- } else {
error("DFU checksum method: %s not supported!\n",
s);
return -EINVAL;
- }
+}
static int dfu_write_buffer_drain(struct dfu_entity *dfu) { long w_size; @@ -109,8 +127,8 @@ static int dfu_write_buffer_drain(struct dfu_entity *dfu) if (w_size == 0) return 0;
- /* update CRC32 */
- dfu->crc = crc32(dfu->crc, dfu->i_buf_start, w_size);
- if (dfu_checksum_method == DFU_CRC32)
dfu->crc = crc32(dfu->crc, dfu->i_buf_start,
w_size);
ret = dfu->write_medium(dfu, dfu->offset, dfu->i_buf_start, &w_size); if (ret) @@ -234,7 +252,8 @@ static int dfu_read_buffer_fill(struct dfu_entity *dfu, void *buf, int size) /* consume */ if (chunk > 0) { memcpy(buf, dfu->i_buf, chunk);
dfu->crc = crc32(dfu->crc, buf, chunk);
if (dfu_checksum_method == DFU_CRC32)
dfu->crc = crc32(dfu->crc, buf,
chunk); dfu->i_buf += chunk; dfu->b_left -= chunk; dfu->r_left -= chunk; @@ -318,7 +337,9 @@ int dfu_read(struct dfu_entity *dfu, void *buf, int size, int blk_seq_num) }
if (ret < size) {
debug("%s: %s CRC32: 0x%x\n", __func__, dfu->name,
dfu->crc);
if (dfu_checksum_method == DFU_CRC32)
debug("%s: %s CRC32: 0x%x\n", __func__,
dfu->name,
dfu->crc);
puts("\nUPLOAD ... done\nCtrl+C to exit ...\n");
dfu_free_buf();
@@ -393,6 +414,11 @@ int dfu_config_entities(char *env, char *interface, int num) dfu_alt_num = dfu_find_alt_num(env); debug("%s: dfu_alt_num=%d\n", __func__, dfu_alt_num);
- ret = dfu_get_checksum_method();
- if (ret < 0)
return ret;
- dfu_checksum_method = ret;
- dfu = calloc(sizeof(*dfu), dfu_alt_num); if (!dfu) return -1;
diff --git a/include/dfu.h b/include/dfu.h index 751f0fd..855d6dc 100644 --- a/include/dfu.h +++ b/include/dfu.h @@ -37,6 +37,11 @@ enum dfu_op { DFU_OP_WRITE, };
+enum dfu_checksum {
- DFU_NO_CHECKSUM = 0,
- DFU_CRC32,
+};
#define DFU_NOT_SUPPORTED -1
struct mmc_internal_data {
1.7.10.4

Hi Lukasz,
On Mar 31, 2014, at 3:04 PM, Lukasz Majewski wrote:
Hi Pantelis,
Hi Lukasz,
On Mar 31, 2014, at 11:48 AM, Lukasz Majewski wrote:
Up till now the CRC32 of received data was calculated unconditionally. The standard crc32 implementation causes long delays when large images were uploaded.
The "dfu_checksum_method" environment variable gives the opportunity to enable on demand (when e.g. debugging) the crc32 calculation. It can be done without need to recompile the u-boot binary.
By default the crc32 is not calculated.
Tests results: 400 MiB ums.img file With crc32 calculation: 65 sec [avg 6.29 MB/s] Without crc32 calculation: 25 sec [avg 16.17 MB/s]
That's interesting; I'm surprised that there's so much difference. Can we get some info about the environment? I.e. board, whether cache is enabled etc.
Board Exynos4412 - Trats2. Cache L1 enabled, cache L2 disabled.
Crc32 is calculated for 32 MiB chunks of data in the received buffer. I'm using the standard software crc32 u-boot's implementation. It is the same as the one for perl-crc32 debian package.
Thanks for the report. Would it be too much to ask for the time it takes to do a crc32 of the same image on user-space after boot?
I'm interested in the effect the disabling of L2 has.
The crc table is per byte and I guess lookups maybe expensive.
It seems so. Moreover the Exynos4412 has HW crypto engine which supports SHA1, MD5 and other algorithms. Unfortunately it doesn't provide speedup for crc32.
You could do a basic tradeoff of speed vs memory by creating larger tables but it might not work if we're cache trashing.
Or even try using CRC with smaller tables (i.e. 4 bits) which would not affect the cache much.
Regards
-- Pantelis
Regards
-- Pantelis
Signed-off-by: Lukasz Majewski l.majewski@samsung.com
drivers/dfu/dfu.c | 34 ++++++++++++++++++++++++++++++---- include/dfu.h | 5 +++++ 2 files changed, 35 insertions(+), 4 deletions(-)
diff --git a/drivers/dfu/dfu.c b/drivers/dfu/dfu.c index e15f959..5d50b47 100644 --- a/drivers/dfu/dfu.c +++ b/drivers/dfu/dfu.c @@ -20,6 +20,7 @@ static bool dfu_reset_request; static LIST_HEAD(dfu_list); static int dfu_alt_num; static int alt_num_count; +static int dfu_checksum_method;
bool dfu_reset(void) { @@ -99,6 +100,23 @@ unsigned char *dfu_get_buf(void) return dfu_buf; }
+static int dfu_get_checksum_method(void) +{
- char *s;
- s = getenv("dfu_checksum_method");
- if (!s)
return DFU_NO_CHECKSUM;
- if (!strcmp(s, "crc32")) {
debug("%s: DFU checksum method: %s\n", __func__,
s);
return DFU_CRC32;
- } else {
error("DFU checksum method: %s not supported!\n",
s);
return -EINVAL;
- }
+}
static int dfu_write_buffer_drain(struct dfu_entity *dfu) { long w_size; @@ -109,8 +127,8 @@ static int dfu_write_buffer_drain(struct dfu_entity *dfu) if (w_size == 0) return 0;
- /* update CRC32 */
- dfu->crc = crc32(dfu->crc, dfu->i_buf_start, w_size);
- if (dfu_checksum_method == DFU_CRC32)
dfu->crc = crc32(dfu->crc, dfu->i_buf_start,
w_size);
ret = dfu->write_medium(dfu, dfu->offset, dfu->i_buf_start, &w_size); if (ret) @@ -234,7 +252,8 @@ static int dfu_read_buffer_fill(struct dfu_entity *dfu, void *buf, int size) /* consume */ if (chunk > 0) { memcpy(buf, dfu->i_buf, chunk);
dfu->crc = crc32(dfu->crc, buf, chunk);
if (dfu_checksum_method == DFU_CRC32)
dfu->crc = crc32(dfu->crc, buf,
chunk); dfu->i_buf += chunk; dfu->b_left -= chunk; dfu->r_left -= chunk; @@ -318,7 +337,9 @@ int dfu_read(struct dfu_entity *dfu, void *buf, int size, int blk_seq_num) }
if (ret < size) {
debug("%s: %s CRC32: 0x%x\n", __func__, dfu->name,
dfu->crc);
if (dfu_checksum_method == DFU_CRC32)
debug("%s: %s CRC32: 0x%x\n", __func__,
dfu->name,
dfu->crc);
puts("\nUPLOAD ... done\nCtrl+C to exit ...\n");
dfu_free_buf();
@@ -393,6 +414,11 @@ int dfu_config_entities(char *env, char *interface, int num) dfu_alt_num = dfu_find_alt_num(env); debug("%s: dfu_alt_num=%d\n", __func__, dfu_alt_num);
- ret = dfu_get_checksum_method();
- if (ret < 0)
return ret;
- dfu_checksum_method = ret;
- dfu = calloc(sizeof(*dfu), dfu_alt_num); if (!dfu) return -1;
diff --git a/include/dfu.h b/include/dfu.h index 751f0fd..855d6dc 100644 --- a/include/dfu.h +++ b/include/dfu.h @@ -37,6 +37,11 @@ enum dfu_op { DFU_OP_WRITE, };
+enum dfu_checksum {
- DFU_NO_CHECKSUM = 0,
- DFU_CRC32,
+};
#define DFU_NOT_SUPPORTED -1
struct mmc_internal_data {
1.7.10.4
-- Best regards,
Lukasz Majewski
Samsung R&D Institute Poland (SRPOL) | Linux Platform Group

Hi Lukasz,
Hmm, looking at the code the crc is updated when draining the buffer. That means that in essence you're working with cache-cold data.
Can you try performing the crc32 right in the dfu_write() function just after the memcpy?
Regards
-- Pantelis
On Mar 31, 2014, at 3:04 PM, Lukasz Majewski wrote:
Hi Pantelis,
Hi Lukasz,
On Mar 31, 2014, at 11:48 AM, Lukasz Majewski wrote:
Up till now the CRC32 of received data was calculated unconditionally. The standard crc32 implementation causes long delays when large images were uploaded.
The "dfu_checksum_method" environment variable gives the opportunity to enable on demand (when e.g. debugging) the crc32 calculation. It can be done without need to recompile the u-boot binary.
By default the crc32 is not calculated.
Tests results: 400 MiB ums.img file With crc32 calculation: 65 sec [avg 6.29 MB/s] Without crc32 calculation: 25 sec [avg 16.17 MB/s]
That's interesting; I'm surprised that there's so much difference. Can we get some info about the environment? I.e. board, whether cache is enabled etc.
Board Exynos4412 - Trats2. Cache L1 enabled, cache L2 disabled.
Crc32 is calculated for 32 MiB chunks of data in the received buffer. I'm using the standard software crc32 u-boot's implementation. It is the same as the one for perl-crc32 debian package.
The crc table is per byte and I guess lookups maybe expensive.
It seems so. Moreover the Exynos4412 has HW crypto engine which supports SHA1, MD5 and other algorithms. Unfortunately it doesn't provide speedup for crc32.
Regards
-- Pantelis
Signed-off-by: Lukasz Majewski l.majewski@samsung.com
drivers/dfu/dfu.c | 34 ++++++++++++++++++++++++++++++---- include/dfu.h | 5 +++++ 2 files changed, 35 insertions(+), 4 deletions(-)
diff --git a/drivers/dfu/dfu.c b/drivers/dfu/dfu.c index e15f959..5d50b47 100644 --- a/drivers/dfu/dfu.c +++ b/drivers/dfu/dfu.c @@ -20,6 +20,7 @@ static bool dfu_reset_request; static LIST_HEAD(dfu_list); static int dfu_alt_num; static int alt_num_count; +static int dfu_checksum_method;
bool dfu_reset(void) { @@ -99,6 +100,23 @@ unsigned char *dfu_get_buf(void) return dfu_buf; }
+static int dfu_get_checksum_method(void) +{
- char *s;
- s = getenv("dfu_checksum_method");
- if (!s)
return DFU_NO_CHECKSUM;
- if (!strcmp(s, "crc32")) {
debug("%s: DFU checksum method: %s\n", __func__,
s);
return DFU_CRC32;
- } else {
error("DFU checksum method: %s not supported!\n",
s);
return -EINVAL;
- }
+}
static int dfu_write_buffer_drain(struct dfu_entity *dfu) { long w_size; @@ -109,8 +127,8 @@ static int dfu_write_buffer_drain(struct dfu_entity *dfu) if (w_size == 0) return 0;
- /* update CRC32 */
- dfu->crc = crc32(dfu->crc, dfu->i_buf_start, w_size);
- if (dfu_checksum_method == DFU_CRC32)
dfu->crc = crc32(dfu->crc, dfu->i_buf_start,
w_size);
ret = dfu->write_medium(dfu, dfu->offset, dfu->i_buf_start, &w_size); if (ret) @@ -234,7 +252,8 @@ static int dfu_read_buffer_fill(struct dfu_entity *dfu, void *buf, int size) /* consume */ if (chunk > 0) { memcpy(buf, dfu->i_buf, chunk);
dfu->crc = crc32(dfu->crc, buf, chunk);
if (dfu_checksum_method == DFU_CRC32)
dfu->crc = crc32(dfu->crc, buf,
chunk); dfu->i_buf += chunk; dfu->b_left -= chunk; dfu->r_left -= chunk; @@ -318,7 +337,9 @@ int dfu_read(struct dfu_entity *dfu, void *buf, int size, int blk_seq_num) }
if (ret < size) {
debug("%s: %s CRC32: 0x%x\n", __func__, dfu->name,
dfu->crc);
if (dfu_checksum_method == DFU_CRC32)
debug("%s: %s CRC32: 0x%x\n", __func__,
dfu->name,
dfu->crc);
puts("\nUPLOAD ... done\nCtrl+C to exit ...\n");
dfu_free_buf();
@@ -393,6 +414,11 @@ int dfu_config_entities(char *env, char *interface, int num) dfu_alt_num = dfu_find_alt_num(env); debug("%s: dfu_alt_num=%d\n", __func__, dfu_alt_num);
- ret = dfu_get_checksum_method();
- if (ret < 0)
return ret;
- dfu_checksum_method = ret;
- dfu = calloc(sizeof(*dfu), dfu_alt_num); if (!dfu) return -1;
diff --git a/include/dfu.h b/include/dfu.h index 751f0fd..855d6dc 100644 --- a/include/dfu.h +++ b/include/dfu.h @@ -37,6 +37,11 @@ enum dfu_op { DFU_OP_WRITE, };
+enum dfu_checksum {
- DFU_NO_CHECKSUM = 0,
- DFU_CRC32,
+};
#define DFU_NOT_SUPPORTED -1
struct mmc_internal_data {
1.7.10.4
-- Best regards,
Lukasz Majewski
Samsung R&D Institute Poland (SRPOL) | Linux Platform Group

On Mon, Mar 31, 2014 at 10:48:49AM +0200, Lukasz Majewski wrote:
Up till now the CRC32 of received data was calculated unconditionally. The standard crc32 implementation causes long delays when large images were uploaded.
The "dfu_checksum_method" environment variable gives the opportunity to enable on demand (when e.g. debugging) the crc32 calculation. It can be done without need to recompile the u-boot binary.
By default the crc32 is not calculated.
Tests results: 400 MiB ums.img file With crc32 calculation: 65 sec [avg 6.29 MB/s] Without crc32 calculation: 25 sec [avg 16.17 MB/s]
Signed-off-by: Lukasz Majewski l.majewski@samsung.com
OK, so, protocol question. What's going on in the background here such that it's a good and safe idea to not do this checksum and we won't end up in the case where data was corrupted and we just bricked a board in update mode?

On Monday, March 31, 2014 at 08:05:17 PM, Tom Rini wrote:
On Mon, Mar 31, 2014 at 10:48:49AM +0200, Lukasz Majewski wrote:
Up till now the CRC32 of received data was calculated unconditionally. The standard crc32 implementation causes long delays when large images were uploaded.
The "dfu_checksum_method" environment variable gives the opportunity to enable on demand (when e.g. debugging) the crc32 calculation. It can be done without need to recompile the u-boot binary.
By default the crc32 is not calculated.
Tests results: 400 MiB ums.img file With crc32 calculation: 65 sec [avg 6.29 MB/s] Without crc32 calculation: 25 sec [avg 16.17 MB/s]
Signed-off-by: Lukasz Majewski l.majewski@samsung.com
OK, so, protocol question. What's going on in the background here such that it's a good and safe idea to not do this checksum and we won't end up in the case where data was corrupted and we just bricked a board in update mode?
This is just a convenience measure for people who do a lot of updates and thus the time matters, it's not a production-feature.
Best regards, Marek Vasut

On Mon, Mar 31, 2014 at 08:15:41PM +0200, Marek Vasut wrote:
On Monday, March 31, 2014 at 08:05:17 PM, Tom Rini wrote:
On Mon, Mar 31, 2014 at 10:48:49AM +0200, Lukasz Majewski wrote:
Up till now the CRC32 of received data was calculated unconditionally. The standard crc32 implementation causes long delays when large images were uploaded.
The "dfu_checksum_method" environment variable gives the opportunity to enable on demand (when e.g. debugging) the crc32 calculation. It can be done without need to recompile the u-boot binary.
By default the crc32 is not calculated.
Tests results: 400 MiB ums.img file With crc32 calculation: 65 sec [avg 6.29 MB/s] Without crc32 calculation: 25 sec [avg 16.17 MB/s]
Signed-off-by: Lukasz Majewski l.majewski@samsung.com
OK, so, protocol question. What's going on in the background here such that it's a good and safe idea to not do this checksum and we won't end up in the case where data was corrupted and we just bricked a board in update mode?
This is just a convenience measure for people who do a lot of updates and thus the time matters, it's not a production-feature.
That's what I figured. So we've got the default backwards.

On Mon, 31 Mar 2014 14:05:17 -0400 Tom Rini trini@ti.com wrote:
On Mon, Mar 31, 2014 at 10:48:49AM +0200, Lukasz Majewski wrote:
Up till now the CRC32 of received data was calculated unconditionally. The standard crc32 implementation causes long delays when large images were uploaded.
The "dfu_checksum_method" environment variable gives the opportunity to enable on demand (when e.g. debugging) the crc32 calculation. It can be done without need to recompile the u-boot binary.
By default the crc32 is not calculated.
Tests results: 400 MiB ums.img file With crc32 calculation: 65 sec [avg 6.29 MB/s] Without crc32 calculation: 25 sec [avg 16.17 MB/s]
Signed-off-by: Lukasz Majewski l.majewski@samsung.com
OK, so, protocol question.
The DFU 1.1 standard in its appendinx B specifies the DFU suffix. It has the crc32 for the whole file, vendorID, device ID and other handy fields.
Unfortunately, this part of the standard is not supported neither at dfu implementation in u-boot nor dfu-util (v.0.5 - debian).
It would be handy for small files (like bootloaders, kernels) where we download all the data at once. For critical files it should be definitely implemented. From my glimpse observation the dfu-util would require some extension to support the DFU suffix (Tormod, please correct me if I'm wrong).
For large files (400 MiB in this case) it is useless since we store data as it goes (with 32 MiB chunks). Also, as we send the large files we experience the biggest performance penalty from CRC32 calculation. It takes considerable time and in my opinion now serves only for debug purposes, to provide the final CRC for comparison with original one, even though the file is already on flash.
When we use CRC in such a way, we should be able to decide which tool (algorithm) use for debug. SHA1, MD5, etc are widely available on each linux box. To have the same crc32 algorithm, which is in u-boot, implemented as linux command line tool you need to search a bit (libarchive-zip-perl package for debian).
I think that we can improve the crc32 performance with calculating it for smaller chunks, which already fits L1 cache. Now they are calculated for 32 MiB.
What's going on in the background here such that it's a good and safe idea to not do this checksum and we won't end up in the case where data was corrupted and we just bricked a board in update mode?
Now we rely solely on testing. Downloading file, checking CRC and compare with original. I also have some automated tests, which utilize MD5.
TO SUM UP:
1. Handling of the DFU suffix shall be implemented and utilized in both u-boot and dfu-util with critical files (bootloaders, kernel).
2. There should be freedom to use different checksum algorithms for providing debugging information.
3. The current CRC32 calculation at DFU should be optimized.
Best regards, Lukasz Majewski

-----BEGIN PGP SIGNED MESSAGE----- Hash: SHA1
On 03/31/2014 04:44 PM, Lukasz Majewski wrote:
On Mon, 31 Mar 2014 14:05:17 -0400 Tom Rini trini@ti.com wrote:
On Mon, Mar 31, 2014 at 10:48:49AM +0200, Lukasz Majewski wrote:
Up till now the CRC32 of received data was calculated unconditionally. The standard crc32 implementation causes long delays when large images were uploaded.
The "dfu_checksum_method" environment variable gives the opportunity to enable on demand (when e.g. debugging) the crc32 calculation. It can be done without need to recompile the u-boot binary.
By default the crc32 is not calculated.
Tests results: 400 MiB ums.img file With crc32 calculation: 65 sec [avg 6.29 MB/s] Without crc32 calculation: 25 sec [avg 16.17 MB/s]
Signed-off-by: Lukasz Majewski l.majewski@samsung.com
OK, so, protocol question.
The DFU 1.1 standard in its appendinx B specifies the DFU suffix. It has the crc32 for the whole file, vendorID, device ID and other handy fields.
Unfortunately, this part of the standard is not supported neither at dfu implementation in u-boot nor dfu-util (v.0.5 - debian).
OK, so this is the important part. We're doing a crc32 on stuff that's not required by spec, just handy for verification, manually.
[snip]
When we use CRC in such a way, we should be able to decide which tool (algorithm) use for debug. SHA1, MD5, etc are widely available on each linux box. To have the same crc32 algorithm, which is in u-boot, implemented as linux command line tool you need to search a bit (libarchive-zip-perl package for debian).
I take your point, but I use rhash -C to get matching crc32 and it was the first thing I stumbled upon when going "oh right, what does crc32?".
[snip]
TO SUM UP:
- Handling of the DFU suffix shall be implemented and utilized in both
u-boot and dfu-util with critical files (bootloaders, kernel).
- There should be freedom to use different checksum algorithms for
providing debugging information.
- The current CRC32 calculation at DFU should be optimized.
Well, is there work being done on the dfu-util side currently or planned in the near future to do #1 per the spec?
But, with that, I'm happier to default to no crc32'ing the data for today.
- -- Tom

Hi Tom,
-----BEGIN PGP SIGNED MESSAGE----- Hash: SHA1
On 03/31/2014 04:44 PM, Lukasz Majewski wrote:
On Mon, 31 Mar 2014 14:05:17 -0400 Tom Rini trini@ti.com wrote:
On Mon, Mar 31, 2014 at 10:48:49AM +0200, Lukasz Majewski wrote:
Up till now the CRC32 of received data was calculated unconditionally. The standard crc32 implementation causes long delays when large images were uploaded.
The "dfu_checksum_method" environment variable gives the opportunity to enable on demand (when e.g. debugging) the crc32 calculation. It can be done without need to recompile the u-boot binary.
By default the crc32 is not calculated.
Tests results: 400 MiB ums.img file With crc32 calculation: 65 sec [avg 6.29 MB/s] Without crc32 calculation: 25 sec [avg 16.17 MB/s]
Signed-off-by: Lukasz Majewski l.majewski@samsung.com
OK, so, protocol question.
The DFU 1.1 standard in its appendinx B specifies the DFU suffix. It has the crc32 for the whole file, vendorID, device ID and other handy fields.
Unfortunately, this part of the standard is not supported neither at dfu implementation in u-boot nor dfu-util (v.0.5 - debian).
OK, so this is the important part. We're doing a crc32 on stuff that's not required by spec, just handy for verification, manually.
Yes, indeed.
[snip]
When we use CRC in such a way, we should be able to decide which tool (algorithm) use for debug. SHA1, MD5, etc are widely available on each linux box. To have the same crc32 algorithm, which is in u-boot, implemented as linux command line tool you need to search a bit (libarchive-zip-perl package for debian).
I take your point, but I use rhash -C to get matching crc32 and it was the first thing I stumbled upon when going "oh right, what does crc32?".
[snip]
TO SUM UP:
- Handling of the DFU suffix shall be implemented and utilized in
both u-boot and dfu-util with critical files (bootloaders, kernel).
- There should be freedom to use different checksum algorithms for
providing debugging information.
- The current CRC32 calculation at DFU should be optimized.
Well, is there work being done on the dfu-util side currently or planned in the near future to do #1 per the spec?
As Tormod has written, dfu-util supports it for initial setting, but is sending data with this information stripped. Hence u-boot shall not calculate CRC32 unless we plan to add a customized prefix for crucial binaries.
But, with that, I'm happier to default to no crc32'ing the data for today.
Thanks for your opinion. We share similar view on the issue.
Tom -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.11 (GNU/Linux) Comment: Using GnuPG with Thunderbird - http://www.enigmail.net/
iQIcBAEBAgAGBQJTOdhmAAoJENk4IS6UOR1WDsYP/2eg3v+K5nCUZ22eYnrY4s14 f8KUan8My7Ifr/to9qbAIFsSuw5mlLPvYy5JNnrbmmipDH2bQIO20R1t94/Mm8Ut Hoj1nbGZ3JvMsoj86D+9pFz2AchVgbpvs+boiJGw2s1TZ3xKoNlJ1O4WJ8ttZRZS 1B3FC50PKYJK6lgWgfvds2AevLIAcF1QyePVsOLVKV2USilFiZ1LVb8qUFp5l6Ja LX3wfQjhPq4gQq8bX7LW6zNbDkXuZjxLlKT/kUxzl2qclpHj4+8rXVVRf1mLaLvU Dvx50V/JCncIivRBhfvK2BoQ/LOntmPwGfO95AY57naP9+nCzzE9vdKv/Ki5vpju /q/CjlXbkS4iwru+91neyMdfeCiiALV2yW0GBORgph7kCpUk343S753epl/MFmAW rOQ0xBjw4q5KeXijtQG5bdevynPkB09soKKhQX5XRe7i8olXe32+khQVwqpomjkG v0YbKvUTuCZ1NNZqEey/zO4gJPR2Tq4QiNFpfFPvcYOqlqoC/C84HfO+M8ocKiUh SUgdaUQI+uP7LSQ7Yv5N149ZD4aWAfsyU5YCtyC0gI9aXRF5YqwWU96eym8PUVM8 amo+srsp1uK5l6XRO0cqtM9cmLedPRNAEKhah4/GgZa3S6lTH3oD7JBEPNyazTsc FpOnIzGk5JJnVpeobQv2 =HfXm -----END PGP SIGNATURE-----

On Mon, Mar 31, 2014 at 10:44 PM, Lukasz Majewski wrote:
The DFU 1.1 standard in its appendinx B specifies the DFU suffix. It has the crc32 for the whole file, vendorID, device ID and other handy fields.
Unfortunately, this part of the standard is not supported neither at dfu implementation in u-boot nor dfu-util (v.0.5 - debian).
It would be handy for small files (like bootloaders, kernels) where we download all the data at once. For critical files it should be definitely implemented. From my glimpse observation the dfu-util would require some extension to support the DFU suffix (Tormod, please correct me if I'm wrong).
You are wrong :) Please don't use what's available in Debian (stable?) as a reference. I don't know what their maintainer is up to. dfu-util supports the DFU suffix according to the DFU standard. That means it checks the CRC after reading the file, and also checks that vendor/product values match, then sends the firmware to the device after stripping off the suffix.
Are you wanting to do some CRC checking at the device side? That would be outside the DFU standard. But you can always put whatever you want in the "firmware", including proprietary headers or suffices. We already support some of those, please see the dfu-util (and dfu-suffx/dfu-prefix) code at dfu-util.gnumonks.org.
Regards, Tormod

Hi Tormod,
On Mon, Mar 31, 2014 at 10:44 PM, Lukasz Majewski wrote:
The DFU 1.1 standard in its appendinx B specifies the DFU suffix. It has the crc32 for the whole file, vendorID, device ID and other handy fields.
Unfortunately, this part of the standard is not supported neither at dfu implementation in u-boot nor dfu-util (v.0.5 - debian).
It would be handy for small files (like bootloaders, kernels) where we download all the data at once. For critical files it should be definitely implemented. From my glimpse observation the dfu-util would require some extension to support the DFU suffix (Tormod, please correct me if I'm wrong).
You are wrong :) Please don't use what's available in Debian (stable?) as a reference.
I'm regarding this as a reference since 80% of developers will download the dfu-util with apt-get on debian.
I don't know what their maintainer is up to. dfu-util supports the DFU suffix according to the DFU standard. That means it checks the CRC after reading the file, and also checks that vendor/product values match, then sends the firmware to the device after stripping off the suffix.
So this means that: 1. The file before reading by dfu-util has to be equipped with suffix. 2. The dfu-util reads it and then if matching sends data (with sufix stripped) to target. This means that we are "protected" from downloading wrong firmware to device, however 3. The target doesn't have any means to check if the downloaded data is consistent - the information about CRC is stripped at dfu-util.
Are you wanting to do some CRC checking at the device side? That would be outside the DFU standard.
I hoped that the suffix is appended by dfu-util and then sent with the binary to target. As a result we would be able to perform some integrity tests.
But you can always put whatever you want in the "firmware", including proprietary headers or suffices.
I think that this would be finally required for updating small (crucial) files - like bootloaders, kernels.
We already support some of those, please see the dfu-util (and dfu-suffx/dfu-prefix) code at dfu-util.gnumonks.org.
Ok, I see. This would probably require extension of ./src/prefix.c file. In this way u-boot community would impose some kind of standard prefix/suffix only for u-boot. It's a pity, that integrity checking is not standardized in the DFU.
Regards, Tormod

Hello.
On Tue, 2014-04-01 at 11:00, Lukasz Majewski wrote:
Hi Tormod,
On Mon, Mar 31, 2014 at 10:44 PM, Lukasz Majewski wrote:
The DFU 1.1 standard in its appendinx B specifies the DFU suffix. It has the crc32 for the whole file, vendorID, device ID and other handy fields.
Unfortunately, this part of the standard is not supported neither at dfu implementation in u-boot nor dfu-util (v.0.5 - debian).
It would be handy for small files (like bootloaders, kernels) where we download all the data at once. For critical files it should be definitely implemented. From my glimpse observation the dfu-util would require some extension to support the DFU suffix (Tormod, please correct me if I'm wrong).
You are wrong :) Please don't use what's available in Debian (stable?) as a reference.
I'm regarding this as a reference since 80% of developers will download the dfu-util with apt-get on debian.
You really believe that 80% of all developers are using Debian? If they ship an old version there is nothing Tormod can do about it. I implemented the dfu suffix feature one or two years ago and made a release after it. If distros are not picking it up you have to fill a bug for them to update.
I don't know what their maintainer is up to. dfu-util supports the DFU suffix according to the DFU standard. That means it checks the CRC after reading the file, and also checks that vendor/product values match, then sends the firmware to the device after stripping off the suffix.
So this means that:
- The file before reading by dfu-util has to be equipped with suffix.
- The dfu-util reads it and then if matching sends data (with sufix
stripped) to target. This means that we are "protected" from downloading wrong firmware to device, however 3. The target doesn't have any means to check if the downloaded data is consistent - the information about CRC is stripped at dfu-util.
Correct. That is how the DFU spec defines it.
Are you wanting to do some CRC checking at the device side? That would be outside the DFU standard.
I hoped that the suffix is appended by dfu-util and then sent with the binary to target. As a result we would be able to perform some integrity tests.
The spec requires to remove it. I also found that odd when implementing it but the spec is clear on this.
But you can always put whatever you want in the "firmware", including proprietary headers or suffices.
I think that this would be finally required for updating small (crucial) files - like bootloaders, kernels.
We already support some of those, please see the dfu-util (and dfu-suffx/dfu-prefix) code at dfu-util.gnumonks.org.
Ok, I see. This would probably require extension of ./src/prefix.c file. In this way u-boot community would impose some kind of standard prefix/suffix only for u-boot. It's a pity, that integrity checking is not standardized in the DFU.
It all depends on how much you want to be compatible with the DFU spec.
regards Stefan Schmidt

Hi Stefan,
Hello.
On Tue, 2014-04-01 at 11:00, Lukasz Majewski wrote:
Hi Tormod,
On Mon, Mar 31, 2014 at 10:44 PM, Lukasz Majewski wrote:
The DFU 1.1 standard in its appendinx B specifies the DFU suffix. It has the crc32 for the whole file, vendorID, device ID and other handy fields.
Unfortunately, this part of the standard is not supported neither at dfu implementation in u-boot nor dfu-util (v.0.5 - debian).
It would be handy for small files (like bootloaders, kernels) where we download all the data at once. For critical files it should be definitely implemented. From my glimpse observation the dfu-util would require some extension to support the DFU suffix (Tormod, please correct me if I'm wrong).
You are wrong :) Please don't use what's available in Debian (stable?) as a reference.
I'm regarding this as a reference since 80% of developers will download the dfu-util with apt-get on debian.
You really believe that 80% of all developers are using Debian?
It seems, that some miscommunication has crept in. What I meant was that majority of developers, who are using deb based distro (debian, ubuntu), would be lazy and use the apt-get/aptitude utility to install dfu-util.
It doesn't mean, that I'm not using the newest dfu-util (I recall that there was some issue with libusb).
If they ship an old version there is nothing Tormod can do about it. I implemented the dfu suffix feature one or two years ago and made a release after it. If distros are not picking it up you have to fill a bug for them to update.
Maybe this is the thing to do.
I don't know what their maintainer is up to. dfu-util supports the DFU suffix according to the DFU standard. That means it checks the CRC after reading the file, and also checks that vendor/product values match, then sends the firmware to the device after stripping off the suffix.
So this means that:
- The file before reading by dfu-util has to be equipped with
suffix. 2. The dfu-util reads it and then if matching sends data (with sufix stripped) to target. This means that we are "protected" from downloading wrong firmware to device, however 3. The target doesn't have any means to check if the downloaded data is consistent - the information about CRC is stripped at dfu-util.
Correct. That is how the DFU spec defines it.
Now it is clear.
Are you wanting to do some CRC checking at the device side? That would be outside the DFU standard.
I hoped that the suffix is appended by dfu-util and then sent with the binary to target. As a result we would be able to perform some integrity tests.
The spec requires to remove it. I also found that odd when implementing it but the spec is clear on this.
But you can always put whatever you want in the "firmware", including proprietary headers or suffices.
I think that this would be finally required for updating small (crucial) files - like bootloaders, kernels.
We already support some of those, please see the dfu-util (and dfu-suffx/dfu-prefix) code at dfu-util.gnumonks.org.
Ok, I see. This would probably require extension of ./src/prefix.c file. In this way u-boot community would impose some kind of standard prefix/suffix only for u-boot. It's a pity, that integrity checking is not standardized in the DFU.
It all depends on how much you want to be compatible with the DFU spec.
I would like to be as close to the standard as possible. Otherwise I could be blamed for breaking compatibility.
regards Stefan Schmidt

Up till now the CRC32 of received data was calculated unconditionally. The standard crc32 implementation causes long delays when large images were uploaded.
The "dfu_checksum_method" environment variable gives the opportunity to enable on demand (when e.g. debugging) the crc32 calculation. It can be done without need to recompile the u-boot binary.
By default the crc32 is not calculated.
Tests results: 400 MiB ums.img file With crc32 calculation: 65 sec [avg 6.29 MB/s] Without crc32 calculation: 25 sec [avg 16.17 MB/s]
Signed-off-by: Lukasz Majewski l.majewski@samsung.com Cc: Marek Vasut marex@denx.de --- Dependency: "lib:crc32: Allow setting of the initial crc32 value" http://patchwork.ozlabs.org/patch/345720/
Changes for v2: - Utilization of hash_block generic function to calculate CRC32 checksum
--- drivers/dfu/dfu.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 47 insertions(+), 6 deletions(-)
diff --git a/drivers/dfu/dfu.c b/drivers/dfu/dfu.c index 51b1026..7705c37 100644 --- a/drivers/dfu/dfu.c +++ b/drivers/dfu/dfu.c @@ -13,6 +13,7 @@ #include <mmc.h> #include <fat.h> #include <dfu.h> +#include <hash.h> #include <linux/list.h> #include <linux/compiler.h>
@@ -20,6 +21,7 @@ static bool dfu_reset_request; static LIST_HEAD(dfu_list); static int dfu_alt_num; static int alt_num_cnt; +static char *dfu_checksum_method;
bool dfu_reset(void) { @@ -99,6 +101,24 @@ unsigned char *dfu_get_buf(void) return dfu_buf; }
+static char *dfu_get_checksum_method(void) +{ + char *s; + + s = getenv("dfu_checksum_method"); + if (!s) + return NULL; + + if (!strcmp(s, "crc32")) { + debug("%s: DFU checksum method: %s\n", __func__, s); + return s; + } + + error("DFU checksum method: %s not supported!\n", s); + + return NULL; +} + static int dfu_write_buffer_drain(struct dfu_entity *dfu) { long w_size; @@ -109,9 +129,16 @@ static int dfu_write_buffer_drain(struct dfu_entity *dfu) if (w_size == 0) return 0;
- /* update CRC32 */ - dfu->crc = crc32(dfu->crc, dfu->i_buf_start, w_size); - + if (dfu_checksum_method) { + hash_block(dfu_checksum_method, dfu->i_buf_start, w_size, + (uint8_t *)&dfu->crc, NULL); + /* + * Call to ntohl() is necessary because of + * hash_block() implementation, which returns + * crc32 in the network order (big endian) + */ + dfu->crc = ntohl(dfu->crc); + } ret = dfu->write_medium(dfu, dfu->offset, dfu->i_buf_start, &w_size); if (ret) debug("%s: Write error!\n", __func__); @@ -134,7 +161,9 @@ int dfu_flush(struct dfu_entity *dfu, void *buf, int size, int blk_seq_num) if (dfu->flush_medium) ret = dfu->flush_medium(dfu);
- printf("\nDFU complete CRC32: 0x%08x\n", dfu->crc); + if (dfu_checksum_method) + printf("\nDFU complete %s: 0x%08x\n", dfu_checksum_method, + dfu->crc);
/* clear everything */ dfu_free_buf(); @@ -234,7 +263,16 @@ static int dfu_read_buffer_fill(struct dfu_entity *dfu, void *buf, int size) /* consume */ if (chunk > 0) { memcpy(buf, dfu->i_buf, chunk); - dfu->crc = crc32(dfu->crc, buf, chunk); + if (dfu_checksum_method) { + hash_block(dfu_checksum_method, buf, chunk, + (uint8_t *)&dfu->crc, NULL); + /* + * Call to ntohl() is necessary because of + * hash_block() implementation, which returns + * crc32 in the network order (big endian) + */ + dfu->crc = ntohl(dfu->crc); + } dfu->i_buf += chunk; dfu->b_left -= chunk; dfu->r_left -= chunk; @@ -318,7 +356,9 @@ int dfu_read(struct dfu_entity *dfu, void *buf, int size, int blk_seq_num) }
if (ret < size) { - debug("%s: %s CRC32: 0x%x\n", __func__, dfu->name, dfu->crc); + if (dfu_checksum_method) + debug("%s: %s %s: 0x%x\n", __func__, dfu->name, + dfu_checksum_method, dfu->crc); puts("\nUPLOAD ... done\nCtrl+C to exit ...\n");
dfu_free_buf(); @@ -393,6 +433,7 @@ int dfu_config_entities(char *env, char *interface, int num) dfu_alt_num = dfu_find_alt_num(env); debug("%s: dfu_alt_num=%d\n", __func__, dfu_alt_num);
+ dfu_checksum_method = dfu_get_checksum_method(); dfu = calloc(sizeof(*dfu), dfu_alt_num); if (!dfu) return -1;

On Monday, May 05, 2014 at 03:16:26 PM, Lukasz Majewski wrote:
Up till now the CRC32 of received data was calculated unconditionally. The standard crc32 implementation causes long delays when large images were uploaded.
The "dfu_checksum_method" environment variable gives the opportunity to enable on demand (when e.g. debugging) the crc32 calculation. It can be done without need to recompile the u-boot binary.
By default the crc32 is not calculated.
Tests results: 400 MiB ums.img file With crc32 calculation: 65 sec [avg 6.29 MB/s] Without crc32 calculation: 25 sec [avg 16.17 MB/s]
Signed-off-by: Lukasz Majewski l.majewski@samsung.com Cc: Marek Vasut marex@denx.de
Acked-by: Marek Vasut marex@denx.de
Best regards, Marek Vasut

Up till now the CRC32 of received data was calculated unconditionally. The standard crc32 implementation causes long delay when large images were uploaded.
The "dfu_hash_algo" environment variable gives the opportunity to enable on demand (when e.g. debugging) the hash (crc32) calculation. It can be done without need to recompile the u-boot binary and reuses the generic hash framework.
By default the crc32 is NOT calculated anymore.
Tests results: 400 MiB ums.img file With crc32 calculation: 65 sec [avg 6.29 MB/s] Without crc32 calculation: 25 sec [avg 16.17 MB/s]
Signed-off-by: Lukasz Majewski l.majewski@samsung.com Cc: Marek Vasut marex@denx.de --- Changes for v2: - Utilization of hash_block generic function to calculate CRC32 checksum Changes for v3: - Remove dependency on altering the lib/hash.c generic implementation - Use of hash_update() function to calculate crc32 in the same way as it was done with crc32 --- drivers/dfu/dfu.c | 47 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 42 insertions(+), 5 deletions(-)
diff --git a/drivers/dfu/dfu.c b/drivers/dfu/dfu.c index 51b1026..c06f4cc 100644 --- a/drivers/dfu/dfu.c +++ b/drivers/dfu/dfu.c @@ -13,6 +13,7 @@ #include <mmc.h> #include <fat.h> #include <dfu.h> +#include <hash.h> #include <linux/list.h> #include <linux/compiler.h>
@@ -20,6 +21,7 @@ static bool dfu_reset_request; static LIST_HEAD(dfu_list); static int dfu_alt_num; static int alt_num_cnt; +static struct hash_algo *dfu_hash_algo;
bool dfu_reset(void) { @@ -99,6 +101,24 @@ unsigned char *dfu_get_buf(void) return dfu_buf; }
+static char *dfu_get_hash_algo(void) +{ + char *s; + + s = getenv("dfu_hash_algo"); + if (!s) + return NULL; + + if (!strcmp(s, "crc32")) { + debug("%s: DFU hash method: %s\n", __func__, s); + return s; + } + + error("DFU hash method: %s not supported!\n", s); + + return NULL; +} + static int dfu_write_buffer_drain(struct dfu_entity *dfu) { long w_size; @@ -109,8 +129,9 @@ static int dfu_write_buffer_drain(struct dfu_entity *dfu) if (w_size == 0) return 0;
- /* update CRC32 */ - dfu->crc = crc32(dfu->crc, dfu->i_buf_start, w_size); + if (dfu_hash_algo) + dfu_hash_algo->hash_update(dfu_hash_algo, &dfu->crc, + dfu->i_buf_start, w_size, 0);
ret = dfu->write_medium(dfu, dfu->offset, dfu->i_buf_start, &w_size); if (ret) @@ -134,7 +155,9 @@ int dfu_flush(struct dfu_entity *dfu, void *buf, int size, int blk_seq_num) if (dfu->flush_medium) ret = dfu->flush_medium(dfu);
- printf("\nDFU complete CRC32: 0x%08x\n", dfu->crc); + if (dfu_hash_algo) + printf("\nDFU complete %s: 0x%08x\n", dfu_hash_algo->name, + dfu->crc);
/* clear everything */ dfu_free_buf(); @@ -234,7 +257,11 @@ static int dfu_read_buffer_fill(struct dfu_entity *dfu, void *buf, int size) /* consume */ if (chunk > 0) { memcpy(buf, dfu->i_buf, chunk); - dfu->crc = crc32(dfu->crc, buf, chunk); + if (dfu_hash_algo) + dfu_hash_algo->hash_update(dfu_hash_algo, + &dfu->crc, buf, + chunk, 0); + dfu->i_buf += chunk; dfu->b_left -= chunk; dfu->r_left -= chunk; @@ -318,7 +345,9 @@ int dfu_read(struct dfu_entity *dfu, void *buf, int size, int blk_seq_num) }
if (ret < size) { - debug("%s: %s CRC32: 0x%x\n", __func__, dfu->name, dfu->crc); + if (dfu_hash_algo) + debug("%s: %s %s: 0x%x\n", __func__, dfu->name, + dfu_hash_algo->name, dfu->crc); puts("\nUPLOAD ... done\nCtrl+C to exit ...\n");
dfu_free_buf(); @@ -393,6 +422,14 @@ int dfu_config_entities(char *env, char *interface, int num) dfu_alt_num = dfu_find_alt_num(env); debug("%s: dfu_alt_num=%d\n", __func__, dfu_alt_num);
+ dfu_hash_algo = NULL; + s = dfu_get_hash_algo(); + if (s) { + ret = hash_lookup_algo(s, &dfu_hash_algo); + if (ret) + error("Hash algorithm %s not supported\n", s); + } + dfu = calloc(sizeof(*dfu), dfu_alt_num); if (!dfu) return -1;

On Thursday, May 08, 2014 at 02:27:47 PM, Lukasz Majewski wrote:
Up till now the CRC32 of received data was calculated unconditionally. The standard crc32 implementation causes long delay when large images were uploaded.
The "dfu_hash_algo" environment variable gives the opportunity to enable on demand (when e.g. debugging) the hash (crc32) calculation. It can be done without need to recompile the u-boot binary and reuses the generic hash framework.
By default the crc32 is NOT calculated anymore.
Tests results: 400 MiB ums.img file With crc32 calculation: 65 sec [avg 6.29 MB/s] Without crc32 calculation: 25 sec [avg 16.17 MB/s]
Signed-off-by: Lukasz Majewski l.majewski@samsung.com Cc: Marek Vasut marex@denx.de
Reviewed-by: Marek Vasut marex@denx.de
Best regards, Marek Vasut

Dear Lukasz Majewski,
In message 1399552067-31208-1-git-send-email-l.majewski@samsung.com you wrote:
Up till now the CRC32 of received data was calculated unconditionally. The standard crc32 implementation causes long delay when large images were uploaded.
The "dfu_hash_algo" environment variable gives the opportunity to enable on demand (when e.g. debugging) the hash (crc32) calculation. It can be done without need to recompile the u-boot binary and reuses the generic hash framework.
By default the crc32 is NOT calculated anymore.
I consider this a VARY BAD idea, as it causes a significant decrease of reliability and robustness of the systems. Please do not do this.
In any case, if you introduce this, the behaviour should be documented, and the default setting should be such as to keep the previous behaviour, i. e. CRC checking should remain on by default. then people who are willing to trade reliability for a little speed can still switch it off, but the unawarerest of the users will not suffer.
Best regards,
Wolfgang Denk

Hi Wolfgang,
Dear Lukasz Majewski,
In message 1399552067-31208-1-git-send-email-l.majewski@samsung.com you wrote:
Up till now the CRC32 of received data was calculated unconditionally. The standard crc32 implementation causes long delay when large images were uploaded.
The "dfu_hash_algo" environment variable gives the opportunity to enable on demand (when e.g. debugging) the hash (crc32) calculation. It can be done without need to recompile the u-boot binary and reuses the generic hash framework.
By default the crc32 is NOT calculated anymore.
I consider this a VARY BAD idea, as it causes a significant decrease of reliability and robustness of the systems. Please do not do this.
I do understand that reliability is very important, but please consider following arguments:
1. Now calculated crc32 is only used for debugging.
For automated tests I use MD5 and compare this value before sending data to target via DFU and after I read it. This testing is done purely on HOST machine.
Please refer to the discussion which we had at previous version of this patch:
http://thread.gmane.org/gmane.comp.boot-loaders.u-boot/183311/focus=183455
Participants have agreed, that we shall optionally enable crc32 (or other algorithm) calculation.
2. The current crc32 implementation is painfully slow (although I have only L1 enabled on my target).
3. With large files (like rootfs images) we store data (to medium) with 32 MiB chunks, which means that when we calculate complete crc32 the image is already written to its final destination.
Of course we could store the rootfs to some "free" space on the eMMC, then calculate crc32 and store it to the final position. This however would take considerable time and require wrapping our binaries to special headers (as described below).
4. This patch also allows some flexibility: by setting the env variable we can decide which algorithm to use (crc32, sha1, etc). It is appealing since we use the hash_* code anyway.
In any case, if you introduce this, the behaviour should be documented, and the default setting should be such as to keep the previous behaviour, i. e. CRC checking should remain on by default. then people who are willing to trade reliability for a little speed
I would not touch the code if the speedup wouldn't be so significant. Reducing flashing time of 400 MiB file from 65 s to 25 s is worth its effort.
can still switch it off, but the unawarerest of the users will not suffer.
As I've stated previously the crc32 in the current dfu implementation is only informative.
To take the full advantage of it, we would need to modify the dfu-util to wrap the sent file to some kind of header or locally write some script to do that. However, this is not specified by the standard and would be u-boot's extension of the DFU.
Even more important issue is that it would work only for small files (like uImage).
Best regards,
Wolfgang Denk

Dear Lukasz,
In message 20140509085203.31133238@amdc2363 you wrote:
For automated tests I use MD5 and compare this value before sending data to target via DFU and after I read it. This testing is done purely on HOST machine.
This is unsufficient. You should always verify the image on the target after the download has completed.
Participants have agreed, that we shall optionally enable crc32 (or other algorithm) calculation.
If this is the default now, it should remain the default.
- The current crc32 implementation is painfully slow (although I have
only L1 enabled on my target).
This is an unrelated problem then, which should excluded from this discussion here.
- With large files (like rootfs images) we store data (to medium) with
32 MiB chunks, which means that when we calculate complete crc32 the image is already written to its final destination.
You can still detect if the download was corrupted, report a proper error and initiate a re-download. This would at least give you a chance to react to corrupted data. Just closing the eyes and hoping no errors will ever happen has always been a bad strategy.
- This patch also allows some flexibility: by setting the env variable
we can decide which algorithm to use (crc32, sha1, etc). It is appealing since we use the hash_* code anyway.
Agreed. This was not my point.
What I complained about is the change in behaviour. I asked to make the existing behaviour the default, so unaware users will not be affected. Only if you intentionally want some other behaviour you can then enable this by setting the env variable.
In any case, if you introduce this, the behaviour should be documented, and the default setting should be such as to keep the previous behaviour, i. e. CRC checking should remain on by default. then people who are willing to trade reliability for a little speed
I would not touch the code if the speedup wouldn't be so significant. Reducing flashing time of 400 MiB file from 65 s to 25 s is worth its effort.
I disagree, if you pay for the speed by reduced reliability, and if you don't even inform the user about this new behaviour.
Also, I feel it might be worth to investigate why the checksumming is slow on your system.
As I've stated previously the crc32 in the current dfu implementation is only informative.
It is pretty useful information, isn't it?
To take the full advantage of it, we would need to modify the dfu-util to wrap the sent file to some kind of header or locally write some script to do that. However, this is not specified by the standard and would be u-boot's extension of the DFU.
Ok, add this to the many deficientcies of DFU :-(
Even more important issue is that it would work only for small files (like uImage).
Why so? Can we not calculate CRC even when the transfer is broken down into several chunks?
Best regards,
Wolfgang Denk

Hi Wolfgang,
Dear Lukasz,
In message 20140509085203.31133238@amdc2363 you wrote:
For automated tests I use MD5 and compare this value before sending data to target via DFU and after I read it. This testing is done purely on HOST machine.
This is unsufficient. You should always verify the image on the target after the download has completed.
I agree.
Participants have agreed, that we shall optionally enable crc32 (or other algorithm) calculation.
If this is the default now, it should remain the default.
- The current crc32 implementation is painfully slow (although I
have only L1 enabled on my target).
This is an unrelated problem then, which should excluded from this discussion here.
- With large files (like rootfs images) we store data (to medium)
with 32 MiB chunks, which means that when we calculate complete crc32 the image is already written to its final destination.
You can still detect if the download was corrupted, report a proper error and initiate a re-download. This would at least give you a chance to react to corrupted data.
In this particular case I would need to chop the large file either at dfu-util or on some script, where each chunk need to have the header with its crc32. Then before storing each chunk I can assess if data wasn't corrupted.
This would provide reliability.
Now, even that I have the crc32 calculated for a chunk, I don't have the original one to compare.
Just closing the eyes and hoping no errors will ever happen has always been a bad strategy.
+1
- This patch also allows some flexibility: by setting the env
variable we can decide which algorithm to use (crc32, sha1, etc). It is appealing since we use the hash_* code anyway.
Agreed. This was not my point.
What I complained about is the change in behaviour. I asked to make the existing behaviour the default, so unaware users will not be affected. Only if you intentionally want some other behaviour you can then enable this by setting the env variable.
Ok. I will preserve the default behavior. However, personally I think that for a long term this proposed solution is better.
In any case, if you introduce this, the behaviour should be documented, and the default setting should be such as to keep the previous behaviour, i. e. CRC checking should remain on by default. then people who are willing to trade reliability for a little speed
I would not touch the code if the speedup wouldn't be so significant. Reducing flashing time of 400 MiB file from 65 s to 25 s is worth its effort.
I disagree, if you pay for the speed by reduced reliability, and if you don't even inform the user about this new behaviour.
Also, I feel it might be worth to investigate why the checksumming is slow on your system.
As I've stated previously the crc32 in the current dfu implementation is only informative.
It is pretty useful information, isn't it?
It depends what do you want to do with it. If you have target connected via serial to some test setup and log this output and process it on HOST afterwards, then it is useful.
Otherwise, you only see on console the CRC, which you can by hand compare with crc calculated on your host. And this information displays just after you stored the data to the medium (and corrupted the previous one).
To take the full advantage of it, we would need to modify the dfu-util to wrap the sent file to some kind of header or locally write some script to do that. However, this is not specified by the standard and would be u-boot's extension of the DFU.
Ok, add this to the many deficientcies of DFU :-(
The standard only allow the file which is the input to dfu-util to be protected by CRC. Then dfu-util check this value and strips off the header.
Even more important issue is that it would work only for small files (like uImage).
Why so? Can we not calculate CRC even when the transfer is broken down into several chunks?
To do that one would need to:
- chop the large file to several smaller ones (and the chunk size can be different for each platform and must be know for HOST utils) - calculate crc32 for each chunk - wrap it to some header not conforming to the DFU standard -it would be the u-boot extension - send each chunk separately to target - by calling dfu-util several times.
Handling of this would be difficult because of the need of DFU state machine extension.
Best regards,
Wolfgang Denk

On Fri, May 09, 2014 at 10:31:54AM +0200, Wolfgang Denk wrote:
Dear Lukasz,
In message 20140509085203.31133238@amdc2363 you wrote:
For automated tests I use MD5 and compare this value before sending data to target via DFU and after I read it. This testing is done purely on HOST machine.
This is unsufficient. You should always verify the image on the target after the download has completed.
True. But this patch doesn't really change what you would have to do, and arguably make it easier.
Participants have agreed, that we shall optionally enable crc32 (or other algorithm) calculation.
If this is the default now, it should remain the default.
Keep in mind what this current default is. We say "here was the CRC32". We do not compare it with an expected value nor do we have the ability to since we're not passed from the host what the value was.
- The current crc32 implementation is painfully slow (although I have
only L1 enabled on my target).
This is an unrelated problem then, which should excluded from this discussion here.
Agreed.
- With large files (like rootfs images) we store data (to medium) with
32 MiB chunks, which means that when we calculate complete crc32 the image is already written to its final destination.
You can still detect if the download was corrupted, report a proper error and initiate a re-download. This would at least give you a chance to react to corrupted data. Just closing the eyes and hoping no errors will ever happen has always been a bad strategy.
Before and after this change, only if the console is being monitored by some script. We do not nor are we given an expected hash so we cannot say data was corrupted.
- This patch also allows some flexibility: by setting the env variable
we can decide which algorithm to use (crc32, sha1, etc). It is appealing since we use the hash_* code anyway.
Agreed. This was not my point.
What I complained about is the change in behaviour. I asked to make the existing behaviour the default, so unaware users will not be affected. Only if you intentionally want some other behaviour you can then enable this by setting the env variable.
Well, looking at mainline usage of DFU, Lukasz is speaking for about half of the users / implementors. Since Denx is working with the other half, can you shed some light on actual use vs theoretical possibilities?

Hi Tom, Wolfgang,
On Fri, May 09, 2014 at 10:31:54AM +0200, Wolfgang Denk wrote:
Dear Lukasz,
In message 20140509085203.31133238@amdc2363 you wrote:
For automated tests I use MD5 and compare this value before sending data to target via DFU and after I read it. This testing is done purely on HOST machine.
This is unsufficient. You should always verify the image on the target after the download has completed.
True. But this patch doesn't really change what you would have to do, and arguably make it easier.
Participants have agreed, that we shall optionally enable crc32 (or other algorithm) calculation.
If this is the default now, it should remain the default.
Keep in mind what this current default is. We say "here was the CRC32". We do not compare it with an expected value nor do we have the ability to since we're not passed from the host what the value was.
- The current crc32 implementation is painfully slow (although I
have only L1 enabled on my target).
This is an unrelated problem then, which should excluded from this discussion here.
Agreed.
- With large files (like rootfs images) we store data (to
medium) with 32 MiB chunks, which means that when we calculate complete crc32 the image is already written to its final destination.
You can still detect if the download was corrupted, report a proper error and initiate a re-download. This would at least give you a chance to react to corrupted data. Just closing the eyes and hoping no errors will ever happen has always been a bad strategy.
Before and after this change, only if the console is being monitored by some script. We do not nor are we given an expected hash so we cannot say data was corrupted.
- This patch also allows some flexibility: by setting the env
variable we can decide which algorithm to use (crc32, sha1, etc). It is appealing since we use the hash_* code anyway.
Agreed. This was not my point.
What I complained about is the change in behaviour. I asked to make the existing behaviour the default, so unaware users will not be affected. Only if you intentionally want some other behaviour you can then enable this by setting the env variable.
Well, looking at mainline usage of DFU, Lukasz is speaking for about half of the users / implementors. Since Denx is working with the other half, can you shed some light on actual use vs theoretical possibilities?
I don't want to urge anybody on making any conclusion here :-), but I would be very grateful if we could come up with an agreement.
As I've stated previously, my opinion is similar to the one presented by Tom in this message.
For me it would be best to not calculate any checksum on default and only enable it when needed.

Hello Lukasz,
Sorry for answering so late to this thread ...
Am 15.05.2014 09:09, schrieb Lukasz Majewski:
Hi Tom, Wolfgang,
On Fri, May 09, 2014 at 10:31:54AM +0200, Wolfgang Denk wrote:
Dear Lukasz,
In message20140509085203.31133238@amdc2363 you wrote:
For automated tests I use MD5 and compare this value before sending data to target via DFU and after I read it. This testing is done purely on HOST machine.
This is unsufficient. You should always verify the image on the target after the download has completed.
True. But this patch doesn't really change what you would have to do, and arguably make it easier.
Participants have agreed, that we shall optionally enable crc32 (or other algorithm) calculation.
If this is the default now, it should remain the default.
Keep in mind what this current default is. We say "here was the CRC32". We do not compare it with an expected value nor do we have the ability to since we're not passed from the host what the value was.
- The current crc32 implementation is painfully slow (although I
have only L1 enabled on my target).
This is an unrelated problem then, which should excluded from this discussion here.
Agreed.
- With large files (like rootfs images) we store data (to
medium) with 32 MiB chunks, which means that when we calculate complete crc32 the image is already written to its final destination.
You can still detect if the download was corrupted, report a proper error and initiate a re-download. This would at least give you a chance to react to corrupted data. Just closing the eyes and hoping no errors will ever happen has always been a bad strategy.
Before and after this change, only if the console is being monitored by some script. We do not nor are we given an expected hash so we cannot say data was corrupted.
- This patch also allows some flexibility: by setting the env
variable we can decide which algorithm to use (crc32, sha1, etc). It is appealing since we use the hash_* code anyway.
Agreed. This was not my point.
What I complained about is the change in behaviour. I asked to make the existing behaviour the default, so unaware users will not be affected. Only if you intentionally want some other behaviour you can then enable this by setting the env variable.
Well, looking at mainline usage of DFU, Lukasz is speaking for about half of the users / implementors. Since Denx is working with the other half, can you shed some light on actual use vs theoretical possibilities?
I don't want to urge anybody on making any conclusion here :-), but I would be very grateful if we could come up with an agreement.
As I've stated previously, my opinion is similar to the one presented by Tom in this message.
For me it would be best to not calculate any checksum on default and only enable it when needed.
Hmm.. as we use the calculated crc only for printing it on the console, the question is really, why should we calculate it?
I try this patch on the siemens boards and report if the speed impact is there also so big as in your tests. (which board was this? Are there caches enabled?)
And I ask the customer of the siemens boards, if they check the crc value on the u-boot console output, if not, I vote for droping the crc calculation as default ...
BTW: Should such a crc check not be added to dfu-util and u-boot?
bye, Heiko

Dear Lukasz,
In message 20140515090904.32f1d13d@amdc2363 you wrote:
What I complained about is the change in behaviour. I asked to make the existing behaviour the default, so unaware users will not be affected. Only if you intentionally want some other behaviour you can then enable this by setting the env variable.
Well, looking at mainline usage of DFU, Lukasz is speaking for about half of the users / implementors. Since Denx is working with the other half, can you shed some light on actual use vs theoretical possibilities?
I don't want to urge anybody on making any conclusion here :-), but I would be very grateful if we could come up with an agreement.
As I've stated previously, my opinion is similar to the one presented by Tom in this message.
For me it would be best to not calculate any checksum on default and only enable it when needed.
I asked Heiko to run some actual tests on the boards where he has to maintain DFU for. For a 288 MiB image he did not measure any difference - with your patch applied, both with and without CRC enabled, we would get the same (slow) 1:54 minutes download time.
This reinforces my speculation that you are actually addressing the wrong problem. Instead of adding new code and environment variables and making the system even more complex, we should just leave everything as is, and you should try to find out why the CRC calculation is so low for you. Checking if caches are enabled is probably among the things that should be done first.
Regarding the checksumming topic in general: the fact that the DFU standard defines a method to verify the checksum of the image (dwCRC field in the DFU File Suffix), but does not transmit this vital data to the target so the actual file download and storage procedure on the target is completely unprotected is IMO a serious design flaw of the DFU protocl. Do you think it would be possible to have this augmented / fixed?
Best regards,
Wolfgang Denk

Hi Wolfgang,
Dear Lukasz,
In message 20140515090904.32f1d13d@amdc2363 you wrote:
What I complained about is the change in behaviour. I asked to make the existing behaviour the default, so unaware users will not be affected. Only if you intentionally want some other behaviour you can then enable this by setting the env variable.
Well, looking at mainline usage of DFU, Lukasz is speaking for about half of the users / implementors. Since Denx is working with the other half, can you shed some light on actual use vs theoretical possibilities?
I don't want to urge anybody on making any conclusion here :-), but I would be very grateful if we could come up with an agreement.
As I've stated previously, my opinion is similar to the one presented by Tom in this message.
For me it would be best to not calculate any checksum on default and only enable it when needed.
I asked Heiko to run some actual tests on the boards where he has to maintain DFU for. For a 288 MiB image he did not measure any difference - with your patch applied, both with and without CRC enabled, we would get the same (slow) 1:54 minutes download time.
As I've spoken with Heiko, am33xx uses NAND memory. I deal with eMMC. Moreover, the size of "chunks" are different - 1 MiB and 32 MiB.
I must double check for the rationale for chunk size of 32 MiB at Trats/Trats2 boards. I suspect, that eMMC write performance depend on that.
I will perform some performance tests with 1 MiB chunk size and share the result.
This reinforces my speculation that you are actually addressing the wrong problem. Instead of adding new code and environment variables and making the system even more complex, we should just leave everything as is,
During working on this patch I've replaced the crc32() method with the call to hash_method(), which IMHO is welcome.
I also don't personally like the crc32, hence I like the choice which this patch gives me to use other algorithm (for which I've got HW support on my platform - e.g. MD5).
and you should try to find out why the CRC calculation is so low for you. Checking if caches are enabled is probably among the things that should be done first.
L1 is enabled. L2 has been disabled on purpose (power consumption reduction).
Regarding the checksumming topic in general: the fact that the DFU standard defines a method to verify the checksum of the image (dwCRC field in the DFU File Suffix), but does not transmit this vital data to the target so the actual file download and storage procedure on the target is completely unprotected is IMO a serious design flaw of the DFU protocl. Do you think it would be possible to have this augmented / fixed?
Please note that the last revision of DFU is from 2004. I've contacted Greg KH (one of the original authors) and he replied that no new attempt to revise the standard was made.
It is possible to fix this problem, by augmenting the current implementation.
I saw the idea [*] of defining only one (or special) alt setting and in this one file embed the header with integrity data, list of files/partitions images included in this file, and even the information to where we want to write. In this way we would still comply with DFU 1.1 standard, which would be "wrapped" to some host scripts and special u-boot code. It even would be possible to leave the current code untouched.
The original link with the idea [*]: http://codelectron.wordpress.com/2014/02/28/flexible-firmware-upgrade/
And the ML discussion: http://thread.gmane.org/gmane.comp.boot-loaders.u-boot/181715/match=proposal...
The best however, would be to revise the standard to include such functionality to it. In the same time I'm fully aware that this is very unlikely to happen.
Best regards,
Wolfgang Denk

Dear Lukasz,
In message 20140515154334.626923b4@amdc2363 you wrote:
This reinforces my speculation that you are actually addressing the wrong problem. Instead of adding new code and environment variables and making the system even more complex, we should just leave everything as is,
During working on this patch I've replaced the crc32() method with the call to hash_method(), which IMHO is welcome.
Yes, indeed this is highly welcome. Thanks a lot for that!
I also don't personally like the crc32, hence I like the choice which this patch gives me to use other algorithm (for which I've got HW support on my platform - e.g. MD5).
Well, is this really useful? dfu-utils provides only CRC caculation, so where would you get the reference value for any other checksum metod from?
and you should try to find out why the CRC calculation is so low for you. Checking if caches are enabled is probably among the things that should be done first.
L1 is enabled. L2 has been disabled on purpose (power consumption reduction).
This certainly contributes to slow code execution.
Please note that the last revision of DFU is from 2004. I've contacted Greg KH (one of the original authors) and he replied that no new attempt to revise the standard was made.
This may just mean that users were just happy with the current situation. It's definitely better than if changed had been proposed but rejected.
The best however, would be to revise the standard to include such functionality to it. In the same time I'm fully aware that this is very unlikely to happen.
Why do you think it is unlikely? Of course, it would require that someone comes up with such a proposal in the first place. But you sound as if you were certain a proposal had no chance for being considered. I may be naive, but should we not at least try before giving up?
Best regards,
Wolfgang Denk

Hi Wolfgang,
Dear Lukasz,
In message 20140515154334.626923b4@amdc2363 you wrote:
This reinforces my speculation that you are actually addressing the wrong problem. Instead of adding new code and environment variables and making the system even more complex, we should just leave everything as is,
During working on this patch I've replaced the crc32() method with the call to hash_method(), which IMHO is welcome.
Yes, indeed this is highly welcome. Thanks a lot for that!
I also don't personally like the crc32, hence I like the choice which this patch gives me to use other algorithm (for which I've got HW support on my platform - e.g. MD5).
Well, is this really useful? dfu-utils provides only CRC caculation, so where would you get the reference value for any other checksum metod from?
I was rather thinking about a test setup with my target connected via serial console to HOST machine. Then I could compare the CRC32/MD5/SHA1 just after sending the data.
For my target it is better to use MD5 or SHA1 since support for them is provided via the specialized, embedded crypto IP.
and you should try to find out why the CRC calculation is so low for you. Checking if caches are enabled is probably among the things that should be done first.
L1 is enabled. L2 has been disabled on purpose (power consumption reduction).
This certainly contributes to slow code execution.
Please note that the last revision of DFU is from 2004. I've contacted Greg KH (one of the original authors) and he replied that no new attempt to revise the standard was made.
This may just mean that users were just happy with the current situation.
It is hard to say.
It's definitely better than if changed had been proposed but rejected.
True.
The best however, would be to revise the standard to include such functionality to it. In the same time I'm fully aware that this is very unlikely to happen.
Why do you think it is unlikely?
I don't have the experience with preparing USB standards, but I assume that it is somewhat hard to revise the standard after 10 years.
Of course, it would require that someone comes up with such a proposal in the first place. But you sound as if you were certain a proposal had no chance for being considered.
No, this is not what I meant.
I may be naive, but should we not at least try before giving up?
Unfortunately my time budget is limited and I feel like this has lower priority than fixing/solving current DFU problems.
Best regards,
Wolfgang Denk

Hi Wolfgang, Tom,
Hi Wolfgang,
Dear Lukasz,
In message 20140515090904.32f1d13d@amdc2363 you wrote:
What I complained about is the change in behaviour. I asked to make the existing behaviour the default, so unaware users will not be affected. Only if you intentionally want some other behaviour you can then enable this by setting the env variable.
Well, looking at mainline usage of DFU, Lukasz is speaking for about half of the users / implementors. Since Denx is working with the other half, can you shed some light on actual use vs theoretical possibilities?
I don't want to urge anybody on making any conclusion here :-), but I would be very grateful if we could come up with an agreement.
As I've stated previously, my opinion is similar to the one presented by Tom in this message.
For me it would be best to not calculate any checksum on default and only enable it when needed.
I asked Heiko to run some actual tests on the boards where he has to maintain DFU for. For a 288 MiB image he did not measure any difference - with your patch applied, both with and without CRC enabled, we would get the same (slow) 1:54 minutes download time.
As I've spoken with Heiko, am33xx uses NAND memory. I deal with eMMC. Moreover, the size of "chunks" are different - 1 MiB and 32 MiB.
I must double check for the rationale for chunk size of 32 MiB at Trats/Trats2 boards. I suspect, that eMMC write performance depend on that.
I will perform some performance tests with 1 MiB chunk size and share the result.
Unfortunately the 32 MiB is fixed for our platform. since lthor uses it by default.
This reinforces my speculation that you are actually addressing the wrong problem. Instead of adding new code and environment variables and making the system even more complex, we should just leave everything as is,
During working on this patch I've replaced the crc32() method with the call to hash_method(), which IMHO is welcome.
I also don't personally like the crc32, hence I like the choice which this patch gives me to use other algorithm (for which I've got HW support on my platform - e.g. MD5).
and you should try to find out why the CRC calculation is so low for you. Checking if caches are enabled is probably among the things that should be done first.
L1 is enabled. L2 has been disabled on purpose (power consumption reduction).
Regarding L2 - our platform requires SMC calls to enable and manage L2 cache. In my opinion support for this in u-boot is an overkill.
Can we conclude this whole discussion? The main point was if we should keep calculating and displaying crc32 as default for DFU transfers.
I'm for the option to NOT display and calculate it by default (PATCH v3).

Hello Lukasz,
Am 16.05.2014 10:58, schrieb Lukasz Majewski:
Hi Wolfgang, Tom,
Hi Wolfgang,
Dear Lukasz,
In message20140515090904.32f1d13d@amdc2363 you wrote:
What I complained about is the change in behaviour. I asked to make the existing behaviour the default, so unaware users will not be affected. Only if you intentionally want some other behaviour you can then enable this by setting the env variable.
Well, looking at mainline usage of DFU, Lukasz is speaking for about half of the users / implementors. Since Denx is working with the other half, can you shed some light on actual use vs theoretical possibilities?
I don't want to urge anybody on making any conclusion here :-), but I would be very grateful if we could come up with an agreement.
As I've stated previously, my opinion is similar to the one presented by Tom in this message.
For me it would be best to not calculate any checksum on default and only enable it when needed.
I asked Heiko to run some actual tests on the boards where he has to maintain DFU for. For a 288 MiB image he did not measure any difference - with your patch applied, both with and without CRC enabled, we would get the same (slow) 1:54 minutes download time.
As I've spoken with Heiko, am33xx uses NAND memory. I deal with eMMC. Moreover, the size of "chunks" are different - 1 MiB and 32 MiB.
I must double check for the rationale for chunk size of 32 MiB at Trats/Trats2 boards. I suspect, that eMMC write performance depend on that.
I will perform some performance tests with 1 MiB chunk size and share the result.
Unfortunately the 32 MiB is fixed for our platform. since lthor uses it by default.
This reinforces my speculation that you are actually addressing the wrong problem. Instead of adding new code and environment variables and making the system even more complex, we should just leave everything as is,
During working on this patch I've replaced the crc32() method with the call to hash_method(), which IMHO is welcome.
I also don't personally like the crc32, hence I like the choice which this patch gives me to use other algorithm (for which I've got HW support on my platform - e.g. MD5).
and you should try to find out why the CRC calculation is so low for you. Checking if caches are enabled is probably among the things that should be done first.
L1 is enabled. L2 has been disabled on purpose (power consumption reduction).
Regarding L2 - our platform requires SMC calls to enable and manage L2 cache. In my opinion support for this in u-boot is an overkill.
Can we conclude this whole discussion? The main point was if we should keep calculating and displaying crc32 as default for DFU transfers.
I'm for the option to NOT display and calculate it by default (PATCH v3).
I talked with the siemens board customer, they also do not check/use the displayed value from U-Boot ...
So, for me it is OK to not display this value ... but we should add to DFU such a check ... or?
bye, Heiko

Hi Heiko,
Hello Lukasz,
Am 16.05.2014 10:58, schrieb Lukasz Majewski:
Hi Wolfgang, Tom,
Hi Wolfgang,
Dear Lukasz,
In message20140515090904.32f1d13d@amdc2363 you wrote:
> What I complained about is the change in behaviour. I asked > to make the existing behaviour the default, so unaware users > will not be affected. Only if you intentionally want some > other behaviour you can then enable this by setting the env > variable.
Well, looking at mainline usage of DFU, Lukasz is speaking for about half of the users / implementors. Since Denx is working with the other half, can you shed some light on actual use vs theoretical possibilities?
I don't want to urge anybody on making any conclusion here :-), but I would be very grateful if we could come up with an agreement.
As I've stated previously, my opinion is similar to the one presented by Tom in this message.
For me it would be best to not calculate any checksum on default and only enable it when needed.
I asked Heiko to run some actual tests on the boards where he has to maintain DFU for. For a 288 MiB image he did not measure any difference - with your patch applied, both with and without CRC enabled, we would get the same (slow) 1:54 minutes download time.
As I've spoken with Heiko, am33xx uses NAND memory. I deal with eMMC. Moreover, the size of "chunks" are different - 1 MiB and 32 MiB.
I must double check for the rationale for chunk size of 32 MiB at Trats/Trats2 boards. I suspect, that eMMC write performance depend on that.
I will perform some performance tests with 1 MiB chunk size and share the result.
Unfortunately the 32 MiB is fixed for our platform. since lthor uses it by default.
This reinforces my speculation that you are actually addressing the wrong problem. Instead of adding new code and environment variables and making the system even more complex, we should just leave everything as is,
During working on this patch I've replaced the crc32() method with the call to hash_method(), which IMHO is welcome.
I also don't personally like the crc32, hence I like the choice which this patch gives me to use other algorithm (for which I've got HW support on my platform - e.g. MD5).
and you should try to find out why the CRC calculation is so low for you. Checking if caches are enabled is probably among the things that should be done first.
L1 is enabled. L2 has been disabled on purpose (power consumption reduction).
Regarding L2 - our platform requires SMC calls to enable and manage L2 cache. In my opinion support for this in u-boot is an overkill.
Can we conclude this whole discussion? The main point was if we should keep calculating and displaying crc32 as default for DFU transfers.
I'm for the option to NOT display and calculate it by default (PATCH v3).
I talked with the siemens board customer, they also do not check/use the displayed value from U-Boot ...
So, for me it is OK to not display this value ...
Ok, so we now have a consensus here.
but we should add to DFU such a check ... or?
Yes, we should add a way to send the CRC32/MD5/SHA1 to our boards.
Unfortunately I'm out of office now, so it is hard for me to develop something.
However, the rough idea would be to send the CRC32 (or any other checksum) to a separate alt setting.
bye, Heiko

Hi Heiko,
Hello Lukasz,
Am 16.05.2014 10:58, schrieb Lukasz Majewski:
Hi Wolfgang, Tom,
Hi Wolfgang,
Dear Lukasz,
In message20140515090904.32f1d13d@amdc2363 you wrote:
> What I complained about is the change in behaviour. I asked > to make the existing behaviour the default, so unaware users > will not be affected. Only if you intentionally want some > other behaviour you can then enable this by setting the env > variable.
Well, looking at mainline usage of DFU, Lukasz is speaking for about half of the users / implementors. Since Denx is working with the other half, can you shed some light on actual use vs theoretical possibilities?
I don't want to urge anybody on making any conclusion here :-), but I would be very grateful if we could come up with an agreement.
As I've stated previously, my opinion is similar to the one presented by Tom in this message.
For me it would be best to not calculate any checksum on default and only enable it when needed.
I asked Heiko to run some actual tests on the boards where he has to maintain DFU for. For a 288 MiB image he did not measure any difference - with your patch applied, both with and without CRC enabled, we would get the same (slow) 1:54 minutes download time.
As I've spoken with Heiko, am33xx uses NAND memory. I deal with eMMC. Moreover, the size of "chunks" are different - 1 MiB and 32 MiB.
I must double check for the rationale for chunk size of 32 MiB at Trats/Trats2 boards. I suspect, that eMMC write performance depend on that.
I will perform some performance tests with 1 MiB chunk size and share the result.
Unfortunately the 32 MiB is fixed for our platform. since lthor uses it by default.
This reinforces my speculation that you are actually addressing the wrong problem. Instead of adding new code and environment variables and making the system even more complex, we should just leave everything as is,
During working on this patch I've replaced the crc32() method with the call to hash_method(), which IMHO is welcome.
I also don't personally like the crc32, hence I like the choice which this patch gives me to use other algorithm (for which I've got HW support on my platform - e.g. MD5).
and you should try to find out why the CRC calculation is so low for you. Checking if caches are enabled is probably among the things that should be done first.
L1 is enabled. L2 has been disabled on purpose (power consumption reduction).
Regarding L2 - our platform requires SMC calls to enable and manage L2 cache. In my opinion support for this in u-boot is an overkill.
Can we conclude this whole discussion? The main point was if we should keep calculating and displaying crc32 as default for DFU transfers.
I'm for the option to NOT display and calculate it by default (PATCH v3).
I talked with the siemens board customer, they also do not check/use the displayed value from U-Boot ...
So, for me it is OK to not display this value ...
Applied this patch (with no default CRC32 calculation - v3) to u-boot-dfu tree.
but we should add to DFU such a check ... or?
bye, Heiko

Up till now the CRC32 of received data was calculated unconditionally. The standard crc32 implementation causes long delay when large images were uploaded.
The "dfu_hash_algo" environment variable gives the opportunity to disable on demand the hash (crc32) calculation. It can be done without the need to recompile the u-boot binary.
By default the crc32 is calculated, which means that legacy behavior has been preserved.
Tests results: 400 MiB ums.img file With crc32 calculation: 65 sec [avg 6.29 MB/s] Without crc32 calculation: 25 sec [avg 16.17 MB/s]
Signed-off-by: Lukasz Majewski l.majewski@samsung.com Cc: Marek Vasut marex@denx.de
--- Changes for v2: - Utilization of hash_block generic function to calculate CRC32 checksum Changes for v3: - Remove dependency on altering the lib/hash.c generic implementation - Use of hash_update() function to calculate crc32 in the same way as it was done with crc32 Changes for v4: - When dfu_hash_algo env is not defined, the crc32 is calculated --- drivers/dfu/dfu.c | 52 +++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 5 deletions(-)
diff --git a/drivers/dfu/dfu.c b/drivers/dfu/dfu.c index a938109..5878f99 100644 --- a/drivers/dfu/dfu.c +++ b/drivers/dfu/dfu.c @@ -13,6 +13,7 @@ #include <mmc.h> #include <fat.h> #include <dfu.h> +#include <hash.h> #include <linux/list.h> #include <linux/compiler.h>
@@ -20,6 +21,7 @@ static bool dfu_reset_request; static LIST_HEAD(dfu_list); static int dfu_alt_num; static int alt_num_cnt; +static struct hash_algo *dfu_hash_algo;
bool dfu_reset(void) { @@ -99,6 +101,29 @@ unsigned char *dfu_get_buf(void) return dfu_buf; }
+static char *dfu_get_hash_algo(void) +{ + char *s; + + s = getenv("dfu_hash_algo"); + /* + * By default the legacy behaviour to calculate the crc32 hash + * value is preserved. + * + * To disable calculation of the hash algorithm for received data + * specify the "dfu_hash_algo = disabled" at your board envs. + */ + debug("%s: DFU hash method: %s\n", __func__, s ? s : "not specified"); + + if (!s || !strcmp(s, "crc32")) + return "crc32"; + + if (!strcmp(s, "disabled")) + return NULL; + + return NULL; +} + static int dfu_write_buffer_drain(struct dfu_entity *dfu) { long w_size; @@ -109,8 +134,9 @@ static int dfu_write_buffer_drain(struct dfu_entity *dfu) if (w_size == 0) return 0;
- /* update CRC32 */ - dfu->crc = crc32(dfu->crc, dfu->i_buf_start, w_size); + if (dfu_hash_algo) + dfu_hash_algo->hash_update(dfu_hash_algo, &dfu->crc, + dfu->i_buf_start, w_size, 0);
ret = dfu->write_medium(dfu, dfu->offset, dfu->i_buf_start, &w_size); if (ret) @@ -138,7 +164,9 @@ int dfu_flush(struct dfu_entity *dfu, void *buf, int size, int blk_seq_num) if (dfu->flush_medium) ret = dfu->flush_medium(dfu);
- printf("\nDFU complete CRC32: 0x%08x\n", dfu->crc); + if (dfu_hash_algo) + printf("\nDFU complete %s: 0x%08x\n", dfu_hash_algo->name, + dfu->crc);
/* clear everything */ dfu_free_buf(); @@ -238,7 +266,11 @@ static int dfu_read_buffer_fill(struct dfu_entity *dfu, void *buf, int size) /* consume */ if (chunk > 0) { memcpy(buf, dfu->i_buf, chunk); - dfu->crc = crc32(dfu->crc, buf, chunk); + if (dfu_hash_algo) + dfu_hash_algo->hash_update(dfu_hash_algo, + &dfu->crc, buf, + chunk, 0); + dfu->i_buf += chunk; dfu->b_left -= chunk; dfu->r_left -= chunk; @@ -322,7 +354,9 @@ int dfu_read(struct dfu_entity *dfu, void *buf, int size, int blk_seq_num) }
if (ret < size) { - debug("%s: %s CRC32: 0x%x\n", __func__, dfu->name, dfu->crc); + if (dfu_hash_algo) + debug("%s: %s %s: 0x%x\n", __func__, dfu->name, + dfu_hash_algo->name, dfu->crc); puts("\nUPLOAD ... done\nCtrl+C to exit ...\n");
dfu_free_buf(); @@ -397,6 +431,14 @@ int dfu_config_entities(char *env, char *interface, int num) dfu_alt_num = dfu_find_alt_num(env); debug("%s: dfu_alt_num=%d\n", __func__, dfu_alt_num);
+ dfu_hash_algo = NULL; + s = dfu_get_hash_algo(); + if (s) { + ret = hash_lookup_algo(s, &dfu_hash_algo); + if (ret) + error("Hash algorithm %s not supported\n", s); + } + dfu = calloc(sizeof(*dfu), dfu_alt_num); if (!dfu) return -1;
participants (10)
-
Heiko Schocher
-
Lukasz Majewski
-
Lukasz Majewski
-
Marek Vasut
-
Pantelis Antoniou
-
Przemyslaw Marczak
-
Stefan Schmidt
-
Tom Rini
-
Tormod Volden
-
Wolfgang Denk