[U-Boot] [PATCH v2] mmc: omap: timeout counter fix

Having a loop with a counter is no timing guarentee for timing accuracy or compiler optimizations. For e.g. the same loop counter which runs when the MPU is running at 600MHz will timeout in around half the time when running at 1GHz. or the example where GCC 4.5 compiles with different optimization compared to GCC 4.4. use a udelay(10) to ensure we have a predictable delay. use an emperical number - 100000 uSec ~= 1sec for a worst case timeout. This should never happen, and is adequate imaginary condition for us to fail with timeout.
Signed-off-by: Nishanth Menon nm@ti.com --- V2: additional cleanups + made MAX_RETRY a macro for reuse throughout the file. tested on PandaBoard with 1GHz boot frequency and GCC4.5 on u-boot 2010.09 + mmc patches. Requesting testing on other platforms
V1: http://www.mail-archive.com/u-boot@lists.denx.de/msg40969.html
drivers/mmc/omap_hsmmc.c | 114 +++++++++++++++++++++++++++++++++++++++------- 1 files changed, 97 insertions(+), 17 deletions(-)
diff --git a/drivers/mmc/omap_hsmmc.c b/drivers/mmc/omap_hsmmc.c index 9271470..f7bdfe9 100644 --- a/drivers/mmc/omap_hsmmc.c +++ b/drivers/mmc/omap_hsmmc.c @@ -30,6 +30,7 @@ #include <twl4030.h> #include <asm/io.h> #include <asm/arch/mmc_host_def.h> +#define MAX_RETRY 100000
static int mmc_read_data(hsmmc_t *mmc_base, char *buf, unsigned int size); static int mmc_write_data(hsmmc_t *mmc_base, const char *buf, unsigned int siz); @@ -70,18 +71,32 @@ unsigned char mmc_board_init(hsmmc_t *mmc_base)
void mmc_init_stream(hsmmc_t *mmc_base) { + int retry = MAX_RETRY;
writel(readl(&mmc_base->con) | INIT_INITSTREAM, &mmc_base->con);
writel(MMC_CMD0, &mmc_base->cmd); - while (!(readl(&mmc_base->stat) & CC_MASK)) - ; + while (!(readl(&mmc_base->stat) & CC_MASK)) { + retry--; + udelay(10); + if (!retry) { + printf("%s: timedout waiting for cc!\n", __func__); + return; + } + } writel(CC_MASK, &mmc_base->stat) ; writel(MMC_CMD0, &mmc_base->cmd) ; - while (!(readl(&mmc_base->stat) & CC_MASK)) - ; + retry = MAX_RETRY; + while (!(readl(&mmc_base->stat) & CC_MASK)) { + retry--; + udelay(10); + if (!retry) { + printf("%s: timedout waiting for cc2!\n", __func__); + return; + } + } writel(readl(&mmc_base->con) & ~INIT_INITSTREAM, &mmc_base->con); }
@@ -91,16 +106,31 @@ static int mmc_init_setup(struct mmc *mmc) hsmmc_t *mmc_base = (hsmmc_t *)mmc->priv; unsigned int reg_val; unsigned int dsor; + int retry = MAX_RETRY;
mmc_board_init(mmc_base);
writel(readl(&mmc_base->sysconfig) | MMC_SOFTRESET, &mmc_base->sysconfig); - while ((readl(&mmc_base->sysstatus) & RESETDONE) == 0) - ; + while ((readl(&mmc_base->sysstatus) & RESETDONE) == 0) { + retry--; + udelay(10); + if (!retry) { + printf("%s: timedout waiting for cc2!\n", __func__); + return TIMEOUT; + } + } writel(readl(&mmc_base->sysctl) | SOFTRESETALL, &mmc_base->sysctl); - while ((readl(&mmc_base->sysctl) & SOFTRESETALL) != 0x0) - ; + retry = MAX_RETRY; + while ((readl(&mmc_base->sysctl) & SOFTRESETALL) != 0x0) { + retry--; + udelay(10); + if (!retry) { + printf("%s: timedout waiting for softresetall!\n", + __func__); + return TIMEOUT; + } + } writel(DTW_1_BITMODE | SDBP_PWROFF | SDVS_3V0, &mmc_base->hctl); writel(readl(&mmc_base->capa) | VS30_3V0SUP | VS18_1V8SUP, &mmc_base->capa); @@ -116,8 +146,15 @@ static int mmc_init_setup(struct mmc *mmc) (ICE_STOP | DTO_15THDTO | CEN_DISABLE)); mmc_reg_out(&mmc_base->sysctl, ICE_MASK | CLKD_MASK, (dsor << CLKD_OFFSET) | ICE_OSCILLATE); - while ((readl(&mmc_base->sysctl) & ICS_MASK) == ICS_NOTREADY) - ; + retry = MAX_RETRY; + while ((readl(&mmc_base->sysctl) & ICS_MASK) == ICS_NOTREADY) { + retry--; + udelay(10); + if (!retry) { + printf("%s: timedout waiting for ics!\n", __func__); + return TIMEOUT; + } + } writel(readl(&mmc_base->sysctl) | CEN_ENABLE, &mmc_base->sysctl);
writel(readl(&mmc_base->hctl) | SDBP_PWRON, &mmc_base->hctl); @@ -137,14 +174,27 @@ static int mmc_send_cmd(struct mmc *mmc, struct mmc_cmd *cmd, { hsmmc_t *mmc_base = (hsmmc_t *)mmc->priv; unsigned int flags, mmc_stat; - unsigned int retry = 0x100000; + int retry = MAX_RETRY;
- while ((readl(&mmc_base->pstate) & DATI_MASK) == DATI_CMDDIS) - ; + while ((readl(&mmc_base->pstate) & DATI_MASK) == DATI_CMDDIS) { + retry--; + udelay(10); + if (!retry) { + printf("%s: timedout waiting for cmddis!\n", __func__); + return TIMEOUT; + } + } writel(0xFFFFFFFF, &mmc_base->stat); - while (readl(&mmc_base->stat)) - ; + retry = MAX_RETRY; + while (readl(&mmc_base->stat)) { + retry--; + udelay(10); + if (!retry) { + printf("%s: timedout waiting for stat!\n", __func__); + return TIMEOUT; + } + } /* * CMDREG * CMDIDX[13:8] : Command index @@ -200,8 +250,11 @@ static int mmc_send_cmd(struct mmc *mmc, struct mmc_cmd *cmd, writel(cmd->cmdarg, &mmc_base->arg); writel((cmd->cmdidx << 24) | flags, &mmc_base->cmd);
+ retry = MAX_RETRY; do { mmc_stat = readl(&mmc_base->stat); + if (!mmc_stat) + udelay(10); retry--; } while ((mmc_stat == 0) && (retry > 0));
@@ -253,8 +306,18 @@ static int mmc_read_data(hsmmc_t *mmc_base, char *buf, unsigned int size) count /= 4;
while (size) { + int retry = MAX_RETRY; do { mmc_stat = readl(&mmc_base->stat); + if (!mmc_stat) { + retry--; + udelay(10); + } + if (!retry) { + printf("%s: timedout waiting for status!\n", + __func__); + return TIMEOUT; + } } while (mmc_stat == 0);
if ((mmc_stat & ERRI_MASK) != 0) @@ -298,8 +361,18 @@ static int mmc_write_data(hsmmc_t *mmc_base, const char *buf, unsigned int size) count /= 4;
while (size) { + int retry = MAX_RETRY; do { mmc_stat = readl(&mmc_base->stat); + if (!mmc_stat) { + retry--; + udelay(10); + } + if (!retry) { + printf("%s: timedout waiting for status!\n", + __func__); + return TIMEOUT; + } } while (mmc_stat == 0);
if ((mmc_stat & ERRI_MASK) != 0) @@ -334,6 +407,7 @@ static void mmc_set_ios(struct mmc *mmc) { hsmmc_t *mmc_base = (hsmmc_t *)mmc->priv; unsigned int dsor = 0; + int retry = MAX_RETRY;
/* configue bus width */ switch (mmc->bus_width) { @@ -372,8 +446,14 @@ static void mmc_set_ios(struct mmc *mmc) mmc_reg_out(&mmc_base->sysctl, ICE_MASK | CLKD_MASK, (dsor << CLKD_OFFSET) | ICE_OSCILLATE);
- while ((readl(&mmc_base->sysctl) & ICS_MASK) == ICS_NOTREADY) - ; + while ((readl(&mmc_base->sysctl) & ICS_MASK) == ICS_NOTREADY) { + retry--; + udelay(10); + if (!retry) { + printf("%s: timedout waiting for ics!\n", __func__); + return; + } + } writel(readl(&mmc_base->sysctl) | CEN_ENABLE, &mmc_base->sysctl); }

Dear Nishanth Menon,
Having a loop with a counter is no timing guarentee for timing accuracy or compiler optimizations. For e.g. the same loop counter which runs when the MPU is running at 600MHz will timeout in around half the time when running at 1GHz. or the example where GCC 4.5 compiles with different optimization compared to GCC 4.4. use a udelay(10) to ensure we have a predictable delay. use an emperical number - 100000 uSec ~= 1sec for a worst case timeout. This should never happen, and is adequate imaginary condition for us to fail with timeout.
In such cases I prefer to use:
uint64_t etime; ... etime = get_ticks() + get_tbclk(); /* 1 second */ do { whatever; udelay (xx); } while (condition && get_ticks() <= etime);
That is far more accurate than calling udelay() 100000 times. (depending on implementation and granularity udelay of a few usecs might be rounded significantly) You can still call udelay inside the loop if you don't want to poll the condition too tightly...
Best Regards, Reinhard

Reinhard Meyer had written, on 10/25/2010 08:14 PM, the following:
Dear Nishanth Menon,
Having a loop with a counter is no timing guarentee for timing accuracy or compiler optimizations. For e.g. the same loop counter which runs when the MPU is running at 600MHz will timeout in around half the time when running at 1GHz. or the example where GCC 4.5 compiles with different optimization compared to GCC 4.4. use a udelay(10) to ensure we have a predictable delay. use an emperical number - 100000 uSec ~= 1sec for a worst case timeout. This should never happen, and is adequate imaginary condition for us to fail with timeout.
In such cases I prefer to use:
uint64_t etime; ... etime = get_ticks() + get_tbclk(); /* 1 second */ do { whatever; udelay (xx); } while (condition && get_ticks() <= etime);
That is far more accurate than calling udelay() 100000 times. (depending on implementation and granularity udelay of a few usecs might be rounded significantly) You can still call udelay inside the loop if you don't want to poll the condition too tightly...
hmmm.. almost like the jiffies in kernel ;).. timing wise, I see that the only benefit is that the approach gives a little more accuracy to the timeout delay - the other benefit is option to loop continuously instead of delaying with udelays - overall, at least the segments I saw had no reason to hit the registers so hard (alright we dont have much else to do.. but still).. I am very open to options from Sukumar(original author) as well..

Dear Nishanth Menon,
In message 4CC62C81.8000202@ti.com you wrote:
You can still call udelay inside the loop if you don't want to poll the condition too tightly...
hmmm.. almost like the jiffies in kernel ;).. timing wise, I see that
Yes, except for the bugs... ;-)
Best regards,
Wolfgang Denk

Dear Reinhard Meyer,
In message 4CC62B6C.30601@emk-elektronik.de you wrote:
In such cases I prefer to use:
uint64_t etime; ... etime = get_ticks() + get_tbclk(); /* 1 second */ do { whatever; udelay (xx); } while (condition && get_ticks() <= etime);
That is far more accurate than calling udelay() 100000 times.
It may be more accuratre, but it may also be HORRIBLY WRONG!!
Do NOT do that!! NEVER implement such a delay loop as
end = time() + delay; while (time() < end) ...
It fails in case the timer wraps around.
Assume 32 bit counters, start time = 0xFFFFFFF0, delay = 0x20. It will compute end = 0x10, the while codition is immediately false, and you don't have any delay at all, which most probably generates a false error condition.
Correct implementation of a timeout like that should always look like that:
start = time(); while ((time() - start) < delay) ...
This works much better (assuming unsigned arithmetics).
Best regards,
Wolfgang Denk

-----Original Message----- From: u-boot-bounces@lists.denx.de [mailto:u-boot-bounces@lists.denx.de] On Behalf Of Wolfgang Denk Sent: Tuesday, October 26, 2010 10:59 AM To: Reinhard Meyer Cc: Menon, Nishanth; u-boot Subject: Re: [U-Boot] [PATCH v2] mmc: omap: timeout counter fix
Dear Reinhard Meyer,
In message 4CC62B6C.30601@emk-elektronik.de you wrote:
In such cases I prefer to use:
uint64_t etime; ... etime = get_ticks() + get_tbclk(); /* 1 second */ do { whatever; udelay (xx); } while (condition && get_ticks() <= etime);
That is far more accurate than calling udelay() 100000 times.
It may be more accuratre, but it may also be HORRIBLY WRONG!!
Do NOT do that!! NEVER implement such a delay loop as
end = time() + delay; while (time() < end) ...
It fails in case the timer wraps around.
Assume 32 bit counters, start time = 0xFFFFFFF0, delay = 0x20. It will compute end = 0x10, the while codition is immediately false, and you don't have any delay at all, which most probably generates a false error condition.
Correct implementation of a timeout like that should always look like that:
start = time(); while ((time() - start) < delay) ...
This works much better (assuming unsigned arithmetics).
[Ghorai] Thanks.. This is the best approach. Otherwise udelay() will increase the boot time.
Best regards,
Wolfgang Denk
-- DENX Software Engineering GmbH, MD: Wolfgang Denk & Detlev Zundel HRB 165235 Munich, Office: Kirchenstr.5, D-82194 Groebenzell, Germany Phone: (+49)-8142-66989-10 Fax: (+49)-8142-66989-80 Email: wd@denx.de Ein weiser Herrscher kann in einem großen Land mehr Gutes bewirken als in einem kleinen - ein dummer Herrscher aber auch viel mehr Un- fug. Da weise Herrscher seltener sind als dumme, war ich schon immer gegen große Reiche skeptisch. - Herbert Rosendorfer _______________________________________________ U-Boot mailing list U-Boot@lists.denx.de http://lists.denx.de/mailman/listinfo/u-boot

Ghorai, Sukumar had written, on 10/26/2010 12:34 AM, the following: [...]
[Ghorai] Thanks.. This is the best approach. Otherwise udelay() will increase the boot time.
Please define "increase the boot time" with the context to the patch where you think the increase of boot time will be? In my experience, it has not. the iterations are such that: while (condition_not_met) udelay(10); This is reasonable enough to wait till the condition is met. a) u-boot boot logic is not invoked until "mmc part 0" is invoked -> u-boot boot time does not change b) the actual fatload by itself will only incur minor latency addition - IMHO - necessary addition - in cases where the check conditions are not met.
would be great if you can illustrate within the patch areas which need continuous checks Vs the ones that do not need continuous checks.

Dear Wolfgang Denk,
In message4CC62B6C.30601@emk-elektronik.de you wrote:
In such cases I prefer to use:
uint64_t etime; ... etime = get_ticks() + get_tbclk(); /* 1 second */ do { whatever; udelay (xx); } while (condition&& get_ticks()<= etime);
That is far more accurate than calling udelay() 100000 times.
It may be more accuratre, but it may also be HORRIBLY WRONG!!
Do NOT do that!! NEVER implement such a delay loop as
end = time() + delay; while (time()< end) ...
It fails in case the timer wraps around.
Assume 32 bit counters, start time = 0xFFFFFFF0, delay = 0x20. It will compute end = 0x10, the while codition is immediately false, and you don't have any delay at all, which most probably generates a false error condition.
I used and assumed a 64 bit counter, that will not wrap around while our civilization still exists...
If get_ticks() is only 32 bits worth, both methods will misbehave at a 32 bit wrap over.
Correct implementation of a timeout like that should always look like that:
start = time(); while ((time() - start)< delay) ...
This works much better (assuming unsigned arithmetics).
True, provided the underlying timer is really 64 bits, otherwise this fails, too...
Best would be to assign get_ticks() to a 32 bit unsigned and use 32 bit vars for start and delay as well.
The original udelay() implementation in AT91 would have failed at a 32 bit wrap over, too (fixed in mainline). I hope other implementations are done right, too...
Best regards,
Reinhard

Dear Reinhard Meyer,
In message 4CC66A67.4000608@emk-elektronik.de you wrote:
It fails in case the timer wraps around.
Assume 32 bit counters, start time = 0xFFFFFFF0, delay = 0x20. It will compute end = 0x10, the while codition is immediately false, and you don't have any delay at all, which most probably generates a false error condition.
I used and assumed a 64 bit counter, that will not wrap around while our civilization still exists...
The code is still wrong, and as a simple correct implementation exists there is no excuse for using such incorrect code.
Please fix that!
If get_ticks() is only 32 bits worth, both methods will misbehave at a 32 bit wrap over.
No.
start = time(); while ((time() - start)< delay) ...
This works much better (assuming unsigned arithmetics).
True, provided the underlying timer is really 64 bits, otherwise this fails, too...
You are wrong. Try for example this:
--------------------- snip ------------------- #include <stdio.h>
int main(void) { unsigned int time = 0xFFFFFFF0; unsigned int delay = 0x20; unsigned int start;
start = time;
printf("start=0x%X\n", start);
while ((time - start) < delay) { printf("time=0x%X...\n", time); ++time; }
return 0; } --------------------- snip -------------------
Best would be to assign get_ticks() to a 32 bit unsigned and use 32 bit vars for start and delay as well.
? But that's what I wrote?
Best regards,
Wolfgang Denk

Dear Wolfgang Denk,
Dear Reinhard Meyer,
In message4CC66A67.4000608@emk-elektronik.de you wrote:
It fails in case the timer wraps around.
Assume 32 bit counters, start time = 0xFFFFFFF0, delay = 0x20. It will compute end = 0x10, the while codition is immediately false, and you don't have any delay at all, which most probably generates a false error condition.
I used and assumed a 64 bit counter, that will not wrap around while our civilization still exists...
The code is still wrong, and as a simple correct implementation exists there is no excuse for using such incorrect code.
Please fix that!
Agreed here. People are invited to dig through u-boot and find all those places.
If get_ticks() is only 32 bits worth, both methods will misbehave at a 32 bit wrap over.
No.
start = time(); while ((time() - start)< delay) ...
This works much better (assuming unsigned arithmetics).
True, provided the underlying timer is really 64 bits, otherwise this fails, too...
You are wrong. Try for example this:
--------------------- snip ------------------- #include <stdio.h>
int main(void) { unsigned int time = 0xFFFFFFF0; unsigned int delay = 0x20; unsigned int start;
You are wrong here, because you take it out of context. My "demo" is using the (declared as) 64 bit function get_ticks(). I mentioned above that this function MUST be truly returning 64 bits worth of (incrementing) value to make any version work. If get_ticks() just returns a 32 bit counter value neither method will work reliably. Just check all implementations that this function is implemented correctly.

I just had a look at other ARM implementations of timer.c.
Some have a colourful mix of 32 and 64 bits values, resulting in some 64 bit timer functions returning the upper 32 bits always cleared.
Some implement udelay() in the "while (xxxtime() < endtime);" variant.
I will fix this for at91 and submit a patch.
I also see that:
void reset_timer(void) { gd->timer_reset_value = get_ticks(); }
ulong get_timer(ulong base) { return tick_to_time(get_ticks() - gd->timer_reset_value) - base; }
If implemented with true 64 bits for get_ticks() that function is useable for timeout programming:
ulong timeval = get_timer (0);
do { ... } while (get_timer (timeval) < TIMEOUT);
It appears that the "base" parameter and return value is in CONFIG_SYS_HZ units, and not in native ticks. That causes 64 bit mul/div every time get_timer() is called. Won't hurt in a timeout loop, though.
I guess the theoretically unnecessary function reset_timer() might have been invented to mask the issue of 32 bit wraparounds when get_timer() is not truly 64 bits???
Reinhard

Dear Reinhard Meyer,
In message 4CC67CA1.9090302@emk-elektronik.de you wrote:
If implemented with true 64 bits for get_ticks() that function is useable for timeout programming:
ulong timeval = get_timer (0);
do { ... } while (get_timer (timeval) < TIMEOUT);
It appears that the "base" parameter and return value is in CONFIG_SYS_HZ units, and not in native ticks. That causes 64 bit mul/div every time get_timer() is called. Won't hurt in a timeout loop, though.
But it will hurt in othe rplaces.
Also, this code _is_ a bit different, as "get_timer(0)" makes sure the counter starts ticking again at 0, and get_timer() is defined to have millisecond resolution. So you have a guaranteed 2^32 milliseconds or 4294967 seconds or about 3.3 years available which indeed should be sufficient to implement standard timeouts.
Best regards,
Wolfgang Denk

Wolfgang Denk schrieb:
Dear Reinhard Meyer,
In message 4CC67CA1.9090302@emk-elektronik.de you wrote:
If implemented with true 64 bits for get_ticks() that function is useable for timeout programming:
ulong timeval = get_timer (0);
do { ... } while (get_timer (timeval) < TIMEOUT);
It appears that the "base" parameter and return value is in CONFIG_SYS_HZ units, and not in native ticks. That causes 64 bit mul/div every time get_timer() is called. Won't hurt in a timeout loop, though.
But it will hurt in othe rplaces.
Also, this code _is_ a bit different, as "get_timer(0)" makes sure the counter starts ticking again at 0
Nope, it does not reset the counter itself. It returns the current counter value (recalculated into CONFIG_SYS_HZ units). Maybe you mean reset_timer() instead?
In arm9226ejs/omap/timer.c udelay() is implemented with reset_timer() and get_timer(). Since those functions are inherently not nestable, beware of base=get_timer(0); do { ... udelay(xx); ... } while (get_timer(base) < TIMEOUT); constructs!
, and get_timer() is defined to have millisecond resolution.
Actually CONFIG_SYS_HZ (whatever that is).
So you have a guaranteed 2^32 milliseconds or 4294967 seconds or about 3.3 years available which indeed should be sufficient to implement standard timeouts.
I think it is necessary to summarize all implicit or explicit documented "defined to have's" regarding the timer and then to verify that all implementations adhere to them.
Reinhard

Dear Reinhard Meyer,
In message 4CC68A07.6050109@emk-elektronik.de you wrote:
Also, this code _is_ a bit different, as "get_timer(0)" makes sure the counter starts ticking again at 0
Nope, it does not reset the counter itself. It returns the current counter value (recalculated into CONFIG_SYS_HZ units). Maybe you mean reset_timer() instead?
Yes, you are right, I was wrong.
have millisecond resolution.
Actually CONFIG_SYS_HZ (whatever that is).
It is defined to be 1000.
I think it is necessary to summarize all implicit or explicit documented "defined to have's" regarding the timer and then to verify that all implementations adhere to them.
Indeed - documentation is an are where U-Boot has a serious lack.
Best regards,
Wolfgang Denk

Dear Wolfgang Denk,
Actually CONFIG_SYS_HZ (whatever that is).
It is defined to be 1000.
Ok, game with that.
Then the define CONFIG_SYS_HZ should not be in every <board>.h since that suggests that a board developer has some freedom there...
I think it is necessary to summarize all implicit or explicit documented "defined to have's" regarding the timer and then to verify that all implementations adhere to them.
Indeed - documentation is an are where U-Boot has a serious lack.
OK, to summarize:
1. reset_timer(), get_timer(base) operate with 1ms granularity. An implementation MUST make that close to 1ms. On second thought, it might be valueable to be able to set CONFIG_SYS_HZ to the exact value of the actual granularity. (for example 1024, if a 32kHz based timer is used).
It should be pointed out, that the pair reset_timer()/get_timer() cannot be used nested,
and MOST IMPORTANT that some implementations of udelay() might call reset_timer() and therefore break an outer timeout loop !!!
It is also open if reset_timer() does actually reset the hardware timer (e.g. tbu/tbl at PPC) - which would be messing up any time difference calculation using get_ticks() - or does emulate that by remembering the hardware value and subtracting it later in every subsequent get_timer() call?
2. get_ticks() and friends operate at a higher rate (tbu/tbl for PPC). Since they are defined as having 64 bits they MUST not wrap at 32 bits, i.e. if the hardware provides only 32 bits, the upper 32 bits must be emulated by software. Otherwise we have to document that get_ticks() cannot be used to get 64 bit time differences.
OR fall back to a 32 bit get_ticks() that should perhaps increment at a microsecond rate (see Bill Campbell's post).
If you really closely look at the various implementations of those timers, you will easyly see the wide variations implemented there.
Best Regards, Reinhard

Dear Reinhard Meyer,
In message 4CC6AADC.8050608@emk-elektronik.de you wrote:
Then the define CONFIG_SYS_HZ should not be in every <board>.h since that suggests that a board developer has some freedom there...
Agreed - there are historical reasons this has ever been changable at all.
and MOST IMPORTANT that some implementations of udelay() might call reset_timer() and therefore break an outer timeout loop !!!
Such implementations are inherently broken and need to be fixed.
It is also open if reset_timer() does actually reset the hardware timer (e.g. tbu/tbl at PPC) - which would be messing up any time difference calculation using get_ticks() - or does emulate that by remembering the hardware value and subtracting it later in every subsequent get_timer() call?
This is an implementation detail.
- get_ticks() and friends operate at a higher rate (tbu/tbl for PPC).
Since they are defined as having 64 bits they MUST not wrap at 32 bits, i.e. if the hardware provides only 32 bits, the upper 32 bits must be emulated by software.
Right.
Otherwise we have to document that get_ticks() cannot be used to get 64 bit time differences.
No. Such an implementation is broken and needs fixing.
If you really closely look at the various implementations of those timers, you will easyly see the wide variations implemented there.
Yes, I am aware of this :-(
Best regards,
Wolfgang Denk

Dear Wolfgang Denk,
Then the define CONFIG_SYS_HZ should not be in every <board>.h since that suggests that a board developer has some freedom there...
Agreed - there are historical reasons this has ever been changable at all.
and MOST IMPORTANT that some implementations of udelay() might call reset_timer() and therefore break an outer timeout loop !!!
Such implementations are inherently broken and need to be fixed.
Found such in arm926ejs/omap... But then, that timer is multiple-broken: relocation broken (uses static data), returns 32 a bit value in get_ticks(), returns CONFIG_SYS_HZ in get_tbclk() instead of the rate get_ticks() increments...
PXA: void udelay_masked (unsigned long usec) { unsigned long long tmp; ulong tmo;
tmo = us_to_tick(usec); tmp = get_ticks() + tmo; /* get current timestamp */
while (get_ticks() < tmp) /* loop till event _OR FOREVER is tmp happens to be > 32 bit_ */ /*NOP*/;
}
unsigned long long get_ticks(void) { return readl(OSCR); } - not any better :( -- its the same code that AT91 had before I fixed it.
It is also open if reset_timer() does actually reset the hardware timer (e.g. tbu/tbl at PPC) - which would be messing up any time difference calculation using get_ticks() - or does emulate that by remembering the hardware value and subtracting it later in every subsequent get_timer() call?
This is an implementation detail.
IF we require that get_ticks() and get_timer() shall not interfere with each other and IF both are based on the same hardware timer only the second method is available (same if the hardware timer is not easyly resettable).
- get_ticks() and friends operate at a higher rate (tbu/tbl for PPC).
Since they are defined as having 64 bits they MUST not wrap at 32 bits, i.e. if the hardware provides only 32 bits, the upper 32 bits must be emulated by software.
Right.
Otherwise we have to document that get_ticks() cannot be used to get 64 bit time differences.
No. Such an implementation is broken and needs fixing.
Original AT91 timer.c was like that, and I think other ARMs where this was copied around should be looked at... I don't know when get_timer() became 64 bits, but it seems that some implementations just did change the return type: uint64 get_timer(void) {return (uint64)timer_val_32;}
If you really closely look at the various implementations of those timers, you will easyly see the wide variations implemented there.
Yes, I am aware of this :-(
I will start beautifying the AT91 timer - its already quite there, except for a possible timer wrap problem in udelay() after a few billion years :)
Best Regards, Reinhard

On 10/26/2010 6:33 AM, Reinhard Meyer wrote:
Dear Wolfgang Denk,
Then the define CONFIG_SYS_HZ should not be in every<board>.h since that suggests that a board developer has some freedom there...
Agreed - there are historical reasons this has ever been changable at all.
and MOST IMPORTANT that some implementations of udelay() might call reset_timer() and therefore break an outer timeout loop !!!
Such implementations are inherently broken and need to be fixed.
Found such in arm926ejs/omap... But then, that timer is multiple-broken: relocation broken (uses static data), returns 32 a bit value in get_ticks(), returns CONFIG_SYS_HZ in get_tbclk() instead of the rate get_ticks() increments...
PXA: void udelay_masked (unsigned long usec) { unsigned long long tmp; ulong tmo;
tmo = us_to_tick(usec); tmp = get_ticks() + tmo; /* get current timestamp */
while (get_ticks()< tmp) /* loop till event _OR FOREVER is tmp happens to be> 32 bit_ */ /*NOP*/;
}
unsigned long long get_ticks(void) { return readl(OSCR); }
- not any better :( -- its the same code that AT91 had before I fixed it.
It is also open if reset_timer() does actually reset the hardware timer (e.g. tbu/tbl at PPC) - which would be messing up any time difference calculation using get_ticks() - or does emulate that by remembering the hardware value and subtracting it later in every subsequent get_timer() call?
This is an implementation detail.
IF we require that get_ticks() and get_timer() shall not interfere with each other and IF both are based on the same hardware timer only the second method is available (same if the hardware timer is not easyly resettable).
- get_ticks() and friends operate at a higher rate (tbu/tbl for PPC).
Since they are defined as having 64 bits they MUST not wrap at 32 bits, i.e. if the hardware provides only 32 bits, the upper 32 bits must be emulated by software.
Right.
Otherwise we have to document that get_ticks() cannot be used to get 64 bit time differences.
No. Such an implementation is broken and needs fixing.
Original AT91 timer.c was like that, and I think other ARMs where this was copied around should be looked at... I don't know when get_timer() became 64 bits, but it seems that some implementations just did change the return type: uint64 get_timer(void) {return (uint64)timer_val_32;}
Hi All,
I am pretty sure the migration to 64 bits was caused by 1) people not understanding that the timer operating on time DIFFERENCES would work fine even if the underlying timer wrapped around (most probable problem) and possibly 2) broken timer functions causing bogus timeouts, improperly "fixed" by switching to 64 bits.
I think u-boot could get along just fine with only 2 time related functions, uint_32 get_timer(uint_32 base) and udelay(uint 32 delay). udelay will only work on "small" values of delay, on the order of milliseconds. It is to be used when a "short" but "precise" delay in microsecond resolution is required. Users of get_timer must understand that it is only valid if it is called "often enough", i.e. at least once per period of the underlying timer. This is required because u-boot does not want to rely on interrupts as a timer update method. Therefore, all uses of get_timer must 1) call it once initially to get a start value, and 2) call get_timer at least once per period of the underlying hardware counter. This underlying period is guaranteed to be at least 4.29 seconds (32 bit counter at 4 GHz). Note that this does NOT mean that the total wait must be less than 4.29 seconds, only that the rate at which the elapsed time is TESTED must be adequate.
In order to implement this functionality, at least one hardware timer of some kind is required. An additional software "timer" in 1 ms resolution may be useful in maintaining the software time. If the hardware timer update rate is programmable, u-boot MAY set the update rate on initialization On initialization, u-boot MAY reset the hardware timer and MAY reset any associated software timer. The hardware timer MAY be started on initialization. On each call to get_timer(), u-boot MUST start the hardware timer if it was not started already. On calls to get_timer, u-boot MUST NOT reset the hardware timer if it was already started. The software timer MAY be reset if u-boot can unambiguously determine that more than 4.29 seconds has elapsed since the last call to get_timer.
The simplest case for implementing this scheme is if two programmable timers exist that can be set to 1ms and 1us. The timers are initialized at start-up, get_timer just returns the 32 bit 1 ms timer and udelay just waits for the number of ticks required on the second timer to elapse. The most common harder case is where there is only one timer available, it is running at 1 us per tick or faster, and we cannot control the rate. udelay is still easy, because we can convert the (small) delay in us to a delay in ticks by a 32 bit multiply that will not overflow 32 bits even if we have quite a few fractional bits in the tics per microsecond value. The elapsed ticks required is the (delay in us * us/per tick) >> # fractional bits in us/per tick. If that is not close enough for you, you can do it as (delay in us * (integer part of us/tick)) + ((delay in us * (fractional part)us/tick) >> # fraction bits). For "nice" numbers, like any integral number of MHz, there is no fractional part. Only numbers like 66 MHz, or 1.666 GHz require messing with the fractional part. For get_timer, it is a bit harder. The program must keep two 32 bit global variables, the timer reading "last time" and the software timer in 1 ms resolution. Whenever get_timer is called, it must increase the software timer by the number of ms that have elapsed since the previous update and record the corresponding timer reading as the new "last time". Note that if the number of ms elapsed is not an integer (a common case), the value recorded as the "last time" must be decreased by the number of ticks not included in the 1 ms resolution software timer. There are many different ways to accomplish update, depending on what hardware math capabilities are available, and whether one thinks efficiency is important here. Conceptually, you convert the elapsed time in ticks into an equivalent number of ms, add that number to the software timer, store the current value of the hardware timer in last time, and subtract any "remainder" ticks from that value. If the elapsed time is less that one ms, do no update of "last hardware time" and return the current software counter. If the elapsed time is greater than 4.29 seconds, reset the software counter to 0, record the current hardware counter time and return the current software counter. In between, do the math, which will fit into 32 bits.
If this idea seems like a good one, I can provide more detail on the conversions for various hardware capabilities is people want. Comments welcome.
Best Regards,
Bill Campbell
If you really closely look at the various implementations of those timers, you will easyly see the wide variations implemented there.
Yes, I am aware of this :-(
I will start beautifying the AT91 timer - its already quite there, except for a possible timer wrap problem in udelay() after a few billion years :)
Best Regards, Reinhard

Dear J. William Campbell,
Hi All,
I am pretty sure the migration to 64 bits was caused by 1) people not understanding that the timer operating on time DIFFERENCES would work fine even if the underlying timer wrapped around (most probable problem) and possibly 2) broken timer functions causing bogus timeouts, improperly "fixed" by switching to 64 bits.
I think u-boot could get along just fine with only 2 time related functions, uint_32 get_timer(uint_32 base) and udelay(uint 32 delay). udelay will only work on "small" values of delay, on the order of milliseconds. It is to be used when a "short" but "precise" delay in microsecond resolution is required. Users of get_timer must understand that it is only valid if it is called "often enough", i.e. at least once per period of the underlying timer. This is required because u-boot does not want to rely on interrupts as a timer update method. Therefore, all uses of get_timer must 1) call it once initially to get a start value, and 2) call get_timer at least once per period of the underlying hardware counter. This underlying period is guaranteed to be at least 4.29 seconds (32 bit counter at 4 GHz). Note that this does NOT mean that the total wait must be less than 4.29 seconds, only that the rate at which the elapsed time is TESTED must be adequate.
In order to implement this functionality, at least one hardware timer of some kind is required. An additional software "timer" in 1 ms resolution may be useful in maintaining the software time. If the hardware timer update rate is programmable, u-boot MAY set the update rate on initialization On initialization, u-boot MAY reset the hardware timer and MAY reset any associated software timer. The hardware timer MAY be started on initialization. On each call to get_timer(), u-boot MUST start the hardware timer if it was not started already. On calls to get_timer, u-boot MUST NOT reset the hardware timer if it was already started. The software timer MAY be reset if u-boot can unambiguously determine that more than 4.29 seconds has elapsed since the last call to get_timer.
The simplest case for implementing this scheme is if two programmable timers exist that can be set to 1ms and 1us. The timers are initialized at start-up, get_timer just returns the 32 bit 1 ms timer and udelay just waits for the number of ticks required on the second timer to elapse. The most common harder case is where there is only one timer available, it is running at 1 us per tick or faster, and we cannot control the rate. udelay is still easy, because we can convert the (small) delay in us to a delay in ticks by a 32 bit multiply that will not overflow 32 bits even if we have quite a few fractional bits in the tics per microsecond value. The elapsed ticks required is the (delay in us * us/per tick) >> # fractional bits in us/per tick. If that is not close enough for you, you can do it as (delay in us * (integer part of us/tick)) + ((delay in us * (fractional part)us/tick) >> # fraction bits). For "nice" numbers, like any integral number of MHz, there is no fractional
part. Only numbers like 66 MHz, or 1.666 GHz require messing with the fractional part. For get_timer, it is a bit harder. The program must keep two 32 bit global variables, the timer reading "last time" and the software timer in 1 ms resolution. Whenever get_timer is called, it must increase the software timer by the number of ms that have elapsed since the previous update and record the corresponding timer reading as the new "last time". Note that if the number of ms elapsed is not an integer (a common case), the value recorded as the "last time" must be decreased by the number of ticks not included in the 1 ms resolution software timer. There are many different ways to accomplish update, depending on what hardware math capabilities are available, and whether one thinks efficiency is important here. Conceptually, you convert the elapsed time in ticks into an equivalent number of ms, add that number to the software timer, store the current value of the hardware timer in last time, and subtract any "remainder" ticks from that value. If the elapsed time is les
s
that one ms, do no update of "last hardware time" and return the current software counter. If the elapsed time is greater than 4.29 seconds, reset the software counter to 0, record the current hardware counter time and return the current software counter. In between, do the math, which will fit into 32 bits.
If this idea seems like a good one, I can provide more detail on the conversions for various hardware capabilities is people want. Comments welcome.
To get the timer mess cleaned up three things have to happen:
1. A consensus and documentation how it MUST be handled
2. Fix all timer implementations to adhere to that
3. Fix all timer uses to adhere to that
To start, RFC:
a) udelay(u32) MUST NOT interfere with other timer functions b) u32 get_timer(u32 base) MUST return (current_time - base) using millisecond units c) get_timer() MUST exhaust the full 32 bit range before wrapping d) to ensure the function of internal high frequency wrapping processes, get_timer() MUST be called at least once a second while a timeout loop is run (this will typically be the case anyway) e) reset_timer() is not needed, potentially harmful if used and shall be removed. This will also allow for nested timeouts, e.g. inner timeout for accessing a hardware and outer timeout for having the hardware perform a function f) get_ticks() and get_tbclk() SHALL NOT be used to check for timeouts except for cases where the better than ms resolution is really needed g) get_ticks() MUST return true 64 bit time h) all uses of get_ticks() and get_timer() shall be fixed to use get_timer() as follows:
u32 start_ms;
start_ms = get_timer(0);
any_type_of_loop { ... /* check for timeout */ if (get_timer(start_ms) >= TIMEOUT_IN_MS) /* handle timeout */ ... } Alternative, but in contrast to current get_timer() implementation is to drop the get_timer parameter and do the subtraction in the if.
--> get_timer(base) is a bit of a misnomer, it should be better named something like get_ms_since(base).
_Observation:_
It seems possible to move udelay() and get_timer() and static helper functions into a common, ARCH and SoC independent file, provided that the ARCH/SoC/board dependant implementations of get_ticks() runs at >= 1 MHz and is true 64 bits. IF above d) is met get_ticks might be replaced by a u32 function to be used by the common udelay() and get_timer() implementation. That would allow all timer code to use 32 bit arithmetic only.
Best Regards, Reinhard

On 10/27/2010 11:02 PM, Reinhard Meyer wrote:
Dear J. William Campbell,
Hi All,
I am pretty sure the migration to 64 bits was caused by 1) people not understanding that the timer operating on time DIFFERENCES would work fine even if the underlying timer wrapped around (most probable problem) and possibly 2) broken timer functions causing bogus timeouts, improperly "fixed" by switching to 64 bits.
I think u-boot could get along just fine with only 2 time related functions, uint_32 get_timer(uint_32 base) and udelay(uint 32 delay). udelay will only work on "small" values of delay, on the order of milliseconds. It is to be used when a "short" but "precise" delay in microsecond resolution is required. Users of get_timer must understand that it is only valid if it is called "often enough", i.e. at least once per period of the underlying timer. This is required because u-boot does not want to rely on interrupts as a timer update method. Therefore, all uses of get_timer must 1) call it once initially to get a start value, and 2) call get_timer at least once per period of the underlying hardware counter. This underlying period is guaranteed to be at least 4.29 seconds (32 bit counter at 4 GHz). Note that this does NOT mean that the total wait must be less than 4.29 seconds, only that the rate at which the elapsed time is TESTED must be adequate.
In order to implement this functionality, at least one hardware timer of some kind is required. An additional software "timer" in 1 ms resolution may be useful in maintaining the software time. If the hardware timer update rate is programmable, u-boot MAY set the update rate on initialization On initialization, u-boot MAY reset the hardware timer and MAY reset any associated software timer. The hardware timer MAY be started on initialization. On each call to get_timer(), u-boot MUST start the hardware timer if it was not started already. On calls to get_timer, u-boot MUST NOT reset the hardware timer if it was already started. The software timer MAY be reset if u-boot can unambiguously determine that more than 4.29 seconds has elapsed since the last call to get_timer.
The simplest case for implementing this scheme is if two programmable timers exist that can be set to 1ms and 1us. The timers are initialized at start-up, get_timer just returns the 32 bit 1 ms timer and udelay just waits for the number of ticks required on the second timer to elapse. The most common harder case is where there is only one timer available, it is running at 1 us per tick or faster, and we cannot control the rate. udelay is still easy, because we can convert the (small) delay in us to a delay in ticks by a 32 bit multiply that will not overflow 32 bits even if we have quite a few fractional bits in the tics per microsecond value. The elapsed ticks required is the (delay in us * us/per tick) >> # fractional bits in us/per tick. If that is not close enough for you, you can do it as (delay in us * (integer part of us/tick)) + ((delay in us * (fractional part)us/tick) >> # fraction bits). For "nice" numbers, like any integral number of MHz, there is no fractional
part. Only numbers like 66 MHz, or 1.666 GHz require messing with the fractional part. For get_timer, it is a bit harder. The program must keep two 32 bit global variables, the timer reading "last time" and the software timer in 1 ms resolution. Whenever get_timer is called, it must increase the software timer by the number of ms that have elapsed since the previous update and record the corresponding timer reading as the new "last time". Note that if the number of ms elapsed is not an integer (a common case), the value recorded as the "last time" must be decreased by the number of ticks not included in the 1 ms resolution software timer. There are many different ways to accomplish update, depending on what hardware math capabilities are available, and whether one thinks efficiency is important here. Conceptually, you convert the elapsed time in ticks into an equivalent number of ms, add that number to the software timer, store the current value of the hardware timer in last time, and subtract any "remainder" ticks from that value. If the elapsed time is les
s
that one ms, do no update of "last hardware time" and return the current software counter. If the elapsed time is greater than 4.29 seconds, reset the software counter to 0, record the current hardware counter time and return the current software counter. In between, do the math, which will fit into 32 bits.
If this idea seems like a good one, I can provide more detail on the conversions for various hardware capabilities is people want. Comments welcome.
To get the timer mess cleaned up three things have to happen:
Hi All,
I am glad somebody was still interested. I was afraid I had scared everyone off.
A consensus and documentation how it MUST be handled
Fix all timer implementations to adhere to that
Fix all timer uses to adhere to that
I agree this is required.
To start, RFC:
a) udelay(u32) MUST NOT interfere with other timer functions b) u32 get_timer(u32 base) MUST return (current_time - base) using millisecond units c) get_timer() MUST exhaust the full 32 bit range before wrapping d) to ensure the function of internal high frequency wrapping processes, get_timer() MUST be called at least once a second while a timeout loop is run (this will typically be the case anyway)
I have no problem with this, but it might be stricter than needed. I cant imagine anybody has a timer running faster than 4 GHz, and that allows 4 seconds of interval.
e) reset_timer() is not needed, potentially harmful if used and shall be removed. This will also allow for nested timeouts, e.g. inner timeout for accessing a hardware and outer timeout for having the hardware perform a function f) get_ticks() and get_tbclk() SHALL NOT be used to check for timeouts except for cases where the better than ms resolution is really needed
I have an issue here. I think no such use cases really exist. For things like bit-banging, udelay is more what one wants. What are the counter-examples where sub-millisecond resolution in a loop waiting for some expected event is required?
g) get_ticks() MUST return true 64 bit time
I hope get_ticks must not exist, at least as a "generic" globally visible u-boot function. Having it present spawns confusion. People will use it instead of the generic get_timer when they don't have to, creating the portability problems we are trying to get rid of.
h) all uses of get_ticks() and get_timer() shall be fixed to use get_timer() as follows:
u32 start_ms;
start_ms = get_timer(0);
any_type_of_loop { ... /* check for timeout */ if (get_timer(start_ms) >= TIMEOUT_IN_MS) /* handle timeout */ ... } Alternative, but in contrast to current get_timer() implementation is to drop the get_timer parameter and do the subtraction in the if.
No problem here.
--> get_timer(base) is a bit of a misnomer, it should be better named something like get_ms_since(base).
_Observation:_
It seems possible to move udelay() and get_timer() and static helper functions into a common, ARCH and SoC independent file, provided that the ARCH/SoC/board dependant implementations of get_ticks() runs at >= 1 MHz and is true 64 bits. IF above d) is met get_ticks might be replaced by a u32 function to be used by the common udelay() and get_timer() implementation. That would allow all timer code to use 32 bit arithmetic only.
I see no down-side in requiring d) to be true. It is clearly no problem for udelay, as the caller gives up control anyway. I can't imagine a wait for event loop that takes a second to execute. It may be that "common" C code is not really a good idea, in that different CPUs may want to convert timer ticks to millisec/microsec in different ways, based on clock rates and available CPU capabilities (like having or not having a 32 bit divide, needing to deal with clock rates with repeating fractional parts etc.). If ticks per microsecond is an integer and a 32 bit divide is available, the code is trivial. If we must avoid the divide and/or the ticks per microsecond is not an integer, things get somewhat messier but not impossible. If we don't care about the divide taking a bit of time, generic code is pretty easy to write even allowing for fractional ticks per nicrosecond. Usually, the high-rate timer can be read without requiring a call to another routine. This might be a good idea in that people will not then "grow" another timer structure on top of the read timer routine! It is for sure true that if we are worried about efficiency, there are at most three or four versions of the ticks to microseconds or milliseconds code required for all the different cases, so it is not so bad to write them out and let the users decide which one fits their cpu.
Best Regards, Bill Campbell
Best Regards, Reinhard

On 01.11.2010 14:47, J. William Campbell wrote:
On 10/27/2010 11:02 PM, Reinhard Meyer wrote:
Dear J. William Campbell,
Hi All,
I am pretty sure the migration to 64 bits was caused by 1) people not understanding that the timer operating on time DIFFERENCES would work fine even if the underlying timer wrapped around (most probable problem) and possibly 2) broken timer functions causing bogus timeouts, improperly "fixed" by switching to 64 bits.
I think u-boot could get along just fine with only 2 time related functions, uint_32 get_timer(uint_32 base) and udelay(uint 32 delay). udelay will only work on "small" values of delay, on the order of milliseconds. It is to be used when a "short" but "precise" delay in microsecond resolution is required. Users of get_timer must understand that it is only valid if it is called "often enough", i.e. at least once per period of the underlying timer. This is required because u-boot does not want to rely on interrupts as a timer update method. Therefore, all uses of get_timer must 1) call it once initially to get a start value, and 2) call get_timer at least once per period of the underlying hardware counter. This underlying period is guaranteed to be at least 4.29 seconds (32 bit counter at 4 GHz). Note that this does NOT mean that the total wait must be less than 4.29 seconds, only that the rate at which the elapsed time is TESTED must be adequate.
In order to implement this functionality, at least one hardware timer of some kind is required. An additional software "timer" in 1 ms resolution may be useful in maintaining the software time. If the hardware timer update rate is programmable, u-boot MAY set the update rate on initialization On initialization, u-boot MAY reset the hardware timer and MAY reset any associated software timer. The hardware timer MAY be started on initialization. On each call to get_timer(), u-boot MUST start the hardware timer if it was not started already. On calls to get_timer, u-boot MUST NOT reset the hardware timer if it was already started. The software timer MAY be reset if u-boot can unambiguously determine that more than 4.29 seconds has elapsed since the last call to get_timer.
The simplest case for implementing this scheme is if two programmable timers exist that can be set to 1ms and 1us. The timers are initialized at start-up, get_timer just returns the 32 bit 1 ms timer and udelay just waits for the number of ticks required on the second timer to elapse. The most common harder case is where there is only one timer available, it is running at 1 us per tick or faster, and we cannot control the rate. udelay is still easy, because we can convert the (small) delay in us to a delay in ticks by a 32 bit multiply that will not overflow 32 bits even if we have quite a few fractional bits in the tics per microsecond value. The elapsed ticks required is the (delay in us * us/per tick) >> # fractional bits in us/per tick. If that is not close enough for you, you can do it as (delay in us * (integer part of us/tick)) + ((delay in us * (fractional part)us/tick) >> # fraction bits). For "nice" numbers, like any integral number of MHz, there is no fraction
al
part. Only numbers like 66 MHz, or 1.666 GHz require messing with the fractional part. For get_timer, it is a bit harder. The program must keep two 32 bit global variables, the timer reading "last time" and the software timer in 1 ms resolution. Whenever get_timer is called, it must increase the software timer by the number of ms that have elapsed since the previous update and record the corresponding timer reading as the new "last time". Note that if the number of ms elapsed is not an integer (a common case), the value recorded as the "last time" must be decreased by the number of ticks not included in the 1 ms resolution software timer. There are many different ways to accomplish update, depending on what hardware math capabilities are available, and whether one thinks efficiency is important here. Conceptually, you convert the elapsed time in ticks into an equivalent number of ms, add that number to the software timer, store the current value of the hardware timer in last time, and subtract any "remainder" ticks from that value. If the elapsed time is l
es
s
that one ms, do no update of "last hardware time" and return the current software counter. If the elapsed time is greater than 4.29 seconds, reset the software counter to 0, record the current hardware counter time and return the current software counter. In between, do the math, which will fit into 32 bits.
If this idea seems like a good one, I can provide more detail on the conversions for various hardware capabilities is people want. Comments welcome.
To get the timer mess cleaned up three things have to happen:
Hi All,
I am glad somebody was still interested. I was afraid I had scared everyone off.
A consensus and documentation how it MUST be handled
Fix all timer implementations to adhere to that
Fix all timer uses to adhere to that
I agree this is required.
To start, RFC:
a) udelay(u32) MUST NOT interfere with other timer functions b) u32 get_timer(u32 base) MUST return (current_time - base) using millisecond units c) get_timer() MUST exhaust the full 32 bit range before wrapping d) to ensure the function of internal high frequency wrapping processes, get_timer() MUST be called at least once a second while a timeout loop is run (this will typically be the case anyway)
I have no problem with this, but it might be stricter than needed. I cant imagine anybody has a timer running faster than 4 GHz, and that allows 4 seconds of interval.
4 GHz / 2**32 yields 1 second. Assuming no prescaler... just theoretical anyway. A polling loop with timeout that does poll that seldom is illogical anyway.
e) reset_timer() is not needed, potentially harmful if used and shall be removed. This will also allow for nested timeouts, e.g. inner timeout for accessing a hardware and outer timeout for having the hardware perform a function f) get_ticks() and get_tbclk() SHALL NOT be used to check for timeouts except for cases where the better than ms resolution is really needed
I have an issue here. I think no such use cases really exist. For things like bit-banging, udelay is more what one wants. What are the counter-examples where sub-millisecond resolution in a loop waiting for some expected event is required?
Well, the "really needed" clause says it. I don't see it either.
g) get_ticks() MUST return true 64 bit time
I hope get_ticks must not exist, at least as a "generic" globally visible u-boot function. Having it present spawns confusion. People will use it instead of the generic get_timer when they don't have to, creating the portability problems we are trying to get rid of.
I also agree it would be best to get rid of externally available get_ticks() and friends.
h) all uses of get_ticks() and get_timer() shall be fixed to use get_timer() as follows:
u32 start_ms;
start_ms = get_timer(0);
any_type_of_loop { ... /* check for timeout */ if (get_timer(start_ms) >= TIMEOUT_IN_MS) /* handle timeout */ ... } Alternative, but in contrast to current get_timer() implementation is to drop the get_timer parameter and do the subtraction in the if.
No problem here.
How much code uses get_ticks, or get_timer, or just uses a counted loop of small udelays? I have seen all variants but never made stats of that. We probably should leave get_timer() as it is.
--> get_timer(base) is a bit of a misnomer, it should be better named something like get_ms_since(base).
_Observation:_
It seems possible to move udelay() and get_timer() and static helper functions into a common, ARCH and SoC independent file, provided that the ARCH/SoC/board dependant implementations of get_ticks() runs at >= 1 MHz and is true 64 bits. IF above d) is met get_ticks might be replaced by a u32 function to be used by the common udelay() and get_timer() implementation. That would allow all timer code to use 32 bit arithmetic only.
I see no down-side in requiring d) to be true. It is clearly no problem for udelay, as the caller gives up control anyway. I can't imagine a wait for event loop that takes a second to execute. It may be that "common" C code is not really a good idea, in that different CPUs may want to convert timer ticks to millisec/microsec in different ways, based on clock rates and available CPU capabilities (like having or not having a 32 bit divide, needing to deal with clock rates with repeating fractional parts etc.). If ticks per microsecond is an integer and a 32 bit divide is available, the code is trivial. If we must avoid the divide and/or the ticks per microsecond is not an integer, things get somewhat messier but not impossible. If we don't care about the divide taking a bit of time, generic code is pretty easy to write even allowing for fractional ticks per nicrosecond. Usually, the high-rate timer can be read without requiring a call to another routine. This might be a good idea in that people will not then "grow" another timer structure on top of the read timer routine! It is for sure true that if we are worried about efficiency, there are at most three or four versions of the ticks to microseconds or milliseconds code required for all the different cases, so it is not so bad to write them out and let the users decide which one fits their cpu.
For efficiency it would help if CONFIG_SYS_HZ would be allowed to be in the range of 1000 - 2000 Hz (or 700 - 1400), so on any hardware a simple shift would do for get_timer(). Of course the constant CONFIG_SYS_HZ should be replaced by a function like get_timer_hz() to allow for runtime calculation of the best fit.
Best Regards, Reinhard

Dear Reinhard Meyer,
In message 4CCF1CAF.1020107@emk-elektronik.de you wrote:
For efficiency it would help if CONFIG_SYS_HZ would be allowed to be in the range of 1000 - 2000 Hz (or 700 - 1400), so on any hardware a simple shift would do for get_timer(). Of course the constant CONFIG_SYS_HZ should be replaced by a function like get_timer_hz() to allow for runtime calculation of the best fit.
No. If you need such differing settings, please use a different config variable.
Best regards,
Wolfgang Denk

Reinhard Meyer had written, on 10/26/2010 02:57 AM, the following:
Wolfgang Denk schrieb:
Dear Reinhard Meyer,
In message 4CC67CA1.9090302@emk-elektronik.de you wrote:
If implemented with true 64 bits for get_ticks() that function is useable for timeout programming:
ulong timeval = get_timer (0);
do { ... } while (get_timer (timeval) < TIMEOUT);
It appears that the "base" parameter and return value is in CONFIG_SYS_HZ units, and not in native ticks. That causes 64 bit mul/div every time get_timer() is called. Won't hurt in a timeout loop, though.
But it will hurt in othe rplaces.
Also, this code _is_ a bit different, as "get_timer(0)" makes sure the counter starts ticking again at 0
Nope, it does not reset the counter itself. It returns the current counter value (recalculated into CONFIG_SYS_HZ units). Maybe you mean reset_timer() instead?
In arm9226ejs/omap/timer.c udelay() is implemented with reset_timer() and get_timer(). Since those functions are inherently not nestable, beware of base=get_timer(0); do { ... udelay(xx); ... } while (get_timer(base) < TIMEOUT); constructs!
At least the omap3+ code arch/arm/cpu/armv7/omap-common/timer.c __udelay() does'nt seem to reset the timer BUT, unsigned long long get_ticks(void) { return get_timer(0); } ulong get_timer(ulong base) { return get_timer_masked() - base; } ulong get_timer_masked(void) { /* current tick value */ ulong now = readl(&timer_base->tcrr) / (TIMER_CLOCK / CONFIG_SYS_HZ);
if (now >= lastinc) /* normal mode (non roll) */ /* move stamp fordward with absoulte diff ticks */ timestamp += (now - lastinc); else /* we have rollover of incrementer */ timestamp += ((TIMER_LOAD_VAL / (TIMER_CLOCK / CONFIG_SYS_HZ)) - lastinc) + now; lastinc = now; return timestamp; }
if I am not mistaken, this is 32bit long.. but since we have as wdenk mentions below a 2^32 tick duration, we can safely go with an option such as: /* If we fail after 1 second wait, something is really bad */ #define MAX_RETRY_US (1 * 1000 * 1000)
uint64_t etime; /* actually this could be u32 */
etime = get_ticks() + usec2ticks(MAX_RETRY_US); while (!(readl(&mmc_base->stat) & CC_MASK)) { if (get_ticks() <= etime) { printf("%s: timedout waiting for cc2!\n", __func__); return; } }
sounds right?
, and get_timer() is defined to have millisecond resolution.
Actually CONFIG_SYS_HZ (whatever that is).
So you have a guaranteed 2^32 milliseconds or 4294967 seconds or about 3.3 years available which indeed should be sufficient to implement standard timeouts.
I think it is necessary to summarize all implicit or explicit documented "defined to have's" regarding the timer and then to verify that all implementations adhere to them.
yes indeed.. :(

Dear Nishanth Menon,
In message 4CC6EFB1.9000701@ti.com you wrote:
uint64_t etime; /* actually this could be u32 */
etime = get_ticks() + usec2ticks(MAX_RETRY_US); while (!(readl(&mmc_base->stat) & CC_MASK)) { if (get_ticks() <= etime) { printf("%s: timedout waiting for cc2!\n", __func__); return; } }
sounds right?
No. This code is always wrong. Please fix it as described.
Best regards,
Wolfgang Denk

Wolfgang Denk had written, on 10/26/2010 10:17 AM, the following:
Dear Nishanth Menon,
In message 4CC6EFB1.9000701@ti.com you wrote:
uint64_t etime; /* actually this could be u32 */
etime = get_ticks() + usec2ticks(MAX_RETRY_US); while (!(readl(&mmc_base->stat) & CC_MASK)) { if (get_ticks() <= etime) { printf("%s: timedout waiting for cc2!\n", __func__); return; } }
sounds right?
No. This code is always wrong. Please fix it as described.
Apologies on being a dudhead, I suppose you mean the following:
ulong start; start = get_timer(0); while (!(readl(&mmc_base->stat) & CC_MASK)) { if (get_timer(start) > usec2ticks(MAX_RETRY_US)) { printf("%s: timedout waiting for cc2!\n", __func__); return; } }
would this be better?

Dear Nishanth Menon,
In message 4CC6F23A.2040608@ti.com you wrote:
No. This code is always wrong. Please fix it as described.
Apologies on being a dudhead, I suppose you mean the following:
ulong start; start = get_timer(0); while (!(readl(&mmc_base->stat) & CC_MASK)) { if (get_timer(start) > usec2ticks(MAX_RETRY_US)) { printf("%s: timedout waiting for cc2!\n", __func__); return; } }
would this be better?
No, not at all, as get_timer() returns milliseconds, not ticks.
You probably want something like:
ulong start = get_timer(0);
while (!(readl(&mmc_base->stat) & CC_MASK)) { if (get_timer(0) - start >= MAX_RETRY_US/1000) { printf(...); return; } udelay(100); }
or such.
Best regards,
Wolfgang Denk

On 26.10.2010 18:26, Wolfgang Denk wrote:
Dear Nishanth Menon,
In message4CC6F23A.2040608@ti.com you wrote:
No. This code is always wrong. Please fix it as described.
Apologies on being a dudhead, I suppose you mean the following:
ulong start; start = get_timer(0); while (!(readl(&mmc_base->stat)& CC_MASK)) { if (get_timer(start)> usec2ticks(MAX_RETRY_US)) { printf("%s: timedout waiting for cc2!\n", __func__); return; } }
would this be better?
No, not at all, as get_timer() returns milliseconds, not ticks.
You probably want something like:
ulong start = get_timer(0);
while (!(readl(&mmc_base->stat)& CC_MASK)) { if (get_timer(0) - start>= MAX_RETRY_US/1000) { printf(...); return; } udelay(100); }
This will work, of course, but the original semantics of get_timer() seems to be:
ulong start = get_timer(0);
loop { if (get_timer(start) >= timeout_in_ms) we_have_timeout(); }
--> get_timer(x) returns (time - x)
the subtraction is done internally.
Reinhard

On 10/25/2010 11:01 PM, Reinhard Meyer wrote:
Dear Wolfgang Denk,
Dear Reinhard Meyer,
In message4CC66A67.4000608@emk-elektronik.de you wrote:
It fails in case the timer wraps around.
Assume 32 bit counters, start time = 0xFFFFFFF0, delay = 0x20. It will compute end = 0x10, the while codition is immediately false, and you don't have any delay at all, which most probably generates a false error condition.
I used and assumed a 64 bit counter, that will not wrap around while our civilization still exists...
The code is still wrong, and as a simple correct implementation exists there is no excuse for using such incorrect code.
Please fix that!
Agreed here. People are invited to dig through u-boot and find all those places.
If get_ticks() is only 32 bits worth, both methods will misbehave at a 32 bit wrap over.
No.
start = time(); while ((time() - start)< delay) ...
This works much better (assuming unsigned arithmetics).
True, provided the underlying timer is really 64 bits, otherwise this fails, too...
You are wrong. Try for example this:
--------------------- snip ------------------- #include<stdio.h>
int main(void) { unsigned int time = 0xFFFFFFF0; unsigned int delay = 0x20; unsigned int start;
You are wrong here, because you take it out of context. My "demo" is using the (declared as) 64 bit function get_ticks(). I mentioned above that this function MUST be truly returning 64 bits worth of (incrementing) value to make any version work. If get_ticks() just returns a 32 bit counter value neither method will work reliably. Just check all implementations that this function is implemented correctly.
Hi All, I have wondered for quite some time about the rush to make get_ticks() return a 64 bit value. For any "reasonable" purpose, like waiting a few seconds for something to complete, a 32 bit timebase is plenty adequate. If the number of ticks per second is 1000000000, i.e. a 1 GHz clock rate, the clock wraps in a 32 bit word about every five seconds. The trick is that time always moves forward, so a current get_ticks() - a previous get_ticks() is ALWAYS a positive number. It is necessary to check the clock more often than (0X100000000 - your_timeout) times per second, but unless your timeout is very near the maximum time that fits into 32 bits, this won't be a problem. Most CPUs have a counter that count at a "reasonable" rate. Some CPUs also have a "cycle counter" that runs at the CPU clock rate. These counters are useful to determine exactly how many machine cycles a certain process took, and therefore they have high resolution. Timers for simple delays neither need nor want such resolution. If the only counter available on you CPU runs at several GHz, and is 64 bits long, just shift it right a few bits to reduce the resolution to a "reasonable" resolution and return a 32 bit value. There is no need for a bunch of long long variables and extra code running around to process simple timeouts. It may be that we need a different routine for precision timing measurements with high resolution, but it needn't, and probably shouldn't IMHO be get_ticks().
Best Regards, Bill Campbell
U-Boot mailing list U-Boot@lists.denx.de http://lists.denx.de/mailman/listinfo/u-boot

Dear Reinhard Meyer,
In message 4CC66ECA.9000106@emk-elektronik.de you wrote:
Agreed here. People are invited to dig through u-boot and find all those places.
You know the ones you added best :-)
int main(void) { unsigned int time = 0xFFFFFFF0; unsigned int delay = 0x20; unsigned int start;
You are wrong here, because you take it out of context.
I am not wrong. Feel free to replace "unsigned int" by "unsigned long long", and 0xFFFFFFF0 by 0xFFFFFFFFFFFFFFF0ULL, and the "%X" by "%llX".
My "demo" is using the (declared as) 64 bit function get_ticks(). I mentioned above that this function MUST be truly returning 64 bits worth of (incrementing) value to make any version work.
What's the difference? It is inherently wrong to believe an overflow would never occur just because the precision of a number is "long enough". Remeber y2k issues. Remember the time overflog for UNIX systems in 2038, etc. etc.
If get_ticks() just returns a 32 bit counter value neither method will work reliably. Just check all implementations that this function is implemented correctly.
No, the code is still WRONG. It'ss just that the problem is less likely to hit.
BTW - who says the timer starts counting at 0 ?
Best regards,
Wolfgang Denk

Dear Wolfgang Denk,
Dear Reinhard Meyer,
In message 4CC66ECA.9000106@emk-elektronik.de you wrote:
Agreed here. People are invited to dig through u-boot and find all those places.
You know the ones you added best :-)
int main(void) { unsigned int time = 0xFFFFFFF0; unsigned int delay = 0x20; unsigned int start;
You are wrong here, because you take it out of context.
I am not wrong. Feel free to replace "unsigned int" by "unsigned long long", and 0xFFFFFFF0 by 0xFFFFFFFFFFFFFFF0ULL, and the "%X" by "%llX".
My "demo" is using the (declared as) 64 bit function get_ticks(). I mentioned above that this function MUST be truly returning 64 bits worth of (incrementing) value to make any version work.
What's the difference? It is inherently wrong to believe an overflow would never occur just because the precision of a number is "long enough". Remeber y2k issues. Remember the time overflog for UNIX systems in 2038, etc. etc.
If get_ticks() just returns a 32 bit counter value neither method will work reliably. Just check all implementations that this function is implemented correctly.
You should really try to understand what I am saying here:
IF get_timer() returns 0x0000000000000000 to 0x00000000ffffffff and wraps back to 0x0000000000000000 (thats how it is or was implemented in SOME architectures) neither mine (agreed to be wrong) nor your code would work if it uses 64 bits vars.
Reinhard

Dear Nishanth Menon,
In message 1288054924-24989-1-git-send-email-nm@ti.com you wrote:
Having a loop with a counter is no timing guarentee for timing accuracy or compiler optimizations. For e.g. the same loop counter which runs when the MPU is running at 600MHz will timeout in around half the time when running at 1GHz. or the example where GCC 4.5 compiles with different optimization compared to GCC 4.4. use a udelay(10) to ensure we have a predictable delay. use an emperical number - 100000 uSec ~= 1sec for a worst case timeout.
Hm... 100000 usec = 0.1 sec only... Guess you mean
100,000 x 10 usec = 1 sec ?
Best regards,
Wolfgang Denk

-----Original Message----- From: u-boot-bounces@lists.denx.de [mailto:u-boot-bounces@lists.denx.de] On Behalf Of Wolfgang Denk Sent: Tuesday, October 26, 2010 10:06 AM To: Menon, Nishanth Cc: u-boot Subject: Re: [U-Boot] [PATCH v2] mmc: omap: timeout counter fix
Dear Nishanth Menon,
In message 1288054924-24989-1-git-send-email-nm@ti.com you wrote:
Having a loop with a counter is no timing guarentee for timing accuracy or compiler optimizations. For e.g. the same loop counter which runs when the MPU is running at 600MHz will timeout in around half the time when running at 1GHz. or the example where GCC 4.5 compiles with different optimization compared to GCC 4.4. use a udelay(10) to ensure we have a predictable delay. use an emperical number - 100000 uSec ~= 1sec for a worst case timeout.
Hm... 100000 usec = 0.1 sec only... Guess you mean
100,000 x 10 usec = 1 sec ?
Best regards,
Wolfgang Denk
[..snip..] [Menon, Nishanth] - overall, at least the segments I saw had no reason to hit the registers so hard (alright we dont have much else to do.. but still).. I am very open to options from Sukumar(original author) as well..
[Ghorai] I am agreeing with the part where it's looks while(1) loop. And can add retry counter too. otherwise I feel I will increase the boot time.
participants (5)
-
Ghorai, Sukumar
-
J. William Campbell
-
Nishanth Menon
-
Reinhard Meyer
-
Wolfgang Denk