diff --git a/cores/arduino/wiring.c b/cores/arduino/wiring.c index 6efda6e..0cd25e5 100644 --- a/cores/arduino/wiring.c +++ b/cores/arduino/wiring.c @@ -123,118 +123,114 @@ void delay(unsigned long ms) void delayMicroseconds(unsigned int us) { // call = 4 cycles + 2 to 4 cycles to init us(2 for constant delay, 4 for variable) + // calling avrlib's delay_us() function with low values (e.g. 1 or // 2 microseconds) gives delays longer than desired. //delay_us(us); #if F_CPU >= 24000000L // for the 24 MHz clock for the aventurous ones, trying to overclock - // for a one-microsecond delay, simply wait 6 cycles and return. The overhead - // of the function call yields a delay of exactly one microsecond. - __asm__ __volatile__ ( - "nop" "\n\t" - "nop" "\n\t" - "nop" "\n\t" - "nop" "\n\t" - "nop" "\n\t" - "nop"); //just waiting 6 cycles - if (--us == 0) - return; + // zero delay fix + if (!us) return; // = 3 cycles, (4 when true) // the following loop takes a 1/6 of a microsecond (4 cycles) // per iteration, so execute it six times for each microsecond of // delay requested. - us *= 6; // x6 us + us *= 6; // x6 us, = 7 cycles // account for the time taken in the preceeding commands. - us -= 2; + // we just burned 22 (24) cycles above, remove 5, (5*4=20) + // us is at least 6 so we can subtract 5 + us -= 5; //=2 cycles #elif F_CPU >= 20000000L // for the 20 MHz clock on rare Arduino boards - // for a one-microsecond delay, simply wait 2 cycles and return. The overhead - // of the function call yields a delay of exactly one microsecond. + // for a one-microsecond delay, simply return.
the overhead + // of the function call takes 18 (20) cycles, which is 1us __asm__ __volatile__ ( "nop" "\n\t" - "nop"); //just waiting 2 cycle - if (--us == 0) - return; + "nop" "\n\t" + "nop" "\n\t" + "nop"); //just waiting 4 cycles + if (us <= 1) return; // = 3 cycles, (4 when true) // the following loop takes a 1/5 of a microsecond (4 cycles) // per iteration, so execute it five times for each microsecond of // delay requested. - us = (us<<2) + us; // x5 us + us = (us << 2) + us; // x5 us, = 7 cycles // account for the time taken in the preceeding commands. - us -= 2; + // we just burned 26 (28) cycles above, remove 7, (7*4=28) + // us is at least 10 so we can subtract 7 + us -= 7; // 2 cycles #elif F_CPU >= 16000000L // for the 16 MHz clock on most Arduino boards // for a one-microsecond delay, simply return. the overhead - // of the function call yields a delay of approximately 1 1/8 us. - if (--us == 0) - return; + // of the function call takes 14 (16) cycles, which is 1us + if (us <= 1) return; // = 3 cycles, (4 when true) // the following loop takes 1/4 of a microsecond (4 cycles) // per iteration, so execute it four times for each microsecond of // delay requested. - us <<= 2; // x4 us + us <<= 2; // x4 us, = 4 cycles // account for the time taken in the preceeding commands. - us -= 2; + // we just burned 19 (21) cycles above, remove 5, (5*4=20) + // us is at least 8 so we can subtract 5 + us -= 5; // = 2 cycles, #elif F_CPU >= 12000000L // for the 12 MHz clock if somebody is working with USB - // for a one-microsecond delay, simply return. the overhead - // of the function call yields a delay of approximately 1.5 us. - if (--us == 0) - return; + // for a 1 microsecond delay, simply return.
the overhead + // of the function call takes 14 (16) cycles, which is 1.5us + if (us <= 1) return; // = 3 cycles, (4 when true) // the following loop takes 1/3 of a microsecond (4 cycles) // per iteration, so execute it three times for each microsecond of // delay requested. - us = (us << 1) + us; // x3 us + us = (us << 1) + us; // x3 us, = 5 cycles // account for the time taken in the preceeding commands. - us -= 2; + // we just burned 20 (22) cycles above, remove 5, (5*4=20) + // us is at least 6 so we can subtract 5 + us -= 5; //2 cycles + #elif F_CPU >= 8000000L // for the 8 MHz internal clock - // for a one- or two-microsecond delay, simply return. the overhead of - // the function calls takes more than two microseconds. can't just - // subtract two, since us is unsigned; we'd overflow. - if (--us == 0) - return; - if (--us == 0) - return; + // for a 1 or 2 microsecond delay, simply return. the overhead + // of the function call takes 14 (16) cycles, which is 2us + if (us <= 2) return; // = 3 cycles, (4 when true) // the following loop takes 1/2 of a microsecond (4 cycles) // per iteration, so execute it twice for each microsecond of // delay requested. - us <<= 1; //x2 us - - // partially compensate for the time taken by the preceeding commands. - // we can't subtract any more than this or we'd overflow w/ small delays. - us--; + us <<= 1; //x2 us, = 2 cycles + + // account for the time taken in the preceding commands. + // we just burned 17 (19) cycles above, remove 4, (4*4=16) + // us is at least 6 so we can subtract 4 + us -= 4; // = 2 cycles #else // for the 1 MHz internal clock (default settings for common Atmega microcontrollers) - // the overhead of the function calls takes about 16 microseconds. - if (us <= 16) //4 cycles spent here - return; - if (us <= 22) { //4 cycles spent here - return; - } - - // compensate for the time taken by the preceeding and next commands.
- us -= 22; + // the overhead of the function calls is 14 (16) cycles + if (us <= 16) return; //= 3 cycles, (4 when true) + if (us <= 25) return; //= 3 cycles, (4 when true), (us must be at least 26 so that subtracting 22 leaves at least 4) + // compensate for the time taken by the preceding and next commands (about 22 cycles) + us -= 22; // = 2 cycles // the following loop takes 4 microseconds (4 cycles) // per iteration, so execute it us/4 times - us >>= 2; // us div 4 + // us is at least 4, divided by 4 gives us 1 (no zero delay bug) + us >>= 2; // us div 4, = 4 cycles + + #endif // busy wait @@ -360,14 +356,32 @@ void init() #endif #if defined(ADCSRA) - // set a2d prescale factor to 128 - // 16 MHz / 128 = 125 KHz, inside the desired 50-200 KHz range. - // XXX: this will not work properly for other clock speeds, and - // this code should use F_CPU to determine the prescale factor. - sbi(ADCSRA, ADPS2); - sbi(ADCSRA, ADPS1); - sbi(ADCSRA, ADPS0); - + // set a2d prescaler so we are inside the desired 50-200 KHz range. + #if F_CPU >= 16000000 // 16 MHz / 128 = 125 KHz + sbi(ADCSRA, ADPS2); + sbi(ADCSRA, ADPS1); + sbi(ADCSRA, ADPS0); + #elif F_CPU >= 8000000 // 8 MHz / 64 = 125 KHz + sbi(ADCSRA, ADPS2); + sbi(ADCSRA, ADPS1); + cbi(ADCSRA, ADPS0); + #elif F_CPU >= 4000000 // 4 MHz / 32 = 125 KHz + sbi(ADCSRA, ADPS2); + cbi(ADCSRA, ADPS1); + sbi(ADCSRA, ADPS0); + #elif F_CPU >= 2000000 // 2 MHz / 16 = 125 KHz + sbi(ADCSRA, ADPS2); + cbi(ADCSRA, ADPS1); + cbi(ADCSRA, ADPS0); + #elif F_CPU >= 1000000 // 1 MHz / 8 = 125 KHz + cbi(ADCSRA, ADPS2); + sbi(ADCSRA, ADPS1); + sbi(ADCSRA, ADPS0); + #else // 128 kHz / 2 = 64 KHz -> This is the closest you can get, the prescaler is 2 + cbi(ADCSRA, ADPS2); + cbi(ADCSRA, ADPS1); + sbi(ADCSRA, ADPS0); + #endif // enable a2d conversions sbi(ADCSRA, ADEN); #endif