Fix SoftwareSerial timings

Instead of using a lookup table with (wrong) timings, this calculates
the timings in SoftwareSerial::begin. This is probably a bit slower, but
since it typically happens once, this shouldn't be a problem.
Additionally, since the lookup tables can be removed, this is also a lot
smaller, as well as supporting arbitrary CPU speeds and baudrates,
instead of the limited set that was defined before.

Furthermore, this switches to use the _delay_loop_2 function from
avr-libc instead of a handcoded delay function. The avr-libc function
only takes two instructions, as opposed to four instructions for the old
one. The compiler also inlines the avr-libc function, which makes the
timings more reliable.

The calculated timings directly rely on the instructions generated by
the compiler, since a significant amount of time is spent processing
(compared to the delays, especially at higher speeds). This means that
if the code is changed, or a different compiler is used, the
calculations might need changing (though a few cycles more or less
shouldn't cause immediate breakage).

The timings in the code have been calculated from the assembly generated
by gcc 4.8.2 and gcc 4.3.2.

The RX baudrates supported by SoftwareSerial are still not unlimited. At
16Mhz, using gcc 4.8.2, everything up to 115200 works. At 8Mhz, it works
up to 57600. Using gcc 4.3.2, it also works up to 57600 at 16Mhz and up
to 38400 at 8Mhz. Note that at these highest speeds, communication
works, but is still quite sensitive to other interrupts (like the
millis() interrupts) when bytes are sent back-to-back, so there still
are corrupted bytes in RX.

TX works up to 115200 for all combinations of compiler and clock rates.

This fixes #2019
This commit is contained in:
Matthijs Kooijman 2014-04-23 19:23:58 +02:00
parent 08c3bfdc9f
commit 0a6072dccb
2 changed files with 69 additions and 121 deletions

View File

@ -42,92 +42,7 @@ http://arduiniana.org.
#include <avr/pgmspace.h>
#include <Arduino.h>
#include <SoftwareSerial.h>
//
// Lookup table
//
typedef struct _DELAY_TABLE
{
long baud;
unsigned short rx_delay_centering;
unsigned short rx_delay_intrabit;
unsigned short rx_delay_stopbit;
unsigned short tx_delay;
} DELAY_TABLE;
#if F_CPU == 16000000
static const DELAY_TABLE PROGMEM table[] =
{
// baud rxcenter rxintra rxstop tx
{ 115200, 1, 17, 17, 12, },
{ 57600, 10, 37, 37, 33, },
{ 38400, 25, 57, 57, 54, },
{ 31250, 31, 70, 70, 68, },
{ 28800, 34, 77, 77, 74, },
{ 19200, 54, 117, 117, 114, },
{ 14400, 74, 156, 156, 153, },
{ 9600, 114, 236, 236, 233, },
{ 4800, 233, 474, 474, 471, },
{ 2400, 471, 950, 950, 947, },
{ 1200, 947, 1902, 1902, 1899, },
{ 600, 1902, 3804, 3804, 3800, },
{ 300, 3804, 7617, 7617, 7614, },
};
const int XMIT_START_ADJUSTMENT = 5;
#elif F_CPU == 8000000
static const DELAY_TABLE table[] PROGMEM =
{
// baud rxcenter rxintra rxstop tx
{ 115200, 1, 5, 5, 3, },
{ 57600, 1, 15, 15, 13, },
{ 38400, 2, 25, 26, 23, },
{ 31250, 7, 32, 33, 29, },
{ 28800, 11, 35, 35, 32, },
{ 19200, 20, 55, 55, 52, },
{ 14400, 30, 75, 75, 72, },
{ 9600, 50, 114, 114, 112, },
{ 4800, 110, 233, 233, 230, },
{ 2400, 229, 472, 472, 469, },
{ 1200, 467, 948, 948, 945, },
{ 600, 948, 1895, 1895, 1890, },
{ 300, 1895, 3805, 3805, 3802, },
};
const int XMIT_START_ADJUSTMENT = 4;
#elif F_CPU == 20000000
// 20MHz support courtesy of the good people at macegr.com.
// Thanks, Garrett!
static const DELAY_TABLE PROGMEM table[] =
{
// baud rxcenter rxintra rxstop tx
{ 115200, 3, 21, 21, 18, },
{ 57600, 20, 43, 43, 41, },
{ 38400, 37, 73, 73, 70, },
{ 31250, 45, 89, 89, 88, },
{ 28800, 46, 98, 98, 95, },
{ 19200, 71, 148, 148, 145, },
{ 14400, 96, 197, 197, 194, },
{ 9600, 146, 297, 297, 294, },
{ 4800, 296, 595, 595, 592, },
{ 2400, 592, 1189, 1189, 1186, },
{ 1200, 1187, 2379, 2379, 2376, },
{ 600, 2379, 4759, 4759, 4755, },
{ 300, 4759, 9523, 9523, 9520, },
};
const int XMIT_START_ADJUSTMENT = 6;
#else
#error This version of SoftwareSerial supports only 20, 16 and 8MHz processors
#endif
#include <util/delay_basic.h>
//
// Statics
@ -162,16 +77,7 @@ inline void DebugPulse(uint8_t pin, uint8_t count)
/* static */
inline void SoftwareSerial::tunedDelay(uint16_t delay) {
uint8_t tmp=0;
asm volatile("sbiw %0, 0x01 \n\t"
"ldi %1, 0xFF \n\t"
"cpi %A0, 0xFF \n\t"
"cpc %B0, %1 \n\t"
"brne .-10 \n\t"
: "+r" (delay), "+a" (tmp)
: "0" (delay)
);
_delay_loop_2(delay);
}
// This function sets the current object as the "listening"
@ -256,12 +162,6 @@ void SoftwareSerial::recv()
d |= 0x80;
}
// skip the stop bit
tunedDelay(_rx_delay_stopbit);
DebugPulse(_DEBUG_PIN2, 1);
// Re-enable interrupts when we're sure to be inside the stop bit
setRxIntMsk(true);
if (_inverse_logic)
d = ~d;
@ -280,6 +180,14 @@ void SoftwareSerial::recv()
#endif
_buffer_overflow = true;
}
// skip the stop bit
tunedDelay(_rx_delay_stopbit);
DebugPulse(_DEBUG_PIN1, 1);
// Re-enable interrupts when we're sure to be inside the stop bit
setRxIntMsk(true);
}
#if GCC_VERSION < 40302
@ -378,6 +286,13 @@ void SoftwareSerial::setRX(uint8_t rx)
_receivePortRegister = portInputRegister(port);
}
uint16_t SoftwareSerial::subtract_cap(uint16_t num, uint16_t sub) {
if (num > sub)
return num - sub;
else
return 1;
}
//
// Public methods
//
@ -386,26 +301,55 @@ void SoftwareSerial::begin(long speed)
{
_rx_delay_centering = _rx_delay_intrabit = _rx_delay_stopbit = _tx_delay = 0;
for (unsigned i=0; i<sizeof(table)/sizeof(table[0]); ++i)
{
long baud = pgm_read_dword(&table[i].baud);
if (baud == speed)
{
if (digitalPinToPCICR(_receivePin))
{
// Only setup rx when we have a valid PCINT for this pin
_rx_delay_centering = pgm_read_word(&table[i].rx_delay_centering);
_rx_delay_intrabit = pgm_read_word(&table[i].rx_delay_intrabit);
_rx_delay_stopbit = pgm_read_word(&table[i].rx_delay_stopbit);
}
_tx_delay = pgm_read_word(&table[i].tx_delay);
break;
}
}
// Precalculate the various delays, in number of 4-cycle delays
uint16_t bit_delay = (F_CPU / speed) / 4;
// 12 (gcc 4.8.2) or 13 (gcc 4.3.2) cycles from start bit to first bit,
// 15 (gcc 4.8.2) or 16 (gcc 4.3.2) cycles between bits,
// 12 (gcc 4.8.2) or 14 (gcc 4.3.2) cycles from last bit to stop bit
// These are all close enough to just use 15 cycles, since the inter-bit
// timings are the most critical (deviations stack 8 times)
_tx_delay = subtract_cap(bit_delay, 15 / 4);
// Only setup rx when we have a valid PCINT for this pin
if (digitalPinToPCICR(_receivePin)) {
#if GCC_VERSION > 40800
// Timings counted from gcc 4.8.2 output. This works up to 115200 on
// 16Mhz and 57600 on 8Mhz.
//
// When the start bit occurs, there are 3 or 4 cycles before the
// interrupt flag is set, 4 cycles before the PC is set to the right
// interrupt vector address and the old PC is pushed on the stack,
// and then 75 cycles of instructions (including the RJMP in the
// ISR vector table) until the first delay. After the delay, there
// are 17 more cycles until the pin value is read (excluding the
// delay in the loop).
// We want to have a total delay of 1.5 bit time. Inside the loop,
// we already wait for 1 bit time - 23 cycles, so here we wait for
// 0.5 bit time - (71 + 18 - 22) cycles.
_rx_delay_centering = subtract_cap(bit_delay / 2, (4 + 4 + 75 + 17 - 23) / 4);
// There are 23 cycles in each loop iteration (excluding the delay)
_rx_delay_intrabit = subtract_cap(bit_delay, 23 / 4);
// There are 37 cycles from the last bit read to the start of
// stopbit delay and 11 cycles from the delay until the interrupt
// mask is enabled again (which _must_ happen during the stopbit).
// This delay aims at 3/4 of a bit time, meaning the end of the
// delay will be at 1/4th of the stopbit. This allows some extra
// time for ISR cleanup, which makes 115200 baud at 16Mhz work more
// reliably
_rx_delay_stopbit = subtract_cap(bit_delay * 3 / 4, (37 + 11) / 4);
#else // Timings counted from gcc 4.3.2 output
// Note that this code is a _lot_ slower, mostly due to bad register
// allocation choices of gcc. This works up to 57600 on 16Mhz and
// 38400 on 8Mhz.
_rx_delay_centering = subtract_cap(bit_delay / 2, (4 + 4 + 97 + 29 - 11) / 4);
_rx_delay_intrabit = subtract_cap(bit_delay, 11 / 4);
_rx_delay_stopbit = subtract_cap(bit_delay * 3 / 4, (44 + 17) / 4);
#endif
// Set up RX interrupts, but only if we have a valid RX baud rate
if (_rx_delay_stopbit)
{
// Enable the PCINT for the entire port here, but never disable it
// (others might also need it, so we disable the interrupt by using
// the per-pin PCMSK register).

View File

@ -56,6 +56,7 @@ private:
volatile uint8_t *_pcint_maskreg;
uint8_t _pcint_maskvalue;
// Expressed as 4-cycle delays (must never be 0!)
uint16_t _rx_delay_centering;
uint16_t _rx_delay_intrabit;
uint16_t _rx_delay_stopbit;
@ -78,6 +79,9 @@ private:
void setRX(uint8_t receivePin);
void setRxIntMsk(bool enable) __attribute__((__always_inline__));
// Return num - sub, or 1 if the result would be < 1
static uint16_t subtract_cap(uint16_t num, uint16_t sub);
// private static method for timing
static inline void tunedDelay(uint16_t delay);