Make getTimeNowNt even faster (#3504)

The last version is already much better than the original, mostly because it doesn't call
CriticalSectionLocker, which has a ton of overhead due to debug tracking.  But this version is
another 4 instructions / 12 bytes shorter.  It does as much math in 32-bit land as possible, and
avoids math operations that span 64 bits (i.e. each operation works on either the lower half or
the upper half, but not both).  The result is only 3 instructions, not including the necessary
4 loads (ptr to now, now, ptr to upper, upper), 1 store (upper), and 1 return/branch.
This commit is contained in:
Scott Smith 2021-11-08 11:24:31 -08:00 committed by GitHub
parent 2166a5ba16
commit 8639b7c7e4
3 changed files with 99 additions and 15 deletions

View File

@ -64,23 +64,10 @@ efitimeus_t getTimeNowUs(void) {
} }
// this is bits 30-61, not 32-63. We only support 62-bit time. You can fire me in 36,533 years static WrapAround62 timeNt;
// (1,461 on the simulator).
static volatile uint32_t upperTimeNt = 0;
efitick_t getTimeNowNt() { efitick_t getTimeNowNt() {
// Shift cannot be 31, as we wouldn't be able to tell if time is moving forward or backward return timeNt.update(getTimeNowLowerNt());
// relative to upperTimeNt. We do need to handle both directions as our "thread" can be
// racing with other "threads" in sampling stamp and updating upperTimeNt.
constexpr unsigned shift = 30;
uint32_t stamp = getTimeNowLowerNt();
uint32_t upper = upperTimeNt;
uint32_t relative_unsigned = stamp - (upper << shift);
efitick_t time64 = (efitick_t(upper) << shift) + (int32_t)relative_unsigned;
upperTimeNt = time64 >> shift;
return time64;
} }
#endif /* !EFI_UNIT_TEST */ #endif /* !EFI_UNIT_TEST */

View File

@ -51,6 +51,37 @@
#ifdef __cplusplus #ifdef __cplusplus
extern "C" extern "C"
{ {
/**
* Provide a 62-bit counter from a 32-bit counter source that wraps around.
*
* If you'd like to use it with a 16-bit counter, shift the source by 16 before passing it here.
*
* The original author states that this class is thread/interrupt-safe. update() takes no lock:
* it performs one volatile read and one volatile write of m_upper and does everything else on
* local copies.
* NOTE(review): the read-modify-write of m_upper is not atomic as a unit; presumably safety
* relies on every caller passing a freshly sampled source value, so that any interleaving still
* stores a value close to "now" - confirm against the callers.
*/
struct WrapAround62 {
/**
* Fold a new sample of the 32-bit source into the tracked upper bits and return the extended
* counter value.
*
* The sample is interpreted as lying within the signed 32-bit range around (m_upper << 30), so
* it may be somewhat ahead of or behind the previously stored position.
*
* @param source current raw value of the wrapping 32-bit counter
* @return value whose low 32 bits are exactly `source` and whose high 32 bits are the updated
*         m_upper shifted right by 2
*/
uint64_t update(uint32_t source) {
// Shift cannot be 31, as we wouldn't be able to tell if time is moving forward or
// backward relative to m_upper. We do need to handle both directions as our
// "thread" can be racing with other "threads" in sampling stamp and updating
// m_upper.
constexpr unsigned shift = 30;
// Single read of the volatile state; all arithmetic below works on this local copy.
uint32_t upper = m_upper;
// Offset of source from the position recorded in upper, modulo 2^32. Only the two lowest
// bits of upper survive the 32-bit left shift; they land on bits 30-31.
uint32_t relative_unsigned = source - (upper << shift);
// Reinterpret the offset as signed and keep only its top bits, i.e. move upper by a whole
// number of 2^30 steps in the range -2..+1.
// NOTE(review): this relies on >> of a negative signed value being an arithmetic shift,
// which is implementation-defined before C++20.
upper += int32_t(relative_unsigned) >> shift;
// Single write-back of the volatile state.
m_upper = upper;
// Yes we could just do upper<<shift, but then the result would span both halves of
// the 64-bit result. Doing it this way means we only operate on one half at a
// time. Source will supply those bits anyways, so we don't need them from
// upper...
return (efitick_t(upper >> (32 - shift)) << 32) | source;
}
private:
// Upper part of the extended counter in units of 2^30: bit 0 here lines up with bit 30 of the
// value returned by update(). Starts at zero.
volatile uint32_t m_upper = 0;
};
#endif /* __cplusplus */ #endif /* __cplusplus */

View File

@ -499,3 +499,69 @@ TEST(util, PeakDetect) {
// Small value past the timeout is used // Small value past the timeout is used
EXPECT_EQ(dut.detect(500, startTime + timeout + 1), 500); EXPECT_EQ(dut.detect(500, startTime + timeout + 1), 500);
} }
TEST(util, WrapAround62) {
	// Phase 1: random walk. Drive the counter with pseudo-random forward and backward steps
	// and compare every result against a plain 64-bit reference value.
	{
		WrapAround62 counter;
		uint32_t rawCounter = 0;
		uint64_t expected = 0;

		// The seed comes from the wall clock; it is printed so a failing run can be reproduced.
		uint32_t seed = time(NULL);
		printf("Testing with seed 0x%08x\n", seed);
		srandom(seed);

		for (unsigned iteration = 0; iteration != 10000; ++iteration) {
			// Fold any negative sample onto the non-negative range, then re-centre the
			// distribution so that steps go in both directions.
			int32_t step = random();
			step = (step < 0) ? ~step : step;
			step -= RAND_MAX >> 1;

			// Never step the reference value below zero.
			const bool wouldGoBelowZero = step < 0 && -step > expected;
			if (wouldGoBelowZero) {
				step = -expected;
			}

			rawCounter += step;
			expected += step;
			EXPECT_EQ(expected, counter.update(rawCounter));
		}
	}

	// Phase 2: hand-picked sequence that walks back and forth across the quarter boundaries
	// of the 32-bit source, including forward and backward wrap-arounds.
	{
		struct Step {
			uint32_t source;     // raw 32-bit value fed to update()
			long long extended;  // value update() is expected to return
		};

		static const Step steps[] = {
			{0x03453455, 0x003453455LL},
			{0x42342323, 0x042342323LL},
			{0x84356345, 0x084356345LL},
			{0x42342323, 0x042342323LL},
			{0x84356345, 0x084356345LL},
			{0xC5656565, 0x0C5656565LL},
			{0x01122112, 0x101122112LL}, // Wrap around!
			{0xC5656565, 0x0C5656565LL},
			{0x84356345, 0x084356345LL},
			{0xC5656565, 0x0C5656565LL},
			{0x01122112, 0x101122112LL}, // Wrap around!
			{0x42342323, 0x142342323LL},
			{0x84356345, 0x184356345LL},
			{0x42342323, 0x142342323LL},
			{0x84356345, 0x184356345LL},
			{0xC5656565, 0x1C5656565LL},
			{0x01122112, 0x201122112LL}, // Wrap around!
			{0xC5656565, 0x1C5656565LL},
			{0x84356345, 0x184356345LL},
			{0xC5656565, 0x1C5656565LL},
			{0x01122112, 0x201122112LL}, // Wrap around!
			{0xC5656565, 0x1C5656565LL},
			{0x84356345, 0x184356345LL},
			{0x42342323, 0x142342323LL},
			{0x01122112, 0x101122112LL},
			{0x84356345, 0x084356345LL},
			{0x42342323, 0x042342323LL},
			{0x03453455, 0x003453455LL},
		};

		WrapAround62 counter;
		for (const Step &step : steps) {
			EXPECT_EQ(counter.update(step.source), step.extended);
		}
	}
}