Make getTimeNowNt even faster (#3504)
The last version is already much better than the original, mostly because it doesn't call CriticalSectionLocker, which has a ton of overhead due to debug tracking. But this version is another 4 instructions / 12 bytes shorter. It does as much math in 32-bit land as possible, and avoids math operations that span both halves of a 64-bit value (i.e. each operation works on either the lower half or the upper half, but not both). The result is only 3 instructions, not including the necessary 4 loads (ptr to now, now, ptr to upper, upper), 1 store (upper), and 1 return/branch.
This commit is contained in:
parent
2166a5ba16
commit
8639b7c7e4
|
@ -64,23 +64,10 @@ efitimeus_t getTimeNowUs(void) {
|
|||
}
|
||||
|
||||
|
||||
// NOTE(review): this listing is a rendered diff hunk (header: -64,23 +64,10), so removed and
// added lines appear together. Presumably upperTimeNt and the inline computation ending in
// `return time64;` are the removed version, while timeNt and the final
// `return timeNt.update(...)` are what remains after the commit - confirm against the repository.
// this is bits 30-61, not 32-63. We only support 62-bit time. You can fire me in 36,533 years
|
||||
// (1,461 on the simulator).
|
||||
static volatile uint32_t upperTimeNt = 0;
|
||||
// Extender object used by the one-line implementation at the end of getTimeNowNt().
static WrapAround62 timeNt;
|
||||
|
||||
// Returns the current time as an efitick_t, widened from the 32-bit value that
// getTimeNowLowerNt() provides.
efitick_t getTimeNowNt() {
|
||||
// Shift cannot be 31, as we wouldn't be able to tell if time is moving forward or backward
|
||||
// relative to upperTimeNt. We do need to handle both directions as our "thread" can be
|
||||
// racing with other "threads" in sampling stamp and updating upperTimeNt.
|
||||
constexpr unsigned shift = 30;
|
||||
|
||||
// Sample the 32-bit counter first, then the stored upper bits.
uint32_t stamp = getTimeNowLowerNt();
|
||||
uint32_t upper = upperTimeNt;
|
||||
// Distance of the fresh stamp from the stored base; unsigned subtraction wraps modulo 2^32.
uint32_t relative_unsigned = stamp - (upper << shift);
|
||||
// Reinterpreting the distance as signed lets a stamp that is behind the base move the
// result backward instead of jumping far forward.
efitick_t time64 = (efitick_t(upper) << shift) + (int32_t)relative_unsigned;
|
||||
// Store bits 30 and up of the result as the base for the next call.
upperTimeNt = time64 >> shift;
|
||||
|
||||
return time64;
|
||||
// Same job delegated to WrapAround62::update() with a fresh counter sample.
return timeNt.update(getTimeNowLowerNt());
|
||||
}
|
||||
#endif /* !EFI_UNIT_TEST */
|
||||
|
||||
|
|
|
@ -51,6 +51,37 @@
|
|||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
|
||||
/**
|
||||
* Provide a 62-bit counter from a 32-bit counter source that wraps around.
|
||||
*
|
||||
* If you'd like to use it with a 16-bit counter, shift the source by 16 before passing it here.
|
||||
* This class is thread/interrupt-safe.
|
||||
*/
|
||||
struct WrapAround62 {
	/**
	 * Fold a fresh reading of the wrapping 32-bit source counter into the extended counter.
	 *
	 * The reading may be up to one quarter of the 32-bit range behind, or up to half of it
	 * ahead of, the previously stored position; anything further is indistinguishable from
	 * a wrap in the other direction.
	 *
	 * @param source current value of the 32-bit counter
	 * @return extended counter value; its low 32 bits are exactly `source`, the bits above
	 *         come from the tracked upper part
	 */
	uint64_t update(uint32_t source) {
		// Shift cannot be 31, as we wouldn't be able to tell if time is moving forward or
		// backward relative to m_upper. We do need to handle both directions as our
		// "thread" can be racing with other "threads" in sampling stamp and updating
		// m_upper.
		constexpr unsigned shift = 30;

		// The step computation below needs sign-propagating (arithmetic) right shift. That is
		// guaranteed from C++20 on and implementation-defined before; fail the build rather
		// than miscount on a toolchain that does something else.
		static_assert((int32_t(-1) >> 1) == -1,
			"WrapAround62 requires arithmetic right shift of negative values");

		uint32_t upper = m_upper;

		// Distance of the new reading from the stored base. Unsigned subtraction wraps
		// modulo 2^32, which is exactly what a wrapping counter needs.
		uint32_t relative_unsigned = source - (upper << shift);

		// Viewed as signed, the distance tells us how many 2^30 steps (-2..+1) to move the base.
		upper += int32_t(relative_unsigned) >> shift;
		m_upper = upper;

		// Yes we could just do upper<<shift, but then the result would span both halves of
		// the 64-bit result. Doing it this way means we only operate on one half at a
		// time. Source will supply those bits anyways, so we don't need them from
		// upper...
		return (uint64_t(upper >> (32 - shift)) << 32) | source;
	}

private:
	// Extended counter divided by 2^30, i.e. bits 30 and up of the last returned value.
	volatile uint32_t m_upper = 0;
};
|
||||
|
||||
#endif /* __cplusplus */
|
||||
|
||||
|
||||
|
|
|
@ -499,3 +499,69 @@ TEST(util, PeakDetect) {
|
|||
// Small value past the timeout is used
|
||||
EXPECT_EQ(dut.detect(500, startTime + timeout + 1), 500);
|
||||
}
|
||||
|
||||
// Verifies that WrapAround62::update() reconstructs a wide counter from 32-bit readings that
// move both forward and backward, including across 32-bit wrap-around points.
TEST(util, WrapAround62) {
	// Random test
	{
		WrapAround62 t;
		uint32_t source = 0;	// what the 32-bit counter would read
		uint64_t actual = 0;	// reference value tracked at full width

		// Test random progression, positive and negative.
		// The seed is printed so that a failing run can be reproduced.
		uint32_t seed = time(NULL);
		printf("Testing with seed 0x%08x\n", (unsigned)seed);
		srandom(seed);

		for (unsigned i = 0; i < 10000; i++) {
			int32_t delta = random();
			// Defensive fold into the non-negative range before re-centering.
			if (delta < 0) {
				delta = ~delta;
			}
			// Re-center so that steps are spread around zero.
			delta -= RAND_MAX >> 1;

			// Cap negative test: never step the reference value below zero.
			// The magnitude is computed in 64 bits so the comparison is unsigned vs. unsigned.
			if (delta < 0 && uint64_t(-int64_t(delta)) > actual) {
				delta = -int32_t(actual);
			}

			source += delta;
			actual += delta;

			uint64_t next = t.update(source);
			EXPECT_EQ(actual, next);
		}
	}

	// More pointed test for expected edge conditions
	{
		WrapAround62 t;

		EXPECT_EQ(t.update(0x03453455), 0x003453455ULL);
		EXPECT_EQ(t.update(0x42342323), 0x042342323ULL);
		EXPECT_EQ(t.update(0x84356345), 0x084356345ULL);
		EXPECT_EQ(t.update(0x42342323), 0x042342323ULL);
		EXPECT_EQ(t.update(0x84356345), 0x084356345ULL);
		EXPECT_EQ(t.update(0xC5656565), 0x0C5656565ULL);
		EXPECT_EQ(t.update(0x01122112), 0x101122112ULL); // Wrap around!
		EXPECT_EQ(t.update(0xC5656565), 0x0C5656565ULL);
		EXPECT_EQ(t.update(0x84356345), 0x084356345ULL);
		EXPECT_EQ(t.update(0xC5656565), 0x0C5656565ULL);
		EXPECT_EQ(t.update(0x01122112), 0x101122112ULL); // Wrap around!
		EXPECT_EQ(t.update(0x42342323), 0x142342323ULL);
		EXPECT_EQ(t.update(0x84356345), 0x184356345ULL);
		EXPECT_EQ(t.update(0x42342323), 0x142342323ULL);
		EXPECT_EQ(t.update(0x84356345), 0x184356345ULL);
		EXPECT_EQ(t.update(0xC5656565), 0x1C5656565ULL);
		EXPECT_EQ(t.update(0x01122112), 0x201122112ULL); // Wrap around!
		EXPECT_EQ(t.update(0xC5656565), 0x1C5656565ULL);
		EXPECT_EQ(t.update(0x84356345), 0x184356345ULL);
		EXPECT_EQ(t.update(0xC5656565), 0x1C5656565ULL);
		EXPECT_EQ(t.update(0x01122112), 0x201122112ULL); // Wrap around!
		// Now walk all the way back down to the starting value.
		EXPECT_EQ(t.update(0xC5656565), 0x1C5656565ULL);
		EXPECT_EQ(t.update(0x84356345), 0x184356345ULL);
		EXPECT_EQ(t.update(0x42342323), 0x142342323ULL);
		EXPECT_EQ(t.update(0x01122112), 0x101122112ULL);
		EXPECT_EQ(t.update(0x84356345), 0x084356345ULL);
		EXPECT_EQ(t.update(0x42342323), 0x042342323ULL);
		EXPECT_EQ(t.update(0x03453455), 0x003453455ULL);
	}
}
|
||||
|
|
Loading…
Reference in New Issue