From 8c88c49facc2a89659a96bd8abab26272b79005e Mon Sep 17 00:00:00 2001 From: Giovanni Di Sirio Date: Fri, 11 Jun 2021 10:04:12 +0000 Subject: [PATCH] Performance improvements thanks to code paths tuning using likely/unlikely macros. git-svn-id: svn://svn.code.sf.net/p/chibios/svn/trunk@14528 27425a3e-05d8-49a3-a47f-9c15f0e5edd8 --- .../benchmarks/rt-stm32g4-170mhz-gcc.txt | 18 +++++++------- os/rt/src/chschd.c | 24 ++++++++++++------- release_note_next.txt | 3 +++ 3 files changed, 28 insertions(+), 17 deletions(-) diff --git a/doc/rt/reports/benchmarks/rt-stm32g4-170mhz-gcc.txt b/doc/rt/reports/benchmarks/rt-stm32g4-170mhz-gcc.txt index ba5f5df27..5f4573569 100644 --- a/doc/rt/reports/benchmarks/rt-stm32g4-170mhz-gcc.txt +++ b/doc/rt/reports/benchmarks/rt-stm32g4-170mhz-gcc.txt @@ -1,11 +1,11 @@ *** ChibiOS/RT Test Suite *** -*** Compiled: Jun 7 2021 - 08:51:33 +*** Compiled: Jun 11 2021 - 11:12:01 *** Platform: STM32G4 Hi-resolution Line *** Test Board: STMicroelectronics STM32 Nucleo64-G474RE *** -*** Text size: 51148 bytes +*** Text size: 50964 bytes *** RO data size: 9904 bytes *** Data size: 220 bytes *** BSS size: 5400 bytes @@ -189,7 +189,7 @@ === Test Sequence 12 (Benchmarks) ---------------------------------------------------------------------------- --- Test Case 12.1 (Messages performance #1) ---- Score : 904248 msgs/S, 1808496 ctxswc/S +--- Score : 923906 msgs/S, 1847812 ctxswc/S --- Result: SUCCESS ---------------------------------------------------------------------------- --- Test Case 12.2 (Messages performance #2) @@ -201,7 +201,7 @@ --- Result: SUCCESS ---------------------------------------------------------------------------- --- Test Case 12.4 (Context Switch performance) ---- Score : 2821568 ctxswc/S +--- Score : 2893608 ctxswc/S --- Result: SUCCESS ---------------------------------------------------------------------------- --- Test Case 12.5 (Threads performance, full cycle) @@ -209,15 +209,15 @@ --- Result: SUCCESS ---------------------------------------------------------------------------- --- Test Case 12.6 (Threads performance, create/exit only) ---- Score : 634325 threads/S +--- Score : 639095 threads/S --- Result: SUCCESS ---------------------------------------------------------------------------- --- Test Case 12.7 (Mass reschedule performance) ---- Score : 288135 reschedules/S, 1728810 ctxswc/S +--- Score : 289115 reschedules/S, 1734690 ctxswc/S --- Result: SUCCESS ---------------------------------------------------------------------------- --- Test Case 12.8 (Round-Robin voluntary reschedule) ---- Score : 1748060 ctxswc/S +--- Score : 1752560 ctxswc/S --- Result: SUCCESS ---------------------------------------------------------------------------- --- Test Case 12.9 (Virtual Timers set/reset performance) @@ -249,11 +249,11 @@ Final result: SUCCESS *** ChibiOS OS Library Test Suite *** -*** Compiled: Jun 7 2021 - 08:51:33 +*** Compiled: Jun 11 2021 - 11:12:01 *** Platform: STM32G4 Hi-resolution Line *** Test Board: STMicroelectronics STM32 Nucleo64-G474RE *** -*** Text size: 51148 bytes +*** Text size: 50964 bytes *** RO data size: 9904 bytes *** Data size: 220 bytes *** BSS size: 5400 bytes diff --git a/os/rt/src/chschd.c b/os/rt/src/chschd.c index 88bddc532..659a99e11 100644 --- a/os/rt/src/chschd.c +++ b/os/rt/src/chschd.c @@ -414,11 +414,13 @@ void chSchWakeupS(thread_t *ntp, msg_t msg) { } #endif - /* If the waken thread has a not-greater priority than the current + /* If the woken thread has a not-greater priority than the current one then it is just inserted in the ready list else it made running immediately and the invoking thread goes in the ready - list instead.*/ - if (ntp->hdr.pqueue.prio <= otp->hdr.pqueue.prio) { + list instead. + Note, we are favoring the path where the woken thread has higher + priority.*/ + if (unlikely(ntp->hdr.pqueue.prio <= otp->hdr.pqueue.prio)) { (void) __sch_ready_behind(ntp); } else { @@ -455,7 +457,9 @@ void chSchRescheduleS(void) { chDbgCheckClassS(); - if (firstprio(&oip->rlist.pqueue) > tp->hdr.pqueue.prio) { + /* Note, we are favoring the path where the reschedule is necessary + because higher priority threads are ready.*/ + if (likely(firstprio(&oip->rlist.pqueue) > tp->hdr.pqueue.prio)) { __sch_reschedule_ahead(); } } @@ -563,19 +567,21 @@ void chSchPreemption(void) { tprio_t p1 = firstprio(&oip->rlist.pqueue); tprio_t p2 = tp->hdr.pqueue.prio; + /* Note, we are favoring the path where preemption is necessary + because higher priority threads are ready.*/ #if CH_CFG_TIME_QUANTUM > 0 if (tp->ticks > (tslices_t)0) { - if (p1 > p2) { + if (likely(p1 > p2)) { __sch_reschedule_ahead(); } } else { - if (p1 >= p2) { + if (likely(p1 >= p2)) { __sch_reschedule_behind(); } } #else /* CH_CFG_TIME_QUANTUM == 0 */ - if (p1 > p2) { + if (likely(p1 > p2)) { __sch_reschedule_ahead(); } #endif /* CH_CFG_TIME_QUANTUM == 0 */ @@ -595,7 +601,9 @@ void chSchDoYieldS(void) { chDbgCheckClassS(); - if (firstprio(&oip->rlist.pqueue) >= tp->hdr.pqueue.prio) { + /* If this function has been called then it is likely there are threads + at same priority level.*/ + if (likely(firstprio(&oip->rlist.pqueue) >= tp->hdr.pqueue.prio)) { __sch_reschedule_behind(); } } diff --git a/release_note_next.txt b/release_note_next.txt index 2a9715e90..55c27dc0d 100644 --- a/release_note_next.txt +++ b/release_note_next.txt @@ -47,6 +47,9 @@ integrated in our structure: WolfSSL, FatFS and lwIP. - Support for full-SMP multi-core threading. - Support for decoupled multi-core threading. +- Performance improvements thanks to code paths tuning using likely/unlikely + macros. This feature requires compiler support and is currently enabled + for GCC. - 64 bits monotonic time stamps with the same resolution of system time. - Much more efficient and accurate Virtual Timers in tick-less mode. - Automatic reload of Virtual Timers, now both one-shot and continuous timers