Optimize the .count() method of bounded_bitset to leverage popcount special instructions. Confirmed to work for gcc if the -msse4 flag is passed.

This commit is contained in:
Francisco 2021-03-11 11:29:00 +00:00 committed by Francisco Paisana
parent 7dcb703d06
commit 3b491ab06b
1 changed files with 5 additions and 3 deletions

View File

@ -164,10 +164,12 @@ public:
size_t result = 0;
for (size_t i = 0; i < nof_words_(); i++) {
// result += __builtin_popcountl(buffer[i]);
word_t w = buffer[i];
for (; w; w >>= 1u) {
result += (w & 1u);
// Note: use an "int" for count triggers popcount optimization if SSE instructions are enabled.
int c = 0;
for (word_t w = buffer[i]; w > 0; c++) {
w &= w - 1;
}
result += c;
}
return result;
}