tiny speedups

This commit is contained in:
tromp 2016-10-19 23:03:26 -04:00
parent e914263dfb
commit 690fc5eff4
2 changed files with 24 additions and 28 deletions

Changed file 1 of 2:

@@ -324,13 +324,11 @@ struct equi {
u32 nexthtunits;
u32 dunits;
u32 prevbo;
- u32 nextbo;
htlayout(equi *eq, u32 r): hta(eq->hta), prevhtunits(0), dunits(0) {
u32 nexthashbytes = hashsize(r);
nexthtunits = hashwords(nexthashbytes);
prevbo = 0;
- nextbo = nexthtunits * sizeof(htunit) - nexthashbytes; // 0-3
if (r) {
u32 prevhashbytes = hashsize(r-1);
prevhtunits = hashwords(prevhashbytes);
@@ -457,15 +455,15 @@ struct equi {
#else
#error not implemented
#endif
- bucket0 *buck = htl.hta.heap0 + bucketid; // optimize by updating previous buck?!
+ bucket0 *buck = htl.hta.heap0 + bucketid;
const u32 slot = buck->getslot();
if (slot >= NSLOTS) {
bfull++;
continue;
}
- htunit *s = buck->slots[slot]; // TRY OFFSETTING BY htl.nexthtunits
- memcpy(s->bytes+htl.nextbo, ph+WN/8-hashbytes, hashbytes);
- s[htl.nexthtunits].tag = tree(block * HASHESPERBLAKE + i);
+ htunit *s = buck->slots[slot] + htl.nexthtunits;
+ memcpy(s->bytes-hashbytes, ph+WN/8-hashbytes, hashbytes);
+ s->tag = tree(block * HASHESPERBLAKE + i);
}
}
}
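The hunk above folds the old nextbo byte offset into the slot pointer itself: s is bumped past the nexthtunits hash words up front, so the copy still lands nexthtunits * sizeof(htunit) - hashbytes bytes into the slot and the tag is written through the same pointer instead of via a second indexed access. Below is a minimal standalone sketch of that equivalence, using toy sizes (a 6-byte pruned hash, 4-byte htunits) rather than the solver's real parameters.

  // Hedged sketch, not solver code: 4-byte htunit, toy 6-byte hash.
  #include <assert.h>
  #include <stdint.h>
  #include <string.h>

  typedef union { uint32_t word; uint32_t tag; uint8_t bytes[4]; } htunit;

  int main() {
    const uint32_t hashbytes   = 6;                   // stand-in for hashsize(0)
    const uint32_t nexthtunits = 2;                   // hashwords(6) = ceil(6/4)
    const uint32_t nextbo = nexthtunits * sizeof(htunit) - hashbytes; // old byte offset, here 2
    uint8_t hash[6] = {1,2,3,4,5,6};                  // stand-in for the pruned blake hash

    htunit oldslot[3] = {{0}}, newslot[3] = {{0}};

    // old scheme: copy at byte offset nextbo, write the tag one indexed access later
    htunit *s = oldslot;
    memcpy(s->bytes + nextbo, hash, hashbytes);
    s[nexthtunits].tag = 42;

    // new scheme: pre-offset the pointer by nexthtunits; the copy ends right at it
    htunit *t = newslot + nexthtunits;
    memcpy(t->bytes - hashbytes, hash, hashbytes);
    t->tag = 42;

    // both schemes produce byte-identical slots, so nextbo can be dropped entirely
    assert(memcmp(oldslot, newslot, sizeof oldslot) == 0);
    return 0;
  }

Because the two forms write the same bytes, the htlayout constructor no longer needs to compute or store nextbo, which is what the first hunk removes.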
@@ -475,10 +473,10 @@ struct equi {
collisiondata cd;
for (u32 bucketid=id; bucketid < NBUCKETS; bucketid += nthreads) {
cd.clear();
- bucket0 *buck = htl.hta.heap0 + bucketid; // optimize by updating previous buck?!
+ bucket0 *buck = htl.hta.heap0 + bucketid;
u32 bsize = buck->getnslots();
for (u32 s1 = 0; s1 < bsize; s1++) {
- const htunit *slot1 = buck->slots[s1]; // optimize by updating previous slot1?!
+ const htunit *slot1 = buck->slots[s1];
if (!cd.addslot(s1, htl.getxhash0(slot1))) {
xfull++;
continue;
@@ -508,7 +506,7 @@ struct equi {
#else
#error not implemented
#endif
- bucket1 *xorbuck = htl.hta.heap1 + xorbucketid; // optimize by updating previous buck?!
+ bucket1 *xorbuck = htl.hta.heap1 + xorbucketid;
const u32 xorslot = xorbuck->getslot();
if (xorslot >= NSLOTS) {
bfull++;
@@ -516,8 +514,8 @@
}
htunit *xs = xorbuck->slots[xorslot];
for (u32 i=htl.dunits; i < htl.prevhtunits; i++)
- xs[i-htl.dunits].word = slot0[i].word ^ slot1[i].word;
- xs[htl.nexthtunits].tag = tree(bucketid, s0, s1);
+ xs++->word = slot0[i].word ^ slot1[i].word;
+ xs->tag = tree(bucketid, s0, s1);
}
}
}
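In the XOR loops the same idea replaces per-iteration index arithmetic with a bumped pointer; after the prevhtunits - dunits iterations the pointer sits on the unit where the tag used to be written as xs[htl.nexthtunits], since dunits is the number of whole htunits dropped between rounds. A small standalone check of that rewrite, with made-up unit counts standing in for the solver's hashwords() values:

  // Hedged sketch, not solver code: toy unit counts, assumed dunits relation.
  #include <assert.h>
  #include <stdint.h>
  #include <string.h>

  typedef union { uint32_t word; uint32_t tag; } htunit;

  int main() {
    const uint32_t prevhtunits = 6, dunits = 1;
    const uint32_t nexthtunits = prevhtunits - dunits;  // assumed relation between the fields

    htunit slot0[6], slot1[6];
    for (uint32_t i = 0; i < prevhtunits; i++) {
      slot0[i].word = 0x11111111u * (i + 1);
      slot1[i].word = 0x01010101u * (i + 7);
    }

    htunit a[8] = {{0}}, b[8] = {{0}};

    // old form: index arithmetic every iteration, tag addressed separately
    htunit *xs = a;
    for (uint32_t i = dunits; i < prevhtunits; i++)
      xs[i-dunits].word = slot0[i].word ^ slot1[i].word;
    xs[nexthtunits].tag = 12345;

    // new form: post-increment the pointer; it ends up on the tag unit
    htunit *ys = b;
    for (uint32_t i = dunits; i < prevhtunits; i++)
      ys++->word = slot0[i].word ^ slot1[i].word;
    ys->tag = 12345;

    assert(memcmp(a, b, sizeof a) == 0);
    return 0;
  }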
@@ -561,7 +559,7 @@ struct equi {
#else
#error not implemented
#endif
- bucket0 *xorbuck = htl.hta.heap0 + xorbucketid; // optimize by updating previous buck?!
+ bucket0 *xorbuck = htl.hta.heap0 + xorbucketid;
const u32 xorslot = xorbuck->getslot();
if (xorslot >= NSLOTS) {
bfull++;
@@ -569,8 +567,8 @@
}
htunit *xs = xorbuck->slots[xorslot];
for (u32 i=htl.dunits; i < htl.prevhtunits; i++)
- xs[i-htl.dunits].word = slot0[i].word ^ slot1[i].word;
- xs[htl.nexthtunits].tag = tree(bucketid, s0, s1);
+ xs++->word = slot0[i].word ^ slot1[i].word;
+ xs->tag = tree(bucketid, s0, s1);
}
}
}

Changed file 2 of 2:

@@ -310,13 +310,11 @@ struct equi {
u32 nexthtunits;
u32 dunits;
u32 prevbo;
- u32 nextbo;
htlayout(equi *eq, u32 r): hta(eq->hta), prevhtunits(0), dunits(0) {
u32 nexthashbytes = hashsize(r);
nexthtunits = hashwords(nexthashbytes);
prevbo = 0;
- nextbo = nexthtunits * sizeof(htunit) - nexthashbytes; // 0-3
if (r) {
u32 prevhashbytes = hashsize(r-1);
prevhtunits = hashwords(prevhashbytes);
@@ -448,9 +446,9 @@ struct equi {
bfull++;
continue;
}
- htunit *s = hta.heap0[bucketid][slot]; // TRY OFFSETTING BY htl.nexthtunits
- memcpy(s->bytes+htl.nextbo, ph+WN/8-hashbytes, hashbytes);
- s[htl.nexthtunits].tag = tree(block * HASHESPERBLAKE + i);
+ htunit *s = hta.heap0[bucketid][slot] + htl.nexthtunits;
+ memcpy(s->bytes-hashbytes, ph+WN/8-hashbytes, hashbytes);
+ s->tag = tree(block * HASHESPERBLAKE + i);
}
}
}
@@ -460,10 +458,10 @@ struct equi {
collisiondata cd;
for (u32 bucketid=id; bucketid < NBUCKETS; bucketid += nthreads) {
cd.clear();
- slot0 *buck = htl.hta.heap0[bucketid]; // optimize by updating previous buck?!
- u32 bsize = getnslots(r-1, bucketid); // optimize by putting bucketsize with block?!
+ slot0 *buck = htl.hta.heap0[bucketid];
+ u32 bsize = getnslots(r-1, bucketid);
for (u32 s1 = 0; s1 < bsize; s1++) {
- const htunit *slot1 = buck[s1]; // optimize by updating previous slot1?!
+ const htunit *slot1 = buck[s1];
if (!cd.addslot(s1, htl.getxhash0(slot1))) {
xfull++;
continue;
@@ -500,8 +498,8 @@ struct equi {
}
htunit *xs = htl.hta.heap1[xorbucketid][xorslot];
for (u32 i=htl.dunits; i < htl.prevhtunits; i++)
- xs[i-htl.dunits].word = slot0[i].word ^ slot1[i].word;
- xs[htl.nexthtunits].tag = tree(bucketid, s0, s1);
+ xs++->word = slot0[i].word ^ slot1[i].word;
+ xs->tag = tree(bucketid, s0, s1);
}
}
}
@@ -512,10 +510,10 @@ struct equi {
collisiondata cd;
for (u32 bucketid=id; bucketid < NBUCKETS; bucketid += nthreads) {
cd.clear();
- slot1 *buck = htl.hta.heap1[bucketid]; // OPTIMIZE BY UPDATING PREVIOUS
+ slot1 *buck = htl.hta.heap1[bucketid];
u32 bsize = getnslots(r-1, bucketid);
for (u32 s1 = 0; s1 < bsize; s1++) {
- const htunit *slot1 = buck[s1]; // OPTIMIZE BY UPDATING PREVIOUS
+ const htunit *slot1 = buck[s1];
if (!cd.addslot(s1, htl.getxhash1(slot1))) {
xfull++;
continue;
@@ -552,8 +550,8 @@ struct equi {
}
htunit *xs = htl.hta.heap0[xorbucketid][xorslot];
for (u32 i=htl.dunits; i < htl.prevhtunits; i++)
- xs[i-htl.dunits].word = slot0[i].word ^ slot1[i].word;
- xs[htl.nexthtunits].tag = tree(bucketid, s0, s1);
+ xs++->word = slot0[i].word ^ slot1[i].word;
+ xs->tag = tree(bucketid, s0, s1);
}
}
}