tiny speedups

This commit is contained in:
tromp 2016-10-19 23:03:26 -04:00
parent e914263dfb
commit 690fc5eff4
2 changed files with 24 additions and 28 deletions

Changed file 1 of 2:

@@ -324,13 +324,11 @@ struct equi {
u32 nexthtunits;
u32 dunits;
u32 prevbo;
- u32 nextbo;
htlayout(equi *eq, u32 r): hta(eq->hta), prevhtunits(0), dunits(0) {
u32 nexthashbytes = hashsize(r);
nexthtunits = hashwords(nexthashbytes);
prevbo = 0;
- nextbo = nexthtunits * sizeof(htunit) - nexthashbytes; // 0-3
if (r) {
u32 prevhashbytes = hashsize(r-1);
prevhtunits = hashwords(prevhashbytes);
@@ -457,15 +455,15 @@ struct equi {
#else
#error not implemented
#endif
- bucket0 *buck = htl.hta.heap0 + bucketid; // optimize by updating previous buck?!
+ bucket0 *buck = htl.hta.heap0 + bucketid;
const u32 slot = buck->getslot();
if (slot >= NSLOTS) {
bfull++;
continue;
}
- htunit *s = buck->slots[slot]; // TRY OFFSETTING BY htl.nexthtunits
- memcpy(s->bytes+htl.nextbo, ph+WN/8-hashbytes, hashbytes);
- s[htl.nexthtunits].tag = tree(block * HASHESPERBLAKE + i);
+ htunit *s = buck->slots[slot] + htl.nexthtunits;
+ memcpy(s->bytes-hashbytes, ph+WN/8-hashbytes, hashbytes);
+ s->tag = tree(block * HASHESPERBLAKE + i);
}
}
}
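The hunk above folds the old nextbo byte offset into the slot pointer itself: s is bumped past the nexthtunits hash words up front, so the copy still lands nexthtunits * sizeof(htunit) - hashbytes bytes into the slot and the tag is written through the same pointer instead of via a second indexed access. Below is a minimal standalone sketch of that equivalence, using toy sizes (a 6-byte pruned hash, 4-byte htunits) rather than the solver's real parameters.

  // Hedged sketch, not solver code: 4-byte htunit, toy 6-byte hash.
  #include <assert.h>
  #include <stdint.h>
  #include <string.h>

  typedef union { uint32_t word; uint32_t tag; uint8_t bytes[4]; } htunit;

  int main() {
    const uint32_t hashbytes   = 6;                   // stand-in for hashsize(0)
    const uint32_t nexthtunits = 2;                   // hashwords(6) = ceil(6/4)
    const uint32_t nextbo = nexthtunits * sizeof(htunit) - hashbytes; // old byte offset, here 2
    uint8_t hash[6] = {1,2,3,4,5,6};                  // stand-in for the pruned blake hash

    htunit oldslot[3] = {{0}}, newslot[3] = {{0}};

    // old scheme: copy at byte offset nextbo, write the tag one indexed access later
    htunit *s = oldslot;
    memcpy(s->bytes + nextbo, hash, hashbytes);
    s[nexthtunits].tag = 42;

    // new scheme: pre-offset the pointer by nexthtunits; the copy ends right at it
    htunit *t = newslot + nexthtunits;
    memcpy(t->bytes - hashbytes, hash, hashbytes);
    t->tag = 42;

    // both schemes produce byte-identical slots, so nextbo can be dropped entirely
    assert(memcmp(oldslot, newslot, sizeof oldslot) == 0);
    return 0;
  }

Because the two forms write the same bytes, the htlayout constructor no longer needs to compute or store nextbo, which is what the first hunk removes.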
@@ -475,10 +473,10 @@ struct equi {
collisiondata cd;
for (u32 bucketid=id; bucketid < NBUCKETS; bucketid += nthreads) {
cd.clear();
- bucket0 *buck = htl.hta.heap0 + bucketid; // optimize by updating previous buck?!
+ bucket0 *buck = htl.hta.heap0 + bucketid;
u32 bsize = buck->getnslots();
for (u32 s1 = 0; s1 < bsize; s1++) {
- const htunit *slot1 = buck->slots[s1]; // optimize by updating previous slot1?!
+ const htunit *slot1 = buck->slots[s1];
if (!cd.addslot(s1, htl.getxhash0(slot1))) {
xfull++;
continue;
@@ -508,7 +506,7 @@ struct equi {
#else
#error not implemented
#endif
- bucket1 *xorbuck = htl.hta.heap1 + xorbucketid; // optimize by updating previous buck?!
+ bucket1 *xorbuck = htl.hta.heap1 + xorbucketid;
const u32 xorslot = xorbuck->getslot();
if (xorslot >= NSLOTS) {
bfull++;
@@ -516,8 +514,8 @@
}
htunit *xs = xorbuck->slots[xorslot];
for (u32 i=htl.dunits; i < htl.prevhtunits; i++)
- xs[i-htl.dunits].word = slot0[i].word ^ slot1[i].word;
- xs[htl.nexthtunits].tag = tree(bucketid, s0, s1);
+ xs++->word = slot0[i].word ^ slot1[i].word;
+ xs->tag = tree(bucketid, s0, s1);
}
}
}
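In the XOR loops the same idea replaces per-iteration index arithmetic with a bumped pointer; after the prevhtunits - dunits iterations the pointer sits on the unit where the tag used to be written as xs[htl.nexthtunits], since dunits is the number of whole htunits dropped between rounds. A small standalone check of that rewrite, with made-up unit counts standing in for the solver's hashwords() values:

  // Hedged sketch, not solver code: toy unit counts, assumed dunits relation.
  #include <assert.h>
  #include <stdint.h>
  #include <string.h>

  typedef union { uint32_t word; uint32_t tag; } htunit;

  int main() {
    const uint32_t prevhtunits = 6, dunits = 1;
    const uint32_t nexthtunits = prevhtunits - dunits;  // assumed relation between the fields

    htunit slot0[6], slot1[6];
    for (uint32_t i = 0; i < prevhtunits; i++) {
      slot0[i].word = 0x11111111u * (i + 1);
      slot1[i].word = 0x01010101u * (i + 7);
    }

    htunit a[8] = {{0}}, b[8] = {{0}};

    // old form: index arithmetic every iteration, tag addressed separately
    htunit *xs = a;
    for (uint32_t i = dunits; i < prevhtunits; i++)
      xs[i-dunits].word = slot0[i].word ^ slot1[i].word;
    xs[nexthtunits].tag = 12345;

    // new form: post-increment the pointer; it ends up on the tag unit
    htunit *ys = b;
    for (uint32_t i = dunits; i < prevhtunits; i++)
      ys++->word = slot0[i].word ^ slot1[i].word;
    ys->tag = 12345;

    assert(memcmp(a, b, sizeof a) == 0);
    return 0;
  }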
@@ -561,7 +559,7 @@ struct equi {
#else
#error not implemented
#endif
- bucket0 *xorbuck = htl.hta.heap0 + xorbucketid; // optimize by updating previous buck?!
+ bucket0 *xorbuck = htl.hta.heap0 + xorbucketid;
const u32 xorslot = xorbuck->getslot();
if (xorslot >= NSLOTS) {
bfull++;
@@ -569,8 +567,8 @@
}
htunit *xs = xorbuck->slots[xorslot];
for (u32 i=htl.dunits; i < htl.prevhtunits; i++)
- xs[i-htl.dunits].word = slot0[i].word ^ slot1[i].word;
- xs[htl.nexthtunits].tag = tree(bucketid, s0, s1);
+ xs++->word = slot0[i].word ^ slot1[i].word;
+ xs->tag = tree(bucketid, s0, s1);
}
}
}

Changed file 2 of 2:

@@ -310,13 +310,11 @@ struct equi {
u32 nexthtunits;
u32 dunits;
u32 prevbo;
- u32 nextbo;
htlayout(equi *eq, u32 r): hta(eq->hta), prevhtunits(0), dunits(0) {
u32 nexthashbytes = hashsize(r);
nexthtunits = hashwords(nexthashbytes);
prevbo = 0;
- nextbo = nexthtunits * sizeof(htunit) - nexthashbytes; // 0-3
if (r) {
u32 prevhashbytes = hashsize(r-1);
prevhtunits = hashwords(prevhashbytes);
@@ -448,9 +446,9 @@ struct equi {
bfull++;
continue;
}
- htunit *s = hta.heap0[bucketid][slot]; // TRY OFFSETTING BY htl.nexthtunits
- memcpy(s->bytes+htl.nextbo, ph+WN/8-hashbytes, hashbytes);
- s[htl.nexthtunits].tag = tree(block * HASHESPERBLAKE + i);
+ htunit *s = hta.heap0[bucketid][slot] + htl.nexthtunits;
+ memcpy(s->bytes-hashbytes, ph+WN/8-hashbytes, hashbytes);
+ s->tag = tree(block * HASHESPERBLAKE + i);
}
}
}
@@ -460,10 +458,10 @@ struct equi {
collisiondata cd;
for (u32 bucketid=id; bucketid < NBUCKETS; bucketid += nthreads) {
cd.clear();
- slot0 *buck = htl.hta.heap0[bucketid]; // optimize by updating previous buck?!
- u32 bsize = getnslots(r-1, bucketid); // optimize by putting bucketsize with block?!
+ slot0 *buck = htl.hta.heap0[bucketid];
+ u32 bsize = getnslots(r-1, bucketid);
for (u32 s1 = 0; s1 < bsize; s1++) {
- const htunit *slot1 = buck[s1]; // optimize by updating previous slot1?!
+ const htunit *slot1 = buck[s1];
if (!cd.addslot(s1, htl.getxhash0(slot1))) {
xfull++;
continue;
@@ -500,8 +498,8 @@ struct equi {
}
htunit *xs = htl.hta.heap1[xorbucketid][xorslot];
for (u32 i=htl.dunits; i < htl.prevhtunits; i++)
- xs[i-htl.dunits].word = slot0[i].word ^ slot1[i].word;
- xs[htl.nexthtunits].tag = tree(bucketid, s0, s1);
+ xs++->word = slot0[i].word ^ slot1[i].word;
+ xs->tag = tree(bucketid, s0, s1);
}
}
}
@@ -512,10 +510,10 @@ struct equi {
collisiondata cd;
for (u32 bucketid=id; bucketid < NBUCKETS; bucketid += nthreads) {
cd.clear();
- slot1 *buck = htl.hta.heap1[bucketid]; // OPTIMIZE BY UPDATING PREVIOUS
+ slot1 *buck = htl.hta.heap1[bucketid];
u32 bsize = getnslots(r-1, bucketid);
for (u32 s1 = 0; s1 < bsize; s1++) {
- const htunit *slot1 = buck[s1]; // OPTIMIZE BY UPDATING PREVIOUS
+ const htunit *slot1 = buck[s1];
if (!cd.addslot(s1, htl.getxhash1(slot1))) {
xfull++;
continue;
@@ -552,8 +550,8 @@ struct equi {
}
htunit *xs = htl.hta.heap0[xorbucketid][xorslot];
for (u32 i=htl.dunits; i < htl.prevhtunits; i++)
- xs[i-htl.dunits].word = slot0[i].word ^ slot1[i].word;
- xs[htl.nexthtunits].tag = tree(bucketid, s0, s1);
+ xs++->word = slot0[i].word ^ slot1[i].word;
+ xs->tag = tree(bucketid, s0, s1);
}
}
}