fixes and 1445 benches

2016-10-27 16:45:07 -04:00 · 2016-10-27 16:45:07 -04:00 · 6e0d9baaae
parent efe48b0ae0
commit 6e0d9baaae
5 changed files with 20 additions and 9 deletions
--- a/4
+++ b/4
@ -20,13 +20,13 @@ equi1g:	equi.h equi_miner.h equi_miner.cpp Makefile
 	g++ -g -std=c++11 -DLOGSPARK -DSPARKSCALE=11 equi_miner.cpp blake/blake2b.cpp -pthread -o equi1g

 eq1445:	equi.h equi_miner.h equi_miner.cpp Makefile
-	$(GPP) -DRESTBITS=4 -DWN=144 -DWK=5 equi_miner.cpp blake/blake2b.cpp -o eq1445
+	$(GPP) -DATOMIC -DRESTBITS=4 -DWN=144 -DWK=5 equi_miner.cpp blake/blake2b.cpp -o eq1445

 eq14451:	equi.h equi_miner.h equi_miner.cpp Makefile
 	$(GPP) -DRESTBITS=4 -DWN=144 -DWK=5 equi_miner.cpp blake/blake2b.cpp -o eq14451

 eq1445avx2:	equi.h equi_miner.h equi_miner.cpp blake2-avx2/blake2bip.c Makefile
-	$(GPP) -DUSE_AVX2 -DRESTBITS=4 -DWN=144 -DWK=5 equi_miner.cpp blake/blake2b.cpp blake2-avx2/blake2bip.c -o eq1445avx2
+	$(GPP) -DATOMIC -DUSE_AVX2 -DRESTBITS=4 -DWN=144 -DWK=5 equi_miner.cpp blake/blake2b.cpp blake2-avx2/blake2bip.c -o eq1445avx2

 eq1445avx21:	equi.h equi_miner.h equi_miner.cpp blake2-avx2/blake2bip.c Makefile
 	$(GPP) -DUSE_AVX2 -DRESTBITS=4 -DWN=144 -DWK=5 equi_miner.cpp blake/blake2b.cpp blake2-avx2/blake2bip.c -o eq1445avx21
--- a/README.md
+++ b/README.md
@ -63,3 +63,10 @@ Performance summary (on 4GHz i7-4790K and NVidia GTX980):
 - 8 x dev1:    20.6 Sol/s

 - eqcuda:      23.6 Sol/s
+
+And now, for something completely different: the (144,5) parameter set (built with WN=144, WK=5)
+
+- eq1445 -t 8:     1.0 Sol/s
+- eq1445avx2 -t 8: 1.2 Sol/s
+
+- eqcuda1445:      2.2 Sol/s
--- a/dev_miner.h
+++ b/dev_miner.h
@ -927,7 +927,7 @@ struct equi {
        }
      }
    }
-    printf(" %d candidates ", nc);
+    // printf(" %d candidates ", nc);
  }
 };

--- a/equi_miner.cu
+++ b/equi_miner.cu
@ -38,8 +38,6 @@ inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=t

 // number of buckets
 static const u32 NBUCKETS = 1<<BUCKBITS;
-// bucket mask
-static const u32 BUCKMASK = NBUCKETS-1;
 // 2_log of number of slots per bucket
 static const u32 SLOTBITS = RESTBITS+1+1;
 static const u32 SLOTRANGE = 1<<SLOTBITS;
@ -67,10 +65,11 @@ struct tree {
  __device__ tree(const u32 idx) {
    bid_s0_s1_x = idx;
  }
-  __device__ tree(const u32 bid, const u32 s0, const u32 s1, const u32 xh) {
 #ifdef XINTREE
+  __device__ tree(const u32 bid, const u32 s0, const u32 s1, const u32 xh) {
  bid_s0_s1_x = ((((bid << SLOTBITS) | s0) << SLOTBITS) | s1) << RESTBITS | xh;
 #else
+  __device__ tree(const u32 bid, const u32 s0, const u32 s1) {
  bid_s0_s1_x = (((bid << SLOTBITS) | s0) << SLOTBITS) | s1;
 #endif
  }
@ -240,6 +239,8 @@ struct equi {
    listindices4(buck[t.slotid1()].attr, indices+size);
    orderindices(indices, size);
  }
+
+#if WK == 9
  __device__ void listindices6(const tree t, u32 *indices) {
    const bucket1 &buck = hta.trees1[2][t.bucketid()];
    const u32 size = 1 << 5;
@ -268,6 +269,7 @@ struct equi {
    listindices8(buck[t.slotid1()].attr, indices+size);
    orderindices(indices, size);
  }
+#endif
  __device__ void candidate(const tree t) {
    proof prf;
 #if WK==9
@ -564,12 +566,11 @@ __global__ void digitE(equi *eq, const u32 r) {
        if (htl.equal(pslot0->hash, pslot1->hash))
          continue;
        u32 xorbucketid;
-        u32 xhash;
        const uchar *bytes0 = pslot0->hash->bytes, *bytes1 = pslot1->hash->bytes;
 #if WN == 200 && BUCKBITS == 16 && RESTBITS == 4 && defined(XINTREE)
        xorbucketid = ((u32)(bytes0[htl.prevbo] ^ bytes1[htl.prevbo]) << 8)
                        | (bytes0[htl.prevbo+1] ^ bytes1[htl.prevbo+1]);
-                  xhash = (bytes0[htl.prevbo+2] ^ bytes1[htl.prevbo+2]) >> 4;
+        u32 xhash = (bytes0[htl.prevbo+2] ^ bytes1[htl.prevbo+2]) >> 4;
 #elif WN == 144 && BUCKBITS == 20 && RESTBITS == 4
        xorbucketid = ((((u32)(bytes0[htl.prevbo+1] ^ bytes1[htl.prevbo+1]) << 8)
                            | (bytes0[htl.prevbo+2] ^ bytes1[htl.prevbo+2])) << 4)
@ -600,6 +601,9 @@ __global__ void digitE(equi *eq, const u32 r) {
 }

 #ifdef UNROLL
+// bucket mask
+static const u32 BUCKMASK = NBUCKETS-1;
+
 __global__ void digit_1(equi *eq) {
  equi::htlayout htl(eq, 1);
  equi::collisiondata cd;
--- a/equi_miner.h
+++ b/equi_miner.h
@ -988,7 +988,7 @@ static const u32 NBLOCKS = (NHASHES+HASHESPERBLOCK-1)/HASHESPERBLOCK;
        }
      }
    }
-    printf(" %d candidates ", nc);
+    // printf(" %d candidates ", nc);
  }
 };