diff --git a/Makefile b/Makefile index a40d7ad..16050c5 100644 --- a/Makefile +++ b/Makefile @@ -20,13 +20,13 @@ equi1g: equi.h equi_miner.h equi_miner.cpp Makefile g++ -g -std=c++11 -DLOGSPARK -DSPARKSCALE=11 equi_miner.cpp blake/blake2b.cpp -pthread -o equi1g eq1445: equi.h equi_miner.h equi_miner.cpp Makefile - $(GPP) -DRESTBITS=4 -DWN=144 -DWK=5 equi_miner.cpp blake/blake2b.cpp -o eq1445 + $(GPP) -DATOMIC -DRESTBITS=4 -DWN=144 -DWK=5 equi_miner.cpp blake/blake2b.cpp -o eq1445 eq14451: equi.h equi_miner.h equi_miner.cpp Makefile $(GPP) -DRESTBITS=4 -DWN=144 -DWK=5 equi_miner.cpp blake/blake2b.cpp -o eq14451 eq1445avx2: equi.h equi_miner.h equi_miner.cpp blake2-avx2/blake2bip.c Makefile - $(GPP) -DUSE_AVX2 -DRESTBITS=4 -DWN=144 -DWK=5 equi_miner.cpp blake/blake2b.cpp blake2-avx2/blake2bip.c -o eq1445avx2 + $(GPP) -DATOMIC -DUSE_AVX2 -DRESTBITS=4 -DWN=144 -DWK=5 equi_miner.cpp blake/blake2b.cpp blake2-avx2/blake2bip.c -o eq1445avx2 eq1445avx21: equi.h equi_miner.h equi_miner.cpp blake2-avx2/blake2bip.c Makefile $(GPP) -DUSE_AVX2 -DRESTBITS=4 -DWN=144 -DWK=5 equi_miner.cpp blake/blake2b.cpp blake2-avx2/blake2bip.c -o eq1445avx21 diff --git a/README.md b/README.md index 9e8babe..85317f1 100644 --- a/README.md +++ b/README.md @@ -63,3 +63,10 @@ Performance summary (on 4GHz i7-4790K and NVidia GTX980): - 8 x dev1: 20.6 Sol/s - eqcuda: 23.6 Sol/s + +And now, for something completely different: (144,5) + +- eq1445 -t 8: 1.0 Sol/s +- eq1445avx2 -t 8: 1.2 Sol/s + +- eqcuda1445: 2.2 Sol/s diff --git a/dev_miner.h b/dev_miner.h index 7af510b..a402982 100644 --- a/dev_miner.h +++ b/dev_miner.h @@ -927,7 +927,7 @@ struct equi { } } } - printf(" %d candidates ", nc); + // printf(" %d candidates ", nc); } }; diff --git a/equi_miner.cu b/equi_miner.cu index 807bc08..0b2aea5 100644 --- a/equi_miner.cu +++ b/equi_miner.cu @@ -38,8 +38,6 @@ inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=t // number of buckets static const u32 NBUCKETS = 1<hash, pslot1->hash)) continue; u32 xorbucketid; - u32 xhash; const uchar *bytes0 = pslot0->hash->bytes, *bytes1 = pslot1->hash->bytes; #if WN == 200 && BUCKBITS == 16 && RESTBITS == 4 && defined(XINTREE) xorbucketid = ((u32)(bytes0[htl.prevbo] ^ bytes1[htl.prevbo]) << 8) | (bytes0[htl.prevbo+1] ^ bytes1[htl.prevbo+1]); - xhash = (bytes0[htl.prevbo+2] ^ bytes1[htl.prevbo+2]) >> 4; + u32 xhash = (bytes0[htl.prevbo+2] ^ bytes1[htl.prevbo+2]) >> 4; #elif WN == 144 && BUCKBITS == 20 && RESTBITS == 4 xorbucketid = ((((u32)(bytes0[htl.prevbo+1] ^ bytes1[htl.prevbo+1]) << 8) | (bytes0[htl.prevbo+2] ^ bytes1[htl.prevbo+2])) << 4) @@ -600,6 +601,9 @@ __global__ void digitE(equi *eq, const u32 r) { } #ifdef UNROLL +// bucket mask +static const u32 BUCKMASK = NBUCKETS-1; + __global__ void digit_1(equi *eq) { equi::htlayout htl(eq, 1); equi::collisiondata cd; diff --git a/equi_miner.h b/equi_miner.h index 2d34e9d..dea1b2d 100644 --- a/equi_miner.h +++ b/equi_miner.h @@ -988,7 +988,7 @@ static const u32 NBLOCKS = (NHASHES+HASHESPERBLOCK-1)/HASHESPERBLOCK; } } } - printf(" %d candidates ", nc); + // printf(" %d candidates ", nc); } };