Fixes and (144,5) benchmarks

This commit is contained in:
John Tromp 2016-10-27 16:45:07 -04:00
parent efe48b0ae0
commit 6e0d9baaae
5 changed files with 20 additions and 9 deletions

View File

@ -20,13 +20,13 @@ equi1g: equi.h equi_miner.h equi_miner.cpp Makefile
g++ -g -std=c++11 -DLOGSPARK -DSPARKSCALE=11 equi_miner.cpp blake/blake2b.cpp -pthread -o equi1g
eq1445: equi.h equi_miner.h equi_miner.cpp Makefile
$(GPP) -DRESTBITS=4 -DWN=144 -DWK=5 equi_miner.cpp blake/blake2b.cpp -o eq1445
$(GPP) -DATOMIC -DRESTBITS=4 -DWN=144 -DWK=5 equi_miner.cpp blake/blake2b.cpp -o eq1445
eq14451: equi.h equi_miner.h equi_miner.cpp Makefile
$(GPP) -DRESTBITS=4 -DWN=144 -DWK=5 equi_miner.cpp blake/blake2b.cpp -o eq14451
eq1445avx2: equi.h equi_miner.h equi_miner.cpp blake2-avx2/blake2bip.c Makefile
$(GPP) -DUSE_AVX2 -DRESTBITS=4 -DWN=144 -DWK=5 equi_miner.cpp blake/blake2b.cpp blake2-avx2/blake2bip.c -o eq1445avx2
$(GPP) -DATOMIC -DUSE_AVX2 -DRESTBITS=4 -DWN=144 -DWK=5 equi_miner.cpp blake/blake2b.cpp blake2-avx2/blake2bip.c -o eq1445avx2
eq1445avx21: equi.h equi_miner.h equi_miner.cpp blake2-avx2/blake2bip.c Makefile
$(GPP) -DUSE_AVX2 -DRESTBITS=4 -DWN=144 -DWK=5 equi_miner.cpp blake/blake2b.cpp blake2-avx2/blake2bip.c -o eq1445avx21

View File

@ -63,3 +63,10 @@ Performance summary (on 4GHz i7-4790K and NVidia GTX980):
- 8 x dev1: 20.6 Sol/s
- eqcuda: 23.6 Sol/s
And now, for something completely different: (144,5)
- eq1445 -t 8: 1.0 Sol/s
- eq1445avx2 -t 8: 1.2 Sol/s
- eqcuda1445: 2.2 Sol/s

View File

@ -927,7 +927,7 @@ struct equi {
}
}
}
printf(" %d candidates ", nc);
// printf(" %d candidates ", nc);
}
};

View File

@ -38,8 +38,6 @@ inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=t
// number of buckets
static const u32 NBUCKETS = 1<<BUCKBITS;
// bucket mask
static const u32 BUCKMASK = NBUCKETS-1;
// log2 of the number of slots per bucket
static const u32 SLOTBITS = RESTBITS+1+1;
static const u32 SLOTRANGE = 1<<SLOTBITS;
@ -67,10 +65,11 @@ struct tree {
__device__ tree(const u32 idx) {
bid_s0_s1_x = idx;
}
__device__ tree(const u32 bid, const u32 s0, const u32 s1, const u32 xh) {
#ifdef XINTREE
__device__ tree(const u32 bid, const u32 s0, const u32 s1, const u32 xh) {
bid_s0_s1_x = ((((bid << SLOTBITS) | s0) << SLOTBITS) | s1) << RESTBITS | xh;
#else
__device__ tree(const u32 bid, const u32 s0, const u32 s1) {
bid_s0_s1_x = (((bid << SLOTBITS) | s0) << SLOTBITS) | s1;
#endif
}
@ -240,6 +239,8 @@ struct equi {
listindices4(buck[t.slotid1()].attr, indices+size);
orderindices(indices, size);
}
#if WK == 9
__device__ void listindices6(const tree t, u32 *indices) {
const bucket1 &buck = hta.trees1[2][t.bucketid()];
const u32 size = 1 << 5;
@ -268,6 +269,7 @@ struct equi {
listindices8(buck[t.slotid1()].attr, indices+size);
orderindices(indices, size);
}
#endif
__device__ void candidate(const tree t) {
proof prf;
#if WK==9
@ -564,12 +566,11 @@ __global__ void digitE(equi *eq, const u32 r) {
if (htl.equal(pslot0->hash, pslot1->hash))
continue;
u32 xorbucketid;
u32 xhash;
const uchar *bytes0 = pslot0->hash->bytes, *bytes1 = pslot1->hash->bytes;
#if WN == 200 && BUCKBITS == 16 && RESTBITS == 4 && defined(XINTREE)
xorbucketid = ((u32)(bytes0[htl.prevbo] ^ bytes1[htl.prevbo]) << 8)
| (bytes0[htl.prevbo+1] ^ bytes1[htl.prevbo+1]);
xhash = (bytes0[htl.prevbo+2] ^ bytes1[htl.prevbo+2]) >> 4;
u32 xhash = (bytes0[htl.prevbo+2] ^ bytes1[htl.prevbo+2]) >> 4;
#elif WN == 144 && BUCKBITS == 20 && RESTBITS == 4
xorbucketid = ((((u32)(bytes0[htl.prevbo+1] ^ bytes1[htl.prevbo+1]) << 8)
| (bytes0[htl.prevbo+2] ^ bytes1[htl.prevbo+2])) << 4)
@ -600,6 +601,9 @@ __global__ void digitE(equi *eq, const u32 r) {
}
#ifdef UNROLL
// bucket mask
static const u32 BUCKMASK = NBUCKETS-1;
__global__ void digit_1(equi *eq) {
equi::htlayout htl(eq, 1);
equi::collisiondata cd;

View File

@ -988,7 +988,7 @@ static const u32 NBLOCKS = (NHASHES+HASHESPERBLOCK-1)/HASHESPERBLOCK;
}
}
}
printf(" %d candidates ", nc);
// printf(" %d candidates ", nc);
}
};