fixes and 1445 benches
This commit is contained in:
parent
efe48b0ae0
commit
6e0d9baaae
4
Makefile
4
Makefile
|
@ -20,13 +20,13 @@ equi1g: equi.h equi_miner.h equi_miner.cpp Makefile
|
|||
g++ -g -std=c++11 -DLOGSPARK -DSPARKSCALE=11 equi_miner.cpp blake/blake2b.cpp -pthread -o equi1g
|
||||
|
||||
eq1445: equi.h equi_miner.h equi_miner.cpp Makefile
|
||||
$(GPP) -DRESTBITS=4 -DWN=144 -DWK=5 equi_miner.cpp blake/blake2b.cpp -o eq1445
|
||||
$(GPP) -DATOMIC -DRESTBITS=4 -DWN=144 -DWK=5 equi_miner.cpp blake/blake2b.cpp -o eq1445
|
||||
|
||||
eq14451: equi.h equi_miner.h equi_miner.cpp Makefile
|
||||
$(GPP) -DRESTBITS=4 -DWN=144 -DWK=5 equi_miner.cpp blake/blake2b.cpp -o eq14451
|
||||
|
||||
eq1445avx2: equi.h equi_miner.h equi_miner.cpp blake2-avx2/blake2bip.c Makefile
|
||||
$(GPP) -DUSE_AVX2 -DRESTBITS=4 -DWN=144 -DWK=5 equi_miner.cpp blake/blake2b.cpp blake2-avx2/blake2bip.c -o eq1445avx2
|
||||
$(GPP) -DATOMIC -DUSE_AVX2 -DRESTBITS=4 -DWN=144 -DWK=5 equi_miner.cpp blake/blake2b.cpp blake2-avx2/blake2bip.c -o eq1445avx2
|
||||
|
||||
eq1445avx21: equi.h equi_miner.h equi_miner.cpp blake2-avx2/blake2bip.c Makefile
|
||||
$(GPP) -DUSE_AVX2 -DRESTBITS=4 -DWN=144 -DWK=5 equi_miner.cpp blake/blake2b.cpp blake2-avx2/blake2bip.c -o eq1445avx21
|
||||
|
|
|
@ -63,3 +63,10 @@ Performance summary (on 4GHz i7-4790K and NVidia GTX980):
|
|||
- 8 x dev1: 20.6 Sol/s
|
||||
|
||||
- eqcuda: 23.6 Sol/s
|
||||
|
||||
And now, for something completely different: (144,5)
|
||||
|
||||
- eq1445 -t 8: 1.0 Sol/s
|
||||
- eq1445avx2 -t 8: 1.2 Sol/s
|
||||
|
||||
- eqcuda1445: 2.2 Sol/s
|
||||
|
|
|
@ -927,7 +927,7 @@ struct equi {
|
|||
}
|
||||
}
|
||||
}
|
||||
printf(" %d candidates ", nc);
|
||||
// printf(" %d candidates ", nc);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -38,8 +38,6 @@ inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=t
|
|||
|
||||
// number of buckets
|
||||
static const u32 NBUCKETS = 1<<BUCKBITS;
|
||||
// bucket mask
|
||||
static const u32 BUCKMASK = NBUCKETS-1;
|
||||
// 2_log of number of slots per bucket
|
||||
static const u32 SLOTBITS = RESTBITS+1+1;
|
||||
static const u32 SLOTRANGE = 1<<SLOTBITS;
|
||||
|
@ -67,10 +65,11 @@ struct tree {
|
|||
__device__ tree(const u32 idx) {
|
||||
bid_s0_s1_x = idx;
|
||||
}
|
||||
__device__ tree(const u32 bid, const u32 s0, const u32 s1, const u32 xh) {
|
||||
#ifdef XINTREE
|
||||
__device__ tree(const u32 bid, const u32 s0, const u32 s1, const u32 xh) {
|
||||
bid_s0_s1_x = ((((bid << SLOTBITS) | s0) << SLOTBITS) | s1) << RESTBITS | xh;
|
||||
#else
|
||||
__device__ tree(const u32 bid, const u32 s0, const u32 s1) {
|
||||
bid_s0_s1_x = (((bid << SLOTBITS) | s0) << SLOTBITS) | s1;
|
||||
#endif
|
||||
}
|
||||
|
@ -240,6 +239,8 @@ struct equi {
|
|||
listindices4(buck[t.slotid1()].attr, indices+size);
|
||||
orderindices(indices, size);
|
||||
}
|
||||
|
||||
#if WK == 9
|
||||
__device__ void listindices6(const tree t, u32 *indices) {
|
||||
const bucket1 &buck = hta.trees1[2][t.bucketid()];
|
||||
const u32 size = 1 << 5;
|
||||
|
@ -268,6 +269,7 @@ struct equi {
|
|||
listindices8(buck[t.slotid1()].attr, indices+size);
|
||||
orderindices(indices, size);
|
||||
}
|
||||
#endif
|
||||
__device__ void candidate(const tree t) {
|
||||
proof prf;
|
||||
#if WK==9
|
||||
|
@ -564,12 +566,11 @@ __global__ void digitE(equi *eq, const u32 r) {
|
|||
if (htl.equal(pslot0->hash, pslot1->hash))
|
||||
continue;
|
||||
u32 xorbucketid;
|
||||
u32 xhash;
|
||||
const uchar *bytes0 = pslot0->hash->bytes, *bytes1 = pslot1->hash->bytes;
|
||||
#if WN == 200 && BUCKBITS == 16 && RESTBITS == 4 && defined(XINTREE)
|
||||
xorbucketid = ((u32)(bytes0[htl.prevbo] ^ bytes1[htl.prevbo]) << 8)
|
||||
| (bytes0[htl.prevbo+1] ^ bytes1[htl.prevbo+1]);
|
||||
xhash = (bytes0[htl.prevbo+2] ^ bytes1[htl.prevbo+2]) >> 4;
|
||||
u32 xhash = (bytes0[htl.prevbo+2] ^ bytes1[htl.prevbo+2]) >> 4;
|
||||
#elif WN == 144 && BUCKBITS == 20 && RESTBITS == 4
|
||||
xorbucketid = ((((u32)(bytes0[htl.prevbo+1] ^ bytes1[htl.prevbo+1]) << 8)
|
||||
| (bytes0[htl.prevbo+2] ^ bytes1[htl.prevbo+2])) << 4)
|
||||
|
@ -600,6 +601,9 @@ __global__ void digitE(equi *eq, const u32 r) {
|
|||
}
|
||||
|
||||
#ifdef UNROLL
|
||||
// bucket mask
|
||||
static const u32 BUCKMASK = NBUCKETS-1;
|
||||
|
||||
__global__ void digit_1(equi *eq) {
|
||||
equi::htlayout htl(eq, 1);
|
||||
equi::collisiondata cd;
|
||||
|
|
|
@ -988,7 +988,7 @@ static const u32 NBLOCKS = (NHASHES+HASHESPERBLOCK-1)/HASHESPERBLOCK;
|
|||
}
|
||||
}
|
||||
}
|
||||
printf(" %d candidates ", nc);
|
||||
// printf(" %d candidates ", nc);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
Loading…
Reference in New Issue