add cantor slots enabling 2^10 buckets to equi_miner
This commit is contained in:
parent
85e486fd69
commit
fec951a2a3
|
@ -1,109 +0,0 @@
|
|||
// Wagner's algorithm for Generalized Birthday Paradox, a memory-hard proof-of-work
|
||||
// Copyright (c) 2016 John Tromp
|
||||
|
||||
#include "equi_dev_miner.h"
|
||||
#include <unistd.h>
|
||||
#include "ctype.h"
|
||||
|
||||
// Decode the two hexadecimal characters x[0] and x[1] into a single byte value.
// Both upper- and lower-case hex digits are accepted; any other character
// trips the assert. Returns the decoded value (0..255).
int hextobyte(const char * x) {
  unsigned b = 0;
  for (int i = 0; i < 2; i++) {
    // <ctype.h> functions are only defined for values representable as
    // unsigned char (or EOF), so convert before classifying: a plain char
    // holding a byte >= 0x80 may be negative.
    const unsigned char raw = (unsigned char)x[i];
    assert(isxdigit(raw));
    const unsigned char c = (unsigned char)tolower(raw);
    // '0'..'9' map to 0..9, 'a'..'f' map to 10..15
    b = (b << 4) | (unsigned)(c - (c >= '0' && c <= '9' ? '0' : ('a' - 10)));
  }
  return (int)b;
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
int nthreads = 1;
|
||||
int nonce = 0;
|
||||
int range = 1;
|
||||
bool showsol = false;
|
||||
const char *header = "";
|
||||
const char *hex = "";
|
||||
int c;
|
||||
while ((c = getopt (argc, argv, "h:n:r:t:x:s")) != -1) {
|
||||
switch (c) {
|
||||
case 'h':
|
||||
header = optarg;
|
||||
break;
|
||||
case 'n':
|
||||
nonce = atoi(optarg);
|
||||
break;
|
||||
case 'r':
|
||||
range = atoi(optarg);
|
||||
break;
|
||||
case 's':
|
||||
showsol = true;
|
||||
break;
|
||||
case 't':
|
||||
nthreads = atoi(optarg);
|
||||
break;
|
||||
case 'x':
|
||||
hex = optarg;
|
||||
break;
|
||||
}
|
||||
}
|
||||
#ifndef XWITHASH
|
||||
if (sizeof(tree) > 4)
|
||||
printf("WARNING: please compile with -DXWITHASH to shrink tree!\n");
|
||||
#endif
|
||||
#ifdef ATOMIC
|
||||
if (nthreads==1)
|
||||
printf("WARNING: use of atomics hurts single threaded performance!\n");
|
||||
#else
|
||||
assert(nthreads==1);
|
||||
#endif
|
||||
printf("Looking for wagner-tree on (\"%s\",%d", hex ? "0x..." : header, nonce);
|
||||
if (range > 1)
|
||||
printf("-%d", nonce+range-1);
|
||||
printf(") with %d %d-bit digits and %d threads\n", NDIGITS, DIGITBITS, nthreads);
|
||||
thread_ctx *threads = (thread_ctx *)calloc(nthreads, sizeof(thread_ctx));
|
||||
assert(threads);
|
||||
equi eq(nthreads);
|
||||
printf("Using %dMB of memory", 1 + eq.hta.alloced / 0x100000);
|
||||
#ifdef USE_AVX2
|
||||
printf(" and AVX2 intrinsics to compute 4-way blake2b\n");
|
||||
#else
|
||||
printf(" and no AVX2\n");
|
||||
#endif
|
||||
u32 sumnsols = 0;
|
||||
char headernonce[HEADERNONCELEN];
|
||||
u32 hdrlen = strlen(header);
|
||||
if (*hex) {
|
||||
assert(strlen(hex) == 2 * HEADERNONCELEN);
|
||||
for (int i = 0; i < HEADERNONCELEN; i++)
|
||||
headernonce[i] = hextobyte(&hex[2*i]);
|
||||
} else {
|
||||
memcpy(headernonce, header, hdrlen);
|
||||
memset(headernonce+hdrlen, 0, sizeof(headernonce)-hdrlen);
|
||||
}
|
||||
for (int r = 0; r < range; r++) {
|
||||
((u32 *)headernonce)[32] = htole32(nonce+r);
|
||||
eq.setheadernonce(headernonce, sizeof(headernonce));
|
||||
for (int t = 0; t < nthreads; t++) {
|
||||
threads[t].id = t;
|
||||
threads[t].eq = &eq;
|
||||
int err = pthread_create(&threads[t].thread, NULL, worker, (void *)&threads[t]);
|
||||
assert(err == 0);
|
||||
}
|
||||
for (int t = 0; t < nthreads; t++) {
|
||||
int err = pthread_join(threads[t].thread, NULL);
|
||||
assert(err == 0);
|
||||
}
|
||||
u32 nsols, maxsols = min(MAXSOLS, eq.nsols);
|
||||
for (nsols = 0; nsols < maxsols; nsols++) {
|
||||
if (showsol) {
|
||||
printf("\nSolution");
|
||||
for (u32 i = 0; i < PROOFSIZE; i++)
|
||||
printf(" %jx", (uintmax_t)eq.sols[nsols][i]);
|
||||
}
|
||||
}
|
||||
printf("\n%d solutions\n", nsols);
|
||||
sumnsols += nsols;
|
||||
}
|
||||
free(threads);
|
||||
printf("%d total solutions\n", sumnsols);
|
||||
return 0;
|
||||
}
|
1075
equi_dev_miner.h
1075
equi_dev_miner.h
File diff suppressed because it is too large
Load Diff
94
equi_miner.h
94
equi_miner.h
|
@ -103,22 +103,26 @@ struct tree {
|
|||
// so now we do things "manually"
|
||||
u32 bid_s0_s1;
|
||||
|
||||
#ifdef CANTOR
|
||||
static const u32 CANTORBITS = 2*SLOTBITS-2;
|
||||
static const u32 CANTORMASK = (1<<CANTORBITS) - 1;
|
||||
static const u32 CANTORMAXSQRT = 2 * NSLOTS;
|
||||
static_assert(BUCKBITS + CANTORBITS <= 32, "cantor throws a fit");
|
||||
#else
|
||||
static_assert(BUCKBITS + 2 * SLOTBIS <= 32, "cantor throws a fit");
|
||||
#endif
|
||||
|
||||
// constructor for height 0 trees stores index instead
|
||||
tree(const u32 idx) {
|
||||
bid_s0_s1 = idx;
|
||||
}
|
||||
// Pairing code for (s0, s1): the s1-th triangular number offset by s0.
static u32 cantor(u32 s0, u32 s1) {
  const u32 triangle = s1*(s1+1)/2;
  return triangle + s0;
}
|
||||
tree(const u32 bid, const u32 s0, const u32 s1) {
|
||||
// SLOTDIFF saves 1 bit by encoding the distance between
|
||||
// the two slots modulo SLOTRANGE instead, and picking
|
||||
// slotid0 such that this distance is at most SLOTRANGE/2
|
||||
// the extra branching involved gives noticeable slowdown
|
||||
#ifdef SLOTDIFF
|
||||
u32 ds10 = (s1 - s0) & SLOTMASK;
|
||||
if (ds10 & SLOTMSB) {
|
||||
bid_s0_s1 = (((bid << SLOTBITS) | s1) << (SLOTBITS-1)) | (SLOTMASK & ~ds10);
|
||||
} else {
|
||||
bid_s0_s1 = (((bid << SLOTBITS) | s0) << (SLOTBITS-1)) | (ds10 - 1);
|
||||
}
|
||||
// CANTOR saves 2 bits by Cantor pairing
|
||||
#ifdef CANTOR
|
||||
bid_s0_s1 = (bid << CANTORBITS) | cantor(s0,s1);
|
||||
#else
|
||||
bid_s0_s1 = (((bid << SLOTBITS) | s0) << SLOTBITS) | s1;
|
||||
#endif
|
||||
|
@ -129,33 +133,46 @@ struct tree {
|
|||
}
|
||||
// retrieve bucket index
|
||||
u32 bucketid() const {
|
||||
#ifdef SLOTDIFF
|
||||
return bid_s0_s1 >> (2 * SLOTBITS - 1);
|
||||
#ifdef CANTOR
|
||||
return bid_s0_s1 >> (2*SLOTBITS - 2);
|
||||
#else
|
||||
return bid_s0_s1 >> (2 * SLOTBITS);
|
||||
return bid_s0_s1 >> (2*SLOTBITS);
|
||||
#endif
|
||||
}
|
||||
// retrieve first slot index
|
||||
u32 slotid0() const {
|
||||
#ifdef SLOTDIFF
|
||||
return (bid_s0_s1 >> (SLOTBITS-1)) & SLOTMASK;
|
||||
#else
|
||||
return (bid_s0_s1 >> SLOTBITS) & SLOTMASK;
|
||||
#endif
|
||||
#ifdef CANTOR
|
||||
u32 slotid0(u32 s1) const {
|
||||
return (bid_s0_s1 & CANTORMASK) - cantor(0,s1);
|
||||
}
|
||||
#else
|
||||
u32 slotid0() const {
|
||||
return (bid_s0_s1 >> SLOTBITS) & SLOTMASK;
|
||||
}
|
||||
#endif
|
||||
// retrieve second slot index
|
||||
u32 slotid1() const {
|
||||
#ifdef SLOTDIFF
|
||||
return (slotid0() + 1 + (bid_s0_s1 & (SLOTMASK>>1))) & SLOTMASK;
|
||||
#ifdef CANTOR
|
||||
u32 k, q, sqr = 8*(bid_s0_s1 & CANTORMASK)+1;;
|
||||
// this k=sqrt(sqr) computing loop averages 3.4 iterations out of maximum 9
|
||||
for (k = CANTORMAXSQRT; (q = sqr/k) < k; k = (k+q)/2) ;
|
||||
return (k-1) / 2;
|
||||
#else
|
||||
return bid_s0_s1 & SLOTMASK;
|
||||
#endif
|
||||
}
|
||||
bool prob_disjoint(const tree other) const {
|
||||
#ifdef CANTOR
|
||||
if (bucketid() != other.bucketid())
|
||||
return true;
|
||||
u32 s1 = slotid1(), s0 = slotid0(s1);
|
||||
u32 os1 = other.slotid1(), os0 = other.slotid0(os1);
|
||||
return s1 != os1 && s0 != os0;
|
||||
#else
|
||||
tree xort(bid_s0_s1 ^ other.bid_s0_s1);
|
||||
return xort.bucketid() || (xort.slotid0() && xort.slotid1());
|
||||
// next two tests catch much fewer cases and are therefore skipped
|
||||
// && slotid0() != other.slotid1() && slotid1() != other.slotid0()
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -345,17 +362,28 @@ struct equi {
|
|||
const slot1 *buck = hta.heap1[t.bucketid()];
|
||||
const u32 size = 1 << --r;
|
||||
u32 tagi = hashwords(hashsize(r));
|
||||
return listindices1(r, buck[t.slotid0()][tagi].tag, indices)
|
||||
|| listindices1(r, buck[t.slotid1()][tagi].tag, indices+size)
|
||||
|| orderindices(indices, size) || indices[0] == indices[size];
|
||||
#ifdef CANTOR
|
||||
u32 s1 = t.slotid1(), s0 = t.slotid0(s1);
|
||||
#else
|
||||
u32 s1 = t.slotid1(), s0 = t.slotid0();
|
||||
#endif
|
||||
tree t0 = buck[s0][tagi].tag, t1 = buck[s1][tagi].tag;
|
||||
return !t0.prob_disjoint(t1)
|
||||
|| listindices1(r, t0, indices) || listindices1(r, t1, indices+size)
|
||||
|| orderindices(indices, size) || indices[0] == indices[size];
|
||||
}
|
||||
// need separate instance for accessing (differently typed) heap1
|
||||
bool listindices1(u32 r, const tree t, u32 *indices) {
|
||||
const slot0 *buck = hta.heap0[t.bucketid()];
|
||||
const u32 size = 1 << --r;
|
||||
u32 tagi = hashwords(hashsize(r));
|
||||
return listindices0(r, buck[t.slotid0()][tagi].tag, indices)
|
||||
|| listindices0(r, buck[t.slotid1()][tagi].tag, indices+size)
|
||||
#ifdef CANTOR
|
||||
u32 s1 = t.slotid1(), s0 = t.slotid0(s1);
|
||||
#else
|
||||
u32 s1 = t.slotid1(), s0 = t.slotid0();
|
||||
#endif
|
||||
tree t0 = buck[s0][tagi].tag, t1 = buck[s1][tagi].tag;
|
||||
return listindices0(r, t0, indices) || listindices0(r, t1, indices+size)
|
||||
|| orderindices(indices, size) || indices[0] == indices[size];
|
||||
}
|
||||
// check a candidate that resulted in 0 xor
|
||||
|
@ -430,6 +458,8 @@ struct equi {
|
|||
return slot->bytes[prevbo] >> 4;
|
||||
#elif WN == 200 && RESTBITS == 8
|
||||
return (slot->bytes[prevbo] & 0xf) << 4 | slot->bytes[prevbo+1] >> 4;
|
||||
#elif WN == 200 && RESTBITS == 10
|
||||
return (slot->bytes[prevbo] & 0x3f) << 4 | slot->bytes[prevbo+1] >> 4;
|
||||
#elif WN == 144 && RESTBITS == 4
|
||||
return slot->bytes[prevbo] & 0xf;
|
||||
#else
|
||||
|
@ -442,6 +472,8 @@ struct equi {
|
|||
return slot->bytes[prevbo] & 0xf;
|
||||
#elif WN == 200 && RESTBITS == 8
|
||||
return slot->bytes[prevbo];
|
||||
#elif WN == 200 && RESTBITS == 10
|
||||
return (slot->bytes[prevbo] & 0x3) << 8 | slot->bytes[prevbo+1];
|
||||
#elif WN == 144 && RESTBITS == 4
|
||||
return slot->bytes[prevbo] & 0xf;
|
||||
#else
|
||||
|
@ -554,6 +586,8 @@ static const u32 NBLOCKS = (NHASHES+HASHESPERBLOCK-1)/HASHESPERBLOCK;
|
|||
// figure out bucket for this hash by extracting leading BUCKBITS bits
|
||||
#if BUCKBITS == 12 && RESTBITS == 8
|
||||
const u32 bucketid = ((u32)ph[0] << 4) | ph[1] >> 4;
|
||||
#elif BUCKBITS == 10 && RESTBITS == 10
|
||||
const u32 bucketid = ((u32)ph[0] << 2) | ph[1] >> 6;
|
||||
#elif BUCKBITS == 16 && RESTBITS == 4
|
||||
const u32 bucketid = ((u32)ph[0] << 8) | ph[1];
|
||||
#elif BUCKBITS == 20 && RESTBITS == 4
|
||||
|
@ -601,6 +635,9 @@ static const u32 NBLOCKS = (NHASHES+HASHESPERBLOCK-1)/HASHESPERBLOCK;
|
|||
#if WN == 200 && BUCKBITS == 12 && RESTBITS == 8
|
||||
xorbucketid = (((u32)(bytes0[htl.prevbo+1] ^ bytes1[htl.prevbo+1]) & 0xf) << 8)
|
||||
| (bytes0[htl.prevbo+2] ^ bytes1[htl.prevbo+2]);
|
||||
#elif WN == 200 && BUCKBITS == 10 && RESTBITS == 10
|
||||
xorbucketid = (((u32)(bytes0[htl.prevbo+1] ^ bytes1[htl.prevbo+1]) & 0xf) << 6)
|
||||
| (bytes0[htl.prevbo+2] ^ bytes1[htl.prevbo+2]) >> 2;
|
||||
#elif WN == 144 && BUCKBITS == 20 && RESTBITS == 4
|
||||
xorbucketid = ((((u32)(bytes0[htl.prevbo+1] ^ bytes1[htl.prevbo+1]) << 8)
|
||||
| (bytes0[htl.prevbo+2] ^ bytes1[htl.prevbo+2])) << 4)
|
||||
|
@ -651,6 +688,9 @@ static const u32 NBLOCKS = (NHASHES+HASHESPERBLOCK-1)/HASHESPERBLOCK;
|
|||
#if WN == 200 && BUCKBITS == 12 && RESTBITS == 8
|
||||
xorbucketid = ((u32)(bytes0[htl.prevbo+1] ^ bytes1[htl.prevbo+1]) << 4)
|
||||
| (bytes0[htl.prevbo+2] ^ bytes1[htl.prevbo+2]) >> 4;
|
||||
#elif WN == 200 && BUCKBITS == 10 && RESTBITS == 10
|
||||
xorbucketid = ((u32)(bytes0[htl.prevbo+2] ^ bytes1[htl.prevbo+2]) << 2)
|
||||
| (bytes0[htl.prevbo+3] ^ bytes1[htl.prevbo+3]) >> 6;
|
||||
#elif WN == 144 && BUCKBITS == 20 && RESTBITS == 4
|
||||
xorbucketid = ((((u32)(bytes0[htl.prevbo+1] ^ bytes1[htl.prevbo+1]) << 8)
|
||||
| (bytes0[htl.prevbo+2] ^ bytes1[htl.prevbo+2])) << 4)
|
||||
|
|
Loading…
Reference in New Issue