new headernonce setup and many small changes

This commit is contained in:
John Tromp 2016-10-23 22:29:22 -04:00
parent 8f56458881
commit 22fc059af2
8 changed files with 78 additions and 40 deletions

View File

@ -22,6 +22,9 @@ dev: equi.h dev_miner.h dev_miner.cpp blake2b/asm/zcblake2_avx2.o Makefile
dev1: equi.h dev_miner.h dev_miner.cpp blake2b/asm/zcblake2_avx2.o Makefile
$(GPP) dev_miner.cpp blake/blake2b.cpp blake2b/asm/zcblake2_avx2.o -o dev1
hash1: equi.h dev_miner.h dev_miner.cpp blake2b/asm/zcblake2_avx2.o Makefile
$(GPP) -DHASHONLY dev_miner.cpp blake/blake2b.cpp blake2b/asm/zcblake2_avx2.o -o hash1
equidev: equi.h equi_dev_miner.h equi_dev_miner.cpp Makefile
$(GPP) -DATOMIC equi_dev_miner.cpp blake/blake2b.cpp -o equidev

View File

@ -2,14 +2,28 @@
#include <stdint.h>
#include <string.h>
#ifdef __cplusplus
extern "C" {
#endif
void Blake2PrepareMidstate4(void *midstate, unsigned char *input);
//midstate: 256 bytes of buffer for output midstate, aligned by 32
//input: 140 bytes header, preferably aligned by 8
#ifdef __cplusplus
}
#endif
//midstate: 256 bytes of buffer for output midstate, aligned by 32
//input: 140 bytes header, preferably aligned by 8
#ifdef __cplusplus
extern "C" {
#endif
void Blake2Run4(unsigned char *hashout, void *midstate, uint32_t indexctr);
//hashout: hash output buffer: 4*64 bytes
//midstate: 256 bytes from Blake2PrepareMidstate4
//indexctr: For n=200, k=9: {0, 4, 8, ..., 1048572}
#ifdef __cplusplus
}
#endif
unsigned char __attribute__((aligned(8))) testdata[140] =
{
@ -25,6 +39,9 @@ unsigned char __attribute__((aligned(8))) testdata[140] =
};
//expected output: 281dd5fc6d878538e640987b9bc597dbbd4af2cdf8bf5fb03bdfcefa40d8747d out.bin
#ifndef NTRIALS
#define NTRIALS 1
#endif
int main(void)
{
unsigned char midstate_a[256+32];
@ -36,18 +53,24 @@ int main(void)
int i;
Blake2PrepareMidstate4(pmidstate, testdata);
#ifdef IO
outfile = fopen("out.bin", "wb");
#endif
for (i=0; i<1048576; i+=4) {
for (i=0; i<NTRIALS*1048576; i+=4) {
Blake2Run4(phashout, pmidstate, i);
memcpy(buf, phashout, 50);
memcpy(buf+50, phashout+64, 50);
memcpy(buf+100, phashout+128, 50);
memcpy(buf+150, phashout+192, 50);
#ifdef IO
fwrite(buf, 200, 1, outfile);
#endif
}
#ifdef IO
fclose(outfile);
#endif
return 0;
}

View File

@ -49,8 +49,13 @@ int main(int argc, char **argv) {
equi eq(nthreads);
printf("Using %dMB of memory\n", 1 + eq.hta.alloced / 0x100000);
u32 sumnsols = 0;
char headernonce[HEADERNONCELEN];
u32 hdrlen = strlen(header);
memcpy(headernonce, header, hdrlen);
memset(headernonce+hdrlen, 0, sizeof(headernonce)-hdrlen);
for (int r = 0; r < range; r++) {
eq.setnonce(header, strlen(header), nonce+r);
((u32 *)headernonce)[32] = htole32(nonce+r);
eq.setheadernonce(headernonce, sizeof(headernonce));
for (int t = 0; t < nthreads; t++) {
threads[t].id = t;
threads[t].eq = &eq;

View File

@ -229,10 +229,8 @@ struct htalloc {
}
};
typedef uchar midstate_t[256];
struct equi {
midstate_t blake_ctx;
alignas(32) uchar blake_ctx[256];
htalloc hta;
bsizes *nslots;
proof *sols;
@ -257,19 +255,13 @@ struct equi {
free(nslots);
free(sols);
}
#define ALIGN256(x) ((long)(x+31) & -32L)
void setnonce(const char *header, const u32 headerlen, const u32 nonce) {
uchar __attribute__((aligned(8))) hdrnonce[140];
memcpy(hdrnonce, header, headerlen);
assert(headerlen <= 108);
memset(hdrnonce+headerlen, 0, 140-32-headerlen);
uint32_t le_nonce = htole32(nonce);
memcpy(hdrnonce+140-32, &le_nonce, sizeof(u32));
memset(hdrnonce+140-28, 0, 28);
uchar unaligned[sizeof(midstate_t)+31], *aligned = (uchar *)ALIGN256(unaligned);
void *midstate = (void *)aligned;
Blake2PrepareMidstate4(midstate, hdrnonce);
memcpy(&blake_ctx, midstate, sizeof(midstate_t));
// Install a new header+nonce and reset the solver's per-run state.
//   headernonce: the full header with the nonce already embedded in it
//   len:         byte length of headernonce; must equal HEADERNONCELEN
void setheadernonce(const char *headernonce, const u32 len) {
  // Check the length BEFORE copying: alignheader holds exactly HEADERNONCELEN
  // bytes, so copying first would let an oversized len overrun the stack
  // buffer before the assertion ever got a chance to fire.
  assert(len == HEADERNONCELEN);
  // Blake2PrepareMidstate4 wants its input preferably aligned by 8, so work
  // from an aligned local copy rather than the caller's buffer.
  alignas(8) uchar alignheader[HEADERNONCELEN];
  memcpy(alignheader, headernonce, len);
  // The midstate output buffer is 256 bytes and must be aligned by 32.
  alignas(32) uchar midstate[256];
  Blake2PrepareMidstate4(midstate, alignheader);
  memcpy(&blake_ctx, midstate, sizeof(midstate));
  memset(nslots, 0, NBUCKETS * sizeof(au32)); // only nslots[0] needs zeroing
  // clear the solution count and the overflow counters
  nsols = xfull = bfull = hfull = 0;
}
@ -547,18 +539,18 @@ struct equi {
void digit0(const u32 id) {
htlayout htl(this, 0);
#ifndef HASHONLY
const u32 hashbytes = hashsize(0);
uchar unaligned[2*sizeof(midstate_t)+256+31], *aligned = (uchar *)ALIGN256(unaligned);
void *midstate0 = (void *)aligned;
void *midstate = (void *)(aligned+sizeof(midstate_t));
uchar *hashes = (uchar *)(aligned+2*sizeof(midstate_t));
memcpy(midstate0, blake_ctx, sizeof(midstate_t));
#endif
alignas(32) uchar midstate[256], hashes[256];
//aligned256 midstate, hashes;
memcpy((void *)midstate, blake_ctx, 256);
for (u32 block = id; block < NBLOCKS; block += nthreads) {
memcpy(midstate, midstate0, sizeof(midstate_t));
Blake2Run4(hashes, midstate, block * BLAKESINPARALLEL);
Blake2Run4(hashes, (void *)midstate, block * BLAKESINPARALLEL);
#ifndef HASHONLY
for (u32 i = 0; i<BLAKESINPARALLEL; i++) {
for (u32 j = 0; j<HASHESPERBLAKE; j++) {
const uchar *ph = hashes + i * 64 + j * WN/8;
const uchar *ph = hashes+ i * 64 + j * WN/8;
const u32 bucketid = ((u32)ph[0] << 4) | ph[1] >> 4;
const u32 slot = getslot0(bucketid);
if (slot >= NSLOTS) {
@ -570,6 +562,7 @@ struct equi {
s->tag = tree((block * BLAKESINPARALLEL + i) * HASHESPERBLAKE + j);
}
}
#endif
}
}
@ -1000,6 +993,9 @@ void *worker(void *vp) {
if (tp->id == 0) printf("Digit 0");
eq->digit0(tp->id);
#ifdef HASHONLY
pthread_exit(NULL);
#endif
barrier(&eq->barry);
if (tp->id == 0) eq->showbsizes(0);
barrier(&eq->barry);

7
equi.c
View File

@ -21,13 +21,18 @@ int main(int argc, char **argv) {
}
printf("Verifying size %d proof for equi(\"%s\",%d)\n",
PROOFSIZE, header, nonce);
char headernonce[HEADERNONCELEN];
u32 hdrlen = strlen(header);
memcpy(headernonce, header, hdrlen);
memset(headernonce+hdrlen, 0, sizeof(headernonce)-hdrlen);
((u32 *)headernonce)[32] = htole32(nonce);
for (int nsols=0; scanf(" Solution") == 0; nsols++) {
u32 indices[PROOFSIZE];
for (int n = 0; n < PROOFSIZE; n++) {
int nscan = scanf(" %x", &indices[n]);
assert(nscan == 1);
}
int pow_rc = verify(indices, header, strlen(header), nonce);
int pow_rc = verify(indices, headernonce, sizeof(headernonce));
if (pow_rc == POW_OK)
printf("Verified\n");
else

21
equi.h
View File

@ -27,6 +27,10 @@ typedef unsigned char uchar;
#define WK 9
#endif
#ifndef HEADERNONCELEN
#define HEADERNONCELEN 140
#endif
#define NDIGITS (WK+1)
#define DIGITBITS (WN/(NDIGITS))
@ -38,7 +42,7 @@ static const u32 HASHOUT = HASHESPERBLAKE*WN/8;
typedef u32 proof[PROOFSIZE];
void setheader(blake2b_state *ctx, const char *header, const u32 headerlen, u32 nce) {
void setheader(blake2b_state *ctx, const char *headernonce) {
uint32_t le_N = htole32(WN);
uint32_t le_K = htole32(WK);
uchar personal[] = "ZcashPoW01230123";
@ -57,15 +61,10 @@ void setheader(blake2b_state *ctx, const char *header, const u32 headerlen, u32
memset(P->salt, 0, sizeof(P->salt));
memcpy(P->personal, (const uint8_t *)personal, 16);
blake2b_init_param(ctx, P);
blake2b_update(ctx, (const uchar *)header, headerlen);
uchar nonce[32];
memset(nonce, 0, 32);
uint32_t le_nonce = htole32(nce);
memcpy(nonce, &le_nonce, 4);
blake2b_update(ctx, nonce, 32);
blake2b_update(ctx, (const uchar *)headernonce, HEADERNONCELEN);
}
enum verify_code { POW_OK, POW_DUPLICATE, POW_OUT_OF_ORDER, POW_NONZERO_XOR };
// Result codes produced by verify(). errstr[] below is kept in the same order,
// so a verify_code value can be used directly as an index into it.
enum verify_code { POW_OK, POW_HEADER_LENGTH, POW_DUPLICATE, POW_OUT_OF_ORDER, POW_NONZERO_XOR };
// One message per verify_code. The "wrong header length" entry matches
// POW_HEADER_LENGTH; without it every later message is shifted by one and
// errstr[POW_NONZERO_XOR] reads past the end of the array.
const char *errstr[] = { "OK", "wrong header length", "duplicate index", "indices out of order", "nonzero xor" };
void genhash(blake2b_state *ctx, u32 idx, uchar *hash) {
@ -119,11 +118,13 @@ bool duped(proof prf) {
}
// verify Wagner conditions
int verify(u32 indices[PROOFSIZE], const char *header, const u32 headerlen, const u32 nonce) {
int verify(u32 indices[PROOFSIZE], const char *headernonce, const u32 headerlen) {
if (headerlen != HEADERNONCELEN)
return POW_HEADER_LENGTH;
if (duped(indices))
return POW_DUPLICATE;
blake2b_state ctx;
setheader(&ctx, header, headerlen, nonce);
setheader(&ctx, headernonce);
uchar hash[WN/8];
return verifyrec(&ctx, indices, hash, WK);
}

View File

@ -49,8 +49,13 @@ int main(int argc, char **argv) {
equi eq(nthreads);
printf("Using %dMB of memory\n", 1 + eq.hta.alloced / 0x100000);
u32 sumnsols = 0;
char headernonce[HEADERNONCELEN];
u32 hdrlen = strlen(header);
memcpy(headernonce, header, hdrlen);
memset(headernonce+hdrlen, 0, sizeof(headernonce)-hdrlen);
for (int r = 0; r < range; r++) {
eq.setnonce(header, strlen(header), nonce+r);
((u32 *)headernonce)[32] = htole32(nonce+r);
eq.setheadernonce(headernonce, sizeof(headernonce));
for (int t = 0; t < nthreads; t++) {
threads[t].id = t;
threads[t].eq = &eq;

View File

@ -224,8 +224,8 @@ struct equi {
free(nslots);
free(sols);
}
void setnonce(const char *header, const u32 headerlen, const u32 nonce) {
setheader(&blake_ctx, header, headerlen, nonce);
// Start a fresh run on the given header+nonce: clear the counters and the
// slot counts, then re-seed the blake context from headernonce.
// len is accepted for interface symmetry but is not consulted here.
void setheadernonce(const char *headernonce, const u32 len) {
  hfull = 0;
  bfull = 0;
  xfull = 0;
  nsols = 0;
  // zero the whole array, although only nslots[0] needs zeroing
  memset(nslots, 0, NBUCKETS * sizeof(au32));
  setheader(&blake_ctx, headernonce);
}