add cantor slots enabling 2^10 buckets to equi_miner

tromp 2016-11-17 18:03:11 -05:00
parent 85e486fd69
commit fec951a2a3
3 changed files with 67 additions and 1211 deletions
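The packing constraint behind the title: a tree node stores a bucket id plus two slot ids in one 32-bit word. Assuming the usual equi_miner.h constants (BUCKBITS = DIGITBITS - RESTBITS, SLOTBITS = RESTBITS + 2), WN = 200 with RESTBITS = 10 gives BUCKBITS = 10 and SLOTBITS = 12, so storing the two slots separately would take 10 + 2*12 = 34 bits and overflow the word; Cantor-pairing the slot pair takes only CANTORBITS = 2*SLOTBITS - 2 = 22 bits, and 10 + 22 = 32 just fits, which is what makes 2^10 buckets possible.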


@@ -1,109 +0,0 @@
// Wagner's algorithm for Generalized Birthday Paradox, a memory-hard proof-of-work
// Copyright (c) 2016 John Tromp
#include "equi_dev_miner.h"
#include <unistd.h>
#include "ctype.h"
int hextobyte(const char * x) {
u32 b = 0;
for (int i = 0; i < 2; i++) {
uchar c = tolower(x[i]);
assert(isxdigit(c));
b = (b << 4) | (c - (c >= '0' && c <= '9' ? '0' : ('a' - 10)));
}
return b;
}
int main(int argc, char **argv) {
int nthreads = 1;
int nonce = 0;
int range = 1;
bool showsol = false;
const char *header = "";
const char *hex = "";
int c;
while ((c = getopt (argc, argv, "h:n:r:t:x:s")) != -1) {
switch (c) {
case 'h':
header = optarg;
break;
case 'n':
nonce = atoi(optarg);
break;
case 'r':
range = atoi(optarg);
break;
case 's':
showsol = true;
break;
case 't':
nthreads = atoi(optarg);
break;
case 'x':
hex = optarg;
break;
}
}
#ifndef XWITHASH
if (sizeof(tree) > 4)
printf("WARNING: please compile with -DXWITHASH to shrink tree!\n");
#endif
#ifdef ATOMIC
if (nthreads==1)
printf("WARNING: use of atomics hurts single threaded performance!\n");
#else
assert(nthreads==1);
#endif
printf("Looking for wagner-tree on (\"%s\",%d", hex ? "0x..." : header, nonce);
if (range > 1)
printf("-%d", nonce+range-1);
printf(") with %d %d-bit digits and %d threads\n", NDIGITS, DIGITBITS, nthreads);
thread_ctx *threads = (thread_ctx *)calloc(nthreads, sizeof(thread_ctx));
assert(threads);
equi eq(nthreads);
printf("Using %dMB of memory", 1 + eq.hta.alloced / 0x100000);
#ifdef USE_AVX2
printf(" and AVX2 intrinsics to compute 4-way blake2b\n");
#else
printf(" and no AVX2\n");
#endif
u32 sumnsols = 0;
char headernonce[HEADERNONCELEN];
u32 hdrlen = strlen(header);
if (*hex) {
assert(strlen(hex) == 2 * HEADERNONCELEN);
for (int i = 0; i < HEADERNONCELEN; i++)
headernonce[i] = hextobyte(&hex[2*i]);
} else {
memcpy(headernonce, header, hdrlen);
memset(headernonce+hdrlen, 0, sizeof(headernonce)-hdrlen);
}
for (int r = 0; r < range; r++) {
((u32 *)headernonce)[32] = htole32(nonce+r);
eq.setheadernonce(headernonce, sizeof(headernonce));
for (int t = 0; t < nthreads; t++) {
threads[t].id = t;
threads[t].eq = &eq;
int err = pthread_create(&threads[t].thread, NULL, worker, (void *)&threads[t]);
assert(err == 0);
}
for (int t = 0; t < nthreads; t++) {
int err = pthread_join(threads[t].thread, NULL);
assert(err == 0);
}
u32 nsols, maxsols = min(MAXSOLS, eq.nsols);
for (nsols = 0; nsols < maxsols; nsols++) {
if (showsol) {
printf("\nSolution");
for (u32 i = 0; i < PROOFSIZE; i++)
printf(" %jx", (uintmax_t)eq.sols[nsols][i]);
}
}
printf("\n%d solutions\n", nsols);
sumnsols += nsols;
}
free(threads);
printf("%d total solutions\n", sumnsols);
return 0;
}

File diff suppressed because it is too large.


@@ -103,22 +103,26 @@ struct tree {
// so now we do things "manually"
u32 bid_s0_s1;
#ifdef CANTOR
static const u32 CANTORBITS = 2*SLOTBITS-2;
static const u32 CANTORMASK = (1<<CANTORBITS) - 1;
static const u32 CANTORMAXSQRT = 2 * NSLOTS;
static_assert(BUCKBITS + CANTORBITS <= 32, "cantor throws a fit");
#else
static_assert(BUCKBITS + 2 * SLOTBITS <= 32, "cantor throws a fit");
#endif
// constructor for height 0 trees stores index instead
tree(const u32 idx) {
bid_s0_s1 = idx;
}
static u32 cantor(u32 s0, u32 s1) {
return s1*(s1+1)/2 + s0;
}
tree(const u32 bid, const u32 s0, const u32 s1) {
// SLOTDIFF saves 1 bit by encoding the distance between
// the two slots modulo SLOTRANGE instead, and picking
// slotid0 such that this distance is at most SLOTRANGE/2
// the extra branching involved gives noticeable slowdown
#ifdef SLOTDIFF
u32 ds10 = (s1 - s0) & SLOTMASK;
if (ds10 & SLOTMSB) {
bid_s0_s1 = (((bid << SLOTBITS) | s1) << (SLOTBITS-1)) | (SLOTMASK & ~ds10);
} else {
bid_s0_s1 = (((bid << SLOTBITS) | s0) << (SLOTBITS-1)) | (ds10 - 1);
}
// CANTOR saves 2 bits by Cantor pairing
#ifdef CANTOR
bid_s0_s1 = (bid << CANTORBITS) | cantor(s0,s1);
#else
bid_s0_s1 = (((bid << SLOTBITS) | s0) << SLOTBITS) | s1;
#endif
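A minimal standalone sketch of the packing above, with toy constants mirroring WN == 200, RESTBITS == 10 (the real values come from equi.h/equi_miner.h, and pack() is a hypothetical name): cantor(s0,s1) with s0 < s1 < NSLOTS is at most about NSLOTS*(NSLOTS+1)/2, so it fits in CANTORBITS = 2*SLOTBITS-2 bits whenever NSLOTS stays below roughly 70% of SLOTRANGE, which is where the 2-bit saving over storing s0 and s1 separately comes from.

#include <cassert>
#include <cstdint>
typedef uint32_t u32;

// toy constants for illustration; the miner derives the real ones from RESTBITS
static const u32 SLOTBITS   = 12, SLOTRANGE = 1 << SLOTBITS;
static const u32 NSLOTS     = SLOTRANGE * 11 / 16;      // ~69% of SLOTRANGE, below 1/sqrt(2)
static const u32 CANTORBITS = 2 * SLOTBITS - 2;
static const u32 BUCKBITS   = 10;

static u32 cantor(u32 s0, u32 s1) { return s1 * (s1 + 1) / 2 + s0; }

// pack a bucket id and an ordered slot pair (s0 < s1) into one 32-bit word
static u32 pack(u32 bid, u32 s0, u32 s1) {
  assert(s0 < s1 && s1 < NSLOTS && bid < (1u << BUCKBITS));
  assert(cantor(NSLOTS - 2, NSLOTS - 1) < (1u << CANTORBITS)); // largest pair still fits
  return (bid << CANTORBITS) | cantor(s0, s1);
}

int main() {
  u32 t = pack(5, 100, NSLOTS - 1);
  assert(t >> CANTORBITS == 5);   // the bucket id comes back out of the top bits
  return 0;
}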
@@ -129,33 +133,46 @@ struct tree {
}
// retrieve bucket index
u32 bucketid() const {
#ifdef SLOTDIFF
return bid_s0_s1 >> (2 * SLOTBITS - 1);
#ifdef CANTOR
return bid_s0_s1 >> (2*SLOTBITS - 2);
#else
return bid_s0_s1 >> (2 * SLOTBITS);
return bid_s0_s1 >> (2*SLOTBITS);
#endif
}
// retrieve first slot index
u32 slotid0() const {
#ifdef SLOTDIFF
return (bid_s0_s1 >> (SLOTBITS-1)) & SLOTMASK;
#else
return (bid_s0_s1 >> SLOTBITS) & SLOTMASK;
#endif
#ifdef CANTOR
u32 slotid0(u32 s1) const {
return (bid_s0_s1 & CANTORMASK) - cantor(0,s1);
}
#else
u32 slotid0() const {
return (bid_s0_s1 >> SLOTBITS) & SLOTMASK;
}
#endif
// retrieve second slot index
u32 slotid1() const {
#ifdef SLOTDIFF
return (slotid0() + 1 + (bid_s0_s1 & (SLOTMASK>>1))) & SLOTMASK;
#ifdef CANTOR
u32 k, q, sqr = 8*(bid_s0_s1 & CANTORMASK)+1;
// this k=sqrt(sqr) computing loop averages 3.4 iterations out of maximum 9
for (k = CANTORMAXSQRT; (q = sqr/k) < k; k = (k+q)/2) ;
return (k-1) / 2;
#else
return bid_s0_s1 & SLOTMASK;
#endif
}
bool prob_disjoint(const tree other) const {
#ifdef CANTOR
if (bucketid() != other.bucketid())
return true;
u32 s1 = slotid1(), s0 = slotid0(s1);
u32 os1 = other.slotid1(), os0 = other.slotid0(os1);
return s1 != os1 && s0 != os0;
#else
tree xort(bid_s0_s1 ^ other.bid_s0_s1);
return xort.bucketid() || (xort.slotid0() && xort.slotid1());
// next two tests catch much fewer cases and are therefore skipped
// && slotid0() != other.slotid1() && slotid1() != other.slotid0()
#endif
}
};
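The unpairing above inverts cantor(): s1 = (isqrt(8c+1) - 1) / 2, then s0 = c - cantor(0, s1), which is why slotid0() needs s1 as an argument under CANTOR. A self-contained round-trip check of that inverse, using the same descending Newton loop for the integer square root (toy NSLOTS value; uncantor() is a hypothetical name):

#include <cassert>
#include <cstdint>
#include <cstdio>
typedef uint32_t u32;

static const u32 NSLOTS = 2816;               // toy value, ~69% of a 2^12 slot range
static const u32 CANTORMAXSQRT = 2 * NSLOTS;  // upper bound on sqrt(8*cantor(s0,s1)+1)

static u32 cantor(u32 s0, u32 s1) { return s1 * (s1 + 1) / 2 + s0; }

// recover (s0,s1) from c = cantor(s0,s1): s1 = (isqrt(8c+1)-1)/2, s0 = c - cantor(0,s1)
static void uncantor(u32 c, u32 *s0, u32 *s1) {
  u32 k, q, sqr = 8 * c + 1;
  // descending Newton iteration for k = isqrt(sqr), as in slotid1()
  for (k = CANTORMAXSQRT; (q = sqr / k) < k; k = (k + q) / 2) ;
  *s1 = (k - 1) / 2;
  *s0 = c - cantor(0, *s1);
}

int main() {
  for (u32 s1 = 1; s1 < NSLOTS; s1 += 97)     // sample a spread of slot pairs
    for (u32 s0 = 0; s0 < s1; s0 += 31) {
      u32 r0, r1;
      uncantor(cantor(s0, s1), &r0, &r1);
      assert(r0 == s0 && r1 == s1);
    }
  printf("cantor round trip ok\n");
  return 0;
}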
@@ -345,17 +362,28 @@ struct equi {
const slot1 *buck = hta.heap1[t.bucketid()];
const u32 size = 1 << --r;
u32 tagi = hashwords(hashsize(r));
return listindices1(r, buck[t.slotid0()][tagi].tag, indices)
|| listindices1(r, buck[t.slotid1()][tagi].tag, indices+size)
|| orderindices(indices, size) || indices[0] == indices[size];
#ifdef CANTOR
u32 s1 = t.slotid1(), s0 = t.slotid0(s1);
#else
u32 s1 = t.slotid1(), s0 = t.slotid0();
#endif
tree t0 = buck[s0][tagi].tag, t1 = buck[s1][tagi].tag;
return !t0.prob_disjoint(t1)
|| listindices1(r, t0, indices) || listindices1(r, t1, indices+size)
|| orderindices(indices, size) || indices[0] == indices[size];
}
// need separate instance for accessing (differently typed) heap1
bool listindices1(u32 r, const tree t, u32 *indices) {
const slot0 *buck = hta.heap0[t.bucketid()];
const u32 size = 1 << --r;
u32 tagi = hashwords(hashsize(r));
return listindices0(r, buck[t.slotid0()][tagi].tag, indices)
|| listindices0(r, buck[t.slotid1()][tagi].tag, indices+size)
#ifdef CANTOR
u32 s1 = t.slotid1(), s0 = t.slotid0(s1);
#else
u32 s1 = t.slotid1(), s0 = t.slotid0();
#endif
tree t0 = buck[s0][tagi].tag, t1 = buck[s1][tagi].tag;
return listindices0(r, t0, indices) || listindices0(r, t1, indices+size)
|| orderindices(indices, size) || indices[0] == indices[size];
}
// check a candidate that resulted in 0 xor
@@ -430,6 +458,8 @@ struct equi {
return slot->bytes[prevbo] >> 4;
#elif WN == 200 && RESTBITS == 8
return (slot->bytes[prevbo] & 0xf) << 4 | slot->bytes[prevbo+1] >> 4;
#elif WN == 200 && RESTBITS == 10
return (slot->bytes[prevbo] & 0x3f) << 4 | slot->bytes[prevbo+1] >> 4;
#elif WN == 144 && RESTBITS == 4
return slot->bytes[prevbo] & 0xf;
#else
@@ -442,6 +472,8 @@ struct equi {
return slot->bytes[prevbo] & 0xf;
#elif WN == 200 && RESTBITS == 8
return slot->bytes[prevbo];
#elif WN == 200 && RESTBITS == 10
return (slot->bytes[prevbo] & 0x3) << 8 | slot->bytes[prevbo+1];
#elif WN == 144 && RESTBITS == 4
return slot->bytes[prevbo] & 0xf;
#else
@@ -554,6 +586,8 @@ static const u32 NBLOCKS = (NHASHES+HASHESPERBLOCK-1)/HASHESPERBLOCK;
// figure out bucket for this hash by extracting leading BUCKBITS bits
#if BUCKBITS == 12 && RESTBITS == 8
const u32 bucketid = ((u32)ph[0] << 4) | ph[1] >> 4;
#elif BUCKBITS == 10 && RESTBITS == 10
const u32 bucketid = ((u32)ph[0] << 2) | ph[1] >> 6;
#elif BUCKBITS == 16 && RESTBITS == 4
const u32 bucketid = ((u32)ph[0] << 8) | ph[1];
#elif BUCKBITS == 20 && RESTBITS == 4
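With WN == 200 a digit has DIGITBITS == 20 bits, so RESTBITS == 10 leaves BUCKBITS == 10 leading bucket bits per digit. The new BUCKBITS == 10 case above and the RESTBITS == 10 cases in getxhash0/getxhash1 earlier are three alignments of a 10-bit field in a big-endian byte stream; the two getxhash cases differ by a 4-bit offset, matching the half byte that a 20-bit digit straddles. A small sketch with hypothetical helper names (the real byte offsets come from prevbo):

#include <cassert>
#include <cstdint>
typedef uint32_t u32;
typedef unsigned char uchar;

// leading 10 bits of the byte pair p[0],p[1]   (the bucketid case, BUCKBITS == 10)
static u32 lead10(const uchar *p) { return (u32)p[0] << 2 | p[1] >> 6; }
// 10-bit field starting 2 bits into p[0]       (the getxhash0 case)
static u32 mid10(const uchar *p)  { return (p[0] & 0x3f) << 4 | p[1] >> 4; }
// 10-bit field starting 6 bits into p[0]       (the getxhash1 case)
static u32 tail10(const uchar *p) { return (p[0] & 0x3) << 8 | p[1]; }

int main() {
  // one 20-bit digit = 10 bucket bits + 10 rest bits, laid out as
  // 10101010 10|010101 0101xxxx over the bytes 0xaa 0x95 0x5.
  uchar h[3] = { 0xaa, 0x95, 0x50 };
  assert(lead10(h)     == 0x2aa);   // bucket bits 1010101010
  assert(mid10(h + 1)  == 0x155);   // rest bits   0101010101 of the same digit
  assert(tail10(h + 1) == 0x150);   // the same bytes read at the other alignment
  return 0;
}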
@@ -601,6 +635,9 @@ static const u32 NBLOCKS = (NHASHES+HASHESPERBLOCK-1)/HASHESPERBLOCK;
#if WN == 200 && BUCKBITS == 12 && RESTBITS == 8
xorbucketid = (((u32)(bytes0[htl.prevbo+1] ^ bytes1[htl.prevbo+1]) & 0xf) << 8)
| (bytes0[htl.prevbo+2] ^ bytes1[htl.prevbo+2]);
#elif WN == 200 && BUCKBITS == 10 && RESTBITS == 10
xorbucketid = (((u32)(bytes0[htl.prevbo+1] ^ bytes1[htl.prevbo+1]) & 0xf) << 6)
| (bytes0[htl.prevbo+2] ^ bytes1[htl.prevbo+2]) >> 2;
#elif WN == 144 && BUCKBITS == 20 && RESTBITS == 4
xorbucketid = ((((u32)(bytes0[htl.prevbo+1] ^ bytes1[htl.prevbo+1]) << 8)
| (bytes0[htl.prevbo+2] ^ bytes1[htl.prevbo+2])) << 4)
@@ -651,6 +688,9 @@ static const u32 NBLOCKS = (NHASHES+HASHESPERBLOCK-1)/HASHESPERBLOCK;
#if WN == 200 && BUCKBITS == 12 && RESTBITS == 8
xorbucketid = ((u32)(bytes0[htl.prevbo+1] ^ bytes1[htl.prevbo+1]) << 4)
| (bytes0[htl.prevbo+2] ^ bytes1[htl.prevbo+2]) >> 4;
#elif WN == 200 && BUCKBITS == 10 && RESTBITS == 10
xorbucketid = ((u32)(bytes0[htl.prevbo+2] ^ bytes1[htl.prevbo+2]) << 2)
| (bytes0[htl.prevbo+3] ^ bytes1[htl.prevbo+3]) >> 6;
#elif WN == 144 && BUCKBITS == 20 && RESTBITS == 4
xorbucketid = ((((u32)(bytes0[htl.prevbo+1] ^ bytes1[htl.prevbo+1]) << 8)
| (bytes0[htl.prevbo+2] ^ bytes1[htl.prevbo+2])) << 4)