drop OpenCL solvers
This commit is contained in:
parent
2a6eb0516c
commit
340064b9c6
Binary file not shown.
|
@ -1,150 +0,0 @@
|
|||
// Blake2-B CUDA Implementation
|
||||
// tpruvot@github July 2016
|
||||
// permission granted to use under MIT license
|
||||
// modified for use in Zcash by John Tromp September 2016
|
||||
|
||||
/**
|
||||
* uint2 direct ops by c++ operator definitions
|
||||
*/
|
||||
|
||||
// static __device__ __forceinline__ uint2 operator^ (uint2 a, uint2 b) {
|
||||
// return make_uint2(a.x ^ b.x, a.y ^ b.y);
|
||||
// }
|
||||
|
||||
// uint2 ROR/ROL methods
|
||||
uint2 ROR2(const uint2 a, const int offset) {
|
||||
uint2 result;
|
||||
if (!offset)
|
||||
result = a;
|
||||
else if (offset < 32) {
|
||||
result.y = ((a.y >> offset) | (a.x << (32 - offset)));
|
||||
result.x = ((a.x >> offset) | (a.y << (32 - offset)));
|
||||
} else if (offset == 32) {
|
||||
result.y = a.x;
|
||||
result.x = a.y;
|
||||
} else {
|
||||
result.y = ((a.x >> (offset - 32)) | (a.y << (64 - offset)));
|
||||
result.x = ((a.y >> (offset - 32)) | (a.x << (64 - offset)));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Exchange the two 32-bit components of `value`
// (the result has .x = value.y and .y = value.x).
uint2 SWAPUINT2(uint2 value) {
    uint2 swapped;
    swapped.y = value.x;
    swapped.x = value.y;
    return swapped;
}
|
||||
|
||||
#define ROR24(u) ROR2(u,24)
|
||||
#define ROR16(u) ROR2(u,16)
|
||||
|
||||
__constant int8_t blake2b_sigma[12][16] = {
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
|
||||
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } ,
|
||||
{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } ,
|
||||
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } ,
|
||||
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } ,
|
||||
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } ,
|
||||
{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } ,
|
||||
{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } ,
|
||||
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } ,
|
||||
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 } ,
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
|
||||
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }
|
||||
};
|
||||
|
||||
// BLAKE2b mixing step on four 64-bit state words.
//   r          round index; selects the row of blake2b_sigma
//   i          position of this step within the round (ROUND passes 0..7)
//   a, b, c, d state words, updated in place
//   m          the 16 message words of the block being compressed
// The XOR/rotate steps go through uint2 views of the words so the
// 32-bit-pair helpers (SWAPUINT2, ROR24, ROR16, ROR2) can be used.
void G(const int32_t r, const int32_t i, uint64_t *a, uint64_t *b, uint64_t *c, uint64_t *d, uint64_t const m[16]) {
    uint2 *a2 = (uint2 *)a;
    uint2 *b2 = (uint2 *)b;
    uint2 *c2 = (uint2 *)c;
    uint2 *d2 = (uint2 *)d;

    // first half: uses the even-indexed sigma entry
    *a += *b + m[ blake2b_sigma[r][2 * i] ];
    *d2 = SWAPUINT2( *d2 ^ *a2 );      // exchanging halves == rotate by 32
    *c += *d;
    *b2 = ROR24( *b2 ^ *c2 );

    // second half: uses the odd-indexed sigma entry
    *a += *b + m[ blake2b_sigma[r][2 * i + 1] ];
    *d2 = ROR16( *d2 ^ *a2 );
    *c += *d;
    *b2 = ROR2( *b2 ^ *c2, 63 );
}
|
||||
|
||||
// One full round: four G() steps over the columns of the 4x4 state `v`,
// then four over its diagonals. Expects the arrays `v` (16 state words)
// and `m` (16 message words) to be visible at the point of use.
// The body is wrapped in do { } while (0) so that `ROUND(r);` is exactly
// one statement and stays correct as the body of an unbraced if/for.
#define ROUND(r) \
    do { \
        G((r), 0, &v[0], &v[4], &v[ 8], &v[12], m); \
        G((r), 1, &v[1], &v[5], &v[ 9], &v[13], m); \
        G((r), 2, &v[2], &v[6], &v[10], &v[14], m); \
        G((r), 3, &v[3], &v[7], &v[11], &v[15], m); \
        G((r), 4, &v[0], &v[5], &v[10], &v[15], m); \
        G((r), 5, &v[1], &v[6], &v[11], &v[12], m); \
        G((r), 6, &v[2], &v[7], &v[ 8], &v[13], m); \
        G((r), 7, &v[3], &v[4], &v[ 9], &v[14], m); \
    } while (0)
|
||||
|
||||
// Append the 32-bit value `idx` to the partially filled block held in
// `state`, zero-pad that block, compress it with the final-block flag set
// (v[14] inverted) and copy the first `outlen` bytes of the resulting
// chaining value into `hash`.
// `state` is modified in place (buf, buflen, counter and h).
// No bounds checks are made on state->buflen or outlen.
void blake2b_gpu_hash(blake2b_state *state, uint32_t idx, uint8_t *hash, uint32_t outlen) {
    // idx is stored in the device's native byte order
    *(uint32_t *)(state->buf + state->buflen) = idx;
    state->buflen += 4;
    state->counter += state->buflen;

    // clear the unused tail of the block
    for (unsigned pad = 0; pad < BLAKE2B_BLOCKBYTES - state->buflen; pad++)
        state->buf[state->buflen + pad] = 0;

    // message block viewed as sixteen 64-bit words
    const uint64_t *block = (const uint64_t *)state->buf;
    uint64_t m[16];
    for (int w = 0; w < 16; w++)
        m[w] = block[w];

    // working state: chaining value followed by the initialization constants
    uint64_t v[16];
    for (int w = 0; w < 8; w++)
        v[w] = state->h[w];
    v[ 8] = 0x6a09e667f3bcc908;
    v[ 9] = 0xbb67ae8584caa73b;
    v[10] = 0x3c6ef372fe94f82b;
    v[11] = 0xa54ff53a5f1d36f1;
    v[12] = 0x510e527fade682d1 ^ state->counter;       // byte counter mixed in
    v[13] = 0x9b05688c2b3e6c1f;
    v[14] = 0x1f83d9abfb41bd6b ^ 0xffffffffffffffff;   // final-block flag
    v[15] = 0x5be0cd19137e2179;

    // twelve rounds, one sigma row each
    ROUND( 0 );
    ROUND( 1 );
    ROUND( 2 );
    ROUND( 3 );
    ROUND( 4 );
    ROUND( 5 );
    ROUND( 6 );
    ROUND( 7 );
    ROUND( 8 );
    ROUND( 9 );
    ROUND( 10 );
    ROUND( 11 );

    // fold both halves of the working state back into the chaining value
    for (int w = 0; w < 8; w++)
        state->h[w] ^= v[w] ^ v[w + 8];

    // emit the leading outlen bytes of h
    const uint8_t *digest = (const uint8_t *)state->h;
    for (unsigned pos = 0; pos < outlen; pos++)
        hash[pos] = digest[pos];
}
|
|
@ -1,156 +0,0 @@
|
|||
#if defined(__OPENCL_HOST__)
|
||||
#define __global
|
||||
#include "../blake2.h"
|
||||
#else
|
||||
typedef char int8_t;
|
||||
typedef uchar uint8_t;
|
||||
typedef short int16_t;
|
||||
typedef ushort uint16_t;
|
||||
typedef int int32_t;
|
||||
typedef uint uint32_t;
|
||||
typedef long int64_t;
|
||||
typedef ulong uint64_t;
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#define ALIGN(x) __declspec(align(x))
|
||||
#else
|
||||
#define ALIGN(x) __attribute__ ((__aligned__(x)))
|
||||
#endif
|
||||
|
||||
enum blake2b_constant
|
||||
{
|
||||
BLAKE2B_BLOCKBYTES = 128,
|
||||
BLAKE2B_OUTBYTES = 64,
|
||||
BLAKE2B_KEYBYTES = 64,
|
||||
BLAKE2B_SALTBYTES = 16,
|
||||
BLAKE2B_PERSONALBYTES = 16
|
||||
};
|
||||
|
||||
#pragma pack(push, 1)
|
||||
ALIGN( 64 ) typedef struct __blake2b_state {
|
||||
uint64_t h[8];
|
||||
uint8_t buf[BLAKE2B_BLOCKBYTES];
|
||||
uint16_t counter;
|
||||
uint8_t buflen;
|
||||
uint8_t lastblock;
|
||||
} blake2b_state;
|
||||
#pragma pack(pop)
|
||||
#endif
|
||||
|
||||
// Derived solver parameters. WN, WK and RESTBITS are not defined here and
// must be supplied before any of these macros is expanded.

#define COLLISION_BIT_LENGTH (WN / (WK+1))
#define COLLISION_BYTE_LENGTH ((COLLISION_BIT_LENGTH+7)/8)
#define FINAL_FULL_WIDTH (2*COLLISION_BYTE_LENGTH+sizeof(uint32_t)*(1 << (WK)))

// the WN-bit hash is split into NDIGITS digits of DIGITBITS bits each
#define NDIGITS (WK+1)
#define DIGITBITS (WN/(NDIGITS))
// number of 32-bit entries in a proof
#define PROOFSIZE (1u<<WK)
#define COMPRESSED_PROOFSIZE ((COLLISION_BIT_LENGTH+1)*PROOFSIZE*4/(8*sizeof(uint32_t)))
#define BASE (1u<<DIGITBITS)
#define NHASHES (2u*BASE)
// number of WN-bit hashes that fit in 512 bits, and their total size in bytes
#define HASHESPERBLAKE (512/WN)
#define HASHOUT (HASHESPERBLAKE*WN/8)

// 2_log of number of buckets
#define BUCKBITS (DIGITBITS-RESTBITS)

// number of buckets
#define NBUCKETS (1<<BUCKBITS)
// 2_log of number of slots per bucket
#define SLOTBITS (RESTBITS+1+1)
// number of slots per bucket
#define NSLOTS (1u<<SLOTBITS)
// number of per-xhash slots
#define XFULL 16
// SLOTBITS mask
#define SLOTMASK (NSLOTS-1)
// number of possible values of xhash (rest of n) bits
#define NRESTS (1u<<RESTBITS)
// number of blocks of hashes extracted from single 512 bit blake2b output
#define NBLOCKS ((NHASHES+HASHESPERBLAKE-1)/HASHESPERBLAKE)
// nothing larger found in 100000 runs
#define MAXSOLS 8

// number of 32-bit words needed to hold `bits` bits; the argument is
// parenthesized so that any expression can be passed safely
#define WORDS(bits) (((bits) + 31) / 32)
#define HASHWORDS0 WORDS(WN - DIGITBITS + RESTBITS)
#define HASHWORDS1 WORDS(WN - 2*DIGITBITS + RESTBITS)
|
||||
|
||||
typedef uint32_t proof[PROOFSIZE];
|
||||
|
||||
// tree = | xhash(RESTBITS) | slotid1(SLOTBITS) | slotid0(SLOTBITS) | bucketid(BUCKBITS) |
|
||||
// index = | bucketid(BUCKBITS) | slotid0(SLOTBITS) |
|
||||
typedef uint32_t tree;
|
||||
|
||||
typedef union hashunit {
|
||||
uint32_t word;
|
||||
uint8_t bytes[4];
|
||||
} hashunit;
|
||||
|
||||
typedef struct slot0 {
|
||||
tree attr;
|
||||
hashunit hash[HASHWORDS0];
|
||||
} slot0;
|
||||
|
||||
typedef struct slot1 {
|
||||
tree attr;
|
||||
hashunit hash[HASHWORDS1];
|
||||
} slot1;
|
||||
|
||||
// a bucket is NSLOTS treenodes
|
||||
typedef slot0 bucket0[NSLOTS];
|
||||
typedef slot1 bucket1[NSLOTS];
|
||||
// the N-bit hash consists of K+1 n-bit "digits"
|
||||
// each of which corresponds to a layer of NBUCKETS buckets
|
||||
typedef bucket0 digit0[NBUCKETS];
|
||||
typedef bucket1 digit1[NBUCKETS];
|
||||
|
||||
// manages hash and tree data
|
||||
typedef struct htalloc {
|
||||
__global bucket0 *trees0[(WK+1)/2];
|
||||
__global bucket1 *trees1[WK/2];
|
||||
} htalloc;
|
||||
|
||||
typedef uint32_t bsizes[NBUCKETS];
|
||||
|
||||
|
||||
typedef struct htlayout {
|
||||
htalloc hta;
|
||||
uint32_t prevhashunits;
|
||||
uint32_t nexthashunits;
|
||||
uint32_t dunits;
|
||||
uint32_t prevbo;
|
||||
uint32_t nextbo;
|
||||
} htlayout;
|
||||
|
||||
#if RESTBITS <= 6
|
||||
typedef uint8_t xslot;
|
||||
#else
|
||||
typedef uint16_t xslot;
|
||||
#endif
|
||||
|
||||
typedef struct collisiondata {
|
||||
#ifdef XBITMAP
|
||||
#if NSLOTS > 64
|
||||
#error cant use XBITMAP with more than 64 slots
|
||||
#endif
|
||||
uint64_t xhashmap[NRESTS];
|
||||
uint64_t xmap;
|
||||
#else
|
||||
xslot nxhashslots[NRESTS];
|
||||
xslot xhashslots[NRESTS][XFULL];
|
||||
xslot *xx;
|
||||
uint32_t n0;
|
||||
uint32_t n1;
|
||||
#endif
|
||||
uint32_t s0;
|
||||
} collisiondata;
|
||||
|
||||
|
||||
typedef struct equi {
|
||||
blake2b_state blake_ctx;
|
||||
htalloc hta;
|
||||
__global bsizes *nslots;
|
||||
__global proof *sols;
|
||||
uint32_t nsols;
|
||||
uint32_t nthreads;
|
||||
} equi;
|
File diff suppressed because it is too large
Load Diff
Binary file not shown.
|
@ -1,150 +0,0 @@
|
|||
// Blake2-B CUDA Implementation
|
||||
// tpruvot@github July 2016
|
||||
// permission granted to use under MIT license
|
||||
// modified for use in Zcash by John Tromp September 2016
|
||||
|
||||
/**
|
||||
* uint2 direct ops by c++ operator definitions
|
||||
*/
|
||||
|
||||
// static __device__ __forceinline__ uint2 operator^ (uint2 a, uint2 b) {
|
||||
// return make_uint2(a.x ^ b.x, a.y ^ b.y);
|
||||
// }
|
||||
|
||||
// uint2 ROR/ROL methods
|
||||
uint2 ROR2(const uint2 a, const int offset) {
|
||||
uint2 result;
|
||||
if (!offset)
|
||||
result = a;
|
||||
else if (offset < 32) {
|
||||
result.y = ((a.y >> offset) | (a.x << (32 - offset)));
|
||||
result.x = ((a.x >> offset) | (a.y << (32 - offset)));
|
||||
} else if (offset == 32) {
|
||||
result.y = a.x;
|
||||
result.x = a.y;
|
||||
} else {
|
||||
result.y = ((a.x >> (offset - 32)) | (a.y << (64 - offset)));
|
||||
result.x = ((a.y >> (offset - 32)) | (a.x << (64 - offset)));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Exchange the two 32-bit components of `value`
// (the result has .x = value.y and .y = value.x).
uint2 SWAPUINT2(uint2 value) {
    uint2 swapped;
    swapped.y = value.x;
    swapped.x = value.y;
    return swapped;
}
|
||||
|
||||
#define ROR24(u) ROR2(u,24)
|
||||
#define ROR16(u) ROR2(u,16)
|
||||
|
||||
__constant int8_t blake2b_sigma[12][16] = {
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
|
||||
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } ,
|
||||
{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } ,
|
||||
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } ,
|
||||
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } ,
|
||||
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } ,
|
||||
{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } ,
|
||||
{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } ,
|
||||
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } ,
|
||||
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 } ,
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
|
||||
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }
|
||||
};
|
||||
|
||||
// BLAKE2b mixing step on four 64-bit state words.
//   r          round index; selects the row of blake2b_sigma
//   i          position of this step within the round (ROUND passes 0..7)
//   a, b, c, d state words, updated in place
//   m          the 16 message words of the block being compressed
// The XOR/rotate steps go through uint2 views of the words so the
// 32-bit-pair helpers (SWAPUINT2, ROR24, ROR16, ROR2) can be used.
void G(const int32_t r, const int32_t i, uint64_t *a, uint64_t *b, uint64_t *c, uint64_t *d, uint64_t const m[16]) {
    uint2 *a2 = (uint2 *)a;
    uint2 *b2 = (uint2 *)b;
    uint2 *c2 = (uint2 *)c;
    uint2 *d2 = (uint2 *)d;

    // first half: uses the even-indexed sigma entry
    *a += *b + m[ blake2b_sigma[r][2 * i] ];
    *d2 = SWAPUINT2( *d2 ^ *a2 );      // exchanging halves == rotate by 32
    *c += *d;
    *b2 = ROR24( *b2 ^ *c2 );

    // second half: uses the odd-indexed sigma entry
    *a += *b + m[ blake2b_sigma[r][2 * i + 1] ];
    *d2 = ROR16( *d2 ^ *a2 );
    *c += *d;
    *b2 = ROR2( *b2 ^ *c2, 63 );
}
|
||||
|
||||
// One full round: four G() steps over the columns of the 4x4 state `v`,
// then four over its diagonals. Expects the arrays `v` (16 state words)
// and `m` (16 message words) to be visible at the point of use.
// The body is wrapped in do { } while (0) so that `ROUND(r);` is exactly
// one statement and stays correct as the body of an unbraced if/for.
#define ROUND(r) \
    do { \
        G((r), 0, &v[0], &v[4], &v[ 8], &v[12], m); \
        G((r), 1, &v[1], &v[5], &v[ 9], &v[13], m); \
        G((r), 2, &v[2], &v[6], &v[10], &v[14], m); \
        G((r), 3, &v[3], &v[7], &v[11], &v[15], m); \
        G((r), 4, &v[0], &v[5], &v[10], &v[15], m); \
        G((r), 5, &v[1], &v[6], &v[11], &v[12], m); \
        G((r), 6, &v[2], &v[7], &v[ 8], &v[13], m); \
        G((r), 7, &v[3], &v[4], &v[ 9], &v[14], m); \
    } while (0)
|
||||
|
||||
// Append the 32-bit value `idx` to the partially filled block held in
// `state`, zero-pad that block, compress it with the final-block flag set
// (v[14] inverted) and copy the first `outlen` bytes of the resulting
// chaining value into `hash`.
// `state` is modified in place (buf, buflen, counter and h).
// No bounds checks are made on state->buflen or outlen.
void blake2b_gpu_hash(blake2b_state *state, uint32_t idx, uint8_t *hash, uint32_t outlen) {
    // idx is stored in the device's native byte order
    *(uint32_t *)(state->buf + state->buflen) = idx;
    state->buflen += 4;
    state->counter += state->buflen;

    // clear the unused tail of the block
    for (unsigned pad = 0; pad < BLAKE2B_BLOCKBYTES - state->buflen; pad++)
        state->buf[state->buflen + pad] = 0;

    // message block viewed as sixteen 64-bit words
    const uint64_t *block = (const uint64_t *)state->buf;
    uint64_t m[16];
    for (int w = 0; w < 16; w++)
        m[w] = block[w];

    // working state: chaining value followed by the initialization constants
    uint64_t v[16];
    for (int w = 0; w < 8; w++)
        v[w] = state->h[w];
    v[ 8] = 0x6a09e667f3bcc908;
    v[ 9] = 0xbb67ae8584caa73b;
    v[10] = 0x3c6ef372fe94f82b;
    v[11] = 0xa54ff53a5f1d36f1;
    v[12] = 0x510e527fade682d1 ^ state->counter;       // byte counter mixed in
    v[13] = 0x9b05688c2b3e6c1f;
    v[14] = 0x1f83d9abfb41bd6b ^ 0xffffffffffffffff;   // final-block flag
    v[15] = 0x5be0cd19137e2179;

    // twelve rounds, one sigma row each
    ROUND( 0 );
    ROUND( 1 );
    ROUND( 2 );
    ROUND( 3 );
    ROUND( 4 );
    ROUND( 5 );
    ROUND( 6 );
    ROUND( 7 );
    ROUND( 8 );
    ROUND( 9 );
    ROUND( 10 );
    ROUND( 11 );

    // fold both halves of the working state back into the chaining value
    for (int w = 0; w < 8; w++)
        state->h[w] ^= v[w] ^ v[w + 8];

    // emit the leading outlen bytes of h
    const uint8_t *digest = (const uint8_t *)state->h;
    for (unsigned pos = 0; pos < outlen; pos++)
        hash[pos] = digest[pos];
}
|
|
@ -1,156 +0,0 @@
|
|||
#if defined(__OPENCL_HOST__)
|
||||
#define __global
|
||||
#include "../blake2.h"
|
||||
#else
|
||||
typedef char int8_t;
|
||||
typedef uchar uint8_t;
|
||||
typedef short int16_t;
|
||||
typedef ushort uint16_t;
|
||||
typedef int int32_t;
|
||||
typedef uint uint32_t;
|
||||
typedef long int64_t;
|
||||
typedef ulong uint64_t;
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#define ALIGN(x) __declspec(align(x))
|
||||
#else
|
||||
#define ALIGN(x) __attribute__ ((__aligned__(x)))
|
||||
#endif
|
||||
|
||||
enum blake2b_constant
|
||||
{
|
||||
BLAKE2B_BLOCKBYTES = 128,
|
||||
BLAKE2B_OUTBYTES = 64,
|
||||
BLAKE2B_KEYBYTES = 64,
|
||||
BLAKE2B_SALTBYTES = 16,
|
||||
BLAKE2B_PERSONALBYTES = 16
|
||||
};
|
||||
|
||||
#pragma pack(push, 1)
|
||||
ALIGN( 64 ) typedef struct __blake2b_state {
|
||||
uint64_t h[8];
|
||||
uint8_t buf[BLAKE2B_BLOCKBYTES];
|
||||
uint16_t counter;
|
||||
uint8_t buflen;
|
||||
uint8_t lastblock;
|
||||
} blake2b_state;
|
||||
#pragma pack(pop)
|
||||
#endif
|
||||
|
||||
// Derived solver parameters. WN, WK and RESTBITS are not defined here and
// must be supplied before any of these macros is expanded.

#define COLLISION_BIT_LENGTH (WN / (WK+1))
#define COLLISION_BYTE_LENGTH ((COLLISION_BIT_LENGTH+7)/8)
#define FINAL_FULL_WIDTH (2*COLLISION_BYTE_LENGTH+sizeof(uint32_t)*(1 << (WK)))

// the WN-bit hash is split into NDIGITS digits of DIGITBITS bits each
#define NDIGITS (WK+1)
#define DIGITBITS (WN/(NDIGITS))
// number of 32-bit entries in a proof
#define PROOFSIZE (1u<<WK)
#define COMPRESSED_PROOFSIZE ((COLLISION_BIT_LENGTH+1)*PROOFSIZE*4/(8*sizeof(uint32_t)))
#define BASE (1u<<DIGITBITS)
#define NHASHES (2u*BASE)
// number of WN-bit hashes that fit in 512 bits, and their total size in bytes
#define HASHESPERBLAKE (512/WN)
#define HASHOUT (HASHESPERBLAKE*WN/8)

// 2_log of number of buckets
#define BUCKBITS (DIGITBITS-RESTBITS)

// number of buckets
#define NBUCKETS (1<<BUCKBITS)
// 2_log of number of slots per bucket
#define SLOTBITS (RESTBITS+1+1)
// number of slots per bucket
#define NSLOTS (1u<<SLOTBITS)
// number of per-xhash slots
#define XFULL 16
// SLOTBITS mask
#define SLOTMASK (NSLOTS-1)
// number of possible values of xhash (rest of n) bits
#define NRESTS (1u<<RESTBITS)
// number of blocks of hashes extracted from single 512 bit blake2b output
#define NBLOCKS ((NHASHES+HASHESPERBLAKE-1)/HASHESPERBLAKE)
// nothing larger found in 100000 runs
#define MAXSOLS 8

// number of 32-bit words needed to hold `bits` bits; the argument is
// parenthesized so that any expression can be passed safely
#define WORDS(bits) (((bits) + 31) / 32)
#define HASHWORDS0 WORDS(WN - DIGITBITS + RESTBITS)
#define HASHWORDS1 WORDS(WN - 2*DIGITBITS + RESTBITS)
|
||||
|
||||
typedef uint32_t proof[PROOFSIZE];
|
||||
|
||||
// tree = | xhash(RESTBITS) | slotid1(SLOTBITS) | slotid0(SLOTBITS) | bucketid(BUCKBITS) |
|
||||
// index = | bucketid(BUCKBITS) | slotid0(SLOTBITS) |
|
||||
typedef uint32_t tree;
|
||||
|
||||
typedef union hashunit {
|
||||
uint32_t word;
|
||||
uint8_t bytes[4];
|
||||
} hashunit;
|
||||
|
||||
typedef struct slot0 {
|
||||
tree attr;
|
||||
hashunit hash[HASHWORDS0];
|
||||
} slot0;
|
||||
|
||||
typedef struct slot1 {
|
||||
tree attr;
|
||||
hashunit hash[HASHWORDS1];
|
||||
} slot1;
|
||||
|
||||
// a bucket is NSLOTS treenodes
|
||||
typedef slot0 bucket0[NSLOTS];
|
||||
typedef slot1 bucket1[NSLOTS];
|
||||
// the N-bit hash consists of K+1 n-bit "digits"
|
||||
// each of which corresponds to a layer of NBUCKETS buckets
|
||||
typedef bucket0 digit0[NBUCKETS];
|
||||
typedef bucket1 digit1[NBUCKETS];
|
||||
|
||||
// manages hash and tree data
|
||||
typedef struct htalloc {
|
||||
__global bucket0 *trees0[(WK+1)/2];
|
||||
__global bucket1 *trees1[WK/2];
|
||||
} htalloc;
|
||||
|
||||
typedef uint32_t bsizes[NBUCKETS];
|
||||
|
||||
|
||||
typedef struct htlayout {
|
||||
htalloc hta;
|
||||
uint32_t prevhashunits;
|
||||
uint32_t nexthashunits;
|
||||
uint32_t dunits;
|
||||
uint32_t prevbo;
|
||||
uint32_t nextbo;
|
||||
} htlayout;
|
||||
|
||||
#if RESTBITS <= 6
|
||||
typedef uint8_t xslot;
|
||||
#else
|
||||
typedef uint16_t xslot;
|
||||
#endif
|
||||
|
||||
typedef struct collisiondata {
|
||||
#ifdef XBITMAP
|
||||
#if NSLOTS > 64
|
||||
#error cant use XBITMAP with more than 64 slots
|
||||
#endif
|
||||
uint64_t xhashmap[NRESTS];
|
||||
uint64_t xmap;
|
||||
#else
|
||||
xslot nxhashslots[NRESTS];
|
||||
xslot xhashslots[NRESTS][XFULL];
|
||||
xslot *xx;
|
||||
uint32_t n0;
|
||||
uint32_t n1;
|
||||
#endif
|
||||
uint32_t s0;
|
||||
} collisiondata;
|
||||
|
||||
|
||||
typedef struct equi {
|
||||
blake2b_state blake_ctx;
|
||||
htalloc hta;
|
||||
__global bsizes *nslots;
|
||||
__global proof *sols;
|
||||
uint32_t nsols;
|
||||
uint32_t nthreads;
|
||||
} equi;
|
File diff suppressed because it is too large
Load Diff
|
@ -1,555 +0,0 @@
|
|||
# 1 "input.cl"
|
||||
# 1 "<built-in>"
|
||||
# 1 "<command-line>"
|
||||
# 1 "/usr/include/stdc-predef.h" 1 3 4
|
||||
# 1 "<command-line>" 2
|
||||
# 1 "input.cl"
|
||||
# 1 "param.h" 1
|
||||
# 60 "param.h"
|
||||
typedef struct sols_s
|
||||
{
|
||||
uint nr;
|
||||
uint likely_invalids;
|
||||
uchar valid[2000];
|
||||
uint values[2000][(1 << 9)];
|
||||
} sols_t;
|
||||
# 2 "input.cl" 2
|
||||
# 36 "input.cl"
|
||||
// The eight 64-bit BLAKE2b initialization constants; kernel_round0 loads
// them into v[8..15] of its working state.
__constant ulong blake_iv[] =
{
    0x6a09e667f3bcc908,
    0xbb67ae8584caa73b,
    0x3c6ef372fe94f82b,
    0xa54ff53a5f1d36f1,
    0x510e527fade682d1,
    0x9b05688c2b3e6c1f,
    0x1f83d9abfb41bd6b,
    0x5be0cd19137e2179,
};
|
||||
|
||||
|
||||
|
||||
|
||||
__kernel
|
||||
void kernel_init_ht(__global char *ht)
|
||||
{
|
||||
uint tid = get_global_id(0);
|
||||
*(__global uint *)(ht + tid * ((1 << (((200 / (9 + 1)) + 1) - 20)) * 9) * 32) = 0;
|
||||
}
|
||||
# 80 "input.cl"
|
||||
// Store one entry (reference `i` plus the remaining Xi bits) in the hash
// table used by `round`.
//
//   round     round number 0..8; selects how the row is derived, where
//             inside the slot the data goes and how many bytes are kept
//   ht        base of the hash table; each row is 18 slots of 32 bytes and
//             begins with a 32-bit slot counter
//   i         value written in the 4 bytes immediately before the Xi data
//   xi0..xi3  Xi as 64-bit words, xi0 holding the lowest bits
//
// Returns 0 on success, or 1 when the row already holds 18 entries (the
// entry is dropped; the row counter has still been incremented).
//
// Alignment: inside a slot the Xi data starts at byte 8 + (round / 2) * 4.
// For rounds 2, 3, 6 and 7 that is an odd multiple of 4, so a 64-bit
// store through a `__global ulong *` cast would be misaligned. Those
// rounds use vstore2(as_uint2(x), ...), which only needs 32-bit alignment
// and writes exactly the bytes a ulong store would write.
uint ht_store(uint round, __global char *ht, uint i,
        ulong xi0, ulong xi1, ulong xi2, ulong xi3)
{
    uint                row;
    __global char       *p;
    uint                cnt;

    // 20-bit row index taken from the low 24 bits of Xi
    if (!(round % 2))
        row = (xi0 & 0xffff) | ((xi0 & 0xf00000) >> 4);
    else
        row = ((xi0 & 0xf0000) >> 0) |
            ((xi0 & 0xf00) << 4) | ((xi0 & 0xf00000) >> 12) |
            ((xi0 & 0xf) << 4) | ((xi0 & 0xf000) >> 12);

    // drop the low 16 bits of Xi before storing
    xi0 = (xi0 >> 16) | (xi1 << (64 - 16));
    xi1 = (xi1 >> 16) | (xi2 << (64 - 16));
    xi2 = (xi2 >> 16) | (xi3 << (64 - 16));

    // claim the next free slot of the row
    p = ht + row * ((1 << (((200 / (9 + 1)) + 1) - 20)) * 9) * 32;
    cnt = atomic_inc((__global uint *)p);
    if (cnt >= ((1 << (((200 / (9 + 1)) + 1) - 20)) * 9))
        return 1;
    p += cnt * 32 + (8 + ((round) / 2) * 4);

    // store "i" (always 4 bytes before Xi)
    *(__global uint *)(p - 4) = i;
    if (round == 0 || round == 1)
      {
        // 24 bytes, 8-byte aligned within the slot
        *(__global ulong *)(p + 0) = xi0;
        *(__global ulong *)(p + 8) = xi1;
        *(__global ulong *)(p + 16) = xi2;
      }
    else if (round == 2)
      {
        // 20 bytes, only 4-byte aligned
        vstore2(as_uint2(xi0), 0, (__global uint *)(p + 0));
        vstore2(as_uint2(xi1), 0, (__global uint *)(p + 8));
        *(__global uint *)(p + 16) = xi2;
      }
    else if (round == 3 || round == 4)
      {
        // 16 bytes; round 3 is only 4-byte aligned
        vstore2(as_uint2(xi0), 0, (__global uint *)(p + 0));
        vstore2(as_uint2(xi1), 0, (__global uint *)(p + 8));
      }
    else if (round == 5)
      {
        // 12 bytes, 8-byte aligned within the slot
        *(__global ulong *)(p + 0) = xi0;
        *(__global uint *)(p + 8) = xi1;
      }
    else if (round == 6 || round == 7)
      {
        // 8 bytes, only 4-byte aligned
        vstore2(as_uint2(xi0), 0, (__global uint *)(p + 0));
      }
    else if (round == 8)
      {
        // 4 bytes
        *(__global uint *)(p + 0) = xi0;
      }
    return 0;
}
|
||||
# 188 "input.cl"
|
||||
/*
 * One G mixing step on four state words with two message words x and y.
 * rotate() rotates left, so (ulong)64 - k is a right-rotation by k bits.
 */
#define BLAKE2B_G(va, vb, vc, vd, x, y) \
    do { \
        va = (va + vb + (x)); \
        vd = rotate((vd ^ va), (ulong)64 - 32); \
        vc = (vc + vd); \
        vb = rotate((vb ^ vc), (ulong)64 - 24); \
        va = (va + vb + (y)); \
        vd = rotate((vd ^ va), (ulong)64 - 16); \
        vc = (vc + vd); \
        vb = rotate((vb ^ vc), (ulong)64 - 63); \
    } while (0)

/*
 * One full round over the 16-word state `v`: four column steps followed by
 * four diagonal steps. m0..m15 are the message words in the order this
 * round consumes them.
 */
#define BLAKE2B_ROUND(v, m0, m1, m2, m3, m4, m5, m6, m7, \
                      m8, m9, m10, m11, m12, m13, m14, m15) \
    do { \
        BLAKE2B_G(v[0], v[4], v[8],  v[12], m0,  m1);  \
        BLAKE2B_G(v[1], v[5], v[9],  v[13], m2,  m3);  \
        BLAKE2B_G(v[2], v[6], v[10], v[14], m4,  m5);  \
        BLAKE2B_G(v[3], v[7], v[11], v[15], m6,  m7);  \
        BLAKE2B_G(v[0], v[5], v[10], v[15], m8,  m9);  \
        BLAKE2B_G(v[1], v[6], v[11], v[12], m10, m11); \
        BLAKE2B_G(v[2], v[7], v[8],  v[13], m12, m13); \
        BLAKE2B_G(v[3], v[4], v[9],  v[14], m14, m15); \
    } while (0)

/*
 * Round 0: hash every input value and seed the hash table.
 *
 * The 2^20 input values are split evenly across the work-items. For each
 * input, a 12-round compression is run on top of `blake_state`, with the
 * input value placed in the upper 32 bits of message word 1 and every other
 * message word equal to 0. The resulting 50-byte digest (h[0..5] plus the
 * low 16 bits of h[6]) is cut into two records starting at byte 0 and byte
 * 25, stored through ht_store() with indices input * 2 and input * 2 + 1.
 *
 * blake_state  eight 64-bit words of precomputed hash state
 * ht           destination hash table
 * debug        not referenced in this body
 */
__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
void kernel_round0(__global ulong *blake_state, __global char *ht,
                   __global uint *debug)
{
    const uint tid = get_global_id(0);
    const uint per_item = (1 << (200 / (9 + 1))) / get_global_size(0);
    const uint first = tid * per_item;
    const uint last = (tid + 1) * per_item;
    ulong v[16];
    /* Count of records rejected by ht_store(); only accumulated here. */
    uint dropped = 0;

    for (uint input = first; input < last; input++) {
        /* The only non-zero message word in the rounds below. */
        const ulong word1 = (ulong)input << 32;

        /* Working state: chaining value in the low half, IV in the high half. */
        for (uint k = 0; k < 8; k++) {
            v[k] = blake_state[k];
            v[k + 8] = blake_iv[k];
        }
        /* 144 = 140 + 4; presumably the total message length in bytes
         * (TODO confirm against the host-side hashing code). */
        v[12] ^= 140 + 4;
        /* Invert every bit of v[14]; presumably the last-block flag. */
        v[14] ^= -1;

        /* Twelve rounds; word1 sits wherever message word 1 is scheduled. */
        BLAKE2B_ROUND(v, 0, word1, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0);
        BLAKE2B_ROUND(v, 0, 0, 0, 0,  0, 0, 0, 0,  word1, 0, 0, 0,  0, 0, 0, 0);
        BLAKE2B_ROUND(v, 0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, word1, 0, 0);
        BLAKE2B_ROUND(v, 0, 0, 0, word1,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0);
        BLAKE2B_ROUND(v, 0, 0, 0, 0,  0, 0, 0, 0,  0, word1, 0, 0,  0, 0, 0, 0);
        BLAKE2B_ROUND(v, 0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, word1, 0);
        BLAKE2B_ROUND(v, 0, 0, word1, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0);
        BLAKE2B_ROUND(v, 0, 0, 0, 0,  0, word1, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0);
        BLAKE2B_ROUND(v, 0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  word1, 0, 0, 0);
        BLAKE2B_ROUND(v, 0, 0, 0, 0,  0, 0, word1, 0,  0, 0, 0, 0,  0, 0, 0, 0);
        BLAKE2B_ROUND(v, 0, word1, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0);
        BLAKE2B_ROUND(v, 0, 0, 0, 0,  0, 0, 0, 0,  word1, 0, 0, 0,  0, 0, 0, 0);

        /* Fold the two state halves back into the chaining value. */
        ulong h[7];
        for (uint k = 0; k < 6; k++) {
            h[k] = blake_state[k] ^ v[k] ^ v[k + 8];
        }
        /* Only two bytes of the seventh word are kept. */
        h[6] = (blake_state[6] ^ v[6] ^ v[14]) & 0xffff;

        /* First record: digest starting at byte 0. */
        dropped += ht_store(0, ht, input * 2,
                            h[0],
                            h[1],
                            h[2],
                            h[3]);
        /* Second record: digest starting at byte 25 (h[3] shifted by 8 bits). */
        dropped += ht_store(0, ht, input * 2 + 1,
                            (h[3] >> 8) | (h[4] << (64 - 8)),
                            (h[4] >> 8) | (h[5] << (64 - 8)),
                            (h[5] >> 8) | (h[6] << (64 - 8)),
                            (h[6] >> 8));
    }
}

#undef BLAKE2B_ROUND
#undef BLAKE2B_G
|
||||
# 415 "input.cl"
|
||||
/*
 * XOR the Xi data of two records taken from the same row and store the
 * result in the destination table.
 *
 * round    round being computed (1..8); decides how many bytes of `a` and
 *          `b` are read and whether one leading byte is shifted out
 * ht_dst   destination hash table, passed through to ht_store()
 * row      source row index, packed into the stored reference word
 * slot_a   slot index of the first record (low 6 bits are kept)
 * slot_b   slot index of the second record (low 6 bits are kept)
 * a, b     pointers to the Xi data of the two records
 *
 * Returns the value of ht_store() (1 if the record was dropped), or 0 when
 * nothing is stored: either the first 16 bytes of the XOR are all zero, or
 * `round` is outside 1..8.
 *
 * The Xi words start out as zero so that an unexpected `round` value can
 * never read indeterminate locals; such a call falls into the all-zero
 * rejection below instead.
 */
uint xor_and_store(uint round, __global char *ht_dst, uint row,
                   uint slot_a, uint slot_b, __global ulong *a, __global ulong *b)
{
    ulong xi0 = 0;
    ulong xi1 = 0;
    ulong xi2 = 0;

    if (round == 1 || round == 2) {
        /* XOR 24 bytes. */
        xi0 = *(a++) ^ *(b++);
        xi1 = *(a++) ^ *(b++);
        xi2 = *a ^ *b;
        if (round == 2) {
            /* Shift out the lowest byte of the 192-bit value. */
            xi0 = (xi0 >> 8) | (xi1 << (64 - 8));
            xi1 = (xi1 >> 8) | (xi2 << (64 - 8));
            xi2 = (xi2 >> 8);
        }
    } else if (round == 3) {
        /* XOR 20 bytes: two 64-bit words and one 32-bit word. */
        xi0 = *a++ ^ *b++;
        xi1 = *a++ ^ *b++;
        xi2 = *(__global uint *)a ^ *(__global uint *)b;
    } else if (round == 4 || round == 5) {
        /* XOR 16 bytes. */
        xi0 = *a++ ^ *b++;
        xi1 = *a ^ *b;
        if (round == 4) {
            /* Shift out the lowest byte. */
            xi0 = (xi0 >> 8) | (xi1 << (64 - 8));
            xi1 = (xi1 >> 8);
        }
    } else if (round == 6) {
        /* XOR 12 bytes, then shift out the lowest byte. */
        xi0 = *a++ ^ *b++;
        xi1 = *(__global uint *)a ^ *(__global uint *)b;
        xi0 = (xi0 >> 8) | (xi1 << (64 - 8));
        xi1 = (xi1 >> 8);
    } else if (round == 7 || round == 8) {
        /* XOR 8 bytes. */
        xi0 = *a ^ *b;
        if (round == 8) {
            /* Shift out the lowest byte. */
            xi0 = (xi0 >> 8);
        }
    }

    /* An all-zero result in the first two words is never stored
     * (presumably both records derive from the same inputs; confirm). */
    if (!xi0 && !xi1) {
        return 0;
    }

    /* Reference word: row in bits 12 and up, slot_b in bits 6-11,
     * slot_a in bits 0-5. */
    return ht_store(round, ht_dst,
                    ((row << 12) | ((slot_b & 0x3f) << 6) | (slot_a & 0x3f)),
                    xi0, xi1, xi2, 0);
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * One solver round for a single row of the source hash table.
 *
 * Each work-item handles the row whose index is its global id: it reads the
 * first byte of every occupied slot's Xi, records slot pairs whose masked
 * first bytes match, then calls xor_and_store() on every recorded pair so
 * the result lands in ht_dst.  For rounds below 8 the row's slot counter in
 * ht_src is reset to 0 afterwards.
 *
 * round   round number, forwarded to xor_and_store().
 * ht_src  hash table to read from.
 * ht_dst  hash table written through xor_and_store().
 * debug   not accessed by this function.
 */
void equihash_round(uint round, __global char *ht_src, __global char *ht_dst,
        __global uint *debug)
{
    // table geometry, spelled out exactly as in the preprocessed source
    const uint slots_per_row = ((1 << (((200 / (9 + 1)) + 1) - 20)) * 9);
    const uint slot_len = 32;
    // byte offset of Xi inside a slot of the table being read
    const uint xi_offset = (8 + ((round - 1) / 2) * 4);
    // with a zero mask every pair of slots in the row compares equal
    const uchar mask = 0;

    const uint tid = get_global_id(0);
    __global char *row_base = ht_src + tid * slots_per_row * slot_len;

    uchar first_words[((1 << (((200 / (9 + 1)) + 1) - 20)) * 9)];
    ushort collisions[((1 << (((200 / (9 + 1)) + 1) - 20)) * 9) * 3];
    const uint max_coll = sizeof (collisions) / sizeof (*collisions);
    uint nr_coll = 0;
    // tallies only: neither counter is written anywhere by this function
    uint dropped_coll = 0;
    uint dropped_stor = 0;

    // the row starts with its slot counter; clamp it to the row capacity
    const uint cnt = min(*(__global uint *)row_base, slots_per_row);

    // gather the first byte of each slot's Xi
    for (uint s = 0; s < cnt; s++)
        first_words[s] =
            *(__global uchar *)(row_base + xi_offset + s * slot_len);

    // record matching pairs as (high byte = later slot, low byte = earlier)
    for (uint lo = 0; lo < cnt; lo++)
    {
        for (uint hi = lo + 1; hi < cnt; hi++)
        {
            if ((first_words[lo] & mask) != (first_words[hi] & mask))
                continue;
            if (nr_coll < max_coll)
                collisions[nr_coll++] =
                    ((ushort)hi << 8) | ((ushort)lo & 0xff);
            else
                dropped_coll++;
        }
    }

    // XOR every recorded pair into the destination table
    for (uint n = 0; n < nr_coll; n++)
    {
        const uint slot_a = collisions[n] & 0xff;
        const uint slot_b = collisions[n] >> 8;
        __global ulong *a = (__global ulong *)
            (row_base + slot_a * slot_len + xi_offset);
        __global ulong *b = (__global ulong *)
            (row_base + slot_b * slot_len + xi_offset);

        dropped_stor += xor_and_store(round, ht_dst, tid, slot_a, slot_b, a, b);
    }

    // zero this row's slot counter in the source table
    if (round < 8)
        *(__global uint *)row_base = 0;
}
|
||||
# 585 "input.cl"
|
||||
// Kernel entry points for rounds 1 to 7.  Each one requires a work-group
// size of 64x1x1 and simply forwards its buffers to equihash_round() with
// the round number fixed.

__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
void kernel_round1(__global char *ht_src, __global char *ht_dst,
        __global uint *debug)
{
    equihash_round(1, ht_src, ht_dst, debug);
}

__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
void kernel_round2(__global char *ht_src, __global char *ht_dst,
        __global uint *debug)
{
    equihash_round(2, ht_src, ht_dst, debug);
}

__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
void kernel_round3(__global char *ht_src, __global char *ht_dst,
        __global uint *debug)
{
    equihash_round(3, ht_src, ht_dst, debug);
}

__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
void kernel_round4(__global char *ht_src, __global char *ht_dst,
        __global uint *debug)
{
    equihash_round(4, ht_src, ht_dst, debug);
}

__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
void kernel_round5(__global char *ht_src, __global char *ht_dst,
        __global uint *debug)
{
    equihash_round(5, ht_src, ht_dst, debug);
}

__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
void kernel_round6(__global char *ht_src, __global char *ht_dst,
        __global uint *debug)
{
    equihash_round(6, ht_src, ht_dst, debug);
}

__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
void kernel_round7(__global char *ht_src, __global char *ht_dst,
        __global uint *debug)
{
    equihash_round(7, ht_src, ht_dst, debug);
}
|
||||
|
||||
|
||||
/*
 * Kernel entry point for round 8.
 *
 * Runs equihash_round() with the round number fixed at 8; in addition,
 * the work-item with global id 0 zeroes the `nr` and `likely_invalids`
 * counters of the solution buffer.
 *
 * ht_src, ht_dst, debug  forwarded unchanged to equihash_round().
 * sols                   solution buffer whose two counters are reset.
 */
__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
void kernel_round8(__global char *ht_src, __global char *ht_dst,
        __global uint *debug, __global sols_t *sols)
{
    const uint tid = get_global_id(0);

    equihash_round(8, ht_src, ht_dst, debug);

    if (tid == 0)
    {
        sols->likely_invalids = 0;
        sols->nr = 0;
    }
}
|
||||
|
||||
/*
 * Read the 32-bit reference word stored 4 bytes before the Xi payload of
 * one slot.
 *
 * ht         hash table to read from.
 * xi_offset  byte offset of Xi inside a slot of that table.
 * row, slot  coordinates of the slot.
 *
 * Returns the 32-bit word found at that location.
 */
uint expand_ref(__global char *ht, uint xi_offset, uint row, uint slot)
{
    const uint slots_per_row = ((1 << (((200 / (9 + 1)) + 1) - 20)) * 9);
    const uint slot_len = 32;
    __global char *ref_addr =
        ht + row * slots_per_row * slot_len + slot * slot_len + xi_offset - 4;

    return *(__global uint *)ref_addr;
}
|
||||
|
||||
/*
 * Expand, in place, an array of nr_inputs reference words into
 * 2 * nr_inputs reference words.
 *
 * Each input word is decoded as row (bits 12 and up), one slot in bits
 * 6..11 and another slot in bits 0..5; the two words fetched with
 * expand_ref() for (row, slot) replace it.  The array is walked from its
 * last element to its first so that inputs are consumed before their
 * positions are overwritten.
 *
 * ins        array holding nr_inputs words on entry; it must have room
 *            for 2 * nr_inputs words.
 * nr_inputs  number of words to expand (the loop assumes at least one).
 * htabs      the two hash tables; the one used is htabs[round % 2].
 * round      round whose table the references point into.
 */
void expand_refs(__global uint *ins, uint nr_inputs, __global char **htabs,
        uint round)
{
    __global char *ht = htabs[round % 2];
    const uint xi_offset = (8 + ((round) / 2) * 4);

    for (uint src = nr_inputs - 1, dst = nr_inputs * 2 - 1; ; src--, dst -= 2)
    {
        // read the input once: ins[dst - 1] aliases ins[src] when src == 0
        const uint ref = ins[src];
        const uint row = (ref >> 12);

        ins[dst] = expand_ref(ht, xi_offset, row, ((ref >> 6) & 0x3f));
        ins[dst - 1] = expand_ref(ht, xi_offset, row, (ref & 0x3f));

        if (src == 0)
            break;
    }
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * Record one candidate solution and expand its two seed references into
 * the full list of values.
 *
 * An entry index is claimed by atomically incrementing sols->nr; if that
 * index is 2000 or more the candidate is dropped.  The entry's valid flag
 * is cleared while it is being filled and set to 1 once expansion through
 * rounds 7 down to 0 has finished, doubling the number of values at each
 * step.
 *
 * htabs       the two hash tables, forwarded to expand_refs().
 * sols        solution buffer receiving the entry.
 * ref0, ref1  the two reference words that seed the entry.
 */
void potential_sol(__global char **htabs, __global sols_t *sols,
        uint ref0, uint ref1)
{
    const uint sol_i = atomic_inc(&sols->nr);

    if (sol_i >= 2000)
        return ;

    __global uint *values = &(sols->values[sol_i][0]);

    sols->valid[sol_i] = 0;
    values[0] = ref0;
    values[1] = ref1;

    uint nr_values = 2;
    uint round = 9 - 1;
    while (round > 0)
    {
        round--;
        expand_refs(values, nr_values, htabs, round);
        nr_values *= 2;
    }

    sols->valid[sol_i] = 1;
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * Scan one row of the last-round hash table for candidate solutions.
 *
 * Each work-item handles the row whose index is its global id.  Every pair
 * of occupied slots whose first 32-bit Xi word matches in its low 24 bits
 * is a candidate; the reference words of both slots are packed into a
 * 64-bit value and kept in a 5-entry local list.  Matches found once the
 * list is full only increment sols->likely_invalids.  Every kept candidate
 * is then passed to potential_sol().
 *
 * ht0, ht1  the two hash tables; the row is read from the one at index
 *           (9 - 1) % 2.
 * sols      solution buffer updated through potential_sol().
 */
__kernel
void kernel_sols(__global char *ht0, __global char *ht1, __global sols_t *sols)
{
    const uint slots_per_row = ((1 << (((200 / (9 + 1)) + 1) - 20)) * 9);
    const uint slot_len = 32;
    const uint xi_offset = (8 + ((9 - 1) / 2) * 4);
    const uint mask = 0xffffff;

    const uint tid = get_global_id(0);
    __global char *htabs[2] = { ht0, ht1 };
    __global char *row_base =
        htabs[(9 - 1) % 2] + tid * slots_per_row * slot_len;

    ulong collisions[5];
    const uint max_coll = sizeof (collisions) / sizeof (*collisions);
    uint coll = 0;

    // the row starts with its slot counter; clamp it to the row capacity
    const uint cnt = min(*(__global uint *)row_base, slots_per_row);

    for (uint i = 0; i < cnt; i++)
    {
        __global char *xi_i = row_base + xi_offset + i * slot_len;

        for (uint j = i + 1; j < cnt; j++)
        {
            __global char *xi_j = row_base + xi_offset + j * slot_len;

            if (((*(__global uint *)xi_i) & mask) !=
                    ((*(__global uint *)xi_j) & mask))
                continue;

            // the reference word sits 4 bytes before each slot's Xi
            const uint ref_i = *(__global uint *)(xi_i - 4);
            const uint ref_j = *(__global uint *)(xi_j - 4);

            if (coll < max_coll)
                collisions[coll++] = ((ulong)ref_i << 32) | ref_j;
            else
                atomic_inc(&sols->likely_invalids);
        }
    }

    // nothing happens when no candidate was kept
    for (uint c = 0; c < coll; c++)
        potential_sol(htabs, sols, collisions[c] >> 32,
                collisions[c] & 0xffffffff);
}
|
Binary file not shown.
Binary file not shown.
|
@ -1,526 +0,0 @@
|
|||
# 1 "input.cl"
|
||||
# 1 "<built-in>"
|
||||
# 1 "<command-line>"
|
||||
# 1 "/usr/include/stdc-predef.h" 1 3 4
|
||||
# 1 "<command-line>" 2
|
||||
# 1 "input.cl"
|
||||
# 1 "param.h" 1
|
||||
# 60 "param.h"
|
||||
/*
 * Solution buffer shared with the kernels.
 *
 * The kernels access the second counter as `sols->likely_invalids`, so the
 * field must carry exactly that name.
 */
typedef struct sols_s
{
    uint nr;                      // count of entries handed out so far
    uint likely_invalids;         // count of matches dropped when a work-item's candidate list was full
    uchar valid[2000];            // per-entry flag, 1 once the entry is complete
    uint values[2000][(1 << 9)];  // per-entry list of (1 << 9) values
} sols_t;
|
||||
# 2 "input.cl" 2
|
||||
# 35 "input.cl"
|
||||
// Eight 64-bit initialisation constants (the BLAKE2b IV); kernel_round0
// copies them into v[8]..v[15] of its working state.
__constant ulong blake_iv[] =
{
    0x6a09e667f3bcc908,
    0xbb67ae8584caa73b,
    0x3c6ef372fe94f82b,
    0xa54ff53a5f1d36f1,
    0x510e527fade682d1,
    0x9b05688c2b3e6c1f,
    0x1f83d9abfb41bd6b,
    0x5be0cd19137e2179,
};
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * Reset one row of a hash table: the work-item with global id `tid`
 * writes 0 to the 32-bit slot counter at the start of row `tid`.
 *
 * ht  hash table whose row counters are cleared.
 */
__kernel
void kernel_init_ht(__global char *ht)
{
    const uint slots_per_row = ((1 << (((200 / (9 + 1)) + 1) - 16)) * 3);
    const uint slot_len = 32;
    const uint tid = get_global_id(0);
    __global uint *row_counter =
        (__global uint *)(ht + tid * slots_per_row * slot_len);

    *row_counter = 0;
}
|
||||
# 79 "input.cl"
|
||||
/*
 * Insert one Xi value, together with its reference word, into a hash table.
 *
 * The row is derived from the low bits of xi0, those 16 bits are then
 * shifted out of the value, a slot is claimed by atomically incrementing
 * the row's counter, and a round-dependent number of bytes is written.
 *
 * round  round whose table is being filled (0..8); selects the row
 *        formula, the Xi offset inside the slot and the bytes stored.
 * ht     hash table to write to.
 * i      reference word stored 4 bytes before the Xi payload.
 * xi0..xi3  the value, least-significant 64-bit word first.
 *
 * Returns 1 if the row was already full (nothing is written), 0 otherwise.
 */
uint ht_store(uint round, __global char *ht, uint i,
        ulong xi0, ulong xi1, ulong xi2, ulong xi3)
{
    const uint slots_per_row = ((1 << (((200 / (9 + 1)) + 1) - 16)) * 3);
    const uint slot_len = 32;
    uint row;

    if (round % 2 == 0)
    {
        // even rounds: the row is the low 16 bits as they are
        row = (xi0 & 0xffff);
    }
    else
    {
        // odd rounds: the row is assembled from four nibbles of the low
        // three bytes; bytes 0xab, 0xcd, 0xef (lowest first) give 0xdebc
        row = ((xi0 & 0xf00) << 4) | ((xi0 & 0xf00000) >> 12) |
              ((xi0 & 0xf) << 4) | ((xi0 & 0xf000) >> 12);
    }

    // shift the low 16 bits out of the 256-bit value
    const ulong w0 = (xi0 >> 16) | (xi1 << (64 - 16));
    const ulong w1 = (xi1 >> 16) | (xi2 << (64 - 16));
    const ulong w2 = (xi2 >> 16) | (xi3 << (64 - 16));

    // claim the next free slot of the row; the counter sits at its start
    __global char *row_base = ht + row * slots_per_row * slot_len;
    const uint slot = atomic_inc((__global uint *)row_base);
    if (slot >= slots_per_row)
        return 1;

    __global char *xi = row_base + (slot * slot_len + (8 + ((round) / 2) * 4));

    // the reference word always goes 4 bytes before Xi
    *(__global uint *)(xi - 4) = i;

    switch (round)
    {
    case 0:
    case 1:
        // store 24 bytes
        *(__global ulong *)(xi + 0) = w0;
        *(__global ulong *)(xi + 8) = w1;
        *(__global ulong *)(xi + 16) = w2;
        break;
    case 2:
        // store 20 bytes
        *(__global ulong *)(xi + 0) = w0;
        *(__global ulong *)(xi + 8) = w1;
        *(__global uint *)(xi + 16) = w2;
        break;
    case 3:
    case 4:
        // store 16 bytes
        *(__global ulong *)(xi + 0) = w0;
        *(__global ulong *)(xi + 8) = w1;
        break;
    case 5:
        // store 12 bytes
        *(__global ulong *)(xi + 0) = w0;
        *(__global uint *)(xi + 8) = w1;
        break;
    case 6:
    case 7:
        // store 8 bytes
        *(__global ulong *)(xi + 0) = w0;
        break;
    case 8:
        // store 4 bytes
        *(__global uint *)(xi + 0) = w0;
        break;
    default:
        // any other round stores only the reference word
        break;
    }

    return 0;
}
|
||||
# 187 "input.cl"
|
||||
__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
|
||||
void kernel_round0(__global ulong *blake_state, __global char *ht,
|
||||
__global uint *debug)
|
||||
{
|
||||
uint tid = get_global_id(0);
|
||||
ulong v[16];
|
||||
uint inputs_per_thread = (1 << (200 / (9 + 1))) / get_global_size(0);
|
||||
uint input = tid * inputs_per_thread;
|
||||
uint input_end = (tid + 1) * inputs_per_thread;
|
||||
uint dropped = 0;
|
||||
while (input < input_end)
|
||||
{
|
||||
|
||||
|
||||
ulong word1 = (ulong)input << 32;
|
||||
|
||||
v[0] = blake_state[0];
|
||||
v[1] = blake_state[1];
|
||||
v[2] = blake_state[2];
|
||||
v[3] = blake_state[3];
|
||||
v[4] = blake_state[4];
|
||||
v[5] = blake_state[5];
|
||||
v[6] = blake_state[6];
|
||||
v[7] = blake_state[7];
|
||||
v[8] = blake_iv[0];
|
||||
v[9] = blake_iv[1];
|
||||
v[10] = blake_iv[2];
|
||||
v[11] = blake_iv[3];
|
||||
v[12] = blake_iv[4];
|
||||
v[13] = blake_iv[5];
|
||||
v[14] = blake_iv[6];
|
||||
v[15] = blake_iv[7];
|
||||
|
||||
v[12] ^= 140 + 4 ;
|
||||
|
||||
v[14] ^= -1;
|
||||
|
||||
|
||||
v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + word1); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);;
|
||||
v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);;
|
||||
|
||||
v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);;
|
||||
v[0] = (v[0] + v[5] + word1); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);;
|
||||
|
||||
v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);;
|
||||
v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + word1); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);;
|
||||
|
||||
v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + word1); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);;
|
||||
v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);;
|
||||
|
||||
v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);;
|
||||
v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + word1); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);;
|
||||
|
||||
v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);;
|
||||
v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[4] + word1); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);;
|
||||
|
||||
v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[5] + word1); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);;
|
||||
v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);;
|
||||
|
||||
v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + word1); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);;
|
||||
v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);;
|
||||
|
||||
v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);;
|
||||
v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[7] + word1); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);;
|
||||
|
||||
v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[7] + word1); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);;
|
||||
v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);;
|
||||
|
||||
v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + word1); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);;
|
||||
v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);;
|
||||
|
||||
v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);;
|
||||
v[0] = (v[0] + v[5] + word1); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);;
|
||||
|
||||
|
||||
|
||||
ulong h[7];
|
||||
h[0] = blake_state[0] ^ v[0] ^ v[8];
|
||||
h[1] = blake_state[1] ^ v[1] ^ v[9];
|
||||
h[2] = blake_state[2] ^ v[2] ^ v[10];
|
||||
h[3] = blake_state[3] ^ v[3] ^ v[11];
|
||||
h[4] = blake_state[4] ^ v[4] ^ v[12];
|
||||
h[5] = blake_state[5] ^ v[5] ^ v[13];
|
||||
h[6] = (blake_state[6] ^ v[6] ^ v[14]) & 0xffff;
|
||||
|
||||
|
||||
|
||||
dropped += ht_store(0, ht, input * 2,
|
||||
h[0],
|
||||
h[1],
|
||||
h[2],
|
||||
h[3]);
|
||||
dropped += ht_store(0, ht, input * 2 + 1,
|
||||
(h[3] >> 8) | (h[4] << (64 - 8)),
|
||||
(h[4] >> 8) | (h[5] << (64 - 8)),
|
||||
(h[5] >> 8) | (h[6] << (64 - 8)),
|
||||
(h[6] >> 8));
|
||||
|
||||
|
||||
|
||||
|
||||
input++;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
# 409 "input.cl"
|
||||
/*
 * XOR the Xi words of two slots and hand the result to ht_store().
 *
 * round          - round number; selects how many words are read from each
 *                  slot (see the branches below).
 * ht_dst         - destination hash table, forwarded to ht_store().
 * row            - row the two slots belong to; packed into bits 16 and up
 *                  of the reference passed to ht_store().
 * slot_a, slot_b - slot indices; the low 8 bits of slot_a go to bits 0-7 and
 *                  the low 8 bits of slot_b to bits 8-15 of the reference.
 * a, b           - first Xi word to XOR in each of the two slots.
 *
 * Returns 0 when nothing is stored (the first two XORed words are both zero,
 * or round is outside 1..8); otherwise returns ht_store()'s result.
 */
uint xor_and_store(uint round, __global char *ht_dst, uint row,
	uint slot_a, uint slot_b, __global ulong *a, __global ulong *b)
{
    /*
     * Start from zero so that a round outside 1..8 never reads indeterminate
     * values: it falls through to the "all zero" test below and stores
     * nothing.
     */
    ulong	xi0 = 0;
    ulong	xi1 = 0;
    ulong	xi2 = 0;

    if (round == 1 || round == 2)
      {
	/* three full 64-bit words */
	xi0 = a[0] ^ b[0];
	xi1 = a[1] ^ b[1];
	xi2 = a[2] ^ b[2];
      }
    else if (round == 3)
      {
	/* two 64-bit words followed by one 32-bit word */
	xi0 = a[0] ^ b[0];
	xi1 = a[1] ^ b[1];
	xi2 = *(__global uint *)(a + 2) ^ *(__global uint *)(b + 2);
      }
    else if (round == 4 || round == 5)
      {
	/* two 64-bit words */
	xi0 = a[0] ^ b[0];
	xi1 = a[1] ^ b[1];
      }
    else if (round == 6)
      {
	/* one 64-bit word followed by one 32-bit word */
	xi0 = a[0] ^ b[0];
	xi1 = *(__global uint *)(a + 1) ^ *(__global uint *)(b + 1);
      }
    else if (round == 7 || round == 8)
      {
	/* a single 64-bit word */
	xi0 = a[0] ^ b[0];
      }

    /* Nothing to store when both leading words cancelled out completely. */
    if (!xi0 && !xi1)
	return 0;

    return ht_store(round, ht_dst,
	    ((row << 16) | ((slot_b & 0xff) << 8) | (slot_a & 0xff)),
	    xi0, xi1, xi2, 0);
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * Process one row of the source hash table for the given round.
 *
 * Each work-item handles the row selected by get_global_id(0): it reads the
 * row's slot counter, gathers one byte per occupied slot, pairs up the slots
 * whose masked bytes are equal, and passes each pair to xor_and_store() so
 * the XOR of the two entries lands in ht_dst.
 *
 * round  - round number (the kernel wrappers pass 1..8).
 * ht_src - hash table read by this round.
 * ht_dst - hash table written by this round (through xor_and_store()).
 * debug  - not referenced in this body.
 */
void equihash_round(uint round, __global char *ht_src, __global char *ht_dst,
	__global uint *debug)
{
    enum
      {
	SLOTS_PER_ROW = ((1 << (((200 / (9 + 1)) + 1) - 16)) * 3),
	SLOT_BYTES = 32,
	MAX_COLLISIONS = SLOTS_PER_ROW * 2
      };
    uint		tid = get_global_id(0);
    uint		tlid = get_local_id(0);	/* not referenced below */
    /* masked first byte of every occupied slot in this row */
    uchar		keys[SLOTS_PER_ROW];
    /* colliding pairs, packed as (second slot << 8) | first slot */
    ushort		collisions[MAX_COLLISIONS];
    uint		nr_coll = 0;
    /* tallies only; nothing in this body reads them back */
    uint		dropped_coll = 0;
    uint		dropped_stor = 0;
    uint		i, j, n;

    /* byte offset of the Xi data inside a slot for the source table */
    uint		xi_offset = (8 + ((round - 1) / 2) * 4);
    /* even rounds compare the low nibble, odd rounds the high nibble */
    uchar		mask = (round % 2) ? 0xf0 : 0x0f;
    /* even rounds start XORing one byte further into the slot */
    uint		adj = (round % 2) ? 0 : 1;

    /* The first 4 bytes of the row hold the number of stored slots. */
    __global char	*row_base = ht_src + tid * SLOTS_PER_ROW * SLOT_BYTES;
    uint		cnt = min(*(__global uint *)row_base,
				    (uint)SLOTS_PER_ROW);

    for (i = 0; i < cnt; i++)
	keys[i] = *(__global uchar *)(row_base + xi_offset + i * SLOT_BYTES)
	    & mask;

    /* Record every pair (i < j) whose masked bytes match. */
    for (i = 0; i < cnt; i++)
      {
	for (j = i + 1; j < cnt; j++)
	  {
	    if (keys[i] != keys[j])
		continue;
	    if (nr_coll >= MAX_COLLISIONS)
	      {
		dropped_coll++;
		continue;
	      }
	    collisions[nr_coll++] = ((ushort)j << 8) | ((ushort)i & 0xff);
	  }
      }

    /* XOR each recorded pair and store the result in the destination. */
    for (n = 0; n < nr_coll; n++)
      {
	__global ulong	*a, *b;

	i = collisions[n] & 0xff;
	j = collisions[n] >> 8;
	a = (__global ulong *)(row_base + i * SLOT_BYTES + xi_offset + adj);
	b = (__global ulong *)(row_base + j * SLOT_BYTES + xi_offset + adj);
	dropped_stor += xor_and_store(round, ht_dst, tid, i, j, a, b);
      }
}
|
||||
# 557 "input.cl"
|
||||
/*
 * Kernel entry points for rounds 1 through 8.  Each one forwards its
 * arguments to equihash_round() with the round number as a constant and
 * requires a work-group size of 64x1x1.
 */
__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
void kernel_round1(__global char *ht_src, __global char *ht_dst,
	__global uint *debug)
{
    equihash_round(1, ht_src, ht_dst, debug);
}

__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
void kernel_round2(__global char *ht_src, __global char *ht_dst,
	__global uint *debug)
{
    equihash_round(2, ht_src, ht_dst, debug);
}

__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
void kernel_round3(__global char *ht_src, __global char *ht_dst,
	__global uint *debug)
{
    equihash_round(3, ht_src, ht_dst, debug);
}

__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
void kernel_round4(__global char *ht_src, __global char *ht_dst,
	__global uint *debug)
{
    equihash_round(4, ht_src, ht_dst, debug);
}

__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
void kernel_round5(__global char *ht_src, __global char *ht_dst,
	__global uint *debug)
{
    equihash_round(5, ht_src, ht_dst, debug);
}

__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
void kernel_round6(__global char *ht_src, __global char *ht_dst,
	__global uint *debug)
{
    equihash_round(6, ht_src, ht_dst, debug);
}

__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
void kernel_round7(__global char *ht_src, __global char *ht_dst,
	__global uint *debug)
{
    equihash_round(7, ht_src, ht_dst, debug);
}

__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
void kernel_round8(__global char *ht_src, __global char *ht_dst,
	__global uint *debug)
{
    equihash_round(8, ht_src, ht_dst, debug);
}
|
||||
|
||||
/*
 * Fetch the 32-bit reference stored in a slot.
 *
 * ht        - hash table to read from.
 * xi_offset - byte offset of the Xi data inside a slot; the reference is the
 *             uint located 4 bytes before it.
 * row, slot - position of the slot (rows hold a fixed number of 32-byte
 *             slots).
 *
 * Returns the uint read from that location.
 */
uint expand_ref(__global char *ht, uint xi_offset, uint row, uint slot)
{
    __global char	*slot_base =
	ht + row * ((1 << (((200 / (9 + 1)) + 1) - 16)) * 3) * 32 + slot * 32;
    __global uint	*ref = (__global uint *)(slot_base + xi_offset - 4);

    return *ref;
}
|
||||
|
||||
/*
 * Expand, in place, each of the nr_inputs references in ins[] into the two
 * references it points at, so the array ends up holding nr_inputs * 2
 * entries.
 *
 * ins       - array holding nr_inputs references on entry; must have room
 *             for twice as many.
 * nr_inputs - number of references currently in ins[].
 * htabs     - the two hash tables; the one used is htabs[round % 2].
 * round     - round whose table the references index into.
 *
 * A reference packs the row in bits 16 and up, one slot in bits 8-15 and
 * the other slot in bits 0-7.  The array is walked from the last entry to
 * the first so that expanded pairs never overwrite an entry that has not
 * been read yet.
 */
void expand_refs(__global uint *ins, uint nr_inputs, __global char **htabs,
	uint round)
{
    __global char	*ht = htabs[round % 2];
    uint		xi_offset = (8 + ((round) / 2) * 4);
    uint		src = nr_inputs - 1;
    uint		dst = nr_inputs * 2 - 1;

    for (;;)
      {
	/* read once: for src == 0 the second store below overwrites it */
	uint	ref = ins[src];
	uint	row = ref >> 16;

	ins[dst] = expand_ref(ht, xi_offset, row, (ref >> 8) & 0xff);
	ins[dst - 1] = expand_ref(ht, xi_offset, row, ref & 0xff);
	if (src == 0)
	    break;
	src--;
	dst -= 2;
      }
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * Reserve a solution entry and fill it by expanding the two given
 * references down through every earlier round.
 *
 * htabs      - the two hash tables, forwarded to expand_refs().
 * sols       - output structure; sols->nr is atomically incremented to
 *              reserve an entry.
 * ref0, ref1 - the two references that seed the solution.
 *
 * If the reserved index is 2000 or more the function returns without
 * writing anything.  Otherwise valid[] for the entry is 0 while it is being
 * filled and set to 1 once all expansions are done.
 */
void potential_sol(__global char **htabs, __global sols_t *sols,
	uint ref0, uint ref1)
{
    uint	sol_i = atomic_inc(&sols->nr);

    if (sol_i >= 2000)
	return ;

    __global uint	*vals = &(sols->values[sol_i][0]);
    uint		nr_values = 2;
    uint		round = 9 - 1;

    sols->valid[sol_i] = 0;
    vals[0] = ref0;
    vals[1] = ref1;

    /* Expand against rounds 7, 6, ..., 0; the entry count doubles each time. */
    while (round > 0)
      {
	round--;
	expand_refs(vals, nr_values, htabs, round);
	nr_values *= 2;
      }

    sols->valid[sol_i] = 1;
}
|
||||
|
||||
|
||||
|
||||
|
||||
__kernel
|
||||
void kernel_sols(__global char *ht0, __global char *ht1, __global sols_t *sols)
|
||||
{
|
||||
uint tid = get_global_id(0);
|
||||
__global char *htabs[2] = { ht0, ht1 };
|
||||
uint ht_i = (9 - 1) % 2;
|
||||
uint cnt;
|
||||
uint xi_offset = (8 + ((9 - 1) / 2) * 4);
|
||||
uint i, j;
|
||||
__global char *a, *b;
|
||||
uint ref_i, ref_j;
|
||||
|
||||
|
||||
ulong collisions[5];
|
||||
uint coll;
|
||||
|
||||
|
||||
|
||||
uint mask = 0xffffff;
|
||||
|
||||
|
||||
|
||||
if (tid == 0)
|
||||
sols->nr = sols->likely_invalidss = 0;
|
||||
mem_fence(CLK_GLOBAL_MEM_FENCE);
|
||||
a = htabs[ht_i] + tid * ((1 << (((200 / (9 + 1)) + 1) - 16)) * 3) * 32;
|
||||
cnt = *(__global uint *)a;
|
||||
cnt = min(cnt, (uint)((1 << (((200 / (9 + 1)) + 1) - 16)) * 3));
|
||||
coll = 0;
|
||||
a += xi_offset;
|
||||
for (i = 0; i < cnt; i++, a += 32)
|
||||
for (j = i + 1, b = a + 32; j < cnt; j++, b += 32)
|
||||
if (((*(__global uint *)a) & mask) ==
|
||||
((*(__global uint *)b) & mask))
|
||||
{
|
||||
ref_i = *(__global uint *)(a - 4);
|
||||
ref_j = *(__global uint *)(b - 4);
|
||||
if (coll < sizeof (collisions) / sizeof (*collisions))
|
||||
collisions[coll++] = ((ulong)ref_i << 32) | ref_j;
|
||||
else
|
||||
atomic_inc(&sols->likely_invalidss);
|
||||
}
|
||||
if (!coll)
|
||||
return ;
|
||||
for (i = 0; i < coll; i++)
|
||||
potential_sol(htabs, sols, collisions[i] >> 32,
|
||||
collisions[i] & 0xffffffff);
|
||||
}
|
|
@ -1,531 +0,0 @@
|
|||
# 1 "input.cl"
|
||||
# 1 "<built-in>"
|
||||
# 1 "<command-line>"
|
||||
# 1 "/usr/include/stdc-predef.h" 1 3 4
|
||||
# 1 "<command-line>" 2
|
||||
# 1 "input.cl"
|
||||
# 1 "param.h" 1
|
||||
# 60 "param.h"
|
||||
typedef struct sols_s
|
||||
{
|
||||
uint nr;
|
||||
uint likely_invalidss;
|
||||
uchar valid[2000];
|
||||
uint values[2000][(1 << 9)];
|
||||
} sols_t;
|
||||
# 2 "input.cl" 2
|
||||
# 35 "input.cl"
|
||||
/*
 * The eight 64-bit BLAKE2b initialization vector words (IV[0] .. IV[7]).
 */
__constant ulong blake_iv[8] =
{
    0x6a09e667f3bcc908,
    0xbb67ae8584caa73b,
    0x3c6ef372fe94f82b,
    0xa54ff53a5f1d36f1,
    0x510e527fade682d1,
    0x9b05688c2b3e6c1f,
    0x1f83d9abfb41bd6b,
    0x5be0cd19137e2179
};
|
||||
|
||||
|
||||
|
||||
|
||||
__kernel
|
||||
void kernel_init_ht(__global char *ht)
|
||||
{
|
||||
uint tid = get_global_id(0);
|
||||
*(__global uint *)(ht + tid * ((1 << (((200 / (9 + 1)) + 1) - 19)) * 9) * 32) = 0;
|
||||
}
|
||||
# 79 "input.cl"
|
||||
/*
 * Append one entry to the hash table row selected by the low bits of xi0.
 *
 * round - round number; selects the row-index bit layout, the offset of the
 *         Xi data inside the slot, and how many bytes of Xi are written.
 * ht    - hash table to store into.
 * i     - 32-bit value written in the 4 bytes just before the Xi data.
 * xi0..xi3 - the Xi words, least significant first.
 *
 * Returns 1 if the row was already full (nothing is written, although the
 * row counter has still been incremented), 0 otherwise.
 */
uint ht_store(uint round, __global char *ht, uint i,
	ulong xi0, ulong xi1, ulong xi2, ulong xi3)
{
    enum
      {
	SLOTS_PER_ROW = ((1 << (((200 / (9 + 1)) + 1) - 19)) * 9),
	SLOT_BYTES = 32
      };
    uint		row;
    uint		cnt;
    __global char	*p;
    ulong		w0, w1, w2;

    if (round % 2)
      {
	/*
	 * Odd rounds: row bits 16-18 come from xi0 bits 17-19, and the four
	 * nibbles of the low 16 row bits come from xi0 nibbles at bits
	 * 8-11, 20-23, 0-3 and 12-15 (most significant nibble first).
	 */
	row = ((xi0 & 0xe0000) >> 1)
	    | ((xi0 & 0xf00) << 4)
	    | ((xi0 & 0xf00000) >> 12)
	    | ((xi0 & 0xf) << 4)
	    | ((xi0 & 0xf000) >> 12);
      }
    else
      {
	/* Even rounds: xi0 bits 0-15, plus bits 21-23 moved to 16-18. */
	row = (xi0 & 0xffff) | ((xi0 & 0xe00000) >> 5);
      }

    /* Shift the whole xi3:xi2:xi1:xi0 value right by 16 bits. */
    w0 = (xi0 >> 16) | (xi1 << (64 - 16));
    w1 = (xi1 >> 16) | (xi2 << (64 - 16));
    w2 = (xi2 >> 16) | (xi3 << (64 - 16));

    /* The first 4 bytes of the row are its slot counter. */
    p = ht + row * SLOTS_PER_ROW * SLOT_BYTES;
    cnt = atomic_inc((__global uint *)p);
    if (cnt >= SLOTS_PER_ROW)
	return 1;

    /* p now points at the Xi data of the slot that was just claimed. */
    p += cnt * SLOT_BYTES + (8 + ((round) / 2) * 4);
    *(__global uint *)(p - 4) = i;

    /* Later rounds store fewer bytes of Xi. */
    switch (round)
      {
      case 0:
      case 1:
	/* 24 bytes */
	*(__global ulong *)(p + 0) = w0;
	*(__global ulong *)(p + 8) = w1;
	*(__global ulong *)(p + 16) = w2;
	break;
      case 2:
	/* 20 bytes */
	*(__global ulong *)(p + 0) = w0;
	*(__global ulong *)(p + 8) = w1;
	*(__global uint *)(p + 16) = w2;
	break;
      case 3:
      case 4:
	/* 16 bytes */
	*(__global ulong *)(p + 0) = w0;
	*(__global ulong *)(p + 8) = w1;
	break;
      case 5:
	/* 12 bytes */
	*(__global ulong *)(p + 0) = w0;
	*(__global uint *)(p + 8) = w1;
	break;
      case 6:
      case 7:
	/* 8 bytes */
	*(__global ulong *)(p + 0) = w0;
	break;
      case 8:
	/* 4 bytes */
	*(__global uint *)(p + 0) = w0;
	break;
      default:
	/* any other round stores only the reference written above */
	break;
      }
    return 0;
}
|
||||
# 187 "input.cl"
|
||||
/*
 * BLAKE2b G mixing step applied to four state words, with x added in the
 * first half and y in the second. rotate() is a left rotation, so
 * (64 - n) yields a right rotation by n bits.
 */
#define BLAKE2B_G(va, vb, vc, vd, x, y) \
    va = (va + vb + x); \
    vd = rotate((vd ^ va), (ulong)64 - 32); \
    vc = (vc + vd); \
    vb = rotate((vb ^ vc), (ulong)64 - 24); \
    va = (va + vb + y); \
    vd = rotate((vd ^ va), (ulong)64 - 16); \
    vc = (vc + vd); \
    vb = rotate((vb ^ vc), (ulong)64 - 63)

/*
 * Round 0: hash every input index assigned to this work-item and insert
 * the results into the hash table.
 *
 * blake_state  eight 64-bit words used as the starting chaining value
 *              (presumably the midstate of the already-hashed part of the
 *              block header; confirm against the host code)
 * ht           destination hash table, filled through ht_store()
 * debug        not referenced by this kernel
 *
 * The 2^20 input indices are divided evenly between the work-items. For
 * each index a 12-round BLAKE2b compression is run over a block whose
 * only non-zero message word is word 1, which carries the index in its
 * upper 32 bits. The digest is then cut into two pieces that are stored
 * under the tags 2*index and 2*index + 1.
 */
__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
void kernel_round0(__global ulong *blake_state, __global char *ht,
        __global uint *debug)
{
    uint tid = get_global_id(0);
    ulong v[16];
    uint inputs_per_thread = (1 << (200 / (9 + 1))) / get_global_size(0);
    uint first = tid * inputs_per_thread;
    uint stop = first + inputs_per_thread;
    uint dropped = 0;

    for (uint input = first; input < stop; input++)
    {
        /* Message word 1: the input index occupies the high half. */
        ulong word1 = (ulong)input << 32;

        /* Working vector: chaining value followed by the IV. */
        v[0] = blake_state[0];
        v[1] = blake_state[1];
        v[2] = blake_state[2];
        v[3] = blake_state[3];
        v[4] = blake_state[4];
        v[5] = blake_state[5];
        v[6] = blake_state[6];
        v[7] = blake_state[7];
        v[8] = blake_iv[0];
        v[9] = blake_iv[1];
        v[10] = blake_iv[2];
        v[11] = blake_iv[3];
        v[12] = blake_iv[4];
        v[13] = blake_iv[5];
        v[14] = blake_iv[6];
        v[15] = blake_iv[7];

        /* Byte counter of the hashed message (presumably a 140-byte
         * header plus the 4-byte index; confirm). */
        v[12] ^= 140 + 4;
        /* Invert v[14]: BLAKE2b's marker for the final block. */
        v[14] ^= -1;

        /* round 1 */
        BLAKE2B_G(v[0], v[4], v[8], v[12], 0, word1);
        BLAKE2B_G(v[1], v[5], v[9], v[13], 0, 0);
        BLAKE2B_G(v[2], v[6], v[10], v[14], 0, 0);
        BLAKE2B_G(v[3], v[7], v[11], v[15], 0, 0);
        BLAKE2B_G(v[0], v[5], v[10], v[15], 0, 0);
        BLAKE2B_G(v[1], v[6], v[11], v[12], 0, 0);
        BLAKE2B_G(v[2], v[7], v[8], v[13], 0, 0);
        BLAKE2B_G(v[3], v[4], v[9], v[14], 0, 0);

        /* round 2 */
        BLAKE2B_G(v[0], v[4], v[8], v[12], 0, 0);
        BLAKE2B_G(v[1], v[5], v[9], v[13], 0, 0);
        BLAKE2B_G(v[2], v[6], v[10], v[14], 0, 0);
        BLAKE2B_G(v[3], v[7], v[11], v[15], 0, 0);
        BLAKE2B_G(v[0], v[5], v[10], v[15], word1, 0);
        BLAKE2B_G(v[1], v[6], v[11], v[12], 0, 0);
        BLAKE2B_G(v[2], v[7], v[8], v[13], 0, 0);
        BLAKE2B_G(v[3], v[4], v[9], v[14], 0, 0);

        /* round 3 */
        BLAKE2B_G(v[0], v[4], v[8], v[12], 0, 0);
        BLAKE2B_G(v[1], v[5], v[9], v[13], 0, 0);
        BLAKE2B_G(v[2], v[6], v[10], v[14], 0, 0);
        BLAKE2B_G(v[3], v[7], v[11], v[15], 0, 0);
        BLAKE2B_G(v[0], v[5], v[10], v[15], 0, 0);
        BLAKE2B_G(v[1], v[6], v[11], v[12], 0, 0);
        BLAKE2B_G(v[2], v[7], v[8], v[13], 0, word1);
        BLAKE2B_G(v[3], v[4], v[9], v[14], 0, 0);

        /* round 4 */
        BLAKE2B_G(v[0], v[4], v[8], v[12], 0, 0);
        BLAKE2B_G(v[1], v[5], v[9], v[13], 0, word1);
        BLAKE2B_G(v[2], v[6], v[10], v[14], 0, 0);
        BLAKE2B_G(v[3], v[7], v[11], v[15], 0, 0);
        BLAKE2B_G(v[0], v[5], v[10], v[15], 0, 0);
        BLAKE2B_G(v[1], v[6], v[11], v[12], 0, 0);
        BLAKE2B_G(v[2], v[7], v[8], v[13], 0, 0);
        BLAKE2B_G(v[3], v[4], v[9], v[14], 0, 0);

        /* round 5 */
        BLAKE2B_G(v[0], v[4], v[8], v[12], 0, 0);
        BLAKE2B_G(v[1], v[5], v[9], v[13], 0, 0);
        BLAKE2B_G(v[2], v[6], v[10], v[14], 0, 0);
        BLAKE2B_G(v[3], v[7], v[11], v[15], 0, 0);
        BLAKE2B_G(v[0], v[5], v[10], v[15], 0, word1);
        BLAKE2B_G(v[1], v[6], v[11], v[12], 0, 0);
        BLAKE2B_G(v[2], v[7], v[8], v[13], 0, 0);
        BLAKE2B_G(v[3], v[4], v[9], v[14], 0, 0);

        /* round 6 */
        BLAKE2B_G(v[0], v[4], v[8], v[12], 0, 0);
        BLAKE2B_G(v[1], v[5], v[9], v[13], 0, 0);
        BLAKE2B_G(v[2], v[6], v[10], v[14], 0, 0);
        BLAKE2B_G(v[3], v[7], v[11], v[15], 0, 0);
        BLAKE2B_G(v[0], v[5], v[10], v[15], 0, 0);
        BLAKE2B_G(v[1], v[6], v[11], v[12], 0, 0);
        BLAKE2B_G(v[2], v[7], v[8], v[13], 0, 0);
        BLAKE2B_G(v[3], v[4], v[9], v[14], word1, 0);

        /* round 7 */
        BLAKE2B_G(v[0], v[4], v[8], v[12], 0, 0);
        BLAKE2B_G(v[1], v[5], v[9], v[13], word1, 0);
        BLAKE2B_G(v[2], v[6], v[10], v[14], 0, 0);
        BLAKE2B_G(v[3], v[7], v[11], v[15], 0, 0);
        BLAKE2B_G(v[0], v[5], v[10], v[15], 0, 0);
        BLAKE2B_G(v[1], v[6], v[11], v[12], 0, 0);
        BLAKE2B_G(v[2], v[7], v[8], v[13], 0, 0);
        BLAKE2B_G(v[3], v[4], v[9], v[14], 0, 0);

        /* round 8 */
        BLAKE2B_G(v[0], v[4], v[8], v[12], 0, 0);
        BLAKE2B_G(v[1], v[5], v[9], v[13], 0, 0);
        BLAKE2B_G(v[2], v[6], v[10], v[14], 0, word1);
        BLAKE2B_G(v[3], v[7], v[11], v[15], 0, 0);
        BLAKE2B_G(v[0], v[5], v[10], v[15], 0, 0);
        BLAKE2B_G(v[1], v[6], v[11], v[12], 0, 0);
        BLAKE2B_G(v[2], v[7], v[8], v[13], 0, 0);
        BLAKE2B_G(v[3], v[4], v[9], v[14], 0, 0);

        /* round 9 */
        BLAKE2B_G(v[0], v[4], v[8], v[12], 0, 0);
        BLAKE2B_G(v[1], v[5], v[9], v[13], 0, 0);
        BLAKE2B_G(v[2], v[6], v[10], v[14], 0, 0);
        BLAKE2B_G(v[3], v[7], v[11], v[15], 0, 0);
        BLAKE2B_G(v[0], v[5], v[10], v[15], 0, 0);
        BLAKE2B_G(v[1], v[6], v[11], v[12], 0, 0);
        BLAKE2B_G(v[2], v[7], v[8], v[13], word1, 0);
        BLAKE2B_G(v[3], v[4], v[9], v[14], 0, 0);

        /* round 10 */
        BLAKE2B_G(v[0], v[4], v[8], v[12], 0, 0);
        BLAKE2B_G(v[1], v[5], v[9], v[13], 0, 0);
        BLAKE2B_G(v[2], v[6], v[10], v[14], 0, 0);
        BLAKE2B_G(v[3], v[7], v[11], v[15], word1, 0);
        BLAKE2B_G(v[0], v[5], v[10], v[15], 0, 0);
        BLAKE2B_G(v[1], v[6], v[11], v[12], 0, 0);
        BLAKE2B_G(v[2], v[7], v[8], v[13], 0, 0);
        BLAKE2B_G(v[3], v[4], v[9], v[14], 0, 0);

        /* round 11 */
        BLAKE2B_G(v[0], v[4], v[8], v[12], 0, word1);
        BLAKE2B_G(v[1], v[5], v[9], v[13], 0, 0);
        BLAKE2B_G(v[2], v[6], v[10], v[14], 0, 0);
        BLAKE2B_G(v[3], v[7], v[11], v[15], 0, 0);
        BLAKE2B_G(v[0], v[5], v[10], v[15], 0, 0);
        BLAKE2B_G(v[1], v[6], v[11], v[12], 0, 0);
        BLAKE2B_G(v[2], v[7], v[8], v[13], 0, 0);
        BLAKE2B_G(v[3], v[4], v[9], v[14], 0, 0);

        /* round 12 */
        BLAKE2B_G(v[0], v[4], v[8], v[12], 0, 0);
        BLAKE2B_G(v[1], v[5], v[9], v[13], 0, 0);
        BLAKE2B_G(v[2], v[6], v[10], v[14], 0, 0);
        BLAKE2B_G(v[3], v[7], v[11], v[15], 0, 0);
        BLAKE2B_G(v[0], v[5], v[10], v[15], word1, 0);
        BLAKE2B_G(v[1], v[6], v[11], v[12], 0, 0);
        BLAKE2B_G(v[2], v[7], v[8], v[13], 0, 0);
        BLAKE2B_G(v[3], v[4], v[9], v[14], 0, 0);

        /* Feed-forward. Only the low 16 bits of the seventh word are
         * kept, so the digest is 6 * 8 + 2 = 50 bytes long. */
        ulong h[7];
        h[0] = blake_state[0] ^ v[0] ^ v[8];
        h[1] = blake_state[1] ^ v[1] ^ v[9];
        h[2] = blake_state[2] ^ v[2] ^ v[10];
        h[3] = blake_state[3] ^ v[3] ^ v[11];
        h[4] = blake_state[4] ^ v[4] ^ v[12];
        h[5] = blake_state[5] ^ v[5] ^ v[13];
        h[6] = (blake_state[6] ^ v[6] ^ v[14]) & 0xffff;

        /* First piece: the digest from its first byte onwards. */
        dropped += ht_store(0, ht, input * 2,
                h[0], h[1], h[2], h[3]);
        /* Second piece: the digest starting one byte into h[3], i.e. at
         * byte 25, re-packed into aligned 64-bit words. */
        dropped += ht_store(0, ht, input * 2 + 1,
                (h[3] >> 8) | (h[4] << (64 - 8)),
                (h[4] >> 8) | (h[5] << (64 - 8)),
                (h[5] >> 8) | (h[6] << (64 - 8)),
                (h[6] >> 8));
    }
}

#undef BLAKE2B_G
|
||||
# 409 "input.cl"
|
||||
/*
 * XOR the payloads of two slots taken from the same row and store the
 * result in the destination table.
 *
 * round    round being computed; decides how many payload bytes are read
 *          from a and b (24, 20, 16, 12 or 8)
 * ht_dst   table that receives the result through ht_store()
 * row      source row index, packed into the stored tag
 * slot_a, slot_b  slot indices of the two operands; the low 6 bits of
 *          each are packed into the stored tag
 * a, b     pointers to the two payloads
 *
 * Returns 0 when the first two XORed words are both zero (the pair is
 * discarded without being stored), otherwise the result of ht_store().
 * Rounds outside 1..8 are not handled: the XOR words stay unset, exactly
 * as if no branch had been taken.
 */
uint xor_and_store(uint round, __global char *ht_dst, uint row,
        uint slot_a, uint slot_b, __global ulong *a, __global ulong *b)
{
    ulong xi0, xi1, xi2;
    uint tag;

    switch (round)
    {
    case 1:
    case 2:
        /* 24 bytes */
        xi0 = a[0] ^ b[0];
        xi1 = a[1] ^ b[1];
        xi2 = a[2] ^ b[2];
        break;
    case 3:
        /* 20 bytes: the last word is read as 32 bits */
        xi0 = a[0] ^ b[0];
        xi1 = a[1] ^ b[1];
        xi2 = *(__global uint *)(a + 2) ^ *(__global uint *)(b + 2);
        break;
    case 4:
    case 5:
        /* 16 bytes */
        xi0 = a[0] ^ b[0];
        xi1 = a[1] ^ b[1];
        xi2 = 0;
        break;
    case 6:
        /* 12 bytes: the last word is read as 32 bits */
        xi0 = a[0] ^ b[0];
        xi1 = *(__global uint *)(a + 1) ^ *(__global uint *)(b + 1);
        xi2 = 0;
        break;
    case 7:
    case 8:
        /* 8 bytes */
        xi0 = a[0] ^ b[0];
        xi1 = 0;
        xi2 = 0;
        break;
    }

    /* A result that is zero in its first two words is thrown away. */
    if ((xi0 | xi1) == 0)
        return 0;

    /* Tag layout: row from bit 13 upwards, slot_b in bits 6..11 and
     * slot_a in bits 0..5. */
    tag = (row << 13) | ((slot_b & 0x3f) << 6) | (slot_a & 0x3f);
    return ht_store(round, ht_dst, tag, xi0, xi1, xi2, 0);
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * Process one row of the source table for the given round: find pairs of
 * slots whose first Xi byte agrees under the round's mask, and hand every
 * such pair to xor_and_store() for insertion into the destination table.
 *
 * round    round number, forwarded to xor_and_store()
 * ht_src   table read by this round; the work-item's global id selects the row
 * ht_dst   table written by this round
 * debug    not used by this function
 */
void equihash_round(uint round, __global char *ht_src, __global char *ht_dst,
        __global uint *debug)
{
    enum
    {
        ROW_SLOTS      = (1 << (((200 / (9 + 1)) + 1) - 19)) * 9,
        SLOT_BYTES     = 32,
        MAX_COLLISIONS = ROW_SLOTS * 2
    };

    uint    row = get_global_id(0);
    uchar   first_words[ROW_SLOTS];
    ushort  collisions[MAX_COLLISIONS];
    uint    nr_coll = 0;
    // Tallies of discarded work; they are accumulated but not reported here.
    uint    dropped_coll = 0;
    uint    dropped_stor = 0;

    // Byte offset of the Xi data inside a slot for this round.
    uint    xi_offset = 8 + ((round - 1) / 2) * 4;
    // Odd rounds compare bit 4 of the first Xi byte, even rounds bit 0.
    uchar   mask = (round % 2) ? 0x10 : 0x01;
    // Even rounds read the Xi data starting one byte further in.
    uint    adj = (round % 2) ? 0 : 1;

    __global char *row_base = ht_src + row * ROW_SLOTS * SLOT_BYTES;

    // The row starts with its slot counter; clamp it to the row capacity.
    uint    cnt = min(*(__global uint *)row_base, (uint)ROW_SLOTS);

    // Cache the first Xi byte of every occupied slot.
    for (uint s = 0; s < cnt; s++)
        first_words[s] =
            *(__global uchar *)(row_base + xi_offset + s * SLOT_BYTES);

    // Record every pair (lo < hi) whose masked first bytes are equal,
    // packed as (hi << 8) | lo.
    for (uint lo = 0; lo < cnt; lo++)
    {
        uchar key = first_words[lo] & mask;
        for (uint hi = lo + 1; hi < cnt; hi++)
        {
            if ((first_words[hi] & mask) != key)
                continue;
            if (nr_coll < MAX_COLLISIONS)
                collisions[nr_coll++] =
                    ((ushort)hi << 8) | ((ushort)lo & 0xff);
            else
                dropped_coll++;
        }
    }

    // XOR each recorded pair and store the result for the next round.
    for (uint n = 0; n < nr_coll; n++)
    {
        uint slot_i = collisions[n] & 0xff;
        uint slot_j = collisions[n] >> 8;
        __global ulong *a = (__global ulong *)
            (row_base + slot_i * SLOT_BYTES + xi_offset + adj);
        __global ulong *b = (__global ulong *)
            (row_base + slot_j * SLOT_BYTES + xi_offset + adj);
        dropped_stor += xor_and_store(round, ht_dst, row, slot_i, slot_j, a, b);
    }
}
|
||||
# 557 "input.cl"
|
||||
/*
 * Kernel entry points for rounds 1 through 8. Each one only forwards its
 * arguments to equihash_round() with the round number fixed, and requires a
 * work-group size of 64x1x1.
 */
__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
void kernel_round1(__global char *ht_src, __global char *ht_dst,
        __global uint *debug)
{
    equihash_round(1, ht_src, ht_dst, debug);
}

__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
void kernel_round2(__global char *ht_src, __global char *ht_dst,
        __global uint *debug)
{
    equihash_round(2, ht_src, ht_dst, debug);
}

__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
void kernel_round3(__global char *ht_src, __global char *ht_dst,
        __global uint *debug)
{
    equihash_round(3, ht_src, ht_dst, debug);
}

__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
void kernel_round4(__global char *ht_src, __global char *ht_dst,
        __global uint *debug)
{
    equihash_round(4, ht_src, ht_dst, debug);
}

__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
void kernel_round5(__global char *ht_src, __global char *ht_dst,
        __global uint *debug)
{
    equihash_round(5, ht_src, ht_dst, debug);
}

__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
void kernel_round6(__global char *ht_src, __global char *ht_dst,
        __global uint *debug)
{
    equihash_round(6, ht_src, ht_dst, debug);
}

__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
void kernel_round7(__global char *ht_src, __global char *ht_dst,
        __global uint *debug)
{
    equihash_round(7, ht_src, ht_dst, debug);
}

__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
void kernel_round8(__global char *ht_src, __global char *ht_dst,
        __global uint *debug)
{
    equihash_round(8, ht_src, ht_dst, debug);
}
|
||||
|
||||
/*
 * Read the 32-bit reference stored in the 4 bytes immediately before the Xi
 * data of one slot.
 *
 * ht         hash table to read from
 * xi_offset  byte offset of the Xi data inside a slot
 * row, slot  position of the slot in the table
 */
uint expand_ref(__global char *ht, uint xi_offset, uint row, uint slot)
{
    __global char *slot_base =
        ht + row * ((1 << (((200 / (9 + 1)) + 1) - 19)) * 9) * 32 + slot * 32;

    return *(__global uint *)(slot_base + xi_offset - 4);
}
|
||||
|
||||
/*
 * Expand, in place, each of the first nr_inputs references in `ins` into the
 * two references it points to in the table of the given round. On return the
 * array holds 2 * nr_inputs entries: input k is replaced by entries 2k
 * (its low-slot reference) and 2k + 1 (its high-slot reference).
 *
 * ins        array of references; must have room for 2 * nr_inputs entries
 * nr_inputs  number of valid references currently in `ins`; 0 is a no-op
 * htabs      the two hash tables; the one used is htabs[round % 2]
 * round      round whose table the references index into
 *
 * A reference is decoded as: row = ref >> 13, high slot = bits 6..11,
 * low slot = bits 0..5.
 */
void expand_refs(__global uint *ins, uint nr_inputs, __global char **htabs,
        uint round)
{
    __global char *ht = htabs[round % 2];
    uint xi_offset = (8 + ((round) / 2) * 4);

    // Walk from the last input to the first so that outputs (written at
    // 2k and 2k + 1) never overwrite an input that is still to be read.
    // Counting down with `k-- > 0` also makes nr_inputs == 0 a no-op
    // instead of wrapping the indices around.
    for (uint k = nr_inputs; k-- > 0; )
    {
        uint ref = ins[k];      // read once, before slot 2k is overwritten
        uint row = ref >> 13;

        ins[2 * k + 1] = expand_ref(ht, xi_offset, row, (ref >> 6) & 0x3f);
        ins[2 * k] = expand_ref(ht, xi_offset, row, ref & 0x3f);
    }
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * Claim a slot in the solutions buffer for a candidate identified by two
 * final-round references and expand it into the full list of values.
 *
 * htabs       the two hash tables, forwarded to expand_refs()
 * sols        output buffer; sols->nr is incremented atomically on every call
 * ref0, ref1  the two references that seed the expansion
 *
 * Candidates beyond the 2000th are counted in sols->nr but not recorded.
 */
void potential_sol(__global char **htabs, __global sols_t *sols,
        uint ref0, uint ref1)
{
    uint sol_i = atomic_inc(&sols->nr);
    if (sol_i >= 2000)
        return ;

    __global uint *values = &(sols->values[sol_i][0]);

    // Mark the entry as not valid while it is being filled in.
    sols->valid[sol_i] = 0;
    values[0] = ref0;
    values[1] = ref1;

    // Expand through rounds 7 down to 0; the value count doubles each time.
    uint nr_values = 2;
    uint round = 9 - 1;
    while (round-- > 0)
    {
        expand_refs(values, nr_values, htabs, round);
        nr_values *= 2;
    }

    sols->valid[sol_i] = 1;
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * Scan one row of the last round's table (ht0, since (9 - 1) % 2 == 0) for
 * pairs of slots whose low 24 bits of Xi are equal, and pass the references
 * of each such pair to potential_sol().
 *
 * ht0, ht1  the two hash tables
 * sols      output buffer for candidate solutions
 *
 * At most 5 pairs per row are kept; further pairs only increment
 * sols->likely_invalidss.
 */
__kernel
void kernel_sols(__global char *ht0, __global char *ht1, __global sols_t *sols)
{
    enum
    {
        ROW_SLOTS  = (1 << (((200 / (9 + 1)) + 1) - 19)) * 9,
        SLOT_BYTES = 32,
        MAX_PAIRS  = 5
    };

    uint            tid = get_global_id(0);
    __global char   *htabs[2] = { ht0, ht1 };
    uint            ht_i = (9 - 1) % 2;
    uint            xi_offset = (8 + ((9 - 1) / 2) * 4);
    uint            mask = 0xffffff;
    uint            refs_a[MAX_PAIRS];
    uint            refs_b[MAX_PAIRS];
    uint            nr_pairs = 0;

    // NOTE(review): the counters are reset by work-item 0 only, and
    // mem_fence() is not a barrier across work-items, so other work-items
    // may touch the counters before this reset lands -- confirm against the
    // host code.
    if (tid == 0)
    {
        sols->likely_invalidss = 0;
        sols->nr = 0;
    }
    mem_fence(CLK_GLOBAL_MEM_FENCE);

    __global char *row_base = htabs[ht_i] + tid * ROW_SLOTS * SLOT_BYTES;

    // The row starts with its slot counter; clamp it to the row capacity.
    uint cnt = min(*(__global uint *)row_base, (uint)ROW_SLOTS);

    __global char *first_xi = row_base + xi_offset;

    for (uint i = 0; i < cnt; i++)
    {
        __global char *a = first_xi + i * SLOT_BYTES;
        for (uint j = i + 1; j < cnt; j++)
        {
            __global char *b = first_xi + j * SLOT_BYTES;
            if (((*(__global uint *)a) & mask) !=
                    ((*(__global uint *)b) & mask))
                continue;

            // The reference of a slot sits in the 4 bytes before its Xi.
            uint ref_i = *(__global uint *)(a - 4);
            uint ref_j = *(__global uint *)(b - 4);
            if (nr_pairs < MAX_PAIRS)
            {
                refs_a[nr_pairs] = ref_i;
                refs_b[nr_pairs] = ref_j;
                nr_pairs++;
            }
            else
                atomic_inc(&sols->likely_invalidss);
        }
    }

    for (uint n = 0; n < nr_pairs; n++)
        potential_sol(htabs, sols, refs_a[n], refs_b[n]);
}
|
|
@ -1,526 +0,0 @@
|
|||
# 1 "input.cl"
|
||||
# 1 "<built-in>"
|
||||
# 1 "<command-line>"
|
||||
# 1 "/usr/include/stdc-predef.h" 1 3 4
|
||||
# 1 "<command-line>" 2
|
||||
# 1 "input.cl"
|
||||
# 1 "param.h" 1
|
||||
# 60 "param.h"
|
||||
/*
 * Buffer shared between the solver kernels and their caller for reporting
 * candidate solutions. It holds up to 2000 entries of (1 << 9) values each.
 */
typedef struct sols_s
{
    uint    nr;                         // number of entries claimed so far
    uint    likely_invalidss;           // counter of discarded candidates -- presumably diagnostic; confirm with the host code
    uchar   valid[2000];                // per-entry flag: non-zero once the entry is complete
    uint    values[2000][(1 << 9)];     // per-entry list of values
}   sols_t;
|
||||
# 2 "input.cl" 2
|
||||
# 35 "input.cl"
|
||||
/*
 * The eight 64-bit BLAKE2b initialisation vector words, loaded into
 * v[8..15] at the start of the compression in kernel_round0.
 */
__constant ulong blake_iv[] =
{
    0x6a09e667f3bcc908UL,
    0xbb67ae8584caa73bUL,
    0x3c6ef372fe94f82bUL,
    0xa54ff53a5f1d36f1UL,
    0x510e527fade682d1UL,
    0x9b05688c2b3e6c1fUL,
    0x1f83d9abfb41bd6bUL,
    0x5be0cd19137e2179UL,
};
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * Reset one row of a hash table: each work-item writes 0 to the 32-bit
 * counter at the start of the row selected by its global id.
 */
__kernel
void kernel_init_ht(__global char *ht)
{
    uint row = get_global_id(0);
    __global uint *slot_counter = (__global uint *)
        (ht + row * ((1 << (((200 / (9 + 1)) + 1) - 20)) * 13) * 32);

    *slot_counter = 0;
}
|
||||
# 79 "input.cl"
|
||||
/*
 * Insert one entry into a hash table.
 *
 * The row is derived from the low 24 bits of xi0 (with a different bit
 * arrangement on even and odd rounds). The 16 low bits of the Xi data are
 * then shifted out, a slot is claimed by atomically incrementing the counter
 * at the start of the row, and the reference `i` followed by the remaining
 * Xi bytes is written into that slot.
 *
 * round     round the entry belongs to; selects the row formula, the offset
 *           of the Xi data inside the slot and how many bytes are written
 *           (0,1: 24 / 2: 20 / 3,4: 16 / 5: 12 / 6,7: 8 / 8: 4)
 * ht        hash table to store into
 * i         reference value written in the 4 bytes before the Xi data
 * xi0..xi3  Xi data, least significant word first
 *
 * Returns 1 if the row was already full (nothing is written), 0 otherwise.
 */
uint ht_store(uint round, __global char *ht, uint i,
        ulong xi0, ulong xi1, ulong xi2, ulong xi3)
{
    enum
    {
        ROW_SLOTS  = (1 << (((200 / (9 + 1)) + 1) - 20)) * 13,
        SLOT_BYTES = 32
    };

    uint row;
    if (round % 2)
        // odd rounds: nibbles of the low 24 bits are rearranged
        row = ((xi0 & 0xf0000) >> 0) |
            ((xi0 & 0xf00) << 4) | ((xi0 & 0xf00000) >> 12) |
            ((xi0 & 0xf) << 4) | ((xi0 & 0xf000) >> 12);
    else
        // even rounds: bits 0..15 plus bits 20..23 moved down to 16..19
        row = (xi0 & 0xffff) | ((xi0 & 0xf00000) >> 4);

    // Shift the whole Xi right by 16 bits across the word boundaries.
    ulong w0 = (xi0 >> 16) | (xi1 << (64 - 16));
    ulong w1 = (xi1 >> 16) | (xi2 << (64 - 16));
    ulong w2 = (xi2 >> 16) | (xi3 << (64 - 16));

    // Claim the next free slot of the row.
    __global char *row_base = ht + row * ROW_SLOTS * SLOT_BYTES;
    uint cnt = atomic_inc((__global uint *)row_base);
    if (cnt >= ROW_SLOTS)
        return 1;

    __global char *xi = row_base + cnt * SLOT_BYTES + (8 + ((round) / 2) * 4);

    *(__global uint *)(xi - 4) = i;

    switch (round)
    {
    case 0:
    case 1:
        *(__global ulong *)(xi + 0) = w0;
        *(__global ulong *)(xi + 8) = w1;
        *(__global ulong *)(xi + 16) = w2;
        break;
    case 2:
        *(__global ulong *)(xi + 0) = w0;
        *(__global ulong *)(xi + 8) = w1;
        *(__global uint *)(xi + 16) = w2;
        break;
    case 3:
    case 4:
        *(__global ulong *)(xi + 0) = w0;
        *(__global ulong *)(xi + 8) = w1;
        break;
    case 5:
        *(__global ulong *)(xi + 0) = w0;
        *(__global uint *)(xi + 8) = w1;
        break;
    case 6:
    case 7:
        *(__global ulong *)(xi + 0) = w0;
        break;
    case 8:
        *(__global uint *)(xi + 0) = w0;
        break;
    default:
        // other rounds: only the reference is written
        break;
    }
    return 0;
}
|
||||
# 187 "input.cl"
|
||||
__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
|
||||
void kernel_round0(__global ulong *blake_state, __global char *ht,
|
||||
__global uint *debug)
|
||||
{
|
||||
uint tid = get_global_id(0);
|
||||
ulong v[16];
|
||||
uint inputs_per_thread = (1 << (200 / (9 + 1))) / get_global_size(0);
|
||||
uint input = tid * inputs_per_thread;
|
||||
uint input_end = (tid + 1) * inputs_per_thread;
|
||||
uint dropped = 0;
|
||||
while (input < input_end)
|
||||
{
|
||||
|
||||
|
||||
ulong word1 = (ulong)input << 32;
|
||||
|
||||
v[0] = blake_state[0];
|
||||
v[1] = blake_state[1];
|
||||
v[2] = blake_state[2];
|
||||
v[3] = blake_state[3];
|
||||
v[4] = blake_state[4];
|
||||
v[5] = blake_state[5];
|
||||
v[6] = blake_state[6];
|
||||
v[7] = blake_state[7];
|
||||
v[8] = blake_iv[0];
|
||||
v[9] = blake_iv[1];
|
||||
v[10] = blake_iv[2];
|
||||
v[11] = blake_iv[3];
|
||||
v[12] = blake_iv[4];
|
||||
v[13] = blake_iv[5];
|
||||
v[14] = blake_iv[6];
|
||||
v[15] = blake_iv[7];
|
||||
|
||||
v[12] ^= 140 + 4 ;
|
||||
|
||||
v[14] ^= -1;
|
||||
|
||||
|
||||
v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + word1); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);;
|
||||
v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);;
|
||||
|
||||
v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);;
|
||||
v[0] = (v[0] + v[5] + word1); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);;
|
||||
|
||||
v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);;
|
||||
v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + word1); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);;
|
||||
|
||||
v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + word1); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);;
|
||||
v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);;
|
||||
|
||||
v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);;
|
||||
v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + word1); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);;
|
||||
|
||||
v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);;
|
||||
v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[4] + word1); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);;
|
||||
|
||||
v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[5] + word1); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);;
|
||||
v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);;
|
||||
|
||||
v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + word1); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);;
|
||||
v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);;
|
||||
|
||||
v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);;
|
||||
v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[7] + word1); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);;
|
||||
|
||||
v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[7] + word1); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);;
|
||||
v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);;
|
||||
|
||||
v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + word1); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);;
|
||||
v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);;
|
||||
|
||||
v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);;
|
||||
v[0] = (v[0] + v[5] + word1); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);;
|
||||
|
||||
|
||||
|
||||
ulong h[7];
|
||||
h[0] = blake_state[0] ^ v[0] ^ v[8];
|
||||
h[1] = blake_state[1] ^ v[1] ^ v[9];
|
||||
h[2] = blake_state[2] ^ v[2] ^ v[10];
|
||||
h[3] = blake_state[3] ^ v[3] ^ v[11];
|
||||
h[4] = blake_state[4] ^ v[4] ^ v[12];
|
||||
h[5] = blake_state[5] ^ v[5] ^ v[13];
|
||||
h[6] = (blake_state[6] ^ v[6] ^ v[14]) & 0xffff;
|
||||
|
||||
|
||||
|
||||
dropped += ht_store(0, ht, input * 2,
|
||||
h[0],
|
||||
h[1],
|
||||
h[2],
|
||||
h[3]);
|
||||
dropped += ht_store(0, ht, input * 2 + 1,
|
||||
(h[3] >> 8) | (h[4] << (64 - 8)),
|
||||
(h[4] >> 8) | (h[5] << (64 - 8)),
|
||||
(h[5] >> 8) | (h[6] << (64 - 8)),
|
||||
(h[6] >> 8));
|
||||
|
||||
|
||||
|
||||
|
||||
input++;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
# 409 "input.cl"
|
||||
/*
 * XOR the remaining Xi words of two colliding slots and hand the result to
 * ht_store() for the destination table.
 *
 * round   - round number; selects how many words are still significant.
 * ht_dst  - destination hash table passed through to ht_store().
 * row     - source row index, packed into the stored reference (bits 12+).
 * slot_a  - first slot index, packed into bits 0..5 of the reference.
 * slot_b  - second slot index, packed into bits 6..11 of the reference.
 * a, b    - pointers to the two slots' Xi data.
 *
 * Returns 0 without storing when the first two XOR words are both zero
 * (presumably a degenerate pair -- confirm against the algorithm notes);
 * otherwise returns whatever ht_store() returns, which the caller
 * accumulates as a dropped-store count.
 */
uint xor_and_store(uint round, __global char *ht_dst, uint row,
	uint slot_a, uint slot_b, __global ulong *a, __global ulong *b)
{
    /* Zero-initialised so that a round value outside 1..8 takes the
       "nothing to store" exit instead of reading indeterminate values. */
    ulong xi0 = 0;
    ulong xi1 = 0;
    ulong xi2 = 0;

    switch (round)
      {
      case 1:
      case 2:
	/* three full 64-bit words */
	xi0 = a[0] ^ b[0];
	xi1 = a[1] ^ b[1];
	xi2 = a[2] ^ b[2];
	break;
      case 3:
	/* two 64-bit words plus one 32-bit word */
	xi0 = a[0] ^ b[0];
	xi1 = a[1] ^ b[1];
	xi2 = *(__global uint *)(a + 2) ^ *(__global uint *)(b + 2);
	break;
      case 4:
      case 5:
	/* two 64-bit words */
	xi0 = a[0] ^ b[0];
	xi1 = a[1] ^ b[1];
	break;
      case 6:
	/* one 64-bit word plus one 32-bit word */
	xi0 = a[0] ^ b[0];
	xi1 = *(__global uint *)(a + 1) ^ *(__global uint *)(b + 1);
	break;
      case 7:
      case 8:
	/* a single 64-bit word */
	xi0 = a[0] ^ b[0];
	break;
      default:
	/* unknown round: leave everything zero */
	break;
      }

    if (!xi0 && !xi1)
	return 0;

    return ht_store(round, ht_dst,
	    ((row << 12) | ((slot_b & 0x3f) << 6) | (slot_a & 0x3f)),
	    xi0, xi1, xi2, 0);
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * One collision round over a single row of the source table.
 *
 * Each work-item handles the row selected by its global id: it reads the
 * row's slot count (first 4 bytes of the row, clamped to the slot capacity),
 * gathers one byte per slot at this round's Xi offset, records slot pairs
 * whose masked bytes match, and passes each pair to xor_and_store().
 *
 * round   - round number (callers in this file pass 1..8).
 * ht_src  - table read from; rows are (slot capacity * 32) bytes apart and
 *           slots are 32 bytes apart.
 * ht_dst  - table written via xor_and_store().
 * debug   - not referenced in this preprocessed build.
 */
void equihash_round(uint round, __global char *ht_src, __global char *ht_dst,
	__global uint *debug)
{
    uint		tid = get_global_id(0);
    uint		tlid = get_local_id(0);
    /* one leading Xi byte per slot of this row */
    uchar		first_words[((1 << (((200 / (9 + 1)) + 1) - 20)) * 13)];
    /* packed slot pairs: high byte = j, low byte = i */
    ushort		collisions[((1 << (((200 / (9 + 1)) + 1) - 20)) * 13) * 2];
    __global char	*row_base;
    __global char	*p;
    __global ulong	*a, *b;
    uint		xi_offset = (8 + ((round - 1) / 2) * 4);
    uint		cnt;
    uint		nr_coll;
    uint		dropped_coll, dropped_stor;
    uint		i, j, n;
    uchar		mask;

# 495 "input.cl"
    /* With a zero mask every pair of slots compares equal below. */
    mask = 0;

    row_base = ht_src + tid * ((1 << (((200 / (9 + 1)) + 1) - 20)) * 13) * 32;
    cnt = min(*(__global uint *)row_base,
	    (uint)((1 << (((200 / (9 + 1)) + 1) - 20)) * 13));

    /* Gather the first Xi byte of every occupied slot. */
    p = row_base + xi_offset;
    i = 0;
    while (i < cnt)
      {
	first_words[i] = *(__global uchar *)p;
	p += 32;
	i++;
      }

    /* Enumerate pairs i < j; pairs beyond the array capacity are only
       counted, not stored. */
    nr_coll = 0;
    dropped_coll = 0;
    for (i = 0; i < cnt; i++)
      {
	for (j = i + 1; j < cnt; j++)
	  {
	    if ((first_words[i] & mask) != (first_words[j] & mask))
		continue;
	    if (nr_coll < sizeof (collisions) / sizeof (*collisions))
		collisions[nr_coll++] =
		    ((ushort)j << 8) | ((ushort)i & 0xff);
	    else
		dropped_coll++;
	  }
      }

    /* Even rounds start reading one byte further into the slot. */
    uint adj = (round % 2) ? 0 : 1;

    /* XOR each recorded pair and store it in the destination table. */
    dropped_stor = 0;
    for (n = 0; n < nr_coll; n++)
      {
	i = collisions[n] & 0xff;
	j = collisions[n] >> 8;
	a = (__global ulong *)(row_base + i * 32 + xi_offset + adj);
	b = (__global ulong *)(row_base + j * 32 + xi_offset + adj);
	dropped_stor += xor_and_store(round, ht_dst, tid, i, j, a, b);
      }
}
|
||||
# 557 "input.cl"
|
||||
/*
 * Kernel entry points for rounds 1..8. Each one forwards its arguments to
 * equihash_round() with the round number fixed, and requires a work-group
 * size of 64x1x1.
 */
__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
void kernel_round1(__global char *ht_src, __global char *ht_dst,
	__global uint *debug)
{
    equihash_round(1, ht_src, ht_dst, debug);
}

__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
void kernel_round2(__global char *ht_src, __global char *ht_dst,
	__global uint *debug)
{
    equihash_round(2, ht_src, ht_dst, debug);
}

__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
void kernel_round3(__global char *ht_src, __global char *ht_dst,
	__global uint *debug)
{
    equihash_round(3, ht_src, ht_dst, debug);
}

__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
void kernel_round4(__global char *ht_src, __global char *ht_dst,
	__global uint *debug)
{
    equihash_round(4, ht_src, ht_dst, debug);
}

__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
void kernel_round5(__global char *ht_src, __global char *ht_dst,
	__global uint *debug)
{
    equihash_round(5, ht_src, ht_dst, debug);
}

__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
void kernel_round6(__global char *ht_src, __global char *ht_dst,
	__global uint *debug)
{
    equihash_round(6, ht_src, ht_dst, debug);
}

__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
void kernel_round7(__global char *ht_src, __global char *ht_dst,
	__global uint *debug)
{
    equihash_round(7, ht_src, ht_dst, debug);
}

__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
void kernel_round8(__global char *ht_src, __global char *ht_dst,
	__global uint *debug)
{
    equihash_round(8, ht_src, ht_dst, debug);
}
|
||||
|
||||
/*
 * Read the 32-bit reference word stored 4 bytes before the Xi data of one
 * slot.
 *
 * ht         - hash table to read from.
 * xi_offset  - byte offset of the Xi data within a slot.
 * row, slot  - position of the slot (rows are slot-capacity * 32 bytes
 *              apart, slots 32 bytes apart).
 */
uint expand_ref(__global char *ht, uint xi_offset, uint row, uint slot)
{
    __global char *slot_ptr =
	ht + row * ((1 << (((200 / (9 + 1)) + 1) - 20)) * 13) * 32 + slot * 32;

    return *(__global uint *)(slot_ptr + xi_offset - 4);
}
|
||||
|
||||
/*
 * Expand, in place, each of the first nr_inputs references in ins[] into the
 * two references it points at, so that ins[] holds 2 * nr_inputs entries on
 * return.
 *
 * ins        - reference array; must have room for 2 * nr_inputs entries.
 * nr_inputs  - number of references currently in ins[]; 0 is a no-op.
 * htabs      - the two hash tables; the one used is htabs[round % 2].
 * round      - round whose table the references index into.
 *
 * Each reference packs a row in bits 12 and up, and two slot indices in
 * bits 6..11 and 0..5. Entries are processed from the last to the first so
 * that writing ins[2*i] and ins[2*i + 1] never overwrites a reference that
 * has not been expanded yet.
 */
void expand_refs(__global uint *ins, uint nr_inputs, __global char **htabs,
	uint round)
{
    __global char	*ht = htabs[round % 2];
    uint		xi_offset = (8 + ((round) / 2) * 4);

    /* Counting down with "i-- > 0" also makes nr_inputs == 0 safe; the
       previous do/while wrapped the index and ran out of bounds. */
    for (uint i = nr_inputs; i-- > 0; )
      {
	uint ref = ins[i];
	uint row = ref >> 12;

	ins[2 * i + 1] = expand_ref(ht, xi_offset, row, (ref >> 6) & 0x3f);
	ins[2 * i] = expand_ref(ht, xi_offset, row, ref & 0x3f);
      }
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * Claim a solution slot and expand the pair (ref0, ref1) into its full list
 * of input references.
 *
 * htabs       - the two hash tables, passed through to expand_refs().
 * sols        - output structure; sols->nr is atomically incremented to
 *               claim an index.
 * ref0, ref1  - the two references that seed the expansion.
 *
 * If the claimed index is 2000 or more, nothing is written. Otherwise the
 * entry is flagged invalid while it is filled and flagged valid once all
 * expansion passes (rounds 7 down to 0, doubling the count each pass) are
 * done.
 */
void potential_sol(__global char **htabs, __global sols_t *sols,
	uint ref0, uint ref1)
{
    uint sol_i = atomic_inc(&sols->nr);

    if (sol_i >= 2000)
	return ;

    sols->valid[sol_i] = 0;
    sols->values[sol_i][0] = ref0;
    sols->values[sol_i][1] = ref1;

    uint nr_values = 2;

    /* round takes the values 7, 6, ..., 0 inside the loop body */
    for (uint round = 9 - 1; round-- > 0; nr_values *= 2)
	expand_refs(&(sols->values[sol_i][0]), nr_values, htabs, round);

    sols->valid[sol_i] = 1;
}
|
||||
|
||||
|
||||
|
||||
|
||||
__kernel
|
||||
void kernel_sols(__global char *ht0, __global char *ht1, __global sols_t *sols)
|
||||
{
|
||||
uint tid = get_global_id(0);
|
||||
__global char *htabs[2] = { ht0, ht1 };
|
||||
uint ht_i = (9 - 1) % 2;
|
||||
uint cnt;
|
||||
uint xi_offset = (8 + ((9 - 1) / 2) * 4);
|
||||
uint i, j;
|
||||
__global char *a, *b;
|
||||
uint ref_i, ref_j;
|
||||
|
||||
|
||||
ulong collisions[5];
|
||||
uint coll;
|
||||
|
||||
|
||||
|
||||
uint mask = 0xffffff;
|
||||
|
||||
|
||||
|
||||
if (tid == 0)
|
||||
sols->nr = sols->likely_invalidss = 0;
|
||||
mem_fence(CLK_GLOBAL_MEM_FENCE);
|
||||
a = htabs[ht_i] + tid * ((1 << (((200 / (9 + 1)) + 1) - 20)) * 13) * 32;
|
||||
cnt = *(__global uint *)a;
|
||||
cnt = min(cnt, (uint)((1 << (((200 / (9 + 1)) + 1) - 20)) * 13));
|
||||
coll = 0;
|
||||
a += xi_offset;
|
||||
for (i = 0; i < cnt; i++, a += 32)
|
||||
for (j = i + 1, b = a + 32; j < cnt; j++, b += 32)
|
||||
if (((*(__global uint *)a) & mask) ==
|
||||
((*(__global uint *)b) & mask))
|
||||
{
|
||||
ref_i = *(__global uint *)(a - 4);
|
||||
ref_j = *(__global uint *)(b - 4);
|
||||
if (coll < sizeof (collisions) / sizeof (*collisions))
|
||||
collisions[coll++] = ((ulong)ref_i << 32) | ref_j;
|
||||
else
|
||||
atomic_inc(&sols->likely_invalidss);
|
||||
}
|
||||
if (!coll)
|
||||
return ;
|
||||
for (i = 0; i < coll; i++)
|
||||
potential_sol(htabs, sols, collisions[i] >> 32,
|
||||
collisions[i] & 0xffffffff);
|
||||
}
|
|
@ -1,2 +0,0 @@
|
|||
#include "AvailableSolvers.h"
|
||||
|
|
@ -21,6 +21,7 @@ CREATE_SOLVER_STUB(cpu_xenoncat, "cpu_xenoncat_STUB")
|
|||
CREATE_SOLVER_STUB(cuda_tromp, "cuda_tromp_STUB")
|
||||
CREATE_SOLVER_STUB(cuda_djezo, "cuda_djezo_STUB")
|
||||
#endif
|
||||
// OpenCL solvers are dropped; replace with new OS solvers
|
||||
#ifdef USE_OCL_XMP
|
||||
#include "../ocl_xpm/ocl_xmp.hpp"
|
||||
#else
|
||||
|
|
|
@ -5,12 +5,9 @@ VisualStudioVersion = 12.0.40629.0
|
|||
MinimumVisualStudioVersion = 10.0.40219.1
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nheqminer", "nheqminer.vcxproj", "{6FF7D209-05A3-4550-93CC-211D33503719}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{AB01E715-795A-4089-8DF0-AE6EBDC1AB48} = {AB01E715-795A-4089-8DF0-AE6EBDC1AB48}
|
||||
{299E011B-5242-4EDA-B2F2-73C9B48F12FD} = {299E011B-5242-4EDA-B2F2-73C9B48F12FD}
|
||||
{6C180164-4DBE-45D7-85E0-7BDFACF3FC7B} = {6C180164-4DBE-45D7-85E0-7BDFACF3FC7B}
|
||||
{33C2B469-F025-4223-B9B6-E69D42FEA7D6} = {33C2B469-F025-4223-B9B6-E69D42FEA7D6}
|
||||
{5DBCE38A-C8D2-4498-A92A-9AF8D5196135} = {5DBCE38A-C8D2-4498-A92A-9AF8D5196135}
|
||||
{5EC9EDEB-8E49-4126-9161-1560683CBC71} = {5EC9EDEB-8E49-4126-9161-1560683CBC71}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cuda_tromp", "..\cuda_tromp\cuda_tromp.vcxproj", "{33C2B469-F025-4223-B9B6-E69D42FEA7D6}"
|
||||
|
@ -19,12 +16,6 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cpu_xenoncat", "..\cpu_xeno
|
|||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cpu_tromp", "..\cpu_tromp\cpu_tromp.vcxproj", "{6C180164-4DBE-45D7-85E0-7BDFACF3FC7B}"
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ocl_xpm", "..\ocl_xpm\ocl_xpm.vcxproj", "{5EC9EDEB-8E49-4126-9161-1560683CBC71}"
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ocl_device_utils", "..\ocl_device_utils\ocl_device_utils.vcxproj", "{5DBCE38A-C8D2-4498-A92A-9AF8D5196135}"
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ocl_silentarmy", "..\ocl_silentarmy\ocl_silentarmy.vcxproj", "{AB01E715-795A-4089-8DF0-AE6EBDC1AB48}"
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cuda_djezo", "..\cuda_djezo\cuda_djezo.vcxproj", "{268B10AD-D845-498B-8663-AB8911CA2039}"
|
||||
EndProject
|
||||
Global
|
||||
|
@ -73,33 +64,6 @@ Global
|
|||
{6C180164-4DBE-45D7-85E0-7BDFACF3FC7B}.ReleaseSlow|Win32.ActiveCfg = ReleaseSSE2|x64
|
||||
{6C180164-4DBE-45D7-85E0-7BDFACF3FC7B}.ReleaseSlow|x64.ActiveCfg = ReleaseSSE2|x64
|
||||
{6C180164-4DBE-45D7-85E0-7BDFACF3FC7B}.ReleaseSlow|x64.Build.0 = ReleaseSSE2|x64
|
||||
{5EC9EDEB-8E49-4126-9161-1560683CBC71}.Debug|Win32.ActiveCfg = Debug|x64
|
||||
{5EC9EDEB-8E49-4126-9161-1560683CBC71}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{5EC9EDEB-8E49-4126-9161-1560683CBC71}.Debug|x64.Build.0 = Debug|x64
|
||||
{5EC9EDEB-8E49-4126-9161-1560683CBC71}.Release|Win32.ActiveCfg = Release|x64
|
||||
{5EC9EDEB-8E49-4126-9161-1560683CBC71}.Release|x64.ActiveCfg = Release|x64
|
||||
{5EC9EDEB-8E49-4126-9161-1560683CBC71}.Release|x64.Build.0 = Release|x64
|
||||
{5EC9EDEB-8E49-4126-9161-1560683CBC71}.ReleaseSlow|Win32.ActiveCfg = Release|x64
|
||||
{5EC9EDEB-8E49-4126-9161-1560683CBC71}.ReleaseSlow|x64.ActiveCfg = Release|x64
|
||||
{5EC9EDEB-8E49-4126-9161-1560683CBC71}.ReleaseSlow|x64.Build.0 = Release|x64
|
||||
{5DBCE38A-C8D2-4498-A92A-9AF8D5196135}.Debug|Win32.ActiveCfg = Debug|x64
|
||||
{5DBCE38A-C8D2-4498-A92A-9AF8D5196135}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{5DBCE38A-C8D2-4498-A92A-9AF8D5196135}.Debug|x64.Build.0 = Debug|x64
|
||||
{5DBCE38A-C8D2-4498-A92A-9AF8D5196135}.Release|Win32.ActiveCfg = Release|x64
|
||||
{5DBCE38A-C8D2-4498-A92A-9AF8D5196135}.Release|x64.ActiveCfg = Release|x64
|
||||
{5DBCE38A-C8D2-4498-A92A-9AF8D5196135}.Release|x64.Build.0 = Release|x64
|
||||
{5DBCE38A-C8D2-4498-A92A-9AF8D5196135}.ReleaseSlow|Win32.ActiveCfg = Release|x64
|
||||
{5DBCE38A-C8D2-4498-A92A-9AF8D5196135}.ReleaseSlow|x64.ActiveCfg = Release|x64
|
||||
{5DBCE38A-C8D2-4498-A92A-9AF8D5196135}.ReleaseSlow|x64.Build.0 = Release|x64
|
||||
{AB01E715-795A-4089-8DF0-AE6EBDC1AB48}.Debug|Win32.ActiveCfg = Debug|x64
|
||||
{AB01E715-795A-4089-8DF0-AE6EBDC1AB48}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{AB01E715-795A-4089-8DF0-AE6EBDC1AB48}.Debug|x64.Build.0 = Debug|x64
|
||||
{AB01E715-795A-4089-8DF0-AE6EBDC1AB48}.Release|Win32.ActiveCfg = Release|x64
|
||||
{AB01E715-795A-4089-8DF0-AE6EBDC1AB48}.Release|x64.ActiveCfg = Release|x64
|
||||
{AB01E715-795A-4089-8DF0-AE6EBDC1AB48}.Release|x64.Build.0 = Release|x64
|
||||
{AB01E715-795A-4089-8DF0-AE6EBDC1AB48}.ReleaseSlow|Win32.ActiveCfg = Release|x64
|
||||
{AB01E715-795A-4089-8DF0-AE6EBDC1AB48}.ReleaseSlow|x64.ActiveCfg = Release|x64
|
||||
{AB01E715-795A-4089-8DF0-AE6EBDC1AB48}.ReleaseSlow|x64.Build.0 = Release|x64
|
||||
{268B10AD-D845-498B-8663-AB8911CA2039}.Debug|Win32.ActiveCfg = Debug|x64
|
||||
{268B10AD-D845-498B-8663-AB8911CA2039}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{268B10AD-D845-498B-8663-AB8911CA2039}.Debug|x64.Build.0 = Debug|x64
|
||||
|
|
|
@ -84,7 +84,7 @@
|
|||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;USE_CPU_TROMP;USE_CPU_XENONCAT;USE_CUDA_TROMP;USE_OCL_XMP;USE_OCL_SILENTARMY;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;USE_CPU_TROMP;USE_CPU_XENONCAT;USE_CUDA_TROMP;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<EnableEnhancedInstructionSet>NotSet</EnableEnhancedInstructionSet>
|
||||
<AdditionalOptions>-D_WIN32_WINNT=0x0601 %(AdditionalOptions)</AdditionalOptions>
|
||||
<DisableSpecificWarnings>4068;4996;4503;4267;4180;4290;4244;4800;4334;4251</DisableSpecificWarnings>
|
||||
|
@ -153,7 +153,6 @@
|
|||
<ClCompile Include="amount.cpp" />
|
||||
<ClCompile Include="api.cpp" />
|
||||
<ClCompile Include="arith_uint256.cpp" />
|
||||
<ClCompile Include="AvailableSolvers.cpp" />
|
||||
<ClCompile Include="crypto\sha256.cpp" />
|
||||
<ClCompile Include="json\json_spirit_reader.cpp">
|
||||
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">/bigobj %(AdditionalOptions)</AdditionalOptions>
|
||||
|
|
|
@ -226,9 +226,6 @@
|
|||
<ClCompile Include="utilstrencodings.cpp">
|
||||
<Filter>Source Files\stuff</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="AvailableSolvers.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="MinerFactory.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
|
|
|
@ -1,15 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include <string>
|
||||
|
||||
// This will list OpenCL devices, but AMD will only have additional BusID
|
||||
// Plain record describing one OpenCL device. Each _CL_* member carries the
// value of the OpenCL device-info query of the same name.
struct OpenCLDevice {
|
||||
// Index of the device within its platform's device list.
unsigned int DeviceID;
|
||||
// Device name string.
std::string _CL_DEVICE_NAME;
|
||||
// Device type as text: "CPU", "GPU", "ACCELERATOR" or "DEFAULT".
std::string _CL_DEVICE_TYPE;
|
||||
// Global memory size as reported by the device.
unsigned long long _CL_DEVICE_GLOBAL_MEM_SIZE;
|
||||
// Vendor string.
std::string _CL_DEVICE_VENDOR;
|
||||
// Device version string.
std::string _CL_DEVICE_VERSION;
|
||||
// Driver version string.
std::string _CL_DRIVER_VERSION;
|
||||
};
|
||||
|
File diff suppressed because it is too large
Load Diff
|
@ -1,146 +0,0 @@
|
|||
#include "ocl_device_utils.h"
|
||||
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
#include <utility>
|
||||
#include <algorithm>
|
||||
|
||||
using namespace std;
|
||||
using namespace cl;
|
||||
|
||||
|
||||
// Set by the first QueryDevices() call so the platform scan runs only once.
bool ocl_device_utils::_hasQueried = false;
|
||||
// Names of the platforms already recorded; QueryDevices() skips duplicates.
std::vector<std::string> ocl_device_utils::_platformNames;
|
||||
// One entry per recorded platform, holding the devices found on it.
std::vector<PrintInfo> ocl_device_utils::_devicesPlatformsDevices;
|
||||
|
||||
// Returns the available OpenCL platforms.
//
// When the OpenCL headers define CL_PLATFORM_NOT_FOUND_KHR and the query
// fails with that code, a message is printed and an empty vector is
// returned. Any other cl::Error is propagated to the caller.
vector<Platform> ocl_device_utils::getPlatforms() {
	vector<Platform> platforms;
	try {
		Platform::get(&platforms);
	}
	catch (Error const& err) {
#if defined(CL_PLATFORM_NOT_FOUND_KHR)
		if (err.err() == CL_PLATFORM_NOT_FOUND_KHR) {
			cout << "No OpenCL platforms found" << endl;
			return platforms;
		}
#else
		(void)err;
#endif
		// Bare rethrow keeps the original exception object instead of
		// throwing a copy of it.
		throw;
	}
	return platforms;
}
|
||||
|
||||
void ocl_device_utils::print_opencl_devices() {
|
||||
ocl_device_utils::QueryDevices();
|
||||
ocl_device_utils::PrintDevices();
|
||||
}
|
||||
|
||||
// Returns the GPU and accelerator devices of one platform.
//
// _platforms   - platform list to index into.
// _platformId  - index of the platform; not range-checked here.
//
// A CL_DEVICE_NOT_FOUND error yields an empty vector; any other cl::Error is
// propagated to the caller.
vector<Device> ocl_device_utils::getDevices(vector<Platform> const& _platforms, unsigned _platformId) {
	vector<Device> devices;
	try {
		_platforms[_platformId].getDevices(/*CL_DEVICE_TYPE_CPU| */CL_DEVICE_TYPE_GPU | CL_DEVICE_TYPE_ACCELERATOR, &devices);
	}
	catch (Error const& err) {
		// Simply finding no devices is not an error: fall through and
		// return the empty vector. Everything else is rethrown as-is
		// (bare throw, so no copy of the exception is made).
		if (err.err() != CL_DEVICE_NOT_FOUND)
			throw;
	}
	return devices;
}
|
||||
|
||||
// Returns a copy of str cut at its first NUL character, so that any NUL
// bytes stored inside the std::string (and anything after them) are dropped.
string ocl_device_utils::StringnNullTerminatorFix(const string& str) {
	const char *raw = str.c_str();
	return string(raw);
}
|
||||
|
||||
bool ocl_device_utils::QueryDevices() {
|
||||
if (!_hasQueried) {
|
||||
_hasQueried = true;
|
||||
try {
|
||||
// get platforms
|
||||
auto platforms = getPlatforms();
|
||||
if (platforms.empty()) {
|
||||
cout << "No OpenCL platforms found" << endl;
|
||||
return false;
|
||||
}
|
||||
else {
|
||||
for (auto i_pId = 0u; i_pId < platforms.size(); ++i_pId) {
|
||||
string platformName = StringnNullTerminatorFix(platforms[i_pId].getInfo<CL_PLATFORM_NAME>());
|
||||
if (std::find(_platformNames.begin(), _platformNames.end(), platformName) == _platformNames.end()) {
|
||||
PrintInfo current;
|
||||
_platformNames.push_back(platformName);
|
||||
// new
|
||||
current.PlatformName = platformName;
|
||||
current.PlatformNum = i_pId;
|
||||
|
||||
auto clDevs = getDevices(platforms, i_pId);
|
||||
for (auto i_devId = 0u; i_devId < clDevs.size(); ++i_devId) {
|
||||
OpenCLDevice curDevice;
|
||||
curDevice.DeviceID = i_devId;
|
||||
curDevice._CL_DEVICE_NAME = StringnNullTerminatorFix(clDevs[i_devId].getInfo<CL_DEVICE_NAME>());
|
||||
switch (clDevs[i_devId].getInfo<CL_DEVICE_TYPE>()) {
|
||||
case CL_DEVICE_TYPE_CPU:
|
||||
curDevice._CL_DEVICE_TYPE = "CPU";
|
||||
break;
|
||||
case CL_DEVICE_TYPE_GPU:
|
||||
curDevice._CL_DEVICE_TYPE = "GPU";
|
||||
break;
|
||||
case CL_DEVICE_TYPE_ACCELERATOR:
|
||||
curDevice._CL_DEVICE_TYPE = "ACCELERATOR";
|
||||
break;
|
||||
default:
|
||||
curDevice._CL_DEVICE_TYPE = "DEFAULT";
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
curDevice._CL_DEVICE_GLOBAL_MEM_SIZE = clDevs[i_devId].getInfo<CL_DEVICE_GLOBAL_MEM_SIZE>();
|
||||
curDevice._CL_DEVICE_VENDOR = StringnNullTerminatorFix(clDevs[i_devId].getInfo<CL_DEVICE_VENDOR>());
|
||||
curDevice._CL_DEVICE_VERSION = StringnNullTerminatorFix(clDevs[i_devId].getInfo<CL_DEVICE_VERSION>());
|
||||
curDevice._CL_DRIVER_VERSION = StringnNullTerminatorFix(clDevs[i_devId].getInfo<CL_DRIVER_VERSION>());
|
||||
|
||||
current.Devices.push_back(curDevice);
|
||||
}
|
||||
_devicesPlatformsDevices.push_back(current);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (exception &ex) {
|
||||
// TODO
|
||||
cout << "ocl_device_utils::QueryDevices() exception: " << ex.what() << endl;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
int ocl_device_utils::GetCountForPlatform(int platformID) {
|
||||
for (const auto &platInfo : _devicesPlatformsDevices)
|
||||
{
|
||||
if (platformID == platInfo.PlatformNum) {
|
||||
return platInfo.Devices.size();
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void ocl_device_utils::PrintDevices() {
|
||||
int allDevsCount = 0;
|
||||
for (const auto &platInfo : _devicesPlatformsDevices) {
|
||||
allDevsCount += platInfo.Devices.size();
|
||||
}
|
||||
cout << "Number of OpenCL devices found: " << allDevsCount << endl;
|
||||
{
|
||||
int devPlatformsComma = _devicesPlatformsDevices.size();
|
||||
for (const auto &platInfo : _devicesPlatformsDevices) {
|
||||
cout << "\tPlatform: " << platInfo.PlatformName << " | " << "PlatformNum: " << platInfo.PlatformNum << endl;
|
||||
cout << "\t\tDevices: " << endl;
|
||||
// device print
|
||||
int devComma = platInfo.Devices.size();
|
||||
for (const auto &dev : platInfo.Devices) {
|
||||
cout << "\t\t\t#" << dev.DeviceID << " " << dev._CL_DEVICE_NAME << " | " << dev._CL_DEVICE_TYPE << endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,34 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#define __CL_ENABLE_EXCEPTIONS
|
||||
#define CL_USE_DEPRECATED_OPENCL_2_0_APIS
|
||||
|
||||
#include "cl_ext.hpp"
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include "OpenCLDevice.h"
|
||||
|
||||
|
||||
// One OpenCL platform together with the devices recorded for it.
struct PrintInfo {
|
||||
// Platform name string.
std::string PlatformName;
|
||||
// Index of the platform in the queried platform list.
int PlatformNum;
|
||||
// Devices found on this platform.
std::vector<OpenCLDevice> Devices;
|
||||
};
|
||||
|
||||
// Static-only helper that enumerates OpenCL platforms and devices once and
// keeps the result for later lookups and printing.
class ocl_device_utils {
|
||||
public:
|
||||
// Performs the platform/device scan on the first call. Returns true only
// when this call ran the scan to completion.
static bool QueryDevices();
|
||||
// Prints the recorded platforms and their devices to standard output.
static void PrintDevices();
|
||||
// Number of recorded devices for the given platform number, or 0 if the
// platform is not recorded.
static int GetCountForPlatform(int platformID);
|
||||
// Calls QueryDevices() and then PrintDevices().
static void print_opencl_devices();
|
||||
|
||||
private:
|
||||
// GPU and accelerator devices of _platforms[_platformId]; empty when the
// platform has none.
static std::vector<cl::Device> getDevices(std::vector<cl::Platform> const& _platforms, unsigned _platformId);
|
||||
// All available platforms; may be empty.
static std::vector<cl::Platform> getPlatforms();
|
||||
|
||||
// True once QueryDevices() has been called.
static bool _hasQueried;
|
||||
// Names of the platforms already recorded.
static std::vector<std::string> _platformNames;
|
||||
// Recorded platforms with their devices.
static std::vector<PrintInfo> _devicesPlatformsDevices;
|
||||
|
||||
// Returns a copy of str truncated at its first NUL character.
static std::string StringnNullTerminatorFix(const std::string& str);
|
||||
};
|
|
@ -1,95 +0,0 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="cl_ext.hpp" />
|
||||
<ClInclude Include="ocl_device_utils.h" />
|
||||
<ClInclude Include="opencl.h" />
|
||||
<ClInclude Include="OpenCLDevice.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="ocl_device_utils.cpp" />
|
||||
<ClCompile Include="opencl.cpp" />
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{5DBCE38A-C8D2-4498-A92A-9AF8D5196135}</ProjectGuid>
|
||||
<Keyword>Win32Proj</Keyword>
|
||||
<RootNamespace>ocl_device_utils</RootNamespace>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>StaticLibrary</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<PlatformToolset>v120</PlatformToolset>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>StaticLibrary</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<PlatformToolset>v120</PlatformToolset>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
|
||||
<IntDir>$(Platform)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
|
||||
<IntDir>$(Platform)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<ClCompile>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>$(AMDAPPSDKROOT)\include\</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>$(AMDAPPSDKROOT)\include\;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
|
@ -1,13 +0,0 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup>
|
||||
<ClInclude Include="cl_ext.hpp" />
|
||||
<ClInclude Include="ocl_device_utils.h" />
|
||||
<ClInclude Include="OpenCLDevice.h" />
|
||||
<ClInclude Include="opencl.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="ocl_device_utils.cpp" />
|
||||
<ClCompile Include="opencl.cpp" />
|
||||
</ItemGroup>
|
||||
</Project>
|
|
@ -1,174 +0,0 @@
|
|||
#include "opencl.h"
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include <stdio.h>
|
||||
|
||||
extern cl_platform_id gPlatform;
|
||||
// extern cl_program gProgram;
|
||||
|
||||
bool clInitialize(int requiredPlatform, std::vector<cl_device_id> &gpus)
|
||||
{
|
||||
cl_platform_id platforms[64];
|
||||
cl_uint numPlatforms;
|
||||
OCLR(clGetPlatformIDs(sizeof(platforms)/sizeof(cl_platform_id), platforms, &numPlatforms), false);
|
||||
if (!numPlatforms) {
|
||||
printf("<error> no OpenCL platforms found\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
/*int platformIdx = -1;
|
||||
if (requiredPlatform) {
|
||||
for (decltype(numPlatforms) i = 0; i < numPlatforms; i++) {
|
||||
char name[1024] = {0};
|
||||
OCLR(clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, sizeof(name), name, 0), false);
|
||||
printf("found platform[%i] name = '%s'\n", (int)i, name);
|
||||
if (strcmp(name, requiredPlatform) == 0) {
|
||||
platformIdx = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
platformIdx = 0;
|
||||
}*/
|
||||
|
||||
int platformIdx = requiredPlatform;
|
||||
|
||||
|
||||
if (platformIdx == -1) {
|
||||
printf("<error> platform %s not exists\n", requiredPlatform);
|
||||
return false;
|
||||
}
|
||||
|
||||
gPlatform = platforms[platformIdx];
|
||||
|
||||
cl_uint numDevices = 0;
|
||||
cl_device_id devices[64];
|
||||
clGetDeviceIDs(gPlatform, CL_DEVICE_TYPE_GPU, sizeof(devices)/sizeof(cl_device_id), devices, &numDevices);
|
||||
if (numDevices) {
|
||||
printf("<info> found %d devices\n", numDevices);
|
||||
} else {
|
||||
printf("<error> no OpenCL GPU devices found.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
for (decltype(numDevices) i = 0; i < numDevices; i++) {
|
||||
gpus.push_back(devices[i]);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Build an OpenCL program for one device, caching the compiled binary on disk.
 *
 * If the file `binaryName` cannot be opened, the given source files are
 * concatenated, compiled with `arguments`, and the resulting device binary is
 * written to `binaryName`. In every case the program finally returned in
 * `*gProgram` is created from the binary file and built for `gpu`.
 *
 * @param gContext    OpenCL context the program is created in.
 * @param gpu         Device to build for.
 * @param binaryName  Path of the cached binary (read, and written on a miss).
 * @param sources     Paths of the OpenCL source files to concatenate.
 * @param arguments   Build options passed to clBuildProgram when compiling.
 * @param binstatus   Receives the per-device load status reported by
 *                    clCreateProgramWithBinary.
 * @param gProgram    Receives the built program.
 * @return true on success, false on any failure (details are printed).
 */
bool clCompileKernel(cl_context gContext,
                     cl_device_id gpu,
                     const char *binaryName,
                     const std::vector<const char*> &sources,
                     const char *arguments,
                     cl_int *binstatus,
                     cl_program *gProgram)
{
    std::ifstream testfile(binaryName);

    if (!testfile) {
        printf("<info> compiling ...\n");

        // Concatenate all source files into one translation unit.
        std::string sourceFile;
        for (auto &i : sources) {
            std::ifstream stream;
            stream.exceptions(std::ifstream::failbit | std::ifstream::badbit);
            try {
                stream.open(i);
            } catch (std::system_error &e) {
                fprintf(stderr, "<error> %s\n", e.code().message().c_str());
                return false;
            }
            std::string str((std::istreambuf_iterator<char>(stream)), std::istreambuf_iterator<char>());
            sourceFile.append(str);
        }

        printf("<info> source: %u bytes\n", (unsigned)sourceFile.size());
        if (sourceFile.size() < 1) {
            fprintf(stderr, "<error> source files not found or empty\n");
            return false;
        }

        cl_int error;
        // Distinct name: the original shadowed the `sources` parameter here.
        const char *sourceText[] = { sourceFile.c_str(), 0 };
        *gProgram = clCreateProgramWithSource(gContext, 1, sourceText, 0, &error);
        OCLR(error, false);

        if (clBuildProgram(*gProgram, 1, &gpu, arguments, 0, 0) != CL_SUCCESS) {
            // logSize stays 0 if the size query itself fails.
            size_t logSize = 0;
            clGetProgramBuildInfo(*gProgram, gpu, CL_PROGRAM_BUILD_LOG, 0, 0, &logSize);

            // One extra zeroed byte guarantees a terminated string for printf.
            std::unique_ptr<char[]> log(new char[logSize + 1]());
            if (logSize)
                clGetProgramBuildInfo(*gProgram, gpu, CL_PROGRAM_BUILD_LOG, logSize, log.get(), 0);
            printf("%s\n", log.get());

            // Do not leak the half-built program or hand back a stale handle.
            clReleaseProgram(*gProgram);
            *gProgram = 0;
            return false;
        }

        size_t binsize = 0;
        OCLR(clGetProgramInfo(*gProgram, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &binsize, 0), false);
        if (!binsize) {
            printf("<error> no binary available!\n");
            return false;
        }

        printf("<info> binsize = %u bytes\n", (unsigned)binsize);

        std::unique_ptr<unsigned char[]> binary(new unsigned char[binsize + 1]);
        // CL_PROGRAM_BINARIES expects an array of destination pointers (one
        // per device), not the address of the smart pointer object.
        unsigned char *binaryPtrs[1] = { binary.get() };
        OCLR(clGetProgramInfo(*gProgram, CL_PROGRAM_BINARIES, sizeof(binaryPtrs), binaryPtrs, 0), false);

        {
            std::ofstream bin(binaryName, std::ofstream::binary | std::ofstream::trunc);
            bin.write((const char*)binary.get(), binsize);
            bin.close();
        }

        OCLR(clReleaseProgram(*gProgram), false);
    }

    // Load the (possibly just written) binary back from disk.
    std::ifstream bfile(binaryName, std::ifstream::binary);
    if (!bfile) {
        printf("<error> %s not found\n", binaryName);
        return false;
    }

    bfile.seekg(0, bfile.end);
    // tellg() yields -1 on failure; keep it signed until it has been checked.
    const std::streamoff fileLength = bfile.tellg();
    bfile.seekg(0, bfile.beg);
    if (fileLength <= 0) {
        printf("<error> %s empty\n", binaryName);
        return false;
    }
    size_t binsize = (size_t)fileLength;

    std::vector<char> binary(binsize + 1);
    bfile.read(&binary[0], binsize);
    bfile.close();

    cl_int error;
    const unsigned char *binaryPtr = (const unsigned char*)&binary[0];

    *gProgram = clCreateProgramWithBinary(gContext, 1, &gpu, &binsize, &binaryPtr, binstatus, &error);
    OCLR(error, false);
    OCLR(clBuildProgram(*gProgram, 1, &gpu, 0, 0, 0), false);
    return true;
}
|
|
@ -1,131 +0,0 @@
|
|||
/*
|
||||
* opencl.h
|
||||
*
|
||||
* Created on: 01.05.2014
|
||||
* Author: mad
|
||||
*/
|
||||
|
||||
#ifndef OPENCL_H_
|
||||
#define OPENCL_H_
|
||||
|
||||
#pragma warning(disable: 4996)
|
||||
#include <CL/cl.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <vector>
|
||||
|
||||
// extern cl_context gContext;
|
||||
|
||||
|
||||
|
||||
// Error-checking wrappers for OpenCL calls. Each one evaluates `error` exactly
// once; a non-zero status is printed with file/line and then:
//   OCL(error)       - returns from the enclosing void function
//   OCLR(error, ret) - returns `ret` from the enclosing function
//   OCLE(error)      - terminates the process with the status as exit code
// The argument is parenthesised so that any expression can be passed safely.
#define OCL(error) \
  if (cl_int err = (error)) { \
    printf("OpenCL error: %d at %s:%d\n", err, __FILE__, __LINE__); \
    return; \
  }

#define OCLR(error, ret) \
  if (cl_int err = (error)) { \
    printf("OpenCL error: %d at %s:%d\n", err, __FILE__, __LINE__); \
    return ret; \
  }

#define OCLE(error) \
  if (cl_int err = (error)) { \
    printf("OpenCL error: %d at %s:%d\n", err, __FILE__, __LINE__); \
    exit(err); \
  }
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
template<typename T>
|
||||
class clBuffer {
|
||||
public:
|
||||
|
||||
clBuffer() {
|
||||
|
||||
Size = 0;
|
||||
HostData = 0;
|
||||
DeviceData = 0;
|
||||
|
||||
}
|
||||
|
||||
~clBuffer() {
|
||||
|
||||
if(HostData)
|
||||
delete [] HostData;
|
||||
|
||||
if(DeviceData)
|
||||
clReleaseMemObject(DeviceData);
|
||||
|
||||
}
|
||||
|
||||
void init(cl_context gContext, int size, cl_mem_flags flags = 0) {
|
||||
|
||||
Size = size;
|
||||
|
||||
if(!(flags & CL_MEM_HOST_NO_ACCESS)){
|
||||
HostData = new T[Size];
|
||||
memset(HostData, 0, Size*sizeof(T));
|
||||
}else
|
||||
HostData = 0;
|
||||
|
||||
//printf("clCreateBuffer: size = %d, %d bytes\n", Size, Size*sizeof(T));
|
||||
|
||||
cl_int error;
|
||||
if (flags & CL_MEM_HOST_NO_ACCESS)
|
||||
flags = CL_MEM_READ_WRITE;
|
||||
DeviceData = clCreateBuffer(gContext, flags, Size*sizeof(T), 0, &error);
|
||||
OCL(error);
|
||||
|
||||
}
|
||||
|
||||
void copyToDevice(cl_command_queue cq, bool blocking = true) {
|
||||
|
||||
OCL(clEnqueueWriteBuffer(cq, DeviceData, blocking, 0, Size*sizeof(T), HostData, 0, 0, 0));
|
||||
|
||||
}
|
||||
|
||||
void copyToHost(cl_command_queue cq, bool blocking = true, unsigned size = 0) {
|
||||
|
||||
if(size == 0)
|
||||
size = Size;
|
||||
|
||||
OCL(clEnqueueReadBuffer(cq, DeviceData, blocking, 0, size*sizeof(T), HostData, 0, 0, 0));
|
||||
|
||||
}
|
||||
|
||||
T& get(int index) {
|
||||
return HostData[index];
|
||||
}
|
||||
|
||||
T& operator[](int index) {
|
||||
return HostData[index];
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
int Size;
|
||||
T* HostData;
|
||||
cl_mem DeviceData;
|
||||
|
||||
|
||||
};
|
||||
|
||||
|
||||
bool clInitialize(int requiredPlatform, std::vector<cl_device_id> &gpus);
|
||||
bool clCompileKernel(cl_context gContext,
|
||||
cl_device_id gpu,
|
||||
const char *binaryName,
|
||||
const std::vector<const char*> &sources,
|
||||
const char *arguments,
|
||||
cl_int *binstatus,
|
||||
cl_program *gProgram);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#endif /* OPENCL_H_ */
|
|
@ -1,536 +0,0 @@
|
|||
#include "ocl_silentarmy.hpp"
|
||||
|
||||
//#define _CRT_SECURE_NO_WARNINGS
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdarg.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include <assert.h>
|
||||
#include <sys/types.h>
|
||||
//#include <sys/time.h>
|
||||
#include <sys/stat.h>
|
||||
#include <fcntl.h>
|
||||
//#include <unistd.h>
|
||||
//#include <getopt.h>
|
||||
#include <errno.h>
|
||||
|
||||
|
||||
#include "opencl.h"
|
||||
|
||||
#include <fstream>
|
||||
|
||||
#include "sa_blake.h"
|
||||
|
||||
typedef uint8_t uchar;
|
||||
typedef uint32_t uint;
|
||||
typedef uint64_t ulong;
|
||||
#include "param.h"
|
||||
|
||||
#define MIN(A, B) (((A) < (B)) ? (A) : (B))
|
||||
#define MAX(A, B) (((A) > (B)) ? (A) : (B))
|
||||
|
||||
#define WN PARAM_N
|
||||
#define WK PARAM_K
|
||||
|
||||
#define COLLISION_BIT_LENGTH (WN / (WK+1))
|
||||
#define COLLISION_BYTE_LENGTH ((COLLISION_BIT_LENGTH+7)/8)
|
||||
#define FINAL_FULL_WIDTH (2*COLLISION_BYTE_LENGTH+sizeof(uint32_t)*(1 << (WK)))
|
||||
|
||||
#define NDIGITS (WK+1)
|
||||
#define DIGITBITS (WN/(NDIGITS))
|
||||
#define PROOFSIZE (1u<<WK)
|
||||
#define COMPRESSED_PROOFSIZE ((COLLISION_BIT_LENGTH+1)*PROOFSIZE*4/(8*sizeof(uint32_t)))
|
||||
|
||||
/* Pair of 32-bit drop counters.
 * NOTE(review): presumably "dropped collisions" / "dropped stores" reported by
 * the kernels; nothing in this file reads them, so confirm against kernel.cl. */
typedef struct debug_s {
    uint32_t dropped_coll;
    uint32_t dropped_stor;
} debug_t;
|
||||
|
||||
struct OclContext {
|
||||
cl_context _context;
|
||||
cl_program _program;
|
||||
cl_device_id _dev_id;
|
||||
|
||||
cl_platform_id platform_id = 0;
|
||||
|
||||
cl_command_queue queue;
|
||||
|
||||
cl_kernel k_init_ht;
|
||||
cl_kernel k_rounds[PARAM_K];
|
||||
cl_kernel k_sols;
|
||||
|
||||
cl_mem buf_ht[2], buf_sols, buf_dbg;
|
||||
size_t global_ws;
|
||||
size_t local_work_size = 64;
|
||||
|
||||
sols_t *sols;
|
||||
|
||||
bool init(cl_device_id dev, unsigned threadsNum, unsigned threadsPerBlock);
|
||||
|
||||
~OclContext() {
|
||||
clReleaseMemObject(buf_dbg);
|
||||
clReleaseMemObject(buf_ht[0]);
|
||||
clReleaseMemObject(buf_ht[1]);
|
||||
free(sols);
|
||||
}
|
||||
};
|
||||
|
||||
cl_mem check_clCreateBuffer(cl_context ctx, cl_mem_flags flags, size_t size,
|
||||
void *host_ptr);
|
||||
|
||||
/**
 * Create the command queue, device buffers, kernels and host solution buffer
 * for device `dev`, using the already populated _context and _program.
 *
 * @param dev              Device the command queue is created on.
 * @param threadsNum       Currently unused.
 * @param threadsPerBlock  Currently unused.
 * @return true when every object was created; false on the first failure
 *         (a message is printed). Objects created before the failure stay in
 *         the members and are left for the destructor.
 */
bool OclContext::init(
    cl_device_id dev,
    unsigned int threadsNum,
    unsigned int threadsPerBlock)
{
    (void)threadsNum;
    (void)threadsPerBlock;

    cl_int error = CL_SUCCESS;

    queue = clCreateCommandQueue(_context, dev, 0, &error);
    if (error != CL_SUCCESS || !queue) {
        printf("clCreateCommandQueue (%d)\n", error);
        return false;
    }

#ifdef ENABLE_DEBUG
    size_t dbg_size = NR_ROWS;
#else
    size_t dbg_size = 1;
#endif

    buf_dbg = check_clCreateBuffer(_context, CL_MEM_READ_WRITE |
        CL_MEM_HOST_NO_ACCESS, dbg_size, NULL);
    buf_ht[0] = check_clCreateBuffer(_context, CL_MEM_READ_WRITE, HT_SIZE, NULL);
    buf_ht[1] = check_clCreateBuffer(_context, CL_MEM_READ_WRITE, HT_SIZE, NULL);
    buf_sols = check_clCreateBuffer(_context, CL_MEM_READ_WRITE, sizeof(sols_t),
        NULL);
    // check_clCreateBuffer already printed the status; just stop here.
    if (!buf_dbg || !buf_ht[0] || !buf_ht[1] || !buf_sols)
        return false;

    fprintf(stderr, "Hash tables will use %.1f MB\n", 2.0 * HT_SIZE / 1e6);

    k_init_ht = clCreateKernel(_program, "kernel_init_ht", &error);
    if (error != CL_SUCCESS || !k_init_ht) {
        printf("clCreateKernel kernel_init_ht (%d)\n", error);
        return false;
    }

    for (unsigned i = 0; i < WK; i++) {
        // 128 bytes is ample for "kernel_round" plus any unsigned value.
        char kernelName[128];
        sprintf(kernelName, "kernel_round%u", i);
        k_rounds[i] = clCreateKernel(_program, kernelName, &error);
        if (error != CL_SUCCESS || !k_rounds[i]) {
            printf("clCreateKernel %s (%d)\n", kernelName, error);
            return false;
        }
    }

    sols = (sols_t *)malloc(sizeof(*sols));
    if (!sols) {
        printf("malloc of solution buffer failed\n");
        return false;
    }

    k_sols = clCreateKernel(_program, "kernel_sols", &error);
    if (error != CL_SUCCESS || !k_sols) {
        printf("clCreateKernel kernel_sols (%d)\n", error);
        return false;
    }
    return true;
}
|
||||
|
||||
///
|
||||
int verbose = 0;
|
||||
uint32_t show_encoded = 0;
|
||||
|
||||
/*
** Thin wrapper around clCreateBuffer: forwards all arguments, prints the
** status code to stdout when creation fails, and returns whatever handle the
** OpenCL call produced (it does not abort).
*/
cl_mem check_clCreateBuffer(cl_context ctx, cl_mem_flags flags, size_t size,
    void *host_ptr)
{
    cl_int rc;
    cl_mem buffer = clCreateBuffer(ctx, flags, size, host_ptr, &rc);
    const bool created = (rc == CL_SUCCESS) && buffer;
    if (!created) {
        printf("clCreateBuffer (%d)\n", rc);
    }
    return buffer;
}
|
||||
|
||||
/*
** Bind the cl_mem pointed to by `a` as argument number `a_pos` of kernel `k`.
** A failure is only reported on stdout; execution continues.
*/
void check_clSetKernelArg(cl_kernel k, cl_uint a_pos, cl_mem *a)
{
    const cl_int rc = clSetKernelArg(k, a_pos, sizeof(*a), a);
    if (rc == CL_SUCCESS) {
        return;
    }
    printf("clSetKernelArg (%d)\n", rc);
}
|
||||
|
||||
/*
** Wrapper around clEnqueueNDRangeKernel that forwards every argument and
** prints the status code to stdout on failure (it does not abort).
*/
void check_clEnqueueNDRangeKernel(cl_command_queue queue, cl_kernel k, cl_uint
    work_dim, const size_t *global_work_offset, const size_t
    *global_work_size, const size_t *local_work_size, cl_uint
    num_events_in_wait_list, const cl_event *event_wait_list, cl_event
    *event)
{
    // OpenCL status codes are signed (errors are negative), so keep the
    // result in a cl_int to match both the API and the %d format below.
    cl_int status;
    status = clEnqueueNDRangeKernel(queue, k, work_dim, global_work_offset,
        global_work_size, local_work_size, num_events_in_wait_list,
        event_wait_list, event);
    if (status != CL_SUCCESS)
        printf("clEnqueueNDRangeKernel (%d)\n", status);
}
|
||||
|
||||
/*
** Wrapper around clEnqueueReadBuffer: same arguments, and a failing status is
** printed to stdout instead of being returned to the caller.
*/
void check_clEnqueueReadBuffer(cl_command_queue queue, cl_mem buffer, cl_bool
    blocking_read, size_t offset, size_t size, void *ptr, cl_uint
    num_events_in_wait_list, const cl_event *event_wait_list, cl_event
    *event)
{
    const cl_int rc = clEnqueueReadBuffer(queue, buffer, blocking_read,
        offset, size, ptr, num_events_in_wait_list, event_wait_list, event);
    if (rc == CL_SUCCESS) {
        return;
    }
    printf("clEnqueueReadBuffer (%d)\n", rc);
}
|
||||
|
||||
/* Write the a_len bytes at `a` to stderr as lowercase two-digit hex, with no
 * separators and no trailing newline. */
void hexdump(uint8_t *a, uint32_t a_len)
{
    uint32_t pos = 0;
    while (pos < a_len) {
        fprintf(stderr, "%02x", a[pos]);
        ++pos;
    }
}
|
||||
|
||||
/*
** Format up to a_len bytes at `_a` as a lowercase hex string.
**
** Returns a pointer to a static 1024-byte buffer that is overwritten by the
** next call. Input that does not fit is truncated: at most 511 bytes
** (1022 hex digits plus the terminator) are rendered.
*/
char *s_hexdump(const void *_a, uint32_t a_len)
{
    static const char digits[] = "0123456789abcdef";
    static char buf[1024];
    const uint8_t *a = (const uint8_t *)_a;
    size_t out = 0;
    // Each byte needs two characters plus room for the final NUL, so bound
    // the *output* position (the original bounded the input index instead,
    // which let long inputs run past the end of buf).
    for (uint32_t i = 0; i < a_len && out + 2 < sizeof(buf); i++) {
        buf[out++] = digits[a[i] >> 4];
        buf[out++] = digits[a[i] & 0x0f];
    }
    buf[out] = 0;
    return buf;
}
|
||||
|
||||
/*
** Convert the hex digit at base[off] to its value (0-15). Upper and lower
** case are accepted. For any other character a message is printed to stdout
** and 0 is returned.
*/
uint8_t hex2val(const char *base, size_t off)
{
    const char c = base[off];
    if (c >= '0' && c <= '9') return (uint8_t)(c - '0');
    if (c >= 'a' && c <= 'f') return (uint8_t)(10 + c - 'a');
    if (c >= 'A' && c <= 'F') return (uint8_t)(10 + c - 'A');
    // off is a size_t, so the matching conversion is %zu (not the signed %zd).
    printf("Invalid hex char at offset %zu: ...%c...\n", off, c);
    return 0;
}
|
||||
|
||||
/*
** Look up the compute-unit count for a GPU model name. Only "rx480" (36) is
** known; any other name is reported on stderr and yields 0.
*/
unsigned nr_compute_units(const char *gpu)
{
    const int is_rx480 = (strcmp(gpu, "rx480") == 0);
    if (!is_rx480) {
        fprintf(stderr, "Unknown GPU: %s\n", gpu);
        return 0;
    }
    return 36;
}
|
||||
|
||||
static void compress(uint8_t *out, uint32_t *inputs, uint32_t n)
|
||||
{
|
||||
uint32_t byte_pos = 0;
|
||||
int32_t bits_left = PREFIX + 1;
|
||||
uint8_t x = 0;
|
||||
uint8_t x_bits_used = 0;
|
||||
uint8_t *pOut = out;
|
||||
while (byte_pos < n)
|
||||
{
|
||||
if (bits_left >= 8 - x_bits_used)
|
||||
{
|
||||
x |= inputs[byte_pos] >> (bits_left - 8 + x_bits_used);
|
||||
bits_left -= 8 - x_bits_used;
|
||||
x_bits_used = 8;
|
||||
}
|
||||
else if (bits_left > 0)
|
||||
{
|
||||
uint32_t mask = ~(-1 << (8 - x_bits_used));
|
||||
mask = ((~mask) >> bits_left) & mask;
|
||||
x |= (inputs[byte_pos] << (8 - x_bits_used - bits_left)) & mask;
|
||||
x_bits_used += bits_left;
|
||||
bits_left = 0;
|
||||
}
|
||||
else if (bits_left <= 0)
|
||||
{
|
||||
assert(!bits_left);
|
||||
byte_pos++;
|
||||
bits_left = PREFIX + 1;
|
||||
}
|
||||
if (x_bits_used == 8)
|
||||
{
|
||||
*pOut++ = x;
|
||||
x = x_bits_used = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
** Print the build log of `program` for `device` to stderr.
**
** The log is fetched into a heap buffer sized by a preliminary query; the
** original 2 MiB automatic array could exhaust the thread stack on its own.
** If a query fails the status is printed to stdout and nothing else is output.
*/
void get_program_build_log(cl_program program, cl_device_id device)
{
    size_t log_size = 0;
    cl_int status = clGetProgramBuildInfo(program, device,
        CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
    if (status != CL_SUCCESS) {
        printf("clGetProgramBuildInfo (%d)\n", status);
        return;
    }

    // +1 so the text is always terminated, even for an empty log.
    char *log = (char *)malloc(log_size + 1);
    if (!log) {
        printf("clGetProgramBuildInfo: out of memory\n");
        return;
    }
    log[0] = 0;
    log[log_size] = 0;

    if (log_size) {
        status = clGetProgramBuildInfo(program, device,
            CL_PROGRAM_BUILD_LOG, log_size, log, NULL);
        if (status != CL_SUCCESS) {
            printf("clGetProgramBuildInfo (%d)\n", status);
            free(log);
            return;
        }
    }

    fprintf(stderr, "%s\n", log);
    free(log);
}
|
||||
|
||||
size_t select_work_size_blake(void)
|
||||
{
|
||||
size_t work_size =
|
||||
64 * /* thread per wavefront */
|
||||
BLAKE_WPS * /* wavefront per simd */
|
||||
4 * /* simd per compute unit */
|
||||
nr_compute_units("rx480");
|
||||
// Make the work group size a multiple of the nr of wavefronts, while
|
||||
// dividing the number of inputs. This results in the worksize being a
|
||||
// power of 2.
|
||||
while (NR_INPUTS % work_size)
|
||||
work_size += 64;
|
||||
//debug("Blake: work size %zd\n", work_size);
|
||||
return work_size;
|
||||
}
|
||||
|
||||
/*
** Enqueue the hash-table initialisation kernel on `queue`: binds `buf_ht` as
** argument 0 of `k_init_ht` and launches NR_ROWS work items in groups of 64.
** A failure to set the argument is printed to stdout; the launch still happens.
*/
static void init_ht(cl_command_queue queue, cl_kernel k_init_ht, cl_mem buf_ht)
{
    const size_t total_items = NR_ROWS;
    const size_t group_items = 64;

    const cl_int rc = clSetKernelArg(k_init_ht, 0, sizeof(buf_ht), &buf_ht);
    if (rc != CL_SUCCESS) {
        printf("clSetKernelArg (%d)\n", rc);
    }

    check_clEnqueueNDRangeKernel(queue, k_init_ht,
        1,              // work_dim
        NULL,           // global_work_offset
        &total_items,   // global_work_size
        &group_items,   // local_work_size
        0, NULL,        // no wait list
        NULL);          // no completion event
}
|
||||
|
||||
|
||||
/*
** Order two adjacent blobs so that the lexicographically smaller one (compared
** word by word) comes first. The blobs are consecutive in memory and together
** occupy 2*len 32-bit words; they are exchanged in place when out of order.
**
** a    points to the first blob of the pair
** len  number of 32-bit words in each blob
*/
void sort_pair(uint32_t *a, uint32_t len)
{
    uint32_t *lhs = a;
    uint32_t *rhs = a + len;

    // Skip the common prefix; equal words never need to move.
    uint32_t first_diff = 0;
    while (first_diff < len && lhs[first_diff] == rhs[first_diff])
        first_diff++;

    // Identical blobs, or already in order: nothing to do.
    if (first_diff == len || lhs[first_diff] < rhs[first_diff])
        return;

    // Out of order: exchange everything from the first difference onwards.
    for (uint32_t k = first_diff; k < len; k++)
    {
        const uint32_t held = lhs[k];
        lhs[k] = rhs[k];
        rhs[k] = held;
    }
}
|
||||
/*
** Validate solution number `sol_i` in `sols` and normalise it in place.
**
** The solution is rejected (valid flag cleared, returns 0) when any of its
** 2^PARAM_K input indices is repeated or does not fit in PREFIX + 1 bits.
** Otherwise the valid flag is set, the indices are reordered by sorting the
** pairs at every tree level, and 1 is returned.
*/
static uint32_t verify_sol(sols_t *sols, unsigned sol_i)
{
    uint32_t *inputs = sols->values[sol_i];
    const uint32_t nr_values = 1u << (PREFIX + 1);  // exclusive bound on an index
    const uint32_t nr_inputs = 1u << PARAM_K;
    uint8_t seen[(1 << (PREFIX + 1)) / 8];           // one bit per possible index
    uint32_t i;
    // look for duplicate inputs
    memset(seen, 0, sizeof(seen));
    for (i = 0; i < nr_inputs; i++)
    {
        const uint32_t v = inputs[i];
        // The indices come from the device; an out-of-range value can never
        // be part of a valid solution and must not be used to index seen[].
        if (v >= nr_values)
        {
            sols->valid[sol_i] = 0;
            return 0;
        }
        const uint8_t bit = (uint8_t)(1u << (v & 7));
        if (seen[v / 8] & bit)
        {
            // at least one input value is a duplicate
            sols->valid[sol_i] = 0;
            return 0;
        }
        seen[v / 8] |= bit;
    }
    // the valid flag is already set by the GPU, but set it again because
    // I plan to change the GPU code to not set it
    sols->valid[sol_i] = 1;
    // sort the pairs in place
    for (uint32_t level = 0; level < PARAM_K; level++)
        for (i = 0; i < nr_inputs; i += (2u << level))
            sort_pair(&inputs[i], 1u << level);
    return 1;
}
|
||||
|
||||
|
||||
|
||||
/**
 * Remember which platform/device this solver should use. No OpenCL work is
 * done here; that happens in start().
 *
 * Every member is initialised - in particular `oclc` starts out null so that
 * stop() is safe even when start() was never called.
 */
ocl_silentarmy::ocl_silentarmy(int platf_id, int dev_id)
    : blocks(0),
      device_id(dev_id),
      platform_id(platf_id),
      oclc(nullptr),
      // TODO
      threadsNum(8192),
      wokrsize(128) // 256;
{
}
|
||||
|
||||
std::string ocl_silentarmy::getdevinfo() {
|
||||
/*TODO get name*/
|
||||
return "GPU_ID(" + std::to_string(device_id)+ ")";
|
||||
}
|
||||
|
||||
// STATICS START
|
||||
// Device enumeration is not implemented yet (TODO); always reports 0.
int ocl_silentarmy::getcount()
{
    const int device_count = 0;
    return device_count;
}
|
||||
|
||||
// Stub (not implemented): ignores platf_id/d_id and leaves gpu_name, sm_count
// and version untouched.
void ocl_silentarmy::getinfo(int platf_id, int d_id, std::string& gpu_name, int& sm_count, std::string& version) { /*TODO*/ }
|
||||
|
||||
/**
 * Bring up OpenCL for `device_context`: allocate its OclContext, pick the
 * device numbered `device_id` on platform `platform_id`, create a context,
 * load (compiling on first use) the kernel binary and initialise queues,
 * buffers and kernels.
 *
 * On success `is_init_success` is set to true; on any failure a message is
 * printed and the flag stays false. The OclContext stays allocated either way
 * and is freed by stop().
 */
void ocl_silentarmy::start(ocl_silentarmy& device_context) {
    /*TODO*/
    device_context.is_init_success = false;
    device_context.oclc = new OclContext();
    OclContext *ctx = device_context.oclc;

    std::vector<cl_device_id> allGpus;
    if (!clInitialize(device_context.platform_id, allGpus)) {
        return;
    }

    // Exactly one device is used, so index it directly instead of scanning
    // the list (this also avoids comparing a signed id with an unsigned index).
    const int wanted = device_context.device_id;
    if (wanted < 0 || (size_t)wanted >= allGpus.size()) {
        printf("Device id %d not found\n", wanted);
        return;
    }
    cl_device_id gpu = allGpus[wanted];
    printf("Using device %d as GPU %d\n", wanted, 0);
    ctx->_dev_id = gpu;

    // context create
    cl_int error;
    ctx->_context = clCreateContext(NULL, 1, &gpu, 0, 0, &error);
    if (cl_int err = error) {
        printf("OpenCL error: %d at %s:%d\n", err, __FILE__, __LINE__);
        return;
    }

    // NOTE(review): the cache file is always "silentarmy_gpu0.bin" whatever
    // device was selected (as before); confirm this is intended when a
    // machine has different GPU models.
    cl_int binstatus = CL_SUCCESS;
    char kernelName[64];
    sprintf(kernelName, "silentarmy_gpu%u.bin", 0u);
    if (!clCompileKernel(ctx->_context,
                         gpu,
                         kernelName,
                         { "zcash/gpu/kernel.cl" },
                         "",
                         &binstatus,
                         &ctx->_program)) {
        return;
    }

    if (binstatus != CL_SUCCESS) {
        printf("GPU %d: failed to load kernel\n", 0);
        return;
    }

    if (!ctx->init(gpu, device_context.threadsNum, device_context.wokrsize)) {
        printf("Init failed");
        return;
    }

    device_context.is_init_success = true;
}
|
||||
|
||||
// Destroy the OpenCL state created by start(). The pointer is cleared
// afterwards so that calling stop() again is harmless.
void ocl_silentarmy::stop(ocl_silentarmy& device_context) {
    delete device_context.oclc;   // deleting a null pointer is a no-op
    device_context.oclc = nullptr;
}
|
||||
|
||||
void ocl_silentarmy::solve(const char *tequihash_header,
|
||||
unsigned int tequihash_header_len,
|
||||
const char* nonce,
|
||||
unsigned int nonce_len,
|
||||
std::function<bool()> cancelf,
|
||||
std::function<void(const std::vector<uint32_t>&, size_t, const unsigned char*)> solutionf,
|
||||
std::function<void(void)> hashdonef,
|
||||
ocl_silentarmy& device_context) {
|
||||
|
||||
unsigned char context[140];
|
||||
memset(context, 0, 140);
|
||||
memcpy(context, tequihash_header, tequihash_header_len);
|
||||
memcpy(context + tequihash_header_len, nonce, nonce_len);
|
||||
|
||||
OclContext *miner = device_context.oclc;
|
||||
clFlush(miner->queue);
|
||||
|
||||
blake2b_state_t initialCtx;
|
||||
zcash_blake2b_init(&initialCtx, ZCASH_HASH_LEN, PARAM_N, PARAM_K);
|
||||
zcash_blake2b_update(&initialCtx, (const uint8_t*)context, 128, 0);
|
||||
|
||||
cl_mem buf_blake_st;
|
||||
buf_blake_st = check_clCreateBuffer(miner->_context, CL_MEM_READ_ONLY |
|
||||
CL_MEM_COPY_HOST_PTR, sizeof(blake2b_state_s), &initialCtx);
|
||||
|
||||
|
||||
for (unsigned round = 0; round < PARAM_K; round++)
|
||||
{
|
||||
if (round < 2)
|
||||
init_ht(miner->queue, miner->k_init_ht, miner->buf_ht[round % 2]);
|
||||
if (!round)
|
||||
{
|
||||
check_clSetKernelArg(miner->k_rounds[round], 0, &buf_blake_st);
|
||||
check_clSetKernelArg(miner->k_rounds[round], 1, &miner->buf_ht[round % 2]);
|
||||
miner->global_ws = select_work_size_blake();
|
||||
}
|
||||
else
|
||||
{
|
||||
check_clSetKernelArg(miner->k_rounds[round], 0, &miner->buf_ht[(round - 1) % 2]);
|
||||
check_clSetKernelArg(miner->k_rounds[round], 1, &miner->buf_ht[round % 2]);
|
||||
miner->global_ws = NR_ROWS;
|
||||
}
|
||||
check_clSetKernelArg(miner->k_rounds[round], 2, &miner->buf_dbg);
|
||||
if (round == PARAM_K - 1)
|
||||
check_clSetKernelArg(miner->k_rounds[round], 3, &miner->buf_sols);
|
||||
check_clEnqueueNDRangeKernel(miner->queue, miner->k_rounds[round], 1, NULL,
|
||||
&miner->global_ws, &miner->local_work_size, 0, NULL, NULL);
|
||||
// cancel function
|
||||
if (cancelf()) return;
|
||||
}
|
||||
check_clSetKernelArg(miner->k_sols, 0, &miner->buf_ht[0]);
|
||||
check_clSetKernelArg(miner->k_sols, 1, &miner->buf_ht[1]);
|
||||
check_clSetKernelArg(miner->k_sols, 2, &miner->buf_sols);
|
||||
miner->global_ws = NR_ROWS;
|
||||
check_clEnqueueNDRangeKernel(miner->queue, miner->k_sols, 1, NULL,
|
||||
&miner->global_ws, &miner->local_work_size, 0, NULL, NULL);
|
||||
|
||||
check_clEnqueueReadBuffer(miner->queue, miner->buf_sols,
|
||||
CL_TRUE, // cl_bool blocking_read
|
||||
0, // size_t offset
|
||||
sizeof(*miner->sols), // size_t size
|
||||
miner->sols, // void *ptr
|
||||
0, // cl_uint num_events_in_wait_list
|
||||
NULL, // cl_event *event_wait_list
|
||||
NULL); // cl_event *event
|
||||
|
||||
if (miner->sols->nr > MAX_SOLS)
|
||||
miner->sols->nr = MAX_SOLS;
|
||||
|
||||
clReleaseMemObject(buf_blake_st);
|
||||
|
||||
for (unsigned sol_i = 0; sol_i < miner->sols->nr; sol_i++) {
|
||||
verify_sol(miner->sols, sol_i);
|
||||
}
|
||||
|
||||
uint8_t proof[COMPRESSED_PROOFSIZE * 2];
|
||||
for (uint32_t i = 0; i < miner->sols->nr; i++) {
|
||||
if (miner->sols->valid[i]) {
|
||||
compress(proof, (uint32_t *)(miner->sols->values[i]), 1 << PARAM_K);
|
||||
solutionf(std::vector<uint32_t>(0), 1344, proof);
|
||||
}
|
||||
}
|
||||
hashdonef();
|
||||
}
|
||||
|
||||
// STATICS END
|
||||
|
|
@ -1,58 +0,0 @@
|
|||
#pragma once
|
||||
#ifdef _LIB
|
||||
#define DLL_OCL_SILENTARMY __declspec(dllexport)
|
||||
#else
|
||||
#define DLL_OCL_SILENTARMY
|
||||
#endif
|
||||
|
||||
// remove after
|
||||
#include <string>
|
||||
#include <functional>
|
||||
#include <vector>
|
||||
#include <cstdint>
|
||||
|
||||
struct OclContext;
|
||||
|
||||
|
||||
|
||||
struct DLL_OCL_SILENTARMY ocl_silentarmy
|
||||
{
|
||||
//int threadsperblock;
|
||||
int blocks;
|
||||
int device_id;
|
||||
int platform_id;
|
||||
|
||||
OclContext* oclc;
|
||||
// threads
|
||||
unsigned threadsNum; // TMP
|
||||
unsigned wokrsize;
|
||||
|
||||
bool is_init_success = false;
|
||||
|
||||
ocl_silentarmy(int platf_id, int dev_id);
|
||||
|
||||
std::string getdevinfo();
|
||||
|
||||
static int getcount();
|
||||
|
||||
static void getinfo(int platf_id, int d_id, std::string& gpu_name, int& sm_count, std::string& version);
|
||||
|
||||
static void start(ocl_silentarmy& device_context);
|
||||
|
||||
static void stop(ocl_silentarmy& device_context);
|
||||
|
||||
static void solve(const char *tequihash_header,
|
||||
unsigned int tequihash_header_len,
|
||||
const char* nonce,
|
||||
unsigned int nonce_len,
|
||||
std::function<bool()> cancelf,
|
||||
std::function<void(const std::vector<uint32_t>&, size_t, const unsigned char*)> solutionf,
|
||||
std::function<void(void)> hashdonef,
|
||||
ocl_silentarmy& device_context);
|
||||
|
||||
std::string getname() { return "OCL_SILENTARMY"; }
|
||||
|
||||
private:
|
||||
std::string m_gpu_name;
|
||||
std::string m_version;
|
||||
};
|
|
@ -1,98 +0,0 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="ocl_silentarmy.cpp" />
|
||||
<ClCompile Include="sa_blake.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="ocl_silentarmy.hpp" />
|
||||
<ClInclude Include="param.h" />
|
||||
<ClInclude Include="sa_blake.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="zcash\gpu\input.cl" />
|
||||
<None Include="zcash\gpu\kernel.cl" />
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{AB01E715-795A-4089-8DF0-AE6EBDC1AB48}</ProjectGuid>
|
||||
<Keyword>Win32Proj</Keyword>
|
||||
<RootNamespace>ocl_silentarmy</RootNamespace>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>StaticLibrary</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<PlatformToolset>v120</PlatformToolset>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>StaticLibrary</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<PlatformToolset>v120</PlatformToolset>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
|
||||
<IntDir>$(Platform)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<IntDir>$(Platform)\$(Configuration)\</IntDir>
|
||||
<OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<ClCompile>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>..\ocl_device_utils;$(AMDAPPSDKROOT)\include\;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>..\ocl_device_utils;$(AMDAPPSDKROOT)\include\;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
|
@ -1,28 +0,0 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup>
|
||||
<ClCompile Include="ocl_silentarmy.cpp" />
|
||||
<ClCompile Include="sa_blake.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="ocl_silentarmy.hpp" />
|
||||
<ClInclude Include="param.h" />
|
||||
<ClInclude Include="sa_blake.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Filter Include="zcash">
|
||||
<UniqueIdentifier>{34381c66-ca5c-4daa-aa30-58dcf33e2d66}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="zcash\gpu">
|
||||
<UniqueIdentifier>{c7687099-e206-4d36-8836-f7032bffc7da}</UniqueIdentifier>
|
||||
</Filter>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="zcash\gpu\input.cl">
|
||||
<Filter>zcash\gpu</Filter>
|
||||
</None>
|
||||
<None Include="zcash\gpu\kernel.cl">
|
||||
<Filter>zcash\gpu</Filter>
|
||||
</None>
|
||||
</ItemGroup>
|
||||
</Project>
|
|
@ -1,66 +0,0 @@
|
|||
// Equihash parameters (n, k); PREFIX is the number of bits consumed per round.
#define PARAM_N                         200
#define PARAM_K                         9
#define PREFIX                          (PARAM_N / (PARAM_K + 1))
#define NR_INPUTS                       (1 << PREFIX)
// Approximate log base 2 of number of elements in hash tables
#define APX_NR_ELMS_LOG                 (PREFIX + 1)
// Number of rows and slots is affected by this. 20 offers the best performance
// but occasionally misses ~1% of solutions.
#define NR_ROWS_LOG                     20

// Make hash tables OVERHEAD times larger than necessary to store the average
// number of elements per row. The ideal value is as small as possible to
// reduce memory usage, but not too small or else elements are dropped from the
// hash tables.
//
// The actual number of elements per row is closer to the theoretical average
// (less variance) when NR_ROWS_LOG is small. So accordingly OVERHEAD can be
// smaller.
//
// Even (as opposed to odd) values of OVERHEAD sometimes significantly decrease
// performance as they cause VRAM channel conflicts.
#if NR_ROWS_LOG == 16
#define OVERHEAD                        3
#elif NR_ROWS_LOG == 18
#define OVERHEAD                        5
#elif NR_ROWS_LOG == 19
#define OVERHEAD                        9
#elif NR_ROWS_LOG == 20
#define OVERHEAD                        13
#else
// Fail here rather than leaving OVERHEAD undefined, which would only surface
// later as a confusing error wherever NR_SLOTS or HT_SIZE is expanded.
#error "unsupported NR_ROWS_LOG"
#endif

#define NR_ROWS                         (1 << NR_ROWS_LOG)
#define NR_SLOTS                        ((1 << (APX_NR_ELMS_LOG - NR_ROWS_LOG)) * OVERHEAD)
// Length of 1 element (slot) in bytes
#define SLOT_LEN                        32
// Total size of hash table
#define HT_SIZE                         (NR_ROWS * NR_SLOTS * SLOT_LEN)
// Length of Zcash block header and nonce
#define ZCASH_BLOCK_HEADER_LEN          140
#define ZCASH_NONCE_LEN                 32
// Number of bytes Zcash needs out of Blake
#define ZCASH_HASH_LEN                  50
// Number of wavefronts per SIMD for the Blake kernel.
// Blake is ALU-bound (beside the atomic counter being incremented) so we need
// at least 2 wavefronts per SIMD to hide the 2-clock latency of integer
// instructions. 10 is the max supported by the hw.
#define BLAKE_WPS                       10
#define MAX_SOLS                        2000

// Optional features
#undef ENABLE_DEBUG

/*
** Return the offset of Xi in bytes from the beginning of the slot.
*/
#define xi_offset_for_round(round)      (8 + ((round) / 2) * 4)

// An (uncompressed) solution stores (1 << PARAM_K) 32-bit values
#define SOL_SIZE                        ((1 << PARAM_K) * 4)
|
||||
typedef struct sols_s
|
||||
{
|
||||
uint nr;
|
||||
uint likely_invalids;
|
||||
uchar valid[MAX_SOLS];
|
||||
uint values[MAX_SOLS][(1 << PARAM_K)];
|
||||
} sols_t;
|
|
@ -1,104 +0,0 @@
|
|||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include "sa_blake.h"
|
||||
|
||||
/* Size in bytes of one BLAKE2b message block. */
static const uint32_t blake2b_block_len = 128;

/* Number of mixing rounds applied to every block. */
static const uint32_t blake2b_rounds = 12;

/* BLAKE2b initialization vector. */
static const uint64_t blake2b_iv[8] = {
    UINT64_C(0x6a09e667f3bcc908),
    UINT64_C(0xbb67ae8584caa73b),
    UINT64_C(0x3c6ef372fe94f82b),
    UINT64_C(0xa54ff53a5f1d36f1),
    UINT64_C(0x510e527fade682d1),
    UINT64_C(0x9b05688c2b3e6c1f),
    UINT64_C(0x1f83d9abfb41bd6b),
    UINT64_C(0x5be0cd19137e2179),
};

/*
 * Message word schedule: row r lists, in order, the message word indexes fed
 * to the eight mix() calls of round r. Rows 10 and 11 repeat rows 0 and 1.
 */
static const uint8_t blake2b_sigma[12][16] = {
    {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
    { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 },
    { 11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4 },
    {  7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8 },
    {  9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13 },
    {  2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9 },
    { 12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11 },
    { 13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10 },
    {  6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5 },
    { 10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13,  0 },
    {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
    { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 },
};
|
||||
|
||||
/*
|
||||
** Init the state according to Zcash parameters.
|
||||
*/
|
||||
void zcash_blake2b_init(blake2b_state_t *st, uint8_t hash_len,
|
||||
uint32_t n, uint32_t k)
|
||||
{
|
||||
assert(n > k);
|
||||
assert(hash_len <= 64);
|
||||
st->h[0] = blake2b_iv[0] ^ (0x01010000 | hash_len);
|
||||
for (uint32_t i = 1; i <= 5; i++)
|
||||
st->h[i] = blake2b_iv[i];
|
||||
st->h[6] = blake2b_iv[6] ^ *(uint64_t *)"ZcashPoW";
|
||||
st->h[7] = blake2b_iv[7] ^ (((uint64_t)k << 32) | n);
|
||||
st->bytes = 0;
|
||||
}
|
||||
|
||||
/*
** Rotate the 64-bit value a right by bits positions.
**
** The count is reduced modulo 64, and the complementary left shift is masked
** too, so that bits == 0 (or any multiple of 64) returns a unchanged instead
** of shifting by 64, which is undefined behaviour for a 64-bit operand.
*/
static uint64_t rotr64(uint64_t a, uint8_t bits)
{
    const unsigned r = bits & 63u;

    return (a >> r) | (a << ((64u - r) & 63u));
}
|
||||
|
||||
/*
** BLAKE2b mixing step: stir the message words x and y into the four state
** words pointed to by va, vb, vc and vd, updating them in place.
*/
static void mix(uint64_t *va, uint64_t *vb, uint64_t *vc, uint64_t *vd,
        uint64_t x, uint64_t y)
{
    /* first half: absorbs x, rotations by 32 and 24 */
    *va += *vb + x;
    *vd = rotr64(*va ^ *vd, 32);
    *vc += *vd;
    *vb = rotr64(*vc ^ *vb, 24);

    /* second half: absorbs y, rotations by 16 and 63 */
    *va += *vb + y;
    *vd = rotr64(*va ^ *vd, 16);
    *vc += *vd;
    *vb = rotr64(*vc ^ *vb, 63);
}
|
||||
|
||||
/*
|
||||
** Process either a full message block or the final partial block.
|
||||
** Note that v[13] is not XOR'd because st->bytes is assumed to never overflow.
|
||||
**
|
||||
** _msg pointer to message (must be zero-padded to 128 bytes if final block)
|
||||
** msg_len must be 128 (<= 128 allowed only for final partial block)
|
||||
** is_final indicate if this is the final block
|
||||
*/
|
||||
void zcash_blake2b_update(blake2b_state_t *st, const uint8_t *_msg,
|
||||
uint32_t msg_len, uint32_t is_final)
|
||||
{
|
||||
const uint64_t *m = (const uint64_t *)_msg;
|
||||
uint64_t v[16];
|
||||
assert(msg_len <= 128);
|
||||
assert(st->bytes <= UINT64_MAX - msg_len);
|
||||
memcpy(v + 0, st->h, 8 * sizeof (*v));
|
||||
memcpy(v + 8, blake2b_iv, 8 * sizeof (*v));
|
||||
v[12] ^= (st->bytes += msg_len);
|
||||
v[14] ^= is_final ? -1 : 0;
|
||||
for (uint32_t round = 0; round < blake2b_rounds; round++)
|
||||
{
|
||||
const uint8_t *s = blake2b_sigma[round];
|
||||
mix(v + 0, v + 4, v + 8, v + 12, m[s[0]], m[s[1]]);
|
||||
mix(v + 1, v + 5, v + 9, v + 13, m[s[2]], m[s[3]]);
|
||||
mix(v + 2, v + 6, v + 10, v + 14, m[s[4]], m[s[5]]);
|
||||
mix(v + 3, v + 7, v + 11, v + 15, m[s[6]], m[s[7]]);
|
||||
mix(v + 0, v + 5, v + 10, v + 15, m[s[8]], m[s[9]]);
|
||||
mix(v + 1, v + 6, v + 11, v + 12, m[s[10]], m[s[11]]);
|
||||
mix(v + 2, v + 7, v + 8, v + 13, m[s[12]], m[s[13]]);
|
||||
mix(v + 3, v + 4, v + 9, v + 14, m[s[14]], m[s[15]]);
|
||||
}
|
||||
for (uint32_t i = 0; i < 8; i++)
|
||||
st->h[i] ^= v[i] ^ v[i + 8];
|
||||
}
|
||||
|
||||
/*
** Copy the first outlen bytes of the chaining value to out.
**
** st      state to read the digest from (not modified)
** out     destination buffer, at least outlen bytes
** outlen  number of digest bytes wanted, at most 64
**
** Asking for more than 64 bytes trips the assertion; when assertions are
** compiled out the length is clamped to the size of st->h so the copy can
** never read past the chaining value.
*/
void zcash_blake2b_final(blake2b_state_t *st, uint8_t *out, uint8_t outlen)
{
    size_t      len = outlen;

    assert(outlen <= 64);
    if (len > sizeof (st->h))
        len = sizeof (st->h);
    memcpy(out, st->h, len);
}
|
|
@ -1,11 +0,0 @@
|
|||
#pragma once

// The declarations below use fixed-width integer types; include their header
// here so this file can be included first in any translation unit.
#include <stdint.h>

/*
** BLAKE2b hashing state.
*/
typedef struct blake2b_state_s
{
    uint64_t    h[8];   // chaining value; its leading bytes are the digest
    uint64_t    bytes;  // number of message bytes processed so far
} blake2b_state_t;

// Initialize st for a hash_len-byte digest with the Equihash parameters n, k.
void zcash_blake2b_init(blake2b_state_t *st, uint8_t hash_len,
        uint32_t n, uint32_t k);
// Absorb one 128-byte block; is_final is non-zero for the last block.
void zcash_blake2b_update(blake2b_state_t *st, const uint8_t *_msg,
        uint32_t msg_len, uint32_t is_final);
// Copy the first outlen bytes of the digest to out.
void zcash_blake2b_final(blake2b_state_t *st, uint8_t *out, uint8_t outlen);
|
|
@ -1,704 +0,0 @@
|
|||
#include "param.h"
|
||||
|
||||
/*
|
||||
** Assuming NR_ROWS_LOG == 16, the hash table slots have this layout (length in
|
||||
** bytes in parens):
|
||||
**
|
||||
** round 0, table 0: cnt(4) i(4) pad(0) Xi(23.0) pad(1)
|
||||
** round 1, table 1: cnt(4) i(4) pad(0.5) Xi(20.5) pad(3)
|
||||
** round 2, table 0: cnt(4) i(4) i(4) pad(0) Xi(18.0) pad(2)
|
||||
** round 3, table 1: cnt(4) i(4) i(4) pad(0.5) Xi(15.5) pad(4)
|
||||
** round 4, table 0: cnt(4) i(4) i(4) i(4) pad(0) Xi(13.0) pad(3)
|
||||
** round 5, table 1: cnt(4) i(4) i(4) i(4) pad(0.5) Xi(10.5) pad(5)
|
||||
** round 6, table 0: cnt(4) i(4) i(4) i(4) i(4) pad(0) Xi( 8.0) pad(4)
|
||||
** round 7, table 1: cnt(4) i(4) i(4) i(4) i(4) pad(0.5) Xi( 5.5) pad(6)
|
||||
** round 8, table 0: cnt(4) i(4) i(4) i(4) i(4) i(4) pad(0) Xi( 3.0) pad(5)
|
||||
**
|
||||
** If the first byte of Xi is 0xAB then:
|
||||
** - on even rounds, 'A' is part of the colliding PREFIX, 'B' is part of Xi
|
||||
** - on odd rounds, 'A' and 'B' are both part of the colliding PREFIX, but
|
||||
** 'A' is considered redundant padding as it was used to compute the row #
|
||||
**
|
||||
** - cnt is an atomic counter keeping track of the number of used slots.
|
||||
** it is used in the first slot only; subsequent slots replace it with
|
||||
** 4 padding bytes
|
||||
** - i encodes either the 21-bit input value (round 0) or a reference to two
|
||||
** inputs from the previous round
|
||||
**
|
||||
** Formula for Xi length and pad length above:
|
||||
** > for i in range(9):
|
||||
** > xi=(200-20*i-NR_ROWS_LOG)/8.; ci=8+4*((i)/2); print xi,32-ci-xi
|
||||
**
|
||||
** Note that the fractional .5-byte/4-bit padding following Xi for odd rounds
|
||||
** is the 4 most significant bits of the last byte of Xi.
|
||||
*/
|
||||
|
||||
// BLAKE2b initialization vector (eight 64-bit words).
__constant ulong blake_iv[] =
{
    0x6a09e667f3bcc908UL,
    0xbb67ae8584caa73bUL,
    0x3c6ef372fe94f82bUL,
    0xa54ff53a5f1d36f1UL,
    0x510e527fade682d1UL,
    0x9b05688c2b3e6c1fUL,
    0x1f83d9abfb41bd6bUL,
    0x5be0cd19137e2179UL,
};
|
||||
|
||||
/*
|
||||
** Reset counters in hash table.
|
||||
*/
|
||||
__kernel
|
||||
void kernel_init_ht(__global char *ht)
|
||||
{
|
||||
uint tid = get_global_id(0);
|
||||
*(__global uint *)(ht + tid * NR_SLOTS * SLOT_LEN) = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
** If xi0,xi1,xi2,xi3 are stored consecutively in little endian then they
|
||||
** represent (hex notation, group of 5 hex digits are a group of PREFIX bits):
|
||||
** aa aa ab bb bb cc cc cd dd... [round 0]
|
||||
** --------------------
|
||||
** ...ab bb bb cc cc cd dd... [odd round]
|
||||
** --------------
|
||||
** ...cc cc cd dd... [next even round]
|
||||
** -----
|
||||
** Bytes underlined are going to be stored in the slot. Preceding bytes
|
||||
** (and possibly part of the underlined bytes, depending on NR_ROWS_LOG) are
|
||||
** used to compute the row number.
|
||||
**
|
||||
** Round 0: xi0,xi1,xi2,xi3 is a 25-byte Xi (xi3: only the low byte matter)
|
||||
** Round 1: xi0,xi1,xi2 is a 23-byte Xi (incl. the colliding PREFIX nibble)
|
||||
** TODO: update lines below with padding nibbles
|
||||
** Round 2: xi0,xi1,xi2 is a 20-byte Xi (xi2: only the low 4 bytes matter)
|
||||
** Round 3: xi0,xi1,xi2 is a 17.5-byte Xi (xi2: only the low 1.5 bytes matter)
|
||||
** Round 4: xi0,xi1 is a 15-byte Xi (xi1: only the low 7 bytes matter)
|
||||
** Round 5: xi0,xi1 is a 12.5-byte Xi (xi1: only the low 4.5 bytes matter)
|
||||
** Round 6: xi0,xi1 is a 10-byte Xi (xi1: only the low 2 bytes matter)
|
||||
** Round 7: xi0 is a 7.5-byte Xi (xi0: only the low 7.5 bytes matter)
|
||||
** Round 8: xi0 is a 5-byte Xi (xi0: only the low 5 bytes matter)
|
||||
**
|
||||
** Return 0 if successfully stored, or 1 if the row overflowed.
|
||||
*/
|
||||
uint ht_store(uint round, __global char *ht, uint i,
        ulong xi0, ulong xi1, ulong xi2, ulong xi3)
{
#if NR_ROWS_LOG != 16 && NR_ROWS_LOG != 18 && \
    NR_ROWS_LOG != 19 && NR_ROWS_LOG != 20
#error "unsupported NR_ROWS_LOG"
#endif
    uint                row;
    uint                slot;
    __global char       *dst;

    // --- 1. derive the row number from the leading bits of Xi ---
    if (round % 2)
    {
        // if we have in hex: "ab cd ef..." (little endian xi0) then this
        // formula computes the low 16 bits of the row as 0xdebc. it skips the
        // 'a' nibble as it is part of the PREFIX. The Xi will be stored
        // starting with "ef..."; 'e' will be considered padding and 'f' is
        // part of the current PREFIX
        row = ((xi0 & 0xf00) << 4) | ((xi0 & 0xf00000) >> 12) |
            ((xi0 & 0xf) << 4) | ((xi0 & 0xf000) >> 12);
        // rows wider than 16 bits take their extra high bits from here
#if NR_ROWS_LOG == 18
        row |= (xi0 & 0xc0000) >> 2;
#elif NR_ROWS_LOG == 19
        row |= (xi0 & 0xe0000) >> 1;
#elif NR_ROWS_LOG == 20
        row |= (xi0 & 0xf0000) >> 0;
#endif
    }
    else
    {
        row = (xi0 & 0xffff);
#if NR_ROWS_LOG == 18
        row |= (xi0 & 0xc00000) >> 6;
#elif NR_ROWS_LOG == 19
        row |= (xi0 & 0xe00000) >> 5;
#elif NR_ROWS_LOG == 20
        row |= (xi0 & 0xf00000) >> 4;
#endif
    }

    // --- 2. drop the two leading bytes of Xi ---
    xi0 = (xi0 >> 16) | (xi1 << 48);
    xi1 = (xi1 >> 16) | (xi2 << 48);
    xi2 = (xi2 >> 16) | (xi3 << 48);

    // --- 3. claim a slot in the row; the counter is the row's first word ---
    dst = ht + row * NR_SLOTS * SLOT_LEN;
    slot = atomic_inc((__global uint *)dst);
    if (slot >= NR_SLOTS)
        return 1;
    dst += slot * SLOT_LEN + xi_offset_for_round(round);

    // --- 4. write "i" (always 4 bytes before Xi), then Xi itself ---
    *(__global uint *)(dst - 4) = i;
    switch (round)
    {
    case 0:
    case 1:
        // store 24 bytes
        *(__global ulong *)(dst + 0) = xi0;
        *(__global ulong *)(dst + 8) = xi1;
        *(__global ulong *)(dst + 16) = xi2;
        break;
    case 2:
        // store 20 bytes
        *(__global ulong *)(dst + 0) = xi0;
        *(__global ulong *)(dst + 8) = xi1;
        *(__global uint *)(dst + 16) = xi2;
        break;
    case 3:
    case 4:
        // store 16 bytes
        *(__global ulong *)(dst + 0) = xi0;
        *(__global ulong *)(dst + 8) = xi1;
        break;
    case 5:
        // store 12 bytes
        *(__global ulong *)(dst + 0) = xi0;
        *(__global uint *)(dst + 8) = xi1;
        break;
    case 6:
    case 7:
        // store 8 bytes
        *(__global ulong *)(dst + 0) = xi0;
        break;
    case 8:
        // store 4 bytes
        *(__global uint *)(dst + 0) = xi0;
        break;
    default:
        // any other round stores only "i"
        break;
    }
    return 0;
}
|
||||
|
||||
/*
** BLAKE2b mixing step on four 64-bit state words (lvalues va, vb, vc, vd) and
** two message words x and y. rotate() rotates left, hence the "64 - n" counts
** to obtain right rotations by 32, 24, 16 and 63 bits.
**
** The body is wrapped in do { } while (0) so that "mix(...);" is a single
** statement (safe under an unbraced if/else), and x and y are parenthesized
** so that any expression can be passed for them.
*/
#define mix(va, vb, vc, vd, x, y) \
    do { \
        (va) = ((va) + (vb) + (x)); \
        (vd) = rotate(((vd) ^ (va)), (ulong)64 - 32); \
        (vc) = ((vc) + (vd)); \
        (vb) = rotate(((vb) ^ (vc)), (ulong)64 - 24); \
        (va) = ((va) + (vb) + (y)); \
        (vd) = rotate(((vd) ^ (va)), (ulong)64 - 16); \
        (vc) = ((vc) + (vd)); \
        (vb) = rotate(((vb) ^ (vc)), (ulong)64 - 63); \
    } while (0)
|
||||
|
||||
/*
|
||||
** Execute round 0 (blake).
|
||||
**
|
||||
** Note: making the work group size less than or equal to the wavefront size
|
||||
** allows the OpenCL compiler to remove the barrier() calls, see "2.2 Local
|
||||
** Memory (LDS) Optimization 2-10" in:
|
||||
** http://developer.amd.com/tools-and-sdks/opencl-zone/amd-accelerated-parallel-processing-app-sdk/opencl-optimization-guide/
|
||||
*/
|
||||
__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
|
||||
void kernel_round0(__global ulong *blake_state, __global char *ht,
|
||||
__global uint *debug)
|
||||
{
|
||||
uint tid = get_global_id(0);
|
||||
ulong v[16];
|
||||
uint inputs_per_thread = NR_INPUTS / get_global_size(0);
|
||||
uint input = tid * inputs_per_thread;
|
||||
uint input_end = (tid + 1) * inputs_per_thread;
|
||||
uint dropped = 0;
|
||||
while (input < input_end)
|
||||
{
|
||||
// shift "i" to occupy the high 32 bits of the second ulong word in the
|
||||
// message block
|
||||
ulong word1 = (ulong)input << 32;
|
||||
// init vector v
|
||||
v[0] = blake_state[0];
|
||||
v[1] = blake_state[1];
|
||||
v[2] = blake_state[2];
|
||||
v[3] = blake_state[3];
|
||||
v[4] = blake_state[4];
|
||||
v[5] = blake_state[5];
|
||||
v[6] = blake_state[6];
|
||||
v[7] = blake_state[7];
|
||||
v[8] = blake_iv[0];
|
||||
v[9] = blake_iv[1];
|
||||
v[10] = blake_iv[2];
|
||||
v[11] = blake_iv[3];
|
||||
v[12] = blake_iv[4];
|
||||
v[13] = blake_iv[5];
|
||||
v[14] = blake_iv[6];
|
||||
v[15] = blake_iv[7];
|
||||
// mix in length of data
|
||||
v[12] ^= ZCASH_BLOCK_HEADER_LEN + 4 /* length of "i" */;
|
||||
// last block
|
||||
v[14] ^= -1;
|
||||
|
||||
// round 1
|
||||
mix(v[0], v[4], v[8], v[12], 0, word1);
|
||||
mix(v[1], v[5], v[9], v[13], 0, 0);
|
||||
mix(v[2], v[6], v[10], v[14], 0, 0);
|
||||
mix(v[3], v[7], v[11], v[15], 0, 0);
|
||||
mix(v[0], v[5], v[10], v[15], 0, 0);
|
||||
mix(v[1], v[6], v[11], v[12], 0, 0);
|
||||
mix(v[2], v[7], v[8], v[13], 0, 0);
|
||||
mix(v[3], v[4], v[9], v[14], 0, 0);
|
||||
// round 2
|
||||
mix(v[0], v[4], v[8], v[12], 0, 0);
|
||||
mix(v[1], v[5], v[9], v[13], 0, 0);
|
||||
mix(v[2], v[6], v[10], v[14], 0, 0);
|
||||
mix(v[3], v[7], v[11], v[15], 0, 0);
|
||||
mix(v[0], v[5], v[10], v[15], word1, 0);
|
||||
mix(v[1], v[6], v[11], v[12], 0, 0);
|
||||
mix(v[2], v[7], v[8], v[13], 0, 0);
|
||||
mix(v[3], v[4], v[9], v[14], 0, 0);
|
||||
// round 3
|
||||
mix(v[0], v[4], v[8], v[12], 0, 0);
|
||||
mix(v[1], v[5], v[9], v[13], 0, 0);
|
||||
mix(v[2], v[6], v[10], v[14], 0, 0);
|
||||
mix(v[3], v[7], v[11], v[15], 0, 0);
|
||||
mix(v[0], v[5], v[10], v[15], 0, 0);
|
||||
mix(v[1], v[6], v[11], v[12], 0, 0);
|
||||
mix(v[2], v[7], v[8], v[13], 0, word1);
|
||||
mix(v[3], v[4], v[9], v[14], 0, 0);
|
||||
// round 4
|
||||
mix(v[0], v[4], v[8], v[12], 0, 0);
|
||||
mix(v[1], v[5], v[9], v[13], 0, word1);
|
||||
mix(v[2], v[6], v[10], v[14], 0, 0);
|
||||
mix(v[3], v[7], v[11], v[15], 0, 0);
|
||||
mix(v[0], v[5], v[10], v[15], 0, 0);
|
||||
mix(v[1], v[6], v[11], v[12], 0, 0);
|
||||
mix(v[2], v[7], v[8], v[13], 0, 0);
|
||||
mix(v[3], v[4], v[9], v[14], 0, 0);
|
||||
// round 5
|
||||
mix(v[0], v[4], v[8], v[12], 0, 0);
|
||||
mix(v[1], v[5], v[9], v[13], 0, 0);
|
||||
mix(v[2], v[6], v[10], v[14], 0, 0);
|
||||
mix(v[3], v[7], v[11], v[15], 0, 0);
|
||||
mix(v[0], v[5], v[10], v[15], 0, word1);
|
||||
mix(v[1], v[6], v[11], v[12], 0, 0);
|
||||
mix(v[2], v[7], v[8], v[13], 0, 0);
|
||||
mix(v[3], v[4], v[9], v[14], 0, 0);
|
||||
// round 6
|
||||
mix(v[0], v[4], v[8], v[12], 0, 0);
|
||||
mix(v[1], v[5], v[9], v[13], 0, 0);
|
||||
mix(v[2], v[6], v[10], v[14], 0, 0);
|
||||
mix(v[3], v[7], v[11], v[15], 0, 0);
|
||||
mix(v[0], v[5], v[10], v[15], 0, 0);
|
||||
mix(v[1], v[6], v[11], v[12], 0, 0);
|
||||
mix(v[2], v[7], v[8], v[13], 0, 0);
|
||||
mix(v[3], v[4], v[9], v[14], word1, 0);
|
||||
// round 7
|
||||
mix(v[0], v[4], v[8], v[12], 0, 0);
|
||||
mix(v[1], v[5], v[9], v[13], word1, 0);
|
||||
mix(v[2], v[6], v[10], v[14], 0, 0);
|
||||
mix(v[3], v[7], v[11], v[15], 0, 0);
|
||||
mix(v[0], v[5], v[10], v[15], 0, 0);
|
||||
mix(v[1], v[6], v[11], v[12], 0, 0);
|
||||
mix(v[2], v[7], v[8], v[13], 0, 0);
|
||||
mix(v[3], v[4], v[9], v[14], 0, 0);
|
||||
// round 8
|
||||
mix(v[0], v[4], v[8], v[12], 0, 0);
|
||||
mix(v[1], v[5], v[9], v[13], 0, 0);
|
||||
mix(v[2], v[6], v[10], v[14], 0, word1);
|
||||
mix(v[3], v[7], v[11], v[15], 0, 0);
|
||||
mix(v[0], v[5], v[10], v[15], 0, 0);
|
||||
mix(v[1], v[6], v[11], v[12], 0, 0);
|
||||
mix(v[2], v[7], v[8], v[13], 0, 0);
|
||||
mix(v[3], v[4], v[9], v[14], 0, 0);
|
||||
// round 9
|
||||
mix(v[0], v[4], v[8], v[12], 0, 0);
|
||||
mix(v[1], v[5], v[9], v[13], 0, 0);
|
||||
mix(v[2], v[6], v[10], v[14], 0, 0);
|
||||
mix(v[3], v[7], v[11], v[15], 0, 0);
|
||||
mix(v[0], v[5], v[10], v[15], 0, 0);
|
||||
mix(v[1], v[6], v[11], v[12], 0, 0);
|
||||
mix(v[2], v[7], v[8], v[13], word1, 0);
|
||||
mix(v[3], v[4], v[9], v[14], 0, 0);
|
||||
// round 10
|
||||
mix(v[0], v[4], v[8], v[12], 0, 0);
|
||||
mix(v[1], v[5], v[9], v[13], 0, 0);
|
||||
mix(v[2], v[6], v[10], v[14], 0, 0);
|
||||
mix(v[3], v[7], v[11], v[15], word1, 0);
|
||||
mix(v[0], v[5], v[10], v[15], 0, 0);
|
||||
mix(v[1], v[6], v[11], v[12], 0, 0);
|
||||
mix(v[2], v[7], v[8], v[13], 0, 0);
|
||||
mix(v[3], v[4], v[9], v[14], 0, 0);
|
||||
// round 11
|
||||
mix(v[0], v[4], v[8], v[12], 0, word1);
|
||||
mix(v[1], v[5], v[9], v[13], 0, 0);
|
||||
mix(v[2], v[6], v[10], v[14], 0, 0);
|
||||
mix(v[3], v[7], v[11], v[15], 0, 0);
|
||||
mix(v[0], v[5], v[10], v[15], 0, 0);
|
||||
mix(v[1], v[6], v[11], v[12], 0, 0);
|
||||
mix(v[2], v[7], v[8], v[13], 0, 0);
|
||||
mix(v[3], v[4], v[9], v[14], 0, 0);
|
||||
// round 12
|
||||
mix(v[0], v[4], v[8], v[12], 0, 0);
|
||||
mix(v[1], v[5], v[9], v[13], 0, 0);
|
||||
mix(v[2], v[6], v[10], v[14], 0, 0);
|
||||
mix(v[3], v[7], v[11], v[15], 0, 0);
|
||||
mix(v[0], v[5], v[10], v[15], word1, 0);
|
||||
mix(v[1], v[6], v[11], v[12], 0, 0);
|
||||
mix(v[2], v[7], v[8], v[13], 0, 0);
|
||||
mix(v[3], v[4], v[9], v[14], 0, 0);
|
||||
|
||||
// compress v into the blake state; this produces the 50-byte hash
|
||||
// (two Xi values)
|
||||
ulong h[7];
|
||||
h[0] = blake_state[0] ^ v[0] ^ v[8];
|
||||
h[1] = blake_state[1] ^ v[1] ^ v[9];
|
||||
h[2] = blake_state[2] ^ v[2] ^ v[10];
|
||||
h[3] = blake_state[3] ^ v[3] ^ v[11];
|
||||
h[4] = blake_state[4] ^ v[4] ^ v[12];
|
||||
h[5] = blake_state[5] ^ v[5] ^ v[13];
|
||||
h[6] = (blake_state[6] ^ v[6] ^ v[14]) & 0xffff;
|
||||
|
||||
// store the two Xi values in the hash table
|
||||
#if ZCASH_HASH_LEN == 50
|
||||
dropped += ht_store(0, ht, input * 2,
|
||||
h[0],
|
||||
h[1],
|
||||
h[2],
|
||||
h[3]);
|
||||
dropped += ht_store(0, ht, input * 2 + 1,
|
||||
(h[3] >> 8) | (h[4] << (64 - 8)),
|
||||
(h[4] >> 8) | (h[5] << (64 - 8)),
|
||||
(h[5] >> 8) | (h[6] << (64 - 8)),
|
||||
(h[6] >> 8));
|
||||
#else
|
||||
#error "unsupported ZCASH_HASH_LEN"
|
||||
#endif
|
||||
|
||||
input++;
|
||||
}
|
||||
#ifdef ENABLE_DEBUG
|
||||
debug[tid * 2] = 0;
|
||||
debug[tid * 2 + 1] = dropped;
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
** Pack/unpack a reference to a pair of slots of one row into 32 bits: the row
** number in the high bits, then slot1, then slot0 in the low bits. The width
** of the slot fields depends on NR_ROWS_LOG.
**
** Every macro argument is parenthesized so that arbitrary expressions (not
** only plain identifiers) can be passed safely.
*/
#if NR_ROWS_LOG <= 16 && NR_SLOTS <= (1 << 8)

#define ENCODE_INPUTS(row, slot0, slot1) \
    (((row) << 16) | (((slot1) & 0xff) << 8) | ((slot0) & 0xff))
#define DECODE_ROW(REF)   ((REF) >> 16)
#define DECODE_SLOT1(REF) (((REF) >> 8) & 0xff)
#define DECODE_SLOT0(REF) ((REF) & 0xff)

#elif NR_ROWS_LOG == 18 && NR_SLOTS <= (1 << 7)

#define ENCODE_INPUTS(row, slot0, slot1) \
    (((row) << 14) | (((slot1) & 0x7f) << 7) | ((slot0) & 0x7f))
#define DECODE_ROW(REF)   ((REF) >> 14)
#define DECODE_SLOT1(REF) (((REF) >> 7) & 0x7f)
#define DECODE_SLOT0(REF) ((REF) & 0x7f)

#elif NR_ROWS_LOG == 19 && NR_SLOTS <= (1 << 6)

#define ENCODE_INPUTS(row, slot0, slot1) \
    (((row) << 13) | (((slot1) & 0x3f) << 6) | ((slot0) & 0x3f)) /* 1 spare bit */
#define DECODE_ROW(REF)   ((REF) >> 13)
#define DECODE_SLOT1(REF) (((REF) >> 6) & 0x3f)
#define DECODE_SLOT0(REF) ((REF) & 0x3f)

#elif NR_ROWS_LOG == 20 && NR_SLOTS <= (1 << 6)

#define ENCODE_INPUTS(row, slot0, slot1) \
    (((row) << 12) | (((slot1) & 0x3f) << 6) | ((slot0) & 0x3f))
#define DECODE_ROW(REF)   ((REF) >> 12)
#define DECODE_SLOT1(REF) (((REF) >> 6) & 0x3f)
#define DECODE_SLOT0(REF) ((REF) & 0x3f)

#else
#error "unsupported NR_ROWS_LOG"
#endif
|
||||
|
||||
/*
|
||||
** XOR a pair of Xi values computed at "round - 1" and store the result in the
|
||||
** hash table being built for "round". Note that when building the table for
|
||||
** even rounds we need to skip 1 padding byte present in the "round - 1" table
|
||||
** (the "0xAB" byte mentioned in the description at the top of this file.) But
|
||||
** also note we can't load data directly past this byte because this would
|
||||
** cause an unaligned memory access which is undefined per the OpenCL spec.
|
||||
**
|
||||
** Return 0 if successfully stored, or 1 if the row overflowed.
|
||||
*/
|
||||
uint xor_and_store(uint round, __global char *ht_dst, uint row,
|
||||
uint slot_a, uint slot_b, __global ulong *a, __global ulong *b)
|
||||
{
|
||||
ulong xi0, xi1, xi2;
|
||||
#if NR_ROWS_LOG >= 16 && NR_ROWS_LOG <= 20
|
||||
// Note: for NR_ROWS_LOG == 20, for odd rounds, we could optimize by not
|
||||
// storing the byte containing bits from the previous PREFIX block for
|
||||
if (round == 1 || round == 2)
|
||||
{
|
||||
// xor 24 bytes
|
||||
xi0 = *(a++) ^ *(b++);
|
||||
xi1 = *(a++) ^ *(b++);
|
||||
xi2 = *a ^ *b;
|
||||
if (round == 2)
|
||||
{
|
||||
// skip padding byte
|
||||
xi0 = (xi0 >> 8) | (xi1 << (64 - 8));
|
||||
xi1 = (xi1 >> 8) | (xi2 << (64 - 8));
|
||||
xi2 = (xi2 >> 8);
|
||||
}
|
||||
}
|
||||
else if (round == 3)
|
||||
{
|
||||
// xor 20 bytes
|
||||
xi0 = *a++ ^ *b++;
|
||||
xi1 = *a++ ^ *b++;
|
||||
xi2 = *(__global uint *)a ^ *(__global uint *)b;
|
||||
}
|
||||
else if (round == 4 || round == 5)
|
||||
{
|
||||
// xor 16 bytes
|
||||
xi0 = *a++ ^ *b++;
|
||||
xi1 = *a ^ *b;
|
||||
xi2 = 0;
|
||||
if (round == 4)
|
||||
{
|
||||
// skip padding byte
|
||||
xi0 = (xi0 >> 8) | (xi1 << (64 - 8));
|
||||
xi1 = (xi1 >> 8);
|
||||
}
|
||||
}
|
||||
else if (round == 6)
|
||||
{
|
||||
// xor 12 bytes
|
||||
xi0 = *a++ ^ *b++;
|
||||
xi1 = *(__global uint *)a ^ *(__global uint *)b;
|
||||
xi2 = 0;
|
||||
if (round == 6)
|
||||
{
|
||||
// skip padding byte
|
||||
xi0 = (xi0 >> 8) | (xi1 << (64 - 8));
|
||||
xi1 = (xi1 >> 8);
|
||||
}
|
||||
}
|
||||
else if (round == 7 || round == 8)
|
||||
{
|
||||
// xor 8 bytes
|
||||
xi0 = *a ^ *b;
|
||||
xi1 = 0;
|
||||
xi2 = 0;
|
||||
if (round == 8)
|
||||
{
|
||||
// skip padding byte
|
||||
xi0 = (xi0 >> 8);
|
||||
}
|
||||
}
|
||||
// invalid solutions (which start happenning in round 5) have duplicate
|
||||
// inputs and xor to zero, so discard them
|
||||
if (!xi0 && !xi1)
|
||||
return 0;
|
||||
#else
|
||||
#error "unsupported NR_ROWS_LOG"
|
||||
#endif
|
||||
return ht_store(round, ht_dst, ENCODE_INPUTS(row, slot_a, slot_b),
|
||||
xi0, xi1, xi2, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
** Execute one Equihash round. Read from ht_src, XOR colliding pairs of Xi,
|
||||
** store them in ht_dst.
|
||||
*/
|
||||
void equihash_round(uint round, __global char *ht_src, __global char *ht_dst,
|
||||
__global uint *debug)
|
||||
{
|
||||
uint tid = get_global_id(0);
|
||||
uint tlid = get_local_id(0);
|
||||
__global char *p;
|
||||
uint cnt;
|
||||
uchar first_words[NR_SLOTS];
|
||||
uchar mask;
|
||||
uint i, j;
|
||||
// NR_SLOTS is already oversized (by a factor of OVERHEAD), but we want to
|
||||
// make it even larger
|
||||
ushort collisions[NR_SLOTS * 3];
|
||||
uint nr_coll = 0;
|
||||
uint n;
|
||||
uint dropped_coll, dropped_stor;
|
||||
__global ulong *a, *b;
|
||||
uint xi_offset;
|
||||
// read first words of Xi from the previous (round - 1) hash table
|
||||
xi_offset = xi_offset_for_round(round - 1);
|
||||
// the mask is also computed to read data from the previous round
|
||||
#if NR_ROWS_LOG == 16
|
||||
mask = ((!(round % 2)) ? 0x0f : 0xf0);
|
||||
#elif NR_ROWS_LOG == 18
|
||||
mask = ((!(round % 2)) ? 0x03 : 0x30);
|
||||
#elif NR_ROWS_LOG == 19
|
||||
mask = ((!(round % 2)) ? 0x01 : 0x10);
|
||||
#elif NR_ROWS_LOG == 20
|
||||
mask = 0; /* we can vastly simplify the code below */
|
||||
#else
|
||||
#error "unsupported NR_ROWS_LOG"
|
||||
#endif
|
||||
p = (ht_src + tid * NR_SLOTS * SLOT_LEN);
|
||||
cnt = *(__global uint *)p;
|
||||
cnt = min(cnt, (uint)NR_SLOTS); // handle possible overflow in prev. round
|
||||
p += xi_offset;
|
||||
for (i = 0; i < cnt; i++, p += SLOT_LEN)
|
||||
first_words[i] = *(__global uchar *)p;
|
||||
// find collisions
|
||||
nr_coll = 0;
|
||||
dropped_coll = 0;
|
||||
for (i = 0; i < cnt; i++)
|
||||
for (j = i + 1; j < cnt; j++)
|
||||
if ((first_words[i] & mask) ==
|
||||
(first_words[j] & mask))
|
||||
{
|
||||
// collision!
|
||||
if (nr_coll >= sizeof (collisions) / sizeof (*collisions))
|
||||
dropped_coll++;
|
||||
else
|
||||
#if NR_SLOTS <= (1 << 8)
|
||||
// note: this assumes slots can be encoded in 8 bits
|
||||
collisions[nr_coll++] =
|
||||
((ushort)j << 8) | ((ushort)i & 0xff);
|
||||
#else
|
||||
#error "unsupported NR_SLOTS"
|
||||
#endif
|
||||
}
|
||||
// XOR colliding pairs of Xi
|
||||
dropped_stor = 0;
|
||||
for (n = 0; n < nr_coll; n++)
|
||||
{
|
||||
i = collisions[n] & 0xff;
|
||||
j = collisions[n] >> 8;
|
||||
a = (__global ulong *)
|
||||
(ht_src + tid * NR_SLOTS * SLOT_LEN + i * SLOT_LEN + xi_offset);
|
||||
b = (__global ulong *)
|
||||
(ht_src + tid * NR_SLOTS * SLOT_LEN + j * SLOT_LEN + xi_offset);
|
||||
dropped_stor += xor_and_store(round, ht_dst, tid, i, j, a, b);
|
||||
}
|
||||
if (round < 8)
|
||||
// reset the counter in preparation of the next round
|
||||
*(__global uint *)(ht_src + tid * NR_SLOTS * SLOT_LEN) = 0;
|
||||
#ifdef ENABLE_DEBUG
|
||||
debug[tid * 2] = dropped_coll;
|
||||
debug[tid * 2 + 1] = dropped_stor;
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
** This defines kernel_round1, kernel_round2, ..., kernel_round7.
|
||||
*/
|
||||
#define KERNEL_ROUND(N) \
|
||||
__kernel __attribute__((reqd_work_group_size(64, 1, 1))) \
|
||||
void kernel_round ## N(__global char *ht_src, __global char *ht_dst, \
|
||||
__global uint *debug) \
|
||||
{ \
|
||||
equihash_round(N, ht_src, ht_dst, debug); \
|
||||
}
|
||||
KERNEL_ROUND(1)
|
||||
KERNEL_ROUND(2)
|
||||
KERNEL_ROUND(3)
|
||||
KERNEL_ROUND(4)
|
||||
KERNEL_ROUND(5)
|
||||
KERNEL_ROUND(6)
|
||||
KERNEL_ROUND(7)
|
||||
|
||||
// kernel_round8 takes an extra argument, "sols"
|
||||
__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
|
||||
void kernel_round8(__global char *ht_src, __global char *ht_dst,
|
||||
__global uint *debug, __global sols_t *sols)
|
||||
{
|
||||
uint tid = get_global_id(0);
|
||||
equihash_round(8, ht_src, ht_dst, debug);
|
||||
if (!tid)
|
||||
sols->nr = sols->likely_invalids = 0;
|
||||
}
|
||||
|
||||
/*
** Return the 32-bit reference ("i") stored in the 4 bytes that precede the Xi
** of the given slot of the given row of hash table ht.
*/
uint expand_ref(__global char *ht, uint xi_offset, uint row, uint slot)
{
    __global char       *ref = ht + row * NR_SLOTS * SLOT_LEN +
        slot * SLOT_LEN + xi_offset - 4;

    return *(__global uint *)ref;
}
|
||||
|
||||
/*
** Expand in place each of the nr_inputs references held in ins into the two
** references it points to, as stored by the given round: ins[i] is replaced
** by the pair ins[2 * i] (slot 0) and ins[2 * i + 1] (slot 1). ins must have
** room for 2 * nr_inputs values.
**
** The array is walked from the last reference down to the first so that an
** expanded pair never overwrites a reference that has not been read yet.
** Nothing is done when nr_inputs is 0 (previously the index computations
** wrapped around and wrote out of bounds).
*/
void expand_refs(__global uint *ins, uint nr_inputs, __global char **htabs,
        uint round)
{
    __global char       *ht = htabs[round % 2];
    uint                xi_offset = xi_offset_for_round(round);

    for (uint i = nr_inputs; i-- > 0; )
    {
        // read the reference once, before its own pair is written
        uint    ref = ins[i];
        uint    row = DECODE_ROW(ref);

        ins[2 * i + 1] = expand_ref(ht, xi_offset, row, DECODE_SLOT1(ref));
        ins[2 * i] = expand_ref(ht, xi_offset, row, DECODE_SLOT0(ref));
    }
}
|
||||
|
||||
/*
|
||||
** Verify if a potential solution is in fact valid.
|
||||
*/
|
||||
void potential_sol(__global char **htabs, __global sols_t *sols,
|
||||
uint ref0, uint ref1)
|
||||
{
|
||||
uint sol_i;
|
||||
uint nr_values;
|
||||
sol_i = atomic_inc(&sols->nr);
|
||||
if (sol_i >= MAX_SOLS)
|
||||
return ;
|
||||
sols->valid[sol_i] = 0;
|
||||
nr_values = 0;
|
||||
sols->values[sol_i][nr_values++] = ref0;
|
||||
sols->values[sol_i][nr_values++] = ref1;
|
||||
uint round = PARAM_K - 1;
|
||||
do
|
||||
{
|
||||
round--;
|
||||
expand_refs(&(sols->values[sol_i][0]), nr_values, htabs, round);
|
||||
nr_values *= 2;
|
||||
}
|
||||
while (round > 0);
|
||||
sols->valid[sol_i] = 1;
|
||||
}
|
||||
|
||||
/*
|
||||
** Scan the hash tables to find Equihash solutions.
|
||||
*/
|
||||
__kernel
|
||||
void kernel_sols(__global char *ht0, __global char *ht1, __global sols_t *sols)
|
||||
{
|
||||
uint tid = get_global_id(0);
|
||||
__global char *htabs[2] = { ht0, ht1 };
|
||||
uint ht_i = (PARAM_K - 1) % 2; // table filled at last round
|
||||
uint cnt;
|
||||
uint xi_offset = xi_offset_for_round(PARAM_K - 1);
|
||||
uint i, j;
|
||||
__global char *a, *b;
|
||||
uint ref_i, ref_j;
|
||||
// it's ok for the collisions array to be so small, as if it fills up
|
||||
// the potential solutions are likely invalid (many duplicate inputs)
|
||||
ulong collisions[5];
|
||||
uint coll;
|
||||
#if NR_ROWS_LOG >= 16 && NR_ROWS_LOG <= 20
|
||||
// in the final hash table, we are looking for a match on both the bits
|
||||
// part of the previous PREFIX colliding bits, and the last PREFIX bits.
|
||||
uint mask = 0xffffff;
|
||||
#else
|
||||
#error "unsupported NR_ROWS_LOG"
|
||||
#endif
|
||||
a = htabs[ht_i] + tid * NR_SLOTS * SLOT_LEN;
|
||||
cnt = *(__global uint *)a;
|
||||
cnt = min(cnt, (uint)NR_SLOTS); // handle possible overflow in last round
|
||||
coll = 0;
|
||||
a += xi_offset;
|
||||
for (i = 0; i < cnt; i++, a += SLOT_LEN)
|
||||
for (j = i + 1, b = a + SLOT_LEN; j < cnt; j++, b += SLOT_LEN)
|
||||
if (((*(__global uint *)a) & mask) ==
|
||||
((*(__global uint *)b) & mask))
|
||||
{
|
||||
ref_i = *(__global uint *)(a - 4);
|
||||
ref_j = *(__global uint *)(b - 4);
|
||||
if (coll < sizeof (collisions) / sizeof (*collisions))
|
||||
collisions[coll++] = ((ulong)ref_i << 32) | ref_j;
|
||||
else
|
||||
atomic_inc(&sols->likely_invalids);
|
||||
}
|
||||
if (!coll)
|
||||
return ;
|
||||
for (i = 0; i < coll; i++)
|
||||
potential_sol(htabs, sols, collisions[i] >> 32,
|
||||
collisions[i] & 0xffffffff);
|
||||
}
|
|
@ -1,555 +0,0 @@
|
|||
# 1 "input.cl"
|
||||
# 1 "<built-in>"
|
||||
# 1 "<command-line>"
|
||||
# 1 "/usr/include/stdc-predef.h" 1 3 4
|
||||
# 1 "<command-line>" 2
|
||||
# 1 "input.cl"
|
||||
# 1 "param.h" 1
|
||||
# 60 "param.h"
|
||||
typedef struct sols_s
|
||||
{
|
||||
uint nr;
|
||||
uint likely_invalids;
|
||||
uchar valid[2000];
|
||||
uint values[2000][(1 << 9)];
|
||||
} sols_t;
|
||||
# 2 "input.cl" 2
|
||||
# 36 "input.cl"
|
||||
/*
** The eight 64-bit BLAKE2b initialization vector words; kernel_round0
** loads them into v[8]..v[15] before running the compression rounds.
*/
__constant ulong blake_iv[] =
{
    0x6a09e667f3bcc908,
    0xbb67ae8584caa73b,
    0x3c6ef372fe94f82b,
    0xa54ff53a5f1d36f1,
    0x510e527fade682d1,
    0x9b05688c2b3e6c1f,
    0x1f83d9abfb41bd6b,
    0x5be0cd19137e2179,
};
|
||||
|
||||
|
||||
|
||||
|
||||
__kernel
|
||||
void kernel_init_ht(__global char *ht)
|
||||
{
|
||||
uint tid = get_global_id(0);
|
||||
*(__global uint *)(ht + tid * ((1 << (((200 / (9 + 1)) + 1) - 20)) * 9) * 32) = 0;
|
||||
}
|
||||
# 80 "input.cl"
|
||||
/*
** Store one entry in hash table "ht" for the given "round".
**
** The row is a 20-bit index built from the low 24 bits of xi0 (the bit
** selection differs between even and odd rounds). The Xi words are then
** shifted right by 16 bits as one multi-word value, a slot is reserved by
** atomically incrementing the 32-bit counter at the start of the row, and
** the reference "i" followed by the shifted Xi words is written to the
** slot. How many Xi bytes are written depends on the round.
**
** Returns 1 if the row has no free slot left (nothing is written, but the
** row counter has still been incremented), 0 otherwise.
*/
uint ht_store(uint round, __global char *ht, uint i,
	ulong xi0, ulong xi1, ulong xi2, ulong xi3)
{
    const uint		nr_slots = ((1 << (((200 / (9 + 1)) + 1) - 20)) * 9);
    const uint		slot_len = 32;
    uint		row;
    __global char	*dst;
    uint		slot;
# 111 "input.cl"
    row = (round % 2) ?
	(((xi0 & 0xf0000) >> 0) |
	 ((xi0 & 0xf00) << 4) | ((xi0 & 0xf00000) >> 12) |
	 ((xi0 & 0xf) << 4) | ((xi0 & 0xf000) >> 12)) :
	((xi0 & 0xffff) | ((xi0 & 0xf00000) >> 4));
    // drop the low 16 bits, pulling bits down from the next word
    const ulong	w0 = (xi0 >> 16) | (xi1 << (64 - 16));
    const ulong	w1 = (xi1 >> 16) | (xi2 << (64 - 16));
    const ulong	w2 = (xi2 >> 16) | (xi3 << (64 - 16));
    dst = ht + row * nr_slots * slot_len;
    // the first 32-bit word of the row counts the slots handed out
    slot = atomic_inc((__global uint *)dst);
    if (slot >= nr_slots)
	return 1;
    // Xi data starts 8 + (round / 2) * 4 bytes into the slot
    dst += slot * slot_len + (8 + ((round) / 2) * 4);
    // the reference occupies the 4 bytes just before the Xi data
    *(__global uint *)(dst - 4) = i;
    switch (round)
      {
      case 0:
      case 1:
	// 24 bytes
	*(__global ulong *)(dst + 0) = w0;
	*(__global ulong *)(dst + 8) = w1;
	*(__global ulong *)(dst + 16) = w2;
	break ;
      case 2:
	// 20 bytes
	*(__global ulong *)(dst + 0) = w0;
	*(__global ulong *)(dst + 8) = w1;
	*(__global uint *)(dst + 16) = w2;
	break ;
      case 3:
      case 4:
	// 16 bytes
	*(__global ulong *)(dst + 0) = w0;
	*(__global ulong *)(dst + 8) = w1;
	break ;
      case 5:
	// 12 bytes
	*(__global ulong *)(dst + 0) = w0;
	*(__global uint *)(dst + 8) = w1;
	break ;
      case 6:
      case 7:
	// 8 bytes
	*(__global ulong *)(dst + 0) = w0;
	break ;
      case 8:
	// 4 bytes
	*(__global uint *)(dst + 0) = w0;
	break ;
      default:
	// other rounds store only the reference
	break ;
      }
    return 0;
}
|
||||
# 188 "input.cl"
|
||||
__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
|
||||
void kernel_round0(__global ulong *blake_state, __global char *ht,
|
||||
__global uint *debug)
|
||||
{
|
||||
uint tid = get_global_id(0);
|
||||
ulong v[16];
|
||||
uint inputs_per_thread = (1 << (200 / (9 + 1))) / get_global_size(0);
|
||||
uint input = tid * inputs_per_thread;
|
||||
uint input_end = (tid + 1) * inputs_per_thread;
|
||||
uint dropped = 0;
|
||||
while (input < input_end)
|
||||
{
|
||||
|
||||
|
||||
ulong word1 = (ulong)input << 32;
|
||||
|
||||
v[0] = blake_state[0];
|
||||
v[1] = blake_state[1];
|
||||
v[2] = blake_state[2];
|
||||
v[3] = blake_state[3];
|
||||
v[4] = blake_state[4];
|
||||
v[5] = blake_state[5];
|
||||
v[6] = blake_state[6];
|
||||
v[7] = blake_state[7];
|
||||
v[8] = blake_iv[0];
|
||||
v[9] = blake_iv[1];
|
||||
v[10] = blake_iv[2];
|
||||
v[11] = blake_iv[3];
|
||||
v[12] = blake_iv[4];
|
||||
v[13] = blake_iv[5];
|
||||
v[14] = blake_iv[6];
|
||||
v[15] = blake_iv[7];
|
||||
|
||||
v[12] ^= 140 + 4 ;
|
||||
|
||||
v[14] ^= -1;
|
||||
|
||||
|
||||
v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + word1); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);;
|
||||
v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);;
|
||||
|
||||
v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);;
|
||||
v[0] = (v[0] + v[5] + word1); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);;
|
||||
|
||||
v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);;
|
||||
v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + word1); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);;
|
||||
|
||||
v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + word1); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);;
|
||||
v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);;
|
||||
|
||||
v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);;
|
||||
v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + word1); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);;
|
||||
|
||||
v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);;
|
||||
v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[4] + word1); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);;
|
||||
|
||||
v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[5] + word1); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);;
|
||||
v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);;
|
||||
|
||||
v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + word1); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);;
|
||||
v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);;
|
||||
|
||||
v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);;
|
||||
v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[7] + word1); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);;
|
||||
|
||||
v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[7] + word1); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);;
|
||||
v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);;
|
||||
|
||||
v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + word1); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);;
|
||||
v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);;
|
||||
|
||||
v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + 0); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + 0); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + 0); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + 0); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);;
|
||||
v[0] = (v[0] + v[5] + word1); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + 0); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);;
|
||||
v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + 0); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);;
|
||||
v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + 0); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);;
|
||||
v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + 0); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);;
|
||||
|
||||
|
||||
|
||||
ulong h[7];
|
||||
h[0] = blake_state[0] ^ v[0] ^ v[8];
|
||||
h[1] = blake_state[1] ^ v[1] ^ v[9];
|
||||
h[2] = blake_state[2] ^ v[2] ^ v[10];
|
||||
h[3] = blake_state[3] ^ v[3] ^ v[11];
|
||||
h[4] = blake_state[4] ^ v[4] ^ v[12];
|
||||
h[5] = blake_state[5] ^ v[5] ^ v[13];
|
||||
h[6] = (blake_state[6] ^ v[6] ^ v[14]) & 0xffff;
|
||||
|
||||
|
||||
|
||||
dropped += ht_store(0, ht, input * 2,
|
||||
h[0],
|
||||
h[1],
|
||||
h[2],
|
||||
h[3]);
|
||||
dropped += ht_store(0, ht, input * 2 + 1,
|
||||
(h[3] >> 8) | (h[4] << (64 - 8)),
|
||||
(h[4] >> 8) | (h[5] << (64 - 8)),
|
||||
(h[5] >> 8) | (h[6] << (64 - 8)),
|
||||
(h[6] >> 8));
|
||||
|
||||
|
||||
|
||||
|
||||
input++;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
# 415 "input.cl"
|
||||
/*
 * XOR the Xi data of two slots (pointed to by a and b) and hand the result
 * to ht_store() for the given round.
 *
 * round   - round number; selects how many bytes are XORed and whether the
 *           result is shifted right by one byte (rounds 2, 4, 6 and 8).
 * ht_dst  - destination hash table passed through to ht_store().
 * row     - row index, packed into bits 12 and up of the stored reference.
 * slot_a  - first slot index, packed into bits 0..5 of the reference.
 * slot_b  - second slot index, packed into bits 6..11 of the reference.
 * a, b    - pointers to the Xi data of the two slots.
 *
 * Returns 0 without storing when the first two result words are both zero;
 * otherwise returns whatever ht_store() returns (the caller sums it into a
 * "dropped" counter).
 */
uint xor_and_store(uint round, __global char *ht_dst, uint row,
        uint slot_a, uint slot_b, __global ulong *a, __global ulong *b)
{
    /* Zero-initialised so that a round value outside 1..8 produces an
       all-zero result (rejected below) instead of indeterminate values. */
    ulong xi0 = 0, xi1 = 0, xi2 = 0;

    switch (round)
    {
    case 1:
    case 2:
        /* three full 64-bit words */
        xi0 = a[0] ^ b[0];
        xi1 = a[1] ^ b[1];
        xi2 = a[2] ^ b[2];
        if (round == 2)
        {
            /* shift the 192-bit value right by one byte */
            xi0 = (xi0 >> 8) | (xi1 << (64 - 8));
            xi1 = (xi1 >> 8) | (xi2 << (64 - 8));
            xi2 = (xi2 >> 8);
        }
        break;

    case 3:
        /* two 64-bit words followed by one 32-bit word */
        xi0 = a[0] ^ b[0];
        xi1 = a[1] ^ b[1];
        xi2 = *(__global uint *)(a + 2) ^ *(__global uint *)(b + 2);
        break;

    case 4:
    case 5:
        /* two 64-bit words */
        xi0 = a[0] ^ b[0];
        xi1 = a[1] ^ b[1];
        if (round == 4)
        {
            /* shift the 128-bit value right by one byte */
            xi0 = (xi0 >> 8) | (xi1 << (64 - 8));
            xi1 = (xi1 >> 8);
        }
        break;

    case 6:
        /* one 64-bit word followed by one 32-bit word, then shifted right
           by one byte */
        xi0 = a[0] ^ b[0];
        xi1 = *(__global uint *)(a + 1) ^ *(__global uint *)(b + 1);
        xi0 = (xi0 >> 8) | (xi1 << (64 - 8));
        xi1 = (xi1 >> 8);
        break;

    case 7:
    case 8:
        /* a single 64-bit word */
        xi0 = a[0] ^ b[0];
        if (round == 8)
            xi0 = (xi0 >> 8);
        break;

    default:
        /* unknown round: leave everything zero, nothing gets stored */
        break;
    }

    /* nothing to store when the first 128 bits of the result are zero */
    if (!xi0 && !xi1)
        return 0;

    return ht_store(round, ht_dst,
            ((row << 12) | ((slot_b & 0x3f) << 6) | (slot_a & 0x3f)),
            xi0, xi1, xi2, 0);
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * One collision round for the row owned by this work-item.
 *
 * Reads the slot count of row get_global_id(0) in ht_src (clamped to the
 * row capacity), records slot pairs whose masked first Xi byte matches,
 * and calls xor_and_store() for every recorded pair so the XORed data goes
 * to ht_dst. For rounds below 8 the row's slot counter in ht_src is reset
 * to zero afterwards.
 *
 * The debug pointer is accepted but not used in this expansion.
 */
void equihash_round(uint round, __global char *ht_src, __global char *ht_dst,
        __global uint *debug)
{
    enum
    {
        SLOTS_PER_ROW = ((1 << (((200 / (9 + 1)) + 1) - 20)) * 9),
        SLOT_BYTES = 32
    };

    const uint tid = get_global_id(0);
    __global char *row_base = ht_src + tid * SLOTS_PER_ROW * SLOT_BYTES;
    const uint xi_offset = (8 + ((round - 1) / 2) * 4);
    /* with a zero mask every pair of slots in the row compares equal */
    const uchar mask = 0;

    uchar first_words[SLOTS_PER_ROW];
    /* each entry packs a slot pair: high byte = second slot, low byte = first */
    ushort collisions[SLOTS_PER_ROW * 3];
    uint nr_coll = 0;
    /* drop statistics; accumulated but not read anywhere in this function */
    uint dropped_coll = 0;
    uint dropped_stor = 0;

    /* the first 32-bit word of the row holds the number of used slots */
    const uint cnt = min(*(__global uint *)row_base, (uint)SLOTS_PER_ROW);

    /* gather the first Xi byte of every used slot */
    for (uint s = 0; s < cnt; s++)
        first_words[s] =
            *(__global uchar *)(row_base + xi_offset + s * SLOT_BYTES);

    /* enumerate all pairs (lo < hi) and remember the matching ones */
    for (uint lo = 0; lo < cnt; lo++)
    {
        for (uint hi = lo + 1; hi < cnt; hi++)
        {
            if ((first_words[lo] & mask) != (first_words[hi] & mask))
                continue;
            if (nr_coll >= sizeof (collisions) / sizeof (*collisions))
            {
                dropped_coll++;
                continue;
            }
            collisions[nr_coll++] =
                ((ushort)hi << 8) | ((ushort)lo & 0xff);
        }
    }

    /* XOR each recorded pair and store the result in the next table */
    for (uint n = 0; n < nr_coll; n++)
    {
        const uint i = collisions[n] & 0xff;
        const uint j = collisions[n] >> 8;
        __global ulong *a = (__global ulong *)
            (row_base + i * SLOT_BYTES + xi_offset);
        __global ulong *b = (__global ulong *)
            (row_base + j * SLOT_BYTES + xi_offset);
        dropped_stor += xor_and_store(round, ht_dst, tid, i, j, a, b);
    }

    /* reset this row's slot counter, except after round 8 */
    if (round < 8)
        *(__global uint *)row_base = 0;
}
|
||||
# 585 "input.cl"
|
||||
/*
 * Kernel entry points for rounds 1 to 7. Each one forwards its arguments to
 * equihash_round() with the round number fixed, and requires a work-group
 * size of 64x1x1.
 */
__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
void kernel_round1(__global char *ht_src, __global char *ht_dst,
        __global uint *debug)
{
    equihash_round(1, ht_src, ht_dst, debug);
}

__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
void kernel_round2(__global char *ht_src, __global char *ht_dst,
        __global uint *debug)
{
    equihash_round(2, ht_src, ht_dst, debug);
}

__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
void kernel_round3(__global char *ht_src, __global char *ht_dst,
        __global uint *debug)
{
    equihash_round(3, ht_src, ht_dst, debug);
}

__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
void kernel_round4(__global char *ht_src, __global char *ht_dst,
        __global uint *debug)
{
    equihash_round(4, ht_src, ht_dst, debug);
}

__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
void kernel_round5(__global char *ht_src, __global char *ht_dst,
        __global uint *debug)
{
    equihash_round(5, ht_src, ht_dst, debug);
}

__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
void kernel_round6(__global char *ht_src, __global char *ht_dst,
        __global uint *debug)
{
    equihash_round(6, ht_src, ht_dst, debug);
}

__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
void kernel_round7(__global char *ht_src, __global char *ht_dst,
        __global uint *debug)
{
    equihash_round(7, ht_src, ht_dst, debug);
}
|
||||
|
||||
|
||||
/*
 * Kernel entry point for round 8. Runs equihash_round(8, ...) and then lets
 * work-item 0 reset the solution counters (nr and likely_invalids) in sols.
 */
__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
void kernel_round8(__global char *ht_src, __global char *ht_dst,
        __global uint *debug, __global sols_t *sols)
{
    equihash_round(8, ht_src, ht_dst, debug);

    if (get_global_id(0) == 0)
    {
        sols->likely_invalids = 0;
        sols->nr = 0;
    }
}
|
||||
|
||||
/*
 * Read the 32-bit value stored 4 bytes before the Xi data (xi_offset) of
 * the given slot in the given row of hash table ht.
 */
uint expand_ref(__global char *ht, uint xi_offset, uint row, uint slot)
{
    /* a row is ((1 << 1) * 9) slots of 32 bytes each */
    __global char *slot_base =
        ht + row * ((1 << (((200 / (9 + 1)) + 1) - 20)) * 9) * 32 + slot * 32;

    return *(__global uint *)(slot_base + xi_offset - 4);
}
|
||||
|
||||
/*
 * Expand nr_inputs packed references in place into 2 * nr_inputs values.
 *
 * Each ins[src] packs a row (bits 12 and up) and two slot indices
 * (bits 6..11 and bits 0..5). The entry is replaced by the two values that
 * expand_ref() reads for those slots from table htabs[round % 2]. Entries
 * are processed from the last to the first so that the writes to
 * ins[2*src + 1] and ins[2*src] never clobber a reference that has not
 * been expanded yet.
 */
void expand_refs(__global uint *ins, uint nr_inputs, __global char **htabs,
        uint round)
{
    __global char *table = htabs[round % 2];
    const uint xi_offset = (8 + ((round) / 2) * 4);
    uint src = nr_inputs - 1;
    uint dst = nr_inputs * 2 - 1;

    for (;;)
    {
        /* ins[src] is re-read for every use, exactly as the statements
           are ordered here */
        ins[dst] = expand_ref(table, xi_offset,
                (ins[src] >> 12), ((ins[src] >> 6) & 0x3f));
        ins[dst - 1] = expand_ref(table, xi_offset,
                (ins[src] >> 12), (ins[src] & 0x3f));

        if (src == 0)
            break;
        src -= 1;
        dst -= 2;
    }
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * Record a candidate solution built from the two references ref0 and ref1.
 *
 * Reserves a solution index with an atomic increment of sols->nr; indices
 * of 2000 and above are discarded. The two references are then expanded
 * through rounds 7 down to 0 with expand_refs(), doubling the number of
 * values each time. sols->valid[] is 0 while expansion is in progress and
 * set to 1 at the end.
 */
void potential_sol(__global char **htabs, __global sols_t *sols,
        uint ref0, uint ref1)
{
    const uint sol_i = atomic_inc(&sols->nr);

    if (sol_i >= 2000)
        return ;

    sols->valid[sol_i] = 0;
    sols->values[sol_i][0] = ref0;
    sols->values[sol_i][1] = ref1;

    uint nr_values = 2;
    uint round = 9 - 1;
    while (round > 0)
    {
        round--;
        expand_refs(&(sols->values[sol_i][0]), nr_values, htabs, round);
        nr_values *= 2;
    }

    sols->valid[sol_i] = 1;
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * Scan the row owned by this work-item for pairs of slots whose Xi data
 * agree in the low 24 bits, and turn each such pair into a candidate
 * solution via potential_sol().
 *
 * Up to 5 matching pairs per row are kept; every further match increments
 * sols->likely_invalids instead.
 */
__kernel
void kernel_sols(__global char *ht0, __global char *ht1, __global sols_t *sols)
{
    enum
    {
        SLOTS_PER_ROW = ((1 << (((200 / (9 + 1)) + 1) - 20)) * 9),
        SLOT_BYTES = 32
    };

    __global char *htabs[2] = { ht0, ht1 };
    const uint tid = get_global_id(0);
    const uint ht_i = (9 - 1) % 2;
    const uint xi_offset = (8 + ((9 - 1) / 2) * 4);
    const uint mask = 0xffffff;

    /* each entry packs two references: first in the high 32 bits,
       second in the low 32 bits */
    ulong collisions[5];
    uint coll = 0;

    __global char *row_base = htabs[ht_i] + tid * SLOTS_PER_ROW * SLOT_BYTES;
    /* the first 32-bit word of the row holds the number of used slots */
    const uint cnt = min(*(__global uint *)row_base, (uint)SLOTS_PER_ROW);
    __global char *xi_base = row_base + xi_offset;

    for (uint i = 0; i < cnt; i++)
    {
        __global char *a = xi_base + i * SLOT_BYTES;

        for (uint j = i + 1; j < cnt; j++)
        {
            __global char *b = xi_base + j * SLOT_BYTES;

            if (((*(__global uint *)a) & mask) !=
                    ((*(__global uint *)b) & mask))
                continue;

            /* the reference of a slot sits 4 bytes before its Xi data */
            const uint ref_i = *(__global uint *)(a - 4);
            const uint ref_j = *(__global uint *)(b - 4);

            if (coll < sizeof (collisions) / sizeof (*collisions))
                collisions[coll++] = ((ulong)ref_i << 32) | ref_j;
            else
                atomic_inc(&sols->likely_invalids);
        }
    }

    if (!coll)
        return ;

    for (uint k = 0; k < coll; k++)
        potential_sol(htabs, sols, collisions[k] >> 32,
                collisions[k] & 0xffffffff);
}
|
|
@ -1,305 +0,0 @@
|
|||
#include "ocl_xmp.hpp"
|
||||
|
||||
|
||||
|
||||
// miner instance
|
||||
#include "opencl.h"
|
||||
#include <cstdint>
|
||||
|
||||
#include <boost/filesystem.hpp>
|
||||
|
||||
// is this really needed?
|
||||
//#include "uint256.h"
|
||||
|
||||
// hardcoded defines, looks like not working
|
||||
// hardcoded defines fix this
|
||||
#define RESTBITS 4
|
||||
#define XINTREE
|
||||
#define UNROLL
|
||||
#define __OPENCL_HOST__
|
||||
#include "zcash/gpu/common.h"
|
||||
|
||||
struct MinerInstance {
|
||||
cl_context _context;
|
||||
cl_program _program;
|
||||
|
||||
cl_command_queue queue;
|
||||
clBuffer<blake2b_state> blake2bState;
|
||||
clBuffer<uint32_t> heap0;
|
||||
clBuffer<uint32_t> heap1;
|
||||
clBuffer<bsizes> nslots;
|
||||
clBuffer<proof> sols;
|
||||
clBuffer<uint32_t> numSols;
|
||||
cl_kernel _digitHKernel;
|
||||
cl_kernel _digitOKernel;
|
||||
cl_kernel _digitEKernel;
|
||||
cl_kernel _digitKKernel;
|
||||
cl_kernel _digitKernels[9];
|
||||
|
||||
//hide_xmp_hack::uint256 nonce; // TODO IS THIS NEEDED????
|
||||
|
||||
bool init(cl_context context, cl_program program, cl_device_id dev, unsigned threadsNum, unsigned threadsPerBlock);
|
||||
};
|
||||
|
||||
cl_context gContext = 0;
|
||||
cl_program gProgram = 0;
|
||||
cl_platform_id gPlatform = 0;
|
||||
|
||||
|
||||
// Creates the command queue, device buffers and kernels for one device and
// binds the kernel arguments that never change between solve() calls.
//
// context, program - OpenCL context and built program to use
// dev              - device the command queue is created for
// threadsNum, threadsPerBlock - currently unused here
//
// Returns true on success, false as soon as any OpenCL call reports an error.
//
// OCLR(err, ret) is assumed to print the error and `return ret` when err is
// non-zero (see its use in digit()). The previous code passed 1 as `ret`,
// which converts to `true` in this bool function and so reported failures as
// success; it also never looked at the error codes written by
// clCreateCommandQueue / clCreateKernel.
bool MinerInstance::init(cl_context context,
	cl_program program,
	cl_device_id dev,
	unsigned int threadsNum,
	unsigned int threadsPerBlock)
{
	(void)threadsNum;
	(void)threadsPerBlock;

	cl_int error;

	_context = context;
	_program = program;
	queue = clCreateCommandQueue(context, dev, 0, &error);
	OCLR(error, false);

	blake2bState.init(context, 1, CL_MEM_READ_WRITE);
	heap0.init(context, sizeof(digit0) / sizeof(uint32_t), CL_MEM_HOST_NO_ACCESS);
	heap1.init(context, sizeof(digit1) / sizeof(uint32_t), CL_MEM_HOST_NO_ACCESS);
	nslots.init(context, 2, CL_MEM_READ_WRITE);
	sols.init(context, MAXSOLS, CL_MEM_READ_WRITE);
	numSols.init(context, 1, CL_MEM_READ_WRITE);

	_digitHKernel = clCreateKernel(program, "digitH", &error);
	OCLR(error, false);
	_digitOKernel = clCreateKernel(program, "digitOdd", &error);
	OCLR(error, false);
	_digitEKernel = clCreateKernel(program, "digitEven", &error);
	OCLR(error, false);
	_digitKKernel = clCreateKernel(program, "digitK", &error);
	OCLR(error, false);

	OCLR(clSetKernelArg(_digitHKernel, 0, sizeof(cl_mem), &blake2bState.DeviceData), false);
	OCLR(clSetKernelArg(_digitHKernel, 1, sizeof(cl_mem), &heap0.DeviceData), false);
	OCLR(clSetKernelArg(_digitHKernel, 2, sizeof(cl_mem), &nslots.DeviceData), false);

	// argument 0 of the odd/even kernels (the round number) is set per
	// launch in solve()
	OCLR(clSetKernelArg(_digitOKernel, 1, sizeof(cl_mem), &heap0.DeviceData), false);
	OCLR(clSetKernelArg(_digitOKernel, 2, sizeof(cl_mem), &heap1.DeviceData), false);
	OCLR(clSetKernelArg(_digitOKernel, 3, sizeof(cl_mem), &nslots.DeviceData), false);
	OCLR(clSetKernelArg(_digitEKernel, 1, sizeof(cl_mem), &heap0.DeviceData), false);
	OCLR(clSetKernelArg(_digitEKernel, 2, sizeof(cl_mem), &heap1.DeviceData), false);
	OCLR(clSetKernelArg(_digitEKernel, 3, sizeof(cl_mem), &nslots.DeviceData), false);

	// unrolled per-round kernels "digit_1" .. "digit_8"
	for (unsigned i = 1; i <= 8; i++) {
		char kernelName[32];
		sprintf(kernelName, "digit_%u", i);
		_digitKernels[i] = clCreateKernel(program, kernelName, &error);
		OCLR(error, false);
		OCLR(clSetKernelArg(_digitKernels[i], 0, sizeof(cl_mem), &heap0.DeviceData), false);
		OCLR(clSetKernelArg(_digitKernels[i], 1, sizeof(cl_mem), &heap1.DeviceData), false);
		OCLR(clSetKernelArg(_digitKernels[i], 2, sizeof(cl_mem), &nslots.DeviceData), false);
	}

	OCLR(clSetKernelArg(_digitKKernel, 0, sizeof(cl_mem), &heap0.DeviceData), false);
	OCLR(clSetKernelArg(_digitKKernel, 1, sizeof(cl_mem), &heap1.DeviceData), false);
	OCLR(clSetKernelArg(_digitKKernel, 2, sizeof(cl_mem), &nslots.DeviceData), false);
	OCLR(clSetKernelArg(_digitKKernel, 3, sizeof(cl_mem), &sols.DeviceData), false);
	OCLR(clSetKernelArg(_digitKKernel, 4, sizeof(cl_mem), &numSols.DeviceData), false);

	return true;
}
|
||||
|
||||
////////////////////////////
|
||||
////statics non class START
|
||||
|
||||
// Initialises ctx as a BLAKE2b state with digest length HASHOUT and a
// 16-byte personalization made of "ZcashPoW" followed by the 4-byte values
// WN and WK (copied in host byte order), then absorbs the header bytes.
static void setheader(blake2b_state *ctx, const char *header, const uint32_t headerlen)
{
	// bytes 8..15 of the template are overwritten with WN and WK below
	char personal[] = "ZcashPoW01230123";
	const uint32_t le_N = WN;
	const uint32_t le_K = WK;
	memcpy(&personal[8], &le_N, sizeof(le_N));
	memcpy(&personal[12], &le_K, sizeof(le_K));

	blake2b_param params;
	params.digest_length = HASHOUT;
	params.key_length = 0;
	params.fanout = 1;
	params.depth = 1;
	params.leaf_length = 0;
	params.node_offset = 0;
	params.node_depth = 0;
	params.inner_length = 0;
	memset(params.reserved, 0, sizeof(params.reserved));
	memset(params.salt, 0, sizeof(params.salt));
	memcpy(params.personal, (const uint8_t *)personal, 16);

	blake2b_init_param(ctx, &params);
	blake2b_update(ctx, (const uint8_t*)header, headerlen);
}
|
||||
|
||||
// Absorbs a 32-byte nonce into the BLAKE2b state ctx.
static void setnonce(blake2b_state *ctx, const uint8_t *nonce)
{
	enum { NONCE_BYTES = 32 };
	blake2b_update(ctx, nonce, NONCE_BYTES);
}
|
||||
|
||||
// Enqueues `kernel` on clQueue as a 1-dimensional range of `nthreads`
// work-items in groups of `threadsPerBlock`.
// Returns 0 on success; on an enqueue error OCLR leaves the function with 1.
static inline int digit(cl_command_queue clQueue, cl_kernel kernel, size_t nthreads, size_t threadsPerBlock)
{
	// work_dim is 1, so only one global and one local size are read
	const size_t globalSize = nthreads;
	const size_t localSize = threadsPerBlock;
	OCLR(clEnqueueNDRangeKernel(clQueue, kernel, 1, 0, &globalSize, &localSize, 0, 0, 0), 1);
	return 0;
}
|
||||
|
||||
|
||||
////statics non class END
|
||||
////////////////////////////
|
||||
|
||||
// Stores the platform/device selection and the default launch geometry.
//
// `context` and `blocks` were previously left uninitialised; they now start
// as nullptr / 0 so that code inspecting the object before start() succeeds
// never reads an indeterminate value.
ocl_xmp::ocl_xmp(int platf_id, int dev_id) /*TODO*/
	: blocks(0),
	  device_id(dev_id),
	  platform_id(platf_id),
	  context(nullptr),
	  threadsNum(8192),  // TODO
	  wokrsize(128)      // 256;
{
	//threadsperblock = 128;
}
|
||||
|
||||
std::string ocl_xmp::getdevinfo() { /*TODO*/
|
||||
return "GPU_ID(" + std::to_string(device_id) + ")";
|
||||
}
|
||||
|
||||
// STATICS START
|
||||
// Device enumeration is not implemented yet (TODO): always reports 0.
int ocl_xmp::getcount()
{
	return 0;
}
|
||||
|
||||
// Placeholder (TODO): does nothing; gpu_name, sm_count and version are left
// unmodified.
void ocl_xmp::getinfo(int platf_id, int d_id, std::string& gpu_name, int& sm_count, std::string& version) { /*TODO*/ }
|
||||
|
||||
// Initialises OpenCL for the device selected in device_context: creates a
// context, compiles (or loads) the equihash kernel binary and sets up a
// MinerInstance. device_context.is_init_success is true only if every step
// succeeded; device_context.context is only assigned on success.
//
// Changes against the previous version:
//  - the device is picked by a bounds-checked index instead of a linear
//    search that compared an unsigned loop counter with the signed device_id;
//  - the locals no longer shadow the file-scope gContext / gProgram;
//  - the MinerInstance and the CL context/program are released when a later
//    step fails instead of being leaked.
void ocl_xmp::start(ocl_xmp& device_context) {
	/*TODO*/
	device_context.is_init_success = false;

	std::vector<cl_device_id> allGpus;
	if (!clInitialize(device_context.platform_id, allGpus)) {
		return;
	}

	const int wanted = device_context.device_id;
	if (wanted < 0 || (size_t)wanted >= allGpus.size()) {
		printf("Device id %d not found\n", wanted);
		return;
	}
	cl_device_id gpu = allGpus[wanted];
	// exactly one device is used, so it is always "GPU 0"
	const unsigned gpuIndex = 0;
	printf("Using device %d as GPU %d\n", wanted, (int)gpuIndex);

	// context create
	cl_context_properties props[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)gPlatform, 0 };
	cl_int error;
	cl_context clContext = clCreateContext(props, 1, &gpu, 0, 0, &error);
	if (cl_int err = error) {
		printf("OpenCL error: %d at %s:%d\n", err, __FILE__, __LINE__);
		return;
	}

	cl_int binstatus = 0;
	cl_program clProgram = 0;
	char kernelName[64];
	sprintf(kernelName, "equiw200k9_gpu%u.bin", gpuIndex);
	if (!clCompileKernel(clContext,
		gpu,
		kernelName,
		{ "zcash/gpu/equihash.cl" },
		"-I./zcash/gpu -DXINTREE -DWN=200 -DWK=9 -DRESTBITS=4 -DUNROLL",
		&binstatus,
		&clProgram)) {
		if (clProgram)
			clReleaseProgram(clProgram);
		clReleaseContext(clContext);
		return;
	}

	if (binstatus != CL_SUCCESS) {
		printf("GPU %d: failed to load kernel\n", gpuIndex);
		if (clProgram)
			clReleaseProgram(clProgram);
		clReleaseContext(clContext);
		return;
	}

	MinerInstance *miner = new MinerInstance();
	if (!miner->init(clContext, clProgram, gpu, device_context.threadsNum, device_context.wokrsize)) {
		printf("Init failed");
		delete miner;
		clReleaseProgram(clProgram);
		clReleaseContext(clContext);
		return;
	}

	device_context.context = miner;
	device_context.is_init_success = true;
}
|
||||
|
||||
// Placeholder (TODO): does nothing; nothing created by start() is released
// here.
void ocl_xmp::stop(ocl_xmp& device_context) { /*TODO*/ }
|
||||
|
||||
void ocl_xmp::solve(const char *tequihash_header,
|
||||
unsigned int tequihash_header_len,
|
||||
const char* nonce,
|
||||
unsigned int nonce_len,
|
||||
std::function<bool()> cancelf,
|
||||
std::function<void(const std::vector<uint32_t>&, size_t, const unsigned char*)> solutionf,
|
||||
std::function<void(void)> hashdonef,
|
||||
ocl_xmp& device_context) {
|
||||
if (device_context.is_init_success == false) {
|
||||
printf("fail OCL\n");
|
||||
//cancelf();
|
||||
return;
|
||||
}
|
||||
|
||||
// move to context or somewhere or leave?
|
||||
blake2b_state initialCtx;
|
||||
setheader(&initialCtx, tequihash_header, tequihash_header_len);
|
||||
|
||||
MinerInstance *miner = device_context.context;
|
||||
clFlush(miner->queue);
|
||||
|
||||
/*hide_xmp_hack::uint256 nNonce = hide_xmp_hack::uint256(nonce);
|
||||
miner->nonce = nNonce;*/
|
||||
*miner->blake2bState.HostData = initialCtx;
|
||||
setnonce(miner->blake2bState.HostData, (const uint8_t*)nonce);
|
||||
memset(miner->nslots.HostData, 0, 2 * sizeof(bsizes));
|
||||
*miner->numSols.HostData = 0;
|
||||
miner->blake2bState.copyToDevice(miner->queue, false);
|
||||
miner->nslots.copyToDevice(miner->queue, false);
|
||||
miner->numSols.copyToDevice(miner->queue, false);
|
||||
|
||||
digit(miner->queue, miner->_digitHKernel, device_context.threadsNum, device_context.wokrsize);
|
||||
#if BUCKBITS == 16 && RESTBITS == 4 && defined XINTREE && defined(UNROLL)
|
||||
for (unsigned i = 1; i <= 8; i++)
|
||||
digit(miner->queue, miner->_digitKernels[i], device_context.threadsNum, device_context.wokrsize);
|
||||
#else
|
||||
size_t globalSize[] = { _threadsNum, 1, 1 };
|
||||
size_t localSize[] = { _threadsPerBlocksNum, 1 };
|
||||
for (unsigned r = 1; r < WK; r++) {
|
||||
if (r & 1) {
|
||||
OCL(clSetKernelArg(miner->_digitOKernel, 0, sizeof(cl_uint), &r));
|
||||
OCL(clEnqueueNDRangeKernel(miner->queue, miner->_digitOKernel, 1, 0, globalSize, localSize, 0, 0, 0));
|
||||
}
|
||||
else {
|
||||
OCL(clSetKernelArg(miner->_digitEKernel, 0, sizeof(cl_uint), &r));
|
||||
OCL(clEnqueueNDRangeKernel(miner->queue, miner->_digitEKernel, 1, 0, globalSize, localSize, 0, 0, 0));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
digit(miner->queue, miner->_digitKKernel, device_context.threadsNum, device_context.wokrsize);
|
||||
|
||||
// get solutions
|
||||
miner->sols.copyToHost(miner->queue, true);
|
||||
miner->numSols.copyToHost(miner->queue, true);
|
||||
for (unsigned s = 0; s < miner->numSols.HostData[0]; s++)
|
||||
{
|
||||
std::vector<uint32_t> index_vector(PROOFSIZE);
|
||||
for (u32 i = 0; i < PROOFSIZE; i++) {
|
||||
index_vector[i] = miner->sols[s][i];
|
||||
}
|
||||
|
||||
solutionf(index_vector, DIGITBITS, nullptr);
|
||||
if (cancelf()) return;
|
||||
}
|
||||
hashdonef();
|
||||
}
|
||||
|
||||
// STATICS END
|
|
@ -1,56 +0,0 @@
|
|||
#pragma once
|
||||
// DLL_OCL_XMP expands to __declspec(dllexport) when _LIB is defined and to
// nothing otherwise.
#if defined(_LIB)
#  define DLL_OCL_XMP __declspec(dllexport)
#else
#  define DLL_OCL_XMP
#endif
|
||||
|
||||
// remove after
|
||||
#include <string>
|
||||
#include <functional>
|
||||
#include <vector>
|
||||
#include <cstdint>
|
||||
|
||||
struct MinerInstance;
|
||||
|
||||
struct DLL_OCL_XMP ocl_xmp
|
||||
{
|
||||
//int threadsperblock;
|
||||
int blocks;
|
||||
int device_id;
|
||||
int platform_id;
|
||||
|
||||
MinerInstance* context;
|
||||
// threads
|
||||
unsigned threadsNum; // TMP
|
||||
unsigned wokrsize;
|
||||
|
||||
bool is_init_success = false;
|
||||
|
||||
ocl_xmp(int platf_id, int dev_id);
|
||||
|
||||
std::string getdevinfo();
|
||||
|
||||
static int getcount();
|
||||
|
||||
static void getinfo(int platf_id, int d_id, std::string& gpu_name, int& sm_count, std::string& version);
|
||||
|
||||
static void start(ocl_xmp& device_context);
|
||||
|
||||
static void stop(ocl_xmp& device_context);
|
||||
|
||||
static void solve(const char *tequihash_header,
|
||||
unsigned int tequihash_header_len,
|
||||
const char* nonce,
|
||||
unsigned int nonce_len,
|
||||
std::function<bool()> cancelf,
|
||||
std::function<void(const std::vector<uint32_t>&, size_t, const unsigned char*)> solutionf,
|
||||
std::function<void(void)> hashdonef,
|
||||
ocl_xmp& device_context);
|
||||
|
||||
std::string getname() { return "OCL_XMP"; }
|
||||
|
||||
private:
|
||||
std::string m_gpu_name;
|
||||
std::string m_version;
|
||||
};
|
|
@ -1,100 +0,0 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="ocl_xmp.hpp" />
|
||||
<ClInclude Include="zcash\gpu\common.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="..\cpu_tromp\blake2\blake2bx.cpp" />
|
||||
<ClCompile Include="ocl_xmp.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="zcash\gpu\equihash.cl" />
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{5EC9EDEB-8E49-4126-9161-1560683CBC71}</ProjectGuid>
|
||||
<Keyword>Win32Proj</Keyword>
|
||||
<RootNamespace>ocl_xpm</RootNamespace>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>StaticLibrary</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<PlatformToolset>v120</PlatformToolset>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>StaticLibrary</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<PlatformToolset>v120</PlatformToolset>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<ClCompile>
|
||||
<PrecompiledHeader>NotUsing</PrecompiledHeader>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;_USRDLL;OCL_XPM_EXPORTS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<SDLCheck>
|
||||
</SDLCheck>
|
||||
<AdditionalIncludeDirectories>..\ocl_device_utils;..\cpu_tromp;..\3rdparty\include;$(AMDAPPSDKROOT)\include\</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<AdditionalDependencies>OpenCL.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalLibraryDirectories>..\3rdparty\libs\win64;$(AMDAPPSDKROOT)\lib\x86_64\</AdditionalLibraryDirectories>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PrecompiledHeader>NotUsing</PrecompiledHeader>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;_USRDLL;OCL_XPM_EXPORTS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<SDLCheck>
|
||||
</SDLCheck>
|
||||
<AdditionalIncludeDirectories>..\ocl_device_utils;..\3rdparty\include;$(AMDAPPSDKROOT)\include\;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
<AdditionalDependencies>OpenCL.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalLibraryDirectories>..\3rdparty\libs\win64;$(AMDAPPSDKROOT)\lib\x86_64\</AdditionalLibraryDirectories>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
|
@ -1,26 +0,0 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup>
|
||||
<Filter Include="zcash">
|
||||
<UniqueIdentifier>{69f1aa4c-1be3-4265-a93c-b58266bad10b}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="zcash\gpu">
|
||||
<UniqueIdentifier>{a95c2e64-90c0-48d9-9287-46723392025d}</UniqueIdentifier>
|
||||
</Filter>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="ocl_xmp.hpp" />
|
||||
<ClInclude Include="zcash\gpu\common.h">
|
||||
<Filter>zcash\gpu</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="ocl_xmp.cpp" />
|
||||
<ClCompile Include="..\cpu_tromp\blake2\blake2bx.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="zcash\gpu\equihash.cl">
|
||||
<Filter>zcash\gpu</Filter>
|
||||
</None>
|
||||
</ItemGroup>
|
||||
</Project>
|
|
@ -1,150 +0,0 @@
|
|||
// Blake2-B CUDA Implementation
|
||||
// tpruvot@github July 2016
|
||||
// permission granted to use under MIT license
|
||||
// modified for use in Zcash by John Tromp September 2016
|
||||
|
||||
/**
|
||||
* uint2 direct ops by c++ operator definitions
|
||||
*/
|
||||
|
||||
// static __device__ __forceinline__ uint2 operator^ (uint2 a, uint2 b) {
|
||||
// return make_uint2(a.x ^ b.x, a.y ^ b.y);
|
||||
// }
|
||||
|
||||
// uint2 ROR/ROL methods
|
||||
/*
 * Rotate right by `offset` bits the 64-bit quantity formed by a.y (upper
 * 32 bits) and a.x (lower 32 bits), working on the two halves separately.
 * Offsets 0 and 32 are special-cased; the other two cases cover
 * offset < 32 and offset > 32.
 */
uint2 ROR2(const uint2 a, const int offset) {
  uint2 rotated;

  if (offset == 0)
    return a;

  if (offset == 32) {
    /* a rotation by half the width just swaps the halves */
    rotated.y = a.x;
    rotated.x = a.y;
    return rotated;
  }

  if (offset < 32) {
    const int carry = 32 - offset;
    rotated.y = ((a.y >> offset) | (a.x << carry));
    rotated.x = ((a.x >> offset) | (a.y << carry));
    return rotated;
  }

  /* offset > 32: halves swap roles, then rotate by the remainder */
  const int rest = offset - 32;
  const int carry = 64 - offset;
  rotated.y = ((a.x >> rest) | (a.y << carry));
  rotated.x = ((a.y >> rest) | (a.x << carry));
  return rotated;
}
|
||||
|
||||
/* Returns `value` with its two 32-bit components exchanged. */
uint2 SWAPUINT2(uint2 value) {
  return (uint2)(value.y, value.x);
}
|
||||
|
||||
/* fixed-offset shorthands for ROR2 */
#define ROR24(u) ROR2(u, 24)
#define ROR16(u) ROR2(u, 16)
|
||||
|
||||
/*
 * Message word schedule: row r lists, in order, the indices into m[] that
 * G() uses during round r (two per G call). Rows 10 and 11 repeat rows 0
 * and 1.
 */
__constant int8_t blake2b_sigma[12][16] = {
  {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
  { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 },
  { 11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4 },
  {  7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8 },
  {  9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13 },
  {  2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9 },
  { 12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11 },
  { 13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10 },
  {  6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5 },
  { 10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13,  0 },
  {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
  { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 }
};
|
||||
|
||||
void G(const int32_t r, const int32_t i, uint64_t *a, uint64_t *b, uint64_t *c, uint64_t *d, uint64_t const m[16]) {
|
||||
*a += *b + m[ blake2b_sigma[r][2*i] ];
|
||||
((uint2*)d)[0] = SWAPUINT2( ((uint2*)d)[0] ^ ((uint2*)a)[0] );
|
||||
*c += *d;
|
||||
((uint2*)b)[0] = ROR24( ((uint2*)b)[0] ^ ((uint2*)c)[0] );
|
||||
*a += *b + m[ blake2b_sigma[r][2*i+1] ];
|
||||
((uint2*)d)[0] = ROR16( ((uint2*)d)[0] ^ ((uint2*)a)[0] );
|
||||
*c += *d;
|
||||
((uint2*)b)[0] = ROR2( ((uint2*)b)[0] ^ ((uint2*)c)[0], 63U);
|
||||
}
|
||||
|
||||
/*
 * One BLAKE2b round with round index r: four column mixes followed by four
 * diagonal mixes of the 16-word work vector.
 *
 * Relies on two arrays being in scope at the point of use: `v` (the work
 * vector) and `m` (the 16 message words).
 *
 * The body is wrapped in do { ... } while (0) so that `ROUND(r);` is exactly
 * one statement and stays correct under an unbraced if/else or loop.
 */
#define ROUND(r) \
  do { \
    G((r), 0, &v[0], &v[4], &v[ 8], &v[12], m); \
    G((r), 1, &v[1], &v[5], &v[ 9], &v[13], m); \
    G((r), 2, &v[2], &v[6], &v[10], &v[14], m); \
    G((r), 3, &v[3], &v[7], &v[11], &v[15], m); \
    G((r), 4, &v[0], &v[5], &v[10], &v[15], m); \
    G((r), 5, &v[1], &v[6], &v[11], &v[12], m); \
    G((r), 6, &v[2], &v[7], &v[ 8], &v[13], m); \
    G((r), 7, &v[3], &v[4], &v[ 9], &v[14], m); \
  } while (0)
|
||||
|
||||
/*
 * Finish a BLAKE2b hash whose remaining input is already buffered in `state`,
 * with the 32-bit value `idx` appended, and copy the first `outlen` bytes of
 * the result to `hash`.
 *
 * Exactly one compression is performed, and it is treated as the final block.
 * `state` is modified in place (buf, buflen, counter and h all change).
 */
void blake2b_gpu_hash(blake2b_state *state, uint32_t idx, uint8_t *hash, uint32_t outlen) {
  // Append idx as a 32-bit word in native byte order and account for the
  // buffered bytes in the running counter.
  const uint32_t leb = idx;
  *(uint32_t*)(state->buf + state->buflen) = leb;
  state->buflen += 4;
  state->counter += state->buflen;

  // Zero the unused tail of the block.
  for (unsigned pad = 0; pad < BLAKE2B_BLOCKBYTES - state->buflen; pad++)
    state->buf[pad + state->buflen] = 0;

  // Load the block as sixteen 64-bit message words.
  uint64_t *block_words = (uint64_t *)state->buf;
  uint64_t m[16];
  for (int w = 0; w < 16; w++)
    m[w] = block_words[w];

  // Work vector: chaining value in the lower half, BLAKE2b IV in the upper.
  uint64_t v[16];
  for (int w = 0; w < 8; w++)
    v[w] = state->h[w];

  v[ 8] = 0x6a09e667f3bcc908;
  v[ 9] = 0xbb67ae8584caa73b;
  v[10] = 0x3c6ef372fe94f82b;
  v[11] = 0xa54ff53a5f1d36f1;
  v[12] = 0x510e527fade682d1 ^ state->counter;             // byte counter
  v[13] = 0x9b05688c2b3e6c1f;
  v[14] = 0x1f83d9abfb41bd6b ^ 0xffffffffffffffff;         // inverted: final block
  v[15] = 0x5be0cd19137e2179;

  // Twelve rounds, kept explicit so every round index is a constant.
  ROUND( 0 );
  ROUND( 1 );
  ROUND( 2 );
  ROUND( 3 );
  ROUND( 4 );
  ROUND( 5 );
  ROUND( 6 );
  ROUND( 7 );
  ROUND( 8 );
  ROUND( 9 );
  ROUND( 10 );
  ROUND( 11 );

  // Fold both halves of the work vector back into the chaining value.
  for (int w = 0; w < 8; w++)
    state->h[w] ^= v[w] ^ v[w + 8];

  // Emit the leading outlen bytes of the chaining value.
  for (unsigned k = 0; k < outlen; k++)
    hash[k] = ((uint8_t*)state->h)[k];
}
|
|
@ -1,159 +0,0 @@
|
|||
#if defined(__OPENCL_HOST__)
|
||||
#define __global
|
||||
//#include "blake2/blake2.h"
|
||||
//#include "equi.h"
|
||||
#include "../cpu_tromp/equi.h"
|
||||
|
||||
#else
|
||||
typedef char int8_t;
|
||||
typedef uchar uint8_t;
|
||||
typedef short int16_t;
|
||||
typedef ushort uint16_t;
|
||||
typedef int int32_t;
|
||||
typedef uint uint32_t;
|
||||
typedef long int64_t;
|
||||
typedef ulong uint64_t;
|
||||
|
||||
// ALIGN(x): request x-byte alignment using the compiler's own spelling.
#ifdef _MSC_VER
#define ALIGN(x) __declspec(align(x))
#else
#define ALIGN(x) __attribute__ ((__aligned__(x)))
#endif
|
||||
|
||||
// BLAKE2b size constants, all in bytes.
enum blake2b_constant
{
  BLAKE2B_BLOCKBYTES    = 128,
  BLAKE2B_OUTBYTES      = 64,
  BLAKE2B_KEYBYTES      = 64,
  BLAKE2B_SALTBYTES     = 16,
  BLAKE2B_PERSONALBYTES = 16
};
|
||||
|
||||
#pragma pack(push, 1)
|
||||
ALIGN( 64 ) typedef struct __blake2b_state {
|
||||
uint64_t h[8];
|
||||
uint8_t buf[BLAKE2B_BLOCKBYTES];
|
||||
uint16_t counter;
|
||||
uint8_t buflen;
|
||||
uint8_t lastblock;
|
||||
} blake2b_state;
|
||||
#pragma pack(pop)
|
||||
#endif
|
||||
|
||||
// Bits per collision step: WN split evenly over WK+1 parts.
#define COLLISION_BIT_LENGTH (WN / (WK + 1))
// The same length rounded up to whole bytes.
#define COLLISION_BYTE_LENGTH ((COLLISION_BIT_LENGTH + 7) / 8)
// Two collision-length byte strings plus 2^WK 32-bit words.
#define FINAL_FULL_WIDTH (2 * COLLISION_BYTE_LENGTH + sizeof(uint32_t) * (1 << (WK)))
|
||||
|
||||
|
||||
// Number of digits the WN-bit hash is split into, and the bits per digit.
#define NDIGITS (WK + 1)
#define DIGITBITS (WN / (NDIGITS))

// Size of a compressed proof, counted in 32-bit words.
#define COMPRESSED_PROOFSIZE ((COLLISION_BIT_LENGTH + 1) * PROOFSIZE * 4 / (8 * sizeof(uint32_t)))

// The definitions below are disabled in this header. PROOFSIZE, NHASHES and
// HASHESPERBLAKE are still referenced further down, so they have to be
// supplied from elsewhere.
//#define PROOFSIZE (1u<<WK)
//#define BASE (1u<<DIGITBITS)
//#define NHASHES (2u*BASE)
//#define HASHESPERBLAKE (512/WN)
//#define HASHOUT (HASHESPERBLAKE*WN/8)
|
||||
|
||||
// log2 of the number of buckets
#define BUCKBITS (DIGITBITS - RESTBITS)

// number of buckets
#define NBUCKETS (1 << BUCKBITS)

// log2 of the number of slots per bucket
#define SLOTBITS (RESTBITS + 1 + 1)

// number of slots per bucket
#define NSLOTS (1u << SLOTBITS)

// number of per-xhash slots
#define XFULL 16

// mask covering the low SLOTBITS bits
#define SLOTMASK (NSLOTS - 1)

// number of possible values of the xhash (the remaining RESTBITS bits of a digit)
#define NRESTS (1u << RESTBITS)

// number of 512-bit blake2b outputs needed for NHASHES hashes
// (each output yields HASHESPERBLAKE hashes; rounded up)
#define NBLOCKS ((NHASHES + HASHESPERBLAKE - 1) / HASHESPERBLAKE)

// nothing larger found in 100000 runs
#define MAXSOLS 8
|
||||
|
||||
// Number of 32-bit words needed to hold `bits` bits (rounded up).
// The argument is parenthesised so that expressions built from operators
// binding looser than '+' (e.g. `a & b`, `c ? x : y`) are counted correctly.
#define WORDS(bits) (((bits) + 31) / 32)

// Hash words stored per slot in the two slot layouts: the hash bits left
// after removing one (resp. two) digits, plus RESTBITS.
#define HASHWORDS0 WORDS(WN - DIGITBITS + RESTBITS)
#define HASHWORDS1 WORDS(WN - 2*DIGITBITS + RESTBITS)
|
||||
|
||||
// A proof is an array of PROOFSIZE 32-bit words.
typedef uint32_t proof[PROOFSIZE];

// Bit-field layouts packed into 32 bits:
//   tree  = | xhash(RESTBITS) | slotid1(SLOTBITS) | slotid0(SLOTBITS) | bucketid(BUCKBITS) |
//   index = | bucketid(BUCKBITS) | slotid0(SLOTBITS) |
typedef uint32_t tree;
|
||||
|
||||
// One 32-bit unit of hash data, addressable as a whole word or byte by byte.
typedef union hashunit {
  uint32_t word;       // the unit as a single 32-bit value
  uint8_t  bytes[4];   // the same storage as four bytes
} hashunit;
|
||||
|
||||
typedef struct slot0 {
|
||||
tree attr;
|
||||
hashunit hash[HASHWORDS0];
|
||||
} slot0;
|
||||
|
||||
typedef struct slot1 {
|
||||
tree attr;
|
||||
hashunit hash[HASHWORDS1];
|
||||
} slot1;
|
||||
|
||||
// a bucket is NSLOTS treenodes
|
||||
typedef slot0 bucket0[NSLOTS];
|
||||
typedef slot1 bucket1[NSLOTS];
|
||||
// the N-bit hash consists of K+1 n-bit "digits"
|
||||
// each of which corresponds to a layer of NBUCKETS buckets
|
||||
typedef bucket0 digit0[NBUCKETS];
|
||||
typedef bucket1 digit1[NBUCKETS];
|
||||
|
||||
// manages hash and tree data
|
||||
typedef struct htalloc {
|
||||
__global bucket0 *trees0[(WK+1)/2];
|
||||
__global bucket1 *trees1[WK/2];
|
||||
} htalloc;
|
||||
|
||||
// One 32-bit value per bucket (NBUCKETS entries).
typedef uint32_t bsizes[NBUCKETS];
|
||||
|
||||
|
||||
typedef struct htlayout {
|
||||
htalloc hta;
|
||||
uint32_t prevhashunits;
|
||||
uint32_t nexthashunits;
|
||||
uint32_t dunits;
|
||||
uint32_t prevbo;
|
||||
uint32_t nextbo;
|
||||
} htlayout;
|
||||
|
||||
// xslot is 8 bits wide when RESTBITS <= 6 and 16 bits wide otherwise.
#if RESTBITS > 6
typedef uint16_t xslot;
#else
typedef uint8_t xslot;
#endif
|
||||
|
||||
typedef struct collisiondata {
|
||||
#ifdef XBITMAP
|
||||
#if NSLOTS > 64
|
||||
#error cant use XBITMAP with more than 64 slots
|
||||
#endif
|
||||
uint64_t xhashmap[NRESTS];
|
||||
uint64_t xmap;
|
||||
#else
|
||||
xslot nxhashslots[NRESTS];
|
||||
xslot xhashslots[NRESTS][XFULL];
|
||||
xslot *xx;
|
||||
uint32_t n0;
|
||||
uint32_t n1;
|
||||
#endif
|
||||
uint32_t s0;
|
||||
} collisiondata;
|
||||
|
||||
|
||||
typedef struct equi {
|
||||
blake2b_state blake_ctx;
|
||||
htalloc hta;
|
||||
__global bsizes *nslots;
|
||||
__global proof *sols;
|
||||
uint32_t nsols;
|
||||
uint32_t nthreads;
|
||||
} equi;
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue