zcash-patched-for-explorer/src/snark/libsnark/common/profiling.cpp

380 lines
10 KiB
C++

/** @file
*****************************************************************************
Implementation of functions for profiling code blocks.
See profiling.hpp .
*****************************************************************************
* @author This file is part of libsnark, developed by SCIPR Lab
* and contributors (see AUTHORS).
* @copyright MIT license (see LICENSE file)
*****************************************************************************/
#include "common/profiling.hpp"
#include <cassert>
#include <stdexcept>
#include <chrono>
#include <cstdio>
#include <list>
#include <vector>
#include <ctime>
#include "common/default_types/ec_pp.hpp"
#include "common/utils.hpp"
#ifndef NO_PROCPS
#include <proc/readproc.h>
#endif
namespace libsnark {
long long get_nsec_time()
{
auto timepoint = std::chrono::high_resolution_clock::now();
return std::chrono::duration_cast<std::chrono::nanoseconds>(timepoint.time_since_epoch()).count();
}
/* Return total CPU time consumed by all threads of the process, in nanoseconds. */
long long get_nsec_cpu_time()
{
::timespec ts;
if ( ::clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts) )
throw ::std::runtime_error("clock_gettime(CLOCK_PROCESS_CPUTIME_ID) failed");
// If we expected this to work, don't silently ignore failures, because that would hide the problem and incur an unnecessarily system-call overhead. So if we ever observe this exception, we should probably add a suitable #ifdef .
//TODO: clock_gettime(CLOCK_PROCESS_CPUTIME_ID) is not supported by native Windows. What about Cygwin? Should we #ifdef on CLOCK_PROCESS_CPUTIME_ID or on __linux__?
return ts.tv_sec * 1000000000ll + ts.tv_nsec;
}
long long start_time, last_time;
long long start_cpu_time, last_cpu_time;
void start_profiling()
{
printf("Reset time counters for profiling\n");
last_time = start_time = get_nsec_time();
last_cpu_time = start_cpu_time = get_nsec_cpu_time();
}
std::map<std::string, size_t> invocation_counts;
std::map<std::string, long long> enter_times;
std::map<std::string, long long> last_times;
std::map<std::string, long long> cumulative_times;
//TODO: Instead of analogous maps for time and cpu_time, use a single struct-valued map
std::map<std::string, long long> enter_cpu_times;
std::map<std::string, long long> last_cpu_times;
std::map<std::pair<std::string, std::string>, long long> op_counts;
std::map<std::pair<std::string, std::string>, long long> cumulative_op_counts; // ((msg, data_point), value)
// TODO: Convert op_counts and cumulative_op_counts from pair to structs
size_t indentation = 0;
std::vector<std::string> block_names;
std::list<std::pair<std::string, long long*> > op_data_points = {
#ifdef PROFILE_OP_COUNTS
std::make_pair("Fradd", &Fr<default_ec_pp>::add_cnt),
std::make_pair("Frsub", &Fr<default_ec_pp>::sub_cnt),
std::make_pair("Frmul", &Fr<default_ec_pp>::mul_cnt),
std::make_pair("Frinv", &Fr<default_ec_pp>::inv_cnt),
std::make_pair("Fqadd", &Fq<default_ec_pp>::add_cnt),
std::make_pair("Fqsub", &Fq<default_ec_pp>::sub_cnt),
std::make_pair("Fqmul", &Fq<default_ec_pp>::mul_cnt),
std::make_pair("Fqinv", &Fq<default_ec_pp>::inv_cnt),
std::make_pair("G1add", &G1<default_ec_pp>::add_cnt),
std::make_pair("G1dbl", &G1<default_ec_pp>::dbl_cnt),
std::make_pair("G2add", &G2<default_ec_pp>::add_cnt),
std::make_pair("G2dbl", &G2<default_ec_pp>::dbl_cnt)
#endif
};
bool inhibit_profiling_info = false;
bool inhibit_profiling_counters = false;
void clear_profiling_counters()
{
invocation_counts.clear();
last_times.clear();
last_cpu_times.clear();
cumulative_times.clear();
}
void print_cumulative_time_entry(const std::string &key, const long long factor)
{
const double total_ms = (cumulative_times.at(key) * 1e-6);
const size_t cnt = invocation_counts.at(key);
const double avg_ms = total_ms / cnt;
printf(" %-45s: %12.5fms = %lld * %0.5fms (%zu invocations, %0.5fms = %lld * %0.5fms per invocation)\n", key.c_str(), total_ms, factor, total_ms/factor, cnt, avg_ms, factor, avg_ms/factor);
}
void print_cumulative_times(const long long factor)
{
printf("Dumping times:\n");
for (auto& kv : cumulative_times)
{
print_cumulative_time_entry(kv.first, factor);
}
}
void print_cumulative_op_counts(const bool only_fq)
{
#ifdef PROFILE_OP_COUNTS
printf("Dumping operation counts:\n");
for (auto& msg : invocation_counts)
{
printf(" %-45s: ", msg.first.c_str());
bool first = true;
for (auto& data_point : op_data_points)
{
if (only_fq && data_point.first.compare(0, 2, "Fq") != 0)
{
continue;
}
if (!first)
{
printf(", ");
}
printf("%-5s = %7.0f (%3zu)",
data_point.first.c_str(),
1. * cumulative_op_counts[std::make_pair(msg.first, data_point.first)] / msg.second,
msg.second);
first = false;
}
printf("\n");
}
#else
UNUSED(only_fq);
#endif
}
void print_op_profiling(const std::string &msg)
{
#ifdef PROFILE_OP_COUNTS
printf("\n");
print_indent();
printf("(opcounts) = (");
bool first = true;
for (std::pair<std::string, long long*> p : op_data_points)
{
if (!first)
{
printf(", ");
}
printf("%s=%lld", p.first.c_str(), *(p.second)-op_counts[std::make_pair(msg, p.first)]);
first = false;
}
printf(")");
#else
UNUSED(msg);
#endif
}
static void print_times_from_last_and_start(long long now, long long last,
long long cpu_now, long long cpu_last)
{
long long time_from_start = now - start_time;
long long time_from_last = now - last;
long long cpu_time_from_start = cpu_now - start_cpu_time;
long long cpu_time_from_last = cpu_now - cpu_last;
if (time_from_last != 0) {
double parallelism_from_last = 1.0 * cpu_time_from_last / time_from_last;
printf("[%0.4fs x%0.2f]", time_from_last * 1e-9, parallelism_from_last);
} else {
printf("[ ]");
}
if (time_from_start != 0) {
double parallelism_from_start = 1.0 * cpu_time_from_start / time_from_start;
printf("\t(%0.4fs x%0.2f from start)", time_from_start * 1e-9, parallelism_from_start);
}
}
void print_time(const char* msg)
{
if (inhibit_profiling_info)
{
return;
}
long long now = get_nsec_time();
long long cpu_now = get_nsec_cpu_time();
printf("%-35s\t", msg);
print_times_from_last_and_start(now, last_time, cpu_now, last_cpu_time);
#ifdef PROFILE_OP_COUNTS
print_op_profiling(msg);
#endif
printf("\n");
fflush(stdout);
last_time = now;
last_cpu_time = cpu_now;
}
void print_header(const char *msg)
{
printf("\n================================================================================\n");
printf("%s\n", msg);
printf("================================================================================\n\n");
}
void print_indent()
{
for (size_t i = 0; i < indentation; ++i)
{
printf(" ");
}
}
void op_profiling_enter(const std::string &msg)
{
for (std::pair<std::string, long long*> p : op_data_points)
{
op_counts[std::make_pair(msg, p.first)] = *(p.second);
}
}
void enter_block(const std::string &msg, const bool indent)
{
if (inhibit_profiling_counters)
{
return;
}
block_names.emplace_back(msg);
long long t = get_nsec_time();
enter_times[msg] = t;
long long cpu_t = get_nsec_cpu_time();
enter_cpu_times[msg] = cpu_t;
if (inhibit_profiling_info)
{
return;
}
#ifdef MULTICORE
#pragma omp critical
#endif
{
op_profiling_enter(msg);
print_indent();
printf("(enter) %-35s\t", msg.c_str());
print_times_from_last_and_start(t, t, cpu_t, cpu_t);
printf("\n");
fflush(stdout);
if (indent)
{
++indentation;
}
}
}
void leave_block(const std::string &msg, const bool indent)
{
if (inhibit_profiling_counters)
{
return;
}
#ifndef MULTICORE
assert(*(--block_names.end()) == msg);
#endif
block_names.pop_back();
++invocation_counts[msg];
long long t = get_nsec_time();
last_times[msg] = (t - enter_times[msg]);
cumulative_times[msg] += (t - enter_times[msg]);
long long cpu_t = get_nsec_cpu_time();
last_cpu_times[msg] = (cpu_t - enter_cpu_times[msg]);
#ifdef PROFILE_OP_COUNTS
for (std::pair<std::string, long long*> p : op_data_points)
{
cumulative_op_counts[std::make_pair(msg, p.first)] += *(p.second)-op_counts[std::make_pair(msg, p.first)];
}
#endif
if (inhibit_profiling_info)
{
return;
}
#ifdef MULTICORE
#pragma omp critical
#endif
{
if (indent)
{
--indentation;
}
print_indent();
printf("(leave) %-35s\t", msg.c_str());
print_times_from_last_and_start(t, enter_times[msg], cpu_t, enter_cpu_times[msg]);
print_op_profiling(msg);
printf("\n");
fflush(stdout);
}
}
void print_mem(const std::string &s)
{
#ifndef NO_PROCPS
struct proc_t usage;
look_up_our_self(&usage);
if (s.empty())
{
printf("* Peak vsize (physical memory+swap) in mebibytes: %lu\n", usage.vsize >> 20);
}
else
{
printf("* Peak vsize (physical memory+swap) in mebibytes (%s): %lu\n", s.c_str(), usage.vsize >> 20);
}
#else
printf("* Memory profiling not supported in NO_PROCPS mode\n");
#endif
}
void print_compilation_info()
{
#ifdef __GNUC__
printf("g++ version: %s\n", __VERSION__);
//printf("Compiled on %s %s\n", __DATE__, __TIME__);
#endif
#ifdef STATIC
printf("STATIC: yes\n");
#else
printf("STATIC: no\n");
#endif
#ifdef MULTICORE
printf("MULTICORE: yes\n");
#else
printf("MULTICORE: no\n");
#endif
#ifdef DEBUG
printf("DEBUG: yes\n");
#else
printf("DEBUG: no\n");
#endif
#ifdef PROFILE_OP_COUNTS
printf("PROFILE_OP_COUNTS: yes\n");
#else
printf("PROFILE_OP_COUNTS: no\n");
#endif
#ifdef _GLIBCXX_DEBUG
printf("_GLIBCXX_DEBUG: yes\n");
#else
printf("_GLIBCXX_DEBUG: no\n");
#endif
}
} // libsnark