// Input:
//
// vector<double>
//
// Output:
//
// Average and error, assuming a normal distribution.
//

#include <vector>
#include <math.h>
#include <iostream>
#include <iomanip>
#include "perfctr.h"
#include <cstdlib>
#include <cerrno>
#include <cstring>

// Critical values of Student's t-distribution.
//
// Row r belongs to the right-tail probability student_t_certainty[r].
// Within a row, entries 0..29 are looked up as [degrees of freedom - 1]
// (1..30 degrees of freedom).  t() treats entries 30, 31 and 32 as the values
// for 40, 60 and 120 degrees of freedom, and entry 33 as the limit for an
// infinite number of degrees of freedom.
static float student_t[5][34] = {
 { 3.078, 1.886, 1.638, 1.533,
   1.476, 1.440, 1.415, 1.397, 1.383,
   1.372, 1.363, 1.356, 1.350, 1.345,
   1.341, 1.337, 1.333, 1.330, 1.328,
   1.325, 1.323, 1.321, 1.319, 1.318,
   1.316, 1.315, 1.314, 1.313, 1.311,
   1.310, 1.303, 1.296, 1.289, 1.282 },
 { 6.314, 2.920, 2.353, 2.132,
   2.015, 1.943, 1.895, 1.860, 1.833,
   1.812, 1.796, 1.782, 1.771, 1.761,
   1.753, 1.746, 1.740, 1.734, 1.729,
   1.725, 1.721, 1.717, 1.714, 1.711,
   1.708, 1.706, 1.703, 1.701, 1.699,
   1.697, 1.684, 1.671, 1.658, 1.645 },
 { 12.706, 4.303, 3.182, 2.776,
   2.571, 2.447, 2.365, 2.306, 2.262,
   2.228, 2.201, 2.179, 2.160, 2.145,
   2.131, 2.120, 2.110, 2.101, 2.093,
   2.086, 2.080, 2.074, 2.069, 2.064,
   2.060, 2.056, 2.052, 2.048, 2.045,
   2.042, 2.021, 2.000, 1.980, 1.960 },
 { 31.821, 6.965, 4.541, 3.747,
   3.365, 3.143, 2.998, 2.896, 2.821,
   2.764, 2.718, 2.681, 2.650, 2.624,
   2.602, 2.583, 2.567, 2.552, 2.539,
   2.528, 2.518, 2.508, 2.500, 2.492,
   2.485, 2.479, 2.473, 2.467, 2.462,
   2.457, 2.423, 2.390, 2.358, 2.326 },
 { 63.657, 9.925, 5.841, 4.604,
   4.032, 3.707, 3.499, 3.355, 3.250,
   3.169, 3.106, 3.055, 3.012, 2.977,
   2.947, 2.921, 2.898, 2.878, 2.861,
   2.845, 2.831, 2.819, 2.807, 2.797,
   2.787, 2.779, 2.771, 2.763, 2.756,
   2.750, 2.704, 2.660, 2.617, 2.576 } };

// One-sided (right-tail) probability belonging to each row of student_t; both
// arrays are indexed with the same certainty index.  calculate_stats() prints
// 100 - 200 * value as the two-sided certainty percentage of its intervals.
static float student_t_certainty[5] = { 0.1, 0.05, 0.025, 0.01, 0.005 };        // Right cross over chance.

// Return the Student-t critical value for `freedoms` degrees of freedom, taken
// from row `certainty_index` of student_t.
//
// Neither argument is range-checked: certainty_index must be a valid row index
// and freedoms must be at least 1.
//
//   freedoms <= 30        exact table entry.
//   30 < freedoms <= 120  value at `freedoms` of the parabola through three
//                         neighbouring tabulated points (30/40/60 or 40/60/120).
//   freedoms > 120        y_limit + exp(rate * freedoms + shift), fitted so that
//                         it passes through the tabulated points for 60 and 120
//                         and approaches the table's last column.
static float
t(int certainty_index, int freedoms)
{
  float const* const row = student_t[certainty_index];

  // Directly tabulated range.
  if (freedoms <= 30)
    return row[freedoms - 1];

  // Beyond the last finite column: exponential approach to the limit value.
  if (freedoms > 120)
  {
    long const x_near = 60;
    long const x_far = 120;
    double const y_near = row[31];
    double const y_far = row[32];
    double const y_limit = row[33];
    double const log_far = log(y_far - y_limit);
    double const log_near = log(y_near - y_limit);
    double const rate = - (log_far - log_near) / (x_near - x_far);
    double const shift = (x_near * log_far - x_far * log_near) / (x_near - x_far);
    return (y_limit + exp(rate * freedoms + shift));
  }

  // In between: pick the three table points that bracket `freedoms`.
  bool const lower_bracket = (freedoms <= 60);
  int const first = lower_bracket ? 29 : 30;
  long const xa = lower_bracket ? 30 : 40;
  long const xb = lower_bracket ? 40 : 60;
  long const xc = lower_bracket ? 60 : 120;
  double const ya = row[first];
  double const yb = row[first + 1];
  double const yc = row[first + 2];

  // Coefficients of quad * x^2 + lin * x + constant through (xa,ya), (xb,yb), (xc,yc).
  double const denom =   (xa * xa * (xc - xb) + xb * xb * (xa - xc) + xc * xc * (xb - xa));
  double const quad  = - (xa      * (yc - yb) + xb      * (ya - yc) + xc      * (yb - ya)) / denom;
  double const lin   =   (xa * xa * (yc - yb) + xb * xb * (ya - yc) + xc * xc * (yb - ya)) / denom;
  double const constant = yb - quad * xb * xb - lin * xb;
  return (quad * freedoms * freedoms + lin * freedoms + constant);
}

// Compute the mean and the sample standard deviation (n - 1 denominator) of the
// values in `input` that are not rejected by the cut-offs.
//
// A value is rejected when it is greater than cut_off_high or less than
// cut_off_low; every other value is accepted.
//
// certainty_index  not used by this function.
// x_avg            receives the mean of the accepted values.
// s_n1             receives the standard deviation of the accepted values.
//
// Returns the number of accepted values.
//
// With fewer than two accepted values a standard deviation cannot be estimated.
// In that case s_n1 is set to 0 (and x_avg to 0 when nothing was accepted), so
// callers never receive the NaN that a 0/0 division would produce.
static int
normal_distribution(std::vector<double> const& input, int certainty_index, double cut_off_high, double cut_off_low,
                    double& x_avg, double & s_n1)
{
  (void)certainty_index;

  // First pass: count and sum the accepted values.
  size_t n = 0;
  double x_sum = 0;
  for (std::vector<double>::const_iterator iter(input.begin()); iter != input.end(); ++iter)
  {
    double count = *iter;
    if (count > cut_off_high || count < cut_off_low)
      continue;
    x_sum += count;
    ++n;
  }

  if (n == 0)
  {
    x_avg = 0;
    s_n1 = 0;
    return 0;
  }
  x_avg = x_sum / n;
  if (n == 1)
  {
    s_n1 = 0;
    return 1;
  }

  // Second pass: sum of squared deviations from the mean.
  double vtn1 = 0;	// variation times n - 1
  for (std::vector<double>::const_iterator iter(input.begin()); iter != input.end(); ++iter)
  {
    double count = *iter;
    if (count > cut_off_high || count < cut_off_low)
      continue;
    vtn1 += (count - x_avg) * (count - x_avg);
  }
  s_n1 = sqrt(vtn1 / (n - 1));
  return static_cast<int>(n);
}

// Result of determine_stats().
struct stats_st {
  int n;                // Number of values accepted by the last normal_distribution() pass.
  double x_avg;         // Mean of the accepted values.
  double s_n1;          // Standard deviation of the accepted values, as computed by normal_distribution().
  double cut_off_low;   // Lower acceptance limit; finally raised to the smallest input value if that is larger.
  double cut_off_high;  // Upper acceptance limit; finally lowered to the largest input value if that is smaller.
  double xright;        // On return from determine_stats(): equal to cut_off_high.
  double xleft;         // On return from determine_stats(): equal to cut_off_low.
};

// Estimate mean and standard deviation of `input` while discarding outliers.
//
// Starting from an interval that accepts everything, the statistics are
// recomputed over the values within three standard deviations of the previous
// mean, until the standard deviation no longer changes.  The iteration also
// stops when
//   - fewer than two values are accepted (no spread can be estimated), or
//   - the standard deviation is NaN (it never compares equal to itself, so
//     waiting for "no change" would loop forever; this happens for a single
//     sample and for non-finite input).
//
// Afterwards the cut-offs are tightened to the smallest / largest input value
// and copied into xleft / xright.
//
// certainty_index is only passed on to normal_distribution().
static stats_st
determine_stats(std::vector<double> const& input, int certainty_index)
{
  stats_st stats;
  stats.x_avg = 0;
  stats.s_n1 = 1e30;
  double prev_s_n1;
  do {
    prev_s_n1 = stats.s_n1;
    stats.cut_off_high = stats.x_avg + 3 * stats.s_n1;
    stats.cut_off_low = stats.x_avg - 3 * stats.s_n1;
    stats.n = normal_distribution(input, certainty_index, stats.cut_off_high, stats.cut_off_low, stats.x_avg, stats.s_n1);
  }
  while (stats.n > 1 &&
         stats.s_n1 == stats.s_n1 &&      // false only for NaN
         stats.s_n1 != prev_s_n1);

  // Smallest and largest input value.  Seeding both from the first element keeps
  // this correct for negative data; the defaults only survive for empty input.
  stats.xleft = 1e30;
  stats.xright = 0;
  std::vector<double>::const_iterator iter(input.begin());
  if (iter != input.end())
  {
    stats.xleft = stats.xright = *iter;
    for (++iter; iter != input.end(); ++iter)
    {
      double count = *iter;
      if (count < stats.xleft)
        stats.xleft = count;
      if (count > stats.xright)
        stats.xright = count;
    }
  }

  // Never report limits outside the range actually covered by the data.
  if (stats.xright < stats.cut_off_high)
    stats.cut_off_high = stats.xright;
  if (stats.xleft > stats.cut_off_low)
    stats.cut_off_low = stats.xleft;
  stats.xright = stats.cut_off_high;
  stats.xleft = stats.cut_off_low;

  return stats;
}

// Print, for each of the two data series input[0] and input[1], the outlier-
// filtered average and its confidence interval to std::cout.
//
// name   array of (at least) two labels, one per series.
// input  array of (at least) two sample vectors; an empty series is skipped.
//
// The interval half-width is t * s / sqrt(n), using the Student-t value for the
// fixed certainty index 2.  A series with a single accepted sample prints "?"
// instead of an interval.  If determine_stats() accepts no sample at all,
// "Input is noise." is printed and the function returns without looking at the
// remaining series.
//
// The formatting flags and precision of std::cout are restored on return, and
// every row starts from the same formatting state.
void calculate_stats(char const* name[], std::vector<double> const* input)
{
  // Puts std::cout's flags and precision back on every exit path.
  struct cout_state_guard {
    std::ios::fmtflags flags;
    std::streamsize precision;
    cout_state_guard() : flags(std::cout.flags()), precision(std::cout.precision()) { }
    ~cout_state_guard() { std::cout.flags(flags); std::cout.precision(precision); }
  };
  cout_state_guard restore_on_exit;
  (void)restore_on_exit;

  int const certainty_index = 2;
  std::cout << "Intervals of " << 100 - 200 * student_t_certainty[certainty_index] << "% certainty.\n";
  for (int a = 0; a < 2; ++a)
  {
    size_t n = input[a].size();
    if (n == 0)
      continue;

    stats_st stats = determine_stats(input[a], certainty_index);
    n = stats.n;
    if (n == 0)
    {
      std::cout << "Input is noise.\n";
      return;
    }

    // Label, sample count and average are right-aligned in their columns.
    std::cout.setf(std::ios::right, std::ios::adjustfield);
    std::cout << std::setw(24) << name[a] << " (" << std::setw(4) << n << ") : " << std::setw(10);
    std::cout.setf(std::ios::fixed, std::ios::floatfield);
    std::cout << std::setprecision(0) << stats.x_avg << " +/- ";
    if (n == 1)
      std::cout << "?\n";       // A single sample has no measurable spread.
    else
    {
      double err = t(certainty_index, static_cast<int>(n - 1)) * stats.s_n1 / sqrt(static_cast<double>(n));
      // The error is left-aligned in a 10 character column.
      std::cout.width(10);
      std::cout.setf(std::ios::left, std::ios::adjustfield);
      std::cout << err;
      std::cout.setf(std::ios::right, std::ios::adjustfield);
      std::cout.precision(1);
      std::cout << '(' << std::setw(4) << 100.0 * err / stats.x_avg << " % )\n";
    }
  }
}

// Handle obtained from vperfctr_open() in do_init(); passed to the other
// vperfctr_* calls in this file.
static struct vperfctr *self;
// Filled in by vperfctr_info() in do_init(); do_setup() switches on its cpu_type.
static struct perfctr_info info;
// Counter configuration built by do_setup() and applied by do_enable().
static struct vperfctr_control control;

// Open the perfctr handle `self`, query `info` for it and print that
// information to std::cout.
// Terminates the process with exit status 1 (after a perror() message) when
// either library call fails.
void do_init(void)
{
    self = vperfctr_open();
    if( !self ) {
        perror("vperfctr_open");
        exit(1);
    }
    if( vperfctr_info(self, &info) < 0 ) {
        perror("vperfctr_info");
        exit(1);
    }
    std::cout << "\nPerfCtr Info:\n";
    perfctr_print_info(&info);
}

// Sample the counters of `self` into *sum by calling vperfctr_read_ctrs().
// do_init() must have set `self` before this is called.
void do_read(struct perfctr_sum_ctrs* sum)
{
    /*
     * This is the preferred method for sampling all enabled counters.
     * It doesn't return control data or current kernel-level state though.
     * The control data can be retrieved using vperfctr_read_state().
     *
     * Alternatively you may call vperfctr_read_tsc() or vperfctr_read_pmc()
     * to sample a single counter's value.
     */
    vperfctr_read_ctrs(self, sum);
}

// Write a human readable dump of *control to std::cout: tsc_on, nractrs and,
// for each of the nractrs counters, its pmc_map and evntsel entries.
// pmc_map values below 18 are shown in decimal, larger ones in hexadecimal;
// evntsel_aux is only shown when it is non-zero.
void print_control(struct perfctr_cpu_control const* control)
{
    std::ostream& out = std::cout;

    out << "\nControl used:\n"
        << "tsc_on\t\t\t" << control->tsc_on << "\n"
        << "nractrs\t\t\t" << control->nractrs << "\n";

    for (unsigned int slot = 0; slot < control->nractrs; ++slot)
    {
        if (control->pmc_map[slot] < 18)
        {
            out << "pmc_map[" << slot << "]\t\t" << control->pmc_map[slot] << "\n";
        }
        else
        {
            out << "pmc_map[" << slot << "]\t\t0x"
                << std::hex << control->pmc_map[slot] << std::dec << "\n";
        }

        out << "evntsel[" << slot << "]\t\t0x"
            << std::hex << control->evntsel[slot] << std::dec << "\n";

        if (!control->evntsel_aux[slot])
            continue;
        out << "evntsel_aux[" << slot << "]\t\t0x"
            << std::hex << control->evntsel_aux[slot] << std::dec << "\n";
    }
}

// Fill the file-level `control` structure for the CPU reported in
// `info.cpu_type` and print the resulting configuration with print_control().
//
// By default the TSC is enabled and one counter (slot 0) is programmed; the
// individual cases below adjust that per CPU family.  An unsupported CPU type
// is reported on std::cerr and terminates the process with exit status 1.
//
// do_init() must have filled in `info` before this is called.  The settings
// only take effect once do_enable() passes `control` to the library.
void do_setup(void)
{
    unsigned int tsc_on = 1;
    unsigned int nractrs = 1;
    unsigned int pmc_map0 = 0;
    unsigned int evntsel0 = 0;
    unsigned int evntsel_aux0 = 0;

    memset(&control, 0, sizeof control);

    /* Attempt to set up control to count clocks via the TSC
       and retired instructions via PMC0. */
    switch( info.cpu_type ) {
      case PERFCTR_X86_GENERIC:
        nractrs = 0;            /* no PMCs available */
        break;
      case PERFCTR_X86_INTEL_P5:
      case PERFCTR_X86_INTEL_P5MMX:
      case PERFCTR_X86_CYRIX_MII:
        /* event 0x16 (INSTRUCTIONS_EXECUTED), count at CPL 3 */
        evntsel0 = 0x16 | (2 << 6);
        break;
      case PERFCTR_X86_INTEL_P6:
      case PERFCTR_X86_INTEL_PII:
      case PERFCTR_X86_INTEL_PIII:
      case PERFCTR_X86_AMD_K7:
        /* event 0xC0 (INST_RETIRED), count at CPL > 0, Enable */
        evntsel0 = 0xC0 | (1 << 16) | (1 << 22);
        break;
      case PERFCTR_X86_WINCHIP_C6:
        tsc_on = 0;             /* no working TSC available */
        evntsel0 = 0x02;        /* X86_INSTRUCTIONS */
        break;
      case PERFCTR_X86_WINCHIP_2:
        tsc_on = 0;             /* no working TSC available */
        evntsel0 = 0x16;        /* INSTRUCTIONS_EXECUTED */
        break;
      case PERFCTR_X86_VIA_C3:
        pmc_map0 = 1;           /* redirect PMC0 to PERFCTR1 */
        evntsel0 = 0xC0;        /* INSTRUCTIONS_EXECUTED */
        break;
      case PERFCTR_X86_INTEL_P4:
      case PERFCTR_X86_INTEL_P4M2:
        /* PMC0: IQ_COUNTER0 with fast RDPMC */
        /* Bit 31 is set with an unsigned shift: shifting a signed 1 into the
           sign bit is not a well-defined operation in older C++ standards. */
        pmc_map0 = 0x0C | (1u << 31);
        /* IQ_CCCR0: required flags, ESCR 4 (CRU_ESCR0), Enable */
        evntsel0 = (0x3 << 16) | (4 << 13) | (1 << 12);
        /* CRU_ESCR0: event 2 (instr_retired), NBOGUSNTAG, CPL>0 */
        evntsel_aux0 = (2 << 25) | (1 << 9) | (1 << 2);
        break;
      default:
        std::cerr << "cpu type " << info.cpu_type << " (" << perfctr_cpu_name(&info) << ") not supported\n";
        exit(1);
    }
    control.cpu_control.tsc_on = tsc_on;
    control.cpu_control.nractrs = nractrs;
    control.cpu_control.pmc_map[0] = pmc_map0;
    control.cpu_control.evntsel[0] = evntsel0;
    control.cpu_control.evntsel_aux[0] = evntsel_aux0;

    print_control(&control.cpu_control);
}

// Apply the configuration in `control` to `self` via vperfctr_control().
// Terminates the process with exit status 1 (after a perror() message) when
// the call fails.
void do_enable(void)
{
    if( vperfctr_control(self, &control) < 0 ) {
        perror("vperfctr_control");
        exit(1);
    }
}

// Print the counter differences between two samples and record them.
//
// before, after  two counter samples; the printed values are after - before.
// data           array of sample vectors: the TSC difference is appended to
//                data[0] and the pmc[0] difference to data[1].
//
// The TSC line is only produced when control.cpu_control.tsc_on is set, the
// pmc[0] line only when control.cpu_control.nractrs is non-zero.
void do_print(struct perfctr_sum_ctrs const* before,
              struct perfctr_sum_ctrs const* after,
              std::vector<double>* data)
{
    if (control.cpu_control.tsc_on)
    {
        std::vector<double>& tsc_samples = data[0];
        std::cout << "tsc\t\t\t" << (after->tsc - before->tsc) << std::endl;
        tsc_samples.push_back(static_cast<double>(after->tsc - before->tsc));
    }

    if (control.cpu_control.nractrs)
    {
        std::vector<double>& pmc_samples = data[1];
        std::cout << "pmc[0]\t\t\t" << (after->pmc[0] - before->pmc[0]) << std::endl;
        pmc_samples.push_back(static_cast<double>(after->pmc[0] - before->pmc[0]));
    }
}

